/*	$OpenBSD: vfs_biomem.c,v 1.29 2014/03/28 17:57:11 mpi Exp $ */

/*
 * Copyright (c) 2007 Artur Grabowski <art@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */


#include <sys/param.h>
#include <sys/systm.h>
#include <sys/buf.h>
#include <sys/pool.h>
#include <sys/proc.h>		/* XXX for atomic */
#include <sys/mount.h>

#include <uvm/uvm_extern.h>

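/*
 * Kernel virtual address range reserved at init time for mapping buffers.
 * It is handed out in MAXPHYS-sized slots and recycled through buf_valist.
 */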
vaddr_t buf_kva_start, buf_kva_end;
int buf_needva;
TAILQ_HEAD(,buf) buf_valist;

extern struct bcachestats bcstats;

/*
 * Pages are allocated from a uvm object (we only use it for page storage,
 * all pages are wired). Since every buffer contains a contiguous range of
 * pages, reusing the pages could be very painful. Fortunately voff_t is
 * 64 bits, so we can just increment buf_page_offset all the time and ignore
 * wraparound. Even if you reuse 4GB worth of buffers every second
 * you'll still run out of time_t faster than buffers.
 */
voff_t buf_page_offset;
struct uvm_object *buf_object, buf_object_store;

vaddr_t buf_unmap(struct buf *);

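/*
 * Reserve a contiguous chunk of kernel virtual address space for buffer
 * mappings and set up the uvm object that holds the buffer pages.  The
 * kva is handed out in MAXPHYS-sized slots, accounted in bcstats.
 */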
void
buf_mem_init(vsize_t size)
{
	TAILQ_INIT(&buf_valist);

	buf_kva_start = vm_map_min(kernel_map);
	if (uvm_map(kernel_map, &buf_kva_start, size, NULL,
	    UVM_UNKNOWN_OFFSET, PAGE_SIZE, UVM_MAPFLAG(UVM_PROT_NONE,
	    UVM_PROT_NONE, UVM_INH_NONE, UVM_ADV_NORMAL, 0)))
		panic("bufinit: can't reserve VM for buffers");
	buf_kva_end = buf_kva_start + size;

	/* Contiguous mapping */
	bcstats.kvaslots = bcstats.kvaslots_avail = size / MAXPHYS;

	buf_object = &buf_object_store;

	uvm_objinit(buf_object, NULL, 1);
}

/*
 * buf_acquire and buf_release manage the kvm mappings of buffers.
 */
void
buf_acquire(struct buf *bp)
{
	KASSERT((bp->b_flags & B_BUSY) == 0);
	splassert(IPL_BIO);
	/*
	 * Busy before waiting for kvm.
	 */
	SET(bp->b_flags, B_BUSY);
	buf_map(bp);
}

/*
 * Acquire a buf but do not map it. Preserve any mapping it did have.
 */
void
buf_acquire_nomap(struct buf *bp)
{
	splassert(IPL_BIO);
	SET(bp->b_flags, B_BUSY);
	if (bp->b_data != NULL) {
		TAILQ_REMOVE(&buf_valist, bp, b_valist);
		bcstats.kvaslots_avail--;
		bcstats.busymapped++;
	}
}

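/*
 * Map a buffer's pages into kernel virtual address space.  Use the
 * pre-allocated kva range while it lasts; after that, steal the mapping
 * of an unbusy buffer on buf_valist.  Sleep when no victim is available,
 * or (for anything but the syncer and cleaner) when only the reserve
 * slots remain.
 */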
void
buf_map(struct buf *bp)
{
	vaddr_t va;

	splassert(IPL_BIO);

	if (bp->b_data == NULL) {
		unsigned long i;

		/*
		 * First, just use the pre-allocated space until we run out.
		 */
		if (buf_kva_start < buf_kva_end) {
			va = buf_kva_start;
			buf_kva_start += MAXPHYS;
			bcstats.kvaslots_avail--;
		} else {
			struct buf *vbp;

			/*
			 * Find some buffer we can steal the space from.
			 */
			vbp = TAILQ_FIRST(&buf_valist);
			while ((curproc != syncerproc &&
			    curproc != cleanerproc &&
			    bcstats.kvaslots_avail <= RESERVE_SLOTS) ||
			    vbp == NULL) {
				buf_needva++;
				tsleep(&buf_needva, PRIBIO, "buf_needva", 0);
				vbp = TAILQ_FIRST(&buf_valist);
			}
			va = buf_unmap(vbp);
		}

		for (i = 0; i < atop(bp->b_bufsize); i++) {
			struct vm_page *pg = uvm_pagelookup(bp->b_pobj,
			    bp->b_poffs + ptoa(i));

			KASSERT(pg != NULL);

			pmap_kenter_pa(va + ptoa(i), VM_PAGE_TO_PHYS(pg),
			    VM_PROT_READ|VM_PROT_WRITE);
			pmap_update(pmap_kernel());
		}
		bp->b_data = (caddr_t)va;
	} else {
		TAILQ_REMOVE(&buf_valist, bp, b_valist);
		bcstats.kvaslots_avail--;
	}

	bcstats.busymapped++;
}

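/*
 * Release a busy buffer.  If it is mapped, put its kva back on the tail
 * of buf_valist so it can be recycled, and wake up anyone waiting for kva.
 */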
void
buf_release(struct buf *bp)
{
	KASSERT(bp->b_flags & B_BUSY);
	splassert(IPL_BIO);

	if (bp->b_data) {
		bcstats.busymapped--;
		TAILQ_INSERT_TAIL(&buf_valist, bp, b_valist);
		bcstats.kvaslots_avail++;
		if (buf_needva) {
			buf_needva = 0;
			wakeup(&buf_needva);
		}
	}
	CLR(bp->b_flags, B_BUSY);
}

/*
 * Deallocate all memory resources for this buffer. We need to be careful
 * to not drop kvm since we have no way to reclaim it. So, if the buffer
 * has kvm, we need to free it later. We put it on the front of the
 * freelist just so it gets picked up faster.
 *
 * Also, lots of assertions count on bp->b_data being NULL, so we
 * set it temporarily to NULL.
 *
 * Return non-zero if we take care of the freeing later.
 */
int
buf_dealloc_mem(struct buf *bp)
{
	caddr_t data;

	splassert(IPL_BIO);

	data = bp->b_data;
	bp->b_data = NULL;

	if (data) {
		if (bp->b_flags & B_BUSY)
			bcstats.busymapped--;
		pmap_kremove((vaddr_t)data, bp->b_bufsize);
		pmap_update(pmap_kernel());
	}

	if (bp->b_pobj)
		buf_free_pages(bp);

	if (data == NULL)
		return (0);

	bp->b_data = data;
	if (!(bp->b_flags & B_BUSY)) {		/* XXX - need better test */
		TAILQ_REMOVE(&buf_valist, bp, b_valist);
		bcstats.kvaslots_avail--;
	} else {
		CLR(bp->b_flags, B_BUSY);
		if (buf_needva) {
			buf_needva = 0;
			wakeup(&buf_needva);
		}
	}
	SET(bp->b_flags, B_RELEASED);
	TAILQ_INSERT_HEAD(&buf_valist, bp, b_valist);
	bcstats.kvaslots_avail++;

	return (1);
}

/*
 * Only used by bread_cluster.
 */
void
buf_fix_mapping(struct buf *bp, vsize_t newsize)
{
	vaddr_t va = (vaddr_t)bp->b_data;

	if (newsize < bp->b_bufsize) {
		pmap_kremove(va + newsize, bp->b_bufsize - newsize);
		pmap_update(pmap_kernel());
		/*
		 * Note: the size we lost is actually with the other
		 * buffers read in by bread_cluster
		 */
		bp->b_bufsize = newsize;
	}
}

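/*
 * Strip the kva mapping from an unbusy buffer and return the va for
 * reuse by the caller.  If the buffer had already been released, it was
 * only being kept around for its mapping, so free it back to the buf pool.
 */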
vaddr_t
buf_unmap(struct buf *bp)
{
	vaddr_t va;

	KASSERT((bp->b_flags & B_BUSY) == 0);
	KASSERT(bp->b_data != NULL);
	splassert(IPL_BIO);

	TAILQ_REMOVE(&buf_valist, bp, b_valist);
	bcstats.kvaslots_avail--;
	va = (vaddr_t)bp->b_data;
	bp->b_data = NULL;
	pmap_kremove(va, bp->b_bufsize);
	pmap_update(pmap_kernel());

	if (bp->b_flags & B_RELEASED)
		pool_put(&bufpool, bp);

	return (va);
}

/* Always allocates in dma-reachable memory */
void
buf_alloc_pages(struct buf *bp, vsize_t size)
{
	voff_t offs;

	KASSERT(size == round_page(size));
	KASSERT(bp->b_pobj == NULL);
	KASSERT(bp->b_data == NULL);
	splassert(IPL_BIO);

	offs = buf_page_offset;
	buf_page_offset += size;

	KASSERT(buf_page_offset > 0);

	uvm_pagealloc_multi(buf_object, offs, size, UVM_PLA_WAITOK);
	bcstats.numbufpages += atop(size);
	bp->b_pobj = buf_object;
	bp->b_poffs = offs;
	bp->b_bufsize = size;
}

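/*
 * Unwire and free all pages backing a buffer.  The buffer must not be
 * mapped (b_data == NULL) when this is called.
 */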
void
buf_free_pages(struct buf *bp)
{
	struct uvm_object *uobj = bp->b_pobj;
	struct vm_page *pg;
	voff_t off, i;

	KASSERT(bp->b_data == NULL);
	KASSERT(uobj != NULL);
	splassert(IPL_BIO);

	off = bp->b_poffs;
	bp->b_pobj = NULL;
	bp->b_poffs = 0;

	for (i = 0; i < atop(bp->b_bufsize); i++) {
		pg = uvm_pagelookup(uobj, off + ptoa(i));
		KASSERT(pg != NULL);
		KASSERT(pg->wire_count == 1);
		pg->wire_count = 0;
		uvm_pagefree(pg);
		bcstats.numbufpages--;
	}
}

/*
 * XXX - it might make sense to make a buf_realloc_pages to avoid
 *       bouncing through the free list all the time.
 */