/*	$OpenBSD: vfs_biomem.c,v 1.39 2018/03/29 01:43:41 mlarkin Exp $ */

/*
 * Copyright (c) 2007 Artur Grabowski <art@openbsd.org>
 * Copyright (c) 2012-2016 Bob Beck <beck@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */


#include <sys/param.h>
#include <sys/systm.h>
#include <sys/buf.h>
#include <sys/pool.h>
#include <sys/proc.h>		/* XXX for atomic */
#include <sys/mount.h>

#include <uvm/uvm_extern.h>

vaddr_t buf_kva_start, buf_kva_end;
int buf_needva;
TAILQ_HEAD(,buf) buf_valist;

extern struct bcachestats bcstats;

/*
 * Pages are allocated from a uvm object (we only use it for page storage,
 * all pages are wired). Since every buffer contains a contiguous range of
 * pages, reusing the pages could be very painful. Fortunately voff_t is
 * 64 bits, so we can just increment buf_page_offset all the time and ignore
 * wraparound. Even if you reuse 4GB worth of buffers every second
 * you'll still run out of time_t faster than buffers.
 *
 */
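/*
 * (Back-of-the-envelope check of the claim above: at 4GB, i.e. 2^32
 * bytes, of buffer turnover per second, the 63 usable bits of a signed
 * voff_t last roughly 2^31 seconds, on the order of 68 years.)
 */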
voff_t buf_page_offset;
struct uvm_object *buf_object, buf_object_store;

vaddr_t buf_unmap(struct buf *);

void
buf_mem_init(vsize_t size)
{
	TAILQ_INIT(&buf_valist);

	buf_kva_start = vm_map_min(kernel_map);
	if (uvm_map(kernel_map, &buf_kva_start, size, NULL,
	    UVM_UNKNOWN_OFFSET, PAGE_SIZE, UVM_MAPFLAG(PROT_NONE,
	    PROT_NONE, MAP_INHERIT_NONE, MADV_NORMAL, 0)))
		panic("bufinit: can't reserve VM for buffers");
	buf_kva_end = buf_kva_start + size;

	/* Contiguous mapping */
	bcstats.kvaslots = bcstats.kvaslots_avail = size / MAXPHYS;

	buf_object = &buf_object_store;

	uvm_objinit(buf_object, NULL, 1);
}

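/*
 * Example (illustrative sketch, not something this file does itself):
 * buf_mem_init() is called once at boot by the buffer cache setup code
 * with the amount of kva the cache may use for mappings, e.g.
 *
 *	buf_mem_init(bufkvm);
 *
 * where "bufkvm" stands for whatever kva budget the caller computed.
 */
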
/*
 * buf_acquire and buf_release manage the kvm mappings of buffers.
 */
void
buf_acquire(struct buf *bp)
{
	KASSERT((bp->b_flags & B_BUSY) == 0);
	splassert(IPL_BIO);
	/*
	 * Busy before waiting for kvm.
	 */
	SET(bp->b_flags, B_BUSY);
	buf_map(bp);
}

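/*
 * Typical caller pattern (illustrative sketch, assuming the caller
 * already runs at IPL_BIO for buffer cache work):
 *
 *	s = splbio();
 *	buf_acquire(bp);	mark busy, map into kva
 *	... do I/O through bp->b_data ...
 *	buf_release(bp);	unbusy, mapping goes back on the free list
 *	splx(s);
 */
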
/*
 * Acquire a buf but do not map it. Preserve any mapping it did have.
 */
void
buf_acquire_nomap(struct buf *bp)
{
	splassert(IPL_BIO);
	SET(bp->b_flags, B_BUSY);
	if (bp->b_data != NULL) {
		TAILQ_REMOVE(&buf_valist, bp, b_valist);
		bcstats.kvaslots_avail--;
		bcstats.busymapped++;
	}
}

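/*
 * Map the pages of a buffer into kernel va. An existing mapping is
 * reused; otherwise kva is taken from the reserved range or stolen
 * from an unbusy mapped buffer, sleeping if none is available.
 */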
void
buf_map(struct buf *bp)
{
	vaddr_t va;

	splassert(IPL_BIO);

	if (bp->b_data == NULL) {
		unsigned long i;

		/*
		 * First, just use the pre-allocated space until we run out.
		 */
		if (buf_kva_start < buf_kva_end) {
			va = buf_kva_start;
			buf_kva_start += MAXPHYS;
			bcstats.kvaslots_avail--;
		} else {
			struct buf *vbp;

			/*
			 * Find some buffer we can steal the space from.
			 */
			vbp = TAILQ_FIRST(&buf_valist);
			while ((curproc != syncerproc &&
			   curproc != cleanerproc &&
			   bcstats.kvaslots_avail <= RESERVE_SLOTS) ||
			   vbp == NULL) {
				buf_needva++;
				tsleep(&buf_needva, PRIBIO, "buf_needva", 0);
				vbp = TAILQ_FIRST(&buf_valist);
			}
			va = buf_unmap(vbp);
		}

		for (i = 0; i < atop(bp->b_bufsize); i++) {
			struct vm_page *pg = uvm_pagelookup(bp->b_pobj,
			    bp->b_poffs + ptoa(i));

			KASSERT(pg != NULL);

			pmap_kenter_pa(va + ptoa(i), VM_PAGE_TO_PHYS(pg),
			    PROT_READ | PROT_WRITE);
		}
		pmap_update(pmap_kernel());
		bp->b_data = (caddr_t)va;
	} else {
		TAILQ_REMOVE(&buf_valist, bp, b_valist);
		bcstats.kvaslots_avail--;
	}

	bcstats.busymapped++;
}

void
buf_release(struct buf *bp)
{

	KASSERT(bp->b_flags & B_BUSY);
	splassert(IPL_BIO);

	if (bp->b_data) {
		bcstats.busymapped--;
		TAILQ_INSERT_TAIL(&buf_valist, bp, b_valist);
		bcstats.kvaslots_avail++;
		if (buf_needva) {
			buf_needva = 0;
			wakeup(&buf_needva);
		}
	}
	CLR(bp->b_flags, B_BUSY);
}

/*
 * Deallocate all memory resources for this buffer. We need to be careful
 * to not drop kvm since we have no way to reclaim it. So, if the buffer
 * has kvm, we need to free it later. We put it on the front of the
 * freelist just so it gets picked up faster.
 *
 * Also, lots of assertions count on bp->b_data being NULL, so we
 * set it temporarily to NULL.
 *
 * Return non-zero if we take care of the freeing later.
 */
int
buf_dealloc_mem(struct buf *bp)
{
	caddr_t data;

	splassert(IPL_BIO);

	data = bp->b_data;
	bp->b_data = NULL;

	if (data) {
		if (bp->b_flags & B_BUSY)
			bcstats.busymapped--;
		pmap_kremove((vaddr_t)data, bp->b_bufsize);
		pmap_update(pmap_kernel());
	}

	if (bp->b_pobj)
		buf_free_pages(bp);

	if (data == NULL)
		return (0);

	bp->b_data = data;
	if (!(bp->b_flags & B_BUSY)) {		/* XXX - need better test */
		TAILQ_REMOVE(&buf_valist, bp, b_valist);
		bcstats.kvaslots_avail--;
	} else {
		CLR(bp->b_flags, B_BUSY);
		if (buf_needva) {
			buf_needva = 0;
			wakeup(&buf_needva);
		}
	}
	SET(bp->b_flags, B_RELEASED);
	TAILQ_INSERT_HEAD(&buf_valist, bp, b_valist);
	bcstats.kvaslots_avail++;

	return (1);
}

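/*
 * Illustrative caller sketch (an assumption about the caller, not part
 * of this file): code that retires a buffer lets buf_dealloc_mem()
 * decide whether the buf may be freed right away:
 *
 *	if (buf_dealloc_mem(bp) != 0)
 *		return;		still holds kva; buf_unmap() below will
 *				pool_put() it once the kva is reclaimed
 *	pool_put(&bufpool, bp);
 */
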
/*
 * Only used by bread_cluster.
 */
void
buf_fix_mapping(struct buf *bp, vsize_t newsize)
{
	vaddr_t va = (vaddr_t)bp->b_data;

	if (newsize < bp->b_bufsize) {
		pmap_kremove(va + newsize, bp->b_bufsize - newsize);
		pmap_update(pmap_kernel());
		/*
		 * Note: the mapping we give up here actually belongs
		 * to the other buffers read in by bread_cluster.
		 */
		bp->b_bufsize = newsize;
	}
}

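/*
 * Steal the kva mapping of an unbusy buffer and hand its va back to
 * the caller. If the buffer was marked B_RELEASED, this also finishes
 * the deferred free started in buf_dealloc_mem().
 */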
vaddr_t
buf_unmap(struct buf *bp)
{
	vaddr_t va;

	KASSERT((bp->b_flags & B_BUSY) == 0);
	KASSERT(bp->b_data != NULL);
	splassert(IPL_BIO);

	TAILQ_REMOVE(&buf_valist, bp, b_valist);
	bcstats.kvaslots_avail--;
	va = (vaddr_t)bp->b_data;
	bp->b_data = 0;
	pmap_kremove(va, bp->b_bufsize);
	pmap_update(pmap_kernel());

	if (bp->b_flags & B_RELEASED)
		pool_put(&bufpool, bp);

	return (va);
}

/* Always allocates in dma-reachable memory */
void
buf_alloc_pages(struct buf *bp, vsize_t size)
{
	voff_t offs;
	int i;

	KASSERT(size == round_page(size));
	KASSERT(bp->b_pobj == NULL);
	KASSERT(bp->b_data == NULL);
	splassert(IPL_BIO);

	offs = buf_page_offset;
	buf_page_offset += size;

	KASSERT(buf_page_offset > 0);

	/*
	 * Attempt to allocate with NOWAIT. If we can't, then throw
	 * away some clean pages and try again. Finally, if that
	 * fails, do a WAITOK allocation so the page daemon can find
	 * memory for us.
	 */
	do {
		i = uvm_pagealloc_multi(buf_object, offs, size,
		    UVM_PLA_NOWAIT);
		if (i == 0)
			break;
	} while (bufbackoff(&dma_constraint, 100) == 0);
	if (i != 0)
		i = uvm_pagealloc_multi(buf_object, offs, size,
		    UVM_PLA_WAITOK);
	/* should not happen */
	if (i != 0)
		panic("uvm_pagealloc_multi unable to allocate a buf_object "
		    "of size %lu", size);

	bcstats.numbufpages += atop(size);
	bcstats.dmapages += atop(size);
	SET(bp->b_flags, B_DMA);
	bp->b_pobj = buf_object;
	bp->b_poffs = offs;
	bp->b_bufsize = size;
}
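/*
 * Illustrative sketch (an assumption, not something this file does
 * itself): a fresh buffer gets backing pages and a mapping with
 * something like
 *
 *	s = splbio();
 *	buf_alloc_pages(bp, round_page(size));	pages wired, B_DMA set
 *	buf_map(bp);				bp->b_data now valid
 *	splx(s);
 *
 * and gives them back through buf_dealloc_mem() above or
 * buf_free_pages() below.
 */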

void
buf_free_pages(struct buf *bp)
{
	struct uvm_object *uobj = bp->b_pobj;
	struct vm_page *pg;
	voff_t off, i;

	KASSERT(bp->b_data == NULL);
	KASSERT(uobj != NULL);
	splassert(IPL_BIO);

	off = bp->b_poffs;
	bp->b_pobj = NULL;
	bp->b_poffs = 0;

	for (i = 0; i < atop(bp->b_bufsize); i++) {
		pg = uvm_pagelookup(uobj, off + ptoa(i));
		KASSERT(pg != NULL);
		KASSERT(pg->wire_count == 1);
		pg->wire_count = 0;
		uvm_pagefree(pg);
		bcstats.numbufpages--;
		if (ISSET(bp->b_flags, B_DMA))
			bcstats.dmapages--;
	}
	CLR(bp->b_flags, B_DMA);
}

/* Reallocate a buf into a particular pmem range specified by "where". */
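/*
 * Illustrative caller sketch (an assumption, not something this file
 * does itself): the buffer cache can use this to migrate a busy
 * buffer's pages, e.g. to make them dma-reachable before I/O:
 *
 *	if (!ISSET(bp->b_flags, B_DMA) &&
 *	    buf_realloc_pages(bp, &dma_constraint, UVM_PLA_WAITOK) != 0)
 *		handle failure; the pages stay where they were
 */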
int
buf_realloc_pages(struct buf *bp, struct uvm_constraint_range *where,
    int flags)
{
	vaddr_t va;
	int dma;
	int i, r;
	KASSERT(!(flags & UVM_PLA_WAITOK) ^ !(flags & UVM_PLA_NOWAIT));

	splassert(IPL_BIO);
	KASSERT(ISSET(bp->b_flags, B_BUSY));
	dma = ISSET(bp->b_flags, B_DMA);

	/* if the original buf is mapped, unmap it */
	if (bp->b_data != NULL) {
		va = (vaddr_t)bp->b_data;
		pmap_kremove(va, bp->b_bufsize);
		pmap_update(pmap_kernel());
	}

	do {
		r = uvm_pagerealloc_multi(bp->b_pobj, bp->b_poffs,
		    bp->b_bufsize, UVM_PLA_NOWAIT, where);
		if (r == 0)
			break;
	} while (bufbackoff(where, atop(bp->b_bufsize)) == 0);

	/*
	 * bufbackoff() failed, so there's no more we can do without
	 * waiting. If we are allowed to wait, make that attempt.
	 */
	if (r != 0 && (flags & UVM_PLA_WAITOK))
		r = uvm_pagerealloc_multi(bp->b_pobj, bp->b_poffs,
		    bp->b_bufsize, flags, where);

	/*
	 * If the allocation has succeeded, we may be somewhere different.
	 * If the allocation has failed, we are in the same place.
	 *
	 * We still have to re-map the buffer before returning.
	 */

	/* take it out of dma stats until we know where we are */
	if (dma)
		bcstats.dmapages -= atop(bp->b_bufsize);

	dma = 1;
	/* recheck dma reachability and, if the buf was mapped, re-map it */
	for (i = 0; i < atop(bp->b_bufsize); i++) {
		struct vm_page *pg = uvm_pagelookup(bp->b_pobj,
		    bp->b_poffs + ptoa(i));
		KASSERT(pg != NULL);
		if (!PADDR_IS_DMA_REACHABLE(VM_PAGE_TO_PHYS(pg)))
			dma = 0;
		if (bp->b_data != NULL) {
			pmap_kenter_pa(va + ptoa(i), VM_PAGE_TO_PHYS(pg),
			    PROT_READ|PROT_WRITE);
			pmap_update(pmap_kernel());
		}
	}
	if (dma) {
		SET(bp->b_flags, B_DMA);
		bcstats.dmapages += atop(bp->b_bufsize);
	} else
		CLR(bp->b_flags, B_DMA);
	return (r);
}
407