/*	$OpenBSD: vfs_biomem.c,v 1.29 2014/03/28 17:57:11 mpi Exp $	*/

/*
 * Copyright (c) 2007 Artur Grabowski <art@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/buf.h>
#include <sys/pool.h>
#include <sys/proc.h>		/* XXX for atomic */
#include <sys/mount.h>

#include <uvm/uvm_extern.h>

vaddr_t buf_kva_start, buf_kva_end;
int buf_needva;
TAILQ_HEAD(,buf) buf_valist;

extern struct bcachestats bcstats;

/*
 * Pages are allocated from a uvm object (we only use it for page storage,
 * all pages are wired). Since every buffer contains a contiguous range of
 * pages, reusing the pages could be very painful. Fortunately voff_t is
 * 64 bits, so we can just increment buf_page_offset all the time and ignore
 * wraparound. Even if you reuse 4GB worth of buffers every second
 * you'll still run out of time_t faster than buffers.
 */
voff_t buf_page_offset;
struct uvm_object *buf_object, buf_object_store;

vaddr_t buf_unmap(struct buf *);

void
buf_mem_init(vsize_t size)
{
	TAILQ_INIT(&buf_valist);

	buf_kva_start = vm_map_min(kernel_map);
	if (uvm_map(kernel_map, &buf_kva_start, size, NULL,
	    UVM_UNKNOWN_OFFSET, PAGE_SIZE, UVM_MAPFLAG(UVM_PROT_NONE,
	    UVM_PROT_NONE, UVM_INH_NONE, UVM_ADV_NORMAL, 0)))
		panic("bufinit: can't reserve VM for buffers");
	buf_kva_end = buf_kva_start + size;

	/* Contiguous mapping */
	bcstats.kvaslots = bcstats.kvaslots_avail = size / MAXPHYS;

	buf_object = &buf_object_store;

	uvm_objinit(buf_object, NULL, 1);
}

/*
 * buf_acquire and buf_release manage the kvm mappings of buffers.
 */
void
buf_acquire(struct buf *bp)
{
	KASSERT((bp->b_flags & B_BUSY) == 0);
	splassert(IPL_BIO);
	/*
	 * Busy before waiting for kvm.
	 */
	SET(bp->b_flags, B_BUSY);
	buf_map(bp);
}

/*
 * Acquire a buf but do not map it. Preserve any mapping it did have.
 */
void
buf_acquire_nomap(struct buf *bp)
{
	splassert(IPL_BIO);
	SET(bp->b_flags, B_BUSY);
	if (bp->b_data != NULL) {
		TAILQ_REMOVE(&buf_valist, bp, b_valist);
		bcstats.kvaslots_avail--;
		bcstats.busymapped++;
	}
}

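/*
 * Illustrative sketch: how a hypothetical caller in the buffer cache would
 * typically use the buf_acquire()/buf_release() pair above.  Callers run at
 * IPL_BIO; the splbio()/splx() bracketing and the I/O step shown here are
 * only placeholders, not code from this file.
 *
 *	s = splbio();
 *	buf_acquire(bp);	(sets B_BUSY and maps pages at bp->b_data)
 *	splx(s);
 *	... start and wait for I/O through bp->b_data ...
 *	s = splbio();
 *	buf_release(bp);	(clears B_BUSY; kva stays cached on buf_valist)
 *	splx(s);
 */
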
void
buf_map(struct buf *bp)
{
	vaddr_t va;

	splassert(IPL_BIO);

	if (bp->b_data == NULL) {
		unsigned long i;

		/*
		 * First, just use the pre-allocated space until we run out.
		 */
		if (buf_kva_start < buf_kva_end) {
			va = buf_kva_start;
			buf_kva_start += MAXPHYS;
			bcstats.kvaslots_avail--;
		} else {
			struct buf *vbp;

			/*
			 * Find some buffer we can steal the space from.
			 */
			vbp = TAILQ_FIRST(&buf_valist);
			while ((curproc != syncerproc &&
			    curproc != cleanerproc &&
			    bcstats.kvaslots_avail <= RESERVE_SLOTS) ||
			    vbp == NULL) {
				buf_needva++;
				tsleep(&buf_needva, PRIBIO, "buf_needva", 0);
				vbp = TAILQ_FIRST(&buf_valist);
			}
			va = buf_unmap(vbp);
		}

		for (i = 0; i < atop(bp->b_bufsize); i++) {
			struct vm_page *pg = uvm_pagelookup(bp->b_pobj,
			    bp->b_poffs + ptoa(i));

			KASSERT(pg != NULL);

			pmap_kenter_pa(va + ptoa(i), VM_PAGE_TO_PHYS(pg),
			    VM_PROT_READ|VM_PROT_WRITE);
			pmap_update(pmap_kernel());
		}
		bp->b_data = (caddr_t)va;
	} else {
		TAILQ_REMOVE(&buf_valist, bp, b_valist);
		bcstats.kvaslots_avail--;
	}

	bcstats.busymapped++;
}

void
buf_release(struct buf *bp)
{
	KASSERT(bp->b_flags & B_BUSY);
	splassert(IPL_BIO);

	if (bp->b_data) {
		bcstats.busymapped--;
		TAILQ_INSERT_TAIL(&buf_valist, bp, b_valist);
		bcstats.kvaslots_avail++;
		if (buf_needva) {
			buf_needva = 0;
			wakeup(&buf_needva);
		}
	}
	CLR(bp->b_flags, B_BUSY);
}

/*
 * Deallocate all memory resources for this buffer. We need to be careful
 * to not drop kvm since we have no way to reclaim it. So, if the buffer
 * has kvm, we need to free it later. We put it on the front of the
 * freelist just so it gets picked up faster.
 *
 * Also, lots of assertions count on bp->b_data being NULL, so we
 * set it temporarily to NULL.
 *
 * Return non-zero if we take care of the freeing later.
 */
int
buf_dealloc_mem(struct buf *bp)
{
	caddr_t data;

	splassert(IPL_BIO);

	data = bp->b_data;
	bp->b_data = NULL;

	if (data) {
		if (bp->b_flags & B_BUSY)
			bcstats.busymapped--;
		pmap_kremove((vaddr_t)data, bp->b_bufsize);
		pmap_update(pmap_kernel());
	}

	if (bp->b_pobj)
		buf_free_pages(bp);

	if (data == NULL)
		return (0);

	bp->b_data = data;
	if (!(bp->b_flags & B_BUSY)) {	/* XXX - need better test */
		TAILQ_REMOVE(&buf_valist, bp, b_valist);
		bcstats.kvaslots_avail--;
	} else {
		CLR(bp->b_flags, B_BUSY);
		if (buf_needva) {
			buf_needva = 0;
			wakeup(&buf_needva);
		}
	}
	SET(bp->b_flags, B_RELEASED);
	TAILQ_INSERT_HEAD(&buf_valist, bp, b_valist);
	bcstats.kvaslots_avail++;

	return (1);
}

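/*
 * Illustrative sketch: how a hypothetical caller is expected to act on the
 * return value of buf_dealloc_mem() above.  The surrounding code is a
 * placeholder; only buf_dealloc_mem(), buf_unmap() and bufpool are names
 * taken from this file.
 *
 *	if (buf_dealloc_mem(bp) != 0)
 *		return;			(kva still held; buf_unmap() will
 *					 pool_put() the released buf later)
 *	pool_put(&bufpool, bp);		(no kva was held, free the buf now)
 */
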
/*
 * Only used by bread_cluster.
 */
void
buf_fix_mapping(struct buf *bp, vsize_t newsize)
{
	vaddr_t va = (vaddr_t)bp->b_data;

	if (newsize < bp->b_bufsize) {
		pmap_kremove(va + newsize, bp->b_bufsize - newsize);
		pmap_update(pmap_kernel());
		/*
		 * Note: the size we lost is actually with the other
		 * buffers read in by bread_cluster
		 */
		bp->b_bufsize = newsize;
	}
}

vaddr_t
buf_unmap(struct buf *bp)
{
	vaddr_t va;

	KASSERT((bp->b_flags & B_BUSY) == 0);
	KASSERT(bp->b_data != NULL);
	splassert(IPL_BIO);

	TAILQ_REMOVE(&buf_valist, bp, b_valist);
	bcstats.kvaslots_avail--;
	va = (vaddr_t)bp->b_data;
	bp->b_data = 0;
	pmap_kremove(va, bp->b_bufsize);
	pmap_update(pmap_kernel());

	if (bp->b_flags & B_RELEASED)
		pool_put(&bufpool, bp);

	return (va);
}

/* Always allocates in dma-reachable memory */
void
buf_alloc_pages(struct buf *bp, vsize_t size)
{
	voff_t offs;

	KASSERT(size == round_page(size));
	KASSERT(bp->b_pobj == NULL);
	KASSERT(bp->b_data == NULL);
	splassert(IPL_BIO);

	offs = buf_page_offset;
	buf_page_offset += size;

	KASSERT(buf_page_offset > 0);

	uvm_pagealloc_multi(buf_object, offs, size, UVM_PLA_WAITOK);
	bcstats.numbufpages += atop(size);
	bp->b_pobj = buf_object;
	bp->b_poffs = offs;
	bp->b_bufsize = size;
}

void
buf_free_pages(struct buf *bp)
{
	struct uvm_object *uobj = bp->b_pobj;
	struct vm_page *pg;
	voff_t off, i;

	KASSERT(bp->b_data == NULL);
	KASSERT(uobj != NULL);
	splassert(IPL_BIO);

	off = bp->b_poffs;
	bp->b_pobj = NULL;
	bp->b_poffs = 0;

	for (i = 0; i < atop(bp->b_bufsize); i++) {
		pg = uvm_pagelookup(uobj, off + ptoa(i));
		KASSERT(pg != NULL);
		KASSERT(pg->wire_count == 1);
		pg->wire_count = 0;
		uvm_pagefree(pg);
		bcstats.numbufpages--;
	}
}

/*
 * XXX - it might make sense to make a buf_realloc_pages to avoid
 * bouncing through the free list all the time.
 */