/*	$OpenBSD: vfs_biomem.c,v 1.12 2009/08/09 17:45:02 art Exp $	*/
/*
 * Copyright (c) 2007 Artur Grabowski <art@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */


#include <sys/param.h>
#include <sys/systm.h>
#include <sys/buf.h>
#include <sys/pool.h>
#include <sys/mount.h>

#include <uvm/uvm_extern.h>
#include <uvm/uvm.h>

vaddr_t buf_kva_start, buf_kva_end;
int buf_needva;
TAILQ_HEAD(,buf) buf_valist;

int buf_nkvmsleep;

extern struct bcachestats bcstats;

/*
 * Pages are allocated from a uvm object (we only use it for page storage,
 * all pages are wired). Since every buffer contains a contiguous range of
 * pages, reusing the pages could be very painful. Fortunately voff_t is
 * 64 bits, so we can just increment buf_page_offset all the time and ignore
 * wraparound. Even if you reuse 4GB worth of buffers every second
 * you'll still run out of time_t faster than buffers.
 *
 * XXX - the spl locking in here is extreme paranoia right now until I figure
 * it all out.
 */
voff_t buf_page_offset;
struct uvm_object *buf_object, buf_object_store;

vaddr_t buf_unmap(struct buf *);

void
buf_mem_init(vsize_t size)
{
	TAILQ_INIT(&buf_valist);

	buf_kva_start = vm_map_min(kernel_map);
	if (uvm_map(kernel_map, &buf_kva_start, size, NULL,
	    UVM_UNKNOWN_OFFSET, PAGE_SIZE, UVM_MAPFLAG(UVM_PROT_NONE,
	    UVM_PROT_NONE, UVM_INH_NONE, UVM_ADV_NORMAL, 0)))
		panic("bufinit: can't reserve VM for buffers");
	buf_kva_end = buf_kva_start + size;

	buf_object = &buf_object_store;

	buf_object->pgops = NULL;
	RB_INIT(&buf_object->memt);
	buf_object->uo_npages = 0;
	buf_object->uo_refs = 1;
}

/*
 * buf_acquire and buf_release manage the kvm mappings of buffers.
 */
void
buf_acquire(struct buf *bp)
{
	int s;

	KASSERT((bp->b_flags & B_BUSY) == 0);

	s = splbio();
	/*
	 * Busy before waiting for kvm.
	 */
	SET(bp->b_flags, B_BUSY);
	buf_map(bp);

	splx(s);
}

/*
 * Busy a buffer, but don't map it.
 * If it has a mapping, we keep it, but we also keep the mapping on
 * the list since we assume that it won't be used anymore.
 */
void
buf_acquire_unmapped(struct buf *bp)
{
	int s;

	s = splbio();
	SET(bp->b_flags, B_BUSY|B_NOTMAPPED);
	splx(s);
}

void
buf_map(struct buf *bp)
{
	vaddr_t va;

	splassert(IPL_BIO);

	if (bp->b_data == NULL) {
		unsigned long i;

		/*
		 * First, just use the pre-allocated space until we run out.
		 */
		if (buf_kva_start < buf_kva_end) {
			va = buf_kva_start;
			buf_kva_start += MAXPHYS;
		} else {
			struct buf *vbp;

			/*
			 * Find some buffer we can steal the space from.
			 */
			while ((vbp = TAILQ_FIRST(&buf_valist)) == NULL) {
				buf_needva++;
				buf_nkvmsleep++;
				tsleep(&buf_needva, PRIBIO, "buf_needva", 0);
			}
			va = buf_unmap(vbp);
		}

		for (i = 0; i < atop(bp->b_bufsize); i++) {
			struct vm_page *pg = uvm_pagelookup(bp->b_pobj,
			    bp->b_poffs + ptoa(i));

			KASSERT(pg != NULL);

			pmap_kenter_pa(va + ptoa(i), VM_PAGE_TO_PHYS(pg),
			    VM_PROT_READ|VM_PROT_WRITE);
			pmap_update(pmap_kernel());
		}
		bp->b_data = (caddr_t)va;
	} else {
		TAILQ_REMOVE(&buf_valist, bp, b_valist);
	}

	bcstats.busymapped++;

	CLR(bp->b_flags, B_NOTMAPPED);
}

void
buf_release(struct buf *bp)
{
	int s;

	KASSERT(bp->b_flags & B_BUSY);
	KASSERT((bp->b_data != NULL) || (bp->b_flags & B_NOTMAPPED));

	s = splbio();
	if (bp->b_data) {
		bcstats.busymapped--;
		TAILQ_INSERT_TAIL(&buf_valist, bp, b_valist);
		if (buf_needva) {
			buf_needva--;
			wakeup_one(&buf_needva);
		}
	}
	CLR(bp->b_flags, B_BUSY|B_NOTMAPPED);
	splx(s);
}

/*
 * Deallocate all memory resources for this buffer. We need to be careful
 * to not drop kvm since we have no way to reclaim it. So, if the buffer
 * has kvm, we need to free it later. We put it on the front of the
 * freelist just so it gets picked up faster.
 *
 * Also, lots of assertions count on bp->b_data being NULL, so we
 * set it temporarily to NULL.
 *
 * Return non-zero if we take care of the freeing later.
 */
int
buf_dealloc_mem(struct buf *bp)
{
	caddr_t data;
	int s;

	s = splbio();

	data = bp->b_data;
	bp->b_data = NULL;

	if (data) {
		if (bp->b_flags & B_BUSY)
			bcstats.busymapped--;
		pmap_kremove((vaddr_t)data, bp->b_bufsize);
		pmap_update(pmap_kernel());
	}

	if (bp->b_pobj)
		buf_free_pages(bp);

	if (data == NULL) {
		splx(s);
		return (0);
	}

	bp->b_data = data;
	if (!(bp->b_flags & B_BUSY))		/* XXX - need better test */
		TAILQ_REMOVE(&buf_valist, bp, b_valist);
	else
		CLR(bp->b_flags, B_BUSY);
	SET(bp->b_flags, B_RELEASED);
	TAILQ_INSERT_HEAD(&buf_valist, bp, b_valist);

	splx(s);

	return (1);
}

void
buf_shrink_mem(struct buf *bp, vsize_t newsize)
{
	vaddr_t va = (vaddr_t)bp->b_data;

	if (newsize < bp->b_bufsize) {
		pmap_kremove(va + newsize, bp->b_bufsize - newsize);
		pmap_update(pmap_kernel());
		bp->b_bufsize = newsize;
	}
}

vaddr_t
buf_unmap(struct buf *bp)
{
	vaddr_t va;
	int s;

	KASSERT((bp->b_flags & B_BUSY) == 0);
	KASSERT(bp->b_data != NULL);

	s = splbio();
	TAILQ_REMOVE(&buf_valist, bp, b_valist);
	va = (vaddr_t)bp->b_data;
	bp->b_data = 0;
	pmap_kremove(va, bp->b_bufsize);
	pmap_update(pmap_kernel());

	if (bp->b_flags & B_RELEASED)
		pool_put(&bufpool, bp);

	splx(s);

	return (va);
}

void
buf_alloc_pages(struct buf *bp, vsize_t size)
{
	struct vm_page *pg;
	voff_t offs, i;
	int s;

	KASSERT(size == round_page(size));
	KASSERT(bp->b_pobj == NULL);
	KASSERT(bp->b_data == NULL);

	s = splbio();

	offs = buf_page_offset;
	buf_page_offset += size;

	KASSERT(buf_page_offset > 0);

	for (i = 0; i < atop(size); i++) {
#if defined(DEBUG) || 1
		if ((pg = uvm_pagelookup(buf_object, offs + ptoa(i))))
			panic("buf_alloc_pages: overlap buf: %p page: %p",
			    bp, pg);
#endif

		while ((pg = uvm_pagealloc(buf_object, offs + ptoa(i),
		    NULL, 0)) == NULL) {
			uvm_wait("buf_alloc_pages");
		}
		pg->wire_count = 1;
		atomic_clearbits_int(&pg->pg_flags, PG_BUSY);
		bcstats.numbufpages++;
	}

	bp->b_pobj = buf_object;
	bp->b_poffs = offs;
	bp->b_bufsize = size;
	splx(s);
}

void
buf_free_pages(struct buf *bp)
{
	struct uvm_object *uobj = bp->b_pobj;
	struct vm_page *pg;
	voff_t off, i;
	int s;

	KASSERT(bp->b_data == NULL);
	KASSERT(uobj != NULL);

	s = splbio();

	off = bp->b_poffs;
	bp->b_pobj = NULL;
	bp->b_poffs = 0;

	for (i = 0; i < atop(bp->b_bufsize); i++) {
		pg = uvm_pagelookup(uobj, off + ptoa(i));
		KASSERT(pg != NULL);
		KASSERT(pg->wire_count == 1);
		pg->wire_count = 0;
		uvm_pagefree(pg);
		bcstats.numbufpages--;
	}
	splx(s);
}

/*
 * XXX - it might make sense to make a buf_realloc_pages to avoid
 * bouncing through the free list all the time.
 */
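
/*
 * Hypothetical sketch of the buf_realloc_pages() idea suggested in the
 * XXX comment above, disabled and untested.  It only handles shrinking a
 * buffer's page run in place (freeing the tail pages), so the common
 * shrink case does not have to bounce every page through the free list
 * via buf_free_pages()/buf_alloc_pages().  Growing would still need a
 * fresh offset range in buf_object, so callers that grow keep using the
 * existing free/alloc pair.  The name, signature and shrink-only
 * behaviour are assumptions; only primitives already used in this file
 * are used.
 */
#if 0
void
buf_realloc_pages(struct buf *bp, vsize_t newsize)
{
	struct vm_page *pg;
	voff_t i;
	int s;

	KASSERT(newsize == round_page(newsize));
	KASSERT(bp->b_data == NULL);	/* unmapped, as for buf_free_pages() */
	KASSERT(bp->b_pobj != NULL);

	/* Growing is not handled in this sketch. */
	if (newsize >= bp->b_bufsize)
		return;

	s = splbio();
	/* Free only the pages beyond the new size; keep the rest in place. */
	for (i = atop(newsize); i < atop(bp->b_bufsize); i++) {
		pg = uvm_pagelookup(bp->b_pobj, bp->b_poffs + ptoa(i));
		KASSERT(pg != NULL);
		KASSERT(pg->wire_count == 1);
		pg->wire_count = 0;
		uvm_pagefree(pg);
		bcstats.numbufpages--;
	}
	bp->b_bufsize = newsize;
	splx(s);
}
#endif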