/*	$OpenBSD: vfs_biomem.c,v 1.18 2011/09/19 14:48:04 beck Exp $	*/
/*
 * Copyright (c) 2007 Artur Grabowski <art@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */


#include <sys/param.h>
#include <sys/systm.h>
#include <sys/buf.h>
#include <sys/pool.h>
#include <sys/proc.h>		/* XXX for atomic */
#include <sys/mount.h>

#include <uvm/uvm_extern.h>
#include <uvm/uvm.h>

vaddr_t buf_kva_start, buf_kva_end;
int buf_needva;
TAILQ_HEAD(,buf) buf_valist;

int buf_nkvmsleep;

extern struct bcachestats bcstats;

/*
 * Pages are allocated from a uvm object (we only use it for page storage,
 * all pages are wired). Since every buffer contains a contiguous range of
 * pages, reusing the pages could be very painful. Fortunately voff_t is
 * 64 bits, so we can just increment buf_page_offset all the time and ignore
 * wraparound. Even if you reuse 4GB worth of buffers every second
 * you'll still run out of time_t faster than buffers.
 *
 * XXX - the spl locking in here is extreme paranoia right now until I figure
 * it all out.
 */
voff_t buf_page_offset;
struct uvm_object *buf_object, buf_object_store;

vaddr_t buf_unmap(struct buf *);

void
buf_mem_init(vsize_t size)
{
	TAILQ_INIT(&buf_valist);

	buf_kva_start = vm_map_min(kernel_map);
	if (uvm_map(kernel_map, &buf_kva_start, size, NULL,
	    UVM_UNKNOWN_OFFSET, PAGE_SIZE, UVM_MAPFLAG(UVM_PROT_NONE,
	    UVM_PROT_NONE, UVM_INH_NONE, UVM_ADV_NORMAL, 0)))
		panic("bufinit: can't reserve VM for buffers");
	buf_kva_end = buf_kva_start + size;

	/* Contiguous mapping */
	bcstats.kvaslots = bcstats.kvaslots_avail = size / MAXPHYS;

	buf_object = &buf_object_store;

	uvm_objinit(buf_object, NULL, 1);
}

/*
 * buf_acquire and buf_release manage the kvm mappings of buffers.
 */
void
buf_acquire(struct buf *bp)
{
	int s;

	KASSERT((bp->b_flags & B_BUSY) == 0);

	s = splbio();
	/*
	 * Busy before waiting for kvm.
	 */
	SET(bp->b_flags, B_BUSY);
	buf_map(bp);

	splx(s);
}

/*
 * Busy a buffer, but don't map it.
 * If it has a mapping, we keep it, but we also keep the mapping on
 * the list since we assume that it won't be used anymore.
 */
void
buf_acquire_unmapped(struct buf *bp)
{
	int s;

	s = splbio();
	SET(bp->b_flags, B_BUSY|B_NOTMAPPED);
	splx(s);
}

/*
 * Map the pages of a busy buffer into the buffer cache kva, stealing
 * the mapping of an idle buffer if the reserved range has run out.
 */
void
buf_map(struct buf *bp)
{
	vaddr_t va;

	splassert(IPL_BIO);

	if (bp->b_data == NULL) {
		unsigned long i;

		/*
		 * First, just use the pre-allocated space until we run out.
		 */
		if (buf_kva_start < buf_kva_end) {
			va = buf_kva_start;
			buf_kva_start += MAXPHYS;
			bcstats.kvaslots_avail--;
		} else {
			struct buf *vbp;

			/*
			 * Find some buffer we can steal the space from.
			 */
			while ((vbp = TAILQ_FIRST(&buf_valist)) == NULL) {
				buf_needva++;
				buf_nkvmsleep++;
				tsleep(&buf_needva, PRIBIO, "buf_needva", 0);
			}
			va = buf_unmap(vbp);
		}

		for (i = 0; i < atop(bp->b_bufsize); i++) {
			struct vm_page *pg = uvm_pagelookup(bp->b_pobj,
			    bp->b_poffs + ptoa(i));

			KASSERT(pg != NULL);

			pmap_kenter_pa(va + ptoa(i), VM_PAGE_TO_PHYS(pg),
			    VM_PROT_READ|VM_PROT_WRITE);
			pmap_update(pmap_kernel());
		}
		bp->b_data = (caddr_t)va;
	} else {
		TAILQ_REMOVE(&buf_valist, bp, b_valist);
		bcstats.kvaslots_avail--;
	}

	bcstats.busymapped++;

	CLR(bp->b_flags, B_NOTMAPPED);
}

void
buf_release(struct buf *bp)
{
	int s;

	KASSERT(bp->b_flags & B_BUSY);
	KASSERT((bp->b_data != NULL) || (bp->b_flags & B_NOTMAPPED));

	s = splbio();
	if (bp->b_data) {
		bcstats.busymapped--;
		TAILQ_INSERT_TAIL(&buf_valist, bp, b_valist);
		bcstats.kvaslots_avail++;
		if (buf_needva) {
			buf_needva--;
			wakeup_one(&buf_needva);
		}
	}
	CLR(bp->b_flags, B_BUSY|B_NOTMAPPED);
	splx(s);
}

/*
 * Deallocate all memory resources for this buffer. We need to be careful
 * to not drop kvm since we have no way to reclaim it. So, if the buffer
 * has kvm, we need to free it later. We put it on the front of the
 * freelist just so it gets picked up faster.
 *
 * Also, lots of assertions count on bp->b_data being NULL, so we
 * set it temporarily to NULL.
 *
 * Return non-zero if we take care of the freeing later.
 */
int
buf_dealloc_mem(struct buf *bp)
{
	caddr_t data;
	int s;

	s = splbio();

	data = bp->b_data;
	bp->b_data = NULL;

	if (data) {
		if (bp->b_flags & B_BUSY)
			bcstats.busymapped--;
		pmap_kremove((vaddr_t)data, bp->b_bufsize);
		pmap_update(pmap_kernel());
	}

	if (bp->b_pobj)
		buf_free_pages(bp);

	if (data == NULL) {
		splx(s);
		return (0);
	}

	bp->b_data = data;
	if (!(bp->b_flags & B_BUSY)) {		/* XXX - need better test */
		TAILQ_REMOVE(&buf_valist, bp, b_valist);
		bcstats.kvaslots_avail--;
	} else
		CLR(bp->b_flags, B_BUSY);
	SET(bp->b_flags, B_RELEASED);
	TAILQ_INSERT_HEAD(&buf_valist, bp, b_valist);
	bcstats.kvaslots_avail++;
	splx(s);

	return (1);
}

/*
 * Only used by bread_cluster.
 */
void
buf_fix_mapping(struct buf *bp, vsize_t newsize)
{
	vaddr_t va = (vaddr_t)bp->b_data;

	if (newsize < bp->b_bufsize) {
		pmap_kremove(va + newsize, bp->b_bufsize - newsize);
		pmap_update(pmap_kernel());
		/*
		 * Note: the size we lost is actually with the other
		 * buffers read in by bread_cluster
		 */
		bp->b_bufsize = newsize;
	}
}

/*
 * Strip the kva mapping from an idle buffer so the space can be reused,
 * and return the now-free virtual address.  Buffers that were marked
 * B_RELEASED in buf_dealloc_mem() are handed back to the pool here.
 */
vaddr_t
buf_unmap(struct buf *bp)
{
	vaddr_t va;
	int s;

	KASSERT((bp->b_flags & B_BUSY) == 0);
	KASSERT(bp->b_data != NULL);

	s = splbio();
	TAILQ_REMOVE(&buf_valist, bp, b_valist);
	bcstats.kvaslots_avail--;
	va = (vaddr_t)bp->b_data;
	bp->b_data = 0;
	pmap_kremove(va, bp->b_bufsize);
	pmap_update(pmap_kernel());

	if (bp->b_flags & B_RELEASED)
		pool_put(&bufpool, bp);

	splx(s);

	return (va);
}

/* Always allocates in dma-reachable memory */
void
buf_alloc_pages(struct buf *bp, vsize_t size)
{
	voff_t offs;
	int s;

	KASSERT(size == round_page(size));
	KASSERT(bp->b_pobj == NULL);
	KASSERT(bp->b_data == NULL);

	s = splbio();

	offs = buf_page_offset;
	buf_page_offset += size;

	KASSERT(buf_page_offset > 0);

	uvm_pagealloc_multi(buf_object, offs, size, UVM_PLA_WAITOK);
	bcstats.numbufpages += atop(size);
	bp->b_pobj = buf_object;
	bp->b_poffs = offs;
	bp->b_bufsize = size;
	splx(s);
}

/*
 * Free the wired pages backing a buffer.  The buffer must already be
 * unmapped.
 */
void
buf_free_pages(struct buf *bp)
{
	struct uvm_object *uobj = bp->b_pobj;
	struct vm_page *pg;
	voff_t off, i;
	int s;

	KASSERT(bp->b_data == NULL);
	KASSERT(uobj != NULL);

	s = splbio();

	off = bp->b_poffs;
	bp->b_pobj = NULL;
	bp->b_poffs = 0;

	for (i = 0; i < atop(bp->b_bufsize); i++) {
		pg = uvm_pagelookup(uobj, off + ptoa(i));
		KASSERT(pg != NULL);
		KASSERT(pg->wire_count == 1);
		pg->wire_count = 0;
		uvm_pagefree(pg);
		bcstats.numbufpages--;
	}
	splx(s);
}

/*
 * XXX - it might make sense to make a buf_realloc_pages to avoid
 * bouncing through the free list all the time.
 */
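/*
 * Illustrative sketch only (not compiled): one possible shape for the
 * buf_realloc_pages() idea above, restricted to the shrink case.  It frees
 * just the tail pages of an existing allocation, the same way
 * buf_free_pages() frees the whole range, instead of freeing everything
 * and calling buf_alloc_pages() again.  Growing in place is not possible
 * with this offset scheme, since the offsets following a buffer's pages
 * may already belong to another buffer.  The name buf_shrink_pages() and
 * its interface are assumptions, not an existing API.
 */
#if 0	/* sketch */
void
buf_shrink_pages(struct buf *bp, vsize_t newsize)
{
	struct vm_page *pg;
	voff_t i;
	int s;

	KASSERT(newsize == round_page(newsize));
	KASSERT(newsize <= bp->b_bufsize);
	KASSERT(bp->b_data == NULL);	/* caller must have unmapped it */

	s = splbio();
	/* Free only the pages beyond the new size. */
	for (i = atop(newsize); i < atop(bp->b_bufsize); i++) {
		pg = uvm_pagelookup(bp->b_pobj, bp->b_poffs + ptoa(i));
		KASSERT(pg != NULL);
		KASSERT(pg->wire_count == 1);
		pg->wire_count = 0;
		uvm_pagefree(pg);
		bcstats.numbufpages--;
	}
	bp->b_bufsize = newsize;
	splx(s);
}
#endif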