/* $OpenBSD: vfs_biomem.c,v 1.36 2016/04/28 13:13:02 beck Exp $ */

/*
 * Copyright (c) 2007 Artur Grabowski <art@openbsd.org>
 * Copyright (c) 2012-2016 Bob Beck <beck@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */


#include <sys/param.h>
#include <sys/systm.h>
#include <sys/buf.h>
#include <sys/pool.h>
#include <sys/proc.h>		/* XXX for atomic */
#include <sys/mount.h>

#include <uvm/uvm_extern.h>

vaddr_t buf_kva_start, buf_kva_end;
int buf_needva;
TAILQ_HEAD(,buf) buf_valist;

extern struct bcachestats bcstats;

/*
 * Pages are allocated from a uvm object (we only use it for page storage,
 * all pages are wired). Since every buffer contains a contiguous range of
 * pages, reusing the pages could be very painful. Fortunately voff_t is
 * 64 bits, so we can just increment buf_page_offset all the time and ignore
 * wraparound. Even if you reuse 4GB worth of buffers every second
 * you'll still run out of time_t faster than buffers.
 *
 */
voff_t buf_page_offset;
struct uvm_object *buf_object, buf_object_store;

vaddr_t buf_unmap(struct buf *);

void
buf_mem_init(vsize_t size)
{
        TAILQ_INIT(&buf_valist);

        buf_kva_start = vm_map_min(kernel_map);
        if (uvm_map(kernel_map, &buf_kva_start, size, NULL,
            UVM_UNKNOWN_OFFSET, PAGE_SIZE, UVM_MAPFLAG(PROT_NONE,
            PROT_NONE, MAP_INHERIT_NONE, MADV_NORMAL, 0)))
                panic("bufinit: can't reserve VM for buffers");
        buf_kva_end = buf_kva_start + size;

        /* Contiguous mapping */
        bcstats.kvaslots = bcstats.kvaslots_avail = size / MAXPHYS;

        buf_object = &buf_object_store;

        uvm_objinit(buf_object, NULL, 1);
}

/*
 * buf_acquire and buf_release manage the kvm mappings of buffers.
 */
void
buf_acquire(struct buf *bp)
{
        KASSERT((bp->b_flags & B_BUSY) == 0);
        splassert(IPL_BIO);
        /*
         * Busy before waiting for kvm.
         */
        SET(bp->b_flags, B_BUSY);
        buf_map(bp);
}

/*
 * Acquire a buf but do not map it. Preserve any mapping it did have.
 */
void
buf_acquire_nomap(struct buf *bp)
{
        splassert(IPL_BIO);
        SET(bp->b_flags, B_BUSY);
        if (bp->b_data != NULL) {
                TAILQ_REMOVE(&buf_valist, bp, b_valist);
                bcstats.kvaslots_avail--;
                bcstats.busymapped++;
        }
}

void
buf_map(struct buf *bp)
{
        vaddr_t va;

        splassert(IPL_BIO);

        if (bp->b_data == NULL) {
                unsigned long i;

                /*
                 * First, just use the pre-allocated space until we run out.
                 */
                if (buf_kva_start < buf_kva_end) {
                        va = buf_kva_start;
                        buf_kva_start += MAXPHYS;
                        bcstats.kvaslots_avail--;
                } else {
                        struct buf *vbp;

                        /*
                         * Find some buffer we can steal the space from.
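                         * Buffers on buf_valist are mapped but not busy,
                         * so their kva may be recycled.  The syncer and
                         * cleaner are exempt from the reserve check so
                         * they can always make progress; everyone else
                         * sleeps while the reserve is depleted or no
                         * victim buffer is available.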
                         */
                        vbp = TAILQ_FIRST(&buf_valist);
                        while ((curproc != syncerproc &&
                            curproc != cleanerproc &&
                            bcstats.kvaslots_avail <= RESERVE_SLOTS) ||
                            vbp == NULL) {
                                buf_needva++;
                                tsleep(&buf_needva, PRIBIO, "buf_needva", 0);
                                vbp = TAILQ_FIRST(&buf_valist);
                        }
                        va = buf_unmap(vbp);
                }

                for (i = 0; i < atop(bp->b_bufsize); i++) {
                        struct vm_page *pg = uvm_pagelookup(bp->b_pobj,
                            bp->b_poffs + ptoa(i));

                        KASSERT(pg != NULL);

                        pmap_kenter_pa(va + ptoa(i), VM_PAGE_TO_PHYS(pg),
                            PROT_READ | PROT_WRITE);
                }
                pmap_update(pmap_kernel());
                bp->b_data = (caddr_t)va;
        } else {
                TAILQ_REMOVE(&buf_valist, bp, b_valist);
                bcstats.kvaslots_avail--;
        }

        bcstats.busymapped++;
}

void
buf_release(struct buf *bp)
{

        KASSERT(bp->b_flags & B_BUSY);
        splassert(IPL_BIO);

        if (bp->b_data) {
                bcstats.busymapped--;
                TAILQ_INSERT_TAIL(&buf_valist, bp, b_valist);
                bcstats.kvaslots_avail++;
                if (buf_needva) {
                        buf_needva = 0;
                        wakeup(&buf_needva);
                }
        }
        CLR(bp->b_flags, B_BUSY);
}

/*
 * Deallocate all memory resources for this buffer. We need to be careful
 * to not drop kvm since we have no way to reclaim it. So, if the buffer
 * has kvm, we need to free it later. We put it on the front of the
 * freelist just so it gets picked up faster.
 *
 * Also, lots of assertions count on bp->b_data being NULL, so we
 * set it temporarily to NULL.
 *
 * Return non-zero if we take care of the freeing later.
 */
int
buf_dealloc_mem(struct buf *bp)
{
        caddr_t data;

        splassert(IPL_BIO);

        data = bp->b_data;
        bp->b_data = NULL;

        if (data) {
                if (bp->b_flags & B_BUSY)
                        bcstats.busymapped--;
                pmap_kremove((vaddr_t)data, bp->b_bufsize);
                pmap_update(pmap_kernel());
        }

        if (bp->b_pobj)
                buf_free_pages(bp);

        if (data == NULL)
                return (0);

        bp->b_data = data;
        if (!(bp->b_flags & B_BUSY)) {          /* XXX - need better test */
                TAILQ_REMOVE(&buf_valist, bp, b_valist);
                bcstats.kvaslots_avail--;
        } else {
                CLR(bp->b_flags, B_BUSY);
                if (buf_needva) {
                        buf_needva = 0;
                        wakeup(&buf_needva);
                }
        }
        SET(bp->b_flags, B_RELEASED);
        TAILQ_INSERT_HEAD(&buf_valist, bp, b_valist);
        bcstats.kvaslots_avail++;

        return (1);
}

/*
 * Only used by bread_cluster.
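 * Shrink the kva mapping of bp to newsize, removing the mappings
 * past the new end of the buffer.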
 */
void
buf_fix_mapping(struct buf *bp, vsize_t newsize)
{
        vaddr_t va = (vaddr_t)bp->b_data;

        if (newsize < bp->b_bufsize) {
                pmap_kremove(va + newsize, bp->b_bufsize - newsize);
                pmap_update(pmap_kernel());
                /*
                 * Note: the size we lost is actually with the other
                 * buffers read in by bread_cluster
                 */
                bp->b_bufsize = newsize;
        }
}

vaddr_t
buf_unmap(struct buf *bp)
{
        vaddr_t va;

        KASSERT((bp->b_flags & B_BUSY) == 0);
        KASSERT(bp->b_data != NULL);
        splassert(IPL_BIO);

        TAILQ_REMOVE(&buf_valist, bp, b_valist);
        bcstats.kvaslots_avail--;
        va = (vaddr_t)bp->b_data;
        bp->b_data = 0;
        pmap_kremove(va, bp->b_bufsize);
        pmap_update(pmap_kernel());

        if (bp->b_flags & B_RELEASED)
                pool_put(&bufpool, bp);

        return (va);
}

/* Always allocates in dma-reachable memory */
void
buf_alloc_pages(struct buf *bp, vsize_t size)
{
        voff_t offs;
        int i;

        KASSERT(size == round_page(size));
        KASSERT(bp->b_pobj == NULL);
        KASSERT(bp->b_data == NULL);
        splassert(IPL_BIO);

        offs = buf_page_offset;
        buf_page_offset += size;

        KASSERT(buf_page_offset > 0);

        /*
         * Attempt to allocate with NOWAIT. If we can't, then throw
         * away some clean pages and try again. Finally, if that
         * fails, do a WAITOK allocation so the page daemon can find
         * memory for us.
         */
        do {
                i = uvm_pagealloc_multi(buf_object, offs, size,
                    UVM_PLA_NOWAIT);
                if (i == 0)
                        break;
        } while (bufbackoff(&dma_constraint, 100) == 0);
        if (i != 0)
                i = uvm_pagealloc_multi(buf_object, offs, size,
                    UVM_PLA_WAITOK);
        /* should not happen */
        if (i != 0)
                panic("uvm_pagealloc_multi unable to allocate a buf_object of size %lu", size);
        bcstats.numbufpages += atop(size);
        bcstats.dmapages += atop(size);
        SET(bp->b_flags, B_DMA);
        bp->b_pobj = buf_object;
        bp->b_poffs = offs;
        bp->b_bufsize = size;
}

void
buf_free_pages(struct buf *bp)
{
        struct uvm_object *uobj = bp->b_pobj;
        struct vm_page *pg;
        voff_t off, i;

        KASSERT(bp->b_data == NULL);
        KASSERT(uobj != NULL);
        splassert(IPL_BIO);

        off = bp->b_poffs;
        bp->b_pobj = NULL;
        bp->b_poffs = 0;

        for (i = 0; i < atop(bp->b_bufsize); i++) {
                pg = uvm_pagelookup(uobj, off + ptoa(i));
                KASSERT(pg != NULL);
                KASSERT(pg->wire_count == 1);
                pg->wire_count = 0;
                uvm_pagefree(pg);
                bcstats.numbufpages--;
                if (ISSET(bp->b_flags, B_DMA))
                        bcstats.dmapages--;
        }
        CLR(bp->b_flags, B_DMA);
}

/* Reallocate a buf into a particular pmem range specified by "where".
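 * Exactly one of UVM_PLA_WAITOK or UVM_PLA_NOWAIT must be set in "flags",
 * as asserted below.  Returns 0 on success.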
 */
int
buf_realloc_pages(struct buf *bp, struct uvm_constraint_range *where,
    int flags)
{
        vaddr_t va;
        int dma;
        int i, r;
        KASSERT(!(flags & UVM_PLA_WAITOK) ^ !(flags & UVM_PLA_NOWAIT));

        splassert(IPL_BIO);
        KASSERT(ISSET(bp->b_flags, B_BUSY));
        dma = ISSET(bp->b_flags, B_DMA);

        /* if the original buf is mapped, unmap it */
        if (bp->b_data != NULL) {
                va = (vaddr_t)bp->b_data;
                pmap_kremove(va, bp->b_bufsize);
                pmap_update(pmap_kernel());
        }

        do {
                r = uvm_pagerealloc_multi(bp->b_pobj, bp->b_poffs,
                    bp->b_bufsize, UVM_PLA_NOWAIT, where);
                if (r == 0)
                        break;
        } while ((bufbackoff(where, 100) == 0) && (flags & UVM_PLA_WAITOK));
        if (r != 0 && !(flags & UVM_PLA_NOWAIT))
                r = uvm_pagerealloc_multi(bp->b_pobj, bp->b_poffs,
                    bp->b_bufsize, flags, where);

        /*
         * If the allocation has succeeded, we may be somewhere different.
         * If the allocation has failed, we are in the same place.
         *
         * We still have to re-map the buffer before returning.
         */

        /* take it out of dma stats until we know where we are */
        if (dma)
                bcstats.dmapages -= atop(bp->b_bufsize);

        dma = 1;
        /* if the original buf was mapped, re-map it */
        for (i = 0; i < atop(bp->b_bufsize); i++) {
                struct vm_page *pg = uvm_pagelookup(bp->b_pobj,
                    bp->b_poffs + ptoa(i));
                KASSERT(pg != NULL);
                if (!PADDR_IS_DMA_REACHABLE(VM_PAGE_TO_PHYS(pg)))
                        dma = 0;
                if (bp->b_data != NULL) {
                        pmap_kenter_pa(va + ptoa(i), VM_PAGE_TO_PHYS(pg),
                            PROT_READ|PROT_WRITE);
                        pmap_update(pmap_kernel());
                }
        }
        if (dma) {
                SET(bp->b_flags, B_DMA);
                bcstats.dmapages += atop(bp->b_bufsize);
        } else
                CLR(bp->b_flags, B_DMA);
        return (r);
}