/*	$OpenBSD: vfs_biomem.c,v 1.39 2018/03/29 01:43:41 mlarkin Exp $	*/

/*
 * Copyright (c) 2007 Artur Grabowski <art@openbsd.org>
 * Copyright (c) 2012-2016 Bob Beck <beck@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/buf.h>
#include <sys/pool.h>
#include <sys/proc.h>		/* XXX for atomic */
#include <sys/mount.h>

#include <uvm/uvm_extern.h>

vaddr_t buf_kva_start, buf_kva_end;
int buf_needva;
TAILQ_HEAD(,buf) buf_valist;

extern struct bcachestats bcstats;

/*
 * Pages are allocated from a uvm object (we only use it for page storage,
 * all pages are wired). Since every buffer contains a contiguous range of
 * pages, reusing the pages could be very painful. Fortunately voff_t is
 * 64 bits, so we can just increment buf_page_offset all the time and ignore
 * wraparound. Even if you reuse 4GB worth of buffers every second
 * you'll still run out of time_t faster than buffers.
 */
voff_t buf_page_offset;
struct uvm_object *buf_object, buf_object_store;

vaddr_t buf_unmap(struct buf *);

void
buf_mem_init(vsize_t size)
{
	TAILQ_INIT(&buf_valist);

	buf_kva_start = vm_map_min(kernel_map);
	if (uvm_map(kernel_map, &buf_kva_start, size, NULL,
	    UVM_UNKNOWN_OFFSET, PAGE_SIZE, UVM_MAPFLAG(PROT_NONE,
	    PROT_NONE, MAP_INHERIT_NONE, MADV_NORMAL, 0)))
		panic("bufinit: can't reserve VM for buffers");
	buf_kva_end = buf_kva_start + size;

	/* Contiguous mapping */
	bcstats.kvaslots = bcstats.kvaslots_avail = size / MAXPHYS;

	buf_object = &buf_object_store;

	uvm_objinit(buf_object, NULL, 1);
}

/*
 * buf_acquire and buf_release manage the kvm mappings of buffers.
 */
void
buf_acquire(struct buf *bp)
{
	KASSERT((bp->b_flags & B_BUSY) == 0);
	splassert(IPL_BIO);
	/*
	 * Busy before waiting for kvm.
	 */
	SET(bp->b_flags, B_BUSY);
	buf_map(bp);
}

/*
 * Acquire a buf but do not map it. Preserve any mapping it did have.
 */
void
buf_acquire_nomap(struct buf *bp)
{
	splassert(IPL_BIO);
	SET(bp->b_flags, B_BUSY);
	if (bp->b_data != NULL) {
		TAILQ_REMOVE(&buf_valist, bp, b_valist);
		bcstats.kvaslots_avail--;
		bcstats.busymapped++;
	}
}
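
/*
 * Map the pages of a buffer into kernel virtual address space. If the
 * buffer is already mapped, just take it off buf_valist. Otherwise use
 * the pre-reserved KVA range while it lasts; once that is exhausted,
 * steal the mapping of an unbusy buffer from the front of buf_valist,
 * sleeping until one becomes available if needed.
 */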
void
buf_map(struct buf *bp)
{
	vaddr_t va;

	splassert(IPL_BIO);

	if (bp->b_data == NULL) {
		unsigned long i;

		/*
		 * First, just use the pre-allocated space until we run out.
		 */
		if (buf_kva_start < buf_kva_end) {
			va = buf_kva_start;
			buf_kva_start += MAXPHYS;
			bcstats.kvaslots_avail--;
		} else {
			struct buf *vbp;

			/*
			 * Find some buffer we can steal the space from.
			 */
			vbp = TAILQ_FIRST(&buf_valist);
			while ((curproc != syncerproc &&
			    curproc != cleanerproc &&
			    bcstats.kvaslots_avail <= RESERVE_SLOTS) ||
			    vbp == NULL) {
				buf_needva++;
				tsleep(&buf_needva, PRIBIO, "buf_needva", 0);
				vbp = TAILQ_FIRST(&buf_valist);
			}
			va = buf_unmap(vbp);
		}

		for (i = 0; i < atop(bp->b_bufsize); i++) {
			struct vm_page *pg = uvm_pagelookup(bp->b_pobj,
			    bp->b_poffs + ptoa(i));

			KASSERT(pg != NULL);

			pmap_kenter_pa(va + ptoa(i), VM_PAGE_TO_PHYS(pg),
			    PROT_READ | PROT_WRITE);
		}
		pmap_update(pmap_kernel());
		bp->b_data = (caddr_t)va;
	} else {
		TAILQ_REMOVE(&buf_valist, bp, b_valist);
		bcstats.kvaslots_avail--;
	}

	bcstats.busymapped++;
}
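
/*
 * Release a busy buffer: return its KVA mapping, if any, to the tail of
 * buf_valist so it can be stolen later, and wake up anyone sleeping in
 * buf_map() waiting for a mapping.
 */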
void
buf_release(struct buf *bp)
{
	KASSERT(bp->b_flags & B_BUSY);
	splassert(IPL_BIO);

	if (bp->b_data) {
		bcstats.busymapped--;
		TAILQ_INSERT_TAIL(&buf_valist, bp, b_valist);
		bcstats.kvaslots_avail++;
		if (buf_needva) {
			buf_needva = 0;
			wakeup(&buf_needva);
		}
	}
	CLR(bp->b_flags, B_BUSY);
}

/*
 * Deallocate all memory resources for this buffer. We need to be careful
 * to not drop kvm since we have no way to reclaim it. So, if the buffer
 * has kvm, we need to free it later. We put it on the front of the
 * freelist just so it gets picked up faster.
 *
 * Also, lots of assertions count on bp->b_data being NULL, so we
 * set it temporarily to NULL.
 *
 * Return non-zero if we take care of the freeing later.
 */
int
buf_dealloc_mem(struct buf *bp)
{
	caddr_t data;

	splassert(IPL_BIO);

	data = bp->b_data;
	bp->b_data = NULL;

	if (data) {
		if (bp->b_flags & B_BUSY)
			bcstats.busymapped--;
		pmap_kremove((vaddr_t)data, bp->b_bufsize);
		pmap_update(pmap_kernel());
	}

	if (bp->b_pobj)
		buf_free_pages(bp);

	if (data == NULL)
		return (0);

	bp->b_data = data;
	if (!(bp->b_flags & B_BUSY)) {		/* XXX - need better test */
		TAILQ_REMOVE(&buf_valist, bp, b_valist);
		bcstats.kvaslots_avail--;
	} else {
		CLR(bp->b_flags, B_BUSY);
		if (buf_needva) {
			buf_needva = 0;
			wakeup(&buf_needva);
		}
	}
	SET(bp->b_flags, B_RELEASED);
	TAILQ_INSERT_HEAD(&buf_valist, bp, b_valist);
	bcstats.kvaslots_avail++;

	return (1);
}

/*
 * Trim a buffer's mapping down to newsize. Only used by bread_cluster.
 */
void
buf_fix_mapping(struct buf *bp, vsize_t newsize)
{
	vaddr_t va = (vaddr_t)bp->b_data;

	if (newsize < bp->b_bufsize) {
		pmap_kremove(va + newsize, bp->b_bufsize - newsize);
		pmap_update(pmap_kernel());
		/*
		 * Note: the size we lost is actually with the other
		 * buffers read in by bread_cluster
		 */
		bp->b_bufsize = newsize;
	}
}
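
/*
 * Steal the KVA mapping from an unbusy, mapped buffer sitting on
 * buf_valist and return the virtual address it occupied. If the buffer
 * was already marked B_RELEASED, also free the buf back to bufpool.
 */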
vaddr_t
buf_unmap(struct buf *bp)
{
	vaddr_t va;

	KASSERT((bp->b_flags & B_BUSY) == 0);
	KASSERT(bp->b_data != NULL);
	splassert(IPL_BIO);

	TAILQ_REMOVE(&buf_valist, bp, b_valist);
	bcstats.kvaslots_avail--;
	va = (vaddr_t)bp->b_data;
	bp->b_data = 0;
	pmap_kremove(va, bp->b_bufsize);
	pmap_update(pmap_kernel());

	if (bp->b_flags & B_RELEASED)
		pool_put(&bufpool, bp);

	return (va);
}

/* Always allocates in dma-reachable memory */
void
buf_alloc_pages(struct buf *bp, vsize_t size)
{
	voff_t offs;
	int i;

	KASSERT(size == round_page(size));
	KASSERT(bp->b_pobj == NULL);
	KASSERT(bp->b_data == NULL);
	splassert(IPL_BIO);

	offs = buf_page_offset;
	buf_page_offset += size;

	KASSERT(buf_page_offset > 0);

	/*
	 * Attempt to allocate with NOWAIT. If we can't, then throw
	 * away some clean pages and try again. Finally, if that
	 * fails, do a WAITOK allocation so the page daemon can find
	 * memory for us.
	 */
	do {
		i = uvm_pagealloc_multi(buf_object, offs, size,
		    UVM_PLA_NOWAIT);
		if (i == 0)
			break;
	} while (bufbackoff(&dma_constraint, 100) == 0);
	if (i != 0)
		i = uvm_pagealloc_multi(buf_object, offs, size,
		    UVM_PLA_WAITOK);
	/* should not happen */
	if (i != 0)
		panic("uvm_pagealloc_multi unable to allocate a buf_object "
		    "of size %lu", size);

	bcstats.numbufpages += atop(size);
	bcstats.dmapages += atop(size);
	SET(bp->b_flags, B_DMA);
	bp->b_pobj = buf_object;
	bp->b_poffs = offs;
	bp->b_bufsize = size;
}

void
buf_free_pages(struct buf *bp)
{
	struct uvm_object *uobj = bp->b_pobj;
	struct vm_page *pg;
	voff_t off, i;

	KASSERT(bp->b_data == NULL);
	KASSERT(uobj != NULL);
	splassert(IPL_BIO);

	off = bp->b_poffs;
	bp->b_pobj = NULL;
	bp->b_poffs = 0;

	for (i = 0; i < atop(bp->b_bufsize); i++) {
		pg = uvm_pagelookup(uobj, off + ptoa(i));
		KASSERT(pg != NULL);
		KASSERT(pg->wire_count == 1);
		pg->wire_count = 0;
		uvm_pagefree(pg);
		bcstats.numbufpages--;
		if (ISSET(bp->b_flags, B_DMA))
			bcstats.dmapages--;
	}
	CLR(bp->b_flags, B_DMA);
}

/* Reallocate a buf into a particular pmem range specified by "where". */
int
buf_realloc_pages(struct buf *bp, struct uvm_constraint_range *where,
    int flags)
{
	vaddr_t va;
	int dma;
	int i, r;

	KASSERT(!(flags & UVM_PLA_WAITOK) ^ !(flags & UVM_PLA_NOWAIT));

	splassert(IPL_BIO);
	KASSERT(ISSET(bp->b_flags, B_BUSY));
	dma = ISSET(bp->b_flags, B_DMA);

	/* if the original buf is mapped, unmap it */
	if (bp->b_data != NULL) {
		va = (vaddr_t)bp->b_data;
		pmap_kremove(va, bp->b_bufsize);
		pmap_update(pmap_kernel());
	}

	do {
		r = uvm_pagerealloc_multi(bp->b_pobj, bp->b_poffs,
		    bp->b_bufsize, UVM_PLA_NOWAIT, where);
		if (r == 0)
			break;
	} while ((bufbackoff(where, atop(bp->b_bufsize)) == 0));

	/*
	 * bufbackoff() failed, so there's no more we can do without
	 * waiting. If allowed to, make that attempt.
	 */
	if (r != 0 && (flags & UVM_PLA_WAITOK))
		r = uvm_pagerealloc_multi(bp->b_pobj, bp->b_poffs,
		    bp->b_bufsize, flags, where);

	/*
	 * If the allocation has succeeded, we may be somewhere different.
	 * If the allocation has failed, we are in the same place.
	 *
	 * We still have to re-map the buffer before returning.
	 */

	/* take it out of dma stats until we know where we are */
	if (dma)
		bcstats.dmapages -= atop(bp->b_bufsize);

	dma = 1;
	/* if the original buf was mapped, re-map it */
	for (i = 0; i < atop(bp->b_bufsize); i++) {
		struct vm_page *pg = uvm_pagelookup(bp->b_pobj,
		    bp->b_poffs + ptoa(i));
		KASSERT(pg != NULL);
		if (!PADDR_IS_DMA_REACHABLE(VM_PAGE_TO_PHYS(pg)))
			dma = 0;
		if (bp->b_data != NULL) {
			pmap_kenter_pa(va + ptoa(i), VM_PAGE_TO_PHYS(pg),
			    PROT_READ|PROT_WRITE);
			pmap_update(pmap_kernel());
		}
	}
	if (dma) {
		SET(bp->b_flags, B_DMA);
		bcstats.dmapages += atop(bp->b_bufsize);
	} else
		CLR(bp->b_flags, B_DMA);
	return (r);
}