/*	$OpenBSD: vfs_biomem.c,v 1.52 2024/11/05 17:28:31 mpi Exp $	*/

/*
 * Copyright (c) 2007 Artur Grabowski <art@openbsd.org>
 * Copyright (c) 2012-2016,2019 Bob Beck <beck@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */


#include <sys/param.h>
#include <sys/systm.h>
#include <sys/buf.h>
#include <sys/pool.h>
#include <sys/proc.h>		/* XXX for atomic */
#include <sys/mount.h>

#include <uvm/uvm_extern.h>

vaddr_t buf_kva_start, buf_kva_end;
int buf_needva;
TAILQ_HEAD(,buf) buf_valist;

extern struct bcachestats bcstats;

vaddr_t buf_unmap(struct buf *);

void
buf_mem_init(vsize_t size)
{
	TAILQ_INIT(&buf_valist);

	buf_kva_start = vm_map_min(kernel_map);
	if (uvm_map(kernel_map, &buf_kva_start, size, NULL,
	    UVM_UNKNOWN_OFFSET, PAGE_SIZE, UVM_MAPFLAG(PROT_NONE,
	    PROT_NONE, MAP_INHERIT_NONE, MADV_NORMAL, 0)))
		panic("%s: can't reserve VM for buffers", __func__);
	buf_kva_end = buf_kva_start + size;

	/* Contiguous mapping */
	bcstats.kvaslots = bcstats.kvaslots_avail = size / MAXPHYS;
}

/*
 * buf_acquire and buf_release manage the kvm mappings of buffers.
 */
void
buf_acquire(struct buf *bp)
{
	KASSERT((bp->b_flags & B_BUSY) == 0);
	splassert(IPL_BIO);
	/*
	 * Busy before waiting for kvm.
	 */
	SET(bp->b_flags, B_BUSY);
	buf_map(bp);
}
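
/*
 * Illustrative sketch (compiled out, not part of this interface): the
 * acquire/release protocol a caller is expected to follow, assuming it
 * already holds a buffer with pages attached.  The function name
 * example_buf_io and the I/O step are hypothetical.
 */
#if 0
void
example_buf_io(struct buf *bp)
{
	int s;

	s = splbio();
	buf_acquire(bp);	/* sets B_BUSY, maps bp->b_data */
	splx(s);

	/* ... perform I/O through bp->b_data ... */

	s = splbio();
	buf_release(bp);	/* clears B_BUSY; mapping becomes stealable */
	splx(s);
}
#endif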

/*
 * Acquire a buf but do not map it. Preserve any mapping it did have.
 */
void
buf_acquire_nomap(struct buf *bp)
{
	splassert(IPL_BIO);
	SET(bp->b_flags, B_BUSY);
	if (bp->b_data != NULL) {
		TAILQ_REMOVE(&buf_valist, bp, b_valist);
		bcstats.kvaslots_avail--;
		bcstats.busymapped++;
	}
}

void
buf_map(struct buf *bp)
{
	vaddr_t va;

	splassert(IPL_BIO);

	if (bp->b_data == NULL) {
		unsigned long i;

		/*
		 * First, just use the pre-allocated space until we run out.
		 */
		if (buf_kva_start < buf_kva_end) {
			va = buf_kva_start;
			buf_kva_start += MAXPHYS;
			bcstats.kvaslots_avail--;
		} else {
			struct buf *vbp;

			/*
			 * Find some buffer we can steal the space from.
			 */
			vbp = TAILQ_FIRST(&buf_valist);
			while ((curproc != syncerproc &&
			    curproc != cleanerproc &&
			    bcstats.kvaslots_avail <= RESERVE_SLOTS) ||
			    vbp == NULL) {
				buf_needva++;
				tsleep_nsec(&buf_needva, PRIBIO, "buf_needva",
				    INFSLP);
				vbp = TAILQ_FIRST(&buf_valist);
			}
			va = buf_unmap(vbp);
		}

		for (i = 0; i < atop(bp->b_bufsize); i++) {
			struct vm_page *pg = uvm_pagelookup(bp->b_pobj,
			    bp->b_poffs + ptoa(i));

			KASSERT(pg != NULL);

			pmap_kenter_pa(va + ptoa(i), VM_PAGE_TO_PHYS(pg),
			    PROT_READ | PROT_WRITE);
		}
		pmap_update(pmap_kernel());
		bp->b_data = (caddr_t)va;
	} else {
		TAILQ_REMOVE(&buf_valist, bp, b_valist);
		bcstats.kvaslots_avail--;
	}

	bcstats.busymapped++;
}

void
buf_release(struct buf *bp)
{
	KASSERT(bp->b_flags & B_BUSY);
	splassert(IPL_BIO);

	if (bp->b_data) {
		bcstats.busymapped--;
		TAILQ_INSERT_TAIL(&buf_valist, bp, b_valist);
		bcstats.kvaslots_avail++;
		if (buf_needva) {
			buf_needva = 0;
			wakeup(&buf_needva);
		}
	}
	CLR(bp->b_flags, B_BUSY);
}
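
/*
 * Illustrative sketch (compiled out): the condition under which
 * buf_map() above sleeps waiting for kva, restated as a predicate.
 * Ordinary processes must not dip into the last RESERVE_SLOTS
 * stealable slots, so the syncer and cleaner can always map buffers
 * and make cleaning progress; those two threads only sleep when
 * buf_valist is empty.  The name must_wait_for_kva is hypothetical.
 */
#if 0
int
must_wait_for_kva(struct buf *vbp)
{
	if (vbp == NULL)
		return (1);	/* nothing on buf_valist to steal from */
	if (curproc != syncerproc && curproc != cleanerproc &&
	    bcstats.kvaslots_avail <= RESERVE_SLOTS)
		return (1);	/* leave the reserve for the daemons */
	return (0);
}
#endif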

/*
 * Deallocate all memory resources for this buffer. We need to be careful
 * to not drop kvm since we have no way to reclaim it. So, if the buffer
 * has kvm, we need to free it later. We put it on the front of the
 * freelist just so it gets picked up faster.
 *
 * Also, lots of assertions count on bp->b_data being NULL, so we
 * set it temporarily to NULL.
 *
 * Return non-zero if we take care of the freeing later.
 */
int
buf_dealloc_mem(struct buf *bp)
{
	caddr_t data;

	splassert(IPL_BIO);

	data = bp->b_data;
	bp->b_data = NULL;

	if (data) {
		if (bp->b_flags & B_BUSY)
			bcstats.busymapped--;
		pmap_kremove((vaddr_t)data, bp->b_bufsize);
		pmap_update(pmap_kernel());
	}

	if (bp->b_pobj)
		buf_free_pages(bp);

	if (data == NULL)
		return (0);

	bp->b_data = data;
	if (!(bp->b_flags & B_BUSY)) {		/* XXX - need better test */
		TAILQ_REMOVE(&buf_valist, bp, b_valist);
		bcstats.kvaslots_avail--;
	} else {
		CLR(bp->b_flags, B_BUSY);
		if (buf_needva) {
			buf_needva = 0;
			wakeup(&buf_needva);
		}
	}
	SET(bp->b_flags, B_RELEASED);
	TAILQ_INSERT_HEAD(&buf_valist, bp, b_valist);
	bcstats.kvaslots_avail++;

	return (1);
}

/*
 * Only used by bread_cluster.
 */
void
buf_fix_mapping(struct buf *bp, vsize_t newsize)
{
	vaddr_t va = (vaddr_t)bp->b_data;

	if (newsize < bp->b_bufsize) {
		pmap_kremove(va + newsize, bp->b_bufsize - newsize);
		pmap_update(pmap_kernel());
		/*
		 * Note: the size we lost is actually with the other
		 * buffers read in by bread_cluster
		 */
		bp->b_bufsize = newsize;
	}
}

vaddr_t
buf_unmap(struct buf *bp)
{
	vaddr_t va;

	KASSERT((bp->b_flags & B_BUSY) == 0);
	KASSERT(bp->b_data != NULL);
	splassert(IPL_BIO);

	TAILQ_REMOVE(&buf_valist, bp, b_valist);
	bcstats.kvaslots_avail--;
	va = (vaddr_t)bp->b_data;
	bp->b_data = NULL;
	pmap_kremove(va, bp->b_bufsize);
	pmap_update(pmap_kernel());

	if (bp->b_flags & B_RELEASED)
		pool_put(&bufpool, bp);

	return (va);
}
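
/*
 * Illustrative sketch (compiled out): the deferred-free handshake that
 * buf_dealloc_mem() and buf_unmap() above implement between them.
 * A mapped buffer's kva cannot be handed back to the kernel map, so
 * buf_dealloc_mem() flags the buf B_RELEASED, parks it at the head of
 * buf_valist, and returns non-zero; the next buf_map() that steals the
 * slot reaches buf_unmap(), which finally pool_put()s the buf.  The
 * name example_deferred_free is hypothetical.
 */
#if 0
void
example_deferred_free(struct buf *bp)
{
	int s;

	s = splbio();
	if (buf_dealloc_mem(bp) == 0)
		pool_put(&bufpool, bp);	/* held no kva; free immediately */
	/* else: the bufpool put happens later, inside buf_unmap() */
	splx(s);
}
#endif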

/* Always allocates in dma-reachable memory */
void
buf_alloc_pages(struct buf *bp, vsize_t size)
{
	int i;

	KASSERT(size == round_page(size));
	KASSERT(bp->b_pobj == NULL);
	KASSERT(bp->b_data == NULL);
	splassert(IPL_BIO);

	uvm_obj_init(&bp->b_uobj, &bufcache_pager, 1);

	/*
	 * Attempt to allocate with NOWAIT. If we can't, then throw
	 * away some clean pages and try again. Finally, if that
	 * fails, do a WAITOK allocation so the page daemon can find
	 * memory for us.
	 */
	do {
		i = uvm_pagealloc_multi(&bp->b_uobj, 0, size,
		    UVM_PLA_NOWAIT | UVM_PLA_NOWAKE);
		if (i == 0)
			break;
	} while (bufbackoff(&dma_constraint, size) >= size);
	if (i != 0)
		i = uvm_pagealloc_multi(&bp->b_uobj, 0, size,
		    UVM_PLA_WAITOK);
	/* should not happen */
	if (i != 0)
		panic("uvm_pagealloc_multi unable to allocate a buf_object "
		    "of size %lu", size);

	bcstats.numbufpages += atop(size);
	bcstats.dmapages += atop(size);
	SET(bp->b_flags, B_DMA);
	bp->b_pobj = &bp->b_uobj;
	bp->b_poffs = 0;
	bp->b_bufsize = size;
}

void
buf_free_pages(struct buf *bp)
{
	struct uvm_object *uobj = bp->b_pobj;
	struct vm_page *pg;
	voff_t off, i;

	KASSERT(bp->b_data == NULL);
	KASSERT(uobj != NULL);
	splassert(IPL_BIO);

	off = bp->b_poffs;
	bp->b_pobj = NULL;
	bp->b_poffs = 0;

	for (i = 0; i < atop(bp->b_bufsize); i++) {
		pg = uvm_pagelookup(uobj, off + ptoa(i));
		KASSERT(pg != NULL);
		KASSERT(pg->wire_count == 1);
		pg->wire_count = 0;
		bcstats.numbufpages--;
		if (ISSET(bp->b_flags, B_DMA))
			bcstats.dmapages--;
	}
	CLR(bp->b_flags, B_DMA);

	/* XXX refactor to do this without splbio later */
	uvm_obj_free(uobj);
}
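
/*
 * Illustrative sketch (compiled out): the three-step allocation
 * strategy buf_alloc_pages() above uses, shown in isolation.  Try a
 * cheap non-sleeping allocation first, shed clean cache pages with
 * bufbackoff() while that keeps failing, and only sleep in the
 * allocator as a last resort.  example_alloc is a hypothetical wrapper.
 */
#if 0
int
example_alloc(struct uvm_object *obj, vsize_t size)
{
	int error;

	do {
		error = uvm_pagealloc_multi(obj, 0, size,
		    UVM_PLA_NOWAIT | UVM_PLA_NOWAKE);
		if (error == 0)
			return (0);		/* fast path succeeded */
	} while (bufbackoff(&dma_constraint, size) >= size);

	/* last resort: sleep until the page daemon finds memory */
	return (uvm_pagealloc_multi(obj, 0, size, UVM_PLA_WAITOK));
}
#endif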

/* Reallocate a buf into a particular pmem range specified by "where". */
int
buf_realloc_pages(struct buf *bp, struct uvm_constraint_range *where,
    int flags)
{
	vsize_t size;
	vaddr_t va;
	int dma;
	int i, r;
	KASSERT(!(flags & UVM_PLA_WAITOK) ^ !(flags & UVM_PLA_NOWAIT));

	splassert(IPL_BIO);
	KASSERT(ISSET(bp->b_flags, B_BUSY));
	dma = ISSET(bp->b_flags, B_DMA);

	/* if the original buf is mapped, unmap it */
	if (bp->b_data != NULL) {
		va = (vaddr_t)bp->b_data;
		pmap_kremove(va, bp->b_bufsize);
		pmap_update(pmap_kernel());
	}

	do {
		r = uvm_pagerealloc_multi(bp->b_pobj, bp->b_poffs,
		    bp->b_bufsize, UVM_PLA_NOWAIT | UVM_PLA_NOWAKE, where);
		if (r == 0)
			break;
		size = atop(bp->b_bufsize);
	} while (bufbackoff(where, size) >= size);

	/*
	 * bufbackoff() failed, so there's no more we can do without
	 * waiting. If allowed to, make that attempt.
	 */
	if (r != 0 && (flags & UVM_PLA_WAITOK))
		r = uvm_pagerealloc_multi(bp->b_pobj, bp->b_poffs,
		    bp->b_bufsize, flags, where);

	/*
	 * If the allocation has succeeded, we may be somewhere different.
	 * If the allocation has failed, we are in the same place.
	 *
	 * We still have to re-map the buffer before returning.
	 */

	/* take it out of dma stats until we know where we are */
	if (dma)
		bcstats.dmapages -= atop(bp->b_bufsize);

	dma = 1;
	/* if the original buf was mapped, re-map it */
	for (i = 0; i < atop(bp->b_bufsize); i++) {
		struct vm_page *pg = uvm_pagelookup(bp->b_pobj,
		    bp->b_poffs + ptoa(i));
		KASSERT(pg != NULL);
		if (!PADDR_IS_DMA_REACHABLE(VM_PAGE_TO_PHYS(pg)))
			dma = 0;
		if (bp->b_data != NULL) {
			pmap_kenter_pa(va + ptoa(i), VM_PAGE_TO_PHYS(pg),
			    PROT_READ | PROT_WRITE);
			pmap_update(pmap_kernel());
		}
	}
	if (dma) {
		SET(bp->b_flags, B_DMA);
		bcstats.dmapages += atop(bp->b_bufsize);
	} else
		CLR(bp->b_flags, B_DMA);
	return (r);
}
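
/*
 * Illustrative sketch (compiled out): how a caller might use
 * buf_realloc_pages() to migrate a busy buffer out of dma-reachable
 * memory, in the style of the buffer cache's flipping of cold buffers
 * to high memory.  example_flip_high is hypothetical, and the use of
 * high_constraint here is an assumption for illustration.
 */
#if 0
int
example_flip_high(struct buf *bp)
{
	int s, r;

	s = splbio();
	/* bp must be B_BUSY; B_DMA is recomputed from the new pages */
	r = buf_realloc_pages(bp, &high_constraint, UVM_PLA_NOWAIT);
	splx(s);
	return (r);
}
#endif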