Lines Matching +full:local +full:- +full:bd +full:- +full:address

1 /*-
2 * SPDX-License-Identifier: BSD-2-Clause
4 * Copyright (c) 2004 Poul-Henning Kamp
114 #define BQ_LOCKPTR(bq) (&(bq)->bq_lock)
144 #define BD_LOCKPTR(bd) (&(bd)->bd_cleanq->bq_lock)
145 #define BD_LOCK(bd) mtx_lock(BD_LOCKPTR((bd)))
146 #define BD_UNLOCK(bd) mtx_unlock(BD_LOCKPTR((bd)))
147 #define BD_ASSERT_LOCKED(bd) mtx_assert(BD_LOCKPTR((bd)), MA_OWNED)
148 #define BD_RUN_LOCKPTR(bd) (&(bd)->bd_run_lock)
149 #define BD_RUN_LOCK(bd) mtx_lock(BD_RUN_LOCKPTR((bd)))
150 #define BD_RUN_UNLOCK(bd) mtx_unlock(BD_RUN_LOCKPTR((bd)))
151 #define BD_DOMAIN(bd) (bd - bdomain)
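The BD_DOMAIN() macro above recovers a domain's index by subtracting the base of the global bdomain array from a pointer into it. A minimal standalone sketch of that idiom, assuming a placeholder struct and array size rather than the real bufdomain:

#include <stdio.h>

struct bufdomain { int bd_placeholder; };

static struct bufdomain bdomain[4];

#define BD_DOMAIN(bd)	((bd) - bdomain)	/* pointer difference == array index */

int
main(void)
{
	struct bufdomain *bd = &bdomain[2];

	printf("domain index %td\n", BD_DOMAIN(bd));	/* prints 2 */
	return (0);
}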
163 caddr_t poisoned_buf = (void *)-1;
200 static void bd_init(struct bufdomain *bd);
201 static int bd_flushall(struct bufdomain *bd);
252 "Minimum preferred space used for in-progress I/O");
256 "Maximum amount of space to use for in-progress I/O");
364 * defragment the address space where a simple count of the number of dirty
387 #define QUEUE_CLEAN 3 /* non-B_DELWRI buffers */
404 * per-cpu empty buffer cache.
416 if (error != 0 || req->newptr == NULL)
445 if (error != 0 || req->newptr == NULL)
464 if (error != 0 || req->newptr == NULL)
486 if (sizeof(int) == sizeof(long) || req->oldlen >= sizeof(long))
543 bd_clear(struct bufdomain *bd)
547 if (bd->bd_numdirtybuffers <= bd->bd_lodirtybuffers)
548 BIT_CLR(BUF_DOMAINS, BD_DOMAIN(bd), &bdlodirty);
549 if (bd->bd_numdirtybuffers <= bd->bd_hidirtybuffers)
550 BIT_CLR(BUF_DOMAINS, BD_DOMAIN(bd), &bdhidirty);
561 bd_set(struct bufdomain *bd)
565 if (bd->bd_numdirtybuffers > bd->bd_lodirtybuffers)
566 BIT_SET(BUF_DOMAINS, BD_DOMAIN(bd), &bdlodirty);
567 if (bd->bd_numdirtybuffers > bd->bd_hidirtybuffers)
568 BIT_SET(BUF_DOMAINS, BD_DOMAIN(bd), &bdhidirty);
581 struct bufdomain *bd;
584 bd = bufdomain(bp);
585 num = atomic_fetchadd_int(&bd->bd_numdirtybuffers, -1);
586 if (num == (bd->bd_lodirtybuffers + bd->bd_hidirtybuffers) / 2)
588 if (num == bd->bd_lodirtybuffers || num == bd->bd_hidirtybuffers)
589 bd_clear(bd);
601 struct bufdomain *bd;
608 bd = bufdomain(bp);
609 num = atomic_fetchadd_int(&bd->bd_numdirtybuffers, 1);
610 if (num == (bd->bd_lodirtybuffers + bd->bd_hidirtybuffers) / 2)
612 if (num == bd->bd_lodirtybuffers || num == bd->bd_hidirtybuffers)
613 bd_set(bd);
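bdirtyadd() and bdirtysub() above rely on atomic_fetchadd_int() returning the counter's previous value, so the wakeup and the domain-bit update fire exactly once, on the thread whose increment or decrement lands exactly on a threshold. A rough userspace rendering of the same edge-triggered pattern, using C11 atomics instead of the kernel's atomic(9) routines; all names and threshold values below are illustrative:

#include <stdatomic.h>
#include <stdio.h>

static atomic_int numdirty;		/* stand-in for bd_numdirtybuffers */
static const int lodirty = 16;		/* stand-in for bd_lodirtybuffers */
static const int hidirty = 64;		/* stand-in for bd_hidirtybuffers */

static void
dirty_add(void)
{
	int old;

	/* atomic_fetch_add() returns the value before the increment. */
	old = atomic_fetch_add(&numdirty, 1);
	if (old == (lodirty + hidirty) / 2)
		printf("midpoint crossed, kick the flusher\n");
	if (old == lodirty || old == hidirty)
		printf("rose past a threshold, set the domain's dirty bit\n");
}

static void
dirty_sub(void)
{
	int old;

	old = atomic_fetch_sub(&numdirty, 1);
	if (old == (lodirty + hidirty) / 2)
		printf("fell to the midpoint, wake dirty-buffer waiters\n");
	if (old == lodirty || old == hidirty)
		printf("dropped below a threshold, clear the domain's dirty bit\n");
}

int
main(void)
{
	for (int i = 0; i < hidirty + 1; i++)
		dirty_add();
	for (int i = 0; i < hidirty + 1; i++)
		dirty_sub();
	return (0);
}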
622 bufspace_daemon_wakeup(struct bufdomain *bd)
628 if (atomic_fetchadd_int(&bd->bd_running, 1) == 0) {
629 BD_RUN_LOCK(bd);
630 atomic_store_int(&bd->bd_running, 1);
631 wakeup(&bd->bd_running);
632 BD_RUN_UNLOCK(bd);
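bufspace_daemon_wakeup() above coalesces wakeups: only the caller that observes bd_running go from 0 to nonzero takes the run lock and wakes the daemon, while concurrent callers return immediately, and the daemon clears the flag again before it sleeps (see the loop further down). A simplified pthreads rendering of that handshake, with invented names and a stubbed work check:

#include <pthread.h>
#include <stdatomic.h>

static pthread_mutex_t run_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t run_cv = PTHREAD_COND_INITIALIZER;
static atomic_int running;		/* stand-in for bd_running */

static void
daemon_wakeup(void)
{
	/* Only the 0 -> 1 transition pays for the lock and the signal. */
	if (atomic_fetch_add(&running, 1) == 0) {
		pthread_mutex_lock(&run_lock);
		atomic_store(&running, 1);	/* collapse the count back to 1 */
		pthread_cond_signal(&run_cv);
		pthread_mutex_unlock(&run_lock);
	}
}

static int
work_pending(void)
{
	return (1);	/* stub so this demo never blocks */
}

static void
daemon_idle(void)
{
	pthread_mutex_lock(&run_lock);
	atomic_store(&running, 0);
	/*
	 * Re-check for work after clearing the flag; a wakeup posted in the
	 * window above is not lost because its cause is still visible here,
	 * mirroring the threshold re-check in the real daemon loop.
	 */
	if (!work_pending())
		pthread_cond_wait(&run_cv, &run_lock);
	atomic_store(&running, 1);
	pthread_mutex_unlock(&run_lock);
}

int
main(void)
{
	daemon_wakeup();
	daemon_idle();
	return (0);
}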
645 struct bufdomain *bd;
649 KASSERT((bp->b_flags & B_MALLOC) == 0,
651 bd = bufdomain(bp);
652 diff = bufsize - bp->b_bufsize;
654 atomic_subtract_long(&bd->bd_bufspace, -diff);
656 space = atomic_fetchadd_long(&bd->bd_bufspace, diff);
658 if (space < bd->bd_bufspacethresh &&
659 space + diff >= bd->bd_bufspacethresh)
660 bufspace_daemon_wakeup(bd);
662 bp->b_bufsize = bufsize;
672 bufspace_reserve(struct bufdomain *bd, int size, bool metadata)
678 limit = bd->bd_maxbufspace;
680 limit = bd->bd_hibufspace;
681 space = atomic_fetchadd_long(&bd->bd_bufspace, size);
684 atomic_subtract_long(&bd->bd_bufspace, size);
689 if (space < bd->bd_bufspacethresh && new >= bd->bd_bufspacethresh)
690 bufspace_daemon_wakeup(bd);
701 bufspace_release(struct bufdomain *bd, int size)
704 atomic_subtract_long(&bd->bd_bufspace, size);
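bufspace_reserve() above is an optimistic reservation: the space counter is bumped first, the new total is compared against the limit, and the increment is rolled back if it overshot; the space daemon is woken only when the addition crosses bd_bufspacethresh. A compact userspace sketch of the same reserve/release pair, using C11 atomics and made-up limit values:

#include <errno.h>
#include <stdatomic.h>
#include <stdio.h>

static atomic_long bufspace;			/* stand-in for bd_bufspace */
static const long hibufspace = 1L << 20;	/* stand-in for bd_hibufspace */
static const long bufspacethresh = 3L << 18;	/* stand-in for bd_bufspacethresh */

static int
space_reserve(long size)
{
	long space, new;

	space = atomic_fetch_add(&bufspace, size);
	new = space + size;
	if (new > hibufspace) {
		atomic_fetch_sub(&bufspace, size);	/* roll back the overshoot */
		return (ENOSPC);
	}
	if (space < bufspacethresh && new >= bufspacethresh)
		printf("crossed the threshold, wake the space daemon\n");
	return (0);
}

static void
space_release(long size)
{
	atomic_fetch_sub(&bufspace, size);
}

int
main(void)
{
	if (space_reserve(4096) == 0)
		space_release(4096);
	return (0);
}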
712 * operation must be re-tried on return.
715 bufspace_wait(struct bufdomain *bd, struct vnode *vp, int gbflags,
725 BD_LOCK(bd);
726 while (bd->bd_wanted) {
727 if (vp != NULL && vp->v_type != VCHR &&
728 (td->td_pflags & TDP_BUFNEED) == 0) {
729 BD_UNLOCK(bd);
739 (td->td_pflags & TDP_NORUNNINGBUF);
749 td->td_pflags |= TDP_BUFNEED | TDP_NORUNNINGBUF;
750 fl = buf_flush(vp, bd, flushbufqtarget);
751 td->td_pflags &= norunbuf;
752 BD_LOCK(bd);
755 if (bd->bd_wanted == 0)
758 error = msleep(&bd->bd_wanted, BD_LOCKPTR(bd),
763 BD_UNLOCK(bd);
769 struct bufdomain *bd = arg;
775 BD_RUN_LOCK(bd);
776 bd->bd_shutdown = true;
777 wakeup(&bd->bd_running);
778 error = msleep(&bd->bd_shutdown, BD_RUN_LOCKPTR(bd), 0,
780 BD_RUN_UNLOCK(bd);
795 struct bufdomain *bd = arg;
797 EVENTHANDLER_REGISTER(shutdown_pre_sync, bufspace_daemon_shutdown, bd,
800 BD_RUN_LOCK(bd);
801 while (!bd->bd_shutdown) {
802 BD_RUN_UNLOCK(bd);
815 * 1) The daemon wakes up voluntarily once per-second
829 while (bd->bd_bufspace > bd->bd_lobufspace ||
830 bd->bd_freebuffers < bd->bd_hifreebuffers) {
831 if (buf_recycle(bd, false) != 0) {
832 if (bd_flushall(bd))
843 BD_LOCK(bd);
844 if (bd->bd_wanted) {
845 msleep(&bd->bd_wanted, BD_LOCKPTR(bd),
848 BD_UNLOCK(bd);
854 * Re-check our limits and sleep. bd_running must be
859 BD_RUN_LOCK(bd);
860 if (bd->bd_shutdown)
862 atomic_store_int(&bd->bd_running, 0);
863 if (bd->bd_bufspace < bd->bd_bufspacethresh &&
864 bd->bd_freebuffers > bd->bd_lofreebuffers) {
865 msleep(&bd->bd_running, BD_RUN_LOCKPTR(bd),
866 PRIBIO, "-", hz);
869 atomic_store_int(&bd->bd_running, 1);
872 wakeup(&bd->bd_shutdown);
873 BD_RUN_UNLOCK(bd);
888 KASSERT((bp->b_flags & B_MALLOC) != 0,
889 ("bufmallocadjust: non-malloc buf %p", bp));
890 diff = bufsize - bp->b_bufsize;
892 atomic_subtract_long(&bufmallocspace, -diff);
895 bp->b_bufsize = bufsize;
926 bspace = bp->b_runningbufspace;
929 space = atomic_fetchadd_long(&runningbufspace, -bspace);
932 bp->b_runningbufspace = 0;
939 if (space - bspace > lorunningspace)
950 bp->b_runningbufspace = space;
958 * running. This routine is used in async-write situations to
995 if (bp->b_flags & B_CACHE) {
998 bp->b_flags &= ~B_CACHE;
1039 * bd_speedup - speedup the buffer cache flushing code
1104 * buffers to cover 1/10 of our ram over 64MB. When auto-sizing
1115 nbuf += min((physmem_est - 4096) / factor,
1118 nbuf += min((physmem_est - 65536) * 2 / (factor * 5),
1145 * The fall-back to the maxbuf in case of maxbcache unset,
1153 (TRANSIENT_DENOM - 1)) {
1159 biotmap_sz = maxbuf_sz - buf_sz;
1167 buf_sz -= biotmap_sz;
1174 * Artificially limit to 1024 simultaneous in-flight I/Os
1221 bq_init(&bqempty, QUEUE_EMPTY, -1, "bufq empty lock");
1235 bp->b_flags = B_INVAL;
1236 bp->b_rcred = NOCRED;
1237 bp->b_wcred = NOCRED;
1238 bp->b_qindex = QUEUE_NONE;
1239 bp->b_domain = -1;
1240 bp->b_subqueue = mp_maxid + 1;
1241 bp->b_xflags = 0;
1242 bp->b_data = bp->b_kvabase = unmapped_buf;
1243 LIST_INIT(&bp->b_dep);
1261 hibufspace = lmax(3 * maxbufspace / 4, maxbufspace - maxbcachebuf * 10);
1263 bufspacethresh = lobufspace + (hibufspace - lobufspace) / 2;
1289 * of delayed-write dirty buffers we allow to stack up.
1294 * To support extreme low-memory systems, make sure hidirtybuffers
1306 * buf headers under heavy utilization. The bufs in per-cpu caches
1311 * should be set appropriately to limit work per-iteration.
1325 * One queue per-256mb up to the max. More queues gives better
1330 struct bufdomain *bd;
1332 bd = &bdomain[i];
1333 bd_init(bd);
1334 bd->bd_freebuffers = nbuf / buf_domains;
1335 bd->bd_hifreebuffers = hifreebuffers / buf_domains;
1336 bd->bd_lofreebuffers = lofreebuffers / buf_domains;
1337 bd->bd_bufspace = 0;
1338 bd->bd_maxbufspace = maxbufspace / buf_domains;
1339 bd->bd_hibufspace = hibufspace / buf_domains;
1340 bd->bd_lobufspace = lobufspace / buf_domains;
1341 bd->bd_bufspacethresh = bufspacethresh / buf_domains;
1342 bd->bd_numdirtybuffers = 0;
1343 bd->bd_hidirtybuffers = hidirtybuffers / buf_domains;
1344 bd->bd_lodirtybuffers = lodirtybuffers / buf_domains;
1345 bd->bd_dirtybufthresh = dirtybufthresh / buf_domains;
1346 /* Don't allow more than 2% of bufs in the per-cpu caches. */
1347 bd->bd_lim = nbuf / buf_domains / 50 / mp_ncpus;
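The bd_lim computation above caps the per-CPU clean caches at roughly 2% of a domain's buffers, split evenly across CPUs. With purely illustrative values of nbuf = 100000, buf_domains = 8 and mp_ncpus = 16, integer division gives bd_lim = 100000 / 8 / 50 / 16 = 15, so no per-CPU clean queue in that domain grows beyond 15 buffers before it is flushed to the domain's global queue.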
1365 KASSERT(bp->b_kvabase != unmapped_buf,
1367 KASSERT(bp->b_data != unmapped_buf,
1369 KASSERT(bp->b_data < unmapped_buf || bp->b_data >= unmapped_buf +
1377 KASSERT(bp->b_data == unmapped_buf,
1391 if (((bp->b_flags & B_INVAL) == 0 && BUF_ISLOCKED(bp)) ||
1392 ((bp->b_flags & (B_DELWRI | B_INVAL)) == B_DELWRI))
1423 for (i = nbuf - 1; i >= 0; i--) {
1463 * Count only busy local buffers to prevent forcing
1467 for (i = nbuf - 1; i >= 0; i--) {
1472 if (bp->b_dev == NULL) {
1474 bp->b_vp->v_mount, mnt_list);
1482 nbusy, bp, bp->b_vp, bp->b_flags,
1483 (intmax_t)bp->b_blkno,
1484 (intmax_t)bp->b_lblkno);
1487 vn_printf(bp->b_vp,
1514 * Swapoff before unmount, because file-backed swap is
1515 * non-operational after unmount of the underlying
1534 * bp->b_data is relative to bp->b_offset, but
1535 * bp->b_offset may be offset into the first page.
1537 bp->b_data = (caddr_t)trunc_page((vm_offset_t)bp->b_data);
1538 pmap_qenter((vm_offset_t)bp->b_data, bp->b_pages, bp->b_npages);
1539 bp->b_data = (caddr_t)((vm_offset_t)bp->b_data |
1540 (vm_offset_t)(bp->b_offset & PAGE_MASK));
1547 return (&bdomain[bp->b_domain]);
1554 switch (bp->b_qindex) {
1562 return (&bufdomain(bp)->bd_dirtyq);
1564 return (&bufdomain(bp)->bd_subq[bp->b_subqueue]);
1568 panic("bufqueue(%p): Unhandled type %d\n", bp, bp->b_qindex);
1580 * bp can be pushed from a per-cpu queue to the
1606 struct bufdomain *bd;
1616 if (bp->b_flags & B_REMFREE) {
1617 if (bp->b_qindex == qindex) {
1618 bp->b_flags |= B_REUSE;
1619 bp->b_flags &= ~B_REMFREE;
1627 bd = bufdomain(bp);
1629 if (bd->bd_lim != 0)
1630 bq = &bd->bd_subq[PCPU_GET(cpuid)];
1632 bq = bd->bd_cleanq;
1634 bq = &bd->bd_dirtyq;
1647 if (bp->b_flags & B_REMFREE)
1649 if (bp->b_vflags & BV_BKGRDINPROG)
1651 if (bp->b_rcred != NOCRED) {
1652 crfree(bp->b_rcred);
1653 bp->b_rcred = NOCRED;
1655 if (bp->b_wcred != NOCRED) {
1656 crfree(bp->b_wcred);
1657 bp->b_wcred = NOCRED;
1659 if (!LIST_EMPTY(&bp->b_dep))
1662 atomic_add_int(&bufdomain(bp)->bd_freebuffers, 1);
1663 MPASS((bp->b_flags & B_MAXPHYS) == 0);
1674 * only as a per-cpu cache of bufs still maintained on a global list.
1712 TAILQ_INSERT_TAIL(&bq->bq_queue, bp, b_freelist);
1713 bp->b_flags &= ~(B_AGE | B_REUSE);
1714 bq->bq_len++;
1715 bp->b_qindex = bq->bq_index;
1726 buf_alloc(struct bufdomain *bd)
1737 freebufs = atomic_fetchadd_int(&bd->bd_freebuffers, -1);
1741 atomic_add_int(&bd->bd_freebuffers, 1);
1742 bufspace_daemon_wakeup(bd);
1747 * Wake-up the bufspace daemon on transition below threshold.
1749 if (freebufs == bd->bd_lofreebuffers)
1750 bufspace_daemon_wakeup(bd);
1757 KASSERT(bp->b_vp == NULL,
1758 ("bp: %p still has vnode %p.", bp, bp->b_vp));
1759 KASSERT((bp->b_flags & (B_DELWRI | B_NOREUSE)) == 0,
1760 ("invalid buffer %p flags %#x", bp, bp->b_flags));
1761 KASSERT((bp->b_xflags & (BX_VNCLEAN|BX_VNDIRTY)) == 0,
1762 ("bp: %p still on a buffer list. xflags %X", bp, bp->b_xflags));
1763 KASSERT(bp->b_npages == 0,
1764 ("bp: %p still has %d vm pages\n", bp, bp->b_npages));
1765 KASSERT(bp->b_kvasize == 0, ("bp: %p still has kva\n", bp));
1766 KASSERT(bp->b_bufsize == 0, ("bp: %p still has bufspace\n", bp));
1767 MPASS((bp->b_flags & B_MAXPHYS) == 0);
1769 bp->b_domain = BD_DOMAIN(bd);
1770 bp->b_flags = 0;
1771 bp->b_ioflags = 0;
1772 bp->b_xflags = 0;
1773 bp->b_vflags = 0;
1774 bp->b_vp = NULL;
1775 bp->b_blkno = bp->b_lblkno = 0;
1776 bp->b_offset = NOOFFSET;
1777 bp->b_iodone = 0;
1778 bp->b_error = 0;
1779 bp->b_resid = 0;
1780 bp->b_bcount = 0;
1781 bp->b_npages = 0;
1782 bp->b_dirtyoff = bp->b_dirtyend = 0;
1783 bp->b_bufobj = NULL;
1784 bp->b_data = bp->b_kvabase = unmapped_buf;
1785 bp->b_fsprivate1 = NULL;
1786 bp->b_fsprivate2 = NULL;
1787 bp->b_fsprivate3 = NULL;
1788 LIST_INIT(&bp->b_dep);
1801 buf_recycle(struct bufdomain *bd, bool kva)
1809 bq = bd->bd_cleanq;
1811 KASSERT(BQ_LOCKPTR(bq) == BD_LOCKPTR(bd),
1813 nbp = TAILQ_FIRST(&bq->bq_queue);
1830 if (kva && bp->b_kvasize == 0)
1840 if ((bp->b_flags & B_REUSE) != 0) {
1841 TAILQ_REMOVE(&bq->bq_queue, bp, b_freelist);
1842 TAILQ_INSERT_TAIL(&bq->bq_queue, bp, b_freelist);
1843 bp->b_flags &= ~B_REUSE;
1851 if ((bp->b_vflags & BV_BKGRDINPROG) != 0) {
1856 KASSERT(bp->b_qindex == QUEUE_CLEAN,
1858 bp->b_qindex, bp));
1859 KASSERT(bp->b_domain == BD_DOMAIN(bd),
1861 bp->b_domain, (int)BD_DOMAIN(bd)));
1873 if ((bp->b_vflags & BV_BKGRDERR) != 0) {
1876 nbp = TAILQ_FIRST(&bq->bq_queue);
1879 bp->b_flags |= B_INVAL;
1883 bd->bd_wanted = 1;
1899 CTR3(KTR_BUF, "bremfree(%p) vp %p flags %X", bp, bp->b_vp, bp->b_flags);
1900 KASSERT((bp->b_flags & B_REMFREE) == 0,
1902 KASSERT(bp->b_qindex != QUEUE_NONE,
1906 bp->b_flags |= B_REMFREE;
1929 mtx_init(&bq->bq_lock, lockname, NULL, MTX_DEF);
1930 TAILQ_INIT(&bq->bq_queue);
1931 bq->bq_len = 0;
1932 bq->bq_index = qindex;
1933 bq->bq_subqueue = subqueue;
1937 bd_init(struct bufdomain *bd)
1941 /* Per-CPU clean buf queues, plus one global queue. */
1942 bd->bd_subq = mallocarray(mp_maxid + 2, sizeof(struct bufqueue),
1944 bd->bd_cleanq = &bd->bd_subq[mp_maxid + 1];
1945 bq_init(bd->bd_cleanq, QUEUE_CLEAN, mp_maxid + 1, "bufq clean lock");
1946 bq_init(&bd->bd_dirtyq, QUEUE_DIRTY, -1, "bufq dirty lock");
1948 bq_init(&bd->bd_subq[i], QUEUE_CLEAN, i,
1950 mtx_init(&bd->bd_run_lock, "bufspace daemon run lock", NULL, MTX_DEF);
1964 bp, bp->b_vp, bp->b_flags);
1965 KASSERT(bp->b_qindex != QUEUE_NONE,
1971 if (bp->b_qindex != QUEUE_EMPTY) {
1974 KASSERT(bq->bq_len >= 1,
1975 ("queue %d underflow", bp->b_qindex));
1976 TAILQ_REMOVE(&bq->bq_queue, bp, b_freelist);
1977 bq->bq_len--;
1978 bp->b_qindex = QUEUE_NONE;
1979 bp->b_flags &= ~(B_REMFREE | B_REUSE);
1983 bd_flush(struct bufdomain *bd, struct bufqueue *bq)
1988 if (bq != bd->bd_cleanq) {
1989 BD_LOCK(bd);
1990 while ((bp = TAILQ_FIRST(&bq->bq_queue)) != NULL) {
1991 TAILQ_REMOVE(&bq->bq_queue, bp, b_freelist);
1992 TAILQ_INSERT_TAIL(&bd->bd_cleanq->bq_queue, bp,
1994 bp->b_subqueue = bd->bd_cleanq->bq_subqueue;
1996 bd->bd_cleanq->bq_len += bq->bq_len;
1997 bq->bq_len = 0;
1999 if (bd->bd_wanted) {
2000 bd->bd_wanted = 0;
2001 wakeup(&bd->bd_wanted);
2003 if (bq != bd->bd_cleanq)
2004 BD_UNLOCK(bd);
2008 bd_flushall(struct bufdomain *bd)
2014 if (bd->bd_lim == 0)
2018 bq = &bd->bd_subq[i];
2019 if (bq->bq_len == 0)
2022 bd_flush(bd, bq);
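bd_flush() above splices a per-CPU clean subqueue onto the domain's global clean queue under the domain lock and wakes anything sleeping on bd_wanted; bd_flushall() simply repeats that for every subqueue. A stripped-down pthreads version of the splice-and-wake step, using the sys/queue.h macros as the kernel does but with everything else invented for illustration:

#include <sys/queue.h>
#include <pthread.h>

struct buf {
	TAILQ_ENTRY(buf) b_freelist;
};
TAILQ_HEAD(bufq, buf);

static pthread_mutex_t dom_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t dom_wanted_cv = PTHREAD_COND_INITIALIZER;
static struct bufq cleanq = TAILQ_HEAD_INITIALIZER(cleanq);
static int dom_wanted;

static void
flush_subqueue(struct bufq *subq)
{
	struct buf *bp;

	pthread_mutex_lock(&dom_lock);
	/* Move every buffer from the per-CPU subqueue to the global queue. */
	while ((bp = TAILQ_FIRST(subq)) != NULL) {
		TAILQ_REMOVE(subq, bp, b_freelist);
		TAILQ_INSERT_TAIL(&cleanq, bp, b_freelist);
	}
	if (dom_wanted) {
		dom_wanted = 0;
		pthread_cond_broadcast(&dom_wanted_cv);
	}
	pthread_mutex_unlock(&dom_lock);
}

int
main(void)
{
	struct bufq subq = TAILQ_HEAD_INITIALIZER(subq);
	struct buf a, b;

	TAILQ_INSERT_TAIL(&subq, &a, b_freelist);
	TAILQ_INSERT_TAIL(&subq, &b, b_freelist);
	flush_subqueue(&subq);
	return (0);
}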
2033 struct bufdomain *bd;
2035 if (bp->b_qindex != QUEUE_NONE)
2038 bd = bufdomain(bp);
2039 if (bp->b_flags & B_AGE) {
2041 if (bq->bq_index == QUEUE_CLEAN)
2042 bq = bd->bd_cleanq;
2044 TAILQ_INSERT_HEAD(&bq->bq_queue, bp, b_freelist);
2047 TAILQ_INSERT_TAIL(&bq->bq_queue, bp, b_freelist);
2049 bp->b_flags &= ~(B_AGE | B_REUSE);
2050 bq->bq_len++;
2051 bp->b_qindex = bq->bq_index;
2052 bp->b_subqueue = bq->bq_subqueue;
2061 if (bp->b_qindex == QUEUE_CLEAN) {
2063 * Flush the per-cpu queue and notify any waiters.
2065 if (bd->bd_wanted || (bq != bd->bd_cleanq &&
2066 bq->bq_len >= bd->bd_lim))
2067 bd_flush(bd, bq);
2083 if (bp->b_kvasize == 0) {
2084 KASSERT(bp->b_kvabase == unmapped_buf &&
2085 bp->b_data == unmapped_buf,
2092 if (bp->b_kvasize == 0)
2095 vmem_free(buffer_arena, (vm_offset_t)bp->b_kvabase, bp->b_kvasize);
2096 counter_u64_add(bufkvaspace, -bp->b_kvasize);
2098 bp->b_data = bp->b_kvabase = unmapped_buf;
2099 bp->b_kvasize = 0;
2115 MPASS((bp->b_flags & B_MAXPHYS) == 0);
2130 bp->b_kvabase = (caddr_t)addr;
2131 bp->b_kvasize = maxsize;
2132 counter_u64_add(bufkvaspace, bp->b_kvasize);
2134 bp->b_data = unmapped_buf;
2137 bp->b_data = bp->b_kvabase;
2168 * Attempt to initiate asynchronous I/O on read-ahead blocks. We must
2186 if ((rabp->b_flags & B_CACHE) != 0) {
2197 td->td_ru.ru_inblock++;
2198 rabp->b_flags |= B_ASYNC;
2199 rabp->b_flags &= ~B_INVAL;
2201 rabp->b_flags |= B_CKHASH;
2202 rabp->b_ckhashcalc = ckhashfunc;
2204 rabp->b_ioflags &= ~BIO_ERROR;
2205 rabp->b_iocmd = BIO_READ;
2206 if (rabp->b_rcred == NOCRED && cred != NOCRED)
2207 rabp->b_rcred = crhold(cred);
2210 rabp->b_iooffset = dbtob(rabp->b_blkno);
2221 * getblk(). Also starts asynchronous I/O on read-ahead blocks.
2226 * the mapping of logical block number to disk block address is done
2228 * disk block address can be passed using the dblkno parameter. If the
2229 * disk block address is not known, then the same value should be passed
2252 KASSERT(blkno == bp->b_lblkno,
2254 (intmax_t)bp->b_lblkno, (intmax_t)blkno));
2262 if ((bp->b_flags & B_CACHE) == 0) {
2265 PROC_LOCK(td->td_proc);
2266 racct_add_buf(td->td_proc, bp, 0);
2267 PROC_UNLOCK(td->td_proc);
2270 td->td_ru.ru_inblock++;
2271 bp->b_iocmd = BIO_READ;
2272 bp->b_flags &= ~B_INVAL;
2274 bp->b_flags |= B_CKHASH;
2275 bp->b_ckhashcalc = ckhashfunc;
2278 bp->b_xflags |= BX_CVTENXIO;
2279 bp->b_ioflags &= ~BIO_ERROR;
2280 if (bp->b_rcred == NOCRED && cred != NOCRED)
2281 bp->b_rcred = crhold(cred);
2283 bp->b_iooffset = dbtob(bp->b_blkno);
2289 * Attempt to initiate asynchronous I/O on read-ahead blocks.
2323 CTR3(KTR_BUF, "bufwrite(%p) vp %p flags %X", bp, bp->b_vp, bp->b_flags);
2324 if ((bp->b_bufobj->bo_flag & BO_DEAD) != 0) {
2325 bp->b_flags |= B_INVAL | B_RELBUF;
2326 bp->b_flags &= ~B_CACHE;
2330 if ((bp->b_flags & B_INVAL) != 0) {
2335 if ((bp->b_flags & B_BARRIER) != 0)
2338 oldflags = bp->b_flags;
2340 KASSERT((bp->b_vflags & BV_BKGRDINPROG) == 0,
2343 vp = bp->b_vp;
2344 vp_md = vp != NULL && (vp->v_vflag & VV_MD) != 0;
2352 bufobj_wref(bp->b_bufobj);
2355 bp->b_flags &= ~B_DONE;
2356 bp->b_ioflags &= ~BIO_ERROR;
2357 bp->b_flags |= B_CACHE;
2358 bp->b_iocmd = BIO_WRITE;
2365 space = runningbufclaim(bp, bp->b_bufsize);
2374 curthread->td_ru.ru_oublock++;
2377 bp->b_iooffset = dbtob(bp->b_blkno);
2392 if ((curthread->td_pflags & TDP_NORUNNINGBUF) == 0 && !vp_md)
2403 struct bufdomain *bd;
2405 bd = &bdomain[bo->bo_domain];
2406 if (bo->bo_dirty.bv_cnt > bd->bd_dirtybufthresh + 10) {
2407 (void) VOP_FSYNC(bp->b_vp, MNT_NOWAIT, curthread);
2409 } else if (bo->bo_dirty.bv_cnt > bd->bd_dirtybufthresh) {
2414 TAILQ_FOREACH(nbp, &bo->bo_dirty.bv_hd, b_bobufs) {
2415 if ((nbp->b_vflags & BV_BKGRDINPROG) ||
2428 if (nbp->b_flags & B_CLUSTEROK) {
2458 CTR3(KTR_BUF, "bdwrite(%p) vp %p flags %X", bp, bp->b_vp, bp->b_flags);
2459 KASSERT(bp->b_bufobj != NULL, ("No b_bufobj %p", bp));
2460 KASSERT((bp->b_flags & B_BARRIER) == 0,
2463 if (bp->b_flags & B_INVAL) {
2475 vp = bp->b_vp;
2476 bo = bp->b_bufobj;
2477 if ((td->td_pflags & (TDP_COWINPROGRESS|TDP_INBDFLUSH)) == 0) {
2478 td->td_pflags |= TDP_INBDFLUSH;
2480 td->td_pflags &= ~TDP_INBDFLUSH;
2489 bp->b_flags |= B_CACHE;
2494 * is likely that the indirect block -- or whatever other datastructure
2497 * requesting a sync -- there might not be enough memory to do
2500 if (vp->v_type != VCHR && bp->b_lblkno == bp->b_blkno) {
2501 VOP_BMAP(vp, bp->b_lblkno, NULL, &bp->b_blkno, NULL, NULL);
2513 * the pages are in a delayed write buffer -- the VFS layer
2535 * bdirty() is kinda like bdwrite() - we have to clear B_INVAL which
2536 * might have been set pre-getblk(). Unlike bwrite/bdwrite, bdirty()
2537 * should only be called if the buffer is known-good.
2549 bp, bp->b_vp, bp->b_flags);
2550 KASSERT(bp->b_bufobj != NULL, ("No b_bufobj %p", bp));
2551 KASSERT(bp->b_flags & B_REMFREE || bp->b_qindex == QUEUE_NONE,
2552 ("bdirty: buffer %p still on queue %d", bp, bp->b_qindex));
2553 bp->b_flags &= ~(B_RELBUF);
2554 bp->b_iocmd = BIO_WRITE;
2556 if ((bp->b_flags & B_DELWRI) == 0) {
2557 bp->b_flags |= /* XXX B_DONE | */ B_DELWRI;
2578 CTR3(KTR_BUF, "bundirty(%p) vp %p flags %X", bp, bp->b_vp, bp->b_flags);
2579 KASSERT(bp->b_bufobj != NULL, ("No b_bufobj %p", bp));
2580 KASSERT(bp->b_flags & B_REMFREE || bp->b_qindex == QUEUE_NONE,
2581 ("bundirty: buffer %p still on queue %d", bp, bp->b_qindex));
2583 if (bp->b_flags & B_DELWRI) {
2584 bp->b_flags &= ~B_DELWRI;
2591 bp->b_flags &= ~B_DEFERRED;
2607 bp->b_flags |= B_ASYNC;
2624 bp->b_flags |= B_ASYNC | B_BARRIER;
2641 bp->b_flags |= B_BARRIER;
2699 bp, bp->b_vp, bp->b_flags);
2700 KASSERT(!(bp->b_flags & (B_CLUSTER|B_PAGING)),
2702 KASSERT((bp->b_flags & B_VMIO) != 0 || (bp->b_flags & B_NOREUSE) == 0,
2703 ("brelse: non-VMIO buffer marked NOREUSE"));
2714 if (bp->b_flags & B_MANAGED) {
2719 if (LIST_EMPTY(&bp->b_dep)) {
2720 bp->b_flags &= ~B_IOSTARTED;
2722 KASSERT((bp->b_flags & B_IOSTARTED) == 0,
2726 if ((bp->b_vflags & (BV_BKGRDINPROG | BV_BKGRDERR)) == BV_BKGRDERR) {
2727 BO_LOCK(bp->b_bufobj);
2728 bp->b_vflags &= ~BV_BKGRDERR;
2729 BO_UNLOCK(bp->b_bufobj);
2733 if (bp->b_iocmd == BIO_WRITE && (bp->b_ioflags & BIO_ERROR) &&
2734 (bp->b_flags & B_INVALONERR)) {
2741 bp->b_flags |= B_INVAL | B_RELBUF | B_NOCACHE;
2742 bp->b_flags &= ~(B_ASYNC | B_CACHE);
2745 if (bp->b_iocmd == BIO_WRITE && (bp->b_ioflags & BIO_ERROR) &&
2746 (bp->b_error != ENXIO || !LIST_EMPTY(&bp->b_dep)) &&
2747 !(bp->b_flags & B_INVAL)) {
2754 * contract with the local storage device drivers is that
2764 * non-empty dependencies - the soft updates code might need
2769 bp->b_ioflags &= ~BIO_ERROR;
2771 } else if ((bp->b_flags & (B_NOCACHE | B_INVAL)) ||
2772 (bp->b_ioflags & BIO_ERROR) || (bp->b_bufsize <= 0)) {
2778 bp->b_flags |= B_INVAL;
2779 if (!LIST_EMPTY(&bp->b_dep))
2781 if (bp->b_flags & B_DELWRI)
2783 bp->b_flags &= ~(B_DELWRI | B_CACHE);
2784 if ((bp->b_flags & B_VMIO) == 0) {
2786 if (bp->b_vp)
2800 if (bp->b_flags & B_DELWRI)
2801 bp->b_flags &= ~B_RELBUF;
2811 * buffer is also B_INVAL because it hits the re-dirtying code above.
2821 v_mnt = bp->b_vp != NULL ? bp->b_vp->v_mount : NULL;
2823 if ((bp->b_flags & B_VMIO) && (bp->b_flags & B_NOCACHE ||
2824 (bp->b_ioflags & BIO_ERROR && bp->b_iocmd == BIO_READ)) &&
2825 (v_mnt == NULL || (v_mnt->mnt_vfc->vfc_flags & VFCF_NETWORK) == 0 ||
2826 vn_isdisk(bp->b_vp) || (bp->b_flags & B_DELWRI) == 0)) {
2831 if ((bp->b_flags & (B_INVAL | B_RELBUF)) != 0 ||
2832 (bp->b_flags & (B_DELWRI | B_NOREUSE)) == B_NOREUSE) {
2834 bp->b_flags &= ~B_NOREUSE;
2835 if (bp->b_vp != NULL)
2844 if (bp->b_bufsize == 0 || (bp->b_ioflags & BIO_ERROR) != 0 ||
2845 (bp->b_flags & (B_INVAL | B_NOCACHE | B_RELBUF)) != 0)
2846 bp->b_flags |= B_INVAL;
2847 if (bp->b_flags & B_INVAL) {
2848 if (bp->b_flags & B_DELWRI)
2850 if (bp->b_vp)
2857 if (bp->b_bufsize == 0) {
2862 if (bp->b_flags & (B_INVAL | B_NOCACHE | B_RELBUF) ||
2863 (bp->b_ioflags & BIO_ERROR)) {
2864 bp->b_xflags &= ~(BX_BKGRDWRITE | BX_ALTDATA);
2865 if (bp->b_vflags & BV_BKGRDINPROG)
2868 bp->b_flags |= B_AGE;
2870 } else if (bp->b_flags & B_DELWRI)
2875 if ((bp->b_flags & B_DELWRI) == 0 && (bp->b_xflags & BX_VNDIRTY))
2878 bp->b_flags &= ~(B_ASYNC | B_NOCACHE | B_RELBUF | B_DIRECT);
2879 bp->b_xflags &= ~(BX_CVTENXIO);
2900 CTR3(KTR_BUF, "bqrelse(%p) vp %p flags %X", bp, bp->b_vp, bp->b_flags);
2901 KASSERT(!(bp->b_flags & (B_CLUSTER|B_PAGING)),
2910 bp->b_flags &= ~(B_ASYNC | B_NOCACHE | B_AGE | B_RELBUF);
2911 bp->b_xflags &= ~(BX_CVTENXIO);
2913 if (LIST_EMPTY(&bp->b_dep)) {
2914 bp->b_flags &= ~B_IOSTARTED;
2916 KASSERT((bp->b_flags & B_IOSTARTED) == 0,
2920 if (bp->b_flags & B_MANAGED) {
2921 if (bp->b_flags & B_REMFREE)
2927 if ((bp->b_flags & B_DELWRI) != 0 || (bp->b_vflags & (BV_BKGRDINPROG |
2929 BO_LOCK(bp->b_bufobj);
2930 bp->b_vflags &= ~BV_BKGRDERR;
2931 BO_UNLOCK(bp->b_bufobj);
2934 if ((bp->b_flags & B_DELWRI) == 0 &&
2935 (bp->b_xflags & BX_VNDIRTY))
2937 if ((bp->b_flags & B_NOREUSE) != 0) {
2968 obj = bp->b_bufobj->bo_object;
2969 KASSERT(blockcount_read(&obj->paging_in_progress) >= bp->b_npages,
2971 blockcount_read(&obj->paging_in_progress), bp->b_npages));
2973 vp = bp->b_vp;
2974 VNPASS(vp->v_holdcnt > 0, vp);
2975 VNPASS(vp->v_object != NULL, vp);
2977 foff = bp->b_offset;
2978 KASSERT(bp->b_offset != NOOFFSET,
2982 iosize = bp->b_bcount - bp->b_resid;
2983 for (i = 0; i < bp->b_npages; i++) {
2984 resid = ((foff + PAGE_SIZE) & ~(off_t)PAGE_MASK) - foff;
2991 m = bp->b_pages[i];
2997 bp->b_pages[i] = m;
2998 } else if ((bp->b_iocmd == BIO_READ) && resid > 0) {
3004 KASSERT((m->dirty & vm_page_bits(foff & PAGE_MASK,
3009 KASSERT(OFF_TO_IDX(foff) == m->pindex,
3011 (intmax_t)foff, (uintmax_t)m->pindex));
3015 iosize -= resid;
3017 vm_object_pip_wakeupn(obj, bp->b_npages);
3020 pmap_qenter(trunc_page((vm_offset_t)bp->b_data),
3021 bp->b_pages, bp->b_npages);
3038 pmap_qremove(trunc_page((vm_offset_t)bp->b_data), bp->b_npages);
3044 * page-aligned then b_data pointer may not be page-aligned.
3048 * supported due to the page granularity bits (m->valid,
3049 * m->dirty, etc...).
3053 flags = (bp->b_flags & B_NOREUSE) != 0 ? VPR_NOREUSE : 0;
3054 obj = bp->b_bufobj->bo_object;
3055 resid = bp->b_bufsize;
3056 poffset = bp->b_offset & PAGE_MASK;
3058 for (i = 0; i < bp->b_npages; i++) {
3059 m = bp->b_pages[i];
3062 bp->b_pages[i] = NULL;
3064 presid = resid > (PAGE_SIZE - poffset) ?
3065 (PAGE_SIZE - poffset) : resid;
3072 resid -= presid;
3076 bp->b_npages = 0;
3080 * Page-granular truncation of an existing VMIO buffer.
3089 if (bp->b_npages == desiredpages)
3094 pmap_qremove((vm_offset_t)trunc_page((vm_offset_t)bp->b_data) +
3095 (desiredpages << PAGE_SHIFT), bp->b_npages - desiredpages);
3102 flags = (bp->b_flags & B_NOREUSE) != 0 ? VPR_NOREUSE : 0;
3103 if ((bp->b_flags & B_DIRECT) != 0) {
3105 obj = bp->b_bufobj->bo_object;
3110 for (i = desiredpages; i < bp->b_npages; i++) {
3111 m = bp->b_pages[i];
3113 bp->b_pages[i] = NULL;
3121 bp->b_npages = desiredpages;
3132 * byte-granular fashion.
3144 obj = bp->b_bufobj->bo_object;
3145 if (bp->b_npages < desiredpages) {
3161 OFF_TO_IDX(bp->b_offset) + bp->b_npages,
3164 &bp->b_pages[bp->b_npages], desiredpages - bp->b_npages);
3165 bp->b_npages = desiredpages;
3172 * byte-granular range ( bcount and size ), not the
3175 * The VM test is against m->valid, which is DEV_BSIZE
3182 toff = bp->b_bcount;
3183 tinc = PAGE_SIZE - ((bp->b_offset + toff) & PAGE_MASK);
3184 while ((bp->b_flags & B_CACHE) && toff < size) {
3187 if (tinc > (size - toff))
3188 tinc = size - toff;
3189 pi = ((bp->b_offset & PAGE_MASK) + toff) >> PAGE_SHIFT;
3190 m = bp->b_pages[pi];
3191 vfs_buf_test_cache(bp, bp->b_offset, toff, tinc, m);
3218 if ((bpa = gbincore(&vp->v_bufobj, lblkno)) == NULL)
3226 if ((bpa->b_flags & (B_DELWRI | B_CLUSTEROK | B_INVAL)) !=
3230 if (bpa->b_bufsize != size)
3237 if ((bpa->b_blkno != bpa->b_lblkno) && (bpa->b_blkno == blkno))
3258 daddr_t lblkno = bp->b_lblkno;
3259 struct vnode *vp = bp->b_vp;
3266 bo = &vp->v_bufobj;
3267 gbflags = (bp->b_data == unmapped_buf) ? GB_UNMAPPED : 0;
3273 if ((vp->v_type == VREG) &&
3274 (vp->v_mount != 0) && /* Only on nodes that have the size info */
3275 (bp->b_flags & (B_CLUSTEROK | B_INVAL)) == B_CLUSTEROK) {
3276 size = vp->v_mount->mnt_stat.f_iosize;
3282 bp->b_blkno + ((i * size) >> DEV_BSHIFT)) == 0)
3286 if (vfs_bio_clcheck(vp, size, lblkno - j,
3287 bp->b_blkno - ((j * size) >> DEV_BSHIFT)) == 0)
3290 --j;
3297 nwritten = cluster_wbuild(vp, size, lblkno - j, ncl,
3303 bp->b_flags |= B_ASYNC;
3309 nwritten = bp->b_bufsize;
3331 if (maxsize != bp->b_kvasize &&
3356 struct bufdomain *bd;
3366 if (vp == NULL || (vp->v_vflag & (VV_MD | VV_SYSTEM)) != 0 ||
3367 vp->v_type == VCHR)
3372 bd = &bdomain[0];
3374 bd = &bdomain[vp->v_bufobj.bo_domain];
3380 bufspace_reserve(bd, maxsize, metadata) != 0) {
3385 if ((bp = buf_alloc(bd)) == NULL) {
3392 } while (buf_recycle(bd, false) == 0);
3395 bufspace_release(bd, maxsize);
3397 bp->b_flags |= B_INVAL;
3400 bufspace_wait(bd, vp, gbflags, slpflag, slptimeo);
3420 buf_flush(struct vnode *vp, struct bufdomain *bd, int target)
3424 flushed = flushbufqueues(vp, bd, target, 0);
3433 flushbufqueues(vp, bd, target, 1);
3459 struct bufdomain *bd;
3477 &bdomain[i], curproc, NULL, 0, 0, "bufspacedaemon-%d", i);
3485 curthread->td_pflags |= TDP_NORUNNINGBUF | TDP_BUFNEED;
3503 bd = &bdomain[i];
3505 lodirty = bd->bd_numdirtybuffers / 2;
3507 lodirty = bd->bd_lodirtybuffers;
3508 while (bd->bd_numdirtybuffers > lodirty) {
3509 if (buf_flush(NULL, bd,
3510 bd->bd_numdirtybuffers - lodirty) == 0)
3572 flushbufqueues(struct vnode *lvp, struct bufdomain *bd, int target,
3586 bq = &bd->bd_dirtyq;
3589 sentinel->b_qindex = QUEUE_SENTINEL;
3591 TAILQ_INSERT_HEAD(&bq->bq_queue, sentinel, b_freelist);
3598 TAILQ_REMOVE(&bq->bq_queue, sentinel, b_freelist);
3599 TAILQ_INSERT_AFTER(&bq->bq_queue, bp, sentinel,
3612 if (bp->b_qindex == QUEUE_SENTINEL || (lvp != NULL &&
3613 bp->b_vp != lvp)) {
3626 if ((bp->b_vflags & BV_BKGRDINPROG) != 0 ||
3627 (bp->b_flags & B_DELWRI) == 0) {
3631 if (bp->b_flags & B_INVAL) {
3638 if (!LIST_EMPTY(&bp->b_dep) && buf_countdeps(bp, 0)) {
3656 vp = bp->b_vp;
3672 bp, bp->b_vp, bp->b_flags);
3699 TAILQ_REMOVE(&bq->bq_queue, sentinel, b_freelist);
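flushbufqueues() above walks the dirty queue with a sentinel buffer (b_qindex == QUEUE_SENTINEL): the sentinel marks the scan position so the queue lock can be dropped while one buffer is being flushed, and the walk resumes from the sentinel afterwards, skipping other scanners' markers. A single-threaded sketch of that technique, with a boolean flag standing in for QUEUE_SENTINEL and all other details invented:

#include <sys/queue.h>
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

struct buf {
	TAILQ_ENTRY(buf) b_freelist;
	bool b_sentinel;
	int b_id;
};
TAILQ_HEAD(bufq, buf);

static pthread_mutex_t q_lock = PTHREAD_MUTEX_INITIALIZER;

static void
scan_queue(struct bufq *bq)
{
	struct buf sentinel = { .b_sentinel = true };
	struct buf *bp;

	pthread_mutex_lock(&q_lock);
	TAILQ_INSERT_HEAD(bq, &sentinel, b_freelist);
	while ((bp = TAILQ_NEXT(&sentinel, b_freelist)) != NULL) {
		/* Advance the sentinel past the element about to be visited. */
		TAILQ_REMOVE(bq, &sentinel, b_freelist);
		TAILQ_INSERT_AFTER(bq, bp, &sentinel, b_freelist);
		if (bp->b_sentinel)	/* another scanner's marker */
			continue;
		/* Work on bp may drop the lock; the sentinel keeps our place. */
		pthread_mutex_unlock(&q_lock);
		printf("visiting buf %d\n", bp->b_id);
		pthread_mutex_lock(&q_lock);
	}
	TAILQ_REMOVE(bq, &sentinel, b_freelist);
	pthread_mutex_unlock(&q_lock);
}

int
main(void)
{
	struct bufq bq = TAILQ_HEAD_INITIALIZER(bq);
	struct buf bufs[3] = { { .b_id = 0 }, { .b_id = 1 }, { .b_id = 2 } };

	for (int i = 0; i < 3; i++)
		TAILQ_INSERT_TAIL(&bq, &bufs[i], b_freelist);
	scan_queue(&bq);
	return (0);
}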
3730 if (incore(&vp->v_bufobj, blkno))
3732 if (vp->v_mount == NULL)
3734 obj = vp->v_object;
3739 if (size > vp->v_mount->mnt_stat.f_iosize)
3740 size = vp->v_mount->mnt_stat.f_iosize;
3741 off = (vm_ooffset_t)blkno * (vm_ooffset_t)vp->v_mount->mnt_stat.f_iosize;
3743 for (toff = 0; toff < vp->v_mount->mnt_stat.f_iosize; toff += tinc) {
3750 if (tinc > PAGE_SIZE - ((toff + off) & PAGE_MASK))
3751 tinc = PAGE_SIZE - ((toff + off) & PAGE_MASK);
3788 if ((bp->b_flags & B_VMIO) == 0 || bp->b_bufsize == 0)
3791 foff = bp->b_offset;
3792 KASSERT(bp->b_offset != NOOFFSET,
3797 for (i = 0; i < bp->b_npages; i++) {
3800 if (eoff > bp->b_offset + bp->b_bufsize)
3801 eoff = bp->b_offset + bp->b_bufsize;
3802 m = bp->b_pages[i];
3804 /* vm_page_clear_dirty(m, foff & PAGE_MASK, eoff - foff); */
3821 for (i = 0; i < bp->b_npages; i++)
3822 vm_page_test_dirty(bp->b_pages[i]);
3826 * (eoffset - boffset) bytes.
3829 for (i = 0; i < bp->b_npages; i++) {
3830 if (bp->b_pages[i]->dirty)
3833 boffset = (i << PAGE_SHIFT) - (bp->b_offset & PAGE_MASK);
3835 for (i = bp->b_npages - 1; i >= 0; --i) {
3836 if (bp->b_pages[i]->dirty) {
3840 eoffset = ((i + 1) << PAGE_SHIFT) - (bp->b_offset & PAGE_MASK);
3846 if (eoffset > bp->b_bcount)
3847 eoffset = bp->b_bcount;
3855 if (bp->b_dirtyoff > boffset)
3856 bp->b_dirtyoff = boffset;
3857 if (bp->b_dirtyend < eoffset)
3858 bp->b_dirtyend = eoffset;
3873 need_mapping = bp->b_data == unmapped_buf &&
3875 need_kva = bp->b_kvabase == unmapped_buf &&
3876 bp->b_data == unmapped_buf &&
3883 if (need_mapping && bp->b_kvabase != unmapped_buf) {
3893 * Calculate the amount of the address space we would reserve
3896 bsize = vn_isdisk(bp->b_vp) ? DEV_BSIZE : bp->b_bufobj->bo_bsize;
3897 KASSERT(bsize != 0, ("bsize == 0, check bo->bo_bsize"));
3911 bufspace_wait(bufdomain(bp), bp->b_vp, gbflags, 0, 0);
3916 bp->b_data = bp->b_kvabase;
3944 * For a non-VMIO buffer, B_CACHE is set to the opposite of B_INVAL for
3948 * If getblk()ing a previously 0-sized invalid buffer, B_CACHE is set
3950 * non-0-sized but invalid, B_CACHE will be cleared.
3972 * the mapping of logical block number to disk block address is done
3974 * disk block address can be passed using the dblkno parameter. If the
3975 * disk block address is not known, then the same value should be passed
3991 if (vp->v_type != VCHR)
4001 bo = &vp->v_bufobj;
4015 if ((bo->bo_flag & BO_NONSTERILE) == 0)
4026 ("getblk: unexpected error %d from buf try-lock", error));
4028 * We failed a buf try-lock.
4043 if (bp->b_bufobj == bo && bp->b_lblkno == blkno)
4060 * Buffer is in-core. If the buffer is not busy nor managed,
4089 * invalid. Otherwise, for a non-VMIO buffer, B_CACHE is set
4093 if (bp->b_flags & B_INVAL)
4094 bp->b_flags &= ~B_CACHE;
4095 else if ((bp->b_flags & (B_VMIO | B_INVAL)) == 0)
4096 bp->b_flags |= B_CACHE;
4097 if (bp->b_flags & B_MANAGED)
4098 MPASS(bp->b_qindex == QUEUE_NONE);
4103 * check for size inconsistencies for non-VMIO case.
4105 if (bp->b_bcount != size) {
4106 if ((bp->b_flags & B_VMIO) == 0 ||
4107 (size > bp->b_kvasize)) {
4108 if (bp->b_flags & B_DELWRI) {
4109 bp->b_flags |= B_NOCACHE;
4112 if (LIST_EMPTY(&bp->b_dep)) {
4113 bp->b_flags |= B_RELBUF;
4116 bp->b_flags |= B_NOCACHE;
4139 KASSERT(bp->b_offset != NOOFFSET,
4153 * to softupdates re-dirtying the buffer. In the latter
4169 if ((bp->b_flags & (B_CACHE|B_DELWRI)) == B_DELWRI) {
4170 bp->b_flags |= B_NOCACHE;
4174 bp->b_flags &= ~B_DONE;
4177 * Buffer is not in-core, create new buffer. The buffer
4190 bsize = vn_isdisk(vp) ? DEV_BSIZE : bo->bo_bsize;
4191 KASSERT(bsize != 0, ("bsize == 0, check bo->bo_bsize"));
4193 vmio = vp->v_object != NULL;
4204 /* Do not allow non-VMIO notmapped buffers. */
4216 if (d_blkno == -1)
4228 * There's an issue on low memory, 4BSD+non-preempt
4237 * so we aren't effectively busy-waiting in a loop
4255 bp->b_lblkno = blkno;
4256 bp->b_blkno = d_blkno;
4257 bp->b_offset = offset;
4263 bp->b_flags |= B_INVAL;
4277 bp->b_flags |= B_VMIO;
4278 KASSERT(vp->v_object == bp->b_bufobj->bo_object,
4279 ("ARGH! different b_bufobj->bo_object %p %p %p\n",
4280 bp, vp->v_object, bp->b_bufobj->bo_object));
4282 bp->b_flags &= ~B_VMIO;
4283 KASSERT(bp->b_bufobj->bo_object == NULL,
4284 ("ARGH! has b_bufobj->bo_object %p %p\n",
4285 bp, bp->b_bufobj->bo_object));
4291 bp->b_flags &= ~B_DONE;
4296 KASSERT(bp->b_bufobj == bo,
4297 ("bp %p wrong b_bufobj %p should be %p", bp, bp->b_bufobj, bo));
4315 (curthread->td_pflags & TDP_BUFNEED) != 0)
4320 bp->b_flags |= B_INVAL; /* b_dep cleared by getnewbuf() */
4325 * Truncate the backing store for a non-vmio buffer.
4331 if (bp->b_flags & B_MALLOC) {
4337 free(bp->b_data, M_BIOBUF);
4338 bp->b_data = bp->b_kvabase;
4339 bp->b_flags &= ~B_MALLOC;
4348 * Extend the backing for a non-VMIO buffer.
4358 * and revert to page-allocated memory when the buffer
4366 if (bp->b_bufsize == 0 && newbsize <= PAGE_SIZE/2 &&
4368 bp->b_data = malloc(newbsize, M_BIOBUF, M_WAITOK);
4369 bp->b_flags |= B_MALLOC;
4375 * If the buffer is growing on its other-than-first
4376 * allocation then we revert to the page-allocation
4381 if (bp->b_flags & B_MALLOC) {
4382 origbuf = bp->b_data;
4383 origbufsize = bp->b_bufsize;
4384 bp->b_data = bp->b_kvabase;
4386 bp->b_flags &= ~B_MALLOC;
4389 vm_hold_load_pages(bp, (vm_offset_t) bp->b_data + bp->b_bufsize,
4390 (vm_offset_t) bp->b_data + newbsize);
4392 bcopy(origbuf, bp->b_data, origbufsize);
4400 * memory (in the case of non-VMIO operations) or from an associated
4410 * B_CACHE for the non-VMIO case.
4417 if (bp->b_bcount == size)
4420 KASSERT(bp->b_kvasize == 0 || bp->b_kvasize >= size,
4422 bp, bp->b_kvasize, size));
4425 if ((bp->b_flags & B_VMIO) == 0) {
4426 if ((bp->b_flags & B_MALLOC) == 0)
4432 if (newbsize < bp->b_bufsize)
4434 else if (newbsize > bp->b_bufsize)
4440 num_pages((bp->b_offset & PAGE_MASK) + newbsize);
4442 KASSERT((bp->b_flags & B_MALLOC) == 0,
4447 * 0-length.
4449 if (size == 0 || bp->b_bufsize == 0)
4450 bp->b_flags |= B_CACHE;
4452 if (newbsize < bp->b_bufsize)
4455 else if (size > bp->b_bcount)
4459 bp->b_bcount = size; /* requested buffer size. */
4486 if ((bp->bio_flags & BIO_TRANSIENT_MAPPING) != 0) {
4487 bp->bio_flags &= ~BIO_TRANSIENT_MAPPING;
4488 bp->bio_flags |= BIO_UNMAPPED;
4489 start = trunc_page((vm_offset_t)bp->bio_data);
4490 end = round_page((vm_offset_t)bp->bio_data + bp->bio_length);
4491 bp->bio_data = unmapped_buf;
4492 pmap_qremove(start, atop(end - start));
4493 vmem_free(transient_arena, start, end - start);
4494 atomic_add_int(&inflight_transient_maps, -1);
4496 done = bp->bio_done;
4504 bp->bio_flags |= BIO_DONE;
4521 while ((bp->bio_flags & BIO_DONE) == 0)
4524 if (bp->bio_error != 0)
4525 return (bp->bio_error);
4526 if (!(bp->bio_flags & BIO_ERROR))
4536 bp->bio_error = error;
4537 bp->bio_flags |= BIO_ERROR;
4549 buf_track(bp->bio_track_bp, location);
4563 if (bp->b_iocmd == BIO_READ)
4567 if (bp->b_flags & B_EINTR) {
4568 bp->b_flags &= ~B_EINTR;
4571 if (bp->b_ioflags & BIO_ERROR) {
4572 return (bp->b_error ? bp->b_error : EIO);
4586 * In a non-VMIO bp, B_CACHE will be set on the next getblk()
4604 CTR3(KTR_BUF, "bufdone(%p) vp %p flags %X", bp, bp->b_vp, bp->b_flags);
4607 KASSERT(!(bp->b_flags & B_DONE), ("biodone: bp %p already done", bp));
4610 if (bp->b_iocmd == BIO_WRITE)
4611 dropobj = bp->b_bufobj;
4613 if (bp->b_iodone != NULL) {
4614 biodone = bp->b_iodone;
4615 bp->b_iodone = NULL;
4621 if (bp->b_flags & B_VMIO) {
4627 if (bp->b_iocmd == BIO_READ &&
4628 !(bp->b_flags & (B_INVAL|B_NOCACHE)) &&
4629 !(bp->b_ioflags & BIO_ERROR))
4630 bp->b_flags |= B_CACHE;
4633 if (!LIST_EMPTY(&bp->b_dep))
4635 if ((bp->b_flags & B_CKHASH) != 0) {
4636 KASSERT(bp->b_iocmd == BIO_READ,
4637 ("bufdone: b_iocmd %d not BIO_READ", bp->b_iocmd));
4639 (*bp->b_ckhashcalc)(bp);
4643 * will do a wakeup there if necessary - so no need to do a wakeup
4646 if (bp->b_flags & B_ASYNC) {
4647 if ((bp->b_flags & (B_NOCACHE | B_INVAL | B_RELBUF)) ||
4648 (bp->b_ioflags & BIO_ERROR))
4671 if (!(bp->b_flags & B_VMIO))
4674 obj = bp->b_bufobj->bo_object;
4675 for (i = 0; i < bp->b_npages; i++) {
4676 m = bp->b_pages[i];
4678 m = vm_page_relookup(obj, OFF_TO_IDX(bp->b_offset) + i);
4681 bp->b_pages[i] = m;
4684 pmap_qenter(trunc_page((vm_offset_t)bp->b_data),
4685 bp->b_pages, bp->b_npages);
4691 vm_object_pip_wakeupn(obj, bp->b_npages);
4714 if (eoff > bp->b_offset + bp->b_bcount)
4715 eoff = bp->b_offset + bp->b_bcount;
4722 vm_page_set_valid_range(m, off & PAGE_MASK, eoff - off);
4737 * Start and end offsets in buffer. eoff - soff may not cross a
4744 if (eoff > bp->b_offset + bp->b_bcount)
4745 eoff = bp->b_offset + bp->b_bcount;
4755 (vm_offset_t) (eoff - soff)
4768 for (i = 0; i < bp->b_npages; i++)
4769 vm_page_busy_acquire(bp->b_pages[i], VM_ALLOC_SBUSY);
4777 for (i = 0; i < bp->b_npages; i++)
4778 vm_page_sunbusy(bp->b_pages[i]);
4802 if (!(bp->b_flags & B_VMIO))
4805 obj = bp->b_bufobj->bo_object;
4806 foff = bp->b_offset;
4807 KASSERT(bp->b_offset != NOOFFSET,
4809 if ((bp->b_flags & B_CLUSTER) == 0) {
4810 vm_object_pip_add(obj, bp->b_npages);
4813 if (bp->b_bufsize != 0)
4816 for (i = 0; i < bp->b_npages; i++) {
4817 m = bp->b_pages[i];
4830 * I/O from overwriting potentially dirty VM-backed
4832 * It may not work properly with small-block devices.
4839 (bp->b_flags & B_CACHE) == 0) {
4840 bp->b_pages[i] = bogus_page;
4847 pmap_qenter(trunc_page((vm_offset_t)bp->b_data),
4848 bp->b_pages, bp->b_npages);
4866 if (!(bp->b_flags & B_VMIO))
4874 base += (bp->b_offset & PAGE_MASK);
4875 n = PAGE_SIZE - (base & PAGE_MASK);
4884 for (i = base / PAGE_SIZE; size > 0 && i < bp->b_npages; ++i) {
4885 m = bp->b_pages[i];
4890 size -= n;
4899 * If the specified buffer is a non-VMIO buffer, clear the entire
4914 if ((bp->b_flags & (B_VMIO | B_MALLOC)) != B_VMIO) {
4918 bp->b_flags &= ~B_INVAL;
4919 bp->b_ioflags &= ~BIO_ERROR;
4921 sa = bp->b_offset & PAGE_MASK;
4923 for (i = 0; i < bp->b_npages; i++, sa = 0) {
4924 slide = imin(slide + PAGE_SIZE, bp->b_offset + bp->b_bufsize);
4928 if (bp->b_pages[i] == bogus_page)
4931 zbits = (sizeof(vm_page_bits_t) * NBBY) -
4932 (ea - sa) / DEV_BSIZE;
4934 if ((bp->b_pages[i]->valid & mask) == mask)
4936 if ((bp->b_pages[i]->valid & mask) == 0)
4937 pmap_zero_page_area(bp->b_pages[i], sa, ea - sa);
4940 if ((bp->b_pages[i]->valid & (1 << j)) == 0) {
4941 pmap_zero_page_area(bp->b_pages[i],
4946 vm_page_set_valid_range(bp->b_pages[i], j * DEV_BSIZE,
4947 roundup2(ea - sa, DEV_BSIZE));
4950 bp->b_resid = 0;
4961 bzero(bp->b_data + base, size);
4964 n = PAGE_SIZE - (base & PAGE_MASK);
4965 for (i = base / PAGE_SIZE; size > 0 && i < bp->b_npages; ++i) {
4966 m = bp->b_pages[i];
4971 size -= n;
4988 ("buf %p non-VMIO noreuse", bp));
4991 bp->b_flags |= B_DIRECT;
4993 bp->b_xflags |= BX_ALTDATA;
4994 if ((ioflag & (IO_VMIO | IO_DIRECT)) != 0 && LIST_EMPTY(&bp->b_dep)) {
4995 bp->b_flags |= B_RELBUF;
4997 bp->b_flags |= B_NOREUSE;
5020 * a buffers address space. The pages are anonymous and are
5034 index = (from - trunc_page((vm_offset_t)bp->b_data)) >> PAGE_SHIFT;
5035 MPASS((bp->b_flags & B_MAXPHYS) == 0);
5036 KASSERT(to - from <= maxbcachebuf,
5047 VM_ALLOC_COUNT((to - pg) >> PAGE_SHIFT) | VM_ALLOC_WAITOK);
5049 bp->b_pages[index] = p;
5051 bp->b_npages = index;
5064 from = round_page((vm_offset_t)bp->b_data + newbsize);
5065 newnpages = (from - trunc_page((vm_offset_t)bp->b_data)) >> PAGE_SHIFT;
5066 if (bp->b_npages > newnpages)
5067 pmap_qremove(from, bp->b_npages - newnpages);
5068 for (index = newnpages; index < bp->b_npages; index++) {
5069 p = bp->b_pages[index];
5070 bp->b_pages[index] = NULL;
5074 bp->b_npages = newnpages;
5078 * Map an IO request into kernel virtual address space.
5084 * Note that even if the caller determines that the address space should
5085 * be valid, a race or a smaller-file mapped into a larger space may
5097 MPASS((bp->b_flags & B_MAXPHYS) != 0);
5099 if (bp->b_iocmd == BIO_READ)
5101 pidx = vm_fault_quick_hold_pages(&curproc->p_vmspace->vm_map,
5102 (vm_offset_t)uaddr, len, prot, bp->b_pages, PBUF_PAGES);
5104 return (-1);
5105 bp->b_bufsize = len;
5106 bp->b_npages = pidx;
5107 bp->b_offset = ((vm_offset_t)uaddr) & PAGE_MASK;
5109 pmap_qenter((vm_offset_t)bp->b_kvabase, bp->b_pages, pidx);
5110 bp->b_data = bp->b_kvabase + bp->b_offset;
5112 bp->b_data = unmapped_buf;
5127 npages = bp->b_npages;
5129 pmap_qremove(trunc_page((vm_offset_t)bp->b_data), npages);
5130 vm_page_unhold_pages(bp->b_pages, npages);
5132 bp->b_data = unmapped_buf;
5142 bp->b_flags |= B_DONE;
5154 while ((bp->b_flags & B_DONE) == 0)
5172 vp = bp->b_vp;
5173 KASSERT(vp == bo->bo_private, ("Inconsistent vnode bufstrategy"));
5174 KASSERT(vp->v_type != VCHR && vp->v_type != VBLK,
5177 KASSERT(i == 0, ("VOP_STRATEGY failed bp=%p vp=%p", bp, bp->b_vp));
5188 bo->bo_domain =
5191 bo->bo_private = private;
5192 TAILQ_INIT(&bo->bo_clean.bv_hd);
5193 pctrie_init(&bo->bo_clean.bv_root);
5194 TAILQ_INIT(&bo->bo_dirty.bv_hd);
5195 pctrie_init(&bo->bo_dirty.bv_root);
5204 bo->bo_numoutput++;
5213 bo->bo_numoutput++;
5223 KASSERT(bo->bo_numoutput > 0, ("bufobj_wdrop non-positive count"));
5224 if ((--bo->bo_numoutput == 0) && (bo->bo_flag & BO_WWAIT)) {
5225 bo->bo_flag &= ~BO_WWAIT;
5226 wakeup(&bo->bo_numoutput);
5239 while (bo->bo_numoutput) {
5240 bo->bo_flag |= BO_WWAIT;
5241 error = msleep(&bo->bo_numoutput, BO_LOCKPTR(bo),
5258 bip->bio_ma = bp->b_pages;
5259 bip->bio_ma_n = bp->b_npages;
5260 bip->bio_data = unmapped_buf;
5261 bip->bio_ma_offset = (vm_offset_t)bp->b_offset & PAGE_MASK;
5262 bip->bio_flags |= BIO_UNMAPPED;
5263 KASSERT(round_page(bip->bio_ma_offset + bip->bio_length) /
5264 PAGE_SIZE == bp->b_npages,
5265 ("Buffer %p too short: %d %lld %d", bp, bip->bio_ma_offset,
5266 (long long)bip->bio_length, bip->bio_ma_n));
5268 bip->bio_data = bp->b_data;
5269 bip->bio_ma = NULL;
5276 if ((bio->bio_flags & BIO_VLIST) != 0)
5277 return (memdesc_vlist((struct bus_dma_segment *)bio->bio_data,
5278 bio->bio_ma_n));
5280 if ((bio->bio_flags & BIO_UNMAPPED) != 0)
5281 return (memdesc_vmpages(bio->bio_ma, bio->bio_bcount,
5282 bio->bio_ma_offset));
5284 return (memdesc_vaddr(bio->bio_data, bio->bio_bcount));
5295 * In contrast to the generic local pager from vm/vnode_pager.c, this
5303 * The only non-trivial issue is that the exclusive busy state for
5305 * incompatible with the VMIO buffer cache's desire to share-busy the
5308 * shared-busy to excl-busy state after the read.
5325 object = vp->v_object;
5326 mp = vp->v_mount;
5328 la = IDX_TO_OFF(ma[count - 1]->pindex);
5329 if (la >= object->un_pager.vnp.vnp_size)
5335 * and the start of the potential read-ahead region.
5338 lpart = la > object->un_pager.vnp.vnp_size;
5339 error = get_blksize(vp, get_lblkno(vp, IDX_TO_OFF(ma[0]->pindex)),
5345 * Calculate read-ahead, behind and total pages.
5348 lb = IDX_TO_OFF(ma[0]->pindex);
5349 pgsin_b = OFF_TO_IDX(lb - rounddown2(lb, bo_bs));
5353 pgsin_a = OFF_TO_IDX(roundup2(la, bo_bs) - la);
5354 if (la + IDX_TO_OFF(pgsin_a) >= object->un_pager.vnp.vnp_size)
5355 pgsin_a = OFF_TO_IDX(roundup2(object->un_pager.vnp.vnp_size,
5356 PAGE_SIZE) - la);
5363 br_flags = (mp != NULL && (mp->mnt_kern_flag & MNTK_UNMAPPED_BUFS)
5371 lbnp = -1;
5383 * parallel read. The shared->excl upgrade loop at
5390 poff = IDX_TO_OFF(m->pindex);
5391 poffe = MIN(poff + PAGE_SIZE, object->un_pager.vnp.vnp_size);
5401 curthread->td_ucred, br_flags, &bp);
5404 if (bp->b_rcred == curthread->td_ucred) {
5405 crfree(bp->b_rcred);
5406 bp->b_rcred = NOCRED;
5408 if (LIST_EMPTY(&bp->b_dep)) {
5410 * Invalidation clears m->valid, but
5423 bp->b_flags |= B_RELBUF;
5425 bp->b_flags &= ~B_NOCACHE;
5432 vm_page_all_valid(m) || i == count - 1,
5434 if (i == count - 1 && lpart) {
5449 ma[i] = vm_page_grab_unlocked(object, ma[i]->pindex,
5458 * Recheck the valid bits and re-read as needed.
5462 * index count - 1 could mean that the page was
5494 (u_int)bp->b_flags, PRINT_BUF_FLAGS,
5495 (u_int)bp->b_xflags, PRINT_BUF_XFLAGS);
5497 (u_int)bp->b_vflags, PRINT_BUF_VFLAGS,
5498 (u_int)bp->b_ioflags, PRINT_BIO_FLAGS);
5503 bp->b_error, bp->b_bufsize, bp->b_bcount, bp->b_resid,
5504 bp->b_bufobj, bp->b_data, (intmax_t)bp->b_blkno,
5505 (intmax_t)bp->b_lblkno, bp->b_vp, bp->b_dep.lh_first);
5507 bp->b_kvabase, bp->b_kvasize);
5508 if (bp->b_npages) {
5510 db_printf("b_npages = %d, pages(OBJ, IDX, PA): ", bp->b_npages);
5511 for (i = 0; i < bp->b_npages; i++) {
5513 m = bp->b_pages[i];
5515 db_printf("(%p, 0x%lx, 0x%lx)", m->object,
5516 (u_long)m->pindex,
5520 if ((i + 1) < bp->b_npages)
5527 db_printf("b_io_tracking: b_io_tcnt = %u\n", bp->b_io_tcnt);
5529 i = bp->b_io_tcnt % BUF_TRACKING_SIZE;
5531 if (bp->b_io_tracking[BUF_TRACKING_ENTRY(i - j)] == NULL)
5534 bp->b_io_tracking[BUF_TRACKING_ENTRY(i - j)]);
5537 db_printf("b_io_tracking: %s\n", bp->b_io_tracking);
5543 struct bufdomain *bd;
5551 bd = &bdomain[i];
5553 db_printf("\tfreebufs\t%d\n", bd->bd_freebuffers);
5554 db_printf("\tlofreebufs\t%d\n", bd->bd_lofreebuffers);
5555 db_printf("\thifreebufs\t%d\n", bd->bd_hifreebuffers);
5557 db_printf("\tbufspace\t%ld\n", bd->bd_bufspace);
5558 db_printf("\tmaxbufspace\t%ld\n", bd->bd_maxbufspace);
5559 db_printf("\thibufspace\t%ld\n", bd->bd_hibufspace);
5560 db_printf("\tlobufspace\t%ld\n", bd->bd_lobufspace);
5561 db_printf("\tbufspacethresh\t%ld\n", bd->bd_bufspacethresh);
5563 db_printf("\tnumdirtybuffers\t%d\n", bd->bd_numdirtybuffers);
5564 db_printf("\tlodirtybuffers\t%d\n", bd->bd_lodirtybuffers);
5565 db_printf("\thidirtybuffers\t%d\n", bd->bd_hidirtybuffers);
5566 db_printf("\tdirtybufthresh\t%d\n", bd->bd_dirtybufthresh);
5569 TAILQ_FOREACH(bp, &bd->bd_cleanq->bq_queue, b_freelist)
5570 total += bp->b_bufsize;
5572 bd->bd_cleanq->bq_len, total);
5574 TAILQ_FOREACH(bp, &bd->bd_dirtyq.bq_queue, b_freelist)
5575 total += bp->b_bufsize;
5577 bd->bd_dirtyq.bq_len, total);
5578 db_printf("\twakeup\t\t%d\n", bd->bd_wanted);
5579 db_printf("\tlim\t\t%d\n", bd->bd_lim);
5582 db_printf("%d, ", bd->bd_subq[j].bq_len);
5588 if (bp->b_domain == i && BUF_ISLOCKED(bp)) {
5590 total += bp->b_bufsize;
5598 if (bp->b_domain == i) {
5600 total += bp->b_bufsize;
5634 TAILQ_FOREACH(bp, &vp->v_bufobj.bo_clean.bv_hd, b_bobufs) {
5639 TAILQ_FOREACH(bp, &vp->v_bufobj.bo_dirty.bv_hd, b_bobufs) {
5657 if (bp->b_qindex == QUEUE_EMPTY)