Lines Matching +full:num +full:- +full:ss +full:- +full:bits
1 /*-
2 * SPDX-License-Identifier: (BSD-4-Clause AND MIT-CMU)
14 * The Mach Operating System project at Carnegie-Mellon University.
45 * Copyright (c) 1987, 1990 Carnegie-Mellon University.
65 * Pittsburgh PA 15213-3890
72 * The proverbial page-out daemon.
153 "Panic on the given number of out-of-memory errors instead of "
183 "back-to-back calls to oom detector to start OOM");
203 "system-wide limit to user-wired page count");
205 static u_int isqrt(u_int num);
219 vm_pageout_init_scan(struct scan_state *ss, struct vm_pagequeue *pq,
224 KASSERT((marker->a.flags & PGA_ENQUEUED) == 0,
228 TAILQ_INSERT_HEAD(&pq->pq_pl, marker, plinks.q);
230 TAILQ_INSERT_AFTER(&pq->pq_pl, after, marker, plinks.q);
233 vm_batchqueue_init(&ss->bq);
234 ss->pq = pq;
235 ss->marker = marker;
236 ss->maxscan = maxscan;
237 ss->scanned = 0;
242 vm_pageout_end_scan(struct scan_state *ss)
246 pq = ss->pq;
248 KASSERT((ss->marker->a.flags & PGA_ENQUEUED) != 0,
249 ("marker %p not enqueued", ss->marker));
251 TAILQ_REMOVE(&pq->pq_pl, ss->marker, plinks.q);
252 vm_page_aflag_clear(ss->marker, PGA_ENQUEUED);
253 pq->pq_pdpages += ss->scanned;
268 vm_pageout_collect_batch(struct scan_state *ss, const bool dequeue)
273 marker = ss->marker;
274 pq = ss->pq;
276 KASSERT((marker->a.flags & PGA_ENQUEUED) != 0,
277 ("marker %p not enqueued", ss->marker));
281 ss->scanned < ss->maxscan && ss->bq.bq_cnt < VM_BATCHQUEUE_SIZE;
282 m = n, ss->scanned++) {
284 if ((m->flags & PG_MARKER) == 0) {
285 KASSERT((m->a.flags & PGA_ENQUEUED) != 0,
287 KASSERT((m->flags & PG_FICTITIOUS) == 0,
289 KASSERT((m->oflags & VPO_UNMANAGED) == 0,
294 (void)vm_batchqueue_insert(&ss->bq, m);
296 TAILQ_REMOVE(&pq->pq_pl, m, plinks.q);
300 TAILQ_REMOVE(&pq->pq_pl, marker, plinks.q);
304 TAILQ_INSERT_TAIL(&pq->pq_pl, marker, plinks.q);
306 vm_pagequeue_cnt_add(pq, -ss->bq.bq_cnt);
314 vm_pageout_next(struct scan_state *ss, const bool dequeue)
317 if (ss->bq.bq_cnt == 0)
318 vm_pageout_collect_batch(ss, dequeue);
319 return (vm_batchqueue_pop(&ss->bq));
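The fragments above (source lines 219-319) are the batched page-queue iterator: vm_pageout_init_scan() links a marker page into the queue, vm_pageout_collect_batch() walks forward from the marker collecting up to VM_BATCHQUEUE_SIZE real pages (skipping other threads' markers) while advancing the marker, vm_pageout_next() hands them out one at a time, and vm_pageout_end_scan() unlinks the marker and folds ss->scanned into the queue's pq_pdpages counter. A minimal sketch of how a caller drives this machinery, modeled on the laundry scan shown further down; handle_page() is a hypothetical per-page routine, and the lock handling assumes vm_pageout_init_scan() drops the queue lock before returning, as the scans in this file rely on:

/*
 * Sketch only: drive the scan_state iterator over one page queue.
 * handle_page() is hypothetical; the real scans also take the owning
 * object's lock and recheck the page's queue state before acting.
 */
static void
example_scan(struct vm_domain *vmd, int queue)
{
	struct scan_state ss;
	struct vm_pagequeue *pq;
	vm_page_t m, marker;

	marker = &vmd->vmd_markers[queue];
	pq = &vmd->vmd_pagequeues[queue];

	vm_pagequeue_lock(pq);
	/* Inserts the marker; assumed to drop the queue lock on return. */
	vm_pageout_init_scan(&ss, pq, marker, NULL, pq->pq_cnt);
	while ((m = vm_pageout_next(&ss, false)) != NULL) {
		if (__predict_false((m->flags & PG_MARKER) != 0))
			continue;	/* another thread's marker */
		handle_page(m);
	}
	vm_pagequeue_lock(pq);
	vm_pageout_end_scan(&ss);
	vm_pagequeue_unlock(pq);
}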
353 if (m->dirty != 0 && vm_page_in_laundry(m) &&
369 vm_page_t mc[2 * vm_pageout_page_count - 1];
372 VM_OBJECT_ASSERT_WLOCKED(m->object);
376 alignment = m->pindex % vm_pageout_page_count;
386 * align the clusters (which leaves sporadic out-of-order
399 if (alignment == pageout_count - 1 && num_ends == 0)
407 mc[--page_base] = m;
410 m = mc[page_base + pageout_count - 1];
428 * vm_pageout_flush() - launder the given pages
431 * I/O ( i.e. busy the page ), mark it read-only, and bump the object
445 vm_object_t object = mc[0]->object;
454 * valid and read-only.
456 * We do not have to fixup the clean/dirty bits here... we can
459 * NOTE! mc[i]->dirty may be partial or fragmented due to an
466 KASSERT((mc[i]->a.flags & PGA_WRITEABLE) == 0,
474 runlen = count - mreq;
520 if ((object->flags & OBJ_SWAP) != 0 &&
526 if (eio != NULL && i >= mreq && i - mreq < runlen)
530 if (i >= mreq && i - mreq < runlen)
531 runlen = i - mreq;
583 object = m->object;
592 * pageout daemon, but the new low-memory handling
601 if (object->type == OBJT_VNODE) {
603 vp = object->handle;
604 if (vp->v_type == VREG &&
613 pindex = m->pindex;
626 if (vp->v_object != object) {
638 if (!vm_page_in_laundry(m) || m->object != object ||
639 m->pindex != pindex || m->dirty == 0) {
694 struct scan_state ss;
722 marker = &vmd->vmd_markers[queue];
723 pq = &vmd->vmd_pagequeues[queue];
725 vm_pageout_init_scan(&ss, pq, marker, NULL, pq->pq_cnt);
726 while (launder > 0 && (m = vm_pageout_next(&ss, false)) != NULL) {
727 if (__predict_false((m->flags & PG_MARKER) != 0))
742 if (object == NULL || object != m->object) {
745 object = atomic_load_ptr(&m->object);
750 /* Depends on type-stability. */
752 if (__predict_false(m->object != object)) {
780 refs = object->ref_count != 0 ? pmap_ts_referenced(m) : 0;
800 } else if (object->ref_count != 0) {
827 launder--;
830 } else if ((object->flags & OBJ_DEAD) == 0) {
840 * If the page appears to be clean at the machine-independent
846 if (object->ref_count != 0) {
848 if (m->dirty == 0 && !vm_page_try_remove_all(m))
858 if (m->dirty == 0) {
862 * manipulating the page, check for a last-second
869 } else if ((object->flags & OBJ_DEAD) == 0) {
870 if ((object->flags & OBJ_SWAP) != 0)
894 launder -= numpagedout;
895 ss.scanned += numpagedout;
911 vm_pageout_end_scan(&ss);
926 return (starting_target - launder);
933 isqrt(u_int num)
937 bit = num != 0 ? (1u << ((fls(num) - 1) & ~1)) : 0;
942 if (num >= tmp) {
943 num -= tmp;
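Source lines 933-943 belong to isqrt(), declared at line 205 and used in the background-laundering trigger at lines 1037-1042. A reconstruction of the digit-by-digit binary integer square root those fragments come from; treat it as a sketch assembled from the matched lines plus the standard technique, not necessarily a verbatim copy of the elided code:

/*
 * Digit-by-digit binary integer square root.  fls() yields the index of
 * the highest set bit, so "bit" starts at the largest power of four not
 * exceeding num.
 */
static u_int
isqrt(u_int num)
{
	u_int bit, root, tmp;

	bit = num != 0 ? (1u << ((fls(num) - 1) & ~1)) : 0;
	root = 0;
	while (bit != 0) {
		tmp = root + bit;
		root >>= 1;
		if (num >= tmp) {
			num -= tmp;
			root += bit;
		}
		bit >>= 2;
	}
	return (root);
}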
967 pq = &vmd->vmd_pagequeues[PQ_LAUNDRY];
968 KASSERT(vmd->vmd_segs != 0, ("domain without segments"));
1015 launder = target / shortfall_cycle--;
1033 * that the threshold is non-zero after an inactive queue
1037 nclean = vmd->vmd_free_count +
1038 vmd->vmd_pagequeues[PQ_INACTIVE].pq_cnt;
1039 ndirty = vmd->vmd_pagequeues[PQ_LAUNDRY].pq_cnt;
1041 vmd->vmd_free_target - vmd->vmd_free_min)) >= nclean) {
1042 target = vmd->vmd_background_launder_target;
1046 * We have a non-zero background laundering target. If we've
1057 } else if (last_target - target >=
1074 target -= min(vm_pageout_launder(vmd, launder,
1085 if (target == 0 && vmd->vmd_laundry_request == VM_LAUNDRY_IDLE)
1086 (void)mtx_sleep(&vmd->vmd_laundry_request,
1094 if (vmd->vmd_laundry_request == VM_LAUNDRY_SHORTFALL &&
1097 vmd->vmd_pageout_deficit;
1103 vmd->vmd_laundry_request = VM_LAUNDRY_IDLE;
1104 nfreed += vmd->vmd_clean_pages_freed;
1105 vmd->vmd_clean_pages_freed = 0;
1127 shortage = vmd->vmd_inactive_target + vm_paging_target(vmd) -
1128 (vmd->vmd_pagequeues[PQ_INACTIVE].pq_cnt +
1129 vmd->vmd_pagequeues[PQ_LAUNDRY].pq_cnt / act_scan_laundry_weight);
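To make the arithmetic at source lines 1127-1129 concrete, with illustrative numbers not taken from this file: if vmd_inactive_target is 30,000 pages, vm_paging_target() returns 5,000, PQ_INACTIVE holds 20,000 pages, PQ_LAUNDRY holds 9,000, and act_scan_laundry_weight is 3, then shortage = 30,000 + 5,000 - (20,000 + 9,000 / 3) = 12,000, so the active scan would try to deactivate roughly 12,000 pages. Dividing the laundry count by act_scan_laundry_weight discounts pages already awaiting laundering, since they cannot be reused until they have been cleaned.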
1136 * small portion of the queue in order to maintain quasi-LRU.
1141 struct scan_state ss;
1150 marker = &vmd->vmd_markers[PQ_ACTIVE];
1151 pq = &vmd->vmd_pagequeues[PQ_ACTIVE];
1160 min_scan = pq->pq_cnt;
1161 min_scan *= scan_tick - vmd->vmd_last_active_scan;
1165 if (min_scan > 0 || (page_shortage > 0 && pq->pq_cnt > 0))
1166 vmd->vmd_last_active_scan = scan_tick;
1170 * the per-page activity counter and use it to identify deactivation
1181 max_scan = page_shortage > 0 ? pq->pq_cnt : min_scan;
1183 vm_pageout_init_scan(&ss, pq, marker, &vmd->vmd_clock[0], max_scan);
1184 while ((m = vm_pageout_next(&ss, false)) != NULL) {
1185 if (__predict_false(m == &vmd->vmd_clock[1])) {
1187 TAILQ_REMOVE(&pq->pq_pl, &vmd->vmd_clock[0], plinks.q);
1188 TAILQ_REMOVE(&pq->pq_pl, &vmd->vmd_clock[1], plinks.q);
1189 TAILQ_INSERT_HEAD(&pq->pq_pl, &vmd->vmd_clock[0],
1191 TAILQ_INSERT_TAIL(&pq->pq_pl, &vmd->vmd_clock[1],
1193 max_scan -= ss.scanned;
1194 vm_pageout_end_scan(&ss);
1197 if (__predict_false((m->flags & PG_MARKER) != 0))
1213 object = atomic_load_ptr(&m->object);
1221 if ((m->a.flags & PGA_SWAP_FREE) != 0 &&
1223 if (m->object == object)
1240 * 1) The count was transitioning to zero, but we saw a non-
1247 refs = object->ref_count != 0 ? pmap_ts_referenced(m) : 0;
1276 new.act_count -= min(new.act_count,
1321 } else if (m->dirty == 0) {
1335 page_shortage -= ps_delta;
1338 TAILQ_REMOVE(&pq->pq_pl, &vmd->vmd_clock[0], plinks.q);
1339 TAILQ_INSERT_AFTER(&pq->pq_pl, marker, &vmd->vmd_clock[0], plinks.q);
1340 vm_pageout_end_scan(&ss);
1361 * Re-add stuck pages to the inactive queue. We will examine them again
1367 vm_pageout_reinsert_inactive(struct scan_state *ss, struct vm_batchqueue *bq,
1375 marker = ss->marker;
1376 pq = ss->pq;
1396 struct scan_state ss;
1420 * entire queue. (Note that m->a.act_count is not used to make
1426 pq = &vmd->vmd_pagequeues[PQ_INACTIVE];
1428 vm_pageout_init_scan(&ss, pq, marker, NULL, pq->pq_cnt);
1436 if (object != NULL && vm_batchqueue_empty(&ss.bq)) {
1441 m = vm_pageout_next(&ss, true);
1444 KASSERT((m->flags & PG_MARKER) == 0,
1459 if (object == NULL || object != m->object) {
1462 object = atomic_load_ptr(&m->object);
1467 /* Depends on type-stability. */
1469 if (__predict_false(m->object != object)) {
1490 if ((m->a.flags & PGA_SWAP_FREE) != 0)
1511 refs = object->ref_count != 0 ? pmap_ts_referenced(m) : 0;
1531 } else if (object->ref_count != 0) {
1552 } else if ((object->flags & OBJ_DEAD) == 0) {
1563 * If the page appears to be clean at the machine-independent
1569 if (object->ref_count != 0) {
1571 if (m->dirty == 0 && !vm_page_try_remove_all(m))
1582 if (m->dirty == 0) {
1586 * manipulating the page, check for a last-second
1598 m->a.queue = PQ_NONE;
1600 page_shortage--;
1603 if ((object->flags & OBJ_DEAD) == 0)
1609 vm_pageout_reinsert_inactive(&ss, &rq, m);
1613 vm_pageout_reinsert_inactive(&ss, &rq, NULL);
1614 vm_pageout_reinsert_inactive(&ss, &ss.bq, NULL);
1616 vm_pageout_end_scan(&ss);
1622 atomic_add_int(&vmd->vmd_addl_shortage, addl_page_shortage);
1625 atomic_add_int(&vmd->vmd_inactive_us,
1627 atomic_add_int(&vmd->vmd_inactive_freed,
1628 starting_page_shortage - page_shortage);
1640 vmd->vmd_inactive_shortage = shortage;
1647 if ((threads = vmd->vmd_inactive_threads) > 1 &&
1648 vmd->vmd_helper_threads_enabled &&
1649 vmd->vmd_inactive_pps != 0 &&
1650 shortage > vmd->vmd_inactive_pps / VM_INACT_SCAN_RATE / 4) {
1651 vmd->vmd_inactive_shortage /= threads;
1654 blockcount_acquire(&vmd->vmd_inactive_starting, threads - 1);
1655 blockcount_acquire(&vmd->vmd_inactive_running, threads - 1);
1656 wakeup(&vmd->vmd_inactive_shortage);
1661 vm_pageout_scan_inactive(vmd, vmd->vmd_inactive_shortage + slop);
1667 blockcount_wait(&vmd->vmd_inactive_running, NULL, "vmpoid", PVM);
1668 freed = atomic_readandclear_int(&vmd->vmd_inactive_freed);
1672 * Calculate the per-thread paging rate with an exponential decay of
1676 us = max(atomic_readandclear_int(&vmd->vmd_inactive_us), 1);
1682 vmd->vmd_inactive_pps = (vmd->vmd_inactive_pps / 2) + (pps / 2);
1684 return (shortage - freed);
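The fragment at source lines 1672-1682 maintains vmd_inactive_pps, a pages-per-second estimate smoothed with a 50% exponential decay per pass; lines 1647-1650 above consult it to decide whether dispatching helper threads is worth the wakeups. A minimal sketch of the update, assuming the pass freed "freed" pages in "us" microseconds; the helper name is hypothetical and the rate conversion is illustrative, since the elided lines may compute it differently to limit overflow and rounding error:

/* Illustrative helper (hypothetical name), not the verbatim elided code. */
static void
update_inactive_pps(struct vm_domain *vmd, u_int freed, u_int us)
{
	u_int pps;

	us = max(us, 1);		/* guard the division below */
	pps = (u_int)(((uint64_t)freed * 1000000) / us);
	/* Fold the new sample in with a 50% exponential decay. */
	vmd->vmd_inactive_pps = (vmd->vmd_inactive_pps / 2) + (pps / 2);
}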
1704 deficit = atomic_readandclear_int(&vmd->vmd_pageout_deficit);
1711 addl_page_shortage = atomic_readandclear_int(&vmd->vmd_addl_shortage);
1725 pq = &vmd->vmd_pagequeues[PQ_LAUNDRY];
1727 if (vmd->vmd_laundry_request == VM_LAUNDRY_IDLE &&
1728 (pq->pq_cnt > 0 || atomic_load_acq_int(&swapdev_enabled))) {
1730 vmd->vmd_laundry_request = VM_LAUNDRY_SHORTFALL;
1732 } else if (vmd->vmd_laundry_request !=
1734 vmd->vmd_laundry_request =
1736 wakeup(&vmd->vmd_laundry_request);
1738 vmd->vmd_clean_pages_freed +=
1739 starting_page_shortage - page_shortage;
1772 vmd->vmd_oom_seq = 0;
1774 vmd->vmd_oom_seq++;
1775 if (vmd->vmd_oom_seq < vm_pageout_oom_seq) {
1776 if (vmd->vmd_oom) {
1777 vmd->vmd_oom = false;
1787 vmd->vmd_oom_seq = 0;
1789 if (vmd->vmd_oom)
1792 vmd->vmd_oom = true;
1794 if (old_vote != vm_ndomains - 1)
1810 vmd->vmd_oom = false;
1828 * efforts to write-back vnode-backed pages may have stalled. This
1831 * belonging to vnode-backed objects are counted, because they might
1847 map = &vmspace->vm_map;
1849 sx_assert(&map->lock, SA_LOCKED);
1852 if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) != 0)
1854 obj = entry->object.vm_object;
1857 if ((entry->eflags & MAP_ENTRY_NEEDS_COPY) != 0 &&
1858 obj->ref_count != 1)
1860 if (obj->type == OBJT_PHYS || obj->type == OBJT_VNODE ||
1861 (obj->flags & OBJ_SWAP) != 0)
1862 res += obj->resident_page_count;
1888 * processes of middle-size, like buildworld, all of them
1891 * To avoid killing too many processes, rate-limit OOMs
1892 * initiated by vm_fault() time-outs on the waits for free
1898 (u_int)(now - vm_oom_ratelim_last) < hz * vm_oom_pf_secs) {
1922 if (p->p_state != PRS_NORMAL || (p->p_flag & (P_INEXEC |
1924 p->p_pid == 1 || P_KILLED(p) ||
1925 (p->p_pid < 48 && swap_pager_avail != 0)) {
1930 * If the process is in a non-running type state,
1961 if (!vm_map_trylock_read(&vm->vm_map)) {
1970 vm_map_unlock_read(&vm->vm_map);
2003 if (vm_panic_on_oom != 0 && --vm_panic_on_oom == 0)
2016 * Return true if the free page count should be re-evaluated.
2028 while ((u_int)(ticks - last) / hz >= lowmem_period) {
2055 * UMA reclaim worker has its own rate-limiting mechanism, so don't
2083 KASSERT(vmd->vmd_segs != 0, ("domain without segments"));
2084 vmd->vmd_last_active_scan = ticks;
2097 atomic_store_int(&vmd->vmd_pageout_wanted, 0);
2102 if (vm_paging_needed(vmd, vmd->vmd_free_count)) {
2116 if (mtx_sleep(&vmd->vmd_pageout_wanted,
2123 atomic_store_int(&vmd->vmd_pageout_wanted, 1);
2131 shortage = pidctrl_daemon(&vmd->vmd_pid, vmd->vmd_free_count);
2133 ofree = vmd->vmd_free_count;
2134 if (vm_pageout_lowmem() && vmd->vmd_free_count > ofree)
2135 shortage -= min(vmd->vmd_free_count - ofree,
2167 msleep(&vmd->vmd_inactive_shortage,
2169 blockcount_release(&vmd->vmd_inactive_starting, 1);
2172 vm_pageout_scan_inactive(vmd, vmd->vmd_inactive_shortage);
2179 blockcount_release(&vmd->vmd_inactive_running, 1);
2188 if (VM_DOMAIN_EMPTY(vmd->vmd_domain))
2192 * Semi-arbitrarily constrain pagedaemon threads to less than half the
2201 domain_cpus = CPU_COUNT(&cpuset_domain[vmd->vmd_domain]);
2207 eligible_cpus -= CPU_COUNT(&cpuset_domain[i]);
2211 * corresponding to the fraction of pagedaemon-eligible CPUs in the
2230 vmd->vmd_interrupt_free_min = 2;
2237 vmd->vmd_pageout_free_min = 2 * MAXBSIZE / PAGE_SIZE +
2238 vmd->vmd_interrupt_free_min;
2239 vmd->vmd_free_reserved = vm_pageout_page_count +
2240 vmd->vmd_pageout_free_min + vmd->vmd_page_count / 768;
2241 vmd->vmd_free_min = vmd->vmd_page_count / 200;
2242 vmd->vmd_free_severe = vmd->vmd_free_min / 2;
2243 vmd->vmd_free_target = 4 * vmd->vmd_free_min + vmd->vmd_free_reserved;
2244 vmd->vmd_free_min += vmd->vmd_free_reserved;
2245 vmd->vmd_free_severe += vmd->vmd_free_reserved;
2246 vmd->vmd_inactive_target = (3 * vmd->vmd_free_target) / 2;
2247 if (vmd->vmd_inactive_target > vmd->vmd_free_count / 3)
2248 vmd->vmd_inactive_target = vmd->vmd_free_count / 3;
2254 vmd->vmd_pageout_wakeup_thresh = (vmd->vmd_free_target / 10) * 9;
2261 vmd->vmd_background_launder_target = (vmd->vmd_free_target -
2262 vmd->vmd_free_min) / 10;
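The assignments at source lines 2230-2262 derive the per-domain watermarks from the domain's page count. A worked example with illustrative values: assume a domain of 1,000,000 pages (about 4 GB with 4 KiB pages), MAXBSIZE / PAGE_SIZE == 16, and vm_pageout_page_count == 32. Then interrupt_free_min = 2; pageout_free_min = 2 * 16 + 2 = 34; free_reserved = 32 + 34 + 1,000,000 / 768 = 1,368; free_min starts at 1,000,000 / 200 = 5,000 and free_severe at 2,500; free_target = 4 * 5,000 + 1,368 = 21,368; after adding free_reserved, free_min becomes 6,368 and free_severe 3,868; inactive_target = 3 * 21,368 / 2 = 32,052 (clamped to one third of the free count if that is smaller); pageout_wakeup_thresh = (21,368 / 10) * 9 = 19,224; and background_launder_target = (21,368 - 6,368) / 10 = 1,500 pages.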
2265 pidctrl_init(&vmd->vmd_pid, hz / VM_INACT_SCAN_RATE,
2266 vmd->vmd_free_target, PIDCTRL_BOUND,
2268 oid = SYSCTL_ADD_NODE(NULL, SYSCTL_CHILDREN(vmd->vmd_oid), OID_AUTO,
2270 pidctrl_init_sysctl(&vmd->vmd_pid, SYSCTL_CHILDREN(oid));
2272 vmd->vmd_inactive_threads = get_pageout_threads_per_domain(vmd);
2273 SYSCTL_ADD_BOOL(NULL, SYSCTL_CHILDREN(vmd->vmd_oid), OID_AUTO,
2275 &vmd->vmd_helper_threads_enabled, 0,
2276 "Enable multi-threaded inactive queue scanning");
2294 vm_cnt.v_free_reserved += vmd->vmd_free_reserved;
2295 vm_cnt.v_free_target += vmd->vmd_free_target;
2296 vm_cnt.v_free_min += vmd->vmd_free_min;
2297 vm_cnt.v_inactive_target += vmd->vmd_inactive_target;
2298 vm_cnt.v_pageout_free_min += vmd->vmd_pageout_free_min;
2299 vm_cnt.v_interrupt_free_min += vmd->vmd_interrupt_free_min;
2300 vm_cnt.v_free_severe += vmd->vmd_free_severe;
2301 freecount += vmd->vmd_free_count;
2313 * Set the maximum number of user-wired virtual pages. Historically the
2315 * may also request user-wired memory.
2336 for (first = -1, i = 0; i < vm_ndomains; i++) {
2343 if (first == -1)
2352 pageout_threads = VM_DOMAIN(i)->vmd_inactive_threads;
2353 for (j = 0; j < pageout_threads - 1; j++) {
2370 snprintf(td->td_name, sizeof(td->td_name), "dom%d", first);
2387 if (atomic_fetchadd_int(&vmd->vmd_pageout_wanted, 1) == 0) {
2389 atomic_store_int(&vmd->vmd_pageout_wanted, 1);
2390 wakeup(&vmd->vmd_pageout_wanted);
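The final fragment (source lines 2387-2390) is the waker side of the pageout handshake; its counterpart is the sleeper at lines 2097-2123 above, which zeroes vmd_pageout_wanted before re-checking whether paging is needed and then sleeps on the same channel. A sketch of the waker with plausible locking filled in; the lock and unlock calls are an assumption, as they are not among the matched lines:

	/*
	 * Only the 0 -> 1 transition wakes the daemon, so concurrent
	 * callers do not pile up wakeups; because the daemon zeroes the
	 * counter before it re-checks the free page count, a request
	 * arriving after that check still produces a wakeup.
	 */
	if (atomic_fetchadd_int(&vmd->vmd_pageout_wanted, 1) == 0) {
		vm_domain_pageout_lock(vmd);	/* assumed locking */
		atomic_store_int(&vmd->vmd_pageout_wanted, 1);
		wakeup(&vmd->vmd_pageout_wanted);
		vm_domain_pageout_unlock(vmd);
	}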