xref: /dflybsd-src/sys/vm/vm_page2.h (revision e91e64c7af5788faa55682cd78c0442c83d5d6d5)
1 /*-
2  * Copyright (c) 1982, 1986, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  * 3. Neither the name of the University nor the names of its contributors
14  *    may be used to endorse or promote products derived from this software
15  *    without specific prior written permission.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  *
29  *	@(#)vmmeter.h	8.2 (Berkeley) 7/10/94
30  * $FreeBSD: src/sys/sys/vmmeter.h,v 1.21.2.2 2002/10/10 19:28:21 dillon Exp $
31  */
32 
33 #ifndef _VM_VM_PAGE2_H_
34 #define _VM_VM_PAGE2_H_
35 
36 #ifdef _KERNEL
37 
38 #ifndef _SYS_VMMETER_H_
39 #include <sys/vmmeter.h>
40 #endif
41 #ifndef _SYS_QUEUE_H_
42 #include <sys/queue.h>
43 #endif
44 #ifndef _VM_VM_PAGE_H_
45 #include <vm/vm_page.h>
46 #endif
47 #ifndef _SYS_SPINLOCK_H_
48 #include <sys/spinlock.h>
49 #endif
50 #ifndef _SYS_SPINLOCK2_H_
51 #include <sys/spinlock2.h>
52 #endif
53 
54 /*
55  * SMP NOTE
56  *
57  * VM fault rates are highly dependent on SMP locking conflicts and, on
58  * multi-socket systems, cache mastership changes for globals due to atomic
59  * ops (even simple atomic_add_*() calls).  Cache mastership changes can
60  * limit the aggregate fault rate.
61  *
62  * For this reason we go through some hoops to access VM statistics for
63  * low-memory handling, pageout, and other triggers.  Each cpu collects
64  * adjustments in gd->gd_vmstats_adj.  These get rolled up into the global
65  * vmstats structure.  The global vmstats structure is then pulled into
66  * gd->gd_vmstats by each cpu when it needs it.  Critical path checks always
67  * use the pcpu gd->gd_vmstats structure.
68  */
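
/*
 * Illustrative sketch (not part of the original header, names per the
 * note above): hot paths read the pcpu gd_vmstats snapshot, while
 * adjustments accumulate in gd_vmstats_adj and are rolled up elsewhere.
 * A hypothetical caller might look like:
 *
 *	globaldata_t gd = mycpu;
 *
 *	if (gd->gd_vmstats.v_free_count < some_limit)	// pcpu read, no
 *		take_slow_path();			// global cache bounce
 */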
69 /*
70  * Return TRUE if we are under our severe low-free-pages threshold.
71  *
72  * This causes user processes to stall to avoid exhausting memory that
73  * the kernel might need.
74  *
75  * reserved < severe < minimum < wait < start < target1 < target2
76  */
77 static __inline
78 int
79 vm_paging_severe(void)
80 {
81 	globaldata_t gd = mycpu;
82 
83 	if (__predict_false(gd->gd_vmstats.v_free_severe >
84 			    gd->gd_vmstats.v_free_count +
85 			    gd->gd_vmstats.v_cache_count))
86 	{
87 		return 1;
88 	}
89 	if (__predict_false(gd->gd_vmstats.v_free_reserved >
90 			    gd->gd_vmstats.v_free_count))
91 	{
92 		return 1;
93 	}
94 	return 0;
95 }
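
/*
 * Usage sketch (illustrative, hypothetical caller): a user-visible
 * allocation path might stall while the system is severely low on pages:
 *
 *	while (vm_paging_severe())
 *		vm_wait_nominal();	// hypothetical choice of wait routine
 */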
96 
97 /*
98  * Return TRUE if we are under our minimum low-free-pages threshold,
99  * treating (donotcount) otherwise-free pages as not free (used mainly
100  * for hysteresis tests).
101  *
102  * This will cause most normal page faults to block and activate the
103  * pageout daemon.
104  *
105  * The pageout daemon should already be active due to vm_paging_start(n)
106  * and will typically continue running until it hits target2.
107  *
108  * reserved < severe < minimum < wait < start < target1 < target2
109  */
110 static __inline
111 int
112 vm_paging_min_dnc(long donotcount)
113 {
114 	globaldata_t gd = mycpu;
115 
116 	if (__predict_false(gd->gd_vmstats.v_free_min + donotcount >
117 			    (gd->gd_vmstats.v_free_count +
118 			     gd->gd_vmstats.v_cache_count)))
119 	{
120 		return 1;
121 	}
122 	if (__predict_false(gd->gd_vmstats.v_free_reserved >
123 			    gd->gd_vmstats.v_free_count))
124 	{
125 		return 1;
126 	}
127 	return 0;
128 }
129 
130 static __inline
131 int
132 vm_paging_min(void)
133 {
134 	return vm_paging_min_dnc(0);
135 }
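
/*
 * Usage sketch (illustrative): the donotcount argument excludes that many
 * otherwise-free pages from the test, giving hysteresis.  A hypothetical
 * caller about to take npages might check:
 *
 *	if (vm_paging_min_dnc(npages))
 *		... block or back off instead of allocating ...
 */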
136 
137 /*
138  * Return TRUE if nominal userland / VM-system allocations should slow
139  * down (but not stop) due to low free pages in the system.  This is
140  * typically 1/2 way between min and start.
141  *
142  * reserved < severe < minimum < wait < start < target1 < target2
143  */
144 static __inline
145 int
146 vm_paging_wait(void)
147 {
148 	globaldata_t gd = mycpu;
149 
150 	if (__predict_false(gd->gd_vmstats.v_paging_wait >
151 			    (gd->gd_vmstats.v_free_count +
152 			     gd->gd_vmstats.v_cache_count)))
153 	{
154 		return 1;
155 	}
156 	if (__predict_false(gd->gd_vmstats.v_free_reserved >
157 			    gd->gd_vmstats.v_free_count))
158 	{
159 		return 1;
160 	}
161 	return 0;
162 }
163 
164 /*
165  * Return TRUE if the pageout daemon should be started up or continue
166  * running.  Available pages have dropped to a level where we need to
167  * think about freeing some up.
168  *
169  * Also handles edge cases for required 'actually-free' pages.
170  *
171  * reserved < severe < minimum < wait < start < target1 < target2
172  */
173 static __inline
174 int
175 vm_paging_start(int adj)
176 {
177 	globaldata_t gd = mycpu;
178 
179 	if (__predict_false(gd->gd_vmstats.v_paging_start >
180 			    (gd->gd_vmstats.v_free_count +
181 			     gd->gd_vmstats.v_cache_count + adj)))
182 	{
183 		return 1;
184 	}
185 	if (__predict_false(gd->gd_vmstats.v_free_min >
186 			    gd->gd_vmstats.v_free_count + adj))
187 	{
188 		return 1;
189 	}
190 	if (__predict_false(gd->gd_vmstats.v_free_reserved >
191 			    gd->gd_vmstats.v_free_count))
192 	{
193 		return 1;
194 	}
195 	return 0;
196 }
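
/*
 * Usage sketch (illustrative): adj simply biases the free+cache side of
 * the comparison.  A hypothetical caller noticing pressure might do:
 *
 *	if (vm_paging_start(0))
 *		pagedaemon_wakeup();	// hypothetical wakeup hook
 */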
197 
198 /*
199  * Return TRUE if the pageout daemon has not yet reached its initial target.
200  * The pageout daemon works hard to reach target1.
201  *
202  * reserved < severe < minimum < wait < start < target1 < target2
203  */
204 static __inline
205 int
206 vm_paging_target1(void)
207 {
208 	globaldata_t gd = mycpu;
209 
210 	if (__predict_false(gd->gd_vmstats.v_paging_target1 >
211 			    (gd->gd_vmstats.v_free_count +
212 			     gd->gd_vmstats.v_cache_count)))
213 	{
214 		return 1;
215 	}
216 	if (__predict_false(gd->gd_vmstats.v_free_reserved >
217 			    gd->gd_vmstats.v_free_count))
218 	{
219 		return 1;
220 	}
221 	return 0;
222 }
223 
224 static __inline
225 long
226 vm_paging_target1_count(void)
227 {
228 	globaldata_t gd = mycpu;
229 	long delta;
230 
231 	delta = gd->gd_vmstats.v_paging_target1 -
232 		(gd->gd_vmstats.v_free_count + gd->gd_vmstats.v_cache_count);
233 	return delta;
234 }
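
/*
 * Usage sketch (illustrative): the *_count variants return how far the
 * system is from the respective target, so a hypothetical pageout pass
 * could be driven as:
 *
 *	long shortage = vm_paging_target1_count();
 *
 *	while (shortage > 0 && more_work)
 *		shortage -= clean_some_pages();	// hypothetical helper
 */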
235 
236 /*
237  * Return TRUE if the pageout daemon has not yet reached its final target.
238  * The pageout daemon takes it easy on its way between target1 and target2.
239  *
240  * reserved < severe < minimum < wait < start < target1 < target2
241  */
242 static __inline
243 int
244 vm_paging_target2(void)
245 {
246 	globaldata_t gd = mycpu;
247 
248 	if (__predict_false(gd->gd_vmstats.v_paging_target2 >
249 			    (gd->gd_vmstats.v_free_count +
250 			     gd->gd_vmstats.v_cache_count)))
251 	{
252 		return 1;
253 	}
254 	if (__predict_false(gd->gd_vmstats.v_free_reserved >
255 			    gd->gd_vmstats.v_free_count))
256 	{
257 		return 1;
258 	}
259 	return 0;
260 }
261 
262 static __inline
263 long
264 vm_paging_target2_count(void)
265 {
266 	globaldata_t gd = mycpu;
267 	long delta;
268 
269 	delta = gd->gd_vmstats.v_paging_target2 -
270 		(gd->gd_vmstats.v_free_count + gd->gd_vmstats.v_cache_count);
271 	return delta;
272 }
273 
274 /*
275  * Returns TRUE if additional pages must be deactivated, either during a
276  * pageout operation or during the page stats scan.
277  *
278  * Inactive tests are used in two places.  During heavy paging the
279  * inactive_target is used to refill the inactive queue in stages.
280  * Those pages are then ultimately flushed and moved to the cache or free
281  * queues.
282  *
283  * The inactive queue is also used to manage scans to update page stats
284  * (m->act_count).  The page stats scan occurs lazily in small batches to
285  * update m->act_count for pages in the active queue and to move pages
286  * (limited by inactive_target) to the inactive queue.  Page stats scanning
287  * and active deactivations only run while the inactive queue is below target.
288  * After that point, additional page stats scanning just to update
289  * m->act_count (without doing further deactivations) continues to run for
290  * a limited period of time after any pageout daemon activity.
291  */
292 static __inline
293 int
294 vm_paging_inactive(void)
295 {
296 	globaldata_t gd = mycpu;
297 
298 	if (__predict_false((gd->gd_vmstats.v_free_count +
299 			     gd->gd_vmstats.v_cache_count +
300 			     gd->gd_vmstats.v_inactive_count) <
301 			    (gd->gd_vmstats.v_free_min +
302 			     gd->gd_vmstats.v_inactive_target)))
303 	{
304 		return 1;
305 	}
306 	return 0;
307 }
308 
309 /*
310  * Return number of pages that need to be deactivated to achieve the inactive
311  * target as a positive number.  A negative number indicates that there are
312  * already a sufficient number of inactive pages.
313  */
314 static __inline
315 long
316 vm_paging_inactive_count(void)
317 {
318 	globaldata_t gd = mycpu;
319 	long delta;
320 
321 	delta = (gd->gd_vmstats.v_free_min + gd->gd_vmstats.v_inactive_target) -
322 		(gd->gd_vmstats.v_free_count + gd->gd_vmstats.v_cache_count +
323 		 gd->gd_vmstats.v_inactive_count);
324 
325 	return delta;
326 }
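
/*
 * Usage sketch (illustrative): a hypothetical deactivation pass would use
 * the predicate/count pair together:
 *
 *	if (vm_paging_inactive()) {
 *		long shortage = vm_paging_inactive_count();
 *
 *		... deactivate up to 'shortage' active pages ...
 *	}
 */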
327 
328 /*
329  * Clear dirty bits in the VM page but truncate the
330  * end to a DEV_BSIZE'd boundary.
331  *
332  * Used when reading data in, typically via getpages.
333  * The partial device block at the end of the truncation
334  * range should not lose its dirty bit.
335  *
336  * NOTE: This function does not clear the pmap modified bit.
337  */
338 static __inline
339 void
340 vm_page_clear_dirty_end_nonincl(vm_page_t m, int base, int size)
341 {
342     size = (base + size) & ~DEV_BMASK;
343     if (base < size)
344 	vm_page_clear_dirty(m, base, size - base);
345 }
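
/*
 * Worked example (illustrative), assuming DEV_BSIZE is 512 so DEV_BMASK
 * is 511: base = 100, size = 1000 covers [100, 1100).  The end truncates
 * down to 1024, so only [100, 1024) is cleared and the partial device
 * block [1024, 1100) keeps its dirty bit:
 *
 *	vm_page_clear_dirty_end_nonincl(m, 100, 1000);
 *		-> vm_page_clear_dirty(m, 100, 924);
 */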
346 
347 /*
348  * Clear dirty bits in the VM page but truncate the
349  * beginning to a DEV_BSIZE'd boundary.
350  *
351  * Used when truncating a buffer.  The partial device
352  * block at the beginning of the truncation range
353  * should not lose its dirty bit.
354  *
355  * NOTE: This function does not clear the pmap modified bit.
356  */
357 static __inline
358 void
359 vm_page_clear_dirty_beg_nonincl(vm_page_t m, int base, int size)
360 {
361     size += base;
362     base = (base + DEV_BMASK) & ~DEV_BMASK;
363     if (base < size)
364 	vm_page_clear_dirty(m, base, size - base);
365 }
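
/*
 * Worked example (illustrative), again assuming DEV_BSIZE is 512: base =
 * 100, size = 1000 covers [100, 1100).  The beginning rounds up to 512,
 * so only [512, 1100) is cleared and the partial device block [100, 512)
 * keeps its dirty bit:
 *
 *	vm_page_clear_dirty_beg_nonincl(m, 100, 1000);
 *		-> vm_page_clear_dirty(m, 512, 588);
 */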
366 
367 static __inline
368 void
369 vm_page_spin_lock(vm_page_t m)
370 {
371     spin_lock(&m->spin);
372 }
373 
374 static __inline
375 void
376 vm_page_spin_unlock(vm_page_t m)
377 {
378     spin_unlock(&m->spin);
379 }
380 
381 /*
382  * Wire a vm_page that is already wired.  Does not require a busied
383  * page.
384  */
385 static __inline
386 void
387 vm_page_wire_quick(vm_page_t m)
388 {
389     if (atomic_fetchadd_int(&m->wire_count, 1) == 0)
390 	panic("vm_page_wire_quick: wire_count was 0");
391 }
392 
393 /*
394  * Unwire a vm_page quickly; does not require a busied page.
395  *
396  * This routine refuses to drop the wire_count to 0 and will return
397  * TRUE if it would have had to (instead of decrementing it to 0).
398  * The caller can then busy the page and deal with it.
399  */
400 static __inline
401 int
402 vm_page_unwire_quick(vm_page_t m)
403 {
404     KKASSERT(m->wire_count > 0);
405     for (;;) {
406 	u_int wire_count = m->wire_count;
407 
408 	cpu_ccfence();
409 	if (wire_count == 1)
410 		return TRUE;
411 	if (atomic_cmpset_int(&m->wire_count, wire_count, wire_count - 1))
412 		return FALSE;
413     }
414 }
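
/*
 * Usage sketch (illustrative): a hypothetical caller holding only a wiring
 * (no busy) falls back to a heavier path when this routine refuses to drop
 * the last wire:
 *
 *	if (vm_page_unwire_quick(m)) {
 *		vm_page_busy_wait(m, FALSE, "unwire");	// hypothetical args
 *		vm_page_unwire(m, 1);
 *		vm_page_wakeup(m);
 *	}
 */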
415 
416 /*
417  *	Functions implemented as macros
418  *	Inline functions (historically implemented as macros)
419 
420 static __inline void
421 vm_page_flag_set(vm_page_t m, unsigned int bits)
422 {
423 	atomic_set_int(&(m)->flags, bits);
424 }
425 
426 static __inline void
427 vm_page_flag_clear(vm_page_t m, unsigned int bits)
428 {
429 	atomic_clear_int(&(m)->flags, bits);
430 }
431 
432 /*
433  * Wakeup anyone waiting for the page after potentially unbusying
434  * (hard or soft) or doing other work on a page that might make a
435  * waiter ready.  The setting of PBUSY_WANTED is integrated into the
436  * related flags and it can't be set once the flags are already
437  * clear, so there should be no races here.
438  */
439 static __inline void
440 vm_page_flash(vm_page_t m)
441 {
442 	if (m->busy_count & PBUSY_WANTED) {
443 		atomic_clear_int(&m->busy_count, PBUSY_WANTED);
444 		wakeup(m);
445 	}
446 }
447 
448 /*
449  * Adjust the soft-busy count on a page.  The drop code will issue an
450  * integrated wakeup if busy_count becomes 0.
451  */
452 static __inline void
453 vm_page_sbusy_hold(vm_page_t m)
454 {
455 	atomic_add_int(&m->busy_count, 1);
456 }
457 
458 static __inline void
459 vm_page_sbusy_drop(vm_page_t m)
460 {
461 	uint32_t ocount;
462 
463 	ocount = atomic_fetchadd_int(&m->busy_count, -1);
464 	if (ocount - 1 == PBUSY_WANTED) {
465 		/* WANTED and no longer BUSY or SBUSY */
466 		atomic_clear_int(&m->busy_count, PBUSY_WANTED);
467 		wakeup(m);
468 	}
469 }
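
/*
 * Usage sketch (illustrative): hold/drop bracket a section that only needs
 * the page to stay soft-busied, e.g. a hypothetical reader:
 *
 *	vm_page_sbusy_hold(m);
 *	... access the page ...
 *	vm_page_sbusy_drop(m);	// wakes a PBUSY_WANTED waiter if now idle
 */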
470 
471 /*
472  * Reduce the protection of a page.  This routine never raises the
473  * protection and therefore can be safely called if the page is already
474  * at VM_PROT_NONE (it is effectively a NOP).
475  *
476  * VM_PROT_NONE will remove all user mappings of a page.  This is often
477  * necessary when a page changes state (for example, turns into a copy-on-write
478  * page or needs to be frozen for write I/O) in order to force a fault, or
479  * to force a page's dirty bits to be synchronized and avoid hardware
480  * (modified/accessed) bit update races with pmap changes.
481  *
482  * Since 'prot' is usually a constant, this inline usually winds up optimizing
483  * out the primary conditional.
484  *
485  * Must be called with (m) hard-busied.
486  *
487  * WARNING: VM_PROT_NONE can block, but will loop until all mappings have
488  *	    been cleared.  Callers should be aware that other page related
489  *	    elements might have changed, however.
490  */
491 static __inline void
492 vm_page_protect(vm_page_t m, int prot)
493 {
494 	KKASSERT(m->busy_count & PBUSY_LOCKED);
495 	if (prot == VM_PROT_NONE) {
496 		if (pmap_mapped_sync(m) & (PG_MAPPED | PG_WRITEABLE)) {
497 			pmap_page_protect(m, VM_PROT_NONE);
498 			/* PG_WRITEABLE & PG_MAPPED cleared by call */
499 		}
500 	} else if ((prot == VM_PROT_READ) &&
501 		   (m->flags & PG_WRITEABLE) &&
502 		   (pmap_mapped_sync(m) & PG_WRITEABLE)) {
503 		pmap_page_protect(m, VM_PROT_READ);
504 		/* PG_WRITEABLE cleared by call */
505 	}
506 }
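
/*
 * Usage sketch (illustrative): a hypothetical write-I/O setup that wants
 * stable page contents downgrades user mappings to read-only so further
 * writes fault and dirty state stays accurate:
 *
 *	vm_page_busy_wait(m, FALSE, "wrprot");	// hypothetical args
 *	vm_page_protect(m, VM_PROT_READ);
 *	... issue the write ...
 *	vm_page_wakeup(m);
 */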
507 
508 /*
509  * Zero-fill the specified page.  The entire contents of the page will be
510  * zero'd out.
511  */
512 static __inline boolean_t
513 vm_page_zero_fill(vm_page_t m)
514 {
515 	pmap_zero_page(VM_PAGE_TO_PHYS(m));
516 	return (TRUE);
517 }
518 
519 /*
520  * Copy the contents of src_m to dest_m.  The pages must be stable but spl
521  * and other protections depend on context.
522  */
523 static __inline void
524 vm_page_copy(vm_page_t src_m, vm_page_t dest_m)
525 {
526 	pmap_copy_page(VM_PAGE_TO_PHYS(src_m), VM_PAGE_TO_PHYS(dest_m));
527 	dest_m->valid = VM_PAGE_BITS_ALL;
528 	dest_m->dirty = VM_PAGE_BITS_ALL;
529 }
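
/*
 * Usage sketch (illustrative): a hypothetical copy-on-write resolution
 * copies into a freshly allocated page; vm_page_copy() itself marks
 * dest_m fully valid and dirty:
 *
 *	vm_page_copy(src_m, dest_m);
 *	... replace src_m with dest_m in the object/pmap ...
 */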
530 
531 /*
532  * Free a page.  The page must be marked BUSY.
533  */
534 static __inline void
535 vm_page_free(vm_page_t m)
536 {
537 	vm_page_free_toq(m);
538 }
539 
540 /*
541  * Free a page to the zeroed-pages queue.  The caller must ensure that the
542  * page has been zeroed.
543  */
544 static __inline void
545 vm_page_free_zero(vm_page_t m)
546 {
547 #ifdef PMAP_DEBUG
548 #ifdef PHYS_TO_DMAP
549 	char *p = (char *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m));
550 	int i;
551 
552 	for (i = 0; i < PAGE_SIZE; i++) {
553 		if (p[i] != 0) {
554 			panic("non-zero page in vm_page_free_zero()");
555 		}
556 	}
557 #endif
558 #endif
559 	vm_page_free_toq(m);
560 }
561 
562 /*
563  * Set page to not be dirty.  Note: does not clear the pmap modify bits.
564  */
565 static __inline void
566 vm_page_undirty(vm_page_t m)
567 {
568 	m->dirty = 0;
569 }
570 
571 #endif	/* _KERNEL */
572 #endif	/* _VM_VM_PAGE2_H_ */
573 
574