xref: /openbsd-src/sys/uvm/uvm_page.c (revision d13be5d47e4149db2549a9828e244d59dbc43f15)
1 /*	$OpenBSD: uvm_page.c,v 1.114 2011/07/08 00:10:59 tedu Exp $	*/
2 /*	$NetBSD: uvm_page.c,v 1.44 2000/11/27 08:40:04 chs Exp $	*/
3 
4 /*
5  * Copyright (c) 1997 Charles D. Cranor and Washington University.
6  * Copyright (c) 1991, 1993, The Regents of the University of California.
7  *
8  * All rights reserved.
9  *
10  * This code is derived from software contributed to Berkeley by
11  * The Mach Operating System project at Carnegie-Mellon University.
12  *
13  * Redistribution and use in source and binary forms, with or without
14  * modification, are permitted provided that the following conditions
15  * are met:
16  * 1. Redistributions of source code must retain the above copyright
17  *    notice, this list of conditions and the following disclaimer.
18  * 2. Redistributions in binary form must reproduce the above copyright
19  *    notice, this list of conditions and the following disclaimer in the
20  *    documentation and/or other materials provided with the distribution.
21  * 3. All advertising materials mentioning features or use of this software
22  *    must display the following acknowledgement:
23  *	This product includes software developed by Charles D. Cranor,
24  *      Washington University, the University of California, Berkeley and
25  *      its contributors.
26  * 4. Neither the name of the University nor the names of its contributors
27  *    may be used to endorse or promote products derived from this software
28  *    without specific prior written permission.
29  *
30  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
31  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
32  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
33  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
34  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
35  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
36  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
37  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
38  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
39  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
40  * SUCH DAMAGE.
41  *
42  *	@(#)vm_page.c   8.3 (Berkeley) 3/21/94
43  * from: Id: uvm_page.c,v 1.1.2.18 1998/02/06 05:24:42 chs Exp
44  *
45  *
46  * Copyright (c) 1987, 1990 Carnegie-Mellon University.
47  * All rights reserved.
48  *
49  * Permission to use, copy, modify and distribute this software and
50  * its documentation is hereby granted, provided that both the copyright
51  * notice and this permission notice appear in all copies of the
52  * software, derivative works or modified versions, and any portions
53  * thereof, and that both notices appear in supporting documentation.
54  *
55  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
56  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
57  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
58  *
59  * Carnegie Mellon requests users of this software to return to
60  *
61  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
62  *  School of Computer Science
63  *  Carnegie Mellon University
64  *  Pittsburgh PA 15213-3890
65  *
66  * any improvements or extensions that they make and grant Carnegie the
67  * rights to redistribute these changes.
68  */
69 
70 /*
71  * uvm_page.c: page ops.
72  */
73 
74 #include <sys/param.h>
75 #include <sys/systm.h>
76 #include <sys/sched.h>
77 #include <sys/kernel.h>
78 #include <sys/vnode.h>
79 #include <sys/mount.h>
80 #include <sys/proc.h>
81 
82 #include <uvm/uvm.h>
83 
84 /*
85  * for object trees
86  */
87 RB_GENERATE(uvm_objtree, vm_page, objt, uvm_pagecmp);
88 
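/*
 * uvm_pagecmp: comparator for the per-object page tree generated above.
 *
 * => pages are keyed on their offset within the object; the expression
 *    below yields -1, 0 or 1 (the "a->offset > b->offset" term supplies
 *    the 0-or-1 part).
 */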
89 int
90 uvm_pagecmp(struct vm_page *a, struct vm_page *b)
91 {
92 	return (a->offset < b->offset ? -1 : a->offset > b->offset);
93 }
94 
95 /*
96  * global vars... XXXCDC: move to uvm. structure.
97  */
98 
99 /*
100  * physical memory config is stored in vm_physmem.
101  */
102 
103 struct vm_physseg vm_physmem[VM_PHYSSEG_MAX];	/* XXXCDC: uvm.physmem */
104 int vm_nphysseg = 0;				/* XXXCDC: uvm.nphysseg */
105 
106 /*
107  * Some supported CPUs in a given architecture don't support all
108  * of the things necessary to do idle page zero'ing efficiently.
109  * We therefore provide a way to disable it from machdep code here.
110  */
111 
112 /*
113  * XXX disabled until we can find a way to do this without causing
114  * problems for either cpu caches or DMA latency.
115  */
116 boolean_t vm_page_zero_enable = FALSE;
117 
118 /*
119  * local variables
120  */
121 
122 /*
123  * these variables record the kernel virtual address range that
124  * uvm_page_init passes up to its caller, for debugging purposes.  The
125  * implementation of uvm_pageboot_alloc here also uses them internally.
126  */
127 
128 static vaddr_t      virtual_space_start;
129 static vaddr_t      virtual_space_end;
130 
131 /*
132  * local prototypes
133  */
134 
135 static void uvm_pageinsert(struct vm_page *);
136 static void uvm_pageremove(struct vm_page *);
137 
138 /*
139  * inline functions
140  */
141 
142 /*
143  * uvm_pageinsert: insert a page in the object
144  *
145  * => caller must lock object
146  * => caller must lock page queues XXX questionable
147  * => caller should have already set pg's object and offset pointers
148  *    and bumped the version counter
149  */
150 
151 __inline static void
152 uvm_pageinsert(struct vm_page *pg)
153 {
154 	struct vm_page	*dupe;
155 
156 	KASSERT((pg->pg_flags & PG_TABLED) == 0);
157 	dupe = RB_INSERT(uvm_objtree, &pg->uobject->memt, pg);
158 	/* not allowed to insert over another page */
159 	KASSERT(dupe == NULL);
160 	atomic_setbits_int(&pg->pg_flags, PG_TABLED);
161 	pg->uobject->uo_npages++;
162 }
163 
164 /*
165  * uvm_pageremove: remove page from object
166  *
167  * => caller must lock object
168  * => caller must lock page queues
169  */
170 
171 static __inline void
172 uvm_pageremove(struct vm_page *pg)
173 {
174 
175 	KASSERT(pg->pg_flags & PG_TABLED);
176 	RB_REMOVE(uvm_objtree, &pg->uobject->memt, pg);
177 
178 	atomic_clearbits_int(&pg->pg_flags, PG_TABLED);
179 	pg->uobject->uo_npages--;
180 	pg->uobject = NULL;
181 	pg->pg_version++;
182 }
183 
184 /*
185  * uvm_page_init: init the page system.   called from uvm_init().
186  *
187  * => we return the range of kernel virtual memory in kvm_startp/kvm_endp
188  */
189 
190 void
191 uvm_page_init(vaddr_t *kvm_startp, vaddr_t *kvm_endp)
192 {
193 	vsize_t freepages, pagecount, n;
194 	vm_page_t pagearray;
195 	int lcv, i;
196 	paddr_t paddr;
197 
198 	/*
199 	 * init the page queues and page queue locks
200 	 */
201 
202 	TAILQ_INIT(&uvm.page_active);
203 	TAILQ_INIT(&uvm.page_inactive_swp);
204 	TAILQ_INIT(&uvm.page_inactive_obj);
205 	simple_lock_init(&uvm.pageqlock);
206 	mtx_init(&uvm.fpageqlock, IPL_VM);
207 	uvm_pmr_init();
208 
209 	/*
210 	 * allocate vm_page structures.
211 	 */
212 
213 	/*
214 	 * sanity check:
215 	 * before calling this function the MD code is expected to register
216 	 * some free RAM with the uvm_page_physload() function.   our job
217 	 * now is to allocate vm_page structures for this memory.
218 	 */
219 
220 	if (vm_nphysseg == 0)
221 		panic("uvm_page_init: no memory pre-allocated");
222 
223 	/*
224 	 * first calculate the number of free pages...
225 	 *
226 	 * note that we use start/end rather than avail_start/avail_end.
227 	 * this allows us to allocate extra vm_page structures in case we
228 	 * want to return some memory to the pool after booting.
229 	 */
230 
231 	freepages = 0;
232 	for (lcv = 0 ; lcv < vm_nphysseg ; lcv++)
233 		freepages += (vm_physmem[lcv].end - vm_physmem[lcv].start);
234 
235 	/*
236 	 * we now know we have (PAGE_SIZE * freepages) bytes of memory we can
237 	 * use.   for each page of memory we use we need a vm_page structure.
238 	 * thus, the total number of pages we can use is the total size of
239 	 * the memory divided by (PAGE_SIZE plus the size of the vm_page
240 	 * structure).   we add one to freepages as a fudge factor to avoid
241 	 * truncation errors (since we can only allocate in terms of whole
242 	 * pages).
243 	 */
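	/*
	 * worked example (illustrative, with an assumed structure size):
	 * with a 4KB PAGE_SIZE and a hypothetical 128 byte struct vm_page,
	 * 100000 free pages give
	 *	pagecount = (100001 * 4096) / (4096 + 128) ~= 96970,
	 * i.e. roughly 3% of the pages end up backing the vm_page array.
	 */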
244 
245 	pagecount = (((paddr_t)freepages + 1) << PAGE_SHIFT) /
246 	    (PAGE_SIZE + sizeof(struct vm_page));
247 	pagearray = (vm_page_t)uvm_pageboot_alloc(pagecount *
248 	    sizeof(struct vm_page));
249 	memset(pagearray, 0, pagecount * sizeof(struct vm_page));
250 
251 	/*
252 	 * init the vm_page structures and put them in the correct place.
253 	 */
254 
255 	for (lcv = 0 ; lcv < vm_nphysseg ; lcv++) {
256 		n = vm_physmem[lcv].end - vm_physmem[lcv].start;
257 		if (n > pagecount) {
258 			panic("uvm_page_init: lost %ld page(s) in init",
259 			    (long)(n - pagecount));
260 			    /* XXXCDC: shouldn't happen? */
261 			/* n = pagecount; */
262 		}
263 
264 		/* set up page array pointers */
265 		vm_physmem[lcv].pgs = pagearray;
266 		pagearray += n;
267 		pagecount -= n;
268 		vm_physmem[lcv].lastpg = vm_physmem[lcv].pgs + (n - 1);
269 
270 		/* init and free vm_pages (we've already zeroed them) */
271 		paddr = ptoa(vm_physmem[lcv].start);
272 		for (i = 0 ; i < n ; i++, paddr += PAGE_SIZE) {
273 			vm_physmem[lcv].pgs[i].phys_addr = paddr;
274 #ifdef __HAVE_VM_PAGE_MD
275 			VM_MDPAGE_INIT(&vm_physmem[lcv].pgs[i]);
276 #endif
277 			if (atop(paddr) >= vm_physmem[lcv].avail_start &&
278 			    atop(paddr) <= vm_physmem[lcv].avail_end) {
279 				uvmexp.npages++;
280 			}
281 		}
282 
283 		/*
284 		 * Add pages to free pool.
285 		 */
286 		uvm_pmr_freepages(&vm_physmem[lcv].pgs[
287 		    vm_physmem[lcv].avail_start - vm_physmem[lcv].start],
288 		    vm_physmem[lcv].avail_end - vm_physmem[lcv].avail_start);
289 	}
290 
291 	/*
292 	 * pass up the values of virtual_space_start and
293 	 * virtual_space_end (obtained by uvm_pageboot_alloc) to the upper
294 	 * layers of the VM.
295 	 */
296 
297 	*kvm_startp = round_page(virtual_space_start);
298 	*kvm_endp = trunc_page(virtual_space_end);
299 
300 	/*
301 	 * init locks for kernel threads
302 	 */
303 	mtx_init(&uvm.aiodoned_lock, IPL_BIO);
304 
305 	/*
306 	 * init reserve thresholds
307 	 * XXXCDC - values may need adjusting
308 	 */
309 	uvmexp.reserve_pagedaemon = 4;
310 	uvmexp.reserve_kernel = 6;
311 	uvmexp.anonminpct = 10;
312 	uvmexp.vnodeminpct = 10;
313 	uvmexp.vtextminpct = 5;
314 	uvmexp.anonmin = uvmexp.anonminpct * 256 / 100;
315 	uvmexp.vnodemin = uvmexp.vnodeminpct * 256 / 100;
316 	uvmexp.vtextmin = uvmexp.vtextminpct * 256 / 100;
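	/*
	 * note that the *min values above are kept on a 0..256 scale
	 * (percent * 256 / 100), presumably so that consumers can compare
	 * them against page counts with shifts rather than divisions.
	 */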
317 
318 	/*
319 	 * determine if we should zero pages in the idle loop.
320 	 */
321 
322 	uvm.page_idle_zero = vm_page_zero_enable;
323 
324 	/*
325 	 * done!
326 	 */
327 
328 	uvm.page_init_done = TRUE;
329 }
330 
331 /*
332  * uvm_setpagesize: set the page size
333  *
334  * => sets page_shift and page_mask from uvmexp.pagesize.
335  */
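/*
 * e.g. with a pagesize of 4096 this leaves pagemask == 0xfff and
 * pageshift == 12.
 */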
336 
337 void
338 uvm_setpagesize(void)
339 {
340 	if (uvmexp.pagesize == 0)
341 		uvmexp.pagesize = DEFAULT_PAGE_SIZE;
342 	uvmexp.pagemask = uvmexp.pagesize - 1;
343 	if ((uvmexp.pagemask & uvmexp.pagesize) != 0)
344 		panic("uvm_setpagesize: page size not a power of two");
345 	for (uvmexp.pageshift = 0; ; uvmexp.pageshift++)
346 		if ((1 << uvmexp.pageshift) == uvmexp.pagesize)
347 			break;
348 }
349 
350 /*
351  * uvm_pageboot_alloc: steal memory from physmem for bootstrapping
352  */
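/*
 * Illustrative only (hypothetical caller): besides the vm_page array
 * allocation in uvm_page_init() above, MD pmap bootstrap code may grab
 * early wired memory the same way, e.g.
 *
 *	va = uvm_pageboot_alloc(round_page(npte * sizeof(pt_entry_t)));
 *
 * where "npte" and "pt_entry_t" stand in for whatever the MD code needs.
 */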
353 
354 vaddr_t
355 uvm_pageboot_alloc(vsize_t size)
356 {
357 #if defined(PMAP_STEAL_MEMORY)
358 	vaddr_t addr;
359 
360 	/*
361 	 * defer bootstrap allocation to MD code (it may want to allocate
362 	 * from a direct-mapped segment).  pmap_steal_memory should round
363 	 * off virtual_space_start/virtual_space_end.
364 	 */
365 
366 	addr = pmap_steal_memory(size, &virtual_space_start,
367 	    &virtual_space_end);
368 
369 	return(addr);
370 
371 #else /* !PMAP_STEAL_MEMORY */
372 
373 	static boolean_t initialized = FALSE;
374 	vaddr_t addr, vaddr;
375 	paddr_t paddr;
376 
377 	/* round to page size */
378 	size = round_page(size);
379 
380 	/*
381 	 * on first call to this function, initialize ourselves.
382 	 */
383 	if (initialized == FALSE) {
384 		pmap_virtual_space(&virtual_space_start, &virtual_space_end);
385 
386 		/* round it the way we like it */
387 		virtual_space_start = round_page(virtual_space_start);
388 		virtual_space_end = trunc_page(virtual_space_end);
389 
390 		initialized = TRUE;
391 	}
392 
393 	/*
394 	 * allocate virtual memory for this request
395 	 */
396 	if (virtual_space_start == virtual_space_end ||
397 	    (virtual_space_end - virtual_space_start) < size)
398 		panic("uvm_pageboot_alloc: out of virtual space");
399 
400 	addr = virtual_space_start;
401 
402 #ifdef PMAP_GROWKERNEL
403 	/*
404 	 * If the kernel pmap can't map the requested space,
405 	 * then allocate more resources for it.
406 	 */
407 	if (uvm_maxkaddr < (addr + size)) {
408 		uvm_maxkaddr = pmap_growkernel(addr + size);
409 		if (uvm_maxkaddr < (addr + size))
410 			panic("uvm_pageboot_alloc: pmap_growkernel() failed");
411 	}
412 #endif
413 
414 	virtual_space_start += size;
415 
416 	/*
417 	 * allocate and mapin physical pages to back new virtual pages
418 	 */
419 
420 	for (vaddr = round_page(addr) ; vaddr < addr + size ;
421 	    vaddr += PAGE_SIZE) {
422 
423 		if (!uvm_page_physget(&paddr))
424 			panic("uvm_pageboot_alloc: out of memory");
425 
426 		/*
427 		 * Note this memory is no longer managed, so using
428 		 * pmap_kenter is safe.
429 		 */
430 		pmap_kenter_pa(vaddr, paddr, VM_PROT_READ|VM_PROT_WRITE);
431 	}
432 	pmap_update(pmap_kernel());
433 	return(addr);
434 #endif	/* PMAP_STEAL_MEMORY */
435 }
436 
437 #if !defined(PMAP_STEAL_MEMORY)
438 /*
439  * uvm_page_physget: "steal" one page from the vm_physmem structure.
440  *
441  * => attempt to allocate it off the end of a segment in which the "avail"
442  *    values match the start/end values.   if we can't do that, then we
443  *    will advance both values (making them equal, and removing some
444  *    vm_page structures from the non-avail area).
445  * => return false if out of memory.
446  */
447 
448 boolean_t
449 uvm_page_physget(paddr_t *paddrp)
450 {
451 	int lcv, x;
452 
453 	/* pass 1: try allocating from a matching end */
454 #if (VM_PHYSSEG_STRAT == VM_PSTRAT_BIGFIRST) || \
455 	(VM_PHYSSEG_STRAT == VM_PSTRAT_BSEARCH)
456 	for (lcv = vm_nphysseg - 1 ; lcv >= 0 ; lcv--)
457 #else
458 	for (lcv = 0 ; lcv < vm_nphysseg ; lcv++)
459 #endif
460 	{
461 
462 		if (uvm.page_init_done == TRUE)
463 			panic("uvm_page_physget: called _after_ bootstrap");
464 
465 		/* try from front */
466 		if (vm_physmem[lcv].avail_start == vm_physmem[lcv].start &&
467 		    vm_physmem[lcv].avail_start < vm_physmem[lcv].avail_end) {
468 			*paddrp = ptoa(vm_physmem[lcv].avail_start);
469 			vm_physmem[lcv].avail_start++;
470 			vm_physmem[lcv].start++;
471 			/* nothing left?   nuke it */
472 			if (vm_physmem[lcv].avail_start ==
473 			    vm_physmem[lcv].end) {
474 				if (vm_nphysseg == 1)
475 				    panic("uvm_page_physget: out of memory!");
476 				vm_nphysseg--;
477 				for (x = lcv ; x < vm_nphysseg ; x++)
478 					/* structure copy */
479 					vm_physmem[x] = vm_physmem[x+1];
480 			}
481 			return (TRUE);
482 		}
483 
484 		/* try from rear */
485 		if (vm_physmem[lcv].avail_end == vm_physmem[lcv].end &&
486 		    vm_physmem[lcv].avail_start < vm_physmem[lcv].avail_end) {
487 			*paddrp = ptoa(vm_physmem[lcv].avail_end - 1);
488 			vm_physmem[lcv].avail_end--;
489 			vm_physmem[lcv].end--;
490 			/* nothing left?   nuke it */
491 			if (vm_physmem[lcv].avail_end ==
492 			    vm_physmem[lcv].start) {
493 				if (vm_nphysseg == 1)
494 				    panic("uvm_page_physget: out of memory!");
495 				vm_nphysseg--;
496 				for (x = lcv ; x < vm_nphysseg ; x++)
497 					/* structure copy */
498 					vm_physmem[x] = vm_physmem[x+1];
499 			}
500 			return (TRUE);
501 		}
502 	}
503 
504 	/* pass2: forget about matching ends, just allocate something */
505 #if (VM_PHYSSEG_STRAT == VM_PSTRAT_BIGFIRST) || \
506 	(VM_PHYSSEG_STRAT == VM_PSTRAT_BSEARCH)
507 	for (lcv = vm_nphysseg - 1 ; lcv >= 0 ; lcv--)
508 #else
509 	for (lcv = 0 ; lcv < vm_nphysseg ; lcv++)
510 #endif
511 	{
512 
513 		/* any room in this bank? */
514 		if (vm_physmem[lcv].avail_start >= vm_physmem[lcv].avail_end)
515 			continue;  /* nope */
516 
517 		*paddrp = ptoa(vm_physmem[lcv].avail_start);
518 		vm_physmem[lcv].avail_start++;
519 		/* truncate! */
520 		vm_physmem[lcv].start = vm_physmem[lcv].avail_start;
521 
522 		/* nothing left?   nuke it */
523 		if (vm_physmem[lcv].avail_start == vm_physmem[lcv].end) {
524 			if (vm_nphysseg == 1)
525 				panic("uvm_page_physget: out of memory!");
526 			vm_nphysseg--;
527 			for (x = lcv ; x < vm_nphysseg ; x++)
528 				/* structure copy */
529 				vm_physmem[x] = vm_physmem[x+1];
530 		}
531 		return (TRUE);
532 	}
533 
534 	return (FALSE);        /* whoops! */
535 }
536 
537 #endif /* PMAP_STEAL_MEMORY */
538 
539 /*
540  * uvm_page_physload: load physical memory into VM system
541  *
542  * => all args are page frame numbers (PFs)
543  * => all pages in start/end get vm_page structures
544  * => areas marked by avail_start/avail_end get added to the free page pool
545  * => we are limited to VM_PHYSSEG_MAX physical memory segments
546  */
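/*
 * Illustrative only (not a call site in this file): MD bootstrap code
 * typically registers a segment of RAM with something like
 *
 *	uvm_page_physload(atop(seg_start), atop(seg_end),
 *	    atop(avail_start), atop(avail_end), 0);
 *
 * passing PHYSLOAD_DEVICE instead of 0 for device memory that must not
 * be added to the free page pool.
 */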
547 
548 void
549 uvm_page_physload(paddr_t start, paddr_t end, paddr_t avail_start,
550     paddr_t avail_end, int flags)
551 {
552 	int preload, lcv;
553 	psize_t npages;
554 	struct vm_page *pgs;
555 	struct vm_physseg *ps;
556 
557 	if (uvmexp.pagesize == 0)
558 		panic("uvm_page_physload: page size not set!");
559 
560 	if (start >= end)
561 		panic("uvm_page_physload: start >= end");
562 
563 	/*
564 	 * do we have room?
565 	 */
566 	if (vm_nphysseg == VM_PHYSSEG_MAX) {
567 		printf("uvm_page_physload: unable to load physical memory "
568 		    "segment\n");
569 		printf("\t%d segments allocated, ignoring 0x%llx -> 0x%llx\n",
570 		    VM_PHYSSEG_MAX, (long long)start, (long long)end);
571 		printf("\tincrease VM_PHYSSEG_MAX\n");
572 		return;
573 	}
574 
575 	/*
576 	 * check to see if this is a "preload" (i.e. uvm_mem_init hasn't been
577 	 * called yet, so malloc is not available).
578 	 */
579 	for (lcv = 0 ; lcv < vm_nphysseg ; lcv++) {
580 		if (vm_physmem[lcv].pgs)
581 			break;
582 	}
583 	preload = (lcv == vm_nphysseg);
584 
585 	/*
586 	 * if VM is already running, attempt to malloc() vm_page structures
587 	 */
588 	if (!preload) {
589 		/*
590 		 * XXXCDC: need some sort of lockout for this case
591 		 * right now it is only used by devices so it should be alright.
592 		 */
593  		paddr_t paddr;
594 
595  		npages = end - start;  /* # of pages */
596 
597 		pgs = (struct vm_page *)uvm_km_zalloc(kernel_map,
598 		    npages * sizeof(*pgs));
599 		if (pgs == NULL) {
600 			printf("uvm_page_physload: can not malloc vm_page "
601 			    "structs for segment\n");
602 			printf("\tignoring 0x%lx -> 0x%lx\n", start, end);
603 			return;
604 		}
605 		/* init phys_addr and free pages, XXX uvmexp.npages */
606 		for (lcv = 0, paddr = ptoa(start); lcv < npages;
607 		    lcv++, paddr += PAGE_SIZE) {
608 			pgs[lcv].phys_addr = paddr;
609 #ifdef __HAVE_VM_PAGE_MD
610 			VM_MDPAGE_INIT(&pgs[lcv]);
611 #endif
612 			if (atop(paddr) >= avail_start &&
613 			    atop(paddr) <= avail_end) {
614 				if (flags & PHYSLOAD_DEVICE) {
615 					atomic_setbits_int(&pgs[lcv].pg_flags,
616 					    PG_DEV);
617 					pgs[lcv].wire_count = 1;
618 				} else {
619 #if defined(VM_PHYSSEG_NOADD)
620 		panic("uvm_page_physload: tried to add RAM after vm_mem_init");
621 #endif
622 				}
623 			}
624 		}
625 
626 		/*
627 		 * Add pages to free pool.
628 		 */
629 		if ((flags & PHYSLOAD_DEVICE) == 0) {
630 			uvm_pmr_freepages(&pgs[avail_start - start],
631 			    avail_end - avail_start);
632 		}
633 
634 		/* XXXCDC: need hook to tell pmap to rebuild pv_list, etc... */
635 	} else {
636 
637 		/* gcc complains if these don't get init'd */
638 		pgs = NULL;
639 		npages = 0;
640 
641 	}
642 
643 	/*
644 	 * now insert us in the proper place in vm_physmem[]
645 	 */
646 
647 #if (VM_PHYSSEG_STRAT == VM_PSTRAT_RANDOM)
648 
649 	/* random: put it at the end (easy!) */
650 	ps = &vm_physmem[vm_nphysseg];
651 
652 #elif (VM_PHYSSEG_STRAT == VM_PSTRAT_BSEARCH)
653 
654 	{
655 		int x;
656 		/* sort by address for binary search */
657 		for (lcv = 0 ; lcv < vm_nphysseg ; lcv++)
658 			if (start < vm_physmem[lcv].start)
659 				break;
660 		ps = &vm_physmem[lcv];
661 		/* move back other entries, if necessary ... */
662 		for (x = vm_nphysseg ; x > lcv ; x--)
663 			/* structure copy */
664 			vm_physmem[x] = vm_physmem[x - 1];
665 	}
666 
667 #elif (VM_PHYSSEG_STRAT == VM_PSTRAT_BIGFIRST)
668 
669 	{
670 		int x;
671 		/* sort by largest segment first */
672 		for (lcv = 0 ; lcv < vm_nphysseg ; lcv++)
673 			if ((end - start) >
674 			    (vm_physmem[lcv].end - vm_physmem[lcv].start))
675 				break;
676 		ps = &vm_physmem[lcv];
677 		/* move back other entries, if necessary ... */
678 		for (x = vm_nphysseg ; x > lcv ; x--)
679 			/* structure copy */
680 			vm_physmem[x] = vm_physmem[x - 1];
681 	}
682 
683 #else
684 
685 	panic("uvm_page_physload: unknown physseg strategy selected!");
686 
687 #endif
688 
689 	ps->start = start;
690 	ps->end = end;
691 	ps->avail_start = avail_start;
692 	ps->avail_end = avail_end;
693 	if (preload) {
694 		ps->pgs = NULL;
695 	} else {
696 		ps->pgs = pgs;
697 		ps->lastpg = pgs + npages - 1;
698 	}
699 	vm_nphysseg++;
700 
701 	/*
702 	 * done!
703 	 */
704 
705 	return;
706 }
707 
708 #ifdef DDB /* XXXCDC: TMP TMP TMP DEBUG DEBUG DEBUG */
709 
710 void uvm_page_physdump(void); /* SHUT UP GCC */
711 
712 /* call from DDB */
713 void
714 uvm_page_physdump(void)
715 {
716 	int lcv;
717 
718 	printf("uvm_page_physdump: physical memory config [segs=%d of %d]:\n",
719 	    vm_nphysseg, VM_PHYSSEG_MAX);
720 	for (lcv = 0 ; lcv < vm_nphysseg ; lcv++)
721 		printf("0x%llx->0x%llx [0x%llx->0x%llx]\n",
722 		    (long long)vm_physmem[lcv].start,
723 		    (long long)vm_physmem[lcv].end,
724 		    (long long)vm_physmem[lcv].avail_start,
725 		    (long long)vm_physmem[lcv].avail_end);
726 	printf("STRATEGY = ");
727 	switch (VM_PHYSSEG_STRAT) {
728 	case VM_PSTRAT_RANDOM: printf("RANDOM\n"); break;
729 	case VM_PSTRAT_BSEARCH: printf("BSEARCH\n"); break;
730 	case VM_PSTRAT_BIGFIRST: printf("BIGFIRST\n"); break;
731 	default: printf("<<UNKNOWN>>!!!!\n");
732 	}
733 }
734 #endif
735 
736 void
737 uvm_shutdown(void)
738 {
739 #ifdef UVM_SWAP_ENCRYPT
740 	uvm_swap_finicrypt_all();
741 #endif
742 }
743 
744 /*
745  * Perform insert of a given page in the specified anon or obj.
746  * This is basically uvm_pagealloc, but with the page already given.
747  */
748 void
749 uvm_pagealloc_pg(struct vm_page *pg, struct uvm_object *obj, voff_t off,
750     struct vm_anon *anon)
751 {
752 	int	flags;
753 
754 	flags = PG_BUSY | PG_FAKE;
755 	pg->offset = off;
756 	pg->uobject = obj;
757 	pg->uanon = anon;
758 
759 	if (anon) {
760 		anon->an_page = pg;
761 		flags |= PQ_ANON;
762 	} else if (obj)
763 		uvm_pageinsert(pg);
764 	atomic_setbits_int(&pg->pg_flags, flags);
765 #if defined(UVM_PAGE_TRKOWN)
766 	pg->owner_tag = NULL;
767 #endif
768 	UVM_PAGE_OWN(pg, "new alloc");
769 }
770 
771 /*
772  * uvm_pglistalloc: allocate a list of pages
773  *
774  * => allocated pages are placed at the tail of rlist.  rlist is
775  *    assumed to be properly initialized by caller.
776  * => returns 0 on success or errno on failure
777  * => doesn't take into account clean non-busy pages on inactive list
778  *	that could be used(?)
779  * => params:
780  *	size		the size of the allocation, rounded to page size.
781  *	low		the low address of the allowed allocation range.
782  *	high		the high address of the allowed allocation range.
783  *	alignment	memory must be aligned to this power-of-two boundary.
784  *	boundary	no segment in the allocation may cross this
785  *			power-of-two boundary (relative to zero).
786  * => flags:
787  *	UVM_PLA_NOWAIT	fail if allocation fails
788  *	UVM_PLA_WAITOK	wait for memory to become avail
789  *	UVM_PLA_ZERO	return zeroed memory
790  */
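/*
 * Illustrative only: a caller wanting 64KB of DMA-reachable memory in a
 * single physically contiguous segment might do
 *
 *	TAILQ_INIT(&pgl);
 *	error = uvm_pglistalloc(64 * 1024, dma_constraint.ucr_low,
 *	    dma_constraint.ucr_high, 0, 0, &pgl, 1, UVM_PLA_WAITOK);
 *
 * compare uvm_pagealloc_multi() below, which uses the same pattern with
 * nsegs equal to the number of pages.
 */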
791 int
792 uvm_pglistalloc(psize_t size, paddr_t low, paddr_t high, paddr_t alignment,
793     paddr_t boundary, struct pglist *rlist, int nsegs, int flags)
794 {
795 
796 	KASSERT((alignment & (alignment - 1)) == 0);
797 	KASSERT((boundary & (boundary - 1)) == 0);
798 	KASSERT(!(flags & UVM_PLA_WAITOK) ^ !(flags & UVM_PLA_NOWAIT));
799 
800 	if (size == 0)
801 		return (EINVAL);
802 
803 	if ((high & PAGE_MASK) != PAGE_MASK) {
804 		printf("uvm_pglistalloc: Upper boundary 0x%lx "
805 		    "not on pagemask.\n", (unsigned long)high);
806 	}
807 
808 	/*
809 	 * Our allocations are always page granularity, so our alignment
810 	 * must be, too.
811 	 */
812 	if (alignment < PAGE_SIZE)
813 		alignment = PAGE_SIZE;
814 
815 	low = atop(roundup(low, alignment));
816 	/*
817 	 * high + 1 may result in overflow, in which case high becomes 0x0,
818 	 * which is the 'don't care' value.
819 	 * The only requirement in that case is that low is also 0x0, or the
820 	 * low<high assert will fail.
821 	 */
822 	high = atop(high + 1);
823 	size = atop(round_page(size));
824 	alignment = atop(alignment);
825 	if (boundary < PAGE_SIZE && boundary != 0)
826 		boundary = PAGE_SIZE;
827 	boundary = atop(boundary);
828 
829 	return uvm_pmr_getpages(size, low, high, alignment, boundary, nsegs,
830 	    flags, rlist);
831 }
832 
833 /*
834  * uvm_pglistfree: free a list of pages
835  *
836  * => pages should already be unmapped
837  */
838 void
839 uvm_pglistfree(struct pglist *list)
840 {
841 	uvm_pmr_freepageq(list);
842 }
843 
844 /*
845  * interface used by the buffer cache to allocate the pages of one buffer
846  * at a time.  The pages are allocated wired, in DMA accessible memory.
847  */
848 void
849 uvm_pagealloc_multi(struct uvm_object *obj, voff_t off, vsize_t size,
850     int flags)
851 {
852 	struct pglist    plist;
853 	struct vm_page  *pg;
854 	int              i;
855 
856 
857 	TAILQ_INIT(&plist);
858 	(void) uvm_pglistalloc(size, dma_constraint.ucr_low,
859 	    dma_constraint.ucr_high, 0, 0, &plist, atop(round_page(size)),
860 	    UVM_PLA_WAITOK);
861 	i = 0;
862 	while ((pg = TAILQ_FIRST(&plist)) != NULL) {
863 		pg->wire_count = 1;
864 		atomic_setbits_int(&pg->pg_flags, PG_CLEAN | PG_FAKE);
865 		KASSERT((pg->pg_flags & PG_DEV) == 0);
866 		TAILQ_REMOVE(&plist, pg, pageq);
867 		uvm_pagealloc_pg(pg, obj, off + ptoa(i++), NULL);
868 	}
869 }
870 
871 /*
872  * interface used by the buffer cache to reallocate one buffer at a time.
873  * The pages are reallocated wired, outside the DMA accessible region.
874  *
875  */
876 void
877 uvm_pagerealloc_multi(struct uvm_object *obj, voff_t off, vsize_t size,
878     int flags, struct uvm_constraint_range *where)
879 {
880 	struct pglist    plist;
881 	struct vm_page  *pg, *tpg;
882 	int              i;
883 	voff_t		offset;
884 
885 
886 	TAILQ_INIT(&plist);
887 	if (size == 0)
888 		panic("size 0 uvm_pagerealloc");
889 	(void) uvm_pglistalloc(size, where->ucr_low, where->ucr_high, 0,
890 	    0, &plist, atop(round_page(size)), UVM_PLA_WAITOK);
891 	i = 0;
892 	while ((pg = TAILQ_FIRST(&plist)) != NULL) {
893 		offset = off + ptoa(i++);
894 		tpg = uvm_pagelookup(obj, offset);
895 		pg->wire_count = 1;
896 		atomic_setbits_int(&pg->pg_flags, PG_CLEAN | PG_FAKE);
897 		KASSERT((pg->pg_flags & PG_DEV) == 0);
898 		TAILQ_REMOVE(&plist, pg, pageq);
899 		uvm_pagecopy(tpg, pg);
900 		uvm_pagefree(tpg);
901 		uvm_pagealloc_pg(pg, obj, offset, NULL);
902 	}
903 }
904 
905 /*
906  * uvm_pagealloc: allocate a vm_page.
907  *
908  * => return null if no pages free
909  * => wake up pagedaemon if number of free pages drops below low water mark
910  * => if obj != NULL, obj must be locked (to put in tree)
911  * => if anon != NULL, anon must be locked (to put in anon)
912  * => only one of obj or anon can be non-null
913  * => caller must activate/deactivate page if it is not wired.
914  */
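/*
 * Illustrative only: a fault handler allocating a fresh zeroed page for
 * an anon might use
 *
 *	pg = uvm_pagealloc(NULL, 0, anon, UVM_PGA_ZERO);
 *
 * while object pagers pass (uobj, offset, NULL, flags) instead.
 */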
915 
916 struct vm_page *
917 uvm_pagealloc(struct uvm_object *obj, voff_t off, struct vm_anon *anon,
918     int flags)
919 {
920 	struct vm_page *pg;
921 	struct pglist pgl;
922 	int pmr_flags;
923 	boolean_t use_reserve;
924 
925 	KASSERT(obj == NULL || anon == NULL);
926 	KASSERT(off == trunc_page(off));
927 
928 	/*
929 	 * check to see if we need to wake the pagedaemon to generate
930 	 * some free pages.
931 	 */
932 	if ((uvmexp.free - BUFPAGES_DEFICIT) < uvmexp.freemin ||
933 	    ((uvmexp.free - BUFPAGES_DEFICIT) < uvmexp.freetarg &&
934 	    (uvmexp.inactive + BUFPAGES_INACT) < uvmexp.inactarg))
935 		wakeup(&uvm.pagedaemon);
936 
937 	/*
938 	 * fail if any of these conditions is true:
939 	 * [1]  there really are no free pages, or
940 	 * [2]  only kernel "reserved" pages remain and
941 	 *        the page isn't being allocated to a kernel object, or
942 	 * [3]  only pagedaemon "reserved" pages remain and
943 	 *        the requestor isn't the pagedaemon.
944 	 */
945 
946 	use_reserve = (flags & UVM_PGA_USERESERVE) ||
947 		(obj && UVM_OBJ_IS_KERN_OBJECT(obj));
948 	if ((uvmexp.free <= uvmexp.reserve_kernel && !use_reserve) ||
949 	    (uvmexp.free <= uvmexp.reserve_pagedaemon &&
950 	     !((curproc == uvm.pagedaemon_proc) ||
951 	      (curproc == syncerproc))))
952 		goto fail;
953 
954 	pmr_flags = UVM_PLA_NOWAIT;
955 	if (flags & UVM_PGA_ZERO)
956 		pmr_flags |= UVM_PLA_ZERO;
957 	TAILQ_INIT(&pgl);
958 	if (uvm_pmr_getpages(1, 0, 0, 1, 0, 1, pmr_flags, &pgl) != 0)
959 		goto fail;
960 
961 	pg = TAILQ_FIRST(&pgl);
962 	KASSERT(pg != NULL && TAILQ_NEXT(pg, pageq) == NULL);
963 
964 	uvm_pagealloc_pg(pg, obj, off, anon);
965 	KASSERT((pg->pg_flags & PG_DEV) == 0);
966 	atomic_setbits_int(&pg->pg_flags, PG_BUSY|PG_CLEAN|PG_FAKE);
967 	if (flags & UVM_PGA_ZERO)
968 		atomic_clearbits_int(&pg->pg_flags, PG_CLEAN);
969 
970 	return(pg);
971 
972  fail:
973 	return (NULL);
974 }
975 
976 /*
977  * uvm_pagerealloc: reallocate a page from one object to another
978  *
979  * => both objects must be locked
980  */
981 
982 void
983 uvm_pagerealloc(struct vm_page *pg, struct uvm_object *newobj, voff_t newoff)
984 {
985 
986 	/*
987 	 * remove it from the old object
988 	 */
989 
990 	if (pg->uobject) {
991 		uvm_pageremove(pg);
992 	}
993 
994 	/*
995 	 * put it in the new object
996 	 */
997 
998 	if (newobj) {
999 		pg->uobject = newobj;
1000 		pg->offset = newoff;
1001 		pg->pg_version++;
1002 		uvm_pageinsert(pg);
1003 	}
1004 }
1005 
1006 
1007 /*
1008  * uvm_pagefree: free page
1009  *
1010  * => erase page's identity (i.e. remove from object)
1011  * => put page on free list
1012  * => caller must lock owning object (either anon or uvm_object)
1013  * => caller must lock page queues
1014  * => assumes all valid mappings of pg are gone
1015  */
1016 
1017 void
1018 uvm_pagefree(struct vm_page *pg)
1019 {
1020 	int saved_loan_count = pg->loan_count;
1021 
1022 #ifdef DEBUG
1023 	if (pg->uobject == (void *)0xdeadbeef &&
1024 	    pg->uanon == (void *)0xdeadbeef) {
1025 		panic("uvm_pagefree: freeing free page %p", pg);
1026 	}
1027 #endif
1028 
1029 	KASSERT((pg->pg_flags & PG_DEV) == 0);
1030 
1031 	/*
1032 	 * if the page was an object page (and thus "TABLED"), remove it
1033 	 * from the object.
1034 	 */
1035 
1036 	if (pg->pg_flags & PG_TABLED) {
1037 
1038 		/*
1039 		 * if the object page is on loan we are going to drop ownership.
1040 		 * it is possible that an anon will take over as owner for this
1041 		 * page later on.   the anon will want a !PG_CLEAN page so that
1042 		 * it knows it needs to allocate swap if it wants to page the
1043 		 * page out.
1044 		 */
1045 
1046 		/* in case an anon takes over */
1047 		if (saved_loan_count)
1048 			atomic_clearbits_int(&pg->pg_flags, PG_CLEAN);
1049 		uvm_pageremove(pg);
1050 
1051 		/*
1052 		 * if our page was on loan, then we just lost control over it
1053 		 * (in fact, if it was loaned to an anon, the anon may have
1054 		 * already taken over ownership of the page by now and thus
1055 		 * changed the loan_count [e.g. in uvmfault_anonget()]) we just
1056 		 * return (when the last loan is dropped, then the page can be
1057 		 * freed by whatever was holding the last loan).
1058 		 */
1059 
1060 		if (saved_loan_count)
1061 			return;
1062 	} else if (saved_loan_count && pg->uanon) {
1063 		/*
1064 		 * if our page is owned by an anon and is loaned out to the
1065 		 * kernel then we just want to drop ownership and return.
1066 		 * the kernel must free the page when all its loans clear ...
1067 		 * note that the kernel can't change the loan status of our
1068 		 * page as long as we are holding PQ lock.
1069 		 */
1070 		atomic_clearbits_int(&pg->pg_flags, PQ_ANON);
1071 		pg->uanon->an_page = NULL;
1072 		pg->uanon = NULL;
1073 		return;
1074 	}
1075 	KASSERT(saved_loan_count == 0);
1076 
1077 	/*
1078 	 * now remove the page from the queues
1079 	 */
1080 
1081 	if (pg->pg_flags & PQ_ACTIVE) {
1082 		TAILQ_REMOVE(&uvm.page_active, pg, pageq);
1083 		atomic_clearbits_int(&pg->pg_flags, PQ_ACTIVE);
1084 		uvmexp.active--;
1085 	}
1086 	if (pg->pg_flags & PQ_INACTIVE) {
1087 		if (pg->pg_flags & PQ_SWAPBACKED)
1088 			TAILQ_REMOVE(&uvm.page_inactive_swp, pg, pageq);
1089 		else
1090 			TAILQ_REMOVE(&uvm.page_inactive_obj, pg, pageq);
1091 		atomic_clearbits_int(&pg->pg_flags, PQ_INACTIVE);
1092 		uvmexp.inactive--;
1093 	}
1094 
1095 	/*
1096 	 * if the page was wired, unwire it now.
1097 	 */
1098 
1099 	if (pg->wire_count) {
1100 		pg->wire_count = 0;
1101 		uvmexp.wired--;
1102 	}
1103 	if (pg->uanon) {
1104 		pg->uanon->an_page = NULL;
1105 		pg->uanon = NULL;
1106 		atomic_clearbits_int(&pg->pg_flags, PQ_ANON);
1107 	}
1108 
1109 	/*
1110 	 * Clean page state bits.
1111 	 */
1112 	atomic_clearbits_int(&pg->pg_flags, PQ_AOBJ); /* XXX: find culprit */
1113 	atomic_clearbits_int(&pg->pg_flags, PQ_ENCRYPT|
1114 	    PG_ZERO|PG_FAKE|PG_BUSY|PG_RELEASED|PG_CLEAN|PG_CLEANCHK);
1115 
1116 	/*
1117 	 * and put on free queue
1118 	 */
1119 
1120 #ifdef DEBUG
1121 	pg->uobject = (void *)0xdeadbeef;
1122 	pg->offset = 0xdeadbeef;
1123 	pg->uanon = (void *)0xdeadbeef;
1124 #endif
1125 
1126 	uvm_pmr_freepages(pg, 1);
1127 
1128 	if (uvmexp.zeropages < UVM_PAGEZERO_TARGET)
1129 		uvm.page_idle_zero = vm_page_zero_enable;
1130 }
1131 
1132 /*
1133  * uvm_page_unbusy: unbusy an array of pages.
1134  *
1135  * => pages must either all belong to the same object, or all belong to anons.
1136  * => if pages are object-owned, object must be locked.
1137  * => if pages are anon-owned, anons must be unlocked and have 0 refcount.
1138  */
1139 
1140 void
1141 uvm_page_unbusy(struct vm_page **pgs, int npgs)
1142 {
1143 	struct vm_page *pg;
1144 	struct uvm_object *uobj;
1145 	int i;
1146 
1147 	for (i = 0; i < npgs; i++) {
1148 		pg = pgs[i];
1149 
1150 		if (pg == NULL || pg == PGO_DONTCARE) {
1151 			continue;
1152 		}
1153 		if (pg->pg_flags & PG_WANTED) {
1154 			wakeup(pg);
1155 		}
1156 		if (pg->pg_flags & PG_RELEASED) {
1157 			uobj = pg->uobject;
1158 			if (uobj != NULL) {
1159 				uvm_lock_pageq();
1160 				pmap_page_protect(pg, VM_PROT_NONE);
1161 				/* XXX won't happen right now */
1162 				if (pg->pg_flags & PQ_AOBJ)
1163 					uao_dropswap(uobj,
1164 					    pg->offset >> PAGE_SHIFT);
1165 				uvm_pagefree(pg);
1166 				uvm_unlock_pageq();
1167 			} else {
1168 				atomic_clearbits_int(&pg->pg_flags, PG_BUSY);
1169 				UVM_PAGE_OWN(pg, NULL);
1170 				uvm_anfree(pg->uanon);
1171 			}
1172 		} else {
1173 			atomic_clearbits_int(&pg->pg_flags, PG_WANTED|PG_BUSY);
1174 			UVM_PAGE_OWN(pg, NULL);
1175 		}
1176 	}
1177 }
1178 
1179 #if defined(UVM_PAGE_TRKOWN)
1180 /*
1181  * uvm_page_own: set or release page ownership
1182  *
1183  * => this is a debugging function that keeps track of who sets PG_BUSY
1184  *	and where they do it.   it can be used to track down problems
1185  *	such as a process setting "PG_BUSY" and never releasing it.
1186  * => page's object [if any] must be locked
1187  * => if "tag" is NULL then we are releasing page ownership
1188  */
1189 void
1190 uvm_page_own(struct vm_page *pg, char *tag)
1191 {
1192 	/* gain ownership? */
1193 	if (tag) {
1194 		if (pg->owner_tag) {
1195 			printf("uvm_page_own: page %p already owned "
1196 			    "by proc %d [%s]\n", pg,
1197 			     pg->owner, pg->owner_tag);
1198 			panic("uvm_page_own");
1199 		}
1200 		pg->owner = (curproc) ? curproc->p_pid :  (pid_t) -1;
1201 		pg->owner_tag = tag;
1202 		return;
1203 	}
1204 
1205 	/* drop ownership */
1206 	if (pg->owner_tag == NULL) {
1207 		printf("uvm_page_own: dropping ownership of a non-owned "
1208 		    "page (%p)\n", pg);
1209 		panic("uvm_page_own");
1210 	}
1211 	pg->owner_tag = NULL;
1212 	return;
1213 }
1214 #endif
1215 
1216 /*
1217  * uvm_pageidlezero: zero free pages while the system is idle.
1218  *
1219  * => we do at least one iteration per call, if we are below the target.
1220  * => we loop until we either reach the target or whichqs indicates that
1221  *	there is a process ready to run.
1222  */
1223 void
1224 uvm_pageidlezero(void)
1225 {
1226 #if 0 /* disabled: need new code */
1227 	struct vm_page *pg;
1228 	struct pgfreelist *pgfl;
1229 	int free_list;
1230 
1231 	do {
1232 		uvm_lock_fpageq();
1233 
1234 		if (uvmexp.zeropages >= UVM_PAGEZERO_TARGET) {
1235 			uvm.page_idle_zero = FALSE;
1236 			uvm_unlock_fpageq();
1237 			return;
1238 		}
1239 
1240 		for (free_list = 0; free_list < VM_NFREELIST; free_list++) {
1241 			pgfl = &uvm.page_free[free_list];
1242 			if ((pg = TAILQ_FIRST(&pgfl->pgfl_queues[
1243 			    PGFL_UNKNOWN])) != NULL)
1244 				break;
1245 		}
1246 
1247 		if (pg == NULL) {
1248 			/*
1249 			 * No non-zero'd pages; don't bother trying again
1250 			 * until we know we have non-zero'd pages free.
1251 			 */
1252 			uvm.page_idle_zero = FALSE;
1253 			uvm_unlock_fpageq();
1254 			return;
1255 		}
1256 
1257 		TAILQ_REMOVE(&pgfl->pgfl_queues[PGFL_UNKNOWN], pg, pageq);
1258 		uvmexp.free--;
1259 		uvm_unlock_fpageq();
1260 
1261 #ifdef PMAP_PAGEIDLEZERO
1262 		if (PMAP_PAGEIDLEZERO(pg) == FALSE) {
1263 			/*
1264 			 * The machine-dependent code detected some
1265 			 * reason for us to abort zeroing pages,
1266 			 * probably because there is a process now
1267 			 * ready to run.
1268 			 */
1269 			uvm_lock_fpageq();
1270 			TAILQ_INSERT_HEAD(&pgfl->pgfl_queues[PGFL_UNKNOWN],
1271 			    pg, pageq);
1272 			uvmexp.free++;
1273 			uvmexp.zeroaborts++;
1274 			uvm_unlock_fpageq();
1275 			return;
1276 		}
1277 #else
1278 		/*
1279 		 * XXX This will toast the cache unless the pmap_zero_page()
1280 		 * XXX implementation does uncached access.
1281 		 */
1282 		pmap_zero_page(pg);
1283 #endif
1284 		atomic_setbits_int(&pg->pg_flags, PG_ZERO);
1285 
1286 		uvm_lock_fpageq();
1287 		TAILQ_INSERT_HEAD(&pgfl->pgfl_queues[PGFL_ZEROS], pg, pageq);
1288 		uvmexp.free++;
1289 		uvmexp.zeropages++;
1290 		uvm_unlock_fpageq();
1291 	} while (curcpu_is_idle());
1292 #endif /* 0 */
1293 }
1294 
1295 /*
1296  * when VM_PHYSSEG_MAX is 1, we can simplify these functions
1297  */
1298 
1299 #if VM_PHYSSEG_MAX > 1
1300 /*
1301  * vm_physseg_find: find vm_physseg structure that belongs to a PA
1302  */
1303 int
1304 vm_physseg_find(paddr_t pframe, int *offp)
1305 {
1306 
1307 #if (VM_PHYSSEG_STRAT == VM_PSTRAT_BSEARCH)
1308 	/* binary search for it */
1309 	int	start, len, try;
1310 
1311 	/*
1312 	 * if try is too large (thus target is less than try) we reduce
1313 	 * the length to trunc(len/2) [i.e. everything smaller than "try"]
1314 	 *
1315 	 * if the try is too small (thus target is greater than try) then
1316 	 * we set the new start to be (try + 1).   this means we need to
1317 	 * reduce the length to (round(len/2) - 1).
1318 	 *
1319 	 * note "adjust" below which takes advantage of the fact that
1320 	 *  (round(len/2) - 1) == trunc((len - 1) / 2)
1321 	 * for any value of len we may have
1322 	 */
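	/*
	 * quick numeric check of the identity above: for len == 5,
	 * round(5/2) - 1 == 2 == trunc((5 - 1) / 2); for len == 4 both
	 * sides give 1.
	 */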
1323 
1324 	for (start = 0, len = vm_nphysseg ; len != 0 ; len = len / 2) {
1325 		try = start + (len / 2);	/* try in the middle */
1326 
1327 		/* start past our try? */
1328 		if (pframe >= vm_physmem[try].start) {
1329 			/* was try correct? */
1330 			if (pframe < vm_physmem[try].end) {
1331 				if (offp)
1332 					*offp = pframe - vm_physmem[try].start;
1333 				return(try);            /* got it */
1334 			}
1335 			start = try + 1;	/* next time, start here */
1336 			len--;			/* "adjust" */
1337 		} else {
1338 			/*
1339 			 * pframe before try, just reduce length of
1340 			 * region, done in "for" loop
1341 			 */
1342 		}
1343 	}
1344 	return(-1);
1345 
1346 #else
1347 	/* linear search for it */
1348 	int	lcv;
1349 
1350 	for (lcv = 0; lcv < vm_nphysseg; lcv++) {
1351 		if (pframe >= vm_physmem[lcv].start &&
1352 		    pframe < vm_physmem[lcv].end) {
1353 			if (offp)
1354 				*offp = pframe - vm_physmem[lcv].start;
1355 			return(lcv);		   /* got it */
1356 		}
1357 	}
1358 	return(-1);
1359 
1360 #endif
1361 }
1362 
1363 /*
1364  * PHYS_TO_VM_PAGE: find vm_page for a PA.   used by MI code to get vm_pages
1365  * back from an I/O mapping (ugh!).   used in some MD code as well.
1366  */
1367 struct vm_page *
1368 PHYS_TO_VM_PAGE(paddr_t pa)
1369 {
1370 	paddr_t pf = atop(pa);
1371 	int	off;
1372 	int	psi;
1373 
1374 	psi = vm_physseg_find(pf, &off);
1375 
1376 	return ((psi == -1) ? NULL : &vm_physmem[psi].pgs[off]);
1377 }
1378 #endif /* VM_PHYSSEG_MAX > 1 */
1379 
1380 /*
1381  * uvm_pagelookup: look up a page
1382  *
1383  * => caller should lock object to keep someone from pulling the page
1384  *	out from under it
1385  */
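/*
 * The lookup builds a dummy vm_page on the stack with only the offset
 * key filled in and hands it to RB_FIND, so nothing else in "pg" needs
 * to be initialized.
 */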
1386 struct vm_page *
1387 uvm_pagelookup(struct uvm_object *obj, voff_t off)
1388 {
1389 	/* XXX if stack is too much, handroll */
1390 	struct vm_page pg;
1391 
1392 	pg.offset = off;
1393 	return (RB_FIND(uvm_objtree, &obj->memt, &pg));
1394 }
1395 
1396 /*
1397  * uvm_pagewire: wire the page, thus removing it from the daemon's grasp
1398  *
1399  * => caller must lock page queues
1400  */
1401 void
1402 uvm_pagewire(struct vm_page *pg)
1403 {
1404 	if (pg->wire_count == 0) {
1405 		if (pg->pg_flags & PQ_ACTIVE) {
1406 			TAILQ_REMOVE(&uvm.page_active, pg, pageq);
1407 			atomic_clearbits_int(&pg->pg_flags, PQ_ACTIVE);
1408 			uvmexp.active--;
1409 		}
1410 		if (pg->pg_flags & PQ_INACTIVE) {
1411 			if (pg->pg_flags & PQ_SWAPBACKED)
1412 				TAILQ_REMOVE(&uvm.page_inactive_swp, pg, pageq);
1413 			else
1414 				TAILQ_REMOVE(&uvm.page_inactive_obj, pg, pageq);
1415 			atomic_clearbits_int(&pg->pg_flags, PQ_INACTIVE);
1416 			uvmexp.inactive--;
1417 		}
1418 		uvmexp.wired++;
1419 	}
1420 	pg->wire_count++;
1421 }
1422 
1423 /*
1424  * uvm_pageunwire: unwire the page.
1425  *
1426  * => activate if wire count goes to zero.
1427  * => caller must lock page queues
1428  */
1429 void
1430 uvm_pageunwire(struct vm_page *pg)
1431 {
1432 	pg->wire_count--;
1433 	if (pg->wire_count == 0) {
1434 		TAILQ_INSERT_TAIL(&uvm.page_active, pg, pageq);
1435 		uvmexp.active++;
1436 		atomic_setbits_int(&pg->pg_flags, PQ_ACTIVE);
1437 		uvmexp.wired--;
1438 	}
1439 }
1440 
1441 /*
1442  * uvm_pagedeactivate: deactivate page -- no pmaps have access to page
1443  *
1444  * => caller must lock page queues
1445  * => caller must check to make sure page is not wired
1446  * => object that page belongs to must be locked (so we can adjust pg->flags)
1447  */
1448 void
1449 uvm_pagedeactivate(struct vm_page *pg)
1450 {
1451 	if (pg->pg_flags & PQ_ACTIVE) {
1452 		TAILQ_REMOVE(&uvm.page_active, pg, pageq);
1453 		atomic_clearbits_int(&pg->pg_flags, PQ_ACTIVE);
1454 		uvmexp.active--;
1455 	}
1456 	if ((pg->pg_flags & PQ_INACTIVE) == 0) {
1457 		KASSERT(pg->wire_count == 0);
1458 		if (pg->pg_flags & PQ_SWAPBACKED)
1459 			TAILQ_INSERT_TAIL(&uvm.page_inactive_swp, pg, pageq);
1460 		else
1461 			TAILQ_INSERT_TAIL(&uvm.page_inactive_obj, pg, pageq);
1462 		atomic_setbits_int(&pg->pg_flags, PQ_INACTIVE);
1463 		uvmexp.inactive++;
1464 		pmap_clear_reference(pg);
1465 		/*
1466 		 * update the "clean" bit.  this isn't 100%
1467 		 * accurate, and doesn't have to be.  we'll
1468 		 * re-sync it after we zap all mappings when
1469 		 * scanning the inactive list.
1470 		 */
1471 		if ((pg->pg_flags & PG_CLEAN) != 0 &&
1472 		    pmap_is_modified(pg))
1473 			atomic_clearbits_int(&pg->pg_flags, PG_CLEAN);
1474 	}
1475 }
1476 
1477 /*
1478  * uvm_pageactivate: activate page
1479  *
1480  * => caller must lock page queues
1481  */
1482 void
1483 uvm_pageactivate(struct vm_page *pg)
1484 {
1485 	if (pg->pg_flags & PQ_INACTIVE) {
1486 		if (pg->pg_flags & PQ_SWAPBACKED)
1487 			TAILQ_REMOVE(&uvm.page_inactive_swp, pg, pageq);
1488 		else
1489 			TAILQ_REMOVE(&uvm.page_inactive_obj, pg, pageq);
1490 		atomic_clearbits_int(&pg->pg_flags, PQ_INACTIVE);
1491 		uvmexp.inactive--;
1492 	}
1493 	if (pg->wire_count == 0) {
1494 
1495 		/*
1496 		 * if page is already active, remove it from list so we
1497 		 * can put it at tail.  if it wasn't active, then mark
1498 		 * it active and bump active count
1499 		 */
1500 		if (pg->pg_flags & PQ_ACTIVE)
1501 			TAILQ_REMOVE(&uvm.page_active, pg, pageq);
1502 		else {
1503 			atomic_setbits_int(&pg->pg_flags, PQ_ACTIVE);
1504 			uvmexp.active++;
1505 		}
1506 
1507 		TAILQ_INSERT_TAIL(&uvm.page_active, pg, pageq);
1508 	}
1509 }
1510 
1511 /*
1512  * uvm_pagezero: zero fill a page
1513  *
1514  * => if page is part of an object then the object should be locked
1515  *	to protect pg->flags.
1516  */
1517 void
1518 uvm_pagezero(struct vm_page *pg)
1519 {
1520 	atomic_clearbits_int(&pg->pg_flags, PG_CLEAN);
1521 	pmap_zero_page(pg);
1522 }
1523 
1524 /*
1525  * uvm_pagecopy: copy a page
1526  *
1527  * => if page is part of an object then the object should be locked
1528  *	to protect pg->flags.
1529  */
1530 void
1531 uvm_pagecopy(struct vm_page *src, struct vm_page *dst)
1532 {
1533 	atomic_clearbits_int(&dst->pg_flags, PG_CLEAN);
1534 	pmap_copy_page(src, dst);
1535 }
1536 
1537 /*
1538  * uvm_pagecount: count the number of physical pages in the address range.
1539  */
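/*
 * Illustrative only: e.g. uvm_pagecount(&dma_constraint) yields the
 * number of managed pages that are DMA-reachable; the result is a page
 * count, not a byte count.
 */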
1540 psize_t
1541 uvm_pagecount(struct uvm_constraint_range* constraint)
1542 {
1543 	int lcv;
1544 	psize_t sz;
1545 	paddr_t low, high;
1546 	paddr_t ps_low, ps_high;
1547 
1548 	/* Algorithm uses page numbers. */
1549 	low = atop(constraint->ucr_low);
1550 	high = atop(constraint->ucr_high);
1551 
1552 	sz = 0;
1553 	for (lcv = 0; lcv < vm_nphysseg; lcv++) {
1554 		ps_low = MAX(low, vm_physmem[lcv].avail_start);
1555 		ps_high = MIN(high, vm_physmem[lcv].avail_end);
1556 		if (ps_low < ps_high)
1557 			sz += ps_high - ps_low;
1558 	}
1559 	return sz;
1560 }
1561