/*	$OpenBSD: uvm_km.c,v 1.70 2009/02/22 19:59:01 miod Exp $	*/
/*	$NetBSD: uvm_km.c,v 1.42 2001/01/14 02:10:01 thorpej Exp $	*/

/*
 * Copyright (c) 1997 Charles D. Cranor and Washington University.
 * Copyright (c) 1991, 1993, The Regents of the University of California.
 *
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * The Mach Operating System project at Carnegie-Mellon University.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by Charles D. Cranor,
 *      Washington University, the University of California, Berkeley and
 *      its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)vm_kern.c   8.3 (Berkeley) 1/12/94
 * from: Id: uvm_km.c,v 1.1.2.14 1998/02/06 05:19:27 chs Exp
 *
 *
 * Copyright (c) 1987, 1990 Carnegie-Mellon University.
 * All rights reserved.
 *
 * Permission to use, copy, modify and distribute this software and
 * its documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie the
 * rights to redistribute these changes.
 */

/*
 * uvm_km.c: handle kernel memory allocation and management
 */

/*
 * overview of kernel memory management:
 *
 * the kernel virtual address space is mapped by "kernel_map."   kernel_map
 * starts at VM_MIN_KERNEL_ADDRESS and goes to VM_MAX_KERNEL_ADDRESS.
 * note that VM_MIN_KERNEL_ADDRESS is equal to vm_map_min(kernel_map).
 *
 * the kernel_map has several "submaps."   submaps can only appear in
 * the kernel_map (user processes can't use them).   submaps "take over"
 * the management of a sub-range of the kernel's address space.  submaps
 * are typically allocated at boot time and are never released.   kernel
 * virtual address space that is mapped by a submap is locked by the
 * submap's lock -- not the kernel_map's lock.
 *
 * thus, the useful feature of submaps is that they allow us to break
 * up the locking and protection of the kernel address space into smaller
 * chunks.
 *
 * the vm system has several standard kernel submaps, including:
 *   kmem_map => contains only wired kernel memory for the kernel
 *		malloc.   *** access to kmem_map must be protected
 *		by splvm() because we are allowed to call malloc()
 *		at interrupt time ***
 *   pager_map => used to map "buf" structures into kernel space
 *   exec_map => used during exec to handle exec args
 *   etc...
 *
 * the kernel allocates its private memory out of special uvm_objects whose
 * reference count is set to UVM_OBJ_KERN (thus indicating that the objects
 * are "special" and never die).   all kernel objects should be thought of
 * as large, fixed-sized, sparsely populated uvm_objects.   the size of each
 * kernel object equals the size of the kernel virtual address space (i.e.
 * the value "VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS").
 *
 * most kernel private memory lives in kernel_object.   the only exception
 * to this is for memory that belongs to submaps that must be protected
 * by splvm(). each of these submaps manages its own pages.
 *
 * note that just because a kernel object spans the entire kernel virtual
 * address space doesn't mean that it has to be mapped into the entire space.
 * large chunks of a kernel object's space go unused either because
 * that area of kernel VM is unmapped, or there is some other type of
 * object mapped into that range (e.g. a vnode).    for a submap's kernel
 * object, the only offsets that can ever be populated are the ones
 * managed by the submap.
 *
 * note that the "offset" in a kernel object is always the kernel virtual
 * address minus the VM_MIN_KERNEL_ADDRESS (aka vm_map_min(kernel_map)).
 * example:
 *   suppose VM_MIN_KERNEL_ADDRESS is 0xf8000000 and the kernel does a
 *   uvm_km_alloc(kernel_map, PAGE_SIZE) [allocate 1 wired down page in the
 *   kernel map].    if uvm_km_alloc returns virtual address 0xf8235000,
 *   then that means that the page at offset 0x235000 in kernel_object is
 *   mapped at 0xf8235000.
 *
 * kernel objects have one other special property: when the kernel virtual
 * memory mapping them is unmapped, the backing memory in the object is
 * freed right away.   this is done with the uvm_km_pgremove() function.
 * this has to be done because there is no backing store for kernel pages
 * and no need to save them after they are no longer referenced.
 */
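/*
 * to make the offset rule above concrete: for any kva handed out from
 * kernel_map and backed by kernel_object, the backing page can be found
 * as sketched below (illustrative only; the object must be locked and
 * "kva" is just a placeholder name):
 *
 *	voff_t off = kva - vm_map_min(kernel_map);
 *	struct vm_page *pg = uvm_pagelookup(uvm.kernel_object, off);
 *
 * pg is then the page mapped at kva, or NULL if that offset is currently
 * unpopulated.
 */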

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/kthread.h>

#include <uvm/uvm.h>

/*
 * global data structures
 */

struct vm_map *kernel_map = NULL;

/*
 * local data structures
 */

static struct vm_map		kernel_map_store;

/*
 * uvm_km_init: init kernel maps and objects to reflect reality (i.e.
 * KVM already allocated for text, data, bss, and static data structures).
 *
 * => KVM is defined by VM_MIN_KERNEL_ADDRESS/VM_MAX_KERNEL_ADDRESS.
 *    we assume that [min -> start] has already been allocated and that
 *    "end" is the end.
 */

void
uvm_km_init(vaddr_t start, vaddr_t end)
{
	vaddr_t base = VM_MIN_KERNEL_ADDRESS;

	/*
	 * next, init kernel memory objects.
	 */

	/* kernel_object: for pageable anonymous kernel memory */
	uao_init();
	uvm.kernel_object = uao_create(VM_MAX_KERNEL_ADDRESS -
				 VM_MIN_KERNEL_ADDRESS, UAO_FLAG_KERNOBJ);

	/*
	 * init the map and reserve already allocated kernel space
	 * before installing.
	 */

	uvm_map_setup(&kernel_map_store, base, end, VM_MAP_PAGEABLE);
	kernel_map_store.pmap = pmap_kernel();
	if (base != start && uvm_map(&kernel_map_store, &base, start - base,
	    NULL, UVM_UNKNOWN_OFFSET, 0, UVM_MAPFLAG(UVM_PROT_ALL, UVM_PROT_ALL,
	    UVM_INH_NONE, UVM_ADV_RANDOM, UVM_FLAG_FIXED)) != 0)
		panic("uvm_km_init: could not reserve space for kernel");

	/*
	 * install!
	 */

	kernel_map = &kernel_map_store;
}
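
/*
 * illustrative sketch of the expected caller: early startup code calls
 * uvm_km_init() once the machine-dependent layer has reported which part
 * of the kernel address space is still free.  the use of
 * pmap_virtual_space() here is an assumption for illustration; the real
 * bootstrap sequence is machine dependent and may differ.
 *
 *	vaddr_t kvm_start, kvm_end;
 *
 *	pmap_virtual_space(&kvm_start, &kvm_end);
 *	uvm_km_init(kvm_start, kvm_end);
 */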

/*
 * uvm_km_suballoc: allocate a submap in the kernel map.   once a submap
 * is allocated all references to that area of VM must go through it.  this
 * allows the locking of VAs in kernel_map to be broken up into regions.
 *
 * => if `fixed' is true, *min specifies where the region described
 *      by the submap must start
 * => if submap is non NULL we use that as the submap, otherwise we
 *	alloc a new map
 */
struct vm_map *
uvm_km_suballoc(struct vm_map *map, vaddr_t *min, vaddr_t *max, vsize_t size,
    int flags, boolean_t fixed, struct vm_map *submap)
{
	int mapflags = UVM_FLAG_NOMERGE | (fixed ? UVM_FLAG_FIXED : 0);

	size = round_page(size);	/* round up to pagesize */

	/*
	 * first allocate a blank spot in the parent map
	 */

	if (uvm_map(map, min, size, NULL, UVM_UNKNOWN_OFFSET, 0,
	    UVM_MAPFLAG(UVM_PROT_ALL, UVM_PROT_ALL, UVM_INH_NONE,
	    UVM_ADV_RANDOM, mapflags)) != 0) {
	       panic("uvm_km_suballoc: unable to allocate space in parent map");
	}

	/*
	 * set VM bounds (min is filled in by uvm_map)
	 */

	*max = *min + size;

	/*
	 * add references to pmap and create or init the submap
	 */

	pmap_reference(vm_map_pmap(map));
	if (submap == NULL) {
		submap = uvm_map_create(vm_map_pmap(map), *min, *max, flags);
		if (submap == NULL)
			panic("uvm_km_suballoc: unable to create submap");
	} else {
		uvm_map_setup(submap, *min, *max, flags);
		submap->pmap = vm_map_pmap(map);
	}

	/*
	 * now let uvm_map_submap plug it in...
	 */

	if (uvm_map_submap(map, *min, *max, submap) != 0)
		panic("uvm_km_suballoc: submap allocation failed");

	return(submap);
}
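
/*
 * usage sketch: this is roughly how a subsystem carves out its own submap
 * at boot, modeled on the kmem_map setup in the kernel malloc code.  the
 * names nkmempages and kmem_map_store are placeholders and the flags may
 * differ in the real caller; treat this as illustration only.
 *
 *	static struct vm_map kmem_map_store;
 *	vaddr_t base, limit;
 *
 *	kmem_map = uvm_km_suballoc(kernel_map, &base, &limit,
 *	    (vsize_t)nkmempages * PAGE_SIZE, VM_MAP_INTRSAFE,
 *	    FALSE, &kmem_map_store);
 */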

/*
 * uvm_km_pgremove: remove pages from a kernel uvm_object.
 *
 * => when you unmap a part of anonymous kernel memory you want to toss
 *    the pages right away.    (this gets called from uvm_unmap_...).
 */
void
uvm_km_pgremove(struct uvm_object *uobj, vaddr_t start, vaddr_t end)
{
	struct vm_page *pp;
	voff_t curoff;
	UVMHIST_FUNC("uvm_km_pgremove"); UVMHIST_CALLED(maphist);

	KASSERT(uobj->pgops == &aobj_pager);

	for (curoff = start ; curoff < end ; curoff += PAGE_SIZE) {
		pp = uvm_pagelookup(uobj, curoff);
		if (pp == NULL)
			continue;

		UVMHIST_LOG(maphist,"  page %p, busy=%ld", pp,
		    pp->pg_flags & PG_BUSY, 0, 0);

		if (pp->pg_flags & PG_BUSY) {
			/* owner must check for this when done */
			atomic_setbits_int(&pp->pg_flags, PG_RELEASED);
		} else {
			/* free the swap slot... */
			uao_dropswap(uobj, curoff >> PAGE_SHIFT);

			/*
			 * ...and free the page; note it may be on the
			 * active or inactive queues.
			 */
			uvm_lock_pageq();
			uvm_pagefree(pp);
			uvm_unlock_pageq();
		}
	}
}
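
/*
 * illustrative call shape: since kernel object offsets are relative to
 * vm_map_min(kernel_map), a caller tossing the pages behind the range
 * [startva, endva) passes offsets, not virtual addresses.  the real
 * caller is the unmap path; startva/endva are placeholder names.
 *
 *	uvm_km_pgremove(uobj, startva - vm_map_min(kernel_map),
 *	    endva - vm_map_min(kernel_map));
 */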


/*
 * uvm_km_pgremove_intrsafe: like uvm_km_pgremove(), but for "intrsafe"
 *    objects
 *
 * => when you unmap a part of anonymous kernel memory you want to toss
 *    the pages right away.    (this gets called from uvm_unmap_...).
 * => none of the pages will ever be busy, and none of them will ever
 *    be on the active or inactive queues (because these objects are
 *    never allowed to "page").
 */

void
uvm_km_pgremove_intrsafe(vaddr_t start, vaddr_t end)
{
	struct vm_page *pg;
	vaddr_t va;
	paddr_t pa;

	for (va = start; va < end; va += PAGE_SIZE) {
		if (!pmap_extract(pmap_kernel(), va, &pa))
			continue;
		pg = PHYS_TO_VM_PAGE(pa);
		if (pg == NULL)
			panic("uvm_km_pgremove_intrsafe: no page");
		uvm_pagefree(pg);
	}
}


/*
 * uvm_km_kmemalloc: lower level kernel memory allocator for malloc()
 *
 * => we map wired memory into the specified map using the obj passed in
 * => NOTE: we can return NULL even if we can wait if there is not enough
 *	free VM space in the map... caller should be prepared to handle
 *	this case.
 * => we return KVA of memory allocated
 * => flags: NOWAIT, VALLOC - just allocate VA, TRYLOCK - fail if we can't
 *	lock the map
 */

vaddr_t
uvm_km_kmemalloc(struct vm_map *map, struct uvm_object *obj, vsize_t size,
    int flags)
{
	vaddr_t kva, loopva;
	voff_t offset;
	struct vm_page *pg;
	UVMHIST_FUNC("uvm_km_kmemalloc"); UVMHIST_CALLED(maphist);

	UVMHIST_LOG(maphist,"  (map=%p, obj=%p, size=0x%lx, flags=%d)",
		    map, obj, size, flags);
	KASSERT(vm_map_pmap(map) == pmap_kernel());

	/*
	 * setup for call
	 */

	size = round_page(size);
	kva = vm_map_min(map);	/* hint */

	/*
	 * allocate some virtual space
	 */

	if (__predict_false(uvm_map(map, &kva, size, obj, UVM_UNKNOWN_OFFSET,
	      0, UVM_MAPFLAG(UVM_PROT_RW, UVM_PROT_RW, UVM_INH_NONE,
			  UVM_ADV_RANDOM, (flags & UVM_KMF_TRYLOCK))) != 0)) {
		UVMHIST_LOG(maphist, "<- done (no VM)",0,0,0,0);
		return(0);
	}

	/*
	 * if all we wanted was VA, return now
	 */

	if (flags & UVM_KMF_VALLOC) {
		UVMHIST_LOG(maphist,"<- done valloc (kva=0x%lx)", kva,0,0,0);
		return(kva);
	}

	/*
	 * recover object offset from virtual address
	 */

	if (obj != NULL)
		offset = kva - vm_map_min(kernel_map);
	else
		offset = 0;

	UVMHIST_LOG(maphist, "  kva=0x%lx, offset=0x%lx", kva, offset,0,0);

	/*
	 * now allocate and map in the memory... note that we are the only ones
	 * who should ever get a handle on this area of VM.
	 */

	loopva = kva;
	while (loopva != kva + size) {
		pg = uvm_pagealloc(obj, offset, NULL, 0);
		if (pg) {
			atomic_clearbits_int(&pg->pg_flags, PG_BUSY);
			UVM_PAGE_OWN(pg, NULL);
		}

		if (__predict_false(pg == NULL)) {
			if ((flags & UVM_KMF_NOWAIT) ||
			    ((flags & UVM_KMF_CANFAIL) &&
			    uvmexp.swpgonly == uvmexp.swpages)) {
				/* free everything! */
				uvm_unmap(map, kva, kva + size);
				return (0);
			} else {
				uvm_wait("km_getwait2");	/* sleep here */
				continue;
			}
		}

		/*
		 * map it in: note that we call pmap_enter with the map and
		 * object unlocked in case we are kmem_map.
		 */

		if (obj == NULL) {
			pmap_kenter_pa(loopva, VM_PAGE_TO_PHYS(pg),
			    UVM_PROT_RW);
		} else {
			pmap_enter(map->pmap, loopva, VM_PAGE_TO_PHYS(pg),
			    UVM_PROT_RW,
			    PMAP_WIRED | VM_PROT_READ | VM_PROT_WRITE);
		}
		loopva += PAGE_SIZE;
		offset += PAGE_SIZE;
	}
	pmap_update(pmap_kernel());

	UVMHIST_LOG(maphist,"<- done (kva=0x%lx)", kva,0,0,0);
	return(kva);
}
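
/*
 * usage sketch: the kernel malloc() backend is the typical caller,
 * allocating wired memory out of kmem_map at splvm() as required by the
 * locking rules at the top of this file.  the exact call in the malloc
 * code may look slightly different; this is illustration only.
 *
 *	int s;
 *	vaddr_t va;
 *
 *	s = splvm();
 *	va = uvm_km_kmemalloc(kmem_map, NULL, round_page(len),
 *	    UVM_KMF_NOWAIT);
 *	splx(s);
 *
 * a return value of 0 means no VA or no physical memory was available;
 * the caller must be prepared to fail the request.
 */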

/*
 * uvm_km_free: free an area of kernel memory
 */

void
uvm_km_free(struct vm_map *map, vaddr_t addr, vsize_t size)
{
	uvm_unmap(map, trunc_page(addr), round_page(addr+size));
}

/*
 * uvm_km_free_wakeup: free an area of kernel memory and wake up
 * anyone waiting for vm space.
 *
 * => XXX: "wanted" bit + unlock&wait on other end?
 */

void
uvm_km_free_wakeup(struct vm_map *map, vaddr_t addr, vsize_t size)
{
	struct vm_map_entry *dead_entries;

	vm_map_lock(map);
	uvm_unmap_remove(map, trunc_page(addr), round_page(addr+size),
			 &dead_entries, NULL);
	wakeup(map);
	vm_map_unlock(map);

	if (dead_entries != NULL)
		uvm_unmap_detach(dead_entries, 0);
}

/*
 * uvm_km_alloc1: allocate wired down memory in the kernel map.
 *
 * => we can sleep if needed
 */

vaddr_t
uvm_km_alloc1(struct vm_map *map, vsize_t size, vsize_t align, boolean_t zeroit)
{
	vaddr_t kva, loopva;
	voff_t offset;
	struct vm_page *pg;
	UVMHIST_FUNC("uvm_km_alloc1"); UVMHIST_CALLED(maphist);

	UVMHIST_LOG(maphist,"(map=%p, size=0x%lx)", map, size,0,0);
	KASSERT(vm_map_pmap(map) == pmap_kernel());

	size = round_page(size);
	kva = vm_map_min(map);		/* hint */

	/*
	 * allocate some virtual space
	 */

	if (__predict_false(uvm_map(map, &kva, size, uvm.kernel_object,
	    UVM_UNKNOWN_OFFSET, align, UVM_MAPFLAG(UVM_PROT_ALL, UVM_PROT_ALL,
	    UVM_INH_NONE, UVM_ADV_RANDOM, 0)) != 0)) {
		UVMHIST_LOG(maphist,"<- done (no VM)",0,0,0,0);
		return(0);
	}

	/*
	 * recover object offset from virtual address
	 */

	offset = kva - vm_map_min(kernel_map);
	UVMHIST_LOG(maphist,"  kva=0x%lx, offset=0x%lx", kva, offset,0,0);

	/*
	 * now allocate the memory.  we must be careful about released pages.
	 */

	loopva = kva;
	while (size) {
		simple_lock(&uvm.kernel_object->vmobjlock);
		pg = uvm_pagelookup(uvm.kernel_object, offset);

		/*
		 * if we found a page in an unallocated region, it must be
		 * released
		 */
		if (pg) {
			if ((pg->pg_flags & PG_RELEASED) == 0)
				panic("uvm_km_alloc1: non-released page");
			atomic_setbits_int(&pg->pg_flags, PG_WANTED);
			UVM_UNLOCK_AND_WAIT(pg, &uvm.kernel_object->vmobjlock,
			    FALSE, "km_alloc", 0);
			continue;   /* retry */
		}

		/* allocate ram */
		pg = uvm_pagealloc(uvm.kernel_object, offset, NULL, 0);
		if (pg) {
			atomic_clearbits_int(&pg->pg_flags, PG_BUSY);
			UVM_PAGE_OWN(pg, NULL);
		}
		simple_unlock(&uvm.kernel_object->vmobjlock);
		if (__predict_false(pg == NULL)) {
			if (curproc == uvm.pagedaemon_proc) {
				/*
				 * It is unfeasible for the page daemon to
				 * sleep for memory, so free what we have
				 * allocated and fail.
				 */
				uvm_unmap(map, kva, loopva - kva);
				return (NULL);
			} else {
				uvm_wait("km_alloc1w");	/* wait for memory */
				continue;
			}
		}

		/*
		 * map it in; note we're never called with an intrsafe
		 * object, so we always use regular old pmap_enter().
		 */
		pmap_enter(map->pmap, loopva, VM_PAGE_TO_PHYS(pg),
		    UVM_PROT_ALL, PMAP_WIRED | VM_PROT_READ | VM_PROT_WRITE);

		loopva += PAGE_SIZE;
		offset += PAGE_SIZE;
		size -= PAGE_SIZE;
	}
	pmap_update(map->pmap);

	/*
	 * zero on request (note that "size" is now zero due to the above loop
	 * so we need to subtract kva from loopva to reconstruct the size).
	 */

	if (zeroit)
		memset((caddr_t)kva, 0, loopva - kva);

	UVMHIST_LOG(maphist,"<- done (kva=0x%lx)", kva,0,0,0);
	return(kva);
}
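
/*
 * usage sketch: a caller that is allowed to sleep and wants a wired,
 * zeroed, page-aligned region in the kernel map ("len" is a placeholder):
 *
 *	vaddr_t va;
 *
 *	va = uvm_km_alloc1(kernel_map, len, 0, TRUE);
 *
 * va is 0 only if the map had no room or the page daemon itself was the
 * caller; everyone else simply sleeps in uvm_wait() until memory appears.
 */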

/*
 * uvm_km_valloc: allocate zero-fill memory in the kernel's address space
 *
 * => memory is not allocated until fault time
 */

vaddr_t
uvm_km_valloc(struct vm_map *map, vsize_t size)
{
	return(uvm_km_valloc_align(map, size, 0));
}

vaddr_t
uvm_km_valloc_align(struct vm_map *map, vsize_t size, vsize_t align)
{
	vaddr_t kva;
	UVMHIST_FUNC("uvm_km_valloc"); UVMHIST_CALLED(maphist);

	UVMHIST_LOG(maphist, "(map=%p, size=0x%lx)", map, size, 0,0);
	KASSERT(vm_map_pmap(map) == pmap_kernel());

	size = round_page(size);
	kva = vm_map_min(map);		/* hint */

	/*
	 * allocate some virtual space.  will be demand filled by kernel_object.
	 */

	if (__predict_false(uvm_map(map, &kva, size, uvm.kernel_object,
	    UVM_UNKNOWN_OFFSET, align, UVM_MAPFLAG(UVM_PROT_ALL, UVM_PROT_ALL,
	    UVM_INH_NONE, UVM_ADV_RANDOM, 0)) != 0)) {
		UVMHIST_LOG(maphist, "<- done (no VM)", 0,0,0,0);
		return(0);
	}

	UVMHIST_LOG(maphist, "<- done (kva=0x%lx)", kva,0,0,0);
	return(kva);
}

/*
 * uvm_km_valloc_wait: allocate zero-fill memory in the kernel's address space
 *
 * => memory is not allocated until fault time
 * => if no room in map, wait for space to free, unless requested size
 *    is larger than map (in which case we return 0)
 */

vaddr_t
uvm_km_valloc_prefer_wait(struct vm_map *map, vsize_t size, voff_t prefer)
{
	vaddr_t kva;
	UVMHIST_FUNC("uvm_km_valloc_prefer_wait"); UVMHIST_CALLED(maphist);

	UVMHIST_LOG(maphist, "(map=%p, size=0x%lx)", map, size, 0,0);
	KASSERT(vm_map_pmap(map) == pmap_kernel());

	size = round_page(size);
	if (size > vm_map_max(map) - vm_map_min(map))
		return(0);

	while (1) {
		kva = vm_map_min(map);		/* hint */

		/*
		 * allocate some virtual space.   will be demand filled
		 * by kernel_object.
		 */

		if (__predict_true(uvm_map(map, &kva, size, uvm.kernel_object,
		    prefer, 0, UVM_MAPFLAG(UVM_PROT_ALL,
		    UVM_PROT_ALL, UVM_INH_NONE, UVM_ADV_RANDOM, 0)) == 0)) {
			UVMHIST_LOG(maphist,"<- done (kva=0x%lx)", kva,0,0,0);
			return(kva);
		}

		/*
		 * failed.  sleep for a while (on map)
		 */

		UVMHIST_LOG(maphist,"<<<sleeping>>>",0,0,0,0);
		tsleep((caddr_t)map, PVM, "vallocwait", 0);
	}
	/*NOTREACHED*/
}

vaddr_t
uvm_km_valloc_wait(struct vm_map *map, vsize_t size)
{
	return uvm_km_valloc_prefer_wait(map, size, UVM_UNKNOWN_OFFSET);
}
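
/*
 * usage sketch: exec argument handling is the classic consumer of the
 * valloc interfaces -- it reserves pageable KVA in exec_map up front and
 * lets kernel_object fill the pages in at fault time.  the exact call in
 * the exec code may differ; this is illustration only.
 *
 *	vaddr_t argp;
 *
 *	argp = uvm_km_valloc_wait(exec_map, NCARGS);
 */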

int uvm_km_pages_free; /* number of pages currently on free list */

#if defined(__HAVE_PMAP_DIRECT)
/*
 * uvm_km_page allocator, __HAVE_PMAP_DIRECT arch
 * On architectures with machine memory direct mapped into a portion
 * of KVM, we have very little work to do.  Just get a physical page,
 * and find and return its VA.  We use the poolpage functions for this.
 */
void
uvm_km_page_init(void)
{
	/* nothing */
}

void *
uvm_km_getpage(boolean_t waitok, int *slowdown)
{
	struct vm_page *pg;
	vaddr_t va;

	*slowdown = 0;
 again:
	pg = uvm_pagealloc(NULL, 0, NULL, UVM_PGA_USERESERVE);
	if (__predict_false(pg == NULL)) {
		if (waitok) {
			uvm_wait("plpg");
			goto again;
		} else
			return (NULL);
	}
	va = pmap_map_direct(pg);
	if (__predict_false(va == 0))
		uvm_pagefree(pg);
	return ((void *)va);
}

void
uvm_km_putpage(void *v)
{
	uvm_pagefree(pmap_unmap_direct((vaddr_t)v));
}
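
/*
 * usage sketch (applies to both the direct-map version above and the
 * reserve-list version below): a pool-style consumer takes one wired
 * page at a time, honours the slowdown hint, and returns the page
 * untouched when done.
 *
 *	int slowdown;
 *	void *v;
 *
 *	v = uvm_km_getpage(TRUE, &slowdown);	(TRUE: ok to sleep)
 *	if (v == NULL)
 *		allocation failed (only really possible when not waiting)
 *	else if (slowdown)
 *		reserve is running low; caller should ease off soon
 *	...
 *	uvm_km_putpage(v);
 */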

#else
/*
 * uvm_km_page allocator, non __HAVE_PMAP_DIRECT archs
 * This is a special allocator that uses a reserve of free pages
 * to fulfill requests.  It is fast and interrupt safe, but can only
 * return page sized regions.  Its primary use is as a backend for pool.
 *
 * The memory returned is allocated from the larger kernel_map, sparing
 * pressure on the small interrupt-safe kmem_map.  It is wired, but
 * not zero filled.
 */

struct mutex uvm_km_mtx;
int uvm_km_pages_lowat; /* allocate more when reserve drops below this */
struct km_page {
	struct km_page *next;
} *uvm_km_pages_head;

struct proc *uvm_km_proc;

void uvm_km_createthread(void *);
void uvm_km_thread(void *);

/*
 * Allocate the initial reserve, and create the thread which will
 * keep the reserve full.  For bootstrapping, we allocate more than
 * the lowat amount, because it may be a while before the thread is
 * running.
 */
void
uvm_km_page_init(void)
{
	struct km_page *page;
	int lowat_min;
	int i;

	mtx_init(&uvm_km_mtx, IPL_VM);
	if (!uvm_km_pages_lowat) {
		/* based on physmem, calculate a good value here */
		uvm_km_pages_lowat = physmem / 256;
		if (uvm_km_pages_lowat > 2048)
			uvm_km_pages_lowat = 2048;
		lowat_min = physmem < atop(16 * 1024 * 1024) ? 32 : 128;
		if (uvm_km_pages_lowat < lowat_min)
			uvm_km_pages_lowat = lowat_min;
	}

	for (i = 0; i < uvm_km_pages_lowat * 4; i++) {
		page = (void *)uvm_km_alloc(kernel_map, PAGE_SIZE);
		page->next = uvm_km_pages_head;
		uvm_km_pages_head = page;
	}
	uvm_km_pages_free = i;

	/* tone down if really high */
	if (uvm_km_pages_lowat > 512)
		uvm_km_pages_lowat = 512;

	kthread_create_deferred(uvm_km_createthread, NULL);
}
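
/*
 * sizing example (illustrative, assuming 4KB pages): with 1GB of RAM,
 * physmem is 262144 pages, so the initial lowat is 262144 / 256 = 1024,
 * which clears the 128 floor and stays under the 2048 cap.  the boot
 * loop above then primes the list with 4 * 1024 = 4096 pages (16MB),
 * after which lowat is toned down to 512 for steady-state operation.
 */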

void
uvm_km_createthread(void *arg)
{
	kthread_create(uvm_km_thread, NULL, &uvm_km_proc, "kmthread");
}

/*
 * Endless loop.  We grab pages in increments of 16 pages, then
 * quickly swap them into the list.  At some point we can consider
 * returning memory to the system if we have too many free pages,
 * but that's not implemented yet.
 */
void
uvm_km_thread(void *arg)
{
	struct km_page *head, *tail, *page;
	int i, want;

	for (i = want = 16; ; ) {
		if (i < want || uvm_km_pages_free >= uvm_km_pages_lowat)
			tsleep(&uvm_km_pages_head, PVM, "kmalloc", 0);
		for (i = 0; i < want; i++) {
			page = (void *)uvm_km_alloc(kernel_map, PAGE_SIZE);
			if (i == 0)
				head = tail = page;
			if (page == NULL)
				break;
			page->next = head;
			head = page;
		}
		if (head != NULL) {
			mtx_enter(&uvm_km_mtx);
			tail->next = uvm_km_pages_head;
			uvm_km_pages_head = head;
			uvm_km_pages_free += i;
			mtx_leave(&uvm_km_mtx);
		}
		if (uvm_km_pages_free)
			wakeup(&uvm_km_pages_free);
	}
}


/*
 * Allocate one page.  We can sleep for more if the caller
 * permits it.  Wake up the thread if we've dropped below lowat.
 */
void *
uvm_km_getpage(boolean_t waitok, int *slowdown)
{
	struct km_page *page = NULL;

	*slowdown = 0;
	mtx_enter(&uvm_km_mtx);
	for (;;) {
		page = uvm_km_pages_head;
		if (page) {
			uvm_km_pages_head = page->next;
			uvm_km_pages_free--;
			break;
		}
		if (!waitok)
			break;
		msleep(&uvm_km_pages_free, &uvm_km_mtx, PVM, "getpage", 0);
	}
	mtx_leave(&uvm_km_mtx);
	if (uvm_km_pages_free < uvm_km_pages_lowat) {
		if (curproc != uvm_km_proc)
			*slowdown = 1;
		wakeup(&uvm_km_pages_head);
	}
	return (page);
}

void
uvm_km_putpage(void *v)
{
	struct km_page *page = v;

	mtx_enter(&uvm_km_mtx);
	page->next = uvm_km_pages_head;
	uvm_km_pages_head = page;
	uvm_km_pages_free++;
	mtx_leave(&uvm_km_mtx);
}
#endif