xref: /csrg-svn/sys/vm/vm_kern.c (revision 68164)
145748Smckusick /*
263379Sbostic  * Copyright (c) 1991, 1993
363379Sbostic  *	The Regents of the University of California.  All rights reserved.
445748Smckusick  *
545748Smckusick  * This code is derived from software contributed to Berkeley by
645748Smckusick  * The Mach Operating System project at Carnegie-Mellon University.
745748Smckusick  *
848493Smckusick  * %sccs.include.redist.c%
945748Smckusick  *
10*68164Scgd  *	@(#)vm_kern.c	8.4 (Berkeley) 01/09/95
1148493Smckusick  *
1248493Smckusick  *
1348493Smckusick  * Copyright (c) 1987, 1990 Carnegie-Mellon University.
1448493Smckusick  * All rights reserved.
1548493Smckusick  *
1648493Smckusick  * Authors: Avadis Tevanian, Jr., Michael Wayne Young
1748493Smckusick  *
1848493Smckusick  * Permission to use, copy, modify and distribute this software and
1948493Smckusick  * its documentation is hereby granted, provided that both the copyright
2048493Smckusick  * notice and this permission notice appear in all copies of the
2148493Smckusick  * software, derivative works or modified versions, and any portions
2248493Smckusick  * thereof, and that both notices appear in supporting documentation.
2348493Smckusick  *
2448493Smckusick  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
2548493Smckusick  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
2648493Smckusick  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
2748493Smckusick  *
2848493Smckusick  * Carnegie Mellon requests users of this software to return to
2948493Smckusick  *
3048493Smckusick  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
3148493Smckusick  *  School of Computer Science
3248493Smckusick  *  Carnegie Mellon University
3348493Smckusick  *  Pittsburgh PA 15213-3890
3448493Smckusick  *
3548493Smckusick  * any improvements or extensions that they make and grant Carnegie the
3648493Smckusick  * rights to redistribute these changes.
3745748Smckusick  */
3845748Smckusick 
3945748Smckusick /*
4045748Smckusick  *	Kernel memory management.
4145748Smckusick  */
4245748Smckusick 
4353328Sbostic #include <sys/param.h>
4453328Sbostic #include <sys/systm.h>
4545748Smckusick 
4653328Sbostic #include <vm/vm.h>
4753328Sbostic #include <vm/vm_page.h>
4853328Sbostic #include <vm/vm_pageout.h>
4953328Sbostic #include <vm/vm_kern.h>
5045748Smckusick 
5145748Smckusick /*
5245748Smckusick  *	kmem_alloc_pageable:
5345748Smckusick  *
5445748Smckusick  *	Allocate pageable memory to the kernel's address map.
5445748Smckusick  *	The map must be "kernel_map"; see the disabled sanity check below.
5645748Smckusick  */
57*68164Scgd vm_offset_t
58*68164Scgd kmem_alloc_pageable(map, size)
5945748Smckusick 	vm_map_t		map;
6045748Smckusick 	register vm_size_t	size;
6145748Smckusick {
6245748Smckusick 	vm_offset_t		addr;
6345748Smckusick 	register int		result;
6445748Smckusick 
6545748Smckusick #if	0
6645748Smckusick 	if (map != kernel_map)
6745748Smckusick 		panic("kmem_alloc_pageable: not called with kernel_map");
6860345Storek #endif
6945748Smckusick 
7045748Smckusick 	size = round_page(size);
7145748Smckusick 
7245748Smckusick 	addr = vm_map_min(map);
7348386Skarels 	result = vm_map_find(map, NULL, (vm_offset_t) 0,
7445748Smckusick 				&addr, size, TRUE);
7545748Smckusick 	if (result != KERN_SUCCESS) {
7645748Smckusick 		return(0);
7745748Smckusick 	}
7845748Smckusick 
7945748Smckusick 	return(addr);
8045748Smckusick }
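/*
 * Example (hypothetical, editor-added sketch): a caller wanting pageable
 * kernel virtual space, to be filled in lazily by faults on first touch.
 * "example_pageable_init" is illustrative, not a real kernel routine.
 */
#if 0
vm_offset_t
example_pageable_init(size)
	vm_size_t size;
{
	vm_offset_t va;

	va = kmem_alloc_pageable(kernel_map, size);
	if (va == 0)
		panic("example_pageable_init: kernel_map full");
	return (va);
}
#endif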
8145748Smckusick 
8245748Smckusick /*
8345748Smckusick  *	Allocate wired-down memory in the kernel's address map
8445748Smckusick  *	or a submap.
8545748Smckusick  */
86*68164Scgd vm_offset_t
87*68164Scgd kmem_alloc(map, size)
8845748Smckusick 	register vm_map_t	map;
8945748Smckusick 	register vm_size_t	size;
9045748Smckusick {
9145748Smckusick 	vm_offset_t		addr;
9245748Smckusick 	register vm_offset_t	offset;
9345748Smckusick 	extern vm_object_t	kernel_object;
9445748Smckusick 	vm_offset_t		i;
9545748Smckusick 
9645748Smckusick 	size = round_page(size);
9745748Smckusick 
9845748Smckusick 	/*
9945748Smckusick 	 *	Use the kernel object for wired-down kernel pages.
10045748Smckusick 	 *	Assume that no region of the kernel object is
10145748Smckusick 	 *	referenced more than once.
10245748Smckusick 	 */
10345748Smckusick 
10445748Smckusick 	/*
10552610Storek 	 * Locate sufficient space in the map.  This will give us the
10652610Storek 	 * final virtual address for the new memory, and thus will tell
10752610Storek 	 * us the offset within the kernel map.
10845748Smckusick 	 */
10952610Storek 	vm_map_lock(map);
11052610Storek 	if (vm_map_findspace(map, 0, size, &addr)) {
11152610Storek 		vm_map_unlock(map);
11252610Storek 		return (0);
11352610Storek 	}
11445748Smckusick 	offset = addr - VM_MIN_KERNEL_ADDRESS;
11545748Smckusick 	vm_object_reference(kernel_object);
11645748Smckusick 	vm_map_insert(map, kernel_object, offset, addr, addr + size);
11745748Smckusick 	vm_map_unlock(map);
11845748Smckusick 
11945748Smckusick 	/*
12045748Smckusick 	 *	Guarantee that there are pages already in this object
12145748Smckusick 	 *	before calling vm_map_pageable.  This is to prevent the
12245748Smckusick 	 *	following scenario:
12345748Smckusick 	 *
12445748Smckusick 	 *		1) Threads have swapped out, so that there is a
12545748Smckusick 	 *		   pager for the kernel_object.
12645748Smckusick 	 *		2) The kmsg zone is empty, and so we are kmem_allocing
12745748Smckusick 	 *		   a new page for it.
12845748Smckusick 	 *		3) vm_map_pageable calls vm_fault; there is no page,
12945748Smckusick 	 *		   but there is a pager, so we call
13045748Smckusick 	 *		   pager_data_request.  But the kmsg zone is empty,
13145748Smckusick 	 *		   so we must kmem_alloc.
13245748Smckusick 	 *		4) goto 1
13345748Smckusick 	 *		5) Even if the kmsg zone is not empty: when we get
13445748Smckusick 	 *		   the data back from the pager, it will be (very
13545748Smckusick 	 *		   stale) non-zero data.  kmem_alloc is defined to
13645748Smckusick 	 *		   return zero-filled memory.
13745748Smckusick 	 *
13845748Smckusick 	 *	We're intentionally not activating the pages we allocate
13945748Smckusick 	 *	to prevent a race with page-out.  vm_map_pageable will wire
14045748Smckusick 	 *	the pages.
14145748Smckusick 	 */
14245748Smckusick 
14345748Smckusick 	vm_object_lock(kernel_object);
14445748Smckusick 	for (i = 0; i < size; i += PAGE_SIZE) {
14545748Smckusick 		vm_page_t	mem;
14645748Smckusick 
14748386Skarels 		while ((mem = vm_page_alloc(kernel_object, offset+i)) == NULL) {
14845748Smckusick 			vm_object_unlock(kernel_object);
14945748Smckusick 			VM_WAIT;
15045748Smckusick 			vm_object_lock(kernel_object);
15145748Smckusick 		}
15245748Smckusick 		vm_page_zero_fill(mem);
15356382Smckusick 		mem->flags &= ~PG_BUSY;
15445748Smckusick 	}
15545748Smckusick 	vm_object_unlock(kernel_object);
15645748Smckusick 
15745748Smckusick 	/*
15845748Smckusick 	 *	And finally, mark the data as non-pageable.
15945748Smckusick 	 */
16045748Smckusick 
16145748Smckusick 	(void) vm_map_pageable(map, (vm_offset_t) addr, addr + size, FALSE);
16245748Smckusick 
16345748Smckusick 	/*
16445748Smckusick 	 *	Try to coalesce the map
16545748Smckusick 	 */
16645748Smckusick 
16745748Smckusick 	vm_map_simplify(map, addr);
16845748Smckusick 
16945748Smckusick 	return(addr);
17045748Smckusick }
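/*
 * Example (hypothetical, editor-added sketch): allocating wired,
 * zero-filled kernel memory and releasing it with kmem_free.
 * kmem_alloc returns 0 on failure, so the result must be checked.
 * "example_wired_page" is illustrative, not a real kernel routine.
 */
#if 0
void
example_wired_page()
{
	vm_offset_t va;

	va = kmem_alloc(kernel_map, PAGE_SIZE);
	if (va == 0)
		panic("example_wired_page: kernel_map full");
	/* ... use the wired, zero-filled memory at va ... */
	kmem_free(kernel_map, va, PAGE_SIZE);
}
#endif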
17145748Smckusick 
17245748Smckusick /*
17345748Smckusick  *	kmem_free:
17445748Smckusick  *
17545748Smckusick  *	Release a region of kernel virtual memory allocated
17645748Smckusick  *	with kmem_alloc, and return the physical pages
17745748Smckusick  *	associated with that region.
17845748Smckusick  */
179*68164Scgd void
180*68164Scgd kmem_free(map, addr, size)
18145748Smckusick 	vm_map_t		map;
18245748Smckusick 	register vm_offset_t	addr;
18345748Smckusick 	vm_size_t		size;
18445748Smckusick {
18545748Smckusick 	(void) vm_map_remove(map, trunc_page(addr), round_page(addr + size));
18645748Smckusick }
18745748Smckusick 
18845748Smckusick /*
18945748Smckusick  *	kmem_suballoc:
19045748Smckusick  *
19145748Smckusick  *	Allocates a map to manage a subrange
19245748Smckusick  *	of the kernel virtual address space.
19345748Smckusick  *
19445748Smckusick  *	Arguments are as follows:
19545748Smckusick  *
19645748Smckusick  *	parent		Map to take range from
19745748Smckusick  *	size		Size of range to find
19845748Smckusick  *	min, max	Returned endpoints of map
19945748Smckusick  *	pageable	Can the region be paged
20045748Smckusick  */
201*68164Scgd vm_map_t
202*68164Scgd kmem_suballoc(parent, min, max, size, pageable)
20345748Smckusick 	register vm_map_t	parent;
20445748Smckusick 	vm_offset_t		*min, *max;
20545748Smckusick 	register vm_size_t	size;
20645748Smckusick 	boolean_t		pageable;
20745748Smckusick {
20845748Smckusick 	register int	ret;
20945748Smckusick 	vm_map_t	result;
21045748Smckusick 
21145748Smckusick 	size = round_page(size);
21245748Smckusick 
21345748Smckusick 	*min = (vm_offset_t) vm_map_min(parent);
21448386Skarels 	ret = vm_map_find(parent, NULL, (vm_offset_t) 0,
21545748Smckusick 				min, size, TRUE);
21645748Smckusick 	if (ret != KERN_SUCCESS) {
21745748Smckusick 		printf("kmem_suballoc: bad status return of %d.\n", ret);
21845748Smckusick 		panic("kmem_suballoc");
21945748Smckusick 	}
22045748Smckusick 	*max = *min + size;
22145748Smckusick 	pmap_reference(vm_map_pmap(parent));
22245748Smckusick 	result = vm_map_create(vm_map_pmap(parent), *min, *max, pageable);
22348386Skarels 	if (result == NULL)
22445748Smckusick 		panic("kmem_suballoc: cannot create submap");
22545748Smckusick 	if ((ret = vm_map_submap(parent, *min, *max, result)) != KERN_SUCCESS)
22645748Smckusick 		panic("kmem_suballoc: unable to change range to submap");
22745748Smckusick 	return(result);
22845748Smckusick }
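/*
 * Example (hypothetical, editor-added sketch): carving a non-pageable
 * submap out of kernel_map at boot time, in the style of the kmem and
 * mbuf submaps.  "example_submap_init" and its locals are illustrative.
 * kmem_suballoc panics internally on failure, so no check is needed.
 */
#if 0
vm_map_t
example_submap_init()
{
	vm_offset_t my_min, my_max;

	return (kmem_suballoc(kernel_map, &my_min, &my_max,
	    (vm_size_t)(128 * PAGE_SIZE), FALSE));
}
#endif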
22945748Smckusick 
23045748Smckusick /*
23145748Smckusick  * Allocate wired-down memory in the kernel's address map for the higher
23245748Smckusick  * level kernel memory allocator (kern/kern_malloc.c).  We cannot use
23345748Smckusick  * kmem_alloc() because we may need to allocate memory at interrupt
23445748Smckusick  * level where we cannot block (canwait == FALSE).
23545748Smckusick  *
23645748Smckusick  * This routine has its own private kernel submap (kmem_map) and object
23745748Smckusick  * (kmem_object).  This, combined with the fact that only malloc uses
23845748Smckusick  * this routine, ensures that we will never block in map or object waits.
23945748Smckusick  *
24045748Smckusick  * Note that this still only works in a uni-processor environment and
24145748Smckusick  * when called at splhigh().
24245748Smckusick  *
24345748Smckusick  * We don't worry about expanding the map (adding entries) since entries
24445748Smckusick  * for wired maps are statically allocated.
24545748Smckusick  */
24645748Smckusick vm_offset_t
24745748Smckusick kmem_malloc(map, size, canwait)
24845748Smckusick 	register vm_map_t	map;
24945748Smckusick 	register vm_size_t	size;
25045748Smckusick 	boolean_t		canwait;
25145748Smckusick {
25245748Smckusick 	register vm_offset_t	offset, i;
25345748Smckusick 	vm_map_entry_t		entry;
25445748Smckusick 	vm_offset_t		addr;
25545748Smckusick 	vm_page_t		m;
25645748Smckusick 	extern vm_object_t	kmem_object;
25745748Smckusick 
25845748Smckusick 	if (map != kmem_map && map != mb_map)
25945748Smckusick 		panic("kern_malloc_alloc: map != {kmem,mb}_map");
26045748Smckusick 
26145748Smckusick 	size = round_page(size);
26245748Smckusick 	addr = vm_map_min(map);
26345748Smckusick 
26445748Smckusick 	/*
26552610Storek 	 * Locate sufficient space in the map.  This will give us the
26652610Storek 	 * final virtual address for the new memory, and thus will tell
26752610Storek 	 * us the offset within the kernel map.
26845748Smckusick 	 */
26952610Storek 	vm_map_lock(map);
27052610Storek 	if (vm_map_findspace(map, 0, size, &addr)) {
27152610Storek 		vm_map_unlock(map);
27252610Storek 		if (canwait)		/* XXX  should wait */
27352610Storek 			panic("kmem_malloc: %s too small",
27452610Storek 			    map == kmem_map ? "kmem_map" : "mb_map");
27552610Storek 		return (0);
27652610Storek 	}
27745748Smckusick 	offset = addr - vm_map_min(kmem_map);
27845748Smckusick 	vm_object_reference(kmem_object);
27945748Smckusick 	vm_map_insert(map, kmem_object, offset, addr, addr + size);
28045748Smckusick 
28145748Smckusick 	/*
28245748Smckusick 	 * If we can wait, just mark the range as wired
28345748Smckusick 	 * (will fault pages as necessary).
28445748Smckusick 	 */
28545748Smckusick 	if (canwait) {
28645748Smckusick 		vm_map_unlock(map);
28745748Smckusick 		(void) vm_map_pageable(map, (vm_offset_t) addr, addr + size,
28845748Smckusick 				       FALSE);
28945748Smckusick 		vm_map_simplify(map, addr);
29045748Smckusick 		return(addr);
29145748Smckusick 	}
29245748Smckusick 
29345748Smckusick 	/*
29445748Smckusick 	 * If we cannot wait then we must allocate all memory up front,
29545748Smckusick 	 * pulling it off the active queue to prevent pageout.
29645748Smckusick 	 */
29745748Smckusick 	vm_object_lock(kmem_object);
29845748Smckusick 	for (i = 0; i < size; i += PAGE_SIZE) {
29945748Smckusick 		m = vm_page_alloc(kmem_object, offset + i);
30045748Smckusick 
30145748Smckusick 		/*
30245748Smckusick 		 * Ran out of space, free everything up and return.
30345748Smckusick 		 * Don't need to lock page queues here as we know
30445748Smckusick 		 * that the pages we got aren't on any queues.
30545748Smckusick 		 */
30648386Skarels 		if (m == NULL) {
30745748Smckusick 			while (i != 0) {
30845748Smckusick 				i -= PAGE_SIZE;
30945748Smckusick 				m = vm_page_lookup(kmem_object, offset + i);
31045748Smckusick 				vm_page_free(m);
31145748Smckusick 			}
31245748Smckusick 			vm_object_unlock(kmem_object);
31345748Smckusick 			vm_map_delete(map, addr, addr + size);
31445748Smckusick 			vm_map_unlock(map);
31545748Smckusick 			return(0);
31645748Smckusick 		}
31745748Smckusick #if 0
31845748Smckusick 		vm_page_zero_fill(m);
31945748Smckusick #endif
32056382Smckusick 		m->flags &= ~PG_BUSY;
32145748Smckusick 	}
32245748Smckusick 	vm_object_unlock(kmem_object);
32345748Smckusick 
32445748Smckusick 	/*
32545748Smckusick 	 * Mark map entry as non-pageable.
32645748Smckusick 	 * Assert: vm_map_insert() will never be able to extend the previous
32745748Smckusick 	 * entry so there will be a new entry exactly corresponding to this
32845748Smckusick 	 * address range and it will have wired_count == 0.
32945748Smckusick 	 */
33045748Smckusick 	if (!vm_map_lookup_entry(map, addr, &entry) ||
33145748Smckusick 	    entry->start != addr || entry->end != addr + size ||
33245748Smckusick 	    entry->wired_count)
33345748Smckusick 		panic("kmem_malloc: entry not found or misaligned");
33445748Smckusick 	entry->wired_count++;
33545748Smckusick 
33645748Smckusick 	/*
33745748Smckusick 	 * Loop thru pages, entering them in the pmap.
33845748Smckusick 	 * (We cannot add them to the wired count without
33945748Smckusick 	 * wrapping the vm_page_queue_lock in splimp...)
34045748Smckusick 	 */
34145748Smckusick 	for (i = 0; i < size; i += PAGE_SIZE) {
34245748Smckusick 		vm_object_lock(kmem_object);
34345748Smckusick 		m = vm_page_lookup(kmem_object, offset + i);
34445748Smckusick 		vm_object_unlock(kmem_object);
34545748Smckusick 		pmap_enter(map->pmap, addr + i, VM_PAGE_TO_PHYS(m),
34645748Smckusick 			   VM_PROT_DEFAULT, TRUE);
34745748Smckusick 	}
34845748Smckusick 	vm_map_unlock(map);
34945748Smckusick 
35045748Smckusick 	vm_map_simplify(map, addr);
35145748Smckusick 	return(addr);
35245748Smckusick }
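/*
 * Example (hypothetical, editor-added sketch): the two kmem_malloc
 * disciplines.  An interrupt-level caller passes canwait == FALSE and
 * must tolerate a 0 return, since nothing may block at splhigh; a
 * top-half caller may pass TRUE and let vm_map_pageable fault the
 * pages in.  "example_intr_alloc" is illustrative.
 */
#if 0
vm_offset_t
example_intr_alloc()
{
	vm_offset_t va;

	va = kmem_malloc(kmem_map, PAGE_SIZE, FALSE);	/* never blocks */
	if (va == 0)
		return (0);	/* kmem_map exhausted; caller recovers */
	return (va);
}
#endif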
35345748Smckusick 
35445748Smckusick /*
35545748Smckusick  *	kmem_alloc_wait
35645748Smckusick  *
35745748Smckusick  *	Allocates pageable memory from a sub-map of the kernel.  If the submap
35845748Smckusick  *	has no room, the caller sleeps waiting for more memory in the submap.
35945748Smckusick  *
36045748Smckusick  */
361*68164Scgd vm_offset_t
362*68164Scgd kmem_alloc_wait(map, size)
36345748Smckusick 	vm_map_t	map;
36445748Smckusick 	vm_size_t	size;
36545748Smckusick {
36645748Smckusick 	vm_offset_t	addr;
36745748Smckusick 
36845748Smckusick 	size = round_page(size);
36945748Smckusick 
37052610Storek 	for (;;) {
37145748Smckusick 		/*
37252610Storek 		 * To make this work for more than one map,
37352610Storek 		 * use the map's lock to lock out sleepers/wakers.
37445748Smckusick 		 */
37545748Smckusick 		vm_map_lock(map);
37652615Storek 		if (vm_map_findspace(map, 0, size, &addr) == 0)
37752610Storek 			break;
37852610Storek 		/* no space now; see if we can ever get space */
37952610Storek 		if (vm_map_max(map) - vm_map_min(map) < size) {
38045748Smckusick 			vm_map_unlock(map);
38152610Storek 			return (0);
38245748Smckusick 		}
383*68164Scgd 		assert_wait(map, TRUE);
38452610Storek 		vm_map_unlock(map);
38552610Storek 		thread_block();
38652610Storek 	}
38752610Storek 	vm_map_insert(map, NULL, (vm_offset_t)0, addr, addr + size);
38852610Storek 	vm_map_unlock(map);
38952610Storek 	return (addr);
39045748Smckusick }
39145748Smckusick 
39245748Smckusick /*
39345748Smckusick  *	kmem_free_wakeup
39445748Smckusick  *
39545748Smckusick  *	Returns memory to a submap of the kernel, and wakes up any threads
39645748Smckusick  *	waiting for memory in that map.
39745748Smckusick  */
398*68164Scgd void
399*68164Scgd kmem_free_wakeup(map, addr, size)
40045748Smckusick 	vm_map_t	map;
40145748Smckusick 	vm_offset_t	addr;
40245748Smckusick 	vm_size_t	size;
40345748Smckusick {
40445748Smckusick 	vm_map_lock(map);
40545748Smckusick 	(void) vm_map_delete(map, trunc_page(addr), round_page(addr + size));
406*68164Scgd 	thread_wakeup(map);
40745748Smckusick 	vm_map_unlock(map);
40845748Smckusick }
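/*
 * Example (hypothetical, editor-added sketch): kmem_alloc_wait and
 * kmem_free_wakeup are used as a pair on a private submap; the free
 * side wakes any threads sleeping in the alloc side.  "arg_map" is an
 * illustrative submap name; NCARGS comes from <sys/param.h>.
 */
#if 0
void
example_copy_args(arg_map)
	vm_map_t arg_map;
{
	vm_offset_t va;

	va = kmem_alloc_wait(arg_map, NCARGS);	/* may sleep for space */
	/* ... copy argument strings into va ... */
	kmem_free_wakeup(arg_map, va, NCARGS);	/* wake other sleepers */
}
#endif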
40945748Smckusick 
41045748Smckusick /*
41152610Storek  * Create the kernel map; insert a mapping covering kernel text, data, bss,
41252610Storek  * and all space allocated thus far (`bootstrap' data).  The new map will thus
41352610Storek  * map the range between VM_MIN_KERNEL_ADDRESS and `start' as allocated, and
41452610Storek  * the range between `start' and `end' as free.
41545748Smckusick  */
416*68164Scgd void
417*68164Scgd kmem_init(start, end)
41852610Storek 	vm_offset_t start, end;
41945748Smckusick {
42052610Storek 	register vm_map_t m;
42145748Smckusick 
42252612Smckusick 	m = vm_map_create(kernel_pmap, VM_MIN_KERNEL_ADDRESS, end, FALSE);
42352610Storek 	vm_map_lock(m);
42452610Storek 	/* N.B.: cannot use kgdb to debug, starting with this assignment ... */
42552610Storek 	kernel_map = m;
42652610Storek 	(void) vm_map_insert(m, NULL, (vm_offset_t)0,
42752610Storek 	    VM_MIN_KERNEL_ADDRESS, start);
42852610Storek 	/* ... and ending with the completion of the above `insert' */
42952610Storek 	vm_map_unlock(m);
43045748Smckusick }
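/*
 * Example (editor-added sketch): kmem_init is called exactly once at
 * boot, after pmap_bootstrap has recorded the range of remaining kernel
 * virtual addresses; the VM startup code typically passes virtual_avail
 * and virtual_end.
 */
#if 0
void
example_vm_bootstrap()
{
	extern vm_offset_t virtual_avail, virtual_end;

	kmem_init(virtual_avail, virtual_end);
}
#endif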
431