/*
 * Copyright (c) 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * The Mach Operating System project at Carnegie-Mellon University.
 *
 * %sccs.include.redist.c%
 *
 *	@(#)vm_kern.c	8.4 (Berkeley) 01/09/95
 *
 *
 * Copyright (c) 1987, 1990 Carnegie-Mellon University.
 * All rights reserved.
 *
 * Authors: Avadis Tevanian, Jr., Michael Wayne Young
 *
 * Permission to use, copy, modify and distribute this software and
 * its documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie the
 * rights to redistribute these changes.
 */

/*
 *	Kernel memory management.
 */

#include <sys/param.h>
#include <sys/systm.h>

#include <vm/vm.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>
#include <vm/vm_kern.h>

/*
 *	kmem_alloc_pageable:
 *
 *	Allocate pageable memory to the kernel's address map.
 *	map must be "kernel_map" below.
 */
vm_offset_t
kmem_alloc_pageable(map, size)
	vm_map_t map;
	register vm_size_t size;
{
	vm_offset_t addr;
	register int result;

#if 0
	if (map != kernel_map)
		panic("kmem_alloc_pageable: not called with kernel_map");
#endif

	size = round_page(size);

	addr = vm_map_min(map);
	result = vm_map_find(map, NULL, (vm_offset_t) 0,
	    &addr, size, TRUE);
	if (result != KERN_SUCCESS) {
		return(0);
	}

	return(addr);
}
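
/*
 * Editorial usage sketch, not part of the original source: a caller
 * that wants pageable kernel virtual memory allocates it from
 * kernel_map and lets pages materialize, zero-filled, on first touch.
 * The function and variable names below are hypothetical.
 */
#if 0
static void
example_pageable()
{
	vm_offset_t tbl;

	tbl = kmem_alloc_pageable(kernel_map, 16 * PAGE_SIZE);
	if (tbl == 0)
		panic("example_pageable: kernel_map exhausted");
	/* tbl now names 16 pages of pageable kernel va */
}
#endif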

/*
 *	Allocate wired-down memory in the kernel's address map
 *	or a submap.
 */
vm_offset_t
kmem_alloc(map, size)
	register vm_map_t map;
	register vm_size_t size;
{
	vm_offset_t addr;
	register vm_offset_t offset;
	extern vm_object_t kernel_object;
	vm_offset_t i;

	size = round_page(size);

	/*
	 * Use the kernel object for wired-down kernel pages.
	 * Assume that no region of the kernel object is
	 * referenced more than once.
	 */

	/*
	 * Locate sufficient space in the map.  This will give us the
	 * final virtual address for the new memory, and thus will tell
	 * us the offset within the kernel map.
	 */
	vm_map_lock(map);
	if (vm_map_findspace(map, 0, size, &addr)) {
		vm_map_unlock(map);
		return (0);
	}
	offset = addr - VM_MIN_KERNEL_ADDRESS;
	vm_object_reference(kernel_object);
	vm_map_insert(map, kernel_object, offset, addr, addr + size);
	vm_map_unlock(map);

	/*
	 * Guarantee that there are pages already in this object
	 * before calling vm_map_pageable.  This is to prevent the
	 * following scenario:
	 *
	 *	1) Threads have swapped out, so that there is a
	 *	   pager for the kernel_object.
	 *	2) The kmsg zone is empty, and so we are kmem_allocing
	 *	   a new page for it.
	 *	3) vm_map_pageable calls vm_fault; there is no page,
	 *	   but there is a pager, so we call
	 *	   pager_data_request.  But the kmsg zone is empty,
	 *	   so we must kmem_alloc.
	 *	4) goto 1
	 *	5) Even if the kmsg zone is not empty: when we get
	 *	   the data back from the pager, it will be (very
	 *	   stale) non-zero data.  kmem_alloc is defined to
	 *	   return zero-filled memory.
	 *
	 * We're intentionally not activating the pages we allocate
	 * to prevent a race with page-out.  vm_map_pageable will wire
	 * the pages.
	 */

	vm_object_lock(kernel_object);
	for (i = 0; i < size; i += PAGE_SIZE) {
		vm_page_t mem;

		while ((mem = vm_page_alloc(kernel_object, offset + i)) == NULL) {
			vm_object_unlock(kernel_object);
			VM_WAIT;
			vm_object_lock(kernel_object);
		}
		vm_page_zero_fill(mem);
		mem->flags &= ~PG_BUSY;
	}
	vm_object_unlock(kernel_object);

	/*
	 * And finally, mark the data as non-pageable.
	 */

	(void) vm_map_pageable(map, (vm_offset_t) addr, addr + size, FALSE);

	/*
	 * Try to coalesce the map
	 */

	vm_map_simplify(map, addr);

	return(addr);
}

/*
 *	kmem_free:
 *
 *	Release a region of kernel virtual memory allocated
 *	with kmem_alloc, and return the physical pages
 *	associated with that region.
 */
void
kmem_free(map, addr, size)
	vm_map_t map;
	register vm_offset_t addr;
	vm_size_t size;
{
	(void) vm_map_remove(map, trunc_page(addr), round_page(addr + size));
}
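
/*
 * Editorial usage sketch, not part of the original source: kmem_alloc
 * and kmem_free pair up for wired, zero-filled kernel memory.  The
 * names below are hypothetical.
 */
#if 0
static void
example_wired()
{
	vm_offset_t buf;
	vm_size_t len;

	len = 4 * PAGE_SIZE;
	buf = kmem_alloc(kernel_map, len);	/* wired and zero-filled */
	if (buf == 0)
		panic("example_wired: out of kernel va");
	/* ... use the buffer ... */
	kmem_free(kernel_map, buf, len);	/* releases va and pages */
}
#endif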

/*
 *	kmem_suballoc:
 *
 *	Allocates a map to manage a subrange
 *	of the kernel virtual address space.
 *
 *	Arguments are as follows:
 *
 *	parent		Map to take range from
 *	size		Size of range to find
 *	min, max	Returned endpoints of map
 *	pageable	Can the region be paged
 */
vm_map_t
kmem_suballoc(parent, min, max, size, pageable)
	register vm_map_t parent;
	vm_offset_t *min, *max;
	register vm_size_t size;
	boolean_t pageable;
{
	register int ret;
	vm_map_t result;

	size = round_page(size);

	*min = (vm_offset_t) vm_map_min(parent);
	ret = vm_map_find(parent, NULL, (vm_offset_t) 0,
	    min, size, TRUE);
	if (ret != KERN_SUCCESS) {
		printf("kmem_suballoc: bad status return of %d.\n", ret);
		panic("kmem_suballoc");
	}
	*max = *min + size;
	pmap_reference(vm_map_pmap(parent));
	result = vm_map_create(vm_map_pmap(parent), *min, *max, pageable);
	if (result == NULL)
		panic("kmem_suballoc: cannot create submap");
	if ((ret = vm_map_submap(parent, *min, *max, result)) != KERN_SUCCESS)
		panic("kmem_suballoc: unable to change range to submap");
	return(result);
}
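
/*
 * Editorial usage sketch, not part of the original source: submaps are
 * carved out of kernel_map at boot time in machine-dependent startup
 * code, along the following (hypothetical) lines; `exec_map' and the
 * size are illustrative.
 */
#if 0
static void
example_suballoc()
{
	vm_offset_t minaddr, maxaddr;
	vm_map_t exec_map;

	exec_map = kmem_suballoc(kernel_map, &minaddr, &maxaddr,
	    (vm_size_t)(16 * NCARGS), TRUE);
}
#endif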

/*
 * Allocate wired-down memory in the kernel's address map for the higher
 * level kernel memory allocator (kern/kern_malloc.c).  We cannot use
 * kmem_alloc() because we may need to allocate memory at interrupt
 * level where we cannot block (canwait == FALSE).
 *
 * This routine has its own private kernel submap (kmem_map) and object
 * (kmem_object).  This, combined with the fact that only malloc uses
 * this routine, ensures that we will never block in map or object waits.
 *
 * Note that this still only works in a uni-processor environment and
 * when called at splhigh().
 *
 * We don't worry about expanding the map (adding entries) since entries
 * for wired maps are statically allocated.
 */
vm_offset_t
kmem_malloc(map, size, canwait)
	register vm_map_t map;
	register vm_size_t size;
	boolean_t canwait;
{
	register vm_offset_t offset, i;
	vm_map_entry_t entry;
	vm_offset_t addr;
	vm_page_t m;
	extern vm_object_t kmem_object;

	if (map != kmem_map && map != mb_map)
		panic("kmem_malloc: map != {kmem,mb}_map");

	size = round_page(size);
	addr = vm_map_min(map);

	/*
	 * Locate sufficient space in the map.  This will give us the
	 * final virtual address for the new memory, and thus will tell
	 * us the offset within the kernel map.
	 */
	vm_map_lock(map);
	if (vm_map_findspace(map, 0, size, &addr)) {
		vm_map_unlock(map);
		if (canwait)		/* XXX should wait */
			panic("kmem_malloc: %s too small",
			    map == kmem_map ? "kmem_map" : "mb_map");
		return (0);
	}
	offset = addr - vm_map_min(kmem_map);
	vm_object_reference(kmem_object);
	vm_map_insert(map, kmem_object, offset, addr, addr + size);

	/*
	 * If we can wait, just mark the range as wired
	 * (will fault pages as necessary).
	 */
	if (canwait) {
		vm_map_unlock(map);
		(void) vm_map_pageable(map, (vm_offset_t) addr, addr + size,
		    FALSE);
		vm_map_simplify(map, addr);
		return(addr);
	}

	/*
	 * If we cannot wait then we must allocate all memory up front,
	 * pulling it off the active queue to prevent pageout.
	 */
	vm_object_lock(kmem_object);
	for (i = 0; i < size; i += PAGE_SIZE) {
		m = vm_page_alloc(kmem_object, offset + i);

		/*
		 * Ran out of space, free everything up and return.
		 * Don't need to lock page queues here as we know
		 * that the pages we got aren't on any queues.
		 */
		if (m == NULL) {
			while (i != 0) {
				i -= PAGE_SIZE;
				m = vm_page_lookup(kmem_object, offset + i);
				vm_page_free(m);
			}
			vm_object_unlock(kmem_object);
			vm_map_delete(map, addr, addr + size);
			vm_map_unlock(map);
			return(0);
		}
#if 0
		vm_page_zero_fill(m);
#endif
		m->flags &= ~PG_BUSY;
	}
	vm_object_unlock(kmem_object);

	/*
	 * Mark map entry as non-pageable.
	 * Assert: vm_map_insert() will never be able to extend the previous
	 * entry so there will be a new entry exactly corresponding to this
	 * address range and it will have wired_count == 0.
	 */
	if (!vm_map_lookup_entry(map, addr, &entry) ||
	    entry->start != addr || entry->end != addr + size ||
	    entry->wired_count)
		panic("kmem_malloc: entry not found or misaligned");
	entry->wired_count++;

	/*
	 * Loop thru pages, entering them in the pmap.
	 * (We cannot add them to the wired count without
	 * wrapping the vm_page_queue_lock in splimp...)
	 */
	for (i = 0; i < size; i += PAGE_SIZE) {
		vm_object_lock(kmem_object);
		m = vm_page_lookup(kmem_object, offset + i);
		vm_object_unlock(kmem_object);
		pmap_enter(map->pmap, addr + i, VM_PAGE_TO_PHYS(m),
		    VM_PROT_DEFAULT, TRUE);
	}
	vm_map_unlock(map);

	vm_map_simplify(map, addr);
	return(addr);
}
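
/*
 * Editorial usage sketch, not part of the original source: kern_malloc.c
 * is the intended caller.  At interrupt level it passes canwait == FALSE
 * and must be prepared for a zero return; a simplified version of that
 * pattern follows.  The variable `npg' is hypothetical here.
 */
#if 0
	vm_offset_t va;

	va = kmem_malloc(kmem_map, (vm_size_t)ctob(npg), canwait);
	if (va == 0)
		return (NULL);	/* caller copes; nothing blocked */
#endif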

/*
 *	kmem_alloc_wait
 *
 *	Allocates pageable memory from a sub-map of the kernel.  If the submap
 *	has no room, the caller sleeps waiting for more memory in the submap.
 */
vm_offset_t
kmem_alloc_wait(map, size)
	vm_map_t map;
	vm_size_t size;
{
	vm_offset_t addr;

	size = round_page(size);

	for (;;) {
		/*
		 * To make this work for more than one map,
		 * use the map's lock to lock out sleepers/wakers.
		 */
		vm_map_lock(map);
		if (vm_map_findspace(map, 0, size, &addr) == 0)
			break;
		/* no space now; see if we can ever get space */
		if (vm_map_max(map) - vm_map_min(map) < size) {
			vm_map_unlock(map);
			return (0);
		}
		assert_wait(map, TRUE);
		vm_map_unlock(map);
		thread_block();
	}
	vm_map_insert(map, NULL, (vm_offset_t)0, addr, addr + size);
	vm_map_unlock(map);
	return (addr);
}

/*
 *	kmem_free_wakeup
 *
 *	Returns memory to a submap of the kernel, and wakes up any threads
 *	waiting for memory in that map.
 */
void
kmem_free_wakeup(map, addr, size)
	vm_map_t map;
	vm_offset_t addr;
	vm_size_t size;
{
	vm_map_lock(map);
	(void) vm_map_delete(map, trunc_page(addr), round_page(addr + size));
	thread_wakeup(map);
	vm_map_unlock(map);
}
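
/*
 * Editorial usage sketch, not part of the original source: on a submap
 * that can fill up, kmem_alloc_wait in one thread sleeps on the map
 * itself, and kmem_free_wakeup in another thread wakes it.  The map and
 * variable names below are hypothetical.
 */
#if 0
	/* thread A: blocks until the submap has room */
	kva = kmem_alloc_wait(some_submap, round_page(len));

	/* thread B: freeing a range wakes any sleeper on the map */
	kmem_free_wakeup(some_submap, kva, round_page(len));
#endif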

/*
 * Create the kernel map; insert a mapping covering kernel text, data, bss,
 * and all space allocated thus far (`bootstrap' data).  The new map will thus
 * map the range between VM_MIN_KERNEL_ADDRESS and `start' as allocated, and
 * the range between `start' and `end' as free.
 */
void
kmem_init(start, end)
	vm_offset_t start, end;
{
	register vm_map_t m;

	m = vm_map_create(kernel_pmap, VM_MIN_KERNEL_ADDRESS, end, FALSE);
	vm_map_lock(m);
	/* N.B.: cannot use kgdb to debug, starting with this assignment ... */
	kernel_map = m;
	(void) vm_map_insert(m, NULL, (vm_offset_t)0,
	    VM_MIN_KERNEL_ADDRESS, start);
	/* ... and ending with the completion of the above `insert' */
	vm_map_unlock(m);
}
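
/*
 * Editorial usage sketch, not part of the original source: kmem_init is
 * called exactly once, from VM startup, after machine-dependent code has
 * fixed the boundary between bootstrap allocations and free kernel va;
 * the fragment below assumes the `virtual_avail'/`virtual_end' globals
 * used elsewhere in this VM system.
 */
#if 0
	extern vm_offset_t virtual_avail, virtual_end;

	kmem_init(virtual_avail, virtual_end);	/* kernel_map now exists */
#endif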