xref: /netbsd-src/share/man/man9/uvm.9 (revision 23c8222edbfb0f0932d88a8351d3a0cf817dfb9e)
1.\"	$NetBSD: uvm.9,v 1.54 2004/01/08 09:24:31 wiz Exp $
2.\"
3.\" Copyright (c) 1998 Matthew R. Green
4.\" All rights reserved.
5.\"
6.\" Redistribution and use in source and binary forms, with or without
7.\" modification, are permitted provided that the following conditions
8.\" are met:
9.\" 1. Redistributions of source code must retain the above copyright
10.\"    notice, this list of conditions and the following disclaimer.
11.\" 2. Redistributions in binary form must reproduce the above copyright
12.\"    notice, this list of conditions and the following disclaimer in the
13.\"    documentation and/or other materials provided with the distribution.
14.\" 3. The name of the author may not be used to endorse or promote products
15.\"    derived from this software without specific prior written permission.
16.\"
17.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18.\" IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19.\" OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20.\" IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21.\" INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
22.\" BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
23.\" LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
24.\" AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
25.\" OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27.\" SUCH DAMAGE.
28.\"
29.Dd January 7, 2004
30.Dt UVM 9
31.Os
32.Sh NAME
33.Nm uvm
34.Nd virtual memory system external interface
35.Sh SYNOPSIS
36.In sys/param.h
37.In uvm/uvm.h
38.Sh DESCRIPTION
39The UVM virtual memory system manages access to the computer's memory
40resources.
41User processes and the kernel access these resources through
42UVM's external interface.
43UVM's external interface includes functions that:
44.Pp
45.Bl -hyphen -compact
46.It
47initialise UVM sub-systems
48.It
49manage virtual address spaces
50.It
51resolve page faults
52.It
53memory map files and devices
54.It
55perform uio-based I/O to virtual memory
56.It
57allocate and free kernel virtual memory
58.It
59allocate and free physical memory
60.El
61.Pp
62In addition to exporting these services, UVM has two kernel-level processes:
63pagedaemon and swapper.
64The pagedaemon process sleeps until physical memory becomes scarce.
65When that happens, pagedaemon is awoken.
66It scans physical memory, paging out and freeing memory that has not
67been recently used.
68The swapper process swaps in runnable processes that are currently swapped
69out, if there is room.
70.Pp
71There are also several miscellaneous functions.
72.Sh INITIALISATION
73.Ft void
74.br
75.Fn uvm_init "void" ;
76.Pp
77.Ft void
78.br
79.Fn uvm_init_limits "struct proc *p" ;
80.Pp
81.Ft void
82.br
83.Fn uvm_setpagesize "void" ;
84.Pp
85.Ft void
86.br
87.Fn uvm_swap_init "void" ;
88.Pp
89.Fn uvm_init
90sets up the UVM system at system boot time, after the
91copyright has been printed.
92It initialises global state, the page, map, kernel virtual memory state,
93machine-dependent physical map, kernel memory allocator,
94pager and anonymous memory sub-systems, and then enables
95paging of kernel objects.
96.Pp
97.Fn uvm_init_limits
98initialises process limits for the named process.
99This is for use by the system startup for process zero, before any
100other processes are created.
101.Pp
102.Fn uvm_setpagesize
103initialises the uvmexp members pagesize (if not already done by
104machine-dependent code), pageshift and pagemask.
105It should be called by machine-dependent code early in the
106.Fn pmap_init
107call (see
108.Xr pmap 9 ) .
109.Pp
110.Fn uvm_swap_init
111initialises the swap sub-system.
112.Sh VIRTUAL ADDRESS SPACE MANAGEMENT
113.Ft int
114.br
115.Fn uvm_map "struct vm_map *map" "vaddr_t *startp" "vsize_t size" "struct uvm_object *uobj" "voff_t uoffset" "vsize_t align" "uvm_flag_t flags" ;
116.Pp
117.Ft int
118.br
119.Fn uvm_map_pageable "struct vm_map *map" "vaddr_t start" "vaddr_t end" "boolean_t new_pageable" "int lockflags" ;
120.Pp
121.Ft boolean_t
122.br
123.Fn uvm_map_checkprot "struct vm_map *map" "vaddr_t start" "vaddr_t end" "vm_prot_t protection" ;
124.Pp
125.Ft int
126.br
127.Fn uvm_map_protect "struct vm_map *map" "vaddr_t start" "vaddr_t end" "vm_prot_t new_prot" "boolean_t set_max" ;
128.Pp
129.Ft int
130.br
131.Fn uvm_deallocate "struct vm_map *map" "vaddr_t start" "vsize_t size" ;
132.Pp
133.Ft struct vmspace *
134.br
135.Fn uvmspace_alloc "vaddr_t min" "vaddr_t max" "int pageable" ;
136.Pp
137.Ft void
138.br
139.Fn uvmspace_exec "struct proc *p" "vaddr_t start" "vaddr_t end" ;
140.Pp
141.Ft struct vmspace *
142.br
143.Fn uvmspace_fork "struct vmspace *vm" ;
144.Pp
145.Ft void
146.br
147.Fn uvmspace_free "struct vmspace *vm1" ;
148.Pp
149.Ft void
150.br
151.Fn uvmspace_share "struct proc *p1" "struct proc *p2" ;
152.Pp
153.Ft void
154.br
155.Fn uvmspace_unshare "struct proc *p" ;
156.Pp
157.Ft boolean_t
158.br
159.Fn uvm_uarea_alloc "vaddr_t *uaddrp" ;
160.Pp
161.Ft void
162.br
163.Fn uvm_uarea_free "vaddr_t uaddr" ;
164.Pp
165.Fn uvm_map
166establishes a valid mapping in map
167.Fa map ,
168which must be unlocked.
169The new mapping has size
170.Fa size ,
171which must be a multiple of
172.Dv PAGE_SIZE .
173The
174.Fa uobj
175and
176.Fa uoffset
177arguments can have four meanings.
178When
179.Fa uobj
180is
181.Dv NULL
182and
183.Fa uoffset
184is
185.Dv UVM_UNKNOWN_OFFSET ,
186.Fn uvm_map
187does not use the machine-dependent
188.Dv PMAP_PREFER
189function.
190If
191.Fa uoffset
192is any other value, it is used as the hint to
193.Dv PMAP_PREFER .
194When
195.Fa uobj
196is not
197.Dv NULL
198and
199.Fa uoffset
200is
201.Dv UVM_UNKNOWN_OFFSET ,
202.Fn uvm_map
203finds the offset based upon the virtual address, passed as
204.Fa startp .
205If
206.Fa uoffset
207is any other value, we are doing a normal mapping at this offset.
208The start address of the map will be returned in
209.Fa startp .
210.Pp
211.Fa align
212specifies alignment of mapping unless
213.Dv UVM_FLAG_FIXED
214is specified in
215.Fa flags .
216.Fa align
217must be a power of 2.
218.Pp
219.Fa flags
220passed to
221.Fn uvm_map
222are typically created using the
223.Fn UVM_MAPFLAG "vm_prot_t prot" "vm_prot_t maxprot" "vm_inherit_t inh" "int advice" "int flags"
224macro, which uses the following values.
225The values that
226.Fa prot
227and
228.Fa maxprot
229can take are:
230.Bd -literal
231#define UVM_PROT_MASK   0x07    /* protection mask */
232#define UVM_PROT_NONE   0x00    /* protection none */
233#define UVM_PROT_ALL    0x07    /* everything */
234#define UVM_PROT_READ   0x01    /* read */
235#define UVM_PROT_WRITE  0x02    /* write */
236#define UVM_PROT_EXEC   0x04    /* exec */
237#define UVM_PROT_R      0x01    /* read */
238#define UVM_PROT_W      0x02    /* write */
239#define UVM_PROT_RW     0x03    /* read-write */
240#define UVM_PROT_X      0x04    /* exec */
241#define UVM_PROT_RX     0x05    /* read-exec */
242#define UVM_PROT_WX     0x06    /* write-exec */
243#define UVM_PROT_RWX    0x07    /* read-write-exec */
244.Ed
245.Pp
246The values that
247.Fa inh
248can take are:
249.Bd -literal
250#define UVM_INH_MASK    0x30    /* inherit mask */
251#define UVM_INH_SHARE   0x00    /* "share" */
252#define UVM_INH_COPY    0x10    /* "copy" */
253#define UVM_INH_NONE    0x20    /* "none" */
254#define UVM_INH_DONATE  0x30    /* "donate" \*[Lt]\*[Lt] not used */
255.Ed
256.Pp
257The values that
258.Fa advice
259can take are:
260.Bd -literal
261#define UVM_ADV_NORMAL     0x0  /* 'normal' */
262#define UVM_ADV_RANDOM     0x1  /* 'random' */
263#define UVM_ADV_SEQUENTIAL 0x2  /* 'sequential' */
264#define UVM_ADV_MASK       0x7  /* mask */
265.Ed
266.Pp
267The values that
268.Fa flags
269can take are:
270.Bd -literal
271#define UVM_FLAG_FIXED   0x010000 /* find space */
272#define UVM_FLAG_OVERLAY 0x020000 /* establish overlay */
273#define UVM_FLAG_NOMERGE 0x040000 /* don't merge map entries */
274#define UVM_FLAG_COPYONW 0x080000 /* set copy_on_write flag */
275#define UVM_FLAG_AMAPPAD 0x100000 /* for bss: pad amap to reduce malloc() */
276#define UVM_FLAG_TRYLOCK 0x200000 /* fail if we can not lock map */
277.Ed
278.Pp
279The
280.Dv UVM_MAPFLAG
281macro arguments can be combined with an or operator.
282There are several special purpose macros for checking protection
283combinations, e.g., the
284.Dv UVM_PROT_WX
285macro.
286There are also some additional macros to extract bits from the flags.
287The
288.Dv UVM_PROTECTION ,
289.Dv UVM_INHERIT ,
290.Dv UVM_MAXPROTECTION
291and
292.Dv UVM_ADVICE
293macros return the protection, inheritance, maximum protection and advice,
294respectively.
295.Fn uvm_map
296returns a standard UVM return value.
297.Pp
298.Fn uvm_map_pageable
299changes the pageability of the pages in the range from
300.Fa start
301to
302.Fa end
303in map
304.Fa map
305to
306.Fa new_pageable .
307.Fn uvm_map_pageable
308returns a standard UVM return value.
309.Pp
310.Fn uvm_map_checkprot
311checks the protection of the range from
312.Fa start
313to
314.Fa end
315in map
316.Fa map
317against
318.Fa protection .
319This returns either
320.Dv TRUE
321or
322.Dv FALSE .
323.Pp
324.Fn uvm_map_protect
325changes the protection
326.Fa start
327to
328.Fa end
329in map
330.Fa map
331to
332.Fa new_prot ,
333also setting the maximum protection to the region to
334.Fa new_prot
335if
336.Fa set_max
337is non-zero.
338This function returns a standard UVM return value.
339.Pp
340.Fn uvm_deallocate
341deallocates kernel memory in map
342.Fa map
343from address
344.Fa start
345to
346.Fa start + size .
347.Pp
348.Fn uvmspace_alloc
349allocates and returns a new address space, with ranges from
350.Fa min
351to
352.Fa max ,
353setting the pageability of the address space to
354.Fa pageable .
355.Pp
356.Fn uvmspace_exec
357either reuses the address space of process
358.Fa p
359if there are no other references to it, or creates
360a new one with
361.Fn uvmspace_alloc .
362The range of valid addresses in the address space is reset to
363.Fa start
364through
365.Fa end .
366.Pp
367.Fn uvmspace_fork
368creates and returns a new address space based upon the
369.Fa vm1
370address space, typically used when allocating an address space for a
371child process.
372.Pp
373.Fn uvmspace_free
374lowers the reference count on the address space
375.Fa vm ,
376freeing the data structures if there are no other references.
377.Pp
378.Fn uvmspace_share
379causes process
380.Fa p2
381to share the address space of
382.Fa p1 .
383.Pp
384.Fn uvmspace_unshare
385ensures that process
386.Fa p
387has its own, unshared address space, by creating a new one if
388necessary by calling
389.Fn uvmspace_fork .
390.Pp
391.Fn uvm_uarea_alloc
392allocates virtual space for a u-area (i.e., a kernel stack) and stores
393its virtual address in
394.Fa *uaddrp .
395The return value is
396.Dv TRUE
397if the u-area is already backed by wired physical memory, otherwise
398.Dv FALSE .
399.Pp
400.Fn uvm_uarea_free
401frees a u-area allocated with
402.Fn uvm_uarea_alloc ,
403freeing both the virtual space and any physical pages which may have been
404allocated later to back that virtual space.
405.Sh PAGE FAULT HANDLING
406.Ft int
407.br
408.Fn uvm_fault "struct vm_map *orig_map" "vaddr_t vaddr" "vm_fault_t fault_type" "vm_prot_t access_type" ;
409.Pp
410.Fn uvm_fault
411is the main entry point for faults.
412It takes
413.Fa orig_map
414as the map the fault originated in, a
415.Fa vaddr
416offset into the map the fault occurred,
417.Fa fault_type
418describing the type of fault, and
419.Fa access_type
420describing the type of access requested.
421.Fn uvm_fault
422returns a standard UVM return value.
423.Sh MEMORY MAPPING FILES AND DEVICES
424.Ft struct uvm_object *
425.br
426.Fn uvn_attach "void *arg" "vm_prot_t accessprot" ;
427.Pp
428.Ft void
429.br
430.Fn uvm_vnp_setsize "struct vnode *vp" "voff_t newsize" ;
431.Pp
432.Ft void *
433.br
434.Fn ubc_alloc "struct uvm_object *uobj" "voff_t offset" "vsize_t *lenp" "int flags" ;
435.Pp
436.Ft void
437.br
438.Fn ubc_release "void *va" "int flags" ;
439.Pp
440.Fn uvn_attach
441attaches a UVM object to vnode
442.Fa arg ,
443creating the object if necessary.
444The object is returned.
445.Pp
446.Fn uvm_vnp_setsize
447sets the size of vnode
448.Fa vp
449to
450.Fa newsize .
451Caller must hold a reference to the vnode.
452If the vnode shrinks, pages no longer used are discarded.
453.Pp
454.Fn ubc_alloc
455creates a kernel mapping of
456.Fa uobj
457starting at offset
458.Fa offset .
459The desired length of the mapping is pointed to by
460.Fa lenp ,
461but the actual mapping may be smaller than this.
462.Fa lenp
463is updated to contain the actual length mapped.
464The flags must be one of
465.Bd -literal
466#define UBC_READ        0x01    /* mapping will be accessed for read */
467#define UBC_WRITE       0x02    /* mapping will be accessed for write */
468.Ed
469.Pp
470Currently,
471.Fa uobj
472must actually be a vnode object.
473Once the mapping is created, it must be accessed only by methods that can
474handle faults, such as
475.Fn uiomove
476or
477.Fn kcopy .
478Page faults on the mapping will result in the vnode's
479.Fn VOP_GETPAGES
480method being called to resolve the fault.
481.Pp
482.Fn ubc_release
483frees the mapping at
484.Fa va
485for reuse.
486The mapping may be cached to speed future accesses to the same region
487of the object.
488The flags are currently unused.
489.Sh VIRTUAL MEMORY I/O
490.Ft int
491.br
492.Fn uvm_io "struct vm_map *map" "struct uio *uio" ;
493.Pp
494.Fn uvm_io
495performs the I/O described in
496.Fa uio
497on the memory described in
498.Fa map .
499.Sh ALLOCATION OF KERNEL MEMORY
500.Ft vaddr_t
501.br
502.Fn uvm_km_alloc "struct vm_map *map" "vsize_t size" ;
503.Pp
504.Ft vaddr_t
505.br
506.Fn uvm_km_zalloc "struct vm_map *map" "vsize_t size" ;
507.Pp
508.Ft vaddr_t
509.br
510.Fn uvm_km_alloc1 "struct vm_map *map" "vsize_t size" "boolean_t zeroit" ;
511.Pp
512.Ft vaddr_t
513.br
514.Fn uvm_km_kmemalloc1 "struct vm_map *map" "struct uvm_object *obj" "vsize_t size" "vsize_t align" "voff_t preferred offset" "int flags" ;
515.Pp
516.Ft vaddr_t
517.br
518.Fn uvm_km_kmemalloc "struct vm_map *map" "struct uvm_object *obj" "vsize_t size" "int flags" ;
519.Pp
520.Ft vaddr_t
521.br
522.Fn uvm_km_valloc "struct vm_map *map" "vsize_t size" ;
523.Pp
524.Ft vaddr_t
525.br
526.Fn uvm_km_valloc_wait "struct vm_map *map" "vsize_t size" ;
527.Pp
528.Ft struct vm_map *
529.br
530.Fn uvm_km_suballoc "struct vm_map *map" "vaddr_t *min" "vaddr_t *max " "vsize_t size" "boolean_t pageable" "boolean_t fixed" "struct vm_map *submap" ;
531.Pp
532.Ft void
533.br
534.Fn uvm_km_free "struct vm_map *map" "vaddr_t addr" "vsize_t size" ;
535.Pp
536.Ft void
537.br
538.Fn uvm_km_free_wakeup "struct vm_map *map" "vaddr_t addr" "vsize_t size" ;
539.Pp
540.Fn uvm_km_alloc
541and
542.Fn uvm_km_zalloc
543allocate
544.Fa size
545bytes of wired kernel memory in map
546.Fa map .
547In addition to allocation,
548.Fn uvm_km_zalloc
549zeros the memory.
550Both of these functions are defined as macros in terms of
551.Fn uvm_km_alloc1 ,
552and should almost always be used in preference to
553.Fn uvm_km_alloc1 .
554.Pp
555.Fn uvm_km_alloc1
556allocates and returns
557.Fa size
558bytes of wired memory in the kernel map, zeroing the memory if the
559.Fa zeroit
560argument is non-zero.
561.Pp
562.Fn uvm_km_kmemalloc1
563allocates and returns
564.Fa size
565bytes of wired kernel memory into
566.Fa obj .
567The first address of the allocated memory range will be aligned according to the
568.Fa align
569argument
570.Pq specify 0 if no alignment is necessary .
571The flags can be any of:
572.Bd -literal
573#define UVM_KMF_NOWAIT  0x1                     /* matches M_NOWAIT */
574#define UVM_KMF_VALLOC  0x2                     /* allocate VA only */
575#define UVM_KMF_CANFAIL 0x4			/* caller handles failure */
576#define UVM_KMF_TRYLOCK UVM_FLAG_TRYLOCK        /* try locking only */
577.Ed
578.Pp
579.Dv UVM_KMF_NOWAIT
580causes
581.Fn uvm_km_kmemalloc1
582to return immediately if no memory is available.
583.Dv UVM_KMF_VALLOC
584causes no physical pages to be allocated, only virtual space.
585.Dv UVM_KMF_TRYLOCK
586causes
587.Fn uvm_km_kmemalloc1
588to use
589.Fn simple_lock_try
590when locking maps.
591.Dv UVM_KMF_CANFAIL
592indicates that
593.Fn uvm_km_kmemalloc1
594can return 0 even if
595.Dv UVM_KMF_NOWAIT
596is not specified.
597(If neither
598.Dv UVM_KMF_NOWAIT
599nor
600.Dv UVM_KMF_CANFAIL
601are specified,
602.Fn uvm_km_kmemalloc1
603will never fail, but rather sleep indefinitely until the allocation succeeds.)
604.Pp
605.Fn uvm_km_kmemalloc
606allocates kernel memory like
607.Fn uvm_km_kmemalloc1
608but uses the default values
609.Dv 0
610for the
611.Fa align ,
612and
613.Dv UVM_UNKNOWN_OFFSET
614for the
615.Fa prefer
616arguments.
617.Pp
618.Fn uvm_km_valloc
619and
620.Fn uvm_km_valloc_wait
621return a newly allocated zero-filled address in the kernel map of size
622.Fa size .
623.Fn uvm_km_valloc_wait
624will also wait for kernel memory to become available, if there is a
625memory shortage.
626.Pp
627.Fn uvm_km_free
628and
629.Fn uvm_km_free_wakeup
630free
631.Fa size
632bytes of memory in the kernel map, starting at address
633.Fa addr .
634.Fn uvm_km_free_wakeup
635calls
636.Fn wakeup
637on the map before unlocking the map.
638.Pp
639.Fn uvm_km_suballoc
640allocates a submap from
641.Fa map ,
642creating a new map if
643.Fa submap
644is
645.Dv NULL .
646The addresses of the submap can be specified exactly by setting the
647.Fa fixed
648argument to non-zero, which causes the
649.Fa min
650argument to specify the beginning address of the submap.
651If
652.Fa fixed
653is zero, any address of size
654.Fa size
655will be allocated from
656.Fa map
657and the start and end addresses returned in
658.Fa min
659and
660.Fa max .
661If
662.Fa pageable
663is non-zero, entries in the map may be paged out.
664.Sh ALLOCATION OF PHYSICAL MEMORY
665.Ft struct vm_page *
666.br
667.Fn uvm_pagealloc "struct uvm_object *uobj" "voff_t off" "struct vm_anon *anon" "int flags" ;
668.Pp
669.Ft void
670.br
671.Fn uvm_pagerealloc "struct vm_page *pg" "struct uvm_object *newobj" "voff_t newoff" ;
672.Pp
673.Ft void
674.br
675.Fn uvm_pagefree "struct vm_page *pg" ;
676.Pp
677.Ft int
678.br
679.Fn uvm_pglistalloc "psize_t size" "paddr_t low" "paddr_t high" "paddr_t alignment" "paddr_t boundary" "struct pglist *rlist" "int nsegs" "int waitok" ;
680.Pp
681.Ft void
682.br
683.Fn uvm_pglistfree "struct pglist *list" ;
684.Pp
685.Ft void
686.br
687.Fn uvm_page_physload "vaddr_t start" "vaddr_t end" "vaddr_t avail_start" "vaddr_t avail_end" "int free_list" ;
688.Pp
689.Fn uvm_pagealloc
690allocates a page of memory at virtual address
691.Fa off
692in either the object
693.Fa uobj
694or the anonymous memory
695.Fa anon ,
696which must be locked by the caller.
697Only one of
698.Fa uobj
699and
700.Fa anon
701can be non
702.Dv NULL .
703Returns
704.Dv NULL
705when no page can be found.
706The flags can be any of
707.Bd -literal
708#define UVM_PGA_USERESERVE      0x0001  /* ok to use reserve pages */
709#define UVM_PGA_ZERO            0x0002  /* returned page must be zero'd */
710.Ed
711.Pp
712.Dv UVM_PGA_USERESERVE
713means to allocate a page even if that will result in the number of free pages
714being lower than
715.Dv uvmexp.reserve_pagedaemon
716(if the current thread is the pagedaemon) or
717.Dv uvmexp.reserve_kernel
718(if the current thread is not the pagedaemon).
719.Dv UVM_PGA_ZERO
720causes the returned page to be filled with zeroes, either by allocating it
721from a pool of pre-zeroed pages or by zeroing it in-line as necessary.
722.Pp
723.Fn uvm_pagerealloc
724reallocates page
725.Fa pg
726to a new object
727.Fa newobj ,
728at a new offset
729.Fa newoff .
730.Pp
731.Fn uvm_pagefree
732frees the physical page
733.Fa pg .
734If the content of the page is known to be zero-filled,
735caller should set
736.Dv PG_ZERO
737in pg-\*[Gt]flags so that the page allocator will use
738the page to serve future
739.Dv UVM_PGA_ZERO
740requests efficiently.
741.Pp
742.Fn uvm_pglistalloc
743allocates a list of pages for size
744.Fa size
745bytes under various constraints.
746.Fa low
747and
748.Fa high
749describe the lowest and highest addresses acceptable for the list.
750If
751.Fa alignment
752is non-zero, it describes the required alignment of the list, in
753power-of-two notation.
754If
755.Fa boundary
756is non-zero, no segment of the list may cross this power-of-two
757boundary, relative to zero.
758.Fa nsegs
759is the maximum number of physically contiguous segments.
760If
761.Fa waitok
762is non-zero, the function may sleep until enough memory is available.
763(It also may give up in some situations, so a non-zero
764.Fa waitok
765does not imply that
766.Fn uvm_pglistalloc
767cannot return an error.)
768The allocated memory is returned in the
769.Fa rlist
770list; the caller has to provide storage only, the list is initialized by
771.Fn uvm_pglistalloc .
772.Pp
773.Fn uvm_pglistfree
774frees the list of pages pointed to by
775.Fa list .
776If the content of the page is known to be zero-filled,
777caller should set
778.Dv PG_ZERO
779in pg-\*[Gt]flags so that the page allocator will use
780the page to serve future
781.Dv UVM_PGA_ZERO
782requests efficiently.
783.Pp
784.Fn uvm_page_physload
785loads physical memory segments into VM space on the specified
786.Fa free_list .
787It must be called at system boot time to set up physical memory
788management pages.
789The arguments describe the
790.Fa start
791and
792.Fa end
793of the physical addresses of the segment, and the available start and end
794addresses of pages not already in use.
795.\" XXX expand on "system boot time"!
796.Sh PROCESSES
797.Ft void
798.br
799.Fn uvm_pageout "void" ;
800.Pp
801.Ft void
802.br
803.Fn uvm_scheduler "void" ;
804.Pp
805.Ft void
806.br
807.Fn uvm_swapin "struct proc *p" ;
808.Pp
809.Fn uvm_pageout
810is the main loop for the page daemon.
811.Pp
812.Fn uvm_scheduler
813is the process zero main loop, which is to be called after the
814system has finished starting other processes.
815It handles the swapping in of runnable, swapped out processes in priority
816order.
817.Pp
818.Fn uvm_swapin
819swaps in the named process.
820.Sh PAGE LOAN
821.Ft int
822.br
823.Fn uvm_loan "struct vm_map *map" "vaddr_t start" "vsize_t len" "void *v" "int flags" ;
824.Pp
825.Ft void
826.br
827.Fn uvm_unloan "void *v" "int npages" "int flags" ;
828.Pp
829.Fn uvm_loan
830loans pages in a map out to anons or to the kernel.
831.Fa map
832should be unlocked,
833.Fa start
834and
835.Fa len
836should be multiples of
837.Dv PAGE_SIZE .
838Argument
839.Fa flags
840should be one of
841.Bd -literal
842#define UVM_LOAN_TOANON       0x01    /* loan to anons */
843#define UVM_LOAN_TOPAGE       0x02    /* loan to kernel */
844.Ed
845.Pp
846.Fa v
847should be pointer to array of pointers to
848.Li struct anon
849or
850.Li struct vm_page ,
851as appropriate.
852The caller has to allocate memory for the array and
853ensure it's big enough to hold
854.Fa len / PAGE_SIZE
855pointers.
856Returns 0 for success, or appropriate error number otherwise.
857.Pp
858.Fn uvm_unloan
859kills loans on pages or anons.
860The
861.Fa v
862must point to the array of pointers initialized by previous call to
863.Fn uvm_loan .
864.Fa npages
865should match number of pages allocated for loan, this also matches
866number of items in the array.
867Argument
868.Fa flags
869should be one of
870.Bd -literal
871#define UVM_LOAN_TOANON       0x01    /* loan to anons */
872#define UVM_LOAN_TOPAGE       0x02    /* loan to kernel */
873.Ed
874.Pp
875and should match what was used for previous call to
876.Fn uvm_loan .
877.Sh MISCELLANEOUS FUNCTIONS
878.Ft struct uvm_object *
879.br
880.Fn uao_create "vsize_t size" "int flags" ;
881.Pp
882.Ft void
883.br
884.Fn uao_detach "struct uvm_object *uobj" ;
885.Pp
886.Ft void
887.br
888.Fn uao_reference "struct uvm_object *uobj" ;
889.Pp
890.Ft boolean_t
891.br
892.Fn uvm_chgkprot "caddr_t addr" "size_t len" "int rw" ;
893.Pp
894.Ft void
895.br
896.Fn uvm_kernacc "caddr_t addr" "size_t len" "int rw" ;
897.Pp
898.Ft int
899.br
900.Fn uvm_vslock "struct proc *p" "caddr_t addr" "size_t len" "vm_prot_t prot" ;
901.Pp
902.Ft void
903.br
904.Fn uvm_vsunlock "struct proc *p" "caddr_t addr" "size_t len" ;
905.Pp
906.Ft void
907.br
908.Fn uvm_meter "void" ;
909.Pp
910.Ft void
911.br
912.Fn uvm_fork "struct proc *p1" "struct proc *p2" "boolean_t shared" ;
913.Pp
914.Ft int
915.br
916.Fn uvm_grow "struct proc *p" "vaddr_t sp" ;
917.Pp
918.Ft int
919.br
920.Fn uvm_coredump "struct proc *p" "struct vnode *vp" "struct ucred *cred" "struct core *chdr" ;
921.Pp
922.Ft void
923.br
924.Fn uvn_findpages "struct uvm_object *uobj" "voff_t offset" "int *npagesp" "struct vm_page **pps" "int flags" ;
925.Pp
926.Ft void
927.br
928.Fn uvm_swap_stats "int cmd" "struct swapent *sep" "int sec" "register_t *retval" ;
929.Pp
930The
931.Fn uao_create ,
932.Fn uao_detach ,
933and
934.Fn uao_reference
935functions operate on anonymous memory objects, such as those used to support
936System V shared memory.
937.Fn uao_create
938returns an object of size
939.Fa size
940with flags:
941.Bd -literal
942#define UAO_FLAG_KERNOBJ        0x1     /* create kernel object */
943#define UAO_FLAG_KERNSWAP       0x2     /* enable kernel swap */
944.Ed
945.Pp
946which can only be used once each at system boot time.
947.Fn uao_reference
948creates an additional reference to the named anonymous memory object.
949.Fn uao_detach
950removes a reference from the named anonymous memory object, destroying
951it if removing the last reference.
952.Pp
953.Fn uvm_chgkprot
954changes the protection of kernel memory from
955.Fa addr
956to
957.Fa addr + len
958to the value of
959.Fa rw .
960This is primarily useful for debuggers, for setting breakpoints.
961This function is only available with options
962.Dv KGDB .
963.Pp
964.Fn uvm_kernacc
965checks the access at address
966.Fa addr
967to
968.Fa addr + len
969for
970.Fa rw
971access in the kernel address space.
972.Pp
973.Fn uvm_vslock
974and
975.Fn uvm_vsunlock
976control the wiring and unwiring of pages for process
977.Fa p
978from
979.Fa addr
980to
981.Fa addr + len .
982These functions are normally used to wire memory for I/O.
983.Pp
984.Fn uvm_meter
985calculates the load average and wakes up the swapper if necessary.
986.Pp
987.Fn uvm_fork
988forks a virtual address space for the (old) process
989.Fa p1
990and (new)
991.Fa p2 .
992If the
993.Fa shared
994argument is non-zero, p1 shares its address space with p2,
995otherwise a new address space is created.
996This function currently has no return value, and thus cannot fail.
997In the future, this function will be changed to allow it to
998fail in low memory conditions.
999.Pp
1000.Fn uvm_grow
1001increases the stack segment of process
1002.Fa p
1003to include
1004.Fa sp .
1005.Pp
1006.Fn uvm_coredump
1007generates a coredump on vnode
1008.Fa vp
1009for process
1010.Fa p
1011with credentials
1012.Fa cred
1013and core header description in
1014.Fa chdr .
1015.Pp
1016.Fn uvn_findpages
1017looks up or creates pages in
1018.Fa uobj
1019at offset
1020.Fa offset ,
1021marks them busy and returns them in the
1022.Fa pps
1023array.
1024Currently
1025.Fa uobj
1026must be a vnode object.
1027The number of pages requested is pointed to by
1028.Fa npagesp ,
1029and this value is updated with the actual number of pages returned.
1030The flags can be
1031.Bd -literal
1032#define UFP_ALL         0x00    /* return all pages requested */
1033#define UFP_NOWAIT      0x01    /* don't sleep */
1034#define UFP_NOALLOC     0x02    /* don't allocate new pages */
1035#define UFP_NOCACHE     0x04    /* don't return pages which already exist */
1036#define UFP_NORDONLY    0x08    /* don't return PG_READONLY pages */
1037.Ed
1038.Pp
1039.Dv UFP_ALL
1040is a pseudo-flag meaning all requested pages should be returned.
1041.Dv UFP_NOWAIT
1042means that we must not sleep.
1043.Dv UFP_NOALLOC
1044causes any pages which do not already exist to be skipped.
1045.Dv UFP_NOCACHE
1046causes any pages which do already exist to be skipped.
1047.Dv UFP_NORDONLY
1048causes any pages which are marked PG_READONLY to be skipped.
1049.Pp
1050.Fn uvm_swap_stats
1051implements the
1052.Dv SWAP_STATS
1053and
1054.Dv SWAP_OSTATS
1055operation of the
1056.Xr swapctl 2
1057system call.
1058.Fa cmd
1059is the requested command,
1060.Dv SWAP_STATS
1061or
1062.Dv SWAP_OSTATS .
1063The function will copy no more than
1064.Fa sec
1065entries in the array pointed to by
1066.Fa sep .
1067On return,
1068.Fa retval
1069holds the actual number of entries copied in the array.
1070.Sh SYSCTL
1072UVM provides support for the
1073.Dv CTL_VM
1074domain of the
1075.Xr sysctl 3
1076hierarchy.
1077It handles the
1078.Dv VM_LOADAVG ,
1079.Dv VM_METER ,
1080.Dv VM_UVMEXP ,
1081and
1082.Dv VM_UVMEXP2
1083nodes, which return the current load averages, calculate current VM
1084totals, return the uvmexp structure, and return a kernel version
1085independent view of the uvmexp structure, respectively.
1086It also exports a number of tunables that control how much VM space is
1087allowed to be consumed by various tasks.
1088The load averages are typically accessed from userland using the
1089.Xr getloadavg 3
1090function.
1091The uvmexp structure has all global state of the UVM system,
1092and has the following members:
1093.Bd -literal
1094/* vm_page constants */
1095int pagesize;   /* size of a page (PAGE_SIZE): must be power of 2 */
1096int pagemask;   /* page mask */
1097int pageshift;  /* page shift */
1098
1099/* vm_page counters */
1100int npages;     /* number of pages we manage */
1101int free;       /* number of free pages */
1102int active;     /* number of active pages */
1103int inactive;   /* number of pages that we free'd but may want back */
1104int paging;     /* number of pages in the process of being paged out */
1105int wired;      /* number of wired pages */
1106int reserve_pagedaemon; /* number of pages reserved for pagedaemon */
1107int reserve_kernel; /* number of pages reserved for kernel */
1108
1109/* pageout params */
1110int freemin;    /* min number of free pages */
1111int freetarg;   /* target number of free pages */
1112int inactarg;   /* target number of inactive pages */
1113int wiredmax;   /* max number of wired pages */
1114
1115/* swap */
1116int nswapdev;   /* number of configured swap devices in system */
1117int swpages;    /* number of PAGE_SIZE'ed swap pages */
1118int swpginuse;  /* number of swap pages in use */
1119int nswget;     /* number of times fault calls uvm_swap_get() */
1120int nanon;      /* number total of anon's in system */
1121int nfreeanon;  /* number of free anon's */
1122
1123/* stat counters */
1124int faults;             /* page fault count */
1125int traps;              /* trap count */
1126int intrs;              /* interrupt count */
1127int swtch;              /* context switch count */
1128int softs;              /* software interrupt count */
1129int syscalls;           /* system calls */
1130int pageins;            /* pagein operation count */
1131                        /* pageouts are in pdpageouts below */
1132int swapins;            /* swapins */
1133int swapouts;           /* swapouts */
1134int pgswapin;           /* pages swapped in */
1135int pgswapout;          /* pages swapped out */
1136int forks;              /* forks */
1137int forks_ppwait;       /* forks where parent waits */
1138int forks_sharevm;      /* forks where vmspace is shared */
1139
1140/* fault subcounters */
1141int fltnoram;   /* number of times fault was out of ram */
1142int fltnoanon;  /* number of times fault was out of anons */
1143int fltpgwait;  /* number of times fault had to wait on a page */
1144int fltpgrele;  /* number of times fault found a released page */
1145int fltrelck;   /* number of times fault relock called */
1146int fltrelckok; /* number of times fault relock is a success */
1147int fltanget;   /* number of times fault gets anon page */
1148int fltanretry; /* number of times fault retrys an anon get */
1149int fltamcopy;  /* number of times fault clears "needs copy" */
1150int fltnamap;   /* number of times fault maps a neighbor anon page */
1151int fltnomap;   /* number of times fault maps a neighbor obj page */
1152int fltlget;    /* number of times fault does a locked pgo_get */
1153int fltget;     /* number of times fault does an unlocked get */
1154int flt_anon;   /* number of times fault anon (case 1a) */
1155int flt_acow;   /* number of times fault anon cow (case 1b) */
1156int flt_obj;    /* number of times fault is on object page (2a) */
1157int flt_prcopy; /* number of times fault promotes with copy (2b) */
1158int flt_przero; /* number of times fault promotes with zerofill (2b) */
1159
1160/* daemon counters */
1161int pdwoke;     /* number of times daemon woke up */
1162int pdrevs;     /* number of times daemon rev'd clock hand */
1163int pdswout;    /* number of times daemon called for swapout */
1164int pdfreed;    /* number of pages daemon freed since boot */
1165int pdscans;    /* number of pages daemon scanned since boot */
1166int pdanscan;   /* number of anonymous pages scanned by daemon */
1167int pdobscan;   /* number of object pages scanned by daemon */
1168int pdreact;    /* number of pages daemon reactivated since boot */
1169int pdbusy;     /* number of times daemon found a busy page */
1170int pdpageouts; /* number of times daemon started a pageout */
1171int pdpending;  /* number of times daemon got a pending pageout */
1172int pddeact;    /* number of pages daemon deactivates */
1173.Ed
1174.Sh NOTES
1175.Fn uvm_chgkprot
1176is only available if the kernel has been compiled with options
1177.Dv KGDB .
1178.Pp
1179All structures and types whose names begin with
1180.Dq vm_
1181will be renamed to
1182.Dq uvm_ .
1183.Sh SEE ALSO
1184.Xr swapctl 2 ,
1185.Xr getloadavg 3 ,
1186.Xr kvm 3 ,
1187.Xr sysctl 3 ,
1188.Xr ddb 4 ,
1189.Xr options 4 ,
1190.Xr pmap 9
1191.Sh HISTORY
1192UVM is a new VM system developed at Washington University in St. Louis
1193(Missouri).
1194UVM's roots lie partly in the Mach-based
1195.Bx 4.4
1196VM system, the
1197.Fx
1198VM system, and the SunOS 4 VM system.
1199UVM's basic structure is based on the
1200.Bx 4.4
1201VM system.
1202UVM's new anonymous memory system is based on the
1203anonymous memory system found in the SunOS 4 VM (as described in papers
1204published by Sun Microsystems, Inc.).
1205UVM also includes a number of features new to
1206.Bx
1207including page loanout, map entry passing, simplified
1208copy-on-write, and clustered anonymous memory pageout.
1209UVM is also further documented in an August 1998 dissertation by
1210Charles D. Cranor.
1211.Pp
1212UVM appeared in
1213.Nx 1.4 .
1214.Sh AUTHORS
1215Charles D. Cranor
1216.Aq chuck@ccrc.wustl.edu
1217designed and implemented UVM.
1218.Pp
1219Matthew Green
1220.Aq mrg@eterna.com.au
1221wrote the swap-space management code and handled the logistical issues
1222involved with merging UVM into the
1223.Nx
1224source tree.
1225.Pp
1226Chuck Silvers
1227.Aq chuq@chuq.com
1228implemented the aobj pager, thus allowing UVM to support System V shared
1229memory and process swapping.
1230He also designed and implemented the UBC part of UVM, which uses UVM pages
1231to cache vnode data rather than the traditional buffer cache buffers.
1232