1.\"	$NetBSD: uvm.9,v 1.91 2008/08/04 13:31:14 pooka Exp $
2.\"
3.\" Copyright (c) 1998 Matthew R. Green
4.\" All rights reserved.
5.\"
6.\" Redistribution and use in source and binary forms, with or without
7.\" modification, are permitted provided that the following conditions
8.\" are met:
9.\" 1. Redistributions of source code must retain the above copyright
10.\"    notice, this list of conditions and the following disclaimer.
11.\" 2. Redistributions in binary form must reproduce the above copyright
12.\"    notice, this list of conditions and the following disclaimer in the
13.\"    documentation and/or other materials provided with the distribution.
14.\"
15.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
16.\" IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17.\" OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18.\" IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
19.\" INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
20.\" BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
21.\" LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
22.\" AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
23.\" OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25.\" SUCH DAMAGE.
26.\"
.Dd August 4, 2008
.Dt UVM 9
.Os
.Sh NAME
.Nm uvm
.Nd virtual memory system external interface
.Sh SYNOPSIS
.In sys/param.h
.In uvm/uvm.h
.Sh DESCRIPTION
The UVM virtual memory system manages access to the computer's memory
resources.
User processes and the kernel access these resources through
UVM's external interface.
UVM's external interface includes functions that:
.Pp
.Bl -hyphen -compact
.It
initialize UVM sub-systems
.It
manage virtual address spaces
.It
resolve page faults
.It
memory map files and devices
.It
perform uio-based I/O to virtual memory
.It
allocate and free kernel virtual memory
.It
allocate and free physical memory
.El
.Pp
In addition to exporting these services, UVM has two kernel-level processes:
pagedaemon and swapper.
The pagedaemon process sleeps until physical memory becomes scarce.
When that happens, the pagedaemon is awoken.
It scans physical memory, paging out and freeing memory that has not
been recently used.
The swapper process swaps in runnable processes that are currently swapped
out, if there is room.
.Pp
There are also several miscellaneous functions.
.Sh INITIALIZATION
.Ft void
.br
.Fn uvm_init "void" ;
.Pp
.Ft void
.br
.Fn uvm_init_limits "struct lwp *l" ;
.Pp
.Ft void
.br
.Fn uvm_setpagesize "void" ;
.Pp
.Ft void
.br
.Fn uvm_swap_init "void" ;
.Pp
.Fn uvm_init
sets up the UVM system at system boot time, after the
console has been set up.
It initializes global state, the page, map, kernel virtual memory state,
machine-dependent physical map, kernel memory allocator,
pager and anonymous memory sub-systems, and then enables
paging of kernel objects.
.Pp
.Fn uvm_init_limits
initializes process limits for the named process.
This is for use by the system startup for process zero, before any
other processes are created.
.Pp
.Fn uvm_setpagesize
initializes the uvmexp members pagesize (if not already done by
machine-dependent code), pageshift and pagemask.
It should be called by machine-dependent code early in the
.Fn pmap_init
call (see
.Xr pmap 9 ) .
.Pp
.Fn uvm_swap_init
initializes the swap sub-system.
.Sh VIRTUAL ADDRESS SPACE MANAGEMENT
.Ft int
.br
.Fn uvm_map "struct vm_map *map" "vaddr_t *startp" "vsize_t size" "struct uvm_object *uobj" "voff_t uoffset" "vsize_t align" "uvm_flag_t flags" ;
.Pp
.Ft void
.br
.Fn uvm_unmap "struct vm_map *map" "vaddr_t start" "vaddr_t end" ;
.Pp
.Ft int
.br
.Fn uvm_map_pageable "struct vm_map *map" "vaddr_t start" "vaddr_t end" "bool new_pageable" "int lockflags" ;
.Pp
.Ft bool
.br
.Fn uvm_map_checkprot "struct vm_map *map" "vaddr_t start" "vaddr_t end" "vm_prot_t protection" ;
.Pp
.Ft int
.br
.Fn uvm_map_protect "struct vm_map *map" "vaddr_t start" "vaddr_t end" "vm_prot_t new_prot" "bool set_max" ;
.Pp
.Ft int
.br
.Fn uvm_deallocate "struct vm_map *map" "vaddr_t start" "vsize_t size" ;
.Pp
.Ft struct vmspace *
.br
.Fn uvmspace_alloc "vaddr_t min" "vaddr_t max" "int pageable" ;
.Pp
.Ft void
.br
.Fn uvmspace_exec "struct lwp *l" "vaddr_t start" "vaddr_t end" ;
.Pp
.Ft struct vmspace *
.br
.Fn uvmspace_fork "struct vmspace *vm" ;
.Pp
.Ft void
.br
.Fn uvmspace_free "struct vmspace *vm1" ;
.Pp
.Ft void
.br
.Fn uvmspace_share "struct proc *p1" "struct proc *p2" ;
.Pp
.Ft void
.br
.Fn uvmspace_unshare "struct lwp *l" ;
.Pp
.Ft bool
.br
.Fn uvm_uarea_alloc "vaddr_t *uaddrp" ;
.Pp
.Ft void
.br
.Fn uvm_uarea_free "vaddr_t uaddr" ;
.Pp
.Fn uvm_map
establishes a valid mapping in map
.Fa map ,
which must be unlocked.
The new mapping has size
.Fa size ,
which must be a multiple of
.Dv PAGE_SIZE .
The
.Fa uobj
and
.Fa uoffset
arguments can have four meanings.
When
.Fa uobj
is
.Dv NULL
and
.Fa uoffset
is
.Dv UVM_UNKNOWN_OFFSET ,
.Fn uvm_map
does not use the machine-dependent
.Dv PMAP_PREFER
function.
If
.Fa uoffset
is any other value, it is used as the hint to
.Dv PMAP_PREFER .
When
.Fa uobj
is not
.Dv NULL
and
.Fa uoffset
is
.Dv UVM_UNKNOWN_OFFSET ,
.Fn uvm_map
finds the offset based upon the virtual address, passed as
.Fa startp .
If
.Fa uoffset
is any other value, a normal mapping is done at this offset.
The start address of the map will be returned in
.Fa startp .
.Pp
.Fa align
specifies the alignment of the mapping unless
.Dv UVM_FLAG_FIXED
is specified in
.Fa flags .
.Fa align
must be a power of 2.
.Pp
.Fa flags
passed to
.Fn uvm_map
are typically created using the
.Fn UVM_MAPFLAG "vm_prot_t prot" "vm_prot_t maxprot" "vm_inherit_t inh" "int advice" "int flags"
macro, which uses the following values.
The values that
.Fa prot
and
.Fa maxprot
can take are:
.Bd -literal
#define UVM_PROT_MASK   0x07    /* protection mask */
#define UVM_PROT_NONE   0x00    /* protection none */
#define UVM_PROT_ALL    0x07    /* everything */
#define UVM_PROT_READ   0x01    /* read */
#define UVM_PROT_WRITE  0x02    /* write */
#define UVM_PROT_EXEC   0x04    /* exec */
#define UVM_PROT_R      0x01    /* read */
#define UVM_PROT_W      0x02    /* write */
#define UVM_PROT_RW     0x03    /* read-write */
#define UVM_PROT_X      0x04    /* exec */
#define UVM_PROT_RX     0x05    /* read-exec */
#define UVM_PROT_WX     0x06    /* write-exec */
#define UVM_PROT_RWX    0x07    /* read-write-exec */
.Ed
.Pp
The values that
.Fa inh
can take are:
.Bd -literal
#define UVM_INH_MASK    0x30    /* inherit mask */
#define UVM_INH_SHARE   0x00    /* "share" */
#define UVM_INH_COPY    0x10    /* "copy" */
#define UVM_INH_NONE    0x20    /* "none" */
#define UVM_INH_DONATE  0x30    /* "donate" \*[Lt]\*[Lt] not used */
.Ed
.Pp
The values that
.Fa advice
can take are:
.Bd -literal
#define UVM_ADV_NORMAL     0x0  /* 'normal' */
#define UVM_ADV_RANDOM     0x1  /* 'random' */
#define UVM_ADV_SEQUENTIAL 0x2  /* 'sequential' */
#define UVM_ADV_MASK       0x7  /* mask */
.Ed
.Pp
The values that
.Fa flags
can take are:
.Bd -literal
#define UVM_FLAG_FIXED   0x010000 /* find space */
#define UVM_FLAG_OVERLAY 0x020000 /* establish overlay */
#define UVM_FLAG_NOMERGE 0x040000 /* don't merge map entries */
#define UVM_FLAG_COPYONW 0x080000 /* set copy_on_write flag */
#define UVM_FLAG_AMAPPAD 0x100000 /* for bss: pad amap to reduce malloc() */
#define UVM_FLAG_TRYLOCK 0x200000 /* fail if we can not lock map */
.Ed
.Pp
The
.Dv UVM_MAPFLAG
macro arguments can be combined with a bitwise OR.
There are several special purpose macros for checking protection
combinations, e.g., the
.Dv UVM_PROT_WX
macro.
There are also some additional macros to extract bits from the flags.
The
.Dv UVM_PROTECTION ,
.Dv UVM_INHERIT ,
.Dv UVM_MAXPROTECTION
and
.Dv UVM_ADVICE
macros return the protection, inheritance, maximum protection and advice,
respectively.
.Fn uvm_map
returns a standard UVM return value.
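.Pp
As an illustrative sketch only (the object, its size and the error
handling are assumptions of this example, not taken from the kernel
sources), a driver might map an existing UVM object into the kernel
map read/write as follows:
.Bd -literal
	struct uvm_object *uobj;	/* assumed to exist */
	vsize_t size;			/* multiple of PAGE_SIZE */
	vaddr_t va;
	int error;

	va = vm_map_min(kernel_map);	/* hint for the start address */
	error = uvm_map(kernel_map, &va, size, uobj, 0, 0,
	    UVM_MAPFLAG(UVM_PROT_RW, UVM_PROT_RW, UVM_INH_NONE,
	    UVM_ADV_NORMAL, 0));
	if (error)
		return error;		/* standard UVM return value */
.Ed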
.Pp
.Fn uvm_unmap
removes a valid mapping
from
.Fa start
to
.Fa end ,
in map
.Fa map ,
which must be unlocked.
.Pp
.Fn uvm_map_pageable
changes the pageability of the pages in the range from
.Fa start
to
.Fa end
in map
.Fa map
to
.Fa new_pageable .
.Fn uvm_map_pageable
returns a standard UVM return value.
.Pp
.Fn uvm_map_checkprot
checks the protection of the range from
.Fa start
to
.Fa end
in map
.Fa map
against
.Fa protection .
This returns either
.Dv true
or
.Dv false .
.Pp
.Fn uvm_map_protect
changes the protection of the range from
.Fa start
to
.Fa end
in map
.Fa map
to
.Fa new_prot ,
also setting the maximum protection of the region to
.Fa new_prot
if
.Fa set_max
is non-zero.
This function returns a standard UVM return value.
.Pp
.Fn uvm_deallocate
deallocates kernel memory in map
.Fa map
from address
.Fa start
to
.Fa start + size .
.Pp
.Fn uvmspace_alloc
allocates and returns a new address space, with ranges from
.Fa min
to
.Fa max ,
setting the pageability of the address space to
.Fa pageable .
.Pp
.Fn uvmspace_exec
either reuses the address space of lwp
.Fa l
if there are no other references to it, or creates
a new one with
.Fn uvmspace_alloc .
The range of valid addresses in the address space is reset to
.Fa start
through
.Fa end .
.Pp
.Fn uvmspace_fork
creates and returns a new address space based upon the
.Fa vm
address space, typically used when allocating an address space for a
child process.
.Pp
.Fn uvmspace_free
lowers the reference count on the address space
.Fa vm1 ,
freeing the data structures if there are no other references.
.Pp
.Fn uvmspace_share
causes process
.Fa p2
to share the address space of
.Fa p1 .
.Pp
.Fn uvmspace_unshare
ensures that lwp
.Fa l
has its own, unshared address space, by creating a new one if
necessary by calling
.Fn uvmspace_fork .
.Pp
.Fn uvm_uarea_alloc
allocates virtual space for a u-area (i.e., a kernel stack) and stores
its virtual address in
.Fa *uaddrp .
The return value is
.Dv true
if the u-area is already backed by wired physical memory, otherwise
.Dv false .
.Pp
.Fn uvm_uarea_free
frees a u-area allocated with
.Fn uvm_uarea_alloc ,
freeing both the virtual space and any physical pages which may have
later been allocated to back that virtual space.
.Sh PAGE FAULT HANDLING
.Ft int
.br
.Fn uvm_fault "struct vm_map *orig_map" "vaddr_t vaddr" "vm_prot_t access_type" ;
.Pp
.Fn uvm_fault
is the main entry point for faults.
It takes
.Fa orig_map
as the map the fault originated in,
.Fa vaddr
as the offset into the map at which the fault occurred, and
.Fa access_type
describing the type of access requested.
.Fn uvm_fault
returns a standard UVM return value.
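.Pp
A hedged sketch of how machine-dependent trap handling code typically
hands a user-space page fault to UVM follows; the
.Va faultaddr
and
.Va ftype
variables are assumed to have been extracted from the trap frame and
are not part of the UVM interface:
.Bd -literal
	struct vm_map *map;
	int error;

	/* fault on the faulting lwp's own map */
	map = &curlwp->l_proc->p_vmspace->vm_map;
	error = uvm_fault(map, trunc_page(faultaddr), ftype);
	if (error) {
		/* unresolved fault: deliver SIGSEGV or SIGBUS */
	}
.Ed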
.Sh MEMORY MAPPING FILES AND DEVICES
.Ft void
.br
.Fn uvm_vnp_setsize "struct vnode *vp" "voff_t newsize" ;
.Pp
.Ft void *
.br
.Fn ubc_alloc "struct uvm_object *uobj" "voff_t offset" "vsize_t *lenp" \
"int advice" "int flags" ;
.Pp
.Ft void
.br
.Fn ubc_release "void *va" "int flags" ;
.Pp
.Ft int
.br
.Fn ubc_uiomove "struct uvm_object *uobj" "struct uio *uio" "vsize_t todo" \
"int advice" "int flags" ;
.Pp
.Fn uvm_vnp_setsize
sets the size of vnode
.Fa vp
to
.Fa newsize .
The caller must hold a reference to the vnode.
If the vnode shrinks, pages no longer used are discarded.
.Pp
.Fn ubc_alloc
creates a kernel mapping of
.Fa uobj
starting at offset
.Fa offset .
The desired length of the mapping is pointed to by
.Fa lenp ,
but the actual mapping may be smaller than this.
.Fa lenp
is updated to contain the actual length mapped.
.Fa advice
is the access pattern hint, which must be one of
.Pp
.Bl -tag -offset indent -width "UVM_ADV_SEQUENTIAL" -compact
.It UVM_ADV_NORMAL
No hint
.It UVM_ADV_RANDOM
Random access hint
.It UVM_ADV_SEQUENTIAL
Sequential access hint (from lower offset to higher offset)
.El
.Pp
The possible
.Fa flags
are
.Pp
.Bl -tag -offset indent -width "UVM_ADV_SEQUENTIAL" -compact
.It UBC_READ
Mapping will be accessed for read.
.It UBC_WRITE
Mapping will be accessed for write.
.It UBC_FAULTBUSY
Fault in the window's pages during the mapping operation.
Makes sense only for write.
.El
.Pp
Once the mapping is created, it must be accessed only by methods that can
handle faults, such as
.Fn uiomove
or
.Fn kcopy .
Page faults on the mapping will result in the object's pager
method being called to resolve the fault.
.Pp
.Fn ubc_release
frees the mapping at
.Fa va
for reuse.
The mapping may be cached to speed future accesses to the same region
of the object.
The flags can be any of
.Pp
.Bl -tag -offset indent -width "UVM_ADV_SEQUENTIAL" -compact
.It UBC_UNMAP
Do not cache mapping.
.El
.Pp
.Fn ubc_uiomove
allocates a UBC memory window, performs I/O on it and unmaps the window.
The
.Fa advice
parameter takes the same values as the respective parameter in
.Fn ubc_alloc
and the
.Fa flags
parameter takes the same arguments as
.Fn ubc_alloc
and
.Fn ubc_release .
Additionally, the flag
.Dv UBC_PARTIALOK
can be provided to indicate that it is acceptable to return if an error
occurs mid-transfer.
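.Pp
As an illustration (hedged; the vnode
.Va vp ,
the
.Va uio
structure and the transfer length
.Va bytes
are assumptions of this example), a file system read path built on UBC
might look like:
.Bd -literal
	int error;

	/* copy "bytes" bytes from the vnode's UVM object to uio */
	error = ubc_uiomove(&vp->v_uobj, uio, bytes,
	    UVM_ADV_SEQUENTIAL, UBC_READ);
	if (error)
		return error;
.Ed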
.Sh VIRTUAL MEMORY I/O
.Ft int
.br
.Fn uvm_io "struct vm_map *map" "struct uio *uio" ;
.Pp
.Fn uvm_io
performs the I/O described in
.Fa uio
on the memory described in
.Fa map .
.Sh ALLOCATION OF KERNEL MEMORY
.Ft vaddr_t
.br
.Fn uvm_km_alloc "struct vm_map *map" "vsize_t size" "vsize_t align" "uvm_flag_t flags" ;
.Pp
.Ft void
.br
.Fn uvm_km_free "struct vm_map *map" "vaddr_t addr" "vsize_t size" "uvm_flag_t flags" ;
.Pp
.Ft struct vm_map *
.br
.Fn uvm_km_suballoc "struct vm_map *map" "vaddr_t *min" "vaddr_t *max" "vsize_t size" "bool pageable" "bool fixed" "struct vm_map *submap" ;
.Pp
.Fn uvm_km_alloc
allocates
.Fa size
bytes of kernel memory in map
.Fa map .
The first address of the allocated memory range will be aligned according to the
.Fa align
argument
.Pq specify 0 if no alignment is necessary .
The alignment must be a multiple of page size.
The
.Fa flags
argument is a bitwise inclusive OR of the allocation type and operation flags.
.Pp
The allocation type should be one of:
.Bl -tag -width UVM_KMF_PAGEABLE
.It UVM_KMF_WIRED
Wired memory.
.It UVM_KMF_PAGEABLE
Demand-paged zero-filled memory.
.It UVM_KMF_VAONLY
Virtual address only.
No physical pages are mapped in the allocated region.
If necessary, it's the caller's responsibility to enter page mappings.
It's also the caller's responsibility to clean up the mappings before freeing
the address range.
.El
.Pp
The following operation flags are available:
.Bl -tag -width UVM_KMF_PAGEABLE
.It UVM_KMF_CANFAIL
Can fail even if
.Dv UVM_KMF_NOWAIT
is not specified and
.Dv UVM_KMF_WAITVA
is specified.
.It UVM_KMF_ZERO
Request zero-filled memory.
Only supported for
.Dv UVM_KMF_WIRED .
Shouldn't be used with other types.
.It UVM_KMF_TRYLOCK
Fail if we can't lock the map.
.It UVM_KMF_NOWAIT
Fail immediately if no memory is available.
.It UVM_KMF_WAITVA
Sleep to wait for the virtual address resources if needed.
.El
.Pp
(If neither
.Dv UVM_KMF_NOWAIT
nor
.Dv UVM_KMF_CANFAIL
is specified and
.Dv UVM_KMF_WAITVA
is specified,
.Fn uvm_km_alloc
will never fail, but rather sleep indefinitely until the allocation succeeds.)
.Pp
Pageability of the pages allocated with
.Dv UVM_KMF_PAGEABLE
can be changed by
.Fn uvm_map_pageable .
In that case, the entire range must be changed atomically.
Changing a part of the range is not supported.
.Pp
.Fn uvm_km_free
frees the memory range allocated by
.Fn uvm_km_alloc .
.Fa addr
must be an address returned by
.Fn uvm_km_alloc .
.Fa map
and
.Fa size
must be the same as the ones used for the corresponding
.Fn uvm_km_alloc .
.Fa flags
must be the allocation type used for the corresponding
.Fn uvm_km_alloc .
.Pp
.Fn uvm_km_free
is the only way to free memory ranges allocated by
.Fn uvm_km_alloc .
.Fn uvm_unmap
must not be used.
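.Pp
The following is a hedged sketch (not taken from the kernel sources) of
allocating and freeing one page of wired, zero-filled kernel memory;
since
.Dv UVM_KMF_NOWAIT
is specified, the allocation may fail:
.Bd -literal
	vaddr_t va;

	va = uvm_km_alloc(kernel_map, PAGE_SIZE, 0,
	    UVM_KMF_WIRED | UVM_KMF_ZERO | UVM_KMF_NOWAIT);
	if (va == 0)
		return ENOMEM;	/* no memory available right now */

	/* ... use the page at va ... */

	uvm_km_free(kernel_map, va, PAGE_SIZE, UVM_KMF_WIRED);
.Ed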
.Pp
.Fn uvm_km_suballoc
allocates a submap from
.Fa map ,
creating a new map if
.Fa submap
is
.Dv NULL .
The addresses of the submap can be specified exactly by setting the
.Fa fixed
argument to non-zero, which causes the
.Fa min
argument to specify the beginning address of the submap.
If
.Fa fixed
is zero, any address of size
.Fa size
will be allocated from
.Fa map
and the start and end addresses returned in
.Fa min
and
.Fa max .
If
.Fa pageable
is non-zero, entries in the map may be paged out.
.Sh ALLOCATION OF PHYSICAL MEMORY
.Ft struct vm_page *
.br
.Fn uvm_pagealloc "struct uvm_object *uobj" "voff_t off" "struct vm_anon *anon" "int flags" ;
.Pp
.Ft void
.br
.Fn uvm_pagerealloc "struct vm_page *pg" "struct uvm_object *newobj" "voff_t newoff" ;
.Pp
.Ft void
.br
.Fn uvm_pagefree "struct vm_page *pg" ;
.Pp
.Ft int
.br
.Fn uvm_pglistalloc "psize_t size" "paddr_t low" "paddr_t high" "paddr_t alignment" "paddr_t boundary" "struct pglist *rlist" "int nsegs" "int waitok" ;
.Pp
.Ft void
.br
.Fn uvm_pglistfree "struct pglist *list" ;
.Pp
.Ft void
.br
.Fn uvm_page_physload "vaddr_t start" "vaddr_t end" "vaddr_t avail_start" "vaddr_t avail_end" "int free_list" ;
.Pp
.Fn uvm_pagealloc
allocates a page of memory at virtual address
.Fa off
in either the object
.Fa uobj
or the anonymous memory
.Fa anon ,
which must be locked by the caller.
Only one of
.Fa uobj
and
.Fa anon
can be non
.Dv NULL .
Returns
.Dv NULL
when no page can be found.
The flags can be any of
.Bd -literal
#define UVM_PGA_USERESERVE      0x0001  /* ok to use reserve pages */
#define UVM_PGA_ZERO            0x0002  /* returned page must be zero'd */
.Ed
.Pp
.Dv UVM_PGA_USERESERVE
means to allocate a page even if that will result in the number of free pages
being lower than
.Dv uvmexp.reserve_pagedaemon
(if the current thread is the pagedaemon) or
.Dv uvmexp.reserve_kernel
(if the current thread is not the pagedaemon).
.Dv UVM_PGA_ZERO
causes the returned page to be filled with zeroes, either by allocating it
from a pool of pre-zeroed pages or by zeroing it in-line as necessary.
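.Pp
A minimal, hedged sketch of allocating a zero-filled page for an object
(the object
.Va uobj
and offset
.Va off
are assumptions of the example, and
.Va uobj
is assumed to be locked by the caller as required):
.Bd -literal
	struct vm_page *pg;

	pg = uvm_pagealloc(uobj, off, NULL, UVM_PGA_ZERO);
	if (pg == NULL) {
		/* no free pages; the caller could unlock, wait and retry */
		return ENOMEM;
	}
.Ed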
.Pp
.Fn uvm_pagerealloc
reallocates page
.Fa pg
to a new object
.Fa newobj ,
at a new offset
.Fa newoff .
.Pp
.Fn uvm_pagefree
frees the physical page
.Fa pg .
If the content of the page is known to be zero-filled,
the caller should set
.Dv PG_ZERO
in pg-\*[Gt]flags so that the page allocator will use
the page to serve future
.Dv UVM_PGA_ZERO
requests efficiently.
.Pp
.Fn uvm_pglistalloc
allocates a list of pages of size
.Fa size
bytes under various constraints.
.Fa low
and
.Fa high
describe the lowest and highest addresses acceptable for the list.
If
.Fa alignment
is non-zero, it describes the required alignment of the list, in
power-of-two notation.
If
.Fa boundary
is non-zero, no segment of the list may cross this power-of-two
boundary, relative to zero.
.Fa nsegs
is the maximum number of physically contiguous segments.
If
.Fa waitok
is non-zero, the function may sleep until enough memory is available.
(It also may give up in some situations, so a non-zero
.Fa waitok
does not imply that
.Fn uvm_pglistalloc
cannot return an error.)
The allocated memory is returned in the
.Fa rlist
list; the caller has to provide storage only, the list is initialized by
.Fn uvm_pglistalloc .
.Pp
.Fn uvm_pglistfree
frees the list of pages pointed to by
.Fa list .
If the content of the page is known to be zero-filled,
the caller should set
.Dv PG_ZERO
in pg-\*[Gt]flags so that the page allocator will use
the page to serve future
.Dv UVM_PGA_ZERO
requests efficiently.
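.Pp
A hedged example of using these two functions to obtain physically
contiguous memory suitable for a hypothetical ISA DMA buffer
(the size and address limits are assumptions of the example):
.Bd -literal
	struct pglist mlist;
	int error;

	/* 64 KB, physically contiguous, below 16 MB, page aligned */
	error = uvm_pglistalloc(65536, 0, 16 * 1024 * 1024, PAGE_SIZE,
	    0, &mlist, 1, 1 /* waitok */);
	if (error)
		return error;

	/* ... map and use the pages on mlist ... */

	uvm_pglistfree(&mlist);
.Ed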
.Pp
.Fn uvm_page_physload
loads physical memory segments into VM space on the specified
.Fa free_list .
It must be called at system boot time to set up physical memory
management pages.
The arguments describe the
.Fa start
and
.Fa end
of the physical addresses of the segment, and the available start and end
addresses of pages not already in use.
.\" XXX expand on "system boot time"!
.Sh PROCESSES
.Ft void
.br
.Fn uvm_pageout "void" ;
.Pp
.Ft void
.br
.Fn uvm_scheduler "void" ;
.Pp
.Ft void
.br
.Fn uvm_swapin "struct lwp *l" ;
.Pp
.Fn uvm_pageout
is the main loop for the page daemon.
.Pp
.Fn uvm_scheduler
is the process zero main loop, which is to be called after the
system has finished starting other processes.
It handles the swapping in of runnable, swapped out processes in priority
order.
.Pp
.Fn uvm_swapin
swaps in the named lwp.
.Sh PAGE LOAN
.Ft int
.br
.Fn uvm_loan "struct vm_map *map" "vaddr_t start" "vsize_t len" "void *v" "int flags" ;
.Pp
.Ft void
.br
.Fn uvm_unloan "void *v" "int npages" "int flags" ;
.Pp
.Fn uvm_loan
loans pages in a map out to anons or to the kernel.
.Fa map
should be unlocked,
.Fa start
and
.Fa len
should be multiples of
.Dv PAGE_SIZE .
The argument
.Fa flags
should be one of
.Bd -literal
#define UVM_LOAN_TOANON       0x01    /* loan to anons */
#define UVM_LOAN_TOPAGE       0x02    /* loan to kernel */
.Ed
.Pp
.Fa v
should be a pointer to an array of pointers to
.Li struct anon
or
.Li struct vm_page ,
as appropriate.
The caller has to allocate memory for the array and
ensure it's big enough to hold
.Fa len / PAGE_SIZE
pointers.
Returns 0 for success, or an appropriate error number otherwise.
Note that wired pages can't be loaned out and
.Fn uvm_loan
will fail in that case.
.Pp
.Fn uvm_unloan
kills loans on pages or anons.
.Fa v
must point to the array of pointers initialized by a previous call to
.Fn uvm_loan .
.Fa npages
should match the number of pages allocated for the loan, which is also
the number of items in the array.
The argument
.Fa flags
should be one of
.Bd -literal
#define UVM_LOAN_TOANON       0x01    /* loan to anons */
#define UVM_LOAN_TOPAGE       0x02    /* loan to kernel */
.Ed
.Pp
and should match what was used for the previous call to
.Fn uvm_loan .
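.Pp
The following hedged sketch loans the pages backing a user buffer to
the kernel and later ends the loan; the
.Va vs ,
.Va start
and
.Va len
variables are assumptions of the example, and the pointer array is
allocated here with
.Xr kmem 9 :
.Bd -literal
	struct vm_page **pgpp;
	int npages, error;

	npages = len >> PAGE_SHIFT;
	pgpp = kmem_alloc(npages * sizeof(*pgpp), KM_SLEEP);

	error = uvm_loan(&vs->vm_map, start, len, pgpp, UVM_LOAN_TOPAGE);
	if (error == 0) {
		/* ... access the loaned pages via pgpp[] ... */
		uvm_unloan(pgpp, npages, UVM_LOAN_TOPAGE);
	}
	kmem_free(pgpp, npages * sizeof(*pgpp));
.Ed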
.Sh MISCELLANEOUS FUNCTIONS
.Ft struct uvm_object *
.br
.Fn uao_create "vsize_t size" "int flags" ;
.Pp
.Ft void
.br
.Fn uao_detach "struct uvm_object *uobj" ;
.Pp
.Ft void
.br
.Fn uao_reference "struct uvm_object *uobj" ;
.Pp
.Ft void
.br
.Fn uvm_chgkprot "void *addr" "size_t len" "int rw" ;
.Pp
.Ft bool
.br
.Fn uvm_kernacc "void *addr" "size_t len" "int rw" ;
.Pp
.Ft int
.br
.Fn uvm_vslock "struct vmspace *vs" "void *addr" "size_t len" "vm_prot_t prot" ;
.Pp
.Ft void
.br
.Fn uvm_vsunlock "struct vmspace *vs" "void *addr" "size_t len" ;
.Pp
.Ft void
.br
.Fn uvm_meter "void" ;
.Pp
.Ft void
.br
.Fn uvm_fork "struct lwp *l1" "struct lwp *l2" "bool shared" ;
.Pp
.Ft int
.br
.Fn uvm_grow "struct proc *p" "vaddr_t sp" ;
.Pp
.Ft void
.br
.Fn uvn_findpages "struct uvm_object *uobj" "voff_t offset" "int *npagesp" "struct vm_page **pps" "int flags" ;
.Pp
.Ft void
.br
.Fn uvm_swap_stats "int cmd" "struct swapent *sep" "int sec" "register_t *retval" ;
.Pp
The
.Fn uao_create ,
.Fn uao_detach ,
and
.Fn uao_reference
functions operate on anonymous memory objects, such as those used to support
System V shared memory.
.Fn uao_create
returns an object of size
.Fa size
with flags:
.Bd -literal
#define UAO_FLAG_KERNOBJ        0x1     /* create kernel object */
#define UAO_FLAG_KERNSWAP       0x2     /* enable kernel swap */
.Ed
.Pp
which can only be used once each at system boot time.
.Fn uao_reference
creates an additional reference to the named anonymous memory object.
.Fn uao_detach
removes a reference from the named anonymous memory object, destroying
it if removing the last reference.
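.Pp
A hedged sketch, in the style of the System V shared memory code, of
creating an anonymous memory object and mapping it into the kernel
(the size and the error handling are assumptions of the example):
.Bd -literal
	struct uvm_object *uobj;
	vaddr_t va;
	int error;

	uobj = uao_create(size, 0);	/* starts with one reference */
	va = vm_map_min(kernel_map);
	error = uvm_map(kernel_map, &va, size, uobj, 0, 0,
	    UVM_MAPFLAG(UVM_PROT_RW, UVM_PROT_RW, UVM_INH_NONE,
	    UVM_ADV_RANDOM, 0));
	if (error)
		uao_detach(uobj);	/* drop the initial reference */
.Ed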
.Pp
.Fn uvm_chgkprot
changes the protection of kernel memory from
.Fa addr
to
.Fa addr + len
to the value of
.Fa rw .
This is primarily useful for debuggers, for setting breakpoints.
This function is only available with options
.Dv KGDB .
.Pp
.Fn uvm_kernacc
checks the access at address
.Fa addr
to
.Fa addr + len
for
.Fa rw
access in the kernel address space.
.Pp
.Fn uvm_vslock
and
.Fn uvm_vsunlock
control the wiring and unwiring of pages in the process address space
.Fa vs
from
.Fa addr
to
.Fa addr + len .
These functions are normally used to wire memory for I/O.
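.Pp
For instance (a hedged sketch; the process
.Va p ,
the user address
.Va addr
and the length
.Va len
are assumptions of the example), a driver doing physio-style I/O could
wire the user buffer around the transfer:
.Bd -literal
	int error;

	error = uvm_vslock(p->p_vmspace, addr, len, VM_PROT_READ);
	if (error)
		return error;

	/* ... perform the I/O on the wired pages ... */

	uvm_vsunlock(p->p_vmspace, addr, len);
.Ed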
.Pp
.Fn uvm_meter
calculates the load average and wakes up the swapper if necessary.
.Pp
.Fn uvm_fork
forks a virtual address space for the (old) lwp
.Fa l1
and the (new) lwp
.Fa l2 .
If the
.Fa shared
argument is true, l1 shares its address space with l2,
otherwise a new address space is created.
This function currently has no return value, and thus cannot fail.
In the future, this function will be changed to allow it to
fail in low memory conditions.
.Pp
.Fn uvm_grow
increases the stack segment of process
.Fa p
to include
.Fa sp .
.Pp
.Fn uvn_findpages
looks up or creates pages in
.Fa uobj
at offset
.Fa offset ,
marks them busy and returns them in the
.Fa pps
array.
Currently
.Fa uobj
must be a vnode object.
The number of pages requested is pointed to by
.Fa npagesp ,
and this value is updated with the actual number of pages returned.
The flags can be
.Bd -literal
#define UFP_ALL         0x00    /* return all pages requested */
#define UFP_NOWAIT      0x01    /* don't sleep */
#define UFP_NOALLOC     0x02    /* don't allocate new pages */
#define UFP_NOCACHE     0x04    /* don't return pages which already exist */
#define UFP_NORDONLY    0x08    /* don't return PG_READONLY pages */
.Ed
.Pp
.Dv UFP_ALL
is a pseudo-flag meaning all requested pages should be returned.
.Dv UFP_NOWAIT
means that we must not sleep.
.Dv UFP_NOALLOC
causes any pages which do not already exist to be skipped.
.Dv UFP_NOCACHE
causes any pages which do already exist to be skipped.
.Dv UFP_NORDONLY
causes any pages which are marked PG_READONLY to be skipped.
.Pp
.Fn uvm_swap_stats
implements the
.Dv SWAP_STATS
and
.Dv SWAP_OSTATS
operations of the
.Xr swapctl 2
system call.
.Fa cmd
is the requested command,
.Dv SWAP_STATS
or
.Dv SWAP_OSTATS .
The function will copy no more than
.Fa sec
entries into the array pointed to by
.Fa sep .
On return,
.Fa retval
holds the actual number of entries copied into the array.
.Sh SYSCTL
UVM provides support for the
.Dv CTL_VM
domain of the
.Xr sysctl 3
hierarchy.
It handles the
.Dv VM_LOADAVG ,
.Dv VM_METER ,
.Dv VM_UVMEXP ,
and
.Dv VM_UVMEXP2
nodes, which return the current load averages, the current VM
totals, the uvmexp structure, and a kernel version independent
view of the uvmexp structure, respectively.
It also exports a number of tunables that control how much VM space is
allowed to be consumed by various tasks.
The load averages are typically accessed from userland using the
.Xr getloadavg 3
function.
The uvmexp structure has all global state of the UVM system,
and has the following members:
.Bd -literal
/* vm_page constants */
int pagesize;   /* size of a page (PAGE_SIZE): must be power of 2 */
int pagemask;   /* page mask */
int pageshift;  /* page shift */

/* vm_page counters */
int npages;     /* number of pages we manage */
int free;       /* number of free pages */
int active;     /* number of active pages */
int inactive;   /* number of pages that we free'd but may want back */
int paging;     /* number of pages in the process of being paged out */
int wired;      /* number of wired pages */
int reserve_pagedaemon; /* number of pages reserved for pagedaemon */
int reserve_kernel; /* number of pages reserved for kernel */

/* pageout params */
int freemin;    /* min number of free pages */
int freetarg;   /* target number of free pages */
int inactarg;   /* target number of inactive pages */
int wiredmax;   /* max number of wired pages */

/* swap */
int nswapdev;   /* number of configured swap devices in system */
int swpages;    /* number of PAGE_SIZE'ed swap pages */
int swpginuse;  /* number of swap pages in use */
int nswget;     /* number of times fault calls uvm_swap_get() */
int nanon;      /* total number of anon's in system */
int nfreeanon;  /* number of free anon's */

/* stat counters */
int faults;             /* page fault count */
int traps;              /* trap count */
int intrs;              /* interrupt count */
int swtch;              /* context switch count */
int softs;              /* software interrupt count */
int syscalls;           /* system calls */
int pageins;            /* pagein operation count */
                        /* pageouts are in pdpageouts below */
int swapins;            /* swapins */
int swapouts;           /* swapouts */
int pgswapin;           /* pages swapped in */
int pgswapout;          /* pages swapped out */
int forks;              /* forks */
int forks_ppwait;       /* forks where parent waits */
int forks_sharevm;      /* forks where vmspace is shared */

/* fault subcounters */
int fltnoram;   /* number of times fault was out of ram */
int fltnoanon;  /* number of times fault was out of anons */
int fltpgwait;  /* number of times fault had to wait on a page */
int fltpgrele;  /* number of times fault found a released page */
int fltrelck;   /* number of times fault relock called */
int fltrelckok; /* number of times fault relock is a success */
int fltanget;   /* number of times fault gets anon page */
int fltanretry; /* number of times fault retries an anon get */
int fltamcopy;  /* number of times fault clears "needs copy" */
int fltnamap;   /* number of times fault maps a neighbor anon page */
int fltnomap;   /* number of times fault maps a neighbor obj page */
int fltlget;    /* number of times fault does a locked pgo_get */
int fltget;     /* number of times fault does an unlocked get */
int flt_anon;   /* number of times fault anon (case 1a) */
int flt_acow;   /* number of times fault anon cow (case 1b) */
int flt_obj;    /* number of times fault is on object page (2a) */
int flt_prcopy; /* number of times fault promotes with copy (2b) */
int flt_przero; /* number of times fault promotes with zerofill (2b) */

/* daemon counters */
int pdwoke;     /* number of times daemon woke up */
int pdrevs;     /* number of times daemon rev'd clock hand */
int pdswout;    /* number of times daemon called for swapout */
int pdfreed;    /* number of pages daemon freed since boot */
int pdscans;    /* number of pages daemon scanned since boot */
int pdanscan;   /* number of anonymous pages scanned by daemon */
int pdobscan;   /* number of object pages scanned by daemon */
int pdreact;    /* number of pages daemon reactivated since boot */
int pdbusy;     /* number of times daemon found a busy page */
int pdpageouts; /* number of times daemon started a pageout */
int pdpending;  /* number of times daemon got a pending pageout */
int pddeact;    /* number of pages daemon deactivates */
.Ed
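.Pp
From userland, a hedged sketch of retrieving the kernel version
independent view via
.Dv VM_UVMEXP2
with
.Xr sysctl 3
follows; the header location and the structure name used here are
assumptions of the example:
.Bd -literal
	#include <sys/param.h>
	#include <sys/sysctl.h>
	#include <uvm/uvm_extern.h>
	#include <err.h>

	struct uvmexp_sysctl u;
	int mib[2] = { CTL_VM, VM_UVMEXP2 };
	size_t len = sizeof(u);

	if (sysctl(mib, 2, &u, &len, NULL, 0) == -1)
		err(1, "sysctl");
.Ed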
.Sh NOTES
.Fn uvm_chgkprot
is only available if the kernel has been compiled with options
.Dv KGDB .
.Pp
All structures and types whose names begin with
.Dq vm_
will be renamed to
.Dq uvm_ .
.Sh SEE ALSO
.Xr swapctl 2 ,
.Xr getloadavg 3 ,
.Xr kvm 3 ,
.Xr sysctl 3 ,
.Xr ddb 4 ,
.Xr options 4 ,
.Xr memoryallocators 9 ,
.Xr pmap 9
.Sh HISTORY
UVM is a new VM system developed at Washington University in St. Louis
(Missouri).
UVM's roots lie partly in the Mach-based
.Bx 4.4
VM system, the
.Fx
VM system, and the SunOS 4 VM system.
UVM's basic structure is based on the
.Bx 4.4
VM system.
UVM's new anonymous memory system is based on the
anonymous memory system found in the SunOS 4 VM (as described in papers
published by Sun Microsystems, Inc.).
UVM also includes a number of features new to
.Bx
including page loanout, map entry passing, simplified
copy-on-write, and clustered anonymous memory pageout.
UVM is also further documented in an August 1998 dissertation by
Charles D. Cranor.
.Pp
UVM appeared in
.Nx 1.4 .
.Sh AUTHORS
Charles D. Cranor
.Aq chuck@ccrc.wustl.edu
designed and implemented UVM.
.Pp
Matthew Green
.Aq mrg@eterna.com.au
wrote the swap-space management code and handled the logistical issues
involved with merging UVM into the
.Nx
source tree.
.Pp
Chuck Silvers
.Aq chuq@chuq.com
implemented the aobj pager, thus allowing UVM to support System V shared
memory and process swapping.
He also designed and implemented the UBC part of UVM, which uses UVM pages
to cache vnode data rather than the traditional buffer cache buffers.