1.\"	$NetBSD: uvm.9,v 1.94 2008/11/09 19:57:18 wiz Exp $
2.\"
3.\" Copyright (c) 1998 Matthew R. Green
4.\" All rights reserved.
5.\"
6.\" Redistribution and use in source and binary forms, with or without
7.\" modification, are permitted provided that the following conditions
8.\" are met:
9.\" 1. Redistributions of source code must retain the above copyright
10.\"    notice, this list of conditions and the following disclaimer.
11.\" 2. Redistributions in binary form must reproduce the above copyright
12.\"    notice, this list of conditions and the following disclaimer in the
13.\"    documentation and/or other materials provided with the distribution.
14.\"
15.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
16.\" IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17.\" OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18.\" IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
19.\" INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
20.\" BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
21.\" LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
22.\" AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
23.\" OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25.\" SUCH DAMAGE.
26.\"
27.Dd October 9, 2008
28.Dt UVM 9
29.Os
30.Sh NAME
31.Nm uvm
32.Nd virtual memory system external interface
33.Sh SYNOPSIS
34.In sys/param.h
35.In uvm/uvm.h
36.Sh DESCRIPTION
37The UVM virtual memory system manages access to the computer's memory
38resources.
39User processes and the kernel access these resources through
40UVM's external interface.
41UVM's external interface includes functions that:
42.Pp
43.Bl -hyphen -compact
44.It
45initialize UVM sub-systems
46.It
47manage virtual address spaces
48.It
49resolve page faults
50.It
51memory map files and devices
52.It
53perform uio-based I/O to virtual memory
54.It
55allocate and free kernel virtual memory
56.It
57allocate and free physical memory
58.El
59.Pp
60In addition to exporting these services, UVM has two kernel-level processes:
61pagedaemon and swapper.
62The pagedaemon process sleeps until physical memory becomes scarce.
When that happens, the pagedaemon is awakened.
64It scans physical memory, paging out and freeing memory that has not
65been recently used.
66The swapper process swaps in runnable processes that are currently swapped
67out, if there is room.
68.Pp
69There are also several miscellaneous functions.
70.Sh INITIALIZATION
71.Ft void
72.br
73.Fn uvm_init "void" ;
74.Pp
75.Ft void
76.br
77.Fn uvm_init_limits "struct lwp *l" ;
78.Pp
79.Ft void
80.br
81.Fn uvm_setpagesize "void" ;
82.Pp
83.Ft void
84.br
85.Fn uvm_swap_init "void" ;
86.Pp
87.Fn uvm_init
88sets up the UVM system at system boot time, after the
console has been set up.
90It initializes global state, the page, map, kernel virtual memory state,
91machine-dependent physical map, kernel memory allocator,
92pager and anonymous memory sub-systems, and then enables
93paging of kernel objects.
94.Pp
95.Fn uvm_init_limits
96initializes process limits for the named process.
97This is for use by the system startup for process zero, before any
98other processes are created.
99.Pp
100.Fn uvm_setpagesize
101initializes the uvmexp members pagesize (if not already done by
102machine-dependent code), pageshift and pagemask.
103It should be called by machine-dependent code early in the
104.Fn pmap_init
105call (see
106.Xr pmap 9 ) .
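.Pp
As an illustration only, machine-dependent code might arrange the call as
in the following hypothetical
.Fn pmap_init
fragment:
.Bd -literal
void
pmap_init(void)
{

        /*
         * Fill in uvmexp.pageshift and uvmexp.pagemask (and
         * uvmexp.pagesize, if machine-dependent code has not
         * already set it).
         */
        uvm_setpagesize();

        /* ... remaining machine-dependent initialization ... */
}
.Ed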
107.Pp
108.Fn uvm_swap_init
109initializes the swap sub-system.
110.Sh VIRTUAL ADDRESS SPACE MANAGEMENT
111.Ft int
112.br
113.Fn uvm_map "struct vm_map *map" "vaddr_t *startp" "vsize_t size" "struct uvm_object *uobj" "voff_t uoffset" "vsize_t align" "uvm_flag_t flags" ;
114.Pp
115.Ft void
116.br
117.Fn uvm_unmap "struct vm_map *map" "vaddr_t start" "vaddr_t end" ;
118.Pp
119.Ft int
120.br
121.Fn uvm_map_pageable "struct vm_map *map" "vaddr_t start" "vaddr_t end" "bool new_pageable" "int lockflags" ;
122.Pp
123.Ft bool
124.br
125.Fn uvm_map_checkprot "struct vm_map *map" "vaddr_t start" "vaddr_t end" "vm_prot_t protection" ;
126.Pp
127.Ft int
128.br
129.Fn uvm_map_protect "struct vm_map *map" "vaddr_t start" "vaddr_t end" "vm_prot_t new_prot" "bool set_max" ;
130.Pp
131.Ft int
132.br
133.Fn uvm_deallocate "struct vm_map *map" "vaddr_t start" "vsize_t size" ;
134.Pp
135.Ft struct vmspace *
136.br
137.Fn uvmspace_alloc "vaddr_t min" "vaddr_t max" "int pageable" ;
138.Pp
139.Ft void
140.br
141.Fn uvmspace_exec "struct lwp *l" "vaddr_t start" "vaddr_t end" ;
142.Pp
143.Ft struct vmspace *
144.br
145.Fn uvmspace_fork "struct vmspace *vm" ;
146.Pp
147.Ft void
148.br
149.Fn uvmspace_free "struct vmspace *vm1" ;
150.Pp
151.Ft void
152.br
153.Fn uvmspace_share "struct proc *p1" "struct proc *p2" ;
154.Pp
155.Ft void
156.br
157.Fn uvmspace_unshare "struct lwp *l" ;
158.Pp
159.Ft bool
160.br
161.Fn uvm_uarea_alloc "vaddr_t *uaddrp" ;
162.Pp
163.Ft void
164.br
165.Fn uvm_uarea_free "vaddr_t uaddr" ;
166.Pp
167.Fn uvm_map
168establishes a valid mapping in map
169.Fa map ,
170which must be unlocked.
171The new mapping has size
172.Fa size ,
173which must be a multiple of
174.Dv PAGE_SIZE .
175The
176.Fa uobj
177and
178.Fa uoffset
179arguments can have four meanings.
180When
181.Fa uobj
182is
183.Dv NULL
184and
185.Fa uoffset
186is
187.Dv UVM_UNKNOWN_OFFSET ,
188.Fn uvm_map
189does not use the machine-dependent
190.Dv PMAP_PREFER
191function.
192If
193.Fa uoffset
194is any other value, it is used as the hint to
195.Dv PMAP_PREFER .
196When
197.Fa uobj
198is not
199.Dv NULL
200and
201.Fa uoffset
202is
203.Dv UVM_UNKNOWN_OFFSET ,
204.Fn uvm_map
205finds the offset based upon the virtual address, passed as
206.Fa startp .
207If
208.Fa uoffset
is any other value, a normal mapping is done at this offset.
210The start address of the map will be returned in
211.Fa startp .
212.Pp
213.Fa align
214specifies alignment of mapping unless
215.Dv UVM_FLAG_FIXED
216is specified in
217.Fa flags .
218.Fa align
219must be a power of 2.
220.Pp
221.Fa flags
222passed to
223.Fn uvm_map
224are typically created using the
225.Fn UVM_MAPFLAG "vm_prot_t prot" "vm_prot_t maxprot" "vm_inherit_t inh" "int advice" "int flags"
226macro, which uses the following values.
The values that
.Fa prot
and
.Fa maxprot
can take are:
232.Bd -literal
233#define UVM_PROT_MASK   0x07    /* protection mask */
234#define UVM_PROT_NONE   0x00    /* protection none */
235#define UVM_PROT_ALL    0x07    /* everything */
236#define UVM_PROT_READ   0x01    /* read */
237#define UVM_PROT_WRITE  0x02    /* write */
238#define UVM_PROT_EXEC   0x04    /* exec */
239#define UVM_PROT_R      0x01    /* read */
240#define UVM_PROT_W      0x02    /* write */
241#define UVM_PROT_RW     0x03    /* read-write */
242#define UVM_PROT_X      0x04    /* exec */
243#define UVM_PROT_RX     0x05    /* read-exec */
244#define UVM_PROT_WX     0x06    /* write-exec */
245#define UVM_PROT_RWX    0x07    /* read-write-exec */
246.Ed
247.Pp
248The values that
249.Fa inh
250can take are:
251.Bd -literal
252#define UVM_INH_MASK    0x30    /* inherit mask */
253#define UVM_INH_SHARE   0x00    /* "share" */
254#define UVM_INH_COPY    0x10    /* "copy" */
255#define UVM_INH_NONE    0x20    /* "none" */
256#define UVM_INH_DONATE  0x30    /* "donate" \*[Lt]\*[Lt] not used */
257.Ed
258.Pp
259The values that
260.Fa advice
261can take are:
262.Bd -literal
263#define UVM_ADV_NORMAL     0x0  /* 'normal' */
264#define UVM_ADV_RANDOM     0x1  /* 'random' */
265#define UVM_ADV_SEQUENTIAL 0x2  /* 'sequential' */
266#define UVM_ADV_MASK       0x7  /* mask */
267.Ed
268.Pp
269The values that
270.Fa flags
271can take are:
272.Bd -literal
273#define UVM_FLAG_FIXED   0x010000 /* find space */
274#define UVM_FLAG_OVERLAY 0x020000 /* establish overlay */
275#define UVM_FLAG_NOMERGE 0x040000 /* don't merge map entries */
276#define UVM_FLAG_COPYONW 0x080000 /* set copy_on_write flag */
277#define UVM_FLAG_AMAPPAD 0x100000 /* for bss: pad amap to reduce malloc() */
278#define UVM_FLAG_TRYLOCK 0x200000 /* fail if we can not lock map */
279.Ed
280.Pp
281The
282.Dv UVM_MAPFLAG
macro arguments can be combined with a bitwise OR operator.
284There are several special purpose macros for checking protection
285combinations, e.g., the
286.Dv UVM_PROT_WX
287macro.
288There are also some additional macros to extract bits from the flags.
289The
290.Dv UVM_PROTECTION ,
291.Dv UVM_INHERIT ,
292.Dv UVM_MAXPROTECTION
293and
294.Dv UVM_ADVICE
295macros return the protection, inheritance, maximum protection and advice,
296respectively.
297.Fn uvm_map
298returns a standard UVM return value.
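.Pp
As an illustration only, the following sketch requests an anonymous,
copy-on-write mapping; the variables
.Va map
and
.Va size
are assumed to be supplied by the caller:
.Bd -literal
vaddr_t va = 0;         /* chosen start address is returned here */
int error;

error = uvm_map(map, &va, round_page(size), NULL, UVM_UNKNOWN_OFFSET,
    0,                          /* no alignment preference */
    UVM_MAPFLAG(UVM_PROT_RW,    /* protection */
        UVM_PROT_RW,            /* maximum protection */
        UVM_INH_COPY,           /* inheritance across fork */
        UVM_ADV_NORMAL,         /* no access pattern hint */
        UVM_FLAG_COPYONW));     /* copy-on-write */
if (error != 0)
        return error;           /* standard UVM return value */
.Ed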
299.Pp
300.Fn uvm_unmap
301removes a valid mapping,
302from
303.Fa start
304to
305.Fa end ,
306in map
307.Fa map ,
308which must be unlocked.
309.Pp
310.Fn uvm_map_pageable
311changes the pageability of the pages in the range from
312.Fa start
313to
314.Fa end
315in map
316.Fa map
317to
318.Fa new_pageable .
319.Fn uvm_map_pageable
320returns a standard UVM return value.
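.Pp
For example, a caller might wire down a page-aligned range with the
following sketch (no special locking flags are assumed):
.Bd -literal
/* Wire [start, end): false means the range is no longer pageable. */
error = uvm_map_pageable(map, start, end, false, 0);
.Ed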
321.Pp
322.Fn uvm_map_checkprot
323checks the protection of the range from
324.Fa start
325to
326.Fa end
327in map
328.Fa map
329against
330.Fa protection .
331This returns either
332.Dv true
333or
334.Dv false .
335.Pp
336.Fn uvm_map_protect
changes the protection of the range from
338.Fa start
339to
340.Fa end
341in map
342.Fa map
343to
344.Fa new_prot ,
also setting the maximum protection of the region to
346.Fa new_prot
347if
348.Fa set_max
349is true.
350This function returns a standard UVM return value.
351.Pp
352.Fn uvm_deallocate
353deallocates kernel memory in map
354.Fa map
355from address
356.Fa start
357to
358.Fa start + size .
359.Pp
360.Fn uvmspace_alloc
361allocates and returns a new address space, with ranges from
362.Fa min
363to
364.Fa max ,
365setting the pageability of the address space to
366.Fa pageable .
367.Pp
368.Fn uvmspace_exec
369either reuses the address space of lwp
370.Fa l
371if there are no other references to it, or creates
372a new one with
373.Fn uvmspace_alloc .
374The range of valid addresses in the address space is reset to
375.Fa start
376through
377.Fa end .
378.Pp
379.Fn uvmspace_fork
380creates and returns a new address space based upon the
381.Fa vm1
382address space, typically used when allocating an address space for a
383child process.
384.Pp
385.Fn uvmspace_free
386lowers the reference count on the address space
387.Fa vm ,
388freeing the data structures if there are no other references.
389.Pp
390.Fn uvmspace_share
391causes process
.Fa p2
393to share the address space of
394.Fa p1 .
395.Pp
396.Fn uvmspace_unshare
397ensures that lwp
398.Fa l
399has its own, unshared address space, by creating a new one if
400necessary by calling
401.Fn uvmspace_fork .
402.Pp
403.Fn uvm_uarea_alloc
404allocates virtual space for a u-area (i.e., a kernel stack) and stores
405its virtual address in
406.Fa *uaddrp .
407The return value is
408.Dv true
409if the u-area is already backed by wired physical memory, otherwise
410.Dv false .
411.Pp
412.Fn uvm_uarea_free
413frees a u-area allocated with
414.Fn uvm_uarea_alloc ,
freeing both the virtual space and any physical pages that may have been
allocated later to back that virtual space.
417.Sh PAGE FAULT HANDLING
418.Ft int
419.br
420.Fn uvm_fault "struct vm_map *orig_map" "vaddr_t vaddr" "vm_prot_t access_type" ;
421.Pp
422.Fn uvm_fault
423is the main entry point for faults.
424It takes
425.Fa orig_map
as the map in which the fault originated, a
.Fa vaddr
offset into the map at which the fault occurred, and
429.Fa access_type
430describing the type of access requested.
431.Fn uvm_fault
432returns a standard UVM return value.
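.Pp
Machine-dependent trap handlers typically end up making a call resembling
the following sketch; the variable names are illustrative only:
.Bd -literal
struct vm_map *map = &p->p_vmspace->vm_map;     /* faulting process */

error = uvm_fault(map, trunc_page(fault_va), VM_PROT_READ);
if (error != 0)
        /* deliver SIGSEGV/SIGBUS, or panic on a kernel fault */ ;
.Ed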
433.Sh MEMORY MAPPING FILES AND DEVICES
434.Ft void
435.br
436.Fn uvm_vnp_setsize "struct vnode *vp" "voff_t newsize" ;
437.Pp
438.Ft void *
439.br
440.Fn ubc_alloc "struct uvm_object *uobj" "voff_t offset" "vsize_t *lenp" \
441"int advice" "int flags" ;
442.Pp
443.Ft void
444.br
445.Fn ubc_release "void *va" "int flags" ;
446.Pp
.Ft int
448.br
449.Fn ubc_uiomove "struct uvm_object *uobj" "struct uio *uio" "vsize_t todo" \
450"int advice" "int flags" ;
451.Pp
452.Fn uvm_vnp_setsize
453sets the size of vnode
454.Fa vp
455to
456.Fa newsize .
The caller must hold a reference to the vnode.
458If the vnode shrinks, pages no longer used are discarded.
459.Pp
460.Fn ubc_alloc
461creates a kernel mapping of
462.Fa uobj
463starting at offset
464.Fa offset .
465The desired length of the mapping is pointed to by
466.Fa lenp ,
467but the actual mapping may be smaller than this.
468.Fa lenp
469is updated to contain the actual length mapped.
470.Fa advice
471is the access pattern hint, which must be one of
472.Pp
473.Bl -tag -offset indent -width "UVM_ADV_SEQUENTIAL" -compact
474.It UVM_ADV_NORMAL
475No hint
476.It UVM_ADV_RANDOM
477Random access hint
478.It UVM_ADV_SEQUENTIAL
479Sequential access hint (from lower offset to higher offset)
480.El
481.Pp
482The possible
483.Fa flags
484are
485.Pp
486.Bl -tag -offset indent -width "UVM_ADV_SEQUENTIAL" -compact
487.It UBC_READ
488Mapping will be accessed for read.
489.It UBC_WRITE
490Mapping will be accessed for write.
491.It UBC_FAULTBUSY
Fault in the window's pages during the mapping operation itself rather
than on first access.
This only makes sense for write accesses.
494.El
495.Pp
496Once the mapping is created, it must be accessed only by methods that can
497handle faults, such as
498.Fn uiomove
499or
500.Fn kcopy .
501Page faults on the mapping will result in the object's pager
502method being called to resolve the fault.
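.Pp
As an illustration only, a file system read path might use the window
interface as sketched below; the vnode
.Va vp ,
the
.Va uio ,
and the
.Va bytes_left_in_file
count are assumptions of the example:
.Bd -literal
while (uio->uio_resid > 0 && bytes_left_in_file > 0) {
        vsize_t bytes = MIN(uio->uio_resid, bytes_left_in_file);
        void *win;

        win = ubc_alloc(&vp->v_uobj, uio->uio_offset, &bytes,
            UVM_ADV_SEQUENTIAL, UBC_READ);
        error = uiomove(win, bytes, uio);       /* may fault in pages */
        ubc_release(win, 0);                    /* 0: allow caching */
        if (error)
                break;
        bytes_left_in_file -= bytes;
}
.Ed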
503.Pp
504.Fn ubc_release
505frees the mapping at
506.Fa va
507for reuse.
508The mapping may be cached to speed future accesses to the same region
509of the object.
510The flags can be any of
511.Pp
512.Bl -tag -offset indent -width "UVM_ADV_SEQUENTIAL" -compact
513.It UBC_UNMAP
514Do not cache mapping.
515.El
516.Pp
517.Fn ubc_uiomove
allocates a UBC memory window, performs I/O on it, and unmaps the window.
519The
520.Fa advice
521parameter takes the same values as the respective parameter in
522.Fn ubc_alloc
523and the
524.Fa flags
525parameter takes the same arguments as
526.Fn ubc_alloc
527and
.Fn ubc_release .
529Additionally, the flag
530.Dv UBC_PARTIALOK
can be provided to indicate that it is acceptable to return with only a
partial transfer completed if an error occurs mid-transfer.
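.Pp
The same read path can be written more compactly with
.Fn ubc_uiomove ,
as in this sketch (again assuming the vnode
.Va vp ) :
.Bd -literal
error = ubc_uiomove(&vp->v_uobj, uio, bytes, UVM_ADV_SEQUENTIAL,
    UBC_READ);
.Ed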
533.Sh VIRTUAL MEMORY I/O
534.Ft int
535.br
536.Fn uvm_io "struct vm_map *map" "struct uio *uio" ;
537.Pp
538.Fn uvm_io
539performs the I/O described in
540.Fa uio
541on the memory described in
542.Fa map .
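.Pp
For example, a facility that reads another process's memory, in the style
of
.Xr ptrace 2 ,
might be sketched as:
.Bd -literal
/* "uio" describes the transfer; "tp" is the target process (sketch). */
error = uvm_io(&tp->p_vmspace->vm_map, uio);
.Ed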
543.Sh ALLOCATION OF KERNEL MEMORY
544.Ft vaddr_t
545.br
546.Fn uvm_km_alloc "struct vm_map *map" "vsize_t size" "vsize_t align" "uvm_flag_t flags" ;
547.Pp
548.Ft void
549.br
550.Fn uvm_km_free "struct vm_map *map" "vaddr_t addr" "vsize_t size" "uvm_flag_t flags" ;
551.Pp
552.Ft struct vm_map *
553.br
554.Fn uvm_km_suballoc "struct vm_map *map" "vaddr_t *min" "vaddr_t *max" \
555"vsize_t size" "int flags" "bool fixed" "struct vm_map *submap" ;
556.Pp
557.Fn uvm_km_alloc
558allocates
559.Fa size
560bytes of kernel memory in map
561.Fa map .
562The first address of the allocated memory range will be aligned according to the
563.Fa align
564argument
565.Pq specify 0 if no alignment is necessary .
566The alignment must be a multiple of page size.
567The
568.Fa flags
argument is a bitwise inclusive OR of the allocation type and operation flags.
570.Pp
571The allocation type should be one of:
572.Bl -tag -width UVM_KMF_PAGEABLE
573.It UVM_KMF_WIRED
574Wired memory.
575.It UVM_KMF_PAGEABLE
576Demand-paged zero-filled memory.
577.It UVM_KMF_VAONLY
578Virtual address only.
579No physical pages are mapped in the allocated region.
580If necessary, it's the caller's responsibility to enter page mappings.
581It's also the caller's responsibility to clean up the mappings before freeing
582the address range.
583.El
584.Pp
585The following operation flags are available:
586.Bl -tag -width UVM_KMF_PAGEABLE
587.It UVM_KMF_CANFAIL
588Can fail even if
589.Dv UVM_KMF_NOWAIT
590is not specified and
591.Dv UVM_KMF_WAITVA
592is specified.
593.It UVM_KMF_ZERO
594Request zero-filled memory.
595Only supported for
596.Dv UVM_KMF_WIRED .
597Shouldn't be used with other types.
598.It UVM_KMF_TRYLOCK
599Fail if we can't lock the map.
600.It UVM_KMF_NOWAIT
601Fail immediately if no memory is available.
602.It UVM_KMF_WAITVA
603Sleep to wait for the virtual address resources if needed.
604.El
605.Pp
606(If neither
607.Dv UVM_KMF_NOWAIT
608nor
609.Dv UVM_KMF_CANFAIL
610are specified and
611.Dv UVM_KMF_WAITVA
612is specified,
613.Fn uvm_km_alloc
614will never fail, but rather sleep indefinitely until the allocation succeeds.)
615.Pp
616Pageability of the pages allocated with
617.Dv UVM_KMF_PAGEABLE
618can be changed by
619.Fn uvm_map_pageable .
620In that case, the entire range must be changed atomically.
621Changing a part of the range is not supported.
622.Pp
623.Fn uvm_km_free
624frees the memory range allocated by
625.Fn uvm_km_alloc .
626.Fa addr
627must be an address returned by
628.Fn uvm_km_alloc .
629.Fa map
630and
631.Fa size
632must be the same as the ones used for the corresponding
633.Fn uvm_km_alloc .
634.Fa flags
635must be the allocation type used for the corresponding
636.Fn uvm_km_alloc .
637.Pp
638.Fn uvm_km_free
639is the only way to free memory ranges allocated by
640.Fn uvm_km_alloc .
641.Fn uvm_unmap
642must not be used.
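.Pp
A typical wired, zero-filled allocation and its release might look like
the following sketch, using the global
.Va kernel_map :
.Bd -literal
vaddr_t va;
vsize_t sz = round_page(size);

/* With UVM_KMF_WAITVA and no NOWAIT/CANFAIL this cannot fail. */
va = uvm_km_alloc(kernel_map, sz, 0,
    UVM_KMF_WIRED | UVM_KMF_ZERO | UVM_KMF_WAITVA);

/* ... use the memory ... */

/* The type flag must match the allocation: UVM_KMF_WIRED. */
uvm_km_free(kernel_map, va, sz, UVM_KMF_WIRED);
.Ed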
643.Pp
644.Fn uvm_km_suballoc
allocates a submap of
646.Fa map ,
647creating a new map if
648.Fa submap
649is
650.Dv NULL .
651The addresses of the submap can be specified exactly by setting the
652.Fa fixed
653argument to true, which causes the
654.Fa min
argument to specify the starting address of the submap.
656If
657.Fa fixed
658is false, any address of size
659.Fa size
660will be allocated from
661.Fa map
662and the start and end addresses returned in
663.Fa min
664and
665.Fa max .
666The
667.Fa flags
668are used to initialize the created submap.
The following flags can be set (a usage sketch follows the list):
670.Bl -tag -width VM_MAP_PAGEABLE
671.It VM_MAP_PAGEABLE
672Entries in the map may be paged out.
673.It VM_MAP_INTRSAFE
674Map should be interrupt-safe.
675.It VM_MAP_TOPDOWN
A top-down mapping should be arranged.
.El
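.Pp
As a sketch, a subsystem might create its own pageable submap of
.Va kernel_map
as follows; the size
.Dv FOO_MAP_SIZE
is hypothetical:
.Bd -literal
vaddr_t minaddr = 0, maxaddr = 0;
struct vm_map *foo_map;

foo_map = uvm_km_suballoc(kernel_map, &minaddr, &maxaddr,
    FOO_MAP_SIZE, VM_MAP_PAGEABLE, false, NULL);
.Ed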
677.Sh ALLOCATION OF PHYSICAL MEMORY
678.Ft struct vm_page *
679.br
680.Fn uvm_pagealloc "struct uvm_object *uobj" "voff_t off" "struct vm_anon *anon" "int flags" ;
681.Pp
682.Ft void
683.br
684.Fn uvm_pagerealloc "struct vm_page *pg" "struct uvm_object *newobj" "voff_t newoff" ;
685.Pp
686.Ft void
687.br
688.Fn uvm_pagefree "struct vm_page *pg" ;
689.Pp
690.Ft int
691.br
692.Fn uvm_pglistalloc "psize_t size" "paddr_t low" "paddr_t high" "paddr_t alignment" "paddr_t boundary" "struct pglist *rlist" "int nsegs" "int waitok" ;
693.Pp
694.Ft void
695.br
696.Fn uvm_pglistfree "struct pglist *list" ;
697.Pp
698.Ft void
699.br
700.Fn uvm_page_physload "vaddr_t start" "vaddr_t end" "vaddr_t avail_start" "vaddr_t avail_end" "int free_list" ;
701.Pp
702.Fn uvm_pagealloc
allocates a page of memory at offset
704.Fa off
705in either the object
706.Fa uobj
707or the anonymous memory
708.Fa anon ,
709which must be locked by the caller.
710Only one of
711.Fa uobj
712and
713.Fa anon
714can be non
715.Dv NULL .
716Returns
717.Dv NULL
718when no page can be found.
719The flags can be any of
720.Bd -literal
721#define UVM_PGA_USERESERVE      0x0001  /* ok to use reserve pages */
722#define UVM_PGA_ZERO            0x0002  /* returned page must be zero'd */
723.Ed
724.Pp
725.Dv UVM_PGA_USERESERVE
726means to allocate a page even if that will result in the number of free pages
727being lower than
728.Dv uvmexp.reserve_pagedaemon
729(if the current thread is the pagedaemon) or
730.Dv uvmexp.reserve_kernel
731(if the current thread is not the pagedaemon).
732.Dv UVM_PGA_ZERO
733causes the returned page to be filled with zeroes, either by allocating it
734from a pool of pre-zeroed pages or by zeroing it in-line as necessary.
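.Pp
For illustration, a pager-like consumer might allocate a zeroed page for
an object as follows; locking of
.Fa uobj
is assumed to be handled by the caller:
.Bd -literal
struct vm_page *pg;

pg = uvm_pagealloc(uobj, off, NULL, UVM_PGA_ZERO);
if (pg == NULL) {
        /*
         * No page was available.  Real callers typically drop the
         * object lock, wait for memory (e.g. with uvm_wait()), and
         * then retry the allocation.
         */
}
.Ed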
735.Pp
736.Fn uvm_pagerealloc
737reallocates page
738.Fa pg
739to a new object
740.Fa newobj ,
741at a new offset
742.Fa newoff .
743.Pp
744.Fn uvm_pagefree
745frees the physical page
746.Fa pg .
747If the content of the page is known to be zero-filled,
the caller should set
749.Dv PG_ZERO
750in pg-\*[Gt]flags so that the page allocator will use
751the page to serve future
752.Dv UVM_PGA_ZERO
753requests efficiently.
754.Pp
755.Fn uvm_pglistalloc
allocates a list of pages of
.Fa size
bytes, subject to various constraints.
759.Fa low
760and
761.Fa high
762describe the lowest and highest addresses acceptable for the list.
763If
764.Fa alignment
is non-zero, it specifies the required alignment of the list, in bytes,
and must be a power of two.
767If
768.Fa boundary
769is non-zero, no segment of the list may cross this power-of-two
770boundary, relative to zero.
771.Fa nsegs
772is the maximum number of physically contiguous segments.
773If
774.Fa waitok
775is non-zero, the function may sleep until enough memory is available.
776(It also may give up in some situations, so a non-zero
777.Fa waitok
778does not imply that
779.Fn uvm_pglistalloc
780cannot return an error.)
781The allocated memory is returned in the
782.Fa rlist
list; the caller provides only the storage, and the list is initialized by
784.Fn uvm_pglistalloc .
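.Pp
For example, a driver needing physically contiguous memory below 16 MB
(for ISA-style DMA) might use the following sketch:
.Bd -literal
struct pglist plist;
int error;

error = uvm_pglistalloc(round_page(size), 0, 0xffffff, PAGE_SIZE, 0,
    &plist, 1 /* one contiguous segment */, 1 /* ok to wait */);
if (error)
        return error;
/* ... map and use the pages on plist ... */
uvm_pglistfree(&plist);
.Ed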
785.Pp
786.Fn uvm_pglistfree
787frees the list of pages pointed to by
788.Fa list .
789If the content of the page is known to be zero-filled,
the caller should set
791.Dv PG_ZERO
792in pg-\*[Gt]flags so that the page allocator will use
793the page to serve future
794.Dv UVM_PGA_ZERO
795requests efficiently.
796.Pp
797.Fn uvm_page_physload
798loads physical memory segments into VM space on the specified
799.Fa free_list .
800It must be called at system boot time to set up physical memory
801management pages.
802The arguments describe the
803.Fa start
804and
805.Fa end
806of the physical addresses of the segment, and the available start and end
807addresses of pages not already in use.
808.\" XXX expand on "system boot time"!
809.Sh PROCESSES
810.Ft void
811.br
812.Fn uvm_pageout "void" ;
813.Pp
814.Ft void
815.br
816.Fn uvm_scheduler "void" ;
817.Pp
818.Ft void
819.br
820.Fn uvm_swapin "struct lwp *l" ;
821.Pp
822.Fn uvm_pageout
823is the main loop for the page daemon.
824.Pp
825.Fn uvm_scheduler
is the main loop of process zero, which is to be called after the
827system has finished starting other processes.
828It handles the swapping in of runnable, swapped out processes in priority
829order.
830.Pp
831.Fn uvm_swapin
832swaps in the named lwp.
833.Sh PAGE LOAN
834.Ft int
835.br
836.Fn uvm_loan "struct vm_map *map" "vaddr_t start" "vsize_t len" "void *v" "int flags" ;
837.Pp
838.Ft void
839.br
840.Fn uvm_unloan "void *v" "int npages" "int flags" ;
841.Pp
842.Fn uvm_loan
843loans pages in a map out to anons or to the kernel.
844.Fa map
845should be unlocked,
846.Fa start
847and
848.Fa len
849should be multiples of
850.Dv PAGE_SIZE .
851Argument
852.Fa flags
853should be one of
854.Bd -literal
855#define UVM_LOAN_TOANON       0x01    /* loan to anons */
856#define UVM_LOAN_TOPAGE       0x02    /* loan to kernel */
857.Ed
858.Pp
859.Fa v
860should be pointer to array of pointers to
861.Li struct anon
862or
863.Li struct vm_page ,
864as appropriate.
865The caller has to allocate memory for the array and
866ensure it's big enough to hold
867.Fa len / PAGE_SIZE
868pointers.
Returns 0 on success, or an appropriate error number otherwise.
870Note that wired pages can't be loaned out and
871.Fn uvm_loan
872will fail in that case.
873.Pp
874.Fn uvm_unloan
875kills loans on pages or anons.
876The
877.Fa v
must point to the array of pointers initialized by a previous call to
.Fn uvm_loan .
.Fa npages
should match the number of pages allocated for the loan, which is also the
number of items in the array.
883Argument
884.Fa flags
885should be one of
886.Bd -literal
887#define UVM_LOAN_TOANON       0x01    /* loan to anons */
888#define UVM_LOAN_TOPAGE       0x02    /* loan to kernel */
889.Ed
890.Pp
and should match what was used for the previous call to
892.Fn uvm_loan .
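.Pp
A sketch of loaning part of a map to the kernel and later ending the loan;
.Dv MAXPAGES
is a hypothetical upper bound chosen by the caller, and
.Fa len
must be a multiple of
.Dv PAGE_SIZE
as described above:
.Bd -literal
struct vm_page *pgs[MAXPAGES];
int npages = len >> PAGE_SHIFT;

error = uvm_loan(map, start, len, pgs, UVM_LOAN_TOPAGE);
if (error == 0) {
        /* ... use the loaned pages ... */
        uvm_unloan(pgs, npages, UVM_LOAN_TOPAGE);
}
.Ed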
893.Sh MISCELLANEOUS FUNCTIONS
894.Ft struct uvm_object *
895.br
896.Fn uao_create "vsize_t size" "int flags" ;
897.Pp
898.Ft void
899.br
900.Fn uao_detach "struct uvm_object *uobj" ;
901.Pp
902.Ft void
903.br
904.Fn uao_reference "struct uvm_object *uobj" ;
905.Pp
.Ft void
907.br
908.Fn uvm_chgkprot "void *addr" "size_t len" "int rw" ;
909.Pp
.Ft bool
911.br
912.Fn uvm_kernacc "void *addr" "size_t len" "int rw" ;
913.Pp
914.Ft int
915.br
916.Fn uvm_vslock "struct vmspace *vs" "void *addr" "size_t len" "vm_prot_t prot" ;
917.Pp
918.Ft void
919.br
920.Fn uvm_vsunlock "struct vmspace *vs" "void *addr" "size_t len" ;
921.Pp
922.Ft void
923.br
924.Fn uvm_meter "void" ;
925.Pp
926.Ft void
927.br
928.Fn uvm_fork "struct lwp *l1" "struct lwp *l2" "bool shared" ;
929.Pp
930.Ft int
931.br
932.Fn uvm_grow "struct proc *p" "vaddr_t sp" ;
933.Pp
934.Ft void
935.br
936.Fn uvn_findpages "struct uvm_object *uobj" "voff_t offset" "int *npagesp" "struct vm_page **pps" "int flags" ;
937.Pp
938.Ft void
939.br
940.Fn uvm_swap_stats "int cmd" "struct swapent *sep" "int sec" "register_t *retval" ;
941.Pp
942The
943.Fn uao_create ,
944.Fn uao_detach ,
945and
946.Fn uao_reference
947functions operate on anonymous memory objects, such as those used to support
948System V shared memory.
949.Fn uao_create
950returns an object of size
951.Fa size
952with flags:
953.Bd -literal
954#define UAO_FLAG_KERNOBJ        0x1     /* create kernel object */
955#define UAO_FLAG_KERNSWAP       0x2     /* enable kernel swap */
956.Ed
957.Pp
958which can only be used once each at system boot time.
959.Fn uao_reference
960creates an additional reference to the named anonymous memory object.
961.Fn uao_detach
962removes a reference from the named anonymous memory object, destroying
963it if removing the last reference.
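.Pp
For instance, a subsystem needing an anonymous, pageable backing object
might do the following (sketch only):
.Bd -literal
struct uvm_object *uobj;

uobj = uao_create(round_page(size), 0);   /* ordinary anonymous object */
/* ... map it or use it through its pager ... */
uao_detach(uobj);                         /* drop the last reference */
.Ed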
964.Pp
965.Fn uvm_chgkprot
966changes the protection of kernel memory from
967.Fa addr
968to
969.Fa addr + len
970to the value of
971.Fa rw .
972This is primarily useful for debuggers, for setting breakpoints.
973This function is only available with options
974.Dv KGDB .
975.Pp
976.Fn uvm_kernacc
977checks the access at address
978.Fa addr
979to
980.Fa addr + len
981for
982.Fa rw
access in the kernel address space, returning
.Dv true
if the access is allowed and
.Dv false
otherwise.
984.Pp
985.Fn uvm_vslock
986and
987.Fn uvm_vsunlock
control the wiring and unwiring of pages in the address space
.Fa vs
990from
991.Fa addr
992to
993.Fa addr + len .
994These functions are normally used to wire memory for I/O.
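.Pp
The usual wire-for-I/O pattern is sketched below; the process
.Va p ,
the buffer, and the length are assumptions of the example:
.Bd -literal
error = uvm_vslock(p->p_vmspace, buf, len, VM_PROT_READ | VM_PROT_WRITE);
if (error)
        return error;
/* ... start the I/O and wait for it to complete ... */
uvm_vsunlock(p->p_vmspace, buf, len);
.Ed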
995.Pp
996.Fn uvm_meter
997calculates the load average and wakes up the swapper if necessary.
998.Pp
999.Fn uvm_fork
1000forks a virtual address space for process' (old)
1001.Fa p1
1002and (new)
1003.Fa p2 .
1004If the
1005.Fa shared
1006argument is non zero, p1 shares its address space with p2,
1007otherwise a new address space is created.
1008This function currently has no return value, and thus cannot fail.
1009In the future, this function will be changed to allow it to
1010fail in low memory conditions.
1011.Pp
1012.Fn uvm_grow
1013increases the stack segment of process
1014.Fa p
1015to include
1016.Fa sp .
1017.Pp
1018.Fn uvn_findpages
1019looks up or creates pages in
1020.Fa uobj
1021at offset
1022.Fa offset ,
1023marks them busy and returns them in the
1024.Fa pps
1025array.
1026Currently
1027.Fa uobj
1028must be a vnode object.
1029The number of pages requested is pointed to by
1030.Fa npagesp ,
1031and this value is updated with the actual number of pages returned.
1032The flags can be
1033.Bd -literal
1034#define UFP_ALL         0x00    /* return all pages requested */
1035#define UFP_NOWAIT      0x01    /* don't sleep */
1036#define UFP_NOALLOC     0x02    /* don't allocate new pages */
1037#define UFP_NOCACHE     0x04    /* don't return pages which already exist */
1038#define UFP_NORDONLY    0x08    /* don't return PG_READONLY pages */
1039.Ed
1040.Pp
1041.Dv UFP_ALL
1042is a pseudo-flag meaning all requested pages should be returned.
1043.Dv UFP_NOWAIT
1044means that we must not sleep.
1045.Dv UFP_NOALLOC
1046causes any pages which do not already exist to be skipped.
1047.Dv UFP_NOCACHE
1048causes any pages which do already exist to be skipped.
1049.Dv UFP_NORDONLY
1050causes any pages which are marked PG_READONLY to be skipped.
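.Pp
The following sketch is modeled loosely on how the getpages code uses this
function; the object lock is assumed to be held, and
.Dv MAXPAGES
and
.Va want
are illustrative:
.Bd -literal
struct vm_page *pgs[MAXPAGES];
int npages = want;

memset(pgs, 0, sizeof(pgs));    /* clear the array, as callers do */
uvn_findpages(uobj, offset, &npages, pgs, UFP_ALL);
/* npages now holds the number of busy pages returned in pgs[]. */
.Ed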
1051.Pp
1052.Fn uvm_swap_stats
1053implements the
1054.Dv SWAP_STATS
1055and
1056.Dv SWAP_OSTATS
operations of the
1058.Xr swapctl 2
1059system call.
1060.Fa cmd
1061is the requested command,
1062.Dv SWAP_STATS
1063or
1064.Dv SWAP_OSTATS .
1065The function will copy no more than
1066.Fa sec
entries into the array pointed to by
1068.Fa sep .
1069On return,
1070.Fa retval
1071holds the actual number of entries copied in the array.
1072.Sh SYSCTL
1073UVM provides support for the
1074.Dv CTL_VM
1075domain of the
1076.Xr sysctl 3
1077hierarchy.
1078It handles the
1079.Dv VM_LOADAVG ,
1080.Dv VM_METER ,
1081.Dv VM_UVMEXP ,
1082and
1083.Dv VM_UVMEXP2
nodes, which return the current load averages, the current VM totals,
the uvmexp structure, and a kernel-version-independent view of the
uvmexp structure, respectively.
1087It also exports a number of tunables that control how much VM space is
1088allowed to be consumed by various tasks.
1089The load averages are typically accessed from userland using the
1090.Xr getloadavg 3
1091function.
The uvmexp structure holds the global state of the UVM system, and has the
following members (a userland usage sketch follows the listing):
1094.Bd -literal
1095/* vm_page constants */
1096int pagesize;   /* size of a page (PAGE_SIZE): must be power of 2 */
1097int pagemask;   /* page mask */
1098int pageshift;  /* page shift */
1099
1100/* vm_page counters */
1101int npages;     /* number of pages we manage */
1102int free;       /* number of free pages */
1103int active;     /* number of active pages */
1104int inactive;   /* number of pages that we free'd but may want back */
1105int paging;     /* number of pages in the process of being paged out */
1106int wired;      /* number of wired pages */
1107int reserve_pagedaemon; /* number of pages reserved for pagedaemon */
1108int reserve_kernel; /* number of pages reserved for kernel */
1109
1110/* pageout params */
1111int freemin;    /* min number of free pages */
1112int freetarg;   /* target number of free pages */
1113int inactarg;   /* target number of inactive pages */
1114int wiredmax;   /* max number of wired pages */
1115
1116/* swap */
1117int nswapdev;   /* number of configured swap devices in system */
1118int swpages;    /* number of PAGE_SIZE'ed swap pages */
1119int swpginuse;  /* number of swap pages in use */
1120int nswget;     /* number of times fault calls uvm_swap_get() */
1121int nanon;      /* number total of anon's in system */
1122int nfreeanon;  /* number of free anon's */
1123
1124/* stat counters */
1125int faults;             /* page fault count */
1126int traps;              /* trap count */
1127int intrs;              /* interrupt count */
1128int swtch;              /* context switch count */
1129int softs;              /* software interrupt count */
1130int syscalls;           /* system calls */
1131int pageins;            /* pagein operation count */
1132                        /* pageouts are in pdpageouts below */
1133int swapins;            /* swapins */
1134int swapouts;           /* swapouts */
1135int pgswapin;           /* pages swapped in */
1136int pgswapout;          /* pages swapped out */
1137int forks;              /* forks */
1138int forks_ppwait;       /* forks where parent waits */
1139int forks_sharevm;      /* forks where vmspace is shared */
1140
1141/* fault subcounters */
1142int fltnoram;   /* number of times fault was out of ram */
1143int fltnoanon;  /* number of times fault was out of anons */
1144int fltpgwait;  /* number of times fault had to wait on a page */
1145int fltpgrele;  /* number of times fault found a released page */
1146int fltrelck;   /* number of times fault relock called */
1147int fltrelckok; /* number of times fault relock is a success */
1148int fltanget;   /* number of times fault gets anon page */
1149int fltanretry; /* number of times fault retrys an anon get */
1150int fltamcopy;  /* number of times fault clears "needs copy" */
1151int fltnamap;   /* number of times fault maps a neighbor anon page */
1152int fltnomap;   /* number of times fault maps a neighbor obj page */
1153int fltlget;    /* number of times fault does a locked pgo_get */
1154int fltget;     /* number of times fault does an unlocked get */
1155int flt_anon;   /* number of times fault anon (case 1a) */
1156int flt_acow;   /* number of times fault anon cow (case 1b) */
1157int flt_obj;    /* number of times fault is on object page (2a) */
1158int flt_prcopy; /* number of times fault promotes with copy (2b) */
1159int flt_przero; /* number of times fault promotes with zerofill (2b) */
1160
1161/* daemon counters */
1162int pdwoke;     /* number of times daemon woke up */
1163int pdrevs;     /* number of times daemon rev'd clock hand */
1164int pdswout;    /* number of times daemon called for swapout */
1165int pdfreed;    /* number of pages daemon freed since boot */
1166int pdscans;    /* number of pages daemon scanned since boot */
1167int pdanscan;   /* number of anonymous pages scanned by daemon */
1168int pdobscan;   /* number of object pages scanned by daemon */
1169int pdreact;    /* number of pages daemon reactivated since boot */
1170int pdbusy;     /* number of times daemon found a busy page */
1171int pdpageouts; /* number of times daemon started a pageout */
1172int pdpending;  /* number of times daemon got a pending pageout */
1173int pddeact;    /* number of pages daemon deactivates */
1174.Ed
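.Pp
From userland, the kernel-version-independent counters can be retrieved
through
.Xr sysctl 3
roughly as in the following sketch (error handling trimmed; the exact
headers needed may vary):
.Bd -literal
#include <sys/param.h>
#include <sys/sysctl.h>
#include <uvm/uvm_extern.h>

int mib[2] = { CTL_VM, VM_UVMEXP2 };
struct uvmexp_sysctl u;
size_t len = sizeof(u);

if (sysctl(mib, 2, &u, &len, NULL, 0) == -1)
        /* handle the error */ ;
.Ed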
1175.Sh NOTES
1176.Fn uvm_chgkprot
1177is only available if the kernel has been compiled with options
1178.Dv KGDB .
1179.Pp
All structures and types whose names begin with
1181.Dq vm_
1182will be renamed to
1183.Dq uvm_ .
1184.Sh SEE ALSO
1185.Xr swapctl 2 ,
1186.Xr getloadavg 3 ,
1187.Xr kvm 3 ,
1188.Xr sysctl 3 ,
1189.Xr ddb 4 ,
1190.Xr options 4 ,
1191.Xr memoryallocators 9 ,
1192.Xr pmap 9
1193.Sh HISTORY
1194UVM is a new VM system developed at Washington University in St. Louis
1195(Missouri).
1196UVM's roots lie partly in the Mach-based
1197.Bx 4.4
1198VM system, the
1199.Fx
1200VM system, and the SunOS 4 VM system.
1201UVM's basic structure is based on the
1202.Bx 4.4
1203VM system.
1204UVM's new anonymous memory system is based on the
1205anonymous memory system found in the SunOS 4 VM (as described in papers
1206published by Sun Microsystems, Inc.).
1207UVM also includes a number of features new to
1208.Bx
1209including page loanout, map entry passing, simplified
1210copy-on-write, and clustered anonymous memory pageout.
1211UVM is also further documented in an August 1998 dissertation by
1212Charles D. Cranor.
1213.Pp
1214UVM appeared in
1215.Nx 1.4 .
1216.Sh AUTHORS
1217Charles D. Cranor
1218.Aq chuck@ccrc.wustl.edu
1219designed and implemented UVM.
1220.Pp
1221Matthew Green
1222.Aq mrg@eterna.com.au
1223wrote the swap-space management code and handled the logistical issues
1224involved with merging UVM into the
1225.Nx
1226source tree.
1227.Pp
1228Chuck Silvers
1229.Aq chuq@chuq.com
1230implemented the aobj pager, thus allowing UVM to support System V shared
1231memory and process swapping.
1232He also designed and implemented the UBC part of UVM, which uses UVM pages
1233to cache vnode data rather than the traditional buffer cache buffers.
1234