.\"	$NetBSD: uvm.9,v 1.87 2007/10/15 13:39:50 pooka Exp $
.\"
.\" Copyright (c) 1998 Matthew R. Green
.\" All rights reserved.
.\"
.\" Redistribution and use in source and binary forms, with or without
.\" modification, are permitted provided that the following conditions
.\" are met:
.\" 1. Redistributions of source code must retain the above copyright
.\"    notice, this list of conditions and the following disclaimer.
.\" 2. Redistributions in binary form must reproduce the above copyright
.\"    notice, this list of conditions and the following disclaimer in the
.\"    documentation and/or other materials provided with the distribution.
.\" 3. The name of the author may not be used to endorse or promote products
.\"    derived from this software without specific prior written permission.
.\"
.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
.\" IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
.\" OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
.\" IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
.\" INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
.\" BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
.\" LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
.\" AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
.\" OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
.\" SUCH DAMAGE.
.\"
.Dd October 15, 2007
.Dt UVM 9
.Os
.Sh NAME
.Nm uvm
.Nd virtual memory system external interface
.Sh SYNOPSIS
.In sys/param.h
.In uvm/uvm.h
.Sh DESCRIPTION
The UVM virtual memory system manages access to the computer's memory
resources.
User processes and the kernel access these resources through
UVM's external interface.
UVM's external interface includes functions that:
.Pp
.Bl -hyphen -compact
.It
initialise UVM sub-systems
.It
manage virtual address spaces
.It
resolve page faults
.It
memory map files and devices
.It
perform uio-based I/O to virtual memory
.It
allocate and free kernel virtual memory
.It
allocate and free physical memory
.El
.Pp
In addition to exporting these services, UVM has two kernel-level processes:
pagedaemon and swapper.
The pagedaemon process sleeps until physical memory becomes scarce.
When that happens, pagedaemon is awoken.
It scans physical memory, paging out and freeing memory that has not
been recently used.
The swapper process swaps in runnable processes that are currently swapped
out, if there is room.
.Pp
There are also several miscellaneous functions.
.Sh INITIALISATION
.Ft void
.br
.Fn uvm_init "void" ;
.Pp
.Ft void
.br
.Fn uvm_init_limits "struct lwp *l" ;
.Pp
.Ft void
.br
.Fn uvm_setpagesize "void" ;
.Pp
.Ft void
.br
.Fn uvm_swap_init "void" ;
.Pp
.Fn uvm_init
sets up the UVM system at system boot time, after the
console has been set up.
It initialises global state, the page, map, kernel virtual memory state,
machine-dependent physical map, kernel memory allocator,
pager and anonymous memory sub-systems, and then enables
paging of kernel objects.
.Pp
.Fn uvm_init_limits
initialises process limits for the named process.
This is for use by the system startup for process zero, before any
other processes are created.
.Pp
.Fn uvm_setpagesize
initialises the uvmexp members pagesize (if not already done by
machine-dependent code), pageshift and pagemask.
It should be called by machine-dependent code early in the
.Fn pmap_init
call (see
.Xr pmap 9 ) .
.Pp
.Fn uvm_swap_init
initialises the swap sub-system.
.Sh VIRTUAL ADDRESS SPACE MANAGEMENT
.Ft int
.br
.Fn uvm_map "struct vm_map *map" "vaddr_t *startp" "vsize_t size" "struct uvm_object *uobj" "voff_t uoffset" "vsize_t align" "uvm_flag_t flags" ;
.Pp
.Ft void
.br
.Fn uvm_unmap "struct vm_map *map" "vaddr_t start" "vaddr_t end" ;
.Pp
.Ft int
.br
.Fn uvm_map_pageable "struct vm_map *map" "vaddr_t start" "vaddr_t end" "bool new_pageable" "int lockflags" ;
.Pp
.Ft bool
.br
.Fn uvm_map_checkprot "struct vm_map *map" "vaddr_t start" "vaddr_t end" "vm_prot_t protection" ;
.Pp
.Ft int
.br
.Fn uvm_map_protect "struct vm_map *map" "vaddr_t start" "vaddr_t end" "vm_prot_t new_prot" "bool set_max" ;
.Pp
.Ft int
.br
.Fn uvm_deallocate "struct vm_map *map" "vaddr_t start" "vsize_t size" ;
.Pp
.Ft struct vmspace *
.br
.Fn uvmspace_alloc "vaddr_t min" "vaddr_t max" "int pageable" ;
.Pp
.Ft void
.br
.Fn uvmspace_exec "struct lwp *l" "vaddr_t start" "vaddr_t end" ;
.Pp
.Ft struct vmspace *
.br
.Fn uvmspace_fork "struct vmspace *vm" ;
.Pp
.Ft void
.br
.Fn uvmspace_free "struct vmspace *vm1" ;
.Pp
.Ft void
.br
.Fn uvmspace_share "struct proc *p1" "struct proc *p2" ;
.Pp
.Ft void
.br
.Fn uvmspace_unshare "struct lwp *l" ;
.Pp
.Ft bool
.br
.Fn uvm_uarea_alloc "vaddr_t *uaddrp" ;
.Pp
.Ft void
.br
.Fn uvm_uarea_free "vaddr_t uaddr" ;
.Pp
.Fn uvm_map
establishes a valid mapping in map
.Fa map ,
which must be unlocked.
The new mapping has size
.Fa size ,
which must be a multiple of
.Dv PAGE_SIZE .
The
.Fa uobj
and
.Fa uoffset
arguments can have four meanings.
When
.Fa uobj
is
.Dv NULL
and
.Fa uoffset
is
.Dv UVM_UNKNOWN_OFFSET ,
.Fn uvm_map
does not use the machine-dependent
.Dv PMAP_PREFER
function.
If
.Fa uoffset
is any other value, it is used as the hint to
.Dv PMAP_PREFER .
When
.Fa uobj
is not
.Dv NULL
and
.Fa uoffset
is
.Dv UVM_UNKNOWN_OFFSET ,
.Fn uvm_map
finds the offset based upon the virtual address, passed as
.Fa startp .
If
.Fa uoffset
is any other value, a normal mapping is done at this offset.
The start address of the map will be returned in
.Fa startp .
.Pp
.Fa align
specifies the alignment of the mapping unless
.Dv UVM_FLAG_FIXED
is specified in
.Fa flags .
.Fa align
must be a power of 2.
.Pp
.Fa flags
passed to
.Fn uvm_map
are typically created using the
.Fn UVM_MAPFLAG "vm_prot_t prot" "vm_prot_t maxprot" "vm_inherit_t inh" "int advice" "int flags"
macro, which uses the following values.
The values that
.Fa prot
and
.Fa maxprot
can take are:
.Bd -literal
#define UVM_PROT_MASK   0x07    /* protection mask */
#define UVM_PROT_NONE   0x00    /* protection none */
#define UVM_PROT_ALL    0x07    /* everything */
#define UVM_PROT_READ   0x01    /* read */
#define UVM_PROT_WRITE  0x02    /* write */
#define UVM_PROT_EXEC   0x04    /* exec */
#define UVM_PROT_R      0x01    /* read */
#define UVM_PROT_W      0x02    /* write */
#define UVM_PROT_RW     0x03    /* read-write */
#define UVM_PROT_X      0x04    /* exec */
#define UVM_PROT_RX     0x05    /* read-exec */
#define UVM_PROT_WX     0x06    /* write-exec */
#define UVM_PROT_RWX    0x07    /* read-write-exec */
.Ed
.Pp
The values that
.Fa inh
can take are:
.Bd -literal
#define UVM_INH_MASK    0x30    /* inherit mask */
#define UVM_INH_SHARE   0x00    /* "share" */
#define UVM_INH_COPY    0x10    /* "copy" */
#define UVM_INH_NONE    0x20    /* "none" */
#define UVM_INH_DONATE  0x30    /* "donate" \*[Lt]\*[Lt] not used */
.Ed
.Pp
The values that
.Fa advice
can take are:
.Bd -literal
#define UVM_ADV_NORMAL     0x0  /* 'normal' */
#define UVM_ADV_RANDOM     0x1  /* 'random' */
#define UVM_ADV_SEQUENTIAL 0x2  /* 'sequential' */
#define UVM_ADV_MASK       0x7  /* mask */
.Ed
.Pp
The values that
.Fa flags
can take are:
.Bd -literal
#define UVM_FLAG_FIXED   0x010000 /* find space */
#define UVM_FLAG_OVERLAY 0x020000 /* establish overlay */
#define UVM_FLAG_NOMERGE 0x040000 /* don't merge map entries */
#define UVM_FLAG_COPYONW 0x080000 /* set copy_on_write flag */
#define UVM_FLAG_AMAPPAD 0x100000 /* for bss: pad amap to reduce malloc() */
#define UVM_FLAG_TRYLOCK 0x200000 /* fail if we can not lock map */
.Ed
.Pp
The
.Dv UVM_MAPFLAG
macro arguments can be combined with a bitwise OR operator.
There are several special purpose macros for checking protection
combinations, e.g., the
.Dv UVM_PROT_WX
macro.
There are also some additional macros to extract bits from the flags.
The
.Dv UVM_PROTECTION ,
.Dv UVM_INHERIT ,
.Dv UVM_MAXPROTECTION
and
.Dv UVM_ADVICE
macros return the protection, inheritance, maximum protection and advice,
respectively.
.Fn uvm_map
returns a standard UVM return value.
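.Pp
As an illustration (a minimal sketch, not an excerpt from the kernel
sources), the following fragment maps an already referenced
.Fa uobj
read/write into the kernel map, building the flags with
.Fn UVM_MAPFLAG .
The function name and the assumption that a return value of 0 indicates
success are the editor's own here, not part of the interface description
above.
.Bd -literal
#include <sys/param.h>
#include <uvm/uvm.h>

/*
 * Sketch: map the first `size' bytes of `uobj' read/write into
 * kernel_map and return the chosen virtual address in `*vap'.
 */
int
example_map_object(struct uvm_object *uobj, vsize_t size, vaddr_t *vap)
{
        vaddr_t va;
        int error;

        size = round_page(size);
        va = vm_map_min(kernel_map);    /* hint only; not UVM_FLAG_FIXED */
        error = uvm_map(kernel_map, &va, size, uobj, 0, 0,
            UVM_MAPFLAG(UVM_PROT_RW, UVM_PROT_RW, UVM_INH_NONE,
            UVM_ADV_RANDOM, 0));
        if (error != 0)
                return error;
        *vap = va;
        return 0;
}
.Ed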
.Pp
.Fn uvm_unmap
removes a valid mapping,
from
.Fa start
to
.Fa end ,
in map
.Fa map ,
which must be unlocked.
.Pp
.Fn uvm_map_pageable
changes the pageability of the pages in the range from
.Fa start
to
.Fa end
in map
.Fa map
to
.Fa new_pageable .
.Fn uvm_map_pageable
returns a standard UVM return value.
.Pp
.Fn uvm_map_checkprot
checks the protection of the range from
.Fa start
to
.Fa end
in map
.Fa map
against
.Fa protection .
This returns either
.Dv true
or
.Dv false .
.Pp
.Fn uvm_map_protect
changes the protection of the range from
.Fa start
to
.Fa end
in map
.Fa map
to
.Fa new_prot ,
also setting the maximum protection of the region to
.Fa new_prot
if
.Fa set_max
is true.
This function returns a standard UVM return value.
.Pp
.Fn uvm_deallocate
deallocates kernel memory in map
.Fa map
from address
.Fa start
to
.Fa start + size .
.Pp
.Fn uvmspace_alloc
allocates and returns a new address space, with ranges from
.Fa min
to
.Fa max ,
setting the pageability of the address space to
.Fa pageable .
.Pp
.Fn uvmspace_exec
either reuses the address space of lwp
.Fa l
if there are no other references to it, or creates
a new one with
.Fn uvmspace_alloc .
The range of valid addresses in the address space is reset to
.Fa start
through
.Fa end .
.Pp
.Fn uvmspace_fork
creates and returns a new address space based upon the
.Fa vm
address space, typically used when allocating an address space for a
child process.
.Pp
.Fn uvmspace_free
lowers the reference count on the address space
.Fa vm1 ,
freeing the data structures if there are no other references.
.Pp
.Fn uvmspace_share
causes process
.Fa p2
to share the address space of
.Fa p1 .
.Pp
.Fn uvmspace_unshare
ensures that lwp
.Fa l
has its own, unshared address space, creating a new one with
.Fn uvmspace_fork
if necessary.
.Pp
.Fn uvm_uarea_alloc
allocates virtual space for a u-area (i.e., a kernel stack) and stores
its virtual address in
.Fa *uaddrp .
The return value is
.Dv true
if the u-area is already backed by wired physical memory, otherwise
.Dv false .
.Pp
.Fn uvm_uarea_free
frees a u-area allocated with
.Fn uvm_uarea_alloc ,
freeing both the virtual space and any physical pages that were later
allocated to back that virtual space.
.Sh PAGE FAULT HANDLING
.Ft int
.br
.Fn uvm_fault "struct vm_map *orig_map" "vaddr_t vaddr" "vm_prot_t access_type" ;
.Pp
.Fn uvm_fault
is the main entry point for faults.
It takes
.Fa orig_map
as the map in which the fault originated,
.Fa vaddr
as the offset into the map at which the fault occurred, and
.Fa access_type
describing the type of access requested.
.Fn uvm_fault
returns a standard UVM return value.
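.Pp
For illustration only, the core of a machine-dependent trap handler
usually ends up doing something along the following lines; the function
name, the way the faulting address and access type are obtained, and the
map selection test are hypothetical and differ between ports.
.Bd -literal
#include <sys/param.h>
#include <sys/proc.h>
#include <uvm/uvm.h>

/*
 * Sketch: pick the kernel map or the faulting lwp's map and let
 * uvm_fault() resolve the fault.
 */
int
example_handle_fault(struct lwp *l, vaddr_t faultva, vm_prot_t atype,
    bool usermode)
{
        struct vm_map *map;

        if (usermode || faultva < VM_MIN_KERNEL_ADDRESS)
                map = &l->l_proc->p_vmspace->vm_map;
        else
                map = kernel_map;

        return uvm_fault(map, trunc_page(faultva), atype);
}
.Ed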
.Sh MEMORY MAPPING FILES AND DEVICES
.Ft void
.br
.Fn uvm_vnp_setsize "struct vnode *vp" "voff_t newsize" ;
.Pp
.Ft void *
.br
.Fn ubc_alloc "struct uvm_object *uobj" "voff_t offset" "vsize_t *lenp" \
"int advice" "int flags" ;
.Pp
.Ft void
.br
.Fn ubc_release "void *va" "int flags" ;
.Pp
.Ft int
.br
.Fn ubc_uiomove "struct uvm_object *uobj" "struct uio *uio" "vsize_t todo" \
"int advice" "int flags" ;
.Pp
.Fn uvm_vnp_setsize
sets the size of vnode
.Fa vp
to
.Fa newsize .
The caller must hold a reference to the vnode.
If the vnode shrinks, pages no longer used are discarded.
.Pp
.Fn ubc_alloc
creates a kernel mapping of
.Fa uobj
starting at offset
.Fa offset .
The desired length of the mapping is pointed to by
.Fa lenp ,
but the actual mapping may be smaller than this.
.Fa lenp
is updated to contain the actual length mapped.
.Fa advice
is the access pattern hint, which must be one of
.Pp
.Bl -tag -offset indent -width "UVM_ADV_SEQUENTIAL" -compact
.It UVM_ADV_NORMAL
No hint
.It UVM_ADV_RANDOM
Random access hint
.It UVM_ADV_SEQUENTIAL
Sequential access hint (from lower offset to higher offset)
.El
.Pp
The possible
.Fa flags
are
.Pp
.Bl -tag -offset indent -width "UVM_ADV_SEQUENTIAL" -compact
.It UBC_READ
Mapping will be accessed for read.
.It UBC_WRITE
Mapping will be accessed for write.
.It UBC_FAULTBUSY
Fault in the window's pages already during the mapping operation.
This makes sense only for writes.
.El
.Pp
Currently,
.Fa uobj
must actually be a vnode object.
Once the mapping is created, it must be accessed only by methods that can
handle faults, such as
.Fn uiomove
or
.Fn kcopy .
Page faults on the mapping will result in the vnode's
.Fn VOP_GETPAGES
method being called to resolve the fault.
.Pp
.Fn ubc_release
frees the mapping at
.Fa va
for reuse.
The mapping may be cached to speed future accesses to the same region
of the object.
The flags can be any of
.Pp
.Bl -tag -offset indent -width "UVM_ADV_SEQUENTIAL" -compact
.It UBC_UNMAP
Do not cache mapping.
.El
.Pp
.Fn ubc_uiomove
allocates a UBC memory window, performs I/O on it, and unmaps the window.
The
.Fa advice
parameter takes the same values as the respective parameter in
.Fn ubc_alloc
and the
.Fa flags
parameter takes the same arguments as
.Fn ubc_alloc
and
.Fn ubc_release .
Additionally, the flag
.Dv UBC_PARTIALOK
can be provided to indicate that it is acceptable to return if an error
occurs mid-transfer.
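.Pp
As a sketch of how a file system read path might use these interfaces
(the function names are hypothetical, and clamping of the transfer
length to the file size is omitted), data can be copied out of a vnode's
pages either with an explicit loop over
.Fn ubc_alloc ,
.Fn uiomove ,
and
.Fn ubc_release ,
or with a single call to
.Fn ubc_uiomove :
.Bd -literal
#include <sys/param.h>
#include <sys/vnode.h>
#include <uvm/uvm.h>

/* Explicit window loop. */
int
example_read_loop(struct vnode *vp, struct uio *uio)
{
        void *win;
        vsize_t bytelen;
        int error = 0;

        while (uio->uio_resid > 0 && error == 0) {
                /* a real implementation clamps bytelen to the file size */
                bytelen = uio->uio_resid;
                win = ubc_alloc(&vp->v_uobj, uio->uio_offset, &bytelen,
                    UVM_ADV_SEQUENTIAL, UBC_READ);
                error = uiomove(win, bytelen, uio);
                ubc_release(win, 0);
        }
        return error;
}

/* The same transfer expressed as one ubc_uiomove() call. */
int
example_read_uiomove(struct vnode *vp, struct uio *uio, vsize_t todo)
{
        return ubc_uiomove(&vp->v_uobj, uio, todo,
            UVM_ADV_SEQUENTIAL, UBC_READ);
}
.Ed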
.Sh VIRTUAL MEMORY I/O
.Ft int
.br
.Fn uvm_io "struct vm_map *map" "struct uio *uio" ;
.Pp
.Fn uvm_io
performs the I/O described in
.Fa uio
on the memory described in
.Fa map .
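.Pp
A hedged sketch of the usual pattern (the function name is hypothetical,
and the uio initialisation shown is an assumption based on
.Xr uiomove 9 ) :
to read another process's memory, a uio describing a kernel buffer is
constructed and handed to
.Fn uvm_io
together with the target map.
.Bd -literal
#include <sys/param.h>
#include <sys/proc.h>
#include <sys/uio.h>
#include <uvm/uvm.h>

/*
 * Sketch: read `len' bytes at virtual address `va' in process `p'
 * into the kernel buffer `buf'.
 */
int
example_read_proc_mem(struct proc *p, vaddr_t va, void *buf, size_t len)
{
        struct iovec iov;
        struct uio uio;

        iov.iov_base = buf;
        iov.iov_len = len;
        uio.uio_iov = &iov;
        uio.uio_iovcnt = 1;
        uio.uio_offset = (off_t)va;     /* offset is the target address */
        uio.uio_resid = len;
        uio.uio_rw = UIO_READ;
        UIO_SETUP_SYSSPACE(&uio);       /* the buffer is in kernel space */

        return uvm_io(&p->p_vmspace->vm_map, &uio);
}
.Ed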
.Sh ALLOCATION OF KERNEL MEMORY
.Ft vaddr_t
.br
.Fn uvm_km_alloc "struct vm_map *map" "vsize_t size" "vsize_t align" "uvm_flag_t flags" ;
.Pp
.Ft void
.br
.Fn uvm_km_free "struct vm_map *map" "vaddr_t addr" "vsize_t size" "uvm_flag_t flags" ;
.Pp
.Ft struct vm_map *
.br
.Fn uvm_km_suballoc "struct vm_map *map" "vaddr_t *min" "vaddr_t *max" "vsize_t size" "bool pageable" "bool fixed" "struct vm_map *submap" ;
.Pp
.Fn uvm_km_alloc
allocates
.Fa size
bytes of kernel memory in map
.Fa map .
The first address of the allocated memory range will be aligned according to the
.Fa align
argument
.Pq specify 0 if no alignment is necessary .
The alignment must be a multiple of page size.
The
.Fa flags
argument is a bitwise inclusive OR of the allocation type and operation flags.
.Pp
The allocation type should be one of:
.Bl -tag -width UVM_KMF_PAGEABLE
.It UVM_KMF_WIRED
Wired memory.
.It UVM_KMF_PAGEABLE
Demand-paged zero-filled memory.
.It UVM_KMF_VAONLY
Virtual address only.
No physical pages are mapped in the allocated region.
If necessary, it's the caller's responsibility to enter page mappings.
It's also the caller's responsibility to clean up the mappings before freeing
the address range.
.El
.Pp
The following operation flags are available:
.Bl -tag -width UVM_KMF_PAGEABLE
.It UVM_KMF_CANFAIL
Can fail even if
.Dv UVM_KMF_NOWAIT
is not specified and
.Dv UVM_KMF_WAITVA
is specified.
.It UVM_KMF_ZERO
Request zero-filled memory.
Only supported for
.Dv UVM_KMF_WIRED .
Shouldn't be used with other types.
.It UVM_KMF_TRYLOCK
Fail if we can't lock the map.
.It UVM_KMF_NOWAIT
Fail immediately if no memory is available.
.It UVM_KMF_WAITVA
Sleep to wait for the virtual address resources if needed.
.El
.Pp
(If neither
.Dv UVM_KMF_NOWAIT
nor
.Dv UVM_KMF_CANFAIL
are specified and
.Dv UVM_KMF_WAITVA
is specified,
.Fn uvm_km_alloc
will never fail, but rather sleep indefinitely until the allocation succeeds.)
.Pp
Pageability of the pages allocated with
.Dv UVM_KMF_PAGEABLE
can be changed by
.Fn uvm_map_pageable .
In that case, the entire range must be changed atomically.
Changing a part of the range is not supported.
.Pp
.Fn uvm_km_free
frees the memory range allocated by
.Fn uvm_km_alloc .
.Fa addr
must be an address returned by
.Fn uvm_km_alloc .
.Fa map
and
.Fa size
must be the same as the ones used for the corresponding
.Fn uvm_km_alloc .
.Fa flags
must be the allocation type used for the corresponding
.Fn uvm_km_alloc .
.Pp
.Fn uvm_km_free
is the only way to free memory ranges allocated by
.Fn uvm_km_alloc .
.Fn uvm_unmap
must not be used.
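.Pp
For example (a minimal sketch, not a verbatim kernel excerpt; the
function names are made up and the convention that a return value of 0
means failure is assumed), a wired, zero-filled buffer can be allocated
and later released as follows.
Note that the allocation type passed to
.Fn uvm_km_free
matches the one used for the allocation.
.Bd -literal
#include <sys/param.h>
#include <uvm/uvm.h>

/* Sketch: allocate `size' bytes of wired, zeroed kernel memory. */
void *
example_kmem_get(vsize_t size)
{
        vaddr_t va;

        va = uvm_km_alloc(kernel_map, round_page(size), 0,
            UVM_KMF_WIRED | UVM_KMF_ZERO | UVM_KMF_CANFAIL);
        return (va != 0) ? (void *)va : NULL;
}

/* Sketch: release memory obtained with example_kmem_get(). */
void
example_kmem_put(void *p, vsize_t size)
{
        uvm_km_free(kernel_map, (vaddr_t)p, round_page(size),
            UVM_KMF_WIRED);
}
.Ed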
.Pp
.Fn uvm_km_suballoc
allocates a submap of
.Fa map ,
creating a new map if
.Fa submap
is
.Dv NULL .
The addresses of the submap can be specified exactly by setting the
.Fa fixed
argument to true, which causes the
.Fa min
argument to specify the beginning address of the submap.
If
.Fa fixed
is false, an address range of size
.Fa size
will be allocated from
.Fa map
and the start and end addresses returned in
.Fa min
and
.Fa max .
If
.Fa pageable
is true, entries in the map may be paged out.
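.Pp
A brief boot-time sketch (the submap name and size are hypothetical)
of carving a dedicated, non-pageable submap out of the kernel map:
.Bd -literal
#include <sys/param.h>
#include <uvm/uvm.h>

struct vm_map *example_submap;

/* Sketch: create a 16 megabyte non-pageable submap of kernel_map. */
void
example_submap_init(void)
{
        vaddr_t minaddr = 0, maxaddr = 0;

        example_submap = uvm_km_suballoc(kernel_map, &minaddr, &maxaddr,
            16 * 1024 * 1024, false, false, NULL);
}
.Ed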
.Sh ALLOCATION OF PHYSICAL MEMORY
.Ft struct vm_page *
.br
.Fn uvm_pagealloc "struct uvm_object *uobj" "voff_t off" "struct vm_anon *anon" "int flags" ;
.Pp
.Ft void
.br
.Fn uvm_pagerealloc "struct vm_page *pg" "struct uvm_object *newobj" "voff_t newoff" ;
.Pp
.Ft void
.br
.Fn uvm_pagefree "struct vm_page *pg" ;
.Pp
.Ft int
.br
.Fn uvm_pglistalloc "psize_t size" "paddr_t low" "paddr_t high" "paddr_t alignment" "paddr_t boundary" "struct pglist *rlist" "int nsegs" "int waitok" ;
.Pp
.Ft void
.br
.Fn uvm_pglistfree "struct pglist *list" ;
.Pp
.Ft void
.br
.Fn uvm_page_physload "vaddr_t start" "vaddr_t end" "vaddr_t avail_start" "vaddr_t avail_end" "int free_list" ;
.Pp
.Fn uvm_pagealloc
allocates a page of memory at offset
.Fa off
in either the object
.Fa uobj
or the anonymous memory
.Fa anon ,
which must be locked by the caller.
Only one of
.Fa uobj
and
.Fa anon
can be non
.Dv NULL .
Returns
.Dv NULL
when no page can be found.
The flags can be any of
.Bd -literal
#define UVM_PGA_USERESERVE      0x0001  /* ok to use reserve pages */
#define UVM_PGA_ZERO            0x0002  /* returned page must be zero'd */
.Ed
.Pp
.Dv UVM_PGA_USERESERVE
means to allocate a page even if that will result in the number of free pages
being lower than
.Dv uvmexp.reserve_pagedaemon
(if the current thread is the pagedaemon) or
.Dv uvmexp.reserve_kernel
(if the current thread is not the pagedaemon).
.Dv UVM_PGA_ZERO
causes the returned page to be filled with zeroes, either by allocating it
from a pool of pre-zeroed pages or by zeroing it in-line as necessary.
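.Pp
As a small illustrative sketch (the function name is hypothetical and
the object locking required of the caller is only hinted at), a
zero-filled page can be allocated into an object like this:
.Bd -literal
#include <sys/param.h>
#include <uvm/uvm.h>

/*
 * Sketch: allocate a zero-filled page at offset `off' of `uobj'.
 * The caller is assumed to hold the object locked, as required.
 */
struct vm_page *
example_get_zeroed_page(struct uvm_object *uobj, voff_t off)
{
        struct vm_page *pg;

        pg = uvm_pagealloc(uobj, off, NULL, UVM_PGA_ZERO);
        if (pg == NULL) {
                /*
                 * No page is available right now; the caller may
                 * unlock the object, wait for the pagedaemon to free
                 * memory and then retry.
                 */
                return NULL;
        }
        return pg;
}
.Ed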
.Pp
.Fn uvm_pagerealloc
reallocates page
.Fa pg
to a new object
.Fa newobj ,
at a new offset
.Fa newoff .
.Pp
.Fn uvm_pagefree
frees the physical page
.Fa pg .
If the content of the page is known to be zero-filled,
the caller should set
.Dv PG_ZERO
in pg-\*[Gt]flags so that the page allocator will use
the page to serve future
.Dv UVM_PGA_ZERO
requests efficiently.
.Pp
.Fn uvm_pglistalloc
allocates a list of pages of
.Fa size
bytes, subject to various constraints.
.Fa low
and
.Fa high
describe the lowest and highest addresses acceptable for the list.
If
.Fa alignment
is non-zero, it describes the required alignment of the list, and must
be a power of two.
If
.Fa boundary
is non-zero, no segment of the list may cross this power-of-two
boundary, relative to zero.
.Fa nsegs
is the maximum number of physically contiguous segments.
If
.Fa waitok
is non-zero, the function may sleep until enough memory is available.
(It also may give up in some situations, so a non-zero
.Fa waitok
does not imply that
.Fn uvm_pglistalloc
cannot return an error.)
The allocated memory is returned in the
.Fa rlist
list; the caller only has to provide the storage, as the list itself is
initialized by
.Fn uvm_pglistalloc .
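.Pp
For illustration, a driver that needs physically contiguous memory below
16 megabytes (for instance for ISA DMA) might use something like the
following sketch; the function name is hypothetical and error handling
is minimal.
.Bd -literal
#include <sys/param.h>
#include <sys/queue.h>
#include <uvm/uvm.h>

/*
 * Sketch: allocate one physically contiguous, page-aligned segment of
 * `size' bytes below 16 MB and return its first physical address.
 * The caller provides the storage for `mlist' and later releases the
 * pages with uvm_pglistfree(mlist).
 */
int
example_contig_alloc(psize_t size, struct pglist *mlist, paddr_t *pap)
{
        int error;

        error = uvm_pglistalloc(round_page(size), 0, 0xffffff,
            PAGE_SIZE, 0, mlist, 1, 1 /* waitok */);
        if (error != 0)
                return error;
        *pap = VM_PAGE_TO_PHYS(TAILQ_FIRST(mlist));
        return 0;
}
.Ed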
.Pp
.Fn uvm_pglistfree
frees the list of pages pointed to by
.Fa list .
If the content of a page is known to be zero-filled,
the caller should set
.Dv PG_ZERO
in pg-\*[Gt]flags so that the page allocator will use
the page to serve future
.Dv UVM_PGA_ZERO
requests efficiently.
.Pp
.Fn uvm_page_physload
loads physical memory segments into VM space on the specified
.Fa free_list .
It must be called at system boot time to set up physical memory
management pages.
The arguments describe the
.Fa start
and
.Fa end
of the physical addresses of the segment, and the available start and end
addresses of pages not already in use.
.\" XXX expand on "system boot time"!
.Sh PROCESSES
.Ft void
.br
.Fn uvm_pageout "void" ;
.Pp
.Ft void
.br
.Fn uvm_scheduler "void" ;
.Pp
.Ft void
.br
.Fn uvm_swapin "struct lwp *l" ;
.Pp
.Fn uvm_pageout
is the main loop for the page daemon.
.Pp
.Fn uvm_scheduler
is process zero's main loop, which is to be called after the
system has finished starting other processes.
It handles the swapping in of runnable, swapped-out processes in priority
order.
.Pp
.Fn uvm_swapin
swaps in the named lwp.
.Sh PAGE LOAN
.Ft int
.br
.Fn uvm_loan "struct vm_map *map" "vaddr_t start" "vsize_t len" "void *v" "int flags" ;
.Pp
.Ft void
.br
.Fn uvm_unloan "void *v" "int npages" "int flags" ;
.Pp
.Fn uvm_loan
loans pages in a map out to anons or to the kernel.
.Fa map
should be unlocked,
.Fa start
and
.Fa len
should be multiples of
.Dv PAGE_SIZE .
Argument
.Fa flags
should be one of
.Bd -literal
#define UVM_LOAN_TOANON       0x01    /* loan to anons */
#define UVM_LOAN_TOPAGE       0x02    /* loan to kernel */
.Ed
.Pp
.Fa v
should be a pointer to an array of pointers to
.Li struct anon
or
.Li struct vm_page ,
as appropriate.
The caller has to allocate memory for the array and
ensure it's big enough to hold
.Fa len / PAGE_SIZE
pointers.
.Fn uvm_loan
returns 0 on success, or an appropriate error number otherwise.
Note that wired pages can't be loaned out and
.Fn uvm_loan
will fail in that case.
.Pp
.Fn uvm_unloan
kills loans on pages or anons.
The
.Fa v
argument must point to the array of pointers initialized by a previous call to
.Fn uvm_loan .
.Fa npages
should match the number of pages allocated for the loan, which is also the
number of items in the array.
Argument
.Fa flags
should be one of
.Bd -literal
#define UVM_LOAN_TOANON       0x01    /* loan to anons */
#define UVM_LOAN_TOPAGE       0x02    /* loan to kernel */
.Ed
.Pp
and should match what was used for the previous call to
.Fn uvm_loan .
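.Pp
The following minimal sketch (hypothetical function name, abbreviated
error handling, and
.Xr malloc 9
used for the temporary array) loans a range of a map out to the kernel
and shows how the loan is later undone:
.Bd -literal
#include <sys/param.h>
#include <sys/malloc.h>
#include <uvm/uvm.h>

/* Sketch: loan `len' bytes starting at `start' in `map' to the kernel. */
int
example_loan_to_kernel(struct vm_map *map, vaddr_t start, vsize_t len,
    struct vm_page ***pgpp)
{
        struct vm_page **pgs;
        int error, npages = len >> PAGE_SHIFT;

        pgs = malloc(npages * sizeof(*pgs), M_TEMP, M_WAITOK);
        error = uvm_loan(map, start, len, pgs, UVM_LOAN_TOPAGE);
        if (error != 0) {
                free(pgs, M_TEMP);
                return error;
        }
        *pgpp = pgs;
        return 0;
}

/*
 * To undo the loan later:
 *      uvm_unloan(pgs, npages, UVM_LOAN_TOPAGE);
 *      free(pgs, M_TEMP);
 */
.Ed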
.Sh MISCELLANEOUS FUNCTIONS
.Ft struct uvm_object *
.br
.Fn uao_create "vsize_t size" "int flags" ;
.Pp
.Ft void
.br
.Fn uao_detach "struct uvm_object *uobj" ;
.Pp
.Ft void
.br
.Fn uao_reference "struct uvm_object *uobj" ;
.Pp
.Ft void
.br
.Fn uvm_chgkprot "void *addr" "size_t len" "int rw" ;
.Pp
.Ft bool
.br
.Fn uvm_kernacc "void *addr" "size_t len" "int rw" ;
.Pp
.Ft int
.br
.Fn uvm_vslock "struct vmspace *vs" "void *addr" "size_t len" "vm_prot_t prot" ;
.Pp
.Ft void
.br
.Fn uvm_vsunlock "struct vmspace *vs" "void *addr" "size_t len" ;
.Pp
.Ft void
.br
.Fn uvm_meter "void" ;
.Pp
.Ft void
.br
.Fn uvm_fork "struct lwp *l1" "struct lwp *l2" "bool shared" ;
.Pp
.Ft int
.br
.Fn uvm_grow "struct proc *p" "vaddr_t sp" ;
.Pp
.Ft void
.br
.Fn uvn_findpages "struct uvm_object *uobj" "voff_t offset" "int *npagesp" "struct vm_page **pps" "int flags" ;
.Pp
.Ft void
.br
.Fn uvm_swap_stats "int cmd" "struct swapent *sep" "int sec" "register_t *retval" ;
.Pp
The
.Fn uao_create ,
.Fn uao_detach ,
and
.Fn uao_reference
functions operate on anonymous memory objects, such as those used to support
System V shared memory.
.Fn uao_create
returns an object of size
.Fa size
with flags:
.Bd -literal
#define UAO_FLAG_KERNOBJ        0x1     /* create kernel object */
#define UAO_FLAG_KERNSWAP       0x2     /* enable kernel swap */
.Ed
.Pp
which can only be used once each at system boot time.
.Fn uao_reference
creates an additional reference to the named anonymous memory object.
.Fn uao_detach
removes a reference from the named anonymous memory object, destroying
it if removing the last reference.
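.Pp
As a hedged sketch of typical usage (simplified, with a hypothetical
function name; System V shared memory does something similar), an
anonymous memory object can be created and mapped into the kernel map:
.Bd -literal
#include <sys/param.h>
#include <uvm/uvm.h>

/* Sketch: back `size' bytes of kernel virtual memory with an aobj. */
int
example_aobj_map(vsize_t size, struct uvm_object **uobjp, vaddr_t *vap)
{
        struct uvm_object *uobj;
        vaddr_t va;
        int error;

        size = round_page(size);
        uobj = uao_create(size, 0);
        va = vm_map_min(kernel_map);
        error = uvm_map(kernel_map, &va, size, uobj, 0, 0,
            UVM_MAPFLAG(UVM_PROT_RW, UVM_PROT_RW, UVM_INH_NONE,
            UVM_ADV_RANDOM, 0));
        if (error != 0) {
                uao_detach(uobj);  /* drop the reference from uao_create() */
                return error;
        }
        *uobjp = uobj;
        *vap = va;
        return 0;
}
.Ed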
.Pp
.Fn uvm_chgkprot
changes the protection of kernel memory from
.Fa addr
to
.Fa addr + len
to the value of
.Fa rw .
This is primarily useful for debuggers, for setting breakpoints.
This function is only available with options
.Dv KGDB .
.Pp
.Fn uvm_kernacc
checks the access at address
.Fa addr
to
.Fa addr + len
for
.Fa rw
access in the kernel address space.
.Pp
.Fn uvm_vslock
and
.Fn uvm_vsunlock
control the wiring and unwiring of pages in the address space
.Fa vs
from
.Fa addr
to
.Fa addr + len .
These functions are normally used to wire memory for I/O.
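.Pp
For instance (a sketch with a hypothetical function name, not a verbatim
excerpt), a driver preparing a user buffer for direct I/O could wire it
for the duration of the transfer:
.Bd -literal
#include <sys/param.h>
#include <sys/proc.h>
#include <uvm/uvm.h>

/* Sketch: wire a user buffer, perform the transfer, then unwire it. */
int
example_wired_io(struct lwp *l, void *uaddr, size_t len)
{
        struct vmspace *vs = l->l_proc->p_vmspace;
        int error;

        error = uvm_vslock(vs, uaddr, len, VM_PROT_READ | VM_PROT_WRITE);
        if (error != 0)
                return error;

        /* ... start the I/O and wait for it to complete ... */

        uvm_vsunlock(vs, uaddr, len);
        return 0;
}
.Ed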
.Pp
.Fn uvm_meter
calculates the load average and wakes up the swapper if necessary.
.Pp
.Fn uvm_fork
forks a virtual address space for the (old) lwp
.Fa l1
and the (new) lwp
.Fa l2 .
If the
.Fa shared
argument is true,
.Fa l1
shares its address space with
.Fa l2 ;
otherwise a new address space is created.
This function currently has no return value, and thus cannot fail.
In the future, this function will be changed to allow it to
fail in low memory conditions.
.Pp
.Fn uvm_grow
increases the stack segment of process
.Fa p
to include
.Fa sp .
.Pp
.Fn uvn_findpages
looks up or creates pages in
.Fa uobj
at offset
.Fa offset ,
marks them busy and returns them in the
.Fa pps
array.
Currently
.Fa uobj
must be a vnode object.
The number of pages requested is pointed to by
.Fa npagesp ,
and this value is updated with the actual number of pages returned.
The flags can be
.Bd -literal
#define UFP_ALL         0x00    /* return all pages requested */
#define UFP_NOWAIT      0x01    /* don't sleep */
#define UFP_NOALLOC     0x02    /* don't allocate new pages */
#define UFP_NOCACHE     0x04    /* don't return pages which already exist */
#define UFP_NORDONLY    0x08    /* don't return PG_READONLY pages */
.Ed
.Pp
.Dv UFP_ALL
is a pseudo-flag meaning all requested pages should be returned.
.Dv UFP_NOWAIT
means that the function must not sleep.
.Dv UFP_NOALLOC
causes any pages which do not already exist to be skipped.
.Dv UFP_NOCACHE
causes any pages which do already exist to be skipped.
.Dv UFP_NORDONLY
causes any pages which are marked PG_READONLY to be skipped.
.Pp
.Fn uvm_swap_stats
implements the
.Dv SWAP_STATS
and
.Dv SWAP_OSTATS
operation of the
.Xr swapctl 2
system call.
.Fa cmd
is the requested command,
.Dv SWAP_STATS
or
.Dv SWAP_OSTATS .
The function will copy no more than
.Fa sec
entries into the array pointed to by
.Fa sep .
On return,
.Fa retval
holds the actual number of entries copied into the array.
.Sh SYSCTL
UVM provides support for the
.Dv CTL_VM
domain of the
.Xr sysctl 3
hierarchy.
It handles the
.Dv VM_LOADAVG ,
.Dv VM_METER ,
.Dv VM_UVMEXP ,
and
.Dv VM_UVMEXP2
nodes, which return the current load averages, the current VM
totals, the uvmexp structure, and a kernel-version-independent
view of the uvmexp structure, respectively.
It also exports a number of tunables that control how much VM space is
allowed to be consumed by various tasks.
The load averages are typically accessed from userland using the
.Xr getloadavg 3
function.
The uvmexp structure holds the global state of the UVM system,
and has the following members:
.Bd -literal
/* vm_page constants */
int pagesize;   /* size of a page (PAGE_SIZE): must be power of 2 */
int pagemask;   /* page mask */
int pageshift;  /* page shift */

/* vm_page counters */
int npages;     /* number of pages we manage */
int free;       /* number of free pages */
int active;     /* number of active pages */
int inactive;   /* number of pages that we free'd but may want back */
int paging;     /* number of pages in the process of being paged out */
int wired;      /* number of wired pages */
int reserve_pagedaemon; /* number of pages reserved for pagedaemon */
int reserve_kernel; /* number of pages reserved for kernel */

/* pageout params */
int freemin;    /* min number of free pages */
int freetarg;   /* target number of free pages */
int inactarg;   /* target number of inactive pages */
int wiredmax;   /* max number of wired pages */

/* swap */
int nswapdev;   /* number of configured swap devices in system */
int swpages;    /* number of PAGE_SIZE'ed swap pages */
int swpginuse;  /* number of swap pages in use */
int nswget;     /* number of times fault calls uvm_swap_get() */
int nanon;      /* number total of anon's in system */
int nfreeanon;  /* number of free anon's */

/* stat counters */
int faults;             /* page fault count */
int traps;              /* trap count */
int intrs;              /* interrupt count */
int swtch;              /* context switch count */
int softs;              /* software interrupt count */
int syscalls;           /* system calls */
int pageins;            /* pagein operation count */
                        /* pageouts are in pdpageouts below */
int swapins;            /* swapins */
int swapouts;           /* swapouts */
int pgswapin;           /* pages swapped in */
int pgswapout;          /* pages swapped out */
int forks;              /* forks */
int forks_ppwait;       /* forks where parent waits */
int forks_sharevm;      /* forks where vmspace is shared */

/* fault subcounters */
int fltnoram;   /* number of times fault was out of ram */
int fltnoanon;  /* number of times fault was out of anons */
int fltpgwait;  /* number of times fault had to wait on a page */
int fltpgrele;  /* number of times fault found a released page */
int fltrelck;   /* number of times fault relock called */
int fltrelckok; /* number of times fault relock is a success */
int fltanget;   /* number of times fault gets anon page */
int fltanretry; /* number of times fault retrys an anon get */
int fltamcopy;  /* number of times fault clears "needs copy" */
int fltnamap;   /* number of times fault maps a neighbor anon page */
int fltnomap;   /* number of times fault maps a neighbor obj page */
int fltlget;    /* number of times fault does a locked pgo_get */
int fltget;     /* number of times fault does an unlocked get */
int flt_anon;   /* number of times fault anon (case 1a) */
int flt_acow;   /* number of times fault anon cow (case 1b) */
int flt_obj;    /* number of times fault is on object page (2a) */
int flt_prcopy; /* number of times fault promotes with copy (2b) */
int flt_przero; /* number of times fault promotes with zerofill (2b) */

/* daemon counters */
int pdwoke;     /* number of times daemon woke up */
int pdrevs;     /* number of times daemon rev'd clock hand */
int pdswout;    /* number of times daemon called for swapout */
int pdfreed;    /* number of pages daemon freed since boot */
int pdscans;    /* number of pages daemon scanned since boot */
int pdanscan;   /* number of anonymous pages scanned by daemon */
int pdobscan;   /* number of object pages scanned by daemon */
int pdreact;    /* number of pages daemon reactivated since boot */
int pdbusy;     /* number of times daemon found a busy page */
int pdpageouts; /* number of times daemon started a pageout */
int pdpending;  /* number of times daemon got a pending pageout */
int pddeact;    /* number of pages daemon deactivates */
.Ed
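.Pp
From userland, the kernel-version-independent statistics can be fetched
with
.Xr sysctl 3 ;
the following hedged example assumes the
.Li struct uvmexp_sysctl
definition exported through the
.Pa uvm/uvm_extern.h
header, as used by utilities such as
.Xr vmstat 1 .
.Bd -literal
#include <sys/param.h>
#include <sys/sysctl.h>
#include <uvm/uvm_extern.h>

#include <stdio.h>

int
main(void)
{
        struct uvmexp_sysctl u;
        int mib[2] = { CTL_VM, VM_UVMEXP2 };
        size_t len = sizeof(u);

        if (sysctl(mib, 2, &u, &len, NULL, 0) == -1) {
                perror("sysctl");
                return 1;
        }
        printf("%lld pages managed, %lld free\en",
            (long long)u.npages, (long long)u.free);
        return 0;
}
.Ed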
.Sh NOTES
.Fn uvm_chgkprot
is only available if the kernel has been compiled with options
.Dv KGDB .
.Pp
All structures and types whose names begin with
.Dq vm_
will be renamed to
.Dq uvm_ .
.Sh SEE ALSO
.Xr swapctl 2 ,
.Xr getloadavg 3 ,
.Xr kvm 3 ,
.Xr sysctl 3 ,
.Xr ddb 4 ,
.Xr options 4 ,
.Xr memoryallocators 9 ,
.Xr pmap 9
.Sh HISTORY
UVM is a new VM system developed at Washington University in St. Louis
(Missouri).
UVM's roots lie partly in the Mach-based
.Bx 4.4
VM system, the
.Fx
VM system, and the SunOS 4 VM system.
UVM's basic structure is based on the
.Bx 4.4
VM system.
UVM's new anonymous memory system is based on the
anonymous memory system found in the SunOS 4 VM (as described in papers
published by Sun Microsystems, Inc.).
UVM also includes a number of features new to
.Bx
including page loanout, map entry passing, simplified
copy-on-write, and clustered anonymous memory pageout.
UVM is also further documented in an August 1998 dissertation by
Charles D. Cranor.
.Pp
UVM appeared in
.Nx 1.4 .
.Sh AUTHORS
Charles D. Cranor
.Aq chuck@ccrc.wustl.edu
designed and implemented UVM.
.Pp
Matthew Green
.Aq mrg@eterna.com.au
wrote the swap-space management code and handled the logistical issues
involved with merging UVM into the
.Nx
source tree.
.Pp
Chuck Silvers
.Aq chuq@chuq.com
implemented the aobj pager, thus allowing UVM to support System V shared
memory and process swapping.
He also designed and implemented the UBC part of UVM, which uses UVM pages
to cache vnode data rather than the traditional buffer cache buffers.
1233