xref: /netbsd-src/share/man/man9/uvm.9 (revision 8b0f9554ff8762542c4defc4f70e1eb76fb508fa)
1.\"	$NetBSD: uvm.9,v 1.89 2007/12/02 22:09:34 wiz Exp $
2.\"
3.\" Copyright (c) 1998 Matthew R. Green
4.\" All rights reserved.
5.\"
6.\" Redistribution and use in source and binary forms, with or without
7.\" modification, are permitted provided that the following conditions
8.\" are met:
9.\" 1. Redistributions of source code must retain the above copyright
10.\"    notice, this list of conditions and the following disclaimer.
11.\" 2. Redistributions in binary form must reproduce the above copyright
12.\"    notice, this list of conditions and the following disclaimer in the
13.\"    documentation and/or other materials provided with the distribution.
14.\" 3. The name of the author may not be used to endorse or promote products
15.\"    derived from this software without specific prior written permission.
16.\"
17.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18.\" IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19.\" OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20.\" IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21.\" INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
22.\" BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
23.\" LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
24.\" AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
25.\" OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27.\" SUCH DAMAGE.
28.\"
29.Dd October 15, 2007
30.Dt UVM 9
31.Os
32.Sh NAME
33.Nm uvm
34.Nd virtual memory system external interface
35.Sh SYNOPSIS
36.In sys/param.h
37.In uvm/uvm.h
38.Sh DESCRIPTION
39The UVM virtual memory system manages access to the computer's memory
40resources.
41User processes and the kernel access these resources through
42UVM's external interface.
43UVM's external interface includes functions that:
44.Pp
45.Bl -hyphen -compact
46.It
47initialize UVM sub-systems
48.It
49manage virtual address spaces
50.It
51resolve page faults
52.It
53memory map files and devices
54.It
55perform uio-based I/O to virtual memory
56.It
57allocate and free kernel virtual memory
58.It
59allocate and free physical memory
60.El
61.Pp
62In addition to exporting these services, UVM has two kernel-level processes:
63pagedaemon and swapper.
64The pagedaemon process sleeps until physical memory becomes scarce.
65When that happens, pagedaemon is awoken.
66It scans physical memory, paging out and freeing memory that has not
67been recently used.
68The swapper process swaps in runnable processes that are currently swapped
69out, if there is room.
70.Pp
71There are also several miscellaneous functions.
72.Sh INITIALIZATION
73.Ft void
74.br
75.Fn uvm_init "void" ;
76.Pp
77.Ft void
78.br
79.Fn uvm_init_limits "struct lwp *l" ;
80.Pp
81.Ft void
82.br
83.Fn uvm_setpagesize "void" ;
84.Pp
85.Ft void
86.br
87.Fn uvm_swap_init "void" ;
88.Pp
89.Fn uvm_init
90sets up the UVM system at system boot time, after the
91console has been set up.
92It initializes global state, the page, map, kernel virtual memory state,
93machine-dependent physical map, kernel memory allocator,
94pager and anonymous memory sub-systems, and then enables
95paging of kernel objects.
96.Pp
97.Fn uvm_init_limits
98initializes process limits for the named process.
99This is for use by the system startup for process zero, before any
100other processes are created.
101.Pp
102.Fn uvm_setpagesize
103initializes the uvmexp members pagesize (if not already done by
104machine-dependent code), pageshift and pagemask.
105It should be called by machine-dependent code early in the
106.Fn pmap_init
107call (see
108.Xr pmap 9 ) .
109.Pp
110.Fn uvm_swap_init
111initializes the swap sub-system.
112.Sh VIRTUAL ADDRESS SPACE MANAGEMENT
113.Ft int
114.br
115.Fn uvm_map "struct vm_map *map" "vaddr_t *startp" "vsize_t size" "struct uvm_object *uobj" "voff_t uoffset" "vsize_t align" "uvm_flag_t flags" ;
116.Pp
117.Ft void
118.br
119.Fn uvm_unmap "struct vm_map *map" "vaddr_t start" "vaddr_t end" ;
120.Pp
121.Ft int
122.br
123.Fn uvm_map_pageable "struct vm_map *map" "vaddr_t start" "vaddr_t end" "bool new_pageable" "int lockflags" ;
124.Pp
125.Ft bool
126.br
127.Fn uvm_map_checkprot "struct vm_map *map" "vaddr_t start" "vaddr_t end" "vm_prot_t protection" ;
128.Pp
129.Ft int
130.br
131.Fn uvm_map_protect "struct vm_map *map" "vaddr_t start" "vaddr_t end" "vm_prot_t new_prot" "bool set_max" ;
132.Pp
133.Ft int
134.br
135.Fn uvm_deallocate "struct vm_map *map" "vaddr_t start" "vsize_t size" ;
136.Pp
137.Ft struct vmspace *
138.br
139.Fn uvmspace_alloc "vaddr_t min" "vaddr_t max" "int pageable" ;
140.Pp
141.Ft void
142.br
143.Fn uvmspace_exec "struct lwp *l" "vaddr_t start" "vaddr_t end" ;
144.Pp
145.Ft struct vmspace *
146.br
147.Fn uvmspace_fork "struct vmspace *vm1" ;
148.Pp
149.Ft void
150.br
151.Fn uvmspace_free "struct vmspace *vm" ;
152.Pp
153.Ft void
154.br
155.Fn uvmspace_share "struct proc *p1" "struct proc *p2" ;
156.Pp
157.Ft void
158.br
159.Fn uvmspace_unshare "struct lwp *l" ;
160.Pp
161.Ft bool
162.br
163.Fn uvm_uarea_alloc "vaddr_t *uaddrp" ;
164.Pp
165.Ft void
166.br
167.Fn uvm_uarea_free "vaddr_t uaddr" ;
168.Pp
169.Fn uvm_map
170establishes a valid mapping in map
171.Fa map ,
172which must be unlocked.
173The new mapping has size
174.Fa size ,
175which must be a multiple of
176.Dv PAGE_SIZE .
177The
178.Fa uobj
179and
180.Fa uoffset
181arguments can have four meanings.
182When
183.Fa uobj
184is
185.Dv NULL
186and
187.Fa uoffset
188is
189.Dv UVM_UNKNOWN_OFFSET ,
190.Fn uvm_map
191does not use the machine-dependent
192.Dv PMAP_PREFER
193function.
194If
195.Fa uoffset
196is any other value, it is used as the hint to
197.Dv PMAP_PREFER .
198When
199.Fa uobj
200is not
201.Dv NULL
202and
203.Fa uoffset
204is
205.Dv UVM_UNKNOWN_OFFSET ,
206.Fn uvm_map
207finds the offset based upon the virtual address, passed as
208.Fa startp .
209If
210.Fa uoffset
211is any other value, we are doing a normal mapping at this offset.
212The start address of the map will be returned in
213.Fa startp .
214.Pp
215.Fa align
216specifies alignment of mapping unless
217.Dv UVM_FLAG_FIXED
218is specified in
219.Fa flags .
220.Fa align
221must be a power of 2.
222.Pp
223.Fa flags
224passed to
225.Fn uvm_map
226are typically created using the
227.Fn UVM_MAPFLAG "vm_prot_t prot" "vm_prot_t maxprot" "vm_inherit_t inh" "int advice" "int flags"
228macro, which uses the following values.
229The values that
230.Fa prot
231and
232.Fa maxprot
233can take are:
234.Bd -literal
235#define UVM_PROT_MASK   0x07    /* protection mask */
236#define UVM_PROT_NONE   0x00    /* protection none */
237#define UVM_PROT_ALL    0x07    /* everything */
238#define UVM_PROT_READ   0x01    /* read */
239#define UVM_PROT_WRITE  0x02    /* write */
240#define UVM_PROT_EXEC   0x04    /* exec */
241#define UVM_PROT_R      0x01    /* read */
242#define UVM_PROT_W      0x02    /* write */
243#define UVM_PROT_RW     0x03    /* read-write */
244#define UVM_PROT_X      0x04    /* exec */
245#define UVM_PROT_RX     0x05    /* read-exec */
246#define UVM_PROT_WX     0x06    /* write-exec */
247#define UVM_PROT_RWX    0x07    /* read-write-exec */
248.Ed
249.Pp
250The values that
251.Fa inh
252can take are:
253.Bd -literal
254#define UVM_INH_MASK    0x30    /* inherit mask */
255#define UVM_INH_SHARE   0x00    /* "share" */
256#define UVM_INH_COPY    0x10    /* "copy" */
257#define UVM_INH_NONE    0x20    /* "none" */
258#define UVM_INH_DONATE  0x30    /* "donate" \*[Lt]\*[Lt] not used */
259.Ed
260.Pp
261The values that
262.Fa advice
263can take are:
264.Bd -literal
265#define UVM_ADV_NORMAL     0x0  /* 'normal' */
266#define UVM_ADV_RANDOM     0x1  /* 'random' */
267#define UVM_ADV_SEQUENTIAL 0x2  /* 'sequential' */
268#define UVM_ADV_MASK       0x7  /* mask */
269.Ed
270.Pp
271The values that
272.Fa flags
273can take are:
274.Bd -literal
275#define UVM_FLAG_FIXED   0x010000 /* find space */
276#define UVM_FLAG_OVERLAY 0x020000 /* establish overlay */
277#define UVM_FLAG_NOMERGE 0x040000 /* don't merge map entries */
278#define UVM_FLAG_COPYONW 0x080000 /* set copy_on_write flag */
279#define UVM_FLAG_AMAPPAD 0x100000 /* for bss: pad amap to reduce malloc() */
280#define UVM_FLAG_TRYLOCK 0x200000 /* fail if we can not lock map */
281.Ed
282.Pp
283The
284.Dv UVM_MAPFLAG
285macro arguments can be combined with an or operator.
286There are several special purpose macros for checking protection
287combinations, e.g., the
288.Dv UVM_PROT_WX
289macro.
290There are also some additional macros to extract bits from the flags.
291The
292.Dv UVM_PROTECTION ,
293.Dv UVM_INHERIT ,
294.Dv UVM_MAXPROTECTION
295and
296.Dv UVM_ADVICE
297macros return the protection, inheritance, maximum protection and advice,
298respectively.
299.Fn uvm_map
300returns a standard UVM return value.
301.Pp
302.Fn uvm_unmap
303removes a valid mapping,
304from
305.Fa start
306to
307.Fa end ,
308in map
309.Fa map ,
310which must be unlocked.
311.Pp
312.Fn uvm_map_pageable
313changes the pageability of the pages in the range from
314.Fa start
315to
316.Fa end
317in map
318.Fa map
319to
320.Fa new_pageable .
321.Fn uvm_map_pageable
322returns a standard UVM return value.
323.Pp
324.Fn uvm_map_checkprot
325checks the protection of the range from
326.Fa start
327to
328.Fa end
329in map
330.Fa map
331against
332.Fa protection .
333This returns either
334.Dv true
335or
336.Dv false .
337.Pp
338.Fn uvm_map_protect
339changes the protection of the range from
340.Fa start
341to
342.Fa end
343in map
344.Fa map
345to
346.Fa new_prot ,
347also setting the maximum protection of the region to
348.Fa new_prot
349if
350.Fa set_max
351is non-zero.
352This function returns a standard UVM return value.
353.Pp
354.Fn uvm_deallocate
355deallocates kernel memory in map
356.Fa map
357from address
358.Fa start
359to
360.Fa start + size .
361.Pp
362.Fn uvmspace_alloc
363allocates and returns a new address space, with ranges from
364.Fa min
365to
366.Fa max ,
367setting the pageability of the address space to
368.Fa pageable .
369.Pp
370.Fn uvmspace_exec
371either reuses the address space of lwp
372.Fa l
373if there are no other references to it, or creates
374a new one with
375.Fn uvmspace_alloc .
376The range of valid addresses in the address space is reset to
377.Fa start
378through
379.Fa end .
380.Pp
381.Fn uvmspace_fork
382creates and returns a new address space based upon the
383.Fa vm1
384address space, typically used when allocating an address space for a
385child process.
386.Pp
387.Fn uvmspace_free
388lowers the reference count on the address space
389.Fa vm ,
390freeing the data structures if there are no other references.
391.Pp
392.Fn uvmspace_share
393causes process
394.Fa p2
395to share the address space of
396.Fa p1 .
397.Pp
398.Fn uvmspace_unshare
399ensures that lwp
400.Fa l
401has its own, unshared address space, by creating a new one if
402necessary by calling
403.Fn uvmspace_fork .
404.Pp
405.Fn uvm_uarea_alloc
406allocates virtual space for a u-area (i.e., a kernel stack) and stores
407its virtual address in
408.Fa *uaddrp .
409The return value is
410.Dv true
411if the u-area is already backed by wired physical memory, otherwise
412.Dv false .
413.Pp
414.Fn uvm_uarea_free
415frees a u-area allocated with
416.Fn uvm_uarea_alloc ,
417freeing both the virtual space and any physical pages which may have been
418allocated to back that virtual space later.
419.Sh PAGE FAULT HANDLING
420.Ft int
421.br
422.Fn uvm_fault "struct vm_map *orig_map" "vaddr_t vaddr" "vm_prot_t access_type" ;
423.Pp
424.Fn uvm_fault
425is the main entry point for faults.
426It takes
427.Fa orig_map
428as the map the fault originated in, a
429.Fa vaddr
430offset into the map at which the fault occurred, and
431.Fa access_type
432describing the type of access requested.
433.Fn uvm_fault
434returns a standard UVM return value.
435.Sh MEMORY MAPPING FILES AND DEVICES
436.Ft void
437.br
438.Fn uvm_vnp_setsize "struct vnode *vp" "voff_t newsize" ;
439.Pp
440.Ft void *
441.br
442.Fn ubc_alloc "struct uvm_object *uobj" "voff_t offset" "vsize_t *lenp" \
443"int advice" "int flags" ;
444.Pp
445.Ft void
446.br
447.Fn ubc_release "void *va" "int flags" ;
448.Pp
449.Ft int
450.br
451.Fn ubc_uiomove "struct uvm_object *uobj" "struct uio *uio" "vsize_t todo" \
452"int advice" "int flags" ;
453.Pp
454.Fn uvm_vnp_setsize
455sets the size of vnode
456.Fa vp
457to
458.Fa newsize .
459Caller must hold a reference to the vnode.
460If the vnode shrinks, pages no longer used are discarded.
461.Pp
462.Fn ubc_alloc
463creates a kernel mapping of
464.Fa uobj
465starting at offset
466.Fa offset .
467The desired length of the mapping is pointed to by
468.Fa lenp ,
469but the actual mapping may be smaller than this.
470.Fa lenp
471is updated to contain the actual length mapped.
472.Fa advice
473is the access pattern hint, which must be one of
474.Pp
475.Bl -tag -offset indent -width "UVM_ADV_SEQUENTIAL" -compact
476.It UVM_ADV_NORMAL
477No hint
478.It UVM_ADV_RANDOM
479Random access hint
480.It UVM_ADV_SEQUENTIAL
481Sequential access hint (from lower offset to higher offset)
482.El
483.Pp
484The possible
485.Fa flags
486are
487.Pp
488.Bl -tag -offset indent -width "UVM_ADV_SEQUENTIAL" -compact
489.It UBC_READ
490Mapping will be accessed for read.
491.It UBC_WRITE
492Mapping will be accessed for write.
493.It UBC_FAULTBUSY
494Fault in the window's pages during the mapping operation itself.
495Makes sense only for write.
496.El
497.Pp
498Currently,
499.Fa uobj
500must actually be a vnode object.
501Once the mapping is created, it must be accessed only by methods that can
502handle faults, such as
503.Fn uiomove
504or
505.Fn kcopy .
506Page faults on the mapping will result in the vnode's
507.Fn VOP_GETPAGES
508method being called to resolve the fault.
509.Pp
510.Fn ubc_release
511frees the mapping at
512.Fa va
513for reuse.
514The mapping may be cached to speed future accesses to the same region
515of the object.
516The flags can be any of
517.Pp
518.Bl -tag -offset indent -width "UVM_ADV_SEQUENTIAL" -compact
519.It UBC_UNMAP
520Do not cache mapping.
521.El
522.Pp
523.Fn ubc_uiomove
524allocates a UBC memory window, performs I/O on it and unmaps the window.
525The
526.Fa advice
527parameter takes the same values as the respective parameter in
528.Fn ubc_alloc
529and the
530.Fa flags
531parameter takes the same arguments as
532.Fn ubc_alloc
533and
534.Fn ubc_release .
535Additionally, the flag
536.Dv UBC_PARTIALOK
537can be provided to indicate that it is acceptable to return if an error
538occurs mid-transfer.
539.Sh VIRTUAL MEMORY I/O
540.Ft int
541.br
542.Fn uvm_io "struct vm_map *map" "struct uio *uio" ;
543.Pp
544.Fn uvm_io
545performs the I/O described in
546.Fa uio
547on the memory described in
548.Fa map .
549.Sh ALLOCATION OF KERNEL MEMORY
550.Ft vaddr_t
551.br
552.Fn uvm_km_alloc "struct vm_map *map" "vsize_t size" "vsize_t align" "uvm_flag_t flags" ;
553.Pp
554.Ft void
555.br
556.Fn uvm_km_free "struct vm_map *map" "vaddr_t addr" "vsize_t size" "uvm_flag_t flags" ;
557.Pp
558.Ft struct vm_map *
559.br
560.Fn uvm_km_suballoc "struct vm_map *map" "vaddr_t *min" "vaddr_t *max" "vsize_t size" "bool pageable" "bool fixed" "struct vm_map *submap" ;
561.Pp
562.Fn uvm_km_alloc
563allocates
564.Fa size
565bytes of kernel memory in map
566.Fa map .
567The first address of the allocated memory range will be aligned according to the
568.Fa align
569argument
570.Pq specify 0 if no alignment is necessary .
571The alignment must be a multiple of page size.
572The
573.Fa flags
574is a bitwise inclusive OR of the allocation type and operation flags.
575.Pp
576The allocation type should be one of:
577.Bl -tag -width UVM_KMF_PAGEABLE
578.It UVM_KMF_WIRED
579Wired memory.
580.It UVM_KMF_PAGEABLE
581Demand-paged zero-filled memory.
582.It UVM_KMF_VAONLY
583Virtual address only.
584No physical pages are mapped in the allocated region.
585If necessary, it's the caller's responsibility to enter page mappings.
586It's also the caller's responsibility to clean up the mappings before freeing
587the address range.
588.El
589.Pp
590The following operation flags are available:
591.Bl -tag -width UVM_KMF_PAGEABLE
592.It UVM_KMF_CANFAIL
593Can fail even if
594.Dv UVM_KMF_NOWAIT
595is not specified and
596.Dv UVM_KMF_WAITVA
597is specified.
598.It UVM_KMF_ZERO
599Request zero-filled memory.
600Only supported for
601.Dv UVM_KMF_WIRED .
602Shouldn't be used with other types.
603.It UVM_KMF_TRYLOCK
604Fail if we can't lock the map.
605.It UVM_KMF_NOWAIT
606Fail immediately if no memory is available.
607.It UVM_KMF_WAITVA
608Sleep to wait for the virtual address resources if needed.
609.El
610.Pp
611(If neither
612.Dv UVM_KMF_NOWAIT
613nor
614.Dv UVM_KMF_CANFAIL
615are specified and
616.Dv UVM_KMF_WAITVA
617is specified,
618.Fn uvm_km_alloc
619will never fail, but rather sleep indefinitely until the allocation succeeds.)
620.Pp
621Pageability of the pages allocated with
622.Dv UVM_KMF_PAGEABLE
623can be changed by
624.Fn uvm_map_pageable .
625In that case, the entire range must be changed atomically.
626Changing a part of the range is not supported.
627.Pp
628.Fn uvm_km_free
629frees the memory range allocated by
630.Fn uvm_km_alloc .
631.Fa addr
632must be an address returned by
633.Fn uvm_km_alloc .
634.Fa map
635and
636.Fa size
637must be the same as the ones used for the corresponding
638.Fn uvm_km_alloc .
639.Fa flags
640must be the allocation type used for the corresponding
641.Fn uvm_km_alloc .
642.Pp
643.Fn uvm_km_free
644is the only way to free memory ranges allocated by
645.Fn uvm_km_alloc .
646.Fn uvm_unmap
647must not be used.
648.Pp
649.Fn uvm_km_suballoc
650allocates a submap from
651.Fa map ,
652creating a new map if
653.Fa submap
654is
655.Dv NULL .
656The addresses of the submap can be specified exactly by setting the
657.Fa fixed
658argument to non-zero, which causes the
659.Fa min
660argument to specify the beginning of the address in the submap.
661If
662.Fa fixed
663is zero, any address of size
664.Fa size
665will be allocated from
666.Fa map
667and the start and end addresses returned in
668.Fa min
669and
670.Fa max .
671If
672.Fa pageable
673is non-zero, entries in the map may be paged out.
674.Sh ALLOCATION OF PHYSICAL MEMORY
675.Ft struct vm_page *
676.br
677.Fn uvm_pagealloc "struct uvm_object *uobj" "voff_t off" "struct vm_anon *anon" "int flags" ;
678.Pp
679.Ft void
680.br
681.Fn uvm_pagerealloc "struct vm_page *pg" "struct uvm_object *newobj" "voff_t newoff" ;
682.Pp
683.Ft void
684.br
685.Fn uvm_pagefree "struct vm_page *pg" ;
686.Pp
687.Ft int
688.br
689.Fn uvm_pglistalloc "psize_t size" "paddr_t low" "paddr_t high" "paddr_t alignment" "paddr_t boundary" "struct pglist *rlist" "int nsegs" "int waitok" ;
690.Pp
691.Ft void
692.br
693.Fn uvm_pglistfree "struct pglist *list" ;
694.Pp
695.Ft void
696.br
697.Fn uvm_page_physload "paddr_t start" "paddr_t end" "paddr_t avail_start" "paddr_t avail_end" "int free_list" ;
698.Pp
699.Fn uvm_pagealloc
700allocates a page of memory at offset
701.Fa off
702in either the object
703.Fa uobj
704or the anonymous memory
705.Fa anon ,
706which must be locked by the caller.
707Only one of
708.Fa uobj
709and
710.Fa anon
711can be non
712.Dv NULL .
713Returns
714.Dv NULL
715when no page can be found.
716The flags can be any of
717.Bd -literal
718#define UVM_PGA_USERESERVE      0x0001  /* ok to use reserve pages */
719#define UVM_PGA_ZERO            0x0002  /* returned page must be zero'd */
720.Ed
721.Pp
722.Dv UVM_PGA_USERESERVE
723means to allocate a page even if that will result in the number of free pages
724being lower than
725.Dv uvmexp.reserve_pagedaemon
726(if the current thread is the pagedaemon) or
727.Dv uvmexp.reserve_kernel
728(if the current thread is not the pagedaemon).
729.Dv UVM_PGA_ZERO
730causes the returned page to be filled with zeroes, either by allocating it
731from a pool of pre-zeroed pages or by zeroing it in-line as necessary.
732.Pp
733.Fn uvm_pagerealloc
734reallocates page
735.Fa pg
736to a new object
737.Fa newobj ,
738at a new offset
739.Fa newoff .
740.Pp
741.Fn uvm_pagefree
742frees the physical page
743.Fa pg .
744If the content of the page is known to be zero-filled,
745caller should set
746.Dv PG_ZERO
747in pg-\*[Gt]flags so that the page allocator will use
748the page to serve future
749.Dv UVM_PGA_ZERO
750requests efficiently.
751.Pp
752.Fn uvm_pglistalloc
753allocates a list of pages for size
754.Fa size
755bytes under various constraints.
756.Fa low
757and
758.Fa high
759describe the lowest and highest addresses acceptable for the list.
760If
761.Fa alignment
762is non-zero, it describes the required alignment of the list, in
763power-of-two notation.
764If
765.Fa boundary
766is non-zero, no segment of the list may cross this power-of-two
767boundary, relative to zero.
768.Fa nsegs
769is the maximum number of physically contiguous segments.
770If
771.Fa waitok
772is non-zero, the function may sleep until enough memory is available.
773(It also may give up in some situations, so a non-zero
774.Fa waitok
775does not imply that
776.Fn uvm_pglistalloc
777cannot return an error.)
778The allocated memory is returned in the
779.Fa rlist
780list; the caller has to provide storage only, the list is initialized by
781.Fn uvm_pglistalloc .
782.Pp
783.Fn uvm_pglistfree
784frees the list of pages pointed to by
785.Fa list .
786If the content of the page is known to be zero-filled,
787caller should set
788.Dv PG_ZERO
789in pg-\*[Gt]flags so that the page allocator will use
790the page to serve future
791.Dv UVM_PGA_ZERO
792requests efficiently.
793.Pp
794.Fn uvm_page_physload
795loads physical memory segments into VM space on the specified
796.Fa free_list .
797It must be called at system boot time to set up physical memory
798management pages.
799The arguments describe the
800.Fa start
801and
802.Fa end
803of the physical addresses of the segment, and the available start and end
804addresses of pages not already in use.
805.\" XXX expand on "system boot time"!
806.Sh PROCESSES
807.Ft void
808.br
809.Fn uvm_pageout "void" ;
810.Pp
811.Ft void
812.br
813.Fn uvm_scheduler "void" ;
814.Pp
815.Ft void
816.br
817.Fn uvm_swapin "struct lwp *l" ;
818.Pp
819.Fn uvm_pageout
820is the main loop for the page daemon.
821.Pp
822.Fn uvm_scheduler
823is the process zero main loop, which is to be called after the
824system has finished starting other processes.
825It handles the swapping in of runnable, swapped out processes in priority
826order.
827.Pp
828.Fn uvm_swapin
829swaps in the named lwp.
830.Sh PAGE LOAN
831.Ft int
832.br
833.Fn uvm_loan "struct vm_map *map" "vaddr_t start" "vsize_t len" "void *v" "int flags" ;
834.Pp
835.Ft void
836.br
837.Fn uvm_unloan "void *v" "int npages" "int flags" ;
838.Pp
839.Fn uvm_loan
840loans pages in a map out to anons or to the kernel.
841.Fa map
842should be unlocked,
843.Fa start
844and
845.Fa len
846should be multiples of
847.Dv PAGE_SIZE .
848Argument
849.Fa flags
850should be one of
851.Bd -literal
852#define UVM_LOAN_TOANON       0x01    /* loan to anons */
853#define UVM_LOAN_TOPAGE       0x02    /* loan to kernel */
854.Ed
855.Pp
856.Fa v
857should be a pointer to an array of pointers to
858.Li struct vm_anon
859or
860.Li struct vm_page ,
861as appropriate.
862The caller has to allocate memory for the array and
863ensure it's big enough to hold
864.Fa len / PAGE_SIZE
865pointers.
866Returns 0 for success, or appropriate error number otherwise.
867Note that wired pages can't be loaned out and
868.Fn uvm_loan
869will fail in that case.
870.Pp
871.Fn uvm_unloan
872kills loans on pages or anons.
873The
874.Fa v
875must point to the array of pointers initialized by previous call to
876.Fn uvm_loan .
877.Fa npages
878should match number of pages allocated for loan, this also matches
879number of items in the array.
880Argument
881.Fa flags
882should be one of
883.Bd -literal
884#define UVM_LOAN_TOANON       0x01    /* loan to anons */
885#define UVM_LOAN_TOPAGE       0x02    /* loan to kernel */
886.Ed
887.Pp
888and should match what was used for previous call to
889.Fn uvm_loan .
890.Sh MISCELLANEOUS FUNCTIONS
891.Ft struct uvm_object *
892.br
893.Fn uao_create "vsize_t size" "int flags" ;
894.Pp
895.Ft void
896.br
897.Fn uao_detach "struct uvm_object *uobj" ;
898.Pp
899.Ft void
900.br
901.Fn uao_reference "struct uvm_object *uobj" ;
902.Pp
903.Ft void
904.br
905.Fn uvm_chgkprot "void *addr" "size_t len" "int rw" ;
906.Pp
907.Ft bool
908.br
909.Fn uvm_kernacc "void *addr" "size_t len" "int rw" ;
910.Pp
911.Ft int
912.br
913.Fn uvm_vslock "struct vmspace *vs" "void *addr" "size_t len" "vm_prot_t prot" ;
914.Pp
915.Ft void
916.br
917.Fn uvm_vsunlock "struct vmspace *vs" "void *addr" "size_t len" ;
918.Pp
919.Ft void
920.br
921.Fn uvm_meter "void" ;
922.Pp
923.Ft void
924.br
925.Fn uvm_fork "struct lwp *l1" "struct lwp *l2" "bool shared" ;
926.Pp
927.Ft int
928.br
929.Fn uvm_grow "struct proc *p" "vaddr_t sp" ;
930.Pp
931.Ft void
932.br
933.Fn uvn_findpages "struct uvm_object *uobj" "voff_t offset" "int *npagesp" "struct vm_page **pps" "int flags" ;
934.Pp
935.Ft void
936.br
937.Fn uvm_swap_stats "int cmd" "struct swapent *sep" "int sec" "register_t *retval" ;
938.Pp
939The
940.Fn uao_create ,
941.Fn uao_detach ,
942and
943.Fn uao_reference
944functions operate on anonymous memory objects, such as those used to support
945System V shared memory.
946.Fn uao_create
947returns an object of size
948.Fa size
949with flags:
950.Bd -literal
951#define UAO_FLAG_KERNOBJ        0x1     /* create kernel object */
952#define UAO_FLAG_KERNSWAP       0x2     /* enable kernel swap */
953.Ed
954.Pp
955which can only be used once each at system boot time.
956.Fn uao_reference
957creates an additional reference to the named anonymous memory object.
958.Fn uao_detach
959removes a reference from the named anonymous memory object, destroying
960it if removing the last reference.
961.Pp
962.Fn uvm_chgkprot
963changes the protection of kernel memory from
964.Fa addr
965to
966.Fa addr + len
967to the value of
968.Fa rw .
969This is primarily useful for debuggers, for setting breakpoints.
970This function is only available with options
971.Dv KGDB .
972.Pp
973.Fn uvm_kernacc
974checks the access at address
975.Fa addr
976to
977.Fa addr + len
978for
979.Fa rw
980access in the kernel address space.
981.Pp
982.Fn uvm_vslock
983and
984.Fn uvm_vsunlock
985control the wiring and unwiring of pages for the address space
986.Fa vs
987from
988.Fa addr
989to
990.Fa addr + len .
991These functions are normally used to wire memory for I/O.
992.Pp
993.Fn uvm_meter
994calculates the load average and wakes up the swapper if necessary.
995.Pp
996.Fn uvm_fork
997forks a virtual address space for the (old) lwp
998.Fa l1
999and the (new) lwp
1000.Fa l2 .
1001If the
1002.Fa shared
1003argument is non-zero, l1 shares its address space with l2,
1004otherwise a new address space is created.
1005This function currently has no return value, and thus cannot fail.
1006In the future, this function will be changed to allow it to
1007fail in low memory conditions.
1008.Pp
1009.Fn uvm_grow
1010increases the stack segment of process
1011.Fa p
1012to include
1013.Fa sp .
1014.Pp
1015.Fn uvn_findpages
1016looks up or creates pages in
1017.Fa uobj
1018at offset
1019.Fa offset ,
1020marks them busy and returns them in the
1021.Fa pps
1022array.
1023Currently
1024.Fa uobj
1025must be a vnode object.
1026The number of pages requested is pointed to by
1027.Fa npagesp ,
1028and this value is updated with the actual number of pages returned.
1029The flags can be
1030.Bd -literal
1031#define UFP_ALL         0x00    /* return all pages requested */
1032#define UFP_NOWAIT      0x01    /* don't sleep */
1033#define UFP_NOALLOC     0x02    /* don't allocate new pages */
1034#define UFP_NOCACHE     0x04    /* don't return pages which already exist */
1035#define UFP_NORDONLY    0x08    /* don't return PG_READONLY pages */
1036.Ed
1037.Pp
1038.Dv UFP_ALL
1039is a pseudo-flag meaning all requested pages should be returned.
1040.Dv UFP_NOWAIT
1041means that we must not sleep.
1042.Dv UFP_NOALLOC
1043causes any pages which do not already exist to be skipped.
1044.Dv UFP_NOCACHE
1045causes any pages which do already exist to be skipped.
1046.Dv UFP_NORDONLY
1047causes any pages which are marked PG_READONLY to be skipped.
1048.Pp
1049.Fn uvm_swap_stats
1050implements the
1051.Dv SWAP_STATS
1052and
1053.Dv SWAP_OSTATS
1054operation of the
1055.Xr swapctl 2
1056system call.
1057.Fa cmd
1058is the requested command,
1059.Dv SWAP_STATS
1060or
1061.Dv SWAP_OSTATS .
1062The function will copy no more than
1063.Fa sec
1064entries in the array pointed to by
1065.Fa sep .
1066On return,
1067.Fa retval
1068holds the actual number of entries copied in the array.
1069.Sh SYSCTL
1070UVM provides support for the
1071.Dv CTL_VM
1072domain of the
1073.Xr sysctl 3
1074hierarchy.
1075It handles the
1076.Dv VM_LOADAVG ,
1077.Dv VM_METER ,
1078.Dv VM_UVMEXP ,
1079and
1080.Dv VM_UVMEXP2
1081nodes, which return the current load averages, calculate current VM
1082totals, return the uvmexp structure, and return a kernel version independent
1083view of the uvmexp structure, respectively.
1084It also exports a number of tunables that control how much VM space is
1085allowed to be consumed by various tasks.
1086The load averages are typically accessed from userland using the
1087.Xr getloadavg 3
1088function.
1089The uvmexp structure has all global state of the UVM system,
1090and has the following members:
1091.Bd -literal
1092/* vm_page constants */
1093int pagesize;   /* size of a page (PAGE_SIZE): must be power of 2 */
1094int pagemask;   /* page mask */
1095int pageshift;  /* page shift */
1096
1097/* vm_page counters */
1098int npages;     /* number of pages we manage */
1099int free;       /* number of free pages */
1100int active;     /* number of active pages */
1101int inactive;   /* number of pages that we free'd but may want back */
1102int paging;     /* number of pages in the process of being paged out */
1103int wired;      /* number of wired pages */
1104int reserve_pagedaemon; /* number of pages reserved for pagedaemon */
1105int reserve_kernel; /* number of pages reserved for kernel */
1106
1107/* pageout params */
1108int freemin;    /* min number of free pages */
1109int freetarg;   /* target number of free pages */
1110int inactarg;   /* target number of inactive pages */
1111int wiredmax;   /* max number of wired pages */
1112
1113/* swap */
1114int nswapdev;   /* number of configured swap devices in system */
1115int swpages;    /* number of PAGE_SIZE'ed swap pages */
1116int swpginuse;  /* number of swap pages in use */
1117int nswget;     /* number of times fault calls uvm_swap_get() */
1118int nanon;      /* number total of anon's in system */
1119int nfreeanon;  /* number of free anon's */
1120
1121/* stat counters */
1122int faults;             /* page fault count */
1123int traps;              /* trap count */
1124int intrs;              /* interrupt count */
1125int swtch;              /* context switch count */
1126int softs;              /* software interrupt count */
1127int syscalls;           /* system calls */
1128int pageins;            /* pagein operation count */
1129                        /* pageouts are in pdpageouts below */
1130int swapins;            /* swapins */
1131int swapouts;           /* swapouts */
1132int pgswapin;           /* pages swapped in */
1133int pgswapout;          /* pages swapped out */
1134int forks;              /* forks */
1135int forks_ppwait;       /* forks where parent waits */
1136int forks_sharevm;      /* forks where vmspace is shared */
1137
1138/* fault subcounters */
1139int fltnoram;   /* number of times fault was out of ram */
1140int fltnoanon;  /* number of times fault was out of anons */
1141int fltpgwait;  /* number of times fault had to wait on a page */
1142int fltpgrele;  /* number of times fault found a released page */
1143int fltrelck;   /* number of times fault relock called */
1144int fltrelckok; /* number of times fault relock is a success */
1145int fltanget;   /* number of times fault gets anon page */
1146int fltanretry; /* number of times fault retrys an anon get */
1147int fltamcopy;  /* number of times fault clears "needs copy" */
1148int fltnamap;   /* number of times fault maps a neighbor anon page */
1149int fltnomap;   /* number of times fault maps a neighbor obj page */
1150int fltlget;    /* number of times fault does a locked pgo_get */
1151int fltget;     /* number of times fault does an unlocked get */
1152int flt_anon;   /* number of times fault anon (case 1a) */
1153int flt_acow;   /* number of times fault anon cow (case 1b) */
1154int flt_obj;    /* number of times fault is on object page (2a) */
1155int flt_prcopy; /* number of times fault promotes with copy (2b) */
1156int flt_przero; /* number of times fault promotes with zerofill (2b) */
1157
1158/* daemon counters */
1159int pdwoke;     /* number of times daemon woke up */
1160int pdrevs;     /* number of times daemon rev'd clock hand */
1161int pdswout;    /* number of times daemon called for swapout */
1162int pdfreed;    /* number of pages daemon freed since boot */
1163int pdscans;    /* number of pages daemon scanned since boot */
1164int pdanscan;   /* number of anonymous pages scanned by daemon */
1165int pdobscan;   /* number of object pages scanned by daemon */
1166int pdreact;    /* number of pages daemon reactivated since boot */
1167int pdbusy;     /* number of times daemon found a busy page */
1168int pdpageouts; /* number of times daemon started a pageout */
1169int pdpending;  /* number of times daemon got a pending pageout */
1170int pddeact;    /* number of pages daemon deactivates */
1171.Ed
1172.Sh NOTES
1173.Fn uvm_chgkprot
1174is only available if the kernel has been compiled with options
1175.Dv KGDB .
1176.Pp
1177All structure and types whose names begin with
1178.Dq vm_
1179will be renamed to
1180.Dq uvm_ .
1181.Sh SEE ALSO
1182.Xr swapctl 2 ,
1183.Xr getloadavg 3 ,
1184.Xr kvm 3 ,
1185.Xr sysctl 3 ,
1186.Xr ddb 4 ,
1187.Xr options 4 ,
1188.Xr memoryallocators 9 ,
1189.Xr pmap 9
1190.Sh HISTORY
1191UVM is a new VM system developed at Washington University in St. Louis
1192(Missouri).
1193UVM's roots lie partly in the Mach-based
1194.Bx 4.4
1195VM system, the
1196.Fx
1197VM system, and the SunOS 4 VM system.
1198UVM's basic structure is based on the
1199.Bx 4.4
1200VM system.
1201UVM's new anonymous memory system is based on the
1202anonymous memory system found in the SunOS 4 VM (as described in papers
1203published by Sun Microsystems, Inc.).
1204UVM also includes a number of features new to
1205.Bx
1206including page loanout, map entry passing, simplified
1207copy-on-write, and clustered anonymous memory pageout.
1208UVM is also further documented in an August 1998 dissertation by
1209Charles D. Cranor.
1210.Pp
1211UVM appeared in
1212.Nx 1.4 .
1213.Sh AUTHORS
1214Charles D. Cranor
1215.Aq chuck@ccrc.wustl.edu
1216designed and implemented UVM.
1217.Pp
1218Matthew Green
1219.Aq mrg@eterna.com.au
1220wrote the swap-space management code and handled the logistical issues
1221involved with merging UVM into the
1222.Nx
1223source tree.
1224.Pp
1225Chuck Silvers
1226.Aq chuq@chuq.com
1227implemented the aobj pager, thus allowing UVM to support System V shared
1228memory and process swapping.
1229He also designed and implemented the UBC part of UVM, which uses UVM pages
1230to cache vnode data rather than the traditional buffer cache buffers.
1231