xref: /netbsd-src/share/man/man9/kmem.9 (revision bdc22b2e01993381dcefeff2bc9b56ca75a4235c)
1.\"	$NetBSD: kmem.9,v 1.23 2017/11/07 18:36:27 christos Exp $
2.\"
3.\" Copyright (c)2006 YAMAMOTO Takashi,
4.\" All rights reserved.
5.\"
6.\" Redistribution and use in source and binary forms, with or without
7.\" modification, are permitted provided that the following conditions
8.\" are met:
9.\" 1. Redistributions of source code must retain the above copyright
10.\"    notice, this list of conditions and the following disclaimer.
11.\" 2. Redistributions in binary form must reproduce the above copyright
12.\"    notice, this list of conditions and the following disclaimer in the
13.\"    documentation and/or other materials provided with the distribution.
14.\"
15.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18.\" ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25.\" SUCH DAMAGE.
26.\"
27.\" ------------------------------------------------------------
28.Dd November 7, 2017
29.Dt KMEM 9
30.Os
31.\" ------------------------------------------------------------
32.Sh NAME
33.Nm kmem
34.Nd kernel wired memory allocator
35.\" ------------------------------------------------------------
36.Sh SYNOPSIS
37.In sys/kmem.h
38.\" - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
39.Ft void *
40.Fn kmem_alloc \
41"size_t size" "km_flag_t kmflags"
42.Ft void *
43.Fn kmem_zalloc \
44"size_t size" "km_flag_t kmflags"
45.Ft void
46.Fn kmem_free \
47"void *p" "size_t size"
48.\" ---
49.Ft void *
50.Fn kmem_intr_alloc \
51"size_t size" "km_flag_t kmflags"
52.Ft void *
53.Fn kmem_intr_zalloc \
54"size_t size" "km_flag_t kmflags"
55.Ft void
56.Fn kmem_intr_free \
57"void *p" "size_t size"
58.\" ---
59.Ft char *
60.Fn kmem_asprintf \
61"const char *fmt" "..."
62.\" ---
63.Ft char *
64.Fn kmem_strdupsize \
65"const char *str" "size_t *size" "km_flag_t kmflags"
66.Ft void
67.Fn kmem_strfree \
68"char *str"
69.\" ------------------------------------------------------------
70.Pp
71.Cd "options KMEM_SIZE"
72.Cd "options KMEM_REDZONE"
73.Cd "options KMEM_GUARD"
74.Sh DESCRIPTION
75.Fn kmem_alloc
76allocates kernel wired memory.
77It takes the following arguments.
78.Bl -tag -width kmflags
79.It Fa size
80Specify the size of allocation in bytes.
81.It Fa kmflags
82Either of the following:
83.Bl -tag -width KM_NOSLEEP
84.It Dv KM_SLEEP
85If the allocation cannot be satisfied immediately, sleep until enough
86memory is available.
87If
88.Dv KM_SLEEP
89is specified, then the allocation cannot fail.
90.It Dv KM_NOSLEEP
91Don't sleep.
92Immediately return
93.Dv NULL
94if there is not enough memory available.
95It should only be used when failure to allocate will not have harmful,
96user-visible effects.
97.Pp
98.Bf -symbolic
99Use of
100.Dv KM_NOSLEEP
101is strongly discouraged as it can create transient, hard to debug failures
102that occur when the system is under memory pressure.
103.Ef
104.Pp
105In situations where it is not possible to sleep, for example because locks
106are held by the caller, the code path should be restructured to allow the
107allocation to be made in another place.
108.El
109.El
110.Pp
111The contents of allocated memory are uninitialized.
112.Pp
113Unlike Solaris, kmem_alloc(0, flags) is illegal.
114.Pp
115.\" ------------------------------------------------------------
116.Fn kmem_zalloc
117is the equivalent of
118.Fn kmem_alloc ,
119except that it initializes the memory to zero.
120.Pp
121.\" ------------------------------------------------------------
122.Fn kmem_asprintf
123functions like the well-known
124.Fn asprintf
125function, but allocates memory using
126.Fn kmem_alloc .
127This routine can sleep during allocation.
128The size of the allocated area is the length of the returned character string, plus one (for the NUL terminator).
129This must be taken into consideration when freeing the returned area with
130.Fn kmem_free .
131.Pp
132.\" ------------------------------------------------------------
133.Fn kmem_free
134frees kernel wired memory allocated by
135.Fn kmem_alloc
136or
137.Fn kmem_zalloc
138so that it can be used for other purposes.
139It takes the following arguments.
140.Bl -tag -width kmflags
141.It Fa p
142The pointer to the memory being freed.
143It must be the one returned by
144.Fn kmem_alloc
145or
146.Fn kmem_zalloc .
147.It Fa size
148The size of the memory being freed, in bytes.
149It must be the same as the
150.Fa size
151argument used for
152.Fn kmem_alloc
153or
154.Fn kmem_zalloc
155when the memory was allocated.
156.El
157.Pp
158Freeing
159.Dv NULL
160is illegal.
161.Pp
162.\" ------------------------------------------------------------
163.Fn kmem_intr_alloc ,
164.Fn kmem_intr_zalloc
165and
166.Fn kmem_intr_free
167are the equivalents of the above kmem routines which can be called
168from the interrupt context.
169These routines are intended for special cases.
170Normally,
171.Xr pool_cache 9
172should be used for memory allocation from interrupt context.
173.Pp
174The
175.Fn kmem_strdupsize
176function is a utility function that can be used to copy the string in the
177.Fa str
178argument to a new buffer allocated using
179.Fn kmem_alloc
180and optionally return the size of the allocation (the length of the string
181plus the trailing
182.Dv NUL )
183in the
184.Fa size
185argument if that is not
186.Dv NULL .
187.Pp
188The
189.Fn kmem_strfree
190function can be used to free a
191.Dv NUL
192terminated string: it computes the length of the string using
193.Xr strlen 3 ,
194adds one for the
195.Dv NUL ,
196and then calls
197.Fn kmem_free .
198.\" ------------------------------------------------------------
199.Sh NOTES
200Making
201.Dv KM_SLEEP
202allocations while holding mutexes or reader/writer locks is discouraged, as the
203caller can sleep for an unbounded amount of time in order to satisfy the
204allocation.
205This can in turn block other threads that wish to acquire locks held by the
206caller.
207It should be noted that
208.Fn kmem_free
209may also block.
210.Pp
211For some locks this is permissible or even unavoidable.
212For others, particularly locks that may be taken from soft interrupt context,
213it is a serious problem.
214As a general rule it is better not to allow this type of situation to develop.
215One way to circumvent the problem is to make allocations speculative and part
216of a retryable sequence.
217For example:
218.Bd -literal
219  retry:
220        /* speculative unlocked check */
221        if (need to allocate) {
222                new_item = kmem_alloc(sizeof(*new_item), KM_SLEEP);
223        } else {
224                new_item = NULL;
225        }
226        mutex_enter(lock);
227        /* check while holding lock for true status */
228        if (need to allocate) {
229                if (new_item == NULL) {
230                        mutex_exit(lock);
231                        goto retry;
232                }
233                consume(new_item);
234                new_item = NULL;
235        }
236        mutex_exit(lock);
237        if (new_item != NULL) {
238                /* did not use it after all */
239                kmem_free(new_item, sizeof(*new_item));
240        }
241.Ed
242.\" ------------------------------------------------------------
243.Sh OPTIONS
244.Ss KMEM_SIZE
245Kernels compiled with the
246.Dv KMEM_SIZE
247option ensure the size given in
248.Fn kmem_free
249matches the actual allocated size.
250On
251.Fn kmem_alloc ,
252the kernel will allocate an additional contiguous kmem page of eight
253bytes in the buffer, will register the allocated size in the first kmem
254page of that buffer, and will return a pointer to the second kmem page
255in that same buffer.
256When freeing, the kernel reads the first page, and compares the
257size registered with the one given in
258.Fn kmem_free .
259Any mismatch triggers a panic.
260.Pp
261.Dv KMEM_SIZE
262is enabled by default on
263.Dv DIAGNOSTIC
264and
265.Dv DEBUG .
266.Ss KMEM_REDZONE
267Kernels compiled with the
268.Dv KMEM_REDZONE
269option add a dynamic pattern of two bytes at the end of each allocated
270buffer, and check this pattern when freeing to ensure the caller hasn't
271written outside the requested area.
272This option does not introduce a significant performance impact,
273but has two drawbacks: it only catches write overflows, and catches
274them only on
275.Fn kmem_free .
276.Pp
277.Dv KMEM_REDZONE
278is enabled by default on
279.Dv DIAGNOSTIC .
280.Ss KMEM_GUARD
281Kernels compiled with the
282.Dv KMEM_GUARD
283option perform CPU intensive sanity checks on kmem operations.
284It adds additional, very high overhead runtime verification to kmem
285operations.
286It must be enabled with
287.Dv KMEM_SIZE .
288.Pp
289.Dv KMEM_GUARD
290tries to catch the following types of bugs:
291.Bl -bullet
292.It
293Overflow at time of occurrence, by means of a guard page.
294An unmapped guard page sits immediately after the requested area;
295a read/write overflow therefore triggers a page fault.
296.It
297Underflow at
298.Fn kmem_free ,
299by using
300.Dv KMEM_SIZE Ap s
301registered size.
302If an underflow occurs, the size stored by
303.Dv KMEM_SIZE
304will be overwritten, which means that when freeing, the kernel will
305spot the mismatch.
306.It
307Use-after-free at time of occurrence.
308When freeing, the memory is unmapped, and depending on the value
309of kmem_guard_depth, the kernel will more or less delay the recycling
310of that memory.
311This means that any subsequent read/write access to the memory will
312trigger a page fault, provided it hasn't been recycled yet.
313.El
314.Pp
315To enable it, boot the system with the
316.Fl d
317option, which causes the debugger to be entered early during the kernel
318boot process.
319Issue commands such as the following:
320.Bd -literal
321db> w kmem_guard_depth 0t30000
322db> c
323.Ed
324.Pp
325This instructs
326.Dv kmem_guard
327to queue up to 60000 (30000*2) pages of unmapped KVA to catch
328use-after-free type errors.
329When
330.Fn kmem_free
331is called, memory backing a freed item is unmapped and the kernel VA
332space pushed onto a FIFO.
333The VA space will not be reused until another 30k items have been freed.
334Until it is reused, the kernel will catch invalid accesses and panic with a page fault.
335Limitations:
336.Bl -bullet
337.It
338It has a severe impact on performance.
339.It
340It is best used on a 64-bit machine with lots of RAM.
341.El
342.Pp
343.Dv KMEM_GUARD
344is enabled by default on
345.Dv DEBUG .
346.Sh RETURN VALUES
347On success,
348.Fn kmem_alloc ,
349.Fn kmem_asprintf ,
350.Fn kmem_intr_alloc ,
351.Fn kmem_intr_zalloc ,
352.Fn kmem_strdupsize ,
353and
354.Fn kmem_zalloc
355return a pointer to allocated memory.
356Otherwise,
357.Dv NULL
358is returned.
359.\" ------------------------------------------------------------
360.Sh CODE REFERENCES
361The
362.Nm
363subsystem is implemented within the file
364.Pa sys/kern/subr_kmem.c .
365.\" ------------------------------------------------------------
366.Sh SEE ALSO
367.Xr intro 9 ,
368.Xr memoryallocators 9 ,
369.Xr percpu 9 ,
370.Xr pool_cache 9 ,
371.Xr uvm_km 9
372.\" ------------------------------------------------------------
373.Sh CAVEATS
374The
375.Fn kmem_alloc ,
376.Fn kmem_asprintf ,
377.Fn kmem_free ,
378.Fn kmem_strdupsize ,
379.Fn kmem_strfree ,
380and
381.Fn kmem_zalloc
382functions cannot be used from interrupt context, from a soft interrupt,
383or from a callout.
384Use
385.Xr pool_cache 9
386in these situations.
387.\" ------------------------------------------------------------
388.Sh SECURITY CONSIDERATIONS
389As the memory allocated by
390.Fn kmem_alloc
391is uninitialized, it can contain security-sensitive data left by its
392previous user.
393It is the caller's responsibility not to expose it to the world.
394