/*	$OpenBSD: subr_hibernate.c,v 1.4 2011/07/08 18:31:16 ariane Exp $	*/

/*
 * Copyright (c) 2011 Ariane van der Steldt <ariane@stack.nl>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include <sys/hibernate.h>
#include <sys/param.h>
#include <sys/tree.h>
#include <sys/types.h>
#include <sys/systm.h>
#include <uvm/uvm.h>


/*
 * Alignment enforced by the hib allocator.
 */
#define HIB_ALIGN		8 /* bytes alignment */

/*
 * sizeof, rounded up to the enforced alignment.
 */
#define HIB_SIZEOF(_type)	roundup(sizeof(_type), HIB_ALIGN)
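
/*
 * Illustration (not part of the allocator): HIB_SIZEOF() simply rounds a
 * compile-time size up to the next multiple of HIB_ALIGN.  For example, on
 * a platform where sizeof(struct hiballoc_entry) happened to be 20 bytes,
 * HIB_SIZEOF(struct hiballoc_entry) would evaluate to roundup(20, 8) == 24,
 * keeping both headers and payloads 8-byte aligned.  The exact numbers
 * depend on the platform's pointer size and RB_ENTRY layout.
 */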

struct hiballoc_entry {
	size_t			hibe_use;	/* Bytes used by allocation. */
	size_t			hibe_space;	/* Free bytes following it. */
	RB_ENTRY(hiballoc_entry) hibe_entry;	/* Address tree glue. */
};

/*
 * Compare hiballoc entries based on the address they manage.
 *
 * Since the address is at a fixed offset from the struct hiballoc_entry,
 * we can simply compare the hiballoc_entry pointers.
 */
static __inline int
hibe_cmp(struct hiballoc_entry *l, struct hiballoc_entry *r)
{
	return l < r ? -1 : (l > r);
}

RB_PROTOTYPE(hiballoc_addr, hiballoc_entry, hibe_entry, hibe_cmp)

/*
 * Given a hiballoc entry, return the address it manages.
 */
static __inline void *
hib_entry_to_addr(struct hiballoc_entry *entry)
{
	caddr_t addr;

	addr = (caddr_t)entry;
	addr += HIB_SIZEOF(struct hiballoc_entry);
	return addr;
}

/*
 * Given an address, find the hiballoc_entry that manages it.
 */
static __inline struct hiballoc_entry *
hib_addr_to_entry(void *addr_param)
{
	caddr_t addr;

	addr = (caddr_t)addr_param;
	addr -= HIB_SIZEOF(struct hiballoc_entry);
	return (struct hiballoc_entry *)addr;
}
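
/*
 * Layout sketch (illustration only): each allocation is preceded by its
 * bookkeeping header, so the two conversions above are exact inverses:
 *
 *	| struct hiballoc_entry | payload (hibe_use bytes) | free (hibe_space) |
 *	^                        ^
 *	hib_addr_to_entry(p)     p = hib_entry_to_addr(entry)
 *
 * For any entry e in the arena, hib_addr_to_entry(hib_entry_to_addr(e)) == e.
 */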

RB_GENERATE(hiballoc_addr, hiballoc_entry, hibe_entry, hibe_cmp)

/*
 * Allocate memory from the arena.
 *
 * Returns NULL if no memory is available.
 */
void *
hib_alloc(struct hiballoc_arena *arena, size_t alloc_sz)
{
	struct hiballoc_entry *entry, *new_entry;
	size_t find_sz;

	/*
	 * Enforce alignment of HIB_ALIGN bytes.
	 *
	 * Note that, because the entry is put in front of the allocation,
	 * 0-byte allocations are guaranteed a unique address.
	 */
	alloc_sz = roundup(alloc_sz, HIB_ALIGN);

	/*
	 * Find an entry with hibe_space >= find_sz.
	 *
	 * If the root node does not have enough free space, fall back to
	 * traversing the tree.  Because allocations are carved from the
	 * bottom of an entry's free space, traversal from the end (highest
	 * addresses) has a slightly better chance of yielding a sufficiently
	 * large gap.
	 */
	find_sz = alloc_sz + HIB_SIZEOF(struct hiballoc_entry);
	entry = RB_ROOT(&arena->hib_addrs);
	if (entry != NULL && entry->hibe_space < find_sz) {
		RB_FOREACH_REVERSE(entry, hiballoc_addr, &arena->hib_addrs) {
			if (entry->hibe_space >= find_sz)
				break;
		}
	}

	/*
	 * Insufficient or too fragmented memory.
	 */
	if (entry == NULL)
		return NULL;

	/*
	 * Create new entry in allocated space.
	 */
	new_entry = (struct hiballoc_entry *)(
	    (caddr_t)hib_entry_to_addr(entry) + entry->hibe_use);
	new_entry->hibe_space = entry->hibe_space - find_sz;
	new_entry->hibe_use = alloc_sz;

	/*
	 * Insert entry.
	 */
	if (RB_INSERT(hiballoc_addr, &arena->hib_addrs, new_entry) != NULL)
		panic("hib_alloc: insert failure");
	entry->hibe_space = 0;

	/* Return address managed by entry. */
	return hib_entry_to_addr(new_entry);
}
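
/*
 * What hib_alloc() does to an entry's free space, sketched (illustration
 * only):
 *
 * before:  | entry hdr | entry use |            entry free space           |
 * after:   | entry hdr | entry use | new hdr | new use |  new free space   |
 *
 * The new header plus the (aligned) allocation are carved from the front of
 * the old entry's free space; whatever remains becomes the new entry's
 * hibe_space, and the old entry's hibe_space drops to 0.
 */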

/*
 * Free a pointer previously allocated from this arena.
 *
 * If addr is NULL, this will be silently accepted.
 */
void
hib_free(struct hiballoc_arena *arena, void *addr)
{
	struct hiballoc_entry *entry, *prev;

	if (addr == NULL)
		return;

	/*
	 * Derive entry from addr and check it is really in this arena.
	 */
	entry = hib_addr_to_entry(addr);
	if (RB_FIND(hiballoc_addr, &arena->hib_addrs, entry) != entry)
		panic("hib_free: freed item %p not in hib arena", addr);

	/*
	 * Give the space in entry to its predecessor, if the two are
	 * physically adjacent.
	 *
	 * Otherwise (e.g. entry is the lowest entry in the arena), simply
	 * turn its used space into free space.
	 */
	prev = RB_PREV(hiballoc_addr, &arena->hib_addrs, entry);
	if (prev != NULL &&
	    (void *)((caddr_t)prev + HIB_SIZEOF(struct hiballoc_entry) +
	    prev->hibe_use + prev->hibe_space) == entry) {
		/* Merge entry into its predecessor. */
		RB_REMOVE(hiballoc_addr, &arena->hib_addrs, entry);
		prev->hibe_space += HIB_SIZEOF(struct hiballoc_entry) +
		    entry->hibe_use + entry->hibe_space;
	} else {
		/* Flip used memory to free space. */
		entry->hibe_space += entry->hibe_use;
		entry->hibe_use = 0;
	}
}

/*
 * Initialize hiballoc.
 *
 * The allocator will manage the memory at ptr, which is len bytes long.
 */
int
hiballoc_init(struct hiballoc_arena *arena, void *p_ptr, size_t p_len)
{
	struct hiballoc_entry *entry;
	caddr_t ptr;
	size_t len;

	RB_INIT(&arena->hib_addrs);

	/*
	 * Hib allocator enforces HIB_ALIGN alignment.
	 * Fixup ptr and len.
	 */
	ptr = (caddr_t)roundup((vaddr_t)p_ptr, HIB_ALIGN);
	len = p_len - ((size_t)ptr - (size_t)p_ptr);
	len &= ~((size_t)HIB_ALIGN - 1);

	/*
	 * Insufficient memory to be able to allocate and also do bookkeeping.
	 */
	if (len <= HIB_SIZEOF(struct hiballoc_entry))
		return ENOMEM;

	/*
	 * Create entry describing space.
	 */
	entry = (struct hiballoc_entry *)ptr;
	entry->hibe_use = 0;
	entry->hibe_space = len - HIB_SIZEOF(struct hiballoc_entry);
	RB_INSERT(hiballoc_addr, &arena->hib_addrs, entry);

	return 0;
}
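
/*
 * Usage sketch (illustration only, not compiled): the expected call pattern
 * for the arena allocator above, assuming the caller owns a region of
 * kernel memory to hand over.  The buffer, its size and the "example_*"
 * names below are hypothetical.
 */
#if 0
void
example_hiballoc_usage(void *region, size_t region_sz)
{
	struct hiballoc_arena	 arena;
	void			*p;

	if (hiballoc_init(&arena, region, region_sz) != 0)
		panic("example: arena too small");

	p = hib_alloc(&arena, 128);	/* 128 bytes, HIB_ALIGN aligned */
	if (p == NULL)
		panic("example: arena exhausted");

	hib_free(&arena, p);		/* NULL would also be accepted */
}
#endif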


/*
 * Zero all free memory.
 */
void
uvm_pmr_zero_everything(void)
{
	struct uvm_pmemrange	*pmr;
	struct vm_page		*pg;
	int			 i;

	uvm_lock_fpageq();
	TAILQ_FOREACH(pmr, &uvm.pmr_control.use, pmr_use) {
		/* Zero single pages. */
		while ((pg = TAILQ_FIRST(&pmr->single[UVM_PMR_MEMTYPE_DIRTY]))
		    != NULL) {
			uvm_pmr_remove(pmr, pg);
			uvm_pagezero(pg);
			atomic_setbits_int(&pg->pg_flags, PG_ZERO);
			uvmexp.zeropages++;
			uvm_pmr_insert(pmr, pg, 0);
		}

		/* Zero multi page ranges. */
		while ((pg = RB_ROOT(&pmr->size[UVM_PMR_MEMTYPE_DIRTY]))
		    != NULL) {
			pg--; /* Size tree stores the second page of a range. */
			uvm_pmr_remove(pmr, pg);
			for (i = 0; i < pg->fpgsz; i++) {
				uvm_pagezero(&pg[i]);
				atomic_setbits_int(&pg[i].pg_flags, PG_ZERO);
				uvmexp.zeropages++;
			}
			uvm_pmr_insert(pmr, pg, 0);
		}
	}
	uvm_unlock_fpageq();
}

/*
 * Allocate memory at the highest address that can hold sz.
 *
 * sz in bytes.
 */
int
uvm_pmr_alloc_pig(paddr_t *addr, psize_t sz)
{
	struct uvm_pmemrange	*pmr;
	struct vm_page		*pig_pg, *pg;

	/*
	 * Convert sz to pages, since that is what pmemrange uses internally.
	 */
	sz = atop(round_page(sz));
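	/*
	 * Worked example (illustrative): with 4 KiB pages, a request of
	 * sz = 5000 bytes is first rounded up to 8192 by round_page() and
	 * then converted to 2 pages by atop().
	 */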

	uvm_lock_fpageq();

	TAILQ_FOREACH(pmr, &uvm.pmr_control.use, pmr_use) {
		RB_FOREACH_REVERSE(pig_pg, uvm_pmr_addr, &pmr->addr) {
			if (pig_pg->fpgsz >= sz) {
				goto found;
			}
		}
	}

	/*
	 * Allocation failure.
	 */
	uvm_unlock_fpageq();
	return ENOMEM;

found:
	/* Remove page from freelist. */
	uvm_pmr_remove_size(pmr, pig_pg);
	pig_pg->fpgsz -= sz;
	pg = pig_pg + pig_pg->fpgsz;
	if (pig_pg->fpgsz == 0)
		uvm_pmr_remove_addr(pmr, pig_pg);
	else
		uvm_pmr_insert_size(pmr, pig_pg);

	uvmexp.free -= sz;
	*addr = VM_PAGE_TO_PHYS(pg);

	/*
	 * Update pg flags.
	 *
	 * Note that we trash the sz argument now.
	 */
	while (sz > 0) {
		KASSERT(pg->pg_flags & PQ_FREE);

		atomic_clearbits_int(&pg->pg_flags,
		    PG_PMAP0|PG_PMAP1|PG_PMAP2|PG_PMAP3);

		if (pg->pg_flags & PG_ZERO)
			uvmexp.zeropages--;
		atomic_clearbits_int(&pg->pg_flags,
		    PG_ZERO|PQ_FREE);

		pg->uobject = NULL;
		pg->uanon = NULL;
		pg->pg_version++;

		/*
		 * Next page.
		 */
		pg++;
		sz--;
	}

	/* Return. */
	uvm_unlock_fpageq();
	return 0;
}
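
/*
 * Caller sketch (illustration only): hibernate code that needs a large,
 * physically contiguous region out of the way of low memory could use
 * something like the following; "image_sz" is a hypothetical size in bytes.
 *
 *	paddr_t pig_start;
 *
 *	if (uvm_pmr_alloc_pig(&pig_start, image_sz) != 0)
 *		return ENOMEM;
 */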

/*
 * Allocate a piglet area.
 *
 * The piglet is allocated as low in physical memory as possible.
 * It is aligned to the requested alignment.
 *
 * sz and align in bytes.
 *
 * The call may sleep, waiting for the pagedaemon to attempt to free memory.
 * The pagedaemon may decide it is not possible to free enough memory, causing
 * the allocation to fail.
 */
int
uvm_pmr_alloc_piglet(paddr_t *addr, psize_t sz, paddr_t align)
{
	vaddr_t			 pg_addr, piglet_addr;
	struct uvm_pmemrange	*pmr;
	struct vm_page		*pig_pg, *pg;
	struct pglist		 pageq;
	int			 pdaemon_woken;

	KASSERT((align & (align - 1)) == 0);	/* align is a power of two */
	pdaemon_woken = 0; /* Didn't wake the pagedaemon. */

	/*
	 * Fixup arguments: align must be at least PAGE_SIZE,
	 * sz will be converted to pagecount, since that is what
	 * pmemrange uses internally.
	 */
	if (align < PAGE_SIZE)
		align = PAGE_SIZE;
	sz = atop(round_page(sz));

	uvm_lock_fpageq();

	TAILQ_FOREACH_REVERSE(pmr, &uvm.pmr_control.use, uvm_pmemrange_use,
	    pmr_use) {
retry:
		/*
		 * Search for a range with enough space.
		 * Use the address tree, to ensure the range is as low as
		 * possible.
		 */
		RB_FOREACH(pig_pg, uvm_pmr_addr, &pmr->addr) {
			pg_addr = VM_PAGE_TO_PHYS(pig_pg);
			piglet_addr = (pg_addr + (align - 1)) & ~(align - 1);
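			/*
			 * The line above rounds pg_addr up to the next
			 * multiple of align (align is a power of two).
			 * Worked example (illustrative): pg_addr = 0x12345,
			 * align = 0x1000 -> piglet_addr = 0x13000; an
			 * already-aligned pg_addr is left unchanged.
			 */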

			/*
			 * Does the aligned piglet still fit inside this
			 * free range?
			 */
			if (atop(pg_addr) + pig_pg->fpgsz >=
			    atop(piglet_addr) + sz) {
				goto found;
			}
		}

		/*
		 * Try to coerce the pagedaemon into freeing memory
		 * for the piglet.
		 *
		 * pdaemon_woken is set to prevent the code from
		 * falling into an endless loop.
		 */
		if (!pdaemon_woken) {
			pdaemon_woken = 1;
			if (uvm_wait_pla(ptoa(pmr->low), ptoa(pmr->high) - 1,
			    ptoa(sz), UVM_PLA_FAILOK) == 0)
				goto retry;
		}
	}

	/* Return failure. */
	uvm_unlock_fpageq();
	return ENOMEM;

found:
	/*
	 * Extract piglet from pigpen.
	 */
	TAILQ_INIT(&pageq);
	uvm_pmr_extract_range(pmr, pig_pg,
	    atop(piglet_addr), atop(piglet_addr) + sz, &pageq);

	*addr = piglet_addr;
	uvmexp.free -= sz;

	/*
	 * Update the flags of every extracted page.
	 */
	TAILQ_FOREACH(pg, &pageq, pageq) {
		KASSERT(pg->pg_flags & PQ_FREE);

		atomic_clearbits_int(&pg->pg_flags,
		    PG_PMAP0|PG_PMAP1|PG_PMAP2|PG_PMAP3);

		if (pg->pg_flags & PG_ZERO)
			uvmexp.zeropages--;
		atomic_clearbits_int(&pg->pg_flags,
		    PG_ZERO|PQ_FREE);

		pg->uobject = NULL;
		pg->uanon = NULL;
		pg->pg_version++;
	}

	uvm_unlock_fpageq();
	return 0;
}
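
/*
 * Caller sketch (illustration only): a hypothetical hibernate setup step
 * that wants a low, 1 MiB piglet aligned to 1 MiB might do:
 *
 *	paddr_t piglet;
 *
 *	if (uvm_pmr_alloc_piglet(&piglet, 1024 * 1024, 1024 * 1024) != 0)
 *		return ENOMEM;
 *
 * The size and alignment values here are examples only; callers pass
 * whatever the platform's hibernate support requires.
 */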