xref: /openbsd-src/sys/kern/subr_hibernate.c (revision 82c7a60e60c8dfe0343a59ff53b62e0298f872d4)
1 /*	$OpenBSD: subr_hibernate.c,v 1.7 2011/07/08 21:02:49 ariane Exp $	*/
2 
3 /*
4  * Copyright (c) 2011 Ariane van der Steldt <ariane@stack.nl>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 
19 #include <sys/hibernate.h>
20 #include <sys/param.h>
21 #include <sys/tree.h>
22 #include <sys/types.h>
23 #include <sys/systm.h>
24 #include <uvm/uvm.h>
25 
26 
27 /*
28  * Alignment enforced on all hib allocations.
29  */
30 #define HIB_ALIGN		8 /* bytes alignment */
31 
32 /*
33  * Like the sizeof operator, but rounded up to the allocator's alignment.
34  */
35 #define HIB_SIZEOF(_type)	roundup(sizeof(_type), HIB_ALIGN)
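
/*
 * For example, with HIB_ALIGN being 8, a 20-byte struct is accounted for
 * as 24 bytes by HIB_SIZEOF, while a 24-byte struct is left unchanged.
 * hib_alloc() below rounds allocation sizes up the same way.
 */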
36 
37 struct hiballoc_entry
38 {
39 	size_t			hibe_use;
40 	size_t			hibe_space;
41 	RB_ENTRY(hiballoc_entry) hibe_entry;
42 };
43 
44 /*
45  * Compare hiballoc entries based on the address they manage.
46  *
47  * Since the managed address is at a fixed offset from its struct
48  * hiballoc_entry, comparing the entry pointers is sufficient.
49  */
50 static __inline int
51 hibe_cmp(struct hiballoc_entry *l, struct hiballoc_entry *r)
52 {
53 	return l < r ? -1 : (l > r);
54 }
55 
56 RB_PROTOTYPE(hiballoc_addr, hiballoc_entry, hibe_entry, hibe_cmp)
57 
58 /*
59  * Given a hiballoc entry, return the address it manages.
60  */
61 static __inline void*
62 hib_entry_to_addr(struct hiballoc_entry *entry)
63 {
64 	caddr_t addr;
65 
66 	addr = (caddr_t)entry;
67 	addr += HIB_SIZEOF(struct hiballoc_entry);
68 	return addr;
69 }
70 
71 /*
72  * Given an address, find the hiballoc entry that manages it.
73  */
74 static __inline struct hiballoc_entry*
75 hib_addr_to_entry(void* addr_param)
76 {
77 	caddr_t addr;
78 
79 	addr = (caddr_t)addr_param;
80 	addr -= HIB_SIZEOF(struct hiballoc_entry);
81 	return (struct hiballoc_entry*)addr;
82 }
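
/*
 * Illustrative layout sketch (not part of the original source): each
 * allocation is immediately preceded by its bookkeeping entry, which is
 * what the two conversions above rely on:
 *
 *	| struct hiballoc_entry | hibe_use bytes | hibe_space bytes |
 *	^                       ^
 *	entry                   hib_entry_to_addr(entry)
 */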
83 
84 RB_GENERATE(hiballoc_addr, hiballoc_entry, hibe_entry, hibe_cmp)
85 
86 /*
87  * Allocate memory from the arena.
88  *
89  * Returns NULL if no memory is available.
90  */
91 void*
92 hib_alloc(struct hiballoc_arena *arena, size_t alloc_sz)
93 {
94 	struct hiballoc_entry *entry, *new_entry;
95 	size_t find_sz;
96 
97 	/*
98 	 * Enforce alignment of HIB_ALIGN bytes.
99 	 *
100 	 * Note that, because the entry is put in front of the allocation,
101 	 * 0-byte allocations are guaranteed a unique address.
102 	 */
103 	alloc_sz = roundup(alloc_sz, HIB_ALIGN);
104 
105 	/*
106 	 * Find an entry with hibe_space >= find_sz.
107 	 *
108 	 * If the root node is not large enough, we switch to tree traversal.
109 	 * Because all entries are made at the bottom of the free space,
110 	 * traversal from the end has a slightly better chance of yielding
111 	 * a sufficiently large space.
112 	 */
113 	find_sz = alloc_sz + HIB_SIZEOF(struct hiballoc_entry);
114 	entry = RB_ROOT(&arena->hib_addrs);
115 	if (entry != NULL && entry->hibe_space < find_sz) {
116 		RB_FOREACH_REVERSE(entry, hiballoc_addr, &arena->hib_addrs) {
117 			if (entry->hibe_space >= find_sz)
118 				break;
119 		}
120 	}
121 
122 	/*
123 	 * Insufficient or too fragmented memory.
124 	 */
125 	if (entry == NULL)
126 		return NULL;
127 
128 	/*
129 	 * Create new entry in allocated space.
130 	 */
131 	new_entry = (struct hiballoc_entry*)(
132 	    (caddr_t)hib_entry_to_addr(entry) + entry->hibe_use);
133 	new_entry->hibe_space = entry->hibe_space - find_sz;
134 	new_entry->hibe_use = alloc_sz;
135 
136 	/*
137 	 * Insert entry.
138 	 */
139 	if (RB_INSERT(hiballoc_addr, &arena->hib_addrs, new_entry) != NULL)
140 		panic("hib_alloc: insert failure");
141 	entry->hibe_space = 0;
142 
143 	/* Return address managed by entry. */
144 	return hib_entry_to_addr(new_entry);
145 }
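
/*
 * Illustrative sketch of the carve performed above (not part of the
 * original source).  The new entry is created at the start of the old
 * entry's free space and takes over whatever free space remains:
 *
 *	before:	| entry hdr | use |              space              |
 *	after:	| entry hdr | use | new hdr | alloc_sz | new space   |
 *
 * where new space == space - alloc_sz - HIB_SIZEOF(struct hiballoc_entry)
 * and the old entry's hibe_space is set to 0.
 */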
146 
147 /*
148  * Free a pointer previously allocated from this arena.
149  *
150  * If addr is NULL, this will be silently accepted.
151  */
152 void
153 hib_free(struct hiballoc_arena *arena, void *addr)
154 {
155 	struct hiballoc_entry *entry, *prev;
156 
157 	if (addr == NULL)
158 		return;
159 
160 	/*
161 	 * Derive entry from addr and check it is really in this arena.
162 	 */
163 	entry = hib_addr_to_entry(addr);
164 	if (RB_FIND(hiballoc_addr, &arena->hib_addrs, entry) != entry)
165 		panic("hib_free: freed item %p not in hib arena", addr);
166 
167 	/*
168 	 * Give the space in entry to its predecessor.
169 	 *
170 	 * If entry has no predecessor, change its used space into free space
171 	 * instead.
172 	 */
173 	prev = RB_PREV(hiballoc_addr, &arena->hib_addrs, entry);
174 	if (prev != NULL &&
175 	    (void*)((caddr_t)prev + HIB_SIZEOF(struct hiballoc_entry) +
176 	    prev->hibe_use + prev->hibe_space) == entry) {
177 		/* Merge entry. */
178 		RB_REMOVE(hiballoc_addr, &arena->hib_addrs, entry);
179 		prev->hibe_space += HIB_SIZEOF(struct hiballoc_entry) +
180 		    entry->hibe_use + entry->hibe_space;
181 	} else {
182 		/* Flip used memory to free space. */
183 		entry->hibe_space += entry->hibe_use;
184 		entry->hibe_use = 0;
185 	}
186 }
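
/*
 * Illustrative merge sketch (not part of the original source).  The
 * predecessor only absorbs the freed entry when the two are physically
 * adjacent, i.e. prev's header, used and free space end exactly where
 * entry begins:
 *
 *	| prev hdr | prev use | prev space | entry hdr | entry use | ... |
 *	                                   ^-- entry
 *
 * On merge, prev->hibe_space grows by the entry header plus entry's used
 * and free space; otherwise entry's used space simply becomes free space.
 */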
187 
188 /*
189  * Initialize hiballoc.
190  *
191  * The allocator will manage the memory at ptr, which is len bytes long.
192  */
193 int
194 hiballoc_init(struct hiballoc_arena *arena, void *p_ptr, size_t p_len)
195 {
196 	struct hiballoc_entry *entry;
197 	caddr_t ptr;
198 	size_t len;
199 
200 	RB_INIT(&arena->hib_addrs);
201 
202 	/*
203 	 * Hib allocator enforces HIB_ALIGN alignment.
204 	 * Fixup ptr and len.
205 	 */
206 	ptr = (caddr_t)roundup((vaddr_t)p_ptr, HIB_ALIGN);
207 	len = p_len - ((size_t)ptr - (size_t)p_ptr);
208 	len &= ~((size_t)HIB_ALIGN - 1);
209 
210 	/*
211 	 * Insufficient memory to be able to allocate and also do bookkeeping.
212 	 */
213 	if (len <= HIB_SIZEOF(struct hiballoc_entry))
214 		return ENOMEM;
215 
216 	/*
217 	 * Create entry describing space.
218 	 */
219 	entry = (struct hiballoc_entry*)ptr;
220 	entry->hibe_use = 0;
221 	entry->hibe_space = len - HIB_SIZEOF(struct hiballoc_entry);
222 	RB_INSERT(hiballoc_addr, &arena->hib_addrs, entry);
223 
224 	return 0;
225 }
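
/*
 * Usage sketch (illustrative only, not part of the original file; the
 * hib_scratch buffer, its size and the function name are hypothetical).
 * An arena is set up over a caller-provided region, then a single
 * allocation/free cycle is performed.
 */
#if 0
static char hib_scratch[4096];		/* hypothetical backing store */

static void
hiballoc_example(void)
{
	struct hiballoc_arena	 arena;
	void			*p;

	if (hiballoc_init(&arena, hib_scratch, sizeof(hib_scratch)) != 0)
		return;			/* too small for bookkeeping */

	p = hib_alloc(&arena, 100);	/* size is rounded up to HIB_ALIGN */
	if (p != NULL)
		hib_free(&arena, p);	/* hib_free(&arena, NULL) is a no-op */
}
#endif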
226 
227 
228 /*
229  * Zero all free memory.
230  */
231 void
232 uvm_pmr_zero_everything(void)
233 {
234 	struct uvm_pmemrange	*pmr;
235 	struct vm_page		*pg;
236 	int			 i;
237 
238 	uvm_lock_fpageq();
239 	TAILQ_FOREACH(pmr, &uvm.pmr_control.use, pmr_use) {
240 		/* Zero single pages. */
241 		while ((pg = TAILQ_FIRST(&pmr->single[UVM_PMR_MEMTYPE_DIRTY]))
242 		    != NULL) {
243 			uvm_pmr_remove(pmr, pg);
244 			uvm_pagezero(pg);
245 			atomic_setbits_int(&pg->pg_flags, PG_ZERO);
246 			uvmexp.zeropages++;
247 			uvm_pmr_insert(pmr, pg, 0);
248 		}
249 
250 		/* Zero multi page ranges. */
251 		while ((pg = RB_ROOT(&pmr->size[UVM_PMR_MEMTYPE_DIRTY]))
252 		    != NULL) {
253 			pg--; /* The size tree holds the range's second page. */
254 			uvm_pmr_remove(pmr, pg);
255 			for (i = 0; i < pg->fpgsz; i++) {
256 				uvm_pagezero(&pg[i]);
257 				atomic_setbits_int(&pg[i].pg_flags, PG_ZERO);
258 				uvmexp.zeropages++;
259 			}
260 			uvm_pmr_insert(pmr, pg, 0);
261 		}
262 	}
263 	uvm_unlock_fpageq();
264 }
265 
266 /*
267  * Mark all memory as dirty.
268  *
269  * Used to inform the system that the clean memory isn't clean for some
270  * reason, for example because we just came back from hibernate.
271  */
272 void
273 uvm_pmr_dirty_everything(void)
274 {
275 	struct uvm_pmemrange	*pmr;
276 	struct vm_page		*pg;
277 	int			 i;
278 
279 	uvm_lock_fpageq();
280 	TAILQ_FOREACH(pmr, &uvm.pmr_control.use, pmr_use) {
281 		/* Dirty single pages. */
282 		while ((pg = TAILQ_FIRST(&pmr->single[UVM_PMR_MEMTYPE_ZERO]))
283 		    != NULL) {
284 			uvm_pmr_remove(pmr, pg);
285 			atomic_clearbits_int(&pg->pg_flags, PG_ZERO);
286 			uvm_pmr_insert(pmr, pg, 0);
287 		}
288 
289 		/* Dirty multi page ranges. */
290 		while ((pg = RB_ROOT(&pmr->size[UVM_PMR_MEMTYPE_ZERO]))
291 		    != NULL) {
292 			pg--; /* The size tree holds the range's second page. */
293 			uvm_pmr_remove(pmr, pg);
294 			for (i = 0; i < pg->fpgsz; i++)
295 				atomic_clearbits_int(&pg[i].pg_flags, PG_ZERO);
296 			uvm_pmr_insert(pmr, pg, 0);
297 		}
298 	}
299 
300 	uvmexp.zeropages = 0;
301 	uvm_unlock_fpageq();
302 }
303 
304 /*
305  * Allocate sz bytes at the highest physical address that can hold them.
306  *
307  * sz is given in bytes.
308  */
309 int
310 uvm_pmr_alloc_pig(paddr_t *addr, psize_t sz)
311 {
312 	struct uvm_pmemrange	*pmr;
313 	struct vm_page		*pig_pg, *pg;
314 
315 	/*
316 	 * Convert sz to pages, since that is what pmemrange uses internally.
317 	 */
318 	sz = atop(round_page(sz));
319 
320 	uvm_lock_fpageq();
321 
322 	TAILQ_FOREACH(pmr, &uvm.pmr_control.use, pmr_use) {
323 		RB_FOREACH_REVERSE(pig_pg, uvm_pmr_addr, &pmr->addr) {
324 			if (pig_pg->fpgsz >= sz) {
325 				goto found;
326 			}
327 		}
328 	}
329 
330 	/*
331 	 * Allocation failure.
332 	 */
333 	uvm_unlock_fpageq();
334 	return ENOMEM;
335 
336 found:
337 	/* Remove page from freelist. */
338 	uvm_pmr_remove_size(pmr, pig_pg);
339 	pig_pg->fpgsz -= sz;
340 	pg = pig_pg + pig_pg->fpgsz;
341 	if (pig_pg->fpgsz == 0)
342 		uvm_pmr_remove_addr(pmr, pig_pg);
343 	else
344 		uvm_pmr_insert_size(pmr, pig_pg);
345 
346 	uvmexp.free -= sz;
347 	*addr = VM_PAGE_TO_PHYS(pg);
348 
349 	/*
350 	 * Update pg flags.
351 	 *
352 	 * Note that we trash the sz argument now.
353 	 */
354 	while (sz > 0) {
355 		KASSERT(pg->pg_flags & PQ_FREE);
356 
357 		atomic_clearbits_int(&pg->pg_flags,
358 		    PG_PMAP0|PG_PMAP1|PG_PMAP2|PG_PMAP3);
359 
360 		if (pg->pg_flags & PG_ZERO)
361 			uvmexp.zeropages--;
362 		atomic_clearbits_int(&pg->pg_flags,
363 		    PG_ZERO|PQ_FREE);
364 
365 		pg->uobject = NULL;
366 		pg->uanon = NULL;
367 		pg->pg_version++;
368 
369 		/*
370 		 * Next.
371 		 */
372 		pg++;
373 		sz--;
374 	}
375 
376 	/* Return. */
377 	uvm_unlock_fpageq();
378 	return 0;
379 }
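
/*
 * Call sketch (illustrative only; the function name and the 16 MB size
 * are hypothetical).  On success the pig occupies the highest physical
 * range able to hold the request.
 */
#if 0
static void
pig_example(void)
{
	paddr_t	pig_start;

	if (uvm_pmr_alloc_pig(&pig_start, 16UL * 1024 * 1024) == 0) {
		/* [pig_start, pig_start + 16 MB) now belongs to the caller. */
	}
}
#endif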
380 
381 /*
382  * Allocate a piglet area.
383  *
384  * The piglet is placed as low in physical memory as possible and is
385  * aligned to the requested boundary.
386  *
387  * sz and align are given in bytes.
388  *
389  * The call may sleep while the pagedaemon attempts to free memory.
390  * The pagedaemon may decide it is not possible to free enough memory,
391  * causing the allocation to fail.
392  */
393 int
394 uvm_pmr_alloc_piglet(paddr_t *addr, psize_t sz, paddr_t align)
395 {
396 	vaddr_t			 pg_addr, piglet_addr;
397 	struct uvm_pmemrange	*pmr;
398 	struct vm_page		*pig_pg, *pg;
399 	struct pglist		 pageq;
400 	int			 pdaemon_woken;
401 
402 	KASSERT((align & (align - 1)) == 0);
403 	pdaemon_woken = 0; /* Didn't wake the pagedaemon. */
404 
405 	/*
406 	 * Fixup arguments: align must be at least PAGE_SIZE,
407 	 * sz will be converted to pagecount, since that is what
408 	 * pmemrange uses internally.
409 	 */
410 	if (align < PAGE_SIZE)
411 		align = PAGE_SIZE;
412 	sz = atop(round_page(sz));
413 
414 	uvm_lock_fpageq();
415 
416 	TAILQ_FOREACH_REVERSE(pmr, &uvm.pmr_control.use, uvm_pmemrange_use,
417 	    pmr_use) {
418 retry:
419 		/*
420 		 * Search for a range with enough space.
421 		 * Use the address tree, to ensure the range is as low as
422 		 * possible.
423 		 */
424 		RB_FOREACH(pig_pg, uvm_pmr_addr, &pmr->addr) {
425 			pg_addr = VM_PAGE_TO_PHYS(pig_pg);
426 			piglet_addr = (pg_addr + (align - 1)) & ~(align - 1);
427 
428 			/*
429 			 * The range must still hold sz pages after its start
430 			 * is rounded up to the piglet alignment.
431 			 */
432 			if (atop(pg_addr) + pig_pg->fpgsz >=
433 			    atop(piglet_addr) + sz) {
434 				goto found;
435 			}
436 		}
437 
438 		/*
439 		 * Try to coerce the pagedaemon into freeing memory
440 		 * for the piglet.
441 		 *
442 		 * pdaemon_woken is set to prevent the code from
443 		 * falling into an endless loop.
444 		 */
445 		if (!pdaemon_woken) {
446 			pdaemon_woken = 1;
447 			if (uvm_wait_pla(ptoa(pmr->low), ptoa(pmr->high) - 1,
448 			    ptoa(sz), UVM_PLA_FAILOK) == 0)
449 				goto retry;
450 		}
451 	}
452 
453 	/* Return failure. */
454 	uvm_unlock_fpageq();
455 	return ENOMEM;
456 
457 found:
458 	/*
459 	 * Extract piglet from pigpen.
460 	 */
461 	TAILQ_INIT(&pageq);
462 	uvm_pmr_extract_range(pmr, pig_pg,
463 	    atop(piglet_addr), atop(piglet_addr) + sz, &pageq);
464 
465 	*addr = piglet_addr;
466 	uvmexp.free -= sz;
467 
468 	/*
469 	 * Update pg flags.
470 	 *
471 	 * Note that we trash the sz argument now.
472 	 */
473 	TAILQ_FOREACH(pg, &pageq, pageq) {
474 		KASSERT(pg->pg_flags & PQ_FREE);
475 
476 		atomic_clearbits_int(&pg->pg_flags,
477 		    PG_PMAP0|PG_PMAP1|PG_PMAP2|PG_PMAP3);
478 
479 		if (pg->pg_flags & PG_ZERO)
480 			uvmexp.zeropages--;
481 		atomic_clearbits_int(&pg->pg_flags,
482 		    PG_ZERO|PQ_FREE);
483 
484 		pg->uobject = NULL;
485 		pg->uanon = NULL;
486 		pg->pg_version++;
487 	}
488 
489 	uvm_unlock_fpageq();
490 	return 0;
491 }
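
/*
 * Call sketch (illustrative only; the function name, size and alignment
 * are hypothetical, but align must be a power of two as asserted above).
 */
#if 0
static void
piglet_example(void)
{
	paddr_t	piglet_start;

	/* A 4 MB piglet on a 4 MB boundary, as low in memory as possible. */
	if (uvm_pmr_alloc_piglet(&piglet_start, 4UL * 1024 * 1024,
	    4UL * 1024 * 1024) != 0) {
		/* The pagedaemon could not free enough memory. */
	}
}
#endif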
492 
493 /*
494  * Physmem RLE compression support.
495  *
496  * Given a physical page address, return the number of consecutive
497  * free pages starting at that address.
498  * Returns 0 if the page at addr is not free.
499  */
500 psize_t
501 uvm_page_rle(paddr_t addr)
502 {
503 	struct vm_page		*pg, *pg_end;
504 	struct vm_physseg	*vmp;
505 	int			 pseg_idx, off_idx;
506 
507 	pseg_idx = vm_physseg_find(atop(addr), &off_idx);
508 	if (pseg_idx == -1)
509 		return 0;
510 
511 	vmp = &vm_physmem[pseg_idx];
512 	pg = &vmp->pgs[off_idx];
513 	if (!(pg->pg_flags & PQ_FREE))
514 		return 0;
515 
516 	/*
517 	 * Search for the first non-free page after pg.
518 	 * Note that the page may not be the first page in a free pmemrange,
519 	 * therefore pg->fpgsz cannot be used.
520 	 */
521 	for (pg_end = pg; pg_end <= vmp->lastpg &&
522 	    (pg_end->pg_flags & PQ_FREE) == PQ_FREE; pg_end++);
523 	return pg_end - pg;
524 }
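
/*
 * Sketch of how the helper above might drive an encoder (illustrative
 * only; rle_walk() and the encode_run() callback are hypothetical).
 * Free runs are reported and then skipped in a single step.
 */
#if 0
static void
rle_walk(paddr_t start, paddr_t end, void (*encode_run)(paddr_t, psize_t))
{
	paddr_t	 addr;
	psize_t	 run;

	for (addr = start; addr < end; addr += PAGE_SIZE) {
		run = uvm_page_rle(addr);
		if (run == 0)
			continue;		/* page at addr is in use */
		(*encode_run)(addr, run);	/* run free pages at addr */
		addr += ptoa(run) - PAGE_SIZE;	/* loop adds the last page */
	}
}
#endif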
525