/*	$OpenBSD: subr_hibernate.c,v 1.6 2011/07/08 21:00:53 ariane Exp $	*/

/*
 * Copyright (c) 2011 Ariane van der Steldt <ariane@stack.nl>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include <sys/hibernate.h>
#include <sys/param.h>
#include <sys/tree.h>
#include <sys/types.h>
#include <sys/systm.h>
#include <uvm/uvm.h>


/*
 * Hib alloc enforced alignment.
 */
#define HIB_ALIGN		8 /* bytes alignment */

/*
 * sizeof builtin operation, but with alignment constraint.
 */
#define HIB_SIZEOF(_type)	roundup(sizeof(_type), HIB_ALIGN)

struct hiballoc_entry
{
	size_t			hibe_use;
	size_t			hibe_space;
	RB_ENTRY(hiballoc_entry) hibe_entry;
};

/*
 * Compare hiballoc entries based on the address they manage.
 *
 * Since the address is at a fixed offset from struct hiballoc_entry,
 * we just compare the hiballoc_entry pointers.
 */
static __inline int
hibe_cmp(struct hiballoc_entry *l, struct hiballoc_entry *r)
{
	return l < r ? -1 : (l > r);
}

RB_PROTOTYPE(hiballoc_addr, hiballoc_entry, hibe_entry, hibe_cmp)

/*
 * Given a hiballoc entry, return the address it manages.
 */
static __inline void*
hib_entry_to_addr(struct hiballoc_entry *entry)
{
	caddr_t addr;

	addr = (caddr_t)entry;
	addr += HIB_SIZEOF(struct hiballoc_entry);
	return addr;
}

/*
 * Given an address, find the hiballoc entry that corresponds to it.
 */
static __inline struct hiballoc_entry*
hib_addr_to_entry(void* addr_param)
{
	caddr_t addr;

	addr = (caddr_t)addr_param;
	addr -= HIB_SIZEOF(struct hiballoc_entry);
	return (struct hiballoc_entry*)addr;
}

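/*
 * Memory layout sketch (illustrative only, not part of the original
 * source): every hiballoc_entry sits immediately in front of the memory
 * it hands out, so the entry<->address conversions above are plain
 * fixed-offset arithmetic.
 *
 *	+----------------+---------------------+-----------------------+
 *	| hiballoc_entry | hibe_use bytes used | hibe_space bytes free |
 *	+----------------+---------------------+-----------------------+
 *	^                ^
 *	entry            hib_entry_to_addr(entry)
 */
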
RB_GENERATE(hiballoc_addr, hiballoc_entry, hibe_entry, hibe_cmp)

/*
 * Allocate memory from the arena.
 *
 * Returns NULL if no memory is available.
 */
void*
hib_alloc(struct hiballoc_arena *arena, size_t alloc_sz)
{
	struct hiballoc_entry *entry, *new_entry;
	size_t find_sz;

	/*
	 * Enforce alignment of HIB_ALIGN bytes.
	 *
	 * Note that, because the entry is put in front of the allocation,
	 * 0-byte allocations are guaranteed a unique address.
	 */
	alloc_sz = roundup(alloc_sz, HIB_ALIGN);

	/*
	 * Find an entry with hibe_space >= find_sz.
	 *
	 * If the root node is not large enough, we switch to tree traversal.
	 * Because all entries are made at the bottom of the free space,
	 * traversal from the end has a slightly better chance of yielding
	 * a sufficiently large space.
	 */
	find_sz = alloc_sz + HIB_SIZEOF(struct hiballoc_entry);
	entry = RB_ROOT(&arena->hib_addrs);
	if (entry != NULL && entry->hibe_space < find_sz) {
		RB_FOREACH_REVERSE(entry, hiballoc_addr, &arena->hib_addrs) {
			if (entry->hibe_space >= find_sz)
				break;
		}
	}

	/*
	 * Insufficient or too fragmented memory.
	 */
	if (entry == NULL)
		return NULL;

	/*
	 * Create new entry in allocated space.
	 */
	new_entry = (struct hiballoc_entry*)(
	    (caddr_t)hib_entry_to_addr(entry) + entry->hibe_use);
	new_entry->hibe_space = entry->hibe_space - find_sz;
	new_entry->hibe_use = alloc_sz;

	/*
	 * Insert entry.
	 */
	if (RB_INSERT(hiballoc_addr, &arena->hib_addrs, new_entry) != NULL)
		panic("hib_alloc: insert failure");
	entry->hibe_space = 0;

	/* Return address managed by entry. */
	return hib_entry_to_addr(new_entry);
}

/*
 * Free a pointer previously allocated from this arena.
 *
 * If addr is NULL, this will be silently accepted.
 */
void
hib_free(struct hiballoc_arena *arena, void *addr)
{
	struct hiballoc_entry *entry, *prev;

	if (addr == NULL)
		return;

	/*
	 * Derive entry from addr and check it is really in this arena.
	 */
	entry = hib_addr_to_entry(addr);
	if (RB_FIND(hiballoc_addr, &arena->hib_addrs, entry) != entry)
		panic("hib_free: freed item %p not in hib arena", addr);

	/*
	 * Give the space in entry to its predecessor.
	 *
	 * If entry has no predecessor, change its used space into free space
	 * instead.
	 */
	prev = RB_PREV(hiballoc_addr, &arena->hib_addrs, entry);
	if (prev != NULL &&
	    (void*)((caddr_t)prev + HIB_SIZEOF(struct hiballoc_entry) +
	    prev->hibe_use + prev->hibe_space) == entry) {
		/* Merge entry. */
		RB_REMOVE(hiballoc_addr, &arena->hib_addrs, entry);
		prev->hibe_space += HIB_SIZEOF(struct hiballoc_entry) +
		    entry->hibe_use + entry->hibe_space;
	} else {
		/* Flip used memory to free space. */
		entry->hibe_space += entry->hibe_use;
		entry->hibe_use = 0;
	}
}

/*
 * Initialize hiballoc.
 *
 * The allocator will manage the len bytes of memory at ptr.
 */
int
hiballoc_init(struct hiballoc_arena *arena, void *p_ptr, size_t p_len)
{
	struct hiballoc_entry *entry;
	caddr_t ptr;
	size_t len;

	RB_INIT(&arena->hib_addrs);

	/*
	 * Hib allocator enforces HIB_ALIGN alignment.
	 * Fixup ptr and len.
	 */
	ptr = (caddr_t)roundup((vaddr_t)p_ptr, HIB_ALIGN);
	len = p_len - ((size_t)ptr - (size_t)p_ptr);
	len &= ~((size_t)HIB_ALIGN - 1);

	/*
	 * Insufficient memory to be able to allocate and also do bookkeeping.
	 */
	if (len <= HIB_SIZEOF(struct hiballoc_entry))
		return ENOMEM;

	/*
	 * Create entry describing space.
	 */
	entry = (struct hiballoc_entry*)ptr;
	entry->hibe_use = 0;
	entry->hibe_space = len - HIB_SIZEOF(struct hiballoc_entry);
	RB_INSERT(hiballoc_addr, &arena->hib_addrs, entry);

	return 0;
}
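
/*
 * Example usage sketch (illustrative only, not part of the original
 * source).  A caller is assumed to hand the arena some backing memory
 * ("chunk" and "chunk_len" below are placeholder names), then carve
 * allocations out of it with hib_alloc() and return them with hib_free():
 *
 *	struct hiballoc_arena arena;
 *	void *p;
 *
 *	if (hiballoc_init(&arena, chunk, chunk_len) != 0)
 *		return (ENOMEM);
 *	p = hib_alloc(&arena, 128);
 *	if (p == NULL)
 *		return (ENOMEM);
 *	...
 *	hib_free(&arena, p);
 */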


/*
 * Zero all free memory.
 */
void
uvm_pmr_zero_everything(void)
{
	struct uvm_pmemrange	*pmr;
	struct vm_page		*pg;
	int			 i;

	uvm_lock_fpageq();
	TAILQ_FOREACH(pmr, &uvm.pmr_control.use, pmr_use) {
		/* Zero single pages. */
		while ((pg = TAILQ_FIRST(&pmr->single[UVM_PMR_MEMTYPE_DIRTY]))
		    != NULL) {
			uvm_pmr_remove(pmr, pg);
			uvm_pagezero(pg);
			atomic_setbits_int(&pg->pg_flags, PG_ZERO);
			uvmexp.zeropages++;
			uvm_pmr_insert(pmr, pg, 0);
		}

		/* Zero multi page ranges. */
		while ((pg = RB_ROOT(&pmr->size[UVM_PMR_MEMTYPE_DIRTY]))
		    != NULL) {
			pg--; /* Size tree always has second page. */
			uvm_pmr_remove(pmr, pg);
			for (i = 0; i < pg->fpgsz; i++) {
				uvm_pagezero(&pg[i]);
				atomic_setbits_int(&pg[i].pg_flags, PG_ZERO);
				uvmexp.zeropages++;
			}
			uvm_pmr_insert(pmr, pg, 0);
		}
	}
	uvm_unlock_fpageq();
}

/*
 * Mark all memory as dirty.
 *
 * Used to inform the system that the clean memory isn't clean for some
 * reason, for example because we just came back from hibernate.
 */
void
uvm_pmr_dirty_everything(void)
{
	struct uvm_pmemrange	*pmr;
	struct vm_page		*pg;
	int			 i;

	uvm_lock_fpageq();
	TAILQ_FOREACH(pmr, &uvm.pmr_control.use, pmr_use) {
		/* Dirty single pages. */
		while ((pg = TAILQ_FIRST(&pmr->single[UVM_PMR_MEMTYPE_ZERO]))
		    != NULL) {
			uvm_pmr_remove(pmr, pg);
			uvm_pagezero(pg);
			atomic_clearbits_int(&pg->pg_flags, PG_ZERO);
			uvm_pmr_insert(pmr, pg, 0);
		}

		/* Dirty multi page ranges. */
		while ((pg = RB_ROOT(&pmr->size[UVM_PMR_MEMTYPE_ZERO]))
		    != NULL) {
			pg--; /* Size tree always has second page. */
			uvm_pmr_remove(pmr, pg);
			for (i = 0; i < pg->fpgsz; i++) {
				uvm_pagezero(&pg[i]);
				atomic_clearbits_int(&pg[i].pg_flags, PG_ZERO);
			}
			uvm_pmr_insert(pmr, pg, 0);
		}
	}

	uvmexp.zeropages = 0;
	uvm_unlock_fpageq();
}

/*
 * Allocate the highest address that can hold sz.
 *
 * sz in bytes.
 */
int
uvm_pmr_alloc_pig(paddr_t *addr, psize_t sz)
{
	struct uvm_pmemrange	*pmr;
	struct vm_page		*pig_pg, *pg;

	/*
	 * Convert sz to pages, since that is what pmemrange uses internally.
	 */
	sz = atop(round_page(sz));

	uvm_lock_fpageq();

	TAILQ_FOREACH(pmr, &uvm.pmr_control.use, pmr_use) {
		RB_FOREACH_REVERSE(pig_pg, uvm_pmr_addr, &pmr->addr) {
			if (pig_pg->fpgsz >= sz) {
				goto found;
			}
		}
	}

	/*
	 * Allocation failure.
	 */
	uvm_unlock_fpageq();
	return ENOMEM;

found:
	/* Remove page from freelist. */
	uvm_pmr_remove_size(pmr, pig_pg);
	pig_pg->fpgsz -= sz;
	pg = pig_pg + pig_pg->fpgsz;
	if (pig_pg->fpgsz == 0)
		uvm_pmr_remove_addr(pmr, pig_pg);
	else
		uvm_pmr_insert_size(pmr, pig_pg);

	uvmexp.free -= sz;
	*addr = VM_PAGE_TO_PHYS(pg);

	/*
	 * Update pg flags.
	 *
	 * Note that we trash the sz argument now.
	 */
	while (sz > 0) {
		KASSERT(pg->pg_flags & PQ_FREE);

		atomic_clearbits_int(&pg->pg_flags,
		    PG_PMAP0|PG_PMAP1|PG_PMAP2|PG_PMAP3);

		if (pg->pg_flags & PG_ZERO)
			uvmexp.zeropages--;
		atomic_clearbits_int(&pg->pg_flags,
		    PG_ZERO|PQ_FREE);

		pg->uobject = NULL;
		pg->uanon = NULL;
		pg->pg_version++;

		/*
		 * Next.
		 */
		pg++;
		sz--;
	}

	/* Return. */
	uvm_unlock_fpageq();
	return 0;
}
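
/*
 * Illustrative call sketch (assumption, not from the original source):
 * hibernate code would ask for a pig area large enough to hold the
 * unpacked image ("image_sz" is a placeholder name).
 *
 *	paddr_t pig_start;
 *
 *	if (uvm_pmr_alloc_pig(&pig_start, image_sz) != 0)
 *		return (ENOMEM);
 */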

/*
 * Allocate a piglet area.
 *
 * The piglet is allocated as low in memory as possible.
 * Piglets are aligned.
 *
 * sz and align in bytes.
 *
 * The call will sleep while the pagedaemon attempts to free memory.
 * The pagedaemon may decide it's not possible to free enough memory, causing
 * the allocation to fail.
 */
int
uvm_pmr_alloc_piglet(paddr_t *addr, psize_t sz, paddr_t align)
{
	vaddr_t			 pg_addr, piglet_addr;
	struct uvm_pmemrange	*pmr;
	struct vm_page		*pig_pg, *pg;
	struct pglist		 pageq;
	int			 pdaemon_woken;

	KASSERT((align & (align - 1)) == 0);
	pdaemon_woken = 0; /* Didn't wake the pagedaemon. */

	/*
	 * Fixup arguments: align must be at least PAGE_SIZE,
	 * and sz will be converted to a page count, since that is
	 * what pmemrange uses internally.
	 */
	if (align < PAGE_SIZE)
		align = PAGE_SIZE;
	sz = atop(round_page(sz));

	uvm_lock_fpageq();

	TAILQ_FOREACH_REVERSE(pmr, &uvm.pmr_control.use, uvm_pmemrange_use,
	    pmr_use) {
retry:
		/*
		 * Search for a range with enough space.
		 * Use the address tree, to ensure the range is as low as
		 * possible.
		 */
		RB_FOREACH(pig_pg, uvm_pmr_addr, &pmr->addr) {
			pg_addr = VM_PAGE_TO_PHYS(pig_pg);
			piglet_addr = (pg_addr + (align - 1)) & ~(align - 1);

			if (pig_pg->fpgsz >= sz) {
				goto found;
			}

			if (atop(pg_addr) + pig_pg->fpgsz >
			    atop(piglet_addr) + sz) {
				goto found;
			}
		}

		/*
		 * Try to coerce the pagedaemon into freeing memory
		 * for the piglet.
		 *
		 * pdaemon_woken is set to prevent the code from
		 * falling into an endless loop.
		 */
		if (!pdaemon_woken) {
			pdaemon_woken = 1;
			if (uvm_wait_pla(ptoa(pmr->low), ptoa(pmr->high) - 1,
			    ptoa(sz), UVM_PLA_FAILOK) == 0)
				goto retry;
		}
	}

	/* Return failure. */
	uvm_unlock_fpageq();
	return ENOMEM;

found:
	/*
	 * Extract piglet from pigpen.
	 */
	TAILQ_INIT(&pageq);
	uvm_pmr_extract_range(pmr, pig_pg,
	    atop(piglet_addr), atop(piglet_addr) + sz, &pageq);

	*addr = piglet_addr;
	uvmexp.free -= sz;

	/*
	 * Update pg flags.
	 *
	 * Note that we trash the sz argument now.
	 */
	TAILQ_FOREACH(pg, &pageq, pageq) {
		KASSERT(pg->pg_flags & PQ_FREE);

		atomic_clearbits_int(&pg->pg_flags,
		    PG_PMAP0|PG_PMAP1|PG_PMAP2|PG_PMAP3);

		if (pg->pg_flags & PG_ZERO)
			uvmexp.zeropages--;
		atomic_clearbits_int(&pg->pg_flags,
		    PG_ZERO|PQ_FREE);

		pg->uobject = NULL;
		pg->uanon = NULL;
		pg->pg_version++;
	}

	uvm_unlock_fpageq();
	return 0;
}
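
/*
 * Illustrative call sketch (assumption, not from the original source):
 * a caller wanting a piglet aligned to, say, 2 MB could do something
 * like the following ("piglet_sz" is a placeholder name):
 *
 *	paddr_t piglet_start;
 *
 *	if (uvm_pmr_alloc_piglet(&piglet_start, piglet_sz,
 *	    2UL * 1024 * 1024) != 0)
 *		return (ENOMEM);
 */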

/*
 * Physmem RLE compression support.
 *
 * Given a physical page address, return the number of contiguous free
 * pages starting at that address.
 * Returns 0 if the page at addr is not free.
 */
psize_t
uvm_page_rle(paddr_t addr)
{
	struct vm_page		*pg, *pg_end;
	struct vm_physseg	*vmp;
	int			 pseg_idx, off_idx;

	pseg_idx = vm_physseg_find(atop(addr), &off_idx);
	if (pseg_idx == -1)
		return 0;

	vmp = &vm_physmem[pseg_idx];
	pg = &vmp->pgs[off_idx];
	if (!(pg->pg_flags & PQ_FREE))
		return 0;

	/*
	 * Search for the first non-free page after pg.
	 * Note that the page may not be the first page in a free pmemrange,
	 * therefore pg->fpgsz cannot be used.
	 */
	for (pg_end = pg; pg_end <= vmp->lastpg &&
	    (pg_end->pg_flags & PQ_FREE) == PQ_FREE; pg_end++);
	return pg_end - pg;
}
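
/*
 * Illustrative sketch (assumption, not from the original source): an RLE
 * pass over a physical memory range could use uvm_page_rle() to skip
 * whole runs of free pages instead of inspecting them one by one.
 * "start" and "end" are placeholder bounds in bytes.
 *
 *	paddr_t addr;
 *	psize_t run;
 *
 *	for (addr = start; addr < end; ) {
 *		run = uvm_page_rle(addr);
 *		if (run == 0)
 *			addr += PAGE_SIZE;	(page in use: process it)
 *		else
 *			addr += ptoa(run);	(skip the free run)
 *	}
 */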