/*	$OpenBSD: subr_hibernate.c,v 1.8 2011/07/09 00:08:04 mlarkin Exp $	*/

/*
 * Copyright (c) 2011 Ariane van der Steldt <ariane@stack.nl>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include <sys/hibernate.h>
#include <sys/param.h>
#include <sys/tree.h>
#include <sys/types.h>
#include <sys/systm.h>
#include <sys/disklabel.h>
#include <sys/conf.h>
#include <uvm/uvm.h>
#include <machine/hibernate.h>

extern char *disk_readlabel(struct disklabel *, dev_t, char *, size_t);

/*
 * Hib alloc enforced alignment.
 */
#define HIB_ALIGN		8 /* bytes alignment */

/*
 * sizeof builtin operation, but with alignment constraint.
 */
#define HIB_SIZEOF(_type)	roundup(sizeof(_type), HIB_ALIGN)

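/*
 * Worked example (struct foo is hypothetical, used only to illustrate
 * the macro): with HIB_ALIGN == 8, a 20-byte struct foo gives
 * HIB_SIZEOF(struct foo) == 24, while a 16-byte struct is unchanged.
 */
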
struct hiballoc_entry
{
	size_t			hibe_use;
	size_t			hibe_space;
	RB_ENTRY(hiballoc_entry) hibe_entry;
};

/*
 * Compare hiballoc entries based on the address they manage.
 *
 * Since the address is at a fixed offset relative to struct hiballoc_entry,
 * we just compare the hiballoc_entry pointers.
 */
static __inline int
hibe_cmp(struct hiballoc_entry *l, struct hiballoc_entry *r)
{
	return l < r ? -1 : (l > r);
}

RB_PROTOTYPE(hiballoc_addr, hiballoc_entry, hibe_entry, hibe_cmp)

/*
 * Given a hiballoc entry, return the address it manages.
 */
static __inline void*
hib_entry_to_addr(struct hiballoc_entry *entry)
{
	caddr_t addr;

	addr = (caddr_t)entry;
	addr += HIB_SIZEOF(struct hiballoc_entry);
	return addr;
}

/*
 * Given an address, find the hiballoc entry that corresponds to it.
 */
static __inline struct hiballoc_entry*
hib_addr_to_entry(void* addr_param)
{
	caddr_t addr;

	addr = (caddr_t)addr_param;
	addr -= HIB_SIZEOF(struct hiballoc_entry);
	return (struct hiballoc_entry*)addr;
}
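
/*
 * Layout described by each tree entry (a sketch, derived from the two
 * helpers above and the allocator below):
 *
 *	[ struct hiballoc_entry | hibe_use bytes in use | hibe_space bytes free ]
 *	^                        ^
 *	entry                    hib_entry_to_addr(entry)
 */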

RB_GENERATE(hiballoc_addr, hiballoc_entry, hibe_entry, hibe_cmp)

/*
 * Allocate memory from the arena.
 *
 * Returns NULL if no memory is available.
 */
void*
hib_alloc(struct hiballoc_arena *arena, size_t alloc_sz)
{
	struct hiballoc_entry *entry, *new_entry;
	size_t find_sz;

	/*
	 * Enforce alignment of HIB_ALIGN bytes.
	 *
	 * Note that, because the entry is put in front of the allocation,
	 * 0-byte allocations are guaranteed a unique address.
	 */
	alloc_sz = roundup(alloc_sz, HIB_ALIGN);

	/*
	 * Find an entry with hibe_space >= find_sz.
	 *
	 * If the root node is not large enough, we switch to tree traversal.
	 * Because all entries are made at the bottom of the free space,
	 * traversal from the end has a slightly better chance of yielding
	 * a sufficiently large space.
	 */
	find_sz = alloc_sz + HIB_SIZEOF(struct hiballoc_entry);
	entry = RB_ROOT(&arena->hib_addrs);
	if (entry != NULL && entry->hibe_space < find_sz) {
		RB_FOREACH_REVERSE(entry, hiballoc_addr, &arena->hib_addrs) {
			if (entry->hibe_space >= find_sz)
				break;
		}
	}

	/*
	 * Insufficient or too fragmented memory.
	 */
	if (entry == NULL)
		return NULL;

	/*
	 * Create new entry in allocated space.
	 */
	new_entry = (struct hiballoc_entry*)(
	    (caddr_t)hib_entry_to_addr(entry) + entry->hibe_use);
	new_entry->hibe_space = entry->hibe_space - find_sz;
	new_entry->hibe_use = alloc_sz;

	/*
	 * Insert entry.
	 */
	if (RB_INSERT(hiballoc_addr, &arena->hib_addrs, new_entry) != NULL)
		panic("hib_alloc: insert failure");
	entry->hibe_space = 0;

	/* Return address managed by entry. */
	return hib_entry_to_addr(new_entry);
}
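
/*
 * Worked example of the bookkeeping above (numbers are illustrative;
 * E stands for HIB_SIZEOF(struct hiballoc_entry)):
 *
 *	donor before:	hibe_use = 64, hibe_space = 256
 *	hib_alloc(48):	find_sz = 48 + E
 *	new entry:	placed 64 bytes into the donor's data area,
 *			hibe_use = 48, hibe_space = 256 - 48 - E
 *	donor after:	hibe_use = 64, hibe_space = 0
 */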

/*
 * Free a pointer previously allocated from this arena.
 *
 * If addr is NULL, this will be silently accepted.
 */
void
hib_free(struct hiballoc_arena *arena, void *addr)
{
	struct hiballoc_entry *entry, *prev;

	if (addr == NULL)
		return;

	/*
	 * Derive entry from addr and check it is really in this arena.
	 */
	entry = hib_addr_to_entry(addr);
	if (RB_FIND(hiballoc_addr, &arena->hib_addrs, entry) != entry)
		panic("hib_free: freed item %p not in hib arena", addr);

	/*
	 * Give the space in entry to its predecessor.
	 *
	 * If entry has no predecessor, change its used space into free space
	 * instead.
	 */
	prev = RB_PREV(hiballoc_addr, &arena->hib_addrs, entry);
	if (prev != NULL &&
	    (void*)((caddr_t)prev + HIB_SIZEOF(struct hiballoc_entry) +
	    prev->hibe_use + prev->hibe_space) == entry) {
		/* Merge entry. */
		RB_REMOVE(hiballoc_addr, &arena->hib_addrs, entry);
		prev->hibe_space += HIB_SIZEOF(struct hiballoc_entry) +
		    entry->hibe_use + entry->hibe_space;
	} else {
		/* Flip used memory to free space. */
		entry->hibe_space += entry->hibe_use;
		entry->hibe_use = 0;
	}
}

/*
 * Initialize hiballoc.
 *
 * The allocator will manage the memory at ptr, which is len bytes.
 */
int
hiballoc_init(struct hiballoc_arena *arena, void *p_ptr, size_t p_len)
{
	struct hiballoc_entry *entry;
	caddr_t ptr;
	size_t len;

	RB_INIT(&arena->hib_addrs);

	/*
	 * Hib allocator enforces HIB_ALIGN alignment.
	 * Fixup ptr and len.
	 */
	ptr = (caddr_t)roundup((vaddr_t)p_ptr, HIB_ALIGN);
	len = p_len - ((size_t)ptr - (size_t)p_ptr);
	len &= ~((size_t)HIB_ALIGN - 1);

	/*
	 * Insufficient memory to be able to allocate and also do bookkeeping.
	 */
	if (len <= HIB_SIZEOF(struct hiballoc_entry))
		return ENOMEM;

	/*
	 * Create entry describing space.
	 */
	entry = (struct hiballoc_entry*)ptr;
	entry->hibe_use = 0;
	entry->hibe_space = len - HIB_SIZEOF(struct hiballoc_entry);
	RB_INSERT(hiballoc_addr, &arena->hib_addrs, entry);

	return 0;
}
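
/*
 * Usage sketch for the arena API above. The backing buffer and the
 * sizes are hypothetical, chosen only for illustration:
 *
 *	static char			hib_backing[4096];
 *	struct hiballoc_arena		arena;
 *	void				*p;
 *
 *	if (hiballoc_init(&arena, hib_backing, sizeof(hib_backing)) == 0) {
 *		p = hib_alloc(&arena, 128);
 *		if (p != NULL)
 *			hib_free(&arena, p);
 *	}
 */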


/*
 * Zero all free memory.
 */
void
uvm_pmr_zero_everything(void)
{
	struct uvm_pmemrange	*pmr;
	struct vm_page		*pg;
	int			 i;

	uvm_lock_fpageq();
	TAILQ_FOREACH(pmr, &uvm.pmr_control.use, pmr_use) {
		/* Zero single pages. */
		while ((pg = TAILQ_FIRST(&pmr->single[UVM_PMR_MEMTYPE_DIRTY]))
		    != NULL) {
			uvm_pmr_remove(pmr, pg);
			uvm_pagezero(pg);
			atomic_setbits_int(&pg->pg_flags, PG_ZERO);
			uvmexp.zeropages++;
			uvm_pmr_insert(pmr, pg, 0);
		}

		/* Zero multi page ranges. */
		while ((pg = RB_ROOT(&pmr->size[UVM_PMR_MEMTYPE_DIRTY]))
		    != NULL) {
			pg--; /* Size tree holds the range's second page. */
			uvm_pmr_remove(pmr, pg);
			for (i = 0; i < pg->fpgsz; i++) {
				uvm_pagezero(&pg[i]);
				atomic_setbits_int(&pg[i].pg_flags, PG_ZERO);
				uvmexp.zeropages++;
			}
			uvm_pmr_insert(pmr, pg, 0);
		}
	}
	uvm_unlock_fpageq();
}

/*
 * Mark all memory as dirty.
 *
 * Used to inform the system that the clean memory isn't clean for some
 * reason, for example because we just came back from hibernate.
 */
void
uvm_pmr_dirty_everything(void)
{
	struct uvm_pmemrange	*pmr;
	struct vm_page		*pg;
	int			 i;

	uvm_lock_fpageq();
	TAILQ_FOREACH(pmr, &uvm.pmr_control.use, pmr_use) {
		/* Dirty single pages. */
		while ((pg = TAILQ_FIRST(&pmr->single[UVM_PMR_MEMTYPE_ZERO]))
		    != NULL) {
			uvm_pmr_remove(pmr, pg);
			atomic_clearbits_int(&pg->pg_flags, PG_ZERO);
			uvm_pmr_insert(pmr, pg, 0);
		}

		/* Dirty multi page ranges. */
		while ((pg = RB_ROOT(&pmr->size[UVM_PMR_MEMTYPE_ZERO]))
		    != NULL) {
			pg--; /* Size tree holds the range's second page. */
			uvm_pmr_remove(pmr, pg);
			for (i = 0; i < pg->fpgsz; i++)
				atomic_clearbits_int(&pg[i].pg_flags, PG_ZERO);
			uvm_pmr_insert(pmr, pg, 0);
		}
	}

	uvmexp.zeropages = 0;
	uvm_unlock_fpageq();
}

/*
 * Allocate the highest address that can hold sz.
 *
 * sz in bytes.
 */
int
uvm_pmr_alloc_pig(paddr_t *addr, psize_t sz)
{
	struct uvm_pmemrange	*pmr;
	struct vm_page		*pig_pg, *pg;

	/*
	 * Convert sz to pages, since that is what pmemrange uses internally.
	 */
	sz = atop(round_page(sz));

	uvm_lock_fpageq();

	TAILQ_FOREACH(pmr, &uvm.pmr_control.use, pmr_use) {
		RB_FOREACH_REVERSE(pig_pg, uvm_pmr_addr, &pmr->addr) {
			if (pig_pg->fpgsz >= sz) {
				goto found;
			}
		}
	}

	/*
	 * Allocation failure.
	 */
	uvm_unlock_fpageq();
	return ENOMEM;

found:
	/* Remove page from freelist. */
	uvm_pmr_remove_size(pmr, pig_pg);
	pig_pg->fpgsz -= sz;
	pg = pig_pg + pig_pg->fpgsz;
	if (pig_pg->fpgsz == 0)
		uvm_pmr_remove_addr(pmr, pig_pg);
	else
		uvm_pmr_insert_size(pmr, pig_pg);

	uvmexp.free -= sz;
	*addr = VM_PAGE_TO_PHYS(pg);

	/*
	 * Update pg flags.
	 *
	 * Note that we trash the sz argument now.
	 */
	while (sz > 0) {
		KASSERT(pg->pg_flags & PQ_FREE);

		atomic_clearbits_int(&pg->pg_flags,
		    PG_PMAP0|PG_PMAP1|PG_PMAP2|PG_PMAP3);

		if (pg->pg_flags & PG_ZERO)
			uvmexp.zeropages--;
		atomic_clearbits_int(&pg->pg_flags,
		    PG_ZERO|PQ_FREE);

		pg->uobject = NULL;
		pg->uanon = NULL;
		pg->pg_version++;

		/*
		 * Next.
		 */
		pg++;
		sz--;
	}

	/* Return. */
	uvm_unlock_fpageq();
	return 0;
}
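
/*
 * Hypothetical call pattern for the pig allocator above (the size and
 * the variable names are invented for illustration):
 *
 *	paddr_t pig_start;
 *	psize_t pig_sz = 16 * 1024 * 1024;
 *
 *	if (uvm_pmr_alloc_pig(&pig_start, pig_sz) == 0) {
 *		... the physical range [pig_start, pig_start + pig_sz)
 *		... is now owned by the caller
 *	}
 */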

/*
 * Allocate a piglet area.
 *
 * This is as low as possible.
 * Piglets are aligned.
 *
 * sz and align in bytes.
 *
 * The call may sleep, waiting for the pagedaemon to attempt to free
 * memory. The pagedaemon may decide it is not possible to free enough
 * memory, causing the allocation to fail.
 */
int
uvm_pmr_alloc_piglet(paddr_t *addr, psize_t sz, paddr_t align)
{
	vaddr_t			 pg_addr, piglet_addr;
	struct uvm_pmemrange	*pmr;
	struct vm_page		*pig_pg, *pg;
	struct pglist		 pageq;
	int			 pdaemon_woken;

	KASSERT((align & (align - 1)) == 0);
	pdaemon_woken = 0; /* Didn't wake the pagedaemon. */

	/*
	 * Fixup arguments: align must be at least PAGE_SIZE,
	 * sz will be converted to pagecount, since that is what
	 * pmemrange uses internally.
	 */
	if (align < PAGE_SIZE)
		align = PAGE_SIZE;
	sz = atop(round_page(sz));

	uvm_lock_fpageq();

	TAILQ_FOREACH_REVERSE(pmr, &uvm.pmr_control.use, uvm_pmemrange_use,
	    pmr_use) {
retry:
		/*
		 * Search for a range with enough space.
		 * Use the address tree, to ensure the range is as low as
		 * possible.
		 */
		RB_FOREACH(pig_pg, uvm_pmr_addr, &pmr->addr) {
			pg_addr = VM_PAGE_TO_PHYS(pig_pg);
			piglet_addr = (pg_addr + (align - 1)) & ~(align - 1);

			if (atop(pg_addr) + pig_pg->fpgsz >=
			    atop(piglet_addr) + sz) {
				goto found;
			}
		}

		/*
		 * Try to coerce the pagedaemon into freeing memory
		 * for the piglet.
		 *
		 * pdaemon_woken is set to prevent the code from
		 * falling into an endless loop.
		 */
		if (!pdaemon_woken) {
			pdaemon_woken = 1;
			if (uvm_wait_pla(ptoa(pmr->low), ptoa(pmr->high) - 1,
			    ptoa(sz), UVM_PLA_FAILOK) == 0)
				goto retry;
		}
	}

	/* Return failure. */
	uvm_unlock_fpageq();
	return ENOMEM;

found:
	/*
	 * Extract piglet from pigpen.
	 */
	TAILQ_INIT(&pageq);
	uvm_pmr_extract_range(pmr, pig_pg,
	    atop(piglet_addr), atop(piglet_addr) + sz, &pageq);

	*addr = piglet_addr;
	uvmexp.free -= sz;

	/*
	 * Update pg flags.
	 *
	 * Note that we trash the sz argument now.
	 */
	TAILQ_FOREACH(pg, &pageq, pageq) {
		KASSERT(pg->pg_flags & PQ_FREE);

		atomic_clearbits_int(&pg->pg_flags,
		    PG_PMAP0|PG_PMAP1|PG_PMAP2|PG_PMAP3);

		if (pg->pg_flags & PG_ZERO)
			uvmexp.zeropages--;
		atomic_clearbits_int(&pg->pg_flags,
		    PG_ZERO|PQ_FREE);

		pg->uobject = NULL;
		pg->uanon = NULL;
		pg->pg_version++;
	}

	uvm_unlock_fpageq();
	return 0;
}
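
/*
 * Worked example of the alignment step in the search loop above
 * (illustrative numbers): with pg_addr = 0x123000 and align = 0x400000
 * (4MB), piglet_addr = (0x123000 + 0x3fffff) & ~0x3fffff = 0x400000,
 * so the pages between 0x123000 and 0x400000 stay in the free range.
 */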

/*
 * Physmem RLE compression support.
 *
 * Given a physical page address, return the number of contiguous free
 * pages starting at that address.
 * Returns 0 if the page at addr is not free.
 */
psize_t
uvm_page_rle(paddr_t addr)
{
	struct vm_page		*pg, *pg_end;
	struct vm_physseg	*vmp;
	int			 pseg_idx, off_idx;

	pseg_idx = vm_physseg_find(atop(addr), &off_idx);
	if (pseg_idx == -1)
		return 0;

	vmp = &vm_physmem[pseg_idx];
	pg = &vmp->pgs[off_idx];
	if (!(pg->pg_flags & PQ_FREE))
		return 0;

	/*
	 * Search for the first non-free page after pg.
	 * Note that the page may not be the first page in a free pmemrange,
	 * therefore pg->fpgsz cannot be used.
	 */
	for (pg_end = pg; pg_end <= vmp->lastpg &&
	    (pg_end->pg_flags & PQ_FREE) == PQ_FREE; pg_end++);
	return pg_end - pg;
}
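
/*
 * Sketch of how the run lengths could be consumed by a hypothetical
 * caller walking physical memory (start, end and the loop are
 * illustrative, not part of this file): a return of 0 means the page
 * is in use, so the walker advances one page; otherwise it can skip
 * the whole free run.
 *
 *	paddr_t p = start;
 *	psize_t run;
 *
 *	while (p < end) {
 *		run = uvm_page_rle(p);
 *		p += (run == 0) ? PAGE_SIZE : ptoa(run);
 *	}
 */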

/*
 * get_hibernate_info
 *
 * Fills out the hibernate_info union pointed to by hiber_info
 * with information about this machine (swap signature block
 * offsets, number of memory ranges, kernel in use, etc).
 */
int
get_hibernate_info(union hibernate_info *hiber_info)
{
	int chunktable_size;
	struct disklabel dl;
	char err_string[128], *dl_ret;

	/* Determine I/O function to use */
	hiber_info->io_func = get_hibernate_io_function();
	if (hiber_info->io_func == NULL)
		return (1);

	/* Calculate hibernate device */
	hiber_info->device = swdevt[0].sw_dev;

	/* Read disklabel (used to calculate signature and image offsets) */
	dl_ret = disk_readlabel(&dl, hiber_info->device, err_string, 128);

	if (dl_ret) {
		printf("Hibernate error reading disklabel: %s\n", dl_ret);
		return (1);
	}

	hiber_info->secsize = dl.d_secsize;

	/* Make sure the signature can fit in one block */
	KASSERT(sizeof(union hibernate_info)/hiber_info->secsize == 1);

	/* Calculate swap offset from start of disk */
	hiber_info->swap_offset = dl.d_partitions[1].p_offset;

	/* Calculate signature block location */
	hiber_info->sig_offset = dl.d_partitions[1].p_offset +
		dl.d_partitions[1].p_size -
		sizeof(union hibernate_info)/hiber_info->secsize;

	chunktable_size = HIBERNATE_CHUNK_TABLE_SIZE / hiber_info->secsize;

	/* Calculate memory image location */
	hiber_info->image_offset = dl.d_partitions[1].p_offset +
		dl.d_partitions[1].p_size -
		(hiber_info->image_size / hiber_info->secsize) -
		sizeof(union hibernate_info)/hiber_info->secsize -
		chunktable_size;
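
	/*
	 * Layout implied by the offset calculations above, from low to
	 * high within the swap partition (a sketch; the sizes are the
	 * symbolic quantities used above, not real numbers):
	 *
	 *	p_offset ............ start of swap partition
	 *	image_offset ........ memory image
	 *	                      chunk table (chunktable_size sectors)
	 *	sig_offset .......... signature block
	 *	p_offset + p_size ... end of swap partition
	 */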

	/* Stash kernel version information */
	bzero(&hiber_info->kernel_version, 128);
	bcopy(version, &hiber_info->kernel_version,
		min(strlen(version), sizeof(hiber_info->kernel_version)-1));

	/* Allocate piglet region */
	if (uvm_pmr_alloc_piglet(&hiber_info->piglet_base, HIBERNATE_CHUNK_SIZE,
		HIBERNATE_CHUNK_SIZE)) {
		printf("Hibernate failed to allocate the piglet\n");
		return (1);
	}

	return get_hibernate_info_md(hiber_info);
}