1 /*	$OpenBSD: subr_hibernate.c,v 1.9 2011/07/09 00:27:31 mlarkin Exp $	*/
2 
3 /*
4  * Copyright (c) 2011 Ariane van der Steldt <ariane@stack.nl>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 
19 #include <sys/hibernate.h>
20 #include <sys/param.h>
21 #include <sys/tree.h>
22 #include <sys/types.h>
23 #include <sys/systm.h>
24 #include <sys/disklabel.h>
25 #include <sys/conf.h>
26 #include <uvm/uvm.h>
27 #include <machine/hibernate.h>
28 
29 extern char *disk_readlabel(struct disklabel *, dev_t, char *, size_t);
30 
31 struct hibernate_state *hibernate_state;
32 
33 /*
34  * Hib alloc enforced alignment.
35  */
36 #define HIB_ALIGN		8 /* bytes alignment */
37 
38 /*
39  * sizeof builtin operation, but with alignment constraint.
40  */
41 #define HIB_SIZEOF(_type)	roundup(sizeof(_type), HIB_ALIGN)
42 
43 struct hiballoc_entry
44 {
45 	size_t			hibe_use;
46 	size_t			hibe_space;
47 	RB_ENTRY(hiballoc_entry) hibe_entry;
48 };
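/*
 * Arena layout: every allocation is preceded by its hiballoc_entry, so a
 * region managed by an entry looks like
 *
 *	[ struct hiballoc_entry | hibe_use bytes in use | hibe_space bytes free ]
 *
 * The address handed out to callers starts right after the entry header
 * (see hib_entry_to_addr/hib_addr_to_entry below); new allocations are
 * carved out of the hibe_space tail.
 */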
49 
50 /*
51  * Compare hiballoc entries based on the address they manage.
52  *
53  * Since the managed address is at a fixed offset from its struct
54  * hiballoc_entry, we just compare the hiballoc_entry pointers.
55  */
56 static __inline int
57 hibe_cmp(struct hiballoc_entry *l, struct hiballoc_entry *r)
58 {
59 	return l < r ? -1 : (l > r);
60 }
61 
62 RB_PROTOTYPE(hiballoc_addr, hiballoc_entry, hibe_entry, hibe_cmp)
63 
64 /*
65  * Given a hiballoc entry, return the address it manages.
66  */
67 static __inline void*
68 hib_entry_to_addr(struct hiballoc_entry *entry)
69 {
70 	caddr_t addr;
71 
72 	addr = (caddr_t)entry;
73 	addr += HIB_SIZEOF(struct hiballoc_entry);
74 	return addr;
75 }
76 
77 /*
78  * Given an address, find the hiballoc_entry that manages it.
79  */
80 static __inline struct hiballoc_entry*
81 hib_addr_to_entry(void* addr_param)
82 {
83 	caddr_t addr;
84 
85 	addr = (caddr_t)addr_param;
86 	addr -= HIB_SIZEOF(struct hiballoc_entry);
87 	return (struct hiballoc_entry*)addr;
88 }
89 
90 RB_GENERATE(hiballoc_addr, hiballoc_entry, hibe_entry, hibe_cmp)
91 
92 /*
93  * Allocate memory from the arena.
94  *
95  * Returns NULL if no memory is available.
96  */
97 void*
98 hib_alloc(struct hiballoc_arena *arena, size_t alloc_sz)
99 {
100 	struct hiballoc_entry *entry, *new_entry;
101 	size_t find_sz;
102 
103 	/*
104 	 * Enforce alignment of HIB_ALIGN bytes.
105 	 *
106 	 * Note that, because the entry is put in front of the allocation,
107 	 * 0-byte allocations are guaranteed a unique address.
108 	 */
109 	alloc_sz = roundup(alloc_sz, HIB_ALIGN);
110 
111 	/*
112 	 * Find an entry with hibe_space >= find_sz.
113 	 *
114 	 * If the root node is not large enough, we switch to tree traversal.
115 	 * Because all entries are made at the bottom of the free space,
116 	 * traversal from the end has a slightly better chance of yielding
117 	 * a sufficiently large space.
118 	 */
119 	find_sz = alloc_sz + HIB_SIZEOF(struct hiballoc_entry);
120 	entry = RB_ROOT(&arena->hib_addrs);
121 	if (entry != NULL && entry->hibe_space < find_sz) {
122 		RB_FOREACH_REVERSE(entry, hiballoc_addr, &arena->hib_addrs) {
123 			if (entry->hibe_space >= find_sz)
124 				break;
125 		}
126 	}
127 
128 	/*
129 	 * Insufficient or too fragmented memory.
130 	 */
131 	if (entry == NULL)
132 		return NULL;
133 
134 	/*
135 	 * Create new entry in allocated space.
136 	 */
137 	new_entry = (struct hiballoc_entry*)(
138 	    (caddr_t)hib_entry_to_addr(entry) + entry->hibe_use);
139 	new_entry->hibe_space = entry->hibe_space - find_sz;
140 	new_entry->hibe_use = alloc_sz;
141 
142 	/*
143 	 * Insert entry.
144 	 */
145 	if (RB_INSERT(hiballoc_addr, &arena->hib_addrs, new_entry) != NULL)
146 		panic("hib_alloc: insert failure");
147 	entry->hibe_space = 0;
148 
149 	/* Return address managed by entry. */
150 	return hib_entry_to_addr(new_entry);
151 }
152 
153 /*
154  * Free a pointer previously allocated from this arena.
155  *
156  * If addr is NULL, this will be silently accepted.
157  */
158 void
159 hib_free(struct hiballoc_arena *arena, void *addr)
160 {
161 	struct hiballoc_entry *entry, *prev;
162 
163 	if (addr == NULL)
164 		return;
165 
166 	/*
167 	 * Derive entry from addr and check it is really in this arena.
168 	 */
169 	entry = hib_addr_to_entry(addr);
170 	if (RB_FIND(hiballoc_addr, &arena->hib_addrs, entry) != entry)
171 		panic("hib_free: freed item %p not in hib arena", addr);
172 
173 	/*
174 	 * Give the space in entry to its predecessor.
175 	 *
176 	 * If entry has no predecessor, change its used space into free space
177 	 * instead.
178 	 */
179 	prev = RB_PREV(hiballoc_addr, &arena->hib_addrs, entry);
180 	if (prev != NULL &&
181 	    (void*)((caddr_t)prev + HIB_SIZEOF(struct hiballoc_entry) +
182 	    prev->hibe_use + prev->hibe_space) == entry) {
183 		/* Merge entry. */
184 		RB_REMOVE(hiballoc_addr, &arena->hib_addrs, entry);
185 		prev->hibe_space += HIB_SIZEOF(struct hiballoc_entry) +
186 		    entry->hibe_use + entry->hibe_space;
187 	} else {
188 		/* Flip used memory to free space. */
189 		entry->hibe_space += entry->hibe_use;
190 		entry->hibe_use = 0;
191 	}
192 }
193 
194 /*
195  * Initialize hiballoc.
196  *
197  * The allocator will manage memmory at ptr, which is len bytes.
198  */
199 int
200 hiballoc_init(struct hiballoc_arena *arena, void *p_ptr, size_t p_len)
201 {
202 	struct hiballoc_entry *entry;
203 	caddr_t ptr;
204 	size_t len;
205 
206 	RB_INIT(&arena->hib_addrs);
207 
208 	/*
209 	 * Hib allocator enforces HIB_ALIGN alignment.
210  * Fix up ptr and len.
211 	 */
212 	ptr = (caddr_t)roundup((vaddr_t)p_ptr, HIB_ALIGN);
213 	len = p_len - ((size_t)ptr - (size_t)p_ptr);
214 	len &= ~((size_t)HIB_ALIGN - 1);
215 
216 	/*
217 	 * Insufficient memory to be able to allocate and also do bookkeeping.
218 	 */
219 	if (len <= HIB_SIZEOF(struct hiballoc_entry))
220 		return ENOMEM;
221 
222 	/*
223 	 * Create entry describing space.
224 	 */
225 	entry = (struct hiballoc_entry*)ptr;
226 	entry->hibe_use = 0;
227 	entry->hibe_space = len - HIB_SIZEOF(struct hiballoc_entry);
228 	RB_INSERT(hiballoc_addr, &arena->hib_addrs, entry);
229 
230 	return 0;
231 }
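/*
 * Illustrative sketch (hypothetical caller, not part of this file): the
 * arena is handed one fixed region up front and all later allocations are
 * carved out of that region, without going back to UVM or malloc(9):
 *
 *	struct hiballoc_arena arena;
 *	void *p;
 *
 *	if (hiballoc_init(&arena, region, region_len) == 0) {
 *		p = hib_alloc(&arena, 128);
 *		if (p != NULL) {
 *			... use p ...
 *			hib_free(&arena, p);
 *		}
 *	}
 *
 * region and region_len stand for whatever memory the caller set aside for
 * hibernate use.
 */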
232 
233 
234 /*
235  * Zero all free memory.
236  */
237 void
238 uvm_pmr_zero_everything(void)
239 {
240 	struct uvm_pmemrange	*pmr;
241 	struct vm_page		*pg;
242 	int			 i;
243 
244 	uvm_lock_fpageq();
245 	TAILQ_FOREACH(pmr, &uvm.pmr_control.use, pmr_use) {
246 		/* Zero single pages. */
247 		while ((pg = TAILQ_FIRST(&pmr->single[UVM_PMR_MEMTYPE_DIRTY]))
248 		    != NULL) {
249 			uvm_pmr_remove(pmr, pg);
250 			uvm_pagezero(pg);
251 			atomic_setbits_int(&pg->pg_flags, PG_ZERO);
252 			uvmexp.zeropages++;
253 			uvm_pmr_insert(pmr, pg, 0);
254 		}
255 
256 		/* Zero multi page ranges. */
257 		while ((pg = RB_ROOT(&pmr->size[UVM_PMR_MEMTYPE_DIRTY]))
258 		    != NULL) {
259 			pg--; /* The size tree stores a range's second page. */
260 			uvm_pmr_remove(pmr, pg);
261 			for (i = 0; i < pg->fpgsz; i++) {
262 				uvm_pagezero(&pg[i]);
263 				atomic_setbits_int(&pg[i].pg_flags, PG_ZERO);
264 				uvmexp.zeropages++;
265 			}
266 			uvm_pmr_insert(pmr, pg, 0);
267 		}
268 	}
269 	uvm_unlock_fpageq();
270 }
271 
272 /*
273  * Mark all memory as dirty.
274  *
275  * Used to inform the system that zeroed (PG_ZERO) pages may no longer
276  * actually be zeroed, for example because we just came back from hibernate.
277  */
278 void
279 uvm_pmr_dirty_everything(void)
280 {
281 	struct uvm_pmemrange	*pmr;
282 	struct vm_page		*pg;
283 	int			 i;
284 
285 	uvm_lock_fpageq();
286 	TAILQ_FOREACH(pmr, &uvm.pmr_control.use, pmr_use) {
287 		/* Dirty single pages. */
288 		while ((pg = TAILQ_FIRST(&pmr->single[UVM_PMR_MEMTYPE_ZERO]))
289 		    != NULL) {
290 			uvm_pmr_remove(pmr, pg);
291 			atomic_clearbits_int(&pg->pg_flags, PG_ZERO);
292 			uvm_pmr_insert(pmr, pg, 0);
293 		}
294 
295 		/* Dirty multi page ranges. */
296 		while ((pg = RB_ROOT(&pmr->size[UVM_PMR_MEMTYPE_ZERO]))
297 		    != NULL) {
298 			pg--; /* The size tree stores a range's second page. */
299 			uvm_pmr_remove(pmr, pg);
300 			for (i = 0; i < pg->fpgsz; i++)
301 				atomic_clearbits_int(&pg[i].pg_flags, PG_ZERO);
302 			uvm_pmr_insert(pmr, pg, 0);
303 		}
304 	}
305 
306 	uvmexp.zeropages = 0;
307 	uvm_unlock_fpageq();
308 }
309 
310 /*
311  * Allocate the highest-addressed contiguous free range that can hold sz.
312  *
313  * sz in bytes.
314  */
315 int
316 uvm_pmr_alloc_pig(paddr_t *addr, psize_t sz)
317 {
318 	struct uvm_pmemrange	*pmr;
319 	struct vm_page		*pig_pg, *pg;
320 
321 	/*
322 	 * Convert sz to pages, since that is what pmemrange uses internally.
323 	 */
324 	sz = atop(round_page(sz));
325 
326 	uvm_lock_fpageq();
327 
328 	TAILQ_FOREACH(pmr, &uvm.pmr_control.use, pmr_use) {
329 		RB_FOREACH_REVERSE(pig_pg, uvm_pmr_addr, &pmr->addr) {
330 			if (pig_pg->fpgsz >= sz) {
331 				goto found;
332 			}
333 		}
334 	}
335 
336 	/*
337 	 * Allocation failure.
338 	 */
339 	uvm_unlock_fpageq();
340 	return ENOMEM;
341 
342 found:
343 	/* Remove page from freelist. */
344 	uvm_pmr_remove_size(pmr, pig_pg);
345 	pig_pg->fpgsz -= sz;
346 	pg = pig_pg + pig_pg->fpgsz;
347 	if (pig_pg->fpgsz == 0)
348 		uvm_pmr_remove_addr(pmr, pig_pg);
349 	else
350 		uvm_pmr_insert_size(pmr, pig_pg);
351 
352 	uvmexp.free -= sz;
353 	*addr = VM_PAGE_TO_PHYS(pg);
354 
355 	/*
356 	 * Update pg flags.
357 	 *
358 	 * Note that we trash the sz argument now.
359 	 */
360 	while (sz > 0) {
361 		KASSERT(pg->pg_flags & PQ_FREE);
362 
363 		atomic_clearbits_int(&pg->pg_flags,
364 		    PG_PMAP0|PG_PMAP1|PG_PMAP2|PG_PMAP3);
365 
366 		if (pg->pg_flags & PG_ZERO)
367 			uvmexp.zeropages--;
368 		atomic_clearbits_int(&pg->pg_flags,
369 		    PG_ZERO|PQ_FREE);
370 
371 		pg->uobject = NULL;
372 		pg->uanon = NULL;
373 		pg->pg_version++;
374 
375 		/*
376 		 * Next.
377 		 */
378 		pg++;
379 		sz--;
380 	}
381 
382 	/* Return. */
383 	uvm_unlock_fpageq();
384 	return 0;
385 }
386 
387 /*
388  * Allocate a piglet area.
389  *
390  * The piglet is placed as low in physical memory as possible and is
391  * aligned to align bytes.
392  *
393  * sz and align are in bytes.
394  *
395  * The call may sleep, waiting for the pagedaemon to attempt to free memory.
396  * The pagedaemon may decide it is not possible to free enough memory,
397  * causing the allocation to fail.
398  */
399 int
400 uvm_pmr_alloc_piglet(paddr_t *addr, psize_t sz, paddr_t align)
401 {
402 	vaddr_t			 pg_addr, piglet_addr;
403 	struct uvm_pmemrange	*pmr;
404 	struct vm_page		*pig_pg, *pg;
405 	struct pglist		 pageq;
406 	int			 pdaemon_woken;
407 
408 	KASSERT((align & (align - 1)) == 0);
409 	pdaemon_woken = 0; /* Didn't wake the pagedaemon. */
410 
411 	/*
412 	 * Fixup arguments: align must be at least PAGE_SIZE,
413 	 * sz will be converted to pagecount, since that is what
414 	 * pmemrange uses internally.
415 	 */
416 	if (align < PAGE_SIZE)
417 		align = PAGE_SIZE;
418 	sz = atop(round_page(sz));
419 
420 	uvm_lock_fpageq();
421 
422 	TAILQ_FOREACH_REVERSE(pmr, &uvm.pmr_control.use, uvm_pmemrange_use,
423 	    pmr_use) {
424 retry:
425 		/*
426 		 * Search for a range with enough space.
427 		 * Use the address tree, to ensure the range is as low as
428 		 * possible.
429 		 */
430 		RB_FOREACH(pig_pg, uvm_pmr_addr, &pmr->addr) {
431 			pg_addr = VM_PAGE_TO_PHYS(pig_pg);
432 			piglet_addr = (pg_addr + (align - 1)) & ~(align - 1);
433 
434 			/*
435 			 * The entire aligned piglet must fit within this
436 			 * range; the check accounts for alignment slack.
437 			 */
438 			if (atop(pg_addr) + pig_pg->fpgsz >=
439 			    atop(piglet_addr) + sz) {
440 				goto found;
441 			}
442 		}
443 
444 		/*
445 		 * Try to coerce the pagedaemon into freeing memory
446 		 * for the piglet.
447 		 *
448 		 * pdaemon_woken is set to prevent the code from
449 		 * falling into an endless loop.
450 		 */
451 		if (!pdaemon_woken) {
452 			pdaemon_woken = 1;
453 			if (uvm_wait_pla(ptoa(pmr->low), ptoa(pmr->high) - 1,
454 			    ptoa(sz), UVM_PLA_FAILOK) == 0)
455 				goto retry;
456 		}
457 	}
458 
459 	/* Return failure. */
460 	uvm_unlock_fpageq();
461 	return ENOMEM;
462 
463 found:
464 	/*
465 	 * Extract piglet from pigpen.
466 	 */
467 	TAILQ_INIT(&pageq);
468 	uvm_pmr_extract_range(pmr, pig_pg,
469 	    atop(piglet_addr), atop(piglet_addr) + sz, &pageq);
470 
471 	*addr = piglet_addr;
472 	uvmexp.free -= sz;
473 
474 	/*
475 	 * Update pg flags.
476 	 *
477 	 * Note that we trash the sz argument now.
478 	 */
479 	TAILQ_FOREACH(pg, &pageq, pageq) {
480 		KASSERT(pg->pg_flags & PQ_FREE);
481 
482 		atomic_clearbits_int(&pg->pg_flags,
483 		    PG_PMAP0|PG_PMAP1|PG_PMAP2|PG_PMAP3);
484 
485 		if (pg->pg_flags & PG_ZERO)
486 			uvmexp.zeropages--;
487 		atomic_clearbits_int(&pg->pg_flags,
488 		    PG_ZERO|PQ_FREE);
489 
490 		pg->uobject = NULL;
491 		pg->uanon = NULL;
492 		pg->pg_version++;
493 	}
494 
495 	uvm_unlock_fpageq();
496 	return 0;
497 }
498 
499 /*
500  * Physmem RLE compression support.
501  *
502  * Given a physical page address, return the number of contiguous
503  * free pages starting at that address.
504  * Returns 0 if the page at addr is not free.
505  */
506 psize_t
507 uvm_page_rle(paddr_t addr)
508 {
509 	struct vm_page		*pg, *pg_end;
510 	struct vm_physseg	*vmp;
511 	int			 pseg_idx, off_idx;
512 
513 	pseg_idx = vm_physseg_find(atop(addr), &off_idx);
514 	if (pseg_idx == -1)
515 		return 0;
516 
517 	vmp = &vm_physmem[pseg_idx];
518 	pg = &vmp->pgs[off_idx];
519 	if (!(pg->pg_flags & PQ_FREE))
520 		return 0;
521 
522 	/*
523 	 * Search for the first non-free page after pg.
524 	 * Note that the page may not be the first page in a free pmemrange,
525 	 * therefore pg->fpgsz cannot be used.
526 	 */
527 	for (pg_end = pg; pg_end <= vmp->lastpg &&
528 	    (pg_end->pg_flags & PQ_FREE) == PQ_FREE; pg_end++);
529 	return pg_end - pg;
530 }
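/*
 * Illustrative sketch (hypothetical caller, not part of this file): a scan
 * over a physical range could use uvm_page_rle() to skip runs of free pages
 * while building an RLE description of the pages that need saving:
 *
 *	paddr_t addr;
 *	psize_t free_pgs;
 *
 *	for (addr = start; addr < end; ) {
 *		free_pgs = uvm_page_rle(addr);
 *		if (free_pgs == 0) {
 *			... page at addr is in use, save it ...
 *			addr += PAGE_SIZE;
 *		} else
 *			addr += ptoa(free_pgs);
 *	}
 *
 * start and end are hypothetical bounds of the physical range being scanned.
 */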
531 
532 /*
533  * get_hibernate_info
534  *
535  * Fills out the hibernate_info union pointed to by hiber_info
536  * with information about this machine (swap signature block
537  * offsets, number of memory ranges, kernel in use, etc)
538  *
539  */
540 int
541 get_hibernate_info(union hibernate_info *hiber_info)
542 {
543 	int chunktable_size;
544 	struct disklabel dl;
545 	char err_string[128], *dl_ret;
546 
547 	/* Determine I/O function to use */
548 	hiber_info->io_func = get_hibernate_io_function();
549 	if (hiber_info->io_func == NULL)
550 		return (1);
551 
552 	/* Calculate hibernate device */
553 	hiber_info->device = swdevt[0].sw_dev;
554 
555 	/* Read disklabel (used to calculate signature and image offsets) */
556 	dl_ret = disk_readlabel(&dl, hiber_info->device, err_string, 128);
557 
558 	if (dl_ret) {
559 		printf("Hibernate error reading disklabel: %s\n", dl_ret);
560 		return (1);
561 	}
562 
563 	hiber_info->secsize = dl.d_secsize;
564 
565 	/* Make sure the signature can fit in one block */
566 	KASSERT(sizeof(union hibernate_info)/hiber_info->secsize == 1);
567 
568 	/* Calculate swap offset from start of disk */
569 	hiber_info->swap_offset = dl.d_partitions[1].p_offset;
570 
571 	/* Calculate signature block location */
572 	hiber_info->sig_offset = dl.d_partitions[1].p_offset +
573 		dl.d_partitions[1].p_size -
574 		sizeof(union hibernate_info)/hiber_info->secsize;
575 
576 	chunktable_size = HIBERNATE_CHUNK_TABLE_SIZE / hiber_info->secsize;
577 
578 	/* Calculate memory image location */
579 	hiber_info->image_offset = dl.d_partitions[1].p_offset +
580 		dl.d_partitions[1].p_size -
581 		(hiber_info->image_size / hiber_info->secsize) -
582 		sizeof(union hibernate_info)/hiber_info->secsize -
583 		chunktable_size;
584 
585 	/* Stash kernel version information */
586 	bzero(&hiber_info->kernel_version, 128);
587 	bcopy(version, &hiber_info->kernel_version,
588 		min(strlen(version), sizeof(hiber_info->kernel_version)-1));
589 
590 	/* Allocate piglet region */
591 	if (uvm_pmr_alloc_piglet(&hiber_info->piglet_base, HIBERNATE_CHUNK_SIZE,
592 		HIBERNATE_CHUNK_SIZE)) {
593 		printf("Hibernate failed to allocate the piglet\n");
594 		return (1);
595 	}
596 
597 	return get_hibernate_info_md(hiber_info);
598 }
599 
600 /*
601  * hibernate_zlib_alloc
602  *
603  * Allocate nitems*size bytes from the hiballoc area presently in use
604  *
605  */
606 void *
607 hibernate_zlib_alloc(void *unused, int nitems, int size)
608 {
609 	return hib_alloc(&hibernate_state->hiballoc_arena, nitems*size);
610 }
611 
612 /*
613  * hibernate_zlib_free
614  *
615  * Free the memory pointed to by addr in the hiballoc area presently in
616  * use
617  *
618  */
619 void
620 hibernate_zlib_free(void *unused, void *addr)
621 {
622 	hib_free(&hibernate_state->hiballoc_arena, addr);
623 }
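/*
 * Illustrative sketch (not part of this file): the two callbacks above are
 * intended to serve as a zlib custom allocator, so that (de)compression
 * during hibernate draws from the hiballoc arena instead of the regular
 * kernel allocators.  Roughly, assuming a z_stream set up elsewhere:
 *
 *	z_stream stream;
 *
 *	stream.zalloc = (alloc_func)hibernate_zlib_alloc;
 *	stream.zfree = (free_func)hibernate_zlib_free;
 *	stream.opaque = NULL;
 *	... inflateInit(&stream) or deflateInit(&stream, level) ...
 *
 * zalloc, zfree, opaque, alloc_func, free_func and the init calls are
 * standard zlib interfaces; how and where the stream is actually configured
 * is machine-dependent and not shown here.
 */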
624