xref: /openbsd-src/sys/kern/subr_hibernate.c (revision a3fa4d810652db178e9298d2bdda44611f2dfcde)
1 /*	$OpenBSD: subr_hibernate.c,v 1.66 2013/10/20 17:16:47 mlarkin Exp $	*/
2 
3 /*
4  * Copyright (c) 2011 Ariane van der Steldt <ariane@stack.nl>
5  * Copyright (c) 2011 Mike Larkin <mlarkin@openbsd.org>
6  *
7  * Permission to use, copy, modify, and distribute this software for any
8  * purpose with or without fee is hereby granted, provided that the above
9  * copyright notice and this permission notice appear in all copies.
10  *
11  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
12  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
14  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18  */
19 
20 #include <sys/hibernate.h>
21 #include <sys/malloc.h>
22 #include <sys/param.h>
23 #include <sys/tree.h>
24 #include <sys/systm.h>
25 #include <sys/disklabel.h>
26 #include <sys/disk.h>
27 #include <sys/conf.h>
28 #include <sys/buf.h>
29 #include <sys/fcntl.h>
30 #include <sys/stat.h>
31 #include <uvm/uvm.h>
32 #include <uvm/uvm_swap.h>
33 #include <machine/hibernate.h>
34 
35 /*
36  * Hibernate piglet layout information
37  *
38  * The piglet is a scratch area of memory allocated by the suspending kernel.
39  * Its phys and virt addrs are recorded in the signature block. The piglet is
40  * used to guarantee an unused area of memory that can be used by the resuming
41  * kernel for various things. The piglet is excluded during unpack operations.
42  * The piglet size is presently 3*HIBERNATE_CHUNK_SIZE (typically 3*4MB).
43  *
44  * Offset from piglet_base	Purpose
45  * ----------------------------------------------------------------------------
46  * 0				I/O page used during resume
47  * 1*PAGE_SIZE		 	I/O page used during hibernate suspend
48  * 2*PAGE_SIZE		 	I/O page used during hibernate suspend
49  * 3*PAGE_SIZE			copy page used during hibernate suspend
50  * 4*PAGE_SIZE			final chunk ordering list (8 pages)
51  * 12*PAGE_SIZE			piglet chunk ordering list (8 pages)
52  * 20*PAGE_SIZE			temp chunk ordering list (8 pages)
53  * 28*PAGE_SIZE			start of hiballoc area
54  * 108*PAGE_SIZE		end of hiballoc area (80 pages)
55  * ...				unused
56  * HIBERNATE_CHUNK_SIZE		start of hibernate chunk table
57  * 2*HIBERNATE_CHUNK_SIZE	bounce area for chunks being unpacked
58  * 3*HIBERNATE_CHUNK_SIZE	end of piglet
59  */
60 
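/*
 * Worked example of the layout above (illustrative, assuming the typical
 * 4KB pages and 4MB chunks): the suspend I/O page at offset 1*PAGE_SIZE is
 * the 'hibernate_io_page' used by hibernate_deflate(), the copy page at
 * 3*PAGE_SIZE is what hibernate_write_chunks() maps behind
 * 'hibernate_copy_page', the three chunk ordering lists cover offsets
 * 16KB-112KB, the hiballoc/zlib arena covers 112KB-432KB (80 pages), and
 * the chunk table and bounce area sit at 4MB and 8MB respectively.
 */
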
61 /* Temporary vaddr ranges used during hibernate */
62 vaddr_t hibernate_temp_page;
63 vaddr_t hibernate_copy_page;
64 
65 /* Hibernate info as read from disk during resume */
66 union hibernate_info disk_hiber_info;
67 paddr_t global_pig_start;
68 vaddr_t global_piglet_va;
69 
70 /* #define HIB_DEBUG */
71 #ifdef HIB_DEBUG
72 int	hib_debug = 99;
73 #define DPRINTF(x...)     do { if (hib_debug) printf(x); } while (0)
74 #define DNPRINTF(n,x...)  do { if (hib_debug > (n)) printf(x); } while (0)
75 #else
76 #define DPRINTF(x...)
77 #define DNPRINTF(n,x...)
78 #endif
79 
80 void hibernate_copy_chunk_to_piglet(paddr_t, vaddr_t, size_t);
81 
82 /*
83  * Hib alloc enforced alignment.
84  */
85 #define HIB_ALIGN		8 /* bytes alignment */
86 
87 /*
88  * sizeof builtin operation, but with alignment constraint.
89  */
90 #define HIB_SIZEOF(_type)	roundup(sizeof(_type), HIB_ALIGN)
91 
92 struct hiballoc_entry {
93 	size_t			hibe_use;
94 	size_t			hibe_space;
95 	RB_ENTRY(hiballoc_entry) hibe_entry;
96 };
97 
98 /*
99  * Compare hiballoc entries based on the address they manage.
100  *
 101  * Since the address is at a fixed offset from struct hiballoc_entry,
 102  * we just compare the hiballoc_entry pointers.
103  */
104 static __inline int
105 hibe_cmp(struct hiballoc_entry *l, struct hiballoc_entry *r)
106 {
107 	return l < r ? -1 : (l > r);
108 }
109 
110 RB_PROTOTYPE(hiballoc_addr, hiballoc_entry, hibe_entry, hibe_cmp)
111 
112 /*
113  * Given a hiballoc entry, return the address it manages.
114  */
115 static __inline void *
116 hib_entry_to_addr(struct hiballoc_entry *entry)
117 {
118 	caddr_t addr;
119 
120 	addr = (caddr_t)entry;
121 	addr += HIB_SIZEOF(struct hiballoc_entry);
122 	return addr;
123 }
124 
125 /*
 126  * Given an address, find the hiballoc_entry that manages it.
127  */
128 static __inline struct hiballoc_entry*
129 hib_addr_to_entry(void *addr_param)
130 {
131 	caddr_t addr;
132 
133 	addr = (caddr_t)addr_param;
134 	addr -= HIB_SIZEOF(struct hiballoc_entry);
135 	return (struct hiballoc_entry*)addr;
136 }
137 
138 RB_GENERATE(hiballoc_addr, hiballoc_entry, hibe_entry, hibe_cmp)
139 
140 /*
141  * Allocate memory from the arena.
142  *
143  * Returns NULL if no memory is available.
144  */
145 void *
146 hib_alloc(struct hiballoc_arena *arena, size_t alloc_sz)
147 {
148 	struct hiballoc_entry *entry, *new_entry;
149 	size_t find_sz;
150 
151 	/*
152 	 * Enforce alignment of HIB_ALIGN bytes.
153 	 *
154 	 * Note that, because the entry is put in front of the allocation,
155 	 * 0-byte allocations are guaranteed a unique address.
156 	 */
157 	alloc_sz = roundup(alloc_sz, HIB_ALIGN);
158 
159 	/*
160 	 * Find an entry with hibe_space >= find_sz.
161 	 *
162 	 * If the root node is not large enough, we switch to tree traversal.
163 	 * Because all entries are made at the bottom of the free space,
164 	 * traversal from the end has a slightly better chance of yielding
165 	 * a sufficiently large space.
166 	 */
167 	find_sz = alloc_sz + HIB_SIZEOF(struct hiballoc_entry);
168 	entry = RB_ROOT(&arena->hib_addrs);
169 	if (entry != NULL && entry->hibe_space < find_sz) {
170 		RB_FOREACH_REVERSE(entry, hiballoc_addr, &arena->hib_addrs) {
171 			if (entry->hibe_space >= find_sz)
172 				break;
173 		}
174 	}
175 
176 	/*
177 	 * Insufficient or too fragmented memory.
178 	 */
179 	if (entry == NULL)
180 		return NULL;
181 
182 	/*
183 	 * Create new entry in allocated space.
184 	 */
185 	new_entry = (struct hiballoc_entry*)(
186 	    (caddr_t)hib_entry_to_addr(entry) + entry->hibe_use);
187 	new_entry->hibe_space = entry->hibe_space - find_sz;
188 	new_entry->hibe_use = alloc_sz;
189 
190 	/*
191 	 * Insert entry.
192 	 */
193 	if (RB_INSERT(hiballoc_addr, &arena->hib_addrs, new_entry) != NULL)
194 		panic("hib_alloc: insert failure");
195 	entry->hibe_space = 0;
196 
197 	/* Return address managed by entry. */
198 	return hib_entry_to_addr(new_entry);
199 }
200 
201 /*
202  * Free a pointer previously allocated from this arena.
203  *
204  * If addr is NULL, this will be silently accepted.
205  */
206 void
207 hib_free(struct hiballoc_arena *arena, void *addr)
208 {
209 	struct hiballoc_entry *entry, *prev;
210 
211 	if (addr == NULL)
212 		return;
213 
214 	/*
215 	 * Derive entry from addr and check it is really in this arena.
216 	 */
217 	entry = hib_addr_to_entry(addr);
218 	if (RB_FIND(hiballoc_addr, &arena->hib_addrs, entry) != entry)
219 		panic("hib_free: freed item %p not in hib arena", addr);
220 
221 	/*
222 	 * Give the space in entry to its predecessor.
223 	 *
224 	 * If entry has no predecessor, change its used space into free space
225 	 * instead.
226 	 */
227 	prev = RB_PREV(hiballoc_addr, &arena->hib_addrs, entry);
228 	if (prev != NULL &&
229 	    (void *)((caddr_t)prev + HIB_SIZEOF(struct hiballoc_entry) +
230 	    prev->hibe_use + prev->hibe_space) == entry) {
231 		/* Merge entry. */
232 		RB_REMOVE(hiballoc_addr, &arena->hib_addrs, entry);
233 		prev->hibe_space += HIB_SIZEOF(struct hiballoc_entry) +
234 		    entry->hibe_use + entry->hibe_space;
235 	} else {
236 		/* Flip used memory to free space. */
237 		entry->hibe_space += entry->hibe_use;
238 		entry->hibe_use = 0;
239 	}
240 }
241 
242 /*
243  * Initialize hiballoc.
244  *
 245  * The allocator will manage the memory at ptr, which is len bytes in size.
246  */
247 int
248 hiballoc_init(struct hiballoc_arena *arena, void *p_ptr, size_t p_len)
249 {
250 	struct hiballoc_entry *entry;
251 	caddr_t ptr;
252 	size_t len;
253 
254 	RB_INIT(&arena->hib_addrs);
255 
256 	/*
257 	 * Hib allocator enforces HIB_ALIGN alignment.
258 	 * Fixup ptr and len.
259 	 */
260 	ptr = (caddr_t)roundup((vaddr_t)p_ptr, HIB_ALIGN);
261 	len = p_len - ((size_t)ptr - (size_t)p_ptr);
262 	len &= ~((size_t)HIB_ALIGN - 1);
263 
264 	/*
265 	 * Insufficient memory to be able to allocate and also do bookkeeping.
266 	 */
267 	if (len <= HIB_SIZEOF(struct hiballoc_entry))
268 		return ENOMEM;
269 
270 	/*
271 	 * Create entry describing space.
272 	 */
273 	entry = (struct hiballoc_entry*)ptr;
274 	entry->hibe_use = 0;
275 	entry->hibe_space = len - HIB_SIZEOF(struct hiballoc_entry);
276 	RB_INSERT(hiballoc_addr, &arena->hib_addrs, entry);
277 
278 	return 0;
279 }
280 
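/*
 * Usage sketch (illustrative only, not compiled): the zlib glue further
 * down (hibernate_zlib_alloc/hibernate_zlib_free) drives this arena over a
 * region of piglet memory in essentially the following way; the scratch
 * buffer here is hypothetical.
 *
 *	static char scratch[16 * 1024];
 *	struct hiballoc_arena arena;
 *	void *p;
 *
 *	if (hiballoc_init(&arena, scratch, sizeof(scratch)) == 0) {
 *		p = hib_alloc(&arena, 128);	(NULL when the arena is full)
 *		hib_free(&arena, p);		(NULL is silently accepted)
 *	}
 */
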
281 /*
282  * Zero all free memory.
283  */
284 void
285 uvm_pmr_zero_everything(void)
286 {
287 	struct uvm_pmemrange	*pmr;
288 	struct vm_page		*pg;
289 	int			 i;
290 
291 	uvm_lock_fpageq();
292 	TAILQ_FOREACH(pmr, &uvm.pmr_control.use, pmr_use) {
293 		/* Zero single pages. */
294 		while ((pg = TAILQ_FIRST(&pmr->single[UVM_PMR_MEMTYPE_DIRTY]))
295 		    != NULL) {
296 			uvm_pmr_remove(pmr, pg);
297 			uvm_pagezero(pg);
298 			atomic_setbits_int(&pg->pg_flags, PG_ZERO);
299 			uvmexp.zeropages++;
300 			uvm_pmr_insert(pmr, pg, 0);
301 		}
302 
303 		/* Zero multi page ranges. */
304 		while ((pg = RB_ROOT(&pmr->size[UVM_PMR_MEMTYPE_DIRTY]))
305 		    != NULL) {
 306 			pg--; /* Size tree stores the range's second page. */
307 			uvm_pmr_remove(pmr, pg);
308 			for (i = 0; i < pg->fpgsz; i++) {
309 				uvm_pagezero(&pg[i]);
310 				atomic_setbits_int(&pg[i].pg_flags, PG_ZERO);
311 				uvmexp.zeropages++;
312 			}
313 			uvm_pmr_insert(pmr, pg, 0);
314 		}
315 	}
316 	uvm_unlock_fpageq();
317 }
318 
319 /*
320  * Mark all memory as dirty.
321  *
322  * Used to inform the system that the clean memory isn't clean for some
323  * reason, for example because we just came back from hibernate.
324  */
325 void
326 uvm_pmr_dirty_everything(void)
327 {
328 	struct uvm_pmemrange	*pmr;
329 	struct vm_page		*pg;
330 	int			 i;
331 
332 	uvm_lock_fpageq();
333 	TAILQ_FOREACH(pmr, &uvm.pmr_control.use, pmr_use) {
334 		/* Dirty single pages. */
335 		while ((pg = TAILQ_FIRST(&pmr->single[UVM_PMR_MEMTYPE_ZERO]))
336 		    != NULL) {
337 			uvm_pmr_remove(pmr, pg);
338 			atomic_clearbits_int(&pg->pg_flags, PG_ZERO);
339 			uvm_pmr_insert(pmr, pg, 0);
340 		}
341 
342 		/* Dirty multi page ranges. */
343 		while ((pg = RB_ROOT(&pmr->size[UVM_PMR_MEMTYPE_ZERO]))
344 		    != NULL) {
 345 			pg--; /* Size tree stores the range's second page. */
346 			uvm_pmr_remove(pmr, pg);
347 			for (i = 0; i < pg->fpgsz; i++)
348 				atomic_clearbits_int(&pg[i].pg_flags, PG_ZERO);
349 			uvm_pmr_insert(pmr, pg, 0);
350 		}
351 	}
352 
353 	uvmexp.zeropages = 0;
354 	uvm_unlock_fpageq();
355 }
356 
357 /*
 358  * Allocate the pig area at the highest address that can hold sz.
 359  *
 360  * sz is specified in bytes.
361  */
362 int
363 uvm_pmr_alloc_pig(paddr_t *addr, psize_t sz)
364 {
365 	struct uvm_pmemrange	*pmr;
366 	struct vm_page		*pig_pg, *pg;
367 
368 	/*
369 	 * Convert sz to pages, since that is what pmemrange uses internally.
370 	 */
371 	sz = atop(round_page(sz));
372 
373 	uvm_lock_fpageq();
374 
375 	TAILQ_FOREACH(pmr, &uvm.pmr_control.use, pmr_use) {
376 		RB_FOREACH_REVERSE(pig_pg, uvm_pmr_addr, &pmr->addr) {
377 			if (pig_pg->fpgsz >= sz) {
378 				goto found;
379 			}
380 		}
381 	}
382 
383 	/*
384 	 * Allocation failure.
385 	 */
386 	uvm_unlock_fpageq();
387 	return ENOMEM;
388 
389 found:
390 	/* Remove page from freelist. */
391 	uvm_pmr_remove_size(pmr, pig_pg);
392 	pig_pg->fpgsz -= sz;
393 	pg = pig_pg + pig_pg->fpgsz;
394 	if (pig_pg->fpgsz == 0)
395 		uvm_pmr_remove_addr(pmr, pig_pg);
396 	else
397 		uvm_pmr_insert_size(pmr, pig_pg);
398 
399 	uvmexp.free -= sz;
400 	*addr = VM_PAGE_TO_PHYS(pg);
401 
402 	/*
403 	 * Update pg flags.
404 	 *
405 	 * Note that we trash the sz argument now.
406 	 */
407 	while (sz > 0) {
408 		KASSERT(pg->pg_flags & PQ_FREE);
409 
410 		atomic_clearbits_int(&pg->pg_flags,
411 		    PG_PMAP0|PG_PMAP1|PG_PMAP2|PG_PMAP3);
412 
413 		if (pg->pg_flags & PG_ZERO)
414 			uvmexp.zeropages -= sz;
415 		atomic_clearbits_int(&pg->pg_flags,
416 		    PG_ZERO|PQ_FREE);
417 
418 		pg->uobject = NULL;
419 		pg->uanon = NULL;
420 		pg->pg_version++;
421 
422 		/*
423 		 * Next.
424 		 */
425 		pg++;
426 		sz--;
427 	}
428 
429 	/* Return. */
430 	uvm_unlock_fpageq();
431 	return 0;
432 }
433 
434 /*
435  * Allocate a piglet area.
436  *
 437  * The piglet is allocated as low in physical memory as possible and
 438  * is aligned to 'align'.
 439  *
 440  * sz and align are specified in bytes.
 441  *
 442  * The call may sleep to let the pagedaemon attempt to free memory.
 443  * The pagedaemon may decide it is not possible to free enough memory,
 444  * causing the allocation to fail.
445  */
446 int
447 uvm_pmr_alloc_piglet(vaddr_t *va, paddr_t *pa, vsize_t sz, paddr_t align)
448 {
449 	paddr_t			 pg_addr, piglet_addr;
450 	struct uvm_pmemrange	*pmr;
451 	struct vm_page		*pig_pg, *pg;
452 	struct pglist		 pageq;
453 	int			 pdaemon_woken;
454 	vaddr_t			 piglet_va;
455 
456 	/* Ensure align is a power of 2 */
457 	KASSERT((align & (align - 1)) == 0);
458 
459 	pdaemon_woken = 0; /* Didn't wake the pagedaemon. */
460 
461 	/*
 462 	 * Fix up arguments: align must be at least PAGE_SIZE and sz is
 463 	 * rounded up to a whole number of pages, since pmemrange works
 464 	 * in pages internally.
465 	 */
466 	if (align < PAGE_SIZE)
467 		align = PAGE_SIZE;
468 	sz = round_page(sz);
469 
470 	uvm_lock_fpageq();
471 
472 	TAILQ_FOREACH_REVERSE(pmr, &uvm.pmr_control.use, uvm_pmemrange_use,
473 	    pmr_use) {
474 retry:
475 		/*
476 		 * Search for a range with enough space.
477 		 * Use the address tree, to ensure the range is as low as
478 		 * possible.
479 		 */
480 		RB_FOREACH(pig_pg, uvm_pmr_addr, &pmr->addr) {
481 			pg_addr = VM_PAGE_TO_PHYS(pig_pg);
482 			piglet_addr = (pg_addr + (align - 1)) & ~(align - 1);
483 
484 			if (atop(pg_addr) + pig_pg->fpgsz >=
485 			    atop(piglet_addr) + atop(sz))
486 				goto found;
487 		}
488 	}
489 
490 	/*
491 	 * Try to coerce the pagedaemon into freeing memory
492 	 * for the piglet.
493 	 *
494 	 * pdaemon_woken is set to prevent the code from
495 	 * falling into an endless loop.
496 	 */
497 	if (!pdaemon_woken) {
498 		pdaemon_woken = 1;
499 		if (uvm_wait_pla(ptoa(pmr->low), ptoa(pmr->high) - 1,
500 		    sz, UVM_PLA_FAILOK) == 0)
501 			goto retry;
502 	}
503 
504 	/* Return failure. */
505 	uvm_unlock_fpageq();
506 	return ENOMEM;
507 
508 found:
509 	/*
510 	 * Extract piglet from pigpen.
511 	 */
512 	TAILQ_INIT(&pageq);
513 	uvm_pmr_extract_range(pmr, pig_pg,
514 	    atop(piglet_addr), atop(piglet_addr) + atop(sz), &pageq);
515 
516 	*pa = piglet_addr;
517 	uvmexp.free -= atop(sz);
518 
519 	/*
520 	 * Update pg flags.
521 	 *
522 	 * Note that we trash the sz argument now.
523 	 */
524 	TAILQ_FOREACH(pg, &pageq, pageq) {
525 		KASSERT(pg->pg_flags & PQ_FREE);
526 
527 		atomic_clearbits_int(&pg->pg_flags,
528 		    PG_PMAP0|PG_PMAP1|PG_PMAP2|PG_PMAP3);
529 
530 		if (pg->pg_flags & PG_ZERO)
531 			uvmexp.zeropages--;
532 		atomic_clearbits_int(&pg->pg_flags,
533 		    PG_ZERO|PQ_FREE);
534 
535 		pg->uobject = NULL;
536 		pg->uanon = NULL;
537 		pg->pg_version++;
538 	}
539 
540 	uvm_unlock_fpageq();
541 
542 	/*
543 	 * Now allocate a va.
544 	 * Use direct mappings for the pages.
545 	 */
546 
547 	piglet_va = *va = (vaddr_t)km_alloc(sz, &kv_any, &kp_none, &kd_waitok);
548 	if (!piglet_va) {
549 		uvm_pglistfree(&pageq);
550 		return ENOMEM;
551 	}
552 
553 	/*
554 	 * Map piglet to va.
555 	 */
556 	TAILQ_FOREACH(pg, &pageq, pageq) {
557 		pmap_kenter_pa(piglet_va, VM_PAGE_TO_PHYS(pg), UVM_PROT_RW);
558 		piglet_va += PAGE_SIZE;
559 	}
560 	pmap_update(pmap_kernel());
561 
562 	return 0;
563 }
564 
565 /*
566  * Free a piglet area.
567  */
568 void
569 uvm_pmr_free_piglet(vaddr_t va, vsize_t sz)
570 {
571 	paddr_t			 pa;
572 	struct vm_page		*pg;
573 
574 	/*
575 	 * Fix parameters.
576 	 */
577 	sz = round_page(sz);
578 
579 	/*
580 	 * Find the first page in piglet.
581 	 * Since piglets are contiguous, the first pg is all we need.
582 	 */
583 	if (!pmap_extract(pmap_kernel(), va, &pa))
584 		panic("uvm_pmr_free_piglet: piglet 0x%lx has no pages", va);
585 	pg = PHYS_TO_VM_PAGE(pa);
586 	if (pg == NULL)
587 		panic("uvm_pmr_free_piglet: unmanaged page 0x%lx", pa);
588 
589 	/*
590 	 * Unmap.
591 	 */
592 	pmap_kremove(va, sz);
593 	pmap_update(pmap_kernel());
594 
595 	/*
596 	 * Free the physical and virtual memory.
597 	 */
598 	uvm_pmr_freepages(pg, atop(sz));
599 	km_free((void *)va, sz, &kv_any, &kp_none);
600 }
601 
602 /*
603  * Physmem RLE compression support.
604  *
605  * Given a physical page address, return the number of pages starting at the
606  * address that are free.  Clamps to the number of pages in
607  * HIBERNATE_CHUNK_SIZE. Returns 0 if the page at addr is not free.
608  */
609 int
610 uvm_page_rle(paddr_t addr)
611 {
612 	struct vm_page		*pg, *pg_end;
613 	struct vm_physseg	*vmp;
614 	int			 pseg_idx, off_idx;
615 
616 	pseg_idx = vm_physseg_find(atop(addr), &off_idx);
617 	if (pseg_idx == -1)
618 		return 0;
619 
620 	vmp = &vm_physmem[pseg_idx];
621 	pg = &vmp->pgs[off_idx];
622 	if (!(pg->pg_flags & PQ_FREE))
623 		return 0;
624 
625 	/*
626 	 * Search for the first non-free page after pg.
627 	 * Note that the page may not be the first page in a free pmemrange,
628 	 * therefore pg->fpgsz cannot be used.
629 	 */
630 	for (pg_end = pg; pg_end <= vmp->lastpg &&
631 	    (pg_end->pg_flags & PQ_FREE) == PQ_FREE; pg_end++)
632 		;
633 	return min((pg_end - pg), HIBERNATE_CHUNK_SIZE/PAGE_SIZE);
634 }
635 
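/*
 * Illustrative sketch (not part of the build): a caller encoding memory
 * for the image, assumed here to be the MD image-writing code, can use
 * the returned count to skip runs of free pages instead of compressing
 * them, roughly:
 *
 *	rle = uvm_page_rle(paddr);
 *	if (rle > 0)
 *		paddr += ptoa(rle);	(skip 'rle' free pages)
 *	else
 *		(deflate the page at paddr and advance one page)
 */
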
636 /*
637  * Fills out the hibernate_info union pointed to by hiber_info
638  * with information about this machine (swap signature block
639  * offsets, number of memory ranges, kernel in use, etc)
 640  * offsets, number of memory ranges, kernel in use, etc.).
641 int
642 get_hibernate_info(union hibernate_info *hiber_info, int suspend)
643 {
644 	int chunktable_size;
645 	struct disklabel dl;
646 	char err_string[128], *dl_ret;
647 
648 	/* Determine I/O function to use */
649 	hiber_info->io_func = get_hibernate_io_function();
650 	if (hiber_info->io_func == NULL)
651 		return (1);
652 
653 	/* Calculate hibernate device */
654 	hiber_info->device = swdevt[0].sw_dev;
655 
656 	/* Read disklabel (used to calculate signature and image offsets) */
657 	dl_ret = disk_readlabel(&dl, hiber_info->device, err_string, 128);
658 
659 	if (dl_ret) {
660 		printf("Hibernate error reading disklabel: %s\n", dl_ret);
661 		return (1);
662 	}
663 
664 	/* Make sure we have a swap partition. */
665 	if (dl.d_partitions[1].p_fstype != FS_SWAP ||
666 	    dl.d_partitions[1].p_size == 0)
667 		return (1);
668 
669 	hiber_info->secsize = dl.d_secsize;
670 
671 	/* Make sure the signature can fit in one block */
 672 	if (sizeof(union hibernate_info) > hiber_info->secsize)
673 		return (1);
674 
675 	/* Magic number */
676 	hiber_info->magic = HIBERNATE_MAGIC;
677 
678 	/* Calculate swap offset from start of disk */
679 	hiber_info->swap_offset = dl.d_partitions[1].p_offset;
680 
681 	/* Calculate signature block location */
682 	hiber_info->sig_offset = dl.d_partitions[1].p_offset +
683 	    dl.d_partitions[1].p_size -
684 	    sizeof(union hibernate_info)/hiber_info->secsize;
685 
686 	chunktable_size = HIBERNATE_CHUNK_TABLE_SIZE / hiber_info->secsize;
687 
688 	/* Stash kernel version information */
689 	bzero(&hiber_info->kernel_version, 128);
690 	bcopy(version, &hiber_info->kernel_version,
691 	    min(strlen(version), sizeof(hiber_info->kernel_version)-1));
692 
693 	if (suspend) {
694 		/* Allocate piglet region */
695 		if (uvm_pmr_alloc_piglet(&hiber_info->piglet_va,
696 		    &hiber_info->piglet_pa, HIBERNATE_CHUNK_SIZE*3,
697 		    HIBERNATE_CHUNK_SIZE)) {
698 			printf("Hibernate failed to allocate the piglet\n");
699 			return (1);
700 		}
701 		hiber_info->io_page = (void *)hiber_info->piglet_va;
702 
703 		/*
704 		 * Initialization of the hibernate IO function for drivers
705 		 * that need to do prep work (such as allocating memory or
706 		 * setting up data structures that cannot safely be done
707 		 * during suspend without causing side effects). There is
708 		 * a matching HIB_DONE call performed after the write is
709 		 * completed.
710 		 */
711 		if (hiber_info->io_func(hiber_info->device, 0,
712 		    (vaddr_t)NULL, 0, HIB_INIT, hiber_info->io_page))
713 			goto fail;
714 
715 	} else {
716 		/*
717 		 * Resuming kernels use a regular I/O page since we won't
718 		 * have access to the suspended kernel's piglet VA at this
719 		 * point. No need to free this I/O page as it will vanish
720 		 * as part of the resume.
721 		 */
722 		hiber_info->io_page = malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT);
723 		if (!hiber_info->io_page)
724 			return (1);
725 	}
726 
727 
728 	if (get_hibernate_info_md(hiber_info))
729 		goto fail;
730 
731 	/* Calculate memory image location in swap */
732 	hiber_info->image_offset = dl.d_partitions[1].p_offset +
733 	    dl.d_partitions[1].p_size -
734 	    (hiber_info->image_size / hiber_info->secsize) -
735 	    sizeof(union hibernate_info)/hiber_info->secsize -
736 	    chunktable_size;
737 
738 	return (0);
739 fail:
740 	if (suspend)
741 		uvm_pmr_free_piglet(hiber_info->piglet_va,
742 		    HIBERNATE_CHUNK_SIZE * 3);
743 
744 	return (1);
745 }
746 
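/*
 * Resulting swap layout (illustrative, not to scale): the signature block
 * occupies the last sizeof(union hibernate_info) worth of sectors in the
 * swap partition, with the chunk table and the compressed image stacked
 * immediately below it.
 *
 *	| ... free swap ... | compressed image | chunk table | signature |
 *	                    ^ image_offset                   ^ sig_offset
 */
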
747 /*
748  * Allocate nitems*size bytes from the hiballoc area presently in use
749  */
750 void *
751 hibernate_zlib_alloc(void *unused, int nitems, int size)
752 {
753 	struct hibernate_zlib_state *hibernate_state;
754 
755 	hibernate_state =
756 	    (struct hibernate_zlib_state *)HIBERNATE_HIBALLOC_PAGE;
757 
758 	return hib_alloc(&hibernate_state->hiballoc_arena, nitems*size);
759 }
760 
761 /*
762  * Free the memory pointed to by addr in the hiballoc area presently in
763  * use
764  */
765 void
766 hibernate_zlib_free(void *unused, void *addr)
767 {
768 	struct hibernate_zlib_state *hibernate_state;
769 
770 	hibernate_state =
771 	    (struct hibernate_zlib_state *)HIBERNATE_HIBALLOC_PAGE;
772 
773 	hib_free(&hibernate_state->hiballoc_arena, addr);
774 }
775 
776 /*
777  * Gets the next RLE value from the image stream
778  */
779 int
780 hibernate_get_next_rle(void)
781 {
782 	int rle, i;
783 	struct hibernate_zlib_state *hibernate_state;
784 
785 	hibernate_state =
786 	    (struct hibernate_zlib_state *)HIBERNATE_HIBALLOC_PAGE;
787 
788 	/* Read RLE code */
789 	hibernate_state->hib_stream.next_out = (char *)&rle;
790 	hibernate_state->hib_stream.avail_out = sizeof(rle);
791 
792 	i = inflate(&hibernate_state->hib_stream, Z_FULL_FLUSH);
793 	if (i != Z_OK && i != Z_STREAM_END) {
794 		/*
795 		 * XXX - this will likely reboot/hang most machines
796 		 *       since the console output buffer will be unmapped,
797 		 *       but there's not much else we can do here.
798 		 */
799 		panic("inflate rle error");
800 	}
801 
802 	/* Sanity check what RLE value we got */
803 	if (rle > HIBERNATE_CHUNK_SIZE/PAGE_SIZE || rle < 0)
804 		panic("invalid RLE code");
805 
806 	if (i == Z_STREAM_END)
807 		rle = -1;
808 
809 	return rle;
810 }
811 
812 /*
813  * Inflate next page of data from the image stream
814  */
815 int
816 hibernate_inflate_page(void)
817 {
818 	struct hibernate_zlib_state *hibernate_state;
819 	int i;
820 
821 	hibernate_state =
822 	    (struct hibernate_zlib_state *)HIBERNATE_HIBALLOC_PAGE;
823 
824 	/* Set up the stream for inflate */
825 	hibernate_state->hib_stream.next_out = (char *)HIBERNATE_INFLATE_PAGE;
826 	hibernate_state->hib_stream.avail_out = PAGE_SIZE;
827 
828 	/* Process next block of data */
829 	i = inflate(&hibernate_state->hib_stream, Z_PARTIAL_FLUSH);
830 	if (i != Z_OK && i != Z_STREAM_END) {
831 		/*
832 		 * XXX - this will likely reboot/hang most machines
833 		 *       since the console output buffer will be unmapped,
834 		 *       but there's not much else we can do here.
835 		 */
836 		panic("inflate error");
837 	}
838 
839 	/* We should always have extracted a full page ... */
840 	if (hibernate_state->hib_stream.avail_out != 0) {
841 		/*
842 		 * XXX - this will likely reboot/hang most machines
843 		 *       since the console output buffer will be unmapped,
844 		 *       but there's not much else we can do here.
845 		 */
846 		panic("incomplete page");
847 	}
848 
849 	return (i == Z_STREAM_END);
850 }
851 
852 /*
853  * Inflate size bytes from src into dest, skipping any pages in
854  * [src..dest] that are special (see hibernate_inflate_skip)
855  *
856  * This function executes while using the resume-time stack
857  * and pmap, and therefore cannot use ddb/printf/etc. Doing so
858  * will likely hang or reset the machine since the console output buffer
859  * will be unmapped.
860  */
861 void
862 hibernate_inflate_region(union hibernate_info *hiber_info, paddr_t dest,
863     paddr_t src, size_t size)
864 {
 865 	int end_stream = 0;
866 	struct hibernate_zlib_state *hibernate_state;
867 
868 	hibernate_state =
869 	    (struct hibernate_zlib_state *)HIBERNATE_HIBALLOC_PAGE;
870 
871 	hibernate_state->hib_stream.next_in = (char *)src;
872 	hibernate_state->hib_stream.avail_in = size;
873 
874 	do {
875 		/* Flush cache and TLB */
876 		hibernate_flush();
877 
878 		/*
879 		 * Is this a special page? If yes, redirect the
880 		 * inflate output to a scratch page (eg, discard it)
881 		 */
882 		if (hibernate_inflate_skip(hiber_info, dest)) {
883 			hibernate_enter_resume_mapping(
884 			    HIBERNATE_INFLATE_PAGE,
885 			    HIBERNATE_INFLATE_PAGE, 0);
886 		} else {
887 			hibernate_enter_resume_mapping(
888 			    HIBERNATE_INFLATE_PAGE, dest, 0);
889 		}
890 
891 		hibernate_flush();
892 		end_stream = hibernate_inflate_page();
893 
894 		dest += PAGE_SIZE;
895 	} while (!end_stream);
896 }
897 
898 /*
 899  * Deflate from src into the I/O page, up to 'remaining' bytes.
 900  *
 901  * Returns the number of input bytes consumed, and updates the
 902  * 'remaining' parameter to reflect the output space left over
 903  * (this information is needed to know how much to write to disk).
904  */
905 size_t
906 hibernate_deflate(union hibernate_info *hiber_info, paddr_t src,
907     size_t *remaining)
908 {
909 	vaddr_t hibernate_io_page = hiber_info->piglet_va + PAGE_SIZE;
910 	struct hibernate_zlib_state *hibernate_state;
911 
912 	hibernate_state =
913 	    (struct hibernate_zlib_state *)HIBERNATE_HIBALLOC_PAGE;
914 
915 	/* Set up the stream for deflate */
916 	hibernate_state->hib_stream.next_in = (caddr_t)src;
917 	hibernate_state->hib_stream.avail_in = PAGE_SIZE - (src & PAGE_MASK);
918 	hibernate_state->hib_stream.next_out = (caddr_t)hibernate_io_page +
919 	    (PAGE_SIZE - *remaining);
920 	hibernate_state->hib_stream.avail_out = *remaining;
921 
922 	/* Process next block of data */
923 	if (deflate(&hibernate_state->hib_stream, Z_PARTIAL_FLUSH) != Z_OK)
924 		panic("hibernate zlib deflate error");
925 
926 	/* Update pointers and return number of bytes consumed */
927 	*remaining = hibernate_state->hib_stream.avail_out;
928 	return (PAGE_SIZE - (src & PAGE_MASK)) -
929 	    hibernate_state->hib_stream.avail_in;
930 }
931 
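/*
 * Worked example (illustrative, assuming 4KB pages and a page-aligned
 * src): if *remaining == 4096 (an empty I/O page) and the page at src
 * deflates down to 1000 bytes, the caller sees *remaining == 3096 and a
 * return value of 4096, i.e. the whole input page was consumed and the
 * next page can be deflated into the same I/O page.  If instead the I/O
 * page fills up first, *remaining drops to 0 and the return value is less
 * than a page, so the caller flushes the I/O page to disk and resumes
 * part-way through the same input page (see the overflow handling in
 * hibernate_write_chunks).
 */
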
932 /*
933  * Write the hibernation information specified in hiber_info
934  * to the location in swap previously calculated (last block of
935  * swap), called the "signature block".
936  */
937 int
938 hibernate_write_signature(union hibernate_info *hiber_info)
939 {
940 	/* Write hibernate info to disk */
941 	return (hiber_info->io_func(hiber_info->device, hiber_info->sig_offset,
942 	    (vaddr_t)hiber_info, hiber_info->secsize, HIB_W,
943 	    hiber_info->io_page));
944 }
945 
946 /*
947  * Write the memory chunk table to the area in swap immediately
948  * preceding the signature block. The chunk table is stored
949  * in the piglet when this function is called.
950  *
951  * Return values:
952  *
953  * 0   -  success
954  * EIO -  I/O error writing the chunktable
955  */
956 int
957 hibernate_write_chunktable(union hibernate_info *hiber_info)
958 {
959 	struct hibernate_disk_chunk *chunks;
960 	vaddr_t hibernate_chunk_table_start;
961 	size_t hibernate_chunk_table_size;
962 	daddr_t chunkbase;
963 	int i, err;
964 
965 	hibernate_chunk_table_size = HIBERNATE_CHUNK_TABLE_SIZE;
966 
967 	chunkbase = hiber_info->sig_offset -
968 	    (hibernate_chunk_table_size / hiber_info->secsize);
969 
970 	hibernate_chunk_table_start = hiber_info->piglet_va +
971 	    HIBERNATE_CHUNK_SIZE;
972 
973 	chunks = (struct hibernate_disk_chunk *)(hiber_info->piglet_va +
974 	    HIBERNATE_CHUNK_SIZE);
975 
976 	/* Write chunk table */
977 	for (i = 0; i < hibernate_chunk_table_size; i += MAXPHYS) {
978 		if ((err = hiber_info->io_func(hiber_info->device,
979 		    chunkbase + (i/hiber_info->secsize),
980 		    (vaddr_t)(hibernate_chunk_table_start + i),
981 		    MAXPHYS, HIB_W, hiber_info->io_page))) {
982 			DPRINTF("chunktable write error: %d\n", err);
983 			return (EIO);
984 		}
985 	}
986 
987 	return (0);
988 }
989 
990 /*
991  * Write an empty hiber_info to the swap signature block, which is
992  * guaranteed to not match any valid hiber_info.
993  */
994 int
995 hibernate_clear_signature(void)
996 {
997 	union hibernate_info blank_hiber_info;
998 	union hibernate_info hiber_info;
999 
1000 	/* Zero out a blank hiber_info */
1001 	bzero(&blank_hiber_info, sizeof(union hibernate_info));
1002 
1003 	/* Get the signature block location */
1004 	if (get_hibernate_info(&hiber_info, 0))
1005 		return (1);
1006 
1007 	/* Write (zeroed) hibernate info to disk */
1008 	DPRINTF("clearing hibernate signature block location: %lld\n",
1009 		hiber_info.sig_offset - hiber_info.swap_offset);
1010 	if (hibernate_block_io(&hiber_info,
1011 	    hiber_info.sig_offset - hiber_info.swap_offset,
1012 	    hiber_info.secsize, (vaddr_t)&blank_hiber_info, 1))
1013 		printf("Warning: could not clear hibernate signature\n");
1014 
1015 	return (0);
1016 }
1017 
1018 /*
1019  * Check chunk range overlap when calculating whether or not to copy a
1020  * compressed chunk to the piglet area before decompressing.
1021  *
 1022  * Returns zero if the ranges do not overlap, non-zero otherwise.
1023  */
1024 int
1025 hibernate_check_overlap(paddr_t r1s, paddr_t r1e, paddr_t r2s, paddr_t r2e)
1026 {
1027 	/* case A : end of r1 overlaps start of r2 */
1028 	if (r1s < r2s && r1e > r2s)
1029 		return (1);
1030 
1031 	/* case B : r1 entirely inside r2 */
1032 	if (r1s >= r2s && r1e <= r2e)
1033 		return (1);
1034 
1035 	/* case C : r2 entirely inside r1 */
1036 	if (r2s >= r1s && r2e <= r1e)
1037 		return (1);
1038 
1039 	/* case D : end of r2 overlaps start of r1 */
1040 	if (r2s < r1s && r2e > r1s)
1041 		return (1);
1042 
1043 	return (0);
1044 }
1045 
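/*
 * Worked examples (illustrative), with r1 = [0x1000, 0x3000):
 *
 *	r2 = [0x2000, 0x4000)	overlap (case A)
 *	r2 = [0x0000, 0x4000)	overlap (case B, r1 inside r2)
 *	r2 = [0x1800, 0x2800)	overlap (case C, r2 inside r1)
 *	r2 = [0x3000, 0x5000)	no overlap (the ranges merely touch)
 */
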
1046 /*
 1047  * Compare two hibernate_infos to determine if they are the same (i.e.,
 1048  * whether we should be performing a hibernate resume on this machine).
1049  * Not all fields are checked - just enough to verify that the machine
1050  * has the same memory configuration and kernel as the one that
1051  * wrote the signature previously.
1052  */
1053 int
1054 hibernate_compare_signature(union hibernate_info *mine,
1055     union hibernate_info *disk)
1056 {
1057 	u_int i;
1058 
1059 	if (mine->nranges != disk->nranges) {
1060 		DPRINTF("hibernate memory range count mismatch\n");
1061 		return (1);
1062 	}
1063 
1064 	if (strcmp(mine->kernel_version, disk->kernel_version) != 0) {
1065 		DPRINTF("hibernate kernel version mismatch\n");
1066 		return (1);
1067 	}
1068 
1069 	for (i = 0; i < mine->nranges; i++) {
1070 		if ((mine->ranges[i].base != disk->ranges[i].base) ||
1071 		    (mine->ranges[i].end != disk->ranges[i].end) ) {
1072 			DPRINTF("hib range %d mismatch [%p-%p != %p-%p]\n",
1073 				i, mine->ranges[i].base, mine->ranges[i].end,
1074 				disk->ranges[i].base, disk->ranges[i].end);
1075 			return (1);
1076 		}
1077 	}
1078 
1079 	return (0);
1080 }
1081 
1082 /*
1083  * Transfers xfer_size bytes between the hibernate device specified in
1084  * hib_info at offset blkctr and the vaddr specified at dest.
1085  *
1086  * Separate offsets and pages are used to handle misaligned reads (reads
1087  * that span a page boundary).
1088  *
1089  * blkctr specifies a relative offset (relative to the start of swap),
 1090  * not an absolute disk offset.
1092  */
1093 int
1094 hibernate_block_io(union hibernate_info *hib_info, daddr_t blkctr,
1095     size_t xfer_size, vaddr_t dest, int iswrite)
1096 {
1097 	struct buf *bp;
1098 	struct bdevsw *bdsw;
1099 	int error;
1100 
1101 	bp = geteblk(xfer_size);
1102 	bdsw = &bdevsw[major(hib_info->device)];
1103 
1104 	error = (*bdsw->d_open)(hib_info->device, FREAD, S_IFCHR, curproc);
1105 	if (error) {
1106 		printf("hibernate_block_io open failed\n");
1107 		return (1);
1108 	}
1109 
1110 	if (iswrite)
1111 		bcopy((caddr_t)dest, bp->b_data, xfer_size);
1112 
1113 	bp->b_bcount = xfer_size;
1114 	bp->b_blkno = blkctr;
1115 	CLR(bp->b_flags, B_READ | B_WRITE | B_DONE);
1116 	SET(bp->b_flags, B_BUSY | (iswrite ? B_WRITE : B_READ) | B_RAW);
1117 	bp->b_dev = hib_info->device;
1118 	bp->b_cylinder = 0;
1119 	(*bdsw->d_strategy)(bp);
1120 
1121 	error = biowait(bp);
1122 	if (error) {
1123 		printf("hib block_io biowait error %d blk %lld size %zu\n",
1124 			error, (long long)blkctr, xfer_size);
1125 		error = (*bdsw->d_close)(hib_info->device, 0, S_IFCHR,
1126 		    curproc);
1127 		if (error)
1128 			printf("hibernate_block_io error close failed\n");
1129 		return (1);
1130 	}
1131 
1132 	error = (*bdsw->d_close)(hib_info->device, FREAD, S_IFCHR, curproc);
1133 	if (error) {
1134 		printf("hibernate_block_io close failed\n");
1135 		return (1);
1136 	}
1137 
1138 	if (!iswrite)
1139 		bcopy(bp->b_data, (caddr_t)dest, xfer_size);
1140 
1141 	bp->b_flags |= B_INVAL;
1142 	brelse(bp);
1143 
1144 	return (0);
1145 }
1146 
1147 /*
1148  * Reads the signature block from swap, checks against the current machine's
1149  * information. If the information matches, perform a resume by reading the
1150  * saved image into the pig area, and unpacking.
1151  */
1152 void
1153 hibernate_resume(void)
1154 {
1155 	union hibernate_info hiber_info;
1156 	int s;
1157 
1158 	/* Get current running machine's hibernate info */
1159 	bzero(&hiber_info, sizeof(hiber_info));
1160 	if (get_hibernate_info(&hiber_info, 0)) {
1161 		DPRINTF("couldn't retrieve machine's hibernate info\n");
1162 		return;
1163 	}
1164 
1165 	/* Read hibernate info from disk */
1166 	s = splbio();
1167 
1168 	DPRINTF("reading hibernate signature block location: %lld\n",
1169 		hiber_info.sig_offset - hiber_info.swap_offset);
1170 
1171 	if (hibernate_block_io(&hiber_info,
1172 	    hiber_info.sig_offset - hiber_info.swap_offset,
1173 	    hiber_info.secsize, (vaddr_t)&disk_hiber_info, 0)) {
 1174 		DPRINTF("error in hibernate read\n");
1175 		splx(s);
1176 		return;
1177 	}
1178 
1179 	/* Check magic number */
1180 	if (disk_hiber_info.magic != HIBERNATE_MAGIC) {
1181 		DPRINTF("wrong magic number in hibernate signature: %x\n",
1182 			disk_hiber_info.magic);
1183 		splx(s);
1184 		return;
1185 	}
1186 
1187 	/*
1188 	 * We (possibly) found a hibernate signature. Clear signature first,
1189 	 * to prevent accidental resume or endless resume cycles later.
1190 	 */
1191 	if (hibernate_clear_signature()) {
1192 		DPRINTF("error clearing hibernate signature block\n");
1193 		splx(s);
1194 		return;
1195 	}
1196 
1197 	/*
1198 	 * If on-disk and in-memory hibernate signatures match,
1199 	 * this means we should do a resume from hibernate.
1200 	 */
1201 	if (hibernate_compare_signature(&hiber_info, &disk_hiber_info)) {
1202 		DPRINTF("mismatched hibernate signature block\n");
1203 		splx(s);
1204 		return;
1205 	}
1206 
1207 #ifdef MULTIPROCESSOR
1208 	hibernate_quiesce_cpus();
1209 #endif /* MULTIPROCESSOR */
1210 
1211 	printf("Unhibernating...");
1212 
1213 	/* Read the image from disk into the image (pig) area */
1214 	if (hibernate_read_image(&disk_hiber_info))
1215 		goto fail;
1216 
1217 	if (config_suspend(TAILQ_FIRST(&alldevs), DVACT_QUIESCE) != 0)
1218 		goto fail;
1219 
1220 	(void) splhigh();
1221 	hibernate_disable_intr_machdep();
1222 	cold = 1;
1223 
1224 	if (config_suspend(TAILQ_FIRST(&alldevs), DVACT_SUSPEND) != 0) {
1225 		cold = 0;
1226 		hibernate_enable_intr_machdep();
1227 		goto fail;
1228 	}
1229 
1230 	pmap_kenter_pa(HIBERNATE_HIBALLOC_PAGE, HIBERNATE_HIBALLOC_PAGE,
1231 	    VM_PROT_ALL);
1232 	pmap_activate(curproc);
1233 
1234 	printf("Unpacking image...\n");
1235 
1236 	/* Switch stacks */
1237 	hibernate_switch_stack_machdep();
1238 
1239 	/* Unpack and resume */
1240 	hibernate_unpack_image(&disk_hiber_info);
1241 
1242 fail:
1243 	splx(s);
1244 	printf("\nUnable to resume hibernated image\n");
1245 }
1246 
1247 /*
1248  * Unpack image from pig area to original location by looping through the
1249  * list of output chunks in the order they should be restored (fchunks).
1250  *
1251  * Note that due to the stack smash protector and the fact that we have
1252  * switched stacks, it is not permitted to return from this function.
1253  */
1254 void
1255 hibernate_unpack_image(union hibernate_info *hiber_info)
1256 {
1257 	struct hibernate_disk_chunk *chunks;
1258 	union hibernate_info local_hiber_info;
1259 	paddr_t image_cur = global_pig_start;
1260 	short i, *fchunks;
1261 	char *pva = (char *)hiber_info->piglet_va;
1262 	struct hibernate_zlib_state *hibernate_state;
1263 
1264 	hibernate_state =
1265 	    (struct hibernate_zlib_state *)HIBERNATE_HIBALLOC_PAGE;
1266 
1267 	/* Mask off based on arch-specific piglet page size */
1268 	pva = (char *)((paddr_t)pva & (PIGLET_PAGE_MASK));
1269 	fchunks = (short *)(pva + (4 * PAGE_SIZE));
1270 
1271 	chunks = (struct hibernate_disk_chunk *)(pva +  HIBERNATE_CHUNK_SIZE);
1272 
1273 	/* Can't use hiber_info that's passed in after this point */
1274 	bcopy(hiber_info, &local_hiber_info, sizeof(union hibernate_info));
1275 
1276 	/*
1277 	 * Point of no return. Once we pass this point, only kernel code can
1278 	 * be accessed. No global variables or other kernel data structures
1279 	 * are guaranteed to be coherent after unpack starts.
1280 	 *
1281 	 * The image is now in high memory (pig area), we unpack from the pig
1282 	 * to the correct location in memory. We'll eventually end up copying
1283 	 * on top of ourself, but we are assured the kernel code here is the
1284 	 * same between the hibernated and resuming kernel, and we are running
1285 	 * on our own stack, so the overwrite is ok.
1286 	 */
1287 	hibernate_activate_resume_pt_machdep();
1288 
1289 	for (i = 0; i < local_hiber_info.chunk_ctr; i++) {
1290 		/* Reset zlib for inflate */
1291 		if (hibernate_zlib_reset(&local_hiber_info, 0) != Z_OK)
1292 			panic("hibernate failed to reset zlib for inflate");
1293 
1294 		hibernate_process_chunk(&local_hiber_info, &chunks[fchunks[i]],
1295 		    image_cur);
1296 
1297 		image_cur += chunks[fchunks[i]].compressed_size;
1298 
1299 	}
1300 
1301 	/*
1302 	 * Resume the loaded kernel by jumping to the MD resume vector.
1303 	 * We won't be returning from this call.
1304 	 */
1305 	hibernate_resume_machdep();
1306 }
1307 
1308 /*
1309  * Bounce a compressed image chunk to the piglet, entering mappings for the
1310  * copied pages as needed
1311  */
1312 void
1313 hibernate_copy_chunk_to_piglet(paddr_t img_cur, vaddr_t piglet, size_t size)
1314 {
1315 	size_t ct, ofs;
1316 	paddr_t src = img_cur;
1317 	vaddr_t dest = piglet;
1318 
1319 	/* Copy first partial page */
1320 	ct = (PAGE_SIZE) - (src & PAGE_MASK);
1321 	ofs = (src & PAGE_MASK);
1322 
1323 	if (ct < PAGE_SIZE) {
1324 		hibernate_enter_resume_mapping(HIBERNATE_INFLATE_PAGE,
1325 			(src - ofs), 0);
1326 		hibernate_flush();
1327 		bcopy((caddr_t)(HIBERNATE_INFLATE_PAGE + ofs), (caddr_t)dest, ct);
1328 		src += ct;
1329 		dest += ct;
1330 	}
1331 	wbinvd();
1332 
1333 	/* Copy remaining pages */
1334 	while (src < size + img_cur) {
1335 		hibernate_enter_resume_mapping(HIBERNATE_INFLATE_PAGE, src, 0);
1336 		hibernate_flush();
1337 		ct = PAGE_SIZE;
1338 		bcopy((caddr_t)(HIBERNATE_INFLATE_PAGE), (caddr_t)dest, ct);
1339 		hibernate_flush();
1340 		src += ct;
1341 		dest += ct;
1342 	}
1343 
1344 	hibernate_flush();
1345 	wbinvd();
1346 }
1347 
1348 /*
1349  * Process a chunk by bouncing it to the piglet, followed by unpacking
1350  */
1351 void
1352 hibernate_process_chunk(union hibernate_info *hiber_info,
1353     struct hibernate_disk_chunk *chunk, paddr_t img_cur)
1354 {
1355 	char *pva = (char *)hiber_info->piglet_va;
1356 
1357 	hibernate_copy_chunk_to_piglet(img_cur,
1358 	 (vaddr_t)(pva + (HIBERNATE_CHUNK_SIZE * 2)), chunk->compressed_size);
1359 
1360 	hibernate_inflate_region(hiber_info, chunk->base,
1361 	    (vaddr_t)(pva + (HIBERNATE_CHUNK_SIZE * 2)),
1362 	    chunk->compressed_size);
1363 }
1364 
1365 /*
1366  * Write a compressed version of this machine's memory to disk, at the
1367  * precalculated swap offset:
1368  *
1369  * end of swap - signature block size - chunk table size - memory size
1370  *
1371  * The function begins by looping through each phys mem range, cutting each
1372  * one into MD sized chunks. These chunks are then compressed individually
1373  * and written out to disk, in phys mem order. Some chunks might compress
1374  * more than others, and for this reason, each chunk's size is recorded
1375  * in the chunk table, which is written to disk after the image has
1376  * properly been compressed and written (in hibernate_write_chunktable).
1377  *
1378  * When this function is called, the machine is nearly suspended - most
1379  * devices are quiesced/suspended, interrupts are off, and cold has
1380  * been set. This means that there can be no side effects once the
1381  * write has started, and the write function itself can also have no
1382  * side effects. This also means no printfs are permitted (since printf
1383  * has side effects.)
1384  *
1385  * Return values :
1386  *
1387  * 0      - success
1388  * EIO    - I/O error occurred writing the chunks
1389  * EINVAL - Failed to write a complete range
1390  * ENOMEM - Memory allocation failure during preparation of the zlib arena
1391  */
1392 int
1393 hibernate_write_chunks(union hibernate_info *hiber_info)
1394 {
1395 	paddr_t range_base, range_end, inaddr, temp_inaddr;
1396 	size_t nblocks, out_remaining, used;
1397 	struct hibernate_disk_chunk *chunks;
1398 	vaddr_t hibernate_io_page = hiber_info->piglet_va + PAGE_SIZE;
1399 	daddr_t blkctr = hiber_info->image_offset, offset = 0;
1400 	int i, err;
1401 	struct hibernate_zlib_state *hibernate_state;
1402 
1403 	hibernate_state =
1404 	    (struct hibernate_zlib_state *)HIBERNATE_HIBALLOC_PAGE;
1405 
1406 	hiber_info->chunk_ctr = 0;
1407 
1408 	/*
1409 	 * Allocate VA for the temp and copy page.
1410 	 *
1411 	 * These will become part of the suspended kernel and will
1412 	 * be freed in hibernate_free, upon resume.
1413 	 */
1414 	hibernate_temp_page = (vaddr_t)km_alloc(PAGE_SIZE, &kv_any,
1415 	    &kp_none, &kd_nowait);
1416 	if (!hibernate_temp_page)
1417 		return (ENOMEM);
1418 
1419 	hibernate_copy_page = (vaddr_t)km_alloc(PAGE_SIZE, &kv_any,
1420 	    &kp_none, &kd_nowait);
1421 	if (!hibernate_copy_page) {
1422 		DPRINTF("out of memory allocating hibernate_copy_page\n");
1423 		return (ENOMEM);
1424 	}
1425 
1426 	pmap_kenter_pa(hibernate_copy_page,
1427 	    (hiber_info->piglet_pa + 3*PAGE_SIZE), VM_PROT_ALL);
1428 
1429 	pmap_activate(curproc);
1430 
1431 	chunks = (struct hibernate_disk_chunk *)(hiber_info->piglet_va +
1432 	    HIBERNATE_CHUNK_SIZE);
1433 
1434 	/* Calculate the chunk regions */
1435 	for (i = 0; i < hiber_info->nranges; i++) {
1436 		range_base = hiber_info->ranges[i].base;
1437 		range_end = hiber_info->ranges[i].end;
1438 
1439 		inaddr = range_base;
1440 
1441 		while (inaddr < range_end) {
1442 			chunks[hiber_info->chunk_ctr].base = inaddr;
1443 			if (inaddr + HIBERNATE_CHUNK_SIZE < range_end)
1444 				chunks[hiber_info->chunk_ctr].end = inaddr +
1445 				    HIBERNATE_CHUNK_SIZE;
1446 			else
1447 				chunks[hiber_info->chunk_ctr].end = range_end;
1448 
1449 			inaddr += HIBERNATE_CHUNK_SIZE;
 1450 			hiber_info->chunk_ctr++;
1451 		}
1452 	}
1453 
1454 	/* Compress and write the chunks in the chunktable */
1455 	for (i = 0; i < hiber_info->chunk_ctr; i++) {
1456 		range_base = chunks[i].base;
1457 		range_end = chunks[i].end;
1458 
1459 		chunks[i].offset = blkctr;
1460 
1461 		/* Reset zlib for deflate */
1462 		if (hibernate_zlib_reset(hiber_info, 1) != Z_OK) {
1463 			DPRINTF("hibernate_zlib_reset failed for deflate\n");
1464 			return (ENOMEM);
1465 		}
1466 
1467 		inaddr = range_base;
1468 
1469 		/*
1470 		 * For each range, loop through its phys mem region
1471 		 * and write out the chunks (the last chunk might be
1472 		 * smaller than the chunk size).
1473 		 */
1474 		while (inaddr < range_end) {
1475 			out_remaining = PAGE_SIZE;
1476 			while (out_remaining > 0 && inaddr < range_end) {
1477 
1478 				/*
1479 				 * Adjust for regions that are not evenly
1480 				 * divisible by PAGE_SIZE or overflowed
1481 				 * pages from the previous iteration.
1482 				 */
1483 				temp_inaddr = (inaddr & PAGE_MASK) +
1484 				    hibernate_copy_page;
1485 
1486 				/* Deflate from temp_inaddr to IO page */
1487 				if (inaddr != range_end) {
1488 					pmap_kenter_pa(hibernate_temp_page,
1489 					    inaddr & PMAP_PA_MASK, VM_PROT_ALL);
1490 
1491 					pmap_activate(curproc);
1492 
1493 					bcopy((caddr_t)hibernate_temp_page,
1494 					    (caddr_t)hibernate_copy_page,
1495 					    PAGE_SIZE);
1496 					inaddr += hibernate_deflate(hiber_info,
1497 					    temp_inaddr, &out_remaining);
1498 				}
1499 
1500 				if (out_remaining == 0) {
1501 					/* Filled up the page */
1502 					nblocks =
1503 					    PAGE_SIZE / hiber_info->secsize;
1504 
1505 					if ((err = hiber_info->io_func(
1506 					    hiber_info->device,
1507 					    blkctr, (vaddr_t)hibernate_io_page,
1508 					    PAGE_SIZE, HIB_W,
1509 					    hiber_info->io_page))) {
1510 						DPRINTF("hib write error %d\n",
1511 							err);
1512 						return (EIO);
1513 					}
1514 
1515 					blkctr += nblocks;
1516 				}
1517 			}
1518 		}
1519 
1520 		if (inaddr != range_end) {
1521 			DPRINTF("deflate range ended prematurely\n");
1522 			return (EINVAL);
1523 		}
1524 
1525 		/*
 1526 		 * End of range. Round up to the next secsize boundary
 1527 		 * after finishing the compression.
1528 		 */
1529 		if (out_remaining == 0)
1530 			out_remaining = PAGE_SIZE;
1531 
1532 		/* Finish compress */
1533 		hibernate_state->hib_stream.next_in = (caddr_t)inaddr;
1534 		hibernate_state->hib_stream.avail_in = 0;
1535 		hibernate_state->hib_stream.next_out =
1536 		    (caddr_t)hibernate_io_page + (PAGE_SIZE - out_remaining);
1537 
1538 		/* We have an extra output page available for finalize */
1539 		hibernate_state->hib_stream.avail_out =
1540 			out_remaining + PAGE_SIZE;
1541 
1542 		if ((err = deflate(&hibernate_state->hib_stream, Z_FINISH)) !=
1543 		    Z_STREAM_END) {
1544 			DPRINTF("deflate error in output stream: %d\n", err);
1545 			return (EIO);
1546 		}
1547 
1548 		out_remaining = hibernate_state->hib_stream.avail_out;
1549 
1550 		used = 2*PAGE_SIZE - out_remaining;
1551 		nblocks = used / hiber_info->secsize;
1552 
1553 		/* Round up to next block if needed */
1554 		if (used % hiber_info->secsize != 0)
 1555 			nblocks++;
1556 
1557 		/* Write final block(s) for this chunk */
1558 		if ((err = hiber_info->io_func(hiber_info->device, blkctr,
1559 		    (vaddr_t)hibernate_io_page, nblocks*hiber_info->secsize,
1560 		    HIB_W, hiber_info->io_page))) {
1561 			DPRINTF("hib final write error %d\n", err);
1562 			return (EIO);
1563 		}
1564 
1565 		blkctr += nblocks;
1566 
1567 		offset = blkctr;
1568 		chunks[i].compressed_size = (offset - chunks[i].offset) *
1569 		    hiber_info->secsize;
1570 	}
1571 
1572 	return (0);
1573 }
1574 
1575 /*
1576  * Reset the zlib stream state and allocate a new hiballoc area for either
1577  * inflate or deflate. This function is called once for each hibernate chunk.
1578  * Calling hiballoc_init multiple times is acceptable since the memory it is
1579  * provided is unmanaged memory (stolen). We use the memory provided to us
1580  * by the piglet allocated via the supplied hiber_info.
1581  */
1582 int
1583 hibernate_zlib_reset(union hibernate_info *hiber_info, int deflate)
1584 {
1585 	vaddr_t hibernate_zlib_start;
1586 	size_t hibernate_zlib_size;
1587 	char *pva = (char *)hiber_info->piglet_va;
1588 	struct hibernate_zlib_state *hibernate_state;
1589 
1590 	hibernate_state =
1591 	    (struct hibernate_zlib_state *)HIBERNATE_HIBALLOC_PAGE;
1592 
 1593 	if (!deflate)
1594 		pva = (char *)((paddr_t)pva & (PIGLET_PAGE_MASK));
1595 
1596 	hibernate_zlib_start = (vaddr_t)(pva + (28 * PAGE_SIZE));
1597 	hibernate_zlib_size = 80 * PAGE_SIZE;
1598 
1599 	bzero((caddr_t)hibernate_zlib_start, hibernate_zlib_size);
1600 	bzero((caddr_t)hibernate_state, PAGE_SIZE);
1601 
1602 	/* Set up stream structure */
1603 	hibernate_state->hib_stream.zalloc = (alloc_func)hibernate_zlib_alloc;
1604 	hibernate_state->hib_stream.zfree = (free_func)hibernate_zlib_free;
1605 
1606 	/* Initialize the hiballoc arena for zlib allocs/frees */
1607 	hiballoc_init(&hibernate_state->hiballoc_arena,
1608 	    (caddr_t)hibernate_zlib_start, hibernate_zlib_size);
1609 
1610 	if (deflate) {
1611 		return deflateInit(&hibernate_state->hib_stream,
1612 		    Z_BEST_SPEED);
1613 	} else
1614 		return inflateInit(&hibernate_state->hib_stream);
1615 }
1616 
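/*
 * Per-chunk calling pattern (illustrative): both paths reset zlib once
 * per chunk, e.g.
 *
 *	if (hibernate_zlib_reset(hiber_info, 1) != Z_OK)
 *		(fail)
 *	(then call hibernate_deflate() repeatedly for that chunk)
 *
 * as done in hibernate_write_chunks(), and with deflate == 0 in
 * hibernate_unpack_image() before each hibernate_process_chunk().  The
 * hiballoc arena handed to zlib lives at piglet offset 28*PAGE_SIZE and
 * spans 80 pages, matching the piglet layout table at the top of this
 * file.
 */
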
1617 /*
1618  * Reads the hibernated memory image from disk, whose location and
1619  * size are recorded in hiber_info. Begin by reading the persisted
1620  * chunk table, which records the original chunk placement location
1621  * and compressed size for each. Next, allocate a pig region of
1622  * sufficient size to hold the compressed image. Next, read the
 1623  * sufficient size to hold the compressed image. Then read the
1624  * and finally, if all of the above succeeds, clear the hibernate signature.
1625  * The function will then return to hibernate_resume, which will proceed
1626  * to unpack the pig image to the correct place in memory.
1627  */
1628 int
1629 hibernate_read_image(union hibernate_info *hiber_info)
1630 {
1631 	size_t compressed_size, disk_size, chunktable_size, pig_sz;
1632 	paddr_t image_start, image_end, pig_start, pig_end;
1633 	struct hibernate_disk_chunk *chunks;
1634 	daddr_t blkctr;
1635 	vaddr_t chunktable = (vaddr_t)NULL;
1636 	paddr_t piglet_chunktable = hiber_info->piglet_pa +
1637 	    HIBERNATE_CHUNK_SIZE;
1638 	int i;
1639 
1640 	pmap_activate(curproc);
1641 
1642 	/* Calculate total chunk table size in disk blocks */
1643 	chunktable_size = HIBERNATE_CHUNK_TABLE_SIZE / hiber_info->secsize;
1644 
1645 	blkctr = hiber_info->sig_offset - chunktable_size -
1646 			hiber_info->swap_offset;
1647 
1648 	chunktable = (vaddr_t)km_alloc(HIBERNATE_CHUNK_TABLE_SIZE, &kv_any,
1649 	    &kp_none, &kd_nowait);
1650 
1651 	if (!chunktable)
1652 		return (1);
1653 
1654 	/* Read the chunktable from disk into the piglet chunktable */
1655 	for (i = 0; i < HIBERNATE_CHUNK_TABLE_SIZE;
1656 	    i += PAGE_SIZE, blkctr += PAGE_SIZE/hiber_info->secsize) {
1657 		pmap_kenter_pa(chunktable + i, piglet_chunktable + i,
1658 		    VM_PROT_ALL);
1659 		pmap_update(pmap_kernel());
1660 		hibernate_block_io(hiber_info, blkctr, PAGE_SIZE,
1661 		    chunktable + i, 0);
1662 	}
1663 
1664 	blkctr = hiber_info->image_offset;
1665 	compressed_size = 0;
1666 
1667 	chunks = (struct hibernate_disk_chunk *)chunktable;
1668 
1669 	for (i = 0; i < hiber_info->chunk_ctr; i++)
1670 		compressed_size += chunks[i].compressed_size;
1671 
1672 	disk_size = compressed_size;
1673 
1674 	printf(" (image size: %zu)\n", compressed_size);
1675 
1676 	/* Allocate the pig area */
1677 	pig_sz = compressed_size + HIBERNATE_CHUNK_SIZE;
1678 	if (uvm_pmr_alloc_pig(&pig_start, pig_sz) == ENOMEM)
1679 		return (1);
1680 
1681 	pig_end = pig_start + pig_sz;
1682 
1683 	/* Calculate image extents. Pig image must end on a chunk boundary. */
1684 	image_end = pig_end & ~(HIBERNATE_CHUNK_SIZE - 1);
 1685 	image_start = image_end - disk_size;
1688 
1689 	hibernate_read_chunks(hiber_info, image_start, image_end, disk_size,
1690 	    chunks);
1691 
1692 	pmap_kremove(chunktable, PAGE_SIZE);
1693 	pmap_update(pmap_kernel());
1694 
1695 	/* Prepare the resume time pmap/page table */
1696 	hibernate_populate_resume_pt(hiber_info, image_start, image_end);
1697 
1698 	return (0);
1699 }
1700 
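/*
 * Worked example (illustrative): with a 100MB compressed image and
 * HIBERNATE_CHUNK_SIZE = 4MB, the pig area is 104MB.  image_end is the
 * last chunk boundary inside the pig and image_start sits disk_size
 * (100MB) below it, so the image is packed against the top of the pig
 * and always ends on a chunk boundary, leaving slack for the unpack
 * performed later by hibernate_unpack_image().
 */
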
1701 /*
1702  * Read the hibernated memory chunks from disk (chunk information at this
1703  * point is stored in the piglet) into the pig area specified by
1704  * [pig_start .. pig_end]. Order the chunks so that the final chunk is the
1705  * only chunk with overlap possibilities.
1706  */
1707 int
1708 hibernate_read_chunks(union hibernate_info *hib_info, paddr_t pig_start,
1709     paddr_t pig_end, size_t image_compr_size,
1710     struct hibernate_disk_chunk *chunks)
1711 {
1712 	paddr_t img_index, img_cur, r1s, r1e, r2s, r2e;
1713 	paddr_t copy_start, copy_end, piglet_cur;
1714 	paddr_t piglet_base = hib_info->piglet_pa;
1715 	paddr_t piglet_end = piglet_base + HIBERNATE_CHUNK_SIZE;
1716 	daddr_t blkctr;
1717 	size_t processed, compressed_size, read_size;
1718 	int overlap, found, nchunks, nochunks = 0, nfchunks = 0, npchunks = 0;
1719 	short *ochunks, *pchunks, *fchunks, i, j;
1720 	vaddr_t tempva = (vaddr_t)NULL, hibernate_fchunk_area = (vaddr_t)NULL;
1721 
1722 	global_pig_start = pig_start;
1723 
1724 	pmap_activate(curproc);
1725 
1726 	/*
1727 	 * These mappings go into the resuming kernel's page table, and are
 1728 	 * used only during image read. They disappear when the suspended
 1729 	 * kernel is unpacked on top of us.
1730 	 */
1731 	tempva = (vaddr_t)km_alloc(2*PAGE_SIZE, &kv_any, &kp_none, &kd_nowait);
1732 	if (!tempva)
1733 		return (1);
1734 	hibernate_fchunk_area = (vaddr_t)km_alloc(24*PAGE_SIZE, &kv_any,
1735 	    &kp_none, &kd_nowait);
1736 	if (!hibernate_fchunk_area)
1737 		return (1);
1738 
1739 	/* Final output chunk ordering VA */
1740 	fchunks = (short *)hibernate_fchunk_area;
1741 
1742 	/* Piglet chunk ordering VA */
1743 	pchunks = (short *)(hibernate_fchunk_area + (8*PAGE_SIZE));
1744 
1745 	/* Final chunk ordering VA */
1746 	ochunks = (short *)(hibernate_fchunk_area + (16*PAGE_SIZE));
1747 
1748 	/* Map the chunk ordering region */
 1749 	for (i = 0; i < 24; i++) {
1750 		pmap_kenter_pa(hibernate_fchunk_area + (i*PAGE_SIZE),
1751 			piglet_base + ((4+i)*PAGE_SIZE), VM_PROT_ALL);
1752 		pmap_update(pmap_kernel());
1753 	}
1754 
1755 	nchunks = hib_info->chunk_ctr;
1756 
1757 	/* Initially start all chunks as unplaced */
1758 	for (i = 0; i < nchunks; i++)
1759 		chunks[i].flags = 0;
1760 
1761 	/*
1762 	 * Search the list for chunks that are outside the pig area. These
1763 	 * can be placed first in the final output list.
1764 	 */
1765 	for (i = 0; i < nchunks; i++) {
1766 		if (chunks[i].end <= pig_start || chunks[i].base >= pig_end) {
1767 			ochunks[nochunks] = i;
1768 			fchunks[nfchunks] = i;
1769 			nochunks++;
1770 			nfchunks++;
1771 			chunks[i].flags |= HIBERNATE_CHUNK_USED;
1772 		}
1773 	}
1774 
1775 	/*
1776 	 * Walk the ordering, place the chunks in ascending memory order.
 1777 	 * Conflicts might arise; these are handled next.
1778 	 */
1779 	do {
1780 		img_index = -1;
1781 		found = 0;
1782 		j = -1;
1783 		for (i = 0; i < nchunks; i++)
1784 			if (chunks[i].base < img_index &&
 1785 			    chunks[i].flags == 0) {
1786 				j = i;
1787 				img_index = chunks[i].base;
1788 			}
1789 
1790 		if (j != -1) {
1791 			found = 1;
1792 			ochunks[nochunks] = j;
1793 			nochunks++;
1794 			chunks[j].flags |= HIBERNATE_CHUNK_PLACED;
1795 		}
1796 	} while (found);
1797 
1798 	img_index = pig_start;
1799 
1800 	/*
1801 	 * Identify chunk output conflicts (chunks whose pig load area
1802 	 * corresponds to their original memory placement location)
1803 	 */
1804 	for (i = 0; i < nochunks ; i++) {
1805 		overlap = 0;
1806 		r1s = img_index;
1807 		r1e = img_index + chunks[ochunks[i]].compressed_size;
1808 		r2s = chunks[ochunks[i]].base;
1809 		r2e = chunks[ochunks[i]].end;
1810 
1811 		overlap = hibernate_check_overlap(r1s, r1e, r2s, r2e);
1812 		if (overlap)
1813 			chunks[ochunks[i]].flags |= HIBERNATE_CHUNK_CONFLICT;
1814 		img_index += chunks[ochunks[i]].compressed_size;
1815 	}
1816 
1817 	/*
1818 	 * Prepare the final output chunk list. Calculate an output
1819 	 * inflate strategy for overlapping chunks if needed.
1820 	 */
1821 	img_index = pig_start;
1822 	for (i = 0; i < nochunks ; i++) {
1823 		/*
1824 		 * If a conflict is detected, consume enough compressed
1825 		 * output chunks to fill the piglet
1826 		 */
1827 		if (chunks[ochunks[i]].flags & HIBERNATE_CHUNK_CONFLICT) {
1828 			copy_start = piglet_base;
1829 			copy_end = piglet_end;
1830 			piglet_cur = piglet_base;
1831 			npchunks = 0;
1832 			j = i;
1833 
1834 			while (copy_start < copy_end && j < nochunks) {
1835 				piglet_cur +=
1836 				    chunks[ochunks[j]].compressed_size;
1837 				pchunks[npchunks] = ochunks[j];
1838 				npchunks++;
1839 				copy_start +=
1840 				    chunks[ochunks[j]].compressed_size;
1841 				img_index += chunks[ochunks[j]].compressed_size;
1842 				i++;
1843 				j++;
1844 			}
1845 
1846 			piglet_cur = piglet_base;
1847 			for (j = 0; j < npchunks; j++) {
1848 				piglet_cur +=
1849 				    chunks[pchunks[j]].compressed_size;
1850 				fchunks[nfchunks] = pchunks[j];
1851 				chunks[pchunks[j]].flags |=
1852 				    HIBERNATE_CHUNK_USED;
1853 				nfchunks++;
1854 			}
1855 		} else {
1856 			/*
 1857 			 * No conflict; the chunk can be added without copying
1858 			 */
1859 			if ((chunks[ochunks[i]].flags &
1860 			    HIBERNATE_CHUNK_USED) == 0) {
1861 				fchunks[nfchunks] = ochunks[i];
1862 				chunks[ochunks[i]].flags |=
1863 				    HIBERNATE_CHUNK_USED;
1864 				nfchunks++;
1865 			}
1866 			img_index += chunks[ochunks[i]].compressed_size;
1867 		}
1868 	}
1869 
1870 	img_index = pig_start;
1871 	for (i = 0; i < nfchunks; i++) {
1872 		piglet_cur = piglet_base;
1873 		img_index += chunks[fchunks[i]].compressed_size;
1874 	}
1875 
1876 	img_cur = pig_start;
1877 
1878 	for (i = 0; i < nfchunks; i++) {
1879 		blkctr = chunks[fchunks[i]].offset - hib_info->swap_offset;
1880 		processed = 0;
1881 		compressed_size = chunks[fchunks[i]].compressed_size;
1882 
1883 		while (processed < compressed_size) {
1884 			pmap_kenter_pa(tempva, img_cur, VM_PROT_ALL);
1885 			pmap_kenter_pa(tempva + PAGE_SIZE, img_cur+PAGE_SIZE,
1886 			    VM_PROT_ALL);
1887 			pmap_update(pmap_kernel());
1888 
1889 			if (compressed_size - processed >= PAGE_SIZE)
1890 				read_size = PAGE_SIZE;
1891 			else
1892 				read_size = compressed_size - processed;
1893 
1894 			hibernate_block_io(hib_info, blkctr, read_size,
1895 			    tempva + (img_cur & PAGE_MASK), 0);
1896 
1897 			blkctr += (read_size / hib_info->secsize);
1898 
1899 			pmap_kremove(tempva, PAGE_SIZE);
1900 			pmap_kremove(tempva + PAGE_SIZE, PAGE_SIZE);
1901 			processed += read_size;
1902 			img_cur += read_size;
1903 		}
1904 	}
1905 
1906 	pmap_kremove(hibernate_fchunk_area, PAGE_SIZE);
1907 	pmap_kremove((vaddr_t)pchunks, PAGE_SIZE);
1908 	pmap_kremove((vaddr_t)fchunks, PAGE_SIZE);
1909 	pmap_update(pmap_kernel());
1910 
1911 	return (0);
1912 }
1913 
1914 /*
1915  * Hibernating a machine comprises the following operations:
1916  *  1. Calculating this machine's hibernate_info information
1917  *  2. Allocating a piglet and saving the piglet's physaddr
1918  *  3. Calculating the memory chunks
1919  *  4. Writing the compressed chunks to disk
1920  *  5. Writing the chunk table
1921  *  6. Writing the signature block (hibernate_info)
1922  *
1923  * On most architectures, the function calling hibernate_suspend would
1924  * then power off the machine using some MD-specific implementation.
1925  */
1926 int
1927 hibernate_suspend(void)
1928 {
1929 	union hibernate_info hib_info;
1930 	size_t swap_size;
1931 
1932 	/*
1933 	 * Calculate memory ranges, swap offsets, etc.
1934 	 * This also allocates a piglet whose physaddr is stored in
1935 	 * hib_info->piglet_pa and vaddr stored in hib_info->piglet_va
1936 	 */
1937 	if (get_hibernate_info(&hib_info, 1)) {
1938 		DPRINTF("failed to obtain hibernate info\n");
1939 		return (1);
1940 	}
1941 
1942 	swap_size = hib_info.image_size + hib_info.secsize +
1943 		HIBERNATE_CHUNK_TABLE_SIZE;
1944 
1945 	if (uvm_swap_check_range(hib_info.device, swap_size)) {
1946 		printf("insufficient swap space for hibernate\n");
1947 		return (1);
1948 	}
1949 
1950 	pmap_kenter_pa(HIBERNATE_HIBALLOC_PAGE, HIBERNATE_HIBALLOC_PAGE,
1951 		VM_PROT_ALL);
1952 	pmap_activate(curproc);
1953 
1954 	/* Stash the piglet VA so we can free it in the resuming kernel */
1955 	global_piglet_va = hib_info.piglet_va;
1956 
1957 	DPRINTF("hibernate: writing chunks\n");
1958 	if (hibernate_write_chunks(&hib_info)) {
1959 		DPRINTF("hibernate_write_chunks failed\n");
1960 		return (1);
1961 	}
1962 
1963 	DPRINTF("hibernate: writing chunktable\n");
1964 	if (hibernate_write_chunktable(&hib_info)) {
1965 		DPRINTF("hibernate_write_chunktable failed\n");
1966 		return (1);
1967 	}
1968 
1969 	DPRINTF("hibernate: writing signature\n");
1970 	if (hibernate_write_signature(&hib_info)) {
1971 		DPRINTF("hibernate_write_signature failed\n");
1972 		return (1);
1973 	}
1974 
1975 	/* Allow the disk to settle */
1976 	delay(500000);
1977 
1978 	/*
1979 	 * Give the device-specific I/O function a notification that we're
1980 	 * done, and that it can clean up or shutdown as needed.
1981 	 */
1982 	hib_info.io_func(hib_info.device, 0, (vaddr_t)NULL, 0,
1983 	    HIB_DONE, hib_info.io_page);
1984 
1985 	return (0);
1986 }
1987 
1988 /*
1989  * Free items allocated by hibernate_suspend()
1990  */
1991 void
1992 hibernate_free(void)
1993 {
1994 	if (global_piglet_va)
1995 		uvm_pmr_free_piglet(global_piglet_va,
1996 		    3*HIBERNATE_CHUNK_SIZE);
1997 
1998 	if (hibernate_copy_page)
1999 		pmap_kremove(hibernate_copy_page, PAGE_SIZE);
2000 	if (hibernate_temp_page)
2001 		pmap_kremove(hibernate_temp_page, PAGE_SIZE);
2002 
2003 	pmap_update(pmap_kernel());
2004 
2005 	if (hibernate_copy_page)
2006 		km_free((void *)hibernate_copy_page, PAGE_SIZE,
2007 		    &kv_any, &kp_none);
2008 	if (hibernate_temp_page)
2009 		km_free((void *)hibernate_temp_page, PAGE_SIZE,
2010 		    &kv_any, &kp_none);
2011 
2012 	global_piglet_va = 0;
2013 	hibernate_copy_page = 0;
2014 	hibernate_temp_page = 0;
2015 }
2016