1 /*	$OpenBSD: subr_hibernate.c,v 1.92 2014/05/31 04:36:59 mlarkin Exp $	*/
2 
3 /*
4  * Copyright (c) 2011 Ariane van der Steldt <ariane@stack.nl>
5  * Copyright (c) 2011 Mike Larkin <mlarkin@openbsd.org>
6  *
7  * Permission to use, copy, modify, and distribute this software for any
8  * purpose with or without fee is hereby granted, provided that the above
9  * copyright notice and this permission notice appear in all copies.
10  *
11  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
12  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
14  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18  */
19 
20 #include <sys/hibernate.h>
21 #include <sys/malloc.h>
22 #include <sys/param.h>
23 #include <sys/tree.h>
24 #include <sys/systm.h>
25 #include <sys/disklabel.h>
26 #include <sys/disk.h>
27 #include <sys/conf.h>
28 #include <sys/buf.h>
29 #include <sys/fcntl.h>
30 #include <sys/stat.h>
31 #include <uvm/uvm.h>
32 #include <uvm/uvm_swap.h>
33 #include <machine/hibernate.h>
34 
35 /*
36  * Hibernate piglet layout information
37  *
38  * The piglet is a scratch area of memory allocated by the suspending kernel.
39  * Its phys and virt addrs are recorded in the signature block. The piglet is
40  * used to guarantee an unused area of memory that can be used by the resuming
41  * kernel for various things. The piglet is excluded during unpack operations.
42  * The piglet size is presently 3*HIBERNATE_CHUNK_SIZE (typically 3*4MB).
43  *
44  * Offset from piglet_base	Purpose
45  * ----------------------------------------------------------------------------
46  * 0				I/O page used during resume
47  * 1*PAGE_SIZE		 	I/O page used during hibernate suspend
48  * 2*PAGE_SIZE		 	I/O page used during hibernate suspend
49  * 3*PAGE_SIZE			copy page used during hibernate suspend
50  * 4*PAGE_SIZE			final chunk ordering list (8 pages)
51  * 12*PAGE_SIZE			piglet chunk ordering list (8 pages)
52  * 20*PAGE_SIZE			temp chunk ordering list (8 pages)
53  * 28*PAGE_SIZE			start of hiballoc area
54  * 108*PAGE_SIZE		end of hiballoc area (80 pages)
55  * ...				unused
56  * HIBERNATE_CHUNK_SIZE		start of hibernate chunk table
57  * 2*HIBERNATE_CHUNK_SIZE	bounce area for chunks being unpacked
58  * 3*HIBERNATE_CHUNK_SIZE	end of piglet
59  */
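
/*
 * Illustrative sketch only (none of the code below depends on it): given
 * the layout above, the piglet sub-areas can be derived directly from the
 * piglet base address.  The variable names are made up for this example.
 *
 *	vaddr_t resume_io_page  = piglet_base;
 *	vaddr_t suspend_io_page = piglet_base + 1 * PAGE_SIZE;
 *	short  *final_ordering  = (short *)(piglet_base + 4 * PAGE_SIZE);
 *	vaddr_t hiballoc_start  = piglet_base + 28 * PAGE_SIZE;
 *	vaddr_t chunk_table     = piglet_base + HIBERNATE_CHUNK_SIZE;
 *	vaddr_t bounce_area     = piglet_base + 2 * HIBERNATE_CHUNK_SIZE;
 */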
60 
61 /* Temporary vaddr ranges used during hibernate */
62 vaddr_t hibernate_temp_page;
63 vaddr_t hibernate_copy_page;
64 
65 /* Hibernate info as read from disk during resume */
66 union hibernate_info disk_hib;
67 paddr_t global_pig_start;
68 vaddr_t global_piglet_va;
69 
70 /* #define HIB_DEBUG */
71 #ifdef HIB_DEBUG
72 int	hib_debug = 99;
73 #define DPRINTF(x...)     do { if (hib_debug) printf(x); } while (0)
74 #define DNPRINTF(n,x...)  do { if (hib_debug > (n)) printf(x); } while (0)
75 #else
76 #define DPRINTF(x...)
77 #define DNPRINTF(n,x...)
78 #endif
79 
80 void hibernate_copy_chunk_to_piglet(paddr_t, vaddr_t, size_t);
81 
82 /*
83  * Hib alloc enforced alignment.
84  */
85 #define HIB_ALIGN		8 /* bytes alignment */
86 
87 /*
88  * sizeof builtin operation, but with alignment constraint.
89  */
90 #define HIB_SIZEOF(_type)	roundup(sizeof(_type), HIB_ALIGN)
91 
92 struct hiballoc_entry {
93 	size_t			hibe_use;
94 	size_t			hibe_space;
95 	RB_ENTRY(hiballoc_entry) hibe_entry;
96 };
97 
98 /*
99  * Compare hiballoc entries based on the address they manage.
100  *
101  * Since the managed address is at a fixed offset from struct
102  * hiballoc_entry, comparing the hiballoc_entry pointers is equivalent.
103  */
104 static __inline int
105 hibe_cmp(struct hiballoc_entry *l, struct hiballoc_entry *r)
106 {
107 	return l < r ? -1 : (l > r);
108 }
109 
110 RB_PROTOTYPE(hiballoc_addr, hiballoc_entry, hibe_entry, hibe_cmp)
111 
112 /*
113  * Given a hiballoc entry, return the address it manages.
114  */
115 static __inline void *
116 hib_entry_to_addr(struct hiballoc_entry *entry)
117 {
118 	caddr_t addr;
119 
120 	addr = (caddr_t)entry;
121 	addr += HIB_SIZEOF(struct hiballoc_entry);
122 	return addr;
123 }
124 
125 /*
126  * Given an address, find the hiballoc_entry that manages it.
127  */
128 static __inline struct hiballoc_entry*
129 hib_addr_to_entry(void *addr_param)
130 {
131 	caddr_t addr;
132 
133 	addr = (caddr_t)addr_param;
134 	addr -= HIB_SIZEOF(struct hiballoc_entry);
135 	return (struct hiballoc_entry*)addr;
136 }
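
/*
 * Layout sketch (illustrative): every allocation is immediately preceded by
 * its bookkeeping entry, so converting between the two is a fixed offset:
 *
 *	| struct hiballoc_entry | hibe_use bytes in use | hibe_space free |
 *	^                       ^
 *	entry                   hib_entry_to_addr(entry)
 */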
137 
138 RB_GENERATE(hiballoc_addr, hiballoc_entry, hibe_entry, hibe_cmp)
139 
140 /*
141  * Allocate memory from the arena.
142  *
143  * Returns NULL if no memory is available.
144  */
145 void *
146 hib_alloc(struct hiballoc_arena *arena, size_t alloc_sz)
147 {
148 	struct hiballoc_entry *entry, *new_entry;
149 	size_t find_sz;
150 
151 	/*
152 	 * Enforce alignment of HIB_ALIGN bytes.
153 	 *
154 	 * Note that, because the entry is put in front of the allocation,
155 	 * 0-byte allocations are guaranteed a unique address.
156 	 */
157 	alloc_sz = roundup(alloc_sz, HIB_ALIGN);
158 
159 	/*
160 	 * Find an entry with hibe_space >= find_sz.
161 	 *
162 	 * If the root node is not large enough, we switch to tree traversal.
163 	 * Because all entries are made at the bottom of the free space,
164 	 * traversal from the end has a slightly better chance of yielding
165 	 * a sufficiently large space.
166 	 */
167 	find_sz = alloc_sz + HIB_SIZEOF(struct hiballoc_entry);
168 	entry = RB_ROOT(&arena->hib_addrs);
169 	if (entry != NULL && entry->hibe_space < find_sz) {
170 		RB_FOREACH_REVERSE(entry, hiballoc_addr, &arena->hib_addrs) {
171 			if (entry->hibe_space >= find_sz)
172 				break;
173 		}
174 	}
175 
176 	/*
177 	 * Insufficient or too fragmented memory.
178 	 */
179 	if (entry == NULL)
180 		return NULL;
181 
182 	/*
183 	 * Create new entry in allocated space.
184 	 */
185 	new_entry = (struct hiballoc_entry*)(
186 	    (caddr_t)hib_entry_to_addr(entry) + entry->hibe_use);
187 	new_entry->hibe_space = entry->hibe_space - find_sz;
188 	new_entry->hibe_use = alloc_sz;
189 
190 	/*
191 	 * Insert entry.
192 	 */
193 	if (RB_INSERT(hiballoc_addr, &arena->hib_addrs, new_entry) != NULL)
194 		panic("hib_alloc: insert failure");
195 	entry->hibe_space = 0;
196 
197 	/* Return address managed by entry. */
198 	return hib_entry_to_addr(new_entry);
199 }
200 
201 /*
202  * Free a pointer previously allocated from this arena.
203  *
204  * If addr is NULL, this will be silently accepted.
205  */
206 void
207 hib_free(struct hiballoc_arena *arena, void *addr)
208 {
209 	struct hiballoc_entry *entry, *prev;
210 
211 	if (addr == NULL)
212 		return;
213 
214 	/*
215 	 * Derive entry from addr and check it is really in this arena.
216 	 */
217 	entry = hib_addr_to_entry(addr);
218 	if (RB_FIND(hiballoc_addr, &arena->hib_addrs, entry) != entry)
219 		panic("hib_free: freed item %p not in hib arena", addr);
220 
221 	/*
222 	 * Give the space in entry to its predecessor.
223 	 *
224 	 * If entry has no predecessor, change its used space into free space
225 	 * instead.
226 	 */
227 	prev = RB_PREV(hiballoc_addr, &arena->hib_addrs, entry);
228 	if (prev != NULL &&
229 	    (void *)((caddr_t)prev + HIB_SIZEOF(struct hiballoc_entry) +
230 	    prev->hibe_use + prev->hibe_space) == entry) {
231 		/* Merge entry. */
232 		RB_REMOVE(hiballoc_addr, &arena->hib_addrs, entry);
233 		prev->hibe_space += HIB_SIZEOF(struct hiballoc_entry) +
234 		    entry->hibe_use + entry->hibe_space;
235 	} else {
236 		/* Flip used memory to free space. */
237 		entry->hibe_space += entry->hibe_use;
238 		entry->hibe_use = 0;
239 	}
240 }
241 
242 /*
243  * Initialize hiballoc.
244  *
245  * The allocator will manage the memory at ptr, which is len bytes long.
246  */
247 int
248 hiballoc_init(struct hiballoc_arena *arena, void *p_ptr, size_t p_len)
249 {
250 	struct hiballoc_entry *entry;
251 	caddr_t ptr;
252 	size_t len;
253 
254 	RB_INIT(&arena->hib_addrs);
255 
256 	/*
257 	 * Hib allocator enforces HIB_ALIGN alignment.
258 	 * Fixup ptr and len.
259 	 */
260 	ptr = (caddr_t)roundup((vaddr_t)p_ptr, HIB_ALIGN);
261 	len = p_len - ((size_t)ptr - (size_t)p_ptr);
262 	len &= ~((size_t)HIB_ALIGN - 1);
263 
264 	/*
265 	 * Insufficient memory to be able to allocate and also do bookkeeping.
266 	 */
267 	if (len <= HIB_SIZEOF(struct hiballoc_entry))
268 		return ENOMEM;
269 
270 	/*
271 	 * Create entry describing space.
272 	 */
273 	entry = (struct hiballoc_entry*)ptr;
274 	entry->hibe_use = 0;
275 	entry->hibe_space = len - HIB_SIZEOF(struct hiballoc_entry);
276 	RB_INSERT(hiballoc_addr, &arena->hib_addrs, entry);
277 
278 	return 0;
279 }
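
/*
 * Minimal usage sketch of the arena API above (illustrative only; the real
 * callers are hibernate_zlib_reset/alloc/free below).  'scratch_va' and
 * 'scratch_len' stand in for some unmanaged scratch memory:
 *
 *	struct hiballoc_arena arena;
 *	void *p;
 *
 *	if (hiballoc_init(&arena, (void *)scratch_va, scratch_len) == 0) {
 *		p = hib_alloc(&arena, 128);
 *		if (p != NULL)
 *			hib_free(&arena, p);
 *	}
 */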
280 
281 /*
282  * Zero all free memory.
283  */
284 void
285 uvm_pmr_zero_everything(void)
286 {
287 	struct uvm_pmemrange	*pmr;
288 	struct vm_page		*pg;
289 	int			 i;
290 
291 	uvm_lock_fpageq();
292 	TAILQ_FOREACH(pmr, &uvm.pmr_control.use, pmr_use) {
293 		/* Zero single pages. */
294 		while ((pg = TAILQ_FIRST(&pmr->single[UVM_PMR_MEMTYPE_DIRTY]))
295 		    != NULL) {
296 			uvm_pmr_remove(pmr, pg);
297 			uvm_pagezero(pg);
298 			atomic_setbits_int(&pg->pg_flags, PG_ZERO);
299 			uvmexp.zeropages++;
300 			uvm_pmr_insert(pmr, pg, 0);
301 		}
302 
303 		/* Zero multi page ranges. */
304 		while ((pg = RB_ROOT(&pmr->size[UVM_PMR_MEMTYPE_DIRTY]))
305 		    != NULL) {
306 			pg--; /* Size tree always has second page. */
307 			uvm_pmr_remove(pmr, pg);
308 			for (i = 0; i < pg->fpgsz; i++) {
309 				uvm_pagezero(&pg[i]);
310 				atomic_setbits_int(&pg[i].pg_flags, PG_ZERO);
311 				uvmexp.zeropages++;
312 			}
313 			uvm_pmr_insert(pmr, pg, 0);
314 		}
315 	}
316 	uvm_unlock_fpageq();
317 }
318 
319 /*
320  * Mark all memory as dirty.
321  *
322  * Used to inform the system that the clean memory isn't clean for some
323  * reason, for example because we just came back from hibernate.
324  */
325 void
326 uvm_pmr_dirty_everything(void)
327 {
328 	struct uvm_pmemrange	*pmr;
329 	struct vm_page		*pg;
330 	int			 i;
331 
332 	uvm_lock_fpageq();
333 	TAILQ_FOREACH(pmr, &uvm.pmr_control.use, pmr_use) {
334 		/* Dirty single pages. */
335 		while ((pg = TAILQ_FIRST(&pmr->single[UVM_PMR_MEMTYPE_ZERO]))
336 		    != NULL) {
337 			uvm_pmr_remove(pmr, pg);
338 			atomic_clearbits_int(&pg->pg_flags, PG_ZERO);
339 			uvm_pmr_insert(pmr, pg, 0);
340 		}
341 
342 		/* Dirty multi page ranges. */
343 		while ((pg = RB_ROOT(&pmr->size[UVM_PMR_MEMTYPE_ZERO]))
344 		    != NULL) {
345 			pg--; /* Size tree always has second page. */
346 			uvm_pmr_remove(pmr, pg);
347 			for (i = 0; i < pg->fpgsz; i++)
348 				atomic_clearbits_int(&pg[i].pg_flags, PG_ZERO);
349 			uvm_pmr_insert(pmr, pg, 0);
350 		}
351 	}
352 
353 	uvmexp.zeropages = 0;
354 	uvm_unlock_fpageq();
355 }
356 
357 /*
358  * Allocate memory at the highest address that can hold sz.
359  *
360  * sz in bytes.
361  */
362 int
363 uvm_pmr_alloc_pig(paddr_t *addr, psize_t sz)
364 {
365 	struct uvm_pmemrange	*pmr;
366 	struct vm_page		*pig_pg, *pg;
367 
368 	/*
369 	 * Convert sz to pages, since that is what pmemrange uses internally.
370 	 */
371 	sz = atop(round_page(sz));
372 
373 	uvm_lock_fpageq();
374 
375 	TAILQ_FOREACH(pmr, &uvm.pmr_control.use, pmr_use) {
376 		RB_FOREACH_REVERSE(pig_pg, uvm_pmr_addr, &pmr->addr) {
377 			if (pig_pg->fpgsz >= sz) {
378 				goto found;
379 			}
380 		}
381 	}
382 
383 	/*
384 	 * Allocation failure.
385 	 */
386 	uvm_unlock_fpageq();
387 	return ENOMEM;
388 
389 found:
390 	/* Remove page from freelist. */
391 	uvm_pmr_remove_size(pmr, pig_pg);
392 	pig_pg->fpgsz -= sz;
393 	pg = pig_pg + pig_pg->fpgsz;
394 	if (pig_pg->fpgsz == 0)
395 		uvm_pmr_remove_addr(pmr, pig_pg);
396 	else
397 		uvm_pmr_insert_size(pmr, pig_pg);
398 
399 	uvmexp.free -= sz;
400 	*addr = VM_PAGE_TO_PHYS(pg);
401 
402 	/*
403 	 * Update pg flags.
404 	 *
405 	 * Note that we trash the sz argument now.
406 	 */
407 	while (sz > 0) {
408 		KASSERT(pg->pg_flags & PQ_FREE);
409 
410 		atomic_clearbits_int(&pg->pg_flags, PG_PMAPMASK);
411 
412 		if (pg->pg_flags & PG_ZERO)
413 			uvmexp.zeropages -= sz;
414 		atomic_clearbits_int(&pg->pg_flags,
415 		    PG_ZERO|PQ_FREE);
416 
417 		pg->uobject = NULL;
418 		pg->uanon = NULL;
419 		pg->pg_version++;
420 
421 		/*
422 		 * Next.
423 		 */
424 		pg++;
425 		sz--;
426 	}
427 
428 	/* Return. */
429 	uvm_unlock_fpageq();
430 	return 0;
431 }
432 
433 /*
434  * Allocate a piglet area.
435  *
436  * The piglet is allocated as low in physical memory as possible.
437  * Piglets are aligned to 'align'.
438  *
439  * sz and align in bytes.
440  *
441  * The call may sleep, giving the pagedaemon a chance to free memory.
442  * The pagedaemon may decide it is not possible to free enough memory, in
443  * which case the allocation fails.
444  */
445 int
446 uvm_pmr_alloc_piglet(vaddr_t *va, paddr_t *pa, vsize_t sz, paddr_t align)
447 {
448 	paddr_t			 pg_addr, piglet_addr;
449 	struct uvm_pmemrange	*pmr;
450 	struct vm_page		*pig_pg, *pg;
451 	struct pglist		 pageq;
452 	int			 pdaemon_woken;
453 	vaddr_t			 piglet_va;
454 
455 	/* Ensure align is a power of 2 */
456 	KASSERT((align & (align - 1)) == 0);
457 
458 	pdaemon_woken = 0; /* Didn't wake the pagedaemon. */
459 
460 	/*
461 	 * Fixup arguments: align must be at least PAGE_SIZE,
462 	 * sz will be converted to pagecount, since that is what
463 	 * pmemrange uses internally.
464 	 */
465 	if (align < PAGE_SIZE)
466 		align = PAGE_SIZE;
467 	sz = round_page(sz);
468 
469 	uvm_lock_fpageq();
470 
471 	TAILQ_FOREACH_REVERSE(pmr, &uvm.pmr_control.use, uvm_pmemrange_use,
472 	    pmr_use) {
473 retry:
474 		/*
475 		 * Search for a range with enough space.
476 		 * Use the address tree, to ensure the range is as low as
477 		 * possible.
478 		 */
479 		RB_FOREACH(pig_pg, uvm_pmr_addr, &pmr->addr) {
480 			pg_addr = VM_PAGE_TO_PHYS(pig_pg);
481 			piglet_addr = (pg_addr + (align - 1)) & ~(align - 1);
482 
483 			if (atop(pg_addr) + pig_pg->fpgsz >=
484 			    atop(piglet_addr) + atop(sz))
485 				goto found;
486 		}
487 	}
488 
489 	/*
490 	 * Try to coerce the pagedaemon into freeing memory
491 	 * for the piglet.
492 	 *
493 	 * pdaemon_woken is set to prevent the code from
494 	 * falling into an endless loop.
495 	 */
496 	if (!pdaemon_woken) {
497 		pdaemon_woken = 1;
498 		if (uvm_wait_pla(ptoa(pmr->low), ptoa(pmr->high) - 1,
499 		    sz, UVM_PLA_FAILOK) == 0)
500 			goto retry;
501 	}
502 
503 	/* Return failure. */
504 	uvm_unlock_fpageq();
505 	return ENOMEM;
506 
507 found:
508 	/*
509 	 * Extract piglet from pigpen.
510 	 */
511 	TAILQ_INIT(&pageq);
512 	uvm_pmr_extract_range(pmr, pig_pg,
513 	    atop(piglet_addr), atop(piglet_addr) + atop(sz), &pageq);
514 
515 	*pa = piglet_addr;
516 	uvmexp.free -= atop(sz);
517 
518 	/*
519 	 * Update pg flags.
520 	 *
521 	 * Note that we trash the sz argument now.
522 	 */
523 	TAILQ_FOREACH(pg, &pageq, pageq) {
524 		KASSERT(pg->pg_flags & PQ_FREE);
525 
526 		atomic_clearbits_int(&pg->pg_flags, PG_PMAPMASK);
527 
528 		if (pg->pg_flags & PG_ZERO)
529 			uvmexp.zeropages--;
530 		atomic_clearbits_int(&pg->pg_flags,
531 		    PG_ZERO|PQ_FREE);
532 
533 		pg->uobject = NULL;
534 		pg->uanon = NULL;
535 		pg->pg_version++;
536 	}
537 
538 	uvm_unlock_fpageq();
539 
540 	/*
541 	 * Now allocate a va.
542 	 * Use direct mappings for the pages.
543 	 */
544 
545 	piglet_va = *va = (vaddr_t)km_alloc(sz, &kv_any, &kp_none, &kd_waitok);
546 	if (!piglet_va) {
547 		uvm_pglistfree(&pageq);
548 		return ENOMEM;
549 	}
550 
551 	/*
552 	 * Map piglet to va.
553 	 */
554 	TAILQ_FOREACH(pg, &pageq, pageq) {
555 		pmap_kenter_pa(piglet_va, VM_PAGE_TO_PHYS(pg), UVM_PROT_RW);
556 		piglet_va += PAGE_SIZE;
557 	}
558 	pmap_update(pmap_kernel());
559 
560 	return 0;
561 }
562 
563 /*
564  * Free a piglet area.
565  */
566 void
567 uvm_pmr_free_piglet(vaddr_t va, vsize_t sz)
568 {
569 	paddr_t			 pa;
570 	struct vm_page		*pg;
571 
572 	/*
573 	 * Fix parameters.
574 	 */
575 	sz = round_page(sz);
576 
577 	/*
578 	 * Find the first page in piglet.
579 	 * Since piglets are contiguous, the first pg is all we need.
580 	 */
581 	if (!pmap_extract(pmap_kernel(), va, &pa))
582 		panic("uvm_pmr_free_piglet: piglet 0x%lx has no pages", va);
583 	pg = PHYS_TO_VM_PAGE(pa);
584 	if (pg == NULL)
585 		panic("uvm_pmr_free_piglet: unmanaged page 0x%lx", pa);
586 
587 	/*
588 	 * Unmap.
589 	 */
590 	pmap_kremove(va, sz);
591 	pmap_update(pmap_kernel());
592 
593 	/*
594 	 * Free the physical and virtual memory.
595 	 */
596 	uvm_pmr_freepages(pg, atop(sz));
597 	km_free((void *)va, sz, &kv_any, &kp_none);
598 }
599 
600 /*
601  * Physmem RLE compression support.
602  *
603  * Given a physical page address, return the number of pages starting at the
604  * address that are free.  Clamps to the number of pages in
605  * HIBERNATE_CHUNK_SIZE. Returns 0 if the page at addr is not free.
606  */
607 int
608 uvm_page_rle(paddr_t addr)
609 {
610 	struct vm_page		*pg, *pg_end;
611 	struct vm_physseg	*vmp;
612 	int			 pseg_idx, off_idx;
613 
614 	pseg_idx = vm_physseg_find(atop(addr), &off_idx);
615 	if (pseg_idx == -1)
616 		return 0;
617 
618 	vmp = &vm_physmem[pseg_idx];
619 	pg = &vmp->pgs[off_idx];
620 	if (!(pg->pg_flags & PQ_FREE))
621 		return 0;
622 
623 	/*
624 	 * Search for the first non-free page after pg.
625 	 * Note that the page may not be the first page in a free pmemrange,
626 	 * therefore pg->fpgsz cannot be used.
627 	 */
628 	for (pg_end = pg; pg_end <= vmp->lastpg &&
629 	    (pg_end->pg_flags & PQ_FREE) == PQ_FREE; pg_end++)
630 		;
631 	return min((pg_end - pg), HIBERNATE_CHUNK_SIZE/PAGE_SIZE);
632 }
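
/*
 * Illustrative caller pattern (the real consumers live outside this file):
 * uvm_page_rle() lets a scan of physical memory skip whole runs of free
 * pages at once.  'range_base' and 'range_end' are assumed page-aligned
 * physical addresses used only for this example.
 *
 *	for (addr = range_base; addr < range_end; ) {
 *		int npages = uvm_page_rle(addr);
 *		if (npages == 0)
 *			addr += PAGE_SIZE;	(page in use: must be saved)
 *		else
 *			addr += ptoa(npages);	(free run: may be skipped)
 *	}
 */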
633 
634 /*
635  * Fills out the hibernate_info union pointed to by hib
636  * with information about this machine (swap signature block
637  * offsets, number of memory ranges, kernel in use, etc.).
638  */
639 int
640 get_hibernate_info(union hibernate_info *hib, int suspend)
641 {
642 	int chunktable_size;
643 	struct disklabel dl;
644 	char err_string[128], *dl_ret;
645 
646 	/* Determine I/O function to use */
647 	hib->io_func = get_hibernate_io_function();
648 	if (hib->io_func == NULL)
649 		return (1);
650 
651 	/* Calculate hibernate device */
652 	hib->dev = swdevt[0].sw_dev;
653 
654 	/* Read disklabel (used to calculate signature and image offsets) */
655 	dl_ret = disk_readlabel(&dl, hib->dev, err_string, 128);
656 
657 	if (dl_ret) {
658 		printf("Hibernate error reading disklabel: %s\n", dl_ret);
659 		return (1);
660 	}
661 
662 	/* Make sure we have a swap partition. */
663 	if (dl.d_partitions[1].p_fstype != FS_SWAP ||
664 	    DL_GETPSIZE(&dl.d_partitions[1]) == 0)
665 		return (1);
666 
667 	/* Make sure the signature can fit in one block */
668 	if (sizeof(union hibernate_info) > DEV_BSIZE)
669 		return (1);
670 
671 	/* Magic number */
672 	hib->magic = HIBERNATE_MAGIC;
673 
674 	/* Calculate signature block location */
675 	hib->sig_offset = DL_GETPSIZE(&dl.d_partitions[1]) -
676 	    sizeof(union hibernate_info)/DEV_BSIZE;
677 
678 	chunktable_size = HIBERNATE_CHUNK_TABLE_SIZE / DEV_BSIZE;
679 
680 	/* Stash kernel version information */
681 	memset(&hib->kernel_version, 0, 128);
682 	bcopy(version, &hib->kernel_version,
683 	    min(strlen(version), sizeof(hib->kernel_version)-1));
684 
685 	if (suspend) {
686 		/* Allocate piglet region */
687 		if (uvm_pmr_alloc_piglet(&hib->piglet_va,
688 		    &hib->piglet_pa, HIBERNATE_CHUNK_SIZE*3,
689 		    HIBERNATE_CHUNK_SIZE)) {
690 			printf("Hibernate failed to allocate the piglet\n");
691 			return (1);
692 		}
693 		hib->io_page = (void *)hib->piglet_va;
694 
695 		/*
696 		 * Initialization of the hibernate IO function for drivers
697 		 * that need to do prep work (such as allocating memory or
698 		 * setting up data structures that cannot safely be done
699 		 * during suspend without causing side effects). There is
700 		 * a matching HIB_DONE call performed after the write is
701 		 * completed.
702 		 */
703 		if (hib->io_func(hib->dev, DL_GETPOFFSET(&dl.d_partitions[1]),
704 		    (vaddr_t)NULL, DL_GETPSIZE(&dl.d_partitions[1]),
705 		    HIB_INIT, hib->io_page))
706 			goto fail;
707 
708 	} else {
709 		/*
710 		 * Resuming kernels use a regular I/O page since we won't
711 		 * have access to the suspended kernel's piglet VA at this
712 		 * point. No need to free this I/O page as it will vanish
713 		 * as part of the resume.
714 		 */
715 		hib->io_page = malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT);
716 		if (!hib->io_page)
717 			return (1);
718 	}
719 
720 
721 	if (get_hibernate_info_md(hib))
722 		goto fail;
723 
724 
725 	return (0);
726 fail:
727 	if (suspend)
728 		uvm_pmr_free_piglet(hib->piglet_va,
729 		    HIBERNATE_CHUNK_SIZE * 3);
730 
731 	return (1);
732 }
733 
734 /*
735  * Allocate nitems*size bytes from the hiballoc area presently in use
736  */
737 void *
738 hibernate_zlib_alloc(void *unused, int nitems, int size)
739 {
740 	struct hibernate_zlib_state *hibernate_state;
741 
742 	hibernate_state =
743 	    (struct hibernate_zlib_state *)HIBERNATE_HIBALLOC_PAGE;
744 
745 	return hib_alloc(&hibernate_state->hiballoc_arena, nitems*size);
746 }
747 
748 /*
749  * Free the memory pointed to by addr in the hiballoc area presently in
750  * use
751  */
752 void
753 hibernate_zlib_free(void *unused, void *addr)
754 {
755 	struct hibernate_zlib_state *hibernate_state;
756 
757 	hibernate_state =
758 	    (struct hibernate_zlib_state *)HIBERNATE_HIBALLOC_PAGE;
759 
760 	hib_free(&hibernate_state->hiballoc_arena, addr);
761 }
762 
763 /*
764  * Inflate next page of data from the image stream
765  */
766 int
767 hibernate_inflate_page(void)
768 {
769 	struct hibernate_zlib_state *hibernate_state;
770 	int i;
771 
772 	hibernate_state =
773 	    (struct hibernate_zlib_state *)HIBERNATE_HIBALLOC_PAGE;
774 
775 	/* Set up the stream for inflate */
776 	hibernate_state->hib_stream.next_out = (char *)HIBERNATE_INFLATE_PAGE;
777 	hibernate_state->hib_stream.avail_out = PAGE_SIZE;
778 
779 	/* Process next block of data */
780 	i = inflate(&hibernate_state->hib_stream, Z_SYNC_FLUSH);
781 	if (i != Z_OK && i != Z_STREAM_END) {
782 		/*
783 		 * XXX - this will likely reboot/hang most machines
784 		 *       since the console output buffer will be unmapped,
785 		 *       but there's not much else we can do here.
786 		 */
787 		panic("inflate error");
788 	}
789 
790 	/* We should always have extracted a full page ... */
791 	if (hibernate_state->hib_stream.avail_out != 0) {
792 		/*
793 		 * XXX - this will likely reboot/hang most machines
794 		 *       since the console output buffer will be unmapped,
795 		 *       but there's not much else we can do here.
796 		 */
797 		panic("incomplete page");
798 	}
799 
800 	return (i == Z_STREAM_END);
801 }
802 
803 /*
804  * Inflate size bytes from src into dest, skipping any pages in
805  * [src..dest] that are special (see hibernate_inflate_skip)
806  *
807  * This function executes while using the resume-time stack
808  * and pmap, and therefore cannot use ddb/printf/etc. Doing so
809  * will likely hang or reset the machine since the console output buffer
810  * will be unmapped.
811  */
812 void
813 hibernate_inflate_region(union hibernate_info *hib, paddr_t dest,
814     paddr_t src, size_t size)
815 {
816 	int end_stream = 0;
817 	struct hibernate_zlib_state *hibernate_state;
818 
819 	hibernate_state =
820 	    (struct hibernate_zlib_state *)HIBERNATE_HIBALLOC_PAGE;
821 
822 	hibernate_state->hib_stream.next_in = (char *)src;
823 	hibernate_state->hib_stream.avail_in = size;
824 
825 	do {
826 		/*
827 		 * Is this a special page? If yes, redirect the
828 		 * inflate output to a scratch page (eg, discard it)
829 		 */
830 		if (hibernate_inflate_skip(hib, dest)) {
831 			hibernate_enter_resume_mapping(
832 			    HIBERNATE_INFLATE_PAGE,
833 			    HIBERNATE_INFLATE_PAGE, 0);
834 		} else {
835 			hibernate_enter_resume_mapping(
836 			    HIBERNATE_INFLATE_PAGE, dest, 0);
837 		}
838 
839 		hibernate_flush();
840 		end_stream = hibernate_inflate_page();
841 
842 		dest += PAGE_SIZE;
843 	} while (!end_stream);
844 }
845 
846 /*
847  * Deflate from src into the I/O page, up to 'remaining' bytes.
848  *
849  * Returns the number of input bytes consumed, and may reset
850  * the 'remaining' parameter if not all the output space was consumed
851  * (this information is needed to know how much to write to disk).
852  */
853 size_t
854 hibernate_deflate(union hibernate_info *hib, paddr_t src,
855     size_t *remaining)
856 {
857 	vaddr_t hibernate_io_page = hib->piglet_va + PAGE_SIZE;
858 	struct hibernate_zlib_state *hibernate_state;
859 
860 	hibernate_state =
861 	    (struct hibernate_zlib_state *)HIBERNATE_HIBALLOC_PAGE;
862 
863 	/* Set up the stream for deflate */
864 	hibernate_state->hib_stream.next_in = (caddr_t)src;
865 	hibernate_state->hib_stream.avail_in = PAGE_SIZE - (src & PAGE_MASK);
866 	hibernate_state->hib_stream.next_out = (caddr_t)hibernate_io_page +
867 	    (PAGE_SIZE - *remaining);
868 	hibernate_state->hib_stream.avail_out = *remaining;
869 
870 	/* Process next block of data */
871 	if (deflate(&hibernate_state->hib_stream, Z_SYNC_FLUSH) != Z_OK)
872 		panic("hibernate zlib deflate error");
873 
874 	/* Update pointers and return number of bytes consumed */
875 	*remaining = hibernate_state->hib_stream.avail_out;
876 	return (PAGE_SIZE - (src & PAGE_MASK)) -
877 	    hibernate_state->hib_stream.avail_in;
878 }
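
/*
 * Illustrative caller pattern (see hibernate_write_chunks() below for the
 * real loop): pages are fed to hibernate_deflate() until the piglet I/O
 * page fills up, at which point the caller writes that page to disk.
 * 'temp_inaddr' is the copy-page alias of the physical page being saved.
 *
 *	out_remaining = PAGE_SIZE;
 *	while (out_remaining > 0 && inaddr < range_end)
 *		inaddr += hibernate_deflate(hib, temp_inaddr, &out_remaining);
 *	if (out_remaining == 0)
 *		(write the now-full I/O page via hib->io_func)
 */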
879 
880 /*
881  * Write the hibernation information specified in hib
882  * to the location in swap previously calculated (last block of
883  * swap), called the "signature block".
884  */
885 int
886 hibernate_write_signature(union hibernate_info *hib)
887 {
888 	/* Write hibernate info to disk */
889 	return (hib->io_func(hib->dev, hib->sig_offset,
890 	    (vaddr_t)hib, DEV_BSIZE, HIB_W,
891 	    hib->io_page));
892 }
893 
894 /*
895  * Write the memory chunk table to the area in swap immediately
896  * preceding the signature block. The chunk table is stored
897  * in the piglet when this function is called.  Returns errno.
898  */
899 int
900 hibernate_write_chunktable(union hibernate_info *hib)
901 {
902 	struct hibernate_disk_chunk *chunks;
903 	vaddr_t hibernate_chunk_table_start;
904 	size_t hibernate_chunk_table_size;
905 	int i, err;
906 
907 	hibernate_chunk_table_size = HIBERNATE_CHUNK_TABLE_SIZE;
908 
909 	hibernate_chunk_table_start = hib->piglet_va +
910 	    HIBERNATE_CHUNK_SIZE;
911 
912 	chunks = (struct hibernate_disk_chunk *)(hib->piglet_va +
913 	    HIBERNATE_CHUNK_SIZE);
914 
915 	/* Write chunk table */
916 	for (i = 0; i < hibernate_chunk_table_size; i += MAXPHYS) {
917 		if ((err = hib->io_func(hib->dev,
918 		    hib->chunktable_offset + (i/DEV_BSIZE),
919 		    (vaddr_t)(hibernate_chunk_table_start + i),
920 		    MAXPHYS, HIB_W, hib->io_page))) {
921 			DPRINTF("chunktable write error: %d\n", err);
922 			return (err);
923 		}
924 	}
925 
926 	return (0);
927 }
928 
929 /*
930  * Write an empty hiber_info to the swap signature block, which is
931  * guaranteed to not match any valid hib.
932  */
933 int
934 hibernate_clear_signature(void)
935 {
936 	union hibernate_info blank_hiber_info;
937 	union hibernate_info hib;
938 
939 	/* Zero out a blank hiber_info */
940 	memset(&blank_hiber_info, 0, sizeof(union hibernate_info));
941 
942 	/* Get the signature block location */
943 	if (get_hibernate_info(&hib, 0))
944 		return (1);
945 
946 	/* Write (zeroed) hibernate info to disk */
947 	DPRINTF("clearing hibernate signature block location: %lld\n",
948 		hib.sig_offset);
949 	if (hibernate_block_io(&hib,
950 	    hib.sig_offset,
951 	    DEV_BSIZE, (vaddr_t)&blank_hiber_info, 1))
952 		printf("Warning: could not clear hibernate signature\n");
953 
954 	return (0);
955 }
956 
957 /*
958  * Check chunk range overlap when calculating whether or not to copy a
959  * compressed chunk to the piglet area before decompressing.
960  *
961  * Returns zero if the ranges do not overlap, non-zero otherwise.
962  */
963 int
964 hibernate_check_overlap(paddr_t r1s, paddr_t r1e, paddr_t r2s, paddr_t r2e)
965 {
966 	/* case A : end of r1 overlaps start of r2 */
967 	if (r1s < r2s && r1e > r2s)
968 		return (1);
969 
970 	/* case B : r1 entirely inside r2 */
971 	if (r1s >= r2s && r1e <= r2e)
972 		return (1);
973 
974 	/* case C : r2 entirely inside r1 */
975 	if (r2s >= r1s && r2e <= r1e)
976 		return (1);
977 
978 	/* case D : end of r2 overlaps start of r1 */
979 	if (r2s < r1s && r2e > r1s)
980 		return (1);
981 
982 	return (0);
983 }
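
/*
 * Worked example (illustrative): r1 = [0x1000, 0x5000) and
 * r2 = [0x4000, 0x8000) overlap via case A above, while
 * r1 = [0x1000, 0x4000) and r2 = [0x4000, 0x8000) do not, since end
 * addresses are compared exclusively (adjacent ranges don't overlap).
 */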
984 
985 /*
986  * Compare two hibernate_infos to determine if they are the same (i.e.,
987  * whether we should be performing a hibernate resume on this machine).
988  * Not all fields are checked - just enough to verify that the machine
989  * has the same memory configuration and kernel as the one that
990  * wrote the signature previously.
991  */
992 int
993 hibernate_compare_signature(union hibernate_info *mine,
994     union hibernate_info *disk)
995 {
996 	u_int i;
997 
998 	if (mine->nranges != disk->nranges) {
999 		DPRINTF("hibernate memory range count mismatch\n");
1000 		return (1);
1001 	}
1002 
1003 	if (strcmp(mine->kernel_version, disk->kernel_version) != 0) {
1004 		DPRINTF("hibernate kernel version mismatch\n");
1005 		return (1);
1006 	}
1007 
1008 	for (i = 0; i < mine->nranges; i++) {
1009 		if ((mine->ranges[i].base != disk->ranges[i].base) ||
1010 		    (mine->ranges[i].end != disk->ranges[i].end)) {
1011 			DPRINTF("hib range %d mismatch [%p-%p != %p-%p]\n",
1012 				i,
1013 				(void *)mine->ranges[i].base,
1014 				(void *)mine->ranges[i].end,
1015 				(void *)disk->ranges[i].base,
1016 				(void *)disk->ranges[i].end);
1017 			return (1);
1018 		}
1019 	}
1020 
1021 	return (0);
1022 }
1023 
1024 /*
1025  * Transfers xfer_size bytes between the hibernate device specified in
1026  * hib_info at offset blkctr and the vaddr specified at dest.
1027  *
1028  * Separate offsets and pages are used to handle misaligned reads (reads
1029  * that span a page boundary).
1030  *
1031  * blkctr specifies a relative offset (relative to the start of swap),
1032  * not an absolute disk offset.
1033  *
1034  */
1035 int
1036 hibernate_block_io(union hibernate_info *hib, daddr_t blkctr,
1037     size_t xfer_size, vaddr_t dest, int iswrite)
1038 {
1039 	struct buf *bp;
1040 	struct bdevsw *bdsw;
1041 	int error;
1042 
1043 	bp = geteblk(xfer_size);
1044 	bdsw = &bdevsw[major(hib->dev)];
1045 
1046 	error = (*bdsw->d_open)(hib->dev, FREAD, S_IFCHR, curproc);
1047 	if (error) {
1048 		printf("hibernate_block_io open failed\n");
1049 		return (1);
1050 	}
1051 
1052 	if (iswrite)
1053 		bcopy((caddr_t)dest, bp->b_data, xfer_size);
1054 
1055 	bp->b_bcount = xfer_size;
1056 	bp->b_blkno = blkctr;
1057 	CLR(bp->b_flags, B_READ | B_WRITE | B_DONE);
1058 	SET(bp->b_flags, B_BUSY | (iswrite ? B_WRITE : B_READ) | B_RAW);
1059 	bp->b_dev = hib->dev;
1060 	(*bdsw->d_strategy)(bp);
1061 
1062 	error = biowait(bp);
1063 	if (error) {
1064 		printf("hib block_io biowait error %d blk %lld size %zu\n",
1065 			error, (long long)blkctr, xfer_size);
1066 		error = (*bdsw->d_close)(hib->dev, 0, S_IFCHR,
1067 		    curproc);
1068 		if (error)
1069 			printf("hibernate_block_io error close failed\n");
1070 		return (1);
1071 	}
1072 
1073 	error = (*bdsw->d_close)(hib->dev, FREAD, S_IFCHR, curproc);
1074 	if (error) {
1075 		printf("hibernate_block_io close failed\n");
1076 		return (1);
1077 	}
1078 
1079 	if (!iswrite)
1080 		bcopy(bp->b_data, (caddr_t)dest, xfer_size);
1081 
1082 	bp->b_flags |= B_INVAL;
1083 	brelse(bp);
1084 
1085 	return (0);
1086 }
1087 
1088 /*
1089  * Reads the signature block from swap, checks against the current machine's
1090  * information. If the information matches, perform a resume by reading the
1091  * saved image into the pig area, and unpacking.
1092  */
1093 void
1094 hibernate_resume(void)
1095 {
1096 	union hibernate_info hib;
1097 	int s;
1098 
1099 	/* Get current running machine's hibernate info */
1100 	memset(&hib, 0, sizeof(hib));
1101 	if (get_hibernate_info(&hib, 0)) {
1102 		DPRINTF("couldn't retrieve machine's hibernate info\n");
1103 		return;
1104 	}
1105 
1106 	/* Read hibernate info from disk */
1107 	s = splbio();
1108 
1109 	DPRINTF("reading hibernate signature block location: %lld\n",
1110 		hib.sig_offset);
1111 
1112 	if (hibernate_block_io(&hib,
1113 	    hib.sig_offset,
1114 	    DEV_BSIZE, (vaddr_t)&disk_hib, 0)) {
1115 		DPRINTF("error in hibernate read");
1116 		splx(s);
1117 		return;
1118 	}
1119 
1120 	/* Check magic number */
1121 	if (disk_hib.magic != HIBERNATE_MAGIC) {
1122 		DPRINTF("wrong magic number in hibernate signature: %x\n",
1123 			disk_hib.magic);
1124 		splx(s);
1125 		return;
1126 	}
1127 
1128 	/*
1129 	 * We (possibly) found a hibernate signature. Clear signature first,
1130 	 * to prevent accidental resume or endless resume cycles later.
1131 	 */
1132 	if (hibernate_clear_signature()) {
1133 		DPRINTF("error clearing hibernate signature block\n");
1134 		splx(s);
1135 		return;
1136 	}
1137 
1138 	/*
1139 	 * If on-disk and in-memory hibernate signatures match,
1140 	 * this means we should do a resume from hibernate.
1141 	 */
1142 	if (hibernate_compare_signature(&hib, &disk_hib)) {
1143 		DPRINTF("mismatched hibernate signature block\n");
1144 		splx(s);
1145 		return;
1146 	}
1147 
1148 #ifdef MULTIPROCESSOR
1149 	hibernate_quiesce_cpus();
1150 #endif /* MULTIPROCESSOR */
1151 
1152 	/* Read the image from disk into the image (pig) area */
1153 	if (hibernate_read_image(&disk_hib))
1154 		goto fail;
1155 
1156 	if (config_suspend(device_mainbus(), DVACT_QUIESCE) != 0)
1157 		goto fail;
1158 
1159 	(void) splhigh();
1160 	hibernate_disable_intr_machdep();
1161 	cold = 1;
1162 
1163 	if (config_suspend(device_mainbus(), DVACT_SUSPEND) != 0) {
1164 		cold = 0;
1165 		hibernate_enable_intr_machdep();
1166 		goto fail;
1167 	}
1168 
1169 	pmap_kenter_pa(HIBERNATE_HIBALLOC_PAGE, HIBERNATE_HIBALLOC_PAGE,
1170 	    VM_PROT_ALL);
1171 	pmap_activate(curproc);
1172 
1173 	printf("Unpacking image...\n");
1174 
1175 	/* Switch stacks */
1176 	hibernate_switch_stack_machdep();
1177 
1178 	/* Unpack and resume */
1179 	hibernate_unpack_image(&disk_hib);
1180 
1181 fail:
1182 	splx(s);
1183 	printf("\nUnable to resume hibernated image\n");
1184 }
1185 
1186 /*
1187  * Unpack image from pig area to original location by looping through the
1188  * list of output chunks in the order they should be restored (fchunks).
1189  *
1190  * Note that due to the stack smash protector and the fact that we have
1191  * switched stacks, it is not permitted to return from this function.
1192  */
1193 void
1194 hibernate_unpack_image(union hibernate_info *hib)
1195 {
1196 	struct hibernate_disk_chunk *chunks;
1197 	union hibernate_info local_hib;
1198 	paddr_t image_cur = global_pig_start;
1199 	short i, *fchunks;
1200 	char *pva = (char *)hib->piglet_va;
1201 	struct hibernate_zlib_state *hibernate_state;
1202 
1203 	hibernate_state =
1204 	    (struct hibernate_zlib_state *)HIBERNATE_HIBALLOC_PAGE;
1205 
1206 	/* Mask off based on arch-specific piglet page size */
1207 	pva = (char *)((paddr_t)pva & (PIGLET_PAGE_MASK));
1208 	fchunks = (short *)(pva + (4 * PAGE_SIZE));
1209 
1210 	chunks = (struct hibernate_disk_chunk *)(pva +  HIBERNATE_CHUNK_SIZE);
1211 
1212 	/* Can't use the hib that's passed in after this point */
1213 	bcopy(hib, &local_hib, sizeof(union hibernate_info));
1214 
1215 	/*
1216 	 * Point of no return. Once we pass this point, only kernel code can
1217 	 * be accessed. No global variables or other kernel data structures
1218 	 * are guaranteed to be coherent after unpack starts.
1219 	 *
1220 	 * The image is now in high memory (pig area), we unpack from the pig
1221 	 * to the correct location in memory. We'll eventually end up copying
1222 	 * on top of ourself, but we are assured the kernel code here is the
1223 	 * same between the hibernated and resuming kernel, and we are running
1224 	 * on our own stack, so the overwrite is ok.
1225 	 */
1226 	hibernate_activate_resume_pt_machdep();
1227 
1228 	for (i = 0; i < local_hib.chunk_ctr; i++) {
1229 		/* Reset zlib for inflate */
1230 		if (hibernate_zlib_reset(&local_hib, 0) != Z_OK)
1231 			panic("hibernate failed to reset zlib for inflate");
1232 
1233 		hibernate_process_chunk(&local_hib, &chunks[fchunks[i]],
1234 		    image_cur);
1235 
1236 		image_cur += chunks[fchunks[i]].compressed_size;
1237 
1238 	}
1239 
1240 	/*
1241 	 * Resume the loaded kernel by jumping to the MD resume vector.
1242 	 * We won't be returning from this call.
1243 	 */
1244 	hibernate_resume_machdep();
1245 }
1246 
1247 /*
1248  * Bounce a compressed image chunk to the piglet, entering mappings for the
1249  * copied pages as needed
1250  */
1251 void
1252 hibernate_copy_chunk_to_piglet(paddr_t img_cur, vaddr_t piglet, size_t size)
1253 {
1254 	size_t ct, ofs;
1255 	paddr_t src = img_cur;
1256 	vaddr_t dest = piglet;
1257 
1258 	/* Copy first partial page */
1259 	ct = (PAGE_SIZE) - (src & PAGE_MASK);
1260 	ofs = (src & PAGE_MASK);
1261 
1262 	if (ct < PAGE_SIZE) {
1263 		hibernate_enter_resume_mapping(HIBERNATE_INFLATE_PAGE,
1264 			(src - ofs), 0);
1265 		hibernate_flush();
1266 		bcopy((caddr_t)(HIBERNATE_INFLATE_PAGE + ofs), (caddr_t)dest, ct);
1267 		src += ct;
1268 		dest += ct;
1269 	}
1270 
1271 	/* Copy remaining pages */
1272 	while (src < size + img_cur) {
1273 		hibernate_enter_resume_mapping(HIBERNATE_INFLATE_PAGE, src, 0);
1274 		hibernate_flush();
1275 		ct = PAGE_SIZE;
1276 		bcopy((caddr_t)(HIBERNATE_INFLATE_PAGE), (caddr_t)dest, ct);
1277 		hibernate_flush();
1278 		src += ct;
1279 		dest += ct;
1280 	}
1281 }
1282 
1283 /*
1284  * Process a chunk by bouncing it to the piglet, followed by unpacking
1285  */
1286 void
1287 hibernate_process_chunk(union hibernate_info *hib,
1288     struct hibernate_disk_chunk *chunk, paddr_t img_cur)
1289 {
1290 	char *pva = (char *)hib->piglet_va;
1291 
1292 	hibernate_copy_chunk_to_piglet(img_cur,
1293 	 (vaddr_t)(pva + (HIBERNATE_CHUNK_SIZE * 2)), chunk->compressed_size);
1294 
1295 	hibernate_inflate_region(hib, chunk->base,
1296 	    (vaddr_t)(pva + (HIBERNATE_CHUNK_SIZE * 2)),
1297 	    chunk->compressed_size);
1298 }
1299 
1300 /*
1301  * Write a compressed version of this machine's memory to disk, at the
1302  * precalculated swap offset:
1303  *
1304  * end of swap - signature block size - chunk table size - memory size
1305  *
1306  * The function begins by looping through each phys mem range, cutting each
1307  * one into MD sized chunks. These chunks are then compressed individually
1308  * and written out to disk, in phys mem order. Some chunks might compress
1309  * more than others, and for this reason, each chunk's size is recorded
1310  * in the chunk table, which is written to disk after the image has
1311  * properly been compressed and written (in hibernate_write_chunktable).
1312  *
1313  * When this function is called, the machine is nearly suspended - most
1314  * devices are quiesced/suspended, interrupts are off, and cold has
1315  * been set. This means that there can be no side effects once the
1316  * write has started, and the write function itself can also have no
1317  * side effects. This also means no printfs are permitted (since printf
1318  * has side effects.)
1319  *
1320  * Return values :
1321  *
1322  * 0      - success
1323  * EIO    - I/O error occurred writing the chunks
1324  * EINVAL - Failed to write a complete range
1325  * ENOMEM - Memory allocation failure during preparation of the zlib arena
1326  */
1327 int
1328 hibernate_write_chunks(union hibernate_info *hib)
1329 {
1330 	paddr_t range_base, range_end, inaddr, temp_inaddr;
1331 	size_t nblocks, out_remaining, used;
1332 	struct hibernate_disk_chunk *chunks;
1333 	vaddr_t hibernate_io_page = hib->piglet_va + PAGE_SIZE;
1334 	daddr_t blkctr = 0;
1335 	int i, err;
1336 	struct hibernate_zlib_state *hibernate_state;
1337 
1338 	hibernate_state =
1339 	    (struct hibernate_zlib_state *)HIBERNATE_HIBALLOC_PAGE;
1340 
1341 	hib->chunk_ctr = 0;
1342 
1343 	/*
1344 	 * Allocate VA for the temp and copy page.
1345 	 *
1346 	 * These will become part of the suspended kernel and will
1347 	 * be freed in hibernate_free, upon resume.
1348 	 */
1349 	hibernate_temp_page = (vaddr_t)km_alloc(PAGE_SIZE, &kv_any,
1350 	    &kp_none, &kd_nowait);
1351 	if (!hibernate_temp_page)
1352 		return (ENOMEM);
1353 
1354 	hibernate_copy_page = (vaddr_t)km_alloc(PAGE_SIZE, &kv_any,
1355 	    &kp_none, &kd_nowait);
1356 	if (!hibernate_copy_page) {
1357 		DPRINTF("out of memory allocating hibernate_copy_page\n");
1358 		return (ENOMEM);
1359 	}
1360 
1361 	pmap_kenter_pa(hibernate_copy_page,
1362 	    (hib->piglet_pa + 3*PAGE_SIZE), VM_PROT_ALL);
1363 
1364 	pmap_activate(curproc);
1365 
1366 	chunks = (struct hibernate_disk_chunk *)(hib->piglet_va +
1367 	    HIBERNATE_CHUNK_SIZE);
1368 
1369 	/* Calculate the chunk regions */
1370 	for (i = 0; i < hib->nranges; i++) {
1371 		range_base = hib->ranges[i].base;
1372 		range_end = hib->ranges[i].end;
1373 
1374 		inaddr = range_base;
1375 
1376 		while (inaddr < range_end) {
1377 			chunks[hib->chunk_ctr].base = inaddr;
1378 			if (inaddr + HIBERNATE_CHUNK_SIZE < range_end)
1379 				chunks[hib->chunk_ctr].end = inaddr +
1380 				    HIBERNATE_CHUNK_SIZE;
1381 			else
1382 				chunks[hib->chunk_ctr].end = range_end;
1383 
1384 			inaddr += HIBERNATE_CHUNK_SIZE;
1385 			hib->chunk_ctr++;
1386 		}
1387 	}
1388 
1389 	/* Compress and write the chunks in the chunktable */
1390 	for (i = 0; i < hib->chunk_ctr; i++) {
1391 		range_base = chunks[i].base;
1392 		range_end = chunks[i].end;
1393 
1394 		chunks[i].offset = blkctr + hib->image_offset;
1395 
1396 		/* Reset zlib for deflate */
1397 		if (hibernate_zlib_reset(hib, 1) != Z_OK) {
1398 			DPRINTF("hibernate_zlib_reset failed for deflate\n");
1399 			return (ENOMEM);
1400 		}
1401 
1402 		inaddr = range_base;
1403 
1404 		/*
1405 		 * For each range, loop through its phys mem region
1406 		 * and write out the chunks (the last chunk might be
1407 		 * smaller than the chunk size).
1408 		 */
1409 		while (inaddr < range_end) {
1410 			out_remaining = PAGE_SIZE;
1411 			while (out_remaining > 0 && inaddr < range_end) {
1412 
1413 				/*
1414 				 * Adjust for regions that are not evenly
1415 				 * divisible by PAGE_SIZE or overflowed
1416 				 * pages from the previous iteration.
1417 				 */
1418 				temp_inaddr = (inaddr & PAGE_MASK) +
1419 				    hibernate_copy_page;
1420 
1421 				/* Deflate from temp_inaddr to IO page */
1422 				if (inaddr != range_end) {
1423 					pmap_kenter_pa(hibernate_temp_page,
1424 					    inaddr & PMAP_PA_MASK, VM_PROT_ALL);
1425 
1426 					pmap_activate(curproc);
1427 
1428 					bcopy((caddr_t)hibernate_temp_page,
1429 					    (caddr_t)hibernate_copy_page,
1430 					    PAGE_SIZE);
1431 					inaddr += hibernate_deflate(hib,
1432 					    temp_inaddr, &out_remaining);
1433 				}
1434 
1435 				if (out_remaining == 0) {
1436 					/* Filled up the page */
1437 					nblocks =
1438 					    PAGE_SIZE / DEV_BSIZE;
1439 
1440 					if ((err = hib->io_func(hib->dev,
1441 					    blkctr + hib->image_offset,
1442 					    (vaddr_t)hibernate_io_page,
1443 					    PAGE_SIZE, HIB_W, hib->io_page))) {
1444 						DPRINTF("hib write error %d\n",
1445 						    err);
1446 						return (err);
1447 					}
1448 
1449 					blkctr += nblocks;
1450 				}
1451 			}
1452 		}
1453 
1454 		if (inaddr != range_end) {
1455 			DPRINTF("deflate range ended prematurely\n");
1456 			return (EINVAL);
1457 		}
1458 
1459 		/*
1460 		 * End of range. Round up to next secsize bytes
1461 		 * after finishing compress
1462 		 */
1463 		if (out_remaining == 0)
1464 			out_remaining = PAGE_SIZE;
1465 
1466 		/* Finish compress */
1467 		hibernate_state->hib_stream.next_in = (caddr_t)inaddr;
1468 		hibernate_state->hib_stream.avail_in = 0;
1469 		hibernate_state->hib_stream.next_out =
1470 		    (caddr_t)hibernate_io_page + (PAGE_SIZE - out_remaining);
1471 
1472 		/* We have an extra output page available for finalize */
1473 		hibernate_state->hib_stream.avail_out =
1474 			out_remaining + PAGE_SIZE;
1475 
1476 		if ((err = deflate(&hibernate_state->hib_stream, Z_FINISH)) !=
1477 		    Z_STREAM_END) {
1478 			DPRINTF("deflate error in output stream: %d\n", err);
1479 			return (err);
1480 		}
1481 
1482 		out_remaining = hibernate_state->hib_stream.avail_out;
1483 
1484 		used = 2*PAGE_SIZE - out_remaining;
1485 		nblocks = used / DEV_BSIZE;
1486 
1487 		/* Round up to next block if needed */
1488 		if (used % DEV_BSIZE != 0)
1489 			nblocks++;
1490 
1491 		/* Write final block(s) for this chunk */
1492 		if ((err = hib->io_func(hib->dev, blkctr + hib->image_offset,
1493 		    (vaddr_t)hibernate_io_page, nblocks*DEV_BSIZE,
1494 		    HIB_W, hib->io_page))) {
1495 			DPRINTF("hib final write error %d\n", err);
1496 			return (err);
1497 		}
1498 
1499 		blkctr += nblocks;
1500 
1501 		chunks[i].compressed_size = (blkctr + hib->image_offset -
1502 		    chunks[i].offset) * DEV_BSIZE;
1503 	}
1504 
1505 	hib->chunktable_offset = hib->image_offset + blkctr;
1506 	return (0);
1507 }
1508 
1509 /*
1510  * Reset the zlib stream state and allocate a new hiballoc area for either
1511  * inflate or deflate. This function is called once for each hibernate chunk.
1512  * Calling hiballoc_init multiple times is acceptable since the memory it is
1513  * provided is unmanaged memory (stolen). We use the memory provided to us
1514  * by the piglet allocated via the supplied hib.
1515  */
1516 int
1517 hibernate_zlib_reset(union hibernate_info *hib, int deflate)
1518 {
1519 	vaddr_t hibernate_zlib_start;
1520 	size_t hibernate_zlib_size;
1521 	char *pva = (char *)hib->piglet_va;
1522 	struct hibernate_zlib_state *hibernate_state;
1523 
1524 	hibernate_state =
1525 	    (struct hibernate_zlib_state *)HIBERNATE_HIBALLOC_PAGE;
1526 
1527 	if (!deflate)
1528 		pva = (char *)((paddr_t)pva & (PIGLET_PAGE_MASK));
1529 
1530 	hibernate_zlib_start = (vaddr_t)(pva + (28 * PAGE_SIZE));
1531 	hibernate_zlib_size = 80 * PAGE_SIZE;
1532 
1533 	memset((void *)hibernate_zlib_start, 0, hibernate_zlib_size);
1534 	memset(hibernate_state, 0, PAGE_SIZE);
1535 
1536 	/* Set up stream structure */
1537 	hibernate_state->hib_stream.zalloc = (alloc_func)hibernate_zlib_alloc;
1538 	hibernate_state->hib_stream.zfree = (free_func)hibernate_zlib_free;
1539 
1540 	/* Initialize the hiballoc arena for zlib allocs/frees */
1541 	hiballoc_init(&hibernate_state->hiballoc_arena,
1542 	    (caddr_t)hibernate_zlib_start, hibernate_zlib_size);
1543 
1544 	if (deflate) {
1545 		return deflateInit(&hibernate_state->hib_stream,
1546 		    Z_BEST_SPEED);
1547 	} else
1548 		return inflateInit(&hibernate_state->hib_stream);
1549 }
1550 
1551 /*
1552  * Reads the hibernated memory image from disk, whose location and
1553  * size are recorded in hib. Begin by reading the persisted
1554  * chunk table, which records the original chunk placement location
1555  * and compressed size for each. Next, allocate a pig region of
1556  * sufficient size to hold the compressed image. Next, read the
1557  * chunks into the pig area (calling hibernate_read_chunks to do this),
1558  * and finally, if all of the above succeeds, clear the hibernate signature.
1559  * The function will then return to hibernate_resume, which will proceed
1560  * to unpack the pig image to the correct place in memory.
1561  */
1562 int
1563 hibernate_read_image(union hibernate_info *hib)
1564 {
1565 	size_t compressed_size, disk_size, chunktable_size, pig_sz;
1566 	paddr_t image_start, image_end, pig_start, pig_end;
1567 	struct hibernate_disk_chunk *chunks;
1568 	daddr_t blkctr;
1569 	vaddr_t chunktable = (vaddr_t)NULL;
1570 	paddr_t piglet_chunktable = hib->piglet_pa +
1571 	    HIBERNATE_CHUNK_SIZE;
1572 	int i, status;
1573 
1574 	status = 0;
1575 	pmap_activate(curproc);
1576 
1577 	/* Calculate total chunk table size in disk blocks */
1578 	chunktable_size = HIBERNATE_CHUNK_TABLE_SIZE / DEV_BSIZE;
1579 
1580 	blkctr = hib->chunktable_offset;
1581 
1582 	chunktable = (vaddr_t)km_alloc(HIBERNATE_CHUNK_TABLE_SIZE, &kv_any,
1583 	    &kp_none, &kd_nowait);
1584 
1585 	if (!chunktable)
1586 		return (1);
1587 
1588 	/* Map chunktable pages */
1589 	for (i = 0; i < HIBERNATE_CHUNK_TABLE_SIZE; i += PAGE_SIZE)
1590 		pmap_kenter_pa(chunktable + i, piglet_chunktable + i,
1591 		    VM_PROT_ALL);
1592 	pmap_update(pmap_kernel());
1593 
1594 	/* Read the chunktable from disk into the piglet chunktable */
1595 	for (i = 0; i < HIBERNATE_CHUNK_TABLE_SIZE;
1596 	    i += MAXPHYS, blkctr += MAXPHYS/DEV_BSIZE)
1597 		hibernate_block_io(hib, blkctr, MAXPHYS,
1598 		    chunktable + i, 0);
1599 
1600 	blkctr = hib->image_offset;
1601 	compressed_size = 0;
1602 
1603 	chunks = (struct hibernate_disk_chunk *)chunktable;
1604 
1605 	for (i = 0; i < hib->chunk_ctr; i++)
1606 		compressed_size += chunks[i].compressed_size;
1607 
1608 	disk_size = compressed_size;
1609 
1610 	printf("unhibernating @ block %lld length %lu bytes\n",
1611 	    hib->sig_offset - chunktable_size,
1612 	    compressed_size);
1613 
1614 	/* Allocate the pig area */
1615 	pig_sz = compressed_size + HIBERNATE_CHUNK_SIZE;
1616 	if (uvm_pmr_alloc_pig(&pig_start, pig_sz) == ENOMEM) {
1617 		status = 1;
1618 		goto unmap;
1619 	}
1620 
1621 	pig_end = pig_start + pig_sz;
1622 
1623 	/* Calculate image extents. Pig image must end on a chunk boundary. */
1624 	image_end = pig_end & ~(HIBERNATE_CHUNK_SIZE - 1);
1625 	image_start = image_end - disk_size;
1626 
1627 	hibernate_read_chunks(hib, image_start, image_end, disk_size,
1628 	    chunks);
1629 
1630 	/* Prepare the resume time pmap/page table */
1631 	hibernate_populate_resume_pt(hib, image_start, image_end);
1632 
1633 unmap:
1634 	/* Unmap chunktable pages */
1635 	pmap_kremove(chunktable, HIBERNATE_CHUNK_TABLE_SIZE);
1636 	pmap_update(pmap_kernel());
1637 
1638 	return (status);
1639 }
1640 
1641 /*
1642  * Read the hibernated memory chunks from disk (chunk information at this
1643  * point is stored in the piglet) into the pig area specified by
1644  * [pig_start .. pig_end]. Order the chunks so that the final chunk is the
1645  * only chunk with overlap possibilities.
1646  */
1647 int
1648 hibernate_read_chunks(union hibernate_info *hib, paddr_t pig_start,
1649     paddr_t pig_end, size_t image_compr_size,
1650     struct hibernate_disk_chunk *chunks)
1651 {
1652 	paddr_t img_index, img_cur, r1s, r1e, r2s, r2e;
1653 	paddr_t copy_start, copy_end;
1654 	paddr_t piglet_base = hib->piglet_pa;
1655 	paddr_t piglet_end = piglet_base + HIBERNATE_CHUNK_SIZE;
1656 	daddr_t blkctr;
1657 	size_t processed, compressed_size, read_size;
1658 	int overlap, found, nchunks, nochunks = 0, nfchunks = 0, npchunks = 0;
1659 	int num_io_pages;
1660 	short *ochunks, *pchunks, *fchunks, i, j;
1661 	vaddr_t tempva = (vaddr_t)NULL, hibernate_fchunk_area = (vaddr_t)NULL;
1662 
1663 	global_pig_start = pig_start;
1664 
1665 	pmap_activate(curproc);
1666 
1667 	/*
1668 	 * These mappings go into the resuming kernel's page table, and are
1669 	 * used only during image read. They disappear
1670 	 * when the suspended kernel is unpacked on top of us.
1671 	 */
1672 	tempva = (vaddr_t)km_alloc(MAXPHYS + PAGE_SIZE, &kv_any, &kp_none,
1673 		&kd_nowait);
1674 	if (!tempva)
1675 		return (1);
1676 	hibernate_fchunk_area = (vaddr_t)km_alloc(24*PAGE_SIZE, &kv_any,
1677 	    &kp_none, &kd_nowait);
1678 	if (!hibernate_fchunk_area)
1679 		return (1);
1680 
1681 	/* Final output chunk ordering VA */
1682 	fchunks = (short *)hibernate_fchunk_area;
1683 
1684 	/* Piglet chunk ordering VA */
1685 	pchunks = (short *)(hibernate_fchunk_area + (8*PAGE_SIZE));
1686 
1687 	/* Final chunk ordering VA */
1688 	ochunks = (short *)(hibernate_fchunk_area + (16*PAGE_SIZE));
1689 
1690 	/* Map the chunk ordering region */
1691 	for (i = 0; i < 24; i++)
1692 		pmap_kenter_pa(hibernate_fchunk_area + (i*PAGE_SIZE),
1693 			piglet_base + ((4+i)*PAGE_SIZE), VM_PROT_ALL);
1694 	pmap_update(pmap_kernel());
1695 
1696 	nchunks = hib->chunk_ctr;
1697 
1698 	/* Initially start all chunks as unplaced */
1699 	for (i = 0; i < nchunks; i++)
1700 		chunks[i].flags = 0;
1701 
1702 	/*
1703 	 * Search the list for chunks that are outside the pig area. These
1704 	 * can be placed first in the final output list.
1705 	 */
1706 	for (i = 0; i < nchunks; i++) {
1707 		if (chunks[i].end <= pig_start || chunks[i].base >= pig_end) {
1708 			ochunks[nochunks] = i;
1709 			fchunks[nfchunks] = i;
1710 			nochunks++;
1711 			nfchunks++;
1712 			chunks[i].flags |= HIBERNATE_CHUNK_USED;
1713 		}
1714 	}
1715 
1716 	/*
1717 	 * Walk the ordering, place the chunks in ascending memory order.
1718 	 * Conflicts might arise, these are handled next.
1719 	 */
1720 	do {
1721 		img_index = -1;
1722 		found = 0;
1723 		j = -1;
1724 		for (i = 0; i < nchunks; i++)
1725 			if (chunks[i].base < img_index &&
1726 			    chunks[i].flags == 0) {
1727 				j = i;
1728 				img_index = chunks[i].base;
1729 			}
1730 
1731 		if (j != -1) {
1732 			found = 1;
1733 			ochunks[nochunks] = j;
1734 			nochunks++;
1735 			chunks[j].flags |= HIBERNATE_CHUNK_PLACED;
1736 		}
1737 	} while (found);
1738 
1739 	img_index = pig_start;
1740 
1741 	/*
1742 	 * Identify chunk output conflicts (chunks whose pig load area
1743 	 * corresponds to their original memory placement location)
1744 	 */
1745 	for (i = 0; i < nochunks; i++) {
1746 		overlap = 0;
1747 		r1s = img_index;
1748 		r1e = img_index + chunks[ochunks[i]].compressed_size;
1749 		r2s = chunks[ochunks[i]].base;
1750 		r2e = chunks[ochunks[i]].end;
1751 
1752 		overlap = hibernate_check_overlap(r1s, r1e, r2s, r2e);
1753 		if (overlap)
1754 			chunks[ochunks[i]].flags |= HIBERNATE_CHUNK_CONFLICT;
1755 		img_index += chunks[ochunks[i]].compressed_size;
1756 	}
1757 
1758 	/*
1759 	 * Prepare the final output chunk list. Calculate an output
1760 	 * inflate strategy for overlapping chunks if needed.
1761 	 */
1762 	for (i = 0; i < nochunks; i++) {
1763 		/*
1764 		 * If a conflict is detected, consume enough compressed
1765 		 * output chunks to fill the piglet
1766 		 */
1767 		if (chunks[ochunks[i]].flags & HIBERNATE_CHUNK_CONFLICT) {
1768 			copy_start = piglet_base;
1769 			copy_end = piglet_end;
1770 			npchunks = 0;
1771 			j = i;
1772 
1773 			while (copy_start < copy_end && j < nochunks) {
1774 				pchunks[npchunks] = ochunks[j];
1775 				npchunks++;
1776 				copy_start +=
1777 				    chunks[ochunks[j]].compressed_size;
1778 				i++;
1779 				j++;
1780 			}
1781 
1782 			for (j = 0; j < npchunks; j++) {
1783 				fchunks[nfchunks] = pchunks[j];
1784 				chunks[pchunks[j]].flags |=
1785 				    HIBERNATE_CHUNK_USED;
1786 				nfchunks++;
1787 			}
1788 		} else {
1789 			/*
1790 			 * No conflict, chunk can be added without copying
1791 			 */
1792 			if ((chunks[ochunks[i]].flags &
1793 			    HIBERNATE_CHUNK_USED) == 0) {
1794 				fchunks[nfchunks] = ochunks[i];
1795 				chunks[ochunks[i]].flags |=
1796 				    HIBERNATE_CHUNK_USED;
1797 				nfchunks++;
1798 			}
1799 		}
1800 	}
1801 
1802 	img_cur = pig_start;
1803 
1804 	for (i = 0; i < nfchunks; i++) {
1805 		blkctr = chunks[fchunks[i]].offset;
1806 		processed = 0;
1807 		compressed_size = chunks[fchunks[i]].compressed_size;
1808 
1809 		while (processed < compressed_size) {
1810 			if (compressed_size - processed >= MAXPHYS)
1811 				read_size = MAXPHYS;
1812 			else
1813 				read_size = compressed_size - processed;
1814 
1815 			/*
1816 			 * We're reading read_size bytes, offset from the
1817 			 * start of a page by img_cur % PAGE_SIZE, so the
1818 			 * end will be read_size + (img_cur % PAGE_SIZE)
1819 			 * from the start of the first page.  Round that
1820 			 * up to the next page size.
1821 			 */
1822 			num_io_pages = (read_size + (img_cur % PAGE_SIZE)
1823 				+ PAGE_SIZE - 1) / PAGE_SIZE;
1824 
1825 			KASSERT(num_io_pages <= MAXPHYS/PAGE_SIZE + 1);
1826 
1827 			/* Map pages for this read */
1828 			for (j = 0; j < num_io_pages; j ++)
1829 				pmap_kenter_pa(tempva + j * PAGE_SIZE,
1830 					img_cur + j * PAGE_SIZE, VM_PROT_ALL);
1831 
1832 			pmap_update(pmap_kernel());
1833 
1834 			hibernate_block_io(hib, blkctr, read_size,
1835 			    tempva + (img_cur & PAGE_MASK), 0);
1836 
1837 			blkctr += (read_size / DEV_BSIZE);
1838 
1839 			pmap_kremove(tempva, num_io_pages * PAGE_SIZE);
1840 			pmap_update(pmap_kernel());
1841 
1842 			processed += read_size;
1843 			img_cur += read_size;
1844 		}
1845 	}
1846 
1847 	pmap_kremove(hibernate_fchunk_area, PAGE_SIZE);
1848 	pmap_kremove((vaddr_t)pchunks, PAGE_SIZE);
1849 	pmap_kremove((vaddr_t)fchunks, PAGE_SIZE);
1850 	pmap_update(pmap_kernel());
1851 
1852 	return (0);
1853 }
1854 
1855 /*
1856  * Hibernating a machine comprises the following operations:
1857  *  1. Calculating this machine's hibernate_info information
1858  *  2. Allocating a piglet and saving the piglet's physaddr
1859  *  3. Calculating the memory chunks
1860  *  4. Writing the compressed chunks to disk
1861  *  5. Writing the chunk table
1862  *  6. Writing the signature block (hibernate_info)
1863  *
1864  * On most architectures, the function calling hibernate_suspend would
1865  * then power off the machine using some MD-specific implementation.
1866  */
1867 int
1868 hibernate_suspend(void)
1869 {
1870 	union hibernate_info hib;
1871 	u_long start, end;
1872 
1873 	/*
1874 	 * Calculate memory ranges, swap offsets, etc.
1875 	 * This also allocates a piglet whose physaddr is stored in
1876 	 * hib->piglet_pa and vaddr stored in hib->piglet_va
1877 	 */
1878 	if (get_hibernate_info(&hib, 1)) {
1879 		DPRINTF("failed to obtain hibernate info\n");
1880 		return (1);
1881 	}
1882 
1883 	/* Find a page-addressed region in swap [start,end] */
1884 	if (uvm_hibswap(hib.dev, &start, &end)) {
1885 		printf("cannot find any swap\n");
1886 		return (1);
1887 	}
1888 
1889 	if (end - start < 1000) {
1890 		printf("%lu\n is too small", end - start);
1891 		return (1);
1892 	}
1893 
1894 	/* Calculate block offsets in swap */
1895 	hib.image_offset = ctod(start);
1896 
1897 	/* XXX side effect */
1898 	DPRINTF("hibernate @ block %lld max-length %lu blocks\n",
1899 	    hib.image_offset, ctod(end) - ctod(start));
1900 
1901 	pmap_kenter_pa(HIBERNATE_HIBALLOC_PAGE, HIBERNATE_HIBALLOC_PAGE,
1902 		VM_PROT_ALL);
1903 	pmap_activate(curproc);
1904 
1905 	/* Stash the piglet VA so we can free it in the resuming kernel */
1906 	global_piglet_va = hib.piglet_va;
1907 
1908 	DPRINTF("hibernate: writing chunks\n");
1909 	if (hibernate_write_chunks(&hib)) {
1910 		DPRINTF("hibernate_write_chunks failed\n");
1911 		return (1);
1912 	}
1913 
1914 	DPRINTF("hibernate: writing chunktable\n");
1915 	if (hibernate_write_chunktable(&hib)) {
1916 		DPRINTF("hibernate_write_chunktable failed\n");
1917 		return (1);
1918 	}
1919 
1920 	DPRINTF("hibernate: writing signature\n");
1921 	if (hibernate_write_signature(&hib)) {
1922 		DPRINTF("hibernate_write_signature failed\n");
1923 		return (1);
1924 	}
1925 
1926 	/* Allow the disk to settle */
1927 	delay(500000);
1928 
1929 	/*
1930 	 * Give the device-specific I/O function a notification that we're
1931 	 * done, and that it can clean up or shutdown as needed.
1932 	 */
1933 	hib.io_func(hib.dev, 0, (vaddr_t)NULL, 0, HIB_DONE, hib.io_page);
1934 
1935 	return (0);
1936 }
1937 
1938 /*
1939  * Free items allocated by hibernate_suspend()
1940  */
1941 void
1942 hibernate_free(void)
1943 {
1944 	if (global_piglet_va)
1945 		uvm_pmr_free_piglet(global_piglet_va,
1946 		    3*HIBERNATE_CHUNK_SIZE);
1947 
1948 	if (hibernate_copy_page)
1949 		pmap_kremove(hibernate_copy_page, PAGE_SIZE);
1950 	if (hibernate_temp_page)
1951 		pmap_kremove(hibernate_temp_page, PAGE_SIZE);
1952 
1953 	pmap_update(pmap_kernel());
1954 
1955 	if (hibernate_copy_page)
1956 		km_free((void *)hibernate_copy_page, PAGE_SIZE,
1957 		    &kv_any, &kp_none);
1958 	if (hibernate_temp_page)
1959 		km_free((void *)hibernate_temp_page, PAGE_SIZE,
1960 		    &kv_any, &kp_none);
1961 
1962 	global_piglet_va = 0;
1963 	hibernate_copy_page = 0;
1964 	hibernate_temp_page = 0;
1965 }
1966