xref: /openbsd-src/sys/kern/subr_hibernate.c (revision bd831450ecfa352a2566676139bdad6cd231d422)
1 /*	$OpenBSD: subr_hibernate.c,v 1.93 2014/07/09 12:43:51 mlarkin Exp $	*/
2 
3 /*
4  * Copyright (c) 2011 Ariane van der Steldt <ariane@stack.nl>
5  * Copyright (c) 2011 Mike Larkin <mlarkin@openbsd.org>
6  *
7  * Permission to use, copy, modify, and distribute this software for any
8  * purpose with or without fee is hereby granted, provided that the above
9  * copyright notice and this permission notice appear in all copies.
10  *
11  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
12  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
14  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18  */
19 
20 #include <sys/hibernate.h>
21 #include <sys/malloc.h>
22 #include <sys/param.h>
23 #include <sys/tree.h>
24 #include <sys/systm.h>
25 #include <sys/disklabel.h>
26 #include <sys/disk.h>
27 #include <sys/conf.h>
28 #include <sys/buf.h>
29 #include <sys/fcntl.h>
30 #include <sys/stat.h>
31 #include <uvm/uvm.h>
32 #include <uvm/uvm_swap.h>
33 #include <machine/hibernate.h>
34 
35 /*
36  * Hibernate piglet layout information
37  *
38  * The piglet is a scratch area of memory allocated by the suspending kernel.
39  * Its phys and virt addrs are recorded in the signature block. The piglet is
40  * used to guarantee an unused area of memory that can be used by the resuming
41  * kernel for various things. The piglet is excluded during unpack operations.
42  * The piglet size is presently 3*HIBERNATE_CHUNK_SIZE (typically 3*4MB).
43  *
44  * Offset from piglet_base	Purpose
45  * ----------------------------------------------------------------------------
46  * 0				I/O page used during resume
47  * 1*PAGE_SIZE		 	I/O page used during hibernate suspend
48  * 2*PAGE_SIZE		 	I/O page used during hibernate suspend
49  * 3*PAGE_SIZE			copy page used during hibernate suspend
50  * 4*PAGE_SIZE			final chunk ordering list (8 pages)
51  * 12*PAGE_SIZE			piglet chunk ordering list (8 pages)
52  * 20*PAGE_SIZE			temp chunk ordering list (8 pages)
53  * 28*PAGE_SIZE			start of hiballoc area
54  * 108*PAGE_SIZE		end of hiballoc area (80 pages)
55  * ...				unused
56  * HIBERNATE_CHUNK_SIZE		start of hibernate chunk table
57  * 2*HIBERNATE_CHUNK_SIZE	bounce area for chunks being unpacked
58  * 3*HIBERNATE_CHUNK_SIZE	end of piglet
59  */
60 
61 /* Temporary vaddr ranges used during hibernate */
62 vaddr_t hibernate_temp_page;
63 vaddr_t hibernate_copy_page;
64 
65 /* Hibernate info as read from disk during resume */
66 union hibernate_info disk_hib;
67 paddr_t global_pig_start;
68 vaddr_t global_piglet_va;
69 
70 /* #define HIB_DEBUG */
71 #ifdef HIB_DEBUG
72 int	hib_debug = 99;
73 #define DPRINTF(x...)     do { if (hib_debug) printf(x); } while (0)
74 #define DNPRINTF(n,x...)  do { if (hib_debug > (n)) printf(x); } while (0)
75 #else
76 #define DPRINTF(x...)
77 #define DNPRINTF(n,x...)
78 #endif
79 
80 #ifndef NO_PROPOLICE
81 extern long __guard_local;
82 #endif /* ! NO_PROPOLICE */
83 
84 void hibernate_copy_chunk_to_piglet(paddr_t, vaddr_t, size_t);
85 
86 /*
87  * Hib alloc enforced alignment.
88  */
89 #define HIB_ALIGN		8 /* bytes alignment */
90 
91 /*
92  * sizeof builtin operation, but with alignment constraint.
93  */
94 #define HIB_SIZEOF(_type)	roundup(sizeof(_type), HIB_ALIGN)
95 
96 struct hiballoc_entry {
97 	size_t			hibe_use;
98 	size_t			hibe_space;
99 	RB_ENTRY(hiballoc_entry) hibe_entry;
100 };
101 
102 /*
103  * Compare hiballoc entries based on the address they manage.
104  *
105  * Since the address is fixed, relative to struct hiballoc_entry,
106  * we just compare the hiballoc_entry pointers.
107  */
108 static __inline int
109 hibe_cmp(struct hiballoc_entry *l, struct hiballoc_entry *r)
110 {
111 	return l < r ? -1 : (l > r);
112 }
113 
114 RB_PROTOTYPE(hiballoc_addr, hiballoc_entry, hibe_entry, hibe_cmp)
115 
116 /*
117  * Given a hiballoc entry, return the address it manages.
118  */
119 static __inline void *
120 hib_entry_to_addr(struct hiballoc_entry *entry)
121 {
122 	caddr_t addr;
123 
124 	addr = (caddr_t)entry;
125 	addr += HIB_SIZEOF(struct hiballoc_entry);
126 	return addr;
127 }
128 
129 /*
130  * Given an address, find the hiballoc that corresponds.
131  */
132 static __inline struct hiballoc_entry*
133 hib_addr_to_entry(void *addr_param)
134 {
135 	caddr_t addr;
136 
137 	addr = (caddr_t)addr_param;
138 	addr -= HIB_SIZEOF(struct hiballoc_entry);
139 	return (struct hiballoc_entry*)addr;
140 }
141 
142 RB_GENERATE(hiballoc_addr, hiballoc_entry, hibe_entry, hibe_cmp)
143 
144 /*
145  * Allocate memory from the arena.
146  *
147  * Returns NULL if no memory is available.
148  */
149 void *
150 hib_alloc(struct hiballoc_arena *arena, size_t alloc_sz)
151 {
152 	struct hiballoc_entry *entry, *new_entry;
153 	size_t find_sz;
154 
155 	/*
156 	 * Enforce alignment of HIB_ALIGN bytes.
157 	 *
158 	 * Note that, because the entry is put in front of the allocation,
159 	 * 0-byte allocations are guaranteed a unique address.
160 	 */
161 	alloc_sz = roundup(alloc_sz, HIB_ALIGN);
162 
163 	/*
164 	 * Find an entry with hibe_space >= find_sz.
165 	 *
166 	 * If the root node is not large enough, we switch to tree traversal.
167 	 * Because all entries are made at the bottom of the free space,
168 	 * traversal from the end has a slightly better chance of yielding
169 	 * a sufficiently large space.
170 	 */
171 	find_sz = alloc_sz + HIB_SIZEOF(struct hiballoc_entry);
172 	entry = RB_ROOT(&arena->hib_addrs);
173 	if (entry != NULL && entry->hibe_space < find_sz) {
174 		RB_FOREACH_REVERSE(entry, hiballoc_addr, &arena->hib_addrs) {
175 			if (entry->hibe_space >= find_sz)
176 				break;
177 		}
178 	}
179 
180 	/*
181 	 * Insufficient or too fragmented memory.
182 	 */
183 	if (entry == NULL)
184 		return NULL;
185 
186 	/*
187 	 * Create new entry in allocated space.
188 	 */
189 	new_entry = (struct hiballoc_entry*)(
190 	    (caddr_t)hib_entry_to_addr(entry) + entry->hibe_use);
191 	new_entry->hibe_space = entry->hibe_space - find_sz;
192 	new_entry->hibe_use = alloc_sz;
193 
194 	/*
195 	 * Insert entry.
196 	 */
197 	if (RB_INSERT(hiballoc_addr, &arena->hib_addrs, new_entry) != NULL)
198 		panic("hib_alloc: insert failure");
199 	entry->hibe_space = 0;
200 
201 	/* Return address managed by entry. */
202 	return hib_entry_to_addr(new_entry);
203 }
204 
205 /*
206  * Free a pointer previously allocated from this arena.
207  *
208  * If addr is NULL, this will be silently accepted.
209  */
210 void
211 hib_free(struct hiballoc_arena *arena, void *addr)
212 {
213 	struct hiballoc_entry *entry, *prev;
214 
215 	if (addr == NULL)
216 		return;
217 
218 	/*
219 	 * Derive entry from addr and check it is really in this arena.
220 	 */
221 	entry = hib_addr_to_entry(addr);
222 	if (RB_FIND(hiballoc_addr, &arena->hib_addrs, entry) != entry)
223 		panic("hib_free: freed item %p not in hib arena", addr);
224 
225 	/*
226 	 * Give the space in entry to its predecessor.
227 	 *
228 	 * If entry has no predecessor, change its used space into free space
229 	 * instead.
230 	 */
231 	prev = RB_PREV(hiballoc_addr, &arena->hib_addrs, entry);
232 	if (prev != NULL &&
233 	    (void *)((caddr_t)prev + HIB_SIZEOF(struct hiballoc_entry) +
234 	    prev->hibe_use + prev->hibe_space) == entry) {
235 		/* Merge entry. */
236 		RB_REMOVE(hiballoc_addr, &arena->hib_addrs, entry);
237 		prev->hibe_space += HIB_SIZEOF(struct hiballoc_entry) +
238 		    entry->hibe_use + entry->hibe_space;
239 	} else {
240 		/* Flip used memory to free space. */
241 		entry->hibe_space += entry->hibe_use;
242 		entry->hibe_use = 0;
243 	}
244 }
245 
246 /*
247  * Initialize hiballoc.
248  *
249  * The allocator will manage memmory at ptr, which is len bytes.
250  */
251 int
252 hiballoc_init(struct hiballoc_arena *arena, void *p_ptr, size_t p_len)
253 {
254 	struct hiballoc_entry *entry;
255 	caddr_t ptr;
256 	size_t len;
257 
258 	RB_INIT(&arena->hib_addrs);
259 
260 	/*
261 	 * Hib allocator enforces HIB_ALIGN alignment.
262 	 * Fixup ptr and len.
263 	 */
264 	ptr = (caddr_t)roundup((vaddr_t)p_ptr, HIB_ALIGN);
265 	len = p_len - ((size_t)ptr - (size_t)p_ptr);
266 	len &= ~((size_t)HIB_ALIGN - 1);
267 
268 	/*
269 	 * Insufficient memory to be able to allocate and also do bookkeeping.
270 	 */
271 	if (len <= HIB_SIZEOF(struct hiballoc_entry))
272 		return ENOMEM;
273 
274 	/*
275 	 * Create entry describing space.
276 	 */
277 	entry = (struct hiballoc_entry*)ptr;
278 	entry->hibe_use = 0;
279 	entry->hibe_space = len - HIB_SIZEOF(struct hiballoc_entry);
280 	RB_INSERT(hiballoc_addr, &arena->hib_addrs, entry);
281 
282 	return 0;
283 }
284 
285 /*
286  * Zero all free memory.
287  */
288 void
289 uvm_pmr_zero_everything(void)
290 {
291 	struct uvm_pmemrange	*pmr;
292 	struct vm_page		*pg;
293 	int			 i;
294 
295 	uvm_lock_fpageq();
296 	TAILQ_FOREACH(pmr, &uvm.pmr_control.use, pmr_use) {
297 		/* Zero single pages. */
298 		while ((pg = TAILQ_FIRST(&pmr->single[UVM_PMR_MEMTYPE_DIRTY]))
299 		    != NULL) {
300 			uvm_pmr_remove(pmr, pg);
301 			uvm_pagezero(pg);
302 			atomic_setbits_int(&pg->pg_flags, PG_ZERO);
303 			uvmexp.zeropages++;
304 			uvm_pmr_insert(pmr, pg, 0);
305 		}
306 
307 		/* Zero multi page ranges. */
308 		while ((pg = RB_ROOT(&pmr->size[UVM_PMR_MEMTYPE_DIRTY]))
309 		    != NULL) {
310 			pg--; /* Size tree always has second page. */
311 			uvm_pmr_remove(pmr, pg);
312 			for (i = 0; i < pg->fpgsz; i++) {
313 				uvm_pagezero(&pg[i]);
314 				atomic_setbits_int(&pg[i].pg_flags, PG_ZERO);
315 				uvmexp.zeropages++;
316 			}
317 			uvm_pmr_insert(pmr, pg, 0);
318 		}
319 	}
320 	uvm_unlock_fpageq();
321 }
322 
323 /*
324  * Mark all memory as dirty.
325  *
326  * Used to inform the system that the clean memory isn't clean for some
327  * reason, for example because we just came back from hibernate.
328  */
329 void
330 uvm_pmr_dirty_everything(void)
331 {
332 	struct uvm_pmemrange	*pmr;
333 	struct vm_page		*pg;
334 	int			 i;
335 
336 	uvm_lock_fpageq();
337 	TAILQ_FOREACH(pmr, &uvm.pmr_control.use, pmr_use) {
338 		/* Dirty single pages. */
339 		while ((pg = TAILQ_FIRST(&pmr->single[UVM_PMR_MEMTYPE_ZERO]))
340 		    != NULL) {
341 			uvm_pmr_remove(pmr, pg);
342 			atomic_clearbits_int(&pg->pg_flags, PG_ZERO);
343 			uvm_pmr_insert(pmr, pg, 0);
344 		}
345 
346 		/* Dirty multi page ranges. */
347 		while ((pg = RB_ROOT(&pmr->size[UVM_PMR_MEMTYPE_ZERO]))
348 		    != NULL) {
349 			pg--; /* Size tree always has second page. */
350 			uvm_pmr_remove(pmr, pg);
351 			for (i = 0; i < pg->fpgsz; i++)
352 				atomic_clearbits_int(&pg[i].pg_flags, PG_ZERO);
353 			uvm_pmr_insert(pmr, pg, 0);
354 		}
355 	}
356 
357 	uvmexp.zeropages = 0;
358 	uvm_unlock_fpageq();
359 }
360 
361 /*
362  * Allocate the highest address that can hold sz.
363  *
364  * sz in bytes.
365  */
366 int
367 uvm_pmr_alloc_pig(paddr_t *addr, psize_t sz)
368 {
369 	struct uvm_pmemrange	*pmr;
370 	struct vm_page		*pig_pg, *pg;
371 
372 	/*
373 	 * Convert sz to pages, since that is what pmemrange uses internally.
374 	 */
375 	sz = atop(round_page(sz));
376 
377 	uvm_lock_fpageq();
378 
379 	TAILQ_FOREACH(pmr, &uvm.pmr_control.use, pmr_use) {
380 		RB_FOREACH_REVERSE(pig_pg, uvm_pmr_addr, &pmr->addr) {
381 			if (pig_pg->fpgsz >= sz) {
382 				goto found;
383 			}
384 		}
385 	}
386 
387 	/*
388 	 * Allocation failure.
389 	 */
390 	uvm_unlock_fpageq();
391 	return ENOMEM;
392 
393 found:
394 	/* Remove page from freelist. */
395 	uvm_pmr_remove_size(pmr, pig_pg);
396 	pig_pg->fpgsz -= sz;
397 	pg = pig_pg + pig_pg->fpgsz;
398 	if (pig_pg->fpgsz == 0)
399 		uvm_pmr_remove_addr(pmr, pig_pg);
400 	else
401 		uvm_pmr_insert_size(pmr, pig_pg);
402 
403 	uvmexp.free -= sz;
404 	*addr = VM_PAGE_TO_PHYS(pg);
405 
406 	/*
407 	 * Update pg flags.
408 	 *
409 	 * Note that we trash the sz argument now.
410 	 */
411 	while (sz > 0) {
412 		KASSERT(pg->pg_flags & PQ_FREE);
413 
414 		atomic_clearbits_int(&pg->pg_flags, PG_PMAPMASK);
415 
416 		if (pg->pg_flags & PG_ZERO)
417 			uvmexp.zeropages -= sz;
418 		atomic_clearbits_int(&pg->pg_flags,
419 		    PG_ZERO|PQ_FREE);
420 
421 		pg->uobject = NULL;
422 		pg->uanon = NULL;
423 		pg->pg_version++;
424 
425 		/*
426 		 * Next.
427 		 */
428 		pg++;
429 		sz--;
430 	}
431 
432 	/* Return. */
433 	uvm_unlock_fpageq();
434 	return 0;
435 }
436 
437 /*
438  * Allocate a piglet area.
439  *
440  * This is as low as possible.
441  * Piglets are aligned.
442  *
443  * sz and align in bytes.
444  *
445  * The call will sleep for the pagedaemon to attempt to free memory.
446  * The pagedaemon may decide its not possible to free enough memory, causing
447  * the allocation to fail.
448  */
449 int
450 uvm_pmr_alloc_piglet(vaddr_t *va, paddr_t *pa, vsize_t sz, paddr_t align)
451 {
452 	paddr_t			 pg_addr, piglet_addr;
453 	struct uvm_pmemrange	*pmr;
454 	struct vm_page		*pig_pg, *pg;
455 	struct pglist		 pageq;
456 	int			 pdaemon_woken;
457 	vaddr_t			 piglet_va;
458 
459 	/* Ensure align is a power of 2 */
460 	KASSERT((align & (align - 1)) == 0);
461 
462 	pdaemon_woken = 0; /* Didn't wake the pagedaemon. */
463 
464 	/*
465 	 * Fixup arguments: align must be at least PAGE_SIZE,
466 	 * sz will be converted to pagecount, since that is what
467 	 * pmemrange uses internally.
468 	 */
469 	if (align < PAGE_SIZE)
470 		align = PAGE_SIZE;
471 	sz = round_page(sz);
472 
473 	uvm_lock_fpageq();
474 
475 	TAILQ_FOREACH_REVERSE(pmr, &uvm.pmr_control.use, uvm_pmemrange_use,
476 	    pmr_use) {
477 retry:
478 		/*
479 		 * Search for a range with enough space.
480 		 * Use the address tree, to ensure the range is as low as
481 		 * possible.
482 		 */
483 		RB_FOREACH(pig_pg, uvm_pmr_addr, &pmr->addr) {
484 			pg_addr = VM_PAGE_TO_PHYS(pig_pg);
485 			piglet_addr = (pg_addr + (align - 1)) & ~(align - 1);
486 
487 			if (atop(pg_addr) + pig_pg->fpgsz >=
488 			    atop(piglet_addr) + atop(sz))
489 				goto found;
490 		}
491 	}
492 
493 	/*
494 	 * Try to coerce the pagedaemon into freeing memory
495 	 * for the piglet.
496 	 *
497 	 * pdaemon_woken is set to prevent the code from
498 	 * falling into an endless loop.
499 	 */
500 	if (!pdaemon_woken) {
501 		pdaemon_woken = 1;
502 		if (uvm_wait_pla(ptoa(pmr->low), ptoa(pmr->high) - 1,
503 		    sz, UVM_PLA_FAILOK) == 0)
504 			goto retry;
505 	}
506 
507 	/* Return failure. */
508 	uvm_unlock_fpageq();
509 	return ENOMEM;
510 
511 found:
512 	/*
513 	 * Extract piglet from pigpen.
514 	 */
515 	TAILQ_INIT(&pageq);
516 	uvm_pmr_extract_range(pmr, pig_pg,
517 	    atop(piglet_addr), atop(piglet_addr) + atop(sz), &pageq);
518 
519 	*pa = piglet_addr;
520 	uvmexp.free -= atop(sz);
521 
522 	/*
523 	 * Update pg flags.
524 	 *
525 	 * Note that we trash the sz argument now.
526 	 */
527 	TAILQ_FOREACH(pg, &pageq, pageq) {
528 		KASSERT(pg->pg_flags & PQ_FREE);
529 
530 		atomic_clearbits_int(&pg->pg_flags, PG_PMAPMASK);
531 
532 		if (pg->pg_flags & PG_ZERO)
533 			uvmexp.zeropages--;
534 		atomic_clearbits_int(&pg->pg_flags,
535 		    PG_ZERO|PQ_FREE);
536 
537 		pg->uobject = NULL;
538 		pg->uanon = NULL;
539 		pg->pg_version++;
540 	}
541 
542 	uvm_unlock_fpageq();
543 
544 	/*
545 	 * Now allocate a va.
546 	 * Use direct mappings for the pages.
547 	 */
548 
549 	piglet_va = *va = (vaddr_t)km_alloc(sz, &kv_any, &kp_none, &kd_waitok);
550 	if (!piglet_va) {
551 		uvm_pglistfree(&pageq);
552 		return ENOMEM;
553 	}
554 
555 	/*
556 	 * Map piglet to va.
557 	 */
558 	TAILQ_FOREACH(pg, &pageq, pageq) {
559 		pmap_kenter_pa(piglet_va, VM_PAGE_TO_PHYS(pg), UVM_PROT_RW);
560 		piglet_va += PAGE_SIZE;
561 	}
562 	pmap_update(pmap_kernel());
563 
564 	return 0;
565 }
566 
567 /*
568  * Free a piglet area.
569  */
570 void
571 uvm_pmr_free_piglet(vaddr_t va, vsize_t sz)
572 {
573 	paddr_t			 pa;
574 	struct vm_page		*pg;
575 
576 	/*
577 	 * Fix parameters.
578 	 */
579 	sz = round_page(sz);
580 
581 	/*
582 	 * Find the first page in piglet.
583 	 * Since piglets are contiguous, the first pg is all we need.
584 	 */
585 	if (!pmap_extract(pmap_kernel(), va, &pa))
586 		panic("uvm_pmr_free_piglet: piglet 0x%lx has no pages", va);
587 	pg = PHYS_TO_VM_PAGE(pa);
588 	if (pg == NULL)
589 		panic("uvm_pmr_free_piglet: unmanaged page 0x%lx", pa);
590 
591 	/*
592 	 * Unmap.
593 	 */
594 	pmap_kremove(va, sz);
595 	pmap_update(pmap_kernel());
596 
597 	/*
598 	 * Free the physical and virtual memory.
599 	 */
600 	uvm_pmr_freepages(pg, atop(sz));
601 	km_free((void *)va, sz, &kv_any, &kp_none);
602 }
603 
604 /*
605  * Physmem RLE compression support.
606  *
607  * Given a physical page address, return the number of pages starting at the
608  * address that are free.  Clamps to the number of pages in
609  * HIBERNATE_CHUNK_SIZE. Returns 0 if the page at addr is not free.
610  */
611 int
612 uvm_page_rle(paddr_t addr)
613 {
614 	struct vm_page		*pg, *pg_end;
615 	struct vm_physseg	*vmp;
616 	int			 pseg_idx, off_idx;
617 
618 	pseg_idx = vm_physseg_find(atop(addr), &off_idx);
619 	if (pseg_idx == -1)
620 		return 0;
621 
622 	vmp = &vm_physmem[pseg_idx];
623 	pg = &vmp->pgs[off_idx];
624 	if (!(pg->pg_flags & PQ_FREE))
625 		return 0;
626 
627 	/*
628 	 * Search for the first non-free page after pg.
629 	 * Note that the page may not be the first page in a free pmemrange,
630 	 * therefore pg->fpgsz cannot be used.
631 	 */
632 	for (pg_end = pg; pg_end <= vmp->lastpg &&
633 	    (pg_end->pg_flags & PQ_FREE) == PQ_FREE; pg_end++)
634 		;
635 	return min((pg_end - pg), HIBERNATE_CHUNK_SIZE/PAGE_SIZE);
636 }
637 
638 /*
639  * Fills out the hibernate_info union pointed to by hiber_info
640  * with information about this machine (swap signature block
641  * offsets, number of memory ranges, kernel in use, etc)
642  */
643 int
644 get_hibernate_info(union hibernate_info *hib, int suspend)
645 {
646 	int chunktable_size;
647 	struct disklabel dl;
648 	char err_string[128], *dl_ret;
649 
650 #ifndef NO_PROPOLICE
651 	/* Save propolice guard */
652 	hib->guard = __guard_local;
653 #endif /* ! NO_PROPOLICE */
654 
655 	/* Determine I/O function to use */
656 	hib->io_func = get_hibernate_io_function();
657 	if (hib->io_func == NULL)
658 		return (1);
659 
660 	/* Calculate hibernate device */
661 	hib->dev = swdevt[0].sw_dev;
662 
663 	/* Read disklabel (used to calculate signature and image offsets) */
664 	dl_ret = disk_readlabel(&dl, hib->dev, err_string, 128);
665 
666 	if (dl_ret) {
667 		printf("Hibernate error reading disklabel: %s\n", dl_ret);
668 		return (1);
669 	}
670 
671 	/* Make sure we have a swap partition. */
672 	if (dl.d_partitions[1].p_fstype != FS_SWAP ||
673 	    DL_GETPSIZE(&dl.d_partitions[1]) == 0)
674 		return (1);
675 
676 	/* Make sure the signature can fit in one block */
677 	if (sizeof(union hibernate_info) > DEV_BSIZE)
678 		return (1);
679 
680 	/* Magic number */
681 	hib->magic = HIBERNATE_MAGIC;
682 
683 	/* Calculate signature block location */
684 	hib->sig_offset = DL_GETPSIZE(&dl.d_partitions[1]) -
685 	    sizeof(union hibernate_info)/DEV_BSIZE;
686 
687 	chunktable_size = HIBERNATE_CHUNK_TABLE_SIZE / DEV_BSIZE;
688 
689 	/* Stash kernel version information */
690 	memset(&hib->kernel_version, 0, 128);
691 	bcopy(version, &hib->kernel_version,
692 	    min(strlen(version), sizeof(hib->kernel_version)-1));
693 
694 	if (suspend) {
695 		/* Allocate piglet region */
696 		if (uvm_pmr_alloc_piglet(&hib->piglet_va,
697 		    &hib->piglet_pa, HIBERNATE_CHUNK_SIZE*3,
698 		    HIBERNATE_CHUNK_SIZE)) {
699 			printf("Hibernate failed to allocate the piglet\n");
700 			return (1);
701 		}
702 		hib->io_page = (void *)hib->piglet_va;
703 
704 		/*
705 		 * Initialization of the hibernate IO function for drivers
706 		 * that need to do prep work (such as allocating memory or
707 		 * setting up data structures that cannot safely be done
708 		 * during suspend without causing side effects). There is
709 		 * a matching HIB_DONE call performed after the write is
710 		 * completed.
711 		 */
712 		if (hib->io_func(hib->dev, DL_GETPOFFSET(&dl.d_partitions[1]),
713 		    (vaddr_t)NULL, DL_GETPSIZE(&dl.d_partitions[1]),
714 		    HIB_INIT, hib->io_page))
715 			goto fail;
716 
717 	} else {
718 		/*
719 		 * Resuming kernels use a regular I/O page since we won't
720 		 * have access to the suspended kernel's piglet VA at this
721 		 * point. No need to free this I/O page as it will vanish
722 		 * as part of the resume.
723 		 */
724 		hib->io_page = malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT);
725 		if (!hib->io_page)
726 			return (1);
727 	}
728 
729 
730 	if (get_hibernate_info_md(hib))
731 		goto fail;
732 
733 
734 	return (0);
735 fail:
736 	if (suspend)
737 		uvm_pmr_free_piglet(hib->piglet_va,
738 		    HIBERNATE_CHUNK_SIZE * 3);
739 
740 	return (1);
741 }
742 
743 /*
744  * Allocate nitems*size bytes from the hiballoc area presently in use
745  */
746 void *
747 hibernate_zlib_alloc(void *unused, int nitems, int size)
748 {
749 	struct hibernate_zlib_state *hibernate_state;
750 
751 	hibernate_state =
752 	    (struct hibernate_zlib_state *)HIBERNATE_HIBALLOC_PAGE;
753 
754 	return hib_alloc(&hibernate_state->hiballoc_arena, nitems*size);
755 }
756 
757 /*
758  * Free the memory pointed to by addr in the hiballoc area presently in
759  * use
760  */
761 void
762 hibernate_zlib_free(void *unused, void *addr)
763 {
764 	struct hibernate_zlib_state *hibernate_state;
765 
766 	hibernate_state =
767 	    (struct hibernate_zlib_state *)HIBERNATE_HIBALLOC_PAGE;
768 
769 	hib_free(&hibernate_state->hiballoc_arena, addr);
770 }
771 
772 /*
773  * Inflate next page of data from the image stream
774  */
775 int
776 hibernate_inflate_page(void)
777 {
778 	struct hibernate_zlib_state *hibernate_state;
779 	int i;
780 
781 	hibernate_state =
782 	    (struct hibernate_zlib_state *)HIBERNATE_HIBALLOC_PAGE;
783 
784 	/* Set up the stream for inflate */
785 	hibernate_state->hib_stream.next_out = (char *)HIBERNATE_INFLATE_PAGE;
786 	hibernate_state->hib_stream.avail_out = PAGE_SIZE;
787 
788 	/* Process next block of data */
789 	i = inflate(&hibernate_state->hib_stream, Z_SYNC_FLUSH);
790 	if (i != Z_OK && i != Z_STREAM_END) {
791 		/*
792 		 * XXX - this will likely reboot/hang most machines
793 		 *       since the console output buffer will be unmapped,
794 		 *       but there's not much else we can do here.
795 		 */
796 		panic("inflate error");
797 	}
798 
799 	/* We should always have extracted a full page ... */
800 	if (hibernate_state->hib_stream.avail_out != 0) {
801 		/*
802 		 * XXX - this will likely reboot/hang most machines
803 		 *       since the console output buffer will be unmapped,
804 		 *       but there's not much else we can do here.
805 		 */
806 		panic("incomplete page");
807 	}
808 
809 	return (i == Z_STREAM_END);
810 }
811 
812 /*
813  * Inflate size bytes from src into dest, skipping any pages in
814  * [src..dest] that are special (see hibernate_inflate_skip)
815  *
816  * This function executes while using the resume-time stack
817  * and pmap, and therefore cannot use ddb/printf/etc. Doing so
818  * will likely hang or reset the machine since the console output buffer
819  * will be unmapped.
820  */
821 void
822 hibernate_inflate_region(union hibernate_info *hib, paddr_t dest,
823     paddr_t src, size_t size)
824 {
825 	int end_stream = 0 ;
826 	struct hibernate_zlib_state *hibernate_state;
827 
828 	hibernate_state =
829 	    (struct hibernate_zlib_state *)HIBERNATE_HIBALLOC_PAGE;
830 
831 	hibernate_state->hib_stream.next_in = (char *)src;
832 	hibernate_state->hib_stream.avail_in = size;
833 
834 	do {
835 		/*
836 		 * Is this a special page? If yes, redirect the
837 		 * inflate output to a scratch page (eg, discard it)
838 		 */
839 		if (hibernate_inflate_skip(hib, dest)) {
840 			hibernate_enter_resume_mapping(
841 			    HIBERNATE_INFLATE_PAGE,
842 			    HIBERNATE_INFLATE_PAGE, 0);
843 		} else {
844 			hibernate_enter_resume_mapping(
845 			    HIBERNATE_INFLATE_PAGE, dest, 0);
846 		}
847 
848 		hibernate_flush();
849 		end_stream = hibernate_inflate_page();
850 
851 		dest += PAGE_SIZE;
852 	} while (!end_stream);
853 }
854 
855 /*
856  * deflate from src into the I/O page, up to 'remaining' bytes
857  *
858  * Returns number of input bytes consumed, and may reset
859  * the 'remaining' parameter if not all the output space was consumed
860  * (this information is needed to know how much to write to disk
861  */
862 size_t
863 hibernate_deflate(union hibernate_info *hib, paddr_t src,
864     size_t *remaining)
865 {
866 	vaddr_t hibernate_io_page = hib->piglet_va + PAGE_SIZE;
867 	struct hibernate_zlib_state *hibernate_state;
868 
869 	hibernate_state =
870 	    (struct hibernate_zlib_state *)HIBERNATE_HIBALLOC_PAGE;
871 
872 	/* Set up the stream for deflate */
873 	hibernate_state->hib_stream.next_in = (caddr_t)src;
874 	hibernate_state->hib_stream.avail_in = PAGE_SIZE - (src & PAGE_MASK);
875 	hibernate_state->hib_stream.next_out = (caddr_t)hibernate_io_page +
876 	    (PAGE_SIZE - *remaining);
877 	hibernate_state->hib_stream.avail_out = *remaining;
878 
879 	/* Process next block of data */
880 	if (deflate(&hibernate_state->hib_stream, Z_SYNC_FLUSH) != Z_OK)
881 		panic("hibernate zlib deflate error");
882 
883 	/* Update pointers and return number of bytes consumed */
884 	*remaining = hibernate_state->hib_stream.avail_out;
885 	return (PAGE_SIZE - (src & PAGE_MASK)) -
886 	    hibernate_state->hib_stream.avail_in;
887 }
888 
889 /*
890  * Write the hibernation information specified in hiber_info
891  * to the location in swap previously calculated (last block of
892  * swap), called the "signature block".
893  */
894 int
895 hibernate_write_signature(union hibernate_info *hib)
896 {
897 	/* Write hibernate info to disk */
898 	return (hib->io_func(hib->dev, hib->sig_offset,
899 	    (vaddr_t)hib, DEV_BSIZE, HIB_W,
900 	    hib->io_page));
901 }
902 
903 /*
904  * Write the memory chunk table to the area in swap immediately
905  * preceding the signature block. The chunk table is stored
906  * in the piglet when this function is called.  Returns errno.
907  */
908 int
909 hibernate_write_chunktable(union hibernate_info *hib)
910 {
911 	struct hibernate_disk_chunk *chunks;
912 	vaddr_t hibernate_chunk_table_start;
913 	size_t hibernate_chunk_table_size;
914 	int i, err;
915 
916 	hibernate_chunk_table_size = HIBERNATE_CHUNK_TABLE_SIZE;
917 
918 	hibernate_chunk_table_start = hib->piglet_va +
919 	    HIBERNATE_CHUNK_SIZE;
920 
921 	chunks = (struct hibernate_disk_chunk *)(hib->piglet_va +
922 	    HIBERNATE_CHUNK_SIZE);
923 
924 	/* Write chunk table */
925 	for (i = 0; i < hibernate_chunk_table_size; i += MAXPHYS) {
926 		if ((err = hib->io_func(hib->dev,
927 		    hib->chunktable_offset + (i/DEV_BSIZE),
928 		    (vaddr_t)(hibernate_chunk_table_start + i),
929 		    MAXPHYS, HIB_W, hib->io_page))) {
930 			DPRINTF("chunktable write error: %d\n", err);
931 			return (err);
932 		}
933 	}
934 
935 	return (0);
936 }
937 
938 /*
939  * Write an empty hiber_info to the swap signature block, which is
940  * guaranteed to not match any valid hib.
941  */
942 int
943 hibernate_clear_signature(void)
944 {
945 	union hibernate_info blank_hiber_info;
946 	union hibernate_info hib;
947 
948 	/* Zero out a blank hiber_info */
949 	memset(&blank_hiber_info, 0, sizeof(union hibernate_info));
950 
951 	/* Get the signature block location */
952 	if (get_hibernate_info(&hib, 0))
953 		return (1);
954 
955 	/* Write (zeroed) hibernate info to disk */
956 	DPRINTF("clearing hibernate signature block location: %lld\n",
957 		hib.sig_offset);
958 	if (hibernate_block_io(&hib,
959 	    hib.sig_offset,
960 	    DEV_BSIZE, (vaddr_t)&blank_hiber_info, 1))
961 		printf("Warning: could not clear hibernate signature\n");
962 
963 	return (0);
964 }
965 
966 /*
967  * Check chunk range overlap when calculating whether or not to copy a
968  * compressed chunk to the piglet area before decompressing.
969  *
970  * returns zero if the ranges do not overlap, non-zero otherwise.
971  */
972 int
973 hibernate_check_overlap(paddr_t r1s, paddr_t r1e, paddr_t r2s, paddr_t r2e)
974 {
975 	/* case A : end of r1 overlaps start of r2 */
976 	if (r1s < r2s && r1e > r2s)
977 		return (1);
978 
979 	/* case B : r1 entirely inside r2 */
980 	if (r1s >= r2s && r1e <= r2e)
981 		return (1);
982 
983 	/* case C : r2 entirely inside r1 */
984 	if (r2s >= r1s && r2e <= r1e)
985 		return (1);
986 
987 	/* case D : end of r2 overlaps start of r1 */
988 	if (r2s < r1s && r2e > r1s)
989 		return (1);
990 
991 	return (0);
992 }
993 
994 /*
995  * Compare two hibernate_infos to determine if they are the same (eg,
996  * we should be performing a hibernate resume on this machine.
997  * Not all fields are checked - just enough to verify that the machine
998  * has the same memory configuration and kernel as the one that
999  * wrote the signature previously.
1000  */
1001 int
1002 hibernate_compare_signature(union hibernate_info *mine,
1003     union hibernate_info *disk)
1004 {
1005 	u_int i;
1006 
1007 	if (mine->nranges != disk->nranges) {
1008 		DPRINTF("hibernate memory range count mismatch\n");
1009 		return (1);
1010 	}
1011 
1012 	if (strcmp(mine->kernel_version, disk->kernel_version) != 0) {
1013 		DPRINTF("hibernate kernel version mismatch\n");
1014 		return (1);
1015 	}
1016 
1017 	for (i = 0; i < mine->nranges; i++) {
1018 		if ((mine->ranges[i].base != disk->ranges[i].base) ||
1019 		    (mine->ranges[i].end != disk->ranges[i].end) ) {
1020 			DPRINTF("hib range %d mismatch [%p-%p != %p-%p]\n",
1021 				i,
1022 				(void *)mine->ranges[i].base,
1023 				(void *)mine->ranges[i].end,
1024 				(void *)disk->ranges[i].base,
1025 				(void *)disk->ranges[i].end);
1026 			return (1);
1027 		}
1028 	}
1029 
1030 	return (0);
1031 }
1032 
1033 /*
1034  * Transfers xfer_size bytes between the hibernate device specified in
1035  * hib_info at offset blkctr and the vaddr specified at dest.
1036  *
1037  * Separate offsets and pages are used to handle misaligned reads (reads
1038  * that span a page boundary).
1039  *
1040  * blkctr specifies a relative offset (relative to the start of swap),
1041  * not an absolute disk offset
1042  *
1043  */
1044 int
1045 hibernate_block_io(union hibernate_info *hib, daddr_t blkctr,
1046     size_t xfer_size, vaddr_t dest, int iswrite)
1047 {
1048 	struct buf *bp;
1049 	struct bdevsw *bdsw;
1050 	int error;
1051 
1052 	bp = geteblk(xfer_size);
1053 	bdsw = &bdevsw[major(hib->dev)];
1054 
1055 	error = (*bdsw->d_open)(hib->dev, FREAD, S_IFCHR, curproc);
1056 	if (error) {
1057 		printf("hibernate_block_io open failed\n");
1058 		return (1);
1059 	}
1060 
1061 	if (iswrite)
1062 		bcopy((caddr_t)dest, bp->b_data, xfer_size);
1063 
1064 	bp->b_bcount = xfer_size;
1065 	bp->b_blkno = blkctr;
1066 	CLR(bp->b_flags, B_READ | B_WRITE | B_DONE);
1067 	SET(bp->b_flags, B_BUSY | (iswrite ? B_WRITE : B_READ) | B_RAW);
1068 	bp->b_dev = hib->dev;
1069 	(*bdsw->d_strategy)(bp);
1070 
1071 	error = biowait(bp);
1072 	if (error) {
1073 		printf("hib block_io biowait error %d blk %lld size %zu\n",
1074 			error, (long long)blkctr, xfer_size);
1075 		error = (*bdsw->d_close)(hib->dev, 0, S_IFCHR,
1076 		    curproc);
1077 		if (error)
1078 			printf("hibernate_block_io error close failed\n");
1079 		return (1);
1080 	}
1081 
1082 	error = (*bdsw->d_close)(hib->dev, FREAD, S_IFCHR, curproc);
1083 	if (error) {
1084 		printf("hibernate_block_io close failed\n");
1085 		return (1);
1086 	}
1087 
1088 	if (!iswrite)
1089 		bcopy(bp->b_data, (caddr_t)dest, xfer_size);
1090 
1091 	bp->b_flags |= B_INVAL;
1092 	brelse(bp);
1093 
1094 	return (0);
1095 }
1096 
1097 /*
1098  * Reads the signature block from swap, checks against the current machine's
1099  * information. If the information matches, perform a resume by reading the
1100  * saved image into the pig area, and unpacking.
1101  */
1102 void
1103 hibernate_resume(void)
1104 {
1105 	union hibernate_info hib;
1106 	int s;
1107 
1108 	/* Get current running machine's hibernate info */
1109 	memset(&hib, 0, sizeof(hib));
1110 	if (get_hibernate_info(&hib, 0)) {
1111 		DPRINTF("couldn't retrieve machine's hibernate info\n");
1112 		return;
1113 	}
1114 
1115 	/* Read hibernate info from disk */
1116 	s = splbio();
1117 
1118 	DPRINTF("reading hibernate signature block location: %lld\n",
1119 		hib.sig_offset);
1120 
1121 	if (hibernate_block_io(&hib,
1122 	    hib.sig_offset,
1123 	    DEV_BSIZE, (vaddr_t)&disk_hib, 0)) {
1124 		DPRINTF("error in hibernate read");
1125 		splx(s);
1126 		return;
1127 	}
1128 
1129 	/* Check magic number */
1130 	if (disk_hib.magic != HIBERNATE_MAGIC) {
1131 		DPRINTF("wrong magic number in hibernate signature: %x\n",
1132 			disk_hib.magic);
1133 		splx(s);
1134 		return;
1135 	}
1136 
1137 	/*
1138 	 * We (possibly) found a hibernate signature. Clear signature first,
1139 	 * to prevent accidental resume or endless resume cycles later.
1140 	 */
1141 	if (hibernate_clear_signature()) {
1142 		DPRINTF("error clearing hibernate signature block\n");
1143 		splx(s);
1144 		return;
1145 	}
1146 
1147 	/*
1148 	 * If on-disk and in-memory hibernate signatures match,
1149 	 * this means we should do a resume from hibernate.
1150 	 */
1151 	if (hibernate_compare_signature(&hib, &disk_hib)) {
1152 		DPRINTF("mismatched hibernate signature block\n");
1153 		splx(s);
1154 		return;
1155 	}
1156 
1157 #ifdef MULTIPROCESSOR
1158 	hibernate_quiesce_cpus();
1159 #endif /* MULTIPROCESSOR */
1160 
1161 	/* Read the image from disk into the image (pig) area */
1162 	if (hibernate_read_image(&disk_hib))
1163 		goto fail;
1164 
1165 	if (config_suspend(device_mainbus(), DVACT_QUIESCE) != 0)
1166 		goto fail;
1167 
1168 	(void) splhigh();
1169 	hibernate_disable_intr_machdep();
1170 	cold = 1;
1171 
1172 	if (config_suspend(device_mainbus(), DVACT_SUSPEND) != 0) {
1173 		cold = 0;
1174 		hibernate_enable_intr_machdep();
1175 		goto fail;
1176 	}
1177 
1178 	pmap_kenter_pa(HIBERNATE_HIBALLOC_PAGE, HIBERNATE_HIBALLOC_PAGE,
1179 	    VM_PROT_ALL);
1180 	pmap_activate(curproc);
1181 
1182 	printf("Unpacking image...\n");
1183 
1184 	/* Switch stacks */
1185 	hibernate_switch_stack_machdep();
1186 
1187 #ifndef NO_PROPOLICE
1188 	/* Start using suspended kernel's propolice guard */
1189 	__guard_local = disk_hib.guard;
1190 #endif /* ! NO_PROPOLICE */
1191 
1192 	/* Unpack and resume */
1193 	hibernate_unpack_image(&disk_hib);
1194 
1195 fail:
1196 	splx(s);
1197 	printf("\nUnable to resume hibernated image\n");
1198 }
1199 
1200 /*
1201  * Unpack image from pig area to original location by looping through the
1202  * list of output chunks in the order they should be restored (fchunks).
1203  *
1204  * Note that due to the stack smash protector and the fact that we have
1205  * switched stacks, it is not permitted to return from this function.
1206  */
1207 void
1208 hibernate_unpack_image(union hibernate_info *hib)
1209 {
1210 	struct hibernate_disk_chunk *chunks;
1211 	union hibernate_info local_hib;
1212 	paddr_t image_cur = global_pig_start;
1213 	short i, *fchunks;
1214 	char *pva = (char *)hib->piglet_va;
1215 	struct hibernate_zlib_state *hibernate_state;
1216 
1217 	hibernate_state =
1218 	    (struct hibernate_zlib_state *)HIBERNATE_HIBALLOC_PAGE;
1219 
1220 	/* Mask off based on arch-specific piglet page size */
1221 	pva = (char *)((paddr_t)pva & (PIGLET_PAGE_MASK));
1222 	fchunks = (short *)(pva + (4 * PAGE_SIZE));
1223 
1224 	chunks = (struct hibernate_disk_chunk *)(pva +  HIBERNATE_CHUNK_SIZE);
1225 
1226 	/* Can't use hiber_info that's passed in after this point */
1227 	bcopy(hib, &local_hib, sizeof(union hibernate_info));
1228 
1229 	/*
1230 	 * Point of no return. Once we pass this point, only kernel code can
1231 	 * be accessed. No global variables or other kernel data structures
1232 	 * are guaranteed to be coherent after unpack starts.
1233 	 *
1234 	 * The image is now in high memory (pig area), we unpack from the pig
1235 	 * to the correct location in memory. We'll eventually end up copying
1236 	 * on top of ourself, but we are assured the kernel code here is the
1237 	 * same between the hibernated and resuming kernel, and we are running
1238 	 * on our own stack, so the overwrite is ok.
1239 	 */
1240 	hibernate_activate_resume_pt_machdep();
1241 
1242 	for (i = 0; i < local_hib.chunk_ctr; i++) {
1243 		/* Reset zlib for inflate */
1244 		if (hibernate_zlib_reset(&local_hib, 0) != Z_OK)
1245 			panic("hibernate failed to reset zlib for inflate");
1246 
1247 		hibernate_process_chunk(&local_hib, &chunks[fchunks[i]],
1248 		    image_cur);
1249 
1250 		image_cur += chunks[fchunks[i]].compressed_size;
1251 
1252 	}
1253 
1254 	/*
1255 	 * Resume the loaded kernel by jumping to the MD resume vector.
1256 	 * We won't be returning from this call.
1257 	 */
1258 	hibernate_resume_machdep();
1259 }
1260 
1261 /*
1262  * Bounce a compressed image chunk to the piglet, entering mappings for the
1263  * copied pages as needed
1264  */
1265 void
1266 hibernate_copy_chunk_to_piglet(paddr_t img_cur, vaddr_t piglet, size_t size)
1267 {
1268 	size_t ct, ofs;
1269 	paddr_t src = img_cur;
1270 	vaddr_t dest = piglet;
1271 
1272 	/* Copy first partial page */
1273 	ct = (PAGE_SIZE) - (src & PAGE_MASK);
1274 	ofs = (src & PAGE_MASK);
1275 
1276 	if (ct < PAGE_SIZE) {
1277 		hibernate_enter_resume_mapping(HIBERNATE_INFLATE_PAGE,
1278 			(src - ofs), 0);
1279 		hibernate_flush();
1280 		bcopy((caddr_t)(HIBERNATE_INFLATE_PAGE + ofs), (caddr_t)dest, ct);
1281 		src += ct;
1282 		dest += ct;
1283 	}
1284 
1285 	/* Copy remaining pages */
1286 	while (src < size + img_cur) {
1287 		hibernate_enter_resume_mapping(HIBERNATE_INFLATE_PAGE, src, 0);
1288 		hibernate_flush();
1289 		ct = PAGE_SIZE;
1290 		bcopy((caddr_t)(HIBERNATE_INFLATE_PAGE), (caddr_t)dest, ct);
1291 		hibernate_flush();
1292 		src += ct;
1293 		dest += ct;
1294 	}
1295 }
1296 
1297 /*
1298  * Process a chunk by bouncing it to the piglet, followed by unpacking
1299  */
1300 void
1301 hibernate_process_chunk(union hibernate_info *hib,
1302     struct hibernate_disk_chunk *chunk, paddr_t img_cur)
1303 {
1304 	char *pva = (char *)hib->piglet_va;
1305 
1306 	hibernate_copy_chunk_to_piglet(img_cur,
1307 	 (vaddr_t)(pva + (HIBERNATE_CHUNK_SIZE * 2)), chunk->compressed_size);
1308 
1309 	hibernate_inflate_region(hib, chunk->base,
1310 	    (vaddr_t)(pva + (HIBERNATE_CHUNK_SIZE * 2)),
1311 	    chunk->compressed_size);
1312 }
1313 
1314 /*
1315  * Write a compressed version of this machine's memory to disk, at the
1316  * precalculated swap offset:
1317  *
1318  * end of swap - signature block size - chunk table size - memory size
1319  *
1320  * The function begins by looping through each phys mem range, cutting each
1321  * one into MD sized chunks. These chunks are then compressed individually
1322  * and written out to disk, in phys mem order. Some chunks might compress
1323  * more than others, and for this reason, each chunk's size is recorded
1324  * in the chunk table, which is written to disk after the image has
1325  * properly been compressed and written (in hibernate_write_chunktable).
1326  *
1327  * When this function is called, the machine is nearly suspended - most
1328  * devices are quiesced/suspended, interrupts are off, and cold has
1329  * been set. This means that there can be no side effects once the
1330  * write has started, and the write function itself can also have no
1331  * side effects. This also means no printfs are permitted (since printf
1332  * has side effects.)
1333  *
1334  * Return values :
1335  *
1336  * 0      - success
1337  * EIO    - I/O error occurred writing the chunks
1338  * EINVAL - Failed to write a complete range
1339  * ENOMEM - Memory allocation failure during preparation of the zlib arena
1340  */
1341 int
1342 hibernate_write_chunks(union hibernate_info *hib)
1343 {
1344 	paddr_t range_base, range_end, inaddr, temp_inaddr;
1345 	size_t nblocks, out_remaining, used;
1346 	struct hibernate_disk_chunk *chunks;
1347 	vaddr_t hibernate_io_page = hib->piglet_va + PAGE_SIZE;
1348 	daddr_t blkctr = 0;
1349 	int i, err;
1350 	struct hibernate_zlib_state *hibernate_state;
1351 
1352 	hibernate_state =
1353 	    (struct hibernate_zlib_state *)HIBERNATE_HIBALLOC_PAGE;
1354 
1355 	hib->chunk_ctr = 0;
1356 
1357 	/*
1358 	 * Allocate VA for the temp and copy page.
1359 	 *
1360 	 * These will become part of the suspended kernel and will
1361 	 * be freed in hibernate_free, upon resume.
1362 	 */
1363 	hibernate_temp_page = (vaddr_t)km_alloc(PAGE_SIZE, &kv_any,
1364 	    &kp_none, &kd_nowait);
1365 	if (!hibernate_temp_page)
1366 		return (ENOMEM);
1367 
1368 	hibernate_copy_page = (vaddr_t)km_alloc(PAGE_SIZE, &kv_any,
1369 	    &kp_none, &kd_nowait);
1370 	if (!hibernate_copy_page) {
1371 		DPRINTF("out of memory allocating hibernate_copy_page\n");
1372 		return (ENOMEM);
1373 	}
1374 
1375 	pmap_kenter_pa(hibernate_copy_page,
1376 	    (hib->piglet_pa + 3*PAGE_SIZE), VM_PROT_ALL);
1377 
1378 	pmap_activate(curproc);
1379 
1380 	chunks = (struct hibernate_disk_chunk *)(hib->piglet_va +
1381 	    HIBERNATE_CHUNK_SIZE);
1382 
1383 	/* Calculate the chunk regions */
1384 	for (i = 0; i < hib->nranges; i++) {
1385 		range_base = hib->ranges[i].base;
1386 		range_end = hib->ranges[i].end;
1387 
1388 		inaddr = range_base;
1389 
1390 		while (inaddr < range_end) {
1391 			chunks[hib->chunk_ctr].base = inaddr;
1392 			if (inaddr + HIBERNATE_CHUNK_SIZE < range_end)
1393 				chunks[hib->chunk_ctr].end = inaddr +
1394 				    HIBERNATE_CHUNK_SIZE;
1395 			else
1396 				chunks[hib->chunk_ctr].end = range_end;
1397 
1398 			inaddr += HIBERNATE_CHUNK_SIZE;
1399 			hib->chunk_ctr ++;
1400 		}
1401 	}
1402 
1403 	/* Compress and write the chunks in the chunktable */
1404 	for (i = 0; i < hib->chunk_ctr; i++) {
1405 		range_base = chunks[i].base;
1406 		range_end = chunks[i].end;
1407 
1408 		chunks[i].offset = blkctr + hib->image_offset;
1409 
1410 		/* Reset zlib for deflate */
1411 		if (hibernate_zlib_reset(hib, 1) != Z_OK) {
1412 			DPRINTF("hibernate_zlib_reset failed for deflate\n");
1413 			return (ENOMEM);
1414 		}
1415 
1416 		inaddr = range_base;
1417 
1418 		/*
1419 		 * For each range, loop through its phys mem region
1420 		 * and write out the chunks (the last chunk might be
1421 		 * smaller than the chunk size).
1422 		 */
1423 		while (inaddr < range_end) {
1424 			out_remaining = PAGE_SIZE;
1425 			while (out_remaining > 0 && inaddr < range_end) {
1426 
1427 				/*
1428 				 * Adjust for regions that are not evenly
1429 				 * divisible by PAGE_SIZE or overflowed
1430 				 * pages from the previous iteration.
1431 				 */
1432 				temp_inaddr = (inaddr & PAGE_MASK) +
1433 				    hibernate_copy_page;
1434 
1435 				/* Deflate from temp_inaddr to IO page */
1436 				if (inaddr != range_end) {
1437 					pmap_kenter_pa(hibernate_temp_page,
1438 					    inaddr & PMAP_PA_MASK, VM_PROT_ALL);
1439 
1440 					pmap_activate(curproc);
1441 
1442 					bcopy((caddr_t)hibernate_temp_page,
1443 					    (caddr_t)hibernate_copy_page,
1444 					    PAGE_SIZE);
1445 					inaddr += hibernate_deflate(hib,
1446 					    temp_inaddr, &out_remaining);
1447 				}
1448 
1449 				if (out_remaining == 0) {
1450 					/* Filled up the page */
1451 					nblocks =
1452 					    PAGE_SIZE / DEV_BSIZE;
1453 
1454 					if ((err = hib->io_func(hib->dev,
1455 					    blkctr + hib->image_offset,
1456 					    (vaddr_t)hibernate_io_page,
1457 					    PAGE_SIZE, HIB_W, hib->io_page))) {
1458 						DPRINTF("hib write error %d\n",
1459 						    err);
1460 						return (err);
1461 					}
1462 
1463 					blkctr += nblocks;
1464 				}
1465 			}
1466 		}
1467 
1468 		if (inaddr != range_end) {
1469 			DPRINTF("deflate range ended prematurely\n");
1470 			return (EINVAL);
1471 		}
1472 
1473 		/*
1474 		 * End of range. Round up to next secsize bytes
1475 		 * after finishing compress
1476 		 */
1477 		if (out_remaining == 0)
1478 			out_remaining = PAGE_SIZE;
1479 
1480 		/* Finish compress */
1481 		hibernate_state->hib_stream.next_in = (caddr_t)inaddr;
1482 		hibernate_state->hib_stream.avail_in = 0;
1483 		hibernate_state->hib_stream.next_out =
1484 		    (caddr_t)hibernate_io_page + (PAGE_SIZE - out_remaining);
1485 
1486 		/* We have an extra output page available for finalize */
1487 		hibernate_state->hib_stream.avail_out =
1488 			out_remaining + PAGE_SIZE;
1489 
1490 		if ((err = deflate(&hibernate_state->hib_stream, Z_FINISH)) !=
1491 		    Z_STREAM_END) {
1492 			DPRINTF("deflate error in output stream: %d\n", err);
1493 			return (err);
1494 		}
1495 
1496 		out_remaining = hibernate_state->hib_stream.avail_out;
1497 
1498 		used = 2*PAGE_SIZE - out_remaining;
1499 		nblocks = used / DEV_BSIZE;
1500 
1501 		/* Round up to next block if needed */
1502 		if (used % DEV_BSIZE != 0)
1503 			nblocks ++;
1504 
1505 		/* Write final block(s) for this chunk */
1506 		if ((err = hib->io_func(hib->dev, blkctr + hib->image_offset,
1507 		    (vaddr_t)hibernate_io_page, nblocks*DEV_BSIZE,
1508 		    HIB_W, hib->io_page))) {
1509 			DPRINTF("hib final write error %d\n", err);
1510 			return (err);
1511 		}
1512 
1513 		blkctr += nblocks;
1514 
1515 		chunks[i].compressed_size = (blkctr + hib->image_offset -
1516 		    chunks[i].offset) * DEV_BSIZE;
1517 	}
1518 
1519 	hib->chunktable_offset = hib->image_offset + blkctr;
1520 	return (0);
1521 }
1522 
1523 /*
1524  * Reset the zlib stream state and allocate a new hiballoc area for either
1525  * inflate or deflate. This function is called once for each hibernate chunk.
1526  * Calling hiballoc_init multiple times is acceptable since the memory it is
1527  * provided is unmanaged memory (stolen). We use the memory provided to us
1528  * by the piglet allocated via the supplied hib.
1529  */
1530 int
1531 hibernate_zlib_reset(union hibernate_info *hib, int deflate)
1532 {
1533 	vaddr_t hibernate_zlib_start;
1534 	size_t hibernate_zlib_size;
1535 	char *pva = (char *)hib->piglet_va;
1536 	struct hibernate_zlib_state *hibernate_state;
1537 
1538 	hibernate_state =
1539 	    (struct hibernate_zlib_state *)HIBERNATE_HIBALLOC_PAGE;
1540 
1541 	if (!deflate)
1542 		pva = (char *)((paddr_t)pva & (PIGLET_PAGE_MASK));
1543 
1544 	hibernate_zlib_start = (vaddr_t)(pva + (28 * PAGE_SIZE));
1545 	hibernate_zlib_size = 80 * PAGE_SIZE;
1546 
1547 	memset((void *)hibernate_zlib_start, 0, hibernate_zlib_size);
1548 	memset(hibernate_state, 0, PAGE_SIZE);
1549 
1550 	/* Set up stream structure */
1551 	hibernate_state->hib_stream.zalloc = (alloc_func)hibernate_zlib_alloc;
1552 	hibernate_state->hib_stream.zfree = (free_func)hibernate_zlib_free;
1553 
1554 	/* Initialize the hiballoc arena for zlib allocs/frees */
1555 	hiballoc_init(&hibernate_state->hiballoc_arena,
1556 	    (caddr_t)hibernate_zlib_start, hibernate_zlib_size);
1557 
1558 	if (deflate) {
1559 		return deflateInit(&hibernate_state->hib_stream,
1560 		    Z_BEST_SPEED);
1561 	} else
1562 		return inflateInit(&hibernate_state->hib_stream);
1563 }
1564 
1565 /*
1566  * Reads the hibernated memory image from disk, whose location and
1567  * size are recorded in hib. Begin by reading the persisted
1568  * chunk table, which records the original chunk placement location
1569  * and compressed size for each. Next, allocate a pig region of
1570  * sufficient size to hold the compressed image. Next, read the
1571  * chunks into the pig area (calling hibernate_read_chunks to do this),
1572  * and finally, if all of the above succeeds, clear the hibernate signature.
1573  * The function will then return to hibernate_resume, which will proceed
1574  * to unpack the pig image to the correct place in memory.
1575  */
1576 int
1577 hibernate_read_image(union hibernate_info *hib)
1578 {
1579 	size_t compressed_size, disk_size, chunktable_size, pig_sz;
1580 	paddr_t image_start, image_end, pig_start, pig_end;
1581 	struct hibernate_disk_chunk *chunks;
1582 	daddr_t blkctr;
1583 	vaddr_t chunktable = (vaddr_t)NULL;
1584 	paddr_t piglet_chunktable = hib->piglet_pa +
1585 	    HIBERNATE_CHUNK_SIZE;
1586 	int i, status;
1587 
1588 	status = 0;
1589 	pmap_activate(curproc);
1590 
1591 	/* Calculate total chunk table size in disk blocks */
1592 	chunktable_size = HIBERNATE_CHUNK_TABLE_SIZE / DEV_BSIZE;
1593 
1594 	blkctr = hib->chunktable_offset;
1595 
1596 	chunktable = (vaddr_t)km_alloc(HIBERNATE_CHUNK_TABLE_SIZE, &kv_any,
1597 	    &kp_none, &kd_nowait);
1598 
1599 	if (!chunktable)
1600 		return (1);
1601 
1602 	/* Map chunktable pages */
1603 	for (i = 0; i < HIBERNATE_CHUNK_TABLE_SIZE; i += PAGE_SIZE)
1604 		pmap_kenter_pa(chunktable + i, piglet_chunktable + i,
1605 		    VM_PROT_ALL);
1606 	pmap_update(pmap_kernel());
1607 
1608 	/* Read the chunktable from disk into the piglet chunktable */
1609 	for (i = 0; i < HIBERNATE_CHUNK_TABLE_SIZE;
1610 	    i += MAXPHYS, blkctr += MAXPHYS/DEV_BSIZE)
1611 		hibernate_block_io(hib, blkctr, MAXPHYS,
1612 		    chunktable + i, 0);
1613 
1614 	blkctr = hib->image_offset;
1615 	compressed_size = 0;
1616 
1617 	chunks = (struct hibernate_disk_chunk *)chunktable;
1618 
1619 	for (i = 0; i < hib->chunk_ctr; i++)
1620 		compressed_size += chunks[i].compressed_size;
1621 
1622 	disk_size = compressed_size;
1623 
1624 	printf("unhibernating @ block %lld length %lu bytes\n",
1625 	    hib->sig_offset - chunktable_size,
1626 	    compressed_size);
1627 
1628 	/* Allocate the pig area */
1629 	pig_sz = compressed_size + HIBERNATE_CHUNK_SIZE;
1630 	if (uvm_pmr_alloc_pig(&pig_start, pig_sz) == ENOMEM) {
1631 		status = 1;
1632 		goto unmap;
1633 	}
1634 
1635 	pig_end = pig_start + pig_sz;
1636 
1637 	/* Calculate image extents. Pig image must end on a chunk boundary. */
1638 	image_end = pig_end & ~(HIBERNATE_CHUNK_SIZE - 1);
1639 	image_start = image_end - disk_size;
1640 
1641 	hibernate_read_chunks(hib, image_start, image_end, disk_size,
1642 	    chunks);
1643 
1644 	/* Prepare the resume time pmap/page table */
1645 	hibernate_populate_resume_pt(hib, image_start, image_end);
1646 
1647 unmap:
1648 	/* Unmap chunktable pages */
1649 	pmap_kremove(chunktable, HIBERNATE_CHUNK_TABLE_SIZE);
1650 	pmap_update(pmap_kernel());
1651 
1652 	return (status);
1653 }
1654 
1655 /*
1656  * Read the hibernated memory chunks from disk (chunk information at this
1657  * point is stored in the piglet) into the pig area specified by
1658  * [pig_start .. pig_end]. Order the chunks so that the final chunk is the
1659  * only chunk with overlap possibilities.
1660  */
1661 int
1662 hibernate_read_chunks(union hibernate_info *hib, paddr_t pig_start,
1663     paddr_t pig_end, size_t image_compr_size,
1664     struct hibernate_disk_chunk *chunks)
1665 {
1666 	paddr_t img_index, img_cur, r1s, r1e, r2s, r2e;
1667 	paddr_t copy_start, copy_end;
1668 	paddr_t piglet_base = hib->piglet_pa;
1669 	paddr_t piglet_end = piglet_base + HIBERNATE_CHUNK_SIZE;
1670 	daddr_t blkctr;
1671 	size_t processed, compressed_size, read_size;
1672 	int overlap, found, nchunks, nochunks = 0, nfchunks = 0, npchunks = 0;
1673 	int num_io_pages;
1674 	short *ochunks, *pchunks, *fchunks, i, j;
1675 	vaddr_t tempva = (vaddr_t)NULL, hibernate_fchunk_area = (vaddr_t)NULL;
1676 
1677 	global_pig_start = pig_start;
1678 
1679 	pmap_activate(curproc);
1680 
1681 	/*
1682 	 * These mappings go into the resuming kernel's page table, and are
1683 	 * used only during image read. They dissappear from existence
1684 	 * when the suspended kernel is unpacked on top of us.
1685 	 */
1686 	tempva = (vaddr_t)km_alloc(MAXPHYS + PAGE_SIZE, &kv_any, &kp_none,
1687 		&kd_nowait);
1688 	if (!tempva)
1689 		return (1);
1690 	hibernate_fchunk_area = (vaddr_t)km_alloc(24*PAGE_SIZE, &kv_any,
1691 	    &kp_none, &kd_nowait);
1692 	if (!hibernate_fchunk_area)
1693 		return (1);
1694 
1695 	/* Final output chunk ordering VA */
1696 	fchunks = (short *)hibernate_fchunk_area;
1697 
1698 	/* Piglet chunk ordering VA */
1699 	pchunks = (short *)(hibernate_fchunk_area + (8*PAGE_SIZE));
1700 
1701 	/* Final chunk ordering VA */
1702 	ochunks = (short *)(hibernate_fchunk_area + (16*PAGE_SIZE));
1703 
1704 	/* Map the chunk ordering region */
1705 	for(i=0; i<24 ; i++)
1706 		pmap_kenter_pa(hibernate_fchunk_area + (i*PAGE_SIZE),
1707 			piglet_base + ((4+i)*PAGE_SIZE), VM_PROT_ALL);
1708 	pmap_update(pmap_kernel());
1709 
1710 	nchunks = hib->chunk_ctr;
1711 
1712 	/* Initially start all chunks as unplaced */
1713 	for (i = 0; i < nchunks; i++)
1714 		chunks[i].flags = 0;
1715 
1716 	/*
1717 	 * Search the list for chunks that are outside the pig area. These
1718 	 * can be placed first in the final output list.
1719 	 */
1720 	for (i = 0; i < nchunks; i++) {
1721 		if (chunks[i].end <= pig_start || chunks[i].base >= pig_end) {
1722 			ochunks[nochunks] = i;
1723 			fchunks[nfchunks] = i;
1724 			nochunks++;
1725 			nfchunks++;
1726 			chunks[i].flags |= HIBERNATE_CHUNK_USED;
1727 		}
1728 	}
1729 
1730 	/*
1731 	 * Walk the ordering, place the chunks in ascending memory order.
1732 	 * Conflicts might arise, these are handled next.
1733 	 */
1734 	do {
1735 		img_index = -1;
1736 		found = 0;
1737 		j = -1;
1738 		for (i = 0; i < nchunks; i++)
1739 			if (chunks[i].base < img_index &&
1740 			    chunks[i].flags == 0 ) {
1741 				j = i;
1742 				img_index = chunks[i].base;
1743 			}
1744 
1745 		if (j != -1) {
1746 			found = 1;
1747 			ochunks[nochunks] = j;
1748 			nochunks++;
1749 			chunks[j].flags |= HIBERNATE_CHUNK_PLACED;
1750 		}
1751 	} while (found);
1752 
1753 	img_index = pig_start;
1754 
1755 	/*
1756 	 * Identify chunk output conflicts (chunks whose pig load area
1757 	 * corresponds to their original memory placement location)
1758 	 */
1759 	for (i = 0; i < nochunks ; i++) {
1760 		overlap = 0;
1761 		r1s = img_index;
1762 		r1e = img_index + chunks[ochunks[i]].compressed_size;
1763 		r2s = chunks[ochunks[i]].base;
1764 		r2e = chunks[ochunks[i]].end;
1765 
1766 		overlap = hibernate_check_overlap(r1s, r1e, r2s, r2e);
1767 		if (overlap)
1768 			chunks[ochunks[i]].flags |= HIBERNATE_CHUNK_CONFLICT;
1769 		img_index += chunks[ochunks[i]].compressed_size;
1770 	}
1771 
1772 	/*
1773 	 * Prepare the final output chunk list. Calculate an output
1774 	 * inflate strategy for overlapping chunks if needed.
1775 	 */
1776 	for (i = 0; i < nochunks ; i++) {
1777 		/*
1778 		 * If a conflict is detected, consume enough compressed
1779 		 * output chunks to fill the piglet
1780 		 */
1781 		if (chunks[ochunks[i]].flags & HIBERNATE_CHUNK_CONFLICT) {
1782 			copy_start = piglet_base;
1783 			copy_end = piglet_end;
1784 			npchunks = 0;
1785 			j = i;
1786 
1787 			while (copy_start < copy_end && j < nochunks) {
1788 				pchunks[npchunks] = ochunks[j];
1789 				npchunks++;
1790 				copy_start +=
1791 				    chunks[ochunks[j]].compressed_size;
1792 				i++;
1793 				j++;
1794 			}
1795 
1796 			for (j = 0; j < npchunks; j++) {
1797 				fchunks[nfchunks] = pchunks[j];
1798 				chunks[pchunks[j]].flags |=
1799 				    HIBERNATE_CHUNK_USED;
1800 				nfchunks++;
1801 			}
1802 		} else {
1803 			/*
1804 			 * No conflict, chunk can be added without copying
1805 			 */
1806 			if ((chunks[ochunks[i]].flags &
1807 			    HIBERNATE_CHUNK_USED) == 0) {
1808 				fchunks[nfchunks] = ochunks[i];
1809 				chunks[ochunks[i]].flags |=
1810 				    HIBERNATE_CHUNK_USED;
1811 				nfchunks++;
1812 			}
1813 		}
1814 	}
1815 
1816 	img_cur = pig_start;
1817 
1818 	for (i = 0; i < nfchunks; i++) {
1819 		blkctr = chunks[fchunks[i]].offset;
1820 		processed = 0;
1821 		compressed_size = chunks[fchunks[i]].compressed_size;
1822 
1823 		while (processed < compressed_size) {
1824 			if (compressed_size - processed >= MAXPHYS)
1825 				read_size = MAXPHYS;
1826 			else
1827 				read_size = compressed_size - processed;
1828 
1829 			/*
1830 			 * We're reading read_size bytes, offset from the
1831 			 * start of a page by img_cur % PAGE_SIZE, so the
1832 			 * end will be read_size + (img_cur % PAGE_SIZE)
1833 			 * from the start of the first page.  Round that
1834 			 * up to the next page size.
1835 			 */
1836 			num_io_pages = (read_size + (img_cur % PAGE_SIZE)
1837 				+ PAGE_SIZE - 1) / PAGE_SIZE;
1838 
1839 			KASSERT(num_io_pages <= MAXPHYS/PAGE_SIZE + 1);
1840 
1841 			/* Map pages for this read */
1842 			for (j = 0; j < num_io_pages; j ++)
1843 				pmap_kenter_pa(tempva + j * PAGE_SIZE,
1844 					img_cur + j * PAGE_SIZE, VM_PROT_ALL);
1845 
1846 			pmap_update(pmap_kernel());
1847 
1848 			hibernate_block_io(hib, blkctr, read_size,
1849 			    tempva + (img_cur & PAGE_MASK), 0);
1850 
1851 			blkctr += (read_size / DEV_BSIZE);
1852 
1853 			pmap_kremove(tempva, num_io_pages * PAGE_SIZE);
1854 			pmap_update(pmap_kernel());
1855 
1856 			processed += read_size;
1857 			img_cur += read_size;
1858 		}
1859 	}
1860 
1861 	pmap_kremove(hibernate_fchunk_area, PAGE_SIZE);
1862 	pmap_kremove((vaddr_t)pchunks, PAGE_SIZE);
1863 	pmap_kremove((vaddr_t)fchunks, PAGE_SIZE);
1864 	pmap_update(pmap_kernel());
1865 
1866 	return (0);
1867 }
1868 
1869 /*
1870  * Hibernating a machine comprises the following operations:
1871  *  1. Calculating this machine's hibernate_info information
1872  *  2. Allocating a piglet and saving the piglet's physaddr
1873  *  3. Calculating the memory chunks
1874  *  4. Writing the compressed chunks to disk
1875  *  5. Writing the chunk table
1876  *  6. Writing the signature block (hibernate_info)
1877  *
1878  * On most architectures, the function calling hibernate_suspend would
1879  * then power off the machine using some MD-specific implementation.
1880  */
1881 int
1882 hibernate_suspend(void)
1883 {
1884 	union hibernate_info hib;
1885 	u_long start, end;
1886 
1887 	/*
1888 	 * Calculate memory ranges, swap offsets, etc.
1889 	 * This also allocates a piglet whose physaddr is stored in
1890 	 * hib->piglet_pa and vaddr stored in hib->piglet_va
1891 	 */
1892 	if (get_hibernate_info(&hib, 1)) {
1893 		DPRINTF("failed to obtain hibernate info\n");
1894 		return (1);
1895 	}
1896 
1897 	/* Find a page-addressed region in swap [start,end] */
1898 	if (uvm_hibswap(hib.dev, &start, &end)) {
1899 		printf("cannot find any swap\n");
1900 		return (1);
1901 	}
1902 
1903 	if (end - start < 1000) {
1904 		printf("%lu\n is too small", end - start);
1905 		return (1);
1906 	}
1907 
1908 	/* Calculate block offsets in swap */
1909 	hib.image_offset = ctod(start);
1910 
1911 	/* XXX side effect */
1912 	DPRINTF("hibernate @ block %lld max-length %lu blocks\n",
1913 	    hib.image_offset, ctod(end) - ctod(start));
1914 
1915 	pmap_kenter_pa(HIBERNATE_HIBALLOC_PAGE, HIBERNATE_HIBALLOC_PAGE,
1916 		VM_PROT_ALL);
1917 	pmap_activate(curproc);
1918 
1919 	/* Stash the piglet VA so we can free it in the resuming kernel */
1920 	global_piglet_va = hib.piglet_va;
1921 
1922 	DPRINTF("hibernate: writing chunks\n");
1923 	if (hibernate_write_chunks(&hib)) {
1924 		DPRINTF("hibernate_write_chunks failed\n");
1925 		return (1);
1926 	}
1927 
1928 	DPRINTF("hibernate: writing chunktable\n");
1929 	if (hibernate_write_chunktable(&hib)) {
1930 		DPRINTF("hibernate_write_chunktable failed\n");
1931 		return (1);
1932 	}
1933 
1934 	DPRINTF("hibernate: writing signature\n");
1935 	if (hibernate_write_signature(&hib)) {
1936 		DPRINTF("hibernate_write_signature failed\n");
1937 		return (1);
1938 	}
1939 
1940 	/* Allow the disk to settle */
1941 	delay(500000);
1942 
1943 	/*
1944 	 * Give the device-specific I/O function a notification that we're
1945 	 * done, and that it can clean up or shutdown as needed.
1946 	 */
1947 	hib.io_func(hib.dev, 0, (vaddr_t)NULL, 0, HIB_DONE, hib.io_page);
1948 
1949 	return (0);
1950 }
1951 
1952 /*
1953  * Free items allocated by hibernate_suspend()
1954  */
1955 void
1956 hibernate_free(void)
1957 {
1958 	if (global_piglet_va)
1959 		uvm_pmr_free_piglet(global_piglet_va,
1960 		    3*HIBERNATE_CHUNK_SIZE);
1961 
1962 	if (hibernate_copy_page)
1963 		pmap_kremove(hibernate_copy_page, PAGE_SIZE);
1964 	if (hibernate_temp_page)
1965 		pmap_kremove(hibernate_temp_page, PAGE_SIZE);
1966 
1967 	pmap_update(pmap_kernel());
1968 
1969 	if (hibernate_copy_page)
1970 		km_free((void *)hibernate_copy_page, PAGE_SIZE,
1971 		    &kv_any, &kp_none);
1972 	if (hibernate_temp_page)
1973 		km_free((void *)hibernate_temp_page, PAGE_SIZE,
1974 		    &kv_any, &kp_none);
1975 
1976 	global_piglet_va = 0;
1977 	hibernate_copy_page = 0;
1978 	hibernate_temp_page = 0;
1979 }
1980