xref: /openbsd-src/sys/kern/subr_hibernate.c (revision 3fed8fd5e758c6f38637bc27240b81cee0599d72)
1 /*	$OpenBSD: subr_hibernate.c,v 1.131 2022/01/07 02:26:53 guenther Exp $	*/
2 
3 /*
4  * Copyright (c) 2011 Ariane van der Steldt <ariane@stack.nl>
5  * Copyright (c) 2011 Mike Larkin <mlarkin@openbsd.org>
6  *
7  * Permission to use, copy, modify, and distribute this software for any
8  * purpose with or without fee is hereby granted, provided that the above
9  * copyright notice and this permission notice appear in all copies.
10  *
11  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
12  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
14  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18  */
19 
20 #include <sys/hibernate.h>
21 #include <sys/malloc.h>
22 #include <sys/param.h>
23 #include <sys/tree.h>
24 #include <sys/systm.h>
25 #include <sys/disklabel.h>
26 #include <sys/disk.h>
27 #include <sys/conf.h>
28 #include <sys/buf.h>
29 #include <sys/fcntl.h>
30 #include <sys/stat.h>
31 #include <sys/atomic.h>
32 
33 #include <uvm/uvm.h>
34 #include <uvm/uvm_swap.h>
35 
36 #include <machine/hibernate.h>
37 
38 /* Make sure the signature can fit in one block */
39 CTASSERT(sizeof(union hibernate_info) <= DEV_BSIZE);
40 
41 /*
42  * Hibernate piglet layout information
43  *
44  * The piglet is a scratch area of memory allocated by the suspending kernel.
45  * Its phys and virt addrs are recorded in the signature block. The piglet is
46  * used to guarantee an unused area of memory that can be used by the resuming
47  * kernel for various things. The piglet is excluded during unpack operations.
48  * The piglet size is presently 4*HIBERNATE_CHUNK_SIZE (typically 4*4MB).
49  *
50  * Offset from piglet_base	Purpose
51  * ----------------------------------------------------------------------------
52  * 0				Private page for suspend I/O write functions
53  * 1*PAGE_SIZE			I/O page used during hibernate suspend
54  * 2*PAGE_SIZE			I/O page used during hibernate suspend
55  * 3*PAGE_SIZE			copy page used during hibernate suspend
56  * 4*PAGE_SIZE			final chunk ordering list (24 pages)
57  * 28*PAGE_SIZE			RLE utility page
58  * 29*PAGE_SIZE			preserved entropy
59  * 30*PAGE_SIZE			start of hiballoc area
60  * 110*PAGE_SIZE		end of hiballoc area (80 pages)
61  * 366*PAGE_SIZE		end of retguard preservation region (256 pages)
62  * ...				unused
63  * HIBERNATE_CHUNK_SIZE		start of hibernate chunk table
64  * 2*HIBERNATE_CHUNK_SIZE	bounce area for chunks being unpacked
65  * 4*HIBERNATE_CHUNK_SIZE	end of piglet
66  */
67 
/* Temporary vaddr ranges used during hibernate */
vaddr_t hibernate_temp_page;
vaddr_t hibernate_copy_page;
vaddr_t hibernate_rle_page;

/* Hibernate info as read from disk during resume */
union hibernate_info disk_hib;

/*
 * Global copy of the pig start address. This needs to be a global as we
 * switch stacks after computing it - it can't be stored on the stack.
 */
paddr_t global_pig_start;

/*
 * Global copies of the piglet start addresses (PA/VA). We store these
 * as globals to avoid having to carry them around as parameters, as the
 * piglet is allocated early and freed late - its lifecycle extends beyond
 * that of the hibernate info union which is calculated on suspend/resume.
 *
 * get_hibernate_info() copies both into the hibernate info on suspend and
 * hib_getentropy() derives the preserved entropy address from the VA.
 */
vaddr_t global_piglet_va;
paddr_t global_piglet_pa;
90 
/*
 * Debug output control: define HIB_DEBUG (below) to compile in the
 * DPRINTF/DNPRINTF messages. Without it both macros expand to nothing.
 */
/* #define HIB_DEBUG */
#ifdef HIB_DEBUG
int	hib_debug = 99;
/* Print whenever hib_debug is non-zero. */
#define DPRINTF(x...)     do { if (hib_debug) printf(x); } while (0)
/* Print only when hib_debug is greater than n. */
#define DNPRINTF(n,x...)  do { if (hib_debug > (n)) printf(x); } while (0)
#else
#define DPRINTF(x...)
#define DNPRINTF(n,x...)
#endif

#ifndef NO_PROPOLICE
/* Stack protector guard; get_hibernate_info() saves it into hib->guard. */
extern long __guard_local;
#endif /* ! NO_PROPOLICE */

/* Forward declarations of helpers defined later in this file. */
void hibernate_copy_chunk_to_piglet(paddr_t, vaddr_t, size_t);
int hibernate_calc_rle(paddr_t, paddr_t);
int hibernate_write_rle(union hibernate_info *, paddr_t, paddr_t, daddr_t *,
	size_t *);

/* Number of pages in one hibernate chunk. */
#define MAX_RLE (HIBERNATE_CHUNK_SIZE / PAGE_SIZE)

/*
 * Hib alloc enforced alignment.
 */
#define HIB_ALIGN		8 /* bytes alignment */

/*
 * sizeof builtin operation, but with alignment constraint:
 * the size of _type rounded up to a multiple of HIB_ALIGN.
 */
#define HIB_SIZEOF(_type)	roundup(sizeof(_type), HIB_ALIGN)
121 
/*
 * Bookkeeping header stored directly in front of each hiballoc region.
 * The managed memory starts HIB_SIZEOF(struct hiballoc_entry) bytes after
 * the header (see hib_entry_to_addr()).
 */
struct hiballoc_entry {
	size_t			hibe_use;	/* bytes handed out to the caller */
	size_t			hibe_space;	/* free bytes following the used bytes */
	RBT_ENTRY(hiballoc_entry) hibe_entry;	/* linkage in the arena's address tree */
};
127 
128 /*
129  * Sort hibernate memory ranges by ascending PA
130  */
131 void
132 hibernate_sort_ranges(union hibernate_info *hib_info)
133 {
134 	int i, j;
135 	struct hibernate_memory_range *ranges;
136 	paddr_t base, end;
137 
138 	ranges = hib_info->ranges;
139 
140 	for (i = 1; i < hib_info->nranges; i++) {
141 		j = i;
142 		while (j > 0 && ranges[j - 1].base > ranges[j].base) {
143 			base = ranges[j].base;
144 			end = ranges[j].end;
145 			ranges[j].base = ranges[j - 1].base;
146 			ranges[j].end = ranges[j - 1].end;
147 			ranges[j - 1].base = base;
148 			ranges[j - 1].end = end;
149 			j--;
150 		}
151 	}
152 }
153 
154 /*
155  * Compare hiballoc entries based on the address they manage.
156  *
157  * Since the address is fixed, relative to struct hiballoc_entry,
158  * we just compare the hiballoc_entry pointers.
159  */
160 static __inline int
161 hibe_cmp(const struct hiballoc_entry *l, const struct hiballoc_entry *r)
162 {
163 	vaddr_t vl = (vaddr_t)l;
164 	vaddr_t vr = (vaddr_t)r;
165 
166 	return vl < vr ? -1 : (vl > vr);
167 }
168 
169 RBT_PROTOTYPE(hiballoc_addr, hiballoc_entry, hibe_entry, hibe_cmp)
170 
171 /*
172  * Given a hiballoc entry, return the address it manages.
173  */
174 static __inline void *
175 hib_entry_to_addr(struct hiballoc_entry *entry)
176 {
177 	caddr_t addr;
178 
179 	addr = (caddr_t)entry;
180 	addr += HIB_SIZEOF(struct hiballoc_entry);
181 	return addr;
182 }
183 
184 /*
185  * Given an address, find the hiballoc that corresponds.
186  */
187 static __inline struct hiballoc_entry*
188 hib_addr_to_entry(void *addr_param)
189 {
190 	caddr_t addr;
191 
192 	addr = (caddr_t)addr_param;
193 	addr -= HIB_SIZEOF(struct hiballoc_entry);
194 	return (struct hiballoc_entry*)addr;
195 }
196 
197 RBT_GENERATE(hiballoc_addr, hiballoc_entry, hibe_entry, hibe_cmp);
198 
199 /*
200  * Allocate memory from the arena.
201  *
202  * Returns NULL if no memory is available.
203  */
204 void *
205 hib_alloc(struct hiballoc_arena *arena, size_t alloc_sz)
206 {
207 	struct hiballoc_entry *entry, *new_entry;
208 	size_t find_sz;
209 
210 	/*
211 	 * Enforce alignment of HIB_ALIGN bytes.
212 	 *
213 	 * Note that, because the entry is put in front of the allocation,
214 	 * 0-byte allocations are guaranteed a unique address.
215 	 */
216 	alloc_sz = roundup(alloc_sz, HIB_ALIGN);
217 
218 	/*
219 	 * Find an entry with hibe_space >= find_sz.
220 	 *
221 	 * If the root node is not large enough, we switch to tree traversal.
222 	 * Because all entries are made at the bottom of the free space,
223 	 * traversal from the end has a slightly better chance of yielding
224 	 * a sufficiently large space.
225 	 */
226 	find_sz = alloc_sz + HIB_SIZEOF(struct hiballoc_entry);
227 	entry = RBT_ROOT(hiballoc_addr, &arena->hib_addrs);
228 	if (entry != NULL && entry->hibe_space < find_sz) {
229 		RBT_FOREACH_REVERSE(entry, hiballoc_addr, &arena->hib_addrs) {
230 			if (entry->hibe_space >= find_sz)
231 				break;
232 		}
233 	}
234 
235 	/*
236 	 * Insufficient or too fragmented memory.
237 	 */
238 	if (entry == NULL)
239 		return NULL;
240 
241 	/*
242 	 * Create new entry in allocated space.
243 	 */
244 	new_entry = (struct hiballoc_entry*)(
245 	    (caddr_t)hib_entry_to_addr(entry) + entry->hibe_use);
246 	new_entry->hibe_space = entry->hibe_space - find_sz;
247 	new_entry->hibe_use = alloc_sz;
248 
249 	/*
250 	 * Insert entry.
251 	 */
252 	if (RBT_INSERT(hiballoc_addr, &arena->hib_addrs, new_entry) != NULL)
253 		panic("hib_alloc: insert failure");
254 	entry->hibe_space = 0;
255 
256 	/* Return address managed by entry. */
257 	return hib_entry_to_addr(new_entry);
258 }
259 
260 void
261 hib_getentropy(char **bufp, size_t *bufplen)
262 {
263 	if (!bufp || !bufplen)
264 		return;
265 
266 	*bufp = (char *)(global_piglet_va + (29 * PAGE_SIZE));
267 	*bufplen = PAGE_SIZE;
268 }
269 
270 /*
271  * Free a pointer previously allocated from this arena.
272  *
273  * If addr is NULL, this will be silently accepted.
274  */
275 void
276 hib_free(struct hiballoc_arena *arena, void *addr)
277 {
278 	struct hiballoc_entry *entry, *prev;
279 
280 	if (addr == NULL)
281 		return;
282 
283 	/*
284 	 * Derive entry from addr and check it is really in this arena.
285 	 */
286 	entry = hib_addr_to_entry(addr);
287 	if (RBT_FIND(hiballoc_addr, &arena->hib_addrs, entry) != entry)
288 		panic("hib_free: freed item %p not in hib arena", addr);
289 
290 	/*
291 	 * Give the space in entry to its predecessor.
292 	 *
293 	 * If entry has no predecessor, change its used space into free space
294 	 * instead.
295 	 */
296 	prev = RBT_PREV(hiballoc_addr, entry);
297 	if (prev != NULL &&
298 	    (void *)((caddr_t)prev + HIB_SIZEOF(struct hiballoc_entry) +
299 	    prev->hibe_use + prev->hibe_space) == entry) {
300 		/* Merge entry. */
301 		RBT_REMOVE(hiballoc_addr, &arena->hib_addrs, entry);
302 		prev->hibe_space += HIB_SIZEOF(struct hiballoc_entry) +
303 		    entry->hibe_use + entry->hibe_space;
304 	} else {
305 		/* Flip used memory to free space. */
306 		entry->hibe_space += entry->hibe_use;
307 		entry->hibe_use = 0;
308 	}
309 }
310 
311 /*
312  * Initialize hiballoc.
313  *
314  * The allocator will manage memory at ptr, which is len bytes.
315  */
316 int
317 hiballoc_init(struct hiballoc_arena *arena, void *p_ptr, size_t p_len)
318 {
319 	struct hiballoc_entry *entry;
320 	caddr_t ptr;
321 	size_t len;
322 
323 	RBT_INIT(hiballoc_addr, &arena->hib_addrs);
324 
325 	/*
326 	 * Hib allocator enforces HIB_ALIGN alignment.
327 	 * Fixup ptr and len.
328 	 */
329 	ptr = (caddr_t)roundup((vaddr_t)p_ptr, HIB_ALIGN);
330 	len = p_len - ((size_t)ptr - (size_t)p_ptr);
331 	len &= ~((size_t)HIB_ALIGN - 1);
332 
333 	/*
334 	 * Insufficient memory to be able to allocate and also do bookkeeping.
335 	 */
336 	if (len <= HIB_SIZEOF(struct hiballoc_entry))
337 		return ENOMEM;
338 
339 	/*
340 	 * Create entry describing space.
341 	 */
342 	entry = (struct hiballoc_entry*)ptr;
343 	entry->hibe_use = 0;
344 	entry->hibe_space = len - HIB_SIZEOF(struct hiballoc_entry);
345 	RBT_INSERT(hiballoc_addr, &arena->hib_addrs, entry);
346 
347 	return 0;
348 }
349 
350 /*
351  * Zero all free memory.
352  */
353 void
354 uvm_pmr_zero_everything(void)
355 {
356 	struct uvm_pmemrange	*pmr;
357 	struct vm_page		*pg;
358 	int			 i;
359 
360 	uvm_lock_fpageq();
361 	TAILQ_FOREACH(pmr, &uvm.pmr_control.use, pmr_use) {
362 		/* Zero single pages. */
363 		while ((pg = TAILQ_FIRST(&pmr->single[UVM_PMR_MEMTYPE_DIRTY]))
364 		    != NULL) {
365 			uvm_pmr_remove(pmr, pg);
366 			uvm_pagezero(pg);
367 			atomic_setbits_int(&pg->pg_flags, PG_ZERO);
368 			uvmexp.zeropages++;
369 			uvm_pmr_insert(pmr, pg, 0);
370 		}
371 
372 		/* Zero multi page ranges. */
373 		while ((pg = RBT_ROOT(uvm_pmr_size,
374 		    &pmr->size[UVM_PMR_MEMTYPE_DIRTY])) != NULL) {
375 			pg--; /* Size tree always has second page. */
376 			uvm_pmr_remove(pmr, pg);
377 			for (i = 0; i < pg->fpgsz; i++) {
378 				uvm_pagezero(&pg[i]);
379 				atomic_setbits_int(&pg[i].pg_flags, PG_ZERO);
380 				uvmexp.zeropages++;
381 			}
382 			uvm_pmr_insert(pmr, pg, 0);
383 		}
384 	}
385 	uvm_unlock_fpageq();
386 }
387 
388 /*
389  * Mark all memory as dirty.
390  *
391  * Used to inform the system that the clean memory isn't clean for some
392  * reason, for example because we just came back from hibernate.
393  */
394 void
395 uvm_pmr_dirty_everything(void)
396 {
397 	struct uvm_pmemrange	*pmr;
398 	struct vm_page		*pg;
399 	int			 i;
400 
401 	uvm_lock_fpageq();
402 	TAILQ_FOREACH(pmr, &uvm.pmr_control.use, pmr_use) {
403 		/* Dirty single pages. */
404 		while ((pg = TAILQ_FIRST(&pmr->single[UVM_PMR_MEMTYPE_ZERO]))
405 		    != NULL) {
406 			uvm_pmr_remove(pmr, pg);
407 			atomic_clearbits_int(&pg->pg_flags, PG_ZERO);
408 			uvm_pmr_insert(pmr, pg, 0);
409 		}
410 
411 		/* Dirty multi page ranges. */
412 		while ((pg = RBT_ROOT(uvm_pmr_size,
413 		    &pmr->size[UVM_PMR_MEMTYPE_ZERO])) != NULL) {
414 			pg--; /* Size tree always has second page. */
415 			uvm_pmr_remove(pmr, pg);
416 			for (i = 0; i < pg->fpgsz; i++)
417 				atomic_clearbits_int(&pg[i].pg_flags, PG_ZERO);
418 			uvm_pmr_insert(pmr, pg, 0);
419 		}
420 	}
421 
422 	uvmexp.zeropages = 0;
423 	uvm_unlock_fpageq();
424 }
425 
426 /*
427  * Allocate an area that can hold sz bytes and doesn't overlap with
428  * the piglet at piglet_pa.
429  */
430 int
431 uvm_pmr_alloc_pig(paddr_t *pa, psize_t sz, paddr_t piglet_pa)
432 {
433 	struct uvm_constraint_range pig_constraint;
434 	struct kmem_pa_mode kp_pig = {
435 		.kp_constraint = &pig_constraint,
436 		.kp_maxseg = 1
437 	};
438 	vaddr_t va;
439 
440 	sz = round_page(sz);
441 
442 	pig_constraint.ucr_low = piglet_pa + 4 * HIBERNATE_CHUNK_SIZE;
443 	pig_constraint.ucr_high = -1;
444 
445 	va = (vaddr_t)km_alloc(sz, &kv_any, &kp_pig, &kd_nowait);
446 	if (va == 0) {
447 		pig_constraint.ucr_low = 0;
448 		pig_constraint.ucr_high = piglet_pa - 1;
449 
450 		va = (vaddr_t)km_alloc(sz, &kv_any, &kp_pig, &kd_nowait);
451 		if (va == 0)
452 			return ENOMEM;
453 	}
454 
455 	pmap_extract(pmap_kernel(), va, pa);
456 	return 0;
457 }
458 
459 /*
460  * Allocate a piglet area.
461  *
462  * This needs to be in DMA-safe memory.
463  * Piglets are aligned.
464  *
465  * sz and align in bytes.
466  *
467  * The call will sleep for the pagedaemon to attempt to free memory.
468  * The pagedaemon may decide its not possible to free enough memory, causing
469  * the allocation to fail.
470  */
471 int
472 uvm_pmr_alloc_piglet(vaddr_t *va, paddr_t *pa, vsize_t sz, paddr_t align)
473 {
474 	struct kmem_pa_mode kp_piglet = {
475 		.kp_constraint = &dma_constraint,
476 		.kp_align = align,
477 		.kp_maxseg = 1
478 	};
479 
480 	/* Ensure align is a power of 2 */
481 	KASSERT((align & (align - 1)) == 0);
482 
483 	/*
484 	 * Fixup arguments: align must be at least PAGE_SIZE,
485 	 * sz will be converted to pagecount, since that is what
486 	 * pmemrange uses internally.
487 	 */
488 	if (align < PAGE_SIZE)
489 		kp_piglet.kp_align = PAGE_SIZE;
490 
491 	sz = round_page(sz);
492 
493 	*va = (vaddr_t)km_alloc(sz, &kv_any, &kp_piglet, &kd_nowait);
494 	if (*va == 0)
495 		return ENOMEM;
496 
497 	pmap_extract(pmap_kernel(), *va, pa);
498 	return 0;
499 }
500 
501 /*
502  * Free a piglet area.
503  */
504 void
505 uvm_pmr_free_piglet(vaddr_t va, vsize_t sz)
506 {
507 	/*
508 	 * Fix parameters.
509 	 */
510 	sz = round_page(sz);
511 
512 	/*
513 	 * Free the physical and virtual memory.
514 	 */
515 	km_free((void *)va, sz, &kv_any, &kp_dma_contig);
516 }
517 
518 /*
519  * Physmem RLE compression support.
520  *
521  * Given a physical page address, return the number of pages starting at the
522  * address that are free.  Clamps to the number of pages in
523  * HIBERNATE_CHUNK_SIZE. Returns 0 if the page at addr is not free.
524  */
525 int
526 uvm_page_rle(paddr_t addr)
527 {
528 	struct vm_page		*pg, *pg_end;
529 	struct vm_physseg	*vmp;
530 	int			 pseg_idx, off_idx;
531 
532 	pseg_idx = vm_physseg_find(atop(addr), &off_idx);
533 	if (pseg_idx == -1)
534 		return 0;
535 
536 	vmp = &vm_physmem[pseg_idx];
537 	pg = &vmp->pgs[off_idx];
538 	if (!(pg->pg_flags & PQ_FREE))
539 		return 0;
540 
541 	/*
542 	 * Search for the first non-free page after pg.
543 	 * Note that the page may not be the first page in a free pmemrange,
544 	 * therefore pg->fpgsz cannot be used.
545 	 */
546 	for (pg_end = pg; pg_end <= vmp->lastpg &&
547 	    (pg_end->pg_flags & PQ_FREE) == PQ_FREE &&
548 	    (pg_end - pg) < HIBERNATE_CHUNK_SIZE/PAGE_SIZE; pg_end++)
549 		;
550 	return pg_end - pg;
551 }
552 
553 /*
554  * Calculate a hopefully unique version # for this kernel, based upon
555  * how it was linked.
556  */
557 u_int32_t
558 hibsum(void)
559 {
560 	return ((long)malloc ^ (long)km_alloc ^ (long)printf ^ (long)strlen);
561 }
562 
563 
564 /*
565  * Fills out the hibernate_info union pointed to by hib
566  * with information about this machine (swap signature block
567  * offsets, number of memory ranges, kernel in use, etc)
568  */
569 int
570 get_hibernate_info(union hibernate_info *hib, int suspend)
571 {
572 	struct disklabel dl;
573 	char err_string[128], *dl_ret;
574 	int part;
575 
576 #ifndef NO_PROPOLICE
577 	/* Save propolice guard */
578 	hib->guard = __guard_local;
579 #endif /* ! NO_PROPOLICE */
580 
581 	/* Determine I/O function to use */
582 	hib->io_func = get_hibernate_io_function(swdevt[0].sw_dev);
583 	if (hib->io_func == NULL)
584 		return (1);
585 
586 	/* Calculate hibernate device */
587 	hib->dev = swdevt[0].sw_dev;
588 
589 	/* Read disklabel (used to calculate signature and image offsets) */
590 	dl_ret = disk_readlabel(&dl, hib->dev, err_string, sizeof(err_string));
591 
592 	if (dl_ret) {
593 		printf("Hibernate error reading disklabel: %s\n", dl_ret);
594 		return (1);
595 	}
596 
597 	/* Make sure we have a swap partition. */
598 	part = DISKPART(hib->dev);
599 	if (dl.d_npartitions <= part ||
600 	    dl.d_partitions[part].p_fstype != FS_SWAP ||
601 	    DL_GETPSIZE(&dl.d_partitions[part]) == 0)
602 		return (1);
603 
604 	/* Magic number */
605 	hib->magic = HIBERNATE_MAGIC;
606 
607 	/* Calculate signature block location */
608 	hib->sig_offset = DL_GETPSIZE(&dl.d_partitions[part]) -
609 	    sizeof(union hibernate_info)/DEV_BSIZE;
610 
611 	/* Stash kernel version information */
612 	memset(&hib->kernel_version, 0, 128);
613 	bcopy(version, &hib->kernel_version,
614 	    min(strlen(version), sizeof(hib->kernel_version)-1));
615 	hib->kernel_sum = hibsum();
616 
617 	if (suspend) {
618 		/* Grab the previously-allocated piglet addresses */
619 		hib->piglet_va = global_piglet_va;
620 		hib->piglet_pa = global_piglet_pa;
621 		hib->io_page = (void *)hib->piglet_va;
622 
623 		/*
624 		 * Initialization of the hibernate IO function for drivers
625 		 * that need to do prep work (such as allocating memory or
626 		 * setting up data structures that cannot safely be done
627 		 * during suspend without causing side effects). There is
628 		 * a matching HIB_DONE call performed after the write is
629 		 * completed.
630 		 */
631 		if (hib->io_func(hib->dev, DL_GETPOFFSET(&dl.d_partitions[part]),
632 		    (vaddr_t)NULL, DL_GETPSIZE(&dl.d_partitions[part]),
633 		    HIB_INIT, hib->io_page))
634 			goto fail;
635 
636 	} else {
637 		/*
638 		 * Resuming kernels use a regular private page for the driver
639 		 * No need to free this I/O page as it will vanish as part of
640 		 * the resume.
641 		 */
642 		hib->io_page = malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT);
643 		if (!hib->io_page)
644 			goto fail;
645 	}
646 
647 	if (get_hibernate_info_md(hib))
648 		goto fail;
649 
650 	return (0);
651 
652 fail:
653 	return (1);
654 }
655 
656 /*
657  * Allocate nitems*size bytes from the hiballoc area presently in use
658  */
659 void *
660 hibernate_zlib_alloc(void *unused, int nitems, int size)
661 {
662 	struct hibernate_zlib_state *hibernate_state;
663 
664 	hibernate_state =
665 	    (struct hibernate_zlib_state *)HIBERNATE_HIBALLOC_PAGE;
666 
667 	return hib_alloc(&hibernate_state->hiballoc_arena, nitems*size);
668 }
669 
670 /*
671  * Free the memory pointed to by addr in the hiballoc area presently in
672  * use
673  */
674 void
675 hibernate_zlib_free(void *unused, void *addr)
676 {
677 	struct hibernate_zlib_state *hibernate_state;
678 
679 	hibernate_state =
680 	    (struct hibernate_zlib_state *)HIBERNATE_HIBALLOC_PAGE;
681 
682 	hib_free(&hibernate_state->hiballoc_arena, addr);
683 }
684 
685 /*
686  * Inflate next page of data from the image stream.
687  * The rle parameter is modified on exit to contain the number of pages to
688  * skip in the output stream (or 0 if this page was inflated into).
689  *
690  * Returns 0 if the stream contains additional data, or 1 if the stream is
691  * finished.
692  */
693 int
694 hibernate_inflate_page(int *rle)
695 {
696 	struct hibernate_zlib_state *hibernate_state;
697 	int i;
698 
699 	hibernate_state =
700 	    (struct hibernate_zlib_state *)HIBERNATE_HIBALLOC_PAGE;
701 
702 	/* Set up the stream for RLE code inflate */
703 	hibernate_state->hib_stream.next_out = (unsigned char *)rle;
704 	hibernate_state->hib_stream.avail_out = sizeof(*rle);
705 
706 	/* Inflate RLE code */
707 	i = inflate(&hibernate_state->hib_stream, Z_SYNC_FLUSH);
708 	if (i != Z_OK && i != Z_STREAM_END) {
709 		/*
710 		 * XXX - this will likely reboot/hang most machines
711 		 *       since the console output buffer will be unmapped,
712 		 *       but there's not much else we can do here.
713 		 */
714 		panic("rle inflate stream error");
715 	}
716 
717 	if (hibernate_state->hib_stream.avail_out != 0) {
718 		/*
719 		 * XXX - this will likely reboot/hang most machines
720 		 *       since the console output buffer will be unmapped,
721 		 *       but there's not much else we can do here.
722 		 */
723 		panic("rle short inflate error");
724 	}
725 
726 	if (*rle < 0 || *rle > 1024) {
727 		/*
728 		 * XXX - this will likely reboot/hang most machines
729 		 *       since the console output buffer will be unmapped,
730 		 *       but there's not much else we can do here.
731 		 */
732 		panic("invalid rle count");
733 	}
734 
735 	if (i == Z_STREAM_END)
736 		return (1);
737 
738 	if (*rle != 0)
739 		return (0);
740 
741 	/* Set up the stream for page inflate */
742 	hibernate_state->hib_stream.next_out =
743 		(unsigned char *)HIBERNATE_INFLATE_PAGE;
744 	hibernate_state->hib_stream.avail_out = PAGE_SIZE;
745 
746 	/* Process next block of data */
747 	i = inflate(&hibernate_state->hib_stream, Z_SYNC_FLUSH);
748 	if (i != Z_OK && i != Z_STREAM_END) {
749 		/*
750 		 * XXX - this will likely reboot/hang most machines
751 		 *       since the console output buffer will be unmapped,
752 		 *       but there's not much else we can do here.
753 		 */
754 		panic("inflate error");
755 	}
756 
757 	/* We should always have extracted a full page ... */
758 	if (hibernate_state->hib_stream.avail_out != 0) {
759 		/*
760 		 * XXX - this will likely reboot/hang most machines
761 		 *       since the console output buffer will be unmapped,
762 		 *       but there's not much else we can do here.
763 		 */
764 		panic("incomplete page");
765 	}
766 
767 	return (i == Z_STREAM_END);
768 }
769 
770 /*
771  * Inflate size bytes from src into dest, skipping any pages in
772  * [src..dest] that are special (see hibernate_inflate_skip)
773  *
774  * This function executes while using the resume-time stack
775  * and pmap, and therefore cannot use ddb/printf/etc. Doing so
776  * will likely hang or reset the machine since the console output buffer
777  * will be unmapped.
778  */
779 void
780 hibernate_inflate_region(union hibernate_info *hib, paddr_t dest,
781     paddr_t src, size_t size)
782 {
783 	int end_stream = 0, rle, skip;
784 	struct hibernate_zlib_state *hibernate_state;
785 
786 	hibernate_state =
787 	    (struct hibernate_zlib_state *)HIBERNATE_HIBALLOC_PAGE;
788 
789 	hibernate_state->hib_stream.next_in = (unsigned char *)src;
790 	hibernate_state->hib_stream.avail_in = size;
791 
792 	do {
793 		/*
794 		 * Is this a special page? If yes, redirect the
795 		 * inflate output to a scratch page (eg, discard it)
796 		 */
797 		skip = hibernate_inflate_skip(hib, dest);
798 		if (skip == HIB_SKIP) {
799 			hibernate_enter_resume_mapping(
800 			    HIBERNATE_INFLATE_PAGE,
801 			    HIBERNATE_INFLATE_PAGE, 0);
802 		} else if (skip == HIB_MOVE) {
803 			/*
804 			 * Special case : retguard region. This gets moved
805 			 * temporarily into the piglet region and copied into
806 			 * place immediately before resume
807 			 */
808 			hibernate_enter_resume_mapping(
809 			    HIBERNATE_INFLATE_PAGE,
810 			    hib->piglet_pa + (110 * PAGE_SIZE) +
811 			    hib->retguard_ofs, 0);
812 			hib->retguard_ofs += PAGE_SIZE;
813 			if (hib->retguard_ofs > 255 * PAGE_SIZE) {
814 				/*
815 				 * XXX - this will likely reboot/hang most
816 				 *       machines since the console output
817 				 *       buffer will be unmapped, but there's
818 				 *       not much else we can do here.
819 				 */
820 				panic("retguard move error, out of space");
821 			}
822 		} else {
823 			hibernate_enter_resume_mapping(
824 			    HIBERNATE_INFLATE_PAGE, dest, 0);
825 		}
826 
827 		hibernate_flush();
828 		end_stream = hibernate_inflate_page(&rle);
829 
830 		if (rle == 0)
831 			dest += PAGE_SIZE;
832 		else
833 			dest += (rle * PAGE_SIZE);
834 	} while (!end_stream);
835 }
836 
837 /*
838  * deflate from src into the I/O page, up to 'remaining' bytes
839  *
840  * Returns number of input bytes consumed, and may reset
841  * the 'remaining' parameter if not all the output space was consumed
842  * (this information is needed to know how much to write to disk)
843  */
844 size_t
845 hibernate_deflate(union hibernate_info *hib, paddr_t src,
846     size_t *remaining)
847 {
848 	vaddr_t hibernate_io_page = hib->piglet_va + PAGE_SIZE;
849 	struct hibernate_zlib_state *hibernate_state;
850 
851 	hibernate_state =
852 	    (struct hibernate_zlib_state *)HIBERNATE_HIBALLOC_PAGE;
853 
854 	/* Set up the stream for deflate */
855 	hibernate_state->hib_stream.next_in = (unsigned char *)src;
856 	hibernate_state->hib_stream.avail_in = PAGE_SIZE - (src & PAGE_MASK);
857 	hibernate_state->hib_stream.next_out =
858 		(unsigned char *)hibernate_io_page + (PAGE_SIZE - *remaining);
859 	hibernate_state->hib_stream.avail_out = *remaining;
860 
861 	/* Process next block of data */
862 	if (deflate(&hibernate_state->hib_stream, Z_SYNC_FLUSH) != Z_OK)
863 		panic("hibernate zlib deflate error");
864 
865 	/* Update pointers and return number of bytes consumed */
866 	*remaining = hibernate_state->hib_stream.avail_out;
867 	return (PAGE_SIZE - (src & PAGE_MASK)) -
868 	    hibernate_state->hib_stream.avail_in;
869 }
870 
871 /*
872  * Write the hibernation information specified in hiber_info
873  * to the location in swap previously calculated (last block of
874  * swap), called the "signature block".
875  */
876 int
877 hibernate_write_signature(union hibernate_info *hib)
878 {
879 	/* Write hibernate info to disk */
880 	return (hib->io_func(hib->dev, hib->sig_offset,
881 	    (vaddr_t)hib, DEV_BSIZE, HIB_W,
882 	    hib->io_page));
883 }
884 
885 /*
886  * Write the memory chunk table to the area in swap immediately
887  * preceding the signature block. The chunk table is stored
888  * in the piglet when this function is called.  Returns errno.
889  */
890 int
891 hibernate_write_chunktable(union hibernate_info *hib)
892 {
893 	vaddr_t hibernate_chunk_table_start;
894 	size_t hibernate_chunk_table_size;
895 	int i, err;
896 
897 	hibernate_chunk_table_size = HIBERNATE_CHUNK_TABLE_SIZE;
898 
899 	hibernate_chunk_table_start = hib->piglet_va +
900 	    HIBERNATE_CHUNK_SIZE;
901 
902 	/* Write chunk table */
903 	for (i = 0; i < hibernate_chunk_table_size; i += MAXPHYS) {
904 		if ((err = hib->io_func(hib->dev,
905 		    hib->chunktable_offset + (i/DEV_BSIZE),
906 		    (vaddr_t)(hibernate_chunk_table_start + i),
907 		    MAXPHYS, HIB_W, hib->io_page))) {
908 			DPRINTF("chunktable write error: %d\n", err);
909 			return (err);
910 		}
911 	}
912 
913 	return (0);
914 }
915 
916 /*
917  * Write an empty hiber_info to the swap signature block, which is
918  * guaranteed to not match any valid hib.
919  */
920 int
921 hibernate_clear_signature(void)
922 {
923 	union hibernate_info blank_hiber_info;
924 	union hibernate_info hib;
925 
926 	/* Zero out a blank hiber_info */
927 	memset(&blank_hiber_info, 0, sizeof(union hibernate_info));
928 
929 	/* Get the signature block location */
930 	if (get_hibernate_info(&hib, 0))
931 		return (1);
932 
933 	/* Write (zeroed) hibernate info to disk */
934 	DPRINTF("clearing hibernate signature block location: %lld\n",
935 		hib.sig_offset);
936 	if (hibernate_block_io(&hib,
937 	    hib.sig_offset,
938 	    DEV_BSIZE, (vaddr_t)&blank_hiber_info, 1))
939 		printf("Warning: could not clear hibernate signature\n");
940 
941 	return (0);
942 }
943 
944 /*
945  * Compare two hibernate_infos to determine if they are the same (eg,
946  * we should be performing a hibernate resume on this machine.
947  * Not all fields are checked - just enough to verify that the machine
948  * has the same memory configuration and kernel as the one that
949  * wrote the signature previously.
950  */
951 int
952 hibernate_compare_signature(union hibernate_info *mine,
953     union hibernate_info *disk)
954 {
955 	u_int i;
956 
957 	if (mine->nranges != disk->nranges) {
958 		printf("unhibernate failed: memory layout changed\n");
959 		return (1);
960 	}
961 
962 	if (strcmp(mine->kernel_version, disk->kernel_version) != 0) {
963 		printf("unhibernate failed: original kernel changed\n");
964 		return (1);
965 	}
966 
967 	if (hibsum() != disk->kernel_sum) {
968 		printf("unhibernate failed: original kernel changed\n");
969 		return (1);
970 	}
971 
972 	for (i = 0; i < mine->nranges; i++) {
973 		if ((mine->ranges[i].base != disk->ranges[i].base) ||
974 		    (mine->ranges[i].end != disk->ranges[i].end) ) {
975 			DPRINTF("hib range %d mismatch [%p-%p != %p-%p]\n",
976 				i,
977 				(void *)mine->ranges[i].base,
978 				(void *)mine->ranges[i].end,
979 				(void *)disk->ranges[i].base,
980 				(void *)disk->ranges[i].end);
981 			printf("unhibernate failed: memory size changed\n");
982 			return (1);
983 		}
984 	}
985 
986 	return (0);
987 }
988 
989 /*
990  * Transfers xfer_size bytes between the hibernate device specified in
991  * hib_info at offset blkctr and the vaddr specified at dest.
992  *
993  * Separate offsets and pages are used to handle misaligned reads (reads
994  * that span a page boundary).
995  *
996  * blkctr specifies a relative offset (relative to the start of swap),
997  * not an absolute disk offset
998  *
999  */
1000 int
1001 hibernate_block_io(union hibernate_info *hib, daddr_t blkctr,
1002     size_t xfer_size, vaddr_t dest, int iswrite)
1003 {
1004 	struct buf *bp;
1005 	struct bdevsw *bdsw;
1006 	int error;
1007 
1008 	bp = geteblk(xfer_size);
1009 	bdsw = &bdevsw[major(hib->dev)];
1010 
1011 	error = (*bdsw->d_open)(hib->dev, FREAD, S_IFCHR, curproc);
1012 	if (error) {
1013 		printf("hibernate_block_io open failed\n");
1014 		return (1);
1015 	}
1016 
1017 	if (iswrite)
1018 		bcopy((caddr_t)dest, bp->b_data, xfer_size);
1019 
1020 	bp->b_bcount = xfer_size;
1021 	bp->b_blkno = blkctr;
1022 	CLR(bp->b_flags, B_READ | B_WRITE | B_DONE);
1023 	SET(bp->b_flags, B_BUSY | (iswrite ? B_WRITE : B_READ) | B_RAW);
1024 	bp->b_dev = hib->dev;
1025 	(*bdsw->d_strategy)(bp);
1026 
1027 	error = biowait(bp);
1028 	if (error) {
1029 		printf("hib block_io biowait error %d blk %lld size %zu\n",
1030 			error, (long long)blkctr, xfer_size);
1031 		error = (*bdsw->d_close)(hib->dev, 0, S_IFCHR,
1032 		    curproc);
1033 		if (error)
1034 			printf("hibernate_block_io error close failed\n");
1035 		return (1);
1036 	}
1037 
1038 	error = (*bdsw->d_close)(hib->dev, FREAD, S_IFCHR, curproc);
1039 	if (error) {
1040 		printf("hibernate_block_io close failed\n");
1041 		return (1);
1042 	}
1043 
1044 	if (!iswrite)
1045 		bcopy(bp->b_data, (caddr_t)dest, xfer_size);
1046 
1047 	bp->b_flags |= B_INVAL;
1048 	brelse(bp);
1049 
1050 	return (0);
1051 }
1052 
1053 /*
1054  * Preserve one page worth of random data, generated from the resuming
1055  * kernel's arc4random. After resume, this preserved entropy can be used
1056  * to further improve the un-hibernated machine's entropy pool. This
1057  * random data is stored in the piglet, which is preserved across the
1058  * unpack operation, and is restored later in the resume process (see
1059  * hib_getentropy)
1060  */
1061 void
1062 hibernate_preserve_entropy(union hibernate_info *hib)
1063 {
1064 	void *entropy;
1065 
1066 	entropy = km_alloc(PAGE_SIZE, &kv_any, &kp_none, &kd_nowait);
1067 
1068 	if (!entropy)
1069 		return;
1070 
1071 	pmap_activate(curproc);
1072 	pmap_kenter_pa((vaddr_t)entropy,
1073 	    (paddr_t)(hib->piglet_pa + (29 * PAGE_SIZE)),
1074 	    PROT_READ | PROT_WRITE);
1075 
1076 	arc4random_buf((void *)entropy, PAGE_SIZE);
1077 	pmap_kremove((vaddr_t)entropy, PAGE_SIZE);
1078 	km_free(entropy, PAGE_SIZE, &kv_any, &kp_none);
1079 }
1080 
1081 #ifndef NO_PROPOLICE
1082 vaddr_t
1083 hibernate_unprotect_ssp(void)
1084 {
1085 	struct kmem_dyn_mode kd_avoidalias;
1086 	vaddr_t va = trunc_page((vaddr_t)&__guard_local);
1087 	paddr_t pa;
1088 
1089 	pmap_extract(pmap_kernel(), va, &pa);
1090 
1091 	memset(&kd_avoidalias, 0, sizeof kd_avoidalias);
1092 	kd_avoidalias.kd_prefer = pa;
1093 	kd_avoidalias.kd_waitok = 1;
1094 	va = (vaddr_t)km_alloc(PAGE_SIZE, &kv_any, &kp_none, &kd_avoidalias);
1095 	if (!va)
1096 		panic("hibernate_unprotect_ssp");
1097 
1098 	pmap_kenter_pa(va, pa, PROT_READ | PROT_WRITE);
1099 	pmap_update(pmap_kernel());
1100 
1101 	return va;
1102 }
1103 
1104 void
1105 hibernate_reprotect_ssp(vaddr_t va)
1106 {
1107 	pmap_kremove(va, PAGE_SIZE);
1108 	km_free((void *)va, PAGE_SIZE, &kv_any, &kp_none);
1109 }
1110 #endif /* NO_PROPOLICE */
1111 
1112 /*
1113  * Reads the signature block from swap, checks against the current machine's
1114  * information. If the information matches, perform a resume by reading the
1115  * saved image into the pig area, and unpacking.
1116  *
1117  * Must be called with interrupts enabled.
1118  */
1119 void
1120 hibernate_resume(void)
1121 {
1122 	union hibernate_info hib;
1123 	int s;
1124 #ifndef NO_PROPOLICE
1125 	vsize_t off = (vaddr_t)&__guard_local -
1126 	    trunc_page((vaddr_t)&__guard_local);
1127 	vaddr_t guard_va;
1128 #endif
1129 
1130 	/* Get current running machine's hibernate info */
1131 	memset(&hib, 0, sizeof(hib));
1132 	if (get_hibernate_info(&hib, 0)) {
1133 		DPRINTF("couldn't retrieve machine's hibernate info\n");
1134 		return;
1135 	}
1136 
1137 	/* Read hibernate info from disk */
1138 	s = splbio();
1139 
1140 	DPRINTF("reading hibernate signature block location: %lld\n",
1141 		hib.sig_offset);
1142 
1143 	if (hibernate_block_io(&hib,
1144 	    hib.sig_offset,
1145 	    DEV_BSIZE, (vaddr_t)&disk_hib, 0)) {
1146 		DPRINTF("error in hibernate read");
1147 		splx(s);
1148 		return;
1149 	}
1150 
1151 	/* Check magic number */
1152 	if (disk_hib.magic != HIBERNATE_MAGIC) {
1153 		DPRINTF("wrong magic number in hibernate signature: %x\n",
1154 			disk_hib.magic);
1155 		splx(s);
1156 		return;
1157 	}
1158 
1159 	/*
1160 	 * We (possibly) found a hibernate signature. Clear signature first,
1161 	 * to prevent accidental resume or endless resume cycles later.
1162 	 */
1163 	if (hibernate_clear_signature()) {
1164 		DPRINTF("error clearing hibernate signature block\n");
1165 		splx(s);
1166 		return;
1167 	}
1168 
1169 	/*
1170 	 * If on-disk and in-memory hibernate signatures match,
1171 	 * this means we should do a resume from hibernate.
1172 	 */
1173 	if (hibernate_compare_signature(&hib, &disk_hib)) {
1174 		DPRINTF("mismatched hibernate signature block\n");
1175 		splx(s);
1176 		return;
1177 	}
1178 	disk_hib.dev = hib.dev;
1179 
1180 #ifdef MULTIPROCESSOR
1181 	/* XXX - if we fail later, we may need to rehatch APs on some archs */
1182 	DPRINTF("hibernate: quiescing APs\n");
1183 	hibernate_quiesce_cpus();
1184 #endif /* MULTIPROCESSOR */
1185 
1186 	/* Read the image from disk into the image (pig) area */
1187 	if (hibernate_read_image(&disk_hib))
1188 		goto fail;
1189 
1190 	DPRINTF("hibernate: quiescing devices\n");
1191 	if (config_suspend_all(DVACT_QUIESCE) != 0)
1192 		goto fail;
1193 
1194 #ifndef NO_PROPOLICE
1195 	guard_va = hibernate_unprotect_ssp();
1196 #endif /* NO_PROPOLICE */
1197 
1198 	(void) splhigh();
1199 	hibernate_disable_intr_machdep();
1200 	cold = 1;
1201 
1202 	DPRINTF("hibernate: suspending devices\n");
1203 	if (config_suspend_all(DVACT_SUSPEND) != 0) {
1204 		cold = 0;
1205 		hibernate_enable_intr_machdep();
1206 #ifndef NO_PROPOLICE
1207 		hibernate_reprotect_ssp(guard_va);
1208 #endif /* ! NO_PROPOLICE */
1209 		goto fail;
1210 	}
1211 
1212 	hibernate_preserve_entropy(&disk_hib);
1213 
1214 	printf("Unpacking image...\n");
1215 
1216 	/* Switch stacks */
1217 	DPRINTF("hibernate: switching stacks\n");
1218 	hibernate_switch_stack_machdep();
1219 
1220 #ifndef NO_PROPOLICE
1221 	/* Start using suspended kernel's propolice guard */
1222 	*(long *)(guard_va + off) = disk_hib.guard;
1223 	hibernate_reprotect_ssp(guard_va);
1224 #endif /* ! NO_PROPOLICE */
1225 
1226 	/* Unpack and resume */
1227 	hibernate_unpack_image(&disk_hib);
1228 
1229 fail:
1230 	splx(s);
1231 	printf("\nUnable to resume hibernated image\n");
1232 }
1233 
1234 /*
1235  * Unpack image from pig area to original location by looping through the
1236  * list of output chunks in the order they should be restored (fchunks).
1237  *
1238  * Note that due to the stack smash protector and the fact that we have
1239  * switched stacks, it is not permitted to return from this function.
1240  */
1241 void
1242 hibernate_unpack_image(union hibernate_info *hib)
1243 {
1244 	struct hibernate_disk_chunk *chunks;
1245 	union hibernate_info local_hib;
1246 	paddr_t image_cur = global_pig_start;
1247 	short i, *fchunks;
1248 	char *pva;
1249 
1250 	/* Piglet will be identity mapped (VA == PA) */
1251 	pva = (char *)hib->piglet_pa;
1252 
1253 	fchunks = (short *)(pva + (4 * PAGE_SIZE));
1254 
1255 	chunks = (struct hibernate_disk_chunk *)(pva + HIBERNATE_CHUNK_SIZE);
1256 
1257 	/* Can't use hiber_info that's passed in after this point */
1258 	bcopy(hib, &local_hib, sizeof(union hibernate_info));
1259 	local_hib.retguard_ofs = 0;
1260 
1261 	/* VA == PA */
1262 	local_hib.piglet_va = local_hib.piglet_pa;
1263 
1264 	/*
1265 	 * Point of no return. Once we pass this point, only kernel code can
1266 	 * be accessed. No global variables or other kernel data structures
1267 	 * are guaranteed to be coherent after unpack starts.
1268 	 *
1269 	 * The image is now in high memory (pig area), we unpack from the pig
1270 	 * to the correct location in memory. We'll eventually end up copying
1271 	 * on top of ourself, but we are assured the kernel code here is the
1272 	 * same between the hibernated and resuming kernel, and we are running
1273 	 * on our own stack, so the overwrite is ok.
1274 	 */
1275 	DPRINTF("hibernate: activating alt. pagetable and starting unpack\n");
1276 	hibernate_activate_resume_pt_machdep();
1277 
1278 	for (i = 0; i < local_hib.chunk_ctr; i++) {
1279 		/* Reset zlib for inflate */
1280 		if (hibernate_zlib_reset(&local_hib, 0) != Z_OK)
1281 			panic("hibernate failed to reset zlib for inflate");
1282 
1283 		hibernate_process_chunk(&local_hib, &chunks[fchunks[i]],
1284 		    image_cur);
1285 
1286 		image_cur += chunks[fchunks[i]].compressed_size;
1287 
1288 	}
1289 
1290 	/*
1291 	 * Resume the loaded kernel by jumping to the MD resume vector.
1292 	 * We won't be returning from this call. We pass the location of
1293 	 * the retguard save area so the MD code can replace it before
1294 	 * resuming. See the piglet layout at the top of this file for
1295 	 * more information on the layout of the piglet area.
1296 	 *
1297 	 * We use 'global_piglet_va' here since by the time we are at
1298 	 * this point, we have already unpacked the image, and we want
1299 	 * the suspended kernel's view of what the piglet was, before
1300 	 * suspend occurred (since we will need to use that in the retguard
1301 	 * copy code in hibernate_resume_machdep.)
1302 	 */
1303 	hibernate_resume_machdep(global_piglet_va + (110 * PAGE_SIZE));
1304 }
1305 
1306 /*
1307  * Bounce a compressed image chunk to the piglet, entering mappings for the
1308  * copied pages as needed
1309  */
1310 void
1311 hibernate_copy_chunk_to_piglet(paddr_t img_cur, vaddr_t piglet, size_t size)
1312 {
1313 	size_t ct, ofs;
1314 	paddr_t src = img_cur;
1315 	vaddr_t dest = piglet;
1316 
1317 	/* Copy first partial page */
1318 	ct = (PAGE_SIZE) - (src & PAGE_MASK);
1319 	ofs = (src & PAGE_MASK);
1320 
1321 	if (ct < PAGE_SIZE) {
1322 		hibernate_enter_resume_mapping(HIBERNATE_INFLATE_PAGE,
1323 			(src - ofs), 0);
1324 		hibernate_flush();
1325 		bcopy((caddr_t)(HIBERNATE_INFLATE_PAGE + ofs), (caddr_t)dest, ct);
1326 		src += ct;
1327 		dest += ct;
1328 	}
1329 
1330 	/* Copy remaining pages */
1331 	while (src < size + img_cur) {
1332 		hibernate_enter_resume_mapping(HIBERNATE_INFLATE_PAGE, src, 0);
1333 		hibernate_flush();
1334 		ct = PAGE_SIZE;
1335 		bcopy((caddr_t)(HIBERNATE_INFLATE_PAGE), (caddr_t)dest, ct);
1336 		hibernate_flush();
1337 		src += ct;
1338 		dest += ct;
1339 	}
1340 }
1341 
1342 /*
1343  * Process a chunk by bouncing it to the piglet, followed by unpacking
1344  */
1345 void
1346 hibernate_process_chunk(union hibernate_info *hib,
1347     struct hibernate_disk_chunk *chunk, paddr_t img_cur)
1348 {
1349 	char *pva = (char *)hib->piglet_va;
1350 
1351 	hibernate_copy_chunk_to_piglet(img_cur,
1352 	 (vaddr_t)(pva + (HIBERNATE_CHUNK_SIZE * 2)), chunk->compressed_size);
1353 	hibernate_inflate_region(hib, chunk->base,
1354 	    (vaddr_t)(pva + (HIBERNATE_CHUNK_SIZE * 2)),
1355 	    chunk->compressed_size);
1356 }
1357 
1358 /*
1359  * Calculate RLE component for 'inaddr'. Clamps to max RLE pages between
1360  * inaddr and range_end.
1361  */
1362 int
1363 hibernate_calc_rle(paddr_t inaddr, paddr_t range_end)
1364 {
1365 	int rle;
1366 
1367 	rle = uvm_page_rle(inaddr);
1368 	KASSERT(rle >= 0 && rle <= MAX_RLE);
1369 
1370 	/* Clamp RLE to range end */
1371 	if (rle > 0 && inaddr + (rle * PAGE_SIZE) > range_end)
1372 		rle = (range_end - inaddr) / PAGE_SIZE;
1373 
1374 	return (rle);
1375 }
1376 
1377 /*
1378  * Write the RLE byte for page at 'inaddr' to the output stream.
1379  * Returns the number of pages to be skipped at 'inaddr'.
1380  */
1381 int
1382 hibernate_write_rle(union hibernate_info *hib, paddr_t inaddr,
1383 	paddr_t range_end, daddr_t *blkctr,
1384 	size_t *out_remaining)
1385 {
1386 	int rle, err, *rleloc;
1387 	struct hibernate_zlib_state *hibernate_state;
1388 	vaddr_t hibernate_io_page = hib->piglet_va + PAGE_SIZE;
1389 
1390 	hibernate_state =
1391 	    (struct hibernate_zlib_state *)HIBERNATE_HIBALLOC_PAGE;
1392 
1393 	rle = hibernate_calc_rle(inaddr, range_end);
1394 
1395 	rleloc = (int *)hibernate_rle_page + MAX_RLE - 1;
1396 	*rleloc = rle;
1397 
1398 	/* Deflate the RLE byte into the stream */
1399 	hibernate_deflate(hib, (paddr_t)rleloc, out_remaining);
1400 
1401 	/* Did we fill the output page? If so, flush to disk */
1402 	if (*out_remaining == 0) {
1403 		if ((err = hib->io_func(hib->dev, *blkctr + hib->image_offset,
1404 			(vaddr_t)hibernate_io_page, PAGE_SIZE, HIB_W,
1405 			hib->io_page))) {
1406 				DPRINTF("hib write error %d\n", err);
1407 				return (err);
1408 		}
1409 
1410 		*blkctr += PAGE_SIZE / DEV_BSIZE;
1411 		*out_remaining = PAGE_SIZE;
1412 
1413 		/* If we didn't deflate the entire RLE byte, finish it now */
1414 		if (hibernate_state->hib_stream.avail_in != 0)
1415 			hibernate_deflate(hib,
1416 				(vaddr_t)hibernate_state->hib_stream.next_in,
1417 				out_remaining);
1418 	}
1419 
1420 	return (rle);
1421 }
1422 
1423 /*
1424  * Write a compressed version of this machine's memory to disk, at the
1425  * precalculated swap offset:
1426  *
1427  * end of swap - signature block size - chunk table size - memory size
1428  *
1429  * The function begins by looping through each phys mem range, cutting each
1430  * one into MD sized chunks. These chunks are then compressed individually
1431  * and written out to disk, in phys mem order. Some chunks might compress
1432  * more than others, and for this reason, each chunk's size is recorded
1433  * in the chunk table, which is written to disk after the image has
1434  * properly been compressed and written (in hibernate_write_chunktable).
1435  *
1436  * When this function is called, the machine is nearly suspended - most
1437  * devices are quiesced/suspended, interrupts are off, and cold has
1438  * been set. This means that there can be no side effects once the
1439  * write has started, and the write function itself can also have no
1440  * side effects. This also means no printfs are permitted (since printf
1441  * has side effects.)
1442  *
1443  * Return values :
1444  *
1445  * 0      - success
1446  * EIO    - I/O error occurred writing the chunks
1447  * EINVAL - Failed to write a complete range
1448  * ENOMEM - Memory allocation failure during preparation of the zlib arena
1449  */
1450 int
1451 hibernate_write_chunks(union hibernate_info *hib)
1452 {
1453 	paddr_t range_base, range_end, inaddr, temp_inaddr;
1454 	size_t nblocks, out_remaining, used;
1455 	struct hibernate_disk_chunk *chunks;
1456 	vaddr_t hibernate_io_page = hib->piglet_va + PAGE_SIZE;
1457 	daddr_t blkctr = 0;
1458 	int i, rle, err;
1459 	struct hibernate_zlib_state *hibernate_state;
1460 
1461 	hibernate_state =
1462 	    (struct hibernate_zlib_state *)HIBERNATE_HIBALLOC_PAGE;
1463 
1464 	hib->chunk_ctr = 0;
1465 
1466 	/*
1467 	 * Map the utility VAs to the piglet. See the piglet map at the
1468 	 * top of this file for piglet layout information.
1469 	 */
1470 	hibernate_copy_page = hib->piglet_va + 3 * PAGE_SIZE;
1471 	hibernate_rle_page = hib->piglet_va + 28 * PAGE_SIZE;
1472 
1473 	chunks = (struct hibernate_disk_chunk *)(hib->piglet_va +
1474 	    HIBERNATE_CHUNK_SIZE);
1475 
1476 	/* Calculate the chunk regions */
1477 	for (i = 0; i < hib->nranges; i++) {
1478 		range_base = hib->ranges[i].base;
1479 		range_end = hib->ranges[i].end;
1480 
1481 		inaddr = range_base;
1482 
1483 		while (inaddr < range_end) {
1484 			chunks[hib->chunk_ctr].base = inaddr;
1485 			if (inaddr + HIBERNATE_CHUNK_SIZE < range_end)
1486 				chunks[hib->chunk_ctr].end = inaddr +
1487 				    HIBERNATE_CHUNK_SIZE;
1488 			else
1489 				chunks[hib->chunk_ctr].end = range_end;
1490 
1491 			inaddr += HIBERNATE_CHUNK_SIZE;
1492 			hib->chunk_ctr ++;
1493 		}
1494 	}
1495 
1496 	uvm_pmr_dirty_everything();
1497 	uvm_pmr_zero_everything();
1498 
1499 	/* Compress and write the chunks in the chunktable */
1500 	for (i = 0; i < hib->chunk_ctr; i++) {
1501 		range_base = chunks[i].base;
1502 		range_end = chunks[i].end;
1503 
1504 		chunks[i].offset = blkctr + hib->image_offset;
1505 
1506 		/* Reset zlib for deflate */
1507 		if (hibernate_zlib_reset(hib, 1) != Z_OK) {
1508 			DPRINTF("hibernate_zlib_reset failed for deflate\n");
1509 			return (ENOMEM);
1510 		}
1511 
1512 		inaddr = range_base;
1513 
1514 		/*
1515 		 * For each range, loop through its phys mem region
1516 		 * and write out the chunks (the last chunk might be
1517 		 * smaller than the chunk size).
1518 		 */
1519 		while (inaddr < range_end) {
1520 			out_remaining = PAGE_SIZE;
1521 			while (out_remaining > 0 && inaddr < range_end) {
1522 				/*
1523 				 * Adjust for regions that are not evenly
1524 				 * divisible by PAGE_SIZE or overflowed
1525 				 * pages from the previous iteration.
1526 				 */
1527 				temp_inaddr = (inaddr & PAGE_MASK) +
1528 				    hibernate_copy_page;
1529 
1530 				/* Deflate from temp_inaddr to IO page */
1531 				if (inaddr != range_end) {
1532 					if (inaddr % PAGE_SIZE == 0) {
1533 						rle = hibernate_write_rle(hib,
1534 							inaddr,
1535 							range_end,
1536 							&blkctr,
1537 							&out_remaining);
1538 					}
1539 
1540 					if (rle == 0) {
1541 						pmap_kenter_pa(hibernate_temp_page,
1542 							inaddr & PMAP_PA_MASK,
1543 							PROT_READ);
1544 
1545 						bcopy((caddr_t)hibernate_temp_page,
1546 							(caddr_t)hibernate_copy_page,
1547 							PAGE_SIZE);
1548 						inaddr += hibernate_deflate(hib,
1549 							temp_inaddr,
1550 							&out_remaining);
1551 					} else {
1552 						inaddr += rle * PAGE_SIZE;
1553 						if (inaddr > range_end)
1554 							inaddr = range_end;
1555 					}
1556 
1557 				}
1558 
1559 				if (out_remaining == 0) {
1560 					/* Filled up the page */
1561 					nblocks = PAGE_SIZE / DEV_BSIZE;
1562 
1563 					if ((err = hib->io_func(hib->dev,
1564 					    blkctr + hib->image_offset,
1565 					    (vaddr_t)hibernate_io_page,
1566 					    PAGE_SIZE, HIB_W, hib->io_page))) {
1567 						DPRINTF("hib write error %d\n",
1568 						    err);
1569 						return (err);
1570 					}
1571 
1572 					blkctr += nblocks;
1573 				}
1574 			}
1575 		}
1576 
1577 		if (inaddr != range_end) {
1578 			DPRINTF("deflate range ended prematurely\n");
1579 			return (EINVAL);
1580 		}
1581 
1582 		/*
1583 		 * End of range. Round up to next secsize bytes
1584 		 * after finishing compress
1585 		 */
1586 		if (out_remaining == 0)
1587 			out_remaining = PAGE_SIZE;
1588 
1589 		/* Finish compress */
1590 		hibernate_state->hib_stream.next_in = (unsigned char *)inaddr;
1591 		hibernate_state->hib_stream.avail_in = 0;
1592 		hibernate_state->hib_stream.next_out =
1593 		    (unsigned char *)hibernate_io_page +
1594 			(PAGE_SIZE - out_remaining);
1595 
1596 		/* We have an extra output page available for finalize */
1597 		hibernate_state->hib_stream.avail_out =
1598 			out_remaining + PAGE_SIZE;
1599 
1600 		if ((err = deflate(&hibernate_state->hib_stream, Z_FINISH)) !=
1601 		    Z_STREAM_END) {
1602 			DPRINTF("deflate error in output stream: %d\n", err);
1603 			return (err);
1604 		}
1605 
1606 		out_remaining = hibernate_state->hib_stream.avail_out;
1607 
1608 		used = 2 * PAGE_SIZE - out_remaining;
1609 		nblocks = used / DEV_BSIZE;
1610 
1611 		/* Round up to next block if needed */
1612 		if (used % DEV_BSIZE != 0)
1613 			nblocks ++;
1614 
1615 		/* Write final block(s) for this chunk */
1616 		if ((err = hib->io_func(hib->dev, blkctr + hib->image_offset,
1617 		    (vaddr_t)hibernate_io_page, nblocks*DEV_BSIZE,
1618 		    HIB_W, hib->io_page))) {
1619 			DPRINTF("hib final write error %d\n", err);
1620 			return (err);
1621 		}
1622 
1623 		blkctr += nblocks;
1624 
1625 		chunks[i].compressed_size = (blkctr + hib->image_offset -
1626 		    chunks[i].offset) * DEV_BSIZE;
1627 	}
1628 
1629 	hib->chunktable_offset = hib->image_offset + blkctr;
1630 	return (0);
1631 }
1632 
1633 /*
1634  * Reset the zlib stream state and allocate a new hiballoc area for either
1635  * inflate or deflate. This function is called once for each hibernate chunk.
1636  * Calling hiballoc_init multiple times is acceptable since the memory it is
1637  * provided is unmanaged memory (stolen). We use the memory provided to us
1638  * by the piglet allocated via the supplied hib.
1639  */
1640 int
1641 hibernate_zlib_reset(union hibernate_info *hib, int deflate)
1642 {
1643 	vaddr_t hibernate_zlib_start;
1644 	size_t hibernate_zlib_size;
1645 	char *pva = (char *)hib->piglet_va;
1646 	struct hibernate_zlib_state *hibernate_state;
1647 
1648 	hibernate_state =
1649 	    (struct hibernate_zlib_state *)HIBERNATE_HIBALLOC_PAGE;
1650 
1651 	if (!deflate)
1652 		pva = (char *)((paddr_t)pva & (PIGLET_PAGE_MASK));
1653 
1654 	/*
1655 	 * See piglet layout information at the start of this file for
1656 	 * information on the zlib page assignments.
1657 	 */
1658 	hibernate_zlib_start = (vaddr_t)(pva + (30 * PAGE_SIZE));
1659 	hibernate_zlib_size = 80 * PAGE_SIZE;
1660 
1661 	memset((void *)hibernate_zlib_start, 0, hibernate_zlib_size);
1662 	memset(hibernate_state, 0, PAGE_SIZE);
1663 
1664 	/* Set up stream structure */
1665 	hibernate_state->hib_stream.zalloc = (alloc_func)hibernate_zlib_alloc;
1666 	hibernate_state->hib_stream.zfree = (free_func)hibernate_zlib_free;
1667 
1668 	/* Initialize the hiballoc arena for zlib allocs/frees */
1669 	hiballoc_init(&hibernate_state->hiballoc_arena,
1670 	    (caddr_t)hibernate_zlib_start, hibernate_zlib_size);
1671 
1672 	if (deflate) {
1673 		return deflateInit(&hibernate_state->hib_stream,
1674 		    Z_BEST_SPEED);
1675 	} else
1676 		return inflateInit(&hibernate_state->hib_stream);
1677 }
1678 
1679 /*
1680  * Reads the hibernated memory image from disk, whose location and
1681  * size are recorded in hib. Begin by reading the persisted
1682  * chunk table, which records the original chunk placement location
1683  * and compressed size for each. Next, allocate a pig region of
1684  * sufficient size to hold the compressed image. Next, read the
1685  * chunks into the pig area (calling hibernate_read_chunks to do this),
1686  * and finally, if all of the above succeeds, clear the hibernate signature.
1687  * The function will then return to hibernate_resume, which will proceed
1688  * to unpack the pig image to the correct place in memory.
1689  */
1690 int
1691 hibernate_read_image(union hibernate_info *hib)
1692 {
1693 	size_t compressed_size, disk_size, chunktable_size, pig_sz;
1694 	paddr_t image_start, image_end, pig_start, pig_end;
1695 	struct hibernate_disk_chunk *chunks;
1696 	daddr_t blkctr;
1697 	vaddr_t chunktable = (vaddr_t)NULL;
1698 	paddr_t piglet_chunktable = hib->piglet_pa +
1699 	    HIBERNATE_CHUNK_SIZE;
1700 	int i, status;
1701 
1702 	status = 0;
1703 	pmap_activate(curproc);
1704 
1705 	/* Calculate total chunk table size in disk blocks */
1706 	chunktable_size = HIBERNATE_CHUNK_TABLE_SIZE / DEV_BSIZE;
1707 
1708 	blkctr = hib->chunktable_offset;
1709 
1710 	chunktable = (vaddr_t)km_alloc(HIBERNATE_CHUNK_TABLE_SIZE, &kv_any,
1711 	    &kp_none, &kd_nowait);
1712 
1713 	if (!chunktable)
1714 		return (1);
1715 
1716 	/* Map chunktable pages */
1717 	for (i = 0; i < HIBERNATE_CHUNK_TABLE_SIZE; i += PAGE_SIZE)
1718 		pmap_kenter_pa(chunktable + i, piglet_chunktable + i,
1719 		    PROT_READ | PROT_WRITE);
1720 	pmap_update(pmap_kernel());
1721 
1722 	/* Read the chunktable from disk into the piglet chunktable */
1723 	for (i = 0; i < HIBERNATE_CHUNK_TABLE_SIZE;
1724 	    i += MAXPHYS, blkctr += MAXPHYS/DEV_BSIZE)
1725 		hibernate_block_io(hib, blkctr, MAXPHYS,
1726 		    chunktable + i, 0);
1727 
1728 	blkctr = hib->image_offset;
1729 	compressed_size = 0;
1730 
1731 	chunks = (struct hibernate_disk_chunk *)chunktable;
1732 
1733 	for (i = 0; i < hib->chunk_ctr; i++)
1734 		compressed_size += chunks[i].compressed_size;
1735 
1736 	disk_size = compressed_size;
1737 
1738 	printf("unhibernating @ block %lld length %luMB\n",
1739 	    hib->sig_offset - chunktable_size,
1740 	    compressed_size / (1024 * 1024));
1741 
1742 	/* Allocate the pig area */
1743 	pig_sz = compressed_size + HIBERNATE_CHUNK_SIZE;
1744 	if (uvm_pmr_alloc_pig(&pig_start, pig_sz, hib->piglet_pa) == ENOMEM) {
1745 		status = 1;
1746 		goto unmap;
1747 	}
1748 
1749 	pig_end = pig_start + pig_sz;
1750 
1751 	/* Calculate image extents. Pig image must end on a chunk boundary. */
1752 	image_end = pig_end & ~(HIBERNATE_CHUNK_SIZE - 1);
1753 	image_start = image_end - disk_size;
1754 
1755 	hibernate_read_chunks(hib, image_start, image_end, disk_size,
1756 	    chunks);
1757 
1758 	/* Prepare the resume time pmap/page table */
1759 	hibernate_populate_resume_pt(hib, image_start, image_end);
1760 
1761 unmap:
1762 	/* Unmap chunktable pages */
1763 	pmap_kremove(chunktable, HIBERNATE_CHUNK_TABLE_SIZE);
1764 	pmap_update(pmap_kernel());
1765 
1766 	return (status);
1767 }
1768 
1769 /*
1770  * Read the hibernated memory chunks from disk (chunk information at this
1771  * point is stored in the piglet) into the pig area specified by
1772  * [pig_start .. pig_end]. Order the chunks so that the final chunk is the
1773  * only chunk with overlap possibilities.
1774  */
1775 int
1776 hibernate_read_chunks(union hibernate_info *hib, paddr_t pig_start,
1777     paddr_t pig_end, size_t image_compr_size,
1778     struct hibernate_disk_chunk *chunks)
1779 {
1780 	paddr_t img_cur, piglet_base;
1781 	daddr_t blkctr;
1782 	size_t processed, compressed_size, read_size;
1783 	int nchunks, nfchunks, num_io_pages;
1784 	vaddr_t tempva, hibernate_fchunk_area;
1785 	short *fchunks, i, j;
1786 
1787 	tempva = (vaddr_t)NULL;
1788 	hibernate_fchunk_area = (vaddr_t)NULL;
1789 	nfchunks = 0;
1790 	piglet_base = hib->piglet_pa;
1791 	global_pig_start = pig_start;
1792 
1793 	/*
1794 	 * These mappings go into the resuming kernel's page table, and are
1795 	 * used only during image read. They disappear from existence
1796 	 * when the suspended kernel is unpacked on top of us.
1797 	 */
1798 	tempva = (vaddr_t)km_alloc(MAXPHYS + PAGE_SIZE, &kv_any, &kp_none,
1799 		&kd_nowait);
1800 	if (!tempva)
1801 		return (1);
1802 	hibernate_fchunk_area = (vaddr_t)km_alloc(24 * PAGE_SIZE, &kv_any,
1803 	    &kp_none, &kd_nowait);
1804 	if (!hibernate_fchunk_area)
1805 		return (1);
1806 
1807 	/* Final output chunk ordering VA */
1808 	fchunks = (short *)hibernate_fchunk_area;
1809 
1810 	/* Map the chunk ordering region */
1811 	for(i = 0; i < 24 ; i++)
1812 		pmap_kenter_pa(hibernate_fchunk_area + (i * PAGE_SIZE),
1813 			piglet_base + ((4 + i) * PAGE_SIZE),
1814 			PROT_READ | PROT_WRITE);
1815 	pmap_update(pmap_kernel());
1816 
1817 	nchunks = hib->chunk_ctr;
1818 
1819 	/* Initially start all chunks as unplaced */
1820 	for (i = 0; i < nchunks; i++)
1821 		chunks[i].flags = 0;
1822 
1823 	/*
1824 	 * Search the list for chunks that are outside the pig area. These
1825 	 * can be placed first in the final output list.
1826 	 */
1827 	for (i = 0; i < nchunks; i++) {
1828 		if (chunks[i].end <= pig_start || chunks[i].base >= pig_end) {
1829 			fchunks[nfchunks] = i;
1830 			nfchunks++;
1831 			chunks[i].flags |= HIBERNATE_CHUNK_PLACED;
1832 		}
1833 	}
1834 
1835 	/*
1836 	 * Walk the ordering, place the chunks in ascending memory order.
1837 	 */
1838 	for (i = 0; i < nchunks; i++) {
1839 		if (chunks[i].flags != HIBERNATE_CHUNK_PLACED) {
1840 			fchunks[nfchunks] = i;
1841 			nfchunks++;
1842 			chunks[i].flags = HIBERNATE_CHUNK_PLACED;
1843 		}
1844 	}
1845 
1846 	img_cur = pig_start;
1847 
1848 	for (i = 0; i < nfchunks; i++) {
1849 		blkctr = chunks[fchunks[i]].offset;
1850 		processed = 0;
1851 		compressed_size = chunks[fchunks[i]].compressed_size;
1852 
1853 		while (processed < compressed_size) {
1854 			if (compressed_size - processed >= MAXPHYS)
1855 				read_size = MAXPHYS;
1856 			else
1857 				read_size = compressed_size - processed;
1858 
1859 			/*
1860 			 * We're reading read_size bytes, offset from the
1861 			 * start of a page by img_cur % PAGE_SIZE, so the
1862 			 * end will be read_size + (img_cur % PAGE_SIZE)
1863 			 * from the start of the first page.  Round that
1864 			 * up to the next page size.
1865 			 */
1866 			num_io_pages = (read_size + (img_cur % PAGE_SIZE)
1867 				+ PAGE_SIZE - 1) / PAGE_SIZE;
1868 
1869 			KASSERT(num_io_pages <= MAXPHYS/PAGE_SIZE + 1);
1870 
1871 			/* Map pages for this read */
1872 			for (j = 0; j < num_io_pages; j ++)
1873 				pmap_kenter_pa(tempva + j * PAGE_SIZE,
1874 				    img_cur + j * PAGE_SIZE,
1875 				    PROT_READ | PROT_WRITE);
1876 
1877 			pmap_update(pmap_kernel());
1878 
1879 			hibernate_block_io(hib, blkctr, read_size,
1880 			    tempva + (img_cur & PAGE_MASK), 0);
1881 
1882 			blkctr += (read_size / DEV_BSIZE);
1883 
1884 			pmap_kremove(tempva, num_io_pages * PAGE_SIZE);
1885 			pmap_update(pmap_kernel());
1886 
1887 			processed += read_size;
1888 			img_cur += read_size;
1889 		}
1890 	}
1891 
1892 	pmap_kremove(hibernate_fchunk_area, 24 * PAGE_SIZE);
1893 	pmap_update(pmap_kernel());
1894 
1895 	return (0);
1896 }
1897 
1898 /*
1899  * Hibernating a machine comprises the following operations:
1900  *  1. Calculating this machine's hibernate_info information
1901  *  2. Allocating a piglet and saving the piglet's physaddr
1902  *  3. Calculating the memory chunks
1903  *  4. Writing the compressed chunks to disk
1904  *  5. Writing the chunk table
1905  *  6. Writing the signature block (hibernate_info)
1906  *
1907  * On most architectures, the function calling hibernate_suspend would
1908  * then power off the machine using some MD-specific implementation.
1909  */
1910 int
1911 hibernate_suspend(void)
1912 {
1913 	union hibernate_info hib;
1914 	u_long start, end;
1915 
1916 	/*
1917 	 * Calculate memory ranges, swap offsets, etc.
1918 	 * This also allocates a piglet whose physaddr is stored in
1919 	 * hib->piglet_pa and vaddr stored in hib->piglet_va
1920 	 */
1921 	if (get_hibernate_info(&hib, 1)) {
1922 		DPRINTF("failed to obtain hibernate info\n");
1923 		return (1);
1924 	}
1925 
1926 	/* Find a page-addressed region in swap [start,end] */
1927 	if (uvm_hibswap(hib.dev, &start, &end)) {
1928 		printf("hibernate: cannot find any swap\n");
1929 		return (1);
1930 	}
1931 
1932 	if (end - start < 1000) {
1933 		printf("hibernate: insufficient swap (%lu is too small)\n",
1934 			end - start);
1935 		return (1);
1936 	}
1937 
1938 	/* Calculate block offsets in swap */
1939 	hib.image_offset = ctod(start);
1940 
1941 	DPRINTF("hibernate @ block %lld max-length %lu blocks\n",
1942 	    hib.image_offset, ctod(end) - ctod(start));
1943 
1944 	pmap_activate(curproc);
1945 	DPRINTF("hibernate: writing chunks\n");
1946 	if (hibernate_write_chunks(&hib)) {
1947 		DPRINTF("hibernate_write_chunks failed\n");
1948 		return (1);
1949 	}
1950 
1951 	DPRINTF("hibernate: writing chunktable\n");
1952 	if (hibernate_write_chunktable(&hib)) {
1953 		DPRINTF("hibernate_write_chunktable failed\n");
1954 		return (1);
1955 	}
1956 
1957 	DPRINTF("hibernate: writing signature\n");
1958 	if (hibernate_write_signature(&hib)) {
1959 		DPRINTF("hibernate_write_signature failed\n");
1960 		return (1);
1961 	}
1962 
1963 	/* Allow the disk to settle */
1964 	delay(500000);
1965 
1966 	/*
1967 	 * Give the device-specific I/O function a notification that we're
1968 	 * done, and that it can clean up or shutdown as needed.
1969 	 */
1970 	hib.io_func(hib.dev, 0, (vaddr_t)NULL, 0, HIB_DONE, hib.io_page);
1971 	return (0);
1972 }
1973 
1974 int
1975 hibernate_alloc(void)
1976 {
1977 	KASSERT(global_piglet_va == 0);
1978 	KASSERT(hibernate_temp_page == 0);
1979 
1980 	pmap_activate(curproc);
1981 	pmap_kenter_pa(HIBERNATE_HIBALLOC_PAGE, HIBERNATE_HIBALLOC_PAGE,
1982 	    PROT_READ | PROT_WRITE);
1983 
1984 	/* Allocate a piglet, store its addresses in the supplied globals */
1985 	if (uvm_pmr_alloc_piglet(&global_piglet_va, &global_piglet_pa,
1986 	    HIBERNATE_CHUNK_SIZE * 4, HIBERNATE_CHUNK_SIZE))
1987 		goto unmap;
1988 
1989 	/*
1990 	 * Allocate VA for the temp page.
1991 	 *
1992 	 * This will become part of the suspended kernel and will
1993 	 * be freed in hibernate_free, upon resume (or hibernate
1994 	 * failure)
1995 	 */
1996 	hibernate_temp_page = (vaddr_t)km_alloc(PAGE_SIZE, &kv_any,
1997 	    &kp_none, &kd_nowait);
1998 	if (!hibernate_temp_page) {
1999 		uvm_pmr_free_piglet(global_piglet_va,
2000 		    4 * HIBERNATE_CHUNK_SIZE);
2001 		global_piglet_va = 0;
2002 		goto unmap;
2003 	}
2004 	return (0);
2005 unmap:
2006 	pmap_kremove(HIBERNATE_HIBALLOC_PAGE, PAGE_SIZE);
2007 	pmap_update(pmap_kernel());
2008 	return (ENOMEM);
2009 }
2010 
2011 /*
2012  * Free items allocated by hibernate_alloc()
2013  */
2014 void
2015 hibernate_free(void)
2016 {
2017 	pmap_activate(curproc);
2018 
2019 	if (global_piglet_va)
2020 		uvm_pmr_free_piglet(global_piglet_va,
2021 		    4 * HIBERNATE_CHUNK_SIZE);
2022 
2023 	if (hibernate_temp_page) {
2024 		pmap_kremove(hibernate_temp_page, PAGE_SIZE);
2025 		km_free((void *)hibernate_temp_page, PAGE_SIZE,
2026 		    &kv_any, &kp_none);
2027 	}
2028 
2029 	global_piglet_va = 0;
2030 	hibernate_temp_page = 0;
2031 	pmap_kremove(HIBERNATE_HIBALLOC_PAGE, PAGE_SIZE);
2032 	pmap_update(pmap_kernel());
2033 }
2034