1 /*	$OpenBSD: subr_hibernate.c,v 1.140 2024/06/04 20:31:35 krw Exp $	*/
2 
3 /*
4  * Copyright (c) 2011 Ariane van der Steldt <ariane@stack.nl>
5  * Copyright (c) 2011 Mike Larkin <mlarkin@openbsd.org>
6  *
7  * Permission to use, copy, modify, and distribute this software for any
8  * purpose with or without fee is hereby granted, provided that the above
9  * copyright notice and this permission notice appear in all copies.
10  *
11  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
12  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
14  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18  */
19 
20 #include <sys/hibernate.h>
21 #include <sys/malloc.h>
22 #include <sys/param.h>
23 #include <sys/tree.h>
24 #include <sys/systm.h>
25 #include <sys/disklabel.h>
26 #include <sys/disk.h>
27 #include <sys/conf.h>
28 #include <sys/buf.h>
29 #include <sys/fcntl.h>
30 #include <sys/stat.h>
31 #include <sys/atomic.h>
32 
33 #include <uvm/uvm.h>
34 #include <uvm/uvm_swap.h>
35 
36 #include <machine/hibernate.h>
37 
38 /* Make sure the signature can fit in one block */
39 CTASSERT((offsetof(union hibernate_info, sec_size) + sizeof(u_int32_t)) <= DEV_BSIZE);
40 
41 /*
42  * Hibernate piglet layout information
43  *
44  * The piglet is a scratch area of memory allocated by the suspending kernel.
45  * Its phys and virt addrs are recorded in the signature block. The piglet is
46  * used to guarantee an unused area of memory that can be used by the resuming
47  * kernel for various things. The piglet is excluded during unpack operations.
48  * The piglet size is presently 4*HIBERNATE_CHUNK_SIZE (typically 4*4MB).
49  *
50  * Offset from piglet_base	Purpose
51  * ----------------------------------------------------------------------------
52  * 0				Private page for suspend I/O write functions
53  * 1*PAGE_SIZE			I/O page used during hibernate suspend
54  * 2*PAGE_SIZE			I/O page used during hibernate suspend
55  * 3*PAGE_SIZE			copy page used during hibernate suspend
56  * 4*PAGE_SIZE			final chunk ordering list (24 pages)
57  * 28*PAGE_SIZE			RLE utility page
58  * 29*PAGE_SIZE			preserved entropy
59  * 30*PAGE_SIZE			start of hiballoc area
60  * 110*PAGE_SIZE		end of hiballoc area (80 pages)
61  * 366*PAGE_SIZE		end of retguard preservation region (256 pages)
62  * ...				unused
63  * HIBERNATE_CHUNK_SIZE		start of hibernate chunk table
64  * 2*HIBERNATE_CHUNK_SIZE	bounce area for chunks being unpacked
65  * 4*HIBERNATE_CHUNK_SIZE	end of piglet
66  */
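
/*
 * Cross-reference with the code below (matching the table above): the
 * entropy page is read back in hib_getentropy() as
 * piglet_va + 29 * PAGE_SIZE, and hibernate_zlib_reset() sets up the
 * zlib/hiballoc arena from piglet_va + 30 * PAGE_SIZE to
 * piglet_va + 110 * PAGE_SIZE.
 */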
67 
68 /* Temporary vaddr ranges used during hibernate */
69 vaddr_t hibernate_temp_page;
70 vaddr_t hibernate_copy_page;
71 vaddr_t hibernate_rle_page;
72 
73 /* Hibernate info as read from disk during resume */
74 union hibernate_info disk_hib;
75 
76 /*
77  * Global copy of the pig start address. This needs to be a global as we
78  * switch stacks after computing it - it can't be stored on the stack.
79  */
80 paddr_t global_pig_start;
81 
82 /*
83  * Global copies of the piglet start addresses (PA/VA). We store these
84  * as globals to avoid having to carry them around as parameters, as the
85  * piglet is allocated early and freed late - its lifecycle extends beyond
86  * that of the hibernate info union which is calculated on suspend/resume.
87  */
88 vaddr_t global_piglet_va;
89 paddr_t global_piglet_pa;
90 
91 /* #define HIB_DEBUG */
92 #ifdef HIB_DEBUG
93 int	hib_debug = 99;
94 #define DPRINTF(x...)     do { if (hib_debug) printf(x); } while (0)
95 #define DNPRINTF(n,x...)  do { if (hib_debug > (n)) printf(x); } while (0)
96 #else
97 #define DPRINTF(x...)
98 #define DNPRINTF(n,x...)
99 #endif
100 
101 #define	ROUNDUP(_x, _y)	((((_x)+(_y)-1)/(_y))*(_y))
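/* Rounds _x up to the next multiple of _y, e.g. ROUNDUP(1000, 512) == 1024. */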
102 
103 #ifndef NO_PROPOLICE
104 extern long __guard_local;
105 #endif /* ! NO_PROPOLICE */
106 
107 /* Retguard phys address (need to skip this region during unpack) */
108 paddr_t retguard_start_phys, retguard_end_phys;
109 extern char __retguard_start, __retguard_end;
110 
111 void hibernate_copy_chunk_to_piglet(paddr_t, vaddr_t, size_t);
112 int hibernate_calc_rle(paddr_t, paddr_t);
113 int hibernate_write_rle(union hibernate_info *, paddr_t, paddr_t, daddr_t *,
114 	size_t *);
115 
116 #define MAX_RLE (HIBERNATE_CHUNK_SIZE / PAGE_SIZE)
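/*
 * With the typical 4 MB chunk and 4 KB page this is 1024, the same bound
 * the RLE sanity check in hibernate_inflate_page() enforces.
 */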
117 
118 /*
119  * Hib alloc enforced alignment.
120  */
121 #define HIB_ALIGN		8 /* bytes alignment */
122 
123 /*
124  * sizeof builtin operation, but with alignment constraint.
125  */
126 #define HIB_SIZEOF(_type)	roundup(sizeof(_type), HIB_ALIGN)
127 
128 struct hiballoc_entry {
129 	size_t			hibe_use;
130 	size_t			hibe_space;
131 	RBT_ENTRY(hiballoc_entry) hibe_entry;
132 };
133 
134 /*
135  * Sort hibernate memory ranges by ascending PA
136  */
137 void
138 hibernate_sort_ranges(union hibernate_info *hib_info)
139 {
140 	int i, j;
141 	struct hibernate_memory_range *ranges;
142 	paddr_t base, end;
143 
144 	ranges = hib_info->ranges;
145 
146 	for (i = 1; i < hib_info->nranges; i++) {
147 		j = i;
148 		while (j > 0 && ranges[j - 1].base > ranges[j].base) {
149 			base = ranges[j].base;
150 			end = ranges[j].end;
151 			ranges[j].base = ranges[j - 1].base;
152 			ranges[j].end = ranges[j - 1].end;
153 			ranges[j - 1].base = base;
154 			ranges[j - 1].end = end;
155 			j--;
156 		}
157 	}
158 }
159 
160 /*
161  * Compare hiballoc entries based on the address they manage.
162  *
163  * Since the address is fixed, relative to struct hiballoc_entry,
164  * we just compare the hiballoc_entry pointers.
165  */
166 static __inline int
167 hibe_cmp(const struct hiballoc_entry *l, const struct hiballoc_entry *r)
168 {
169 	vaddr_t vl = (vaddr_t)l;
170 	vaddr_t vr = (vaddr_t)r;
171 
172 	return vl < vr ? -1 : (vl > vr);
173 }
174 
175 RBT_PROTOTYPE(hiballoc_addr, hiballoc_entry, hibe_entry, hibe_cmp)
176 
177 /*
178  * Given a hiballoc entry, return the address it manages.
179  */
180 static __inline void *
181 hib_entry_to_addr(struct hiballoc_entry *entry)
182 {
183 	caddr_t addr;
184 
185 	addr = (caddr_t)entry;
186 	addr += HIB_SIZEOF(struct hiballoc_entry);
187 	return addr;
188 }
189 
190 /*
191  * Given an address, find the hiballoc that corresponds.
192  */
193 static __inline struct hiballoc_entry*
194 hib_addr_to_entry(void *addr_param)
195 {
196 	caddr_t addr;
197 
198 	addr = (caddr_t)addr_param;
199 	addr -= HIB_SIZEOF(struct hiballoc_entry);
200 	return (struct hiballoc_entry*)addr;
201 }
202 
203 RBT_GENERATE(hiballoc_addr, hiballoc_entry, hibe_entry, hibe_cmp);
204 
205 /*
206  * Allocate memory from the arena.
207  *
208  * Returns NULL if no memory is available.
209  */
210 void *
211 hib_alloc(struct hiballoc_arena *arena, size_t alloc_sz)
212 {
213 	struct hiballoc_entry *entry, *new_entry;
214 	size_t find_sz;
215 
216 	/*
217 	 * Enforce alignment of HIB_ALIGN bytes.
218 	 *
219 	 * Note that, because the entry is put in front of the allocation,
220 	 * 0-byte allocations are guaranteed a unique address.
221 	 */
222 	alloc_sz = roundup(alloc_sz, HIB_ALIGN);
223 
224 	/*
225 	 * Find an entry with hibe_space >= find_sz.
226 	 *
227 	 * If the root node is not large enough, we switch to tree traversal.
228 	 * Because all entries are made at the bottom of the free space,
229 	 * traversal from the end has a slightly better chance of yielding
230 	 * a sufficiently large space.
231 	 */
232 	find_sz = alloc_sz + HIB_SIZEOF(struct hiballoc_entry);
233 	entry = RBT_ROOT(hiballoc_addr, &arena->hib_addrs);
234 	if (entry != NULL && entry->hibe_space < find_sz) {
235 		RBT_FOREACH_REVERSE(entry, hiballoc_addr, &arena->hib_addrs) {
236 			if (entry->hibe_space >= find_sz)
237 				break;
238 		}
239 	}
240 
241 	/*
242 	 * Insufficient or too fragmented memory.
243 	 */
244 	if (entry == NULL)
245 		return NULL;
246 
247 	/*
248 	 * Create new entry in allocated space.
249 	 */
250 	new_entry = (struct hiballoc_entry*)(
251 	    (caddr_t)hib_entry_to_addr(entry) + entry->hibe_use);
252 	new_entry->hibe_space = entry->hibe_space - find_sz;
253 	new_entry->hibe_use = alloc_sz;
254 
255 	/*
256 	 * Insert entry.
257 	 */
258 	if (RBT_INSERT(hiballoc_addr, &arena->hib_addrs, new_entry) != NULL)
259 		panic("hib_alloc: insert failure");
260 	entry->hibe_space = 0;
261 
262 	/* Return address managed by entry. */
263 	return hib_entry_to_addr(new_entry);
264 }
265 
266 void
267 hib_getentropy(char **bufp, size_t *bufplen)
268 {
269 	if (!bufp || !bufplen)
270 		return;
271 
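	/* The preserved-entropy page; see the piglet layout table above. */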
272 	*bufp = (char *)(global_piglet_va + (29 * PAGE_SIZE));
273 	*bufplen = PAGE_SIZE;
274 }
275 
276 /*
277  * Free a pointer previously allocated from this arena.
278  *
279  * If addr is NULL, this will be silently accepted.
280  */
281 void
282 hib_free(struct hiballoc_arena *arena, void *addr)
283 {
284 	struct hiballoc_entry *entry, *prev;
285 
286 	if (addr == NULL)
287 		return;
288 
289 	/*
290 	 * Derive entry from addr and check it is really in this arena.
291 	 */
292 	entry = hib_addr_to_entry(addr);
293 	if (RBT_FIND(hiballoc_addr, &arena->hib_addrs, entry) != entry)
294 		panic("hib_free: freed item %p not in hib arena", addr);
295 
296 	/*
297 	 * Give the space in entry to its predecessor.
298 	 *
299 	 * If entry has no predecessor, change its used space into free space
300 	 * instead.
301 	 */
302 	prev = RBT_PREV(hiballoc_addr, entry);
303 	if (prev != NULL &&
304 	    (void *)((caddr_t)prev + HIB_SIZEOF(struct hiballoc_entry) +
305 	    prev->hibe_use + prev->hibe_space) == entry) {
306 		/* Merge entry. */
307 		RBT_REMOVE(hiballoc_addr, &arena->hib_addrs, entry);
308 		prev->hibe_space += HIB_SIZEOF(struct hiballoc_entry) +
309 		    entry->hibe_use + entry->hibe_space;
310 	} else {
311 		/* Flip used memory to free space. */
312 		entry->hibe_space += entry->hibe_use;
313 		entry->hibe_use = 0;
314 	}
315 }
316 
317 /*
318  * Initialize hiballoc.
319  *
320  * The allocator will manage memory at ptr, which is len bytes.
321  */
322 int
323 hiballoc_init(struct hiballoc_arena *arena, void *p_ptr, size_t p_len)
324 {
325 	struct hiballoc_entry *entry;
326 	caddr_t ptr;
327 	size_t len;
328 
329 	RBT_INIT(hiballoc_addr, &arena->hib_addrs);
330 
331 	/*
332 	 * Hib allocator enforces HIB_ALIGN alignment.
333 	 * Fixup ptr and len.
334 	 */
335 	ptr = (caddr_t)roundup((vaddr_t)p_ptr, HIB_ALIGN);
336 	len = p_len - ((size_t)ptr - (size_t)p_ptr);
337 	len &= ~((size_t)HIB_ALIGN - 1);
338 
339 	/*
340 	 * Insufficient memory to be able to allocate and also do bookkeeping.
341 	 */
342 	if (len <= HIB_SIZEOF(struct hiballoc_entry))
343 		return ENOMEM;
344 
345 	/*
346 	 * Create entry describing space.
347 	 */
348 	entry = (struct hiballoc_entry*)ptr;
349 	entry->hibe_use = 0;
350 	entry->hibe_space = len - HIB_SIZEOF(struct hiballoc_entry);
351 	RBT_INSERT(hiballoc_addr, &arena->hib_addrs, entry);
352 
353 	return 0;
354 }
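
/*
 * Example use of the hiballoc arena API above. This is an illustrative
 * sketch only: the HIBALLOC_EXAMPLE guard is hypothetical and never
 * defined, so the code is not compiled into the kernel.
 */
#ifdef HIBALLOC_EXAMPLE
static void
hiballoc_example(void)
{
	static char scratch[65536];
	struct hiballoc_arena arena;
	void *p;

	/* Hand the arena a raw region of memory to manage. */
	if (hiballoc_init(&arena, scratch, sizeof(scratch)) != 0)
		return;

	/* Allocations are rounded up to HIB_ALIGN and may fail (NULL). */
	p = hib_alloc(&arena, 128);
	if (p != NULL)
		hib_free(&arena, p);
}
#endif /* HIBALLOC_EXAMPLE */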
355 
356 /*
357  * Zero all free memory.
358  */
359 void
360 uvm_pmr_zero_everything(void)
361 {
362 	struct uvm_pmemrange	*pmr;
363 	struct vm_page		*pg;
364 	int			 i;
365 
366 	uvm_lock_fpageq();
367 	TAILQ_FOREACH(pmr, &uvm.pmr_control.use, pmr_use) {
368 		/* Zero single pages. */
369 		while ((pg = TAILQ_FIRST(&pmr->single[UVM_PMR_MEMTYPE_DIRTY]))
370 		    != NULL) {
371 			uvm_pmr_remove(pmr, pg);
372 			uvm_pagezero(pg);
373 			atomic_setbits_int(&pg->pg_flags, PG_ZERO);
374 			uvmexp.zeropages++;
375 			uvm_pmr_insert(pmr, pg, 0);
376 		}
377 
378 		/* Zero multi page ranges. */
379 		while ((pg = RBT_ROOT(uvm_pmr_size,
380 		    &pmr->size[UVM_PMR_MEMTYPE_DIRTY])) != NULL) {
381 			pg--; /* Size tree always has second page. */
382 			uvm_pmr_remove(pmr, pg);
383 			for (i = 0; i < pg->fpgsz; i++) {
384 				uvm_pagezero(&pg[i]);
385 				atomic_setbits_int(&pg[i].pg_flags, PG_ZERO);
386 				uvmexp.zeropages++;
387 			}
388 			uvm_pmr_insert(pmr, pg, 0);
389 		}
390 	}
391 	uvm_unlock_fpageq();
392 }
393 
394 /*
395  * Mark all memory as dirty.
396  *
397  * Used to inform the system that the clean memory isn't clean for some
398  * reason, for example because we just came back from hibernate.
399  */
400 void
401 uvm_pmr_dirty_everything(void)
402 {
403 	struct uvm_pmemrange	*pmr;
404 	struct vm_page		*pg;
405 	int			 i;
406 
407 	uvm_lock_fpageq();
408 	TAILQ_FOREACH(pmr, &uvm.pmr_control.use, pmr_use) {
409 		/* Dirty single pages. */
410 		while ((pg = TAILQ_FIRST(&pmr->single[UVM_PMR_MEMTYPE_ZERO]))
411 		    != NULL) {
412 			uvm_pmr_remove(pmr, pg);
413 			atomic_clearbits_int(&pg->pg_flags, PG_ZERO);
414 			uvm_pmr_insert(pmr, pg, 0);
415 		}
416 
417 		/* Dirty multi page ranges. */
418 		while ((pg = RBT_ROOT(uvm_pmr_size,
419 		    &pmr->size[UVM_PMR_MEMTYPE_ZERO])) != NULL) {
420 			pg--; /* Size tree always has second page. */
421 			uvm_pmr_remove(pmr, pg);
422 			for (i = 0; i < pg->fpgsz; i++)
423 				atomic_clearbits_int(&pg[i].pg_flags, PG_ZERO);
424 			uvm_pmr_insert(pmr, pg, 0);
425 		}
426 	}
427 
428 	uvmexp.zeropages = 0;
429 	uvm_unlock_fpageq();
430 }
431 
432 /*
433  * Allocate an area that can hold sz bytes and doesn't overlap with
434  * the piglet at piglet_pa.
435  */
436 int
437 uvm_pmr_alloc_pig(paddr_t *pa, psize_t sz, paddr_t piglet_pa)
438 {
439 	struct uvm_constraint_range pig_constraint;
440 	struct kmem_pa_mode kp_pig = {
441 		.kp_constraint = &pig_constraint,
442 		.kp_maxseg = 1
443 	};
444 	vaddr_t va;
445 
446 	sz = round_page(sz);
447 
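	/* First attempt: anywhere above the end of the piglet. */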
448 	pig_constraint.ucr_low = piglet_pa + 4 * HIBERNATE_CHUNK_SIZE;
449 	pig_constraint.ucr_high = -1;
450 
451 	va = (vaddr_t)km_alloc(sz, &kv_any, &kp_pig, &kd_nowait);
452 	if (va == 0) {
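		/* Second attempt: anywhere below the start of the piglet. */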
453 		pig_constraint.ucr_low = 0;
454 		pig_constraint.ucr_high = piglet_pa - 1;
455 
456 		va = (vaddr_t)km_alloc(sz, &kv_any, &kp_pig, &kd_nowait);
457 		if (va == 0)
458 			return ENOMEM;
459 	}
460 
461 	pmap_extract(pmap_kernel(), va, pa);
462 	return 0;
463 }
464 
465 /*
466  * Allocate a piglet area.
467  *
468  * This needs to be in DMA-safe memory.
469  * Piglets are aligned.
470  *
471  * sz and align in bytes.
472  *
473  * The call will sleep while the pagedaemon attempts to free memory.
474  * The pagedaemon may decide it's not possible to free enough memory,
475  * causing the allocation to fail.
476  */
477 int
478 uvm_pmr_alloc_piglet(vaddr_t *va, paddr_t *pa, vsize_t sz, paddr_t align)
479 {
480 	struct kmem_pa_mode kp_piglet = {
481 		.kp_constraint = &dma_constraint,
482 		.kp_align = align,
483 		.kp_maxseg = 1
484 	};
485 
486 	/* Ensure align is a power of 2 */
487 	KASSERT((align & (align - 1)) == 0);
488 
489 	/*
490 	 * Fixup arguments: align must be at least PAGE_SIZE,
491 	 * sz will be converted to pagecount, since that is what
492 	 * pmemrange uses internally.
493 	 */
494 	if (align < PAGE_SIZE)
495 		kp_piglet.kp_align = PAGE_SIZE;
496 
497 	sz = round_page(sz);
498 
499 	*va = (vaddr_t)km_alloc(sz, &kv_any, &kp_piglet, &kd_nowait);
500 	if (*va == 0)
501 		return ENOMEM;
502 
503 	pmap_extract(pmap_kernel(), *va, pa);
504 	return 0;
505 }
506 
507 /*
508  * Free a piglet area.
509  */
510 void
511 uvm_pmr_free_piglet(vaddr_t va, vsize_t sz)
512 {
513 	/*
514 	 * Fix parameters.
515 	 */
516 	sz = round_page(sz);
517 
518 	/*
519 	 * Free the physical and virtual memory.
520 	 */
521 	km_free((void *)va, sz, &kv_any, &kp_dma_contig);
522 }
523 
524 /*
525  * Physmem RLE compression support.
526  *
527  * Given a physical page address, return the number of pages starting at the
528  * address that are free.  Clamps to the number of pages in
529  * HIBERNATE_CHUNK_SIZE. Returns 0 if the page at addr is not free.
530  */
531 int
532 uvm_page_rle(paddr_t addr)
533 {
534 	struct vm_page		*pg, *pg_end;
535 	struct vm_physseg	*vmp;
536 	int			 pseg_idx, off_idx;
537 
538 	pseg_idx = vm_physseg_find(atop(addr), &off_idx);
539 	if (pseg_idx == -1)
540 		return 0;
541 
542 	vmp = &vm_physmem[pseg_idx];
543 	pg = &vmp->pgs[off_idx];
544 	if (!(pg->pg_flags & PQ_FREE))
545 		return 0;
546 
547 	/*
548 	 * Search for the first non-free page after pg.
549 	 * Note that the page may not be the first page in a free pmemrange,
550 	 * therefore pg->fpgsz cannot be used.
551 	 */
552 	for (pg_end = pg; pg_end <= vmp->lastpg &&
553 	    (pg_end->pg_flags & PQ_FREE) == PQ_FREE &&
554 	    (pg_end - pg) < HIBERNATE_CHUNK_SIZE/PAGE_SIZE; pg_end++)
555 		;
556 	return pg_end - pg;
557 }
558 
559 /*
560  * Fills out the hibernate_info union pointed to by hib
561  * with information about this machine (swap signature block
562  * offsets, number of memory ranges, kernel in use, etc)
563  */
564 int
565 get_hibernate_info(union hibernate_info *hib, int suspend)
566 {
567 	struct disklabel dl;
568 	char err_string[128], *dl_ret;
569 	int part;
570 	SHA2_CTX ctx;
571 	void *fn;
572 
573 #ifndef NO_PROPOLICE
574 	/* Save propolice guard */
575 	hib->guard = __guard_local;
576 #endif /* ! NO_PROPOLICE */
577 
578 	/* Determine I/O function to use */
579 	hib->io_func = get_hibernate_io_function(swdevt[0].sw_dev);
580 	if (hib->io_func == NULL)
581 		return (1);
582 
583 	/* Calculate hibernate device */
584 	hib->dev = swdevt[0].sw_dev;
585 
586 	/* Read disklabel (used to calculate signature and image offsets) */
587 	dl_ret = disk_readlabel(&dl, hib->dev, err_string, sizeof(err_string));
588 
589 	if (dl_ret) {
590 		printf("Hibernate error reading disklabel: %s\n", dl_ret);
591 		return (1);
592 	}
593 
594 	/* Make sure we have a swap partition. */
595 	part = DISKPART(hib->dev);
596 	if (dl.d_npartitions <= part ||
597 	    dl.d_secsize > sizeof(union hibernate_info) ||
598 	    dl.d_partitions[part].p_fstype != FS_SWAP ||
599 	    DL_GETPSIZE(&dl.d_partitions[part]) == 0)
600 		return (1);
601 
602 	/* Magic number */
603 	hib->magic = HIBERNATE_MAGIC;
604 
605 	/* Calculate signature block location */
606 	hib->sec_size = dl.d_secsize;
607 	hib->sig_offset = DL_GETPSIZE(&dl.d_partitions[part]) - 1;
608 	hib->sig_offset = DL_SECTOBLK(&dl, hib->sig_offset);
609 
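	/*
	 * Hash a few identifying kernel properties: the version string and
	 * the addresses of several well-known kernel functions. Kernel
	 * relinking randomizes these addresses per build, so the hash
	 * effectively identifies the kernel image that wrote the signature.
	 */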
610 	SHA256Init(&ctx);
611 	SHA256Update(&ctx, version, strlen(version));
612 	fn = printf;
613 	SHA256Update(&ctx, &fn, sizeof(fn));
614 	fn = malloc;
615 	SHA256Update(&ctx, &fn, sizeof(fn));
616 	fn = km_alloc;
617 	SHA256Update(&ctx, &fn, sizeof(fn));
618 	fn = strlen;
619 	SHA256Update(&ctx, &fn, sizeof(fn));
620 	SHA256Final((u_int8_t *)&hib->kern_hash, &ctx);
621 
622 	if (suspend) {
623 		/* Grab the previously-allocated piglet addresses */
624 		hib->piglet_va = global_piglet_va;
625 		hib->piglet_pa = global_piglet_pa;
626 		hib->io_page = (void *)hib->piglet_va;
627 
628 		/*
629 		 * Initialization of the hibernate IO function for drivers
630 		 * that need to do prep work (such as allocating memory or
631 		 * setting up data structures that cannot safely be done
632 		 * during suspend without causing side effects). There is
633 		 * a matching HIB_DONE call performed after the write is
634 		 * completed.
635 		 */
636 		if (hib->io_func(hib->dev,
637 		    DL_SECTOBLK(&dl, DL_GETPOFFSET(&dl.d_partitions[part])),
638 		    (vaddr_t)NULL,
639 		    DL_SECTOBLK(&dl, DL_GETPSIZE(&dl.d_partitions[part])),
640 		    HIB_INIT, hib->io_page))
641 			goto fail;
642 
643 	} else {
644 		/*
645 		 * Resuming kernels use a regular private page for the driver
646 		 * Resuming kernels use a regular private page for the driver.
647 		 * No need to free this I/O page as it will vanish as part of
648 		 */
649 		hib->io_page = malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT);
650 		if (!hib->io_page)
651 			goto fail;
652 	}
653 
654 	if (get_hibernate_info_md(hib))
655 		goto fail;
656 
657 	return (0);
658 
659 fail:
660 	return (1);
661 }
662 
663 /*
664  * Allocate nitems*size bytes from the hiballoc area presently in use
665  */
666 void *
667 hibernate_zlib_alloc(void *unused, int nitems, int size)
668 {
669 	struct hibernate_zlib_state *hibernate_state;
670 
671 	hibernate_state =
672 	    (struct hibernate_zlib_state *)HIBERNATE_HIBALLOC_PAGE;
673 
674 	return hib_alloc(&hibernate_state->hiballoc_arena, nitems*size);
675 }
676 
677 /*
678  * Free the memory pointed to by addr in the hiballoc area presently in
679  * use
680  */
681 void
682 hibernate_zlib_free(void *unused, void *addr)
683 {
684 	struct hibernate_zlib_state *hibernate_state;
685 
686 	hibernate_state =
687 	    (struct hibernate_zlib_state *)HIBERNATE_HIBALLOC_PAGE;
688 
689 	hib_free(&hibernate_state->hiballoc_arena, addr);
690 }
691 
692 /*
693  * Inflate the next page of data from the image stream. The compressed
694  * stream alternates an RLE count with, when the count is zero, one page
695  * of data. On exit, the rle parameter contains the number of pages to
696  * skip in the output stream (or 0 if data was inflated into this page).
697  * Returns 0 if the stream contains additional data, or 1 if the stream is
698  * finished.
699  */
700 int
701 hibernate_inflate_page(int *rle)
702 {
703 	struct hibernate_zlib_state *hibernate_state;
704 	int i;
705 
706 	hibernate_state =
707 	    (struct hibernate_zlib_state *)HIBERNATE_HIBALLOC_PAGE;
708 
709 	/* Set up the stream for RLE code inflate */
710 	hibernate_state->hib_stream.next_out = (unsigned char *)rle;
711 	hibernate_state->hib_stream.avail_out = sizeof(*rle);
712 
713 	/* Inflate RLE code */
714 	i = inflate(&hibernate_state->hib_stream, Z_SYNC_FLUSH);
715 	if (i != Z_OK && i != Z_STREAM_END) {
716 		/*
717 		 * XXX - this will likely reboot/hang most machines
718 		 *       since the console output buffer will be unmapped,
719 		 *       but there's not much else we can do here.
720 		 */
721 		panic("rle inflate stream error");
722 	}
723 
724 	if (hibernate_state->hib_stream.avail_out != 0) {
725 		/*
726 		 * XXX - this will likely reboot/hang most machines
727 		 *       since the console output buffer will be unmapped,
728 		 *       but there's not much else we can do here.
729 		 */
730 		panic("rle short inflate error");
731 	}
732 
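	/*
	 * The count can never legitimately exceed MAX_RLE (1024 pages for
	 * the typical 4 MB chunk with 4 KB pages).
	 */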
733 	if (*rle < 0 || *rle > 1024) {
734 		/*
735 		 * XXX - this will likely reboot/hang most machines
736 		 *       since the console output buffer will be unmapped,
737 		 *       but there's not much else we can do here.
738 		 */
739 		panic("invalid rle count");
740 	}
741 
742 	if (i == Z_STREAM_END)
743 		return (1);
744 
745 	if (*rle != 0)
746 		return (0);
747 
748 	/* Set up the stream for page inflate */
749 	hibernate_state->hib_stream.next_out =
750 		(unsigned char *)HIBERNATE_INFLATE_PAGE;
751 	hibernate_state->hib_stream.avail_out = PAGE_SIZE;
752 
753 	/* Process next block of data */
754 	i = inflate(&hibernate_state->hib_stream, Z_SYNC_FLUSH);
755 	if (i != Z_OK && i != Z_STREAM_END) {
756 		/*
757 		 * XXX - this will likely reboot/hang most machines
758 		 *       since the console output buffer will be unmapped,
759 		 *       but there's not much else we can do here.
760 		 */
761 		panic("inflate error");
762 	}
763 
764 	/* We should always have extracted a full page ... */
765 	if (hibernate_state->hib_stream.avail_out != 0) {
766 		/*
767 		 * XXX - this will likely reboot/hang most machines
768 		 *       since the console output buffer will be unmapped,
769 		 *       but there's not much else we can do here.
770 		 */
771 		panic("incomplete page");
772 	}
773 
774 	return (i == Z_STREAM_END);
775 }
776 
777 /*
778  * Inflate size bytes from src into dest, skipping any pages in
779  * the destination range that are special (see hibernate_inflate_skip)
780  *
781  * This function executes while using the resume-time stack
782  * and pmap, and therefore cannot use ddb/printf/etc. Doing so
783  * will likely hang or reset the machine since the console output buffer
784  * will be unmapped.
785  */
786 void
787 hibernate_inflate_region(union hibernate_info *hib, paddr_t dest,
788     paddr_t src, size_t size)
789 {
790 	int end_stream = 0, rle, skip;
791 	struct hibernate_zlib_state *hibernate_state;
792 
793 	hibernate_state =
794 	    (struct hibernate_zlib_state *)HIBERNATE_HIBALLOC_PAGE;
795 
796 	hibernate_state->hib_stream.next_in = (unsigned char *)src;
797 	hibernate_state->hib_stream.avail_in = size;
798 
799 	do {
800 		/*
801 		 * Is this a special page? If yes, redirect the
802 		 * inflate output to a scratch page (eg, discard it)
803 		 */
804 		skip = hibernate_inflate_skip(hib, dest);
805 		if (skip == HIB_SKIP) {
806 			hibernate_enter_resume_mapping(
807 			    HIBERNATE_INFLATE_PAGE,
808 			    HIBERNATE_INFLATE_PAGE, 0);
809 		} else if (skip == HIB_MOVE) {
810 			/*
811 			 * Special case : retguard region. This gets moved
812 			 * temporarily into the piglet region and copied into
813 			 * place immediately before resume
814 			 */
815 			hibernate_enter_resume_mapping(
816 			    HIBERNATE_INFLATE_PAGE,
817 			    hib->piglet_pa + (110 * PAGE_SIZE) +
818 			    hib->retguard_ofs, 0);
819 			hib->retguard_ofs += PAGE_SIZE;
820 			if (hib->retguard_ofs > 255 * PAGE_SIZE) {
821 				/*
822 				 * XXX - this will likely reboot/hang most
823 				 *       machines since the console output
824 				 *       buffer will be unmapped, but there's
825 				 *       not much else we can do here.
826 				 */
827 				panic("retguard move error, out of space");
828 			}
829 		} else {
830 			hibernate_enter_resume_mapping(
831 			    HIBERNATE_INFLATE_PAGE, dest, 0);
832 		}
833 
834 		hibernate_flush();
835 		end_stream = hibernate_inflate_page(&rle);
836 
837 		if (rle == 0)
838 			dest += PAGE_SIZE;
839 		else
840 			dest += (rle * PAGE_SIZE);
841 	} while (!end_stream);
842 }
843 
844 /*
845  * Deflate from src into the I/O page, up to 'remaining' bytes.
846  *
847  * Returns the number of input bytes consumed, and updates the 'remaining'
848  * parameter to the amount of output space still unused (this information
849  * is needed to know how much to write to disk)
850  */
851 size_t
852 hibernate_deflate(union hibernate_info *hib, paddr_t src,
853     size_t *remaining)
854 {
855 	vaddr_t hibernate_io_page = hib->piglet_va + PAGE_SIZE;
856 	struct hibernate_zlib_state *hibernate_state;
857 
858 	hibernate_state =
859 	    (struct hibernate_zlib_state *)HIBERNATE_HIBALLOC_PAGE;
860 
861 	/* Set up the stream for deflate */
862 	hibernate_state->hib_stream.next_in = (unsigned char *)src;
863 	hibernate_state->hib_stream.avail_in = PAGE_SIZE - (src & PAGE_MASK);
864 	hibernate_state->hib_stream.next_out =
865 		(unsigned char *)hibernate_io_page + (PAGE_SIZE - *remaining);
866 	hibernate_state->hib_stream.avail_out = *remaining;
867 
868 	/* Process next block of data */
869 	if (deflate(&hibernate_state->hib_stream, Z_SYNC_FLUSH) != Z_OK)
870 		panic("hibernate zlib deflate error");
871 
872 	/* Update pointers and return number of bytes consumed */
873 	*remaining = hibernate_state->hib_stream.avail_out;
874 	return (PAGE_SIZE - (src & PAGE_MASK)) -
875 	    hibernate_state->hib_stream.avail_in;
876 }
877 
878 /*
879  * Write the hibernation information specified in hiber_info
880  * to the location in swap previously calculated (last block of
881  * swap), called the "signature block".
882  */
883 int
884 hibernate_write_signature(union hibernate_info *hib)
885 {
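	/*
	 * Clear a full sector, but copy only DEV_BSIZE bytes: the CTASSERT
	 * at the top of this file guarantees the signature fields fit in
	 * one DEV_BSIZE block.
	 */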
886 	memset(&disk_hib, 0, hib->sec_size);
887 	memcpy(&disk_hib, hib, DEV_BSIZE);
888 
889 	/* Write hibernate info to disk */
890 	return (hib->io_func(hib->dev, hib->sig_offset,
891 	    (vaddr_t)&disk_hib, hib->sec_size, HIB_W,
892 	    hib->io_page));
893 }
894 
895 /*
896  * Write the memory chunk table to the area in swap immediately
897  * preceding the signature block. The chunk table is stored
898  * in the piglet when this function is called.  Returns errno.
899  */
900 int
901 hibernate_write_chunktable(union hibernate_info *hib)
902 {
903 	vaddr_t hibernate_chunk_table_start;
904 	size_t hibernate_chunk_table_size;
905 	int i, err;
906 
907 	hibernate_chunk_table_size = HIBERNATE_CHUNK_TABLE_SIZE;
908 
909 	hibernate_chunk_table_start = hib->piglet_va +
910 	    HIBERNATE_CHUNK_SIZE;
911 
912 	/* Write chunk table */
913 	for (i = 0; i < hibernate_chunk_table_size; i += MAXPHYS) {
914 		if ((err = hib->io_func(hib->dev,
915 		    hib->chunktable_offset + (i/DEV_BSIZE),
916 		    (vaddr_t)(hibernate_chunk_table_start + i),
917 		    MAXPHYS, HIB_W, hib->io_page))) {
918 			DPRINTF("chunktable write error: %d\n", err);
919 			return (err);
920 		}
921 	}
922 
923 	return (0);
924 }
925 
926 /*
927  * Write an empty hiber_info to the swap signature block, which is
928  * guaranteed to not match any valid hib.
929  */
930 int
931 hibernate_clear_signature(union hibernate_info *hib)
932 {
933 	uint8_t buf[DEV_BSIZE];
934 
935 	/* Stash the current signature block, then zero it for the disk write */
936 	memcpy(&buf, &disk_hib, sizeof(buf));
937 	memset(&disk_hib, 0, hib->sec_size);
938 
939 	/* Write (zeroed) hibernate info to disk */
940 	DPRINTF("clearing hibernate signature block location: %lld\n",
941 		hib->sig_offset);
942 	if (hibernate_block_io(hib,
943 	    hib->sig_offset,
944 	    hib->sec_size, (vaddr_t)&disk_hib, 1))
945 		printf("Warning: could not clear hibernate signature\n");
946 
947 	memcpy(&disk_hib, buf, sizeof(buf));
948 	return (0);
949 }
950 
951 /*
952  * Compare two hibernate_infos to determine if they are the same (eg,
953  * we should be performing a hibernate resume on this machine.
954  * Not all fields are checked - just enough to verify that the machine
955  * has the same memory configuration and kernel as the one that
956  * wrote the signature previously.
957  */
958 int
959 hibernate_compare_signature(union hibernate_info *mine,
960     union hibernate_info *disk)
961 {
962 	u_int i;
963 
964 	if (mine->nranges != disk->nranges) {
965 		printf("unhibernate failed: memory layout changed\n");
966 		return (1);
967 	}
968 
969 	if (bcmp(mine->kern_hash, disk->kern_hash, SHA256_DIGEST_LENGTH) != 0) {
970 		printf("unhibernate failed: original kernel changed\n");
971 		return (1);
972 	}
973 
974 	for (i = 0; i < mine->nranges; i++) {
975 		if ((mine->ranges[i].base != disk->ranges[i].base) ||
976 		    (mine->ranges[i].end != disk->ranges[i].end) ) {
977 			DPRINTF("hib range %d mismatch [%p-%p != %p-%p]\n",
978 				i,
979 				(void *)mine->ranges[i].base,
980 				(void *)mine->ranges[i].end,
981 				(void *)disk->ranges[i].base,
982 				(void *)disk->ranges[i].end);
983 			printf("unhibernate failed: memory size changed\n");
984 			return (1);
985 		}
986 	}
987 
988 	return (0);
989 }
990 
991 /*
992  * Transfers xfer_size bytes between the hibernate device specified in
993  * hib_info at offset blkctr and the vaddr specified at dest.
994  *
995  * Separate offsets and pages are used to handle misaligned reads (reads
996  * that span a page boundary).
997  *
998  * blkctr specifies a relative offset (relative to the start of swap),
999  * not an absolute disk offset
1000  *
1001  */
1002 int
1003 hibernate_block_io(union hibernate_info *hib, daddr_t blkctr,
1004     size_t xfer_size, vaddr_t dest, int iswrite)
1005 {
1006 	struct buf *bp;
1007 	struct bdevsw *bdsw;
1008 	int error;
1009 
1010 	bp = geteblk(xfer_size);
1011 	bdsw = &bdevsw[major(hib->dev)];
1012 
1013 	error = (*bdsw->d_open)(hib->dev, FREAD, S_IFCHR, curproc);
1014 	if (error) {
1015 		printf("hibernate_block_io open failed\n");
1016 		return (1);
1017 	}
1018 
1019 	if (iswrite)
1020 		bcopy((caddr_t)dest, bp->b_data, xfer_size);
1021 
1022 	bp->b_bcount = xfer_size;
1023 	bp->b_blkno = blkctr;
1024 	CLR(bp->b_flags, B_READ | B_WRITE | B_DONE);
1025 	SET(bp->b_flags, B_BUSY | (iswrite ? B_WRITE : B_READ) | B_RAW);
1026 	bp->b_dev = hib->dev;
1027 	(*bdsw->d_strategy)(bp);
1028 
1029 	error = biowait(bp);
1030 	if (error) {
1031 		printf("hib block_io biowait error %d blk %lld size %zu\n",
1032 			error, (long long)blkctr, xfer_size);
1033 		error = (*bdsw->d_close)(hib->dev, 0, S_IFCHR,
1034 		    curproc);
1035 		if (error)
1036 			printf("hibernate_block_io error close failed\n");
1037 		return (1);
1038 	}
1039 
1040 	error = (*bdsw->d_close)(hib->dev, FREAD, S_IFCHR, curproc);
1041 	if (error) {
1042 		printf("hibernate_block_io close failed\n");
1043 		return (1);
1044 	}
1045 
1046 	if (!iswrite)
1047 		bcopy(bp->b_data, (caddr_t)dest, xfer_size);
1048 
1049 	bp->b_flags |= B_INVAL;
1050 	brelse(bp);
1051 
1052 	return (0);
1053 }
1054 
1055 /*
1056  * Preserve one page worth of random data, generated from the resuming
1057  * kernel's arc4random. After resume, this preserved entropy can be used
1058  * to further improve the un-hibernated machine's entropy pool. This
1059  * random data is stored in the piglet, which is preserved across the
1060  * unpack operation, and is restored later in the resume process (see
1061  * hib_getentropy)
1062  */
1063 void
1064 hibernate_preserve_entropy(union hibernate_info *hib)
1065 {
1066 	void *entropy;
1067 
1068 	entropy = km_alloc(PAGE_SIZE, &kv_any, &kp_none, &kd_nowait);
1069 
1070 	if (!entropy)
1071 		return;
1072 
1073 	pmap_activate(curproc);
1074 	pmap_kenter_pa((vaddr_t)entropy,
1075 	    (paddr_t)(hib->piglet_pa + (29 * PAGE_SIZE)),
1076 	    PROT_READ | PROT_WRITE);
1077 
1078 	arc4random_buf((void *)entropy, PAGE_SIZE);
1079 	pmap_kremove((vaddr_t)entropy, PAGE_SIZE);
1080 	km_free(entropy, PAGE_SIZE, &kv_any, &kp_none);
1081 }
1082 
1083 #ifndef NO_PROPOLICE
1084 vaddr_t
1085 hibernate_unprotect_ssp(void)
1086 {
1087 	struct kmem_dyn_mode kd_avoidalias;
1088 	vaddr_t va = trunc_page((vaddr_t)&__guard_local);
1089 	paddr_t pa;
1090 
1091 	pmap_extract(pmap_kernel(), va, &pa);
1092 
1093 	memset(&kd_avoidalias, 0, sizeof kd_avoidalias);
1094 	kd_avoidalias.kd_prefer = pa;
1095 	kd_avoidalias.kd_waitok = 1;
1096 	va = (vaddr_t)km_alloc(PAGE_SIZE, &kv_any, &kp_none, &kd_avoidalias);
1097 	if (!va)
1098 		panic("hibernate_unprotect_ssp");
1099 
1100 	pmap_kenter_pa(va, pa, PROT_READ | PROT_WRITE);
1101 	pmap_update(pmap_kernel());
1102 
1103 	return va;
1104 }
1105 
1106 void
1107 hibernate_reprotect_ssp(vaddr_t va)
1108 {
1109 	pmap_kremove(va, PAGE_SIZE);
1110 	km_free((void *)va, PAGE_SIZE, &kv_any, &kp_none);
1111 }
1112 #endif /* ! NO_PROPOLICE */
1113 
1114 /*
1115  * Reads the signature block from swap, checks against the current machine's
1116  * information. If the information matches, perform a resume by reading the
1117  * saved image into the pig area, and unpacking.
1118  *
1119  * Must be called with interrupts enabled.
1120  */
1121 void
1122 hibernate_resume(void)
1123 {
1124 	uint8_t buf[DEV_BSIZE];
1125 	union hibernate_info *hib = (union hibernate_info *)&buf;
1126 	int s;
1127 #ifndef NO_PROPOLICE
1128 	vsize_t off = (vaddr_t)&__guard_local -
1129 	    trunc_page((vaddr_t)&__guard_local);
1130 	vaddr_t guard_va;
1131 #endif
1132 
1133 	/* Get current running machine's hibernate info */
1134 	memset(buf, 0, sizeof(buf));
1135 	if (get_hibernate_info(hib, 0)) {
1136 		DPRINTF("couldn't retrieve machine's hibernate info\n");
1137 		return;
1138 	}
1139 
1140 	/* Read hibernate info from disk */
1141 	s = splbio();
1142 
1143 	DPRINTF("reading hibernate signature block location: %lld\n",
1144 		hib->sig_offset);
1145 
1146 	if (hibernate_block_io(hib,
1147 	    hib->sig_offset,
1148 	    hib->sec_size, (vaddr_t)&disk_hib, 0)) {
1149 		DPRINTF("error in hibernate read\n");
1150 		splx(s);
1151 		return;
1152 	}
1153 
1154 	/* Check magic number */
1155 	if (disk_hib.magic != HIBERNATE_MAGIC) {
1156 		DPRINTF("wrong magic number in hibernate signature: %x\n",
1157 			disk_hib.magic);
1158 		splx(s);
1159 		return;
1160 	}
1161 
1162 	/*
1163 	 * We (possibly) found a hibernate signature. Clear signature first,
1164 	 * to prevent accidental resume or endless resume cycles later.
1165 	 */
1166 	if (hibernate_clear_signature(hib)) {
1167 		DPRINTF("error clearing hibernate signature block\n");
1168 		splx(s);
1169 		return;
1170 	}
1171 
1172 	/*
1173 	 * If on-disk and in-memory hibernate signatures match,
1174 	 * this means we should do a resume from hibernate.
1175 	 */
1176 	if (hibernate_compare_signature(hib, &disk_hib)) {
1177 		DPRINTF("mismatched hibernate signature block\n");
1178 		splx(s);
1179 		return;
1180 	}
1181 	disk_hib.dev = hib->dev;
1182 
1183 #ifdef MULTIPROCESSOR
1184 	/* XXX - if we fail later, we may need to rehatch APs on some archs */
1185 	DPRINTF("hibernate: quiescing APs\n");
1186 	hibernate_quiesce_cpus();
1187 #endif /* MULTIPROCESSOR */
1188 
1189 	/* Read the image from disk into the image (pig) area */
1190 	if (hibernate_read_image(&disk_hib))
1191 		goto fail;
1192 
1193 	DPRINTF("hibernate: quiescing devices\n");
1194 	if (config_suspend_all(DVACT_QUIESCE) != 0)
1195 		goto fail;
1196 
1197 #ifndef NO_PROPOLICE
1198 	guard_va = hibernate_unprotect_ssp();
1199 #endif /* NO_PROPOLICE */
1200 
1201 	(void) splhigh();
1202 	hibernate_disable_intr_machdep();
1203 	cold = 2;
1204 
1205 	DPRINTF("hibernate: suspending devices\n");
1206 	if (config_suspend_all(DVACT_SUSPEND) != 0) {
1207 		cold = 0;
1208 		hibernate_enable_intr_machdep();
1209 #ifndef NO_PROPOLICE
1210 		hibernate_reprotect_ssp(guard_va);
1211 #endif /* ! NO_PROPOLICE */
1212 		goto fail;
1213 	}
1214 
1215 	pmap_extract(pmap_kernel(), (vaddr_t)&__retguard_start,
1216 	    &retguard_start_phys);
1217 	pmap_extract(pmap_kernel(), (vaddr_t)&__retguard_end,
1218 	    &retguard_end_phys);
1219 
1220 	hibernate_preserve_entropy(&disk_hib);
1221 
1222 	printf("Unpacking image...\n");
1223 
1224 	/* Switch stacks */
1225 	DPRINTF("hibernate: switching stacks\n");
1226 	hibernate_switch_stack_machdep();
1227 
1228 #ifndef NO_PROPOLICE
1229 	/* Start using suspended kernel's propolice guard */
1230 	*(long *)(guard_va + off) = disk_hib.guard;
1231 	hibernate_reprotect_ssp(guard_va);
1232 #endif /* ! NO_PROPOLICE */
1233 
1234 	/* Unpack and resume */
1235 	hibernate_unpack_image(&disk_hib);
1236 
1237 fail:
1238 	splx(s);
1239 	printf("\nUnable to resume hibernated image\n");
1240 }
1241 
1242 /*
1243  * Unpack image from pig area to original location by looping through the
1244  * list of output chunks in the order they should be restored (fchunks).
1245  *
1246  * Note that due to the stack smash protector and the fact that we have
1247  * switched stacks, it is not permitted to return from this function.
1248  */
1249 void
1250 hibernate_unpack_image(union hibernate_info *hib)
1251 {
1252 	uint8_t buf[DEV_BSIZE];
1253 	struct hibernate_disk_chunk *chunks;
1254 	union hibernate_info *local_hib = (union hibernate_info *)&buf;
1255 	paddr_t image_cur = global_pig_start;
1256 	short i, *fchunks;
1257 	char *pva;
1258 
1259 	/* Piglet will be identity mapped (VA == PA) */
1260 	pva = (char *)hib->piglet_pa;
1261 
1262 	fchunks = (short *)(pva + (4 * PAGE_SIZE));
1263 
1264 	chunks = (struct hibernate_disk_chunk *)(pva + HIBERNATE_CHUNK_SIZE);
1265 
1266 	/* Can't use hiber_info that's passed in after this point */
1267 	memcpy(buf, hib, sizeof(buf));
1268 	local_hib->retguard_ofs = 0;
1269 
1270 	/* VA == PA */
1271 	local_hib->piglet_va = local_hib->piglet_pa;
1272 
1273 	/*
1274 	 * Point of no return. Once we pass this point, only kernel code can
1275 	 * be accessed. No global variables or other kernel data structures
1276 	 * are guaranteed to be coherent after unpack starts.
1277 	 *
1278 	 * The image is now in high memory (pig area), we unpack from the pig
1279 	 * to the correct location in memory. We'll eventually end up copying
1280 	 * on top of ourself, but we are assured the kernel code here is the
1281 	 * same between the hibernated and resuming kernel, and we are running
1282 	 * on our own stack, so the overwrite is ok.
1283 	 */
1284 	DPRINTF("hibernate: activating alt. pagetable and starting unpack\n");
1285 	hibernate_activate_resume_pt_machdep();
1286 
1287 	for (i = 0; i < local_hib->chunk_ctr; i++) {
1288 		/* Reset zlib for inflate */
1289 		if (hibernate_zlib_reset(local_hib, 0) != Z_OK)
1290 			panic("hibernate failed to reset zlib for inflate");
1291 
1292 		hibernate_process_chunk(local_hib, &chunks[fchunks[i]],
1293 		    image_cur);
1294 
1295 		image_cur += chunks[fchunks[i]].compressed_size;
1296 	}
1297 
1298 	/*
1299 	 * Resume the loaded kernel by jumping to the MD resume vector.
1300 	 * We won't be returning from this call. We pass the location of
1301 	 * the retguard save area so the MD code can replace it before
1302 	 * resuming. See the piglet layout at the top of this file for
1303 	 * more information on the layout of the piglet area.
1304 	 *
1305 	 * We use 'global_piglet_va' here since by the time we are at
1306 	 * this point, we have already unpacked the image, and we want
1307 	 * the suspended kernel's view of what the piglet was, before
1308 	 * suspend occurred (since we will need to use that in the retguard
1309 	 * copy code in hibernate_resume_machdep.)
1310 	 */
1311 	hibernate_resume_machdep(global_piglet_va + (110 * PAGE_SIZE));
1312 }
1313 
1314 /*
1315  * Bounce a compressed image chunk to the piglet, entering mappings for the
1316  * copied pages as needed
1317  */
1318 void
1319 hibernate_copy_chunk_to_piglet(paddr_t img_cur, vaddr_t piglet, size_t size)
1320 {
1321 	size_t ct, ofs;
1322 	paddr_t src = img_cur;
1323 	vaddr_t dest = piglet;
1324 
1325 	/* Copy first partial page */
1326 	ct = (PAGE_SIZE) - (src & PAGE_MASK);
1327 	ofs = (src & PAGE_MASK);
1328 
1329 	if (ct < PAGE_SIZE) {
1330 		hibernate_enter_resume_mapping(HIBERNATE_INFLATE_PAGE,
1331 			(src - ofs), 0);
1332 		hibernate_flush();
1333 		bcopy((caddr_t)(HIBERNATE_INFLATE_PAGE + ofs), (caddr_t)dest, ct);
1334 		src += ct;
1335 		dest += ct;
1336 	}
1337 
1338 	/* Copy remaining pages */
1339 	while (src < size + img_cur) {
1340 		hibernate_enter_resume_mapping(HIBERNATE_INFLATE_PAGE, src, 0);
1341 		hibernate_flush();
1342 		ct = PAGE_SIZE;
1343 		bcopy((caddr_t)(HIBERNATE_INFLATE_PAGE), (caddr_t)dest, ct);
1344 		hibernate_flush();
1345 		src += ct;
1346 		dest += ct;
1347 	}
1348 }
1349 
1350 /*
1351  * Process a chunk by bouncing it to the piglet, followed by unpacking
1352  */
1353 void
1354 hibernate_process_chunk(union hibernate_info *hib,
1355     struct hibernate_disk_chunk *chunk, paddr_t img_cur)
1356 {
1357 	char *pva = (char *)hib->piglet_va;
1358 
1359 	hibernate_copy_chunk_to_piglet(img_cur,
1360 	    (vaddr_t)(pva + (HIBERNATE_CHUNK_SIZE * 2)), chunk->compressed_size);
1361 	hibernate_inflate_region(hib, chunk->base,
1362 	    (vaddr_t)(pva + (HIBERNATE_CHUNK_SIZE * 2)),
1363 	    chunk->compressed_size);
1364 }
1365 
1366 /*
1367  * Calculate RLE component for 'inaddr'. Clamps to max RLE pages between
1368  * inaddr and range_end.
1369  */
1370 int
1371 hibernate_calc_rle(paddr_t inaddr, paddr_t range_end)
1372 {
1373 	int rle;
1374 
1375 	rle = uvm_page_rle(inaddr);
1376 	KASSERT(rle >= 0 && rle <= MAX_RLE);
1377 
1378 	/* Clamp RLE to range end */
1379 	if (rle > 0 && inaddr + (rle * PAGE_SIZE) > range_end)
1380 		rle = (range_end - inaddr) / PAGE_SIZE;
1381 
1382 	return (rle);
1383 }
1384 
1385 /*
1386  * Write the RLE code for the page at 'inaddr' to the output stream.
1387  * Returns the number of pages to be skipped at 'inaddr'.
1388  */
1389 int
1390 hibernate_write_rle(union hibernate_info *hib, paddr_t inaddr,
1391 	paddr_t range_end, daddr_t *blkctr,
1392 	size_t *out_remaining)
1393 {
1394 	int rle, err, *rleloc;
1395 	struct hibernate_zlib_state *hibernate_state;
1396 	vaddr_t hibernate_io_page = hib->piglet_va + PAGE_SIZE;
1397 
1398 	hibernate_state =
1399 	    (struct hibernate_zlib_state *)HIBERNATE_HIBALLOC_PAGE;
1400 
1401 	rle = hibernate_calc_rle(inaddr, range_end);
1402 
1403 	rleloc = (int *)hibernate_rle_page + MAX_RLE - 1;
1404 	*rleloc = rle;
1405 
1406 	/* Deflate the RLE code into the stream */
1407 	hibernate_deflate(hib, (paddr_t)rleloc, out_remaining);
1408 
1409 	/* Did we fill the output page? If so, flush to disk */
1410 	if (*out_remaining == 0) {
1411 		if ((err = hib->io_func(hib->dev, *blkctr + hib->image_offset,
1412 			(vaddr_t)hibernate_io_page, PAGE_SIZE, HIB_W,
1413 			hib->io_page))) {
1414 				DPRINTF("hib write error %d\n", err);
1415 				return (err);
1416 		}
1417 
1418 		*blkctr += PAGE_SIZE / DEV_BSIZE;
1419 		*out_remaining = PAGE_SIZE;
1420 
1421 		/* If we didn't deflate the entire RLE code, finish it now */
1422 		if (hibernate_state->hib_stream.avail_in != 0)
1423 			hibernate_deflate(hib,
1424 				(vaddr_t)hibernate_state->hib_stream.next_in,
1425 				out_remaining);
1426 	}
1427 
1428 	return (rle);
1429 }
1430 
1431 /*
1432  * Write a compressed version of this machine's memory to disk, at the
1433  * precalculated swap offset:
1434  *
1435  * end of swap - signature block size - chunk table size - memory size
1436  *
1437  * The function begins by looping through each phys mem range, cutting each
1438  * one into MD sized chunks. These chunks are then compressed individually
1439  * and written out to disk, in phys mem order. Some chunks might compress
1440  * more than others, and for this reason, each chunk's size is recorded
1441  * in the chunk table, which is written to disk after the image has
1442  * properly been compressed and written (in hibernate_write_chunktable).
1443  *
1444  * When this function is called, the machine is nearly suspended - most
1445  * devices are quiesced/suspended, interrupts are off, and cold has
1446  * been set. This means that there can be no side effects once the
1447  * write has started, and the write function itself can also have no
1448  * side effects. This also means no printfs are permitted (since printf
1449  * has side effects.)
1450  *
1451  * Return values :
1452  *
1453  * 0      - success
1454  * EIO    - I/O error occurred writing the chunks
1455  * EINVAL - Failed to write a complete range
1456  * ENOMEM - Memory allocation failure during preparation of the zlib arena
1457  */
1458 int
1459 hibernate_write_chunks(union hibernate_info *hib)
1460 {
1461 	paddr_t range_base, range_end, inaddr, temp_inaddr;
1462 	size_t out_remaining, used;
1463 	struct hibernate_disk_chunk *chunks;
1464 	vaddr_t hibernate_io_page = hib->piglet_va + PAGE_SIZE;
1465 	daddr_t blkctr = 0;
1466 	int i, rle, err;
1467 	struct hibernate_zlib_state *hibernate_state;
1468 
1469 	hibernate_state =
1470 	    (struct hibernate_zlib_state *)HIBERNATE_HIBALLOC_PAGE;
1471 
1472 	hib->chunk_ctr = 0;
1473 
1474 	/*
1475 	 * Map the utility VAs to the piglet. See the piglet map at the
1476 	 * top of this file for piglet layout information.
1477 	 */
1478 	hibernate_copy_page = hib->piglet_va + 3 * PAGE_SIZE;
1479 	hibernate_rle_page = hib->piglet_va + 28 * PAGE_SIZE;
1480 
1481 	chunks = (struct hibernate_disk_chunk *)(hib->piglet_va +
1482 	    HIBERNATE_CHUNK_SIZE);
1483 
1484 	/* Calculate the chunk regions */
1485 	for (i = 0; i < hib->nranges; i++) {
1486 		range_base = hib->ranges[i].base;
1487 		range_end = hib->ranges[i].end;
1488 
1489 		inaddr = range_base;
1490 
1491 		while (inaddr < range_end) {
1492 			chunks[hib->chunk_ctr].base = inaddr;
1493 			if (inaddr + HIBERNATE_CHUNK_SIZE < range_end)
1494 				chunks[hib->chunk_ctr].end = inaddr +
1495 				    HIBERNATE_CHUNK_SIZE;
1496 			else
1497 				chunks[hib->chunk_ctr].end = range_end;
1498 
1499 			inaddr += HIBERNATE_CHUNK_SIZE;
1500 			hib->chunk_ctr++;
1501 		}
1502 	}
1503 
1504 	uvm_pmr_dirty_everything();
1505 	uvm_pmr_zero_everything();
1506 
1507 	/* Compress and write the chunks in the chunktable */
1508 	for (i = 0; i < hib->chunk_ctr; i++) {
1509 		range_base = chunks[i].base;
1510 		range_end = chunks[i].end;
1511 
1512 		chunks[i].offset = blkctr + hib->image_offset;
1513 
1514 		/* Reset zlib for deflate */
1515 		if (hibernate_zlib_reset(hib, 1) != Z_OK) {
1516 			DPRINTF("hibernate_zlib_reset failed for deflate\n");
1517 			return (ENOMEM);
1518 		}
1519 
1520 		inaddr = range_base;
1521 
1522 		/*
1523 		 * For each range, loop through its phys mem region
1524 		 * and write out the chunks (the last chunk might be
1525 		 * smaller than the chunk size).
1526 		 */
1527 		while (inaddr < range_end) {
1528 			out_remaining = PAGE_SIZE;
1529 			while (out_remaining > 0 && inaddr < range_end) {
1530 				/*
1531 				 * Adjust for regions that are not evenly
1532 				 * divisible by PAGE_SIZE or overflowed
1533 				 * pages from the previous iteration.
1534 				 */
1535 				temp_inaddr = (inaddr & PAGE_MASK) +
1536 				    hibernate_copy_page;
1537 
1538 				/* Deflate from temp_inaddr to IO page */
1539 				if (inaddr != range_end) {
1540 					if (inaddr % PAGE_SIZE == 0) {
1541 						rle = hibernate_write_rle(hib,
1542 							inaddr,
1543 							range_end,
1544 							&blkctr,
1545 							&out_remaining);
1546 					}
1547 
1548 					if (rle == 0) {
1549 						pmap_kenter_pa(hibernate_temp_page,
1550 							inaddr & PMAP_PA_MASK,
1551 							PROT_READ);
1552 
1553 						bcopy((caddr_t)hibernate_temp_page,
1554 							(caddr_t)hibernate_copy_page,
1555 							PAGE_SIZE);
1556 						inaddr += hibernate_deflate(hib,
1557 							temp_inaddr,
1558 							&out_remaining);
1559 					} else {
1560 						inaddr += rle * PAGE_SIZE;
1561 						if (inaddr > range_end)
1562 							inaddr = range_end;
1563 					}
1564 
1565 				}
1566 
1567 				if (out_remaining == 0) {
1568 					/* Filled up the page */
1569 					if ((err = hib->io_func(hib->dev,
1570 					    blkctr + hib->image_offset,
1571 					    (vaddr_t)hibernate_io_page,
1572 					    PAGE_SIZE, HIB_W, hib->io_page))) {
1573 						DPRINTF("hib write error %d\n",
1574 						    err);
1575 						return (err);
1576 					}
1577 					blkctr += PAGE_SIZE / DEV_BSIZE;
1578 				}
1579 			}
1580 		}
1581 
1582 		if (inaddr != range_end) {
1583 			DPRINTF("deflate range ended prematurely\n");
1584 			return (EINVAL);
1585 		}
1586 
1587 		/*
1588 		 * End of range. Round up to next secsize bytes
1589 		 * after finishing compress
1590 		 */
1591 		if (out_remaining == 0)
1592 			out_remaining = PAGE_SIZE;
1593 
1594 		/* Finish compress */
1595 		hibernate_state->hib_stream.next_in = (unsigned char *)inaddr;
1596 		hibernate_state->hib_stream.avail_in = 0;
1597 		hibernate_state->hib_stream.next_out =
1598 		    (unsigned char *)hibernate_io_page +
1599 			(PAGE_SIZE - out_remaining);
1600 
1601 		/* We have an extra output page available for finalize */
1602 		hibernate_state->hib_stream.avail_out =
1603 			out_remaining + PAGE_SIZE;
1604 
1605 		if ((err = deflate(&hibernate_state->hib_stream, Z_FINISH)) !=
1606 		    Z_STREAM_END) {
1607 			DPRINTF("deflate error in output stream: %d\n", err);
1608 			return (err);
1609 		}
1610 
1611 		out_remaining = hibernate_state->hib_stream.avail_out;
1612 
1613 		/* Round up to next sector if needed */
1614 		used = ROUNDUP(2 * PAGE_SIZE - out_remaining, hib->sec_size);
1615 
1616 		/* Write final block(s) for this chunk */
1617 		if ((err = hib->io_func(hib->dev, blkctr + hib->image_offset,
1618 		    (vaddr_t)hibernate_io_page, used,
1619 		    HIB_W, hib->io_page))) {
1620 			DPRINTF("hib final write error %d\n", err);
1621 			return (err);
1622 		}
1623 
1624 		blkctr += used / DEV_BSIZE;
1625 
1626 		chunks[i].compressed_size = (blkctr + hib->image_offset -
1627 		    chunks[i].offset) * DEV_BSIZE;
1628 	}
1629 
1630 	hib->chunktable_offset = hib->image_offset + blkctr;
1631 	return (0);
1632 }
1633 
1634 /*
1635  * Reset the zlib stream state and allocate a new hiballoc area for either
1636  * inflate or deflate. This function is called once for each hibernate chunk.
1637  * Calling hiballoc_init multiple times is acceptable since the memory it is
1638  * provided is unmanaged memory (stolen). We use the memory provided to us
1639  * by the piglet allocated via the supplied hib.
1640  */
1641 int
1642 hibernate_zlib_reset(union hibernate_info *hib, int deflate)
1643 {
1644 	vaddr_t hibernate_zlib_start;
1645 	size_t hibernate_zlib_size;
1646 	char *pva = (char *)hib->piglet_va;
1647 	struct hibernate_zlib_state *hibernate_state;
1648 
1649 	hibernate_state =
1650 	    (struct hibernate_zlib_state *)HIBERNATE_HIBALLOC_PAGE;
1651 
1652 	if (!deflate)
1653 		pva = (char *)((paddr_t)pva & (PIGLET_PAGE_MASK));
1654 
1655 	/*
1656 	 * See piglet layout information at the start of this file for
1657 	 * information on the zlib page assignments.
1658 	 */
1659 	hibernate_zlib_start = (vaddr_t)(pva + (30 * PAGE_SIZE));
1660 	hibernate_zlib_size = 80 * PAGE_SIZE;
1661 
1662 	memset((void *)hibernate_zlib_start, 0, hibernate_zlib_size);
1663 	memset(hibernate_state, 0, PAGE_SIZE);
1664 
1665 	/* Set up stream structure */
1666 	hibernate_state->hib_stream.zalloc = (alloc_func)hibernate_zlib_alloc;
1667 	hibernate_state->hib_stream.zfree = (free_func)hibernate_zlib_free;
1668 
1669 	/* Initialize the hiballoc arena for zlib allocs/frees */
1670 	hiballoc_init(&hibernate_state->hiballoc_arena,
1671 	    (caddr_t)hibernate_zlib_start, hibernate_zlib_size);
1672 
1673 	if (deflate) {
1674 		return deflateInit(&hibernate_state->hib_stream,
1675 		    Z_BEST_SPEED);
1676 	} else
1677 		return inflateInit(&hibernate_state->hib_stream);
1678 }
1679 
1680 /*
1681  * Reads the hibernated memory image from disk, whose location and
1682  * size are recorded in hib. Begin by reading the persisted
1683  * chunk table, which records the original chunk placement location
1684  * and compressed size for each. Next, allocate a pig region of
1685  * sufficient size to hold the compressed image. Next, read the
1686  * chunks into the pig area (calling hibernate_read_chunks to do this),
1687  * and finally, if all of the above succeeds, clear the hibernate signature.
1688  * The function will then return to hibernate_resume, which will proceed
1689  * to unpack the pig image to the correct place in memory.
1690  */
1691 int
1692 hibernate_read_image(union hibernate_info *hib)
1693 {
1694 	size_t compressed_size, disk_size, chunktable_size, pig_sz;
1695 	paddr_t image_start, image_end, pig_start, pig_end;
1696 	struct hibernate_disk_chunk *chunks;
1697 	daddr_t blkctr;
1698 	vaddr_t chunktable = (vaddr_t)NULL;
1699 	paddr_t piglet_chunktable = hib->piglet_pa +
1700 	    HIBERNATE_CHUNK_SIZE;
1701 	int i, status;
1702 
1703 	status = 0;
1704 	pmap_activate(curproc);
1705 
1706 	/* Calculate total chunk table size in disk blocks */
1707 	chunktable_size = HIBERNATE_CHUNK_TABLE_SIZE / DEV_BSIZE;
1708 
1709 	blkctr = hib->chunktable_offset;
1710 
1711 	chunktable = (vaddr_t)km_alloc(HIBERNATE_CHUNK_TABLE_SIZE, &kv_any,
1712 	    &kp_none, &kd_nowait);
1713 
1714 	if (!chunktable)
1715 		return (1);
1716 
1717 	/* Map chunktable pages */
1718 	for (i = 0; i < HIBERNATE_CHUNK_TABLE_SIZE; i += PAGE_SIZE)
1719 		pmap_kenter_pa(chunktable + i, piglet_chunktable + i,
1720 		    PROT_READ | PROT_WRITE);
1721 	pmap_update(pmap_kernel());
1722 
1723 	/* Read the chunktable from disk into the piglet chunktable */
1724 	for (i = 0; i < HIBERNATE_CHUNK_TABLE_SIZE;
1725 	    i += MAXPHYS, blkctr += MAXPHYS/DEV_BSIZE)
1726 		hibernate_block_io(hib, blkctr, MAXPHYS,
1727 		    chunktable + i, 0);
1728 
1729 	blkctr = hib->image_offset;
1730 	compressed_size = 0;
1731 
1732 	chunks = (struct hibernate_disk_chunk *)chunktable;
1733 
1734 	for (i = 0; i < hib->chunk_ctr; i++)
1735 		compressed_size += chunks[i].compressed_size;
1736 
1737 	disk_size = compressed_size;
1738 
1739 	printf("unhibernating @ block %lld length %luMB\n",
1740 	    hib->sig_offset - chunktable_size,
1741 	    compressed_size / (1024 * 1024));
1742 
1743 	/* Allocate the pig area */
1744 	pig_sz = compressed_size + HIBERNATE_CHUNK_SIZE;
1745 	if (uvm_pmr_alloc_pig(&pig_start, pig_sz, hib->piglet_pa) == ENOMEM) {
1746 		status = 1;
1747 		goto unmap;
1748 	}
1749 
1750 	pig_end = pig_start + pig_sz;
1751 
1752 	/* Calculate image extents. Pig image must end on a chunk boundary. */
1753 	image_end = pig_end & ~(HIBERNATE_CHUNK_SIZE - 1);
1754 	image_start = image_end - disk_size;
1755 
1756 	hibernate_read_chunks(hib, image_start, image_end, disk_size,
1757 	    chunks);
1758 
1759 	/* Prepare the resume time pmap/page table */
1760 	hibernate_populate_resume_pt(hib, image_start, image_end);
1761 
1762 unmap:
1763 	/* Unmap chunktable pages */
1764 	pmap_kremove(chunktable, HIBERNATE_CHUNK_TABLE_SIZE);
1765 	pmap_update(pmap_kernel());
1766 
1767 	return (status);
1768 }
1769 
1770 /*
1771  * Read the hibernated memory chunks from disk (chunk information at this
1772  * point is stored in the piglet) into the pig area specified by
1773  * [pig_start .. pig_end]. Order the chunks so that the final chunk is the
1774  * only chunk with overlap possibilities.
1775  */
1776 int
1777 hibernate_read_chunks(union hibernate_info *hib, paddr_t pig_start,
1778     paddr_t pig_end, size_t image_compr_size,
1779     struct hibernate_disk_chunk *chunks)
1780 {
1781 	paddr_t img_cur, piglet_base;
1782 	daddr_t blkctr;
1783 	size_t processed, compressed_size, read_size;
1784 	int nchunks, nfchunks, num_io_pages;
1785 	vaddr_t tempva, hibernate_fchunk_area;
1786 	short *fchunks, i, j;
1787 
1788 	tempva = (vaddr_t)NULL;
1789 	hibernate_fchunk_area = (vaddr_t)NULL;
1790 	nfchunks = 0;
1791 	piglet_base = hib->piglet_pa;
1792 	global_pig_start = pig_start;
1793 
1794 	/*
1795 	 * These mappings go into the resuming kernel's page table, and are
1796 	 * used only during image read. They disappear from existence
1797 	 * when the suspended kernel is unpacked on top of us.
1798 	 */
1799 	tempva = (vaddr_t)km_alloc(MAXPHYS + PAGE_SIZE, &kv_any, &kp_none,
1800 		&kd_nowait);
1801 	if (!tempva)
1802 		return (1);
1803 	hibernate_fchunk_area = (vaddr_t)km_alloc(24 * PAGE_SIZE, &kv_any,
1804 	    &kp_none, &kd_nowait);
1805 	if (!hibernate_fchunk_area)
1806 		return (1);
1807 
1808 	/* Final output chunk ordering VA */
1809 	fchunks = (short *)hibernate_fchunk_area;
1810 
1811 	/* Map the chunk ordering region */
1812 	for (i = 0; i < 24; i++)
1813 		pmap_kenter_pa(hibernate_fchunk_area + (i * PAGE_SIZE),
1814 			piglet_base + ((4 + i) * PAGE_SIZE),
1815 			PROT_READ | PROT_WRITE);
1816 	pmap_update(pmap_kernel());
1817 
1818 	nchunks = hib->chunk_ctr;
1819 
1820 	/* Initially start all chunks as unplaced */
1821 	for (i = 0; i < nchunks; i++)
1822 		chunks[i].flags = 0;
1823 
1824 	/*
1825 	 * Search the list for chunks that are outside the pig area. These
1826 	 * can be placed first in the final output list.
1827 	 */
1828 	for (i = 0; i < nchunks; i++) {
1829 		if (chunks[i].end <= pig_start || chunks[i].base >= pig_end) {
1830 			fchunks[nfchunks] = i;
1831 			nfchunks++;
1832 			chunks[i].flags |= HIBERNATE_CHUNK_PLACED;
1833 		}
1834 	}
1835 
1836 	/*
1837 	 * Walk the ordering, place the chunks in ascending memory order.
1838 	 */
1839 	for (i = 0; i < nchunks; i++) {
1840 		if (chunks[i].flags != HIBERNATE_CHUNK_PLACED) {
1841 			fchunks[nfchunks] = i;
1842 			nfchunks++;
1843 			chunks[i].flags = HIBERNATE_CHUNK_PLACED;
1844 		}
1845 	}
1846 
1847 	img_cur = pig_start;
1848 
1849 	for (i = 0; i < nfchunks; i++) {
1850 		blkctr = chunks[fchunks[i]].offset;
1851 		processed = 0;
1852 		compressed_size = chunks[fchunks[i]].compressed_size;
1853 
1854 		while (processed < compressed_size) {
1855 			if (compressed_size - processed >= MAXPHYS)
1856 				read_size = MAXPHYS;
1857 			else
1858 				read_size = compressed_size - processed;
1859 
1860 			/*
1861 			 * We're reading read_size bytes, offset from the
1862 			 * start of a page by img_cur % PAGE_SIZE, so the
1863 			 * end will be read_size + (img_cur % PAGE_SIZE)
1864 			 * from the start of the first page.  Round that
1865 			 * up to the next page size.
1866 			 */
1867 			num_io_pages = (read_size + (img_cur % PAGE_SIZE)
1868 				+ PAGE_SIZE - 1) / PAGE_SIZE;
1869 
1870 			KASSERT(num_io_pages <= MAXPHYS/PAGE_SIZE + 1);
1871 
1872 			/* Map pages for this read */
1873 			for (j = 0; j < num_io_pages; j++)
1874 				pmap_kenter_pa(tempva + j * PAGE_SIZE,
1875 				    img_cur + j * PAGE_SIZE,
1876 				    PROT_READ | PROT_WRITE);
1877 
1878 			pmap_update(pmap_kernel());
1879 
1880 			hibernate_block_io(hib, blkctr, read_size,
1881 			    tempva + (img_cur & PAGE_MASK), 0);
1882 
1883 			blkctr += (read_size / DEV_BSIZE);
1884 
1885 			pmap_kremove(tempva, num_io_pages * PAGE_SIZE);
1886 			pmap_update(pmap_kernel());
1887 
1888 			processed += read_size;
1889 			img_cur += read_size;
1890 		}
1891 	}
1892 
1893 	pmap_kremove(hibernate_fchunk_area, 24 * PAGE_SIZE);
1894 	pmap_update(pmap_kernel());
1895 
1896 	return (0);
1897 }
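
/*
 * A minimal sketch of the num_io_pages round-up used in the read loop
 * above, with illustrative demo_* names: a read of read_size bytes
 * that starts (img_cur % PAGE_SIZE) bytes into a page touches
 * ceil((offset + read_size) / PAGE_SIZE) pages, computed with the
 * usual add-(PAGE_SIZE - 1)-then-divide idiom.
 */
#if 0
#include <stddef.h>

#define DEMO_PAGE_SIZE	4096UL

static size_t
demo_io_pages(size_t img_cur, size_t read_size)
{
	size_t off = img_cur % DEMO_PAGE_SIZE;

	return ((off + read_size + DEMO_PAGE_SIZE - 1) / DEMO_PAGE_SIZE);
}

/*
 * demo_io_pages(0, 65536) == 16, but demo_io_pages(512, 65536) == 17,
 * which is why the KASSERT above allows MAXPHYS/PAGE_SIZE + 1 pages.
 */
#endif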
1898 
1899 /*
1900  * Hibernating a machine comprises the following operations:
1901  *  1. Calculating this machine's hibernate_info
1902  *  2. Allocating a piglet and saving the piglet's physaddr
1903  *  3. Calculating the memory chunks
1904  *  4. Writing the compressed chunks to disk
1905  *  5. Writing the chunk table
1906  *  6. Writing the signature block (hibernate_info)
1907  *
1908  * On most architectures, the function calling hibernate_suspend would
1909  * then power off the machine using some MD-specific implementation.
1910  */
1911 int
1912 hibernate_suspend(void)
1913 {
1914 	uint8_t buf[DEV_BSIZE];
1915 	union hibernate_info *hib = (union hibernate_info *)buf;
1916 	u_long start, end;
1917 
1918 	/*
1919 	 * Calculate memory ranges, swap offsets, etc.
1920 	 * This also allocates a piglet whose physaddr is stored in
1921 	 * hib->piglet_pa and vaddr stored in hib->piglet_va
1922 	 */
1923 	if (get_hibernate_info(hib, 1)) {
1924 		DPRINTF("failed to obtain hibernate info\n");
1925 		return (1);
1926 	}
1927 
1928 	/* Find a page-addressed region in swap [start,end] */
1929 	if (uvm_hibswap(hib->dev, &start, &end)) {
1930 		printf("hibernate: cannot find any swap\n");
1931 		return (1);
1932 	}
1933 
1934 	if (end - start < 1000) {
1935 		printf("hibernate: insufficient swap (%lu pages is too small)\n",
1936 		    end - start + 1);
1937 		return (1);
1938 	}
1939 
1940 	pmap_extract(pmap_kernel(), (vaddr_t)&__retguard_start,
1941 	    &retguard_start_phys);
1942 	pmap_extract(pmap_kernel(), (vaddr_t)&__retguard_end,
1943 	    &retguard_end_phys);
1944 
1945 	/* Calculate block offsets in swap */
1946 	hib->image_offset = ctod(start);
1947 
1948 	DPRINTF("hibernate @ block %lld max-length %lu blocks\n",
1949 	    hib->image_offset, ctod(end) - ctod(start) + 1);
1950 
1951 	pmap_activate(curproc);
1952 	DPRINTF("hibernate: writing chunks\n");
1953 	if (hibernate_write_chunks(hib)) {
1954 		DPRINTF("hibernate_write_chunks failed\n");
1955 		return (1);
1956 	}
1957 
1958 	DPRINTF("hibernate: writing chunktable\n");
1959 	if (hibernate_write_chunktable(hib)) {
1960 		DPRINTF("hibernate_write_chunktable failed\n");
1961 		return (1);
1962 	}
1963 
1964 	DPRINTF("hibernate: writing signature\n");
1965 	if (hibernate_write_signature(hib)) {
1966 		DPRINTF("hibernate_write_signature failed\n");
1967 		return (1);
1968 	}
1969 
1970 	/* Allow the disk to settle */
1971 	delay(500000);
1972 
1973 	/*
1974 	 * Give the device-specific I/O function a notification that we're
1975 	 * done, and that it can clean up or shutdown as needed.
1976 	 */
1977 	hib->io_func(hib->dev, 0, (vaddr_t)NULL, 0, HIB_DONE, hib->io_page);
1978 	return (0);
1979 }
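
/*
 * A sketch of the page-to-block conversion behind ctod() above,
 * assuming the usual 4096-byte pages and 512-byte DEV_BSIZE blocks;
 * the demo_* macros are illustrative. uvm_hibswap() hands back a page
 * range, while the image offsets above are kept in disk blocks.
 */
#if 0
#define DEMO_PGSHIFT	12	/* log2(4096) */
#define DEMO_DEV_BSHIFT	9	/* log2(512)  */

/* One 4096-byte page spans 2^(12 - 9) = 8 blocks of 512 bytes. */
#define demo_ctod(x)	((x) << (DEMO_PGSHIFT - DEMO_DEV_BSHIFT))

/* e.g. a swap range starting at page 1000 starts at block 8000. */
#endif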
1980 
1981 int
1982 hibernate_alloc(void)
1983 {
1984 	KASSERT(global_piglet_va == 0);
1985 	KASSERT(hibernate_temp_page == 0);
1986 
1987 	pmap_activate(curproc);
1988 	pmap_kenter_pa(HIBERNATE_HIBALLOC_PAGE, HIBERNATE_HIBALLOC_PAGE,
1989 	    PROT_READ | PROT_WRITE);
1990 
1991 	/* Allocate a piglet, store its addresses in the supplied globals */
1992 	if (uvm_pmr_alloc_piglet(&global_piglet_va, &global_piglet_pa,
1993 	    HIBERNATE_CHUNK_SIZE * 4, HIBERNATE_CHUNK_SIZE))
1994 		goto unmap;
1995 
1996 	/*
1997 	 * Allocate VA for the temp page.
1998 	 *
1999 	 * This will become part of the suspended kernel and will
2000 	 * be freed in hibernate_free(), upon resume (or hibernate
2001 	 * failure).
2002 	 */
2003 	hibernate_temp_page = (vaddr_t)km_alloc(PAGE_SIZE, &kv_any,
2004 	    &kp_none, &kd_nowait);
2005 	if (!hibernate_temp_page) {
2006 		uvm_pmr_free_piglet(global_piglet_va, 4 * HIBERNATE_CHUNK_SIZE);
2007 		global_piglet_va = 0;
2008 		goto unmap;
2009 	}
2010 	return (0);
2011 unmap:
2012 	pmap_kremove(HIBERNATE_HIBALLOC_PAGE, PAGE_SIZE);
2013 	pmap_update(pmap_kernel());
2014 	return (ENOMEM);
2015 }
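
/*
 * The kp_none allocations in this file reserve kernel VA only; nothing
 * backs the range until pmap_kenter_pa() points it at a physical page.
 * A minimal sketch of that reserve/point/release pattern follows; the
 * demo_* names are illustrative and not part of this file.
 */
#if 0
static void *
demo_map_scratch(paddr_t pa)
{
	vaddr_t va;

	/* Reserve one page of VA with no backing pages (kp_none). */
	va = (vaddr_t)km_alloc(PAGE_SIZE, &kv_any, &kp_none, &kd_nowait);
	if (va == 0)
		return (NULL);

	/* Point the reserved VA at the page containing pa. */
	pmap_kenter_pa(va, pa & ~PAGE_MASK, PROT_READ | PROT_WRITE);
	pmap_update(pmap_kernel());
	return ((void *)(va + (pa & PAGE_MASK)));
}

static void
demo_unmap_scratch(void *p)
{
	vaddr_t va = (vaddr_t)p & ~PAGE_MASK;

	pmap_kremove(va, PAGE_SIZE);
	pmap_update(pmap_kernel());
	km_free((void *)va, PAGE_SIZE, &kv_any, &kp_none);
}
#endif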
2016 
2017 /*
2018  * Free items allocated by hibernate_alloc()
2019  */
2020 void
2021 hibernate_free(void)
2022 {
2023 	pmap_activate(curproc);
2024 
2025 	if (global_piglet_va)
2026 		uvm_pmr_free_piglet(global_piglet_va,
2027 		    4 * HIBERNATE_CHUNK_SIZE);
2028 
2029 	if (hibernate_temp_page) {
2030 		pmap_kremove(hibernate_temp_page, PAGE_SIZE);
2031 		km_free((void *)hibernate_temp_page, PAGE_SIZE,
2032 		    &kv_any, &kp_none);
2033 	}
2034 
2035 	global_piglet_va = 0;
2036 	hibernate_temp_page = 0;
2037 	pmap_kremove(HIBERNATE_HIBALLOC_PAGE, PAGE_SIZE);
2038 	pmap_update(pmap_kernel());
2039 }
2040 }
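
/*
 * A sketch of how hibernate_alloc() and hibernate_free() are meant to
 * bracket a suspend attempt, so the piglet and temp page cover the
 * whole operation and are released on failure (or after a resume). The
 * demo_try_hibernate() wrapper is hypothetical, not part of this file.
 */
#if 0
static int
demo_try_hibernate(void)
{
	int error;

	if (hibernate_alloc() != 0)
		return (ENOMEM);	/* no piglet or temp page */

	/* Writes chunks, chunk table and signature; see above. */
	error = hibernate_suspend();
	if (error) {
		/* Release the piglet and temp page on failure. */
		hibernate_free();
		return (error);
	}

	/* On success, an MD caller would power off the machine here. */
	return (0);
}
#endif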