xref: /openbsd-src/sys/kern/subr_hibernate.c (revision ae3cb403620ab940fbaabb3055fac045a63d56b7)
1 /*	$OpenBSD: subr_hibernate.c,v 1.123 2017/08/17 06:50:41 mlarkin Exp $	*/
2 
3 /*
4  * Copyright (c) 2011 Ariane van der Steldt <ariane@stack.nl>
5  * Copyright (c) 2011 Mike Larkin <mlarkin@openbsd.org>
6  *
7  * Permission to use, copy, modify, and distribute this software for any
8  * purpose with or without fee is hereby granted, provided that the above
9  * copyright notice and this permission notice appear in all copies.
10  *
11  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
12  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
14  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18  */
19 
20 #include <sys/hibernate.h>
21 #include <sys/malloc.h>
22 #include <sys/param.h>
23 #include <sys/tree.h>
24 #include <sys/systm.h>
25 #include <sys/disklabel.h>
26 #include <sys/disk.h>
27 #include <sys/conf.h>
28 #include <sys/buf.h>
29 #include <sys/fcntl.h>
30 #include <sys/stat.h>
31 #include <sys/atomic.h>
32 
33 #include <uvm/uvm.h>
34 #include <uvm/uvm_swap.h>
35 
36 #include <machine/hibernate.h>
37 
38 /*
39  * Hibernate piglet layout information
40  *
41  * The piglet is a scratch area of memory allocated by the suspending kernel.
42  * Its phys and virt addrs are recorded in the signature block. The piglet is
43  * used to guarantee an unused area of memory that can be used by the resuming
44  * kernel for various things. The piglet is excluded during unpack operations.
45  * The piglet size is presently 4*HIBERNATE_CHUNK_SIZE (typically 4*4MB).
46  *
47  * Offset from piglet_base	Purpose
48  * ----------------------------------------------------------------------------
49  * 0				Private page for suspend I/O write functions
50  * 1*PAGE_SIZE			I/O page used during hibernate suspend
51  * 2*PAGE_SIZE			I/O page used during hibernate suspend
52  * 3*PAGE_SIZE			copy page used during hibernate suspend
53  * 4*PAGE_SIZE			final chunk ordering list (24 pages)
54  * 28*PAGE_SIZE			RLE utility page
55  * 29*PAGE_SIZE			preserved entropy
56  * 30*PAGE_SIZE			start of hiballoc area
57  * 110*PAGE_SIZE		end of hiballoc area (80 pages)
58  * ...				unused
59  * HIBERNATE_CHUNK_SIZE		start of hibernate chunk table
60  * 2*HIBERNATE_CHUNK_SIZE	bounce area for chunks being unpacked
61  * 4*HIBERNATE_CHUNK_SIZE	end of piglet
62  */
63 
64 /* Temporary vaddr ranges used during hibernate */
65 vaddr_t hibernate_temp_page;
66 vaddr_t hibernate_copy_page;
67 vaddr_t hibernate_rle_page;
68 
69 /* Hibernate info as read from disk during resume */
70 union hibernate_info disk_hib;
71 
72 /*
73  * Global copy of the pig start address. This needs to be a global as we
74  * switch stacks after computing it - it can't be stored on the stack.
75  */
76 paddr_t global_pig_start;
77 
78 /*
79  * Global copies of the piglet start addresses (PA/VA). We store these
80  * as globals to avoid having to carry them around as parameters, as the
81  * piglet is allocated early and freed late - its lifecycle extends beyond
82  * that of the hibernate info union which is calculated on suspend/resume.
83  */
84 vaddr_t global_piglet_va;
85 paddr_t global_piglet_pa;
86 
87 /* #define HIB_DEBUG */
88 #ifdef HIB_DEBUG
89 int	hib_debug = 99;
90 #define DPRINTF(x...)     do { if (hib_debug) printf(x); } while (0)
91 #define DNPRINTF(n,x...)  do { if (hib_debug > (n)) printf(x); } while (0)
92 #else
93 #define DPRINTF(x...)
94 #define DNPRINTF(n,x...)
95 #endif
96 
97 #ifndef NO_PROPOLICE
98 extern long __guard_local;
99 #endif /* ! NO_PROPOLICE */
100 
101 void hibernate_copy_chunk_to_piglet(paddr_t, vaddr_t, size_t);
102 int hibernate_calc_rle(paddr_t, paddr_t);
103 int hibernate_write_rle(union hibernate_info *, paddr_t, paddr_t, daddr_t *,
104 	size_t *);
105 
106 #define MAX_RLE (HIBERNATE_CHUNK_SIZE / PAGE_SIZE)
107 
108 /*
109  * Hib alloc enforced alignment.
110  */
111 #define HIB_ALIGN		8 /* bytes alignment */
112 
113 /*
114  * sizeof builtin operation, but with alignment constraint.
115  */
116 #define HIB_SIZEOF(_type)	roundup(sizeof(_type), HIB_ALIGN)
117 
118 struct hiballoc_entry {	/* header placed directly in front of each hiballoc allocation */
119 	size_t			hibe_use;	/* bytes in use right after this header */
120 	size_t			hibe_space;	/* free bytes following the used bytes */
121 	RBT_ENTRY(hiballoc_entry) hibe_entry;	/* linkage in the hiballoc_addr tree */
122 };
123 
124 /*
125  * Sort hibernate memory ranges by ascending PA
126  */
127 void
128 hibernate_sort_ranges(union hibernate_info *hib_info)
129 {
130 	int i, j;
131 	struct hibernate_memory_range *ranges;
132 	paddr_t base, end;
133 
134 	ranges = hib_info->ranges;
135 
136 	for (i = 1; i < hib_info->nranges; i++) {
137 		j = i;
138 		while (j > 0 && ranges[j - 1].base > ranges[j].base) {
139 			base = ranges[j].base;
140 			end = ranges[j].end;
141 			ranges[j].base = ranges[j - 1].base;
142 			ranges[j].end = ranges[j - 1].end;
143 			ranges[j - 1].base = base;
144 			ranges[j - 1].end = end;
145 			j--;
146 		}
147 	}
148 }
149 
150 /*
151  * Compare hiballoc entries based on the address they manage.
152  *
153  * Since the address is fixed, relative to struct hiballoc_entry,
154  * we just compare the hiballoc_entry pointers.
155  */
156 static __inline int
157 hibe_cmp(const struct hiballoc_entry *l, const struct hiballoc_entry *r)
158 {
159 	vaddr_t vl = (vaddr_t)l;
160 	vaddr_t vr = (vaddr_t)r;
161 
162 	return vl < vr ? -1 : (vl > vr);
163 }
164 
165 RBT_PROTOTYPE(hiballoc_addr, hiballoc_entry, hibe_entry, hibe_cmp)
166 
167 /*
168  * Given a hiballoc entry, return the address it manages.
169  */
170 static __inline void *
171 hib_entry_to_addr(struct hiballoc_entry *entry)
172 {
173 	caddr_t addr;
174 
175 	addr = (caddr_t)entry;
176 	addr += HIB_SIZEOF(struct hiballoc_entry);
177 	return addr;
178 }
179 
180 /*
181  * Given an address, find the hiballoc that corresponds.
182  */
183 static __inline struct hiballoc_entry*
184 hib_addr_to_entry(void *addr_param)
185 {
186 	caddr_t addr;
187 
188 	addr = (caddr_t)addr_param;
189 	addr -= HIB_SIZEOF(struct hiballoc_entry);
190 	return (struct hiballoc_entry*)addr;
191 }
192 
193 RBT_GENERATE(hiballoc_addr, hiballoc_entry, hibe_entry, hibe_cmp);
194 
195 /*
196  * Allocate memory from the arena.
197  *
198  * Returns NULL if no memory is available.
199  */
200 void *
201 hib_alloc(struct hiballoc_arena *arena, size_t alloc_sz)
202 {
203 	struct hiballoc_entry *entry, *new_entry;
204 	size_t find_sz;
205 
206 	/*
207 	 * Enforce alignment of HIB_ALIGN bytes.
208 	 *
209 	 * Note that, because the entry is put in front of the allocation,
210 	 * 0-byte allocations are guaranteed a unique address.
211 	 */
212 	alloc_sz = roundup(alloc_sz, HIB_ALIGN);
213 
214 	/*
215 	 * Find an entry with hibe_space >= find_sz.
216 	 *
217 	 * If the root node is not large enough, we switch to tree traversal.
218 	 * Because all entries are made at the bottom of the free space,
219 	 * traversal from the end has a slightly better chance of yielding
220 	 * a sufficiently large space.
221 	 */
222 	find_sz = alloc_sz + HIB_SIZEOF(struct hiballoc_entry);
223 	entry = RBT_ROOT(hiballoc_addr, &arena->hib_addrs);
224 	if (entry != NULL && entry->hibe_space < find_sz) {
225 		RBT_FOREACH_REVERSE(entry, hiballoc_addr, &arena->hib_addrs) {
226 			if (entry->hibe_space >= find_sz)
227 				break;
228 		}
229 	}
230 
231 	/*
232 	 * Insufficient or too fragmented memory.
233 	 */
234 	if (entry == NULL)
235 		return NULL;
236 
237 	/*
238 	 * Create new entry in allocated space.
239 	 */
240 	new_entry = (struct hiballoc_entry*)(
241 	    (caddr_t)hib_entry_to_addr(entry) + entry->hibe_use);
242 	new_entry->hibe_space = entry->hibe_space - find_sz;
243 	new_entry->hibe_use = alloc_sz;
244 
245 	/*
246 	 * Insert entry.
247 	 */
248 	if (RBT_INSERT(hiballoc_addr, &arena->hib_addrs, new_entry) != NULL)
249 		panic("hib_alloc: insert failure");
250 	entry->hibe_space = 0;
251 
252 	/* Return address managed by entry. */
253 	return hib_entry_to_addr(new_entry);
254 }
255 
256 void
257 hib_getentropy(char **bufp, size_t *bufplen)
258 {
259 	if (!bufp || !bufplen)
260 		return;
261 
262 	*bufp = (char *)(global_piglet_va + (29 * PAGE_SIZE));
263 	*bufplen = PAGE_SIZE;
264 }
265 
266 /*
267  * Free a pointer previously allocated from this arena.
268  *
269  * If addr is NULL, this will be silently accepted.
270  */
271 void
272 hib_free(struct hiballoc_arena *arena, void *addr)
273 {
274 	struct hiballoc_entry *entry, *prev;
275 
276 	if (addr == NULL)
277 		return;
278 
279 	/*
280 	 * Derive entry from addr and check it is really in this arena.
281 	 */
282 	entry = hib_addr_to_entry(addr);
283 	if (RBT_FIND(hiballoc_addr, &arena->hib_addrs, entry) != entry)
284 		panic("hib_free: freed item %p not in hib arena", addr);
285 
286 	/*
287 	 * Give the space in entry to its predecessor.
288 	 *
289 	 * If entry has no predecessor, change its used space into free space
290 	 * instead.
291 	 */
292 	prev = RBT_PREV(hiballoc_addr, entry);
293 	if (prev != NULL &&
294 	    (void *)((caddr_t)prev + HIB_SIZEOF(struct hiballoc_entry) +
295 	    prev->hibe_use + prev->hibe_space) == entry) {
296 		/* Merge entry. */
297 		RBT_REMOVE(hiballoc_addr, &arena->hib_addrs, entry);
298 		prev->hibe_space += HIB_SIZEOF(struct hiballoc_entry) +
299 		    entry->hibe_use + entry->hibe_space;
300 	} else {
301 		/* Flip used memory to free space. */
302 		entry->hibe_space += entry->hibe_use;
303 		entry->hibe_use = 0;
304 	}
305 }
306 
307 /*
308  * Initialize hiballoc.
309  *
310  * The allocator will manage memmory at ptr, which is len bytes.
311  */
312 int
313 hiballoc_init(struct hiballoc_arena *arena, void *p_ptr, size_t p_len)
314 {
315 	struct hiballoc_entry *entry;
316 	caddr_t ptr;
317 	size_t len;
318 
319 	RBT_INIT(hiballoc_addr, &arena->hib_addrs);
320 
321 	/*
322 	 * Hib allocator enforces HIB_ALIGN alignment.
323 	 * Fixup ptr and len.
324 	 */
325 	ptr = (caddr_t)roundup((vaddr_t)p_ptr, HIB_ALIGN);
326 	len = p_len - ((size_t)ptr - (size_t)p_ptr);
327 	len &= ~((size_t)HIB_ALIGN - 1);
328 
329 	/*
330 	 * Insufficient memory to be able to allocate and also do bookkeeping.
331 	 */
332 	if (len <= HIB_SIZEOF(struct hiballoc_entry))
333 		return ENOMEM;
334 
335 	/*
336 	 * Create entry describing space.
337 	 */
338 	entry = (struct hiballoc_entry*)ptr;
339 	entry->hibe_use = 0;
340 	entry->hibe_space = len - HIB_SIZEOF(struct hiballoc_entry);
341 	RBT_INSERT(hiballoc_addr, &arena->hib_addrs, entry);
342 
343 	return 0;
344 }
345 
346 /*
347  * Zero all free memory.
348  */
349 void
350 uvm_pmr_zero_everything(void)
351 {
352 	struct uvm_pmemrange	*pmr;
353 	struct vm_page		*pg;
354 	int			 i;
355 
356 	uvm_lock_fpageq();
357 	TAILQ_FOREACH(pmr, &uvm.pmr_control.use, pmr_use) {
358 		/* Zero single pages. */
359 		while ((pg = TAILQ_FIRST(&pmr->single[UVM_PMR_MEMTYPE_DIRTY]))
360 		    != NULL) {
361 			uvm_pmr_remove(pmr, pg);
362 			uvm_pagezero(pg);
363 			atomic_setbits_int(&pg->pg_flags, PG_ZERO);
364 			uvmexp.zeropages++;
365 			uvm_pmr_insert(pmr, pg, 0);
366 		}
367 
368 		/* Zero multi page ranges. */
369 		while ((pg = RBT_ROOT(uvm_pmr_size,
370 		    &pmr->size[UVM_PMR_MEMTYPE_DIRTY])) != NULL) {
371 			pg--; /* Size tree always has second page. */
372 			uvm_pmr_remove(pmr, pg);
373 			for (i = 0; i < pg->fpgsz; i++) {
374 				uvm_pagezero(&pg[i]);
375 				atomic_setbits_int(&pg[i].pg_flags, PG_ZERO);
376 				uvmexp.zeropages++;
377 			}
378 			uvm_pmr_insert(pmr, pg, 0);
379 		}
380 	}
381 	uvm_unlock_fpageq();
382 }
383 
384 /*
385  * Mark all memory as dirty.
386  *
387  * Used to inform the system that the clean memory isn't clean for some
388  * reason, for example because we just came back from hibernate.
389  */
390 void
391 uvm_pmr_dirty_everything(void)
392 {
393 	struct uvm_pmemrange	*pmr;
394 	struct vm_page		*pg;
395 	int			 i;
396 
397 	uvm_lock_fpageq();
398 	TAILQ_FOREACH(pmr, &uvm.pmr_control.use, pmr_use) {
399 		/* Dirty single pages. */
400 		while ((pg = TAILQ_FIRST(&pmr->single[UVM_PMR_MEMTYPE_ZERO]))
401 		    != NULL) {
402 			uvm_pmr_remove(pmr, pg);
403 			atomic_clearbits_int(&pg->pg_flags, PG_ZERO);
404 			uvm_pmr_insert(pmr, pg, 0);
405 		}
406 
407 		/* Dirty multi page ranges. */
408 		while ((pg = RBT_ROOT(uvm_pmr_size,
409 		    &pmr->size[UVM_PMR_MEMTYPE_ZERO])) != NULL) {
410 			pg--; /* Size tree always has second page. */
411 			uvm_pmr_remove(pmr, pg);
412 			for (i = 0; i < pg->fpgsz; i++)
413 				atomic_clearbits_int(&pg[i].pg_flags, PG_ZERO);
414 			uvm_pmr_insert(pmr, pg, 0);
415 		}
416 	}
417 
418 	uvmexp.zeropages = 0;
419 	uvm_unlock_fpageq();
420 }
421 
422 /*
423  * Allocate an area that can hold sz bytes and doesn't overlap with
424  * the piglet at piglet_pa.
425  */
426 int
427 uvm_pmr_alloc_pig(paddr_t *pa, psize_t sz, paddr_t piglet_pa)
428 {
429 	struct uvm_constraint_range pig_constraint;
430 	struct kmem_pa_mode kp_pig = {
431 		.kp_constraint = &pig_constraint,
432 		.kp_maxseg = 1
433 	};
434 	vaddr_t va;
435 
436 	sz = round_page(sz);
437 
438 	pig_constraint.ucr_low = piglet_pa + 4 * HIBERNATE_CHUNK_SIZE;
439 	pig_constraint.ucr_high = -1;
440 
441 	va = (vaddr_t)km_alloc(sz, &kv_any, &kp_pig, &kd_nowait);
442 	if (va == 0) {
443 		pig_constraint.ucr_low = 0;
444 		pig_constraint.ucr_high = piglet_pa - 1;
445 
446 		va = (vaddr_t)km_alloc(sz, &kv_any, &kp_pig, &kd_nowait);
447 		if (va == 0)
448 			return ENOMEM;
449 	}
450 
451 	pmap_extract(pmap_kernel(), va, pa);
452 	return 0;
453 }
454 
455 /*
456  * Allocate a piglet area.
457  *
458  * This needs to be in DMA-safe memory.
459  * Piglets are aligned.
460  *
461  * sz and align in bytes.
462  *
463  * The call will sleep for the pagedaemon to attempt to free memory.
464  * The pagedaemon may decide its not possible to free enough memory, causing
465  * the allocation to fail.
466  */
467 int
468 uvm_pmr_alloc_piglet(vaddr_t *va, paddr_t *pa, vsize_t sz, paddr_t align)
469 {
470 	struct kmem_pa_mode kp_piglet = {
471 		.kp_constraint = &dma_constraint,
472 		.kp_align = align,
473 		.kp_maxseg = 1
474 	};
475 
476 	/* Ensure align is a power of 2 */
477 	KASSERT((align & (align - 1)) == 0);
478 
479 	/*
480 	 * Fixup arguments: align must be at least PAGE_SIZE,
481 	 * sz will be converted to pagecount, since that is what
482 	 * pmemrange uses internally.
483 	 */
484 	if (align < PAGE_SIZE)
485 		kp_piglet.kp_align = PAGE_SIZE;
486 
487 	sz = round_page(sz);
488 
489 	*va = (vaddr_t)km_alloc(sz, &kv_any, &kp_piglet, &kd_nowait);
490 	if (*va == 0)
491 		return ENOMEM;
492 
493 	pmap_extract(pmap_kernel(), *va, pa);
494 	return 0;
495 }
496 
497 /*
498  * Free a piglet area.
499  */
500 void
501 uvm_pmr_free_piglet(vaddr_t va, vsize_t sz)
502 {
503 	/*
504 	 * Fix parameters.
505 	 */
506 	sz = round_page(sz);
507 
508 	/*
509 	 * Free the physical and virtual memory.
510 	 */
511 	km_free((void *)va, sz, &kv_any, &kp_dma_contig);
512 }
513 
514 /*
515  * Physmem RLE compression support.
516  *
517  * Given a physical page address, return the number of pages starting at the
518  * address that are free.  Clamps to the number of pages in
519  * HIBERNATE_CHUNK_SIZE. Returns 0 if the page at addr is not free.
520  */
521 int
522 uvm_page_rle(paddr_t addr)
523 {
524 	struct vm_page		*pg, *pg_end;
525 	struct vm_physseg	*vmp;
526 	int			 pseg_idx, off_idx;
527 
528 	pseg_idx = vm_physseg_find(atop(addr), &off_idx);
529 	if (pseg_idx == -1)
530 		return 0;
531 
532 	vmp = &vm_physmem[pseg_idx];
533 	pg = &vmp->pgs[off_idx];
534 	if (!(pg->pg_flags & PQ_FREE))
535 		return 0;
536 
537 	/*
538 	 * Search for the first non-free page after pg.
539 	 * Note that the page may not be the first page in a free pmemrange,
540 	 * therefore pg->fpgsz cannot be used.
541 	 */
542 	for (pg_end = pg; pg_end <= vmp->lastpg &&
543 	    (pg_end->pg_flags & PQ_FREE) == PQ_FREE; pg_end++)
544 		;
545 	return min((pg_end - pg), HIBERNATE_CHUNK_SIZE/PAGE_SIZE);
546 }
547 
548 /*
549  * Calculate a hopefully unique version # for this kernel, based upon
550  * how it was linked.
551  */
552 u_int32_t
553 hibsum(void)
554 {
555 	return ((long)malloc ^ (long)km_alloc ^ (long)printf ^ (long)strlen);
556 }
557 
558 
559 /*
560  * Fills out the hibernate_info union pointed to by hib
561  * with information about this machine (swap signature block
562  * offsets, number of memory ranges, kernel in use, etc)
563  */
564 int
565 get_hibernate_info(union hibernate_info *hib, int suspend)
566 {
567 	struct disklabel dl;
568 	char err_string[128], *dl_ret;
569 
570 #ifndef NO_PROPOLICE
571 	/* Save propolice guard */
572 	hib->guard = __guard_local;
573 #endif /* ! NO_PROPOLICE */
574 
575 	/* Determine I/O function to use */
576 	hib->io_func = get_hibernate_io_function(swdevt[0].sw_dev);
577 	if (hib->io_func == NULL)
578 		return (1);
579 
580 	/* Calculate hibernate device */
581 	hib->dev = swdevt[0].sw_dev;
582 
583 	/* Read disklabel (used to calculate signature and image offsets) */
584 	dl_ret = disk_readlabel(&dl, hib->dev, err_string, sizeof(err_string));
585 
586 	if (dl_ret) {
587 		printf("Hibernate error reading disklabel: %s\n", dl_ret);
588 		return (1);
589 	}
590 
591 	/* Make sure we have a swap partition. */
592 	if (dl.d_partitions[1].p_fstype != FS_SWAP ||
593 	    DL_GETPSIZE(&dl.d_partitions[1]) == 0)
594 		return (1);
595 
596 	/* Make sure the signature can fit in one block */
597 	if (sizeof(union hibernate_info) > DEV_BSIZE)
598 		return (1);
599 
600 	/* Magic number */
601 	hib->magic = HIBERNATE_MAGIC;
602 
603 	/* Calculate signature block location */
604 	hib->sig_offset = DL_GETPSIZE(&dl.d_partitions[1]) -
605 	    sizeof(union hibernate_info)/DEV_BSIZE;
606 
607 	/* Stash kernel version information */
608 	memset(&hib->kernel_version, 0, 128);
609 	bcopy(version, &hib->kernel_version,
610 	    min(strlen(version), sizeof(hib->kernel_version)-1));
611 	hib->kernel_sum = hibsum();
612 
613 	if (suspend) {
614 		/* Grab the previously-allocated piglet addresses */
615 		hib->piglet_va = global_piglet_va;
616 		hib->piglet_pa = global_piglet_pa;
617 		hib->io_page = (void *)hib->piglet_va;
618 
619 		/*
620 		 * Initialization of the hibernate IO function for drivers
621 		 * that need to do prep work (such as allocating memory or
622 		 * setting up data structures that cannot safely be done
623 		 * during suspend without causing side effects). There is
624 		 * a matching HIB_DONE call performed after the write is
625 		 * completed.
626 		 */
627 		if (hib->io_func(hib->dev, DL_GETPOFFSET(&dl.d_partitions[1]),
628 		    (vaddr_t)NULL, DL_GETPSIZE(&dl.d_partitions[1]),
629 		    HIB_INIT, hib->io_page))
630 			goto fail;
631 
632 	} else {
633 		/*
634 		 * Resuming kernels use a regular private page for the driver
635 		 * No need to free this I/O page as it will vanish as part of
636 		 * the resume.
637 		 */
638 		hib->io_page = malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT);
639 		if (!hib->io_page)
640 			goto fail;
641 	}
642 
643 	if (get_hibernate_info_md(hib))
644 		goto fail;
645 
646 	return (0);
647 
648 fail:
649 	return (1);
650 }
651 
652 /*
653  * Allocate nitems*size bytes from the hiballoc area presently in use
654  */
655 void *
656 hibernate_zlib_alloc(void *unused, int nitems, int size)
657 {
658 	struct hibernate_zlib_state *hibernate_state;
659 
660 	hibernate_state =
661 	    (struct hibernate_zlib_state *)HIBERNATE_HIBALLOC_PAGE;
662 
663 	return hib_alloc(&hibernate_state->hiballoc_arena, nitems*size);
664 }
665 
666 /*
667  * Free the memory pointed to by addr in the hiballoc area presently in
668  * use
669  */
670 void
671 hibernate_zlib_free(void *unused, void *addr)
672 {
673 	struct hibernate_zlib_state *hibernate_state;
674 
675 	hibernate_state =
676 	    (struct hibernate_zlib_state *)HIBERNATE_HIBALLOC_PAGE;
677 
678 	hib_free(&hibernate_state->hiballoc_arena, addr);
679 }
680 
681 /*
682  * Inflate next page of data from the image stream.
683  * The rle parameter is modified on exit to contain the number of pages to
684  * skip in the output stream (or 0 if this page was inflated into).
685  *
686  * Returns 0 if the stream contains additional data, or 1 if the stream is
687  * finished.
688  */
689 int
690 hibernate_inflate_page(int *rle)
691 {
692 	struct hibernate_zlib_state *hibernate_state;
693 	int i;
694 
695 	hibernate_state =
696 	    (struct hibernate_zlib_state *)HIBERNATE_HIBALLOC_PAGE;
697 
698 	/* Set up the stream for RLE code inflate */
699 	hibernate_state->hib_stream.next_out = (unsigned char *)rle;
700 	hibernate_state->hib_stream.avail_out = sizeof(*rle);
701 
702 	/* Inflate RLE code */
703 	i = inflate(&hibernate_state->hib_stream, Z_SYNC_FLUSH);
704 	if (i != Z_OK && i != Z_STREAM_END) {
705 		/*
706 		 * XXX - this will likely reboot/hang most machines
707 		 *       since the console output buffer will be unmapped,
708 		 *       but there's not much else we can do here.
709 		 */
710 		panic("rle inflate stream error");
711 	}
712 
713 	if (hibernate_state->hib_stream.avail_out != 0) {
714 		/*
715 		 * XXX - this will likely reboot/hang most machines
716 		 *       since the console output buffer will be unmapped,
717 		 *       but there's not much else we can do here.
718 		 */
719 		panic("rle short inflate error");
720 	}
721 
722 	if (*rle < 0 || *rle > 1024) {
723 		/*
724 		 * XXX - this will likely reboot/hang most machines
725 		 *       since the console output buffer will be unmapped,
726 		 *       but there's not much else we can do here.
727 		 */
728 		panic("invalid rle count");
729 	}
730 
731 	if (i == Z_STREAM_END)
732 		return (1);
733 
734 	if (*rle != 0)
735 		return (0);
736 
737 	/* Set up the stream for page inflate */
738 	hibernate_state->hib_stream.next_out =
739 		(unsigned char *)HIBERNATE_INFLATE_PAGE;
740 	hibernate_state->hib_stream.avail_out = PAGE_SIZE;
741 
742 	/* Process next block of data */
743 	i = inflate(&hibernate_state->hib_stream, Z_SYNC_FLUSH);
744 	if (i != Z_OK && i != Z_STREAM_END) {
745 		/*
746 		 * XXX - this will likely reboot/hang most machines
747 		 *       since the console output buffer will be unmapped,
748 		 *       but there's not much else we can do here.
749 		 */
750 		panic("inflate error");
751 	}
752 
753 	/* We should always have extracted a full page ... */
754 	if (hibernate_state->hib_stream.avail_out != 0) {
755 		/*
756 		 * XXX - this will likely reboot/hang most machines
757 		 *       since the console output buffer will be unmapped,
758 		 *       but there's not much else we can do here.
759 		 */
760 		panic("incomplete page");
761 	}
762 
763 	return (i == Z_STREAM_END);
764 }
765 
766 /*
767  * Inflate size bytes from src into dest, skipping any pages in
768  * [src..dest] that are special (see hibernate_inflate_skip)
769  *
770  * This function executes while using the resume-time stack
771  * and pmap, and therefore cannot use ddb/printf/etc. Doing so
772  * will likely hang or reset the machine since the console output buffer
773  * will be unmapped.
774  */
775 void
776 hibernate_inflate_region(union hibernate_info *hib, paddr_t dest,
777     paddr_t src, size_t size)
778 {
779 	int end_stream = 0, rle;
780 	struct hibernate_zlib_state *hibernate_state;
781 
782 	hibernate_state =
783 	    (struct hibernate_zlib_state *)HIBERNATE_HIBALLOC_PAGE;
784 
785 	hibernate_state->hib_stream.next_in = (unsigned char *)src;
786 	hibernate_state->hib_stream.avail_in = size;
787 
788 	do {
789 		/*
790 		 * Is this a special page? If yes, redirect the
791 		 * inflate output to a scratch page (eg, discard it)
792 		 */
793 		if (hibernate_inflate_skip(hib, dest)) {
794 			hibernate_enter_resume_mapping(
795 			    HIBERNATE_INFLATE_PAGE,
796 			    HIBERNATE_INFLATE_PAGE, 0);
797 		} else {
798 			hibernate_enter_resume_mapping(
799 			    HIBERNATE_INFLATE_PAGE, dest, 0);
800 		}
801 
802 		hibernate_flush();
803 		end_stream = hibernate_inflate_page(&rle);
804 
805 		if (rle == 0)
806 			dest += PAGE_SIZE;
807 		else
808 			dest += (rle * PAGE_SIZE);
809 	} while (!end_stream);
810 }
811 
812 /*
813  * deflate from src into the I/O page, up to 'remaining' bytes
814  *
815  * Returns number of input bytes consumed, and may reset
816  * the 'remaining' parameter if not all the output space was consumed
817  * (this information is needed to know how much to write to disk
818  */
819 size_t
820 hibernate_deflate(union hibernate_info *hib, paddr_t src,
821     size_t *remaining)
822 {
823 	vaddr_t hibernate_io_page = hib->piglet_va + PAGE_SIZE;
824 	struct hibernate_zlib_state *hibernate_state;
825 
826 	hibernate_state =
827 	    (struct hibernate_zlib_state *)HIBERNATE_HIBALLOC_PAGE;
828 
829 	/* Set up the stream for deflate */
830 	hibernate_state->hib_stream.next_in = (unsigned char *)src;
831 	hibernate_state->hib_stream.avail_in = PAGE_SIZE - (src & PAGE_MASK);
832 	hibernate_state->hib_stream.next_out =
833 		(unsigned char *)hibernate_io_page + (PAGE_SIZE - *remaining);
834 	hibernate_state->hib_stream.avail_out = *remaining;
835 
836 	/* Process next block of data */
837 	if (deflate(&hibernate_state->hib_stream, Z_SYNC_FLUSH) != Z_OK)
838 		panic("hibernate zlib deflate error");
839 
840 	/* Update pointers and return number of bytes consumed */
841 	*remaining = hibernate_state->hib_stream.avail_out;
842 	return (PAGE_SIZE - (src & PAGE_MASK)) -
843 	    hibernate_state->hib_stream.avail_in;
844 }
845 
846 /*
847  * Write the hibernation information specified in hiber_info
848  * to the location in swap previously calculated (last block of
849  * swap), called the "signature block".
850  */
851 int
852 hibernate_write_signature(union hibernate_info *hib)
853 {
854 	/* Write hibernate info to disk */
855 	return (hib->io_func(hib->dev, hib->sig_offset,
856 	    (vaddr_t)hib, DEV_BSIZE, HIB_W,
857 	    hib->io_page));
858 }
859 
860 /*
861  * Write the memory chunk table to the area in swap immediately
862  * preceding the signature block. The chunk table is stored
863  * in the piglet when this function is called.  Returns errno.
864  */
865 int
866 hibernate_write_chunktable(union hibernate_info *hib)
867 {
868 	vaddr_t hibernate_chunk_table_start;
869 	size_t hibernate_chunk_table_size;
870 	int i, err;
871 
872 	hibernate_chunk_table_size = HIBERNATE_CHUNK_TABLE_SIZE;
873 
874 	hibernate_chunk_table_start = hib->piglet_va +
875 	    HIBERNATE_CHUNK_SIZE;
876 
877 	/* Write chunk table */
878 	for (i = 0; i < hibernate_chunk_table_size; i += MAXPHYS) {
879 		if ((err = hib->io_func(hib->dev,
880 		    hib->chunktable_offset + (i/DEV_BSIZE),
881 		    (vaddr_t)(hibernate_chunk_table_start + i),
882 		    MAXPHYS, HIB_W, hib->io_page))) {
883 			DPRINTF("chunktable write error: %d\n", err);
884 			return (err);
885 		}
886 	}
887 
888 	return (0);
889 }
890 
891 /*
892  * Write an empty hiber_info to the swap signature block, which is
893  * guaranteed to not match any valid hib.
894  */
895 int
896 hibernate_clear_signature(void)
897 {
898 	union hibernate_info blank_hiber_info;
899 	union hibernate_info hib;
900 
901 	/* Zero out a blank hiber_info */
902 	memset(&blank_hiber_info, 0, sizeof(union hibernate_info));
903 
904 	/* Get the signature block location */
905 	if (get_hibernate_info(&hib, 0))
906 		return (1);
907 
908 	/* Write (zeroed) hibernate info to disk */
909 	DPRINTF("clearing hibernate signature block location: %lld\n",
910 		hib.sig_offset);
911 	if (hibernate_block_io(&hib,
912 	    hib.sig_offset,
913 	    DEV_BSIZE, (vaddr_t)&blank_hiber_info, 1))
914 		printf("Warning: could not clear hibernate signature\n");
915 
916 	return (0);
917 }
918 
919 /*
920  * Compare two hibernate_infos to determine if they are the same (eg,
921  * we should be performing a hibernate resume on this machine.
922  * Not all fields are checked - just enough to verify that the machine
923  * has the same memory configuration and kernel as the one that
924  * wrote the signature previously.
925  */
926 int
927 hibernate_compare_signature(union hibernate_info *mine,
928     union hibernate_info *disk)
929 {
930 	u_int i;
931 
932 	if (mine->nranges != disk->nranges) {
933 		printf("unhibernate failed: memory layout changed\n");
934 		return (1);
935 	}
936 
937 	if (strcmp(mine->kernel_version, disk->kernel_version) != 0) {
938 		printf("unhibernate failed: original kernel changed\n");
939 		return (1);
940 	}
941 
942 	if (hibsum() != disk->kernel_sum) {
943 		printf("unhibernate failed: original kernel changed\n");
944 		return (1);
945 	}
946 
947 	for (i = 0; i < mine->nranges; i++) {
948 		if ((mine->ranges[i].base != disk->ranges[i].base) ||
949 		    (mine->ranges[i].end != disk->ranges[i].end) ) {
950 			DPRINTF("hib range %d mismatch [%p-%p != %p-%p]\n",
951 				i,
952 				(void *)mine->ranges[i].base,
953 				(void *)mine->ranges[i].end,
954 				(void *)disk->ranges[i].base,
955 				(void *)disk->ranges[i].end);
956 			printf("unhibernate failed: memory size changed\n");
957 			return (1);
958 		}
959 	}
960 
961 	return (0);
962 }
963 
964 /*
965  * Transfers xfer_size bytes between the hibernate device specified in
966  * hib_info at offset blkctr and the vaddr specified at dest.
967  *
968  * Separate offsets and pages are used to handle misaligned reads (reads
969  * that span a page boundary).
970  *
971  * blkctr specifies a relative offset (relative to the start of swap),
972  * not an absolute disk offset
973  *
974  */
975 int
976 hibernate_block_io(union hibernate_info *hib, daddr_t blkctr,
977     size_t xfer_size, vaddr_t dest, int iswrite)
978 {
979 	struct buf *bp;
980 	struct bdevsw *bdsw;
981 	int error;
982 
983 	bp = geteblk(xfer_size);
984 	bdsw = &bdevsw[major(hib->dev)];
985 
986 	error = (*bdsw->d_open)(hib->dev, FREAD, S_IFCHR, curproc);
987 	if (error) {
988 		printf("hibernate_block_io open failed\n");
989 		return (1);
990 	}
991 
992 	if (iswrite)
993 		bcopy((caddr_t)dest, bp->b_data, xfer_size);
994 
995 	bp->b_bcount = xfer_size;
996 	bp->b_blkno = blkctr;
997 	CLR(bp->b_flags, B_READ | B_WRITE | B_DONE);
998 	SET(bp->b_flags, B_BUSY | (iswrite ? B_WRITE : B_READ) | B_RAW);
999 	bp->b_dev = hib->dev;
1000 	(*bdsw->d_strategy)(bp);
1001 
1002 	error = biowait(bp);
1003 	if (error) {
1004 		printf("hib block_io biowait error %d blk %lld size %zu\n",
1005 			error, (long long)blkctr, xfer_size);
1006 		error = (*bdsw->d_close)(hib->dev, 0, S_IFCHR,
1007 		    curproc);
1008 		if (error)
1009 			printf("hibernate_block_io error close failed\n");
1010 		return (1);
1011 	}
1012 
1013 	error = (*bdsw->d_close)(hib->dev, FREAD, S_IFCHR, curproc);
1014 	if (error) {
1015 		printf("hibernate_block_io close failed\n");
1016 		return (1);
1017 	}
1018 
1019 	if (!iswrite)
1020 		bcopy(bp->b_data, (caddr_t)dest, xfer_size);
1021 
1022 	bp->b_flags |= B_INVAL;
1023 	brelse(bp);
1024 
1025 	return (0);
1026 }
1027 
1028 /*
1029  * Preserve one page worth of random data, generated from the resuming
1030  * kernel's arc4random. After resume, this preserved entropy can be used
1031  * to further improve the un-hibernated machine's entropy pool. This
1032  * random data is stored in the piglet, which is preserved across the
1033  * unpack operation, and is restored later in the resume process (see
1034  * hib_getentropy)
1035  */
1036 void
1037 hibernate_preserve_entropy(union hibernate_info *hib)
1038 {
1039 	void *entropy;
1040 
1041 	entropy = km_alloc(PAGE_SIZE, &kv_any, &kp_none, &kd_nowait);
1042 
1043 	if (!entropy)
1044 		return;
1045 
1046 	pmap_activate(curproc);
1047 	pmap_kenter_pa((vaddr_t)entropy,
1048 	    (paddr_t)(hib->piglet_pa + (29 * PAGE_SIZE)),
1049 	    PROT_READ | PROT_WRITE);
1050 
1051 	arc4random_buf((void *)entropy, PAGE_SIZE);
1052 	pmap_kremove((vaddr_t)entropy, PAGE_SIZE);
1053 	km_free(entropy, PAGE_SIZE, &kv_any, &kp_none);
1054 }
1055 
1056 #ifndef NO_PROPOLICE
1057 vaddr_t
1058 hibernate_unprotect_ssp(void)
1059 {
1060 	struct kmem_dyn_mode kd_avoidalias;
1061 	vaddr_t va = trunc_page((vaddr_t)&__guard_local);
1062 	paddr_t pa;
1063 
1064 	pmap_extract(pmap_kernel(), va, &pa);
1065 
1066 	memset(&kd_avoidalias, 0, sizeof kd_avoidalias);
1067 	kd_avoidalias.kd_prefer = pa;
1068 	kd_avoidalias.kd_waitok = 1;
1069 	va = (vaddr_t)km_alloc(PAGE_SIZE, &kv_any, &kp_none, &kd_avoidalias);
1070 	if (!va)
1071 		panic("hibernate_unprotect_ssp");
1072 
1073 	pmap_kenter_pa(va, pa, PROT_READ | PROT_WRITE);
1074 	pmap_update(pmap_kernel());
1075 
1076 	return va;
1077 }
1078 
1079 void
1080 hibernate_reprotect_ssp(vaddr_t va)
1081 {
1082 	pmap_kremove(va, PAGE_SIZE);
1083 	km_free((void *)va, PAGE_SIZE, &kv_any, &kp_none);
1084 }
1085 #endif /* NO_PROPOLICE */
1086 
1087 /*
1088  * Reads the signature block from swap, checks against the current machine's
1089  * information. If the information matches, perform a resume by reading the
1090  * saved image into the pig area, and unpacking.
1091  *
1092  * Must be called with interrupts enabled.
1093  */
1094 void
1095 hibernate_resume(void)
1096 {
1097 	union hibernate_info hib;
1098 	int s;
1099 #ifndef NO_PROPOLICE
1100 	vsize_t off = (vaddr_t)&__guard_local -
1101 	    trunc_page((vaddr_t)&__guard_local);
1102 	vaddr_t guard_va;
1103 #endif
1104 
1105 	/* Get current running machine's hibernate info */
1106 	memset(&hib, 0, sizeof(hib));
1107 	if (get_hibernate_info(&hib, 0)) {
1108 		DPRINTF("couldn't retrieve machine's hibernate info\n");
1109 		return;
1110 	}
1111 
1112 	/* Read hibernate info from disk */
1113 	s = splbio();
1114 
1115 	DPRINTF("reading hibernate signature block location: %lld\n",
1116 		hib.sig_offset);
1117 
1118 	if (hibernate_block_io(&hib,
1119 	    hib.sig_offset,
1120 	    DEV_BSIZE, (vaddr_t)&disk_hib, 0)) {
1121 		DPRINTF("error in hibernate read");
1122 		splx(s);
1123 		return;
1124 	}
1125 
1126 	/* Check magic number */
1127 	if (disk_hib.magic != HIBERNATE_MAGIC) {
1128 		DPRINTF("wrong magic number in hibernate signature: %x\n",
1129 			disk_hib.magic);
1130 		splx(s);
1131 		return;
1132 	}
1133 
1134 	/*
1135 	 * We (possibly) found a hibernate signature. Clear signature first,
1136 	 * to prevent accidental resume or endless resume cycles later.
1137 	 */
1138 	if (hibernate_clear_signature()) {
1139 		DPRINTF("error clearing hibernate signature block\n");
1140 		splx(s);
1141 		return;
1142 	}
1143 
1144 	/*
1145 	 * If on-disk and in-memory hibernate signatures match,
1146 	 * this means we should do a resume from hibernate.
1147 	 */
1148 	if (hibernate_compare_signature(&hib, &disk_hib)) {
1149 		DPRINTF("mismatched hibernate signature block\n");
1150 		splx(s);
1151 		return;
1152 	}
1153 
1154 #ifdef MULTIPROCESSOR
1155 	/* XXX - if we fail later, we may need to rehatch APs on some archs */
1156 	DPRINTF("hibernate: quiescing APs\n");
1157 	hibernate_quiesce_cpus();
1158 #endif /* MULTIPROCESSOR */
1159 
1160 	/* Read the image from disk into the image (pig) area */
1161 	if (hibernate_read_image(&disk_hib))
1162 		goto fail;
1163 
1164 	DPRINTF("hibernate: quiescing devices\n");
1165 	if (config_suspend_all(DVACT_QUIESCE) != 0)
1166 		goto fail;
1167 
1168 #ifndef NO_PROPOLICE
1169 	guard_va = hibernate_unprotect_ssp();
1170 #endif /* NO_PROPOLICE */
1171 
1172 	(void) splhigh();
1173 	hibernate_disable_intr_machdep();
1174 	cold = 1;
1175 
1176 	DPRINTF("hibernate: suspending devices\n");
1177 	if (config_suspend_all(DVACT_SUSPEND) != 0) {
1178 		cold = 0;
1179 		hibernate_enable_intr_machdep();
1180 #ifndef NO_PROPOLICE
1181 		hibernate_reprotect_ssp(guard_va);
1182 #endif /* ! NO_PROPOLICE */
1183 		goto fail;
1184 	}
1185 
1186 	hibernate_preserve_entropy(&disk_hib);
1187 
1188 	printf("Unpacking image...\n");
1189 
1190 	/* Switch stacks */
1191 	DPRINTF("hibernate: switching stacks\n");
1192 	hibernate_switch_stack_machdep();
1193 
1194 #ifndef NO_PROPOLICE
1195 	/* Start using suspended kernel's propolice guard */
1196 	*(long *)(guard_va + off) = disk_hib.guard;
1197 	hibernate_reprotect_ssp(guard_va);
1198 #endif /* ! NO_PROPOLICE */
1199 
1200 	/* Unpack and resume */
1201 	hibernate_unpack_image(&disk_hib);
1202 
1203 fail:
1204 	splx(s);
1205 	printf("\nUnable to resume hibernated image\n");
1206 }
1207 
1208 /*
1209  * Unpack image from pig area to original location by looping through the
1210  * list of output chunks in the order they should be restored (fchunks).
1211  *
1212  * Note that due to the stack smash protector and the fact that we have
1213  * switched stacks, it is not permitted to return from this function.
1214  */
1215 void
1216 hibernate_unpack_image(union hibernate_info *hib)
1217 {
1218 	struct hibernate_disk_chunk *chunks;
1219 	union hibernate_info local_hib;
1220 	paddr_t image_cur = global_pig_start;
1221 	short i, *fchunks;
1222 	char *pva;
1223 
1224 	/* Piglet will be identity mapped (VA == PA) */
1225 	pva = (char *)hib->piglet_pa;
1226 
1227 	fchunks = (short *)(pva + (4 * PAGE_SIZE));
1228 
1229 	chunks = (struct hibernate_disk_chunk *)(pva + HIBERNATE_CHUNK_SIZE);
1230 
1231 	/* Can't use hiber_info that's passed in after this point */
1232 	bcopy(hib, &local_hib, sizeof(union hibernate_info));
1233 
1234 	/* VA == PA */
1235 	local_hib.piglet_va = local_hib.piglet_pa;
1236 
1237 	/*
1238 	 * Point of no return. Once we pass this point, only kernel code can
1239 	 * be accessed. No global variables or other kernel data structures
1240 	 * are guaranteed to be coherent after unpack starts.
1241 	 *
1242 	 * The image is now in high memory (pig area), we unpack from the pig
1243 	 * to the correct location in memory. We'll eventually end up copying
1244 	 * on top of ourself, but we are assured the kernel code here is the
1245 	 * same between the hibernated and resuming kernel, and we are running
1246 	 * on our own stack, so the overwrite is ok.
1247 	 */
1248 	DPRINTF("hibernate: activating alt. pagetable and starting unpack\n");
1249 	hibernate_activate_resume_pt_machdep();
1250 
1251 	for (i = 0; i < local_hib.chunk_ctr; i++) {
1252 		/* Reset zlib for inflate */
1253 		if (hibernate_zlib_reset(&local_hib, 0) != Z_OK)
1254 			panic("hibernate failed to reset zlib for inflate");
1255 
1256 		hibernate_process_chunk(&local_hib, &chunks[fchunks[i]],
1257 		    image_cur);
1258 
1259 		image_cur += chunks[fchunks[i]].compressed_size;
1260 
1261 	}
1262 
1263 	/*
1264 	 * Resume the loaded kernel by jumping to the MD resume vector.
1265 	 * We won't be returning from this call.
1266 	 */
1267 	hibernate_resume_machdep();
1268 }
1269 
1270 /*
1271  * Bounce a compressed image chunk to the piglet, entering mappings for the
1272  * copied pages as needed
1273  */
1274 void
1275 hibernate_copy_chunk_to_piglet(paddr_t img_cur, vaddr_t piglet, size_t size)
1276 {
1277 	size_t ct, ofs;
1278 	paddr_t src = img_cur;
1279 	vaddr_t dest = piglet;
1280 
1281 	/* Copy first partial page */
1282 	ct = (PAGE_SIZE) - (src & PAGE_MASK);
1283 	ofs = (src & PAGE_MASK);
1284 
1285 	if (ct < PAGE_SIZE) {
1286 		hibernate_enter_resume_mapping(HIBERNATE_INFLATE_PAGE,
1287 			(src - ofs), 0);
1288 		hibernate_flush();
1289 		bcopy((caddr_t)(HIBERNATE_INFLATE_PAGE + ofs), (caddr_t)dest, ct);
1290 		src += ct;
1291 		dest += ct;
1292 	}
1293 
1294 	/* Copy remaining pages */
1295 	while (src < size + img_cur) {
1296 		hibernate_enter_resume_mapping(HIBERNATE_INFLATE_PAGE, src, 0);
1297 		hibernate_flush();
1298 		ct = PAGE_SIZE;
1299 		bcopy((caddr_t)(HIBERNATE_INFLATE_PAGE), (caddr_t)dest, ct);
1300 		hibernate_flush();
1301 		src += ct;
1302 		dest += ct;
1303 	}
1304 }
1305 
1306 /*
1307  * Process a chunk by bouncing it to the piglet, followed by unpacking
1308  */
1309 void
1310 hibernate_process_chunk(union hibernate_info *hib,
1311     struct hibernate_disk_chunk *chunk, paddr_t img_cur)
1312 {
1313 	char *pva = (char *)hib->piglet_va;
1314 
1315 	hibernate_copy_chunk_to_piglet(img_cur,
1316 	 (vaddr_t)(pva + (HIBERNATE_CHUNK_SIZE * 2)), chunk->compressed_size);
1317 	hibernate_inflate_region(hib, chunk->base,
1318 	    (vaddr_t)(pva + (HIBERNATE_CHUNK_SIZE * 2)),
1319 	    chunk->compressed_size);
1320 }
1321 
1322 /*
1323  * Calculate RLE component for 'inaddr'. Clamps to max RLE pages between
1324  * inaddr and range_end.
1325  */
1326 int
1327 hibernate_calc_rle(paddr_t inaddr, paddr_t range_end)
1328 {
1329 	int rle;
1330 
1331 	rle = uvm_page_rle(inaddr);
1332 	KASSERT(rle >= 0 && rle <= MAX_RLE);
1333 
1334 	/* Clamp RLE to range end */
1335 	if (rle > 0 && inaddr + (rle * PAGE_SIZE) > range_end)
1336 		rle = (range_end - inaddr) / PAGE_SIZE;
1337 
1338 	return (rle);
1339 }
1340 
1341 /*
1342  * Write the RLE byte for page at 'inaddr' to the output stream.
1343  * Returns the number of pages to be skipped at 'inaddr'.
1344  */
1345 int
1346 hibernate_write_rle(union hibernate_info *hib, paddr_t inaddr,
1347 	paddr_t range_end, daddr_t *blkctr,
1348 	size_t *out_remaining)
1349 {
1350 	int rle, err, *rleloc;
1351 	struct hibernate_zlib_state *hibernate_state;
1352 	vaddr_t hibernate_io_page = hib->piglet_va + PAGE_SIZE;
1353 
1354 	hibernate_state =
1355 	    (struct hibernate_zlib_state *)HIBERNATE_HIBALLOC_PAGE;
1356 
1357 	rle = hibernate_calc_rle(inaddr, range_end);
1358 
1359 	rleloc = (int *)hibernate_rle_page + MAX_RLE - 1;
1360 	*rleloc = rle;
1361 
1362 	/* Deflate the RLE byte into the stream */
1363 	hibernate_deflate(hib, (paddr_t)rleloc, out_remaining);
1364 
1365 	/* Did we fill the output page? If so, flush to disk */
1366 	if (*out_remaining == 0) {
1367 		if ((err = hib->io_func(hib->dev, *blkctr + hib->image_offset,
1368 			(vaddr_t)hibernate_io_page, PAGE_SIZE, HIB_W,
1369 			hib->io_page))) {
1370 				DPRINTF("hib write error %d\n", err);
1371 				return (err);
1372 		}
1373 
1374 		*blkctr += PAGE_SIZE / DEV_BSIZE;
1375 		*out_remaining = PAGE_SIZE;
1376 
1377 		/* If we didn't deflate the entire RLE byte, finish it now */
1378 		if (hibernate_state->hib_stream.avail_in != 0)
1379 			hibernate_deflate(hib,
1380 				(vaddr_t)hibernate_state->hib_stream.next_in,
1381 				out_remaining);
1382 	}
1383 
1384 	return (rle);
1385 }
1386 
1387 /*
1388  * Write a compressed version of this machine's memory to disk, at the
1389  * precalculated swap offset:
1390  *
1391  * end of swap - signature block size - chunk table size - memory size
1392  *
1393  * The function begins by looping through each phys mem range, cutting each
1394  * one into MD sized chunks. These chunks are then compressed individually
1395  * and written out to disk, in phys mem order. Some chunks might compress
1396  * more than others, and for this reason, each chunk's size is recorded
1397  * in the chunk table, which is written to disk after the image has
1398  * properly been compressed and written (in hibernate_write_chunktable).
1399  *
1400  * When this function is called, the machine is nearly suspended - most
1401  * devices are quiesced/suspended, interrupts are off, and cold has
1402  * been set. This means that there can be no side effects once the
1403  * write has started, and the write function itself can also have no
1404  * side effects. This also means no printfs are permitted (since printf
1405  * has side effects.)
1406  *
1407  * Return values :
1408  *
1409  * 0      - success
1410  * EIO    - I/O error occurred writing the chunks
1411  * EINVAL - Failed to write a complete range
1412  * ENOMEM - Memory allocation failure during preparation of the zlib arena
1413  */
1414 int
1415 hibernate_write_chunks(union hibernate_info *hib)
1416 {
1417 	paddr_t range_base, range_end, inaddr, temp_inaddr;
1418 	size_t nblocks, out_remaining, used;
1419 	struct hibernate_disk_chunk *chunks;
1420 	vaddr_t hibernate_io_page = hib->piglet_va + PAGE_SIZE;
1421 	daddr_t blkctr = 0;
1422 	int i, rle, err;
1423 	struct hibernate_zlib_state *hibernate_state;
1424 
1425 	hibernate_state =
1426 	    (struct hibernate_zlib_state *)HIBERNATE_HIBALLOC_PAGE;
1427 
1428 	hib->chunk_ctr = 0;
1429 
1430 	/*
1431 	 * Map the utility VAs to the piglet. See the piglet map at the
1432 	 * top of this file for piglet layout information.
1433 	 */
1434 	hibernate_copy_page = hib->piglet_va + 3 * PAGE_SIZE;
1435 	hibernate_rle_page = hib->piglet_va + 28 * PAGE_SIZE;
1436 
1437 	chunks = (struct hibernate_disk_chunk *)(hib->piglet_va +
1438 	    HIBERNATE_CHUNK_SIZE);
1439 
1440 	/* Calculate the chunk regions */
1441 	for (i = 0; i < hib->nranges; i++) {
1442 		range_base = hib->ranges[i].base;
1443 		range_end = hib->ranges[i].end;
1444 
1445 		inaddr = range_base;
1446 
1447 		while (inaddr < range_end) {
1448 			chunks[hib->chunk_ctr].base = inaddr;
1449 			if (inaddr + HIBERNATE_CHUNK_SIZE < range_end)
1450 				chunks[hib->chunk_ctr].end = inaddr +
1451 				    HIBERNATE_CHUNK_SIZE;
1452 			else
1453 				chunks[hib->chunk_ctr].end = range_end;
1454 
1455 			inaddr += HIBERNATE_CHUNK_SIZE;
1456 			hib->chunk_ctr ++;
1457 		}
1458 	}
1459 
1460 	uvm_pmr_dirty_everything();
1461 	uvm_pmr_zero_everything();
1462 
1463 	/* Compress and write the chunks in the chunktable */
1464 	for (i = 0; i < hib->chunk_ctr; i++) {
1465 		range_base = chunks[i].base;
1466 		range_end = chunks[i].end;
1467 
1468 		chunks[i].offset = blkctr + hib->image_offset;
1469 
1470 		/* Reset zlib for deflate */
1471 		if (hibernate_zlib_reset(hib, 1) != Z_OK) {
1472 			DPRINTF("hibernate_zlib_reset failed for deflate\n");
1473 			return (ENOMEM);
1474 		}
1475 
1476 		inaddr = range_base;
1477 
1478 		/*
1479 		 * For each range, loop through its phys mem region
1480 		 * and write out the chunks (the last chunk might be
1481 		 * smaller than the chunk size).
1482 		 */
1483 		while (inaddr < range_end) {
1484 			out_remaining = PAGE_SIZE;
1485 			while (out_remaining > 0 && inaddr < range_end) {
1486 				/*
1487 				 * Adjust for regions that are not evenly
1488 				 * divisible by PAGE_SIZE or overflowed
1489 				 * pages from the previous iteration.
1490 				 */
1491 				temp_inaddr = (inaddr & PAGE_MASK) +
1492 				    hibernate_copy_page;
1493 
1494 				/* Deflate from temp_inaddr to IO page */
1495 				if (inaddr != range_end) {
1496 					if (inaddr % PAGE_SIZE == 0) {
1497 						rle = hibernate_write_rle(hib,
1498 							inaddr,
1499 							range_end,
1500 							&blkctr,
1501 							&out_remaining);
1502 					}
1503 
1504 					if (rle == 0) {
1505 						pmap_kenter_pa(hibernate_temp_page,
1506 							inaddr & PMAP_PA_MASK,
1507 							PROT_READ);
1508 
1509 						bcopy((caddr_t)hibernate_temp_page,
1510 							(caddr_t)hibernate_copy_page,
1511 							PAGE_SIZE);
1512 						inaddr += hibernate_deflate(hib,
1513 							temp_inaddr,
1514 							&out_remaining);
1515 					} else {
1516 						inaddr += rle * PAGE_SIZE;
1517 						if (inaddr > range_end)
1518 							inaddr = range_end;
1519 					}
1520 
1521 				}
1522 
1523 				if (out_remaining == 0) {
1524 					/* Filled up the page */
1525 					nblocks = PAGE_SIZE / DEV_BSIZE;
1526 
1527 					if ((err = hib->io_func(hib->dev,
1528 					    blkctr + hib->image_offset,
1529 					    (vaddr_t)hibernate_io_page,
1530 					    PAGE_SIZE, HIB_W, hib->io_page))) {
1531 						DPRINTF("hib write error %d\n",
1532 						    err);
1533 						return (err);
1534 					}
1535 
1536 					blkctr += nblocks;
1537 				}
1538 			}
1539 		}
1540 
1541 		if (inaddr != range_end) {
1542 			DPRINTF("deflate range ended prematurely\n");
1543 			return (EINVAL);
1544 		}
1545 
1546 		/*
1547 		 * End of range. Round up to next secsize bytes
1548 		 * after finishing compress
1549 		 */
1550 		if (out_remaining == 0)
1551 			out_remaining = PAGE_SIZE;
1552 
1553 		/* Finish compress */
1554 		hibernate_state->hib_stream.next_in = (unsigned char *)inaddr;
1555 		hibernate_state->hib_stream.avail_in = 0;
1556 		hibernate_state->hib_stream.next_out =
1557 		    (unsigned char *)hibernate_io_page +
1558 			(PAGE_SIZE - out_remaining);
1559 
1560 		/* We have an extra output page available for finalize */
1561 		hibernate_state->hib_stream.avail_out =
1562 			out_remaining + PAGE_SIZE;
1563 
1564 		if ((err = deflate(&hibernate_state->hib_stream, Z_FINISH)) !=
1565 		    Z_STREAM_END) {
1566 			DPRINTF("deflate error in output stream: %d\n", err);
1567 			return (err);
1568 		}
1569 
1570 		out_remaining = hibernate_state->hib_stream.avail_out;
1571 
1572 		used = 2 * PAGE_SIZE - out_remaining;
1573 		nblocks = used / DEV_BSIZE;
1574 
1575 		/* Round up to next block if needed */
1576 		if (used % DEV_BSIZE != 0)
1577 			nblocks ++;
1578 
1579 		/* Write final block(s) for this chunk */
1580 		if ((err = hib->io_func(hib->dev, blkctr + hib->image_offset,
1581 		    (vaddr_t)hibernate_io_page, nblocks*DEV_BSIZE,
1582 		    HIB_W, hib->io_page))) {
1583 			DPRINTF("hib final write error %d\n", err);
1584 			return (err);
1585 		}
1586 
1587 		blkctr += nblocks;
1588 
1589 		chunks[i].compressed_size = (blkctr + hib->image_offset -
1590 		    chunks[i].offset) * DEV_BSIZE;
1591 	}
1592 
1593 	hib->chunktable_offset = hib->image_offset + blkctr;
1594 	return (0);
1595 }
1596 
1597 /*
1598  * Reset the zlib stream state and allocate a new hiballoc area for either
1599  * inflate or deflate. This function is called once for each hibernate chunk.
1600  * Calling hiballoc_init multiple times is acceptable since the memory it is
1601  * provided is unmanaged memory (stolen). We use the memory provided to us
1602  * by the piglet allocated via the supplied hib.
1603  */
1604 int
1605 hibernate_zlib_reset(union hibernate_info *hib, int deflate)
1606 {
1607 	vaddr_t hibernate_zlib_start;
1608 	size_t hibernate_zlib_size;
1609 	char *pva = (char *)hib->piglet_va;
1610 	struct hibernate_zlib_state *hibernate_state;
1611 
1612 	hibernate_state =
1613 	    (struct hibernate_zlib_state *)HIBERNATE_HIBALLOC_PAGE;
1614 
1615 	if (!deflate)
1616 		pva = (char *)((paddr_t)pva & (PIGLET_PAGE_MASK));
1617 
1618 	/*
1619 	 * See piglet layout information at the start of this file for
1620 	 * information on the zlib page assignments.
1621 	 */
1622 	hibernate_zlib_start = (vaddr_t)(pva + (30 * PAGE_SIZE));
1623 	hibernate_zlib_size = 80 * PAGE_SIZE;
1624 
1625 	memset((void *)hibernate_zlib_start, 0, hibernate_zlib_size);
1626 	memset(hibernate_state, 0, PAGE_SIZE);
1627 
1628 	/* Set up stream structure */
1629 	hibernate_state->hib_stream.zalloc = (alloc_func)hibernate_zlib_alloc;
1630 	hibernate_state->hib_stream.zfree = (free_func)hibernate_zlib_free;
1631 
1632 	/* Initialize the hiballoc arena for zlib allocs/frees */
1633 	hiballoc_init(&hibernate_state->hiballoc_arena,
1634 	    (caddr_t)hibernate_zlib_start, hibernate_zlib_size);
1635 
1636 	if (deflate) {
1637 		return deflateInit(&hibernate_state->hib_stream,
1638 		    Z_BEST_SPEED);
1639 	} else
1640 		return inflateInit(&hibernate_state->hib_stream);
1641 }
1642 
1643 /*
1644  * Reads the hibernated memory image from disk, whose location and
1645  * size are recorded in hib. Begin by reading the persisted
1646  * chunk table, which records the original chunk placement location
1647  * and compressed size for each. Next, allocate a pig region of
1648  * sufficient size to hold the compressed image. Next, read the
1649  * chunks into the pig area (calling hibernate_read_chunks to do this),
1650  * and finally, if all of the above succeeds, clear the hibernate signature.
1651  * The function will then return to hibernate_resume, which will proceed
1652  * to unpack the pig image to the correct place in memory.
1653  */
1654 int
1655 hibernate_read_image(union hibernate_info *hib)
1656 {
1657 	size_t compressed_size, disk_size, chunktable_size, pig_sz;
1658 	paddr_t image_start, image_end, pig_start, pig_end;
1659 	struct hibernate_disk_chunk *chunks;
1660 	daddr_t blkctr;
1661 	vaddr_t chunktable = (vaddr_t)NULL;
1662 	paddr_t piglet_chunktable = hib->piglet_pa +
1663 	    HIBERNATE_CHUNK_SIZE;
1664 	int i, status;
1665 
1666 	status = 0;
1667 	pmap_activate(curproc);
1668 
1669 	/* Calculate total chunk table size in disk blocks */
1670 	chunktable_size = HIBERNATE_CHUNK_TABLE_SIZE / DEV_BSIZE;
1671 
1672 	blkctr = hib->chunktable_offset;
1673 
1674 	chunktable = (vaddr_t)km_alloc(HIBERNATE_CHUNK_TABLE_SIZE, &kv_any,
1675 	    &kp_none, &kd_nowait);
1676 
1677 	if (!chunktable)
1678 		return (1);
1679 
1680 	/* Map chunktable pages */
1681 	for (i = 0; i < HIBERNATE_CHUNK_TABLE_SIZE; i += PAGE_SIZE)
1682 		pmap_kenter_pa(chunktable + i, piglet_chunktable + i,
1683 		    PROT_READ | PROT_WRITE);
1684 	pmap_update(pmap_kernel());
1685 
1686 	/* Read the chunktable from disk into the piglet chunktable */
1687 	for (i = 0; i < HIBERNATE_CHUNK_TABLE_SIZE;
1688 	    i += MAXPHYS, blkctr += MAXPHYS/DEV_BSIZE)
1689 		hibernate_block_io(hib, blkctr, MAXPHYS,
1690 		    chunktable + i, 0);
1691 
1692 	blkctr = hib->image_offset;
1693 	compressed_size = 0;
1694 
1695 	chunks = (struct hibernate_disk_chunk *)chunktable;
1696 
1697 	for (i = 0; i < hib->chunk_ctr; i++)
1698 		compressed_size += chunks[i].compressed_size;
1699 
1700 	disk_size = compressed_size;
1701 
1702 	printf("unhibernating @ block %lld length %lu bytes\n",
1703 	    hib->sig_offset - chunktable_size,
1704 	    compressed_size);
1705 
1706 	/* Allocate the pig area */
1707 	pig_sz = compressed_size + HIBERNATE_CHUNK_SIZE;
1708 	if (uvm_pmr_alloc_pig(&pig_start, pig_sz, hib->piglet_pa) == ENOMEM) {
1709 		status = 1;
1710 		goto unmap;
1711 	}
1712 
1713 	pig_end = pig_start + pig_sz;
1714 
1715 	/* Calculate image extents. Pig image must end on a chunk boundary. */
1716 	image_end = pig_end & ~(HIBERNATE_CHUNK_SIZE - 1);
1717 	image_start = image_end - disk_size;
1718 
1719 	hibernate_read_chunks(hib, image_start, image_end, disk_size,
1720 	    chunks);
1721 
1722 	/* Prepare the resume time pmap/page table */
1723 	hibernate_populate_resume_pt(hib, image_start, image_end);
1724 
1725 unmap:
1726 	/* Unmap chunktable pages */
1727 	pmap_kremove(chunktable, HIBERNATE_CHUNK_TABLE_SIZE);
1728 	pmap_update(pmap_kernel());
1729 
1730 	return (status);
1731 }
1732 
1733 /*
1734  * Read the hibernated memory chunks from disk (chunk information at this
1735  * point is stored in the piglet) into the pig area specified by
1736  * [pig_start .. pig_end]. Order the chunks so that the final chunk is the
1737  * only chunk with overlap possibilities.
1738  */
1739 int
1740 hibernate_read_chunks(union hibernate_info *hib, paddr_t pig_start,
1741     paddr_t pig_end, size_t image_compr_size,
1742     struct hibernate_disk_chunk *chunks)
1743 {
1744 	paddr_t img_cur, piglet_base;
1745 	daddr_t blkctr;
1746 	size_t processed, compressed_size, read_size;
1747 	int nchunks, nfchunks, num_io_pages;
1748 	vaddr_t tempva, hibernate_fchunk_area;
1749 	short *fchunks, i, j;
1750 
1751 	tempva = (vaddr_t)NULL;
1752 	hibernate_fchunk_area = (vaddr_t)NULL;
1753 	nfchunks = 0;
1754 	piglet_base = hib->piglet_pa;
1755 	global_pig_start = pig_start;
1756 
1757 	/*
1758 	 * These mappings go into the resuming kernel's page table, and are
1759 	 * used only during image read. They dissappear from existence
1760 	 * when the suspended kernel is unpacked on top of us.
1761 	 */
1762 	tempva = (vaddr_t)km_alloc(MAXPHYS + PAGE_SIZE, &kv_any, &kp_none,
1763 		&kd_nowait);
1764 	if (!tempva)
1765 		return (1);
1766 	hibernate_fchunk_area = (vaddr_t)km_alloc(24 * PAGE_SIZE, &kv_any,
1767 	    &kp_none, &kd_nowait);
1768 	if (!hibernate_fchunk_area)
1769 		return (1);
1770 
1771 	/* Final output chunk ordering VA */
1772 	fchunks = (short *)hibernate_fchunk_area;
1773 
1774 	/* Map the chunk ordering region */
1775 	for(i = 0; i < 24 ; i++)
1776 		pmap_kenter_pa(hibernate_fchunk_area + (i * PAGE_SIZE),
1777 			piglet_base + ((4 + i) * PAGE_SIZE),
1778 			PROT_READ | PROT_WRITE);
1779 	pmap_update(pmap_kernel());
1780 
1781 	nchunks = hib->chunk_ctr;
1782 
1783 	/* Initially start all chunks as unplaced */
1784 	for (i = 0; i < nchunks; i++)
1785 		chunks[i].flags = 0;
1786 
1787 	/*
1788 	 * Search the list for chunks that are outside the pig area. These
1789 	 * can be placed first in the final output list.
1790 	 */
1791 	for (i = 0; i < nchunks; i++) {
1792 		if (chunks[i].end <= pig_start || chunks[i].base >= pig_end) {
1793 			fchunks[nfchunks] = i;
1794 			nfchunks++;
1795 			chunks[i].flags |= HIBERNATE_CHUNK_PLACED;
1796 		}
1797 	}
1798 
1799 	/*
1800 	 * Walk the ordering, place the chunks in ascending memory order.
1801 	 */
1802 	for (i = 0; i < nchunks; i++) {
1803 		if (chunks[i].flags != HIBERNATE_CHUNK_PLACED) {
1804 			fchunks[nfchunks] = i;
1805 			nfchunks++;
1806 			chunks[i].flags = HIBERNATE_CHUNK_PLACED;
1807 		}
1808 	}
1809 
1810 	img_cur = pig_start;
1811 
1812 	for (i = 0; i < nfchunks; i++) {
1813 		blkctr = chunks[fchunks[i]].offset;
1814 		processed = 0;
1815 		compressed_size = chunks[fchunks[i]].compressed_size;
1816 
1817 		while (processed < compressed_size) {
1818 			if (compressed_size - processed >= MAXPHYS)
1819 				read_size = MAXPHYS;
1820 			else
1821 				read_size = compressed_size - processed;
1822 
1823 			/*
1824 			 * We're reading read_size bytes, offset from the
1825 			 * start of a page by img_cur % PAGE_SIZE, so the
1826 			 * end will be read_size + (img_cur % PAGE_SIZE)
1827 			 * from the start of the first page.  Round that
1828 			 * up to the next page size.
1829 			 */
1830 			num_io_pages = (read_size + (img_cur % PAGE_SIZE)
1831 				+ PAGE_SIZE - 1) / PAGE_SIZE;
1832 
1833 			KASSERT(num_io_pages <= MAXPHYS/PAGE_SIZE + 1);
1834 
1835 			/* Map pages for this read */
1836 			for (j = 0; j < num_io_pages; j ++)
1837 				pmap_kenter_pa(tempva + j * PAGE_SIZE,
1838 				    img_cur + j * PAGE_SIZE,
1839 				    PROT_READ | PROT_WRITE);
1840 
1841 			pmap_update(pmap_kernel());
1842 
1843 			hibernate_block_io(hib, blkctr, read_size,
1844 			    tempva + (img_cur & PAGE_MASK), 0);
1845 
1846 			blkctr += (read_size / DEV_BSIZE);
1847 
1848 			pmap_kremove(tempva, num_io_pages * PAGE_SIZE);
1849 			pmap_update(pmap_kernel());
1850 
1851 			processed += read_size;
1852 			img_cur += read_size;
1853 		}
1854 	}
1855 
1856 	pmap_kremove(hibernate_fchunk_area, 24 * PAGE_SIZE);
1857 	pmap_update(pmap_kernel());
1858 
1859 	return (0);
1860 }
1861 
1862 /*
1863  * Hibernating a machine comprises the following operations:
1864  *  1. Calculating this machine's hibernate_info information
1865  *  2. Allocating a piglet and saving the piglet's physaddr
1866  *  3. Calculating the memory chunks
1867  *  4. Writing the compressed chunks to disk
1868  *  5. Writing the chunk table
1869  *  6. Writing the signature block (hibernate_info)
1870  *
1871  * On most architectures, the function calling hibernate_suspend would
1872  * then power off the machine using some MD-specific implementation.
1873  */
1874 int
1875 hibernate_suspend(void)
1876 {
1877 	union hibernate_info hib;
1878 	u_long start, end;
1879 
1880 	/*
1881 	 * Calculate memory ranges, swap offsets, etc.
1882 	 * This also allocates a piglet whose physaddr is stored in
1883 	 * hib->piglet_pa and vaddr stored in hib->piglet_va
1884 	 */
1885 	if (get_hibernate_info(&hib, 1)) {
1886 		DPRINTF("failed to obtain hibernate info\n");
1887 		return (1);
1888 	}
1889 
1890 	/* Find a page-addressed region in swap [start,end] */
1891 	if (uvm_hibswap(hib.dev, &start, &end)) {
1892 		printf("hibernate: cannot find any swap\n");
1893 		return (1);
1894 	}
1895 
1896 	if (end - start < 1000) {
1897 		printf("hibernate: insufficient swap (%lu is too small)\n",
1898 			end - start);
1899 		return (1);
1900 	}
1901 
1902 	/* Calculate block offsets in swap */
1903 	hib.image_offset = ctod(start);
1904 
1905 	DPRINTF("hibernate @ block %lld max-length %lu blocks\n",
1906 	    hib.image_offset, ctod(end) - ctod(start));
1907 
1908 	pmap_activate(curproc);
1909 	DPRINTF("hibernate: writing chunks\n");
1910 	if (hibernate_write_chunks(&hib)) {
1911 		DPRINTF("hibernate_write_chunks failed\n");
1912 		return (1);
1913 	}
1914 
1915 	DPRINTF("hibernate: writing chunktable\n");
1916 	if (hibernate_write_chunktable(&hib)) {
1917 		DPRINTF("hibernate_write_chunktable failed\n");
1918 		return (1);
1919 	}
1920 
1921 	DPRINTF("hibernate: writing signature\n");
1922 	if (hibernate_write_signature(&hib)) {
1923 		DPRINTF("hibernate_write_signature failed\n");
1924 		return (1);
1925 	}
1926 
1927 	/* Allow the disk to settle */
1928 	delay(500000);
1929 
1930 	/*
1931 	 * Give the device-specific I/O function a notification that we're
1932 	 * done, and that it can clean up or shutdown as needed.
1933 	 */
1934 	hib.io_func(hib.dev, 0, (vaddr_t)NULL, 0, HIB_DONE, hib.io_page);
1935 	return (0);
1936 }
1937 
1938 int
1939 hibernate_alloc(void)
1940 {
1941 	KASSERT(global_piglet_va == 0);
1942 	KASSERT(hibernate_temp_page == 0);
1943 
1944 	pmap_activate(curproc);
1945 	pmap_kenter_pa(HIBERNATE_HIBALLOC_PAGE, HIBERNATE_HIBALLOC_PAGE,
1946 	    PROT_READ | PROT_WRITE);
1947 
1948 	/* Allocate a piglet, store its addresses in the supplied globals */
1949 	if (uvm_pmr_alloc_piglet(&global_piglet_va, &global_piglet_pa,
1950 	    HIBERNATE_CHUNK_SIZE * 4, HIBERNATE_CHUNK_SIZE))
1951 		goto unmap;
1952 
1953 	/*
1954 	 * Allocate VA for the temp page.
1955 	 *
1956 	 * This will become part of the suspended kernel and will
1957 	 * be freed in hibernate_free, upon resume (or hibernate
1958 	 * failure)
1959 	 */
1960 	hibernate_temp_page = (vaddr_t)km_alloc(PAGE_SIZE, &kv_any,
1961 	    &kp_none, &kd_nowait);
1962 	if (!hibernate_temp_page) {
1963 		uvm_pmr_free_piglet(global_piglet_va,
1964 		    4 * HIBERNATE_CHUNK_SIZE);
1965 		global_piglet_va = 0;
1966 		goto unmap;
1967 	}
1968 	return (0);
1969 unmap:
1970 	pmap_kremove(HIBERNATE_HIBALLOC_PAGE, PAGE_SIZE);
1971 	pmap_update(pmap_kernel());
1972 	return (ENOMEM);
1973 }
1974 
1975 /*
1976  * Free items allocated by hibernate_alloc()
1977  */
1978 void
1979 hibernate_free(void)
1980 {
1981 	pmap_activate(curproc);
1982 
1983 	if (global_piglet_va)
1984 		uvm_pmr_free_piglet(global_piglet_va,
1985 		    4 * HIBERNATE_CHUNK_SIZE);
1986 
1987 	if (hibernate_temp_page) {
1988 		pmap_kremove(hibernate_temp_page, PAGE_SIZE);
1989 		km_free((void *)hibernate_temp_page, PAGE_SIZE,
1990 		    &kv_any, &kp_none);
1991 	}
1992 
1993 	global_piglet_va = 0;
1994 	hibernate_temp_page = 0;
1995 	pmap_kremove(HIBERNATE_HIBALLOC_PAGE, PAGE_SIZE);
1996 	pmap_update(pmap_kernel());
1997 }
1998