xref: /openbsd-src/sys/kern/subr_hibernate.c (revision 21e3fba64eb608323ab8fd390e00c24330538f6a)
1 /*	$OpenBSD: subr_hibernate.c,v 1.13 2011/07/11 03:30:32 mlarkin Exp $	*/
2 
3 /*
4  * Copyright (c) 2011 Ariane van der Steldt <ariane@stack.nl>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 
19 #include <sys/hibernate.h>
20 #include <sys/malloc.h>
21 #include <sys/param.h>
22 #include <sys/tree.h>
23 #include <sys/types.h>
24 #include <sys/systm.h>
25 #include <sys/disklabel.h>
26 #include <sys/conf.h>
27 #include <sys/buf.h>
28 #include <sys/fcntl.h>
29 #include <sys/stat.h>
30 #include <uvm/uvm.h>
31 #include <machine/hibernate.h>
32 
33 extern char *disk_readlabel(struct disklabel *, dev_t, char *, size_t);
34 
35 struct hibernate_state *hibernate_state;
36 
37 /*
38  * Hib alloc enforced alignment.
39  */
40 #define HIB_ALIGN		8 /* bytes alignment */
41 
42 /*
43  * sizeof builtin operation, but with alignment constraint.
44  */
45 #define HIB_SIZEOF(_type)	roundup(sizeof(_type), HIB_ALIGN)
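
/*
 * Illustrative note (not in the original source): with HIB_ALIGN == 8,
 * HIB_SIZEOF() rounds a 13-byte structure up to 16 bytes, so every entry
 * header and every allocation starts on an 8-byte boundary.
 */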
46 
47 struct hiballoc_entry
48 {
49 	size_t			hibe_use;
50 	size_t			hibe_space;
51 	RB_ENTRY(hiballoc_entry) hibe_entry;
52 };
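
/*
 * Layout sketch (added for clarity, derived from the code below): each
 * entry is immediately followed by the memory it hands out and then by
 * its free tail space, which later allocations carve up:
 *
 *	+-----------------+--------------------+---------------------+
 *	| hiballoc_entry  | hibe_use bytes     | hibe_space bytes    |
 *	| (HIB_SIZEOF)    | (in use by caller) | (free)              |
 *	+-----------------+--------------------+---------------------+
 *	^                 ^
 *	entry             hib_entry_to_addr(entry)
 */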
53 
54 /*
55  * Compare hiballoc entries based on the address they manage.
56  *
57  * Since the managed address sits at a fixed offset from its
58  * struct hiballoc_entry, we can simply compare the entry pointers.
59  */
60 static __inline int
61 hibe_cmp(struct hiballoc_entry *l, struct hiballoc_entry *r)
62 {
63 	return l < r ? -1 : (l > r);
64 }
65 
66 RB_PROTOTYPE(hiballoc_addr, hiballoc_entry, hibe_entry, hibe_cmp)
67 
68 /*
69  * Given a hiballoc entry, return the address it manages.
70  */
71 static __inline void*
72 hib_entry_to_addr(struct hiballoc_entry *entry)
73 {
74 	caddr_t addr;
75 
76 	addr = (caddr_t)entry;
77 	addr += HIB_SIZEOF(struct hiballoc_entry);
78 	return addr;
79 }
80 
81 /*
82  * Given an address, find the hiballoc that corresponds.
83  * Given an address, find the hiballoc_entry that manages it.
84 static __inline struct hiballoc_entry*
85 hib_addr_to_entry(void* addr_param)
86 {
87 	caddr_t addr;
88 
89 	addr = (caddr_t)addr_param;
90 	addr -= HIB_SIZEOF(struct hiballoc_entry);
91 	return (struct hiballoc_entry*)addr;
92 }
93 
94 RB_GENERATE(hiballoc_addr, hiballoc_entry, hibe_entry, hibe_cmp)
95 
96 /*
97  * Allocate memory from the arena.
98  *
99  * Returns NULL if no memory is available.
100  */
101 void*
102 hib_alloc(struct hiballoc_arena *arena, size_t alloc_sz)
103 {
104 	struct hiballoc_entry *entry, *new_entry;
105 	size_t find_sz;
106 
107 	/*
108 	 * Enforce alignment of HIB_ALIGN bytes.
109 	 *
110 	 * Note that, because the entry is put in front of the allocation,
111 	 * 0-byte allocations are guaranteed a unique address.
112 	 */
113 	alloc_sz = roundup(alloc_sz, HIB_ALIGN);
114 
115 	/*
116 	 * Find an entry with hibe_space >= find_sz.
117 	 *
118 	 * If the root node is not large enough, we switch to tree traversal.
119 	 * Because all entries are made at the bottom of the free space,
120 	 * traversal from the end has a slightly better chance of yielding
121 	 * a sufficiently large space.
122 	 */
123 	find_sz = alloc_sz + HIB_SIZEOF(struct hiballoc_entry);
124 	entry = RB_ROOT(&arena->hib_addrs);
125 	if (entry != NULL && entry->hibe_space < find_sz) {
126 		RB_FOREACH_REVERSE(entry, hiballoc_addr, &arena->hib_addrs) {
127 			if (entry->hibe_space >= find_sz)
128 				break;
129 		}
130 	}
131 
132 	/*
133 	 * Insufficient or too fragmented memory.
134 	 */
135 	if (entry == NULL)
136 		return NULL;
137 
138 	/*
139 	 * Create new entry in allocated space.
140 	 */
141 	new_entry = (struct hiballoc_entry*)(
142 	    (caddr_t)hib_entry_to_addr(entry) + entry->hibe_use);
143 	new_entry->hibe_space = entry->hibe_space - find_sz;
144 	new_entry->hibe_use = alloc_sz;
145 
146 	/*
147 	 * Insert entry.
148 	 */
149 	if (RB_INSERT(hiballoc_addr, &arena->hib_addrs, new_entry) != NULL)
150 		panic("hib_alloc: insert failure");
151 	entry->hibe_space = 0;
152 
153 	/* Return address managed by entry. */
154 	return hib_entry_to_addr(new_entry);
155 }
156 
157 /*
158  * Free a pointer previously allocated from this arena.
159  *
160  * If addr is NULL, this will be silently accepted.
161  */
162 void
163 hib_free(struct hiballoc_arena *arena, void *addr)
164 {
165 	struct hiballoc_entry *entry, *prev;
166 
167 	if (addr == NULL)
168 		return;
169 
170 	/*
171 	 * Derive entry from addr and check it is really in this arena.
172 	 */
173 	entry = hib_addr_to_entry(addr);
174 	if (RB_FIND(hiballoc_addr, &arena->hib_addrs, entry) != entry)
175 		panic("hib_free: freed item %p not in hib arena", addr);
176 
177 	/*
178 	 * Give the space in entry to its predecessor.
179 	 *
180 	 * If entry has no predecessor, change its used space into free space
181 	 * instead.
182 	 */
183 	prev = RB_PREV(hiballoc_addr, &arena->hib_addrs, entry);
184 	if (prev != NULL &&
185 	    (void*)((caddr_t)prev + HIB_SIZEOF(struct hiballoc_entry) +
186 	    prev->hibe_use + prev->hibe_space) == entry) {
187 		/* Merge entry. */
188 		RB_REMOVE(hiballoc_addr, &arena->hib_addrs, entry);
189 		prev->hibe_space += HIB_SIZEOF(struct hiballoc_entry) +
190 		    entry->hibe_use + entry->hibe_space;
191 	} else {
192 	  	/* Flip used memory to free space. */
193 		entry->hibe_space += entry->hibe_use;
194 		entry->hibe_use = 0;
195 	}
196 }
197 
198 /*
199  * Initialize hiballoc.
200  *
201  * The allocator will manage memmory at ptr, which is len bytes.
202  * The allocator will manage the memory at ptr, which is len bytes long.
203 int
204 hiballoc_init(struct hiballoc_arena *arena, void *p_ptr, size_t p_len)
205 {
206 	struct hiballoc_entry *entry;
207 	caddr_t ptr;
208 	size_t len;
209 
210 	RB_INIT(&arena->hib_addrs);
211 
212 	/*
213 	 * Hib allocator enforces HIB_ALIGN alignment.
214 	 * Fixup ptr and len.
215 	 */
216 	ptr = (caddr_t)roundup((vaddr_t)p_ptr, HIB_ALIGN);
217 	len = p_len - ((size_t)ptr - (size_t)p_ptr);
218 	len &= ~((size_t)HIB_ALIGN - 1);
219 
220 	/*
221 	 * Insufficient memory to be able to allocate and also do bookkeeping.
222 	 */
223 	if (len <= HIB_SIZEOF(struct hiballoc_entry))
224 		return ENOMEM;
225 
226 	/*
227 	 * Create entry describing space.
228 	 */
229 	entry = (struct hiballoc_entry*)ptr;
230 	entry->hibe_use = 0;
231 	entry->hibe_space = len - HIB_SIZEOF(struct hiballoc_entry);
232 	RB_INSERT(hiballoc_addr, &arena->hib_addrs, entry);
233 
234 	return 0;
235 }
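
/*
 * Usage sketch (illustrative only, not part of the original file): an
 * arena is laid over a caller-provided buffer; hib_alloc() returns NULL
 * when the arena cannot satisfy the request.
 *
 *	struct hiballoc_arena arena;
 *	static char scratch[4096];
 *	void *p;
 *
 *	if (hiballoc_init(&arena, scratch, sizeof(scratch)) == 0) {
 *		p = hib_alloc(&arena, 128);
 *		if (p != NULL)
 *			hib_free(&arena, p);
 *	}
 */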
236 
237 
238 /*
239  * Zero all free memory.
240  */
241 void
242 uvm_pmr_zero_everything(void)
243 {
244 	struct uvm_pmemrange	*pmr;
245 	struct vm_page		*pg;
246 	int			 i;
247 
248 	uvm_lock_fpageq();
249 	TAILQ_FOREACH(pmr, &uvm.pmr_control.use, pmr_use) {
250 		/* Zero single pages. */
251 		while ((pg = TAILQ_FIRST(&pmr->single[UVM_PMR_MEMTYPE_DIRTY]))
252 		    != NULL) {
253 			uvm_pmr_remove(pmr, pg);
254 			uvm_pagezero(pg);
255 			atomic_setbits_int(&pg->pg_flags, PG_ZERO);
256 			uvmexp.zeropages++;
257 			uvm_pmr_insert(pmr, pg, 0);
258 		}
259 
260 		/* Zero multi page ranges. */
261 		while ((pg = RB_ROOT(&pmr->size[UVM_PMR_MEMTYPE_DIRTY]))
262 		    != NULL) {
263 			pg--; /* The size tree points at a range's second page. */
264 			uvm_pmr_remove(pmr, pg);
265 			for (i = 0; i < pg->fpgsz; i++) {
266 				uvm_pagezero(&pg[i]);
267 				atomic_setbits_int(&pg[i].pg_flags, PG_ZERO);
268 				uvmexp.zeropages++;
269 			}
270 			uvm_pmr_insert(pmr, pg, 0);
271 		}
272 	}
273 	uvm_unlock_fpageq();
274 }
275 
276 /*
277  * Mark all memory as dirty.
278  *
279  * Used to inform the system that the clean memory isn't clean for some
280  * reason, for example because we just came back from hibernate.
281  */
282 void
283 uvm_pmr_dirty_everything(void)
284 {
285 	struct uvm_pmemrange	*pmr;
286 	struct vm_page		*pg;
287 	int			 i;
288 
289 	uvm_lock_fpageq();
290 	TAILQ_FOREACH(pmr, &uvm.pmr_control.use, pmr_use) {
291 		/* Dirty single pages. */
292 		while ((pg = TAILQ_FIRST(&pmr->single[UVM_PMR_MEMTYPE_ZERO]))
293 		    != NULL) {
294 			uvm_pmr_remove(pmr, pg);
295 			atomic_clearbits_int(&pg->pg_flags, PG_ZERO);
296 			uvm_pmr_insert(pmr, pg, 0);
297 		}
298 
299 		/* Dirty multi page ranges. */
300 		while ((pg = RB_ROOT(&pmr->size[UVM_PMR_MEMTYPE_ZERO]))
301 		    != NULL) {
302 			pg--; /* The size tree points at a range's second page. */
303 			uvm_pmr_remove(pmr, pg);
304 			for (i = 0; i < pg->fpgsz; i++)
305 				atomic_clearbits_int(&pg[i].pg_flags, PG_ZERO);
306 			uvm_pmr_insert(pmr, pg, 0);
307 		}
308 	}
309 
310 	uvmexp.zeropages = 0;
311 	uvm_unlock_fpageq();
312 }
313 
314 /*
315  * Allocate memory at the highest address able to hold sz.
316  *
317  * sz is in bytes.
318  */
319 int
320 uvm_pmr_alloc_pig(paddr_t *addr, psize_t sz)
321 {
322 	struct uvm_pmemrange	*pmr;
323 	struct vm_page		*pig_pg, *pg;
324 
325 	/*
326 	 * Convert sz to pages, since that is what pmemrange uses internally.
327 	 */
328 	sz = atop(round_page(sz));
329 
330 	uvm_lock_fpageq();
331 
332 	TAILQ_FOREACH(pmr, &uvm.pmr_control.use, pmr_use) {
333 		RB_FOREACH_REVERSE(pig_pg, uvm_pmr_addr, &pmr->addr) {
334 			if (pig_pg->fpgsz >= sz) {
335 				goto found;
336 			}
337 		}
338 	}
339 
340 	/*
341 	 * Allocation failure.
342 	 */
343 	uvm_unlock_fpageq();
344 	return ENOMEM;
345 
346 found:
347 	/* Remove page from freelist. */
348 	uvm_pmr_remove_size(pmr, pig_pg);
349 	pig_pg->fpgsz -= sz;
350 	pg = pig_pg + pig_pg->fpgsz;
351 	if (pig_pg->fpgsz == 0)
352 		uvm_pmr_remove_addr(pmr, pig_pg);
353 	else
354 		uvm_pmr_insert_size(pmr, pig_pg);
355 
356 	uvmexp.free -= sz;
357 	*addr = VM_PAGE_TO_PHYS(pg);
358 
359 	/*
360 	 * Update pg flags.
361 	 *
362 	 * Note that we trash the sz argument now.
363 	 */
364 	while (sz > 0) {
365 		KASSERT(pg->pg_flags & PQ_FREE);
366 
367 		atomic_clearbits_int(&pg->pg_flags,
368 		    PG_PMAP0|PG_PMAP1|PG_PMAP2|PG_PMAP3);
369 
370 		if (pg->pg_flags & PG_ZERO)
371 			uvmexp.zeropages--;
372 		atomic_clearbits_int(&pg->pg_flags,
373 		    PG_ZERO|PQ_FREE);
374 
375 		pg->uobject = NULL;
376 		pg->uanon = NULL;
377 		pg->pg_version++;
378 
379 		/*
380 		 * Next.
381 		 */
382 		pg++;
383 		sz--;
384 	}
385 
386 	/* Return. */
387 	uvm_unlock_fpageq();
388 	return 0;
389 }
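
/*
 * Example call (sketch, assuming a caller that only needs the physical
 * start address of the pig region):
 *
 *	paddr_t pig_start;
 *
 *	if (uvm_pmr_alloc_pig(&pig_start, 16 * 1024 * 1024) != 0)
 *		printf("pig allocation failed\n");
 */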
390 
391 /*
392  * Allocate a piglet area.
393  *
394  * The piglet is placed as low in physical memory as possible and is
395  * aligned to 'align' bytes.
396  *
397  * sz and align in bytes.
398  *
399  * The call will sleep for the pagedaemon to attempt to free memory.
400  * The pagedaemon may decide it's not possible to free enough memory, causing
401  * the allocation to fail.
402  */
403 int
404 uvm_pmr_alloc_piglet(paddr_t *addr, psize_t sz, paddr_t align)
405 {
406 	vaddr_t			 pg_addr, piglet_addr;
407 	struct uvm_pmemrange	*pmr;
408 	struct vm_page		*pig_pg, *pg;
409 	struct pglist		 pageq;
410 	int			 pdaemon_woken;
411 
412 	KASSERT((align & (align - 1)) == 0);
413 	pdaemon_woken = 0; /* Didn't wake the pagedaemon. */
414 
415 	/*
416 	 * Fixup arguments: align must be at least PAGE_SIZE,
417 	 * sz will be converted to pagecount, since that is what
418 	 * pmemrange uses internally.
419 	 */
420 	if (align < PAGE_SIZE)
421 		align = PAGE_SIZE;
422 	sz = atop(round_page(sz));
423 
424 	uvm_lock_fpageq();
425 
426 	TAILQ_FOREACH_REVERSE(pmr, &uvm.pmr_control.use, uvm_pmemrange_use,
427 	    pmr_use) {
428 retry:
429 		/*
430 		 * Search for a range with enough space.
431 		 * Use the address tree, to ensure the range is as low as
432 		 * possible.
433 		 */
434 		RB_FOREACH(pig_pg, uvm_pmr_addr, &pmr->addr) {
435 			pg_addr = VM_PAGE_TO_PHYS(pig_pg);
436 			piglet_addr = (pg_addr + (align - 1)) & ~(align - 1);
437 
438 			/*
439 			 * The range fits only if the aligned piglet still
440 			 * ends within it.
441 			 */
442 			if (atop(pg_addr) + pig_pg->fpgsz >=
443 			    atop(piglet_addr) + sz) {
444 				goto found;
445 			}
446 		}
447 
448 		/*
449 		 * Try to coerce the pagedaemon into freeing memory
450 		 * for the piglet.
451 		 *
452 		 * pdaemon_woken is set to prevent the code from
453 		 * falling into an endless loop.
454 		 */
455 		if (!pdaemon_woken) {
456 			pdaemon_woken = 1;
457 			if (uvm_wait_pla(ptoa(pmr->low), ptoa(pmr->high) - 1,
458 			    ptoa(sz), UVM_PLA_FAILOK) == 0)
459 				goto retry;
460 		}
461 	}
462 
463 	/* Return failure. */
464 	uvm_unlock_fpageq();
465 	return ENOMEM;
466 
467 found:
468 	/*
469 	 * Extract piglet from pigpen.
470 	 */
471 	TAILQ_INIT(&pageq);
472 	uvm_pmr_extract_range(pmr, pig_pg,
473 	    atop(piglet_addr), atop(piglet_addr) + sz, &pageq);
474 
475 	*addr = piglet_addr;
476 	uvmexp.free -= sz;
477 
478 	/*
479 	 * Update pg flags.
480 	 *
481 	 * Note that we trash the sz argument now.
482 	 */
483 	TAILQ_FOREACH(pg, &pageq, pageq) {
484 		KASSERT(pg->pg_flags & PQ_FREE);
485 
486 		atomic_clearbits_int(&pg->pg_flags,
487 		    PG_PMAP0|PG_PMAP1|PG_PMAP2|PG_PMAP3);
488 
489 		if (pg->pg_flags & PG_ZERO)
490 			uvmexp.zeropages--;
491 		atomic_clearbits_int(&pg->pg_flags,
492 		    PG_ZERO|PQ_FREE);
493 
494 		pg->uobject = NULL;
495 		pg->uanon = NULL;
496 		pg->pg_version++;
497 	}
498 
499 	uvm_unlock_fpageq();
500 	return 0;
501 }
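
/*
 * Alignment note (illustrative): with align = 4MB, a free range starting
 * at physical address 0xa00000 yields piglet_addr = 0xc00000; the pages
 * between pg_addr and piglet_addr remain on the free list.
 * get_hibernate_info() below requests a piglet of HIBERNATE_CHUNK_SIZE
 * bytes aligned to the same value.
 */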
502 
503 /*
504  * Physmem RLE compression support.
505  *
506  * Given a physical page address, return the number of consecutive
507  * free pages starting at that address.
508  * Returns 0 if the page at addr is not free.
509  */
510 psize_t
511 uvm_page_rle(paddr_t addr)
512 {
513 	struct vm_page		*pg, *pg_end;
514 	struct vm_physseg	*vmp;
515 	int			 pseg_idx, off_idx;
516 
517 	pseg_idx = vm_physseg_find(atop(addr), &off_idx);
518 	if (pseg_idx == -1)
519 		return 0;
520 
521 	vmp = &vm_physmem[pseg_idx];
522 	pg = &vmp->pgs[off_idx];
523 	if (!(pg->pg_flags & PQ_FREE))
524 		return 0;
525 
526 	/*
527 	 * Search for the first non-free page after pg.
528 	 * Note that the page may not be the first page in a free pmemrange,
529 	 * therefore pg->fpgsz cannot be used.
530 	 */
531 	for (pg_end = pg; pg_end <= vmp->lastpg &&
532 	    (pg_end->pg_flags & PQ_FREE) == PQ_FREE; pg_end++);
533 	return pg_end - pg;
534 }
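
/*
 * Sketch (added, not from the original source) of how a caller could walk
 * physical memory and run-length encode the free regions; record_free_run()
 * is a hypothetical helper.  A return of 0 means "page not free, advance
 * one page".
 *
 *	paddr_t addr;
 *	psize_t npages;
 *
 *	for (addr = start; addr < end; ) {
 *		npages = uvm_page_rle(addr);
 *		if (npages == 0) {
 *			addr += PAGE_SIZE;
 *			continue;
 *		}
 *		record_free_run(addr, npages);
 *		addr += ptoa(npages);
 *	}
 */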
535 
536 /*
537  * get_hibernate_info
538  *
539  * Fills out the hibernate_info union pointed to by hiber_info
540  * with information about this machine (swap signature block
541  * offsets, number of memory ranges, kernel in use, etc.).
542  *
543  */
544 int
545 get_hibernate_info(union hibernate_info *hiber_info)
546 {
547 	int chunktable_size;
548 	struct disklabel dl;
549 	char err_string[128], *dl_ret;
550 
551 	/* Determine I/O function to use */
552 	hiber_info->io_func = get_hibernate_io_function();
553 	if (hiber_info->io_func == NULL)
554 		return (1);
555 
556 	/* Calculate hibernate device */
557 	hiber_info->device = swdevt[0].sw_dev;
558 
559 	/* Read disklabel (used to calculate signature and image offsets) */
560 	dl_ret = disk_readlabel(&dl, hiber_info->device, err_string, 128);
561 
562 	if (dl_ret) {
563 		printf("Hibernate error reading disklabel: %s\n", dl_ret);
564 		return (1);
565 	}
566 
567 	hiber_info->secsize = dl.d_secsize;
568 
569 	/* Make sure the signature can fit in one block */
570 	KASSERT(sizeof(union hibernate_info)/hiber_info->secsize == 1);
571 
572 	/* Calculate swap offset from start of disk */
573 	hiber_info->swap_offset = dl.d_partitions[1].p_offset;
574 
575 	/* Calculate signature block location */
576 	hiber_info->sig_offset = dl.d_partitions[1].p_offset +
577 		dl.d_partitions[1].p_size -
578 		sizeof(union hibernate_info)/hiber_info->secsize;
579 
580 	chunktable_size = HIBERNATE_CHUNK_TABLE_SIZE / hiber_info->secsize;
581 
582 	/* Calculate memory image location */
583 	hiber_info->image_offset = dl.d_partitions[1].p_offset +
584 		dl.d_partitions[1].p_size -
585 		(hiber_info->image_size / hiber_info->secsize) -
586 		sizeof(union hibernate_info)/hiber_info->secsize -
587 		chunktable_size;
588 
589 	/* Stash kernel version information */
590 	bzero(&hiber_info->kernel_version, 128);
591 	bcopy(version, &hiber_info->kernel_version,
592 		min(strlen(version), sizeof(hiber_info->kernel_version)-1));
593 
594 	/* Allocate piglet region */
595 	if (uvm_pmr_alloc_piglet(&hiber_info->piglet_base, HIBERNATE_CHUNK_SIZE,
596 		HIBERNATE_CHUNK_SIZE)) {
597 		printf("Hibernate failed to allocate the piglet\n");
598 		return (1);
599 	}
600 
601 	return get_hibernate_info_md(hiber_info);
602 }
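
/*
 * On-disk layout implied by the offsets computed above (sketch):
 *
 *	start of swap ('b') partition                    end of partition
 *	| ... swap ... | memory image | chunk table | signature block |
 *	                ^                                ^
 *	                image_offset                     sig_offset
 *
 * sig_offset addresses the final block of the partition, the chunk table
 * sits immediately in front of it, and image_offset is chosen so the
 * memory image ends where the chunk table begins.
 */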
603 
604 /*
605  * hibernate_zlib_alloc
606  *
607  * Allocate nitems*size bytes from the hiballoc area presently in use
608  *
609  */
610 void *
611 hibernate_zlib_alloc(void *unused, int nitems, int size)
612 {
613 	return hib_alloc(&hibernate_state->hiballoc_arena, nitems*size);
614 }
615 
616 /*
617  * hibernate_zlib_free
618  *
619  * Free the memory pointed to by addr in the hiballoc area presently in
620  * use
621  *
622  */
623 void
624 hibernate_zlib_free(void *unused, void *addr)
625 {
626 	hib_free(&hibernate_state->hiballoc_arena, addr);
627 }
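
/*
 * These callbacks are intended to be plugged into the zlib stream so that
 * its state is carved out of the hiballoc arena.  A minimal sketch,
 * assuming the standard zlib z_stream fields:
 *
 *	hibernate_state->hib_stream.zalloc = (alloc_func)hibernate_zlib_alloc;
 *	hibernate_state->hib_stream.zfree = (free_func)hibernate_zlib_free;
 *	hibernate_state->hib_stream.opaque = NULL;
 */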
628 
629 /*
630  * hibernate_inflate
631  *
632  * Inflate size bytes from src into dest, skipping any pages in
633  * [src..dest] that are special (see hibernate_inflate_skip)
634  *
635  * For each page of output data, we map HIBERNATE_TEMP_PAGE
636  * to the current output page, and tell inflate() to inflate
637  * its data there, resulting in the inflated data being placed
638  * at the proper paddr.
639  *
640  * This function executes while using the resume-time stack
641  * and pmap, and therefore cannot use ddb/printf/etc. Doing so
642  * will likely hang or reset the machine.
643  *
644  */
645 void
646 hibernate_inflate(paddr_t dest, paddr_t src, size_t size)
647 {
648 	int i;
649 
650 	hibernate_state->hib_stream.avail_in = size;
651 	hibernate_state->hib_stream.next_in = (char *)src;
652 
653 	do {
654 		/* Flush cache and TLB */
655 		hibernate_flush();
656 
657 		/*
658 		 * Is this a special page? If yes, redirect the
659 		 * inflate output to a scratch page (i.e., discard it)
660 		 */
661 		if (hibernate_inflate_skip(dest))
662 			hibernate_enter_resume_mapping(HIBERNATE_TEMP_PAGE,
663 				HIBERNATE_TEMP_PAGE, 0);
664 		else
665 			hibernate_enter_resume_mapping(HIBERNATE_TEMP_PAGE,
666 				dest, 0);
667 
668 		/* Set up the stream for inflate */
669 		hibernate_state->hib_stream.avail_out = PAGE_SIZE;
670 		hibernate_state->hib_stream.next_out =
671 			(char *)HIBERNATE_TEMP_PAGE;
672 
673 		/* Process next block of data */
674 		i = inflate(&hibernate_state->hib_stream, Z_PARTIAL_FLUSH);
675 		if (i != Z_OK && i != Z_STREAM_END) {
676 			/*
677 			 * XXX - this will likely reboot/hang most machines,
678 			 *       but there's not much else we can do here.
679 			 */
680 			panic("inflate error");
681 		}
682 
683 		dest += PAGE_SIZE - hibernate_state->hib_stream.avail_out;
684 	} while (i != Z_STREAM_END);
685 }
686 
687 /*
688  * hibernate_deflate
689  *
690  * deflate from src into the I/O page, up to 'remaining' bytes
691  *
692  * Returns the number of input bytes consumed, and updates the
693  * 'remaining' parameter with the output space left in the I/O page
694  * (this information is needed to know how much to write to disk).
695  *
696  */
697 size_t
698 hibernate_deflate(paddr_t src, size_t *remaining)
699 {
700 	/* Set up the stream for deflate */
701 	hibernate_state->hib_stream.avail_in = PAGE_SIZE -
702 		(src & PAGE_MASK);
703 	hibernate_state->hib_stream.avail_out = *remaining;
704 	hibernate_state->hib_stream.next_in = (caddr_t)src;
705 	hibernate_state->hib_stream.next_out = (caddr_t)HIBERNATE_IO_PAGE +
706 		(PAGE_SIZE - *remaining);
707 
708 	/* Process next block of data */
709 	if (deflate(&hibernate_state->hib_stream, Z_PARTIAL_FLUSH) != Z_OK)
710 		panic("hibernate zlib deflate error\n");
711 
712 	/* Update pointers and return number of bytes consumed */
713 	*remaining = hibernate_state->hib_stream.avail_out;
714 	return (PAGE_SIZE - (src & PAGE_MASK)) -
715 		hibernate_state->hib_stream.avail_in;
716 }
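
/*
 * Usage sketch (illustrative): a caller repeatedly feeds source pages and
 * flushes the I/O page whenever it fills up.  'remaining' counts the free
 * space left in HIBERNATE_IO_PAGE, so it is reset to PAGE_SIZE after each
 * flush.
 *
 *	size_t remaining = PAGE_SIZE, consumed;
 *
 *	consumed = hibernate_deflate(src_pa, &remaining);
 *	src_pa += consumed;
 *	if (remaining == 0) {
 *		write HIBERNATE_IO_PAGE to disk here;
 *		remaining = PAGE_SIZE;
 *	}
 */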
717 
718 /*
719  * hibernate_write_signature
720  *
721  * Write the hibernation information specified in hiber_info
722  * to the location in swap previously calculated (last block of
723  * swap), called the "signature block".
724  *
725  * Write the memory chunk table to the area in swap immediately
726  * preceding the signature block.
727  */
728 int
729 hibernate_write_signature(union hibernate_info *hiber_info)
730 {
731 	u_int8_t *io_page;
732 	daddr_t chunkbase;
733 	size_t i;
734 
735 	io_page = malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT);
736 	if (!io_page)
737 		return (1);
738 
739 	/* Write hibernate info to disk */
740 	if (hiber_info->io_func(hiber_info->device, hiber_info->sig_offset,
741 	    (vaddr_t)hiber_info, hiber_info->secsize, 1, io_page))
742 		panic("error in hibernate write sig\n");
743 
744 	chunkbase = hiber_info->sig_offset -
745 		    (HIBERNATE_CHUNK_TABLE_SIZE / hiber_info->secsize);
746 
747 	/* Write chunk table */
748 	for (i = 0; i < HIBERNATE_CHUNK_TABLE_SIZE; i += NBPG) {
749 		if (hiber_info->io_func(hiber_info->device,
750 		    chunkbase + (i / hiber_info->secsize),
751 		    (vaddr_t)(HIBERNATE_CHUNK_TABLE_START + i),
752 		    NBPG,
753 		    1,
754 		    io_page))
755 			panic("error in hibernate write chunks\n");
756 	}
757 
758 	free(io_page, M_DEVBUF);
759 
760 	return (0);
761 }
762 
763 /*
764  * hibernate_clear_signature
765  *
766  * Write an empty hiber_info to the swap signature block, which is
767  * guaranteed to not match any valid hiber_info.
768  */
769 int
770 hibernate_clear_signature()
771 {
772 	union hibernate_info blank_hiber_info;
773 	union hibernate_info hiber_info;
774 	u_int8_t *io_page;
775 
776 	/* Zero out a blank hiber_info */
777 	bzero(&blank_hiber_info, sizeof(hiber_info));
778 
779 	if (get_hibernate_info(&hiber_info))
780 		return (1);
781 
782 	io_page = malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT);
783 	if (!io_page)
784 		return (1);
785 
786 	/* Write (zeroed) hibernate info to disk */
787 	if (hiber_info.io_func(hiber_info.device, hiber_info.sig_offset,
788 	    (vaddr_t)&blank_hiber_info, hiber_info.secsize, 1, io_page))
789 		panic("error in hibernate clear signature write\n");
790 
791 	free(io_page, M_DEVBUF);
792 
793 	return (0);
794 }
795 
796 /*
797  * hibernate_check_overlap
798  *
799  * Check chunk range overlap when calculating whether or not to copy a
800  * compressed chunk to the piglet area before decompressing.
801  *
802  * returns zero if the ranges do not overlap, non-zero otherwise.
803  */
804 int
805 hibernate_check_overlap(paddr_t r1s, paddr_t r1e, paddr_t r2s, paddr_t r2e)
806 {
807 	/* case A : end of r1 overlaps start of r2 */
808 	if (r1s < r2s && r1e > r2s)
809 		return (1);
810 
811 	/* case B : r1 entirely inside r2 */
812 	if (r1s >= r2s && r1e <= r2e)
813 		return (1);
814 
815 	/* case C : r2 entirely inside r1 */
816 	if (r2s >= r1s && r2e <= r1e)
817 		return (1);
818 
819 	/* case D : end of r2 overlaps start of r1 */
820 	if (r2s < r1s && r2e > r1s)
821 		return (1);
822 
823 	return (0);
824 }
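
/*
 * For non-empty ranges the four cases above reduce to the single interval
 * test (r1s < r2e && r2s < r1e); they are spelled out case by case for
 * clarity.
 */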
825 
826 /*
827  * hibernate_compare_signature
828  *
829  * Compare two hibernate_infos to determine if they are the same (i.e.,
830  * whether we should be performing a hibernate resume on this machine).
831  * Not all fields are checked - just enough to verify that the machine
832  * has the same memory configuration and kernel as the one that
833  * wrote the signature previously.
834  */
835 int
836 hibernate_compare_signature(union hibernate_info *mine,
837 	union hibernate_info *disk)
838 {
839 	u_int i;
840 
841 	if (mine->nranges != disk->nranges)
842 		return (1);
843 
844 	if (strcmp(mine->kernel_version, disk->kernel_version) != 0)
845 		return (1);
846 
847 	for (i = 0; i < mine->nranges; i++) {
848 		if ((mine->ranges[i].base != disk->ranges[i].base) ||
849 		    (mine->ranges[i].end != disk->ranges[i].end))
850 			return (1);
851 	}
852 
853 	return (0);
854 }
855 
856 /*
857  * hibernate_read_block
858  *
859  * Reads read_size bytes from the hibernate device specified in
860  * hib_info at offset blkctr. Output is placed into the vaddr specified
861  * at dest.
862  *
863  * Separate offsets and pages are used to handle misaligned reads (reads
864  * that span a page boundary).
865  *
866  * blkctr specifies a relative offset (relative to the start of swap),
867  * not an absolute disk offset
868  *
869  */
870 int
871 hibernate_read_block(union hibernate_info *hib_info, daddr_t blkctr,
872 	size_t read_size, vaddr_t dest)
873 {
874 	struct buf *bp;
875 	struct bdevsw *bdsw;
876 	int error;
877 
878 	bp = geteblk(read_size);
879 	bdsw = &bdevsw[major(hib_info->device)];
880 
881 	error = (*bdsw->d_open)(hib_info->device, FREAD, S_IFCHR, curproc);
882 	if (error) {
883 		printf("hibernate_read_block open failed\n");
884 		return (1);
885 	}
886 
887 	bp->b_bcount = read_size;
888 	bp->b_blkno = blkctr;
889 	CLR(bp->b_flags, B_READ | B_WRITE | B_DONE);
890 	SET(bp->b_flags, B_BUSY | B_READ | B_RAW);
891 	bp->b_dev = hib_info->device;
892 	bp->b_cylinder = 0;
893 	(*bdsw->d_strategy)(bp);
894 
895 	error = biowait(bp);
896 	if (error) {
897 		printf("hibernate_read_block biowait failed %d\n", error);
898 		error = (*bdsw->d_close)(hib_info->device, FREAD, S_IFCHR,
899 				curproc);
900 		if (error)
901 			printf("hibernate_read_block error close failed\n");
902 		return (1);
903 	}
904 
905 	error = (*bdsw->d_close)(hib_info->device, FREAD, S_IFCHR, curproc);
906 	if (error) {
907 		printf("hibernate_read_block close failed\n");
908 		return (1);
909 	}
910 
911 	bcopy(bp->b_data, (caddr_t)dest, read_size);
912 
913 	bp->b_flags |= B_INVAL;
914 	brelse(bp);
915 
916 	return (0);
917 }
918 
919