xref: /openbsd-src/sys/kern/subr_hibernate.c (revision 63e557a51174f866246fccfa4470735fd4ca80ae)
1 /*	$OpenBSD: subr_hibernate.c,v 1.15 2011/07/18 16:50:56 ariane Exp $	*/
2 
3 /*
4  * Copyright (c) 2011 Ariane van der Steldt <ariane@stack.nl>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 
19 #include <sys/hibernate.h>
20 #include <sys/malloc.h>
21 #include <sys/param.h>
22 #include <sys/tree.h>
23 #include <sys/types.h>
24 #include <sys/systm.h>
25 #include <sys/disklabel.h>
26 #include <sys/conf.h>
27 #include <sys/buf.h>
28 #include <sys/fcntl.h>
29 #include <sys/stat.h>
30 #include <uvm/uvm.h>
31 #include <machine/hibernate.h>
32 
33 extern char *disk_readlabel(struct disklabel *, dev_t, char *, size_t);
34 
35 struct hibernate_state *hibernate_state;
36 
37 /*
38  * Hib alloc enforced alignment.
39  */
40 #define HIB_ALIGN		8 /* bytes alignment */
41 
42 /*
43  * sizeof builtin operation, but with alignment constraint.
44  */
45 #define HIB_SIZEOF(_type)	roundup(sizeof(_type), HIB_ALIGN)
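
/*
 * For example (illustrative only): with HIB_ALIGN == 8, a type whose
 * sizeof is 20 gives HIB_SIZEOF == 24, so anything placed immediately
 * after it in the arena remains 8-byte aligned.
 */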
46 
47 struct hiballoc_entry
48 {
49 	size_t			hibe_use;
50 	size_t			hibe_space;
51 	RB_ENTRY(hiballoc_entry) hibe_entry;
52 };
53 
54 /*
55  * Compare hiballoc entries based on the address they manage.
56  *
57  * Since the address is fixed, relative to struct hiballoc_entry,
58  * we just compare the hiballoc_entry pointers.
59  */
60 static __inline int
61 hibe_cmp(struct hiballoc_entry *l, struct hiballoc_entry *r)
62 {
63 	return l < r ? -1 : (l > r);
64 }
65 
66 RB_PROTOTYPE(hiballoc_addr, hiballoc_entry, hibe_entry, hibe_cmp)
67 
68 /*
69  * Given a hiballoc entry, return the address it manages.
70  */
71 static __inline void*
72 hib_entry_to_addr(struct hiballoc_entry *entry)
73 {
74 	caddr_t addr;
75 
76 	addr = (caddr_t)entry;
77 	addr += HIB_SIZEOF(struct hiballoc_entry);
78 	return addr;
79 }
80 
81 /*
82  * Given an address, find the hiballoc entry that corresponds.
83  */
84 static __inline struct hiballoc_entry*
85 hib_addr_to_entry(void* addr_param)
86 {
87 	caddr_t addr;
88 
89 	addr = (caddr_t)addr_param;
90 	addr -= HIB_SIZEOF(struct hiballoc_entry);
91 	return (struct hiballoc_entry*)addr;
92 }
93 
94 RB_GENERATE(hiballoc_addr, hiballoc_entry, hibe_entry, hibe_cmp)
95 
96 /*
97  * Allocate memory from the arena.
98  *
99  * Returns NULL if no memory is available.
100  */
101 void*
102 hib_alloc(struct hiballoc_arena *arena, size_t alloc_sz)
103 {
104 	struct hiballoc_entry *entry, *new_entry;
105 	size_t find_sz;
106 
107 	/*
108 	 * Enforce alignment of HIB_ALIGN bytes.
109 	 *
110 	 * Note that, because the entry is put in front of the allocation,
111 	 * 0-byte allocations are guaranteed a unique address.
112 	 */
113 	alloc_sz = roundup(alloc_sz, HIB_ALIGN);
114 
115 	/*
116 	 * Find an entry with hibe_space >= find_sz.
117 	 *
118 	 * If the root node is not large enough, we switch to tree traversal.
119 	 * Because all entries are made at the bottom of the free space,
120 	 * traversal from the end has a slightly better chance of yielding
121 	 * a sufficiently large space.
122 	 */
123 	find_sz = alloc_sz + HIB_SIZEOF(struct hiballoc_entry);
124 	entry = RB_ROOT(&arena->hib_addrs);
125 	if (entry != NULL && entry->hibe_space < find_sz) {
126 		RB_FOREACH_REVERSE(entry, hiballoc_addr, &arena->hib_addrs) {
127 			if (entry->hibe_space >= find_sz)
128 				break;
129 		}
130 	}
131 
132 	/*
133 	 * Insufficient or too fragmented memory.
134 	 */
135 	if (entry == NULL)
136 		return NULL;
137 
138 	/*
139 	 * Create new entry in allocated space.
140 	 */
141 	new_entry = (struct hiballoc_entry*)(
142 	    (caddr_t)hib_entry_to_addr(entry) + entry->hibe_use);
143 	new_entry->hibe_space = entry->hibe_space - find_sz;
144 	new_entry->hibe_use = alloc_sz;
145 
146 	/*
147 	 * Insert entry.
148 	 */
149 	if (RB_INSERT(hiballoc_addr, &arena->hib_addrs, new_entry) != NULL)
150 		panic("hib_alloc: insert failure");
151 	entry->hibe_space = 0;
152 
153 	/* Return address managed by entry. */
154 	return hib_entry_to_addr(new_entry);
155 }
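
/*
 * Illustrative layout after a successful hib_alloc() (not to scale):
 * the new bookkeeping entry is carved out of the donor entry's free
 * space and sits directly in front of the memory it hands out.
 *
 *	[ entry | used | new_entry | used' | space' ]
 *	                            ^ address returned by hib_alloc()
 */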
156 
157 /*
158  * Free a pointer previously allocated from this arena.
159  *
160  * If addr is NULL, this will be silently accepted.
161  */
162 void
163 hib_free(struct hiballoc_arena *arena, void *addr)
164 {
165 	struct hiballoc_entry *entry, *prev;
166 
167 	if (addr == NULL)
168 		return;
169 
170 	/*
171 	 * Derive entry from addr and check it is really in this arena.
172 	 */
173 	entry = hib_addr_to_entry(addr);
174 	if (RB_FIND(hiballoc_addr, &arena->hib_addrs, entry) != entry)
175 		panic("hib_free: freed item %p not in hib arena", addr);
176 
177 	/*
178 	 * Give the space in entry to its predecessor.
179 	 *
180 	 * If entry has no predecessor, change its used space into free space
181 	 * instead.
182 	 */
183 	prev = RB_PREV(hiballoc_addr, &arena->hib_addrs, entry);
184 	if (prev != NULL &&
185 	    (void*)((caddr_t)prev + HIB_SIZEOF(struct hiballoc_entry) +
186 	    prev->hibe_use + prev->hibe_space) == entry) {
187 		/* Merge entry. */
188 		RB_REMOVE(hiballoc_addr, &arena->hib_addrs, entry);
189 		prev->hibe_space += HIB_SIZEOF(struct hiballoc_entry) +
190 		    entry->hibe_use + entry->hibe_space;
191 	} else {
192 	  	/* Flip used memory to free space. */
193 		entry->hibe_space += entry->hibe_use;
194 		entry->hibe_use = 0;
195 	}
196 }
197 
198 /*
199  * Initialize hiballoc.
200  *
201  * The allocator will manage memory at ptr, which is len bytes.
202  */
203 int
204 hiballoc_init(struct hiballoc_arena *arena, void *p_ptr, size_t p_len)
205 {
206 	struct hiballoc_entry *entry;
207 	caddr_t ptr;
208 	size_t len;
209 
210 	RB_INIT(&arena->hib_addrs);
211 
212 	/*
213 	 * Hib allocator enforces HIB_ALIGN alignment.
214 	 * Fixup ptr and len.
215 	 */
216 	ptr = (caddr_t)roundup((vaddr_t)p_ptr, HIB_ALIGN);
217 	len = p_len - ((size_t)ptr - (size_t)p_ptr);
218 	len &= ~((size_t)HIB_ALIGN - 1);
219 
220 	/*
221 	 * Insufficient memory to be able to allocate and also do bookkeeping.
222 	 */
223 	if (len <= HIB_SIZEOF(struct hiballoc_entry))
224 		return ENOMEM;
225 
226 	/*
227 	 * Create entry describing space.
228 	 */
229 	entry = (struct hiballoc_entry*)ptr;
230 	entry->hibe_use = 0;
231 	entry->hibe_space = len - HIB_SIZEOF(struct hiballoc_entry);
232 	RB_INSERT(hiballoc_addr, &arena->hib_addrs, entry);
233 
234 	return 0;
235 }
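
/*
 * Minimal usage sketch of the arena interface (illustrative only; the
 * backing buffer below is a made-up name).  The arena manages caller
 * provided memory and never calls back into the regular kernel
 * allocators:
 *
 *	static char hib_backing[64 * 1024];
 *	struct hiballoc_arena arena;
 *	void *p;
 *
 *	if (hiballoc_init(&arena, hib_backing, sizeof(hib_backing)) != 0)
 *		return;
 *	p = hib_alloc(&arena, 128);
 *	if (p != NULL)
 *		hib_free(&arena, p);
 */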
236 
237 
238 /*
239  * Zero all free memory.
240  */
241 void
242 uvm_pmr_zero_everything(void)
243 {
244 	struct uvm_pmemrange	*pmr;
245 	struct vm_page		*pg;
246 	int			 i;
247 
248 	uvm_lock_fpageq();
249 	TAILQ_FOREACH(pmr, &uvm.pmr_control.use, pmr_use) {
250 		/* Zero single pages. */
251 		while ((pg = TAILQ_FIRST(&pmr->single[UVM_PMR_MEMTYPE_DIRTY]))
252 		    != NULL) {
253 			uvm_pmr_remove(pmr, pg);
254 			uvm_pagezero(pg);
255 			atomic_setbits_int(&pg->pg_flags, PG_ZERO);
256 			uvmexp.zeropages++;
257 			uvm_pmr_insert(pmr, pg, 0);
258 		}
259 
260 		/* Zero multi page ranges. */
261 		while ((pg = RB_ROOT(&pmr->size[UVM_PMR_MEMTYPE_DIRTY]))
262 		    != NULL) {
263 			pg--; /* Size tree always has second page. */
264 			uvm_pmr_remove(pmr, pg);
265 			for (i = 0; i < pg->fpgsz; i++) {
266 				uvm_pagezero(&pg[i]);
267 				atomic_setbits_int(&pg[i].pg_flags, PG_ZERO);
268 				uvmexp.zeropages++;
269 			}
270 			uvm_pmr_insert(pmr, pg, 0);
271 		}
272 	}
273 	uvm_unlock_fpageq();
274 }
275 
276 /*
277  * Mark all memory as dirty.
278  *
279  * Used to inform the system that the clean memory isn't clean for some
280  * reason, for example because we just came back from hibernate.
281  */
282 void
283 uvm_pmr_dirty_everything(void)
284 {
285 	struct uvm_pmemrange	*pmr;
286 	struct vm_page		*pg;
287 	int			 i;
288 
289 	uvm_lock_fpageq();
290 	TAILQ_FOREACH(pmr, &uvm.pmr_control.use, pmr_use) {
291 		/* Dirty single pages. */
292 		while ((pg = TAILQ_FIRST(&pmr->single[UVM_PMR_MEMTYPE_ZERO]))
293 		    != NULL) {
294 			uvm_pmr_remove(pmr, pg);
295 			atomic_clearbits_int(&pg->pg_flags, PG_ZERO);
296 			uvm_pmr_insert(pmr, pg, 0);
297 		}
298 
299 		/* Dirty multi page ranges. */
300 		while ((pg = RB_ROOT(&pmr->size[UVM_PMR_MEMTYPE_ZERO]))
301 		    != NULL) {
302 			pg--; /* Size tree always has second page. */
303 			uvm_pmr_remove(pmr, pg);
304 			for (i = 0; i < pg->fpgsz; i++)
305 				atomic_clearbits_int(&pg[i].pg_flags, PG_ZERO);
306 			uvm_pmr_insert(pmr, pg, 0);
307 		}
308 	}
309 
310 	uvmexp.zeropages = 0;
311 	uvm_unlock_fpageq();
312 }
313 
314 /*
315  * Allocate the highest address that can hold sz.
316  *
317  * sz in bytes.
318  */
319 int
320 uvm_pmr_alloc_pig(paddr_t *addr, psize_t sz)
321 {
322 	struct uvm_pmemrange	*pmr;
323 	struct vm_page		*pig_pg, *pg;
324 
325 	/*
326 	 * Convert sz to pages, since that is what pmemrange uses internally.
327 	 */
328 	sz = atop(round_page(sz));
329 
330 	uvm_lock_fpageq();
331 
332 	TAILQ_FOREACH(pmr, &uvm.pmr_control.use, pmr_use) {
333 		RB_FOREACH_REVERSE(pig_pg, uvm_pmr_addr, &pmr->addr) {
334 			if (pig_pg->fpgsz >= sz) {
335 				goto found;
336 			}
337 		}
338 	}
339 
340 	/*
341 	 * Allocation failure.
342 	 */
343 	uvm_unlock_fpageq();
344 	return ENOMEM;
345 
346 found:
347 	/* Remove page from freelist. */
348 	uvm_pmr_remove_size(pmr, pig_pg);
349 	pig_pg->fpgsz -= sz;
350 	pg = pig_pg + pig_pg->fpgsz;
351 	if (pig_pg->fpgsz == 0)
352 		uvm_pmr_remove_addr(pmr, pig_pg);
353 	else
354 		uvm_pmr_insert_size(pmr, pig_pg);
355 
356 	uvmexp.free -= sz;
357 	*addr = VM_PAGE_TO_PHYS(pg);
358 
359 	/*
360 	 * Update pg flags.
361 	 *
362 	 * Note that we trash the sz argument now.
363 	 */
364 	while (sz > 0) {
365 		KASSERT(pg->pg_flags & PQ_FREE);
366 
367 		atomic_clearbits_int(&pg->pg_flags,
368 		    PG_PMAP0|PG_PMAP1|PG_PMAP2|PG_PMAP3);
369 
370 		if (pg->pg_flags & PG_ZERO)
371 			uvmexp.zeropages--;
372 		atomic_clearbits_int(&pg->pg_flags,
373 		    PG_ZERO|PQ_FREE);
374 
375 		pg->uobject = NULL;
376 		pg->uanon = NULL;
377 		pg->pg_version++;
378 
379 		/*
380 		 * Next.
381 		 */
382 		pg++;
383 		sz--;
384 	}
385 
386 	/* Return. */
387 	uvm_unlock_fpageq();
388 	return 0;
389 }
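
/*
 * The sz pages are carved from the top of the chosen free range, which
 * is what pushes the returned address as high as possible.  Illustrative
 * call (image_size is a stand-in name, error handling trimmed):
 *
 *	paddr_t pig_start;
 *
 *	if (uvm_pmr_alloc_pig(&pig_start, image_size) != 0)
 *		return (ENOMEM);
 */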
390 
391 /*
392  * Allocate a piglet area.
393  *
394  * This is as low as possible.
395  * Piglets are aligned.
396  *
397  * sz and align in bytes.
398  *
399  * The call may sleep, waiting for the pagedaemon to attempt to free memory.
400  * The pagedaemon may decide it is not possible to free enough memory, causing
401  * the allocation to fail.
402  */
403 int
404 uvm_pmr_alloc_piglet(paddr_t *addr, psize_t sz, paddr_t align)
405 {
406 	vaddr_t			 pg_addr, piglet_addr;
407 	struct uvm_pmemrange	*pmr;
408 	struct vm_page		*pig_pg, *pg;
409 	struct pglist		 pageq;
410 	int			 pdaemon_woken;
411 
412 	KASSERT((align & (align - 1)) == 0);
413 	pdaemon_woken = 0; /* Didn't wake the pagedaemon. */
414 
415 	/*
416 	 * Fixup arguments: align must be at least PAGE_SIZE,
417 	 * sz will be converted to pagecount, since that is what
418 	 * pmemrange uses internally.
419 	 */
420 	if (align < PAGE_SIZE)
421 		align = PAGE_SIZE;
422 	sz = atop(round_page(sz));
423 
424 	uvm_lock_fpageq();
425 
426 	TAILQ_FOREACH_REVERSE(pmr, &uvm.pmr_control.use, uvm_pmemrange_use,
427 	    pmr_use) {
428 retry:
429 		/*
430 		 * Search for a range with enough space.
431 		 * Use the address tree, to ensure the range is as low as
432 		 * possible.
433 		 */
434 		RB_FOREACH(pig_pg, uvm_pmr_addr, &pmr->addr) {
435 			pg_addr = VM_PAGE_TO_PHYS(pig_pg);
436 			piglet_addr = (pg_addr + (align - 1)) & ~(align - 1);
437 
438 			if (atop(pg_addr) + pig_pg->fpgsz >=
439 			    atop(piglet_addr) + sz) {
440 				goto found;
441 			}
442 		}
443 
444 		/*
445 		 * Try to coerce the pagedaemon into freeing memory
446 		 * for the piglet.
447 		 *
448 		 * pdaemon_woken is set to prevent the code from
449 		 * falling into an endless loop.
450 		 */
451 		if (!pdaemon_woken) {
452 			pdaemon_woken = 1;
453 			if (uvm_wait_pla(ptoa(pmr->low), ptoa(pmr->high) - 1,
454 			    ptoa(sz), UVM_PLA_FAILOK) == 0)
455 				goto retry;
456 		}
457 	}
458 
459 	/* Return failure. */
460 	uvm_unlock_fpageq();
461 	return ENOMEM;
462 
463 found:
464 	/*
465 	 * Extract piglet from pigpen.
466 	 */
467 	TAILQ_INIT(&pageq);
468 	uvm_pmr_extract_range(pmr, pig_pg,
469 	    atop(piglet_addr), atop(piglet_addr) + sz, &pageq);
470 
471 	*addr = piglet_addr;
472 	uvmexp.free -= sz;
473 
474 	/*
475 	 * Update pg flags.
476 	 *
477 	 * Note that we trash the sz argument now.
478 	 */
479 	TAILQ_FOREACH(pg, &pageq, pageq) {
480 		KASSERT(pg->pg_flags & PQ_FREE);
481 
482 		atomic_clearbits_int(&pg->pg_flags,
483 		    PG_PMAP0|PG_PMAP1|PG_PMAP2|PG_PMAP3);
484 
485 		if (pg->pg_flags & PG_ZERO)
486 			uvmexp.zeropages--;
487 		atomic_clearbits_int(&pg->pg_flags,
488 		    PG_ZERO|PQ_FREE);
489 
490 		pg->uobject = NULL;
491 		pg->uanon = NULL;
492 		pg->pg_version++;
493 	}
494 
495 	uvm_unlock_fpageq();
496 	return 0;
497 }
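
/*
 * The piglet start is rounded up to the requested power-of-two alignment
 * with (pg_addr + (align - 1)) & ~(align - 1).  For example, pg_addr
 * 0x7e3000 with align 0x400000 (4MB) yields piglet_addr 0x800000; the
 * range is only used if it still has sz pages left above that point.
 */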
498 
499 /*
500  * Physmem RLE compression support.
501  *
502  * Given a physical page address, it will return the number of pages
503  * starting at the address, that are free.
504  * Returns 0 if the page at addr is not free.
505  */
506 psize_t
507 uvm_page_rle(paddr_t addr)
508 {
509 	struct vm_page		*pg, *pg_end;
510 	struct vm_physseg	*vmp;
511 	int			 pseg_idx, off_idx;
512 
513 	pseg_idx = vm_physseg_find(atop(addr), &off_idx);
514 	if (pseg_idx == -1)
515 		return 0;
516 
517 	vmp = &vm_physmem[pseg_idx];
518 	pg = &vmp->pgs[off_idx];
519 	if (!(pg->pg_flags & PQ_FREE))
520 		return 0;
521 
522 	/*
523 	 * Search for the first non-free page after pg.
524 	 * Note that the page may not be the first page in a free pmemrange,
525 	 * therefore pg->fpgsz cannot be used.
526 	 */
527 	for (pg_end = pg; pg_end <= vmp->lastpg &&
528 	    (pg_end->pg_flags & PQ_FREE) == PQ_FREE; pg_end++);
529 	return pg_end - pg;
530 }
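
/*
 * Illustrative sketch of a caller RLE-encoding free physical memory with
 * uvm_page_rle() (record_free_run and the range bounds are hypothetical):
 *
 *	paddr_t pa;
 *	psize_t nfree;
 *
 *	for (pa = range_start; pa < range_end; pa += PAGE_SIZE) {
 *		nfree = uvm_page_rle(pa);
 *		if (nfree == 0)
 *			continue;
 *		record_free_run(pa, nfree);
 *		pa += (nfree - 1) * PAGE_SIZE;
 *	}
 */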
531 
532 /*
533  * get_hibernate_info
534  *
535  * Fills out the hibernate_info union pointed to by hiber_info
536  * with information about this machine (swap signature block
537  * offsets, number of memory ranges, kernel in use, etc)
538  *
539  */
540 int
541 get_hibernate_info(union hibernate_info *hiber_info)
542 {
543 	int chunktable_size;
544 	struct disklabel dl;
545 	char err_string[128], *dl_ret;
546 
547 	/* Determine I/O function to use */
548 	hiber_info->io_func = get_hibernate_io_function();
549 	if (hiber_info->io_func == NULL)
550 		return (1);
551 
552 	/* Calculate hibernate device */
553 	hiber_info->device = swdevt[0].sw_dev;
554 
555 	/* Read disklabel (used to calculate signature and image offsets) */
556 	dl_ret = disk_readlabel(&dl, hiber_info->device, err_string, 128);
557 
558 	if (dl_ret) {
559 		printf("Hibernate error reading disklabel: %s\n", dl_ret);
560 		return (1);
561 	}
562 
563 	hiber_info->secsize = dl.d_secsize;
564 
565 	/* Make sure the signature can fit in one block */
566 	KASSERT(sizeof(union hibernate_info)/hiber_info->secsize == 1);
567 
568 	/* Calculate swap offset from start of disk */
569 	hiber_info->swap_offset = dl.d_partitions[1].p_offset;
570 
571 	/* Calculate signature block location */
572 	hiber_info->sig_offset = dl.d_partitions[1].p_offset +
573 		dl.d_partitions[1].p_size -
574 		sizeof(union hibernate_info)/hiber_info->secsize;
575 
576 	chunktable_size = HIBERNATE_CHUNK_TABLE_SIZE / hiber_info->secsize;
577 
578 	/* Calculate memory image location */
579 	hiber_info->image_offset = dl.d_partitions[1].p_offset +
580 		dl.d_partitions[1].p_size -
581 		(hiber_info->image_size / hiber_info->secsize) -
582 		sizeof(union hibernate_info)/hiber_info->secsize -
583 		chunktable_size;
584 
585 	/* Stash kernel version information */
586 	bzero(&hiber_info->kernel_version, 128);
587 	bcopy(version, &hiber_info->kernel_version,
588 		min(strlen(version), sizeof(hiber_info->kernel_version)-1));
589 
590 	/* Allocate piglet region */
591 	if (uvm_pmr_alloc_piglet(&hiber_info->piglet_base, HIBERNATE_CHUNK_SIZE,
592 		HIBERNATE_CHUNK_SIZE)) {
593 		printf("Hibernate failed to allocate the piglet\n");
594 		return (1);
595 	}
596 
597 	return get_hibernate_info_md(hiber_info);
598 }
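
/*
 * Resulting layout at the end of the swap ('b') partition, as computed
 * above (not to scale):
 *
 *	+-------~~-------+-----------+-------------+-----------------+
 *	|  regular swap  |   image   | chunk table | signature block |
 *	+-------~~-------+-----------+-------------+-----------------+
 *	                 ^ image_offset            ^ sig_offset
 */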
599 
600 /*
601  * hibernate_zlib_alloc
602  *
603  * Allocate nitems*size bytes from the hiballoc area presently in use
604  *
605  */
606 void *
607 hibernate_zlib_alloc(void *unused, int nitems, int size)
608 {
609 	return hib_alloc(&hibernate_state->hiballoc_arena, nitems*size);
610 }
611 
612 /*
613  * hibernate_zlib_free
614  *
615  * Free the memory pointed to by addr in the hiballoc area presently in
616  * use
617  *
618  */
619 void
620 hibernate_zlib_free(void *unused, void *addr)
621 {
622 	hib_free(&hibernate_state->hiballoc_arena, addr);
623 }
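
/*
 * Sketch of how these hooks are wired into the zlib stream before
 * inflateInit()/deflateInit() (assumed to happen in the MD setup code;
 * alloc_func/free_func are zlib's callback types):
 *
 *	hibernate_state->hib_stream.zalloc = (alloc_func)hibernate_zlib_alloc;
 *	hibernate_state->hib_stream.zfree = (free_func)hibernate_zlib_free;
 *	hibernate_state->hib_stream.opaque = NULL;
 */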
624 
625 /*
626  * hibernate_inflate
627  *
628  * Inflate size bytes from src into dest, skipping any pages in
629  * [src..dest] that are special (see hibernate_inflate_skip)
630  *
631  * For each page of output data, we map HIBERNATE_TEMP_PAGE
632  * to the current output page, and tell inflate() to inflate
633  * its data there, resulting in the inflated data being placed
634  * at the proper paddr.
635  *
636  * This function executes while using the resume-time stack
637  * and pmap, and therefore cannot use ddb/printf/etc. Doing so
638  * will likely hang or reset the machine.
639  *
640  */
641 void
642 hibernate_inflate(paddr_t dest, paddr_t src, size_t size)
643 {
644 	int i;
645 
646 	hibernate_state->hib_stream.avail_in = size;
647 	hibernate_state->hib_stream.next_in = (char *)src;
648 
649 	do {
650 		/* Flush cache and TLB */
651 		hibernate_flush();
652 
653 		/*
654 		 * Is this a special page? If yes, redirect the
655 		 * inflate output to a scratch page (eg, discard it)
656 		 */
657 		if (hibernate_inflate_skip(dest))
658 			hibernate_enter_resume_mapping(HIBERNATE_TEMP_PAGE,
659 				HIBERNATE_TEMP_PAGE, 0);
660 		else
661 			hibernate_enter_resume_mapping(HIBERNATE_TEMP_PAGE,
662 				dest, 0);
663 
664 		/* Set up the stream for inflate */
665 		hibernate_state->hib_stream.avail_out = PAGE_SIZE;
666 		hibernate_state->hib_stream.next_out =
667 			(char *)HIBERNATE_TEMP_PAGE;
668 
669 		/* Process next block of data */
670 		i = inflate(&hibernate_state->hib_stream, Z_PARTIAL_FLUSH);
671 		if (i != Z_OK && i != Z_STREAM_END) {
672 			/*
673 			 * XXX - this will likely reboot/hang most machines,
674 			 *       but there's not much else we can do here.
675 			 */
676 			panic("inflate error");
677 		}
678 
679 		dest += PAGE_SIZE - hibernate_state->hib_stream.avail_out;
680 	} while (i != Z_STREAM_END);
681 }
682 
683 /*
684  * hibernate_deflate
685  *
686  * deflate from src into the I/O page, up to 'remaining' bytes
687  *
688  * Returns the number of input bytes consumed, and updates the 'remaining'
689  * parameter to the amount of output space left in the I/O page
690  * (this information is needed to know how much to write to disk).
691  *
692  */
693 size_t
694 hibernate_deflate(paddr_t src, size_t *remaining)
695 {
696 	/* Set up the stream for deflate */
697 	hibernate_state->hib_stream.avail_in = PAGE_SIZE -
698 		(src & PAGE_MASK);
699 	hibernate_state->hib_stream.avail_out = *remaining;
700 	hibernate_state->hib_stream.next_in = (caddr_t)src;
701 	hibernate_state->hib_stream.next_out = (caddr_t)HIBERNATE_IO_PAGE +
702 		(PAGE_SIZE - *remaining);
703 
704 	/* Process next block of data */
705 	if (deflate(&hibernate_state->hib_stream, Z_PARTIAL_FLUSH) != Z_OK)
706 		panic("hibernate zlib deflate error\n");
707 
708 	/* Update pointers and return number of bytes consumed */
709 	*remaining = hibernate_state->hib_stream.avail_out;
710 	return (PAGE_SIZE - (src & PAGE_MASK)) -
711 		hibernate_state->hib_stream.avail_in;
712 }
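
/*
 * Illustrative caller loop (write_io_page and the range bounds are
 * hypothetical): deflate a physical range into the I/O page and flush
 * the page to disk whenever the output space runs out:
 *
 *	size_t remaining = PAGE_SIZE;
 *	size_t used;
 *
 *	while (src < end) {
 *		used = hibernate_deflate(src, &remaining);
 *		src += used;
 *		if (remaining == 0) {
 *			write_io_page();
 *			remaining = PAGE_SIZE;
 *		}
 *	}
 */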
713 
714 /*
715  * hibernate_write_signature
716  *
717  * Write the hibernation information specified in hiber_info
718  * to the location in swap previously calculated (last block of
719  * swap), called the "signature block".
720  *
721  * Write the memory chunk table to the area in swap immediately
722  * preceding the signature block.
723  */
724 int
725 hibernate_write_signature(union hibernate_info *hiber_info)
726 {
727 	u_int8_t *io_page;
728 	daddr_t chunkbase;
729 	size_t i;
730 
731 	io_page = malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT);
732 	if (!io_page)
733 		return (1);
734 
735 	/* Write hibernate info to disk */
736 	if (hiber_info->io_func(hiber_info->device, hiber_info->sig_offset,
737 		(vaddr_t)hiber_info, hiber_info->secsize, 1, io_page))
738 			panic("error in hibernate write sig\n");
739 
740 	chunkbase = hiber_info->sig_offset -
741 		    (HIBERNATE_CHUNK_TABLE_SIZE / hiber_info->secsize);
742 
743 	/* Write chunk table */
744 	for (i = 0; i < HIBERNATE_CHUNK_TABLE_SIZE; i += NBPG) {
745 		if (hiber_info->io_func(hiber_info->device,
746 			chunkbase + (i/hiber_info->secsize),
747 			(vaddr_t)(HIBERNATE_CHUNK_TABLE_START + i),
748 			NBPG,
749 			1,
750 			io_page))
751 				panic("error in hibernate write chunks\n");
752 	}
753 
754 	free(io_page, M_DEVBUF);
755 
756 	return (0);
757 }
758 
759 /*
760  * hibernate_clear_signature
761  *
762  * Write an empty hiber_info to the swap signature block, which is
763  * guaranteed to not match any valid hiber_info.
764  */
765 int
766 hibernate_clear_signature(void)
767 {
768 	union hibernate_info blank_hiber_info;
769 	union hibernate_info hiber_info;
770 	u_int8_t *io_page;
771 
772 	/* Zero out a blank hiber_info */
773 	bzero(&blank_hiber_info, sizeof(blank_hiber_info));
774 
775 	if (get_hibernate_info(&hiber_info))
776 		return (1);
777 
778 	io_page = malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT);
779 	if (!io_page)
780 		return (1);
781 
782 	/* Write (zeroed) hibernate info to disk */
783 	if (hiber_info.io_func(hiber_info.device, hiber_info.sig_offset,
784 		(vaddr_t)&blank_hiber_info, hiber_info.secsize, 1, io_page))
785 			panic("error in hibernate clear signature write\n");
786 
787 	free(io_page, M_DEVBUF);
788 
789 	return (0);
790 }
791 
792 /*
793  * hibernate_check_overlap
794  *
795  * Check chunk range overlap when calculating whether or not to copy a
796  * compressed chunk to the piglet area before decompressing.
797  *
798  * Returns zero if the ranges do not overlap, non-zero otherwise.
799  */
800 int
801 hibernate_check_overlap(paddr_t r1s, paddr_t r1e, paddr_t r2s, paddr_t r2e)
802 {
803 	/* case A : end of r1 overlaps start of r2 */
804 	if (r1s < r2s && r1e > r2s)
805 		return (1);
806 
807 	/* case B : r1 entirely inside r2 */
808 	if (r1s >= r2s && r1e <= r2e)
809 		return (1);
810 
811 	/* case C : r2 entirely inside r1 */
812 	if (r2s >= r1s && r2e <= r1e)
813 		return (1);
814 
815 	/* case D : end of r2 overlaps start of r1 */
816 	if (r2s < r1s && r2e > r1s)
817 		return (1);
818 
819 	return (0);
820 }
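
/*
 * For example, r1 = [0x1000, 0x3000) and r2 = [0x2000, 0x4000) overlap
 * via case A, while r1 = [0x1000, 0x2000) and r2 = [0x2000, 0x3000) do
 * not: a range whose end equals the other's start is not treated as an
 * overlap.
 */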
821 
822 /*
823  * hibernate_compare_signature
824  *
825  * Compare two hibernate_infos to determine if they are the same (e.g.,
826  * we should be performing a hibernate resume on this machine).
827  * Not all fields are checked - just enough to verify that the machine
828  * has the same memory configuration and kernel as the one that
829  * wrote the signature previously.
830  */
831 int
832 hibernate_compare_signature(union hibernate_info *mine,
833 	union hibernate_info *disk)
834 {
835 	u_int i;
836 
837 	if (mine->nranges != disk->nranges)
838 		return (1);
839 
840 	if (strcmp(mine->kernel_version, disk->kernel_version) != 0)
841 		return (1);
842 
843 	for (i = 0; i < mine->nranges; i++) {
844 		if ((mine->ranges[i].base != disk->ranges[i].base) ||
845 		    (mine->ranges[i].end != disk->ranges[i].end))
846 			return (1);
847 	}
848 
849 	return (0);
850 }
851 
852 /*
853  * hibernate_read_block
854  *
855  * Reads read_size blocks from the hibernate device specified in
856  * hib_info at offset blkctr. Output is placed into the vaddr specified
857  * at dest.
858  *
859  * Separate offsets and pages are used to handle misaligned reads (reads
860  * that span a page boundary).
861  *
862  * blkctr specifies a relative offset (relative to the start of swap),
863  * not an absolute disk offset
864  *
865  */
866 int
867 hibernate_read_block(union hibernate_info *hib_info, daddr_t blkctr,
868 	size_t read_size, vaddr_t dest)
869 {
870 	struct buf *bp;
871 	struct bdevsw *bdsw;
872 	int error;
873 
874 	bp = geteblk(read_size);
875 	bdsw = &bdevsw[major(hib_info->device)];
876 
877 	error = (*bdsw->d_open)(hib_info->device, FREAD, S_IFCHR, curproc);
878 	if (error) {
879 		printf("hibernate_read_block open failed\n");
880 		return (1);
881 	}
882 
883 	bp->b_bcount = read_size;
884 	bp->b_blkno = blkctr;
885 	CLR(bp->b_flags, B_READ | B_WRITE | B_DONE);
886 	SET(bp->b_flags, B_BUSY | B_READ | B_RAW);
887 	bp->b_dev = hib_info->device;
888 	bp->b_cylinder = 0;
889 	(*bdsw->d_strategy)(bp);
890 
891 	error = biowait(bp);
892 	if (error) {
893 		printf("hibernate_read_block biowait failed %d\n", error);
894 		error = (*bdsw->d_close)(hib_info->device, 0, S_IFCHR,
895 				curproc);
896 		if (error)
897 			printf("hibernate_read_block error close failed\n");
898 		return (1);
899 	}
900 
901 	error = (*bdsw->d_close)(hib_info->device, FREAD, S_IFCHR, curproc);
902 	if (error) {
903 		printf("hibernate_read_block close failed\n");
904 		return (1);
905 	}
906 
907 	bcopy(bp->b_data, (caddr_t)dest, read_size);
908 
909 	bp->b_flags |= B_INVAL;
910 	brelse(bp);
911 
912 	return (0);
913 }
914 
915