xref: /openbsd-src/sys/kern/subr_hibernate.c (revision 0817055175eef1880cbcc0b168296b8e5da6500b)
1 /*	$OpenBSD: subr_hibernate.c,v 1.94 2014/07/09 14:10:25 mlarkin Exp $	*/
2 
3 /*
4  * Copyright (c) 2011 Ariane van der Steldt <ariane@stack.nl>
5  * Copyright (c) 2011 Mike Larkin <mlarkin@openbsd.org>
6  *
7  * Permission to use, copy, modify, and distribute this software for any
8  * purpose with or without fee is hereby granted, provided that the above
9  * copyright notice and this permission notice appear in all copies.
10  *
11  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
12  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
14  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18  */
19 
20 #include <sys/hibernate.h>
21 #include <sys/malloc.h>
22 #include <sys/param.h>
23 #include <sys/tree.h>
24 #include <sys/systm.h>
25 #include <sys/disklabel.h>
26 #include <sys/disk.h>
27 #include <sys/conf.h>
28 #include <sys/buf.h>
29 #include <sys/fcntl.h>
30 #include <sys/stat.h>
31 #include <uvm/uvm.h>
32 #include <uvm/uvm_swap.h>
33 #include <machine/hibernate.h>
34 
35 /*
36  * Hibernate piglet layout information
37  *
38  * The piglet is a scratch area of memory allocated by the suspending kernel.
39  * Its phys and virt addrs are recorded in the signature block. The piglet is
40  * used to guarantee an unused area of memory that can be used by the resuming
41  * kernel for various things. The piglet is excluded during unpack operations.
42  * The piglet size is presently 3*HIBERNATE_CHUNK_SIZE (typically 3*4MB).
43  *
44  * Offset from piglet_base	Purpose
45  * ----------------------------------------------------------------------------
46  * 0				I/O page used during resume
47  * 1*PAGE_SIZE		 	I/O page used during hibernate suspend
48  * 2*PAGE_SIZE		 	I/O page used during hibernate suspend
49  * 3*PAGE_SIZE			copy page used during hibernate suspend
50  * 4*PAGE_SIZE			final chunk ordering list (8 pages)
51  * 12*PAGE_SIZE			piglet chunk ordering list (8 pages)
52  * 20*PAGE_SIZE			temp chunk ordering list (8 pages)
53  * 28*PAGE_SIZE			start of hiballoc area
54  * 108*PAGE_SIZE		end of hiballoc area (80 pages)
55  * ...				unused
56  * HIBERNATE_CHUNK_SIZE		start of hibernate chunk table
57  * 2*HIBERNATE_CHUNK_SIZE	bounce area for chunks being unpacked
58  * 3*HIBERNATE_CHUNK_SIZE	end of piglet
59  */
60 
/* Temporary vaddr ranges used during hibernate */
vaddr_t hibernate_temp_page;
vaddr_t hibernate_copy_page;

/*
 * Hibernate info as read from disk during resume.
 *
 * hibernate_resume() reads the on-disk signature block into disk_hib.
 * NOTE(review): the users of global_pig_start and global_piglet_va are not
 * visible in this part of the file -- presumably the pig/piglet location
 * handed to the unpack code; confirm against the rest of the file.
 */
union hibernate_info disk_hib;
paddr_t global_pig_start;
vaddr_t global_piglet_va;
69 
/*
 * Debug output support.
 *
 * Uncomment the define below to compile in the debug printfs. Without
 * HIB_DEBUG both macros expand to nothing, so their arguments are never
 * evaluated.
 */
/* #define HIB_DEBUG */
#ifdef HIB_DEBUG
int	hib_debug = 99;
/* Print whenever hib_debug is non-zero. */
#define DPRINTF(x...)     do { if (hib_debug) printf(x); } while (0)
/* Print only when hib_debug is strictly greater than level n. */
#define DNPRINTF(n,x...)  do { if (hib_debug > (n)) printf(x); } while (0)
#else
#define DPRINTF(x...)
#define DNPRINTF(n,x...)
#endif
79 
#ifndef NO_PROPOLICE
/* Propolice guard value; get_hibernate_info() saves it into the hibernate info. */
extern long __guard_local;
#endif /* ! NO_PROPOLICE */

/* Forward declaration; the definition is not in this part of the file. */
void hibernate_copy_chunk_to_piglet(paddr_t, vaddr_t, size_t);
85 
/*
 * Hib alloc enforced alignment.
 */
#define HIB_ALIGN		8 /* bytes alignment */

/*
 * sizeof builtin operation, but with alignment constraint:
 * yields sizeof(_type) rounded up to a multiple of HIB_ALIGN.
 */
#define HIB_SIZEOF(_type)	roundup(sizeof(_type), HIB_ALIGN)

/*
 * Bookkeeping header placed directly in front of every hiballoc allocation.
 *
 * The memory described by an entry is laid out as:
 *   [header (HIB_SIZEOF bytes)][hibe_use bytes in use][hibe_space bytes free]
 * hib_alloc() carves new entries out of the free tail and hib_free() hands
 * the space back to the preceding entry.
 */
struct hiballoc_entry {
	size_t			hibe_use;	/* bytes in use after the header */
	size_t			hibe_space;	/* free bytes after the used bytes */
	RB_ENTRY(hiballoc_entry) hibe_entry;	/* linkage in the arena address tree */
};
101 
102 /*
103  * Sort hibernate memory ranges by ascending PA
104  */
105 void
106 hibernate_sort_ranges(union hibernate_info *hib_info)
107 {
108 	int i, j;
109 	struct hibernate_memory_range *ranges;
110 	paddr_t base, end;
111 
112 	ranges = hib_info->ranges;
113 
114 	for (i = 1; i < hib_info->nranges; i++) {
115 		j = i;
116 		while (j > 0 && ranges[j - 1].base > ranges[j].base) {
117 			base = ranges[j].base;
118 			end = ranges[j].end;
119 			ranges[j].base = ranges[j - 1].base;
120 			ranges[j].end = ranges[j - 1].end;
121 			ranges[j - 1].base = base;
122 			ranges[j - 1].end = end;
123 			j--;
124 		}
125 	}
126 }
127 
128 /*
129  * Compare hiballoc entries based on the address they manage.
130  *
131  * Since the address is fixed, relative to struct hiballoc_entry,
132  * we just compare the hiballoc_entry pointers.
133  */
134 static __inline int
135 hibe_cmp(struct hiballoc_entry *l, struct hiballoc_entry *r)
136 {
137 	return l < r ? -1 : (l > r);
138 }
139 
140 RB_PROTOTYPE(hiballoc_addr, hiballoc_entry, hibe_entry, hibe_cmp)
141 
142 /*
143  * Given a hiballoc entry, return the address it manages.
144  */
145 static __inline void *
146 hib_entry_to_addr(struct hiballoc_entry *entry)
147 {
148 	caddr_t addr;
149 
150 	addr = (caddr_t)entry;
151 	addr += HIB_SIZEOF(struct hiballoc_entry);
152 	return addr;
153 }
154 
155 /*
156  * Given an address, find the hiballoc that corresponds.
157  */
158 static __inline struct hiballoc_entry*
159 hib_addr_to_entry(void *addr_param)
160 {
161 	caddr_t addr;
162 
163 	addr = (caddr_t)addr_param;
164 	addr -= HIB_SIZEOF(struct hiballoc_entry);
165 	return (struct hiballoc_entry*)addr;
166 }
167 
168 RB_GENERATE(hiballoc_addr, hiballoc_entry, hibe_entry, hibe_cmp)
169 
170 /*
171  * Allocate memory from the arena.
172  *
173  * Returns NULL if no memory is available.
174  */
175 void *
176 hib_alloc(struct hiballoc_arena *arena, size_t alloc_sz)
177 {
178 	struct hiballoc_entry *entry, *new_entry;
179 	size_t find_sz;
180 
181 	/*
182 	 * Enforce alignment of HIB_ALIGN bytes.
183 	 *
184 	 * Note that, because the entry is put in front of the allocation,
185 	 * 0-byte allocations are guaranteed a unique address.
186 	 */
187 	alloc_sz = roundup(alloc_sz, HIB_ALIGN);
188 
189 	/*
190 	 * Find an entry with hibe_space >= find_sz.
191 	 *
192 	 * If the root node is not large enough, we switch to tree traversal.
193 	 * Because all entries are made at the bottom of the free space,
194 	 * traversal from the end has a slightly better chance of yielding
195 	 * a sufficiently large space.
196 	 */
197 	find_sz = alloc_sz + HIB_SIZEOF(struct hiballoc_entry);
198 	entry = RB_ROOT(&arena->hib_addrs);
199 	if (entry != NULL && entry->hibe_space < find_sz) {
200 		RB_FOREACH_REVERSE(entry, hiballoc_addr, &arena->hib_addrs) {
201 			if (entry->hibe_space >= find_sz)
202 				break;
203 		}
204 	}
205 
206 	/*
207 	 * Insufficient or too fragmented memory.
208 	 */
209 	if (entry == NULL)
210 		return NULL;
211 
212 	/*
213 	 * Create new entry in allocated space.
214 	 */
215 	new_entry = (struct hiballoc_entry*)(
216 	    (caddr_t)hib_entry_to_addr(entry) + entry->hibe_use);
217 	new_entry->hibe_space = entry->hibe_space - find_sz;
218 	new_entry->hibe_use = alloc_sz;
219 
220 	/*
221 	 * Insert entry.
222 	 */
223 	if (RB_INSERT(hiballoc_addr, &arena->hib_addrs, new_entry) != NULL)
224 		panic("hib_alloc: insert failure");
225 	entry->hibe_space = 0;
226 
227 	/* Return address managed by entry. */
228 	return hib_entry_to_addr(new_entry);
229 }
230 
231 /*
232  * Free a pointer previously allocated from this arena.
233  *
234  * If addr is NULL, this will be silently accepted.
235  */
236 void
237 hib_free(struct hiballoc_arena *arena, void *addr)
238 {
239 	struct hiballoc_entry *entry, *prev;
240 
241 	if (addr == NULL)
242 		return;
243 
244 	/*
245 	 * Derive entry from addr and check it is really in this arena.
246 	 */
247 	entry = hib_addr_to_entry(addr);
248 	if (RB_FIND(hiballoc_addr, &arena->hib_addrs, entry) != entry)
249 		panic("hib_free: freed item %p not in hib arena", addr);
250 
251 	/*
252 	 * Give the space in entry to its predecessor.
253 	 *
254 	 * If entry has no predecessor, change its used space into free space
255 	 * instead.
256 	 */
257 	prev = RB_PREV(hiballoc_addr, &arena->hib_addrs, entry);
258 	if (prev != NULL &&
259 	    (void *)((caddr_t)prev + HIB_SIZEOF(struct hiballoc_entry) +
260 	    prev->hibe_use + prev->hibe_space) == entry) {
261 		/* Merge entry. */
262 		RB_REMOVE(hiballoc_addr, &arena->hib_addrs, entry);
263 		prev->hibe_space += HIB_SIZEOF(struct hiballoc_entry) +
264 		    entry->hibe_use + entry->hibe_space;
265 	} else {
266 		/* Flip used memory to free space. */
267 		entry->hibe_space += entry->hibe_use;
268 		entry->hibe_use = 0;
269 	}
270 }
271 
272 /*
273  * Initialize hiballoc.
274  *
275  * The allocator will manage memmory at ptr, which is len bytes.
276  */
277 int
278 hiballoc_init(struct hiballoc_arena *arena, void *p_ptr, size_t p_len)
279 {
280 	struct hiballoc_entry *entry;
281 	caddr_t ptr;
282 	size_t len;
283 
284 	RB_INIT(&arena->hib_addrs);
285 
286 	/*
287 	 * Hib allocator enforces HIB_ALIGN alignment.
288 	 * Fixup ptr and len.
289 	 */
290 	ptr = (caddr_t)roundup((vaddr_t)p_ptr, HIB_ALIGN);
291 	len = p_len - ((size_t)ptr - (size_t)p_ptr);
292 	len &= ~((size_t)HIB_ALIGN - 1);
293 
294 	/*
295 	 * Insufficient memory to be able to allocate and also do bookkeeping.
296 	 */
297 	if (len <= HIB_SIZEOF(struct hiballoc_entry))
298 		return ENOMEM;
299 
300 	/*
301 	 * Create entry describing space.
302 	 */
303 	entry = (struct hiballoc_entry*)ptr;
304 	entry->hibe_use = 0;
305 	entry->hibe_space = len - HIB_SIZEOF(struct hiballoc_entry);
306 	RB_INSERT(hiballoc_addr, &arena->hib_addrs, entry);
307 
308 	return 0;
309 }
310 
311 /*
312  * Zero all free memory.
313  */
314 void
315 uvm_pmr_zero_everything(void)
316 {
317 	struct uvm_pmemrange	*pmr;
318 	struct vm_page		*pg;
319 	int			 i;
320 
321 	uvm_lock_fpageq();
322 	TAILQ_FOREACH(pmr, &uvm.pmr_control.use, pmr_use) {
323 		/* Zero single pages. */
324 		while ((pg = TAILQ_FIRST(&pmr->single[UVM_PMR_MEMTYPE_DIRTY]))
325 		    != NULL) {
326 			uvm_pmr_remove(pmr, pg);
327 			uvm_pagezero(pg);
328 			atomic_setbits_int(&pg->pg_flags, PG_ZERO);
329 			uvmexp.zeropages++;
330 			uvm_pmr_insert(pmr, pg, 0);
331 		}
332 
333 		/* Zero multi page ranges. */
334 		while ((pg = RB_ROOT(&pmr->size[UVM_PMR_MEMTYPE_DIRTY]))
335 		    != NULL) {
336 			pg--; /* Size tree always has second page. */
337 			uvm_pmr_remove(pmr, pg);
338 			for (i = 0; i < pg->fpgsz; i++) {
339 				uvm_pagezero(&pg[i]);
340 				atomic_setbits_int(&pg[i].pg_flags, PG_ZERO);
341 				uvmexp.zeropages++;
342 			}
343 			uvm_pmr_insert(pmr, pg, 0);
344 		}
345 	}
346 	uvm_unlock_fpageq();
347 }
348 
349 /*
350  * Mark all memory as dirty.
351  *
352  * Used to inform the system that the clean memory isn't clean for some
353  * reason, for example because we just came back from hibernate.
354  */
355 void
356 uvm_pmr_dirty_everything(void)
357 {
358 	struct uvm_pmemrange	*pmr;
359 	struct vm_page		*pg;
360 	int			 i;
361 
362 	uvm_lock_fpageq();
363 	TAILQ_FOREACH(pmr, &uvm.pmr_control.use, pmr_use) {
364 		/* Dirty single pages. */
365 		while ((pg = TAILQ_FIRST(&pmr->single[UVM_PMR_MEMTYPE_ZERO]))
366 		    != NULL) {
367 			uvm_pmr_remove(pmr, pg);
368 			atomic_clearbits_int(&pg->pg_flags, PG_ZERO);
369 			uvm_pmr_insert(pmr, pg, 0);
370 		}
371 
372 		/* Dirty multi page ranges. */
373 		while ((pg = RB_ROOT(&pmr->size[UVM_PMR_MEMTYPE_ZERO]))
374 		    != NULL) {
375 			pg--; /* Size tree always has second page. */
376 			uvm_pmr_remove(pmr, pg);
377 			for (i = 0; i < pg->fpgsz; i++)
378 				atomic_clearbits_int(&pg[i].pg_flags, PG_ZERO);
379 			uvm_pmr_insert(pmr, pg, 0);
380 		}
381 	}
382 
383 	uvmexp.zeropages = 0;
384 	uvm_unlock_fpageq();
385 }
386 
387 /*
388  * Allocate the highest address that can hold sz.
389  *
390  * sz in bytes.
391  */
392 int
393 uvm_pmr_alloc_pig(paddr_t *addr, psize_t sz)
394 {
395 	struct uvm_pmemrange	*pmr;
396 	struct vm_page		*pig_pg, *pg;
397 
398 	/*
399 	 * Convert sz to pages, since that is what pmemrange uses internally.
400 	 */
401 	sz = atop(round_page(sz));
402 
403 	uvm_lock_fpageq();
404 
405 	TAILQ_FOREACH(pmr, &uvm.pmr_control.use, pmr_use) {
406 		RB_FOREACH_REVERSE(pig_pg, uvm_pmr_addr, &pmr->addr) {
407 			if (pig_pg->fpgsz >= sz) {
408 				goto found;
409 			}
410 		}
411 	}
412 
413 	/*
414 	 * Allocation failure.
415 	 */
416 	uvm_unlock_fpageq();
417 	return ENOMEM;
418 
419 found:
420 	/* Remove page from freelist. */
421 	uvm_pmr_remove_size(pmr, pig_pg);
422 	pig_pg->fpgsz -= sz;
423 	pg = pig_pg + pig_pg->fpgsz;
424 	if (pig_pg->fpgsz == 0)
425 		uvm_pmr_remove_addr(pmr, pig_pg);
426 	else
427 		uvm_pmr_insert_size(pmr, pig_pg);
428 
429 	uvmexp.free -= sz;
430 	*addr = VM_PAGE_TO_PHYS(pg);
431 
432 	/*
433 	 * Update pg flags.
434 	 *
435 	 * Note that we trash the sz argument now.
436 	 */
437 	while (sz > 0) {
438 		KASSERT(pg->pg_flags & PQ_FREE);
439 
440 		atomic_clearbits_int(&pg->pg_flags, PG_PMAPMASK);
441 
442 		if (pg->pg_flags & PG_ZERO)
443 			uvmexp.zeropages -= sz;
444 		atomic_clearbits_int(&pg->pg_flags,
445 		    PG_ZERO|PQ_FREE);
446 
447 		pg->uobject = NULL;
448 		pg->uanon = NULL;
449 		pg->pg_version++;
450 
451 		/*
452 		 * Next.
453 		 */
454 		pg++;
455 		sz--;
456 	}
457 
458 	/* Return. */
459 	uvm_unlock_fpageq();
460 	return 0;
461 }
462 
463 /*
464  * Allocate a piglet area.
465  *
466  * This is as low as possible.
467  * Piglets are aligned.
468  *
469  * sz and align in bytes.
470  *
471  * The call will sleep for the pagedaemon to attempt to free memory.
472  * The pagedaemon may decide its not possible to free enough memory, causing
473  * the allocation to fail.
474  */
475 int
476 uvm_pmr_alloc_piglet(vaddr_t *va, paddr_t *pa, vsize_t sz, paddr_t align)
477 {
478 	paddr_t			 pg_addr, piglet_addr;
479 	struct uvm_pmemrange	*pmr;
480 	struct vm_page		*pig_pg, *pg;
481 	struct pglist		 pageq;
482 	int			 pdaemon_woken;
483 	vaddr_t			 piglet_va;
484 
485 	/* Ensure align is a power of 2 */
486 	KASSERT((align & (align - 1)) == 0);
487 
488 	pdaemon_woken = 0; /* Didn't wake the pagedaemon. */
489 
490 	/*
491 	 * Fixup arguments: align must be at least PAGE_SIZE,
492 	 * sz will be converted to pagecount, since that is what
493 	 * pmemrange uses internally.
494 	 */
495 	if (align < PAGE_SIZE)
496 		align = PAGE_SIZE;
497 	sz = round_page(sz);
498 
499 	uvm_lock_fpageq();
500 
501 	TAILQ_FOREACH_REVERSE(pmr, &uvm.pmr_control.use, uvm_pmemrange_use,
502 	    pmr_use) {
503 retry:
504 		/*
505 		 * Search for a range with enough space.
506 		 * Use the address tree, to ensure the range is as low as
507 		 * possible.
508 		 */
509 		RB_FOREACH(pig_pg, uvm_pmr_addr, &pmr->addr) {
510 			pg_addr = VM_PAGE_TO_PHYS(pig_pg);
511 			piglet_addr = (pg_addr + (align - 1)) & ~(align - 1);
512 
513 			if (atop(pg_addr) + pig_pg->fpgsz >=
514 			    atop(piglet_addr) + atop(sz))
515 				goto found;
516 		}
517 	}
518 
519 	/*
520 	 * Try to coerce the pagedaemon into freeing memory
521 	 * for the piglet.
522 	 *
523 	 * pdaemon_woken is set to prevent the code from
524 	 * falling into an endless loop.
525 	 */
526 	if (!pdaemon_woken) {
527 		pdaemon_woken = 1;
528 		if (uvm_wait_pla(ptoa(pmr->low), ptoa(pmr->high) - 1,
529 		    sz, UVM_PLA_FAILOK) == 0)
530 			goto retry;
531 	}
532 
533 	/* Return failure. */
534 	uvm_unlock_fpageq();
535 	return ENOMEM;
536 
537 found:
538 	/*
539 	 * Extract piglet from pigpen.
540 	 */
541 	TAILQ_INIT(&pageq);
542 	uvm_pmr_extract_range(pmr, pig_pg,
543 	    atop(piglet_addr), atop(piglet_addr) + atop(sz), &pageq);
544 
545 	*pa = piglet_addr;
546 	uvmexp.free -= atop(sz);
547 
548 	/*
549 	 * Update pg flags.
550 	 *
551 	 * Note that we trash the sz argument now.
552 	 */
553 	TAILQ_FOREACH(pg, &pageq, pageq) {
554 		KASSERT(pg->pg_flags & PQ_FREE);
555 
556 		atomic_clearbits_int(&pg->pg_flags, PG_PMAPMASK);
557 
558 		if (pg->pg_flags & PG_ZERO)
559 			uvmexp.zeropages--;
560 		atomic_clearbits_int(&pg->pg_flags,
561 		    PG_ZERO|PQ_FREE);
562 
563 		pg->uobject = NULL;
564 		pg->uanon = NULL;
565 		pg->pg_version++;
566 	}
567 
568 	uvm_unlock_fpageq();
569 
570 	/*
571 	 * Now allocate a va.
572 	 * Use direct mappings for the pages.
573 	 */
574 
575 	piglet_va = *va = (vaddr_t)km_alloc(sz, &kv_any, &kp_none, &kd_waitok);
576 	if (!piglet_va) {
577 		uvm_pglistfree(&pageq);
578 		return ENOMEM;
579 	}
580 
581 	/*
582 	 * Map piglet to va.
583 	 */
584 	TAILQ_FOREACH(pg, &pageq, pageq) {
585 		pmap_kenter_pa(piglet_va, VM_PAGE_TO_PHYS(pg), UVM_PROT_RW);
586 		piglet_va += PAGE_SIZE;
587 	}
588 	pmap_update(pmap_kernel());
589 
590 	return 0;
591 }
592 
593 /*
594  * Free a piglet area.
595  */
596 void
597 uvm_pmr_free_piglet(vaddr_t va, vsize_t sz)
598 {
599 	paddr_t			 pa;
600 	struct vm_page		*pg;
601 
602 	/*
603 	 * Fix parameters.
604 	 */
605 	sz = round_page(sz);
606 
607 	/*
608 	 * Find the first page in piglet.
609 	 * Since piglets are contiguous, the first pg is all we need.
610 	 */
611 	if (!pmap_extract(pmap_kernel(), va, &pa))
612 		panic("uvm_pmr_free_piglet: piglet 0x%lx has no pages", va);
613 	pg = PHYS_TO_VM_PAGE(pa);
614 	if (pg == NULL)
615 		panic("uvm_pmr_free_piglet: unmanaged page 0x%lx", pa);
616 
617 	/*
618 	 * Unmap.
619 	 */
620 	pmap_kremove(va, sz);
621 	pmap_update(pmap_kernel());
622 
623 	/*
624 	 * Free the physical and virtual memory.
625 	 */
626 	uvm_pmr_freepages(pg, atop(sz));
627 	km_free((void *)va, sz, &kv_any, &kp_none);
628 }
629 
630 /*
631  * Physmem RLE compression support.
632  *
633  * Given a physical page address, return the number of pages starting at the
634  * address that are free.  Clamps to the number of pages in
635  * HIBERNATE_CHUNK_SIZE. Returns 0 if the page at addr is not free.
636  */
637 int
638 uvm_page_rle(paddr_t addr)
639 {
640 	struct vm_page		*pg, *pg_end;
641 	struct vm_physseg	*vmp;
642 	int			 pseg_idx, off_idx;
643 
644 	pseg_idx = vm_physseg_find(atop(addr), &off_idx);
645 	if (pseg_idx == -1)
646 		return 0;
647 
648 	vmp = &vm_physmem[pseg_idx];
649 	pg = &vmp->pgs[off_idx];
650 	if (!(pg->pg_flags & PQ_FREE))
651 		return 0;
652 
653 	/*
654 	 * Search for the first non-free page after pg.
655 	 * Note that the page may not be the first page in a free pmemrange,
656 	 * therefore pg->fpgsz cannot be used.
657 	 */
658 	for (pg_end = pg; pg_end <= vmp->lastpg &&
659 	    (pg_end->pg_flags & PQ_FREE) == PQ_FREE; pg_end++)
660 		;
661 	return min((pg_end - pg), HIBERNATE_CHUNK_SIZE/PAGE_SIZE);
662 }
663 
664 /*
665  * Fills out the hibernate_info union pointed to by hiber_info
666  * with information about this machine (swap signature block
667  * offsets, number of memory ranges, kernel in use, etc)
668  */
669 int
670 get_hibernate_info(union hibernate_info *hib, int suspend)
671 {
672 	int chunktable_size;
673 	struct disklabel dl;
674 	char err_string[128], *dl_ret;
675 
676 #ifndef NO_PROPOLICE
677 	/* Save propolice guard */
678 	hib->guard = __guard_local;
679 #endif /* ! NO_PROPOLICE */
680 
681 	/* Determine I/O function to use */
682 	hib->io_func = get_hibernate_io_function();
683 	if (hib->io_func == NULL)
684 		return (1);
685 
686 	/* Calculate hibernate device */
687 	hib->dev = swdevt[0].sw_dev;
688 
689 	/* Read disklabel (used to calculate signature and image offsets) */
690 	dl_ret = disk_readlabel(&dl, hib->dev, err_string, 128);
691 
692 	if (dl_ret) {
693 		printf("Hibernate error reading disklabel: %s\n", dl_ret);
694 		return (1);
695 	}
696 
697 	/* Make sure we have a swap partition. */
698 	if (dl.d_partitions[1].p_fstype != FS_SWAP ||
699 	    DL_GETPSIZE(&dl.d_partitions[1]) == 0)
700 		return (1);
701 
702 	/* Make sure the signature can fit in one block */
703 	if (sizeof(union hibernate_info) > DEV_BSIZE)
704 		return (1);
705 
706 	/* Magic number */
707 	hib->magic = HIBERNATE_MAGIC;
708 
709 	/* Calculate signature block location */
710 	hib->sig_offset = DL_GETPSIZE(&dl.d_partitions[1]) -
711 	    sizeof(union hibernate_info)/DEV_BSIZE;
712 
713 	chunktable_size = HIBERNATE_CHUNK_TABLE_SIZE / DEV_BSIZE;
714 
715 	/* Stash kernel version information */
716 	memset(&hib->kernel_version, 0, 128);
717 	bcopy(version, &hib->kernel_version,
718 	    min(strlen(version), sizeof(hib->kernel_version)-1));
719 
720 	if (suspend) {
721 		/* Allocate piglet region */
722 		if (uvm_pmr_alloc_piglet(&hib->piglet_va,
723 		    &hib->piglet_pa, HIBERNATE_CHUNK_SIZE*3,
724 		    HIBERNATE_CHUNK_SIZE)) {
725 			printf("Hibernate failed to allocate the piglet\n");
726 			return (1);
727 		}
728 		hib->io_page = (void *)hib->piglet_va;
729 
730 		/*
731 		 * Initialization of the hibernate IO function for drivers
732 		 * that need to do prep work (such as allocating memory or
733 		 * setting up data structures that cannot safely be done
734 		 * during suspend without causing side effects). There is
735 		 * a matching HIB_DONE call performed after the write is
736 		 * completed.
737 		 */
738 		if (hib->io_func(hib->dev, DL_GETPOFFSET(&dl.d_partitions[1]),
739 		    (vaddr_t)NULL, DL_GETPSIZE(&dl.d_partitions[1]),
740 		    HIB_INIT, hib->io_page))
741 			goto fail;
742 
743 	} else {
744 		/*
745 		 * Resuming kernels use a regular I/O page since we won't
746 		 * have access to the suspended kernel's piglet VA at this
747 		 * point. No need to free this I/O page as it will vanish
748 		 * as part of the resume.
749 		 */
750 		hib->io_page = malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT);
751 		if (!hib->io_page)
752 			return (1);
753 	}
754 
755 
756 	if (get_hibernate_info_md(hib))
757 		goto fail;
758 
759 
760 	return (0);
761 fail:
762 	if (suspend)
763 		uvm_pmr_free_piglet(hib->piglet_va,
764 		    HIBERNATE_CHUNK_SIZE * 3);
765 
766 	return (1);
767 }
768 
769 /*
770  * Allocate nitems*size bytes from the hiballoc area presently in use
771  */
772 void *
773 hibernate_zlib_alloc(void *unused, int nitems, int size)
774 {
775 	struct hibernate_zlib_state *hibernate_state;
776 
777 	hibernate_state =
778 	    (struct hibernate_zlib_state *)HIBERNATE_HIBALLOC_PAGE;
779 
780 	return hib_alloc(&hibernate_state->hiballoc_arena, nitems*size);
781 }
782 
783 /*
784  * Free the memory pointed to by addr in the hiballoc area presently in
785  * use
786  */
787 void
788 hibernate_zlib_free(void *unused, void *addr)
789 {
790 	struct hibernate_zlib_state *hibernate_state;
791 
792 	hibernate_state =
793 	    (struct hibernate_zlib_state *)HIBERNATE_HIBALLOC_PAGE;
794 
795 	hib_free(&hibernate_state->hiballoc_arena, addr);
796 }
797 
798 /*
799  * Inflate next page of data from the image stream
800  */
801 int
802 hibernate_inflate_page(void)
803 {
804 	struct hibernate_zlib_state *hibernate_state;
805 	int i;
806 
807 	hibernate_state =
808 	    (struct hibernate_zlib_state *)HIBERNATE_HIBALLOC_PAGE;
809 
810 	/* Set up the stream for inflate */
811 	hibernate_state->hib_stream.next_out = (char *)HIBERNATE_INFLATE_PAGE;
812 	hibernate_state->hib_stream.avail_out = PAGE_SIZE;
813 
814 	/* Process next block of data */
815 	i = inflate(&hibernate_state->hib_stream, Z_SYNC_FLUSH);
816 	if (i != Z_OK && i != Z_STREAM_END) {
817 		/*
818 		 * XXX - this will likely reboot/hang most machines
819 		 *       since the console output buffer will be unmapped,
820 		 *       but there's not much else we can do here.
821 		 */
822 		panic("inflate error");
823 	}
824 
825 	/* We should always have extracted a full page ... */
826 	if (hibernate_state->hib_stream.avail_out != 0) {
827 		/*
828 		 * XXX - this will likely reboot/hang most machines
829 		 *       since the console output buffer will be unmapped,
830 		 *       but there's not much else we can do here.
831 		 */
832 		panic("incomplete page");
833 	}
834 
835 	return (i == Z_STREAM_END);
836 }
837 
838 /*
839  * Inflate size bytes from src into dest, skipping any pages in
840  * [src..dest] that are special (see hibernate_inflate_skip)
841  *
842  * This function executes while using the resume-time stack
843  * and pmap, and therefore cannot use ddb/printf/etc. Doing so
844  * will likely hang or reset the machine since the console output buffer
845  * will be unmapped.
846  */
847 void
848 hibernate_inflate_region(union hibernate_info *hib, paddr_t dest,
849     paddr_t src, size_t size)
850 {
851 	int end_stream = 0 ;
852 	struct hibernate_zlib_state *hibernate_state;
853 
854 	hibernate_state =
855 	    (struct hibernate_zlib_state *)HIBERNATE_HIBALLOC_PAGE;
856 
857 	hibernate_state->hib_stream.next_in = (char *)src;
858 	hibernate_state->hib_stream.avail_in = size;
859 
860 	do {
861 		/*
862 		 * Is this a special page? If yes, redirect the
863 		 * inflate output to a scratch page (eg, discard it)
864 		 */
865 		if (hibernate_inflate_skip(hib, dest)) {
866 			hibernate_enter_resume_mapping(
867 			    HIBERNATE_INFLATE_PAGE,
868 			    HIBERNATE_INFLATE_PAGE, 0);
869 		} else {
870 			hibernate_enter_resume_mapping(
871 			    HIBERNATE_INFLATE_PAGE, dest, 0);
872 		}
873 
874 		hibernate_flush();
875 		end_stream = hibernate_inflate_page();
876 
877 		dest += PAGE_SIZE;
878 	} while (!end_stream);
879 }
880 
881 /*
882  * deflate from src into the I/O page, up to 'remaining' bytes
883  *
884  * Returns number of input bytes consumed, and may reset
885  * the 'remaining' parameter if not all the output space was consumed
886  * (this information is needed to know how much to write to disk
887  */
888 size_t
889 hibernate_deflate(union hibernate_info *hib, paddr_t src,
890     size_t *remaining)
891 {
892 	vaddr_t hibernate_io_page = hib->piglet_va + PAGE_SIZE;
893 	struct hibernate_zlib_state *hibernate_state;
894 
895 	hibernate_state =
896 	    (struct hibernate_zlib_state *)HIBERNATE_HIBALLOC_PAGE;
897 
898 	/* Set up the stream for deflate */
899 	hibernate_state->hib_stream.next_in = (caddr_t)src;
900 	hibernate_state->hib_stream.avail_in = PAGE_SIZE - (src & PAGE_MASK);
901 	hibernate_state->hib_stream.next_out = (caddr_t)hibernate_io_page +
902 	    (PAGE_SIZE - *remaining);
903 	hibernate_state->hib_stream.avail_out = *remaining;
904 
905 	/* Process next block of data */
906 	if (deflate(&hibernate_state->hib_stream, Z_SYNC_FLUSH) != Z_OK)
907 		panic("hibernate zlib deflate error");
908 
909 	/* Update pointers and return number of bytes consumed */
910 	*remaining = hibernate_state->hib_stream.avail_out;
911 	return (PAGE_SIZE - (src & PAGE_MASK)) -
912 	    hibernate_state->hib_stream.avail_in;
913 }
914 
915 /*
916  * Write the hibernation information specified in hiber_info
917  * to the location in swap previously calculated (last block of
918  * swap), called the "signature block".
919  */
920 int
921 hibernate_write_signature(union hibernate_info *hib)
922 {
923 	/* Write hibernate info to disk */
924 	return (hib->io_func(hib->dev, hib->sig_offset,
925 	    (vaddr_t)hib, DEV_BSIZE, HIB_W,
926 	    hib->io_page));
927 }
928 
929 /*
930  * Write the memory chunk table to the area in swap immediately
931  * preceding the signature block. The chunk table is stored
932  * in the piglet when this function is called.  Returns errno.
933  */
934 int
935 hibernate_write_chunktable(union hibernate_info *hib)
936 {
937 	struct hibernate_disk_chunk *chunks;
938 	vaddr_t hibernate_chunk_table_start;
939 	size_t hibernate_chunk_table_size;
940 	int i, err;
941 
942 	hibernate_chunk_table_size = HIBERNATE_CHUNK_TABLE_SIZE;
943 
944 	hibernate_chunk_table_start = hib->piglet_va +
945 	    HIBERNATE_CHUNK_SIZE;
946 
947 	chunks = (struct hibernate_disk_chunk *)(hib->piglet_va +
948 	    HIBERNATE_CHUNK_SIZE);
949 
950 	/* Write chunk table */
951 	for (i = 0; i < hibernate_chunk_table_size; i += MAXPHYS) {
952 		if ((err = hib->io_func(hib->dev,
953 		    hib->chunktable_offset + (i/DEV_BSIZE),
954 		    (vaddr_t)(hibernate_chunk_table_start + i),
955 		    MAXPHYS, HIB_W, hib->io_page))) {
956 			DPRINTF("chunktable write error: %d\n", err);
957 			return (err);
958 		}
959 	}
960 
961 	return (0);
962 }
963 
964 /*
965  * Write an empty hiber_info to the swap signature block, which is
966  * guaranteed to not match any valid hib.
967  */
968 int
969 hibernate_clear_signature(void)
970 {
971 	union hibernate_info blank_hiber_info;
972 	union hibernate_info hib;
973 
974 	/* Zero out a blank hiber_info */
975 	memset(&blank_hiber_info, 0, sizeof(union hibernate_info));
976 
977 	/* Get the signature block location */
978 	if (get_hibernate_info(&hib, 0))
979 		return (1);
980 
981 	/* Write (zeroed) hibernate info to disk */
982 	DPRINTF("clearing hibernate signature block location: %lld\n",
983 		hib.sig_offset);
984 	if (hibernate_block_io(&hib,
985 	    hib.sig_offset,
986 	    DEV_BSIZE, (vaddr_t)&blank_hiber_info, 1))
987 		printf("Warning: could not clear hibernate signature\n");
988 
989 	return (0);
990 }
991 
992 /*
993  * Check chunk range overlap when calculating whether or not to copy a
994  * compressed chunk to the piglet area before decompressing.
995  *
996  * returns zero if the ranges do not overlap, non-zero otherwise.
997  */
998 int
999 hibernate_check_overlap(paddr_t r1s, paddr_t r1e, paddr_t r2s, paddr_t r2e)
1000 {
1001 	/* case A : end of r1 overlaps start of r2 */
1002 	if (r1s < r2s && r1e > r2s)
1003 		return (1);
1004 
1005 	/* case B : r1 entirely inside r2 */
1006 	if (r1s >= r2s && r1e <= r2e)
1007 		return (1);
1008 
1009 	/* case C : r2 entirely inside r1 */
1010 	if (r2s >= r1s && r2e <= r1e)
1011 		return (1);
1012 
1013 	/* case D : end of r2 overlaps start of r1 */
1014 	if (r2s < r1s && r2e > r1s)
1015 		return (1);
1016 
1017 	return (0);
1018 }
1019 
1020 /*
1021  * Compare two hibernate_infos to determine if they are the same (eg,
1022  * we should be performing a hibernate resume on this machine.
1023  * Not all fields are checked - just enough to verify that the machine
1024  * has the same memory configuration and kernel as the one that
1025  * wrote the signature previously.
1026  */
1027 int
1028 hibernate_compare_signature(union hibernate_info *mine,
1029     union hibernate_info *disk)
1030 {
1031 	u_int i;
1032 
1033 	if (mine->nranges != disk->nranges) {
1034 		DPRINTF("hibernate memory range count mismatch\n");
1035 		return (1);
1036 	}
1037 
1038 	if (strcmp(mine->kernel_version, disk->kernel_version) != 0) {
1039 		DPRINTF("hibernate kernel version mismatch\n");
1040 		return (1);
1041 	}
1042 
1043 	for (i = 0; i < mine->nranges; i++) {
1044 		if ((mine->ranges[i].base != disk->ranges[i].base) ||
1045 		    (mine->ranges[i].end != disk->ranges[i].end) ) {
1046 			DPRINTF("hib range %d mismatch [%p-%p != %p-%p]\n",
1047 				i,
1048 				(void *)mine->ranges[i].base,
1049 				(void *)mine->ranges[i].end,
1050 				(void *)disk->ranges[i].base,
1051 				(void *)disk->ranges[i].end);
1052 			return (1);
1053 		}
1054 	}
1055 
1056 	return (0);
1057 }
1058 
1059 /*
1060  * Transfers xfer_size bytes between the hibernate device specified in
1061  * hib_info at offset blkctr and the vaddr specified at dest.
1062  *
1063  * Separate offsets and pages are used to handle misaligned reads (reads
1064  * that span a page boundary).
1065  *
1066  * blkctr specifies a relative offset (relative to the start of swap),
1067  * not an absolute disk offset
1068  *
1069  */
1070 int
1071 hibernate_block_io(union hibernate_info *hib, daddr_t blkctr,
1072     size_t xfer_size, vaddr_t dest, int iswrite)
1073 {
1074 	struct buf *bp;
1075 	struct bdevsw *bdsw;
1076 	int error;
1077 
1078 	bp = geteblk(xfer_size);
1079 	bdsw = &bdevsw[major(hib->dev)];
1080 
1081 	error = (*bdsw->d_open)(hib->dev, FREAD, S_IFCHR, curproc);
1082 	if (error) {
1083 		printf("hibernate_block_io open failed\n");
1084 		return (1);
1085 	}
1086 
1087 	if (iswrite)
1088 		bcopy((caddr_t)dest, bp->b_data, xfer_size);
1089 
1090 	bp->b_bcount = xfer_size;
1091 	bp->b_blkno = blkctr;
1092 	CLR(bp->b_flags, B_READ | B_WRITE | B_DONE);
1093 	SET(bp->b_flags, B_BUSY | (iswrite ? B_WRITE : B_READ) | B_RAW);
1094 	bp->b_dev = hib->dev;
1095 	(*bdsw->d_strategy)(bp);
1096 
1097 	error = biowait(bp);
1098 	if (error) {
1099 		printf("hib block_io biowait error %d blk %lld size %zu\n",
1100 			error, (long long)blkctr, xfer_size);
1101 		error = (*bdsw->d_close)(hib->dev, 0, S_IFCHR,
1102 		    curproc);
1103 		if (error)
1104 			printf("hibernate_block_io error close failed\n");
1105 		return (1);
1106 	}
1107 
1108 	error = (*bdsw->d_close)(hib->dev, FREAD, S_IFCHR, curproc);
1109 	if (error) {
1110 		printf("hibernate_block_io close failed\n");
1111 		return (1);
1112 	}
1113 
1114 	if (!iswrite)
1115 		bcopy(bp->b_data, (caddr_t)dest, xfer_size);
1116 
1117 	bp->b_flags |= B_INVAL;
1118 	brelse(bp);
1119 
1120 	return (0);
1121 }
1122 
1123 /*
1124  * Reads the signature block from swap, checks against the current machine's
1125  * information. If the information matches, perform a resume by reading the
1126  * saved image into the pig area, and unpacking.
1127  */
1128 void
1129 hibernate_resume(void)
1130 {
1131 	union hibernate_info hib;
1132 	int s;
1133 
1134 	/* Get current running machine's hibernate info */
1135 	memset(&hib, 0, sizeof(hib));
1136 	if (get_hibernate_info(&hib, 0)) {
1137 		DPRINTF("couldn't retrieve machine's hibernate info\n");
1138 		return;
1139 	}
1140 
1141 	/* Read hibernate info from disk */
1142 	s = splbio();
1143 
1144 	DPRINTF("reading hibernate signature block location: %lld\n",
1145 		hib.sig_offset);
1146 
1147 	if (hibernate_block_io(&hib,
1148 	    hib.sig_offset,
1149 	    DEV_BSIZE, (vaddr_t)&disk_hib, 0)) {
1150 		DPRINTF("error in hibernate read");
1151 		splx(s);
1152 		return;
1153 	}
1154 
1155 	/* Check magic number */
1156 	if (disk_hib.magic != HIBERNATE_MAGIC) {
1157 		DPRINTF("wrong magic number in hibernate signature: %x\n",
1158 			disk_hib.magic);
1159 		splx(s);
1160 		return;
1161 	}
1162 
1163 	/*
1164 	 * We (possibly) found a hibernate signature. Clear signature first,
1165 	 * to prevent accidental resume or endless resume cycles later.
1166 	 */
1167 	if (hibernate_clear_signature()) {
1168 		DPRINTF("error clearing hibernate signature block\n");
1169 		splx(s);
1170 		return;
1171 	}
1172 
1173 	/*
1174 	 * If on-disk and in-memory hibernate signatures match,
1175 	 * this means we should do a resume from hibernate.
1176 	 */
1177 	if (hibernate_compare_signature(&hib, &disk_hib)) {
1178 		DPRINTF("mismatched hibernate signature block\n");
1179 		splx(s);
1180 		return;
1181 	}
1182 
1183 #ifdef MULTIPROCESSOR
1184 	hibernate_quiesce_cpus();
1185 #endif /* MULTIPROCESSOR */
1186 
1187 	/* Read the image from disk into the image (pig) area */
1188 	if (hibernate_read_image(&disk_hib))
1189 		goto fail;
1190 
1191 	if (config_suspend(device_mainbus(), DVACT_QUIESCE) != 0)
1192 		goto fail;
1193 
1194 	(void) splhigh();
1195 	hibernate_disable_intr_machdep();
1196 	cold = 1;
1197 
1198 	if (config_suspend(device_mainbus(), DVACT_SUSPEND) != 0) {
1199 		cold = 0;
1200 		hibernate_enable_intr_machdep();
1201 		goto fail;
1202 	}
1203 
1204 	pmap_kenter_pa(HIBERNATE_HIBALLOC_PAGE, HIBERNATE_HIBALLOC_PAGE,
1205 	    VM_PROT_ALL);
1206 	pmap_activate(curproc);
1207 
1208 	printf("Unpacking image...\n");
1209 
1210 	/* Switch stacks */
1211 	hibernate_switch_stack_machdep();
1212 
1213 #ifndef NO_PROPOLICE
1214 	/* Start using suspended kernel's propolice guard */
1215 	__guard_local = disk_hib.guard;
1216 #endif /* ! NO_PROPOLICE */
1217 
1218 	/* Unpack and resume */
1219 	hibernate_unpack_image(&disk_hib);
1220 
1221 fail:
1222 	splx(s);
1223 	printf("\nUnable to resume hibernated image\n");
1224 }
1225 
1226 /*
1227  * Unpack image from pig area to original location by looping through the
1228  * list of output chunks in the order they should be restored (fchunks).
1229  *
1230  * Note that due to the stack smash protector and the fact that we have
1231  * switched stacks, it is not permitted to return from this function.
1232  */
1233 void
1234 hibernate_unpack_image(union hibernate_info *hib)
1235 {
1236 	struct hibernate_disk_chunk *chunks;
1237 	union hibernate_info local_hib;
1238 	paddr_t image_cur = global_pig_start;
1239 	short i, *fchunks;
1240 	char *pva = (char *)hib->piglet_va;
1241 	struct hibernate_zlib_state *hibernate_state;
1242 
1243 	hibernate_state =
1244 	    (struct hibernate_zlib_state *)HIBERNATE_HIBALLOC_PAGE;
1245 
1246 	/* Mask off based on arch-specific piglet page size */
1247 	pva = (char *)((paddr_t)pva & (PIGLET_PAGE_MASK));
1248 	fchunks = (short *)(pva + (4 * PAGE_SIZE));
1249 
1250 	chunks = (struct hibernate_disk_chunk *)(pva +  HIBERNATE_CHUNK_SIZE);
1251 
1252 	/* Can't use hiber_info that's passed in after this point */
1253 	bcopy(hib, &local_hib, sizeof(union hibernate_info));
1254 
1255 	/*
1256 	 * Point of no return. Once we pass this point, only kernel code can
1257 	 * be accessed. No global variables or other kernel data structures
1258 	 * are guaranteed to be coherent after unpack starts.
1259 	 *
1260 	 * The image is now in high memory (pig area), we unpack from the pig
1261 	 * to the correct location in memory. We'll eventually end up copying
1262 	 * on top of ourself, but we are assured the kernel code here is the
1263 	 * same between the hibernated and resuming kernel, and we are running
1264 	 * on our own stack, so the overwrite is ok.
1265 	 */
1266 	hibernate_activate_resume_pt_machdep();
1267 
1268 	for (i = 0; i < local_hib.chunk_ctr; i++) {
1269 		/* Reset zlib for inflate */
1270 		if (hibernate_zlib_reset(&local_hib, 0) != Z_OK)
1271 			panic("hibernate failed to reset zlib for inflate");
1272 
1273 		hibernate_process_chunk(&local_hib, &chunks[fchunks[i]],
1274 		    image_cur);
1275 
1276 		image_cur += chunks[fchunks[i]].compressed_size;
1277 
1278 	}
1279 
1280 	/*
1281 	 * Resume the loaded kernel by jumping to the MD resume vector.
1282 	 * We won't be returning from this call.
1283 	 */
1284 	hibernate_resume_machdep();
1285 }
1286 
1287 /*
1288  * Bounce a compressed image chunk to the piglet, entering mappings for the
1289  * copied pages as needed
1290  */
1291 void
1292 hibernate_copy_chunk_to_piglet(paddr_t img_cur, vaddr_t piglet, size_t size)
1293 {
1294 	size_t ct, ofs;
1295 	paddr_t src = img_cur;
1296 	vaddr_t dest = piglet;
1297 
1298 	/* Copy first partial page */
1299 	ct = (PAGE_SIZE) - (src & PAGE_MASK);
1300 	ofs = (src & PAGE_MASK);
1301 
1302 	if (ct < PAGE_SIZE) {
1303 		hibernate_enter_resume_mapping(HIBERNATE_INFLATE_PAGE,
1304 			(src - ofs), 0);
1305 		hibernate_flush();
1306 		bcopy((caddr_t)(HIBERNATE_INFLATE_PAGE + ofs), (caddr_t)dest, ct);
1307 		src += ct;
1308 		dest += ct;
1309 	}
1310 
1311 	/* Copy remaining pages */
1312 	while (src < size + img_cur) {
1313 		hibernate_enter_resume_mapping(HIBERNATE_INFLATE_PAGE, src, 0);
1314 		hibernate_flush();
1315 		ct = PAGE_SIZE;
1316 		bcopy((caddr_t)(HIBERNATE_INFLATE_PAGE), (caddr_t)dest, ct);
1317 		hibernate_flush();
1318 		src += ct;
1319 		dest += ct;
1320 	}
1321 }
1322 
1323 /*
1324  * Process a chunk by bouncing it to the piglet, followed by unpacking
1325  */
1326 void
1327 hibernate_process_chunk(union hibernate_info *hib,
1328     struct hibernate_disk_chunk *chunk, paddr_t img_cur)
1329 {
1330 	char *pva = (char *)hib->piglet_va;
1331 
1332 	hibernate_copy_chunk_to_piglet(img_cur,
1333 	 (vaddr_t)(pva + (HIBERNATE_CHUNK_SIZE * 2)), chunk->compressed_size);
1334 
1335 	hibernate_inflate_region(hib, chunk->base,
1336 	    (vaddr_t)(pva + (HIBERNATE_CHUNK_SIZE * 2)),
1337 	    chunk->compressed_size);
1338 }
1339 
1340 /*
1341  * Write a compressed version of this machine's memory to disk, at the
1342  * precalculated swap offset:
1343  *
1344  * end of swap - signature block size - chunk table size - memory size
1345  *
1346  * The function begins by looping through each phys mem range, cutting each
1347  * one into MD sized chunks. These chunks are then compressed individually
1348  * and written out to disk, in phys mem order. Some chunks might compress
1349  * more than others, and for this reason, each chunk's size is recorded
1350  * in the chunk table, which is written to disk after the image has
1351  * properly been compressed and written (in hibernate_write_chunktable).
1352  *
1353  * When this function is called, the machine is nearly suspended - most
1354  * devices are quiesced/suspended, interrupts are off, and cold has
1355  * been set. This means that there can be no side effects once the
1356  * write has started, and the write function itself can also have no
1357  * side effects. This also means no printfs are permitted (since printf
1358  * has side effects.)
1359  *
1360  * Return values :
1361  *
1362  * 0      - success
1363  * EIO    - I/O error occurred writing the chunks
1364  * EINVAL - Failed to write a complete range
1365  * ENOMEM - Memory allocation failure during preparation of the zlib arena
1366  */
1367 int
1368 hibernate_write_chunks(union hibernate_info *hib)
1369 {
1370 	paddr_t range_base, range_end, inaddr, temp_inaddr;
1371 	size_t nblocks, out_remaining, used;
1372 	struct hibernate_disk_chunk *chunks;
1373 	vaddr_t hibernate_io_page = hib->piglet_va + PAGE_SIZE;
1374 	daddr_t blkctr = 0;
1375 	int i, err;
1376 	struct hibernate_zlib_state *hibernate_state;
1377 
1378 	hibernate_state =
1379 	    (struct hibernate_zlib_state *)HIBERNATE_HIBALLOC_PAGE;
1380 
1381 	hib->chunk_ctr = 0;
1382 
1383 	/*
1384 	 * Allocate VA for the temp and copy page.
1385 	 *
1386 	 * These will become part of the suspended kernel and will
1387 	 * be freed in hibernate_free, upon resume.
1388 	 */
1389 	hibernate_temp_page = (vaddr_t)km_alloc(PAGE_SIZE, &kv_any,
1390 	    &kp_none, &kd_nowait);
1391 	if (!hibernate_temp_page)
1392 		return (ENOMEM);
1393 
1394 	hibernate_copy_page = (vaddr_t)km_alloc(PAGE_SIZE, &kv_any,
1395 	    &kp_none, &kd_nowait);
1396 	if (!hibernate_copy_page) {
1397 		DPRINTF("out of memory allocating hibernate_copy_page\n");
1398 		return (ENOMEM);
1399 	}
1400 
1401 	pmap_kenter_pa(hibernate_copy_page,
1402 	    (hib->piglet_pa + 3*PAGE_SIZE), VM_PROT_ALL);
1403 
1404 	pmap_activate(curproc);
1405 
1406 	chunks = (struct hibernate_disk_chunk *)(hib->piglet_va +
1407 	    HIBERNATE_CHUNK_SIZE);
1408 
1409 	/* Calculate the chunk regions */
1410 	for (i = 0; i < hib->nranges; i++) {
1411 		range_base = hib->ranges[i].base;
1412 		range_end = hib->ranges[i].end;
1413 
1414 		inaddr = range_base;
1415 
1416 		while (inaddr < range_end) {
1417 			chunks[hib->chunk_ctr].base = inaddr;
1418 			if (inaddr + HIBERNATE_CHUNK_SIZE < range_end)
1419 				chunks[hib->chunk_ctr].end = inaddr +
1420 				    HIBERNATE_CHUNK_SIZE;
1421 			else
1422 				chunks[hib->chunk_ctr].end = range_end;
1423 
1424 			inaddr += HIBERNATE_CHUNK_SIZE;
1425 			hib->chunk_ctr ++;
1426 		}
1427 	}
1428 
1429 	/* Compress and write the chunks in the chunktable */
1430 	for (i = 0; i < hib->chunk_ctr; i++) {
1431 		range_base = chunks[i].base;
1432 		range_end = chunks[i].end;
1433 
1434 		chunks[i].offset = blkctr + hib->image_offset;
1435 
1436 		/* Reset zlib for deflate */
1437 		if (hibernate_zlib_reset(hib, 1) != Z_OK) {
1438 			DPRINTF("hibernate_zlib_reset failed for deflate\n");
1439 			return (ENOMEM);
1440 		}
1441 
1442 		inaddr = range_base;
1443 
1444 		/*
1445 		 * For each range, loop through its phys mem region
1446 		 * and write out the chunks (the last chunk might be
1447 		 * smaller than the chunk size).
1448 		 */
1449 		while (inaddr < range_end) {
1450 			out_remaining = PAGE_SIZE;
1451 			while (out_remaining > 0 && inaddr < range_end) {
1452 
1453 				/*
1454 				 * Adjust for regions that are not evenly
1455 				 * divisible by PAGE_SIZE or overflowed
1456 				 * pages from the previous iteration.
1457 				 */
1458 				temp_inaddr = (inaddr & PAGE_MASK) +
1459 				    hibernate_copy_page;
1460 
1461 				/* Deflate from temp_inaddr to IO page */
1462 				if (inaddr != range_end) {
1463 					pmap_kenter_pa(hibernate_temp_page,
1464 					    inaddr & PMAP_PA_MASK, VM_PROT_ALL);
1465 
1466 					pmap_activate(curproc);
1467 
1468 					bcopy((caddr_t)hibernate_temp_page,
1469 					    (caddr_t)hibernate_copy_page,
1470 					    PAGE_SIZE);
1471 					inaddr += hibernate_deflate(hib,
1472 					    temp_inaddr, &out_remaining);
1473 				}
1474 
1475 				if (out_remaining == 0) {
1476 					/* Filled up the page */
1477 					nblocks =
1478 					    PAGE_SIZE / DEV_BSIZE;
1479 
1480 					if ((err = hib->io_func(hib->dev,
1481 					    blkctr + hib->image_offset,
1482 					    (vaddr_t)hibernate_io_page,
1483 					    PAGE_SIZE, HIB_W, hib->io_page))) {
1484 						DPRINTF("hib write error %d\n",
1485 						    err);
1486 						return (err);
1487 					}
1488 
1489 					blkctr += nblocks;
1490 				}
1491 			}
1492 		}
1493 
1494 		if (inaddr != range_end) {
1495 			DPRINTF("deflate range ended prematurely\n");
1496 			return (EINVAL);
1497 		}
1498 
1499 		/*
1500 		 * End of range. Round up to next secsize bytes
1501 		 * after finishing compress
1502 		 */
1503 		if (out_remaining == 0)
1504 			out_remaining = PAGE_SIZE;
1505 
1506 		/* Finish compress */
1507 		hibernate_state->hib_stream.next_in = (caddr_t)inaddr;
1508 		hibernate_state->hib_stream.avail_in = 0;
1509 		hibernate_state->hib_stream.next_out =
1510 		    (caddr_t)hibernate_io_page + (PAGE_SIZE - out_remaining);
1511 
1512 		/* We have an extra output page available for finalize */
1513 		hibernate_state->hib_stream.avail_out =
1514 			out_remaining + PAGE_SIZE;
1515 
1516 		if ((err = deflate(&hibernate_state->hib_stream, Z_FINISH)) !=
1517 		    Z_STREAM_END) {
1518 			DPRINTF("deflate error in output stream: %d\n", err);
1519 			return (err);
1520 		}
1521 
1522 		out_remaining = hibernate_state->hib_stream.avail_out;
1523 
1524 		used = 2*PAGE_SIZE - out_remaining;
1525 		nblocks = used / DEV_BSIZE;
1526 
1527 		/* Round up to next block if needed */
1528 		if (used % DEV_BSIZE != 0)
1529 			nblocks ++;
1530 
1531 		/* Write final block(s) for this chunk */
1532 		if ((err = hib->io_func(hib->dev, blkctr + hib->image_offset,
1533 		    (vaddr_t)hibernate_io_page, nblocks*DEV_BSIZE,
1534 		    HIB_W, hib->io_page))) {
1535 			DPRINTF("hib final write error %d\n", err);
1536 			return (err);
1537 		}
1538 
1539 		blkctr += nblocks;
1540 
1541 		chunks[i].compressed_size = (blkctr + hib->image_offset -
1542 		    chunks[i].offset) * DEV_BSIZE;
1543 	}
1544 
1545 	hib->chunktable_offset = hib->image_offset + blkctr;
1546 	return (0);
1547 }
1548 
1549 /*
1550  * Reset the zlib stream state and allocate a new hiballoc area for either
1551  * inflate or deflate. This function is called once for each hibernate chunk.
1552  * Calling hiballoc_init multiple times is acceptable since the memory it is
1553  * provided is unmanaged memory (stolen). We use the memory provided to us
1554  * by the piglet allocated via the supplied hib.
1555  */
1556 int
1557 hibernate_zlib_reset(union hibernate_info *hib, int deflate)
1558 {
1559 	vaddr_t hibernate_zlib_start;
1560 	size_t hibernate_zlib_size;
1561 	char *pva = (char *)hib->piglet_va;
1562 	struct hibernate_zlib_state *hibernate_state;
1563 
1564 	hibernate_state =
1565 	    (struct hibernate_zlib_state *)HIBERNATE_HIBALLOC_PAGE;
1566 
1567 	if (!deflate)
1568 		pva = (char *)((paddr_t)pva & (PIGLET_PAGE_MASK));
1569 
1570 	hibernate_zlib_start = (vaddr_t)(pva + (28 * PAGE_SIZE));
1571 	hibernate_zlib_size = 80 * PAGE_SIZE;
1572 
1573 	memset((void *)hibernate_zlib_start, 0, hibernate_zlib_size);
1574 	memset(hibernate_state, 0, PAGE_SIZE);
1575 
1576 	/* Set up stream structure */
1577 	hibernate_state->hib_stream.zalloc = (alloc_func)hibernate_zlib_alloc;
1578 	hibernate_state->hib_stream.zfree = (free_func)hibernate_zlib_free;
1579 
1580 	/* Initialize the hiballoc arena for zlib allocs/frees */
1581 	hiballoc_init(&hibernate_state->hiballoc_arena,
1582 	    (caddr_t)hibernate_zlib_start, hibernate_zlib_size);
1583 
1584 	if (deflate) {
1585 		return deflateInit(&hibernate_state->hib_stream,
1586 		    Z_BEST_SPEED);
1587 	} else
1588 		return inflateInit(&hibernate_state->hib_stream);
1589 }
1590 
1591 /*
1592  * Reads the hibernated memory image from disk, whose location and
1593  * size are recorded in hib. Begin by reading the persisted
1594  * chunk table, which records the original chunk placement location
1595  * and compressed size for each. Next, allocate a pig region of
1596  * sufficient size to hold the compressed image. Next, read the
1597  * chunks into the pig area (calling hibernate_read_chunks to do this),
1598  * and finally, if all of the above succeeds, clear the hibernate signature.
1599  * The function will then return to hibernate_resume, which will proceed
1600  * to unpack the pig image to the correct place in memory.
1601  */
1602 int
1603 hibernate_read_image(union hibernate_info *hib)
1604 {
1605 	size_t compressed_size, disk_size, chunktable_size, pig_sz;
1606 	paddr_t image_start, image_end, pig_start, pig_end;
1607 	struct hibernate_disk_chunk *chunks;
1608 	daddr_t blkctr;
1609 	vaddr_t chunktable = (vaddr_t)NULL;
1610 	paddr_t piglet_chunktable = hib->piglet_pa +
1611 	    HIBERNATE_CHUNK_SIZE;
1612 	int i, status;
1613 
1614 	status = 0;
1615 	pmap_activate(curproc);
1616 
1617 	/* Calculate total chunk table size in disk blocks */
1618 	chunktable_size = HIBERNATE_CHUNK_TABLE_SIZE / DEV_BSIZE;
1619 
1620 	blkctr = hib->chunktable_offset;
1621 
1622 	chunktable = (vaddr_t)km_alloc(HIBERNATE_CHUNK_TABLE_SIZE, &kv_any,
1623 	    &kp_none, &kd_nowait);
1624 
1625 	if (!chunktable)
1626 		return (1);
1627 
1628 	/* Map chunktable pages */
1629 	for (i = 0; i < HIBERNATE_CHUNK_TABLE_SIZE; i += PAGE_SIZE)
1630 		pmap_kenter_pa(chunktable + i, piglet_chunktable + i,
1631 		    VM_PROT_ALL);
1632 	pmap_update(pmap_kernel());
1633 
1634 	/* Read the chunktable from disk into the piglet chunktable */
1635 	for (i = 0; i < HIBERNATE_CHUNK_TABLE_SIZE;
1636 	    i += MAXPHYS, blkctr += MAXPHYS/DEV_BSIZE)
1637 		hibernate_block_io(hib, blkctr, MAXPHYS,
1638 		    chunktable + i, 0);
1639 
1640 	blkctr = hib->image_offset;
1641 	compressed_size = 0;
1642 
1643 	chunks = (struct hibernate_disk_chunk *)chunktable;
1644 
1645 	for (i = 0; i < hib->chunk_ctr; i++)
1646 		compressed_size += chunks[i].compressed_size;
1647 
1648 	disk_size = compressed_size;
1649 
1650 	printf("unhibernating @ block %lld length %lu bytes\n",
1651 	    hib->sig_offset - chunktable_size,
1652 	    compressed_size);
1653 
1654 	/* Allocate the pig area */
1655 	pig_sz = compressed_size + HIBERNATE_CHUNK_SIZE;
1656 	if (uvm_pmr_alloc_pig(&pig_start, pig_sz) == ENOMEM) {
1657 		status = 1;
1658 		goto unmap;
1659 	}
1660 
1661 	pig_end = pig_start + pig_sz;
1662 
1663 	/* Calculate image extents. Pig image must end on a chunk boundary. */
1664 	image_end = pig_end & ~(HIBERNATE_CHUNK_SIZE - 1);
1665 	image_start = image_end - disk_size;
1666 
1667 	hibernate_read_chunks(hib, image_start, image_end, disk_size,
1668 	    chunks);
1669 
1670 	/* Prepare the resume time pmap/page table */
1671 	hibernate_populate_resume_pt(hib, image_start, image_end);
1672 
1673 unmap:
1674 	/* Unmap chunktable pages */
1675 	pmap_kremove(chunktable, HIBERNATE_CHUNK_TABLE_SIZE);
1676 	pmap_update(pmap_kernel());
1677 
1678 	return (status);
1679 }
1680 
1681 /*
1682  * Read the hibernated memory chunks from disk (chunk information at this
1683  * point is stored in the piglet) into the pig area specified by
1684  * [pig_start .. pig_end]. Order the chunks so that the final chunk is the
1685  * only chunk with overlap possibilities.
1686  */
1687 int
1688 hibernate_read_chunks(union hibernate_info *hib, paddr_t pig_start,
1689     paddr_t pig_end, size_t image_compr_size,
1690     struct hibernate_disk_chunk *chunks)
1691 {
1692 	paddr_t img_cur, piglet_base;
1693 	daddr_t blkctr;
1694 	size_t processed, compressed_size, read_size;
1695 	int nchunks, nfchunks, num_io_pages;
1696 	vaddr_t tempva, hibernate_fchunk_area;
1697 	short *fchunks, i, j;
1698 
1699 	tempva = (vaddr_t)NULL;
1700 	hibernate_fchunk_area = (vaddr_t)NULL;
1701 	nfchunks = 0;
1702 	piglet_base = hib->piglet_pa;
1703 	global_pig_start = pig_start;
1704 
1705 	pmap_activate(curproc);
1706 
1707 	/*
1708 	 * These mappings go into the resuming kernel's page table, and are
1709 	 * used only during image read. They dissappear from existence
1710 	 * when the suspended kernel is unpacked on top of us.
1711 	 */
1712 	tempva = (vaddr_t)km_alloc(MAXPHYS + PAGE_SIZE, &kv_any, &kp_none,
1713 		&kd_nowait);
1714 	if (!tempva)
1715 		return (1);
1716 	hibernate_fchunk_area = (vaddr_t)km_alloc(24 * PAGE_SIZE, &kv_any,
1717 	    &kp_none, &kd_nowait);
1718 	if (!hibernate_fchunk_area)
1719 		return (1);
1720 
1721 	/* Final output chunk ordering VA */
1722 	fchunks = (short *)hibernate_fchunk_area;
1723 
1724 	/* Map the chunk ordering region */
1725 	for(i = 0; i < 24 ; i++)
1726 		pmap_kenter_pa(hibernate_fchunk_area + (i * PAGE_SIZE),
1727 			piglet_base + ((4 + i) * PAGE_SIZE), VM_PROT_ALL);
1728 	pmap_update(pmap_kernel());
1729 
1730 	nchunks = hib->chunk_ctr;
1731 
1732 	/* Initially start all chunks as unplaced */
1733 	for (i = 0; i < nchunks; i++)
1734 		chunks[i].flags = 0;
1735 
1736 	/*
1737 	 * Search the list for chunks that are outside the pig area. These
1738 	 * can be placed first in the final output list.
1739 	 */
1740 	for (i = 0; i < nchunks; i++) {
1741 		if (chunks[i].end <= pig_start || chunks[i].base >= pig_end) {
1742 			fchunks[nfchunks] = i;
1743 			nfchunks++;
1744 			chunks[i].flags |= HIBERNATE_CHUNK_PLACED;
1745 		}
1746 	}
1747 
1748 	/*
1749 	 * Walk the ordering, place the chunks in ascending memory order.
1750 	 */
1751 	for (i = 0; i < nchunks; i++) {
1752 		if (chunks[i].flags != HIBERNATE_CHUNK_PLACED) {
1753 			fchunks[nfchunks] = i;
1754 			nfchunks++;
1755 			chunks[i].flags = HIBERNATE_CHUNK_PLACED;
1756 		}
1757 	}
1758 
1759 	img_cur = pig_start;
1760 
1761 	for (i = 0; i < nfchunks; i++) {
1762 		blkctr = chunks[fchunks[i]].offset;
1763 		processed = 0;
1764 		compressed_size = chunks[fchunks[i]].compressed_size;
1765 
1766 		while (processed < compressed_size) {
1767 			if (compressed_size - processed >= MAXPHYS)
1768 				read_size = MAXPHYS;
1769 			else
1770 				read_size = compressed_size - processed;
1771 
1772 			/*
1773 			 * We're reading read_size bytes, offset from the
1774 			 * start of a page by img_cur % PAGE_SIZE, so the
1775 			 * end will be read_size + (img_cur % PAGE_SIZE)
1776 			 * from the start of the first page.  Round that
1777 			 * up to the next page size.
1778 			 */
1779 			num_io_pages = (read_size + (img_cur % PAGE_SIZE)
1780 				+ PAGE_SIZE - 1) / PAGE_SIZE;
1781 
1782 			KASSERT(num_io_pages <= MAXPHYS/PAGE_SIZE + 1);
1783 
1784 			/* Map pages for this read */
1785 			for (j = 0; j < num_io_pages; j ++)
1786 				pmap_kenter_pa(tempva + j * PAGE_SIZE,
1787 					img_cur + j * PAGE_SIZE, VM_PROT_ALL);
1788 
1789 			pmap_update(pmap_kernel());
1790 
1791 			hibernate_block_io(hib, blkctr, read_size,
1792 			    tempva + (img_cur & PAGE_MASK), 0);
1793 
1794 			blkctr += (read_size / DEV_BSIZE);
1795 
1796 			pmap_kremove(tempva, num_io_pages * PAGE_SIZE);
1797 			pmap_update(pmap_kernel());
1798 
1799 			processed += read_size;
1800 			img_cur += read_size;
1801 		}
1802 	}
1803 
1804 	pmap_kremove(hibernate_fchunk_area, 24 * PAGE_SIZE);
1805 	pmap_update(pmap_kernel());
1806 
1807 	return (0);
1808 }
1809 
1810 /*
1811  * Hibernating a machine comprises the following operations:
1812  *  1. Calculating this machine's hibernate_info information
1813  *  2. Allocating a piglet and saving the piglet's physaddr
1814  *  3. Calculating the memory chunks
1815  *  4. Writing the compressed chunks to disk
1816  *  5. Writing the chunk table
1817  *  6. Writing the signature block (hibernate_info)
1818  *
1819  * On most architectures, the function calling hibernate_suspend would
1820  * then power off the machine using some MD-specific implementation.
1821  */
1822 int
1823 hibernate_suspend(void)
1824 {
1825 	union hibernate_info hib;
1826 	u_long start, end;
1827 
1828 	/*
1829 	 * Calculate memory ranges, swap offsets, etc.
1830 	 * This also allocates a piglet whose physaddr is stored in
1831 	 * hib->piglet_pa and vaddr stored in hib->piglet_va
1832 	 */
1833 	if (get_hibernate_info(&hib, 1)) {
1834 		DPRINTF("failed to obtain hibernate info\n");
1835 		return (1);
1836 	}
1837 
1838 	/* Find a page-addressed region in swap [start,end] */
1839 	if (uvm_hibswap(hib.dev, &start, &end)) {
1840 		printf("cannot find any swap\n");
1841 		return (1);
1842 	}
1843 
1844 	if (end - start < 1000) {
1845 		printf("%lu\n is too small", end - start);
1846 		return (1);
1847 	}
1848 
1849 	/* Calculate block offsets in swap */
1850 	hib.image_offset = ctod(start);
1851 
1852 	/* XXX side effect */
1853 	DPRINTF("hibernate @ block %lld max-length %lu blocks\n",
1854 	    hib.image_offset, ctod(end) - ctod(start));
1855 
1856 	pmap_kenter_pa(HIBERNATE_HIBALLOC_PAGE, HIBERNATE_HIBALLOC_PAGE,
1857 		VM_PROT_ALL);
1858 	pmap_activate(curproc);
1859 
1860 	/* Stash the piglet VA so we can free it in the resuming kernel */
1861 	global_piglet_va = hib.piglet_va;
1862 
1863 	DPRINTF("hibernate: writing chunks\n");
1864 	if (hibernate_write_chunks(&hib)) {
1865 		DPRINTF("hibernate_write_chunks failed\n");
1866 		return (1);
1867 	}
1868 
1869 	DPRINTF("hibernate: writing chunktable\n");
1870 	if (hibernate_write_chunktable(&hib)) {
1871 		DPRINTF("hibernate_write_chunktable failed\n");
1872 		return (1);
1873 	}
1874 
1875 	DPRINTF("hibernate: writing signature\n");
1876 	if (hibernate_write_signature(&hib)) {
1877 		DPRINTF("hibernate_write_signature failed\n");
1878 		return (1);
1879 	}
1880 
1881 	/* Allow the disk to settle */
1882 	delay(500000);
1883 
1884 	/*
1885 	 * Give the device-specific I/O function a notification that we're
1886 	 * done, and that it can clean up or shutdown as needed.
1887 	 */
1888 	hib.io_func(hib.dev, 0, (vaddr_t)NULL, 0, HIB_DONE, hib.io_page);
1889 
1890 	return (0);
1891 }
1892 
1893 /*
1894  * Free items allocated by hibernate_suspend()
1895  */
1896 void
1897 hibernate_free(void)
1898 {
1899 	if (global_piglet_va)
1900 		uvm_pmr_free_piglet(global_piglet_va,
1901 		    3*HIBERNATE_CHUNK_SIZE);
1902 
1903 	if (hibernate_copy_page)
1904 		pmap_kremove(hibernate_copy_page, PAGE_SIZE);
1905 	if (hibernate_temp_page)
1906 		pmap_kremove(hibernate_temp_page, PAGE_SIZE);
1907 
1908 	pmap_update(pmap_kernel());
1909 
1910 	if (hibernate_copy_page)
1911 		km_free((void *)hibernate_copy_page, PAGE_SIZE,
1912 		    &kv_any, &kp_none);
1913 	if (hibernate_temp_page)
1914 		km_free((void *)hibernate_temp_page, PAGE_SIZE,
1915 		    &kv_any, &kp_none);
1916 
1917 	global_piglet_va = 0;
1918 	hibernate_copy_page = 0;
1919 	hibernate_temp_page = 0;
1920 }
1921