1*2fbc8e98Skrw /* $OpenBSD: subr_hibernate.c,v 1.152 2025/01/24 18:13:29 krw Exp $ */ 2088aa6daSariane 3088aa6daSariane /* 4088aa6daSariane * Copyright (c) 2011 Ariane van der Steldt <ariane@stack.nl> 520703d53Smlarkin * Copyright (c) 2011 Mike Larkin <mlarkin@openbsd.org> 6088aa6daSariane * 7088aa6daSariane * Permission to use, copy, modify, and distribute this software for any 8088aa6daSariane * purpose with or without fee is hereby granted, provided that the above 9088aa6daSariane * copyright notice and this permission notice appear in all copies. 10088aa6daSariane * 11088aa6daSariane * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 12088aa6daSariane * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 13088aa6daSariane * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 14088aa6daSariane * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 15088aa6daSariane * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 16088aa6daSariane * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 17088aa6daSariane * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 18088aa6daSariane */ 19088aa6daSariane 20088aa6daSariane #include <sys/hibernate.h> 21627cf4e5Smlarkin #include <sys/malloc.h> 22088aa6daSariane #include <sys/param.h> 23088aa6daSariane #include <sys/tree.h> 24088aa6daSariane #include <sys/systm.h> 2510aaff22Smlarkin #include <sys/disklabel.h> 26e33b04c7Sderaadt #include <sys/disk.h> 2710aaff22Smlarkin #include <sys/conf.h> 2821e3fba6Smlarkin #include <sys/buf.h> 2921e3fba6Smlarkin #include <sys/fcntl.h> 3021e3fba6Smlarkin #include <sys/stat.h> 3103d1830dStedu #include <sys/atomic.h> 3203d1830dStedu 33b3ad10faSariane #include <uvm/uvm.h> 34cc391fe1Smlarkin #include <uvm/uvm_swap.h> 3503d1830dStedu 3610aaff22Smlarkin #include <machine/hibernate.h> 37088aa6daSariane 383fed8fd5Sguenther /* Make sure the signature can fit in one block */ 39fc6d48fdSkrw CTASSERT((offsetof(union hibernate_info, sec_size) + sizeof(u_int32_t)) <= DEV_BSIZE); 403fed8fd5Sguenther 41b42f10c6Smlarkin /* 42b42f10c6Smlarkin * Hibernate piglet layout information 43b42f10c6Smlarkin * 44b42f10c6Smlarkin * The piglet is a scratch area of memory allocated by the suspending kernel. 45b42f10c6Smlarkin * Its phys and virt addrs are recorded in the signature block. The piglet is 46b42f10c6Smlarkin * used to guarantee an unused area of memory that can be used by the resuming 47b42f10c6Smlarkin * kernel for various things. The piglet is excluded during unpack operations. 4831a59060Smlarkin * The piglet size is presently 4*HIBERNATE_CHUNK_SIZE (typically 4*4MB). 49b42f10c6Smlarkin * 50b42f10c6Smlarkin * Offset from piglet_base Purpose 51b42f10c6Smlarkin * ---------------------------------------------------------------------------- 52965a4b2aSmlarkin * 0 Private page for suspend I/O write functions 53b42f10c6Smlarkin * 1*PAGE_SIZE I/O page used during hibernate suspend 54b484ab48Smlarkin * 2*PAGE_SIZE I/O page used during hibernate suspend 55b42f10c6Smlarkin * 3*PAGE_SIZE copy page used during hibernate suspend 5624aea4fdSmlarkin * 4*PAGE_SIZE final chunk ordering list (24 pages) 577882bc75Smlarkin * 28*PAGE_SIZE RLE utility page 587882bc75Smlarkin * 29*PAGE_SIZE start of hiballoc area 59464029d7Smlarkin * 30*PAGE_SIZE preserved entropy 60464029d7Smlarkin * 110*PAGE_SIZE end of hiballoc area (80 pages) 6165052cacSmlarkin * 366*PAGE_SIZE end of retguard preservation region (256 pages) 62b42f10c6Smlarkin * ... unused 63b42f10c6Smlarkin * HIBERNATE_CHUNK_SIZE start of hibernate chunk table 64b42f10c6Smlarkin * 2*HIBERNATE_CHUNK_SIZE bounce area for chunks being unpacked 6531a59060Smlarkin * 4*HIBERNATE_CHUNK_SIZE end of piglet 66b42f10c6Smlarkin */ 67b42f10c6Smlarkin 6820703d53Smlarkin /* Temporary vaddr ranges used during hibernate */ 6920703d53Smlarkin vaddr_t hibernate_temp_page; 7020703d53Smlarkin vaddr_t hibernate_copy_page; 717882bc75Smlarkin vaddr_t hibernate_rle_page; 7220703d53Smlarkin 7320703d53Smlarkin /* Hibernate info as read from disk during resume */ 74908847d1Sderaadt union hibernate_info disk_hib; 75c3ed0588Skrw struct bdevsw *bdsw; 76259dfafeSmlarkin 77259dfafeSmlarkin /* 78259dfafeSmlarkin * Global copy of the pig start address. This needs to be a global as we 79259dfafeSmlarkin * switch stacks after computing it - it can't be stored on the stack. 80259dfafeSmlarkin */ 8195a70527Smlarkin paddr_t global_pig_start; 82259dfafeSmlarkin 83259dfafeSmlarkin /* 84259dfafeSmlarkin * Global copies of the piglet start addresses (PA/VA). We store these 85259dfafeSmlarkin * as globals to avoid having to carry them around as parameters, as the 86259dfafeSmlarkin * piglet is allocated early and freed late - its lifecycle extends beyond 87259dfafeSmlarkin * that of the hibernate info union which is calculated on suspend/resume. 88259dfafeSmlarkin */ 8995e3d60bSmlarkin vaddr_t global_piglet_va; 90b0c80555Skettenis paddr_t global_piglet_pa; 9122378a8fSmlarkin 9296cbc698Smlarkin /* #define HIB_DEBUG */ 9396cbc698Smlarkin #ifdef HIB_DEBUG 9496cbc698Smlarkin int hib_debug = 99; 9596cbc698Smlarkin #define DPRINTF(x...) do { if (hib_debug) printf(x); } while (0) 9696cbc698Smlarkin #define DNPRINTF(n,x...) do { if (hib_debug > (n)) printf(x); } while (0) 9796cbc698Smlarkin #else 9896cbc698Smlarkin #define DPRINTF(x...) 9996cbc698Smlarkin #define DNPRINTF(n,x...) 10096cbc698Smlarkin #endif 10196cbc698Smlarkin 102bd831450Smlarkin #ifndef NO_PROPOLICE 103bd831450Smlarkin extern long __guard_local; 104bd831450Smlarkin #endif /* ! NO_PROPOLICE */ 105bd831450Smlarkin 1062547ab58Smlarkin /* Retguard phys address (need to skip this region during unpack) */ 1072547ab58Smlarkin paddr_t retguard_start_phys, retguard_end_phys; 1082547ab58Smlarkin extern char __retguard_start, __retguard_end; 1092547ab58Smlarkin 11021eafc1bSmlarkin void hibernate_copy_chunk_to_piglet(paddr_t, vaddr_t, size_t); 1117882bc75Smlarkin int hibernate_calc_rle(paddr_t, paddr_t); 1127882bc75Smlarkin int hibernate_write_rle(union hibernate_info *, paddr_t, paddr_t, daddr_t *, 1137882bc75Smlarkin size_t *); 1147882bc75Smlarkin 1157882bc75Smlarkin #define MAX_RLE (HIBERNATE_CHUNK_SIZE / PAGE_SIZE) 11621eafc1bSmlarkin 117088aa6daSariane /* 118088aa6daSariane * Hib alloc enforced alignment. 119088aa6daSariane */ 120088aa6daSariane #define HIB_ALIGN 8 /* bytes alignment */ 121088aa6daSariane 122088aa6daSariane /* 123088aa6daSariane * sizeof builtin operation, but with alignment constraint. 124088aa6daSariane */ 125088aa6daSariane #define HIB_SIZEOF(_type) roundup(sizeof(_type), HIB_ALIGN) 126088aa6daSariane 127e33b04c7Sderaadt struct hiballoc_entry { 128088aa6daSariane size_t hibe_use; 129088aa6daSariane size_t hibe_space; 130938d8250Sdlg RBT_ENTRY(hiballoc_entry) hibe_entry; 131088aa6daSariane }; 132088aa6daSariane 1339783fa9dSkrw #define IO_TYPE_IMG 1 1349783fa9dSkrw #define IO_TYPE_CHK 2 1359783fa9dSkrw #define IO_TYPE_SIG 3 1369783fa9dSkrw 1379783fa9dSkrw int 1389783fa9dSkrw hibernate_write(union hibernate_info *hib, daddr_t offset, vaddr_t addr, 1399783fa9dSkrw size_t size, int io_type) 1409783fa9dSkrw { 1419783fa9dSkrw const uint64_t blks = btodb(size); 1429783fa9dSkrw 1439783fa9dSkrw if (hib == NULL || offset < 0 || blks == 0) { 1449783fa9dSkrw printf("%s: hib is NULL, offset < 0 or blks == 0\n", __func__); 1459783fa9dSkrw return (EINVAL); 1469783fa9dSkrw } 1479783fa9dSkrw 1489783fa9dSkrw switch (io_type) { 1499783fa9dSkrw case IO_TYPE_IMG: 1509783fa9dSkrw if (offset + blks > hib->image_size) { 1519783fa9dSkrw printf("%s: image write is out of bounds: " 1529783fa9dSkrw "offset-image=%lld, offset-write=%lld, blks=%llu\n", 1539783fa9dSkrw __func__, hib->image_offset, offset, blks); 1549783fa9dSkrw return (EIO); 1559783fa9dSkrw } 1569783fa9dSkrw offset += hib->image_offset; 1579783fa9dSkrw break; 1589783fa9dSkrw case IO_TYPE_CHK: 1599783fa9dSkrw if (offset + blks > btodb(HIBERNATE_CHUNK_TABLE_SIZE)) { 1609783fa9dSkrw printf("%s: chunktable write is out of bounds: " 1619783fa9dSkrw "offset-chunk=%lld, offset-write=%lld, blks=%llu\n", 1629783fa9dSkrw __func__, hib->chunktable_offset, offset, blks); 1639783fa9dSkrw return (EIO); 1649783fa9dSkrw } 1659783fa9dSkrw offset += hib->chunktable_offset; 1669783fa9dSkrw break; 1679783fa9dSkrw case IO_TYPE_SIG: 1689783fa9dSkrw if (offset != hib->sig_offset || size != hib->sec_size) { 1699783fa9dSkrw printf("%s: signature write is out of bounds: " 1709783fa9dSkrw "offset-sig=%lld, offset-write=%lld, blks=%llu\n", 1719783fa9dSkrw __func__, hib->sig_offset, offset, blks); 1729783fa9dSkrw return (EIO); 1739783fa9dSkrw } 1749783fa9dSkrw break; 1759783fa9dSkrw default: 1769783fa9dSkrw printf("%s: unsupported io type %d\n", __func__, io_type); 1779783fa9dSkrw return (EINVAL); 1789783fa9dSkrw } 1799783fa9dSkrw 1809783fa9dSkrw return (hib->io_func(hib->dev, offset, addr, size, HIB_W, 1819783fa9dSkrw hib->io_page)); 1829783fa9dSkrw } 1839783fa9dSkrw 184088aa6daSariane /* 18508170551Smlarkin * Sort hibernate memory ranges by ascending PA 18608170551Smlarkin */ 18708170551Smlarkin void 18808170551Smlarkin hibernate_sort_ranges(union hibernate_info *hib_info) 18908170551Smlarkin { 19008170551Smlarkin int i, j; 19108170551Smlarkin struct hibernate_memory_range *ranges; 19208170551Smlarkin paddr_t base, end; 19308170551Smlarkin 19408170551Smlarkin ranges = hib_info->ranges; 19508170551Smlarkin 19608170551Smlarkin for (i = 1; i < hib_info->nranges; i++) { 19708170551Smlarkin j = i; 19808170551Smlarkin while (j > 0 && ranges[j - 1].base > ranges[j].base) { 19908170551Smlarkin base = ranges[j].base; 20008170551Smlarkin end = ranges[j].end; 20108170551Smlarkin ranges[j].base = ranges[j - 1].base; 20208170551Smlarkin ranges[j].end = ranges[j - 1].end; 20308170551Smlarkin ranges[j - 1].base = base; 20408170551Smlarkin ranges[j - 1].end = end; 20508170551Smlarkin j--; 20608170551Smlarkin } 20708170551Smlarkin } 20808170551Smlarkin } 20908170551Smlarkin 21008170551Smlarkin /* 211088aa6daSariane * Compare hiballoc entries based on the address they manage. 212088aa6daSariane * 213088aa6daSariane * Since the address is fixed, relative to struct hiballoc_entry, 214088aa6daSariane * we just compare the hiballoc_entry pointers. 215088aa6daSariane */ 216088aa6daSariane static __inline int 217938d8250Sdlg hibe_cmp(const struct hiballoc_entry *l, const struct hiballoc_entry *r) 218088aa6daSariane { 2190d3478fdSakfaew vaddr_t vl = (vaddr_t)l; 2200d3478fdSakfaew vaddr_t vr = (vaddr_t)r; 2210d3478fdSakfaew 2220d3478fdSakfaew return vl < vr ? -1 : (vl > vr); 223088aa6daSariane } 224088aa6daSariane 225938d8250Sdlg RBT_PROTOTYPE(hiballoc_addr, hiballoc_entry, hibe_entry, hibe_cmp) 226088aa6daSariane 227088aa6daSariane /* 228088aa6daSariane * Given a hiballoc entry, return the address it manages. 229088aa6daSariane */ 230088aa6daSariane static __inline void * 231088aa6daSariane hib_entry_to_addr(struct hiballoc_entry *entry) 232088aa6daSariane { 233088aa6daSariane caddr_t addr; 234088aa6daSariane 235088aa6daSariane addr = (caddr_t)entry; 236088aa6daSariane addr += HIB_SIZEOF(struct hiballoc_entry); 237088aa6daSariane return addr; 238088aa6daSariane } 239088aa6daSariane 240088aa6daSariane /* 241088aa6daSariane * Given an address, find the hiballoc that corresponds. 242088aa6daSariane */ 243088aa6daSariane static __inline struct hiballoc_entry* 244088aa6daSariane hib_addr_to_entry(void *addr_param) 245088aa6daSariane { 246088aa6daSariane caddr_t addr; 247088aa6daSariane 248088aa6daSariane addr = (caddr_t)addr_param; 249088aa6daSariane addr -= HIB_SIZEOF(struct hiballoc_entry); 250088aa6daSariane return (struct hiballoc_entry*)addr; 251088aa6daSariane } 252088aa6daSariane 253938d8250Sdlg RBT_GENERATE(hiballoc_addr, hiballoc_entry, hibe_entry, hibe_cmp); 254088aa6daSariane 255088aa6daSariane /* 256088aa6daSariane * Allocate memory from the arena. 257088aa6daSariane * 258088aa6daSariane * Returns NULL if no memory is available. 259088aa6daSariane */ 260088aa6daSariane void * 261088aa6daSariane hib_alloc(struct hiballoc_arena *arena, size_t alloc_sz) 262088aa6daSariane { 263088aa6daSariane struct hiballoc_entry *entry, *new_entry; 264088aa6daSariane size_t find_sz; 265088aa6daSariane 266088aa6daSariane /* 267088aa6daSariane * Enforce alignment of HIB_ALIGN bytes. 268088aa6daSariane * 269088aa6daSariane * Note that, because the entry is put in front of the allocation, 270088aa6daSariane * 0-byte allocations are guaranteed a unique address. 271088aa6daSariane */ 272088aa6daSariane alloc_sz = roundup(alloc_sz, HIB_ALIGN); 273088aa6daSariane 274088aa6daSariane /* 275088aa6daSariane * Find an entry with hibe_space >= find_sz. 276088aa6daSariane * 277088aa6daSariane * If the root node is not large enough, we switch to tree traversal. 278088aa6daSariane * Because all entries are made at the bottom of the free space, 279088aa6daSariane * traversal from the end has a slightly better chance of yielding 280088aa6daSariane * a sufficiently large space. 281088aa6daSariane */ 282088aa6daSariane find_sz = alloc_sz + HIB_SIZEOF(struct hiballoc_entry); 283938d8250Sdlg entry = RBT_ROOT(hiballoc_addr, &arena->hib_addrs); 284088aa6daSariane if (entry != NULL && entry->hibe_space < find_sz) { 285938d8250Sdlg RBT_FOREACH_REVERSE(entry, hiballoc_addr, &arena->hib_addrs) { 286088aa6daSariane if (entry->hibe_space >= find_sz) 287088aa6daSariane break; 288088aa6daSariane } 289088aa6daSariane } 290088aa6daSariane 291088aa6daSariane /* 292088aa6daSariane * Insufficient or too fragmented memory. 293088aa6daSariane */ 294088aa6daSariane if (entry == NULL) 295088aa6daSariane return NULL; 296088aa6daSariane 297088aa6daSariane /* 298088aa6daSariane * Create new entry in allocated space. 299088aa6daSariane */ 300088aa6daSariane new_entry = (struct hiballoc_entry*)( 301088aa6daSariane (caddr_t)hib_entry_to_addr(entry) + entry->hibe_use); 302088aa6daSariane new_entry->hibe_space = entry->hibe_space - find_sz; 303088aa6daSariane new_entry->hibe_use = alloc_sz; 304088aa6daSariane 305088aa6daSariane /* 306088aa6daSariane * Insert entry. 307088aa6daSariane */ 308938d8250Sdlg if (RBT_INSERT(hiballoc_addr, &arena->hib_addrs, new_entry) != NULL) 309088aa6daSariane panic("hib_alloc: insert failure"); 310088aa6daSariane entry->hibe_space = 0; 311088aa6daSariane 312088aa6daSariane /* Return address managed by entry. */ 313088aa6daSariane return hib_entry_to_addr(new_entry); 314088aa6daSariane } 315088aa6daSariane 31687dd1dd0Sderaadt void 31787dd1dd0Sderaadt hib_getentropy(char **bufp, size_t *bufplen) 31887dd1dd0Sderaadt { 319464029d7Smlarkin if (!bufp || !bufplen) 320464029d7Smlarkin return; 321464029d7Smlarkin 322464029d7Smlarkin *bufp = (char *)(global_piglet_va + (29 * PAGE_SIZE)); 323464029d7Smlarkin *bufplen = PAGE_SIZE; 32487dd1dd0Sderaadt } 32587dd1dd0Sderaadt 326088aa6daSariane /* 327088aa6daSariane * Free a pointer previously allocated from this arena. 328088aa6daSariane * 329088aa6daSariane * If addr is NULL, this will be silently accepted. 330088aa6daSariane */ 331088aa6daSariane void 332088aa6daSariane hib_free(struct hiballoc_arena *arena, void *addr) 333088aa6daSariane { 334088aa6daSariane struct hiballoc_entry *entry, *prev; 335088aa6daSariane 336088aa6daSariane if (addr == NULL) 337088aa6daSariane return; 338088aa6daSariane 339088aa6daSariane /* 340088aa6daSariane * Derive entry from addr and check it is really in this arena. 341088aa6daSariane */ 342088aa6daSariane entry = hib_addr_to_entry(addr); 343938d8250Sdlg if (RBT_FIND(hiballoc_addr, &arena->hib_addrs, entry) != entry) 344088aa6daSariane panic("hib_free: freed item %p not in hib arena", addr); 345088aa6daSariane 346088aa6daSariane /* 347088aa6daSariane * Give the space in entry to its predecessor. 348088aa6daSariane * 349088aa6daSariane * If entry has no predecessor, change its used space into free space 350088aa6daSariane * instead. 351088aa6daSariane */ 352938d8250Sdlg prev = RBT_PREV(hiballoc_addr, entry); 353088aa6daSariane if (prev != NULL && 354088aa6daSariane (void *)((caddr_t)prev + HIB_SIZEOF(struct hiballoc_entry) + 355088aa6daSariane prev->hibe_use + prev->hibe_space) == entry) { 356088aa6daSariane /* Merge entry. */ 357938d8250Sdlg RBT_REMOVE(hiballoc_addr, &arena->hib_addrs, entry); 358088aa6daSariane prev->hibe_space += HIB_SIZEOF(struct hiballoc_entry) + 359088aa6daSariane entry->hibe_use + entry->hibe_space; 360088aa6daSariane } else { 361088aa6daSariane /* Flip used memory to free space. */ 362088aa6daSariane entry->hibe_space += entry->hibe_use; 363088aa6daSariane entry->hibe_use = 0; 364088aa6daSariane } 365088aa6daSariane } 366088aa6daSariane 367088aa6daSariane /* 368088aa6daSariane * Initialize hiballoc. 369088aa6daSariane * 370678831beSjsg * The allocator will manage memory at ptr, which is len bytes. 371088aa6daSariane */ 372088aa6daSariane int 373088aa6daSariane hiballoc_init(struct hiballoc_arena *arena, void *p_ptr, size_t p_len) 374088aa6daSariane { 375088aa6daSariane struct hiballoc_entry *entry; 376088aa6daSariane caddr_t ptr; 377088aa6daSariane size_t len; 378088aa6daSariane 379938d8250Sdlg RBT_INIT(hiballoc_addr, &arena->hib_addrs); 380088aa6daSariane 381088aa6daSariane /* 382088aa6daSariane * Hib allocator enforces HIB_ALIGN alignment. 383088aa6daSariane * Fixup ptr and len. 384088aa6daSariane */ 385088aa6daSariane ptr = (caddr_t)roundup((vaddr_t)p_ptr, HIB_ALIGN); 386088aa6daSariane len = p_len - ((size_t)ptr - (size_t)p_ptr); 387088aa6daSariane len &= ~((size_t)HIB_ALIGN - 1); 388088aa6daSariane 389088aa6daSariane /* 390088aa6daSariane * Insufficient memory to be able to allocate and also do bookkeeping. 391088aa6daSariane */ 392088aa6daSariane if (len <= HIB_SIZEOF(struct hiballoc_entry)) 393088aa6daSariane return ENOMEM; 394088aa6daSariane 395088aa6daSariane /* 396088aa6daSariane * Create entry describing space. 397088aa6daSariane */ 398088aa6daSariane entry = (struct hiballoc_entry*)ptr; 399088aa6daSariane entry->hibe_use = 0; 400088aa6daSariane entry->hibe_space = len - HIB_SIZEOF(struct hiballoc_entry); 401938d8250Sdlg RBT_INSERT(hiballoc_addr, &arena->hib_addrs, entry); 402088aa6daSariane 403088aa6daSariane return 0; 404088aa6daSariane } 405b3ad10faSariane 406b3ad10faSariane /* 4070547f1a4Sariane * Mark all memory as dirty. 4080547f1a4Sariane * 409a569c98aSkrw * Used to inform the system that there are no pre-zero'd (PG_ZERO) free pages 410a569c98aSkrw * when we came back from hibernate. 4110547f1a4Sariane */ 4120547f1a4Sariane void 4130547f1a4Sariane uvm_pmr_dirty_everything(void) 4140547f1a4Sariane { 4150547f1a4Sariane struct uvm_pmemrange *pmr; 4160547f1a4Sariane struct vm_page *pg; 4170547f1a4Sariane int i; 4180547f1a4Sariane 4190547f1a4Sariane uvm_lock_fpageq(); 4200547f1a4Sariane TAILQ_FOREACH(pmr, &uvm.pmr_control.use, pmr_use) { 4210547f1a4Sariane /* Dirty single pages. */ 4220547f1a4Sariane while ((pg = TAILQ_FIRST(&pmr->single[UVM_PMR_MEMTYPE_ZERO])) 4230547f1a4Sariane != NULL) { 4240547f1a4Sariane uvm_pmr_remove(pmr, pg); 4250547f1a4Sariane atomic_clearbits_int(&pg->pg_flags, PG_ZERO); 4260547f1a4Sariane uvm_pmr_insert(pmr, pg, 0); 4270547f1a4Sariane } 4280547f1a4Sariane 4290547f1a4Sariane /* Dirty multi page ranges. */ 430262a556aSdlg while ((pg = RBT_ROOT(uvm_pmr_size, 431262a556aSdlg &pmr->size[UVM_PMR_MEMTYPE_ZERO])) != NULL) { 4320547f1a4Sariane pg--; /* Size tree always has second page. */ 4330547f1a4Sariane uvm_pmr_remove(pmr, pg); 43482c7a60eSariane for (i = 0; i < pg->fpgsz; i++) 4350547f1a4Sariane atomic_clearbits_int(&pg[i].pg_flags, PG_ZERO); 4360547f1a4Sariane uvm_pmr_insert(pmr, pg, 0); 4370547f1a4Sariane } 4380547f1a4Sariane } 4390547f1a4Sariane 4400547f1a4Sariane uvmexp.zeropages = 0; 4410547f1a4Sariane uvm_unlock_fpageq(); 4420547f1a4Sariane } 4430547f1a4Sariane 4440547f1a4Sariane /* 445b0c80555Skettenis * Allocate an area that can hold sz bytes and doesn't overlap with 446b0c80555Skettenis * the piglet at piglet_pa. 447485235ccSariane */ 448485235ccSariane int 449b0c80555Skettenis uvm_pmr_alloc_pig(paddr_t *pa, psize_t sz, paddr_t piglet_pa) 450485235ccSariane { 451b0c80555Skettenis struct uvm_constraint_range pig_constraint; 452b0c80555Skettenis struct kmem_pa_mode kp_pig = { 453b0c80555Skettenis .kp_constraint = &pig_constraint, 454b0c80555Skettenis .kp_maxseg = 1 455b0c80555Skettenis }; 456b0c80555Skettenis vaddr_t va; 4570f0ce22aSariane 458b0c80555Skettenis sz = round_page(sz); 459485235ccSariane 460b0c80555Skettenis pig_constraint.ucr_low = piglet_pa + 4 * HIBERNATE_CHUNK_SIZE; 461b0c80555Skettenis pig_constraint.ucr_high = -1; 4620f0ce22aSariane 463b0c80555Skettenis va = (vaddr_t)km_alloc(sz, &kv_any, &kp_pig, &kd_nowait); 464b0c80555Skettenis if (va == 0) { 465b0c80555Skettenis pig_constraint.ucr_low = 0; 466b0c80555Skettenis pig_constraint.ucr_high = piglet_pa - 1; 467485235ccSariane 468b0c80555Skettenis va = (vaddr_t)km_alloc(sz, &kv_any, &kp_pig, &kd_nowait); 469b0c80555Skettenis if (va == 0) 4700f0ce22aSariane return ENOMEM; 471485235ccSariane } 472485235ccSariane 473b0c80555Skettenis pmap_extract(pmap_kernel(), va, pa); 4740f0ce22aSariane return 0; 4750f0ce22aSariane } 4760f0ce22aSariane 4770f0ce22aSariane /* 4780f0ce22aSariane * Allocate a piglet area. 4790f0ce22aSariane * 480b0c80555Skettenis * This needs to be in DMA-safe memory. 4810f0ce22aSariane * Piglets are aligned. 4820f0ce22aSariane * 4830f0ce22aSariane * sz and align in bytes. 4840f0ce22aSariane */ 4850f0ce22aSariane int 48620703d53Smlarkin uvm_pmr_alloc_piglet(vaddr_t *va, paddr_t *pa, vsize_t sz, paddr_t align) 4870f0ce22aSariane { 488b0c80555Skettenis struct kmem_pa_mode kp_piglet = { 489b0c80555Skettenis .kp_constraint = &dma_constraint, 490b0c80555Skettenis .kp_align = align, 491b0c80555Skettenis .kp_maxseg = 1 492b0c80555Skettenis }; 4930f0ce22aSariane 494c558d415Smlarkin /* Ensure align is a power of 2 */ 4950f0ce22aSariane KASSERT((align & (align - 1)) == 0); 496c558d415Smlarkin 4970f0ce22aSariane /* 4980f0ce22aSariane * Fixup arguments: align must be at least PAGE_SIZE, 4990f0ce22aSariane * sz will be converted to pagecount, since that is what 5000f0ce22aSariane * pmemrange uses internally. 5010f0ce22aSariane */ 5020f0ce22aSariane if (align < PAGE_SIZE) 503d06b9c1eSmlarkin kp_piglet.kp_align = PAGE_SIZE; 504d06b9c1eSmlarkin 50520703d53Smlarkin sz = round_page(sz); 5060f0ce22aSariane 507b0c80555Skettenis *va = (vaddr_t)km_alloc(sz, &kv_any, &kp_piglet, &kd_nowait); 508b0c80555Skettenis if (*va == 0) 5090f0ce22aSariane return ENOMEM; 5100f0ce22aSariane 511b0c80555Skettenis pmap_extract(pmap_kernel(), *va, pa); 5120f0ce22aSariane return 0; 513485235ccSariane } 514f0898735Sariane 515f0898735Sariane /* 51620703d53Smlarkin * Free a piglet area. 51720703d53Smlarkin */ 51820703d53Smlarkin void 51920703d53Smlarkin uvm_pmr_free_piglet(vaddr_t va, vsize_t sz) 52020703d53Smlarkin { 52120703d53Smlarkin /* 52220703d53Smlarkin * Fix parameters. 52320703d53Smlarkin */ 52420703d53Smlarkin sz = round_page(sz); 52520703d53Smlarkin 52620703d53Smlarkin /* 52720703d53Smlarkin * Free the physical and virtual memory. 52820703d53Smlarkin */ 529b0c80555Skettenis km_free((void *)va, sz, &kv_any, &kp_dma_contig); 53020703d53Smlarkin } 53120703d53Smlarkin 53220703d53Smlarkin /* 533f0898735Sariane * Physmem RLE compression support. 534f0898735Sariane * 535b42f10c6Smlarkin * Given a physical page address, return the number of pages starting at the 536b42f10c6Smlarkin * address that are free. Clamps to the number of pages in 53755d14f4eSmlarkin * HIBERNATE_CHUNK_SIZE. Returns 0 if the page at addr is not free. 538f0898735Sariane */ 53955d14f4eSmlarkin int 540f0898735Sariane uvm_page_rle(paddr_t addr) 541f0898735Sariane { 542f0898735Sariane struct vm_page *pg, *pg_end; 543f0898735Sariane struct vm_physseg *vmp; 544f0898735Sariane int pseg_idx, off_idx; 545f0898735Sariane 546f0898735Sariane pseg_idx = vm_physseg_find(atop(addr), &off_idx); 547f0898735Sariane if (pseg_idx == -1) 548f0898735Sariane return 0; 549f0898735Sariane 550f0898735Sariane vmp = &vm_physmem[pseg_idx]; 551f0898735Sariane pg = &vmp->pgs[off_idx]; 552f0898735Sariane if (!(pg->pg_flags & PQ_FREE)) 553f0898735Sariane return 0; 554f0898735Sariane 555f0898735Sariane /* 556f0898735Sariane * Search for the first non-free page after pg. 557f0898735Sariane * Note that the page may not be the first page in a free pmemrange, 558f0898735Sariane * therefore pg->fpgsz cannot be used. 559f0898735Sariane */ 560f0898735Sariane for (pg_end = pg; pg_end <= vmp->lastpg && 56156bc0efbSderaadt (pg_end->pg_flags & PQ_FREE) == PQ_FREE && 56256bc0efbSderaadt (pg_end - pg) < HIBERNATE_CHUNK_SIZE/PAGE_SIZE; pg_end++) 56351c66804Sderaadt ; 56456bc0efbSderaadt return pg_end - pg; 565f0898735Sariane } 56610aaff22Smlarkin 56710aaff22Smlarkin /* 5687bbeffb6Smlarkin * Fills out the hibernate_info union pointed to by hib 56910aaff22Smlarkin * with information about this machine (swap signature block 57010aaff22Smlarkin * offsets, number of memory ranges, kernel in use, etc) 57110aaff22Smlarkin */ 57210aaff22Smlarkin int 573908847d1Sderaadt get_hibernate_info(union hibernate_info *hib, int suspend) 57410aaff22Smlarkin { 57510aaff22Smlarkin struct disklabel dl; 57610aaff22Smlarkin char err_string[128], *dl_ret; 5773fed8fd5Sguenther int part; 578116c1678Smlarkin SHA2_CTX ctx; 579116c1678Smlarkin void *fn; 58010aaff22Smlarkin 581bd831450Smlarkin #ifndef NO_PROPOLICE 582bd831450Smlarkin /* Save propolice guard */ 583bd831450Smlarkin hib->guard = __guard_local; 584bd831450Smlarkin #endif /* ! NO_PROPOLICE */ 585bd831450Smlarkin 58610aaff22Smlarkin /* Determine I/O function to use */ 587a74a190bSjsg hib->io_func = get_hibernate_io_function(swdevt[0]); 588908847d1Sderaadt if (hib->io_func == NULL) 58910aaff22Smlarkin return (1); 59010aaff22Smlarkin 59110aaff22Smlarkin /* Calculate hibernate device */ 592a74a190bSjsg hib->dev = swdevt[0]; 59310aaff22Smlarkin 59410aaff22Smlarkin /* Read disklabel (used to calculate signature and image offsets) */ 5957b90f98fSmlarkin dl_ret = disk_readlabel(&dl, hib->dev, err_string, sizeof(err_string)); 59610aaff22Smlarkin 59710aaff22Smlarkin if (dl_ret) { 59810aaff22Smlarkin printf("Hibernate error reading disklabel: %s\n", dl_ret); 59910aaff22Smlarkin return (1); 60010aaff22Smlarkin } 60110aaff22Smlarkin 602a8dcd0fcSjsing /* Make sure we have a swap partition. */ 6033fed8fd5Sguenther part = DISKPART(hib->dev); 6043fed8fd5Sguenther if (dl.d_npartitions <= part || 605fc6d48fdSkrw dl.d_secsize > sizeof(union hibernate_info) || 6063fed8fd5Sguenther dl.d_partitions[part].p_fstype != FS_SWAP || 6073fed8fd5Sguenther DL_GETPSIZE(&dl.d_partitions[part]) == 0) 60886840a47Smlarkin return (1); 60986840a47Smlarkin 61086840a47Smlarkin /* Magic number */ 611908847d1Sderaadt hib->magic = HIBERNATE_MAGIC; 61210aaff22Smlarkin 61310aaff22Smlarkin /* Calculate signature block location */ 614fc6d48fdSkrw hib->sec_size = dl.d_secsize; 615fc6d48fdSkrw hib->sig_offset = DL_GETPSIZE(&dl.d_partitions[part]) - 1; 616fc6d48fdSkrw hib->sig_offset = DL_SECTOBLK(&dl, hib->sig_offset); 61710aaff22Smlarkin 618116c1678Smlarkin SHA256Init(&ctx); 619116c1678Smlarkin SHA256Update(&ctx, version, strlen(version)); 620116c1678Smlarkin fn = printf; 621116c1678Smlarkin SHA256Update(&ctx, &fn, sizeof(fn)); 622116c1678Smlarkin fn = malloc; 623116c1678Smlarkin SHA256Update(&ctx, &fn, sizeof(fn)); 624116c1678Smlarkin fn = km_alloc; 625116c1678Smlarkin SHA256Update(&ctx, &fn, sizeof(fn)); 626116c1678Smlarkin fn = strlen; 627116c1678Smlarkin SHA256Update(&ctx, &fn, sizeof(fn)); 628116c1678Smlarkin SHA256Final((u_int8_t *)&hib->kern_hash, &ctx); 62920703d53Smlarkin 63020703d53Smlarkin if (suspend) { 631259dfafeSmlarkin /* Grab the previously-allocated piglet addresses */ 632b0c80555Skettenis hib->piglet_va = global_piglet_va; 633b0c80555Skettenis hib->piglet_pa = global_piglet_pa; 634908847d1Sderaadt hib->io_page = (void *)hib->piglet_va; 63589fc03e5Sjmatthew 63689fc03e5Sjmatthew /* 637b42f10c6Smlarkin * Initialization of the hibernate IO function for drivers 638b42f10c6Smlarkin * that need to do prep work (such as allocating memory or 639b42f10c6Smlarkin * setting up data structures that cannot safely be done 640b42f10c6Smlarkin * during suspend without causing side effects). There is 641b42f10c6Smlarkin * a matching HIB_DONE call performed after the write is 642b42f10c6Smlarkin * completed. 64389fc03e5Sjmatthew */ 644fc6d48fdSkrw if (hib->io_func(hib->dev, 645fc6d48fdSkrw DL_SECTOBLK(&dl, DL_GETPOFFSET(&dl.d_partitions[part])), 646fc6d48fdSkrw (vaddr_t)NULL, 647fc6d48fdSkrw DL_SECTOBLK(&dl, DL_GETPSIZE(&dl.d_partitions[part])), 648908847d1Sderaadt HIB_INIT, hib->io_page)) 64989fc03e5Sjmatthew goto fail; 65089fc03e5Sjmatthew 65142015ed9Smlarkin } else { 65242015ed9Smlarkin /* 653965a4b2aSmlarkin * Resuming kernels use a regular private page for the driver 654965a4b2aSmlarkin * No need to free this I/O page as it will vanish as part of 655965a4b2aSmlarkin * the resume. 65642015ed9Smlarkin */ 657908847d1Sderaadt hib->io_page = malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT); 658908847d1Sderaadt if (!hib->io_page) 659965a4b2aSmlarkin goto fail; 66020703d53Smlarkin } 66120703d53Smlarkin 662908847d1Sderaadt if (get_hibernate_info_md(hib)) 66342015ed9Smlarkin goto fail; 66420703d53Smlarkin 66520703d53Smlarkin return (0); 66644eec4e2Smlarkin 667b0c80555Skettenis fail: 66842015ed9Smlarkin return (1); 66910aaff22Smlarkin } 67022378a8fSmlarkin 67122378a8fSmlarkin /* 67222378a8fSmlarkin * Allocate nitems*size bytes from the hiballoc area presently in use 67322378a8fSmlarkin */ 6743843cbddSpirofti void * 6753843cbddSpirofti hibernate_zlib_alloc(void *unused, int nitems, int size) 67622378a8fSmlarkin { 6773a85c2afSmlarkin struct hibernate_zlib_state *hibernate_state; 6783a85c2afSmlarkin 6793843cbddSpirofti hibernate_state = 6803843cbddSpirofti (struct hibernate_zlib_state *)HIBERNATE_HIBALLOC_PAGE; 6813a85c2afSmlarkin 68222378a8fSmlarkin return hib_alloc(&hibernate_state->hiballoc_arena, nitems*size); 68322378a8fSmlarkin } 68422378a8fSmlarkin 68522378a8fSmlarkin /* 686627cf4e5Smlarkin * Free the memory pointed to by addr in the hiballoc area presently in 687627cf4e5Smlarkin * use 688627cf4e5Smlarkin */ 689627cf4e5Smlarkin void 690627cf4e5Smlarkin hibernate_zlib_free(void *unused, void *addr) 691627cf4e5Smlarkin { 6923a85c2afSmlarkin struct hibernate_zlib_state *hibernate_state; 6933a85c2afSmlarkin 6943843cbddSpirofti hibernate_state = 6953843cbddSpirofti (struct hibernate_zlib_state *)HIBERNATE_HIBALLOC_PAGE; 6963a85c2afSmlarkin 697627cf4e5Smlarkin hib_free(&hibernate_state->hiballoc_arena, addr); 698627cf4e5Smlarkin } 699627cf4e5Smlarkin 700627cf4e5Smlarkin /* 7017882bc75Smlarkin * Inflate next page of data from the image stream. 7027882bc75Smlarkin * The rle parameter is modified on exit to contain the number of pages to 7037882bc75Smlarkin * skip in the output stream (or 0 if this page was inflated into). 7047882bc75Smlarkin * 7057882bc75Smlarkin * Returns 0 if the stream contains additional data, or 1 if the stream is 7067882bc75Smlarkin * finished. 7079783ed43Smlarkin */ 7089783ed43Smlarkin int 7097882bc75Smlarkin hibernate_inflate_page(int *rle) 7109783ed43Smlarkin { 7119783ed43Smlarkin struct hibernate_zlib_state *hibernate_state; 7129783ed43Smlarkin int i; 7139783ed43Smlarkin 7143843cbddSpirofti hibernate_state = 7153843cbddSpirofti (struct hibernate_zlib_state *)HIBERNATE_HIBALLOC_PAGE; 7169783ed43Smlarkin 7177882bc75Smlarkin /* Set up the stream for RLE code inflate */ 718b498c5a9Smlarkin hibernate_state->hib_stream.next_out = (unsigned char *)rle; 7197882bc75Smlarkin hibernate_state->hib_stream.avail_out = sizeof(*rle); 7207882bc75Smlarkin 7217882bc75Smlarkin /* Inflate RLE code */ 7227882bc75Smlarkin i = inflate(&hibernate_state->hib_stream, Z_SYNC_FLUSH); 7237882bc75Smlarkin if (i != Z_OK && i != Z_STREAM_END) { 7247882bc75Smlarkin /* 7257882bc75Smlarkin * XXX - this will likely reboot/hang most machines 7267882bc75Smlarkin * since the console output buffer will be unmapped, 7277882bc75Smlarkin * but there's not much else we can do here. 7287882bc75Smlarkin */ 7297882bc75Smlarkin panic("rle inflate stream error"); 7307882bc75Smlarkin } 7317882bc75Smlarkin 7327882bc75Smlarkin if (hibernate_state->hib_stream.avail_out != 0) { 7337882bc75Smlarkin /* 7347882bc75Smlarkin * XXX - this will likely reboot/hang most machines 7357882bc75Smlarkin * since the console output buffer will be unmapped, 7367882bc75Smlarkin * but there's not much else we can do here. 7377882bc75Smlarkin */ 7387882bc75Smlarkin panic("rle short inflate error"); 7397882bc75Smlarkin } 7407882bc75Smlarkin 7417882bc75Smlarkin if (*rle < 0 || *rle > 1024) { 7427882bc75Smlarkin /* 7437882bc75Smlarkin * XXX - this will likely reboot/hang most machines 7447882bc75Smlarkin * since the console output buffer will be unmapped, 7457882bc75Smlarkin * but there's not much else we can do here. 7467882bc75Smlarkin */ 7477882bc75Smlarkin panic("invalid rle count"); 7487882bc75Smlarkin } 7497882bc75Smlarkin 7507882bc75Smlarkin if (i == Z_STREAM_END) 7517882bc75Smlarkin return (1); 7527882bc75Smlarkin 7537882bc75Smlarkin if (*rle != 0) 7547882bc75Smlarkin return (0); 7557882bc75Smlarkin 7567882bc75Smlarkin /* Set up the stream for page inflate */ 757b498c5a9Smlarkin hibernate_state->hib_stream.next_out = 758b498c5a9Smlarkin (unsigned char *)HIBERNATE_INFLATE_PAGE; 7599783ed43Smlarkin hibernate_state->hib_stream.avail_out = PAGE_SIZE; 7609783ed43Smlarkin 7619783ed43Smlarkin /* Process next block of data */ 762527cac00Smlarkin i = inflate(&hibernate_state->hib_stream, Z_SYNC_FLUSH); 7639783ed43Smlarkin if (i != Z_OK && i != Z_STREAM_END) { 7649783ed43Smlarkin /* 765b42f10c6Smlarkin * XXX - this will likely reboot/hang most machines 766b42f10c6Smlarkin * since the console output buffer will be unmapped, 7679783ed43Smlarkin * but there's not much else we can do here. 7689783ed43Smlarkin */ 7699783ed43Smlarkin panic("inflate error"); 7709783ed43Smlarkin } 7719783ed43Smlarkin 7729783ed43Smlarkin /* We should always have extracted a full page ... */ 773b42f10c6Smlarkin if (hibernate_state->hib_stream.avail_out != 0) { 774b42f10c6Smlarkin /* 775b42f10c6Smlarkin * XXX - this will likely reboot/hang most machines 776b42f10c6Smlarkin * since the console output buffer will be unmapped, 777b42f10c6Smlarkin * but there's not much else we can do here. 778b42f10c6Smlarkin */ 7799783ed43Smlarkin panic("incomplete page"); 780b42f10c6Smlarkin } 7819783ed43Smlarkin 7829783ed43Smlarkin return (i == Z_STREAM_END); 7839783ed43Smlarkin } 7849783ed43Smlarkin 7859783ed43Smlarkin /* 7869783ed43Smlarkin * Inflate size bytes from src into dest, skipping any pages in 7879783ed43Smlarkin * [src..dest] that are special (see hibernate_inflate_skip) 7889783ed43Smlarkin * 7899783ed43Smlarkin * This function executes while using the resume-time stack 7909783ed43Smlarkin * and pmap, and therefore cannot use ddb/printf/etc. Doing so 791b42f10c6Smlarkin * will likely hang or reset the machine since the console output buffer 792b42f10c6Smlarkin * will be unmapped. 7939783ed43Smlarkin */ 7949783ed43Smlarkin void 795908847d1Sderaadt hibernate_inflate_region(union hibernate_info *hib, paddr_t dest, 7969783ed43Smlarkin paddr_t src, size_t size) 7979783ed43Smlarkin { 79865052cacSmlarkin int end_stream = 0, rle, skip; 7999783ed43Smlarkin struct hibernate_zlib_state *hibernate_state; 8009783ed43Smlarkin 8013843cbddSpirofti hibernate_state = 8023843cbddSpirofti (struct hibernate_zlib_state *)HIBERNATE_HIBALLOC_PAGE; 8039783ed43Smlarkin 804b498c5a9Smlarkin hibernate_state->hib_stream.next_in = (unsigned char *)src; 8059783ed43Smlarkin hibernate_state->hib_stream.avail_in = size; 8069783ed43Smlarkin 8079783ed43Smlarkin do { 808d9e6d5c0Smlarkin /* 809d9e6d5c0Smlarkin * Is this a special page? If yes, redirect the 810d9e6d5c0Smlarkin * inflate output to a scratch page (eg, discard it) 811d9e6d5c0Smlarkin */ 81265052cacSmlarkin skip = hibernate_inflate_skip(hib, dest); 81365052cacSmlarkin if (skip == HIB_SKIP) { 814d9e6d5c0Smlarkin hibernate_enter_resume_mapping( 815d9e6d5c0Smlarkin HIBERNATE_INFLATE_PAGE, 816d9e6d5c0Smlarkin HIBERNATE_INFLATE_PAGE, 0); 81765052cacSmlarkin } else if (skip == HIB_MOVE) { 81865052cacSmlarkin /* 81965052cacSmlarkin * Special case : retguard region. This gets moved 82065052cacSmlarkin * temporarily into the piglet region and copied into 82165052cacSmlarkin * place immediately before resume 82265052cacSmlarkin */ 82365052cacSmlarkin hibernate_enter_resume_mapping( 82465052cacSmlarkin HIBERNATE_INFLATE_PAGE, 82565052cacSmlarkin hib->piglet_pa + (110 * PAGE_SIZE) + 82665052cacSmlarkin hib->retguard_ofs, 0); 82765052cacSmlarkin hib->retguard_ofs += PAGE_SIZE; 828a552a180Smlarkin if (hib->retguard_ofs > 255 * PAGE_SIZE) { 829a552a180Smlarkin /* 830a552a180Smlarkin * XXX - this will likely reboot/hang most 831a552a180Smlarkin * machines since the console output 832a552a180Smlarkin * buffer will be unmapped, but there's 833a552a180Smlarkin * not much else we can do here. 834a552a180Smlarkin */ 835a552a180Smlarkin panic("retguard move error, out of space"); 836a552a180Smlarkin } 8373a85c2afSmlarkin } else { 838d9e6d5c0Smlarkin hibernate_enter_resume_mapping( 839d9e6d5c0Smlarkin HIBERNATE_INFLATE_PAGE, dest, 0); 8403a85c2afSmlarkin } 841d9e6d5c0Smlarkin 842d9e6d5c0Smlarkin hibernate_flush(); 8437882bc75Smlarkin end_stream = hibernate_inflate_page(&rle); 844d588479bSmlarkin 8457882bc75Smlarkin if (rle == 0) 8469783ed43Smlarkin dest += PAGE_SIZE; 8477882bc75Smlarkin else 8487882bc75Smlarkin dest += (rle * PAGE_SIZE); 8499783ed43Smlarkin } while (!end_stream); 850d588479bSmlarkin } 851d588479bSmlarkin 852d588479bSmlarkin /* 853d588479bSmlarkin * deflate from src into the I/O page, up to 'remaining' bytes 854d588479bSmlarkin * 855d588479bSmlarkin * Returns number of input bytes consumed, and may reset 856d588479bSmlarkin * the 'remaining' parameter if not all the output space was consumed 857cde4819cSmlarkin * (this information is needed to know how much to write to disk) 858d588479bSmlarkin */ 859d588479bSmlarkin size_t 860908847d1Sderaadt hibernate_deflate(union hibernate_info *hib, paddr_t src, 86120703d53Smlarkin size_t *remaining) 862d588479bSmlarkin { 863908847d1Sderaadt vaddr_t hibernate_io_page = hib->piglet_va + PAGE_SIZE; 8643a85c2afSmlarkin struct hibernate_zlib_state *hibernate_state; 8653a85c2afSmlarkin 8663843cbddSpirofti hibernate_state = 8673843cbddSpirofti (struct hibernate_zlib_state *)HIBERNATE_HIBALLOC_PAGE; 86820703d53Smlarkin 869d588479bSmlarkin /* Set up the stream for deflate */ 870b498c5a9Smlarkin hibernate_state->hib_stream.next_in = (unsigned char *)src; 871aa7ef211Sderaadt hibernate_state->hib_stream.avail_in = PAGE_SIZE - (src & PAGE_MASK); 872b498c5a9Smlarkin hibernate_state->hib_stream.next_out = 873b498c5a9Smlarkin (unsigned char *)hibernate_io_page + (PAGE_SIZE - *remaining); 874aa7ef211Sderaadt hibernate_state->hib_stream.avail_out = *remaining; 875d588479bSmlarkin 876d588479bSmlarkin /* Process next block of data */ 877527cac00Smlarkin if (deflate(&hibernate_state->hib_stream, Z_SYNC_FLUSH) != Z_OK) 878af7dea42Sjasper panic("hibernate zlib deflate error"); 879d588479bSmlarkin 880d588479bSmlarkin /* Update pointers and return number of bytes consumed */ 881d588479bSmlarkin *remaining = hibernate_state->hib_stream.avail_out; 882d588479bSmlarkin return (PAGE_SIZE - (src & PAGE_MASK)) - 883d588479bSmlarkin hibernate_state->hib_stream.avail_in; 88422378a8fSmlarkin } 885627cf4e5Smlarkin 886627cf4e5Smlarkin /* 887627cf4e5Smlarkin * Write the hibernation information specified in hiber_info 888627cf4e5Smlarkin * to the location in swap previously calculated (last block of 889627cf4e5Smlarkin * swap), called the "signature block". 890627cf4e5Smlarkin */ 891627cf4e5Smlarkin int 892908847d1Sderaadt hibernate_write_signature(union hibernate_info *hib) 893627cf4e5Smlarkin { 894fc6d48fdSkrw memset(&disk_hib, 0, hib->sec_size); 895fc6d48fdSkrw memcpy(&disk_hib, hib, DEV_BSIZE); 896fc6d48fdSkrw 897627cf4e5Smlarkin /* Write hibernate info to disk */ 8989783fa9dSkrw return (hibernate_write(hib, hib->sig_offset, 8999783fa9dSkrw (vaddr_t)&disk_hib, hib->sec_size, IO_TYPE_SIG)); 90020703d53Smlarkin } 90120703d53Smlarkin 90220703d53Smlarkin /* 90320703d53Smlarkin * Write the memory chunk table to the area in swap immediately 90420703d53Smlarkin * preceding the signature block. The chunk table is stored 905b687eefdSderaadt * in the piglet when this function is called. Returns errno. 90620703d53Smlarkin */ 90720703d53Smlarkin int 908908847d1Sderaadt hibernate_write_chunktable(union hibernate_info *hib) 90920703d53Smlarkin { 91020703d53Smlarkin vaddr_t hibernate_chunk_table_start; 91120703d53Smlarkin size_t hibernate_chunk_table_size; 91296cbc698Smlarkin int i, err; 91320703d53Smlarkin 91420703d53Smlarkin hibernate_chunk_table_size = HIBERNATE_CHUNK_TABLE_SIZE; 915627cf4e5Smlarkin 916908847d1Sderaadt hibernate_chunk_table_start = hib->piglet_va + 91720703d53Smlarkin HIBERNATE_CHUNK_SIZE; 91820703d53Smlarkin 919627cf4e5Smlarkin /* Write chunk table */ 92020703d53Smlarkin for (i = 0; i < hibernate_chunk_table_size; i += MAXPHYS) { 9219783fa9dSkrw if ((err = hibernate_write(hib, btodb(i), 92220703d53Smlarkin (vaddr_t)(hibernate_chunk_table_start + i), 9239783fa9dSkrw MAXPHYS, IO_TYPE_CHK))) { 92496cbc698Smlarkin DPRINTF("chunktable write error: %d\n", err); 925b687eefdSderaadt return (err); 92620703d53Smlarkin } 92796cbc698Smlarkin } 928627cf4e5Smlarkin 929627cf4e5Smlarkin return (0); 930627cf4e5Smlarkin } 931627cf4e5Smlarkin 932627cf4e5Smlarkin /* 933627cf4e5Smlarkin * Write an empty hiber_info to the swap signature block, which is 934908847d1Sderaadt * guaranteed to not match any valid hib. 935627cf4e5Smlarkin */ 936627cf4e5Smlarkin int 9378cae4923Sguenther hibernate_clear_signature(union hibernate_info *hib) 938627cf4e5Smlarkin { 939fc6d48fdSkrw uint8_t buf[DEV_BSIZE]; 940627cf4e5Smlarkin 941627cf4e5Smlarkin /* Zero out a blank hiber_info */ 942fc6d48fdSkrw memcpy(&buf, &disk_hib, sizeof(buf)); 943fc6d48fdSkrw memset(&disk_hib, 0, hib->sec_size); 944627cf4e5Smlarkin 945627cf4e5Smlarkin /* Write (zeroed) hibernate info to disk */ 94696cbc698Smlarkin DPRINTF("clearing hibernate signature block location: %lld\n", 9478cae4923Sguenther hib->sig_offset); 9488cae4923Sguenther if (hibernate_block_io(hib, 9498cae4923Sguenther hib->sig_offset, 950fc6d48fdSkrw hib->sec_size, (vaddr_t)&disk_hib, 1)) 95186840a47Smlarkin printf("Warning: could not clear hibernate signature\n"); 952627cf4e5Smlarkin 953fc6d48fdSkrw memcpy(&disk_hib, buf, sizeof(buf)); 954627cf4e5Smlarkin return (0); 955627cf4e5Smlarkin } 956627cf4e5Smlarkin 957627cf4e5Smlarkin /* 958b514ebc1Smlarkin * Compare two hibernate_infos to determine if they are the same (eg, 959b514ebc1Smlarkin * we should be performing a hibernate resume on this machine. 960b514ebc1Smlarkin * Not all fields are checked - just enough to verify that the machine 961b514ebc1Smlarkin * has the same memory configuration and kernel as the one that 962b514ebc1Smlarkin * wrote the signature previously. 963b514ebc1Smlarkin */ 964b514ebc1Smlarkin int 965b514ebc1Smlarkin hibernate_compare_signature(union hibernate_info *mine, 966b514ebc1Smlarkin union hibernate_info *disk) 967b514ebc1Smlarkin { 968b514ebc1Smlarkin u_int i; 969b514ebc1Smlarkin 97086840a47Smlarkin if (mine->nranges != disk->nranges) { 971ac992051Smlarkin printf("unhibernate failed: memory layout changed\n"); 972b514ebc1Smlarkin return (1); 97386840a47Smlarkin } 974b514ebc1Smlarkin 975116c1678Smlarkin if (bcmp(mine->kern_hash, disk->kern_hash, SHA256_DIGEST_LENGTH) != 0) { 976ac992051Smlarkin printf("unhibernate failed: original kernel changed\n"); 977bee1d453Sderaadt return (1); 978bee1d453Sderaadt } 979bee1d453Sderaadt 980b514ebc1Smlarkin for (i = 0; i < mine->nranges; i++) { 981b514ebc1Smlarkin if ((mine->ranges[i].base != disk->ranges[i].base) || 98286840a47Smlarkin (mine->ranges[i].end != disk->ranges[i].end) ) { 98396cbc698Smlarkin DPRINTF("hib range %d mismatch [%p-%p != %p-%p]\n", 9849368744bSmlarkin i, 9859368744bSmlarkin (void *)mine->ranges[i].base, 9869368744bSmlarkin (void *)mine->ranges[i].end, 9879368744bSmlarkin (void *)disk->ranges[i].base, 9889368744bSmlarkin (void *)disk->ranges[i].end); 989ac992051Smlarkin printf("unhibernate failed: memory size changed\n"); 990b514ebc1Smlarkin return (1); 991b514ebc1Smlarkin } 99286840a47Smlarkin } 993b514ebc1Smlarkin 994b514ebc1Smlarkin return (0); 995b514ebc1Smlarkin } 996b514ebc1Smlarkin 99721e3fba6Smlarkin /* 99889fc03e5Sjmatthew * Transfers xfer_size bytes between the hibernate device specified in 99989fc03e5Sjmatthew * hib_info at offset blkctr and the vaddr specified at dest. 100021e3fba6Smlarkin * 100121e3fba6Smlarkin * Separate offsets and pages are used to handle misaligned reads (reads 100221e3fba6Smlarkin * that span a page boundary). 100321e3fba6Smlarkin * 100421e3fba6Smlarkin * blkctr specifies a relative offset (relative to the start of swap), 100521e3fba6Smlarkin * not an absolute disk offset 100621e3fba6Smlarkin * 100721e3fba6Smlarkin */ 100821e3fba6Smlarkin int 1009908847d1Sderaadt hibernate_block_io(union hibernate_info *hib, daddr_t blkctr, 101089fc03e5Sjmatthew size_t xfer_size, vaddr_t dest, int iswrite) 101121e3fba6Smlarkin { 101221e3fba6Smlarkin struct buf *bp; 101321e3fba6Smlarkin int error; 101421e3fba6Smlarkin 101589fc03e5Sjmatthew bp = geteblk(xfer_size); 101689fc03e5Sjmatthew if (iswrite) 101789fc03e5Sjmatthew bcopy((caddr_t)dest, bp->b_data, xfer_size); 101889fc03e5Sjmatthew 101989fc03e5Sjmatthew bp->b_bcount = xfer_size; 102021e3fba6Smlarkin bp->b_blkno = blkctr; 102121e3fba6Smlarkin CLR(bp->b_flags, B_READ | B_WRITE | B_DONE); 102289fc03e5Sjmatthew SET(bp->b_flags, B_BUSY | (iswrite ? B_WRITE : B_READ) | B_RAW); 1023eaf4a78cSderaadt bp->b_dev = hib->dev; 102421e3fba6Smlarkin (*bdsw->d_strategy)(bp); 102521e3fba6Smlarkin 102621e3fba6Smlarkin error = biowait(bp); 102721e3fba6Smlarkin if (error) { 10284d2d2571Smlarkin printf("hib block_io biowait error %d blk %lld size %zu\n", 10294d2d2571Smlarkin error, (long long)blkctr, xfer_size); 1030c3ed0588Skrw } else if (!iswrite) 103189fc03e5Sjmatthew bcopy(bp->b_data, (caddr_t)dest, xfer_size); 103221e3fba6Smlarkin 103321e3fba6Smlarkin bp->b_flags |= B_INVAL; 103421e3fba6Smlarkin brelse(bp); 103521e3fba6Smlarkin 1036c3ed0588Skrw return (error != 0); 103721e3fba6Smlarkin } 103821e3fba6Smlarkin 103920703d53Smlarkin /* 1040464029d7Smlarkin * Preserve one page worth of random data, generated from the resuming 1041464029d7Smlarkin * kernel's arc4random. After resume, this preserved entropy can be used 1042464029d7Smlarkin * to further improve the un-hibernated machine's entropy pool. This 1043464029d7Smlarkin * random data is stored in the piglet, which is preserved across the 1044464029d7Smlarkin * unpack operation, and is restored later in the resume process (see 1045464029d7Smlarkin * hib_getentropy) 1046464029d7Smlarkin */ 1047464029d7Smlarkin void 1048464029d7Smlarkin hibernate_preserve_entropy(union hibernate_info *hib) 1049464029d7Smlarkin { 1050464029d7Smlarkin void *entropy; 1051464029d7Smlarkin 1052464029d7Smlarkin entropy = km_alloc(PAGE_SIZE, &kv_any, &kp_none, &kd_nowait); 1053464029d7Smlarkin 1054464029d7Smlarkin if (!entropy) 1055464029d7Smlarkin return; 1056464029d7Smlarkin 1057464029d7Smlarkin pmap_activate(curproc); 1058464029d7Smlarkin pmap_kenter_pa((vaddr_t)entropy, 1059464029d7Smlarkin (paddr_t)(hib->piglet_pa + (29 * PAGE_SIZE)), 1060464029d7Smlarkin PROT_READ | PROT_WRITE); 1061464029d7Smlarkin 1062464029d7Smlarkin arc4random_buf((void *)entropy, PAGE_SIZE); 1063464029d7Smlarkin pmap_kremove((vaddr_t)entropy, PAGE_SIZE); 1064464029d7Smlarkin km_free(entropy, PAGE_SIZE, &kv_any, &kp_none); 1065464029d7Smlarkin } 1066464029d7Smlarkin 106707e1ce2aSbeck #ifndef NO_PROPOLICE 106807e1ce2aSbeck vaddr_t 106907e1ce2aSbeck hibernate_unprotect_ssp(void) 107007e1ce2aSbeck { 107107e1ce2aSbeck struct kmem_dyn_mode kd_avoidalias; 107207e1ce2aSbeck vaddr_t va = trunc_page((vaddr_t)&__guard_local); 107307e1ce2aSbeck paddr_t pa; 107407e1ce2aSbeck 107507e1ce2aSbeck pmap_extract(pmap_kernel(), va, &pa); 107607e1ce2aSbeck 107707e1ce2aSbeck memset(&kd_avoidalias, 0, sizeof kd_avoidalias); 107807e1ce2aSbeck kd_avoidalias.kd_prefer = pa; 107907e1ce2aSbeck kd_avoidalias.kd_waitok = 1; 108007e1ce2aSbeck va = (vaddr_t)km_alloc(PAGE_SIZE, &kv_any, &kp_none, &kd_avoidalias); 108107e1ce2aSbeck if (!va) 108207e1ce2aSbeck panic("hibernate_unprotect_ssp"); 108307e1ce2aSbeck 108407e1ce2aSbeck pmap_kenter_pa(va, pa, PROT_READ | PROT_WRITE); 108507e1ce2aSbeck pmap_update(pmap_kernel()); 108607e1ce2aSbeck 108707e1ce2aSbeck return va; 108807e1ce2aSbeck } 108907e1ce2aSbeck 109007e1ce2aSbeck void 109107e1ce2aSbeck hibernate_reprotect_ssp(vaddr_t va) 109207e1ce2aSbeck { 109307e1ce2aSbeck pmap_kremove(va, PAGE_SIZE); 109407e1ce2aSbeck km_free((void *)va, PAGE_SIZE, &kv_any, &kp_none); 109507e1ce2aSbeck } 109607e1ce2aSbeck #endif /* NO_PROPOLICE */ 109707e1ce2aSbeck 1098464029d7Smlarkin /* 109920703d53Smlarkin * Reads the signature block from swap, checks against the current machine's 110020703d53Smlarkin * information. If the information matches, perform a resume by reading the 110120703d53Smlarkin * saved image into the pig area, and unpacking. 11023120f534Smlarkin * 11033120f534Smlarkin * Must be called with interrupts enabled. 110420703d53Smlarkin */ 110520703d53Smlarkin void 1106e33b04c7Sderaadt hibernate_resume(void) 110720703d53Smlarkin { 1108fc6d48fdSkrw uint8_t buf[DEV_BSIZE]; 1109fc6d48fdSkrw union hibernate_info *hib = (union hibernate_info *)&buf; 111020703d53Smlarkin int s; 111107e1ce2aSbeck #ifndef NO_PROPOLICE 111207e1ce2aSbeck vsize_t off = (vaddr_t)&__guard_local - 111307e1ce2aSbeck trunc_page((vaddr_t)&__guard_local); 111407e1ce2aSbeck vaddr_t guard_va; 111507e1ce2aSbeck #endif 111620703d53Smlarkin 111720703d53Smlarkin /* Get current running machine's hibernate info */ 1118fc6d48fdSkrw memset(buf, 0, sizeof(buf)); 1119fc6d48fdSkrw if (get_hibernate_info(hib, 0)) { 1120491851bfSmlarkin DPRINTF("couldn't retrieve machine's hibernate info\n"); 112120703d53Smlarkin return; 1122491851bfSmlarkin } 112320703d53Smlarkin 112420703d53Smlarkin /* Read hibernate info from disk */ 112520703d53Smlarkin s = splbio(); 112620703d53Smlarkin 1127c3ed0588Skrw bdsw = &bdevsw[major(hib->dev)]; 1128c3ed0588Skrw if ((*bdsw->d_open)(hib->dev, FREAD, S_IFCHR, curproc)) { 1129c3ed0588Skrw printf("hibernate_resume device open failed\n"); 1130c3ed0588Skrw splx(s); 1131c3ed0588Skrw return; 1132c3ed0588Skrw } 1133c3ed0588Skrw 113496cbc698Smlarkin DPRINTF("reading hibernate signature block location: %lld\n", 1135fc6d48fdSkrw hib->sig_offset); 113686840a47Smlarkin 1137fc6d48fdSkrw if (hibernate_block_io(hib, 1138fc6d48fdSkrw hib->sig_offset, 1139fc6d48fdSkrw hib->sec_size, (vaddr_t)&disk_hib, 0)) { 1140ab0363c3Skrw DPRINTF("error in hibernate read\n"); 1141c3ed0588Skrw goto fail; 1142491851bfSmlarkin } 114320703d53Smlarkin 114486840a47Smlarkin /* Check magic number */ 1145908847d1Sderaadt if (disk_hib.magic != HIBERNATE_MAGIC) { 1146491851bfSmlarkin DPRINTF("wrong magic number in hibernate signature: %x\n", 1147908847d1Sderaadt disk_hib.magic); 1148c3ed0588Skrw goto fail; 114986840a47Smlarkin } 115086840a47Smlarkin 115186840a47Smlarkin /* 115286840a47Smlarkin * We (possibly) found a hibernate signature. Clear signature first, 115386840a47Smlarkin * to prevent accidental resume or endless resume cycles later. 115486840a47Smlarkin */ 1155fc6d48fdSkrw if (hibernate_clear_signature(hib)) { 1156491851bfSmlarkin DPRINTF("error clearing hibernate signature block\n"); 1157c3ed0588Skrw goto fail; 115886840a47Smlarkin } 115986840a47Smlarkin 116020703d53Smlarkin /* 116120703d53Smlarkin * If on-disk and in-memory hibernate signatures match, 116220703d53Smlarkin * this means we should do a resume from hibernate. 116320703d53Smlarkin */ 1164fc6d48fdSkrw if (hibernate_compare_signature(hib, &disk_hib)) { 1165491851bfSmlarkin DPRINTF("mismatched hibernate signature block\n"); 1166c3ed0588Skrw goto fail; 1167ca1a7c3eSderaadt } 1168fc6d48fdSkrw disk_hib.dev = hib->dev; 11693a85c2afSmlarkin 11709ecd98c6Smlarkin #ifdef MULTIPROCESSOR 11717bbeffb6Smlarkin /* XXX - if we fail later, we may need to rehatch APs on some archs */ 11727bbeffb6Smlarkin DPRINTF("hibernate: quiescing APs\n"); 11739ecd98c6Smlarkin hibernate_quiesce_cpus(); 11749ecd98c6Smlarkin #endif /* MULTIPROCESSOR */ 11759ecd98c6Smlarkin 117620703d53Smlarkin /* Read the image from disk into the image (pig) area */ 1177908847d1Sderaadt if (hibernate_read_image(&disk_hib)) 117820703d53Smlarkin goto fail; 1179c3ed0588Skrw if ((*bdsw->d_close)(hib->dev, 0, S_IFCHR, curproc)) 1180c3ed0588Skrw printf("hibernate_resume device close failed\n"); 1181c3ed0588Skrw bdsw = NULL; 118220703d53Smlarkin 11837bbeffb6Smlarkin DPRINTF("hibernate: quiescing devices\n"); 118495cb2185Skettenis if (config_suspend_all(DVACT_QUIESCE) != 0) 11857c9ea1f9Smlarkin goto fail; 11867c9ea1f9Smlarkin 118707e1ce2aSbeck #ifndef NO_PROPOLICE 118807e1ce2aSbeck guard_va = hibernate_unprotect_ssp(); 118907e1ce2aSbeck #endif /* NO_PROPOLICE */ 119007e1ce2aSbeck 1191ca1a7c3eSderaadt (void) splhigh(); 119252f9d053Smlarkin hibernate_disable_intr_machdep(); 11930a537ea4Sderaadt cold = 2; 119420703d53Smlarkin 11957bbeffb6Smlarkin DPRINTF("hibernate: suspending devices\n"); 119695cb2185Skettenis if (config_suspend_all(DVACT_SUSPEND) != 0) { 1197ca1a7c3eSderaadt cold = 0; 119852f9d053Smlarkin hibernate_enable_intr_machdep(); 119907e1ce2aSbeck #ifndef NO_PROPOLICE 120007e1ce2aSbeck hibernate_reprotect_ssp(guard_va); 120107e1ce2aSbeck #endif /* ! NO_PROPOLICE */ 1202ca1a7c3eSderaadt goto fail; 1203ca1a7c3eSderaadt } 1204ca1a7c3eSderaadt 12052547ab58Smlarkin pmap_extract(pmap_kernel(), (vaddr_t)&__retguard_start, 12062547ab58Smlarkin &retguard_start_phys); 12072547ab58Smlarkin pmap_extract(pmap_kernel(), (vaddr_t)&__retguard_end, 12082547ab58Smlarkin &retguard_end_phys); 12092547ab58Smlarkin 1210464029d7Smlarkin hibernate_preserve_entropy(&disk_hib); 1211464029d7Smlarkin 1212a3fa4d81Smlarkin printf("Unpacking image...\n"); 1213a3fa4d81Smlarkin 121420703d53Smlarkin /* Switch stacks */ 12157bbeffb6Smlarkin DPRINTF("hibernate: switching stacks\n"); 121620703d53Smlarkin hibernate_switch_stack_machdep(); 121720703d53Smlarkin 1218bd831450Smlarkin #ifndef NO_PROPOLICE 1219bd831450Smlarkin /* Start using suspended kernel's propolice guard */ 122007e1ce2aSbeck *(long *)(guard_va + off) = disk_hib.guard; 122107e1ce2aSbeck hibernate_reprotect_ssp(guard_va); 1222bd831450Smlarkin #endif /* ! NO_PROPOLICE */ 1223bd831450Smlarkin 12241e419546Smlarkin /* Unpack and resume */ 1225908847d1Sderaadt hibernate_unpack_image(&disk_hib); 122620703d53Smlarkin 122720703d53Smlarkin fail: 1228c3ed0588Skrw if (!bdsw) 122939502a33Smlarkin printf("\nUnable to resume hibernated image\n"); 1230c3ed0588Skrw else if ((*bdsw->d_close)(hib->dev, 0, S_IFCHR, curproc)) 1231c3ed0588Skrw printf("hibernate_resume device close failed\n"); 1232c3ed0588Skrw splx(s); 123320703d53Smlarkin } 123420703d53Smlarkin 123520703d53Smlarkin /* 123620703d53Smlarkin * Unpack image from pig area to original location by looping through the 123720703d53Smlarkin * list of output chunks in the order they should be restored (fchunks). 12381e419546Smlarkin * 12391e419546Smlarkin * Note that due to the stack smash protector and the fact that we have 12401e419546Smlarkin * switched stacks, it is not permitted to return from this function. 124120703d53Smlarkin */ 124220703d53Smlarkin void 1243908847d1Sderaadt hibernate_unpack_image(union hibernate_info *hib) 124420703d53Smlarkin { 1245fc6d48fdSkrw uint8_t buf[DEV_BSIZE]; 1246e33b04c7Sderaadt struct hibernate_disk_chunk *chunks; 1247fc6d48fdSkrw union hibernate_info *local_hib = (union hibernate_info *)&buf; 124895a70527Smlarkin paddr_t image_cur = global_pig_start; 1249cffc25bbSmlarkin short i, *fchunks; 125031a59060Smlarkin char *pva; 125120703d53Smlarkin 125231a59060Smlarkin /* Piglet will be identity mapped (VA == PA) */ 125331a59060Smlarkin pva = (char *)hib->piglet_pa; 125431a59060Smlarkin 1255cffc25bbSmlarkin fchunks = (short *)(pva + (4 * PAGE_SIZE)); 125620703d53Smlarkin 12572a168c45Smlarkin chunks = (struct hibernate_disk_chunk *)(pva + HIBERNATE_CHUNK_SIZE); 125820703d53Smlarkin 12599783ed43Smlarkin /* Can't use hiber_info that's passed in after this point */ 1260fc6d48fdSkrw memcpy(buf, hib, sizeof(buf)); 1261fc6d48fdSkrw local_hib->retguard_ofs = 0; 126295a70527Smlarkin 126331a59060Smlarkin /* VA == PA */ 1264fc6d48fdSkrw local_hib->piglet_va = local_hib->piglet_pa; 126531a59060Smlarkin 12661e419546Smlarkin /* 12671e419546Smlarkin * Point of no return. Once we pass this point, only kernel code can 12681e419546Smlarkin * be accessed. No global variables or other kernel data structures 12691e419546Smlarkin * are guaranteed to be coherent after unpack starts. 12701e419546Smlarkin * 12711e419546Smlarkin * The image is now in high memory (pig area), we unpack from the pig 12721e419546Smlarkin * to the correct location in memory. We'll eventually end up copying 12731e419546Smlarkin * on top of ourself, but we are assured the kernel code here is the 12741e419546Smlarkin * same between the hibernated and resuming kernel, and we are running 12751e419546Smlarkin * on our own stack, so the overwrite is ok. 12761e419546Smlarkin */ 12777bbeffb6Smlarkin DPRINTF("hibernate: activating alt. pagetable and starting unpack\n"); 127820703d53Smlarkin hibernate_activate_resume_pt_machdep(); 127920703d53Smlarkin 1280fc6d48fdSkrw for (i = 0; i < local_hib->chunk_ctr; i++) { 128120703d53Smlarkin /* Reset zlib for inflate */ 1282fc6d48fdSkrw if (hibernate_zlib_reset(local_hib, 0) != Z_OK) 1283af7dea42Sjasper panic("hibernate failed to reset zlib for inflate"); 128420703d53Smlarkin 1285fc6d48fdSkrw hibernate_process_chunk(local_hib, &chunks[fchunks[i]], 12869783ed43Smlarkin image_cur); 12879783ed43Smlarkin 12889783ed43Smlarkin image_cur += chunks[fchunks[i]].compressed_size; 12899783ed43Smlarkin } 12901e419546Smlarkin 12911e419546Smlarkin /* 12921e419546Smlarkin * Resume the loaded kernel by jumping to the MD resume vector. 129365052cacSmlarkin * We won't be returning from this call. We pass the location of 129465052cacSmlarkin * the retguard save area so the MD code can replace it before 129565052cacSmlarkin * resuming. See the piglet layout at the top of this file for 129665052cacSmlarkin * more information on the layout of the piglet area. 129765052cacSmlarkin * 129865052cacSmlarkin * We use 'global_piglet_va' here since by the time we are at 129965052cacSmlarkin * this point, we have already unpacked the image, and we want 130065052cacSmlarkin * the suspended kernel's view of what the piglet was, before 130165052cacSmlarkin * suspend occurred (since we will need to use that in the retguard 130265052cacSmlarkin * copy code in hibernate_resume_machdep.) 13031e419546Smlarkin */ 130465052cacSmlarkin hibernate_resume_machdep(global_piglet_va + (110 * PAGE_SIZE)); 13059783ed43Smlarkin } 13069783ed43Smlarkin 13079783ed43Smlarkin /* 130821eafc1bSmlarkin * Bounce a compressed image chunk to the piglet, entering mappings for the 130921eafc1bSmlarkin * copied pages as needed 131021eafc1bSmlarkin */ 131121eafc1bSmlarkin void 131221eafc1bSmlarkin hibernate_copy_chunk_to_piglet(paddr_t img_cur, vaddr_t piglet, size_t size) 131321eafc1bSmlarkin { 131421eafc1bSmlarkin size_t ct, ofs; 131521eafc1bSmlarkin paddr_t src = img_cur; 131621eafc1bSmlarkin vaddr_t dest = piglet; 131721eafc1bSmlarkin 131821eafc1bSmlarkin /* Copy first partial page */ 131921eafc1bSmlarkin ct = (PAGE_SIZE) - (src & PAGE_MASK); 132021eafc1bSmlarkin ofs = (src & PAGE_MASK); 132121eafc1bSmlarkin 132221eafc1bSmlarkin if (ct < PAGE_SIZE) { 132321eafc1bSmlarkin hibernate_enter_resume_mapping(HIBERNATE_INFLATE_PAGE, 132421eafc1bSmlarkin (src - ofs), 0); 132521eafc1bSmlarkin hibernate_flush(); 132621eafc1bSmlarkin bcopy((caddr_t)(HIBERNATE_INFLATE_PAGE + ofs), (caddr_t)dest, ct); 132721eafc1bSmlarkin src += ct; 132821eafc1bSmlarkin dest += ct; 132921eafc1bSmlarkin } 133021eafc1bSmlarkin 133121eafc1bSmlarkin /* Copy remaining pages */ 133221eafc1bSmlarkin while (src < size + img_cur) { 133321eafc1bSmlarkin hibernate_enter_resume_mapping(HIBERNATE_INFLATE_PAGE, src, 0); 133421eafc1bSmlarkin hibernate_flush(); 133521eafc1bSmlarkin ct = PAGE_SIZE; 133621eafc1bSmlarkin bcopy((caddr_t)(HIBERNATE_INFLATE_PAGE), (caddr_t)dest, ct); 133721eafc1bSmlarkin hibernate_flush(); 133821eafc1bSmlarkin src += ct; 133921eafc1bSmlarkin dest += ct; 134021eafc1bSmlarkin } 134121eafc1bSmlarkin } 134221eafc1bSmlarkin 134321eafc1bSmlarkin /* 134421eafc1bSmlarkin * Process a chunk by bouncing it to the piglet, followed by unpacking 13459783ed43Smlarkin */ 13469783ed43Smlarkin void 1347908847d1Sderaadt hibernate_process_chunk(union hibernate_info *hib, 13489783ed43Smlarkin struct hibernate_disk_chunk *chunk, paddr_t img_cur) 13499783ed43Smlarkin { 1350908847d1Sderaadt char *pva = (char *)hib->piglet_va; 13519783ed43Smlarkin 135221eafc1bSmlarkin hibernate_copy_chunk_to_piglet(img_cur, 135321eafc1bSmlarkin (vaddr_t)(pva + (HIBERNATE_CHUNK_SIZE * 2)), chunk->compressed_size); 1354908847d1Sderaadt hibernate_inflate_region(hib, chunk->base, 135595a70527Smlarkin (vaddr_t)(pva + (HIBERNATE_CHUNK_SIZE * 2)), 13569783ed43Smlarkin chunk->compressed_size); 135720703d53Smlarkin } 135820703d53Smlarkin 135920703d53Smlarkin /* 13607882bc75Smlarkin * Calculate RLE component for 'inaddr'. Clamps to max RLE pages between 13617882bc75Smlarkin * inaddr and range_end. 13627882bc75Smlarkin */ 13637882bc75Smlarkin int 13647882bc75Smlarkin hibernate_calc_rle(paddr_t inaddr, paddr_t range_end) 13657882bc75Smlarkin { 13667882bc75Smlarkin int rle; 13677882bc75Smlarkin 13687882bc75Smlarkin rle = uvm_page_rle(inaddr); 13697882bc75Smlarkin KASSERT(rle >= 0 && rle <= MAX_RLE); 13707882bc75Smlarkin 13717882bc75Smlarkin /* Clamp RLE to range end */ 13727882bc75Smlarkin if (rle > 0 && inaddr + (rle * PAGE_SIZE) > range_end) 13737882bc75Smlarkin rle = (range_end - inaddr) / PAGE_SIZE; 13747882bc75Smlarkin 13757882bc75Smlarkin return (rle); 13767882bc75Smlarkin } 13777882bc75Smlarkin 13787882bc75Smlarkin /* 13797882bc75Smlarkin * Write the RLE byte for page at 'inaddr' to the output stream. 13807882bc75Smlarkin * Returns the number of pages to be skipped at 'inaddr'. 13817882bc75Smlarkin */ 13827882bc75Smlarkin int 13837882bc75Smlarkin hibernate_write_rle(union hibernate_info *hib, paddr_t inaddr, 13847882bc75Smlarkin paddr_t range_end, daddr_t *blkctr, 13857882bc75Smlarkin size_t *out_remaining) 13867882bc75Smlarkin { 13877882bc75Smlarkin int rle, err, *rleloc; 13887882bc75Smlarkin struct hibernate_zlib_state *hibernate_state; 13897882bc75Smlarkin vaddr_t hibernate_io_page = hib->piglet_va + PAGE_SIZE; 13907882bc75Smlarkin 13917882bc75Smlarkin hibernate_state = 13927882bc75Smlarkin (struct hibernate_zlib_state *)HIBERNATE_HIBALLOC_PAGE; 13937882bc75Smlarkin 13947882bc75Smlarkin rle = hibernate_calc_rle(inaddr, range_end); 13957882bc75Smlarkin 13967882bc75Smlarkin rleloc = (int *)hibernate_rle_page + MAX_RLE - 1; 13977882bc75Smlarkin *rleloc = rle; 13987882bc75Smlarkin 13997882bc75Smlarkin /* Deflate the RLE byte into the stream */ 14007882bc75Smlarkin hibernate_deflate(hib, (paddr_t)rleloc, out_remaining); 14017882bc75Smlarkin 14027882bc75Smlarkin /* Did we fill the output page? If so, flush to disk */ 14037882bc75Smlarkin if (*out_remaining == 0) { 14049783fa9dSkrw if ((err = hibernate_write(hib, *blkctr, 14059783fa9dSkrw (vaddr_t)hibernate_io_page, PAGE_SIZE, IO_TYPE_IMG))) { 14067882bc75Smlarkin DPRINTF("hib write error %d\n", err); 1407e874061eSkrw return -1; 14087882bc75Smlarkin } 14097882bc75Smlarkin 14109783fa9dSkrw *blkctr += btodb(PAGE_SIZE); 14117882bc75Smlarkin *out_remaining = PAGE_SIZE; 14127882bc75Smlarkin 14137882bc75Smlarkin /* If we didn't deflate the entire RLE byte, finish it now */ 14147882bc75Smlarkin if (hibernate_state->hib_stream.avail_in != 0) 14157882bc75Smlarkin hibernate_deflate(hib, 14167882bc75Smlarkin (vaddr_t)hibernate_state->hib_stream.next_in, 14177882bc75Smlarkin out_remaining); 14187882bc75Smlarkin } 14197882bc75Smlarkin 14207882bc75Smlarkin return (rle); 14217882bc75Smlarkin } 14227882bc75Smlarkin 14237882bc75Smlarkin /* 142420703d53Smlarkin * Write a compressed version of this machine's memory to disk, at the 142520703d53Smlarkin * precalculated swap offset: 142620703d53Smlarkin * 142720703d53Smlarkin * end of swap - signature block size - chunk table size - memory size 142820703d53Smlarkin * 142920703d53Smlarkin * The function begins by looping through each phys mem range, cutting each 14309783ed43Smlarkin * one into MD sized chunks. These chunks are then compressed individually 143120703d53Smlarkin * and written out to disk, in phys mem order. Some chunks might compress 143220703d53Smlarkin * more than others, and for this reason, each chunk's size is recorded 143320703d53Smlarkin * in the chunk table, which is written to disk after the image has 143420703d53Smlarkin * properly been compressed and written (in hibernate_write_chunktable). 143520703d53Smlarkin * 143620703d53Smlarkin * When this function is called, the machine is nearly suspended - most 143720703d53Smlarkin * devices are quiesced/suspended, interrupts are off, and cold has 143820703d53Smlarkin * been set. This means that there can be no side effects once the 143920703d53Smlarkin * write has started, and the write function itself can also have no 1440b42f10c6Smlarkin * side effects. This also means no printfs are permitted (since printf 14419783ed43Smlarkin * has side effects.) 1442e5033bc8Smlarkin * 1443e5033bc8Smlarkin * Return values : 1444e5033bc8Smlarkin * 1445e5033bc8Smlarkin * 0 - success 1446e5033bc8Smlarkin * EIO - I/O error occurred writing the chunks 1447e5033bc8Smlarkin * EINVAL - Failed to write a complete range 1448e5033bc8Smlarkin * ENOMEM - Memory allocation failure during preparation of the zlib arena 144920703d53Smlarkin */ 145020703d53Smlarkin int 1451908847d1Sderaadt hibernate_write_chunks(union hibernate_info *hib) 145220703d53Smlarkin { 145320703d53Smlarkin paddr_t range_base, range_end, inaddr, temp_inaddr; 1454fc6d48fdSkrw size_t out_remaining, used; 145520703d53Smlarkin struct hibernate_disk_chunk *chunks; 1456908847d1Sderaadt vaddr_t hibernate_io_page = hib->piglet_va + PAGE_SIZE; 145771e939c7Sderaadt daddr_t blkctr = 0; 14587882bc75Smlarkin int i, rle, err; 14593a85c2afSmlarkin struct hibernate_zlib_state *hibernate_state; 14603a85c2afSmlarkin 14613843cbddSpirofti hibernate_state = 14623843cbddSpirofti (struct hibernate_zlib_state *)HIBERNATE_HIBALLOC_PAGE; 146320703d53Smlarkin 1464908847d1Sderaadt hib->chunk_ctr = 0; 146520703d53Smlarkin 146620703d53Smlarkin /* 14677882bc75Smlarkin * Map the utility VAs to the piglet. See the piglet map at the 14687882bc75Smlarkin * top of this file for piglet layout information. 14697882bc75Smlarkin */ 1470259dfafeSmlarkin hibernate_copy_page = hib->piglet_va + 3 * PAGE_SIZE; 1471259dfafeSmlarkin hibernate_rle_page = hib->piglet_va + 28 * PAGE_SIZE; 147220703d53Smlarkin 1473908847d1Sderaadt chunks = (struct hibernate_disk_chunk *)(hib->piglet_va + 147420703d53Smlarkin HIBERNATE_CHUNK_SIZE); 147520703d53Smlarkin 147620703d53Smlarkin /* Calculate the chunk regions */ 1477908847d1Sderaadt for (i = 0; i < hib->nranges; i++) { 1478908847d1Sderaadt range_base = hib->ranges[i].base; 1479908847d1Sderaadt range_end = hib->ranges[i].end; 148020703d53Smlarkin 148120703d53Smlarkin inaddr = range_base; 148220703d53Smlarkin 148320703d53Smlarkin while (inaddr < range_end) { 1484908847d1Sderaadt chunks[hib->chunk_ctr].base = inaddr; 148520703d53Smlarkin if (inaddr + HIBERNATE_CHUNK_SIZE < range_end) 1486908847d1Sderaadt chunks[hib->chunk_ctr].end = inaddr + 148720703d53Smlarkin HIBERNATE_CHUNK_SIZE; 148820703d53Smlarkin else 1489908847d1Sderaadt chunks[hib->chunk_ctr].end = range_end; 149020703d53Smlarkin 149120703d53Smlarkin inaddr += HIBERNATE_CHUNK_SIZE; 1492908847d1Sderaadt hib->chunk_ctr ++; 149320703d53Smlarkin } 149420703d53Smlarkin } 149520703d53Smlarkin 14967882bc75Smlarkin uvm_pmr_dirty_everything(); 14977882bc75Smlarkin 149820703d53Smlarkin /* Compress and write the chunks in the chunktable */ 1499908847d1Sderaadt for (i = 0; i < hib->chunk_ctr; i++) { 150020703d53Smlarkin range_base = chunks[i].base; 150120703d53Smlarkin range_end = chunks[i].end; 150220703d53Smlarkin 15039783fa9dSkrw chunks[i].offset = blkctr; 150420703d53Smlarkin 150520703d53Smlarkin /* Reset zlib for deflate */ 1506908847d1Sderaadt if (hibernate_zlib_reset(hib, 1) != Z_OK) { 150796cbc698Smlarkin DPRINTF("hibernate_zlib_reset failed for deflate\n"); 1508e5033bc8Smlarkin return (ENOMEM); 150996cbc698Smlarkin } 151020703d53Smlarkin 151120703d53Smlarkin inaddr = range_base; 151220703d53Smlarkin 151320703d53Smlarkin /* 151420703d53Smlarkin * For each range, loop through its phys mem region 151520703d53Smlarkin * and write out the chunks (the last chunk might be 151620703d53Smlarkin * smaller than the chunk size). 151720703d53Smlarkin */ 151820703d53Smlarkin while (inaddr < range_end) { 151920703d53Smlarkin out_remaining = PAGE_SIZE; 152020703d53Smlarkin while (out_remaining > 0 && inaddr < range_end) { 152195a70527Smlarkin /* 152295a70527Smlarkin * Adjust for regions that are not evenly 1523d9e6d5c0Smlarkin * divisible by PAGE_SIZE or overflowed 1524d9e6d5c0Smlarkin * pages from the previous iteration. 152595a70527Smlarkin */ 152620703d53Smlarkin temp_inaddr = (inaddr & PAGE_MASK) + 152720703d53Smlarkin hibernate_copy_page; 152820703d53Smlarkin 152920703d53Smlarkin /* Deflate from temp_inaddr to IO page */ 1530d9e6d5c0Smlarkin if (inaddr != range_end) { 1531e874061eSkrw rle = 0; 15327882bc75Smlarkin if (inaddr % PAGE_SIZE == 0) { 15337882bc75Smlarkin rle = hibernate_write_rle(hib, 15347882bc75Smlarkin inaddr, 15357882bc75Smlarkin range_end, 15367882bc75Smlarkin &blkctr, 15377882bc75Smlarkin &out_remaining); 15387882bc75Smlarkin } 15397882bc75Smlarkin 1540e874061eSkrw switch (rle) { 1541e874061eSkrw case -1: 1542e874061eSkrw return EIO; 1543e874061eSkrw case 0: 1544d9e6d5c0Smlarkin pmap_kenter_pa(hibernate_temp_page, 15457882bc75Smlarkin inaddr & PMAP_PA_MASK, 15461e8cdc2eSderaadt PROT_READ); 1547d9e6d5c0Smlarkin 1548d9e6d5c0Smlarkin bcopy((caddr_t)hibernate_temp_page, 15493843cbddSpirofti (caddr_t)hibernate_copy_page, 15503843cbddSpirofti PAGE_SIZE); 1551908847d1Sderaadt inaddr += hibernate_deflate(hib, 15527882bc75Smlarkin temp_inaddr, 15537882bc75Smlarkin &out_remaining); 1554e874061eSkrw break; 1555e874061eSkrw default: 15567882bc75Smlarkin inaddr += rle * PAGE_SIZE; 15577882bc75Smlarkin if (inaddr > range_end) 15587882bc75Smlarkin inaddr = range_end; 1559e874061eSkrw break; 15607882bc75Smlarkin } 15617882bc75Smlarkin 156220703d53Smlarkin } 156320703d53Smlarkin 156420703d53Smlarkin if (out_remaining == 0) { 156520703d53Smlarkin /* Filled up the page */ 15669783fa9dSkrw if ((err = hibernate_write(hib, blkctr, 15673f4becefSderaadt (vaddr_t)hibernate_io_page, 15689783fa9dSkrw PAGE_SIZE, IO_TYPE_IMG))) { 156996cbc698Smlarkin DPRINTF("hib write error %d\n", 157096cbc698Smlarkin err); 1571b687eefdSderaadt return (err); 157296cbc698Smlarkin } 15739783fa9dSkrw blkctr += btodb(PAGE_SIZE); 157420703d53Smlarkin } 157520703d53Smlarkin } 1576c92a73edSmlarkin } 157720703d53Smlarkin 157896cbc698Smlarkin if (inaddr != range_end) { 157996cbc698Smlarkin DPRINTF("deflate range ended prematurely\n"); 1580e5033bc8Smlarkin return (EINVAL); 158196cbc698Smlarkin } 158220703d53Smlarkin 158320703d53Smlarkin /* 158420703d53Smlarkin * End of range. Round up to next secsize bytes 158520703d53Smlarkin * after finishing compress 158620703d53Smlarkin */ 158720703d53Smlarkin if (out_remaining == 0) 158820703d53Smlarkin out_remaining = PAGE_SIZE; 158920703d53Smlarkin 159020703d53Smlarkin /* Finish compress */ 1591b498c5a9Smlarkin hibernate_state->hib_stream.next_in = (unsigned char *)inaddr; 1592aa7ef211Sderaadt hibernate_state->hib_stream.avail_in = 0; 159320703d53Smlarkin hibernate_state->hib_stream.next_out = 1594b498c5a9Smlarkin (unsigned char *)hibernate_io_page + 1595b498c5a9Smlarkin (PAGE_SIZE - out_remaining); 1596b484ab48Smlarkin 1597b484ab48Smlarkin /* We have an extra output page available for finalize */ 1598b484ab48Smlarkin hibernate_state->hib_stream.avail_out = 1599b484ab48Smlarkin out_remaining + PAGE_SIZE; 160020703d53Smlarkin 160196cbc698Smlarkin if ((err = deflate(&hibernate_state->hib_stream, Z_FINISH)) != 160296cbc698Smlarkin Z_STREAM_END) { 160396cbc698Smlarkin DPRINTF("deflate error in output stream: %d\n", err); 1604b687eefdSderaadt return (err); 160596cbc698Smlarkin } 160620703d53Smlarkin 160720703d53Smlarkin out_remaining = hibernate_state->hib_stream.avail_out; 160820703d53Smlarkin 1609fc6d48fdSkrw /* Round up to next sector if needed */ 1610*2fbc8e98Skrw used = roundup(2 * PAGE_SIZE - out_remaining, hib->sec_size); 161120703d53Smlarkin 161220703d53Smlarkin /* Write final block(s) for this chunk */ 16139783fa9dSkrw if ((err = hibernate_write(hib, blkctr, 16149783fa9dSkrw (vaddr_t)hibernate_io_page, used, IO_TYPE_IMG))) { 161596cbc698Smlarkin DPRINTF("hib final write error %d\n", err); 1616b687eefdSderaadt return (err); 161796cbc698Smlarkin } 161820703d53Smlarkin 16199783fa9dSkrw blkctr += btodb(used); 162020703d53Smlarkin 16219783fa9dSkrw chunks[i].compressed_size = dbtob(blkctr - chunks[i].offset); 162220703d53Smlarkin } 162320703d53Smlarkin 162420703d53Smlarkin return (0); 162520703d53Smlarkin } 162620703d53Smlarkin 162720703d53Smlarkin /* 162820703d53Smlarkin * Reset the zlib stream state and allocate a new hiballoc area for either 162920703d53Smlarkin * inflate or deflate. This function is called once for each hibernate chunk. 163020703d53Smlarkin * Calling hiballoc_init multiple times is acceptable since the memory it is 163120703d53Smlarkin * provided is unmanaged memory (stolen). We use the memory provided to us 1632908847d1Sderaadt * by the piglet allocated via the supplied hib. 163320703d53Smlarkin */ 163420703d53Smlarkin int 1635908847d1Sderaadt hibernate_zlib_reset(union hibernate_info *hib, int deflate) 163620703d53Smlarkin { 163720703d53Smlarkin vaddr_t hibernate_zlib_start; 163820703d53Smlarkin size_t hibernate_zlib_size; 1639908847d1Sderaadt char *pva = (char *)hib->piglet_va; 16403a85c2afSmlarkin struct hibernate_zlib_state *hibernate_state; 164120703d53Smlarkin 16423843cbddSpirofti hibernate_state = 16433843cbddSpirofti (struct hibernate_zlib_state *)HIBERNATE_HIBALLOC_PAGE; 16443a85c2afSmlarkin 16453a85c2afSmlarkin if (!deflate) 16463a85c2afSmlarkin pva = (char *)((paddr_t)pva & (PIGLET_PAGE_MASK)); 164720703d53Smlarkin 16487882bc75Smlarkin /* 16497882bc75Smlarkin * See piglet layout information at the start of this file for 16507882bc75Smlarkin * information on the zlib page assignments. 16517882bc75Smlarkin */ 1652464029d7Smlarkin hibernate_zlib_start = (vaddr_t)(pva + (30 * PAGE_SIZE)); 165320703d53Smlarkin hibernate_zlib_size = 80 * PAGE_SIZE; 165420703d53Smlarkin 165591ba896dStedu memset((void *)hibernate_zlib_start, 0, hibernate_zlib_size); 165691ba896dStedu memset(hibernate_state, 0, PAGE_SIZE); 165720703d53Smlarkin 165820703d53Smlarkin /* Set up stream structure */ 165920703d53Smlarkin hibernate_state->hib_stream.zalloc = (alloc_func)hibernate_zlib_alloc; 166020703d53Smlarkin hibernate_state->hib_stream.zfree = (free_func)hibernate_zlib_free; 166120703d53Smlarkin 166220703d53Smlarkin /* Initialize the hiballoc arena for zlib allocs/frees */ 166396b2f336Skrw if (hiballoc_init(&hibernate_state->hiballoc_arena, 166496b2f336Skrw (caddr_t)hibernate_zlib_start, hibernate_zlib_size)) 166596b2f336Skrw return 1; 166620703d53Smlarkin 166720703d53Smlarkin if (deflate) { 166820703d53Smlarkin return deflateInit(&hibernate_state->hib_stream, 166984caa5ffSmlarkin Z_BEST_SPEED); 1670e33b04c7Sderaadt } else 167120703d53Smlarkin return inflateInit(&hibernate_state->hib_stream); 167220703d53Smlarkin } 167320703d53Smlarkin 167420703d53Smlarkin /* 167520703d53Smlarkin * Reads the hibernated memory image from disk, whose location and 1676908847d1Sderaadt * size are recorded in hib. Begin by reading the persisted 167720703d53Smlarkin * chunk table, which records the original chunk placement location 167820703d53Smlarkin * and compressed size for each. Next, allocate a pig region of 167920703d53Smlarkin * sufficient size to hold the compressed image. Next, read the 168020703d53Smlarkin * chunks into the pig area (calling hibernate_read_chunks to do this), 168120703d53Smlarkin * and finally, if all of the above succeeds, clear the hibernate signature. 168220703d53Smlarkin * The function will then return to hibernate_resume, which will proceed 168320703d53Smlarkin * to unpack the pig image to the correct place in memory. 168420703d53Smlarkin */ 168520703d53Smlarkin int 1686908847d1Sderaadt hibernate_read_image(union hibernate_info *hib) 168720703d53Smlarkin { 168820703d53Smlarkin size_t compressed_size, disk_size, chunktable_size, pig_sz; 1689e33b04c7Sderaadt paddr_t image_start, image_end, pig_start, pig_end; 1690e33b04c7Sderaadt struct hibernate_disk_chunk *chunks; 1691e33b04c7Sderaadt daddr_t blkctr; 169290f353dfSmlarkin vaddr_t chunktable = (vaddr_t)NULL; 1693908847d1Sderaadt paddr_t piglet_chunktable = hib->piglet_pa + 169490f353dfSmlarkin HIBERNATE_CHUNK_SIZE; 16959948b232Smlarkin int i, status; 169620703d53Smlarkin 16979948b232Smlarkin status = 0; 16983a85c2afSmlarkin pmap_activate(curproc); 16993a85c2afSmlarkin 170020703d53Smlarkin /* Calculate total chunk table size in disk blocks */ 17019783fa9dSkrw chunktable_size = btodb(HIBERNATE_CHUNK_TABLE_SIZE); 170220703d53Smlarkin 1703225caf62Sderaadt blkctr = hib->chunktable_offset; 170420703d53Smlarkin 170590f353dfSmlarkin chunktable = (vaddr_t)km_alloc(HIBERNATE_CHUNK_TABLE_SIZE, &kv_any, 170690f353dfSmlarkin &kp_none, &kd_nowait); 170790f353dfSmlarkin 170890f353dfSmlarkin if (!chunktable) 170990f353dfSmlarkin return (1); 171090f353dfSmlarkin 17119948b232Smlarkin /* Map chunktable pages */ 17129948b232Smlarkin for (i = 0; i < HIBERNATE_CHUNK_TABLE_SIZE; i += PAGE_SIZE) 17133843cbddSpirofti pmap_kenter_pa(chunktable + i, piglet_chunktable + i, 17141e8cdc2eSderaadt PROT_READ | PROT_WRITE); 17153a85c2afSmlarkin pmap_update(pmap_kernel()); 17169948b232Smlarkin 17179948b232Smlarkin /* Read the chunktable from disk into the piglet chunktable */ 17189948b232Smlarkin for (i = 0; i < HIBERNATE_CHUNK_TABLE_SIZE; 171996b2f336Skrw i += MAXPHYS, blkctr += btodb(MAXPHYS)) { 172096b2f336Skrw if (hibernate_block_io(hib, blkctr, MAXPHYS, 172196b2f336Skrw chunktable + i, 0)) { 172296b2f336Skrw status = 1; 172396b2f336Skrw goto unmap; 172496b2f336Skrw } 172596b2f336Skrw } 172620703d53Smlarkin 1727908847d1Sderaadt blkctr = hib->image_offset; 172820703d53Smlarkin compressed_size = 0; 17293a85c2afSmlarkin 173090f353dfSmlarkin chunks = (struct hibernate_disk_chunk *)chunktable; 173120703d53Smlarkin 1732908847d1Sderaadt for (i = 0; i < hib->chunk_ctr; i++) 173320703d53Smlarkin compressed_size += chunks[i].compressed_size; 173420703d53Smlarkin 173520703d53Smlarkin disk_size = compressed_size; 173620703d53Smlarkin 173771cdd4a2Sderaadt printf("unhibernating @ block %lld length %luMB\n", 17389783fa9dSkrw hib->image_offset, compressed_size / (1024 * 1024)); 173939502a33Smlarkin 174020703d53Smlarkin /* Allocate the pig area */ 174120703d53Smlarkin pig_sz = compressed_size + HIBERNATE_CHUNK_SIZE; 1742b0c80555Skettenis if (uvm_pmr_alloc_pig(&pig_start, pig_sz, hib->piglet_pa) == ENOMEM) { 17439948b232Smlarkin status = 1; 17449948b232Smlarkin goto unmap; 17459948b232Smlarkin } 174620703d53Smlarkin 174720703d53Smlarkin pig_end = pig_start + pig_sz; 174820703d53Smlarkin 174920703d53Smlarkin /* Calculate image extents. Pig image must end on a chunk boundary. */ 175020703d53Smlarkin image_end = pig_end & ~(HIBERNATE_CHUNK_SIZE - 1); 175120703d53Smlarkin image_start = image_end - disk_size; 175220703d53Smlarkin 175396b2f336Skrw if (hibernate_read_chunks(hib, image_start, image_end, disk_size, 175496b2f336Skrw chunks)) { 175596b2f336Skrw status = 1; 175696b2f336Skrw goto unmap; 175796b2f336Skrw } 175820703d53Smlarkin 175920703d53Smlarkin /* Prepare the resume time pmap/page table */ 1760908847d1Sderaadt hibernate_populate_resume_pt(hib, image_start, image_end); 176120703d53Smlarkin 17629948b232Smlarkin unmap: 17639948b232Smlarkin /* Unmap chunktable pages */ 17649948b232Smlarkin pmap_kremove(chunktable, HIBERNATE_CHUNK_TABLE_SIZE); 17659948b232Smlarkin pmap_update(pmap_kernel()); 17669948b232Smlarkin 17679948b232Smlarkin return (status); 176820703d53Smlarkin } 176920703d53Smlarkin 177020703d53Smlarkin /* 177120703d53Smlarkin * Read the hibernated memory chunks from disk (chunk information at this 177220703d53Smlarkin * point is stored in the piglet) into the pig area specified by 177320703d53Smlarkin * [pig_start .. pig_end]. Order the chunks so that the final chunk is the 177420703d53Smlarkin * only chunk with overlap possibilities. 177520703d53Smlarkin */ 177620703d53Smlarkin int 1777908847d1Sderaadt hibernate_read_chunks(union hibernate_info *hib, paddr_t pig_start, 177890f353dfSmlarkin paddr_t pig_end, size_t image_compr_size, 177990f353dfSmlarkin struct hibernate_disk_chunk *chunks) 178020703d53Smlarkin { 178108170551Smlarkin paddr_t img_cur, piglet_base; 178220703d53Smlarkin daddr_t blkctr; 178320703d53Smlarkin size_t processed, compressed_size, read_size; 178496b2f336Skrw int err, nchunks, nfchunks, num_io_pages; 178508170551Smlarkin vaddr_t tempva, hibernate_fchunk_area; 178608170551Smlarkin short *fchunks, i, j; 178795a70527Smlarkin 178808170551Smlarkin tempva = (vaddr_t)NULL; 178908170551Smlarkin hibernate_fchunk_area = (vaddr_t)NULL; 179008170551Smlarkin nfchunks = 0; 179108170551Smlarkin piglet_base = hib->piglet_pa; 179295a70527Smlarkin global_pig_start = pig_start; 179395a70527Smlarkin 179490f353dfSmlarkin /* 179590f353dfSmlarkin * These mappings go into the resuming kernel's page table, and are 1796678831beSjsg * used only during image read. They disappear from existence 179790f353dfSmlarkin * when the suspended kernel is unpacked on top of us. 179890f353dfSmlarkin */ 17997d396bdbSmlarkin tempva = (vaddr_t)km_alloc(MAXPHYS + PAGE_SIZE, &kv_any, &kp_none, 18007d396bdbSmlarkin &kd_nowait); 180190f353dfSmlarkin if (!tempva) 180290f353dfSmlarkin return (1); 180321eafc1bSmlarkin hibernate_fchunk_area = (vaddr_t)km_alloc(24 * PAGE_SIZE, &kv_any, 180490f353dfSmlarkin &kp_none, &kd_nowait); 180590f353dfSmlarkin if (!hibernate_fchunk_area) 180690f353dfSmlarkin return (1); 180790f353dfSmlarkin 180821eafc1bSmlarkin /* Final output chunk ordering VA */ 1809cffc25bbSmlarkin fchunks = (short *)hibernate_fchunk_area; 181095a70527Smlarkin 181120703d53Smlarkin /* Map the chunk ordering region */ 18127d396bdbSmlarkin for(i = 0; i < 24 ; i++) 181321eafc1bSmlarkin pmap_kenter_pa(hibernate_fchunk_area + (i * PAGE_SIZE), 1814d6871fe2Smlarkin piglet_base + ((4 + i) * PAGE_SIZE), 18151e8cdc2eSderaadt PROT_READ | PROT_WRITE); 18163a85c2afSmlarkin pmap_update(pmap_kernel()); 181720703d53Smlarkin 1818908847d1Sderaadt nchunks = hib->chunk_ctr; 181920703d53Smlarkin 182020703d53Smlarkin /* Initially start all chunks as unplaced */ 182120703d53Smlarkin for (i = 0; i < nchunks; i++) 182220703d53Smlarkin chunks[i].flags = 0; 182320703d53Smlarkin 182420703d53Smlarkin /* 182520703d53Smlarkin * Search the list for chunks that are outside the pig area. These 182620703d53Smlarkin * can be placed first in the final output list. 182720703d53Smlarkin */ 182820703d53Smlarkin for (i = 0; i < nchunks; i++) { 182920703d53Smlarkin if (chunks[i].end <= pig_start || chunks[i].base >= pig_end) { 183055d14f4eSmlarkin fchunks[nfchunks] = i; 183120703d53Smlarkin nfchunks++; 183208170551Smlarkin chunks[i].flags |= HIBERNATE_CHUNK_PLACED; 183320703d53Smlarkin } 183420703d53Smlarkin } 183520703d53Smlarkin 183620703d53Smlarkin /* 183720703d53Smlarkin * Walk the ordering, place the chunks in ascending memory order. 183820703d53Smlarkin */ 183908170551Smlarkin for (i = 0; i < nchunks; i++) { 184008170551Smlarkin if (chunks[i].flags != HIBERNATE_CHUNK_PLACED) { 184108170551Smlarkin fchunks[nfchunks] = i; 184220703d53Smlarkin nfchunks++; 184308170551Smlarkin chunks[i].flags = HIBERNATE_CHUNK_PLACED; 184420703d53Smlarkin } 184520703d53Smlarkin } 184620703d53Smlarkin 184720703d53Smlarkin img_cur = pig_start; 184820703d53Smlarkin 184996b2f336Skrw for (i = 0, err = 0; i < nfchunks && err == 0; i++) { 18509783fa9dSkrw blkctr = chunks[fchunks[i]].offset + hib->image_offset; 185120703d53Smlarkin processed = 0; 185220703d53Smlarkin compressed_size = chunks[fchunks[i]].compressed_size; 185320703d53Smlarkin 185496b2f336Skrw while (processed < compressed_size && err == 0) { 18557d396bdbSmlarkin if (compressed_size - processed >= MAXPHYS) 18567d396bdbSmlarkin read_size = MAXPHYS; 185720703d53Smlarkin else 185820703d53Smlarkin read_size = compressed_size - processed; 185920703d53Smlarkin 18607d396bdbSmlarkin /* 18617d396bdbSmlarkin * We're reading read_size bytes, offset from the 18627d396bdbSmlarkin * start of a page by img_cur % PAGE_SIZE, so the 18637d396bdbSmlarkin * end will be read_size + (img_cur % PAGE_SIZE) 18647d396bdbSmlarkin * from the start of the first page. Round that 18657d396bdbSmlarkin * up to the next page size. 18667d396bdbSmlarkin */ 18677d396bdbSmlarkin num_io_pages = (read_size + (img_cur % PAGE_SIZE) 18687d396bdbSmlarkin + PAGE_SIZE - 1) / PAGE_SIZE; 18697d396bdbSmlarkin 18707d396bdbSmlarkin KASSERT(num_io_pages <= MAXPHYS/PAGE_SIZE + 1); 18717d396bdbSmlarkin 18727d396bdbSmlarkin /* Map pages for this read */ 18737d396bdbSmlarkin for (j = 0; j < num_io_pages; j ++) 18747d396bdbSmlarkin pmap_kenter_pa(tempva + j * PAGE_SIZE, 1875d6871fe2Smlarkin img_cur + j * PAGE_SIZE, 18761e8cdc2eSderaadt PROT_READ | PROT_WRITE); 18777d396bdbSmlarkin 18787d396bdbSmlarkin pmap_update(pmap_kernel()); 18797d396bdbSmlarkin 188096b2f336Skrw err = hibernate_block_io(hib, blkctr, read_size, 188189fc03e5Sjmatthew tempva + (img_cur & PAGE_MASK), 0); 188220703d53Smlarkin 18839783fa9dSkrw blkctr += btodb(read_size); 188420703d53Smlarkin 18857d396bdbSmlarkin pmap_kremove(tempva, num_io_pages * PAGE_SIZE); 18867d396bdbSmlarkin pmap_update(pmap_kernel()); 18877d396bdbSmlarkin 188820703d53Smlarkin processed += read_size; 188920703d53Smlarkin img_cur += read_size; 189020703d53Smlarkin } 189120703d53Smlarkin } 189220703d53Smlarkin 189308170551Smlarkin pmap_kremove(hibernate_fchunk_area, 24 * PAGE_SIZE); 18943a85c2afSmlarkin pmap_update(pmap_kernel()); 18953a85c2afSmlarkin 189696b2f336Skrw return (i != nfchunks); 189720703d53Smlarkin } 189820703d53Smlarkin 189920703d53Smlarkin /* 190020703d53Smlarkin * Hibernating a machine comprises the following operations: 190120703d53Smlarkin * 1. Calculating this machine's hibernate_info information 190220703d53Smlarkin * 2. Allocating a piglet and saving the piglet's physaddr 190320703d53Smlarkin * 3. Calculating the memory chunks 190420703d53Smlarkin * 4. Writing the compressed chunks to disk 190520703d53Smlarkin * 5. Writing the chunk table 190620703d53Smlarkin * 6. Writing the signature block (hibernate_info) 190720703d53Smlarkin * 190820703d53Smlarkin * On most architectures, the function calling hibernate_suspend would 190920703d53Smlarkin * then power off the machine using some MD-specific implementation. 191020703d53Smlarkin */ 191120703d53Smlarkin int 1912e33b04c7Sderaadt hibernate_suspend(void) 191320703d53Smlarkin { 1914fc6d48fdSkrw uint8_t buf[DEV_BSIZE]; 1915fc6d48fdSkrw union hibernate_info *hib = (union hibernate_info *)&buf; 191673ac9b72Sderaadt u_long start, end; 191720703d53Smlarkin 191820703d53Smlarkin /* 191920703d53Smlarkin * Calculate memory ranges, swap offsets, etc. 192020703d53Smlarkin * This also allocates a piglet whose physaddr is stored in 1921908847d1Sderaadt * hib->piglet_pa and vaddr stored in hib->piglet_va 192220703d53Smlarkin */ 1923fc6d48fdSkrw if (get_hibernate_info(hib, 1)) { 192496cbc698Smlarkin DPRINTF("failed to obtain hibernate info\n"); 192520703d53Smlarkin return (1); 192696cbc698Smlarkin } 192720703d53Smlarkin 192873ac9b72Sderaadt /* Find a page-addressed region in swap [start,end] */ 1929fc6d48fdSkrw if (uvm_hibswap(hib->dev, &start, &end)) { 19307bbeffb6Smlarkin printf("hibernate: cannot find any swap\n"); 1931cc391fe1Smlarkin return (1); 1932cc391fe1Smlarkin } 1933cc391fe1Smlarkin 19349783fa9dSkrw if (end - start + 1 < 1000) { 19357bbeffb6Smlarkin printf("hibernate: insufficient swap (%lu is too small)\n", 193602de433dSsemarie end - start + 1); 193773ac9b72Sderaadt return (1); 193873ac9b72Sderaadt } 193973ac9b72Sderaadt 19402547ab58Smlarkin pmap_extract(pmap_kernel(), (vaddr_t)&__retguard_start, 19412547ab58Smlarkin &retguard_start_phys); 19422547ab58Smlarkin pmap_extract(pmap_kernel(), (vaddr_t)&__retguard_end, 19432547ab58Smlarkin &retguard_end_phys); 19442547ab58Smlarkin 194573ac9b72Sderaadt /* Calculate block offsets in swap */ 1946fc6d48fdSkrw hib->image_offset = ctod(start); 19479783fa9dSkrw hib->image_size = ctod(end - start + 1) - 19489783fa9dSkrw btodb(HIBERNATE_CHUNK_TABLE_SIZE); 1949c33b47b2Skrw hib->chunktable_offset = hib->image_offset + hib->image_size; 195073ac9b72Sderaadt 19519783fa9dSkrw DPRINTF("hibernate @ block %lld chunks-length %lu blocks, " 19529783fa9dSkrw "chunktable-length %d blocks\n", hib->image_offset, hib->image_size, 19539783fa9dSkrw btodb(HIBERNATE_CHUNK_TABLE_SIZE)); 195473ac9b72Sderaadt 19553a85c2afSmlarkin pmap_activate(curproc); 1956491851bfSmlarkin DPRINTF("hibernate: writing chunks\n"); 1957fc6d48fdSkrw if (hibernate_write_chunks(hib)) { 195896cbc698Smlarkin DPRINTF("hibernate_write_chunks failed\n"); 1959dcbeefeaSderaadt return (1); 196096cbc698Smlarkin } 196120703d53Smlarkin 1962491851bfSmlarkin DPRINTF("hibernate: writing chunktable\n"); 1963fc6d48fdSkrw if (hibernate_write_chunktable(hib)) { 196496cbc698Smlarkin DPRINTF("hibernate_write_chunktable failed\n"); 1965dcbeefeaSderaadt return (1); 196696cbc698Smlarkin } 196720703d53Smlarkin 1968491851bfSmlarkin DPRINTF("hibernate: writing signature\n"); 1969fc6d48fdSkrw if (hibernate_write_signature(hib)) { 197096cbc698Smlarkin DPRINTF("hibernate_write_signature failed\n"); 1971dcbeefeaSderaadt return (1); 197296cbc698Smlarkin } 197395a70527Smlarkin 197473cbb718Sderaadt /* Allow the disk to settle */ 197595e3d60bSmlarkin delay(500000); 197673cbb718Sderaadt 1977b42f10c6Smlarkin /* 1978b42f10c6Smlarkin * Give the device-specific I/O function a notification that we're 1979b42f10c6Smlarkin * done, and that it can clean up or shutdown as needed. 1980b42f10c6Smlarkin */ 198185df19c5Smglocker if (hib->io_func(hib->dev, 0, (vaddr_t)NULL, 0, HIB_DONE, hib->io_page)) 198285df19c5Smglocker printf("Warning: hibernate done failed\n"); 198395a70527Smlarkin return (0); 198420703d53Smlarkin } 198595e3d60bSmlarkin 1986b0c80555Skettenis int 1987b0c80555Skettenis hibernate_alloc(void) 1988b0c80555Skettenis { 1989b0c80555Skettenis KASSERT(global_piglet_va == 0); 1990b0c80555Skettenis KASSERT(hibernate_temp_page == 0); 1991b0c80555Skettenis 199278e2b1f8Smlarkin pmap_activate(curproc); 199378e2b1f8Smlarkin pmap_kenter_pa(HIBERNATE_HIBALLOC_PAGE, HIBERNATE_HIBALLOC_PAGE, 199478e2b1f8Smlarkin PROT_READ | PROT_WRITE); 199578e2b1f8Smlarkin 1996259dfafeSmlarkin /* Allocate a piglet, store its addresses in the supplied globals */ 1997b0c80555Skettenis if (uvm_pmr_alloc_piglet(&global_piglet_va, &global_piglet_pa, 1998b0c80555Skettenis HIBERNATE_CHUNK_SIZE * 4, HIBERNATE_CHUNK_SIZE)) 1999b237021bSderaadt goto unmap; 2000b0c80555Skettenis 200195e3d60bSmlarkin /* 20029a033ad3Smlarkin * Allocate VA for the temp page. 2003b0c80555Skettenis * 20049a033ad3Smlarkin * This will become part of the suspended kernel and will 2005259dfafeSmlarkin * be freed in hibernate_free, upon resume (or hibernate 2006259dfafeSmlarkin * failure) 2007b0c80555Skettenis */ 2008b0c80555Skettenis hibernate_temp_page = (vaddr_t)km_alloc(PAGE_SIZE, &kv_any, 2009b0c80555Skettenis &kp_none, &kd_nowait); 2010b0c80555Skettenis if (!hibernate_temp_page) { 201178964d1aSmlarkin uvm_pmr_free_piglet(global_piglet_va, 4 * HIBERNATE_CHUNK_SIZE); 2012b237021bSderaadt global_piglet_va = 0; 2013b237021bSderaadt goto unmap; 2014b0c80555Skettenis } 2015b0c80555Skettenis return (0); 2016b237021bSderaadt unmap: 2017b237021bSderaadt pmap_kremove(HIBERNATE_HIBALLOC_PAGE, PAGE_SIZE); 2018b237021bSderaadt pmap_update(pmap_kernel()); 2019b237021bSderaadt return (ENOMEM); 2020b0c80555Skettenis } 2021b0c80555Skettenis 2022b0c80555Skettenis /* 2023b0c80555Skettenis * Free items allocated by hibernate_alloc() 202495e3d60bSmlarkin */ 202595e3d60bSmlarkin void 202695e3d60bSmlarkin hibernate_free(void) 202795e3d60bSmlarkin { 202878e2b1f8Smlarkin pmap_activate(curproc); 202978e2b1f8Smlarkin 2030e9d438ceSderaadt if (global_piglet_va) 2031e9d438ceSderaadt uvm_pmr_free_piglet(global_piglet_va, 203231a59060Smlarkin 4 * HIBERNATE_CHUNK_SIZE); 203395e3d60bSmlarkin 2034259dfafeSmlarkin if (hibernate_temp_page) { 203595e3d60bSmlarkin pmap_kremove(hibernate_temp_page, PAGE_SIZE); 2036e9d438ceSderaadt km_free((void *)hibernate_temp_page, PAGE_SIZE, 2037e9d438ceSderaadt &kv_any, &kp_none); 2038259dfafeSmlarkin } 2039e9d438ceSderaadt 2040e9d438ceSderaadt global_piglet_va = 0; 2041e9d438ceSderaadt hibernate_temp_page = 0; 2042ffbce50eSmlarkin pmap_kremove(HIBERNATE_HIBALLOC_PAGE, PAGE_SIZE); 2043ffbce50eSmlarkin pmap_update(pmap_kernel()); 204495e3d60bSmlarkin } 2045