1 /* $OpenBSD: subr_hibernate.c,v 1.17 2011/09/21 06:13:39 mlarkin Exp $ */ 2 3 /* 4 * Copyright (c) 2011 Ariane van der Steldt <ariane@stack.nl> 5 * Copyright (c) 2011 Mike Larkin <mlarkin@openbsd.org> 6 * 7 * Permission to use, copy, modify, and distribute this software for any 8 * purpose with or without fee is hereby granted, provided that the above 9 * copyright notice and this permission notice appear in all copies. 10 * 11 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 12 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 13 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 14 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 15 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 16 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 17 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 18 */ 19 20 #include <sys/hibernate.h> 21 #include <sys/malloc.h> 22 #include <sys/param.h> 23 #include <sys/tree.h> 24 #include <sys/types.h> 25 #include <sys/systm.h> 26 #include <sys/disklabel.h> 27 #include <sys/conf.h> 28 #include <sys/buf.h> 29 #include <sys/fcntl.h> 30 #include <sys/stat.h> 31 #include <uvm/uvm.h> 32 #include <machine/hibernate.h> 33 34 extern char *disk_readlabel(struct disklabel *, dev_t, char *, size_t); 35 36 struct hibernate_zlib_state *hibernate_state; 37 38 /* Temporary vaddr ranges used during hibernate */ 39 vaddr_t hibernate_temp_page; 40 vaddr_t hibernate_copy_page; 41 vaddr_t hibernate_stack_page; 42 vaddr_t hibernate_fchunk_area; 43 vaddr_t hibernate_chunktable_area; 44 vaddr_t hibernate_inflate_page; 45 46 /* Hibernate info as read from disk during resume */ 47 union hibernate_info disk_hiber_info; 48 49 /* 50 * Hib alloc enforced alignment. 51 */ 52 #define HIB_ALIGN 8 /* bytes alignment */ 53 54 /* 55 * sizeof builtin operation, but with alignment constraint. 56 */ 57 #define HIB_SIZEOF(_type) roundup(sizeof(_type), HIB_ALIGN) 58 59 struct hiballoc_entry 60 { 61 size_t hibe_use; 62 size_t hibe_space; 63 RB_ENTRY(hiballoc_entry) hibe_entry; 64 }; 65 66 /* 67 * Compare hiballoc entries based on the address they manage. 68 * 69 * Since the address is fixed, relative to struct hiballoc_entry, 70 * we just compare the hiballoc_entry pointers. 71 */ 72 static __inline int 73 hibe_cmp(struct hiballoc_entry *l, struct hiballoc_entry *r) 74 { 75 return l < r ? -1 : (l > r); 76 } 77 78 RB_PROTOTYPE(hiballoc_addr, hiballoc_entry, hibe_entry, hibe_cmp) 79 80 /* 81 * Given a hiballoc entry, return the address it manages. 82 */ 83 static __inline void* 84 hib_entry_to_addr(struct hiballoc_entry *entry) 85 { 86 caddr_t addr; 87 88 addr = (caddr_t)entry; 89 addr += HIB_SIZEOF(struct hiballoc_entry); 90 return addr; 91 } 92 93 /* 94 * Given an address, find the hiballoc that corresponds. 95 */ 96 static __inline struct hiballoc_entry* 97 hib_addr_to_entry(void* addr_param) 98 { 99 caddr_t addr; 100 101 addr = (caddr_t)addr_param; 102 addr -= HIB_SIZEOF(struct hiballoc_entry); 103 return (struct hiballoc_entry*)addr; 104 } 105 106 RB_GENERATE(hiballoc_addr, hiballoc_entry, hibe_entry, hibe_cmp) 107 108 /* 109 * Allocate memory from the arena. 110 * 111 * Returns NULL if no memory is available. 112 */ 113 void* 114 hib_alloc(struct hiballoc_arena *arena, size_t alloc_sz) 115 { 116 struct hiballoc_entry *entry, *new_entry; 117 size_t find_sz; 118 119 /* 120 * Enforce alignment of HIB_ALIGN bytes. 
121 * 122 * Note that, because the entry is put in front of the allocation, 123 * 0-byte allocations are guaranteed a unique address. 124 */ 125 alloc_sz = roundup(alloc_sz, HIB_ALIGN); 126 127 /* 128 * Find an entry with hibe_space >= find_sz. 129 * 130 * If the root node is not large enough, we switch to tree traversal. 131 * Because all entries are made at the bottom of the free space, 132 * traversal from the end has a slightly better chance of yielding 133 * a sufficiently large space. 134 */ 135 find_sz = alloc_sz + HIB_SIZEOF(struct hiballoc_entry); 136 entry = RB_ROOT(&arena->hib_addrs); 137 if (entry != NULL && entry->hibe_space < find_sz) { 138 RB_FOREACH_REVERSE(entry, hiballoc_addr, &arena->hib_addrs) { 139 if (entry->hibe_space >= find_sz) 140 break; 141 } 142 } 143 144 /* 145 * Insufficient or too fragmented memory. 146 */ 147 if (entry == NULL) 148 return NULL; 149 150 /* 151 * Create new entry in allocated space. 152 */ 153 new_entry = (struct hiballoc_entry*)( 154 (caddr_t)hib_entry_to_addr(entry) + entry->hibe_use); 155 new_entry->hibe_space = entry->hibe_space - find_sz; 156 new_entry->hibe_use = alloc_sz; 157 158 /* 159 * Insert entry. 160 */ 161 if (RB_INSERT(hiballoc_addr, &arena->hib_addrs, new_entry) != NULL) 162 panic("hib_alloc: insert failure"); 163 entry->hibe_space = 0; 164 165 /* Return address managed by entry. */ 166 return hib_entry_to_addr(new_entry); 167 } 168 169 /* 170 * Free a pointer previously allocated from this arena. 171 * 172 * If addr is NULL, this will be silently accepted. 173 */ 174 void 175 hib_free(struct hiballoc_arena *arena, void *addr) 176 { 177 struct hiballoc_entry *entry, *prev; 178 179 if (addr == NULL) 180 return; 181 182 /* 183 * Derive entry from addr and check it is really in this arena. 184 */ 185 entry = hib_addr_to_entry(addr); 186 if (RB_FIND(hiballoc_addr, &arena->hib_addrs, entry) != entry) 187 panic("hib_free: freed item %p not in hib arena", addr); 188 189 /* 190 * Give the space in entry to its predecessor. 191 * 192 * If entry has no predecessor, change its used space into free space 193 * instead. 194 */ 195 prev = RB_PREV(hiballoc_addr, &arena->hib_addrs, entry); 196 if (prev != NULL && 197 (void*)((caddr_t)prev + HIB_SIZEOF(struct hiballoc_entry) + 198 prev->hibe_use + prev->hibe_space) == entry) { 199 /* Merge entry. */ 200 RB_REMOVE(hiballoc_addr, &arena->hib_addrs, entry); 201 prev->hibe_space += HIB_SIZEOF(struct hiballoc_entry) + 202 entry->hibe_use + entry->hibe_space; 203 } else { 204 /* Flip used memory to free space. */ 205 entry->hibe_space += entry->hibe_use; 206 entry->hibe_use = 0; 207 } 208 } 209 210 /* 211 * Initialize hiballoc. 212 * 213 * The allocator will manage memmory at ptr, which is len bytes. 214 */ 215 int 216 hiballoc_init(struct hiballoc_arena *arena, void *p_ptr, size_t p_len) 217 { 218 struct hiballoc_entry *entry; 219 caddr_t ptr; 220 size_t len; 221 222 RB_INIT(&arena->hib_addrs); 223 224 /* 225 * Hib allocator enforces HIB_ALIGN alignment. 226 * Fixup ptr and len. 227 */ 228 ptr = (caddr_t)roundup((vaddr_t)p_ptr, HIB_ALIGN); 229 len = p_len - ((size_t)ptr - (size_t)p_ptr); 230 len &= ~((size_t)HIB_ALIGN - 1); 231 232 /* 233 * Insufficient memory to be able to allocate and also do bookkeeping. 234 */ 235 if (len <= HIB_SIZEOF(struct hiballoc_entry)) 236 return ENOMEM; 237 238 /* 239 * Create entry describing space. 
	 */
	entry = (struct hiballoc_entry*)ptr;
	entry->hibe_use = 0;
	entry->hibe_space = len - HIB_SIZEOF(struct hiballoc_entry);
	RB_INSERT(hiballoc_addr, &arena->hib_addrs, entry);

	return 0;
}


/*
 * Zero all free memory.
 */
void
uvm_pmr_zero_everything(void)
{
	struct uvm_pmemrange *pmr;
	struct vm_page *pg;
	int i;

	uvm_lock_fpageq();
	TAILQ_FOREACH(pmr, &uvm.pmr_control.use, pmr_use) {
		/* Zero single pages. */
		while ((pg = TAILQ_FIRST(&pmr->single[UVM_PMR_MEMTYPE_DIRTY]))
		    != NULL) {
			uvm_pmr_remove(pmr, pg);
			uvm_pagezero(pg);
			atomic_setbits_int(&pg->pg_flags, PG_ZERO);
			uvmexp.zeropages++;
			uvm_pmr_insert(pmr, pg, 0);
		}

		/* Zero multi page ranges. */
		while ((pg = RB_ROOT(&pmr->size[UVM_PMR_MEMTYPE_DIRTY]))
		    != NULL) {
			pg--; /* Size tree always has second page. */
			uvm_pmr_remove(pmr, pg);
			for (i = 0; i < pg->fpgsz; i++) {
				uvm_pagezero(&pg[i]);
				atomic_setbits_int(&pg[i].pg_flags, PG_ZERO);
				uvmexp.zeropages++;
			}
			uvm_pmr_insert(pmr, pg, 0);
		}
	}
	uvm_unlock_fpageq();
}

/*
 * Mark all memory as dirty.
 *
 * Used to inform the system that the clean memory isn't clean for some
 * reason, for example because we just came back from hibernate.
 */
void
uvm_pmr_dirty_everything(void)
{
	struct uvm_pmemrange *pmr;
	struct vm_page *pg;
	int i;

	uvm_lock_fpageq();
	TAILQ_FOREACH(pmr, &uvm.pmr_control.use, pmr_use) {
		/* Dirty single pages. */
		while ((pg = TAILQ_FIRST(&pmr->single[UVM_PMR_MEMTYPE_ZERO]))
		    != NULL) {
			uvm_pmr_remove(pmr, pg);
			atomic_clearbits_int(&pg->pg_flags, PG_ZERO);
			uvm_pmr_insert(pmr, pg, 0);
		}

		/* Dirty multi page ranges. */
		while ((pg = RB_ROOT(&pmr->size[UVM_PMR_MEMTYPE_ZERO]))
		    != NULL) {
			pg--; /* Size tree always has second page. */
			uvm_pmr_remove(pmr, pg);
			for (i = 0; i < pg->fpgsz; i++)
				atomic_clearbits_int(&pg[i].pg_flags, PG_ZERO);
			uvm_pmr_insert(pmr, pg, 0);
		}
	}

	uvmexp.zeropages = 0;
	uvm_unlock_fpageq();
}

/*
 * Allocate the highest address that can hold sz.
 *
 * sz in bytes.
 */
int
uvm_pmr_alloc_pig(paddr_t *addr, psize_t sz)
{
	struct uvm_pmemrange *pmr;
	struct vm_page *pig_pg, *pg;

	/*
	 * Convert sz to pages, since that is what pmemrange uses internally.
	 */
	sz = atop(round_page(sz));

	uvm_lock_fpageq();

	TAILQ_FOREACH(pmr, &uvm.pmr_control.use, pmr_use) {
		RB_FOREACH_REVERSE(pig_pg, uvm_pmr_addr, &pmr->addr) {
			if (pig_pg->fpgsz >= sz) {
				goto found;
			}
		}
	}

	/*
	 * Allocation failure.
	 */
	uvm_unlock_fpageq();
	return ENOMEM;

found:
	/* Remove page from freelist. */
	uvm_pmr_remove_size(pmr, pig_pg);
	pig_pg->fpgsz -= sz;
	pg = pig_pg + pig_pg->fpgsz;
	if (pig_pg->fpgsz == 0)
		uvm_pmr_remove_addr(pmr, pig_pg);
	else
		uvm_pmr_insert_size(pmr, pig_pg);

	uvmexp.free -= sz;
	*addr = VM_PAGE_TO_PHYS(pg);

	/*
	 * Update pg flags.
	 *
	 * Note that we trash the sz argument now.
	 */
	while (sz > 0) {
		KASSERT(pg->pg_flags & PQ_FREE);

		atomic_clearbits_int(&pg->pg_flags,
		    PG_PMAP0|PG_PMAP1|PG_PMAP2|PG_PMAP3);

		if (pg->pg_flags & PG_ZERO)
			uvmexp.zeropages--;
		atomic_clearbits_int(&pg->pg_flags,
		    PG_ZERO|PQ_FREE);

		pg->uobject = NULL;
		pg->uanon = NULL;
		pg->pg_version++;

		/*
		 * Next.
		 */
		pg++;
		sz--;
	}

	/* Return. */
	uvm_unlock_fpageq();
	return 0;
}

/*
 * Allocate a piglet area.
 *
 * The piglet is allocated as low in physical memory as possible and is
 * aligned to the requested boundary.
 *
 * sz and align are in bytes.
 *
 * The call may sleep while the pagedaemon attempts to free memory.
 * The pagedaemon may decide it is not possible to free enough memory,
 * causing the allocation to fail.
 */
int
uvm_pmr_alloc_piglet(vaddr_t *va, paddr_t *pa, vsize_t sz, paddr_t align)
{
	paddr_t pg_addr, piglet_addr;
	struct uvm_pmemrange *pmr;
	struct vm_page *pig_pg, *pg;
	struct pglist pageq;
	int pdaemon_woken;
	vaddr_t piglet_va;

	KASSERT((align & (align - 1)) == 0);
	pdaemon_woken = 0; /* Didn't wake the pagedaemon. */

	/*
	 * Fixup arguments: align must be at least PAGE_SIZE,
	 * sz will be converted to pagecount, since that is what
	 * pmemrange uses internally.
	 */
	if (align < PAGE_SIZE)
		align = PAGE_SIZE;
	sz = round_page(sz);

	uvm_lock_fpageq();

	TAILQ_FOREACH_REVERSE(pmr, &uvm.pmr_control.use, uvm_pmemrange_use,
	    pmr_use) {
retry:
		/*
		 * Search for a range with enough space.
		 * Use the address tree, to ensure the range is as low as
		 * possible.
		 */
		RB_FOREACH(pig_pg, uvm_pmr_addr, &pmr->addr) {
			pg_addr = VM_PAGE_TO_PHYS(pig_pg);
			piglet_addr = (pg_addr + (align - 1)) & ~(align - 1);

			if (atop(pg_addr) + pig_pg->fpgsz >=
			    atop(piglet_addr) + atop(sz)) {
				goto found;
			}
		}
	}

	/*
	 * Try to coerce the pagedaemon into freeing memory
	 * for the piglet.
	 *
	 * pdaemon_woken is set to prevent the code from
	 * falling into an endless loop.
	 */
	if (!pdaemon_woken) {
		pdaemon_woken = 1;
		if (uvm_wait_pla(ptoa(pmr->low), ptoa(pmr->high) - 1,
		    sz, UVM_PLA_FAILOK) == 0)
			goto retry;
	}

	/* Return failure. */
	uvm_unlock_fpageq();
	return ENOMEM;

found:
	/*
	 * Extract piglet from pigpen.
	 */
	TAILQ_INIT(&pageq);
	uvm_pmr_extract_range(pmr, pig_pg,
	    atop(piglet_addr), atop(piglet_addr) + atop(sz), &pageq);

	*pa = piglet_addr;
	uvmexp.free -= atop(sz);

	/*
	 * Update pg flags.
	 */
	TAILQ_FOREACH(pg, &pageq, pageq) {
		KASSERT(pg->pg_flags & PQ_FREE);

		atomic_clearbits_int(&pg->pg_flags,
		    PG_PMAP0|PG_PMAP1|PG_PMAP2|PG_PMAP3);

		if (pg->pg_flags & PG_ZERO)
			uvmexp.zeropages--;
		atomic_clearbits_int(&pg->pg_flags,
		    PG_ZERO|PQ_FREE);

		pg->uobject = NULL;
		pg->uanon = NULL;
		pg->pg_version++;
	}

	uvm_unlock_fpageq();

	/*
	 * Now allocate a va for the piglet and map the piglet pages into it.
	 */
	piglet_va = *va = (vaddr_t)km_alloc(sz, &kv_any, &kp_none, &kd_waitok);
	if (!piglet_va) {
		uvm_pglistfree(&pageq);
		return ENOMEM;
	}

	/*
	 * Map piglet to va.
524 */ 525 TAILQ_FOREACH(pg, &pageq, pageq) { 526 pmap_kenter_pa(piglet_va, VM_PAGE_TO_PHYS(pg), UVM_PROT_RW); 527 piglet_va += PAGE_SIZE; 528 } 529 pmap_update(pmap_kernel()); 530 531 return 0; 532 } 533 534 /* 535 * Free a piglet area. 536 */ 537 void 538 uvm_pmr_free_piglet(vaddr_t va, vsize_t sz) 539 { 540 paddr_t pa; 541 struct vm_page *pg; 542 543 /* 544 * Fix parameters. 545 */ 546 sz = round_page(sz); 547 548 /* 549 * Find the first page in piglet. 550 * Since piglets are contiguous, the first pg is all we need. 551 */ 552 if (!pmap_extract(pmap_kernel(), va, &pa)) 553 panic("uvm_pmr_free_piglet: piglet 0x%lx has no pages", va); 554 pg = PHYS_TO_VM_PAGE(pa); 555 if (pg == NULL) 556 panic("uvm_pmr_free_piglet: unmanaged page 0x%lx", pa); 557 558 /* 559 * Unmap. 560 */ 561 pmap_kremove(va, sz); 562 pmap_update(pmap_kernel()); 563 564 /* 565 * Free the physical and virtual memory. 566 */ 567 uvm_pmr_freepages(pg, atop(sz)); 568 km_free((void*)va, sz, &kv_any, &kp_none); 569 } 570 571 /* 572 * Physmem RLE compression support. 573 * 574 * Given a physical page address, it will return the number of pages 575 * starting at the address, that are free. 576 * Returns 0 if the page at addr is not free. 577 */ 578 psize_t 579 uvm_page_rle(paddr_t addr) 580 { 581 struct vm_page *pg, *pg_end; 582 struct vm_physseg *vmp; 583 int pseg_idx, off_idx; 584 585 pseg_idx = vm_physseg_find(atop(addr), &off_idx); 586 if (pseg_idx == -1) 587 return 0; 588 589 vmp = &vm_physmem[pseg_idx]; 590 pg = &vmp->pgs[off_idx]; 591 if (!(pg->pg_flags & PQ_FREE)) 592 return 0; 593 594 /* 595 * Search for the first non-free page after pg. 596 * Note that the page may not be the first page in a free pmemrange, 597 * therefore pg->fpgsz cannot be used. 598 */ 599 for (pg_end = pg; pg_end <= vmp->lastpg && 600 (pg_end->pg_flags & PQ_FREE) == PQ_FREE; pg_end++); 601 return pg_end - pg; 602 } 603 604 /* 605 * Fills out the hibernate_info union pointed to by hiber_info 606 * with information about this machine (swap signature block 607 * offsets, number of memory ranges, kernel in use, etc) 608 * 609 */ 610 int 611 get_hibernate_info(union hibernate_info *hiber_info, int suspend) 612 { 613 int chunktable_size; 614 struct disklabel dl; 615 char err_string[128], *dl_ret; 616 617 /* Determine I/O function to use */ 618 hiber_info->io_func = get_hibernate_io_function(); 619 if (hiber_info->io_func == NULL) 620 return (1); 621 622 /* Calculate hibernate device */ 623 hiber_info->device = swdevt[0].sw_dev; 624 625 /* Read disklabel (used to calculate signature and image offsets) */ 626 dl_ret = disk_readlabel(&dl, hiber_info->device, err_string, 128); 627 628 if (dl_ret) { 629 printf("Hibernate error reading disklabel: %s\n", dl_ret); 630 return (1); 631 } 632 633 hiber_info->secsize = dl.d_secsize; 634 635 /* Make sure the signature can fit in one block */ 636 KASSERT(sizeof(union hibernate_info)/hiber_info->secsize == 1); 637 638 /* Calculate swap offset from start of disk */ 639 hiber_info->swap_offset = dl.d_partitions[1].p_offset; 640 641 /* Calculate signature block location */ 642 hiber_info->sig_offset = dl.d_partitions[1].p_offset + 643 dl.d_partitions[1].p_size - 644 sizeof(union hibernate_info)/hiber_info->secsize; 645 646 chunktable_size = HIBERNATE_CHUNK_TABLE_SIZE / hiber_info->secsize; 647 648 /* Stash kernel version information */ 649 bzero(&hiber_info->kernel_version, 128); 650 bcopy(version, &hiber_info->kernel_version, 651 min(strlen(version), sizeof(hiber_info->kernel_version)-1)); 652 653 if (suspend) { 654 /* 
Allocate piglet region */ 655 if (uvm_pmr_alloc_piglet(&hiber_info->piglet_va, 656 &hiber_info->piglet_pa, 657 HIBERNATE_CHUNK_SIZE*3, 658 HIBERNATE_CHUNK_SIZE)) { 659 printf("Hibernate failed to allocate the piglet\n"); 660 return (1); 661 } 662 } 663 664 if (get_hibernate_info_md(hiber_info)) 665 return (1); 666 667 /* Calculate memory image location */ 668 hiber_info->image_offset = dl.d_partitions[1].p_offset + 669 dl.d_partitions[1].p_size - 670 (hiber_info->image_size / hiber_info->secsize) - 671 sizeof(union hibernate_info)/hiber_info->secsize - 672 chunktable_size; 673 674 return (0); 675 } 676 677 /* 678 * Allocate nitems*size bytes from the hiballoc area presently in use 679 */ 680 void 681 *hibernate_zlib_alloc(void *unused, int nitems, int size) 682 { 683 return hib_alloc(&hibernate_state->hiballoc_arena, nitems*size); 684 } 685 686 /* 687 * Free the memory pointed to by addr in the hiballoc area presently in 688 * use 689 */ 690 void 691 hibernate_zlib_free(void *unused, void *addr) 692 { 693 hib_free(&hibernate_state->hiballoc_arena, addr); 694 } 695 696 /* 697 * Inflate size bytes from src into dest, skipping any pages in 698 * [src..dest] that are special (see hibernate_inflate_skip) 699 * 700 * For each page of output data, we map HIBERNATE_TEMP_PAGE 701 * to the current output page, and tell inflate() to inflate 702 * its data there, resulting in the inflated data being placed 703 * at the proper paddr. 704 * 705 * This function executes while using the resume-time stack 706 * and pmap, and therefore cannot use ddb/printf/etc. Doing so 707 * will likely hang or reset the machine. 708 */ 709 void 710 hibernate_inflate(union hibernate_info *hiber_info, 711 paddr_t dest, paddr_t src, size_t size) 712 { 713 int i; 714 715 hibernate_state->hib_stream.avail_in = size; 716 hibernate_state->hib_stream.next_in = (char *)src; 717 718 hibernate_inflate_page = hiber_info->piglet_va + 2 * PAGE_SIZE; 719 720 do { 721 /* Flush cache and TLB */ 722 hibernate_flush(); 723 724 /* 725 * Is this a special page? If yes, redirect the 726 * inflate output to a scratch page (eg, discard it) 727 */ 728 if (hibernate_inflate_skip(hiber_info, dest)) 729 hibernate_enter_resume_mapping( 730 hibernate_inflate_page, 731 hiber_info->piglet_pa + 2 * PAGE_SIZE, 732 0); 733 else 734 hibernate_enter_resume_mapping( 735 hibernate_inflate_page, 736 dest, 0); 737 738 /* Set up the stream for inflate */ 739 hibernate_state->hib_stream.avail_out = PAGE_SIZE; 740 hibernate_state->hib_stream.next_out = 741 (char *)hiber_info->piglet_va + 2 * PAGE_SIZE; 742 743 /* Process next block of data */ 744 i = inflate(&hibernate_state->hib_stream, Z_PARTIAL_FLUSH); 745 if (i != Z_OK && i != Z_STREAM_END) { 746 /* 747 * XXX - this will likely reboot/hang most machines, 748 * but there's not much else we can do here. 
749 */ 750 panic("inflate error"); 751 } 752 753 dest += PAGE_SIZE - hibernate_state->hib_stream.avail_out; 754 } while (i != Z_STREAM_END); 755 } 756 757 /* 758 * deflate from src into the I/O page, up to 'remaining' bytes 759 * 760 * Returns number of input bytes consumed, and may reset 761 * the 'remaining' parameter if not all the output space was consumed 762 * (this information is needed to know how much to write to disk 763 */ 764 size_t 765 hibernate_deflate(union hibernate_info *hiber_info, paddr_t src, 766 size_t *remaining) 767 { 768 vaddr_t hibernate_io_page = hiber_info->piglet_va + PAGE_SIZE; 769 770 /* Set up the stream for deflate */ 771 hibernate_state->hib_stream.avail_in = PAGE_SIZE - 772 (src & PAGE_MASK); 773 hibernate_state->hib_stream.avail_out = *remaining; 774 hibernate_state->hib_stream.next_in = (caddr_t)src; 775 hibernate_state->hib_stream.next_out = (caddr_t)hibernate_io_page + 776 (PAGE_SIZE - *remaining); 777 778 /* Process next block of data */ 779 if (deflate(&hibernate_state->hib_stream, Z_PARTIAL_FLUSH) != Z_OK) 780 panic("hibernate zlib deflate error\n"); 781 782 /* Update pointers and return number of bytes consumed */ 783 *remaining = hibernate_state->hib_stream.avail_out; 784 return (PAGE_SIZE - (src & PAGE_MASK)) - 785 hibernate_state->hib_stream.avail_in; 786 } 787 788 /* 789 * Write the hibernation information specified in hiber_info 790 * to the location in swap previously calculated (last block of 791 * swap), called the "signature block". 792 * 793 * Write the memory chunk table to the area in swap immediately 794 * preceding the signature block. 795 */ 796 int 797 hibernate_write_signature(union hibernate_info *hiber_info) 798 { 799 u_int8_t *io_page; 800 int result = 0; 801 802 io_page = malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT); 803 if (!io_page) 804 return (1); 805 806 /* Write hibernate info to disk */ 807 if (hiber_info->io_func(hiber_info->device, hiber_info->sig_offset, 808 (vaddr_t)hiber_info, hiber_info->secsize, 1, io_page)) { 809 result = 1; 810 } 811 812 free(io_page, M_DEVBUF); 813 return (result); 814 } 815 816 /* 817 * Write the memory chunk table to the area in swap immediately 818 * preceding the signature block. The chunk table is stored 819 * in the piglet when this function is called. 820 */ 821 int 822 hibernate_write_chunktable(union hibernate_info *hiber_info) 823 { 824 u_int8_t *io_page; 825 int i; 826 daddr_t chunkbase; 827 vaddr_t hibernate_chunk_table_start; 828 size_t hibernate_chunk_table_size; 829 struct hibernate_disk_chunk *chunks; 830 831 io_page = malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT); 832 if (!io_page) 833 return (1); 834 835 hibernate_chunk_table_size = HIBERNATE_CHUNK_TABLE_SIZE; 836 837 chunkbase = hiber_info->sig_offset - 838 (hibernate_chunk_table_size / hiber_info->secsize); 839 840 hibernate_chunk_table_start = hiber_info->piglet_va + 841 HIBERNATE_CHUNK_SIZE; 842 843 chunks = (struct hibernate_disk_chunk *)(hiber_info->piglet_va + 844 HIBERNATE_CHUNK_SIZE); 845 846 /* Write chunk table */ 847 for(i=0; i < hibernate_chunk_table_size; i += MAXPHYS) { 848 if(hiber_info->io_func(hiber_info->device, 849 chunkbase + (i/hiber_info->secsize), 850 (vaddr_t)(hibernate_chunk_table_start + i), 851 MAXPHYS, 852 1, 853 io_page)) { 854 free(io_page, M_DEVBUF); 855 return (1); 856 } 857 } 858 859 free(io_page, M_DEVBUF); 860 861 return (0); 862 } 863 864 /* 865 * Write an empty hiber_info to the swap signature block, which is 866 * guaranteed to not match any valid hiber_info. 
867 */ 868 int 869 hibernate_clear_signature() 870 { 871 union hibernate_info blank_hiber_info; 872 union hibernate_info hiber_info; 873 u_int8_t *io_page; 874 875 /* Zero out a blank hiber_info */ 876 bzero(&blank_hiber_info, sizeof(hiber_info)); 877 878 if (get_hibernate_info(&hiber_info, 0)) 879 return (1); 880 881 io_page = malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT); 882 if (!io_page) 883 return (1); 884 885 /* Write (zeroed) hibernate info to disk */ 886 /* XXX - use regular kernel write routine for this */ 887 if(hiber_info.io_func(hiber_info.device, hiber_info.sig_offset, 888 (vaddr_t)&blank_hiber_info, hiber_info.secsize, 1, io_page)) 889 panic("error hibernate write 6\n"); 890 891 free(io_page, M_DEVBUF); 892 893 return (0); 894 } 895 896 /* 897 * Check chunk range overlap when calculating whether or not to copy a 898 * compressed chunk to the piglet area before decompressing. 899 * 900 * returns zero if the ranges do not overlap, non-zero otherwise. 901 */ 902 int 903 hibernate_check_overlap(paddr_t r1s, paddr_t r1e, paddr_t r2s, paddr_t r2e) 904 { 905 /* case A : end of r1 overlaps start of r2 */ 906 if (r1s < r2s && r1e > r2s) 907 return (1); 908 909 /* case B : r1 entirely inside r2 */ 910 if (r1s >= r2s && r1e <= r2e) 911 return (1); 912 913 /* case C : r2 entirely inside r1 */ 914 if (r2s >= r1s && r2e <= r1e) 915 return (1); 916 917 /* case D : end of r2 overlaps start of r1 */ 918 if (r2s < r1s && r2e > r1s) 919 return (1); 920 921 return (0); 922 } 923 924 /* 925 * Compare two hibernate_infos to determine if they are the same (eg, 926 * we should be performing a hibernate resume on this machine. 927 * Not all fields are checked - just enough to verify that the machine 928 * has the same memory configuration and kernel as the one that 929 * wrote the signature previously. 930 */ 931 int 932 hibernate_compare_signature(union hibernate_info *mine, 933 union hibernate_info *disk) 934 { 935 u_int i; 936 937 if (mine->nranges != disk->nranges) 938 return (1); 939 940 if (strcmp(mine->kernel_version, disk->kernel_version) != 0) 941 return (1); 942 943 for (i=0; i< mine->nranges; i++) { 944 if ((mine->ranges[i].base != disk->ranges[i].base) || 945 (mine->ranges[i].end != disk->ranges[i].end) ) 946 return (1); 947 } 948 949 return (0); 950 } 951 952 /* 953 * Reads read_size bytes from the hibernate device specified in 954 * hib_info at offset blkctr. Output is placed into the vaddr specified 955 * at dest. 956 * 957 * Separate offsets and pages are used to handle misaligned reads (reads 958 * that span a page boundary). 
959 * 960 * blkctr specifies a relative offset (relative to the start of swap), 961 * not an absolute disk offset 962 * 963 */ 964 int 965 hibernate_read_block(union hibernate_info *hib_info, daddr_t blkctr, 966 size_t read_size, vaddr_t dest) 967 { 968 struct buf *bp; 969 struct bdevsw *bdsw; 970 int error; 971 972 bp = geteblk(read_size); 973 bdsw = &bdevsw[major(hib_info->device)]; 974 975 error = (*bdsw->d_open)(hib_info->device, FREAD, S_IFCHR, curproc); 976 if (error) { 977 printf("hibernate_read_block open failed\n"); 978 return (1); 979 } 980 981 bp->b_bcount = read_size; 982 bp->b_blkno = blkctr; 983 CLR(bp->b_flags, B_READ | B_WRITE | B_DONE); 984 SET(bp->b_flags, B_BUSY | B_READ | B_RAW); 985 bp->b_dev = hib_info->device; 986 bp->b_cylinder = 0; 987 (*bdsw->d_strategy)(bp); 988 989 error = biowait(bp); 990 if (error) { 991 printf("hibernate_read_block biowait failed %d\n", error); 992 error = (*bdsw->d_close)(hib_info->device, 0, S_IFCHR, 993 curproc); 994 if (error) 995 printf("hibernate_read_block error close failed\n"); 996 return (1); 997 } 998 999 error = (*bdsw->d_close)(hib_info->device, FREAD, S_IFCHR, curproc); 1000 if (error) { 1001 printf("hibernate_read_block close failed\n"); 1002 return (1); 1003 } 1004 1005 bcopy(bp->b_data, (caddr_t)dest, read_size); 1006 1007 bp->b_flags |= B_INVAL; 1008 brelse(bp); 1009 1010 return (0); 1011 } 1012 1013 /* 1014 * Reads the signature block from swap, checks against the current machine's 1015 * information. If the information matches, perform a resume by reading the 1016 * saved image into the pig area, and unpacking. 1017 */ 1018 void 1019 hibernate_resume() 1020 { 1021 union hibernate_info hiber_info; 1022 u_int8_t *io_page; 1023 int s; 1024 1025 /* Scrub temporary vaddr ranges used during resume */ 1026 hibernate_temp_page = (vaddr_t)NULL; 1027 hibernate_fchunk_area = (vaddr_t)NULL; 1028 hibernate_chunktable_area = (vaddr_t)NULL; 1029 hibernate_stack_page = (vaddr_t)NULL; 1030 1031 /* Get current running machine's hibernate info */ 1032 bzero(&hiber_info, sizeof(hiber_info)); 1033 if (get_hibernate_info(&hiber_info, 0)) 1034 return; 1035 1036 io_page = malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT); 1037 if (!io_page) 1038 return; 1039 1040 /* Read hibernate info from disk */ 1041 s = splbio(); 1042 1043 /* XXX use regular kernel read routine here */ 1044 if(hiber_info.io_func(hiber_info.device, hiber_info.sig_offset, 1045 (vaddr_t)&disk_hiber_info, 1046 hiber_info.secsize, 0, io_page)) 1047 panic("error in hibernate read\n"); 1048 1049 free(io_page, M_DEVBUF); 1050 1051 /* 1052 * If on-disk and in-memory hibernate signatures match, 1053 * this means we should do a resume from hibernate. 1054 */ 1055 if (hibernate_compare_signature(&hiber_info, 1056 &disk_hiber_info)) 1057 return; 1058 1059 /* 1060 * Allocate several regions of vaddrs for use during read. 1061 * These mappings go into the resuming kernel's page table, and are 1062 * used only during image read. 
1063 */ 1064 hibernate_temp_page = (vaddr_t)km_alloc(2*PAGE_SIZE, &kv_any, 1065 &kp_none, &kd_nowait); 1066 if (!hibernate_temp_page) 1067 goto fail; 1068 1069 hibernate_fchunk_area = (vaddr_t)km_alloc(3*PAGE_SIZE, &kv_any, 1070 &kp_none, &kd_nowait); 1071 if (!hibernate_fchunk_area) 1072 goto fail; 1073 1074 /* Allocate a temporary chunktable area */ 1075 hibernate_chunktable_area = (vaddr_t)malloc(HIBERNATE_CHUNK_TABLE_SIZE, 1076 M_DEVBUF, M_NOWAIT); 1077 if (!hibernate_chunktable_area) 1078 goto fail; 1079 1080 /* Allocate one temporary page of VAs for the resume time stack */ 1081 hibernate_stack_page = (vaddr_t)km_alloc(PAGE_SIZE, &kv_any, 1082 &kp_none, &kd_nowait); 1083 if (!hibernate_stack_page) 1084 goto fail; 1085 1086 /* Read the image from disk into the image (pig) area */ 1087 if (hibernate_read_image(&disk_hiber_info)) 1088 goto fail; 1089 1090 /* Point of no return ... */ 1091 1092 disable_intr(); 1093 cold = 1; 1094 1095 /* Switch stacks */ 1096 hibernate_switch_stack_machdep(); 1097 1098 /* 1099 * Image is now in high memory (pig area), copy to correct location 1100 * in memory. We'll eventually end up copying on top of ourself, but 1101 * we are assured the kernel code here is the same between the 1102 * hibernated and resuming kernel, and we are running on our own 1103 * stack, so the overwrite is ok. 1104 */ 1105 hibernate_unpack_image(&disk_hiber_info); 1106 1107 /* 1108 * Resume the loaded kernel by jumping to the MD resume vector. 1109 * We won't be returning from this call. 1110 */ 1111 hibernate_resume_machdep(); 1112 1113 fail: 1114 printf("Unable to resume hibernated image\n"); 1115 1116 if (hibernate_temp_page) 1117 km_free((void *)hibernate_temp_page, 2*PAGE_SIZE, &kv_any, 1118 &kp_none); 1119 1120 if (hibernate_fchunk_area) 1121 km_free((void *)hibernate_fchunk_area, 3*PAGE_SIZE, &kv_any, 1122 &kp_none); 1123 1124 if (io_page) 1125 free((void *)io_page, M_DEVBUF); 1126 1127 if (hibernate_chunktable_area) 1128 free((void *)hibernate_chunktable_area, M_DEVBUF); 1129 } 1130 1131 /* 1132 * Unpack image from pig area to original location by looping through the 1133 * list of output chunks in the order they should be restored (fchunks). 1134 * This ordering is used to avoid having inflate overwrite a chunk in the 1135 * middle of processing that chunk. This will, of course, happen during the 1136 * final output chunk, where we copy the chunk to the piglet area first, 1137 * before inflating. 
1138 */ 1139 void 1140 hibernate_unpack_image(union hibernate_info *hiber_info) 1141 { 1142 int i; 1143 paddr_t image_cur; 1144 vaddr_t tempva; 1145 struct hibernate_disk_chunk *chunks; 1146 char *pva; 1147 int *fchunks; 1148 1149 pva = (char *)hiber_info->piglet_va; 1150 1151 fchunks = (int *)(pva + (4 * PAGE_SIZE)); 1152 1153 /* Copy temporary chunktable to piglet */ 1154 tempva = (vaddr_t)km_alloc(HIBERNATE_CHUNK_TABLE_SIZE, &kv_any, 1155 &kp_none, &kd_nowait); 1156 for (i=0; i<HIBERNATE_CHUNK_TABLE_SIZE; i += PAGE_SIZE) 1157 pmap_kenter_pa(tempva + i, hiber_info->piglet_pa + 1158 HIBERNATE_CHUNK_SIZE + i, VM_PROT_ALL); 1159 1160 bcopy((caddr_t)hibernate_chunktable_area, (caddr_t)tempva, 1161 HIBERNATE_CHUNK_TABLE_SIZE); 1162 1163 chunks = (struct hibernate_disk_chunk *)(pva + HIBERNATE_CHUNK_SIZE); 1164 1165 hibernate_activate_resume_pt_machdep(); 1166 1167 for (i=0; i<hiber_info->chunk_ctr; i++) { 1168 /* Reset zlib for inflate */ 1169 if (hibernate_zlib_reset(hiber_info, 0) != Z_OK) 1170 panic("hibernate failed to reset zlib for inflate\n"); 1171 1172 /* 1173 * If there is a conflict, copy the chunk to the piglet area 1174 * before unpacking it to its original location. 1175 */ 1176 if((chunks[fchunks[i]].flags & HIBERNATE_CHUNK_CONFLICT) == 0) 1177 hibernate_inflate(hiber_info, 1178 chunks[fchunks[i]].base, image_cur, 1179 chunks[fchunks[i]].compressed_size); 1180 else { 1181 bcopy((caddr_t)image_cur, 1182 (caddr_t)hiber_info->piglet_va + 1183 HIBERNATE_CHUNK_SIZE * 2, 1184 chunks[fchunks[i]].compressed_size); 1185 hibernate_inflate(hiber_info, 1186 chunks[fchunks[i]].base, 1187 hiber_info->piglet_va + 1188 HIBERNATE_CHUNK_SIZE * 2, 1189 chunks[fchunks[i]].compressed_size); 1190 } 1191 image_cur += chunks[fchunks[i]].compressed_size; 1192 } 1193 } 1194 1195 /* 1196 * Write a compressed version of this machine's memory to disk, at the 1197 * precalculated swap offset: 1198 * 1199 * end of swap - signature block size - chunk table size - memory size 1200 * 1201 * The function begins by looping through each phys mem range, cutting each 1202 * one into 4MB chunks. These chunks are then compressed individually 1203 * and written out to disk, in phys mem order. Some chunks might compress 1204 * more than others, and for this reason, each chunk's size is recorded 1205 * in the chunk table, which is written to disk after the image has 1206 * properly been compressed and written (in hibernate_write_chunktable). 1207 * 1208 * When this function is called, the machine is nearly suspended - most 1209 * devices are quiesced/suspended, interrupts are off, and cold has 1210 * been set. This means that there can be no side effects once the 1211 * write has started, and the write function itself can also have no 1212 * side effects. 1213 * 1214 * This function uses the piglet area during this process as follows: 1215 * 1216 * offset from piglet base use 1217 * ----------------------- -------------------- 1218 * 0 i/o allocation area 1219 * PAGE_SIZE i/o write area 1220 * 2*PAGE_SIZE temp/scratch page 1221 * 3*PAGE_SIZE temp/scratch page 1222 * 4*PAGE_SIZE hiballoc arena 1223 * 5*PAGE_SIZE to 85*PAGE_SIZE zlib deflate area 1224 * ... 1225 * HIBERNATE_CHUNK_SIZE chunk table temporary area 1226 * 1227 * Some transient piglet content is saved as part of deflate, 1228 * but it is irrelevant during resume as it will be repurposed 1229 * at that time for other things. 
1230 */ 1231 int 1232 hibernate_write_chunks(union hibernate_info *hiber_info) 1233 { 1234 paddr_t range_base, range_end, inaddr, temp_inaddr; 1235 daddr_t blkctr; 1236 int i; 1237 size_t nblocks, out_remaining, used, offset; 1238 struct hibernate_disk_chunk *chunks; 1239 vaddr_t hibernate_alloc_page = hiber_info->piglet_va; 1240 vaddr_t hibernate_io_page = hiber_info->piglet_va + PAGE_SIZE; 1241 1242 blkctr = hiber_info->image_offset; 1243 hiber_info->chunk_ctr = 0; 1244 offset = 0; 1245 1246 /* 1247 * Allocate VA for the temp and copy page. 1248 */ 1249 1250 hibernate_temp_page = (vaddr_t)km_alloc(PAGE_SIZE, &kv_any, 1251 &kp_none, &kd_nowait); 1252 if (!hibernate_temp_page) 1253 return (1); 1254 1255 hibernate_copy_page = (vaddr_t)km_alloc(PAGE_SIZE, &kv_any, 1256 &kp_none, &kd_nowait); 1257 if (!hibernate_copy_page) 1258 return (1); 1259 1260 pmap_kenter_pa(hibernate_copy_page, 1261 (hiber_info->piglet_pa + 3*PAGE_SIZE), 1262 VM_PROT_ALL); 1263 1264 /* XXX - needed on i386. check other archs */ 1265 pmap_activate(curproc); 1266 1267 chunks = (struct hibernate_disk_chunk *)(hiber_info->piglet_va + 1268 HIBERNATE_CHUNK_SIZE); 1269 1270 /* Calculate the chunk regions */ 1271 for (i=0; i < hiber_info->nranges; i++) { 1272 range_base = hiber_info->ranges[i].base; 1273 range_end = hiber_info->ranges[i].end; 1274 1275 inaddr = range_base; 1276 1277 while (inaddr < range_end) { 1278 chunks[hiber_info->chunk_ctr].base = inaddr; 1279 if (inaddr + HIBERNATE_CHUNK_SIZE < range_end) 1280 chunks[hiber_info->chunk_ctr].end = inaddr + 1281 HIBERNATE_CHUNK_SIZE; 1282 else 1283 chunks[hiber_info->chunk_ctr].end = range_end; 1284 1285 inaddr += HIBERNATE_CHUNK_SIZE; 1286 hiber_info->chunk_ctr ++; 1287 } 1288 } 1289 1290 /* Compress and write the chunks in the chunktable */ 1291 for (i=0; i < hiber_info->chunk_ctr; i++) { 1292 range_base = chunks[i].base; 1293 range_end = chunks[i].end; 1294 1295 chunks[i].offset = blkctr; 1296 1297 /* Reset zlib for deflate */ 1298 if (hibernate_zlib_reset(hiber_info, 1) != Z_OK) 1299 return (1); 1300 1301 inaddr = range_base; 1302 1303 /* 1304 * For each range, loop through its phys mem region 1305 * and write out the chunks (the last chunk might be 1306 * smaller than the chunk size). 1307 */ 1308 while (inaddr < range_end) { 1309 out_remaining = PAGE_SIZE; 1310 while (out_remaining > 0 && inaddr < range_end) { 1311 pmap_kenter_pa(hibernate_temp_page, 1312 inaddr & PMAP_PA_MASK, VM_PROT_ALL); 1313 pmap_activate(curproc); 1314 1315 bcopy((caddr_t)hibernate_temp_page, 1316 (caddr_t)hibernate_copy_page, PAGE_SIZE); 1317 1318 /* Adjust for non page-sized regions */ 1319 temp_inaddr = (inaddr & PAGE_MASK) + 1320 hibernate_copy_page; 1321 1322 /* Deflate from temp_inaddr to IO page */ 1323 inaddr += hibernate_deflate(hiber_info, 1324 temp_inaddr, 1325 &out_remaining); 1326 } 1327 1328 if (out_remaining == 0) { 1329 /* Filled up the page */ 1330 nblocks = PAGE_SIZE / hiber_info->secsize; 1331 1332 if(hiber_info->io_func(hiber_info->device, blkctr, 1333 (vaddr_t)hibernate_io_page, PAGE_SIZE, 1334 1, (void *)hibernate_alloc_page)) 1335 return (1); 1336 1337 blkctr += nblocks; 1338 } 1339 1340 } 1341 1342 if (inaddr != range_end) 1343 return (1); 1344 1345 /* 1346 * End of range. 
Round up to next secsize bytes
		 * after finishing compress
		 */
		if (out_remaining == 0)
			out_remaining = PAGE_SIZE;

		/* Finish compress */
		hibernate_state->hib_stream.avail_in = 0;
		hibernate_state->hib_stream.avail_out = out_remaining;
		hibernate_state->hib_stream.next_in = (caddr_t)inaddr;
		hibernate_state->hib_stream.next_out =
		    (caddr_t)hibernate_io_page + (PAGE_SIZE - out_remaining);

		if (deflate(&hibernate_state->hib_stream, Z_FINISH) !=
		    Z_STREAM_END)
			return (1);

		out_remaining = hibernate_state->hib_stream.avail_out;

		used = PAGE_SIZE - out_remaining;
		nblocks = used / hiber_info->secsize;

		/* Round up to next block if needed */
		if (used % hiber_info->secsize != 0)
			nblocks++;

		/* Write final block(s) for this chunk */
		if (hiber_info->io_func(hiber_info->device, blkctr,
		    (vaddr_t)hibernate_io_page, nblocks * hiber_info->secsize,
		    1, (void *)hibernate_alloc_page))
			return (1);

		blkctr += nblocks;

		offset = blkctr;
		chunks[i].compressed_size =
		    (offset - chunks[i].offset) * hiber_info->secsize;
	}

	return (0);
}

/*
 * Reset the zlib stream state and allocate a new hiballoc area for either
 * inflate or deflate. This function is called once for each hibernate chunk.
 * Calling hiballoc_init multiple times is acceptable since the memory it is
 * provided is unmanaged memory (stolen). We use the memory provided to us
 * by the piglet allocated via the supplied hiber_info.
 */
int
hibernate_zlib_reset(union hibernate_info *hiber_info, int deflate)
{
	vaddr_t hibernate_zlib_start;
	size_t hibernate_zlib_size;

	hibernate_state = (struct hibernate_zlib_state *)
	    (hiber_info->piglet_va + (4 * PAGE_SIZE));

	hibernate_zlib_start = hiber_info->piglet_va + (5 * PAGE_SIZE);
	hibernate_zlib_size = 80 * PAGE_SIZE;

	bzero((caddr_t)hibernate_zlib_start, hibernate_zlib_size);
	bzero((caddr_t)hibernate_state, PAGE_SIZE);

	/* Set up stream structure */
	hibernate_state->hib_stream.zalloc = (alloc_func)hibernate_zlib_alloc;
	hibernate_state->hib_stream.zfree = (free_func)hibernate_zlib_free;

	/* Initialize the hiballoc arena for zlib allocs/frees */
	hiballoc_init(&hibernate_state->hiballoc_arena,
	    (caddr_t)hibernate_zlib_start, hibernate_zlib_size);

	if (deflate) {
		return deflateInit(&hibernate_state->hib_stream,
		    Z_DEFAULT_COMPRESSION);
	} else
		return inflateInit(&hibernate_state->hib_stream);
}

/*
 * Reads the hibernated memory image from disk, whose location and
 * size are recorded in hiber_info. Begin by reading the persisted
 * chunk table, which records the original chunk placement location
 * and compressed size for each. Next, allocate a pig region of
 * sufficient size to hold the compressed image. Next, read the
 * chunks into the pig area (calling hibernate_read_chunks to do this),
 * and finally, if all of the above succeeds, clear the hibernate signature.
 * The function will then return to hibernate_resume, which will proceed
 * to unpack the pig image to the correct place in memory.
1437 */ 1438 int 1439 hibernate_read_image(union hibernate_info *hiber_info) 1440 { 1441 int i; 1442 paddr_t image_start, image_end, pig_start, pig_end; 1443 daddr_t blkctr; 1444 struct hibernate_disk_chunk *chunks; 1445 size_t compressed_size, disk_size, chunktable_size, pig_sz; 1446 1447 /* Calculate total chunk table size in disk blocks */ 1448 chunktable_size = HIBERNATE_CHUNK_TABLE_SIZE / hiber_info->secsize; 1449 1450 blkctr = hiber_info->sig_offset - chunktable_size - 1451 hiber_info->swap_offset; 1452 1453 for(i=0; i < HIBERNATE_CHUNK_TABLE_SIZE; 1454 i += MAXPHYS, blkctr += MAXPHYS/hiber_info->secsize) 1455 hibernate_read_block(hiber_info, blkctr, MAXPHYS, 1456 hibernate_chunktable_area + i); 1457 1458 blkctr = hiber_info->image_offset; 1459 compressed_size = 0; 1460 chunks = (struct hibernate_disk_chunk *)hibernate_chunktable_area; 1461 1462 for (i=0; i<hiber_info->chunk_ctr; i++) 1463 compressed_size += chunks[i].compressed_size; 1464 1465 disk_size = compressed_size; 1466 1467 /* Allocate the pig area */ 1468 pig_sz = compressed_size + HIBERNATE_CHUNK_SIZE; 1469 if (uvm_pmr_alloc_pig(&pig_start, pig_sz) == ENOMEM) 1470 return (1); 1471 1472 pig_end = pig_start + pig_sz; 1473 1474 /* Calculate image extents. Pig image must end on a chunk boundary. */ 1475 image_end = pig_end & ~(HIBERNATE_CHUNK_SIZE - 1); 1476 image_start = pig_start; 1477 1478 image_start = image_end - disk_size; 1479 1480 hibernate_read_chunks(hiber_info, image_start, image_end, disk_size); 1481 1482 /* Prepare the resume time pmap/page table */ 1483 hibernate_populate_resume_pt(hiber_info, image_start, image_end); 1484 1485 /* Read complete, clear the signature and return */ 1486 return hibernate_clear_signature(); 1487 } 1488 1489 /* 1490 * Read the hibernated memory chunks from disk (chunk information at this 1491 * point is stored in the piglet) into the pig area specified by 1492 * [pig_start .. pig_end]. Order the chunks so that the final chunk is the 1493 * only chunk with overlap possibilities. 1494 * 1495 * This function uses the piglet area during this process as follows: 1496 * 1497 * offset from piglet base use 1498 * ----------------------- -------------------- 1499 * 0 i/o allocation area 1500 * PAGE_SIZE i/o write area 1501 * 2*PAGE_SIZE temp/scratch page 1502 * 3*PAGE_SIZE temp/scratch page 1503 * 4*PAGE_SIZE to 6*PAGE_SIZE chunk ordering area 1504 * 7*PAGE_SIZE hiballoc arena 1505 * 8*PAGE_SIZE to 88*PAGE_SIZE zlib deflate area 1506 * ... 
1507 * HIBERNATE_CHUNK_SIZE chunk table temporary area 1508 */ 1509 int 1510 hibernate_read_chunks(union hibernate_info *hib_info, paddr_t pig_start, 1511 paddr_t pig_end, size_t image_compr_size) 1512 { 1513 paddr_t img_index, img_cur, r1s, r1e, r2s, r2e; 1514 paddr_t copy_start, copy_end, piglet_cur; 1515 paddr_t piglet_base = hib_info->piglet_pa; 1516 paddr_t piglet_end = piglet_base + HIBERNATE_CHUNK_SIZE; 1517 daddr_t blkctr; 1518 size_t processed, compressed_size, read_size; 1519 int i, j, overlap, found, nchunks, nochunks=0, nfchunks=0, npchunks=0; 1520 struct hibernate_disk_chunk *chunks; 1521 u_int8_t *ochunks, *pchunks, *fchunks; 1522 1523 /* Map the chunk ordering region */ 1524 pmap_kenter_pa(hibernate_fchunk_area, 1525 piglet_base + (4*PAGE_SIZE), VM_PROT_ALL); 1526 pmap_kenter_pa(hibernate_fchunk_area + PAGE_SIZE, 1527 piglet_base + (5*PAGE_SIZE), VM_PROT_ALL); 1528 pmap_kenter_pa(hibernate_fchunk_area + 2*PAGE_SIZE, 1529 piglet_base + (6*PAGE_SIZE), 1530 VM_PROT_ALL); 1531 1532 /* Temporary output chunk ordering */ 1533 ochunks = (u_int8_t *)hibernate_fchunk_area; 1534 1535 /* Piglet chunk ordering */ 1536 pchunks = (u_int8_t *)hibernate_fchunk_area + PAGE_SIZE; 1537 1538 /* Final chunk ordering */ 1539 fchunks = (u_int8_t *)hibernate_fchunk_area + 2*PAGE_SIZE; 1540 1541 nchunks = hib_info->chunk_ctr; 1542 chunks = (struct hibernate_disk_chunk *)hibernate_chunktable_area; 1543 1544 /* Initially start all chunks as unplaced */ 1545 for (i=0; i < nchunks; i++) 1546 chunks[i].flags=0; 1547 1548 /* 1549 * Search the list for chunks that are outside the pig area. These 1550 * can be placed first in the final output list. 1551 */ 1552 for (i=0; i < nchunks; i++) { 1553 if(chunks[i].end <= pig_start || chunks[i].base >= pig_end) { 1554 ochunks[nochunks] = (u_int8_t)i; 1555 fchunks[nfchunks] = (u_int8_t)i; 1556 nochunks++; 1557 nfchunks++; 1558 chunks[i].flags |= HIBERNATE_CHUNK_USED; 1559 } 1560 } 1561 1562 /* 1563 * Walk the ordering, place the chunks in ascending memory order. 1564 * Conflicts might arise, these are handled next. 1565 */ 1566 do { 1567 img_index = -1; 1568 found=0; 1569 j=-1; 1570 for (i=0; i < nchunks; i++) 1571 if (chunks[i].base < img_index && 1572 chunks[i].flags == 0 ) { 1573 j = i; 1574 img_index = chunks[i].base; 1575 } 1576 1577 if (j != -1) { 1578 found = 1; 1579 ochunks[nochunks] = (short)j; 1580 nochunks++; 1581 chunks[j].flags |= HIBERNATE_CHUNK_PLACED; 1582 } 1583 } while (found); 1584 1585 img_index=pig_start; 1586 1587 /* 1588 * Identify chunk output conflicts (chunks whose pig load area 1589 * corresponds to their original memory placement location) 1590 */ 1591 for(i=0; i< nochunks ; i++) { 1592 overlap=0; 1593 r1s = img_index; 1594 r1e = img_index + chunks[ochunks[i]].compressed_size; 1595 r2s = chunks[ochunks[i]].base; 1596 r2e = chunks[ochunks[i]].end; 1597 1598 overlap = hibernate_check_overlap(r1s, r1e, r2s, r2e); 1599 if (overlap) 1600 chunks[ochunks[i]].flags |= HIBERNATE_CHUNK_CONFLICT; 1601 1602 img_index += chunks[ochunks[i]].compressed_size; 1603 } 1604 1605 /* 1606 * Prepare the final output chunk list. Calculate an output 1607 * inflate strategy for overlapping chunks if needed. 
1608 */ 1609 img_index=pig_start; 1610 for (i=0; i < nochunks ; i++) { 1611 /* 1612 * If a conflict is detected, consume enough compressed 1613 * output chunks to fill the piglet 1614 */ 1615 if (chunks[ochunks[i]].flags & HIBERNATE_CHUNK_CONFLICT) { 1616 copy_start = piglet_base; 1617 copy_end = piglet_end; 1618 piglet_cur = piglet_base; 1619 npchunks = 0; 1620 j=i; 1621 while (copy_start < copy_end && j < nochunks) { 1622 piglet_cur += chunks[ochunks[j]].compressed_size; 1623 pchunks[npchunks] = ochunks[j]; 1624 npchunks++; 1625 copy_start += chunks[ochunks[j]].compressed_size; 1626 img_index += chunks[ochunks[j]].compressed_size; 1627 i++; 1628 j++; 1629 } 1630 1631 piglet_cur = piglet_base; 1632 for (j=0; j < npchunks; j++) { 1633 piglet_cur += chunks[pchunks[j]].compressed_size; 1634 fchunks[nfchunks] = pchunks[j]; 1635 chunks[pchunks[j]].flags |= HIBERNATE_CHUNK_USED; 1636 nfchunks++; 1637 } 1638 } else { 1639 /* 1640 * No conflict, chunk can be added without copying 1641 */ 1642 if ((chunks[ochunks[i]].flags & 1643 HIBERNATE_CHUNK_USED) == 0) { 1644 fchunks[nfchunks] = ochunks[i]; 1645 chunks[ochunks[i]].flags |= HIBERNATE_CHUNK_USED; 1646 nfchunks++; 1647 } 1648 1649 img_index += chunks[ochunks[i]].compressed_size; 1650 } 1651 } 1652 1653 img_index = pig_start; 1654 for(i=0 ; i< nfchunks; i++) { 1655 piglet_cur = piglet_base; 1656 img_index += chunks[fchunks[i]].compressed_size; 1657 } 1658 1659 img_cur = pig_start; 1660 1661 for(i=0; i<nfchunks; i++) { 1662 blkctr = chunks[fchunks[i]].offset - hib_info->swap_offset; 1663 processed = 0; 1664 compressed_size = chunks[fchunks[i]].compressed_size; 1665 1666 while (processed < compressed_size) { 1667 pmap_kenter_pa(hibernate_temp_page, img_cur, 1668 VM_PROT_ALL); 1669 pmap_kenter_pa(hibernate_temp_page + PAGE_SIZE, 1670 img_cur+PAGE_SIZE, VM_PROT_ALL); 1671 1672 /* XXX - needed on i386. check other archs */ 1673 pmap_activate(curproc); 1674 if (compressed_size - processed >= PAGE_SIZE) 1675 read_size = PAGE_SIZE; 1676 else 1677 read_size = compressed_size - processed; 1678 1679 hibernate_read_block(hib_info, blkctr, read_size, 1680 hibernate_temp_page + (img_cur & PAGE_MASK)); 1681 1682 blkctr += (read_size / hib_info->secsize); 1683 1684 hibernate_flush(); 1685 pmap_kremove(hibernate_temp_page, PAGE_SIZE); 1686 pmap_kremove(hibernate_temp_page + PAGE_SIZE, 1687 PAGE_SIZE); 1688 processed += read_size; 1689 img_cur += read_size; 1690 } 1691 } 1692 1693 return (0); 1694 } 1695 1696 /* 1697 * Hibernating a machine comprises the following operations: 1698 * 1. Calculating this machine's hibernate_info information 1699 * 2. Allocating a piglet and saving the piglet's physaddr 1700 * 3. Calculating the memory chunks 1701 * 4. Writing the compressed chunks to disk 1702 * 5. Writing the chunk table 1703 * 6. Writing the signature block (hibernate_info) 1704 * 1705 * On most architectures, the function calling hibernate_suspend would 1706 * then power off the machine using some MD-specific implementation. 1707 */ 1708 int 1709 hibernate_suspend() 1710 { 1711 union hibernate_info hib_info; 1712 1713 /* 1714 * Calculate memory ranges, swap offsets, etc. 
	 * This also allocates a piglet whose physaddr is stored in
	 * hib_info->piglet_pa and vaddr stored in hib_info->piglet_va
	 */
	if (get_hibernate_info(&hib_info, 1))
		return (1);

	/* XXX - Won't need to zero everything with RLE */
	uvm_pmr_zero_everything();

	if (hibernate_write_chunks(&hib_info))
		return (1);

	if (hibernate_write_chunktable(&hib_info))
		return (1);

	return hibernate_write_signature(&hib_info);
}