/*	$OpenBSD: subr_hibernate.c,v 1.9 2011/07/09 00:27:31 mlarkin Exp $	*/

/*
 * Copyright (c) 2011 Ariane van der Steldt <ariane@stack.nl>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include <sys/hibernate.h>
#include <sys/param.h>
#include <sys/tree.h>
#include <sys/types.h>
#include <sys/systm.h>
#include <sys/disklabel.h>
#include <sys/conf.h>
#include <uvm/uvm.h>
#include <machine/hibernate.h>

extern char *disk_readlabel(struct disklabel *, dev_t, char *, size_t);

struct hibernate_state *hibernate_state;

/*
 * Hib alloc enforced alignment.
 */
#define HIB_ALIGN		8	/* bytes alignment */

/*
 * sizeof builtin operation, but with alignment constraint.
 */
#define HIB_SIZEOF(_type)	roundup(sizeof(_type), HIB_ALIGN)

struct hiballoc_entry
{
        size_t			hibe_use;
        size_t			hibe_space;
        RB_ENTRY(hiballoc_entry) hibe_entry;
};

/*
 * Compare hiballoc entries based on the address they manage.
 *
 * Since the address is fixed, relative to struct hiballoc_entry,
 * we just compare the hiballoc_entry pointers.
 */
static __inline int
hibe_cmp(struct hiballoc_entry *l, struct hiballoc_entry *r)
{
        return l < r ? -1 : (l > r);
}

RB_PROTOTYPE(hiballoc_addr, hiballoc_entry, hibe_entry, hibe_cmp)

/*
 * Given a hiballoc entry, return the address it manages.
 */
static __inline void *
hib_entry_to_addr(struct hiballoc_entry *entry)
{
        caddr_t addr;

        addr = (caddr_t)entry;
        addr += HIB_SIZEOF(struct hiballoc_entry);
        return addr;
}

/*
 * Given an address, find the hiballoc entry that manages it.
 */
static __inline struct hiballoc_entry *
hib_addr_to_entry(void *addr_param)
{
        caddr_t addr;

        addr = (caddr_t)addr_param;
        addr -= HIB_SIZEOF(struct hiballoc_entry);
        return (struct hiballoc_entry *)addr;
}

RB_GENERATE(hiballoc_addr, hiballoc_entry, hibe_entry, hibe_cmp)
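
/*
 * Layout sketch (illustrative only, not part of the original file): every
 * allocation is preceded by its bookkeeping header, so an arena with two
 * allocations in it looks like
 *
 *	[entry0][use0 bytes][space0 bytes][entry1][use1 bytes][space1 bytes]
 *
 * hib_entry_to_addr() steps HIB_SIZEOF(struct hiballoc_entry) bytes forward
 * over the header; hib_addr_to_entry() steps the same distance back.
 */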

/*
 * Allocate memory from the arena.
 *
 * Returns NULL if no memory is available.
 */
void *
hib_alloc(struct hiballoc_arena *arena, size_t alloc_sz)
{
        struct hiballoc_entry *entry, *new_entry;
        size_t find_sz;

        /*
         * Enforce alignment of HIB_ALIGN bytes.
         *
         * Note that, because the entry is put in front of the allocation,
         * 0-byte allocations are guaranteed a unique address.
         */
        alloc_sz = roundup(alloc_sz, HIB_ALIGN);

        /*
         * Find an entry with hibe_space >= find_sz.
         *
         * If the root node is not large enough, we switch to tree traversal.
         * Because all entries are made at the bottom of the free space,
         * traversal from the end has a slightly better chance of yielding
         * a sufficiently large space.
         */
        find_sz = alloc_sz + HIB_SIZEOF(struct hiballoc_entry);
        entry = RB_ROOT(&arena->hib_addrs);
        if (entry != NULL && entry->hibe_space < find_sz) {
                RB_FOREACH_REVERSE(entry, hiballoc_addr, &arena->hib_addrs) {
                        if (entry->hibe_space >= find_sz)
                                break;
                }
        }

        /*
         * Insufficient or too fragmented memory.
         */
        if (entry == NULL)
                return NULL;

        /*
         * Create new entry in allocated space.
         */
        new_entry = (struct hiballoc_entry *)(
            (caddr_t)hib_entry_to_addr(entry) + entry->hibe_use);
        new_entry->hibe_space = entry->hibe_space - find_sz;
        new_entry->hibe_use = alloc_sz;

        /*
         * Insert entry.
         */
        if (RB_INSERT(hiballoc_addr, &arena->hib_addrs, new_entry) != NULL)
                panic("hib_alloc: insert failure");
        entry->hibe_space = 0;

        /* Return address managed by entry. */
        return hib_entry_to_addr(new_entry);
}

/*
 * Free a pointer previously allocated from this arena.
 *
 * If addr is NULL, this will be silently accepted.
 */
void
hib_free(struct hiballoc_arena *arena, void *addr)
{
        struct hiballoc_entry *entry, *prev;

        if (addr == NULL)
                return;

        /*
         * Derive entry from addr and check it is really in this arena.
         */
        entry = hib_addr_to_entry(addr);
        if (RB_FIND(hiballoc_addr, &arena->hib_addrs, entry) != entry)
                panic("hib_free: freed item %p not in hib arena", addr);

        /*
         * Give the space in entry to its predecessor.
         *
         * If entry has no predecessor, change its used space into free space
         * instead.
         */
        prev = RB_PREV(hiballoc_addr, &arena->hib_addrs, entry);
        if (prev != NULL &&
            (void *)((caddr_t)prev + HIB_SIZEOF(struct hiballoc_entry) +
            prev->hibe_use + prev->hibe_space) == entry) {
                /* Merge entry. */
                RB_REMOVE(hiballoc_addr, &arena->hib_addrs, entry);
                prev->hibe_space += HIB_SIZEOF(struct hiballoc_entry) +
                    entry->hibe_use + entry->hibe_space;
        } else {
                /* Flip used memory to free space. */
                entry->hibe_space += entry->hibe_use;
                entry->hibe_use = 0;
        }
}

/*
 * Initialize hiballoc.
 *
 * The allocator will manage the memory at ptr, which is len bytes.
 */
int
hiballoc_init(struct hiballoc_arena *arena, void *p_ptr, size_t p_len)
{
        struct hiballoc_entry *entry;
        caddr_t ptr;
        size_t len;

        RB_INIT(&arena->hib_addrs);

        /*
         * Hib allocator enforces HIB_ALIGN alignment.
         * Fixup ptr and len.
         */
        ptr = (caddr_t)roundup((vaddr_t)p_ptr, HIB_ALIGN);
        len = p_len - ((size_t)ptr - (size_t)p_ptr);
        len &= ~((size_t)HIB_ALIGN - 1);

        /*
         * Insufficient memory to be able to allocate and also do bookkeeping.
         */
        if (len <= HIB_SIZEOF(struct hiballoc_entry))
                return ENOMEM;

        /*
         * Create entry describing space.
         */
        entry = (struct hiballoc_entry *)ptr;
        entry->hibe_use = 0;
        entry->hibe_space = len - HIB_SIZEOF(struct hiballoc_entry);
        RB_INSERT(hiballoc_addr, &arena->hib_addrs, entry);

        return 0;
}
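
/*
 * Usage sketch (illustrative only, not part of the original file; the
 * backing buffer and allocation size are hypothetical):
 *
 *	static char backing[4 * PAGE_SIZE];
 *	struct hiballoc_arena arena;
 *	void *p;
 *
 *	if (hiballoc_init(&arena, backing, sizeof(backing)) == 0) {
 *		p = hib_alloc(&arena, 128);
 *		if (p != NULL)
 *			hib_free(&arena, p);
 *	}
 */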

/*
 * Zero all free memory.
 */
void
uvm_pmr_zero_everything(void)
{
        struct uvm_pmemrange *pmr;
        struct vm_page *pg;
        int i;

        uvm_lock_fpageq();
        TAILQ_FOREACH(pmr, &uvm.pmr_control.use, pmr_use) {
                /* Zero single pages. */
                while ((pg = TAILQ_FIRST(&pmr->single[UVM_PMR_MEMTYPE_DIRTY]))
                    != NULL) {
                        uvm_pmr_remove(pmr, pg);
                        uvm_pagezero(pg);
                        atomic_setbits_int(&pg->pg_flags, PG_ZERO);
                        uvmexp.zeropages++;
                        uvm_pmr_insert(pmr, pg, 0);
                }

                /* Zero multi page ranges. */
                while ((pg = RB_ROOT(&pmr->size[UVM_PMR_MEMTYPE_DIRTY]))
                    != NULL) {
                        pg--; /* Size tree always has second page. */
                        uvm_pmr_remove(pmr, pg);
                        for (i = 0; i < pg->fpgsz; i++) {
                                uvm_pagezero(&pg[i]);
                                atomic_setbits_int(&pg[i].pg_flags, PG_ZERO);
                                uvmexp.zeropages++;
                        }
                        uvm_pmr_insert(pmr, pg, 0);
                }
        }
        uvm_unlock_fpageq();
}

/*
 * Mark all memory as dirty.
 *
 * Used to inform the system that the clean memory isn't clean for some
 * reason, for example because we just came back from hibernate.
 */
void
uvm_pmr_dirty_everything(void)
{
        struct uvm_pmemrange *pmr;
        struct vm_page *pg;
        int i;

        uvm_lock_fpageq();
        TAILQ_FOREACH(pmr, &uvm.pmr_control.use, pmr_use) {
                /* Dirty single pages. */
                while ((pg = TAILQ_FIRST(&pmr->single[UVM_PMR_MEMTYPE_ZERO]))
                    != NULL) {
                        uvm_pmr_remove(pmr, pg);
                        atomic_clearbits_int(&pg->pg_flags, PG_ZERO);
                        uvm_pmr_insert(pmr, pg, 0);
                }

                /* Dirty multi page ranges. */
                while ((pg = RB_ROOT(&pmr->size[UVM_PMR_MEMTYPE_ZERO]))
                    != NULL) {
                        pg--; /* Size tree always has second page. */
                        uvm_pmr_remove(pmr, pg);
                        for (i = 0; i < pg->fpgsz; i++)
                                atomic_clearbits_int(&pg[i].pg_flags, PG_ZERO);
                        uvm_pmr_insert(pmr, pg, 0);
                }
        }

        uvmexp.zeropages = 0;
        uvm_unlock_fpageq();
}

/*
 * Allocate a range at the highest address that can hold sz.
 *
 * sz in bytes.
 */
int
uvm_pmr_alloc_pig(paddr_t *addr, psize_t sz)
{
        struct uvm_pmemrange *pmr;
        struct vm_page *pig_pg, *pg;

        /*
         * Convert sz to pages, since that is what pmemrange uses internally.
         */
        sz = atop(round_page(sz));

        uvm_lock_fpageq();

        TAILQ_FOREACH(pmr, &uvm.pmr_control.use, pmr_use) {
                RB_FOREACH_REVERSE(pig_pg, uvm_pmr_addr, &pmr->addr) {
                        if (pig_pg->fpgsz >= sz) {
                                goto found;
                        }
                }
        }

        /*
         * Allocation failure.
         */
        uvm_unlock_fpageq();
        return ENOMEM;

found:
        /* Remove page from freelist. */
        uvm_pmr_remove_size(pmr, pig_pg);
        pig_pg->fpgsz -= sz;
        pg = pig_pg + pig_pg->fpgsz;
        if (pig_pg->fpgsz == 0)
                uvm_pmr_remove_addr(pmr, pig_pg);
        else
                uvm_pmr_insert_size(pmr, pig_pg);

        uvmexp.free -= sz;
        *addr = VM_PAGE_TO_PHYS(pg);

        /*
         * Update pg flags.
         *
         * Note that we trash the sz argument now.
         */
        while (sz > 0) {
                KASSERT(pg->pg_flags & PQ_FREE);

                atomic_clearbits_int(&pg->pg_flags,
                    PG_PMAP0|PG_PMAP1|PG_PMAP2|PG_PMAP3);

                if (pg->pg_flags & PG_ZERO)
                        uvmexp.zeropages--;
                atomic_clearbits_int(&pg->pg_flags,
                    PG_ZERO|PQ_FREE);

                pg->uobject = NULL;
                pg->uanon = NULL;
                pg->pg_version++;

                /*
                 * Next.
                 */
                pg++;
                sz--;
        }

        /* Return. */
        uvm_unlock_fpageq();
        return 0;
}
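
/*
 * Usage sketch (illustrative only, not part of the original file; the size
 * is hypothetical):
 *
 *	paddr_t pig_start;
 *
 *	if (uvm_pmr_alloc_pig(&pig_start, 16 * 1024 * 1024) == ENOMEM)
 *		printf("no free range large enough for the pig\n");
 */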

/*
 * Allocate a piglet area.
 *
 * The piglet is allocated as low in memory as possible.
 * Piglets are aligned to align bytes.
 *
 * sz and align in bytes.
 *
 * The call may sleep, giving the pagedaemon a chance to free memory.
 * The pagedaemon may decide it is not possible to free enough memory,
 * causing the allocation to fail.
 */
int
uvm_pmr_alloc_piglet(paddr_t *addr, psize_t sz, paddr_t align)
{
        vaddr_t pg_addr, piglet_addr;
        struct uvm_pmemrange *pmr;
        struct vm_page *pig_pg, *pg;
        struct pglist pageq;
        int pdaemon_woken;

        KASSERT((align & (align - 1)) == 0);
        pdaemon_woken = 0; /* Didn't wake the pagedaemon. */

        /*
         * Fixup arguments: align must be at least PAGE_SIZE,
         * sz will be converted to pagecount, since that is what
         * pmemrange uses internally.
         */
        if (align < PAGE_SIZE)
                align = PAGE_SIZE;
        sz = atop(round_page(sz));

        uvm_lock_fpageq();

        TAILQ_FOREACH_REVERSE(pmr, &uvm.pmr_control.use, uvm_pmemrange_use,
            pmr_use) {
retry:
                /*
                 * Search for a range with enough space.
                 * Use the address tree, to ensure the range is as low as
                 * possible.
                 */
                RB_FOREACH(pig_pg, uvm_pmr_addr, &pmr->addr) {
                        pg_addr = VM_PAGE_TO_PHYS(pig_pg);
                        piglet_addr = (pg_addr + (align - 1)) & ~(align - 1);

                        /*
                         * The aligned start plus sz pages must still fit
                         * inside this free range.
                         */
                        if (atop(pg_addr) + pig_pg->fpgsz >=
                            atop(piglet_addr) + sz) {
                                goto found;
                        }
                }

                /*
                 * Try to coerce the pagedaemon into freeing memory
                 * for the piglet.
                 *
                 * pdaemon_woken is set to prevent the code from
                 * falling into an endless loop.
                 */
                if (!pdaemon_woken) {
                        pdaemon_woken = 1;
                        if (uvm_wait_pla(ptoa(pmr->low), ptoa(pmr->high) - 1,
                            ptoa(sz), UVM_PLA_FAILOK) == 0)
                                goto retry;
                }
        }

        /* Return failure. */
        uvm_unlock_fpageq();
        return ENOMEM;

found:
        /*
         * Extract piglet from pigpen.
         */
        TAILQ_INIT(&pageq);
        uvm_pmr_extract_range(pmr, pig_pg,
            atop(piglet_addr), atop(piglet_addr) + sz, &pageq);

        *addr = piglet_addr;
        uvmexp.free -= sz;

        /*
         * Update pg flags.
         */
        TAILQ_FOREACH(pg, &pageq, pageq) {
                KASSERT(pg->pg_flags & PQ_FREE);

                atomic_clearbits_int(&pg->pg_flags,
                    PG_PMAP0|PG_PMAP1|PG_PMAP2|PG_PMAP3);

                if (pg->pg_flags & PG_ZERO)
                        uvmexp.zeropages--;
                atomic_clearbits_int(&pg->pg_flags,
                    PG_ZERO|PQ_FREE);

                pg->uobject = NULL;
                pg->uanon = NULL;
                pg->pg_version++;
        }

        uvm_unlock_fpageq();
        return 0;
}

/*
 * Physmem RLE compression support.
 *
 * Given a physical page address, return the number of consecutive free
 * pages starting at that address.
 * Returns 0 if the page at addr is not free.
 */
psize_t
uvm_page_rle(paddr_t addr)
{
        struct vm_page *pg, *pg_end;
        struct vm_physseg *vmp;
        int pseg_idx, off_idx;

        pseg_idx = vm_physseg_find(atop(addr), &off_idx);
        if (pseg_idx == -1)
                return 0;

        vmp = &vm_physmem[pseg_idx];
        pg = &vmp->pgs[off_idx];
        if (!(pg->pg_flags & PQ_FREE))
                return 0;

        /*
         * Search for the first non-free page after pg.
         * Note that the page may not be the first page in a free pmemrange,
         * therefore pg->fpgsz cannot be used.
         */
        for (pg_end = pg; pg_end <= vmp->lastpg &&
            (pg_end->pg_flags & PQ_FREE) == PQ_FREE; pg_end++)
                ;
        return pg_end - pg;
}
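
/*
 * Usage sketch (illustrative only, not part of the original file): skipping
 * runs of free pages while walking physical memory, which is what the RLE
 * helper above is intended for.  The start/end bounds and the save_page()
 * helper are hypothetical.
 *
 *	paddr_t pa;
 *	psize_t run;
 *
 *	for (pa = start; pa < end; ) {
 *		run = uvm_page_rle(pa);
 *		if (run != 0) {
 *			pa += ptoa(run);
 *		} else {
 *			save_page(pa);
 *			pa += PAGE_SIZE;
 *		}
 *	}
 */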

/*
 * get_hibernate_info
 *
 * Fills out the hibernate_info union pointed to by hiber_info
 * with information about this machine (swap signature block
 * offsets, number of memory ranges, kernel in use, etc.).
 */
int
get_hibernate_info(union hibernate_info *hiber_info)
{
        int chunktable_size;
        struct disklabel dl;
        char err_string[128], *dl_ret;

        /* Determine I/O function to use */
        hiber_info->io_func = get_hibernate_io_function();
        if (hiber_info->io_func == NULL)
                return (1);

        /* Calculate hibernate device */
        hiber_info->device = swdevt[0].sw_dev;

        /* Read disklabel (used to calculate signature and image offsets) */
        dl_ret = disk_readlabel(&dl, hiber_info->device, err_string, 128);

        if (dl_ret) {
                printf("Hibernate error reading disklabel: %s\n", dl_ret);
                return (1);
        }

        hiber_info->secsize = dl.d_secsize;

        /* Make sure the signature can fit in one block */
        KASSERT(sizeof(union hibernate_info)/hiber_info->secsize == 1);

        /* Calculate swap offset from start of disk */
        hiber_info->swap_offset = dl.d_partitions[1].p_offset;

        /* Calculate signature block location */
        hiber_info->sig_offset = dl.d_partitions[1].p_offset +
            dl.d_partitions[1].p_size -
            sizeof(union hibernate_info)/hiber_info->secsize;

        chunktable_size = HIBERNATE_CHUNK_TABLE_SIZE / hiber_info->secsize;

        /* Calculate memory image location */
        hiber_info->image_offset = dl.d_partitions[1].p_offset +
            dl.d_partitions[1].p_size -
            (hiber_info->image_size / hiber_info->secsize) -
            sizeof(union hibernate_info)/hiber_info->secsize -
            chunktable_size;

        /* Stash kernel version information */
        bzero(&hiber_info->kernel_version, 128);
        bcopy(version, &hiber_info->kernel_version,
            min(strlen(version), sizeof(hiber_info->kernel_version) - 1));

        /* Allocate piglet region */
        if (uvm_pmr_alloc_piglet(&hiber_info->piglet_base, HIBERNATE_CHUNK_SIZE,
            HIBERNATE_CHUNK_SIZE)) {
                printf("Hibernate failed to allocate the piglet\n");
                return (1);
        }

        return get_hibernate_info_md(hiber_info);
}

/*
 * hibernate_zlib_alloc
 *
 * Allocate nitems*size bytes from the hiballoc area presently in use.
 */
void *
hibernate_zlib_alloc(void *unused, int nitems, int size)
{
        return hib_alloc(&hibernate_state->hiballoc_arena, nitems*size);
}

/*
 * hibernate_zlib_free
 *
 * Free the memory pointed to by addr in the hiballoc area presently in use.
 */
void
hibernate_zlib_free(void *unused, void *addr)
{
        hib_free(&hibernate_state->hiballoc_arena, addr);
}
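
/*
 * Usage sketch (illustrative only, not part of the original file): how the
 * two zlib callbacks above are typically wired into a z_stream.  The
 * hib_stream field name is an assumption, not code from this file.
 *
 *	z_stream *s = &hibernate_state->hib_stream;
 *
 *	s->zalloc = (alloc_func)hibernate_zlib_alloc;
 *	s->zfree = (free_func)hibernate_zlib_free;
 *	s->opaque = NULL;
 *	if (deflateInit(s, Z_BEST_SPEED) != Z_OK)
 *		return (1);
 */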