/*	$OpenBSD: subr_hibernate.c,v 1.8 2011/07/09 00:08:04 mlarkin Exp $	*/

/*
 * Copyright (c) 2011 Ariane van der Steldt <ariane@stack.nl>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include <sys/hibernate.h>
#include <sys/param.h>
#include <sys/tree.h>
#include <sys/types.h>
#include <sys/systm.h>
#include <sys/disklabel.h>
#include <sys/conf.h>
#include <uvm/uvm.h>
#include <machine/hibernate.h>

extern char *disk_readlabel(struct disklabel *, dev_t, char *, size_t);

/*
 * Hib alloc enforced alignment.
 */
#define HIB_ALIGN		8	/* bytes alignment */

/*
 * sizeof builtin operation, but with alignment constraint.
 */
#define HIB_SIZEOF(_type)	roundup(sizeof(_type), HIB_ALIGN)

struct hiballoc_entry {
        size_t                  hibe_use;
        size_t                  hibe_space;
        RB_ENTRY(hiballoc_entry) hibe_entry;
};

/*
 * Compare hiballoc entries based on the address they manage.
 *
 * Since the address is fixed relative to struct hiballoc_entry,
 * we just compare the hiballoc_entry pointers.
 */
static __inline int
hibe_cmp(struct hiballoc_entry *l, struct hiballoc_entry *r)
{
        return l < r ? -1 : (l > r);
}

RB_PROTOTYPE(hiballoc_addr, hiballoc_entry, hibe_entry, hibe_cmp)

/*
 * Given a hiballoc entry, return the address it manages.
 */
static __inline void *
hib_entry_to_addr(struct hiballoc_entry *entry)
{
        caddr_t addr;

        addr = (caddr_t)entry;
        addr += HIB_SIZEOF(struct hiballoc_entry);
        return addr;
}

/*
 * Given an address, find the hiballoc entry that manages it.
 */
static __inline struct hiballoc_entry *
hib_addr_to_entry(void *addr_param)
{
        caddr_t addr;

        addr = (caddr_t)addr_param;
        addr -= HIB_SIZEOF(struct hiballoc_entry);
        return (struct hiballoc_entry *)addr;
}

RB_GENERATE(hiballoc_addr, hiballoc_entry, hibe_entry, hibe_cmp)
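
/*
 * Layout note: every address handed out by hib_alloc() below is preceded
 * by its own struct hiballoc_entry, HIB_SIZEOF(struct hiballoc_entry)
 * bytes in front of it:
 *
 *	entry                        hib_entry_to_addr(entry)
 *	|                            |
 *	v                            v
 *	+----------------------------+-------------------+------------------+
 *	| struct hiballoc_entry      | hibe_use bytes     | hibe_space bytes |
 *	| (bookkeeping header)       | handed to caller   | still free       |
 *	+----------------------------+-------------------+------------------+
 *
 * hib_entry_to_addr() and hib_addr_to_entry() above simply step over that
 * fixed-size header in either direction.
 */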

/*
 * Allocate memory from the arena.
 *
 * Returns NULL if no memory is available.
 */
void *
hib_alloc(struct hiballoc_arena *arena, size_t alloc_sz)
{
        struct hiballoc_entry *entry, *new_entry;
        size_t find_sz;

        /*
         * Enforce alignment of HIB_ALIGN bytes.
         *
         * Note that, because the entry is put in front of the allocation,
         * 0-byte allocations are guaranteed a unique address.
         */
        alloc_sz = roundup(alloc_sz, HIB_ALIGN);

        /*
         * Find an entry with hibe_space >= find_sz.
         *
         * If the root node is not large enough, we switch to tree traversal.
         * Because all entries are made at the bottom of the free space,
         * traversal from the end has a slightly better chance of yielding
         * a sufficiently large space.
         */
        find_sz = alloc_sz + HIB_SIZEOF(struct hiballoc_entry);
        entry = RB_ROOT(&arena->hib_addrs);
        if (entry != NULL && entry->hibe_space < find_sz) {
                RB_FOREACH_REVERSE(entry, hiballoc_addr, &arena->hib_addrs) {
                        if (entry->hibe_space >= find_sz)
                                break;
                }
        }

        /*
         * Insufficient or too fragmented memory.
         */
        if (entry == NULL)
                return NULL;

        /*
         * Create new entry in allocated space.
         */
        new_entry = (struct hiballoc_entry *)(
            (caddr_t)hib_entry_to_addr(entry) + entry->hibe_use);
        new_entry->hibe_space = entry->hibe_space - find_sz;
        new_entry->hibe_use = alloc_sz;

        /*
         * Insert entry.
         */
        if (RB_INSERT(hiballoc_addr, &arena->hib_addrs, new_entry) != NULL)
                panic("hib_alloc: insert failure");
        entry->hibe_space = 0;

        /* Return address managed by entry. */
        return hib_entry_to_addr(new_entry);
}

/*
 * Free a pointer previously allocated from this arena.
 *
 * If addr is NULL, this will be silently accepted.
 */
void
hib_free(struct hiballoc_arena *arena, void *addr)
{
        struct hiballoc_entry *entry, *prev;

        if (addr == NULL)
                return;

        /*
         * Derive entry from addr and check it is really in this arena.
         */
        entry = hib_addr_to_entry(addr);
        if (RB_FIND(hiballoc_addr, &arena->hib_addrs, entry) != entry)
                panic("hib_free: freed item %p not in hib arena", addr);

        /*
         * Give the space in entry to its predecessor.
         *
         * If entry has no predecessor, change its used space into free space
         * instead.
         */
        prev = RB_PREV(hiballoc_addr, &arena->hib_addrs, entry);
        if (prev != NULL &&
            (void *)((caddr_t)prev + HIB_SIZEOF(struct hiballoc_entry) +
            prev->hibe_use + prev->hibe_space) == entry) {
                /* Merge entry. */
                RB_REMOVE(hiballoc_addr, &arena->hib_addrs, entry);
                prev->hibe_space += HIB_SIZEOF(struct hiballoc_entry) +
                    entry->hibe_use + entry->hibe_space;
        } else {
                /* Flip used memory to free space. */
                entry->hibe_space += entry->hibe_use;
                entry->hibe_use = 0;
        }
}

/*
 * Initialize hiballoc.
 *
 * The allocator will manage the memory at ptr, which is len bytes.
 */
int
hiballoc_init(struct hiballoc_arena *arena, void *p_ptr, size_t p_len)
{
        struct hiballoc_entry *entry;
        caddr_t ptr;
        size_t len;

        RB_INIT(&arena->hib_addrs);

        /*
         * Hib allocator enforces HIB_ALIGN alignment.
         * Fixup ptr and len.
         */
        ptr = (caddr_t)roundup((vaddr_t)p_ptr, HIB_ALIGN);
        len = p_len - ((size_t)ptr - (size_t)p_ptr);
        len &= ~((size_t)HIB_ALIGN - 1);

        /*
         * Insufficient memory to be able to allocate and also do bookkeeping.
         */
        if (len <= HIB_SIZEOF(struct hiballoc_entry))
                return ENOMEM;

        /*
         * Create entry describing space.
         */
        entry = (struct hiballoc_entry *)ptr;
        entry->hibe_use = 0;
        entry->hibe_space = len - HIB_SIZEOF(struct hiballoc_entry);
        RB_INSERT(hiballoc_addr, &arena->hib_addrs, entry);

        return 0;
}
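
/*
 * Taken together, a caller that owns a region of memory can run the
 * allocator on top of it roughly as follows (illustrative sketch only;
 * "buf" and "buf_len" are placeholders for whatever backing memory the
 * caller provides):
 *
 *	struct hiballoc_arena arena;
 *	void *p;
 *
 *	if (hiballoc_init(&arena, buf, buf_len) != 0)
 *		return (ENOMEM);
 *	p = hib_alloc(&arena, 512);
 *	if (p == NULL)
 *		return (ENOMEM);
 *	...
 *	hib_free(&arena, p);
 *
 * All sizes are rounded up to HIB_ALIGN, and the arena never calls back
 * into the regular kernel allocators.
 */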

/*
 * Zero all free memory.
 */
void
uvm_pmr_zero_everything(void)
{
        struct uvm_pmemrange *pmr;
        struct vm_page *pg;
        int i;

        uvm_lock_fpageq();
        TAILQ_FOREACH(pmr, &uvm.pmr_control.use, pmr_use) {
                /* Zero single pages. */
                while ((pg = TAILQ_FIRST(&pmr->single[UVM_PMR_MEMTYPE_DIRTY]))
                    != NULL) {
                        uvm_pmr_remove(pmr, pg);
                        uvm_pagezero(pg);
                        atomic_setbits_int(&pg->pg_flags, PG_ZERO);
                        uvmexp.zeropages++;
                        uvm_pmr_insert(pmr, pg, 0);
                }

                /* Zero multi page ranges. */
                while ((pg = RB_ROOT(&pmr->size[UVM_PMR_MEMTYPE_DIRTY]))
                    != NULL) {
                        pg--; /* Size tree always has second page. */
                        uvm_pmr_remove(pmr, pg);
                        for (i = 0; i < pg->fpgsz; i++) {
                                uvm_pagezero(&pg[i]);
                                atomic_setbits_int(&pg[i].pg_flags, PG_ZERO);
                                uvmexp.zeropages++;
                        }
                        uvm_pmr_insert(pmr, pg, 0);
                }
        }
        uvm_unlock_fpageq();
}

/*
 * Mark all memory as dirty.
 *
 * Used to inform the system that the clean memory isn't clean for some
 * reason, for example because we just came back from hibernate.
 */
void
uvm_pmr_dirty_everything(void)
{
        struct uvm_pmemrange *pmr;
        struct vm_page *pg;
        int i;

        uvm_lock_fpageq();
        TAILQ_FOREACH(pmr, &uvm.pmr_control.use, pmr_use) {
                /* Dirty single pages. */
                while ((pg = TAILQ_FIRST(&pmr->single[UVM_PMR_MEMTYPE_ZERO]))
                    != NULL) {
                        uvm_pmr_remove(pmr, pg);
                        atomic_clearbits_int(&pg->pg_flags, PG_ZERO);
                        uvm_pmr_insert(pmr, pg, 0);
                }

                /* Dirty multi page ranges. */
                while ((pg = RB_ROOT(&pmr->size[UVM_PMR_MEMTYPE_ZERO]))
                    != NULL) {
                        pg--; /* Size tree always has second page. */
                        uvm_pmr_remove(pmr, pg);
                        for (i = 0; i < pg->fpgsz; i++)
                                atomic_clearbits_int(&pg[i].pg_flags, PG_ZERO);
                        uvm_pmr_insert(pmr, pg, 0);
                }
        }

        uvmexp.zeropages = 0;
        uvm_unlock_fpageq();
}

/*
 * Allocate the highest address that can hold sz.
 *
 * sz in bytes.
 */
int
uvm_pmr_alloc_pig(paddr_t *addr, psize_t sz)
{
        struct uvm_pmemrange *pmr;
        struct vm_page *pig_pg, *pg;

        /*
         * Convert sz to pages, since that is what pmemrange uses internally.
         */
        sz = atop(round_page(sz));

        uvm_lock_fpageq();

        TAILQ_FOREACH(pmr, &uvm.pmr_control.use, pmr_use) {
                RB_FOREACH_REVERSE(pig_pg, uvm_pmr_addr, &pmr->addr) {
                        if (pig_pg->fpgsz >= sz) {
                                goto found;
                        }
                }
        }

        /*
         * Allocation failure.
         */
        uvm_unlock_fpageq();
        return ENOMEM;

found:
        /* Remove page from freelist. */
        uvm_pmr_remove_size(pmr, pig_pg);
        pig_pg->fpgsz -= sz;
        pg = pig_pg + pig_pg->fpgsz;
        if (pig_pg->fpgsz == 0)
                uvm_pmr_remove_addr(pmr, pig_pg);
        else
                uvm_pmr_insert_size(pmr, pig_pg);

        uvmexp.free -= sz;
        *addr = VM_PAGE_TO_PHYS(pg);

        /*
         * Update pg flags.
         *
         * Note that we trash the sz argument now.
         */
        while (sz > 0) {
                KASSERT(pg->pg_flags & PQ_FREE);

                atomic_clearbits_int(&pg->pg_flags,
                    PG_PMAP0|PG_PMAP1|PG_PMAP2|PG_PMAP3);

                if (pg->pg_flags & PG_ZERO)
                        uvmexp.zeropages -= sz;
                atomic_clearbits_int(&pg->pg_flags,
                    PG_ZERO|PQ_FREE);

                pg->uobject = NULL;
                pg->uanon = NULL;
                pg->pg_version++;

                /*
                 * Next.
                 */
                pg++;
                sz--;
        }

        /* Return. */
        uvm_unlock_fpageq();
        return 0;
}
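
/*
 * For example (size purely illustrative), a caller wanting a large
 * contiguous area near the top of memory might do:
 *
 *	paddr_t pig_pa;
 *	psize_t pig_sz = 256UL * 1024 * 1024;
 *
 *	if (uvm_pmr_alloc_pig(&pig_pa, pig_sz) != 0)
 *		printf("hibernate: no contiguous free range found\n");
 *
 * On success, pig_pa holds the physical address of the allocated range,
 * carved from the top of the first sufficiently large free range found.
 */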

/*
 * Allocate a piglet area.
 *
 * The piglet is placed as low in physical memory as possible.
 * Piglets are aligned.
 *
 * sz and align in bytes.
 *
 * The call may sleep, waiting for the pagedaemon to attempt to free memory.
 * The pagedaemon may decide it's not possible to free enough memory, causing
 * the allocation to fail.
 */
int
uvm_pmr_alloc_piglet(paddr_t *addr, psize_t sz, paddr_t align)
{
        vaddr_t pg_addr, piglet_addr;
        struct uvm_pmemrange *pmr;
        struct vm_page *pig_pg, *pg;
        struct pglist pageq;
        int pdaemon_woken;

        KASSERT((align & (align - 1)) == 0);
        pdaemon_woken = 0; /* Didn't wake the pagedaemon. */

        /*
         * Fixup arguments: align must be at least PAGE_SIZE,
         * sz will be converted to pagecount, since that is what
         * pmemrange uses internally.
         */
        if (align < PAGE_SIZE)
                align = PAGE_SIZE;
        sz = atop(round_page(sz));

        uvm_lock_fpageq();

        TAILQ_FOREACH_REVERSE(pmr, &uvm.pmr_control.use, uvm_pmemrange_use,
            pmr_use) {
retry:
                /*
                 * Search for a range with enough space.
                 * Use the address tree, to ensure the range is as low as
                 * possible.
                 */
                RB_FOREACH(pig_pg, uvm_pmr_addr, &pmr->addr) {
                        pg_addr = VM_PAGE_TO_PHYS(pig_pg);
                        piglet_addr = (pg_addr + (align - 1)) & ~(align - 1);

                        if (pig_pg->fpgsz >= sz) {
                                goto found;
                        }

                        if (atop(pg_addr) + pig_pg->fpgsz >
                            atop(piglet_addr) + sz) {
                                goto found;
                        }
                }

                /*
                 * Try to coerce the pagedaemon into freeing memory
                 * for the piglet.
                 *
                 * pdaemon_woken is set to prevent the code from
                 * falling into an endless loop.
                 */
                if (!pdaemon_woken) {
                        pdaemon_woken = 1;
                        if (uvm_wait_pla(ptoa(pmr->low), ptoa(pmr->high) - 1,
                            ptoa(sz), UVM_PLA_FAILOK) == 0)
                                goto retry;
                }
        }

        /* Return failure. */
        uvm_unlock_fpageq();
        return ENOMEM;

found:
        /*
         * Extract piglet from pigpen.
         */
        TAILQ_INIT(&pageq);
        uvm_pmr_extract_range(pmr, pig_pg,
            atop(piglet_addr), atop(piglet_addr) + sz, &pageq);

        *addr = piglet_addr;
        uvmexp.free -= sz;

        /*
         * Update pg flags.
         *
         * Note that we trash the sz argument now.
         */
        TAILQ_FOREACH(pg, &pageq, pageq) {
                KASSERT(pg->pg_flags & PQ_FREE);

                atomic_clearbits_int(&pg->pg_flags,
                    PG_PMAP0|PG_PMAP1|PG_PMAP2|PG_PMAP3);

                if (pg->pg_flags & PG_ZERO)
                        uvmexp.zeropages--;
                atomic_clearbits_int(&pg->pg_flags,
                    PG_ZERO|PQ_FREE);

                pg->uobject = NULL;
                pg->uanon = NULL;
                pg->pg_version++;
        }

        uvm_unlock_fpageq();
        return 0;
}
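
/*
 * The piglet start is aligned with the usual power-of-two rounding,
 * piglet_addr = (pg_addr + (align - 1)) & ~(align - 1).  As a worked
 * example, assuming a 4 MB alignment (align == 0x400000) and a free
 * range starting at physical address 0x01234000:
 *
 *	(0x01234000 + 0x003fffff) & ~0x003fffff == 0x01400000
 *
 * Since pg_addr is always page aligned, at most align - PAGE_SIZE bytes
 * at the bottom of the range are skipped to satisfy the alignment.
 */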

/*
 * Physmem RLE compression support.
 *
 * Given a physical page address, return the number of consecutive free
 * pages starting at that address.
 * Returns 0 if the page at addr is not free.
 */
psize_t
uvm_page_rle(paddr_t addr)
{
        struct vm_page *pg, *pg_end;
        struct vm_physseg *vmp;
        int pseg_idx, off_idx;

        pseg_idx = vm_physseg_find(atop(addr), &off_idx);
        if (pseg_idx == -1)
                return 0;

        vmp = &vm_physmem[pseg_idx];
        pg = &vmp->pgs[off_idx];
        if (!(pg->pg_flags & PQ_FREE))
                return 0;

        /*
         * Search for the first non-free page after pg.
         * Note that the page may not be the first page in a free pmemrange,
         * therefore pg->fpgsz cannot be used.
         */
        for (pg_end = pg; pg_end <= vmp->lastpg &&
            (pg_end->pg_flags & PQ_FREE) == PQ_FREE; pg_end++)
                ;
        return pg_end - pg;
}

/*
 * get_hibernate_info
 *
 * Fills out the hibernate_info union pointed to by hiber_info
 * with information about this machine (swap signature block
 * offsets, number of memory ranges, kernel in use, etc.).
 */
int
get_hibernate_info(union hibernate_info *hiber_info)
{
        int chunktable_size;
        struct disklabel dl;
        char err_string[128], *dl_ret;

        /* Determine I/O function to use */
        hiber_info->io_func = get_hibernate_io_function();
        if (hiber_info->io_func == NULL)
                return (1);

        /* Calculate hibernate device */
        hiber_info->device = swdevt[0].sw_dev;

        /* Read disklabel (used to calculate signature and image offsets) */
        dl_ret = disk_readlabel(&dl, hiber_info->device, err_string, 128);

        if (dl_ret) {
                printf("Hibernate error reading disklabel: %s\n", dl_ret);
                return (1);
        }

        hiber_info->secsize = dl.d_secsize;

        /* Make sure the signature can fit in one block */
        KASSERT(sizeof(union hibernate_info)/hiber_info->secsize == 1);

        /* Calculate swap offset from start of disk */
        hiber_info->swap_offset = dl.d_partitions[1].p_offset;

        /* Calculate signature block location */
        hiber_info->sig_offset = dl.d_partitions[1].p_offset +
            dl.d_partitions[1].p_size -
            sizeof(union hibernate_info)/hiber_info->secsize;

        chunktable_size = HIBERNATE_CHUNK_TABLE_SIZE / hiber_info->secsize;

        /* Calculate memory image location */
        hiber_info->image_offset = dl.d_partitions[1].p_offset +
            dl.d_partitions[1].p_size -
            (hiber_info->image_size / hiber_info->secsize) -
            sizeof(union hibernate_info)/hiber_info->secsize -
            chunktable_size;

        /* Stash kernel version information */
        bzero(&hiber_info->kernel_version, 128);
        bcopy(version, &hiber_info->kernel_version,
            min(strlen(version), sizeof(hiber_info->kernel_version)-1));

        /* Allocate piglet region */
        if (uvm_pmr_alloc_piglet(&hiber_info->piglet_base, HIBERNATE_CHUNK_SIZE,
            HIBERNATE_CHUNK_SIZE)) {
                printf("Hibernate failed to allocate the piglet\n");
                return (1);
        }

        return get_hibernate_info_md(hiber_info);
}
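
/*
 * With the offsets computed in get_hibernate_info() above, the hibernate
 * area lives at the tail end of the swap partition ('b', partition index 1
 * in the disklabel), in units of hiber_info->secsize sectors:
 *
 *	+--------------+--------------+-------------+-----------------+
 *	| regular swap | memory image | chunk table | signature block |
 *	+--------------+--------------+-------------+-----------------+
 *	               ^                            ^                 ^
 *	               image_offset                 sig_offset        end of
 *	                                                              partition
 *
 * The signature block is the last sector of the partition (the KASSERT
 * above ensures it fits in exactly one sector), and the gap between the
 * end of the image and sig_offset is exactly chunktable_size sectors,
 * reserved for the chunk table.
 */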