/*	$OpenBSD: subr_hibernate.c,v 1.6 2011/07/08 21:00:53 ariane Exp $	*/

/*
 * Copyright (c) 2011 Ariane van der Steldt <ariane@stack.nl>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include <sys/hibernate.h>
#include <sys/param.h>
#include <sys/tree.h>
#include <sys/types.h>
#include <sys/systm.h>
#include <uvm/uvm.h>


/*
 * Hib alloc enforced alignment.
 */
#define HIB_ALIGN		8 /* bytes alignment */

/*
 * sizeof builtin operation, but with alignment constraint.
 */
#define HIB_SIZEOF(_type)	roundup(sizeof(_type), HIB_ALIGN)

struct hiballoc_entry
{
	size_t			hibe_use;
	size_t			hibe_space;
	RB_ENTRY(hiballoc_entry) hibe_entry;
};

/*
 * Compare hiballoc entries based on the address they manage.
 *
 * Since the address is fixed, relative to struct hiballoc_entry,
 * we just compare the hiballoc_entry pointers.
 */
static __inline int
hibe_cmp(struct hiballoc_entry *l, struct hiballoc_entry *r)
{
	return l < r ? -1 : (l > r);
}

RB_PROTOTYPE(hiballoc_addr, hiballoc_entry, hibe_entry, hibe_cmp)

/*
 * Given a hiballoc entry, return the address it manages.
 */
static __inline void*
hib_entry_to_addr(struct hiballoc_entry *entry)
{
	caddr_t addr;

	addr = (caddr_t)entry;
	addr += HIB_SIZEOF(struct hiballoc_entry);
	return addr;
}

/*
 * Given an address, find the hiballoc entry that corresponds to it.
 */
static __inline struct hiballoc_entry*
hib_addr_to_entry(void* addr_param)
{
	caddr_t addr;

	addr = (caddr_t)addr_param;
	addr -= HIB_SIZEOF(struct hiballoc_entry);
	return (struct hiballoc_entry*)addr;
}

RB_GENERATE(hiballoc_addr, hiballoc_entry, hibe_entry, hibe_cmp)

/*
 * Allocate memory from the arena.
 *
 * Returns NULL if no memory is available.
 */
void*
hib_alloc(struct hiballoc_arena *arena, size_t alloc_sz)
{
	struct hiballoc_entry *entry, *new_entry;
	size_t find_sz;

	/*
	 * Enforce alignment of HIB_ALIGN bytes.
	 *
	 * Note that, because the entry is put in front of the allocation,
	 * 0-byte allocations are guaranteed a unique address.
	 */
	alloc_sz = roundup(alloc_sz, HIB_ALIGN);

	/*
	 * Find an entry with hibe_space >= find_sz.
	 *
	 * If the root node is not large enough, we switch to tree traversal.
	 * Because all entries are made at the bottom of the free space,
	 * traversal from the end has a slightly better chance of yielding
	 * a sufficiently large space.
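	 *
	 * Note that find_sz below also accounts for the struct
	 * hiballoc_entry header that will be placed in front of the new
	 * allocation, in addition to the rounded-up allocation size.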
	 */
	find_sz = alloc_sz + HIB_SIZEOF(struct hiballoc_entry);
	entry = RB_ROOT(&arena->hib_addrs);
	if (entry != NULL && entry->hibe_space < find_sz) {
		RB_FOREACH_REVERSE(entry, hiballoc_addr, &arena->hib_addrs) {
			if (entry->hibe_space >= find_sz)
				break;
		}
	}

	/*
	 * Insufficient or too fragmented memory.
	 */
	if (entry == NULL)
		return NULL;

	/*
	 * Create new entry in allocated space.
	 */
	new_entry = (struct hiballoc_entry*)(
	    (caddr_t)hib_entry_to_addr(entry) + entry->hibe_use);
	new_entry->hibe_space = entry->hibe_space - find_sz;
	new_entry->hibe_use = alloc_sz;

	/*
	 * Insert entry.
	 */
	if (RB_INSERT(hiballoc_addr, &arena->hib_addrs, new_entry) != NULL)
		panic("hib_alloc: insert failure");
	entry->hibe_space = 0;

	/* Return address managed by entry. */
	return hib_entry_to_addr(new_entry);
}

/*
 * Free a pointer previously allocated from this arena.
 *
 * If addr is NULL, this will be silently accepted.
 */
void
hib_free(struct hiballoc_arena *arena, void *addr)
{
	struct hiballoc_entry *entry, *prev;

	if (addr == NULL)
		return;

	/*
	 * Derive entry from addr and check it is really in this arena.
	 */
	entry = hib_addr_to_entry(addr);
	if (RB_FIND(hiballoc_addr, &arena->hib_addrs, entry) != entry)
		panic("hib_free: freed item %p not in hib arena", addr);

	/*
	 * Give the space in entry to its predecessor.
	 *
	 * If entry has no predecessor, change its used space into free space
	 * instead.
	 */
	prev = RB_PREV(hiballoc_addr, &arena->hib_addrs, entry);
	if (prev != NULL &&
	    (void*)((caddr_t)prev + HIB_SIZEOF(struct hiballoc_entry) +
	    prev->hibe_use + prev->hibe_space) == entry) {
		/* Merge entry. */
		RB_REMOVE(hiballoc_addr, &arena->hib_addrs, entry);
		prev->hibe_space += HIB_SIZEOF(struct hiballoc_entry) +
		    entry->hibe_use + entry->hibe_space;
	} else {
		/* Flip used memory to free space. */
		entry->hibe_space += entry->hibe_use;
		entry->hibe_use = 0;
	}
}

/*
 * Initialize hiballoc.
 *
 * The allocator will manage memory at ptr, which is len bytes.
 */
int
hiballoc_init(struct hiballoc_arena *arena, void *p_ptr, size_t p_len)
{
	struct hiballoc_entry *entry;
	caddr_t ptr;
	size_t len;

	RB_INIT(&arena->hib_addrs);

	/*
	 * Hib allocator enforces HIB_ALIGN alignment.
	 * Fixup ptr and len.
	 */
	ptr = (caddr_t)roundup((vaddr_t)p_ptr, HIB_ALIGN);
	len = p_len - ((size_t)ptr - (size_t)p_ptr);
	len &= ~((size_t)HIB_ALIGN - 1);

	/*
	 * Insufficient memory to be able to allocate and also do bookkeeping.
	 */
	if (len <= HIB_SIZEOF(struct hiballoc_entry))
		return ENOMEM;

	/*
	 * Create entry describing space.
	 */
	entry = (struct hiballoc_entry*)ptr;
	entry->hibe_use = 0;
	entry->hibe_space = len - HIB_SIZEOF(struct hiballoc_entry);
	RB_INSERT(hiballoc_addr, &arena->hib_addrs, entry);

	return 0;
}
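
/*
 * Example use of the hib arena (illustrative sketch only; "scratch" and
 * "scratch_len" are hypothetical and not part of this file):
 *
 *	struct hiballoc_arena arena;
 *	void *p;
 *
 *	if (hiballoc_init(&arena, scratch, scratch_len) == 0) {
 *		p = hib_alloc(&arena, 128);
 *		if (p != NULL)
 *			hib_free(&arena, p);
 *	}
 */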

/*
 * Zero all free memory.
 */
void
uvm_pmr_zero_everything(void)
{
	struct uvm_pmemrange *pmr;
	struct vm_page *pg;
	int i;

	uvm_lock_fpageq();
	TAILQ_FOREACH(pmr, &uvm.pmr_control.use, pmr_use) {
		/* Zero single pages. */
		while ((pg = TAILQ_FIRST(&pmr->single[UVM_PMR_MEMTYPE_DIRTY]))
		    != NULL) {
			uvm_pmr_remove(pmr, pg);
			uvm_pagezero(pg);
			atomic_setbits_int(&pg->pg_flags, PG_ZERO);
			uvmexp.zeropages++;
			uvm_pmr_insert(pmr, pg, 0);
		}

		/* Zero multi page ranges. */
		while ((pg = RB_ROOT(&pmr->size[UVM_PMR_MEMTYPE_DIRTY]))
		    != NULL) {
			pg--; /* Size tree always has second page. */
			uvm_pmr_remove(pmr, pg);
			for (i = 0; i < pg->fpgsz; i++) {
				uvm_pagezero(&pg[i]);
				atomic_setbits_int(&pg[i].pg_flags, PG_ZERO);
				uvmexp.zeropages++;
			}
			uvm_pmr_insert(pmr, pg, 0);
		}
	}
	uvm_unlock_fpageq();
}

/*
 * Mark all memory as dirty.
 *
 * Used to inform the system that the clean memory isn't clean for some
 * reason, for example because we just came back from hibernate.
 */
void
uvm_pmr_dirty_everything(void)
{
	struct uvm_pmemrange *pmr;
	struct vm_page *pg;
	int i;

	uvm_lock_fpageq();
	TAILQ_FOREACH(pmr, &uvm.pmr_control.use, pmr_use) {
		/* Dirty single pages. */
		while ((pg = TAILQ_FIRST(&pmr->single[UVM_PMR_MEMTYPE_ZERO]))
		    != NULL) {
			uvm_pmr_remove(pmr, pg);
			atomic_clearbits_int(&pg->pg_flags, PG_ZERO);
			uvm_pmr_insert(pmr, pg, 0);
		}

		/* Dirty multi page ranges. */
		while ((pg = RB_ROOT(&pmr->size[UVM_PMR_MEMTYPE_ZERO]))
		    != NULL) {
			pg--; /* Size tree always has second page. */
			uvm_pmr_remove(pmr, pg);
			for (i = 0; i < pg->fpgsz; i++)
				atomic_clearbits_int(&pg[i].pg_flags, PG_ZERO);
			uvm_pmr_insert(pmr, pg, 0);
		}
	}

	uvmexp.zeropages = 0;
	uvm_unlock_fpageq();
}

/*
 * Allocate the highest address that can hold sz.
 *
 * sz in bytes.
 */
int
uvm_pmr_alloc_pig(paddr_t *addr, psize_t sz)
{
	struct uvm_pmemrange *pmr;
	struct vm_page *pig_pg, *pg;

	/*
	 * Convert sz to pages, since that is what pmemrange uses internally.
	 */
	sz = atop(round_page(sz));

	uvm_lock_fpageq();

	TAILQ_FOREACH(pmr, &uvm.pmr_control.use, pmr_use) {
		RB_FOREACH_REVERSE(pig_pg, uvm_pmr_addr, &pmr->addr) {
			if (pig_pg->fpgsz >= sz) {
				goto found;
			}
		}
	}

	/*
	 * Allocation failure.
	 */
	uvm_unlock_fpageq();
	return ENOMEM;

found:
	/* Remove page from freelist. */
	uvm_pmr_remove_size(pmr, pig_pg);
	pig_pg->fpgsz -= sz;
	pg = pig_pg + pig_pg->fpgsz;
	if (pig_pg->fpgsz == 0)
		uvm_pmr_remove_addr(pmr, pig_pg);
	else
		uvm_pmr_insert_size(pmr, pig_pg);

	uvmexp.free -= sz;
	*addr = VM_PAGE_TO_PHYS(pg);

	/*
	 * Update pg flags.
	 *
	 * Note that we trash the sz argument now.
	 */
	while (sz > 0) {
		KASSERT(pg->pg_flags & PQ_FREE);

		atomic_clearbits_int(&pg->pg_flags,
		    PG_PMAP0|PG_PMAP1|PG_PMAP2|PG_PMAP3);

		if (pg->pg_flags & PG_ZERO)
			uvmexp.zeropages--;
		atomic_clearbits_int(&pg->pg_flags,
		    PG_ZERO|PQ_FREE);

		pg->uobject = NULL;
		pg->uanon = NULL;
		pg->pg_version++;

		/*
		 * Next.
		 */
		pg++;
		sz--;
	}

	/* Return. */
	uvm_unlock_fpageq();
	return 0;
}
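
/*
 * Illustrative call of uvm_pmr_alloc_pig() (sketch only; the requested
 * size and the caller's use of the returned physical address are
 * hypothetical):
 *
 *	paddr_t pig_pa;
 *
 *	if (uvm_pmr_alloc_pig(&pig_pa, 4UL * 1024 * 1024) == 0) {
 *		...pig_pa is the physical start of a 4MB region that has
 *		been taken off the free page lists...
 *	}
 */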

/*
 * Allocate a piglet area.
 *
 * This is as low as possible.
 * Piglets are aligned.
 *
 * sz and align in bytes.
 *
 * The call may sleep, waiting for the pagedaemon to attempt to free memory.
 * The pagedaemon may decide it is not possible to free enough memory, causing
 * the allocation to fail.
 */
int
uvm_pmr_alloc_piglet(paddr_t *addr, psize_t sz, paddr_t align)
{
	paddr_t pg_addr, piglet_addr;
	struct uvm_pmemrange *pmr;
	struct vm_page *pig_pg, *pg;
	struct pglist pageq;
	int pdaemon_woken;

	KASSERT((align & (align - 1)) == 0);
	pdaemon_woken = 0; /* Didn't wake the pagedaemon. */

	/*
	 * Fixup arguments: align must be at least PAGE_SIZE,
	 * sz will be converted to pagecount, since that is what
	 * pmemrange uses internally.
	 */
	if (align < PAGE_SIZE)
		align = PAGE_SIZE;
	sz = atop(round_page(sz));

	uvm_lock_fpageq();

	TAILQ_FOREACH_REVERSE(pmr, &uvm.pmr_control.use, uvm_pmemrange_use,
	    pmr_use) {
retry:
		/*
		 * Search for a range with enough space.
		 * Use the address tree, to ensure the range is as low as
		 * possible.
		 */
		RB_FOREACH(pig_pg, uvm_pmr_addr, &pmr->addr) {
			pg_addr = VM_PAGE_TO_PHYS(pig_pg);
			piglet_addr = (pg_addr + (align - 1)) & ~(align - 1);

			/*
			 * The aligned piglet must fit entirely within this
			 * free range.
			 */
			if (atop(pg_addr) + pig_pg->fpgsz >=
			    atop(piglet_addr) + sz) {
				goto found;
			}
		}

		/*
		 * Try to coerce the pagedaemon into freeing memory
		 * for the piglet.
		 *
		 * pdaemon_woken is set to prevent the code from
		 * falling into an endless loop.
		 */
		if (!pdaemon_woken) {
			pdaemon_woken = 1;
			if (uvm_wait_pla(ptoa(pmr->low), ptoa(pmr->high) - 1,
			    ptoa(sz), UVM_PLA_FAILOK) == 0)
				goto retry;
		}
	}

	/* Return failure. */
	uvm_unlock_fpageq();
	return ENOMEM;

found:
	/*
	 * Extract piglet from pigpen.
	 */
	TAILQ_INIT(&pageq);
	uvm_pmr_extract_range(pmr, pig_pg,
	    atop(piglet_addr), atop(piglet_addr) + sz, &pageq);

	*addr = piglet_addr;
	uvmexp.free -= sz;

	/*
	 * Update pg flags.
	 */
	TAILQ_FOREACH(pg, &pageq, pageq) {
		KASSERT(pg->pg_flags & PQ_FREE);

		atomic_clearbits_int(&pg->pg_flags,
		    PG_PMAP0|PG_PMAP1|PG_PMAP2|PG_PMAP3);

		if (pg->pg_flags & PG_ZERO)
			uvmexp.zeropages--;
		atomic_clearbits_int(&pg->pg_flags,
		    PG_ZERO|PQ_FREE);

		pg->uobject = NULL;
		pg->uanon = NULL;
		pg->pg_version++;
	}

	uvm_unlock_fpageq();
	return 0;
}

/*
 * Physmem RLE compression support.
 *
 * Given a physical page address, return the number of consecutive free
 * pages starting at that address.
 * Returns 0 if the page at addr is not free.
 */
psize_t
uvm_page_rle(paddr_t addr)
{
	struct vm_page *pg, *pg_end;
	struct vm_physseg *vmp;
	int pseg_idx, off_idx;

	pseg_idx = vm_physseg_find(atop(addr), &off_idx);
	if (pseg_idx == -1)
		return 0;

	vmp = &vm_physmem[pseg_idx];
	pg = &vmp->pgs[off_idx];
	if (!(pg->pg_flags & PQ_FREE))
		return 0;

	/*
	 * Search for the first non-free page after pg.
	 * Note that the page may not be the first page in a free pmemrange,
	 * therefore pg->fpgsz cannot be used.
	 */
	for (pg_end = pg; pg_end <= vmp->lastpg &&
	    (pg_end->pg_flags & PQ_FREE) == PQ_FREE; pg_end++);
	return pg_end - pg;
}
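
/*
 * Example of driving an RLE scan of physical memory with uvm_page_rle()
 * (sketch only; "start", "end" and the record emission are hypothetical,
 * not part of this file):
 *
 *	paddr_t addr;
 *	psize_t run;
 *
 *	for (addr = start; addr < end; ) {
 *		run = uvm_page_rle(addr);
 *		if (run > 0) {
 *			...emit (addr, run) as one free-range record...
 *			addr += ptoa(run);
 *		} else
 *			addr += PAGE_SIZE;
 *	}
 */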