1 /* $NetBSD: uvm_pglist.c,v 1.32 2004/09/17 20:46:03 yamt Exp $ */ 2 3 /*- 4 * Copyright (c) 1997 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility, 9 * NASA Ames Research Center. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 3. All advertising materials mentioning features or use of this software 20 * must display the following acknowledgement: 21 * This product includes software developed by the NetBSD 22 * Foundation, Inc. and its contributors. 23 * 4. Neither the name of The NetBSD Foundation nor the names of its 24 * contributors may be used to endorse or promote products derived 25 * from this software without specific prior written permission. 26 * 27 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 28 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 29 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 30 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 31 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 32 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 33 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 34 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 35 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 36 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 37 * POSSIBILITY OF SUCH DAMAGE. 38 */ 39 40 /* 41 * uvm_pglist.c: pglist functions 42 */ 43 44 #include <sys/cdefs.h> 45 __KERNEL_RCSID(0, "$NetBSD: uvm_pglist.c,v 1.32 2004/09/17 20:46:03 yamt Exp $"); 46 47 #include <sys/param.h> 48 #include <sys/systm.h> 49 #include <sys/malloc.h> 50 #include <sys/proc.h> 51 52 #include <uvm/uvm.h> 53 54 #ifdef VM_PAGE_ALLOC_MEMORY_STATS 55 #define STAT_INCR(v) (v)++ 56 #define STAT_DECR(v) do { \ 57 if ((v) == 0) \ 58 printf("%s:%d -- Already 0!\n", __FILE__, __LINE__); \ 59 else \ 60 (v)--; \ 61 } while (/*CONSTCOND*/ 0) 62 u_long uvm_pglistalloc_npages; 63 #else 64 #define STAT_INCR(v) 65 #define STAT_DECR(v) 66 #endif 67 68 /* 69 * uvm_pglistalloc: allocate a list of pages 70 * 71 * => allocated pages are placed onto an rlist. rlist is 72 * initialized by uvm_pglistalloc. 73 * => returns 0 on success or errno on failure 74 * => implementation allocates a single segment if any constraints are 75 * imposed by call arguments. 76 * => doesn't take into account clean non-busy pages on inactive list 77 * that could be used(?) 78 * => params: 79 * size the size of the allocation, rounded to page size. 80 * low the low address of the allowed allocation range. 81 * high the high address of the allowed allocation range. 82 * alignment memory must be aligned to this power-of-two boundary. 83 * boundary no segment in the allocation may cross this 84 * power-of-two boundary (relative to zero). 85 */ 86 87 static void uvm_pglist_add(struct vm_page *, struct pglist *); 88 static int uvm_pglistalloc_c_ps(struct vm_physseg *, int, paddr_t, paddr_t, 89 paddr_t, paddr_t, struct pglist *); 90 static int uvm_pglistalloc_contig(int, paddr_t, paddr_t, paddr_t, paddr_t, 91 struct pglist *); 92 static int uvm_pglistalloc_s_ps(struct vm_physseg *, int, paddr_t, paddr_t, 93 struct pglist *); 94 static int uvm_pglistalloc_simple(int, paddr_t, paddr_t, 95 struct pglist *, int); 96 97 static void 98 uvm_pglist_add(pg, rlist) 99 struct vm_page *pg; 100 struct pglist *rlist; 101 { 102 int free_list, color, pgflidx; 103 #ifdef DEBUG 104 struct vm_page *tp; 105 #endif 106 107 #if PGFL_NQUEUES != 2 108 #error uvm_pglistalloc needs to be updated 109 #endif 110 111 free_list = uvm_page_lookup_freelist(pg); 112 color = VM_PGCOLOR_BUCKET(pg); 113 pgflidx = (pg->flags & PG_ZERO) ? PGFL_ZEROS : PGFL_UNKNOWN; 114 #ifdef DEBUG 115 for (tp = TAILQ_FIRST(&uvm.page_free[ 116 free_list].pgfl_buckets[color].pgfl_queues[pgflidx]); 117 tp != NULL; 118 tp = TAILQ_NEXT(tp, pageq)) { 119 if (tp == pg) 120 break; 121 } 122 if (tp == NULL) 123 panic("uvm_pglistalloc: page not on freelist"); 124 #endif 125 TAILQ_REMOVE(&uvm.page_free[free_list].pgfl_buckets[ 126 color].pgfl_queues[pgflidx], pg, pageq); 127 uvmexp.free--; 128 if (pg->flags & PG_ZERO) 129 uvmexp.zeropages--; 130 pg->flags = PG_CLEAN; 131 pg->pqflags = 0; 132 pg->uobject = NULL; 133 pg->uanon = NULL; 134 TAILQ_INSERT_TAIL(rlist, pg, pageq); 135 STAT_INCR(uvm_pglistalloc_npages); 136 } 137 138 static int 139 uvm_pglistalloc_c_ps(ps, num, low, high, alignment, boundary, rlist) 140 struct vm_physseg *ps; 141 int num; 142 paddr_t low, high, alignment, boundary; 143 struct pglist *rlist; 144 { 145 int try, limit, tryidx, end, idx; 146 struct vm_page *pgs; 147 int pagemask; 148 #ifdef DEBUG 149 paddr_t idxpa, lastidxpa; 150 int cidx; 151 #endif 152 #ifdef PGALLOC_VERBOSE 153 printf("pgalloc: contig %d pgs from psi %ld\n", num, 154 (long)(ps - vm_physmem)); 155 #endif 156 157 try = roundup(max(atop(low), ps->avail_start), atop(alignment)); 158 limit = min(atop(high), ps->avail_end); 159 pagemask = ~((boundary >> PAGE_SHIFT) - 1); 160 161 for (;;) { 162 if (try + num > limit) { 163 /* 164 * We've run past the allowable range. 165 */ 166 return (0); /* FAIL */ 167 } 168 if (boundary != 0 && 169 ((try ^ (try + num - 1)) & pagemask) != 0) { 170 /* 171 * Region crosses boundary. Jump to the boundary 172 * just crossed and ensure alignment. 173 */ 174 try = (try + num - 1) & pagemask; 175 try = roundup(try, atop(alignment)); 176 continue; 177 } 178 #ifdef DEBUG 179 /* 180 * Make sure this is a managed physical page. 181 */ 182 183 if (vm_physseg_find(try, &cidx) != ps - vm_physmem) 184 panic("pgalloc contig: botch1"); 185 if (cidx != try - ps->start) 186 panic("pgalloc contig: botch2"); 187 if (vm_physseg_find(try + num - 1, &cidx) != ps - vm_physmem) 188 panic("pgalloc contig: botch3"); 189 if (cidx != try - ps->start + num - 1) 190 panic("pgalloc contig: botch4"); 191 #endif 192 tryidx = try - ps->start; 193 end = tryidx + num; 194 pgs = ps->pgs; 195 196 /* 197 * Found a suitable starting page. See if the range is free. 198 */ 199 for (idx = tryidx; idx < end; idx++) { 200 if (VM_PAGE_IS_FREE(&pgs[idx]) == 0) 201 break; 202 203 #ifdef DEBUG 204 idxpa = VM_PAGE_TO_PHYS(&pgs[idx]); 205 if (idx > tryidx) { 206 lastidxpa = VM_PAGE_TO_PHYS(&pgs[idx - 1]); 207 if ((lastidxpa + PAGE_SIZE) != idxpa) { 208 /* 209 * Region not contiguous. 210 */ 211 panic("pgalloc contig: botch5"); 212 } 213 if (boundary != 0 && 214 ((lastidxpa ^ idxpa) & ~(boundary - 1)) 215 != 0) { 216 /* 217 * Region crosses boundary. 218 */ 219 panic("pgalloc contig: botch6"); 220 } 221 } 222 #endif 223 } 224 if (idx == end) 225 break; 226 227 try += atop(alignment); 228 } 229 230 /* 231 * we have a chunk of memory that conforms to the requested constraints. 232 */ 233 idx = tryidx; 234 while (idx < end) 235 uvm_pglist_add(&pgs[idx++], rlist); 236 237 #ifdef PGALLOC_VERBOSE 238 printf("got %d pgs\n", num); 239 #endif 240 return (num); /* number of pages allocated */ 241 } 242 243 static int 244 uvm_pglistalloc_contig(num, low, high, alignment, boundary, rlist) 245 int num; 246 paddr_t low, high, alignment, boundary; 247 struct pglist *rlist; 248 { 249 int fl, psi; 250 struct vm_physseg *ps; 251 int s, error; 252 253 /* Default to "lose". */ 254 error = ENOMEM; 255 256 /* 257 * Block all memory allocation and lock the free list. 258 */ 259 s = uvm_lock_fpageq(); 260 261 /* Are there even any free pages? */ 262 if (uvmexp.free <= (uvmexp.reserve_pagedaemon + uvmexp.reserve_kernel)) 263 goto out; 264 265 for (fl = 0; fl < VM_NFREELIST; fl++) { 266 #if (VM_PHYSSEG_STRAT == VM_PSTRAT_BIGFIRST) 267 for (psi = vm_nphysseg - 1 ; psi >= 0 ; psi--) 268 #else 269 for (psi = 0 ; psi < vm_nphysseg ; psi++) 270 #endif 271 { 272 ps = &vm_physmem[psi]; 273 274 if (ps->free_list != fl) 275 continue; 276 277 num -= uvm_pglistalloc_c_ps(ps, num, low, high, 278 alignment, boundary, rlist); 279 if (num == 0) { 280 #ifdef PGALLOC_VERBOSE 281 printf("pgalloc: %lx-%lx\n", 282 VM_PAGE_TO_PHYS(TAILQ_FIRST(rlist)), 283 VM_PAGE_TO_PHYS(TAILQ_LAST(rlist))); 284 #endif 285 error = 0; 286 goto out; 287 } 288 } 289 } 290 291 out: 292 /* 293 * check to see if we need to generate some free pages waking 294 * the pagedaemon. 295 */ 296 297 UVM_KICK_PDAEMON(); 298 uvm_unlock_fpageq(s); 299 return (error); 300 } 301 302 static int 303 uvm_pglistalloc_s_ps(ps, num, low, high, rlist) 304 struct vm_physseg *ps; 305 int num; 306 paddr_t low, high; 307 struct pglist *rlist; 308 { 309 int todo, limit, try; 310 struct vm_page *pg; 311 #ifdef DEBUG 312 int cidx; 313 #endif 314 #ifdef PGALLOC_VERBOSE 315 printf("pgalloc: simple %d pgs from psi %ld\n", num, 316 (long)(ps - vm_physmem)); 317 #endif 318 319 todo = num; 320 limit = min(atop(high), ps->avail_end); 321 322 for (try = max(atop(low), ps->avail_start); 323 try < limit; try ++) { 324 #ifdef DEBUG 325 if (vm_physseg_find(try, &cidx) != ps - vm_physmem) 326 panic("pgalloc simple: botch1"); 327 if (cidx != (try - ps->start)) 328 panic("pgalloc simple: botch2"); 329 #endif 330 pg = &ps->pgs[try - ps->start]; 331 if (VM_PAGE_IS_FREE(pg) == 0) 332 continue; 333 334 uvm_pglist_add(pg, rlist); 335 if (--todo == 0) 336 break; 337 } 338 339 #ifdef PGALLOC_VERBOSE 340 printf("got %d pgs\n", num - todo); 341 #endif 342 return (num - todo); /* number of pages allocated */ 343 } 344 345 static int 346 uvm_pglistalloc_simple(num, low, high, rlist, waitok) 347 int num; 348 paddr_t low, high; 349 struct pglist *rlist; 350 int waitok; 351 { 352 int fl, psi, s, error; 353 struct vm_physseg *ps; 354 355 /* Default to "lose". */ 356 error = ENOMEM; 357 358 again: 359 /* 360 * Block all memory allocation and lock the free list. 361 */ 362 s = uvm_lock_fpageq(); 363 364 /* Are there even any free pages? */ 365 if (uvmexp.free <= (uvmexp.reserve_pagedaemon + uvmexp.reserve_kernel)) 366 goto out; 367 368 for (fl = 0; fl < VM_NFREELIST; fl++) { 369 #if (VM_PHYSSEG_STRAT == VM_PSTRAT_BIGFIRST) 370 for (psi = vm_nphysseg - 1 ; psi >= 0 ; psi--) 371 #else 372 for (psi = 0 ; psi < vm_nphysseg ; psi++) 373 #endif 374 { 375 ps = &vm_physmem[psi]; 376 377 if (ps->free_list != fl) 378 continue; 379 380 num -= uvm_pglistalloc_s_ps(ps, num, low, high, rlist); 381 if (num == 0) { 382 error = 0; 383 goto out; 384 } 385 } 386 387 } 388 389 out: 390 /* 391 * check to see if we need to generate some free pages waking 392 * the pagedaemon. 393 */ 394 395 UVM_KICK_PDAEMON(); 396 uvm_unlock_fpageq(s); 397 if (error) { 398 if (waitok) { 399 /* XXX perhaps some time limitation? */ 400 #ifdef DEBUG 401 printf("pglistalloc waiting\n"); 402 #endif 403 uvm_wait("pglalloc"); 404 goto again; 405 } else 406 uvm_pglistfree(rlist); 407 } 408 #ifdef PGALLOC_VERBOSE 409 if (!error) 410 printf("pgalloc: %lx..%lx\n", 411 VM_PAGE_TO_PHYS(TAILQ_FIRST(rlist)), 412 VM_PAGE_TO_PHYS(TAILQ_LAST(rlist, pglist))); 413 #endif 414 return (error); 415 } 416 417 int 418 uvm_pglistalloc(size, low, high, alignment, boundary, rlist, nsegs, waitok) 419 psize_t size; 420 paddr_t low, high, alignment, boundary; 421 struct pglist *rlist; 422 int nsegs, waitok; 423 { 424 int num, res; 425 426 KASSERT((alignment & (alignment - 1)) == 0); 427 KASSERT((boundary & (boundary - 1)) == 0); 428 429 /* 430 * Our allocations are always page granularity, so our alignment 431 * must be, too. 432 */ 433 if (alignment < PAGE_SIZE) 434 alignment = PAGE_SIZE; 435 if (boundary != 0 && boundary < size) 436 return (EINVAL); 437 num = atop(round_page(size)); 438 low = roundup(low, alignment); 439 440 TAILQ_INIT(rlist); 441 442 if ((nsegs < size >> PAGE_SHIFT) || (alignment != PAGE_SIZE) || 443 (boundary != 0)) 444 res = uvm_pglistalloc_contig(num, low, high, alignment, 445 boundary, rlist); 446 else 447 res = uvm_pglistalloc_simple(num, low, high, rlist, waitok); 448 449 return (res); 450 } 451 452 /* 453 * uvm_pglistfree: free a list of pages 454 * 455 * => pages should already be unmapped 456 */ 457 458 void 459 uvm_pglistfree(list) 460 struct pglist *list; 461 { 462 struct vm_page *pg; 463 int s; 464 465 /* 466 * Lock the free list and free each page. 467 */ 468 469 s = uvm_lock_fpageq(); 470 while ((pg = TAILQ_FIRST(list)) != NULL) { 471 boolean_t iszero; 472 473 KASSERT((pg->pqflags & (PQ_ACTIVE|PQ_INACTIVE)) == 0); 474 TAILQ_REMOVE(list, pg, pageq); 475 iszero = (pg->flags & PG_ZERO); 476 pg->pqflags = PQ_FREE; 477 #ifdef DEBUG 478 pg->uobject = (void *)0xdeadbeef; 479 pg->offset = 0xdeadbeef; 480 pg->uanon = (void *)0xdeadbeef; 481 #endif /* DEBUG */ 482 #ifdef DEBUG 483 if (iszero) 484 uvm_pagezerocheck(pg); 485 #endif /* DEBUG */ 486 TAILQ_INSERT_HEAD(&uvm.page_free[uvm_page_lookup_freelist(pg)]. 487 pgfl_buckets[VM_PGCOLOR_BUCKET(pg)]. 488 pgfl_queues[iszero ? PGFL_ZEROS : PGFL_UNKNOWN], pg, pageq); 489 uvmexp.free++; 490 if (iszero) 491 uvmexp.zeropages++; 492 if (uvmexp.zeropages < UVM_PAGEZERO_TARGET) 493 uvm.page_idle_zero = vm_page_zero_enable; 494 STAT_DECR(uvm_pglistalloc_npages); 495 } 496 uvm_unlock_fpageq(s); 497 } 498