1 /* $NetBSD: uvm_pglist.c,v 1.39 2008/02/27 14:24:24 ad Exp $ */ 2 3 /*- 4 * Copyright (c) 1997 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility, 9 * NASA Ames Research Center. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 3. All advertising materials mentioning features or use of this software 20 * must display the following acknowledgement: 21 * This product includes software developed by the NetBSD 22 * Foundation, Inc. and its contributors. 23 * 4. Neither the name of The NetBSD Foundation nor the names of its 24 * contributors may be used to endorse or promote products derived 25 * from this software without specific prior written permission. 26 * 27 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 28 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 29 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 30 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 31 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 32 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 33 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 34 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 35 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 36 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 37 * POSSIBILITY OF SUCH DAMAGE. 38 */ 39 40 /* 41 * uvm_pglist.c: pglist functions 42 */ 43 44 #include <sys/cdefs.h> 45 __KERNEL_RCSID(0, "$NetBSD: uvm_pglist.c,v 1.39 2008/02/27 14:24:24 ad Exp $"); 46 47 #include <sys/param.h> 48 #include <sys/systm.h> 49 #include <sys/malloc.h> 50 #include <sys/proc.h> 51 52 #include <uvm/uvm.h> 53 #include <uvm/uvm_pdpolicy.h> 54 55 #ifdef VM_PAGE_ALLOC_MEMORY_STATS 56 #define STAT_INCR(v) (v)++ 57 #define STAT_DECR(v) do { \ 58 if ((v) == 0) \ 59 printf("%s:%d -- Already 0!\n", __FILE__, __LINE__); \ 60 else \ 61 (v)--; \ 62 } while (/*CONSTCOND*/ 0) 63 u_long uvm_pglistalloc_npages; 64 #else 65 #define STAT_INCR(v) 66 #define STAT_DECR(v) 67 #endif 68 69 /* 70 * uvm_pglistalloc: allocate a list of pages 71 * 72 * => allocated pages are placed onto an rlist. rlist is 73 * initialized by uvm_pglistalloc. 74 * => returns 0 on success or errno on failure 75 * => implementation allocates a single segment if any constraints are 76 * imposed by call arguments. 77 * => doesn't take into account clean non-busy pages on inactive list 78 * that could be used(?) 79 * => params: 80 * size the size of the allocation, rounded to page size. 81 * low the low address of the allowed allocation range. 82 * high the high address of the allowed allocation range. 83 * alignment memory must be aligned to this power-of-two boundary. 84 * boundary no segment in the allocation may cross this 85 * power-of-two boundary (relative to zero). 86 */ 87 88 static void 89 uvm_pglist_add(struct vm_page *pg, struct pglist *rlist) 90 { 91 int free_list, color, pgflidx; 92 #ifdef DEBUG 93 struct vm_page *tp; 94 #endif 95 96 KASSERT(mutex_owned(&uvm_fpageqlock)); 97 98 #if PGFL_NQUEUES != 2 99 #error uvm_pglistalloc needs to be updated 100 #endif 101 102 free_list = uvm_page_lookup_freelist(pg); 103 color = VM_PGCOLOR_BUCKET(pg); 104 pgflidx = (pg->flags & PG_ZERO) ? PGFL_ZEROS : PGFL_UNKNOWN; 105 #ifdef DEBUG 106 for (tp = TAILQ_FIRST(&uvm.page_free[ 107 free_list].pgfl_buckets[color].pgfl_queues[pgflidx]); 108 tp != NULL; 109 tp = TAILQ_NEXT(tp, pageq)) { 110 if (tp == pg) 111 break; 112 } 113 if (tp == NULL) 114 panic("uvm_pglistalloc: page not on freelist"); 115 #endif 116 TAILQ_REMOVE(&uvm.page_free[free_list].pgfl_buckets[ 117 color].pgfl_queues[pgflidx], pg, pageq); 118 uvmexp.free--; 119 if (pg->flags & PG_ZERO) 120 uvmexp.zeropages--; 121 pg->flags = PG_CLEAN; 122 pg->pqflags = 0; 123 pg->uobject = NULL; 124 pg->uanon = NULL; 125 TAILQ_INSERT_TAIL(rlist, pg, pageq); 126 STAT_INCR(uvm_pglistalloc_npages); 127 } 128 129 static int 130 uvm_pglistalloc_c_ps(struct vm_physseg *ps, int num, paddr_t low, paddr_t high, 131 paddr_t alignment, paddr_t boundary, struct pglist *rlist) 132 { 133 int try, limit, tryidx, end, idx; 134 struct vm_page *pgs; 135 int pagemask; 136 #ifdef DEBUG 137 paddr_t idxpa, lastidxpa; 138 int cidx = 0; /* XXX: GCC */ 139 #endif 140 #ifdef PGALLOC_VERBOSE 141 printf("pgalloc: contig %d pgs from psi %ld\n", num, 142 (long)(ps - vm_physmem)); 143 #endif 144 145 KASSERT(mutex_owned(&uvm_fpageqlock)); 146 147 try = roundup(max(atop(low), ps->avail_start), atop(alignment)); 148 limit = min(atop(high), ps->avail_end); 149 pagemask = ~((boundary >> PAGE_SHIFT) - 1); 150 151 for (;;) { 152 if (try + num > limit) { 153 /* 154 * We've run past the allowable range. 155 */ 156 return (0); /* FAIL */ 157 } 158 if (boundary != 0 && 159 ((try ^ (try + num - 1)) & pagemask) != 0) { 160 /* 161 * Region crosses boundary. Jump to the boundary 162 * just crossed and ensure alignment. 163 */ 164 try = (try + num - 1) & pagemask; 165 try = roundup(try, atop(alignment)); 166 continue; 167 } 168 #ifdef DEBUG 169 /* 170 * Make sure this is a managed physical page. 171 */ 172 173 if (vm_physseg_find(try, &cidx) != ps - vm_physmem) 174 panic("pgalloc contig: botch1"); 175 if (cidx != try - ps->start) 176 panic("pgalloc contig: botch2"); 177 if (vm_physseg_find(try + num - 1, &cidx) != ps - vm_physmem) 178 panic("pgalloc contig: botch3"); 179 if (cidx != try - ps->start + num - 1) 180 panic("pgalloc contig: botch4"); 181 #endif 182 tryidx = try - ps->start; 183 end = tryidx + num; 184 pgs = ps->pgs; 185 186 /* 187 * Found a suitable starting page. See if the range is free. 188 */ 189 for (idx = tryidx; idx < end; idx++) { 190 if (VM_PAGE_IS_FREE(&pgs[idx]) == 0) 191 break; 192 193 #ifdef DEBUG 194 idxpa = VM_PAGE_TO_PHYS(&pgs[idx]); 195 if (idx > tryidx) { 196 lastidxpa = VM_PAGE_TO_PHYS(&pgs[idx - 1]); 197 if ((lastidxpa + PAGE_SIZE) != idxpa) { 198 /* 199 * Region not contiguous. 200 */ 201 panic("pgalloc contig: botch5"); 202 } 203 if (boundary != 0 && 204 ((lastidxpa ^ idxpa) & ~(boundary - 1)) 205 != 0) { 206 /* 207 * Region crosses boundary. 208 */ 209 panic("pgalloc contig: botch6"); 210 } 211 } 212 #endif 213 } 214 if (idx == end) 215 break; 216 217 try += atop(alignment); 218 } 219 220 /* 221 * we have a chunk of memory that conforms to the requested constraints. 222 */ 223 idx = tryidx; 224 while (idx < end) 225 uvm_pglist_add(&pgs[idx++], rlist); 226 227 #ifdef PGALLOC_VERBOSE 228 printf("got %d pgs\n", num); 229 #endif 230 return (num); /* number of pages allocated */ 231 } 232 233 static int 234 uvm_pglistalloc_contig(int num, paddr_t low, paddr_t high, paddr_t alignment, 235 paddr_t boundary, struct pglist *rlist) 236 { 237 int fl, psi; 238 struct vm_physseg *ps; 239 int error; 240 241 /* Default to "lose". */ 242 error = ENOMEM; 243 244 /* 245 * Block all memory allocation and lock the free list. 246 */ 247 mutex_spin_enter(&uvm_fpageqlock); 248 249 /* Are there even any free pages? */ 250 if (uvmexp.free <= (uvmexp.reserve_pagedaemon + uvmexp.reserve_kernel)) 251 goto out; 252 253 for (fl = 0; fl < VM_NFREELIST; fl++) { 254 #if (VM_PHYSSEG_STRAT == VM_PSTRAT_BIGFIRST) 255 for (psi = vm_nphysseg - 1 ; psi >= 0 ; psi--) 256 #else 257 for (psi = 0 ; psi < vm_nphysseg ; psi++) 258 #endif 259 { 260 ps = &vm_physmem[psi]; 261 262 if (ps->free_list != fl) 263 continue; 264 265 num -= uvm_pglistalloc_c_ps(ps, num, low, high, 266 alignment, boundary, rlist); 267 if (num == 0) { 268 #ifdef PGALLOC_VERBOSE 269 printf("pgalloc: %lx-%lx\n", 270 VM_PAGE_TO_PHYS(TAILQ_FIRST(rlist)), 271 VM_PAGE_TO_PHYS(TAILQ_LAST(rlist))); 272 #endif 273 error = 0; 274 goto out; 275 } 276 } 277 } 278 279 out: 280 /* 281 * check to see if we need to generate some free pages waking 282 * the pagedaemon. 283 */ 284 285 uvm_kick_pdaemon(); 286 mutex_spin_exit(&uvm_fpageqlock); 287 return (error); 288 } 289 290 static int 291 uvm_pglistalloc_s_ps(struct vm_physseg *ps, int num, paddr_t low, paddr_t high, 292 struct pglist *rlist) 293 { 294 int todo, limit, try; 295 struct vm_page *pg; 296 #ifdef DEBUG 297 int cidx = 0; /* XXX: GCC */ 298 #endif 299 #ifdef PGALLOC_VERBOSE 300 printf("pgalloc: simple %d pgs from psi %ld\n", num, 301 (long)(ps - vm_physmem)); 302 #endif 303 304 KASSERT(mutex_owned(&uvm_fpageqlock)); 305 306 todo = num; 307 limit = min(atop(high), ps->avail_end); 308 309 for (try = max(atop(low), ps->avail_start); 310 try < limit; try ++) { 311 #ifdef DEBUG 312 if (vm_physseg_find(try, &cidx) != ps - vm_physmem) 313 panic("pgalloc simple: botch1"); 314 if (cidx != (try - ps->start)) 315 panic("pgalloc simple: botch2"); 316 #endif 317 pg = &ps->pgs[try - ps->start]; 318 if (VM_PAGE_IS_FREE(pg) == 0) 319 continue; 320 321 uvm_pglist_add(pg, rlist); 322 if (--todo == 0) 323 break; 324 } 325 326 #ifdef PGALLOC_VERBOSE 327 printf("got %d pgs\n", num - todo); 328 #endif 329 return (num - todo); /* number of pages allocated */ 330 } 331 332 static int 333 uvm_pglistalloc_simple(int num, paddr_t low, paddr_t high, 334 struct pglist *rlist, int waitok) 335 { 336 int fl, psi, error; 337 struct vm_physseg *ps; 338 339 /* Default to "lose". */ 340 error = ENOMEM; 341 342 again: 343 /* 344 * Block all memory allocation and lock the free list. 345 */ 346 mutex_spin_enter(&uvm_fpageqlock); 347 348 /* Are there even any free pages? */ 349 if (uvmexp.free <= (uvmexp.reserve_pagedaemon + uvmexp.reserve_kernel)) 350 goto out; 351 352 for (fl = 0; fl < VM_NFREELIST; fl++) { 353 #if (VM_PHYSSEG_STRAT == VM_PSTRAT_BIGFIRST) 354 for (psi = vm_nphysseg - 1 ; psi >= 0 ; psi--) 355 #else 356 for (psi = 0 ; psi < vm_nphysseg ; psi++) 357 #endif 358 { 359 ps = &vm_physmem[psi]; 360 361 if (ps->free_list != fl) 362 continue; 363 364 num -= uvm_pglistalloc_s_ps(ps, num, low, high, rlist); 365 if (num == 0) { 366 error = 0; 367 goto out; 368 } 369 } 370 371 } 372 373 out: 374 /* 375 * check to see if we need to generate some free pages waking 376 * the pagedaemon. 377 */ 378 379 uvm_kick_pdaemon(); 380 mutex_spin_exit(&uvm_fpageqlock); 381 382 if (error) { 383 if (waitok) { 384 /* XXX perhaps some time limitation? */ 385 #ifdef DEBUG 386 printf("pglistalloc waiting\n"); 387 #endif 388 uvm_wait("pglalloc"); 389 goto again; 390 } else 391 uvm_pglistfree(rlist); 392 } 393 #ifdef PGALLOC_VERBOSE 394 if (!error) 395 printf("pgalloc: %lx..%lx\n", 396 VM_PAGE_TO_PHYS(TAILQ_FIRST(rlist)), 397 VM_PAGE_TO_PHYS(TAILQ_LAST(rlist, pglist))); 398 #endif 399 return (error); 400 } 401 402 int 403 uvm_pglistalloc(psize_t size, paddr_t low, paddr_t high, paddr_t alignment, 404 paddr_t boundary, struct pglist *rlist, int nsegs, int waitok) 405 { 406 int num, res; 407 408 KASSERT((alignment & (alignment - 1)) == 0); 409 KASSERT((boundary & (boundary - 1)) == 0); 410 411 /* 412 * Our allocations are always page granularity, so our alignment 413 * must be, too. 414 */ 415 if (alignment < PAGE_SIZE) 416 alignment = PAGE_SIZE; 417 if (boundary != 0 && boundary < size) 418 return (EINVAL); 419 num = atop(round_page(size)); 420 low = roundup(low, alignment); 421 422 TAILQ_INIT(rlist); 423 424 if ((nsegs < size >> PAGE_SHIFT) || (alignment != PAGE_SIZE) || 425 (boundary != 0)) 426 res = uvm_pglistalloc_contig(num, low, high, alignment, 427 boundary, rlist); 428 else 429 res = uvm_pglistalloc_simple(num, low, high, rlist, waitok); 430 431 return (res); 432 } 433 434 /* 435 * uvm_pglistfree: free a list of pages 436 * 437 * => pages should already be unmapped 438 */ 439 440 void 441 uvm_pglistfree(struct pglist *list) 442 { 443 struct vm_page *pg; 444 445 /* 446 * Lock the free list and free each page. 447 */ 448 449 mutex_spin_enter(&uvm_fpageqlock); 450 while ((pg = TAILQ_FIRST(list)) != NULL) { 451 bool iszero; 452 453 KASSERT(!uvmpdpol_pageisqueued_p(pg)); 454 TAILQ_REMOVE(list, pg, pageq); 455 iszero = (pg->flags & PG_ZERO); 456 pg->pqflags = PQ_FREE; 457 #ifdef DEBUG 458 pg->uobject = (void *)0xdeadbeef; 459 pg->offset = 0xdeadbeef; 460 pg->uanon = (void *)0xdeadbeef; 461 #endif /* DEBUG */ 462 #ifdef DEBUG 463 if (iszero) 464 uvm_pagezerocheck(pg); 465 #endif /* DEBUG */ 466 TAILQ_INSERT_HEAD(&uvm.page_free[uvm_page_lookup_freelist(pg)]. 467 pgfl_buckets[VM_PGCOLOR_BUCKET(pg)]. 468 pgfl_queues[iszero ? PGFL_ZEROS : PGFL_UNKNOWN], pg, pageq); 469 uvmexp.free++; 470 if (iszero) 471 uvmexp.zeropages++; 472 if (uvmexp.zeropages < UVM_PAGEZERO_TARGET) 473 uvm.page_idle_zero = vm_page_zero_enable; 474 STAT_DECR(uvm_pglistalloc_npages); 475 } 476 mutex_spin_exit(&uvm_fpageqlock); 477 } 478