1 /* $NetBSD: uvm_pglist.c,v 1.40 2008/04/28 20:24:12 martin Exp $ */ 2 3 /*- 4 * Copyright (c) 1997 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility, 9 * NASA Ames Research Center. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 22 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 23 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 24 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 30 * POSSIBILITY OF SUCH DAMAGE. 31 */ 32 33 /* 34 * uvm_pglist.c: pglist functions 35 */ 36 37 #include <sys/cdefs.h> 38 __KERNEL_RCSID(0, "$NetBSD: uvm_pglist.c,v 1.40 2008/04/28 20:24:12 martin Exp $"); 39 40 #include <sys/param.h> 41 #include <sys/systm.h> 42 #include <sys/malloc.h> 43 #include <sys/proc.h> 44 45 #include <uvm/uvm.h> 46 #include <uvm/uvm_pdpolicy.h> 47 48 #ifdef VM_PAGE_ALLOC_MEMORY_STATS 49 #define STAT_INCR(v) (v)++ 50 #define STAT_DECR(v) do { \ 51 if ((v) == 0) \ 52 printf("%s:%d -- Already 0!\n", __FILE__, __LINE__); \ 53 else \ 54 (v)--; \ 55 } while (/*CONSTCOND*/ 0) 56 u_long uvm_pglistalloc_npages; 57 #else 58 #define STAT_INCR(v) 59 #define STAT_DECR(v) 60 #endif 61 62 /* 63 * uvm_pglistalloc: allocate a list of pages 64 * 65 * => allocated pages are placed onto an rlist. rlist is 66 * initialized by uvm_pglistalloc. 67 * => returns 0 on success or errno on failure 68 * => implementation allocates a single segment if any constraints are 69 * imposed by call arguments. 70 * => doesn't take into account clean non-busy pages on inactive list 71 * that could be used(?) 72 * => params: 73 * size the size of the allocation, rounded to page size. 74 * low the low address of the allowed allocation range. 75 * high the high address of the allowed allocation range. 76 * alignment memory must be aligned to this power-of-two boundary. 77 * boundary no segment in the allocation may cross this 78 * power-of-two boundary (relative to zero). 79 */ 80 81 static void 82 uvm_pglist_add(struct vm_page *pg, struct pglist *rlist) 83 { 84 int free_list, color, pgflidx; 85 #ifdef DEBUG 86 struct vm_page *tp; 87 #endif 88 89 KASSERT(mutex_owned(&uvm_fpageqlock)); 90 91 #if PGFL_NQUEUES != 2 92 #error uvm_pglistalloc needs to be updated 93 #endif 94 95 free_list = uvm_page_lookup_freelist(pg); 96 color = VM_PGCOLOR_BUCKET(pg); 97 pgflidx = (pg->flags & PG_ZERO) ? PGFL_ZEROS : PGFL_UNKNOWN; 98 #ifdef DEBUG 99 for (tp = TAILQ_FIRST(&uvm.page_free[ 100 free_list].pgfl_buckets[color].pgfl_queues[pgflidx]); 101 tp != NULL; 102 tp = TAILQ_NEXT(tp, pageq)) { 103 if (tp == pg) 104 break; 105 } 106 if (tp == NULL) 107 panic("uvm_pglistalloc: page not on freelist"); 108 #endif 109 TAILQ_REMOVE(&uvm.page_free[free_list].pgfl_buckets[ 110 color].pgfl_queues[pgflidx], pg, pageq); 111 uvmexp.free--; 112 if (pg->flags & PG_ZERO) 113 uvmexp.zeropages--; 114 pg->flags = PG_CLEAN; 115 pg->pqflags = 0; 116 pg->uobject = NULL; 117 pg->uanon = NULL; 118 TAILQ_INSERT_TAIL(rlist, pg, pageq); 119 STAT_INCR(uvm_pglistalloc_npages); 120 } 121 122 static int 123 uvm_pglistalloc_c_ps(struct vm_physseg *ps, int num, paddr_t low, paddr_t high, 124 paddr_t alignment, paddr_t boundary, struct pglist *rlist) 125 { 126 int try, limit, tryidx, end, idx; 127 struct vm_page *pgs; 128 int pagemask; 129 #ifdef DEBUG 130 paddr_t idxpa, lastidxpa; 131 int cidx = 0; /* XXX: GCC */ 132 #endif 133 #ifdef PGALLOC_VERBOSE 134 printf("pgalloc: contig %d pgs from psi %ld\n", num, 135 (long)(ps - vm_physmem)); 136 #endif 137 138 KASSERT(mutex_owned(&uvm_fpageqlock)); 139 140 try = roundup(max(atop(low), ps->avail_start), atop(alignment)); 141 limit = min(atop(high), ps->avail_end); 142 pagemask = ~((boundary >> PAGE_SHIFT) - 1); 143 144 for (;;) { 145 if (try + num > limit) { 146 /* 147 * We've run past the allowable range. 148 */ 149 return (0); /* FAIL */ 150 } 151 if (boundary != 0 && 152 ((try ^ (try + num - 1)) & pagemask) != 0) { 153 /* 154 * Region crosses boundary. Jump to the boundary 155 * just crossed and ensure alignment. 156 */ 157 try = (try + num - 1) & pagemask; 158 try = roundup(try, atop(alignment)); 159 continue; 160 } 161 #ifdef DEBUG 162 /* 163 * Make sure this is a managed physical page. 164 */ 165 166 if (vm_physseg_find(try, &cidx) != ps - vm_physmem) 167 panic("pgalloc contig: botch1"); 168 if (cidx != try - ps->start) 169 panic("pgalloc contig: botch2"); 170 if (vm_physseg_find(try + num - 1, &cidx) != ps - vm_physmem) 171 panic("pgalloc contig: botch3"); 172 if (cidx != try - ps->start + num - 1) 173 panic("pgalloc contig: botch4"); 174 #endif 175 tryidx = try - ps->start; 176 end = tryidx + num; 177 pgs = ps->pgs; 178 179 /* 180 * Found a suitable starting page. See if the range is free. 181 */ 182 for (idx = tryidx; idx < end; idx++) { 183 if (VM_PAGE_IS_FREE(&pgs[idx]) == 0) 184 break; 185 186 #ifdef DEBUG 187 idxpa = VM_PAGE_TO_PHYS(&pgs[idx]); 188 if (idx > tryidx) { 189 lastidxpa = VM_PAGE_TO_PHYS(&pgs[idx - 1]); 190 if ((lastidxpa + PAGE_SIZE) != idxpa) { 191 /* 192 * Region not contiguous. 193 */ 194 panic("pgalloc contig: botch5"); 195 } 196 if (boundary != 0 && 197 ((lastidxpa ^ idxpa) & ~(boundary - 1)) 198 != 0) { 199 /* 200 * Region crosses boundary. 201 */ 202 panic("pgalloc contig: botch6"); 203 } 204 } 205 #endif 206 } 207 if (idx == end) 208 break; 209 210 try += atop(alignment); 211 } 212 213 /* 214 * we have a chunk of memory that conforms to the requested constraints. 215 */ 216 idx = tryidx; 217 while (idx < end) 218 uvm_pglist_add(&pgs[idx++], rlist); 219 220 #ifdef PGALLOC_VERBOSE 221 printf("got %d pgs\n", num); 222 #endif 223 return (num); /* number of pages allocated */ 224 } 225 226 static int 227 uvm_pglistalloc_contig(int num, paddr_t low, paddr_t high, paddr_t alignment, 228 paddr_t boundary, struct pglist *rlist) 229 { 230 int fl, psi; 231 struct vm_physseg *ps; 232 int error; 233 234 /* Default to "lose". */ 235 error = ENOMEM; 236 237 /* 238 * Block all memory allocation and lock the free list. 239 */ 240 mutex_spin_enter(&uvm_fpageqlock); 241 242 /* Are there even any free pages? */ 243 if (uvmexp.free <= (uvmexp.reserve_pagedaemon + uvmexp.reserve_kernel)) 244 goto out; 245 246 for (fl = 0; fl < VM_NFREELIST; fl++) { 247 #if (VM_PHYSSEG_STRAT == VM_PSTRAT_BIGFIRST) 248 for (psi = vm_nphysseg - 1 ; psi >= 0 ; psi--) 249 #else 250 for (psi = 0 ; psi < vm_nphysseg ; psi++) 251 #endif 252 { 253 ps = &vm_physmem[psi]; 254 255 if (ps->free_list != fl) 256 continue; 257 258 num -= uvm_pglistalloc_c_ps(ps, num, low, high, 259 alignment, boundary, rlist); 260 if (num == 0) { 261 #ifdef PGALLOC_VERBOSE 262 printf("pgalloc: %lx-%lx\n", 263 VM_PAGE_TO_PHYS(TAILQ_FIRST(rlist)), 264 VM_PAGE_TO_PHYS(TAILQ_LAST(rlist))); 265 #endif 266 error = 0; 267 goto out; 268 } 269 } 270 } 271 272 out: 273 /* 274 * check to see if we need to generate some free pages waking 275 * the pagedaemon. 276 */ 277 278 uvm_kick_pdaemon(); 279 mutex_spin_exit(&uvm_fpageqlock); 280 return (error); 281 } 282 283 static int 284 uvm_pglistalloc_s_ps(struct vm_physseg *ps, int num, paddr_t low, paddr_t high, 285 struct pglist *rlist) 286 { 287 int todo, limit, try; 288 struct vm_page *pg; 289 #ifdef DEBUG 290 int cidx = 0; /* XXX: GCC */ 291 #endif 292 #ifdef PGALLOC_VERBOSE 293 printf("pgalloc: simple %d pgs from psi %ld\n", num, 294 (long)(ps - vm_physmem)); 295 #endif 296 297 KASSERT(mutex_owned(&uvm_fpageqlock)); 298 299 todo = num; 300 limit = min(atop(high), ps->avail_end); 301 302 for (try = max(atop(low), ps->avail_start); 303 try < limit; try ++) { 304 #ifdef DEBUG 305 if (vm_physseg_find(try, &cidx) != ps - vm_physmem) 306 panic("pgalloc simple: botch1"); 307 if (cidx != (try - ps->start)) 308 panic("pgalloc simple: botch2"); 309 #endif 310 pg = &ps->pgs[try - ps->start]; 311 if (VM_PAGE_IS_FREE(pg) == 0) 312 continue; 313 314 uvm_pglist_add(pg, rlist); 315 if (--todo == 0) 316 break; 317 } 318 319 #ifdef PGALLOC_VERBOSE 320 printf("got %d pgs\n", num - todo); 321 #endif 322 return (num - todo); /* number of pages allocated */ 323 } 324 325 static int 326 uvm_pglistalloc_simple(int num, paddr_t low, paddr_t high, 327 struct pglist *rlist, int waitok) 328 { 329 int fl, psi, error; 330 struct vm_physseg *ps; 331 332 /* Default to "lose". */ 333 error = ENOMEM; 334 335 again: 336 /* 337 * Block all memory allocation and lock the free list. 338 */ 339 mutex_spin_enter(&uvm_fpageqlock); 340 341 /* Are there even any free pages? */ 342 if (uvmexp.free <= (uvmexp.reserve_pagedaemon + uvmexp.reserve_kernel)) 343 goto out; 344 345 for (fl = 0; fl < VM_NFREELIST; fl++) { 346 #if (VM_PHYSSEG_STRAT == VM_PSTRAT_BIGFIRST) 347 for (psi = vm_nphysseg - 1 ; psi >= 0 ; psi--) 348 #else 349 for (psi = 0 ; psi < vm_nphysseg ; psi++) 350 #endif 351 { 352 ps = &vm_physmem[psi]; 353 354 if (ps->free_list != fl) 355 continue; 356 357 num -= uvm_pglistalloc_s_ps(ps, num, low, high, rlist); 358 if (num == 0) { 359 error = 0; 360 goto out; 361 } 362 } 363 364 } 365 366 out: 367 /* 368 * check to see if we need to generate some free pages waking 369 * the pagedaemon. 370 */ 371 372 uvm_kick_pdaemon(); 373 mutex_spin_exit(&uvm_fpageqlock); 374 375 if (error) { 376 if (waitok) { 377 /* XXX perhaps some time limitation? */ 378 #ifdef DEBUG 379 printf("pglistalloc waiting\n"); 380 #endif 381 uvm_wait("pglalloc"); 382 goto again; 383 } else 384 uvm_pglistfree(rlist); 385 } 386 #ifdef PGALLOC_VERBOSE 387 if (!error) 388 printf("pgalloc: %lx..%lx\n", 389 VM_PAGE_TO_PHYS(TAILQ_FIRST(rlist)), 390 VM_PAGE_TO_PHYS(TAILQ_LAST(rlist, pglist))); 391 #endif 392 return (error); 393 } 394 395 int 396 uvm_pglistalloc(psize_t size, paddr_t low, paddr_t high, paddr_t alignment, 397 paddr_t boundary, struct pglist *rlist, int nsegs, int waitok) 398 { 399 int num, res; 400 401 KASSERT((alignment & (alignment - 1)) == 0); 402 KASSERT((boundary & (boundary - 1)) == 0); 403 404 /* 405 * Our allocations are always page granularity, so our alignment 406 * must be, too. 407 */ 408 if (alignment < PAGE_SIZE) 409 alignment = PAGE_SIZE; 410 if (boundary != 0 && boundary < size) 411 return (EINVAL); 412 num = atop(round_page(size)); 413 low = roundup(low, alignment); 414 415 TAILQ_INIT(rlist); 416 417 if ((nsegs < size >> PAGE_SHIFT) || (alignment != PAGE_SIZE) || 418 (boundary != 0)) 419 res = uvm_pglistalloc_contig(num, low, high, alignment, 420 boundary, rlist); 421 else 422 res = uvm_pglistalloc_simple(num, low, high, rlist, waitok); 423 424 return (res); 425 } 426 427 /* 428 * uvm_pglistfree: free a list of pages 429 * 430 * => pages should already be unmapped 431 */ 432 433 void 434 uvm_pglistfree(struct pglist *list) 435 { 436 struct vm_page *pg; 437 438 /* 439 * Lock the free list and free each page. 440 */ 441 442 mutex_spin_enter(&uvm_fpageqlock); 443 while ((pg = TAILQ_FIRST(list)) != NULL) { 444 bool iszero; 445 446 KASSERT(!uvmpdpol_pageisqueued_p(pg)); 447 TAILQ_REMOVE(list, pg, pageq); 448 iszero = (pg->flags & PG_ZERO); 449 pg->pqflags = PQ_FREE; 450 #ifdef DEBUG 451 pg->uobject = (void *)0xdeadbeef; 452 pg->offset = 0xdeadbeef; 453 pg->uanon = (void *)0xdeadbeef; 454 #endif /* DEBUG */ 455 #ifdef DEBUG 456 if (iszero) 457 uvm_pagezerocheck(pg); 458 #endif /* DEBUG */ 459 TAILQ_INSERT_HEAD(&uvm.page_free[uvm_page_lookup_freelist(pg)]. 460 pgfl_buckets[VM_PGCOLOR_BUCKET(pg)]. 461 pgfl_queues[iszero ? PGFL_ZEROS : PGFL_UNKNOWN], pg, pageq); 462 uvmexp.free++; 463 if (iszero) 464 uvmexp.zeropages++; 465 if (uvmexp.zeropages < UVM_PAGEZERO_TARGET) 466 uvm.page_idle_zero = vm_page_zero_enable; 467 STAT_DECR(uvm_pglistalloc_npages); 468 } 469 mutex_spin_exit(&uvm_fpageqlock); 470 } 471