1 /* $NetBSD: uvm_pglist.c,v 1.36 2006/09/15 15:51:13 yamt Exp $ */ 2 3 /*- 4 * Copyright (c) 1997 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility, 9 * NASA Ames Research Center. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 3. All advertising materials mentioning features or use of this software 20 * must display the following acknowledgement: 21 * This product includes software developed by the NetBSD 22 * Foundation, Inc. and its contributors. 23 * 4. Neither the name of The NetBSD Foundation nor the names of its 24 * contributors may be used to endorse or promote products derived 25 * from this software without specific prior written permission. 26 * 27 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 28 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 29 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 30 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 31 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 32 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 33 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 34 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 35 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 36 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 37 * POSSIBILITY OF SUCH DAMAGE. 38 */ 39 40 /* 41 * uvm_pglist.c: pglist functions 42 */ 43 44 #include <sys/cdefs.h> 45 __KERNEL_RCSID(0, "$NetBSD: uvm_pglist.c,v 1.36 2006/09/15 15:51:13 yamt Exp $"); 46 47 #include <sys/param.h> 48 #include <sys/systm.h> 49 #include <sys/malloc.h> 50 #include <sys/proc.h> 51 52 #include <uvm/uvm.h> 53 #include <uvm/uvm_pdpolicy.h> 54 55 #ifdef VM_PAGE_ALLOC_MEMORY_STATS 56 #define STAT_INCR(v) (v)++ 57 #define STAT_DECR(v) do { \ 58 if ((v) == 0) \ 59 printf("%s:%d -- Already 0!\n", __FILE__, __LINE__); \ 60 else \ 61 (v)--; \ 62 } while (/*CONSTCOND*/ 0) 63 u_long uvm_pglistalloc_npages; 64 #else 65 #define STAT_INCR(v) 66 #define STAT_DECR(v) 67 #endif 68 69 /* 70 * uvm_pglistalloc: allocate a list of pages 71 * 72 * => allocated pages are placed onto an rlist. rlist is 73 * initialized by uvm_pglistalloc. 74 * => returns 0 on success or errno on failure 75 * => implementation allocates a single segment if any constraints are 76 * imposed by call arguments. 77 * => doesn't take into account clean non-busy pages on inactive list 78 * that could be used(?) 79 * => params: 80 * size the size of the allocation, rounded to page size. 81 * low the low address of the allowed allocation range. 82 * high the high address of the allowed allocation range. 83 * alignment memory must be aligned to this power-of-two boundary. 84 * boundary no segment in the allocation may cross this 85 * power-of-two boundary (relative to zero). 86 */ 87 88 static void 89 uvm_pglist_add(struct vm_page *pg, struct pglist *rlist) 90 { 91 int free_list, color, pgflidx; 92 #ifdef DEBUG 93 struct vm_page *tp; 94 #endif 95 96 #if PGFL_NQUEUES != 2 97 #error uvm_pglistalloc needs to be updated 98 #endif 99 100 free_list = uvm_page_lookup_freelist(pg); 101 color = VM_PGCOLOR_BUCKET(pg); 102 pgflidx = (pg->flags & PG_ZERO) ? PGFL_ZEROS : PGFL_UNKNOWN; 103 #ifdef DEBUG 104 for (tp = TAILQ_FIRST(&uvm.page_free[ 105 free_list].pgfl_buckets[color].pgfl_queues[pgflidx]); 106 tp != NULL; 107 tp = TAILQ_NEXT(tp, pageq)) { 108 if (tp == pg) 109 break; 110 } 111 if (tp == NULL) 112 panic("uvm_pglistalloc: page not on freelist"); 113 #endif 114 TAILQ_REMOVE(&uvm.page_free[free_list].pgfl_buckets[ 115 color].pgfl_queues[pgflidx], pg, pageq); 116 uvmexp.free--; 117 if (pg->flags & PG_ZERO) 118 uvmexp.zeropages--; 119 pg->flags = PG_CLEAN; 120 pg->pqflags = 0; 121 pg->uobject = NULL; 122 pg->uanon = NULL; 123 TAILQ_INSERT_TAIL(rlist, pg, pageq); 124 STAT_INCR(uvm_pglistalloc_npages); 125 } 126 127 static int 128 uvm_pglistalloc_c_ps(struct vm_physseg *ps, int num, paddr_t low, paddr_t high, 129 paddr_t alignment, paddr_t boundary, struct pglist *rlist) 130 { 131 int try, limit, tryidx, end, idx; 132 struct vm_page *pgs; 133 int pagemask; 134 #ifdef DEBUG 135 paddr_t idxpa, lastidxpa; 136 int cidx = 0; /* XXX: GCC */ 137 #endif 138 #ifdef PGALLOC_VERBOSE 139 printf("pgalloc: contig %d pgs from psi %ld\n", num, 140 (long)(ps - vm_physmem)); 141 #endif 142 143 try = roundup(max(atop(low), ps->avail_start), atop(alignment)); 144 limit = min(atop(high), ps->avail_end); 145 pagemask = ~((boundary >> PAGE_SHIFT) - 1); 146 147 for (;;) { 148 if (try + num > limit) { 149 /* 150 * We've run past the allowable range. 151 */ 152 return (0); /* FAIL */ 153 } 154 if (boundary != 0 && 155 ((try ^ (try + num - 1)) & pagemask) != 0) { 156 /* 157 * Region crosses boundary. Jump to the boundary 158 * just crossed and ensure alignment. 159 */ 160 try = (try + num - 1) & pagemask; 161 try = roundup(try, atop(alignment)); 162 continue; 163 } 164 #ifdef DEBUG 165 /* 166 * Make sure this is a managed physical page. 167 */ 168 169 if (vm_physseg_find(try, &cidx) != ps - vm_physmem) 170 panic("pgalloc contig: botch1"); 171 if (cidx != try - ps->start) 172 panic("pgalloc contig: botch2"); 173 if (vm_physseg_find(try + num - 1, &cidx) != ps - vm_physmem) 174 panic("pgalloc contig: botch3"); 175 if (cidx != try - ps->start + num - 1) 176 panic("pgalloc contig: botch4"); 177 #endif 178 tryidx = try - ps->start; 179 end = tryidx + num; 180 pgs = ps->pgs; 181 182 /* 183 * Found a suitable starting page. See if the range is free. 184 */ 185 for (idx = tryidx; idx < end; idx++) { 186 if (VM_PAGE_IS_FREE(&pgs[idx]) == 0) 187 break; 188 189 #ifdef DEBUG 190 idxpa = VM_PAGE_TO_PHYS(&pgs[idx]); 191 if (idx > tryidx) { 192 lastidxpa = VM_PAGE_TO_PHYS(&pgs[idx - 1]); 193 if ((lastidxpa + PAGE_SIZE) != idxpa) { 194 /* 195 * Region not contiguous. 196 */ 197 panic("pgalloc contig: botch5"); 198 } 199 if (boundary != 0 && 200 ((lastidxpa ^ idxpa) & ~(boundary - 1)) 201 != 0) { 202 /* 203 * Region crosses boundary. 204 */ 205 panic("pgalloc contig: botch6"); 206 } 207 } 208 #endif 209 } 210 if (idx == end) 211 break; 212 213 try += atop(alignment); 214 } 215 216 /* 217 * we have a chunk of memory that conforms to the requested constraints. 218 */ 219 idx = tryidx; 220 while (idx < end) 221 uvm_pglist_add(&pgs[idx++], rlist); 222 223 #ifdef PGALLOC_VERBOSE 224 printf("got %d pgs\n", num); 225 #endif 226 return (num); /* number of pages allocated */ 227 } 228 229 static int 230 uvm_pglistalloc_contig(int num, paddr_t low, paddr_t high, paddr_t alignment, 231 paddr_t boundary, struct pglist *rlist) 232 { 233 int fl, psi; 234 struct vm_physseg *ps; 235 int s, error; 236 237 /* Default to "lose". */ 238 error = ENOMEM; 239 240 /* 241 * Block all memory allocation and lock the free list. 242 */ 243 s = uvm_lock_fpageq(); 244 245 /* Are there even any free pages? */ 246 if (uvmexp.free <= (uvmexp.reserve_pagedaemon + uvmexp.reserve_kernel)) 247 goto out; 248 249 for (fl = 0; fl < VM_NFREELIST; fl++) { 250 #if (VM_PHYSSEG_STRAT == VM_PSTRAT_BIGFIRST) 251 for (psi = vm_nphysseg - 1 ; psi >= 0 ; psi--) 252 #else 253 for (psi = 0 ; psi < vm_nphysseg ; psi++) 254 #endif 255 { 256 ps = &vm_physmem[psi]; 257 258 if (ps->free_list != fl) 259 continue; 260 261 num -= uvm_pglistalloc_c_ps(ps, num, low, high, 262 alignment, boundary, rlist); 263 if (num == 0) { 264 #ifdef PGALLOC_VERBOSE 265 printf("pgalloc: %lx-%lx\n", 266 VM_PAGE_TO_PHYS(TAILQ_FIRST(rlist)), 267 VM_PAGE_TO_PHYS(TAILQ_LAST(rlist))); 268 #endif 269 error = 0; 270 goto out; 271 } 272 } 273 } 274 275 out: 276 /* 277 * check to see if we need to generate some free pages waking 278 * the pagedaemon. 279 */ 280 281 uvm_kick_pdaemon(); 282 uvm_unlock_fpageq(s); 283 return (error); 284 } 285 286 static int 287 uvm_pglistalloc_s_ps(struct vm_physseg *ps, int num, paddr_t low, paddr_t high, 288 struct pglist *rlist) 289 { 290 int todo, limit, try; 291 struct vm_page *pg; 292 #ifdef DEBUG 293 int cidx = 0; /* XXX: GCC */ 294 #endif 295 #ifdef PGALLOC_VERBOSE 296 printf("pgalloc: simple %d pgs from psi %ld\n", num, 297 (long)(ps - vm_physmem)); 298 #endif 299 300 todo = num; 301 limit = min(atop(high), ps->avail_end); 302 303 for (try = max(atop(low), ps->avail_start); 304 try < limit; try ++) { 305 #ifdef DEBUG 306 if (vm_physseg_find(try, &cidx) != ps - vm_physmem) 307 panic("pgalloc simple: botch1"); 308 if (cidx != (try - ps->start)) 309 panic("pgalloc simple: botch2"); 310 #endif 311 pg = &ps->pgs[try - ps->start]; 312 if (VM_PAGE_IS_FREE(pg) == 0) 313 continue; 314 315 uvm_pglist_add(pg, rlist); 316 if (--todo == 0) 317 break; 318 } 319 320 #ifdef PGALLOC_VERBOSE 321 printf("got %d pgs\n", num - todo); 322 #endif 323 return (num - todo); /* number of pages allocated */ 324 } 325 326 static int 327 uvm_pglistalloc_simple(int num, paddr_t low, paddr_t high, 328 struct pglist *rlist, int waitok) 329 { 330 int fl, psi, s, error; 331 struct vm_physseg *ps; 332 333 /* Default to "lose". */ 334 error = ENOMEM; 335 336 again: 337 /* 338 * Block all memory allocation and lock the free list. 339 */ 340 s = uvm_lock_fpageq(); 341 342 /* Are there even any free pages? */ 343 if (uvmexp.free <= (uvmexp.reserve_pagedaemon + uvmexp.reserve_kernel)) 344 goto out; 345 346 for (fl = 0; fl < VM_NFREELIST; fl++) { 347 #if (VM_PHYSSEG_STRAT == VM_PSTRAT_BIGFIRST) 348 for (psi = vm_nphysseg - 1 ; psi >= 0 ; psi--) 349 #else 350 for (psi = 0 ; psi < vm_nphysseg ; psi++) 351 #endif 352 { 353 ps = &vm_physmem[psi]; 354 355 if (ps->free_list != fl) 356 continue; 357 358 num -= uvm_pglistalloc_s_ps(ps, num, low, high, rlist); 359 if (num == 0) { 360 error = 0; 361 goto out; 362 } 363 } 364 365 } 366 367 out: 368 /* 369 * check to see if we need to generate some free pages waking 370 * the pagedaemon. 371 */ 372 373 uvm_kick_pdaemon(); 374 uvm_unlock_fpageq(s); 375 if (error) { 376 if (waitok) { 377 /* XXX perhaps some time limitation? */ 378 #ifdef DEBUG 379 printf("pglistalloc waiting\n"); 380 #endif 381 uvm_wait("pglalloc"); 382 goto again; 383 } else 384 uvm_pglistfree(rlist); 385 } 386 #ifdef PGALLOC_VERBOSE 387 if (!error) 388 printf("pgalloc: %lx..%lx\n", 389 VM_PAGE_TO_PHYS(TAILQ_FIRST(rlist)), 390 VM_PAGE_TO_PHYS(TAILQ_LAST(rlist, pglist))); 391 #endif 392 return (error); 393 } 394 395 int 396 uvm_pglistalloc(psize_t size, paddr_t low, paddr_t high, paddr_t alignment, 397 paddr_t boundary, struct pglist *rlist, int nsegs, int waitok) 398 { 399 int num, res; 400 401 KASSERT((alignment & (alignment - 1)) == 0); 402 KASSERT((boundary & (boundary - 1)) == 0); 403 404 /* 405 * Our allocations are always page granularity, so our alignment 406 * must be, too. 407 */ 408 if (alignment < PAGE_SIZE) 409 alignment = PAGE_SIZE; 410 if (boundary != 0 && boundary < size) 411 return (EINVAL); 412 num = atop(round_page(size)); 413 low = roundup(low, alignment); 414 415 TAILQ_INIT(rlist); 416 417 if ((nsegs < size >> PAGE_SHIFT) || (alignment != PAGE_SIZE) || 418 (boundary != 0)) 419 res = uvm_pglistalloc_contig(num, low, high, alignment, 420 boundary, rlist); 421 else 422 res = uvm_pglistalloc_simple(num, low, high, rlist, waitok); 423 424 return (res); 425 } 426 427 /* 428 * uvm_pglistfree: free a list of pages 429 * 430 * => pages should already be unmapped 431 */ 432 433 void 434 uvm_pglistfree(struct pglist *list) 435 { 436 struct vm_page *pg; 437 int s; 438 439 /* 440 * Lock the free list and free each page. 441 */ 442 443 s = uvm_lock_fpageq(); 444 while ((pg = TAILQ_FIRST(list)) != NULL) { 445 boolean_t iszero; 446 447 KASSERT(!uvmpdpol_pageisqueued_p(pg)); 448 TAILQ_REMOVE(list, pg, pageq); 449 iszero = (pg->flags & PG_ZERO); 450 pg->pqflags = PQ_FREE; 451 #ifdef DEBUG 452 pg->uobject = (void *)0xdeadbeef; 453 pg->offset = 0xdeadbeef; 454 pg->uanon = (void *)0xdeadbeef; 455 #endif /* DEBUG */ 456 #ifdef DEBUG 457 if (iszero) 458 uvm_pagezerocheck(pg); 459 #endif /* DEBUG */ 460 TAILQ_INSERT_HEAD(&uvm.page_free[uvm_page_lookup_freelist(pg)]. 461 pgfl_buckets[VM_PGCOLOR_BUCKET(pg)]. 462 pgfl_queues[iszero ? PGFL_ZEROS : PGFL_UNKNOWN], pg, pageq); 463 uvmexp.free++; 464 if (iszero) 465 uvmexp.zeropages++; 466 if (uvmexp.zeropages < UVM_PAGEZERO_TARGET) 467 uvm.page_idle_zero = vm_page_zero_enable; 468 STAT_DECR(uvm_pglistalloc_npages); 469 } 470 uvm_unlock_fpageq(s); 471 } 472