/*      $NetBSD: uvm_pglist.c,v 1.34 2005/12/11 12:25:29 christos Exp $        */

/*-
 * Copyright (c) 1997 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
 * NASA Ames Research Center.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed by the NetBSD
 *      Foundation, Inc. and its contributors.
 * 4. Neither the name of The NetBSD Foundation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * uvm_pglist.c: pglist functions
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: uvm_pglist.c,v 1.34 2005/12/11 12:25:29 christos Exp $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/proc.h>

#include <uvm/uvm.h>

#ifdef VM_PAGE_ALLOC_MEMORY_STATS
#define STAT_INCR(v)    (v)++
#define STAT_DECR(v)    do { \
                if ((v) == 0) \
                        printf("%s:%d -- Already 0!\n", __FILE__, __LINE__); \
                else \
                        (v)--; \
        } while (/*CONSTCOND*/ 0)
u_long  uvm_pglistalloc_npages;
#else
#define STAT_INCR(v)
#define STAT_DECR(v)
#endif

/*
 * uvm_pglistalloc: allocate a list of pages
 *
 * => allocated pages are placed onto an rlist.  rlist is
 *    initialized by uvm_pglistalloc.
 * => returns 0 on success or errno on failure
 * => implementation allocates a single segment if any constraints are
 *    imposed by the call arguments.
 * => doesn't take into account clean non-busy pages on the inactive list
 *    that could be used(?)
 * => params:
 *      size            the size of the allocation, rounded to page size.
 *      low             the low address of the allowed allocation range.
 *      high            the high address of the allowed allocation range.
 *      alignment       memory must be aligned to this power-of-two boundary.
 *      boundary        no segment in the allocation may cross this
 *                      power-of-two boundary (relative to zero).
 */
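
/*
 * Illustrative usage sketch (not part of this file): a hypothetical driver
 * that needs a physically contiguous, 64KB-aligned buffer below 16MB could
 * call the public interface documented above roughly like this:
 *
 *      struct pglist mlist;
 *      struct vm_page *pg;
 *      int error;
 *
 *      error = uvm_pglistalloc(64 * 1024, 0, 0xffffff, 64 * 1024, 0,
 *          &mlist, 1, 1);
 *      if (error == 0) {
 *              TAILQ_FOREACH(pg, &mlist, pageq)
 *                      use(VM_PAGE_TO_PHYS(pg));
 *      }
 *
 * Here "use()" stands in for whatever mapping the caller performs; in
 * practice most drivers reach this code indirectly through bus_dma(9)
 * rather than calling it directly.
 */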

static void
uvm_pglist_add(struct vm_page *pg, struct pglist *rlist)
{
        int free_list, color, pgflidx;
#ifdef DEBUG
        struct vm_page *tp;
#endif

#if PGFL_NQUEUES != 2
#error uvm_pglistalloc needs to be updated
#endif

        free_list = uvm_page_lookup_freelist(pg);
        color = VM_PGCOLOR_BUCKET(pg);
        pgflidx = (pg->flags & PG_ZERO) ? PGFL_ZEROS : PGFL_UNKNOWN;
#ifdef DEBUG
        for (tp = TAILQ_FIRST(&uvm.page_free[
            free_list].pgfl_buckets[color].pgfl_queues[pgflidx]);
             tp != NULL;
             tp = TAILQ_NEXT(tp, pageq)) {
                if (tp == pg)
                        break;
        }
        if (tp == NULL)
                panic("uvm_pglistalloc: page not on freelist");
#endif
        TAILQ_REMOVE(&uvm.page_free[free_list].pgfl_buckets[
            color].pgfl_queues[pgflidx], pg, pageq);
        uvmexp.free--;
        if (pg->flags & PG_ZERO)
                uvmexp.zeropages--;
        pg->flags = PG_CLEAN;
        pg->pqflags = 0;
        pg->uobject = NULL;
        pg->uanon = NULL;
        TAILQ_INSERT_TAIL(rlist, pg, pageq);
        STAT_INCR(uvm_pglistalloc_npages);
}

static int
uvm_pglistalloc_c_ps(struct vm_physseg *ps, int num, paddr_t low, paddr_t high,
    paddr_t alignment, paddr_t boundary, struct pglist *rlist)
{
        int try, limit, tryidx, end, idx;
        struct vm_page *pgs;
        int pagemask;
#ifdef DEBUG
        paddr_t idxpa, lastidxpa;
        int cidx;
#endif
#ifdef PGALLOC_VERBOSE
        printf("pgalloc: contig %d pgs from psi %ld\n", num,
            (long)(ps - vm_physmem));
#endif

        try = roundup(max(atop(low), ps->avail_start), atop(alignment));
        limit = min(atop(high), ps->avail_end);
        pagemask = ~((boundary >> PAGE_SHIFT) - 1);

        for (;;) {
                if (try + num > limit) {
                        /*
                         * We've run past the allowable range.
                         */
                        return (0); /* FAIL */
                }
                if (boundary != 0 &&
                    ((try ^ (try + num - 1)) & pagemask) != 0) {
                        /*
                         * Region crosses boundary.  Jump to the boundary
                         * just crossed and ensure alignment.
                         */
                        try = (try + num - 1) & pagemask;
                        try = roundup(try, atop(alignment));
                        continue;
                }
#ifdef DEBUG
                /*
                 * Make sure this is a managed physical page.
                 */

                if (vm_physseg_find(try, &cidx) != ps - vm_physmem)
                        panic("pgalloc contig: botch1");
                if (cidx != try - ps->start)
                        panic("pgalloc contig: botch2");
                if (vm_physseg_find(try + num - 1, &cidx) != ps - vm_physmem)
                        panic("pgalloc contig: botch3");
                if (cidx != try - ps->start + num - 1)
                        panic("pgalloc contig: botch4");
#endif
                tryidx = try - ps->start;
                end = tryidx + num;
                pgs = ps->pgs;

                /*
                 * Found a suitable starting page.  See if the range is free.
                 */
                for (idx = tryidx; idx < end; idx++) {
                        if (VM_PAGE_IS_FREE(&pgs[idx]) == 0)
                                break;

#ifdef DEBUG
                        idxpa = VM_PAGE_TO_PHYS(&pgs[idx]);
                        if (idx > tryidx) {
                                lastidxpa = VM_PAGE_TO_PHYS(&pgs[idx - 1]);
                                if ((lastidxpa + PAGE_SIZE) != idxpa) {
                                        /*
                                         * Region not contiguous.
                                         */
                                        panic("pgalloc contig: botch5");
                                }
                                if (boundary != 0 &&
                                    ((lastidxpa ^ idxpa) & ~(boundary - 1))
                                    != 0) {
                                        /*
                                         * Region crosses boundary.
                                         */
                                        panic("pgalloc contig: botch6");
                                }
                        }
#endif
                }
                if (idx == end)
                        break;

                try += atop(alignment);
        }

        /*
         * We have a chunk of memory that conforms to the requested
         * constraints.
         */
        idx = tryidx;
        while (idx < end)
                uvm_pglist_add(&pgs[idx++], rlist);

#ifdef PGALLOC_VERBOSE
        printf("got %d pgs\n", num);
#endif
        return (num); /* number of pages allocated */
}
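
/*
 * Worked example of the boundary test in uvm_pglistalloc_c_ps() above
 * (illustrative numbers only): with a 64KB boundary and 4KB pages,
 * pagemask = ~((0x10000 >> PAGE_SHIFT) - 1) = ~0xf.  A candidate run of
 * num = 4 pages starting at page frame 14 covers frames 14..17, and
 * (14 ^ 17) & ~0xf is non-zero, so the run straddles a 64KB boundary;
 * "try" is then advanced to frame 16 (the boundary just crossed) and
 * rounded up to the requested alignment before the next attempt.
 */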

static int
uvm_pglistalloc_contig(int num, paddr_t low, paddr_t high, paddr_t alignment,
    paddr_t boundary, struct pglist *rlist)
{
        int fl, psi;
        struct vm_physseg *ps;
        int s, error;

        /* Default to "lose". */
        error = ENOMEM;

        /*
         * Block all memory allocation and lock the free list.
         */
        s = uvm_lock_fpageq();

        /* Are there even any free pages? */
        if (uvmexp.free <= (uvmexp.reserve_pagedaemon + uvmexp.reserve_kernel))
                goto out;

        for (fl = 0; fl < VM_NFREELIST; fl++) {
#if (VM_PHYSSEG_STRAT == VM_PSTRAT_BIGFIRST)
                for (psi = vm_nphysseg - 1 ; psi >= 0 ; psi--)
#else
                for (psi = 0 ; psi < vm_nphysseg ; psi++)
#endif
                {
                        ps = &vm_physmem[psi];

                        if (ps->free_list != fl)
                                continue;

                        num -= uvm_pglistalloc_c_ps(ps, num, low, high,
                            alignment, boundary, rlist);
                        if (num == 0) {
#ifdef PGALLOC_VERBOSE
                                printf("pgalloc: %lx-%lx\n",
                                    VM_PAGE_TO_PHYS(TAILQ_FIRST(rlist)),
                                    VM_PAGE_TO_PHYS(TAILQ_LAST(rlist, pglist)));
#endif
                                error = 0;
                                goto out;
                        }
                }
        }

out:
        /*
         * Check to see if we need to generate some free pages by waking
         * the pagedaemon.
         */

        UVM_KICK_PDAEMON();
        uvm_unlock_fpageq(s);
        return (error);
}

static int
uvm_pglistalloc_s_ps(struct vm_physseg *ps, int num, paddr_t low, paddr_t high,
    struct pglist *rlist)
{
        int todo, limit, try;
        struct vm_page *pg;
#ifdef DEBUG
        int cidx;
#endif
#ifdef PGALLOC_VERBOSE
        printf("pgalloc: simple %d pgs from psi %ld\n", num,
            (long)(ps - vm_physmem));
#endif

        todo = num;
        limit = min(atop(high), ps->avail_end);

        for (try = max(atop(low), ps->avail_start);
             try < limit; try++) {
#ifdef DEBUG
                if (vm_physseg_find(try, &cidx) != ps - vm_physmem)
                        panic("pgalloc simple: botch1");
                if (cidx != (try - ps->start))
                        panic("pgalloc simple: botch2");
#endif
                pg = &ps->pgs[try - ps->start];
                if (VM_PAGE_IS_FREE(pg) == 0)
                        continue;

                uvm_pglist_add(pg, rlist);
                if (--todo == 0)
                        break;
        }

#ifdef PGALLOC_VERBOSE
        printf("got %d pgs\n", num - todo);
#endif
        return (num - todo); /* number of pages allocated */
}

static int
uvm_pglistalloc_simple(int num, paddr_t low, paddr_t high,
    struct pglist *rlist, int waitok)
{
        int fl, psi, s, error;
        struct vm_physseg *ps;

        /* Default to "lose". */
        error = ENOMEM;

again:
        /*
         * Block all memory allocation and lock the free list.
         */
        s = uvm_lock_fpageq();

        /* Are there even any free pages? */
        if (uvmexp.free <= (uvmexp.reserve_pagedaemon + uvmexp.reserve_kernel))
                goto out;

        for (fl = 0; fl < VM_NFREELIST; fl++) {
#if (VM_PHYSSEG_STRAT == VM_PSTRAT_BIGFIRST)
                for (psi = vm_nphysseg - 1 ; psi >= 0 ; psi--)
#else
                for (psi = 0 ; psi < vm_nphysseg ; psi++)
#endif
                {
                        ps = &vm_physmem[psi];

                        if (ps->free_list != fl)
                                continue;

                        num -= uvm_pglistalloc_s_ps(ps, num, low, high, rlist);
                        if (num == 0) {
                                error = 0;
                                goto out;
                        }
                }

        }

out:
        /*
         * Check to see if we need to generate some free pages by waking
         * the pagedaemon.
         */

        UVM_KICK_PDAEMON();
        uvm_unlock_fpageq(s);
        if (error) {
                if (waitok) {
                        /* XXX perhaps some time limitation? */
#ifdef DEBUG
                        printf("pglistalloc waiting\n");
#endif
                        uvm_wait("pglalloc");
                        goto again;
                } else
                        uvm_pglistfree(rlist);
        }
#ifdef PGALLOC_VERBOSE
        if (!error)
                printf("pgalloc: %lx..%lx\n",
                    VM_PAGE_TO_PHYS(TAILQ_FIRST(rlist)),
                    VM_PAGE_TO_PHYS(TAILQ_LAST(rlist, pglist)));
#endif
        return (error);
}
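
/*
 * Note on the dispatch in uvm_pglistalloc() below (illustrative numbers):
 * a 16KB request with nsegs = 4, alignment = PAGE_SIZE and boundary = 0
 * is served by uvm_pglistalloc_simple() and may come back as four pages
 * that need not be physically contiguous, while the same request with
 * nsegs = 1, or with any non-trivial alignment or boundary, goes through
 * uvm_pglistalloc_contig() and yields one physically contiguous run.
 */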

int
uvm_pglistalloc(psize_t size, paddr_t low, paddr_t high, paddr_t alignment,
    paddr_t boundary, struct pglist *rlist, int nsegs, int waitok)
{
        int num, res;

        KASSERT((alignment & (alignment - 1)) == 0);
        KASSERT((boundary & (boundary - 1)) == 0);

        /*
         * Our allocations are always page granularity, so our alignment
         * must be, too.
         */
        if (alignment < PAGE_SIZE)
                alignment = PAGE_SIZE;
        if (boundary != 0 && boundary < size)
                return (EINVAL);
        num = atop(round_page(size));
        low = roundup(low, alignment);

        TAILQ_INIT(rlist);

        if ((nsegs < size >> PAGE_SHIFT) || (alignment != PAGE_SIZE) ||
            (boundary != 0))
                res = uvm_pglistalloc_contig(num, low, high, alignment,
                    boundary, rlist);
        else
                res = uvm_pglistalloc_simple(num, low, high, rlist, waitok);

        return (res);
}

/*
 * uvm_pglistfree: free a list of pages
 *
 * => pages should already be unmapped
 */

void
uvm_pglistfree(struct pglist *list)
{
        struct vm_page *pg;
        int s;

        /*
         * Lock the free list and free each page.
         */

        s = uvm_lock_fpageq();
        while ((pg = TAILQ_FIRST(list)) != NULL) {
                boolean_t iszero;

                KASSERT((pg->pqflags & (PQ_ACTIVE|PQ_INACTIVE)) == 0);
                TAILQ_REMOVE(list, pg, pageq);
                iszero = (pg->flags & PG_ZERO);
                pg->pqflags = PQ_FREE;
#ifdef DEBUG
                pg->uobject = (void *)0xdeadbeef;
                pg->offset = 0xdeadbeef;
                pg->uanon = (void *)0xdeadbeef;
#endif /* DEBUG */
#ifdef DEBUG
                if (iszero)
                        uvm_pagezerocheck(pg);
#endif /* DEBUG */
                TAILQ_INSERT_HEAD(&uvm.page_free[uvm_page_lookup_freelist(pg)].
                    pgfl_buckets[VM_PGCOLOR_BUCKET(pg)].
                    pgfl_queues[iszero ? PGFL_ZEROS : PGFL_UNKNOWN], pg, pageq);
                uvmexp.free++;
                if (iszero)
                        uvmexp.zeropages++;
                if (uvmexp.zeropages < UVM_PAGEZERO_TARGET)
                        uvm.page_idle_zero = vm_page_zero_enable;
                STAT_DECR(uvm_pglistalloc_npages);
        }
        uvm_unlock_fpageq(s);
}
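
/*
 * Illustrative teardown sketch (not part of this file): pages obtained
 * with uvm_pglistalloc() are given back in a single call once they are
 * no longer mapped, e.g. a hypothetical driver detach routine might do:
 *
 *      uvm_pglistfree(&sc->sc_mlist);
 *
 * where "sc_mlist" is the struct pglist that was handed to
 * uvm_pglistalloc() at attach time.
 */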