/*	$NetBSD: uvm_pglist.c,v 1.46 2010/06/17 03:13:58 mrg Exp $	*/

/*-
 * Copyright (c) 1997 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
 * NASA Ames Research Center.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * uvm_pglist.c: pglist functions
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: uvm_pglist.c,v 1.46 2010/06/17 03:13:58 mrg Exp $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/proc.h>

#include <uvm/uvm.h>
#include <uvm/uvm_pdpolicy.h>

#ifdef VM_PAGE_ALLOC_MEMORY_STATS
#define	STAT_INCR(v)	(v)++
#define	STAT_DECR(v)	do { \
		if ((v) == 0) \
			printf("%s:%d -- Already 0!\n", __FILE__, __LINE__); \
		else \
			(v)--; \
	} while (/*CONSTCOND*/ 0)
u_long	uvm_pglistalloc_npages;
#else
#define	STAT_INCR(v)
#define	STAT_DECR(v)
#endif

/*
 * uvm_pglistalloc: allocate a list of pages
 *
 * => allocated pages are placed onto an rlist.  rlist is
 *    initialized by uvm_pglistalloc.
 * => returns 0 on success or errno on failure
 * => implementation allocates a single segment if any constraints are
 *    imposed by call arguments.
 * => doesn't take into account clean non-busy pages on inactive list
 *    that could be used(?)
 * => params:
 *	size		the size of the allocation, rounded to page size.
 *	low		the low address of the allowed allocation range.
 *	high		the high address of the allowed allocation range.
 *	alignment	memory must be aligned to this power-of-two boundary.
 *	boundary	no segment in the allocation may cross this
 *			power-of-two boundary (relative to zero).
 */
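/*
 * Example (illustrative only; the buffer size and the 16MB DMA window
 * below are hypothetical, not taken from this file): a driver needing a
 * single physically contiguous, 64KB-aligned buffer below 16MB might do:
 *
 *	struct pglist mlist;
 *	int error;
 *
 *	error = uvm_pglistalloc(64 * 1024, 0, 0xffffff, 64 * 1024, 0,
 *	    &mlist, 1, 0);
 *	if (error)
 *		return error;
 *
 * Because the alignment is stricter than PAGE_SIZE, uvm_pglistalloc()
 * takes the contiguous path below; the pages come back on mlist and
 * are released with uvm_pglistfree(&mlist).
 */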
static void
uvm_pglist_add(struct vm_page *pg, struct pglist *rlist)
{
	int free_list, color, pgflidx;
#ifdef NOT_DEBUG
	struct vm_page *tp;
#endif

	KASSERT(mutex_owned(&uvm_fpageqlock));

#if PGFL_NQUEUES != 2
#error uvm_pglistalloc needs to be updated
#endif

	free_list = uvm_page_lookup_freelist(pg);
	color = VM_PGCOLOR_BUCKET(pg);
	pgflidx = (pg->flags & PG_ZERO) ? PGFL_ZEROS : PGFL_UNKNOWN;
#ifdef NOT_DEBUG
	for (tp = LIST_FIRST(&uvm.page_free[
		free_list].pgfl_buckets[color].pgfl_queues[pgflidx]);
	     tp != NULL;
	     tp = LIST_NEXT(tp, pageq.list)) {
		if (tp == pg)
			break;
	}
	if (tp == NULL)
		panic("uvm_pglistalloc: page not on freelist");
#endif
	LIST_REMOVE(pg, pageq.list);	/* global */
	LIST_REMOVE(pg, listq.list);	/* cpu */
	uvmexp.free--;
	if (pg->flags & PG_ZERO)
		uvmexp.zeropages--;
	VM_FREE_PAGE_TO_CPU(pg)->pages[pgflidx]--;
	pg->flags = PG_CLEAN;
	pg->pqflags = 0;
	pg->uobject = NULL;
	pg->uanon = NULL;
	TAILQ_INSERT_TAIL(rlist, pg, pageq.queue);
	STAT_INCR(uvm_pglistalloc_npages);
}

static int
uvm_pglistalloc_c_ps(struct vm_physseg *ps, int num, paddr_t low, paddr_t high,
    paddr_t alignment, paddr_t boundary, struct pglist *rlist)
{
	int try, limit, tryidx, end, idx;
	struct vm_page *pgs;
	int pagemask;
#ifdef DEBUG
	paddr_t idxpa, lastidxpa;
	int cidx = 0;	/* XXX: GCC */
#endif
#ifdef PGALLOC_VERBOSE
	printf("pgalloc: contig %d pgs from psi %ld\n", num,
	    (long)(ps - vm_physmem));
#endif

	KASSERT(mutex_owned(&uvm_fpageqlock));

	try = roundup(max(atop(low), ps->avail_start), atop(alignment));
	limit = min(atop(high), ps->avail_end);
	pagemask = ~((boundary >> PAGE_SHIFT) - 1);

	for (;;) {
		if (try + num > limit) {
			/*
			 * We've run past the allowable range.
			 */
			return (0); /* FAIL */
		}
		if (boundary != 0 &&
		    ((try ^ (try + num - 1)) & pagemask) != 0) {
			/*
			 * Region crosses boundary.  Jump to the boundary
			 * just crossed and ensure alignment.
			 */
			try = (try + num - 1) & pagemask;
			try = roundup(try, atop(alignment));
			continue;
		}
#ifdef DEBUG
		/*
		 * Make sure this is a managed physical page.
		 */

		if (vm_physseg_find(try, &cidx) != ps - vm_physmem)
			panic("pgalloc contig: botch1");
		if (cidx != try - ps->start)
			panic("pgalloc contig: botch2");
		if (vm_physseg_find(try + num - 1, &cidx) != ps - vm_physmem)
			panic("pgalloc contig: botch3");
		if (cidx != try - ps->start + num - 1)
			panic("pgalloc contig: botch4");
#endif
		tryidx = try - ps->start;
		end = tryidx + num;
		pgs = ps->pgs;

		/*
		 * Found a suitable starting page.  See if the range is free.
		 */
		for (idx = tryidx; idx < end; idx++) {
			if (VM_PAGE_IS_FREE(&pgs[idx]) == 0)
				break;

#ifdef DEBUG
			idxpa = VM_PAGE_TO_PHYS(&pgs[idx]);
			if (idx > tryidx) {
				lastidxpa = VM_PAGE_TO_PHYS(&pgs[idx - 1]);
				if ((lastidxpa + PAGE_SIZE) != idxpa) {
					/*
					 * Region not contiguous.
					 */
					panic("pgalloc contig: botch5");
				}
				if (boundary != 0 &&
				    ((lastidxpa ^ idxpa) & ~(boundary - 1))
				    != 0) {
					/*
					 * Region crosses boundary.
					 */
					panic("pgalloc contig: botch6");
				}
			}
#endif
		}
		if (idx == end)
			break;

		try += atop(alignment);
	}

	/*
	 * We have a chunk of memory that conforms to the requested
	 * constraints.
	 */
	idx = tryidx;
	while (idx < end)
		uvm_pglist_add(&pgs[idx++], rlist);

#ifdef PGALLOC_VERBOSE
	printf("got %d pgs\n", num);
#endif
	return (num); /* number of pages allocated */
}
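/*
 * Worked example of the boundary check above (the numbers are
 * illustrative): with boundary = 0x100000 (1MB) and 4KB pages
 * (PAGE_SHIFT == 12), pagemask = ~((0x100000 >> 12) - 1) = ~0xff, so
 * two page frame numbers fall in the same 1MB window iff they agree in
 * all bits above the low 8.  A candidate run [try, try + num - 1] is
 * rejected when ((try ^ (try + num - 1)) & pagemask) != 0, i.e. when
 * its first and last pages straddle a window edge; the search then
 * restarts at the window that was crossed, rounded back up to the
 * requested alignment.
 */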
static int
uvm_pglistalloc_contig(int num, paddr_t low, paddr_t high, paddr_t alignment,
    paddr_t boundary, struct pglist *rlist)
{
	int fl, psi;
	struct vm_physseg *ps;
	int error;

	/* Default to "lose". */
	error = ENOMEM;

	/*
	 * Block all memory allocation and lock the free list.
	 */
	mutex_spin_enter(&uvm_fpageqlock);

	/* Are there even any free pages? */
	if (uvmexp.free <= (uvmexp.reserve_pagedaemon + uvmexp.reserve_kernel))
		goto out;

	for (fl = 0; fl < VM_NFREELIST; fl++) {
#if (VM_PHYSSEG_STRAT == VM_PSTRAT_BIGFIRST)
		for (psi = vm_nphysseg - 1 ; psi >= 0 ; psi--)
#else
		for (psi = 0 ; psi < vm_nphysseg ; psi++)
#endif
		{
			ps = &vm_physmem[psi];

			if (ps->free_list != fl)
				continue;

			num -= uvm_pglistalloc_c_ps(ps, num, low, high,
			    alignment, boundary, rlist);
			if (num == 0) {
#ifdef PGALLOC_VERBOSE
				printf("pgalloc: %"PRIxMAX"-%"PRIxMAX"\n",
				    (uintmax_t) VM_PAGE_TO_PHYS(TAILQ_FIRST(rlist)),
				    (uintmax_t) VM_PAGE_TO_PHYS(TAILQ_LAST(rlist, pglist)));
#endif
				error = 0;
				goto out;
			}
		}
	}

out:
	/*
	 * Check to see if we need to generate some free pages by waking
	 * the pagedaemon.
	 */

	uvm_kick_pdaemon();
	mutex_spin_exit(&uvm_fpageqlock);
	return (error);
}

static int
uvm_pglistalloc_s_ps(struct vm_physseg *ps, int num, paddr_t low, paddr_t high,
    struct pglist *rlist)
{
	int todo, limit, try;
	struct vm_page *pg;
#ifdef DEBUG
	int cidx = 0;	/* XXX: GCC */
#endif
#ifdef PGALLOC_VERBOSE
	printf("pgalloc: simple %d pgs from psi %ld\n", num,
	    (long)(ps - vm_physmem));
#endif

	KASSERT(mutex_owned(&uvm_fpageqlock));

	todo = num;
	limit = min(atop(high), ps->avail_end);

	for (try = max(atop(low), ps->avail_start);
	     try < limit; try++) {
#ifdef DEBUG
		if (vm_physseg_find(try, &cidx) != ps - vm_physmem)
			panic("pgalloc simple: botch1");
		if (cidx != (try - ps->start))
			panic("pgalloc simple: botch2");
#endif
		pg = &ps->pgs[try - ps->start];
		if (VM_PAGE_IS_FREE(pg) == 0)
			continue;

		uvm_pglist_add(pg, rlist);
		if (--todo == 0)
			break;
	}

#ifdef PGALLOC_VERBOSE
	printf("got %d pgs\n", num - todo);
#endif
	return (num - todo); /* number of pages allocated */
}

static int
uvm_pglistalloc_simple(int num, paddr_t low, paddr_t high,
    struct pglist *rlist, int waitok)
{
	int fl, psi, error;
	struct vm_physseg *ps;

	/* Default to "lose". */
	error = ENOMEM;

again:
	/*
	 * Block all memory allocation and lock the free list.
	 */
	mutex_spin_enter(&uvm_fpageqlock);

	/* Are there even any free pages? */
	if (uvmexp.free <= (uvmexp.reserve_pagedaemon + uvmexp.reserve_kernel))
		goto out;

	for (fl = 0; fl < VM_NFREELIST; fl++) {
#if (VM_PHYSSEG_STRAT == VM_PSTRAT_BIGFIRST)
		for (psi = vm_nphysseg - 1 ; psi >= 0 ; psi--)
#else
		for (psi = 0 ; psi < vm_nphysseg ; psi++)
#endif
		{
			ps = &vm_physmem[psi];

			if (ps->free_list != fl)
				continue;

			num -= uvm_pglistalloc_s_ps(ps, num, low, high, rlist);
			if (num == 0) {
				error = 0;
				goto out;
			}
		}

	}

out:
	/*
	 * Check to see if we need to generate some free pages by waking
	 * the pagedaemon.
	 */

	uvm_kick_pdaemon();
	mutex_spin_exit(&uvm_fpageqlock);

	if (error) {
		if (waitok) {
			/* XXX perhaps some time limitation? */
#ifdef DEBUG
			printf("pglistalloc waiting\n");
#endif
			uvm_wait("pglalloc");
			goto again;
		} else
			uvm_pglistfree(rlist);
	}
#ifdef PGALLOC_VERBOSE
	if (!error)
		printf("pgalloc: %"PRIxMAX"..%"PRIxMAX"\n",
		    (uintmax_t) VM_PAGE_TO_PHYS(TAILQ_FIRST(rlist)),
		    (uintmax_t) VM_PAGE_TO_PHYS(TAILQ_LAST(rlist, pglist)));
#endif
	return (error);
}
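/*
 * Strategy selection (describing the dispatch in uvm_pglistalloc()
 * below): a request falls through to the simple page-at-a-time
 * allocator only if the caller permits at least as many segments as
 * pages, requires no alignment beyond PAGE_SIZE, and imposes no
 * boundary.  Any stricter constraint routes the request to the
 * contiguous allocator, which never sleeps even when waitok is set.
 */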
int
uvm_pglistalloc(psize_t size, paddr_t low, paddr_t high, paddr_t alignment,
    paddr_t boundary, struct pglist *rlist, int nsegs, int waitok)
{
	int num, res;

	KASSERT((alignment & (alignment - 1)) == 0);
	KASSERT((boundary & (boundary - 1)) == 0);

	/*
	 * Our allocations are always page granularity, so our alignment
	 * must be, too.
	 */
	if (alignment < PAGE_SIZE)
		alignment = PAGE_SIZE;
	if (boundary != 0 && boundary < size)
		return (EINVAL);
	num = atop(round_page(size));
	low = roundup(low, alignment);

	TAILQ_INIT(rlist);

	if ((nsegs < size >> PAGE_SHIFT) || (alignment != PAGE_SIZE) ||
	    (boundary != 0))
		res = uvm_pglistalloc_contig(num, low, high, alignment,
		    boundary, rlist);
	else
		res = uvm_pglistalloc_simple(num, low, high, rlist, waitok);

	return (res);
}

/*
 * uvm_pglistfree: free a list of pages
 *
 * => pages should already be unmapped
 */

void
uvm_pglistfree(struct pglist *list)
{
	struct uvm_cpu *ucpu;
	struct vm_page *pg;
	int index, color, queue;
	bool iszero;

	/*
	 * Lock the free list and free each page.
	 */

	mutex_spin_enter(&uvm_fpageqlock);
	ucpu = curcpu()->ci_data.cpu_uvm;
	while ((pg = TAILQ_FIRST(list)) != NULL) {
		KASSERT(!uvmpdpol_pageisqueued_p(pg));
		TAILQ_REMOVE(list, pg, pageq.queue);
		iszero = (pg->flags & PG_ZERO);
		pg->pqflags = PQ_FREE;
#ifdef DEBUG
		pg->uobject = (void *)0xdeadbeef;
		pg->uanon = (void *)0xdeadbeef;
#endif /* DEBUG */
#ifdef DEBUG
		if (iszero)
			uvm_pagezerocheck(pg);
#endif /* DEBUG */
		index = uvm_page_lookup_freelist(pg);
		color = VM_PGCOLOR_BUCKET(pg);
		queue = iszero ? PGFL_ZEROS : PGFL_UNKNOWN;
		pg->offset = (uintptr_t)ucpu;
		LIST_INSERT_HEAD(&uvm.page_free[index].pgfl_buckets[color].
		    pgfl_queues[queue], pg, pageq.list);
		LIST_INSERT_HEAD(&ucpu->page_free[index].pgfl_buckets[color].
		    pgfl_queues[queue], pg, listq.list);
		uvmexp.free++;
		if (iszero)
			uvmexp.zeropages++;
		ucpu->pages[queue]++;
		STAT_DECR(uvm_pglistalloc_npages);
	}
	if (ucpu->pages[PGFL_ZEROS] < ucpu->pages[PGFL_UNKNOWN])
		ucpu->page_idle_zero = vm_page_zero_enable;
	mutex_spin_exit(&uvm_fpageqlock);
}
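/*
 * Example (illustrative): a caller walking a list returned by
 * uvm_pglistalloc() to collect the physical addresses, then releasing
 * it:
 *
 *	struct vm_page *pg;
 *	paddr_t pa;
 *
 *	TAILQ_FOREACH(pg, &mlist, pageq.queue) {
 *		pa = VM_PAGE_TO_PHYS(pg);
 *		(hand pa to the device, build a scatter/gather list, etc.)
 *	}
 *	uvm_pglistfree(&mlist);
 *
 * Per the comment above, any mappings of the pages must be removed
 * before uvm_pglistfree() is called.
 */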