1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 */
25
26 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
27 /* All Rights Reserved */
28
29 /*
30 * University Copyright- Copyright (c) 1982, 1986, 1988
31 * The Regents of the University of California
32 * All Rights Reserved
33 *
34 * University Acknowledgment- Portions of this document are derived from
35 * software developed by the University of California, Berkeley, and its
36 * contributors.
37 */
38
39 /*
40 * VM - segment management.
41 */
42
43 #include <sys/types.h>
44 #include <sys/inttypes.h>
45 #include <sys/t_lock.h>
46 #include <sys/param.h>
47 #include <sys/systm.h>
48 #include <sys/kmem.h>
49 #include <sys/sysmacros.h>
50 #include <sys/vmsystm.h>
51 #include <sys/tuneable.h>
52 #include <sys/debug.h>
53 #include <sys/fs/swapnode.h>
54 #include <sys/cmn_err.h>
55 #include <sys/callb.h>
56 #include <sys/mem_config.h>
57 #include <sys/mman.h>
58
59 #include <vm/hat.h>
60 #include <vm/as.h>
61 #include <vm/seg.h>
62 #include <vm/seg_kmem.h>
63 #include <vm/seg_spt.h>
64 #include <vm/seg_vn.h>
65 #include <vm/anon.h>
66
67 /*
68 * kstats for segment advise
69 */
70 segadvstat_t segadvstat = {
71 { "MADV_FREE_hit", KSTAT_DATA_ULONG },
72 { "MADV_FREE_miss", KSTAT_DATA_ULONG },
73 };
74
75 kstat_named_t *segadvstat_ptr = (kstat_named_t *)&segadvstat;
76 uint_t segadvstat_ndata = sizeof (segadvstat) / sizeof (kstat_named_t);
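/*
 * These counters are exported through the "segadvstat" named kstat that
 * seg_init() creates under the "unix" module; on a live system they can
 * typically be inspected with something like `kstat -m unix -n segadvstat`
 * (illustrative invocation, not part of this file).
 */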
77
78 /*
79 * entry in the segment page cache
80 */
81 struct seg_pcache {
82 struct seg_pcache *p_hnext; /* list for hashed blocks */
83 struct seg_pcache *p_hprev;
84 pcache_link_t p_plink; /* per segment/amp list */
85 void *p_htag0; /* segment/amp pointer */
86 caddr_t p_addr; /* base address/anon_idx */
87 size_t p_len; /* total bytes */
88 size_t p_wlen; /* writable bytes at p_addr */
89 struct page **p_pp; /* pp shadow list */
90 seg_preclaim_cbfunc_t p_callback; /* reclaim callback function */
91 clock_t p_lbolt; /* lbolt from last use */
92 struct seg_phash *p_hashp; /* our pcache hash bucket */
93 uint_t p_active; /* active count */
94 uchar_t p_write; /* true if S_WRITE */
95 uchar_t p_ref; /* reference byte */
96 ushort_t p_flags; /* bit flags */
97 };
98
99 struct seg_phash {
100 struct seg_pcache *p_hnext; /* list for hashed blocks */
101 struct seg_pcache *p_hprev;
102 kmutex_t p_hmutex; /* protects hash bucket */
103 pcache_link_t p_halink[2]; /* active bucket linkages */
104 };
105
106 struct seg_phash_wired {
107 struct seg_pcache *p_hnext; /* list for hashed blocks */
108 struct seg_pcache *p_hprev;
109 kmutex_t p_hmutex; /* protects hash bucket */
110 };
111
112 /*
113 * A parameter to control the maximum number of bytes that can be
114 * purged from pcache at a time.
115 */
116 #define P_MAX_APURGE_BYTES (1024 * 1024 * 1024)
117
118 /*
119 * log2(fraction of pcache to reclaim at a time).
120 */
121 #define P_SHRINK_SHFT (5)
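/*
 * Taken together, these two limits bound the work done by one pass of the
 * asynchronous purge thread: seg_ppurge_async() targets roughly
 * seg_plocked_window >> seg_pshrink_shift pages (1/32 of the cached window
 * with the default shift of 5), clipped to seg_pmaxapurge_npages
 * (btop(P_MAX_APURGE_BYTES), i.e. 1GB worth of pages by default), or to
 * desfree if that is larger when memory is low.
 */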
122
123 /*
124 * The following variables can be tuned via /etc/system.
125 */
126
127 int segpcache_enabled = 1; /* if 1, shadow lists are cached */
128 pgcnt_t segpcache_maxwindow = 0; /* max # of pages that can be cached */
129 ulong_t segpcache_hashsize_win = 0; /* # of non wired buckets */
130 ulong_t segpcache_hashsize_wired = 0; /* # of wired buckets */
131 int segpcache_reap_sec = 1; /* reap check rate in secs */
132 clock_t segpcache_reap_ticks = 0; /* reap interval in ticks */
133 int segpcache_pcp_maxage_sec = 1; /* pcp max age in secs */
134 clock_t segpcache_pcp_maxage_ticks = 0; /* pcp max age in ticks */
135 int segpcache_shrink_shift = P_SHRINK_SHFT; /* log2 reap fraction */
136 pgcnt_t segpcache_maxapurge_bytes = P_MAX_APURGE_BYTES; /* max purge bytes */
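/*
 * Illustrative /etc/system settings (hypothetical values, shown only as a
 * sketch of how the tunables above are typically overridden):
 *
 *	set segpcache_enabled = 1
 *	set segpcache_hashsize_win = 0x10000
 *	set segpcache_maxwindow = 0x80000
 *
 * As with any /etc/system change, a reboot is required for these to take
 * effect.
 */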
137
138 static kmutex_t seg_pcache_mtx; /* protects seg_pdisabled counter */
139 static kmutex_t seg_pasync_mtx; /* protects async thread scheduling */
140 static kcondvar_t seg_pasync_cv;
141
142 #pragma align 64(pctrl1)
143 #pragma align 64(pctrl2)
144 #pragma align 64(pctrl3)
145
146 /*
147 * Keep frequently used variables together in one cache line.
148 */
149 static struct p_ctrl1 {
150 uint_t p_disabled; /* if not 0, caching temporarily off */
151 pgcnt_t p_maxwin; /* max # of pages that can be cached */
152 size_t p_hashwin_sz; /* # of non wired buckets */
153 struct seg_phash *p_htabwin; /* hash table for non wired entries */
154 size_t p_hashwired_sz; /* # of wired buckets */
155 struct seg_phash_wired *p_htabwired; /* hash table for wired entries */
156 kmem_cache_t *p_kmcache; /* kmem cache for seg_pcache structs */
157 #ifdef _LP64
158 ulong_t pad[1];
159 #endif /* _LP64 */
160 } pctrl1;
161
162 static struct p_ctrl2 {
163 kmutex_t p_mem_mtx; /* protects window counter and p_halinks */
164 pgcnt_t p_locked_win; /* # pages from window */
165 pgcnt_t p_locked; /* # of pages cached by pagelock */
166 uchar_t p_ahcur; /* current active links for insert/delete */
167 uchar_t p_athr_on; /* async reclaim thread is running. */
168 pcache_link_t p_ahhead[2]; /* active buckets linkages */
169 } pctrl2;
170
171 static struct p_ctrl3 {
172 clock_t p_pcp_maxage; /* max pcp age in ticks */
173 ulong_t p_athr_empty_ahb; /* athread walk stats */
174 ulong_t p_athr_full_ahb; /* athread walk stats */
175 pgcnt_t p_maxapurge_npages; /* max pages to purge at a time */
176 int p_shrink_shft; /* reap shift factor */
177 #ifdef _LP64
178 ulong_t pad[3];
179 #endif /* _LP64 */
180 } pctrl3;
181
182 #define seg_pdisabled pctrl1.p_disabled
183 #define seg_pmaxwindow pctrl1.p_maxwin
184 #define seg_phashsize_win pctrl1.p_hashwin_sz
185 #define seg_phashtab_win pctrl1.p_htabwin
186 #define seg_phashsize_wired pctrl1.p_hashwired_sz
187 #define seg_phashtab_wired pctrl1.p_htabwired
188 #define seg_pkmcache pctrl1.p_kmcache
189 #define seg_pmem_mtx pctrl2.p_mem_mtx
190 #define seg_plocked_window pctrl2.p_locked_win
191 #define seg_plocked pctrl2.p_locked
192 #define seg_pahcur pctrl2.p_ahcur
193 #define seg_pathr_on pctrl2.p_athr_on
194 #define seg_pahhead pctrl2.p_ahhead
195 #define seg_pmax_pcpage pctrl3.p_pcp_maxage
196 #define seg_pathr_empty_ahb pctrl3.p_athr_empty_ahb
197 #define seg_pathr_full_ahb pctrl3.p_athr_full_ahb
198 #define seg_pshrink_shift pctrl3.p_shrink_shft
199 #define seg_pmaxapurge_npages pctrl3.p_maxapurge_npages
200
201 #define P_HASHWIN_MASK (seg_phashsize_win - 1)
202 #define P_HASHWIRED_MASK (seg_phashsize_wired - 1)
203 #define P_BASESHIFT (6)
204
205 kthread_t *seg_pasync_thr;
206
207 extern struct seg_ops segvn_ops;
208 extern struct seg_ops segspt_shmops;
209
210 #define IS_PFLAGS_WIRED(flags) ((flags) & SEGP_FORCE_WIRED)
211 #define IS_PCP_WIRED(pcp) IS_PFLAGS_WIRED((pcp)->p_flags)
212
213 #define LBOLT_DELTA(t) ((ulong_t)(ddi_get_lbolt() - (t)))
214
215 #define PCP_AGE(pcp) LBOLT_DELTA((pcp)->p_lbolt)
216
217 /*
218 * htag0 argument can be a seg or amp pointer.
219 */
220 #define P_HASHBP(seg, htag0, addr, flags) \
221 (IS_PFLAGS_WIRED((flags)) ? \
222 ((struct seg_phash *)&seg_phashtab_wired[P_HASHWIRED_MASK & \
223 ((uintptr_t)(htag0) >> P_BASESHIFT)]) : \
224 (&seg_phashtab_win[P_HASHWIN_MASK & \
225 (((uintptr_t)(htag0) >> 3) ^ \
226 ((uintptr_t)(addr) >> ((flags & SEGP_PSHIFT) ? \
227 (flags >> 16) : page_get_shift((seg)->s_szc))))]))
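/*
 * In other words (summarizing the macro above): wired entries hash into the
 * wired table on the tag alone, using the low bits of (htag0 >> P_BASESHIFT),
 * since the callers currently insert only one wired entry per segment. Non
 * wired entries hash on (htag0 >> 3) XORed with the address shifted by either
 * a caller supplied page shift (flags >> 16, when SEGP_PSHIFT is set in
 * flags) or the shift for the segment's page size, so that entries from the
 * same segment or amp spread across many buckets.
 */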
228
229 /*
230 * htag0 argument can be a seg or amp pointer.
231 */
232 #define P_MATCH(pcp, htag0, addr, len) \
233 ((pcp)->p_htag0 == (htag0) && \
234 (pcp)->p_addr == (addr) && \
235 (pcp)->p_len >= (len))
236
237 #define P_MATCH_PP(pcp, htag0, addr, len, pp) \
238 ((pcp)->p_pp == (pp) && \
239 (pcp)->p_htag0 == (htag0) && \
240 (pcp)->p_addr == (addr) && \
241 (pcp)->p_len >= (len))
242
243 #define plink2pcache(pl) ((struct seg_pcache *)((uintptr_t)(pl) - \
244 offsetof(struct seg_pcache, p_plink)))
245
246 #define hlink2phash(hl, l) ((struct seg_phash *)((uintptr_t)(hl) - \
247 offsetof(struct seg_phash, p_halink[l])))
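/*
 * plink2pcache() and hlink2phash() are "containerof" style conversions: given
 * a pointer to a pcache_link_t embedded inside a seg_pcache or seg_phash,
 * they recover a pointer to the enclosing structure by subtracting the
 * member's offset.
 */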
248
249 /*
250 * seg_padd_abuck()/seg_premove_abuck() link and unlink hash buckets from
251 * active hash bucket lists. We maintain active bucket lists to reduce the
252 * overhead of finding active buckets during asynchronous purging since there
253 * can be 10s of millions of buckets on a large system but only a small subset
254 * of them in actual use.
255 *
256 * There are two active bucket lists. The current active list (as per
257 * seg_pahcur) is used by seg_pinsert()/seg_pinactive()/seg_ppurge() to add
258 * and delete buckets. The other list is used by the asynchronous purge
259 * thread. This allows the purge thread to walk its active list without
260 * holding seg_pmem_mtx for a long time. When the asynchronous thread is done
261 * with its list it switches to the current active list and makes the list it
262 * just finished processing the new current active list.
263 *
264 * seg_padd_abuck() only adds the bucket to the current list if the bucket is
265 * not yet on any list. seg_premove_abuck() may remove the bucket from either
266 * list. If the bucket is on the current list it will always be removed.
267 * Otherwise the bucket is only removed if the asynchronous purge thread is
268 * not currently running or seg_premove_abuck() is called by the asynchronous
269 * purge thread itself. A given bucket can only be on one of the active lists
270 * at a time. These routines should be called with the per bucket lock held;
271 * they use seg_pmem_mtx to protect list updates. seg_padd_abuck() must be
272 * called after the first entry is added to the bucket chain and
273 * seg_premove_abuck() must be called after the last pcp entry is deleted from
274 * its chain. Holding the per bucket lock avoids a potential race in which
275 * seg_premove_abuck() removes a bucket after pcp entries have been added to
276 * its chain since the caller last checked that the bucket had no entries.
277 * (Such a race would cause the loss of an active bucket from the active lists.)
278 *
279 * Both lists are circular doubly linked lists anchored at seg_pahhead heads.
280 * New entries are added to the end of the list since LRU is used as the
281 * purging policy.
282 */
283 static void
284 seg_padd_abuck(struct seg_phash *hp)
285 {
286 int lix;
287
288 ASSERT(MUTEX_HELD(&hp->p_hmutex));
289 ASSERT((struct seg_phash *)hp->p_hnext != hp);
290 ASSERT((struct seg_phash *)hp->p_hprev != hp);
291 ASSERT(hp->p_hnext == hp->p_hprev);
292 ASSERT(!IS_PCP_WIRED(hp->p_hnext));
293 ASSERT(hp->p_hnext->p_hnext == (struct seg_pcache *)hp);
294 ASSERT(hp->p_hprev->p_hprev == (struct seg_pcache *)hp);
295 ASSERT(hp >= seg_phashtab_win &&
296 hp < &seg_phashtab_win[seg_phashsize_win]);
297
298 /*
299 * This bucket can already be on one of active lists
300 * since seg_premove_abuck() may have failed to remove it
301 * before.
302 */
303 mutex_enter(&seg_pmem_mtx);
304 lix = seg_pahcur;
305 ASSERT(lix >= 0 && lix <= 1);
306 if (hp->p_halink[lix].p_lnext != NULL) {
307 ASSERT(hp->p_halink[lix].p_lprev != NULL);
308 ASSERT(hp->p_halink[!lix].p_lnext == NULL);
309 ASSERT(hp->p_halink[!lix].p_lprev == NULL);
310 mutex_exit(&seg_pmem_mtx);
311 return;
312 }
313 ASSERT(hp->p_halink[lix].p_lprev == NULL);
314
315 /*
316 * If this bucket is still on list !lix async thread can't yet remove
317 * it since we hold here per bucket lock. In this case just return
318 * since async thread will eventually find and process this bucket.
319 */
320 if (hp->p_halink[!lix].p_lnext != NULL) {
321 ASSERT(hp->p_halink[!lix].p_lprev != NULL);
322 mutex_exit(&seg_pmem_mtx);
323 return;
324 }
325 ASSERT(hp->p_halink[!lix].p_lprev == NULL);
326 /*
327 * This bucket is not on any active bucket list yet.
328 * Add the bucket to the tail of current active list.
329 */
330 hp->p_halink[lix].p_lnext = &seg_pahhead[lix];
331 hp->p_halink[lix].p_lprev = seg_pahhead[lix].p_lprev;
332 seg_pahhead[lix].p_lprev->p_lnext = &hp->p_halink[lix];
333 seg_pahhead[lix].p_lprev = &hp->p_halink[lix];
334 mutex_exit(&seg_pmem_mtx);
335 }
336
337 static void
338 seg_premove_abuck(struct seg_phash *hp, int athr)
339 {
340 int lix;
341
342 ASSERT(MUTEX_HELD(&hp->p_hmutex));
343 ASSERT((struct seg_phash *)hp->p_hnext == hp);
344 ASSERT((struct seg_phash *)hp->p_hprev == hp);
345 ASSERT(hp >= seg_phashtab_win &&
346 hp < &seg_phashtab_win[seg_phashsize_win]);
347
348 if (athr) {
349 ASSERT(seg_pathr_on);
350 ASSERT(seg_pahcur <= 1);
351 /*
352 * We are called by asynchronous thread that found this bucket
353 * on not currently active (i.e. !seg_pahcur) list. Remove it
354 * from there. Per bucket lock we are holding makes sure
355 * seg_pinsert() can't sneak in and add pcp entries to this
356 * bucket right before we remove the bucket from its list.
357 */
358 lix = !seg_pahcur;
359 ASSERT(hp->p_halink[lix].p_lnext != NULL);
360 ASSERT(hp->p_halink[lix].p_lprev != NULL);
361 ASSERT(hp->p_halink[!lix].p_lnext == NULL);
362 ASSERT(hp->p_halink[!lix].p_lprev == NULL);
363 hp->p_halink[lix].p_lnext->p_lprev = hp->p_halink[lix].p_lprev;
364 hp->p_halink[lix].p_lprev->p_lnext = hp->p_halink[lix].p_lnext;
365 hp->p_halink[lix].p_lnext = NULL;
366 hp->p_halink[lix].p_lprev = NULL;
367 return;
368 }
369
370 mutex_enter(&seg_pmem_mtx);
371 lix = seg_pahcur;
372 ASSERT(lix >= 0 && lix <= 1);
373
374 /*
375 * If the bucket is on currently active list just remove it from
376 * there.
377 */
378 if (hp->p_halink[lix].p_lnext != NULL) {
379 ASSERT(hp->p_halink[lix].p_lprev != NULL);
380 ASSERT(hp->p_halink[!lix].p_lnext == NULL);
381 ASSERT(hp->p_halink[!lix].p_lprev == NULL);
382 hp->p_halink[lix].p_lnext->p_lprev = hp->p_halink[lix].p_lprev;
383 hp->p_halink[lix].p_lprev->p_lnext = hp->p_halink[lix].p_lnext;
384 hp->p_halink[lix].p_lnext = NULL;
385 hp->p_halink[lix].p_lprev = NULL;
386 mutex_exit(&seg_pmem_mtx);
387 return;
388 }
389 ASSERT(hp->p_halink[lix].p_lprev == NULL);
390
391 /*
392 * If asynchronous thread is not running we can remove the bucket from
393 * not currently active list. The bucket must be on this list since we
394 * already checked that it's not on the other list and the bucket from
395 * which we just deleted the last pcp entry must be still on one of the
396 * active bucket lists.
397 */
398 lix = !lix;
399 ASSERT(hp->p_halink[lix].p_lnext != NULL);
400 ASSERT(hp->p_halink[lix].p_lprev != NULL);
401
402 if (!seg_pathr_on) {
403 hp->p_halink[lix].p_lnext->p_lprev = hp->p_halink[lix].p_lprev;
404 hp->p_halink[lix].p_lprev->p_lnext = hp->p_halink[lix].p_lnext;
405 hp->p_halink[lix].p_lnext = NULL;
406 hp->p_halink[lix].p_lprev = NULL;
407 }
408 mutex_exit(&seg_pmem_mtx);
409 }
410
411 /*
412 * Check if the bucket pointed to by hp already has a pcp entry that matches
413 * the request's htag0, addr and len. Set *found to 1 if a match is found and
414 * to 0 otherwise. Also delete matching entries that cover a smaller address
415 * range but start at the same address as the addr argument. Return the list
416 * of deleted entries, if any. This is an internal helper function called from
417 * seg_pinsert() only for non wired shadow lists. The caller already holds a
418 * per seg/amp list lock.
419 */
420 static struct seg_pcache *
421 seg_plookup_checkdup(struct seg_phash *hp, void *htag0,
422 caddr_t addr, size_t len, int *found)
423 {
424 struct seg_pcache *pcp;
425 struct seg_pcache *delcallb_list = NULL;
426
427 ASSERT(MUTEX_HELD(&hp->p_hmutex));
428
429 *found = 0;
430 for (pcp = hp->p_hnext; pcp != (struct seg_pcache *)hp;
431 pcp = pcp->p_hnext) {
432 ASSERT(pcp->p_hashp == hp);
433 if (pcp->p_htag0 == htag0 && pcp->p_addr == addr) {
434 ASSERT(!IS_PCP_WIRED(pcp));
435 if (pcp->p_len < len) {
436 pcache_link_t *plinkp;
437 if (pcp->p_active) {
438 continue;
439 }
440 plinkp = &pcp->p_plink;
441 plinkp->p_lprev->p_lnext = plinkp->p_lnext;
442 plinkp->p_lnext->p_lprev = plinkp->p_lprev;
443 pcp->p_hprev->p_hnext = pcp->p_hnext;
444 pcp->p_hnext->p_hprev = pcp->p_hprev;
445 pcp->p_hprev = delcallb_list;
446 delcallb_list = pcp;
447 } else {
448 *found = 1;
449 break;
450 }
451 }
452 }
453 return (delcallb_list);
454 }
455
456 /*
457 * Look up an address range in the pagelock cache. Return the shadow list and
458 * bump up the active count. If amp is not NULL use amp as the lookup tag,
459 * otherwise use seg as the lookup tag.
460 */
461 struct page **
462 seg_plookup(struct seg *seg, struct anon_map *amp, caddr_t addr, size_t len,
463 enum seg_rw rw, uint_t flags)
464 {
465 struct seg_pcache *pcp;
466 struct seg_phash *hp;
467 void *htag0;
468
469 ASSERT(seg != NULL);
470 ASSERT(rw == S_READ || rw == S_WRITE);
471
472 /*
473 * Skip the pagelock cache while DR is in progress or
474 * seg_pcache is off.
475 */
476 if (seg_pdisabled) {
477 return (NULL);
478 }
479 ASSERT(seg_phashsize_win != 0);
480
481 htag0 = (amp == NULL ? (void *)seg : (void *)amp);
482 hp = P_HASHBP(seg, htag0, addr, flags);
483 mutex_enter(&hp->p_hmutex);
484 for (pcp = hp->p_hnext; pcp != (struct seg_pcache *)hp;
485 pcp = pcp->p_hnext) {
486 ASSERT(pcp->p_hashp == hp);
487 if (P_MATCH(pcp, htag0, addr, len)) {
488 ASSERT(IS_PFLAGS_WIRED(flags) == IS_PCP_WIRED(pcp));
489 /*
490 * If this request wants to write pages
491 * but write permissions starting from
492 * addr don't cover the entire length len,
493 * return lookup failure back to the caller.
494 * The caller will check protections and fail
495 * this pagelock operation with an EACCES error.
496 */
497 if (rw == S_WRITE && pcp->p_wlen < len) {
498 break;
499 }
500 if (pcp->p_active == UINT_MAX) {
501 break;
502 }
503 pcp->p_active++;
504 if (rw == S_WRITE && !pcp->p_write) {
505 pcp->p_write = 1;
506 }
507 mutex_exit(&hp->p_hmutex);
508 return (pcp->p_pp);
509 }
510 }
511 mutex_exit(&hp->p_hmutex);
512 return (NULL);
513 }
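/*
 * A minimal sketch of how a segment driver's pagelock path is expected to use
 * seg_plookup() above and seg_pinsert()/seg_pinactive() below (hypothetical
 * driver code, not part of this file; segvn_pagelock() is the real example,
 * and driver_reclaim_callback is an assumed name):
 *
 *	if ((pplist = seg_plookup(seg, amp, addr, len, rw, 0)) != NULL) {
 *		*ppp = pplist;		// cache hit, reuse the shadow list
 *		return (0);
 *	}
 *	// ... build the shadow list and lock the pages ...
 *	(void) seg_pinsert(seg, amp, addr, len, wlen, pplist, rw, 0,
 *	    driver_reclaim_callback);
 *
 * and on the corresponding unlock (L_PAGEUNLOCK) path:
 *
 *	seg_pinactive(seg, amp, addr, len, pplist, rw, 0,
 *	    driver_reclaim_callback);
 */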
514
515 /*
516 * Mark an address range inactive. If the cache is off, the address range is
517 * not in the cache, or another shadow list that covers a bigger range is
518 * found, we call the segment driver to reclaim the pages. Otherwise just
519 * decrement the active count and set the ref bit. If amp is not NULL use amp
520 * as the lookup tag, otherwise use seg as the lookup tag.
521 */
522 void
523 seg_pinactive(struct seg *seg, struct anon_map *amp, caddr_t addr,
524 size_t len, struct page **pp, enum seg_rw rw, uint_t flags,
525 seg_preclaim_cbfunc_t callback)
526 {
527 struct seg_pcache *pcp;
528 struct seg_phash *hp;
529 kmutex_t *pmtx = NULL;
530 pcache_link_t *pheadp;
531 void *htag0;
532 pgcnt_t npages = 0;
533 int keep = 0;
534
535 ASSERT(seg != NULL);
536 ASSERT(rw == S_READ || rw == S_WRITE);
537
538 htag0 = (amp == NULL ? (void *)seg : (void *)amp);
539
540 /*
541 * Skip lookup if pcache is not configured.
542 */
543 if (seg_phashsize_win == 0) {
544 goto out;
545 }
546
547 /*
548 * Grab per seg/amp lock before hash lock if we are going to remove
549 * inactive entry from pcache.
550 */
551 if (!IS_PFLAGS_WIRED(flags) && seg_pdisabled) {
552 if (amp == NULL) {
553 pheadp = &seg->s_phead;
554 pmtx = &seg->s_pmtx;
555 } else {
556 pheadp = &amp->a_phead;
557 pmtx = &amp->a_pmtx;
558 }
559 mutex_enter(pmtx);
560 }
561
562 hp = P_HASHBP(seg, htag0, addr, flags);
563 mutex_enter(&hp->p_hmutex);
564 again:
565 for (pcp = hp->p_hnext; pcp != (struct seg_pcache *)hp;
566 pcp = pcp->p_hnext) {
567 ASSERT(pcp->p_hashp == hp);
568 if (P_MATCH_PP(pcp, htag0, addr, len, pp)) {
569 ASSERT(IS_PFLAGS_WIRED(flags) == IS_PCP_WIRED(pcp));
570 ASSERT(pcp->p_active);
571 if (keep) {
572 /*
573 * Don't remove this pcp entry
574 * if we didn't find duplicate
575 * shadow lists on second search.
576 * Somebody removed those duplicates
577 * since we dropped hash lock after first
578 * search.
579 */
580 ASSERT(pmtx != NULL);
581 ASSERT(!IS_PFLAGS_WIRED(flags));
582 mutex_exit(pmtx);
583 pmtx = NULL;
584 }
585 pcp->p_active--;
586 if (pcp->p_active == 0 && (pmtx != NULL ||
587 (seg_pdisabled && IS_PFLAGS_WIRED(flags)))) {
588
589 /*
590 * This entry is no longer active. Remove it
591 * now either because pcaching is temporarily
592 * disabled or there're other pcp entries that
593 * can match this pagelock request (i.e. this
594 * entry is a duplicate).
595 */
596
597 ASSERT(callback == pcp->p_callback);
598 if (pmtx != NULL) {
599 pcache_link_t *plinkp = &pcp->p_plink;
600 ASSERT(!IS_PCP_WIRED(pcp));
601 ASSERT(pheadp->p_lnext != pheadp);
602 ASSERT(pheadp->p_lprev != pheadp);
603 plinkp->p_lprev->p_lnext =
604 plinkp->p_lnext;
605 plinkp->p_lnext->p_lprev =
606 plinkp->p_lprev;
607 }
608 pcp->p_hprev->p_hnext = pcp->p_hnext;
609 pcp->p_hnext->p_hprev = pcp->p_hprev;
610 if (!IS_PCP_WIRED(pcp) &&
611 hp->p_hnext == (struct seg_pcache *)hp) {
612 /*
613 * We removed the last entry from this
614 * bucket. Now remove the bucket from
615 * its active list.
616 */
617 seg_premove_abuck(hp, 0);
618 }
619 mutex_exit(&hp->p_hmutex);
620 if (pmtx != NULL) {
621 mutex_exit(pmtx);
622 }
623 len = pcp->p_len;
624 npages = btop(len);
625 if (rw != S_WRITE && pcp->p_write) {
626 rw = S_WRITE;
627 }
628 kmem_cache_free(seg_pkmcache, pcp);
629 goto out;
630 } else {
631 /*
632 * We found a matching pcp entry but will not
633 * free it right away even if it's no longer
634 * active.
635 */
636 if (!pcp->p_active && !IS_PCP_WIRED(pcp)) {
637 /*
638 * Set the reference bit and mark the
639 * time of last access to this pcp
640 * so that asynchronous thread doesn't
641 * free it immediately since
642 * it may be reactivated very soon.
643 */
644 pcp->p_lbolt = ddi_get_lbolt();
645 pcp->p_ref = 1;
646 }
647 mutex_exit(&hp->p_hmutex);
648 if (pmtx != NULL) {
649 mutex_exit(pmtx);
650 }
651 return;
652 }
653 } else if (!IS_PFLAGS_WIRED(flags) &&
654 P_MATCH(pcp, htag0, addr, len)) {
655 /*
656 * This is a duplicate pcp entry. This situation may
657 * happen if a bigger shadow list that covers our
658 * range was added while our entry was still active.
659 * Now we can free our pcp entry if it becomes
660 * inactive.
661 */
662 if (!pcp->p_active) {
663 /*
664 * Mark this entry as referenced just in case
665 * we'll free our own pcp entry soon.
666 */
667 pcp->p_lbolt = ddi_get_lbolt();
668 pcp->p_ref = 1;
669 }
670 if (pmtx != NULL) {
671 /*
672 * we are already holding pmtx and found a
673 * duplicate. Don't keep our own pcp entry.
674 */
675 keep = 0;
676 continue;
677 }
678 /*
679 * We have to use mutex_tryenter to attempt to lock
680 * seg/amp list lock since we already hold hash lock
681 * and seg/amp list lock is above hash lock in lock
682 * order. If mutex_tryenter fails, drop the hash lock,
683 * retake both locks in the correct order, and re-search
684 * this hash chain.
685 */
686 ASSERT(keep == 0);
687 if (amp == NULL) {
688 pheadp = &seg->s_phead;
689 pmtx = &seg->s_pmtx;
690 } else {
691 pheadp = &amp->a_phead;
692 pmtx = &amp->a_pmtx;
693 }
694 if (!mutex_tryenter(pmtx)) {
695 mutex_exit(&hp->p_hmutex);
696 mutex_enter(pmtx);
697 mutex_enter(&hp->p_hmutex);
698 /*
699 * If we don't find bigger shadow list on
700 * second search (it may happen since we
701 * dropped bucket lock) keep the entry that
702 * matches our own shadow list.
703 */
704 keep = 1;
705 goto again;
706 }
707 }
708 }
709 mutex_exit(&hp->p_hmutex);
710 if (pmtx != NULL) {
711 mutex_exit(pmtx);
712 }
713 out:
714 (*callback)(htag0, addr, len, pp, rw, 0);
715 if (npages) {
716 mutex_enter(&seg_pmem_mtx);
717 ASSERT(seg_plocked >= npages);
718 seg_plocked -= npages;
719 if (!IS_PFLAGS_WIRED(flags)) {
720 ASSERT(seg_plocked_window >= npages);
721 seg_plocked_window -= npages;
722 }
723 mutex_exit(&seg_pmem_mtx);
724 }
725
726 }
727
728 #ifdef DEBUG
729 static uint32_t p_insert_chk_mtbf = 0;
730 #endif
731
732 /*
733 * seg_pinsert_check() is used by segment drivers to predict whether
734 * a call to seg_pinsert() will fail and thereby avoid wasteful pre-processing.
735 */
736 /*ARGSUSED*/
737 int
738 seg_pinsert_check(struct seg *seg, struct anon_map *amp, caddr_t addr,
739 size_t len, uint_t flags)
740 {
741 ASSERT(seg != NULL);
742
743 #ifdef DEBUG
744 if (p_insert_chk_mtbf && !(gethrtime() % p_insert_chk_mtbf)) {
745 return (SEGP_FAIL);
746 }
747 #endif
748
749 if (seg_pdisabled) {
750 return (SEGP_FAIL);
751 }
752 ASSERT(seg_phashsize_win != 0);
753
754 if (IS_PFLAGS_WIRED(flags)) {
755 return (SEGP_SUCCESS);
756 }
757
758 if (seg_plocked_window + btop(len) > seg_pmaxwindow) {
759 return (SEGP_FAIL);
760 }
761
762 if (freemem < desfree) {
763 return (SEGP_FAIL);
764 }
765
766 return (SEGP_SUCCESS);
767 }
768
769 #ifdef DEBUG
770 static uint32_t p_insert_mtbf = 0;
771 #endif
772
773 /*
774 * Insert address range with shadow list into pagelock cache if there's no
775 * shadow list already cached for this address range. If the cache is off or
776 * caching is temporarily disabled or the allowed 'window' is exceeded return
777 * SEGP_FAIL. Otherwise return SEGP_SUCCESS.
778 *
779 * For non wired shadow lists (segvn case) include address in the hashing
780 * function to avoid linking all the entries from the same segment or amp on
781 * the same bucket. amp is used instead of seg if amp is not NULL. Non wired
782 * pcache entries are also linked on a per segment/amp list so that all
783 * entries can be found quickly during seg/amp purge without walking the
784 * entire pcache hash table. For wired shadow lists (segspt case) we
785 * don't use address hashing and per segment linking because the caller
786 * currently inserts only one entry per segment that covers the entire
787 * segment. If we used per segment linking even for segspt it would complicate
788 * seg_ppurge_wiredpp() locking.
789 *
790 * Both hash bucket and per seg/amp locks need to be held before adding a non
791 * wired entry to hash and per seg/amp lists. per seg/amp lock should be taken
792 * first.
793 *
794 * This function will also remove from pcache old inactive shadow lists that
795 * overlap with this request but cover smaller range for the same start
796 * address.
797 */
798 int
799 seg_pinsert(struct seg *seg, struct anon_map *amp, caddr_t addr, size_t len,
800 size_t wlen, struct page **pp, enum seg_rw rw, uint_t flags,
801 seg_preclaim_cbfunc_t callback)
802 {
803 struct seg_pcache *pcp;
804 struct seg_phash *hp;
805 pgcnt_t npages;
806 pcache_link_t *pheadp;
807 kmutex_t *pmtx;
808 struct seg_pcache *delcallb_list = NULL;
809
810 ASSERT(seg != NULL);
811 ASSERT(rw == S_READ || rw == S_WRITE);
812 ASSERT(rw == S_READ || wlen == len);
813 ASSERT(rw == S_WRITE || wlen <= len);
814 ASSERT(amp == NULL || wlen == len);
815
816 #ifdef DEBUG
817 if (p_insert_mtbf && !(gethrtime() % p_insert_mtbf)) {
818 return (SEGP_FAIL);
819 }
820 #endif
821
822 if (seg_pdisabled) {
823 return (SEGP_FAIL);
824 }
825 ASSERT(seg_phashsize_win != 0);
826
827 ASSERT((len & PAGEOFFSET) == 0);
828 npages = btop(len);
829 mutex_enter(&seg_pmem_mtx);
830 if (!IS_PFLAGS_WIRED(flags)) {
831 if (seg_plocked_window + npages > seg_pmaxwindow) {
832 mutex_exit(&seg_pmem_mtx);
833 return (SEGP_FAIL);
834 }
835 seg_plocked_window += npages;
836 }
837 seg_plocked += npages;
838 mutex_exit(&seg_pmem_mtx);
839
840 pcp = kmem_cache_alloc(seg_pkmcache, KM_SLEEP);
841 /*
842 * If amp is not NULL set htag0 to amp otherwise set it to seg.
843 */
844 if (amp == NULL) {
845 pcp->p_htag0 = (void *)seg;
846 pcp->p_flags = flags & 0xffff;
847 } else {
848 pcp->p_htag0 = (void *)amp;
849 pcp->p_flags = (flags & 0xffff) | SEGP_AMP;
850 }
851 pcp->p_addr = addr;
852 pcp->p_len = len;
853 pcp->p_wlen = wlen;
854 pcp->p_pp = pp;
855 pcp->p_write = (rw == S_WRITE);
856 pcp->p_callback = callback;
857 pcp->p_active = 1;
858
859 hp = P_HASHBP(seg, pcp->p_htag0, addr, flags);
860 if (!IS_PFLAGS_WIRED(flags)) {
861 int found;
862 void *htag0;
863 if (amp == NULL) {
864 pheadp = &seg->s_phead;
865 pmtx = &seg->s_pmtx;
866 htag0 = (void *)seg;
867 } else {
868 pheadp = &amp->a_phead;
869 pmtx = &amp->a_pmtx;
870 htag0 = (void *)amp;
871 }
872 mutex_enter(pmtx);
873 mutex_enter(&hp->p_hmutex);
874 delcallb_list = seg_plookup_checkdup(hp, htag0, addr,
875 len, &found);
876 if (found) {
877 mutex_exit(&hp->p_hmutex);
878 mutex_exit(pmtx);
879 mutex_enter(&seg_pmem_mtx);
880 seg_plocked -= npages;
881 seg_plocked_window -= npages;
882 mutex_exit(&seg_pmem_mtx);
883 kmem_cache_free(seg_pkmcache, pcp);
884 goto out;
885 }
886 pcp->p_plink.p_lnext = pheadp->p_lnext;
887 pcp->p_plink.p_lprev = pheadp;
888 pheadp->p_lnext->p_lprev = &pcp->p_plink;
889 pheadp->p_lnext = &pcp->p_plink;
890 } else {
891 mutex_enter(&hp->p_hmutex);
892 }
893 pcp->p_hashp = hp;
894 pcp->p_hnext = hp->p_hnext;
895 pcp->p_hprev = (struct seg_pcache *)hp;
896 hp->p_hnext->p_hprev = pcp;
897 hp->p_hnext = pcp;
898 if (!IS_PFLAGS_WIRED(flags) &&
899 hp->p_hprev == pcp) {
900 seg_padd_abuck(hp);
901 }
902 mutex_exit(&hp->p_hmutex);
903 if (!IS_PFLAGS_WIRED(flags)) {
904 mutex_exit(pmtx);
905 }
906
907 out:
908 npages = 0;
909 while (delcallb_list != NULL) {
910 pcp = delcallb_list;
911 delcallb_list = pcp->p_hprev;
912 ASSERT(!IS_PCP_WIRED(pcp) && !pcp->p_active);
913 (void) (*pcp->p_callback)(pcp->p_htag0, pcp->p_addr,
914 pcp->p_len, pcp->p_pp, pcp->p_write ? S_WRITE : S_READ, 0);
915 npages += btop(pcp->p_len);
916 kmem_cache_free(seg_pkmcache, pcp);
917 }
918 if (npages) {
919 ASSERT(!IS_PFLAGS_WIRED(flags));
920 mutex_enter(&seg_pmem_mtx);
921 ASSERT(seg_plocked >= npages);
922 ASSERT(seg_plocked_window >= npages);
923 seg_plocked -= npages;
924 seg_plocked_window -= npages;
925 mutex_exit(&seg_pmem_mtx);
926 }
927
928 return (SEGP_SUCCESS);
929 }
930
931 /*
932 * purge entries from the pagelock cache if not active
933 * and not recently used.
934 */
935 static void
936 seg_ppurge_async(int force)
937 {
938 struct seg_pcache *delcallb_list = NULL;
939 struct seg_pcache *pcp;
940 struct seg_phash *hp;
941 pgcnt_t npages = 0;
942 pgcnt_t npages_window = 0;
943 pgcnt_t npgs_to_purge;
944 pgcnt_t npgs_purged = 0;
945 int hlinks = 0;
946 int hlix;
947 pcache_link_t *hlinkp;
948 pcache_link_t *hlnextp = NULL;
949 int lowmem;
950 int trim;
951
952 ASSERT(seg_phashsize_win != 0);
953
954 /*
955 * if the cache is off or empty, return
956 */
957 if (seg_plocked == 0 || (!force && seg_plocked_window == 0)) {
958 return;
959 }
960
961 if (!force) {
962 lowmem = 0;
963 trim = 0;
964 if (freemem < lotsfree + needfree) {
965 spgcnt_t fmem = MAX((spgcnt_t)(freemem - needfree), 0);
966 if (fmem <= 5 * (desfree >> 2)) {
967 lowmem = 1;
968 } else if (fmem <= 7 * (lotsfree >> 3)) {
969 if (seg_plocked_window >=
970 (availrmem_initial >> 1)) {
971 lowmem = 1;
972 }
973 } else if (fmem < lotsfree) {
974 if (seg_plocked_window >=
975 3 * (availrmem_initial >> 2)) {
976 lowmem = 1;
977 }
978 }
979 }
980 if (seg_plocked_window >= 7 * (seg_pmaxwindow >> 3)) {
981 trim = 1;
982 }
983 if (!lowmem && !trim) {
984 return;
985 }
986 npgs_to_purge = seg_plocked_window >>
987 seg_pshrink_shift;
988 if (lowmem) {
989 npgs_to_purge = MIN(npgs_to_purge,
990 MAX(seg_pmaxapurge_npages, desfree));
991 } else {
992 npgs_to_purge = MIN(npgs_to_purge,
993 seg_pmaxapurge_npages);
994 }
995 if (npgs_to_purge == 0) {
996 return;
997 }
998 } else {
999 struct seg_phash_wired *hpw;
1000
1001 ASSERT(seg_phashsize_wired != 0);
1002
1003 for (hpw = seg_phashtab_wired;
1004 hpw < &seg_phashtab_wired[seg_phashsize_wired]; hpw++) {
1005
1006 if (hpw->p_hnext == (struct seg_pcache *)hpw) {
1007 continue;
1008 }
1009
1010 mutex_enter(&hpw->p_hmutex);
1011
1012 for (pcp = hpw->p_hnext;
1013 pcp != (struct seg_pcache *)hpw;
1014 pcp = pcp->p_hnext) {
1015
1016 ASSERT(IS_PCP_WIRED(pcp));
1017 ASSERT(pcp->p_hashp ==
1018 (struct seg_phash *)hpw);
1019
1020 if (pcp->p_active) {
1021 continue;
1022 }
1023 pcp->p_hprev->p_hnext = pcp->p_hnext;
1024 pcp->p_hnext->p_hprev = pcp->p_hprev;
1025 pcp->p_hprev = delcallb_list;
1026 delcallb_list = pcp;
1027 }
1028 mutex_exit(&hpw->p_hmutex);
1029 }
1030 }
1031
1032 mutex_enter(&seg_pmem_mtx);
1033 if (seg_pathr_on) {
1034 mutex_exit(&seg_pmem_mtx);
1035 goto runcb;
1036 }
1037 seg_pathr_on = 1;
1038 mutex_exit(&seg_pmem_mtx);
1039 ASSERT(seg_pahcur <= 1);
1040 hlix = !seg_pahcur;
1041
1042 again:
1043 for (hlinkp = seg_pahhead[hlix].p_lnext; hlinkp != &seg_pahhead[hlix];
1044 hlinkp = hlnextp) {
1045
1046 hlnextp = hlinkp->p_lnext;
1047 ASSERT(hlnextp != NULL);
1048
1049 hp = hlink2phash(hlinkp, hlix);
1050 if (hp->p_hnext == (struct seg_pcache *)hp) {
1051 seg_pathr_empty_ahb++;
1052 continue;
1053 }
1054 seg_pathr_full_ahb++;
1055 mutex_enter(&hp->p_hmutex);
1056
1057 for (pcp = hp->p_hnext; pcp != (struct seg_pcache *)hp;
1058 pcp = pcp->p_hnext) {
1059 pcache_link_t *pheadp;
1060 pcache_link_t *plinkp;
1061 void *htag0;
1062 kmutex_t *pmtx;
1063
1064 ASSERT(!IS_PCP_WIRED(pcp));
1065 ASSERT(pcp->p_hashp == hp);
1066
1067 if (pcp->p_active) {
1068 continue;
1069 }
1070 if (!force && pcp->p_ref &&
1071 PCP_AGE(pcp) < seg_pmax_pcpage) {
1072 pcp->p_ref = 0;
1073 continue;
1074 }
1075 plinkp = &pcp->p_plink;
1076 htag0 = pcp->p_htag0;
1077 if (pcp->p_flags & SEGP_AMP) {
1078 pheadp = &((amp_t *)htag0)->a_phead;
1079 pmtx = &((amp_t *)htag0)->a_pmtx;
1080 } else {
1081 pheadp = &((seg_t *)htag0)->s_phead;
1082 pmtx = &((seg_t *)htag0)->s_pmtx;
1083 }
1084 if (!mutex_tryenter(pmtx)) {
1085 continue;
1086 }
1087 ASSERT(pheadp->p_lnext != pheadp);
1088 ASSERT(pheadp->p_lprev != pheadp);
1089 plinkp->p_lprev->p_lnext =
1090 plinkp->p_lnext;
1091 plinkp->p_lnext->p_lprev =
1092 plinkp->p_lprev;
1093 pcp->p_hprev->p_hnext = pcp->p_hnext;
1094 pcp->p_hnext->p_hprev = pcp->p_hprev;
1095 mutex_exit(pmtx);
1096 pcp->p_hprev = delcallb_list;
1097 delcallb_list = pcp;
1098 npgs_purged += btop(pcp->p_len);
1099 }
1100 if (hp->p_hnext == (struct seg_pcache *)hp) {
1101 seg_premove_abuck(hp, 1);
1102 }
1103 mutex_exit(&hp->p_hmutex);
1104 if (npgs_purged >= seg_plocked_window) {
1105 break;
1106 }
1107 if (!force) {
1108 if (npgs_purged >= npgs_to_purge) {
1109 break;
1110 }
1111 if (!trim && !(seg_pathr_full_ahb & 15)) {
1112 ASSERT(lowmem);
1113 if (freemem >= lotsfree + needfree) {
1114 break;
1115 }
1116 }
1117 }
1118 }
1119
1120 if (hlinkp == &seg_pahhead[hlix]) {
1121 /*
1122 * We processed the entire hlix active bucket list
1123 * but didn't find enough pages to reclaim.
1124 * Switch the lists and walk the other list
1125 * if we haven't done it yet.
1126 */
1127 mutex_enter(&seg_pmem_mtx);
1128 ASSERT(seg_pathr_on);
1129 ASSERT(seg_pahcur == !hlix);
1130 seg_pahcur = hlix;
1131 mutex_exit(&seg_pmem_mtx);
1132 if (++hlinks < 2) {
1133 hlix = !hlix;
1134 goto again;
1135 }
1136 } else if ((hlinkp = hlnextp) != &seg_pahhead[hlix] &&
1137 seg_pahhead[hlix].p_lnext != hlinkp) {
1138 ASSERT(hlinkp != NULL);
1139 ASSERT(hlinkp->p_lprev != &seg_pahhead[hlix]);
1140 ASSERT(seg_pahhead[hlix].p_lnext != &seg_pahhead[hlix]);
1141 ASSERT(seg_pahhead[hlix].p_lprev != &seg_pahhead[hlix]);
1142
1143 /*
1144 * Reinsert the header to point to hlinkp
1145 * so that we start from hlinkp bucket next time around.
1146 */
1147 seg_pahhead[hlix].p_lnext->p_lprev = seg_pahhead[hlix].p_lprev;
1148 seg_pahhead[hlix].p_lprev->p_lnext = seg_pahhead[hlix].p_lnext;
1149 seg_pahhead[hlix].p_lnext = hlinkp;
1150 seg_pahhead[hlix].p_lprev = hlinkp->p_lprev;
1151 hlinkp->p_lprev->p_lnext = &seg_pahhead[hlix];
1152 hlinkp->p_lprev = &seg_pahhead[hlix];
1153 }
1154
1155 mutex_enter(&seg_pmem_mtx);
1156 ASSERT(seg_pathr_on);
1157 seg_pathr_on = 0;
1158 mutex_exit(&seg_pmem_mtx);
1159
1160 runcb:
1161 /*
1162 * Run the delayed callback list. segments/amps can't go away until
1163 * callback is executed since they must have non 0 softlockcnt. That's
1164 * why we don't need to hold as/seg/amp locks to execute the callback.
1165 */
1166 while (delcallb_list != NULL) {
1167 pcp = delcallb_list;
1168 delcallb_list = pcp->p_hprev;
1169 ASSERT(!pcp->p_active);
1170 (void) (*pcp->p_callback)(pcp->p_htag0, pcp->p_addr,
1171 pcp->p_len, pcp->p_pp, pcp->p_write ? S_WRITE : S_READ, 1);
1172 npages += btop(pcp->p_len);
1173 if (!IS_PCP_WIRED(pcp)) {
1174 npages_window += btop(pcp->p_len);
1175 }
1176 kmem_cache_free(seg_pkmcache, pcp);
1177 }
1178 if (npages) {
1179 mutex_enter(&seg_pmem_mtx);
1180 ASSERT(seg_plocked >= npages);
1181 ASSERT(seg_plocked_window >= npages_window);
1182 seg_plocked -= npages;
1183 seg_plocked_window -= npages_window;
1184 mutex_exit(&seg_pmem_mtx);
1185 }
1186 }
1187
1188 /*
1189 * Remove cached shadow list entries for segment(s) from the hashtable. The
1190 * segments are identified by the pp array. This is useful for multiple segs
1191 * cached on behalf of a dummy segment (ISM/DISM) with a common pp array.
1192 */
1193 void
1194 seg_ppurge_wiredpp(struct page **pp)
1195 {
1196 struct seg_pcache *pcp;
1197 struct seg_phash_wired *hp;
1198 pgcnt_t npages = 0;
1199 struct seg_pcache *delcallb_list = NULL;
1200
1201 /*
1202 * if the cache is empty, return
1203 */
1204 if (seg_plocked == 0) {
1205 return;
1206 }
1207 ASSERT(seg_phashsize_wired != 0);
1208
1209 for (hp = seg_phashtab_wired;
1210 hp < &seg_phashtab_wired[seg_phashsize_wired]; hp++) {
1211 if (hp->p_hnext == (struct seg_pcache *)hp) {
1212 continue;
1213 }
1214 mutex_enter(&hp->p_hmutex);
1215 pcp = hp->p_hnext;
1216 while (pcp != (struct seg_pcache *)hp) {
1217 ASSERT(pcp->p_hashp == (struct seg_phash *)hp);
1218 ASSERT(IS_PCP_WIRED(pcp));
1219 /*
1220 * purge entries which are not active
1221 */
1222 if (!pcp->p_active && pcp->p_pp == pp) {
1223 ASSERT(pcp->p_htag0 != NULL);
1224 pcp->p_hprev->p_hnext = pcp->p_hnext;
1225 pcp->p_hnext->p_hprev = pcp->p_hprev;
1226 pcp->p_hprev = delcallb_list;
1227 delcallb_list = pcp;
1228 }
1229 pcp = pcp->p_hnext;
1230 }
1231 mutex_exit(&hp->p_hmutex);
1232 /*
1233 * segments can't go away until callback is executed since
1234 * they must have non 0 softlockcnt. That's why we don't
1235 * need to hold as/seg locks to execute the callback.
1236 */
1237 while (delcallb_list != NULL) {
1238 int done;
1239 pcp = delcallb_list;
1240 delcallb_list = pcp->p_hprev;
1241 ASSERT(!pcp->p_active);
1242 done = (*pcp->p_callback)(pcp->p_htag0, pcp->p_addr,
1243 pcp->p_len, pcp->p_pp,
1244 pcp->p_write ? S_WRITE : S_READ, 1);
1245 npages += btop(pcp->p_len);
1246 ASSERT(IS_PCP_WIRED(pcp));
1247 kmem_cache_free(seg_pkmcache, pcp);
1248 if (done) {
1249 ASSERT(delcallb_list == NULL);
1250 goto out;
1251 }
1252 }
1253 }
1254
1255 out:
1256 mutex_enter(&seg_pmem_mtx);
1257 ASSERT(seg_plocked >= npages);
1258 seg_plocked -= npages;
1259 mutex_exit(&seg_pmem_mtx);
1260 }
1261
1262 /*
1263 * Purge all entries for a given segment. Since we
1264 * call back into the segment driver directly for page
1265 * reclaim, the caller needs to hold the right locks.
1266 */
1267 void
1268 seg_ppurge(struct seg *seg, struct anon_map *amp, uint_t flags)
1269 {
1270 struct seg_pcache *delcallb_list = NULL;
1271 struct seg_pcache *pcp;
1272 struct seg_phash *hp;
1273 pgcnt_t npages = 0;
1274 void *htag0;
1275
1276 if (seg_plocked == 0) {
1277 return;
1278 }
1279 ASSERT(seg_phashsize_win != 0);
1280
1281 /*
1282 * If amp is not NULL use amp as a lookup tag otherwise use seg
1283 * as a lookup tag.
1284 */
1285 htag0 = (amp == NULL ? (void *)seg : (void *)amp);
1286 ASSERT(htag0 != NULL);
1287 if (IS_PFLAGS_WIRED(flags)) {
1288 hp = P_HASHBP(seg, htag0, 0, flags);
1289 mutex_enter(&hp->p_hmutex);
1290 pcp = hp->p_hnext;
1291 while (pcp != (struct seg_pcache *)hp) {
1292 ASSERT(pcp->p_hashp == hp);
1293 ASSERT(IS_PCP_WIRED(pcp));
1294 if (pcp->p_htag0 == htag0) {
1295 if (pcp->p_active) {
1296 break;
1297 }
1298 pcp->p_hprev->p_hnext = pcp->p_hnext;
1299 pcp->p_hnext->p_hprev = pcp->p_hprev;
1300 pcp->p_hprev = delcallb_list;
1301 delcallb_list = pcp;
1302 }
1303 pcp = pcp->p_hnext;
1304 }
1305 mutex_exit(&hp->p_hmutex);
1306 } else {
1307 pcache_link_t *plinkp;
1308 pcache_link_t *pheadp;
1309 kmutex_t *pmtx;
1310
1311 if (amp == NULL) {
1312 ASSERT(seg != NULL);
1313 pheadp = &seg->s_phead;
1314 pmtx = &seg->s_pmtx;
1315 } else {
1316 pheadp = &amp->a_phead;
1317 pmtx = &amp->a_pmtx;
1318 }
1319 mutex_enter(pmtx);
1320 while ((plinkp = pheadp->p_lnext) != pheadp) {
1321 pcp = plink2pcache(plinkp);
1322 ASSERT(!IS_PCP_WIRED(pcp));
1323 ASSERT(pcp->p_htag0 == htag0);
1324 hp = pcp->p_hashp;
1325 mutex_enter(&hp->p_hmutex);
1326 if (pcp->p_active) {
1327 mutex_exit(&hp->p_hmutex);
1328 break;
1329 }
1330 ASSERT(plinkp->p_lprev == pheadp);
1331 pheadp->p_lnext = plinkp->p_lnext;
1332 plinkp->p_lnext->p_lprev = pheadp;
1333 pcp->p_hprev->p_hnext = pcp->p_hnext;
1334 pcp->p_hnext->p_hprev = pcp->p_hprev;
1335 pcp->p_hprev = delcallb_list;
1336 delcallb_list = pcp;
1337 if (hp->p_hnext == (struct seg_pcache *)hp) {
1338 seg_premove_abuck(hp, 0);
1339 }
1340 mutex_exit(&hp->p_hmutex);
1341 }
1342 mutex_exit(pmtx);
1343 }
1344 while (delcallb_list != NULL) {
1345 pcp = delcallb_list;
1346 delcallb_list = pcp->p_hprev;
1347 ASSERT(!pcp->p_active);
1348 (void) (*pcp->p_callback)(pcp->p_htag0, pcp->p_addr, pcp->p_len,
1349 pcp->p_pp, pcp->p_write ? S_WRITE : S_READ, 0);
1350 npages += btop(pcp->p_len);
1351 kmem_cache_free(seg_pkmcache, pcp);
1352 }
1353 mutex_enter(&seg_pmem_mtx);
1354 ASSERT(seg_plocked >= npages);
1355 seg_plocked -= npages;
1356 if (!IS_PFLAGS_WIRED(flags)) {
1357 ASSERT(seg_plocked_window >= npages);
1358 seg_plocked_window -= npages;
1359 }
1360 mutex_exit(&seg_pmem_mtx);
1361 }
1362
1363 static void seg_pinit_mem_config(void);
1364
1365 /*
1366 * setup the pagelock cache
1367 */
1368 static void
1369 seg_pinit(void)
1370 {
1371 struct seg_phash *hp;
1372 ulong_t i;
1373 pgcnt_t physmegs;
1374
1375 seg_plocked = 0;
1376 seg_plocked_window = 0;
1377
1378 if (segpcache_enabled == 0) {
1379 seg_phashsize_win = 0;
1380 seg_phashsize_wired = 0;
1381 seg_pdisabled = 1;
1382 return;
1383 }
1384
1385 seg_pdisabled = 0;
1386 seg_pkmcache = kmem_cache_create("seg_pcache",
1387 sizeof (struct seg_pcache), 0, NULL, NULL, NULL, NULL, NULL, 0);
1388 if (segpcache_pcp_maxage_ticks <= 0) {
1389 segpcache_pcp_maxage_ticks = segpcache_pcp_maxage_sec * hz;
1390 }
1391 seg_pmax_pcpage = segpcache_pcp_maxage_ticks;
1392 seg_pathr_empty_ahb = 0;
1393 seg_pathr_full_ahb = 0;
1394 seg_pshrink_shift = segpcache_shrink_shift;
1395 seg_pmaxapurge_npages = btop(segpcache_maxapurge_bytes);
1396
1397 mutex_init(&seg_pcache_mtx, NULL, MUTEX_DEFAULT, NULL);
1398 mutex_init(&seg_pmem_mtx, NULL, MUTEX_DEFAULT, NULL);
1399 mutex_init(&seg_pasync_mtx, NULL, MUTEX_DEFAULT, NULL);
1400 cv_init(&seg_pasync_cv, NULL, CV_DEFAULT, NULL);
1401
1402 physmegs = physmem >> (20 - PAGESHIFT);
1403
1404 /*
1405 * If segpcache_hashsize_win was not set in /etc/system or it has an
1406 * absurd value, set it to a default.
1407 */
1408 if (segpcache_hashsize_win == 0 || segpcache_hashsize_win > physmem) {
1409 /*
1410 * Create one bucket per 32K (or at least per 8 pages) of
1411 * available memory.
1412 */
1413 pgcnt_t pages_per_bucket = MAX(btop(32 * 1024), 8);
1414 segpcache_hashsize_win = MAX(1024, physmem / pages_per_bucket);
1415 }
1416 if (!ISP2(segpcache_hashsize_win)) {
1417 ulong_t rndfac = ~(1UL <<
1418 (highbit(segpcache_hashsize_win) - 1));
1419 rndfac &= segpcache_hashsize_win;
1420 segpcache_hashsize_win += rndfac;
1421 segpcache_hashsize_win = 1 <<
1422 (highbit(segpcache_hashsize_win) - 1);
1423 }
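/*
 * The adjustment above rounds segpcache_hashsize_win to the nearest power of
 * two rather than simply truncating it: the bits below the top bit are added
 * back in before taking the highest power of two, so e.g. a setting of 1500
 * becomes 1024 while 1600 becomes 2048.
 */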
1424 seg_phashsize_win = segpcache_hashsize_win;
1425 seg_phashtab_win = kmem_zalloc(
1426 seg_phashsize_win * sizeof (struct seg_phash),
1427 KM_SLEEP);
1428 for (i = 0; i < seg_phashsize_win; i++) {
1429 hp = &seg_phashtab_win[i];
1430 hp->p_hnext = (struct seg_pcache *)hp;
1431 hp->p_hprev = (struct seg_pcache *)hp;
1432 mutex_init(&hp->p_hmutex, NULL, MUTEX_DEFAULT, NULL);
1433 }
1434
1435 seg_pahcur = 0;
1436 seg_pathr_on = 0;
1437 seg_pahhead[0].p_lnext = &seg_pahhead[0];
1438 seg_pahhead[0].p_lprev = &seg_pahhead[0];
1439 seg_pahhead[1].p_lnext = &seg_pahhead[1];
1440 seg_pahhead[1].p_lprev = &seg_pahhead[1];
1441
1442 /*
1443 * If segpcache_hashsize_wired was not set in /etc/system or it has an
1444 * absurd value, set it to a default.
1445 */
1446 if (segpcache_hashsize_wired == 0 ||
1447 segpcache_hashsize_wired > physmem / 4) {
1448 /*
1449 * Choose segpcache_hashsize_wired based on physmem.
1450 * Create one bucket per 128K bytes, up to 256K buckets.
1451 */
1452 if (physmegs < 20 * 1024) {
1453 segpcache_hashsize_wired = MAX(1024, physmegs << 3);
1454 } else {
1455 segpcache_hashsize_wired = 256 * 1024;
1456 }
1457 }
1458 if (!ISP2(segpcache_hashsize_wired)) {
1459 segpcache_hashsize_wired = 1 <<
1460 highbit(segpcache_hashsize_wired);
1461 }
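/*
 * Note that unlike the window hash above, a non power of two
 * segpcache_hashsize_wired is rounded up to the next power of two
 * (1 << highbit() of the value).
 */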
1462 seg_phashsize_wired = segpcache_hashsize_wired;
1463 seg_phashtab_wired = kmem_zalloc(
1464 seg_phashsize_wired * sizeof (struct seg_phash_wired), KM_SLEEP);
1465 for (i = 0; i < seg_phashsize_wired; i++) {
1466 hp = (struct seg_phash *)&seg_phashtab_wired[i];
1467 hp->p_hnext = (struct seg_pcache *)hp;
1468 hp->p_hprev = (struct seg_pcache *)hp;
1469 mutex_init(&hp->p_hmutex, NULL, MUTEX_DEFAULT, NULL);
1470 }
1471
1472 if (segpcache_maxwindow == 0) {
1473 if (physmegs < 64) {
1474 /* 3% of memory */
1475 segpcache_maxwindow = availrmem >> 5;
1476 } else if (physmegs < 512) {
1477 /* 12% of memory */
1478 segpcache_maxwindow = availrmem >> 3;
1479 } else if (physmegs < 1024) {
1480 /* 25% of memory */
1481 segpcache_maxwindow = availrmem >> 2;
1482 } else if (physmegs < 2048) {
1483 /* 50% of memory */
1484 segpcache_maxwindow = availrmem >> 1;
1485 } else {
1486 /* no limit */
1487 segpcache_maxwindow = (pgcnt_t)-1;
1488 }
1489 }
1490 seg_pmaxwindow = segpcache_maxwindow;
1491 seg_pinit_mem_config();
1492 }
1493
1494 /*
1495 * called by pageout if memory is low
1496 */
1497 void
1498 seg_preap(void)
1499 {
1500 /*
1501 * if the cache is off or empty, return
1502 */
1503 if (seg_plocked_window == 0) {
1504 return;
1505 }
1506 ASSERT(seg_phashsize_win != 0);
1507
1508 /*
1509 * If somebody is already purging pcache
1510 * just return.
1511 */
1512 if (seg_pdisabled) {
1513 return;
1514 }
1515
1516 cv_signal(&seg_pasync_cv);
1517 }
1518
1519 /*
1520 * Run as a background thread and reclaim pagelock
1521 * pages which have not been used recently.
1522 */
1523 void
1524 seg_pasync_thread(void)
1525 {
1526 callb_cpr_t cpr_info;
1527
1528 if (seg_phashsize_win == 0) {
1529 thread_exit();
1530 /*NOTREACHED*/
1531 }
1532
1533 seg_pasync_thr = curthread;
1534
1535 CALLB_CPR_INIT(&cpr_info, &seg_pasync_mtx,
1536 callb_generic_cpr, "seg_pasync");
1537
1538 if (segpcache_reap_ticks <= 0) {
1539 segpcache_reap_ticks = segpcache_reap_sec * hz;
1540 }
1541
1542 mutex_enter(&seg_pasync_mtx);
1543 for (;;) {
1544 CALLB_CPR_SAFE_BEGIN(&cpr_info);
1545 (void) cv_reltimedwait(&seg_pasync_cv, &seg_pasync_mtx,
1546 segpcache_reap_ticks, TR_CLOCK_TICK);
1547 CALLB_CPR_SAFE_END(&cpr_info, &seg_pasync_mtx);
1548 if (seg_pdisabled == 0) {
1549 seg_ppurge_async(0);
1550 }
1551 }
1552 }
1553
1554 static struct kmem_cache *seg_cache;
1555
1556 /*
1557 * Initialize segment management data structures.
1558 */
1559 void
1560 seg_init(void)
1561 {
1562 kstat_t *ksp;
1563
1564 seg_cache = kmem_cache_create("seg_cache", sizeof (struct seg),
1565 0, NULL, NULL, NULL, NULL, NULL, 0);
1566
1567 ksp = kstat_create("unix", 0, "segadvstat", "vm", KSTAT_TYPE_NAMED,
1568 segadvstat_ndata, KSTAT_FLAG_VIRTUAL);
1569 if (ksp) {
1570 ksp->ks_data = (void *)segadvstat_ptr;
1571 kstat_install(ksp);
1572 }
1573
1574 seg_pinit();
1575 }
1576
1577 /*
1578 * Allocate a segment to cover [base, base+size]
1579 * and attach it to the specified address space.
1580 */
1581 struct seg *
1582 seg_alloc(struct as *as, caddr_t base, size_t size)
1583 {
1584 struct seg *new;
1585 caddr_t segbase;
1586 size_t segsize;
1587
1588 segbase = (caddr_t)((uintptr_t)base & (uintptr_t)PAGEMASK);
1589 segsize = (((uintptr_t)(base + size) + PAGEOFFSET) & PAGEMASK) -
1590 (uintptr_t)segbase;
1591
1592 if (!valid_va_range(&segbase, &segsize, segsize, AH_LO))
1593 return ((struct seg *)NULL); /* bad virtual addr range */
1594
1595 if (as != &kas &&
1596 valid_usr_range(segbase, segsize, 0, as,
1597 as->a_userlimit) != RANGE_OKAY)
1598 return ((struct seg *)NULL); /* bad virtual addr range */
1599
1600 new = kmem_cache_alloc(seg_cache, KM_SLEEP);
1601 new->s_ops = NULL;
1602 new->s_data = NULL;
1603 new->s_szc = 0;
1604 new->s_flags = 0;
1605 mutex_init(&new->s_pmtx, NULL, MUTEX_DEFAULT, NULL);
1606 new->s_phead.p_lnext = &new->s_phead;
1607 new->s_phead.p_lprev = &new->s_phead;
1608 if (seg_attach(as, segbase, segsize, new) < 0) {
1609 kmem_cache_free(seg_cache, new);
1610 return ((struct seg *)NULL);
1611 }
1612 /* caller must fill in ops, data */
1613 return (new);
1614 }
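/*
 * A rough sketch of the expected calling pattern (illustrative only; see
 * as_map() and the segment create functions for the real code): the VM layer
 * allocates and attaches the segment, and the segment driver's create
 * routine then fills in the ops vector and private data:
 *
 *	seg = seg_alloc(as, addr, size);
 *	if (seg == NULL)
 *		return (ENOMEM);
 *	error = (*crfp)(seg, argsp);	// e.g. segvn_create(), which sets
 *					// seg->s_ops and seg->s_data
 */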
1615
1616 /*
1617 * Attach a segment to the address space. Used by seg_alloc()
1618 * and for kernel startup to attach to static segments.
1619 */
1620 int
1621 seg_attach(struct as *as, caddr_t base, size_t size, struct seg *seg)
1622 {
1623 seg->s_as = as;
1624 seg->s_base = base;
1625 seg->s_size = size;
1626
1627 /*
1628 * as_addseg() will add the segment at the appropriate point
1629 * in the list. It will return -1 if there is overlap with
1630 * an already existing segment.
1631 */
1632 return (as_addseg(as, seg));
1633 }
1634
1635 /*
1636 * Unmap a segment and free it from its associated address space.
1637 * This should be called by anybody who's finished with a whole segment's
1638 * mapping. Just calls SEGOP_UNMAP() on the whole mapping. It is the
1639 * responsibility of the segment driver to unlink the segment
1640 * from the address space, and to free public and private data structures
1641 * associated with the segment. (This is typically done by a call to
1642 * seg_free()).
1643 */
1644 void
1645 seg_unmap(struct seg *seg)
1646 {
1647 #ifdef DEBUG
1648 int ret;
1649 #endif /* DEBUG */
1650
1651 ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as, &seg->s_as->a_lock));
1652
1653 /* Shouldn't have called seg_unmap if mapping isn't yet established */
1654 ASSERT(seg->s_data != NULL);
1655
1656 /* Unmap the whole mapping */
1657 #ifdef DEBUG
1658 ret = SEGOP_UNMAP(seg, seg->s_base, seg->s_size);
1659 ASSERT(ret == 0);
1660 #else
1661 SEGOP_UNMAP(seg, seg->s_base, seg->s_size);
1662 #endif /* DEBUG */
1663 }
1664
1665 /*
1666 * Free the segment from its associated as. This should only be called
1667 * if a mapping to the segment has not yet been established (e.g., if
1668 * an error occurs in the middle of doing an as_map when the segment
1669 * has already been partially set up) or if it has already been deleted
1670 * (e.g., from a segment driver unmap routine if the unmap applies to the
1671 * entire segment). If the mapping is currently set up then seg_unmap() should
1672 * be called instead.
1673 */
1674 void
1675 seg_free(struct seg *seg)
1676 {
1677 register struct as *as = seg->s_as;
1678 struct seg *tseg = as_removeseg(as, seg);
1679
1680 ASSERT(tseg == seg);
1681
1682 /*
1683 * If the segment private data field is NULL,
1684 * then segment driver is not attached yet.
1685 */
1686 if (seg->s_data != NULL)
1687 SEGOP_FREE(seg);
1688
1689 mutex_destroy(&seg->s_pmtx);
1690 ASSERT(seg->s_phead.p_lnext == &seg->s_phead);
1691 ASSERT(seg->s_phead.p_lprev == &seg->s_phead);
1692 kmem_cache_free(seg_cache, seg);
1693 }
1694
1695 /*ARGSUSED*/
1696 static void
1697 seg_p_mem_config_post_add(
1698 void *arg,
1699 pgcnt_t delta_pages)
1700 {
1701 /* Nothing to do. */
1702 }
1703
1704 void
1705 seg_p_enable(void)
1706 {
1707 mutex_enter(&seg_pcache_mtx);
1708 ASSERT(seg_pdisabled != 0);
1709 seg_pdisabled--;
1710 mutex_exit(&seg_pcache_mtx);
1711 }
1712
1713 /*
1714 * seg_p_disable - disables seg_pcache, and then attempts to empty the
1715 * cache.
1716 * Returns SEGP_SUCCESS if the cache was successfully emptied, or
1717 * SEGP_FAIL if the cache could not be emptied.
1718 */
1719 int
1720 seg_p_disable(void)
1721 {
1722 pgcnt_t old_plocked;
1723 int stall_count = 0;
1724
1725 mutex_enter(&seg_pcache_mtx);
1726 seg_pdisabled++;
1727 ASSERT(seg_pdisabled != 0);
1728 mutex_exit(&seg_pcache_mtx);
1729
1730 /*
1731 * Attempt to empty the cache. Terminate if seg_plocked does not
1732 * diminish with SEGP_STALL_THRESHOLD consecutive attempts.
1733 */
1734 while (seg_plocked != 0) {
1735 ASSERT(seg_phashsize_win != 0);
1736 old_plocked = seg_plocked;
1737 seg_ppurge_async(1);
1738 if (seg_plocked == old_plocked) {
1739 if (stall_count++ > SEGP_STALL_THRESHOLD) {
1740 return (SEGP_FAIL);
1741 }
1742 } else
1743 stall_count = 0;
1744 if (seg_plocked != 0)
1745 delay(hz/SEGP_PREDEL_DELAY_FACTOR);
1746 }
1747 return (SEGP_SUCCESS);
1748 }
1749
1750 /*
1751 * Attempt to purge seg_pcache. May need to return before this has
1752 * completed to allow other pre_del callbacks to unlock pages. This is
1753 * ok because:
1754 * 1) The seg_pdisabled flag has been set so at least we won't
1755 * cache anymore locks and the locks we couldn't purge
1756 * will not be held if they do get released by a subsequent
1757 * pre-delete callback.
1758 *
1759 * 2) The rest of the memory delete thread processing does not
1760 * depend on the changes made in this pre-delete callback. No
1761 * panics will result, the worst that will happen is that the
1762 * DR code will timeout and cancel the delete.
1763 */
1764 /*ARGSUSED*/
1765 static int
1766 seg_p_mem_config_pre_del(
1767 void *arg,
1768 pgcnt_t delta_pages)
1769 {
1770 if (seg_phashsize_win == 0) {
1771 return (0);
1772 }
1773 if (seg_p_disable() != SEGP_SUCCESS)
1774 cmn_err(CE_NOTE,
1775 "!Pre-delete couldn't purge"" pagelock cache - continuing");
1776 return (0);
1777 }
1778
1779 /*ARGSUSED*/
1780 static void
1781 seg_p_mem_config_post_del(
1782 void *arg,
1783 pgcnt_t delta_pages,
1784 int cancelled)
1785 {
1786 if (seg_phashsize_win == 0) {
1787 return;
1788 }
1789 seg_p_enable();
1790 }
1791
1792 static kphysm_setup_vector_t seg_p_mem_config_vec = {
1793 KPHYSM_SETUP_VECTOR_VERSION,
1794 seg_p_mem_config_post_add,
1795 seg_p_mem_config_pre_del,
1796 seg_p_mem_config_post_del,
1797 };
1798
1799 static void
1800 seg_pinit_mem_config(void)
1801 {
1802 int ret;
1803
1804 ret = kphysm_setup_func_register(&seg_p_mem_config_vec, (void *)NULL);
1805 /*
1806 * Want to catch this in the debug kernel. At run time, if the
1807 * callbacks don't get run all will be OK as the disable just makes
1808 * it more likely that the pages can be collected.
1809 */
1810 ASSERT(ret == 0);
1811 }
1812
1813 /*
1814 * Verify that segment is not a shared anonymous segment which reserves
1815 * swap. zone.max-swap accounting (zone->zone_max_swap) cannot be transferred
1816 * from one zone to another if any segments are shared. This is because the
1817 * last process to exit will credit the swap reservation. This could lead
1818 * to the swap being reserved by one zone, and credited to another.
1819 */
1820 boolean_t
1821 seg_can_change_zones(struct seg *seg)
1822 {
1823 struct segvn_data *svd;
1824
1825 if (seg->s_ops == &segspt_shmops)
1826 return (B_FALSE);
1827
1828 if (seg->s_ops == &segvn_ops) {
1829 svd = (struct segvn_data *)seg->s_data;
1830 if (svd->type == MAP_SHARED &&
1831 svd->amp != NULL &&
1832 svd->amp->swresv > 0)
1833 return (B_FALSE);
1834 }
1835 return (B_TRUE);
1836 }
1837
1838 /*
1839 * Return swap reserved by a segment backing a private mapping.
1840 */
1841 size_t
1842 seg_swresv(struct seg *seg)
1843 {
1844 struct segvn_data *svd;
1845 size_t swap = 0;
1846
1847 if (seg->s_ops == &segvn_ops) {
1848 svd = (struct segvn_data *)seg->s_data;
1849 if (svd->type == MAP_PRIVATE && svd->swresv > 0)
1850 swap = svd->swresv;
1851 }
1852 return (swap);
1853 }
1854