/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved.
 */

#include <sys/types.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/buf.h>
#include <sys/cred.h>
#include <sys/errno.h>
#include <sys/vnode.h>
#include <sys/vfs_opreg.h>
#include <sys/cmn_err.h>
#include <sys/swap.h>
#include <sys/mman.h>
#include <sys/vmsystm.h>
#include <sys/vtrace.h>
#include <sys/debug.h>
#include <sys/sysmacros.h>
#include <sys/vm.h>

#include <sys/fs/swapnode.h>

#include <vm/seg.h>
#include <vm/page.h>
#include <vm/pvn.h>
#include <fs/fs_subr.h>

#include <vm/seg_kp.h>
/*
 * Define the routines within this file.
 */
static int swap_getpage(struct vnode *vp, offset_t off, size_t len,
    uint_t *protp, struct page **plarr, size_t plsz, struct seg *seg,
    caddr_t addr, enum seg_rw rw, struct cred *cr, caller_context_t *ct);
static int swap_putpage(struct vnode *vp, offset_t off, size_t len,
    int flags, struct cred *cr, caller_context_t *ct);
static void swap_inactive(struct vnode *vp, struct cred *cr,
    caller_context_t *ct);
static void swap_dispose(vnode_t *vp, page_t *pp, int fl, int dn,
    cred_t *cr, caller_context_t *ct);

static int swap_getapage(struct vnode *vp, u_offset_t off, size_t len,
    uint_t *protp, page_t **plarr, size_t plsz,
    struct seg *seg, caddr_t addr, enum seg_rw rw, struct cred *cr);

int swap_getconpage(struct vnode *vp, u_offset_t off, size_t len,
    uint_t *protp, page_t **plarr, size_t plsz, page_t *conpp,
    uint_t *pszc, spgcnt_t *nreloc, struct seg *seg, caddr_t addr,
    enum seg_rw rw, struct cred *cr);

static int swap_putapage(struct vnode *vp, page_t *pp, u_offset_t *off,
    size_t *lenp, int flags, struct cred *cr);

const fs_operation_def_t swap_vnodeops_template[] = {
        VOPNAME_INACTIVE,       { .vop_inactive = swap_inactive },
        VOPNAME_GETPAGE,        { .vop_getpage = swap_getpage },
        VOPNAME_PUTPAGE,        { .vop_putpage = swap_putpage },
        VOPNAME_DISPOSE,        { .vop_dispose = swap_dispose },
        VOPNAME_SETFL,          { .error = fs_error },
        VOPNAME_POLL,           { .error = fs_error },
        VOPNAME_PATHCONF,       { .error = fs_error },
        VOPNAME_GETSECATTR,     { .error = fs_error },
        VOPNAME_SHRLOCK,        { .error = fs_error },
        NULL,                   NULL
};
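
/*
 * A template like this is consumed by vn_make_ops() to build the
 * operations vector below.  A minimal sketch of that registration call
 * (the real call lives outside this file, in swapfs initialization):
 *
 *	(void) vn_make_ops("swapfs", swap_vnodeops_template,
 *	    &swap_vnodeops);
 */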

vnodeops_t *swap_vnodeops;

/* ARGSUSED */
static void
swap_inactive(
        struct vnode *vp,
        struct cred *cr,
        caller_context_t *ct)
{
        SWAPFS_PRINT(SWAP_VOPS, "swap_inactive: vp %x\n", vp, 0, 0, 0, 0);
}

/*
 * Return all the pages from [off..off+len] in the given file.
 */
/*ARGSUSED*/
static int
swap_getpage(
        struct vnode *vp,
        offset_t off,
        size_t len,
        uint_t *protp,
        page_t *pl[],
        size_t plsz,
        struct seg *seg,
        caddr_t addr,
        enum seg_rw rw,
        struct cred *cr,
        caller_context_t *ct)
{
        int err;

        SWAPFS_PRINT(SWAP_VOPS, "swap_getpage: vp %p, off %llx, len %lx\n",
            (void *)vp, off, len, 0, 0);

        TRACE_3(TR_FAC_SWAPFS, TR_SWAPFS_GETPAGE,
            "swapfs getpage:vp %p off %llx len %ld",
            (void *)vp, off, len);

        if (len <= PAGESIZE) {
                err = swap_getapage(vp, (u_offset_t)off, len, protp, pl, plsz,
                    seg, addr, rw, cr);
        } else {
                err = pvn_getpages(swap_getapage, vp, (u_offset_t)off, len,
                    protp, pl, plsz, seg, addr, rw, cr);
        }

        return (err);
}

/*
 * Called from pvn_getpages or swap_getpage to get a particular page.
 */
/*ARGSUSED*/
static int
swap_getapage(
        struct vnode *vp,
        u_offset_t off,
        size_t len,
        uint_t *protp,
        page_t *pl[],
        size_t plsz,
        struct seg *seg,
        caddr_t addr,
        enum seg_rw rw,
        struct cred *cr)
{
        struct page *pp, *rpp;
        int flags;
        int err = 0;
        struct vnode *pvp = NULL;
        u_offset_t poff;
        int flag_noreloc;
        se_t lock;
        extern int kcage_on;
        int upgrade = 0;

        SWAPFS_PRINT(SWAP_VOPS, "swap_getapage: vp %p, off %llx, len %lx\n",
            vp, off, len, 0, 0);

        /*
         * Until there is a call-back mechanism to cause SEGKP
         * pages to be unlocked, make them non-relocatable.
         */
        if (SEG_IS_SEGKP(seg))
                flag_noreloc = PG_NORELOC;
        else
                flag_noreloc = 0;

        if (protp != NULL)
                *protp = PROT_ALL;

        lock = (rw == S_CREATE ? SE_EXCL : SE_SHARED);

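        /*
         * Look the page up; if it is not resident, create it and, unless
         * this is an S_CREATE fault, fill it from its physical swap slot
         * (or zero it if no slot has been assigned yet).
         */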
again:
        if (pp = page_lookup(vp, off, lock)) {
                /*
                 * In very rare instances, a segkp page may have been
                 * relocated outside of the kernel cage due to the
                 * window between page_unlock() and VOP_PUTPAGE() in
                 * segkp_unlock().  Because these occurrences are so
                 * rare, the fix is simply to relocate the page to a
                 * P_NORELOC page.
                 */
                if (flag_noreloc != 0) {
                        if (!PP_ISNORELOC(pp) && kcage_on) {
                                if (lock != SE_EXCL) {
                                        upgrade = 1;
                                        if (!page_tryupgrade(pp)) {
                                                page_unlock(pp);
                                                lock = SE_EXCL;
                                                goto again;
                                        }
                                }

                                if (page_relocate_cage(&pp, &rpp) != 0)
                                        panic("swap_getapage: "
                                            "page_relocate_cage failed");

                                pp = rpp;
                        }
                }

                if (pl) {
                        if (upgrade)
                                page_downgrade(pp);

                        pl[0] = pp;
                        pl[1] = NULL;
                } else {
                        page_unlock(pp);
                }
        } else {
                pp = page_create_va(vp, off, PAGESIZE,
                    PG_WAIT | PG_EXCL | flag_noreloc,
                    seg, addr);
                /*
                 * Someone raced in and created the page after we did the
                 * lookup but before we did the create, so go back and
                 * try to look it up again.
                 */
                if (pp == NULL)
                        goto again;
                if (rw != S_CREATE) {
                        err = swap_getphysname(vp, off, &pvp, &poff);
                        if (pvp) {
                                struct anon *ap;
                                kmutex_t *ahm;

                                flags = (pl == NULL ? B_ASYNC|B_READ : B_READ);
                                err = VOP_PAGEIO(pvp, pp, poff,
                                    PAGESIZE, flags, cr, NULL);

                                if (!err) {
                                        ahm = AH_MUTEX(vp, off);
                                        mutex_enter(ahm);

                                        ap = swap_anon(vp, off);
                                        if (ap == NULL) {
                                                panic("swap_getapage:"
                                                    " null anon");
                                        }

                                        if (ap->an_pvp == pvp &&
                                            ap->an_poff == poff) {
                                                swap_phys_free(pvp, poff,
                                                    PAGESIZE);
                                                ap->an_pvp = NULL;
                                                ap->an_poff = NULL;
                                                hat_setmod(pp);
                                        }

                                        mutex_exit(ahm);
                                }
                        } else {
                                if (!err)
                                        pagezero(pp, 0, PAGESIZE);

                                /*
                                 * If this is a fault-ahead (pl == NULL),
                                 * release the page_io_lock and the SE_EXCL
                                 * lock we grabbed in page_create_va().
                                 *
                                 * We get here without having called
                                 * VOP_PAGEIO, so calling pvn_read_done(pp,
                                 * B_READ) below would wrongly suggest that
                                 * i/o was attempted.  Besides, in the async
                                 * case pvn_read_done() should not be called
                                 * by *getpage() at all.
                                 */
                                if (pl == NULL) {
                                        /*
                                         * swap_getphysname() can fail only
                                         * when we are called from
                                         * swapslot_free(), which passes a
                                         * non-NULL pl to VOP_GETPAGE.
                                         */
                                        ASSERT(err == 0);
                                        page_io_unlock(pp);
                                        page_unlock(pp);
                                }
                        }
                }

                ASSERT(pp != NULL);

                if (err && pl)
                        pvn_read_done(pp, B_ERROR);

                if (!err && pl)
                        pvn_plist_init(pp, pl, plsz, off, PAGESIZE, rw);
        }
        TRACE_3(TR_FAC_SWAPFS, TR_SWAPFS_GETAPAGE,
            "swapfs getapage:pp %p vp %p off %llx", pp, vp, off);
        return (err);
}

/*
 * Called from large page anon routines only! This is an ugly hack where
 * the anon layer directly calls into swapfs with a preallocated large page.
 * Another method would have been to change the VOP and add an extra arg for
 * the preallocated large page. This all could be cleaned up later when we
 * solve the anonymous naming problem and no longer need to loop across the
 * VOP in PAGESIZE increments to fill in or initialize a large page as is
 * done today. I think the hack is better, since it avoids a change to the
 * VOP interface that would later become unnecessary.
 */
int
swap_getconpage(
        struct vnode *vp,
        u_offset_t off,
        size_t len,
        uint_t *protp,
        page_t *pl[],
        size_t plsz,
        page_t *conpp,
        uint_t *pszc,
        spgcnt_t *nreloc,
        struct seg *seg,
        caddr_t addr,
        enum seg_rw rw,
        struct cred *cr)
{
        struct page *pp;
        int err = 0;
        struct vnode *pvp = NULL;
        u_offset_t poff;

        ASSERT(len == PAGESIZE);
        ASSERT(pl != NULL);
        ASSERT(plsz == PAGESIZE);
        ASSERT(protp == NULL);
        ASSERT(nreloc != NULL);
        ASSERT(!SEG_IS_SEGKP(seg)); /* XXX for now not supported */
        SWAPFS_PRINT(SWAP_VOPS, "swap_getconpage: vp %p, off %llx, len %lx\n",
            vp, off, len, 0, 0);

        /*
         * If we are not using a preallocated page then we know one already
         * exists. So just let the old code handle it.
         */
        if (conpp == NULL) {
                err = swap_getapage(vp, (u_offset_t)off, len, protp, pl, plsz,
                    seg, addr, rw, cr);
                return (err);
        }
        ASSERT(conpp->p_szc != 0);
        ASSERT(PAGE_EXCL(conpp));

        ASSERT(conpp->p_next == conpp);
        ASSERT(conpp->p_prev == conpp);
        ASSERT(!PP_ISAGED(conpp));
        ASSERT(!PP_ISFREE(conpp));

        *nreloc = 0;
        pp = page_lookup_create(vp, off, SE_SHARED, conpp, nreloc, 0);

        /*
         * If an existing page is found we may need to relocate.
         */
        if (pp != conpp) {
                ASSERT(rw != S_CREATE);
                ASSERT(pszc != NULL);
                ASSERT(PAGE_SHARED(pp));
                if (pp->p_szc < conpp->p_szc) {
                        *pszc = pp->p_szc;
                        page_unlock(pp);
                        err = -1;
                } else if (pp->p_szc > conpp->p_szc &&
                    seg->s_szc > conpp->p_szc) {
                        *pszc = MIN(pp->p_szc, seg->s_szc);
                        page_unlock(pp);
                        err = -2;
                } else {
                        pl[0] = pp;
                        pl[1] = NULL;
                        if (page_pptonum(pp) &
                            (page_get_pagecnt(conpp->p_szc) - 1))
                                cmn_err(CE_PANIC, "swap_getconpage: no root");
                }
                return (err);
        }

        ASSERT(PAGE_EXCL(pp));

        if (*nreloc != 0) {
                ASSERT(rw != S_CREATE);
                pl[0] = pp;
                pl[1] = NULL;
                return (0);
        }

        *nreloc = 1;

        /*
         * If necessary do the page io.
         */
        if (rw != S_CREATE) {
                /*
                 * Since we are now called only on behalf of an
                 * address space operation, it's impossible for
                 * us to fail here, unlike swap_getapage(), which
                 * also gets called from swapslot_free().
                 */
                if (swap_getphysname(vp, off, &pvp, &poff)) {
                        cmn_err(CE_PANIC,
                            "swap_getconpage: swap_getphysname failed!");
                }

                if (pvp != NULL) {
                        err = VOP_PAGEIO(pvp, pp, poff, PAGESIZE, B_READ,
                            cr, NULL);
                        if (err == 0) {
                                struct anon *ap;
                                kmutex_t *ahm;

                                ahm = AH_MUTEX(vp, off);
                                mutex_enter(ahm);
                                ap = swap_anon(vp, off);
                                if (ap == NULL)
                                        panic("swap_getconpage: null anon");
                                if (ap->an_pvp != pvp || ap->an_poff != poff)
                                        panic("swap_getconpage: bad anon");

                                swap_phys_free(pvp, poff, PAGESIZE);
                                ap->an_pvp = NULL;
                                ap->an_poff = NULL;
                                hat_setmod(pp);
                                mutex_exit(ahm);
                        }
                } else {
                        pagezero(pp, 0, PAGESIZE);
                }
        }

        /*
         * Normally we would let pvn_read_done() destroy
         * the page on IO error. But since this is a preallocated
         * page we'll let the anon layer handle it.
         */
        page_io_unlock(pp);
        if (err != 0)
                page_hashout(pp, NULL);
        ASSERT(pp->p_next == pp);
        ASSERT(pp->p_prev == pp);

        TRACE_3(TR_FAC_SWAPFS, TR_SWAPFS_GETAPAGE,
            "swapfs getconpage:pp %p vp %p off %llx", pp, vp, off);

        pl[0] = pp;
        pl[1] = NULL;
        return (err);
}

/* Async putpage klustering stuff */
int sw_pending_size;
extern int klustsize;
extern struct async_reqs *sw_getreq();
extern void sw_putreq(struct async_reqs *);
extern void sw_putbackreq(struct async_reqs *);
extern struct async_reqs *sw_getfree();
extern void sw_putfree(struct async_reqs *);

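/*
 * Putpage statistics: pages and requests successfully pushed, plus the
 * reasons the klustering loop in swap_putapage() stops early (no queued
 * request, failed page lookup, failed swap slot allocation, or a
 * non-contiguous slot).
 */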
static size_t swap_putpagecnt, swap_pagespushed;
static size_t swap_otherfail, swap_otherpages;
static size_t swap_klustfail, swap_klustpages;
static size_t swap_getiofail, swap_getiopages;

/*
 * Flags are composed of {B_INVAL, B_DIRTY, B_FREE, B_DONTNEED}.
 * If len == 0, do from off to EOF.
 */
static int swap_nopage = 0;     /* Don't do swap_putpage's if set */

/* ARGSUSED */
static int
swap_putpage(
        struct vnode *vp,
        offset_t off,
        size_t len,
        int flags,
        struct cred *cr,
        caller_context_t *ct)
{
        page_t *pp;
        u_offset_t io_off;
        size_t io_len = 0;
        int err = 0;
        int nowait;
        struct async_reqs *arg;

        if (swap_nopage)
                return (0);

        ASSERT(vp->v_count != 0);

        nowait = flags & B_PAGE_NOWAIT;

        /*
         * Clear the force flag so that p_lckcnt pages are not invalidated,
         * and strip B_PAGE_NOWAIT now that it has been latched in nowait.
         */
        flags &= ~(B_FORCE | B_PAGE_NOWAIT);

        SWAPFS_PRINT(SWAP_VOPS,
            "swap_putpage: vp %p, off %llx len %lx, flags %x\n",
            (void *)vp, off, len, flags, 0);
        TRACE_3(TR_FAC_SWAPFS, TR_SWAPFS_PUTPAGE,
            "swapfs putpage:vp %p off %llx len %ld", (void *)vp, off, len);

        if (vp->v_flag & VNOMAP)
                return (ENOSYS);

        if (!vn_has_cached_data(vp))
                return (0);

        if (len == 0) {
                if (curproc == proc_pageout)
                        cmn_err(CE_PANIC, "swapfs: pageout can't block");

                /* Search the entire vp list for pages >= off. */
                err = pvn_vplist_dirty(vp, (u_offset_t)off, swap_putapage,
                    flags, cr);
        } else {
                u_offset_t eoff;

                /*
                 * Loop over all offsets in the range [off...off + len]
                 * looking for pages to deal with.
                 */
                eoff = off + len;
                for (io_off = (u_offset_t)off; io_off < eoff;
                    io_off += io_len) {
                        /*
                         * If a free async request slot is available, queue
                         * the page rather than pushing it here; once the
                         * slots run out, fall through and put the page now.
                         */
                        if (flags == (B_ASYNC | B_FREE) &&
                            sw_pending_size < klustsize &&
                            (arg = sw_getfree())) {
                                /*
                                 * If we are clustering, we should allow
                                 * pageout to feed us more pages because # of
                                 * pushes is limited by # of I/Os, and one
                                 * cluster is considered to be one I/O.
                                 */
                                if (pushes)
                                        pushes--;

                                arg->a_vp = vp;
                                arg->a_off = io_off;
                                arg->a_len = PAGESIZE;
                                arg->a_flags = B_ASYNC | B_FREE;
                                arg->a_cred = kcred;
                                sw_putreq(arg);
                                io_len = PAGESIZE;
                                continue;
                        }
                        /*
                         * If we are not invalidating pages, use the
                         * routine page_lookup_nowait() to prevent
                         * reclaiming them from the free list.
                         */
                        if (!nowait && ((flags & B_INVAL) ||
                            (flags & (B_ASYNC | B_FREE)) == B_FREE))
                                pp = page_lookup(vp, io_off, SE_EXCL);
                        else
                                pp = page_lookup_nowait(vp, io_off,
                                    (flags & (B_FREE | B_INVAL)) ?
                                    SE_EXCL : SE_SHARED);

                        if (pp == NULL || pvn_getdirty(pp, flags) == 0)
                                io_len = PAGESIZE;
                        else {
                                err = swap_putapage(vp, pp, &io_off, &io_len,
                                    flags, cr);
                                if (err != 0)
                                        break;
                        }
                }
        }
        /* If invalidating, verify all pages on vnode list are gone. */
        if (err == 0 && off == 0 && len == 0 &&
            (flags & B_INVAL) && vn_has_cached_data(vp)) {
                cmn_err(CE_WARN,
                    "swap_putpage: B_INVAL, pages not gone");
        }
        return (err);
}

/*
 * Write out a single page.
 * For swapfs this means choose a physical swap slot and write the page
 * out using VOP_PAGEIO.
 * In the (B_ASYNC | B_FREE) case we try to find a bunch of other dirty
 * swapfs pages, a bunch of contiguous swap slots and then write them
 * all out in one clustered i/o.
 */
/*ARGSUSED*/
static int
swap_putapage(
        struct vnode *vp,
        page_t *pp,
        u_offset_t *offp,
        size_t *lenp,
        int flags,
        struct cred *cr)
{
        int err;
        struct vnode *pvp;
        u_offset_t poff, off;
        u_offset_t doff;
        size_t dlen;
        size_t klsz = 0;
        u_offset_t klstart = 0;
        struct vnode *klvp = NULL;
        page_t *pplist;
        se_t se;
        struct async_reqs *arg;
        size_t swap_klustsize;

        /*
         * This check handles callers that enter swap_putpage() with len == 0:
         * swap_putpage() then calls swap_putapage() page by page via
         * pvn_vplist_dirty(), and the same queuing must happen here when the
         * caller passes the same B_ASYNC|B_FREE flags.
         */
        if (flags == (B_ASYNC | B_FREE) &&
            sw_pending_size < klustsize && (arg = sw_getfree())) {

                hat_setmod(pp);
                page_io_unlock(pp);
                page_unlock(pp);

                arg->a_vp = vp;
                arg->a_off = pp->p_offset;
                arg->a_len = PAGESIZE;
                arg->a_flags = B_ASYNC | B_FREE;
                arg->a_cred = kcred;
                sw_putreq(arg);

                return (0);
        }

        SWAPFS_PRINT(SWAP_PUTP,
            "swap_putapage: pp %p, vp %p, off %llx, flags %x\n",
            pp, vp, pp->p_offset, flags, 0);

        ASSERT(PAGE_LOCKED(pp));

        off = pp->p_offset;

        doff = off;
        dlen = PAGESIZE;

        if (err = swap_newphysname(vp, off, &doff, &dlen, &pvp, &poff)) {
                err = (flags == (B_ASYNC | B_FREE) ? ENOMEM : 0);
                hat_setmod(pp);
                page_io_unlock(pp);
                page_unlock(pp);
                goto out;
        }

        klvp = pvp;
        klstart = poff;
        pplist = pp;
        /*
         * If this is ASYNC | FREE and we've accumulated a bunch of such
         * pending requests, kluster.
         */
        if (flags == (B_ASYNC | B_FREE))
                swap_klustsize = klustsize;
        else
                swap_klustsize = PAGESIZE;
        se = (flags & B_FREE ? SE_EXCL : SE_SHARED);
        klsz = PAGESIZE;
        while (klsz < swap_klustsize) {
                if ((arg = sw_getreq()) == NULL) {
                        swap_getiofail++;
                        swap_getiopages += btop(klsz);
                        break;
                }
                ASSERT(vn_matchops(arg->a_vp, swap_vnodeops));
                vp = arg->a_vp;
                off = arg->a_off;

                if ((pp = page_lookup_nowait(vp, off, se)) == NULL) {
                        swap_otherfail++;
                        swap_otherpages += btop(klsz);
                        sw_putfree(arg);
                        break;
                }
                if (pvn_getdirty(pp, flags | B_DELWRI) == 0) {
                        sw_putfree(arg);
                        continue;
                }
                /* Get new physical backing store for the page. */
                doff = off;
                dlen = PAGESIZE;
                if (err = swap_newphysname(vp, off, &doff, &dlen,
                    &pvp, &poff)) {
                        swap_otherfail++;
                        swap_otherpages += btop(klsz);
                        hat_setmod(pp);
                        page_io_unlock(pp);
                        page_unlock(pp);
                        sw_putbackreq(arg);
                        break;
                }
                /* Try to cluster the new physical name with previous ones. */
                if (klvp == pvp && poff == klstart + klsz) {
                        klsz += PAGESIZE;
                        page_add(&pplist, pp);
                        pplist = pplist->p_next;
                        sw_putfree(arg);
                } else if (klvp == pvp && poff == klstart - PAGESIZE) {
                        klsz += PAGESIZE;
                        klstart -= PAGESIZE;
                        page_add(&pplist, pp);
                        sw_putfree(arg);
                } else {
                        swap_klustfail++;
                        swap_klustpages += btop(klsz);
                        hat_setmod(pp);
                        page_io_unlock(pp);
                        page_unlock(pp);
                        sw_putbackreq(arg);
                        break;
                }
        }

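        /* Push the accumulated cluster of pages in a single physical i/o. */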
        err = VOP_PAGEIO(klvp, pplist, klstart, klsz,
            B_WRITE | flags, cr, NULL);

        if ((flags & B_ASYNC) == 0)
                pvn_write_done(pp, ((err) ? B_ERROR : 0) | B_WRITE | flags);

        /* Statistics */
        if (!err) {
                swap_putpagecnt++;
                swap_pagespushed += btop(klsz);
        }
out:
        TRACE_4(TR_FAC_SWAPFS, TR_SWAPFS_PUTAPAGE,
            "swapfs putapage:vp %p klvp %p, klstart %lx, klsz %lx",
            vp, klvp, klstart, klsz);
        if (err && err != ENOMEM)
                cmn_err(CE_WARN, "swapfs_putapage: err %d\n", err);
        if (lenp)
                *lenp = PAGESIZE;
        return (err);
}

static void
swap_dispose(
        vnode_t *vp,
        page_t *pp,
        int fl,
        int dn,
        cred_t *cr,
        caller_context_t *ct)
{
        int err;
        u_offset_t off = pp->p_offset;
        vnode_t *pvp;
        u_offset_t poff;

        ASSERT(PAGE_EXCL(pp));

        /*
         * The caller will free/invalidate a large page in one shot instead
         * of one small page at a time.
         */
        if (pp->p_szc != 0) {
                page_unlock(pp);
                return;
        }

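        /*
         * If the page has a physical backing slot, let the underlying
         * swap device dispose of it; otherwise fall back to the generic
         * fs_dispose().
         */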
        err = swap_getphysname(vp, off, &pvp, &poff);
        if (!err && pvp != NULL)
                VOP_DISPOSE(pvp, pp, fl, dn, cr, ct);
        else
                fs_dispose(vp, pp, fl, dn, cr, ct);
}