xref: /onnv-gate/usr/src/uts/common/fs/dcfs/dc_vnops.c (revision 12874:4e09ded00759)
15648Ssetje 
25648Ssetje /*
35648Ssetje  * CDDL HEADER START
45648Ssetje  *
55648Ssetje  * The contents of this file are subject to the terms of the
65648Ssetje  * Common Development and Distribution License (the "License").
75648Ssetje  * You may not use this file except in compliance with the License.
85648Ssetje  *
95648Ssetje  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
105648Ssetje  * or http://www.opensolaris.org/os/licensing.
115648Ssetje  * See the License for the specific language governing permissions
125648Ssetje  * and limitations under the License.
135648Ssetje  *
145648Ssetje  * When distributing Covered Code, include this CDDL HEADER in each
155648Ssetje  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
165648Ssetje  * If applicable, add the following below this CDDL HEADER, with the
175648Ssetje  * fields enclosed by brackets "[]" replaced with your own identifying
185648Ssetje  * information: Portions Copyright [yyyy] [name of copyright owner]
195648Ssetje  *
205648Ssetje  * CDDL HEADER END
215648Ssetje  */
225648Ssetje /*
2312633Sjohn.levon@sun.com  * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
245648Ssetje  */
255648Ssetje 
265648Ssetje /*	Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T	*/
275648Ssetje /*	  All Rights Reserved  	*/
285648Ssetje 
295648Ssetje /*
305648Ssetje  * University Copyright- Copyright (c) 1982, 1986, 1988
315648Ssetje  * The Regents of the University of California
325648Ssetje  * All Rights Reserved
335648Ssetje  *
345648Ssetje  * University Acknowledgment- Portions of this document are derived from
355648Ssetje  * software developed by the University of California, Berkeley, and its
365648Ssetje  * contributors.
375648Ssetje  */
385648Ssetje 
395648Ssetje #include <sys/types.h>
405648Ssetje #include <sys/thread.h>
415648Ssetje #include <sys/t_lock.h>
425648Ssetje #include <sys/param.h>
435648Ssetje #include <sys/systm.h>
445648Ssetje #include <sys/bitmap.h>
455648Ssetje #include <sys/buf.h>
465648Ssetje #include <sys/cmn_err.h>
475648Ssetje #include <sys/conf.h>
485648Ssetje #include <sys/ddi.h>
495648Ssetje #include <sys/debug.h>
505648Ssetje #include <sys/errno.h>
515648Ssetje #include <sys/time.h>
525648Ssetje #include <sys/fcntl.h>
535648Ssetje #include <sys/flock.h>
545648Ssetje #include <sys/file.h>
555648Ssetje #include <sys/kmem.h>
565648Ssetje #include <sys/mman.h>
575648Ssetje #include <sys/vmsystm.h>
585648Ssetje #include <sys/open.h>
595648Ssetje #include <sys/swap.h>
605648Ssetje #include <sys/sysmacros.h>
615648Ssetje #include <sys/uio.h>
625648Ssetje #include <sys/vfs.h>
635648Ssetje #include <sys/vfs_opreg.h>
645648Ssetje #include <sys/vnode.h>
655648Ssetje #include <sys/stat.h>
665648Ssetje #include <sys/poll.h>
675648Ssetje #include <sys/zmod.h>
685648Ssetje #include <sys/fs/decomp.h>
695648Ssetje 
705648Ssetje #include <vm/hat.h>
715648Ssetje #include <vm/as.h>
725648Ssetje #include <vm/page.h>
735648Ssetje #include <vm/pvn.h>
745648Ssetje #include <vm/seg_vn.h>
755648Ssetje #include <vm/seg_kmem.h>
765648Ssetje #include <vm/seg_map.h>
775648Ssetje 
785648Ssetje #include <fs/fs_subr.h>
795648Ssetje 
805648Ssetje /*
815648Ssetje  * dcfs - A filesystem for automatic decompressing of fiocompressed files
825648Ssetje  *
835648Ssetje  * This filesystem is a layered filesystem that sits on top of a normal
845648Ssetje  * persistent filesystem and provides automatic decompression of files
855648Ssetje  * that have been previously compressed and stored on the host file system.
865648Ssetje  * This is a pseudo filesystem in that it does not persist data, rather it
875648Ssetje  * intercepts file lookup requests on the host filesystem and provides
885648Ssetje  * transparent decompression of those files. Currently the only supported
895648Ssetje  * host filesystem is ufs.
905648Ssetje  *
 * A file is compressed via a userland utility (currently cmd/boot/fiocompress)
 * and marked by fiocompress as a compressed file via a flag in the on-disk
 * inode (set via a ufs ioctl() - see the _FIO_COMPRESSED case of ufs_ioctl()
 * in ufs_vnops.c).  ufs_lookup checks for this flag and, if set, passes
 * control to decompvp, a function defined in this (dcfs) filesystem.
 * decompvp uncompresses the file and returns a dcfs vnode to the VFS layer.
975648Ssetje  *
985648Ssetje  * dcfs is layered on top of ufs and passes requests involving persistence
995648Ssetje  * to the underlying ufs filesystem. The compressed files currently cannot be
1005648Ssetje  * written to.
1015648Ssetje  */
1025648Ssetje 
1035648Ssetje 
1045648Ssetje /*
1055648Ssetje  * Define data structures within this file.
1065648Ssetje  */
/*
 * Hash table geometry.
 *
 * DCHASH() drops the low DCSHFT bits of a vnode pointer and reduces what
 * is left to a bucket index in [0, DCTABLESIZE).  A power-of-two table
 * size lets the reduction be a mask; any other size falls back to modulo.
 */
#define	DCSHFT		5
#define	DCTABLESIZE	16

#if ((DCTABLESIZE & (DCTABLESIZE - 1)) == 0)
#define	DCHASH(vp) (((uintptr_t)(vp) >> DCSHFT) & (DCTABLESIZE - 1))
#else
#define	DCHASH(vp) (((uintptr_t)(vp) >> DCSHFT) % DCTABLESIZE)
#endif

#define	DCLRUSIZE	16

/* Number of entries in dcbuf_cache[]. */
#define	DCCACHESIZE	4

/* Round x down to a multiple of y; only valid when y is a power of two. */
#define	rounddown(x, y)	((x) & ~((y) - 1))
1215648Ssetje 
1225648Ssetje struct dcnode	*dctable[DCTABLESIZE];
1235648Ssetje 
1245648Ssetje struct dcnode	*dclru;
1255648Ssetje static int	dclru_len;
1265648Ssetje 
1275648Ssetje kmutex_t	dctable_lock;
1285648Ssetje 
1295648Ssetje dev_t		dcdev;
1305648Ssetje struct vfs	dc_vfs;
1315648Ssetje 
1325648Ssetje struct kmem_cache *dcnode_cache;
1335648Ssetje struct kmem_cache *dcbuf_cache[DCCACHESIZE];
1345648Ssetje 
1355648Ssetje kmutex_t	dccache_lock;
1365648Ssetje 
1375648Ssetje static int dcinit(int, char *);
1385648Ssetje 
1395648Ssetje static struct dcnode	*dcnode_alloc(void);
1405648Ssetje static void		dcnode_free(struct dcnode *);
1415648Ssetje static void		dcnode_recycle(struct dcnode *);
1425648Ssetje 
1435648Ssetje static void		dcinsert(struct dcnode *);
1445648Ssetje static void		dcdelete(struct dcnode *);
1455648Ssetje static struct dcnode	*dcfind(struct vnode *);
1465648Ssetje static void		dclru_add(struct dcnode *);
1475648Ssetje static void		dclru_sub(struct dcnode *);
1485648Ssetje 
1495648Ssetje 
1505648Ssetje /*
1515648Ssetje  * This is the loadable module wrapper.
1525648Ssetje  */
1535648Ssetje #include <sys/modctl.h>
1545648Ssetje 
1555648Ssetje struct vfsops *dc_vfsops;
1565648Ssetje 
1575648Ssetje static vfsdef_t vfw = {
1585648Ssetje 	VFSDEF_VERSION,
1595648Ssetje 	"dcfs",
1605648Ssetje 	dcinit,
16112633Sjohn.levon@sun.com 	VSW_ZMOUNT,
1625648Ssetje 	NULL
1635648Ssetje };
1645648Ssetje 
1655648Ssetje /*
1665648Ssetje  * Module linkage information for the kernel.
1675648Ssetje  */
1685648Ssetje extern struct mod_ops mod_fsops;
1695648Ssetje 
1705648Ssetje static struct modlfs modlfs = {
1715648Ssetje 	&mod_fsops, "compressed filesystem", &vfw
1725648Ssetje };
1735648Ssetje 
1745648Ssetje static struct modlinkage modlinkage = {
1755648Ssetje 	MODREV_1, (void *)&modlfs, NULL
1765648Ssetje };
1775648Ssetje 
1785648Ssetje int
_init()1795648Ssetje _init()
1805648Ssetje {
1815648Ssetje 	return (mod_install(&modlinkage));
1825648Ssetje }
1835648Ssetje 
1845648Ssetje int
_info(struct modinfo * modinfop)1855648Ssetje _info(struct modinfo *modinfop)
1865648Ssetje {
1875648Ssetje 	return (mod_info(&modlinkage, modinfop));
1885648Ssetje }
1895648Ssetje 
1905648Ssetje 
1915648Ssetje static int dc_open(struct vnode **, int, struct cred *, caller_context_t *);
1925648Ssetje static int dc_close(struct vnode *, int, int, offset_t,
1935648Ssetje     struct cred *, caller_context_t *);
1945648Ssetje static int dc_read(struct vnode *, struct uio *, int, struct cred *,
1955648Ssetje     struct caller_context *);
1965648Ssetje static int dc_getattr(struct vnode *, struct vattr *, int,
1975648Ssetje     struct cred *, caller_context_t *);
1985648Ssetje static int dc_setattr(struct vnode *, struct vattr *, int, struct cred *,
1995648Ssetje     struct caller_context *);
2005648Ssetje static int dc_access(struct vnode *, int, int,
2015648Ssetje     struct cred *, caller_context_t *);
2025648Ssetje static int dc_fsync(struct vnode *, int, struct cred *, caller_context_t *);
2035648Ssetje static void dc_inactive(struct vnode *, struct cred *, caller_context_t *);
2045648Ssetje static int dc_fid(struct vnode *, struct fid *, caller_context_t *);
2055648Ssetje static int dc_seek(struct vnode *, offset_t, offset_t *, caller_context_t *);
2065648Ssetje static int dc_frlock(struct vnode *, int, struct flock64 *, int, offset_t,
2075648Ssetje     struct flk_callback *, struct cred *, caller_context_t *);
20812729SAndrew.Balfour@Sun.COM static int dc_realvp(struct vnode *, struct vnode **, caller_context_t *);
2095648Ssetje static int dc_getpage(struct vnode *, offset_t, size_t, uint_t *,
2105648Ssetje     struct page **, size_t, struct seg *, caddr_t, enum seg_rw,
2115648Ssetje     struct cred *, caller_context_t *);
2125648Ssetje static int dc_putpage(struct vnode *, offset_t, size_t, int,
2135648Ssetje     struct cred *, caller_context_t *);
2145648Ssetje static int dc_map(struct vnode *, offset_t, struct as *, caddr_t *, size_t,
2155648Ssetje     uchar_t, uchar_t, uint_t, struct cred *, caller_context_t *);
2165648Ssetje static int dc_addmap(struct vnode *, offset_t, struct as *, caddr_t, size_t,
2175648Ssetje     uchar_t, uchar_t, uint_t, struct cred *, caller_context_t *);
2185648Ssetje static int dc_delmap(struct vnode *, offset_t, struct as *, caddr_t, size_t,
2195648Ssetje     uint_t, uint_t, uint_t, struct cred *, caller_context_t *);
2205648Ssetje 
2215648Ssetje struct vnodeops *dc_vnodeops;
2225648Ssetje 
2235648Ssetje const fs_operation_def_t dc_vnodeops_template[] = {
2245648Ssetje 	VOPNAME_OPEN,			{ .vop_open = dc_open },
2255648Ssetje 	VOPNAME_CLOSE,			{ .vop_close = dc_close },
2265648Ssetje 	VOPNAME_READ,			{ .vop_read = dc_read },
2275648Ssetje 	VOPNAME_GETATTR,		{ .vop_getattr =  dc_getattr },
2285648Ssetje 	VOPNAME_SETATTR,		{ .vop_setattr = dc_setattr },
2295648Ssetje 	VOPNAME_ACCESS,			{ .vop_access = dc_access },
2305648Ssetje 	VOPNAME_FSYNC,			{ .vop_fsync = dc_fsync },
2315648Ssetje 	VOPNAME_INACTIVE,		{ .vop_inactive = dc_inactive },
2325648Ssetje 	VOPNAME_FID,			{ .vop_fid = dc_fid },
2335648Ssetje 	VOPNAME_SEEK,			{ .vop_seek = dc_seek },
2345648Ssetje 	VOPNAME_FRLOCK,			{ .vop_frlock = dc_frlock },
23512729SAndrew.Balfour@Sun.COM 	VOPNAME_REALVP,			{ .vop_realvp = dc_realvp },
2365648Ssetje 	VOPNAME_GETPAGE,		{ .vop_getpage = dc_getpage },
2375648Ssetje 	VOPNAME_PUTPAGE,		{ .vop_putpage = dc_putpage },
2385648Ssetje 	VOPNAME_MAP,			{ .vop_map = dc_map },
2395648Ssetje 	VOPNAME_ADDMAP,			{ .vop_addmap = dc_addmap },
2405648Ssetje 	VOPNAME_DELMAP,			{ .vop_delmap = dc_delmap },
2415648Ssetje 	NULL,				NULL
2425648Ssetje };
2435648Ssetje 
2445648Ssetje /*ARGSUSED*/
2455648Ssetje static int
dc_open(struct vnode ** vpp,int flag,struct cred * cr,caller_context_t * ctp)2465648Ssetje dc_open(struct vnode **vpp, int flag, struct cred *cr, caller_context_t *ctp)
2475648Ssetje {
2485648Ssetje 	return (0);
2495648Ssetje }
2505648Ssetje 
2515648Ssetje /*ARGSUSED*/
2525648Ssetje static int
dc_close(struct vnode * vp,int flag,int count,offset_t off,struct cred * cr,caller_context_t * ctp)2535648Ssetje dc_close(struct vnode *vp, int flag, int count, offset_t off,
2545648Ssetje     struct cred *cr, caller_context_t *ctp)
2555648Ssetje {
2565648Ssetje 	(void) cleanlocks(vp, ttoproc(curthread)->p_pid, 0);
2575648Ssetje 	cleanshares(vp, ttoproc(curthread)->p_pid);
2585648Ssetje 	return (0);
2595648Ssetje }
2605648Ssetje 
2615648Ssetje /*ARGSUSED*/
2625648Ssetje static int
dc_read(struct vnode * vp,struct uio * uiop,int ioflag,struct cred * cr,struct caller_context * ct)2635648Ssetje dc_read(struct vnode *vp, struct uio *uiop, int ioflag, struct cred *cr,
2645648Ssetje 	struct caller_context *ct)
2655648Ssetje {
2665648Ssetje 	struct dcnode *dp = VTODC(vp);
2675648Ssetje 	size_t rdsize = MAX(MAXBSIZE, dp->dc_hdr->ch_blksize);
2685648Ssetje 	size_t fsize = dp->dc_hdr->ch_fsize;
2695648Ssetje 	int error;
2705648Ssetje 
2715648Ssetje 	/*
2725648Ssetje 	 * Loop through file with segmap, decompression will occur
2735648Ssetje 	 * in dc_getapage
2745648Ssetje 	 */
2755648Ssetje 	do {
2765648Ssetje 		caddr_t base;
2775648Ssetje 		size_t n;
2785648Ssetje 		offset_t mapon;
2795648Ssetje 
2805648Ssetje 		/*
2815648Ssetje 		 * read to end of block or file
2825648Ssetje 		 */
2835648Ssetje 		mapon = uiop->uio_loffset & (rdsize - 1);
2845648Ssetje 		n = MIN(rdsize - mapon, uiop->uio_resid);
2855648Ssetje 		n = MIN(n, fsize - uiop->uio_loffset);
2865648Ssetje 		if (n == 0)
2875648Ssetje 			return (0);	/* at EOF */
2885648Ssetje 
2895648Ssetje 		base = segmap_getmapflt(segkmap, vp, uiop->uio_loffset, n, 1,
2905648Ssetje 		    S_READ);
2915648Ssetje 		error = uiomove(base + mapon, n, UIO_READ, uiop);
2925648Ssetje 		if (!error) {
2935648Ssetje 			uint_t flags;
2945648Ssetje 
2955648Ssetje 			if (n + mapon == rdsize || uiop->uio_loffset == fsize)
2965648Ssetje 				flags = SM_DONTNEED;
2975648Ssetje 			else
2985648Ssetje 				flags = 0;
2995648Ssetje 			error = segmap_release(segkmap, base, flags);
3005648Ssetje 		} else
3015648Ssetje 			(void) segmap_release(segkmap, base, 0);
3025648Ssetje 	} while (!error && uiop->uio_resid);
3035648Ssetje 
3045648Ssetje 	return (error);
3055648Ssetje }
3065648Ssetje 
3075648Ssetje static int
dc_getattr(struct vnode * vp,struct vattr * vap,int flags,cred_t * cred,caller_context_t * ctp)3085648Ssetje dc_getattr(struct vnode *vp, struct vattr *vap, int flags,
3095648Ssetje     cred_t *cred, caller_context_t *ctp)
3105648Ssetje {
3115648Ssetje 	struct dcnode *dp = VTODC(vp);
3125648Ssetje 	struct vnode *subvp = dp->dc_subvp;
3135648Ssetje 	int error;
3145648Ssetje 
3155648Ssetje 	error = VOP_GETATTR(subvp, vap, flags, cred, ctp);
3165648Ssetje 
3175648Ssetje 	/* substitute uncompressed size */
3185648Ssetje 	vap->va_size = dp->dc_hdr->ch_fsize;
3195648Ssetje 	return (error);
3205648Ssetje }
3215648Ssetje 
3225648Ssetje static int
dc_setattr(struct vnode * vp,struct vattr * vap,int flags,cred_t * cred,caller_context_t * ctp)3235648Ssetje dc_setattr(struct vnode *vp, struct vattr *vap, int flags, cred_t *cred,
3245648Ssetje     caller_context_t *ctp)
3255648Ssetje {
3265648Ssetje 	struct dcnode *dp = VTODC(vp);
3275648Ssetje 	struct vnode *subvp = dp->dc_subvp;
3285648Ssetje 
3295648Ssetje 	return (VOP_SETATTR(subvp, vap, flags, cred, ctp));
3305648Ssetje }
3315648Ssetje 
3325648Ssetje static int
dc_access(struct vnode * vp,int mode,int flags,cred_t * cred,caller_context_t * ctp)3335648Ssetje dc_access(struct vnode *vp, int mode, int flags,
3345648Ssetje     cred_t *cred, caller_context_t *ctp)
3355648Ssetje {
3365648Ssetje 	struct dcnode *dp = VTODC(vp);
3375648Ssetje 	struct vnode *subvp = dp->dc_subvp;
3385648Ssetje 
3395648Ssetje 	return (VOP_ACCESS(subvp, mode, flags, cred, ctp));
3405648Ssetje }
3415648Ssetje 
3425648Ssetje /*ARGSUSED*/
3435648Ssetje static int
dc_fsync(vnode_t * vp,int syncflag,cred_t * cred,caller_context_t * ctp)3445648Ssetje dc_fsync(vnode_t *vp, int syncflag, cred_t *cred, caller_context_t *ctp)
3455648Ssetje {
3465648Ssetje 	return (0);
3475648Ssetje }
3485648Ssetje 
3495648Ssetje /*ARGSUSED*/
3505648Ssetje static void
dc_inactive(struct vnode * vp,cred_t * cr,caller_context_t * ctp)3515648Ssetje dc_inactive(struct vnode *vp, cred_t *cr, caller_context_t *ctp)
3525648Ssetje {
3535648Ssetje 	struct dcnode *dp = VTODC(vp);
3545648Ssetje 
3555648Ssetje 	mutex_enter(&dctable_lock);
3565648Ssetje 	mutex_enter(&vp->v_lock);
3575648Ssetje 	ASSERT(vp->v_count >= 1);
3585648Ssetje 	if (--vp->v_count != 0) {
3595648Ssetje 		/*
3605648Ssetje 		 * Somebody accessed the dcnode before we got a chance to
3615648Ssetje 		 * remove it.  They will remove it when they do a vn_rele.
3625648Ssetje 		 */
3635648Ssetje 		mutex_exit(&vp->v_lock);
3645648Ssetje 		mutex_exit(&dctable_lock);
3655648Ssetje 		return;
3665648Ssetje 	}
3675648Ssetje 	mutex_exit(&vp->v_lock);
3685648Ssetje 
3695648Ssetje 	dcnode_free(dp);
3705648Ssetje 
3715648Ssetje 	mutex_exit(&dctable_lock);
3725648Ssetje }
3735648Ssetje 
3745648Ssetje static int
dc_fid(struct vnode * vp,struct fid * fidp,caller_context_t * ctp)3755648Ssetje dc_fid(struct vnode *vp, struct fid *fidp, caller_context_t *ctp)
3765648Ssetje {
3775648Ssetje 	struct dcnode *dp = VTODC(vp);
3785648Ssetje 	struct vnode *subvp = dp->dc_subvp;
3795648Ssetje 
3805648Ssetje 	return (VOP_FID(subvp, fidp, ctp));
3815648Ssetje }
3825648Ssetje 
3835648Ssetje static int
dc_seek(struct vnode * vp,offset_t oof,offset_t * noffp,caller_context_t * ctp)3845648Ssetje dc_seek(struct vnode *vp, offset_t oof, offset_t *noffp, caller_context_t *ctp)
3855648Ssetje {
3865648Ssetje 	struct dcnode *dp = VTODC(vp);
3875648Ssetje 	struct vnode *subvp = dp->dc_subvp;
3885648Ssetje 
3895648Ssetje 	return (VOP_SEEK(subvp, oof, noffp, ctp));
3905648Ssetje }
3915648Ssetje 
3925648Ssetje static int
dc_frlock(struct vnode * vp,int cmd,struct flock64 * bfp,int flag,offset_t offset,struct flk_callback * flk_cbp,cred_t * cr,caller_context_t * ctp)3935648Ssetje dc_frlock(struct vnode *vp, int cmd, struct flock64 *bfp, int flag,
3945648Ssetje     offset_t offset, struct flk_callback *flk_cbp,
3955648Ssetje     cred_t *cr, caller_context_t *ctp)
3965648Ssetje {
3975648Ssetje 	struct dcnode *dp = VTODC(vp);
398*12874SJohn.Zolnowsky@Sun.COM 	int error;
399*12874SJohn.Zolnowsky@Sun.COM 	struct vattr vattr;
4005648Ssetje 
4015648Ssetje 	/*
4025648Ssetje 	 * If file is being mapped, disallow frlock.
4035648Ssetje 	 */
404*12874SJohn.Zolnowsky@Sun.COM 	vattr.va_mask = AT_MODE;
405*12874SJohn.Zolnowsky@Sun.COM 	if (error = VOP_GETATTR(dp->dc_subvp, &vattr, 0, cr, ctp))
406*12874SJohn.Zolnowsky@Sun.COM 		return (error);
407*12874SJohn.Zolnowsky@Sun.COM 	if (dp->dc_mapcnt > 0 && MANDLOCK(vp, vattr.va_mode))
4085648Ssetje 		return (EAGAIN);
4095648Ssetje 
4105648Ssetje 	return (fs_frlock(vp, cmd, bfp, flag, offset, flk_cbp, cr, ctp));
4115648Ssetje }
4125648Ssetje 
4135648Ssetje /*ARGSUSED*/
4145648Ssetje static int
dc_getblock_miss(struct vnode * vp,offset_t off,size_t len,struct page ** ppp,struct seg * seg,caddr_t addr,enum seg_rw rw,struct cred * cr)4155648Ssetje dc_getblock_miss(struct vnode *vp, offset_t off, size_t len, struct page **ppp,
4165648Ssetje     struct seg *seg, caddr_t addr, enum seg_rw rw, struct cred *cr)
4175648Ssetje {
4185648Ssetje 	struct dcnode *dp = VTODC(vp);
4195648Ssetje 	struct comphdr *hdr = dp->dc_hdr;
4205648Ssetje 	struct page *pp;
4215648Ssetje 	struct buf *bp;
4225648Ssetje 	caddr_t saddr;
4235648Ssetje 	off_t cblkno;
4245648Ssetje 	size_t rdoff, rdsize, dsize;
4255648Ssetje 	long xlen;
4265648Ssetje 	int error, zerr;
4275648Ssetje 
4285648Ssetje 	ASSERT(len == hdr->ch_blksize);
4295648Ssetje 	/*
4305648Ssetje 	 * Get destination pages and make them addressable
4315648Ssetje 	 */
4325648Ssetje 	pp = page_create_va(vp, off, len, PG_WAIT, seg, addr);
4335648Ssetje 	bp = pageio_setup(pp, len, vp, B_READ);
4345648Ssetje 	bp_mapin(bp);
4355648Ssetje 
4365648Ssetje 	/*
4375648Ssetje 	 * read compressed data from subordinate vnode
4385648Ssetje 	 */
4395648Ssetje 	saddr = kmem_cache_alloc(dp->dc_bufcache, KM_SLEEP);
4405648Ssetje 	cblkno = off / len;
4415648Ssetje 	rdoff = hdr->ch_blkmap[cblkno];
4425648Ssetje 	rdsize = hdr->ch_blkmap[cblkno + 1] - rdoff;
4435648Ssetje 	error = vn_rdwr(UIO_READ, dp->dc_subvp, saddr, rdsize, rdoff,
4445648Ssetje 	    UIO_SYSSPACE, 0, 0, cr, NULL);
4455648Ssetje 	if (error)
4465648Ssetje 		goto cleanup;
4475648Ssetje 
4485648Ssetje 	/*
4495648Ssetje 	 * Uncompress
4505648Ssetje 	 */
4515648Ssetje 	dsize = len;
4525648Ssetje 	zerr = z_uncompress(bp->b_un.b_addr, &dsize, saddr, dp->dc_zmax);
4535648Ssetje 	if (zerr != Z_OK) {
4545648Ssetje 		error = EIO;
4555648Ssetje 		goto cleanup;
4565648Ssetje 	}
4575648Ssetje 
4585648Ssetje 	/*
4595648Ssetje 	 * Handle EOF
4605648Ssetje 	 */
4615648Ssetje 	xlen = hdr->ch_fsize - off;
4625648Ssetje 	if (xlen < len) {
4635648Ssetje 		bzero(bp->b_un.b_addr + xlen, len - xlen);
4645648Ssetje 		if (dsize != xlen)
4655648Ssetje 			error = EIO;
4665648Ssetje 	} else if (dsize != len)
4675648Ssetje 		error = EIO;
4685648Ssetje 
4695648Ssetje 	/*
4705648Ssetje 	 * Clean up
4715648Ssetje 	 */
4725648Ssetje cleanup:
4735648Ssetje 	kmem_cache_free(dp->dc_bufcache, saddr);
4745648Ssetje 	pageio_done(bp);
4755648Ssetje 	*ppp = pp;
4765648Ssetje 	return (error);
4775648Ssetje }
4785648Ssetje 
4795648Ssetje static int
dc_getblock(struct vnode * vp,offset_t off,size_t len,struct page ** ppp,struct seg * seg,caddr_t addr,enum seg_rw rw,struct cred * cr)4805648Ssetje dc_getblock(struct vnode *vp, offset_t off, size_t len, struct page **ppp,
4815648Ssetje     struct seg *seg, caddr_t addr, enum seg_rw rw, struct cred *cr)
4825648Ssetje {
4835648Ssetje 	struct page *pp, *plist = NULL;
4845648Ssetje 	offset_t pgoff;
4855648Ssetje 	int rdblk;
4865648Ssetje 
4875648Ssetje 	/*
4885648Ssetje 	 * pvn_read_kluster() doesn't quite do what we want, since it
4895648Ssetje 	 * thinks sub block reads are ok.  Here we always decompress
4905648Ssetje 	 * a full block.
4915648Ssetje 	 */
4925648Ssetje 
4935648Ssetje 	/*
4945648Ssetje 	 * Check page cache
4955648Ssetje 	 */
4965648Ssetje 	rdblk = 0;
4975648Ssetje 	for (pgoff = off; pgoff < off + len; pgoff += PAGESIZE) {
4985648Ssetje 		pp = page_lookup(vp, pgoff, SE_EXCL);
4995648Ssetje 		if (pp == NULL) {
5005648Ssetje 			rdblk = 1;
5015648Ssetje 			break;
5025648Ssetje 		}
5035648Ssetje 		page_io_lock(pp);
5045648Ssetje 		page_add(&plist, pp);
5055648Ssetje 		plist = plist->p_next;
5065648Ssetje 	}
5075648Ssetje 	if (!rdblk) {
5085648Ssetje 		*ppp = plist;
5095648Ssetje 		return (0);	/* all pages in cache */
5105648Ssetje 	}
5115648Ssetje 
5125648Ssetje 	/*
5135648Ssetje 	 * Undo any locks so getblock_miss has an open field
5145648Ssetje 	 */
5155648Ssetje 	if (plist != NULL)
5165648Ssetje 		pvn_io_done(plist);
5175648Ssetje 
5185648Ssetje 	return (dc_getblock_miss(vp, off, len, ppp, seg, addr, rw, cr));
5195648Ssetje }
5205648Ssetje 
52112729SAndrew.Balfour@Sun.COM static int
dc_realvp(vnode_t * vp,vnode_t ** vpp,caller_context_t * ct)52212729SAndrew.Balfour@Sun.COM dc_realvp(vnode_t *vp, vnode_t **vpp, caller_context_t *ct)
52312729SAndrew.Balfour@Sun.COM {
52412729SAndrew.Balfour@Sun.COM 	struct vnode *rvp;
52512729SAndrew.Balfour@Sun.COM 
52612729SAndrew.Balfour@Sun.COM 	vp = VTODC(vp)->dc_subvp;
52712729SAndrew.Balfour@Sun.COM 	if (VOP_REALVP(vp, &rvp, ct) == 0)
52812729SAndrew.Balfour@Sun.COM 		vp = rvp;
52912729SAndrew.Balfour@Sun.COM 	*vpp = vp;
53012729SAndrew.Balfour@Sun.COM 	return (0);
53112729SAndrew.Balfour@Sun.COM }
53212729SAndrew.Balfour@Sun.COM 
5335648Ssetje /*ARGSUSED10*/
5345648Ssetje static int
dc_getpage(struct vnode * vp,offset_t off,size_t len,uint_t * protp,struct page * pl[],size_t plsz,struct seg * seg,caddr_t addr,enum seg_rw rw,struct cred * cr,caller_context_t * ctp)5355648Ssetje dc_getpage(struct vnode *vp, offset_t off, size_t len, uint_t *protp,
5365648Ssetje     struct page *pl[], size_t plsz, struct seg *seg, caddr_t addr,
5375648Ssetje     enum seg_rw rw, struct cred *cr, caller_context_t *ctp)
5385648Ssetje {
5395648Ssetje 	struct dcnode *dp = VTODC(vp);
5405648Ssetje 	struct comphdr *hdr = dp->dc_hdr;
5415648Ssetje 	struct page *pp, *plist = NULL;
5425648Ssetje 	caddr_t vp_baddr;
5435648Ssetje 	offset_t vp_boff, vp_bend;
5445648Ssetje 	size_t bsize = hdr->ch_blksize;
5455648Ssetje 	int nblks, error;
5465648Ssetje 
5475648Ssetje 	/* does not support write */
5485648Ssetje 	if (rw == S_WRITE) {
5495648Ssetje 		panic("write attempt on compressed file");
5505648Ssetje 		/*NOTREACHED*/
5515648Ssetje 	}
5525648Ssetje 
5535648Ssetje 	if (protp)
5545648Ssetje 		*protp = PROT_ALL;
5555648Ssetje 	/*
5565648Ssetje 	 * We don't support asynchronous operation at the moment, so
5575648Ssetje 	 * just pretend we did it.  If the pages are ever actually
5585648Ssetje 	 * needed, they'll get brought in then.
5595648Ssetje 	 */
5605648Ssetje 	if (pl == NULL)
5615648Ssetje 		return (0);
5625648Ssetje 
5635648Ssetje 	/*
5645648Ssetje 	 * Calc block start and end offsets
5655648Ssetje 	 */
5665648Ssetje 	vp_boff = rounddown(off, bsize);
5675648Ssetje 	vp_bend = roundup(off + len, bsize);
5685648Ssetje 	vp_baddr = (caddr_t)rounddown((uintptr_t)addr, bsize);
5695648Ssetje 
5705648Ssetje 	nblks = (vp_bend - vp_boff) / bsize;
5715648Ssetje 	while (nblks--) {
5725648Ssetje 		error = dc_getblock(vp, vp_boff, bsize, &pp, seg, vp_baddr,
5735648Ssetje 		    rw, cr);
5745648Ssetje 		page_list_concat(&plist, &pp);
5755648Ssetje 		vp_boff += bsize;
5765648Ssetje 		vp_baddr += bsize;
5775648Ssetje 	}
5785648Ssetje 	if (!error)
5795648Ssetje 		pvn_plist_init(plist, pl, plsz, off, len, rw);
5805648Ssetje 	else
5815648Ssetje 		pvn_read_done(plist, B_ERROR);
5825648Ssetje 	return (error);
5835648Ssetje }
5845648Ssetje 
5855648Ssetje /*
5865648Ssetje  * This function should never be called. We need to have it to pass
5875648Ssetje  * it as an argument to other functions.
5885648Ssetje  */
5895648Ssetje /*ARGSUSED*/
5905648Ssetje static int
dc_putapage(struct vnode * vp,struct page * pp,u_offset_t * offp,size_t * lenp,int flags,struct cred * cr)5915648Ssetje dc_putapage(struct vnode *vp, struct page *pp, u_offset_t *offp, size_t *lenp,
5925648Ssetje     int flags, struct cred *cr)
5935648Ssetje {
5945648Ssetje 	/* should never happen */
5955648Ssetje 	cmn_err(CE_PANIC, "dcfs: dc_putapage: dirty page");
5965648Ssetje 	/*NOTREACHED*/
5975648Ssetje 	return (0);
5985648Ssetje }
5995648Ssetje 
6005648Ssetje 
6015648Ssetje /*
6025648Ssetje  * The only flags we support are B_INVAL, B_FREE and B_DONTNEED.
6035648Ssetje  * B_INVAL is set by:
6045648Ssetje  *
6055648Ssetje  *	1) the MC_SYNC command of memcntl(2) to support the MS_INVALIDATE flag.
6065648Ssetje  *	2) the MC_ADVISE command of memcntl(2) with the MADV_DONTNEED advice
6075648Ssetje  *	   which translates to an MC_SYNC with the MS_INVALIDATE flag.
6085648Ssetje  *
6095648Ssetje  * The B_FREE (as well as the B_DONTNEED) flag is set when the
6105648Ssetje  * MADV_SEQUENTIAL advice has been used. VOP_PUTPAGE is invoked
6115648Ssetje  * from SEGVN to release pages behind a pagefault.
6125648Ssetje  */
6135648Ssetje /*ARGSUSED5*/
6145648Ssetje static int
dc_putpage(struct vnode * vp,offset_t off,size_t len,int flags,struct cred * cr,caller_context_t * ctp)6155648Ssetje dc_putpage(struct vnode *vp, offset_t off, size_t len, int flags,
6165648Ssetje     struct cred *cr, caller_context_t *ctp)
6175648Ssetje {
6185648Ssetje 	int error = 0;
6195648Ssetje 
6205648Ssetje 	if (vp->v_count == 0) {
6215648Ssetje 		panic("dcfs_putpage: bad v_count");
6225648Ssetje 		/*NOTREACHED*/
6235648Ssetje 	}
6245648Ssetje 
6255648Ssetje 	if (vp->v_flag & VNOMAP)
6265648Ssetje 		return (ENOSYS);
6275648Ssetje 
6285648Ssetje 	if (!vn_has_cached_data(vp))	/* no pages mapped */
6295648Ssetje 		return (0);
6305648Ssetje 
6315648Ssetje 	if (len == 0)		/* from 'off' to EOF */
6325648Ssetje 		error = pvn_vplist_dirty(vp, off, dc_putapage, flags, cr);
6335648Ssetje 	else {
6345648Ssetje 		offset_t io_off;
6355648Ssetje 		se_t se = (flags & (B_INVAL | B_FREE)) ? SE_EXCL : SE_SHARED;
6365648Ssetje 
6375648Ssetje 		for (io_off = off; io_off < off + len; io_off += PAGESIZE) {
6385648Ssetje 			page_t *pp;
6395648Ssetje 
6405648Ssetje 			/*
6415648Ssetje 			 * We insist on getting the page only if we are
6425648Ssetje 			 * about to invalidate, free or write it and
6435648Ssetje 			 * the B_ASYNC flag is not set.
6445648Ssetje 			 */
6455648Ssetje 			if ((flags & B_INVAL) || ((flags & B_ASYNC) == 0))
6465648Ssetje 				pp = page_lookup(vp, io_off, se);
6475648Ssetje 			else
6485648Ssetje 				pp = page_lookup_nowait(vp, io_off, se);
6495648Ssetje 
6505648Ssetje 			if (pp == NULL)
6515648Ssetje 				continue;
6525648Ssetje 			/*
6535648Ssetje 			 * Normally pvn_getdirty() should return 0, which
6545648Ssetje 			 * impies that it has done the job for us.
6555648Ssetje 			 * The shouldn't-happen scenario is when it returns 1.
6565648Ssetje 			 * This means that the page has been modified and
6575648Ssetje 			 * needs to be put back.
6585648Ssetje 			 * Since we can't write to a dcfs compressed file,
6595648Ssetje 			 * we fake a failed I/O and force pvn_write_done()
6605648Ssetje 			 * to destroy the page.
6615648Ssetje 			 */
6625648Ssetje 			if (pvn_getdirty(pp, flags) == 1) {
6635648Ssetje 				cmn_err(CE_NOTE, "dc_putpage: dirty page");
6645648Ssetje 				pvn_write_done(pp, flags |
6655648Ssetje 				    B_ERROR | B_WRITE | B_INVAL | B_FORCE);
6665648Ssetje 			}
6675648Ssetje 		}
6685648Ssetje 	}
6695648Ssetje 	return (error);
6705648Ssetje }
6715648Ssetje 
6725648Ssetje static int
dc_map(struct vnode * vp,offset_t off,struct as * as,caddr_t * addrp,size_t len,uchar_t prot,uchar_t maxprot,uint_t flags,struct cred * cred,caller_context_t * ctp)6735648Ssetje dc_map(struct vnode *vp, offset_t off, struct as *as, caddr_t *addrp,
6745648Ssetje     size_t len, uchar_t prot, uchar_t maxprot, uint_t flags,
6755648Ssetje     struct cred *cred, caller_context_t *ctp)
6765648Ssetje {
6775648Ssetje 	struct vattr vattr;
6785648Ssetje 	struct segvn_crargs vn_a;
6795648Ssetje 	int error;
6805648Ssetje 
6815648Ssetje 	if (vp->v_flag & VNOMAP)
6825648Ssetje 		return (ENOSYS);
6835648Ssetje 
6845648Ssetje 	if (off < (offset_t)0 || (offset_t)(off + len) < (offset_t)0)
6855648Ssetje 		return (ENXIO);
6865648Ssetje 
6875648Ssetje 	/*
6885648Ssetje 	 * If file is being locked, disallow mapping.
6895648Ssetje 	 */
6905648Ssetje 	if (error = VOP_GETATTR(VTODC(vp)->dc_subvp, &vattr, 0, cred, ctp))
6915648Ssetje 		return (error);
6925648Ssetje 	if (vn_has_mandatory_locks(vp, vattr.va_mode))
6935648Ssetje 		return (EAGAIN);
6945648Ssetje 
6955648Ssetje 	as_rangelock(as);
6965648Ssetje 
6975648Ssetje 	if ((flags & MAP_FIXED) == 0) {
6985648Ssetje 		map_addr(addrp, len, off, 1, flags);
6995648Ssetje 		if (*addrp == NULL) {
7005648Ssetje 			as_rangeunlock(as);
7015648Ssetje 			return (ENOMEM);
7025648Ssetje 		}
7035648Ssetje 	} else {
7045648Ssetje 		/*
7055648Ssetje 		 * User specified address - blow away any previous mappings
7065648Ssetje 		 */
7075648Ssetje 		(void) as_unmap(as, *addrp, len);
7085648Ssetje 	}
7095648Ssetje 
7105648Ssetje 	vn_a.vp = vp;
7115648Ssetje 	vn_a.offset = off;
7125648Ssetje 	vn_a.type = flags & MAP_TYPE;
7135648Ssetje 	vn_a.prot = prot;
7145648Ssetje 	vn_a.maxprot = maxprot;
7155648Ssetje 	vn_a.flags = flags & ~MAP_TYPE;
7165648Ssetje 	vn_a.cred = cred;
7175648Ssetje 	vn_a.amp = NULL;
7185648Ssetje 	vn_a.szc = 0;
7195648Ssetje 	vn_a.lgrp_mem_policy_flags = 0;
7205648Ssetje 
7215648Ssetje 	error = as_map(as, *addrp, len, segvn_create, &vn_a);
7225648Ssetje 	as_rangeunlock(as);
7235648Ssetje 	return (error);
7245648Ssetje }
7255648Ssetje 
7265648Ssetje /*ARGSUSED*/
7275648Ssetje static int
dc_addmap(struct vnode * vp,offset_t off,struct as * as,caddr_t addr,size_t len,uchar_t prot,uchar_t maxprot,uint_t flags,struct cred * cr,caller_context_t * ctp)7285648Ssetje dc_addmap(struct vnode *vp, offset_t off, struct as *as, caddr_t addr,
7295648Ssetje     size_t len, uchar_t prot, uchar_t maxprot, uint_t flags,
7305648Ssetje     struct cred *cr, caller_context_t *ctp)
7315648Ssetje {
7325648Ssetje 	struct dcnode *dp;
7335648Ssetje 
7345648Ssetje 	if (vp->v_flag & VNOMAP)
7355648Ssetje 		return (ENOSYS);
7365648Ssetje 
7375648Ssetje 	dp = VTODC(vp);
7385648Ssetje 	mutex_enter(&dp->dc_lock);
7395648Ssetje 	dp->dc_mapcnt += btopr(len);
7405648Ssetje 	mutex_exit(&dp->dc_lock);
7415648Ssetje 	return (0);
7425648Ssetje }
7435648Ssetje 
7445648Ssetje /*ARGSUSED*/
7455648Ssetje static int
dc_delmap(struct vnode * vp,offset_t off,struct as * as,caddr_t addr,size_t len,uint_t prot,uint_t maxprot,uint_t flags,struct cred * cr,caller_context_t * ctp)7465648Ssetje dc_delmap(struct vnode *vp, offset_t off, struct as *as, caddr_t addr,
7475648Ssetje     size_t len, uint_t prot, uint_t maxprot, uint_t flags,
7485648Ssetje     struct cred *cr, caller_context_t *ctp)
7495648Ssetje {
7505648Ssetje 	struct dcnode *dp;
7515648Ssetje 
7525648Ssetje 	if (vp->v_flag & VNOMAP)
7535648Ssetje 		return (ENOSYS);
7545648Ssetje 
7555648Ssetje 	dp = VTODC(vp);
7565648Ssetje 	mutex_enter(&dp->dc_lock);
7575648Ssetje 	dp->dc_mapcnt -= btopr(len);
7585648Ssetje 	ASSERT(dp->dc_mapcnt >= 0);
7595648Ssetje 	mutex_exit(&dp->dc_lock);
7605648Ssetje 	return (0);
7615648Ssetje }
7625648Ssetje 
7635648Ssetje /*
7645648Ssetje  * Constructor/destructor routines for dcnodes
7655648Ssetje  */
7665648Ssetje /*ARGSUSED1*/
7675648Ssetje static int
dcnode_constructor(void * buf,void * cdrarg,int kmflags)7685648Ssetje dcnode_constructor(void *buf, void *cdrarg, int kmflags)
7695648Ssetje {
7705648Ssetje 	struct dcnode *dp = buf;
7715648Ssetje 	struct vnode *vp;
7725648Ssetje 
7736754Stomee 	vp = dp->dc_vp = vn_alloc(kmflags);
7746754Stomee 	if (vp == NULL) {
7756754Stomee 		return (-1);
7766754Stomee 	}
7776754Stomee 	vp->v_data = dp;
7785648Ssetje 	vp->v_type = VREG;
7795648Ssetje 	vp->v_flag = VNOSWAP;
7805648Ssetje 	vp->v_vfsp = &dc_vfs;
7815648Ssetje 	vn_setops(vp, dc_vnodeops);
7825648Ssetje 	vn_exists(vp);
7835648Ssetje 
7845648Ssetje 	mutex_init(&dp->dc_lock, NULL, MUTEX_DEFAULT, NULL);
7855648Ssetje 	dp->dc_mapcnt = 0;
7865648Ssetje 	dp->dc_lrunext = dp->dc_lruprev = NULL;
7876754Stomee 	dp->dc_hdr = NULL;
7886754Stomee 	dp->dc_subvp = NULL;
7895648Ssetje 	return (0);
7905648Ssetje }
7915648Ssetje 
7925648Ssetje /*ARGSUSED*/
7935648Ssetje static void
dcnode_destructor(void * buf,void * cdrarg)7945648Ssetje dcnode_destructor(void *buf, void *cdrarg)
7955648Ssetje {
7965648Ssetje 	struct dcnode *dp = buf;
7975648Ssetje 	struct vnode *vp = DCTOV(dp);
7985648Ssetje 
7995648Ssetje 	mutex_destroy(&dp->dc_lock);
8005648Ssetje 
8015648Ssetje 	VERIFY(dp->dc_hdr == NULL);
8025648Ssetje 	VERIFY(dp->dc_subvp == NULL);
8035648Ssetje 	vn_invalid(vp);
8045648Ssetje 	vn_free(vp);
8055648Ssetje }
8065648Ssetje 
8075648Ssetje static struct dcnode *
dcnode_alloc(void)8085648Ssetje dcnode_alloc(void)
8095648Ssetje {
8105648Ssetje 	struct dcnode *dp;
8115648Ssetje 
8125648Ssetje 	/*
8135648Ssetje 	 * If the free list is above DCLRUSIZE
8145648Ssetje 	 * re-use one from it
8155648Ssetje 	 */
8165648Ssetje 	mutex_enter(&dctable_lock);
8175648Ssetje 	if (dclru_len < DCLRUSIZE) {
8185648Ssetje 		mutex_exit(&dctable_lock);
8195648Ssetje 		dp = kmem_cache_alloc(dcnode_cache, KM_SLEEP);
8205648Ssetje 	} else {
8215648Ssetje 		ASSERT(dclru != NULL);
8225648Ssetje 		dp = dclru;
8235648Ssetje 		dclru_sub(dp);
8245648Ssetje 		dcdelete(dp);
8255648Ssetje 		mutex_exit(&dctable_lock);
8265648Ssetje 		dcnode_recycle(dp);
8275648Ssetje 	}
8285648Ssetje 	return (dp);
8295648Ssetje }
8305648Ssetje 
8315648Ssetje static void
dcnode_free(struct dcnode * dp)8325648Ssetje dcnode_free(struct dcnode *dp)
8335648Ssetje {
8345648Ssetje 	struct vnode *vp = DCTOV(dp);
8355648Ssetje 
8365648Ssetje 	ASSERT(MUTEX_HELD(&dctable_lock));
8375648Ssetje 
8385648Ssetje 	/*
8395648Ssetje 	 * If no cached pages, no need to put it on lru
8405648Ssetje 	 */
8415648Ssetje 	if (!vn_has_cached_data(vp)) {
8425648Ssetje 		dcdelete(dp);
8435648Ssetje 		dcnode_recycle(dp);
8445648Ssetje 		kmem_cache_free(dcnode_cache, dp);
8455648Ssetje 		return;
8465648Ssetje 	}
8475648Ssetje 
8485648Ssetje 	/*
8495648Ssetje 	 * Add to lru, if it's over the limit, free from head
8505648Ssetje 	 */
8515648Ssetje 	dclru_add(dp);
8525648Ssetje 	if (dclru_len > DCLRUSIZE) {
8535648Ssetje 		dp = dclru;
8545648Ssetje 		dclru_sub(dp);
8555648Ssetje 		dcdelete(dp);
8565648Ssetje 		dcnode_recycle(dp);
8575648Ssetje 		kmem_cache_free(dcnode_cache, dp);
8585648Ssetje 	}
8595648Ssetje }
8605648Ssetje 
8615648Ssetje static void
dcnode_recycle(struct dcnode * dp)8625648Ssetje dcnode_recycle(struct dcnode *dp)
8635648Ssetje {
8645648Ssetje 	struct vnode *vp;
8655648Ssetje 
8665648Ssetje 	vp = DCTOV(dp);
8675648Ssetje 
8685648Ssetje 	VN_RELE(dp->dc_subvp);
8695648Ssetje 	dp->dc_subvp = NULL;
8705648Ssetje 	(void) pvn_vplist_dirty(vp, 0, dc_putapage, B_INVAL, NULL);
8715648Ssetje 	kmem_free(dp->dc_hdr, dp->dc_hdrsize);
8725648Ssetje 	dp->dc_hdr = NULL;
8735648Ssetje 	dp->dc_hdrsize = dp->dc_zmax = 0;
8745648Ssetje 	dp->dc_bufcache = NULL;
8755648Ssetje 	dp->dc_mapcnt = 0;
8765648Ssetje 	vn_reinit(vp);
8775648Ssetje 	vp->v_type = VREG;
8785648Ssetje 	vp->v_flag = VNOSWAP;
8795648Ssetje 	vp->v_vfsp = &dc_vfs;
8805648Ssetje }
8815648Ssetje 
8825648Ssetje static int
dcinit(int fstype,char * name)8835648Ssetje dcinit(int fstype, char *name)
8845648Ssetje {
8855648Ssetje 	static const fs_operation_def_t dc_vfsops_template[] = {
8865648Ssetje 		NULL, NULL
8875648Ssetje 	};
8885648Ssetje 	int error;
8895648Ssetje 	major_t dev;
8905648Ssetje 
8915648Ssetje 	error = vfs_setfsops(fstype, dc_vfsops_template, &dc_vfsops);
8925648Ssetje 	if (error) {
8935648Ssetje 		cmn_err(CE_WARN, "dcinit: bad vfs ops template");
8945648Ssetje 		return (error);
8955648Ssetje 	}
8965648Ssetje 	VFS_INIT(&dc_vfs, dc_vfsops, NULL);
8975648Ssetje 	dc_vfs.vfs_flag = VFS_RDONLY;
8985648Ssetje 	dc_vfs.vfs_fstype = fstype;
8995648Ssetje 	if ((dev = getudev()) == (major_t)-1)
9005648Ssetje 		dev = 0;
9015648Ssetje 	dcdev = makedevice(dev, 0);
9025648Ssetje 	dc_vfs.vfs_dev = dcdev;
9035648Ssetje 
9045648Ssetje 	error = vn_make_ops(name, dc_vnodeops_template, &dc_vnodeops);
9055648Ssetje 	if (error != 0) {
9065648Ssetje 		(void) vfs_freevfsops_by_type(fstype);
9075648Ssetje 		cmn_err(CE_WARN, "dcinit: bad vnode ops template");
9085648Ssetje 		return (error);
9095648Ssetje 	}
9105648Ssetje 
9115648Ssetje 	mutex_init(&dctable_lock, NULL, MUTEX_DEFAULT, NULL);
9125648Ssetje 	mutex_init(&dccache_lock, NULL, MUTEX_DEFAULT, NULL);
9135648Ssetje 	dcnode_cache = kmem_cache_create("dcnode_cache", sizeof (struct dcnode),
9145648Ssetje 	    0, dcnode_constructor, dcnode_destructor, NULL, NULL, NULL, 0);
9155648Ssetje 
9165648Ssetje 	return (0);
9175648Ssetje }
9185648Ssetje 
9195648Ssetje /*
9205648Ssetje  * Return shadow vnode with the given vp as its subordinate
9215648Ssetje  */
9225648Ssetje struct vnode *
decompvp(struct vnode * vp,cred_t * cred,caller_context_t * ctp)9235648Ssetje decompvp(struct vnode *vp, cred_t *cred, caller_context_t *ctp)
9245648Ssetje {
9255648Ssetje 	struct dcnode *dp, *ndp;
9265648Ssetje 	struct comphdr thdr, *hdr;
9275648Ssetje 	struct kmem_cache **cpp;
9285648Ssetje 	struct vattr vattr;
9295648Ssetje 	size_t hdrsize, bsize;
9305648Ssetje 	int error;
9315648Ssetje 
9325648Ssetje 	/*
9335648Ssetje 	 * See if we have an existing shadow
9345648Ssetje 	 * If none, we have to manufacture one
9355648Ssetje 	 */
9365648Ssetje 	mutex_enter(&dctable_lock);
9375648Ssetje 	dp = dcfind(vp);
9385648Ssetje 	mutex_exit(&dctable_lock);
9395648Ssetje 	if (dp != NULL)
9405648Ssetje 		return (DCTOV(dp));
9415648Ssetje 
9425648Ssetje 	/*
9435648Ssetje 	 * Make sure it's a valid compressed file
9445648Ssetje 	 */
9455648Ssetje 	hdr = &thdr;
9465648Ssetje 	error = vn_rdwr(UIO_READ, vp, (caddr_t)hdr, sizeof (struct comphdr), 0,
9475648Ssetje 	    UIO_SYSSPACE, 0, 0, cred, NULL);
9487858SKrishnendu.Sadhukhan@Sun.COM 	if (error || hdr->ch_magic != CH_MAGIC_ZLIB ||
9495648Ssetje 	    hdr->ch_version != CH_VERSION || hdr->ch_algorithm != CH_ALG_ZLIB ||
9505648Ssetje 	    hdr->ch_fsize == 0 || hdr->ch_blksize < PAGESIZE ||
9515648Ssetje 	    hdr->ch_blksize > ptob(DCCACHESIZE) ||
9525648Ssetje 	    (hdr->ch_blksize & (hdr->ch_blksize - 1)) != 0)
9535648Ssetje 		return (NULL);
9545648Ssetje 
9555648Ssetje 	/* get underlying file size */
9565648Ssetje 	if (VOP_GETATTR(vp, &vattr, 0, cred, ctp) != 0)
9575648Ssetje 		return (NULL);
9585648Ssetje 
9595648Ssetje 	/*
9605648Ssetje 	 * Re-read entire header
9615648Ssetje 	 */
9625648Ssetje 	hdrsize = hdr->ch_blkmap[0] + sizeof (uint64_t);
9635648Ssetje 	hdr = kmem_alloc(hdrsize, KM_SLEEP);
9645648Ssetje 	error = vn_rdwr(UIO_READ, vp, (caddr_t)hdr, hdrsize, 0, UIO_SYSSPACE,
9655648Ssetje 	    0, 0, cred, NULL);
9665648Ssetje 	if (error) {
9675648Ssetje 		kmem_free(hdr, hdrsize);
9685648Ssetje 		return (NULL);
9695648Ssetje 	}
9705648Ssetje 
9715648Ssetje 	/*
9725648Ssetje 	 * add extra blkmap entry to make dc_getblock()'s
9735648Ssetje 	 * life easier
9745648Ssetje 	 */
9755648Ssetje 	bsize = hdr->ch_blksize;
9765648Ssetje 	hdr->ch_blkmap[((hdr->ch_fsize-1) / bsize) + 1] = vattr.va_size;
9775648Ssetje 
9785648Ssetje 	ndp = dcnode_alloc();
9795648Ssetje 	ndp->dc_subvp = vp;
9805648Ssetje 	VN_HOLD(vp);
9815648Ssetje 	ndp->dc_hdr = hdr;
9825648Ssetje 	ndp->dc_hdrsize = hdrsize;
9835648Ssetje 
9845648Ssetje 	/*
9855648Ssetje 	 * Allocate kmem cache if none there already
9865648Ssetje 	 */
9875648Ssetje 	ndp->dc_zmax = ZMAXBUF(bsize);
9885648Ssetje 	cpp = &dcbuf_cache[btop(bsize)];
9895648Ssetje 	mutex_enter(&dccache_lock);
9905648Ssetje 	if (*cpp == NULL)
9915648Ssetje 		*cpp = kmem_cache_create("dcbuf_cache", ndp->dc_zmax, 0, NULL,
9925648Ssetje 		    NULL, NULL, NULL, NULL, 0);
9935648Ssetje 	mutex_exit(&dccache_lock);
9945648Ssetje 	ndp->dc_bufcache = *cpp;
9955648Ssetje 
9965648Ssetje 	/*
9975648Ssetje 	 * Recheck table in case someone else created shadow
9985648Ssetje 	 * while we were blocked above.
9995648Ssetje 	 */
10005648Ssetje 	mutex_enter(&dctable_lock);
10015648Ssetje 	dp = dcfind(vp);
10025648Ssetje 	if (dp != NULL) {
10035648Ssetje 		mutex_exit(&dctable_lock);
10045648Ssetje 		dcnode_recycle(ndp);
10055648Ssetje 		kmem_cache_free(dcnode_cache, ndp);
10065648Ssetje 		return (DCTOV(dp));
10075648Ssetje 	}
10085648Ssetje 	dcinsert(ndp);
10095648Ssetje 	mutex_exit(&dctable_lock);
10105648Ssetje 
10115648Ssetje 	return (DCTOV(ndp));
10125648Ssetje }
10135648Ssetje 
10145648Ssetje 
10155648Ssetje /*
10165648Ssetje  * dcnode lookup table
10175648Ssetje  * These routines maintain a table of dcnodes hashed by their
10185648Ssetje  * subordinate vnode so that they can be found if they already
10195648Ssetje  * exist in the vnode cache
10205648Ssetje  */
10215648Ssetje 
10225648Ssetje /*
10235648Ssetje  * Put a dcnode in the table.
10245648Ssetje  */
10255648Ssetje static void
dcinsert(struct dcnode * newdp)10265648Ssetje dcinsert(struct dcnode *newdp)
10275648Ssetje {
10285648Ssetje 	int idx = DCHASH(newdp->dc_subvp);
10295648Ssetje 
10305648Ssetje 	ASSERT(MUTEX_HELD(&dctable_lock));
10315648Ssetje 	newdp->dc_hash = dctable[idx];
10325648Ssetje 	dctable[idx] = newdp;
10335648Ssetje }
10345648Ssetje 
10355648Ssetje /*
10365648Ssetje  * Remove a dcnode from the hash table.
10375648Ssetje  */
10385648Ssetje void
dcdelete(struct dcnode * deldp)10395648Ssetje dcdelete(struct dcnode *deldp)
10405648Ssetje {
10415648Ssetje 	int idx = DCHASH(deldp->dc_subvp);
10425648Ssetje 	struct dcnode *dp, *prevdp;
10435648Ssetje 
10445648Ssetje 	ASSERT(MUTEX_HELD(&dctable_lock));
10455648Ssetje 	dp = dctable[idx];
10465648Ssetje 	if (dp == deldp)
10475648Ssetje 		dctable[idx] = dp->dc_hash;
10485648Ssetje 	else {
10495648Ssetje 		for (prevdp = dp, dp = dp->dc_hash; dp != NULL;
10505648Ssetje 		    prevdp = dp, dp = dp->dc_hash) {
10515648Ssetje 			if (dp == deldp) {
10525648Ssetje 				prevdp->dc_hash = dp->dc_hash;
10535648Ssetje 				break;
10545648Ssetje 			}
10555648Ssetje 		}
10565648Ssetje 	}
10575648Ssetje 	ASSERT(dp != NULL);
10585648Ssetje }
10595648Ssetje 
10605648Ssetje /*
10615648Ssetje  * Find a shadow vnode in the dctable hash list.
10625648Ssetje  */
10635648Ssetje static struct dcnode *
dcfind(struct vnode * vp)10645648Ssetje dcfind(struct vnode *vp)
10655648Ssetje {
10665648Ssetje 	struct dcnode *dp;
10675648Ssetje 
10685648Ssetje 	ASSERT(MUTEX_HELD(&dctable_lock));
10695648Ssetje 	for (dp = dctable[DCHASH(vp)]; dp != NULL; dp = dp->dc_hash)
10705648Ssetje 		if (dp->dc_subvp == vp) {
10715648Ssetje 			VN_HOLD(DCTOV(dp));
10725648Ssetje 			if (dp->dc_lrunext)
10735648Ssetje 				dclru_sub(dp);
10745648Ssetje 			return (dp);
10755648Ssetje 		}
10765648Ssetje 	return (NULL);
10775648Ssetje }
10785648Ssetje 
#ifdef	DEBUG
/*
 * Count the nodes on the circular LRU list by walking it once around
 * from dclru.  Only built for DEBUG, where it backs the dclru_len
 * consistency assertions.
 */
static int
dclru_count(void)
{
	struct dcnode *cur = dclru;
	int count = 0;

	if (cur == NULL)
		return (0);

	do {
		count++;
		cur = cur->dc_lrunext;
	} while (cur != dclru);

	return (count);
}
#endif
10935648Ssetje 
10945648Ssetje static void
dclru_add(struct dcnode * dp)10955648Ssetje dclru_add(struct dcnode *dp)
10965648Ssetje {
10975648Ssetje 	/*
10985648Ssetje 	 * Add to dclru as double-link chain
10995648Ssetje 	 */
11005648Ssetje 	ASSERT(MUTEX_HELD(&dctable_lock));
11015648Ssetje 	if (dclru == NULL) {
11025648Ssetje 		dclru = dp;
11035648Ssetje 		dp->dc_lruprev = dp->dc_lrunext = dp;
11045648Ssetje 	} else {
11055648Ssetje 		struct dcnode *last = dclru->dc_lruprev;
11065648Ssetje 
11075648Ssetje 		dclru->dc_lruprev = dp;
11085648Ssetje 		last->dc_lrunext = dp;
11095648Ssetje 		dp->dc_lruprev = last;
11105648Ssetje 		dp->dc_lrunext = dclru;
11115648Ssetje 	}
11125648Ssetje 	dclru_len++;
11135648Ssetje 	ASSERT(dclru_len == dclru_count());
11145648Ssetje }
11155648Ssetje 
11165648Ssetje static void
dclru_sub(struct dcnode * dp)11175648Ssetje dclru_sub(struct dcnode *dp)
11185648Ssetje {
11195648Ssetje 	ASSERT(MUTEX_HELD(&dctable_lock));
11205648Ssetje 	dp->dc_lrunext->dc_lruprev = dp->dc_lruprev;
11215648Ssetje 	dp->dc_lruprev->dc_lrunext = dp->dc_lrunext;
11225648Ssetje 	if (dp == dclru)
11235648Ssetje 		dclru = dp->dc_lrunext == dp ? NULL : dp->dc_lrunext;
11245648Ssetje 	dp->dc_lrunext = dp->dc_lruprev = NULL;
11255648Ssetje 	dclru_len--;
11265648Ssetje 	ASSERT(dclru_len == dclru_count());
11275648Ssetje }
1128