xref: /onnv-gate/usr/src/uts/common/fs/ufs/ufs_inode.c (revision 0:68f95e015346)
1*0Sstevel@tonic-gate /*
2*0Sstevel@tonic-gate  * CDDL HEADER START
3*0Sstevel@tonic-gate  *
4*0Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
5*0Sstevel@tonic-gate  * Common Development and Distribution License, Version 1.0 only
6*0Sstevel@tonic-gate  * (the "License").  You may not use this file except in compliance
7*0Sstevel@tonic-gate  * with the License.
8*0Sstevel@tonic-gate  *
9*0Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10*0Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
11*0Sstevel@tonic-gate  * See the License for the specific language governing permissions
12*0Sstevel@tonic-gate  * and limitations under the License.
13*0Sstevel@tonic-gate  *
14*0Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
15*0Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16*0Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
17*0Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
18*0Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
19*0Sstevel@tonic-gate  *
20*0Sstevel@tonic-gate  * CDDL HEADER END
21*0Sstevel@tonic-gate  */
22*0Sstevel@tonic-gate /*
23*0Sstevel@tonic-gate  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24*0Sstevel@tonic-gate  * Use is subject to license terms.
25*0Sstevel@tonic-gate  */
26*0Sstevel@tonic-gate 
27*0Sstevel@tonic-gate /*	Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T	*/
28*0Sstevel@tonic-gate /*	  All Rights Reserved  	*/
29*0Sstevel@tonic-gate 
30*0Sstevel@tonic-gate /*
31*0Sstevel@tonic-gate  * University Copyright- Copyright (c) 1982, 1986, 1988
32*0Sstevel@tonic-gate  * The Regents of the University of California
33*0Sstevel@tonic-gate  * All Rights Reserved
34*0Sstevel@tonic-gate  *
35*0Sstevel@tonic-gate  * University Acknowledgment- Portions of this document are derived from
36*0Sstevel@tonic-gate  * software developed by the University of California, Berkeley, and its
37*0Sstevel@tonic-gate  * contributors.
38*0Sstevel@tonic-gate  */
39*0Sstevel@tonic-gate 
40*0Sstevel@tonic-gate 
41*0Sstevel@tonic-gate #pragma ident	"%Z%%M%	%I%	%E% SMI"
42*0Sstevel@tonic-gate 
43*0Sstevel@tonic-gate #include <sys/types.h>
44*0Sstevel@tonic-gate #include <sys/t_lock.h>
45*0Sstevel@tonic-gate #include <sys/param.h>
46*0Sstevel@tonic-gate #include <sys/systm.h>
47*0Sstevel@tonic-gate #include <sys/uio.h>
48*0Sstevel@tonic-gate #include <sys/bitmap.h>
49*0Sstevel@tonic-gate #include <sys/signal.h>
50*0Sstevel@tonic-gate #include <sys/cred.h>
51*0Sstevel@tonic-gate #include <sys/user.h>
52*0Sstevel@tonic-gate #include <sys/vfs.h>
53*0Sstevel@tonic-gate #include <sys/stat.h>
54*0Sstevel@tonic-gate #include <sys/vnode.h>
55*0Sstevel@tonic-gate #include <sys/buf.h>
56*0Sstevel@tonic-gate #include <sys/proc.h>
57*0Sstevel@tonic-gate #include <sys/disp.h>
58*0Sstevel@tonic-gate #include <sys/dnlc.h>
59*0Sstevel@tonic-gate #include <sys/mode.h>
60*0Sstevel@tonic-gate #include <sys/cmn_err.h>
61*0Sstevel@tonic-gate #include <sys/kstat.h>
62*0Sstevel@tonic-gate #include <sys/acl.h>
63*0Sstevel@tonic-gate #include <sys/var.h>
64*0Sstevel@tonic-gate #include <sys/fs/ufs_inode.h>
65*0Sstevel@tonic-gate #include <sys/fs/ufs_fs.h>
66*0Sstevel@tonic-gate #include <sys/fs/ufs_trans.h>
67*0Sstevel@tonic-gate #include <sys/fs/ufs_acl.h>
68*0Sstevel@tonic-gate #include <sys/fs/ufs_bio.h>
69*0Sstevel@tonic-gate #include <sys/fs/ufs_quota.h>
70*0Sstevel@tonic-gate #include <sys/fs/ufs_log.h>
71*0Sstevel@tonic-gate #include <vm/hat.h>
72*0Sstevel@tonic-gate #include <vm/as.h>
73*0Sstevel@tonic-gate #include <vm/pvn.h>
74*0Sstevel@tonic-gate #include <vm/seg.h>
75*0Sstevel@tonic-gate #include <sys/swap.h>
76*0Sstevel@tonic-gate #include <sys/cpuvar.h>
77*0Sstevel@tonic-gate #include <sys/sysmacros.h>
78*0Sstevel@tonic-gate #include <sys/errno.h>
79*0Sstevel@tonic-gate #include <sys/kmem.h>
80*0Sstevel@tonic-gate #include <sys/debug.h>
81*0Sstevel@tonic-gate #include <fs/fs_subr.h>
82*0Sstevel@tonic-gate #include <sys/policy.h>
83*0Sstevel@tonic-gate 
84*0Sstevel@tonic-gate struct kmem_cache *inode_cache;		/* cache of free inodes */
85*0Sstevel@tonic-gate 
86*0Sstevel@tonic-gate /* UFS Inode Cache Stats -- Not protected */
87*0Sstevel@tonic-gate struct	instats ins = {
88*0Sstevel@tonic-gate 	{ "size",		KSTAT_DATA_ULONG },
89*0Sstevel@tonic-gate 	{ "maxsize",		KSTAT_DATA_ULONG },
90*0Sstevel@tonic-gate 	{ "hits",		KSTAT_DATA_ULONG },
91*0Sstevel@tonic-gate 	{ "misses",		KSTAT_DATA_ULONG },
92*0Sstevel@tonic-gate 	{ "kmem allocs",	KSTAT_DATA_ULONG },
93*0Sstevel@tonic-gate 	{ "kmem frees",		KSTAT_DATA_ULONG },
94*0Sstevel@tonic-gate 	{ "maxsize reached",	KSTAT_DATA_ULONG },
95*0Sstevel@tonic-gate 	{ "puts at frontlist",	KSTAT_DATA_ULONG },
96*0Sstevel@tonic-gate 	{ "puts at backlist",	KSTAT_DATA_ULONG },
97*0Sstevel@tonic-gate 	{ "queues to free",	KSTAT_DATA_ULONG },
98*0Sstevel@tonic-gate 	{ "scans",		KSTAT_DATA_ULONG },
99*0Sstevel@tonic-gate 	{ "thread idles",	KSTAT_DATA_ULONG },
100*0Sstevel@tonic-gate 	{ "lookup idles",	KSTAT_DATA_ULONG },
101*0Sstevel@tonic-gate 	{ "vget idles",		KSTAT_DATA_ULONG },
102*0Sstevel@tonic-gate 	{ "cache allocs",	KSTAT_DATA_ULONG },
103*0Sstevel@tonic-gate 	{ "cache frees",	KSTAT_DATA_ULONG },
104*0Sstevel@tonic-gate 	{ "pushes at close",	KSTAT_DATA_ULONG }
105*0Sstevel@tonic-gate };
106*0Sstevel@tonic-gate 
107*0Sstevel@tonic-gate /* kstat data */
108*0Sstevel@tonic-gate static kstat_t		*ufs_inode_kstat = NULL;
109*0Sstevel@tonic-gate 
110*0Sstevel@tonic-gate union ihead *ihead;	/* inode LRU cache, Chris Maltby */
111*0Sstevel@tonic-gate kmutex_t *ih_lock;	/* protect inode cache hash table */
112*0Sstevel@tonic-gate static int ino_hashlen = 4;	/* desired average hash chain length */
113*0Sstevel@tonic-gate int inohsz;		/* number of buckets in the hash table */
114*0Sstevel@tonic-gate 
115*0Sstevel@tonic-gate kmutex_t	ufs_scan_lock;	/* stop racing multiple ufs_scan_inodes() */
116*0Sstevel@tonic-gate kmutex_t	ufs_iuniqtime_lock; /* protect iuniqtime */
117*0Sstevel@tonic-gate kmutex_t	ufsvfs_mutex;
118*0Sstevel@tonic-gate struct ufsvfs	*oldufsvfslist, *ufsvfslist;
119*0Sstevel@tonic-gate 
120*0Sstevel@tonic-gate /*
121*0Sstevel@tonic-gate  * time to wait after ufsvfsp->vfs_iotstamp before declaring that no
122*0Sstevel@tonic-gate  * I/Os are going on.
123*0Sstevel@tonic-gate  */
124*0Sstevel@tonic-gate clock_t	ufs_iowait;
125*0Sstevel@tonic-gate 
126*0Sstevel@tonic-gate /*
127*0Sstevel@tonic-gate  * the threads that process idle inodes and free (deleted) inodes
128*0Sstevel@tonic-gate  * have high water marks that are set in ufsinit().
129*0Sstevel@tonic-gate  * These values can be no less than the minimum shown below
130*0Sstevel@tonic-gate  */
131*0Sstevel@tonic-gate int	ufs_idle_max;	/* # of allowable idle inodes */
132*0Sstevel@tonic-gate ulong_t	ufs_inode_max;	/* hard limit of allowable idle inodes */
133*0Sstevel@tonic-gate #define	UFS_IDLE_MAX	(16)	/* min # of allowable idle inodes */
134*0Sstevel@tonic-gate 
135*0Sstevel@tonic-gate /*
136*0Sstevel@tonic-gate  * Tunables for ufs write throttling.
137*0Sstevel@tonic-gate  * These are validated in ufs_iinit() since improper settings
138*0Sstevel@tonic-gate  * can lead to filesystem hangs.
139*0Sstevel@tonic-gate  */
140*0Sstevel@tonic-gate #define	UFS_HW_DEFAULT	(16 * 1024 * 1024)
141*0Sstevel@tonic-gate #define	UFS_LW_DEFAULT	(8 * 1024 * 1024)
142*0Sstevel@tonic-gate int	ufs_HW = UFS_HW_DEFAULT;
143*0Sstevel@tonic-gate int	ufs_LW = UFS_LW_DEFAULT;
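/*
 * Illustrative only (not from this file): a hypothetical /etc/system
 * override for these tunables, in bytes.  The values shown are examples,
 * not recommendations; ufs_HW must remain greater than ufs_LW or
 * ufs_iinit() will reset both to the defaults above.
 *
 *	set ufs:ufs_HW = 33554432
 *	set ufs:ufs_LW = 16777216
 */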
144*0Sstevel@tonic-gate 
145*0Sstevel@tonic-gate static void ihinit(void);
146*0Sstevel@tonic-gate extern int hash2ints(int, int);
147*0Sstevel@tonic-gate 
148*0Sstevel@tonic-gate static int ufs_iget_internal(struct vfs *, ino_t, struct inode **,
149*0Sstevel@tonic-gate     struct cred *, int);
150*0Sstevel@tonic-gate 
151*0Sstevel@tonic-gate /* ARGSUSED */
152*0Sstevel@tonic-gate static int
153*0Sstevel@tonic-gate ufs_inode_kstat_update(kstat_t *ksp, int rw)
154*0Sstevel@tonic-gate {
155*0Sstevel@tonic-gate 	if (rw == KSTAT_WRITE)
156*0Sstevel@tonic-gate 		return (EACCES);
157*0Sstevel@tonic-gate 
158*0Sstevel@tonic-gate 	ins.in_malloc.value.ul	= (ulong_t)kmem_cache_stat(inode_cache,
159*0Sstevel@tonic-gate 	    "slab_alloc");
160*0Sstevel@tonic-gate 	ins.in_mfree.value.ul	= (ulong_t)kmem_cache_stat(inode_cache,
161*0Sstevel@tonic-gate 	    "slab_free");
162*0Sstevel@tonic-gate 	ins.in_kcalloc.value.ul	= (ulong_t)kmem_cache_stat(inode_cache,
163*0Sstevel@tonic-gate 	    "alloc");
164*0Sstevel@tonic-gate 	ins.in_kcfree.value.ul	= (ulong_t)kmem_cache_stat(inode_cache,
165*0Sstevel@tonic-gate 	    "free");
166*0Sstevel@tonic-gate 	ins.in_size.value.ul	= (ulong_t)kmem_cache_stat(inode_cache,
167*0Sstevel@tonic-gate 	    "buf_inuse");
168*0Sstevel@tonic-gate 	ins.in_maxreached.value.ul = (ulong_t)kmem_cache_stat(inode_cache,
169*0Sstevel@tonic-gate 	    "buf_max");
170*0Sstevel@tonic-gate 	ins.in_misses.value.ul = ins.in_kcalloc.value.ul;
171*0Sstevel@tonic-gate 
172*0Sstevel@tonic-gate 	return (0);
173*0Sstevel@tonic-gate }
174*0Sstevel@tonic-gate 
175*0Sstevel@tonic-gate void
176*0Sstevel@tonic-gate ufs_iinit(void)
177*0Sstevel@tonic-gate {
178*0Sstevel@tonic-gate 	/*
179*0Sstevel@tonic-gate 	 * Validate that ufs_HW > ufs_LW.
180*0Sstevel@tonic-gate 	 * The default values for these two tunables have been increased.
181*0Sstevel@tonic-gate 	 * There is now a range of values for ufs_HW that was legal on
182*0Sstevel@tonic-gate 	 * previous Solaris versions but is no longer valid.
183*0Sstevel@tonic-gate 	 * Upgrading a machine which has an /etc/system setting for ufs_HW
184*0Sstevel@tonic-gate 	 * from that range can lead to filesystem hangs unless the values
185*0Sstevel@tonic-gate 	 * are checked here.
186*0Sstevel@tonic-gate 	 */
187*0Sstevel@tonic-gate 	if (ufs_HW <= ufs_LW) {
188*0Sstevel@tonic-gate 		cmn_err(CE_WARN,
189*0Sstevel@tonic-gate 			    "ufs_HW (%d) <= ufs_LW (%d). Check /etc/system.",
190*0Sstevel@tonic-gate 			    ufs_HW, ufs_LW);
191*0Sstevel@tonic-gate 		ufs_LW = UFS_LW_DEFAULT;
192*0Sstevel@tonic-gate 		ufs_HW = UFS_HW_DEFAULT;
193*0Sstevel@tonic-gate 		cmn_err(CE_CONT, "using defaults, ufs_HW = %d, ufs_LW = %d\n",
194*0Sstevel@tonic-gate 			    ufs_HW, ufs_LW);
195*0Sstevel@tonic-gate 	}
196*0Sstevel@tonic-gate 
197*0Sstevel@tonic-gate 	/*
198*0Sstevel@tonic-gate 	 * Adjust the tunable `ufs_ninode' to a reasonable value
199*0Sstevel@tonic-gate 	 */
200*0Sstevel@tonic-gate 	if (ufs_ninode <= 0)
201*0Sstevel@tonic-gate 		ufs_ninode = ncsize;
202*0Sstevel@tonic-gate 	if (ufs_inode_max == 0)
203*0Sstevel@tonic-gate 		ufs_inode_max = (ulong_t)((kmem_maxavail() >> 2) /
204*0Sstevel@tonic-gate 					sizeof (struct inode));
205*0Sstevel@tonic-gate 	if (ufs_ninode > ufs_inode_max || (ufs_ninode == 0 && ncsize == 0)) {
206*0Sstevel@tonic-gate 		cmn_err(CE_NOTE, "setting ufs_ninode to max value of %ld",
207*0Sstevel@tonic-gate 				ufs_inode_max);
208*0Sstevel@tonic-gate 		ufs_ninode = ufs_inode_max;
209*0Sstevel@tonic-gate 	}
210*0Sstevel@tonic-gate 	/*
211*0Sstevel@tonic-gate 	 * Wait until the third call of ufs_update to declare that no I/Os are
212*0Sstevel@tonic-gate 	 * going on. This allows deferred access times to be flushed to disk.
213*0Sstevel@tonic-gate 	 */
214*0Sstevel@tonic-gate 	ufs_iowait = v.v_autoup * hz * 2;
215*0Sstevel@tonic-gate 
216*0Sstevel@tonic-gate 	/*
217*0Sstevel@tonic-gate 	 * idle thread runs when 25% of ufs_ninode entries are on the queue
218*0Sstevel@tonic-gate 	 */
219*0Sstevel@tonic-gate 	if (ufs_idle_max == 0)
220*0Sstevel@tonic-gate 		ufs_idle_max = ufs_ninode >> 2;
221*0Sstevel@tonic-gate 	if (ufs_idle_max < UFS_IDLE_MAX)
222*0Sstevel@tonic-gate 		ufs_idle_max = UFS_IDLE_MAX;
223*0Sstevel@tonic-gate 	if (ufs_idle_max > ufs_ninode)
224*0Sstevel@tonic-gate 		ufs_idle_max = ufs_ninode;
225*0Sstevel@tonic-gate 	/*
226*0Sstevel@tonic-gate 	 * This is really a misnomer; it is ufs_queue_init
227*0Sstevel@tonic-gate 	 */
228*0Sstevel@tonic-gate 	ufs_thread_init(&ufs_idle_q, ufs_idle_max);
229*0Sstevel@tonic-gate 	ufs_thread_start(&ufs_idle_q, ufs_thread_idle, NULL);
230*0Sstevel@tonic-gate 
231*0Sstevel@tonic-gate 	/*
232*0Sstevel@tonic-gate 	 * global hlock thread
233*0Sstevel@tonic-gate 	 */
234*0Sstevel@tonic-gate 	ufs_thread_init(&ufs_hlock, 1);
235*0Sstevel@tonic-gate 	ufs_thread_start(&ufs_hlock, ufs_thread_hlock, NULL);
236*0Sstevel@tonic-gate 
237*0Sstevel@tonic-gate 	ihinit();
238*0Sstevel@tonic-gate 	qtinit();
239*0Sstevel@tonic-gate 	ins.in_maxsize.value.ul = ufs_ninode;
240*0Sstevel@tonic-gate 	if ((ufs_inode_kstat = kstat_create("ufs", 0, "inode_cache", "ufs",
241*0Sstevel@tonic-gate 	    KSTAT_TYPE_NAMED, sizeof (ins) / sizeof (kstat_named_t),
242*0Sstevel@tonic-gate 	    KSTAT_FLAG_VIRTUAL)) != NULL) {
243*0Sstevel@tonic-gate 		ufs_inode_kstat->ks_data = (void *)&ins;
244*0Sstevel@tonic-gate 		ufs_inode_kstat->ks_update = ufs_inode_kstat_update;
245*0Sstevel@tonic-gate 		kstat_install(ufs_inode_kstat);
246*0Sstevel@tonic-gate 	}
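	/*
	 * Editorial note: once installed, these counters are expected to be
	 * visible from userland via the kstat framework (for example,
	 * "kstat -m ufs -n inode_cache"; the exact invocation is an
	 * assumption, not taken from this code).
	 */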
247*0Sstevel@tonic-gate 	ufsfx_init();		/* fix-on-panic initialization */
248*0Sstevel@tonic-gate 	si_cache_init();
249*0Sstevel@tonic-gate 	ufs_directio_init();
250*0Sstevel@tonic-gate 	lufs_init();
251*0Sstevel@tonic-gate 	mutex_init(&ufs_iuniqtime_lock, NULL, MUTEX_DEFAULT, NULL);
252*0Sstevel@tonic-gate }
253*0Sstevel@tonic-gate 
254*0Sstevel@tonic-gate /* ARGSUSED */
255*0Sstevel@tonic-gate static int
256*0Sstevel@tonic-gate ufs_inode_cache_constructor(void *buf, void *cdrarg, int kmflags)
257*0Sstevel@tonic-gate {
258*0Sstevel@tonic-gate 	struct inode *ip = buf;
259*0Sstevel@tonic-gate 	struct vnode *vp;
260*0Sstevel@tonic-gate 
261*0Sstevel@tonic-gate 	rw_init(&ip->i_rwlock, NULL, RW_DEFAULT, NULL);
262*0Sstevel@tonic-gate 	rw_init(&ip->i_contents, NULL, RW_DEFAULT, NULL);
263*0Sstevel@tonic-gate 	mutex_init(&ip->i_tlock, NULL, MUTEX_DEFAULT, NULL);
264*0Sstevel@tonic-gate 	dnlc_dir_init(&ip->i_danchor);
265*0Sstevel@tonic-gate 
266*0Sstevel@tonic-gate 	cv_init(&ip->i_wrcv, NULL, CV_DRIVER, NULL);
267*0Sstevel@tonic-gate 
268*0Sstevel@tonic-gate 	vp = vn_alloc(KM_SLEEP);
269*0Sstevel@tonic-gate 	ip->i_vnode = vp;
270*0Sstevel@tonic-gate 
271*0Sstevel@tonic-gate 	vn_setops(vp, ufs_vnodeops);
272*0Sstevel@tonic-gate 	vp->v_data = (caddr_t)ip;
273*0Sstevel@tonic-gate 
274*0Sstevel@tonic-gate 	return (0);
275*0Sstevel@tonic-gate }
276*0Sstevel@tonic-gate 
277*0Sstevel@tonic-gate /* ARGSUSED */
278*0Sstevel@tonic-gate static void
279*0Sstevel@tonic-gate ufs_inode_cache_destructor(void *buf, void *cdrarg)
280*0Sstevel@tonic-gate {
281*0Sstevel@tonic-gate 	struct inode *ip = buf;
282*0Sstevel@tonic-gate 	struct vnode *vp;
283*0Sstevel@tonic-gate 
284*0Sstevel@tonic-gate 	vp = ITOV(ip);
285*0Sstevel@tonic-gate 
286*0Sstevel@tonic-gate 	rw_destroy(&ip->i_rwlock);
287*0Sstevel@tonic-gate 	rw_destroy(&ip->i_contents);
288*0Sstevel@tonic-gate 
289*0Sstevel@tonic-gate 	mutex_destroy(&ip->i_tlock);
290*0Sstevel@tonic-gate 	if (vp->v_type == VDIR) {
291*0Sstevel@tonic-gate 		dnlc_dir_fini(&ip->i_danchor);
292*0Sstevel@tonic-gate 	}
293*0Sstevel@tonic-gate 
294*0Sstevel@tonic-gate 	cv_destroy(&ip->i_wrcv);
295*0Sstevel@tonic-gate 
296*0Sstevel@tonic-gate 	vn_free(vp);
297*0Sstevel@tonic-gate }
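/*
 * Editorial note: the constructor/destructor pair above lets the locks,
 * the dnlc directory-cache anchor, the condition variable and the
 * vn_alloc()ed vnode persist across kmem cache reuse, so ufs_alloc_inode()
 * below only reinitializes the per-instance fields.
 */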
298*0Sstevel@tonic-gate 
299*0Sstevel@tonic-gate /*
300*0Sstevel@tonic-gate  * Initialize hash links for inodes
301*0Sstevel@tonic-gate  * and create the inode cache.
302*0Sstevel@tonic-gate  */
303*0Sstevel@tonic-gate void
304*0Sstevel@tonic-gate ihinit(void)
305*0Sstevel@tonic-gate {
306*0Sstevel@tonic-gate 	int i;
307*0Sstevel@tonic-gate 	union	ihead *ih = ihead;
308*0Sstevel@tonic-gate 
309*0Sstevel@tonic-gate 	mutex_init(&ufs_scan_lock, NULL, MUTEX_DEFAULT, NULL);
310*0Sstevel@tonic-gate 
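	/*
	 * Size the hash table so chains average roughly ino_hashlen entries.
	 * Assuming highbit() yields the 1-based index of the most
	 * significant set bit, inohsz comes out as a power of two at least
	 * as large as the desired bucket count.
	 */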
311*0Sstevel@tonic-gate 	inohsz = 1 << highbit(ufs_ninode / ino_hashlen);
312*0Sstevel@tonic-gate 	ihead = kmem_zalloc(inohsz * sizeof (union ihead), KM_SLEEP);
313*0Sstevel@tonic-gate 	ih_lock = kmem_zalloc(inohsz * sizeof (kmutex_t), KM_SLEEP);
314*0Sstevel@tonic-gate 
315*0Sstevel@tonic-gate 	for (i = 0, ih = ihead; i < inohsz; i++,  ih++) {
316*0Sstevel@tonic-gate 		ih->ih_head[0] = ih;
317*0Sstevel@tonic-gate 		ih->ih_head[1] = ih;
318*0Sstevel@tonic-gate 		mutex_init(&ih_lock[i], NULL, MUTEX_DEFAULT, NULL);
319*0Sstevel@tonic-gate 	}
320*0Sstevel@tonic-gate 	inode_cache = kmem_cache_create("ufs_inode_cache",
321*0Sstevel@tonic-gate 		sizeof (struct inode), 0, ufs_inode_cache_constructor,
322*0Sstevel@tonic-gate 		ufs_inode_cache_destructor, ufs_inode_cache_reclaim,
323*0Sstevel@tonic-gate 		NULL, NULL, 0);
324*0Sstevel@tonic-gate }
325*0Sstevel@tonic-gate 
326*0Sstevel@tonic-gate /*
327*0Sstevel@tonic-gate  * Free an inode structure
328*0Sstevel@tonic-gate  */
329*0Sstevel@tonic-gate void
330*0Sstevel@tonic-gate ufs_free_inode(struct inode *ip)
331*0Sstevel@tonic-gate {
332*0Sstevel@tonic-gate 	vn_invalid(ITOV(ip));
333*0Sstevel@tonic-gate 	kmem_cache_free(inode_cache, ip);
334*0Sstevel@tonic-gate }
335*0Sstevel@tonic-gate 
336*0Sstevel@tonic-gate /*
337*0Sstevel@tonic-gate  * Allocate an inode structure
338*0Sstevel@tonic-gate  */
339*0Sstevel@tonic-gate struct inode *
340*0Sstevel@tonic-gate ufs_alloc_inode(ufsvfs_t *ufsvfsp, ino_t ino)
341*0Sstevel@tonic-gate {
342*0Sstevel@tonic-gate 	struct inode *ip;
343*0Sstevel@tonic-gate 	vnode_t *vp;
344*0Sstevel@tonic-gate 
345*0Sstevel@tonic-gate 	ip = kmem_cache_alloc(inode_cache, KM_SLEEP);
346*0Sstevel@tonic-gate 	/*
347*0Sstevel@tonic-gate 	 * at this point we have a newly allocated inode
348*0Sstevel@tonic-gate 	 */
349*0Sstevel@tonic-gate 	ip->i_freef = ip;
350*0Sstevel@tonic-gate 	ip->i_freeb = ip;
351*0Sstevel@tonic-gate 	ip->i_flag = IREF;
352*0Sstevel@tonic-gate 	ip->i_seq = 0xFF;	/* Unique initial value */
353*0Sstevel@tonic-gate 	ip->i_dev = ufsvfsp->vfs_dev;
354*0Sstevel@tonic-gate 	ip->i_ufsvfs = ufsvfsp;
355*0Sstevel@tonic-gate 	ip->i_devvp = ufsvfsp->vfs_devvp;
356*0Sstevel@tonic-gate 	ip->i_number = ino;
357*0Sstevel@tonic-gate 	ip->i_diroff = 0;
358*0Sstevel@tonic-gate 	ip->i_nextr = 0;
359*0Sstevel@tonic-gate 	ip->i_map = NULL;
360*0Sstevel@tonic-gate 	ip->i_rdev = 0;
361*0Sstevel@tonic-gate 	ip->i_writes = 0;
362*0Sstevel@tonic-gate 	ip->i_mode = 0;
363*0Sstevel@tonic-gate 	ip->i_delaylen = 0;
364*0Sstevel@tonic-gate 	ip->i_delayoff = 0;
365*0Sstevel@tonic-gate 	ip->i_nextrio = 0;
366*0Sstevel@tonic-gate 	ip->i_ufs_acl = NULL;
367*0Sstevel@tonic-gate 	ip->i_cflags = 0;
368*0Sstevel@tonic-gate 	ip->i_mapcnt = 0;
369*0Sstevel@tonic-gate 	ip->i_dquot = NULL;
370*0Sstevel@tonic-gate 	ip->i_cachedir = 1;
371*0Sstevel@tonic-gate 	ip->i_writer = NULL;
372*0Sstevel@tonic-gate 
373*0Sstevel@tonic-gate 	/*
374*0Sstevel@tonic-gate 	 * the vnode for this inode was allocated by the constructor
375*0Sstevel@tonic-gate 	 */
376*0Sstevel@tonic-gate 	vp = ITOV(ip);
377*0Sstevel@tonic-gate 	vn_reinit(vp);
378*0Sstevel@tonic-gate 	if (ino == (ino_t)UFSROOTINO)
379*0Sstevel@tonic-gate 		vp->v_flag = VROOT;
380*0Sstevel@tonic-gate 	vp->v_vfsp = ufsvfsp->vfs_vfs;
381*0Sstevel@tonic-gate 	vn_exists(vp);
382*0Sstevel@tonic-gate 	return (ip);
383*0Sstevel@tonic-gate }
384*0Sstevel@tonic-gate 
385*0Sstevel@tonic-gate /*
386*0Sstevel@tonic-gate  * Look up an inode by device, inumber.  If it is in core (in the
387*0Sstevel@tonic-gate  * inode structure), honor the locking protocol.  If it is not in
388*0Sstevel@tonic-gate  * core, read it in from the specified device after freeing any pages.
389*0Sstevel@tonic-gate  * In all cases, a pointer to a VN_HELD inode structure is returned.
390*0Sstevel@tonic-gate  */
391*0Sstevel@tonic-gate int
392*0Sstevel@tonic-gate ufs_iget(struct vfs *vfsp, ino_t ino, struct inode **ipp, struct cred *cr)
393*0Sstevel@tonic-gate {
394*0Sstevel@tonic-gate 	return (ufs_iget_internal(vfsp, ino, ipp, cr, 0));
395*0Sstevel@tonic-gate }
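/*
 * Illustrative caller sketch (not from this file), error handling elided;
 * the returned inode carries a VN_HOLD that the caller must release:
 *
 *	struct inode *ip;
 *
 *	if (ufs_iget(vfsp, ino, &ip, cr) == 0) {
 *		...use ip / ITOV(ip)...
 *		VN_RELE(ITOV(ip));
 *	}
 */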
396*0Sstevel@tonic-gate 
397*0Sstevel@tonic-gate /*
398*0Sstevel@tonic-gate  * A version of ufs_iget which returns only allocated, linked inodes.
399*0Sstevel@tonic-gate  * This is appropriate for any callers who do not expect a free inode.
400*0Sstevel@tonic-gate  */
401*0Sstevel@tonic-gate int
402*0Sstevel@tonic-gate ufs_iget_alloced(struct vfs *vfsp, ino_t ino, struct inode **ipp,
403*0Sstevel@tonic-gate     struct cred *cr)
404*0Sstevel@tonic-gate {
405*0Sstevel@tonic-gate 	return (ufs_iget_internal(vfsp, ino, ipp, cr, 1));
406*0Sstevel@tonic-gate }
407*0Sstevel@tonic-gate 
408*0Sstevel@tonic-gate /*
409*0Sstevel@tonic-gate  * Set vnode attributes based on v_type, this should be called whenever
410*0Sstevel@tonic-gate  * Set vnode attributes based on v_type; this should be called whenever
411*0Sstevel@tonic-gate  */
412*0Sstevel@tonic-gate void
413*0Sstevel@tonic-gate ufs_reset_vnode(vnode_t *vp)
414*0Sstevel@tonic-gate {
415*0Sstevel@tonic-gate 	/*
416*0Sstevel@tonic-gate 	 * an old DBE hack
417*0Sstevel@tonic-gate 	 */
418*0Sstevel@tonic-gate 	if ((VTOI(vp)->i_mode & (ISVTX | IEXEC | IFDIR)) == ISVTX)
419*0Sstevel@tonic-gate 		vp->v_flag |= VSWAPLIKE;
420*0Sstevel@tonic-gate 	else
421*0Sstevel@tonic-gate 		vp->v_flag &= ~VSWAPLIKE;
422*0Sstevel@tonic-gate 
423*0Sstevel@tonic-gate 	/*
424*0Sstevel@tonic-gate 	 * if not swap like and it's just a regular file, we want
425*0Sstevel@tonic-gate 	 * to maintain the vnode's pages sorted by clean/modified
426*0Sstevel@tonic-gate 	 * for faster sync'ing to disk
427*0Sstevel@tonic-gate 	 */
428*0Sstevel@tonic-gate 	if (vp->v_type == VREG)
429*0Sstevel@tonic-gate 		vp->v_flag |= VMODSORT;
430*0Sstevel@tonic-gate 	else
431*0Sstevel@tonic-gate 		vp->v_flag &= ~VMODSORT;
432*0Sstevel@tonic-gate 
433*0Sstevel@tonic-gate 	/*
434*0Sstevel@tonic-gate 	 * Is this an attribute hidden dir?
435*0Sstevel@tonic-gate 	 */
436*0Sstevel@tonic-gate 	if ((VTOI(vp)->i_mode & IFMT) == IFATTRDIR)
437*0Sstevel@tonic-gate 		vp->v_flag |= V_XATTRDIR;
438*0Sstevel@tonic-gate 	else
439*0Sstevel@tonic-gate 		vp->v_flag &= ~V_XATTRDIR;
440*0Sstevel@tonic-gate }
441*0Sstevel@tonic-gate 
442*0Sstevel@tonic-gate /*
443*0Sstevel@tonic-gate  * Shared implementation of ufs_iget and ufs_iget_alloced.  The 'validate'
444*0Sstevel@tonic-gate  * flag is used to distinguish the two; when true, we validate that the inode
445*0Sstevel@tonic-gate  * being retrieved looks like a linked and allocated inode.
446*0Sstevel@tonic-gate  */
447*0Sstevel@tonic-gate /* ARGSUSED */
448*0Sstevel@tonic-gate static int
449*0Sstevel@tonic-gate ufs_iget_internal(struct vfs *vfsp, ino_t ino, struct inode **ipp,
450*0Sstevel@tonic-gate     struct cred *cr, int validate)
451*0Sstevel@tonic-gate {
452*0Sstevel@tonic-gate 	struct inode *ip, *sp;
453*0Sstevel@tonic-gate 	union ihead *ih;
454*0Sstevel@tonic-gate 	kmutex_t *ihm;
455*0Sstevel@tonic-gate 	struct buf *bp;
456*0Sstevel@tonic-gate 	struct dinode *dp;
457*0Sstevel@tonic-gate 	struct vnode *vp;
458*0Sstevel@tonic-gate 	extern vfs_t EIO_vfs;
459*0Sstevel@tonic-gate 	int error;
460*0Sstevel@tonic-gate 	int ftype;	/* XXX - Remove later on */
461*0Sstevel@tonic-gate 	dev_t vfs_dev;
462*0Sstevel@tonic-gate 	struct ufsvfs *ufsvfsp;
463*0Sstevel@tonic-gate 	struct fs *fs;
464*0Sstevel@tonic-gate 	int hno;
465*0Sstevel@tonic-gate 	daddr_t bno;
466*0Sstevel@tonic-gate 	ulong_t ioff;
467*0Sstevel@tonic-gate 
468*0Sstevel@tonic-gate 	CPU_STATS_ADD_K(sys, ufsiget, 1);
469*0Sstevel@tonic-gate 
470*0Sstevel@tonic-gate 	/*
471*0Sstevel@tonic-gate 	 * Lookup inode in cache.
472*0Sstevel@tonic-gate 	 */
473*0Sstevel@tonic-gate 	vfs_dev = vfsp->vfs_dev;
474*0Sstevel@tonic-gate 	hno = INOHASH(ino);
475*0Sstevel@tonic-gate 	ih = &ihead[hno];
476*0Sstevel@tonic-gate 	ihm = &ih_lock[hno];
477*0Sstevel@tonic-gate 
478*0Sstevel@tonic-gate again:
479*0Sstevel@tonic-gate 	mutex_enter(ihm);
480*0Sstevel@tonic-gate 	for (ip = ih->ih_chain[0]; ip != (struct inode *)ih; ip = ip->i_forw) {
481*0Sstevel@tonic-gate 		if (ino != ip->i_number || vfs_dev != ip->i_dev ||
482*0Sstevel@tonic-gate 		    (ip->i_flag & ISTALE))
483*0Sstevel@tonic-gate 			continue;
484*0Sstevel@tonic-gate 
485*0Sstevel@tonic-gate 		/*
486*0Sstevel@tonic-gate 		 * Found the interesting inode; hold it and drop the cache lock
487*0Sstevel@tonic-gate 		 */
488*0Sstevel@tonic-gate 		vp = ITOV(ip);	/* for locknest */
489*0Sstevel@tonic-gate 		VN_HOLD(vp);
490*0Sstevel@tonic-gate 		mutex_exit(ihm);
491*0Sstevel@tonic-gate 		rw_enter(&ip->i_contents, RW_READER);
492*0Sstevel@tonic-gate 
493*0Sstevel@tonic-gate 		/*
494*0Sstevel@tonic-gate 		 * if necessary, remove from idle list
495*0Sstevel@tonic-gate 		 */
496*0Sstevel@tonic-gate 		if ((ip->i_flag & IREF) == 0) {
497*0Sstevel@tonic-gate 			if (ufs_rmidle(ip))
498*0Sstevel@tonic-gate 				VN_RELE(vp);
499*0Sstevel@tonic-gate 		}
500*0Sstevel@tonic-gate 
501*0Sstevel@tonic-gate 		/*
502*0Sstevel@tonic-gate 		 * Could the inode be read from disk?
503*0Sstevel@tonic-gate 		 */
504*0Sstevel@tonic-gate 		if (ip->i_flag & ISTALE) {
505*0Sstevel@tonic-gate 			rw_exit(&ip->i_contents);
506*0Sstevel@tonic-gate 			VN_RELE(vp);
507*0Sstevel@tonic-gate 			goto again;
508*0Sstevel@tonic-gate 		}
509*0Sstevel@tonic-gate 
510*0Sstevel@tonic-gate 		ins.in_hits.value.ul++;
511*0Sstevel@tonic-gate 		*ipp = ip;
512*0Sstevel@tonic-gate 
513*0Sstevel@tonic-gate 		/*
514*0Sstevel@tonic-gate 		 * Reset the vnode's attribute flags
515*0Sstevel@tonic-gate 		 */
516*0Sstevel@tonic-gate 		mutex_enter(&vp->v_lock);
517*0Sstevel@tonic-gate 		ufs_reset_vnode(vp);
518*0Sstevel@tonic-gate 		mutex_exit(&vp->v_lock);
519*0Sstevel@tonic-gate 
520*0Sstevel@tonic-gate 		rw_exit(&ip->i_contents);
521*0Sstevel@tonic-gate 
522*0Sstevel@tonic-gate 		return (0);
523*0Sstevel@tonic-gate 	}
524*0Sstevel@tonic-gate 	mutex_exit(ihm);
525*0Sstevel@tonic-gate 
526*0Sstevel@tonic-gate 	/*
527*0Sstevel@tonic-gate 	 * Inode was not in cache.
528*0Sstevel@tonic-gate 	 *
529*0Sstevel@tonic-gate 	 * Allocate a new entry
530*0Sstevel@tonic-gate 	 */
531*0Sstevel@tonic-gate 	ufsvfsp = (struct ufsvfs *)vfsp->vfs_data;
532*0Sstevel@tonic-gate 	fs = ufsvfsp->vfs_fs;
533*0Sstevel@tonic-gate 
534*0Sstevel@tonic-gate 	ip = ufs_alloc_inode(ufsvfsp, ino);
535*0Sstevel@tonic-gate 	vp = ITOV(ip);
536*0Sstevel@tonic-gate 
537*0Sstevel@tonic-gate 	bno = fsbtodb(fs, itod(fs, ino));
538*0Sstevel@tonic-gate 	ioff = (sizeof (struct dinode)) * (itoo(fs, ino));
539*0Sstevel@tonic-gate 	ip->i_doff = (offset_t)ioff + ldbtob(bno);
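	/*
	 * i_doff is thus the absolute on-disk byte offset of this dinode:
	 * the byte address of the disk block holding it plus the inode's
	 * offset within that filesystem block.
	 */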
540*0Sstevel@tonic-gate 
541*0Sstevel@tonic-gate 	/*
542*0Sstevel@tonic-gate 	 * put a place holder in the cache (if not already there)
543*0Sstevel@tonic-gate 	 */
544*0Sstevel@tonic-gate 	mutex_enter(ihm);
545*0Sstevel@tonic-gate 	for (sp = ih->ih_chain[0]; sp != (struct inode *)ih; sp = sp->i_forw)
546*0Sstevel@tonic-gate 		if (ino == sp->i_number && vfs_dev == sp->i_dev &&
547*0Sstevel@tonic-gate 		    ((sp->i_flag & ISTALE) == 0)) {
548*0Sstevel@tonic-gate 			mutex_exit(ihm);
549*0Sstevel@tonic-gate 			ufs_free_inode(ip);
550*0Sstevel@tonic-gate 			goto again;
551*0Sstevel@tonic-gate 		}
552*0Sstevel@tonic-gate 	/*
553*0Sstevel@tonic-gate 	 * It would be nice to ASSERT(RW_READ_HELD(&ufsvfsp->vfs_dqrwlock))
554*0Sstevel@tonic-gate 	 * here, but if we do, then shadow inode allocations panic the
555*0Sstevel@tonic-gate 	 * system.  We don't have to hold vfs_dqrwlock for shadow inodes
556*0Sstevel@tonic-gate 	 * and the ufs_iget() parameters don't tell us what we are getting
557*0Sstevel@tonic-gate 	 * so we have no way of knowing this is a ufs_iget() call from
558*0Sstevel@tonic-gate 	 * a ufs_ialloc() call for a shadow inode.
559*0Sstevel@tonic-gate 	 */
560*0Sstevel@tonic-gate 	rw_enter(&ip->i_contents, RW_WRITER);
561*0Sstevel@tonic-gate 	insque(ip, ih);
562*0Sstevel@tonic-gate 	mutex_exit(ihm);
563*0Sstevel@tonic-gate 	/*
564*0Sstevel@tonic-gate 	 * read the dinode
565*0Sstevel@tonic-gate 	 */
566*0Sstevel@tonic-gate 	bp = UFS_BREAD(ufsvfsp, ip->i_dev, bno, (int)fs->fs_bsize);
567*0Sstevel@tonic-gate 
568*0Sstevel@tonic-gate 	/*
569*0Sstevel@tonic-gate 	 * Check I/O errors
570*0Sstevel@tonic-gate 	 */
571*0Sstevel@tonic-gate 	error = ((bp->b_flags & B_ERROR) ? geterror(bp) : 0);
572*0Sstevel@tonic-gate 	if (error) {
573*0Sstevel@tonic-gate 		brelse(bp);
574*0Sstevel@tonic-gate 		ip->i_flag |= ISTALE;	/* in case someone is looking it up */
575*0Sstevel@tonic-gate 		rw_exit(&ip->i_contents);
576*0Sstevel@tonic-gate 		vp->v_vfsp = &EIO_vfs;
577*0Sstevel@tonic-gate 		VN_RELE(vp);
578*0Sstevel@tonic-gate 		return (error);
579*0Sstevel@tonic-gate 	}
580*0Sstevel@tonic-gate 	/*
581*0Sstevel@tonic-gate 	 * initialize the inode's dinode
582*0Sstevel@tonic-gate 	 */
583*0Sstevel@tonic-gate 	dp = (struct dinode *)(ioff + bp->b_un.b_addr);
584*0Sstevel@tonic-gate 	ip->i_ic = dp->di_ic;			/* structure assignment */
585*0Sstevel@tonic-gate 	brelse(bp);
586*0Sstevel@tonic-gate 
587*0Sstevel@tonic-gate 	/*
588*0Sstevel@tonic-gate 	 * Maintain compatibility with Solaris 1.x UFS
589*0Sstevel@tonic-gate 	 */
590*0Sstevel@tonic-gate 	if (ip->i_suid != UID_LONG)
591*0Sstevel@tonic-gate 		ip->i_uid = ip->i_suid;
592*0Sstevel@tonic-gate 	if (ip->i_sgid != GID_LONG)
593*0Sstevel@tonic-gate 		ip->i_gid = ip->i_sgid;
594*0Sstevel@tonic-gate 
595*0Sstevel@tonic-gate 	ftype = ip->i_mode & IFMT;
596*0Sstevel@tonic-gate 	if (ftype == IFBLK || ftype == IFCHR) {
597*0Sstevel@tonic-gate 		dev_t dv;
598*0Sstevel@tonic-gate 		uint_t top16 = ip->i_ordev & 0xffff0000u;
599*0Sstevel@tonic-gate 
600*0Sstevel@tonic-gate 		if (top16 == 0 || top16 == 0xffff0000u)
601*0Sstevel@tonic-gate 			dv = expdev(ip->i_ordev);
602*0Sstevel@tonic-gate 		else
603*0Sstevel@tonic-gate 			dv = expldev(ip->i_ordev);
604*0Sstevel@tonic-gate 		vp->v_rdev = ip->i_rdev = dv;
605*0Sstevel@tonic-gate 	}
606*0Sstevel@tonic-gate 
607*0Sstevel@tonic-gate 	/*
608*0Sstevel@tonic-gate 	 * if our caller only expects allocated inodes, verify that
609*0Sstevel@tonic-gate 	 * this inode looks good; throw it out if it's bad.
610*0Sstevel@tonic-gate 	 */
611*0Sstevel@tonic-gate 	if (validate) {
612*0Sstevel@tonic-gate 		if ((ftype == 0) || (ip->i_nlink <= 0)) {
613*0Sstevel@tonic-gate 			ip->i_flag |= ISTALE;
614*0Sstevel@tonic-gate 			rw_exit(&ip->i_contents);
615*0Sstevel@tonic-gate 			vp->v_vfsp = &EIO_vfs;
616*0Sstevel@tonic-gate 			VN_RELE(vp);
617*0Sstevel@tonic-gate 			cmn_err(CE_NOTE,
618*0Sstevel@tonic-gate 			    "%s: unexpected free inode %d, run fsck(1M)%s",
619*0Sstevel@tonic-gate 			    fs->fs_fsmnt, (int)ino,
620*0Sstevel@tonic-gate 			    (TRANS_ISTRANS(ufsvfsp) ? " -o f" : ""));
621*0Sstevel@tonic-gate 			return (EIO);
622*0Sstevel@tonic-gate 		}
623*0Sstevel@tonic-gate 	}
624*0Sstevel@tonic-gate 
625*0Sstevel@tonic-gate 	/*
626*0Sstevel@tonic-gate 	 * finish initializing the vnode
627*0Sstevel@tonic-gate 	 */
628*0Sstevel@tonic-gate 	vp->v_type = IFTOVT((mode_t)ip->i_mode);
629*0Sstevel@tonic-gate 
630*0Sstevel@tonic-gate 	ufs_reset_vnode(vp);
631*0Sstevel@tonic-gate 
632*0Sstevel@tonic-gate 	/*
633*0Sstevel@tonic-gate 	 * read the shadow
634*0Sstevel@tonic-gate 	 */
635*0Sstevel@tonic-gate 	if (ftype != 0 && ip->i_shadow != 0) {
636*0Sstevel@tonic-gate 		if ((error = ufs_si_load(ip, cr)) != 0) {
637*0Sstevel@tonic-gate 			ip->i_flag |= ISTALE;
638*0Sstevel@tonic-gate 			ip->i_ufs_acl = NULL;
639*0Sstevel@tonic-gate 			rw_exit(&ip->i_contents);
640*0Sstevel@tonic-gate 			vp->v_vfsp = &EIO_vfs;
641*0Sstevel@tonic-gate 			VN_RELE(vp);
642*0Sstevel@tonic-gate 			return (error);
643*0Sstevel@tonic-gate 		}
644*0Sstevel@tonic-gate 	}
645*0Sstevel@tonic-gate 
646*0Sstevel@tonic-gate 	/*
647*0Sstevel@tonic-gate 	 * Only attach quota information if the inode has a type and if
648*0Sstevel@tonic-gate 	 * that type is not a shadow inode.
649*0Sstevel@tonic-gate 	 */
650*0Sstevel@tonic-gate 	if (ip->i_mode && ((ip->i_mode & IFMT) != IFSHAD) &&
651*0Sstevel@tonic-gate 	    ((ip->i_mode & IFMT) != IFATTRDIR)) {
652*0Sstevel@tonic-gate 		ip->i_dquot = getinoquota(ip);
653*0Sstevel@tonic-gate 	}
654*0Sstevel@tonic-gate 	TRANS_MATA_IGET(ufsvfsp, ip);
655*0Sstevel@tonic-gate 	*ipp = ip;
656*0Sstevel@tonic-gate 	rw_exit(&ip->i_contents);
657*0Sstevel@tonic-gate 
658*0Sstevel@tonic-gate 	return (0);
659*0Sstevel@tonic-gate }
660*0Sstevel@tonic-gate 
661*0Sstevel@tonic-gate /*
662*0Sstevel@tonic-gate  * Vnode is no longer referenced; write the inode out
663*0Sstevel@tonic-gate  * and, if necessary, truncate and deallocate the file.
664*0Sstevel@tonic-gate  */
665*0Sstevel@tonic-gate void
666*0Sstevel@tonic-gate ufs_iinactive(struct inode *ip)
667*0Sstevel@tonic-gate {
668*0Sstevel@tonic-gate 	int		front;
669*0Sstevel@tonic-gate 	struct inode	*iq;
670*0Sstevel@tonic-gate 	struct inode	*hip;
671*0Sstevel@tonic-gate 	struct ufs_q	*uq;
672*0Sstevel@tonic-gate 	struct vnode	*vp = ITOV(ip);
673*0Sstevel@tonic-gate 
674*0Sstevel@tonic-gate 
675*0Sstevel@tonic-gate 	/*
676*0Sstevel@tonic-gate 	 * Because the vnode type might have been changed,
677*0Sstevel@tonic-gate 	 * the dnlc_dir_purge must be called unconditionally.
678*0Sstevel@tonic-gate 	 */
679*0Sstevel@tonic-gate 	dnlc_dir_purge(&ip->i_danchor);
680*0Sstevel@tonic-gate 
681*0Sstevel@tonic-gate 	/*
682*0Sstevel@tonic-gate 	 * Get exclusive access to inode data.
683*0Sstevel@tonic-gate 	 */
684*0Sstevel@tonic-gate 	rw_enter(&ip->i_contents, RW_WRITER);
685*0Sstevel@tonic-gate 	ASSERT(ip->i_flag & IREF);
686*0Sstevel@tonic-gate 
687*0Sstevel@tonic-gate 	/*
688*0Sstevel@tonic-gate 	 * Make sure no one reclaimed the inode before we put it on
689*0Sstevel@tonic-gate 	 * the freelist or destroy it. We keep our 'hold' on the vnode
690*0Sstevel@tonic-gate 	 * from vn_rele until we are ready to do something with the inode.
691*0Sstevel@tonic-gate 	 *
692*0Sstevel@tonic-gate 	 * Pageout may put a VN_HOLD/VN_RELE at any time during this
693*0Sstevel@tonic-gate 	 * operation via an async putpage, so we must make sure
694*0Sstevel@tonic-gate 	 * we don't free/destroy the inode more than once. ufs_iget
695*0Sstevel@tonic-gate 	 * may also put a VN_HOLD on the inode before it grabs
696*0Sstevel@tonic-gate 	 * the i_contents lock. This is done so we don't free
697*0Sstevel@tonic-gate 	 * an inode that a thread is waiting on.
698*0Sstevel@tonic-gate 	 */
699*0Sstevel@tonic-gate 	mutex_enter(&vp->v_lock);
700*0Sstevel@tonic-gate 
701*0Sstevel@tonic-gate 	if (vp->v_count > 1) {
702*0Sstevel@tonic-gate 	    vp->v_count--;  /* release our hold from vn_rele */
703*0Sstevel@tonic-gate 	    mutex_exit(&vp->v_lock);
704*0Sstevel@tonic-gate 	    rw_exit(&ip->i_contents);
705*0Sstevel@tonic-gate 	    return;
706*0Sstevel@tonic-gate 	}
707*0Sstevel@tonic-gate 	mutex_exit(&vp->v_lock);
708*0Sstevel@tonic-gate 
709*0Sstevel@tonic-gate 	/*
710*0Sstevel@tonic-gate 	 * For umount case: if ufsvfs ptr is NULL, the inode is unhashed
711*0Sstevel@tonic-gate 	 * and clean.  It can be safely destroyed (cyf).
712*0Sstevel@tonic-gate 	 */
713*0Sstevel@tonic-gate 	if (ip->i_ufsvfs == NULL) {
714*0Sstevel@tonic-gate 		rw_exit(&ip->i_contents);
715*0Sstevel@tonic-gate 		ufs_si_del(ip);
716*0Sstevel@tonic-gate 		ASSERT((vp->v_type == VCHR) || !vn_has_cached_data(vp));
717*0Sstevel@tonic-gate 		ufs_free_inode(ip);
718*0Sstevel@tonic-gate 		return;
719*0Sstevel@tonic-gate 	}
720*0Sstevel@tonic-gate 
721*0Sstevel@tonic-gate 	/*
722*0Sstevel@tonic-gate 	 * Queue the idle inode to the appropriate thread; v_count == 1 is
723*0Sstevel@tonic-gate 	 * re-checked before the inode is put on that queue.
724*0Sstevel@tonic-gate 	 * Stale inodes will be unhashed and freed by the ufs idle thread
725*0Sstevel@tonic-gate 	 * in ufs_idle_free()
726*0Sstevel@tonic-gate 	 */
727*0Sstevel@tonic-gate 	front = 1;
728*0Sstevel@tonic-gate 	if ((ip->i_flag & ISTALE) == 0 && ip->i_fs->fs_ronly == 0 &&
729*0Sstevel@tonic-gate 	    ip->i_mode && ip->i_nlink <= 0) {
730*0Sstevel@tonic-gate 		/*
731*0Sstevel@tonic-gate 		 * Mark the i_flag to indicate that inode is being deleted.
732*0Sstevel@tonic-gate 		 * This flag will be cleared when the deletion is complete.
733*0Sstevel@tonic-gate 		 * This prevents nfs from sneaking in via ufs_vget() while
734*0Sstevel@tonic-gate 		 * the delete is in progress (bugid 1242481).
735*0Sstevel@tonic-gate 		 */
736*0Sstevel@tonic-gate 		ip->i_flag |= IDEL;
737*0Sstevel@tonic-gate 
738*0Sstevel@tonic-gate 		/*
739*0Sstevel@tonic-gate 		 * NOIDEL means that deletes are not allowed at this time;
740*0Sstevel@tonic-gate 		 * whoever resets NOIDEL will also send this inode back
741*0Sstevel@tonic-gate 		 * through ufs_iinactive.  IREF remains set.
742*0Sstevel@tonic-gate 		 */
743*0Sstevel@tonic-gate 		if (ULOCKFS_IS_NOIDEL(ITOUL(ip))) {
744*0Sstevel@tonic-gate 			mutex_enter(&vp->v_lock);
745*0Sstevel@tonic-gate 			vp->v_count--;
746*0Sstevel@tonic-gate 			mutex_exit(&vp->v_lock);
747*0Sstevel@tonic-gate 			rw_exit(&ip->i_contents);
748*0Sstevel@tonic-gate 			return;
749*0Sstevel@tonic-gate 		}
750*0Sstevel@tonic-gate 		if (!TRANS_ISTRANS(ip->i_ufsvfs)) {
751*0Sstevel@tonic-gate 			rw_exit(&ip->i_contents);
752*0Sstevel@tonic-gate 			ufs_delete(ip->i_ufsvfs, ip, 0);
753*0Sstevel@tonic-gate 			return;
754*0Sstevel@tonic-gate 		}
755*0Sstevel@tonic-gate 
756*0Sstevel@tonic-gate 		/* queue to delete thread; IREF remains set */
757*0Sstevel@tonic-gate 		ins.in_qfree.value.ul++;
758*0Sstevel@tonic-gate 		uq = &ip->i_ufsvfs->vfs_delete;
759*0Sstevel@tonic-gate 
760*0Sstevel@tonic-gate 		mutex_enter(&uq->uq_mutex);
761*0Sstevel@tonic-gate 
762*0Sstevel@tonic-gate 		/* add to q */
763*0Sstevel@tonic-gate 		if ((iq = uq->uq_ihead) != 0) {
764*0Sstevel@tonic-gate 			ip->i_freef = iq;
765*0Sstevel@tonic-gate 			ip->i_freeb = iq->i_freeb;
766*0Sstevel@tonic-gate 			iq->i_freeb->i_freef = ip;
767*0Sstevel@tonic-gate 			iq->i_freeb = ip;
768*0Sstevel@tonic-gate 			if (front)
769*0Sstevel@tonic-gate 				uq->uq_ihead = ip;
770*0Sstevel@tonic-gate 		} else {
771*0Sstevel@tonic-gate 			uq->uq_ihead = ip;
772*0Sstevel@tonic-gate 			ip->i_freef = ip;
773*0Sstevel@tonic-gate 			ip->i_freeb = ip;
774*0Sstevel@tonic-gate 		}
775*0Sstevel@tonic-gate 	} else {
776*0Sstevel@tonic-gate 		/*
777*0Sstevel@tonic-gate 		 * queue to idle thread
778*0Sstevel@tonic-gate 		 *  Check the v_count == 1 again.
779*0Sstevel@tonic-gate 		 *
780*0Sstevel@tonic-gate 		 */
781*0Sstevel@tonic-gate 		mutex_enter(&vp->v_lock);
782*0Sstevel@tonic-gate 		if (vp->v_count > 1) {
783*0Sstevel@tonic-gate 		    vp->v_count--;  /* release our hold from vn_rele */
784*0Sstevel@tonic-gate 		    mutex_exit(&vp->v_lock);
785*0Sstevel@tonic-gate 		    rw_exit(&ip->i_contents);
786*0Sstevel@tonic-gate 		    return;
787*0Sstevel@tonic-gate 		}
788*0Sstevel@tonic-gate 		mutex_exit(&vp->v_lock);
789*0Sstevel@tonic-gate 		uq = &ufs_idle_q;
790*0Sstevel@tonic-gate 
791*0Sstevel@tonic-gate 		/*
792*0Sstevel@tonic-gate 		 * useful iff it has pages or is a fastsymlink; otherwise junk
793*0Sstevel@tonic-gate 		 */
794*0Sstevel@tonic-gate 		mutex_enter(&uq->uq_mutex);
795*0Sstevel@tonic-gate 
796*0Sstevel@tonic-gate 		/* clear IREF means `on idle list' */
797*0Sstevel@tonic-gate 		ip->i_flag &= ~(IREF | IDIRECTIO);
798*0Sstevel@tonic-gate 
799*0Sstevel@tonic-gate 		if (vn_has_cached_data(vp) || ip->i_flag & IFASTSYMLNK) {
800*0Sstevel@tonic-gate 			ins.in_frback.value.ul++;
801*0Sstevel@tonic-gate 			hip = (inode_t *)&ufs_useful_iq[IQHASH(ip)];
802*0Sstevel@tonic-gate 			ufs_nuseful_iq++;
803*0Sstevel@tonic-gate 		} else {
804*0Sstevel@tonic-gate 			ins.in_frfront.value.ul++;
805*0Sstevel@tonic-gate 			hip = (inode_t *)&ufs_junk_iq[IQHASH(ip)];
806*0Sstevel@tonic-gate 			ip->i_flag |= IJUNKIQ;
807*0Sstevel@tonic-gate 			ufs_njunk_iq++;
808*0Sstevel@tonic-gate 		}
809*0Sstevel@tonic-gate 		ip->i_freef = hip;
810*0Sstevel@tonic-gate 		ip->i_freeb = hip->i_freeb;
811*0Sstevel@tonic-gate 		hip->i_freeb->i_freef = ip;
812*0Sstevel@tonic-gate 		hip->i_freeb = ip;
813*0Sstevel@tonic-gate 	}
814*0Sstevel@tonic-gate 
815*0Sstevel@tonic-gate 	/* wakeup thread(s) if q is overfull */
816*0Sstevel@tonic-gate 	if (++uq->uq_ne == uq->uq_lowat)
817*0Sstevel@tonic-gate 		cv_broadcast(&uq->uq_cv);
818*0Sstevel@tonic-gate 
819*0Sstevel@tonic-gate 	/* all done, release the q and inode */
820*0Sstevel@tonic-gate 	mutex_exit(&uq->uq_mutex);
821*0Sstevel@tonic-gate 	rw_exit(&ip->i_contents);
822*0Sstevel@tonic-gate }
823*0Sstevel@tonic-gate 
824*0Sstevel@tonic-gate /*
825*0Sstevel@tonic-gate  * Check accessed and update flags on an inode structure.
826*0Sstevel@tonic-gate  * If any are on, update the inode with the (unique) current time.
827*0Sstevel@tonic-gate  * If waitfor is given, insure I/O order so wait for write to complete.
828*0Sstevel@tonic-gate  * If waitfor is given, ensure I/O ordering by waiting for the write to complete.
829*0Sstevel@tonic-gate void
830*0Sstevel@tonic-gate ufs_iupdat(struct inode *ip, int waitfor)
831*0Sstevel@tonic-gate {
832*0Sstevel@tonic-gate 	struct buf	*bp;
833*0Sstevel@tonic-gate 	struct fs	*fp;
834*0Sstevel@tonic-gate 	struct dinode	*dp;
835*0Sstevel@tonic-gate 	struct ufsvfs	*ufsvfsp 	= ip->i_ufsvfs;
836*0Sstevel@tonic-gate 	int 		i;
837*0Sstevel@tonic-gate 	int		do_trans_times;
838*0Sstevel@tonic-gate 	ushort_t	flag;
839*0Sstevel@tonic-gate 	o_uid_t		suid;
840*0Sstevel@tonic-gate 	o_gid_t		sgid;
841*0Sstevel@tonic-gate 
842*0Sstevel@tonic-gate 	/*
843*0Sstevel@tonic-gate 	 * This function is now safe to be called with either the reader
844*0Sstevel@tonic-gate 	 * or writer i_contents lock.
845*0Sstevel@tonic-gate 	 */
846*0Sstevel@tonic-gate 	ASSERT(RW_LOCK_HELD(&ip->i_contents));
847*0Sstevel@tonic-gate 
848*0Sstevel@tonic-gate 	/*
849*0Sstevel@tonic-gate 	 * Return if file system has been forcibly umounted.
850*0Sstevel@tonic-gate 	 */
851*0Sstevel@tonic-gate 	if (ufsvfsp == NULL)
852*0Sstevel@tonic-gate 		return;
853*0Sstevel@tonic-gate 
854*0Sstevel@tonic-gate 	flag = ip->i_flag;	/* Atomic read */
855*0Sstevel@tonic-gate 	/*
856*0Sstevel@tonic-gate 	 * We better not update the disk inode from a stale inode.
857*0Sstevel@tonic-gate 	 */
858*0Sstevel@tonic-gate 	if (flag & ISTALE)
859*0Sstevel@tonic-gate 		return;
860*0Sstevel@tonic-gate 
861*0Sstevel@tonic-gate 	fp = ip->i_fs;
862*0Sstevel@tonic-gate 
863*0Sstevel@tonic-gate 	if ((flag & (IUPD|IACC|ICHG|IMOD|IMODACC|IATTCHG)) != 0) {
864*0Sstevel@tonic-gate 		if (fp->fs_ronly) {
865*0Sstevel@tonic-gate 			mutex_enter(&ip->i_tlock);
866*0Sstevel@tonic-gate 			ip->i_flag &= ~(IUPD|IACC|ICHG|IMOD|IMODACC|IATTCHG);
867*0Sstevel@tonic-gate 			mutex_exit(&ip->i_tlock);
868*0Sstevel@tonic-gate 			return;
869*0Sstevel@tonic-gate 		}
870*0Sstevel@tonic-gate 		/*
871*0Sstevel@tonic-gate 		 * fs is active while metadata is being written
872*0Sstevel@tonic-gate 		 */
873*0Sstevel@tonic-gate 		mutex_enter(&ufsvfsp->vfs_lock);
874*0Sstevel@tonic-gate 		ufs_notclean(ufsvfsp);
875*0Sstevel@tonic-gate 		/*
876*0Sstevel@tonic-gate 		 * get the dinode
877*0Sstevel@tonic-gate 		 */
878*0Sstevel@tonic-gate 		bp = UFS_BREAD(ufsvfsp, ip->i_dev,
879*0Sstevel@tonic-gate 		    (daddr_t)fsbtodb(fp, itod(fp, ip->i_number)),
880*0Sstevel@tonic-gate 		    (int)fp->fs_bsize);
881*0Sstevel@tonic-gate 		if (bp->b_flags & B_ERROR) {
882*0Sstevel@tonic-gate 			mutex_enter(&ip->i_tlock);
883*0Sstevel@tonic-gate 			ip->i_flag &=
884*0Sstevel@tonic-gate 			    ~(IUPD|IACC|ICHG|IMOD|IMODACC|IATTCHG);
885*0Sstevel@tonic-gate 			mutex_exit(&ip->i_tlock);
886*0Sstevel@tonic-gate 			brelse(bp);
887*0Sstevel@tonic-gate 			return;
888*0Sstevel@tonic-gate 		}
889*0Sstevel@tonic-gate 		/*
890*0Sstevel@tonic-gate 		 * munge inode fields
891*0Sstevel@tonic-gate 		 */
892*0Sstevel@tonic-gate 		mutex_enter(&ip->i_tlock);
893*0Sstevel@tonic-gate 		ITIMES_NOLOCK(ip);
894*0Sstevel@tonic-gate 		do_trans_times = ((ip->i_flag & (IMOD|IMODACC)) == IMODACC);
895*0Sstevel@tonic-gate 		ip->i_flag &= ~(IUPD|IACC|ICHG|IMOD|IMODACC|IATTCHG);
896*0Sstevel@tonic-gate 		mutex_exit(&ip->i_tlock);
897*0Sstevel@tonic-gate 
898*0Sstevel@tonic-gate 		/*
899*0Sstevel@tonic-gate 		 * For reads and concurrent re-writes, no deltas were
900*0Sstevel@tonic-gate 		 * entered for the access time changes - do it now.
901*0Sstevel@tonic-gate 		 */
902*0Sstevel@tonic-gate 		if (do_trans_times) {
903*0Sstevel@tonic-gate 			TRANS_INODE_TIMES(ufsvfsp, ip);
904*0Sstevel@tonic-gate 		}
905*0Sstevel@tonic-gate 
906*0Sstevel@tonic-gate 		/*
907*0Sstevel@tonic-gate 		 * For SunOS 5.0->5.4, these lines below read:
908*0Sstevel@tonic-gate 		 *
909*0Sstevel@tonic-gate 		 * suid = (ip->i_uid > MAXUID) ? UID_LONG : ip->i_uid;
910*0Sstevel@tonic-gate 		 * sgid = (ip->i_gid > MAXUID) ? GID_LONG : ip->i_gid;
911*0Sstevel@tonic-gate 		 *
912*0Sstevel@tonic-gate 		 * where MAXUID was set to 60002.  This was incorrect -
913*0Sstevel@tonic-gate 		 * the uids should have been constrained to what fitted into
914*0Sstevel@tonic-gate 		 * a 16-bit word.
915*0Sstevel@tonic-gate 		 *
916*0Sstevel@tonic-gate 		 * This means that files from 4.x filesystems that have an
917*0Sstevel@tonic-gate 		 * i_suid field larger than 60002 will have that field
918*0Sstevel@tonic-gate 		 * changed to 65535.
919*0Sstevel@tonic-gate 		 *
920*0Sstevel@tonic-gate 		 * Security note: 4.x UFS could never create a i_suid of
921*0Sstevel@tonic-gate 		 * UID_LONG since that would've corresponded to -1.
922*0Sstevel@tonic-gate 		 */
923*0Sstevel@tonic-gate 		suid = (ulong_t)ip->i_uid > (ulong_t)USHRT_MAX ?
924*0Sstevel@tonic-gate 			UID_LONG : ip->i_uid;
925*0Sstevel@tonic-gate 		sgid = (ulong_t)ip->i_gid > (ulong_t)USHRT_MAX ?
926*0Sstevel@tonic-gate 			GID_LONG : ip->i_gid;
927*0Sstevel@tonic-gate 
928*0Sstevel@tonic-gate 		if ((ip->i_suid != suid) || (ip->i_sgid != sgid)) {
929*0Sstevel@tonic-gate 			ip->i_suid = suid;
930*0Sstevel@tonic-gate 			ip->i_sgid = sgid;
931*0Sstevel@tonic-gate 			TRANS_INODE(ufsvfsp, ip);
932*0Sstevel@tonic-gate 		}
933*0Sstevel@tonic-gate 
934*0Sstevel@tonic-gate 		if ((ip->i_mode & IFMT) == IFBLK ||
935*0Sstevel@tonic-gate 		    (ip->i_mode & IFMT) == IFCHR) {
936*0Sstevel@tonic-gate 			dev_t d = ip->i_rdev;
937*0Sstevel@tonic-gate 			dev32_t dev32;
938*0Sstevel@tonic-gate 
939*0Sstevel@tonic-gate 			/*
940*0Sstevel@tonic-gate 			 * load first direct block only if special device
941*0Sstevel@tonic-gate 			 */
942*0Sstevel@tonic-gate 			if (!cmpldev(&dev32, d)) {
943*0Sstevel@tonic-gate 				/*
944*0Sstevel@tonic-gate 				 * We panic here because there's "no way"
945*0Sstevel@tonic-gate 				 * we should have been able to create a large
946*0Sstevel@tonic-gate 				 * inode with a large dev_t.  Earlier layers
947*0Sstevel@tonic-gate 				 * should've caught this.
948*0Sstevel@tonic-gate 				 */
949*0Sstevel@tonic-gate 				panic("ip %p: i_rdev too big", (void *)ip);
950*0Sstevel@tonic-gate 			}
951*0Sstevel@tonic-gate 
952*0Sstevel@tonic-gate 			if (dev32 & ~((O_MAXMAJ << L_BITSMINOR32) | O_MAXMIN)) {
953*0Sstevel@tonic-gate 				ip->i_ordev = dev32;	/* can't use old fmt. */
954*0Sstevel@tonic-gate 			} else {
955*0Sstevel@tonic-gate 				ip->i_ordev = cmpdev(d);
956*0Sstevel@tonic-gate 			}
957*0Sstevel@tonic-gate 		}
958*0Sstevel@tonic-gate 
959*0Sstevel@tonic-gate 		/*
960*0Sstevel@tonic-gate 		 * copy inode to dinode (zero fastsymlnk in dinode)
961*0Sstevel@tonic-gate 		 */
962*0Sstevel@tonic-gate 		dp = (struct dinode *)bp->b_un.b_addr + itoo(fp, ip->i_number);
963*0Sstevel@tonic-gate 		dp->di_ic = ip->i_ic;	/* structure assignment */
964*0Sstevel@tonic-gate 		if (flag & IFASTSYMLNK) {
965*0Sstevel@tonic-gate 			for (i = 1; i < NDADDR; i++)
966*0Sstevel@tonic-gate 				dp->di_db[i] = 0;
967*0Sstevel@tonic-gate 			for (i = 0; i < NIADDR; i++)
968*0Sstevel@tonic-gate 				dp->di_ib[i] = 0;
969*0Sstevel@tonic-gate 		}
970*0Sstevel@tonic-gate 		if (TRANS_ISTRANS(ufsvfsp)) {
971*0Sstevel@tonic-gate 			/*
972*0Sstevel@tonic-gate 			 * Pass only a sector size buffer containing
973*0Sstevel@tonic-gate 			 * the inode; otherwise, when the buffer is copied
974*0Sstevel@tonic-gate 			 * into a cached roll buffer, too much memory
975*0Sstevel@tonic-gate 			 * gets consumed if 8KB inode buffers are passed.
976*0Sstevel@tonic-gate 			 */
977*0Sstevel@tonic-gate 			TRANS_LOG(ufsvfsp, (caddr_t)dp, ip->i_doff,
978*0Sstevel@tonic-gate 			    sizeof (struct dinode),
979*0Sstevel@tonic-gate 			    (caddr_t)P2ALIGN((uintptr_t)dp, DEV_BSIZE),
980*0Sstevel@tonic-gate 			    DEV_BSIZE);
981*0Sstevel@tonic-gate 
982*0Sstevel@tonic-gate 			brelse(bp);
983*0Sstevel@tonic-gate 		} else if (waitfor && (ip->i_ufsvfs->vfs_dio == 0)) {
984*0Sstevel@tonic-gate 			UFS_BRWRITE(ufsvfsp, bp);
985*0Sstevel@tonic-gate 
986*0Sstevel@tonic-gate 			/*
987*0Sstevel@tonic-gate 			 * Synchronous write has guaranteed that inode
988*0Sstevel@tonic-gate 			 * has been written on disk so clear the flag
989*0Sstevel@tonic-gate 			 */
990*0Sstevel@tonic-gate 			mutex_enter(&ip->i_tlock);
991*0Sstevel@tonic-gate 			ip->i_flag &= ~IBDWRITE;
992*0Sstevel@tonic-gate 			mutex_exit(&ip->i_tlock);
993*0Sstevel@tonic-gate 		} else {
994*0Sstevel@tonic-gate 			bdrwrite(bp);
995*0Sstevel@tonic-gate 
996*0Sstevel@tonic-gate 			/*
997*0Sstevel@tonic-gate 			 * This write has not guaranteed that the inode has been
998*0Sstevel@tonic-gate 			 * written to disk.
999*0Sstevel@tonic-gate 			 * Since all update flags on the inode are cleared, we must
1000*0Sstevel@tonic-gate 			 * remember the condition in case the inode is to be updated
1001*0Sstevel@tonic-gate 			 * synchronously later (e.g. fsync()/fdatasync())
1002*0Sstevel@tonic-gate 			 * and the inode has not been modified in the meantime.
1003*0Sstevel@tonic-gate 			 */
1004*0Sstevel@tonic-gate 			mutex_enter(&ip->i_tlock);
1005*0Sstevel@tonic-gate 			ip->i_flag |= IBDWRITE;
1006*0Sstevel@tonic-gate 			mutex_exit(&ip->i_tlock);
1007*0Sstevel@tonic-gate 		}
1008*0Sstevel@tonic-gate 	} else {
1009*0Sstevel@tonic-gate 		/*
1010*0Sstevel@tonic-gate 		 * If the previous inode update was done asynchronously
1011*0Sstevel@tonic-gate 		 * (IBDWRITE) and this update request wants a guaranteed
1012*0Sstevel@tonic-gate 		 * (synchronous) disk update, flush the inode.
1013*0Sstevel@tonic-gate 		 */
1014*0Sstevel@tonic-gate 		if (waitfor && (flag & IBDWRITE)) {
1015*0Sstevel@tonic-gate 			blkflush(ip->i_dev,
1016*0Sstevel@tonic-gate 				(daddr_t)fsbtodb(fp, itod(fp, ip->i_number)));
1017*0Sstevel@tonic-gate 			mutex_enter(&ip->i_tlock);
1018*0Sstevel@tonic-gate 			ip->i_flag &= ~IBDWRITE;
1019*0Sstevel@tonic-gate 			mutex_exit(&ip->i_tlock);
1020*0Sstevel@tonic-gate 		}
1021*0Sstevel@tonic-gate 	}
1022*0Sstevel@tonic-gate }
1023*0Sstevel@tonic-gate 
1024*0Sstevel@tonic-gate #define	SINGLE	0	/* index of single indirect block */
1025*0Sstevel@tonic-gate #define	DOUBLE	1	/* index of double indirect block */
1026*0Sstevel@tonic-gate #define	TRIPLE	2	/* index of triple indirect block */
1027*0Sstevel@tonic-gate 
1028*0Sstevel@tonic-gate /*
1029*0Sstevel@tonic-gate  * Release blocks associated with the inode ip and
1030*0Sstevel@tonic-gate  * stored in the indirect block bn.  Blocks are free'd
1031*0Sstevel@tonic-gate  * in LIFO order up to (but not including) lastbn.  If
1032*0Sstevel@tonic-gate  * level is greater than SINGLE, the block is an indirect
1033*0Sstevel@tonic-gate  * block and recursive calls to indirtrunc must be used to
1034*0Sstevel@tonic-gate  * cleanse other indirect blocks.
1035*0Sstevel@tonic-gate  *
1036*0Sstevel@tonic-gate  * N.B.: triple indirect blocks are untested.
1037*0Sstevel@tonic-gate  */
1038*0Sstevel@tonic-gate static long
1039*0Sstevel@tonic-gate indirtrunc(struct inode *ip, daddr_t bn, daddr_t lastbn, int level, int flags)
1040*0Sstevel@tonic-gate {
1041*0Sstevel@tonic-gate 	int i;
1042*0Sstevel@tonic-gate 	struct buf *bp, *copy;
1043*0Sstevel@tonic-gate 	daddr32_t *bap;
1044*0Sstevel@tonic-gate 	struct ufsvfs *ufsvfsp = ip->i_ufsvfs;
1045*0Sstevel@tonic-gate 	struct fs *fs = ufsvfsp->vfs_fs;
1046*0Sstevel@tonic-gate 	daddr_t nb, last;
1047*0Sstevel@tonic-gate 	long factor;
1048*0Sstevel@tonic-gate 	int blocksreleased = 0, nblocks;
1049*0Sstevel@tonic-gate 
1050*0Sstevel@tonic-gate 	ASSERT(RW_WRITE_HELD(&ip->i_contents));
1051*0Sstevel@tonic-gate 	/*
1052*0Sstevel@tonic-gate 	 * Calculate index in current block of last
1053*0Sstevel@tonic-gate 	 * block to be kept.  -1 indicates the entire
1054*0Sstevel@tonic-gate 	 * block so we need not calculate the index.
1055*0Sstevel@tonic-gate 	 */
1056*0Sstevel@tonic-gate 	factor = 1;
1057*0Sstevel@tonic-gate 	for (i = SINGLE; i < level; i++)
1058*0Sstevel@tonic-gate 		factor *= NINDIR(fs);
1059*0Sstevel@tonic-gate 	last = lastbn;
1060*0Sstevel@tonic-gate 	if (lastbn > 0)
1061*0Sstevel@tonic-gate 		last /= factor;
1062*0Sstevel@tonic-gate 	nblocks = btodb(fs->fs_bsize);
1063*0Sstevel@tonic-gate 	/*
1064*0Sstevel@tonic-gate 	 * Get buffer of block pointers, zero those
1065*0Sstevel@tonic-gate 	 * entries corresponding to blocks to be free'd,
1066*0Sstevel@tonic-gate 	 * and update on disk copy first.
1067*0Sstevel@tonic-gate 	 * *Unless* the root pointer has been synchronously
1068*0Sstevel@tonic-gate 	 * written to disk.  If nothing points to this
1069*0Sstevel@tonic-gate 	 * indirect block then don't bother zero'ing and
1070*0Sstevel@tonic-gate 	 * writing it.
1071*0Sstevel@tonic-gate 	 */
1072*0Sstevel@tonic-gate 	bp = UFS_BREAD(ufsvfsp,
1073*0Sstevel@tonic-gate 			ip->i_dev, (daddr_t)fsbtodb(fs, bn), (int)fs->fs_bsize);
1074*0Sstevel@tonic-gate 	if (bp->b_flags & B_ERROR) {
1075*0Sstevel@tonic-gate 		brelse(bp);
1076*0Sstevel@tonic-gate 		return (0);
1077*0Sstevel@tonic-gate 	}
1078*0Sstevel@tonic-gate 	bap = bp->b_un.b_daddr;
1079*0Sstevel@tonic-gate 	if ((flags & I_CHEAP) == 0) {
1080*0Sstevel@tonic-gate 		uint_t	zb;
1081*0Sstevel@tonic-gate 
1082*0Sstevel@tonic-gate 		zb = (uint_t)((NINDIR(fs) - (last + 1)) * sizeof (daddr32_t));
1083*0Sstevel@tonic-gate 
1084*0Sstevel@tonic-gate 		if (zb) {
1085*0Sstevel@tonic-gate 			/*
1086*0Sstevel@tonic-gate 			 * push any data into the log before we zero it
1087*0Sstevel@tonic-gate 			 */
1088*0Sstevel@tonic-gate 			if (bp->b_flags & B_DELWRI)
1089*0Sstevel@tonic-gate 				TRANS_LOG(ufsvfsp, (caddr_t)bap,
1090*0Sstevel@tonic-gate 					ldbtob(bp->b_blkno), bp->b_bcount,
1091*0Sstevel@tonic-gate 					bp->b_un.b_addr, bp->b_bcount);
1092*0Sstevel@tonic-gate 			copy = ngeteblk(fs->fs_bsize);
1093*0Sstevel@tonic-gate 			bcopy((caddr_t)bap, (caddr_t)copy->b_un.b_daddr,
1094*0Sstevel@tonic-gate 				(uint_t)fs->fs_bsize);
1095*0Sstevel@tonic-gate 			bzero((caddr_t)&bap[last + 1], zb);
1096*0Sstevel@tonic-gate 
1097*0Sstevel@tonic-gate 			TRANS_BUF(ufsvfsp,
1098*0Sstevel@tonic-gate 				(caddr_t)&bap[last + 1] - (caddr_t)bap,
1099*0Sstevel@tonic-gate 				zb, bp, DT_ABZERO);
1100*0Sstevel@tonic-gate 
1101*0Sstevel@tonic-gate 			UFS_BRWRITE(ufsvfsp, bp);
1102*0Sstevel@tonic-gate 			bp = copy, bap = bp->b_un.b_daddr;
1103*0Sstevel@tonic-gate 		}
1104*0Sstevel@tonic-gate 	} else {
1105*0Sstevel@tonic-gate 		/* make sure write retries are also cleared */
1106*0Sstevel@tonic-gate 		bp->b_flags &= ~(B_DELWRI | B_RETRYWRI);
1107*0Sstevel@tonic-gate 		bp->b_flags |= B_STALE | B_AGE;
1108*0Sstevel@tonic-gate 	}
1109*0Sstevel@tonic-gate 
1110*0Sstevel@tonic-gate 	/*
1111*0Sstevel@tonic-gate 	 * Recursively free totally unused blocks.
1112*0Sstevel@tonic-gate 	 */
1113*0Sstevel@tonic-gate 	flags |= I_CHEAP;
1114*0Sstevel@tonic-gate 	for (i = NINDIR(fs) - 1; i > last; i--) {
1115*0Sstevel@tonic-gate 		nb = bap[i];
1116*0Sstevel@tonic-gate 		if (nb == 0)
1117*0Sstevel@tonic-gate 			continue;
1118*0Sstevel@tonic-gate 		if (level > SINGLE) {
1119*0Sstevel@tonic-gate 			blocksreleased +=
1120*0Sstevel@tonic-gate 			    indirtrunc(ip, nb, (daddr_t)-1, level - 1, flags);
1121*0Sstevel@tonic-gate 			free(ip, nb, (off_t)fs->fs_bsize, flags | I_IBLK);
1122*0Sstevel@tonic-gate 		} else
1123*0Sstevel@tonic-gate 			free(ip, nb, (off_t)fs->fs_bsize, flags);
1124*0Sstevel@tonic-gate 		blocksreleased += nblocks;
1125*0Sstevel@tonic-gate 	}
1126*0Sstevel@tonic-gate 	flags &= ~I_CHEAP;
1127*0Sstevel@tonic-gate 
1128*0Sstevel@tonic-gate 	/*
1129*0Sstevel@tonic-gate 	 * Recursively free last partial block.
1130*0Sstevel@tonic-gate 	 */
1131*0Sstevel@tonic-gate 	if (level > SINGLE && lastbn >= 0) {
1132*0Sstevel@tonic-gate 		last = lastbn % factor;
1133*0Sstevel@tonic-gate 		nb = bap[i];
1134*0Sstevel@tonic-gate 		if (nb != 0)
1135*0Sstevel@tonic-gate 			blocksreleased += indirtrunc(ip, nb, last, level - 1,
1136*0Sstevel@tonic-gate 				flags);
1137*0Sstevel@tonic-gate 	}
1138*0Sstevel@tonic-gate 	brelse(bp);
1139*0Sstevel@tonic-gate 	return (blocksreleased);
1140*0Sstevel@tonic-gate }
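/*
 * Editorial note on the arithmetic above: for a DOUBLE-level call, factor
 * is NINDIR(fs), so "last = lastbn / factor" selects the entry in this
 * block whose subtree still contains lastbn; entries beyond it are freed
 * whole, and the trailing "lastbn % factor" recursion trims the partial
 * subtree.
 */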
1141*0Sstevel@tonic-gate 
1142*0Sstevel@tonic-gate /*
1143*0Sstevel@tonic-gate  * Truncate the inode ip to at most length size.
1144*0Sstevel@tonic-gate  * Free affected disk blocks -- the blocks of the
1145*0Sstevel@tonic-gate  * file are removed in reverse order.
1146*0Sstevel@tonic-gate  *
1147*0Sstevel@tonic-gate  * N.B.: triple indirect blocks are untested.
1148*0Sstevel@tonic-gate  */
1149*0Sstevel@tonic-gate static int i_genrand = 1234;
1150*0Sstevel@tonic-gate int
1151*0Sstevel@tonic-gate ufs_itrunc(struct inode *oip, u_offset_t length, int flags, cred_t *cr)
1152*0Sstevel@tonic-gate {
1153*0Sstevel@tonic-gate 	struct fs *fs = oip->i_fs;
1154*0Sstevel@tonic-gate 	struct ufsvfs *ufsvfsp = oip->i_ufsvfs;
1155*0Sstevel@tonic-gate 	struct inode *ip;
1156*0Sstevel@tonic-gate 	daddr_t lastblock;
1157*0Sstevel@tonic-gate 	off_t bsize;
1158*0Sstevel@tonic-gate 	int boff;
1159*0Sstevel@tonic-gate 	daddr_t bn, lastiblock[NIADDR];
1160*0Sstevel@tonic-gate 	int level;
1161*0Sstevel@tonic-gate 	long nblocks, blocksreleased = 0;
1162*0Sstevel@tonic-gate 	int i;
1163*0Sstevel@tonic-gate 	ushort_t mode;
1164*0Sstevel@tonic-gate 	struct inode tip;
1165*0Sstevel@tonic-gate 	int err;
1166*0Sstevel@tonic-gate 	u_offset_t maxoffset = (ufsvfsp->vfs_lfflags & UFS_LARGEFILES) ?
1167*0Sstevel@tonic-gate 	    (UFS_MAXOFFSET_T) : (MAXOFF32_T);
1168*0Sstevel@tonic-gate 
1169*0Sstevel@tonic-gate 	/*
1170*0Sstevel@tonic-gate 	 * Shadow inodes do not need to hold the vfs_dqrwlock lock. Most
1171*0Sstevel@tonic-gate 	 * other uses need the reader lock. opendq() holds the writer lock.
1172*0Sstevel@tonic-gate 	 */
1173*0Sstevel@tonic-gate 	ASSERT((oip->i_mode & IFMT) == IFSHAD ||
1174*0Sstevel@tonic-gate 		RW_LOCK_HELD(&ufsvfsp->vfs_dqrwlock));
1175*0Sstevel@tonic-gate 	ASSERT(RW_WRITE_HELD(&oip->i_contents));
1176*0Sstevel@tonic-gate 	/*
1177*0Sstevel@tonic-gate 	 * We only allow truncation of regular files and directories
1178*0Sstevel@tonic-gate 	 * to arbitrary lengths here.  In addition, we allow symbolic
1179*0Sstevel@tonic-gate 	 * links to be truncated only to zero length.  Other inode
1180*0Sstevel@tonic-gate 	 * types cannot have their length set here, because disk block
1181*0Sstevel@tonic-gate 	 * pointers are being manipulated - especially for device inodes,
1182*0Sstevel@tonic-gate 	 * where ip->i_ordev is actually stored in ip->i_db[0]!
1183*0Sstevel@tonic-gate 	 */
1184*0Sstevel@tonic-gate 	TRANS_INODE(ufsvfsp, oip);
1185*0Sstevel@tonic-gate 	mode = oip->i_mode & IFMT;
1186*0Sstevel@tonic-gate 	if (flags & I_FREE) {
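		/*
		 * 16843009 is 0x01010101, so this multiply (which the
		 * compiler can reduce to shifts and adds) together with
		 * the increment below acts as a cheap pseudo-random step
		 * for perturbing i_gen when an inode is being freed.
		 */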
1187*0Sstevel@tonic-gate 		i_genrand *= 16843009;  /* turns into shift and adds */
1188*0Sstevel@tonic-gate 		i_genrand++;
1189*0Sstevel@tonic-gate 		oip->i_gen += ((i_genrand + lbolt) & 0xffff) + 1;
1190*0Sstevel@tonic-gate 		oip->i_flag |= ICHG|IUPD;
1191*0Sstevel@tonic-gate 		oip->i_seq++;
1192*0Sstevel@tonic-gate 		if (length == oip->i_size)
1193*0Sstevel@tonic-gate 			return (0);
1194*0Sstevel@tonic-gate 		flags |= I_CHEAP;
1195*0Sstevel@tonic-gate 	}
1196*0Sstevel@tonic-gate 	if (mode == IFIFO)
1197*0Sstevel@tonic-gate 		return (0);
1198*0Sstevel@tonic-gate 	if (mode != IFREG && mode != IFDIR && mode != IFATTRDIR &&
1199*0Sstevel@tonic-gate 	    !(mode == IFLNK && length == (offset_t)0) && mode != IFSHAD)
1200*0Sstevel@tonic-gate 		return (EINVAL);
1201*0Sstevel@tonic-gate 	if (length > maxoffset)
1202*0Sstevel@tonic-gate 		return (EFBIG);
1203*0Sstevel@tonic-gate 	if ((mode == IFDIR) || (mode == IFATTRDIR))
1204*0Sstevel@tonic-gate 		flags |= I_DIR;
1205*0Sstevel@tonic-gate 	if (mode == IFSHAD)
1206*0Sstevel@tonic-gate 		flags |= I_SHAD;
1207*0Sstevel@tonic-gate 	if (oip == ufsvfsp->vfs_qinod)
1208*0Sstevel@tonic-gate 		flags |= I_QUOTA;
1209*0Sstevel@tonic-gate 	if (length == oip->i_size) {
1210*0Sstevel@tonic-gate 		/* update ctime and mtime to please POSIX tests */
1211*0Sstevel@tonic-gate 		oip->i_flag |= ICHG|IUPD;
1212*0Sstevel@tonic-gate 		oip->i_seq++;
1213*0Sstevel@tonic-gate 		if (length == 0) {
1214*0Sstevel@tonic-gate 			/* nothing to cache so clear the flag */
1215*0Sstevel@tonic-gate 			oip->i_flag &= ~IFASTSYMLNK;
1216*0Sstevel@tonic-gate 		}
1217*0Sstevel@tonic-gate 		return (0);
1218*0Sstevel@tonic-gate 	}
1219*0Sstevel@tonic-gate 	/* wipe out fast symlink till next access */
1220*0Sstevel@tonic-gate 	if (oip->i_flag & IFASTSYMLNK) {
1221*0Sstevel@tonic-gate 		int j;
1222*0Sstevel@tonic-gate 
1223*0Sstevel@tonic-gate 		ASSERT(ITOV(oip)->v_type == VLNK);
1224*0Sstevel@tonic-gate 
1225*0Sstevel@tonic-gate 		oip->i_flag &= ~IFASTSYMLNK;
1226*0Sstevel@tonic-gate 
1227*0Sstevel@tonic-gate 		for (j = 1; j < NDADDR; j++)
1228*0Sstevel@tonic-gate 			oip->i_db[j] = 0;
1229*0Sstevel@tonic-gate 		for (j = 0; j < NIADDR; j++)
1230*0Sstevel@tonic-gate 			oip->i_ib[j] = 0;
1231*0Sstevel@tonic-gate 	}
1232*0Sstevel@tonic-gate 
1233*0Sstevel@tonic-gate 	boff = (int)blkoff(fs, length);
1234*0Sstevel@tonic-gate 
1235*0Sstevel@tonic-gate 	if (length > oip->i_size) {
1236*0Sstevel@tonic-gate 		/*
1237*0Sstevel@tonic-gate 		 * Trunc up case.  BMAPALLOC will ensure that the right blocks
1238*0Sstevel@tonic-gate 		 * are allocated.  This includes extending the old frag to a
1239*0Sstevel@tonic-gate 		 * full block (if needed) in addition to doing any work
1240*0Sstevel@tonic-gate 		 * needed for allocating the last block.
1241*0Sstevel@tonic-gate 		 */
1242*0Sstevel@tonic-gate 		if (boff == 0)
1243*0Sstevel@tonic-gate 			err = BMAPALLOC(oip, length - 1, (int)fs->fs_bsize, cr);
1244*0Sstevel@tonic-gate 		else
1245*0Sstevel@tonic-gate 			err = BMAPALLOC(oip, length - 1, boff, cr);
1246*0Sstevel@tonic-gate 
1247*0Sstevel@tonic-gate 		if (err == 0) {
1248*0Sstevel@tonic-gate 			/*
1249*0Sstevel@tonic-gate 			 * Save old size and set inode's size now
1250*0Sstevel@tonic-gate 			 * so that we don't cause too much of the
1251*0Sstevel@tonic-gate 			 * file to be zero'd and pushed.
1252*0Sstevel@tonic-gate 			 */
1253*0Sstevel@tonic-gate 			u_offset_t osize = oip->i_size;
1254*0Sstevel@tonic-gate 			oip->i_size  = length;
1255*0Sstevel@tonic-gate 			/*
1256*0Sstevel@tonic-gate 			 * Make sure we zero out the remaining bytes of
1257*0Sstevel@tonic-gate 			 * the page in case a mmap scribbled on it. We
1258*0Sstevel@tonic-gate 			 * can't prevent a mmap from writing beyond EOF
1259*0Sstevel@tonic-gate 			 * on the last page of a file.
1260*0Sstevel@tonic-gate 			 *
1261*0Sstevel@tonic-gate 			 */
1262*0Sstevel@tonic-gate 			if ((boff = (int)blkoff(fs, osize)) != 0) {
1263*0Sstevel@tonic-gate 				bsize = (int)lblkno(fs, osize - 1) >= NDADDR ?
1264*0Sstevel@tonic-gate 				    fs->fs_bsize : fragroundup(fs, boff);
1265*0Sstevel@tonic-gate 				pvn_vpzero(ITOV(oip), osize,
1266*0Sstevel@tonic-gate 				    (size_t)(bsize - boff));
1267*0Sstevel@tonic-gate 			}
1268*0Sstevel@tonic-gate 			oip->i_flag |= ICHG|IATTCHG;
1269*0Sstevel@tonic-gate 			oip->i_seq++;
1270*0Sstevel@tonic-gate 			ITIMES_NOLOCK(oip);
1271*0Sstevel@tonic-gate 			/*
1272*0Sstevel@tonic-gate 			 * MAXOFF32_T is old 2GB size limit. If
1273*0Sstevel@tonic-gate 			 * this operation caused a large file to be
1274*0Sstevel@tonic-gate 			 * created, turn on the superblock flag
1275*0Sstevel@tonic-gate 			 * and update the superblock, if the flag
1276*0Sstevel@tonic-gate 			 * is not already on.
1277*0Sstevel@tonic-gate 			 */
1278*0Sstevel@tonic-gate 			if ((length > (u_offset_t)MAXOFF32_T) &&
1279*0Sstevel@tonic-gate 			    !(fs->fs_flags & FSLARGEFILES)) {
1280*0Sstevel@tonic-gate 				ASSERT(ufsvfsp->vfs_lfflags & UFS_LARGEFILES);
1281*0Sstevel@tonic-gate 				mutex_enter(&ufsvfsp->vfs_lock);
1282*0Sstevel@tonic-gate 				fs->fs_flags |= FSLARGEFILES;
1283*0Sstevel@tonic-gate 				ufs_sbwrite(ufsvfsp);
1284*0Sstevel@tonic-gate 				mutex_exit(&ufsvfsp->vfs_lock);
1285*0Sstevel@tonic-gate 			}
1286*0Sstevel@tonic-gate 		}
1287*0Sstevel@tonic-gate 
1288*0Sstevel@tonic-gate 		return (err);
1289*0Sstevel@tonic-gate 	}
1290*0Sstevel@tonic-gate 
1291*0Sstevel@tonic-gate 	/*
1292*0Sstevel@tonic-gate 	 * Update the pages of the file.  If the file is not being
1293*0Sstevel@tonic-gate 	 * truncated to a block boundary, the contents of the
1294*0Sstevel@tonic-gate 	 * pages following the end of the file must be zero'ed
1295*0Sstevel@tonic-gate 	 * in case they ever become accessible again because
1296*0Sstevel@tonic-gate 	 * of subsequent file growth.
1297*0Sstevel@tonic-gate 	 */
1298*0Sstevel@tonic-gate 	if (boff == 0) {
1299*0Sstevel@tonic-gate 		(void) pvn_vplist_dirty(ITOV(oip), length, ufs_putapage,
1300*0Sstevel@tonic-gate 		    B_INVAL | B_TRUNC, CRED());
1301*0Sstevel@tonic-gate 		if (length == 0)
1302*0Sstevel@tonic-gate 			ASSERT(!vn_has_cached_data(ITOV(oip)));
1303*0Sstevel@tonic-gate 	} else {
1304*0Sstevel@tonic-gate 		/*
1305*0Sstevel@tonic-gate 		 * Make sure that the last block is properly allocated.
1306*0Sstevel@tonic-gate 		 * We only really have to do this if the last block is
1307*0Sstevel@tonic-gate 		 * actually allocated since ufs_bmap will now handle the case
1308*0Sstevel@tonic-gate 		 * of a fragment which has no block allocated.  Just to
1309*0Sstevel@tonic-gate 		 * be sure, we do it now independent of current allocation.
1310*0Sstevel@tonic-gate 		 */
1311*0Sstevel@tonic-gate 		err = BMAPALLOC(oip, length - 1, boff, cr);
1312*0Sstevel@tonic-gate 		if (err)
1313*0Sstevel@tonic-gate 			return (err);
1314*0Sstevel@tonic-gate 
1315*0Sstevel@tonic-gate 		/*
1316*0Sstevel@tonic-gate 		 * BMAPALLOC will call bmap_write which defers i_seq
1317*0Sstevel@tonic-gate 		 * processing.  If the timestamps were changed, update
1318*0Sstevel@tonic-gate 		 * i_seq before rdip drops i_contents or syncs the inode.
1319*0Sstevel@tonic-gate 		 */
1320*0Sstevel@tonic-gate 		if (oip->i_flag & (ICHG|IUPD))
1321*0Sstevel@tonic-gate 			oip->i_seq++;
1322*0Sstevel@tonic-gate 
1323*0Sstevel@tonic-gate 		/*
1324*0Sstevel@tonic-gate 		 * BugId 4069932
1325*0Sstevel@tonic-gate 		 * Make sure that the relevant partial page appears in
1326*0Sstevel@tonic-gate 		 * the v_pages list, so that pvn_vpzero() will do its
1327*0Sstevel@tonic-gate 		 * job.  Since doing this correctly requires everything
1328*0Sstevel@tonic-gate 		 * in rdip() except for the uiomove(), it's easier and
1329*0Sstevel@tonic-gate 		 * safer to do the uiomove() rather than duplicate the
1330*0Sstevel@tonic-gate 		 * rest of rdip() here.
1331*0Sstevel@tonic-gate 		 *
1332*0Sstevel@tonic-gate 		 * To get here, we know that length indicates a byte
1333*0Sstevel@tonic-gate 		 * that is not the first byte of a block.  (length - 1)
1334*0Sstevel@tonic-gate 		 * is the last actual byte known to exist.  Deduction
1335*0Sstevel@tonic-gate 		 * shows it is in the same block as byte (length).
1336*0Sstevel@tonic-gate 		 * Thus, this rdip() invocation should always succeed
1337*0Sstevel@tonic-gate 		 * except in the face of i/o errors, and give us the
1338*0Sstevel@tonic-gate 		 * block we care about.
1339*0Sstevel@tonic-gate 		 *
1340*0Sstevel@tonic-gate 		 * rdip() makes the same locking assertions and
1341*0Sstevel@tonic-gate 		 * assumptions as we do.  We do not acquire any locks
1342*0Sstevel@tonic-gate 		 * before calling it, so we have not changed the locking
1343*0Sstevel@tonic-gate 		 * situation.  Finally, there do not appear to be any
1344*0Sstevel@tonic-gate 		 * paths whereby rdip() ends up invoking us again.
1345*0Sstevel@tonic-gate 		 * Thus, infinite recursion is avoided.
1346*0Sstevel@tonic-gate 		 */
1347*0Sstevel@tonic-gate 		{
1348*0Sstevel@tonic-gate 			uio_t uio;
1349*0Sstevel@tonic-gate 			iovec_t iov[1];
1350*0Sstevel@tonic-gate 			char buffer;
1351*0Sstevel@tonic-gate 
1352*0Sstevel@tonic-gate 			uio.uio_iov = iov;
1353*0Sstevel@tonic-gate 			uio.uio_iovcnt = 1;
1354*0Sstevel@tonic-gate 			uio.uio_loffset = length - 1;
1355*0Sstevel@tonic-gate 			uio.uio_resid = 1;
1356*0Sstevel@tonic-gate 			uio.uio_segflg = UIO_SYSSPACE;
1357*0Sstevel@tonic-gate 			uio.uio_extflg = UIO_COPY_CACHED;
1358*0Sstevel@tonic-gate 
1359*0Sstevel@tonic-gate 			iov[0].iov_base = &buffer;
1360*0Sstevel@tonic-gate 			iov[0].iov_len = 1;
1361*0Sstevel@tonic-gate 
1362*0Sstevel@tonic-gate 			err = rdip(oip, &uio, UIO_READ, NULL);
1363*0Sstevel@tonic-gate 			if (err)
1364*0Sstevel@tonic-gate 				return (err);
1365*0Sstevel@tonic-gate 		}
1366*0Sstevel@tonic-gate 
1367*0Sstevel@tonic-gate 		bsize = (int)lblkno(fs, length - 1) >= NDADDR ?
1368*0Sstevel@tonic-gate 		    fs->fs_bsize : fragroundup(fs, boff);
1369*0Sstevel@tonic-gate 		pvn_vpzero(ITOV(oip), length, (size_t)(bsize - boff));
1370*0Sstevel@tonic-gate 		/*
1371*0Sstevel@tonic-gate 		 * Ensure full fs block is marked as dirty.
1372*0Sstevel@tonic-gate 		 */
1373*0Sstevel@tonic-gate 		(void) pvn_vplist_dirty(ITOV(oip), length + (bsize - boff),
1374*0Sstevel@tonic-gate 		    ufs_putapage, B_INVAL | B_TRUNC, CRED());
1375*0Sstevel@tonic-gate 	}
1376*0Sstevel@tonic-gate 
1377*0Sstevel@tonic-gate 	/*
1378*0Sstevel@tonic-gate 	 * Calculate index into inode's block list of
1379*0Sstevel@tonic-gate 	 * last direct and indirect blocks (if any)
1380*0Sstevel@tonic-gate 	 * which we want to keep.  Lastblock is -1 when
1381*0Sstevel@tonic-gate 	 * the file is truncated to 0.
1382*0Sstevel@tonic-gate 	 */
1383*0Sstevel@tonic-gate 	lastblock = lblkno(fs, length + fs->fs_bsize - 1) - 1;
1384*0Sstevel@tonic-gate 	lastiblock[SINGLE] = lastblock - NDADDR;
1385*0Sstevel@tonic-gate 	lastiblock[DOUBLE] = lastiblock[SINGLE] - NINDIR(fs);
1386*0Sstevel@tonic-gate 	lastiblock[TRIPLE] = lastiblock[DOUBLE] - NINDIR(fs) * NINDIR(fs);
1387*0Sstevel@tonic-gate 	nblocks = btodb(fs->fs_bsize);
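	/*
	 * Illustrative numbers (an assumption, not taken from this file):
	 * with NDADDR = 12 direct blocks, fs_bsize = 8192 and 4-byte disk
	 * addresses, NINDIR(fs) = 2048.  Truncating to length = 100 MB
	 * gives lastblock = 12799, lastiblock[SINGLE] = 12787,
	 * lastiblock[DOUBLE] = 10739 and lastiblock[TRIPLE] < 0, so the
	 * triple indirect tree is freed entirely and only part of the
	 * double indirect tree is kept.
	 */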
1388*0Sstevel@tonic-gate 
1389*0Sstevel@tonic-gate 	/*
1390*0Sstevel@tonic-gate 	 * Update file and block pointers
1391*0Sstevel@tonic-gate 	 * on disk before we start freeing blocks.
1392*0Sstevel@tonic-gate 	 * If we crash before free'ing blocks below,
1393*0Sstevel@tonic-gate 	 * the blocks will be returned to the free list.
1394*0Sstevel@tonic-gate 	 * lastiblock values are also normalized to -1
1395*0Sstevel@tonic-gate 	 * for calls to indirtrunc below.
1396*0Sstevel@tonic-gate 	 */
1397*0Sstevel@tonic-gate 	tip = *oip;			/* structure copy */
1398*0Sstevel@tonic-gate 	ip = &tip;
1399*0Sstevel@tonic-gate 
1400*0Sstevel@tonic-gate 	for (level = TRIPLE; level >= SINGLE; level--)
1401*0Sstevel@tonic-gate 		if (lastiblock[level] < 0) {
1402*0Sstevel@tonic-gate 			oip->i_ib[level] = 0;
1403*0Sstevel@tonic-gate 			lastiblock[level] = -1;
1404*0Sstevel@tonic-gate 		}
1405*0Sstevel@tonic-gate 	for (i = NDADDR - 1; i > lastblock; i--) {
1406*0Sstevel@tonic-gate 		oip->i_db[i] = 0;
1407*0Sstevel@tonic-gate 		flags |= I_CHEAP;
1408*0Sstevel@tonic-gate 	}
1409*0Sstevel@tonic-gate 	oip->i_size = length;
1410*0Sstevel@tonic-gate 	oip->i_flag |= ICHG|IUPD|IATTCHG;
1411*0Sstevel@tonic-gate 	oip->i_seq++;
1412*0Sstevel@tonic-gate 	if (!TRANS_ISTRANS(ufsvfsp))
1413*0Sstevel@tonic-gate 		ufs_iupdat(oip, I_SYNC);	/* do sync inode update */
1414*0Sstevel@tonic-gate 
1415*0Sstevel@tonic-gate 	/*
1416*0Sstevel@tonic-gate 	 * Indirect blocks first.
1417*0Sstevel@tonic-gate 	 */
1418*0Sstevel@tonic-gate 	for (level = TRIPLE; level >= SINGLE; level--) {
1419*0Sstevel@tonic-gate 		bn = ip->i_ib[level];
1420*0Sstevel@tonic-gate 		if (bn != 0) {
1421*0Sstevel@tonic-gate 			blocksreleased +=
1422*0Sstevel@tonic-gate 			    indirtrunc(ip, bn, lastiblock[level], level, flags);
1423*0Sstevel@tonic-gate 			if (lastiblock[level] < 0) {
1424*0Sstevel@tonic-gate 				ip->i_ib[level] = 0;
1425*0Sstevel@tonic-gate 				free(ip, bn, (off_t)fs->fs_bsize,
1426*0Sstevel@tonic-gate 					flags | I_IBLK);
1427*0Sstevel@tonic-gate 				blocksreleased += nblocks;
1428*0Sstevel@tonic-gate 			}
1429*0Sstevel@tonic-gate 		}
1430*0Sstevel@tonic-gate 		if (lastiblock[level] >= 0)
1431*0Sstevel@tonic-gate 			goto done;
1432*0Sstevel@tonic-gate 	}
1433*0Sstevel@tonic-gate 
1434*0Sstevel@tonic-gate 	/*
1435*0Sstevel@tonic-gate 	 * All whole direct blocks or frags.
1436*0Sstevel@tonic-gate 	 */
1437*0Sstevel@tonic-gate 	for (i = NDADDR - 1; i > lastblock; i--) {
1438*0Sstevel@tonic-gate 		bn = ip->i_db[i];
1439*0Sstevel@tonic-gate 		if (bn == 0)
1440*0Sstevel@tonic-gate 			continue;
1441*0Sstevel@tonic-gate 		ip->i_db[i] = 0;
1442*0Sstevel@tonic-gate 		bsize = (off_t)blksize(fs, ip, i);
1443*0Sstevel@tonic-gate 		free(ip, bn, bsize, flags);
1444*0Sstevel@tonic-gate 		blocksreleased += btodb(bsize);
1445*0Sstevel@tonic-gate 	}
1446*0Sstevel@tonic-gate 	if (lastblock < 0)
1447*0Sstevel@tonic-gate 		goto done;
1448*0Sstevel@tonic-gate 
1449*0Sstevel@tonic-gate 	/*
1450*0Sstevel@tonic-gate 	 * Finally, look for a change in size of the
1451*0Sstevel@tonic-gate 	 * last direct block; release any frags.
1452*0Sstevel@tonic-gate 	 */
1453*0Sstevel@tonic-gate 	bn = ip->i_db[lastblock];
1454*0Sstevel@tonic-gate 	if (bn != 0) {
1455*0Sstevel@tonic-gate 		off_t oldspace, newspace;
1456*0Sstevel@tonic-gate 
1457*0Sstevel@tonic-gate 		/*
1458*0Sstevel@tonic-gate 		 * Calculate amount of space we're giving
1459*0Sstevel@tonic-gate 		 * back as old block size minus new block size.
1460*0Sstevel@tonic-gate 		 */
1461*0Sstevel@tonic-gate 		oldspace = blksize(fs, ip, lastblock);
1462*0Sstevel@tonic-gate 		UFS_SET_ISIZE(length, ip);
1463*0Sstevel@tonic-gate 		newspace = blksize(fs, ip, lastblock);
1464*0Sstevel@tonic-gate 		if (newspace == 0) {
1465*0Sstevel@tonic-gate 			err = ufs_fault(ITOV(ip), "ufs_itrunc: newspace == 0");
1466*0Sstevel@tonic-gate 			return (err);
1467*0Sstevel@tonic-gate 		}
1468*0Sstevel@tonic-gate 		if (oldspace - newspace > 0) {
1469*0Sstevel@tonic-gate 			/*
1470*0Sstevel@tonic-gate 			 * Block number of space to be free'd is
1471*0Sstevel@tonic-gate 			 * the old block # plus the number of frags
1472*0Sstevel@tonic-gate 			 * required for the storage we're keeping.
1473*0Sstevel@tonic-gate 			 */
1474*0Sstevel@tonic-gate 			bn += numfrags(fs, newspace);
1475*0Sstevel@tonic-gate 			free(ip, bn, oldspace - newspace, flags);
1476*0Sstevel@tonic-gate 			blocksreleased += btodb(oldspace - newspace);
1477*0Sstevel@tonic-gate 		}
1478*0Sstevel@tonic-gate 	}
1479*0Sstevel@tonic-gate done:
1480*0Sstevel@tonic-gate /* BEGIN PARANOIA */
1481*0Sstevel@tonic-gate 	for (level = SINGLE; level <= TRIPLE; level++)
1482*0Sstevel@tonic-gate 		if (ip->i_ib[level] != oip->i_ib[level]) {
1483*0Sstevel@tonic-gate 			err = ufs_fault(ITOV(ip), "ufs_itrunc: indirect block");
1484*0Sstevel@tonic-gate 			return (err);
1485*0Sstevel@tonic-gate 		}
1486*0Sstevel@tonic-gate 
1487*0Sstevel@tonic-gate 	for (i = 0; i < NDADDR; i++)
1488*0Sstevel@tonic-gate 		if (ip->i_db[i] != oip->i_db[i]) {
1489*0Sstevel@tonic-gate 			err = ufs_fault(ITOV(ip), "ufs_itrunc: direct block");
1490*0Sstevel@tonic-gate 			return (err);
1491*0Sstevel@tonic-gate 		}
1492*0Sstevel@tonic-gate /* END PARANOIA */
1493*0Sstevel@tonic-gate 	oip->i_blocks -= blocksreleased;
1494*0Sstevel@tonic-gate 
1495*0Sstevel@tonic-gate 	if (oip->i_blocks < 0) {		/* sanity */
1496*0Sstevel@tonic-gate 		cmn_err(CE_NOTE,
1497*0Sstevel@tonic-gate 		    "ufs_itrunc: %s/%d new size = %lld, blocks = %d\n",
1498*0Sstevel@tonic-gate 		    fs->fs_fsmnt, (int)oip->i_number, oip->i_size,
1499*0Sstevel@tonic-gate 		    (int)oip->i_blocks);
1500*0Sstevel@tonic-gate 		oip->i_blocks = 0;
1501*0Sstevel@tonic-gate 	}
1502*0Sstevel@tonic-gate 	oip->i_flag |= ICHG|IATTCHG;
1503*0Sstevel@tonic-gate 	oip->i_seq++;
1504*0Sstevel@tonic-gate 	/* blocksreleased is >= zero, so this cannot fail */
1505*0Sstevel@tonic-gate 	(void) chkdq(oip, -blocksreleased, 0, cr, (char **)NULL,
1506*0Sstevel@tonic-gate 		(size_t *)NULL);
1507*0Sstevel@tonic-gate 	return (0);
1508*0Sstevel@tonic-gate }
1509*0Sstevel@tonic-gate 
1510*0Sstevel@tonic-gate /*
1511*0Sstevel@tonic-gate  * Check mode permission on inode.  Mode is READ, WRITE or EXEC.
1512*0Sstevel@tonic-gate  * In the case of WRITE, the read-only status of the file system
1513*0Sstevel@tonic-gate  * is checked.  Depending on the calling user, the appropriate
1514*0Sstevel@tonic-gate  * mode bits are selected; privileges to override missing permission
1515*0Sstevel@tonic-gate  * bits are checked through secpolicy_vnode_access().
1516*0Sstevel@tonic-gate  */
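/*
 * A minimal usage sketch (hypothetical caller, not taken from this file):
 *
 *	if ((err = ufs_iaccess(ip, IWRITE, cr)) != 0)
 *		return (err);
 */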
1517*0Sstevel@tonic-gate int
1518*0Sstevel@tonic-gate ufs_iaccess(void *vip, int mode, struct cred *cr)
1519*0Sstevel@tonic-gate {
1520*0Sstevel@tonic-gate 	struct inode *ip = vip;
1521*0Sstevel@tonic-gate 	int shift = 0;
1522*0Sstevel@tonic-gate 
1523*0Sstevel@tonic-gate 	if (mode & IWRITE) {
1524*0Sstevel@tonic-gate 		/*
1525*0Sstevel@tonic-gate 		 * Disallow write attempts on read-only
1526*0Sstevel@tonic-gate 		 * file systems, unless the file is a block
1527*0Sstevel@tonic-gate 		 * or character device or a FIFO.
1528*0Sstevel@tonic-gate 		 */
1529*0Sstevel@tonic-gate 		if (ip->i_fs->fs_ronly != 0) {
1530*0Sstevel@tonic-gate 			if ((ip->i_mode & IFMT) != IFCHR &&
1531*0Sstevel@tonic-gate 			    (ip->i_mode & IFMT) != IFBLK &&
1532*0Sstevel@tonic-gate 			    (ip->i_mode & IFMT) != IFIFO) {
1533*0Sstevel@tonic-gate 				return (EROFS);
1534*0Sstevel@tonic-gate 			}
1535*0Sstevel@tonic-gate 		}
1536*0Sstevel@tonic-gate 	}
1537*0Sstevel@tonic-gate 	/*
1538*0Sstevel@tonic-gate 	 * If there is a shadow inode, check for the presence of an acl;
1539*0Sstevel@tonic-gate 	 * if the acl is there, use the ufs_acl_access routine to check
1540*0Sstevel@tonic-gate 	 * it.
1541*0Sstevel@tonic-gate 	 */
1542*0Sstevel@tonic-gate 	if (ip->i_ufs_acl && ip->i_ufs_acl->aowner)
1543*0Sstevel@tonic-gate 		return (ufs_acl_access(ip, mode, cr));
1544*0Sstevel@tonic-gate 
1545*0Sstevel@tonic-gate 	/*
1546*0Sstevel@tonic-gate 	 * Access check is based on only
1547*0Sstevel@tonic-gate 	 * one of owner, group, public.
1548*0Sstevel@tonic-gate 	 * If not owner, then check group.
1549*0Sstevel@tonic-gate 	 * If not a member of the group, then
1550*0Sstevel@tonic-gate 	 * check public access.
1551*0Sstevel@tonic-gate 	 */
1552*0Sstevel@tonic-gate 	if (crgetuid(cr) != ip->i_uid) {
1553*0Sstevel@tonic-gate 		shift += 3;
1554*0Sstevel@tonic-gate 		if (!groupmember((uid_t)ip->i_gid, cr))
1555*0Sstevel@tonic-gate 			shift += 3;
1556*0Sstevel@tonic-gate 	}
1557*0Sstevel@tonic-gate 
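	/*
	 * IREAD, IWRITE and IEXEC are the owner-class bits, so shifting
	 * i_mode left by 0 (owner), 3 (group) or 6 (other) lines the
	 * selected class of permission bits up underneath them.  After
	 * the mask below, mode holds only the permission bits that were
	 * requested but are not granted by that class.
	 */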
1558*0Sstevel@tonic-gate 	mode &= ~(ip->i_mode << shift);
1559*0Sstevel@tonic-gate 
1560*0Sstevel@tonic-gate 	if (mode == 0)
1561*0Sstevel@tonic-gate 		return (0);
1562*0Sstevel@tonic-gate 
1563*0Sstevel@tonic-gate 	/* test missing privilege bits */
1564*0Sstevel@tonic-gate 	return (secpolicy_vnode_access(cr, ITOV(ip), ip->i_uid, mode));
1565*0Sstevel@tonic-gate }
1566*0Sstevel@tonic-gate 
1567*0Sstevel@tonic-gate /*
1568*0Sstevel@tonic-gate  * if necessary, remove an inode from the free list
1569*0Sstevel@tonic-gate  *	i_contents is held except at unmount
1570*0Sstevel@tonic-gate  *
1571*0Sstevel@tonic-gate  * Return 1 if the inode is taken off of the ufs_idle_q,
1572*0Sstevel@tonic-gate  * and the caller is expected to call VN_RELE.
1573*0Sstevel@tonic-gate  *
1574*0Sstevel@tonic-gate  * Return 0 otherwise.
1575*0Sstevel@tonic-gate  */
1576*0Sstevel@tonic-gate int
1577*0Sstevel@tonic-gate ufs_rmidle(struct inode *ip)
1578*0Sstevel@tonic-gate {
1579*0Sstevel@tonic-gate 	int rval = 0;
1580*0Sstevel@tonic-gate 
1581*0Sstevel@tonic-gate 	mutex_enter(&ip->i_tlock);
1582*0Sstevel@tonic-gate 	if ((ip->i_flag & IREF) == 0) {
1583*0Sstevel@tonic-gate 		mutex_enter(&ufs_idle_q.uq_mutex);
1584*0Sstevel@tonic-gate 		ip->i_freef->i_freeb = ip->i_freeb;
1585*0Sstevel@tonic-gate 		ip->i_freeb->i_freef = ip->i_freef;
1586*0Sstevel@tonic-gate 		ip->i_freef = ip;
1587*0Sstevel@tonic-gate 		ip->i_freeb = ip;
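		/*
		 * The inode has been spliced out of the doubly linked idle
		 * queue; pointing i_freef and i_freeb back at the inode
		 * itself marks it as being on no free list.
		 */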
1588*0Sstevel@tonic-gate 		ip->i_flag |= IREF;
1589*0Sstevel@tonic-gate 		ufs_idle_q.uq_ne--;
1590*0Sstevel@tonic-gate 		if (ip->i_flag & IJUNKIQ) {
1591*0Sstevel@tonic-gate 			ufs_njunk_iq--;
1592*0Sstevel@tonic-gate 			ip->i_flag &= ~IJUNKIQ;
1593*0Sstevel@tonic-gate 		} else {
1594*0Sstevel@tonic-gate 			ufs_nuseful_iq--;
1595*0Sstevel@tonic-gate 		}
1596*0Sstevel@tonic-gate 		mutex_exit(&ufs_idle_q.uq_mutex);
1597*0Sstevel@tonic-gate 		rval = 1;
1598*0Sstevel@tonic-gate 	}
1599*0Sstevel@tonic-gate 	mutex_exit(&ip->i_tlock);
1600*0Sstevel@tonic-gate 	return (rval);
1601*0Sstevel@tonic-gate }
1602*0Sstevel@tonic-gate 
1603*0Sstevel@tonic-gate /*
1604*0Sstevel@tonic-gate  * scan the hash of inodes and call func with the inode locked
1605*0Sstevel@tonic-gate  * scan the hash of inodes and call func with a hold on each inode
1606*0Sstevel@tonic-gate int
1607*0Sstevel@tonic-gate ufs_scan_inodes(int rwtry, int (*func)(struct inode *, void *), void *arg,
1608*0Sstevel@tonic-gate 		struct ufsvfs *ufsvfsp)
1609*0Sstevel@tonic-gate {
1610*0Sstevel@tonic-gate 	struct inode		*ip;		/* current inode */
1611*0Sstevel@tonic-gate 	struct inode		*lip = NULL;	/* last/previous inode */
1612*0Sstevel@tonic-gate 	union ihead		*ih;		/* current hash chain */
1613*0Sstevel@tonic-gate 	int			error, i;
1614*0Sstevel@tonic-gate 	int			saverror = 0;
1615*0Sstevel@tonic-gate 	int			lip_held;	/* lip needs a VN_RELE() */
1616*0Sstevel@tonic-gate 
1617*0Sstevel@tonic-gate 	/*
1618*0Sstevel@tonic-gate 	 * If ufsvfsp is NULL, then our caller should be holding
1619*0Sstevel@tonic-gate 	 * ufs_scan_lock to avoid conflicts between ufs_unmount() and
1620*0Sstevel@tonic-gate 	 * ufs_update().  Otherwise, to avoid false-positives in
1621*0Sstevel@tonic-gate 	 * ufs_unmount()'s v_count-based EBUSY check, we only hold
1622*0Sstevel@tonic-gate 	 * those inodes that are in the file system our caller cares
1623*0Sstevel@tonic-gate 	 * about.
1624*0Sstevel@tonic-gate 	 *
1625*0Sstevel@tonic-gate 	 * We know that ip is a valid inode in the hash chain (and thus
1626*0Sstevel@tonic-gate 	 * we can trust i_ufsvfs) because the inode we chained from
1627*0Sstevel@tonic-gate 	 * (lip) is still in the hash chain.  This is true because either:
1628*0Sstevel@tonic-gate 	 *
1629*0Sstevel@tonic-gate 	 * 1. We did not drop the hash chain lock since the last
1630*0Sstevel@tonic-gate 	 *    iteration (because we were not interested in the last inode),
1631*0Sstevel@tonic-gate 	 * or
1632*0Sstevel@tonic-gate 	 * 2. We maintained a hold on the last inode while we
1633*0Sstevel@tonic-gate 	 *    were processing it, so it could not be removed
1634*0Sstevel@tonic-gate 	 *    from the hash chain.
1635*0Sstevel@tonic-gate 	 *
1636*0Sstevel@tonic-gate 	 * The whole reason we're dropping and re-grabbing the chain
1637*0Sstevel@tonic-gate 	 * lock on every inode is so that we don't present a major
1638*0Sstevel@tonic-gate 	 * choke point on throughput, particularly when we've been
1639*0Sstevel@tonic-gate 	 * called on behalf of fsflush.
1640*0Sstevel@tonic-gate 	 */
1641*0Sstevel@tonic-gate 
1642*0Sstevel@tonic-gate 	for (i = 0, ih = ihead; i < inohsz; i++, ih++) {
1643*0Sstevel@tonic-gate 		mutex_enter(&ih_lock[i]);
1644*0Sstevel@tonic-gate 		for (ip = ih->ih_chain[0], lip_held = 0;
1645*0Sstevel@tonic-gate 		    ip != (struct inode *)ih;
1646*0Sstevel@tonic-gate 		    ip = lip->i_forw) {
1647*0Sstevel@tonic-gate 
1648*0Sstevel@tonic-gate 			ins.in_scan.value.ul++;
1649*0Sstevel@tonic-gate 
1650*0Sstevel@tonic-gate 			/*
1651*0Sstevel@tonic-gate 			 * Undo the previous iteration's VN_HOLD(), but
1652*0Sstevel@tonic-gate 			 * only if one was done.
1653*0Sstevel@tonic-gate 			 */
1654*0Sstevel@tonic-gate 			if (lip_held)
1655*0Sstevel@tonic-gate 				VN_RELE(ITOV(lip));
1656*0Sstevel@tonic-gate 
1657*0Sstevel@tonic-gate 			lip = ip;
1658*0Sstevel@tonic-gate 			if (ufsvfsp != NULL && ip->i_ufsvfs != ufsvfsp) {
1659*0Sstevel@tonic-gate 				/*
1660*0Sstevel@tonic-gate 				 * We're not processing all inodes, and
1661*0Sstevel@tonic-gate 				 * this inode is not in the filesystem of
1662*0Sstevel@tonic-gate 				 * interest, so skip it.  No need to do a
1663*0Sstevel@tonic-gate 				 * VN_HOLD() since we're not dropping the
1664*0Sstevel@tonic-gate 				 * hash chain lock until after we've
1665*0Sstevel@tonic-gate 				 * done the i_forw traversal above.
1666*0Sstevel@tonic-gate 				 */
1667*0Sstevel@tonic-gate 				lip_held = 0;
1668*0Sstevel@tonic-gate 				continue;
1669*0Sstevel@tonic-gate 			}
1670*0Sstevel@tonic-gate 			VN_HOLD(ITOV(ip));
1671*0Sstevel@tonic-gate 			lip_held = 1;
1672*0Sstevel@tonic-gate 			mutex_exit(&ih_lock[i]);
1673*0Sstevel@tonic-gate 
1674*0Sstevel@tonic-gate 			/*
1675*0Sstevel@tonic-gate 			 * Acquire the contents lock as writer to make
1676*0Sstevel@tonic-gate 			 * sure that the inode has been initialized in
1677*0Sstevel@tonic-gate 			 * the cache or removed from the idle list by
1678*0Sstevel@tonic-gate 			 * ufs_iget().  This works because ufs_iget()
1679*0Sstevel@tonic-gate 			 * acquires the contents lock before putting
1680*0Sstevel@tonic-gate 			 * the inode into the cache.  If we can lock
1681*0Sstevel@tonic-gate 			 * it, then ufs_iget() is done with it.
1682*0Sstevel@tonic-gate 			 */
1683*0Sstevel@tonic-gate 
1684*0Sstevel@tonic-gate 			if (rwtry) {
1685*0Sstevel@tonic-gate 				if (!rw_tryenter(&ip->i_contents, RW_WRITER)) {
1686*0Sstevel@tonic-gate 					mutex_enter(&ih_lock[i]);
1687*0Sstevel@tonic-gate 					continue;
1688*0Sstevel@tonic-gate 				}
1689*0Sstevel@tonic-gate 			} else {
1690*0Sstevel@tonic-gate 				rw_enter(&ip->i_contents, RW_WRITER);
1691*0Sstevel@tonic-gate 			}
1692*0Sstevel@tonic-gate 
1693*0Sstevel@tonic-gate 			rw_exit(&ip->i_contents);
1694*0Sstevel@tonic-gate 
1695*0Sstevel@tonic-gate 			/*
1696*0Sstevel@tonic-gate 			 * ISTALE means the inode couldn't be read
1697*0Sstevel@tonic-gate 			 *
1698*0Sstevel@tonic-gate 			 * We don't have to hold the i_contents lock
1699*0Sstevel@tonic-gate 			 * for this check for a couple of
1700*0Sstevel@tonic-gate 			 * reasons. First, if ISTALE is set then the
1701*0Sstevel@tonic-gate 			 * flag cannot be cleared until the inode is
1702*0Sstevel@tonic-gate 			 * removed from the cache and that cannot
1703*0Sstevel@tonic-gate 			 * happen until after we VN_RELE() it.
1704*0Sstevel@tonic-gate 			 * Second, if ISTALE is not set, then the
1705*0Sstevel@tonic-gate 			 * inode is in the cache and does not need to
1706*0Sstevel@tonic-gate 			 * be read from disk so ISTALE cannot be set
1707*0Sstevel@tonic-gate 			 * while we are not looking.
1708*0Sstevel@tonic-gate 			 */
1709*0Sstevel@tonic-gate 			if ((ip->i_flag & ISTALE) == 0) {
1710*0Sstevel@tonic-gate 				if ((error = (*func)(ip, arg)) != 0)
1711*0Sstevel@tonic-gate 					saverror = error;
1712*0Sstevel@tonic-gate 			}
1713*0Sstevel@tonic-gate 
1714*0Sstevel@tonic-gate 			mutex_enter(&ih_lock[i]);
1715*0Sstevel@tonic-gate 		}
1716*0Sstevel@tonic-gate 		if (lip_held)
1717*0Sstevel@tonic-gate 			VN_RELE(ITOV(lip));
1718*0Sstevel@tonic-gate 		mutex_exit(&ih_lock[i]);
1719*0Sstevel@tonic-gate 	}
1720*0Sstevel@tonic-gate 	return (saverror);
1721*0Sstevel@tonic-gate }
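
/*
 * A minimal usage sketch (hypothetical callback, not taken from this
 * file), counting inodes of one filesystem with pending modifications:
 *
 *	static int
 *	count_dirty(struct inode *ip, void *arg)
 *	{
 *		if (ip->i_flag & (IMOD|IMODACC))
 *			(*(int *)arg)++;
 *		return (0);
 *	}
 *
 *	int n = 0;
 *	(void) ufs_scan_inodes(1, count_dirty, &n, ufsvfsp);
 */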
1722*0Sstevel@tonic-gate 
1723*0Sstevel@tonic-gate /*
1724*0Sstevel@tonic-gate  * Mark inode with the current time, plus a unique increment.
1725*0Sstevel@tonic-gate  *
1726*0Sstevel@tonic-gate  * Since we only keep 32-bit time on disk, if UFS is still alive
1727*0Sstevel@tonic-gate  * beyond 2038, filesystem times will simply stick at the last
1728*0Sstevel@tonic-gate  * possible second of 32-bit time. Not ideal, but probably better
1729*0Sstevel@tonic-gate  * than going into the remote past, or confusing applications with
1730*0Sstevel@tonic-gate  * negative time.
1731*0Sstevel@tonic-gate  */
1732*0Sstevel@tonic-gate void
1733*0Sstevel@tonic-gate ufs_imark(struct inode *ip)
1734*0Sstevel@tonic-gate {
1735*0Sstevel@tonic-gate 	timestruc_t now;
1736*0Sstevel@tonic-gate 	int32_t usec, nsec;
1737*0Sstevel@tonic-gate 
1738*0Sstevel@tonic-gate 	/*
1739*0Sstevel@tonic-gate 	 * The update of i_seq may have been deferred, increase i_seq here
1740*0Sstevel@tonic-gate 	 * to make sure it is in sync with the timestamps.
1741*0Sstevel@tonic-gate 	 */
1742*0Sstevel@tonic-gate 	if (ip->i_flag & ISEQ) {
1743*0Sstevel@tonic-gate 		ASSERT(ip->i_flag & (IUPD|ICHG));
1744*0Sstevel@tonic-gate 		ip->i_seq++;
1745*0Sstevel@tonic-gate 		ip->i_flag &= ~ISEQ;
1746*0Sstevel@tonic-gate 	}
1747*0Sstevel@tonic-gate 
1748*0Sstevel@tonic-gate 	gethrestime(&now);
1749*0Sstevel@tonic-gate 
1750*0Sstevel@tonic-gate 	/*
1751*0Sstevel@tonic-gate 	 * Fast algorithm to convert nsec to usec -- see hrt2ts()
1752*0Sstevel@tonic-gate 	 * in common/os/timers.c for a full description.
1753*0Sstevel@tonic-gate 	 */
1754*0Sstevel@tonic-gate 	nsec = now.tv_nsec;
1755*0Sstevel@tonic-gate 	usec = nsec + (nsec >> 2);
1756*0Sstevel@tonic-gate 	usec = nsec + (usec >> 1);
1757*0Sstevel@tonic-gate 	usec = nsec + (usec >> 2);
1758*0Sstevel@tonic-gate 	usec = nsec + (usec >> 4);
1759*0Sstevel@tonic-gate 	usec = nsec - (usec >> 3);
1760*0Sstevel@tonic-gate 	usec = nsec + (usec >> 2);
1761*0Sstevel@tonic-gate 	usec = nsec + (usec >> 3);
1762*0Sstevel@tonic-gate 	usec = nsec + (usec >> 4);
1763*0Sstevel@tonic-gate 	usec = nsec + (usec >> 1);
1764*0Sstevel@tonic-gate 	usec = nsec + (usec >> 6);
1765*0Sstevel@tonic-gate 	usec = usec >> 10;
1766*0Sstevel@tonic-gate 
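	/*
	 * The microsecond value computed above is merged into the global
	 * iuniqtime under ufs_iuniqtime_lock: if the clock has not
	 * visibly advanced since the last caller, tv_usec is simply
	 * bumped, so each call hands out a strictly newer timestamp.
	 */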
1767*0Sstevel@tonic-gate 	mutex_enter(&ufs_iuniqtime_lock);
1768*0Sstevel@tonic-gate 	if (now.tv_sec > (time_t)iuniqtime.tv_sec ||
1769*0Sstevel@tonic-gate 	    usec > iuniqtime.tv_usec) {
1770*0Sstevel@tonic-gate 		if (now.tv_sec < TIME32_MAX) {
1771*0Sstevel@tonic-gate 			iuniqtime.tv_sec = (time32_t)now.tv_sec;
1772*0Sstevel@tonic-gate 			iuniqtime.tv_usec = usec;
1773*0Sstevel@tonic-gate 		}
1774*0Sstevel@tonic-gate 	} else {
1775*0Sstevel@tonic-gate 		if (iuniqtime.tv_sec < TIME32_MAX) {
1776*0Sstevel@tonic-gate 			iuniqtime.tv_usec++;
1777*0Sstevel@tonic-gate 			/* Check for usec overflow */
1778*0Sstevel@tonic-gate 			if (iuniqtime.tv_usec >= MICROSEC) {
1779*0Sstevel@tonic-gate 				iuniqtime.tv_sec++;
1780*0Sstevel@tonic-gate 				iuniqtime.tv_usec = 0;
1781*0Sstevel@tonic-gate 			}
1782*0Sstevel@tonic-gate 		}
1783*0Sstevel@tonic-gate 	}
1784*0Sstevel@tonic-gate 
1785*0Sstevel@tonic-gate 	if ((ip->i_flag & IACC) && !(ip->i_ufsvfs->vfs_noatime)) {
1786*0Sstevel@tonic-gate 		ip->i_atime = iuniqtime;
1787*0Sstevel@tonic-gate 	}
1788*0Sstevel@tonic-gate 	if (ip->i_flag & IUPD) {
1789*0Sstevel@tonic-gate 		ip->i_mtime = iuniqtime;
1790*0Sstevel@tonic-gate 		ip->i_flag |= IMODTIME;
1791*0Sstevel@tonic-gate 	}
1792*0Sstevel@tonic-gate 	if (ip->i_flag & ICHG) {
1793*0Sstevel@tonic-gate 		ip->i_diroff = 0;
1794*0Sstevel@tonic-gate 		ip->i_ctime = iuniqtime;
1795*0Sstevel@tonic-gate 	}
1796*0Sstevel@tonic-gate 	mutex_exit(&ufs_iuniqtime_lock);
1797*0Sstevel@tonic-gate }
1798*0Sstevel@tonic-gate 
1799*0Sstevel@tonic-gate /*
1800*0Sstevel@tonic-gate  * Update timestamps in inode.
1801*0Sstevel@tonic-gate  */
1802*0Sstevel@tonic-gate void
1803*0Sstevel@tonic-gate ufs_itimes_nolock(struct inode *ip)
1804*0Sstevel@tonic-gate {
1805*0Sstevel@tonic-gate 
1806*0Sstevel@tonic-gate 	/*
1807*0Sstevel@tonic-gate 	 * if noatime is set and the inode access time is the only field that
1808*0Sstevel@tonic-gate 	 * must be changed, exit immediately.
1809*0Sstevel@tonic-gate 	 */
1810*0Sstevel@tonic-gate 	if (((ip->i_flag & (IUPD|IACC|ICHG)) == IACC) &&
1811*0Sstevel@tonic-gate 	    (ip->i_ufsvfs->vfs_noatime)) {
1812*0Sstevel@tonic-gate 		return;
1813*0Sstevel@tonic-gate 	}
1814*0Sstevel@tonic-gate 
1815*0Sstevel@tonic-gate 	if (ip->i_flag & (IUPD|IACC|ICHG)) {
1816*0Sstevel@tonic-gate 		if (ip->i_flag & ICHG)
1817*0Sstevel@tonic-gate 			ip->i_flag |= IMOD;
1818*0Sstevel@tonic-gate 		else
1819*0Sstevel@tonic-gate 			ip->i_flag |= IMODACC;
1820*0Sstevel@tonic-gate 		ufs_imark(ip);
1821*0Sstevel@tonic-gate 		ip->i_flag &= ~(IACC|IUPD|ICHG);
1822*0Sstevel@tonic-gate 	}
1823*0Sstevel@tonic-gate }
1824