10Sstevel@tonic-gate /*
20Sstevel@tonic-gate * CDDL HEADER START
30Sstevel@tonic-gate *
40Sstevel@tonic-gate * The contents of this file are subject to the terms of the
54662Sfrankho * Common Development and Distribution License (the "License").
64662Sfrankho * You may not use this file except in compliance with the License.
70Sstevel@tonic-gate *
80Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
90Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing.
100Sstevel@tonic-gate * See the License for the specific language governing permissions
110Sstevel@tonic-gate * and limitations under the License.
120Sstevel@tonic-gate *
130Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each
140Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
150Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the
160Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying
170Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner]
180Sstevel@tonic-gate *
190Sstevel@tonic-gate * CDDL HEADER END
200Sstevel@tonic-gate */
210Sstevel@tonic-gate /*
22*12273SCasper.Dik@Sun.COM * Copyright (c) 1983, 2010, Oracle and/or its affiliates. All rights reserved.
230Sstevel@tonic-gate */
240Sstevel@tonic-gate
250Sstevel@tonic-gate /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
260Sstevel@tonic-gate /* All Rights Reserved */
270Sstevel@tonic-gate
280Sstevel@tonic-gate /*
290Sstevel@tonic-gate * University Copyright- Copyright (c) 1982, 1986, 1988
300Sstevel@tonic-gate * The Regents of the University of California
310Sstevel@tonic-gate * All Rights Reserved
320Sstevel@tonic-gate *
330Sstevel@tonic-gate * University Acknowledgment- Portions of this document are derived from
340Sstevel@tonic-gate * software developed by the University of California, Berkeley, and its
350Sstevel@tonic-gate * contributors.
360Sstevel@tonic-gate */
370Sstevel@tonic-gate
380Sstevel@tonic-gate #include <sys/types.h>
390Sstevel@tonic-gate #include <sys/t_lock.h>
400Sstevel@tonic-gate #include <sys/param.h>
410Sstevel@tonic-gate #include <sys/systm.h>
420Sstevel@tonic-gate #include <sys/uio.h>
430Sstevel@tonic-gate #include <sys/bitmap.h>
440Sstevel@tonic-gate #include <sys/signal.h>
450Sstevel@tonic-gate #include <sys/cred.h>
460Sstevel@tonic-gate #include <sys/user.h>
470Sstevel@tonic-gate #include <sys/vfs.h>
480Sstevel@tonic-gate #include <sys/stat.h>
490Sstevel@tonic-gate #include <sys/vnode.h>
500Sstevel@tonic-gate #include <sys/buf.h>
510Sstevel@tonic-gate #include <sys/proc.h>
520Sstevel@tonic-gate #include <sys/disp.h>
530Sstevel@tonic-gate #include <sys/dnlc.h>
540Sstevel@tonic-gate #include <sys/mode.h>
550Sstevel@tonic-gate #include <sys/cmn_err.h>
560Sstevel@tonic-gate #include <sys/kstat.h>
570Sstevel@tonic-gate #include <sys/acl.h>
580Sstevel@tonic-gate #include <sys/var.h>
590Sstevel@tonic-gate #include <sys/fs/ufs_inode.h>
600Sstevel@tonic-gate #include <sys/fs/ufs_fs.h>
610Sstevel@tonic-gate #include <sys/fs/ufs_trans.h>
620Sstevel@tonic-gate #include <sys/fs/ufs_acl.h>
630Sstevel@tonic-gate #include <sys/fs/ufs_bio.h>
640Sstevel@tonic-gate #include <sys/fs/ufs_quota.h>
650Sstevel@tonic-gate #include <sys/fs/ufs_log.h>
660Sstevel@tonic-gate #include <vm/hat.h>
670Sstevel@tonic-gate #include <vm/as.h>
680Sstevel@tonic-gate #include <vm/pvn.h>
690Sstevel@tonic-gate #include <vm/seg.h>
700Sstevel@tonic-gate #include <sys/swap.h>
710Sstevel@tonic-gate #include <sys/cpuvar.h>
720Sstevel@tonic-gate #include <sys/sysmacros.h>
730Sstevel@tonic-gate #include <sys/errno.h>
740Sstevel@tonic-gate #include <sys/kmem.h>
750Sstevel@tonic-gate #include <sys/debug.h>
760Sstevel@tonic-gate #include <fs/fs_subr.h>
770Sstevel@tonic-gate #include <sys/policy.h>
780Sstevel@tonic-gate
790Sstevel@tonic-gate struct kmem_cache *inode_cache; /* cache of free inodes */
800Sstevel@tonic-gate
810Sstevel@tonic-gate /* UFS Inode Cache Stats -- Not protected */
820Sstevel@tonic-gate struct instats ins = {
830Sstevel@tonic-gate { "size", KSTAT_DATA_ULONG },
840Sstevel@tonic-gate { "maxsize", KSTAT_DATA_ULONG },
850Sstevel@tonic-gate { "hits", KSTAT_DATA_ULONG },
860Sstevel@tonic-gate { "misses", KSTAT_DATA_ULONG },
870Sstevel@tonic-gate { "kmem allocs", KSTAT_DATA_ULONG },
880Sstevel@tonic-gate { "kmem frees", KSTAT_DATA_ULONG },
890Sstevel@tonic-gate { "maxsize reached", KSTAT_DATA_ULONG },
900Sstevel@tonic-gate { "puts at frontlist", KSTAT_DATA_ULONG },
910Sstevel@tonic-gate { "puts at backlist", KSTAT_DATA_ULONG },
920Sstevel@tonic-gate { "queues to free", KSTAT_DATA_ULONG },
930Sstevel@tonic-gate { "scans", KSTAT_DATA_ULONG },
940Sstevel@tonic-gate { "thread idles", KSTAT_DATA_ULONG },
950Sstevel@tonic-gate { "lookup idles", KSTAT_DATA_ULONG },
960Sstevel@tonic-gate { "vget idles", KSTAT_DATA_ULONG },
970Sstevel@tonic-gate { "cache allocs", KSTAT_DATA_ULONG },
980Sstevel@tonic-gate { "cache frees", KSTAT_DATA_ULONG },
990Sstevel@tonic-gate { "pushes at close", KSTAT_DATA_ULONG }
1000Sstevel@tonic-gate };
1010Sstevel@tonic-gate
1020Sstevel@tonic-gate /* kstat data */
1030Sstevel@tonic-gate static kstat_t *ufs_inode_kstat = NULL;
1040Sstevel@tonic-gate
1050Sstevel@tonic-gate union ihead *ihead; /* inode LRU cache, Chris Maltby */
1060Sstevel@tonic-gate kmutex_t *ih_lock; /* protect inode cache hash table */
1070Sstevel@tonic-gate static int ino_hashlen = 4; /* desired average hash chain length */
1080Sstevel@tonic-gate int inohsz; /* number of buckets in the hash table */
1090Sstevel@tonic-gate
1100Sstevel@tonic-gate kmutex_t ufs_scan_lock; /* stop racing multiple ufs_scan_inodes() */
1110Sstevel@tonic-gate kmutex_t ufs_iuniqtime_lock; /* protect iuniqtime */
1120Sstevel@tonic-gate kmutex_t ufsvfs_mutex;
1130Sstevel@tonic-gate struct ufsvfs *oldufsvfslist, *ufsvfslist;
1140Sstevel@tonic-gate
1150Sstevel@tonic-gate /*
1160Sstevel@tonic-gate * time to wait after ufsvfsp->vfs_iotstamp before declaring that no
1170Sstevel@tonic-gate * I/Os are going on.
1180Sstevel@tonic-gate */
1190Sstevel@tonic-gate clock_t ufs_iowait;
1200Sstevel@tonic-gate
1210Sstevel@tonic-gate /*
1220Sstevel@tonic-gate * the threads that process idle inodes and free (deleted) inodes
1230Sstevel@tonic-gate * have high water marks that are set in ufsinit().
1240Sstevel@tonic-gate * These values but can be no less then the minimum shown below
1250Sstevel@tonic-gate */
1260Sstevel@tonic-gate int ufs_idle_max; /* # of allowable idle inodes */
1270Sstevel@tonic-gate ulong_t ufs_inode_max; /* hard limit of allowable idle inodes */
1280Sstevel@tonic-gate #define UFS_IDLE_MAX (16) /* min # of allowable idle inodes */
1290Sstevel@tonic-gate
1300Sstevel@tonic-gate /*
1310Sstevel@tonic-gate * Tunables for ufs write throttling.
1320Sstevel@tonic-gate * These are validated in ufs_iinit() since improper settings
1330Sstevel@tonic-gate * can lead to filesystem hangs.
1340Sstevel@tonic-gate */
1350Sstevel@tonic-gate #define UFS_HW_DEFAULT (16 * 1024 * 1024)
1360Sstevel@tonic-gate #define UFS_LW_DEFAULT (8 * 1024 * 1024)
1370Sstevel@tonic-gate int ufs_HW = UFS_HW_DEFAULT;
1380Sstevel@tonic-gate int ufs_LW = UFS_LW_DEFAULT;
1390Sstevel@tonic-gate
1400Sstevel@tonic-gate static void ihinit(void);
1410Sstevel@tonic-gate extern int hash2ints(int, int);
1420Sstevel@tonic-gate
1430Sstevel@tonic-gate static int ufs_iget_internal(struct vfs *, ino_t, struct inode **,
1440Sstevel@tonic-gate struct cred *, int);
1450Sstevel@tonic-gate
1460Sstevel@tonic-gate /* ARGSUSED */
1470Sstevel@tonic-gate static int
ufs_inode_kstat_update(kstat_t * ksp,int rw)1480Sstevel@tonic-gate ufs_inode_kstat_update(kstat_t *ksp, int rw)
1490Sstevel@tonic-gate {
1500Sstevel@tonic-gate if (rw == KSTAT_WRITE)
1510Sstevel@tonic-gate return (EACCES);
1520Sstevel@tonic-gate
1530Sstevel@tonic-gate ins.in_malloc.value.ul = (ulong_t)kmem_cache_stat(inode_cache,
1540Sstevel@tonic-gate "slab_alloc");
1550Sstevel@tonic-gate ins.in_mfree.value.ul = (ulong_t)kmem_cache_stat(inode_cache,
1560Sstevel@tonic-gate "slab_free");
1570Sstevel@tonic-gate ins.in_kcalloc.value.ul = (ulong_t)kmem_cache_stat(inode_cache,
1580Sstevel@tonic-gate "alloc");
1590Sstevel@tonic-gate ins.in_kcfree.value.ul = (ulong_t)kmem_cache_stat(inode_cache,
1600Sstevel@tonic-gate "free");
1610Sstevel@tonic-gate ins.in_size.value.ul = (ulong_t)kmem_cache_stat(inode_cache,
1620Sstevel@tonic-gate "buf_inuse");
1630Sstevel@tonic-gate ins.in_maxreached.value.ul = (ulong_t)kmem_cache_stat(inode_cache,
1640Sstevel@tonic-gate "buf_max");
1650Sstevel@tonic-gate ins.in_misses.value.ul = ins.in_kcalloc.value.ul;
1660Sstevel@tonic-gate
1670Sstevel@tonic-gate return (0);
1680Sstevel@tonic-gate }
1690Sstevel@tonic-gate
1700Sstevel@tonic-gate void
ufs_iinit(void)1710Sstevel@tonic-gate ufs_iinit(void)
1720Sstevel@tonic-gate {
1730Sstevel@tonic-gate /*
1740Sstevel@tonic-gate * Validate that ufs_HW > ufs_LW.
1750Sstevel@tonic-gate * The default values for these two tunables have been increased.
1760Sstevel@tonic-gate * There is now a range of values for ufs_HW that used to be
1770Sstevel@tonic-gate * legal on previous Solaris versions but no longer is now.
1780Sstevel@tonic-gate * Upgrading a machine which has an /etc/system setting for ufs_HW
1790Sstevel@tonic-gate * from that range can lead to filesystem hangs unless the values
1800Sstevel@tonic-gate * are checked here.
1810Sstevel@tonic-gate */
1820Sstevel@tonic-gate if (ufs_HW <= ufs_LW) {
1830Sstevel@tonic-gate cmn_err(CE_WARN,
1844662Sfrankho "ufs_HW (%d) <= ufs_LW (%d). Check /etc/system.",
1854662Sfrankho ufs_HW, ufs_LW);
1860Sstevel@tonic-gate ufs_LW = UFS_LW_DEFAULT;
1870Sstevel@tonic-gate ufs_HW = UFS_HW_DEFAULT;
1880Sstevel@tonic-gate cmn_err(CE_CONT, "using defaults, ufs_HW = %d, ufs_LW = %d\n",
1894662Sfrankho ufs_HW, ufs_LW);
1900Sstevel@tonic-gate }
1910Sstevel@tonic-gate
1920Sstevel@tonic-gate /*
1930Sstevel@tonic-gate * Adjust the tunable `ufs_ninode' to a reasonable value
1940Sstevel@tonic-gate */
1950Sstevel@tonic-gate if (ufs_ninode <= 0)
1960Sstevel@tonic-gate ufs_ninode = ncsize;
1970Sstevel@tonic-gate if (ufs_inode_max == 0)
1984662Sfrankho ufs_inode_max =
1994662Sfrankho (ulong_t)((kmem_maxavail() >> 2) / sizeof (struct inode));
2000Sstevel@tonic-gate if (ufs_ninode > ufs_inode_max || (ufs_ninode == 0 && ncsize == 0)) {
2010Sstevel@tonic-gate cmn_err(CE_NOTE, "setting ufs_ninode to max value of %ld",
2024662Sfrankho ufs_inode_max);
2030Sstevel@tonic-gate ufs_ninode = ufs_inode_max;
2040Sstevel@tonic-gate }
2050Sstevel@tonic-gate /*
2060Sstevel@tonic-gate * Wait till third call of ufs_update to declare that no I/Os are
2070Sstevel@tonic-gate * going on. This allows deferred access times to be flushed to disk.
2080Sstevel@tonic-gate */
2090Sstevel@tonic-gate ufs_iowait = v.v_autoup * hz * 2;
2100Sstevel@tonic-gate
2110Sstevel@tonic-gate /*
2120Sstevel@tonic-gate * idle thread runs when 25% of ufs_ninode entries are on the queue
2130Sstevel@tonic-gate */
2140Sstevel@tonic-gate if (ufs_idle_max == 0)
2150Sstevel@tonic-gate ufs_idle_max = ufs_ninode >> 2;
2160Sstevel@tonic-gate if (ufs_idle_max < UFS_IDLE_MAX)
2170Sstevel@tonic-gate ufs_idle_max = UFS_IDLE_MAX;
2180Sstevel@tonic-gate if (ufs_idle_max > ufs_ninode)
2190Sstevel@tonic-gate ufs_idle_max = ufs_ninode;
2200Sstevel@tonic-gate /*
2210Sstevel@tonic-gate * This is really a misnomer, it is ufs_queue_init
2220Sstevel@tonic-gate */
2230Sstevel@tonic-gate ufs_thread_init(&ufs_idle_q, ufs_idle_max);
2240Sstevel@tonic-gate ufs_thread_start(&ufs_idle_q, ufs_thread_idle, NULL);
2250Sstevel@tonic-gate
2260Sstevel@tonic-gate /*
2270Sstevel@tonic-gate * global hlock thread
2280Sstevel@tonic-gate */
2290Sstevel@tonic-gate ufs_thread_init(&ufs_hlock, 1);
2300Sstevel@tonic-gate ufs_thread_start(&ufs_hlock, ufs_thread_hlock, NULL);
2310Sstevel@tonic-gate
2320Sstevel@tonic-gate ihinit();
2330Sstevel@tonic-gate qtinit();
2340Sstevel@tonic-gate ins.in_maxsize.value.ul = ufs_ninode;
2350Sstevel@tonic-gate if ((ufs_inode_kstat = kstat_create("ufs", 0, "inode_cache", "ufs",
2360Sstevel@tonic-gate KSTAT_TYPE_NAMED, sizeof (ins) / sizeof (kstat_named_t),
2370Sstevel@tonic-gate KSTAT_FLAG_VIRTUAL)) != NULL) {
2380Sstevel@tonic-gate ufs_inode_kstat->ks_data = (void *)&ins;
2390Sstevel@tonic-gate ufs_inode_kstat->ks_update = ufs_inode_kstat_update;
2400Sstevel@tonic-gate kstat_install(ufs_inode_kstat);
2410Sstevel@tonic-gate }
2420Sstevel@tonic-gate ufsfx_init(); /* fix-on-panic initialization */
2430Sstevel@tonic-gate si_cache_init();
2440Sstevel@tonic-gate ufs_directio_init();
2450Sstevel@tonic-gate lufs_init();
2460Sstevel@tonic-gate mutex_init(&ufs_iuniqtime_lock, NULL, MUTEX_DEFAULT, NULL);
2470Sstevel@tonic-gate }
2480Sstevel@tonic-gate
2490Sstevel@tonic-gate /* ARGSUSED */
2500Sstevel@tonic-gate static int
ufs_inode_cache_constructor(void * buf,void * cdrarg,int kmflags)2510Sstevel@tonic-gate ufs_inode_cache_constructor(void *buf, void *cdrarg, int kmflags)
2520Sstevel@tonic-gate {
2530Sstevel@tonic-gate struct inode *ip = buf;
2540Sstevel@tonic-gate struct vnode *vp;
2550Sstevel@tonic-gate
2566712Stomee vp = ip->i_vnode = vn_alloc(kmflags);
2576712Stomee if (vp == NULL) {
2586712Stomee return (-1);
2596712Stomee }
2606712Stomee vn_setops(vp, ufs_vnodeops);
2616712Stomee vp->v_data = ip;
2626712Stomee
2630Sstevel@tonic-gate rw_init(&ip->i_rwlock, NULL, RW_DEFAULT, NULL);
2640Sstevel@tonic-gate rw_init(&ip->i_contents, NULL, RW_DEFAULT, NULL);
2650Sstevel@tonic-gate mutex_init(&ip->i_tlock, NULL, MUTEX_DEFAULT, NULL);
2660Sstevel@tonic-gate dnlc_dir_init(&ip->i_danchor);
2670Sstevel@tonic-gate
2680Sstevel@tonic-gate cv_init(&ip->i_wrcv, NULL, CV_DRIVER, NULL);
2690Sstevel@tonic-gate
2700Sstevel@tonic-gate return (0);
2710Sstevel@tonic-gate }
2720Sstevel@tonic-gate
2730Sstevel@tonic-gate /* ARGSUSED */
2740Sstevel@tonic-gate static void
ufs_inode_cache_destructor(void * buf,void * cdrarg)2750Sstevel@tonic-gate ufs_inode_cache_destructor(void *buf, void *cdrarg)
2760Sstevel@tonic-gate {
2770Sstevel@tonic-gate struct inode *ip = buf;
2780Sstevel@tonic-gate struct vnode *vp;
2790Sstevel@tonic-gate
2800Sstevel@tonic-gate vp = ITOV(ip);
2810Sstevel@tonic-gate
2820Sstevel@tonic-gate rw_destroy(&ip->i_rwlock);
2830Sstevel@tonic-gate rw_destroy(&ip->i_contents);
2840Sstevel@tonic-gate mutex_destroy(&ip->i_tlock);
2850Sstevel@tonic-gate if (vp->v_type == VDIR) {
2860Sstevel@tonic-gate dnlc_dir_fini(&ip->i_danchor);
2870Sstevel@tonic-gate }
2880Sstevel@tonic-gate
2890Sstevel@tonic-gate cv_destroy(&ip->i_wrcv);
2900Sstevel@tonic-gate
2910Sstevel@tonic-gate vn_free(vp);
2920Sstevel@tonic-gate }
2930Sstevel@tonic-gate
2940Sstevel@tonic-gate /*
2950Sstevel@tonic-gate * Initialize hash links for inodes
2960Sstevel@tonic-gate * and build inode free list.
2970Sstevel@tonic-gate */
2980Sstevel@tonic-gate void
ihinit(void)2990Sstevel@tonic-gate ihinit(void)
3000Sstevel@tonic-gate {
3010Sstevel@tonic-gate int i;
3020Sstevel@tonic-gate union ihead *ih = ihead;
3030Sstevel@tonic-gate
3040Sstevel@tonic-gate mutex_init(&ufs_scan_lock, NULL, MUTEX_DEFAULT, NULL);
3050Sstevel@tonic-gate
3060Sstevel@tonic-gate inohsz = 1 << highbit(ufs_ninode / ino_hashlen);
3070Sstevel@tonic-gate ihead = kmem_zalloc(inohsz * sizeof (union ihead), KM_SLEEP);
3080Sstevel@tonic-gate ih_lock = kmem_zalloc(inohsz * sizeof (kmutex_t), KM_SLEEP);
3090Sstevel@tonic-gate
3100Sstevel@tonic-gate for (i = 0, ih = ihead; i < inohsz; i++, ih++) {
3110Sstevel@tonic-gate ih->ih_head[0] = ih;
3120Sstevel@tonic-gate ih->ih_head[1] = ih;
3130Sstevel@tonic-gate mutex_init(&ih_lock[i], NULL, MUTEX_DEFAULT, NULL);
3140Sstevel@tonic-gate }
3150Sstevel@tonic-gate inode_cache = kmem_cache_create("ufs_inode_cache",
3164662Sfrankho sizeof (struct inode), 0, ufs_inode_cache_constructor,
3174662Sfrankho ufs_inode_cache_destructor, ufs_inode_cache_reclaim,
3184662Sfrankho NULL, NULL, 0);
3190Sstevel@tonic-gate }
3200Sstevel@tonic-gate
3210Sstevel@tonic-gate /*
3220Sstevel@tonic-gate * Free an inode structure
3230Sstevel@tonic-gate */
3240Sstevel@tonic-gate void
ufs_free_inode(struct inode * ip)3250Sstevel@tonic-gate ufs_free_inode(struct inode *ip)
3260Sstevel@tonic-gate {
3270Sstevel@tonic-gate vn_invalid(ITOV(ip));
3280Sstevel@tonic-gate kmem_cache_free(inode_cache, ip);
3290Sstevel@tonic-gate }
3300Sstevel@tonic-gate
3310Sstevel@tonic-gate /*
3320Sstevel@tonic-gate * Allocate an inode structure
3330Sstevel@tonic-gate */
3340Sstevel@tonic-gate struct inode *
ufs_alloc_inode(ufsvfs_t * ufsvfsp,ino_t ino)3350Sstevel@tonic-gate ufs_alloc_inode(ufsvfs_t *ufsvfsp, ino_t ino)
3360Sstevel@tonic-gate {
3370Sstevel@tonic-gate struct inode *ip;
3380Sstevel@tonic-gate vnode_t *vp;
3390Sstevel@tonic-gate
3400Sstevel@tonic-gate ip = kmem_cache_alloc(inode_cache, KM_SLEEP);
3410Sstevel@tonic-gate /*
3420Sstevel@tonic-gate * at this point we have a newly allocated inode
3430Sstevel@tonic-gate */
3440Sstevel@tonic-gate ip->i_freef = ip;
3450Sstevel@tonic-gate ip->i_freeb = ip;
3460Sstevel@tonic-gate ip->i_flag = IREF;
3470Sstevel@tonic-gate ip->i_seq = 0xFF; /* Unique initial value */
3480Sstevel@tonic-gate ip->i_dev = ufsvfsp->vfs_dev;
3490Sstevel@tonic-gate ip->i_ufsvfs = ufsvfsp;
3500Sstevel@tonic-gate ip->i_devvp = ufsvfsp->vfs_devvp;
3510Sstevel@tonic-gate ip->i_number = ino;
3520Sstevel@tonic-gate ip->i_diroff = 0;
3530Sstevel@tonic-gate ip->i_nextr = 0;
3540Sstevel@tonic-gate ip->i_map = NULL;
3550Sstevel@tonic-gate ip->i_rdev = 0;
3560Sstevel@tonic-gate ip->i_writes = 0;
3570Sstevel@tonic-gate ip->i_mode = 0;
3580Sstevel@tonic-gate ip->i_delaylen = 0;
3590Sstevel@tonic-gate ip->i_delayoff = 0;
3600Sstevel@tonic-gate ip->i_nextrio = 0;
3610Sstevel@tonic-gate ip->i_ufs_acl = NULL;
3620Sstevel@tonic-gate ip->i_cflags = 0;
3630Sstevel@tonic-gate ip->i_mapcnt = 0;
3640Sstevel@tonic-gate ip->i_dquot = NULL;
365745Svsakar ip->i_cachedir = CD_ENABLED;
3660Sstevel@tonic-gate ip->i_writer = NULL;
3670Sstevel@tonic-gate
3680Sstevel@tonic-gate /*
3690Sstevel@tonic-gate * the vnode for this inode was allocated by the constructor
3700Sstevel@tonic-gate */
3710Sstevel@tonic-gate vp = ITOV(ip);
3720Sstevel@tonic-gate vn_reinit(vp);
3730Sstevel@tonic-gate if (ino == (ino_t)UFSROOTINO)
3740Sstevel@tonic-gate vp->v_flag = VROOT;
3750Sstevel@tonic-gate vp->v_vfsp = ufsvfsp->vfs_vfs;
3760Sstevel@tonic-gate vn_exists(vp);
3770Sstevel@tonic-gate return (ip);
3780Sstevel@tonic-gate }
3790Sstevel@tonic-gate
3800Sstevel@tonic-gate /*
3810Sstevel@tonic-gate * Look up an inode by device, inumber. If it is in core (in the
3820Sstevel@tonic-gate * inode structure), honor the locking protocol. If it is not in
3830Sstevel@tonic-gate * core, read it in from the specified device after freeing any pages.
3840Sstevel@tonic-gate * In all cases, a pointer to a VN_HELD inode structure is returned.
3850Sstevel@tonic-gate */
3860Sstevel@tonic-gate int
ufs_iget(struct vfs * vfsp,ino_t ino,struct inode ** ipp,struct cred * cr)3870Sstevel@tonic-gate ufs_iget(struct vfs *vfsp, ino_t ino, struct inode **ipp, struct cred *cr)
3880Sstevel@tonic-gate {
3890Sstevel@tonic-gate return (ufs_iget_internal(vfsp, ino, ipp, cr, 0));
3900Sstevel@tonic-gate }
3910Sstevel@tonic-gate
3920Sstevel@tonic-gate /*
3930Sstevel@tonic-gate * A version of ufs_iget which returns only allocated, linked inodes.
3940Sstevel@tonic-gate * This is appropriate for any callers who do not expect a free inode.
3950Sstevel@tonic-gate */
3960Sstevel@tonic-gate int
ufs_iget_alloced(struct vfs * vfsp,ino_t ino,struct inode ** ipp,struct cred * cr)3970Sstevel@tonic-gate ufs_iget_alloced(struct vfs *vfsp, ino_t ino, struct inode **ipp,
3980Sstevel@tonic-gate struct cred *cr)
3990Sstevel@tonic-gate {
4000Sstevel@tonic-gate return (ufs_iget_internal(vfsp, ino, ipp, cr, 1));
4010Sstevel@tonic-gate }
4020Sstevel@tonic-gate
4030Sstevel@tonic-gate /*
4040Sstevel@tonic-gate * Set vnode attributes based on v_type, this should be called whenever
4050Sstevel@tonic-gate * an inode's i_mode is changed.
4060Sstevel@tonic-gate */
4070Sstevel@tonic-gate void
ufs_reset_vnode(vnode_t * vp)4080Sstevel@tonic-gate ufs_reset_vnode(vnode_t *vp)
4090Sstevel@tonic-gate {
4100Sstevel@tonic-gate /*
4110Sstevel@tonic-gate * an old DBE hack
4120Sstevel@tonic-gate */
4130Sstevel@tonic-gate if ((VTOI(vp)->i_mode & (ISVTX | IEXEC | IFDIR)) == ISVTX)
4140Sstevel@tonic-gate vp->v_flag |= VSWAPLIKE;
4150Sstevel@tonic-gate else
4160Sstevel@tonic-gate vp->v_flag &= ~VSWAPLIKE;
4170Sstevel@tonic-gate
4180Sstevel@tonic-gate /*
4190Sstevel@tonic-gate * if not swap like and it's just a regular file, we want
4200Sstevel@tonic-gate * to maintain the vnode's pages sorted by clean/modified
4210Sstevel@tonic-gate * for faster sync'ing to disk
4220Sstevel@tonic-gate */
4230Sstevel@tonic-gate if (vp->v_type == VREG)
4240Sstevel@tonic-gate vp->v_flag |= VMODSORT;
4250Sstevel@tonic-gate else
4260Sstevel@tonic-gate vp->v_flag &= ~VMODSORT;
4270Sstevel@tonic-gate
4280Sstevel@tonic-gate /*
4290Sstevel@tonic-gate * Is this an attribute hidden dir?
4300Sstevel@tonic-gate */
4310Sstevel@tonic-gate if ((VTOI(vp)->i_mode & IFMT) == IFATTRDIR)
4320Sstevel@tonic-gate vp->v_flag |= V_XATTRDIR;
4330Sstevel@tonic-gate else
4340Sstevel@tonic-gate vp->v_flag &= ~V_XATTRDIR;
4350Sstevel@tonic-gate }
4360Sstevel@tonic-gate
4370Sstevel@tonic-gate /*
4380Sstevel@tonic-gate * Shared implementation of ufs_iget and ufs_iget_alloced. The 'validate'
4390Sstevel@tonic-gate * flag is used to distinguish the two; when true, we validate that the inode
4400Sstevel@tonic-gate * being retrieved looks like a linked and allocated inode.
4410Sstevel@tonic-gate */
4420Sstevel@tonic-gate /* ARGSUSED */
4430Sstevel@tonic-gate static int
ufs_iget_internal(struct vfs * vfsp,ino_t ino,struct inode ** ipp,struct cred * cr,int validate)4440Sstevel@tonic-gate ufs_iget_internal(struct vfs *vfsp, ino_t ino, struct inode **ipp,
4450Sstevel@tonic-gate struct cred *cr, int validate)
4460Sstevel@tonic-gate {
4470Sstevel@tonic-gate struct inode *ip, *sp;
4480Sstevel@tonic-gate union ihead *ih;
4490Sstevel@tonic-gate kmutex_t *ihm;
4500Sstevel@tonic-gate struct buf *bp;
4510Sstevel@tonic-gate struct dinode *dp;
4520Sstevel@tonic-gate struct vnode *vp;
4530Sstevel@tonic-gate extern vfs_t EIO_vfs;
4540Sstevel@tonic-gate int error;
4550Sstevel@tonic-gate int ftype; /* XXX - Remove later on */
4560Sstevel@tonic-gate dev_t vfs_dev;
4570Sstevel@tonic-gate struct ufsvfs *ufsvfsp;
4580Sstevel@tonic-gate struct fs *fs;
4590Sstevel@tonic-gate int hno;
4600Sstevel@tonic-gate daddr_t bno;
4610Sstevel@tonic-gate ulong_t ioff;
4620Sstevel@tonic-gate
4630Sstevel@tonic-gate CPU_STATS_ADD_K(sys, ufsiget, 1);
4640Sstevel@tonic-gate
4650Sstevel@tonic-gate /*
4660Sstevel@tonic-gate * Lookup inode in cache.
4670Sstevel@tonic-gate */
4680Sstevel@tonic-gate vfs_dev = vfsp->vfs_dev;
4690Sstevel@tonic-gate hno = INOHASH(ino);
4700Sstevel@tonic-gate ih = &ihead[hno];
4710Sstevel@tonic-gate ihm = &ih_lock[hno];
4720Sstevel@tonic-gate
4730Sstevel@tonic-gate again:
4740Sstevel@tonic-gate mutex_enter(ihm);
4750Sstevel@tonic-gate for (ip = ih->ih_chain[0]; ip != (struct inode *)ih; ip = ip->i_forw) {
4760Sstevel@tonic-gate if (ino != ip->i_number || vfs_dev != ip->i_dev ||
4770Sstevel@tonic-gate (ip->i_flag & ISTALE))
4780Sstevel@tonic-gate continue;
4790Sstevel@tonic-gate
4800Sstevel@tonic-gate /*
4810Sstevel@tonic-gate * Found the interesting inode; hold it and drop the cache lock
4820Sstevel@tonic-gate */
4830Sstevel@tonic-gate vp = ITOV(ip); /* for locknest */
4840Sstevel@tonic-gate VN_HOLD(vp);
4850Sstevel@tonic-gate mutex_exit(ihm);
4860Sstevel@tonic-gate rw_enter(&ip->i_contents, RW_READER);
4870Sstevel@tonic-gate
4880Sstevel@tonic-gate /*
4890Sstevel@tonic-gate * if necessary, remove from idle list
4900Sstevel@tonic-gate */
4910Sstevel@tonic-gate if ((ip->i_flag & IREF) == 0) {
4920Sstevel@tonic-gate if (ufs_rmidle(ip))
4930Sstevel@tonic-gate VN_RELE(vp);
4940Sstevel@tonic-gate }
4950Sstevel@tonic-gate
4960Sstevel@tonic-gate /*
4970Sstevel@tonic-gate * Could the inode be read from disk?
4980Sstevel@tonic-gate */
4990Sstevel@tonic-gate if (ip->i_flag & ISTALE) {
5000Sstevel@tonic-gate rw_exit(&ip->i_contents);
5010Sstevel@tonic-gate VN_RELE(vp);
5020Sstevel@tonic-gate goto again;
5030Sstevel@tonic-gate }
5040Sstevel@tonic-gate
5050Sstevel@tonic-gate ins.in_hits.value.ul++;
5060Sstevel@tonic-gate *ipp = ip;
5070Sstevel@tonic-gate
5080Sstevel@tonic-gate /*
5090Sstevel@tonic-gate * Reset the vnode's attribute flags
5100Sstevel@tonic-gate */
5110Sstevel@tonic-gate mutex_enter(&vp->v_lock);
5120Sstevel@tonic-gate ufs_reset_vnode(vp);
5130Sstevel@tonic-gate mutex_exit(&vp->v_lock);
5140Sstevel@tonic-gate
5150Sstevel@tonic-gate rw_exit(&ip->i_contents);
5160Sstevel@tonic-gate
5170Sstevel@tonic-gate return (0);
5180Sstevel@tonic-gate }
5190Sstevel@tonic-gate mutex_exit(ihm);
5200Sstevel@tonic-gate
5210Sstevel@tonic-gate /*
5220Sstevel@tonic-gate * Inode was not in cache.
5230Sstevel@tonic-gate *
5240Sstevel@tonic-gate * Allocate a new entry
5250Sstevel@tonic-gate */
5260Sstevel@tonic-gate ufsvfsp = (struct ufsvfs *)vfsp->vfs_data;
5270Sstevel@tonic-gate fs = ufsvfsp->vfs_fs;
5280Sstevel@tonic-gate
5290Sstevel@tonic-gate ip = ufs_alloc_inode(ufsvfsp, ino);
5300Sstevel@tonic-gate vp = ITOV(ip);
5310Sstevel@tonic-gate
5320Sstevel@tonic-gate bno = fsbtodb(fs, itod(fs, ino));
5330Sstevel@tonic-gate ioff = (sizeof (struct dinode)) * (itoo(fs, ino));
5340Sstevel@tonic-gate ip->i_doff = (offset_t)ioff + ldbtob(bno);
5350Sstevel@tonic-gate
5360Sstevel@tonic-gate /*
5370Sstevel@tonic-gate * put a place holder in the cache (if not already there)
5380Sstevel@tonic-gate */
5390Sstevel@tonic-gate mutex_enter(ihm);
5400Sstevel@tonic-gate for (sp = ih->ih_chain[0]; sp != (struct inode *)ih; sp = sp->i_forw)
5410Sstevel@tonic-gate if (ino == sp->i_number && vfs_dev == sp->i_dev &&
5420Sstevel@tonic-gate ((sp->i_flag & ISTALE) == 0)) {
5430Sstevel@tonic-gate mutex_exit(ihm);
5440Sstevel@tonic-gate ufs_free_inode(ip);
5450Sstevel@tonic-gate goto again;
5460Sstevel@tonic-gate }
5470Sstevel@tonic-gate /*
5480Sstevel@tonic-gate * It would be nice to ASSERT(RW_READ_HELD(&ufsvfsp->vfs_dqrwlock))
5490Sstevel@tonic-gate * here, but if we do, then shadow inode allocations panic the
5500Sstevel@tonic-gate * system. We don't have to hold vfs_dqrwlock for shadow inodes
5510Sstevel@tonic-gate * and the ufs_iget() parameters don't tell us what we are getting
5520Sstevel@tonic-gate * so we have no way of knowing this is a ufs_iget() call from
5530Sstevel@tonic-gate * a ufs_ialloc() call for a shadow inode.
5540Sstevel@tonic-gate */
5550Sstevel@tonic-gate rw_enter(&ip->i_contents, RW_WRITER);
5560Sstevel@tonic-gate insque(ip, ih);
5570Sstevel@tonic-gate mutex_exit(ihm);
5580Sstevel@tonic-gate /*
5590Sstevel@tonic-gate * read the dinode
5600Sstevel@tonic-gate */
5610Sstevel@tonic-gate bp = UFS_BREAD(ufsvfsp, ip->i_dev, bno, (int)fs->fs_bsize);
5620Sstevel@tonic-gate
5630Sstevel@tonic-gate /*
5640Sstevel@tonic-gate * Check I/O errors
5650Sstevel@tonic-gate */
5660Sstevel@tonic-gate error = ((bp->b_flags & B_ERROR) ? geterror(bp) : 0);
5670Sstevel@tonic-gate if (error) {
5680Sstevel@tonic-gate brelse(bp);
5690Sstevel@tonic-gate ip->i_flag |= ISTALE; /* in case someone is looking it up */
5700Sstevel@tonic-gate rw_exit(&ip->i_contents);
5710Sstevel@tonic-gate vp->v_vfsp = &EIO_vfs;
5720Sstevel@tonic-gate VN_RELE(vp);
5730Sstevel@tonic-gate return (error);
5740Sstevel@tonic-gate }
5750Sstevel@tonic-gate /*
5760Sstevel@tonic-gate * initialize the inode's dinode
5770Sstevel@tonic-gate */
5780Sstevel@tonic-gate dp = (struct dinode *)(ioff + bp->b_un.b_addr);
5790Sstevel@tonic-gate ip->i_ic = dp->di_ic; /* structure assignment */
5800Sstevel@tonic-gate brelse(bp);
5810Sstevel@tonic-gate
5820Sstevel@tonic-gate /*
5830Sstevel@tonic-gate * Maintain compatibility with Solaris 1.x UFS
5840Sstevel@tonic-gate */
5850Sstevel@tonic-gate if (ip->i_suid != UID_LONG)
5860Sstevel@tonic-gate ip->i_uid = ip->i_suid;
5870Sstevel@tonic-gate if (ip->i_sgid != GID_LONG)
5880Sstevel@tonic-gate ip->i_gid = ip->i_sgid;
5890Sstevel@tonic-gate
5900Sstevel@tonic-gate ftype = ip->i_mode & IFMT;
5910Sstevel@tonic-gate if (ftype == IFBLK || ftype == IFCHR) {
5920Sstevel@tonic-gate dev_t dv;
5930Sstevel@tonic-gate uint_t top16 = ip->i_ordev & 0xffff0000u;
5940Sstevel@tonic-gate
5950Sstevel@tonic-gate if (top16 == 0 || top16 == 0xffff0000u)
5960Sstevel@tonic-gate dv = expdev(ip->i_ordev);
5970Sstevel@tonic-gate else
5980Sstevel@tonic-gate dv = expldev(ip->i_ordev);
5990Sstevel@tonic-gate vp->v_rdev = ip->i_rdev = dv;
6000Sstevel@tonic-gate }
6010Sstevel@tonic-gate
6020Sstevel@tonic-gate /*
6030Sstevel@tonic-gate * if our caller only expects allocated inodes, verify that
6040Sstevel@tonic-gate * this inode looks good; throw it out if it's bad.
6050Sstevel@tonic-gate */
6060Sstevel@tonic-gate if (validate) {
6070Sstevel@tonic-gate if ((ftype == 0) || (ip->i_nlink <= 0)) {
6080Sstevel@tonic-gate ip->i_flag |= ISTALE;
6090Sstevel@tonic-gate rw_exit(&ip->i_contents);
6100Sstevel@tonic-gate vp->v_vfsp = &EIO_vfs;
6110Sstevel@tonic-gate VN_RELE(vp);
6120Sstevel@tonic-gate cmn_err(CE_NOTE,
6130Sstevel@tonic-gate "%s: unexpected free inode %d, run fsck(1M)%s",
6140Sstevel@tonic-gate fs->fs_fsmnt, (int)ino,
6150Sstevel@tonic-gate (TRANS_ISTRANS(ufsvfsp) ? " -o f" : ""));
6160Sstevel@tonic-gate return (EIO);
6170Sstevel@tonic-gate }
6180Sstevel@tonic-gate }
6190Sstevel@tonic-gate
6200Sstevel@tonic-gate /*
6215244Sbatschul * Finish initializing the vnode, special handling for shadow inodes
6225244Sbatschul * because IFTOVT() will produce a v_type of VNON which is not what we
6235244Sbatschul * want, set v_type to VREG explicitly in that case.
6240Sstevel@tonic-gate */
6255244Sbatschul if (ftype == IFSHAD) {
6265244Sbatschul vp->v_type = VREG;
6275244Sbatschul } else {
6285244Sbatschul vp->v_type = IFTOVT((mode_t)ip->i_mode);
6295244Sbatschul }
6300Sstevel@tonic-gate
6310Sstevel@tonic-gate ufs_reset_vnode(vp);
6320Sstevel@tonic-gate
6330Sstevel@tonic-gate /*
6340Sstevel@tonic-gate * read the shadow
6350Sstevel@tonic-gate */
6360Sstevel@tonic-gate if (ftype != 0 && ip->i_shadow != 0) {
6370Sstevel@tonic-gate if ((error = ufs_si_load(ip, cr)) != 0) {
6380Sstevel@tonic-gate ip->i_flag |= ISTALE;
6390Sstevel@tonic-gate ip->i_ufs_acl = NULL;
6400Sstevel@tonic-gate rw_exit(&ip->i_contents);
6410Sstevel@tonic-gate vp->v_vfsp = &EIO_vfs;
6420Sstevel@tonic-gate VN_RELE(vp);
6430Sstevel@tonic-gate return (error);
6440Sstevel@tonic-gate }
6450Sstevel@tonic-gate }
6460Sstevel@tonic-gate
6470Sstevel@tonic-gate /*
6480Sstevel@tonic-gate * Only attach quota information if the inode has a type and if
6490Sstevel@tonic-gate * that type is not a shadow inode.
6500Sstevel@tonic-gate */
6510Sstevel@tonic-gate if (ip->i_mode && ((ip->i_mode & IFMT) != IFSHAD) &&
6520Sstevel@tonic-gate ((ip->i_mode & IFMT) != IFATTRDIR)) {
6530Sstevel@tonic-gate ip->i_dquot = getinoquota(ip);
6540Sstevel@tonic-gate }
6550Sstevel@tonic-gate TRANS_MATA_IGET(ufsvfsp, ip);
6560Sstevel@tonic-gate *ipp = ip;
6570Sstevel@tonic-gate rw_exit(&ip->i_contents);
6580Sstevel@tonic-gate
6590Sstevel@tonic-gate return (0);
6600Sstevel@tonic-gate }
6610Sstevel@tonic-gate
6620Sstevel@tonic-gate /*
6630Sstevel@tonic-gate * Vnode is no longer referenced, write the inode out
6640Sstevel@tonic-gate * and if necessary, truncate and deallocate the file.
6650Sstevel@tonic-gate */
6660Sstevel@tonic-gate void
ufs_iinactive(struct inode * ip)6670Sstevel@tonic-gate ufs_iinactive(struct inode *ip)
6680Sstevel@tonic-gate {
6690Sstevel@tonic-gate int front;
6700Sstevel@tonic-gate struct inode *iq;
6710Sstevel@tonic-gate struct inode *hip;
6720Sstevel@tonic-gate struct ufs_q *uq;
6730Sstevel@tonic-gate struct vnode *vp = ITOV(ip);
674512Sjkennedy struct ufsvfs *ufsvfsp = ip->i_ufsvfs;
675512Sjkennedy struct ufs_delq_info *delq_info = &ufsvfsp->vfs_delete_info;
6760Sstevel@tonic-gate
6770Sstevel@tonic-gate /*
6780Sstevel@tonic-gate * Because the vnode type might have been changed,
6790Sstevel@tonic-gate * the dnlc_dir_purge must be called unconditionally.
6800Sstevel@tonic-gate */
6810Sstevel@tonic-gate dnlc_dir_purge(&ip->i_danchor);
6820Sstevel@tonic-gate
6830Sstevel@tonic-gate /*
6840Sstevel@tonic-gate * Get exclusive access to inode data.
6850Sstevel@tonic-gate */
6860Sstevel@tonic-gate rw_enter(&ip->i_contents, RW_WRITER);
6870Sstevel@tonic-gate ASSERT(ip->i_flag & IREF);
6880Sstevel@tonic-gate
6890Sstevel@tonic-gate /*
6900Sstevel@tonic-gate * Make sure no one reclaimed the inode before we put it on
6910Sstevel@tonic-gate * the freelist or destroy it. We keep our 'hold' on the vnode
6920Sstevel@tonic-gate * from vn_rele until we are ready to do something with the inode.
6930Sstevel@tonic-gate *
6940Sstevel@tonic-gate * Pageout may put a VN_HOLD/VN_RELE at anytime during this
6950Sstevel@tonic-gate * operation via an async putpage, so we must make sure
6960Sstevel@tonic-gate * we don't free/destroy the inode more than once. ufs_iget
6970Sstevel@tonic-gate * may also put a VN_HOLD on the inode before it grabs
6980Sstevel@tonic-gate * the i_contents lock. This is done so we don't free
6990Sstevel@tonic-gate * an inode that a thread is waiting on.
7000Sstevel@tonic-gate */
7010Sstevel@tonic-gate mutex_enter(&vp->v_lock);
7020Sstevel@tonic-gate
7030Sstevel@tonic-gate if (vp->v_count > 1) {
7044662Sfrankho vp->v_count--; /* release our hold from vn_rele */
7054662Sfrankho mutex_exit(&vp->v_lock);
7064662Sfrankho rw_exit(&ip->i_contents);
7074662Sfrankho return;
7080Sstevel@tonic-gate }
7090Sstevel@tonic-gate mutex_exit(&vp->v_lock);
7100Sstevel@tonic-gate
7110Sstevel@tonic-gate /*
7120Sstevel@tonic-gate * For umount case: if ufsvfs ptr is NULL, the inode is unhashed
7130Sstevel@tonic-gate * and clean. It can be safely destroyed (cyf).
7140Sstevel@tonic-gate */
7150Sstevel@tonic-gate if (ip->i_ufsvfs == NULL) {
7160Sstevel@tonic-gate rw_exit(&ip->i_contents);
7170Sstevel@tonic-gate ufs_si_del(ip);
7180Sstevel@tonic-gate ASSERT((vp->v_type == VCHR) || !vn_has_cached_data(vp));
7190Sstevel@tonic-gate ufs_free_inode(ip);
7200Sstevel@tonic-gate return;
7210Sstevel@tonic-gate }
7220Sstevel@tonic-gate
7230Sstevel@tonic-gate /*
7240Sstevel@tonic-gate * queue idle inode to appropriate thread. Will check v_count == 1
7250Sstevel@tonic-gate * prior to putting this on the appropriate queue.
7260Sstevel@tonic-gate * Stale inodes will be unhashed and freed by the ufs idle thread
7270Sstevel@tonic-gate * in ufs_idle_free()
7280Sstevel@tonic-gate */
7290Sstevel@tonic-gate front = 1;
7300Sstevel@tonic-gate if ((ip->i_flag & ISTALE) == 0 && ip->i_fs->fs_ronly == 0 &&
7310Sstevel@tonic-gate ip->i_mode && ip->i_nlink <= 0) {
7320Sstevel@tonic-gate /*
7330Sstevel@tonic-gate * Mark the i_flag to indicate that inode is being deleted.
7340Sstevel@tonic-gate * This flag will be cleared when the deletion is complete.
7350Sstevel@tonic-gate * This prevents nfs from sneaking in via ufs_vget() while
7360Sstevel@tonic-gate * the delete is in progress (bugid 1242481).
7370Sstevel@tonic-gate */
7380Sstevel@tonic-gate ip->i_flag |= IDEL;
7390Sstevel@tonic-gate
7400Sstevel@tonic-gate /*
7410Sstevel@tonic-gate * NOIDEL means that deletes are not allowed at this time;
7420Sstevel@tonic-gate * whoever resets NOIDEL will also send this inode back
7430Sstevel@tonic-gate * through ufs_iinactive. IREF remains set.
7440Sstevel@tonic-gate */
7450Sstevel@tonic-gate if (ULOCKFS_IS_NOIDEL(ITOUL(ip))) {
7460Sstevel@tonic-gate mutex_enter(&vp->v_lock);
7470Sstevel@tonic-gate vp->v_count--;
7480Sstevel@tonic-gate mutex_exit(&vp->v_lock);
7490Sstevel@tonic-gate rw_exit(&ip->i_contents);
7500Sstevel@tonic-gate return;
7510Sstevel@tonic-gate }
7520Sstevel@tonic-gate if (!TRANS_ISTRANS(ip->i_ufsvfs)) {
7530Sstevel@tonic-gate rw_exit(&ip->i_contents);
7540Sstevel@tonic-gate ufs_delete(ip->i_ufsvfs, ip, 0);
7550Sstevel@tonic-gate return;
7560Sstevel@tonic-gate }
7570Sstevel@tonic-gate
7580Sstevel@tonic-gate /* queue to delete thread; IREF remains set */
7590Sstevel@tonic-gate ins.in_qfree.value.ul++;
7600Sstevel@tonic-gate uq = &ip->i_ufsvfs->vfs_delete;
7610Sstevel@tonic-gate
7620Sstevel@tonic-gate mutex_enter(&uq->uq_mutex);
7630Sstevel@tonic-gate
7640Sstevel@tonic-gate /* add to q */
7650Sstevel@tonic-gate if ((iq = uq->uq_ihead) != 0) {
7660Sstevel@tonic-gate ip->i_freef = iq;
7670Sstevel@tonic-gate ip->i_freeb = iq->i_freeb;
7680Sstevel@tonic-gate iq->i_freeb->i_freef = ip;
7690Sstevel@tonic-gate iq->i_freeb = ip;
7700Sstevel@tonic-gate if (front)
7710Sstevel@tonic-gate uq->uq_ihead = ip;
7720Sstevel@tonic-gate } else {
7730Sstevel@tonic-gate uq->uq_ihead = ip;
7740Sstevel@tonic-gate ip->i_freef = ip;
7750Sstevel@tonic-gate ip->i_freeb = ip;
7760Sstevel@tonic-gate }
777512Sjkennedy
778512Sjkennedy delq_info->delq_unreclaimed_files += 1;
779512Sjkennedy delq_info->delq_unreclaimed_blocks += ip->i_blocks;
7800Sstevel@tonic-gate } else {
7810Sstevel@tonic-gate /*
7820Sstevel@tonic-gate * queue to idle thread
7830Sstevel@tonic-gate * Check the v_count == 1 again.
7840Sstevel@tonic-gate *
7850Sstevel@tonic-gate */
7860Sstevel@tonic-gate mutex_enter(&vp->v_lock);
7870Sstevel@tonic-gate if (vp->v_count > 1) {
7884662Sfrankho vp->v_count--; /* release our hold from vn_rele */
7894662Sfrankho mutex_exit(&vp->v_lock);
7904662Sfrankho rw_exit(&ip->i_contents);
7914662Sfrankho return;
7920Sstevel@tonic-gate }
7930Sstevel@tonic-gate mutex_exit(&vp->v_lock);
7940Sstevel@tonic-gate uq = &ufs_idle_q;
7950Sstevel@tonic-gate
7960Sstevel@tonic-gate /*
7970Sstevel@tonic-gate * useful iff it has pages or is a fastsymlink; otherwise junk
7980Sstevel@tonic-gate */
7990Sstevel@tonic-gate mutex_enter(&uq->uq_mutex);
8000Sstevel@tonic-gate
8010Sstevel@tonic-gate /* clear IREF means `on idle list' */
8020Sstevel@tonic-gate ip->i_flag &= ~(IREF | IDIRECTIO);
8030Sstevel@tonic-gate
8040Sstevel@tonic-gate if (vn_has_cached_data(vp) || ip->i_flag & IFASTSYMLNK) {
8050Sstevel@tonic-gate ins.in_frback.value.ul++;
8060Sstevel@tonic-gate hip = (inode_t *)&ufs_useful_iq[IQHASH(ip)];
8070Sstevel@tonic-gate ufs_nuseful_iq++;
8080Sstevel@tonic-gate } else {
8090Sstevel@tonic-gate ins.in_frfront.value.ul++;
8100Sstevel@tonic-gate hip = (inode_t *)&ufs_junk_iq[IQHASH(ip)];
8110Sstevel@tonic-gate ip->i_flag |= IJUNKIQ;
8120Sstevel@tonic-gate ufs_njunk_iq++;
8130Sstevel@tonic-gate }
8140Sstevel@tonic-gate ip->i_freef = hip;
8150Sstevel@tonic-gate ip->i_freeb = hip->i_freeb;
8160Sstevel@tonic-gate hip->i_freeb->i_freef = ip;
8170Sstevel@tonic-gate hip->i_freeb = ip;
8180Sstevel@tonic-gate }
8190Sstevel@tonic-gate
8200Sstevel@tonic-gate /* wakeup thread(s) if q is overfull */
8210Sstevel@tonic-gate if (++uq->uq_ne == uq->uq_lowat)
8220Sstevel@tonic-gate cv_broadcast(&uq->uq_cv);
8230Sstevel@tonic-gate
8240Sstevel@tonic-gate /* all done, release the q and inode */
8250Sstevel@tonic-gate mutex_exit(&uq->uq_mutex);
8260Sstevel@tonic-gate rw_exit(&ip->i_contents);
8270Sstevel@tonic-gate }
8280Sstevel@tonic-gate
8290Sstevel@tonic-gate /*
8300Sstevel@tonic-gate * Check accessed and update flags on an inode structure.
8310Sstevel@tonic-gate * If any are on, update the inode with the (unique) current time.
8320Sstevel@tonic-gate * If waitfor is given, insure I/O order so wait for write to complete.
8330Sstevel@tonic-gate */
8340Sstevel@tonic-gate void
ufs_iupdat(struct inode * ip,int waitfor)8350Sstevel@tonic-gate ufs_iupdat(struct inode *ip, int waitfor)
8360Sstevel@tonic-gate {
8370Sstevel@tonic-gate struct buf *bp;
8380Sstevel@tonic-gate struct fs *fp;
8390Sstevel@tonic-gate struct dinode *dp;
8400Sstevel@tonic-gate struct ufsvfs *ufsvfsp = ip->i_ufsvfs;
8410Sstevel@tonic-gate int i;
8420Sstevel@tonic-gate int do_trans_times;
8430Sstevel@tonic-gate ushort_t flag;
8440Sstevel@tonic-gate o_uid_t suid;
8450Sstevel@tonic-gate o_gid_t sgid;
8460Sstevel@tonic-gate
8470Sstevel@tonic-gate /*
8480Sstevel@tonic-gate * This function is now safe to be called with either the reader
8490Sstevel@tonic-gate * or writer i_contents lock.
8500Sstevel@tonic-gate */
8510Sstevel@tonic-gate ASSERT(RW_LOCK_HELD(&ip->i_contents));
8520Sstevel@tonic-gate
8530Sstevel@tonic-gate /*
8540Sstevel@tonic-gate * Return if file system has been forcibly umounted.
8550Sstevel@tonic-gate */
8560Sstevel@tonic-gate if (ufsvfsp == NULL)
8570Sstevel@tonic-gate return;
8580Sstevel@tonic-gate
8590Sstevel@tonic-gate flag = ip->i_flag; /* Atomic read */
8600Sstevel@tonic-gate /*
8610Sstevel@tonic-gate * We better not update the disk inode from a stale inode.
8620Sstevel@tonic-gate */
8630Sstevel@tonic-gate if (flag & ISTALE)
8640Sstevel@tonic-gate return;
8650Sstevel@tonic-gate
8660Sstevel@tonic-gate fp = ip->i_fs;
8670Sstevel@tonic-gate
8680Sstevel@tonic-gate if ((flag & (IUPD|IACC|ICHG|IMOD|IMODACC|IATTCHG)) != 0) {
8690Sstevel@tonic-gate if (fp->fs_ronly) {
8700Sstevel@tonic-gate mutex_enter(&ip->i_tlock);
8710Sstevel@tonic-gate ip->i_flag &= ~(IUPD|IACC|ICHG|IMOD|IMODACC|IATTCHG);
8720Sstevel@tonic-gate mutex_exit(&ip->i_tlock);
8730Sstevel@tonic-gate return;
8740Sstevel@tonic-gate }
8750Sstevel@tonic-gate /*
8760Sstevel@tonic-gate * fs is active while metadata is being written
8770Sstevel@tonic-gate */
8780Sstevel@tonic-gate mutex_enter(&ufsvfsp->vfs_lock);
8790Sstevel@tonic-gate ufs_notclean(ufsvfsp);
8800Sstevel@tonic-gate /*
8810Sstevel@tonic-gate * get the dinode
8820Sstevel@tonic-gate */
8830Sstevel@tonic-gate bp = UFS_BREAD(ufsvfsp, ip->i_dev,
8840Sstevel@tonic-gate (daddr_t)fsbtodb(fp, itod(fp, ip->i_number)),
8850Sstevel@tonic-gate (int)fp->fs_bsize);
8860Sstevel@tonic-gate if (bp->b_flags & B_ERROR) {
8870Sstevel@tonic-gate mutex_enter(&ip->i_tlock);
8880Sstevel@tonic-gate ip->i_flag &=
8890Sstevel@tonic-gate ~(IUPD|IACC|ICHG|IMOD|IMODACC|IATTCHG);
8900Sstevel@tonic-gate mutex_exit(&ip->i_tlock);
8910Sstevel@tonic-gate brelse(bp);
8920Sstevel@tonic-gate return;
8930Sstevel@tonic-gate }
8940Sstevel@tonic-gate /*
8950Sstevel@tonic-gate * munge inode fields
8960Sstevel@tonic-gate */
8970Sstevel@tonic-gate mutex_enter(&ip->i_tlock);
8980Sstevel@tonic-gate ITIMES_NOLOCK(ip);
8990Sstevel@tonic-gate do_trans_times = ((ip->i_flag & (IMOD|IMODACC)) == IMODACC);
9000Sstevel@tonic-gate ip->i_flag &= ~(IUPD|IACC|ICHG|IMOD|IMODACC|IATTCHG);
9010Sstevel@tonic-gate mutex_exit(&ip->i_tlock);
9020Sstevel@tonic-gate
9030Sstevel@tonic-gate /*
9040Sstevel@tonic-gate * For reads and concurrent re-writes, no deltas were
9050Sstevel@tonic-gate * entered for the access time changes - do it now.
9060Sstevel@tonic-gate */
9070Sstevel@tonic-gate if (do_trans_times) {
9080Sstevel@tonic-gate TRANS_INODE_TIMES(ufsvfsp, ip);
9090Sstevel@tonic-gate }
9100Sstevel@tonic-gate
9110Sstevel@tonic-gate /*
9120Sstevel@tonic-gate * For SunOS 5.0->5.4, these lines below read:
9130Sstevel@tonic-gate *
9140Sstevel@tonic-gate * suid = (ip->i_uid > MAXUID) ? UID_LONG : ip->i_uid;
9150Sstevel@tonic-gate * sgid = (ip->i_gid > MAXUID) ? GID_LONG : ip->i_gid;
9160Sstevel@tonic-gate *
9170Sstevel@tonic-gate * where MAXUID was set to 60002. This was incorrect -
9180Sstevel@tonic-gate * the uids should have been constrained to what fitted into
9190Sstevel@tonic-gate * a 16-bit word.
9200Sstevel@tonic-gate *
9210Sstevel@tonic-gate * This means that files from 4.x filesystems that have an
9220Sstevel@tonic-gate * i_suid field larger than 60002 will have that field
9230Sstevel@tonic-gate * changed to 65535.
9240Sstevel@tonic-gate *
9250Sstevel@tonic-gate * Security note: 4.x UFS could never create a i_suid of
9260Sstevel@tonic-gate * UID_LONG since that would've corresponded to -1.
9270Sstevel@tonic-gate */
9280Sstevel@tonic-gate suid = (ulong_t)ip->i_uid > (ulong_t)USHRT_MAX ?
9294662Sfrankho UID_LONG : ip->i_uid;
9300Sstevel@tonic-gate sgid = (ulong_t)ip->i_gid > (ulong_t)USHRT_MAX ?
9314662Sfrankho GID_LONG : ip->i_gid;
9320Sstevel@tonic-gate
9330Sstevel@tonic-gate if ((ip->i_suid != suid) || (ip->i_sgid != sgid)) {
9340Sstevel@tonic-gate ip->i_suid = suid;
9350Sstevel@tonic-gate ip->i_sgid = sgid;
9360Sstevel@tonic-gate TRANS_INODE(ufsvfsp, ip);
9370Sstevel@tonic-gate }
9380Sstevel@tonic-gate
9390Sstevel@tonic-gate if ((ip->i_mode & IFMT) == IFBLK ||
9400Sstevel@tonic-gate (ip->i_mode & IFMT) == IFCHR) {
9410Sstevel@tonic-gate dev_t d = ip->i_rdev;
9420Sstevel@tonic-gate dev32_t dev32;
9430Sstevel@tonic-gate
9440Sstevel@tonic-gate /*
9450Sstevel@tonic-gate * load first direct block only if special device
9460Sstevel@tonic-gate */
9470Sstevel@tonic-gate if (!cmpldev(&dev32, d)) {
9480Sstevel@tonic-gate /*
9490Sstevel@tonic-gate * We panic here because there's "no way"
9500Sstevel@tonic-gate * we should have been able to create a large
9510Sstevel@tonic-gate * inode with a large dev_t. Earlier layers
9520Sstevel@tonic-gate * should've caught this.
9530Sstevel@tonic-gate */
9540Sstevel@tonic-gate panic("ip %p: i_rdev too big", (void *)ip);
9550Sstevel@tonic-gate }
9560Sstevel@tonic-gate
9570Sstevel@tonic-gate if (dev32 & ~((O_MAXMAJ << L_BITSMINOR32) | O_MAXMIN)) {
9580Sstevel@tonic-gate ip->i_ordev = dev32; /* can't use old fmt. */
9590Sstevel@tonic-gate } else {
9600Sstevel@tonic-gate ip->i_ordev = cmpdev(d);
9610Sstevel@tonic-gate }
9620Sstevel@tonic-gate }
9630Sstevel@tonic-gate
9640Sstevel@tonic-gate /*
9650Sstevel@tonic-gate * copy inode to dinode (zero fastsymlnk in dinode)
9660Sstevel@tonic-gate */
9670Sstevel@tonic-gate dp = (struct dinode *)bp->b_un.b_addr + itoo(fp, ip->i_number);
9680Sstevel@tonic-gate dp->di_ic = ip->i_ic; /* structure assignment */
9690Sstevel@tonic-gate if (flag & IFASTSYMLNK) {
9700Sstevel@tonic-gate for (i = 1; i < NDADDR; i++)
9710Sstevel@tonic-gate dp->di_db[i] = 0;
9720Sstevel@tonic-gate for (i = 0; i < NIADDR; i++)
9730Sstevel@tonic-gate dp->di_ib[i] = 0;
9740Sstevel@tonic-gate }
9750Sstevel@tonic-gate if (TRANS_ISTRANS(ufsvfsp)) {
9760Sstevel@tonic-gate /*
9770Sstevel@tonic-gate * Pass only a sector size buffer containing
9780Sstevel@tonic-gate * the inode, otherwise when the buffer is copied
9790Sstevel@tonic-gate * into a cached roll buffer then too much memory
9800Sstevel@tonic-gate * gets consumed if 8KB inode buffers are passed.
9810Sstevel@tonic-gate */
9820Sstevel@tonic-gate TRANS_LOG(ufsvfsp, (caddr_t)dp, ip->i_doff,
9830Sstevel@tonic-gate sizeof (struct dinode),
9840Sstevel@tonic-gate (caddr_t)P2ALIGN((uintptr_t)dp, DEV_BSIZE),
9850Sstevel@tonic-gate DEV_BSIZE);
9860Sstevel@tonic-gate
9870Sstevel@tonic-gate brelse(bp);
9880Sstevel@tonic-gate } else if (waitfor && (ip->i_ufsvfs->vfs_dio == 0)) {
9890Sstevel@tonic-gate UFS_BRWRITE(ufsvfsp, bp);
9900Sstevel@tonic-gate
9910Sstevel@tonic-gate /*
9920Sstevel@tonic-gate * Synchronous write has guaranteed that inode
9930Sstevel@tonic-gate * has been written on disk so clear the flag
9940Sstevel@tonic-gate */
9950Sstevel@tonic-gate mutex_enter(&ip->i_tlock);
9960Sstevel@tonic-gate ip->i_flag &= ~IBDWRITE;
9970Sstevel@tonic-gate mutex_exit(&ip->i_tlock);
9980Sstevel@tonic-gate } else {
9990Sstevel@tonic-gate bdrwrite(bp);
10000Sstevel@tonic-gate
10010Sstevel@tonic-gate /*
10020Sstevel@tonic-gate * This write hasn't guaranteed that inode has been
10030Sstevel@tonic-gate * written on the disk.
10040Sstevel@tonic-gate * Since, all updat flags on inode are cleared, we must
10050Sstevel@tonic-gate * remember the condition in case inode is to be updated
10060Sstevel@tonic-gate * synchronously later (e.g.- fsync()/fdatasync())
10070Sstevel@tonic-gate * and inode has not been modified yet.
10080Sstevel@tonic-gate */
10090Sstevel@tonic-gate mutex_enter(&ip->i_tlock);
10100Sstevel@tonic-gate ip->i_flag |= IBDWRITE;
10110Sstevel@tonic-gate mutex_exit(&ip->i_tlock);
10120Sstevel@tonic-gate }
10130Sstevel@tonic-gate } else {
10140Sstevel@tonic-gate /*
10150Sstevel@tonic-gate * In case previous inode update was done asynchronously
10160Sstevel@tonic-gate * (IBDWRITE) and this inode update request wants guaranteed
10170Sstevel@tonic-gate * (synchronous) disk update, flush the inode.
10180Sstevel@tonic-gate */
10190Sstevel@tonic-gate if (waitfor && (flag & IBDWRITE)) {
10200Sstevel@tonic-gate blkflush(ip->i_dev,
10214662Sfrankho (daddr_t)fsbtodb(fp, itod(fp, ip->i_number)));
10220Sstevel@tonic-gate mutex_enter(&ip->i_tlock);
10230Sstevel@tonic-gate ip->i_flag &= ~IBDWRITE;
10240Sstevel@tonic-gate mutex_exit(&ip->i_tlock);
10250Sstevel@tonic-gate }
10260Sstevel@tonic-gate }
10270Sstevel@tonic-gate }
10280Sstevel@tonic-gate
10290Sstevel@tonic-gate #define SINGLE 0 /* index of single indirect block */
10300Sstevel@tonic-gate #define DOUBLE 1 /* index of double indirect block */
10310Sstevel@tonic-gate #define TRIPLE 2 /* index of triple indirect block */
10320Sstevel@tonic-gate
10330Sstevel@tonic-gate /*
10340Sstevel@tonic-gate * Release blocks associated with the inode ip and
10350Sstevel@tonic-gate * stored in the indirect block bn. Blocks are free'd
10360Sstevel@tonic-gate * in LIFO order up to (but not including) lastbn. If
10370Sstevel@tonic-gate * level is greater than SINGLE, the block is an indirect
10380Sstevel@tonic-gate * block and recursive calls to indirtrunc must be used to
10390Sstevel@tonic-gate * cleanse other indirect blocks.
10400Sstevel@tonic-gate *
10410Sstevel@tonic-gate * N.B.: triple indirect blocks are untested.
10420Sstevel@tonic-gate */
10430Sstevel@tonic-gate static long
indirtrunc(struct inode * ip,daddr_t bn,daddr_t lastbn,int level,int flags)10440Sstevel@tonic-gate indirtrunc(struct inode *ip, daddr_t bn, daddr_t lastbn, int level, int flags)
10450Sstevel@tonic-gate {
10460Sstevel@tonic-gate int i;
10470Sstevel@tonic-gate struct buf *bp, *copy;
10480Sstevel@tonic-gate daddr32_t *bap;
10490Sstevel@tonic-gate struct ufsvfs *ufsvfsp = ip->i_ufsvfs;
10500Sstevel@tonic-gate struct fs *fs = ufsvfsp->vfs_fs;
10510Sstevel@tonic-gate daddr_t nb, last;
10520Sstevel@tonic-gate long factor;
10530Sstevel@tonic-gate int blocksreleased = 0, nblocks;
10540Sstevel@tonic-gate
10550Sstevel@tonic-gate ASSERT(RW_WRITE_HELD(&ip->i_contents));
10560Sstevel@tonic-gate /*
10570Sstevel@tonic-gate * Calculate index in current block of last
10580Sstevel@tonic-gate * block to be kept. -1 indicates the entire
10590Sstevel@tonic-gate * block so we need not calculate the index.
10600Sstevel@tonic-gate */
10610Sstevel@tonic-gate factor = 1;
10620Sstevel@tonic-gate for (i = SINGLE; i < level; i++)
10630Sstevel@tonic-gate factor *= NINDIR(fs);
10640Sstevel@tonic-gate last = lastbn;
10650Sstevel@tonic-gate if (lastbn > 0)
10660Sstevel@tonic-gate last /= factor;
10670Sstevel@tonic-gate nblocks = btodb(fs->fs_bsize);
10680Sstevel@tonic-gate /*
10690Sstevel@tonic-gate * Get buffer of block pointers, zero those
10700Sstevel@tonic-gate * entries corresponding to blocks to be free'd,
10710Sstevel@tonic-gate * and update on disk copy first.
10720Sstevel@tonic-gate * *Unless* the root pointer has been synchronously
10730Sstevel@tonic-gate * written to disk. If nothing points to this
10740Sstevel@tonic-gate * indirect block then don't bother zero'ing and
10750Sstevel@tonic-gate * writing it.
10760Sstevel@tonic-gate */
10770Sstevel@tonic-gate bp = UFS_BREAD(ufsvfsp,
10784662Sfrankho ip->i_dev, (daddr_t)fsbtodb(fs, bn), (int)fs->fs_bsize);
10790Sstevel@tonic-gate if (bp->b_flags & B_ERROR) {
10800Sstevel@tonic-gate brelse(bp);
10810Sstevel@tonic-gate return (0);
10820Sstevel@tonic-gate }
10830Sstevel@tonic-gate bap = bp->b_un.b_daddr;
10840Sstevel@tonic-gate if ((flags & I_CHEAP) == 0) {
10850Sstevel@tonic-gate uint_t zb;
10860Sstevel@tonic-gate
10870Sstevel@tonic-gate zb = (uint_t)((NINDIR(fs) - (last + 1)) * sizeof (daddr32_t));
10880Sstevel@tonic-gate
10890Sstevel@tonic-gate if (zb) {
10900Sstevel@tonic-gate /*
10910Sstevel@tonic-gate * push any data into the log before we zero it
10920Sstevel@tonic-gate */
10930Sstevel@tonic-gate if (bp->b_flags & B_DELWRI)
10940Sstevel@tonic-gate TRANS_LOG(ufsvfsp, (caddr_t)bap,
10954662Sfrankho ldbtob(bp->b_blkno), bp->b_bcount,
10964662Sfrankho bp->b_un.b_addr, bp->b_bcount);
10970Sstevel@tonic-gate copy = ngeteblk(fs->fs_bsize);
10980Sstevel@tonic-gate bcopy((caddr_t)bap, (caddr_t)copy->b_un.b_daddr,
10994662Sfrankho (uint_t)fs->fs_bsize);
11000Sstevel@tonic-gate bzero((caddr_t)&bap[last + 1], zb);
11010Sstevel@tonic-gate
11020Sstevel@tonic-gate TRANS_BUF(ufsvfsp,
11034662Sfrankho (caddr_t)&bap[last + 1] - (caddr_t)bap,
11044662Sfrankho zb, bp, DT_ABZERO);
11050Sstevel@tonic-gate
11060Sstevel@tonic-gate UFS_BRWRITE(ufsvfsp, bp);
11070Sstevel@tonic-gate bp = copy, bap = bp->b_un.b_daddr;
11080Sstevel@tonic-gate }
11090Sstevel@tonic-gate } else {
11100Sstevel@tonic-gate /* make sure write retries are also cleared */
11110Sstevel@tonic-gate bp->b_flags &= ~(B_DELWRI | B_RETRYWRI);
11120Sstevel@tonic-gate bp->b_flags |= B_STALE | B_AGE;
11130Sstevel@tonic-gate }
11140Sstevel@tonic-gate
11150Sstevel@tonic-gate /*
11160Sstevel@tonic-gate * Recursively free totally unused blocks.
11170Sstevel@tonic-gate */
11180Sstevel@tonic-gate flags |= I_CHEAP;
11190Sstevel@tonic-gate for (i = NINDIR(fs) - 1; i > last; i--) {
11200Sstevel@tonic-gate nb = bap[i];
11210Sstevel@tonic-gate if (nb == 0)
11220Sstevel@tonic-gate continue;
11230Sstevel@tonic-gate if (level > SINGLE) {
11240Sstevel@tonic-gate blocksreleased +=
11250Sstevel@tonic-gate indirtrunc(ip, nb, (daddr_t)-1, level - 1, flags);
11260Sstevel@tonic-gate free(ip, nb, (off_t)fs->fs_bsize, flags | I_IBLK);
11270Sstevel@tonic-gate } else
11280Sstevel@tonic-gate free(ip, nb, (off_t)fs->fs_bsize, flags);
11290Sstevel@tonic-gate blocksreleased += nblocks;
11300Sstevel@tonic-gate }
11310Sstevel@tonic-gate flags &= ~I_CHEAP;
11320Sstevel@tonic-gate
11330Sstevel@tonic-gate /*
11340Sstevel@tonic-gate * Recursively free last partial block.
11350Sstevel@tonic-gate */
11360Sstevel@tonic-gate if (level > SINGLE && lastbn >= 0) {
11370Sstevel@tonic-gate last = lastbn % factor;
11380Sstevel@tonic-gate nb = bap[i];
11390Sstevel@tonic-gate if (nb != 0)
11404662Sfrankho blocksreleased +=
11414662Sfrankho indirtrunc(ip, nb, last, level - 1, flags);
11420Sstevel@tonic-gate }
11430Sstevel@tonic-gate brelse(bp);
11440Sstevel@tonic-gate return (blocksreleased);
11450Sstevel@tonic-gate }
11460Sstevel@tonic-gate
11470Sstevel@tonic-gate /*
11480Sstevel@tonic-gate * Truncate the inode ip to at most length size.
11490Sstevel@tonic-gate * Free affected disk blocks -- the blocks of the
11500Sstevel@tonic-gate * file are removed in reverse order.
11510Sstevel@tonic-gate *
11520Sstevel@tonic-gate * N.B.: triple indirect blocks are untested.
11530Sstevel@tonic-gate */
11540Sstevel@tonic-gate static int i_genrand = 1234;
11550Sstevel@tonic-gate int
ufs_itrunc(struct inode * oip,u_offset_t length,int flags,cred_t * cr)11560Sstevel@tonic-gate ufs_itrunc(struct inode *oip, u_offset_t length, int flags, cred_t *cr)
11570Sstevel@tonic-gate {
11580Sstevel@tonic-gate struct fs *fs = oip->i_fs;
11590Sstevel@tonic-gate struct ufsvfs *ufsvfsp = oip->i_ufsvfs;
11600Sstevel@tonic-gate struct inode *ip;
11610Sstevel@tonic-gate daddr_t lastblock;
11620Sstevel@tonic-gate off_t bsize;
11630Sstevel@tonic-gate int boff;
11640Sstevel@tonic-gate daddr_t bn, lastiblock[NIADDR];
11650Sstevel@tonic-gate int level;
11660Sstevel@tonic-gate long nblocks, blocksreleased = 0;
11670Sstevel@tonic-gate int i;
11680Sstevel@tonic-gate ushort_t mode;
11690Sstevel@tonic-gate struct inode tip;
11700Sstevel@tonic-gate int err;
11710Sstevel@tonic-gate u_offset_t maxoffset = (ufsvfsp->vfs_lfflags & UFS_LARGEFILES) ?
11720Sstevel@tonic-gate (UFS_MAXOFFSET_T) : (MAXOFF32_T);
11730Sstevel@tonic-gate
11740Sstevel@tonic-gate /*
11750Sstevel@tonic-gate * Shadow inodes do not need to hold the vfs_dqrwlock lock. Most
11760Sstevel@tonic-gate * other uses need the reader lock. opendq() holds the writer lock.
11770Sstevel@tonic-gate */
11780Sstevel@tonic-gate ASSERT((oip->i_mode & IFMT) == IFSHAD ||
11794662Sfrankho RW_LOCK_HELD(&ufsvfsp->vfs_dqrwlock));
11800Sstevel@tonic-gate ASSERT(RW_WRITE_HELD(&oip->i_contents));
11810Sstevel@tonic-gate /*
11820Sstevel@tonic-gate * We only allow truncation of regular files and directories
11830Sstevel@tonic-gate * to arbitrary lengths here. In addition, we allow symbolic
11840Sstevel@tonic-gate * links to be truncated only to zero length. Other inode
11850Sstevel@tonic-gate * types cannot have their length set here. Disk blocks are
11860Sstevel@tonic-gate * being dealt with - especially device inodes where
11870Sstevel@tonic-gate * ip->i_ordev is actually being stored in ip->i_db[0]!
11880Sstevel@tonic-gate */
11890Sstevel@tonic-gate TRANS_INODE(ufsvfsp, oip);
11900Sstevel@tonic-gate mode = oip->i_mode & IFMT;
11910Sstevel@tonic-gate if (flags & I_FREE) {
11920Sstevel@tonic-gate i_genrand *= 16843009; /* turns into shift and adds */
11930Sstevel@tonic-gate i_genrand++;
119411066Srafael.vanoni@sun.com oip->i_gen += ((i_genrand + ddi_get_lbolt()) & 0xffff) + 1;
11950Sstevel@tonic-gate oip->i_flag |= ICHG |IUPD;
11960Sstevel@tonic-gate oip->i_seq++;
11970Sstevel@tonic-gate if (length == oip->i_size)
11980Sstevel@tonic-gate return (0);
11990Sstevel@tonic-gate flags |= I_CHEAP;
12000Sstevel@tonic-gate }
12010Sstevel@tonic-gate if (mode == IFIFO)
12020Sstevel@tonic-gate return (0);
12030Sstevel@tonic-gate if (mode != IFREG && mode != IFDIR && mode != IFATTRDIR &&
12040Sstevel@tonic-gate !(mode == IFLNK && length == (offset_t)0) && mode != IFSHAD)
12050Sstevel@tonic-gate return (EINVAL);
12060Sstevel@tonic-gate if (length > maxoffset)
12070Sstevel@tonic-gate return (EFBIG);
12080Sstevel@tonic-gate if ((mode == IFDIR) || (mode == IFATTRDIR))
12090Sstevel@tonic-gate flags |= I_DIR;
12100Sstevel@tonic-gate if (mode == IFSHAD)
12110Sstevel@tonic-gate flags |= I_SHAD;
12120Sstevel@tonic-gate if (oip == ufsvfsp->vfs_qinod)
12130Sstevel@tonic-gate flags |= I_QUOTA;
12140Sstevel@tonic-gate if (length == oip->i_size) {
12150Sstevel@tonic-gate /* update ctime and mtime to please POSIX tests */
12160Sstevel@tonic-gate oip->i_flag |= ICHG |IUPD;
12170Sstevel@tonic-gate oip->i_seq++;
12180Sstevel@tonic-gate if (length == 0) {
12190Sstevel@tonic-gate /* nothing to cache so clear the flag */
12200Sstevel@tonic-gate oip->i_flag &= ~IFASTSYMLNK;
12210Sstevel@tonic-gate }
12220Sstevel@tonic-gate return (0);
12230Sstevel@tonic-gate }
12240Sstevel@tonic-gate /* wipe out fast symlink till next access */
12250Sstevel@tonic-gate if (oip->i_flag & IFASTSYMLNK) {
12260Sstevel@tonic-gate int j;
12270Sstevel@tonic-gate
12280Sstevel@tonic-gate ASSERT(ITOV(oip)->v_type == VLNK);
12290Sstevel@tonic-gate
12300Sstevel@tonic-gate oip->i_flag &= ~IFASTSYMLNK;
12310Sstevel@tonic-gate
12320Sstevel@tonic-gate for (j = 1; j < NDADDR; j++)
12330Sstevel@tonic-gate oip->i_db[j] = 0;
12340Sstevel@tonic-gate for (j = 0; j < NIADDR; j++)
12350Sstevel@tonic-gate oip->i_ib[j] = 0;
12360Sstevel@tonic-gate }
12370Sstevel@tonic-gate
12380Sstevel@tonic-gate boff = (int)blkoff(fs, length);
12390Sstevel@tonic-gate
12400Sstevel@tonic-gate if (length > oip->i_size) {
12410Sstevel@tonic-gate /*
12420Sstevel@tonic-gate * Trunc up case. BMAPALLOC will insure that the right blocks
12430Sstevel@tonic-gate * are allocated. This includes extending the old frag to a
12440Sstevel@tonic-gate * full block (if needed) in addition to doing any work
12450Sstevel@tonic-gate * needed for allocating the last block.
12460Sstevel@tonic-gate */
12470Sstevel@tonic-gate if (boff == 0)
12480Sstevel@tonic-gate err = BMAPALLOC(oip, length - 1, (int)fs->fs_bsize, cr);
12490Sstevel@tonic-gate else
12500Sstevel@tonic-gate err = BMAPALLOC(oip, length - 1, boff, cr);
12510Sstevel@tonic-gate
12520Sstevel@tonic-gate if (err == 0) {
12530Sstevel@tonic-gate /*
12540Sstevel@tonic-gate * Save old size and set inode's size now
12550Sstevel@tonic-gate * so that we don't cause too much of the
12560Sstevel@tonic-gate * file to be zero'd and pushed.
12570Sstevel@tonic-gate */
12580Sstevel@tonic-gate u_offset_t osize = oip->i_size;
12590Sstevel@tonic-gate oip->i_size = length;
12600Sstevel@tonic-gate /*
12610Sstevel@tonic-gate * Make sure we zero out the remaining bytes of
12620Sstevel@tonic-gate * the page in case a mmap scribbled on it. We
12630Sstevel@tonic-gate * can't prevent a mmap from writing beyond EOF
12640Sstevel@tonic-gate * on the last page of a file.
12650Sstevel@tonic-gate *
12660Sstevel@tonic-gate */
12670Sstevel@tonic-gate if ((boff = (int)blkoff(fs, osize)) != 0) {
12680Sstevel@tonic-gate bsize = (int)lblkno(fs, osize - 1) >= NDADDR ?
12690Sstevel@tonic-gate fs->fs_bsize : fragroundup(fs, boff);
12700Sstevel@tonic-gate pvn_vpzero(ITOV(oip), osize,
12710Sstevel@tonic-gate (size_t)(bsize - boff));
12720Sstevel@tonic-gate }
12730Sstevel@tonic-gate oip->i_flag |= ICHG|IATTCHG;
12740Sstevel@tonic-gate oip->i_seq++;
12750Sstevel@tonic-gate ITIMES_NOLOCK(oip);
12760Sstevel@tonic-gate /*
12770Sstevel@tonic-gate * MAXOFF32_T is old 2GB size limit. If
12780Sstevel@tonic-gate * this operation caused a large file to be
12790Sstevel@tonic-gate * created, turn on the superblock flag
12800Sstevel@tonic-gate * and update the superblock, if the flag
12810Sstevel@tonic-gate * is not already on.
12820Sstevel@tonic-gate */
12830Sstevel@tonic-gate if ((length > (u_offset_t)MAXOFF32_T) &&
12840Sstevel@tonic-gate !(fs->fs_flags & FSLARGEFILES)) {
12850Sstevel@tonic-gate ASSERT(ufsvfsp->vfs_lfflags & UFS_LARGEFILES);
12860Sstevel@tonic-gate mutex_enter(&ufsvfsp->vfs_lock);
12870Sstevel@tonic-gate fs->fs_flags |= FSLARGEFILES;
12880Sstevel@tonic-gate ufs_sbwrite(ufsvfsp);
12890Sstevel@tonic-gate mutex_exit(&ufsvfsp->vfs_lock);
12900Sstevel@tonic-gate }
12910Sstevel@tonic-gate }
12920Sstevel@tonic-gate
12930Sstevel@tonic-gate return (err);
12940Sstevel@tonic-gate }
12950Sstevel@tonic-gate
12960Sstevel@tonic-gate /*
12970Sstevel@tonic-gate * Update the pages of the file. If the file is not being
12980Sstevel@tonic-gate * truncated to a block boundary, the contents of the
12990Sstevel@tonic-gate * pages following the end of the file must be zero'ed
13000Sstevel@tonic-gate * in case it ever become accessible again because
13010Sstevel@tonic-gate * of subsequent file growth.
13020Sstevel@tonic-gate */
13030Sstevel@tonic-gate if (boff == 0) {
13040Sstevel@tonic-gate (void) pvn_vplist_dirty(ITOV(oip), length, ufs_putapage,
13050Sstevel@tonic-gate B_INVAL | B_TRUNC, CRED());
13060Sstevel@tonic-gate } else {
13070Sstevel@tonic-gate /*
13080Sstevel@tonic-gate * Make sure that the last block is properly allocated.
13090Sstevel@tonic-gate * We only really have to do this if the last block is
13100Sstevel@tonic-gate * actually allocated since ufs_bmap will now handle the case
13110Sstevel@tonic-gate * of an fragment which has no block allocated. Just to
13120Sstevel@tonic-gate * be sure, we do it now independent of current allocation.
13130Sstevel@tonic-gate */
13140Sstevel@tonic-gate err = BMAPALLOC(oip, length - 1, boff, cr);
13150Sstevel@tonic-gate if (err)
13160Sstevel@tonic-gate return (err);
13170Sstevel@tonic-gate
13180Sstevel@tonic-gate /*
13190Sstevel@tonic-gate * BMAPALLOC will call bmap_write which defers i_seq
13200Sstevel@tonic-gate * processing. If the timestamps were changed, update
13210Sstevel@tonic-gate * i_seq before rdip drops i_contents or syncs the inode.
13220Sstevel@tonic-gate */
13230Sstevel@tonic-gate if (oip->i_flag & (ICHG|IUPD))
13240Sstevel@tonic-gate oip->i_seq++;
13250Sstevel@tonic-gate
13260Sstevel@tonic-gate /*
13270Sstevel@tonic-gate * BugId 4069932
13280Sstevel@tonic-gate * Make sure that the relevant partial page appears in
13290Sstevel@tonic-gate * the v_pages list, so that pvn_vpzero() will do its
13300Sstevel@tonic-gate * job. Since doing this correctly requires everything
13310Sstevel@tonic-gate * in rdip() except for the uiomove(), it's easier and
13320Sstevel@tonic-gate * safer to do the uiomove() rather than duplicate the
13330Sstevel@tonic-gate * rest of rdip() here.
13340Sstevel@tonic-gate *
13350Sstevel@tonic-gate * To get here, we know that length indicates a byte
13360Sstevel@tonic-gate * that is not the first byte of a block. (length - 1)
13370Sstevel@tonic-gate * is the last actual byte known to exist. Deduction
13380Sstevel@tonic-gate * shows it is in the same block as byte (length).
13390Sstevel@tonic-gate * Thus, this rdip() invocation should always succeed
13400Sstevel@tonic-gate * except in the face of i/o errors, and give us the
13410Sstevel@tonic-gate * block we care about.
13420Sstevel@tonic-gate *
13430Sstevel@tonic-gate * rdip() makes the same locking assertions and
13440Sstevel@tonic-gate * assumptions as we do. We do not acquire any locks
13450Sstevel@tonic-gate * before calling it, so we have not changed the locking
13460Sstevel@tonic-gate * situation. Finally, there do not appear to be any
13470Sstevel@tonic-gate * paths whereby rdip() ends up invoking us again.
13480Sstevel@tonic-gate * Thus, infinite recursion is avoided.
13490Sstevel@tonic-gate */
13500Sstevel@tonic-gate {
13510Sstevel@tonic-gate uio_t uio;
13520Sstevel@tonic-gate iovec_t iov[1];
13530Sstevel@tonic-gate char buffer;
13540Sstevel@tonic-gate
13550Sstevel@tonic-gate uio.uio_iov = iov;
13560Sstevel@tonic-gate uio.uio_iovcnt = 1;
13570Sstevel@tonic-gate uio.uio_loffset = length - 1;
13580Sstevel@tonic-gate uio.uio_resid = 1;
13590Sstevel@tonic-gate uio.uio_segflg = UIO_SYSSPACE;
13600Sstevel@tonic-gate uio.uio_extflg = UIO_COPY_CACHED;
13610Sstevel@tonic-gate
13620Sstevel@tonic-gate iov[0].iov_base = &buffer;
13630Sstevel@tonic-gate iov[0].iov_len = 1;
13640Sstevel@tonic-gate
13650Sstevel@tonic-gate err = rdip(oip, &uio, UIO_READ, NULL);
13660Sstevel@tonic-gate if (err)
13670Sstevel@tonic-gate return (err);
13680Sstevel@tonic-gate }
13690Sstevel@tonic-gate
13700Sstevel@tonic-gate bsize = (int)lblkno(fs, length - 1) >= NDADDR ?
13710Sstevel@tonic-gate fs->fs_bsize : fragroundup(fs, boff);
13720Sstevel@tonic-gate pvn_vpzero(ITOV(oip), length, (size_t)(bsize - boff));
13730Sstevel@tonic-gate /*
13740Sstevel@tonic-gate * Ensure full fs block is marked as dirty.
13750Sstevel@tonic-gate */
13760Sstevel@tonic-gate (void) pvn_vplist_dirty(ITOV(oip), length + (bsize - boff),
13770Sstevel@tonic-gate ufs_putapage, B_INVAL | B_TRUNC, CRED());
13780Sstevel@tonic-gate }
13790Sstevel@tonic-gate
13800Sstevel@tonic-gate /*
13810Sstevel@tonic-gate * Calculate index into inode's block list of
13820Sstevel@tonic-gate * last direct and indirect blocks (if any)
13830Sstevel@tonic-gate * which we want to keep. Lastblock is -1 when
13840Sstevel@tonic-gate * the file is truncated to 0.
13850Sstevel@tonic-gate */
13860Sstevel@tonic-gate lastblock = lblkno(fs, length + fs->fs_bsize - 1) - 1;
13870Sstevel@tonic-gate lastiblock[SINGLE] = lastblock - NDADDR;
13880Sstevel@tonic-gate lastiblock[DOUBLE] = lastiblock[SINGLE] - NINDIR(fs);
13890Sstevel@tonic-gate lastiblock[TRIPLE] = lastiblock[DOUBLE] - NINDIR(fs) * NINDIR(fs);
13900Sstevel@tonic-gate nblocks = btodb(fs->fs_bsize);
13910Sstevel@tonic-gate
13920Sstevel@tonic-gate /*
13930Sstevel@tonic-gate * Update file and block pointers
13940Sstevel@tonic-gate * on disk before we start freeing blocks.
13950Sstevel@tonic-gate * If we crash before free'ing blocks below,
13960Sstevel@tonic-gate * the blocks will be returned to the free list.
13970Sstevel@tonic-gate * lastiblock values are also normalized to -1
13980Sstevel@tonic-gate * for calls to indirtrunc below.
13990Sstevel@tonic-gate */
14000Sstevel@tonic-gate tip = *oip; /* structure copy */
14010Sstevel@tonic-gate ip = &tip;
14020Sstevel@tonic-gate
14030Sstevel@tonic-gate for (level = TRIPLE; level >= SINGLE; level--)
14040Sstevel@tonic-gate if (lastiblock[level] < 0) {
14050Sstevel@tonic-gate oip->i_ib[level] = 0;
14060Sstevel@tonic-gate lastiblock[level] = -1;
14070Sstevel@tonic-gate }
14080Sstevel@tonic-gate for (i = NDADDR - 1; i > lastblock; i--) {
14090Sstevel@tonic-gate oip->i_db[i] = 0;
14100Sstevel@tonic-gate flags |= I_CHEAP;
14110Sstevel@tonic-gate }
14120Sstevel@tonic-gate oip->i_size = length;
14130Sstevel@tonic-gate oip->i_flag |= ICHG|IUPD|IATTCHG;
14140Sstevel@tonic-gate oip->i_seq++;
14150Sstevel@tonic-gate if (!TRANS_ISTRANS(ufsvfsp))
14160Sstevel@tonic-gate ufs_iupdat(oip, I_SYNC); /* do sync inode update */
14170Sstevel@tonic-gate
14180Sstevel@tonic-gate /*
14190Sstevel@tonic-gate * Indirect blocks first.
14200Sstevel@tonic-gate */
14210Sstevel@tonic-gate for (level = TRIPLE; level >= SINGLE; level--) {
14220Sstevel@tonic-gate bn = ip->i_ib[level];
14230Sstevel@tonic-gate if (bn != 0) {
14240Sstevel@tonic-gate blocksreleased +=
14250Sstevel@tonic-gate indirtrunc(ip, bn, lastiblock[level], level, flags);
14260Sstevel@tonic-gate if (lastiblock[level] < 0) {
14270Sstevel@tonic-gate ip->i_ib[level] = 0;
14280Sstevel@tonic-gate free(ip, bn, (off_t)fs->fs_bsize,
14294662Sfrankho flags | I_IBLK);
14300Sstevel@tonic-gate blocksreleased += nblocks;
14310Sstevel@tonic-gate }
14320Sstevel@tonic-gate }
14330Sstevel@tonic-gate if (lastiblock[level] >= 0)
14340Sstevel@tonic-gate goto done;
14350Sstevel@tonic-gate }
14360Sstevel@tonic-gate
14370Sstevel@tonic-gate /*
14380Sstevel@tonic-gate * All whole direct blocks or frags.
14390Sstevel@tonic-gate */
14400Sstevel@tonic-gate for (i = NDADDR - 1; i > lastblock; i--) {
14410Sstevel@tonic-gate bn = ip->i_db[i];
14420Sstevel@tonic-gate if (bn == 0)
14430Sstevel@tonic-gate continue;
14440Sstevel@tonic-gate ip->i_db[i] = 0;
14450Sstevel@tonic-gate bsize = (off_t)blksize(fs, ip, i);
14460Sstevel@tonic-gate free(ip, bn, bsize, flags);
14470Sstevel@tonic-gate blocksreleased += btodb(bsize);
14480Sstevel@tonic-gate }
14490Sstevel@tonic-gate if (lastblock < 0)
14500Sstevel@tonic-gate goto done;
14510Sstevel@tonic-gate
14520Sstevel@tonic-gate /*
14530Sstevel@tonic-gate * Finally, look for a change in size of the
14540Sstevel@tonic-gate * last direct block; release any frags.
14550Sstevel@tonic-gate */
14560Sstevel@tonic-gate bn = ip->i_db[lastblock];
14570Sstevel@tonic-gate if (bn != 0) {
14580Sstevel@tonic-gate off_t oldspace, newspace;
14590Sstevel@tonic-gate
14600Sstevel@tonic-gate /*
14610Sstevel@tonic-gate * Calculate amount of space we're giving
14620Sstevel@tonic-gate * back as old block size minus new block size.
14630Sstevel@tonic-gate */
14640Sstevel@tonic-gate oldspace = blksize(fs, ip, lastblock);
14650Sstevel@tonic-gate UFS_SET_ISIZE(length, ip);
14660Sstevel@tonic-gate newspace = blksize(fs, ip, lastblock);
14670Sstevel@tonic-gate if (newspace == 0) {
14680Sstevel@tonic-gate err = ufs_fault(ITOV(ip), "ufs_itrunc: newspace == 0");
14690Sstevel@tonic-gate return (err);
14700Sstevel@tonic-gate }
14710Sstevel@tonic-gate if (oldspace - newspace > 0) {
14720Sstevel@tonic-gate /*
14730Sstevel@tonic-gate * Block number of space to be free'd is
14740Sstevel@tonic-gate * the old block # plus the number of frags
14750Sstevel@tonic-gate * required for the storage we're keeping.
14760Sstevel@tonic-gate */
14770Sstevel@tonic-gate bn += numfrags(fs, newspace);
14780Sstevel@tonic-gate free(ip, bn, oldspace - newspace, flags);
14790Sstevel@tonic-gate blocksreleased += btodb(oldspace - newspace);
14800Sstevel@tonic-gate }
14810Sstevel@tonic-gate }
14820Sstevel@tonic-gate done:
14830Sstevel@tonic-gate /* BEGIN PARANOIA */
14840Sstevel@tonic-gate for (level = SINGLE; level <= TRIPLE; level++)
14850Sstevel@tonic-gate if (ip->i_ib[level] != oip->i_ib[level]) {
14860Sstevel@tonic-gate err = ufs_fault(ITOV(ip), "ufs_itrunc: indirect block");
14870Sstevel@tonic-gate return (err);
14880Sstevel@tonic-gate }
14890Sstevel@tonic-gate
14900Sstevel@tonic-gate for (i = 0; i < NDADDR; i++)
14910Sstevel@tonic-gate if (ip->i_db[i] != oip->i_db[i]) {
14920Sstevel@tonic-gate err = ufs_fault(ITOV(ip), "ufs_itrunc: direct block");
14930Sstevel@tonic-gate return (err);
14940Sstevel@tonic-gate }
14950Sstevel@tonic-gate /* END PARANOIA */
14960Sstevel@tonic-gate oip->i_blocks -= blocksreleased;
14970Sstevel@tonic-gate
14980Sstevel@tonic-gate if (oip->i_blocks < 0) { /* sanity */
14990Sstevel@tonic-gate cmn_err(CE_NOTE,
15000Sstevel@tonic-gate "ufs_itrunc: %s/%d new size = %lld, blocks = %d\n",
15010Sstevel@tonic-gate fs->fs_fsmnt, (int)oip->i_number, oip->i_size,
15020Sstevel@tonic-gate (int)oip->i_blocks);
15030Sstevel@tonic-gate oip->i_blocks = 0;
15040Sstevel@tonic-gate }
15050Sstevel@tonic-gate oip->i_flag |= ICHG|IATTCHG;
15060Sstevel@tonic-gate oip->i_seq++;
15070Sstevel@tonic-gate /* blocksreleased is >= zero, so this can not fail */
15080Sstevel@tonic-gate (void) chkdq(oip, -blocksreleased, 0, cr, (char **)NULL,
15094662Sfrankho (size_t *)NULL);
15100Sstevel@tonic-gate return (0);
15110Sstevel@tonic-gate }
15120Sstevel@tonic-gate
15130Sstevel@tonic-gate /*
15140Sstevel@tonic-gate * Check mode permission on inode. Mode is READ, WRITE or EXEC.
15150Sstevel@tonic-gate * In the case of WRITE, the read-only status of the file system
15160Sstevel@tonic-gate * is checked. Depending on the calling user, the appropriate
15170Sstevel@tonic-gate * mode bits are selected; privileges to override missing permission
15180Sstevel@tonic-gate * bits are checked through secpolicy_vnode_access().
15197737SFrank.Batschulat@Sun.COM * The i_contens lock must be held as reader here to prevent racing with
15207737SFrank.Batschulat@Sun.COM * the acl subsystem removing/setting/changing acls on this inode.
15217737SFrank.Batschulat@Sun.COM * The caller is responsible for indicating whether or not the i_contents
15227737SFrank.Batschulat@Sun.COM * lock needs to be acquired here or if already held.
15230Sstevel@tonic-gate */
15240Sstevel@tonic-gate int
ufs_iaccess(struct inode * ip,int mode,struct cred * cr,int dolock)15257737SFrank.Batschulat@Sun.COM ufs_iaccess(struct inode *ip, int mode, struct cred *cr, int dolock)
15260Sstevel@tonic-gate {
15270Sstevel@tonic-gate int shift = 0;
15287737SFrank.Batschulat@Sun.COM int ret = 0;
15297737SFrank.Batschulat@Sun.COM
15307737SFrank.Batschulat@Sun.COM if (dolock)
15317737SFrank.Batschulat@Sun.COM rw_enter(&ip->i_contents, RW_READER);
15327737SFrank.Batschulat@Sun.COM ASSERT(RW_LOCK_HELD(&ip->i_contents));
15330Sstevel@tonic-gate
15340Sstevel@tonic-gate if (mode & IWRITE) {
15350Sstevel@tonic-gate /*
15360Sstevel@tonic-gate * Disallow write attempts on read-only
15370Sstevel@tonic-gate * file systems, unless the file is a block
15380Sstevel@tonic-gate * or character device or a FIFO.
15390Sstevel@tonic-gate */
15400Sstevel@tonic-gate if (ip->i_fs->fs_ronly != 0) {
15410Sstevel@tonic-gate if ((ip->i_mode & IFMT) != IFCHR &&
15420Sstevel@tonic-gate (ip->i_mode & IFMT) != IFBLK &&
15430Sstevel@tonic-gate (ip->i_mode & IFMT) != IFIFO) {
15447737SFrank.Batschulat@Sun.COM ret = EROFS;
15457737SFrank.Batschulat@Sun.COM goto out;
15460Sstevel@tonic-gate }
15470Sstevel@tonic-gate }
15480Sstevel@tonic-gate }
15490Sstevel@tonic-gate /*
15507737SFrank.Batschulat@Sun.COM * If there is an acl, check the acl and return.
15510Sstevel@tonic-gate */
15527737SFrank.Batschulat@Sun.COM if (ip->i_ufs_acl && ip->i_ufs_acl->aowner) {
15537737SFrank.Batschulat@Sun.COM ret = ufs_acl_access(ip, mode, cr);
15547737SFrank.Batschulat@Sun.COM goto out;
15557737SFrank.Batschulat@Sun.COM }
15560Sstevel@tonic-gate
15570Sstevel@tonic-gate /*
15587737SFrank.Batschulat@Sun.COM * Access check is based on only one of owner, group, public.
15590Sstevel@tonic-gate * If not owner, then check group.
15607737SFrank.Batschulat@Sun.COM * If not a member of the group, then check public access.
15610Sstevel@tonic-gate */
15620Sstevel@tonic-gate if (crgetuid(cr) != ip->i_uid) {
15630Sstevel@tonic-gate shift += 3;
15640Sstevel@tonic-gate if (!groupmember((uid_t)ip->i_gid, cr))
15650Sstevel@tonic-gate shift += 3;
15660Sstevel@tonic-gate }
15670Sstevel@tonic-gate
15680Sstevel@tonic-gate /* test missing privilege bits */
1569*12273SCasper.Dik@Sun.COM ret = secpolicy_vnode_access2(cr, ITOV(ip), ip->i_uid,
1570*12273SCasper.Dik@Sun.COM ip->i_mode << shift, mode);
15717737SFrank.Batschulat@Sun.COM out:
15727737SFrank.Batschulat@Sun.COM if (dolock)
15737737SFrank.Batschulat@Sun.COM rw_exit(&ip->i_contents);
15747737SFrank.Batschulat@Sun.COM return (ret);
15750Sstevel@tonic-gate }
15760Sstevel@tonic-gate
15770Sstevel@tonic-gate /*
15780Sstevel@tonic-gate * if necessary, remove an inode from the free list
15790Sstevel@tonic-gate * i_contents is held except at unmount
15800Sstevel@tonic-gate *
15810Sstevel@tonic-gate * Return 1 if the inode is taken off of the ufs_idle_q,
15820Sstevel@tonic-gate * and the caller is expected to call VN_RELE.
15830Sstevel@tonic-gate *
15840Sstevel@tonic-gate * Return 0 otherwise.
15850Sstevel@tonic-gate */
15860Sstevel@tonic-gate int
ufs_rmidle(struct inode * ip)15870Sstevel@tonic-gate ufs_rmidle(struct inode *ip)
15880Sstevel@tonic-gate {
15890Sstevel@tonic-gate int rval = 0;
15900Sstevel@tonic-gate
15910Sstevel@tonic-gate mutex_enter(&ip->i_tlock);
15920Sstevel@tonic-gate if ((ip->i_flag & IREF) == 0) {
15930Sstevel@tonic-gate mutex_enter(&ufs_idle_q.uq_mutex);
15940Sstevel@tonic-gate ip->i_freef->i_freeb = ip->i_freeb;
15950Sstevel@tonic-gate ip->i_freeb->i_freef = ip->i_freef;
15960Sstevel@tonic-gate ip->i_freef = ip;
15970Sstevel@tonic-gate ip->i_freeb = ip;
15980Sstevel@tonic-gate ip->i_flag |= IREF;
15990Sstevel@tonic-gate ufs_idle_q.uq_ne--;
16000Sstevel@tonic-gate if (ip->i_flag & IJUNKIQ) {
16010Sstevel@tonic-gate ufs_njunk_iq--;
16020Sstevel@tonic-gate ip->i_flag &= ~IJUNKIQ;
16030Sstevel@tonic-gate } else {
16040Sstevel@tonic-gate ufs_nuseful_iq--;
16050Sstevel@tonic-gate }
16060Sstevel@tonic-gate mutex_exit(&ufs_idle_q.uq_mutex);
16070Sstevel@tonic-gate rval = 1;
16080Sstevel@tonic-gate }
16090Sstevel@tonic-gate mutex_exit(&ip->i_tlock);
16100Sstevel@tonic-gate return (rval);
16110Sstevel@tonic-gate }
16120Sstevel@tonic-gate
16130Sstevel@tonic-gate /*
16140Sstevel@tonic-gate * scan the hash of inodes and call func with the inode locked
16150Sstevel@tonic-gate */
16160Sstevel@tonic-gate int
ufs_scan_inodes(int rwtry,int (* func)(struct inode *,void *),void * arg,struct ufsvfs * ufsvfsp)16170Sstevel@tonic-gate ufs_scan_inodes(int rwtry, int (*func)(struct inode *, void *), void *arg,
16180Sstevel@tonic-gate struct ufsvfs *ufsvfsp)
16190Sstevel@tonic-gate {
16200Sstevel@tonic-gate struct inode *ip; /* current inode */
16210Sstevel@tonic-gate struct inode *lip = NULL; /* last/previous inode */
16220Sstevel@tonic-gate union ihead *ih; /* current hash chain */
16230Sstevel@tonic-gate int error, i;
16240Sstevel@tonic-gate int saverror = 0;
16250Sstevel@tonic-gate int lip_held; /* lip needs a VN_RELE() */
16260Sstevel@tonic-gate
16270Sstevel@tonic-gate /*
16280Sstevel@tonic-gate * If ufsvfsp is NULL, then our caller should be holding
16290Sstevel@tonic-gate * ufs_scan_lock to avoid conflicts between ufs_unmount() and
16300Sstevel@tonic-gate * ufs_update(). Otherwise, to avoid false-positives in
16310Sstevel@tonic-gate * ufs_unmount()'s v_count-based EBUSY check, we only hold
16320Sstevel@tonic-gate * those inodes that are in the file system our caller cares
16330Sstevel@tonic-gate * about.
16340Sstevel@tonic-gate *
16350Sstevel@tonic-gate * We know that ip is a valid inode in the hash chain (and thus
16360Sstevel@tonic-gate * we can trust i_ufsvfs) because the inode we chained from
16370Sstevel@tonic-gate * (lip) is still in the hash chain. This is true because either:
16380Sstevel@tonic-gate *
16390Sstevel@tonic-gate * 1. We did not drop the hash chain lock since the last
16400Sstevel@tonic-gate * iteration (because we were not interested in the last inode),
16410Sstevel@tonic-gate * or
16420Sstevel@tonic-gate * 2. We maintained a hold on the last inode while we
16430Sstevel@tonic-gate * we were processing it, so it could not be removed
16440Sstevel@tonic-gate * from the hash chain.
16450Sstevel@tonic-gate *
16460Sstevel@tonic-gate * The whole reason we're dropping and re-grabbing the chain
16470Sstevel@tonic-gate * lock on every inode is so that we don't present a major
16480Sstevel@tonic-gate * choke point on throughput, particularly when we've been
16490Sstevel@tonic-gate * called on behalf of fsflush.
16500Sstevel@tonic-gate */
16510Sstevel@tonic-gate
16520Sstevel@tonic-gate for (i = 0, ih = ihead; i < inohsz; i++, ih++) {
16530Sstevel@tonic-gate mutex_enter(&ih_lock[i]);
16540Sstevel@tonic-gate for (ip = ih->ih_chain[0], lip_held = 0;
16550Sstevel@tonic-gate ip != (struct inode *)ih;
16560Sstevel@tonic-gate ip = lip->i_forw) {
16570Sstevel@tonic-gate
16580Sstevel@tonic-gate ins.in_scan.value.ul++;
16590Sstevel@tonic-gate
16600Sstevel@tonic-gate /*
16610Sstevel@tonic-gate * Undo the previous iteration's VN_HOLD(), but
16620Sstevel@tonic-gate * only if one was done.
16630Sstevel@tonic-gate */
16640Sstevel@tonic-gate if (lip_held)
16650Sstevel@tonic-gate VN_RELE(ITOV(lip));
16660Sstevel@tonic-gate
16670Sstevel@tonic-gate lip = ip;
16680Sstevel@tonic-gate if (ufsvfsp != NULL && ip->i_ufsvfs != ufsvfsp) {
16690Sstevel@tonic-gate /*
16700Sstevel@tonic-gate * We're not processing all inodes, and
16710Sstevel@tonic-gate * this inode is not in the filesystem of
16720Sstevel@tonic-gate * interest, so skip it. No need to do a
16730Sstevel@tonic-gate * VN_HOLD() since we're not dropping the
16740Sstevel@tonic-gate * hash chain lock until after we've
16750Sstevel@tonic-gate * done the i_forw traversal above.
16760Sstevel@tonic-gate */
16770Sstevel@tonic-gate lip_held = 0;
16780Sstevel@tonic-gate continue;
16790Sstevel@tonic-gate }
16800Sstevel@tonic-gate VN_HOLD(ITOV(ip));
16810Sstevel@tonic-gate lip_held = 1;
16820Sstevel@tonic-gate mutex_exit(&ih_lock[i]);
16830Sstevel@tonic-gate
16840Sstevel@tonic-gate /*
16850Sstevel@tonic-gate * Acquire the contents lock as writer to make
16860Sstevel@tonic-gate * sure that the inode has been initialized in
16870Sstevel@tonic-gate * the cache or removed from the idle list by
16880Sstevel@tonic-gate * ufs_iget(). This works because ufs_iget()
16890Sstevel@tonic-gate * acquires the contents lock before putting
16900Sstevel@tonic-gate * the inode into the cache. If we can lock
16910Sstevel@tonic-gate * it, then he's done with it.
16920Sstevel@tonic-gate */
16930Sstevel@tonic-gate
16940Sstevel@tonic-gate if (rwtry) {
16950Sstevel@tonic-gate if (!rw_tryenter(&ip->i_contents, RW_WRITER)) {
16960Sstevel@tonic-gate mutex_enter(&ih_lock[i]);
16970Sstevel@tonic-gate continue;
16980Sstevel@tonic-gate }
16990Sstevel@tonic-gate } else {
17000Sstevel@tonic-gate rw_enter(&ip->i_contents, RW_WRITER);
17010Sstevel@tonic-gate }
17020Sstevel@tonic-gate
17030Sstevel@tonic-gate rw_exit(&ip->i_contents);
17040Sstevel@tonic-gate
17050Sstevel@tonic-gate /*
17060Sstevel@tonic-gate * ISTALE means the inode couldn't be read
17070Sstevel@tonic-gate *
17080Sstevel@tonic-gate * We don't have to hold the i_contents lock
17090Sstevel@tonic-gate * for this check for a couple of
17100Sstevel@tonic-gate * reasons. First, if ISTALE is set then the
17110Sstevel@tonic-gate * flag cannot be cleared until the inode is
17120Sstevel@tonic-gate * removed from the cache and that cannot
17130Sstevel@tonic-gate * happen until after we VN_RELE() it.
17140Sstevel@tonic-gate * Second, if ISTALE is not set, then the
17150Sstevel@tonic-gate * inode is in the cache and does not need to
17160Sstevel@tonic-gate * be read from disk so ISTALE cannot be set
17170Sstevel@tonic-gate * while we are not looking.
17180Sstevel@tonic-gate */
17190Sstevel@tonic-gate if ((ip->i_flag & ISTALE) == 0) {
17200Sstevel@tonic-gate if ((error = (*func)(ip, arg)) != 0)
17210Sstevel@tonic-gate saverror = error;
17220Sstevel@tonic-gate }
17230Sstevel@tonic-gate
17240Sstevel@tonic-gate mutex_enter(&ih_lock[i]);
17250Sstevel@tonic-gate }
17260Sstevel@tonic-gate if (lip_held)
17270Sstevel@tonic-gate VN_RELE(ITOV(lip));
17280Sstevel@tonic-gate mutex_exit(&ih_lock[i]);
17290Sstevel@tonic-gate }
17300Sstevel@tonic-gate return (saverror);
17310Sstevel@tonic-gate }
17320Sstevel@tonic-gate
17330Sstevel@tonic-gate /*
17340Sstevel@tonic-gate * Mark inode with the current time, plus a unique increment.
17350Sstevel@tonic-gate *
17360Sstevel@tonic-gate * Since we only keep 32-bit time on disk, if UFS is still alive
17370Sstevel@tonic-gate * beyond 2038, filesystem times will simply stick at the last
17380Sstevel@tonic-gate * possible second of 32-bit time. Not ideal, but probably better
17390Sstevel@tonic-gate * than going into the remote past, or confusing applications with
17400Sstevel@tonic-gate * negative time.
17410Sstevel@tonic-gate */
17420Sstevel@tonic-gate void
ufs_imark(struct inode * ip)17430Sstevel@tonic-gate ufs_imark(struct inode *ip)
17440Sstevel@tonic-gate {
17450Sstevel@tonic-gate timestruc_t now;
17460Sstevel@tonic-gate int32_t usec, nsec;
17470Sstevel@tonic-gate
17480Sstevel@tonic-gate /*
17490Sstevel@tonic-gate * The update of i_seq may have been deferred, increase i_seq here
17500Sstevel@tonic-gate * to make sure it is in sync with the timestamps.
17510Sstevel@tonic-gate */
17520Sstevel@tonic-gate if (ip->i_flag & ISEQ) {
17530Sstevel@tonic-gate ASSERT(ip->i_flag & (IUPD|ICHG));
17540Sstevel@tonic-gate ip->i_seq++;
17550Sstevel@tonic-gate ip->i_flag &= ~ISEQ;
17560Sstevel@tonic-gate }
17570Sstevel@tonic-gate
17580Sstevel@tonic-gate gethrestime(&now);
17590Sstevel@tonic-gate
17600Sstevel@tonic-gate /*
17610Sstevel@tonic-gate * Fast algorithm to convert nsec to usec -- see hrt2ts()
17620Sstevel@tonic-gate * in common/os/timers.c for a full description.
17630Sstevel@tonic-gate */
17640Sstevel@tonic-gate nsec = now.tv_nsec;
17650Sstevel@tonic-gate usec = nsec + (nsec >> 2);
17660Sstevel@tonic-gate usec = nsec + (usec >> 1);
17670Sstevel@tonic-gate usec = nsec + (usec >> 2);
17680Sstevel@tonic-gate usec = nsec + (usec >> 4);
17690Sstevel@tonic-gate usec = nsec - (usec >> 3);
17700Sstevel@tonic-gate usec = nsec + (usec >> 2);
17710Sstevel@tonic-gate usec = nsec + (usec >> 3);
17720Sstevel@tonic-gate usec = nsec + (usec >> 4);
17730Sstevel@tonic-gate usec = nsec + (usec >> 1);
17740Sstevel@tonic-gate usec = nsec + (usec >> 6);
17750Sstevel@tonic-gate usec = usec >> 10;
17760Sstevel@tonic-gate
17770Sstevel@tonic-gate mutex_enter(&ufs_iuniqtime_lock);
17780Sstevel@tonic-gate if (now.tv_sec > (time_t)iuniqtime.tv_sec ||
17790Sstevel@tonic-gate usec > iuniqtime.tv_usec) {
17800Sstevel@tonic-gate if (now.tv_sec < TIME32_MAX) {
17810Sstevel@tonic-gate iuniqtime.tv_sec = (time32_t)now.tv_sec;
17820Sstevel@tonic-gate iuniqtime.tv_usec = usec;
17830Sstevel@tonic-gate }
17840Sstevel@tonic-gate } else {
17850Sstevel@tonic-gate if (iuniqtime.tv_sec < TIME32_MAX) {
17860Sstevel@tonic-gate iuniqtime.tv_usec++;
17870Sstevel@tonic-gate /* Check for usec overflow */
17880Sstevel@tonic-gate if (iuniqtime.tv_usec >= MICROSEC) {
17890Sstevel@tonic-gate iuniqtime.tv_sec++;
17900Sstevel@tonic-gate iuniqtime.tv_usec = 0;
17910Sstevel@tonic-gate }
17920Sstevel@tonic-gate }
17930Sstevel@tonic-gate }
17940Sstevel@tonic-gate
17950Sstevel@tonic-gate if ((ip->i_flag & IACC) && !(ip->i_ufsvfs->vfs_noatime)) {
17960Sstevel@tonic-gate ip->i_atime = iuniqtime;
17970Sstevel@tonic-gate }
17980Sstevel@tonic-gate if (ip->i_flag & IUPD) {
17990Sstevel@tonic-gate ip->i_mtime = iuniqtime;
18000Sstevel@tonic-gate ip->i_flag |= IMODTIME;
18010Sstevel@tonic-gate }
18020Sstevel@tonic-gate if (ip->i_flag & ICHG) {
18030Sstevel@tonic-gate ip->i_diroff = 0;
18040Sstevel@tonic-gate ip->i_ctime = iuniqtime;
18050Sstevel@tonic-gate }
18060Sstevel@tonic-gate mutex_exit(&ufs_iuniqtime_lock);
18070Sstevel@tonic-gate }
18080Sstevel@tonic-gate
18090Sstevel@tonic-gate /*
18100Sstevel@tonic-gate * Update timestamps in inode.
18110Sstevel@tonic-gate */
18120Sstevel@tonic-gate void
ufs_itimes_nolock(struct inode * ip)18130Sstevel@tonic-gate ufs_itimes_nolock(struct inode *ip)
18140Sstevel@tonic-gate {
18150Sstevel@tonic-gate
18160Sstevel@tonic-gate /*
18170Sstevel@tonic-gate * if noatime is set and the inode access time is the only field that
18180Sstevel@tonic-gate * must be changed, exit immediately.
18190Sstevel@tonic-gate */
18200Sstevel@tonic-gate if (((ip->i_flag & (IUPD|IACC|ICHG)) == IACC) &&
18210Sstevel@tonic-gate (ip->i_ufsvfs->vfs_noatime)) {
18220Sstevel@tonic-gate return;
18230Sstevel@tonic-gate }
18240Sstevel@tonic-gate
18250Sstevel@tonic-gate if (ip->i_flag & (IUPD|IACC|ICHG)) {
18260Sstevel@tonic-gate if (ip->i_flag & ICHG)
18270Sstevel@tonic-gate ip->i_flag |= IMOD;
18280Sstevel@tonic-gate else
18290Sstevel@tonic-gate ip->i_flag |= IMODACC;
18300Sstevel@tonic-gate ufs_imark(ip);
18310Sstevel@tonic-gate ip->i_flag &= ~(IACC|IUPD|ICHG);
18320Sstevel@tonic-gate }
18330Sstevel@tonic-gate }
1834