10Sstevel@tonic-gate /* 20Sstevel@tonic-gate * CDDL HEADER START 30Sstevel@tonic-gate * 40Sstevel@tonic-gate * The contents of this file are subject to the terms of the 50Sstevel@tonic-gate * Common Development and Distribution License, Version 1.0 only 60Sstevel@tonic-gate * (the "License"). You may not use this file except in compliance 70Sstevel@tonic-gate * with the License. 80Sstevel@tonic-gate * 90Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 100Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing. 110Sstevel@tonic-gate * See the License for the specific language governing permissions 120Sstevel@tonic-gate * and limitations under the License. 130Sstevel@tonic-gate * 140Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each 150Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 160Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the 170Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying 180Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner] 190Sstevel@tonic-gate * 200Sstevel@tonic-gate * CDDL HEADER END 210Sstevel@tonic-gate */ 220Sstevel@tonic-gate /* 230Sstevel@tonic-gate * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 240Sstevel@tonic-gate * Use is subject to license terms. 250Sstevel@tonic-gate */ 260Sstevel@tonic-gate 270Sstevel@tonic-gate /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */ 280Sstevel@tonic-gate /* All Rights Reserved */ 290Sstevel@tonic-gate 300Sstevel@tonic-gate /* 310Sstevel@tonic-gate * University Copyright- Copyright (c) 1982, 1986, 1988 320Sstevel@tonic-gate * The Regents of the University of California 330Sstevel@tonic-gate * All Rights Reserved 340Sstevel@tonic-gate * 350Sstevel@tonic-gate * University Acknowledgment- Portions of this document are derived from 360Sstevel@tonic-gate * software developed by the University of California, Berkeley, and its 370Sstevel@tonic-gate * contributors. 380Sstevel@tonic-gate */ 390Sstevel@tonic-gate 400Sstevel@tonic-gate 410Sstevel@tonic-gate #pragma ident "%Z%%M% %I% %E% SMI" 420Sstevel@tonic-gate 430Sstevel@tonic-gate #include <sys/types.h> 440Sstevel@tonic-gate #include <sys/t_lock.h> 450Sstevel@tonic-gate #include <sys/param.h> 460Sstevel@tonic-gate #include <sys/systm.h> 470Sstevel@tonic-gate #include <sys/uio.h> 480Sstevel@tonic-gate #include <sys/bitmap.h> 490Sstevel@tonic-gate #include <sys/signal.h> 500Sstevel@tonic-gate #include <sys/cred.h> 510Sstevel@tonic-gate #include <sys/user.h> 520Sstevel@tonic-gate #include <sys/vfs.h> 530Sstevel@tonic-gate #include <sys/stat.h> 540Sstevel@tonic-gate #include <sys/vnode.h> 550Sstevel@tonic-gate #include <sys/buf.h> 560Sstevel@tonic-gate #include <sys/proc.h> 570Sstevel@tonic-gate #include <sys/disp.h> 580Sstevel@tonic-gate #include <sys/dnlc.h> 590Sstevel@tonic-gate #include <sys/mode.h> 600Sstevel@tonic-gate #include <sys/cmn_err.h> 610Sstevel@tonic-gate #include <sys/kstat.h> 620Sstevel@tonic-gate #include <sys/acl.h> 630Sstevel@tonic-gate #include <sys/var.h> 640Sstevel@tonic-gate #include <sys/fs/ufs_inode.h> 650Sstevel@tonic-gate #include <sys/fs/ufs_fs.h> 660Sstevel@tonic-gate #include <sys/fs/ufs_trans.h> 670Sstevel@tonic-gate #include <sys/fs/ufs_acl.h> 680Sstevel@tonic-gate #include <sys/fs/ufs_bio.h> 690Sstevel@tonic-gate #include <sys/fs/ufs_quota.h> 700Sstevel@tonic-gate #include <sys/fs/ufs_log.h> 710Sstevel@tonic-gate #include <vm/hat.h> 720Sstevel@tonic-gate #include <vm/as.h> 730Sstevel@tonic-gate #include <vm/pvn.h> 740Sstevel@tonic-gate #include <vm/seg.h> 750Sstevel@tonic-gate #include <sys/swap.h> 760Sstevel@tonic-gate #include <sys/cpuvar.h> 770Sstevel@tonic-gate #include <sys/sysmacros.h> 780Sstevel@tonic-gate #include <sys/errno.h> 790Sstevel@tonic-gate #include <sys/kmem.h> 800Sstevel@tonic-gate #include <sys/debug.h> 810Sstevel@tonic-gate #include <fs/fs_subr.h> 820Sstevel@tonic-gate #include <sys/policy.h> 830Sstevel@tonic-gate 840Sstevel@tonic-gate struct kmem_cache *inode_cache; /* cache of free inodes */ 850Sstevel@tonic-gate 860Sstevel@tonic-gate /* UFS Inode Cache Stats -- Not protected */ 870Sstevel@tonic-gate struct instats ins = { 880Sstevel@tonic-gate { "size", KSTAT_DATA_ULONG }, 890Sstevel@tonic-gate { "maxsize", KSTAT_DATA_ULONG }, 900Sstevel@tonic-gate { "hits", KSTAT_DATA_ULONG }, 910Sstevel@tonic-gate { "misses", KSTAT_DATA_ULONG }, 920Sstevel@tonic-gate { "kmem allocs", KSTAT_DATA_ULONG }, 930Sstevel@tonic-gate { "kmem frees", KSTAT_DATA_ULONG }, 940Sstevel@tonic-gate { "maxsize reached", KSTAT_DATA_ULONG }, 950Sstevel@tonic-gate { "puts at frontlist", KSTAT_DATA_ULONG }, 960Sstevel@tonic-gate { "puts at backlist", KSTAT_DATA_ULONG }, 970Sstevel@tonic-gate { "queues to free", KSTAT_DATA_ULONG }, 980Sstevel@tonic-gate { "scans", KSTAT_DATA_ULONG }, 990Sstevel@tonic-gate { "thread idles", KSTAT_DATA_ULONG }, 1000Sstevel@tonic-gate { "lookup idles", KSTAT_DATA_ULONG }, 1010Sstevel@tonic-gate { "vget idles", KSTAT_DATA_ULONG }, 1020Sstevel@tonic-gate { "cache allocs", KSTAT_DATA_ULONG }, 1030Sstevel@tonic-gate { "cache frees", KSTAT_DATA_ULONG }, 1040Sstevel@tonic-gate { "pushes at close", KSTAT_DATA_ULONG } 1050Sstevel@tonic-gate }; 1060Sstevel@tonic-gate 1070Sstevel@tonic-gate /* kstat data */ 1080Sstevel@tonic-gate static kstat_t *ufs_inode_kstat = NULL; 1090Sstevel@tonic-gate 1100Sstevel@tonic-gate union ihead *ihead; /* inode LRU cache, Chris Maltby */ 1110Sstevel@tonic-gate kmutex_t *ih_lock; /* protect inode cache hash table */ 1120Sstevel@tonic-gate static int ino_hashlen = 4; /* desired average hash chain length */ 1130Sstevel@tonic-gate int inohsz; /* number of buckets in the hash table */ 1140Sstevel@tonic-gate 1150Sstevel@tonic-gate kmutex_t ufs_scan_lock; /* stop racing multiple ufs_scan_inodes() */ 1160Sstevel@tonic-gate kmutex_t ufs_iuniqtime_lock; /* protect iuniqtime */ 1170Sstevel@tonic-gate kmutex_t ufsvfs_mutex; 1180Sstevel@tonic-gate struct ufsvfs *oldufsvfslist, *ufsvfslist; 1190Sstevel@tonic-gate 1200Sstevel@tonic-gate /* 1210Sstevel@tonic-gate * time to wait after ufsvfsp->vfs_iotstamp before declaring that no 1220Sstevel@tonic-gate * I/Os are going on. 1230Sstevel@tonic-gate */ 1240Sstevel@tonic-gate clock_t ufs_iowait; 1250Sstevel@tonic-gate 1260Sstevel@tonic-gate /* 1270Sstevel@tonic-gate * the threads that process idle inodes and free (deleted) inodes 1280Sstevel@tonic-gate * have high water marks that are set in ufsinit(). 1290Sstevel@tonic-gate * These values but can be no less then the minimum shown below 1300Sstevel@tonic-gate */ 1310Sstevel@tonic-gate int ufs_idle_max; /* # of allowable idle inodes */ 1320Sstevel@tonic-gate ulong_t ufs_inode_max; /* hard limit of allowable idle inodes */ 1330Sstevel@tonic-gate #define UFS_IDLE_MAX (16) /* min # of allowable idle inodes */ 1340Sstevel@tonic-gate 1350Sstevel@tonic-gate /* 1360Sstevel@tonic-gate * Tunables for ufs write throttling. 1370Sstevel@tonic-gate * These are validated in ufs_iinit() since improper settings 1380Sstevel@tonic-gate * can lead to filesystem hangs. 1390Sstevel@tonic-gate */ 1400Sstevel@tonic-gate #define UFS_HW_DEFAULT (16 * 1024 * 1024) 1410Sstevel@tonic-gate #define UFS_LW_DEFAULT (8 * 1024 * 1024) 1420Sstevel@tonic-gate int ufs_HW = UFS_HW_DEFAULT; 1430Sstevel@tonic-gate int ufs_LW = UFS_LW_DEFAULT; 1440Sstevel@tonic-gate 1450Sstevel@tonic-gate static void ihinit(void); 1460Sstevel@tonic-gate extern int hash2ints(int, int); 1470Sstevel@tonic-gate 1480Sstevel@tonic-gate static int ufs_iget_internal(struct vfs *, ino_t, struct inode **, 1490Sstevel@tonic-gate struct cred *, int); 1500Sstevel@tonic-gate 1510Sstevel@tonic-gate /* ARGSUSED */ 1520Sstevel@tonic-gate static int 1530Sstevel@tonic-gate ufs_inode_kstat_update(kstat_t *ksp, int rw) 1540Sstevel@tonic-gate { 1550Sstevel@tonic-gate if (rw == KSTAT_WRITE) 1560Sstevel@tonic-gate return (EACCES); 1570Sstevel@tonic-gate 1580Sstevel@tonic-gate ins.in_malloc.value.ul = (ulong_t)kmem_cache_stat(inode_cache, 1590Sstevel@tonic-gate "slab_alloc"); 1600Sstevel@tonic-gate ins.in_mfree.value.ul = (ulong_t)kmem_cache_stat(inode_cache, 1610Sstevel@tonic-gate "slab_free"); 1620Sstevel@tonic-gate ins.in_kcalloc.value.ul = (ulong_t)kmem_cache_stat(inode_cache, 1630Sstevel@tonic-gate "alloc"); 1640Sstevel@tonic-gate ins.in_kcfree.value.ul = (ulong_t)kmem_cache_stat(inode_cache, 1650Sstevel@tonic-gate "free"); 1660Sstevel@tonic-gate ins.in_size.value.ul = (ulong_t)kmem_cache_stat(inode_cache, 1670Sstevel@tonic-gate "buf_inuse"); 1680Sstevel@tonic-gate ins.in_maxreached.value.ul = (ulong_t)kmem_cache_stat(inode_cache, 1690Sstevel@tonic-gate "buf_max"); 1700Sstevel@tonic-gate ins.in_misses.value.ul = ins.in_kcalloc.value.ul; 1710Sstevel@tonic-gate 1720Sstevel@tonic-gate return (0); 1730Sstevel@tonic-gate } 1740Sstevel@tonic-gate 1750Sstevel@tonic-gate void 1760Sstevel@tonic-gate ufs_iinit(void) 1770Sstevel@tonic-gate { 1780Sstevel@tonic-gate /* 1790Sstevel@tonic-gate * Validate that ufs_HW > ufs_LW. 1800Sstevel@tonic-gate * The default values for these two tunables have been increased. 1810Sstevel@tonic-gate * There is now a range of values for ufs_HW that used to be 1820Sstevel@tonic-gate * legal on previous Solaris versions but no longer is now. 1830Sstevel@tonic-gate * Upgrading a machine which has an /etc/system setting for ufs_HW 1840Sstevel@tonic-gate * from that range can lead to filesystem hangs unless the values 1850Sstevel@tonic-gate * are checked here. 1860Sstevel@tonic-gate */ 1870Sstevel@tonic-gate if (ufs_HW <= ufs_LW) { 1880Sstevel@tonic-gate cmn_err(CE_WARN, 1890Sstevel@tonic-gate "ufs_HW (%d) <= ufs_LW (%d). Check /etc/system.", 1900Sstevel@tonic-gate ufs_HW, ufs_LW); 1910Sstevel@tonic-gate ufs_LW = UFS_LW_DEFAULT; 1920Sstevel@tonic-gate ufs_HW = UFS_HW_DEFAULT; 1930Sstevel@tonic-gate cmn_err(CE_CONT, "using defaults, ufs_HW = %d, ufs_LW = %d\n", 1940Sstevel@tonic-gate ufs_HW, ufs_LW); 1950Sstevel@tonic-gate } 1960Sstevel@tonic-gate 1970Sstevel@tonic-gate /* 1980Sstevel@tonic-gate * Adjust the tunable `ufs_ninode' to a reasonable value 1990Sstevel@tonic-gate */ 2000Sstevel@tonic-gate if (ufs_ninode <= 0) 2010Sstevel@tonic-gate ufs_ninode = ncsize; 2020Sstevel@tonic-gate if (ufs_inode_max == 0) 2030Sstevel@tonic-gate ufs_inode_max = (ulong_t)((kmem_maxavail() >> 2) / 2040Sstevel@tonic-gate sizeof (struct inode)); 2050Sstevel@tonic-gate if (ufs_ninode > ufs_inode_max || (ufs_ninode == 0 && ncsize == 0)) { 2060Sstevel@tonic-gate cmn_err(CE_NOTE, "setting ufs_ninode to max value of %ld", 2070Sstevel@tonic-gate ufs_inode_max); 2080Sstevel@tonic-gate ufs_ninode = ufs_inode_max; 2090Sstevel@tonic-gate } 2100Sstevel@tonic-gate /* 2110Sstevel@tonic-gate * Wait till third call of ufs_update to declare that no I/Os are 2120Sstevel@tonic-gate * going on. This allows deferred access times to be flushed to disk. 2130Sstevel@tonic-gate */ 2140Sstevel@tonic-gate ufs_iowait = v.v_autoup * hz * 2; 2150Sstevel@tonic-gate 2160Sstevel@tonic-gate /* 2170Sstevel@tonic-gate * idle thread runs when 25% of ufs_ninode entries are on the queue 2180Sstevel@tonic-gate */ 2190Sstevel@tonic-gate if (ufs_idle_max == 0) 2200Sstevel@tonic-gate ufs_idle_max = ufs_ninode >> 2; 2210Sstevel@tonic-gate if (ufs_idle_max < UFS_IDLE_MAX) 2220Sstevel@tonic-gate ufs_idle_max = UFS_IDLE_MAX; 2230Sstevel@tonic-gate if (ufs_idle_max > ufs_ninode) 2240Sstevel@tonic-gate ufs_idle_max = ufs_ninode; 2250Sstevel@tonic-gate /* 2260Sstevel@tonic-gate * This is really a misnomer, it is ufs_queue_init 2270Sstevel@tonic-gate */ 2280Sstevel@tonic-gate ufs_thread_init(&ufs_idle_q, ufs_idle_max); 2290Sstevel@tonic-gate ufs_thread_start(&ufs_idle_q, ufs_thread_idle, NULL); 2300Sstevel@tonic-gate 2310Sstevel@tonic-gate /* 2320Sstevel@tonic-gate * global hlock thread 2330Sstevel@tonic-gate */ 2340Sstevel@tonic-gate ufs_thread_init(&ufs_hlock, 1); 2350Sstevel@tonic-gate ufs_thread_start(&ufs_hlock, ufs_thread_hlock, NULL); 2360Sstevel@tonic-gate 2370Sstevel@tonic-gate ihinit(); 2380Sstevel@tonic-gate qtinit(); 2390Sstevel@tonic-gate ins.in_maxsize.value.ul = ufs_ninode; 2400Sstevel@tonic-gate if ((ufs_inode_kstat = kstat_create("ufs", 0, "inode_cache", "ufs", 2410Sstevel@tonic-gate KSTAT_TYPE_NAMED, sizeof (ins) / sizeof (kstat_named_t), 2420Sstevel@tonic-gate KSTAT_FLAG_VIRTUAL)) != NULL) { 2430Sstevel@tonic-gate ufs_inode_kstat->ks_data = (void *)&ins; 2440Sstevel@tonic-gate ufs_inode_kstat->ks_update = ufs_inode_kstat_update; 2450Sstevel@tonic-gate kstat_install(ufs_inode_kstat); 2460Sstevel@tonic-gate } 2470Sstevel@tonic-gate ufsfx_init(); /* fix-on-panic initialization */ 2480Sstevel@tonic-gate si_cache_init(); 2490Sstevel@tonic-gate ufs_directio_init(); 2500Sstevel@tonic-gate lufs_init(); 2510Sstevel@tonic-gate mutex_init(&ufs_iuniqtime_lock, NULL, MUTEX_DEFAULT, NULL); 2520Sstevel@tonic-gate } 2530Sstevel@tonic-gate 2540Sstevel@tonic-gate /* ARGSUSED */ 2550Sstevel@tonic-gate static int 2560Sstevel@tonic-gate ufs_inode_cache_constructor(void *buf, void *cdrarg, int kmflags) 2570Sstevel@tonic-gate { 2580Sstevel@tonic-gate struct inode *ip = buf; 2590Sstevel@tonic-gate struct vnode *vp; 2600Sstevel@tonic-gate 2610Sstevel@tonic-gate rw_init(&ip->i_rwlock, NULL, RW_DEFAULT, NULL); 2620Sstevel@tonic-gate rw_init(&ip->i_contents, NULL, RW_DEFAULT, NULL); 2630Sstevel@tonic-gate mutex_init(&ip->i_tlock, NULL, MUTEX_DEFAULT, NULL); 2640Sstevel@tonic-gate dnlc_dir_init(&ip->i_danchor); 2650Sstevel@tonic-gate 2660Sstevel@tonic-gate cv_init(&ip->i_wrcv, NULL, CV_DRIVER, NULL); 2670Sstevel@tonic-gate 2680Sstevel@tonic-gate vp = vn_alloc(KM_SLEEP); 2690Sstevel@tonic-gate ip->i_vnode = vp; 2700Sstevel@tonic-gate 2710Sstevel@tonic-gate vn_setops(vp, ufs_vnodeops); 2720Sstevel@tonic-gate vp->v_data = (caddr_t)ip; 2730Sstevel@tonic-gate 2740Sstevel@tonic-gate return (0); 2750Sstevel@tonic-gate } 2760Sstevel@tonic-gate 2770Sstevel@tonic-gate /* ARGSUSED */ 2780Sstevel@tonic-gate static void 2790Sstevel@tonic-gate ufs_inode_cache_destructor(void *buf, void *cdrarg) 2800Sstevel@tonic-gate { 2810Sstevel@tonic-gate struct inode *ip = buf; 2820Sstevel@tonic-gate struct vnode *vp; 2830Sstevel@tonic-gate 2840Sstevel@tonic-gate vp = ITOV(ip); 2850Sstevel@tonic-gate 2860Sstevel@tonic-gate rw_destroy(&ip->i_rwlock); 2870Sstevel@tonic-gate rw_destroy(&ip->i_contents); 2880Sstevel@tonic-gate 2890Sstevel@tonic-gate mutex_destroy(&ip->i_tlock); 2900Sstevel@tonic-gate if (vp->v_type == VDIR) { 2910Sstevel@tonic-gate dnlc_dir_fini(&ip->i_danchor); 2920Sstevel@tonic-gate } 2930Sstevel@tonic-gate 2940Sstevel@tonic-gate cv_destroy(&ip->i_wrcv); 2950Sstevel@tonic-gate 2960Sstevel@tonic-gate vn_free(vp); 2970Sstevel@tonic-gate } 2980Sstevel@tonic-gate 2990Sstevel@tonic-gate /* 3000Sstevel@tonic-gate * Initialize hash links for inodes 3010Sstevel@tonic-gate * and build inode free list. 3020Sstevel@tonic-gate */ 3030Sstevel@tonic-gate void 3040Sstevel@tonic-gate ihinit(void) 3050Sstevel@tonic-gate { 3060Sstevel@tonic-gate int i; 3070Sstevel@tonic-gate union ihead *ih = ihead; 3080Sstevel@tonic-gate 3090Sstevel@tonic-gate mutex_init(&ufs_scan_lock, NULL, MUTEX_DEFAULT, NULL); 3100Sstevel@tonic-gate 3110Sstevel@tonic-gate inohsz = 1 << highbit(ufs_ninode / ino_hashlen); 3120Sstevel@tonic-gate ihead = kmem_zalloc(inohsz * sizeof (union ihead), KM_SLEEP); 3130Sstevel@tonic-gate ih_lock = kmem_zalloc(inohsz * sizeof (kmutex_t), KM_SLEEP); 3140Sstevel@tonic-gate 3150Sstevel@tonic-gate for (i = 0, ih = ihead; i < inohsz; i++, ih++) { 3160Sstevel@tonic-gate ih->ih_head[0] = ih; 3170Sstevel@tonic-gate ih->ih_head[1] = ih; 3180Sstevel@tonic-gate mutex_init(&ih_lock[i], NULL, MUTEX_DEFAULT, NULL); 3190Sstevel@tonic-gate } 3200Sstevel@tonic-gate inode_cache = kmem_cache_create("ufs_inode_cache", 3210Sstevel@tonic-gate sizeof (struct inode), 0, ufs_inode_cache_constructor, 3220Sstevel@tonic-gate ufs_inode_cache_destructor, ufs_inode_cache_reclaim, 3230Sstevel@tonic-gate NULL, NULL, 0); 3240Sstevel@tonic-gate } 3250Sstevel@tonic-gate 3260Sstevel@tonic-gate /* 3270Sstevel@tonic-gate * Free an inode structure 3280Sstevel@tonic-gate */ 3290Sstevel@tonic-gate void 3300Sstevel@tonic-gate ufs_free_inode(struct inode *ip) 3310Sstevel@tonic-gate { 3320Sstevel@tonic-gate vn_invalid(ITOV(ip)); 3330Sstevel@tonic-gate kmem_cache_free(inode_cache, ip); 3340Sstevel@tonic-gate } 3350Sstevel@tonic-gate 3360Sstevel@tonic-gate /* 3370Sstevel@tonic-gate * Allocate an inode structure 3380Sstevel@tonic-gate */ 3390Sstevel@tonic-gate struct inode * 3400Sstevel@tonic-gate ufs_alloc_inode(ufsvfs_t *ufsvfsp, ino_t ino) 3410Sstevel@tonic-gate { 3420Sstevel@tonic-gate struct inode *ip; 3430Sstevel@tonic-gate vnode_t *vp; 3440Sstevel@tonic-gate 3450Sstevel@tonic-gate ip = kmem_cache_alloc(inode_cache, KM_SLEEP); 3460Sstevel@tonic-gate /* 3470Sstevel@tonic-gate * at this point we have a newly allocated inode 3480Sstevel@tonic-gate */ 3490Sstevel@tonic-gate ip->i_freef = ip; 3500Sstevel@tonic-gate ip->i_freeb = ip; 3510Sstevel@tonic-gate ip->i_flag = IREF; 3520Sstevel@tonic-gate ip->i_seq = 0xFF; /* Unique initial value */ 3530Sstevel@tonic-gate ip->i_dev = ufsvfsp->vfs_dev; 3540Sstevel@tonic-gate ip->i_ufsvfs = ufsvfsp; 3550Sstevel@tonic-gate ip->i_devvp = ufsvfsp->vfs_devvp; 3560Sstevel@tonic-gate ip->i_number = ino; 3570Sstevel@tonic-gate ip->i_diroff = 0; 3580Sstevel@tonic-gate ip->i_nextr = 0; 3590Sstevel@tonic-gate ip->i_map = NULL; 3600Sstevel@tonic-gate ip->i_rdev = 0; 3610Sstevel@tonic-gate ip->i_writes = 0; 3620Sstevel@tonic-gate ip->i_mode = 0; 3630Sstevel@tonic-gate ip->i_delaylen = 0; 3640Sstevel@tonic-gate ip->i_delayoff = 0; 3650Sstevel@tonic-gate ip->i_nextrio = 0; 3660Sstevel@tonic-gate ip->i_ufs_acl = NULL; 3670Sstevel@tonic-gate ip->i_cflags = 0; 3680Sstevel@tonic-gate ip->i_mapcnt = 0; 3690Sstevel@tonic-gate ip->i_dquot = NULL; 3700Sstevel@tonic-gate ip->i_cachedir = 1; 3710Sstevel@tonic-gate ip->i_writer = NULL; 3720Sstevel@tonic-gate 3730Sstevel@tonic-gate /* 3740Sstevel@tonic-gate * the vnode for this inode was allocated by the constructor 3750Sstevel@tonic-gate */ 3760Sstevel@tonic-gate vp = ITOV(ip); 3770Sstevel@tonic-gate vn_reinit(vp); 3780Sstevel@tonic-gate if (ino == (ino_t)UFSROOTINO) 3790Sstevel@tonic-gate vp->v_flag = VROOT; 3800Sstevel@tonic-gate vp->v_vfsp = ufsvfsp->vfs_vfs; 3810Sstevel@tonic-gate vn_exists(vp); 3820Sstevel@tonic-gate return (ip); 3830Sstevel@tonic-gate } 3840Sstevel@tonic-gate 3850Sstevel@tonic-gate /* 3860Sstevel@tonic-gate * Look up an inode by device, inumber. If it is in core (in the 3870Sstevel@tonic-gate * inode structure), honor the locking protocol. If it is not in 3880Sstevel@tonic-gate * core, read it in from the specified device after freeing any pages. 3890Sstevel@tonic-gate * In all cases, a pointer to a VN_HELD inode structure is returned. 3900Sstevel@tonic-gate */ 3910Sstevel@tonic-gate int 3920Sstevel@tonic-gate ufs_iget(struct vfs *vfsp, ino_t ino, struct inode **ipp, struct cred *cr) 3930Sstevel@tonic-gate { 3940Sstevel@tonic-gate return (ufs_iget_internal(vfsp, ino, ipp, cr, 0)); 3950Sstevel@tonic-gate } 3960Sstevel@tonic-gate 3970Sstevel@tonic-gate /* 3980Sstevel@tonic-gate * A version of ufs_iget which returns only allocated, linked inodes. 3990Sstevel@tonic-gate * This is appropriate for any callers who do not expect a free inode. 4000Sstevel@tonic-gate */ 4010Sstevel@tonic-gate int 4020Sstevel@tonic-gate ufs_iget_alloced(struct vfs *vfsp, ino_t ino, struct inode **ipp, 4030Sstevel@tonic-gate struct cred *cr) 4040Sstevel@tonic-gate { 4050Sstevel@tonic-gate return (ufs_iget_internal(vfsp, ino, ipp, cr, 1)); 4060Sstevel@tonic-gate } 4070Sstevel@tonic-gate 4080Sstevel@tonic-gate /* 4090Sstevel@tonic-gate * Set vnode attributes based on v_type, this should be called whenever 4100Sstevel@tonic-gate * an inode's i_mode is changed. 4110Sstevel@tonic-gate */ 4120Sstevel@tonic-gate void 4130Sstevel@tonic-gate ufs_reset_vnode(vnode_t *vp) 4140Sstevel@tonic-gate { 4150Sstevel@tonic-gate /* 4160Sstevel@tonic-gate * an old DBE hack 4170Sstevel@tonic-gate */ 4180Sstevel@tonic-gate if ((VTOI(vp)->i_mode & (ISVTX | IEXEC | IFDIR)) == ISVTX) 4190Sstevel@tonic-gate vp->v_flag |= VSWAPLIKE; 4200Sstevel@tonic-gate else 4210Sstevel@tonic-gate vp->v_flag &= ~VSWAPLIKE; 4220Sstevel@tonic-gate 4230Sstevel@tonic-gate /* 4240Sstevel@tonic-gate * if not swap like and it's just a regular file, we want 4250Sstevel@tonic-gate * to maintain the vnode's pages sorted by clean/modified 4260Sstevel@tonic-gate * for faster sync'ing to disk 4270Sstevel@tonic-gate */ 4280Sstevel@tonic-gate if (vp->v_type == VREG) 4290Sstevel@tonic-gate vp->v_flag |= VMODSORT; 4300Sstevel@tonic-gate else 4310Sstevel@tonic-gate vp->v_flag &= ~VMODSORT; 4320Sstevel@tonic-gate 4330Sstevel@tonic-gate /* 4340Sstevel@tonic-gate * Is this an attribute hidden dir? 4350Sstevel@tonic-gate */ 4360Sstevel@tonic-gate if ((VTOI(vp)->i_mode & IFMT) == IFATTRDIR) 4370Sstevel@tonic-gate vp->v_flag |= V_XATTRDIR; 4380Sstevel@tonic-gate else 4390Sstevel@tonic-gate vp->v_flag &= ~V_XATTRDIR; 4400Sstevel@tonic-gate } 4410Sstevel@tonic-gate 4420Sstevel@tonic-gate /* 4430Sstevel@tonic-gate * Shared implementation of ufs_iget and ufs_iget_alloced. The 'validate' 4440Sstevel@tonic-gate * flag is used to distinguish the two; when true, we validate that the inode 4450Sstevel@tonic-gate * being retrieved looks like a linked and allocated inode. 4460Sstevel@tonic-gate */ 4470Sstevel@tonic-gate /* ARGSUSED */ 4480Sstevel@tonic-gate static int 4490Sstevel@tonic-gate ufs_iget_internal(struct vfs *vfsp, ino_t ino, struct inode **ipp, 4500Sstevel@tonic-gate struct cred *cr, int validate) 4510Sstevel@tonic-gate { 4520Sstevel@tonic-gate struct inode *ip, *sp; 4530Sstevel@tonic-gate union ihead *ih; 4540Sstevel@tonic-gate kmutex_t *ihm; 4550Sstevel@tonic-gate struct buf *bp; 4560Sstevel@tonic-gate struct dinode *dp; 4570Sstevel@tonic-gate struct vnode *vp; 4580Sstevel@tonic-gate extern vfs_t EIO_vfs; 4590Sstevel@tonic-gate int error; 4600Sstevel@tonic-gate int ftype; /* XXX - Remove later on */ 4610Sstevel@tonic-gate dev_t vfs_dev; 4620Sstevel@tonic-gate struct ufsvfs *ufsvfsp; 4630Sstevel@tonic-gate struct fs *fs; 4640Sstevel@tonic-gate int hno; 4650Sstevel@tonic-gate daddr_t bno; 4660Sstevel@tonic-gate ulong_t ioff; 4670Sstevel@tonic-gate 4680Sstevel@tonic-gate CPU_STATS_ADD_K(sys, ufsiget, 1); 4690Sstevel@tonic-gate 4700Sstevel@tonic-gate /* 4710Sstevel@tonic-gate * Lookup inode in cache. 4720Sstevel@tonic-gate */ 4730Sstevel@tonic-gate vfs_dev = vfsp->vfs_dev; 4740Sstevel@tonic-gate hno = INOHASH(ino); 4750Sstevel@tonic-gate ih = &ihead[hno]; 4760Sstevel@tonic-gate ihm = &ih_lock[hno]; 4770Sstevel@tonic-gate 4780Sstevel@tonic-gate again: 4790Sstevel@tonic-gate mutex_enter(ihm); 4800Sstevel@tonic-gate for (ip = ih->ih_chain[0]; ip != (struct inode *)ih; ip = ip->i_forw) { 4810Sstevel@tonic-gate if (ino != ip->i_number || vfs_dev != ip->i_dev || 4820Sstevel@tonic-gate (ip->i_flag & ISTALE)) 4830Sstevel@tonic-gate continue; 4840Sstevel@tonic-gate 4850Sstevel@tonic-gate /* 4860Sstevel@tonic-gate * Found the interesting inode; hold it and drop the cache lock 4870Sstevel@tonic-gate */ 4880Sstevel@tonic-gate vp = ITOV(ip); /* for locknest */ 4890Sstevel@tonic-gate VN_HOLD(vp); 4900Sstevel@tonic-gate mutex_exit(ihm); 4910Sstevel@tonic-gate rw_enter(&ip->i_contents, RW_READER); 4920Sstevel@tonic-gate 4930Sstevel@tonic-gate /* 4940Sstevel@tonic-gate * if necessary, remove from idle list 4950Sstevel@tonic-gate */ 4960Sstevel@tonic-gate if ((ip->i_flag & IREF) == 0) { 4970Sstevel@tonic-gate if (ufs_rmidle(ip)) 4980Sstevel@tonic-gate VN_RELE(vp); 4990Sstevel@tonic-gate } 5000Sstevel@tonic-gate 5010Sstevel@tonic-gate /* 5020Sstevel@tonic-gate * Could the inode be read from disk? 5030Sstevel@tonic-gate */ 5040Sstevel@tonic-gate if (ip->i_flag & ISTALE) { 5050Sstevel@tonic-gate rw_exit(&ip->i_contents); 5060Sstevel@tonic-gate VN_RELE(vp); 5070Sstevel@tonic-gate goto again; 5080Sstevel@tonic-gate } 5090Sstevel@tonic-gate 5100Sstevel@tonic-gate ins.in_hits.value.ul++; 5110Sstevel@tonic-gate *ipp = ip; 5120Sstevel@tonic-gate 5130Sstevel@tonic-gate /* 5140Sstevel@tonic-gate * Reset the vnode's attribute flags 5150Sstevel@tonic-gate */ 5160Sstevel@tonic-gate mutex_enter(&vp->v_lock); 5170Sstevel@tonic-gate ufs_reset_vnode(vp); 5180Sstevel@tonic-gate mutex_exit(&vp->v_lock); 5190Sstevel@tonic-gate 5200Sstevel@tonic-gate rw_exit(&ip->i_contents); 5210Sstevel@tonic-gate 5220Sstevel@tonic-gate return (0); 5230Sstevel@tonic-gate } 5240Sstevel@tonic-gate mutex_exit(ihm); 5250Sstevel@tonic-gate 5260Sstevel@tonic-gate /* 5270Sstevel@tonic-gate * Inode was not in cache. 5280Sstevel@tonic-gate * 5290Sstevel@tonic-gate * Allocate a new entry 5300Sstevel@tonic-gate */ 5310Sstevel@tonic-gate ufsvfsp = (struct ufsvfs *)vfsp->vfs_data; 5320Sstevel@tonic-gate fs = ufsvfsp->vfs_fs; 5330Sstevel@tonic-gate 5340Sstevel@tonic-gate ip = ufs_alloc_inode(ufsvfsp, ino); 5350Sstevel@tonic-gate vp = ITOV(ip); 5360Sstevel@tonic-gate 5370Sstevel@tonic-gate bno = fsbtodb(fs, itod(fs, ino)); 5380Sstevel@tonic-gate ioff = (sizeof (struct dinode)) * (itoo(fs, ino)); 5390Sstevel@tonic-gate ip->i_doff = (offset_t)ioff + ldbtob(bno); 5400Sstevel@tonic-gate 5410Sstevel@tonic-gate /* 5420Sstevel@tonic-gate * put a place holder in the cache (if not already there) 5430Sstevel@tonic-gate */ 5440Sstevel@tonic-gate mutex_enter(ihm); 5450Sstevel@tonic-gate for (sp = ih->ih_chain[0]; sp != (struct inode *)ih; sp = sp->i_forw) 5460Sstevel@tonic-gate if (ino == sp->i_number && vfs_dev == sp->i_dev && 5470Sstevel@tonic-gate ((sp->i_flag & ISTALE) == 0)) { 5480Sstevel@tonic-gate mutex_exit(ihm); 5490Sstevel@tonic-gate ufs_free_inode(ip); 5500Sstevel@tonic-gate goto again; 5510Sstevel@tonic-gate } 5520Sstevel@tonic-gate /* 5530Sstevel@tonic-gate * It would be nice to ASSERT(RW_READ_HELD(&ufsvfsp->vfs_dqrwlock)) 5540Sstevel@tonic-gate * here, but if we do, then shadow inode allocations panic the 5550Sstevel@tonic-gate * system. We don't have to hold vfs_dqrwlock for shadow inodes 5560Sstevel@tonic-gate * and the ufs_iget() parameters don't tell us what we are getting 5570Sstevel@tonic-gate * so we have no way of knowing this is a ufs_iget() call from 5580Sstevel@tonic-gate * a ufs_ialloc() call for a shadow inode. 5590Sstevel@tonic-gate */ 5600Sstevel@tonic-gate rw_enter(&ip->i_contents, RW_WRITER); 5610Sstevel@tonic-gate insque(ip, ih); 5620Sstevel@tonic-gate mutex_exit(ihm); 5630Sstevel@tonic-gate /* 5640Sstevel@tonic-gate * read the dinode 5650Sstevel@tonic-gate */ 5660Sstevel@tonic-gate bp = UFS_BREAD(ufsvfsp, ip->i_dev, bno, (int)fs->fs_bsize); 5670Sstevel@tonic-gate 5680Sstevel@tonic-gate /* 5690Sstevel@tonic-gate * Check I/O errors 5700Sstevel@tonic-gate */ 5710Sstevel@tonic-gate error = ((bp->b_flags & B_ERROR) ? geterror(bp) : 0); 5720Sstevel@tonic-gate if (error) { 5730Sstevel@tonic-gate brelse(bp); 5740Sstevel@tonic-gate ip->i_flag |= ISTALE; /* in case someone is looking it up */ 5750Sstevel@tonic-gate rw_exit(&ip->i_contents); 5760Sstevel@tonic-gate vp->v_vfsp = &EIO_vfs; 5770Sstevel@tonic-gate VN_RELE(vp); 5780Sstevel@tonic-gate return (error); 5790Sstevel@tonic-gate } 5800Sstevel@tonic-gate /* 5810Sstevel@tonic-gate * initialize the inode's dinode 5820Sstevel@tonic-gate */ 5830Sstevel@tonic-gate dp = (struct dinode *)(ioff + bp->b_un.b_addr); 5840Sstevel@tonic-gate ip->i_ic = dp->di_ic; /* structure assignment */ 5850Sstevel@tonic-gate brelse(bp); 5860Sstevel@tonic-gate 5870Sstevel@tonic-gate /* 5880Sstevel@tonic-gate * Maintain compatibility with Solaris 1.x UFS 5890Sstevel@tonic-gate */ 5900Sstevel@tonic-gate if (ip->i_suid != UID_LONG) 5910Sstevel@tonic-gate ip->i_uid = ip->i_suid; 5920Sstevel@tonic-gate if (ip->i_sgid != GID_LONG) 5930Sstevel@tonic-gate ip->i_gid = ip->i_sgid; 5940Sstevel@tonic-gate 5950Sstevel@tonic-gate ftype = ip->i_mode & IFMT; 5960Sstevel@tonic-gate if (ftype == IFBLK || ftype == IFCHR) { 5970Sstevel@tonic-gate dev_t dv; 5980Sstevel@tonic-gate uint_t top16 = ip->i_ordev & 0xffff0000u; 5990Sstevel@tonic-gate 6000Sstevel@tonic-gate if (top16 == 0 || top16 == 0xffff0000u) 6010Sstevel@tonic-gate dv = expdev(ip->i_ordev); 6020Sstevel@tonic-gate else 6030Sstevel@tonic-gate dv = expldev(ip->i_ordev); 6040Sstevel@tonic-gate vp->v_rdev = ip->i_rdev = dv; 6050Sstevel@tonic-gate } 6060Sstevel@tonic-gate 6070Sstevel@tonic-gate /* 6080Sstevel@tonic-gate * if our caller only expects allocated inodes, verify that 6090Sstevel@tonic-gate * this inode looks good; throw it out if it's bad. 6100Sstevel@tonic-gate */ 6110Sstevel@tonic-gate if (validate) { 6120Sstevel@tonic-gate if ((ftype == 0) || (ip->i_nlink <= 0)) { 6130Sstevel@tonic-gate ip->i_flag |= ISTALE; 6140Sstevel@tonic-gate rw_exit(&ip->i_contents); 6150Sstevel@tonic-gate vp->v_vfsp = &EIO_vfs; 6160Sstevel@tonic-gate VN_RELE(vp); 6170Sstevel@tonic-gate cmn_err(CE_NOTE, 6180Sstevel@tonic-gate "%s: unexpected free inode %d, run fsck(1M)%s", 6190Sstevel@tonic-gate fs->fs_fsmnt, (int)ino, 6200Sstevel@tonic-gate (TRANS_ISTRANS(ufsvfsp) ? " -o f" : "")); 6210Sstevel@tonic-gate return (EIO); 6220Sstevel@tonic-gate } 6230Sstevel@tonic-gate } 6240Sstevel@tonic-gate 6250Sstevel@tonic-gate /* 6260Sstevel@tonic-gate * finish initializing the vnode 6270Sstevel@tonic-gate */ 6280Sstevel@tonic-gate vp->v_type = IFTOVT((mode_t)ip->i_mode); 6290Sstevel@tonic-gate 6300Sstevel@tonic-gate ufs_reset_vnode(vp); 6310Sstevel@tonic-gate 6320Sstevel@tonic-gate /* 6330Sstevel@tonic-gate * read the shadow 6340Sstevel@tonic-gate */ 6350Sstevel@tonic-gate if (ftype != 0 && ip->i_shadow != 0) { 6360Sstevel@tonic-gate if ((error = ufs_si_load(ip, cr)) != 0) { 6370Sstevel@tonic-gate ip->i_flag |= ISTALE; 6380Sstevel@tonic-gate ip->i_ufs_acl = NULL; 6390Sstevel@tonic-gate rw_exit(&ip->i_contents); 6400Sstevel@tonic-gate vp->v_vfsp = &EIO_vfs; 6410Sstevel@tonic-gate VN_RELE(vp); 6420Sstevel@tonic-gate return (error); 6430Sstevel@tonic-gate } 6440Sstevel@tonic-gate } 6450Sstevel@tonic-gate 6460Sstevel@tonic-gate /* 6470Sstevel@tonic-gate * Only attach quota information if the inode has a type and if 6480Sstevel@tonic-gate * that type is not a shadow inode. 6490Sstevel@tonic-gate */ 6500Sstevel@tonic-gate if (ip->i_mode && ((ip->i_mode & IFMT) != IFSHAD) && 6510Sstevel@tonic-gate ((ip->i_mode & IFMT) != IFATTRDIR)) { 6520Sstevel@tonic-gate ip->i_dquot = getinoquota(ip); 6530Sstevel@tonic-gate } 6540Sstevel@tonic-gate TRANS_MATA_IGET(ufsvfsp, ip); 6550Sstevel@tonic-gate *ipp = ip; 6560Sstevel@tonic-gate rw_exit(&ip->i_contents); 6570Sstevel@tonic-gate 6580Sstevel@tonic-gate return (0); 6590Sstevel@tonic-gate } 6600Sstevel@tonic-gate 6610Sstevel@tonic-gate /* 6620Sstevel@tonic-gate * Vnode is no longer referenced, write the inode out 6630Sstevel@tonic-gate * and if necessary, truncate and deallocate the file. 6640Sstevel@tonic-gate */ 6650Sstevel@tonic-gate void 6660Sstevel@tonic-gate ufs_iinactive(struct inode *ip) 6670Sstevel@tonic-gate { 6680Sstevel@tonic-gate int front; 6690Sstevel@tonic-gate struct inode *iq; 6700Sstevel@tonic-gate struct inode *hip; 6710Sstevel@tonic-gate struct ufs_q *uq; 6720Sstevel@tonic-gate struct vnode *vp = ITOV(ip); 673*512Sjkennedy struct ufsvfs *ufsvfsp = ip->i_ufsvfs; 674*512Sjkennedy struct ufs_delq_info *delq_info = &ufsvfsp->vfs_delete_info; 6750Sstevel@tonic-gate 6760Sstevel@tonic-gate /* 6770Sstevel@tonic-gate * Because the vnode type might have been changed, 6780Sstevel@tonic-gate * the dnlc_dir_purge must be called unconditionally. 6790Sstevel@tonic-gate */ 6800Sstevel@tonic-gate dnlc_dir_purge(&ip->i_danchor); 6810Sstevel@tonic-gate 6820Sstevel@tonic-gate /* 6830Sstevel@tonic-gate * Get exclusive access to inode data. 6840Sstevel@tonic-gate */ 6850Sstevel@tonic-gate rw_enter(&ip->i_contents, RW_WRITER); 6860Sstevel@tonic-gate ASSERT(ip->i_flag & IREF); 6870Sstevel@tonic-gate 6880Sstevel@tonic-gate /* 6890Sstevel@tonic-gate * Make sure no one reclaimed the inode before we put it on 6900Sstevel@tonic-gate * the freelist or destroy it. We keep our 'hold' on the vnode 6910Sstevel@tonic-gate * from vn_rele until we are ready to do something with the inode. 6920Sstevel@tonic-gate * 6930Sstevel@tonic-gate * Pageout may put a VN_HOLD/VN_RELE at anytime during this 6940Sstevel@tonic-gate * operation via an async putpage, so we must make sure 6950Sstevel@tonic-gate * we don't free/destroy the inode more than once. ufs_iget 6960Sstevel@tonic-gate * may also put a VN_HOLD on the inode before it grabs 6970Sstevel@tonic-gate * the i_contents lock. This is done so we don't free 6980Sstevel@tonic-gate * an inode that a thread is waiting on. 6990Sstevel@tonic-gate */ 7000Sstevel@tonic-gate mutex_enter(&vp->v_lock); 7010Sstevel@tonic-gate 7020Sstevel@tonic-gate if (vp->v_count > 1) { 7030Sstevel@tonic-gate vp->v_count--; /* release our hold from vn_rele */ 7040Sstevel@tonic-gate mutex_exit(&vp->v_lock); 7050Sstevel@tonic-gate rw_exit(&ip->i_contents); 7060Sstevel@tonic-gate return; 7070Sstevel@tonic-gate } 7080Sstevel@tonic-gate mutex_exit(&vp->v_lock); 7090Sstevel@tonic-gate 7100Sstevel@tonic-gate /* 7110Sstevel@tonic-gate * For umount case: if ufsvfs ptr is NULL, the inode is unhashed 7120Sstevel@tonic-gate * and clean. It can be safely destroyed (cyf). 7130Sstevel@tonic-gate */ 7140Sstevel@tonic-gate if (ip->i_ufsvfs == NULL) { 7150Sstevel@tonic-gate rw_exit(&ip->i_contents); 7160Sstevel@tonic-gate ufs_si_del(ip); 7170Sstevel@tonic-gate ASSERT((vp->v_type == VCHR) || !vn_has_cached_data(vp)); 7180Sstevel@tonic-gate ufs_free_inode(ip); 7190Sstevel@tonic-gate return; 7200Sstevel@tonic-gate } 7210Sstevel@tonic-gate 7220Sstevel@tonic-gate /* 7230Sstevel@tonic-gate * queue idle inode to appropriate thread. Will check v_count == 1 7240Sstevel@tonic-gate * prior to putting this on the appropriate queue. 7250Sstevel@tonic-gate * Stale inodes will be unhashed and freed by the ufs idle thread 7260Sstevel@tonic-gate * in ufs_idle_free() 7270Sstevel@tonic-gate */ 7280Sstevel@tonic-gate front = 1; 7290Sstevel@tonic-gate if ((ip->i_flag & ISTALE) == 0 && ip->i_fs->fs_ronly == 0 && 7300Sstevel@tonic-gate ip->i_mode && ip->i_nlink <= 0) { 7310Sstevel@tonic-gate /* 7320Sstevel@tonic-gate * Mark the i_flag to indicate that inode is being deleted. 7330Sstevel@tonic-gate * This flag will be cleared when the deletion is complete. 7340Sstevel@tonic-gate * This prevents nfs from sneaking in via ufs_vget() while 7350Sstevel@tonic-gate * the delete is in progress (bugid 1242481). 7360Sstevel@tonic-gate */ 7370Sstevel@tonic-gate ip->i_flag |= IDEL; 7380Sstevel@tonic-gate 7390Sstevel@tonic-gate /* 7400Sstevel@tonic-gate * NOIDEL means that deletes are not allowed at this time; 7410Sstevel@tonic-gate * whoever resets NOIDEL will also send this inode back 7420Sstevel@tonic-gate * through ufs_iinactive. IREF remains set. 7430Sstevel@tonic-gate */ 7440Sstevel@tonic-gate if (ULOCKFS_IS_NOIDEL(ITOUL(ip))) { 7450Sstevel@tonic-gate mutex_enter(&vp->v_lock); 7460Sstevel@tonic-gate vp->v_count--; 7470Sstevel@tonic-gate mutex_exit(&vp->v_lock); 7480Sstevel@tonic-gate rw_exit(&ip->i_contents); 7490Sstevel@tonic-gate return; 7500Sstevel@tonic-gate } 7510Sstevel@tonic-gate if (!TRANS_ISTRANS(ip->i_ufsvfs)) { 7520Sstevel@tonic-gate rw_exit(&ip->i_contents); 7530Sstevel@tonic-gate ufs_delete(ip->i_ufsvfs, ip, 0); 7540Sstevel@tonic-gate return; 7550Sstevel@tonic-gate } 7560Sstevel@tonic-gate 7570Sstevel@tonic-gate /* queue to delete thread; IREF remains set */ 7580Sstevel@tonic-gate ins.in_qfree.value.ul++; 7590Sstevel@tonic-gate uq = &ip->i_ufsvfs->vfs_delete; 7600Sstevel@tonic-gate 7610Sstevel@tonic-gate mutex_enter(&uq->uq_mutex); 7620Sstevel@tonic-gate 7630Sstevel@tonic-gate /* add to q */ 7640Sstevel@tonic-gate if ((iq = uq->uq_ihead) != 0) { 7650Sstevel@tonic-gate ip->i_freef = iq; 7660Sstevel@tonic-gate ip->i_freeb = iq->i_freeb; 7670Sstevel@tonic-gate iq->i_freeb->i_freef = ip; 7680Sstevel@tonic-gate iq->i_freeb = ip; 7690Sstevel@tonic-gate if (front) 7700Sstevel@tonic-gate uq->uq_ihead = ip; 7710Sstevel@tonic-gate } else { 7720Sstevel@tonic-gate uq->uq_ihead = ip; 7730Sstevel@tonic-gate ip->i_freef = ip; 7740Sstevel@tonic-gate ip->i_freeb = ip; 7750Sstevel@tonic-gate } 776*512Sjkennedy 777*512Sjkennedy delq_info->delq_unreclaimed_files += 1; 778*512Sjkennedy delq_info->delq_unreclaimed_blocks += ip->i_blocks; 7790Sstevel@tonic-gate } else { 7800Sstevel@tonic-gate /* 7810Sstevel@tonic-gate * queue to idle thread 7820Sstevel@tonic-gate * Check the v_count == 1 again. 7830Sstevel@tonic-gate * 7840Sstevel@tonic-gate */ 7850Sstevel@tonic-gate mutex_enter(&vp->v_lock); 7860Sstevel@tonic-gate if (vp->v_count > 1) { 7870Sstevel@tonic-gate vp->v_count--; /* release our hold from vn_rele */ 7880Sstevel@tonic-gate mutex_exit(&vp->v_lock); 7890Sstevel@tonic-gate rw_exit(&ip->i_contents); 7900Sstevel@tonic-gate return; 7910Sstevel@tonic-gate } 7920Sstevel@tonic-gate mutex_exit(&vp->v_lock); 7930Sstevel@tonic-gate uq = &ufs_idle_q; 7940Sstevel@tonic-gate 7950Sstevel@tonic-gate /* 7960Sstevel@tonic-gate * useful iff it has pages or is a fastsymlink; otherwise junk 7970Sstevel@tonic-gate */ 7980Sstevel@tonic-gate mutex_enter(&uq->uq_mutex); 7990Sstevel@tonic-gate 8000Sstevel@tonic-gate /* clear IREF means `on idle list' */ 8010Sstevel@tonic-gate ip->i_flag &= ~(IREF | IDIRECTIO); 8020Sstevel@tonic-gate 8030Sstevel@tonic-gate if (vn_has_cached_data(vp) || ip->i_flag & IFASTSYMLNK) { 8040Sstevel@tonic-gate ins.in_frback.value.ul++; 8050Sstevel@tonic-gate hip = (inode_t *)&ufs_useful_iq[IQHASH(ip)]; 8060Sstevel@tonic-gate ufs_nuseful_iq++; 8070Sstevel@tonic-gate } else { 8080Sstevel@tonic-gate ins.in_frfront.value.ul++; 8090Sstevel@tonic-gate hip = (inode_t *)&ufs_junk_iq[IQHASH(ip)]; 8100Sstevel@tonic-gate ip->i_flag |= IJUNKIQ; 8110Sstevel@tonic-gate ufs_njunk_iq++; 8120Sstevel@tonic-gate } 8130Sstevel@tonic-gate ip->i_freef = hip; 8140Sstevel@tonic-gate ip->i_freeb = hip->i_freeb; 8150Sstevel@tonic-gate hip->i_freeb->i_freef = ip; 8160Sstevel@tonic-gate hip->i_freeb = ip; 8170Sstevel@tonic-gate } 8180Sstevel@tonic-gate 8190Sstevel@tonic-gate /* wakeup thread(s) if q is overfull */ 8200Sstevel@tonic-gate if (++uq->uq_ne == uq->uq_lowat) 8210Sstevel@tonic-gate cv_broadcast(&uq->uq_cv); 8220Sstevel@tonic-gate 8230Sstevel@tonic-gate /* all done, release the q and inode */ 8240Sstevel@tonic-gate mutex_exit(&uq->uq_mutex); 8250Sstevel@tonic-gate rw_exit(&ip->i_contents); 8260Sstevel@tonic-gate } 8270Sstevel@tonic-gate 8280Sstevel@tonic-gate /* 8290Sstevel@tonic-gate * Check accessed and update flags on an inode structure. 8300Sstevel@tonic-gate * If any are on, update the inode with the (unique) current time. 8310Sstevel@tonic-gate * If waitfor is given, insure I/O order so wait for write to complete. 8320Sstevel@tonic-gate */ 8330Sstevel@tonic-gate void 8340Sstevel@tonic-gate ufs_iupdat(struct inode *ip, int waitfor) 8350Sstevel@tonic-gate { 8360Sstevel@tonic-gate struct buf *bp; 8370Sstevel@tonic-gate struct fs *fp; 8380Sstevel@tonic-gate struct dinode *dp; 8390Sstevel@tonic-gate struct ufsvfs *ufsvfsp = ip->i_ufsvfs; 8400Sstevel@tonic-gate int i; 8410Sstevel@tonic-gate int do_trans_times; 8420Sstevel@tonic-gate ushort_t flag; 8430Sstevel@tonic-gate o_uid_t suid; 8440Sstevel@tonic-gate o_gid_t sgid; 8450Sstevel@tonic-gate 8460Sstevel@tonic-gate /* 8470Sstevel@tonic-gate * This function is now safe to be called with either the reader 8480Sstevel@tonic-gate * or writer i_contents lock. 8490Sstevel@tonic-gate */ 8500Sstevel@tonic-gate ASSERT(RW_LOCK_HELD(&ip->i_contents)); 8510Sstevel@tonic-gate 8520Sstevel@tonic-gate /* 8530Sstevel@tonic-gate * Return if file system has been forcibly umounted. 8540Sstevel@tonic-gate */ 8550Sstevel@tonic-gate if (ufsvfsp == NULL) 8560Sstevel@tonic-gate return; 8570Sstevel@tonic-gate 8580Sstevel@tonic-gate flag = ip->i_flag; /* Atomic read */ 8590Sstevel@tonic-gate /* 8600Sstevel@tonic-gate * We better not update the disk inode from a stale inode. 8610Sstevel@tonic-gate */ 8620Sstevel@tonic-gate if (flag & ISTALE) 8630Sstevel@tonic-gate return; 8640Sstevel@tonic-gate 8650Sstevel@tonic-gate fp = ip->i_fs; 8660Sstevel@tonic-gate 8670Sstevel@tonic-gate if ((flag & (IUPD|IACC|ICHG|IMOD|IMODACC|IATTCHG)) != 0) { 8680Sstevel@tonic-gate if (fp->fs_ronly) { 8690Sstevel@tonic-gate mutex_enter(&ip->i_tlock); 8700Sstevel@tonic-gate ip->i_flag &= ~(IUPD|IACC|ICHG|IMOD|IMODACC|IATTCHG); 8710Sstevel@tonic-gate mutex_exit(&ip->i_tlock); 8720Sstevel@tonic-gate return; 8730Sstevel@tonic-gate } 8740Sstevel@tonic-gate /* 8750Sstevel@tonic-gate * fs is active while metadata is being written 8760Sstevel@tonic-gate */ 8770Sstevel@tonic-gate mutex_enter(&ufsvfsp->vfs_lock); 8780Sstevel@tonic-gate ufs_notclean(ufsvfsp); 8790Sstevel@tonic-gate /* 8800Sstevel@tonic-gate * get the dinode 8810Sstevel@tonic-gate */ 8820Sstevel@tonic-gate bp = UFS_BREAD(ufsvfsp, ip->i_dev, 8830Sstevel@tonic-gate (daddr_t)fsbtodb(fp, itod(fp, ip->i_number)), 8840Sstevel@tonic-gate (int)fp->fs_bsize); 8850Sstevel@tonic-gate if (bp->b_flags & B_ERROR) { 8860Sstevel@tonic-gate mutex_enter(&ip->i_tlock); 8870Sstevel@tonic-gate ip->i_flag &= 8880Sstevel@tonic-gate ~(IUPD|IACC|ICHG|IMOD|IMODACC|IATTCHG); 8890Sstevel@tonic-gate mutex_exit(&ip->i_tlock); 8900Sstevel@tonic-gate brelse(bp); 8910Sstevel@tonic-gate return; 8920Sstevel@tonic-gate } 8930Sstevel@tonic-gate /* 8940Sstevel@tonic-gate * munge inode fields 8950Sstevel@tonic-gate */ 8960Sstevel@tonic-gate mutex_enter(&ip->i_tlock); 8970Sstevel@tonic-gate ITIMES_NOLOCK(ip); 8980Sstevel@tonic-gate do_trans_times = ((ip->i_flag & (IMOD|IMODACC)) == IMODACC); 8990Sstevel@tonic-gate ip->i_flag &= ~(IUPD|IACC|ICHG|IMOD|IMODACC|IATTCHG); 9000Sstevel@tonic-gate mutex_exit(&ip->i_tlock); 9010Sstevel@tonic-gate 9020Sstevel@tonic-gate /* 9030Sstevel@tonic-gate * For reads and concurrent re-writes, no deltas were 9040Sstevel@tonic-gate * entered for the access time changes - do it now. 9050Sstevel@tonic-gate */ 9060Sstevel@tonic-gate if (do_trans_times) { 9070Sstevel@tonic-gate TRANS_INODE_TIMES(ufsvfsp, ip); 9080Sstevel@tonic-gate } 9090Sstevel@tonic-gate 9100Sstevel@tonic-gate /* 9110Sstevel@tonic-gate * For SunOS 5.0->5.4, these lines below read: 9120Sstevel@tonic-gate * 9130Sstevel@tonic-gate * suid = (ip->i_uid > MAXUID) ? UID_LONG : ip->i_uid; 9140Sstevel@tonic-gate * sgid = (ip->i_gid > MAXUID) ? GID_LONG : ip->i_gid; 9150Sstevel@tonic-gate * 9160Sstevel@tonic-gate * where MAXUID was set to 60002. This was incorrect - 9170Sstevel@tonic-gate * the uids should have been constrained to what fitted into 9180Sstevel@tonic-gate * a 16-bit word. 9190Sstevel@tonic-gate * 9200Sstevel@tonic-gate * This means that files from 4.x filesystems that have an 9210Sstevel@tonic-gate * i_suid field larger than 60002 will have that field 9220Sstevel@tonic-gate * changed to 65535. 9230Sstevel@tonic-gate * 9240Sstevel@tonic-gate * Security note: 4.x UFS could never create a i_suid of 9250Sstevel@tonic-gate * UID_LONG since that would've corresponded to -1. 9260Sstevel@tonic-gate */ 9270Sstevel@tonic-gate suid = (ulong_t)ip->i_uid > (ulong_t)USHRT_MAX ? 9280Sstevel@tonic-gate UID_LONG : ip->i_uid; 9290Sstevel@tonic-gate sgid = (ulong_t)ip->i_gid > (ulong_t)USHRT_MAX ? 9300Sstevel@tonic-gate GID_LONG : ip->i_gid; 9310Sstevel@tonic-gate 9320Sstevel@tonic-gate if ((ip->i_suid != suid) || (ip->i_sgid != sgid)) { 9330Sstevel@tonic-gate ip->i_suid = suid; 9340Sstevel@tonic-gate ip->i_sgid = sgid; 9350Sstevel@tonic-gate TRANS_INODE(ufsvfsp, ip); 9360Sstevel@tonic-gate } 9370Sstevel@tonic-gate 9380Sstevel@tonic-gate if ((ip->i_mode & IFMT) == IFBLK || 9390Sstevel@tonic-gate (ip->i_mode & IFMT) == IFCHR) { 9400Sstevel@tonic-gate dev_t d = ip->i_rdev; 9410Sstevel@tonic-gate dev32_t dev32; 9420Sstevel@tonic-gate 9430Sstevel@tonic-gate /* 9440Sstevel@tonic-gate * load first direct block only if special device 9450Sstevel@tonic-gate */ 9460Sstevel@tonic-gate if (!cmpldev(&dev32, d)) { 9470Sstevel@tonic-gate /* 9480Sstevel@tonic-gate * We panic here because there's "no way" 9490Sstevel@tonic-gate * we should have been able to create a large 9500Sstevel@tonic-gate * inode with a large dev_t. Earlier layers 9510Sstevel@tonic-gate * should've caught this. 9520Sstevel@tonic-gate */ 9530Sstevel@tonic-gate panic("ip %p: i_rdev too big", (void *)ip); 9540Sstevel@tonic-gate } 9550Sstevel@tonic-gate 9560Sstevel@tonic-gate if (dev32 & ~((O_MAXMAJ << L_BITSMINOR32) | O_MAXMIN)) { 9570Sstevel@tonic-gate ip->i_ordev = dev32; /* can't use old fmt. */ 9580Sstevel@tonic-gate } else { 9590Sstevel@tonic-gate ip->i_ordev = cmpdev(d); 9600Sstevel@tonic-gate } 9610Sstevel@tonic-gate } 9620Sstevel@tonic-gate 9630Sstevel@tonic-gate /* 9640Sstevel@tonic-gate * copy inode to dinode (zero fastsymlnk in dinode) 9650Sstevel@tonic-gate */ 9660Sstevel@tonic-gate dp = (struct dinode *)bp->b_un.b_addr + itoo(fp, ip->i_number); 9670Sstevel@tonic-gate dp->di_ic = ip->i_ic; /* structure assignment */ 9680Sstevel@tonic-gate if (flag & IFASTSYMLNK) { 9690Sstevel@tonic-gate for (i = 1; i < NDADDR; i++) 9700Sstevel@tonic-gate dp->di_db[i] = 0; 9710Sstevel@tonic-gate for (i = 0; i < NIADDR; i++) 9720Sstevel@tonic-gate dp->di_ib[i] = 0; 9730Sstevel@tonic-gate } 9740Sstevel@tonic-gate if (TRANS_ISTRANS(ufsvfsp)) { 9750Sstevel@tonic-gate /* 9760Sstevel@tonic-gate * Pass only a sector size buffer containing 9770Sstevel@tonic-gate * the inode, otherwise when the buffer is copied 9780Sstevel@tonic-gate * into a cached roll buffer then too much memory 9790Sstevel@tonic-gate * gets consumed if 8KB inode buffers are passed. 9800Sstevel@tonic-gate */ 9810Sstevel@tonic-gate TRANS_LOG(ufsvfsp, (caddr_t)dp, ip->i_doff, 9820Sstevel@tonic-gate sizeof (struct dinode), 9830Sstevel@tonic-gate (caddr_t)P2ALIGN((uintptr_t)dp, DEV_BSIZE), 9840Sstevel@tonic-gate DEV_BSIZE); 9850Sstevel@tonic-gate 9860Sstevel@tonic-gate brelse(bp); 9870Sstevel@tonic-gate } else if (waitfor && (ip->i_ufsvfs->vfs_dio == 0)) { 9880Sstevel@tonic-gate UFS_BRWRITE(ufsvfsp, bp); 9890Sstevel@tonic-gate 9900Sstevel@tonic-gate /* 9910Sstevel@tonic-gate * Synchronous write has guaranteed that inode 9920Sstevel@tonic-gate * has been written on disk so clear the flag 9930Sstevel@tonic-gate */ 9940Sstevel@tonic-gate mutex_enter(&ip->i_tlock); 9950Sstevel@tonic-gate ip->i_flag &= ~IBDWRITE; 9960Sstevel@tonic-gate mutex_exit(&ip->i_tlock); 9970Sstevel@tonic-gate } else { 9980Sstevel@tonic-gate bdrwrite(bp); 9990Sstevel@tonic-gate 10000Sstevel@tonic-gate /* 10010Sstevel@tonic-gate * This write hasn't guaranteed that inode has been 10020Sstevel@tonic-gate * written on the disk. 10030Sstevel@tonic-gate * Since, all updat flags on inode are cleared, we must 10040Sstevel@tonic-gate * remember the condition in case inode is to be updated 10050Sstevel@tonic-gate * synchronously later (e.g.- fsync()/fdatasync()) 10060Sstevel@tonic-gate * and inode has not been modified yet. 10070Sstevel@tonic-gate */ 10080Sstevel@tonic-gate mutex_enter(&ip->i_tlock); 10090Sstevel@tonic-gate ip->i_flag |= IBDWRITE; 10100Sstevel@tonic-gate mutex_exit(&ip->i_tlock); 10110Sstevel@tonic-gate } 10120Sstevel@tonic-gate } else { 10130Sstevel@tonic-gate /* 10140Sstevel@tonic-gate * In case previous inode update was done asynchronously 10150Sstevel@tonic-gate * (IBDWRITE) and this inode update request wants guaranteed 10160Sstevel@tonic-gate * (synchronous) disk update, flush the inode. 10170Sstevel@tonic-gate */ 10180Sstevel@tonic-gate if (waitfor && (flag & IBDWRITE)) { 10190Sstevel@tonic-gate blkflush(ip->i_dev, 10200Sstevel@tonic-gate (daddr_t)fsbtodb(fp, itod(fp, ip->i_number))); 10210Sstevel@tonic-gate mutex_enter(&ip->i_tlock); 10220Sstevel@tonic-gate ip->i_flag &= ~IBDWRITE; 10230Sstevel@tonic-gate mutex_exit(&ip->i_tlock); 10240Sstevel@tonic-gate } 10250Sstevel@tonic-gate } 10260Sstevel@tonic-gate } 10270Sstevel@tonic-gate 10280Sstevel@tonic-gate #define SINGLE 0 /* index of single indirect block */ 10290Sstevel@tonic-gate #define DOUBLE 1 /* index of double indirect block */ 10300Sstevel@tonic-gate #define TRIPLE 2 /* index of triple indirect block */ 10310Sstevel@tonic-gate 10320Sstevel@tonic-gate /* 10330Sstevel@tonic-gate * Release blocks associated with the inode ip and 10340Sstevel@tonic-gate * stored in the indirect block bn. Blocks are free'd 10350Sstevel@tonic-gate * in LIFO order up to (but not including) lastbn. If 10360Sstevel@tonic-gate * level is greater than SINGLE, the block is an indirect 10370Sstevel@tonic-gate * block and recursive calls to indirtrunc must be used to 10380Sstevel@tonic-gate * cleanse other indirect blocks. 10390Sstevel@tonic-gate * 10400Sstevel@tonic-gate * N.B.: triple indirect blocks are untested. 10410Sstevel@tonic-gate */ 10420Sstevel@tonic-gate static long 10430Sstevel@tonic-gate indirtrunc(struct inode *ip, daddr_t bn, daddr_t lastbn, int level, int flags) 10440Sstevel@tonic-gate { 10450Sstevel@tonic-gate int i; 10460Sstevel@tonic-gate struct buf *bp, *copy; 10470Sstevel@tonic-gate daddr32_t *bap; 10480Sstevel@tonic-gate struct ufsvfs *ufsvfsp = ip->i_ufsvfs; 10490Sstevel@tonic-gate struct fs *fs = ufsvfsp->vfs_fs; 10500Sstevel@tonic-gate daddr_t nb, last; 10510Sstevel@tonic-gate long factor; 10520Sstevel@tonic-gate int blocksreleased = 0, nblocks; 10530Sstevel@tonic-gate 10540Sstevel@tonic-gate ASSERT(RW_WRITE_HELD(&ip->i_contents)); 10550Sstevel@tonic-gate /* 10560Sstevel@tonic-gate * Calculate index in current block of last 10570Sstevel@tonic-gate * block to be kept. -1 indicates the entire 10580Sstevel@tonic-gate * block so we need not calculate the index. 10590Sstevel@tonic-gate */ 10600Sstevel@tonic-gate factor = 1; 10610Sstevel@tonic-gate for (i = SINGLE; i < level; i++) 10620Sstevel@tonic-gate factor *= NINDIR(fs); 10630Sstevel@tonic-gate last = lastbn; 10640Sstevel@tonic-gate if (lastbn > 0) 10650Sstevel@tonic-gate last /= factor; 10660Sstevel@tonic-gate nblocks = btodb(fs->fs_bsize); 10670Sstevel@tonic-gate /* 10680Sstevel@tonic-gate * Get buffer of block pointers, zero those 10690Sstevel@tonic-gate * entries corresponding to blocks to be free'd, 10700Sstevel@tonic-gate * and update on disk copy first. 10710Sstevel@tonic-gate * *Unless* the root pointer has been synchronously 10720Sstevel@tonic-gate * written to disk. If nothing points to this 10730Sstevel@tonic-gate * indirect block then don't bother zero'ing and 10740Sstevel@tonic-gate * writing it. 10750Sstevel@tonic-gate */ 10760Sstevel@tonic-gate bp = UFS_BREAD(ufsvfsp, 10770Sstevel@tonic-gate ip->i_dev, (daddr_t)fsbtodb(fs, bn), (int)fs->fs_bsize); 10780Sstevel@tonic-gate if (bp->b_flags & B_ERROR) { 10790Sstevel@tonic-gate brelse(bp); 10800Sstevel@tonic-gate return (0); 10810Sstevel@tonic-gate } 10820Sstevel@tonic-gate bap = bp->b_un.b_daddr; 10830Sstevel@tonic-gate if ((flags & I_CHEAP) == 0) { 10840Sstevel@tonic-gate uint_t zb; 10850Sstevel@tonic-gate 10860Sstevel@tonic-gate zb = (uint_t)((NINDIR(fs) - (last + 1)) * sizeof (daddr32_t)); 10870Sstevel@tonic-gate 10880Sstevel@tonic-gate if (zb) { 10890Sstevel@tonic-gate /* 10900Sstevel@tonic-gate * push any data into the log before we zero it 10910Sstevel@tonic-gate */ 10920Sstevel@tonic-gate if (bp->b_flags & B_DELWRI) 10930Sstevel@tonic-gate TRANS_LOG(ufsvfsp, (caddr_t)bap, 10940Sstevel@tonic-gate ldbtob(bp->b_blkno), bp->b_bcount, 10950Sstevel@tonic-gate bp->b_un.b_addr, bp->b_bcount); 10960Sstevel@tonic-gate copy = ngeteblk(fs->fs_bsize); 10970Sstevel@tonic-gate bcopy((caddr_t)bap, (caddr_t)copy->b_un.b_daddr, 10980Sstevel@tonic-gate (uint_t)fs->fs_bsize); 10990Sstevel@tonic-gate bzero((caddr_t)&bap[last + 1], zb); 11000Sstevel@tonic-gate 11010Sstevel@tonic-gate TRANS_BUF(ufsvfsp, 11020Sstevel@tonic-gate (caddr_t)&bap[last + 1] - (caddr_t)bap, 11030Sstevel@tonic-gate zb, bp, DT_ABZERO); 11040Sstevel@tonic-gate 11050Sstevel@tonic-gate UFS_BRWRITE(ufsvfsp, bp); 11060Sstevel@tonic-gate bp = copy, bap = bp->b_un.b_daddr; 11070Sstevel@tonic-gate } 11080Sstevel@tonic-gate } else { 11090Sstevel@tonic-gate /* make sure write retries are also cleared */ 11100Sstevel@tonic-gate bp->b_flags &= ~(B_DELWRI | B_RETRYWRI); 11110Sstevel@tonic-gate bp->b_flags |= B_STALE | B_AGE; 11120Sstevel@tonic-gate } 11130Sstevel@tonic-gate 11140Sstevel@tonic-gate /* 11150Sstevel@tonic-gate * Recursively free totally unused blocks. 11160Sstevel@tonic-gate */ 11170Sstevel@tonic-gate flags |= I_CHEAP; 11180Sstevel@tonic-gate for (i = NINDIR(fs) - 1; i > last; i--) { 11190Sstevel@tonic-gate nb = bap[i]; 11200Sstevel@tonic-gate if (nb == 0) 11210Sstevel@tonic-gate continue; 11220Sstevel@tonic-gate if (level > SINGLE) { 11230Sstevel@tonic-gate blocksreleased += 11240Sstevel@tonic-gate indirtrunc(ip, nb, (daddr_t)-1, level - 1, flags); 11250Sstevel@tonic-gate free(ip, nb, (off_t)fs->fs_bsize, flags | I_IBLK); 11260Sstevel@tonic-gate } else 11270Sstevel@tonic-gate free(ip, nb, (off_t)fs->fs_bsize, flags); 11280Sstevel@tonic-gate blocksreleased += nblocks; 11290Sstevel@tonic-gate } 11300Sstevel@tonic-gate flags &= ~I_CHEAP; 11310Sstevel@tonic-gate 11320Sstevel@tonic-gate /* 11330Sstevel@tonic-gate * Recursively free last partial block. 11340Sstevel@tonic-gate */ 11350Sstevel@tonic-gate if (level > SINGLE && lastbn >= 0) { 11360Sstevel@tonic-gate last = lastbn % factor; 11370Sstevel@tonic-gate nb = bap[i]; 11380Sstevel@tonic-gate if (nb != 0) 11390Sstevel@tonic-gate blocksreleased += indirtrunc(ip, nb, last, level - 1, 11400Sstevel@tonic-gate flags); 11410Sstevel@tonic-gate } 11420Sstevel@tonic-gate brelse(bp); 11430Sstevel@tonic-gate return (blocksreleased); 11440Sstevel@tonic-gate } 11450Sstevel@tonic-gate 11460Sstevel@tonic-gate /* 11470Sstevel@tonic-gate * Truncate the inode ip to at most length size. 11480Sstevel@tonic-gate * Free affected disk blocks -- the blocks of the 11490Sstevel@tonic-gate * file are removed in reverse order. 11500Sstevel@tonic-gate * 11510Sstevel@tonic-gate * N.B.: triple indirect blocks are untested. 11520Sstevel@tonic-gate */ 11530Sstevel@tonic-gate static int i_genrand = 1234; 11540Sstevel@tonic-gate int 11550Sstevel@tonic-gate ufs_itrunc(struct inode *oip, u_offset_t length, int flags, cred_t *cr) 11560Sstevel@tonic-gate { 11570Sstevel@tonic-gate struct fs *fs = oip->i_fs; 11580Sstevel@tonic-gate struct ufsvfs *ufsvfsp = oip->i_ufsvfs; 11590Sstevel@tonic-gate struct inode *ip; 11600Sstevel@tonic-gate daddr_t lastblock; 11610Sstevel@tonic-gate off_t bsize; 11620Sstevel@tonic-gate int boff; 11630Sstevel@tonic-gate daddr_t bn, lastiblock[NIADDR]; 11640Sstevel@tonic-gate int level; 11650Sstevel@tonic-gate long nblocks, blocksreleased = 0; 11660Sstevel@tonic-gate int i; 11670Sstevel@tonic-gate ushort_t mode; 11680Sstevel@tonic-gate struct inode tip; 11690Sstevel@tonic-gate int err; 11700Sstevel@tonic-gate u_offset_t maxoffset = (ufsvfsp->vfs_lfflags & UFS_LARGEFILES) ? 11710Sstevel@tonic-gate (UFS_MAXOFFSET_T) : (MAXOFF32_T); 11720Sstevel@tonic-gate 11730Sstevel@tonic-gate /* 11740Sstevel@tonic-gate * Shadow inodes do not need to hold the vfs_dqrwlock lock. Most 11750Sstevel@tonic-gate * other uses need the reader lock. opendq() holds the writer lock. 11760Sstevel@tonic-gate */ 11770Sstevel@tonic-gate ASSERT((oip->i_mode & IFMT) == IFSHAD || 11780Sstevel@tonic-gate RW_LOCK_HELD(&ufsvfsp->vfs_dqrwlock)); 11790Sstevel@tonic-gate ASSERT(RW_WRITE_HELD(&oip->i_contents)); 11800Sstevel@tonic-gate /* 11810Sstevel@tonic-gate * We only allow truncation of regular files and directories 11820Sstevel@tonic-gate * to arbitrary lengths here. In addition, we allow symbolic 11830Sstevel@tonic-gate * links to be truncated only to zero length. Other inode 11840Sstevel@tonic-gate * types cannot have their length set here. Disk blocks are 11850Sstevel@tonic-gate * being dealt with - especially device inodes where 11860Sstevel@tonic-gate * ip->i_ordev is actually being stored in ip->i_db[0]! 11870Sstevel@tonic-gate */ 11880Sstevel@tonic-gate TRANS_INODE(ufsvfsp, oip); 11890Sstevel@tonic-gate mode = oip->i_mode & IFMT; 11900Sstevel@tonic-gate if (flags & I_FREE) { 11910Sstevel@tonic-gate i_genrand *= 16843009; /* turns into shift and adds */ 11920Sstevel@tonic-gate i_genrand++; 11930Sstevel@tonic-gate oip->i_gen += ((i_genrand + lbolt) & 0xffff) + 1; 11940Sstevel@tonic-gate oip->i_flag |= ICHG |IUPD; 11950Sstevel@tonic-gate oip->i_seq++; 11960Sstevel@tonic-gate if (length == oip->i_size) 11970Sstevel@tonic-gate return (0); 11980Sstevel@tonic-gate flags |= I_CHEAP; 11990Sstevel@tonic-gate } 12000Sstevel@tonic-gate if (mode == IFIFO) 12010Sstevel@tonic-gate return (0); 12020Sstevel@tonic-gate if (mode != IFREG && mode != IFDIR && mode != IFATTRDIR && 12030Sstevel@tonic-gate !(mode == IFLNK && length == (offset_t)0) && mode != IFSHAD) 12040Sstevel@tonic-gate return (EINVAL); 12050Sstevel@tonic-gate if (length > maxoffset) 12060Sstevel@tonic-gate return (EFBIG); 12070Sstevel@tonic-gate if ((mode == IFDIR) || (mode == IFATTRDIR)) 12080Sstevel@tonic-gate flags |= I_DIR; 12090Sstevel@tonic-gate if (mode == IFSHAD) 12100Sstevel@tonic-gate flags |= I_SHAD; 12110Sstevel@tonic-gate if (oip == ufsvfsp->vfs_qinod) 12120Sstevel@tonic-gate flags |= I_QUOTA; 12130Sstevel@tonic-gate if (length == oip->i_size) { 12140Sstevel@tonic-gate /* update ctime and mtime to please POSIX tests */ 12150Sstevel@tonic-gate oip->i_flag |= ICHG |IUPD; 12160Sstevel@tonic-gate oip->i_seq++; 12170Sstevel@tonic-gate if (length == 0) { 12180Sstevel@tonic-gate /* nothing to cache so clear the flag */ 12190Sstevel@tonic-gate oip->i_flag &= ~IFASTSYMLNK; 12200Sstevel@tonic-gate } 12210Sstevel@tonic-gate return (0); 12220Sstevel@tonic-gate } 12230Sstevel@tonic-gate /* wipe out fast symlink till next access */ 12240Sstevel@tonic-gate if (oip->i_flag & IFASTSYMLNK) { 12250Sstevel@tonic-gate int j; 12260Sstevel@tonic-gate 12270Sstevel@tonic-gate ASSERT(ITOV(oip)->v_type == VLNK); 12280Sstevel@tonic-gate 12290Sstevel@tonic-gate oip->i_flag &= ~IFASTSYMLNK; 12300Sstevel@tonic-gate 12310Sstevel@tonic-gate for (j = 1; j < NDADDR; j++) 12320Sstevel@tonic-gate oip->i_db[j] = 0; 12330Sstevel@tonic-gate for (j = 0; j < NIADDR; j++) 12340Sstevel@tonic-gate oip->i_ib[j] = 0; 12350Sstevel@tonic-gate } 12360Sstevel@tonic-gate 12370Sstevel@tonic-gate boff = (int)blkoff(fs, length); 12380Sstevel@tonic-gate 12390Sstevel@tonic-gate if (length > oip->i_size) { 12400Sstevel@tonic-gate /* 12410Sstevel@tonic-gate * Trunc up case. BMAPALLOC will insure that the right blocks 12420Sstevel@tonic-gate * are allocated. This includes extending the old frag to a 12430Sstevel@tonic-gate * full block (if needed) in addition to doing any work 12440Sstevel@tonic-gate * needed for allocating the last block. 12450Sstevel@tonic-gate */ 12460Sstevel@tonic-gate if (boff == 0) 12470Sstevel@tonic-gate err = BMAPALLOC(oip, length - 1, (int)fs->fs_bsize, cr); 12480Sstevel@tonic-gate else 12490Sstevel@tonic-gate err = BMAPALLOC(oip, length - 1, boff, cr); 12500Sstevel@tonic-gate 12510Sstevel@tonic-gate if (err == 0) { 12520Sstevel@tonic-gate /* 12530Sstevel@tonic-gate * Save old size and set inode's size now 12540Sstevel@tonic-gate * so that we don't cause too much of the 12550Sstevel@tonic-gate * file to be zero'd and pushed. 12560Sstevel@tonic-gate */ 12570Sstevel@tonic-gate u_offset_t osize = oip->i_size; 12580Sstevel@tonic-gate oip->i_size = length; 12590Sstevel@tonic-gate /* 12600Sstevel@tonic-gate * Make sure we zero out the remaining bytes of 12610Sstevel@tonic-gate * the page in case a mmap scribbled on it. We 12620Sstevel@tonic-gate * can't prevent a mmap from writing beyond EOF 12630Sstevel@tonic-gate * on the last page of a file. 12640Sstevel@tonic-gate * 12650Sstevel@tonic-gate */ 12660Sstevel@tonic-gate if ((boff = (int)blkoff(fs, osize)) != 0) { 12670Sstevel@tonic-gate bsize = (int)lblkno(fs, osize - 1) >= NDADDR ? 12680Sstevel@tonic-gate fs->fs_bsize : fragroundup(fs, boff); 12690Sstevel@tonic-gate pvn_vpzero(ITOV(oip), osize, 12700Sstevel@tonic-gate (size_t)(bsize - boff)); 12710Sstevel@tonic-gate } 12720Sstevel@tonic-gate oip->i_flag |= ICHG|IATTCHG; 12730Sstevel@tonic-gate oip->i_seq++; 12740Sstevel@tonic-gate ITIMES_NOLOCK(oip); 12750Sstevel@tonic-gate /* 12760Sstevel@tonic-gate * MAXOFF32_T is old 2GB size limit. If 12770Sstevel@tonic-gate * this operation caused a large file to be 12780Sstevel@tonic-gate * created, turn on the superblock flag 12790Sstevel@tonic-gate * and update the superblock, if the flag 12800Sstevel@tonic-gate * is not already on. 12810Sstevel@tonic-gate */ 12820Sstevel@tonic-gate if ((length > (u_offset_t)MAXOFF32_T) && 12830Sstevel@tonic-gate !(fs->fs_flags & FSLARGEFILES)) { 12840Sstevel@tonic-gate ASSERT(ufsvfsp->vfs_lfflags & UFS_LARGEFILES); 12850Sstevel@tonic-gate mutex_enter(&ufsvfsp->vfs_lock); 12860Sstevel@tonic-gate fs->fs_flags |= FSLARGEFILES; 12870Sstevel@tonic-gate ufs_sbwrite(ufsvfsp); 12880Sstevel@tonic-gate mutex_exit(&ufsvfsp->vfs_lock); 12890Sstevel@tonic-gate } 12900Sstevel@tonic-gate } 12910Sstevel@tonic-gate 12920Sstevel@tonic-gate return (err); 12930Sstevel@tonic-gate } 12940Sstevel@tonic-gate 12950Sstevel@tonic-gate /* 12960Sstevel@tonic-gate * Update the pages of the file. If the file is not being 12970Sstevel@tonic-gate * truncated to a block boundary, the contents of the 12980Sstevel@tonic-gate * pages following the end of the file must be zero'ed 12990Sstevel@tonic-gate * in case it ever become accessible again because 13000Sstevel@tonic-gate * of subsequent file growth. 13010Sstevel@tonic-gate */ 13020Sstevel@tonic-gate if (boff == 0) { 13030Sstevel@tonic-gate (void) pvn_vplist_dirty(ITOV(oip), length, ufs_putapage, 13040Sstevel@tonic-gate B_INVAL | B_TRUNC, CRED()); 13050Sstevel@tonic-gate } else { 13060Sstevel@tonic-gate /* 13070Sstevel@tonic-gate * Make sure that the last block is properly allocated. 13080Sstevel@tonic-gate * We only really have to do this if the last block is 13090Sstevel@tonic-gate * actually allocated since ufs_bmap will now handle the case 13100Sstevel@tonic-gate * of an fragment which has no block allocated. Just to 13110Sstevel@tonic-gate * be sure, we do it now independent of current allocation. 13120Sstevel@tonic-gate */ 13130Sstevel@tonic-gate err = BMAPALLOC(oip, length - 1, boff, cr); 13140Sstevel@tonic-gate if (err) 13150Sstevel@tonic-gate return (err); 13160Sstevel@tonic-gate 13170Sstevel@tonic-gate /* 13180Sstevel@tonic-gate * BMAPALLOC will call bmap_write which defers i_seq 13190Sstevel@tonic-gate * processing. If the timestamps were changed, update 13200Sstevel@tonic-gate * i_seq before rdip drops i_contents or syncs the inode. 13210Sstevel@tonic-gate */ 13220Sstevel@tonic-gate if (oip->i_flag & (ICHG|IUPD)) 13230Sstevel@tonic-gate oip->i_seq++; 13240Sstevel@tonic-gate 13250Sstevel@tonic-gate /* 13260Sstevel@tonic-gate * BugId 4069932 13270Sstevel@tonic-gate * Make sure that the relevant partial page appears in 13280Sstevel@tonic-gate * the v_pages list, so that pvn_vpzero() will do its 13290Sstevel@tonic-gate * job. Since doing this correctly requires everything 13300Sstevel@tonic-gate * in rdip() except for the uiomove(), it's easier and 13310Sstevel@tonic-gate * safer to do the uiomove() rather than duplicate the 13320Sstevel@tonic-gate * rest of rdip() here. 13330Sstevel@tonic-gate * 13340Sstevel@tonic-gate * To get here, we know that length indicates a byte 13350Sstevel@tonic-gate * that is not the first byte of a block. (length - 1) 13360Sstevel@tonic-gate * is the last actual byte known to exist. Deduction 13370Sstevel@tonic-gate * shows it is in the same block as byte (length). 13380Sstevel@tonic-gate * Thus, this rdip() invocation should always succeed 13390Sstevel@tonic-gate * except in the face of i/o errors, and give us the 13400Sstevel@tonic-gate * block we care about. 13410Sstevel@tonic-gate * 13420Sstevel@tonic-gate * rdip() makes the same locking assertions and 13430Sstevel@tonic-gate * assumptions as we do. We do not acquire any locks 13440Sstevel@tonic-gate * before calling it, so we have not changed the locking 13450Sstevel@tonic-gate * situation. Finally, there do not appear to be any 13460Sstevel@tonic-gate * paths whereby rdip() ends up invoking us again. 13470Sstevel@tonic-gate * Thus, infinite recursion is avoided. 13480Sstevel@tonic-gate */ 13490Sstevel@tonic-gate { 13500Sstevel@tonic-gate uio_t uio; 13510Sstevel@tonic-gate iovec_t iov[1]; 13520Sstevel@tonic-gate char buffer; 13530Sstevel@tonic-gate 13540Sstevel@tonic-gate uio.uio_iov = iov; 13550Sstevel@tonic-gate uio.uio_iovcnt = 1; 13560Sstevel@tonic-gate uio.uio_loffset = length - 1; 13570Sstevel@tonic-gate uio.uio_resid = 1; 13580Sstevel@tonic-gate uio.uio_segflg = UIO_SYSSPACE; 13590Sstevel@tonic-gate uio.uio_extflg = UIO_COPY_CACHED; 13600Sstevel@tonic-gate 13610Sstevel@tonic-gate iov[0].iov_base = &buffer; 13620Sstevel@tonic-gate iov[0].iov_len = 1; 13630Sstevel@tonic-gate 13640Sstevel@tonic-gate err = rdip(oip, &uio, UIO_READ, NULL); 13650Sstevel@tonic-gate if (err) 13660Sstevel@tonic-gate return (err); 13670Sstevel@tonic-gate } 13680Sstevel@tonic-gate 13690Sstevel@tonic-gate bsize = (int)lblkno(fs, length - 1) >= NDADDR ? 13700Sstevel@tonic-gate fs->fs_bsize : fragroundup(fs, boff); 13710Sstevel@tonic-gate pvn_vpzero(ITOV(oip), length, (size_t)(bsize - boff)); 13720Sstevel@tonic-gate /* 13730Sstevel@tonic-gate * Ensure full fs block is marked as dirty. 13740Sstevel@tonic-gate */ 13750Sstevel@tonic-gate (void) pvn_vplist_dirty(ITOV(oip), length + (bsize - boff), 13760Sstevel@tonic-gate ufs_putapage, B_INVAL | B_TRUNC, CRED()); 13770Sstevel@tonic-gate } 13780Sstevel@tonic-gate 13790Sstevel@tonic-gate /* 13800Sstevel@tonic-gate * Calculate index into inode's block list of 13810Sstevel@tonic-gate * last direct and indirect blocks (if any) 13820Sstevel@tonic-gate * which we want to keep. Lastblock is -1 when 13830Sstevel@tonic-gate * the file is truncated to 0. 13840Sstevel@tonic-gate */ 13850Sstevel@tonic-gate lastblock = lblkno(fs, length + fs->fs_bsize - 1) - 1; 13860Sstevel@tonic-gate lastiblock[SINGLE] = lastblock - NDADDR; 13870Sstevel@tonic-gate lastiblock[DOUBLE] = lastiblock[SINGLE] - NINDIR(fs); 13880Sstevel@tonic-gate lastiblock[TRIPLE] = lastiblock[DOUBLE] - NINDIR(fs) * NINDIR(fs); 13890Sstevel@tonic-gate nblocks = btodb(fs->fs_bsize); 13900Sstevel@tonic-gate 13910Sstevel@tonic-gate /* 13920Sstevel@tonic-gate * Update file and block pointers 13930Sstevel@tonic-gate * on disk before we start freeing blocks. 13940Sstevel@tonic-gate * If we crash before free'ing blocks below, 13950Sstevel@tonic-gate * the blocks will be returned to the free list. 13960Sstevel@tonic-gate * lastiblock values are also normalized to -1 13970Sstevel@tonic-gate * for calls to indirtrunc below. 13980Sstevel@tonic-gate */ 13990Sstevel@tonic-gate tip = *oip; /* structure copy */ 14000Sstevel@tonic-gate ip = &tip; 14010Sstevel@tonic-gate 14020Sstevel@tonic-gate for (level = TRIPLE; level >= SINGLE; level--) 14030Sstevel@tonic-gate if (lastiblock[level] < 0) { 14040Sstevel@tonic-gate oip->i_ib[level] = 0; 14050Sstevel@tonic-gate lastiblock[level] = -1; 14060Sstevel@tonic-gate } 14070Sstevel@tonic-gate for (i = NDADDR - 1; i > lastblock; i--) { 14080Sstevel@tonic-gate oip->i_db[i] = 0; 14090Sstevel@tonic-gate flags |= I_CHEAP; 14100Sstevel@tonic-gate } 14110Sstevel@tonic-gate oip->i_size = length; 14120Sstevel@tonic-gate oip->i_flag |= ICHG|IUPD|IATTCHG; 14130Sstevel@tonic-gate oip->i_seq++; 14140Sstevel@tonic-gate if (!TRANS_ISTRANS(ufsvfsp)) 14150Sstevel@tonic-gate ufs_iupdat(oip, I_SYNC); /* do sync inode update */ 14160Sstevel@tonic-gate 14170Sstevel@tonic-gate /* 14180Sstevel@tonic-gate * Indirect blocks first. 14190Sstevel@tonic-gate */ 14200Sstevel@tonic-gate for (level = TRIPLE; level >= SINGLE; level--) { 14210Sstevel@tonic-gate bn = ip->i_ib[level]; 14220Sstevel@tonic-gate if (bn != 0) { 14230Sstevel@tonic-gate blocksreleased += 14240Sstevel@tonic-gate indirtrunc(ip, bn, lastiblock[level], level, flags); 14250Sstevel@tonic-gate if (lastiblock[level] < 0) { 14260Sstevel@tonic-gate ip->i_ib[level] = 0; 14270Sstevel@tonic-gate free(ip, bn, (off_t)fs->fs_bsize, 14280Sstevel@tonic-gate flags | I_IBLK); 14290Sstevel@tonic-gate blocksreleased += nblocks; 14300Sstevel@tonic-gate } 14310Sstevel@tonic-gate } 14320Sstevel@tonic-gate if (lastiblock[level] >= 0) 14330Sstevel@tonic-gate goto done; 14340Sstevel@tonic-gate } 14350Sstevel@tonic-gate 14360Sstevel@tonic-gate /* 14370Sstevel@tonic-gate * All whole direct blocks or frags. 14380Sstevel@tonic-gate */ 14390Sstevel@tonic-gate for (i = NDADDR - 1; i > lastblock; i--) { 14400Sstevel@tonic-gate bn = ip->i_db[i]; 14410Sstevel@tonic-gate if (bn == 0) 14420Sstevel@tonic-gate continue; 14430Sstevel@tonic-gate ip->i_db[i] = 0; 14440Sstevel@tonic-gate bsize = (off_t)blksize(fs, ip, i); 14450Sstevel@tonic-gate free(ip, bn, bsize, flags); 14460Sstevel@tonic-gate blocksreleased += btodb(bsize); 14470Sstevel@tonic-gate } 14480Sstevel@tonic-gate if (lastblock < 0) 14490Sstevel@tonic-gate goto done; 14500Sstevel@tonic-gate 14510Sstevel@tonic-gate /* 14520Sstevel@tonic-gate * Finally, look for a change in size of the 14530Sstevel@tonic-gate * last direct block; release any frags. 14540Sstevel@tonic-gate */ 14550Sstevel@tonic-gate bn = ip->i_db[lastblock]; 14560Sstevel@tonic-gate if (bn != 0) { 14570Sstevel@tonic-gate off_t oldspace, newspace; 14580Sstevel@tonic-gate 14590Sstevel@tonic-gate /* 14600Sstevel@tonic-gate * Calculate amount of space we're giving 14610Sstevel@tonic-gate * back as old block size minus new block size. 14620Sstevel@tonic-gate */ 14630Sstevel@tonic-gate oldspace = blksize(fs, ip, lastblock); 14640Sstevel@tonic-gate UFS_SET_ISIZE(length, ip); 14650Sstevel@tonic-gate newspace = blksize(fs, ip, lastblock); 14660Sstevel@tonic-gate if (newspace == 0) { 14670Sstevel@tonic-gate err = ufs_fault(ITOV(ip), "ufs_itrunc: newspace == 0"); 14680Sstevel@tonic-gate return (err); 14690Sstevel@tonic-gate } 14700Sstevel@tonic-gate if (oldspace - newspace > 0) { 14710Sstevel@tonic-gate /* 14720Sstevel@tonic-gate * Block number of space to be free'd is 14730Sstevel@tonic-gate * the old block # plus the number of frags 14740Sstevel@tonic-gate * required for the storage we're keeping. 14750Sstevel@tonic-gate */ 14760Sstevel@tonic-gate bn += numfrags(fs, newspace); 14770Sstevel@tonic-gate free(ip, bn, oldspace - newspace, flags); 14780Sstevel@tonic-gate blocksreleased += btodb(oldspace - newspace); 14790Sstevel@tonic-gate } 14800Sstevel@tonic-gate } 14810Sstevel@tonic-gate done: 14820Sstevel@tonic-gate /* BEGIN PARANOIA */ 14830Sstevel@tonic-gate for (level = SINGLE; level <= TRIPLE; level++) 14840Sstevel@tonic-gate if (ip->i_ib[level] != oip->i_ib[level]) { 14850Sstevel@tonic-gate err = ufs_fault(ITOV(ip), "ufs_itrunc: indirect block"); 14860Sstevel@tonic-gate return (err); 14870Sstevel@tonic-gate } 14880Sstevel@tonic-gate 14890Sstevel@tonic-gate for (i = 0; i < NDADDR; i++) 14900Sstevel@tonic-gate if (ip->i_db[i] != oip->i_db[i]) { 14910Sstevel@tonic-gate err = ufs_fault(ITOV(ip), "ufs_itrunc: direct block"); 14920Sstevel@tonic-gate return (err); 14930Sstevel@tonic-gate } 14940Sstevel@tonic-gate /* END PARANOIA */ 14950Sstevel@tonic-gate oip->i_blocks -= blocksreleased; 14960Sstevel@tonic-gate 14970Sstevel@tonic-gate if (oip->i_blocks < 0) { /* sanity */ 14980Sstevel@tonic-gate cmn_err(CE_NOTE, 14990Sstevel@tonic-gate "ufs_itrunc: %s/%d new size = %lld, blocks = %d\n", 15000Sstevel@tonic-gate fs->fs_fsmnt, (int)oip->i_number, oip->i_size, 15010Sstevel@tonic-gate (int)oip->i_blocks); 15020Sstevel@tonic-gate oip->i_blocks = 0; 15030Sstevel@tonic-gate } 15040Sstevel@tonic-gate oip->i_flag |= ICHG|IATTCHG; 15050Sstevel@tonic-gate oip->i_seq++; 15060Sstevel@tonic-gate /* blocksreleased is >= zero, so this can not fail */ 15070Sstevel@tonic-gate (void) chkdq(oip, -blocksreleased, 0, cr, (char **)NULL, 15080Sstevel@tonic-gate (size_t *)NULL); 15090Sstevel@tonic-gate return (0); 15100Sstevel@tonic-gate } 15110Sstevel@tonic-gate 15120Sstevel@tonic-gate /* 15130Sstevel@tonic-gate * Check mode permission on inode. Mode is READ, WRITE or EXEC. 15140Sstevel@tonic-gate * In the case of WRITE, the read-only status of the file system 15150Sstevel@tonic-gate * is checked. Depending on the calling user, the appropriate 15160Sstevel@tonic-gate * mode bits are selected; privileges to override missing permission 15170Sstevel@tonic-gate * bits are checked through secpolicy_vnode_access(). 15180Sstevel@tonic-gate */ 15190Sstevel@tonic-gate int 15200Sstevel@tonic-gate ufs_iaccess(void *vip, int mode, struct cred *cr) 15210Sstevel@tonic-gate { 15220Sstevel@tonic-gate struct inode *ip = vip; 15230Sstevel@tonic-gate int shift = 0; 15240Sstevel@tonic-gate 15250Sstevel@tonic-gate if (mode & IWRITE) { 15260Sstevel@tonic-gate /* 15270Sstevel@tonic-gate * Disallow write attempts on read-only 15280Sstevel@tonic-gate * file systems, unless the file is a block 15290Sstevel@tonic-gate * or character device or a FIFO. 15300Sstevel@tonic-gate */ 15310Sstevel@tonic-gate if (ip->i_fs->fs_ronly != 0) { 15320Sstevel@tonic-gate if ((ip->i_mode & IFMT) != IFCHR && 15330Sstevel@tonic-gate (ip->i_mode & IFMT) != IFBLK && 15340Sstevel@tonic-gate (ip->i_mode & IFMT) != IFIFO) { 15350Sstevel@tonic-gate return (EROFS); 15360Sstevel@tonic-gate } 15370Sstevel@tonic-gate } 15380Sstevel@tonic-gate } 15390Sstevel@tonic-gate /* 15400Sstevel@tonic-gate * If there is a shadow inode check for the presence of an acl, 15410Sstevel@tonic-gate * if the acl is there use the ufs_acl_access routine to check 15420Sstevel@tonic-gate * the acl 15430Sstevel@tonic-gate */ 15440Sstevel@tonic-gate if (ip->i_ufs_acl && ip->i_ufs_acl->aowner) 15450Sstevel@tonic-gate return (ufs_acl_access(ip, mode, cr)); 15460Sstevel@tonic-gate 15470Sstevel@tonic-gate /* 15480Sstevel@tonic-gate * Access check is based on only 15490Sstevel@tonic-gate * one of owner, group, public. 15500Sstevel@tonic-gate * If not owner, then check group. 15510Sstevel@tonic-gate * If not a member of the group, then 15520Sstevel@tonic-gate * check public access. 15530Sstevel@tonic-gate */ 15540Sstevel@tonic-gate if (crgetuid(cr) != ip->i_uid) { 15550Sstevel@tonic-gate shift += 3; 15560Sstevel@tonic-gate if (!groupmember((uid_t)ip->i_gid, cr)) 15570Sstevel@tonic-gate shift += 3; 15580Sstevel@tonic-gate } 15590Sstevel@tonic-gate 15600Sstevel@tonic-gate mode &= ~(ip->i_mode << shift); 15610Sstevel@tonic-gate 15620Sstevel@tonic-gate if (mode == 0) 15630Sstevel@tonic-gate return (0); 15640Sstevel@tonic-gate 15650Sstevel@tonic-gate /* test missing privilege bits */ 15660Sstevel@tonic-gate return (secpolicy_vnode_access(cr, ITOV(ip), ip->i_uid, mode)); 15670Sstevel@tonic-gate } 15680Sstevel@tonic-gate 15690Sstevel@tonic-gate /* 15700Sstevel@tonic-gate * if necessary, remove an inode from the free list 15710Sstevel@tonic-gate * i_contents is held except at unmount 15720Sstevel@tonic-gate * 15730Sstevel@tonic-gate * Return 1 if the inode is taken off of the ufs_idle_q, 15740Sstevel@tonic-gate * and the caller is expected to call VN_RELE. 15750Sstevel@tonic-gate * 15760Sstevel@tonic-gate * Return 0 otherwise. 15770Sstevel@tonic-gate */ 15780Sstevel@tonic-gate int 15790Sstevel@tonic-gate ufs_rmidle(struct inode *ip) 15800Sstevel@tonic-gate { 15810Sstevel@tonic-gate int rval = 0; 15820Sstevel@tonic-gate 15830Sstevel@tonic-gate mutex_enter(&ip->i_tlock); 15840Sstevel@tonic-gate if ((ip->i_flag & IREF) == 0) { 15850Sstevel@tonic-gate mutex_enter(&ufs_idle_q.uq_mutex); 15860Sstevel@tonic-gate ip->i_freef->i_freeb = ip->i_freeb; 15870Sstevel@tonic-gate ip->i_freeb->i_freef = ip->i_freef; 15880Sstevel@tonic-gate ip->i_freef = ip; 15890Sstevel@tonic-gate ip->i_freeb = ip; 15900Sstevel@tonic-gate ip->i_flag |= IREF; 15910Sstevel@tonic-gate ufs_idle_q.uq_ne--; 15920Sstevel@tonic-gate if (ip->i_flag & IJUNKIQ) { 15930Sstevel@tonic-gate ufs_njunk_iq--; 15940Sstevel@tonic-gate ip->i_flag &= ~IJUNKIQ; 15950Sstevel@tonic-gate } else { 15960Sstevel@tonic-gate ufs_nuseful_iq--; 15970Sstevel@tonic-gate } 15980Sstevel@tonic-gate mutex_exit(&ufs_idle_q.uq_mutex); 15990Sstevel@tonic-gate rval = 1; 16000Sstevel@tonic-gate } 16010Sstevel@tonic-gate mutex_exit(&ip->i_tlock); 16020Sstevel@tonic-gate return (rval); 16030Sstevel@tonic-gate } 16040Sstevel@tonic-gate 16050Sstevel@tonic-gate /* 16060Sstevel@tonic-gate * scan the hash of inodes and call func with the inode locked 16070Sstevel@tonic-gate */ 16080Sstevel@tonic-gate int 16090Sstevel@tonic-gate ufs_scan_inodes(int rwtry, int (*func)(struct inode *, void *), void *arg, 16100Sstevel@tonic-gate struct ufsvfs *ufsvfsp) 16110Sstevel@tonic-gate { 16120Sstevel@tonic-gate struct inode *ip; /* current inode */ 16130Sstevel@tonic-gate struct inode *lip = NULL; /* last/previous inode */ 16140Sstevel@tonic-gate union ihead *ih; /* current hash chain */ 16150Sstevel@tonic-gate int error, i; 16160Sstevel@tonic-gate int saverror = 0; 16170Sstevel@tonic-gate int lip_held; /* lip needs a VN_RELE() */ 16180Sstevel@tonic-gate 16190Sstevel@tonic-gate /* 16200Sstevel@tonic-gate * If ufsvfsp is NULL, then our caller should be holding 16210Sstevel@tonic-gate * ufs_scan_lock to avoid conflicts between ufs_unmount() and 16220Sstevel@tonic-gate * ufs_update(). Otherwise, to avoid false-positives in 16230Sstevel@tonic-gate * ufs_unmount()'s v_count-based EBUSY check, we only hold 16240Sstevel@tonic-gate * those inodes that are in the file system our caller cares 16250Sstevel@tonic-gate * about. 16260Sstevel@tonic-gate * 16270Sstevel@tonic-gate * We know that ip is a valid inode in the hash chain (and thus 16280Sstevel@tonic-gate * we can trust i_ufsvfs) because the inode we chained from 16290Sstevel@tonic-gate * (lip) is still in the hash chain. This is true because either: 16300Sstevel@tonic-gate * 16310Sstevel@tonic-gate * 1. We did not drop the hash chain lock since the last 16320Sstevel@tonic-gate * iteration (because we were not interested in the last inode), 16330Sstevel@tonic-gate * or 16340Sstevel@tonic-gate * 2. We maintained a hold on the last inode while we 16350Sstevel@tonic-gate * we were processing it, so it could not be removed 16360Sstevel@tonic-gate * from the hash chain. 16370Sstevel@tonic-gate * 16380Sstevel@tonic-gate * The whole reason we're dropping and re-grabbing the chain 16390Sstevel@tonic-gate * lock on every inode is so that we don't present a major 16400Sstevel@tonic-gate * choke point on throughput, particularly when we've been 16410Sstevel@tonic-gate * called on behalf of fsflush. 16420Sstevel@tonic-gate */ 16430Sstevel@tonic-gate 16440Sstevel@tonic-gate for (i = 0, ih = ihead; i < inohsz; i++, ih++) { 16450Sstevel@tonic-gate mutex_enter(&ih_lock[i]); 16460Sstevel@tonic-gate for (ip = ih->ih_chain[0], lip_held = 0; 16470Sstevel@tonic-gate ip != (struct inode *)ih; 16480Sstevel@tonic-gate ip = lip->i_forw) { 16490Sstevel@tonic-gate 16500Sstevel@tonic-gate ins.in_scan.value.ul++; 16510Sstevel@tonic-gate 16520Sstevel@tonic-gate /* 16530Sstevel@tonic-gate * Undo the previous iteration's VN_HOLD(), but 16540Sstevel@tonic-gate * only if one was done. 16550Sstevel@tonic-gate */ 16560Sstevel@tonic-gate if (lip_held) 16570Sstevel@tonic-gate VN_RELE(ITOV(lip)); 16580Sstevel@tonic-gate 16590Sstevel@tonic-gate lip = ip; 16600Sstevel@tonic-gate if (ufsvfsp != NULL && ip->i_ufsvfs != ufsvfsp) { 16610Sstevel@tonic-gate /* 16620Sstevel@tonic-gate * We're not processing all inodes, and 16630Sstevel@tonic-gate * this inode is not in the filesystem of 16640Sstevel@tonic-gate * interest, so skip it. No need to do a 16650Sstevel@tonic-gate * VN_HOLD() since we're not dropping the 16660Sstevel@tonic-gate * hash chain lock until after we've 16670Sstevel@tonic-gate * done the i_forw traversal above. 16680Sstevel@tonic-gate */ 16690Sstevel@tonic-gate lip_held = 0; 16700Sstevel@tonic-gate continue; 16710Sstevel@tonic-gate } 16720Sstevel@tonic-gate VN_HOLD(ITOV(ip)); 16730Sstevel@tonic-gate lip_held = 1; 16740Sstevel@tonic-gate mutex_exit(&ih_lock[i]); 16750Sstevel@tonic-gate 16760Sstevel@tonic-gate /* 16770Sstevel@tonic-gate * Acquire the contents lock as writer to make 16780Sstevel@tonic-gate * sure that the inode has been initialized in 16790Sstevel@tonic-gate * the cache or removed from the idle list by 16800Sstevel@tonic-gate * ufs_iget(). This works because ufs_iget() 16810Sstevel@tonic-gate * acquires the contents lock before putting 16820Sstevel@tonic-gate * the inode into the cache. If we can lock 16830Sstevel@tonic-gate * it, then he's done with it. 16840Sstevel@tonic-gate */ 16850Sstevel@tonic-gate 16860Sstevel@tonic-gate if (rwtry) { 16870Sstevel@tonic-gate if (!rw_tryenter(&ip->i_contents, RW_WRITER)) { 16880Sstevel@tonic-gate mutex_enter(&ih_lock[i]); 16890Sstevel@tonic-gate continue; 16900Sstevel@tonic-gate } 16910Sstevel@tonic-gate } else { 16920Sstevel@tonic-gate rw_enter(&ip->i_contents, RW_WRITER); 16930Sstevel@tonic-gate } 16940Sstevel@tonic-gate 16950Sstevel@tonic-gate rw_exit(&ip->i_contents); 16960Sstevel@tonic-gate 16970Sstevel@tonic-gate /* 16980Sstevel@tonic-gate * ISTALE means the inode couldn't be read 16990Sstevel@tonic-gate * 17000Sstevel@tonic-gate * We don't have to hold the i_contents lock 17010Sstevel@tonic-gate * for this check for a couple of 17020Sstevel@tonic-gate * reasons. First, if ISTALE is set then the 17030Sstevel@tonic-gate * flag cannot be cleared until the inode is 17040Sstevel@tonic-gate * removed from the cache and that cannot 17050Sstevel@tonic-gate * happen until after we VN_RELE() it. 17060Sstevel@tonic-gate * Second, if ISTALE is not set, then the 17070Sstevel@tonic-gate * inode is in the cache and does not need to 17080Sstevel@tonic-gate * be read from disk so ISTALE cannot be set 17090Sstevel@tonic-gate * while we are not looking. 17100Sstevel@tonic-gate */ 17110Sstevel@tonic-gate if ((ip->i_flag & ISTALE) == 0) { 17120Sstevel@tonic-gate if ((error = (*func)(ip, arg)) != 0) 17130Sstevel@tonic-gate saverror = error; 17140Sstevel@tonic-gate } 17150Sstevel@tonic-gate 17160Sstevel@tonic-gate mutex_enter(&ih_lock[i]); 17170Sstevel@tonic-gate } 17180Sstevel@tonic-gate if (lip_held) 17190Sstevel@tonic-gate VN_RELE(ITOV(lip)); 17200Sstevel@tonic-gate mutex_exit(&ih_lock[i]); 17210Sstevel@tonic-gate } 17220Sstevel@tonic-gate return (saverror); 17230Sstevel@tonic-gate } 17240Sstevel@tonic-gate 17250Sstevel@tonic-gate /* 17260Sstevel@tonic-gate * Mark inode with the current time, plus a unique increment. 17270Sstevel@tonic-gate * 17280Sstevel@tonic-gate * Since we only keep 32-bit time on disk, if UFS is still alive 17290Sstevel@tonic-gate * beyond 2038, filesystem times will simply stick at the last 17300Sstevel@tonic-gate * possible second of 32-bit time. Not ideal, but probably better 17310Sstevel@tonic-gate * than going into the remote past, or confusing applications with 17320Sstevel@tonic-gate * negative time. 17330Sstevel@tonic-gate */ 17340Sstevel@tonic-gate void 17350Sstevel@tonic-gate ufs_imark(struct inode *ip) 17360Sstevel@tonic-gate { 17370Sstevel@tonic-gate timestruc_t now; 17380Sstevel@tonic-gate int32_t usec, nsec; 17390Sstevel@tonic-gate 17400Sstevel@tonic-gate /* 17410Sstevel@tonic-gate * The update of i_seq may have been deferred, increase i_seq here 17420Sstevel@tonic-gate * to make sure it is in sync with the timestamps. 17430Sstevel@tonic-gate */ 17440Sstevel@tonic-gate if (ip->i_flag & ISEQ) { 17450Sstevel@tonic-gate ASSERT(ip->i_flag & (IUPD|ICHG)); 17460Sstevel@tonic-gate ip->i_seq++; 17470Sstevel@tonic-gate ip->i_flag &= ~ISEQ; 17480Sstevel@tonic-gate } 17490Sstevel@tonic-gate 17500Sstevel@tonic-gate gethrestime(&now); 17510Sstevel@tonic-gate 17520Sstevel@tonic-gate /* 17530Sstevel@tonic-gate * Fast algorithm to convert nsec to usec -- see hrt2ts() 17540Sstevel@tonic-gate * in common/os/timers.c for a full description. 17550Sstevel@tonic-gate */ 17560Sstevel@tonic-gate nsec = now.tv_nsec; 17570Sstevel@tonic-gate usec = nsec + (nsec >> 2); 17580Sstevel@tonic-gate usec = nsec + (usec >> 1); 17590Sstevel@tonic-gate usec = nsec + (usec >> 2); 17600Sstevel@tonic-gate usec = nsec + (usec >> 4); 17610Sstevel@tonic-gate usec = nsec - (usec >> 3); 17620Sstevel@tonic-gate usec = nsec + (usec >> 2); 17630Sstevel@tonic-gate usec = nsec + (usec >> 3); 17640Sstevel@tonic-gate usec = nsec + (usec >> 4); 17650Sstevel@tonic-gate usec = nsec + (usec >> 1); 17660Sstevel@tonic-gate usec = nsec + (usec >> 6); 17670Sstevel@tonic-gate usec = usec >> 10; 17680Sstevel@tonic-gate 17690Sstevel@tonic-gate mutex_enter(&ufs_iuniqtime_lock); 17700Sstevel@tonic-gate if (now.tv_sec > (time_t)iuniqtime.tv_sec || 17710Sstevel@tonic-gate usec > iuniqtime.tv_usec) { 17720Sstevel@tonic-gate if (now.tv_sec < TIME32_MAX) { 17730Sstevel@tonic-gate iuniqtime.tv_sec = (time32_t)now.tv_sec; 17740Sstevel@tonic-gate iuniqtime.tv_usec = usec; 17750Sstevel@tonic-gate } 17760Sstevel@tonic-gate } else { 17770Sstevel@tonic-gate if (iuniqtime.tv_sec < TIME32_MAX) { 17780Sstevel@tonic-gate iuniqtime.tv_usec++; 17790Sstevel@tonic-gate /* Check for usec overflow */ 17800Sstevel@tonic-gate if (iuniqtime.tv_usec >= MICROSEC) { 17810Sstevel@tonic-gate iuniqtime.tv_sec++; 17820Sstevel@tonic-gate iuniqtime.tv_usec = 0; 17830Sstevel@tonic-gate } 17840Sstevel@tonic-gate } 17850Sstevel@tonic-gate } 17860Sstevel@tonic-gate 17870Sstevel@tonic-gate if ((ip->i_flag & IACC) && !(ip->i_ufsvfs->vfs_noatime)) { 17880Sstevel@tonic-gate ip->i_atime = iuniqtime; 17890Sstevel@tonic-gate } 17900Sstevel@tonic-gate if (ip->i_flag & IUPD) { 17910Sstevel@tonic-gate ip->i_mtime = iuniqtime; 17920Sstevel@tonic-gate ip->i_flag |= IMODTIME; 17930Sstevel@tonic-gate } 17940Sstevel@tonic-gate if (ip->i_flag & ICHG) { 17950Sstevel@tonic-gate ip->i_diroff = 0; 17960Sstevel@tonic-gate ip->i_ctime = iuniqtime; 17970Sstevel@tonic-gate } 17980Sstevel@tonic-gate mutex_exit(&ufs_iuniqtime_lock); 17990Sstevel@tonic-gate } 18000Sstevel@tonic-gate 18010Sstevel@tonic-gate /* 18020Sstevel@tonic-gate * Update timestamps in inode. 18030Sstevel@tonic-gate */ 18040Sstevel@tonic-gate void 18050Sstevel@tonic-gate ufs_itimes_nolock(struct inode *ip) 18060Sstevel@tonic-gate { 18070Sstevel@tonic-gate 18080Sstevel@tonic-gate /* 18090Sstevel@tonic-gate * if noatime is set and the inode access time is the only field that 18100Sstevel@tonic-gate * must be changed, exit immediately. 18110Sstevel@tonic-gate */ 18120Sstevel@tonic-gate if (((ip->i_flag & (IUPD|IACC|ICHG)) == IACC) && 18130Sstevel@tonic-gate (ip->i_ufsvfs->vfs_noatime)) { 18140Sstevel@tonic-gate return; 18150Sstevel@tonic-gate } 18160Sstevel@tonic-gate 18170Sstevel@tonic-gate if (ip->i_flag & (IUPD|IACC|ICHG)) { 18180Sstevel@tonic-gate if (ip->i_flag & ICHG) 18190Sstevel@tonic-gate ip->i_flag |= IMOD; 18200Sstevel@tonic-gate else 18210Sstevel@tonic-gate ip->i_flag |= IMODACC; 18220Sstevel@tonic-gate ufs_imark(ip); 18230Sstevel@tonic-gate ip->i_flag &= ~(IACC|IUPD|ICHG); 18240Sstevel@tonic-gate } 18250Sstevel@tonic-gate } 1826