10Sstevel@tonic-gate /*
20Sstevel@tonic-gate * CDDL HEADER START
30Sstevel@tonic-gate *
40Sstevel@tonic-gate * The contents of this file are subject to the terms of the
51365Sowenr * Common Development and Distribution License (the "License").
61365Sowenr * You may not use this file except in compliance with the License.
70Sstevel@tonic-gate *
80Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
90Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing.
100Sstevel@tonic-gate * See the License for the specific language governing permissions
110Sstevel@tonic-gate * and limitations under the License.
120Sstevel@tonic-gate *
130Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each
140Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
150Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the
160Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying
170Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner]
180Sstevel@tonic-gate *
190Sstevel@tonic-gate * CDDL HEADER END
200Sstevel@tonic-gate */
210Sstevel@tonic-gate /*
22*12607Sjohn.levon@sun.com * Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved.
230Sstevel@tonic-gate */
240Sstevel@tonic-gate
250Sstevel@tonic-gate #include <sys/types.h>
260Sstevel@tonic-gate #include <sys/t_lock.h>
270Sstevel@tonic-gate #include <sys/param.h>
280Sstevel@tonic-gate #include <sys/time.h>
290Sstevel@tonic-gate #include <sys/systm.h>
300Sstevel@tonic-gate #include <sys/sysmacros.h>
310Sstevel@tonic-gate #include <sys/resource.h>
320Sstevel@tonic-gate #include <sys/signal.h>
330Sstevel@tonic-gate #include <sys/cred.h>
340Sstevel@tonic-gate #include <sys/user.h>
350Sstevel@tonic-gate #include <sys/buf.h>
360Sstevel@tonic-gate #include <sys/vfs.h>
370Sstevel@tonic-gate #include <sys/vnode.h>
380Sstevel@tonic-gate #include <sys/proc.h>
390Sstevel@tonic-gate #include <sys/disp.h>
400Sstevel@tonic-gate #include <sys/file.h>
410Sstevel@tonic-gate #include <sys/fcntl.h>
420Sstevel@tonic-gate #include <sys/flock.h>
43329Saguzovsk #include <sys/atomic.h>
440Sstevel@tonic-gate #include <sys/kmem.h>
450Sstevel@tonic-gate #include <sys/uio.h>
460Sstevel@tonic-gate #include <sys/conf.h>
470Sstevel@tonic-gate #include <sys/mman.h>
480Sstevel@tonic-gate #include <sys/pathname.h>
490Sstevel@tonic-gate #include <sys/debug.h>
500Sstevel@tonic-gate #include <sys/vmsystm.h>
510Sstevel@tonic-gate #include <sys/cmn_err.h>
520Sstevel@tonic-gate #include <sys/acct.h>
530Sstevel@tonic-gate #include <sys/dnlc.h>
540Sstevel@tonic-gate #include <sys/swap.h>
550Sstevel@tonic-gate
560Sstevel@tonic-gate #include <sys/fs/ufs_fs.h>
570Sstevel@tonic-gate #include <sys/fs/ufs_inode.h>
580Sstevel@tonic-gate #include <sys/fs/ufs_fsdir.h>
590Sstevel@tonic-gate #include <sys/fs/ufs_trans.h>
600Sstevel@tonic-gate #include <sys/fs/ufs_panic.h>
610Sstevel@tonic-gate #include <sys/fs/ufs_mount.h>
620Sstevel@tonic-gate #include <sys/fs/ufs_bio.h>
630Sstevel@tonic-gate #include <sys/fs/ufs_log.h>
640Sstevel@tonic-gate #include <sys/fs/ufs_quota.h>
650Sstevel@tonic-gate #include <sys/dirent.h> /* must be AFTER <sys/fs/fsdir.h>! */
660Sstevel@tonic-gate #include <sys/errno.h>
670Sstevel@tonic-gate #include <sys/sysinfo.h>
680Sstevel@tonic-gate
690Sstevel@tonic-gate #include <vm/hat.h>
700Sstevel@tonic-gate #include <vm/pvn.h>
710Sstevel@tonic-gate #include <vm/as.h>
720Sstevel@tonic-gate #include <vm/seg.h>
730Sstevel@tonic-gate #include <vm/seg_map.h>
740Sstevel@tonic-gate #include <vm/seg_vn.h>
750Sstevel@tonic-gate #include <vm/rm.h>
760Sstevel@tonic-gate #include <vm/anon.h>
770Sstevel@tonic-gate #include <sys/swap.h>
780Sstevel@tonic-gate #include <sys/dnlc.h>
790Sstevel@tonic-gate
800Sstevel@tonic-gate extern struct vnode *common_specvp(struct vnode *vp);
810Sstevel@tonic-gate
820Sstevel@tonic-gate /* error lock status */
830Sstevel@tonic-gate #define UN_ERRLCK (-1)
840Sstevel@tonic-gate #define SET_ERRLCK 1
850Sstevel@tonic-gate #define RE_ERRLCK 2
860Sstevel@tonic-gate #define NO_ERRLCK 0
870Sstevel@tonic-gate
880Sstevel@tonic-gate /*
890Sstevel@tonic-gate * Index to be used in TSD for storing lockfs data
900Sstevel@tonic-gate */
910Sstevel@tonic-gate uint_t ufs_lockfs_key;
920Sstevel@tonic-gate
/*
 * Per-thread record (kept in TSD under ufs_lockfs_key) tracking which
 * ulockfs structures this thread already holds a vnode-operation count
 * on.  Used to recognize recursive VOPs so a thread does not deadlock
 * against its own pending lockfs request.
 */
typedef struct _ulockfs_info {
	struct _ulockfs_info *next;	/* next entry in this thread's list */
	struct ulockfs *ulp;		/* fs this entry refers to; NULL = free */
	uint_t flags;			/* ULOCK_INFO_* flags below */
} ulockfs_info_t;

#define	ULOCK_INFO_FALLOCATE	0x00000001	/* fallocate thread */
100923Ssdebnath
/*
 * Check in TSD whether we are already doing a VOP on this filesystem,
 * i.e. whether this is a recursive entry.  Walks the thread's list
 * rooted at 'head'; sets 'found' to 1 if an entry for 'ulp' exists and
 * points 'free' at the first reusable (ulp == NULL) entry, if any.
 */
#define	IS_REC_VOP(found, head, ulp, free)		\
{							\
	ulockfs_info_t *_curr;				\
							\
	for (found = 0, free = NULL, _curr = head;	\
	    _curr != NULL; _curr = _curr->next) {	\
		if ((free == NULL) &&			\
		    (_curr->ulp == NULL))		\
			free = _curr;			\
		if (_curr->ulp == ulp) {		\
			found = 1;			\
			break;				\
		}					\
	}						\
}
1190Sstevel@tonic-gate
/*
 * Get the lockfs data from TSD so that lockfs handles the recursive VOP
 * properly.  Sets 'info' to the thread's entry for 'ulp', or NULL when
 * the thread has no entry for this filesystem.
 */
#define	SEARCH_ULOCKFSP(head, ulp, info)		\
{							\
	ulockfs_info_t *_curr;				\
							\
	for (_curr = head; _curr != NULL;		\
	    _curr = _curr->next) {			\
		if (_curr->ulp == ulp) {		\
			break;				\
		}					\
	}						\
							\
	info = _curr;					\
}
1370Sstevel@tonic-gate
1380Sstevel@tonic-gate /*
1390Sstevel@tonic-gate * Validate lockfs request
1400Sstevel@tonic-gate */
1410Sstevel@tonic-gate static int
ufs_getlfd(struct lockfs * lockfsp,struct lockfs * ul_lockfsp)1420Sstevel@tonic-gate ufs_getlfd(
1430Sstevel@tonic-gate struct lockfs *lockfsp, /* new lock request */
1440Sstevel@tonic-gate struct lockfs *ul_lockfsp) /* old lock state */
1450Sstevel@tonic-gate {
1460Sstevel@tonic-gate int error = 0;
1470Sstevel@tonic-gate
1480Sstevel@tonic-gate /*
1490Sstevel@tonic-gate * no input flags defined
1500Sstevel@tonic-gate */
1510Sstevel@tonic-gate if (lockfsp->lf_flags != 0) {
1520Sstevel@tonic-gate error = EINVAL;
1530Sstevel@tonic-gate goto errout;
1540Sstevel@tonic-gate }
1550Sstevel@tonic-gate
1560Sstevel@tonic-gate /*
1570Sstevel@tonic-gate * check key
1580Sstevel@tonic-gate */
1590Sstevel@tonic-gate if (!LOCKFS_IS_ULOCK(ul_lockfsp))
1600Sstevel@tonic-gate if (lockfsp->lf_key != ul_lockfsp->lf_key) {
1610Sstevel@tonic-gate error = EINVAL;
1620Sstevel@tonic-gate goto errout;
1630Sstevel@tonic-gate }
1640Sstevel@tonic-gate
1650Sstevel@tonic-gate lockfsp->lf_key = ul_lockfsp->lf_key + 1;
1660Sstevel@tonic-gate
1670Sstevel@tonic-gate errout:
1680Sstevel@tonic-gate return (error);
1690Sstevel@tonic-gate }
1700Sstevel@tonic-gate
1710Sstevel@tonic-gate /*
1720Sstevel@tonic-gate * ufs_checkaccton
1730Sstevel@tonic-gate * check if accounting is turned on on this fs
1740Sstevel@tonic-gate */
1750Sstevel@tonic-gate
1760Sstevel@tonic-gate int
ufs_checkaccton(struct vnode * vp)1770Sstevel@tonic-gate ufs_checkaccton(struct vnode *vp)
1780Sstevel@tonic-gate {
1790Sstevel@tonic-gate if (acct_fs_in_use(vp))
1800Sstevel@tonic-gate return (EDEADLK);
1810Sstevel@tonic-gate return (0);
1820Sstevel@tonic-gate }
1830Sstevel@tonic-gate
1840Sstevel@tonic-gate /*
1850Sstevel@tonic-gate * ufs_checkswapon
1860Sstevel@tonic-gate * check if local swapping is to file on this fs
1870Sstevel@tonic-gate */
1880Sstevel@tonic-gate int
ufs_checkswapon(struct vnode * vp)1890Sstevel@tonic-gate ufs_checkswapon(struct vnode *vp)
1900Sstevel@tonic-gate {
1910Sstevel@tonic-gate struct swapinfo *sip;
1920Sstevel@tonic-gate
1930Sstevel@tonic-gate mutex_enter(&swapinfo_lock);
1940Sstevel@tonic-gate for (sip = swapinfo; sip; sip = sip->si_next)
1950Sstevel@tonic-gate if (sip->si_vp->v_vfsp == vp->v_vfsp) {
1960Sstevel@tonic-gate mutex_exit(&swapinfo_lock);
1970Sstevel@tonic-gate return (EDEADLK);
1980Sstevel@tonic-gate }
1990Sstevel@tonic-gate mutex_exit(&swapinfo_lock);
2000Sstevel@tonic-gate return (0);
2010Sstevel@tonic-gate }
2020Sstevel@tonic-gate
2030Sstevel@tonic-gate /*
2040Sstevel@tonic-gate * ufs_freeze
2050Sstevel@tonic-gate * pend future accesses for current lock and desired lock
2060Sstevel@tonic-gate */
2070Sstevel@tonic-gate void
ufs_freeze(struct ulockfs * ulp,struct lockfs * lockfsp)2080Sstevel@tonic-gate ufs_freeze(struct ulockfs *ulp, struct lockfs *lockfsp)
2090Sstevel@tonic-gate {
2100Sstevel@tonic-gate /*
2110Sstevel@tonic-gate * set to new lock type
2120Sstevel@tonic-gate */
2130Sstevel@tonic-gate ulp->ul_lockfs.lf_lock = lockfsp->lf_lock;
2140Sstevel@tonic-gate ulp->ul_lockfs.lf_key = lockfsp->lf_key;
2150Sstevel@tonic-gate ulp->ul_lockfs.lf_comlen = lockfsp->lf_comlen;
2160Sstevel@tonic-gate ulp->ul_lockfs.lf_comment = lockfsp->lf_comment;
2170Sstevel@tonic-gate
2180Sstevel@tonic-gate ulp->ul_fs_lock = (1 << ulp->ul_lockfs.lf_lock);
2190Sstevel@tonic-gate }
2200Sstevel@tonic-gate
2210Sstevel@tonic-gate /*
222329Saguzovsk * All callers of ufs_quiesce() atomically increment ufs_quiesce_pend before
223329Saguzovsk * starting ufs_quiesce() protocol and decrement it only when a file system no
224329Saguzovsk * longer has to be in quiescent state. This allows ufs_pageio() to detect
225329Saguzovsk * that another thread wants to quiesce a file system. See more comments in
226329Saguzovsk * ufs_pageio().
227329Saguzovsk */
228329Saguzovsk ulong_t ufs_quiesce_pend = 0;
229329Saguzovsk
/*
 * ufs_quiesce
 *	wait for outstanding accesses to finish
 *
 *	Called with ulp->ul_lock held; the caller must already have
 *	incremented ufs_quiesce_pend (asserted below).  Returns 0 once
 *	the fs is quiescent (or a fallocate thread may proceed), EINTR
 *	if the wait was interrupted by a signal.
 */
int
ufs_quiesce(struct ulockfs *ulp)
{
	int error = 0;
	ulockfs_info_t *head;
	ulockfs_info_t *info;
	klwp_t *lwp = ttolwp(curthread);

	/* this thread's recursive-VOP state for this fs, if any */
	head = (ulockfs_info_t *)tsd_get(ufs_lockfs_key);
	SEARCH_ULOCKFSP(head, ulp, info);

	/*
	 * We have to keep /proc away from stopping us after we applied
	 * the softlock but before we got a chance to clear it again.
	 * prstop() may pagefault and become stuck on the softlock still
	 * pending.
	 */
	if (lwp != NULL)
		lwp->lwp_nostop++;

	/*
	 * Set a softlock to suspend future ufs_vnops so that
	 * this lockfs request will not be starved
	 */
	ULOCKFS_SET_SLOCK(ulp);
	ASSERT(ufs_quiesce_pend);

	/* check if there is any outstanding ufs vnodeops calls */
	while (ulp->ul_vnops_cnt || ulp->ul_falloc_cnt) {
		/*
		 * use timed version of cv_wait_sig() to make sure we don't
		 * miss a wake up call from ufs_pageio() when it doesn't use
		 * ul_lock.
		 *
		 * when a fallocate thread comes in, the only way it returns
		 * from this function is if there are no other vnode operations
		 * going on (remember fallocate threads are tracked using
		 * ul_falloc_cnt not ul_vnops_cnt), and another fallocate thread
		 * hasn't already grabbed the fs write lock.
		 */
		if (info && (info->flags & ULOCK_INFO_FALLOCATE)) {
			if (!ulp->ul_vnops_cnt && !ULOCKFS_IS_FWLOCK(ulp))
				goto out;
		}
		/* zero return means the sleep was interrupted by a signal */
		if (!cv_reltimedwait_sig(&ulp->ul_cv, &ulp->ul_lock, hz,
		    TR_CLOCK_TICK)) {
			error = EINTR;
			goto out;
		}
	}

out:
	/*
	 * unlock the soft lock
	 */
	ULOCKFS_CLR_SLOCK(ulp);

	if (lwp != NULL)
		lwp->lwp_nostop--;

	return (error);
}
296923Ssdebnath
2970Sstevel@tonic-gate /*
2980Sstevel@tonic-gate * ufs_flush_inode
2990Sstevel@tonic-gate */
3000Sstevel@tonic-gate int
ufs_flush_inode(struct inode * ip,void * arg)3010Sstevel@tonic-gate ufs_flush_inode(struct inode *ip, void *arg)
3020Sstevel@tonic-gate {
3030Sstevel@tonic-gate int error;
3040Sstevel@tonic-gate int saverror = 0;
3050Sstevel@tonic-gate
3060Sstevel@tonic-gate /*
3070Sstevel@tonic-gate * wrong file system; keep looking
3080Sstevel@tonic-gate */
3090Sstevel@tonic-gate if (ip->i_ufsvfs != (struct ufsvfs *)arg)
3100Sstevel@tonic-gate return (0);
3110Sstevel@tonic-gate
3120Sstevel@tonic-gate /*
3130Sstevel@tonic-gate * asynchronously push all the dirty pages
3140Sstevel@tonic-gate */
3150Sstevel@tonic-gate if (((error = TRANS_SYNCIP(ip, B_ASYNC, 0, TOP_SYNCIP_FLUSHI)) != 0) &&
3160Sstevel@tonic-gate (error != EAGAIN))
3170Sstevel@tonic-gate saverror = error;
3180Sstevel@tonic-gate /*
3190Sstevel@tonic-gate * wait for io and discard all mappings
3200Sstevel@tonic-gate */
3210Sstevel@tonic-gate if (error = TRANS_SYNCIP(ip, B_INVAL, 0, TOP_SYNCIP_FLUSHI))
3220Sstevel@tonic-gate saverror = error;
3230Sstevel@tonic-gate
3240Sstevel@tonic-gate if (ITOV(ip)->v_type == VDIR) {
3250Sstevel@tonic-gate dnlc_dir_purge(&ip->i_danchor);
3260Sstevel@tonic-gate }
3270Sstevel@tonic-gate
3280Sstevel@tonic-gate return (saverror);
3290Sstevel@tonic-gate }
3300Sstevel@tonic-gate
/*
 * ufs_flush
 *	Flush everything that is currently dirty; this includes invalidating
 *	any mappings.
 *
 *	Caller must hold the vfs lock (asserted).  Returns the first
 *	error encountered, or 0.
 */
int
ufs_flush(struct vfs *vfsp)
{
	int error;
	int saverror = 0;
	struct ufsvfs *ufsvfsp = (struct ufsvfs *)vfsp->vfs_data;
	struct fs *fs = ufsvfsp->vfs_fs;
	int tdontblock = 0;	/* caller already had T_DONTBLOCK set */

	ASSERT(vfs_lock_held(vfsp));

	/*
	 * purge dnlc
	 */
	(void) dnlc_purge_vfsp(vfsp, 0);

	/*
	 * drain the delete and idle threads
	 */
	ufs_delete_drain(vfsp, 0, 0);
	ufs_idle_drain(vfsp);

	/*
	 * flush and invalidate quota records
	 */
	(void) qsync(ufsvfsp);

	/*
	 * flush w/invalidate the inodes for vfsp
	 */
	if (error = ufs_scan_inodes(0, ufs_flush_inode, ufsvfsp, ufsvfsp))
		saverror = error;

	/*
	 * synchronously flush superblock and summary info
	 */
	if (fs->fs_ronly == 0 && fs->fs_fmod) {
		fs->fs_fmod = 0;
		TRANS_SBUPDATE(ufsvfsp, vfsp, TOP_SBUPDATE_FLUSH);
	}
	/*
	 * flush w/invalidate block device pages and buf cache
	 */
	if ((error = VOP_PUTPAGE(common_specvp(ufsvfsp->vfs_devvp),
	    (offset_t)0, 0, B_INVAL, CRED(), NULL)) > 0)
		saverror = error;

	(void) bflush((dev_t)vfsp->vfs_dev);
	(void) bfinval((dev_t)vfsp->vfs_dev, 0);

	/*
	 * drain the delete and idle threads again
	 */
	ufs_delete_drain(vfsp, 0, 0);
	ufs_idle_drain(vfsp);

	/*
	 * play with the clean flag
	 */
	if (saverror == 0)
		ufs_checkclean(vfsp);

	/*
	 * Flush any outstanding transactions and roll the log
	 * only if we are supposed to do, i.e. LDL_NOROLL not set.
	 * We can not simply check for fs_ronly here since fsck also may
	 * use this code to roll the log on a read-only filesystem, e.g.
	 * root during early stages of boot, if other then a sanity check is
	 * done, it will clear LDL_NOROLL before.
	 * In addition we assert that the deltamap does not contain any deltas
	 * in case LDL_NOROLL is set since this is not supposed to happen.
	 */
	if (TRANS_ISTRANS(ufsvfsp)) {
		ml_unit_t *ul = ufsvfsp->vfs_log;
		mt_map_t *mtm = ul->un_deltamap;

		if (ul->un_flags & LDL_NOROLL) {
			ASSERT(mtm->mtm_nme == 0);
		} else {
			/*
			 * Do not set T_DONTBLOCK if there is a
			 * transaction opened by caller.
			 */
			if (curthread->t_flag & T_DONTBLOCK)
				tdontblock = 1;
			else
				curthread->t_flag |= T_DONTBLOCK;

			TRANS_BEGIN_SYNC(ufsvfsp, TOP_COMMIT_FLUSH,
			    TOP_COMMIT_SIZE, error);

			if (!error) {
				TRANS_END_SYNC(ufsvfsp, saverror,
				    TOP_COMMIT_FLUSH, TOP_COMMIT_SIZE);
			}

			/* restore the caller's T_DONTBLOCK state */
			if (tdontblock == 0)
				curthread->t_flag &= ~T_DONTBLOCK;

			logmap_roll_dev(ufsvfsp->vfs_log);
		}
	}

	return (saverror);
}
4410Sstevel@tonic-gate
4420Sstevel@tonic-gate /*
4430Sstevel@tonic-gate * ufs_thaw_wlock
4440Sstevel@tonic-gate * special processing when thawing down to wlock
4450Sstevel@tonic-gate */
4460Sstevel@tonic-gate static int
ufs_thaw_wlock(struct inode * ip,void * arg)4470Sstevel@tonic-gate ufs_thaw_wlock(struct inode *ip, void *arg)
4480Sstevel@tonic-gate {
4490Sstevel@tonic-gate /*
4500Sstevel@tonic-gate * wrong file system; keep looking
4510Sstevel@tonic-gate */
4520Sstevel@tonic-gate if (ip->i_ufsvfs != (struct ufsvfs *)arg)
4530Sstevel@tonic-gate return (0);
4540Sstevel@tonic-gate
4550Sstevel@tonic-gate /*
4560Sstevel@tonic-gate * iupdat refuses to clear flags if the fs is read only. The fs
4570Sstevel@tonic-gate * may become read/write during the lock and we wouldn't want
4580Sstevel@tonic-gate * these inodes being written to disk. So clear the flags.
4590Sstevel@tonic-gate */
4600Sstevel@tonic-gate rw_enter(&ip->i_contents, RW_WRITER);
4610Sstevel@tonic-gate ip->i_flag &= ~(IMOD|IMODACC|IACC|IUPD|ICHG|IATTCHG);
4620Sstevel@tonic-gate rw_exit(&ip->i_contents);
4630Sstevel@tonic-gate
4640Sstevel@tonic-gate /*
4650Sstevel@tonic-gate * pages are mlocked -- fail wlock
4660Sstevel@tonic-gate */
4670Sstevel@tonic-gate if (ITOV(ip)->v_type != VCHR && vn_has_cached_data(ITOV(ip)))
4680Sstevel@tonic-gate return (EBUSY);
4690Sstevel@tonic-gate
4700Sstevel@tonic-gate return (0);
4710Sstevel@tonic-gate }
4720Sstevel@tonic-gate
4730Sstevel@tonic-gate /*
4740Sstevel@tonic-gate * ufs_thaw_hlock
4750Sstevel@tonic-gate * special processing when thawing down to hlock or elock
4760Sstevel@tonic-gate */
4770Sstevel@tonic-gate static int
ufs_thaw_hlock(struct inode * ip,void * arg)4780Sstevel@tonic-gate ufs_thaw_hlock(struct inode *ip, void *arg)
4790Sstevel@tonic-gate {
4800Sstevel@tonic-gate struct vnode *vp = ITOV(ip);
4810Sstevel@tonic-gate
4820Sstevel@tonic-gate /*
4830Sstevel@tonic-gate * wrong file system; keep looking
4840Sstevel@tonic-gate */
4850Sstevel@tonic-gate if (ip->i_ufsvfs != (struct ufsvfs *)arg)
4860Sstevel@tonic-gate return (0);
4870Sstevel@tonic-gate
4880Sstevel@tonic-gate /*
4890Sstevel@tonic-gate * blow away all pages - even if they are mlocked
4900Sstevel@tonic-gate */
4910Sstevel@tonic-gate do {
4920Sstevel@tonic-gate (void) TRANS_SYNCIP(ip, B_INVAL | B_FORCE, 0, TOP_SYNCIP_HLOCK);
4930Sstevel@tonic-gate } while ((vp->v_type != VCHR) && vn_has_cached_data(vp));
4940Sstevel@tonic-gate rw_enter(&ip->i_contents, RW_WRITER);
4950Sstevel@tonic-gate ip->i_flag &= ~(IMOD|IMODACC|IACC|IUPD|ICHG|IATTCHG);
4960Sstevel@tonic-gate rw_exit(&ip->i_contents);
4970Sstevel@tonic-gate
4980Sstevel@tonic-gate return (0);
4990Sstevel@tonic-gate }
5000Sstevel@tonic-gate
/*
 * ufs_thaw
 *	thaw file system lock down to current value
 *
 *	Runs the flush/invalidate work appropriate for the lock level
 *	recorded in ulp, then wakes all waiters on ul_cv (always, even
 *	on error).  Returns 0 or the first fatal flush/scan error.
 */
int
ufs_thaw(struct vfs *vfsp, struct ufsvfs *ufsvfsp, struct ulockfs *ulp)
{
	int error = 0;
	int noidel = (int)(ulp->ul_flag & ULOCKFS_NOIDEL);

	/*
	 * if wlock or hlock or elock
	 */
	if (ULOCKFS_IS_WLOCK(ulp) || ULOCKFS_IS_HLOCK(ulp) ||
	    ULOCKFS_IS_ELOCK(ulp)) {

		/*
		 * don't keep access times
		 * don't free deleted files
		 * if superblock writes are allowed, limit them to me for now
		 */
		ulp->ul_flag |= (ULOCKFS_NOIACC|ULOCKFS_NOIDEL);
		if (ulp->ul_sbowner != (kthread_id_t)-1)
			ulp->ul_sbowner = curthread;

		/*
		 * wait for writes for deleted files and superblock updates
		 */
		(void) ufs_flush(vfsp);

		/*
		 * now make sure the quota file is up-to-date
		 *	expensive; but effective
		 */
		error = ufs_flush(vfsp);
		/*
		 * no one can write the superblock
		 */
		ulp->ul_sbowner = (kthread_id_t)-1;

		/*
		 * special processing for wlock/hlock/elock
		 */
		if (ULOCKFS_IS_WLOCK(ulp)) {
			if (error)
				goto errout;
			error = bfinval(ufsvfsp->vfs_dev, 0);
			if (error)
				goto errout;
			error = ufs_scan_inodes(0, ufs_thaw_wlock,
			    (void *)ufsvfsp, ufsvfsp);
			if (error)
				goto errout;
		}
		if (ULOCKFS_IS_HLOCK(ulp) || ULOCKFS_IS_ELOCK(ulp)) {
			/* hlock/elock invalidation is best-effort */
			error = 0;
			(void) ufs_scan_inodes(0, ufs_thaw_hlock,
			    (void *)ufsvfsp, ufsvfsp);
			(void) bfinval(ufsvfsp->vfs_dev, 1);
		}
	} else {

		/*
		 * okay to keep access times
		 * okay to free deleted files
		 * okay to write the superblock
		 */
		ulp->ul_flag &= ~(ULOCKFS_NOIACC|ULOCKFS_NOIDEL);
		ulp->ul_sbowner = NULL;

		/*
		 * flush in case deleted files are in memory
		 */
		if (noidel) {
			if (error = ufs_flush(vfsp))
				goto errout;
		}
	}

errout:
	cv_broadcast(&ulp->ul_cv);
	return (error);
}
5840Sstevel@tonic-gate
/*
 * ufs_reconcile_fs
 *	reconcile incore superblock with ondisk superblock
 *
 *	Re-reads the superblock from disk (e.g. after fsck has run under
 *	an error lock), verifies that the immutable geometry fields have
 *	not changed, and merges the fields that are allowed to change
 *	back into the in-memory copy.
 *
 *	Returns:
 *	    0       success
 *	    EIO     superblock or summary info could not be read
 *	    EAGAIN  fs is being repaired right now (fs_clean == FSFIX)
 *	    EBUSY   error-locked and repair has not yet completed
 *	    EACCES  superblock has changed too much to reconcile
 */
int
ufs_reconcile_fs(struct vfs *vfsp, struct ufsvfs *ufsvfsp, int errlck)
{
	struct fs *mfs;		/* in-memory superblock */
	struct fs *dfs;		/* on-disk superblock */
	struct buf *bp;		/* on-disk superblock buf */
	int needs_unlock;
	char finished_fsclean;	/* only set/used when errlck == UN_ERRLCK */

	mfs = ufsvfsp->vfs_fs;

	/*
	 * get the on-disk copy of the superblock
	 */
	bp = UFS_BREAD(ufsvfsp, vfsp->vfs_dev, SBLOCK, SBSIZE);
	bp->b_flags |= (B_STALE|B_AGE);
	if (bp->b_flags & B_ERROR) {
		brelse(bp);
		return (EIO);
	}
	dfs = bp->b_un.b_fs;

	/* error locks may only unlock after the fs has been made consistent */
	if (errlck == UN_ERRLCK) {
		if (dfs->fs_clean == FSFIX) {	/* being repaired */
			brelse(bp);
			return (EAGAIN);
		}
		/* repair not yet started? */
		finished_fsclean = TRANS_ISTRANS(ufsvfsp)? FSLOG: FSCLEAN;
		if (dfs->fs_clean != finished_fsclean) {
			brelse(bp);
			return (EBUSY);
		}
	}

	/*
	 * if superblock has changed too much, abort
	 */
	if ((mfs->fs_sblkno != dfs->fs_sblkno) ||
	    (mfs->fs_cblkno != dfs->fs_cblkno) ||
	    (mfs->fs_iblkno != dfs->fs_iblkno) ||
	    (mfs->fs_dblkno != dfs->fs_dblkno) ||
	    (mfs->fs_cgoffset != dfs->fs_cgoffset) ||
	    (mfs->fs_cgmask != dfs->fs_cgmask) ||
	    (mfs->fs_bsize != dfs->fs_bsize) ||
	    (mfs->fs_fsize != dfs->fs_fsize) ||
	    (mfs->fs_frag != dfs->fs_frag) ||
	    (mfs->fs_bmask != dfs->fs_bmask) ||
	    (mfs->fs_fmask != dfs->fs_fmask) ||
	    (mfs->fs_bshift != dfs->fs_bshift) ||
	    (mfs->fs_fshift != dfs->fs_fshift) ||
	    (mfs->fs_fragshift != dfs->fs_fragshift) ||
	    (mfs->fs_fsbtodb != dfs->fs_fsbtodb) ||
	    (mfs->fs_sbsize != dfs->fs_sbsize) ||
	    (mfs->fs_nindir != dfs->fs_nindir) ||
	    (mfs->fs_nspf != dfs->fs_nspf) ||
	    (mfs->fs_trackskew != dfs->fs_trackskew) ||
	    (mfs->fs_cgsize != dfs->fs_cgsize) ||
	    (mfs->fs_ntrak != dfs->fs_ntrak) ||
	    (mfs->fs_nsect != dfs->fs_nsect) ||
	    (mfs->fs_spc != dfs->fs_spc) ||
	    (mfs->fs_cpg != dfs->fs_cpg) ||
	    (mfs->fs_ipg != dfs->fs_ipg) ||
	    (mfs->fs_fpg != dfs->fs_fpg) ||
	    (mfs->fs_postblformat != dfs->fs_postblformat) ||
	    (mfs->fs_magic != dfs->fs_magic)) {
		brelse(bp);
		return (EACCES);
	}
	/* a logging fs must still look sane on disk */
	if (dfs->fs_clean == FSBAD || FSOKAY != dfs->fs_state + dfs->fs_time)
		if (mfs->fs_clean == FSLOG) {
			brelse(bp);
			return (EACCES);
		}

	/*
	 * get new summary info
	 */
	if (ufs_getsummaryinfo(vfsp->vfs_dev, ufsvfsp, dfs)) {
		brelse(bp);
		return (EIO);
	}

	/*
	 * release old summary info and update in-memory superblock
	 */
	kmem_free(mfs->fs_u.fs_csp, mfs->fs_cssize);
	mfs->fs_u.fs_csp = dfs->fs_u.fs_csp;	/* Only entry 0 used */

	/*
	 * update fields allowed to change
	 */
	mfs->fs_size = dfs->fs_size;
	mfs->fs_dsize = dfs->fs_dsize;
	mfs->fs_ncg = dfs->fs_ncg;
	mfs->fs_minfree = dfs->fs_minfree;
	mfs->fs_rotdelay = dfs->fs_rotdelay;
	mfs->fs_rps = dfs->fs_rps;
	mfs->fs_maxcontig = dfs->fs_maxcontig;
	mfs->fs_maxbpg = dfs->fs_maxbpg;
	mfs->fs_csmask = dfs->fs_csmask;
	mfs->fs_csshift = dfs->fs_csshift;
	mfs->fs_optim = dfs->fs_optim;
	mfs->fs_csaddr = dfs->fs_csaddr;
	mfs->fs_cssize = dfs->fs_cssize;
	mfs->fs_ncyl = dfs->fs_ncyl;
	mfs->fs_cstotal = dfs->fs_cstotal;
	mfs->fs_reclaim = dfs->fs_reclaim;

	/* restart the reclaim thread if a reclaim was in progress */
	if (mfs->fs_reclaim & (FS_RECLAIM|FS_RECLAIMING)) {
		mfs->fs_reclaim &= ~FS_RECLAIM;
		mfs->fs_reclaim |= FS_RECLAIMING;
		ufs_thread_start(&ufsvfsp->vfs_reclaim,
		    ufs_thread_reclaim, vfsp);
	}

	/* XXX What to do about sparecon? */

	/* XXX need to copy volume label */

	/*
	 * ondisk clean flag overrides inmemory clean flag iff == FSBAD
	 * or if error-locked and ondisk is now clean
	 */
	needs_unlock = !MUTEX_HELD(&ufsvfsp->vfs_lock);
	if (needs_unlock)
		mutex_enter(&ufsvfsp->vfs_lock);

	if (errlck == UN_ERRLCK) {
		if (finished_fsclean == dfs->fs_clean)
			mfs->fs_clean = finished_fsclean;
		else
			mfs->fs_clean = FSBAD;
		mfs->fs_state = FSOKAY - dfs->fs_time;
	}

	if (FSOKAY != dfs->fs_state + dfs->fs_time ||
	    (dfs->fs_clean == FSBAD))
		mfs->fs_clean = FSBAD;

	if (needs_unlock)
		mutex_exit(&ufsvfsp->vfs_lock);

	brelse(bp);

	return (0);
}
7370Sstevel@tonic-gate
7380Sstevel@tonic-gate /*
7390Sstevel@tonic-gate * ufs_reconcile_inode
7400Sstevel@tonic-gate * reconcile ondisk inode with incore inode
7410Sstevel@tonic-gate */
7420Sstevel@tonic-gate static int
ufs_reconcile_inode(struct inode * ip,void * arg)7430Sstevel@tonic-gate ufs_reconcile_inode(struct inode *ip, void *arg)
7440Sstevel@tonic-gate {
7450Sstevel@tonic-gate int i;
7460Sstevel@tonic-gate int ndaddr;
7470Sstevel@tonic-gate int niaddr;
7480Sstevel@tonic-gate struct dinode *dp; /* ondisk inode */
7490Sstevel@tonic-gate struct buf *bp = NULL;
7500Sstevel@tonic-gate uid_t d_uid;
7510Sstevel@tonic-gate gid_t d_gid;
7520Sstevel@tonic-gate int error = 0;
7530Sstevel@tonic-gate struct fs *fs;
7540Sstevel@tonic-gate
7550Sstevel@tonic-gate /*
7560Sstevel@tonic-gate * not an inode we care about
7570Sstevel@tonic-gate */
7580Sstevel@tonic-gate if (ip->i_ufsvfs != (struct ufsvfs *)arg)
7590Sstevel@tonic-gate return (0);
7600Sstevel@tonic-gate
7610Sstevel@tonic-gate fs = ip->i_fs;
7620Sstevel@tonic-gate
7630Sstevel@tonic-gate /*
7640Sstevel@tonic-gate * Inode reconciliation fails: we made the filesystem quiescent
7650Sstevel@tonic-gate * and we did a ufs_flush() before calling ufs_reconcile_inode()
7660Sstevel@tonic-gate * and thus the inode should not have been changed inbetween.
7670Sstevel@tonic-gate * Any discrepancies indicate a logic error and a pretty
7680Sstevel@tonic-gate * significant run-state inconsistency we should complain about.
7690Sstevel@tonic-gate */
7700Sstevel@tonic-gate if (ip->i_flag & (IMOD|IMODACC|IACC|IUPD|ICHG|IATTCHG)) {
7710Sstevel@tonic-gate cmn_err(CE_WARN, "%s: Inode reconciliation failed for"
7720Sstevel@tonic-gate "inode %llu", fs->fs_fsmnt, (u_longlong_t)ip->i_number);
7730Sstevel@tonic-gate return (EINVAL);
7740Sstevel@tonic-gate }
7750Sstevel@tonic-gate
7760Sstevel@tonic-gate /*
7770Sstevel@tonic-gate * get the dinode
7780Sstevel@tonic-gate */
7790Sstevel@tonic-gate bp = UFS_BREAD(ip->i_ufsvfs,
7804662Sfrankho ip->i_dev, (daddr_t)fsbtodb(fs, itod(fs, ip->i_number)),
7810Sstevel@tonic-gate (int)fs->fs_bsize);
7820Sstevel@tonic-gate if (bp->b_flags & B_ERROR) {
7830Sstevel@tonic-gate brelse(bp);
7840Sstevel@tonic-gate return (EIO);
7850Sstevel@tonic-gate }
7860Sstevel@tonic-gate dp = bp->b_un.b_dino;
7870Sstevel@tonic-gate dp += itoo(fs, ip->i_number);
7880Sstevel@tonic-gate
7890Sstevel@tonic-gate /*
7900Sstevel@tonic-gate * handle Sun's implementation of EFT
7910Sstevel@tonic-gate */
7920Sstevel@tonic-gate d_uid = (dp->di_suid == UID_LONG) ? dp->di_uid : (uid_t)dp->di_suid;
7930Sstevel@tonic-gate d_gid = (dp->di_sgid == GID_LONG) ? dp->di_gid : (uid_t)dp->di_sgid;
7940Sstevel@tonic-gate
7950Sstevel@tonic-gate rw_enter(&ip->i_contents, RW_WRITER);
7960Sstevel@tonic-gate
7970Sstevel@tonic-gate /*
7980Sstevel@tonic-gate * some fields are not allowed to change
7990Sstevel@tonic-gate */
8000Sstevel@tonic-gate if ((ip->i_mode != dp->di_mode) ||
8010Sstevel@tonic-gate (ip->i_gen != dp->di_gen) ||
8020Sstevel@tonic-gate (ip->i_uid != d_uid) ||
8030Sstevel@tonic-gate (ip->i_gid != d_gid)) {
8040Sstevel@tonic-gate error = EACCES;
8050Sstevel@tonic-gate goto out;
8060Sstevel@tonic-gate }
8070Sstevel@tonic-gate
8080Sstevel@tonic-gate /*
8090Sstevel@tonic-gate * and some are allowed to change
8100Sstevel@tonic-gate */
8110Sstevel@tonic-gate ip->i_size = dp->di_size;
8120Sstevel@tonic-gate ip->i_ic.ic_flags = dp->di_ic.ic_flags;
8130Sstevel@tonic-gate ip->i_blocks = dp->di_blocks;
8140Sstevel@tonic-gate ip->i_nlink = dp->di_nlink;
8150Sstevel@tonic-gate if (ip->i_flag & IFASTSYMLNK) {
8160Sstevel@tonic-gate ndaddr = 1;
8170Sstevel@tonic-gate niaddr = 0;
8180Sstevel@tonic-gate } else {
8190Sstevel@tonic-gate ndaddr = NDADDR;
8200Sstevel@tonic-gate niaddr = NIADDR;
8210Sstevel@tonic-gate }
8220Sstevel@tonic-gate for (i = 0; i < ndaddr; ++i)
8230Sstevel@tonic-gate ip->i_db[i] = dp->di_db[i];
8240Sstevel@tonic-gate for (i = 0; i < niaddr; ++i)
8250Sstevel@tonic-gate ip->i_ib[i] = dp->di_ib[i];
8260Sstevel@tonic-gate
8270Sstevel@tonic-gate out:
8280Sstevel@tonic-gate rw_exit(&ip->i_contents);
8290Sstevel@tonic-gate brelse(bp);
8300Sstevel@tonic-gate return (error);
8310Sstevel@tonic-gate }
8320Sstevel@tonic-gate
8330Sstevel@tonic-gate /*
8340Sstevel@tonic-gate * ufs_reconcile
8350Sstevel@tonic-gate * reconcile ondisk superblock/inodes with any incore
8360Sstevel@tonic-gate */
8370Sstevel@tonic-gate static int
ufs_reconcile(struct vfs * vfsp,struct ufsvfs * ufsvfsp,int errlck)8380Sstevel@tonic-gate ufs_reconcile(struct vfs *vfsp, struct ufsvfs *ufsvfsp, int errlck)
8390Sstevel@tonic-gate {
8400Sstevel@tonic-gate int error = 0;
8410Sstevel@tonic-gate
8420Sstevel@tonic-gate /*
8430Sstevel@tonic-gate * get rid of as much inmemory data as possible
8440Sstevel@tonic-gate */
8450Sstevel@tonic-gate (void) ufs_flush(vfsp);
8460Sstevel@tonic-gate
8470Sstevel@tonic-gate /*
8480Sstevel@tonic-gate * reconcile the superblock and inodes
8490Sstevel@tonic-gate */
8500Sstevel@tonic-gate if (error = ufs_reconcile_fs(vfsp, ufsvfsp, errlck))
8510Sstevel@tonic-gate return (error);
8520Sstevel@tonic-gate if (error = ufs_scan_inodes(0, ufs_reconcile_inode, ufsvfsp, ufsvfsp))
8530Sstevel@tonic-gate return (error);
8540Sstevel@tonic-gate /*
8550Sstevel@tonic-gate * allocation blocks may be incorrect; get rid of them
8560Sstevel@tonic-gate */
8570Sstevel@tonic-gate (void) ufs_flush(vfsp);
8580Sstevel@tonic-gate
8590Sstevel@tonic-gate return (error);
8600Sstevel@tonic-gate }
8610Sstevel@tonic-gate
/*
 * File system locking
 *	ufs_fiolfs is the external (ioctl-path) entry point; it simply
 *	forwards to ufs__fiolfs() marking the request as user-initiated.
 */
int
ufs_fiolfs(struct vnode *vp, struct lockfs *lockfsp, int from_log)
{
	int from_user = 1;	/* ioctl path is always user-initiated */

	return (ufs__fiolfs(vp, lockfsp, from_user, from_log));
}
8700Sstevel@tonic-gate
/*
 * kernel-internal interface, also used by fix-on-panic
 *
 * ufs__fiolfs
 *	apply a new lockfs state (unlock, name/delete/write lock, error
 *	lock, hard lock) to the file system containing vp.
 *
 * vp		any vnode on the target file system
 * lockfsp	requested lock state (validated below)
 * from_user	non-zero if the request originated from userland; user
 *		error locks are not allowed to invoke panic
 * from_log	non-zero if called from the logging code; skips the
 *		accounting/swap deadlock check
 *
 * Protocol: suspend reclaim/delete threads, freeze the fs, quiesce all
 * outstanding vnops, reconcile with disk where required, then thaw.
 * On failure the previous lock state is restored (errout:)
 * or the request is abandoned before freezing (errexit:).
 *
 * Returns 0 on success or an errno (EINVAL, EIO, EBUSY, EDEADLK, ...).
 */
int
ufs__fiolfs(
	struct vnode *vp,
	struct lockfs *lockfsp,
	int from_user,
	int from_log)
{
	struct ulockfs *ulp;
	struct lockfs lfs;		/* saved pre-request lock state */
	int error;
	struct vfs *vfsp;
	struct ufsvfs *ufsvfsp;
	int errlck = NO_ERRLCK;
	int poll_events = POLLPRI;
	extern struct pollhead ufs_pollhd;
	ulockfs_info_t *head;
	ulockfs_info_t *info;
	int signal = 0;		/* set if ufs_quiesce() was interrupted */

	/* check valid lock type */
	if (!lockfsp || lockfsp->lf_lock > LOCKFS_MAXLOCK)
		return (EINVAL);

	if (!vp || !vp->v_vfsp || !vp->v_vfsp->vfs_data)
		return (EIO);

	vfsp = vp->v_vfsp;

	if (vfsp->vfs_flag & VFS_UNMOUNTED) /* has been unmounted */
		return (EIO);

	/* take the lock and check again */
	vfs_lock_wait(vfsp);
	if (vfsp->vfs_flag & VFS_UNMOUNTED) {
		vfs_unlock(vfsp);
		return (EIO);
	}

	/*
	 * Can't wlock or ro/elock fs with accounting or local swap file
	 * We need to check for this before we grab the ul_lock to avoid
	 * deadlocks with the accounting framework.
	 */
	if ((LOCKFS_IS_WLOCK(lockfsp) || LOCKFS_IS_ELOCK(lockfsp) ||
	    LOCKFS_IS_ROELOCK(lockfsp)) && !from_log) {
		if (ufs_checkaccton(vp) || ufs_checkswapon(vp)) {
			vfs_unlock(vfsp);
			return (EDEADLK);
		}
	}

	ufsvfsp = (struct ufsvfs *)vfsp->vfs_data;
	ulp = &ufsvfsp->vfs_ulockfs;
	/* look up this thread's per-fs lockfs record (fallocate support) */
	head = (ulockfs_info_t *)tsd_get(ufs_lockfs_key);
	SEARCH_ULOCKFSP(head, ulp, info);

	/*
	 * Suspend both the reclaim thread and the delete thread.
	 * This must be done outside the lockfs locking protocol.
	 */
	ufs_thread_suspend(&ufsvfsp->vfs_reclaim);
	ufs_thread_suspend(&ufsvfsp->vfs_delete);

	mutex_enter(&ulp->ul_lock);
	atomic_add_long(&ufs_quiesce_pend, 1);

	/*
	 * Quit if there is another lockfs request in progress
	 * that is waiting for existing ufs_vnops to complete.
	 */
	if (ULOCKFS_IS_BUSY(ulp)) {
		error = EBUSY;
		goto errexit;
	}

	/* cannot ulocked or downgrade a hard-lock */
	if (ULOCKFS_IS_HLOCK(ulp)) {
		error = EIO;
		goto errexit;
	}

	/* an error lock may be unlocked or relocked, only */
	if (ULOCKFS_IS_ELOCK(ulp)) {
		if (!LOCKFS_IS_ULOCK(lockfsp) && !LOCKFS_IS_ELOCK(lockfsp)) {
			error = EBUSY;
			goto errexit;
		}
	}

	/*
	 * a read-only error lock may only be upgraded to an
	 * error lock or hard lock
	 */
	if (ULOCKFS_IS_ROELOCK(ulp)) {
		if (!LOCKFS_IS_HLOCK(lockfsp) && !LOCKFS_IS_ELOCK(lockfsp)) {
			error = EBUSY;
			goto errexit;
		}
	}

	/*
	 * until read-only error locks are fully implemented
	 * just return EINVAL
	 */
	if (LOCKFS_IS_ROELOCK(lockfsp)) {
		error = EINVAL;
		goto errexit;
	}

	/*
	 * an error lock may only be applied if the file system is
	 * unlocked or already error locked.
	 * (this is to prevent the case where a fs gets changed out from
	 * underneath a fs that is locked for backup,
	 * that is, name/delete/write-locked.)
	 */
	if ((!ULOCKFS_IS_ULOCK(ulp) && !ULOCKFS_IS_ELOCK(ulp) &&
	    !ULOCKFS_IS_ROELOCK(ulp)) &&
	    (LOCKFS_IS_ELOCK(lockfsp) || LOCKFS_IS_ROELOCK(lockfsp))) {
		error = EBUSY;
		goto errexit;
	}

	/* get and validate the input lockfs request */
	if (error = ufs_getlfd(lockfsp, &ulp->ul_lockfs))
		goto errexit;

	/*
	 * save current ulockfs struct
	 */
	bcopy(&ulp->ul_lockfs, &lfs, sizeof (struct lockfs));

	/*
	 * Freeze the file system (pend future accesses)
	 */
	ufs_freeze(ulp, lockfsp);

	/*
	 * Set locking in progress because ufs_quiesce may free the
	 * ul_lock mutex.
	 */
	ULOCKFS_SET_BUSY(ulp);
	/* update the ioctl copy */
	LOCKFS_SET_BUSY(&ulp->ul_lockfs);

	/*
	 * We need to unset FWLOCK status before we call ufs_quiesce
	 * so that the thread doesnt get suspended. We do this only if
	 * this (fallocate) thread requested an unlock operation.
	 */
	if (info && (info->flags & ULOCK_INFO_FALLOCATE)) {
		if (!ULOCKFS_IS_WLOCK(ulp))
			ULOCKFS_CLR_FWLOCK(ulp);
	}

	/*
	 * Quiesce (wait for outstanding accesses to finish)
	 */
	if (error = ufs_quiesce(ulp)) {
		/*
		 * Interrupted due to signal. There could still be
		 * pending vnops.
		 */
		signal = 1;

		/*
		 * We do broadcast because lock-status
		 * could be reverted to old status.
		 */
		cv_broadcast(&ulp->ul_cv);
		goto errout;
	}

	/*
	 * If the fallocate thread requested a write fs lock operation
	 * then we set fwlock status in the ulp.
	 */
	if (info && (info->flags & ULOCK_INFO_FALLOCATE)) {
		if (ULOCKFS_IS_WLOCK(ulp))
			ULOCKFS_SET_FWLOCK(ulp);
	}

	/*
	 * save error lock status to pass down to reconcilation
	 * routines and for later cleanup
	 */
	if (LOCKFS_IS_ELOCK(&lfs) && ULOCKFS_IS_ULOCK(ulp))
		errlck = UN_ERRLCK;

	if (ULOCKFS_IS_ELOCK(ulp) || ULOCKFS_IS_ROELOCK(ulp)) {
		int needs_unlock;
		int needs_sbwrite;

		poll_events |= POLLERR;
		errlck = LOCKFS_IS_ELOCK(&lfs) || LOCKFS_IS_ROELOCK(&lfs) ?
		    RE_ERRLCK : SET_ERRLCK;

		/* vfs_lock may already be held by the fix-on-panic path */
		needs_unlock = !MUTEX_HELD(&ufsvfsp->vfs_lock);
		if (needs_unlock)
			mutex_enter(&ufsvfsp->vfs_lock);

		/* disable delayed i/o */
		needs_sbwrite = 0;

		if (errlck == SET_ERRLCK) {
			ufsvfsp->vfs_fs->fs_clean = FSBAD;
			needs_sbwrite = 1;
		}

		needs_sbwrite |= ufsvfsp->vfs_dio;
		ufsvfsp->vfs_dio = 0;

		if (needs_unlock)
			mutex_exit(&ufsvfsp->vfs_lock);

		if (needs_sbwrite) {
			ulp->ul_sbowner = curthread;
			TRANS_SBWRITE(ufsvfsp, TOP_SBWRITE_STABLE);

			if (needs_unlock)
				mutex_enter(&ufsvfsp->vfs_lock);

			ufsvfsp->vfs_fs->fs_fmod = 0;

			if (needs_unlock)
				mutex_exit(&ufsvfsp->vfs_lock);
		}
	}

	/*
	 * reconcile superblock and inodes if was wlocked
	 */
	if (LOCKFS_IS_WLOCK(&lfs) || LOCKFS_IS_ELOCK(&lfs)) {
		if (error = ufs_reconcile(vfsp, ufsvfsp, errlck))
			goto errout;
		/*
		 * in case the fs grew; reset the metadata map for logging tests
		 */
		TRANS_MATA_UMOUNT(ufsvfsp);
		TRANS_MATA_MOUNT(ufsvfsp);
		TRANS_MATA_SI(ufsvfsp, ufsvfsp->vfs_fs);
	}

	/*
	 * At least everything *currently* dirty goes out.
	 */

	if ((error = ufs_flush(vfsp)) != 0 && !ULOCKFS_IS_HLOCK(ulp) &&
	    !ULOCKFS_IS_ELOCK(ulp))
		goto errout;

	/*
	 * thaw file system and wakeup pended processes
	 */
	if (error = ufs_thaw(vfsp, ufsvfsp, ulp))
		if (!ULOCKFS_IS_HLOCK(ulp) && !ULOCKFS_IS_ELOCK(ulp))
			goto errout;

	/*
	 * reset modified flag if not already write locked
	 */
	if (!LOCKFS_IS_WLOCK(&lfs))
		ULOCKFS_CLR_MOD(ulp);

	/*
	 * idle the lock struct
	 */
	ULOCKFS_CLR_BUSY(ulp);
	/* update the ioctl copy */
	LOCKFS_CLR_BUSY(&ulp->ul_lockfs);

	/*
	 * free current comment
	 */
	if (lfs.lf_comment && lfs.lf_comlen != 0) {
		kmem_free(lfs.lf_comment, lfs.lf_comlen);
		lfs.lf_comment = NULL;
		lfs.lf_comlen = 0;
	}

	/* do error lock cleanup */
	if (errlck == UN_ERRLCK)
		ufsfx_unlockfs(ufsvfsp);

	else if (errlck == RE_ERRLCK)
		ufsfx_lockfs(ufsvfsp);

	/* don't allow error lock from user to invoke panic */
	else if (from_user && errlck == SET_ERRLCK &&
	    !(ufsvfsp->vfs_fsfx.fx_flags & (UFSMNT_ONERROR_PANIC >> 4)))
		(void) ufs_fault(ufsvfsp->vfs_root,
		    ulp->ul_lockfs.lf_comment && ulp->ul_lockfs.lf_comlen > 0 ?
		    ulp->ul_lockfs.lf_comment: "user-applied error lock");

	atomic_add_long(&ufs_quiesce_pend, -1);
	mutex_exit(&ulp->ul_lock);
	vfs_unlock(vfsp);

	if (ULOCKFS_IS_HLOCK(&ufsvfsp->vfs_ulockfs))
		poll_events |= POLLERR;

	/* notify poll(2) waiters of the lock-state change */
	pollwakeup(&ufs_pollhd, poll_events);

	/*
	 * Allow both the delete thread and the reclaim thread to
	 * continue.
	 */
	ufs_thread_continue(&ufsvfsp->vfs_delete);
	ufs_thread_continue(&ufsvfsp->vfs_reclaim);

	return (0);

errout:
	/*
	 * Lock failed. Reset the old lock in ufsvfs if not hard locked.
	 */
	if (!LOCKFS_IS_HLOCK(&ulp->ul_lockfs)) {
		bcopy(&lfs, &ulp->ul_lockfs, sizeof (struct lockfs));
		ulp->ul_fs_lock = (1 << lfs.lf_lock);
	}

	/*
	 * Don't call ufs_thaw() when there's a signal during
	 * ufs quiesce operation as it can lead to deadlock
	 * with getpage.
	 */
	if (signal == 0)
		(void) ufs_thaw(vfsp, ufsvfsp, ulp);

	ULOCKFS_CLR_BUSY(ulp);
	LOCKFS_CLR_BUSY(&ulp->ul_lockfs);

errexit:
	/* common exit: drop quiesce count, locks, and resume threads */
	atomic_add_long(&ufs_quiesce_pend, -1);
	mutex_exit(&ulp->ul_lock);
	vfs_unlock(vfsp);

	/*
	 * Allow both the delete thread and the reclaim thread to
	 * continue.
	 */
	ufs_thread_continue(&ufsvfsp->vfs_delete);
	ufs_thread_continue(&ufsvfsp->vfs_reclaim);

	return (error);
}
12180Sstevel@tonic-gate
12190Sstevel@tonic-gate /*
12200Sstevel@tonic-gate * fiolfss
12210Sstevel@tonic-gate * return the current file system locking state info
12220Sstevel@tonic-gate */
12230Sstevel@tonic-gate int
ufs_fiolfss(struct vnode * vp,struct lockfs * lockfsp)12240Sstevel@tonic-gate ufs_fiolfss(struct vnode *vp, struct lockfs *lockfsp)
12250Sstevel@tonic-gate {
12260Sstevel@tonic-gate struct ulockfs *ulp;
12270Sstevel@tonic-gate
12280Sstevel@tonic-gate if (!vp || !vp->v_vfsp || !VTOI(vp))
12290Sstevel@tonic-gate return (EINVAL);
12300Sstevel@tonic-gate
12310Sstevel@tonic-gate /* file system has been forcibly unmounted */
12320Sstevel@tonic-gate if (VTOI(vp)->i_ufsvfs == NULL)
12330Sstevel@tonic-gate return (EIO);
12340Sstevel@tonic-gate
12350Sstevel@tonic-gate ulp = VTOUL(vp);
12360Sstevel@tonic-gate
12370Sstevel@tonic-gate if (ULOCKFS_IS_HLOCK(ulp)) {
12380Sstevel@tonic-gate *lockfsp = ulp->ul_lockfs; /* structure assignment */
12390Sstevel@tonic-gate return (0);
12400Sstevel@tonic-gate }
12410Sstevel@tonic-gate
12420Sstevel@tonic-gate mutex_enter(&ulp->ul_lock);
12430Sstevel@tonic-gate
12440Sstevel@tonic-gate *lockfsp = ulp->ul_lockfs; /* structure assignment */
12450Sstevel@tonic-gate
12460Sstevel@tonic-gate if (ULOCKFS_IS_MOD(ulp))
12470Sstevel@tonic-gate lockfsp->lf_flags |= LOCKFS_MOD;
12480Sstevel@tonic-gate
12490Sstevel@tonic-gate mutex_exit(&ulp->ul_lock);
12500Sstevel@tonic-gate
12510Sstevel@tonic-gate return (0);
12520Sstevel@tonic-gate }
12530Sstevel@tonic-gate
/*
 * ufs_check_lockfs
 *	check whether a ufs_vnops conflicts with the file system lock
 *
 * Called with ul_lock held.  While the current lock state (ul_fs_lock)
 * intersects the caller's conflict mask, block on ul_cv — interruptibly
 * unless the fs is soft-locked or mounted nointr.  Once the operation is
 * permitted, account for it: bump the fallocate counter (and set the
 * FALLOC flag) for ULOCKFS_FWLOCK callers, else bump the vnop counter.
 *
 * Returns 0 when the vnop may proceed; EAGAIN for no-pend threads that
 * would block; EIO on hard lock (or error lock during onerr unmount);
 * EINTR if the wait was abandoned.
 */
int
ufs_check_lockfs(struct ufsvfs *ufsvfsp, struct ulockfs *ulp, ulong_t mask)
{
	k_sigset_t smask;
	int sig, slock;

	ASSERT(MUTEX_HELD(&ulp->ul_lock));

	while (ulp->ul_fs_lock & mask) {
		slock = (int)ULOCKFS_IS_SLOCK(ulp);
		/*
		 * T_DONTPEND threads must not sleep here; report that the
		 * operation would have blocked instead (soft lock excepted).
		 */
		if ((curthread->t_flag & T_DONTPEND) && !slock) {
			curthread->t_flag |= T_WOULDBLOCK;
			return (EAGAIN);
		}
		curthread->t_flag &= ~T_WOULDBLOCK;

		/*
		 * In the case of an onerr umount of the fs, threads could
		 * have blocked before coming into ufs_check_lockfs and
		 * need to check for the special case of ELOCK and
		 * vfs_dontblock being set which would indicate that the fs
		 * is on its way out and will not return therefore making
		 * EIO the appropriate response.
		 */
		if (ULOCKFS_IS_HLOCK(ulp) ||
		    (ULOCKFS_IS_ELOCK(ulp) && ufsvfsp->vfs_dontblock))
			return (EIO);

		/*
		 * wait for lock status to change
		 */
		if (slock || ufsvfsp->vfs_nointr) {
			cv_wait(&ulp->ul_cv, &ulp->ul_lock);
		} else {
			/* interruptible wait; restore signal mask after */
			sigintr(&smask, 1);
			sig = cv_wait_sig(&ulp->ul_cv, &ulp->ul_lock);
			sigunintr(&smask);
			if ((!sig && (ulp->ul_fs_lock & mask)) ||
			    ufsvfsp->vfs_dontblock)
				return (EINTR);
		}
	}

	/* lock no longer conflicts: account for the admitted operation */
	if (mask & ULOCKFS_FWLOCK) {
		atomic_add_long(&ulp->ul_falloc_cnt, 1);
		ULOCKFS_SET_FALLOC(ulp);
	} else {
		atomic_add_long(&ulp->ul_vnops_cnt, 1);
	}

	return (0);
}
13100Sstevel@tonic-gate
13110Sstevel@tonic-gate /*
13120Sstevel@tonic-gate * Check whether we came across the handcrafted lockfs protocol path. We can't
13130Sstevel@tonic-gate * simply check for T_DONTBLOCK here as one would assume since this can also
13140Sstevel@tonic-gate * falsely catch recursive VOP's going to a different filesystem, instead we
13150Sstevel@tonic-gate * check if we already hold the ulockfs->ul_lock mutex.
13160Sstevel@tonic-gate */
13170Sstevel@tonic-gate static int
ufs_lockfs_is_under_rawlockfs(struct ulockfs * ulp)13180Sstevel@tonic-gate ufs_lockfs_is_under_rawlockfs(struct ulockfs *ulp)
13190Sstevel@tonic-gate {
13200Sstevel@tonic-gate return ((mutex_owner(&ulp->ul_lock) != curthread) ? 0 : 1);
13210Sstevel@tonic-gate }
13220Sstevel@tonic-gate
/*
 * ufs_lockfs_begin - start the lockfs locking protocol
 *
 * Entry point called at the top of (most) UFS vnode operations.  On
 * success, *ulpp is the file system's ulockfs (or NULL for a recursive
 * VOP / raw-lockfs path, in which case no end-protocol call is needed),
 * the appropriate operation counter has been incremented, and
 * T_DONTBLOCK is set on the thread.
 *
 * mask selects the lock states that conflict with this vnop;
 * ULOCKFS_FWLOCK in mask marks a fallocate-path caller.
 *
 * Returns 0, EIO (forced unmount), ENOMEM (tsd record allocation), or
 * an error from ufs_check_lockfs().
 */
int
ufs_lockfs_begin(struct ufsvfs *ufsvfsp, struct ulockfs **ulpp, ulong_t mask)
{
	int error;
	int rec_vop;
	ushort_t op_cnt_incremented = 0;
	ulong_t *ctr;
	struct ulockfs *ulp;
	ulockfs_info_t *ulockfs_info;
	ulockfs_info_t *ulockfs_info_free;
	ulockfs_info_t *ulockfs_info_temp;

	/*
	 * file system has been forcibly unmounted
	 */
	if (ufsvfsp == NULL)
		return (EIO);

	*ulpp = ulp = &ufsvfsp->vfs_ulockfs;

	/*
	 * Do lockfs protocol
	 */
	ulockfs_info = (ulockfs_info_t *)tsd_get(ufs_lockfs_key);
	IS_REC_VOP(rec_vop, ulockfs_info, ulp, ulockfs_info_free);

	/*
	 * Detect recursive VOP call or handcrafted internal lockfs protocol
	 * path and bail out in that case.
	 */
	if (rec_vop || ufs_lockfs_is_under_rawlockfs(ulp)) {
		*ulpp = NULL;
		return (0);
	} else {
		/* no reusable tsd slot: pre-allocate one (no sleep) */
		if (ulockfs_info_free == NULL) {
			if ((ulockfs_info_temp = (ulockfs_info_t *)
			    kmem_zalloc(sizeof (ulockfs_info_t),
			    KM_NOSLEEP)) == NULL) {
				*ulpp = NULL;
				return (ENOMEM);
			}
		}
	}

	/*
	 * First time VOP call
	 *
	 * Increment the ctr irrespective of the lockfs state. If the lockfs
	 * state is not ULOCKFS_ULOCK, we can decrement it later. However,
	 * before incrementing we need to check if there is a pending quiesce
	 * request because if we have a continuous stream of ufs_lockfs_begin
	 * requests pounding on a few cpu's then the ufs_quiesce thread might
	 * never see the value of zero for ctr - a livelock kind of scenario.
	 */
	ctr = (mask & ULOCKFS_FWLOCK) ?
	    &ulp->ul_falloc_cnt : &ulp->ul_vnops_cnt;
	if (!ULOCKFS_IS_SLOCK(ulp)) {
		atomic_add_long(ctr, 1);
		op_cnt_incremented++;
	}

	/*
	 * If the lockfs state (indicated by ul_fs_lock) is not just
	 * ULOCKFS_ULOCK, then we will be routed through ufs_check_lockfs
	 * where there is a check with an appropriate mask to selectively allow
	 * operations permitted for that kind of lockfs state.
	 *
	 * Even these selective operations should not be allowed to go through
	 * if a lockfs request is in progress because that could result in inode
	 * modifications during a quiesce and could hence result in inode
	 * reconciliation failures. ULOCKFS_SLOCK alone would not be sufficient,
	 * so make use of ufs_quiesce_pend to disallow vnode operations when a
	 * quiesce is in progress.
	 */
	if (!ULOCKFS_IS_JUSTULOCK(ulp) || ufs_quiesce_pend) {
		/* back out our optimistic increment before the slow path */
		if (op_cnt_incremented)
			if (!atomic_add_long_nv(ctr, -1))
				cv_broadcast(&ulp->ul_cv);
		mutex_enter(&ulp->ul_lock);
		error = ufs_check_lockfs(ufsvfsp, ulp, mask);
		mutex_exit(&ulp->ul_lock);
		if (error) {
			if (ulockfs_info_free == NULL)
				kmem_free(ulockfs_info_temp,
				    sizeof (ulockfs_info_t));
			return (error);
		}
	} else {
		/*
		 * This is the common case of file system in a unlocked state.
		 *
		 * If a file system is unlocked, we would expect the ctr to have
		 * been incremented by now. But this will not be true when a
		 * quiesce is winding up - SLOCK was set when we checked before
		 * incrementing the ctr, but by the time we checked for
		 * ULOCKFS_IS_JUSTULOCK, the quiesce thread was gone. It is okay
		 * to take ul_lock and go through the slow path in this uncommon
		 * case.
		 */
		if (op_cnt_incremented == 0) {
			mutex_enter(&ulp->ul_lock);
			error = ufs_check_lockfs(ufsvfsp, ulp, mask);
			if (error) {
				mutex_exit(&ulp->ul_lock);
				if (ulockfs_info_free == NULL)
					kmem_free(ulockfs_info_temp,
					    sizeof (ulockfs_info_t));
				return (error);
			}
			if (mask & ULOCKFS_FWLOCK)
				ULOCKFS_SET_FALLOC(ulp);
			mutex_exit(&ulp->ul_lock);
		} else if (mask & ULOCKFS_FWLOCK) {
			mutex_enter(&ulp->ul_lock);
			ULOCKFS_SET_FALLOC(ulp);
			mutex_exit(&ulp->ul_lock);
		}
	}

	/* record this ulp in thread-specific data (reuse a slot if free) */
	if (ulockfs_info_free != NULL) {
		ulockfs_info_free->ulp = ulp;
		if (mask & ULOCKFS_FWLOCK)
			ulockfs_info_free->flags |= ULOCK_INFO_FALLOCATE;
	} else {
		ulockfs_info_temp->ulp = ulp;
		ulockfs_info_temp->next = ulockfs_info;
		if (mask & ULOCKFS_FWLOCK)
			ulockfs_info_temp->flags |= ULOCK_INFO_FALLOCATE;
		ASSERT(ufs_lockfs_key != 0);
		(void) tsd_set(ufs_lockfs_key, (void *)ulockfs_info_temp);
	}

	curthread->t_flag |= T_DONTBLOCK;
	return (0);
}
14610Sstevel@tonic-gate
14620Sstevel@tonic-gate /*
14630Sstevel@tonic-gate * Check whether we are returning from the top level VOP.
14640Sstevel@tonic-gate */
14650Sstevel@tonic-gate static int
ufs_lockfs_top_vop_return(ulockfs_info_t * head)14660Sstevel@tonic-gate ufs_lockfs_top_vop_return(ulockfs_info_t *head)
14670Sstevel@tonic-gate {
14680Sstevel@tonic-gate ulockfs_info_t *info;
14690Sstevel@tonic-gate int result = 1;
14700Sstevel@tonic-gate
14710Sstevel@tonic-gate for (info = head; info != NULL; info = info->next) {
14720Sstevel@tonic-gate if (info->ulp != NULL) {
14730Sstevel@tonic-gate result = 0;
14740Sstevel@tonic-gate break;
14750Sstevel@tonic-gate }
14760Sstevel@tonic-gate }
14770Sstevel@tonic-gate
14780Sstevel@tonic-gate return (result);
14790Sstevel@tonic-gate }
14800Sstevel@tonic-gate
/*
 * ufs_lockfs_end - terminate the lockfs locking protocol
 *
 * Undo one successful ufs_lockfs_begin()/ufs_lockfs_trybegin():
 * invalidate the thread's TSD record for this ulockfs, clear
 * T_DONTBLOCK when this is the outermost VOP on the thread, and
 * decrement the in-progress operation counter, waking ul_cv waiters
 * (e.g. a pending lockfs/quiesce) when the count reaches zero.
 */
void
ufs_lockfs_end(struct ulockfs *ulp)
{
	ulockfs_info_t *info;
	ulockfs_info_t *head;

	/*
	 * end-of-VOP protocol; a NULL ulp means the matching begin
	 * bailed out early (recursive VOP or raw lockfs path) and
	 * there is nothing to undo.
	 */
	if (ulp == NULL)
		return;

	head = (ulockfs_info_t *)tsd_get(ufs_lockfs_key);
	SEARCH_ULOCKFSP(head, ulp, info);

	/*
	 * If we're called from a first level VOP, we have to have a
	 * valid ulockfs record in the TSD.
	 */
	ASSERT(info != NULL);

	/*
	 * Invalidate the ulockfs record.
	 */
	info->ulp = NULL;

	/* Only the outermost VOP return may clear T_DONTBLOCK. */
	if (ufs_lockfs_top_vop_return(head))
		curthread->t_flag &= ~T_DONTBLOCK;

	/* fallocate thread */
	if (ULOCKFS_IS_FALLOC(ulp) && info->flags & ULOCK_INFO_FALLOCATE) {
		/* Clear the thread's fallocate state */
		info->flags &= ~ULOCK_INFO_FALLOCATE;
		/*
		 * Last fallocate operation out clears the file system's
		 * fallocate state under ul_lock and wakes any waiters.
		 */
		if (!atomic_add_long_nv(&ulp->ul_falloc_cnt, -1)) {
			mutex_enter(&ulp->ul_lock);
			ULOCKFS_CLR_FALLOC(ulp);
			cv_broadcast(&ulp->ul_cv);
			mutex_exit(&ulp->ul_lock);
		}
	} else { /* normal thread */
		/* Wake ul_cv waiters when the vnode-op count hits zero. */
		if (!atomic_add_long_nv(&ulp->ul_vnops_cnt, -1))
			cv_broadcast(&ulp->ul_cv);
	}
}
15280Sstevel@tonic-gate
/*
 * ufs_lockfs_trybegin - try to start the lockfs locking protocol without
 * blocking.
 *
 * Same protocol as ufs_lockfs_begin() except that instead of sleeping in
 * ufs_check_lockfs() when the lock state disallows the operation, the
 * inlined non-blocking check returns EAGAIN (or EIO for hard/error lock).
 * On success the appropriate operation counter has been incremented, a
 * TSD record for this ulockfs is installed and T_DONTBLOCK is set; the
 * caller must pair this with ufs_lockfs_end().
 */
int
ufs_lockfs_trybegin(struct ufsvfs *ufsvfsp, struct ulockfs **ulpp, ulong_t mask)
{
	int error = 0;
	int rec_vop;
	ushort_t op_cnt_incremented = 0;
	ulong_t *ctr;
	struct ulockfs *ulp;
	ulockfs_info_t *ulockfs_info;
	ulockfs_info_t *ulockfs_info_free;
	ulockfs_info_t *ulockfs_info_temp;

	/*
	 * file system has been forcibly unmounted
	 */
	if (ufsvfsp == NULL)
		return (EIO);

	*ulpp = ulp = &ufsvfsp->vfs_ulockfs;

	/*
	 * Do lockfs protocol
	 */
	ulockfs_info = (ulockfs_info_t *)tsd_get(ufs_lockfs_key);
	IS_REC_VOP(rec_vop, ulockfs_info, ulp, ulockfs_info_free);

	/*
	 * Detect recursive VOP call or handcrafted internal lockfs protocol
	 * path and bail out in that case.
	 */
	if (rec_vop || ufs_lockfs_is_under_rawlockfs(ulp)) {
		*ulpp = NULL;
		return (0);
	} else {
		/*
		 * No reusable record on the TSD list; allocate one now
		 * (KM_NOSLEEP - this path must not block).
		 */
		if (ulockfs_info_free == NULL) {
			if ((ulockfs_info_temp = (ulockfs_info_t *)
			    kmem_zalloc(sizeof (ulockfs_info_t),
			    KM_NOSLEEP)) == NULL) {
				*ulpp = NULL;
				return (ENOMEM);
			}
		}
	}

	/*
	 * First time VOP call
	 *
	 * Increment the ctr irrespective of the lockfs state. If the lockfs
	 * state is not ULOCKFS_ULOCK, we can decrement it later. However,
	 * before incrementing we need to check if there is a pending quiesce
	 * request because if we have a continuous stream of ufs_lockfs_begin
	 * requests pounding on a few cpu's then the ufs_quiesce thread might
	 * never see the value of zero for ctr - a livelock kind of scenario.
	 */
	ctr = (mask & ULOCKFS_FWLOCK) ?
	    &ulp->ul_falloc_cnt : &ulp->ul_vnops_cnt;
	if (!ULOCKFS_IS_SLOCK(ulp)) {
		atomic_add_long(ctr, 1);
		op_cnt_incremented++;
	}

	if (!ULOCKFS_IS_JUSTULOCK(ulp) || ufs_quiesce_pend) {
		/*
		 * Non-blocking version of ufs_check_lockfs() code.
		 *
		 * If the file system is not hard locked or error locked
		 * and if ulp->ul_fs_lock allows this operation, increment
		 * the appropriate counter and proceed (For eg., In case the
		 * file system is delete locked, a mmap can still go through).
		 */
		/* Drop the optimistic increment before the locked re-check. */
		if (op_cnt_incremented)
			if (!atomic_add_long_nv(ctr, -1))
				cv_broadcast(&ulp->ul_cv);
		mutex_enter(&ulp->ul_lock);
		if (ULOCKFS_IS_HLOCK(ulp) ||
		    (ULOCKFS_IS_ELOCK(ulp) && ufsvfsp->vfs_dontblock))
			error = EIO;
		else if (ulp->ul_fs_lock & mask)
			error = EAGAIN;

		if (error) {
			mutex_exit(&ulp->ul_lock);
			if (ulockfs_info_free == NULL)
				kmem_free(ulockfs_info_temp,
				    sizeof (ulockfs_info_t));
			return (error);
		}
		/* Re-increment under ul_lock now that the op is allowed. */
		atomic_add_long(ctr, 1);
		if (mask & ULOCKFS_FWLOCK)
			ULOCKFS_SET_FALLOC(ulp);
		mutex_exit(&ulp->ul_lock);
	} else {
		/*
		 * This is the common case of file system in an unlocked state.
		 *
		 * If a file system is unlocked, we would expect the ctr to have
		 * been incremented by now. But this will not be true when a
		 * quiesce is winding up - SLOCK was set when we checked before
		 * incrementing the ctr, but by the time we checked for
		 * ULOCKFS_IS_JUSTULOCK, the quiesce thread was gone. Take
		 * ul_lock and go through the non-blocking version of
		 * ufs_check_lockfs() code.
		 */
		if (op_cnt_incremented == 0) {
			mutex_enter(&ulp->ul_lock);
			if (ULOCKFS_IS_HLOCK(ulp) ||
			    (ULOCKFS_IS_ELOCK(ulp) && ufsvfsp->vfs_dontblock))
				error = EIO;
			else if (ulp->ul_fs_lock & mask)
				error = EAGAIN;

			if (error) {
				mutex_exit(&ulp->ul_lock);
				if (ulockfs_info_free == NULL)
					kmem_free(ulockfs_info_temp,
					    sizeof (ulockfs_info_t));
				return (error);
			}
			atomic_add_long(ctr, 1);
			if (mask & ULOCKFS_FWLOCK)
				ULOCKFS_SET_FALLOC(ulp);
			mutex_exit(&ulp->ul_lock);
		} else if (mask & ULOCKFS_FWLOCK) {
			/*
			 * Counter already bumped; just record the fallocate
			 * state under ul_lock.
			 */
			mutex_enter(&ulp->ul_lock);
			ULOCKFS_SET_FALLOC(ulp);
			mutex_exit(&ulp->ul_lock);
		}
	}

	/* Record this ulockfs in the thread's TSD list for ufs_lockfs_end. */
	if (ulockfs_info_free != NULL) {
		ulockfs_info_free->ulp = ulp;
		if (mask & ULOCKFS_FWLOCK)
			ulockfs_info_free->flags |= ULOCK_INFO_FALLOCATE;
	} else {
		ulockfs_info_temp->ulp = ulp;
		ulockfs_info_temp->next = ulockfs_info;
		if (mask & ULOCKFS_FWLOCK)
			ulockfs_info_temp->flags |= ULOCK_INFO_FALLOCATE;
		ASSERT(ufs_lockfs_key != 0);
		(void) tsd_set(ufs_lockfs_key, (void *)ulockfs_info_temp);
	}

	curthread->t_flag |= T_DONTBLOCK;
	return (0);
}
16785859Svsakar
/*
 * specialized version of ufs_lockfs_begin() called by ufs_getpage().
 *
 * The lock mask is not known up front here: it is derived from the
 * faulting segment and access type.  Private segvn mappings and
 * restrictable read faults use the GETREAD mask (possibly clearing
 * PROT_WRITE in *protp so a later write refaults); everything else
 * uses the GETWRITE mask.  Only the ul_vnops_cnt counter is used on
 * this path.
 */
int
ufs_lockfs_begin_getpage(
	struct ufsvfs *ufsvfsp,
	struct ulockfs **ulpp,
	struct seg *seg,
	int read_access,
	uint_t *protp)
{
	ulong_t mask;
	int error;
	int rec_vop;
	struct ulockfs *ulp;
	ulockfs_info_t *ulockfs_info;
	ulockfs_info_t *ulockfs_info_free;
	ulockfs_info_t *ulockfs_info_temp;

	/*
	 * file system has been forcibly unmounted
	 */
	if (ufsvfsp == NULL)
		return (EIO);

	*ulpp = ulp = &ufsvfsp->vfs_ulockfs;

	/*
	 * Do lockfs protocol
	 */
	ulockfs_info = (ulockfs_info_t *)tsd_get(ufs_lockfs_key);
	IS_REC_VOP(rec_vop, ulockfs_info, ulp, ulockfs_info_free);

	/*
	 * Detect recursive VOP call or handcrafted internal lockfs protocol
	 * path and bail out in that case.
	 */
	if (rec_vop || ufs_lockfs_is_under_rawlockfs(ulp)) {
		*ulpp = NULL;
		return (0);
	} else {
		/*
		 * No reusable TSD record; allocate one without sleeping.
		 */
		if (ulockfs_info_free == NULL) {
			if ((ulockfs_info_temp = (ulockfs_info_t *)
			    kmem_zalloc(sizeof (ulockfs_info_t),
			    KM_NOSLEEP)) == NULL) {
				*ulpp = NULL;
				return (ENOMEM);
			}
		}
	}

	/*
	 * First time VOP call
	 *
	 * Optimistically bump the vnode-op count; if the file system is
	 * not simply unlocked (or a quiesce is pending) the count is
	 * dropped again and the slow path below re-checks under ul_lock.
	 */
	atomic_add_long(&ulp->ul_vnops_cnt, 1);
	if (!ULOCKFS_IS_JUSTULOCK(ulp) || ufs_quiesce_pend) {
		if (!atomic_add_long_nv(&ulp->ul_vnops_cnt, -1))
			cv_broadcast(&ulp->ul_cv);
		mutex_enter(&ulp->ul_lock);
		if (seg->s_ops == &segvn_ops &&
		    ((struct segvn_data *)seg->s_data)->type != MAP_SHARED) {
			/* Private mapping: a read-level check suffices. */
			mask = (ulong_t)ULOCKFS_GETREAD_MASK;
		} else if (protp && read_access) {
			/*
			 * Restrict the mapping to readonly.
			 * Writes to this mapping will cause
			 * another fault which will then
			 * be suspended if fs is write locked
			 */
			*protp &= ~PROT_WRITE;
			mask = (ulong_t)ULOCKFS_GETREAD_MASK;
		} else
			mask = (ulong_t)ULOCKFS_GETWRITE_MASK;

		/*
		 * will sleep if this fs is locked against this VOP
		 */
		error = ufs_check_lockfs(ufsvfsp, ulp, mask);
		mutex_exit(&ulp->ul_lock);
		if (error) {
			if (ulockfs_info_free == NULL)
				kmem_free(ulockfs_info_temp,
				    sizeof (ulockfs_info_t));
			return (error);
		}
	}

	/* Record this ulockfs in the thread's TSD list for ufs_lockfs_end. */
	if (ulockfs_info_free != NULL) {
		ulockfs_info_free->ulp = ulp;
	} else {
		ulockfs_info_temp->ulp = ulp;
		ulockfs_info_temp->next = ulockfs_info;
		ASSERT(ufs_lockfs_key != 0);
		(void) tsd_set(ufs_lockfs_key, (void *)ulockfs_info_temp);
	}

	curthread->t_flag |= T_DONTBLOCK;
	return (0);
}
17780Sstevel@tonic-gate
17790Sstevel@tonic-gate void
ufs_lockfs_tsd_destructor(void * head)17800Sstevel@tonic-gate ufs_lockfs_tsd_destructor(void *head)
17810Sstevel@tonic-gate {
17820Sstevel@tonic-gate ulockfs_info_t *curr = (ulockfs_info_t *)head;
17830Sstevel@tonic-gate ulockfs_info_t *temp;
17840Sstevel@tonic-gate
17850Sstevel@tonic-gate for (; curr != NULL; ) {
17860Sstevel@tonic-gate /*
17870Sstevel@tonic-gate * The TSD destructor is being called when the thread exits
17880Sstevel@tonic-gate * (via thread_exit()). At that time it must have cleaned up
17890Sstevel@tonic-gate * all VOPs via ufs_lockfs_end() and there must not be a
17900Sstevel@tonic-gate * valid ulockfs record exist while a thread is exiting.
17910Sstevel@tonic-gate */
17920Sstevel@tonic-gate temp = curr;
17930Sstevel@tonic-gate curr = curr->next;
17940Sstevel@tonic-gate ASSERT(temp->ulp == NULL);
17950Sstevel@tonic-gate kmem_free(temp, sizeof (ulockfs_info_t));
17960Sstevel@tonic-gate }
17970Sstevel@tonic-gate }
1798