/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/* ONC_PLUS EXTRACT START */

/*
 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
/*	All Rights Reserved */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

#include <sys/flock_impl.h>
#include <sys/vfs.h>
#include <sys/t_lock.h>		/* for <sys/callb.h> */
#include <sys/callb.h>
#include <sys/clconf.h>
#include <sys/cladm.h>
#include <sys/nbmlock.h>
#include <sys/cred.h>
#include <sys/policy.h>

/*
 * The following variables are for statistics purposes and they are
 * not protected by locks. They may not be accurate but will at least be
 * close to the actual value.
 */

int	flk_lock_allocs;
int	flk_lock_frees;
int	edge_allocs;
int	edge_frees;
int	flk_proc_vertex_allocs;
int	flk_proc_edge_allocs;
int	flk_proc_vertex_frees;
int	flk_proc_edge_frees;

static kmutex_t flock_lock;

#ifdef DEBUG
int check_debug = 0;
#define	CHECK_ACTIVE_LOCKS(gp)	if (check_debug) \
					check_active_locks(gp);
#define	CHECK_SLEEPING_LOCKS(gp)	if (check_debug) \
						check_sleeping_locks(gp);
#define	CHECK_OWNER_LOCKS(gp, pid, sysid, vp)	\
		if (check_debug)	\
			check_owner_locks(gp, pid, sysid, vp);
#define	CHECK_LOCK_TRANSITION(old_state, new_state) \
	{ \
		if (check_lock_transition(old_state, new_state)) { \
			cmn_err(CE_PANIC, "Illegal lock transition " \
			    "from %d to %d", old_state, new_state); \
		} \
	}
#else

#define	CHECK_ACTIVE_LOCKS(gp)
#define	CHECK_SLEEPING_LOCKS(gp)
#define	CHECK_OWNER_LOCKS(gp, pid, sysid, vp)
#define	CHECK_LOCK_TRANSITION(old_state, new_state)

#endif /* DEBUG */

struct kmem_cache	*flk_edge_cache;

graph_t		*lock_graph[HASH_SIZE];
proc_graph_t	pgraph;
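
/*
 * Illustrative sketch (not part of the original source): each vnode's
 * locks live in one of the HASH_SIZE graphs above, so a lookup would
 * presumably be (assuming HASH_INDEX() hashes the vnode pointer, as it
 * is used in flk_wait_execute_request() below):
 *
 *	graph_t *gp = lock_graph[HASH_INDEX(vp)];
 *
 * flk_get_lock_graph(vp, FLK_INIT_GRAPH), called from reclock(),
 * presumably performs this lookup and allocates the graph on first use.
 */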

/*
 * Clustering.
 *
 * NLM REGISTRY TYPE IMPLEMENTATION
 *
 * Assumptions:
 *  1.  Nodes in a cluster are numbered starting at 1; always non-negative
 *	integers; maximum node id is returned by clconf_maximum_nodeid().
 *  2.  We use this node id to identify the node an NLM server runs on.
 */

/*
 * The NLM registry object keeps track of NLM servers via their
 * nlmids (which are the node ids of the nodes in the cluster they run on)
 * that have requested locks at this LLM with which this registry is
 * associated.
 *
 * Representation of abstraction:
 *    rep = record[	states: array[nlm_state],
 *			lock: mutex]
 *
 *    Representation invariants:
 *	1. index i of rep.states is between 0 and n - 1 where n is the number
 *	   of elements in the array, which happens to be the maximum number
 *	   of nodes in the cluster configuration + 1.
 *	2. map nlmid to index i of rep.states
 *		0   -> 0
 *		1   -> 1
 *		2   -> 2
 *		n-1 -> clconf_maximum_nodeid()
 *	3.  This 1-1 mapping is quite convenient and it avoids errors resulting
 *	    from forgetting to subtract 1 from the index.
 *	4.  The reason we keep the 0th index is the following.  A legitimate
 *	    cluster configuration includes making a UFS file system NFS
 *	    exportable.  The code is structured so that if you're in a cluster
 *	    you do one thing; otherwise, you do something else.  The problem
 *	    is what to do if you think you're in a cluster with PXFS loaded,
 *	    but you're using UFS not PXFS?  The upper two bytes of the sysid
 *	    encode the node id of the node where the NLM server runs; these
 *	    bytes are zero for UFS.  Since the nodeid is used to index into
 *	    the registry, we can record the NLM server state information at
 *	    index 0 using the same mechanism used for PXFS file locks!
 */
static flk_nlm_status_t *nlm_reg_status = NULL;	/* state array 0..N-1 */
static kmutex_t nlm_reg_lock;			/* lock to protect array */
static uint_t nlm_status_size;			/* size of state array */
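
/*
 * Illustrative sketch (not part of the original source): how an NLM
 * server's state would be looked up in the registry.  GETNLMID() is
 * the macro used in reclock() below to extract the node id from the
 * upper bytes of a sysid:
 *
 *	int nlmid = GETNLMID(lock->l_flock.l_sysid);
 *	flk_nlm_status_t st;
 *
 *	mutex_enter(&nlm_reg_lock);
 *	st = nlm_reg_status[nlmid];
 *	mutex_exit(&nlm_reg_lock);
 *
 * A sysid whose upper bytes are zero (e.g., local UFS) maps to index 0,
 * as described in item 4 of the comment above.
 */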

/*
 * Although we need a global lock dependency graph (and associated data
 * structures), we also need a per-zone notion of whether the lock manager is
 * running, and so whether to allow lock manager requests or not.
 *
 * Thus, on a per-zone basis we maintain a ``global'' variable
 * (flk_lockmgr_status), protected by flock_lock, and set when the lock
 * manager is determined to be changing state (starting or stopping).
 *
 * Each graph/zone pair also has a copy of this variable, which is protected by
 * the graph's mutex.
 *
 * The per-graph copies are used to synchronize lock requests with shutdown
 * requests.  The global copy is used to initialize the per-graph field when a
 * new graph is created.
 */
struct flock_globals {
	flk_lockmgr_status_t flk_lockmgr_status;
	flk_lockmgr_status_t lockmgr_status[HASH_SIZE];
};

zone_key_t flock_zone_key;

static void create_flock(lock_descriptor_t *, flock64_t *);
static lock_descriptor_t	*flk_get_lock(void);
static void	flk_free_lock(lock_descriptor_t	*lock);
static void	flk_get_first_blocking_lock(lock_descriptor_t *request);
static int flk_process_request(lock_descriptor_t *);
static int flk_add_edge(lock_descriptor_t *, lock_descriptor_t *, int, int);
static edge_t *flk_get_edge(void);
static int flk_wait_execute_request(lock_descriptor_t *);
static int flk_relation(lock_descriptor_t *, lock_descriptor_t *);
static void flk_insert_active_lock(lock_descriptor_t *);
static void flk_delete_active_lock(lock_descriptor_t *, int);
static void flk_insert_sleeping_lock(lock_descriptor_t *);
static void flk_graph_uncolor(graph_t *);
static void flk_wakeup(lock_descriptor_t *, int);
static void flk_free_edge(edge_t *);
static void flk_recompute_dependencies(lock_descriptor_t *,
			lock_descriptor_t **, int, int);
static int flk_find_barriers(lock_descriptor_t *);
static void flk_update_barriers(lock_descriptor_t *);
static int flk_color_reachables(lock_descriptor_t *);
static int flk_canceled(lock_descriptor_t *);
static void flk_delete_locks_by_sysid(lock_descriptor_t *);
static void report_blocker(lock_descriptor_t *, lock_descriptor_t *);
static void wait_for_lock(lock_descriptor_t *);
static void unlock_lockmgr_granted(struct flock_globals *);
static void wakeup_sleeping_lockmgr_locks(struct flock_globals *);

/* Clustering hooks */
static void cl_flk_change_nlm_state_all_locks(int, flk_nlm_status_t);
static void cl_flk_wakeup_sleeping_nlm_locks(int);
static void cl_flk_unlock_nlm_granted(int);

#ifdef DEBUG
static int check_lock_transition(int, int);
static void check_sleeping_locks(graph_t *);
static void check_active_locks(graph_t *);
static int no_path(lock_descriptor_t *, lock_descriptor_t *);
static void path(lock_descriptor_t *, lock_descriptor_t *);
static void check_owner_locks(graph_t *, pid_t, int, vnode_t *);
static int level_one_path(lock_descriptor_t *, lock_descriptor_t *);
static int level_two_path(lock_descriptor_t *, lock_descriptor_t *, int);
#endif

/*	proc_graph function definitions */
static int flk_check_deadlock(lock_descriptor_t *);
static void flk_proc_graph_uncolor(void);
static proc_vertex_t *flk_get_proc_vertex(lock_descriptor_t *);
static proc_edge_t *flk_get_proc_edge(void);
static void flk_proc_release(proc_vertex_t *);
static void flk_free_proc_edge(proc_edge_t *);
static void flk_update_proc_graph(edge_t *, int);

/* Non-blocking mandatory locking */
static int lock_blocks_io(nbl_op_t, u_offset_t, ssize_t, int, u_offset_t,
			u_offset_t);

static struct flock_globals *
flk_get_globals(void)
{
	/*
	 * The KLM module had better be loaded if we're attempting to handle
	 * lockmgr requests.
	 */
	ASSERT(flock_zone_key != ZONE_KEY_UNINITIALIZED);
	return (zone_getspecific(flock_zone_key, curproc->p_zone));
}

static flk_lockmgr_status_t
flk_get_lockmgr_status(void)
{
	struct flock_globals *fg;

	ASSERT(MUTEX_HELD(&flock_lock));

	if (flock_zone_key == ZONE_KEY_UNINITIALIZED) {
		/*
		 * KLM module not loaded; lock manager definitely not running.
		 */
		return (FLK_LOCKMGR_DOWN);
	}
	fg = flk_get_globals();
	return (fg->flk_lockmgr_status);
}

/*
 * Routine called from fs_frlock in fs/fs_subr.c
 */

int
reclock(vnode_t		*vp,
	flock64_t	*lckdat,
	int		cmd,
	int		flag,
	u_offset_t	offset,
	flk_callback_t	*flk_cbp)
{
	lock_descriptor_t	stack_lock_request;
	lock_descriptor_t	*lock_request;
	int error = 0;
	graph_t	*gp;
	int			nlmid;

	/*
	 * Check access permissions
	 */
	if ((cmd & SETFLCK) &&
		((lckdat->l_type == F_RDLCK && (flag & FREAD) == 0) ||
		(lckdat->l_type == F_WRLCK && (flag & FWRITE) == 0)))
			return (EBADF);

	/*
	 * For query and unlock requests we use the stack_lock_request.
	 */

	if ((lckdat->l_type == F_UNLCK) ||
			!((cmd & INOFLCK) || (cmd & SETFLCK))) {
		lock_request = &stack_lock_request;
		(void) bzero((caddr_t)lock_request,
				sizeof (lock_descriptor_t));

		/*
		 * The following is added so that the assertions in
		 * flk_execute_request() pass.
		 */

		lock_request->l_edge.edge_in_next = &lock_request->l_edge;
		lock_request->l_edge.edge_in_prev = &lock_request->l_edge;
		lock_request->l_edge.edge_adj_next = &lock_request->l_edge;
		lock_request->l_edge.edge_adj_prev = &lock_request->l_edge;
		lock_request->l_status = FLK_INITIAL_STATE;
	} else {
		lock_request = flk_get_lock();
	}
	lock_request->l_state = 0;
	lock_request->l_vnode = vp;
	lock_request->l_zoneid = getzoneid();

	/*
	 * Convert the request range into the canonical start and end
	 * values.  The NLM protocol supports locking over the entire
	 * 32-bit range, so there's no range checking for remote requests,
	 * but we still need to verify that local requests obey the rules.
	 */
	/* Clustering */
	if ((cmd & (RCMDLCK | PCMDLCK)) != 0) {
		ASSERT(lckdat->l_whence == 0);
		lock_request->l_start = lckdat->l_start;
		lock_request->l_end = (lckdat->l_len == 0) ? MAX_U_OFFSET_T :
			lckdat->l_start + (lckdat->l_len - 1);
	} else {
		/* check the validity of the lock range */
		error = flk_convert_lock_data(vp, lckdat,
			&lock_request->l_start, &lock_request->l_end,
			offset);
		if (error) {
			goto done;
		}
		error = flk_check_lock_data(lock_request->l_start,
					    lock_request->l_end, MAXEND);
		if (error) {
			goto done;
		}
	}

	ASSERT(lock_request->l_end >= lock_request->l_start);

	lock_request->l_type = lckdat->l_type;
	if (cmd & INOFLCK)
		lock_request->l_state |= IO_LOCK;
	if (cmd & SLPFLCK)
		lock_request->l_state |= WILLING_TO_SLEEP_LOCK;
	if (cmd & RCMDLCK)
		lock_request->l_state |= LOCKMGR_LOCK;
	if (cmd & NBMLCK)
		lock_request->l_state |= NBMAND_LOCK;
	/*
	 * Clustering: set flag for PXFS locks
	 * We do not _only_ check for the PCMDLCK flag because PXFS locks could
	 * also be of type 'RCMDLCK'.
	 * We do not _only_ check the GETPXFSID() macro because local PXFS
	 * clients use a pxfsid of zero to permit deadlock detection in the LLM.
	 */

	if ((cmd & PCMDLCK) || (GETPXFSID(lckdat->l_sysid) != 0)) {
		lock_request->l_state |= PXFS_LOCK;
	}
	if (!((cmd & SETFLCK) || (cmd & INOFLCK))) {
		if (lock_request->l_type == F_RDLCK ||
			lock_request->l_type == F_WRLCK)
			lock_request->l_state |= QUERY_LOCK;
	}
	lock_request->l_flock = (*lckdat);
	lock_request->l_callbacks = flk_cbp;

	/*
	 * We are ready for processing the request
	 */
	if (IS_LOCKMGR(lock_request)) {
		/*
		 * If the lock request is an NLM server request ....
		 */
		if (nlm_status_size == 0) { /* not booted as cluster */
			mutex_enter(&flock_lock);
			/*
			 * Bail out if this is a lock manager request and the
			 * lock manager is not supposed to be running.
			 */
			if (flk_get_lockmgr_status() != FLK_LOCKMGR_UP) {
				mutex_exit(&flock_lock);
				error = ENOLCK;
				goto done;
			}
			mutex_exit(&flock_lock);
		} else {			/* booted as a cluster */
			nlmid = GETNLMID(lock_request->l_flock.l_sysid);
			ASSERT(nlmid >= 0 && nlmid < nlm_status_size);

			mutex_enter(&nlm_reg_lock);
			/*
			 * If the NLM registry does not know about this
			 * NLM server making the request, add its nlmid
			 * to the registry.
			 */
			if (FLK_REGISTRY_IS_NLM_UNKNOWN(nlm_reg_status,
				nlmid)) {
				FLK_REGISTRY_ADD_NLMID(nlm_reg_status, nlmid);
			} else if (!FLK_REGISTRY_IS_NLM_UP(nlm_reg_status,
				nlmid)) {
				/*
				 * If the NLM server is already known (has made
				 * previous lock requests) and its state is
				 * not NLM_UP (means that NLM server is
				 * shutting down), then bail out with an
				 * error to deny the lock request.
				 */
				mutex_exit(&nlm_reg_lock);
				error = ENOLCK;
				goto done;
			}
			mutex_exit(&nlm_reg_lock);
		}
	}

	/* Now get the lock graph for a particular vnode */
	gp = flk_get_lock_graph(vp, FLK_INIT_GRAPH);

	/*
	 * We drop rwlock here otherwise this might end up causing a
	 * deadlock if this IOLOCK sleeps. (bugid # 1183392).
	 */

	if (IS_IO_LOCK(lock_request)) {
		VOP_RWUNLOCK(vp,
			(lock_request->l_type == F_RDLCK) ?
				V_WRITELOCK_FALSE : V_WRITELOCK_TRUE, NULL);
	}
	mutex_enter(&gp->gp_mutex);

	lock_request->l_state |= REFERENCED_LOCK;
	lock_request->l_graph = gp;

	switch (lock_request->l_type) {
	case F_RDLCK:
	case F_WRLCK:
		if (IS_QUERY_LOCK(lock_request)) {
			flk_get_first_blocking_lock(lock_request);
			(*lckdat) = lock_request->l_flock;
			break;
		}

		/* process the request now */

		error = flk_process_request(lock_request);
		break;

	case F_UNLCK:
		/* unlock request will not block so execute it immediately */

		if (IS_LOCKMGR(lock_request) &&
		    flk_canceled(lock_request)) {
			error = 0;
		} else {
			error = flk_execute_request(lock_request);
		}
		break;

	case F_UNLKSYS:
		/*
		 * Recovery mechanism to release lock manager locks when
		 * an NFS client crashes and restarts.  The NFS server will
		 * clear the old locks and grant new locks.
		 */

		if (lock_request->l_flock.l_sysid == 0) {
			mutex_exit(&gp->gp_mutex);
			return (EINVAL);
		}
		if (secpolicy_nfs(CRED()) != 0) {
			mutex_exit(&gp->gp_mutex);
			return (EPERM);
		}
		flk_delete_locks_by_sysid(lock_request);
		lock_request->l_state &= ~REFERENCED_LOCK;
		flk_set_state(lock_request, FLK_DEAD_STATE);
		flk_free_lock(lock_request);
		mutex_exit(&gp->gp_mutex);
		return (0);

	default:
		error = EINVAL;
		break;
	}

	/* Clustering: For blocked PXFS locks, return */
	if (error == PXFS_LOCK_BLOCKED) {
		lock_request->l_state &= ~REFERENCED_LOCK;
		mutex_exit(&gp->gp_mutex);
		return (error);
	}

	/*
	 * Now that we have seen the status of locks in the system for
	 * this vnode we acquire the rwlock if it is an IO_LOCK.
	 */

	if (IS_IO_LOCK(lock_request)) {
		(void) VOP_RWLOCK(vp,
			(lock_request->l_type == F_RDLCK) ?
				V_WRITELOCK_FALSE : V_WRITELOCK_TRUE, NULL);
		if (!error) {
			lckdat->l_type = F_UNLCK;

			/*
			 * This wake up is needed; otherwise, if the IO_LOCK
			 * has slept, the dependents on this lock would never
			 * be woken up at all. (bugid # 1185482).
			 */

			flk_wakeup(lock_request, 1);
			flk_set_state(lock_request, FLK_DEAD_STATE);
			flk_free_lock(lock_request);
		}
		/*
		 * else an error occurred: either flk_process_request()
		 * returned EDEADLK, in which case there are no dependents
		 * for this lock, or flk_wait_execute_request() returned
		 * EINTR, in which case flk_cancel_sleeping_lock() has
		 * already been done.  The same is true for EBADF.
		 */
	}

	if (lock_request == &stack_lock_request) {
		flk_set_state(lock_request, FLK_DEAD_STATE);
	} else {
		lock_request->l_state &= ~REFERENCED_LOCK;
		if ((error != 0) || IS_DELETED(lock_request)) {
			flk_set_state(lock_request, FLK_DEAD_STATE);
			flk_free_lock(lock_request);
		}
	}

	mutex_exit(&gp->gp_mutex);
	return (error);

done:
	flk_set_state(lock_request, FLK_DEAD_STATE);
	if (lock_request != &stack_lock_request)
		flk_free_lock(lock_request);
	return (error);
}
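
/*
 * Illustrative sketch (not part of the original source): how a caller
 * such as fs_frlock() might use reclock() to query for a blocking lock
 * (F_GETLK semantics).  With cmd == 0 (neither SETFLCK nor INOFLCK),
 * an F_RDLCK/F_WRLCK request becomes a QUERY_LOCK; on return, lckdat
 * presumably describes the first blocking lock, or has l_type set to
 * F_UNLCK if nothing would block:
 *
 *	flock64_t bf;
 *
 *	bf.l_type = F_WRLCK;
 *	bf.l_whence = 0;
 *	bf.l_start = 0;
 *	bf.l_len = 0;			(zero length means whole file)
 *	bf.l_sysid = 0;
 *	bf.l_pid = curproc->p_pid;
 *	error = reclock(vp, &bf, 0, flag, offset, NULL);
 *	if (error == 0 && bf.l_type != F_UNLCK)
 *		(bf now describes a lock that would block the request)
 */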

/*
 * Invoke the callbacks in the given list.  If before sleeping, invoke in
 * list order.  If after sleeping, invoke in reverse order.
 *
 * CPR (suspend/resume) support: if one of the callbacks returns a
 * callb_cpr_t, return it.  This will be used to make the thread CPR-safe
 * while it is sleeping.  There should be at most one callb_cpr_t for the
 * thread.
 * XXX This is unnecessarily complicated.  The CPR information should just
 * get passed in directly through VOP_FRLOCK and reclock, rather than
 * sneaking it in via a callback.
 */

callb_cpr_t *
flk_invoke_callbacks(flk_callback_t *cblist, flk_cb_when_t when)
{
	callb_cpr_t *cpr_callbackp = NULL;
	callb_cpr_t *one_result;
	flk_callback_t *cb;

	if (cblist == NULL)
		return (NULL);

	if (when == FLK_BEFORE_SLEEP) {
		cb = cblist;
		do {
			one_result = (*cb->cb_callback)(when, cb->cb_data);
			if (one_result != NULL) {
				ASSERT(cpr_callbackp == NULL);
				cpr_callbackp = one_result;
			}
			cb = cb->cb_next;
		} while (cb != cblist);
	} else {
		cb = cblist->cb_prev;
		do {
			one_result = (*cb->cb_callback)(when, cb->cb_data);
			if (one_result != NULL) {
				cpr_callbackp = one_result;
			}
			cb = cb->cb_prev;
		} while (cb != cblist->cb_prev);
	}

	return (cpr_callbackp);
}

/*
 * Initialize a flk_callback_t to hold the given callback.
 */

void
flk_init_callback(flk_callback_t *flk_cb,
	callb_cpr_t *(*cb_fcn)(flk_cb_when_t, void *), void *cbdata)
{
	flk_cb->cb_next = flk_cb;
	flk_cb->cb_prev = flk_cb;
	flk_cb->cb_callback = cb_fcn;
	flk_cb->cb_data = cbdata;
}

/*
 * Initialize an flk_callback_t and then link it into the head of an
 * existing list (which may be NULL).
 */

void
flk_add_callback(flk_callback_t *newcb,
		callb_cpr_t *(*cb_fcn)(flk_cb_when_t, void *),
		void *cbdata, flk_callback_t *cblist)
{
	flk_init_callback(newcb, cb_fcn, cbdata);

	if (cblist == NULL)
		return;

	newcb->cb_prev = cblist->cb_prev;
	newcb->cb_next = cblist;
	cblist->cb_prev->cb_next = newcb;
	cblist->cb_prev = newcb;
}
/* ONC_PLUS EXTRACT END */
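
/*
 * Illustrative sketch (not part of the original source): building a
 * two-entry callback list and handing it to reclock().  my_cb, my_data,
 * other_cb and other_data are invented names; bf is as in the query
 * sketch after reclock() above.
 *
 *	flk_callback_t head, extra;
 *
 *	flk_init_callback(&head, my_cb, my_data);
 *	flk_add_callback(&extra, other_cb, other_data, &head);
 *	error = reclock(vp, &bf, SETFLCK | SLPFLCK, flag, offset, &head);
 *
 * flk_invoke_callbacks(&head, FLK_BEFORE_SLEEP) then runs my_cb before
 * other_cb; FLK_AFTER_SLEEP runs them in the reverse order.
 */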

/*
 * Initialize the flk_edge_cache data structure and create the
 * nlm_reg_status array.
 */

void
flk_init(void)
{
	uint_t	i;

	flk_edge_cache = kmem_cache_create("flk_edges",
		sizeof (struct edge), 0, NULL, NULL, NULL, NULL, NULL, 0);
	if (flk_edge_cache == NULL) {
		cmn_err(CE_PANIC, "Couldn't create flk_edge_cache\n");
	}
	/*
	 * Create the NLM registry object.
	 */

	if (cluster_bootflags & CLUSTER_BOOTED) {
		/*
		 * This routine tells you the maximum node id that will be used
		 * in the cluster.  This number will be the size of the nlm
		 * registry status array.  We add 1 because we will be using
		 * all entries indexed from 0 to maxnodeid; e.g., from 0
		 * to 64, for a total of 65 entries.
		 */
		nlm_status_size = clconf_maximum_nodeid() + 1;
	} else {
		nlm_status_size = 0;
	}

	if (nlm_status_size != 0) {	/* booted as a cluster */
		nlm_reg_status = (flk_nlm_status_t *)
			kmem_alloc(sizeof (flk_nlm_status_t) * nlm_status_size,
				KM_SLEEP);

		/* initialize all NLM states in array to NLM_UNKNOWN */
		for (i = 0; i < nlm_status_size; i++) {
			nlm_reg_status[i] = FLK_NLM_UNKNOWN;
		}
	}
}

/*
 * Zone constructor/destructor callbacks to be executed when a zone is
 * created/destroyed.
 */
/* ARGSUSED */
void *
flk_zone_init(zoneid_t zoneid)
{
	struct flock_globals *fg;
	uint_t i;

	fg = kmem_alloc(sizeof (*fg), KM_SLEEP);
	fg->flk_lockmgr_status = FLK_LOCKMGR_UP;
	for (i = 0; i < HASH_SIZE; i++)
		fg->lockmgr_status[i] = FLK_LOCKMGR_UP;
	return (fg);
}

/* ARGSUSED */
void
flk_zone_fini(zoneid_t zoneid, void *data)
{
	struct flock_globals *fg = data;

	kmem_free(fg, sizeof (*fg));
}
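
/*
 * Illustrative sketch (not part of the original source): the zone
 * callbacks above are presumably registered by the KLM module, which
 * would also initialize flock_zone_key, along the lines of:
 *
 *	zone_key_create(&flock_zone_key, flk_zone_init, NULL,
 *	    flk_zone_fini);
 *
 * That registration is what makes the ZONE_KEY_UNINITIALIZED checks in
 * flk_get_globals() and flk_get_lockmgr_status() meaningful.
 */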

/*
 * Get a lock_descriptor structure with initialization of edge lists.
 */

static lock_descriptor_t *
flk_get_lock(void)
{
	lock_descriptor_t	*l;

	l = kmem_zalloc(sizeof (lock_descriptor_t), KM_SLEEP);

	cv_init(&l->l_cv, NULL, CV_DRIVER, NULL);
	l->l_edge.edge_in_next = &l->l_edge;
	l->l_edge.edge_in_prev = &l->l_edge;
	l->l_edge.edge_adj_next = &l->l_edge;
	l->l_edge.edge_adj_prev = &l->l_edge;
	l->pvertex = -1;
	l->l_status = FLK_INITIAL_STATE;
	flk_lock_allocs++;
	return (l);
}

/*
 * Free a lock_descriptor structure.  If some thread still holds a
 * reference to it (as in reclock()), just set the DELETED_LOCK flag
 * so that the referencing thread frees it later.
 */

void
flk_free_lock(lock_descriptor_t	*lock)
{
	ASSERT(IS_DEAD(lock));
	if (IS_REFERENCED(lock)) {
		lock->l_state |= DELETED_LOCK;
		return;
	}
	flk_lock_frees++;
	kmem_free((void *)lock, sizeof (lock_descriptor_t));
}

void
flk_set_state(lock_descriptor_t *lock, int new_state)
{
	/*
	 * Locks in the sleeping list may be woken up in a number of ways,
	 * and more than once.  If a sleeping lock is signaled awake more
	 * than once, then it may or may not change state depending on its
	 * current state.
	 * Also note that NLM locks that are sleeping could be moved to an
	 * interrupted state more than once if the unlock request is
	 * retransmitted by the NLM client - the second time around, this is
	 * just a nop.
	 * The ordering of being signaled awake is:
	 * INTERRUPTED_STATE > CANCELLED_STATE > GRANTED_STATE.
	 * The checks below implement this ordering.
	 */
	if (IS_INTERRUPTED(lock)) {
		if ((new_state == FLK_CANCELLED_STATE) ||
		    (new_state == FLK_GRANTED_STATE) ||
		    (new_state == FLK_INTERRUPTED_STATE)) {
			return;
		}
	}
	if (IS_CANCELLED(lock)) {
		if ((new_state == FLK_GRANTED_STATE) ||
		    (new_state == FLK_CANCELLED_STATE)) {
			return;
		}
	}
	CHECK_LOCK_TRANSITION(lock->l_status, new_state);
	if (IS_PXFS(lock)) {
		cl_flk_state_transition_notify(lock, lock->l_status, new_state);
	}
	lock->l_status = new_state;
}
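
/*
 * Illustrative sketch (not part of the original source) of the wakeup
 * ordering enforced in flk_set_state() above: once a sleeping lock has
 * been interrupted, lower-priority wakeups are ignored.
 *
 *	flk_set_state(lock, FLK_INTERRUPTED_STATE);
 *	flk_set_state(lock, FLK_GRANTED_STATE);	    (nop: stays interrupted)
 *	flk_set_state(lock, FLK_INTERRUPTED_STATE); (nop: the NLM
 *						    retransmission case)
 */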

/*
 * Routine that checks whether there are any blocking locks in the system.
 *
 * The policy followed is that if a write lock is sleeping, we don't allow
 * read locks before this write lock even though there may not be any active
 * locks corresponding to the read locks' region.
 *
 * The flk_add_edge() function adds an edge between l1 and l2 iff there
 * is no path between l1 and l2.  This is done to have a "minimum
 * storage representation" of the dependency graph.
 *
 * Another property of the graph: since only the new request throws
 * edges to the existing locks in the graph, the graph is always topologically
 * ordered.
 */
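
/*
 * Illustrative sketch (not part of the original source) of the "minimum
 * storage representation" described above: if request R already reaches
 * sleeping lock S2 through S1, a further flk_add_edge(R, S2, ...) with
 * cycle checking adds no direct edge, since a path already exists:
 *
 *	R -> S1 -> S2		(existing path; no direct R -> S2 edge)
 */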

static int
flk_process_request(lock_descriptor_t *request)
{
	graph_t	*gp = request->l_graph;
	lock_descriptor_t *lock;
	int request_blocked_by_active = 0;
	int request_blocked_by_granted = 0;
	int request_blocked_by_sleeping = 0;
	vnode_t	*vp = request->l_vnode;
	int	error = 0;
	int request_will_wait = 0;
	int found_covering_lock = 0;
	lock_descriptor_t *covered_by = NULL;

	ASSERT(MUTEX_HELD(&gp->gp_mutex));
	request_will_wait = IS_WILLING_TO_SLEEP(request);

	/*
	 * check active locks
	 */

	SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp);

	if (lock) {
		do {
			if (BLOCKS(lock, request)) {
				if (!request_will_wait)
					return (EAGAIN);
				request_blocked_by_active = 1;
				break;
			}
			/*
			 * Grant lock if it is for the same owner holding
			 * an active lock that covers the request.
			 */

			if (SAME_OWNER(lock, request) &&
					COVERS(lock, request) &&
						(request->l_type == F_RDLCK))
				return (flk_execute_request(request));
			lock = lock->l_next;
		} while (lock->l_vnode == vp);
	}

	if (!request_blocked_by_active) {
		lock_descriptor_t *lk[1];
		lock_descriptor_t *first_glock = NULL;
		/*
		 * Shall we grant this?! NO!!
		 * What about those locks that were just granted and are
		 * still in the sleep queue?  Those threads have been woken
		 * up, so the locks are almost active.
		 */
		SET_LOCK_TO_FIRST_SLEEP_VP(gp, lock, vp);
		if (lock) {
			do {
				if (BLOCKS(lock, request)) {
					if (IS_GRANTED(lock)) {
						request_blocked_by_granted = 1;
					} else {
						request_blocked_by_sleeping = 1;
					}
				}

				lock = lock->l_next;
			} while ((lock->l_vnode == vp));
			first_glock = lock->l_prev;
			ASSERT(first_glock->l_vnode == vp);
		}

		if (request_blocked_by_granted)
			goto block;

		if (!request_blocked_by_sleeping) {
			/*
			 * If the request isn't going to be blocked by a
			 * sleeping request, we know that it isn't going to
			 * be blocked; we can just execute the request --
			 * without performing costly deadlock detection.
			 */
			ASSERT(!request_blocked_by_active);
			return (flk_execute_request(request));
		} else if (request->l_type == F_RDLCK) {
			/*
			 * If we have a sleeping writer in the requested
			 * lock's range, block.
			 */
			goto block;
		}

		lk[0] = request;
		request->l_state |= RECOMPUTE_LOCK;
		SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp);
		if (lock) {
			do {
				flk_recompute_dependencies(lock, lk, 1, 0);
				lock = lock->l_next;
			} while (lock->l_vnode == vp);
		}
		lock = first_glock;
		if (lock) {
			do {
				if (IS_GRANTED(lock)) {
					flk_recompute_dependencies(lock,
					    lk, 1, 0);
				}
				lock = lock->l_prev;
			} while ((lock->l_vnode == vp));
		}
		request->l_state &= ~RECOMPUTE_LOCK;
		if (!NO_DEPENDENTS(request) && flk_check_deadlock(request))
			return (EDEADLK);
		return (flk_execute_request(request));
	}

block:
	if (request_will_wait)
		flk_graph_uncolor(gp);

	/* check sleeping locks */

	SET_LOCK_TO_FIRST_SLEEP_VP(gp, lock, vp);

	/*
	 * If we find a sleeping write lock that is a superset of the
	 * region wanted by request we can be assured that by adding an
	 * edge to this write lock we have paths to all locks in the
	 * graph that block the request, except in one case, which is why
	 * there is another check for SAME_OWNER in the loop below.  The
	 * exception is when the process that owns the sleeping write lock
	 * 'l1' has other locks l2, l3, l4 that are in the system and
	 * arrived before l1.  l1 does not have paths to these locks as
	 * they are from the same process.  We break when we find a second
	 * covering sleeping lock l5 owned by a process different from the
	 * one owning l1, because none of l2, l3, l4, etc. can have arrived
	 * before l5; if one had, l1 would already have produced a deadlock.
	 */

	if (lock) {
		do {
			if (BLOCKS(lock, request)) {
				if (!request_will_wait)
					return (EAGAIN);
				if (COVERS(lock, request) &&
						lock->l_type == F_WRLCK) {
					if (found_covering_lock &&
					    !SAME_OWNER(lock, covered_by)) {
						found_covering_lock++;
						break;
					}
					found_covering_lock = 1;
					covered_by = lock;
				}
				if (found_covering_lock &&
					!SAME_OWNER(lock, covered_by)) {
					lock = lock->l_next;
					continue;
				}
				if ((error = flk_add_edge(request, lock,
						!found_covering_lock, 0)))
					return (error);
			}
			lock = lock->l_next;
		} while (lock->l_vnode == vp);
	}

/*
 * found_covering_lock == 2 iff at this point 'request' has paths to all
 * locks that block 'request'.  found_covering_lock == 1 iff at this point
 * 'request' has paths to all locks that block 'request' whose owners are
 * not the same as the one that covers 'request' (covered_by above), and
 * we can have locks whose owner is the same as covered_by in the active
 * list.
 */

	if (request_blocked_by_active && found_covering_lock != 2) {
		SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp);
		ASSERT(lock != NULL);
		do {
			if (BLOCKS(lock, request)) {
				if (found_covering_lock &&
					!SAME_OWNER(lock, covered_by)) {
					lock = lock->l_next;
					continue;
				}
				if ((error = flk_add_edge(request, lock,
							CHECK_CYCLE, 0)))
					return (error);
			}
			lock = lock->l_next;
		} while (lock->l_vnode == vp);
	}

	if (NOT_BLOCKED(request)) {
		/*
		 * request not dependent on any other locks
		 * so execute this request
		 */
		return (flk_execute_request(request));
	} else {
		/*
		 * check for deadlock
		 */
		if (flk_check_deadlock(request))
			return (EDEADLK);
		/*
		 * this thread has to sleep
		 */
		return (flk_wait_execute_request(request));
	}
}

/* ONC_PLUS EXTRACT START */
/*
 * The actual execution of the request in the simple case is only to
 * insert the 'request' in the list of active locks if it is not an
 * UNLOCK.
 * We have to consider the existing active locks' relation to
 * this 'request' if they are owned by the same process.  flk_relation()
 * does this job and sees to it that the dependency graph information is
 * maintained properly.
 */

int
flk_execute_request(lock_descriptor_t *request)
{
	graph_t	*gp = request->l_graph;
	vnode_t	*vp = request->l_vnode;
	lock_descriptor_t	*lock, *lock1;
	int done_searching = 0;

	CHECK_SLEEPING_LOCKS(gp);
	CHECK_ACTIVE_LOCKS(gp);

	ASSERT(MUTEX_HELD(&gp->gp_mutex));

	flk_set_state(request, FLK_START_STATE);

	ASSERT(NOT_BLOCKED(request));

	/* IO_LOCK requests are only to check status */

	if (IS_IO_LOCK(request))
		return (0);

	SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp);

	if (lock == NULL && request->l_type == F_UNLCK)
		return (0);
	if (lock == NULL) {
		flk_insert_active_lock(request);
		return (0);
	}

	do {
		lock1 = lock->l_next;
		if (SAME_OWNER(request, lock)) {
			done_searching = flk_relation(lock, request);
		}
		lock = lock1;
	} while (lock->l_vnode == vp && !done_searching);

	/*
	 * insert in active queue
	 */

	if (request->l_type != F_UNLCK)
		flk_insert_active_lock(request);

	return (0);
}
/* ONC_PLUS EXTRACT END */

/*
 * 'request' is blocked by someone, so we put it into the sleep queue.
 */
static int
flk_wait_execute_request(lock_descriptor_t *request)
{
	graph_t	*gp = request->l_graph;
	callb_cpr_t	*cprp;		/* CPR info from callback */
	struct flock_globals *fg;
	int index;

	ASSERT(MUTEX_HELD(&gp->gp_mutex));
	ASSERT(IS_WILLING_TO_SLEEP(request));

	flk_insert_sleeping_lock(request);

	if (IS_LOCKMGR(request)) {
		index = HASH_INDEX(request->l_vnode);
		fg = flk_get_globals();

		if (nlm_status_size == 0) {	/* not booted as a cluster */
			if (fg->lockmgr_status[index] != FLK_LOCKMGR_UP) {
				flk_cancel_sleeping_lock(request, 1);
				return (ENOLCK);
			}
		} else {			/* booted as a cluster */
			/*
			 * If the request is an NLM server lock request,
			 * and the NLM state of the lock request is not
			 * NLM_UP (because the NLM server is shutting
			 * down), then cancel the sleeping lock and
			 * return error ENOLCK that will encourage the
			 * client to retransmit.
			 */
			if (!IS_NLM_UP(request)) {
				flk_cancel_sleeping_lock(request, 1);
				return (ENOLCK);
			}
		}
	}

	/* Clustering: For blocking PXFS locks, return */
	if (IS_PXFS(request)) {
		/*
		 * PXFS locks sleep on the client side.
		 * The callback argument is used to wake up the sleeper
		 * when the lock is granted.
		 * We return -1 (rather than an errno value) to indicate
		 * that the client side should sleep.
		 */
		return (PXFS_LOCK_BLOCKED);
	}
11000Sstevel@tonic-gate 
11010Sstevel@tonic-gate 	if (request->l_callbacks != NULL) {
11020Sstevel@tonic-gate 		/*
11030Sstevel@tonic-gate 		 * To make sure the shutdown code works correctly, either
11040Sstevel@tonic-gate 		 * the callback must happen after putting the lock on the
11050Sstevel@tonic-gate 		 * sleep list, or we must check the shutdown status after
11060Sstevel@tonic-gate 		 * returning from the callback (and before sleeping).  At
11070Sstevel@tonic-gate 		 * least for now, we'll use the first option.  If a
11080Sstevel@tonic-gate 		 * shutdown or signal or whatever happened while the graph
11090Sstevel@tonic-gate 		 * mutex was dropped, that will be detected by
11100Sstevel@tonic-gate 		 * wait_for_lock().
11110Sstevel@tonic-gate 		 */
11120Sstevel@tonic-gate 		mutex_exit(&gp->gp_mutex);
11130Sstevel@tonic-gate 
11140Sstevel@tonic-gate 		cprp = flk_invoke_callbacks(request->l_callbacks,
11150Sstevel@tonic-gate 					    FLK_BEFORE_SLEEP);
11160Sstevel@tonic-gate 
11170Sstevel@tonic-gate 		mutex_enter(&gp->gp_mutex);
11180Sstevel@tonic-gate 
11190Sstevel@tonic-gate 		if (cprp == NULL) {
11200Sstevel@tonic-gate 			wait_for_lock(request);
11210Sstevel@tonic-gate 		} else {
11220Sstevel@tonic-gate 			mutex_enter(cprp->cc_lockp);
11230Sstevel@tonic-gate 			CALLB_CPR_SAFE_BEGIN(cprp);
11240Sstevel@tonic-gate 			mutex_exit(cprp->cc_lockp);
11250Sstevel@tonic-gate 			wait_for_lock(request);
11260Sstevel@tonic-gate 			mutex_enter(cprp->cc_lockp);
11270Sstevel@tonic-gate 			CALLB_CPR_SAFE_END(cprp, cprp->cc_lockp);
11280Sstevel@tonic-gate 			mutex_exit(cprp->cc_lockp);
11290Sstevel@tonic-gate 		}
11300Sstevel@tonic-gate 
11310Sstevel@tonic-gate 		mutex_exit(&gp->gp_mutex);
11320Sstevel@tonic-gate 		(void) flk_invoke_callbacks(request->l_callbacks,
11330Sstevel@tonic-gate 					    FLK_AFTER_SLEEP);
11340Sstevel@tonic-gate 		mutex_enter(&gp->gp_mutex);
11350Sstevel@tonic-gate 	} else {
11360Sstevel@tonic-gate 		wait_for_lock(request);
11370Sstevel@tonic-gate 	}
11380Sstevel@tonic-gate 
11390Sstevel@tonic-gate 	if (IS_LOCKMGR(request)) {
11400Sstevel@tonic-gate 		/*
11410Sstevel@tonic-gate 		 * If the lock manager is shutting down, return an
11420Sstevel@tonic-gate 		 * error that will encourage the client to retransmit.
11430Sstevel@tonic-gate 		 */
11440Sstevel@tonic-gate 		if (fg->lockmgr_status[index] != FLK_LOCKMGR_UP &&
11450Sstevel@tonic-gate 			!IS_GRANTED(request)) {
11460Sstevel@tonic-gate 			flk_cancel_sleeping_lock(request, 1);
11470Sstevel@tonic-gate 			return (ENOLCK);
11480Sstevel@tonic-gate 		}
11490Sstevel@tonic-gate 	}
11500Sstevel@tonic-gate 
11510Sstevel@tonic-gate 	if (IS_INTERRUPTED(request)) {
11520Sstevel@tonic-gate 		/* we got a signal, or act like we did */
11530Sstevel@tonic-gate 		flk_cancel_sleeping_lock(request, 1);
11540Sstevel@tonic-gate 		return (EINTR);
11550Sstevel@tonic-gate 	}
11560Sstevel@tonic-gate 
11570Sstevel@tonic-gate 	/* Cancelled if some other thread has closed the file */
11580Sstevel@tonic-gate 
11590Sstevel@tonic-gate 	if (IS_CANCELLED(request)) {
11600Sstevel@tonic-gate 		flk_cancel_sleeping_lock(request, 1);
11610Sstevel@tonic-gate 		return (EBADF);
11620Sstevel@tonic-gate 	}
11630Sstevel@tonic-gate 
11640Sstevel@tonic-gate 	request->l_state &= ~GRANTED_LOCK;
11650Sstevel@tonic-gate 	REMOVE_SLEEP_QUEUE(request);
11660Sstevel@tonic-gate 	return (flk_execute_request(request));
11670Sstevel@tonic-gate }
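
/*
 * Illustrative user-land model of the CPR bracketing in
 * flk_wait_execute_request() above (not kernel code; the struct, the
 * cc_safe flag and the function names here are hypothetical).  The
 * thread advertises that it is safe to suspend for the whole time it
 * is blocked, and withdraws that promise before touching shared state
 * again, mirroring CALLB_CPR_SAFE_BEGIN/END around wait_for_lock().
 */
#include <pthread.h>

struct cpr_model {
	pthread_mutex_t cc_lockp;
	int cc_safe;			/* safe-to-suspend flag */
};

/* gp_mutex must be held on entry, as it is around wait_for_lock() */
static void
wait_with_cpr(struct cpr_model *cprp, pthread_mutex_t *gp_mutex,
    pthread_cond_t *cv, int *granted)
{
	pthread_mutex_lock(&cprp->cc_lockp);
	cprp->cc_safe = 1;		/* CALLB_CPR_SAFE_BEGIN analogue */
	pthread_mutex_unlock(&cprp->cc_lockp);

	while (!*granted)		/* wait_for_lock() analogue */
		pthread_cond_wait(cv, gp_mutex);

	pthread_mutex_lock(&cprp->cc_lockp);
	cprp->cc_safe = 0;		/* CALLB_CPR_SAFE_END analogue */
	pthread_mutex_unlock(&cprp->cc_lockp);
}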
11680Sstevel@tonic-gate 
11690Sstevel@tonic-gate /*
11700Sstevel@tonic-gate  * This routine adds an edge between from and to because from depends
11710Sstevel@tonic-gate  * on to. If asked to check for deadlock, it checks whether any lock
11720Sstevel@tonic-gate  * reachable from "from_lock" is owned by the same process as
11730Sstevel@tonic-gate  * "from_lock".
11740Sstevel@tonic-gate  * NOTE: It is the caller's responsibility to make sure that the color
11750Sstevel@tonic-gate  * of the graph is consistent between the calls to flk_add_edge as done
11760Sstevel@tonic-gate  * in flk_process_request. This routine does not color and check for
11770Sstevel@tonic-gate  * deadlock explicitly.
11780Sstevel@tonic-gate  */
11790Sstevel@tonic-gate 
11800Sstevel@tonic-gate static int
11810Sstevel@tonic-gate flk_add_edge(lock_descriptor_t *from_lock, lock_descriptor_t *to_lock,
11820Sstevel@tonic-gate 			int check_cycle, int update_graph)
11830Sstevel@tonic-gate {
11840Sstevel@tonic-gate 	edge_t	*edge;
11850Sstevel@tonic-gate 	edge_t	*ep;
11860Sstevel@tonic-gate 	lock_descriptor_t	*vertex;
11870Sstevel@tonic-gate 	lock_descriptor_t *vertex_stack;
11880Sstevel@tonic-gate 
11890Sstevel@tonic-gate 	STACK_INIT(vertex_stack);
11900Sstevel@tonic-gate 
11910Sstevel@tonic-gate 	/*
11920Sstevel@tonic-gate 	 * If the to vertex is already colored, just return without
11930Sstevel@tonic-gate 	 * adding an edge: it is already known to be reachable from
11940Sstevel@tonic-gate 	 * the from vertex.
11950Sstevel@tonic-gate 	 */
11960Sstevel@tonic-gate 
11970Sstevel@tonic-gate 	if (COLORED(to_lock))
11980Sstevel@tonic-gate 		return (0);
11990Sstevel@tonic-gate 
12000Sstevel@tonic-gate 	edge = flk_get_edge();
12010Sstevel@tonic-gate 
12020Sstevel@tonic-gate 	/*
12030Sstevel@tonic-gate 	 * set the from and to vertex
12040Sstevel@tonic-gate 	 */
12050Sstevel@tonic-gate 
12060Sstevel@tonic-gate 	edge->from_vertex = from_lock;
12070Sstevel@tonic-gate 	edge->to_vertex = to_lock;
12080Sstevel@tonic-gate 
12090Sstevel@tonic-gate 	/*
12100Sstevel@tonic-gate 	 * put in adjacency list of from vertex
12110Sstevel@tonic-gate 	 */
12120Sstevel@tonic-gate 
12130Sstevel@tonic-gate 	from_lock->l_edge.edge_adj_next->edge_adj_prev = edge;
12140Sstevel@tonic-gate 	edge->edge_adj_next = from_lock->l_edge.edge_adj_next;
12150Sstevel@tonic-gate 	edge->edge_adj_prev = &from_lock->l_edge;
12160Sstevel@tonic-gate 	from_lock->l_edge.edge_adj_next = edge;
12170Sstevel@tonic-gate 
12180Sstevel@tonic-gate 	/*
12190Sstevel@tonic-gate 	 * put on the in-list of the to vertex
12200Sstevel@tonic-gate 	 */
12210Sstevel@tonic-gate 
12220Sstevel@tonic-gate 	to_lock->l_edge.edge_in_next->edge_in_prev = edge;
12230Sstevel@tonic-gate 	edge->edge_in_next = to_lock->l_edge.edge_in_next;
12240Sstevel@tonic-gate 	to_lock->l_edge.edge_in_next = edge;
12250Sstevel@tonic-gate 	edge->edge_in_prev = &to_lock->l_edge;
12260Sstevel@tonic-gate 
12270Sstevel@tonic-gate 
12280Sstevel@tonic-gate 	if (update_graph) {
12290Sstevel@tonic-gate 		flk_update_proc_graph(edge, 0);
12300Sstevel@tonic-gate 		return (0);
12310Sstevel@tonic-gate 	}
12320Sstevel@tonic-gate 	if (!check_cycle) {
12330Sstevel@tonic-gate 		return (0);
12340Sstevel@tonic-gate 	}
12350Sstevel@tonic-gate 
12360Sstevel@tonic-gate 	STACK_PUSH(vertex_stack, from_lock, l_stack);
12370Sstevel@tonic-gate 
12380Sstevel@tonic-gate 	while ((vertex = STACK_TOP(vertex_stack)) != NULL) {
12390Sstevel@tonic-gate 
12400Sstevel@tonic-gate 		STACK_POP(vertex_stack, l_stack);
12410Sstevel@tonic-gate 
12420Sstevel@tonic-gate 		for (ep = FIRST_ADJ(vertex);
12430Sstevel@tonic-gate 			ep != HEAD(vertex);
12440Sstevel@tonic-gate 				ep = NEXT_ADJ(ep)) {
12450Sstevel@tonic-gate 			if (COLORED(ep->to_vertex))
12460Sstevel@tonic-gate 				continue;
12470Sstevel@tonic-gate 			COLOR(ep->to_vertex);
12480Sstevel@tonic-gate 			if (SAME_OWNER(ep->to_vertex, from_lock))
12490Sstevel@tonic-gate 				goto dead_lock;
12500Sstevel@tonic-gate 			STACK_PUSH(vertex_stack, ep->to_vertex, l_stack);
12510Sstevel@tonic-gate 		}
12520Sstevel@tonic-gate 	}
12530Sstevel@tonic-gate 	return (0);
12540Sstevel@tonic-gate 
12550Sstevel@tonic-gate dead_lock:
12560Sstevel@tonic-gate 
12570Sstevel@tonic-gate 	/*
12580Sstevel@tonic-gate 	 * remove all edges
12590Sstevel@tonic-gate 	 */
12600Sstevel@tonic-gate 
12610Sstevel@tonic-gate 	ep = FIRST_ADJ(from_lock);
12620Sstevel@tonic-gate 
12630Sstevel@tonic-gate 	while (ep != HEAD(from_lock)) {
12640Sstevel@tonic-gate 		IN_LIST_REMOVE(ep);
12650Sstevel@tonic-gate 		from_lock->l_sedge = NEXT_ADJ(ep);
12660Sstevel@tonic-gate 		ADJ_LIST_REMOVE(ep);
12670Sstevel@tonic-gate 		flk_free_edge(ep);
12680Sstevel@tonic-gate 		ep = from_lock->l_sedge;
12690Sstevel@tonic-gate 	}
12700Sstevel@tonic-gate 	return (EDEADLK);
12710Sstevel@tonic-gate }
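
/*
 * Illustrative user-land sketch of the cycle check above (not kernel
 * code; the struct and names are hypothetical).  Follow adjacency
 * edges depth-first with an explicit stack; reaching an uncolored
 * vertex owned by the requester means the new edge would close a
 * cycle, i.e. the request would deadlock.  'mark' is assumed fresh,
 * as after flk_graph_uncolor().
 */
#include <stddef.h>

#define	NADJ	8

struct vtx {
	int owner;			/* lock owner id */
	unsigned color;			/* visited mark */
	struct vtx *adj[NADJ];		/* outgoing "blocked by" edges */
	int nadj;
	struct vtx *stack_link;		/* explicit DFS stack */
};

static int
would_deadlock(struct vtx *from, unsigned mark)
{
	struct vtx *stack = from, *v;
	int i;

	from->stack_link = NULL;
	while ((v = stack) != NULL) {
		stack = v->stack_link;		/* STACK_POP analogue */
		for (i = 0; i < v->nadj; i++) {
			struct vtx *w = v->adj[i];
			if (w->color == mark)	/* COLORED() analogue */
				continue;
			w->color = mark;	/* COLOR() analogue */
			if (w->owner == from->owner)
				return (1);	/* cycle: EDEADLK */
			w->stack_link = stack;	/* STACK_PUSH analogue */
			stack = w;
		}
	}
	return (0);
}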
12720Sstevel@tonic-gate 
12730Sstevel@tonic-gate /*
12740Sstevel@tonic-gate  * Get an edge structure for representing the dependency between two locks.
12750Sstevel@tonic-gate  */
12760Sstevel@tonic-gate 
12770Sstevel@tonic-gate static edge_t *
12780Sstevel@tonic-gate flk_get_edge()
12790Sstevel@tonic-gate {
12800Sstevel@tonic-gate 	edge_t	*ep;
12810Sstevel@tonic-gate 
12820Sstevel@tonic-gate 	ASSERT(flk_edge_cache != NULL);
12830Sstevel@tonic-gate 
12840Sstevel@tonic-gate 	ep = kmem_cache_alloc(flk_edge_cache, KM_SLEEP);
12850Sstevel@tonic-gate 	edge_allocs++;
12860Sstevel@tonic-gate 	return (ep);
12870Sstevel@tonic-gate }
12880Sstevel@tonic-gate 
12890Sstevel@tonic-gate /*
12900Sstevel@tonic-gate  * Free the edge structure.
12910Sstevel@tonic-gate  */
12920Sstevel@tonic-gate 
12930Sstevel@tonic-gate static void
12940Sstevel@tonic-gate flk_free_edge(edge_t *ep)
12950Sstevel@tonic-gate {
12960Sstevel@tonic-gate 	edge_frees++;
12970Sstevel@tonic-gate 	kmem_cache_free(flk_edge_cache, (void *)ep);
12980Sstevel@tonic-gate }
12990Sstevel@tonic-gate 
13000Sstevel@tonic-gate /*
13010Sstevel@tonic-gate  * Check the relationship of request with lock, recompute the
13020Sstevel@tonic-gate  * dependencies, break the lock if required, and return 1 if request
13030Sstevel@tonic-gate  * cannot have any further relationship with the remaining active
13040Sstevel@tonic-gate  * locks.
13050Sstevel@tonic-gate  * 'lock' and 'request' are compared, and if they overlap we delete
13060Sstevel@tonic-gate  * 'lock' and form new locks to represent the non-overlapped portion
13070Sstevel@tonic-gate  * of the original 'lock'. This function has side effects: 'lock' will
13080Sstevel@tonic-gate  * be freed and new locks will be added to the active list.
13090Sstevel@tonic-gate  */
13100Sstevel@tonic-gate 
13110Sstevel@tonic-gate static int
13120Sstevel@tonic-gate flk_relation(lock_descriptor_t *lock, lock_descriptor_t *request)
13130Sstevel@tonic-gate {
13140Sstevel@tonic-gate 	int lock_effect;
13150Sstevel@tonic-gate 	lock_descriptor_t *lock1, *lock2;
13160Sstevel@tonic-gate 	lock_descriptor_t *topology[3];
13170Sstevel@tonic-gate 	int nvertex = 0;
13180Sstevel@tonic-gate 	int i;
13190Sstevel@tonic-gate 	edge_t	*ep;
13200Sstevel@tonic-gate 	graph_t	*gp = (lock->l_graph);
13210Sstevel@tonic-gate 
13220Sstevel@tonic-gate 
13230Sstevel@tonic-gate 	CHECK_SLEEPING_LOCKS(gp);
13240Sstevel@tonic-gate 	CHECK_ACTIVE_LOCKS(gp);
13250Sstevel@tonic-gate 
13260Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&gp->gp_mutex));
13270Sstevel@tonic-gate 
13280Sstevel@tonic-gate 	topology[0] = topology[1] = topology[2] = NULL;
13290Sstevel@tonic-gate 
13300Sstevel@tonic-gate 	if (request->l_type == F_UNLCK)
13310Sstevel@tonic-gate 		lock_effect = FLK_UNLOCK;
13320Sstevel@tonic-gate 	else if (request->l_type == F_RDLCK &&
13330Sstevel@tonic-gate 			lock->l_type == F_WRLCK)
13340Sstevel@tonic-gate 		lock_effect = FLK_DOWNGRADE;
13350Sstevel@tonic-gate 	else if (request->l_type == F_WRLCK &&
13360Sstevel@tonic-gate 			lock->l_type == F_RDLCK)
13370Sstevel@tonic-gate 		lock_effect = FLK_UPGRADE;
13380Sstevel@tonic-gate 	else
13390Sstevel@tonic-gate 		lock_effect = FLK_STAY_SAME;
13400Sstevel@tonic-gate 
13410Sstevel@tonic-gate 	if (lock->l_end < request->l_start) {
13420Sstevel@tonic-gate 		if (lock->l_end == request->l_start - 1 &&
13430Sstevel@tonic-gate 				lock_effect == FLK_STAY_SAME) {
13440Sstevel@tonic-gate 			topology[0] = request;
13450Sstevel@tonic-gate 			request->l_start = lock->l_start;
13460Sstevel@tonic-gate 			nvertex = 1;
13470Sstevel@tonic-gate 			goto recompute;
13480Sstevel@tonic-gate 		} else {
13490Sstevel@tonic-gate 			return (0);
13500Sstevel@tonic-gate 		}
13510Sstevel@tonic-gate 	}
13520Sstevel@tonic-gate 
13530Sstevel@tonic-gate 	if (lock->l_start > request->l_end) {
13540Sstevel@tonic-gate 		if (request->l_end == lock->l_start - 1 &&
13550Sstevel@tonic-gate 					lock_effect == FLK_STAY_SAME) {
13560Sstevel@tonic-gate 			topology[0] = request;
13570Sstevel@tonic-gate 			request->l_end = lock->l_end;
13580Sstevel@tonic-gate 			nvertex = 1;
13590Sstevel@tonic-gate 			goto recompute;
13600Sstevel@tonic-gate 		} else {
13610Sstevel@tonic-gate 			return (1);
13620Sstevel@tonic-gate 		}
13630Sstevel@tonic-gate 	}
13640Sstevel@tonic-gate 
13650Sstevel@tonic-gate 	if (request->l_end < lock->l_end) {
13660Sstevel@tonic-gate 		if (request->l_start > lock->l_start) {
13670Sstevel@tonic-gate 			if (lock_effect == FLK_STAY_SAME) {
13680Sstevel@tonic-gate 				request->l_start = lock->l_start;
13690Sstevel@tonic-gate 				request->l_end = lock->l_end;
13700Sstevel@tonic-gate 				topology[0] = request;
13710Sstevel@tonic-gate 				nvertex = 1;
13720Sstevel@tonic-gate 			} else {
13730Sstevel@tonic-gate 				lock1 = flk_get_lock();
13740Sstevel@tonic-gate 				lock2 = flk_get_lock();
13750Sstevel@tonic-gate 				COPY(lock1, lock);
13760Sstevel@tonic-gate 				COPY(lock2, lock);
13770Sstevel@tonic-gate 				lock1->l_start = lock->l_start;
13780Sstevel@tonic-gate 				lock1->l_end = request->l_start - 1;
13790Sstevel@tonic-gate 				lock2->l_start = request->l_end + 1;
13800Sstevel@tonic-gate 				lock2->l_end = lock->l_end;
13810Sstevel@tonic-gate 				topology[0] = lock1;
13820Sstevel@tonic-gate 				topology[1] = lock2;
13830Sstevel@tonic-gate 				topology[2] = request;
13840Sstevel@tonic-gate 				nvertex = 3;
13850Sstevel@tonic-gate 			}
13860Sstevel@tonic-gate 		} else if (request->l_start < lock->l_start) {
13870Sstevel@tonic-gate 			if (lock_effect == FLK_STAY_SAME) {
13880Sstevel@tonic-gate 				request->l_end = lock->l_end;
13890Sstevel@tonic-gate 				topology[0] = request;
13900Sstevel@tonic-gate 				nvertex = 1;
13910Sstevel@tonic-gate 			} else {
13920Sstevel@tonic-gate 				lock1 = flk_get_lock();
13930Sstevel@tonic-gate 				COPY(lock1, lock);
13940Sstevel@tonic-gate 				lock1->l_start = request->l_end + 1;
13950Sstevel@tonic-gate 				topology[0] = lock1;
13960Sstevel@tonic-gate 				topology[1] = request;
13970Sstevel@tonic-gate 				nvertex = 2;
13980Sstevel@tonic-gate 			}
13990Sstevel@tonic-gate 		} else  {
14000Sstevel@tonic-gate 			if (lock_effect == FLK_STAY_SAME) {
14010Sstevel@tonic-gate 				request->l_start = lock->l_start;
14020Sstevel@tonic-gate 				request->l_end = lock->l_end;
14030Sstevel@tonic-gate 				topology[0] = request;
14040Sstevel@tonic-gate 				nvertex = 1;
14050Sstevel@tonic-gate 			} else {
14060Sstevel@tonic-gate 				lock1 = flk_get_lock();
14070Sstevel@tonic-gate 				COPY(lock1, lock);
14080Sstevel@tonic-gate 				lock1->l_start = request->l_end + 1;
14090Sstevel@tonic-gate 				topology[0] = lock1;
14100Sstevel@tonic-gate 				topology[1] = request;
14110Sstevel@tonic-gate 				nvertex = 2;
14120Sstevel@tonic-gate 			}
14130Sstevel@tonic-gate 		}
14140Sstevel@tonic-gate 	} else if (request->l_end > lock->l_end) {
14150Sstevel@tonic-gate 		if (request->l_start > lock->l_start)  {
14160Sstevel@tonic-gate 			if (lock_effect == FLK_STAY_SAME) {
14170Sstevel@tonic-gate 				request->l_start = lock->l_start;
14180Sstevel@tonic-gate 				topology[0] = request;
14190Sstevel@tonic-gate 				nvertex = 1;
14200Sstevel@tonic-gate 			} else {
14210Sstevel@tonic-gate 				lock1 = flk_get_lock();
14220Sstevel@tonic-gate 				COPY(lock1, lock);
14230Sstevel@tonic-gate 				lock1->l_end = request->l_start - 1;
14240Sstevel@tonic-gate 				topology[0] = lock1;
14250Sstevel@tonic-gate 				topology[1] = request;
14260Sstevel@tonic-gate 				nvertex = 2;
14270Sstevel@tonic-gate 			}
14280Sstevel@tonic-gate 		} else if (request->l_start < lock->l_start)  {
14290Sstevel@tonic-gate 			topology[0] = request;
14300Sstevel@tonic-gate 			nvertex = 1;
14310Sstevel@tonic-gate 		} else {
14320Sstevel@tonic-gate 			topology[0] = request;
14330Sstevel@tonic-gate 			nvertex = 1;
14340Sstevel@tonic-gate 		}
14350Sstevel@tonic-gate 	} else {
14360Sstevel@tonic-gate 		if (request->l_start > lock->l_start) {
14370Sstevel@tonic-gate 			if (lock_effect == FLK_STAY_SAME) {
14380Sstevel@tonic-gate 				request->l_start = lock->l_start;
14390Sstevel@tonic-gate 				topology[0] = request;
14400Sstevel@tonic-gate 				nvertex = 1;
14410Sstevel@tonic-gate 			} else {
14420Sstevel@tonic-gate 				lock1 = flk_get_lock();
14430Sstevel@tonic-gate 				COPY(lock1, lock);
14440Sstevel@tonic-gate 				lock1->l_end = request->l_start - 1;
14450Sstevel@tonic-gate 				topology[0] = lock1;
14460Sstevel@tonic-gate 				topology[1] = request;
14470Sstevel@tonic-gate 				nvertex = 2;
14480Sstevel@tonic-gate 			}
14490Sstevel@tonic-gate 		} else if (request->l_start < lock->l_start) {
14500Sstevel@tonic-gate 			topology[0] = request;
14510Sstevel@tonic-gate 			nvertex = 1;
14520Sstevel@tonic-gate 		} else {
14530Sstevel@tonic-gate 			if (lock_effect !=  FLK_UNLOCK) {
14540Sstevel@tonic-gate 				topology[0] = request;
14550Sstevel@tonic-gate 				nvertex = 1;
14560Sstevel@tonic-gate 			} else {
14570Sstevel@tonic-gate 				flk_delete_active_lock(lock, 0);
14580Sstevel@tonic-gate 				flk_wakeup(lock, 1);
14590Sstevel@tonic-gate 				flk_free_lock(lock);
14600Sstevel@tonic-gate 				CHECK_SLEEPING_LOCKS(gp);
14610Sstevel@tonic-gate 				CHECK_ACTIVE_LOCKS(gp);
14620Sstevel@tonic-gate 				return (1);
14630Sstevel@tonic-gate 			}
14640Sstevel@tonic-gate 		}
14650Sstevel@tonic-gate 	}
14660Sstevel@tonic-gate 
14670Sstevel@tonic-gate recompute:
14680Sstevel@tonic-gate 
14690Sstevel@tonic-gate 	/*
14700Sstevel@tonic-gate 	 * For unlock we don't send the 'request' in for recomputing
14710Sstevel@tonic-gate 	 * dependencies, because no lock will add an edge to it.
14720Sstevel@tonic-gate 	 */
14730Sstevel@tonic-gate 
14740Sstevel@tonic-gate 	if (lock_effect == FLK_UNLOCK) {
14750Sstevel@tonic-gate 		topology[nvertex-1] = NULL;
14760Sstevel@tonic-gate 		nvertex--;
14770Sstevel@tonic-gate 	}
14780Sstevel@tonic-gate 	for (i = 0; i < nvertex; i++) {
14790Sstevel@tonic-gate 		topology[i]->l_state |= RECOMPUTE_LOCK;
14800Sstevel@tonic-gate 		topology[i]->l_color = NO_COLOR;
14810Sstevel@tonic-gate 	}
14820Sstevel@tonic-gate 
14830Sstevel@tonic-gate 	ASSERT(FIRST_ADJ(lock) == HEAD(lock));
14840Sstevel@tonic-gate 
14850Sstevel@tonic-gate 	/*
14860Sstevel@tonic-gate 	 * Remove the adjacency-list entries of all edges coming in to
14870Sstevel@tonic-gate 	 * this vertex 'lock'.
14880Sstevel@tonic-gate 	 */
14890Sstevel@tonic-gate 
14900Sstevel@tonic-gate 	ep = FIRST_IN(lock);
14910Sstevel@tonic-gate 	while (ep != HEAD(lock)) {
14920Sstevel@tonic-gate 		ADJ_LIST_REMOVE(ep);
14930Sstevel@tonic-gate 		ep = NEXT_IN(ep);
14940Sstevel@tonic-gate 	}
14950Sstevel@tonic-gate 
14960Sstevel@tonic-gate 	flk_delete_active_lock(lock, 0);
14970Sstevel@tonic-gate 
14980Sstevel@tonic-gate 	/* We are ready for recomputing the dependencies now */
14990Sstevel@tonic-gate 
15000Sstevel@tonic-gate 	flk_recompute_dependencies(lock, topology, nvertex, 1);
15010Sstevel@tonic-gate 
15020Sstevel@tonic-gate 	for (i = 0; i < nvertex; i++) {
15030Sstevel@tonic-gate 		topology[i]->l_state &= ~RECOMPUTE_LOCK;
15040Sstevel@tonic-gate 		topology[i]->l_color = NO_COLOR;
15050Sstevel@tonic-gate 	}
15060Sstevel@tonic-gate 
15070Sstevel@tonic-gate 
15080Sstevel@tonic-gate 	if (lock_effect == FLK_UNLOCK) {
15090Sstevel@tonic-gate 		nvertex++;
15100Sstevel@tonic-gate 	}
15110Sstevel@tonic-gate 	for (i = 0; i < nvertex - 1; i++) {
15120Sstevel@tonic-gate 		flk_insert_active_lock(topology[i]);
15130Sstevel@tonic-gate 	}
15140Sstevel@tonic-gate 
15150Sstevel@tonic-gate 
15160Sstevel@tonic-gate 	if (lock_effect == FLK_DOWNGRADE || lock_effect == FLK_UNLOCK) {
15170Sstevel@tonic-gate 		flk_wakeup(lock, 0);
15180Sstevel@tonic-gate 	} else {
15190Sstevel@tonic-gate 		ep = FIRST_IN(lock);
15200Sstevel@tonic-gate 		while (ep != HEAD(lock)) {
15210Sstevel@tonic-gate 			lock->l_sedge = NEXT_IN(ep);
15220Sstevel@tonic-gate 			IN_LIST_REMOVE(ep);
15230Sstevel@tonic-gate 			flk_update_proc_graph(ep, 1);
15240Sstevel@tonic-gate 			flk_free_edge(ep);
15250Sstevel@tonic-gate 			ep = lock->l_sedge;
15260Sstevel@tonic-gate 		}
15270Sstevel@tonic-gate 	}
15280Sstevel@tonic-gate 	flk_free_lock(lock);
15290Sstevel@tonic-gate 
15300Sstevel@tonic-gate 	CHECK_SLEEPING_LOCKS(gp);
15310Sstevel@tonic-gate 	CHECK_ACTIVE_LOCKS(gp);
15320Sstevel@tonic-gate 	return (0);
15330Sstevel@tonic-gate }
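
/*
 * A self-contained sketch of the overlap arithmetic in flk_relation()
 * above (hypothetical names; assumes the two ranges overlap).
 * Removing the request range [rs, re] from an existing lock range
 * [ls, le] leaves zero, one or two surviving pieces; those pieces,
 * plus the request itself, are what topology[] holds above.
 */
#include <stdio.h>

static int
split_range(long ls, long le, long rs, long re, long pieces[2][2])
{
	int n = 0;

	if (rs > ls) {			/* remainder below the request */
		pieces[n][0] = ls;
		pieces[n][1] = rs - 1;
		n++;
	}
	if (re < le) {			/* remainder above the request */
		pieces[n][0] = re + 1;
		pieces[n][1] = le;
		n++;
	}
	return (n);
}

int
main(void)
{
	long p[2][2];
	int i, n = split_range(0, 99, 30, 59, p);

	for (i = 0; i < n; i++)		/* prints [0,29] [60,99] */
		printf("[%ld,%ld] ", p[i][0], p[i][1]);
	printf("\n");
	return (0);
}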
15340Sstevel@tonic-gate 
15350Sstevel@tonic-gate /*
15360Sstevel@tonic-gate  * Insert a lock into the active queue.
15370Sstevel@tonic-gate  */
15380Sstevel@tonic-gate 
15390Sstevel@tonic-gate static void
15400Sstevel@tonic-gate flk_insert_active_lock(lock_descriptor_t *new_lock)
15410Sstevel@tonic-gate {
15420Sstevel@tonic-gate 	graph_t	*gp = new_lock->l_graph;
15430Sstevel@tonic-gate 	vnode_t	*vp = new_lock->l_vnode;
15440Sstevel@tonic-gate 	lock_descriptor_t *first_lock, *lock;
15450Sstevel@tonic-gate 
15460Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&gp->gp_mutex));
15470Sstevel@tonic-gate 
15480Sstevel@tonic-gate 	SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp);
15490Sstevel@tonic-gate 	first_lock = lock;
15500Sstevel@tonic-gate 
15510Sstevel@tonic-gate 	if (first_lock != NULL) {
15520Sstevel@tonic-gate 		for (; (lock->l_vnode == vp &&
15530Sstevel@tonic-gate 			lock->l_start < new_lock->l_start); lock = lock->l_next)
15540Sstevel@tonic-gate 			;
15550Sstevel@tonic-gate 	} else {
15560Sstevel@tonic-gate 		lock = ACTIVE_HEAD(gp);
15570Sstevel@tonic-gate 	}
15580Sstevel@tonic-gate 
15590Sstevel@tonic-gate 	lock->l_prev->l_next = new_lock;
15600Sstevel@tonic-gate 	new_lock->l_next = lock;
15610Sstevel@tonic-gate 	new_lock->l_prev = lock->l_prev;
15620Sstevel@tonic-gate 	lock->l_prev = new_lock;
15630Sstevel@tonic-gate 
15640Sstevel@tonic-gate 	if (first_lock == NULL || (new_lock->l_start <= first_lock->l_start)) {
15650Sstevel@tonic-gate 		vp->v_filocks = (struct filock *)new_lock;
15660Sstevel@tonic-gate 	}
15670Sstevel@tonic-gate 	flk_set_state(new_lock, FLK_ACTIVE_STATE);
15680Sstevel@tonic-gate 	new_lock->l_state |= ACTIVE_LOCK;
15690Sstevel@tonic-gate 
15700Sstevel@tonic-gate 	CHECK_ACTIVE_LOCKS(gp);
15710Sstevel@tonic-gate 	CHECK_SLEEPING_LOCKS(gp);
15720Sstevel@tonic-gate }
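
/*
 * A minimal model of the sentinel-based splice performed above
 * (hypothetical struct; the per-vnode grouping is omitted).  Walking
 * to the first node with a larger start offset and linking in front
 * of it keeps the list sorted; the sentinel head makes the empty-list
 * and end-of-list cases fall out of the same four pointer stores.
 */
struct node {
	long start;
	struct node *next, *prev;
};

static void
insert_sorted(struct node *head, struct node *nn)	/* head: sentinel */
{
	struct node *n = head->next;

	while (n != head && n->start < nn->start)
		n = n->next;

	n->prev->next = nn;		/* splice nn in before n */
	nn->next = n;
	nn->prev = n->prev;
	n->prev = nn;
}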
15730Sstevel@tonic-gate 
15740Sstevel@tonic-gate /*
15750Sstevel@tonic-gate  * Delete the active lock: depending on the value of the second
15760Sstevel@tonic-gate  * parameter, either just remove the lock from the active lists or
15770Sstevel@tonic-gate  * both remove and free it.
15780Sstevel@tonic-gate  */
15790Sstevel@tonic-gate 
15800Sstevel@tonic-gate static void
15810Sstevel@tonic-gate flk_delete_active_lock(lock_descriptor_t *lock, int free_lock)
15820Sstevel@tonic-gate {
15830Sstevel@tonic-gate 	vnode_t *vp = lock->l_vnode;
15840Sstevel@tonic-gate 	graph_t	*gp = lock->l_graph;
15850Sstevel@tonic-gate 
15860Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&gp->gp_mutex));
15870Sstevel@tonic-gate 	if (free_lock)
15880Sstevel@tonic-gate 		ASSERT(NO_DEPENDENTS(lock));
15890Sstevel@tonic-gate 	ASSERT(NOT_BLOCKED(lock));
15900Sstevel@tonic-gate 	ASSERT(IS_ACTIVE(lock));
15910Sstevel@tonic-gate 
15920Sstevel@tonic-gate 	ASSERT((vp->v_filocks != NULL));
15930Sstevel@tonic-gate 
15940Sstevel@tonic-gate 	if (vp->v_filocks == (struct filock *)lock) {
15950Sstevel@tonic-gate 		vp->v_filocks = (struct filock *)
15960Sstevel@tonic-gate 				((lock->l_next->l_vnode == vp) ? lock->l_next :
15970Sstevel@tonic-gate 								NULL);
15980Sstevel@tonic-gate 	}
15990Sstevel@tonic-gate 	lock->l_next->l_prev = lock->l_prev;
16000Sstevel@tonic-gate 	lock->l_prev->l_next = lock->l_next;
16010Sstevel@tonic-gate 	lock->l_next = lock->l_prev = NULL;
16020Sstevel@tonic-gate 	flk_set_state(lock, FLK_DEAD_STATE);
16030Sstevel@tonic-gate 	lock->l_state &= ~ACTIVE_LOCK;
16040Sstevel@tonic-gate 
16050Sstevel@tonic-gate 	if (free_lock)
16060Sstevel@tonic-gate 		flk_free_lock(lock);
16070Sstevel@tonic-gate 	CHECK_ACTIVE_LOCKS(gp);
16080Sstevel@tonic-gate 	CHECK_SLEEPING_LOCKS(gp);
16090Sstevel@tonic-gate }
16100Sstevel@tonic-gate 
16110Sstevel@tonic-gate /*
16120Sstevel@tonic-gate  * Insert into the sleep queue.
16130Sstevel@tonic-gate  */
16140Sstevel@tonic-gate 
16150Sstevel@tonic-gate static void
16160Sstevel@tonic-gate flk_insert_sleeping_lock(lock_descriptor_t *request)
16170Sstevel@tonic-gate {
16180Sstevel@tonic-gate 	graph_t *gp = request->l_graph;
16190Sstevel@tonic-gate 	vnode_t	*vp = request->l_vnode;
16200Sstevel@tonic-gate 	lock_descriptor_t	*lock;
16210Sstevel@tonic-gate 
16220Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&gp->gp_mutex));
16230Sstevel@tonic-gate 	ASSERT(IS_INITIAL(request));
16240Sstevel@tonic-gate 
16250Sstevel@tonic-gate 	for (lock = gp->sleeping_locks.l_next; (lock != &gp->sleeping_locks &&
16260Sstevel@tonic-gate 		lock->l_vnode < vp); lock = lock->l_next)
16270Sstevel@tonic-gate 		;
16280Sstevel@tonic-gate 
16290Sstevel@tonic-gate 	lock->l_prev->l_next = request;
16300Sstevel@tonic-gate 	request->l_prev = lock->l_prev;
16310Sstevel@tonic-gate 	lock->l_prev = request;
16320Sstevel@tonic-gate 	request->l_next = lock;
16330Sstevel@tonic-gate 	flk_set_state(request, FLK_SLEEPING_STATE);
16340Sstevel@tonic-gate 	request->l_state |= SLEEPING_LOCK;
16350Sstevel@tonic-gate }
16360Sstevel@tonic-gate 
16370Sstevel@tonic-gate /*
16380Sstevel@tonic-gate  * Cancelling a sleeping lock implies removing a vertex from the
16390Sstevel@tonic-gate  * dependency graph and therefore we should recompute the dependencies
16400Sstevel@tonic-gate  * of all vertices that have a path to this vertex, w.r.t. all
16410Sstevel@tonic-gate  * vertices reachable from this vertex.
16420Sstevel@tonic-gate  */
16430Sstevel@tonic-gate 
16440Sstevel@tonic-gate void
16450Sstevel@tonic-gate flk_cancel_sleeping_lock(lock_descriptor_t *request, int remove_from_queue)
16460Sstevel@tonic-gate {
16470Sstevel@tonic-gate 	graph_t	*gp = request->l_graph;
16480Sstevel@tonic-gate 	vnode_t *vp = request->l_vnode;
16490Sstevel@tonic-gate 	lock_descriptor_t **topology = NULL;
16500Sstevel@tonic-gate 	edge_t	*ep;
16510Sstevel@tonic-gate 	lock_descriptor_t *vertex, *lock;
16520Sstevel@tonic-gate 	int nvertex = 0;
16530Sstevel@tonic-gate 	int i;
16540Sstevel@tonic-gate 	lock_descriptor_t *vertex_stack;
16550Sstevel@tonic-gate 
16560Sstevel@tonic-gate 	STACK_INIT(vertex_stack);
16570Sstevel@tonic-gate 
16580Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&gp->gp_mutex));
16590Sstevel@tonic-gate 	/*
16600Sstevel@tonic-gate 	 * Count the number of vertex pointers that have to be allocated:
16610Sstevel@tonic-gate 	 * all vertices that are reachable from request.
16620Sstevel@tonic-gate 	 */
16630Sstevel@tonic-gate 
16640Sstevel@tonic-gate 	STACK_PUSH(vertex_stack, request, l_stack);
16650Sstevel@tonic-gate 
16660Sstevel@tonic-gate 	while ((vertex = STACK_TOP(vertex_stack)) != NULL) {
16670Sstevel@tonic-gate 		STACK_POP(vertex_stack, l_stack);
16680Sstevel@tonic-gate 		for (ep = FIRST_ADJ(vertex); ep != HEAD(vertex);
16690Sstevel@tonic-gate 					ep = NEXT_ADJ(ep)) {
16700Sstevel@tonic-gate 			if (IS_RECOMPUTE(ep->to_vertex))
16710Sstevel@tonic-gate 				continue;
16720Sstevel@tonic-gate 			ep->to_vertex->l_state |= RECOMPUTE_LOCK;
16730Sstevel@tonic-gate 			STACK_PUSH(vertex_stack, ep->to_vertex, l_stack);
16740Sstevel@tonic-gate 			nvertex++;
16750Sstevel@tonic-gate 		}
16760Sstevel@tonic-gate 	}
16770Sstevel@tonic-gate 
16780Sstevel@tonic-gate 	/*
16790Sstevel@tonic-gate 	 * allocate memory for holding the vertex pointers
16800Sstevel@tonic-gate 	 */
16810Sstevel@tonic-gate 
16820Sstevel@tonic-gate 	if (nvertex) {
16830Sstevel@tonic-gate 		topology = kmem_zalloc(nvertex * sizeof (lock_descriptor_t *),
16840Sstevel@tonic-gate 						KM_SLEEP);
16850Sstevel@tonic-gate 	}
16860Sstevel@tonic-gate 
16870Sstevel@tonic-gate 	/*
16880Sstevel@tonic-gate 	 * one more pass to actually store the vertices in the
16890Sstevel@tonic-gate 	 * allocated array.
16900Sstevel@tonic-gate 	 * We first check sleeping locks and then active locks
16910Sstevel@tonic-gate 	 * so that the topology array will be in topological
16920Sstevel@tonic-gate 	 * order.
16930Sstevel@tonic-gate 	 */
16940Sstevel@tonic-gate 
16950Sstevel@tonic-gate 	nvertex = 0;
16960Sstevel@tonic-gate 	SET_LOCK_TO_FIRST_SLEEP_VP(gp, lock, vp);
16970Sstevel@tonic-gate 
16980Sstevel@tonic-gate 	if (lock) {
16990Sstevel@tonic-gate 		do {
17000Sstevel@tonic-gate 			if (IS_RECOMPUTE(lock)) {
17010Sstevel@tonic-gate 				lock->l_index = nvertex;
17020Sstevel@tonic-gate 				topology[nvertex++] = lock;
17030Sstevel@tonic-gate 			}
17040Sstevel@tonic-gate 			lock->l_color = NO_COLOR;
17050Sstevel@tonic-gate 			lock = lock->l_next;
17060Sstevel@tonic-gate 		} while (lock->l_vnode == vp);
17070Sstevel@tonic-gate 	}
17080Sstevel@tonic-gate 
17090Sstevel@tonic-gate 	SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp);
17100Sstevel@tonic-gate 
17110Sstevel@tonic-gate 	if (lock) {
17120Sstevel@tonic-gate 		do {
17130Sstevel@tonic-gate 			if (IS_RECOMPUTE(lock)) {
17140Sstevel@tonic-gate 				lock->l_index = nvertex;
17150Sstevel@tonic-gate 				topology[nvertex++] = lock;
17160Sstevel@tonic-gate 			}
17170Sstevel@tonic-gate 			lock->l_color = NO_COLOR;
17180Sstevel@tonic-gate 			lock = lock->l_next;
17190Sstevel@tonic-gate 		} while (lock->l_vnode == vp);
17200Sstevel@tonic-gate 	}
17210Sstevel@tonic-gate 
17220Sstevel@tonic-gate 	/*
17230Sstevel@tonic-gate 	 * remove in and out edges of request
17240Sstevel@tonic-gate 	 * They are freed after updating proc_graph below.
17250Sstevel@tonic-gate 	 */
17260Sstevel@tonic-gate 
17270Sstevel@tonic-gate 	for (ep = FIRST_IN(request); ep != HEAD(request); ep = NEXT_IN(ep)) {
17280Sstevel@tonic-gate 		ADJ_LIST_REMOVE(ep);
17290Sstevel@tonic-gate 	}
17300Sstevel@tonic-gate 
17310Sstevel@tonic-gate 
17320Sstevel@tonic-gate 	if (remove_from_queue)
17330Sstevel@tonic-gate 		REMOVE_SLEEP_QUEUE(request);
17340Sstevel@tonic-gate 
17350Sstevel@tonic-gate 	/* we are ready to recompute */
17360Sstevel@tonic-gate 
17370Sstevel@tonic-gate 	flk_recompute_dependencies(request, topology, nvertex, 1);
17380Sstevel@tonic-gate 
17390Sstevel@tonic-gate 	ep = FIRST_ADJ(request);
17400Sstevel@tonic-gate 	while (ep != HEAD(request)) {
17410Sstevel@tonic-gate 		IN_LIST_REMOVE(ep);
17420Sstevel@tonic-gate 		request->l_sedge = NEXT_ADJ(ep);
17430Sstevel@tonic-gate 		ADJ_LIST_REMOVE(ep);
17440Sstevel@tonic-gate 		flk_update_proc_graph(ep, 1);
17450Sstevel@tonic-gate 		flk_free_edge(ep);
17460Sstevel@tonic-gate 		ep = request->l_sedge;
17470Sstevel@tonic-gate 	}
17480Sstevel@tonic-gate 
17490Sstevel@tonic-gate 
17500Sstevel@tonic-gate 	/*
17510Sstevel@tonic-gate 	 * unset the RECOMPUTE flag in those vertices
17520Sstevel@tonic-gate 	 */
17530Sstevel@tonic-gate 
17540Sstevel@tonic-gate 	for (i = 0; i < nvertex; i++) {
17550Sstevel@tonic-gate 		topology[i]->l_state &= ~RECOMPUTE_LOCK;
17560Sstevel@tonic-gate 	}
17570Sstevel@tonic-gate 
17580Sstevel@tonic-gate 	/*
17590Sstevel@tonic-gate 	 * free the topology
17600Sstevel@tonic-gate 	 */
17610Sstevel@tonic-gate 	if (nvertex)
17620Sstevel@tonic-gate 		kmem_free((void *)topology,
17630Sstevel@tonic-gate 			(nvertex * sizeof (lock_descriptor_t *)));
17640Sstevel@tonic-gate 	/*
17650Sstevel@tonic-gate 	 * Some locks may be unblocked now.
17660Sstevel@tonic-gate 	 */
17670Sstevel@tonic-gate 
17680Sstevel@tonic-gate 	flk_wakeup(request, 0);
17690Sstevel@tonic-gate 
17700Sstevel@tonic-gate 	/*
17710Sstevel@tonic-gate 	 * We expect to have a correctly recomputed graph now.
17720Sstevel@tonic-gate 	 */
17730Sstevel@tonic-gate 	flk_set_state(request, FLK_DEAD_STATE);
17740Sstevel@tonic-gate 	flk_free_lock(request);
17750Sstevel@tonic-gate 	CHECK_SLEEPING_LOCKS(gp);
17760Sstevel@tonic-gate 	CHECK_ACTIVE_LOCKS(gp);
17770Sstevel@tonic-gate 
17780Sstevel@tonic-gate }
17790Sstevel@tonic-gate 
17800Sstevel@tonic-gate /*
17810Sstevel@tonic-gate  * Uncoloring the graph simply increments the mark value of the graph;
17820Sstevel@tonic-gate  * only when the mark wraps around do we explicitly reset the color of
17830Sstevel@tonic-gate  * all vertices in the graph.
17840Sstevel@tonic-gate  */
17850Sstevel@tonic-gate 
17860Sstevel@tonic-gate static void
17870Sstevel@tonic-gate flk_graph_uncolor(graph_t *gp)
17880Sstevel@tonic-gate {
17890Sstevel@tonic-gate 	lock_descriptor_t *lock;
17900Sstevel@tonic-gate 
17910Sstevel@tonic-gate 	if (gp->mark == UINT_MAX) {
17920Sstevel@tonic-gate 		gp->mark = 1;
17930Sstevel@tonic-gate 		for (lock = ACTIVE_HEAD(gp)->l_next;
17940Sstevel@tonic-gate 		    lock != ACTIVE_HEAD(gp); lock = lock->l_next)
17950Sstevel@tonic-gate 			lock->l_color = 0;
17960Sstevel@tonic-gate 
17970Sstevel@tonic-gate 		for (lock = SLEEPING_HEAD(gp)->l_next;
17980Sstevel@tonic-gate 		    lock != SLEEPING_HEAD(gp); lock = lock->l_next)
17990Sstevel@tonic-gate 			lock->l_color = 0;
18000Sstevel@tonic-gate 	} else {
18010Sstevel@tonic-gate 		gp->mark++;
18020Sstevel@tonic-gate 	}
18030Sstevel@tonic-gate }
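
/*
 * A sketch of the constant-time uncolor trick used above (hypothetical
 * types and macro names).  A vertex counts as colored iff its color
 * equals the graph's current mark, so bumping the mark uncolors every
 * vertex at once; only when the mark wraps around is a full sweep
 * needed.
 */
#include <limits.h>

struct mgraph { unsigned mark; };
struct mvtx { unsigned color; };

#define	MCOLOR(gp, vp)		((vp)->color = (gp)->mark)
#define	MCOLORED(gp, vp)	((vp)->color == (gp)->mark)

static void
uncolor(struct mgraph *gp, struct mvtx *all, int n)
{
	int i;

	if (gp->mark == UINT_MAX) {	/* wraparound: explicit sweep */
		gp->mark = 1;
		for (i = 0; i < n; i++)
			all[i].color = 0;
	} else {
		gp->mark++;		/* common case: O(1) */
	}
}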
18040Sstevel@tonic-gate 
18050Sstevel@tonic-gate /*
18060Sstevel@tonic-gate  * Wake up locks that are blocked on the given lock.
18070Sstevel@tonic-gate  */
18080Sstevel@tonic-gate 
18090Sstevel@tonic-gate static void
18100Sstevel@tonic-gate flk_wakeup(lock_descriptor_t *lock, int adj_list_remove)
18110Sstevel@tonic-gate {
18120Sstevel@tonic-gate 	edge_t	*ep;
18130Sstevel@tonic-gate 	graph_t	*gp = lock->l_graph;
18140Sstevel@tonic-gate 	lock_descriptor_t	*lck;
18150Sstevel@tonic-gate 
18160Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&gp->gp_mutex));
18170Sstevel@tonic-gate 	if (NO_DEPENDENTS(lock))
18180Sstevel@tonic-gate 		return;
18190Sstevel@tonic-gate 	ep = FIRST_IN(lock);
18200Sstevel@tonic-gate 	do {
18210Sstevel@tonic-gate 		/*
18220Sstevel@tonic-gate 		 * Delete the edge from the adjacency list of the from
18230Sstevel@tonic-gate 		 * vertex.  If that vertex has no more adjacent edges,
18240Sstevel@tonic-gate 		 * wake up its process.
18250Sstevel@tonic-gate 		 */
18260Sstevel@tonic-gate 		lck = ep->from_vertex;
18270Sstevel@tonic-gate 		if (adj_list_remove)
18280Sstevel@tonic-gate 			ADJ_LIST_REMOVE(ep);
18290Sstevel@tonic-gate 		flk_update_proc_graph(ep, 1);
18300Sstevel@tonic-gate 		if (NOT_BLOCKED(lck)) {
18310Sstevel@tonic-gate 			GRANT_WAKEUP(lck);
18320Sstevel@tonic-gate 		}
18330Sstevel@tonic-gate 		lock->l_sedge = NEXT_IN(ep);
18340Sstevel@tonic-gate 		IN_LIST_REMOVE(ep);
18350Sstevel@tonic-gate 		flk_free_edge(ep);
18360Sstevel@tonic-gate 		ep = lock->l_sedge;
18370Sstevel@tonic-gate 	} while (ep != HEAD(lock));
18380Sstevel@tonic-gate 	ASSERT(NO_DEPENDENTS(lock));
18390Sstevel@tonic-gate }
18400Sstevel@tonic-gate 
18410Sstevel@tonic-gate /*
18420Sstevel@tonic-gate  * The dependents of request are checked for dependencies against the
18430Sstevel@tonic-gate  * locks in topology (called topology because the array is, and should
18440Sstevel@tonic-gate  * be, in topological order for this algorithm; if it is not, the
18450Sstevel@tonic-gate  * inner loop below might add more edges than necessary. Topological
18460Sstevel@tonic-gate  * ordering of vertices satisfies the property that all edges go from
18470Sstevel@tonic-gate  * left to right, i.e., topology[i] can have an edge to topology[j] iff
18480Sstevel@tonic-gate  * i < j). If lock l1 in the dependent set of request is dependent on
18490Sstevel@tonic-gate  * (blocked by) lock l2 in topology but does not have a path to it, we
18500Sstevel@tonic-gate  * add an edge in the inner loop below.
18510Sstevel@tonic-gate  *
18520Sstevel@tonic-gate  * We don't want to add an edge between l1 and l2 if there already
18530Sstevel@tonic-gate  * exists a path from l1 to l2, so care has to be taken for those
18540Sstevel@tonic-gate  * vertices that have two paths to 'request'. These vertices are
18550Sstevel@tonic-gate  * referred to here as barrier locks.
18560Sstevel@tonic-gate  *
18570Sstevel@tonic-gate  * The barriers have to be found (those vertices that originally had
18580Sstevel@tonic-gate  * two paths to request) because otherwise we may end up adding edges
18590Sstevel@tonic-gate  * unnecessarily to vertices in topology; a barrier vertex can have an
18600Sstevel@tonic-gate  * edge to a vertex in topology as well as a path to it.
18610Sstevel@tonic-gate  */
18620Sstevel@tonic-gate 
18630Sstevel@tonic-gate static void
18640Sstevel@tonic-gate flk_recompute_dependencies(lock_descriptor_t *request,
18650Sstevel@tonic-gate 		lock_descriptor_t **topology,
18660Sstevel@tonic-gate 			int nvertex, int update_graph)
18670Sstevel@tonic-gate {
18680Sstevel@tonic-gate 	lock_descriptor_t *vertex, *lock;
18690Sstevel@tonic-gate 	graph_t	*gp = request->l_graph;
18700Sstevel@tonic-gate 	int i, count;
18710Sstevel@tonic-gate 	int barrier_found = 0;
18720Sstevel@tonic-gate 	edge_t	*ep;
18730Sstevel@tonic-gate 	lock_descriptor_t *vertex_stack;
18740Sstevel@tonic-gate 
18750Sstevel@tonic-gate 	STACK_INIT(vertex_stack);
18760Sstevel@tonic-gate 
18770Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&gp->gp_mutex));
18780Sstevel@tonic-gate 	if (nvertex == 0)
18790Sstevel@tonic-gate 		return;
18800Sstevel@tonic-gate 	flk_graph_uncolor(request->l_graph);
18810Sstevel@tonic-gate 	barrier_found = flk_find_barriers(request);
18820Sstevel@tonic-gate 	request->l_state |= RECOMPUTE_DONE;
18830Sstevel@tonic-gate 
18840Sstevel@tonic-gate 	STACK_PUSH(vertex_stack, request, l_stack);
18850Sstevel@tonic-gate 	request->l_sedge = FIRST_IN(request);
18860Sstevel@tonic-gate 
18870Sstevel@tonic-gate 
18880Sstevel@tonic-gate 	while ((vertex = STACK_TOP(vertex_stack)) != NULL) {
18890Sstevel@tonic-gate 		if (vertex->l_state & RECOMPUTE_DONE) {
18900Sstevel@tonic-gate 			count = 0;
18910Sstevel@tonic-gate 			goto next_in_edge;
18920Sstevel@tonic-gate 		}
18930Sstevel@tonic-gate 		if (IS_BARRIER(vertex)) {
18940Sstevel@tonic-gate 			/* decrement the barrier count */
18950Sstevel@tonic-gate 			if (vertex->l_index) {
18960Sstevel@tonic-gate 				vertex->l_index--;
18970Sstevel@tonic-gate 				/* this guy will be pushed again anyway ? */
18980Sstevel@tonic-gate 				STACK_POP(vertex_stack, l_stack);
18990Sstevel@tonic-gate 				if (vertex->l_index == 0)  {
19000Sstevel@tonic-gate 				/*
19010Sstevel@tonic-gate 				 * barrier is over we can recompute
19020Sstevel@tonic-gate 				 * dependencies for this lock in the
19030Sstevel@tonic-gate 				 * next stack pop
19040Sstevel@tonic-gate 				 */
19050Sstevel@tonic-gate 					vertex->l_state &= ~BARRIER_LOCK;
19060Sstevel@tonic-gate 				}
19070Sstevel@tonic-gate 				continue;
19080Sstevel@tonic-gate 			}
19090Sstevel@tonic-gate 		}
19100Sstevel@tonic-gate 		vertex->l_state |= RECOMPUTE_DONE;
19110Sstevel@tonic-gate 		flk_graph_uncolor(gp);
19120Sstevel@tonic-gate 		count = flk_color_reachables(vertex);
19130Sstevel@tonic-gate 		for (i = 0; i < nvertex; i++) {
19140Sstevel@tonic-gate 			lock = topology[i];
19150Sstevel@tonic-gate 			if (COLORED(lock))
19160Sstevel@tonic-gate 				continue;
19170Sstevel@tonic-gate 			if (BLOCKS(lock, vertex)) {
19180Sstevel@tonic-gate 				(void) flk_add_edge(vertex, lock,
19190Sstevel@tonic-gate 				    NO_CHECK_CYCLE, update_graph);
19200Sstevel@tonic-gate 				COLOR(lock);
19210Sstevel@tonic-gate 				count++;
19220Sstevel@tonic-gate 				count += flk_color_reachables(lock);
19230Sstevel@tonic-gate 			}
19240Sstevel@tonic-gate 
19250Sstevel@tonic-gate 		}
19260Sstevel@tonic-gate 
19270Sstevel@tonic-gate next_in_edge:
19280Sstevel@tonic-gate 		if (count == nvertex ||
19290Sstevel@tonic-gate 				vertex->l_sedge == HEAD(vertex)) {
19300Sstevel@tonic-gate 			/* prune the tree below this */
19310Sstevel@tonic-gate 			STACK_POP(vertex_stack, l_stack);
19320Sstevel@tonic-gate 			vertex->l_state &= ~RECOMPUTE_DONE;
19330Sstevel@tonic-gate 			/* update the barrier locks below this! */
19340Sstevel@tonic-gate 			if (vertex->l_sedge != HEAD(vertex) && barrier_found) {
19350Sstevel@tonic-gate 				flk_graph_uncolor(gp);
19360Sstevel@tonic-gate 				flk_update_barriers(vertex);
19370Sstevel@tonic-gate 			}
19380Sstevel@tonic-gate 			continue;
19390Sstevel@tonic-gate 		}
19400Sstevel@tonic-gate 
19410Sstevel@tonic-gate 		ep = vertex->l_sedge;
19420Sstevel@tonic-gate 		lock = ep->from_vertex;
19430Sstevel@tonic-gate 		STACK_PUSH(vertex_stack, lock, l_stack);
19440Sstevel@tonic-gate 		lock->l_sedge = FIRST_IN(lock);
19450Sstevel@tonic-gate 		vertex->l_sedge = NEXT_IN(ep);
19460Sstevel@tonic-gate 	}
19470Sstevel@tonic-gate 
19480Sstevel@tonic-gate }
19490Sstevel@tonic-gate 
19500Sstevel@tonic-gate /*
19510Sstevel@tonic-gate  * Color all reachable vertices from vertex that belongs to topology (here
19520Sstevel@tonic-gate  * those that have RECOMPUTE_LOCK set in their state) and yet uncolored.
19530Sstevel@tonic-gate  *
19540Sstevel@tonic-gate  * Note: we need to use a different stack_link l_stack1 because this is
19550Sstevel@tonic-gate  * called from flk_recompute_dependencies() that already uses a stack with
19560Sstevel@tonic-gate  * l_stack as stack_link.
19570Sstevel@tonic-gate  */
19580Sstevel@tonic-gate 
19590Sstevel@tonic-gate static int
19600Sstevel@tonic-gate flk_color_reachables(lock_descriptor_t *vertex)
19610Sstevel@tonic-gate {
19620Sstevel@tonic-gate 	lock_descriptor_t *ver, *lock;
19630Sstevel@tonic-gate 	int count;
19640Sstevel@tonic-gate 	edge_t	*ep;
19650Sstevel@tonic-gate 	lock_descriptor_t *vertex_stack;
19660Sstevel@tonic-gate 
19670Sstevel@tonic-gate 	STACK_INIT(vertex_stack);
19680Sstevel@tonic-gate 
19690Sstevel@tonic-gate 	STACK_PUSH(vertex_stack, vertex, l_stack1);
19700Sstevel@tonic-gate 	count = 0;
19710Sstevel@tonic-gate 	while ((ver = STACK_TOP(vertex_stack)) != NULL) {
19720Sstevel@tonic-gate 
19730Sstevel@tonic-gate 		STACK_POP(vertex_stack, l_stack1);
19740Sstevel@tonic-gate 		for (ep = FIRST_ADJ(ver); ep != HEAD(ver);
19750Sstevel@tonic-gate 					ep = NEXT_ADJ(ep)) {
19760Sstevel@tonic-gate 			lock = ep->to_vertex;
19770Sstevel@tonic-gate 			if (COLORED(lock))
19780Sstevel@tonic-gate 				continue;
19790Sstevel@tonic-gate 			COLOR(lock);
19800Sstevel@tonic-gate 			if (IS_RECOMPUTE(lock))
19810Sstevel@tonic-gate 				count++;
19820Sstevel@tonic-gate 			STACK_PUSH(vertex_stack, lock, l_stack1);
19830Sstevel@tonic-gate 		}
19840Sstevel@tonic-gate 
19850Sstevel@tonic-gate 	}
19860Sstevel@tonic-gate 	return (count);
19870Sstevel@tonic-gate }
19880Sstevel@tonic-gate 
19890Sstevel@tonic-gate /*
19900Sstevel@tonic-gate  * Called from flk_recompute_dependencies() this routine decrements
19910Sstevel@tonic-gate  * the barrier count of barrier vertices that are reachable from lock.
19920Sstevel@tonic-gate  */
19930Sstevel@tonic-gate 
19940Sstevel@tonic-gate static void
19950Sstevel@tonic-gate flk_update_barriers(lock_descriptor_t *lock)
19960Sstevel@tonic-gate {
19970Sstevel@tonic-gate 	lock_descriptor_t *vertex, *lck;
19980Sstevel@tonic-gate 	edge_t	*ep;
19990Sstevel@tonic-gate 	lock_descriptor_t *vertex_stack;
20000Sstevel@tonic-gate 
20010Sstevel@tonic-gate 	STACK_INIT(vertex_stack);
20020Sstevel@tonic-gate 
20030Sstevel@tonic-gate 	STACK_PUSH(vertex_stack, lock, l_stack1);
20040Sstevel@tonic-gate 
20050Sstevel@tonic-gate 	while ((vertex = STACK_TOP(vertex_stack)) != NULL) {
20060Sstevel@tonic-gate 		STACK_POP(vertex_stack, l_stack1);
20070Sstevel@tonic-gate 		for (ep = FIRST_IN(vertex); ep != HEAD(vertex);
20080Sstevel@tonic-gate 						ep = NEXT_IN(ep)) {
20090Sstevel@tonic-gate 			lck = ep->from_vertex;
20100Sstevel@tonic-gate 			if (COLORED(lck)) {
20110Sstevel@tonic-gate 				if (IS_BARRIER(lck)) {
20120Sstevel@tonic-gate 					ASSERT(lck->l_index > 0);
20130Sstevel@tonic-gate 					lck->l_index--;
20140Sstevel@tonic-gate 					if (lck->l_index == 0)
20150Sstevel@tonic-gate 						lck->l_state &= ~BARRIER_LOCK;
20160Sstevel@tonic-gate 				}
20170Sstevel@tonic-gate 				continue;
20180Sstevel@tonic-gate 			}
20190Sstevel@tonic-gate 			COLOR(lck);
20200Sstevel@tonic-gate 			if (IS_BARRIER(lck)) {
20210Sstevel@tonic-gate 				ASSERT(lck->l_index > 0);
20220Sstevel@tonic-gate 				lck->l_index--;
20230Sstevel@tonic-gate 				if (lck->l_index == 0)
20240Sstevel@tonic-gate 					lck->l_state &= ~BARRIER_LOCK;
20250Sstevel@tonic-gate 			}
20260Sstevel@tonic-gate 			STACK_PUSH(vertex_stack, lck, l_stack1);
20270Sstevel@tonic-gate 		}
20280Sstevel@tonic-gate 	}
20290Sstevel@tonic-gate }
20300Sstevel@tonic-gate 
20310Sstevel@tonic-gate /*
20320Sstevel@tonic-gate  * Finds all vertices that are reachable from 'lock' more than once,
20330Sstevel@tonic-gate  * marks them as barrier vertices, and increments their barrier count.
20340Sstevel@tonic-gate  * The barrier count is the total number of paths from lock to that
20350Sstevel@tonic-gate  * vertex, minus one.
20360Sstevel@tonic-gate  */
20370Sstevel@tonic-gate 
20380Sstevel@tonic-gate static int
20390Sstevel@tonic-gate flk_find_barriers(lock_descriptor_t *lock)
20400Sstevel@tonic-gate {
20410Sstevel@tonic-gate 	lock_descriptor_t *vertex, *lck;
20420Sstevel@tonic-gate 	int found = 0;
20430Sstevel@tonic-gate 	edge_t	*ep;
20440Sstevel@tonic-gate 	lock_descriptor_t *vertex_stack;
20450Sstevel@tonic-gate 
20460Sstevel@tonic-gate 	STACK_INIT(vertex_stack);
20470Sstevel@tonic-gate 
20480Sstevel@tonic-gate 	STACK_PUSH(vertex_stack, lock, l_stack1);
20490Sstevel@tonic-gate 
20500Sstevel@tonic-gate 	while ((vertex = STACK_TOP(vertex_stack)) != NULL) {
20510Sstevel@tonic-gate 		STACK_POP(vertex_stack, l_stack1);
20520Sstevel@tonic-gate 		for (ep = FIRST_IN(vertex); ep != HEAD(vertex);
20530Sstevel@tonic-gate 						ep = NEXT_IN(ep)) {
20540Sstevel@tonic-gate 			lck = ep->from_vertex;
20550Sstevel@tonic-gate 			if (COLORED(lck)) {
20560Sstevel@tonic-gate 				/* this is a barrier */
20570Sstevel@tonic-gate 				lck->l_state |= BARRIER_LOCK;
20580Sstevel@tonic-gate 				/* index will have barrier count */
20590Sstevel@tonic-gate 				lck->l_index++;
20600Sstevel@tonic-gate 				if (!found)
20610Sstevel@tonic-gate 					found = 1;
20620Sstevel@tonic-gate 				continue;
20630Sstevel@tonic-gate 			}
20640Sstevel@tonic-gate 			COLOR(lck);
20650Sstevel@tonic-gate 			lck->l_index = 0;
20660Sstevel@tonic-gate 			STACK_PUSH(vertex_stack, lck, l_stack1);
20670Sstevel@tonic-gate 		}
20680Sstevel@tonic-gate 	}
20690Sstevel@tonic-gate 	return (found);
20700Sstevel@tonic-gate }
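
/*
 * Illustrative model of the barrier bookkeeping above (hypothetical
 * struct; 'mark' assumed fresh).  Reaching an already-colored vertex
 * during the backward walk means one more path to it was found, so
 * its index ends up as the number of paths minus one -- the number of
 * extra visits flk_recompute_dependencies() must absorb before it may
 * recompute that vertex.
 */
#include <stddef.h>

#define	NIN	8

struct bvtx {
	unsigned color;			/* visited mark */
	int index;			/* barrier count */
	struct bvtx *in[NIN];		/* incoming edges */
	int nin;
	struct bvtx *stack_link;	/* explicit DFS stack */
};

static int
mark_barriers(struct bvtx *start, unsigned mark)
{
	struct bvtx *stack = start, *v;
	int i, found = 0;

	start->stack_link = NULL;
	while ((v = stack) != NULL) {
		stack = v->stack_link;
		for (i = 0; i < v->nin; i++) {
			struct bvtx *w = v->in[i];
			if (w->color == mark) {
				w->index++;	/* extra path: barrier */
				found = 1;
				continue;
			}
			w->color = mark;	/* first visit */
			w->index = 0;
			w->stack_link = stack;
			stack = w;
		}
	}
	return (found);
}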
20710Sstevel@tonic-gate 
20720Sstevel@tonic-gate /*
20730Sstevel@tonic-gate  * Finds the first lock that is mainly responsible for blocking this
20740Sstevel@tonic-gate  * request.  If there is no such lock, request->l_flock.l_type is set to
20750Sstevel@tonic-gate  * F_UNLCK.  Otherwise, request->l_flock is filled in with the particulars
20760Sstevel@tonic-gate  * of the blocking lock.
20770Sstevel@tonic-gate  *
20780Sstevel@tonic-gate  * Note: It is possible a request is blocked by a sleeping lock because
20790Sstevel@tonic-gate  * of the fairness policy used in flk_process_request() to construct the
20800Sstevel@tonic-gate  * dependencies. (see comments before flk_process_request()).
20810Sstevel@tonic-gate  */
20820Sstevel@tonic-gate 
20830Sstevel@tonic-gate static void
20840Sstevel@tonic-gate flk_get_first_blocking_lock(lock_descriptor_t *request)
20850Sstevel@tonic-gate {
20860Sstevel@tonic-gate 	graph_t	*gp = request->l_graph;
20870Sstevel@tonic-gate 	vnode_t *vp = request->l_vnode;
20880Sstevel@tonic-gate 	lock_descriptor_t *lock, *blocker;
20890Sstevel@tonic-gate 
20900Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&gp->gp_mutex));
20910Sstevel@tonic-gate 	blocker = NULL;
20920Sstevel@tonic-gate 	SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp);
20930Sstevel@tonic-gate 
20940Sstevel@tonic-gate 	if (lock) {
20950Sstevel@tonic-gate 		do {
20960Sstevel@tonic-gate 			if (BLOCKS(lock, request)) {
20970Sstevel@tonic-gate 				blocker = lock;
20980Sstevel@tonic-gate 				break;
20990Sstevel@tonic-gate 			}
21000Sstevel@tonic-gate 			lock = lock->l_next;
21010Sstevel@tonic-gate 		} while (lock->l_vnode == vp);
21020Sstevel@tonic-gate 	}
21030Sstevel@tonic-gate 
21040Sstevel@tonic-gate 	if (blocker) {
21050Sstevel@tonic-gate 		report_blocker(blocker, request);
21060Sstevel@tonic-gate 	} else
21070Sstevel@tonic-gate 		request->l_flock.l_type = F_UNLCK;
21080Sstevel@tonic-gate }
21090Sstevel@tonic-gate 
21100Sstevel@tonic-gate /*
21110Sstevel@tonic-gate  * Get the graph_t structure associated with a vnode.
21120Sstevel@tonic-gate  * If 'initialize' is non-zero, and the graph_t structure for this vnode has
21130Sstevel@tonic-gate  * not yet been initialized, then a new element is allocated and returned.
21140Sstevel@tonic-gate  */
21150Sstevel@tonic-gate graph_t *
21160Sstevel@tonic-gate flk_get_lock_graph(vnode_t *vp, int initialize)
21170Sstevel@tonic-gate {
21180Sstevel@tonic-gate 	graph_t *gp;
21190Sstevel@tonic-gate 	graph_t *gp_alloc = NULL;
21200Sstevel@tonic-gate 	int index = HASH_INDEX(vp);
21210Sstevel@tonic-gate 
21220Sstevel@tonic-gate 	if (initialize == FLK_USE_GRAPH) {
21230Sstevel@tonic-gate 		mutex_enter(&flock_lock);
21240Sstevel@tonic-gate 		gp = lock_graph[index];
21250Sstevel@tonic-gate 		mutex_exit(&flock_lock);
21260Sstevel@tonic-gate 		return (gp);
21270Sstevel@tonic-gate 	}
21280Sstevel@tonic-gate 
21290Sstevel@tonic-gate 	ASSERT(initialize == FLK_INIT_GRAPH);
21300Sstevel@tonic-gate 
21310Sstevel@tonic-gate 	if (lock_graph[index] == NULL) {
21320Sstevel@tonic-gate 
21330Sstevel@tonic-gate 		gp_alloc = kmem_zalloc(sizeof (graph_t), KM_SLEEP);
21340Sstevel@tonic-gate 
21350Sstevel@tonic-gate 		/* Initialize the graph */
21360Sstevel@tonic-gate 
21370Sstevel@tonic-gate 		gp_alloc->active_locks.l_next =
21380Sstevel@tonic-gate 		    gp_alloc->active_locks.l_prev =
21390Sstevel@tonic-gate 		    (lock_descriptor_t *)ACTIVE_HEAD(gp_alloc);
21400Sstevel@tonic-gate 		gp_alloc->sleeping_locks.l_next =
21410Sstevel@tonic-gate 		    gp_alloc->sleeping_locks.l_prev =
21420Sstevel@tonic-gate 		    (lock_descriptor_t *)SLEEPING_HEAD(gp_alloc);
21430Sstevel@tonic-gate 		gp_alloc->index = index;
21440Sstevel@tonic-gate 		mutex_init(&gp_alloc->gp_mutex, NULL, MUTEX_DEFAULT, NULL);
21450Sstevel@tonic-gate 	}
21460Sstevel@tonic-gate 
21470Sstevel@tonic-gate 	mutex_enter(&flock_lock);
21480Sstevel@tonic-gate 
21490Sstevel@tonic-gate 	gp = lock_graph[index];
21500Sstevel@tonic-gate 
21510Sstevel@tonic-gate 	/* Recheck the value within flock_lock */
21520Sstevel@tonic-gate 	if (gp == NULL) {
21530Sstevel@tonic-gate 		struct flock_globals *fg;
21540Sstevel@tonic-gate 
21550Sstevel@tonic-gate 		/* We must have previously allocated the graph_t structure */
21560Sstevel@tonic-gate 		ASSERT(gp_alloc != NULL);
21570Sstevel@tonic-gate 		lock_graph[index] = gp = gp_alloc;
21580Sstevel@tonic-gate 		/*
21590Sstevel@tonic-gate 		 * The lockmgr status is only needed if KLM is loaded.
21600Sstevel@tonic-gate 		 */
21610Sstevel@tonic-gate 		if (flock_zone_key != ZONE_KEY_UNINITIALIZED) {
21620Sstevel@tonic-gate 			fg = flk_get_globals();
21630Sstevel@tonic-gate 			fg->lockmgr_status[index] = fg->flk_lockmgr_status;
21640Sstevel@tonic-gate 		}
21650Sstevel@tonic-gate 	}
21660Sstevel@tonic-gate 
21670Sstevel@tonic-gate 	mutex_exit(&flock_lock);
21680Sstevel@tonic-gate 
21690Sstevel@tonic-gate 	if ((gp_alloc != NULL) && (gp != gp_alloc)) {
21700Sstevel@tonic-gate 		/* There was a race to allocate the graph_t and we lost */
21710Sstevel@tonic-gate 		mutex_destroy(&gp_alloc->gp_mutex);
21720Sstevel@tonic-gate 		kmem_free(gp_alloc, sizeof (graph_t));
21730Sstevel@tonic-gate 	}
21740Sstevel@tonic-gate 
21750Sstevel@tonic-gate 	return (gp);
21760Sstevel@tonic-gate }
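
/*
 * A pthread model of the allocate-then-recheck pattern used by
 * flk_get_lock_graph() above (hypothetical names; a single slot stands
 * in for the lock_graph[] hash array).  The allocation happens outside
 * the global lock, the slot is re-examined under the lock, and the
 * loser of any race simply frees its redundant copy.
 */
#include <stdlib.h>
#include <pthread.h>

static pthread_mutex_t slot_lock = PTHREAD_MUTEX_INITIALIZER;
static void *slot;

void *
get_slot(size_t sz)
{
	void *alloc = NULL, *ret;

	if (slot == NULL)		/* unlocked peek */
		alloc = calloc(1, sz);	/* possibly wasted work */

	pthread_mutex_lock(&slot_lock);
	if (slot == NULL)
		slot = alloc;		/* no race, or we won it */
	ret = slot;
	pthread_mutex_unlock(&slot_lock);

	if (alloc != NULL && ret != alloc)
		free(alloc);		/* lost the race */
	return (ret);
}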
21770Sstevel@tonic-gate 
21780Sstevel@tonic-gate /*
21790Sstevel@tonic-gate  * PSARC case 1997/292
21800Sstevel@tonic-gate  */
21810Sstevel@tonic-gate int
21820Sstevel@tonic-gate cl_flk_has_remote_locks_for_nlmid(vnode_t *vp, int nlmid)
21830Sstevel@tonic-gate {
21840Sstevel@tonic-gate 	lock_descriptor_t *lock;
21850Sstevel@tonic-gate 	int result = 0;
21860Sstevel@tonic-gate 	graph_t *gp;
21870Sstevel@tonic-gate 	int			lock_nlmid;
21880Sstevel@tonic-gate 
21890Sstevel@tonic-gate 	/*
21900Sstevel@tonic-gate 	 * Check to see if node is booted as a cluster. If not, return.
21910Sstevel@tonic-gate 	 */
21920Sstevel@tonic-gate 	if ((cluster_bootflags & CLUSTER_BOOTED) == 0) {
21930Sstevel@tonic-gate 		return (0);
21940Sstevel@tonic-gate 	}
21950Sstevel@tonic-gate 
21960Sstevel@tonic-gate 	gp = flk_get_lock_graph(vp, FLK_USE_GRAPH);
21970Sstevel@tonic-gate 	if (gp == NULL) {
21980Sstevel@tonic-gate 		return (0);
21990Sstevel@tonic-gate 	}
22000Sstevel@tonic-gate 
22010Sstevel@tonic-gate 	mutex_enter(&gp->gp_mutex);
22020Sstevel@tonic-gate 
22030Sstevel@tonic-gate 	SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp);
22040Sstevel@tonic-gate 
22050Sstevel@tonic-gate 	if (lock) {
22060Sstevel@tonic-gate 		while (lock->l_vnode == vp) {
22070Sstevel@tonic-gate 			/* get NLM id from sysid */
22080Sstevel@tonic-gate 			lock_nlmid = GETNLMID(lock->l_flock.l_sysid);
22090Sstevel@tonic-gate 
22100Sstevel@tonic-gate 			/*
22110Sstevel@tonic-gate 			 * If NLM server request _and_ nlmid of lock matches
22120Sstevel@tonic-gate 			 * nlmid of argument, then we've found a remote lock.
22130Sstevel@tonic-gate 			 */
22140Sstevel@tonic-gate 			if (IS_LOCKMGR(lock) && nlmid == lock_nlmid) {
22150Sstevel@tonic-gate 				result = 1;
22160Sstevel@tonic-gate 				goto done;
22170Sstevel@tonic-gate 			}
22180Sstevel@tonic-gate 			lock = lock->l_next;
22190Sstevel@tonic-gate 		}
22200Sstevel@tonic-gate 	}
22210Sstevel@tonic-gate 
22220Sstevel@tonic-gate 	SET_LOCK_TO_FIRST_SLEEP_VP(gp, lock, vp);
22230Sstevel@tonic-gate 
22240Sstevel@tonic-gate 	if (lock) {
22250Sstevel@tonic-gate 		while (lock->l_vnode == vp) {
22260Sstevel@tonic-gate 			/* get NLM id from sysid */
22270Sstevel@tonic-gate 			lock_nlmid = GETNLMID(lock->l_flock.l_sysid);
22280Sstevel@tonic-gate 
22290Sstevel@tonic-gate 			/*
22300Sstevel@tonic-gate 			 * If NLM server request _and_ nlmid of lock matches
22310Sstevel@tonic-gate 			 * nlmid of argument, then we've found a remote lock.
22320Sstevel@tonic-gate 			 */
22330Sstevel@tonic-gate 			if (IS_LOCKMGR(lock) && nlmid == lock_nlmid) {
22340Sstevel@tonic-gate 				result = 1;
22350Sstevel@tonic-gate 				goto done;
22360Sstevel@tonic-gate 			}
22370Sstevel@tonic-gate 			lock = lock->l_next;
22380Sstevel@tonic-gate 		}
22390Sstevel@tonic-gate 	}
22400Sstevel@tonic-gate 
22410Sstevel@tonic-gate done:
22420Sstevel@tonic-gate 	mutex_exit(&gp->gp_mutex);
22430Sstevel@tonic-gate 	return (result);
22440Sstevel@tonic-gate }
22450Sstevel@tonic-gate 
22460Sstevel@tonic-gate /* ONC_PLUS EXTRACT START */
22470Sstevel@tonic-gate /*
22480Sstevel@tonic-gate  * Determine whether there are any locks for the given vnode with a remote
22490Sstevel@tonic-gate  * sysid.  Returns zero if not, non-zero if there are.
22500Sstevel@tonic-gate  *
22510Sstevel@tonic-gate  * Note that the return value from this function is potentially invalid
22520Sstevel@tonic-gate  * once it has been returned.  The caller is responsible for providing its
22530Sstevel@tonic-gate  * own synchronization mechanism to ensure that the return value is useful
22540Sstevel@tonic-gate  * (e.g., see nfs_lockcompletion()).
22550Sstevel@tonic-gate  */
22560Sstevel@tonic-gate int
22570Sstevel@tonic-gate flk_has_remote_locks(vnode_t *vp)
22580Sstevel@tonic-gate {
22590Sstevel@tonic-gate 	lock_descriptor_t *lock;
22600Sstevel@tonic-gate 	int result = 0;
22610Sstevel@tonic-gate 	graph_t *gp;
22620Sstevel@tonic-gate 
22630Sstevel@tonic-gate 	gp = flk_get_lock_graph(vp, FLK_USE_GRAPH);
22640Sstevel@tonic-gate 	if (gp == NULL) {
22650Sstevel@tonic-gate 		return (0);
22660Sstevel@tonic-gate 	}
22670Sstevel@tonic-gate 
22680Sstevel@tonic-gate 	mutex_enter(&gp->gp_mutex);
22690Sstevel@tonic-gate 
22700Sstevel@tonic-gate 	SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp);
22710Sstevel@tonic-gate 
22720Sstevel@tonic-gate 	if (lock) {
22730Sstevel@tonic-gate 		while (lock->l_vnode == vp) {
22740Sstevel@tonic-gate 			if (IS_REMOTE(lock)) {
22750Sstevel@tonic-gate 				result = 1;
22760Sstevel@tonic-gate 				goto done;
22770Sstevel@tonic-gate 			}
22780Sstevel@tonic-gate 			lock = lock->l_next;
22790Sstevel@tonic-gate 		}
22800Sstevel@tonic-gate 	}
22810Sstevel@tonic-gate 
22820Sstevel@tonic-gate 	SET_LOCK_TO_FIRST_SLEEP_VP(gp, lock, vp);
22830Sstevel@tonic-gate 
22840Sstevel@tonic-gate 	if (lock) {
22850Sstevel@tonic-gate 		while (lock->l_vnode == vp) {
22860Sstevel@tonic-gate 			if (IS_REMOTE(lock)) {
22870Sstevel@tonic-gate 				result = 1;
22880Sstevel@tonic-gate 				goto done;
22890Sstevel@tonic-gate 			}
22900Sstevel@tonic-gate 			lock = lock->l_next;
22910Sstevel@tonic-gate 		}
22920Sstevel@tonic-gate 	}
22930Sstevel@tonic-gate 
22940Sstevel@tonic-gate done:
22950Sstevel@tonic-gate 	mutex_exit(&gp->gp_mutex);
22960Sstevel@tonic-gate 	return (result);
22970Sstevel@tonic-gate }
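
/*
 * Usage sketch (hypothetical caller): since the return value can go stale
 * as soon as gp_mutex is dropped, a caller brackets the check with its own
 * synchronization, along the lines of nfs_lockcompletion():
 *
 *	rw_enter(&caller_rwlock, RW_WRITER);
 *	if (flk_has_remote_locks(vp))
 *		keep_vnode_uncached(vp);
 *	rw_exit(&caller_rwlock);
 *
 * "caller_rwlock" and "keep_vnode_uncached" are assumed caller-side names,
 * not part of this file; the point is only that the caller's own lock is
 * what makes the snapshot safe to act on.
 */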
22980Sstevel@tonic-gate 
22990Sstevel@tonic-gate /*
23000Sstevel@tonic-gate  * Determine if there are any locks owned by the given sysid.
23010Sstevel@tonic-gate  * Returns zero if not, non-zero if there are.  Note that this return code
23020Sstevel@tonic-gate  * could be derived from flk_get_{sleeping,active}_locks, but this routine
23030Sstevel@tonic-gate  * avoids all the memory allocations of those routines.
23040Sstevel@tonic-gate  *
23050Sstevel@tonic-gate  * This routine has the same synchronization issues as
23060Sstevel@tonic-gate  * flk_has_remote_locks.
23070Sstevel@tonic-gate  */
23080Sstevel@tonic-gate 
23090Sstevel@tonic-gate int
23100Sstevel@tonic-gate flk_sysid_has_locks(int sysid, int lck_type)
23110Sstevel@tonic-gate {
23120Sstevel@tonic-gate 	int		has_locks = 0;
23130Sstevel@tonic-gate 	lock_descriptor_t	*lock;
23140Sstevel@tonic-gate 	graph_t 	*gp;
23150Sstevel@tonic-gate 	int		i;
23160Sstevel@tonic-gate 
23170Sstevel@tonic-gate 	for (i = 0; i < HASH_SIZE && !has_locks; i++) {
23180Sstevel@tonic-gate 		mutex_enter(&flock_lock);
23190Sstevel@tonic-gate 		gp = lock_graph[i];
23200Sstevel@tonic-gate 		mutex_exit(&flock_lock);
23210Sstevel@tonic-gate 		if (gp == NULL) {
23220Sstevel@tonic-gate 			continue;
23230Sstevel@tonic-gate 		}
23240Sstevel@tonic-gate 
23250Sstevel@tonic-gate 		mutex_enter(&gp->gp_mutex);
23260Sstevel@tonic-gate 
23270Sstevel@tonic-gate 		if (lck_type & FLK_QUERY_ACTIVE) {
23280Sstevel@tonic-gate 			for (lock = ACTIVE_HEAD(gp)->l_next;
23290Sstevel@tonic-gate 			    lock != ACTIVE_HEAD(gp) && !has_locks;
23300Sstevel@tonic-gate 			    lock = lock->l_next) {
23310Sstevel@tonic-gate 				if (lock->l_flock.l_sysid == sysid)
23320Sstevel@tonic-gate 					has_locks = 1;
23330Sstevel@tonic-gate 			}
23340Sstevel@tonic-gate 		}
23350Sstevel@tonic-gate 
23360Sstevel@tonic-gate 		if (lck_type & FLK_QUERY_SLEEPING) {
23370Sstevel@tonic-gate 			for (lock = SLEEPING_HEAD(gp)->l_next;
23380Sstevel@tonic-gate 			    lock != SLEEPING_HEAD(gp) && !has_locks;
23390Sstevel@tonic-gate 			    lock = lock->l_next) {
23400Sstevel@tonic-gate 				if (lock->l_flock.l_sysid == sysid)
23410Sstevel@tonic-gate 					has_locks = 1;
23420Sstevel@tonic-gate 			}
23430Sstevel@tonic-gate 		}
23440Sstevel@tonic-gate 		mutex_exit(&gp->gp_mutex);
23450Sstevel@tonic-gate 	}
23460Sstevel@tonic-gate 
23470Sstevel@tonic-gate 	return (has_locks);
23480Sstevel@tonic-gate }
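
/*
 * Usage sketch: both lock lists can be queried in a single call by OR-ing
 * the query flags, e.g. to ask whether client "sysid" (value assumed) has
 * any lock state left on this host:
 *
 *	if (flk_sysid_has_locks(sysid,
 *	    FLK_QUERY_ACTIVE | FLK_QUERY_SLEEPING)) {
 *		... client still holds or is waiting for locks ...
 *	}
 *
 * As with flk_has_remote_locks(), the answer is only a snapshot; a caller
 * that needs more must provide its own synchronization.
 */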
23490Sstevel@tonic-gate 
23500Sstevel@tonic-gate 
23510Sstevel@tonic-gate /*
23520Sstevel@tonic-gate  * PSARC case 1997/292
23530Sstevel@tonic-gate  *
23540Sstevel@tonic-gate  * Requires: "sysid" is a pair [nlmid, sysid].  The lower half is a 16-bit
23550Sstevel@tonic-gate  *  quantity, the real sysid generated by the NLM server; the upper half
23560Sstevel@tonic-gate  *  identifies the node of the cluster where the NLM server ran.
23570Sstevel@tonic-gate  *  This routine is only called by an NLM server running in a cluster.
23580Sstevel@tonic-gate  * Effects: Remove all locks held on behalf of the client identified
23590Sstevel@tonic-gate  *  by "sysid."
23600Sstevel@tonic-gate  */
23610Sstevel@tonic-gate void
23620Sstevel@tonic-gate cl_flk_remove_locks_by_sysid(int sysid)
23630Sstevel@tonic-gate {
23640Sstevel@tonic-gate 	graph_t	*gp;
23650Sstevel@tonic-gate 	int i;
23660Sstevel@tonic-gate 	lock_descriptor_t *lock, *nlock;
23670Sstevel@tonic-gate 
23680Sstevel@tonic-gate 	/*
23690Sstevel@tonic-gate 	 * Check to see if node is booted as a cluster. If not, return.
23700Sstevel@tonic-gate 	 */
23710Sstevel@tonic-gate 	if ((cluster_bootflags & CLUSTER_BOOTED) == 0) {
23720Sstevel@tonic-gate 		return;
23730Sstevel@tonic-gate 	}
23740Sstevel@tonic-gate 
23750Sstevel@tonic-gate 	ASSERT(sysid != 0);
23760Sstevel@tonic-gate 	for (i = 0; i < HASH_SIZE; i++) {
23770Sstevel@tonic-gate 		mutex_enter(&flock_lock);
23780Sstevel@tonic-gate 		gp = lock_graph[i];
23790Sstevel@tonic-gate 		mutex_exit(&flock_lock);
23800Sstevel@tonic-gate 
23810Sstevel@tonic-gate 		if (gp == NULL)
23820Sstevel@tonic-gate 			continue;
23830Sstevel@tonic-gate 
23840Sstevel@tonic-gate 		mutex_enter(&gp->gp_mutex);	/*  get mutex on lock graph */
23850Sstevel@tonic-gate 
23860Sstevel@tonic-gate 		/* signal sleeping requests so that they bail out */
23870Sstevel@tonic-gate 		lock = SLEEPING_HEAD(gp)->l_next;
23880Sstevel@tonic-gate 		while (lock != SLEEPING_HEAD(gp)) {
23890Sstevel@tonic-gate 			nlock = lock->l_next;
23900Sstevel@tonic-gate 			if (lock->l_flock.l_sysid == sysid) {
23910Sstevel@tonic-gate 				INTERRUPT_WAKEUP(lock);
23920Sstevel@tonic-gate 			}
23930Sstevel@tonic-gate 			lock = nlock;
23940Sstevel@tonic-gate 		}
23950Sstevel@tonic-gate 
23960Sstevel@tonic-gate 		/* delete active locks */
23970Sstevel@tonic-gate 		lock = ACTIVE_HEAD(gp)->l_next;
23980Sstevel@tonic-gate 		while (lock != ACTIVE_HEAD(gp)) {
23990Sstevel@tonic-gate 			nlock = lock->l_next;
24000Sstevel@tonic-gate 			if (lock->l_flock.l_sysid == sysid) {
24010Sstevel@tonic-gate 				flk_delete_active_lock(lock, 0);
24020Sstevel@tonic-gate 				flk_wakeup(lock, 1);
24030Sstevel@tonic-gate 				flk_free_lock(lock);
24040Sstevel@tonic-gate 			}
24050Sstevel@tonic-gate 			lock = nlock;
24060Sstevel@tonic-gate 		}
24070Sstevel@tonic-gate 		mutex_exit(&gp->gp_mutex);    /* release mutex on lock graph */
24080Sstevel@tonic-gate 	}
24090Sstevel@tonic-gate }
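
/*
 * Illustrative decomposition of the clustered sysid described above (a
 * sketch only; this file itself uses just the GETNLMID() macro):
 *
 *	int nlmid = GETNLMID(sysid);		(upper half: cluster node)
 *	int nlm_sysid = sysid & 0xffff;		(lower half: NLM's own sysid)
 *
 * The 0xffff mask merely spells out the 16-bit split documented in the
 * PSARC 1997/292 comment; it is not a macro defined here.
 */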
24100Sstevel@tonic-gate 
24110Sstevel@tonic-gate /*
24120Sstevel@tonic-gate  * Delete all locks in the system that belong to the sysid of the request.
24130Sstevel@tonic-gate  */
24140Sstevel@tonic-gate 
24150Sstevel@tonic-gate static void
24160Sstevel@tonic-gate flk_delete_locks_by_sysid(lock_descriptor_t *request)
24170Sstevel@tonic-gate {
24180Sstevel@tonic-gate 	int	sysid  = request->l_flock.l_sysid;
24190Sstevel@tonic-gate 	lock_descriptor_t *lock, *nlock;
24200Sstevel@tonic-gate 	graph_t	*gp;
24210Sstevel@tonic-gate 	int i;
24220Sstevel@tonic-gate 
24230Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&request->l_graph->gp_mutex));
24240Sstevel@tonic-gate 	ASSERT(sysid != 0);
24250Sstevel@tonic-gate 
24260Sstevel@tonic-gate 	mutex_exit(&request->l_graph->gp_mutex);
24270Sstevel@tonic-gate 
24280Sstevel@tonic-gate 	for (i = 0; i < HASH_SIZE; i++) {
24290Sstevel@tonic-gate 		mutex_enter(&flock_lock);
24300Sstevel@tonic-gate 		gp = lock_graph[i];
24310Sstevel@tonic-gate 		mutex_exit(&flock_lock);
24320Sstevel@tonic-gate 
24330Sstevel@tonic-gate 		if (gp == NULL)
24340Sstevel@tonic-gate 			continue;
24350Sstevel@tonic-gate 
24360Sstevel@tonic-gate 		mutex_enter(&gp->gp_mutex);
24370Sstevel@tonic-gate 
24380Sstevel@tonic-gate 		/* signal sleeping requests so that they bail out */
24390Sstevel@tonic-gate 		lock = SLEEPING_HEAD(gp)->l_next;
24400Sstevel@tonic-gate 		while (lock != SLEEPING_HEAD(gp)) {
24410Sstevel@tonic-gate 			nlock = lock->l_next;
24420Sstevel@tonic-gate 			if (lock->l_flock.l_sysid == sysid) {
24430Sstevel@tonic-gate 				INTERRUPT_WAKEUP(lock);
24440Sstevel@tonic-gate 			}
24450Sstevel@tonic-gate 			lock = nlock;
24460Sstevel@tonic-gate 		}
24470Sstevel@tonic-gate 
24480Sstevel@tonic-gate 		/* delete active locks */
24490Sstevel@tonic-gate 		lock = ACTIVE_HEAD(gp)->l_next;
24500Sstevel@tonic-gate 		while (lock != ACTIVE_HEAD(gp)) {
24510Sstevel@tonic-gate 			nlock = lock->l_next;
24520Sstevel@tonic-gate 			if (lock->l_flock.l_sysid == sysid) {
24530Sstevel@tonic-gate 				flk_delete_active_lock(lock, 0);
24540Sstevel@tonic-gate 				flk_wakeup(lock, 1);
24550Sstevel@tonic-gate 				flk_free_lock(lock);
24560Sstevel@tonic-gate 			}
24570Sstevel@tonic-gate 			lock = nlock;
24580Sstevel@tonic-gate 		}
24590Sstevel@tonic-gate 		mutex_exit(&gp->gp_mutex);
24600Sstevel@tonic-gate 	}
24610Sstevel@tonic-gate 
24620Sstevel@tonic-gate 	mutex_enter(&request->l_graph->gp_mutex);
24630Sstevel@tonic-gate }
24640Sstevel@tonic-gate 
24650Sstevel@tonic-gate /*
24660Sstevel@tonic-gate  * Clustering: Deletes PXFS locks
24670Sstevel@tonic-gate  * Effects: Delete all locks on files in the given file system and with the
24680Sstevel@tonic-gate  *  given PXFS id.
24690Sstevel@tonic-gate  */
24700Sstevel@tonic-gate void
24710Sstevel@tonic-gate cl_flk_delete_pxfs_locks(struct vfs *vfsp, int pxfsid)
24720Sstevel@tonic-gate {
24730Sstevel@tonic-gate 	lock_descriptor_t *lock, *nlock;
24740Sstevel@tonic-gate 	graph_t	*gp;
24750Sstevel@tonic-gate 	int i;
24760Sstevel@tonic-gate 
24770Sstevel@tonic-gate 	for (i = 0; i < HASH_SIZE; i++) {
24780Sstevel@tonic-gate 		mutex_enter(&flock_lock);
24790Sstevel@tonic-gate 		gp = lock_graph[i];
24800Sstevel@tonic-gate 		mutex_exit(&flock_lock);
24810Sstevel@tonic-gate 
24820Sstevel@tonic-gate 		if (gp == NULL)
24830Sstevel@tonic-gate 			continue;
24840Sstevel@tonic-gate 
24850Sstevel@tonic-gate 		mutex_enter(&gp->gp_mutex);
24860Sstevel@tonic-gate 
24870Sstevel@tonic-gate 		/* signal sleeping requests so that they bail out */
24880Sstevel@tonic-gate 		lock = SLEEPING_HEAD(gp)->l_next;
24890Sstevel@tonic-gate 		while (lock != SLEEPING_HEAD(gp)) {
24900Sstevel@tonic-gate 			nlock = lock->l_next;
24910Sstevel@tonic-gate 			if (lock->l_vnode->v_vfsp == vfsp) {
24920Sstevel@tonic-gate 				ASSERT(IS_PXFS(lock));
24930Sstevel@tonic-gate 				if (GETPXFSID(lock->l_flock.l_sysid) ==
24940Sstevel@tonic-gate 				    pxfsid) {
24950Sstevel@tonic-gate 					flk_set_state(lock,
24960Sstevel@tonic-gate 					    FLK_CANCELLED_STATE);
24970Sstevel@tonic-gate 					flk_cancel_sleeping_lock(lock, 1);
24980Sstevel@tonic-gate 				}
24990Sstevel@tonic-gate 			}
25000Sstevel@tonic-gate 			lock = nlock;
25010Sstevel@tonic-gate 		}
25020Sstevel@tonic-gate 
25030Sstevel@tonic-gate 		/* delete active locks */
25040Sstevel@tonic-gate 		lock = ACTIVE_HEAD(gp)->l_next;
25050Sstevel@tonic-gate 		while (lock != ACTIVE_HEAD(gp)) {
25060Sstevel@tonic-gate 			nlock = lock->l_next;
25070Sstevel@tonic-gate 			if (lock->l_vnode->v_vfsp == vfsp) {
25080Sstevel@tonic-gate 				ASSERT(IS_PXFS(lock));
25090Sstevel@tonic-gate 				if (GETPXFSID(lock->l_flock.l_sysid) ==
25100Sstevel@tonic-gate 				    pxfsid) {
25110Sstevel@tonic-gate 					flk_delete_active_lock(lock, 0);
25120Sstevel@tonic-gate 					flk_wakeup(lock, 1);
25130Sstevel@tonic-gate 					flk_free_lock(lock);
25140Sstevel@tonic-gate 				}
25150Sstevel@tonic-gate 			}
25160Sstevel@tonic-gate 			lock = nlock;
25170Sstevel@tonic-gate 		}
25180Sstevel@tonic-gate 		mutex_exit(&gp->gp_mutex);
25190Sstevel@tonic-gate 	}
25200Sstevel@tonic-gate }
25210Sstevel@tonic-gate 
25220Sstevel@tonic-gate /*
25230Sstevel@tonic-gate  * Search for a sleeping lock manager lock which matches exactly this lock
25240Sstevel@tonic-gate  * request; if one is found, fake a signal to cancel it.
25250Sstevel@tonic-gate  *
25260Sstevel@tonic-gate  * Return 1 if a matching lock was found, 0 otherwise.
25270Sstevel@tonic-gate  */
25280Sstevel@tonic-gate 
25290Sstevel@tonic-gate static int
25300Sstevel@tonic-gate flk_canceled(lock_descriptor_t *request)
25310Sstevel@tonic-gate {
25320Sstevel@tonic-gate 	lock_descriptor_t *lock, *nlock;
25330Sstevel@tonic-gate 	graph_t *gp = request->l_graph;
25340Sstevel@tonic-gate 	vnode_t *vp = request->l_vnode;
25350Sstevel@tonic-gate 
25360Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&gp->gp_mutex));
25370Sstevel@tonic-gate 	ASSERT(IS_LOCKMGR(request));
25380Sstevel@tonic-gate 	SET_LOCK_TO_FIRST_SLEEP_VP(gp, lock, vp);
25390Sstevel@tonic-gate 
25400Sstevel@tonic-gate 	if (lock) {
25410Sstevel@tonic-gate 		while (lock->l_vnode == vp) {
25420Sstevel@tonic-gate 			nlock = lock->l_next;
25430Sstevel@tonic-gate 			if (SAME_OWNER(lock, request) &&
25440Sstevel@tonic-gate 			    lock->l_start == request->l_start &&
25450Sstevel@tonic-gate 			    lock->l_end == request->l_end) {
25460Sstevel@tonic-gate 				INTERRUPT_WAKEUP(lock);
25470Sstevel@tonic-gate 				return (1);
25480Sstevel@tonic-gate 			}
25490Sstevel@tonic-gate 			lock = nlock;
25500Sstevel@tonic-gate 		}
25510Sstevel@tonic-gate 	}
25520Sstevel@tonic-gate 	return (0);
25530Sstevel@tonic-gate }
25540Sstevel@tonic-gate 
25550Sstevel@tonic-gate /*
25560Sstevel@tonic-gate  * Remove all locks on the given vnode that belong to the given pid and sysid.
25570Sstevel@tonic-gate  */
25580Sstevel@tonic-gate 
25590Sstevel@tonic-gate void
25600Sstevel@tonic-gate cleanlocks(vnode_t *vp, pid_t pid, int sysid)
25610Sstevel@tonic-gate {
25620Sstevel@tonic-gate 	graph_t	*gp;
25630Sstevel@tonic-gate 	lock_descriptor_t *lock, *nlock;
25640Sstevel@tonic-gate 	lock_descriptor_t *link_stack;
25650Sstevel@tonic-gate 
25660Sstevel@tonic-gate 	STACK_INIT(link_stack);
25670Sstevel@tonic-gate 
25680Sstevel@tonic-gate 	gp = flk_get_lock_graph(vp, FLK_USE_GRAPH);
25690Sstevel@tonic-gate 
25700Sstevel@tonic-gate 	if (gp == NULL)
25710Sstevel@tonic-gate 		return;
25720Sstevel@tonic-gate 	mutex_enter(&gp->gp_mutex);
25730Sstevel@tonic-gate 
25740Sstevel@tonic-gate 	CHECK_SLEEPING_LOCKS(gp);
25750Sstevel@tonic-gate 	CHECK_ACTIVE_LOCKS(gp);
25760Sstevel@tonic-gate 
25770Sstevel@tonic-gate 	SET_LOCK_TO_FIRST_SLEEP_VP(gp, lock, vp);
25780Sstevel@tonic-gate 
25790Sstevel@tonic-gate 	if (lock) {
25800Sstevel@tonic-gate 		do {
25810Sstevel@tonic-gate 			nlock = lock->l_next;
25820Sstevel@tonic-gate 			if ((lock->l_flock.l_pid == pid ||
25830Sstevel@tonic-gate 			    pid == IGN_PID) &&
25840Sstevel@tonic-gate 			    lock->l_flock.l_sysid == sysid) {
25850Sstevel@tonic-gate 				CANCEL_WAKEUP(lock);
25860Sstevel@tonic-gate 			}
25870Sstevel@tonic-gate 			lock = nlock;
25880Sstevel@tonic-gate 		} while (lock->l_vnode == vp);
25890Sstevel@tonic-gate 	}
25900Sstevel@tonic-gate 
25910Sstevel@tonic-gate 	SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp);
25920Sstevel@tonic-gate 
25930Sstevel@tonic-gate 	if (lock) {
25940Sstevel@tonic-gate 		do {
25950Sstevel@tonic-gate 			nlock = lock->l_next;
25960Sstevel@tonic-gate 			if ((lock->l_flock.l_pid == pid ||
25970Sstevel@tonic-gate 			    pid == IGN_PID) &&
25980Sstevel@tonic-gate 			    lock->l_flock.l_sysid == sysid) {
25990Sstevel@tonic-gate 				flk_delete_active_lock(lock, 0);
26000Sstevel@tonic-gate 				STACK_PUSH(link_stack, lock, l_stack);
26010Sstevel@tonic-gate 			}
26020Sstevel@tonic-gate 			lock = nlock;
26030Sstevel@tonic-gate 		} while (lock->l_vnode == vp);
26040Sstevel@tonic-gate 	}
26050Sstevel@tonic-gate 
26060Sstevel@tonic-gate 	while ((lock = STACK_TOP(link_stack)) != NULL) {
26070Sstevel@tonic-gate 		STACK_POP(link_stack, l_stack);
26080Sstevel@tonic-gate 		flk_wakeup(lock, 1);
26090Sstevel@tonic-gate 		flk_free_lock(lock);
26100Sstevel@tonic-gate 	}
26110Sstevel@tonic-gate 
26120Sstevel@tonic-gate 	CHECK_SLEEPING_LOCKS(gp);
26130Sstevel@tonic-gate 	CHECK_ACTIVE_LOCKS(gp);
26140Sstevel@tonic-gate 	CHECK_OWNER_LOCKS(gp, pid, sysid, vp);
26150Sstevel@tonic-gate 	mutex_exit(&gp->gp_mutex);
26160Sstevel@tonic-gate }
26170Sstevel@tonic-gate /* ONC_PLUS EXTRACT END */
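
/*
 * Usage sketch (hypothetical caller): file systems typically invoke
 * cleanlocks() from their VOP_CLOSE routine when a process's last
 * reference to the file goes away, e.g.:
 *
 *	cleanlocks(vp, ttoproc(curthread)->p_pid, 0);
 *
 * Passing IGN_PID as the pid instead removes every lock with the given
 * sysid regardless of owning pid, which is how per-client cleanup for
 * remote lock managers is expressed.
 */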
26180Sstevel@tonic-gate 
26190Sstevel@tonic-gate 
26200Sstevel@tonic-gate /*
26210Sstevel@tonic-gate  * Called from 'fs' read and write routines for files that have mandatory
26220Sstevel@tonic-gate  * locking enabled.
26230Sstevel@tonic-gate  */
26240Sstevel@tonic-gate 
26250Sstevel@tonic-gate int
26260Sstevel@tonic-gate chklock(
26270Sstevel@tonic-gate 	struct vnode	*vp,
26280Sstevel@tonic-gate 	int 		iomode,
26290Sstevel@tonic-gate 	u_offset_t	offset,
26300Sstevel@tonic-gate 	ssize_t		len,
26310Sstevel@tonic-gate 	int 		fmode,
26320Sstevel@tonic-gate 	caller_context_t *ct)
26330Sstevel@tonic-gate {
26340Sstevel@tonic-gate 	register int	i;
26350Sstevel@tonic-gate 	struct flock64 	bf;
26360Sstevel@tonic-gate 	int 		error = 0;
26370Sstevel@tonic-gate 
26380Sstevel@tonic-gate 	bf.l_type = (iomode & FWRITE) ? F_WRLCK : F_RDLCK;
26390Sstevel@tonic-gate 	bf.l_whence = 0;
26400Sstevel@tonic-gate 	bf.l_start = offset;
26410Sstevel@tonic-gate 	bf.l_len = len;
26420Sstevel@tonic-gate 	if (ct == NULL) {
26430Sstevel@tonic-gate 		bf.l_pid = curproc->p_pid;
26440Sstevel@tonic-gate 		bf.l_sysid = 0;
26450Sstevel@tonic-gate 	} else {
26460Sstevel@tonic-gate 		bf.l_pid = ct->cc_pid;
26470Sstevel@tonic-gate 		bf.l_sysid = ct->cc_sysid;
26480Sstevel@tonic-gate 	}
26490Sstevel@tonic-gate 	i = (fmode & (FNDELAY|FNONBLOCK)) ? INOFLCK : INOFLCK|SLPFLCK;
26500Sstevel@tonic-gate 	if ((i = reclock(vp, &bf, i, 0, offset, NULL)) != 0 ||
26510Sstevel@tonic-gate 	    bf.l_type != F_UNLCK)
26520Sstevel@tonic-gate 		error = i ? i : EAGAIN;
26530Sstevel@tonic-gate 	return (error);
26540Sstevel@tonic-gate }
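
/*
 * Usage sketch (hypothetical caller): a file system's read path would gate
 * I/O on chklock() roughly as follows, where "mode" is the file's mode and
 * "uiop" the caller's uio (both assumed here):
 *
 *	if (MANDLOCK(vp, mode)) {
 *		error = chklock(vp, FREAD, uiop->uio_loffset,
 *		    uiop->uio_resid, uiop->uio_fmode, ct);
 *		if (error != 0)
 *			return (error);
 *	}
 *
 * The write path passes FWRITE in iomode so that a conflicting read lock
 * also blocks the I/O.
 */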
26550Sstevel@tonic-gate 
26560Sstevel@tonic-gate /* ONC_PLUS EXTRACT START */
26570Sstevel@tonic-gate /*
26580Sstevel@tonic-gate  * convoff - convert the given lock data (l_start, l_whence) to be
26590Sstevel@tonic-gate  * relative to the given whence.
26600Sstevel@tonic-gate  */
26610Sstevel@tonic-gate int
26620Sstevel@tonic-gate convoff(vp, lckdat, whence, offset)
26630Sstevel@tonic-gate 	struct vnode 	*vp;
26640Sstevel@tonic-gate 	struct flock64 	*lckdat;
26650Sstevel@tonic-gate 	int 		whence;
26660Sstevel@tonic-gate 	offset_t	offset;
26670Sstevel@tonic-gate {
26680Sstevel@tonic-gate 	int 		error;
26690Sstevel@tonic-gate 	struct vattr 	vattr;
26700Sstevel@tonic-gate 
26710Sstevel@tonic-gate 	if ((lckdat->l_whence == 2) || (whence == 2)) {
26720Sstevel@tonic-gate 		vattr.va_mask = AT_SIZE;
2673*5331Samw 		if (error = VOP_GETATTR(vp, &vattr, 0, CRED(), NULL))
26740Sstevel@tonic-gate 			return (error);
26750Sstevel@tonic-gate 	}
26760Sstevel@tonic-gate 
26770Sstevel@tonic-gate 	switch (lckdat->l_whence) {
26780Sstevel@tonic-gate 	case 1:
26790Sstevel@tonic-gate 		lckdat->l_start += offset;
26800Sstevel@tonic-gate 		break;
26810Sstevel@tonic-gate 	case 2:
26820Sstevel@tonic-gate 		lckdat->l_start += vattr.va_size;
26830Sstevel@tonic-gate 		/* FALLTHRU */
26840Sstevel@tonic-gate 	case 0:
26850Sstevel@tonic-gate 		break;
26860Sstevel@tonic-gate 	default:
26870Sstevel@tonic-gate 		return (EINVAL);
26880Sstevel@tonic-gate 	}
26890Sstevel@tonic-gate 
26900Sstevel@tonic-gate 	if (lckdat->l_start < 0)
26910Sstevel@tonic-gate 		return (EINVAL);
26920Sstevel@tonic-gate 
26930Sstevel@tonic-gate 	switch (whence) {
26940Sstevel@tonic-gate 	case 1:
26950Sstevel@tonic-gate 		lckdat->l_start -= offset;
26960Sstevel@tonic-gate 		break;
26970Sstevel@tonic-gate 	case 2:
26980Sstevel@tonic-gate 		lckdat->l_start -= vattr.va_size;
26990Sstevel@tonic-gate 		/* FALLTHRU */
27000Sstevel@tonic-gate 	case 0:
27010Sstevel@tonic-gate 		break;
27020Sstevel@tonic-gate 	default:
27030Sstevel@tonic-gate 		return (EINVAL);
27040Sstevel@tonic-gate 	}
27050Sstevel@tonic-gate 
27060Sstevel@tonic-gate 	lckdat->l_whence = (short)whence;
27070Sstevel@tonic-gate 	return (0);
27080Sstevel@tonic-gate }
27090Sstevel@tonic-gate /* ONC_PLUS EXTRACT END */
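
/*
 * Example (sketch): normalizing a caller-supplied flock64 "bf" so that its
 * l_start becomes an absolute offset, given the file offset "fp_offset"
 * the request is relative to (both names assumed here):
 *
 *	error = convoff(vp, &bf, 0, fp_offset);
 *
 * An l_whence of 1 (SEEK_CUR) adds fp_offset to l_start; 2 (SEEK_END) adds
 * the current file size obtained via VOP_GETATTR(); a result that goes
 * negative is rejected with EINVAL, and on success l_whence becomes 0.
 */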
27100Sstevel@tonic-gate 
27110Sstevel@tonic-gate 
27120Sstevel@tonic-gate /* 	proc_graph function definitions */
27130Sstevel@tonic-gate 
27140Sstevel@tonic-gate /*
27150Sstevel@tonic-gate  * Check for a deadlock caused by the new 'lock'.  If one is found, the
27160Sstevel@tonic-gate  * edges of this lock are freed and 1 is returned; otherwise 0 is returned.
27170Sstevel@tonic-gate  */
27180Sstevel@tonic-gate 
27190Sstevel@tonic-gate static int
27200Sstevel@tonic-gate flk_check_deadlock(lock_descriptor_t *lock)
27210Sstevel@tonic-gate {
27220Sstevel@tonic-gate 	proc_vertex_t	*start_vertex, *pvertex;
27230Sstevel@tonic-gate 	proc_vertex_t *dvertex;
27240Sstevel@tonic-gate 	proc_edge_t *pep, *ppep;
27250Sstevel@tonic-gate 	edge_t	*ep, *nep;
27260Sstevel@tonic-gate 	proc_vertex_t *process_stack;
27270Sstevel@tonic-gate 
27280Sstevel@tonic-gate 	STACK_INIT(process_stack);
27290Sstevel@tonic-gate 
27300Sstevel@tonic-gate 	mutex_enter(&flock_lock);
27310Sstevel@tonic-gate 	start_vertex = flk_get_proc_vertex(lock);
27320Sstevel@tonic-gate 	ASSERT(start_vertex != NULL);
27330Sstevel@tonic-gate 
27340Sstevel@tonic-gate 	/* construct the edges from this process to other processes */
27350Sstevel@tonic-gate 
27360Sstevel@tonic-gate 	ep = FIRST_ADJ(lock);
27370Sstevel@tonic-gate 	while (ep != HEAD(lock)) {
27380Sstevel@tonic-gate 		proc_vertex_t *adj_proc;
27390Sstevel@tonic-gate 
27400Sstevel@tonic-gate 		adj_proc = flk_get_proc_vertex(ep->to_vertex);
27410Sstevel@tonic-gate 		for (pep = start_vertex->edge; pep != NULL; pep = pep->next) {
27420Sstevel@tonic-gate 			if (pep->to_proc == adj_proc) {
27430Sstevel@tonic-gate 				ASSERT(pep->refcount);
27440Sstevel@tonic-gate 				pep->refcount++;
27450Sstevel@tonic-gate 				break;
27460Sstevel@tonic-gate 			}
27470Sstevel@tonic-gate 		}
27480Sstevel@tonic-gate 		if (pep == NULL) {
27490Sstevel@tonic-gate 			pep = flk_get_proc_edge();
27500Sstevel@tonic-gate 			pep->to_proc = adj_proc;
27510Sstevel@tonic-gate 			pep->refcount = 1;
27520Sstevel@tonic-gate 			adj_proc->incount++;
27530Sstevel@tonic-gate 			pep->next = start_vertex->edge;
27540Sstevel@tonic-gate 			start_vertex->edge = pep;
27550Sstevel@tonic-gate 		}
27560Sstevel@tonic-gate 		ep = NEXT_ADJ(ep);
27570Sstevel@tonic-gate 	}
27580Sstevel@tonic-gate 
27590Sstevel@tonic-gate 	ep = FIRST_IN(lock);
27600Sstevel@tonic-gate 
27610Sstevel@tonic-gate 	while (ep != HEAD(lock)) {
27620Sstevel@tonic-gate 		proc_vertex_t *in_proc;
27630Sstevel@tonic-gate 
27640Sstevel@tonic-gate 		in_proc = flk_get_proc_vertex(ep->from_vertex);
27650Sstevel@tonic-gate 
27660Sstevel@tonic-gate 		for (pep = in_proc->edge; pep != NULL; pep = pep->next) {
27670Sstevel@tonic-gate 			if (pep->to_proc == start_vertex) {
27680Sstevel@tonic-gate 				ASSERT(pep->refcount);
27690Sstevel@tonic-gate 				pep->refcount++;
27700Sstevel@tonic-gate 				break;
27710Sstevel@tonic-gate 			}
27720Sstevel@tonic-gate 		}
27730Sstevel@tonic-gate 		if (pep == NULL) {
27740Sstevel@tonic-gate 			pep = flk_get_proc_edge();
27750Sstevel@tonic-gate 			pep->to_proc = start_vertex;
27760Sstevel@tonic-gate 			pep->refcount = 1;
27770Sstevel@tonic-gate 			start_vertex->incount++;
27780Sstevel@tonic-gate 			pep->next = in_proc->edge;
27790Sstevel@tonic-gate 			in_proc->edge = pep;
27800Sstevel@tonic-gate 		}
27810Sstevel@tonic-gate 		ep = NEXT_IN(ep);
27820Sstevel@tonic-gate 	}
27830Sstevel@tonic-gate 
27840Sstevel@tonic-gate 	if (start_vertex->incount == 0) {
27850Sstevel@tonic-gate 		mutex_exit(&flock_lock);
27860Sstevel@tonic-gate 		return (0);
27870Sstevel@tonic-gate 	}
27880Sstevel@tonic-gate 
27890Sstevel@tonic-gate 	flk_proc_graph_uncolor();
27900Sstevel@tonic-gate 
27910Sstevel@tonic-gate 	start_vertex->p_sedge = start_vertex->edge;
27920Sstevel@tonic-gate 
27930Sstevel@tonic-gate 	STACK_PUSH(process_stack, start_vertex, p_stack);
27940Sstevel@tonic-gate 
27950Sstevel@tonic-gate 	while ((pvertex = STACK_TOP(process_stack)) != NULL) {
27960Sstevel@tonic-gate 		for (pep = pvertex->p_sedge; pep != NULL; pep = pep->next) {
27970Sstevel@tonic-gate 			dvertex = pep->to_proc;
27980Sstevel@tonic-gate 			if (!PROC_ARRIVED(dvertex)) {
27990Sstevel@tonic-gate 				STACK_PUSH(process_stack, dvertex, p_stack);
28000Sstevel@tonic-gate 				dvertex->p_sedge = dvertex->edge;
28010Sstevel@tonic-gate 				PROC_ARRIVE(pvertex);
28020Sstevel@tonic-gate 				pvertex->p_sedge = pep->next;
28030Sstevel@tonic-gate 				break;
28040Sstevel@tonic-gate 			}
28050Sstevel@tonic-gate 			if (!PROC_DEPARTED(dvertex))
28060Sstevel@tonic-gate 				goto deadlock;
28070Sstevel@tonic-gate 		}
28080Sstevel@tonic-gate 		if (pep == NULL) {
28090Sstevel@tonic-gate 			PROC_DEPART(pvertex);
28100Sstevel@tonic-gate 			STACK_POP(process_stack, p_stack);
28110Sstevel@tonic-gate 		}
28120Sstevel@tonic-gate 	}
28130Sstevel@tonic-gate 	mutex_exit(&flock_lock);
28140Sstevel@tonic-gate 	return (0);
28150Sstevel@tonic-gate 
28160Sstevel@tonic-gate deadlock:
28170Sstevel@tonic-gate 
28180Sstevel@tonic-gate 	/* we remove all lock edges and proc edges */
28190Sstevel@tonic-gate 
28200Sstevel@tonic-gate 	ep = FIRST_ADJ(lock);
28210Sstevel@tonic-gate 	while (ep != HEAD(lock)) {
28220Sstevel@tonic-gate 		proc_vertex_t *adj_proc;
28230Sstevel@tonic-gate 		adj_proc = flk_get_proc_vertex(ep->to_vertex);
28240Sstevel@tonic-gate 		nep = NEXT_ADJ(ep);
28250Sstevel@tonic-gate 		IN_LIST_REMOVE(ep);
28260Sstevel@tonic-gate 		ADJ_LIST_REMOVE(ep);
28270Sstevel@tonic-gate 		flk_free_edge(ep);
28280Sstevel@tonic-gate 		ppep = start_vertex->edge;
28290Sstevel@tonic-gate 		for (pep = start_vertex->edge; pep != NULL; ppep = pep,
28300Sstevel@tonic-gate 		    pep = ppep->next) {
28310Sstevel@tonic-gate 			if (pep->to_proc == adj_proc) {
28320Sstevel@tonic-gate 				pep->refcount--;
28330Sstevel@tonic-gate 				if (pep->refcount == 0) {
28340Sstevel@tonic-gate 					if (pep == ppep) {
28350Sstevel@tonic-gate 						start_vertex->edge = pep->next;
28360Sstevel@tonic-gate 					} else {
28370Sstevel@tonic-gate 						ppep->next = pep->next;
28380Sstevel@tonic-gate 					}
28390Sstevel@tonic-gate 					adj_proc->incount--;
28400Sstevel@tonic-gate 					flk_proc_release(adj_proc);
28410Sstevel@tonic-gate 					flk_free_proc_edge(pep);
28420Sstevel@tonic-gate 				}
28430Sstevel@tonic-gate 				break;
28440Sstevel@tonic-gate 			}
28450Sstevel@tonic-gate 		}
28460Sstevel@tonic-gate 		ep = nep;
28470Sstevel@tonic-gate 	}
28480Sstevel@tonic-gate 	ep = FIRST_IN(lock);
28490Sstevel@tonic-gate 	while (ep != HEAD(lock)) {
28500Sstevel@tonic-gate 		proc_vertex_t *in_proc;
28510Sstevel@tonic-gate 		in_proc = flk_get_proc_vertex(ep->from_vertex);
28520Sstevel@tonic-gate 		nep = NEXT_IN(ep);
28530Sstevel@tonic-gate 		IN_LIST_REMOVE(ep);
28540Sstevel@tonic-gate 		ADJ_LIST_REMOVE(ep);
28550Sstevel@tonic-gate 		flk_free_edge(ep);
28560Sstevel@tonic-gate 		ppep = in_proc->edge;
28570Sstevel@tonic-gate 		for (pep = in_proc->edge; pep != NULL; ppep = pep,
28580Sstevel@tonic-gate 		    pep = ppep->next) {
28590Sstevel@tonic-gate 			if (pep->to_proc == start_vertex) {
28600Sstevel@tonic-gate 				pep->refcount--;
28610Sstevel@tonic-gate 				if (pep->refcount == 0) {
28620Sstevel@tonic-gate 					if (pep == ppep) {
28630Sstevel@tonic-gate 						in_proc->edge = pep->next;
28640Sstevel@tonic-gate 					} else {
28650Sstevel@tonic-gate 						ppep->next = pep->next;
28660Sstevel@tonic-gate 					}
28670Sstevel@tonic-gate 					start_vertex->incount--;
28680Sstevel@tonic-gate 					flk_proc_release(in_proc);
28690Sstevel@tonic-gate 					flk_free_proc_edge(pep);
28700Sstevel@tonic-gate 				}
28710Sstevel@tonic-gate 				break;
28720Sstevel@tonic-gate 			}
28730Sstevel@tonic-gate 		}
28740Sstevel@tonic-gate 		ep = nep;
28750Sstevel@tonic-gate 	}
28760Sstevel@tonic-gate 	flk_proc_release(start_vertex);
28770Sstevel@tonic-gate 	mutex_exit(&flock_lock);
28780Sstevel@tonic-gate 	return (1);
28790Sstevel@tonic-gate }
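
/*
 * The traversal above is an iterative depth-first search with the classic
 * three-color scheme: a vertex "arrives" when the search first reaches it
 * and "departs" once all of its out-edges have been explored.  Meeting a
 * vertex that has arrived but not yet departed means a back edge, i.e. a
 * cycle through start_vertex, i.e. a deadlock.  In sketch form (helper
 * names are illustrative; next_successor() advances the vertex's saved
 * edge cursor, cf. p_sedge):
 *
 *	push(start);
 *	while ((v = stack_top()) != NULL) {
 *		arrive(v);
 *		if ((w = next_successor(v)) == NULL) {
 *			depart(v);
 *			pop();
 *		} else if (!arrived(w)) {
 *			push(w);
 *		} else if (!departed(w)) {
 *			return (1);	(back edge: deadlock)
 *		}
 *	}
 *	return (0);
 */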
28800Sstevel@tonic-gate 
28810Sstevel@tonic-gate /*
28820Sstevel@tonic-gate  * Get a proc vertex.  If the lock's pvertex value indexes a matching proc
28830Sstevel@tonic-gate  * vertex in the list, return it; otherwise search the list and, failing
28840Sstevel@tonic-gate  * that, allocate a new one, growing the list of vertices if necessary.
28850Sstevel@tonic-gate  */
28860Sstevel@tonic-gate 
28870Sstevel@tonic-gate static proc_vertex_t *
28880Sstevel@tonic-gate flk_get_proc_vertex(lock_descriptor_t *lock)
28890Sstevel@tonic-gate {
28900Sstevel@tonic-gate 	int i;
28910Sstevel@tonic-gate 	proc_vertex_t	*pv;
28920Sstevel@tonic-gate 	proc_vertex_t	**palloc;
28930Sstevel@tonic-gate 
28940Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&flock_lock));
28950Sstevel@tonic-gate 	if (lock->pvertex != -1) {
28960Sstevel@tonic-gate 		ASSERT(lock->pvertex >= 0);
28970Sstevel@tonic-gate 		pv = pgraph.proc[lock->pvertex];
28980Sstevel@tonic-gate 		if (pv != NULL && PROC_SAME_OWNER(lock, pv)) {
28990Sstevel@tonic-gate 			return (pv);
29000Sstevel@tonic-gate 		}
29010Sstevel@tonic-gate 	}
29020Sstevel@tonic-gate 	for (i = 0; i < pgraph.gcount; i++) {
29030Sstevel@tonic-gate 		pv = pgraph.proc[i];
29040Sstevel@tonic-gate 		if (pv != NULL && PROC_SAME_OWNER(lock, pv)) {
29050Sstevel@tonic-gate 			lock->pvertex = pv->index = i;
29060Sstevel@tonic-gate 			return (pv);
29070Sstevel@tonic-gate 		}
29080Sstevel@tonic-gate 	}
29090Sstevel@tonic-gate 	pv = kmem_zalloc(sizeof (struct proc_vertex), KM_SLEEP);
29100Sstevel@tonic-gate 	pv->pid = lock->l_flock.l_pid;
29110Sstevel@tonic-gate 	pv->sysid = lock->l_flock.l_sysid;
29120Sstevel@tonic-gate 	flk_proc_vertex_allocs++;
29130Sstevel@tonic-gate 	if (pgraph.free != 0) {
29140Sstevel@tonic-gate 		for (i = 0; i < pgraph.gcount; i++) {
29150Sstevel@tonic-gate 			if (pgraph.proc[i] == NULL) {
29160Sstevel@tonic-gate 				pgraph.proc[i] = pv;
29170Sstevel@tonic-gate 				lock->pvertex = pv->index = i;
29180Sstevel@tonic-gate 				pgraph.free--;
29190Sstevel@tonic-gate 				return (pv);
29200Sstevel@tonic-gate 			}
29210Sstevel@tonic-gate 		}
29220Sstevel@tonic-gate 	}
29230Sstevel@tonic-gate 	palloc = kmem_zalloc((pgraph.gcount + PROC_CHUNK) *
29240Sstevel@tonic-gate 				sizeof (proc_vertex_t *), KM_SLEEP);
29250Sstevel@tonic-gate 
29260Sstevel@tonic-gate 	if (pgraph.proc) {
29270Sstevel@tonic-gate 		bcopy(pgraph.proc, palloc,
29280Sstevel@tonic-gate 			pgraph.gcount * sizeof (proc_vertex_t *));
29290Sstevel@tonic-gate 
29300Sstevel@tonic-gate 		kmem_free(pgraph.proc,
29310Sstevel@tonic-gate 			pgraph.gcount * sizeof (proc_vertex_t *));
29320Sstevel@tonic-gate 	}
29330Sstevel@tonic-gate 	pgraph.proc = palloc;
29340Sstevel@tonic-gate 	pgraph.free += (PROC_CHUNK - 1);
29350Sstevel@tonic-gate 	pv->index = lock->pvertex = pgraph.gcount;
29360Sstevel@tonic-gate 	pgraph.gcount += PROC_CHUNK;
29370Sstevel@tonic-gate 	pgraph.proc[pv->index] = pv;
29380Sstevel@tonic-gate 	return (pv);
29390Sstevel@tonic-gate }
29400Sstevel@tonic-gate 
29410Sstevel@tonic-gate /*
29420Sstevel@tonic-gate  * Allocate a proc edge.
29430Sstevel@tonic-gate  */
29440Sstevel@tonic-gate 
29450Sstevel@tonic-gate static proc_edge_t *
29460Sstevel@tonic-gate flk_get_proc_edge()
29470Sstevel@tonic-gate {
29480Sstevel@tonic-gate 	proc_edge_t *pep;
29490Sstevel@tonic-gate 
29500Sstevel@tonic-gate 	pep = kmem_zalloc(sizeof (proc_edge_t), KM_SLEEP);
29510Sstevel@tonic-gate 	flk_proc_edge_allocs++;
29520Sstevel@tonic-gate 	return (pep);
29530Sstevel@tonic-gate }
29540Sstevel@tonic-gate 
29550Sstevel@tonic-gate /*
29560Sstevel@tonic-gate  * Free the proc edge. Called whenever its reference count goes to zero.
29570Sstevel@tonic-gate  */
29580Sstevel@tonic-gate 
29590Sstevel@tonic-gate static void
29600Sstevel@tonic-gate flk_free_proc_edge(proc_edge_t *pep)
29610Sstevel@tonic-gate {
29620Sstevel@tonic-gate 	ASSERT(pep->refcount == 0);
29630Sstevel@tonic-gate 	kmem_free((void *)pep, sizeof (proc_edge_t));
29640Sstevel@tonic-gate 	flk_proc_edge_frees++;
29650Sstevel@tonic-gate }
29660Sstevel@tonic-gate 
29670Sstevel@tonic-gate /*
29680Sstevel@tonic-gate  * Uncolor the graph; all vertices are explicitly reset only at mark wraparound.
29690Sstevel@tonic-gate  */
29700Sstevel@tonic-gate 
29710Sstevel@tonic-gate static void
29720Sstevel@tonic-gate flk_proc_graph_uncolor()
29730Sstevel@tonic-gate {
29740Sstevel@tonic-gate 	int i;
29750Sstevel@tonic-gate 
29760Sstevel@tonic-gate 	if (pgraph.mark == UINT_MAX) {
29770Sstevel@tonic-gate 		for (i = 0; i < pgraph.gcount; i++)
29780Sstevel@tonic-gate 			if (pgraph.proc[i] != NULL) {
29790Sstevel@tonic-gate 				pgraph.proc[i]->atime = 0;
29800Sstevel@tonic-gate 				pgraph.proc[i]->dtime = 0;
29810Sstevel@tonic-gate 			}
29820Sstevel@tonic-gate 		pgraph.mark = 1;
29830Sstevel@tonic-gate 	} else {
29840Sstevel@tonic-gate 		pgraph.mark++;
29850Sstevel@tonic-gate 	}
29860Sstevel@tonic-gate }
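
/*
 * This is the usual generation-counter ("timestamp") trick: rather than
 * clearing every vertex before each traversal, the graph-wide mark is
 * incremented, and a vertex counts as visited only if its stamp equals the
 * current mark.  The O(gcount) sweep above runs only at wraparound.  The
 * arrival test then reduces to a comparison, presumably along these lines
 * (illustrative macro, cf. the PROC_ARRIVED() macro used in
 * flk_check_deadlock()):
 *
 *	#define	EXAMPLE_ARRIVED(p)	((p)->atime == pgraph.mark)
 */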
29870Sstevel@tonic-gate 
29880Sstevel@tonic-gate /*
29890Sstevel@tonic-gate  * Release the proc vertex iff it has neither in-edges nor out-edges.
29900Sstevel@tonic-gate  */
29910Sstevel@tonic-gate 
29920Sstevel@tonic-gate static void
29930Sstevel@tonic-gate flk_proc_release(proc_vertex_t *proc)
29940Sstevel@tonic-gate {
29950Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&flock_lock));
29960Sstevel@tonic-gate 	if (proc->edge == NULL && proc->incount == 0) {
29970Sstevel@tonic-gate 		pgraph.proc[proc->index] = NULL;
29980Sstevel@tonic-gate 		pgraph.free++;
29990Sstevel@tonic-gate 		kmem_free(proc, sizeof (proc_vertex_t));
30000Sstevel@tonic-gate 		flk_proc_vertex_frees++;
30010Sstevel@tonic-gate 	}
30020Sstevel@tonic-gate }
30030Sstevel@tonic-gate 
30040Sstevel@tonic-gate /*
30050Sstevel@tonic-gate  * Updates process graph to reflect change in a lock_graph.
30060Sstevel@tonic-gate  * Note: We should call this function only after we have a correctly
30070Sstevel@tonic-gate  * recomputed lock graph. Otherwise we might miss a deadlock detection.
30080Sstevel@tonic-gate  * E.g., flk_relation() calls this function after flk_recompute_dependencies();
30090Sstevel@tonic-gate  * otherwise a process that tries to lock a vnode hashed into another graph
30100Sstevel@tonic-gate  * might sleep forever.
30110Sstevel@tonic-gate  */
30120Sstevel@tonic-gate 
30130Sstevel@tonic-gate static void
30140Sstevel@tonic-gate flk_update_proc_graph(edge_t *ep, int delete)
30150Sstevel@tonic-gate {
30160Sstevel@tonic-gate 	proc_vertex_t *toproc, *fromproc;
30170Sstevel@tonic-gate 	proc_edge_t *pep, *prevpep;
30180Sstevel@tonic-gate 
30190Sstevel@tonic-gate 	mutex_enter(&flock_lock);
30200Sstevel@tonic-gate 	toproc = flk_get_proc_vertex(ep->to_vertex);
30210Sstevel@tonic-gate 	fromproc = flk_get_proc_vertex(ep->from_vertex);
30220Sstevel@tonic-gate 
30230Sstevel@tonic-gate 	if (!delete)
30240Sstevel@tonic-gate 		goto add;
30250Sstevel@tonic-gate 	pep = prevpep = fromproc->edge;
30260Sstevel@tonic-gate 
30270Sstevel@tonic-gate 	ASSERT(pep != NULL);
30280Sstevel@tonic-gate 	while (pep != NULL) {
30290Sstevel@tonic-gate 		if (pep->to_proc == toproc) {
30300Sstevel@tonic-gate 			ASSERT(pep->refcount > 0);
30310Sstevel@tonic-gate 			pep->refcount--;
30320Sstevel@tonic-gate 			if (pep->refcount == 0) {
30330Sstevel@tonic-gate 				if (pep == prevpep) {
30340Sstevel@tonic-gate 					fromproc->edge = pep->next;
30350Sstevel@tonic-gate 				} else {
30360Sstevel@tonic-gate 					prevpep->next = pep->next;
30370Sstevel@tonic-gate 				}
30380Sstevel@tonic-gate 				toproc->incount--;
30390Sstevel@tonic-gate 				flk_proc_release(toproc);
30400Sstevel@tonic-gate 				flk_free_proc_edge(pep);
30410Sstevel@tonic-gate 			}
30420Sstevel@tonic-gate 			break;
30430Sstevel@tonic-gate 		}
30440Sstevel@tonic-gate 		prevpep = pep;
30450Sstevel@tonic-gate 		pep = pep->next;
30460Sstevel@tonic-gate 	}
30470Sstevel@tonic-gate 	flk_proc_release(fromproc);
30480Sstevel@tonic-gate 	mutex_exit(&flock_lock);
30490Sstevel@tonic-gate 	return;
30500Sstevel@tonic-gate add:
30510Sstevel@tonic-gate 
30520Sstevel@tonic-gate 	pep = fromproc->edge;
30530Sstevel@tonic-gate 
30540Sstevel@tonic-gate 	while (pep != NULL) {
30550Sstevel@tonic-gate 		if (pep->to_proc == toproc) {
30560Sstevel@tonic-gate 			ASSERT(pep->refcount > 0);
30570Sstevel@tonic-gate 			pep->refcount++;
30580Sstevel@tonic-gate 			break;
30590Sstevel@tonic-gate 		}
30600Sstevel@tonic-gate 		pep = pep->next;
30610Sstevel@tonic-gate 	}
30620Sstevel@tonic-gate 	if (pep == NULL) {
30630Sstevel@tonic-gate 		pep = flk_get_proc_edge();
30640Sstevel@tonic-gate 		pep->to_proc = toproc;
30650Sstevel@tonic-gate 		pep->refcount = 1;
30660Sstevel@tonic-gate 		toproc->incount++;
30670Sstevel@tonic-gate 		pep->next = fromproc->edge;
30680Sstevel@tonic-gate 		fromproc->edge = pep;
30690Sstevel@tonic-gate 	}
30700Sstevel@tonic-gate 	mutex_exit(&flock_lock);
30710Sstevel@tonic-gate }
30720Sstevel@tonic-gate 
30730Sstevel@tonic-gate /* ONC_PLUS EXTRACT START */
30740Sstevel@tonic-gate /*
30750Sstevel@tonic-gate  * Set the control status for lock manager requests.
30760Sstevel@tonic-gate  *
30770Sstevel@tonic-gate  */
30780Sstevel@tonic-gate 
30790Sstevel@tonic-gate /*
30800Sstevel@tonic-gate  * PSARC case 1997/292
30810Sstevel@tonic-gate  *
30820Sstevel@tonic-gate  * Requires: "nlmid" must be >= 1 and <= clconf_maximum_nodeid().
30830Sstevel@tonic-gate  * Effects: Set the state of the NLM server identified by "nlmid"
30840Sstevel@tonic-gate  *   in the NLM registry to state "nlm_state."
30850Sstevel@tonic-gate  *   Raises exception no_such_nlm if "nlmid" doesn't identify a known
30860Sstevel@tonic-gate  *   NLM server to this LLM.
30870Sstevel@tonic-gate  *   Note that when this routine is called with NLM_SHUTTING_DOWN there
30880Sstevel@tonic-gate  *   may be lock requests that have gotten started but not finished.  In
30890Sstevel@tonic-gate  *   particular, there may be blocking requests that are in the callback code
30900Sstevel@tonic-gate  *   before sleeping (so they're not holding the lock for the graph).  If
30910Sstevel@tonic-gate  *   such a thread reacquires the graph's lock (to go to sleep) after
30920Sstevel@tonic-gate  *   the NLM state in the NLM registry is set to a non-up value,
30930Sstevel@tonic-gate  *   it will notice the status and bail out.  If the request gets
30940Sstevel@tonic-gate  *   granted before the thread can check the NLM registry, let it
30950Sstevel@tonic-gate  *   continue normally.  It will get flushed when we are called with NLM_DOWN.
30960Sstevel@tonic-gate  *
30970Sstevel@tonic-gate  * Modifies: nlm_reg_obj (global)
30980Sstevel@tonic-gate  * Arguments:
30990Sstevel@tonic-gate  *    nlmid	(IN):    id uniquely identifying an NLM server
31000Sstevel@tonic-gate  *    nlm_state (IN):    NLM server state to change "nlmid" to
31010Sstevel@tonic-gate  */
31020Sstevel@tonic-gate void
31030Sstevel@tonic-gate cl_flk_set_nlm_status(int nlmid, flk_nlm_status_t nlm_state)
31040Sstevel@tonic-gate {
31050Sstevel@tonic-gate 	/*
31060Sstevel@tonic-gate 	 * Check to see if node is booted as a cluster. If not, return.
31070Sstevel@tonic-gate 	 */
31080Sstevel@tonic-gate 	if ((cluster_bootflags & CLUSTER_BOOTED) == 0) {
31090Sstevel@tonic-gate 		return;
31100Sstevel@tonic-gate 	}
31110Sstevel@tonic-gate 
31120Sstevel@tonic-gate 	/*
31130Sstevel@tonic-gate 	 * Check for development/debugging.  It is possible to boot a node
31140Sstevel@tonic-gate 	 * in non-cluster mode, and then run a special script, currently
31150Sstevel@tonic-gate 	 * available only to developers, to bring up the node as part of a
31160Sstevel@tonic-gate 	 * cluster.  The problem is that running such a script does not
31170Sstevel@tonic-gate 	 * result in the routine flk_init() being called and hence global array
31180Sstevel@tonic-gate 	 * result in the routine flk_init() being called, and hence the
31190Sstevel@tonic-gate 	 * global array nlm_reg_status is NULL.  The NLM thinks it's in
31190Sstevel@tonic-gate 	 * cluster mode,
31200Sstevel@tonic-gate 	 * array has been created or not. If nlm_reg_status is NULL, then
31210Sstevel@tonic-gate 	 * return, else continue.
31220Sstevel@tonic-gate 	 */
31230Sstevel@tonic-gate 	if (nlm_reg_status == NULL) {
31240Sstevel@tonic-gate 		return;
31250Sstevel@tonic-gate 	}
31260Sstevel@tonic-gate 
31270Sstevel@tonic-gate 	ASSERT(nlmid <= nlm_status_size && nlmid >= 0);
31280Sstevel@tonic-gate 	mutex_enter(&nlm_reg_lock);
31290Sstevel@tonic-gate 
31300Sstevel@tonic-gate 	if (FLK_REGISTRY_IS_NLM_UNKNOWN(nlm_reg_status, nlmid)) {
31310Sstevel@tonic-gate 		/*
31320Sstevel@tonic-gate 		 * If the NLM server "nlmid" is unknown in the NLM registry,
31330Sstevel@tonic-gate 		 * add it to the registry in the nlm shutting down state.
31340Sstevel@tonic-gate 		 */
31350Sstevel@tonic-gate 		FLK_REGISTRY_CHANGE_NLM_STATE(nlm_reg_status, nlmid,
31360Sstevel@tonic-gate 			FLK_NLM_SHUTTING_DOWN);
31370Sstevel@tonic-gate 	} else {
31380Sstevel@tonic-gate 		/*
31390Sstevel@tonic-gate 		 * Change the state of the NLM server identified by "nlmid"
31400Sstevel@tonic-gate 		 * in the NLM registry to the argument "nlm_state."
31410Sstevel@tonic-gate 		 */
31420Sstevel@tonic-gate 		FLK_REGISTRY_CHANGE_NLM_STATE(nlm_reg_status, nlmid,
31430Sstevel@tonic-gate 			nlm_state);
31440Sstevel@tonic-gate 	}
31450Sstevel@tonic-gate 
31460Sstevel@tonic-gate 	/*
31470Sstevel@tonic-gate 	 *  The reason we must register the NLM server that is shutting down
31480Sstevel@tonic-gate 	 *  with an LLM that doesn't already know about it (never sent a lock
31490Sstevel@tonic-gate  *  request) is to correctly handle a race between shutdown and a new
31500Sstevel@tonic-gate 	 *  lock request.  Suppose that a shutdown request from the NLM server
31510Sstevel@tonic-gate 	 *  invokes this routine at the LLM, and a thread is spawned to
31520Sstevel@tonic-gate 	 *  service the request. Now suppose a new lock request is in
31530Sstevel@tonic-gate 	 *  progress and has already passed the first line of defense in
31540Sstevel@tonic-gate  *  reclock(), which denies new lock requests from NLM servers
31550Sstevel@tonic-gate 	 *  that are not in the NLM_UP state.  After the current routine
31560Sstevel@tonic-gate 	 *  is invoked for both phases of shutdown, the routine will return,
31570Sstevel@tonic-gate 	 *  having done nothing, and the lock request will proceed and
31580Sstevel@tonic-gate 	 *  probably be granted.  The problem is that the shutdown was ignored
31590Sstevel@tonic-gate 	 *  by the lock request because there was no record of that NLM server
31600Sstevel@tonic-gate  *  shutting down.  We will be in the peculiar position of thinking
31610Sstevel@tonic-gate  *  that we've shut down the NLM server and all locks at all LLMs have
31620Sstevel@tonic-gate 	 *  been discarded, but in fact there's still one lock held.
31630Sstevel@tonic-gate 	 *  The solution is to record the existence of NLM server and change
31640Sstevel@tonic-gate 	 *  its state immediately to NLM_SHUTTING_DOWN.  The lock request in
31650Sstevel@tonic-gate 	 *  progress may proceed because the next phase NLM_DOWN will catch
31660Sstevel@tonic-gate 	 *  this lock and discard it.
31670Sstevel@tonic-gate 	 */
31680Sstevel@tonic-gate 	mutex_exit(&nlm_reg_lock);
31690Sstevel@tonic-gate 
31700Sstevel@tonic-gate 	switch (nlm_state) {
31710Sstevel@tonic-gate 	case FLK_NLM_UP:
31720Sstevel@tonic-gate 		/*
31730Sstevel@tonic-gate 		 * Change the NLM state of all locks still held on behalf of
31740Sstevel@tonic-gate 		 * the NLM server identified by "nlmid" to NLM_UP.
31750Sstevel@tonic-gate 		 */
31760Sstevel@tonic-gate 		cl_flk_change_nlm_state_all_locks(nlmid, FLK_NLM_UP);
31770Sstevel@tonic-gate 		break;
31780Sstevel@tonic-gate 
31790Sstevel@tonic-gate 	case FLK_NLM_SHUTTING_DOWN:
31800Sstevel@tonic-gate 		/*
31810Sstevel@tonic-gate 		 * Wake up all sleeping locks for the NLM server identified
31820Sstevel@tonic-gate 		 * by "nlmid." Note that eventually all woken threads will
31830Sstevel@tonic-gate 		 * have their lock requests cancelled and descriptors
31840Sstevel@tonic-gate 		 * removed from the sleeping lock list.  Note that the NLM
31850Sstevel@tonic-gate 		 * server state associated with each lock descriptor is
31860Sstevel@tonic-gate 		 * changed to FLK_NLM_SHUTTING_DOWN.
31870Sstevel@tonic-gate 		 */
31880Sstevel@tonic-gate 		cl_flk_wakeup_sleeping_nlm_locks(nlmid);
31890Sstevel@tonic-gate 		break;
31900Sstevel@tonic-gate 
31910Sstevel@tonic-gate 	case FLK_NLM_DOWN:
31920Sstevel@tonic-gate 		/*
31930Sstevel@tonic-gate 		 * Discard all active, granted locks for this NLM server
31940Sstevel@tonic-gate 		 * identified by "nlmid."
31950Sstevel@tonic-gate 		 */
31960Sstevel@tonic-gate 		cl_flk_unlock_nlm_granted(nlmid);
31970Sstevel@tonic-gate 		break;
31980Sstevel@tonic-gate 
31990Sstevel@tonic-gate 	default:
32000Sstevel@tonic-gate 		panic("cl_flk_set_nlm_status: bad status (%d)", nlm_state);
32010Sstevel@tonic-gate 	}
32020Sstevel@tonic-gate }
32030Sstevel@tonic-gate 
32040Sstevel@tonic-gate /*
32050Sstevel@tonic-gate  * Set the control status for lock manager requests.
32060Sstevel@tonic-gate  *
32070Sstevel@tonic-gate  * Note that when this routine is called with FLK_WAKEUP_SLEEPERS, there
32080Sstevel@tonic-gate  * may be lock requests that have gotten started but not finished.  In
32090Sstevel@tonic-gate  * particular, there may be blocking requests that are in the callback code
32100Sstevel@tonic-gate  * before sleeping (so they're not holding the lock for the graph).  If
32110Sstevel@tonic-gate  * such a thread reacquires the graph's lock (to go to sleep) after
32120Sstevel@tonic-gate  * flk_lockmgr_status is set to a non-up value, it will notice the status
32130Sstevel@tonic-gate  * and bail out.  If the request gets granted before the thread can check
32140Sstevel@tonic-gate  * flk_lockmgr_status, let it continue normally.  It will get flushed when
32150Sstevel@tonic-gate  * we are called with FLK_LOCKMGR_DOWN.
32160Sstevel@tonic-gate  */
32170Sstevel@tonic-gate 
32180Sstevel@tonic-gate void
32190Sstevel@tonic-gate flk_set_lockmgr_status(flk_lockmgr_status_t status)
32200Sstevel@tonic-gate {
32210Sstevel@tonic-gate 	int i;
32220Sstevel@tonic-gate 	graph_t *gp;
32230Sstevel@tonic-gate 	struct flock_globals *fg;
32240Sstevel@tonic-gate 
32250Sstevel@tonic-gate 	fg = flk_get_globals();
32260Sstevel@tonic-gate 	ASSERT(fg != NULL);
32270Sstevel@tonic-gate 
32280Sstevel@tonic-gate 	mutex_enter(&flock_lock);
32290Sstevel@tonic-gate 	fg->flk_lockmgr_status = status;
32300Sstevel@tonic-gate 	mutex_exit(&flock_lock);
32310Sstevel@tonic-gate 
32320Sstevel@tonic-gate 	/*
32330Sstevel@tonic-gate 	 * If the lock manager is coming back up, all that's needed is to
32340Sstevel@tonic-gate 	 * propagate this information to the graphs.  If the lock manager
32350Sstevel@tonic-gate 	 * is going down, additional action is required, and each graph's
32360Sstevel@tonic-gate 	 * copy of the state is updated atomically with this other action.
32370Sstevel@tonic-gate 	 */
32380Sstevel@tonic-gate 	switch (status) {
32390Sstevel@tonic-gate 	case FLK_LOCKMGR_UP:
32400Sstevel@tonic-gate 		for (i = 0; i < HASH_SIZE; i++) {
32410Sstevel@tonic-gate 			mutex_enter(&flock_lock);
32420Sstevel@tonic-gate 			gp = lock_graph[i];
32430Sstevel@tonic-gate 			mutex_exit(&flock_lock);
32440Sstevel@tonic-gate 			if (gp == NULL)
32450Sstevel@tonic-gate 				continue;
32460Sstevel@tonic-gate 			mutex_enter(&gp->gp_mutex);
32470Sstevel@tonic-gate 			fg->lockmgr_status[i] = status;
32480Sstevel@tonic-gate 			mutex_exit(&gp->gp_mutex);
32490Sstevel@tonic-gate 		}
32500Sstevel@tonic-gate 		break;
32510Sstevel@tonic-gate 	case FLK_WAKEUP_SLEEPERS:
32520Sstevel@tonic-gate 		wakeup_sleeping_lockmgr_locks(fg);
32530Sstevel@tonic-gate 		break;
32540Sstevel@tonic-gate 	case FLK_LOCKMGR_DOWN:
32550Sstevel@tonic-gate 		unlock_lockmgr_granted(fg);
32560Sstevel@tonic-gate 		break;
32570Sstevel@tonic-gate 	default:
32580Sstevel@tonic-gate 		panic("flk_set_lockmgr_status: bad status (%d)", status);
32590Sstevel@tonic-gate 		break;
32600Sstevel@tonic-gate 	}
32610Sstevel@tonic-gate }
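
/*
 * Usage sketch (hypothetical sequence): per the comment above, taking the
 * lock manager down is two-phased, and bringing it back up is one call:
 *
 *	flk_set_lockmgr_status(FLK_WAKEUP_SLEEPERS);
 *	... wait for the woken requests to drain ...
 *	flk_set_lockmgr_status(FLK_LOCKMGR_DOWN);
 *
 *	flk_set_lockmgr_status(FLK_LOCKMGR_UP);
 *
 * The exact orchestration (who waits, and for how long) belongs to the
 * lock manager's own shutdown path and is not shown here.
 */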
32620Sstevel@tonic-gate 
32630Sstevel@tonic-gate /*
32640Sstevel@tonic-gate  * This routine returns all the locks that are active or sleeping and are
32650Sstevel@tonic-gate  * associated with a particular set of identifiers.  If lock_state != 0, then
32660Sstevel@tonic-gate  * only locks that match the lock_state are returned. If lock_state == 0, then
32670Sstevel@tonic-gate  * all locks are returned. If pid == NOPID, the pid is ignored.  If
32680Sstevel@tonic-gate  * use_sysid is FALSE, then the sysid is ignored.  If vp is NULL, then the
32690Sstevel@tonic-gate  * vnode pointer is ignored.
32700Sstevel@tonic-gate  *
32710Sstevel@tonic-gate  * A list containing the vnode pointer and an flock structure
32720Sstevel@tonic-gate  * describing the lock is returned.  Each element in the list is
3273*5331Samw  * dynamically allocated and must be freed by the caller.  The
32740Sstevel@tonic-gate  * last item in the list is denoted by a NULL value in the ll_next
32750Sstevel@tonic-gate  * field.
32760Sstevel@tonic-gate  *
32770Sstevel@tonic-gate  * The vnode pointers returned are held.  The caller is responsible
32780Sstevel@tonic-gate  * for releasing these.  Note that the returned list is only a snapshot of
32790Sstevel@tonic-gate  * the current lock information, and that it is a snapshot of a moving
32800Sstevel@tonic-gate  * target (only one graph is locked at a time).
32810Sstevel@tonic-gate  */
32820Sstevel@tonic-gate 
32830Sstevel@tonic-gate locklist_t *
32840Sstevel@tonic-gate get_lock_list(int list_type, int lock_state, int sysid, boolean_t use_sysid,
32850Sstevel@tonic-gate 		pid_t pid, const vnode_t *vp, zoneid_t zoneid)
32860Sstevel@tonic-gate {
32870Sstevel@tonic-gate 	lock_descriptor_t	*lock;
32880Sstevel@tonic-gate 	lock_descriptor_t	*graph_head;
32890Sstevel@tonic-gate 	locklist_t		listhead;
32900Sstevel@tonic-gate 	locklist_t		*llheadp;
32910Sstevel@tonic-gate 	locklist_t		*llp;
32920Sstevel@tonic-gate 	locklist_t		*lltp;
32930Sstevel@tonic-gate 	graph_t			*gp;
32940Sstevel@tonic-gate 	int			i;
32950Sstevel@tonic-gate 	int			first_index; /* graph index */
32960Sstevel@tonic-gate 	int			num_indexes; /* graph index */
32970Sstevel@tonic-gate 
32980Sstevel@tonic-gate 	ASSERT((list_type == FLK_ACTIVE_STATE) ||
32990Sstevel@tonic-gate 	    (list_type == FLK_SLEEPING_STATE));
33000Sstevel@tonic-gate 
33010Sstevel@tonic-gate 	/*
33020Sstevel@tonic-gate 	 * Get a pointer to something to use as a list head while building
33030Sstevel@tonic-gate 	 * the rest of the list.
33040Sstevel@tonic-gate 	 */
33050Sstevel@tonic-gate 	llheadp = &listhead;
33060Sstevel@tonic-gate 	lltp = llheadp;
33070Sstevel@tonic-gate 	llheadp->ll_next = (locklist_t *)NULL;
33080Sstevel@tonic-gate 
33090Sstevel@tonic-gate 	/* Figure out which graphs we want to look at. */
33100Sstevel@tonic-gate 	if (vp == NULL) {
33110Sstevel@tonic-gate 		first_index = 0;
33120Sstevel@tonic-gate 		num_indexes = HASH_SIZE;
33130Sstevel@tonic-gate 	} else {
33140Sstevel@tonic-gate 		first_index = HASH_INDEX(vp);
33150Sstevel@tonic-gate 		num_indexes = 1;
33160Sstevel@tonic-gate 	}
33170Sstevel@tonic-gate 
33180Sstevel@tonic-gate 	for (i = first_index; i < first_index + num_indexes; i++) {
33190Sstevel@tonic-gate 		mutex_enter(&flock_lock);
33200Sstevel@tonic-gate 		gp = lock_graph[i];
33210Sstevel@tonic-gate 		mutex_exit(&flock_lock);
33220Sstevel@tonic-gate 		if (gp == NULL) {
33230Sstevel@tonic-gate 			continue;
33240Sstevel@tonic-gate 		}
33250Sstevel@tonic-gate 
33260Sstevel@tonic-gate 		mutex_enter(&gp->gp_mutex);
33270Sstevel@tonic-gate 		graph_head = (list_type == FLK_ACTIVE_STATE) ?
33280Sstevel@tonic-gate 			ACTIVE_HEAD(gp) : SLEEPING_HEAD(gp);
33290Sstevel@tonic-gate 		for (lock = graph_head->l_next;
33300Sstevel@tonic-gate 		    lock != graph_head;
33310Sstevel@tonic-gate 		    lock = lock->l_next) {
33320Sstevel@tonic-gate 			if (use_sysid && lock->l_flock.l_sysid != sysid)
33330Sstevel@tonic-gate 				continue;
33340Sstevel@tonic-gate 			if (pid != NOPID && lock->l_flock.l_pid != pid)
33350Sstevel@tonic-gate 				continue;
33360Sstevel@tonic-gate 			if (vp != NULL && lock->l_vnode != vp)
33370Sstevel@tonic-gate 				continue;
33380Sstevel@tonic-gate 			if (lock_state && !(lock_state & lock->l_state))
33390Sstevel@tonic-gate 				continue;
33400Sstevel@tonic-gate 			if (zoneid != lock->l_zoneid && zoneid != ALL_ZONES)
33410Sstevel@tonic-gate 				continue;
33420Sstevel@tonic-gate 			/*
33430Sstevel@tonic-gate 			 * A matching lock was found.  Allocate
33440Sstevel@tonic-gate 			 * space for a new locklist entry and fill
33450Sstevel@tonic-gate 			 * it in.
33460Sstevel@tonic-gate 			 */
33470Sstevel@tonic-gate 			llp = kmem_alloc(sizeof (locklist_t), KM_SLEEP);
33480Sstevel@tonic-gate 			lltp->ll_next = llp;
33490Sstevel@tonic-gate 			VN_HOLD(lock->l_vnode);
33500Sstevel@tonic-gate 			llp->ll_vp = lock->l_vnode;
33510Sstevel@tonic-gate 			create_flock(lock, &(llp->ll_flock));
33520Sstevel@tonic-gate 			llp->ll_next = (locklist_t *)NULL;
33530Sstevel@tonic-gate 			lltp = llp;
33540Sstevel@tonic-gate 		}
33550Sstevel@tonic-gate 		mutex_exit(&gp->gp_mutex);
33560Sstevel@tonic-gate 	}
33570Sstevel@tonic-gate 
33580Sstevel@tonic-gate 	llp = llheadp->ll_next;
33590Sstevel@tonic-gate 	return (llp);
33600Sstevel@tonic-gate }
33610Sstevel@tonic-gate 
33620Sstevel@tonic-gate /*
33630Sstevel@tonic-gate  * These two functions are simply interfaces to get_lock_list.  They return
33640Sstevel@tonic-gate  * a list of sleeping or active locks for the given sysid and pid.  See
33650Sstevel@tonic-gate  * get_lock_list for details.
33660Sstevel@tonic-gate  *
33670Sstevel@tonic-gate  * In either case we don't particularly care to specify the zone of interest;
33680Sstevel@tonic-gate  * the sysid-space is global across zones, so the sysid will map to exactly one
33690Sstevel@tonic-gate  * zone, and we'll return information for that zone.
33700Sstevel@tonic-gate  */
33710Sstevel@tonic-gate 
33720Sstevel@tonic-gate locklist_t *
33730Sstevel@tonic-gate flk_get_sleeping_locks(int sysid, pid_t pid)
33740Sstevel@tonic-gate {
33750Sstevel@tonic-gate 	return (get_lock_list(FLK_SLEEPING_STATE, 0, sysid, B_TRUE, pid, NULL,
33760Sstevel@tonic-gate 		    ALL_ZONES));
33770Sstevel@tonic-gate }
33780Sstevel@tonic-gate 
33790Sstevel@tonic-gate locklist_t *
33800Sstevel@tonic-gate flk_get_active_locks(int sysid, pid_t pid)
33810Sstevel@tonic-gate {
33820Sstevel@tonic-gate 	return (get_lock_list(FLK_ACTIVE_STATE, 0, sysid, B_TRUE, pid, NULL,
33830Sstevel@tonic-gate 		    ALL_ZONES));
33840Sstevel@tonic-gate }
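
/*
 * Usage sketch: walking and releasing a returned list, e.g. for every
 * active lock held by client "sysid" (value assumed):
 *
 *	locklist_t *llheadp, *llp;
 *
 *	llheadp = flk_get_active_locks(sysid, NOPID);
 *	for (llp = llheadp; llp != NULL; llp = llp->ll_next) {
 *		... inspect llp->ll_vp (held) and llp->ll_flock ...
 *	}
 *	flk_free_locklist(llheadp);
 *
 * flk_free_locklist() (below) does the VN_RELE() of each held vnode as
 * well as freeing the entries themselves.
 */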
33850Sstevel@tonic-gate 
33860Sstevel@tonic-gate /*
33870Sstevel@tonic-gate  * Another interface to get_lock_list.  This one returns all the active
33880Sstevel@tonic-gate  * locks for a given vnode.  Again, see get_lock_list for details.
33890Sstevel@tonic-gate  *
33900Sstevel@tonic-gate  * We don't need to specify which zone's locks we're interested in.  The matter
33910Sstevel@tonic-gate  * would only be interesting if the vnode belonged to NFS, and NFS vnodes can't
33920Sstevel@tonic-gate  * be used by multiple zones, so the list of locks will all be from the right
33930Sstevel@tonic-gate  * zone.
33940Sstevel@tonic-gate  */
33950Sstevel@tonic-gate 
33960Sstevel@tonic-gate locklist_t *
33970Sstevel@tonic-gate flk_active_locks_for_vp(const vnode_t *vp)
33980Sstevel@tonic-gate {
33990Sstevel@tonic-gate 	return (get_lock_list(FLK_ACTIVE_STATE, 0, 0, B_FALSE, NOPID, vp,
34000Sstevel@tonic-gate 		    ALL_ZONES));
34010Sstevel@tonic-gate }
34020Sstevel@tonic-gate 
34030Sstevel@tonic-gate /*
34040Sstevel@tonic-gate  * Another interface to get_lock_list.  This one returns all the active
34050Sstevel@tonic-gate  * nbmand locks for a given vnode.  Again, see get_lock_list for details.
34060Sstevel@tonic-gate  *
34070Sstevel@tonic-gate  * See the comment for flk_active_locks_for_vp() for why we don't care to
34080Sstevel@tonic-gate  * specify the particular zone of interest.
34090Sstevel@tonic-gate  */
34100Sstevel@tonic-gate locklist_t *
34110Sstevel@tonic-gate flk_active_nbmand_locks_for_vp(const vnode_t *vp)
34120Sstevel@tonic-gate {
34130Sstevel@tonic-gate 	return (get_lock_list(FLK_ACTIVE_STATE, NBMAND_LOCK, 0, B_FALSE,
34140Sstevel@tonic-gate 				NOPID, vp, ALL_ZONES));
34150Sstevel@tonic-gate }
34160Sstevel@tonic-gate 
34170Sstevel@tonic-gate /*
34180Sstevel@tonic-gate  * Another interface to get_lock_list.  This one returns all the active
34190Sstevel@tonic-gate  * nbmand locks for a given pid.  Again, see get_lock_list for details.
34200Sstevel@tonic-gate  *
34210Sstevel@tonic-gate  * The zone doesn't need to be specified here; the locks held by a
34220Sstevel@tonic-gate  * particular process will either be local (ie, non-NFS) or from the zone
34230Sstevel@tonic-gate  * the process is executing in.  This is because other parts of the system
34240Sstevel@tonic-gate  * ensure that an NFS vnode can't be used in a zone other than that in
34250Sstevel@tonic-gate  * which it was opened.
34260Sstevel@tonic-gate  */
34270Sstevel@tonic-gate locklist_t *
flk_active_nbmand_locks(pid_t pid)34280Sstevel@tonic-gate flk_active_nbmand_locks(pid_t pid)
34290Sstevel@tonic-gate {
34300Sstevel@tonic-gate 	return (get_lock_list(FLK_ACTIVE_STATE, NBMAND_LOCK, 0, B_FALSE,
34310Sstevel@tonic-gate 				pid, NULL, ALL_ZONES));
34320Sstevel@tonic-gate }
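
/*
 * Illustrative sketch (editor's addition): since the wrappers above all
 * share one calling convention, asking "does this process currently hold
 * any nbmand locks?" reduces to a list walk; "pid" is hypothetical.
 *
 *	locklist_t *ll = flk_active_nbmand_locks(pid);
 *	int holds_nbmand = (ll != NULL);
 *	flk_free_locklist(ll);
 */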

/*
 * Free up all entries in the locklist.
 */
void
flk_free_locklist(locklist_t *llp)
{
	locklist_t *next_llp;

	while (llp) {
		next_llp = llp->ll_next;
		VN_RELE(llp->ll_vp);
		kmem_free(llp, sizeof (*llp));
		llp = next_llp;
	}
}

static void
cl_flk_change_nlm_state_all_locks(int nlmid, flk_nlm_status_t nlm_state)
{
	/*
	 * For each graph "lg" in the hash table lock_graph do
	 * a.  Get the list of sleeping locks
	 * b.  For each lock descriptor in the list do
	 *	i.   If the requested lock is an NLM server request AND
	 *		the nlmid is the same as the routine argument then
	 *		change the lock descriptor's state field to
	 *		"nlm_state."
	 * c.  Get the list of active locks
	 * d.  For each lock descriptor in the list do
	 *	i.   If the requested lock is an NLM server request AND
	 *		the nlmid is the same as the routine argument then
	 *		change the lock descriptor's state field to
	 *		"nlm_state."
	 */

	int			i;
	graph_t			*gp;			/* lock graph */
	lock_descriptor_t	*lock;			/* lock */
	lock_descriptor_t	*nlock = NULL;		/* next lock */
	int			lock_nlmid;

	for (i = 0; i < HASH_SIZE; i++) {
		mutex_enter(&flock_lock);
		gp = lock_graph[i];
		mutex_exit(&flock_lock);
		if (gp == NULL) {
			continue;
		}

		/* Get list of sleeping locks in current lock graph. */
		mutex_enter(&gp->gp_mutex);
		for (lock = SLEEPING_HEAD(gp)->l_next;
		    lock != SLEEPING_HEAD(gp);
		    lock = nlock) {
			nlock = lock->l_next;
			/* get NLM id */
			lock_nlmid = GETNLMID(lock->l_flock.l_sysid);

			/*
			 * If NLM server request AND nlmid of lock matches
			 * nlmid of argument, then set the NLM state of the
			 * lock to "nlm_state."
			 */
			if (IS_LOCKMGR(lock) && nlmid == lock_nlmid) {
				SET_NLM_STATE(lock, nlm_state);
			}
		}

		/* Get list of active locks in current lock graph. */
		for (lock = ACTIVE_HEAD(gp)->l_next;
		    lock != ACTIVE_HEAD(gp);
		    lock = nlock) {
			nlock = lock->l_next;
			/* get NLM id */
			lock_nlmid = GETNLMID(lock->l_flock.l_sysid);

			/*
			 * If NLM server request AND nlmid of lock matches
			 * nlmid of argument, then set the NLM state of the
			 * lock to "nlm_state."
			 */
			if (IS_LOCKMGR(lock) && nlmid == lock_nlmid) {
				ASSERT(IS_ACTIVE(lock));
				SET_NLM_STATE(lock, nlm_state);
			}
		}
		mutex_exit(&gp->gp_mutex);
	}
}

/*
 * Requires: "nlmid" >= 1 and <= clconf_maximum_nodeid().
 * Effects: Find all sleeping lock manager requests _only_ for the NLM server
 *   identified by "nlmid." Poke those lock requests.
 */
static void
cl_flk_wakeup_sleeping_nlm_locks(int nlmid)
{
	lock_descriptor_t *lock;
	lock_descriptor_t *nlock = NULL; /* next lock */
	int i;
	graph_t *gp;
	int	lock_nlmid;

	for (i = 0; i < HASH_SIZE; i++) {
		mutex_enter(&flock_lock);
		gp = lock_graph[i];
		mutex_exit(&flock_lock);
		if (gp == NULL) {
			continue;
		}

		mutex_enter(&gp->gp_mutex);
		for (lock = SLEEPING_HEAD(gp)->l_next;
		    lock != SLEEPING_HEAD(gp);
		    lock = nlock) {
			nlock = lock->l_next;
			/*
			 * If NLM server request _and_ nlmid of lock matches
			 * nlmid of argument, then set the NLM state of the
			 * lock to FLK_NLM_SHUTTING_DOWN, and wake up the
			 * sleeping request.
			 */
			if (IS_LOCKMGR(lock)) {
				/* get NLM id */
				lock_nlmid =
				    GETNLMID(lock->l_flock.l_sysid);
				if (nlmid == lock_nlmid) {
					SET_NLM_STATE(lock,
					    FLK_NLM_SHUTTING_DOWN);
					INTERRUPT_WAKEUP(lock);
				}
			}
		}
		mutex_exit(&gp->gp_mutex);
	}
}

/*
 * Requires: "nlmid" >= 1 and <= clconf_maximum_nodeid()
 * Effects:  Find all active (granted) lock manager locks _only_ for the
 *   NLM server identified by "nlmid" and release them.
 */
static void
cl_flk_unlock_nlm_granted(int nlmid)
{
	lock_descriptor_t *lock;
	lock_descriptor_t *nlock = NULL; /* next lock */
	int i;
	graph_t *gp;
	int	lock_nlmid;

	for (i = 0; i < HASH_SIZE; i++) {
		mutex_enter(&flock_lock);
		gp = lock_graph[i];
		mutex_exit(&flock_lock);
		if (gp == NULL) {
			continue;
		}

		mutex_enter(&gp->gp_mutex);
		for (lock = ACTIVE_HEAD(gp)->l_next;
		    lock != ACTIVE_HEAD(gp);
		    lock = nlock) {
			nlock = lock->l_next;
			ASSERT(IS_ACTIVE(lock));

			/*
			 * If it's an NLM server request _and_ the nlmid of
			 * the lock matches the nlmid argument, then remove
			 * the active lock from the list, wake up blocked
			 * threads, and free the storage for the lock.
			 * Note that there's no need to set the NLM state
			 * of this lock to FLK_NLM_DOWN because the lock
			 * will be deleted anyway and its storage freed.
			 */
			if (IS_LOCKMGR(lock)) {
				/* get NLM id */
				lock_nlmid = GETNLMID(lock->l_flock.l_sysid);
				if (nlmid == lock_nlmid) {
					flk_delete_active_lock(lock, 0);
					flk_wakeup(lock, 1);
					flk_free_lock(lock);
				}
			}
		}
		mutex_exit(&gp->gp_mutex);
	}
}

/*
 * Find all sleeping lock manager requests and poke them.
 */
static void
wakeup_sleeping_lockmgr_locks(struct flock_globals *fg)
{
	lock_descriptor_t *lock;
	lock_descriptor_t *nlock = NULL; /* next lock */
	int i;
	graph_t *gp;
	zoneid_t zoneid = getzoneid();

	for (i = 0; i < HASH_SIZE; i++) {
		mutex_enter(&flock_lock);
		gp = lock_graph[i];
		mutex_exit(&flock_lock);
		if (gp == NULL) {
			continue;
		}

		mutex_enter(&gp->gp_mutex);
		fg->lockmgr_status[i] = FLK_WAKEUP_SLEEPERS;
		for (lock = SLEEPING_HEAD(gp)->l_next;
		    lock != SLEEPING_HEAD(gp);
		    lock = nlock) {
			nlock = lock->l_next;
			if (IS_LOCKMGR(lock) && lock->l_zoneid == zoneid) {
				INTERRUPT_WAKEUP(lock);
			}
		}
		mutex_exit(&gp->gp_mutex);
	}
}

/*
 * Find all active (granted) lock manager locks and release them.
 */
static void
unlock_lockmgr_granted(struct flock_globals *fg)
{
	lock_descriptor_t *lock;
	lock_descriptor_t *nlock = NULL; /* next lock */
	int i;
	graph_t *gp;
	zoneid_t zoneid = getzoneid();

	for (i = 0; i < HASH_SIZE; i++) {
		mutex_enter(&flock_lock);
		gp = lock_graph[i];
		mutex_exit(&flock_lock);
		if (gp == NULL) {
			continue;
		}

		mutex_enter(&gp->gp_mutex);
		fg->lockmgr_status[i] = FLK_LOCKMGR_DOWN;
		for (lock = ACTIVE_HEAD(gp)->l_next;
		    lock != ACTIVE_HEAD(gp);
		    lock = nlock) {
			nlock = lock->l_next;
			if (IS_LOCKMGR(lock) && lock->l_zoneid == zoneid) {
				ASSERT(IS_ACTIVE(lock));
				flk_delete_active_lock(lock, 0);
				flk_wakeup(lock, 1);
				flk_free_lock(lock);
			}
		}
		mutex_exit(&gp->gp_mutex);
	}
}
/* ONC_PLUS EXTRACT END */

/*
 * Wait until a lock is granted, cancelled, or interrupted.
 */

static void
wait_for_lock(lock_descriptor_t *request)
{
	graph_t *gp = request->l_graph;

	ASSERT(MUTEX_HELD(&gp->gp_mutex));

	while (!(IS_GRANTED(request)) && !(IS_CANCELLED(request)) &&
	    !(IS_INTERRUPTED(request))) {
		if (!cv_wait_sig(&request->l_cv, &gp->gp_mutex)) {
			flk_set_state(request, FLK_INTERRUPTED_STATE);
			request->l_state |= INTERRUPTED_LOCK;
		}
	}
}

/* ONC_PLUS EXTRACT START */
/*
 * Create an flock structure from the existing lock information.
 *
 * This routine is used to create flock structures for the lock manager
 * to use in a reclaim request.  Since the lock originated on this
 * host, it must conform to UNIX semantics, so no checking is
 * done to make sure it falls within the lower half of the 32-bit range.
 */

static void
create_flock(lock_descriptor_t *lp, flock64_t *flp)
{
	ASSERT(lp->l_end == MAX_U_OFFSET_T || lp->l_end <= MAXEND);
	ASSERT(lp->l_end >= lp->l_start);

	flp->l_type = lp->l_type;
	flp->l_whence = 0;
	flp->l_start = lp->l_start;
	flp->l_len = (lp->l_end == MAX_U_OFFSET_T) ? 0 :
		(lp->l_end - lp->l_start + 1);
	flp->l_sysid = lp->l_flock.l_sysid;
	flp->l_pid = lp->l_flock.l_pid;
}
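
/*
 * For reference (editor's note): the conversion above maps the internal
 * inclusive byte range [l_start, l_end] back to the POSIX (l_start, l_len)
 * form.  A lock covering bytes 100 through 199 yields l_start == 100 and
 * l_len == 199 - 100 + 1 == 100, while a lock to end of file
 * (l_end == MAX_U_OFFSET_T) yields l_len == 0.
 */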

/*
 * Convert flock_t data describing a lock range into unsigned long starting
 * and ending points, which are put into lock_request.  Returns 0 or an
 * errno value.
 * Large Files: max is passed by the caller and we return EOVERFLOW
 * as defined by LFS API.
 */

int
flk_convert_lock_data(vnode_t *vp, flock64_t *flp,
    u_offset_t *start, u_offset_t *end, offset_t offset)
{
	struct vattr	vattr;
	int	error;

	/*
	 * Determine the starting point of the request.
	 */
	switch (flp->l_whence) {
	case 0:		/* SEEK_SET */
		*start = (u_offset_t)flp->l_start;
		break;
	case 1:		/* SEEK_CUR */
		*start = (u_offset_t)(flp->l_start + offset);
		break;
	case 2:		/* SEEK_END */
		vattr.va_mask = AT_SIZE;
		if (error = VOP_GETATTR(vp, &vattr, 0, CRED(), NULL))
			return (error);
		*start = (u_offset_t)(flp->l_start + vattr.va_size);
		break;
	default:
		return (EINVAL);
	}

	/*
	 * Determine the range covered by the request.
	 */
	if (flp->l_len == 0)
		*end = MAX_U_OFFSET_T;
	else if ((offset_t)flp->l_len > 0) {
		*end = (u_offset_t)(*start + (flp->l_len - 1));
	} else {
		/*
		 * Negative length; why do we even allow this?
		 * Because this allows easy specification of
		 * the last n bytes of the file.
		 */
		*end = *start;
		*start += (u_offset_t)flp->l_len;
		(*start)++;
	}
	return (0);
}
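
/*
 * Worked example (editor's addition): with l_whence == 0, l_start == 100
 * and l_len == -10, the negative-length branch above computes *end = 100
 * and *start = 100 + (-10) + 1 == 91, so the lock covers the ten bytes
 * 91..100, ending at the original starting offset; this is the
 * "last n bytes" usage the comment describes.
 */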

/*
 * Check the validity of lock data.  This can be used by the NFS
 * frlock routines to check data before contacting the server.  The
 * server must support semantics that aren't as restrictive as
 * the UNIX API, so the NFS client is required to check.
 * The maximum is now passed in by the caller.
 */

int
flk_check_lock_data(u_offset_t start, u_offset_t end, offset_t max)
{
	/*
	 * The end (length) for local locking should never be greater
	 * than MAXEND. However, the representation for
	 * the entire file is MAX_U_OFFSET_T.
	 */
	if ((start > max) ||
	    ((end > max) && (end != MAX_U_OFFSET_T))) {
		return (EINVAL);
	}
	if (start > end) {
		return (EINVAL);
	}
	return (0);
}
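
/*
 * Illustrative usage (editor's addition): a local caller would pass
 * MAXEND as the bound, so a whole-file lock encoded as
 * end == MAX_U_OFFSET_T still validates.  "start", "end" and "error"
 * are assumed to be in the caller's scope, with the range coming from
 * flk_convert_lock_data() above.
 *
 *	if ((error = flk_check_lock_data(start, end, MAXEND)) != 0)
 *		return (error);
 */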

/*
 * Fill in request->l_flock with information about the lock blocking the
 * request.  The complexity here is that lock manager requests are allowed
 * to see into the upper part of the 32-bit address range, whereas local
 * requests are only allowed to see signed values.
 *
 * What should be done when "blocker" is a lock manager lock that uses the
 * upper portion of the 32-bit range, but "request" is local?  Since the
 * request has already been determined to have been blocked by the blocker,
 * at least some portion of "blocker" must be in the range of the request,
 * or the request extends to the end of file.  For the first case, the
 * portion in the lower range is returned with the indication that it goes
 * "to EOF."  For the second case, the last byte of the lower range is
 * returned with the indication that it goes "to EOF."
 */

static void
report_blocker(lock_descriptor_t *blocker, lock_descriptor_t *request)
{
	flock64_t *flrp;			/* l_flock portion of request */

	ASSERT(blocker != NULL);

	flrp = &request->l_flock;
	flrp->l_whence = 0;
	flrp->l_type = blocker->l_type;
	flrp->l_pid = blocker->l_flock.l_pid;
	flrp->l_sysid = blocker->l_flock.l_sysid;

	if (IS_LOCKMGR(request)) {
		flrp->l_start = blocker->l_start;
		if (blocker->l_end == MAX_U_OFFSET_T)
			flrp->l_len = 0;
		else
			flrp->l_len = blocker->l_end - blocker->l_start + 1;
	} else {
		if (blocker->l_start > MAXEND) {
			flrp->l_start = MAXEND;
			flrp->l_len = 0;
		} else {
			flrp->l_start = blocker->l_start;
			if (blocker->l_end == MAX_U_OFFSET_T)
				flrp->l_len = 0;
			else
				flrp->l_len = blocker->l_end -
				    blocker->l_start + 1;
		}
	}
}
/* ONC_PLUS EXTRACT END */
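
/*
 * Worked example (editor's addition): if a lock manager blocker covers
 * [MAXEND + 10, MAX_U_OFFSET_T] and the blocked request is local, the
 * else branch of report_blocker() reports l_start == MAXEND and
 * l_len == 0 ("to EOF"), since a local caller cannot represent offsets
 * above MAXEND.
 */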

/*
 * PSARC case 1997/292
 */
/*
 * This is the public routine exported by flock.h.
 */
void
cl_flk_change_nlm_state_to_unknown(int nlmid)
{
	/*
	 * Check to see if node is booted as a cluster. If not, return.
	 */
	if ((cluster_bootflags & CLUSTER_BOOTED) == 0) {
		return;
	}

	/*
	 * See comment in cl_flk_set_nlm_status().
	 */
	if (nlm_reg_status == NULL) {
		return;
	}

	/*
	 * protect NLM registry state with a mutex.
	 */
	ASSERT(nlmid <= nlm_status_size && nlmid >= 0);
	mutex_enter(&nlm_reg_lock);
	FLK_REGISTRY_CHANGE_NLM_STATE(nlm_reg_status, nlmid, FLK_NLM_UNKNOWN);
	mutex_exit(&nlm_reg_lock);
}

/*
 * Return non-zero if the given I/O request conflicts with an active NBMAND
 * lock.
 * If svmand is non-zero, it means look at all active locks, not just NBMAND
 * locks.
 */

int
nbl_lock_conflict(vnode_t *vp, nbl_op_t op, u_offset_t offset,
		ssize_t length, int svmand, caller_context_t *ct)
{
	int conflict = 0;
	graph_t			*gp;
	lock_descriptor_t	*lock;
	pid_t pid;
	int sysid;

	if (ct == NULL) {
		pid = curproc->p_pid;
		sysid = 0;
	} else {
		pid = ct->cc_pid;
		sysid = ct->cc_sysid;
	}

	mutex_enter(&flock_lock);
	gp = lock_graph[HASH_INDEX(vp)];
	mutex_exit(&flock_lock);
	if (gp == NULL)
		return (0);

	mutex_enter(&gp->gp_mutex);
	SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp);

	for (; lock && lock->l_vnode == vp; lock = lock->l_next) {
		if ((svmand || (lock->l_state & NBMAND_LOCK)) &&
		    (lock->l_flock.l_sysid != sysid ||
		    lock->l_flock.l_pid != pid) &&
		    lock_blocks_io(op, offset, length,
		    lock->l_type, lock->l_start, lock->l_end)) {
			conflict = 1;
			break;
		}
	}
	mutex_exit(&gp->gp_mutex);

	return (conflict);
}
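
/*
 * Illustrative usage (editor's addition): a file system write path that
 * honors nonblocking mandatory locks might check for a conflict before
 * doing the I/O.  nbl_need_check(), nbl_start_crit() and nbl_end_crit()
 * are from sys/nbmlock.h; "uiop", "svmand" and "ct" are assumed to be
 * in scope in the caller.
 *
 *	if (nbl_need_check(vp)) {
 *		nbl_start_crit(vp, RW_READER);
 *		if (nbl_lock_conflict(vp, NBL_WRITE, uiop->uio_loffset,
 *		    uiop->uio_resid, svmand, ct)) {
 *			nbl_end_crit(vp);
 *			return (EACCES);
 *		}
 *		nbl_end_crit(vp);
 *	}
 */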

/*
 * Return non-zero if the given I/O request conflicts with the given lock.
 */

static int
lock_blocks_io(nbl_op_t op, u_offset_t offset, ssize_t length,
	    int lock_type, u_offset_t lock_start, u_offset_t lock_end)
{
	ASSERT(op == NBL_READ || op == NBL_WRITE || op == NBL_READWRITE);
	ASSERT(lock_type == F_RDLCK || lock_type == F_WRLCK);

	if (op == NBL_READ && lock_type == F_RDLCK)
		return (0);

	if (offset <= lock_start && lock_start < offset + length)
		return (1);
	if (lock_start <= offset && offset <= lock_end)
		return (1);

	return (0);
}
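
/*
 * Worked example (editor's addition): a write of length 100 at offset 50
 * against an F_RDLCK covering [120, 130] conflicts, because the first
 * overlap test above holds (50 <= 120 && 120 < 50 + 100).  The same
 * ranges with op == NBL_READ do not conflict, since a read is always
 * compatible with a read lock.
 */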

#ifdef DEBUG
static void
check_active_locks(graph_t *gp)
{
	lock_descriptor_t *lock, *lock1;
	edge_t	*ep;

	for (lock = ACTIVE_HEAD(gp)->l_next; lock != ACTIVE_HEAD(gp);
	    lock = lock->l_next) {
		ASSERT(IS_ACTIVE(lock));
		ASSERT(NOT_BLOCKED(lock));
		ASSERT(!IS_BARRIER(lock));

		ep = FIRST_IN(lock);

		while (ep != HEAD(lock)) {
			ASSERT(IS_SLEEPING(ep->from_vertex));
			ASSERT(!NOT_BLOCKED(ep->from_vertex));
			ep = NEXT_IN(ep);
		}

		for (lock1 = lock->l_next; lock1 != ACTIVE_HEAD(gp);
		    lock1 = lock1->l_next) {
			if (lock1->l_vnode == lock->l_vnode) {
				if (BLOCKS(lock1, lock)) {
					cmn_err(CE_PANIC,
					    "active lock %p blocks %p",
					    (void *)lock1, (void *)lock);
				} else if (BLOCKS(lock, lock1)) {
					cmn_err(CE_PANIC,
					    "active lock %p blocks %p",
					    (void *)lock, (void *)lock1);
				}
			}
		}
	}
}

/*
 * Effect: This function checks to see if the transition from 'old_state' to
 *	'new_state' is a valid one.  It returns 0 if the transition is valid
 *	and 1 if it is not.
 *	For a map of valid transitions, see sys/flock_impl.h
 */
static int
check_lock_transition(int old_state, int new_state)
{
	switch (old_state) {
	case FLK_INITIAL_STATE:
		if ((new_state == FLK_START_STATE) ||
		    (new_state == FLK_SLEEPING_STATE) ||
		    (new_state == FLK_ACTIVE_STATE) ||
		    (new_state == FLK_DEAD_STATE)) {
			return (0);
		} else {
			return (1);
		}
	case FLK_START_STATE:
		if ((new_state == FLK_ACTIVE_STATE) ||
		    (new_state == FLK_DEAD_STATE)) {
			return (0);
		} else {
			return (1);
		}
	case FLK_ACTIVE_STATE:
		if (new_state == FLK_DEAD_STATE) {
			return (0);
		} else {
			return (1);
		}
	case FLK_SLEEPING_STATE:
		if ((new_state == FLK_GRANTED_STATE) ||
		    (new_state == FLK_INTERRUPTED_STATE) ||
		    (new_state == FLK_CANCELLED_STATE)) {
			return (0);
		} else {
			return (1);
		}
	case FLK_GRANTED_STATE:
		if ((new_state == FLK_START_STATE) ||
		    (new_state == FLK_INTERRUPTED_STATE) ||
		    (new_state == FLK_CANCELLED_STATE)) {
			return (0);
		} else {
			return (1);
		}
	case FLK_CANCELLED_STATE:
		if ((new_state == FLK_INTERRUPTED_STATE) ||
		    (new_state == FLK_DEAD_STATE)) {
			return (0);
		} else {
			return (1);
		}
	case FLK_INTERRUPTED_STATE:
		if (new_state == FLK_DEAD_STATE) {
			return (0);
		} else {
			return (1);
		}
	case FLK_DEAD_STATE:
		/* May be set more than once */
		if (new_state == FLK_DEAD_STATE) {
			return (0);
		} else {
			return (1);
		}
	default:
		return (1);
	}
}

static void
check_sleeping_locks(graph_t *gp)
{
	lock_descriptor_t *lock1, *lock2;
	edge_t *ep;

	for (lock1 = SLEEPING_HEAD(gp)->l_next; lock1 != SLEEPING_HEAD(gp);
	    lock1 = lock1->l_next) {
		ASSERT(!IS_BARRIER(lock1));
		for (lock2 = lock1->l_next; lock2 != SLEEPING_HEAD(gp);
		    lock2 = lock2->l_next) {
			if (lock1->l_vnode == lock2->l_vnode) {
				if (BLOCKS(lock2, lock1)) {
					ASSERT(!IS_GRANTED(lock1));
					ASSERT(!NOT_BLOCKED(lock1));
					path(lock1, lock2);
				}
			}
		}

		for (lock2 = ACTIVE_HEAD(gp)->l_next; lock2 != ACTIVE_HEAD(gp);
		    lock2 = lock2->l_next) {
			ASSERT(!IS_BARRIER(lock1));
			if (lock1->l_vnode == lock2->l_vnode) {
				if (BLOCKS(lock2, lock1)) {
					ASSERT(!IS_GRANTED(lock1));
					ASSERT(!NOT_BLOCKED(lock1));
					path(lock1, lock2);
				}
			}
		}

		ep = FIRST_ADJ(lock1);
		while (ep != HEAD(lock1)) {
			ASSERT(BLOCKS(ep->to_vertex, lock1));
			ep = NEXT_ADJ(ep);
		}
	}
}

static int
level_two_path(lock_descriptor_t *lock1, lock_descriptor_t *lock2, int no_path)
{
	edge_t	*ep;
	lock_descriptor_t	*vertex;
	lock_descriptor_t *vertex_stack;

	STACK_INIT(vertex_stack);

	flk_graph_uncolor(lock1->l_graph);
	ep = FIRST_ADJ(lock1);
	ASSERT(ep != HEAD(lock1));
	while (ep != HEAD(lock1)) {
		if (no_path)
			ASSERT(ep->to_vertex != lock2);
		STACK_PUSH(vertex_stack, ep->to_vertex, l_dstack);
		COLOR(ep->to_vertex);
		ep = NEXT_ADJ(ep);
	}

	while ((vertex = STACK_TOP(vertex_stack)) != NULL) {
		STACK_POP(vertex_stack, l_dstack);
		for (ep = FIRST_ADJ(vertex); ep != HEAD(vertex);
		    ep = NEXT_ADJ(ep)) {
			if (COLORED(ep->to_vertex))
				continue;
			COLOR(ep->to_vertex);
			if (ep->to_vertex == lock2)
				return (1);

			STACK_PUSH(vertex_stack, ep->to_vertex, l_dstack);
		}
	}
	return (0);
}

static void
check_owner_locks(graph_t *gp, pid_t pid, int sysid, vnode_t *vp)
{
	lock_descriptor_t *lock;

	SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp);

	if (lock) {
		while (lock != ACTIVE_HEAD(gp) && (lock->l_vnode == vp)) {
			if (lock->l_flock.l_pid == pid &&
			    lock->l_flock.l_sysid == sysid)
				cmn_err(CE_PANIC,
				    "owner pid %d's lock %p in active queue",
				    pid, (void *)lock);
			lock = lock->l_next;
		}
	}
	SET_LOCK_TO_FIRST_SLEEP_VP(gp, lock, vp);

	if (lock) {
		while (lock != SLEEPING_HEAD(gp) && (lock->l_vnode == vp)) {
			if (lock->l_flock.l_pid == pid &&
			    lock->l_flock.l_sysid == sysid)
				cmn_err(CE_PANIC,
				    "owner pid %d's lock %p in sleep queue",
				    pid, (void *)lock);
			lock = lock->l_next;
		}
	}
}

static int
level_one_path(lock_descriptor_t *lock1, lock_descriptor_t *lock2)
{
	edge_t *ep = FIRST_ADJ(lock1);

	while (ep != HEAD(lock1)) {
		if (ep->to_vertex == lock2)
			return (1);
		else
			ep = NEXT_ADJ(ep);
	}
	return (0);
}

static int
no_path(lock_descriptor_t *lock1, lock_descriptor_t *lock2)
{
	return (!level_two_path(lock1, lock2, 1));
}

static void
path(lock_descriptor_t *lock1, lock_descriptor_t *lock2)
{
	if (level_one_path(lock1, lock2)) {
		if (level_two_path(lock1, lock2, 0) != 0) {
			cmn_err(CE_WARN,
			    "one edge one path from lock1 %p lock2 %p",
			    (void *)lock1, (void *)lock2);
		}
	} else if (no_path(lock1, lock2)) {
		cmn_err(CE_PANIC,
		    "No path from lock1 %p to lock2 %p",
		    (void *)lock1, (void *)lock2);
	}
}
#endif /* DEBUG */