xref: /onnv-gate/usr/src/cmd/cmd-inet/usr.lib/dsvclockd/container.c (revision 0:68f95e015346)
1*0Sstevel@tonic-gate /*
2*0Sstevel@tonic-gate  * CDDL HEADER START
3*0Sstevel@tonic-gate  *
4*0Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
5*0Sstevel@tonic-gate  * Common Development and Distribution License, Version 1.0 only
6*0Sstevel@tonic-gate  * (the "License").  You may not use this file except in compliance
7*0Sstevel@tonic-gate  * with the License.
8*0Sstevel@tonic-gate  *
9*0Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10*0Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
11*0Sstevel@tonic-gate  * See the License for the specific language governing permissions
12*0Sstevel@tonic-gate  * and limitations under the License.
13*0Sstevel@tonic-gate  *
14*0Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
15*0Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16*0Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
17*0Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
18*0Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
19*0Sstevel@tonic-gate  *
20*0Sstevel@tonic-gate  * CDDL HEADER END
21*0Sstevel@tonic-gate  */
22*0Sstevel@tonic-gate /*
23*0Sstevel@tonic-gate  * Copyright (c) 2000-2001 by Sun Microsystems, Inc.
24*0Sstevel@tonic-gate  * All rights reserved.
25*0Sstevel@tonic-gate  */
26*0Sstevel@tonic-gate 
27*0Sstevel@tonic-gate #pragma ident	"%Z%%M%	%I%	%E% SMI"
28*0Sstevel@tonic-gate 
29*0Sstevel@tonic-gate #include <sys/types.h>
30*0Sstevel@tonic-gate #include <synch.h>
31*0Sstevel@tonic-gate #include <assert.h>
32*0Sstevel@tonic-gate #include <stdlib.h>
33*0Sstevel@tonic-gate #include <string.h>
34*0Sstevel@tonic-gate #include <stdio.h>
35*0Sstevel@tonic-gate #include <fcntl.h>
36*0Sstevel@tonic-gate #include <errno.h>
37*0Sstevel@tonic-gate #include <dhcpmsg.h>
38*0Sstevel@tonic-gate #include <unistd.h>
39*0Sstevel@tonic-gate #include <dhcp_svc_private.h>
40*0Sstevel@tonic-gate 
41*0Sstevel@tonic-gate #include "container.h"
42*0Sstevel@tonic-gate 
43*0Sstevel@tonic-gate /*
44*0Sstevel@tonic-gate  * Container locking code -- warning: serious pain ahead.
45*0Sstevel@tonic-gate  *
46*0Sstevel@tonic-gate  * This code synchronizes access to a given container across multiple
47*0Sstevel@tonic-gate  * threads in this (dsvclockd) process, and optionally synchronizes across
48*0Sstevel@tonic-gate  * multiple instances of dsvclockd running on different hosts.  The
49*0Sstevel@tonic-gate  * synchronization allows multiple readers or a single writer at one time.
50*0Sstevel@tonic-gate  *
51*0Sstevel@tonic-gate  * Since by definition there is at most one dsvclockd running per host and
52*0Sstevel@tonic-gate  * all requests by all threads in all processes running on that host funnel
53*0Sstevel@tonic-gate  * into it, this code effectively synchronizes access to a given container
54*0Sstevel@tonic-gate  * across all threads in all processes running on a given host.  This means
55*0Sstevel@tonic-gate  * that the optional synchronization across multiple instances of dsvclockd
56*0Sstevel@tonic-gate  * on different hosts provides true cross-host synchronization for all
57*0Sstevel@tonic-gate  * threads in all processes on all cooperating machines (though all hosts
58*0Sstevel@tonic-gate  * must have write access to a common directory).
59*0Sstevel@tonic-gate  *
60*0Sstevel@tonic-gate  * The container synchronization here should be viewed as a two step
61*0Sstevel@tonic-gate  * process, where the first step is optional:
62*0Sstevel@tonic-gate  *
63*0Sstevel@tonic-gate  *	1. Synchronize access across the set of cooperating dsvclockd's
64*0Sstevel@tonic-gate  *	   on multiple hosts.  This is known as acquiring the host lock.
65*0Sstevel@tonic-gate  *
66*0Sstevel@tonic-gate  *	2. Synchronize access across the set of threads running inside
67*0Sstevel@tonic-gate  *	   this dsvclockd process.  This is known as acquiring the
68*0Sstevel@tonic-gate  *	   intra-process lock.
69*0Sstevel@tonic-gate  *
70*0Sstevel@tonic-gate  * In order to implement the first (host lock) step, we use fcntl()-based
71*0Sstevel@tonic-gate  * file locking on a file inside an NFS-shared directory and rely on NFS to
72*0Sstevel@tonic-gate  * do our synchronization for us.  Note that this can only be used to
73*0Sstevel@tonic-gate  * implement the first step since fcntl()-based locks are process locks,
74*0Sstevel@tonic-gate  * and the effects of using these locks with multiple threads are not
75*0Sstevel@tonic-gate  * defined.  Furthermore, note that this means it requires some fancy
76*0Sstevel@tonic-gate  * footwork to ensure that only one thread in a given dsvclockd process
77*0Sstevel@tonic-gate  * tries to acquire the fcntl() lock for that process.
78*0Sstevel@tonic-gate  *
79*0Sstevel@tonic-gate  * In order to implement the second step, we use custom-made reader-writer
80*0Sstevel@tonic-gate  * locks since the stock Solaris ones don't quite have the semantics we
81*0Sstevel@tonic-gate  * need -- in particular, we need to relax the requirement that the thread
82*0Sstevel@tonic-gate  * which acquired the lock is the one releasing it.
83*0Sstevel@tonic-gate  *
84*0Sstevel@tonic-gate  * Lock ordering guidelines:
85*0Sstevel@tonic-gate  *
86*0Sstevel@tonic-gate  * For the most part, this code does not acquire more than one container
87*0Sstevel@tonic-gate  * lock at a time -- whenever feasible, please do the same.  If you must
88*0Sstevel@tonic-gate  * acquire more than one lock at a time, the correct order is:
89*0Sstevel@tonic-gate  *
90*0Sstevel@tonic-gate  *	1. cn_nholds_lock
91*0Sstevel@tonic-gate  *	2. cn_lock
92*0Sstevel@tonic-gate  *	3. cn_hlock_lock
93*0Sstevel@tonic-gate  */
94*0Sstevel@tonic-gate 
95*0Sstevel@tonic-gate static int host_lock(dsvcd_container_t *, int, boolean_t);
96*0Sstevel@tonic-gate static int host_unlock(dsvcd_container_t *);
97*0Sstevel@tonic-gate static unsigned int cn_nlocks(dsvcd_container_t *);
98*0Sstevel@tonic-gate 
99*0Sstevel@tonic-gate /*
100*0Sstevel@tonic-gate  * Create a container identified by `cn_id'; returns an instance of the new
101*0Sstevel@tonic-gate  * container upon success, or NULL on failure.  Note that `cn_id' is
102*0Sstevel@tonic-gate  * treated as a pathname and thus must be a unique name for the container
103*0Sstevel@tonic-gate  * across all containers, container versions, and datastores -- additionally,
104*0Sstevel@tonic-gate  * if `crosshost' is set, then the directory named by `cn_id' must be a
105*0Sstevel@tonic-gate  * directory mounted on all cooperating hosts.
106*0Sstevel@tonic-gate  */
107*0Sstevel@tonic-gate dsvcd_container_t *
cn_create(const char * cn_id,boolean_t crosshost)108*0Sstevel@tonic-gate cn_create(const char *cn_id, boolean_t crosshost)
109*0Sstevel@tonic-gate {
110*0Sstevel@tonic-gate 	dsvcd_container_t *cn;
111*0Sstevel@tonic-gate 
112*0Sstevel@tonic-gate 	dhcpmsg(MSG_VERBOSE, "creating %scontainer synchpoint `%s'", crosshost ?
113*0Sstevel@tonic-gate 	    "crosshost " : "", cn_id);
114*0Sstevel@tonic-gate 
115*0Sstevel@tonic-gate 	cn = calloc(1, sizeof (dsvcd_container_t));
116*0Sstevel@tonic-gate 	if (cn == NULL)
117*0Sstevel@tonic-gate 		return (NULL);
118*0Sstevel@tonic-gate 
119*0Sstevel@tonic-gate 	cn->cn_id = strdup(cn_id);
120*0Sstevel@tonic-gate 	if (cn->cn_id == NULL) {
121*0Sstevel@tonic-gate 		free(cn);
122*0Sstevel@tonic-gate 		return (NULL);
123*0Sstevel@tonic-gate 	}
124*0Sstevel@tonic-gate 
125*0Sstevel@tonic-gate 	(void) mutex_init(&cn->cn_lock, USYNC_THREAD, NULL);
126*0Sstevel@tonic-gate 	(void) mutex_init(&cn->cn_hlock_lock, USYNC_THREAD, NULL);
127*0Sstevel@tonic-gate 	(void) mutex_init(&cn->cn_nholds_lock, USYNC_THREAD, NULL);
128*0Sstevel@tonic-gate 
129*0Sstevel@tonic-gate 	(void) cond_init(&cn->cn_hlockcv, USYNC_THREAD, NULL);
130*0Sstevel@tonic-gate 
131*0Sstevel@tonic-gate 	cn->cn_whead	  = NULL;
132*0Sstevel@tonic-gate 	cn->cn_wtail	  = NULL;
133*0Sstevel@tonic-gate 	cn->cn_nholds	  = 0;
134*0Sstevel@tonic-gate 	cn->cn_closing	  = B_FALSE;
135*0Sstevel@tonic-gate 	cn->cn_crosshost  = crosshost;
136*0Sstevel@tonic-gate 	cn->cn_hlockstate = CN_HUNLOCKED;
137*0Sstevel@tonic-gate 	cn->cn_hlockcount = 0;
138*0Sstevel@tonic-gate 
139*0Sstevel@tonic-gate 	return (cn);
140*0Sstevel@tonic-gate }
141*0Sstevel@tonic-gate 
142*0Sstevel@tonic-gate /*
143*0Sstevel@tonic-gate  * Destroy container `cn'; wait a decent amount of time for activity on the
144*0Sstevel@tonic-gate  * container to quiesce first.  If the caller has not prohibited other
145*0Sstevel@tonic-gate  * threads from calling into the container yet, this may take a long time.
146*0Sstevel@tonic-gate  */
147*0Sstevel@tonic-gate void
cn_destroy(dsvcd_container_t * cn)148*0Sstevel@tonic-gate cn_destroy(dsvcd_container_t *cn)
149*0Sstevel@tonic-gate {
150*0Sstevel@tonic-gate 	unsigned int	attempts;
151*0Sstevel@tonic-gate 	unsigned int	nstalelocks;
152*0Sstevel@tonic-gate 
153*0Sstevel@tonic-gate 	dhcpmsg(MSG_VERBOSE, "destroying container synchpoint `%s'", cn->cn_id);
154*0Sstevel@tonic-gate 
155*0Sstevel@tonic-gate 	(void) mutex_lock(&cn->cn_lock);
156*0Sstevel@tonic-gate 	cn->cn_closing = B_TRUE;
157*0Sstevel@tonic-gate 	(void) mutex_unlock(&cn->cn_lock);
158*0Sstevel@tonic-gate 
159*0Sstevel@tonic-gate 	/*
160*0Sstevel@tonic-gate 	 * Wait for up to CN_DESTROY_WAIT seconds for all the lock holders
161*0Sstevel@tonic-gate 	 * to relinquish their locks.  If the container has locks that seem
162*0Sstevel@tonic-gate 	 * to be stale, then warn the user before destroying it.  The locks
163*0Sstevel@tonic-gate 	 * will be unlocked automatically when we exit.
164*0Sstevel@tonic-gate 	 */
165*0Sstevel@tonic-gate 	for (attempts = 0; attempts < CN_DESTROY_WAIT; attempts++) {
166*0Sstevel@tonic-gate 		nstalelocks = cn_nlocks(cn);
167*0Sstevel@tonic-gate 		if (nstalelocks == 0)
168*0Sstevel@tonic-gate 			break;
169*0Sstevel@tonic-gate 
170*0Sstevel@tonic-gate 		(void) sleep(1);
171*0Sstevel@tonic-gate 	}
172*0Sstevel@tonic-gate 
173*0Sstevel@tonic-gate 	if (nstalelocks == 1) {
174*0Sstevel@tonic-gate 		dhcpmsg(MSG_WARNING, "unlocking stale lock on "
175*0Sstevel@tonic-gate 		    "container `%s'", cn->cn_id);
176*0Sstevel@tonic-gate 	} else if (nstalelocks != 0) {
177*0Sstevel@tonic-gate 		dhcpmsg(MSG_WARNING, "unlocking %d stale locks on "
178*0Sstevel@tonic-gate 		    "container `%s'", nstalelocks, cn->cn_id);
179*0Sstevel@tonic-gate 	}
180*0Sstevel@tonic-gate 
181*0Sstevel@tonic-gate 	(void) cond_destroy(&cn->cn_hlockcv);
182*0Sstevel@tonic-gate 	(void) mutex_destroy(&cn->cn_nholds_lock);
183*0Sstevel@tonic-gate 	(void) mutex_destroy(&cn->cn_hlock_lock);
184*0Sstevel@tonic-gate 	(void) mutex_destroy(&cn->cn_lock);
185*0Sstevel@tonic-gate 
186*0Sstevel@tonic-gate 	free(cn->cn_id);
187*0Sstevel@tonic-gate 	free(cn);
188*0Sstevel@tonic-gate }
189*0Sstevel@tonic-gate 
/*
 * Wait (block) until a lock of type `locktype' is obtained on container
 * `cn'.  Returns a DSVC_* return code; if DSVC_SUCCESS is returned, then
 * the lock is held upon return.  Must be called with the container's
 * cn_nholds_lock held on entry; returns with it unlocked.
 */
static int
cn_wait_for_lock(dsvcd_container_t *cn, dsvcd_locktype_t locktype)
{
	dsvcd_waitlist_t	waititem;	/* our entry on cn's waitlist */
	int			retval = DSVC_SUCCESS;

	assert(MUTEX_HELD(&cn->cn_nholds_lock));
	assert(cn->cn_nholds != 0);	/* caller saw the lock busy */

	waititem.wl_next = NULL;
	waititem.wl_prev = NULL;
	waititem.wl_locktype = locktype;
	(void) cond_init(&waititem.wl_cv, USYNC_THREAD, NULL);

	/*
	 * Chain our stack-local waititem onto the list; this keeps us from
	 * having to worry about allocation failures and also makes it easy
	 * for cn_unlock() to just pull us off the list without worrying
	 * about freeing the memory.
	 *
	 * Note that we can do this because by definition we are blocked in
	 * this function until we are signalled.
	 */
	if (cn->cn_whead != NULL) {
		/* non-empty list: append at the tail */
		waititem.wl_prev = cn->cn_wtail;
		cn->cn_wtail->wl_next = &waititem;
		cn->cn_wtail = &waititem;
	} else {
		/* empty list: we become both head and tail */
		cn->cn_whead = &waititem;
		cn->cn_wtail = &waititem;
	}

	/*
	 * Sleep until our lock request can be granted: a reader may
	 * proceed once no writer holds the lock (cn_nholds != -1); a
	 * writer needs the lock fully idle (cn_nholds == 0).  The loop
	 * re-checks the condition after each wakeup since cond_wait()
	 * may return spuriously.
	 */
	do {
		if (cond_wait(&waititem.wl_cv, &cn->cn_nholds_lock) != 0) {
			dhcpmsg(MSG_DEBUG, "cn_wait_for_lock: cond_wait error");
			retval = DSVC_INTERNAL;
			break;
		}
	} while ((locktype == DSVCD_RDLOCK && cn->cn_nholds == -1) ||
	    (locktype == DSVCD_WRLOCK && cn->cn_nholds != 0));

	(void) cond_destroy(&waititem.wl_cv);

	/* cond_wait() reacquired cn_nholds_lock before returning */
	assert(MUTEX_HELD(&cn->cn_nholds_lock));

	/*
	 * We got woken up; pull ourselves off of the local waitlist.
	 */
	if (waititem.wl_prev != NULL)
		waititem.wl_prev->wl_next = waititem.wl_next;
	else
		cn->cn_whead = waititem.wl_next;

	if (waititem.wl_next != NULL)
		waititem.wl_next->wl_prev = waititem.wl_prev;
	else
		cn->cn_wtail = waititem.wl_prev;

	/* on success, record our hold: -1 marks a writer, else count readers */
	if (retval == DSVC_SUCCESS) {
		if (locktype == DSVCD_WRLOCK)
			cn->cn_nholds = -1;
		else
			cn->cn_nholds++;
	}

	/*
	 * If we just acquired a read lock and the next waiter is waiting
	 * for a readlock too, signal the waiter.  Note that we wake each
	 * reader up one-by-one like this to avoid excessive contention on
	 * cn_nholds_lock.
	 */
	if (locktype == DSVCD_RDLOCK && cn->cn_whead != NULL &&
	    cn->cn_whead->wl_locktype == DSVCD_RDLOCK)
		(void) cond_signal(&cn->cn_whead->wl_cv);

	(void) mutex_unlock(&cn->cn_nholds_lock);
	return (retval);
}
274*0Sstevel@tonic-gate 
/*
 * Lock container `cn' for reader (shared) access.  If the container cannot
 * be locked immediately (there is currently a writer lock held or a writer
 * lock waiting for the lock), then if `nonblock' is B_TRUE, DSVC_BUSY is
 * returned.  Otherwise, block until the lock can be obtained.  Returns a
 * DSVC_* code.
 */
int
cn_rdlock(dsvcd_container_t *cn, boolean_t nonblock)
{
	int	retval;

	/*
	 * The container is going away; no new lock requests.
	 */
	(void) mutex_lock(&cn->cn_lock);
	if (cn->cn_closing) {
		(void) mutex_unlock(&cn->cn_lock);
		return (DSVC_SYNCH_ERR);
	}
	(void) mutex_unlock(&cn->cn_lock);

	/*
	 * See if we can grab the lock without having to block; only
	 * possible if we can acquire the host lock without blocking, if
	 * the lock is not currently owned by a writer and if there are no
	 * writers currently enqueued for accessing this lock (we know that
	 * if there's a waiter it must be a writer since this code doesn't
	 * enqueue readers until there's a writer enqueued).  We enqueue
	 * these requests to improve fairness.
	 */
	(void) mutex_lock(&cn->cn_nholds_lock);

	if (cn->cn_nholds != -1 && cn->cn_whead == NULL &&
	    host_lock(cn, F_RDLCK, B_TRUE) == DSVC_SUCCESS) {
		/* fast path: no writer, no waiters, host lock acquired */
		cn->cn_nholds++;
		(void) mutex_unlock(&cn->cn_nholds_lock);
		return (DSVC_SUCCESS);
	}

	(void) mutex_unlock(&cn->cn_nholds_lock);

	/*
	 * Cannot grab the lock without blocking somewhere; wait until we
	 * can grab the host lock, then with that lock held obtain our
	 * intra-process lock.
	 */
	if (nonblock)
		return (DSVC_BUSY);
	retval = host_lock(cn, F_RDLCK, B_FALSE);
	if (retval != DSVC_SUCCESS)
		return (retval);

	/*
	 * We've got the host lock; if there aren't any writers currently
	 * contending for our intra-process lock then succeed immediately.
	 * It's possible for there to be waiters but for nholds to be zero
	 * via the following scenario:
	 *
	 *	1. The last holder of a lock unlocks, dropping nholds to
	 *	   zero and signaling the head waiter on the waitlist.
	 *
	 *	2. The last holder drops cn_nholds_lock.
	 *
	 *	3. We acquire cn_nholds_lock before the signaled waiter
	 *	   does.
	 *
	 * Note that this case won't cause a deadlock even if we didn't
	 * check for it here (when the waiter finally gets cn_nholds_lock,
	 * it'll find that the waitlist is once again non-NULL, and signal
	 * us).  However, as an optimization, handle the case here.
	 */
	(void) mutex_lock(&cn->cn_nholds_lock);
	if (cn->cn_nholds != -1 &&
	    (cn->cn_whead == NULL || cn->cn_nholds == 0)) {
		cn->cn_nholds++;
		(void) mutex_unlock(&cn->cn_nholds_lock);
		return (DSVC_SUCCESS);
	}

	/* cn_wait_for_lock() will drop cn_nholds_lock */
	retval = cn_wait_for_lock(cn, DSVCD_RDLOCK);
	if (retval != DSVC_SUCCESS) {
		/* intra-process wait failed; give back the host lock */
		(void) host_unlock(cn);
		return (retval);
	}
	return (DSVC_SUCCESS);
}
363*0Sstevel@tonic-gate 
/*
 * Lock container `cn' for writer (exclusive) access.  If the container
 * cannot be locked immediately (there are currently readers or a writer),
 * then if `nonblock' is B_TRUE, DSVC_BUSY is returned.  Otherwise, block
 * until the lock can be obtained.  Returns a DSVC_* code.
 */
int
cn_wrlock(dsvcd_container_t *cn, boolean_t nonblock)
{
	int	retval;

	/*
	 * The container is going away; no new lock requests.
	 */
	(void) mutex_lock(&cn->cn_lock);
	if (cn->cn_closing) {
		(void) mutex_unlock(&cn->cn_lock);
		return (DSVC_SYNCH_ERR);
	}
	(void) mutex_unlock(&cn->cn_lock);

	/*
	 * See if we can grab the lock without having to block; only
	 * possible if there are no current writers within our process and
	 * that we can immediately acquire the host lock.
	 */
	(void) mutex_lock(&cn->cn_nholds_lock);

	if (cn->cn_nholds == 0 &&
	    host_lock(cn, F_WRLCK, B_TRUE) == DSVC_SUCCESS) {
		/* fast path: lock idle and host lock acquired */
		cn->cn_nholds = -1;	/* -1 marks a writer hold */
		(void) mutex_unlock(&cn->cn_nholds_lock);
		return (DSVC_SUCCESS);
	}

	(void) mutex_unlock(&cn->cn_nholds_lock);

	/*
	 * Cannot grab the lock without blocking somewhere; wait until we
	 * can grab the host lock, then with that lock held obtain our
	 * intra-process lock.
	 */
	if (nonblock)
		return (DSVC_BUSY);
	retval = host_lock(cn, F_WRLCK, B_FALSE);
	if (retval != DSVC_SUCCESS)
		return (retval);

	/*
	 * We've got the host lock; if there aren't any writers currently
	 * contending for our intra-process lock then succeed immediately.
	 */
	(void) mutex_lock(&cn->cn_nholds_lock);
	if (cn->cn_nholds == 0) {
		cn->cn_nholds = -1;
		(void) mutex_unlock(&cn->cn_nholds_lock);
		return (DSVC_SUCCESS);
	}

	/* cn_wait_for_lock() will drop cn_nholds_lock */
	retval = cn_wait_for_lock(cn, DSVCD_WRLOCK);
	if (retval != DSVC_SUCCESS) {
		/* intra-process wait failed; give back the host lock */
		(void) host_unlock(cn);
		return (retval);
	}
	return (DSVC_SUCCESS);
}
431*0Sstevel@tonic-gate 
432*0Sstevel@tonic-gate /*
433*0Sstevel@tonic-gate  * Unlock reader or writer lock on container `cn'; returns a DSVC_* code
434*0Sstevel@tonic-gate  */
435*0Sstevel@tonic-gate int
cn_unlock(dsvcd_container_t * cn)436*0Sstevel@tonic-gate cn_unlock(dsvcd_container_t *cn)
437*0Sstevel@tonic-gate {
438*0Sstevel@tonic-gate 	(void) mutex_lock(&cn->cn_nholds_lock);
439*0Sstevel@tonic-gate 
440*0Sstevel@tonic-gate 	if (cn->cn_nholds == 0) {
441*0Sstevel@tonic-gate 		(void) mutex_unlock(&cn->cn_nholds_lock);
442*0Sstevel@tonic-gate 		return (DSVC_SYNCH_ERR);
443*0Sstevel@tonic-gate 	}
444*0Sstevel@tonic-gate 
445*0Sstevel@tonic-gate 	if (cn->cn_nholds != -1 && cn->cn_nholds != 1) {
446*0Sstevel@tonic-gate 		cn->cn_nholds--;
447*0Sstevel@tonic-gate 		(void) host_unlock(cn);
448*0Sstevel@tonic-gate 		(void) mutex_unlock(&cn->cn_nholds_lock);
449*0Sstevel@tonic-gate 		return (DSVC_SUCCESS);
450*0Sstevel@tonic-gate 	}
451*0Sstevel@tonic-gate 
452*0Sstevel@tonic-gate 	/*
453*0Sstevel@tonic-gate 	 * The last reader or a writer just unlocked -- signal the first
454*0Sstevel@tonic-gate 	 * waiter.  To avoid a thundering herd, we only signal the first
455*0Sstevel@tonic-gate 	 * waiter, even if there are multiple readers ready to go --
456*0Sstevel@tonic-gate 	 * instead, each reader is responsible for signaling the next
457*0Sstevel@tonic-gate 	 * in cn_wait_for_lock().
458*0Sstevel@tonic-gate 	 */
459*0Sstevel@tonic-gate 	cn->cn_nholds = 0;
460*0Sstevel@tonic-gate 	if (cn->cn_whead != NULL)
461*0Sstevel@tonic-gate 		(void) cond_signal(&cn->cn_whead->wl_cv);
462*0Sstevel@tonic-gate 
463*0Sstevel@tonic-gate 	(void) host_unlock(cn);
464*0Sstevel@tonic-gate 	(void) mutex_unlock(&cn->cn_nholds_lock);
465*0Sstevel@tonic-gate 
466*0Sstevel@tonic-gate 	return (DSVC_SUCCESS);
467*0Sstevel@tonic-gate }
468*0Sstevel@tonic-gate 
469*0Sstevel@tonic-gate /*
470*0Sstevel@tonic-gate  * Find out what kind of lock is on `cn'.  Note that this is just a
471*0Sstevel@tonic-gate  * snapshot in time and without additional locks the answer may be invalid
472*0Sstevel@tonic-gate  * by the time the function returns.
473*0Sstevel@tonic-gate  */
474*0Sstevel@tonic-gate dsvcd_locktype_t
cn_locktype(dsvcd_container_t * cn)475*0Sstevel@tonic-gate cn_locktype(dsvcd_container_t *cn)
476*0Sstevel@tonic-gate {
477*0Sstevel@tonic-gate 	int nholds;
478*0Sstevel@tonic-gate 
479*0Sstevel@tonic-gate 	(void) mutex_lock(&cn->cn_nholds_lock);
480*0Sstevel@tonic-gate 	nholds = cn->cn_nholds;
481*0Sstevel@tonic-gate 	(void) mutex_unlock(&cn->cn_nholds_lock);
482*0Sstevel@tonic-gate 
483*0Sstevel@tonic-gate 	if (nholds == 0)
484*0Sstevel@tonic-gate 		return (DSVCD_NOLOCK);
485*0Sstevel@tonic-gate 	else if (nholds > 0)
486*0Sstevel@tonic-gate 		return (DSVCD_RDLOCK);
487*0Sstevel@tonic-gate 	else
488*0Sstevel@tonic-gate 		return (DSVCD_WRLOCK);
489*0Sstevel@tonic-gate }
490*0Sstevel@tonic-gate 
491*0Sstevel@tonic-gate /*
492*0Sstevel@tonic-gate  * Obtain a lock of type `locktype' on container `cn' such that we have
493*0Sstevel@tonic-gate  * shared or exclusive access to this container across all hosts.  If
494*0Sstevel@tonic-gate  * `nonblock' is true and the lock cannot be obtained return DSVC_BUSY.  If
495*0Sstevel@tonic-gate  * the lock is already held, the number of instances of the lock "checked
496*0Sstevel@tonic-gate  * out" by this host is incremented.
497*0Sstevel@tonic-gate  */
498*0Sstevel@tonic-gate static int
host_lock(dsvcd_container_t * cn,int locktype,boolean_t nonblock)499*0Sstevel@tonic-gate host_lock(dsvcd_container_t *cn, int locktype, boolean_t nonblock)
500*0Sstevel@tonic-gate {
501*0Sstevel@tonic-gate 	struct flock	flock;
502*0Sstevel@tonic-gate 	int		fd;
503*0Sstevel@tonic-gate 	char		*basename, lockpath[MAXPATHLEN];
504*0Sstevel@tonic-gate 	int		error;
505*0Sstevel@tonic-gate 
506*0Sstevel@tonic-gate 	if (!cn->cn_crosshost)
507*0Sstevel@tonic-gate 		return (DSVC_SUCCESS);
508*0Sstevel@tonic-gate 
509*0Sstevel@tonic-gate 	/*
510*0Sstevel@tonic-gate 	 * Before we wait for a while, see if the container is going away;
511*0Sstevel@tonic-gate 	 * if so, fail now so the container can drain quicker..
512*0Sstevel@tonic-gate 	 */
513*0Sstevel@tonic-gate 	(void) mutex_lock(&cn->cn_lock);
514*0Sstevel@tonic-gate 	if (cn->cn_closing) {
515*0Sstevel@tonic-gate 		(void) mutex_unlock(&cn->cn_lock);
516*0Sstevel@tonic-gate 		return (DSVC_SYNCH_ERR);
517*0Sstevel@tonic-gate 	}
518*0Sstevel@tonic-gate 	(void) mutex_unlock(&cn->cn_lock);
519*0Sstevel@tonic-gate 
520*0Sstevel@tonic-gate 	/*
521*0Sstevel@tonic-gate 	 * Note that we only wait if (1) there's already a thread trying to
522*0Sstevel@tonic-gate 	 * grab the host lock on our host or if (2) this host currently
523*0Sstevel@tonic-gate 	 * holds a host shared lock and we need an exclusive lock.  Note
524*0Sstevel@tonic-gate 	 * that we do *not* wait in the following situations:
525*0Sstevel@tonic-gate 	 *
526*0Sstevel@tonic-gate 	 *	* This host holds an exclusive host lock and another
527*0Sstevel@tonic-gate 	 *	  exclusive host lock request comes in.  We rely on the
528*0Sstevel@tonic-gate 	 *	  intra-process lock to do the synchronization.
529*0Sstevel@tonic-gate 	 *
530*0Sstevel@tonic-gate 	 *	* This host holds an exclusive host lock and a shared host
531*0Sstevel@tonic-gate 	 *	  lock request comes in.  Since this host already has
532*0Sstevel@tonic-gate 	 *	  exclusive access, we already implicitly hold the shared
533*0Sstevel@tonic-gate 	 *	  host lock as far as this host is concerned, so just rely
534*0Sstevel@tonic-gate 	 *	  on the intra-process lock to do the synchronization.
535*0Sstevel@tonic-gate 	 *
536*0Sstevel@tonic-gate 	 * These semantics make sense as long as one remembers that the
537*0Sstevel@tonic-gate 	 * host lock merely provides exclusive or shared access for a given
538*0Sstevel@tonic-gate 	 * host or set of hosts -- that is, exclusive access is exclusive
539*0Sstevel@tonic-gate 	 * access for that machine, not for the given request.
540*0Sstevel@tonic-gate 	 */
541*0Sstevel@tonic-gate 	(void) mutex_lock(&cn->cn_hlock_lock);
542*0Sstevel@tonic-gate 
543*0Sstevel@tonic-gate 	while (cn->cn_hlockstate == CN_HPENDING ||
544*0Sstevel@tonic-gate 	    cn->cn_hlockstate == CN_HRDLOCKED && locktype == F_WRLCK) {
545*0Sstevel@tonic-gate 		if (nonblock) {
546*0Sstevel@tonic-gate 			(void) mutex_unlock(&cn->cn_hlock_lock);
547*0Sstevel@tonic-gate 			return (DSVC_BUSY);
548*0Sstevel@tonic-gate 		}
549*0Sstevel@tonic-gate 
550*0Sstevel@tonic-gate 		if (cond_wait(&cn->cn_hlockcv, &cn->cn_hlock_lock) != 0) {
551*0Sstevel@tonic-gate 			(void) mutex_unlock(&cn->cn_hlock_lock);
552*0Sstevel@tonic-gate 			return (DSVC_SYNCH_ERR);
553*0Sstevel@tonic-gate 		}
554*0Sstevel@tonic-gate 	}
555*0Sstevel@tonic-gate 
556*0Sstevel@tonic-gate 	if (cn->cn_hlockstate == CN_HRDLOCKED ||
557*0Sstevel@tonic-gate 	    cn->cn_hlockstate == CN_HWRLOCKED) {
558*0Sstevel@tonic-gate 		/*
559*0Sstevel@tonic-gate 		 * Already locked; just bump the held lock count.
560*0Sstevel@tonic-gate 		 */
561*0Sstevel@tonic-gate 		assert(cn->cn_hlockcount > 0);
562*0Sstevel@tonic-gate 		cn->cn_hlockcount++;
563*0Sstevel@tonic-gate 		(void) mutex_unlock(&cn->cn_hlock_lock);
564*0Sstevel@tonic-gate 		return (DSVC_SUCCESS);
565*0Sstevel@tonic-gate 	}
566*0Sstevel@tonic-gate 
567*0Sstevel@tonic-gate 	/*
568*0Sstevel@tonic-gate 	 * We're the thread that's going to try to acquire the host lock.
569*0Sstevel@tonic-gate 	 */
570*0Sstevel@tonic-gate 
571*0Sstevel@tonic-gate 	assert(cn->cn_hlockcount == 0);
572*0Sstevel@tonic-gate 
573*0Sstevel@tonic-gate 	/*
574*0Sstevel@tonic-gate 	 * Create the lock file as a hidden file in the directory named by
575*0Sstevel@tonic-gate 	 * cn_id.  So if cn_id is /var/dhcp/SUNWfiles1_dhcptab, we want the
576*0Sstevel@tonic-gate 	 * lock file to be /var/dhcp/.SUNWfiles1_dhcptab.lock.  Please, no
577*0Sstevel@tonic-gate 	 * giggles about the snprintf().
578*0Sstevel@tonic-gate 	 */
579*0Sstevel@tonic-gate 	basename = strrchr(cn->cn_id, '/');
580*0Sstevel@tonic-gate 	if (basename == NULL)
581*0Sstevel@tonic-gate 		basename = cn->cn_id;
582*0Sstevel@tonic-gate 	else
583*0Sstevel@tonic-gate 		basename++;
584*0Sstevel@tonic-gate 
585*0Sstevel@tonic-gate 	(void) snprintf(lockpath, MAXPATHLEN, "%.*s.%s.lock",
586*0Sstevel@tonic-gate 	    basename - cn->cn_id, cn->cn_id, basename);
587*0Sstevel@tonic-gate 	fd = open(lockpath, O_RDWR|O_CREAT, 0600);
588*0Sstevel@tonic-gate 	if (fd == -1) {
589*0Sstevel@tonic-gate 		(void) mutex_unlock(&cn->cn_hlock_lock);
590*0Sstevel@tonic-gate 		return (DSVC_SYNCH_ERR);
591*0Sstevel@tonic-gate 	}
592*0Sstevel@tonic-gate 
593*0Sstevel@tonic-gate 	cn->cn_hlockstate = CN_HPENDING;
594*0Sstevel@tonic-gate 	(void) mutex_unlock(&cn->cn_hlock_lock);
595*0Sstevel@tonic-gate 
596*0Sstevel@tonic-gate 	flock.l_len	= 0;
597*0Sstevel@tonic-gate 	flock.l_type	= locktype;
598*0Sstevel@tonic-gate 	flock.l_start	= 0;
599*0Sstevel@tonic-gate 	flock.l_whence	= SEEK_SET;
600*0Sstevel@tonic-gate 
601*0Sstevel@tonic-gate 	if (fcntl(fd, nonblock ? F_SETLK : F_SETLKW, &flock) == -1) {
602*0Sstevel@tonic-gate 		/*
603*0Sstevel@tonic-gate 		 * For some reason we couldn't acquire the lock.  Reset the
604*0Sstevel@tonic-gate 		 * host lock state to "unlocked" and signal another thread
605*0Sstevel@tonic-gate 		 * (if there's one waiting) to pick up where we left off.
606*0Sstevel@tonic-gate 		 */
607*0Sstevel@tonic-gate 		error = errno;
608*0Sstevel@tonic-gate 		(void) mutex_lock(&cn->cn_hlock_lock);
609*0Sstevel@tonic-gate 		cn->cn_hlockstate = CN_HUNLOCKED;
610*0Sstevel@tonic-gate 		(void) cond_signal(&cn->cn_hlockcv);
611*0Sstevel@tonic-gate 		(void) mutex_unlock(&cn->cn_hlock_lock);
612*0Sstevel@tonic-gate 		(void) close(fd);
613*0Sstevel@tonic-gate 		return (error == EAGAIN ? DSVC_BUSY : DSVC_SYNCH_ERR);
614*0Sstevel@tonic-gate 	}
615*0Sstevel@tonic-gate 
616*0Sstevel@tonic-gate 	/*
617*0Sstevel@tonic-gate 	 * Got the lock; wake up all the waiters since they can all succeed
618*0Sstevel@tonic-gate 	 */
619*0Sstevel@tonic-gate 	(void) mutex_lock(&cn->cn_hlock_lock);
620*0Sstevel@tonic-gate 	cn->cn_hlockstate = (locktype == F_WRLCK ? CN_HWRLOCKED : CN_HRDLOCKED);
621*0Sstevel@tonic-gate 	cn->cn_hlockcount++;
622*0Sstevel@tonic-gate 	cn->cn_hlockfd = fd;
623*0Sstevel@tonic-gate 	(void) cond_broadcast(&cn->cn_hlockcv);
624*0Sstevel@tonic-gate 	(void) mutex_unlock(&cn->cn_hlock_lock);
625*0Sstevel@tonic-gate 
626*0Sstevel@tonic-gate 	return (DSVC_SUCCESS);
627*0Sstevel@tonic-gate }
628*0Sstevel@tonic-gate 
629*0Sstevel@tonic-gate /*
630*0Sstevel@tonic-gate  * Unlock a checked out instance of a shared or exclusive lock on container
631*0Sstevel@tonic-gate  * `cn'; if the number of checked out instances goes to zero, then the host
632*0Sstevel@tonic-gate  * lock is unlocked so that other hosts may compete for it.
633*0Sstevel@tonic-gate  */
634*0Sstevel@tonic-gate static int
host_unlock(dsvcd_container_t * cn)635*0Sstevel@tonic-gate host_unlock(dsvcd_container_t *cn)
636*0Sstevel@tonic-gate {
637*0Sstevel@tonic-gate 	struct flock	flock;
638*0Sstevel@tonic-gate 
639*0Sstevel@tonic-gate 	if (!cn->cn_crosshost)
640*0Sstevel@tonic-gate 		return (DSVC_SUCCESS);
641*0Sstevel@tonic-gate 
642*0Sstevel@tonic-gate 	assert(cn->cn_hlockcount > 0);
643*0Sstevel@tonic-gate 
644*0Sstevel@tonic-gate 	(void) mutex_lock(&cn->cn_hlock_lock);
645*0Sstevel@tonic-gate 	if (cn->cn_hlockcount > 1) {
646*0Sstevel@tonic-gate 		/*
647*0Sstevel@tonic-gate 		 * Not the last unlock by this host; just decrement the
648*0Sstevel@tonic-gate 		 * held lock count.
649*0Sstevel@tonic-gate 		 */
650*0Sstevel@tonic-gate 		cn->cn_hlockcount--;
651*0Sstevel@tonic-gate 		(void) mutex_unlock(&cn->cn_hlock_lock);
652*0Sstevel@tonic-gate 		return (DSVC_SUCCESS);
653*0Sstevel@tonic-gate 	}
654*0Sstevel@tonic-gate 
655*0Sstevel@tonic-gate 	flock.l_len	= 0;
656*0Sstevel@tonic-gate 	flock.l_type	= F_UNLCK;
657*0Sstevel@tonic-gate 	flock.l_start	= 0;
658*0Sstevel@tonic-gate 	flock.l_whence	= SEEK_SET;
659*0Sstevel@tonic-gate 
660*0Sstevel@tonic-gate 	if (fcntl(cn->cn_hlockfd, F_SETLK, &flock) == -1) {
661*0Sstevel@tonic-gate 		(void) mutex_unlock(&cn->cn_hlock_lock);
662*0Sstevel@tonic-gate 		return (DSVC_SYNCH_ERR);
663*0Sstevel@tonic-gate 	}
664*0Sstevel@tonic-gate 
665*0Sstevel@tonic-gate 	/*
666*0Sstevel@tonic-gate 	 * Note that we don't unlink the lockfile for a number of reasons,
667*0Sstevel@tonic-gate 	 * the most blatant reason being:
668*0Sstevel@tonic-gate 	 *
669*0Sstevel@tonic-gate 	 *	1. Several hosts lock the lockfile for shared access.
670*0Sstevel@tonic-gate 	 *	2. One host unlocks the lockfile and unlinks it (here).
671*0Sstevel@tonic-gate 	 *	3. Another host comes in, goes to exclusively lock the
672*0Sstevel@tonic-gate 	 *	   lockfile, finds no lockfile, and creates a new one
673*0Sstevel@tonic-gate 	 *	   (meanwhile, the other hosts are still accessing the
674*0Sstevel@tonic-gate 	 *	   container through the unlinked lockfile).
675*0Sstevel@tonic-gate 	 *
676*0Sstevel@tonic-gate 	 * We could put in some hairy code to try to unlink lockfiles
677*0Sstevel@tonic-gate 	 * elsewhere (when possible), but it hardly seems worth it since
678*0Sstevel@tonic-gate 	 * inodes are cheap.
679*0Sstevel@tonic-gate 	 */
680*0Sstevel@tonic-gate 
681*0Sstevel@tonic-gate 	(void) close(cn->cn_hlockfd);
682*0Sstevel@tonic-gate 	cn->cn_hlockcount = 0;
683*0Sstevel@tonic-gate 	cn->cn_hlockstate = CN_HUNLOCKED;
684*0Sstevel@tonic-gate 	/*
685*0Sstevel@tonic-gate 	 * We need to signal `cn_hlockcv' in case there are threads which
686*0Sstevel@tonic-gate 	 * are waiting on it to attempt flock() exclusive access (see the
687*0Sstevel@tonic-gate 	 * comments in host_lock() for more details about this case).
688*0Sstevel@tonic-gate 	 */
689*0Sstevel@tonic-gate 	(void) cond_signal(&cn->cn_hlockcv);
690*0Sstevel@tonic-gate 	(void) mutex_unlock(&cn->cn_hlock_lock);
691*0Sstevel@tonic-gate 
692*0Sstevel@tonic-gate 	return (DSVC_SUCCESS);
693*0Sstevel@tonic-gate }
694*0Sstevel@tonic-gate 
695*0Sstevel@tonic-gate /*
696*0Sstevel@tonic-gate  * Return the number of locks currently held for container `cn'.
697*0Sstevel@tonic-gate  */
698*0Sstevel@tonic-gate static unsigned int
cn_nlocks(dsvcd_container_t * cn)699*0Sstevel@tonic-gate cn_nlocks(dsvcd_container_t *cn)
700*0Sstevel@tonic-gate {
701*0Sstevel@tonic-gate 	unsigned int nlocks;
702*0Sstevel@tonic-gate 
703*0Sstevel@tonic-gate 	(void) mutex_lock(&cn->cn_nholds_lock);
704*0Sstevel@tonic-gate 	(void) mutex_lock(&cn->cn_hlock_lock);
705*0Sstevel@tonic-gate 
706*0Sstevel@tonic-gate 	switch (cn->cn_nholds) {
707*0Sstevel@tonic-gate 	case 0:
708*0Sstevel@tonic-gate 		nlocks = cn->cn_hlockcount;
709*0Sstevel@tonic-gate 		break;
710*0Sstevel@tonic-gate 	case -1:
711*0Sstevel@tonic-gate 		nlocks = 1;
712*0Sstevel@tonic-gate 		break;
713*0Sstevel@tonic-gate 	default:
714*0Sstevel@tonic-gate 		nlocks = cn->cn_nholds;
715*0Sstevel@tonic-gate 		break;
716*0Sstevel@tonic-gate 	}
717*0Sstevel@tonic-gate 
718*0Sstevel@tonic-gate 	dhcpmsg(MSG_DEBUG, "cn_nlocks: nholds=%d hlockstate=%d hlockcount=%d",
719*0Sstevel@tonic-gate 	    cn->cn_nholds, cn->cn_hlockstate, cn->cn_hlockcount);
720*0Sstevel@tonic-gate 
721*0Sstevel@tonic-gate 	(void) mutex_unlock(&cn->cn_hlock_lock);
722*0Sstevel@tonic-gate 	(void) mutex_unlock(&cn->cn_nholds_lock);
723*0Sstevel@tonic-gate 
724*0Sstevel@tonic-gate 	return (nlocks);
725*0Sstevel@tonic-gate }
726