/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License").  You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2000-2001 by Sun Microsystems, Inc.
 * All rights reserved.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

#include <sys/types.h>
#include <synch.h>
#include <assert.h>
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include <fcntl.h>
#include <errno.h>
#include <dhcpmsg.h>
#include <unistd.h>
#include <dhcp_svc_private.h>

#include "container.h"

/*
 * Container locking code -- warning: serious pain ahead.
 *
 * This code synchronizes access to a given container across multiple
 * threads in this (dsvclockd) process, and optionally synchronizes across
 * multiple instances of dsvclockd running on different hosts.  The
 * synchronization allows multiple readers or a single writer at one time.
 *
 * Since by definition there is at most one dsvclockd running per host and
 * all requests by all threads in all processes running on that host funnel
 * into it, this code effectively synchronizes access to a given container
 * across all threads in all processes running on a given host.  This means
 * that the optional synchronization across multiple instances of dsvclockd
 * on different hosts provides true cross-host synchronization for all
 * threads in all processes on all cooperating machines (though all hosts
 * must have write access to a common directory).
 *
 * The container synchronization here should be viewed as a two step
 * process, where the first step is optional:
 *
 *	1. Synchronize access across the set of cooperating dsvclockd's
 *	   on multiple hosts.  This is known as acquiring the host lock.
 *
 *	2. Synchronize access across the set of threads running inside
 *	   this dsvclockd process.  This is known as acquiring the
 *	   intra-process lock.
 *
 * In order to implement the first (host lock) step, we use fcntl()-based
 * file locking on a file inside an NFS-shared directory and rely on NFS to
 * do our synchronization for us.  Note that this can only be used to
 * implement the first step since fcntl()-based locks are process locks,
 * and the effects of using these locks with multiple threads are not
 * defined.  Furthermore, note that this means it requires some fancy
 * footwork to ensure that only one thread in a given dsvclockd process
 * tries to acquire the fcntl() lock for that process.
 *
 * In order to implement the second step, we use custom-made reader-writer
 * locks since the stock Solaris ones don't quite have the semantics we
 * need -- in particular, we need to relax the requirement that the thread
 * which acquired the lock is the one releasing it.
 *
 * Lock ordering guidelines:
 *
 * For the most part, this code does not acquire more than one container
 * lock at a time -- whenever feasible, please do the same.  If you must
 * acquire more than one lock at a time, the correct order is:
 *
 *	1. cn_nholds_lock
 *	2. cn_lock
 *	3. cn_hlock_lock
 */
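
/*
 * A minimal usage sketch (hypothetical caller; the container name below
 * is borrowed from the lock file example in host_lock()):
 *
 *	dsvcd_container_t *cn;
 *
 *	cn = cn_create("/var/dhcp/SUNWfiles1_dhcptab", B_TRUE);
 *	if (cn != NULL) {
 *		if (cn_rdlock(cn, B_FALSE) == DSVC_SUCCESS) {
 *			... read the container ...
 *			(void) cn_unlock(cn);
 *		}
 *		cn_destroy(cn);
 *	}
 */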

static int host_lock(dsvcd_container_t *, int, boolean_t);
static int host_unlock(dsvcd_container_t *);
static unsigned int cn_nlocks(dsvcd_container_t *);

/*
 * Create a container identified by `cn_id'; returns an instance of the new
 * container upon success, or NULL on failure.  Note that `cn_id' is
 * treated as a pathname and thus must be a unique name for the container
 * across all containers, container versions, and datastores -- additionally,
 * if `crosshost' is set, then the directory named by `cn_id' must be a
 * directory mounted on all cooperating hosts.
 */
dsvcd_container_t *
cn_create(const char *cn_id, boolean_t crosshost)
{
	dsvcd_container_t *cn;

	dhcpmsg(MSG_VERBOSE, "creating %scontainer synchpoint `%s'", crosshost ?
	    "crosshost " : "", cn_id);

	cn = calloc(1, sizeof (dsvcd_container_t));
	if (cn == NULL)
		return (NULL);

	cn->cn_id = strdup(cn_id);
	if (cn->cn_id == NULL) {
		free(cn);
		return (NULL);
	}

	(void) mutex_init(&cn->cn_lock, USYNC_THREAD, NULL);
	(void) mutex_init(&cn->cn_hlock_lock, USYNC_THREAD, NULL);
	(void) mutex_init(&cn->cn_nholds_lock, USYNC_THREAD, NULL);

	(void) cond_init(&cn->cn_hlockcv, USYNC_THREAD, NULL);

	cn->cn_whead	  = NULL;
	cn->cn_wtail	  = NULL;
	cn->cn_nholds	  = 0;
	cn->cn_closing	  = B_FALSE;
	cn->cn_crosshost  = crosshost;
	cn->cn_hlockstate = CN_HUNLOCKED;
	cn->cn_hlockcount = 0;

	return (cn);
}

/*
 * Destroy container `cn'; wait a decent amount of time for activity on the
 * container to quiesce first.  If the caller has not prohibited other
 * threads from calling into the container yet, this may take a long time.
 */
void
cn_destroy(dsvcd_container_t *cn)
{
	unsigned int	attempts;
	unsigned int	nstalelocks;

	dhcpmsg(MSG_VERBOSE, "destroying container synchpoint `%s'", cn->cn_id);

	(void) mutex_lock(&cn->cn_lock);
	cn->cn_closing = B_TRUE;
	(void) mutex_unlock(&cn->cn_lock);

	/*
	 * Wait for up to CN_DESTROY_WAIT seconds for all the lock holders
	 * to relinquish their locks.  If the container has locks that seem
	 * to be stale, then warn the user before destroying it.  The locks
	 * will be unlocked automatically when we exit.
	 */
	for (attempts = 0; attempts < CN_DESTROY_WAIT; attempts++) {
		nstalelocks = cn_nlocks(cn);
		if (nstalelocks == 0)
			break;

		(void) sleep(1);
	}

	if (nstalelocks == 1) {
		dhcpmsg(MSG_WARNING, "unlocking stale lock on "
		    "container `%s'", cn->cn_id);
	} else if (nstalelocks != 0) {
		dhcpmsg(MSG_WARNING, "unlocking %d stale locks on "
		    "container `%s'", nstalelocks, cn->cn_id);
	}

	(void) cond_destroy(&cn->cn_hlockcv);
	(void) mutex_destroy(&cn->cn_nholds_lock);
	(void) mutex_destroy(&cn->cn_hlock_lock);
	(void) mutex_destroy(&cn->cn_lock);

	free(cn->cn_id);
	free(cn);
}

/*
 * Wait (block) until a lock of type `locktype' is obtained on container
 * `cn'.  Returns a DSVC_* return code; if DSVC_SUCCESS is returned, then
 * the lock is held upon return.  Must be called with the container's
 * cn_nholds_lock held on entry; returns with it unlocked.
 */
static int
cn_wait_for_lock(dsvcd_container_t *cn, dsvcd_locktype_t locktype)
{
	dsvcd_waitlist_t	waititem;
	int			retval = DSVC_SUCCESS;

	assert(MUTEX_HELD(&cn->cn_nholds_lock));
	assert(cn->cn_nholds != 0);
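
	/*
	 * Reminder on the cn_nholds encoding used throughout this file:
	 * -1 means a writer holds the intra-process lock, 0 means it is
	 * unheld, and a positive value counts the readers currently
	 * holding it.
	 */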

	waititem.wl_next = NULL;
	waititem.wl_prev = NULL;
	waititem.wl_locktype = locktype;
	(void) cond_init(&waititem.wl_cv, USYNC_THREAD, NULL);

	/*
	 * Chain our stack-local waititem onto the list; this keeps us from
	 * having to worry about allocation failures and also makes it easy
	 * for cn_unlock() to just pull us off the list without worrying
	 * about freeing the memory.
	 *
	 * Note that we can do this because by definition we are blocked in
	 * this function until we are signalled.
	 */
	if (cn->cn_whead != NULL) {
		waititem.wl_prev = cn->cn_wtail;
		cn->cn_wtail->wl_next = &waititem;
		cn->cn_wtail = &waititem;
	} else {
		cn->cn_whead = &waititem;
		cn->cn_wtail = &waititem;
	}

	do {
		if (cond_wait(&waititem.wl_cv, &cn->cn_nholds_lock) != 0) {
			dhcpmsg(MSG_DEBUG, "cn_wait_for_lock: cond_wait error");
			retval = DSVC_INTERNAL;
			break;
		}
	} while ((locktype == DSVCD_RDLOCK && cn->cn_nholds == -1) ||
	    (locktype == DSVCD_WRLOCK && cn->cn_nholds != 0));

	(void) cond_destroy(&waititem.wl_cv);

	assert(MUTEX_HELD(&cn->cn_nholds_lock));

	/*
	 * We got woken up; pull ourselves off of the local waitlist.
	 */
	if (waititem.wl_prev != NULL)
		waititem.wl_prev->wl_next = waititem.wl_next;
	else
		cn->cn_whead = waititem.wl_next;

	if (waititem.wl_next != NULL)
		waititem.wl_next->wl_prev = waititem.wl_prev;
	else
		cn->cn_wtail = waititem.wl_prev;

	if (retval == DSVC_SUCCESS) {
		if (locktype == DSVCD_WRLOCK)
			cn->cn_nholds = -1;
		else
			cn->cn_nholds++;
	}

	/*
	 * If we just acquired a read lock and the next waiter is waiting
	 * for a readlock too, signal the waiter.  Note that we wake each
	 * reader up one-by-one like this to avoid excessive contention on
	 * cn_nholds_lock.
	 */
	if (locktype == DSVCD_RDLOCK && cn->cn_whead != NULL &&
	    cn->cn_whead->wl_locktype == DSVCD_RDLOCK)
		(void) cond_signal(&cn->cn_whead->wl_cv);

	(void) mutex_unlock(&cn->cn_nholds_lock);
	return (retval);
}

/*
 * Lock container `cn' for reader (shared) access.  If the container cannot
 * be locked immediately (there is currently a writer lock held or a writer
 * lock waiting for the lock), then if `nonblock' is B_TRUE, DSVC_BUSY is
 * returned.  Otherwise, block until the lock can be obtained.  Returns a
 * DSVC_* code.
 */
int
cn_rdlock(dsvcd_container_t *cn, boolean_t nonblock)
{
	int	retval;

	/*
	 * The container is going away; no new lock requests.
	 */
	(void) mutex_lock(&cn->cn_lock);
	if (cn->cn_closing) {
		(void) mutex_unlock(&cn->cn_lock);
		return (DSVC_SYNCH_ERR);
	}
	(void) mutex_unlock(&cn->cn_lock);

	/*
	 * See if we can grab the lock without having to block; only
	 * possible if we can acquire the host lock without blocking, if
	 * the lock is not currently owned by a writer and if there are no
	 * writers currently enqueued for accessing this lock (we know that
	 * if there's a waiter it must be a writer since this code doesn't
	 * enqueue readers until there's a writer enqueued).  We enqueue
	 * these requests to improve fairness.
	 */
	(void) mutex_lock(&cn->cn_nholds_lock);

	if (cn->cn_nholds != -1 && cn->cn_whead == NULL &&
	    host_lock(cn, F_RDLCK, B_TRUE) == DSVC_SUCCESS) {
		cn->cn_nholds++;
		(void) mutex_unlock(&cn->cn_nholds_lock);
		return (DSVC_SUCCESS);
	}

	(void) mutex_unlock(&cn->cn_nholds_lock);

	/*
	 * Cannot grab the lock without blocking somewhere; wait until we
	 * can grab the host lock, then with that lock held obtain our
	 * intra-process lock.
	 */
	if (nonblock)
		return (DSVC_BUSY);
	retval = host_lock(cn, F_RDLCK, B_FALSE);
	if (retval != DSVC_SUCCESS)
		return (retval);

	/*
	 * We've got the read lock; if there aren't any writers currently
	 * contending for our intra-process lock then succeed immediately.
	 * It's possible for there to be waiters but for nholds to be zero
	 * via the following scenario:
	 *
	 *	1. The last holder of a lock unlocks, dropping nholds to
	 *	   zero and signaling the head waiter on the waitlist.
	 *
	 *	2. The last holder drops cn_nholds_lock.
	 *
	 *	3. We acquire cn_nholds_lock before the signaled waiter
	 *	   does.
	 *
	 * Note that this case won't cause a deadlock even if we didn't
	 * check for it here (when the waiter finally gets cn_nholds_lock,
	 * it'll find that the waitlist is once again non-NULL, and signal
	 * us).  However, as an optimization, handle the case here.
	 */
	(void) mutex_lock(&cn->cn_nholds_lock);
	if (cn->cn_nholds != -1 &&
	    (cn->cn_whead == NULL || cn->cn_nholds == 0)) {
		cn->cn_nholds++;
		(void) mutex_unlock(&cn->cn_nholds_lock);
		return (DSVC_SUCCESS);
	}

	/* cn_wait_for_lock() will drop cn_nholds_lock */
	retval = cn_wait_for_lock(cn, DSVCD_RDLOCK);
	if (retval != DSVC_SUCCESS) {
		(void) host_unlock(cn);
		return (retval);
	}
	return (DSVC_SUCCESS);
}

/*
 * Lock container `cn' for writer (exclusive) access.  If the container
 * cannot be locked immediately (there are currently readers or a writer),
 * then if `nonblock' is B_TRUE, DSVC_BUSY is returned.  Otherwise, block
 * until the lock can be obtained.  Returns a DSVC_* code.
 */
int
cn_wrlock(dsvcd_container_t *cn, boolean_t nonblock)
{
	int	retval;

	/*
	 * The container is going away; no new lock requests.
	 */
	(void) mutex_lock(&cn->cn_lock);
	if (cn->cn_closing) {
		(void) mutex_unlock(&cn->cn_lock);
		return (DSVC_SYNCH_ERR);
	}
	(void) mutex_unlock(&cn->cn_lock);

	/*
	 * See if we can grab the lock without having to block; only
	 * possible if there are no current lock holders within our
	 * process and we can immediately acquire the host lock.
	 */
	(void) mutex_lock(&cn->cn_nholds_lock);

	if (cn->cn_nholds == 0 &&
	    host_lock(cn, F_WRLCK, B_TRUE) == DSVC_SUCCESS) {
		cn->cn_nholds = -1;
		(void) mutex_unlock(&cn->cn_nholds_lock);
		return (DSVC_SUCCESS);
	}

	(void) mutex_unlock(&cn->cn_nholds_lock);

	/*
	 * Cannot grab the lock without blocking somewhere; wait until we
	 * can grab the host lock, then with that lock held obtain our
	 * intra-process lock.
	 */
	if (nonblock)
		return (DSVC_BUSY);
	retval = host_lock(cn, F_WRLCK, B_FALSE);
	if (retval != DSVC_SUCCESS)
		return (retval);

	/*
	 * We've got the host lock; if there aren't any writers currently
	 * contending for our intra-process lock then succeed immediately.
	 */
	(void) mutex_lock(&cn->cn_nholds_lock);
	if (cn->cn_nholds == 0) {
		cn->cn_nholds = -1;
		(void) mutex_unlock(&cn->cn_nholds_lock);
		return (DSVC_SUCCESS);
	}

	/* cn_wait_for_lock() will drop cn_nholds_lock */
	retval = cn_wait_for_lock(cn, DSVCD_WRLOCK);
	if (retval != DSVC_SUCCESS) {
		(void) host_unlock(cn);
		return (retval);
	}
	return (DSVC_SUCCESS);
}

/*
 * Unlock reader or writer lock on container `cn'; returns a DSVC_* code
 */
int
cn_unlock(dsvcd_container_t *cn)
{
	(void) mutex_lock(&cn->cn_nholds_lock);

	if (cn->cn_nholds == 0) {
		(void) mutex_unlock(&cn->cn_nholds_lock);
		return (DSVC_SYNCH_ERR);
	}

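	/*
	 * A reader other than the last one is unlocking: just drop our
	 * hold (and one checked-out host lock instance) and return.
	 */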
	if (cn->cn_nholds != -1 && cn->cn_nholds != 1) {
		cn->cn_nholds--;
		(void) host_unlock(cn);
		(void) mutex_unlock(&cn->cn_nholds_lock);
		return (DSVC_SUCCESS);
	}

	/*
	 * The last reader or a writer just unlocked -- signal the first
	 * waiter.  To avoid a thundering herd, we only signal the first
	 * waiter, even if there are multiple readers ready to go --
	 * instead, each reader is responsible for signaling the next
	 * in cn_wait_for_lock().
	 */
	cn->cn_nholds = 0;
	if (cn->cn_whead != NULL)
		(void) cond_signal(&cn->cn_whead->wl_cv);

	(void) host_unlock(cn);
	(void) mutex_unlock(&cn->cn_nholds_lock);

	return (DSVC_SUCCESS);
}

/*
 * Find out what kind of lock is on `cn'.  Note that this is just a
 * snapshot in time and without additional locks the answer may be invalid
 * by the time the function returns.
 */
dsvcd_locktype_t
cn_locktype(dsvcd_container_t *cn)
{
	int nholds;

	(void) mutex_lock(&cn->cn_nholds_lock);
	nholds = cn->cn_nholds;
	(void) mutex_unlock(&cn->cn_nholds_lock);

	if (nholds == 0)
		return (DSVCD_NOLOCK);
	else if (nholds > 0)
		return (DSVCD_RDLOCK);
	else
		return (DSVCD_WRLOCK);
}

/*
 * Obtain a lock of type `locktype' on container `cn' such that we have
 * shared or exclusive access to this container across all hosts.  If
 * `nonblock' is true and the lock cannot be obtained return DSVC_BUSY.  If
 * the lock is already held, the number of instances of the lock "checked
 * out" by this host is incremented.
 */
static int
host_lock(dsvcd_container_t *cn, int locktype, boolean_t nonblock)
{
	struct flock	flock;
	int		fd;
	char		*basename, lockpath[MAXPATHLEN];
	int		error;

	if (!cn->cn_crosshost)
		return (DSVC_SUCCESS);

	/*
	 * Before we wait for a while, see if the container is going away;
	 * if so, fail now so the container can drain more quickly.
	 */
	(void) mutex_lock(&cn->cn_lock);
	if (cn->cn_closing) {
		(void) mutex_unlock(&cn->cn_lock);
		return (DSVC_SYNCH_ERR);
	}
	(void) mutex_unlock(&cn->cn_lock);

	/*
	 * Note that we only wait if (1) there's already a thread trying to
	 * grab the host lock on our host or if (2) this host currently
	 * holds a host shared lock and we need an exclusive lock.  Note
	 * that we do *not* wait in the following situations:
	 *
	 *	* This host holds an exclusive host lock and another
	 *	  exclusive host lock request comes in.  We rely on the
	 *	  intra-process lock to do the synchronization.
	 *
	 *	* This host holds an exclusive host lock and a shared host
	 *	  lock request comes in.  Since this host already has
	 *	  exclusive access, we already implicitly hold the shared
	 *	  host lock as far as this host is concerned, so just rely
	 *	  on the intra-process lock to do the synchronization.
	 *
	 * These semantics make sense as long as one remembers that the
	 * host lock merely provides exclusive or shared access for a given
	 * host or set of hosts -- that is, exclusive access is exclusive
	 * access for that machine, not for the given request.
	 */
	(void) mutex_lock(&cn->cn_hlock_lock);

	while (cn->cn_hlockstate == CN_HPENDING ||
	    (cn->cn_hlockstate == CN_HRDLOCKED && locktype == F_WRLCK)) {
		if (nonblock) {
			(void) mutex_unlock(&cn->cn_hlock_lock);
			return (DSVC_BUSY);
		}

		if (cond_wait(&cn->cn_hlockcv, &cn->cn_hlock_lock) != 0) {
			(void) mutex_unlock(&cn->cn_hlock_lock);
			return (DSVC_SYNCH_ERR);
		}
	}

	if (cn->cn_hlockstate == CN_HRDLOCKED ||
	    cn->cn_hlockstate == CN_HWRLOCKED) {
		/*
		 * Already locked; just bump the held lock count.
		 */
		assert(cn->cn_hlockcount > 0);
		cn->cn_hlockcount++;
		(void) mutex_unlock(&cn->cn_hlock_lock);
		return (DSVC_SUCCESS);
	}

	/*
	 * We're the thread that's going to try to acquire the host lock.
	 */

	assert(cn->cn_hlockcount == 0);

	/*
	 * Create the lock file as a hidden file in the directory named by
	 * cn_id.  So if cn_id is /var/dhcp/SUNWfiles1_dhcptab, we want the
	 * lock file to be /var/dhcp/.SUNWfiles1_dhcptab.lock.  Please, no
	 * giggles about the snprintf().
	 */
	basename = strrchr(cn->cn_id, '/');
	if (basename == NULL)
		basename = cn->cn_id;
	else
		basename++;

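	/*
	 * The "%.*s" below prints the directory prefix of cn_id -- the
	 * precision argument is the length of that prefix (including the
	 * trailing slash, if any) -- so the lock file lands in the same
	 * directory as the container itself.
	 */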
	(void) snprintf(lockpath, MAXPATHLEN, "%.*s.%s.lock",
	    (int)(basename - cn->cn_id), cn->cn_id, basename);
	fd = open(lockpath, O_RDWR|O_CREAT, 0600);
	if (fd == -1) {
		(void) mutex_unlock(&cn->cn_hlock_lock);
		return (DSVC_SYNCH_ERR);
	}

	cn->cn_hlockstate = CN_HPENDING;
	(void) mutex_unlock(&cn->cn_hlock_lock);

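	/*
	 * An l_len of zero is the standard fcntl() idiom for "from
	 * l_start through the end of the file", so this locks the whole
	 * lock file.
	 */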
	flock.l_len	= 0;
	flock.l_type	= locktype;
	flock.l_start	= 0;
	flock.l_whence	= SEEK_SET;

	if (fcntl(fd, nonblock ? F_SETLK : F_SETLKW, &flock) == -1) {
		/*
		 * For some reason we couldn't acquire the lock.  Reset the
		 * host lock state to "unlocked" and signal another thread
		 * (if there's one waiting) to pick up where we left off.
		 */
		error = errno;
		(void) mutex_lock(&cn->cn_hlock_lock);
		cn->cn_hlockstate = CN_HUNLOCKED;
		(void) cond_signal(&cn->cn_hlockcv);
		(void) mutex_unlock(&cn->cn_hlock_lock);
		(void) close(fd);
		return (error == EAGAIN ? DSVC_BUSY : DSVC_SYNCH_ERR);
	}

	/*
	 * Got the lock; wake up all the waiters since they can all succeed
	 */
	(void) mutex_lock(&cn->cn_hlock_lock);
	cn->cn_hlockstate = (locktype == F_WRLCK ? CN_HWRLOCKED : CN_HRDLOCKED);
	cn->cn_hlockcount++;
	cn->cn_hlockfd = fd;
	(void) cond_broadcast(&cn->cn_hlockcv);
	(void) mutex_unlock(&cn->cn_hlock_lock);

	return (DSVC_SUCCESS);
}

/*
 * Unlock a checked out instance of a shared or exclusive lock on container
 * `cn'; if the number of checked out instances goes to zero, then the host
 * lock is unlocked so that other hosts may compete for it.
 */
static int
host_unlock(dsvcd_container_t *cn)
{
	struct flock	flock;

	if (!cn->cn_crosshost)
		return (DSVC_SUCCESS);

	assert(cn->cn_hlockcount > 0);

	(void) mutex_lock(&cn->cn_hlock_lock);
	if (cn->cn_hlockcount > 1) {
		/*
		 * Not the last unlock by this host; just decrement the
		 * held lock count.
		 */
		cn->cn_hlockcount--;
		(void) mutex_unlock(&cn->cn_hlock_lock);
		return (DSVC_SUCCESS);
	}

	flock.l_len	= 0;
	flock.l_type	= F_UNLCK;
	flock.l_start	= 0;
	flock.l_whence	= SEEK_SET;

	if (fcntl(cn->cn_hlockfd, F_SETLK, &flock) == -1) {
		(void) mutex_unlock(&cn->cn_hlock_lock);
		return (DSVC_SYNCH_ERR);
	}

	/*
	 * Note that we don't unlink the lockfile for a number of reasons,
	 * the most blatant reason being:
	 *
	 *	1. Several hosts lock the lockfile for shared access.
	 *	2. One host unlocks the lockfile and unlinks it (here).
	 *	3. Another host comes in, goes to exclusively lock the
	 *	   lockfile, finds no lockfile, and creates a new one
	 *	   (meanwhile, the other hosts are still accessing the
	 *	   container through the unlinked lockfile).
	 *
	 * We could put in some hairy code to try to unlink lockfiles
	 * elsewhere (when possible), but it hardly seems worth it since
	 * inodes are cheap.
	 */

	(void) close(cn->cn_hlockfd);
	cn->cn_hlockcount = 0;
	cn->cn_hlockstate = CN_HUNLOCKED;
	/*
	 * We need to signal `cn_hlockcv' in case there are threads which
	 * are waiting on it to attempt fcntl() exclusive access (see the
	 * comments in host_lock() for more details about this case).
	 */
	(void) cond_signal(&cn->cn_hlockcv);
	(void) mutex_unlock(&cn->cn_hlock_lock);

	return (DSVC_SUCCESS);
}

/*
 * Return the number of locks currently held for container `cn'.
 */
static unsigned int
cn_nlocks(dsvcd_container_t *cn)
{
	unsigned int nlocks;

	(void) mutex_lock(&cn->cn_nholds_lock);
	(void) mutex_lock(&cn->cn_hlock_lock);

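	/*
	 * Map the lock state to a count: a writer counts as one lock,
	 * readers are counted individually, and when there are no
	 * intra-process holds any remaining host lock instances are
	 * reported instead.
	 */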
	switch (cn->cn_nholds) {
	case 0:
		nlocks = cn->cn_hlockcount;
		break;
	case -1:
		nlocks = 1;
		break;
	default:
		nlocks = cn->cn_nholds;
		break;
	}

	dhcpmsg(MSG_DEBUG, "cn_nlocks: nholds=%d hlockstate=%d hlockcount=%d",
	    cn->cn_nholds, cn->cn_hlockstate, cn->cn_hlockcount);

	(void) mutex_unlock(&cn->cn_hlock_lock);
	(void) mutex_unlock(&cn->cn_nholds_lock);

	return (nlocks);
}