xref: /netbsd-src/sys/fs/nfs/nlm/nlm_prot_impl.c (revision a002c830eb91285acca39715ad623b8a2e676b81)
1 /*	$NetBSD: nlm_prot_impl.c,v 1.5 2023/04/28 22:31:38 andvar Exp $	*/
2 /*-
3  * Copyright (c) 2008 Isilon Inc http://www.isilon.com/
4  * Authors: Doug Rabson <dfr@rabson.org>
5  * Developed with Red Inc: Alfred Perlstein <alfred@freebsd.org>
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  */
28 
29 #ifdef _KERNEL_OPT
30 #include "opt_inet6.h"
31 #endif
32 
33 #include <sys/cdefs.h>
34 /* __FBSDID("FreeBSD: head/sys/nlm/nlm_prot_impl.c 302216 2016-06-26 20:08:42Z kib "); */
35 __RCSID("$NetBSD: nlm_prot_impl.c,v 1.5 2023/04/28 22:31:38 andvar Exp $");
36 
37 #include <sys/param.h>
38 #include <sys/fail.h>
39 #include <sys/fcntl.h>
40 #include <sys/kernel.h>
41 #include <sys/kthread.h>
42 #include <sys/lockf.h>
43 #include <sys/malloc.h>
44 #include <sys/mount.h>
45 #include <sys/proc.h>
46 #include <sys/socket.h>
47 #include <sys/socketvar.h>
48 #include <sys/syscall.h>
49 #include <sys/sysctl.h>
50 #include <sys/sysent.h>
51 #include <sys/syslog.h>
52 #include <sys/sysproto.h>
53 #include <sys/systm.h>
54 #include <sys/taskqueue.h>
55 #include <sys/unistd.h>
56 #include <sys/vnode.h>
57 
58 #if 0
59 #if __FreeBSD_version >= 700000
60 #include <sys/priv.h>
61 #endif
62 #endif
63 
64 #include <fs/nfs/common/nfsproto.h>
65 #include <fs/nfs/common/nfs_lock.h>
66 
67 #include <fs/nfs/nlm/nlm_prot.h>
68 #include <fs/nfs/nlm/sm_inter.h>
69 #include <fs/nfs/nlm/nlm.h>
70 
71 #include <rpc/rpc_com.h>
72 #include <rpc/rpcb_prot.h>
73 
74 MALLOC_DEFINE(M_NLM, "NLM", "Network Lock Manager");
75 
76 /*
77  * If a host is inactive (and holds no locks) for this amount of
78  * seconds, we consider it idle and stop tracking it.
79  */
80 #define NLM_IDLE_TIMEOUT	30
81 
82 /*
83  * We check the host list for idle every few seconds.
84  */
85 #define NLM_IDLE_PERIOD		5
86 
87 /*
88  * We only look for GRANTED_RES messages for a little while.
89  */
90 #define NLM_EXPIRE_TIMEOUT	10
91 
92 /*
93  * Support for sysctl vfs.nlm.sysid
94  */
95 static SYSCTL_NODE(_vfs, OID_AUTO, nlm, CTLFLAG_RW, NULL,
96     "Network Lock Manager");
97 static SYSCTL_NODE(_vfs_nlm, OID_AUTO, sysid, CTLFLAG_RW, NULL, "");
98 
99 /*
100  * Syscall hooks
101  */
102 static int nlm_syscall_offset = SYS_nlm_syscall;
103 static struct sysent nlm_syscall_prev_sysent;
104 #if __FreeBSD_version < 700000
105 static struct sysent nlm_syscall_sysent = {
106 	(sizeof(struct nlm_syscall_args) / sizeof(register_t)) | SYF_MPSAFE,
107 	(sy_call_t *) nlm_syscall
108 };
109 #else
110 MAKE_SYSENT(nlm_syscall);
111 #endif
112 static bool_t nlm_syscall_registered = FALSE;
113 
114 /*
115  * Debug level passed in from userland. We also support a sysctl hook
116  * so that it can be changed on a live system.
117  */
118 static int nlm_debug_level;
119 SYSCTL_INT(_debug, OID_AUTO, nlm_debug, CTLFLAG_RW, &nlm_debug_level, 0, "");
120 
121 #define NLM_DEBUG(_level, args...)			\
122 	do {						\
123 		if (nlm_debug_level >= (_level))	\
124 			log(LOG_DEBUG, args);		\
125 	} while(0)
126 #define NLM_ERR(args...)			\
127 	do {					\
128 		log(LOG_ERR, args);		\
129 	} while(0)
130 
131 /*
132  * Grace period handling. The value of nlm_grace_threshold is the
133  * value of time_uptime after which we are serving requests normally.
134  */
135 static time_t nlm_grace_threshold;
136 
137 /*
138  * We check for idle hosts if time_uptime is greater than
139  * nlm_next_idle_check,
140  */
141 static time_t nlm_next_idle_check;
142 
143 /*
144  * A flag to indicate the server is already running.
145  */
146 static int nlm_is_running;
147 
148 /*
149  * A socket to use for RPC - shared by all IPv4 RPC clients.
150  */
151 static struct socket *nlm_socket;
152 
153 #ifdef INET6
154 
155 /*
156  * A socket to use for RPC - shared by all IPv6 RPC clients.
157  */
158 static struct socket *nlm_socket6;
159 
160 #endif
161 
162 /*
163  * An RPC client handle that can be used to communicate with the local
164  * NSM.
165  */
166 static CLIENT *nlm_nsm;
167 
168 /*
169  * An AUTH handle for the server's creds.
170  */
171 static AUTH *nlm_auth;
172 
173 /*
174  * A zero timeval for sending async RPC messages.
175  */
176 struct timeval nlm_zero_tv = { 0, 0 };
177 
178 /*
179  * The local NSM state number
180  */
181 int nlm_nsm_state;
182 
183 
184 /*
185  * A lock to protect the host list and waiting lock list.
186  */
187 static struct mtx nlm_global_lock;
188 
189 /*
190  * Locks:
191  * (l)		locked by nh_lock
192  * (s)		only accessed via server RPC which is single threaded
193  * (g)		locked by nlm_global_lock
194  * (c)		const until freeing
195  * (a)		modified using atomic ops
196  */
197 
198 /*
199  * A pending client-side lock request, stored on the nlm_waiting_locks
200  * list.
201  */
202 struct nlm_waiting_lock {
203 	TAILQ_ENTRY(nlm_waiting_lock) nw_link; /* (g) */
204 	bool_t		nw_waiting;	       /* (g) */
205 	nlm4_lock	nw_lock;	       /* (c) */
206 	union nfsfh	nw_fh;		       /* (c) */
207 	struct vnode	*nw_vp;		       /* (c) */
208 };
209 TAILQ_HEAD(nlm_waiting_lock_list, nlm_waiting_lock);
210 
211 struct nlm_waiting_lock_list nlm_waiting_locks; /* (g) */
212 
213 /*
214  * A pending server-side asynchronous lock request, stored on the
215  * nh_pending list of the NLM host.
216  */
217 struct nlm_async_lock {
218 	TAILQ_ENTRY(nlm_async_lock) af_link; /* (l) host's list of locks */
219 	struct task	af_task;	/* (c) async callback details */
220 	void		*af_cookie;	/* (l) lock manager cancel token */
221 	struct vnode	*af_vp;		/* (l) vnode to lock */
222 	struct flock	af_fl;		/* (c) lock details */
223 	struct nlm_host *af_host;	/* (c) host which is locking */
224 	CLIENT		*af_rpc;	/* (c) rpc client to send message */
225 	nlm4_testargs	af_granted;	/* (c) notification details */
226 	time_t		af_expiretime;	/* (c) notification time */
227 };
228 TAILQ_HEAD(nlm_async_lock_list, nlm_async_lock);
229 
230 /*
231  * NLM host.
232  */
233 enum nlm_host_state {
234 	NLM_UNMONITORED,
235 	NLM_MONITORED,
236 	NLM_MONITOR_FAILED,
237 	NLM_RECOVERING
238 };
239 
240 struct nlm_rpc {
241 	CLIENT		*nr_client;    /* (l) RPC client handle */
242 	time_t		nr_create_time; /* (l) when client was created */
243 };
244 
245 struct nlm_host {
246 	struct mtx	nh_lock;
247 	volatile u_int	nh_refs;       /* (a) reference count */
248 	TAILQ_ENTRY(nlm_host) nh_link; /* (g) global list of hosts */
249 	char		nh_caller_name[MAXNAMELEN]; /* (c) printable name of host */
250 	uint32_t	nh_sysid;	 /* (c) our allocated system ID */
251 	char		nh_sysid_string[10]; /* (c) string rep. of sysid */
252 	struct sockaddr_storage	nh_addr; /* (s) remote address of host */
253 	struct nlm_rpc	nh_srvrpc;	 /* (l) RPC for server replies */
254 	struct nlm_rpc	nh_clntrpc;	 /* (l) RPC for client requests */
255 	rpcvers_t	nh_vers;	 /* (s) NLM version of host */
256 	int		nh_state;	 /* (s) last seen NSM state of host */
257 	enum nlm_host_state nh_monstate; /* (l) local NSM monitoring state */
258 	time_t		nh_idle_timeout; /* (s) Time at which host is idle */
259 	struct sysctl_ctx_list nh_sysctl; /* (c) vfs.nlm.sysid nodes */
260 	uint32_t	nh_grantcookie;  /* (l) grant cookie counter */
261 	struct nlm_async_lock_list nh_pending; /* (l) pending async locks */
262 	struct nlm_async_lock_list nh_granted; /* (l) granted locks */
263 	struct nlm_async_lock_list nh_finished; /* (l) finished async locks */
264 };
265 TAILQ_HEAD(nlm_host_list, nlm_host);
266 
267 static struct nlm_host_list nlm_hosts; /* (g) */
268 static uint32_t nlm_next_sysid = 1;    /* (g) */
269 
270 static void	nlm_host_unmonitor(struct nlm_host *);
271 
272 struct nlm_grantcookie {
273 	uint32_t	ng_sysid;
274 	uint32_t	ng_cookie;
275 };
276 
277 static inline uint32_t
ng_sysid(struct netobj * src)278 ng_sysid(struct netobj *src)
279 {
280 
281 	return ((struct nlm_grantcookie *)src->n_bytes)->ng_sysid;
282 }
283 
284 static inline uint32_t
ng_cookie(struct netobj * src)285 ng_cookie(struct netobj *src)
286 {
287 
288 	return ((struct nlm_grantcookie *)src->n_bytes)->ng_cookie;
289 }
290 
291 /**********************************************************************/
292 
293 /*
294  * Initialise NLM globals.
295  */
296 static void
nlm_init(void * dummy)297 nlm_init(void *dummy)
298 {
299 	int error;
300 
301 	mtx_init(&nlm_global_lock, "nlm_global_lock", NULL, MTX_DEF);
302 	TAILQ_INIT(&nlm_waiting_locks);
303 	TAILQ_INIT(&nlm_hosts);
304 
305 	error = syscall_register(&nlm_syscall_offset, &nlm_syscall_sysent,
306 	    &nlm_syscall_prev_sysent, SY_THR_STATIC_KLD);
307 	if (error)
308 		NLM_ERR("Can't register NLM syscall\n");
309 	else
310 		nlm_syscall_registered = TRUE;
311 }
312 SYSINIT(nlm_init, SI_SUB_LOCK, SI_ORDER_FIRST, nlm_init, NULL);
313 
314 static void
nlm_uninit(void * dummy)315 nlm_uninit(void *dummy)
316 {
317 
318 	if (nlm_syscall_registered)
319 		syscall_deregister(&nlm_syscall_offset,
320 		    &nlm_syscall_prev_sysent);
321 }
322 SYSUNINIT(nlm_uninit, SI_SUB_LOCK, SI_ORDER_FIRST, nlm_uninit, NULL);
323 
324 /*
325  * Create a netobj from an arbitrary source.
326  */
327 void
nlm_make_netobj(struct netobj * dst,caddr_t src,size_t srcsize,struct malloc_type * type)328 nlm_make_netobj(struct netobj *dst, caddr_t src, size_t srcsize,
329     struct malloc_type *type)
330 {
331 
332 	dst->n_len = srcsize;
333 	dst->n_bytes = malloc(srcsize, type, M_WAITOK);
334 	memcpy(dst->n_bytes, src, srcsize);
335 }
336 
337 /*
338  * Copy a struct netobj.
339  */
340 void
nlm_copy_netobj(struct netobj * dst,struct netobj * src,struct malloc_type * type)341 nlm_copy_netobj(struct netobj *dst, struct netobj *src,
342     struct malloc_type *type)
343 {
344 
345 	nlm_make_netobj(dst, src->n_bytes, src->n_len, type);
346 }
347 
348 
349 /*
350  * Create an RPC client handle for the given (address,prog,vers)
351  * triple using UDP.
352  */
353 static CLIENT *
nlm_get_rpc(struct sockaddr * sa,rpcprog_t prog,rpcvers_t vers)354 nlm_get_rpc(struct sockaddr *sa, rpcprog_t prog, rpcvers_t vers)
355 {
356 	char *wchan = "nlmrcv";
357 	const char* protofmly;
358 	struct sockaddr_storage ss;
359 	struct socket *so;
360 	CLIENT *rpcb;
361 	struct timeval timo;
362 	RPCB parms;
363 	char *uaddr;
364 	enum clnt_stat stat = RPC_SUCCESS;
365 	int rpcvers = RPCBVERS4;
366 	bool_t do_tcp = FALSE;
367 	bool_t tryagain = FALSE;
368 	struct portmap mapping;
369 	u_short port = 0;
370 
371 	/*
372 	 * First we need to contact the remote RPCBIND service to find
373 	 * the right port.
374 	 */
375 	memcpy(&ss, sa, sa->sa_len);
376 	switch (ss.ss_family) {
377 	case AF_INET:
378 		((struct sockaddr_in *)&ss)->sin_port = htons(111);
379 		protofmly = "inet";
380 		so = nlm_socket;
381 		break;
382 
383 #ifdef INET6
384 	case AF_INET6:
385 		((struct sockaddr_in6 *)&ss)->sin6_port = htons(111);
386 		protofmly = "inet6";
387 		so = nlm_socket6;
388 		break;
389 #endif
390 
391 	default:
392 		/*
393 		 * Unsupported address family - fail.
394 		 */
395 		return (NULL);
396 	}
397 
398 	rpcb = clnt_dg_create(so, (struct sockaddr *)&ss,
399 	    RPCBPROG, rpcvers, 0, 0);
400 	if (!rpcb)
401 		return (NULL);
402 
403 try_tcp:
404 	parms.r_prog = prog;
405 	parms.r_vers = vers;
406 	if (do_tcp)
407 		parms.r_netid = "tcp";
408 	else
409 		parms.r_netid = "udp";
410 	parms.r_addr = "";
411 	parms.r_owner = "";
412 
413 	/*
414 	 * Use the default timeout.
415 	 */
416 	timo.tv_sec = 25;
417 	timo.tv_usec = 0;
418 again:
419 	switch (rpcvers) {
420 	case RPCBVERS4:
421 	case RPCBVERS:
422 		/*
423 		 * Try RPCBIND 4 then 3.
424 		 */
425 		uaddr = NULL;
426 		stat = CLNT_CALL(rpcb, (rpcprog_t) RPCBPROC_GETADDR,
427 		    (xdrproc_t) xdr_rpcb, &parms,
428 		    (xdrproc_t) xdr_wrapstring, &uaddr, timo);
429 		if (stat == RPC_SUCCESS) {
430 			/*
431 			 * We have a reply from the remote RPCBIND - turn it
432 			 * into an appropriate address and make a new client
433 			 * that can talk to the remote NLM.
434 			 *
435 			 * XXX fixup IPv6 scope ID.
436 			 */
437 			struct netbuf *a;
438 			a = __rpc_uaddr2taddr_af(ss.ss_family, uaddr);
439 			if (!a) {
440 				tryagain = TRUE;
441 			} else {
442 				tryagain = FALSE;
443 				memcpy(&ss, a->buf, a->len);
444 				free(a->buf, M_RPC);
445 				free(a, M_RPC);
446 				xdr_free((xdrproc_t) xdr_wrapstring, &uaddr);
447 			}
448 		}
449 		if (tryagain || stat == RPC_PROGVERSMISMATCH) {
450 			if (rpcvers == RPCBVERS4)
451 				rpcvers = RPCBVERS;
452 			else if (rpcvers == RPCBVERS)
453 				rpcvers = PMAPVERS;
454 			CLNT_CONTROL(rpcb, CLSET_VERS, &rpcvers);
455 			goto again;
456 		}
457 		break;
458 	case PMAPVERS:
459 		/*
460 		 * Try portmap.
461 		 */
462 		mapping.pm_prog = parms.r_prog;
463 		mapping.pm_vers = parms.r_vers;
464 		mapping.pm_prot = do_tcp ? IPPROTO_TCP : IPPROTO_UDP;
465 		mapping.pm_port = 0;
466 
467 		stat = CLNT_CALL(rpcb, (rpcprog_t) PMAPPROC_GETPORT,
468 		    (xdrproc_t) xdr_portmap, &mapping,
469 		    (xdrproc_t) xdr_u_short, &port, timo);
470 
471 		if (stat == RPC_SUCCESS) {
472 			switch (ss.ss_family) {
473 			case AF_INET:
474 				((struct sockaddr_in *)&ss)->sin_port =
475 					htons(port);
476 				break;
477 
478 #ifdef INET6
479 			case AF_INET6:
480 				((struct sockaddr_in6 *)&ss)->sin6_port =
481 					htons(port);
482 				break;
483 #endif
484 			}
485 		}
486 		break;
487 	default:
488 		panic("invalid rpcvers %d", rpcvers);
489 	}
490 	/*
491 	 * We may have a positive response from the portmapper, but the NLM
492 	 * service was not found. Make sure we received a valid port.
493 	 */
494 	switch (ss.ss_family) {
495 	case AF_INET:
496 		port = ((struct sockaddr_in *)&ss)->sin_port;
497 		break;
498 #ifdef INET6
499 	case AF_INET6:
500 		port = ((struct sockaddr_in6 *)&ss)->sin6_port;
501 		break;
502 #endif
503 	}
504 	if (stat != RPC_SUCCESS || !port) {
505 		/*
506 		 * If we were able to talk to rpcbind or portmap, but the udp
507 		 * variant wasn't available, ask about tcp.
508 		 *
509 		 * XXX - We could also check for a TCP portmapper, but
510 		 * if the host is running a portmapper at all, we should be able
511 		 * to hail it over UDP.
512 		 */
513 		if (stat == RPC_SUCCESS && !do_tcp) {
514 			do_tcp = TRUE;
515 			goto try_tcp;
516 		}
517 
518 		/* Otherwise, bad news. */
519 		NLM_ERR("NLM: failed to contact remote rpcbind, "
520 		    "stat = %d, port = %d\n", (int) stat, port);
521 		CLNT_DESTROY(rpcb);
522 		return (NULL);
523 	}
524 
525 	if (do_tcp) {
526 		/*
527 		 * Destroy the UDP client we used to speak to rpcbind and
528 		 * recreate as a TCP client.
529 		 */
530 		struct netconfig *nconf = NULL;
531 
532 		CLNT_DESTROY(rpcb);
533 
534 		switch (ss.ss_family) {
535 		case AF_INET:
536 			nconf = getnetconfigent("tcp");
537 			break;
538 #ifdef INET6
539 		case AF_INET6:
540 			nconf = getnetconfigent("tcp6");
541 			break;
542 #endif
543 		}
544 
545 		rpcb = clnt_reconnect_create(nconf, (struct sockaddr *)&ss,
546 		    prog, vers, 0, 0);
547 		CLNT_CONTROL(rpcb, CLSET_WAITCHAN, wchan);
548 		rpcb->cl_auth = nlm_auth;
549 
550 	} else {
551 		/*
552 		 * Re-use the client we used to speak to rpcbind.
553 		 */
554 		CLNT_CONTROL(rpcb, CLSET_SVC_ADDR, &ss);
555 		CLNT_CONTROL(rpcb, CLSET_PROG, &prog);
556 		CLNT_CONTROL(rpcb, CLSET_VERS, &vers);
557 		CLNT_CONTROL(rpcb, CLSET_WAITCHAN, wchan);
558 		rpcb->cl_auth = nlm_auth;
559 	}
560 
561 	return (rpcb);
562 }
563 
564 /*
565  * This async callback fires after an async lock request has been
566  * granted. We notify the host which initiated the request.
567  */
568 static void
nlm_lock_callback(void * arg,int pending)569 nlm_lock_callback(void *arg, int pending)
570 {
571 	struct nlm_async_lock *af = (struct nlm_async_lock *) arg;
572 	struct rpc_callextra ext;
573 
574 	NLM_DEBUG(2, "NLM: async lock %p for %s (sysid %d) granted,"
575 	    " cookie %d:%d\n", af, af->af_host->nh_caller_name,
576 	    af->af_host->nh_sysid, ng_sysid(&af->af_granted.cookie),
577 	    ng_cookie(&af->af_granted.cookie));
578 
579 	/*
580 	 * Send the results back to the host.
581 	 *
582 	 * Note: there is a possible race here with nlm_host_notify
583 	 * destroying the RPC client. To avoid problems, the first
584 	 * thing nlm_host_notify does is to cancel pending async lock
585 	 * requests.
586 	 */
587 	memset(&ext, 0, sizeof(ext));
588 	ext.rc_auth = nlm_auth;
589 	if (af->af_host->nh_vers == NLM_VERS4) {
590 		nlm4_granted_msg_4(&af->af_granted,
591 		    NULL, af->af_rpc, &ext, nlm_zero_tv);
592 	} else {
593 		/*
594 		 * Back-convert to legacy protocol
595 		 */
596 		nlm_testargs granted;
597 		granted.cookie = af->af_granted.cookie;
598 		granted.exclusive = af->af_granted.exclusive;
599 		granted.alock.caller_name =
600 			af->af_granted.alock.caller_name;
601 		granted.alock.fh = af->af_granted.alock.fh;
602 		granted.alock.oh = af->af_granted.alock.oh;
603 		granted.alock.svid = af->af_granted.alock.svid;
604 		granted.alock.l_offset =
605 			af->af_granted.alock.l_offset;
606 		granted.alock.l_len =
607 			af->af_granted.alock.l_len;
608 
609 		nlm_granted_msg_1(&granted,
610 		    NULL, af->af_rpc, &ext, nlm_zero_tv);
611 	}
612 
613 	/*
614 	 * Move this entry to the nh_granted list.
615 	 */
616 	af->af_expiretime = time_uptime + NLM_EXPIRE_TIMEOUT;
617 	mtx_lock(&af->af_host->nh_lock);
618 	TAILQ_REMOVE(&af->af_host->nh_pending, af, af_link);
619 	TAILQ_INSERT_TAIL(&af->af_host->nh_granted, af, af_link);
620 	mtx_unlock(&af->af_host->nh_lock);
621 }
622 
623 /*
624  * Free an async lock request. The request must have been removed from
625  * any list.
626  */
627 static void
nlm_free_async_lock(struct nlm_async_lock * af)628 nlm_free_async_lock(struct nlm_async_lock *af)
629 {
630 	/*
631 	 * Free an async lock.
632 	 */
633 	if (af->af_rpc)
634 		CLNT_RELEASE(af->af_rpc);
635 	xdr_free((xdrproc_t) xdr_nlm4_testargs, &af->af_granted);
636 	if (af->af_vp)
637 		vrele(af->af_vp);
638 	free(af, M_NLM);
639 }
640 
641 /*
642  * Cancel our async request - this must be called with
643  * af->nh_host->nh_lock held. This is slightly complicated by a
644  * potential race with our own callback. If we fail to cancel the
645  * lock, it must already have been granted - we make sure our async
646  * task has completed by calling taskqueue_drain in this case.
647  */
648 static int
nlm_cancel_async_lock(struct nlm_async_lock * af)649 nlm_cancel_async_lock(struct nlm_async_lock *af)
650 {
651 	struct nlm_host *host = af->af_host;
652 	int error;
653 
654 	mtx_assert(&host->nh_lock, MA_OWNED);
655 
656 	mtx_unlock(&host->nh_lock);
657 
658 	error = VOP_ADVLOCKASYNC(af->af_vp, NULL, F_CANCEL, &af->af_fl,
659 	    F_REMOTE, NULL, &af->af_cookie);
660 
661 	if (error) {
662 		/*
663 		 * We failed to cancel - make sure our callback has
664 		 * completed before we continue.
665 		 */
666 		taskqueue_drain(taskqueue_thread, &af->af_task);
667 	}
668 
669 	mtx_lock(&host->nh_lock);
670 
671 	if (!error) {
672 		NLM_DEBUG(2, "NLM: async lock %p for %s (sysid %d) "
673 		    "cancelled\n", af, host->nh_caller_name, host->nh_sysid);
674 
675 		/*
676 		 * Remove from the nh_pending list and free now that
677 		 * we are safe from the callback.
678 		 */
679 		TAILQ_REMOVE(&host->nh_pending, af, af_link);
680 		mtx_unlock(&host->nh_lock);
681 		nlm_free_async_lock(af);
682 		mtx_lock(&host->nh_lock);
683 	}
684 
685 	return (error);
686 }
687 
688 static void
nlm_check_expired_locks(struct nlm_host * host)689 nlm_check_expired_locks(struct nlm_host *host)
690 {
691 	struct nlm_async_lock *af;
692 	time_t uptime = time_uptime;
693 
694 	mtx_lock(&host->nh_lock);
695 	while ((af = TAILQ_FIRST(&host->nh_granted)) != NULL
696 	    && uptime >= af->af_expiretime) {
697 		NLM_DEBUG(2, "NLM: async lock %p for %s (sysid %d) expired,"
698 		    " cookie %d:%d\n", af, af->af_host->nh_caller_name,
699 		    af->af_host->nh_sysid, ng_sysid(&af->af_granted.cookie),
700 		    ng_cookie(&af->af_granted.cookie));
701 		TAILQ_REMOVE(&host->nh_granted, af, af_link);
702 		mtx_unlock(&host->nh_lock);
703 		nlm_free_async_lock(af);
704 		mtx_lock(&host->nh_lock);
705 	}
706 	while ((af = TAILQ_FIRST(&host->nh_finished)) != NULL) {
707 		TAILQ_REMOVE(&host->nh_finished, af, af_link);
708 		mtx_unlock(&host->nh_lock);
709 		nlm_free_async_lock(af);
710 		mtx_lock(&host->nh_lock);
711 	}
712 	mtx_unlock(&host->nh_lock);
713 }
714 
715 /*
716  * Free resources used by a host. This is called after the reference
717  * count has reached zero so it doesn't need to worry about locks.
718  */
719 static void
nlm_host_destroy(struct nlm_host * host)720 nlm_host_destroy(struct nlm_host *host)
721 {
722 
723 	mtx_lock(&nlm_global_lock);
724 	TAILQ_REMOVE(&nlm_hosts, host, nh_link);
725 	mtx_unlock(&nlm_global_lock);
726 
727 	if (host->nh_srvrpc.nr_client)
728 		CLNT_RELEASE(host->nh_srvrpc.nr_client);
729 	if (host->nh_clntrpc.nr_client)
730 		CLNT_RELEASE(host->nh_clntrpc.nr_client);
731 	mtx_destroy(&host->nh_lock);
732 	sysctl_ctx_free(&host->nh_sysctl);
733 	free(host, M_NLM);
734 }
735 
736 /*
737  * Thread start callback for client lock recovery
738  */
739 static void
nlm_client_recovery_start(void * arg)740 nlm_client_recovery_start(void *arg)
741 {
742 	struct nlm_host *host = (struct nlm_host *) arg;
743 
744 	NLM_DEBUG(1, "NLM: client lock recovery for %s started\n",
745 	    host->nh_caller_name);
746 
747 	nlm_client_recovery(host);
748 
749 	NLM_DEBUG(1, "NLM: client lock recovery for %s completed\n",
750 	    host->nh_caller_name);
751 
752 	host->nh_monstate = NLM_MONITORED;
753 	nlm_host_release(host);
754 
755 	kthread_exit();
756 }
757 
758 /*
759  * This is called when we receive a host state change notification. We
760  * unlock any active locks owned by the host. When rpc.lockd is
761  * shutting down, this function is called with newstate set to zero
762  * which allows us to cancel any pending async locks and clear the
763  * locking state.
764  */
765 static void
nlm_host_notify(struct nlm_host * host,int newstate)766 nlm_host_notify(struct nlm_host *host, int newstate)
767 {
768 	struct nlm_async_lock *af;
769 
770 	if (newstate) {
771 		NLM_DEBUG(1, "NLM: host %s (sysid %d) rebooted, new "
772 		    "state is %d\n", host->nh_caller_name,
773 		    host->nh_sysid, newstate);
774 	}
775 
776 	/*
777 	 * Cancel any pending async locks for this host.
778 	 */
779 	mtx_lock(&host->nh_lock);
780 	while ((af = TAILQ_FIRST(&host->nh_pending)) != NULL) {
781 		/*
782 		 * nlm_cancel_async_lock will remove the entry from
783 		 * nh_pending and free it.
784 		 */
785 		nlm_cancel_async_lock(af);
786 	}
787 	mtx_unlock(&host->nh_lock);
788 	nlm_check_expired_locks(host);
789 
790 	/*
791 	 * The host just rebooted - trash its locks.
792 	 */
793 	lf_clearremotesys(host->nh_sysid);
794 	host->nh_state = newstate;
795 
796 	/*
797 	 * If we have any remote locks for this host (i.e. it
798 	 * represents a remote NFS server that our local NFS client
799 	 * has locks for), start a recovery thread.
800 	 */
801 	if (newstate != 0
802 	    && host->nh_monstate != NLM_RECOVERING
803 	    && lf_countlocks(NLM_SYSID_CLIENT | host->nh_sysid) > 0) {
804 		struct thread *td;
805 		host->nh_monstate = NLM_RECOVERING;
806 		refcount_acquire(&host->nh_refs);
807 		kthread_add(nlm_client_recovery_start, host, curproc, &td, 0, 0,
808 		    "NFS lock recovery for %s", host->nh_caller_name);
809 	}
810 }
811 
812 /*
813  * Sysctl handler to count the number of locks for a sysid.
814  */
815 static int
nlm_host_lock_count_sysctl(SYSCTL_HANDLER_ARGS)816 nlm_host_lock_count_sysctl(SYSCTL_HANDLER_ARGS)
817 {
818 	struct nlm_host *host;
819 	int count;
820 
821 	host = oidp->oid_arg1;
822 	count = lf_countlocks(host->nh_sysid);
823 	return sysctl_handle_int(oidp, &count, 0, req);
824 }
825 
826 /*
827  * Sysctl handler to count the number of client locks for a sysid.
828  */
829 static int
nlm_host_client_lock_count_sysctl(SYSCTL_HANDLER_ARGS)830 nlm_host_client_lock_count_sysctl(SYSCTL_HANDLER_ARGS)
831 {
832 	struct nlm_host *host;
833 	int count;
834 
835 	host = oidp->oid_arg1;
836 	count = lf_countlocks(NLM_SYSID_CLIENT | host->nh_sysid);
837 	return sysctl_handle_int(oidp, &count, 0, req);
838 }
839 
840 /*
841  * Create a new NLM host.
842  */
843 static struct nlm_host *
nlm_create_host(const char * caller_name)844 nlm_create_host(const char* caller_name)
845 {
846 	struct nlm_host *host;
847 	struct sysctl_oid *oid;
848 
849 	mtx_assert(&nlm_global_lock, MA_OWNED);
850 
851 	NLM_DEBUG(1, "NLM: new host %s (sysid %d)\n",
852 	    caller_name, nlm_next_sysid);
853 	host = malloc(sizeof(struct nlm_host), M_NLM, M_NOWAIT|M_ZERO);
854 	if (!host)
855 		return (NULL);
856 	mtx_init(&host->nh_lock, "nh_lock", NULL, MTX_DEF);
857 	host->nh_refs = 1;
858 	strlcpy(host->nh_caller_name, caller_name, MAXNAMELEN);
859 	host->nh_sysid = nlm_next_sysid++;
860 	snprintf(host->nh_sysid_string, sizeof(host->nh_sysid_string),
861 		"%d", host->nh_sysid);
862 	host->nh_vers = 0;
863 	host->nh_state = 0;
864 	host->nh_monstate = NLM_UNMONITORED;
865 	host->nh_grantcookie = 1;
866 	TAILQ_INIT(&host->nh_pending);
867 	TAILQ_INIT(&host->nh_granted);
868 	TAILQ_INIT(&host->nh_finished);
869 	TAILQ_INSERT_TAIL(&nlm_hosts, host, nh_link);
870 
871 	mtx_unlock(&nlm_global_lock);
872 
873 	sysctl_ctx_init(&host->nh_sysctl);
874 	oid = SYSCTL_ADD_NODE(&host->nh_sysctl,
875 	    SYSCTL_STATIC_CHILDREN(_vfs_nlm_sysid),
876 	    OID_AUTO, host->nh_sysid_string, CTLFLAG_RD, NULL, "");
877 	SYSCTL_ADD_STRING(&host->nh_sysctl, SYSCTL_CHILDREN(oid), OID_AUTO,
878 	    "hostname", CTLFLAG_RD, host->nh_caller_name, 0, "");
879 	SYSCTL_ADD_UINT(&host->nh_sysctl, SYSCTL_CHILDREN(oid), OID_AUTO,
880 	    "version", CTLFLAG_RD, &host->nh_vers, 0, "");
881 	SYSCTL_ADD_UINT(&host->nh_sysctl, SYSCTL_CHILDREN(oid), OID_AUTO,
882 	    "monitored", CTLFLAG_RD, &host->nh_monstate, 0, "");
883 	SYSCTL_ADD_PROC(&host->nh_sysctl, SYSCTL_CHILDREN(oid), OID_AUTO,
884 	    "lock_count", CTLTYPE_INT | CTLFLAG_RD, host, 0,
885 	    nlm_host_lock_count_sysctl, "I", "");
886 	SYSCTL_ADD_PROC(&host->nh_sysctl, SYSCTL_CHILDREN(oid), OID_AUTO,
887 	    "client_lock_count", CTLTYPE_INT | CTLFLAG_RD, host, 0,
888 	    nlm_host_client_lock_count_sysctl, "I", "");
889 
890 	mtx_lock(&nlm_global_lock);
891 
892 	return (host);
893 }
894 
895 /*
896  * Acquire the next sysid for remote locks not handled by the NLM.
897  */
898 uint32_t
nlm_acquire_next_sysid(void)899 nlm_acquire_next_sysid(void)
900 {
901 	uint32_t next_sysid;
902 
903 	mtx_lock(&nlm_global_lock);
904 	next_sysid = nlm_next_sysid++;
905 	mtx_unlock(&nlm_global_lock);
906 	return (next_sysid);
907 }
908 
909 /*
910  * Return non-zero if the address parts of the two sockaddrs are the
911  * same.
912  */
913 static int
nlm_compare_addr(const struct sockaddr * a,const struct sockaddr * b)914 nlm_compare_addr(const struct sockaddr *a, const struct sockaddr *b)
915 {
916 	const struct sockaddr_in *a4, *b4;
917 #ifdef INET6
918 	const struct sockaddr_in6 *a6, *b6;
919 #endif
920 
921 	if (a->sa_family != b->sa_family)
922 		return (FALSE);
923 
924 	switch (a->sa_family) {
925 	case AF_INET:
926 		a4 = (const struct sockaddr_in *) a;
927 		b4 = (const struct sockaddr_in *) b;
928 		return !memcmp(&a4->sin_addr, &b4->sin_addr,
929 		    sizeof(a4->sin_addr));
930 #ifdef INET6
931 	case AF_INET6:
932 		a6 = (const struct sockaddr_in6 *) a;
933 		b6 = (const struct sockaddr_in6 *) b;
934 		return !memcmp(&a6->sin6_addr, &b6->sin6_addr,
935 		    sizeof(a6->sin6_addr));
936 #endif
937 	}
938 
939 	return (0);
940 }
941 
942 /*
943  * Check for idle hosts and stop monitoring them. We could also free
944  * the host structure here, possibly after a larger timeout but that
945  * would require some care to avoid races with
946  * e.g. nlm_host_lock_count_sysctl.
947  */
948 static void
nlm_check_idle(void)949 nlm_check_idle(void)
950 {
951 	struct nlm_host *host;
952 
953 	mtx_assert(&nlm_global_lock, MA_OWNED);
954 
955 	if (time_uptime <= nlm_next_idle_check)
956 		return;
957 
958 	nlm_next_idle_check = time_uptime + NLM_IDLE_PERIOD;
959 
960 	TAILQ_FOREACH(host, &nlm_hosts, nh_link) {
961 		if (host->nh_monstate == NLM_MONITORED
962 		    && time_uptime > host->nh_idle_timeout) {
963 			mtx_unlock(&nlm_global_lock);
964 			if (lf_countlocks(host->nh_sysid) > 0
965 			    || lf_countlocks(NLM_SYSID_CLIENT
966 				+ host->nh_sysid)) {
967 				host->nh_idle_timeout =
968 					time_uptime + NLM_IDLE_TIMEOUT;
969 				mtx_lock(&nlm_global_lock);
970 				continue;
971 			}
972 			nlm_host_unmonitor(host);
973 			mtx_lock(&nlm_global_lock);
974 		}
975 	}
976 }
977 
978 /*
979  * Search for an existing NLM host that matches the given name
980  * (typically the caller_name element of an nlm4_lock).  If none is
981  * found, create a new host. If 'addr' is non-NULL, record the remote
982  * address of the host so that we can call it back for async
983  * responses. If 'vers' is greater than zero then record the NLM
984  * program version to use to communicate with this client.
985  */
struct nlm_host *
nlm_find_host_by_name(const char *name, const struct sockaddr *addr,
    rpcvers_t vers)
{
	struct nlm_host *host;

	mtx_lock(&nlm_global_lock);

	/*
	 * The remote host is determined by caller_name.
	 */
	TAILQ_FOREACH(host, &nlm_hosts, nh_link) {
		if (!strcmp(host->nh_caller_name, name))
			break;
	}

	if (!host) {
		/* No existing entry - create one under the global lock. */
		host = nlm_create_host(name);
		if (!host) {
			mtx_unlock(&nlm_global_lock);
			return (NULL);
		}
	}
	/* The returned host is referenced; caller must nlm_host_release(). */
	refcount_acquire(&host->nh_refs);

	/* The host is clearly active - push its idle deadline out. */
	host->nh_idle_timeout = time_uptime + NLM_IDLE_TIMEOUT;

	/*
	 * If we have an address for the host, record it so that we
	 * can send async replies etc.
	 */
	if (addr) {

		KASSERT(addr->sa_len < sizeof(struct sockaddr_storage),
		    ("Strange remote transport address length"));

		/*
		 * If we have seen an address before and we currently
		 * have an RPC client handle, make sure the address is
		 * the same, otherwise discard the client handle.
		 */
		if (host->nh_addr.ss_len && host->nh_srvrpc.nr_client) {
			if (!nlm_compare_addr(
				    (struct sockaddr *) &host->nh_addr,
				    addr)
			    || host->nh_vers != vers) {
				CLIENT *client;
				/* nh_lock guards the per-host RPC handles. */
				mtx_lock(&host->nh_lock);
				client = host->nh_srvrpc.nr_client;
				host->nh_srvrpc.nr_client = NULL;
				mtx_unlock(&host->nh_lock);
				/* Release the handle outside nh_lock. */
				if (client) {
					CLNT_RELEASE(client);
				}
			}
		}
		memcpy(&host->nh_addr, addr, addr->sa_len);
		host->nh_vers = vers;
	}

	/* Opportunistically expire idle hosts while we hold the lock. */
	nlm_check_idle();

	mtx_unlock(&nlm_global_lock);

	return (host);
}
1052 
1053 /*
1054  * Search for an existing NLM host that matches the given remote
1055  * address. If none is found, create a new host with the requested
1056  * address and remember 'vers' as the NLM protocol version to use for
1057  * that host.
1058  */
struct nlm_host *
nlm_find_host_by_addr(const struct sockaddr *addr, int vers)
{
	/*
	 * Fake up a name using inet_ntop. This buffer is
	 * large enough for an IPv6 address.
	 */
	char tmp[sizeof "ffff:ffff:ffff:ffff:ffff:ffff:255.255.255.255"];
	struct nlm_host *host;

	/* Render the address as text to name a newly created host. */
	switch (addr->sa_family) {
	case AF_INET:
		inet_ntop(AF_INET,
		    &((const struct sockaddr_in *) addr)->sin_addr,
		    tmp, sizeof tmp);
		break;
#ifdef INET6
	case AF_INET6:
		inet_ntop(AF_INET6,
		    &((const struct sockaddr_in6 *) addr)->sin6_addr,
		    tmp, sizeof tmp);
		break;
#endif
	default:
		strlcpy(tmp, "<unknown>", sizeof(tmp));
	}


	mtx_lock(&nlm_global_lock);

	/*
	 * Here the remote host is matched by its transport address
	 * (unlike nlm_find_host_by_name, which matches caller_name).
	 */
	TAILQ_FOREACH(host, &nlm_hosts, nh_link) {
		if (nlm_compare_addr(addr,
			(const struct sockaddr *) &host->nh_addr))
			break;
	}

	if (!host) {
		host = nlm_create_host(tmp);
		if (!host) {
			mtx_unlock(&nlm_global_lock);
			return (NULL);
		}
		memcpy(&host->nh_addr, addr, addr->sa_len);
		host->nh_vers = vers;
	}
	/* The returned host is referenced; caller must nlm_host_release(). */
	refcount_acquire(&host->nh_refs);

	host->nh_idle_timeout = time_uptime + NLM_IDLE_TIMEOUT;

	/* Opportunistically expire idle hosts while we hold the lock. */
	nlm_check_idle();

	mtx_unlock(&nlm_global_lock);

	return (host);
}
1117 
1118 /*
1119  * Find the NLM host that matches the value of 'sysid'. If none
1120  * exists, return NULL.
1121  */
1122 static struct nlm_host *
nlm_find_host_by_sysid(int sysid)1123 nlm_find_host_by_sysid(int sysid)
1124 {
1125 	struct nlm_host *host;
1126 
1127 	TAILQ_FOREACH(host, &nlm_hosts, nh_link) {
1128 		if (host->nh_sysid == sysid) {
1129 			refcount_acquire(&host->nh_refs);
1130 			return (host);
1131 		}
1132 	}
1133 
1134 	return (NULL);
1135 }
1136 
nlm_host_release(struct nlm_host * host)1137 void nlm_host_release(struct nlm_host *host)
1138 {
1139 	if (refcount_release(&host->nh_refs)) {
1140 		/*
1141 		 * Free the host
1142 		 */
1143 		nlm_host_destroy(host);
1144 	}
1145 }
1146 
1147 /*
1148  * Unregister this NLM host with the local NSM due to idleness.
1149  */
1150 static void
nlm_host_unmonitor(struct nlm_host * host)1151 nlm_host_unmonitor(struct nlm_host *host)
1152 {
1153 	mon_id smmonid;
1154 	sm_stat_res smstat;
1155 	struct timeval timo;
1156 	enum clnt_stat stat;
1157 
1158 	NLM_DEBUG(1, "NLM: unmonitoring %s (sysid %d)\n",
1159 	    host->nh_caller_name, host->nh_sysid);
1160 
1161 	/*
1162 	 * We put our assigned system ID value in the priv field to
1163 	 * make it simpler to find the host if we are notified of a
1164 	 * host restart.
1165 	 */
1166 	smmonid.mon_name = host->nh_caller_name;
1167 	smmonid.my_id.my_name = "localhost";
1168 	smmonid.my_id.my_prog = NLM_PROG;
1169 	smmonid.my_id.my_vers = NLM_SM;
1170 	smmonid.my_id.my_proc = NLM_SM_NOTIFY;
1171 
1172 	timo.tv_sec = 25;
1173 	timo.tv_usec = 0;
1174 	stat = CLNT_CALL(nlm_nsm, SM_UNMON,
1175 	    (xdrproc_t) xdr_mon, &smmonid,
1176 	    (xdrproc_t) xdr_sm_stat, &smstat, timo);
1177 
1178 	if (stat != RPC_SUCCESS) {
1179 		NLM_ERR("Failed to contact local NSM - rpc error %d\n", stat);
1180 		return;
1181 	}
1182 	if (smstat.res_stat == stat_fail) {
1183 		NLM_ERR("Local NSM refuses to unmonitor %s\n",
1184 		    host->nh_caller_name);
1185 		return;
1186 	}
1187 
1188 	host->nh_monstate = NLM_UNMONITORED;
1189 }
1190 
1191 /*
1192  * Register this NLM host with the local NSM so that we can be
1193  * notified if it reboots.
1194  */
void
nlm_host_monitor(struct nlm_host *host, int state)
{
	mon smmon;
	sm_stat_res smstat;
	struct timeval timo;
	enum clnt_stat stat;

	if (state && !host->nh_state) {
		/*
		 * This is the first time we have seen an NSM state
		 * value for this host. We record it here to help
		 * detect host reboots.
		 */
		host->nh_state = state;
		NLM_DEBUG(1, "NLM: host %s (sysid %d) has NSM state %d\n",
		    host->nh_caller_name, host->nh_sysid, state);
	}

	/* Claim the UNMONITORED->MONITORED transition; bail if lost. */
	mtx_lock(&host->nh_lock);
	if (host->nh_monstate != NLM_UNMONITORED) {
		mtx_unlock(&host->nh_lock);
		return;
	}
	host->nh_monstate = NLM_MONITORED;
	mtx_unlock(&host->nh_lock);

	NLM_DEBUG(1, "NLM: monitoring %s (sysid %d)\n",
	    host->nh_caller_name, host->nh_sysid);

	/*
	 * We put our assigned system ID value in the priv field to
	 * make it simpler to find the host if we are notified of a
	 * host restart.
	 */
	smmon.mon_id.mon_name = host->nh_caller_name;
	smmon.mon_id.my_id.my_name = "localhost";
	smmon.mon_id.my_id.my_prog = NLM_PROG;
	smmon.mon_id.my_id.my_vers = NLM_SM;
	smmon.mon_id.my_id.my_proc = NLM_SM_NOTIFY;
	memcpy(smmon.priv, &host->nh_sysid, sizeof(host->nh_sysid));

	timo.tv_sec = 25;
	timo.tv_usec = 0;
	stat = CLNT_CALL(nlm_nsm, SM_MON,
	    (xdrproc_t) xdr_mon, &smmon,
	    (xdrproc_t) xdr_sm_stat, &smstat, timo);

	if (stat != RPC_SUCCESS) {
		NLM_ERR("Failed to contact local NSM - rpc error %d\n", stat);
		return;
	}
	if (smstat.res_stat == stat_fail) {
		NLM_ERR("Local NSM refuses to monitor %s\n",
		    host->nh_caller_name);
		mtx_lock(&host->nh_lock);
		host->nh_monstate = NLM_MONITOR_FAILED;
		mtx_unlock(&host->nh_lock);
		return;
	}

	/* NOTE(review): unlike the stores above, this one skips nh_lock. */
	host->nh_monstate = NLM_MONITORED;
}
1258 
1259 /*
1260  * Return an RPC client handle that can be used to talk to the NLM
1261  * running on the given host.
1262  */
CLIENT *
nlm_host_get_rpc(struct nlm_host *host, bool_t isserver)
{
	struct nlm_rpc *rpc;
	CLIENT *client;

	mtx_lock(&host->nh_lock);

	/* Server-side and client-side traffic use separate handles. */
	if (isserver)
		rpc = &host->nh_srvrpc;
	else
		rpc = &host->nh_clntrpc;

	/*
	 * We can't hold onto RPC handles for too long - the async
	 * call/reply protocol used by some NLM clients makes it hard
	 * to tell when they change port numbers (e.g. after a
	 * reboot). Note that if a client reboots while it isn't
	 * holding any locks, it won't bother to notify us. We
	 * expire the RPC handles after two minutes.
	 */
	if (rpc->nr_client && time_uptime > rpc->nr_create_time + 2*60) {
		client = rpc->nr_client;
		rpc->nr_client = NULL;
		/* Release the expired handle without nh_lock held. */
		mtx_unlock(&host->nh_lock);
		CLNT_RELEASE(client);
		mtx_lock(&host->nh_lock);
	}

	if (!rpc->nr_client) {
		/* Create a fresh handle; nlm_get_rpc runs unlocked. */
		mtx_unlock(&host->nh_lock);
		client = nlm_get_rpc((struct sockaddr *)&host->nh_addr,
		    NLM_PROG, host->nh_vers);
		mtx_lock(&host->nh_lock);

		if (client) {
			if (rpc->nr_client) {
				/*
				 * Another thread created a handle
				 * while the lock was dropped; discard
				 * ours and use theirs below.
				 */
				mtx_unlock(&host->nh_lock);
				CLNT_DESTROY(client);
				mtx_lock(&host->nh_lock);
			} else {
				rpc->nr_client = client;
				rpc->nr_create_time = time_uptime;
			}
		}
	}

	/* Hand the caller its own reference (may still be NULL). */
	client = rpc->nr_client;
	if (client)
		CLNT_ACQUIRE(client);
	mtx_unlock(&host->nh_lock);

	return (client);

}
1318 
nlm_host_get_sysid(struct nlm_host * host)1319 int nlm_host_get_sysid(struct nlm_host *host)
1320 {
1321 
1322 	return (host->nh_sysid);
1323 }
1324 
1325 int
nlm_host_get_state(struct nlm_host * host)1326 nlm_host_get_state(struct nlm_host *host)
1327 {
1328 
1329 	return (host->nh_state);
1330 }
1331 
/*
 * Record that we are about to wait for the remote server to grant
 * the blocking lock described by 'lock' on vnode 'vp'.  Returns an
 * opaque handle for nlm_wait_lock()/nlm_deregister_wait_lock().
 */
void *
nlm_register_wait_lock(struct nlm4_lock *lock, struct vnode *vp)
{
	struct nlm_waiting_lock *nw;

	nw = malloc(sizeof(struct nlm_waiting_lock), M_NLM, M_WAITOK);
	nw->nw_lock = *lock;
	/*
	 * The struct assignment above copied only the fh *pointer*;
	 * copy the filehandle bytes into our own storage and re-point
	 * the netobj at it, so the entry stays valid after the caller
	 * frees its copy of the lock arguments.
	 */
	memcpy(&nw->nw_fh.fh_bytes, nw->nw_lock.fh.n_bytes,
	    nw->nw_lock.fh.n_len);
	nw->nw_lock.fh.n_bytes = nw->nw_fh.fh_bytes;
	nw->nw_waiting = TRUE;
	nw->nw_vp = vp;
	mtx_lock(&nlm_global_lock);
	TAILQ_INSERT_TAIL(&nlm_waiting_locks, nw, nw_link);
	mtx_unlock(&nlm_global_lock);

	return nw;
}
1350 
1351 void
nlm_deregister_wait_lock(void * handle)1352 nlm_deregister_wait_lock(void *handle)
1353 {
1354 	struct nlm_waiting_lock *nw = handle;
1355 
1356 	mtx_lock(&nlm_global_lock);
1357 	TAILQ_REMOVE(&nlm_waiting_locks, nw, nw_link);
1358 	mtx_unlock(&nlm_global_lock);
1359 
1360 	free(nw, M_NLM);
1361 }
1362 
/*
 * Sleep until the blocking lock registered via nlm_register_wait_lock()
 * is granted (or 'timo' expires, or a signal arrives).  Consumes the
 * handle in all cases.  Returns 0 when the lock was granted, EINTR
 * when the wait was cancelled, or the msleep error otherwise.
 */
int
nlm_wait_lock(void *handle, int timo)
{
	struct nlm_waiting_lock *nw = handle;
	int error, stops_deferred;

	/*
	 * If the granted message arrived before we got here,
	 * nw->nw_waiting will be FALSE - in that case, don't sleep.
	 */
	mtx_lock(&nlm_global_lock);
	error = 0;
	if (nw->nw_waiting) {
		/* Hold off stop signals so the sleep is restartable. */
		stops_deferred = sigdeferstop(SIGDEFERSTOP_ERESTART);
		error = msleep(nw, &nlm_global_lock, PCATCH, "nlmlock", timo);
		sigallowstop(stops_deferred);
	}
	TAILQ_REMOVE(&nlm_waiting_locks, nw, nw_link);
	if (error) {
		/*
		 * The granted message may arrive after the
		 * interrupt/timeout but before we manage to lock the
		 * mutex. Detect this by examining nw_lock.
		 */
		if (!nw->nw_waiting)
			error = 0;
	} else {
		/*
		 * If nlm_cancel_wait is called, then error will be
		 * zero but nw_waiting will still be TRUE. We
		 * translate this into EINTR.
		 */
		if (nw->nw_waiting)
			error = EINTR;
	}
	mtx_unlock(&nlm_global_lock);

	free(nw, M_NLM);

	return (error);
}
1404 
1405 void
nlm_cancel_wait(struct vnode * vp)1406 nlm_cancel_wait(struct vnode *vp)
1407 {
1408 	struct nlm_waiting_lock *nw;
1409 
1410 	mtx_lock(&nlm_global_lock);
1411 	TAILQ_FOREACH(nw, &nlm_waiting_locks, nw_link) {
1412 		if (nw->nw_vp == vp) {
1413 			wakeup(nw);
1414 		}
1415 	}
1416 	mtx_unlock(&nlm_global_lock);
1417 }
1418 
1419 
1420 /**********************************************************************/
1421 
1422 /*
1423  * Syscall interface with userland.
1424  */
1425 
1426 extern void nlm_prog_0(struct svc_req *rqstp, SVCXPRT *transp);
1427 extern void nlm_prog_1(struct svc_req *rqstp, SVCXPRT *transp);
1428 extern void nlm_prog_3(struct svc_req *rqstp, SVCXPRT *transp);
1429 extern void nlm_prog_4(struct svc_req *rqstp, SVCXPRT *transp);
1430 
1431 static int
nlm_register_services(SVCPOOL * pool,int addr_count,char ** addrs)1432 nlm_register_services(SVCPOOL *pool, int addr_count, char **addrs)
1433 {
1434 	static rpcvers_t versions[] = {
1435 		NLM_SM, NLM_VERS, NLM_VERSX, NLM_VERS4
1436 	};
1437 	static void (*dispatchers[])(struct svc_req *, SVCXPRT *) = {
1438 		nlm_prog_0, nlm_prog_1, nlm_prog_3, nlm_prog_4
1439 	};
1440 
1441 	SVCXPRT **xprts;
1442 	char netid[16];
1443 	char uaddr[128];
1444 	struct netconfig *nconf;
1445 	int i, j, error;
1446 
1447 	if (!addr_count) {
1448 		NLM_ERR("NLM: no service addresses given - can't start server");
1449 		return (EINVAL);
1450 	}
1451 
1452 	if (addr_count < 0 || addr_count > 256 ) {
1453 		NLM_ERR("NLM:  too many service addresses (%d) given, "
1454 		    "max 256 - can't start server\n", addr_count);
1455 		return (EINVAL);
1456 	}
1457 
1458 	xprts = malloc(addr_count * sizeof(SVCXPRT *), M_NLM, M_WAITOK|M_ZERO);
1459 	for (i = 0; i < nitems(versions); i++) {
1460 		for (j = 0; j < addr_count; j++) {
1461 			/*
1462 			 * Create transports for the first version and
1463 			 * then just register everything else to the
1464 			 * same transports.
1465 			 */
1466 			if (i == 0) {
1467 				char *up;
1468 
1469 				error = copyin(&addrs[2*j], &up,
1470 				    sizeof(char*));
1471 				if (error)
1472 					goto out;
1473 				error = copyinstr(up, netid, sizeof(netid),
1474 				    NULL);
1475 				if (error)
1476 					goto out;
1477 				error = copyin(&addrs[2*j+1], &up,
1478 				    sizeof(char*));
1479 				if (error)
1480 					goto out;
1481 				error = copyinstr(up, uaddr, sizeof(uaddr),
1482 				    NULL);
1483 				if (error)
1484 					goto out;
1485 				nconf = getnetconfigent(netid);
1486 				if (!nconf) {
1487 					NLM_ERR("Can't lookup netid %s\n",
1488 					    netid);
1489 					error = EINVAL;
1490 					goto out;
1491 				}
1492 				xprts[j] = svc_tp_create(pool, dispatchers[i],
1493 				    NLM_PROG, versions[i], uaddr, nconf);
1494 				if (!xprts[j]) {
1495 					NLM_ERR("NLM: unable to create "
1496 					    "(NLM_PROG, %d).\n", versions[i]);
1497 					error = EINVAL;
1498 					goto out;
1499 				}
1500 				freenetconfigent(nconf);
1501 			} else {
1502 				nconf = getnetconfigent(xprts[j]->xp_netid);
1503 				rpcb_unset(NLM_PROG, versions[i], nconf);
1504 				if (!svc_reg(xprts[j], NLM_PROG, versions[i],
1505 					dispatchers[i], nconf)) {
1506 					NLM_ERR("NLM: can't register "
1507 					    "(NLM_PROG, %d)\n", versions[i]);
1508 					error = EINVAL;
1509 					goto out;
1510 				}
1511 			}
1512 		}
1513 	}
1514 	error = 0;
1515 out:
1516 	for (j = 0; j < addr_count; j++) {
1517 		if (xprts[j])
1518 			SVC_RELEASE(xprts[j]);
1519 	}
1520 	free(xprts, M_NLM);
1521 	return (error);
1522 }
1523 
1524 /*
1525  * Main server entry point. Contacts the local NSM to get its current
1526  * state and send SM_UNMON_ALL. Registers the NLM services and then
1527  * services requests. Does not return until the server is interrupted
1528  * by a signal.
1529  */
static int
nlm_server_main(int addr_count, char **addrs)
{
	struct thread *td = curthread;
	int error;
	SVCPOOL *pool = NULL;
	struct sockopt opt;
	int portlow;
#ifdef INET6
	struct sockaddr_in6 sin6;
#endif
	struct sockaddr_in sin;
	my_id id;
	sm_stat smstat;
	struct timeval timo;
	enum clnt_stat stat;
	struct nlm_host *host, *nhost;
	struct nlm_waiting_lock *nw;
	vop_advlock_t *old_nfs_advlock;
	vop_reclaim_t *old_nfs_reclaim;

	if (nlm_is_running != 0) {
		NLM_ERR("NLM: can't start server - "
		    "it appears to be running already\n");
		return (EPERM);
	}

	/* Lazily create the UDP sockets used for outgoing RPC calls. */
	if (nlm_socket == NULL) {
		memset(&opt, 0, sizeof(opt));

		error = socreate(AF_INET, &nlm_socket, SOCK_DGRAM, 0,
		    td->td_ucred, td);
		if (error) {
			NLM_ERR("NLM: can't create IPv4 socket - error %d\n",
			    error);
			return (error);
		}
		/* Ask for a reserved (low) local port. */
		opt.sopt_dir = SOPT_SET;
		opt.sopt_level = IPPROTO_IP;
		opt.sopt_name = IP_PORTRANGE;
		portlow = IP_PORTRANGE_LOW;
		opt.sopt_val = &portlow;
		opt.sopt_valsize = sizeof(portlow);
		sosetopt(nlm_socket, &opt);

#ifdef INET6
		nlm_socket6 = NULL;
		error = socreate(AF_INET6, &nlm_socket6, SOCK_DGRAM, 0,
		    td->td_ucred, td);
		if (error) {
			NLM_ERR("NLM: can't create IPv6 socket - error %d\n",
			    error);
			/* Unwind the IPv4 socket created above. */
			soclose(nlm_socket);
			nlm_socket = NULL;
			return (error);
		}
		opt.sopt_dir = SOPT_SET;
		opt.sopt_level = IPPROTO_IPV6;
		opt.sopt_name = IPV6_PORTRANGE;
		portlow = IPV6_PORTRANGE_LOW;
		opt.sopt_val = &portlow;
		opt.sopt_valsize = sizeof(portlow);
		sosetopt(nlm_socket6, &opt);
#endif
	}

	nlm_auth = authunix_create(curthread->td_ucred);

	/* Contact the local NSM over loopback, trying IPv6 first. */
#ifdef INET6
	memset(&sin6, 0, sizeof(sin6));
	sin6.sin6_len = sizeof(sin6);
	sin6.sin6_family = AF_INET6;
	sin6.sin6_addr = in6addr_loopback;
	nlm_nsm = nlm_get_rpc((struct sockaddr *) &sin6, SM_PROG, SM_VERS);
	if (!nlm_nsm) {
#endif
		memset(&sin, 0, sizeof(sin));
		sin.sin_len = sizeof(sin);
		sin.sin_family = AF_INET;
		sin.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
		nlm_nsm = nlm_get_rpc((struct sockaddr *) &sin, SM_PROG,
		    SM_VERS);
#ifdef INET6
	}
#endif

	if (!nlm_nsm) {
		NLM_ERR("Can't start NLM - unable to contact NSM\n");
		error = EINVAL;
		goto out;
	}

	pool = svcpool_create("NLM", NULL);

	error = nlm_register_services(pool, addr_count, addrs);
	if (error)
		goto out;

	/* Clear monitoring state left over from a previous incarnation. */
	memset(&id, 0, sizeof(id));
	id.my_name = "NFS NLM";

	timo.tv_sec = 25;
	timo.tv_usec = 0;
	stat = CLNT_CALL(nlm_nsm, SM_UNMON_ALL,
	    (xdrproc_t) xdr_my_id, &id,
	    (xdrproc_t) xdr_sm_stat, &smstat, timo);

	if (stat != RPC_SUCCESS) {
		struct rpc_err err;

		CLNT_GETERR(nlm_nsm, &err);
		NLM_ERR("NLM: unexpected error contacting NSM, "
		    "stat=%d, errno=%d\n", stat, err.re_errno);
		error = EINVAL;
		goto out;
	}
	nlm_is_running = 1;

	NLM_DEBUG(1, "NLM: local NSM state is %d\n", smstat.state);
	nlm_nsm_state = smstat.state;

	/* Divert NFS advisory locking and reclaim through the NLM. */
	old_nfs_advlock = nfs_advlock_p;
	nfs_advlock_p = nlm_advlock;
	old_nfs_reclaim = nfs_reclaim_p;
	nfs_reclaim_p = nlm_reclaim;

	/* Blocks here servicing requests until the pool is stopped. */
	svc_run(pool);
	error = 0;

	nfs_advlock_p = old_nfs_advlock;
	nfs_reclaim_p = old_nfs_reclaim;

out:
	nlm_is_running = 0;
	if (pool)
		svcpool_destroy(pool);

	/*
	 * We are finished communicating with the NSM.
	 */
	if (nlm_nsm) {
		CLNT_RELEASE(nlm_nsm);
		nlm_nsm = NULL;
	}

	/*
	 * Trash all the existing state so that if the server
	 * restarts, it gets a clean slate. This is complicated by the
	 * possibility that there may be other threads trying to make
	 * client locking requests.
	 *
	 * First we fake a client reboot notification which will
	 * cancel any pending async locks and purge remote lock state
	 * from the local lock manager. We release the reference from
	 * nlm_hosts to the host (which may remove it from the list
	 * and free it). After this phase, the only entries in the
	 * nlm_host list should be from other threads performing
	 * client lock requests.
	 */
	mtx_lock(&nlm_global_lock);
	TAILQ_FOREACH(nw, &nlm_waiting_locks, nw_link) {
		wakeup(nw);
	}
	TAILQ_FOREACH_SAFE(host, &nlm_hosts, nh_link, nhost) {
		/* nlm_host_notify/release must run without the lock. */
		mtx_unlock(&nlm_global_lock);
		nlm_host_notify(host, 0);
		nlm_host_release(host);
		mtx_lock(&nlm_global_lock);
	}
	mtx_unlock(&nlm_global_lock);

	AUTH_DESTROY(nlm_auth);

	return (error);
}
1705 
int
sys_nlm_syscall(struct thread *td, struct nlm_syscall_args *uap)
{
	int error;

	/* Only privileged processes may start the NLM server. */
#if __FreeBSD_version >= 700000
	error = priv_check(td, PRIV_NFS_LOCKD);
#else
	error = suser(td);
#endif
	if (error)
		return (error);

	/* Record the user-supplied tuning before starting the server. */
	nlm_debug_level = uap->debug_level;
	nlm_grace_threshold = time_uptime + uap->grace_period;
	nlm_next_idle_check = time_uptime + NLM_IDLE_PERIOD;

	/* Does not return until the server shuts down. */
	return nlm_server_main(uap->addr_count, uap->addrs);
}
1725 
1726 /**********************************************************************/
1727 
1728 /*
1729  * NLM implementation details, called from the RPC stubs.
1730  */
1731 
1732 
1733 void
nlm_sm_notify(struct nlm_sm_status * argp)1734 nlm_sm_notify(struct nlm_sm_status *argp)
1735 {
1736 	uint32_t sysid;
1737 	struct nlm_host *host;
1738 
1739 	NLM_DEBUG(3, "nlm_sm_notify(): mon_name = %s\n", argp->mon_name);
1740 	memcpy(&sysid, &argp->priv, sizeof(sysid));
1741 	host = nlm_find_host_by_sysid(sysid);
1742 	if (host) {
1743 		nlm_host_notify(host, argp->state);
1744 		nlm_host_release(host);
1745 	}
1746 }
1747 
/*
 * Extract a fixed-size fhandle_t from the variable-length netobj
 * carried in an NLM request.
 * NOTE(review): assumes p->n_len >= sizeof(fhandle_t); not checked.
 */
static void
nlm_convert_to_fhandle_t(fhandle_t *fhp, struct netobj *p)
{
	memcpy(fhp, p->n_bytes, sizeof(fhandle_t));
}
1753 
/* Mount/vnode references held while servicing one NLM request. */
struct vfs_state {
	struct mount	*vs_mp;		/* referenced mount, or NULL */
	struct vnode	*vs_vp;		/* referenced vnode, or NULL */
	int		vs_vnlocked;	/* TRUE while vs_vp is locked */
};
1759 
/*
 * Resolve 'fhp' to a referenced mount and vnode, checking export
 * permissions and access rights when accmode != 0.  On success the
 * vnode is returned unlocked but referenced in vs->vs_vp.  The
 * caller must call nlm_release_vfs_state() in all cases.
 */
static int
nlm_get_vfs_state(struct nlm_host *host, struct svc_req *rqstp,
    fhandle_t *fhp, struct vfs_state *vs, accmode_t accmode)
{
	int error, exflags;
	struct ucred *cred = NULL, *credanon = NULL;

	memset(vs, 0, sizeof(*vs));

	vs->vs_mp = vfs_getvfs(&fhp->fh_fsid);
	if (!vs->vs_mp) {
		return (ESTALE);
	}

	/* accmode == 0 means don't check, since it is an unlock. */
	if (accmode != 0) {
		error = VFS_CHECKEXP(vs->vs_mp,
		    (struct sockaddr *)&host->nh_addr, &exflags, &credanon,
		    NULL, NULL);
		if (error)
			goto out;

		/* Refuse to lock on read-only exports or mounts. */
		if (exflags & MNT_EXRDONLY ||
		    (vs->vs_mp->mnt_flag & MNT_RDONLY)) {
			error = EROFS;
			goto out;
		}
	}

	error = VFS_FHTOVP(vs->vs_mp, &fhp->fh_fid, LK_EXCLUSIVE, &vs->vs_vp);
	if (error)
		goto out;
	vs->vs_vnlocked = TRUE;

	if (accmode != 0) {
		if (!svc_getcred(rqstp, &cred, NULL)) {
			error = EINVAL;
			goto out;
		}
		/* Map root (and always-anonymous exports) to anon cred. */
		if (cred->cr_uid == 0 || (exflags & MNT_EXPORTANON)) {
			crfree(cred);
			cred = credanon;
			credanon = NULL;
		}

		/*
		 * Check cred.
		 */
		error = VOP_ACCESS(vs->vs_vp, accmode, cred, curthread);
		/*
		 * If this failed and accmode != VWRITE, try again with
		 * VWRITE to maintain backwards compatibility with the
		 * old code that always used VWRITE.
		 */
		if (error != 0 && accmode != VWRITE)
			error = VOP_ACCESS(vs->vs_vp, VWRITE, cred, curthread);
		if (error)
			goto out;
	}

	/* Hand the vnode back unlocked but still referenced. */
#if __FreeBSD_version < 800011
	VOP_UNLOCK(vs->vs_vp, 0, curthread);
#else
	VOP_UNLOCK(vs->vs_vp, 0);
#endif
	vs->vs_vnlocked = FALSE;

out:
	if (cred)
		crfree(cred);
	if (credanon)
		crfree(credanon);

	return (error);
}
1835 
1836 static void
nlm_release_vfs_state(struct vfs_state * vs)1837 nlm_release_vfs_state(struct vfs_state *vs)
1838 {
1839 
1840 	if (vs->vs_vp) {
1841 		if (vs->vs_vnlocked)
1842 			vput(vs->vs_vp);
1843 		else
1844 			vrele(vs->vs_vp);
1845 	}
1846 	if (vs->vs_mp)
1847 		vfs_rel(vs->vs_mp);
1848 }
1849 
1850 static nlm4_stats
nlm_convert_error(int error)1851 nlm_convert_error(int error)
1852 {
1853 
1854 	if (error == ESTALE)
1855 		return nlm4_stale_fh;
1856 	else if (error == EROFS)
1857 		return nlm4_rofs;
1858 	else
1859 		return nlm4_failed;
1860 }
1861 
/*
 * Service an NLM_TEST/NLM4_TEST request: report whether the lock in
 * 'argp' could be granted and, if not, describe the conflicting
 * holder.  Always returns 0 with the NLM status in 'result', except
 * ENOMEM when no host entry could be created.  If 'rpcp' is
 * non-NULL it receives an RPC handle for sending the async reply.
 */
int
nlm_do_test(nlm4_testargs *argp, nlm4_testres *result, struct svc_req *rqstp,
	CLIENT **rpcp)
{
	fhandle_t fh;
	struct vfs_state vs;
	struct nlm_host *host, *bhost;
	int error, sysid;
	struct flock fl;
	accmode_t accmode;

	memset(result, 0, sizeof(*result));
	memset(&vs, 0, sizeof(vs));

	host = nlm_find_host_by_name(argp->alock.caller_name,
	    svc_getrpccaller(rqstp), rqstp->rq_vers);
	if (!host) {
		result->stat.stat = nlm4_denied_nolocks;
		return (ENOMEM);
	}

	NLM_DEBUG(3, "nlm_do_test(): caller_name = %s (sysid = %d)\n",
	    host->nh_caller_name, host->nh_sysid);

	nlm_check_expired_locks(host);
	sysid = host->nh_sysid;

	nlm_convert_to_fhandle_t(&fh, &argp->alock.fh);
	nlm_copy_netobj(&result->cookie, &argp->cookie, M_RPC);

	/* Refuse requests during the post-reboot grace period. */
	if (time_uptime < nlm_grace_threshold) {
		result->stat.stat = nlm4_denied_grace_period;
		goto out;
	}

	accmode = argp->exclusive ? VWRITE : VREAD;
	error = nlm_get_vfs_state(host, rqstp, &fh, &vs, accmode);
	if (error) {
		result->stat.stat = nlm_convert_error(error);
		goto out;
	}

	/* Translate the NLM lock description into a struct flock. */
	fl.l_start = argp->alock.l_offset;
	fl.l_len = argp->alock.l_len;
	fl.l_pid = argp->alock.svid;
	fl.l_sysid = sysid;
	fl.l_whence = SEEK_SET;
	if (argp->exclusive)
		fl.l_type = F_WRLCK;
	else
		fl.l_type = F_RDLCK;
	error = VOP_ADVLOCK(vs.vs_vp, NULL, F_GETLK, &fl, F_REMOTE);
	if (error) {
		result->stat.stat = nlm4_failed;
		goto out;
	}

	/* F_UNLCK from F_GETLK means no conflicting lock exists. */
	if (fl.l_type == F_UNLCK) {
		result->stat.stat = nlm4_granted;
	} else {
		result->stat.stat = nlm4_denied;
		result->stat.nlm4_testrply_u.holder.exclusive =
			(fl.l_type == F_WRLCK);
		result->stat.nlm4_testrply_u.holder.svid = fl.l_pid;
		bhost = nlm_find_host_by_sysid(fl.l_sysid);
		if (bhost) {
			/*
			 * We don't have any useful way of recording
			 * the value of oh used in the original lock
			 * request. Ideally, the test reply would have
			 * a space for the owning host's name allowing
			 * our caller's NLM to keep track.
			 *
			 * As far as I can see, Solaris uses an eight
			 * byte structure for oh which contains a four
			 * byte pid encoded in local byte order and
			 * the first four bytes of the host
			 * name. Linux uses a variable length string
			 * 'pid@hostname' in ascii but doesn't even
			 * return that in test replies.
			 *
			 * For the moment, return nothing in oh
			 * (already zero'ed above).
			 */
			nlm_host_release(bhost);
		}
		result->stat.nlm4_testrply_u.holder.l_offset = fl.l_start;
		result->stat.nlm4_testrply_u.holder.l_len = fl.l_len;
	}

out:
	nlm_release_vfs_state(&vs);
	if (rpcp)
		*rpcp = nlm_host_get_rpc(host, TRUE);
	nlm_host_release(host);
	return (0);
}
1959 
/*
 * Service an NLM_LOCK/NLM4_LOCK request.  If argp->block is set, a
 * lock that cannot be granted immediately is queued as an async
 * lock and nlm4_blocked is returned; the grant is delivered later
 * via the callback scheduled with VOP_ADVLOCKASYNC.  When 'monitor'
 * is TRUE the caller asked us to monitor the client via the NSM.
 * Always returns 0 with the NLM status in 'result', except ENOMEM
 * when no host entry could be created.
 */
int
nlm_do_lock(nlm4_lockargs *argp, nlm4_res *result, struct svc_req *rqstp,
    bool_t monitor, CLIENT **rpcp)
{
	fhandle_t fh;
	struct vfs_state vs;
	struct nlm_host *host;
	int error, sysid;
	struct flock fl;
	accmode_t accmode;

	memset(result, 0, sizeof(*result));
	memset(&vs, 0, sizeof(vs));

	host = nlm_find_host_by_name(argp->alock.caller_name,
	    svc_getrpccaller(rqstp), rqstp->rq_vers);
	if (!host) {
		result->stat.stat = nlm4_denied_nolocks;
		return (ENOMEM);
	}

	NLM_DEBUG(3, "nlm_do_lock(): caller_name = %s (sysid = %d)\n",
	    host->nh_caller_name, host->nh_sysid);

	if (monitor && host->nh_state && argp->state
	    && host->nh_state != argp->state) {
		/*
		 * The host rebooted without telling us. Trash its
		 * locks.
		 */
		nlm_host_notify(host, argp->state);
	}

	nlm_check_expired_locks(host);
	sysid = host->nh_sysid;

	nlm_convert_to_fhandle_t(&fh, &argp->alock.fh);
	nlm_copy_netobj(&result->cookie, &argp->cookie, M_RPC);

	/* During the grace period only reclaim requests are allowed. */
	if (time_uptime < nlm_grace_threshold && !argp->reclaim) {
		result->stat.stat = nlm4_denied_grace_period;
		goto out;
	}

	accmode = argp->exclusive ? VWRITE : VREAD;
	error = nlm_get_vfs_state(host, rqstp, &fh, &vs, accmode);
	if (error) {
		result->stat.stat = nlm_convert_error(error);
		goto out;
	}

	/* Translate the NLM lock description into a struct flock. */
	fl.l_start = argp->alock.l_offset;
	fl.l_len = argp->alock.l_len;
	fl.l_pid = argp->alock.svid;
	fl.l_sysid = sysid;
	fl.l_whence = SEEK_SET;
	if (argp->exclusive)
		fl.l_type = F_WRLCK;
	else
		fl.l_type = F_RDLCK;
	if (argp->block) {
		struct nlm_async_lock *af;
		CLIENT *client;
		struct nlm_grantcookie cookie;

		/*
		 * First, make sure we can contact the host's NLM.
		 */
		client = nlm_host_get_rpc(host, TRUE);
		if (!client) {
			result->stat.stat = nlm4_failed;
			goto out;
		}

		/*
		 * First we need to check and see if there is an
		 * existing blocked lock that matches. This could be a
		 * badly behaved client or an RPC re-send. If we find
		 * one, just return nlm4_blocked.
		 */
		mtx_lock(&host->nh_lock);
		TAILQ_FOREACH(af, &host->nh_pending, af_link) {
			if (af->af_fl.l_start == fl.l_start
			    && af->af_fl.l_len == fl.l_len
			    && af->af_fl.l_pid == fl.l_pid
			    && af->af_fl.l_type == fl.l_type) {
				break;
			}
		}
		if (!af) {
			/* Reserve a grant cookie while nh_lock is held. */
			cookie.ng_sysid = host->nh_sysid;
			cookie.ng_cookie = host->nh_grantcookie++;
		}
		mtx_unlock(&host->nh_lock);
		if (af) {
			/* Duplicate request - drop our RPC reference. */
			CLNT_RELEASE(client);
			result->stat.stat = nlm4_blocked;
			goto out;
		}

		af = malloc(sizeof(struct nlm_async_lock), M_NLM,
		    M_WAITOK|M_ZERO);
		TASK_INIT(&af->af_task, 0, nlm_lock_callback, af);
		af->af_vp = vs.vs_vp;
		af->af_fl = fl;
		af->af_host = host;
		af->af_rpc = client;
		/*
		 * We use M_RPC here so that we can xdr_free the thing
		 * later.
		 */
		nlm_make_netobj(&af->af_granted.cookie,
		    (caddr_t)&cookie, sizeof(cookie), M_RPC);
		af->af_granted.exclusive = argp->exclusive;
		af->af_granted.alock.caller_name =
			strdup(argp->alock.caller_name, M_RPC);
		nlm_copy_netobj(&af->af_granted.alock.fh,
		    &argp->alock.fh, M_RPC);
		nlm_copy_netobj(&af->af_granted.alock.oh,
		    &argp->alock.oh, M_RPC);
		af->af_granted.alock.svid = argp->alock.svid;
		af->af_granted.alock.l_offset = argp->alock.l_offset;
		af->af_granted.alock.l_len = argp->alock.l_len;

		/*
		 * Put the entry on the pending list before calling
		 * VOP_ADVLOCKASYNC. We do this in case the lock
		 * request was blocked (returning EINPROGRESS) but
		 * then granted before we manage to run again. The
		 * client may receive the granted message before we
		 * send our blocked reply but thats their problem.
		 */
		mtx_lock(&host->nh_lock);
		TAILQ_INSERT_TAIL(&host->nh_pending, af, af_link);
		mtx_unlock(&host->nh_lock);

		error = VOP_ADVLOCKASYNC(vs.vs_vp, NULL, F_SETLK, &fl, F_REMOTE,
		    &af->af_task, &af->af_cookie);

		/*
		 * If the lock completed synchronously, just free the
		 * tracking structure now.
		 */
		if (error != EINPROGRESS) {
			CLNT_RELEASE(af->af_rpc);
			mtx_lock(&host->nh_lock);
			TAILQ_REMOVE(&host->nh_pending, af, af_link);
			mtx_unlock(&host->nh_lock);
			xdr_free((xdrproc_t) xdr_nlm4_testargs,
			    &af->af_granted);
			free(af, M_NLM);
		} else {
			NLM_DEBUG(2, "NLM: pending async lock %p for %s "
			    "(sysid %d)\n", af, host->nh_caller_name, sysid);
			/*
			 * Don't vrele the vnode just yet - this must
			 * wait until either the async callback
			 * happens or the lock is cancelled.
			 */
			vs.vs_vp = NULL;
		}
	} else {
		error = VOP_ADVLOCK(vs.vs_vp, NULL, F_SETLK, &fl, F_REMOTE);
	}

	/* Map the advlock result onto NLM protocol status codes. */
	if (error) {
		if (error == EINPROGRESS) {
			result->stat.stat = nlm4_blocked;
		} else if (error == EDEADLK) {
			result->stat.stat = nlm4_deadlck;
		} else if (error == EAGAIN) {
			result->stat.stat = nlm4_denied;
		} else {
			result->stat.stat = nlm4_failed;
		}
	} else {
		if (monitor)
			nlm_host_monitor(host, argp->state);
		result->stat.stat = nlm4_granted;
	}

out:
	nlm_release_vfs_state(&vs);
	if (rpcp)
		*rpcp = nlm_host_get_rpc(host, TRUE);
	nlm_host_release(host);
	return (0);
}
2148 
2149 int
nlm_do_cancel(nlm4_cancargs * argp,nlm4_res * result,struct svc_req * rqstp,CLIENT ** rpcp)2150 nlm_do_cancel(nlm4_cancargs *argp, nlm4_res *result, struct svc_req *rqstp,
2151     CLIENT **rpcp)
2152 {
2153 	fhandle_t fh;
2154 	struct vfs_state vs;
2155 	struct nlm_host *host;
2156 	int error, sysid;
2157 	struct flock fl;
2158 	struct nlm_async_lock *af;
2159 
2160 	memset(result, 0, sizeof(*result));
2161 	memset(&vs, 0, sizeof(vs));
2162 
2163 	host = nlm_find_host_by_name(argp->alock.caller_name,
2164 	    svc_getrpccaller(rqstp), rqstp->rq_vers);
2165 	if (!host) {
2166 		result->stat.stat = nlm4_denied_nolocks;
2167 		return (ENOMEM);
2168 	}
2169 
2170 	NLM_DEBUG(3, "nlm_do_cancel(): caller_name = %s (sysid = %d)\n",
2171 	    host->nh_caller_name, host->nh_sysid);
2172 
2173 	nlm_check_expired_locks(host);
2174 	sysid = host->nh_sysid;
2175 
2176 	nlm_convert_to_fhandle_t(&fh, &argp->alock.fh);
2177 	nlm_copy_netobj(&result->cookie, &argp->cookie, M_RPC);
2178 
2179 	if (time_uptime < nlm_grace_threshold) {
2180 		result->stat.stat = nlm4_denied_grace_period;
2181 		goto out;
2182 	}
2183 
2184 	error = nlm_get_vfs_state(host, rqstp, &fh, &vs, (accmode_t)0);
2185 	if (error) {
2186 		result->stat.stat = nlm_convert_error(error);
2187 		goto out;
2188 	}
2189 
2190 	fl.l_start = argp->alock.l_offset;
2191 	fl.l_len = argp->alock.l_len;
2192 	fl.l_pid = argp->alock.svid;
2193 	fl.l_sysid = sysid;
2194 	fl.l_whence = SEEK_SET;
2195 	if (argp->exclusive)
2196 		fl.l_type = F_WRLCK;
2197 	else
2198 		fl.l_type = F_RDLCK;
2199 
2200 	/*
2201 	 * First we need to try and find the async lock request - if
2202 	 * there isn't one, we give up and return nlm4_denied.
2203 	 */
2204 	mtx_lock(&host->nh_lock);
2205 
2206 	TAILQ_FOREACH(af, &host->nh_pending, af_link) {
2207 		if (af->af_fl.l_start == fl.l_start
2208 		    && af->af_fl.l_len == fl.l_len
2209 		    && af->af_fl.l_pid == fl.l_pid
2210 		    && af->af_fl.l_type == fl.l_type) {
2211 			break;
2212 		}
2213 	}
2214 
2215 	if (!af) {
2216 		mtx_unlock(&host->nh_lock);
2217 		result->stat.stat = nlm4_denied;
2218 		goto out;
2219 	}
2220 
2221 	error = nlm_cancel_async_lock(af);
2222 
2223 	if (error) {
2224 		result->stat.stat = nlm4_denied;
2225 	} else {
2226 		result->stat.stat = nlm4_granted;
2227 	}
2228 
2229 	mtx_unlock(&host->nh_lock);
2230 
2231 out:
2232 	nlm_release_vfs_state(&vs);
2233 	if (rpcp)
2234 		*rpcp = nlm_host_get_rpc(host, TRUE);
2235 	nlm_host_release(host);
2236 	return (0);
2237 }
2238 
2239 int
nlm_do_unlock(nlm4_unlockargs * argp,nlm4_res * result,struct svc_req * rqstp,CLIENT ** rpcp)2240 nlm_do_unlock(nlm4_unlockargs *argp, nlm4_res *result, struct svc_req *rqstp,
2241     CLIENT **rpcp)
2242 {
2243 	fhandle_t fh;
2244 	struct vfs_state vs;
2245 	struct nlm_host *host;
2246 	int error, sysid;
2247 	struct flock fl;
2248 
2249 	memset(result, 0, sizeof(*result));
2250 	memset(&vs, 0, sizeof(vs));
2251 
2252 	host = nlm_find_host_by_name(argp->alock.caller_name,
2253 	    svc_getrpccaller(rqstp), rqstp->rq_vers);
2254 	if (!host) {
2255 		result->stat.stat = nlm4_denied_nolocks;
2256 		return (ENOMEM);
2257 	}
2258 
2259 	NLM_DEBUG(3, "nlm_do_unlock(): caller_name = %s (sysid = %d)\n",
2260 	    host->nh_caller_name, host->nh_sysid);
2261 
2262 	nlm_check_expired_locks(host);
2263 	sysid = host->nh_sysid;
2264 
2265 	nlm_convert_to_fhandle_t(&fh, &argp->alock.fh);
2266 	nlm_copy_netobj(&result->cookie, &argp->cookie, M_RPC);
2267 
2268 	if (time_uptime < nlm_grace_threshold) {
2269 		result->stat.stat = nlm4_denied_grace_period;
2270 		goto out;
2271 	}
2272 
2273 	error = nlm_get_vfs_state(host, rqstp, &fh, &vs, (accmode_t)0);
2274 	if (error) {
2275 		result->stat.stat = nlm_convert_error(error);
2276 		goto out;
2277 	}
2278 
2279 	fl.l_start = argp->alock.l_offset;
2280 	fl.l_len = argp->alock.l_len;
2281 	fl.l_pid = argp->alock.svid;
2282 	fl.l_sysid = sysid;
2283 	fl.l_whence = SEEK_SET;
2284 	fl.l_type = F_UNLCK;
2285 	error = VOP_ADVLOCK(vs.vs_vp, NULL, F_UNLCK, &fl, F_REMOTE);
2286 
2287 	/*
2288 	 * Ignore the error - there is no result code for failure,
2289 	 * only for grace period.
2290 	 */
2291 	result->stat.stat = nlm4_granted;
2292 
2293 out:
2294 	nlm_release_vfs_state(&vs);
2295 	if (rpcp)
2296 		*rpcp = nlm_host_get_rpc(host, TRUE);
2297 	nlm_host_release(host);
2298 	return (0);
2299 }
2300 
2301 int
nlm_do_granted(nlm4_testargs * argp,nlm4_res * result,struct svc_req * rqstp,CLIENT ** rpcp)2302 nlm_do_granted(nlm4_testargs *argp, nlm4_res *result, struct svc_req *rqstp,
2303 
2304     CLIENT **rpcp)
2305 {
2306 	struct nlm_host *host;
2307 	struct nlm_waiting_lock *nw;
2308 
2309 	memset(result, 0, sizeof(*result));
2310 
2311 	host = nlm_find_host_by_addr(svc_getrpccaller(rqstp), rqstp->rq_vers);
2312 	if (!host) {
2313 		result->stat.stat = nlm4_denied_nolocks;
2314 		return (ENOMEM);
2315 	}
2316 
2317 	nlm_copy_netobj(&result->cookie, &argp->cookie, M_RPC);
2318 	result->stat.stat = nlm4_denied;
2319 	KFAIL_POINT_CODE(DEBUG_FP, nlm_deny_grant, goto out);
2320 
2321 	mtx_lock(&nlm_global_lock);
2322 	TAILQ_FOREACH(nw, &nlm_waiting_locks, nw_link) {
2323 		if (!nw->nw_waiting)
2324 			continue;
2325 		if (argp->alock.svid == nw->nw_lock.svid
2326 		    && argp->alock.l_offset == nw->nw_lock.l_offset
2327 		    && argp->alock.l_len == nw->nw_lock.l_len
2328 		    && argp->alock.fh.n_len == nw->nw_lock.fh.n_len
2329 		    && !memcmp(argp->alock.fh.n_bytes, nw->nw_lock.fh.n_bytes,
2330 			nw->nw_lock.fh.n_len)) {
2331 			nw->nw_waiting = FALSE;
2332 			wakeup(nw);
2333 			result->stat.stat = nlm4_granted;
2334 			break;
2335 		}
2336 	}
2337 	mtx_unlock(&nlm_global_lock);
2338 
2339 out:
2340 	if (rpcp)
2341 		*rpcp = nlm_host_get_rpc(host, TRUE);
2342 	nlm_host_release(host);
2343 	return (0);
2344 }
2345 
2346 void
nlm_do_granted_res(nlm4_res * argp,struct svc_req * rqstp)2347 nlm_do_granted_res(nlm4_res *argp, struct svc_req *rqstp)
2348 {
2349 	struct nlm_host *host = NULL;
2350 	struct nlm_async_lock *af = NULL;
2351 	int error;
2352 
2353 	if (argp->cookie.n_len != sizeof(struct nlm_grantcookie)) {
2354 		NLM_DEBUG(1, "NLM: bogus grant cookie");
2355 		goto out;
2356 	}
2357 
2358 	host = nlm_find_host_by_sysid(ng_sysid(&argp->cookie));
2359 	if (!host) {
2360 		NLM_DEBUG(1, "NLM: Unknown host rejected our grant");
2361 		goto out;
2362 	}
2363 
2364 	mtx_lock(&host->nh_lock);
2365 	TAILQ_FOREACH(af, &host->nh_granted, af_link)
2366 	    if (ng_cookie(&argp->cookie) ==
2367 		ng_cookie(&af->af_granted.cookie))
2368 		    break;
2369 	if (af)
2370 		TAILQ_REMOVE(&host->nh_granted, af, af_link);
2371 	mtx_unlock(&host->nh_lock);
2372 
2373 	if (!af) {
2374 		NLM_DEBUG(1, "NLM: host %s (sysid %d) replied to our grant "
2375 		    "with unrecognized cookie %d:%d", host->nh_caller_name,
2376 		    host->nh_sysid, ng_sysid(&argp->cookie),
2377 		    ng_cookie(&argp->cookie));
2378 		goto out;
2379 	}
2380 
2381 	if (argp->stat.stat != nlm4_granted) {
2382 		af->af_fl.l_type = F_UNLCK;
2383 		error = VOP_ADVLOCK(af->af_vp, NULL, F_UNLCK, &af->af_fl, F_REMOTE);
2384 		if (error) {
2385 			NLM_DEBUG(1, "NLM: host %s (sysid %d) rejected our grant "
2386 			    "and we failed to unlock (%d)", host->nh_caller_name,
2387 			    host->nh_sysid, error);
2388 			goto out;
2389 		}
2390 
2391 		NLM_DEBUG(5, "NLM: async lock %p rejected by host %s (sysid %d)",
2392 		    af, host->nh_caller_name, host->nh_sysid);
2393 	} else {
2394 		NLM_DEBUG(5, "NLM: async lock %p accepted by host %s (sysid %d)",
2395 		    af, host->nh_caller_name, host->nh_sysid);
2396 	}
2397 
2398  out:
2399 	if (af)
2400 		nlm_free_async_lock(af);
2401 	if (host)
2402 		nlm_host_release(host);
2403 }
2404 
2405 void
nlm_do_free_all(nlm4_notify * argp)2406 nlm_do_free_all(nlm4_notify *argp)
2407 {
2408 	struct nlm_host *host, *thost;
2409 
2410 	TAILQ_FOREACH_SAFE(host, &nlm_hosts, nh_link, thost) {
2411 		if (!strcmp(host->nh_caller_name, argp->name))
2412 			nlm_host_notify(host, argp->state);
2413 	}
2414 }
2415 
2416 /*
2417  * Kernel module glue
2418  */
2419 static int
nfslockd_modevent(module_t mod,int type,void * data)2420 nfslockd_modevent(module_t mod, int type, void *data)
2421 {
2422 
2423 	switch (type) {
2424 	case MOD_LOAD:
2425 		return (0);
2426 	case MOD_UNLOAD:
2427 		/* The NLM module cannot be safely unloaded. */
2428 		/* FALLTHROUGH */
2429 	default:
2430 		return (EOPNOTSUPP);
2431 	}
2432 }
/* Kernel module registration for the NLM (rpc.lockd) server. */
static moduledata_t nfslockd_mod = {
	"nfslockd",		/* module name */
	nfslockd_modevent,	/* load/unload event handler */
	NULL,			/* extra data (unused) */
};
DECLARE_MODULE(nfslockd, nfslockd_mod, SI_SUB_VFS, SI_ORDER_ANY);

/* So that loader and kldload(2) can find us, wherever we are.. */
MODULE_DEPEND(nfslockd, krpc, 1, 1, 1);
MODULE_DEPEND(nfslockd, nfslock, 1, 1, 1);
MODULE_VERSION(nfslockd, 1);
2444