1 /* $NetBSD: nlm_prot_impl.c,v 1.5 2023/04/28 22:31:38 andvar Exp $ */
2 /*-
3 * Copyright (c) 2008 Isilon Inc http://www.isilon.com/
4 * Authors: Doug Rabson <dfr@rabson.org>
5 * Developed with Red Inc: Alfred Perlstein <alfred@freebsd.org>
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 */
28
29 #ifdef _KERNEL_OPT
30 #include "opt_inet6.h"
31 #endif
32
33 #include <sys/cdefs.h>
34 /* __FBSDID("FreeBSD: head/sys/nlm/nlm_prot_impl.c 302216 2016-06-26 20:08:42Z kib "); */
35 __RCSID("$NetBSD: nlm_prot_impl.c,v 1.5 2023/04/28 22:31:38 andvar Exp $");
36
37 #include <sys/param.h>
38 #include <sys/fail.h>
39 #include <sys/fcntl.h>
40 #include <sys/kernel.h>
41 #include <sys/kthread.h>
42 #include <sys/lockf.h>
43 #include <sys/malloc.h>
44 #include <sys/mount.h>
45 #include <sys/proc.h>
46 #include <sys/socket.h>
47 #include <sys/socketvar.h>
48 #include <sys/syscall.h>
49 #include <sys/sysctl.h>
50 #include <sys/sysent.h>
51 #include <sys/syslog.h>
52 #include <sys/sysproto.h>
53 #include <sys/systm.h>
54 #include <sys/taskqueue.h>
55 #include <sys/unistd.h>
56 #include <sys/vnode.h>
57
58 #if 0
59 #if __FreeBSD_version >= 700000
60 #include <sys/priv.h>
61 #endif
62 #endif
63
64 #include <fs/nfs/common/nfsproto.h>
65 #include <fs/nfs/common/nfs_lock.h>
66
67 #include <fs/nfs/nlm/nlm_prot.h>
68 #include <fs/nfs/nlm/sm_inter.h>
69 #include <fs/nfs/nlm/nlm.h>
70
71 #include <rpc/rpc_com.h>
72 #include <rpc/rpcb_prot.h>
73
74 MALLOC_DEFINE(M_NLM, "NLM", "Network Lock Manager");
75
76 /*
77 * If a host is inactive (and holds no locks) for this amount of
78 * seconds, we consider it idle and stop tracking it.
79 */
80 #define NLM_IDLE_TIMEOUT 30
81
82 /*
83 * We check the host list for idle every few seconds.
84 */
85 #define NLM_IDLE_PERIOD 5
86
87 /*
88 * We only look for GRANTED_RES messages for a little while.
89 */
90 #define NLM_EXPIRE_TIMEOUT 10
91
92 /*
93 * Support for sysctl vfs.nlm.sysid
94 */
95 static SYSCTL_NODE(_vfs, OID_AUTO, nlm, CTLFLAG_RW, NULL,
96 "Network Lock Manager");
97 static SYSCTL_NODE(_vfs_nlm, OID_AUTO, sysid, CTLFLAG_RW, NULL, "");
98
99 /*
100 * Syscall hooks
101 */
102 static int nlm_syscall_offset = SYS_nlm_syscall;
103 static struct sysent nlm_syscall_prev_sysent;
104 #if __FreeBSD_version < 700000
105 static struct sysent nlm_syscall_sysent = {
106 (sizeof(struct nlm_syscall_args) / sizeof(register_t)) | SYF_MPSAFE,
107 (sy_call_t *) nlm_syscall
108 };
109 #else
110 MAKE_SYSENT(nlm_syscall);
111 #endif
112 static bool_t nlm_syscall_registered = FALSE;
113
114 /*
115 * Debug level passed in from userland. We also support a sysctl hook
116 * so that it can be changed on a live system.
117 */
118 static int nlm_debug_level;
119 SYSCTL_INT(_debug, OID_AUTO, nlm_debug, CTLFLAG_RW, &nlm_debug_level, 0, "");
120
121 #define NLM_DEBUG(_level, args...) \
122 do { \
123 if (nlm_debug_level >= (_level)) \
124 log(LOG_DEBUG, args); \
125 } while(0)
126 #define NLM_ERR(args...) \
127 do { \
128 log(LOG_ERR, args); \
129 } while(0)
130
131 /*
132 * Grace period handling. The value of nlm_grace_threshold is the
133 * value of time_uptime after which we are serving requests normally.
134 */
135 static time_t nlm_grace_threshold;
136
/*
 * We check for idle hosts if time_uptime is greater than
 * nlm_next_idle_check.
 */
141 static time_t nlm_next_idle_check;
142
143 /*
144 * A flag to indicate the server is already running.
145 */
146 static int nlm_is_running;
147
148 /*
149 * A socket to use for RPC - shared by all IPv4 RPC clients.
150 */
151 static struct socket *nlm_socket;
152
153 #ifdef INET6
154
155 /*
156 * A socket to use for RPC - shared by all IPv6 RPC clients.
157 */
158 static struct socket *nlm_socket6;
159
160 #endif
161
162 /*
163 * An RPC client handle that can be used to communicate with the local
164 * NSM.
165 */
166 static CLIENT *nlm_nsm;
167
168 /*
169 * An AUTH handle for the server's creds.
170 */
171 static AUTH *nlm_auth;
172
173 /*
174 * A zero timeval for sending async RPC messages.
175 */
176 struct timeval nlm_zero_tv = { 0, 0 };
177
178 /*
179 * The local NSM state number
180 */
181 int nlm_nsm_state;
182
183
184 /*
185 * A lock to protect the host list and waiting lock list.
186 */
187 static struct mtx nlm_global_lock;
188
189 /*
190 * Locks:
191 * (l) locked by nh_lock
192 * (s) only accessed via server RPC which is single threaded
193 * (g) locked by nlm_global_lock
194 * (c) const until freeing
195 * (a) modified using atomic ops
196 */
197
198 /*
199 * A pending client-side lock request, stored on the nlm_waiting_locks
200 * list.
201 */
202 struct nlm_waiting_lock {
203 TAILQ_ENTRY(nlm_waiting_lock) nw_link; /* (g) */
204 bool_t nw_waiting; /* (g) */
205 nlm4_lock nw_lock; /* (c) */
206 union nfsfh nw_fh; /* (c) */
207 struct vnode *nw_vp; /* (c) */
208 };
209 TAILQ_HEAD(nlm_waiting_lock_list, nlm_waiting_lock);
210
211 struct nlm_waiting_lock_list nlm_waiting_locks; /* (g) */
212
213 /*
214 * A pending server-side asynchronous lock request, stored on the
215 * nh_pending list of the NLM host.
216 */
217 struct nlm_async_lock {
218 TAILQ_ENTRY(nlm_async_lock) af_link; /* (l) host's list of locks */
219 struct task af_task; /* (c) async callback details */
220 void *af_cookie; /* (l) lock manager cancel token */
221 struct vnode *af_vp; /* (l) vnode to lock */
222 struct flock af_fl; /* (c) lock details */
223 struct nlm_host *af_host; /* (c) host which is locking */
224 CLIENT *af_rpc; /* (c) rpc client to send message */
225 nlm4_testargs af_granted; /* (c) notification details */
226 time_t af_expiretime; /* (c) notification time */
227 };
228 TAILQ_HEAD(nlm_async_lock_list, nlm_async_lock);
229
230 /*
231 * NLM host.
232 */
233 enum nlm_host_state {
234 NLM_UNMONITORED,
235 NLM_MONITORED,
236 NLM_MONITOR_FAILED,
237 NLM_RECOVERING
238 };
239
240 struct nlm_rpc {
241 CLIENT *nr_client; /* (l) RPC client handle */
242 time_t nr_create_time; /* (l) when client was created */
243 };
244
245 struct nlm_host {
246 struct mtx nh_lock;
247 volatile u_int nh_refs; /* (a) reference count */
248 TAILQ_ENTRY(nlm_host) nh_link; /* (g) global list of hosts */
249 char nh_caller_name[MAXNAMELEN]; /* (c) printable name of host */
250 uint32_t nh_sysid; /* (c) our allocated system ID */
251 char nh_sysid_string[10]; /* (c) string rep. of sysid */
252 struct sockaddr_storage nh_addr; /* (s) remote address of host */
253 struct nlm_rpc nh_srvrpc; /* (l) RPC for server replies */
254 struct nlm_rpc nh_clntrpc; /* (l) RPC for client requests */
255 rpcvers_t nh_vers; /* (s) NLM version of host */
256 int nh_state; /* (s) last seen NSM state of host */
257 enum nlm_host_state nh_monstate; /* (l) local NSM monitoring state */
258 time_t nh_idle_timeout; /* (s) Time at which host is idle */
259 struct sysctl_ctx_list nh_sysctl; /* (c) vfs.nlm.sysid nodes */
260 uint32_t nh_grantcookie; /* (l) grant cookie counter */
261 struct nlm_async_lock_list nh_pending; /* (l) pending async locks */
262 struct nlm_async_lock_list nh_granted; /* (l) granted locks */
263 struct nlm_async_lock_list nh_finished; /* (l) finished async locks */
264 };
265 TAILQ_HEAD(nlm_host_list, nlm_host);
266
267 static struct nlm_host_list nlm_hosts; /* (g) */
268 static uint32_t nlm_next_sysid = 1; /* (g) */
269
270 static void nlm_host_unmonitor(struct nlm_host *);
271
272 struct nlm_grantcookie {
273 uint32_t ng_sysid;
274 uint32_t ng_cookie;
275 };
276
277 static inline uint32_t
ng_sysid(struct netobj * src)278 ng_sysid(struct netobj *src)
279 {
280
281 return ((struct nlm_grantcookie *)src->n_bytes)->ng_sysid;
282 }
283
284 static inline uint32_t
ng_cookie(struct netobj * src)285 ng_cookie(struct netobj *src)
286 {
287
288 return ((struct nlm_grantcookie *)src->n_bytes)->ng_cookie;
289 }
290
291 /**********************************************************************/
292
293 /*
294 * Initialise NLM globals.
295 */
296 static void
nlm_init(void * dummy)297 nlm_init(void *dummy)
298 {
299 int error;
300
301 mtx_init(&nlm_global_lock, "nlm_global_lock", NULL, MTX_DEF);
302 TAILQ_INIT(&nlm_waiting_locks);
303 TAILQ_INIT(&nlm_hosts);
304
305 error = syscall_register(&nlm_syscall_offset, &nlm_syscall_sysent,
306 &nlm_syscall_prev_sysent, SY_THR_STATIC_KLD);
307 if (error)
308 NLM_ERR("Can't register NLM syscall\n");
309 else
310 nlm_syscall_registered = TRUE;
311 }
312 SYSINIT(nlm_init, SI_SUB_LOCK, SI_ORDER_FIRST, nlm_init, NULL);
313
314 static void
nlm_uninit(void * dummy)315 nlm_uninit(void *dummy)
316 {
317
318 if (nlm_syscall_registered)
319 syscall_deregister(&nlm_syscall_offset,
320 &nlm_syscall_prev_sysent);
321 }
322 SYSUNINIT(nlm_uninit, SI_SUB_LOCK, SI_ORDER_FIRST, nlm_uninit, NULL);
323
324 /*
325 * Create a netobj from an arbitrary source.
326 */
327 void
nlm_make_netobj(struct netobj * dst,caddr_t src,size_t srcsize,struct malloc_type * type)328 nlm_make_netobj(struct netobj *dst, caddr_t src, size_t srcsize,
329 struct malloc_type *type)
330 {
331
332 dst->n_len = srcsize;
333 dst->n_bytes = malloc(srcsize, type, M_WAITOK);
334 memcpy(dst->n_bytes, src, srcsize);
335 }
336
337 /*
338 * Copy a struct netobj.
339 */
340 void
nlm_copy_netobj(struct netobj * dst,struct netobj * src,struct malloc_type * type)341 nlm_copy_netobj(struct netobj *dst, struct netobj *src,
342 struct malloc_type *type)
343 {
344
345 nlm_make_netobj(dst, src->n_bytes, src->n_len, type);
346 }
347
348
/*
 * Create an RPC client handle for the given (address,prog,vers)
 * triple, preferring UDP and falling back to TCP if the UDP service
 * is not registered with the remote rpcbind.
 */
static CLIENT *
nlm_get_rpc(struct sockaddr *sa, rpcprog_t prog, rpcvers_t vers)
{
	char *wchan = "nlmrcv";	/* sleep channel for the RPC client */
	const char* protofmly;	/* set below but not read afterwards here */
	struct sockaddr_storage ss;
	struct socket *so;
	CLIENT *rpcb;
	struct timeval timo;
	RPCB parms;
	char *uaddr;
	enum clnt_stat stat = RPC_SUCCESS;
	int rpcvers = RPCBVERS4;	/* start with rpcbind v4, step down */
	bool_t do_tcp = FALSE;
	bool_t tryagain = FALSE;
	struct portmap mapping;
	u_short port = 0;

	/*
	 * First we need to contact the remote RPCBIND service to find
	 * the right port.  Work on a local copy of the address so we
	 * can overwrite the port (111 = well-known rpcbind/portmap).
	 */
	memcpy(&ss, sa, sa->sa_len);
	switch (ss.ss_family) {
	case AF_INET:
		((struct sockaddr_in *)&ss)->sin_port = htons(111);
		protofmly = "inet";
		so = nlm_socket;
		break;

#ifdef INET6
	case AF_INET6:
		((struct sockaddr_in6 *)&ss)->sin6_port = htons(111);
		protofmly = "inet6";
		so = nlm_socket6;
		break;
#endif

	default:
		/*
		 * Unsupported address family - fail.
		 */
		return (NULL);
	}

	/* The rpcbind query itself always goes over UDP on the shared
	 * per-family NLM socket. */
	rpcb = clnt_dg_create(so, (struct sockaddr *)&ss,
	    RPCBPROG, rpcvers, 0, 0);
	if (!rpcb)
		return (NULL);

try_tcp:
	parms.r_prog = prog;
	parms.r_vers = vers;
	if (do_tcp)
		parms.r_netid = "tcp";
	else
		parms.r_netid = "udp";
	parms.r_addr = "";
	parms.r_owner = "";

	/*
	 * Use the default timeout.
	 */
	timo.tv_sec = 25;
	timo.tv_usec = 0;
again:
	switch (rpcvers) {
	case RPCBVERS4:
	case RPCBVERS:
		/*
		 * Try RPCBIND 4 then 3.
		 */
		uaddr = NULL;
		stat = CLNT_CALL(rpcb, (rpcprog_t) RPCBPROC_GETADDR,
		    (xdrproc_t) xdr_rpcb, &parms,
		    (xdrproc_t) xdr_wrapstring, &uaddr, timo);
		if (stat == RPC_SUCCESS) {
			/*
			 * We have a reply from the remote RPCBIND - turn it
			 * into an appropriate address and make a new client
			 * that can talk to the remote NLM.
			 *
			 * XXX fixup IPv6 scope ID.
			 */
			struct netbuf *a;
			a = __rpc_uaddr2taddr_af(ss.ss_family, uaddr);
			if (!a) {
				/* Unparseable universal address: fall
				 * through to try an older protocol. */
				tryagain = TRUE;
			} else {
				tryagain = FALSE;
				memcpy(&ss, a->buf, a->len);
				free(a->buf, M_RPC);
				free(a, M_RPC);
				xdr_free((xdrproc_t) xdr_wrapstring, &uaddr);
			}
		}
		if (tryagain || stat == RPC_PROGVERSMISMATCH) {
			/* Step down: rpcbind v4 -> v3 -> portmap v2. */
			if (rpcvers == RPCBVERS4)
				rpcvers = RPCBVERS;
			else if (rpcvers == RPCBVERS)
				rpcvers = PMAPVERS;
			CLNT_CONTROL(rpcb, CLSET_VERS, &rpcvers);
			goto again;
		}
		break;
	case PMAPVERS:
		/*
		 * Try portmap.
		 */
		mapping.pm_prog = parms.r_prog;
		mapping.pm_vers = parms.r_vers;
		mapping.pm_prot = do_tcp ? IPPROTO_TCP : IPPROTO_UDP;
		mapping.pm_port = 0;

		stat = CLNT_CALL(rpcb, (rpcprog_t) PMAPPROC_GETPORT,
		    (xdrproc_t) xdr_portmap, &mapping,
		    (xdrproc_t) xdr_u_short, &port, timo);

		if (stat == RPC_SUCCESS) {
			/* Patch the reported port into our address copy. */
			switch (ss.ss_family) {
			case AF_INET:
				((struct sockaddr_in *)&ss)->sin_port =
					htons(port);
				break;

#ifdef INET6
			case AF_INET6:
				((struct sockaddr_in6 *)&ss)->sin6_port =
					htons(port);
				break;
#endif
			}
		}
		break;
	default:
		panic("invalid rpcvers %d", rpcvers);
	}
	/*
	 * We may have a positive response from the portmapper, but the NLM
	 * service was not found. Make sure we received a valid port.
	 */
	switch (ss.ss_family) {
	case AF_INET:
		port = ((struct sockaddr_in *)&ss)->sin_port;
		break;
#ifdef INET6
	case AF_INET6:
		port = ((struct sockaddr_in6 *)&ss)->sin6_port;
		break;
#endif
	}
	if (stat != RPC_SUCCESS || !port) {
		/*
		 * If we were able to talk to rpcbind or portmap, but the udp
		 * variant wasn't available, ask about tcp.
		 *
		 * XXX - We could also check for a TCP portmapper, but
		 * if the host is running a portmapper at all, we should be able
		 * to hail it over UDP.
		 */
		if (stat == RPC_SUCCESS && !do_tcp) {
			do_tcp = TRUE;
			goto try_tcp;
		}

		/* Otherwise, bad news. */
		NLM_ERR("NLM: failed to contact remote rpcbind, "
		    "stat = %d, port = %d\n", (int) stat, port);
		CLNT_DESTROY(rpcb);
		return (NULL);
	}

	if (do_tcp) {
		/*
		 * Destroy the UDP client we used to speak to rpcbind and
		 * recreate as a TCP client.
		 */
		struct netconfig *nconf = NULL;

		CLNT_DESTROY(rpcb);

		switch (ss.ss_family) {
		case AF_INET:
			nconf = getnetconfigent("tcp");
			break;
#ifdef INET6
		case AF_INET6:
			nconf = getnetconfigent("tcp6");
			break;
#endif
		}

		/* NOTE(review): clnt_reconnect_create()'s result is used
		 * unchecked below — confirm it cannot return NULL here. */
		rpcb = clnt_reconnect_create(nconf, (struct sockaddr *)&ss,
		    prog, vers, 0, 0);
		CLNT_CONTROL(rpcb, CLSET_WAITCHAN, wchan);
		rpcb->cl_auth = nlm_auth;

	} else {
		/*
		 * Re-use the client we used to speak to rpcbind.
		 * Retarget it at the NLM service's address/program/version.
		 */
		CLNT_CONTROL(rpcb, CLSET_SVC_ADDR, &ss);
		CLNT_CONTROL(rpcb, CLSET_PROG, &prog);
		CLNT_CONTROL(rpcb, CLSET_VERS, &vers);
		CLNT_CONTROL(rpcb, CLSET_WAITCHAN, wchan);
		rpcb->cl_auth = nlm_auth;
	}

	return (rpcb);
}
563
/*
 * This callback runs after an async lock request has been
 * granted. We notify the host which initiated the request.
 */
static void
nlm_lock_callback(void *arg, int pending)
{
	/* Taskqueue callback: 'arg' is the async lock request; 'pending'
	 * is the taskqueue's pending count and is not used here. */
	struct nlm_async_lock *af = (struct nlm_async_lock *) arg;
	struct rpc_callextra ext;

	NLM_DEBUG(2, "NLM: async lock %p for %s (sysid %d) granted,"
	    " cookie %d:%d\n", af, af->af_host->nh_caller_name,
	    af->af_host->nh_sysid, ng_sysid(&af->af_granted.cookie),
	    ng_cookie(&af->af_granted.cookie));

	/*
	 * Send the results back to the host.
	 *
	 * Note: there is a possible race here with nlm_host_notify
	 * destroying the RPC client. To avoid problems, the first
	 * thing nlm_host_notify does is to cancel pending async lock
	 * requests.
	 */
	memset(&ext, 0, sizeof(ext));
	ext.rc_auth = nlm_auth;
	if (af->af_host->nh_vers == NLM_VERS4) {
		nlm4_granted_msg_4(&af->af_granted,
		    NULL, af->af_rpc, &ext, nlm_zero_tv);
	} else {
		/*
		 * Back-convert to legacy protocol: copy the v4 notification
		 * field by field into an NLM v1 nlm_testargs.
		 */
		nlm_testargs granted;
		granted.cookie = af->af_granted.cookie;
		granted.exclusive = af->af_granted.exclusive;
		granted.alock.caller_name =
			af->af_granted.alock.caller_name;
		granted.alock.fh = af->af_granted.alock.fh;
		granted.alock.oh = af->af_granted.alock.oh;
		granted.alock.svid = af->af_granted.alock.svid;
		granted.alock.l_offset =
			af->af_granted.alock.l_offset;
		granted.alock.l_len =
			af->af_granted.alock.l_len;

		nlm_granted_msg_1(&granted,
		    NULL, af->af_rpc, &ext, nlm_zero_tv);
	}

	/*
	 * Move this entry to the nh_granted list.  It stays there until
	 * it expires (NLM_EXPIRE_TIMEOUT from now) or the client sends
	 * a GRANTED_RES for it.
	 */
	af->af_expiretime = time_uptime + NLM_EXPIRE_TIMEOUT;
	mtx_lock(&af->af_host->nh_lock);
	TAILQ_REMOVE(&af->af_host->nh_pending, af, af_link);
	TAILQ_INSERT_TAIL(&af->af_host->nh_granted, af, af_link);
	mtx_unlock(&af->af_host->nh_lock);
}
622
623 /*
624 * Free an async lock request. The request must have been removed from
625 * any list.
626 */
627 static void
nlm_free_async_lock(struct nlm_async_lock * af)628 nlm_free_async_lock(struct nlm_async_lock *af)
629 {
630 /*
631 * Free an async lock.
632 */
633 if (af->af_rpc)
634 CLNT_RELEASE(af->af_rpc);
635 xdr_free((xdrproc_t) xdr_nlm4_testargs, &af->af_granted);
636 if (af->af_vp)
637 vrele(af->af_vp);
638 free(af, M_NLM);
639 }
640
641 /*
642 * Cancel our async request - this must be called with
643 * af->nh_host->nh_lock held. This is slightly complicated by a
644 * potential race with our own callback. If we fail to cancel the
645 * lock, it must already have been granted - we make sure our async
646 * task has completed by calling taskqueue_drain in this case.
647 */
static int
nlm_cancel_async_lock(struct nlm_async_lock *af)
{
	struct nlm_host *host = af->af_host;
	int error;

	/* Caller must hold the host lock; we drop it around the
	 * VOP call and the taskqueue drain, both of which may sleep. */
	mtx_assert(&host->nh_lock, MA_OWNED);

	mtx_unlock(&host->nh_lock);

	/* Ask the lock manager to cancel using the cookie it gave us. */
	error = VOP_ADVLOCKASYNC(af->af_vp, NULL, F_CANCEL, &af->af_fl,
	    F_REMOTE, NULL, &af->af_cookie);

	if (error) {
		/*
		 * We failed to cancel - make sure our callback has
		 * completed before we continue.
		 */
		taskqueue_drain(taskqueue_thread, &af->af_task);
	}

	mtx_lock(&host->nh_lock);

	if (!error) {
		NLM_DEBUG(2, "NLM: async lock %p for %s (sysid %d) "
		    "cancelled\n", af, host->nh_caller_name, host->nh_sysid);

		/*
		 * Remove from the nh_pending list and free now that
		 * we are safe from the callback.  The lock is dropped
		 * again for the free, then retaken so we return with
		 * it held as the caller expects.
		 */
		TAILQ_REMOVE(&host->nh_pending, af, af_link);
		mtx_unlock(&host->nh_lock);
		nlm_free_async_lock(af);
		mtx_lock(&host->nh_lock);
	}

	/* Non-zero means the lock was already granted; the callback has
	 * moved it to nh_granted and 'af' is still valid. */
	return (error);
}
687
/*
 * Reap this host's granted async locks whose GRANTED_RES wait has
 * expired, and everything on the finished list.  The host lock is
 * dropped around each nlm_free_async_lock() call since freeing may
 * sleep (RPC client release, vrele).
 */
static void
nlm_check_expired_locks(struct nlm_host *host)
{
	struct nlm_async_lock *af;
	time_t uptime = time_uptime;

	mtx_lock(&host->nh_lock);
	/* nh_granted is in insertion (and therefore expiry) order, so we
	 * can stop at the first entry that has not yet expired. */
	while ((af = TAILQ_FIRST(&host->nh_granted)) != NULL
	    && uptime >= af->af_expiretime) {
		NLM_DEBUG(2, "NLM: async lock %p for %s (sysid %d) expired,"
		    " cookie %d:%d\n", af, af->af_host->nh_caller_name,
		    af->af_host->nh_sysid, ng_sysid(&af->af_granted.cookie),
		    ng_cookie(&af->af_granted.cookie));
		TAILQ_REMOVE(&host->nh_granted, af, af_link);
		mtx_unlock(&host->nh_lock);
		nlm_free_async_lock(af);
		mtx_lock(&host->nh_lock);
	}
	while ((af = TAILQ_FIRST(&host->nh_finished)) != NULL) {
		TAILQ_REMOVE(&host->nh_finished, af, af_link);
		mtx_unlock(&host->nh_lock);
		nlm_free_async_lock(af);
		mtx_lock(&host->nh_lock);
	}
	mtx_unlock(&host->nh_lock);
}
714
715 /*
716 * Free resources used by a host. This is called after the reference
717 * count has reached zero so it doesn't need to worry about locks.
718 */
static void
nlm_host_destroy(struct nlm_host *host)
{

	/* Unlink from the global host list first so no one can find us. */
	mtx_lock(&nlm_global_lock);
	TAILQ_REMOVE(&nlm_hosts, host, nh_link);
	mtx_unlock(&nlm_global_lock);

	/* Drop the cached RPC client handles, if any were created. */
	if (host->nh_srvrpc.nr_client)
		CLNT_RELEASE(host->nh_srvrpc.nr_client);
	if (host->nh_clntrpc.nr_client)
		CLNT_RELEASE(host->nh_clntrpc.nr_client);
	mtx_destroy(&host->nh_lock);
	/* Tears down the vfs.nlm.sysid.<sysid> subtree added at create. */
	sysctl_ctx_free(&host->nh_sysctl);
	free(host, M_NLM);
}
735
736 /*
737 * Thread start callback for client lock recovery
738 */
static void
nlm_client_recovery_start(void *arg)
{
	/* 'arg' is the host whose locks we must reclaim; the reference
	 * was taken by nlm_host_notify() before kthread_add(). */
	struct nlm_host *host = (struct nlm_host *) arg;

	NLM_DEBUG(1, "NLM: client lock recovery for %s started\n",
	    host->nh_caller_name);

	nlm_client_recovery(host);

	NLM_DEBUG(1, "NLM: client lock recovery for %s completed\n",
	    host->nh_caller_name);

	/* Leave NLM_RECOVERING and drop the thread's host reference. */
	host->nh_monstate = NLM_MONITORED;
	nlm_host_release(host);

	kthread_exit();
}
757
758 /*
759 * This is called when we receive a host state change notification. We
760 * unlock any active locks owned by the host. When rpc.lockd is
761 * shutting down, this function is called with newstate set to zero
762 * which allows us to cancel any pending async locks and clear the
763 * locking state.
764 */
static void
nlm_host_notify(struct nlm_host *host, int newstate)
{
	struct nlm_async_lock *af;

	if (newstate) {
		NLM_DEBUG(1, "NLM: host %s (sysid %d) rebooted, new "
		    "state is %d\n", host->nh_caller_name,
		    host->nh_sysid, newstate);
	}

	/*
	 * Cancel any pending async locks for this host.  Doing this
	 * first also closes the race with nlm_lock_callback using the
	 * host's RPC client (see the note there).
	 */
	mtx_lock(&host->nh_lock);
	while ((af = TAILQ_FIRST(&host->nh_pending)) != NULL) {
		/*
		 * nlm_cancel_async_lock will remove the entry from
		 * nh_pending and free it.
		 */
		nlm_cancel_async_lock(af);
	}
	mtx_unlock(&host->nh_lock);
	/* Reap anything left on the granted/finished lists too. */
	nlm_check_expired_locks(host);

	/*
	 * The host just rebooted - trash its locks.
	 */
	lf_clearremotesys(host->nh_sysid);
	host->nh_state = newstate;

	/*
	 * If we have any remote locks for this host (i.e. it
	 * represents a remote NFS server that our local NFS client
	 * has locks for), start a recovery thread.
	 */
	if (newstate != 0
	    && host->nh_monstate != NLM_RECOVERING
	    && lf_countlocks(NLM_SYSID_CLIENT | host->nh_sysid) > 0) {
		struct thread *td;
		host->nh_monstate = NLM_RECOVERING;
		/* Reference for the recovery thread; released by
		 * nlm_client_recovery_start() when it finishes. */
		refcount_acquire(&host->nh_refs);
		kthread_add(nlm_client_recovery_start, host, curproc, &td, 0, 0,
		    "NFS lock recovery for %s", host->nh_caller_name);
	}
}
811
812 /*
813 * Sysctl handler to count the number of locks for a sysid.
814 */
815 static int
nlm_host_lock_count_sysctl(SYSCTL_HANDLER_ARGS)816 nlm_host_lock_count_sysctl(SYSCTL_HANDLER_ARGS)
817 {
818 struct nlm_host *host;
819 int count;
820
821 host = oidp->oid_arg1;
822 count = lf_countlocks(host->nh_sysid);
823 return sysctl_handle_int(oidp, &count, 0, req);
824 }
825
826 /*
827 * Sysctl handler to count the number of client locks for a sysid.
828 */
829 static int
nlm_host_client_lock_count_sysctl(SYSCTL_HANDLER_ARGS)830 nlm_host_client_lock_count_sysctl(SYSCTL_HANDLER_ARGS)
831 {
832 struct nlm_host *host;
833 int count;
834
835 host = oidp->oid_arg1;
836 count = lf_countlocks(NLM_SYSID_CLIENT | host->nh_sysid);
837 return sysctl_handle_int(oidp, &count, 0, req);
838 }
839
840 /*
841 * Create a new NLM host.
842 */
/*
 * Allocate and initialise a new host entry, assign it the next sysid
 * and link it onto the global host list.  Called, and returns, with
 * nlm_global_lock held; the lock is dropped temporarily while the
 * sysctl nodes are registered.  Returns NULL if allocation fails.
 */
static struct nlm_host *
nlm_create_host(const char* caller_name)
{
	struct nlm_host *host;
	struct sysctl_oid *oid;

	mtx_assert(&nlm_global_lock, MA_OWNED);

	NLM_DEBUG(1, "NLM: new host %s (sysid %d)\n",
	    caller_name, nlm_next_sysid);
	/* M_NOWAIT: we hold the global lock, so we must not sleep here. */
	host = malloc(sizeof(struct nlm_host), M_NLM, M_NOWAIT|M_ZERO);
	if (!host)
		return (NULL);
	mtx_init(&host->nh_lock, "nh_lock", NULL, MTX_DEF);
	host->nh_refs = 1;
	strlcpy(host->nh_caller_name, caller_name, MAXNAMELEN);
	host->nh_sysid = nlm_next_sysid++;
	snprintf(host->nh_sysid_string, sizeof(host->nh_sysid_string),
		"%d", host->nh_sysid);
	host->nh_vers = 0;
	host->nh_state = 0;
	host->nh_monstate = NLM_UNMONITORED;
	host->nh_grantcookie = 1;
	TAILQ_INIT(&host->nh_pending);
	TAILQ_INIT(&host->nh_granted);
	TAILQ_INIT(&host->nh_finished);
	TAILQ_INSERT_TAIL(&nlm_hosts, host, nh_link);

	/* Drop the global lock for sysctl registration, which is done
	 * without it held — presumably because it may sleep (TODO confirm). */
	mtx_unlock(&nlm_global_lock);

	/* Publish vfs.nlm.sysid.<sysid>.{hostname,version,monitored,...}. */
	sysctl_ctx_init(&host->nh_sysctl);
	oid = SYSCTL_ADD_NODE(&host->nh_sysctl,
	    SYSCTL_STATIC_CHILDREN(_vfs_nlm_sysid),
	    OID_AUTO, host->nh_sysid_string, CTLFLAG_RD, NULL, "");
	SYSCTL_ADD_STRING(&host->nh_sysctl, SYSCTL_CHILDREN(oid), OID_AUTO,
	    "hostname", CTLFLAG_RD, host->nh_caller_name, 0, "");
	SYSCTL_ADD_UINT(&host->nh_sysctl, SYSCTL_CHILDREN(oid), OID_AUTO,
	    "version", CTLFLAG_RD, &host->nh_vers, 0, "");
	SYSCTL_ADD_UINT(&host->nh_sysctl, SYSCTL_CHILDREN(oid), OID_AUTO,
	    "monitored", CTLFLAG_RD, &host->nh_monstate, 0, "");
	SYSCTL_ADD_PROC(&host->nh_sysctl, SYSCTL_CHILDREN(oid), OID_AUTO,
	    "lock_count", CTLTYPE_INT | CTLFLAG_RD, host, 0,
	    nlm_host_lock_count_sysctl, "I", "");
	SYSCTL_ADD_PROC(&host->nh_sysctl, SYSCTL_CHILDREN(oid), OID_AUTO,
	    "client_lock_count", CTLTYPE_INT | CTLFLAG_RD, host, 0,
	    nlm_host_client_lock_count_sysctl, "I", "");

	mtx_lock(&nlm_global_lock);

	return (host);
}
894
895 /*
896 * Acquire the next sysid for remote locks not handled by the NLM.
897 */
898 uint32_t
nlm_acquire_next_sysid(void)899 nlm_acquire_next_sysid(void)
900 {
901 uint32_t next_sysid;
902
903 mtx_lock(&nlm_global_lock);
904 next_sysid = nlm_next_sysid++;
905 mtx_unlock(&nlm_global_lock);
906 return (next_sysid);
907 }
908
909 /*
910 * Return non-zero if the address parts of the two sockaddrs are the
911 * same.
912 */
/*
 * Return non-zero if the address parts of the two sockaddrs are the
 * same (ports and other fields are ignored).  Unknown families
 * compare unequal.
 */
static int
nlm_compare_addr(const struct sockaddr *a, const struct sockaddr *b)
{

	if (a->sa_family != b->sa_family)
		return (0);

	switch (a->sa_family) {
	case AF_INET: {
		const struct sockaddr_in *ia = (const struct sockaddr_in *)a;
		const struct sockaddr_in *ib = (const struct sockaddr_in *)b;

		return (memcmp(&ia->sin_addr, &ib->sin_addr,
		    sizeof(ia->sin_addr)) == 0);
	}
#ifdef INET6
	case AF_INET6: {
		const struct sockaddr_in6 *ia6 =
		    (const struct sockaddr_in6 *)a;
		const struct sockaddr_in6 *ib6 =
		    (const struct sockaddr_in6 *)b;

		return (memcmp(&ia6->sin6_addr, &ib6->sin6_addr,
		    sizeof(ia6->sin6_addr)) == 0);
	}
#endif
	default:
		return (0);
	}
}
941
942 /*
943 * Check for idle hosts and stop monitoring them. We could also free
944 * the host structure here, possibly after a larger timeout but that
945 * would require some care to avoid races with
946 * e.g. nlm_host_lock_count_sysctl.
947 */
static void
nlm_check_idle(void)
{
	struct nlm_host *host;

	mtx_assert(&nlm_global_lock, MA_OWNED);

	/* Rate-limit the scan to once per NLM_IDLE_PERIOD seconds. */
	if (time_uptime <= nlm_next_idle_check)
		return;

	nlm_next_idle_check = time_uptime + NLM_IDLE_PERIOD;

	TAILQ_FOREACH(host, &nlm_hosts, nh_link) {
		if (host->nh_monstate == NLM_MONITORED
		    && time_uptime > host->nh_idle_timeout) {
			/* Drop the global lock: lf_countlocks() and the
			 * unmonitor RPC are called without it held.
			 * NOTE(review): iteration continues after relock,
			 * which assumes 'host' stays linked meanwhile —
			 * confirm hosts are only unlinked at destroy. */
			mtx_unlock(&nlm_global_lock);
			if (lf_countlocks(host->nh_sysid) > 0
			    || lf_countlocks(NLM_SYSID_CLIENT
				+ host->nh_sysid)) {
				/* Still holds locks: not idle after all;
				 * push the deadline out and keep going. */
				host->nh_idle_timeout =
					time_uptime + NLM_IDLE_TIMEOUT;
				mtx_lock(&nlm_global_lock);
				continue;
			}
			nlm_host_unmonitor(host);
			mtx_lock(&nlm_global_lock);
		}
	}
}
977
978 /*
979 * Search for an existing NLM host that matches the given name
980 * (typically the caller_name element of an nlm4_lock). If none is
981 * found, create a new host. If 'addr' is non-NULL, record the remote
982 * address of the host so that we can call it back for async
983 * responses. If 'vers' is greater than zero then record the NLM
984 * program version to use to communicate with this client.
985 */
struct nlm_host *
nlm_find_host_by_name(const char *name, const struct sockaddr *addr,
    rpcvers_t vers)
{
	struct nlm_host *host;

	mtx_lock(&nlm_global_lock);

	/*
	 * The remote host is determined by caller_name.
	 */
	TAILQ_FOREACH(host, &nlm_hosts, nh_link) {
		if (!strcmp(host->nh_caller_name, name))
			break;
	}

	if (!host) {
		host = nlm_create_host(name);
		if (!host) {
			mtx_unlock(&nlm_global_lock);
			return (NULL);
		}
	}
	/* Reference for the caller; drop with nlm_host_release(). */
	refcount_acquire(&host->nh_refs);

	/* Any lookup counts as activity: reset the idle deadline. */
	host->nh_idle_timeout = time_uptime + NLM_IDLE_TIMEOUT;

	/*
	 * If we have an address for the host, record it so that we
	 * can send async replies etc.
	 */
	if (addr) {

		KASSERT(addr->sa_len < sizeof(struct sockaddr_storage),
		    ("Strange remote transport address length"));

		/*
		 * If we have seen an address before and we currently
		 * have an RPC client handle, make sure the address is
		 * the same, otherwise discard the client handle.
		 */
		if (host->nh_addr.ss_len && host->nh_srvrpc.nr_client) {
			if (!nlm_compare_addr(
				    (struct sockaddr *) &host->nh_addr,
				    addr)
			    || host->nh_vers != vers) {
				/* Detach the handle under nh_lock, then
				 * release it outside the host lock. */
				CLIENT *client;
				mtx_lock(&host->nh_lock);
				client = host->nh_srvrpc.nr_client;
				host->nh_srvrpc.nr_client = NULL;
				mtx_unlock(&host->nh_lock);
				if (client) {
					CLNT_RELEASE(client);
				}
			}
		}
		memcpy(&host->nh_addr, addr, addr->sa_len);
		host->nh_vers = vers;
	}

	/* Opportunistically scan for idle hosts while we hold the lock. */
	nlm_check_idle();

	mtx_unlock(&nlm_global_lock);

	return (host);
}
1052
1053 /*
1054 * Search for an existing NLM host that matches the given remote
1055 * address. If none is found, create a new host with the requested
1056 * address and remember 'vers' as the NLM protocol version to use for
1057 * that host.
1058 */
struct nlm_host *
nlm_find_host_by_addr(const struct sockaddr *addr, int vers)
{
	/*
	 * Fake up a name using inet_ntop. This buffer is
	 * large enough for an IPv6 address.
	 */
	char tmp[sizeof "ffff:ffff:ffff:ffff:ffff:ffff:255.255.255.255"];
	struct nlm_host *host;

	switch (addr->sa_family) {
	case AF_INET:
		inet_ntop(AF_INET,
		    &((const struct sockaddr_in *) addr)->sin_addr,
		    tmp, sizeof tmp);
		break;
#ifdef INET6
	case AF_INET6:
		inet_ntop(AF_INET6,
		    &((const struct sockaddr_in6 *) addr)->sin6_addr,
		    tmp, sizeof tmp);
		break;
#endif
	default:
		strlcpy(tmp, "<unknown>", sizeof(tmp));
	}


	mtx_lock(&nlm_global_lock);

	/*
	 * Unlike nlm_find_host_by_name(), the remote host here is
	 * matched by its transport address.
	 */
	TAILQ_FOREACH(host, &nlm_hosts, nh_link) {
		if (nlm_compare_addr(addr,
			(const struct sockaddr *) &host->nh_addr))
			break;
	}

	if (!host) {
		/* Not seen before: create it named after its address. */
		host = nlm_create_host(tmp);
		if (!host) {
			mtx_unlock(&nlm_global_lock);
			return (NULL);
		}
		memcpy(&host->nh_addr, addr, addr->sa_len);
		host->nh_vers = vers;
	}
	/* Reference for the caller; drop with nlm_host_release(). */
	refcount_acquire(&host->nh_refs);

	/* Any lookup counts as activity: reset the idle deadline. */
	host->nh_idle_timeout = time_uptime + NLM_IDLE_TIMEOUT;

	/* Opportunistically scan for idle hosts while we hold the lock. */
	nlm_check_idle();

	mtx_unlock(&nlm_global_lock);

	return (host);
}
1117
1118 /*
1119 * Find the NLM host that matches the value of 'sysid'. If none
1120 * exists, return NULL.
1121 */
1122 static struct nlm_host *
nlm_find_host_by_sysid(int sysid)1123 nlm_find_host_by_sysid(int sysid)
1124 {
1125 struct nlm_host *host;
1126
1127 TAILQ_FOREACH(host, &nlm_hosts, nh_link) {
1128 if (host->nh_sysid == sysid) {
1129 refcount_acquire(&host->nh_refs);
1130 return (host);
1131 }
1132 }
1133
1134 return (NULL);
1135 }
1136
nlm_host_release(struct nlm_host * host)1137 void nlm_host_release(struct nlm_host *host)
1138 {
1139 if (refcount_release(&host->nh_refs)) {
1140 /*
1141 * Free the host
1142 */
1143 nlm_host_destroy(host);
1144 }
1145 }
1146
1147 /*
1148 * Unregister this NLM host with the local NSM due to idleness.
1149 */
1150 static void
nlm_host_unmonitor(struct nlm_host * host)1151 nlm_host_unmonitor(struct nlm_host *host)
1152 {
1153 mon_id smmonid;
1154 sm_stat_res smstat;
1155 struct timeval timo;
1156 enum clnt_stat stat;
1157
1158 NLM_DEBUG(1, "NLM: unmonitoring %s (sysid %d)\n",
1159 host->nh_caller_name, host->nh_sysid);
1160
1161 /*
1162 * We put our assigned system ID value in the priv field to
1163 * make it simpler to find the host if we are notified of a
1164 * host restart.
1165 */
1166 smmonid.mon_name = host->nh_caller_name;
1167 smmonid.my_id.my_name = "localhost";
1168 smmonid.my_id.my_prog = NLM_PROG;
1169 smmonid.my_id.my_vers = NLM_SM;
1170 smmonid.my_id.my_proc = NLM_SM_NOTIFY;
1171
1172 timo.tv_sec = 25;
1173 timo.tv_usec = 0;
1174 stat = CLNT_CALL(nlm_nsm, SM_UNMON,
1175 (xdrproc_t) xdr_mon, &smmonid,
1176 (xdrproc_t) xdr_sm_stat, &smstat, timo);
1177
1178 if (stat != RPC_SUCCESS) {
1179 NLM_ERR("Failed to contact local NSM - rpc error %d\n", stat);
1180 return;
1181 }
1182 if (smstat.res_stat == stat_fail) {
1183 NLM_ERR("Local NSM refuses to unmonitor %s\n",
1184 host->nh_caller_name);
1185 return;
1186 }
1187
1188 host->nh_monstate = NLM_UNMONITORED;
1189 }
1190
1191 /*
1192 * Register this NLM host with the local NSM so that we can be
1193 * notified if it reboots.
1194 */
void
nlm_host_monitor(struct nlm_host *host, int state)
{
	mon smmon;
	sm_stat_res smstat;
	struct timeval timo;
	enum clnt_stat stat;

	if (state && !host->nh_state) {
		/*
		 * This is the first time we have seen an NSM state
		 * value for this host. We record it here to help
		 * detect host reboots.
		 */
		host->nh_state = state;
		NLM_DEBUG(1, "NLM: host %s (sysid %d) has NSM state %d\n",
		    host->nh_caller_name, host->nh_sysid, state);
	}

	/*
	 * Claim the host under nh_lock so two threads never both send
	 * SM_MON for the same host; only the NLM_UNMONITORED ->
	 * NLM_MONITORED transition proceeds to the RPC below.
	 */
	mtx_lock(&host->nh_lock);
	if (host->nh_monstate != NLM_UNMONITORED) {
		mtx_unlock(&host->nh_lock);
		return;
	}
	host->nh_monstate = NLM_MONITORED;
	mtx_unlock(&host->nh_lock);

	NLM_DEBUG(1, "NLM: monitoring %s (sysid %d)\n",
	    host->nh_caller_name, host->nh_sysid);

	/*
	 * We put our assigned system ID value in the priv field to
	 * make it simpler to find the host if we are notified of a
	 * host restart.
	 */
	smmon.mon_id.mon_name = host->nh_caller_name;
	smmon.mon_id.my_id.my_name = "localhost";
	smmon.mon_id.my_id.my_prog = NLM_PROG;
	smmon.mon_id.my_id.my_vers = NLM_SM;
	smmon.mon_id.my_id.my_proc = NLM_SM_NOTIFY;
	memcpy(smmon.priv, &host->nh_sysid, sizeof(host->nh_sysid));

	/* Give the local NSM up to 25 seconds to answer. */
	timo.tv_sec = 25;
	timo.tv_usec = 0;
	stat = CLNT_CALL(nlm_nsm, SM_MON,
	    (xdrproc_t) xdr_mon, &smmon,
	    (xdrproc_t) xdr_sm_stat, &smstat, timo);

	if (stat != RPC_SUCCESS) {
		NLM_ERR("Failed to contact local NSM - rpc error %d\n", stat);
		return;
	}
	if (smstat.res_stat == stat_fail) {
		NLM_ERR("Local NSM refuses to monitor %s\n",
		    host->nh_caller_name);
		/* Roll the state forward to 'failed' so we don't retry. */
		mtx_lock(&host->nh_lock);
		host->nh_monstate = NLM_MONITOR_FAILED;
		mtx_unlock(&host->nh_lock);
		return;
	}

	/* NOTE(review): redundant - already set to NLM_MONITORED above. */
	host->nh_monstate = NLM_MONITORED;
}
1258
1259 /*
1260 * Return an RPC client handle that can be used to talk to the NLM
1261 * running on the given host.
1262 */
CLIENT *
nlm_host_get_rpc(struct nlm_host *host, bool_t isserver)
{
	struct nlm_rpc *rpc;
	CLIENT *client;

	mtx_lock(&host->nh_lock);

	/* Server-side and client-side traffic use separate cached handles. */
	if (isserver)
		rpc = &host->nh_srvrpc;
	else
		rpc = &host->nh_clntrpc;

	/*
	 * We can't hold onto RPC handles for too long - the async
	 * call/reply protocol used by some NLM clients makes it hard
	 * to tell when they change port numbers (e.g. after a
	 * reboot). Note that if a client reboots while it isn't
	 * holding any locks, it won't bother to notify us. We
	 * expire the RPC handles after two minutes.
	 */
	if (rpc->nr_client && time_uptime > rpc->nr_create_time + 2*60) {
		client = rpc->nr_client;
		rpc->nr_client = NULL;
		/* Drop nh_lock around the release - it may sleep. */
		mtx_unlock(&host->nh_lock);
		CLNT_RELEASE(client);
		mtx_lock(&host->nh_lock);
	}

	if (!rpc->nr_client) {
		/*
		 * Create a fresh handle with nh_lock dropped, since
		 * nlm_get_rpc() can block; re-check for a race below.
		 */
		mtx_unlock(&host->nh_lock);
		client = nlm_get_rpc((struct sockaddr *)&host->nh_addr,
		    NLM_PROG, host->nh_vers);
		mtx_lock(&host->nh_lock);

		if (client) {
			if (rpc->nr_client) {
				/*
				 * Another thread installed a handle
				 * while we were unlocked - discard ours.
				 */
				mtx_unlock(&host->nh_lock);
				CLNT_DESTROY(client);
				mtx_lock(&host->nh_lock);
			} else {
				rpc->nr_client = client;
				rpc->nr_create_time = time_uptime;
			}
		}
	}

	/* Hand the caller its own reference on the cached handle (if any). */
	client = rpc->nr_client;
	if (client)
		CLNT_ACQUIRE(client);
	mtx_unlock(&host->nh_lock);

	return (client);

}
1318
nlm_host_get_sysid(struct nlm_host * host)1319 int nlm_host_get_sysid(struct nlm_host *host)
1320 {
1321
1322 return (host->nh_sysid);
1323 }
1324
1325 int
nlm_host_get_state(struct nlm_host * host)1326 nlm_host_get_state(struct nlm_host *host)
1327 {
1328
1329 return (host->nh_state);
1330 }
1331
/*
 * Record that we are about to wait for a remote lock on 'vp' so that
 * an incoming NLM_GRANTED message (or nlm_cancel_wait) can find us
 * and wake us.  Returns an opaque handle for use with nlm_wait_lock()
 * or nlm_deregister_wait_lock().
 */
void *
nlm_register_wait_lock(struct nlm4_lock *lock, struct vnode *vp)
{
	struct nlm_waiting_lock *nw;

	nw = malloc(sizeof(struct nlm_waiting_lock), M_NLM, M_WAITOK);
	nw->nw_lock = *lock;
	/*
	 * The fh netobj inside *lock points at caller-owned memory;
	 * copy the bytes into our own storage and repoint n_bytes so
	 * the record stays valid after the caller's buffer is gone.
	 */
	memcpy(&nw->nw_fh.fh_bytes, nw->nw_lock.fh.n_bytes,
	    nw->nw_lock.fh.n_len);
	nw->nw_lock.fh.n_bytes = nw->nw_fh.fh_bytes;
	nw->nw_waiting = TRUE;
	nw->nw_vp = vp;
	mtx_lock(&nlm_global_lock);
	TAILQ_INSERT_TAIL(&nlm_waiting_locks, nw, nw_link);
	mtx_unlock(&nlm_global_lock);

	return nw;
}
1350
1351 void
nlm_deregister_wait_lock(void * handle)1352 nlm_deregister_wait_lock(void *handle)
1353 {
1354 struct nlm_waiting_lock *nw = handle;
1355
1356 mtx_lock(&nlm_global_lock);
1357 TAILQ_REMOVE(&nlm_waiting_locks, nw, nw_link);
1358 mtx_unlock(&nlm_global_lock);
1359
1360 free(nw, M_NLM);
1361 }
1362
/*
 * Sleep, for at most 'timo' ticks, until the blocked lock registered
 * via nlm_register_wait_lock() is granted, cancelled, or the sleep is
 * interrupted.  Consumes and frees 'handle'.  Returns 0 when the lock
 * was granted, EINTR when the wait was cancelled, or the msleep()
 * error otherwise.
 */
int
nlm_wait_lock(void *handle, int timo)
{
	struct nlm_waiting_lock *nw = handle;
	int error, stops_deferred;

	/*
	 * If the granted message arrived before we got here,
	 * nw->nw_waiting will be FALSE - in that case, don't sleep.
	 */
	mtx_lock(&nlm_global_lock);
	error = 0;
	if (nw->nw_waiting) {
		/* Defer SIGSTOP so we restart cleanly instead of hanging. */
		stops_deferred = sigdeferstop(SIGDEFERSTOP_ERESTART);
		error = msleep(nw, &nlm_global_lock, PCATCH, "nlmlock", timo);
		sigallowstop(stops_deferred);
	}
	TAILQ_REMOVE(&nlm_waiting_locks, nw, nw_link);
	if (error) {
		/*
		 * The granted message may arrive after the
		 * interrupt/timeout but before we manage to lock the
		 * mutex. Detect this by examining nw_lock.
		 */
		if (!nw->nw_waiting)
			error = 0;
	} else {
		/*
		 * If nlm_cancel_wait is called, then error will be
		 * zero but nw_waiting will still be TRUE. We
		 * translate this into EINTR.
		 */
		if (nw->nw_waiting)
			error = EINTR;
	}
	mtx_unlock(&nlm_global_lock);

	free(nw, M_NLM);

	return (error);
}
1404
1405 void
nlm_cancel_wait(struct vnode * vp)1406 nlm_cancel_wait(struct vnode *vp)
1407 {
1408 struct nlm_waiting_lock *nw;
1409
1410 mtx_lock(&nlm_global_lock);
1411 TAILQ_FOREACH(nw, &nlm_waiting_locks, nw_link) {
1412 if (nw->nw_vp == vp) {
1413 wakeup(nw);
1414 }
1415 }
1416 mtx_unlock(&nlm_global_lock);
1417 }
1418
1419
1420 /**********************************************************************/
1421
1422 /*
1423 * Syscall interface with userland.
1424 */
1425
1426 extern void nlm_prog_0(struct svc_req *rqstp, SVCXPRT *transp);
1427 extern void nlm_prog_1(struct svc_req *rqstp, SVCXPRT *transp);
1428 extern void nlm_prog_3(struct svc_req *rqstp, SVCXPRT *transp);
1429 extern void nlm_prog_4(struct svc_req *rqstp, SVCXPRT *transp);
1430
1431 static int
nlm_register_services(SVCPOOL * pool,int addr_count,char ** addrs)1432 nlm_register_services(SVCPOOL *pool, int addr_count, char **addrs)
1433 {
1434 static rpcvers_t versions[] = {
1435 NLM_SM, NLM_VERS, NLM_VERSX, NLM_VERS4
1436 };
1437 static void (*dispatchers[])(struct svc_req *, SVCXPRT *) = {
1438 nlm_prog_0, nlm_prog_1, nlm_prog_3, nlm_prog_4
1439 };
1440
1441 SVCXPRT **xprts;
1442 char netid[16];
1443 char uaddr[128];
1444 struct netconfig *nconf;
1445 int i, j, error;
1446
1447 if (!addr_count) {
1448 NLM_ERR("NLM: no service addresses given - can't start server");
1449 return (EINVAL);
1450 }
1451
1452 if (addr_count < 0 || addr_count > 256 ) {
1453 NLM_ERR("NLM: too many service addresses (%d) given, "
1454 "max 256 - can't start server\n", addr_count);
1455 return (EINVAL);
1456 }
1457
1458 xprts = malloc(addr_count * sizeof(SVCXPRT *), M_NLM, M_WAITOK|M_ZERO);
1459 for (i = 0; i < nitems(versions); i++) {
1460 for (j = 0; j < addr_count; j++) {
1461 /*
1462 * Create transports for the first version and
1463 * then just register everything else to the
1464 * same transports.
1465 */
1466 if (i == 0) {
1467 char *up;
1468
1469 error = copyin(&addrs[2*j], &up,
1470 sizeof(char*));
1471 if (error)
1472 goto out;
1473 error = copyinstr(up, netid, sizeof(netid),
1474 NULL);
1475 if (error)
1476 goto out;
1477 error = copyin(&addrs[2*j+1], &up,
1478 sizeof(char*));
1479 if (error)
1480 goto out;
1481 error = copyinstr(up, uaddr, sizeof(uaddr),
1482 NULL);
1483 if (error)
1484 goto out;
1485 nconf = getnetconfigent(netid);
1486 if (!nconf) {
1487 NLM_ERR("Can't lookup netid %s\n",
1488 netid);
1489 error = EINVAL;
1490 goto out;
1491 }
1492 xprts[j] = svc_tp_create(pool, dispatchers[i],
1493 NLM_PROG, versions[i], uaddr, nconf);
1494 if (!xprts[j]) {
1495 NLM_ERR("NLM: unable to create "
1496 "(NLM_PROG, %d).\n", versions[i]);
1497 error = EINVAL;
1498 goto out;
1499 }
1500 freenetconfigent(nconf);
1501 } else {
1502 nconf = getnetconfigent(xprts[j]->xp_netid);
1503 rpcb_unset(NLM_PROG, versions[i], nconf);
1504 if (!svc_reg(xprts[j], NLM_PROG, versions[i],
1505 dispatchers[i], nconf)) {
1506 NLM_ERR("NLM: can't register "
1507 "(NLM_PROG, %d)\n", versions[i]);
1508 error = EINVAL;
1509 goto out;
1510 }
1511 }
1512 }
1513 }
1514 error = 0;
1515 out:
1516 for (j = 0; j < addr_count; j++) {
1517 if (xprts[j])
1518 SVC_RELEASE(xprts[j]);
1519 }
1520 free(xprts, M_NLM);
1521 return (error);
1522 }
1523
1524 /*
1525 * Main server entry point. Contacts the local NSM to get its current
1526 * state and send SM_UNMON_ALL. Registers the NLM services and then
1527 * services requests. Does not return until the server is interrupted
1528 * by a signal.
1529 */
static int
nlm_server_main(int addr_count, char **addrs)
{
	struct thread *td = curthread;
	int error;
	SVCPOOL *pool = NULL;
	struct sockopt opt;
	int portlow;
#ifdef INET6
	struct sockaddr_in6 sin6;
#endif
	struct sockaddr_in sin;
	my_id id;
	sm_stat smstat;
	struct timeval timo;
	enum clnt_stat stat;
	struct nlm_host *host, *nhost;
	struct nlm_waiting_lock *nw;
	vop_advlock_t *old_nfs_advlock;
	vop_reclaim_t *old_nfs_reclaim;

	/* Only one server instance at a time. */
	if (nlm_is_running != 0) {
		NLM_ERR("NLM: can't start server - "
		    "it appears to be running already\n");
		return (EPERM);
	}

	if (nlm_socket == NULL) {
		memset(&opt, 0, sizeof(opt));

		/*
		 * Create the UDP sockets used for outbound RPC,
		 * bound to reserved ("low") ports so peers that
		 * require privileged source ports accept our calls.
		 */
		error = socreate(AF_INET, &nlm_socket, SOCK_DGRAM, 0,
		    td->td_ucred, td);
		if (error) {
			NLM_ERR("NLM: can't create IPv4 socket - error %d\n",
			    error);
			return (error);
		}
		opt.sopt_dir = SOPT_SET;
		opt.sopt_level = IPPROTO_IP;
		opt.sopt_name = IP_PORTRANGE;
		portlow = IP_PORTRANGE_LOW;
		opt.sopt_val = &portlow;
		opt.sopt_valsize = sizeof(portlow);
		sosetopt(nlm_socket, &opt);

#ifdef INET6
		nlm_socket6 = NULL;
		error = socreate(AF_INET6, &nlm_socket6, SOCK_DGRAM, 0,
		    td->td_ucred, td);
		if (error) {
			NLM_ERR("NLM: can't create IPv6 socket - error %d\n",
			    error);
			soclose(nlm_socket);
			nlm_socket = NULL;
			return (error);
		}
		opt.sopt_dir = SOPT_SET;
		opt.sopt_level = IPPROTO_IPV6;
		opt.sopt_name = IPV6_PORTRANGE;
		portlow = IPV6_PORTRANGE_LOW;
		opt.sopt_val = &portlow;
		opt.sopt_valsize = sizeof(portlow);
		sosetopt(nlm_socket6, &opt);
#endif
	}

	nlm_auth = authunix_create(curthread->td_ucred);

	/*
	 * Contact the local NSM (rpc.statd) over loopback, preferring
	 * IPv6 and falling back to IPv4.
	 */
#ifdef INET6
	memset(&sin6, 0, sizeof(sin6));
	sin6.sin6_len = sizeof(sin6);
	sin6.sin6_family = AF_INET6;
	sin6.sin6_addr = in6addr_loopback;
	nlm_nsm = nlm_get_rpc((struct sockaddr *) &sin6, SM_PROG, SM_VERS);
	if (!nlm_nsm) {
#endif
		memset(&sin, 0, sizeof(sin));
		sin.sin_len = sizeof(sin);
		sin.sin_family = AF_INET;
		sin.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
		nlm_nsm = nlm_get_rpc((struct sockaddr *) &sin, SM_PROG,
		    SM_VERS);
#ifdef INET6
	}
#endif

	if (!nlm_nsm) {
		NLM_ERR("Can't start NLM - unable to contact NSM\n");
		error = EINVAL;
		goto out;
	}

	pool = svcpool_create("NLM", NULL);

	error = nlm_register_services(pool, addr_count, addrs);
	if (error)
		goto out;

	/*
	 * Ask the NSM to forget any monitoring left over from a
	 * previous incarnation of the server, and learn its current
	 * state number for reboot detection.
	 */
	memset(&id, 0, sizeof(id));
	id.my_name = "NFS NLM";

	timo.tv_sec = 25;
	timo.tv_usec = 0;
	stat = CLNT_CALL(nlm_nsm, SM_UNMON_ALL,
	    (xdrproc_t) xdr_my_id, &id,
	    (xdrproc_t) xdr_sm_stat, &smstat, timo);

	if (stat != RPC_SUCCESS) {
		struct rpc_err err;

		CLNT_GETERR(nlm_nsm, &err);
		NLM_ERR("NLM: unexpected error contacting NSM, "
		    "stat=%d, errno=%d\n", stat, err.re_errno);
		error = EINVAL;
		goto out;
	}
	nlm_is_running = 1;

	NLM_DEBUG(1, "NLM: local NSM state is %d\n", smstat.state);
	nlm_nsm_state = smstat.state;

	/*
	 * Route NFS advisory locking and vnode reclaim through the
	 * NLM while the server is up; restored after svc_run returns.
	 */
	old_nfs_advlock = nfs_advlock_p;
	nfs_advlock_p = nlm_advlock;
	old_nfs_reclaim = nfs_reclaim_p;
	nfs_reclaim_p = nlm_reclaim;

	/* Service requests until interrupted by a signal. */
	svc_run(pool);
	error = 0;

	nfs_advlock_p = old_nfs_advlock;
	nfs_reclaim_p = old_nfs_reclaim;

out:
	nlm_is_running = 0;
	if (pool)
		svcpool_destroy(pool);

	/*
	 * We are finished communicating with the NSM.
	 */
	if (nlm_nsm) {
		CLNT_RELEASE(nlm_nsm);
		nlm_nsm = NULL;
	}

	/*
	 * Trash all the existing state so that if the server
	 * restarts, it gets a clean slate. This is complicated by the
	 * possibility that there may be other threads trying to make
	 * client locking requests.
	 *
	 * First we fake a client reboot notification which will
	 * cancel any pending async locks and purge remote lock state
	 * from the local lock manager. We release the reference from
	 * nlm_hosts to the host (which may remove it from the list
	 * and free it). After this phase, the only entries in the
	 * nlm_host list should be from other threads performing
	 * client lock requests.
	 */
	mtx_lock(&nlm_global_lock);
	TAILQ_FOREACH(nw, &nlm_waiting_locks, nw_link) {
		wakeup(nw);
	}
	TAILQ_FOREACH_SAFE(host, &nlm_hosts, nh_link, nhost) {
		/* Drop the lock across calls that may sleep. */
		mtx_unlock(&nlm_global_lock);
		nlm_host_notify(host, 0);
		nlm_host_release(host);
		mtx_lock(&nlm_global_lock);
	}
	mtx_unlock(&nlm_global_lock);

	AUTH_DESTROY(nlm_auth);

	return (error);
}
1705
/*
 * nlm_syscall(2) entry point: privileged userland (rpc.lockd) starts
 * the kernel NLM server here.  Does not return until the server shuts
 * down; returns 0 or an errno.
 */
int
sys_nlm_syscall(struct thread *td, struct nlm_syscall_args *uap)
{
	int error;

#if __FreeBSD_version >= 700000
	error = priv_check(td, PRIV_NFS_LOCKD);
#else
	error = suser(td);
#endif
	if (error)
		return (error);

	/* Apply userland-supplied tunables before starting. */
	nlm_debug_level = uap->debug_level;
	nlm_grace_threshold = time_uptime + uap->grace_period;
	nlm_next_idle_check = time_uptime + NLM_IDLE_PERIOD;

	return nlm_server_main(uap->addr_count, uap->addrs);
}
1725
1726 /**********************************************************************/
1727
1728 /*
1729 * NLM implementation details, called from the RPC stubs.
1730 */
1731
1732
1733 void
nlm_sm_notify(struct nlm_sm_status * argp)1734 nlm_sm_notify(struct nlm_sm_status *argp)
1735 {
1736 uint32_t sysid;
1737 struct nlm_host *host;
1738
1739 NLM_DEBUG(3, "nlm_sm_notify(): mon_name = %s\n", argp->mon_name);
1740 memcpy(&sysid, &argp->priv, sizeof(sysid));
1741 host = nlm_find_host_by_sysid(sysid);
1742 if (host) {
1743 nlm_host_notify(host, argp->state);
1744 nlm_host_release(host);
1745 }
1746 }
1747
1748 static void
nlm_convert_to_fhandle_t(fhandle_t * fhp,struct netobj * p)1749 nlm_convert_to_fhandle_t(fhandle_t *fhp, struct netobj *p)
1750 {
1751 memcpy(fhp, p->n_bytes, sizeof(fhandle_t));
1752 }
1753
/*
 * Bundle of references acquired by nlm_get_vfs_state() and released
 * by nlm_release_vfs_state().
 */
struct vfs_state {
	struct mount *vs_mp;	/* referenced mount point (vfs_getvfs) */
	struct vnode *vs_vp;	/* vnode resolved from the file handle */
	int vs_vnlocked;	/* non-zero while vs_vp's lock is held */
};
1759
/*
 * Resolve the file handle 'fhp' from an NLM request into a referenced
 * mount point and vnode in '*vs', performing export and access checks
 * against the requesting host and RPC credentials.  'accmode' is the
 * access to check, or 0 to skip checks (for unlock).  Returns 0 or an
 * errno; on any return nlm_release_vfs_state(vs) must be called.
 */
static int
nlm_get_vfs_state(struct nlm_host *host, struct svc_req *rqstp,
    fhandle_t *fhp, struct vfs_state *vs, accmode_t accmode)
{
	int error, exflags;
	struct ucred *cred = NULL, *credanon = NULL;

	memset(vs, 0, sizeof(*vs));

	vs->vs_mp = vfs_getvfs(&fhp->fh_fsid);
	if (!vs->vs_mp) {
		return (ESTALE);
	}

	/* accmode == 0 means don't check, since it is an unlock. */
	if (accmode != 0) {
		error = VFS_CHECKEXP(vs->vs_mp,
		    (struct sockaddr *)&host->nh_addr, &exflags, &credanon,
		    NULL, NULL);
		if (error)
			goto out;

		/* Refuse locks on read-only exports or filesystems. */
		if (exflags & MNT_EXRDONLY ||
		    (vs->vs_mp->mnt_flag & MNT_RDONLY)) {
			error = EROFS;
			goto out;
		}
	}

	error = VFS_FHTOVP(vs->vs_mp, &fhp->fh_fid, LK_EXCLUSIVE, &vs->vs_vp);
	if (error)
		goto out;
	vs->vs_vnlocked = TRUE;

	if (accmode != 0) {
		if (!svc_getcred(rqstp, &cred, NULL)) {
			error = EINVAL;
			goto out;
		}
		/*
		 * Remote root (or anon-mapped exports) get the
		 * export's anonymous credentials; ownership of
		 * credanon transfers to 'cred'.
		 */
		if (cred->cr_uid == 0 || (exflags & MNT_EXPORTANON)) {
			crfree(cred);
			cred = credanon;
			credanon = NULL;
		}

		/*
		 * Check cred.
		 */
		error = VOP_ACCESS(vs->vs_vp, accmode, cred, curthread);
		/*
		 * If this failed and accmode != VWRITE, try again with
		 * VWRITE to maintain backwards compatibility with the
		 * old code that always used VWRITE.
		 */
		if (error != 0 && accmode != VWRITE)
			error = VOP_ACCESS(vs->vs_vp, VWRITE, cred, curthread);
		if (error)
			goto out;
	}

#if __FreeBSD_version < 800011
	VOP_UNLOCK(vs->vs_vp, 0, curthread);
#else
	VOP_UNLOCK(vs->vs_vp, 0);
#endif
	vs->vs_vnlocked = FALSE;

out:
	if (cred)
		crfree(cred);
	if (credanon)
		crfree(credanon);

	return (error);
}
1835
1836 static void
nlm_release_vfs_state(struct vfs_state * vs)1837 nlm_release_vfs_state(struct vfs_state *vs)
1838 {
1839
1840 if (vs->vs_vp) {
1841 if (vs->vs_vnlocked)
1842 vput(vs->vs_vp);
1843 else
1844 vrele(vs->vs_vp);
1845 }
1846 if (vs->vs_mp)
1847 vfs_rel(vs->vs_mp);
1848 }
1849
1850 static nlm4_stats
nlm_convert_error(int error)1851 nlm_convert_error(int error)
1852 {
1853
1854 if (error == ESTALE)
1855 return nlm4_stale_fh;
1856 else if (error == EROFS)
1857 return nlm4_rofs;
1858 else
1859 return nlm4_failed;
1860 }
1861
/*
 * Service an NLM_TEST / NLM4_TEST request: report whether the lock
 * described by argp could be granted, and if not, describe the
 * conflicting holder.  Fills in *result; if rpcp is non-NULL it
 * receives an RPC handle for replying asynchronously.  Returns 0 on
 * success, ENOMEM if no host record could be created.
 */
int
nlm_do_test(nlm4_testargs *argp, nlm4_testres *result, struct svc_req *rqstp,
    CLIENT **rpcp)
{
	fhandle_t fh;
	struct vfs_state vs;
	struct nlm_host *host, *bhost;
	int error, sysid;
	struct flock fl;
	accmode_t accmode;

	memset(result, 0, sizeof(*result));
	memset(&vs, 0, sizeof(vs));

	host = nlm_find_host_by_name(argp->alock.caller_name,
	    svc_getrpccaller(rqstp), rqstp->rq_vers);
	if (!host) {
		result->stat.stat = nlm4_denied_nolocks;
		return (ENOMEM);
	}

	NLM_DEBUG(3, "nlm_do_test(): caller_name = %s (sysid = %d)\n",
	    host->nh_caller_name, host->nh_sysid);

	nlm_check_expired_locks(host);
	sysid = host->nh_sysid;

	nlm_convert_to_fhandle_t(&fh, &argp->alock.fh);
	nlm_copy_netobj(&result->cookie, &argp->cookie, M_RPC);

	/* No new operations during the post-restart grace period. */
	if (time_uptime < nlm_grace_threshold) {
		result->stat.stat = nlm4_denied_grace_period;
		goto out;
	}

	accmode = argp->exclusive ? VWRITE : VREAD;
	error = nlm_get_vfs_state(host, rqstp, &fh, &vs, accmode);
	if (error) {
		result->stat.stat = nlm_convert_error(error);
		goto out;
	}

	/* Translate the NLM lock description into a struct flock. */
	fl.l_start = argp->alock.l_offset;
	fl.l_len = argp->alock.l_len;
	fl.l_pid = argp->alock.svid;
	fl.l_sysid = sysid;
	fl.l_whence = SEEK_SET;
	if (argp->exclusive)
		fl.l_type = F_WRLCK;
	else
		fl.l_type = F_RDLCK;
	error = VOP_ADVLOCK(vs.vs_vp, NULL, F_GETLK, &fl, F_REMOTE);
	if (error) {
		result->stat.stat = nlm4_failed;
		goto out;
	}

	if (fl.l_type == F_UNLCK) {
		/* No conflicting lock - the request would be granted. */
		result->stat.stat = nlm4_granted;
	} else {
		/* Conflict: describe the current holder to the caller. */
		result->stat.stat = nlm4_denied;
		result->stat.nlm4_testrply_u.holder.exclusive =
			(fl.l_type == F_WRLCK);
		result->stat.nlm4_testrply_u.holder.svid = fl.l_pid;
		bhost = nlm_find_host_by_sysid(fl.l_sysid);
		if (bhost) {
			/*
			 * We don't have any useful way of recording
			 * the value of oh used in the original lock
			 * request. Ideally, the test reply would have
			 * a space for the owning host's name allowing
			 * our caller's NLM to keep track.
			 *
			 * As far as I can see, Solaris uses an eight
			 * byte structure for oh which contains a four
			 * byte pid encoded in local byte order and
			 * the first four bytes of the host
			 * name. Linux uses a variable length string
			 * 'pid@hostname' in ascii but doesn't even
			 * return that in test replies.
			 *
			 * For the moment, return nothing in oh
			 * (already zero'ed above).
			 */
			nlm_host_release(bhost);
		}
		result->stat.nlm4_testrply_u.holder.l_offset = fl.l_start;
		result->stat.nlm4_testrply_u.holder.l_len = fl.l_len;
	}

out:
	nlm_release_vfs_state(&vs);
	if (rpcp)
		*rpcp = nlm_host_get_rpc(host, TRUE);
	nlm_host_release(host);
	return (0);
}
1959
/*
 * Service an NLM_LOCK / NLM4_LOCK request.  If argp->block is set and
 * the lock cannot be granted immediately, queue an async lock that
 * will send NLM_GRANTED when it completes, and reply nlm4_blocked.
 * 'monitor' selects whether to register the caller with the NSM.
 * Fills in *result; if rpcp is non-NULL it receives an RPC handle for
 * replying asynchronously.  Returns 0, or ENOMEM if no host record
 * could be created.
 */
int
nlm_do_lock(nlm4_lockargs *argp, nlm4_res *result, struct svc_req *rqstp,
    bool_t monitor, CLIENT **rpcp)
{
	fhandle_t fh;
	struct vfs_state vs;
	struct nlm_host *host;
	int error, sysid;
	struct flock fl;
	accmode_t accmode;

	memset(result, 0, sizeof(*result));
	memset(&vs, 0, sizeof(vs));

	host = nlm_find_host_by_name(argp->alock.caller_name,
	    svc_getrpccaller(rqstp), rqstp->rq_vers);
	if (!host) {
		result->stat.stat = nlm4_denied_nolocks;
		return (ENOMEM);
	}

	NLM_DEBUG(3, "nlm_do_lock(): caller_name = %s (sysid = %d)\n",
	    host->nh_caller_name, host->nh_sysid);

	if (monitor && host->nh_state && argp->state
	    && host->nh_state != argp->state) {
		/*
		 * The host rebooted without telling us. Trash its
		 * locks.
		 */
		nlm_host_notify(host, argp->state);
	}

	nlm_check_expired_locks(host);
	sysid = host->nh_sysid;

	nlm_convert_to_fhandle_t(&fh, &argp->alock.fh);
	nlm_copy_netobj(&result->cookie, &argp->cookie, M_RPC);

	/* During grace, only reclaim requests may proceed. */
	if (time_uptime < nlm_grace_threshold && !argp->reclaim) {
		result->stat.stat = nlm4_denied_grace_period;
		goto out;
	}

	accmode = argp->exclusive ? VWRITE : VREAD;
	error = nlm_get_vfs_state(host, rqstp, &fh, &vs, accmode);
	if (error) {
		result->stat.stat = nlm_convert_error(error);
		goto out;
	}

	/* Translate the NLM lock description into a struct flock. */
	fl.l_start = argp->alock.l_offset;
	fl.l_len = argp->alock.l_len;
	fl.l_pid = argp->alock.svid;
	fl.l_sysid = sysid;
	fl.l_whence = SEEK_SET;
	if (argp->exclusive)
		fl.l_type = F_WRLCK;
	else
		fl.l_type = F_RDLCK;
	if (argp->block) {
		struct nlm_async_lock *af;
		CLIENT *client;
		struct nlm_grantcookie cookie;

		/*
		 * First, make sure we can contact the host's NLM.
		 */
		client = nlm_host_get_rpc(host, TRUE);
		if (!client) {
			result->stat.stat = nlm4_failed;
			goto out;
		}

		/*
		 * First we need to check and see if there is an
		 * existing blocked lock that matches. This could be a
		 * badly behaved client or an RPC re-send. If we find
		 * one, just return nlm4_blocked.
		 */
		mtx_lock(&host->nh_lock);
		TAILQ_FOREACH(af, &host->nh_pending, af_link) {
			if (af->af_fl.l_start == fl.l_start
			    && af->af_fl.l_len == fl.l_len
			    && af->af_fl.l_pid == fl.l_pid
			    && af->af_fl.l_type == fl.l_type) {
				break;
			}
		}
		if (!af) {
			/* Mint a fresh grant cookie under nh_lock. */
			cookie.ng_sysid = host->nh_sysid;
			cookie.ng_cookie = host->nh_grantcookie++;
		}
		mtx_unlock(&host->nh_lock);
		if (af) {
			/* Duplicate request - it is already pending. */
			CLNT_RELEASE(client);
			result->stat.stat = nlm4_blocked;
			goto out;
		}

		/*
		 * Build the async-lock record, including a deep copy
		 * of the GRANTED message to send when it completes.
		 */
		af = malloc(sizeof(struct nlm_async_lock), M_NLM,
		    M_WAITOK|M_ZERO);
		TASK_INIT(&af->af_task, 0, nlm_lock_callback, af);
		af->af_vp = vs.vs_vp;
		af->af_fl = fl;
		af->af_host = host;
		af->af_rpc = client;
		/*
		 * We use M_RPC here so that we can xdr_free the thing
		 * later.
		 */
		nlm_make_netobj(&af->af_granted.cookie,
		    (caddr_t)&cookie, sizeof(cookie), M_RPC);
		af->af_granted.exclusive = argp->exclusive;
		af->af_granted.alock.caller_name =
			strdup(argp->alock.caller_name, M_RPC);
		nlm_copy_netobj(&af->af_granted.alock.fh,
		    &argp->alock.fh, M_RPC);
		nlm_copy_netobj(&af->af_granted.alock.oh,
		    &argp->alock.oh, M_RPC);
		af->af_granted.alock.svid = argp->alock.svid;
		af->af_granted.alock.l_offset = argp->alock.l_offset;
		af->af_granted.alock.l_len = argp->alock.l_len;

		/*
		 * Put the entry on the pending list before calling
		 * VOP_ADVLOCKASYNC. We do this in case the lock
		 * request was blocked (returning EINPROGRESS) but
		 * then granted before we manage to run again. The
		 * client may receive the granted message before we
		 * send our blocked reply but thats their problem.
		 */
		mtx_lock(&host->nh_lock);
		TAILQ_INSERT_TAIL(&host->nh_pending, af, af_link);
		mtx_unlock(&host->nh_lock);

		error = VOP_ADVLOCKASYNC(vs.vs_vp, NULL, F_SETLK, &fl, F_REMOTE,
		    &af->af_task, &af->af_cookie);

		/*
		 * If the lock completed synchronously, just free the
		 * tracking structure now.
		 */
		if (error != EINPROGRESS) {
			CLNT_RELEASE(af->af_rpc);
			mtx_lock(&host->nh_lock);
			TAILQ_REMOVE(&host->nh_pending, af, af_link);
			mtx_unlock(&host->nh_lock);
			xdr_free((xdrproc_t) xdr_nlm4_testargs,
			    &af->af_granted);
			free(af, M_NLM);
		} else {
			NLM_DEBUG(2, "NLM: pending async lock %p for %s "
			    "(sysid %d)\n", af, host->nh_caller_name, sysid);
			/*
			 * Don't vrele the vnode just yet - this must
			 * wait until either the async callback
			 * happens or the lock is cancelled.
			 */
			vs.vs_vp = NULL;
		}
	} else {
		error = VOP_ADVLOCK(vs.vs_vp, NULL, F_SETLK, &fl, F_REMOTE);
	}

	/* Map the VOP_ADVLOCK(ASYNC) result to an NLM status. */
	if (error) {
		if (error == EINPROGRESS) {
			result->stat.stat = nlm4_blocked;
		} else if (error == EDEADLK) {
			result->stat.stat = nlm4_deadlck;
		} else if (error == EAGAIN) {
			result->stat.stat = nlm4_denied;
		} else {
			result->stat.stat = nlm4_failed;
		}
	} else {
		if (monitor)
			nlm_host_monitor(host, argp->state);
		result->stat.stat = nlm4_granted;
	}

out:
	nlm_release_vfs_state(&vs);
	if (rpcp)
		*rpcp = nlm_host_get_rpc(host, TRUE);
	nlm_host_release(host);
	return (0);
}
2148
/*
 * Service an NLM_CANCEL / NLM4_CANCEL request: cancel a previously
 * queued blocking lock that has not yet been granted.  Fills in
 * *result; if rpcp is non-NULL it receives an RPC handle for replying
 * asynchronously.  Returns 0, or ENOMEM if no host record could be
 * created.
 */
int
nlm_do_cancel(nlm4_cancargs *argp, nlm4_res *result, struct svc_req *rqstp,
    CLIENT **rpcp)
{
	fhandle_t fh;
	struct vfs_state vs;
	struct nlm_host *host;
	int error, sysid;
	struct flock fl;
	struct nlm_async_lock *af;

	memset(result, 0, sizeof(*result));
	memset(&vs, 0, sizeof(vs));

	host = nlm_find_host_by_name(argp->alock.caller_name,
	    svc_getrpccaller(rqstp), rqstp->rq_vers);
	if (!host) {
		result->stat.stat = nlm4_denied_nolocks;
		return (ENOMEM);
	}

	NLM_DEBUG(3, "nlm_do_cancel(): caller_name = %s (sysid = %d)\n",
	    host->nh_caller_name, host->nh_sysid);

	nlm_check_expired_locks(host);
	sysid = host->nh_sysid;

	nlm_convert_to_fhandle_t(&fh, &argp->alock.fh);
	nlm_copy_netobj(&result->cookie, &argp->cookie, M_RPC);

	/* No new operations during the post-restart grace period. */
	if (time_uptime < nlm_grace_threshold) {
		result->stat.stat = nlm4_denied_grace_period;
		goto out;
	}

	/* accmode 0: cancel needs no access check. */
	error = nlm_get_vfs_state(host, rqstp, &fh, &vs, (accmode_t)0);
	if (error) {
		result->stat.stat = nlm_convert_error(error);
		goto out;
	}

	/* Reconstruct the flock that the original lock request used. */
	fl.l_start = argp->alock.l_offset;
	fl.l_len = argp->alock.l_len;
	fl.l_pid = argp->alock.svid;
	fl.l_sysid = sysid;
	fl.l_whence = SEEK_SET;
	if (argp->exclusive)
		fl.l_type = F_WRLCK;
	else
		fl.l_type = F_RDLCK;

	/*
	 * First we need to try and find the async lock request - if
	 * there isn't one, we give up and return nlm4_denied.
	 */
	mtx_lock(&host->nh_lock);

	TAILQ_FOREACH(af, &host->nh_pending, af_link) {
		if (af->af_fl.l_start == fl.l_start
		    && af->af_fl.l_len == fl.l_len
		    && af->af_fl.l_pid == fl.l_pid
		    && af->af_fl.l_type == fl.l_type) {
			break;
		}
	}

	if (!af) {
		mtx_unlock(&host->nh_lock);
		result->stat.stat = nlm4_denied;
		goto out;
	}

	error = nlm_cancel_async_lock(af);

	if (error) {
		/* Too late - the lock was granted before we cancelled. */
		result->stat.stat = nlm4_denied;
	} else {
		result->stat.stat = nlm4_granted;
	}

	mtx_unlock(&host->nh_lock);

out:
	nlm_release_vfs_state(&vs);
	if (rpcp)
		*rpcp = nlm_host_get_rpc(host, TRUE);
	nlm_host_release(host);
	return (0);
}
2238
/*
 * Service an NLM_UNLOCK / NLM4_UNLOCK request: release the caller's
 * lock on the given range.  Fills in *result; if rpcp is non-NULL it
 * receives an RPC handle for replying asynchronously.  Returns 0, or
 * ENOMEM if no host record could be created.
 */
int
nlm_do_unlock(nlm4_unlockargs *argp, nlm4_res *result, struct svc_req *rqstp,
    CLIENT **rpcp)
{
	fhandle_t fh;
	struct vfs_state vs;
	struct nlm_host *host;
	int error, sysid;
	struct flock fl;

	memset(result, 0, sizeof(*result));
	memset(&vs, 0, sizeof(vs));

	host = nlm_find_host_by_name(argp->alock.caller_name,
	    svc_getrpccaller(rqstp), rqstp->rq_vers);
	if (!host) {
		result->stat.stat = nlm4_denied_nolocks;
		return (ENOMEM);
	}

	NLM_DEBUG(3, "nlm_do_unlock(): caller_name = %s (sysid = %d)\n",
	    host->nh_caller_name, host->nh_sysid);

	nlm_check_expired_locks(host);
	sysid = host->nh_sysid;

	nlm_convert_to_fhandle_t(&fh, &argp->alock.fh);
	nlm_copy_netobj(&result->cookie, &argp->cookie, M_RPC);

	/* No new operations during the post-restart grace period. */
	if (time_uptime < nlm_grace_threshold) {
		result->stat.stat = nlm4_denied_grace_period;
		goto out;
	}

	/* accmode 0: unlock needs no access check. */
	error = nlm_get_vfs_state(host, rqstp, &fh, &vs, (accmode_t)0);
	if (error) {
		result->stat.stat = nlm_convert_error(error);
		goto out;
	}

	fl.l_start = argp->alock.l_offset;
	fl.l_len = argp->alock.l_len;
	fl.l_pid = argp->alock.svid;
	fl.l_sysid = sysid;
	fl.l_whence = SEEK_SET;
	fl.l_type = F_UNLCK;
	error = VOP_ADVLOCK(vs.vs_vp, NULL, F_UNLCK, &fl, F_REMOTE);

	/*
	 * Ignore the error - there is no result code for failure,
	 * only for grace period.
	 */
	result->stat.stat = nlm4_granted;

out:
	nlm_release_vfs_state(&vs);
	if (rpcp)
		*rpcp = nlm_host_get_rpc(host, TRUE);
	nlm_host_release(host);
	return (0);
}
2300
2301 int
nlm_do_granted(nlm4_testargs * argp,nlm4_res * result,struct svc_req * rqstp,CLIENT ** rpcp)2302 nlm_do_granted(nlm4_testargs *argp, nlm4_res *result, struct svc_req *rqstp,
2303
2304 CLIENT **rpcp)
2305 {
2306 struct nlm_host *host;
2307 struct nlm_waiting_lock *nw;
2308
2309 memset(result, 0, sizeof(*result));
2310
2311 host = nlm_find_host_by_addr(svc_getrpccaller(rqstp), rqstp->rq_vers);
2312 if (!host) {
2313 result->stat.stat = nlm4_denied_nolocks;
2314 return (ENOMEM);
2315 }
2316
2317 nlm_copy_netobj(&result->cookie, &argp->cookie, M_RPC);
2318 result->stat.stat = nlm4_denied;
2319 KFAIL_POINT_CODE(DEBUG_FP, nlm_deny_grant, goto out);
2320
2321 mtx_lock(&nlm_global_lock);
2322 TAILQ_FOREACH(nw, &nlm_waiting_locks, nw_link) {
2323 if (!nw->nw_waiting)
2324 continue;
2325 if (argp->alock.svid == nw->nw_lock.svid
2326 && argp->alock.l_offset == nw->nw_lock.l_offset
2327 && argp->alock.l_len == nw->nw_lock.l_len
2328 && argp->alock.fh.n_len == nw->nw_lock.fh.n_len
2329 && !memcmp(argp->alock.fh.n_bytes, nw->nw_lock.fh.n_bytes,
2330 nw->nw_lock.fh.n_len)) {
2331 nw->nw_waiting = FALSE;
2332 wakeup(nw);
2333 result->stat.stat = nlm4_granted;
2334 break;
2335 }
2336 }
2337 mtx_unlock(&nlm_global_lock);
2338
2339 out:
2340 if (rpcp)
2341 *rpcp = nlm_host_get_rpc(host, TRUE);
2342 nlm_host_release(host);
2343 return (0);
2344 }
2345
2346 void
nlm_do_granted_res(nlm4_res * argp,struct svc_req * rqstp)2347 nlm_do_granted_res(nlm4_res *argp, struct svc_req *rqstp)
2348 {
2349 struct nlm_host *host = NULL;
2350 struct nlm_async_lock *af = NULL;
2351 int error;
2352
2353 if (argp->cookie.n_len != sizeof(struct nlm_grantcookie)) {
2354 NLM_DEBUG(1, "NLM: bogus grant cookie");
2355 goto out;
2356 }
2357
2358 host = nlm_find_host_by_sysid(ng_sysid(&argp->cookie));
2359 if (!host) {
2360 NLM_DEBUG(1, "NLM: Unknown host rejected our grant");
2361 goto out;
2362 }
2363
2364 mtx_lock(&host->nh_lock);
2365 TAILQ_FOREACH(af, &host->nh_granted, af_link)
2366 if (ng_cookie(&argp->cookie) ==
2367 ng_cookie(&af->af_granted.cookie))
2368 break;
2369 if (af)
2370 TAILQ_REMOVE(&host->nh_granted, af, af_link);
2371 mtx_unlock(&host->nh_lock);
2372
2373 if (!af) {
2374 NLM_DEBUG(1, "NLM: host %s (sysid %d) replied to our grant "
2375 "with unrecognized cookie %d:%d", host->nh_caller_name,
2376 host->nh_sysid, ng_sysid(&argp->cookie),
2377 ng_cookie(&argp->cookie));
2378 goto out;
2379 }
2380
2381 if (argp->stat.stat != nlm4_granted) {
2382 af->af_fl.l_type = F_UNLCK;
2383 error = VOP_ADVLOCK(af->af_vp, NULL, F_UNLCK, &af->af_fl, F_REMOTE);
2384 if (error) {
2385 NLM_DEBUG(1, "NLM: host %s (sysid %d) rejected our grant "
2386 "and we failed to unlock (%d)", host->nh_caller_name,
2387 host->nh_sysid, error);
2388 goto out;
2389 }
2390
2391 NLM_DEBUG(5, "NLM: async lock %p rejected by host %s (sysid %d)",
2392 af, host->nh_caller_name, host->nh_sysid);
2393 } else {
2394 NLM_DEBUG(5, "NLM: async lock %p accepted by host %s (sysid %d)",
2395 af, host->nh_caller_name, host->nh_sysid);
2396 }
2397
2398 out:
2399 if (af)
2400 nlm_free_async_lock(af);
2401 if (host)
2402 nlm_host_release(host);
2403 }
2404
2405 void
nlm_do_free_all(nlm4_notify * argp)2406 nlm_do_free_all(nlm4_notify *argp)
2407 {
2408 struct nlm_host *host, *thost;
2409
2410 TAILQ_FOREACH_SAFE(host, &nlm_hosts, nh_link, thost) {
2411 if (!strcmp(host->nh_caller_name, argp->name))
2412 nlm_host_notify(host, argp->state);
2413 }
2414 }
2415
2416 /*
2417 * Kernel module glue
2418 */
2419 static int
nfslockd_modevent(module_t mod,int type,void * data)2420 nfslockd_modevent(module_t mod, int type, void *data)
2421 {
2422
2423 switch (type) {
2424 case MOD_LOAD:
2425 return (0);
2426 case MOD_UNLOAD:
2427 /* The NLM module cannot be safely unloaded. */
2428 /* FALLTHROUGH */
2429 default:
2430 return (EOPNOTSUPP);
2431 }
2432 }
/* Module descriptor wiring the module name to the event handler above. */
static moduledata_t nfslockd_mod = {
	"nfslockd",
	nfslockd_modevent,
	NULL,
};
/* Register at VFS initialization time. */
DECLARE_MODULE(nfslockd, nfslockd_mod, SI_SUB_VFS, SI_ORDER_ANY);

/* So that loader and kldload(2) can find us, wherever we are.. */
MODULE_DEPEND(nfslockd, krpc, 1, 1, 1);
MODULE_DEPEND(nfslockd, nfslock, 1, 1, 1);
MODULE_VERSION(nfslockd, 1);
2444