1*d6c967bbSthorpej /* $NetBSD: nfs_lock.c,v 1.3 2020/01/02 15:42:26 thorpej Exp $ */
26ca35587Sdholland /*-
36ca35587Sdholland * Copyright (c) 1997 Berkeley Software Design, Inc. All rights reserved.
46ca35587Sdholland *
56ca35587Sdholland * Redistribution and use in source and binary forms, with or without
66ca35587Sdholland * modification, are permitted provided that the following conditions
76ca35587Sdholland * are met:
86ca35587Sdholland * 1. Redistributions of source code must retain the above copyright
96ca35587Sdholland * notice, this list of conditions and the following disclaimer.
106ca35587Sdholland * 2. Redistributions in binary form must reproduce the above copyright
116ca35587Sdholland * notice, this list of conditions and the following disclaimer in the
126ca35587Sdholland * documentation and/or other materials provided with the distribution.
136ca35587Sdholland * 3. Berkeley Software Design Inc's name may not be used to endorse or
146ca35587Sdholland * promote products derived from this software without specific prior
156ca35587Sdholland * written permission.
166ca35587Sdholland *
176ca35587Sdholland * THIS SOFTWARE IS PROVIDED BY BERKELEY SOFTWARE DESIGN INC ``AS IS'' AND
186ca35587Sdholland * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
196ca35587Sdholland * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
206ca35587Sdholland * ARE DISCLAIMED. IN NO EVENT SHALL BERKELEY SOFTWARE DESIGN INC BE LIABLE
216ca35587Sdholland * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
226ca35587Sdholland * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
236ca35587Sdholland * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
246ca35587Sdholland * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
256ca35587Sdholland * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
266ca35587Sdholland * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
276ca35587Sdholland * SUCH DAMAGE.
286ca35587Sdholland *
296ca35587Sdholland * from BSDI nfs_lock.c,v 2.4 1998/12/14 23:49:56 jch Exp
306ca35587Sdholland */
316ca35587Sdholland
326ca35587Sdholland #include <sys/cdefs.h>
33e81f0ea2Spgoyette /* __FBSDID("FreeBSD: head/sys/nfs/nfs_lock.c 303382 2016-07-27 11:08:59Z kib "); */
34*d6c967bbSthorpej __RCSID("$NetBSD: nfs_lock.c,v 1.3 2020/01/02 15:42:26 thorpej Exp $");
356ca35587Sdholland
366ca35587Sdholland #include <sys/param.h>
376ca35587Sdholland #include <sys/systm.h>
386ca35587Sdholland #include <sys/conf.h>
396ca35587Sdholland #include <sys/fcntl.h>
406ca35587Sdholland #include <sys/kernel.h> /* for hz */
416ca35587Sdholland #include <sys/limits.h>
426ca35587Sdholland #include <sys/lock.h>
436ca35587Sdholland #include <sys/malloc.h>
446ca35587Sdholland #include <sys/lockf.h> /* for hz */ /* Must come after sys/malloc.h */
456ca35587Sdholland #include <sys/mbuf.h>
466ca35587Sdholland #include <sys/mount.h>
476ca35587Sdholland #include <sys/namei.h>
486ca35587Sdholland #include <sys/priv.h>
496ca35587Sdholland #include <sys/proc.h>
506ca35587Sdholland #include <sys/resourcevar.h>
516ca35587Sdholland #include <sys/socket.h>
526ca35587Sdholland #include <sys/socket.h>
536ca35587Sdholland #include <sys/unistd.h>
546ca35587Sdholland #include <sys/vnode.h>
556ca35587Sdholland
566ca35587Sdholland #include <net/if.h>
576ca35587Sdholland
5888b1d6a6Spgoyette #include <fs/nfs/common/nfsproto.h>
5988b1d6a6Spgoyette #include <fs/nfs/common/nfs_lock.h>
6088b1d6a6Spgoyette #include <fs/nfs/client/nfs.h>
6188b1d6a6Spgoyette #include <fs/nfs/client/nfsmount.h>
6288b1d6a6Spgoyette #include <fs/nfs/client/nfsnode.h>
6388b1d6a6Spgoyette #include <fs/nfs/client/nlminfo.h>
646ca35587Sdholland
656ca35587Sdholland extern void (*nlminfo_release_p)(struct proc *p);
666ca35587Sdholland
676ca35587Sdholland vop_advlock_t *nfs_advlock_p = nfs_dolock;
686ca35587Sdholland vop_reclaim_t *nfs_reclaim_p = NULL;
696ca35587Sdholland
706ca35587Sdholland static MALLOC_DEFINE(M_NFSLOCK, "nfsclient_lock", "NFS lock request");
716ca35587Sdholland static MALLOC_DEFINE(M_NLMINFO, "nfsclient_nlminfo",
726ca35587Sdholland "NFS lock process structure");
736ca35587Sdholland
746ca35587Sdholland static int nfslockdans(struct thread *td, struct lockd_ans *ansp);
756ca35587Sdholland static void nlminfo_release(struct proc *p);
766ca35587Sdholland /*
776ca35587Sdholland * --------------------------------------------------------------------
786ca35587Sdholland * A miniature device driver which the userland uses to talk to us.
796ca35587Sdholland *
806ca35587Sdholland */
816ca35587Sdholland
826ca35587Sdholland static struct cdev *nfslock_dev;
836ca35587Sdholland static struct mtx nfslock_mtx;
846ca35587Sdholland static int nfslock_isopen;
856ca35587Sdholland static TAILQ_HEAD(,__lock_msg) nfslock_list;
866ca35587Sdholland
876ca35587Sdholland static int
nfslock_open(struct cdev * dev,int oflags,int devtype,struct thread * td)886ca35587Sdholland nfslock_open(struct cdev *dev, int oflags, int devtype, struct thread *td)
896ca35587Sdholland {
906ca35587Sdholland int error;
916ca35587Sdholland
926ca35587Sdholland error = priv_check(td, PRIV_NFS_LOCKD);
936ca35587Sdholland if (error)
946ca35587Sdholland return (error);
956ca35587Sdholland
966ca35587Sdholland mtx_lock(&nfslock_mtx);
976ca35587Sdholland if (!nfslock_isopen) {
986ca35587Sdholland error = 0;
996ca35587Sdholland nfslock_isopen = 1;
1006ca35587Sdholland } else {
1016ca35587Sdholland error = EOPNOTSUPP;
1026ca35587Sdholland }
1036ca35587Sdholland mtx_unlock(&nfslock_mtx);
1046ca35587Sdholland
1056ca35587Sdholland return (error);
1066ca35587Sdholland }
1076ca35587Sdholland
1086ca35587Sdholland static int
nfslock_close(struct cdev * dev,int fflag,int devtype,struct thread * td)1096ca35587Sdholland nfslock_close(struct cdev *dev, int fflag, int devtype, struct thread *td)
1106ca35587Sdholland {
1116ca35587Sdholland struct __lock_msg *lm;
1126ca35587Sdholland
1136ca35587Sdholland mtx_lock(&nfslock_mtx);
1146ca35587Sdholland nfslock_isopen = 0;
1156ca35587Sdholland while (!TAILQ_EMPTY(&nfslock_list)) {
1166ca35587Sdholland lm = TAILQ_FIRST(&nfslock_list);
1176ca35587Sdholland /* XXX: answer request */
1186ca35587Sdholland TAILQ_REMOVE(&nfslock_list, lm, lm_link);
1196ca35587Sdholland free(lm, M_NFSLOCK);
1206ca35587Sdholland }
1216ca35587Sdholland mtx_unlock(&nfslock_mtx);
1226ca35587Sdholland return (0);
1236ca35587Sdholland }
1246ca35587Sdholland
1256ca35587Sdholland static int
nfslock_read(struct cdev * dev,struct uio * uio,int ioflag)1266ca35587Sdholland nfslock_read(struct cdev *dev, struct uio *uio, int ioflag)
1276ca35587Sdholland {
1286ca35587Sdholland int error;
1296ca35587Sdholland struct __lock_msg *lm;
1306ca35587Sdholland
1316ca35587Sdholland if (uio->uio_resid != sizeof *lm)
1326ca35587Sdholland return (EOPNOTSUPP);
1336ca35587Sdholland lm = NULL;
1346ca35587Sdholland error = 0;
1356ca35587Sdholland mtx_lock(&nfslock_mtx);
1366ca35587Sdholland while (TAILQ_EMPTY(&nfslock_list)) {
1376ca35587Sdholland error = msleep(&nfslock_list, &nfslock_mtx, PSOCK | PCATCH,
1386ca35587Sdholland "nfslockd", 0);
1396ca35587Sdholland if (error)
1406ca35587Sdholland break;
1416ca35587Sdholland }
1426ca35587Sdholland if (!error) {
1436ca35587Sdholland lm = TAILQ_FIRST(&nfslock_list);
1446ca35587Sdholland TAILQ_REMOVE(&nfslock_list, lm, lm_link);
1456ca35587Sdholland }
1466ca35587Sdholland mtx_unlock(&nfslock_mtx);
1476ca35587Sdholland if (!error) {
1486ca35587Sdholland error = uiomove(lm, sizeof *lm, uio);
1496ca35587Sdholland free(lm, M_NFSLOCK);
1506ca35587Sdholland }
1516ca35587Sdholland return (error);
1526ca35587Sdholland }
1536ca35587Sdholland
1546ca35587Sdholland static int
nfslock_write(struct cdev * dev,struct uio * uio,int ioflag)1556ca35587Sdholland nfslock_write(struct cdev *dev, struct uio *uio, int ioflag)
1566ca35587Sdholland {
1576ca35587Sdholland struct lockd_ans la;
1586ca35587Sdholland int error;
1596ca35587Sdholland
1606ca35587Sdholland if (uio->uio_resid != sizeof la)
1616ca35587Sdholland return (EOPNOTSUPP);
1626ca35587Sdholland error = uiomove(&la, sizeof la, uio);
1636ca35587Sdholland if (!error)
1646ca35587Sdholland error = nfslockdans(curthread, &la);
1656ca35587Sdholland return (error);
1666ca35587Sdholland }
1676ca35587Sdholland
1686ca35587Sdholland static int
nfslock_send(struct __lock_msg * lm)1696ca35587Sdholland nfslock_send(struct __lock_msg *lm)
1706ca35587Sdholland {
1716ca35587Sdholland struct __lock_msg *lm2;
1726ca35587Sdholland int error;
1736ca35587Sdholland
1746ca35587Sdholland error = 0;
1756ca35587Sdholland lm2 = malloc(sizeof *lm2, M_NFSLOCK, M_WAITOK);
1766ca35587Sdholland mtx_lock(&nfslock_mtx);
1776ca35587Sdholland if (nfslock_isopen) {
1786ca35587Sdholland memcpy(lm2, lm, sizeof *lm2);
1796ca35587Sdholland TAILQ_INSERT_TAIL(&nfslock_list, lm2, lm_link);
1806ca35587Sdholland wakeup(&nfslock_list);
1816ca35587Sdholland } else {
1826ca35587Sdholland error = EOPNOTSUPP;
1836ca35587Sdholland }
1846ca35587Sdholland mtx_unlock(&nfslock_mtx);
1856ca35587Sdholland if (error)
1866ca35587Sdholland free(lm2, M_NFSLOCK);
1876ca35587Sdholland return (error);
1886ca35587Sdholland }
1896ca35587Sdholland
1906ca35587Sdholland static struct cdevsw nfslock_cdevsw = {
1916ca35587Sdholland .d_version = D_VERSION,
1926ca35587Sdholland .d_open = nfslock_open,
1936ca35587Sdholland .d_close = nfslock_close,
1946ca35587Sdholland .d_read = nfslock_read,
1956ca35587Sdholland .d_write = nfslock_write,
1966ca35587Sdholland .d_name = "nfslock"
1976ca35587Sdholland };
1986ca35587Sdholland
1996ca35587Sdholland static int
nfslock_modevent(module_t mod __unused,int type,void * data __unused)2006ca35587Sdholland nfslock_modevent(module_t mod __unused, int type, void *data __unused)
2016ca35587Sdholland {
2026ca35587Sdholland
2036ca35587Sdholland switch (type) {
2046ca35587Sdholland case MOD_LOAD:
2056ca35587Sdholland if (bootverbose)
2066ca35587Sdholland printf("nfslock: pseudo-device\n");
2076ca35587Sdholland mtx_init(&nfslock_mtx, "nfslock", NULL, MTX_DEF);
2086ca35587Sdholland TAILQ_INIT(&nfslock_list);
2096ca35587Sdholland nlminfo_release_p = nlminfo_release;
2106ca35587Sdholland nfslock_dev = make_dev(&nfslock_cdevsw, 0,
2116ca35587Sdholland UID_ROOT, GID_KMEM, 0600, _PATH_NFSLCKDEV);
2126ca35587Sdholland return (0);
2136ca35587Sdholland default:
2146ca35587Sdholland return (EOPNOTSUPP);
2156ca35587Sdholland }
2166ca35587Sdholland }
2176ca35587Sdholland
2186ca35587Sdholland DEV_MODULE(nfslock, nfslock_modevent, NULL);
2196ca35587Sdholland MODULE_VERSION(nfslock, 1);
2206ca35587Sdholland
2216ca35587Sdholland
2226ca35587Sdholland /*
2236ca35587Sdholland * XXX
2246ca35587Sdholland * We have to let the process know if the call succeeded. I'm using an extra
2256ca35587Sdholland * field in the p_nlminfo field in the proc structure, as it is already for
2266ca35587Sdholland * lockd stuff.
2276ca35587Sdholland */
2286ca35587Sdholland
2296ca35587Sdholland /*
2306ca35587Sdholland * nfs_advlock --
2316ca35587Sdholland * NFS advisory byte-level locks.
2326ca35587Sdholland *
2336ca35587Sdholland * The vnode shall be (shared) locked on the entry, it is
2346ca35587Sdholland * unconditionally unlocked after.
2356ca35587Sdholland */
2366ca35587Sdholland int
nfs_dolock(struct vop_advlock_args * ap)2376ca35587Sdholland nfs_dolock(struct vop_advlock_args *ap)
2386ca35587Sdholland {
2396ca35587Sdholland LOCKD_MSG msg;
2406ca35587Sdholland struct thread *td;
2416ca35587Sdholland struct vnode *vp;
2426ca35587Sdholland int error;
2436ca35587Sdholland struct flock *fl;
2446ca35587Sdholland struct proc *p;
2456ca35587Sdholland struct nfsmount *nmp;
246*d6c967bbSthorpej struct timeval btv;
2476ca35587Sdholland
2486ca35587Sdholland td = curthread;
2496ca35587Sdholland p = td->td_proc;
2506ca35587Sdholland
2516ca35587Sdholland vp = ap->a_vp;
2526ca35587Sdholland fl = ap->a_fl;
2536ca35587Sdholland nmp = VFSTONFS(vp->v_mount);
2546ca35587Sdholland
2556ca35587Sdholland ASSERT_VOP_LOCKED(vp, "nfs_dolock");
2566ca35587Sdholland
2576ca35587Sdholland nmp->nm_getinfo(vp, msg.lm_fh, &msg.lm_fh_len, &msg.lm_addr,
2586ca35587Sdholland &msg.lm_nfsv3, NULL, NULL);
2596ca35587Sdholland VOP_UNLOCK(vp, 0);
2606ca35587Sdholland
2616ca35587Sdholland /*
2626ca35587Sdholland * the NLM protocol doesn't allow the server to return an error
2636ca35587Sdholland * on ranges, so we do it.
2646ca35587Sdholland */
2656ca35587Sdholland if (fl->l_whence != SEEK_END) {
2666ca35587Sdholland if ((fl->l_whence != SEEK_CUR && fl->l_whence != SEEK_SET) ||
2676ca35587Sdholland fl->l_start < 0 ||
2686ca35587Sdholland (fl->l_len < 0 &&
2696ca35587Sdholland (fl->l_start == 0 || fl->l_start + fl->l_len < 0)))
2706ca35587Sdholland return (EINVAL);
2716ca35587Sdholland if (fl->l_len > 0 &&
2726ca35587Sdholland (fl->l_len - 1 > OFF_MAX - fl->l_start))
2736ca35587Sdholland return (EOVERFLOW);
2746ca35587Sdholland }
2756ca35587Sdholland
2766ca35587Sdholland /*
2776ca35587Sdholland * Fill in the information structure.
2786ca35587Sdholland */
2796ca35587Sdholland msg.lm_version = LOCKD_MSG_VERSION;
2806ca35587Sdholland msg.lm_msg_ident.pid = p->p_pid;
2816ca35587Sdholland
2826ca35587Sdholland mtx_lock(&Giant);
2836ca35587Sdholland /*
2846ca35587Sdholland * if there is no nfsowner table yet, allocate one.
2856ca35587Sdholland */
2866ca35587Sdholland if (p->p_nlminfo == NULL) {
2876ca35587Sdholland p->p_nlminfo = malloc(sizeof(struct nlminfo),
2886ca35587Sdholland M_NLMINFO, M_WAITOK | M_ZERO);
2896ca35587Sdholland p->p_nlminfo->pid_start = p->p_stats->p_start;
290*d6c967bbSthorpej getmicroboottime(&btv);
291*d6c967bbSthorpej timevaladd(&p->p_nlminfo->pid_start, &btv);
2926ca35587Sdholland }
2936ca35587Sdholland msg.lm_msg_ident.pid_start = p->p_nlminfo->pid_start;
2946ca35587Sdholland msg.lm_msg_ident.msg_seq = ++(p->p_nlminfo->msg_seq);
2956ca35587Sdholland
2966ca35587Sdholland msg.lm_fl = *fl;
2976ca35587Sdholland msg.lm_wait = ap->a_flags & F_WAIT;
2986ca35587Sdholland msg.lm_getlk = ap->a_op == F_GETLK;
2996ca35587Sdholland cru2x(td->td_ucred, &msg.lm_cred);
3006ca35587Sdholland
3016ca35587Sdholland for (;;) {
3026ca35587Sdholland error = nfslock_send(&msg);
3036ca35587Sdholland if (error)
3046ca35587Sdholland goto out;
3056ca35587Sdholland
3066ca35587Sdholland /* Unlocks succeed immediately. */
3076ca35587Sdholland if (fl->l_type == F_UNLCK)
3086ca35587Sdholland goto out;
3096ca35587Sdholland
3106ca35587Sdholland /*
3116ca35587Sdholland * Retry after 20 seconds if we haven't gotten a response yet.
3126ca35587Sdholland * This number was picked out of thin air... but is longer
3136ca35587Sdholland * then even a reasonably loaded system should take (at least
3146ca35587Sdholland * on a local network). XXX Probably should use a back-off
3156ca35587Sdholland * scheme.
3166ca35587Sdholland *
3176ca35587Sdholland * XXX: No PCATCH here since we currently have no useful
3186ca35587Sdholland * way to signal to the userland rpc.lockd that the request
3196ca35587Sdholland * has been aborted. Once the rpc.lockd implementation
3206ca35587Sdholland * can handle aborts, and we report them properly,
3216ca35587Sdholland * PCATCH can be put back. In the mean time, if we did
3226ca35587Sdholland * permit aborting, the lock attempt would "get lost"
3236ca35587Sdholland * and the lock would get stuck in the locked state.
3246ca35587Sdholland */
3256ca35587Sdholland error = tsleep(p->p_nlminfo, PUSER, "lockd", 20*hz);
3266ca35587Sdholland if (error != 0) {
3276ca35587Sdholland if (error == EWOULDBLOCK) {
3286ca35587Sdholland /*
3296ca35587Sdholland * We timed out, so we rewrite the request
3306ca35587Sdholland * to the fifo.
3316ca35587Sdholland */
3326ca35587Sdholland continue;
3336ca35587Sdholland }
3346ca35587Sdholland
3356ca35587Sdholland break;
3366ca35587Sdholland }
3376ca35587Sdholland
3386ca35587Sdholland if (msg.lm_getlk && p->p_nlminfo->retcode == 0) {
3396ca35587Sdholland if (p->p_nlminfo->set_getlk_pid) {
3406ca35587Sdholland fl->l_sysid = 0; /* XXX */
3416ca35587Sdholland fl->l_pid = p->p_nlminfo->getlk_pid;
3426ca35587Sdholland } else {
3436ca35587Sdholland fl->l_type = F_UNLCK;
3446ca35587Sdholland }
3456ca35587Sdholland }
3466ca35587Sdholland error = p->p_nlminfo->retcode;
3476ca35587Sdholland break;
3486ca35587Sdholland }
3496ca35587Sdholland out:
3506ca35587Sdholland mtx_unlock(&Giant);
3516ca35587Sdholland return (error);
3526ca35587Sdholland }
3536ca35587Sdholland
3546ca35587Sdholland /*
3556ca35587Sdholland * nfslockdans --
3566ca35587Sdholland * NFS advisory byte-level locks answer from the lock daemon.
3576ca35587Sdholland */
3586ca35587Sdholland static int
nfslockdans(struct thread * td,struct lockd_ans * ansp)3596ca35587Sdholland nfslockdans(struct thread *td, struct lockd_ans *ansp)
3606ca35587Sdholland {
3616ca35587Sdholland struct proc *targetp;
3626ca35587Sdholland
3636ca35587Sdholland /* the version should match, or we're out of sync */
3646ca35587Sdholland if (ansp->la_vers != LOCKD_ANS_VERSION)
3656ca35587Sdholland return (EINVAL);
3666ca35587Sdholland
3676ca35587Sdholland /* Find the process, set its return errno and wake it up. */
3686ca35587Sdholland if ((targetp = pfind(ansp->la_msg_ident.pid)) == NULL)
3696ca35587Sdholland return (ESRCH);
3706ca35587Sdholland
3716ca35587Sdholland /* verify the pid hasn't been reused (if we can), and it isn't waiting
3726ca35587Sdholland * for an answer from a more recent request. We return an EPIPE if
3736ca35587Sdholland * the match fails, because we've already used ESRCH above, and this
3746ca35587Sdholland * is sort of like writing on a pipe after the reader has closed it.
3756ca35587Sdholland */
3766ca35587Sdholland if (targetp->p_nlminfo == NULL ||
3776ca35587Sdholland ((ansp->la_msg_ident.msg_seq != -1) &&
3786ca35587Sdholland (timevalcmp(&targetp->p_nlminfo->pid_start,
3796ca35587Sdholland &ansp->la_msg_ident.pid_start, !=) ||
3806ca35587Sdholland targetp->p_nlminfo->msg_seq != ansp->la_msg_ident.msg_seq))) {
3816ca35587Sdholland PROC_UNLOCK(targetp);
3826ca35587Sdholland return (EPIPE);
3836ca35587Sdholland }
3846ca35587Sdholland
3856ca35587Sdholland targetp->p_nlminfo->retcode = ansp->la_errno;
3866ca35587Sdholland targetp->p_nlminfo->set_getlk_pid = ansp->la_set_getlk_pid;
3876ca35587Sdholland targetp->p_nlminfo->getlk_pid = ansp->la_getlk_pid;
3886ca35587Sdholland
3896ca35587Sdholland wakeup(targetp->p_nlminfo);
3906ca35587Sdholland
3916ca35587Sdholland PROC_UNLOCK(targetp);
3926ca35587Sdholland return (0);
3936ca35587Sdholland }
3946ca35587Sdholland
3956ca35587Sdholland /*
3966ca35587Sdholland * Free nlminfo attached to process.
3976ca35587Sdholland */
3986ca35587Sdholland void
nlminfo_release(struct proc * p)3996ca35587Sdholland nlminfo_release(struct proc *p)
4006ca35587Sdholland {
4016ca35587Sdholland free(p->p_nlminfo, M_NLMINFO);
4026ca35587Sdholland p->p_nlminfo = NULL;
4036ca35587Sdholland }
404