/*	$NetBSD: nfs_nfsdcache.c,v 1.5 2024/07/05 04:31:52 rin Exp $	*/
/*-
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Rick Macklem at The University of Guelph.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 */

#include <sys/cdefs.h>
/* __FBSDID("FreeBSD: head/sys/fs/nfsserver/nfs_nfsdcache.c 304026 2016-08-12 22:44:59Z rmacklem "); */
__RCSID("$NetBSD: nfs_nfsdcache.c,v 1.5 2024/07/05 04:31:52 rin Exp $");

/*
 * Here is the basic algorithm:
 * First, some design criteria I used:
 * - I think a false hit is more serious than a false miss
 * - A false hit for an RPC that has Op(s) that order via seqid# must be
 *   avoided at all cost
 * - A valid hit will probably happen a long time after the original reply
 *   and the TCP socket that the original request was received on will no
 *   longer be active
 *   (The long time delay implies to me that LRU is not appropriate.)
 * - The mechanism will satisfy the requirements of ordering Ops with seqid#s
 *   in them as well as minimizing the risk of redoing retried non-idempotent
 *   Ops.
 * Because it is biased towards avoiding false hits, multiple entries with
 * the same xid are to be expected, especially for the case of the entry
 * in the cache being related to a seqid# sequenced Op.
 *
 * The basic algorithm I'm about to code up:
 * - Null RPCs bypass the cache and are just done
 * For TCP
 * - key on <xid, NFS version> (as noted above, there can be several
 *   entries with the same key)
 *   When a request arrives:
 *     For all that match key
 *     - if RPC# != OR request_size !=
 *       - not a match with this one
 *     - if NFSv4 and received on same TCP socket OR
 *       received on a TCP connection created before the
 *       entry was cached
 *       - not a match with this one
 *       (V2,3 clients might retry on same TCP socket)
 *     - calculate checksum on first N bytes of NFS XDR
 *     - if checksum !=
 *       - not a match for this one
 *     If any of the remaining ones that match has a
 *     seqid_refcnt > 0
 *       - not a match (go do RPC, using new cache entry)
 *     If one match left
 *       - a hit (reply from cache)
 *     else
 *       - miss (go do RPC, using new cache entry)
 *
 *   During processing of NFSv4 request:
 *   - set a flag when a non-idempotent Op is processed
 *   - when an Op that uses a seqid# (Open,...) is processed
 *     - if same seqid# as referenced entry in cache
 *       - free new cache entry
 *       - reply from referenced cache entry
 *     else if next seqid# in order
 *       - free referenced cache entry
 *       - increment seqid_refcnt on new cache entry
 *       - set pointer from Openowner/Lockowner to
 *         new cache entry (aka reference it)
 *     else if first seqid# in sequence
 *       - increment seqid_refcnt on new cache entry
 *       - set pointer from Openowner/Lockowner to
 *         new cache entry (aka reference it)
 *
 *   At end of RPC processing:
 *   - if seqid_refcnt > 0 OR flagged non-idempotent on new
 *     cache entry
 *     - save reply in cache entry
 *     - calculate checksum on first N bytes of NFS XDR
 *       request
 *     - note op and length of XDR request (in bytes)
 *     - timestamp it
 *     else
 *     - free new cache entry
 *   - Send reply (noting info for socket activity check, below)
 *
 *   For cache entries saved above:
 *   - if saved since seqid_refcnt was > 0
 *     - free when seqid_refcnt decrements to 0
 *       (when next one in sequence is processed above, or
 *       when Openowner/Lockowner is discarded)
 *     else { non-idempotent Op(s) }
 *     - free when
 *       - some further activity observed on same
 *         socket
 *         (I'm not yet sure how I'm going to do
 *         this. Maybe look at the TCP connection
 *         to see if the send_tcp_sequence# is well
 *         past sent reply OR K additional RPCs
 *         replied on same socket OR?)
 *       OR
 *       - when very old (hours, days, weeks?)
 *
 * For UDP (v2, 3 only), pretty much the old way:
 * - key on <xid, NFS version, RPC#, Client host ip#>
 *   (at most one entry for each key)
 *
 * When a Request arrives:
 * - if a match with entry via key
 *   - if RPC marked In_progress
 *     - discard request (don't send reply)
 *   else
 *     - reply from cache
 *     - timestamp cache entry
 * else
 *   - add entry to cache, marked In_progress
 *   - do RPC
 *   - when RPC done
 *     - if RPC# non-idempotent
 *       - mark entry Done (not In_progress)
 *       - save reply
 *       - timestamp cache entry
 *     else
 *       - free cache entry
 *     - send reply
 *
 * Later, entries with saved replies are free'd a short time (few minutes)
 * after reply sent (timestamp).
 * Reference: Chet Juszczak, "Improving the Performance and Correctness
 *	of an NFS Server", in Proc. Winter 1989 USENIX Conference,
 *	pages 53-63. San Diego, February 1989.
 *	for the UDP case.
 * nfsrc_floodlevel is set to the allowable upper limit for saved replies
 * for TCP. For V3, a reply won't be saved when the flood level is
 * hit. For V4, the non-idempotent Op will return NFSERR_RESOURCE in
 * that case. This level should be set high enough that this almost
 * never happens.
 */
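
/*
 * Illustrative sketch (not compiled): a simplified outline of the TCP
 * match test described above.  The names used here (drc_key, drc_entry,
 * drc_candidate) are hypothetical stand-ins, not the real nfsrvcache
 * fields defined elsewhere; the real code in nfsrc_gettcp() below also
 * applies the seqid_refcnt and same-socket/connection-age rules before
 * declaring a hit.
 */
#if 0
struct drc_key {
	uint32_t	xid;		/* RPC transaction id */
	int		nfsvers;	/* NFS protocol version */
	int		procnum;	/* RPC procedure number */
	int		reqlen;		/* length of the XDR request */
	uint16_t	cksum;		/* checksum of first N request bytes */
};

struct drc_entry {
	struct drc_key	key;
};

/* Return 1 when the cached entry is still a candidate for this request. */
static int
drc_candidate(const struct drc_key *req, const struct drc_entry *ent)
{

	if (req->xid != ent->key.xid || req->nfsvers != ent->key.nfsvers)
		return (0);	/* different <xid, version> key */
	if (req->procnum != ent->key.procnum || req->reqlen != ent->key.reqlen)
		return (0);	/* RPC# or request size differs */
	if (req->cksum != ent->key.cksum)
		return (0);	/* first-N-bytes checksum differs */
	return (1);
}
#endif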
#ifndef APPLEKEXT
#include <fs/nfs/common/nfsport.h>

extern struct nfsstatsv1 nfsstatsv1;
extern struct mtx nfsrc_udpmtx;
extern struct nfsrchash_bucket nfsrchash_table[NFSRVCACHE_HASHSIZE];
extern struct nfsrchash_bucket nfsrcahash_table[NFSRVCACHE_HASHSIZE];
int nfsrc_floodlevel = NFSRVCACHE_FLOODLEVEL, nfsrc_tcpsavedreplies = 0;
#endif	/* !APPLEKEXT */

SYSCTL_DECL(_vfs_nfsd);

static u_int nfsrc_tcphighwater = 0;
static int
sysctl_tcphighwater(SYSCTL_HANDLER_ARGS)
{
	int error, newhighwater;

	newhighwater = nfsrc_tcphighwater;
	error = sysctl_handle_int(oidp, &newhighwater, 0, req);
	if (error != 0 || req->newptr == NULL)
		return (error);
	if (newhighwater < 0)
		return (EINVAL);
	if (newhighwater >= nfsrc_floodlevel)
		nfsrc_floodlevel = newhighwater + newhighwater / 5;
	nfsrc_tcphighwater = newhighwater;
	return (0);
}
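
/*
 * Worked example for the handler above (numbers are illustrative): if the
 * new vfs.nfsd.tcphighwater value is at or above the current flood level,
 * the flood level is raised to newhighwater + newhighwater / 5, i.e. 120%
 * of the new high water mark, so saved TCP replies retain some headroom
 * before the NFSERR_RESOURCE behaviour described in the design comment
 * above can kick in.
 */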
SYSCTL_PROC(_vfs_nfsd, OID_AUTO, tcphighwater, CTLTYPE_UINT | CTLFLAG_RW, 0,
    sizeof(nfsrc_tcphighwater), sysctl_tcphighwater, "IU",
    "High water mark for TCP cache entries");

static u_int nfsrc_udphighwater = NFSRVCACHE_UDPHIGHWATER;
SYSCTL_UINT(_vfs_nfsd, OID_AUTO, udphighwater, CTLFLAG_RW,
    &nfsrc_udphighwater, 0,
    "High water mark for UDP cache entries");
static u_int nfsrc_tcptimeout = NFSRVCACHE_TCPTIMEOUT;
SYSCTL_UINT(_vfs_nfsd, OID_AUTO, tcpcachetimeo, CTLFLAG_RW,
    &nfsrc_tcptimeout, 0,
    "Timeout for TCP entries in the DRC");
static u_int nfsrc_tcpnonidempotent = 1;
SYSCTL_UINT(_vfs_nfsd, OID_AUTO, cachetcp, CTLFLAG_RW,
    &nfsrc_tcpnonidempotent, 0,
    "Enable the DRC for NFS over TCP");

static int nfsrc_udpcachesize = 0;
static TAILQ_HEAD(, nfsrvcache) nfsrvudplru;
static struct nfsrvhashhead nfsrvudphashtbl[NFSRVCACHE_HASHSIZE];

/*
 * and the reverse mapping from generic to Version 2 procedure numbers
 */
static int newnfsv2_procid[NFS_V3NPROCS] = {
	NFSV2PROC_NULL,
	NFSV2PROC_GETATTR,
	NFSV2PROC_SETATTR,
	NFSV2PROC_LOOKUP,
	NFSV2PROC_NOOP,
	NFSV2PROC_READLINK,
	NFSV2PROC_READ,
	NFSV2PROC_WRITE,
	NFSV2PROC_CREATE,
	NFSV2PROC_MKDIR,
	NFSV2PROC_SYMLINK,
	NFSV2PROC_CREATE,
	NFSV2PROC_REMOVE,
	NFSV2PROC_RMDIR,
	NFSV2PROC_RENAME,
	NFSV2PROC_LINK,
	NFSV2PROC_READDIR,
	NFSV2PROC_NOOP,
	NFSV2PROC_STATFS,
	NFSV2PROC_NOOP,
	NFSV2PROC_NOOP,
	NFSV2PROC_NOOP,
};

#define nfsrc_hash(xid) (((xid) + ((xid) >> 24)) % NFSRVCACHE_HASHSIZE)
#define NFSRCUDPHASH(xid) \
	(&nfsrvudphashtbl[nfsrc_hash(xid)])
#define NFSRCHASH(xid) \
	(&nfsrchash_table[nfsrc_hash(xid)].tbl)
#define NFSRCAHASH(xid) (&nfsrcahash_table[nfsrc_hash(xid)])
#define TRUE 1
#define FALSE 0
#define NFSRVCACHE_CHECKLEN 100
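
/*
 * Worked example (hypothetical xid, not from a real trace): nfsrc_hash()
 * folds the top byte of the xid back into the low bits before taking the
 * modulus, so xids that differ mostly in their high byte still spread
 * across the hash chains selected below.
 */
#if 0
uint32_t xid = 0x7a000003;
uint32_t slot = (xid + (xid >> 24)) % NFSRVCACHE_HASHSIZE;
struct nfsrvhashhead *udp_chain = NFSRCUDPHASH(xid);	/* UDP DRC chain */
struct nfsrvhashhead *tcp_chain = NFSRCHASH(xid);	/* TCP DRC chain */
#endif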

/* True iff the rpc reply is an nfs status ONLY! */
static int nfsv2_repstat[NFS_V3NPROCS] = {
	FALSE,
	FALSE,
	FALSE,
	FALSE,
	FALSE,
	FALSE,
	FALSE,
	FALSE,
	FALSE,
	FALSE,
	TRUE,
	TRUE,
	TRUE,
	TRUE,
	FALSE,
	TRUE,
	FALSE,
	FALSE,
	FALSE,
	FALSE,
	FALSE,
	FALSE,
};

/*
 * Will NFS want to work over IPv6 someday?
 */
#define NETFAMILY(rp) \
	(((rp)->rc_flag & RC_INETIPV6) ? AF_INET6 : AF_INET)

/* local functions */
static int nfsrc_getudp(struct nfsrv_descript *nd, struct nfsrvcache *newrp);
static int nfsrc_gettcp(struct nfsrv_descript *nd, struct nfsrvcache *newrp);
static void nfsrc_lock(struct nfsrvcache *rp);
static void nfsrc_unlock(struct nfsrvcache *rp);
static void nfsrc_wanted(struct nfsrvcache *rp);
static void nfsrc_freecache(struct nfsrvcache *rp);
static int nfsrc_getlenandcksum(mbuf_t m1, u_int16_t *cksum);
static void nfsrc_marksametcpconn(u_int64_t);

/*
 * Return the correct mutex for this cache entry.
 */
static __inline struct mtx *
nfsrc_cachemutex(struct nfsrvcache *rp)
{

	if ((rp->rc_flag & RC_UDP) != 0)
		return (&nfsrc_udpmtx);
	return (&nfsrchash_table[nfsrc_hash(rp->rc_xid)].mtx);
}
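
/*
 * Usage sketch (not compiled): the locking discipline the routines below
 * follow.  The hash-chain (or UDP) mutex returned by nfsrc_cachemutex()
 * is held to take the per-entry RC_LOCKED lock and is dropped before
 * nfsrc_unlock(), which briefly retakes it itself; this mirrors what
 * nfsrvd_updatecache() actually does.
 */
#if 0
	struct mtx *mutex = nfsrc_cachemutex(rp);

	mtx_lock(mutex);
	nfsrc_lock(rp);		/* may sleep until RC_LOCKED is clear */
	/* ... examine or update the entry under the mutex ... */
	mtx_unlock(mutex);
	/* ... work that only needs RC_LOCKED ... */
	nfsrc_unlock(rp);	/* clears RC_LOCKED and wakes waiters */
#endif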

/*
 * Initialize the server request cache list
 */
APPLESTATIC void
nfsrvd_initcache(void)
{
	int i;
	static int inited = 0;

	if (inited)
		return;
	inited = 1;
	for (i = 0; i < NFSRVCACHE_HASHSIZE; i++) {
		LIST_INIT(&nfsrvudphashtbl[i]);
		LIST_INIT(&nfsrchash_table[i].tbl);
		LIST_INIT(&nfsrcahash_table[i].tbl);
	}
	TAILQ_INIT(&nfsrvudplru);
	nfsrc_tcpsavedreplies = 0;
	nfsrc_udpcachesize = 0;
	nfsstatsv1.srvcache_tcppeak = 0;
	nfsstatsv1.srvcache_size = 0;
}

/*
 * Get a cache entry for this request. Basically just malloc a new one
 * and then call nfsrc_getudp() or nfsrc_gettcp() to do the rest.
 */
APPLESTATIC int
nfsrvd_getcache(struct nfsrv_descript *nd)
{
	struct nfsrvcache *newrp;
	int ret;

	if (nd->nd_procnum == NFSPROC_NULL)
		panic("nfsd cache null");
	MALLOC(newrp, struct nfsrvcache *, sizeof (struct nfsrvcache),
	    M_NFSRVCACHE, M_WAITOK);
	NFSBZERO((caddr_t)newrp, sizeof (struct nfsrvcache));
	if (nd->nd_flag & ND_NFSV4)
		newrp->rc_flag = RC_NFSV4;
	else if (nd->nd_flag & ND_NFSV3)
		newrp->rc_flag = RC_NFSV3;
	else
		newrp->rc_flag = RC_NFSV2;
	newrp->rc_xid = nd->nd_retxid;
	newrp->rc_proc = nd->nd_procnum;
	newrp->rc_sockref = nd->nd_sockref;
	newrp->rc_cachetime = nd->nd_tcpconntime;
	if (nd->nd_flag & ND_SAMETCPCONN)
		newrp->rc_flag |= RC_SAMETCPCONN;
	if (nd->nd_nam2 != NULL) {
		newrp->rc_flag |= RC_UDP;
		ret = nfsrc_getudp(nd, newrp);
	} else {
		ret = nfsrc_gettcp(nd, newrp);
	}
	NFSEXITCODE2(0, nd);
	return (ret);
}

/*
 * For UDP (v2, v3):
 * - key on <xid, NFS version, RPC#, Client host ip#>
 *   (at most one entry for each key)
 */
static int
nfsrc_getudp(struct nfsrv_descript *nd, struct nfsrvcache *newrp)
{
	struct nfsrvcache *rp;
	struct sockaddr_in *saddr;
	struct sockaddr_in6 *saddr6;
	struct nfsrvhashhead *hp;
	int ret = 0;
	struct mtx *mutex;

	mutex = nfsrc_cachemutex(newrp);
	hp = NFSRCUDPHASH(newrp->rc_xid);
loop:
	mtx_lock(mutex);
	LIST_FOREACH(rp, hp, rc_hash) {
		if (newrp->rc_xid == rp->rc_xid &&
		    newrp->rc_proc == rp->rc_proc &&
		    (newrp->rc_flag & rp->rc_flag & RC_NFSVERS) &&
		    nfsaddr_match(NETFAMILY(rp), &rp->rc_haddr, nd->nd_nam)) {
			if ((rp->rc_flag & RC_LOCKED) != 0) {
				rp->rc_flag |= RC_WANTED;
				(void)mtx_sleep(rp, mutex, (PZERO - 1) | PDROP,
				    "nfsrc", 10 * hz);
				goto loop;
			}
			if (rp->rc_flag == 0)
				panic("nfs udp cache0");
			rp->rc_flag |= RC_LOCKED;
			TAILQ_REMOVE(&nfsrvudplru, rp, rc_lru);
			TAILQ_INSERT_TAIL(&nfsrvudplru, rp, rc_lru);
			if (rp->rc_flag & RC_INPROG) {
				nfsstatsv1.srvcache_inproghits++;
				mtx_unlock(mutex);
				ret = RC_DROPIT;
			} else if (rp->rc_flag & RC_REPSTATUS) {
				/*
				 * V2 only.
				 */
				nfsstatsv1.srvcache_nonidemdonehits++;
				mtx_unlock(mutex);
				nfsrvd_rephead(nd);
				*(nd->nd_errp) = rp->rc_status;
				ret = RC_REPLY;
				rp->rc_timestamp = NFSD_MONOSEC +
				    NFSRVCACHE_UDPTIMEOUT;
			} else if (rp->rc_flag & RC_REPMBUF) {
				nfsstatsv1.srvcache_nonidemdonehits++;
				mtx_unlock(mutex);
				nd->nd_mreq = m_copym(rp->rc_reply, 0,
				    M_COPYALL, M_WAITOK);
				ret = RC_REPLY;
				rp->rc_timestamp = NFSD_MONOSEC +
				    NFSRVCACHE_UDPTIMEOUT;
			} else {
				panic("nfs udp cache1");
			}
			nfsrc_unlock(rp);
			free((caddr_t)newrp, M_NFSRVCACHE);
			goto out;
		}
	}
	nfsstatsv1.srvcache_misses++;
	atomic_add_int(&nfsstatsv1.srvcache_size, 1);
	nfsrc_udpcachesize++;

	newrp->rc_flag |= RC_INPROG;
	saddr = NFSSOCKADDR(nd->nd_nam, struct sockaddr_in *);
	if (saddr->sin_family == AF_INET)
		newrp->rc_inet = saddr->sin_addr.s_addr;
	else if (saddr->sin_family == AF_INET6) {
		saddr6 = (struct sockaddr_in6 *)saddr;
		NFSBCOPY((caddr_t)&saddr6->sin6_addr, (caddr_t)&newrp->rc_inet6,
		    sizeof (struct in6_addr));
		newrp->rc_flag |= RC_INETIPV6;
	}
	LIST_INSERT_HEAD(hp, newrp, rc_hash);
	TAILQ_INSERT_TAIL(&nfsrvudplru, newrp, rc_lru);
	mtx_unlock(mutex);
	nd->nd_rp = newrp;
	ret = RC_DOIT;

out:
	NFSEXITCODE2(0, nd);
	return (ret);
}

/*
 * Update a request cache entry after the rpc has been done
 */
APPLESTATIC struct nfsrvcache *
nfsrvd_updatecache(struct nfsrv_descript *nd)
{
	struct nfsrvcache *rp;
	struct nfsrvcache *retrp = NULL;
	mbuf_t m;
	struct mtx *mutex;

	rp = nd->nd_rp;
	if (!rp)
		panic("nfsrvd_updatecache null rp");
	nd->nd_rp = NULL;
	mutex = nfsrc_cachemutex(rp);
	mtx_lock(mutex);
	nfsrc_lock(rp);
	if (!(rp->rc_flag & RC_INPROG))
		panic("nfsrvd_updatecache not inprog");
	rp->rc_flag &= ~RC_INPROG;
	if (rp->rc_flag & RC_UDP) {
		TAILQ_REMOVE(&nfsrvudplru, rp, rc_lru);
		TAILQ_INSERT_TAIL(&nfsrvudplru, rp, rc_lru);
	}

	/*
	 * Reply from cache is a special case returned by nfsrv_checkseqid().
	 */
	if (nd->nd_repstat == NFSERR_REPLYFROMCACHE) {
		nfsstatsv1.srvcache_nonidemdonehits++;
		mtx_unlock(mutex);
		nd->nd_repstat = 0;
		mbuf_freem(nd->nd_mreq);
		if (!(rp->rc_flag & RC_REPMBUF))
			panic("reply from cache");
		nd->nd_mreq = m_copym(rp->rc_reply, 0,
		    M_COPYALL, M_WAITOK);
		rp->rc_timestamp = NFSD_MONOSEC + nfsrc_tcptimeout;
		nfsrc_unlock(rp);
		goto out;
	}

	/*
	 * If rc_refcnt > 0, save it
	 * For UDP, save it if ND_SAVEREPLY is set
	 * For TCP, save it if ND_SAVEREPLY and nfsrc_tcpnonidempotent is set
	 */
	if (nd->nd_repstat != NFSERR_DONTREPLY &&
	    (rp->rc_refcnt > 0 ||
	    ((nd->nd_flag & ND_SAVEREPLY) && (rp->rc_flag & RC_UDP)) ||
	    ((nd->nd_flag & ND_SAVEREPLY) && !(rp->rc_flag & RC_UDP) &&
	    nfsrc_tcpsavedreplies <= nfsrc_floodlevel &&
	    nfsrc_tcpnonidempotent))) {
		if (rp->rc_refcnt > 0) {
			if (!(rp->rc_flag & RC_NFSV4))
				panic("update_cache refcnt");
			rp->rc_flag |= RC_REFCNT;
		}
		if ((nd->nd_flag & ND_NFSV2) &&
		    nfsv2_repstat[newnfsv2_procid[nd->nd_procnum]]) {
			rp->rc_status = nd->nd_repstat;
			rp->rc_flag |= RC_REPSTATUS;
			mtx_unlock(mutex);
		} else {
			if (!(rp->rc_flag & RC_UDP)) {
				atomic_add_int(&nfsrc_tcpsavedreplies, 1);
				if (nfsrc_tcpsavedreplies >
				    nfsstatsv1.srvcache_tcppeak)
					nfsstatsv1.srvcache_tcppeak =
					    nfsrc_tcpsavedreplies;
			}
			mtx_unlock(mutex);
			m = m_copym(nd->nd_mreq, 0, M_COPYALL, M_WAITOK);
			mtx_lock(mutex);
			rp->rc_reply = m;
			rp->rc_flag |= RC_REPMBUF;
			mtx_unlock(mutex);
		}
		if (rp->rc_flag & RC_UDP) {
			rp->rc_timestamp = NFSD_MONOSEC +
			    NFSRVCACHE_UDPTIMEOUT;
			nfsrc_unlock(rp);
		} else {
			rp->rc_timestamp = NFSD_MONOSEC + nfsrc_tcptimeout;
			if (rp->rc_refcnt > 0)
				nfsrc_unlock(rp);
			else
				retrp = rp;
		}
	} else {
		nfsrc_freecache(rp);
		mtx_unlock(mutex);
	}

out:
	NFSEXITCODE2(0, nd);
	return (retrp);
}

/*
 * Invalidate and, if possible, free an in prog cache entry.
 * Must not sleep.
 */
APPLESTATIC void
nfsrvd_delcache(struct nfsrvcache *rp)
{
	struct mtx *mutex;

	mutex = nfsrc_cachemutex(rp);
	if (!(rp->rc_flag & RC_INPROG))
		panic("nfsrvd_delcache not in prog");
	mtx_lock(mutex);
	rp->rc_flag &= ~RC_INPROG;
	if (rp->rc_refcnt == 0 && !(rp->rc_flag & RC_LOCKED))
		nfsrc_freecache(rp);
	mtx_unlock(mutex);
}

/*
 * Called after nfsrvd_updatecache() once the reply is sent, to update
 * the entry's sequence number and unlock it. The argument is
 * the pointer returned by nfsrvd_updatecache().
 */
APPLESTATIC void
nfsrvd_sentcache(struct nfsrvcache *rp, int have_seq, uint32_t seq)
{
	struct nfsrchash_bucket *hbp;

	KASSERT(rp->rc_flag & RC_LOCKED, ("nfsrvd_sentcache not locked"));
	if (have_seq) {
		hbp = NFSRCAHASH(rp->rc_sockref);
		mtx_lock(&hbp->mtx);
		rp->rc_tcpseq = seq;
		if (rp->rc_acked != RC_NO_ACK)
			LIST_INSERT_HEAD(&hbp->tbl, rp, rc_ahash);
		rp->rc_acked = RC_NO_ACK;
		mtx_unlock(&hbp->mtx);
	}
	nfsrc_unlock(rp);
}
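
/*
 * Once the reply has been handed to TCP, the entry stays on the per-socket
 * NFSRCAHASH() chain keyed by rc_sockref.  When nfsrc_trimcache() is later
 * called with that socket's snd_una, entries whose rc_tcpseq has been
 * acknowledged are marked RC_ACK and become eligible for freeing; this is
 * the "further activity observed on same socket" idea from the design
 * comment at the top of this file.
 */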

/*
 * Get a cache entry for TCP
 * - key on <xid, nfs version>
 *   (allow multiple entries for a given key)
 */
static int
nfsrc_gettcp(struct nfsrv_descript *nd, struct nfsrvcache *newrp)
{
	struct nfsrvcache *rp, *nextrp;
	int i;
	struct nfsrvcache *hitrp;
	struct nfsrvhashhead *hp, nfsrc_templist;
	int hit, ret = 0;
	struct mtx *mutex;

	mutex = nfsrc_cachemutex(newrp);
	hp = NFSRCHASH(newrp->rc_xid);
	newrp->rc_reqlen = nfsrc_getlenandcksum(nd->nd_mrep, &newrp->rc_cksum);
tryagain:
	mtx_lock(mutex);
	hit = 1;
	LIST_INIT(&nfsrc_templist);
	/*
	 * Get all the matches and put them on the temp list.
	 */
	rp = LIST_FIRST(hp);
	while (rp != NULL) {
		nextrp = LIST_NEXT(rp, rc_hash);
		if (newrp->rc_xid == rp->rc_xid &&
		    (!(rp->rc_flag & RC_INPROG) ||
		    ((newrp->rc_flag & RC_SAMETCPCONN) &&
		    newrp->rc_sockref == rp->rc_sockref)) &&
		    (newrp->rc_flag & rp->rc_flag & RC_NFSVERS) &&
		    newrp->rc_proc == rp->rc_proc &&
		    ((newrp->rc_flag & RC_NFSV4) &&
		    newrp->rc_sockref != rp->rc_sockref &&
		    newrp->rc_cachetime >= rp->rc_cachetime)
		    && newrp->rc_reqlen == rp->rc_reqlen &&
		    newrp->rc_cksum == rp->rc_cksum) {
			LIST_REMOVE(rp, rc_hash);
			LIST_INSERT_HEAD(&nfsrc_templist, rp, rc_hash);
		}
		rp = nextrp;
	}

	/*
	 * Now, use nfsrc_templist to decide if there is a match.
	 */
	i = 0;
	LIST_FOREACH(rp, &nfsrc_templist, rc_hash) {
		i++;
		if (rp->rc_refcnt > 0) {
			hit = 0;
			break;
		}
	}
	/*
	 * Can be a hit only if one entry left.
	 * Note possible hit entry and put nfsrc_templist back on hash
	 * list.
	 */
	if (i != 1)
		hit = 0;
	hitrp = rp = LIST_FIRST(&nfsrc_templist);
	while (rp != NULL) {
		nextrp = LIST_NEXT(rp, rc_hash);
		LIST_REMOVE(rp, rc_hash);
		LIST_INSERT_HEAD(hp, rp, rc_hash);
		rp = nextrp;
	}
	if (LIST_FIRST(&nfsrc_templist) != NULL)
		panic("nfs gettcp cache templist");

	if (hit) {
		rp = hitrp;
		if ((rp->rc_flag & RC_LOCKED) != 0) {
			rp->rc_flag |= RC_WANTED;
			(void)mtx_sleep(rp, mutex, (PZERO - 1) | PDROP,
			    "nfsrc", 10 * hz);
			goto tryagain;
		}
		if (rp->rc_flag == 0)
			panic("nfs tcp cache0");
		rp->rc_flag |= RC_LOCKED;
		if (rp->rc_flag & RC_INPROG) {
			nfsstatsv1.srvcache_inproghits++;
			mtx_unlock(mutex);
			if (newrp->rc_sockref == rp->rc_sockref)
				nfsrc_marksametcpconn(rp->rc_sockref);
			ret = RC_DROPIT;
		} else if (rp->rc_flag & RC_REPSTATUS) {
			/*
			 * V2 only.
			 */
			nfsstatsv1.srvcache_nonidemdonehits++;
			mtx_unlock(mutex);
			if (newrp->rc_sockref == rp->rc_sockref)
				nfsrc_marksametcpconn(rp->rc_sockref);
			ret = RC_REPLY;
			nfsrvd_rephead(nd);
			*(nd->nd_errp) = rp->rc_status;
			rp->rc_timestamp = NFSD_MONOSEC + nfsrc_tcptimeout;
		} else if (rp->rc_flag & RC_REPMBUF) {
			nfsstatsv1.srvcache_nonidemdonehits++;
			mtx_unlock(mutex);
			if (newrp->rc_sockref == rp->rc_sockref)
				nfsrc_marksametcpconn(rp->rc_sockref);
			ret = RC_REPLY;
			nd->nd_mreq = m_copym(rp->rc_reply, 0,
			    M_COPYALL, M_WAITOK);
			rp->rc_timestamp = NFSD_MONOSEC + nfsrc_tcptimeout;
		} else {
			panic("nfs tcp cache1");
		}
		nfsrc_unlock(rp);
		free((caddr_t)newrp, M_NFSRVCACHE);
		goto out;
	}
	nfsstatsv1.srvcache_misses++;
	atomic_add_int(&nfsstatsv1.srvcache_size, 1);

	/*
	 * For TCP, multiple entries for a key are allowed, so don't
	 * chain it into the hash table until done.
	 */
	newrp->rc_cachetime = NFSD_MONOSEC;
	newrp->rc_flag |= RC_INPROG;
	LIST_INSERT_HEAD(hp, newrp, rc_hash);
	mtx_unlock(mutex);
	nd->nd_rp = newrp;
	ret = RC_DOIT;

out:
	NFSEXITCODE2(0, nd);
	return (ret);
}

/*
 * Lock a cache entry.
 */
static void
nfsrc_lock(struct nfsrvcache *rp)
{
	struct mtx *mutex;

	mutex = nfsrc_cachemutex(rp);
	mtx_assert(mutex, MA_OWNED);
	while ((rp->rc_flag & RC_LOCKED) != 0) {
		rp->rc_flag |= RC_WANTED;
		(void)mtx_sleep(rp, mutex, PZERO - 1, "nfsrc", 0);
	}
	rp->rc_flag |= RC_LOCKED;
}

/*
 * Unlock a cache entry.
 */
static void
nfsrc_unlock(struct nfsrvcache *rp)
{
	struct mtx *mutex;

	mutex = nfsrc_cachemutex(rp);
	mtx_lock(mutex);
	rp->rc_flag &= ~RC_LOCKED;
	nfsrc_wanted(rp);
	mtx_unlock(mutex);
}

/*
 * Wakeup anyone wanting entry.
 */
static void
nfsrc_wanted(struct nfsrvcache *rp)
{
	if (rp->rc_flag & RC_WANTED) {
		rp->rc_flag &= ~RC_WANTED;
		wakeup((caddr_t)rp);
	}
}

/*
 * Free up the entry.
 * Must not sleep.
 */
static void
nfsrc_freecache(struct nfsrvcache *rp)
{
	struct nfsrchash_bucket *hbp;

	LIST_REMOVE(rp, rc_hash);
	if (rp->rc_flag & RC_UDP) {
		TAILQ_REMOVE(&nfsrvudplru, rp, rc_lru);
		nfsrc_udpcachesize--;
	} else if (rp->rc_acked != RC_NO_SEQ) {
		hbp = NFSRCAHASH(rp->rc_sockref);
		mtx_lock(&hbp->mtx);
		if (rp->rc_acked == RC_NO_ACK)
			LIST_REMOVE(rp, rc_ahash);
		mtx_unlock(&hbp->mtx);
	}
	nfsrc_wanted(rp);
	if (rp->rc_flag & RC_REPMBUF) {
		mbuf_freem(rp->rc_reply);
		if (!(rp->rc_flag & RC_UDP))
			atomic_add_int(&nfsrc_tcpsavedreplies, -1);
	}
	FREE((caddr_t)rp, M_NFSRVCACHE);
	atomic_add_int(&nfsstatsv1.srvcache_size, -1);
}

/*
 * Clean out the cache. Called when nfsserver module is unloaded.
 */
APPLESTATIC void
nfsrvd_cleancache(void)
{
	struct nfsrvcache *rp, *nextrp;
	int i;

	for (i = 0; i < NFSRVCACHE_HASHSIZE; i++) {
		mtx_lock(&nfsrchash_table[i].mtx);
		LIST_FOREACH_SAFE(rp, &nfsrchash_table[i].tbl, rc_hash, nextrp)
			nfsrc_freecache(rp);
		mtx_unlock(&nfsrchash_table[i].mtx);
	}
	mtx_lock(&nfsrc_udpmtx);
	for (i = 0; i < NFSRVCACHE_HASHSIZE; i++) {
		LIST_FOREACH_SAFE(rp, &nfsrvudphashtbl[i], rc_hash, nextrp) {
			nfsrc_freecache(rp);
		}
	}
	nfsstatsv1.srvcache_size = 0;
	mtx_unlock(&nfsrc_udpmtx);
	nfsrc_tcpsavedreplies = 0;
}

#define HISTSIZE 16
/*
 * The basic rule is to get rid of entries that are expired.
 */
void
nfsrc_trimcache(u_int64_t sockref, uint32_t snd_una, int final)
{
	struct nfsrchash_bucket *hbp;
	struct nfsrvcache *rp, *nextrp;
	int force, lastslot, i, j, k, tto, time_histo[HISTSIZE];
	time_t thisstamp;
	static time_t udp_lasttrim = 0, tcp_lasttrim = 0;
	static int onethread = 0, oneslot = 0;

	if (sockref != 0) {
		hbp = NFSRCAHASH(sockref);
		mtx_lock(&hbp->mtx);
		LIST_FOREACH_SAFE(rp, &hbp->tbl, rc_ahash, nextrp) {
			if (sockref == rp->rc_sockref) {
				if (SEQ_GEQ(snd_una, rp->rc_tcpseq)) {
					rp->rc_acked = RC_ACK;
					LIST_REMOVE(rp, rc_ahash);
				} else if (final) {
					rp->rc_acked = RC_NACK;
					LIST_REMOVE(rp, rc_ahash);
				}
			}
		}
		mtx_unlock(&hbp->mtx);
	}

	if (atomic_cmpset_acq_int(&onethread, 0, 1) == 0)
		return;
	if (NFSD_MONOSEC != udp_lasttrim ||
	    nfsrc_udpcachesize >= (nfsrc_udphighwater +
	    nfsrc_udphighwater / 2)) {
		mtx_lock(&nfsrc_udpmtx);
		udp_lasttrim = NFSD_MONOSEC;
		TAILQ_FOREACH_SAFE(rp, &nfsrvudplru, rc_lru, nextrp) {
			if (!(rp->rc_flag & (RC_INPROG|RC_LOCKED|RC_WANTED))
			    && rp->rc_refcnt == 0
			    && ((rp->rc_flag & RC_REFCNT) ||
			    udp_lasttrim > rp->rc_timestamp ||
			    nfsrc_udpcachesize > nfsrc_udphighwater))
				nfsrc_freecache(rp);
		}
		mtx_unlock(&nfsrc_udpmtx);
	}
	if (NFSD_MONOSEC != tcp_lasttrim ||
	    nfsrc_tcpsavedreplies >= nfsrc_tcphighwater) {
		force = nfsrc_tcphighwater / 4;
		if (force > 0 &&
		    nfsrc_tcpsavedreplies + force >= nfsrc_tcphighwater) {
			for (i = 0; i < HISTSIZE; i++)
				time_histo[i] = 0;
			i = 0;
			lastslot = NFSRVCACHE_HASHSIZE - 1;
		} else {
			force = 0;
			if (NFSD_MONOSEC != tcp_lasttrim) {
				i = 0;
				lastslot = NFSRVCACHE_HASHSIZE - 1;
			} else {
				lastslot = i = oneslot;
				if (++oneslot >= NFSRVCACHE_HASHSIZE)
					oneslot = 0;
			}
		}
		tto = nfsrc_tcptimeout;
		tcp_lasttrim = NFSD_MONOSEC;
		for (; i <= lastslot; i++) {
			mtx_lock(&nfsrchash_table[i].mtx);
			LIST_FOREACH_SAFE(rp, &nfsrchash_table[i].tbl, rc_hash,
			    nextrp) {
				if (!(rp->rc_flag &
				    (RC_INPROG|RC_LOCKED|RC_WANTED))
				    && rp->rc_refcnt == 0) {
					if ((rp->rc_flag & RC_REFCNT) ||
					    tcp_lasttrim > rp->rc_timestamp ||
					    rp->rc_acked == RC_ACK) {
						nfsrc_freecache(rp);
						continue;
					}

					if (force == 0)
						continue;
					/*
					 * The timestamps range from roughly the
					 * present (tcp_lasttrim) to the present
					 * + nfsrc_tcptimeout. Generate a simple
					 * histogram of where the timeouts fall.
					 */
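					/*
					 * For example (illustrative numbers
					 * only): with tto == 300 seconds and
					 * HISTSIZE == 16, an entry due to
					 * expire 75 seconds from now lands in
					 * bucket 75 * 16 / 300 == 4.
					 */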
					j = rp->rc_timestamp - tcp_lasttrim;
					if (j >= tto)
						j = HISTSIZE - 1;
					else if (j < 0)
						j = 0;
					else
						j = j * HISTSIZE / tto;
					time_histo[j]++;
				}
			}
			mtx_unlock(&nfsrchash_table[i].mtx);
		}
		if (force) {
			/*
			 * Trim some more with a smaller timeout of as little
			 * as 20% of nfsrc_tcptimeout to try and get below
			 * 80% of the nfsrc_tcphighwater.
			 */
			k = 0;
			for (i = 0; i < (HISTSIZE - 2); i++) {
				k += time_histo[i];
				if (k > force)
					break;
			}
			k = tto * (i + 1) / HISTSIZE;
			if (k < 1)
				k = 1;
			thisstamp = tcp_lasttrim + k;
			for (i = 0; i < NFSRVCACHE_HASHSIZE; i++) {
				mtx_lock(&nfsrchash_table[i].mtx);
				LIST_FOREACH_SAFE(rp, &nfsrchash_table[i].tbl,
				    rc_hash, nextrp) {
					if (!(rp->rc_flag &
					    (RC_INPROG|RC_LOCKED|RC_WANTED))
					    && rp->rc_refcnt == 0
					    && ((rp->rc_flag & RC_REFCNT) ||
					    thisstamp > rp->rc_timestamp ||
					    rp->rc_acked == RC_ACK))
						nfsrc_freecache(rp);
				}
				mtx_unlock(&nfsrchash_table[i].mtx);
			}
		}
	}
	atomic_store_rel_int(&onethread, 0);
}

/*
 * Add a seqid# reference to the cache entry.
 */
APPLESTATIC void
nfsrvd_refcache(struct nfsrvcache *rp)
{
	struct mtx *mutex;

	if (rp == NULL)
		/* For NFSv4.1, there is no cache entry. */
		return;
	mutex = nfsrc_cachemutex(rp);
	mtx_lock(mutex);
	if (rp->rc_refcnt < 0)
		panic("nfs cache refcnt");
	rp->rc_refcnt++;
	mtx_unlock(mutex);
}

/*
 * Dereference a seqid# cache entry.
 */
APPLESTATIC void
nfsrvd_derefcache(struct nfsrvcache *rp)
{
	struct mtx *mutex;

	mutex = nfsrc_cachemutex(rp);
	mtx_lock(mutex);
	if (rp->rc_refcnt <= 0)
		panic("nfs cache derefcnt");
	rp->rc_refcnt--;
	if (rp->rc_refcnt == 0 && !(rp->rc_flag & (RC_LOCKED | RC_INPROG)))
		nfsrc_freecache(rp);
	mtx_unlock(mutex);
}

/*
 * Calculate the length of the mbuf list and a checksum on the first up to
 * NFSRVCACHE_CHECKLEN bytes.
 */
static int
nfsrc_getlenandcksum(mbuf_t m1, u_int16_t *cksum)
{
	int len = 0, cklen;
	mbuf_t m;

	m = m1;
	while (m) {
		len += mbuf_len(m);
		m = mbuf_next(m);
	}
	cklen = (len > NFSRVCACHE_CHECKLEN) ? NFSRVCACHE_CHECKLEN : len;
	*cksum = in_cksum(m1, cklen);
	return (len);
}
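
/*
 * Illustrative sketch (not compiled): a flat-buffer analogue of the helper
 * above.  The real code lets in_cksum() walk the mbuf chain; this
 * hypothetical version only shows that no more than the first
 * NFSRVCACHE_CHECKLEN (100) bytes of the XDR request contribute to the
 * checksum used for duplicate detection, while the full length is returned.
 */
#if 0
static int
example_lenandcksum(const uint8_t *req, size_t reqlen, uint16_t *cksum)
{
	size_t cklen, i;
	uint32_t sum = 0;

	cklen = (reqlen > NFSRVCACHE_CHECKLEN) ? NFSRVCACHE_CHECKLEN : reqlen;
	for (i = 0; i < cklen; i += 2) {
		uint16_t word = req[i] << 8;

		if (i + 1 < cklen)
			word |= req[i + 1];
		sum += word;
	}
	while (sum > 0xffff)			/* fold carries */
		sum = (sum & 0xffff) + (sum >> 16);
	*cksum = ~sum & 0xffff;			/* ones-complement result */
	return ((int)reqlen);			/* full request length */
}
#endif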

/*
 * Mark a TCP connection that is seeing retries. Should never happen for
 * NFSv4.
 */
static void
nfsrc_marksametcpconn(u_int64_t sockref)
{
}
