1*0Sstevel@tonic-gate /* 2*0Sstevel@tonic-gate * CDDL HEADER START 3*0Sstevel@tonic-gate * 4*0Sstevel@tonic-gate * The contents of this file are subject to the terms of the 5*0Sstevel@tonic-gate * Common Development and Distribution License, Version 1.0 only 6*0Sstevel@tonic-gate * (the "License"). You may not use this file except in compliance 7*0Sstevel@tonic-gate * with the License. 8*0Sstevel@tonic-gate * 9*0Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10*0Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing. 11*0Sstevel@tonic-gate * See the License for the specific language governing permissions 12*0Sstevel@tonic-gate * and limitations under the License. 13*0Sstevel@tonic-gate * 14*0Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each 15*0Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16*0Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the 17*0Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying 18*0Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner] 19*0Sstevel@tonic-gate * 20*0Sstevel@tonic-gate * CDDL HEADER END 21*0Sstevel@tonic-gate */ 22*0Sstevel@tonic-gate /* 23*0Sstevel@tonic-gate * Copyright 2004 Sun Microsystems, Inc. All rights reserved. 24*0Sstevel@tonic-gate * Use is subject to license terms. 25*0Sstevel@tonic-gate */ 26*0Sstevel@tonic-gate /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */ 27*0Sstevel@tonic-gate /* All Rights Reserved */ 28*0Sstevel@tonic-gate /* 29*0Sstevel@tonic-gate * Portions of this source code were derived from Berkeley 30*0Sstevel@tonic-gate * 4.3 BSD under license from the Regents of the University of 31*0Sstevel@tonic-gate * California. 
32*0Sstevel@tonic-gate */ 33*0Sstevel@tonic-gate 34*0Sstevel@tonic-gate #pragma ident "%Z%%M% %I% %E% SMI" 35*0Sstevel@tonic-gate 36*0Sstevel@tonic-gate /* 37*0Sstevel@tonic-gate * Server side of RPC over RDMA in the kernel. 38*0Sstevel@tonic-gate */ 39*0Sstevel@tonic-gate 40*0Sstevel@tonic-gate #include <sys/param.h> 41*0Sstevel@tonic-gate #include <sys/types.h> 42*0Sstevel@tonic-gate #include <sys/user.h> 43*0Sstevel@tonic-gate #include <sys/sysmacros.h> 44*0Sstevel@tonic-gate #include <sys/proc.h> 45*0Sstevel@tonic-gate #include <sys/file.h> 46*0Sstevel@tonic-gate #include <sys/errno.h> 47*0Sstevel@tonic-gate #include <sys/kmem.h> 48*0Sstevel@tonic-gate #include <sys/debug.h> 49*0Sstevel@tonic-gate #include <sys/systm.h> 50*0Sstevel@tonic-gate #include <sys/cmn_err.h> 51*0Sstevel@tonic-gate #include <sys/kstat.h> 52*0Sstevel@tonic-gate #include <sys/vtrace.h> 53*0Sstevel@tonic-gate #include <sys/debug.h> 54*0Sstevel@tonic-gate 55*0Sstevel@tonic-gate #include <rpc/types.h> 56*0Sstevel@tonic-gate #include <rpc/xdr.h> 57*0Sstevel@tonic-gate #include <rpc/auth.h> 58*0Sstevel@tonic-gate #include <rpc/clnt.h> 59*0Sstevel@tonic-gate #include <rpc/rpc_msg.h> 60*0Sstevel@tonic-gate #include <rpc/svc.h> 61*0Sstevel@tonic-gate #include <rpc/rpc_rdma.h> 62*0Sstevel@tonic-gate #include <sys/ddi.h> 63*0Sstevel@tonic-gate #include <sys/sunddi.h> 64*0Sstevel@tonic-gate 65*0Sstevel@tonic-gate #include <inet/common.h> 66*0Sstevel@tonic-gate #include <inet/ip.h> 67*0Sstevel@tonic-gate #include <inet/ip6.h> 68*0Sstevel@tonic-gate 69*0Sstevel@tonic-gate /* 70*0Sstevel@tonic-gate * RDMA transport specific data associated with SVCMASTERXPRT 71*0Sstevel@tonic-gate */ 72*0Sstevel@tonic-gate struct rdma_data { 73*0Sstevel@tonic-gate SVCMASTERXPRT *rd_xprt; /* back ptr to SVCMASTERXPRT */ 74*0Sstevel@tonic-gate struct rdma_svc_data rd_data; /* rdma data */ 75*0Sstevel@tonic-gate rdma_mod_t *r_mod; /* RDMA module containing ops ptr */ 76*0Sstevel@tonic-gate }; 77*0Sstevel@tonic-gate 
78*0Sstevel@tonic-gate /* 79*0Sstevel@tonic-gate * Plugin connection specific data stashed away in clone SVCXPRT 80*0Sstevel@tonic-gate */ 81*0Sstevel@tonic-gate struct clone_rdma_data { 82*0Sstevel@tonic-gate CONN *conn; /* RDMA connection */ 83*0Sstevel@tonic-gate rdma_buf_t rpcbuf; /* RPC req/resp buffer */ 84*0Sstevel@tonic-gate }; 85*0Sstevel@tonic-gate 86*0Sstevel@tonic-gate #ifdef DEBUG 87*0Sstevel@tonic-gate int rdma_svc_debug = 0; 88*0Sstevel@tonic-gate #endif 89*0Sstevel@tonic-gate 90*0Sstevel@tonic-gate #define MAXADDRLEN 128 /* max length for address mask */ 91*0Sstevel@tonic-gate 92*0Sstevel@tonic-gate /* 93*0Sstevel@tonic-gate * Routines exported through ops vector. 94*0Sstevel@tonic-gate */ 95*0Sstevel@tonic-gate static bool_t svc_rdma_krecv(SVCXPRT *, mblk_t *, struct rpc_msg *); 96*0Sstevel@tonic-gate static bool_t svc_rdma_ksend(SVCXPRT *, struct rpc_msg *); 97*0Sstevel@tonic-gate static bool_t svc_rdma_kgetargs(SVCXPRT *, xdrproc_t, caddr_t); 98*0Sstevel@tonic-gate static bool_t svc_rdma_kfreeargs(SVCXPRT *, xdrproc_t, caddr_t); 99*0Sstevel@tonic-gate void svc_rdma_kdestroy(SVCMASTERXPRT *); 100*0Sstevel@tonic-gate static int svc_rdma_kdup(struct svc_req *, caddr_t, int, 101*0Sstevel@tonic-gate struct dupreq **, bool_t *); 102*0Sstevel@tonic-gate static void svc_rdma_kdupdone(struct dupreq *, caddr_t, 103*0Sstevel@tonic-gate void (*)(), int, int); 104*0Sstevel@tonic-gate static int32_t *svc_rdma_kgetres(SVCXPRT *, int); 105*0Sstevel@tonic-gate static void svc_rdma_kfreeres(SVCXPRT *); 106*0Sstevel@tonic-gate static void svc_rdma_kclone_destroy(SVCXPRT *); 107*0Sstevel@tonic-gate static void svc_rdma_kstart(SVCMASTERXPRT *); 108*0Sstevel@tonic-gate void svc_rdma_kstop(SVCMASTERXPRT *); 109*0Sstevel@tonic-gate 110*0Sstevel@tonic-gate /* 111*0Sstevel@tonic-gate * Server transport operations vector. 
112*0Sstevel@tonic-gate */ 113*0Sstevel@tonic-gate struct svc_ops rdma_svc_ops = { 114*0Sstevel@tonic-gate svc_rdma_krecv, /* Get requests */ 115*0Sstevel@tonic-gate svc_rdma_kgetargs, /* Deserialize arguments */ 116*0Sstevel@tonic-gate svc_rdma_ksend, /* Send reply */ 117*0Sstevel@tonic-gate svc_rdma_kfreeargs, /* Free argument data space */ 118*0Sstevel@tonic-gate svc_rdma_kdestroy, /* Destroy transport handle */ 119*0Sstevel@tonic-gate svc_rdma_kdup, /* Check entry in dup req cache */ 120*0Sstevel@tonic-gate svc_rdma_kdupdone, /* Mark entry in dup req cache as done */ 121*0Sstevel@tonic-gate svc_rdma_kgetres, /* Get pointer to response buffer */ 122*0Sstevel@tonic-gate svc_rdma_kfreeres, /* Destroy pre-serialized response header */ 123*0Sstevel@tonic-gate svc_rdma_kclone_destroy, /* Destroy a clone xprt */ 124*0Sstevel@tonic-gate svc_rdma_kstart /* Tell `ready-to-receive' to rpcmod */ 125*0Sstevel@tonic-gate }; 126*0Sstevel@tonic-gate 127*0Sstevel@tonic-gate /* 128*0Sstevel@tonic-gate * Server statistics 129*0Sstevel@tonic-gate * NOTE: This structure type is duplicated in the NFS fast path. 
130*0Sstevel@tonic-gate */ 131*0Sstevel@tonic-gate struct { 132*0Sstevel@tonic-gate kstat_named_t rscalls; 133*0Sstevel@tonic-gate kstat_named_t rsbadcalls; 134*0Sstevel@tonic-gate kstat_named_t rsnullrecv; 135*0Sstevel@tonic-gate kstat_named_t rsbadlen; 136*0Sstevel@tonic-gate kstat_named_t rsxdrcall; 137*0Sstevel@tonic-gate kstat_named_t rsdupchecks; 138*0Sstevel@tonic-gate kstat_named_t rsdupreqs; 139*0Sstevel@tonic-gate kstat_named_t rslongrpcs; 140*0Sstevel@tonic-gate } rdmarsstat = { 141*0Sstevel@tonic-gate { "calls", KSTAT_DATA_UINT64 }, 142*0Sstevel@tonic-gate { "badcalls", KSTAT_DATA_UINT64 }, 143*0Sstevel@tonic-gate { "nullrecv", KSTAT_DATA_UINT64 }, 144*0Sstevel@tonic-gate { "badlen", KSTAT_DATA_UINT64 }, 145*0Sstevel@tonic-gate { "xdrcall", KSTAT_DATA_UINT64 }, 146*0Sstevel@tonic-gate { "dupchecks", KSTAT_DATA_UINT64 }, 147*0Sstevel@tonic-gate { "dupreqs", KSTAT_DATA_UINT64 }, 148*0Sstevel@tonic-gate { "longrpcs", KSTAT_DATA_UINT64 } 149*0Sstevel@tonic-gate }; 150*0Sstevel@tonic-gate 151*0Sstevel@tonic-gate kstat_named_t *rdmarsstat_ptr = (kstat_named_t *)&rdmarsstat; 152*0Sstevel@tonic-gate uint_t rdmarsstat_ndata = sizeof (rdmarsstat) / sizeof (kstat_named_t); 153*0Sstevel@tonic-gate 154*0Sstevel@tonic-gate #define RSSTAT_INCR(x) rdmarsstat.x.value.ui64++ 155*0Sstevel@tonic-gate 156*0Sstevel@tonic-gate /* 157*0Sstevel@tonic-gate * Create a transport record. 158*0Sstevel@tonic-gate * The transport record, output buffer, and private data structure 159*0Sstevel@tonic-gate * are allocated. The output buffer is serialized into using xdrmem. 160*0Sstevel@tonic-gate * There is one transport record per user process which implements a 161*0Sstevel@tonic-gate * set of services. 
162*0Sstevel@tonic-gate */ 163*0Sstevel@tonic-gate /* ARGSUSED */ 164*0Sstevel@tonic-gate int 165*0Sstevel@tonic-gate svc_rdma_kcreate(char *netid, SVC_CALLOUT_TABLE *sct, int id, 166*0Sstevel@tonic-gate rdma_xprt_group_t *started_xprts) 167*0Sstevel@tonic-gate { 168*0Sstevel@tonic-gate int error; 169*0Sstevel@tonic-gate SVCMASTERXPRT *xprt; 170*0Sstevel@tonic-gate struct rdma_data *rd; 171*0Sstevel@tonic-gate rdma_registry_t *rmod; 172*0Sstevel@tonic-gate rdma_xprt_record_t *xprt_rec; 173*0Sstevel@tonic-gate queue_t *q; 174*0Sstevel@tonic-gate 175*0Sstevel@tonic-gate /* 176*0Sstevel@tonic-gate * modload the RDMA plugins is not already done. 177*0Sstevel@tonic-gate */ 178*0Sstevel@tonic-gate if (!rdma_modloaded) { 179*0Sstevel@tonic-gate mutex_enter(&rdma_modload_lock); 180*0Sstevel@tonic-gate if (!rdma_modloaded) { 181*0Sstevel@tonic-gate error = rdma_modload(); 182*0Sstevel@tonic-gate } 183*0Sstevel@tonic-gate mutex_exit(&rdma_modload_lock); 184*0Sstevel@tonic-gate 185*0Sstevel@tonic-gate if (error) 186*0Sstevel@tonic-gate return (error); 187*0Sstevel@tonic-gate } 188*0Sstevel@tonic-gate 189*0Sstevel@tonic-gate /* 190*0Sstevel@tonic-gate * master_xprt_count is the count of master transport handles 191*0Sstevel@tonic-gate * that were successfully created and are ready to recieve for 192*0Sstevel@tonic-gate * RDMA based access. 193*0Sstevel@tonic-gate */ 194*0Sstevel@tonic-gate error = 0; 195*0Sstevel@tonic-gate xprt_rec = NULL; 196*0Sstevel@tonic-gate rw_enter(&rdma_lock, RW_READER); 197*0Sstevel@tonic-gate if (rdma_mod_head == NULL) { 198*0Sstevel@tonic-gate started_xprts->rtg_count = 0; 199*0Sstevel@tonic-gate rw_exit(&rdma_lock); 200*0Sstevel@tonic-gate if (rdma_dev_available) 201*0Sstevel@tonic-gate return (EPROTONOSUPPORT); 202*0Sstevel@tonic-gate else 203*0Sstevel@tonic-gate return (ENODEV); 204*0Sstevel@tonic-gate } 205*0Sstevel@tonic-gate 206*0Sstevel@tonic-gate /* 207*0Sstevel@tonic-gate * If we have reached here, then atleast one RDMA plugin has loaded. 
208*0Sstevel@tonic-gate * Create a master_xprt, make it start listenining on the device, 209*0Sstevel@tonic-gate * if an error is generated, record it, we might need to shut 210*0Sstevel@tonic-gate * the master_xprt. 211*0Sstevel@tonic-gate * SVC_START() calls svc_rdma_kstart which calls plugin binding 212*0Sstevel@tonic-gate * routines. 213*0Sstevel@tonic-gate */ 214*0Sstevel@tonic-gate for (rmod = rdma_mod_head; rmod != NULL; rmod = rmod->r_next) { 215*0Sstevel@tonic-gate 216*0Sstevel@tonic-gate /* 217*0Sstevel@tonic-gate * One SVCMASTERXPRT per RDMA plugin. 218*0Sstevel@tonic-gate */ 219*0Sstevel@tonic-gate xprt = kmem_zalloc(sizeof (*xprt), KM_SLEEP); 220*0Sstevel@tonic-gate xprt->xp_ops = &rdma_svc_ops; 221*0Sstevel@tonic-gate xprt->xp_sct = sct; 222*0Sstevel@tonic-gate xprt->xp_type = T_RDMA; 223*0Sstevel@tonic-gate mutex_init(&xprt->xp_req_lock, NULL, MUTEX_DEFAULT, NULL); 224*0Sstevel@tonic-gate mutex_init(&xprt->xp_thread_lock, NULL, MUTEX_DEFAULT, NULL); 225*0Sstevel@tonic-gate xprt->xp_req_head = (mblk_t *)0; 226*0Sstevel@tonic-gate xprt->xp_req_tail = (mblk_t *)0; 227*0Sstevel@tonic-gate xprt->xp_threads = 0; 228*0Sstevel@tonic-gate xprt->xp_detached_threads = 0; 229*0Sstevel@tonic-gate 230*0Sstevel@tonic-gate rd = kmem_zalloc(sizeof (*rd), KM_SLEEP); 231*0Sstevel@tonic-gate xprt->xp_p2 = (caddr_t)rd; 232*0Sstevel@tonic-gate rd->rd_xprt = xprt; 233*0Sstevel@tonic-gate rd->r_mod = rmod->r_mod; 234*0Sstevel@tonic-gate 235*0Sstevel@tonic-gate q = &rd->rd_data.q; 236*0Sstevel@tonic-gate xprt->xp_wq = q; 237*0Sstevel@tonic-gate q->q_ptr = &rd->rd_xprt; 238*0Sstevel@tonic-gate xprt->xp_netid = NULL; 239*0Sstevel@tonic-gate 240*0Sstevel@tonic-gate if (netid != NULL) { 241*0Sstevel@tonic-gate xprt->xp_netid = kmem_alloc(strlen(netid) + 1, 242*0Sstevel@tonic-gate KM_SLEEP); 243*0Sstevel@tonic-gate (void) strcpy(xprt->xp_netid, netid); 244*0Sstevel@tonic-gate } 245*0Sstevel@tonic-gate 246*0Sstevel@tonic-gate xprt->xp_addrmask.maxlen = 247*0Sstevel@tonic-gate 
xprt->xp_addrmask.len = sizeof (struct sockaddr_in); 248*0Sstevel@tonic-gate xprt->xp_addrmask.buf = 249*0Sstevel@tonic-gate kmem_zalloc(xprt->xp_addrmask.len, KM_SLEEP); 250*0Sstevel@tonic-gate ((struct sockaddr_in *)xprt->xp_addrmask.buf)->sin_addr.s_addr = 251*0Sstevel@tonic-gate (uint32_t)~0; 252*0Sstevel@tonic-gate ((struct sockaddr_in *)xprt->xp_addrmask.buf)->sin_family = 253*0Sstevel@tonic-gate (ushort_t)~0; 254*0Sstevel@tonic-gate 255*0Sstevel@tonic-gate /* 256*0Sstevel@tonic-gate * Each of the plugins will have their own Service ID 257*0Sstevel@tonic-gate * to listener specific mapping, like port number for VI 258*0Sstevel@tonic-gate * and service name for IB. 259*0Sstevel@tonic-gate */ 260*0Sstevel@tonic-gate rd->rd_data.svcid = id; 261*0Sstevel@tonic-gate error = svc_xprt_register(xprt, id); 262*0Sstevel@tonic-gate if (error) { 263*0Sstevel@tonic-gate cmn_err(CE_WARN, "svc_rdma_kcreate: svc_xprt_register" 264*0Sstevel@tonic-gate "failed"); 265*0Sstevel@tonic-gate goto cleanup; 266*0Sstevel@tonic-gate } 267*0Sstevel@tonic-gate 268*0Sstevel@tonic-gate SVC_START(xprt); 269*0Sstevel@tonic-gate if (!rd->rd_data.active) { 270*0Sstevel@tonic-gate svc_xprt_unregister(xprt); 271*0Sstevel@tonic-gate error = rd->rd_data.err_code; 272*0Sstevel@tonic-gate goto cleanup; 273*0Sstevel@tonic-gate } 274*0Sstevel@tonic-gate 275*0Sstevel@tonic-gate /* 276*0Sstevel@tonic-gate * This is set only when there is atleast one or more 277*0Sstevel@tonic-gate * transports successfully created. We insert the pointer 278*0Sstevel@tonic-gate * to the created RDMA master xprt into a separately maintained 279*0Sstevel@tonic-gate * list. This way we can easily reference it later to cleanup, 280*0Sstevel@tonic-gate * when NFS kRPC service pool is going away/unregistered. 
281*0Sstevel@tonic-gate */ 282*0Sstevel@tonic-gate started_xprts->rtg_count ++; 283*0Sstevel@tonic-gate xprt_rec = kmem_alloc(sizeof (*xprt_rec), KM_SLEEP); 284*0Sstevel@tonic-gate xprt_rec->rtr_xprt_ptr = xprt; 285*0Sstevel@tonic-gate xprt_rec->rtr_next = started_xprts->rtg_listhead; 286*0Sstevel@tonic-gate started_xprts->rtg_listhead = xprt_rec; 287*0Sstevel@tonic-gate continue; 288*0Sstevel@tonic-gate cleanup: 289*0Sstevel@tonic-gate SVC_DESTROY(xprt); 290*0Sstevel@tonic-gate if (error == RDMA_FAILED) 291*0Sstevel@tonic-gate error = EPROTONOSUPPORT; 292*0Sstevel@tonic-gate } 293*0Sstevel@tonic-gate 294*0Sstevel@tonic-gate rw_exit(&rdma_lock); 295*0Sstevel@tonic-gate 296*0Sstevel@tonic-gate /* 297*0Sstevel@tonic-gate * Don't return any error even if a single plugin was started 298*0Sstevel@tonic-gate * successfully. 299*0Sstevel@tonic-gate */ 300*0Sstevel@tonic-gate if (started_xprts->rtg_count == 0) 301*0Sstevel@tonic-gate return (error); 302*0Sstevel@tonic-gate return (0); 303*0Sstevel@tonic-gate } 304*0Sstevel@tonic-gate 305*0Sstevel@tonic-gate /* 306*0Sstevel@tonic-gate * Cleanup routine for freeing up memory allocated by 307*0Sstevel@tonic-gate * svc_rdma_kcreate() 308*0Sstevel@tonic-gate */ 309*0Sstevel@tonic-gate void 310*0Sstevel@tonic-gate svc_rdma_kdestroy(SVCMASTERXPRT *xprt) 311*0Sstevel@tonic-gate { 312*0Sstevel@tonic-gate struct rdma_data *rd = (struct rdma_data *)xprt->xp_p2; 313*0Sstevel@tonic-gate 314*0Sstevel@tonic-gate 315*0Sstevel@tonic-gate mutex_destroy(&xprt->xp_req_lock); 316*0Sstevel@tonic-gate mutex_destroy(&xprt->xp_thread_lock); 317*0Sstevel@tonic-gate kmem_free(xprt->xp_netid, strlen(xprt->xp_netid) + 1); 318*0Sstevel@tonic-gate kmem_free(rd, sizeof (*rd)); 319*0Sstevel@tonic-gate kmem_free(xprt->xp_addrmask.buf, xprt->xp_addrmask.maxlen); 320*0Sstevel@tonic-gate kmem_free(xprt, sizeof (*xprt)); 321*0Sstevel@tonic-gate } 322*0Sstevel@tonic-gate 323*0Sstevel@tonic-gate 324*0Sstevel@tonic-gate static void 325*0Sstevel@tonic-gate 
svc_rdma_kstart(SVCMASTERXPRT *xprt) 326*0Sstevel@tonic-gate { 327*0Sstevel@tonic-gate struct rdma_svc_data *svcdata; 328*0Sstevel@tonic-gate rdma_mod_t *rmod; 329*0Sstevel@tonic-gate 330*0Sstevel@tonic-gate svcdata = &((struct rdma_data *)xprt->xp_p2)->rd_data; 331*0Sstevel@tonic-gate rmod = ((struct rdma_data *)xprt->xp_p2)->r_mod; 332*0Sstevel@tonic-gate 333*0Sstevel@tonic-gate /* 334*0Sstevel@tonic-gate * Create a listener for module at this port 335*0Sstevel@tonic-gate */ 336*0Sstevel@tonic-gate 337*0Sstevel@tonic-gate (*rmod->rdma_ops->rdma_svc_listen)(svcdata); 338*0Sstevel@tonic-gate } 339*0Sstevel@tonic-gate 340*0Sstevel@tonic-gate void 341*0Sstevel@tonic-gate svc_rdma_kstop(SVCMASTERXPRT *xprt) 342*0Sstevel@tonic-gate { 343*0Sstevel@tonic-gate struct rdma_svc_data *svcdata; 344*0Sstevel@tonic-gate rdma_mod_t *rmod; 345*0Sstevel@tonic-gate 346*0Sstevel@tonic-gate svcdata = &((struct rdma_data *)xprt->xp_p2)->rd_data; 347*0Sstevel@tonic-gate rmod = ((struct rdma_data *)xprt->xp_p2)->r_mod; 348*0Sstevel@tonic-gate 349*0Sstevel@tonic-gate /* 350*0Sstevel@tonic-gate * Call the stop listener routine for each plugin. 
351*0Sstevel@tonic-gate */ 352*0Sstevel@tonic-gate (*rmod->rdma_ops->rdma_svc_stop)(svcdata); 353*0Sstevel@tonic-gate if (svcdata->active) 354*0Sstevel@tonic-gate cmn_err(CE_WARN, "rdma_stop: Failed to shutdown RDMA based kRPC" 355*0Sstevel@tonic-gate " listener"); 356*0Sstevel@tonic-gate } 357*0Sstevel@tonic-gate 358*0Sstevel@tonic-gate /* ARGSUSED */ 359*0Sstevel@tonic-gate static void 360*0Sstevel@tonic-gate svc_rdma_kclone_destroy(SVCXPRT *clone_xprt) 361*0Sstevel@tonic-gate { 362*0Sstevel@tonic-gate } 363*0Sstevel@tonic-gate 364*0Sstevel@tonic-gate static bool_t 365*0Sstevel@tonic-gate svc_rdma_krecv(SVCXPRT *clone_xprt, mblk_t *mp, struct rpc_msg *msg) 366*0Sstevel@tonic-gate { 367*0Sstevel@tonic-gate XDR *xdrs; 368*0Sstevel@tonic-gate rdma_stat status; 369*0Sstevel@tonic-gate struct recv_data *rdp = (struct recv_data *)mp->b_rptr; 370*0Sstevel@tonic-gate CONN *conn; 371*0Sstevel@tonic-gate struct clone_rdma_data *vd; 372*0Sstevel@tonic-gate struct clist *cl; 373*0Sstevel@tonic-gate uint_t vers, op, pos; 374*0Sstevel@tonic-gate uint32_t xid; 375*0Sstevel@tonic-gate 376*0Sstevel@tonic-gate vd = (struct clone_rdma_data *)clone_xprt->xp_p2buf; 377*0Sstevel@tonic-gate RSSTAT_INCR(rscalls); 378*0Sstevel@tonic-gate conn = rdp->conn; 379*0Sstevel@tonic-gate 380*0Sstevel@tonic-gate /* 381*0Sstevel@tonic-gate * Post a receive descriptor on this 382*0Sstevel@tonic-gate * endpoint to ensure all packets are received. 
383*0Sstevel@tonic-gate */ 384*0Sstevel@tonic-gate status = rdma_svc_postrecv(conn); 385*0Sstevel@tonic-gate if (status != RDMA_SUCCESS) { 386*0Sstevel@tonic-gate cmn_err(CE_NOTE, 387*0Sstevel@tonic-gate "svc_rdma_krecv: rdma_svc_postrecv failed %d", status); 388*0Sstevel@tonic-gate } 389*0Sstevel@tonic-gate 390*0Sstevel@tonic-gate if (rdp->status != 0) { 391*0Sstevel@tonic-gate RDMA_BUF_FREE(conn, &rdp->rpcmsg); 392*0Sstevel@tonic-gate RDMA_REL_CONN(conn); 393*0Sstevel@tonic-gate RSSTAT_INCR(rsbadcalls); 394*0Sstevel@tonic-gate freeb(mp); 395*0Sstevel@tonic-gate return (FALSE); 396*0Sstevel@tonic-gate } 397*0Sstevel@tonic-gate 398*0Sstevel@tonic-gate /* 399*0Sstevel@tonic-gate * Decode rpc message 400*0Sstevel@tonic-gate */ 401*0Sstevel@tonic-gate xdrs = &clone_xprt->xp_xdrin; 402*0Sstevel@tonic-gate xdrmem_create(xdrs, rdp->rpcmsg.addr, rdp->rpcmsg.len, XDR_DECODE); 403*0Sstevel@tonic-gate 404*0Sstevel@tonic-gate /* 405*0Sstevel@tonic-gate * Get the XID 406*0Sstevel@tonic-gate */ 407*0Sstevel@tonic-gate /* 408*0Sstevel@tonic-gate * Treat xid as opaque (xid is the first entity 409*0Sstevel@tonic-gate * in the rpc rdma message). 410*0Sstevel@tonic-gate */ 411*0Sstevel@tonic-gate xid = *(uint32_t *)rdp->rpcmsg.addr; 412*0Sstevel@tonic-gate /* Skip xid and set the xdr position accordingly. */ 413*0Sstevel@tonic-gate XDR_SETPOS(xdrs, sizeof (uint32_t)); 414*0Sstevel@tonic-gate if (! xdr_u_int(xdrs, &vers) || 415*0Sstevel@tonic-gate ! 
xdr_u_int(xdrs, &op)) { 416*0Sstevel@tonic-gate cmn_err(CE_WARN, "svc_rdma_krecv: xdr_u_int failed"); 417*0Sstevel@tonic-gate XDR_DESTROY(xdrs); 418*0Sstevel@tonic-gate RDMA_BUF_FREE(conn, &rdp->rpcmsg); 419*0Sstevel@tonic-gate RDMA_REL_CONN(conn); 420*0Sstevel@tonic-gate freeb(mp); 421*0Sstevel@tonic-gate RSSTAT_INCR(rsbadcalls); 422*0Sstevel@tonic-gate return (FALSE); 423*0Sstevel@tonic-gate } 424*0Sstevel@tonic-gate if (op == RDMA_DONE) { 425*0Sstevel@tonic-gate /* 426*0Sstevel@tonic-gate * Should not get RDMA_DONE 427*0Sstevel@tonic-gate */ 428*0Sstevel@tonic-gate freeb(mp); 429*0Sstevel@tonic-gate XDR_DESTROY(xdrs); 430*0Sstevel@tonic-gate RDMA_BUF_FREE(conn, &rdp->rpcmsg); 431*0Sstevel@tonic-gate RDMA_REL_CONN(conn); 432*0Sstevel@tonic-gate RSSTAT_INCR(rsbadcalls); 433*0Sstevel@tonic-gate return (FALSE); /* no response */ 434*0Sstevel@tonic-gate } 435*0Sstevel@tonic-gate 436*0Sstevel@tonic-gate #ifdef DEBUG 437*0Sstevel@tonic-gate if (rdma_svc_debug) 438*0Sstevel@tonic-gate printf("svc_rdma_krecv: recv'd call xid %u\n", xid); 439*0Sstevel@tonic-gate #endif 440*0Sstevel@tonic-gate /* 441*0Sstevel@tonic-gate * Now decode the chunk list 442*0Sstevel@tonic-gate */ 443*0Sstevel@tonic-gate cl = NULL; 444*0Sstevel@tonic-gate if (! xdr_do_clist(xdrs, &cl)) { 445*0Sstevel@tonic-gate cmn_err(CE_WARN, "svc_rdma_krecv: xdr_do_clist failed"); 446*0Sstevel@tonic-gate } 447*0Sstevel@tonic-gate 448*0Sstevel@tonic-gate /* 449*0Sstevel@tonic-gate * A chunk at 0 offset indicates that the RPC call message 450*0Sstevel@tonic-gate * is in a chunk. Get the RPC call message chunk. 
451*0Sstevel@tonic-gate */ 452*0Sstevel@tonic-gate if (cl != NULL && op == RDMA_NOMSG) { 453*0Sstevel@tonic-gate struct clist *cllong; /* Long RPC chunk */ 454*0Sstevel@tonic-gate 455*0Sstevel@tonic-gate /* Remove RPC call message chunk from chunklist */ 456*0Sstevel@tonic-gate cllong = cl; 457*0Sstevel@tonic-gate cl = cl->c_next; 458*0Sstevel@tonic-gate cllong->c_next = NULL; 459*0Sstevel@tonic-gate 460*0Sstevel@tonic-gate /* Allocate and register memory for the RPC call msg chunk */ 461*0Sstevel@tonic-gate cllong->c_daddr = (uint64)(uintptr_t) 462*0Sstevel@tonic-gate kmem_alloc(cllong->c_len, KM_SLEEP); 463*0Sstevel@tonic-gate if (cllong->c_daddr == NULL) { 464*0Sstevel@tonic-gate cmn_err(CE_WARN, 465*0Sstevel@tonic-gate "svc_rdma_krecv: no memory for rpc call"); 466*0Sstevel@tonic-gate XDR_DESTROY(xdrs); 467*0Sstevel@tonic-gate RDMA_BUF_FREE(conn, &rdp->rpcmsg); 468*0Sstevel@tonic-gate RDMA_REL_CONN(conn); 469*0Sstevel@tonic-gate freeb(mp); 470*0Sstevel@tonic-gate RSSTAT_INCR(rsbadcalls); 471*0Sstevel@tonic-gate clist_free(cl); 472*0Sstevel@tonic-gate clist_free(cllong); 473*0Sstevel@tonic-gate return (FALSE); 474*0Sstevel@tonic-gate } 475*0Sstevel@tonic-gate status = clist_register(conn, cllong, 0); 476*0Sstevel@tonic-gate if (status) { 477*0Sstevel@tonic-gate cmn_err(CE_WARN, 478*0Sstevel@tonic-gate "svc_rdma_krecv: clist_register failed"); 479*0Sstevel@tonic-gate kmem_free((void *)(uintptr_t)cllong->c_daddr, 480*0Sstevel@tonic-gate cllong->c_len); 481*0Sstevel@tonic-gate XDR_DESTROY(xdrs); 482*0Sstevel@tonic-gate RDMA_BUF_FREE(conn, &rdp->rpcmsg); 483*0Sstevel@tonic-gate RDMA_REL_CONN(conn); 484*0Sstevel@tonic-gate freeb(mp); 485*0Sstevel@tonic-gate RSSTAT_INCR(rsbadcalls); 486*0Sstevel@tonic-gate clist_free(cl); 487*0Sstevel@tonic-gate clist_free(cllong); 488*0Sstevel@tonic-gate return (FALSE); 489*0Sstevel@tonic-gate } 490*0Sstevel@tonic-gate 491*0Sstevel@tonic-gate /* 492*0Sstevel@tonic-gate * Now read the RPC call message in 493*0Sstevel@tonic-gate */ 
494*0Sstevel@tonic-gate status = RDMA_READ(conn, cllong, WAIT); 495*0Sstevel@tonic-gate if (status) { 496*0Sstevel@tonic-gate cmn_err(CE_WARN, 497*0Sstevel@tonic-gate "svc_rdma_krecv: rdma_read failed %d", status); 498*0Sstevel@tonic-gate (void) clist_deregister(conn, cllong, 0); 499*0Sstevel@tonic-gate kmem_free((void *)(uintptr_t)cllong->c_daddr, 500*0Sstevel@tonic-gate cllong->c_len); 501*0Sstevel@tonic-gate XDR_DESTROY(xdrs); 502*0Sstevel@tonic-gate RDMA_BUF_FREE(conn, &rdp->rpcmsg); 503*0Sstevel@tonic-gate RDMA_REL_CONN(conn); 504*0Sstevel@tonic-gate freeb(mp); 505*0Sstevel@tonic-gate RSSTAT_INCR(rsbadcalls); 506*0Sstevel@tonic-gate clist_free(cl); 507*0Sstevel@tonic-gate clist_free(cllong); 508*0Sstevel@tonic-gate return (FALSE); 509*0Sstevel@tonic-gate } 510*0Sstevel@tonic-gate /* 511*0Sstevel@tonic-gate * Sync memory for CPU after DMA 512*0Sstevel@tonic-gate */ 513*0Sstevel@tonic-gate status = clist_syncmem(conn, cllong, 0); 514*0Sstevel@tonic-gate 515*0Sstevel@tonic-gate /* 516*0Sstevel@tonic-gate * Deregister the chunk 517*0Sstevel@tonic-gate */ 518*0Sstevel@tonic-gate (void) clist_deregister(conn, cllong, 0); 519*0Sstevel@tonic-gate 520*0Sstevel@tonic-gate /* 521*0Sstevel@tonic-gate * Setup the XDR for the RPC call message 522*0Sstevel@tonic-gate */ 523*0Sstevel@tonic-gate xdrrdma_create(xdrs, (caddr_t)(uintptr_t)cllong->c_daddr, 524*0Sstevel@tonic-gate cllong->c_len, 0, cl, XDR_DECODE, conn); 525*0Sstevel@tonic-gate vd->rpcbuf.type = CHUNK_BUFFER; 526*0Sstevel@tonic-gate vd->rpcbuf.addr = (caddr_t)(uintptr_t)cllong->c_daddr; 527*0Sstevel@tonic-gate vd->rpcbuf.len = cllong->c_len; 528*0Sstevel@tonic-gate vd->rpcbuf.handle.mrc_rmr = 0; 529*0Sstevel@tonic-gate 530*0Sstevel@tonic-gate /* 531*0Sstevel@tonic-gate * Free the chunk element with the Long RPC details and 532*0Sstevel@tonic-gate * the message received. 
533*0Sstevel@tonic-gate */ 534*0Sstevel@tonic-gate clist_free(cllong); 535*0Sstevel@tonic-gate RDMA_BUF_FREE(conn, &rdp->rpcmsg); 536*0Sstevel@tonic-gate } else { 537*0Sstevel@tonic-gate pos = XDR_GETPOS(xdrs); 538*0Sstevel@tonic-gate 539*0Sstevel@tonic-gate /* 540*0Sstevel@tonic-gate * Now the RPC call message header 541*0Sstevel@tonic-gate */ 542*0Sstevel@tonic-gate xdrrdma_create(xdrs, rdp->rpcmsg.addr + pos, 543*0Sstevel@tonic-gate rdp->rpcmsg.len - pos, 0, cl, XDR_DECODE, conn); 544*0Sstevel@tonic-gate vd->rpcbuf = rdp->rpcmsg; 545*0Sstevel@tonic-gate } 546*0Sstevel@tonic-gate if (! xdr_callmsg(xdrs, msg)) { 547*0Sstevel@tonic-gate cmn_err(CE_WARN, "svc_rdma_krecv: xdr_callmsg failed"); 548*0Sstevel@tonic-gate if (cl != NULL) 549*0Sstevel@tonic-gate clist_free(cl); 550*0Sstevel@tonic-gate XDR_DESTROY(xdrs); 551*0Sstevel@tonic-gate rdma_buf_free(conn, &vd->rpcbuf); 552*0Sstevel@tonic-gate RDMA_REL_CONN(conn); 553*0Sstevel@tonic-gate freeb(mp); 554*0Sstevel@tonic-gate RSSTAT_INCR(rsxdrcall); 555*0Sstevel@tonic-gate RSSTAT_INCR(rsbadcalls); 556*0Sstevel@tonic-gate return (FALSE); 557*0Sstevel@tonic-gate } 558*0Sstevel@tonic-gate 559*0Sstevel@tonic-gate /* 560*0Sstevel@tonic-gate * Point the remote transport address in the service_transport 561*0Sstevel@tonic-gate * handle at the address in the request. 
562*0Sstevel@tonic-gate */ 563*0Sstevel@tonic-gate clone_xprt->xp_rtaddr.buf = conn->c_raddr.buf; 564*0Sstevel@tonic-gate clone_xprt->xp_rtaddr.len = conn->c_raddr.len; 565*0Sstevel@tonic-gate clone_xprt->xp_rtaddr.maxlen = conn->c_raddr.len; 566*0Sstevel@tonic-gate 567*0Sstevel@tonic-gate #ifdef DEBUG 568*0Sstevel@tonic-gate if (rdma_svc_debug) { 569*0Sstevel@tonic-gate struct sockaddr_in *sin4; 570*0Sstevel@tonic-gate char print_addr[INET_ADDRSTRLEN]; 571*0Sstevel@tonic-gate 572*0Sstevel@tonic-gate sin4 = (struct sockaddr_in *)clone_xprt->xp_rtaddr.buf; 573*0Sstevel@tonic-gate bzero(print_addr, INET_ADDRSTRLEN); 574*0Sstevel@tonic-gate (void) inet_ntop(AF_INET, 575*0Sstevel@tonic-gate &sin4->sin_addr, print_addr, INET_ADDRSTRLEN); 576*0Sstevel@tonic-gate cmn_err(CE_NOTE, 577*0Sstevel@tonic-gate "svc_rdma_krecv: remote clnt_addr: %s", print_addr); 578*0Sstevel@tonic-gate } 579*0Sstevel@tonic-gate #endif 580*0Sstevel@tonic-gate 581*0Sstevel@tonic-gate clone_xprt->xp_xid = xid; 582*0Sstevel@tonic-gate vd->conn = conn; 583*0Sstevel@tonic-gate freeb(mp); 584*0Sstevel@tonic-gate return (TRUE); 585*0Sstevel@tonic-gate } 586*0Sstevel@tonic-gate 587*0Sstevel@tonic-gate /* 588*0Sstevel@tonic-gate * Send rpc reply. 
 */

/*
 * Send an RPC reply over RDMA.
 *
 * The reply is XDR-encoded either into a pre-allocated SEND_BUFFER and
 * sent inline (op == RDMA_MSG), or, when the encoded message exceeds
 * RPC_MSG_SZ, into a one-time-use registered CHUNK_BUFFER that the
 * client pulls with an RDMA read (op == RDMA_NOMSG, a "Long RPC").
 * A separate small SEND_BUFFER (clmsg) always carries the RPC/RDMA
 * header: xid, version, op, and the chunk list.
 *
 * Returns TRUE on success, FALSE on any failure (allocation, encode,
 * registration, or send).
 */
static bool_t
svc_rdma_ksend(SVCXPRT *clone_xprt, struct rpc_msg *msg)
{
	struct clone_rdma_data *vd;
	/* xdrs initially encodes the reply body; later retargeted to rxdrs */
	XDR *xdrs = &(clone_xprt->xp_xdrout), rxdrs;
	int retval = FALSE;
	xdrproc_t xdr_results;		/* caller's results encoder, if any */
	caddr_t xdr_location;		/* caller's results to encode */
	bool_t has_args, reg = FALSE;	/* reg: chunk list registered? */
	uint_t len, op;			/* len: encoded reply length */
	uint_t vers;
	struct clist *cl = NULL, *cle = NULL;
	struct clist *sendlist = NULL;
	int status;
	int msglen;
	rdma_buf_t clmsg, longreply, rpcreply;

	vd = (struct clone_rdma_data *)clone_xprt->xp_p2buf;

	/*
	 * If there is a result procedure specified in the reply message,
	 * it will be processed in the xdr_replymsg and SVCAUTH_WRAP.
	 * We need to make sure it won't be processed twice, so we null
	 * it for xdr_replymsg here.
	 */
	has_args = FALSE;
	if (msg->rm_reply.rp_stat == MSG_ACCEPTED &&
	    msg->rm_reply.rp_acpt.ar_stat == SUCCESS) {
		if ((xdr_results = msg->acpted_rply.ar_results.proc) != NULL) {
			has_args = TRUE;
			xdr_location = msg->acpted_rply.ar_results.where;
			msg->acpted_rply.ar_results.proc = xdr_void;
			msg->acpted_rply.ar_results.where = NULL;
		}
	}

	/*
	 * Get the size of the rpc reply message. Need this
	 * to determine if the rpc reply message will fit in
	 * the pre-allocated RDMA buffers. If the rpc reply
	 * message length is greater that the pre-allocated
	 * buffers then, a one time use buffer is allocated
	 * and registered for this rpc reply.
	 */
	msglen = xdr_sizeof(xdr_replymsg, msg);
	if (has_args && msg->rm_reply.rp_acpt.ar_verf.oa_flavor != RPCSEC_GSS) {
		/* Non-GSS replies can be presized accurately. */
		msglen += xdrrdma_sizeof(xdr_results, xdr_location,
		    rdma_minchunk);
		if (msglen > RPC_MSG_SZ) {

			/*
			 * Allocate chunk buffer for rpc reply
			 */
			rpcreply.type = CHUNK_BUFFER;
			rpcreply.addr = kmem_zalloc(msglen, KM_SLEEP);
			cle = kmem_zalloc(sizeof (*cle), KM_SLEEP);
			cle->c_xdroff = 0;
			cle->c_len = rpcreply.len = msglen;
			cle->c_saddr = (uint64)(uintptr_t)rpcreply.addr;
			cle->c_next = NULL;
			xdrrdma_create(xdrs, rpcreply.addr, msglen,
			    rdma_minchunk, cle, XDR_ENCODE, NULL);
			op = RDMA_NOMSG;
		} else {
			/*
			 * Get a pre-allocated buffer for rpc reply
			 */
			rpcreply.type = SEND_BUFFER;
			if (RDMA_BUF_ALLOC(vd->conn, &rpcreply)) {
				cmn_err(CE_WARN,
				    "svc_rdma_ksend: no free buffers!");
				return (retval);
			}
			xdrrdma_create(xdrs, rpcreply.addr, rpcreply.len,
			    rdma_minchunk, NULL, XDR_ENCODE, NULL);
			op = RDMA_MSG;
		}

		/*
		 * Initialize the XDR encode stream.
		 */
		msg->rm_xid = clone_xprt->xp_xid;

		if (!(xdr_replymsg(xdrs, msg) &&
		    (!has_args || SVCAUTH_WRAP(&clone_xprt->xp_auth, xdrs,
		    xdr_results, xdr_location)))) {
			rdma_buf_free(vd->conn, &rpcreply);
			if (cle)
				clist_free(cle);
			cmn_err(CE_WARN,
			    "svc_rdma_ksend: xdr_replymsg/SVCAUTH_WRAP "
			    "failed");
			goto out;
		}
		len = XDR_GETPOS(xdrs);
	}
	if (has_args && msg->rm_reply.rp_acpt.ar_verf.oa_flavor == RPCSEC_GSS) {

		/*
		 * For RPCSEC_GSS since we cannot accurately presize the
		 * buffer required for encoding, we assume that its going
		 * to be a Long RPC to start with. We also create the
		 * the XDR stream with min_chunk set to 0 which instructs
		 * the XDR layer to not chunk the incoming byte stream.
		 */
		msglen += 2 * MAX_AUTH_BYTES + 2 * sizeof (struct opaque_auth);
		msglen += xdr_sizeof(xdr_results, xdr_location);

		/*
		 * Long RPC. Allocate one time use custom buffer.
		 */
		longreply.type = CHUNK_BUFFER;
		longreply.addr = kmem_zalloc(msglen, KM_SLEEP);
		cle = kmem_zalloc(sizeof (*cle), KM_SLEEP);
		cle->c_xdroff = 0;
		cle->c_len = longreply.len = msglen;
		cle->c_saddr = (uint64)(uintptr_t)longreply.addr;
		cle->c_next = NULL;
		xdrrdma_create(xdrs, longreply.addr, msglen, 0, cle,
		    XDR_ENCODE, NULL);
		op = RDMA_NOMSG;
		/*
		 * Initialize the XDR encode stream.
		 */
		msg->rm_xid = clone_xprt->xp_xid;

		if (!(xdr_replymsg(xdrs, msg) &&
		    (!has_args || SVCAUTH_WRAP(&clone_xprt->xp_auth, xdrs,
		    xdr_results, xdr_location)))) {
			/*
			 * The encode layer may have replaced the buffer;
			 * pick up the current base/size before freeing.
			 */
			if (longreply.addr != xdrs->x_base) {
				longreply.addr = xdrs->x_base;
				longreply.len = xdr_getbufsize(xdrs);
			}
			rdma_buf_free(vd->conn, &longreply);
			if (cle)
				clist_free(cle);
			cmn_err(CE_WARN,
			    "svc_rdma_ksend: xdr_replymsg/SVCAUTH_WRAP "
			    "failed");
			goto out;
		}

		/*
		 * If we had to allocate a new buffer while encoding
		 * then update the addr and len.
		 */
		if (longreply.addr != xdrs->x_base) {
			longreply.addr = xdrs->x_base;
			longreply.len = xdr_getbufsize(xdrs);
		}

		len = XDR_GETPOS(xdrs);

		/*
		 * If it so happens that the encoded message is after all
		 * not long enough to be a Long RPC then allocate a
		 * SEND_BUFFER and copy the encoded message into it.
		 */
		if (len > RPC_MSG_SZ) {
			rpcreply.type = CHUNK_BUFFER;
			rpcreply.addr = longreply.addr;
			rpcreply.len = longreply.len;
		} else {
			clist_free(cle);
			XDR_DESTROY(xdrs);
			/*
			 * Get a pre-allocated buffer for rpc reply
			 */
			rpcreply.type = SEND_BUFFER;
			if (RDMA_BUF_ALLOC(vd->conn, &rpcreply)) {
				cmn_err(CE_WARN,
				    "svc_rdma_ksend: no free buffers!");
				rdma_buf_free(vd->conn, &longreply);
				return (retval);
			}
			bcopy(longreply.addr, rpcreply.addr, len);
			xdrrdma_create(xdrs, rpcreply.addr, len, 0, NULL,
			    XDR_ENCODE, NULL);
			rdma_buf_free(vd->conn, &longreply);
			op = RDMA_MSG;
		}
	}

	if (has_args == FALSE) {

		if (msglen > RPC_MSG_SZ) {

			/*
			 * Allocate chunk buffer for rpc reply
			 */
			rpcreply.type = CHUNK_BUFFER;
			rpcreply.addr = kmem_zalloc(msglen, KM_SLEEP);
			cle = kmem_zalloc(sizeof (*cle), KM_SLEEP);
			cle->c_xdroff = 0;
			cle->c_len = rpcreply.len = msglen;
			cle->c_saddr = (uint64)(uintptr_t)rpcreply.addr;
			cle->c_next = NULL;
			xdrrdma_create(xdrs, rpcreply.addr, msglen,
			    rdma_minchunk, cle, XDR_ENCODE, NULL);
			op = RDMA_NOMSG;
		} else {
			/*
			 * Get a pre-allocated buffer for rpc reply
			 */
			rpcreply.type = SEND_BUFFER;
			if (RDMA_BUF_ALLOC(vd->conn, &rpcreply)) {
				cmn_err(CE_WARN,
				    "svc_rdma_ksend: no free buffers!");
				return (retval);
			}
			xdrrdma_create(xdrs, rpcreply.addr, rpcreply.len,
			    rdma_minchunk, NULL, XDR_ENCODE, NULL);
			op = RDMA_MSG;
		}

		/*
		 * Initialize the XDR encode stream.
		 */
		msg->rm_xid = clone_xprt->xp_xid;

		if (!xdr_replymsg(xdrs, msg)) {
			rdma_buf_free(vd->conn, &rpcreply);
			if (cle)
				clist_free(cle);
			cmn_err(CE_WARN,
			    "svc_rdma_ksend: xdr_replymsg/SVCAUTH_WRAP "
			    "failed");
			goto out;
		}
		len = XDR_GETPOS(xdrs);
	}

	/*
	 * Get clist and a buffer for sending it across
	 */
	cl = xdrrdma_clist(xdrs);
	clmsg.type = SEND_BUFFER;
	if (RDMA_BUF_ALLOC(vd->conn, &clmsg)) {
		rdma_buf_free(vd->conn, &rpcreply);
		cmn_err(CE_WARN, "svc_rdma_ksend: no free buffers!!");
		goto out;
	}

	/*
	 * Now register the chunks in the list
	 */
	if (cl != NULL) {
		status = clist_register(vd->conn, cl, 1);
		if (status != RDMA_SUCCESS) {
			rdma_buf_free(vd->conn, &clmsg);
			cmn_err(CE_WARN,
			    "svc_rdma_ksend: clist register failed");
			goto out;
		}
		reg = TRUE;
	}

	/*
	 * XDR the XID, vers, and op
	 */
	/*
	 * Treat xid as opaque (xid is the first entity
	 * in the rpc rdma message).
	 */
	vers = RPCRDMA_VERS;
	xdrs = &rxdrs;
	xdrmem_create(xdrs, clmsg.addr, clmsg.len, XDR_ENCODE);
	(*(uint32_t *)clmsg.addr) = msg->rm_xid;
	/* Skip xid and set the xdr position accordingly. */
	XDR_SETPOS(xdrs, sizeof (uint32_t));
	if (! xdr_u_int(xdrs, &vers) ||
	    ! xdr_u_int(xdrs, &op)) {
		rdma_buf_free(vd->conn, &rpcreply);
		rdma_buf_free(vd->conn, &clmsg);
		cmn_err(CE_WARN, "svc_rdma_ksend: xdr_u_int failed");
		goto out;
	}

	/*
	 * Now XDR the chunk list
	 */
	(void) xdr_do_clist(xdrs, &cl);

	clist_add(&sendlist, 0, XDR_GETPOS(xdrs), &clmsg.handle, clmsg.addr,
	    NULL, NULL);

	if (op == RDMA_MSG) {
		/* Inline reply: second send element is the encoded body. */
		clist_add(&sendlist, 0, len, &rpcreply.handle, rpcreply.addr,
		    NULL, NULL);
	} else {
		/* Long RPC: trim the chunk to the actual encoded length. */
		cl->c_len = len;
		RSSTAT_INCR(rslongrpcs);
	}

	/*
	 * Send the reply message to the client
	 */
	if (cl != NULL) {
		status = clist_syncmem(vd->conn, cl, 1);
		if (status != RDMA_SUCCESS) {
			rdma_buf_free(vd->conn, &rpcreply);
			rdma_buf_free(vd->conn, &clmsg);
			goto out;
		}
#ifdef DEBUG
		if (rdma_svc_debug)
			printf("svc_rdma_ksend: chunk response len %d xid %u\n",
			    cl->c_len, msg->rm_xid);
#endif
		/*
		 * Post a receive buffer because we expect a RDMA_DONE
		 * message.
		 */
		status = rdma_svc_postrecv(vd->conn);
		/*
		 * NOTE(review): the rdma_svc_postrecv() status is
		 * immediately overwritten below without being checked —
		 * confirm whether a postrecv failure should abort here.
		 */

		/*
		 * Send the RPC reply message and wait for RDMA_DONE
		 */
		status = RDMA_SEND_RESP(vd->conn, sendlist, msg->rm_xid);
		if (status != RDMA_SUCCESS) {
#ifdef DEBUG
			if (rdma_svc_debug)
				cmn_err(CE_NOTE, "svc_rdma_ksend: "
				    "rdma_send_resp failed %d", status);
#endif
			goto out;
		}
#ifdef DEBUG
		if (rdma_svc_debug)
			printf("svc_rdma_ksend: got RDMA_DONE xid %u\n",
			    msg->rm_xid);
#endif
	} else {
#ifdef DEBUG
		if (rdma_svc_debug)
			printf("svc_rdma_ksend: msg response xid %u\n",
			    msg->rm_xid);
#endif
		status = RDMA_SEND(vd->conn, sendlist, msg->rm_xid);
		if (status != RDMA_SUCCESS) {
#ifdef DEBUG
			if (rdma_svc_debug)
				cmn_err(CE_NOTE, "svc_rdma_ksend: "
				    "rdma_send failed %d", status);
#endif
			goto out;
		}
	}

	retval = TRUE;
out:
	/*
	 * Deregister the chunks
	 */
	if (cl != NULL) {
		if (reg)
			(void) clist_deregister(vd->conn, cl, 1);
		if (op == RDMA_NOMSG) {
			/*
			 * Long RPC reply in chunk. Free it up.
			 */
			rdma_buf_free(vd->conn, &rpcreply);
		}
		clist_free(cl);
	}

	/*
	 * Free up sendlist chunks
	 */
	if (sendlist != NULL)
		clist_free(sendlist);

	/*
	 * Destroy private data for xdr rdma
	 */
	XDR_DESTROY(&(clone_xprt->xp_xdrout));

	/*
	 * This is completely disgusting. If public is set it is
	 * a pointer to a structure whose first field is the address
	 * of the function to free that structure and any related
	 * stuff. (see rrokfree in nfs_xdr.c).
	 *
	 * NOTE(review): once xdrs is retargeted to &rxdrs above, this
	 * reads rxdrs.x_public (set up by xdrmem_create) rather than
	 * xp_xdrout's — confirm that is the intended stream here.
	 */
	if (xdrs->x_public) {
		/* LINTED pointer alignment */
		(**((int (**)())xdrs->x_public))(xdrs->x_public);
	}

	return (retval);
}

/*
 * Deserialize arguments.
981*0Sstevel@tonic-gate */ 982*0Sstevel@tonic-gate static bool_t 983*0Sstevel@tonic-gate svc_rdma_kgetargs(SVCXPRT *clone_xprt, xdrproc_t xdr_args, caddr_t args_ptr) 984*0Sstevel@tonic-gate { 985*0Sstevel@tonic-gate if ((SVCAUTH_UNWRAP(&clone_xprt->xp_auth, &clone_xprt->xp_xdrin, 986*0Sstevel@tonic-gate xdr_args, args_ptr)) != TRUE) 987*0Sstevel@tonic-gate return (FALSE); 988*0Sstevel@tonic-gate return (TRUE); 989*0Sstevel@tonic-gate } 990*0Sstevel@tonic-gate 991*0Sstevel@tonic-gate static bool_t 992*0Sstevel@tonic-gate svc_rdma_kfreeargs(SVCXPRT *clone_xprt, xdrproc_t xdr_args, 993*0Sstevel@tonic-gate caddr_t args_ptr) 994*0Sstevel@tonic-gate { 995*0Sstevel@tonic-gate struct clone_rdma_data *vd; 996*0Sstevel@tonic-gate bool_t retval; 997*0Sstevel@tonic-gate 998*0Sstevel@tonic-gate vd = (struct clone_rdma_data *)clone_xprt->xp_p2buf; 999*0Sstevel@tonic-gate if (args_ptr) { 1000*0Sstevel@tonic-gate XDR *xdrs = &clone_xprt->xp_xdrin; 1001*0Sstevel@tonic-gate struct clist *cl; 1002*0Sstevel@tonic-gate 1003*0Sstevel@tonic-gate cl = xdrrdma_clist(xdrs); 1004*0Sstevel@tonic-gate if (cl != NULL) 1005*0Sstevel@tonic-gate clist_free(cl); 1006*0Sstevel@tonic-gate 1007*0Sstevel@tonic-gate xdrs->x_op = XDR_FREE; 1008*0Sstevel@tonic-gate retval = (*xdr_args)(xdrs, args_ptr); 1009*0Sstevel@tonic-gate } 1010*0Sstevel@tonic-gate XDR_DESTROY(&(clone_xprt->xp_xdrin)); 1011*0Sstevel@tonic-gate rdma_buf_free(vd->conn, &vd->rpcbuf); 1012*0Sstevel@tonic-gate RDMA_REL_CONN(vd->conn); 1013*0Sstevel@tonic-gate return (retval); 1014*0Sstevel@tonic-gate } 1015*0Sstevel@tonic-gate 1016*0Sstevel@tonic-gate /* ARGSUSED */ 1017*0Sstevel@tonic-gate static int32_t * 1018*0Sstevel@tonic-gate svc_rdma_kgetres(SVCXPRT *clone_xprt, int size) 1019*0Sstevel@tonic-gate { 1020*0Sstevel@tonic-gate return (NULL); 1021*0Sstevel@tonic-gate } 1022*0Sstevel@tonic-gate 1023*0Sstevel@tonic-gate /* ARGSUSED */ 1024*0Sstevel@tonic-gate static void 1025*0Sstevel@tonic-gate svc_rdma_kfreeres(SVCXPRT *clone_xprt) 
1026*0Sstevel@tonic-gate { 1027*0Sstevel@tonic-gate } 1028*0Sstevel@tonic-gate 1029*0Sstevel@tonic-gate /* 1030*0Sstevel@tonic-gate * the dup cacheing routines below provide a cache of non-failure 1031*0Sstevel@tonic-gate * transaction id's. rpc service routines can use this to detect 1032*0Sstevel@tonic-gate * retransmissions and re-send a non-failure response. 1033*0Sstevel@tonic-gate */ 1034*0Sstevel@tonic-gate 1035*0Sstevel@tonic-gate /* 1036*0Sstevel@tonic-gate * MAXDUPREQS is the number of cached items. It should be adjusted 1037*0Sstevel@tonic-gate * to the service load so that there is likely to be a response entry 1038*0Sstevel@tonic-gate * when the first retransmission comes in. 1039*0Sstevel@tonic-gate */ 1040*0Sstevel@tonic-gate #define MAXDUPREQS 1024 1041*0Sstevel@tonic-gate 1042*0Sstevel@tonic-gate /* 1043*0Sstevel@tonic-gate * This should be appropriately scaled to MAXDUPREQS. 1044*0Sstevel@tonic-gate */ 1045*0Sstevel@tonic-gate #define DRHASHSZ 257 1046*0Sstevel@tonic-gate 1047*0Sstevel@tonic-gate #if ((DRHASHSZ & (DRHASHSZ - 1)) == 0) 1048*0Sstevel@tonic-gate #define XIDHASH(xid) ((xid) & (DRHASHSZ - 1)) 1049*0Sstevel@tonic-gate #else 1050*0Sstevel@tonic-gate #define XIDHASH(xid) ((xid) % DRHASHSZ) 1051*0Sstevel@tonic-gate #endif 1052*0Sstevel@tonic-gate #define DRHASH(dr) XIDHASH((dr)->dr_xid) 1053*0Sstevel@tonic-gate #define REQTOXID(req) ((req)->rq_xprt->xp_xid) 1054*0Sstevel@tonic-gate 1055*0Sstevel@tonic-gate static int rdmandupreqs = 0; 1056*0Sstevel@tonic-gate static int rdmamaxdupreqs = MAXDUPREQS; 1057*0Sstevel@tonic-gate static kmutex_t rdmadupreq_lock; 1058*0Sstevel@tonic-gate static struct dupreq *rdmadrhashtbl[DRHASHSZ]; 1059*0Sstevel@tonic-gate static int rdmadrhashstat[DRHASHSZ]; 1060*0Sstevel@tonic-gate 1061*0Sstevel@tonic-gate static void unhash(struct dupreq *); 1062*0Sstevel@tonic-gate 1063*0Sstevel@tonic-gate /* 1064*0Sstevel@tonic-gate * rdmadrmru points to the head of a circular linked list in lru order. 
 * rdmadrmru->dr_next == drlru
 */
struct dupreq *rdmadrmru;

/*
 * svc_rdma_kdup searches the request cache and returns 0 if the
 * request is not found in the cache. If it is found, then it
 * returns the state of the request (in progress or done) and
 * the status or attributes that were part of the original reply.
 *
 * Return values: DUP_DONE / DUP_INPROGRESS / DUP_DROP for a cache hit
 * (the entry's recorded state), DUP_NEW for a miss (a fresh entry is
 * installed and *drpp set), or DUP_ERROR when no entry can be obtained.
 * All cache state is protected by rdmadupreq_lock.
 */
static int
svc_rdma_kdup(struct svc_req *req, caddr_t res, int size, struct dupreq **drpp,
	bool_t *dupcachedp)
{
	struct dupreq *dr;
	uint32_t xid;
	uint32_t drhash;
	int status;

	xid = REQTOXID(req);
	mutex_enter(&rdmadupreq_lock);
	RSSTAT_INCR(rsdupchecks);
	/*
	 * Check to see whether an entry already exists in the cache.
	 * A hit requires matching xid, proc, prog, vers, and the full
	 * transport return address.
	 */
	dr = rdmadrhashtbl[XIDHASH(xid)];
	while (dr != NULL) {
		if (dr->dr_xid == xid &&
		    dr->dr_proc == req->rq_proc &&
		    dr->dr_prog == req->rq_prog &&
		    dr->dr_vers == req->rq_vers &&
		    dr->dr_addr.len == req->rq_xprt->xp_rtaddr.len &&
		    bcmp((caddr_t)dr->dr_addr.buf,
		    (caddr_t)req->rq_xprt->xp_rtaddr.buf,
		    dr->dr_addr.len) == 0) {
			status = dr->dr_status;
			if (status == DUP_DONE) {
				/* Replay the cached response. */
				bcopy(dr->dr_resp.buf, res, size);
				if (dupcachedp != NULL)
					*dupcachedp = (dr->dr_resfree != NULL);
			} else {
				/* Mark busy again; caller gets the entry. */
				dr->dr_status = DUP_INPROGRESS;
				*drpp = dr;
			}
			RSSTAT_INCR(rsdupreqs);
			mutex_exit(&rdmadupreq_lock);
			return (status);
		}
		dr = dr->dr_chain;
	}

	/*
	 * There wasn't an entry, either allocate a new one or recycle
	 * an old one.
	 */
	if (rdmandupreqs < rdmamaxdupreqs) {
		dr = kmem_alloc(sizeof (*dr), KM_NOSLEEP);
		if (dr == NULL) {
			mutex_exit(&rdmadupreq_lock);
			return (DUP_ERROR);
		}
		dr->dr_resp.buf = NULL;
		dr->dr_resp.maxlen = 0;
		dr->dr_addr.buf = NULL;
		dr->dr_addr.maxlen = 0;
		/* Insert into the circular LRU ring (self-linked if first). */
		if (rdmadrmru) {
			dr->dr_next = rdmadrmru->dr_next;
			rdmadrmru->dr_next = dr;
		} else {
			dr->dr_next = dr;
		}
		rdmandupreqs++;
	} else {
		/*
		 * Recycle the least-recently-used entry that is not
		 * DUP_INPROGRESS; fail if the whole ring is busy.
		 */
		dr = rdmadrmru->dr_next;
		while (dr->dr_status == DUP_INPROGRESS) {
			dr = dr->dr_next;
			if (dr == rdmadrmru->dr_next) {
				cmn_err(CE_WARN, "svc_rdma_kdup no slots free");
				mutex_exit(&rdmadupreq_lock);
				return (DUP_ERROR);
			}
		}
		unhash(dr);
		if (dr->dr_resfree) {
			(*dr->dr_resfree)(dr->dr_resp.buf);
		}
	}
	dr->dr_resfree = NULL;
	/* The (re)used entry becomes the most recently used. */
	rdmadrmru = dr;

	dr->dr_xid = REQTOXID(req);
	dr->dr_prog = req->rq_prog;
	dr->dr_vers = req->rq_vers;
	dr->dr_proc = req->rq_proc;
	/* Grow the cached address buffer if the caller's address is larger. */
	if (dr->dr_addr.maxlen < req->rq_xprt->xp_rtaddr.len) {
		if (dr->dr_addr.buf != NULL)
			kmem_free(dr->dr_addr.buf, dr->dr_addr.maxlen);
		dr->dr_addr.maxlen = req->rq_xprt->xp_rtaddr.len;
		dr->dr_addr.buf = kmem_alloc(dr->dr_addr.maxlen, KM_NOSLEEP);
		if (dr->dr_addr.buf == NULL) {
			/* Entry stays in the LRU ring, marked droppable. */
			dr->dr_addr.maxlen = 0;
			dr->dr_status = DUP_DROP;
			mutex_exit(&rdmadupreq_lock);
			return (DUP_ERROR);
		}
	}
	dr->dr_addr.len = req->rq_xprt->xp_rtaddr.len;
	bcopy(req->rq_xprt->xp_rtaddr.buf, dr->dr_addr.buf, dr->dr_addr.len);
	/* Likewise grow the cached response buffer if needed. */
	if (dr->dr_resp.maxlen < size) {
		if (dr->dr_resp.buf != NULL)
			kmem_free(dr->dr_resp.buf, dr->dr_resp.maxlen);
		dr->dr_resp.maxlen = (unsigned int)size;
		dr->dr_resp.buf = kmem_alloc(size, KM_NOSLEEP);
		if (dr->dr_resp.buf == NULL) {
			dr->dr_resp.maxlen = 0;
			dr->dr_status = DUP_DROP;
			mutex_exit(&rdmadupreq_lock);
			return (DUP_ERROR);
		}
	}
	dr->dr_status = DUP_INPROGRESS;

	/* Hash the entry so later retransmissions find it. */
	drhash = (uint32_t)DRHASH(dr);
	dr->dr_chain = rdmadrhashtbl[drhash];
	rdmadrhashtbl[drhash] = dr;
	rdmadrhashstat[drhash]++;
	mutex_exit(&rdmadupreq_lock);
	*drpp = dr;
	return (DUP_NEW);
}

/*
 * svc_rdma_kdupdone marks the request done (DUP_DONE or DUP_DROP)
 * and stores the response.
1199*0Sstevel@tonic-gate */ 1200*0Sstevel@tonic-gate static void 1201*0Sstevel@tonic-gate svc_rdma_kdupdone(struct dupreq *dr, caddr_t res, void (*dis_resfree)(), 1202*0Sstevel@tonic-gate int size, int status) 1203*0Sstevel@tonic-gate { 1204*0Sstevel@tonic-gate ASSERT(dr->dr_resfree == NULL); 1205*0Sstevel@tonic-gate if (status == DUP_DONE) { 1206*0Sstevel@tonic-gate bcopy(res, dr->dr_resp.buf, size); 1207*0Sstevel@tonic-gate dr->dr_resfree = dis_resfree; 1208*0Sstevel@tonic-gate } 1209*0Sstevel@tonic-gate dr->dr_status = status; 1210*0Sstevel@tonic-gate } 1211*0Sstevel@tonic-gate 1212*0Sstevel@tonic-gate /* 1213*0Sstevel@tonic-gate * This routine expects that the mutex, rdmadupreq_lock, is already held. 1214*0Sstevel@tonic-gate */ 1215*0Sstevel@tonic-gate static void 1216*0Sstevel@tonic-gate unhash(struct dupreq *dr) 1217*0Sstevel@tonic-gate { 1218*0Sstevel@tonic-gate struct dupreq *drt; 1219*0Sstevel@tonic-gate struct dupreq *drtprev = NULL; 1220*0Sstevel@tonic-gate uint32_t drhash; 1221*0Sstevel@tonic-gate 1222*0Sstevel@tonic-gate ASSERT(MUTEX_HELD(&rdmadupreq_lock)); 1223*0Sstevel@tonic-gate 1224*0Sstevel@tonic-gate drhash = (uint32_t)DRHASH(dr); 1225*0Sstevel@tonic-gate drt = rdmadrhashtbl[drhash]; 1226*0Sstevel@tonic-gate while (drt != NULL) { 1227*0Sstevel@tonic-gate if (drt == dr) { 1228*0Sstevel@tonic-gate rdmadrhashstat[drhash]--; 1229*0Sstevel@tonic-gate if (drtprev == NULL) { 1230*0Sstevel@tonic-gate rdmadrhashtbl[drhash] = drt->dr_chain; 1231*0Sstevel@tonic-gate } else { 1232*0Sstevel@tonic-gate drtprev->dr_chain = drt->dr_chain; 1233*0Sstevel@tonic-gate } 1234*0Sstevel@tonic-gate return; 1235*0Sstevel@tonic-gate } 1236*0Sstevel@tonic-gate drtprev = drt; 1237*0Sstevel@tonic-gate drt = drt->dr_chain; 1238*0Sstevel@tonic-gate } 1239*0Sstevel@tonic-gate } 1240