10Sstevel@tonic-gate /* 20Sstevel@tonic-gate * CDDL HEADER START 30Sstevel@tonic-gate * 40Sstevel@tonic-gate * The contents of this file are subject to the terms of the 5*7387SRobert.Gordon@Sun.COM * Common Development and Distribution License (the "License"). 6*7387SRobert.Gordon@Sun.COM * You may not use this file except in compliance with the License. 70Sstevel@tonic-gate * 80Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 90Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing. 100Sstevel@tonic-gate * See the License for the specific language governing permissions 110Sstevel@tonic-gate * and limitations under the License. 120Sstevel@tonic-gate * 130Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each 140Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 150Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the 160Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying 170Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner] 180Sstevel@tonic-gate * 190Sstevel@tonic-gate * CDDL HEADER END 200Sstevel@tonic-gate */ 210Sstevel@tonic-gate /* 22*7387SRobert.Gordon@Sun.COM * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 230Sstevel@tonic-gate * Use is subject to license terms. 240Sstevel@tonic-gate */ 250Sstevel@tonic-gate /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */ 260Sstevel@tonic-gate /* All Rights Reserved */ 270Sstevel@tonic-gate /* 280Sstevel@tonic-gate * Portions of this source code were derived from Berkeley 290Sstevel@tonic-gate * 4.3 BSD under license from the Regents of the University of 300Sstevel@tonic-gate * California. 310Sstevel@tonic-gate */ 320Sstevel@tonic-gate 330Sstevel@tonic-gate #include <sys/param.h> 340Sstevel@tonic-gate #include <sys/types.h> 350Sstevel@tonic-gate #include <sys/user.h> 360Sstevel@tonic-gate #include <sys/systm.h> 370Sstevel@tonic-gate #include <sys/sysmacros.h> 380Sstevel@tonic-gate #include <sys/errno.h> 390Sstevel@tonic-gate #include <sys/kmem.h> 400Sstevel@tonic-gate #include <sys/debug.h> 410Sstevel@tonic-gate #include <sys/systm.h> 420Sstevel@tonic-gate #include <sys/kstat.h> 430Sstevel@tonic-gate #include <sys/t_lock.h> 440Sstevel@tonic-gate #include <sys/ddi.h> 450Sstevel@tonic-gate #include <sys/cmn_err.h> 460Sstevel@tonic-gate #include <sys/time.h> 470Sstevel@tonic-gate #include <sys/isa_defs.h> 480Sstevel@tonic-gate #include <sys/zone.h> 49*7387SRobert.Gordon@Sun.COM #include <sys/sdt.h> 500Sstevel@tonic-gate 510Sstevel@tonic-gate #include <rpc/types.h> 520Sstevel@tonic-gate #include <rpc/xdr.h> 530Sstevel@tonic-gate #include <rpc/auth.h> 540Sstevel@tonic-gate #include <rpc/clnt.h> 550Sstevel@tonic-gate #include <rpc/rpc_msg.h> 560Sstevel@tonic-gate #include <rpc/rpc_rdma.h> 57*7387SRobert.Gordon@Sun.COM #include <nfs/nfs.h> 58*7387SRobert.Gordon@Sun.COM #include <nfs/nfs4_kprot.h> 590Sstevel@tonic-gate 60*7387SRobert.Gordon@Sun.COM static uint32_t rdma_bufs_rqst = RDMA_BUFS_RQST; 61*7387SRobert.Gordon@Sun.COM 62*7387SRobert.Gordon@Sun.COM static int clnt_compose_rpcmsg(CLIENT *, rpcproc_t, rdma_buf_t *, 63*7387SRobert.Gordon@Sun.COM XDR *, xdrproc_t, caddr_t); 64*7387SRobert.Gordon@Sun.COM static int clnt_compose_rdma_header(CONN *, CLIENT *, rdma_buf_t *, 65*7387SRobert.Gordon@Sun.COM XDR **, uint_t *); 66*7387SRobert.Gordon@Sun.COM static int clnt_setup_rlist(CONN *, XDR *, XDR *); 67*7387SRobert.Gordon@Sun.COM static int clnt_setup_wlist(CONN *, XDR *, XDR *); 68*7387SRobert.Gordon@Sun.COM static int clnt_setup_long_reply(CONN *, struct clist **, uint_t); 69*7387SRobert.Gordon@Sun.COM static void clnt_check_credit(CONN *); 70*7387SRobert.Gordon@Sun.COM static void clnt_return_credit(CONN *); 71*7387SRobert.Gordon@Sun.COM static void clnt_decode_long_reply(CONN *, struct clist *, 72*7387SRobert.Gordon@Sun.COM struct clist *, XDR *, XDR **, struct clist *, 73*7387SRobert.Gordon@Sun.COM struct clist *, uint_t, uint_t); 74*7387SRobert.Gordon@Sun.COM 75*7387SRobert.Gordon@Sun.COM static void clnt_update_credit(CONN *, uint32_t); 76*7387SRobert.Gordon@Sun.COM static void check_dereg_wlist(CONN *, struct clist *); 770Sstevel@tonic-gate 780Sstevel@tonic-gate static enum clnt_stat clnt_rdma_kcallit(CLIENT *, rpcproc_t, xdrproc_t, 790Sstevel@tonic-gate caddr_t, xdrproc_t, caddr_t, struct timeval); 800Sstevel@tonic-gate static void clnt_rdma_kabort(CLIENT *); 810Sstevel@tonic-gate static void clnt_rdma_kerror(CLIENT *, struct rpc_err *); 820Sstevel@tonic-gate static bool_t clnt_rdma_kfreeres(CLIENT *, xdrproc_t, caddr_t); 830Sstevel@tonic-gate static void clnt_rdma_kdestroy(CLIENT *); 840Sstevel@tonic-gate static bool_t clnt_rdma_kcontrol(CLIENT *, int, char *); 850Sstevel@tonic-gate static int clnt_rdma_ksettimers(CLIENT *, struct rpc_timers *, 860Sstevel@tonic-gate struct rpc_timers *, int, void(*)(int, int, caddr_t), caddr_t, uint32_t); 870Sstevel@tonic-gate 880Sstevel@tonic-gate /* 890Sstevel@tonic-gate * Operations vector for RDMA based RPC 900Sstevel@tonic-gate */ 910Sstevel@tonic-gate static struct clnt_ops rdma_clnt_ops = { 920Sstevel@tonic-gate clnt_rdma_kcallit, /* do rpc call */ 930Sstevel@tonic-gate clnt_rdma_kabort, /* abort call */ 940Sstevel@tonic-gate clnt_rdma_kerror, /* return error status */ 950Sstevel@tonic-gate clnt_rdma_kfreeres, /* free results */ 960Sstevel@tonic-gate clnt_rdma_kdestroy, /* destroy rpc handle */ 970Sstevel@tonic-gate clnt_rdma_kcontrol, /* the ioctl() of rpc */ 980Sstevel@tonic-gate clnt_rdma_ksettimers, /* set retry timers */ 990Sstevel@tonic-gate }; 1000Sstevel@tonic-gate 1010Sstevel@tonic-gate /* 1020Sstevel@tonic-gate * The size of the preserialized RPC header information. 1030Sstevel@tonic-gate */ 1040Sstevel@tonic-gate #define CKU_HDRSIZE 20 105*7387SRobert.Gordon@Sun.COM #define CLNT_RDMA_SUCCESS 0 106*7387SRobert.Gordon@Sun.COM #define CLNT_RDMA_FAIL (-1) 107*7387SRobert.Gordon@Sun.COM 108*7387SRobert.Gordon@Sun.COM #define AUTH_REFRESH_COUNT 2 109*7387SRobert.Gordon@Sun.COM 110*7387SRobert.Gordon@Sun.COM #define IS_RPCSEC_GSS(authh) \ 111*7387SRobert.Gordon@Sun.COM (authh->cl_auth->ah_cred.oa_flavor == RPCSEC_GSS) 1120Sstevel@tonic-gate 1130Sstevel@tonic-gate /* 1140Sstevel@tonic-gate * Per RPC RDMA endpoint details 1150Sstevel@tonic-gate */ 1160Sstevel@tonic-gate typedef struct cku_private { 1170Sstevel@tonic-gate CLIENT cku_client; /* client handle */ 1180Sstevel@tonic-gate rdma_mod_t *cku_rd_mod; /* underlying RDMA mod */ 1190Sstevel@tonic-gate void *cku_rd_handle; /* underlying RDMA device */ 1200Sstevel@tonic-gate struct netbuf cku_addr; /* remote netbuf address */ 1210Sstevel@tonic-gate int cku_addrfmly; /* for finding addr_type */ 1220Sstevel@tonic-gate struct rpc_err cku_err; /* error status */ 1230Sstevel@tonic-gate struct cred *cku_cred; /* credentials */ 1240Sstevel@tonic-gate XDR cku_outxdr; /* xdr stream for output */ 1250Sstevel@tonic-gate uint32_t cku_outsz; 1260Sstevel@tonic-gate XDR cku_inxdr; /* xdr stream for input */ 1270Sstevel@tonic-gate char cku_rpchdr[CKU_HDRSIZE+4]; /* rpc header */ 1280Sstevel@tonic-gate uint32_t cku_xid; /* current XID */ 1290Sstevel@tonic-gate } cku_private_t; 1300Sstevel@tonic-gate 1310Sstevel@tonic-gate #define CLNT_RDMA_DELAY 10 /* secs to delay after a connection failure */ 1320Sstevel@tonic-gate static int clnt_rdma_min_delay = CLNT_RDMA_DELAY; 1330Sstevel@tonic-gate 1340Sstevel@tonic-gate struct { 1350Sstevel@tonic-gate kstat_named_t rccalls; 1360Sstevel@tonic-gate kstat_named_t rcbadcalls; 1370Sstevel@tonic-gate kstat_named_t rcbadxids; 1380Sstevel@tonic-gate kstat_named_t rctimeouts; 1390Sstevel@tonic-gate kstat_named_t rcnewcreds; 1400Sstevel@tonic-gate kstat_named_t rcbadverfs; 1410Sstevel@tonic-gate kstat_named_t rctimers; 1420Sstevel@tonic-gate kstat_named_t rccantconn; 1430Sstevel@tonic-gate kstat_named_t rcnomem; 1440Sstevel@tonic-gate kstat_named_t rcintrs; 1450Sstevel@tonic-gate kstat_named_t rclongrpcs; 1460Sstevel@tonic-gate } rdmarcstat = { 1470Sstevel@tonic-gate { "calls", KSTAT_DATA_UINT64 }, 1480Sstevel@tonic-gate { "badcalls", KSTAT_DATA_UINT64 }, 1490Sstevel@tonic-gate { "badxids", KSTAT_DATA_UINT64 }, 1500Sstevel@tonic-gate { "timeouts", KSTAT_DATA_UINT64 }, 1510Sstevel@tonic-gate { "newcreds", KSTAT_DATA_UINT64 }, 1520Sstevel@tonic-gate { "badverfs", KSTAT_DATA_UINT64 }, 1530Sstevel@tonic-gate { "timers", KSTAT_DATA_UINT64 }, 1540Sstevel@tonic-gate { "cantconn", KSTAT_DATA_UINT64 }, 1550Sstevel@tonic-gate { "nomem", KSTAT_DATA_UINT64 }, 1560Sstevel@tonic-gate { "interrupts", KSTAT_DATA_UINT64 }, 1570Sstevel@tonic-gate { "longrpc", KSTAT_DATA_UINT64 } 1580Sstevel@tonic-gate }; 1590Sstevel@tonic-gate 1600Sstevel@tonic-gate kstat_named_t *rdmarcstat_ptr = (kstat_named_t *)&rdmarcstat; 1610Sstevel@tonic-gate uint_t rdmarcstat_ndata = sizeof (rdmarcstat) / sizeof (kstat_named_t); 1620Sstevel@tonic-gate 1630Sstevel@tonic-gate #ifdef DEBUG 1640Sstevel@tonic-gate int rdma_clnt_debug = 0; 1650Sstevel@tonic-gate #endif 1660Sstevel@tonic-gate 1670Sstevel@tonic-gate #ifdef accurate_stats 1680Sstevel@tonic-gate extern kmutex_t rdmarcstat_lock; /* mutex for rcstat updates */ 1690Sstevel@tonic-gate 1700Sstevel@tonic-gate #define RCSTAT_INCR(x) \ 1710Sstevel@tonic-gate mutex_enter(&rdmarcstat_lock); \ 1720Sstevel@tonic-gate rdmarcstat.x.value.ui64++; \ 1730Sstevel@tonic-gate mutex_exit(&rdmarcstat_lock); 1740Sstevel@tonic-gate #else 1750Sstevel@tonic-gate #define RCSTAT_INCR(x) \ 1760Sstevel@tonic-gate rdmarcstat.x.value.ui64++; 1770Sstevel@tonic-gate #endif 1780Sstevel@tonic-gate 1790Sstevel@tonic-gate #define ptoh(p) (&((p)->cku_client)) 1800Sstevel@tonic-gate #define htop(h) ((cku_private_t *)((h)->cl_private)) 1810Sstevel@tonic-gate 182*7387SRobert.Gordon@Sun.COM uint_t 183*7387SRobert.Gordon@Sun.COM calc_length(uint_t len) 184*7387SRobert.Gordon@Sun.COM { 185*7387SRobert.Gordon@Sun.COM len = RNDUP(len); 186*7387SRobert.Gordon@Sun.COM 187*7387SRobert.Gordon@Sun.COM if (len <= 64 * 1024) { 188*7387SRobert.Gordon@Sun.COM if (len > 32 * 1024) { 189*7387SRobert.Gordon@Sun.COM len = 64 * 1024; 190*7387SRobert.Gordon@Sun.COM } else { 191*7387SRobert.Gordon@Sun.COM if (len > 16 * 1024) { 192*7387SRobert.Gordon@Sun.COM len = 32 * 1024; 193*7387SRobert.Gordon@Sun.COM } else { 194*7387SRobert.Gordon@Sun.COM if (len > 8 * 1024) { 195*7387SRobert.Gordon@Sun.COM len = 16 * 1024; 196*7387SRobert.Gordon@Sun.COM } else { 197*7387SRobert.Gordon@Sun.COM len = 8 * 1024; 198*7387SRobert.Gordon@Sun.COM } 199*7387SRobert.Gordon@Sun.COM } 200*7387SRobert.Gordon@Sun.COM } 201*7387SRobert.Gordon@Sun.COM } 202*7387SRobert.Gordon@Sun.COM return (len); 203*7387SRobert.Gordon@Sun.COM } 2040Sstevel@tonic-gate int 2050Sstevel@tonic-gate clnt_rdma_kcreate(char *proto, void *handle, struct netbuf *raddr, int family, 2060Sstevel@tonic-gate rpcprog_t pgm, rpcvers_t vers, struct cred *cred, CLIENT **cl) 2070Sstevel@tonic-gate { 2080Sstevel@tonic-gate CLIENT *h; 2090Sstevel@tonic-gate struct cku_private *p; 2100Sstevel@tonic-gate struct rpc_msg call_msg; 2110Sstevel@tonic-gate rdma_registry_t *rp; 2120Sstevel@tonic-gate 2130Sstevel@tonic-gate ASSERT(INGLOBALZONE(curproc)); 2140Sstevel@tonic-gate 2150Sstevel@tonic-gate if (cl == NULL) 2160Sstevel@tonic-gate return (EINVAL); 2170Sstevel@tonic-gate *cl = NULL; 2180Sstevel@tonic-gate 2190Sstevel@tonic-gate p = kmem_zalloc(sizeof (*p), KM_SLEEP); 2200Sstevel@tonic-gate 2210Sstevel@tonic-gate /* 2220Sstevel@tonic-gate * Find underlying RDMATF plugin 2230Sstevel@tonic-gate */ 2240Sstevel@tonic-gate rw_enter(&rdma_lock, RW_READER); 2250Sstevel@tonic-gate rp = rdma_mod_head; 2260Sstevel@tonic-gate while (rp != NULL) { 2270Sstevel@tonic-gate if (strcmp(rp->r_mod->rdma_api, proto)) 2280Sstevel@tonic-gate rp = rp->r_next; 2290Sstevel@tonic-gate else { 2300Sstevel@tonic-gate p->cku_rd_mod = rp->r_mod; 2310Sstevel@tonic-gate p->cku_rd_handle = handle; 2320Sstevel@tonic-gate break; 2330Sstevel@tonic-gate } 2340Sstevel@tonic-gate } 2350Sstevel@tonic-gate rw_exit(&rdma_lock); 2360Sstevel@tonic-gate 2370Sstevel@tonic-gate if (p->cku_rd_mod == NULL) { 2380Sstevel@tonic-gate /* 2390Sstevel@tonic-gate * Should not happen. 2400Sstevel@tonic-gate * No matching RDMATF plugin. 2410Sstevel@tonic-gate */ 2420Sstevel@tonic-gate kmem_free(p, sizeof (struct cku_private)); 2430Sstevel@tonic-gate return (EINVAL); 2440Sstevel@tonic-gate } 2450Sstevel@tonic-gate 2460Sstevel@tonic-gate h = ptoh(p); 2470Sstevel@tonic-gate h->cl_ops = &rdma_clnt_ops; 2480Sstevel@tonic-gate h->cl_private = (caddr_t)p; 2490Sstevel@tonic-gate h->cl_auth = authkern_create(); 2500Sstevel@tonic-gate 2510Sstevel@tonic-gate /* call message, just used to pre-serialize below */ 2520Sstevel@tonic-gate call_msg.rm_xid = 0; 2530Sstevel@tonic-gate call_msg.rm_direction = CALL; 2540Sstevel@tonic-gate call_msg.rm_call.cb_rpcvers = RPC_MSG_VERSION; 2550Sstevel@tonic-gate call_msg.rm_call.cb_prog = pgm; 2560Sstevel@tonic-gate call_msg.rm_call.cb_vers = vers; 2570Sstevel@tonic-gate 2580Sstevel@tonic-gate xdrmem_create(&p->cku_outxdr, p->cku_rpchdr, CKU_HDRSIZE, XDR_ENCODE); 2590Sstevel@tonic-gate /* pre-serialize call message header */ 2600Sstevel@tonic-gate if (!xdr_callhdr(&p->cku_outxdr, &call_msg)) { 2610Sstevel@tonic-gate XDR_DESTROY(&p->cku_outxdr); 2620Sstevel@tonic-gate auth_destroy(h->cl_auth); 2630Sstevel@tonic-gate kmem_free(p, sizeof (struct cku_private)); 2640Sstevel@tonic-gate return (EINVAL); 2650Sstevel@tonic-gate } 2660Sstevel@tonic-gate 2670Sstevel@tonic-gate /* 2680Sstevel@tonic-gate * Set up the rpc information 2690Sstevel@tonic-gate */ 2700Sstevel@tonic-gate p->cku_cred = cred; 2710Sstevel@tonic-gate p->cku_addr.buf = kmem_zalloc(raddr->maxlen, KM_SLEEP); 2720Sstevel@tonic-gate p->cku_addr.maxlen = raddr->maxlen; 2730Sstevel@tonic-gate p->cku_addr.len = raddr->len; 2740Sstevel@tonic-gate bcopy(raddr->buf, p->cku_addr.buf, raddr->len); 2750Sstevel@tonic-gate p->cku_addrfmly = family; 2760Sstevel@tonic-gate 2770Sstevel@tonic-gate *cl = h; 2780Sstevel@tonic-gate return (0); 2790Sstevel@tonic-gate } 2800Sstevel@tonic-gate 2810Sstevel@tonic-gate static void 2820Sstevel@tonic-gate clnt_rdma_kdestroy(CLIENT *h) 2830Sstevel@tonic-gate { 2840Sstevel@tonic-gate struct cku_private *p = htop(h); 2850Sstevel@tonic-gate 2860Sstevel@tonic-gate kmem_free(p->cku_addr.buf, p->cku_addr.maxlen); 2870Sstevel@tonic-gate kmem_free(p, sizeof (*p)); 2880Sstevel@tonic-gate } 2890Sstevel@tonic-gate 2900Sstevel@tonic-gate void 2910Sstevel@tonic-gate clnt_rdma_kinit(CLIENT *h, char *proto, void *handle, struct netbuf *raddr, 2920Sstevel@tonic-gate struct cred *cred) 2930Sstevel@tonic-gate { 2940Sstevel@tonic-gate struct cku_private *p = htop(h); 2950Sstevel@tonic-gate rdma_registry_t *rp; 2960Sstevel@tonic-gate 2970Sstevel@tonic-gate ASSERT(INGLOBALZONE(curproc)); 2980Sstevel@tonic-gate /* 2990Sstevel@tonic-gate * Find underlying RDMATF plugin 3000Sstevel@tonic-gate */ 3010Sstevel@tonic-gate p->cku_rd_mod = NULL; 3020Sstevel@tonic-gate rw_enter(&rdma_lock, RW_READER); 3030Sstevel@tonic-gate rp = rdma_mod_head; 3040Sstevel@tonic-gate while (rp != NULL) { 3050Sstevel@tonic-gate if (strcmp(rp->r_mod->rdma_api, proto)) 3060Sstevel@tonic-gate rp = rp->r_next; 3070Sstevel@tonic-gate else { 3080Sstevel@tonic-gate p->cku_rd_mod = rp->r_mod; 3090Sstevel@tonic-gate p->cku_rd_handle = handle; 3100Sstevel@tonic-gate break; 3110Sstevel@tonic-gate } 3120Sstevel@tonic-gate 3130Sstevel@tonic-gate } 3140Sstevel@tonic-gate rw_exit(&rdma_lock); 3150Sstevel@tonic-gate 3160Sstevel@tonic-gate /* 3170Sstevel@tonic-gate * Set up the rpc information 3180Sstevel@tonic-gate */ 3190Sstevel@tonic-gate p->cku_cred = cred; 3200Sstevel@tonic-gate p->cku_xid = 0; 3210Sstevel@tonic-gate 3220Sstevel@tonic-gate if (p->cku_addr.maxlen < raddr->len) { 3230Sstevel@tonic-gate if (p->cku_addr.maxlen != 0 && p->cku_addr.buf != NULL) 3240Sstevel@tonic-gate kmem_free(p->cku_addr.buf, p->cku_addr.maxlen); 3250Sstevel@tonic-gate p->cku_addr.buf = kmem_zalloc(raddr->maxlen, KM_SLEEP); 3260Sstevel@tonic-gate p->cku_addr.maxlen = raddr->maxlen; 3270Sstevel@tonic-gate } 3280Sstevel@tonic-gate 3290Sstevel@tonic-gate p->cku_addr.len = raddr->len; 3300Sstevel@tonic-gate bcopy(raddr->buf, p->cku_addr.buf, raddr->len); 3310Sstevel@tonic-gate h->cl_ops = &rdma_clnt_ops; 3320Sstevel@tonic-gate } 3330Sstevel@tonic-gate 334*7387SRobert.Gordon@Sun.COM static int 335*7387SRobert.Gordon@Sun.COM clnt_compose_rpcmsg(CLIENT *h, rpcproc_t procnum, 336*7387SRobert.Gordon@Sun.COM rdma_buf_t *rpcmsg, XDR *xdrs, 337*7387SRobert.Gordon@Sun.COM xdrproc_t xdr_args, caddr_t argsp) 338*7387SRobert.Gordon@Sun.COM { 339*7387SRobert.Gordon@Sun.COM cku_private_t *p = htop(h); 340*7387SRobert.Gordon@Sun.COM 341*7387SRobert.Gordon@Sun.COM if (h->cl_auth->ah_cred.oa_flavor != RPCSEC_GSS) { 342*7387SRobert.Gordon@Sun.COM /* 343*7387SRobert.Gordon@Sun.COM * Copy in the preserialized RPC header 344*7387SRobert.Gordon@Sun.COM * information. 345*7387SRobert.Gordon@Sun.COM */ 346*7387SRobert.Gordon@Sun.COM bcopy(p->cku_rpchdr, rpcmsg->addr, CKU_HDRSIZE); 347*7387SRobert.Gordon@Sun.COM 348*7387SRobert.Gordon@Sun.COM /* 349*7387SRobert.Gordon@Sun.COM * transaction id is the 1st thing in the output 350*7387SRobert.Gordon@Sun.COM * buffer. 351*7387SRobert.Gordon@Sun.COM */ 352*7387SRobert.Gordon@Sun.COM /* LINTED pointer alignment */ 353*7387SRobert.Gordon@Sun.COM (*(uint32_t *)(rpcmsg->addr)) = p->cku_xid; 354*7387SRobert.Gordon@Sun.COM 355*7387SRobert.Gordon@Sun.COM /* Skip the preserialized stuff. */ 356*7387SRobert.Gordon@Sun.COM XDR_SETPOS(xdrs, CKU_HDRSIZE); 357*7387SRobert.Gordon@Sun.COM 358*7387SRobert.Gordon@Sun.COM /* Serialize dynamic stuff into the output buffer. */ 359*7387SRobert.Gordon@Sun.COM if ((!XDR_PUTINT32(xdrs, (int32_t *)&procnum)) || 360*7387SRobert.Gordon@Sun.COM (!AUTH_MARSHALL(h->cl_auth, xdrs, p->cku_cred)) || 361*7387SRobert.Gordon@Sun.COM (!(*xdr_args)(xdrs, argsp))) { 362*7387SRobert.Gordon@Sun.COM DTRACE_PROBE(krpc__e__clntrdma__rpcmsg__dynargs); 363*7387SRobert.Gordon@Sun.COM return (CLNT_RDMA_FAIL); 364*7387SRobert.Gordon@Sun.COM } 365*7387SRobert.Gordon@Sun.COM p->cku_outsz = XDR_GETPOS(xdrs); 366*7387SRobert.Gordon@Sun.COM } else { 367*7387SRobert.Gordon@Sun.COM uint32_t *uproc = (uint32_t *)&p->cku_rpchdr[CKU_HDRSIZE]; 368*7387SRobert.Gordon@Sun.COM IXDR_PUT_U_INT32(uproc, procnum); 369*7387SRobert.Gordon@Sun.COM (*(uint32_t *)(&p->cku_rpchdr[0])) = p->cku_xid; 370*7387SRobert.Gordon@Sun.COM XDR_SETPOS(xdrs, 0); 371*7387SRobert.Gordon@Sun.COM 372*7387SRobert.Gordon@Sun.COM /* Serialize the procedure number and the arguments. */ 373*7387SRobert.Gordon@Sun.COM if (!AUTH_WRAP(h->cl_auth, (caddr_t)p->cku_rpchdr, 374*7387SRobert.Gordon@Sun.COM CKU_HDRSIZE+4, xdrs, xdr_args, argsp)) { 375*7387SRobert.Gordon@Sun.COM if (rpcmsg->addr != xdrs->x_base) { 376*7387SRobert.Gordon@Sun.COM rpcmsg->addr = xdrs->x_base; 377*7387SRobert.Gordon@Sun.COM rpcmsg->len = xdr_getbufsize(xdrs); 378*7387SRobert.Gordon@Sun.COM } 379*7387SRobert.Gordon@Sun.COM DTRACE_PROBE(krpc__e__clntrdma__rpcmsg__procnum); 380*7387SRobert.Gordon@Sun.COM return (CLNT_RDMA_FAIL); 381*7387SRobert.Gordon@Sun.COM } 382*7387SRobert.Gordon@Sun.COM /* 383*7387SRobert.Gordon@Sun.COM * If we had to allocate a new buffer while encoding 384*7387SRobert.Gordon@Sun.COM * then update the addr and len. 385*7387SRobert.Gordon@Sun.COM */ 386*7387SRobert.Gordon@Sun.COM if (rpcmsg->addr != xdrs->x_base) { 387*7387SRobert.Gordon@Sun.COM rpcmsg->addr = xdrs->x_base; 388*7387SRobert.Gordon@Sun.COM rpcmsg->len = xdr_getbufsize(xdrs); 389*7387SRobert.Gordon@Sun.COM } 390*7387SRobert.Gordon@Sun.COM 391*7387SRobert.Gordon@Sun.COM p->cku_outsz = XDR_GETPOS(xdrs); 392*7387SRobert.Gordon@Sun.COM DTRACE_PROBE1(krpc__i__compose__size__sec, int, p->cku_outsz) 393*7387SRobert.Gordon@Sun.COM } 394*7387SRobert.Gordon@Sun.COM 395*7387SRobert.Gordon@Sun.COM return (CLNT_RDMA_SUCCESS); 396*7387SRobert.Gordon@Sun.COM } 397*7387SRobert.Gordon@Sun.COM 398*7387SRobert.Gordon@Sun.COM static int 399*7387SRobert.Gordon@Sun.COM clnt_compose_rdma_header(CONN *conn, CLIENT *h, rdma_buf_t *clmsg, 400*7387SRobert.Gordon@Sun.COM XDR **xdrs, uint_t *op) 401*7387SRobert.Gordon@Sun.COM { 402*7387SRobert.Gordon@Sun.COM cku_private_t *p = htop(h); 403*7387SRobert.Gordon@Sun.COM uint_t vers; 404*7387SRobert.Gordon@Sun.COM uint32_t rdma_credit = rdma_bufs_rqst; 405*7387SRobert.Gordon@Sun.COM 406*7387SRobert.Gordon@Sun.COM vers = RPCRDMA_VERS; 407*7387SRobert.Gordon@Sun.COM clmsg->type = SEND_BUFFER; 408*7387SRobert.Gordon@Sun.COM 409*7387SRobert.Gordon@Sun.COM if (rdma_buf_alloc(conn, clmsg)) { 410*7387SRobert.Gordon@Sun.COM return (CLNT_RDMA_FAIL); 411*7387SRobert.Gordon@Sun.COM } 412*7387SRobert.Gordon@Sun.COM 413*7387SRobert.Gordon@Sun.COM *xdrs = &p->cku_outxdr; 414*7387SRobert.Gordon@Sun.COM xdrmem_create(*xdrs, clmsg->addr, clmsg->len, XDR_ENCODE); 415*7387SRobert.Gordon@Sun.COM 416*7387SRobert.Gordon@Sun.COM (*(uint32_t *)clmsg->addr) = p->cku_xid; 417*7387SRobert.Gordon@Sun.COM XDR_SETPOS(*xdrs, sizeof (uint32_t)); 418*7387SRobert.Gordon@Sun.COM (void) xdr_u_int(*xdrs, &vers); 419*7387SRobert.Gordon@Sun.COM (void) xdr_u_int(*xdrs, &rdma_credit); 420*7387SRobert.Gordon@Sun.COM (void) xdr_u_int(*xdrs, op); 421*7387SRobert.Gordon@Sun.COM 422*7387SRobert.Gordon@Sun.COM return (CLNT_RDMA_SUCCESS); 423*7387SRobert.Gordon@Sun.COM } 424*7387SRobert.Gordon@Sun.COM 425*7387SRobert.Gordon@Sun.COM /* 426*7387SRobert.Gordon@Sun.COM * If xp_cl is NULL value, then the RPC payload will NOT carry 427*7387SRobert.Gordon@Sun.COM * an RDMA READ chunk list, in this case we insert FALSE into 428*7387SRobert.Gordon@Sun.COM * the XDR stream. Otherwise we use the clist and RDMA register 429*7387SRobert.Gordon@Sun.COM * the memory and encode the clist into the outbound XDR stream. 430*7387SRobert.Gordon@Sun.COM */ 431*7387SRobert.Gordon@Sun.COM static int 432*7387SRobert.Gordon@Sun.COM clnt_setup_rlist(CONN *conn, XDR *xdrs, XDR *call_xdrp) 433*7387SRobert.Gordon@Sun.COM { 434*7387SRobert.Gordon@Sun.COM int status; 435*7387SRobert.Gordon@Sun.COM struct clist *rclp; 436*7387SRobert.Gordon@Sun.COM int32_t xdr_flag = XDR_RDMA_RLIST_REG; 437*7387SRobert.Gordon@Sun.COM 438*7387SRobert.Gordon@Sun.COM XDR_CONTROL(call_xdrp, XDR_RDMA_GET_RLIST, &rclp); 439*7387SRobert.Gordon@Sun.COM 440*7387SRobert.Gordon@Sun.COM if (rclp != NULL) { 441*7387SRobert.Gordon@Sun.COM status = clist_register(conn, rclp, CLIST_REG_SOURCE); 442*7387SRobert.Gordon@Sun.COM if (status != RDMA_SUCCESS) { 443*7387SRobert.Gordon@Sun.COM return (CLNT_RDMA_FAIL); 444*7387SRobert.Gordon@Sun.COM } 445*7387SRobert.Gordon@Sun.COM XDR_CONTROL(call_xdrp, XDR_RDMA_SET_FLAGS, &xdr_flag); 446*7387SRobert.Gordon@Sun.COM } 447*7387SRobert.Gordon@Sun.COM (void) xdr_do_clist(xdrs, &rclp); 448*7387SRobert.Gordon@Sun.COM 449*7387SRobert.Gordon@Sun.COM return (CLNT_RDMA_SUCCESS); 450*7387SRobert.Gordon@Sun.COM } 451*7387SRobert.Gordon@Sun.COM 452*7387SRobert.Gordon@Sun.COM /* 453*7387SRobert.Gordon@Sun.COM * If xp_wcl is NULL value, then the RPC payload will NOT carry 454*7387SRobert.Gordon@Sun.COM * an RDMA WRITE chunk list, in this case we insert FALSE into 455*7387SRobert.Gordon@Sun.COM * the XDR stream. Otherwise we use the clist and RDMA register 456*7387SRobert.Gordon@Sun.COM * the memory and encode the clist into the outbound XDR stream. 457*7387SRobert.Gordon@Sun.COM */ 458*7387SRobert.Gordon@Sun.COM static int 459*7387SRobert.Gordon@Sun.COM clnt_setup_wlist(CONN *conn, XDR *xdrs, XDR *call_xdrp) 460*7387SRobert.Gordon@Sun.COM { 461*7387SRobert.Gordon@Sun.COM int status; 462*7387SRobert.Gordon@Sun.COM struct clist *wlist; 463*7387SRobert.Gordon@Sun.COM int32_t xdr_flag = XDR_RDMA_WLIST_REG; 464*7387SRobert.Gordon@Sun.COM 465*7387SRobert.Gordon@Sun.COM XDR_CONTROL(call_xdrp, XDR_RDMA_GET_WLIST, &wlist); 466*7387SRobert.Gordon@Sun.COM 467*7387SRobert.Gordon@Sun.COM if (wlist != NULL) { 468*7387SRobert.Gordon@Sun.COM status = clist_register(conn, wlist, CLIST_REG_DST); 469*7387SRobert.Gordon@Sun.COM if (status != RDMA_SUCCESS) { 470*7387SRobert.Gordon@Sun.COM return (CLNT_RDMA_FAIL); 471*7387SRobert.Gordon@Sun.COM } 472*7387SRobert.Gordon@Sun.COM XDR_CONTROL(call_xdrp, XDR_RDMA_SET_FLAGS, &xdr_flag); 473*7387SRobert.Gordon@Sun.COM } 474*7387SRobert.Gordon@Sun.COM 475*7387SRobert.Gordon@Sun.COM if (!xdr_encode_wlist(xdrs, wlist)) 476*7387SRobert.Gordon@Sun.COM return (CLNT_RDMA_FAIL); 477*7387SRobert.Gordon@Sun.COM 478*7387SRobert.Gordon@Sun.COM return (CLNT_RDMA_SUCCESS); 479*7387SRobert.Gordon@Sun.COM } 480*7387SRobert.Gordon@Sun.COM 481*7387SRobert.Gordon@Sun.COM static int 482*7387SRobert.Gordon@Sun.COM clnt_setup_long_reply(CONN *conn, struct clist **clpp, uint_t length) 483*7387SRobert.Gordon@Sun.COM { 484*7387SRobert.Gordon@Sun.COM if (length == 0) { 485*7387SRobert.Gordon@Sun.COM *clpp = NULL; 486*7387SRobert.Gordon@Sun.COM return (CLNT_RDMA_SUCCESS); 487*7387SRobert.Gordon@Sun.COM } 488*7387SRobert.Gordon@Sun.COM 489*7387SRobert.Gordon@Sun.COM *clpp = clist_alloc(); 490*7387SRobert.Gordon@Sun.COM 491*7387SRobert.Gordon@Sun.COM (*clpp)->rb_longbuf.len = calc_length(length); 492*7387SRobert.Gordon@Sun.COM (*clpp)->rb_longbuf.type = RDMA_LONG_BUFFER; 493*7387SRobert.Gordon@Sun.COM 494*7387SRobert.Gordon@Sun.COM if (rdma_buf_alloc(conn, &((*clpp)->rb_longbuf))) { 495*7387SRobert.Gordon@Sun.COM clist_free(*clpp); 496*7387SRobert.Gordon@Sun.COM *clpp = NULL; 497*7387SRobert.Gordon@Sun.COM return (CLNT_RDMA_FAIL); 498*7387SRobert.Gordon@Sun.COM } 499*7387SRobert.Gordon@Sun.COM 500*7387SRobert.Gordon@Sun.COM (*clpp)->u.c_daddr3 = (*clpp)->rb_longbuf.addr; 501*7387SRobert.Gordon@Sun.COM (*clpp)->c_len = (*clpp)->rb_longbuf.len; 502*7387SRobert.Gordon@Sun.COM (*clpp)->c_next = NULL; 503*7387SRobert.Gordon@Sun.COM (*clpp)->c_dmemhandle = (*clpp)->rb_longbuf.handle; 504*7387SRobert.Gordon@Sun.COM 505*7387SRobert.Gordon@Sun.COM if (clist_register(conn, *clpp, CLIST_REG_DST)) { 506*7387SRobert.Gordon@Sun.COM DTRACE_PROBE(krpc__e__clntrdma__longrep_regbuf); 507*7387SRobert.Gordon@Sun.COM rdma_buf_free(conn, &((*clpp)->rb_longbuf)); 508*7387SRobert.Gordon@Sun.COM clist_free(*clpp); 509*7387SRobert.Gordon@Sun.COM return (CLNT_RDMA_FAIL); 510*7387SRobert.Gordon@Sun.COM } 511*7387SRobert.Gordon@Sun.COM 512*7387SRobert.Gordon@Sun.COM return (CLNT_RDMA_SUCCESS); 513*7387SRobert.Gordon@Sun.COM } 514*7387SRobert.Gordon@Sun.COM 5150Sstevel@tonic-gate /* ARGSUSED */ 5160Sstevel@tonic-gate static enum clnt_stat 5170Sstevel@tonic-gate clnt_rdma_kcallit(CLIENT *h, rpcproc_t procnum, xdrproc_t xdr_args, 518*7387SRobert.Gordon@Sun.COM caddr_t argsp, xdrproc_t xdr_results, caddr_t resultsp, 519*7387SRobert.Gordon@Sun.COM struct timeval wait) 5200Sstevel@tonic-gate { 5210Sstevel@tonic-gate cku_private_t *p = htop(h); 522*7387SRobert.Gordon@Sun.COM 523*7387SRobert.Gordon@Sun.COM int try_call_again; 524*7387SRobert.Gordon@Sun.COM int refresh_attempt = AUTH_REFRESH_COUNT; 5250Sstevel@tonic-gate int status; 526*7387SRobert.Gordon@Sun.COM int msglen; 527*7387SRobert.Gordon@Sun.COM 528*7387SRobert.Gordon@Sun.COM XDR *call_xdrp, callxdr; /* for xdrrdma encoding the RPC call */ 529*7387SRobert.Gordon@Sun.COM XDR *reply_xdrp, replyxdr; /* for xdrrdma decoding the RPC reply */ 530*7387SRobert.Gordon@Sun.COM XDR *rdmahdr_o_xdrs, *rdmahdr_i_xdrs; 531*7387SRobert.Gordon@Sun.COM 5320Sstevel@tonic-gate struct rpc_msg reply_msg; 533*7387SRobert.Gordon@Sun.COM 534*7387SRobert.Gordon@Sun.COM struct clist *cl_sendlist; 535*7387SRobert.Gordon@Sun.COM struct clist *cl_recvlist; 536*7387SRobert.Gordon@Sun.COM struct clist *cl; 537*7387SRobert.Gordon@Sun.COM struct clist *cl_rpcmsg; 538*7387SRobert.Gordon@Sun.COM struct clist *cl_rdma_reply; 539*7387SRobert.Gordon@Sun.COM struct clist *cl_rpcreply_wlist; 540*7387SRobert.Gordon@Sun.COM struct clist *cl_long_reply; 541*7387SRobert.Gordon@Sun.COM 542*7387SRobert.Gordon@Sun.COM uint_t vers; 543*7387SRobert.Gordon@Sun.COM uint_t op; 5440Sstevel@tonic-gate uint_t off; 545*7387SRobert.Gordon@Sun.COM uint32_t seg_array_len; 546*7387SRobert.Gordon@Sun.COM uint_t long_reply_len; 547*7387SRobert.Gordon@Sun.COM uint_t rpcsec_gss; 548*7387SRobert.Gordon@Sun.COM uint_t gss_i_or_p; 549*7387SRobert.Gordon@Sun.COM 5500Sstevel@tonic-gate CONN *conn = NULL; 551*7387SRobert.Gordon@Sun.COM rdma_buf_t clmsg; 552*7387SRobert.Gordon@Sun.COM rdma_buf_t rpcmsg; 553*7387SRobert.Gordon@Sun.COM rdma_chunkinfo_lengths_t rcil; 554*7387SRobert.Gordon@Sun.COM 5550Sstevel@tonic-gate clock_t ticks; 556*7387SRobert.Gordon@Sun.COM bool_t wlist_exists_reply; 557*7387SRobert.Gordon@Sun.COM 558*7387SRobert.Gordon@Sun.COM uint32_t rdma_credit = rdma_bufs_rqst; 5590Sstevel@tonic-gate 5600Sstevel@tonic-gate RCSTAT_INCR(rccalls); 561*7387SRobert.Gordon@Sun.COM 562*7387SRobert.Gordon@Sun.COM call_again: 563*7387SRobert.Gordon@Sun.COM 564*7387SRobert.Gordon@Sun.COM bzero(&clmsg, sizeof (clmsg)); 565*7387SRobert.Gordon@Sun.COM bzero(&rpcmsg, sizeof (rpcmsg)); 566*7387SRobert.Gordon@Sun.COM try_call_again = 0; 567*7387SRobert.Gordon@Sun.COM cl_sendlist = NULL; 568*7387SRobert.Gordon@Sun.COM cl_recvlist = NULL; 569*7387SRobert.Gordon@Sun.COM cl = NULL; 570*7387SRobert.Gordon@Sun.COM cl_rpcmsg = NULL; 571*7387SRobert.Gordon@Sun.COM cl_rdma_reply = NULL; 572*7387SRobert.Gordon@Sun.COM call_xdrp = NULL; 573*7387SRobert.Gordon@Sun.COM reply_xdrp = NULL; 574*7387SRobert.Gordon@Sun.COM wlist_exists_reply = FALSE; 575*7387SRobert.Gordon@Sun.COM cl_rpcreply_wlist = NULL; 576*7387SRobert.Gordon@Sun.COM cl_long_reply = NULL; 577*7387SRobert.Gordon@Sun.COM rcil.rcil_len = 0; 578*7387SRobert.Gordon@Sun.COM rcil.rcil_len_alt = 0; 579*7387SRobert.Gordon@Sun.COM long_reply_len = 0; 580*7387SRobert.Gordon@Sun.COM 5810Sstevel@tonic-gate /* 5820Sstevel@tonic-gate * Get unique xid 5830Sstevel@tonic-gate */ 5840Sstevel@tonic-gate if (p->cku_xid == 0) 5850Sstevel@tonic-gate p->cku_xid = alloc_xid(); 5860Sstevel@tonic-gate 5870Sstevel@tonic-gate status = RDMA_GET_CONN(p->cku_rd_mod->rdma_ops, &p->cku_addr, 5880Sstevel@tonic-gate p->cku_addrfmly, p->cku_rd_handle, &conn); 5890Sstevel@tonic-gate 590*7387SRobert.Gordon@Sun.COM /* 591*7387SRobert.Gordon@Sun.COM * If there is a problem with the connection reflect the issue 592*7387SRobert.Gordon@Sun.COM * back to the higher level to address, we MAY delay for a short 593*7387SRobert.Gordon@Sun.COM * period so that we are kind to the transport. 594*7387SRobert.Gordon@Sun.COM */ 5950Sstevel@tonic-gate if (conn == NULL) { 5960Sstevel@tonic-gate /* 5970Sstevel@tonic-gate * Connect failed to server. Could be because of one 5980Sstevel@tonic-gate * of several things. In some cases we don't want 5990Sstevel@tonic-gate * the caller to retry immediately - delay before 6000Sstevel@tonic-gate * returning to caller. 6010Sstevel@tonic-gate */ 6020Sstevel@tonic-gate switch (status) { 6030Sstevel@tonic-gate case RDMA_TIMEDOUT: 6040Sstevel@tonic-gate /* 6050Sstevel@tonic-gate * Already timed out. No need to delay 6060Sstevel@tonic-gate * some more. 6070Sstevel@tonic-gate */ 6080Sstevel@tonic-gate p->cku_err.re_status = RPC_TIMEDOUT; 6090Sstevel@tonic-gate p->cku_err.re_errno = ETIMEDOUT; 6100Sstevel@tonic-gate break; 6110Sstevel@tonic-gate case RDMA_INTR: 6120Sstevel@tonic-gate /* 6130Sstevel@tonic-gate * Failed because of an signal. Very likely 6140Sstevel@tonic-gate * the caller will not retry. 6150Sstevel@tonic-gate */ 6160Sstevel@tonic-gate p->cku_err.re_status = RPC_INTR; 6170Sstevel@tonic-gate p->cku_err.re_errno = EINTR; 6180Sstevel@tonic-gate break; 6190Sstevel@tonic-gate default: 6200Sstevel@tonic-gate /* 6210Sstevel@tonic-gate * All other failures - server down or service 6220Sstevel@tonic-gate * down or temporary resource failure. Delay before 6230Sstevel@tonic-gate * returning to caller. 6240Sstevel@tonic-gate */ 6250Sstevel@tonic-gate ticks = clnt_rdma_min_delay * drv_usectohz(1000000); 6260Sstevel@tonic-gate p->cku_err.re_status = RPC_CANTCONNECT; 6270Sstevel@tonic-gate p->cku_err.re_errno = EIO; 6280Sstevel@tonic-gate 6290Sstevel@tonic-gate if (h->cl_nosignal == TRUE) { 6300Sstevel@tonic-gate delay(ticks); 6310Sstevel@tonic-gate } else { 6320Sstevel@tonic-gate if (delay_sig(ticks) == EINTR) { 6330Sstevel@tonic-gate p->cku_err.re_status = RPC_INTR; 6340Sstevel@tonic-gate p->cku_err.re_errno = EINTR; 6350Sstevel@tonic-gate } 6360Sstevel@tonic-gate } 6370Sstevel@tonic-gate break; 6380Sstevel@tonic-gate } 6390Sstevel@tonic-gate 6400Sstevel@tonic-gate return (p->cku_err.re_status); 6410Sstevel@tonic-gate } 642*7387SRobert.Gordon@Sun.COM 643*7387SRobert.Gordon@Sun.COM clnt_check_credit(conn); 6440Sstevel@tonic-gate 645*7387SRobert.Gordon@Sun.COM status = CLNT_RDMA_FAIL; 646*7387SRobert.Gordon@Sun.COM 647*7387SRobert.Gordon@Sun.COM rpcsec_gss = gss_i_or_p = FALSE; 6480Sstevel@tonic-gate 649*7387SRobert.Gordon@Sun.COM if (IS_RPCSEC_GSS(h)) { 650*7387SRobert.Gordon@Sun.COM rpcsec_gss = TRUE; 651*7387SRobert.Gordon@Sun.COM if (rpc_gss_get_service_type(h->cl_auth) == 652*7387SRobert.Gordon@Sun.COM rpc_gss_svc_integrity || 653*7387SRobert.Gordon@Sun.COM rpc_gss_get_service_type(h->cl_auth) == 654*7387SRobert.Gordon@Sun.COM rpc_gss_svc_privacy) 655*7387SRobert.Gordon@Sun.COM gss_i_or_p = TRUE; 6560Sstevel@tonic-gate } 6570Sstevel@tonic-gate 658*7387SRobert.Gordon@Sun.COM /* 659*7387SRobert.Gordon@Sun.COM * Try a regular RDMA message if RPCSEC_GSS is not being used 660*7387SRobert.Gordon@Sun.COM * or if RPCSEC_GSS is being used for authentication only. 661*7387SRobert.Gordon@Sun.COM */ 662*7387SRobert.Gordon@Sun.COM if (rpcsec_gss == FALSE || 663*7387SRobert.Gordon@Sun.COM (rpcsec_gss == TRUE && gss_i_or_p == FALSE)) { 6640Sstevel@tonic-gate /* 665*7387SRobert.Gordon@Sun.COM * Grab a send buffer for the request. Try to 666*7387SRobert.Gordon@Sun.COM * encode it to see if it fits. If not, then it 667*7387SRobert.Gordon@Sun.COM * needs to be sent in a chunk. 6680Sstevel@tonic-gate */ 669*7387SRobert.Gordon@Sun.COM rpcmsg.type = SEND_BUFFER; 670*7387SRobert.Gordon@Sun.COM if (rdma_buf_alloc(conn, &rpcmsg)) { 671*7387SRobert.Gordon@Sun.COM DTRACE_PROBE(krpc__e__clntrdma__callit_nobufs); 6720Sstevel@tonic-gate goto done; 6730Sstevel@tonic-gate } 674*7387SRobert.Gordon@Sun.COM 675*7387SRobert.Gordon@Sun.COM /* First try to encode into regular send buffer */ 676*7387SRobert.Gordon@Sun.COM op = RDMA_MSG; 6770Sstevel@tonic-gate 678*7387SRobert.Gordon@Sun.COM call_xdrp = &callxdr; 679*7387SRobert.Gordon@Sun.COM 680*7387SRobert.Gordon@Sun.COM xdrrdma_create(call_xdrp, rpcmsg.addr, rpcmsg.len, 681*7387SRobert.Gordon@Sun.COM rdma_minchunk, NULL, XDR_ENCODE, conn); 6820Sstevel@tonic-gate 683*7387SRobert.Gordon@Sun.COM status = clnt_compose_rpcmsg(h, procnum, &rpcmsg, call_xdrp, 684*7387SRobert.Gordon@Sun.COM xdr_args, argsp); 685*7387SRobert.Gordon@Sun.COM 686*7387SRobert.Gordon@Sun.COM if (status != CLNT_RDMA_SUCCESS) { 687*7387SRobert.Gordon@Sun.COM /* Clean up from previous encode attempt */ 688*7387SRobert.Gordon@Sun.COM rdma_buf_free(conn, &rpcmsg); 689*7387SRobert.Gordon@Sun.COM XDR_DESTROY(call_xdrp); 6900Sstevel@tonic-gate } else { 691*7387SRobert.Gordon@Sun.COM XDR_CONTROL(call_xdrp, XDR_RDMA_GET_CHUNK_LEN, &rcil); 6920Sstevel@tonic-gate } 6930Sstevel@tonic-gate } 6940Sstevel@tonic-gate 695*7387SRobert.Gordon@Sun.COM /* If the encode didn't work, then try a NOMSG */ 696*7387SRobert.Gordon@Sun.COM if (status != CLNT_RDMA_SUCCESS) { 697*7387SRobert.Gordon@Sun.COM 698*7387SRobert.Gordon@Sun.COM msglen = CKU_HDRSIZE + BYTES_PER_XDR_UNIT + MAX_AUTH_BYTES + 699*7387SRobert.Gordon@Sun.COM xdr_sizeof(xdr_args, argsp); 700*7387SRobert.Gordon@Sun.COM 701*7387SRobert.Gordon@Sun.COM msglen = calc_length(msglen); 702*7387SRobert.Gordon@Sun.COM 703*7387SRobert.Gordon@Sun.COM /* pick up the lengths for the reply buffer needed */ 704*7387SRobert.Gordon@Sun.COM (void) xdrrdma_sizeof(xdr_args, argsp, 0, 705*7387SRobert.Gordon@Sun.COM &rcil.rcil_len, &rcil.rcil_len_alt); 706*7387SRobert.Gordon@Sun.COM 707*7387SRobert.Gordon@Sun.COM /* 708*7387SRobert.Gordon@Sun.COM * Construct a clist to describe the CHUNK_BUFFER 709*7387SRobert.Gordon@Sun.COM * for the rpcmsg. 710*7387SRobert.Gordon@Sun.COM */ 711*7387SRobert.Gordon@Sun.COM cl_rpcmsg = clist_alloc(); 712*7387SRobert.Gordon@Sun.COM cl_rpcmsg->c_len = msglen; 713*7387SRobert.Gordon@Sun.COM cl_rpcmsg->rb_longbuf.type = RDMA_LONG_BUFFER; 714*7387SRobert.Gordon@Sun.COM cl_rpcmsg->rb_longbuf.len = msglen; 715*7387SRobert.Gordon@Sun.COM if (rdma_buf_alloc(conn, &cl_rpcmsg->rb_longbuf)) { 716*7387SRobert.Gordon@Sun.COM clist_free(cl_rpcmsg); 717*7387SRobert.Gordon@Sun.COM goto done; 718*7387SRobert.Gordon@Sun.COM } 719*7387SRobert.Gordon@Sun.COM cl_rpcmsg->w.c_saddr3 = cl_rpcmsg->rb_longbuf.addr; 720*7387SRobert.Gordon@Sun.COM 721*7387SRobert.Gordon@Sun.COM op = RDMA_NOMSG; 722*7387SRobert.Gordon@Sun.COM call_xdrp = &callxdr; 723*7387SRobert.Gordon@Sun.COM 724*7387SRobert.Gordon@Sun.COM xdrrdma_create(call_xdrp, cl_rpcmsg->rb_longbuf.addr, 725*7387SRobert.Gordon@Sun.COM cl_rpcmsg->rb_longbuf.len, 0, 726*7387SRobert.Gordon@Sun.COM cl_rpcmsg, XDR_ENCODE, conn); 727*7387SRobert.Gordon@Sun.COM 728*7387SRobert.Gordon@Sun.COM status = clnt_compose_rpcmsg(h, procnum, &rpcmsg, call_xdrp, 729*7387SRobert.Gordon@Sun.COM xdr_args, argsp); 730*7387SRobert.Gordon@Sun.COM 731*7387SRobert.Gordon@Sun.COM if (status != CLNT_RDMA_SUCCESS) { 732*7387SRobert.Gordon@Sun.COM p->cku_err.re_status = RPC_CANTENCODEARGS; 733*7387SRobert.Gordon@Sun.COM p->cku_err.re_errno = EIO; 734*7387SRobert.Gordon@Sun.COM DTRACE_PROBE(krpc__e__clntrdma__callit__composemsg); 735*7387SRobert.Gordon@Sun.COM goto done; 736*7387SRobert.Gordon@Sun.COM } 737*7387SRobert.Gordon@Sun.COM } 738*7387SRobert.Gordon@Sun.COM 739*7387SRobert.Gordon@Sun.COM /* 740*7387SRobert.Gordon@Sun.COM * During the XDR_ENCODE we may have "allocated" an RDMA READ or 741*7387SRobert.Gordon@Sun.COM * RDMA WRITE clist. 742*7387SRobert.Gordon@Sun.COM * 743*7387SRobert.Gordon@Sun.COM * First pull the RDMA READ chunk list from the XDR private 744*7387SRobert.Gordon@Sun.COM * area to keep it handy. 745*7387SRobert.Gordon@Sun.COM */ 746*7387SRobert.Gordon@Sun.COM XDR_CONTROL(call_xdrp, XDR_RDMA_GET_RLIST, &cl); 747*7387SRobert.Gordon@Sun.COM 748*7387SRobert.Gordon@Sun.COM if (gss_i_or_p) { 749*7387SRobert.Gordon@Sun.COM long_reply_len = rcil.rcil_len + rcil.rcil_len_alt; 750*7387SRobert.Gordon@Sun.COM long_reply_len += MAX_AUTH_BYTES; 751*7387SRobert.Gordon@Sun.COM } else { 752*7387SRobert.Gordon@Sun.COM long_reply_len = rcil.rcil_len; 753*7387SRobert.Gordon@Sun.COM } 7540Sstevel@tonic-gate 7550Sstevel@tonic-gate /* 7560Sstevel@tonic-gate * Update the chunk size information for the Long RPC msg. 7570Sstevel@tonic-gate */ 7580Sstevel@tonic-gate if (cl && op == RDMA_NOMSG) 7590Sstevel@tonic-gate cl->c_len = p->cku_outsz; 7600Sstevel@tonic-gate 7610Sstevel@tonic-gate /* 762*7387SRobert.Gordon@Sun.COM * Prepare the RDMA header. On success xdrs will hold the result 763*7387SRobert.Gordon@Sun.COM * of xdrmem_create() for a SEND_BUFFER. 7640Sstevel@tonic-gate */ 765*7387SRobert.Gordon@Sun.COM status = clnt_compose_rdma_header(conn, h, &clmsg, 766*7387SRobert.Gordon@Sun.COM &rdmahdr_o_xdrs, &op); 767*7387SRobert.Gordon@Sun.COM 768*7387SRobert.Gordon@Sun.COM if (status != CLNT_RDMA_SUCCESS) { 7690Sstevel@tonic-gate p->cku_err.re_status = RPC_CANTSEND; 7700Sstevel@tonic-gate p->cku_err.re_errno = EIO; 7710Sstevel@tonic-gate RCSTAT_INCR(rcnomem); 772*7387SRobert.Gordon@Sun.COM DTRACE_PROBE(krpc__e__clntrdma__callit__nobufs2); 7730Sstevel@tonic-gate goto done; 7740Sstevel@tonic-gate } 775*7387SRobert.Gordon@Sun.COM 7760Sstevel@tonic-gate /* 777*7387SRobert.Gordon@Sun.COM * Now insert the RDMA READ list iff present 7780Sstevel@tonic-gate */ 779*7387SRobert.Gordon@Sun.COM status = clnt_setup_rlist(conn, rdmahdr_o_xdrs, call_xdrp); 780*7387SRobert.Gordon@Sun.COM if (status != CLNT_RDMA_SUCCESS) { 781*7387SRobert.Gordon@Sun.COM DTRACE_PROBE(krpc__e__clntrdma__callit__clistreg); 782*7387SRobert.Gordon@Sun.COM rdma_buf_free(conn, &clmsg); 783*7387SRobert.Gordon@Sun.COM p->cku_err.re_status = RPC_CANTSEND; 784*7387SRobert.Gordon@Sun.COM p->cku_err.re_errno = EIO; 785*7387SRobert.Gordon@Sun.COM goto done; 786*7387SRobert.Gordon@Sun.COM } 787*7387SRobert.Gordon@Sun.COM 788*7387SRobert.Gordon@Sun.COM /* 789*7387SRobert.Gordon@Sun.COM * Setup RDMA WRITE chunk list for nfs read operation 790*7387SRobert.Gordon@Sun.COM * other operations will have a NULL which will result 791*7387SRobert.Gordon@Sun.COM * as a NULL list in the XDR stream. 792*7387SRobert.Gordon@Sun.COM */ 793*7387SRobert.Gordon@Sun.COM status = clnt_setup_wlist(conn, rdmahdr_o_xdrs, call_xdrp); 794*7387SRobert.Gordon@Sun.COM if (status != CLNT_RDMA_SUCCESS) { 795*7387SRobert.Gordon@Sun.COM rdma_buf_free(conn, &clmsg); 796*7387SRobert.Gordon@Sun.COM p->cku_err.re_status = RPC_CANTSEND; 797*7387SRobert.Gordon@Sun.COM p->cku_err.re_errno = EIO; 798*7387SRobert.Gordon@Sun.COM goto done; 799*7387SRobert.Gordon@Sun.COM } 8000Sstevel@tonic-gate 8010Sstevel@tonic-gate /* 802*7387SRobert.Gordon@Sun.COM * If NULL call and RPCSEC_GSS, provide a chunk such that 803*7387SRobert.Gordon@Sun.COM * large responses can flow back to the client. 804*7387SRobert.Gordon@Sun.COM * If RPCSEC_GSS with integrity or privacy is in use, get chunk. 8050Sstevel@tonic-gate */ 806*7387SRobert.Gordon@Sun.COM if ((procnum == 0 && rpcsec_gss == TRUE) || 807*7387SRobert.Gordon@Sun.COM (rpcsec_gss == TRUE && gss_i_or_p == TRUE)) 808*7387SRobert.Gordon@Sun.COM long_reply_len += 1024; 8090Sstevel@tonic-gate 810*7387SRobert.Gordon@Sun.COM status = clnt_setup_long_reply(conn, &cl_long_reply, long_reply_len); 8110Sstevel@tonic-gate 812*7387SRobert.Gordon@Sun.COM if (status != CLNT_RDMA_SUCCESS) { 813*7387SRobert.Gordon@Sun.COM rdma_buf_free(conn, &clmsg); 814*7387SRobert.Gordon@Sun.COM p->cku_err.re_status = RPC_CANTSEND; 815*7387SRobert.Gordon@Sun.COM p->cku_err.re_errno = EIO; 816*7387SRobert.Gordon@Sun.COM goto done; 8170Sstevel@tonic-gate } 8180Sstevel@tonic-gate 8190Sstevel@tonic-gate /* 820*7387SRobert.Gordon@Sun.COM * XDR encode the RDMA_REPLY write chunk 821*7387SRobert.Gordon@Sun.COM */ 822*7387SRobert.Gordon@Sun.COM seg_array_len = (cl_long_reply ? 1 : 0); 823*7387SRobert.Gordon@Sun.COM (void) xdr_encode_reply_wchunk(rdmahdr_o_xdrs, cl_long_reply, 824*7387SRobert.Gordon@Sun.COM seg_array_len); 825*7387SRobert.Gordon@Sun.COM 826*7387SRobert.Gordon@Sun.COM /* 827*7387SRobert.Gordon@Sun.COM * Construct a clist in "sendlist" that represents what we 828*7387SRobert.Gordon@Sun.COM * will push over the wire. 829*7387SRobert.Gordon@Sun.COM * 8300Sstevel@tonic-gate * Start with the RDMA header and clist (if any) 8310Sstevel@tonic-gate */ 832*7387SRobert.Gordon@Sun.COM clist_add(&cl_sendlist, 0, XDR_GETPOS(rdmahdr_o_xdrs), &clmsg.handle, 833*7387SRobert.Gordon@Sun.COM clmsg.addr, NULL, NULL); 8340Sstevel@tonic-gate 8350Sstevel@tonic-gate /* 836*7387SRobert.Gordon@Sun.COM * Put the RPC call message in sendlist if small RPC 8370Sstevel@tonic-gate */ 8380Sstevel@tonic-gate if (op == RDMA_MSG) { 839*7387SRobert.Gordon@Sun.COM clist_add(&cl_sendlist, 0, p->cku_outsz, &rpcmsg.handle, 840*7387SRobert.Gordon@Sun.COM rpcmsg.addr, NULL, NULL); 8410Sstevel@tonic-gate } else { 8420Sstevel@tonic-gate /* Long RPC already in chunk list */ 8430Sstevel@tonic-gate RCSTAT_INCR(rclongrpcs); 8440Sstevel@tonic-gate } 8450Sstevel@tonic-gate 8460Sstevel@tonic-gate /* 8470Sstevel@tonic-gate * Set up a reply buffer ready for the reply 8480Sstevel@tonic-gate */ 8490Sstevel@tonic-gate status = rdma_clnt_postrecv(conn, p->cku_xid); 8500Sstevel@tonic-gate if (status != RDMA_SUCCESS) { 8510Sstevel@tonic-gate rdma_buf_free(conn, &clmsg); 8520Sstevel@tonic-gate p->cku_err.re_status = RPC_CANTSEND; 8530Sstevel@tonic-gate p->cku_err.re_errno = EIO; 8540Sstevel@tonic-gate goto done; 8550Sstevel@tonic-gate } 856*7387SRobert.Gordon@Sun.COM 8570Sstevel@tonic-gate /* 8580Sstevel@tonic-gate * sync the memory for dma 8590Sstevel@tonic-gate */ 8600Sstevel@tonic-gate if (cl != NULL) { 861*7387SRobert.Gordon@Sun.COM status = clist_syncmem(conn, cl, CLIST_REG_SOURCE); 8620Sstevel@tonic-gate if (status != RDMA_SUCCESS) { 863*7387SRobert.Gordon@Sun.COM (void) rdma_clnt_postrecv_remove(conn, p->cku_xid); 8640Sstevel@tonic-gate rdma_buf_free(conn, &clmsg); 8650Sstevel@tonic-gate p->cku_err.re_status = RPC_CANTSEND; 8660Sstevel@tonic-gate p->cku_err.re_errno = EIO; 8670Sstevel@tonic-gate goto done; 8680Sstevel@tonic-gate } 8690Sstevel@tonic-gate } 8700Sstevel@tonic-gate 8710Sstevel@tonic-gate /* 872*7387SRobert.Gordon@Sun.COM * Send the RDMA Header and RPC call message to the server 8730Sstevel@tonic-gate */ 874*7387SRobert.Gordon@Sun.COM status = RDMA_SEND(conn, cl_sendlist, p->cku_xid); 8750Sstevel@tonic-gate if (status != RDMA_SUCCESS) { 876*7387SRobert.Gordon@Sun.COM (void) rdma_clnt_postrecv_remove(conn, p->cku_xid); 8770Sstevel@tonic-gate p->cku_err.re_status = RPC_CANTSEND; 8780Sstevel@tonic-gate p->cku_err.re_errno = EIO; 8790Sstevel@tonic-gate goto done; 8800Sstevel@tonic-gate } 881*7387SRobert.Gordon@Sun.COM 882*7387SRobert.Gordon@Sun.COM /* 883*7387SRobert.Gordon@Sun.COM * RDMA plugin now owns the send msg buffers. 884*7387SRobert.Gordon@Sun.COM * Clear them out and don't free them. 885*7387SRobert.Gordon@Sun.COM */ 886*7387SRobert.Gordon@Sun.COM clmsg.addr = NULL; 887*7387SRobert.Gordon@Sun.COM if (rpcmsg.type == SEND_BUFFER) 888*7387SRobert.Gordon@Sun.COM rpcmsg.addr = NULL; 8890Sstevel@tonic-gate 8900Sstevel@tonic-gate /* 8910Sstevel@tonic-gate * Recv rpc reply 8920Sstevel@tonic-gate */ 893*7387SRobert.Gordon@Sun.COM status = RDMA_RECV(conn, &cl_recvlist, p->cku_xid); 8940Sstevel@tonic-gate 8950Sstevel@tonic-gate /* 8960Sstevel@tonic-gate * Now check recv status 8970Sstevel@tonic-gate */ 8980Sstevel@tonic-gate if (status != 0) { 8990Sstevel@tonic-gate if (status == RDMA_INTR) { 9000Sstevel@tonic-gate p->cku_err.re_status = RPC_INTR; 9010Sstevel@tonic-gate p->cku_err.re_errno = EINTR; 9020Sstevel@tonic-gate RCSTAT_INCR(rcintrs); 9030Sstevel@tonic-gate } else if (status == RPC_TIMEDOUT) { 9040Sstevel@tonic-gate p->cku_err.re_status = RPC_TIMEDOUT; 9050Sstevel@tonic-gate p->cku_err.re_errno = ETIMEDOUT; 9060Sstevel@tonic-gate RCSTAT_INCR(rctimeouts); 9070Sstevel@tonic-gate } else { 9080Sstevel@tonic-gate p->cku_err.re_status = RPC_CANTRECV; 9090Sstevel@tonic-gate p->cku_err.re_errno = EIO; 9100Sstevel@tonic-gate } 9110Sstevel@tonic-gate goto done; 9120Sstevel@tonic-gate } 913*7387SRobert.Gordon@Sun.COM 9140Sstevel@tonic-gate /* 9150Sstevel@tonic-gate * Process the reply message. 9160Sstevel@tonic-gate * 9170Sstevel@tonic-gate * First the chunk list (if any) 9180Sstevel@tonic-gate */ 919*7387SRobert.Gordon@Sun.COM rdmahdr_i_xdrs = &(p->cku_inxdr); 920*7387SRobert.Gordon@Sun.COM xdrmem_create(rdmahdr_i_xdrs, 921*7387SRobert.Gordon@Sun.COM (caddr_t)(uintptr_t)cl_recvlist->w.c_saddr3, 922*7387SRobert.Gordon@Sun.COM cl_recvlist->c_len, XDR_DECODE); 923*7387SRobert.Gordon@Sun.COM 9240Sstevel@tonic-gate /* 9250Sstevel@tonic-gate * Treat xid as opaque (xid is the first entity 9260Sstevel@tonic-gate * in the rpc rdma message). 927*7387SRobert.Gordon@Sun.COM * Skip xid and set the xdr position accordingly. 9280Sstevel@tonic-gate */ 929*7387SRobert.Gordon@Sun.COM XDR_SETPOS(rdmahdr_i_xdrs, sizeof (uint32_t)); 930*7387SRobert.Gordon@Sun.COM (void) xdr_u_int(rdmahdr_i_xdrs, &vers); 931*7387SRobert.Gordon@Sun.COM (void) xdr_u_int(rdmahdr_i_xdrs, &rdma_credit); 932*7387SRobert.Gordon@Sun.COM (void) xdr_u_int(rdmahdr_i_xdrs, &op); 933*7387SRobert.Gordon@Sun.COM (void) xdr_do_clist(rdmahdr_i_xdrs, &cl); 934*7387SRobert.Gordon@Sun.COM 935*7387SRobert.Gordon@Sun.COM clnt_update_credit(conn, rdma_credit); 936*7387SRobert.Gordon@Sun.COM 937*7387SRobert.Gordon@Sun.COM wlist_exists_reply = FALSE; 938*7387SRobert.Gordon@Sun.COM if (! xdr_decode_wlist(rdmahdr_i_xdrs, &cl_rpcreply_wlist, 939*7387SRobert.Gordon@Sun.COM &wlist_exists_reply)) { 940*7387SRobert.Gordon@Sun.COM DTRACE_PROBE(krpc__e__clntrdma__callit__wlist_decode); 941*7387SRobert.Gordon@Sun.COM p->cku_err.re_status = RPC_CANTDECODERES; 942*7387SRobert.Gordon@Sun.COM p->cku_err.re_errno = EIO; 943*7387SRobert.Gordon@Sun.COM goto done; 944*7387SRobert.Gordon@Sun.COM } 9450Sstevel@tonic-gate 9460Sstevel@tonic-gate /* 947*7387SRobert.Gordon@Sun.COM * The server shouldn't have sent a RDMA_SEND that 948*7387SRobert.Gordon@Sun.COM * the client needs to RDMA_WRITE a reply back to 949*7387SRobert.Gordon@Sun.COM * the server. So silently ignoring what the 950*7387SRobert.Gordon@Sun.COM * server returns in the rdma_reply section of the 951*7387SRobert.Gordon@Sun.COM * header. 9520Sstevel@tonic-gate */ 953*7387SRobert.Gordon@Sun.COM (void) xdr_decode_reply_wchunk(rdmahdr_i_xdrs, &cl_rdma_reply); 954*7387SRobert.Gordon@Sun.COM off = xdr_getpos(rdmahdr_i_xdrs); 9550Sstevel@tonic-gate 956*7387SRobert.Gordon@Sun.COM clnt_decode_long_reply(conn, cl_long_reply, 957*7387SRobert.Gordon@Sun.COM cl_rdma_reply, &replyxdr, &reply_xdrp, 958*7387SRobert.Gordon@Sun.COM cl, cl_recvlist, op, off); 9590Sstevel@tonic-gate 960*7387SRobert.Gordon@Sun.COM if (reply_xdrp == NULL) 961*7387SRobert.Gordon@Sun.COM goto done; 9620Sstevel@tonic-gate 963*7387SRobert.Gordon@Sun.COM if (wlist_exists_reply) { 964*7387SRobert.Gordon@Sun.COM XDR_CONTROL(reply_xdrp, XDR_RDMA_SET_WLIST, cl_rpcreply_wlist); 9650Sstevel@tonic-gate } 9660Sstevel@tonic-gate 9670Sstevel@tonic-gate reply_msg.rm_direction = REPLY; 9680Sstevel@tonic-gate reply_msg.rm_reply.rp_stat = MSG_ACCEPTED; 9690Sstevel@tonic-gate reply_msg.acpted_rply.ar_stat = SUCCESS; 9700Sstevel@tonic-gate reply_msg.acpted_rply.ar_verf = _null_auth; 971*7387SRobert.Gordon@Sun.COM 9720Sstevel@tonic-gate /* 9730Sstevel@tonic-gate * xdr_results will be done in AUTH_UNWRAP. 9740Sstevel@tonic-gate */ 9750Sstevel@tonic-gate reply_msg.acpted_rply.ar_results.where = NULL; 9760Sstevel@tonic-gate reply_msg.acpted_rply.ar_results.proc = xdr_void; 9770Sstevel@tonic-gate 9780Sstevel@tonic-gate /* 9790Sstevel@tonic-gate * Decode and validate the response. 9800Sstevel@tonic-gate */ 981*7387SRobert.Gordon@Sun.COM if (xdr_replymsg(reply_xdrp, &reply_msg)) { 9820Sstevel@tonic-gate enum clnt_stat re_status; 9830Sstevel@tonic-gate 9840Sstevel@tonic-gate _seterr_reply(&reply_msg, &(p->cku_err)); 9850Sstevel@tonic-gate 9860Sstevel@tonic-gate re_status = p->cku_err.re_status; 9870Sstevel@tonic-gate if (re_status == RPC_SUCCESS) { 9880Sstevel@tonic-gate /* 9890Sstevel@tonic-gate * Reply is good, check auth. 9900Sstevel@tonic-gate */ 9910Sstevel@tonic-gate if (!AUTH_VALIDATE(h->cl_auth, 9920Sstevel@tonic-gate &reply_msg.acpted_rply.ar_verf)) { 9930Sstevel@tonic-gate p->cku_err.re_status = RPC_AUTHERROR; 9940Sstevel@tonic-gate p->cku_err.re_why = AUTH_INVALIDRESP; 9950Sstevel@tonic-gate RCSTAT_INCR(rcbadverfs); 996*7387SRobert.Gordon@Sun.COM DTRACE_PROBE( 997*7387SRobert.Gordon@Sun.COM krpc__e__clntrdma__callit__authvalidate); 998*7387SRobert.Gordon@Sun.COM } else if (!AUTH_UNWRAP(h->cl_auth, reply_xdrp, 9990Sstevel@tonic-gate xdr_results, resultsp)) { 10000Sstevel@tonic-gate p->cku_err.re_status = RPC_CANTDECODERES; 10010Sstevel@tonic-gate p->cku_err.re_errno = EIO; 1002*7387SRobert.Gordon@Sun.COM DTRACE_PROBE( 1003*7387SRobert.Gordon@Sun.COM krpc__e__clntrdma__callit__authunwrap); 10040Sstevel@tonic-gate } 10050Sstevel@tonic-gate } else { 10060Sstevel@tonic-gate /* set errno in case we can't recover */ 10070Sstevel@tonic-gate if (re_status != RPC_VERSMISMATCH && 10080Sstevel@tonic-gate re_status != RPC_AUTHERROR && 10090Sstevel@tonic-gate re_status != RPC_PROGVERSMISMATCH) 10100Sstevel@tonic-gate p->cku_err.re_errno = EIO; 10110Sstevel@tonic-gate 10120Sstevel@tonic-gate if (re_status == RPC_AUTHERROR) { 1013*7387SRobert.Gordon@Sun.COM if ((refresh_attempt > 0) && 1014*7387SRobert.Gordon@Sun.COM AUTH_REFRESH(h->cl_auth, &reply_msg, 1015*7387SRobert.Gordon@Sun.COM p->cku_cred)) { 1016*7387SRobert.Gordon@Sun.COM refresh_attempt--; 1017*7387SRobert.Gordon@Sun.COM try_call_again = 1; 1018*7387SRobert.Gordon@Sun.COM goto done; 1019*7387SRobert.Gordon@Sun.COM } 1020*7387SRobert.Gordon@Sun.COM 1021*7387SRobert.Gordon@Sun.COM try_call_again = 0; 1022*7387SRobert.Gordon@Sun.COM 1023*7387SRobert.Gordon@Sun.COM /* 1024*7387SRobert.Gordon@Sun.COM * We have used the client handle to 1025*7387SRobert.Gordon@Sun.COM * do an AUTH_REFRESH and the RPC status may 1026*7387SRobert.Gordon@Sun.COM * be set to RPC_SUCCESS; Let's make sure to 1027*7387SRobert.Gordon@Sun.COM * set it to RPC_AUTHERROR. 1028*7387SRobert.Gordon@Sun.COM */ 1029*7387SRobert.Gordon@Sun.COM p->cku_err.re_status = RPC_AUTHERROR; 1030*7387SRobert.Gordon@Sun.COM 10310Sstevel@tonic-gate /* 10320Sstevel@tonic-gate * Map recoverable and unrecoverable 10330Sstevel@tonic-gate * authentication errors to appropriate 10340Sstevel@tonic-gate * errno 10350Sstevel@tonic-gate */ 10360Sstevel@tonic-gate switch (p->cku_err.re_why) { 10370Sstevel@tonic-gate case AUTH_BADCRED: 10380Sstevel@tonic-gate case AUTH_BADVERF: 10390Sstevel@tonic-gate case AUTH_INVALIDRESP: 10400Sstevel@tonic-gate case AUTH_TOOWEAK: 10410Sstevel@tonic-gate case AUTH_FAILED: 10420Sstevel@tonic-gate case RPCSEC_GSS_NOCRED: 10430Sstevel@tonic-gate case RPCSEC_GSS_FAILED: 10440Sstevel@tonic-gate p->cku_err.re_errno = EACCES; 10450Sstevel@tonic-gate break; 10460Sstevel@tonic-gate case AUTH_REJECTEDCRED: 10470Sstevel@tonic-gate case AUTH_REJECTEDVERF: 10480Sstevel@tonic-gate default: 10490Sstevel@tonic-gate p->cku_err.re_errno = EIO; 10500Sstevel@tonic-gate break; 10510Sstevel@tonic-gate } 10520Sstevel@tonic-gate } 1053*7387SRobert.Gordon@Sun.COM DTRACE_PROBE1(krpc__e__clntrdma__callit__rpcfailed, 1054*7387SRobert.Gordon@Sun.COM int, p->cku_err.re_why); 10550Sstevel@tonic-gate } 10560Sstevel@tonic-gate } else { 10570Sstevel@tonic-gate p->cku_err.re_status = RPC_CANTDECODERES; 10580Sstevel@tonic-gate p->cku_err.re_errno = EIO; 1059*7387SRobert.Gordon@Sun.COM DTRACE_PROBE(krpc__e__clntrdma__callit__replymsg); 10600Sstevel@tonic-gate } 10610Sstevel@tonic-gate 1062*7387SRobert.Gordon@Sun.COM done: 1063*7387SRobert.Gordon@Sun.COM clnt_return_credit(conn); 1064*7387SRobert.Gordon@Sun.COM 1065*7387SRobert.Gordon@Sun.COM if (cl_sendlist != NULL) 1066*7387SRobert.Gordon@Sun.COM clist_free(cl_sendlist); 1067*7387SRobert.Gordon@Sun.COM 10680Sstevel@tonic-gate /* 10690Sstevel@tonic-gate * If rpc reply is in a chunk, free it now. 10700Sstevel@tonic-gate */ 1071*7387SRobert.Gordon@Sun.COM if (cl_long_reply) { 1072*7387SRobert.Gordon@Sun.COM (void) clist_deregister(conn, cl_long_reply, CLIST_REG_DST); 1073*7387SRobert.Gordon@Sun.COM rdma_buf_free(conn, &cl_long_reply->rb_longbuf); 1074*7387SRobert.Gordon@Sun.COM clist_free(cl_long_reply); 1075*7387SRobert.Gordon@Sun.COM } 10760Sstevel@tonic-gate 1077*7387SRobert.Gordon@Sun.COM if (call_xdrp) 1078*7387SRobert.Gordon@Sun.COM XDR_DESTROY(call_xdrp); 10790Sstevel@tonic-gate 1080*7387SRobert.Gordon@Sun.COM if (reply_xdrp) { 1081*7387SRobert.Gordon@Sun.COM (void) xdr_rpc_free_verifier(reply_xdrp, &reply_msg); 1082*7387SRobert.Gordon@Sun.COM XDR_DESTROY(reply_xdrp); 10830Sstevel@tonic-gate } 10840Sstevel@tonic-gate 1085*7387SRobert.Gordon@Sun.COM if (cl_rdma_reply) { 1086*7387SRobert.Gordon@Sun.COM clist_free(cl_rdma_reply); 10870Sstevel@tonic-gate } 10880Sstevel@tonic-gate 1089*7387SRobert.Gordon@Sun.COM if (cl_recvlist) { 1090*7387SRobert.Gordon@Sun.COM rdma_buf_t recvmsg = {0}; 1091*7387SRobert.Gordon@Sun.COM recvmsg.addr = (caddr_t)(uintptr_t)cl_recvlist->w.c_saddr3; 10920Sstevel@tonic-gate recvmsg.type = RECV_BUFFER; 10930Sstevel@tonic-gate RDMA_BUF_FREE(conn, &recvmsg); 1094*7387SRobert.Gordon@Sun.COM clist_free(cl_recvlist); 10950Sstevel@tonic-gate } 1096*7387SRobert.Gordon@Sun.COM 10970Sstevel@tonic-gate RDMA_REL_CONN(conn); 1098*7387SRobert.Gordon@Sun.COM 1099*7387SRobert.Gordon@Sun.COM if (try_call_again) 1100*7387SRobert.Gordon@Sun.COM goto call_again; 1101*7387SRobert.Gordon@Sun.COM 11020Sstevel@tonic-gate if (p->cku_err.re_status != RPC_SUCCESS) { 11030Sstevel@tonic-gate RCSTAT_INCR(rcbadcalls); 11040Sstevel@tonic-gate } 11050Sstevel@tonic-gate return (p->cku_err.re_status); 11060Sstevel@tonic-gate } 11070Sstevel@tonic-gate 1108*7387SRobert.Gordon@Sun.COM 1109*7387SRobert.Gordon@Sun.COM static void 1110*7387SRobert.Gordon@Sun.COM clnt_decode_long_reply(CONN *conn, 1111*7387SRobert.Gordon@Sun.COM struct clist *cl_long_reply, 1112*7387SRobert.Gordon@Sun.COM struct clist *cl_rdma_reply, XDR *xdrs, 1113*7387SRobert.Gordon@Sun.COM XDR **rxdrp, struct clist *cl, 1114*7387SRobert.Gordon@Sun.COM struct clist *cl_recvlist, 1115*7387SRobert.Gordon@Sun.COM uint_t op, uint_t off) 1116*7387SRobert.Gordon@Sun.COM { 1117*7387SRobert.Gordon@Sun.COM if (op != RDMA_NOMSG) { 1118*7387SRobert.Gordon@Sun.COM DTRACE_PROBE1(krpc__i__longrepl__rdmamsg__len, 1119*7387SRobert.Gordon@Sun.COM int, cl_recvlist->c_len - off); 1120*7387SRobert.Gordon@Sun.COM xdrrdma_create(xdrs, 1121*7387SRobert.Gordon@Sun.COM (caddr_t)(uintptr_t)(cl_recvlist->w.c_saddr3 + off), 1122*7387SRobert.Gordon@Sun.COM cl_recvlist->c_len - off, 0, cl, XDR_DECODE, conn); 1123*7387SRobert.Gordon@Sun.COM *rxdrp = xdrs; 1124*7387SRobert.Gordon@Sun.COM return; 1125*7387SRobert.Gordon@Sun.COM } 1126*7387SRobert.Gordon@Sun.COM 1127*7387SRobert.Gordon@Sun.COM /* op must be RDMA_NOMSG */ 1128*7387SRobert.Gordon@Sun.COM if (cl) { 1129*7387SRobert.Gordon@Sun.COM DTRACE_PROBE(krpc__e__clntrdma__declongreply__serverreadlist); 1130*7387SRobert.Gordon@Sun.COM return; 1131*7387SRobert.Gordon@Sun.COM } 1132*7387SRobert.Gordon@Sun.COM 1133*7387SRobert.Gordon@Sun.COM if (cl_long_reply->u.c_daddr) { 1134*7387SRobert.Gordon@Sun.COM DTRACE_PROBE1(krpc__i__longrepl__rdmanomsg__len, 1135*7387SRobert.Gordon@Sun.COM int, cl_rdma_reply->c_len); 1136*7387SRobert.Gordon@Sun.COM 1137*7387SRobert.Gordon@Sun.COM xdrrdma_create(xdrs, (caddr_t)cl_long_reply->u.c_daddr3, 1138*7387SRobert.Gordon@Sun.COM cl_rdma_reply->c_len, 0, NULL, XDR_DECODE, conn); 1139*7387SRobert.Gordon@Sun.COM 1140*7387SRobert.Gordon@Sun.COM *rxdrp = xdrs; 1141*7387SRobert.Gordon@Sun.COM } 1142*7387SRobert.Gordon@Sun.COM } 1143*7387SRobert.Gordon@Sun.COM 1144*7387SRobert.Gordon@Sun.COM static void 1145*7387SRobert.Gordon@Sun.COM clnt_return_credit(CONN *conn) 1146*7387SRobert.Gordon@Sun.COM { 1147*7387SRobert.Gordon@Sun.COM rdma_clnt_cred_ctrl_t *cc_info = &conn->rdma_conn_cred_ctrl_u.c_clnt_cc; 1148*7387SRobert.Gordon@Sun.COM 1149*7387SRobert.Gordon@Sun.COM mutex_enter(&conn->c_lock); 1150*7387SRobert.Gordon@Sun.COM cc_info->clnt_cc_in_flight_ops--; 1151*7387SRobert.Gordon@Sun.COM cv_signal(&cc_info->clnt_cc_cv); 1152*7387SRobert.Gordon@Sun.COM mutex_exit(&conn->c_lock); 1153*7387SRobert.Gordon@Sun.COM } 1154*7387SRobert.Gordon@Sun.COM 1155*7387SRobert.Gordon@Sun.COM static void 1156*7387SRobert.Gordon@Sun.COM clnt_update_credit(CONN *conn, uint32_t rdma_credit) 1157*7387SRobert.Gordon@Sun.COM { 1158*7387SRobert.Gordon@Sun.COM rdma_clnt_cred_ctrl_t *cc_info = &conn->rdma_conn_cred_ctrl_u.c_clnt_cc; 1159*7387SRobert.Gordon@Sun.COM 1160*7387SRobert.Gordon@Sun.COM /* 1161*7387SRobert.Gordon@Sun.COM * If the granted has not altered, avoid taking the 1162*7387SRobert.Gordon@Sun.COM * mutex, to essentially do nothing.. 1163*7387SRobert.Gordon@Sun.COM */ 1164*7387SRobert.Gordon@Sun.COM if (cc_info->clnt_cc_granted_ops == rdma_credit) 1165*7387SRobert.Gordon@Sun.COM return; 1166*7387SRobert.Gordon@Sun.COM /* 1167*7387SRobert.Gordon@Sun.COM * Get the granted number of buffers for credit control. 1168*7387SRobert.Gordon@Sun.COM */ 1169*7387SRobert.Gordon@Sun.COM mutex_enter(&conn->c_lock); 1170*7387SRobert.Gordon@Sun.COM cc_info->clnt_cc_granted_ops = rdma_credit; 1171*7387SRobert.Gordon@Sun.COM mutex_exit(&conn->c_lock); 1172*7387SRobert.Gordon@Sun.COM } 1173*7387SRobert.Gordon@Sun.COM 1174*7387SRobert.Gordon@Sun.COM static void 1175*7387SRobert.Gordon@Sun.COM clnt_check_credit(CONN *conn) 1176*7387SRobert.Gordon@Sun.COM { 1177*7387SRobert.Gordon@Sun.COM rdma_clnt_cred_ctrl_t *cc_info = &conn->rdma_conn_cred_ctrl_u.c_clnt_cc; 1178*7387SRobert.Gordon@Sun.COM 1179*7387SRobert.Gordon@Sun.COM /* 1180*7387SRobert.Gordon@Sun.COM * Make sure we are not going over our allowed buffer use 1181*7387SRobert.Gordon@Sun.COM * (and make sure we have gotten a granted value before). 1182*7387SRobert.Gordon@Sun.COM */ 1183*7387SRobert.Gordon@Sun.COM mutex_enter(&conn->c_lock); 1184*7387SRobert.Gordon@Sun.COM while (cc_info->clnt_cc_in_flight_ops >= cc_info->clnt_cc_granted_ops && 1185*7387SRobert.Gordon@Sun.COM cc_info->clnt_cc_granted_ops != 0) { 1186*7387SRobert.Gordon@Sun.COM /* 1187*7387SRobert.Gordon@Sun.COM * Client has maxed out its granted buffers due to 1188*7387SRobert.Gordon@Sun.COM * credit control. Current handling is to block and wait. 1189*7387SRobert.Gordon@Sun.COM */ 1190*7387SRobert.Gordon@Sun.COM cv_wait(&cc_info->clnt_cc_cv, &conn->c_lock); 1191*7387SRobert.Gordon@Sun.COM } 1192*7387SRobert.Gordon@Sun.COM cc_info->clnt_cc_in_flight_ops++; 1193*7387SRobert.Gordon@Sun.COM mutex_exit(&conn->c_lock); 1194*7387SRobert.Gordon@Sun.COM } 1195*7387SRobert.Gordon@Sun.COM 11960Sstevel@tonic-gate /* ARGSUSED */ 11970Sstevel@tonic-gate static void 11980Sstevel@tonic-gate clnt_rdma_kabort(CLIENT *h) 11990Sstevel@tonic-gate { 12000Sstevel@tonic-gate } 12010Sstevel@tonic-gate 12020Sstevel@tonic-gate static void 12030Sstevel@tonic-gate clnt_rdma_kerror(CLIENT *h, struct rpc_err *err) 12040Sstevel@tonic-gate { 12050Sstevel@tonic-gate struct cku_private *p = htop(h); 12060Sstevel@tonic-gate *err = p->cku_err; 12070Sstevel@tonic-gate } 12080Sstevel@tonic-gate 12090Sstevel@tonic-gate static bool_t 12100Sstevel@tonic-gate clnt_rdma_kfreeres(CLIENT *h, xdrproc_t xdr_res, caddr_t res_ptr) 12110Sstevel@tonic-gate { 12120Sstevel@tonic-gate struct cku_private *p = htop(h); 12130Sstevel@tonic-gate XDR *xdrs; 12140Sstevel@tonic-gate 12150Sstevel@tonic-gate xdrs = &(p->cku_outxdr); 12160Sstevel@tonic-gate xdrs->x_op = XDR_FREE; 12170Sstevel@tonic-gate return ((*xdr_res)(xdrs, res_ptr)); 12180Sstevel@tonic-gate } 12190Sstevel@tonic-gate 12200Sstevel@tonic-gate /* ARGSUSED */ 12210Sstevel@tonic-gate static bool_t 12220Sstevel@tonic-gate clnt_rdma_kcontrol(CLIENT *h, int cmd, char *arg) 12230Sstevel@tonic-gate { 12240Sstevel@tonic-gate return (TRUE); 12250Sstevel@tonic-gate } 12260Sstevel@tonic-gate 12270Sstevel@tonic-gate /* ARGSUSED */ 12280Sstevel@tonic-gate static int 12290Sstevel@tonic-gate clnt_rdma_ksettimers(CLIENT *h, struct rpc_timers *t, struct rpc_timers *all, 12300Sstevel@tonic-gate int minimum, void(*feedback)(int, int, caddr_t), caddr_t arg, 12310Sstevel@tonic-gate uint32_t xid) 12320Sstevel@tonic-gate { 12330Sstevel@tonic-gate RCSTAT_INCR(rctimers); 12340Sstevel@tonic-gate return (0); 12350Sstevel@tonic-gate } 12360Sstevel@tonic-gate 12370Sstevel@tonic-gate int 12380Sstevel@tonic-gate rdma_reachable(int addr_type, struct netbuf *addr, struct knetconfig **knconf) 12390Sstevel@tonic-gate { 12400Sstevel@tonic-gate rdma_registry_t *rp; 12410Sstevel@tonic-gate void *handle = NULL; 12420Sstevel@tonic-gate struct knetconfig *knc; 12430Sstevel@tonic-gate char *pf, *p; 12440Sstevel@tonic-gate rdma_stat status; 12450Sstevel@tonic-gate int error = 0; 12460Sstevel@tonic-gate 12470Sstevel@tonic-gate if (!INGLOBALZONE(curproc)) 12480Sstevel@tonic-gate return (-1); 1249*7387SRobert.Gordon@Sun.COM 12500Sstevel@tonic-gate /* 12510Sstevel@tonic-gate * modload the RDMA plugins if not already done. 12520Sstevel@tonic-gate */ 12530Sstevel@tonic-gate if (!rdma_modloaded) { 12540Sstevel@tonic-gate mutex_enter(&rdma_modload_lock); 12550Sstevel@tonic-gate if (!rdma_modloaded) { 12560Sstevel@tonic-gate error = rdma_modload(); 12570Sstevel@tonic-gate } 12580Sstevel@tonic-gate mutex_exit(&rdma_modload_lock); 12590Sstevel@tonic-gate if (error) 12600Sstevel@tonic-gate return (-1); 12610Sstevel@tonic-gate } 12620Sstevel@tonic-gate 12630Sstevel@tonic-gate if (!rdma_dev_available) 12640Sstevel@tonic-gate return (-1); 12650Sstevel@tonic-gate 12660Sstevel@tonic-gate rw_enter(&rdma_lock, RW_READER); 12670Sstevel@tonic-gate rp = rdma_mod_head; 12680Sstevel@tonic-gate while (rp != NULL) { 12690Sstevel@tonic-gate status = RDMA_REACHABLE(rp->r_mod->rdma_ops, addr_type, addr, 12700Sstevel@tonic-gate &handle); 12710Sstevel@tonic-gate if (status == RDMA_SUCCESS) { 12720Sstevel@tonic-gate knc = kmem_zalloc(sizeof (struct knetconfig), 1273*7387SRobert.Gordon@Sun.COM KM_SLEEP); 12740Sstevel@tonic-gate knc->knc_semantics = NC_TPI_RDMA; 12750Sstevel@tonic-gate pf = kmem_alloc(KNC_STRSIZE, KM_SLEEP); 12760Sstevel@tonic-gate p = kmem_alloc(KNC_STRSIZE, KM_SLEEP); 12770Sstevel@tonic-gate if (addr_type == AF_INET) 12780Sstevel@tonic-gate (void) strncpy(pf, NC_INET, KNC_STRSIZE); 12790Sstevel@tonic-gate else if (addr_type == AF_INET6) 12800Sstevel@tonic-gate (void) strncpy(pf, NC_INET6, KNC_STRSIZE); 12810Sstevel@tonic-gate pf[KNC_STRSIZE - 1] = '\0'; 12820Sstevel@tonic-gate 12830Sstevel@tonic-gate (void) strncpy(p, rp->r_mod->rdma_api, KNC_STRSIZE); 12840Sstevel@tonic-gate p[KNC_STRSIZE - 1] = '\0'; 12850Sstevel@tonic-gate 12860Sstevel@tonic-gate knc->knc_protofmly = pf; 12870Sstevel@tonic-gate knc->knc_proto = p; 12880Sstevel@tonic-gate knc->knc_rdev = (dev_t)handle; 12890Sstevel@tonic-gate *knconf = knc; 12900Sstevel@tonic-gate rw_exit(&rdma_lock); 12910Sstevel@tonic-gate return (0); 12920Sstevel@tonic-gate } 12930Sstevel@tonic-gate rp = rp->r_next; 12940Sstevel@tonic-gate } 12950Sstevel@tonic-gate rw_exit(&rdma_lock); 12960Sstevel@tonic-gate return (-1); 12970Sstevel@tonic-gate } 1298*7387SRobert.Gordon@Sun.COM 1299*7387SRobert.Gordon@Sun.COM static void 1300*7387SRobert.Gordon@Sun.COM check_dereg_wlist(CONN *conn, clist *rwc) 1301*7387SRobert.Gordon@Sun.COM { 1302*7387SRobert.Gordon@Sun.COM int status; 1303*7387SRobert.Gordon@Sun.COM 1304*7387SRobert.Gordon@Sun.COM if (rwc == NULL) 1305*7387SRobert.Gordon@Sun.COM return; 1306*7387SRobert.Gordon@Sun.COM 1307*7387SRobert.Gordon@Sun.COM if (rwc->c_dmemhandle.mrc_rmr && rwc->c_len) { 1308*7387SRobert.Gordon@Sun.COM 1309*7387SRobert.Gordon@Sun.COM status = clist_deregister(conn, rwc, CLIST_REG_DST); 1310*7387SRobert.Gordon@Sun.COM 1311*7387SRobert.Gordon@Sun.COM if (status != RDMA_SUCCESS) { 1312*7387SRobert.Gordon@Sun.COM DTRACE_PROBE1(krpc__e__clntrdma__dereg_wlist, 1313*7387SRobert.Gordon@Sun.COM int, status); 1314*7387SRobert.Gordon@Sun.COM } 1315*7387SRobert.Gordon@Sun.COM } 1316*7387SRobert.Gordon@Sun.COM } 1317