10Sstevel@tonic-gate /* 20Sstevel@tonic-gate * CDDL HEADER START 30Sstevel@tonic-gate * 40Sstevel@tonic-gate * The contents of this file are subject to the terms of the 57387SRobert.Gordon@Sun.COM * Common Development and Distribution License (the "License"). 67387SRobert.Gordon@Sun.COM * You may not use this file except in compliance with the License. 70Sstevel@tonic-gate * 80Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 90Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing. 100Sstevel@tonic-gate * See the License for the specific language governing permissions 110Sstevel@tonic-gate * and limitations under the License. 120Sstevel@tonic-gate * 130Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each 140Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 150Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the 160Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying 170Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner] 180Sstevel@tonic-gate * 190Sstevel@tonic-gate * CDDL HEADER END 200Sstevel@tonic-gate */ 210Sstevel@tonic-gate /* 22*11967SKaren.Rochford@Sun.COM * Copyright 2010 Sun Microsystems, Inc. All rights reserved. 230Sstevel@tonic-gate * Use is subject to license terms. 240Sstevel@tonic-gate */ 250Sstevel@tonic-gate /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */ 260Sstevel@tonic-gate /* All Rights Reserved */ 270Sstevel@tonic-gate /* 280Sstevel@tonic-gate * Portions of this source code were derived from Berkeley 290Sstevel@tonic-gate * 4.3 BSD under license from the Regents of the University of 300Sstevel@tonic-gate * California. 310Sstevel@tonic-gate */ 320Sstevel@tonic-gate 330Sstevel@tonic-gate /* 340Sstevel@tonic-gate * Server side of RPC over RDMA in the kernel. 350Sstevel@tonic-gate */ 360Sstevel@tonic-gate 370Sstevel@tonic-gate #include <sys/param.h> 380Sstevel@tonic-gate #include <sys/types.h> 390Sstevel@tonic-gate #include <sys/user.h> 400Sstevel@tonic-gate #include <sys/sysmacros.h> 410Sstevel@tonic-gate #include <sys/proc.h> 420Sstevel@tonic-gate #include <sys/file.h> 430Sstevel@tonic-gate #include <sys/errno.h> 440Sstevel@tonic-gate #include <sys/kmem.h> 450Sstevel@tonic-gate #include <sys/debug.h> 460Sstevel@tonic-gate #include <sys/systm.h> 470Sstevel@tonic-gate #include <sys/cmn_err.h> 480Sstevel@tonic-gate #include <sys/kstat.h> 490Sstevel@tonic-gate #include <sys/vtrace.h> 500Sstevel@tonic-gate #include <sys/debug.h> 510Sstevel@tonic-gate 520Sstevel@tonic-gate #include <rpc/types.h> 530Sstevel@tonic-gate #include <rpc/xdr.h> 540Sstevel@tonic-gate #include <rpc/auth.h> 550Sstevel@tonic-gate #include <rpc/clnt.h> 560Sstevel@tonic-gate #include <rpc/rpc_msg.h> 570Sstevel@tonic-gate #include <rpc/svc.h> 580Sstevel@tonic-gate #include <rpc/rpc_rdma.h> 590Sstevel@tonic-gate #include <sys/ddi.h> 600Sstevel@tonic-gate #include <sys/sunddi.h> 610Sstevel@tonic-gate 620Sstevel@tonic-gate #include <inet/common.h> 630Sstevel@tonic-gate #include <inet/ip.h> 640Sstevel@tonic-gate #include <inet/ip6.h> 650Sstevel@tonic-gate 667387SRobert.Gordon@Sun.COM #include <nfs/nfs.h> 677387SRobert.Gordon@Sun.COM #include <sys/sdt.h> 687387SRobert.Gordon@Sun.COM 697387SRobert.Gordon@Sun.COM #define SVC_RDMA_SUCCESS 0 707387SRobert.Gordon@Sun.COM #define SVC_RDMA_FAIL -1 717387SRobert.Gordon@Sun.COM 727387SRobert.Gordon@Sun.COM #define SVC_CREDIT_FACTOR (0.5) 737387SRobert.Gordon@Sun.COM 747387SRobert.Gordon@Sun.COM #define MSG_IS_RPCSEC_GSS(msg) \ 757387SRobert.Gordon@Sun.COM ((msg)->rm_reply.rp_acpt.ar_verf.oa_flavor == RPCSEC_GSS) 767387SRobert.Gordon@Sun.COM 777387SRobert.Gordon@Sun.COM 787387SRobert.Gordon@Sun.COM uint32_t rdma_bufs_granted = RDMA_BUFS_GRANT; 797387SRobert.Gordon@Sun.COM 800Sstevel@tonic-gate /* 810Sstevel@tonic-gate * RDMA transport specific data associated with SVCMASTERXPRT 820Sstevel@tonic-gate */ 830Sstevel@tonic-gate struct rdma_data { 840Sstevel@tonic-gate SVCMASTERXPRT *rd_xprt; /* back ptr to SVCMASTERXPRT */ 850Sstevel@tonic-gate struct rdma_svc_data rd_data; /* rdma data */ 860Sstevel@tonic-gate rdma_mod_t *r_mod; /* RDMA module containing ops ptr */ 870Sstevel@tonic-gate }; 880Sstevel@tonic-gate 890Sstevel@tonic-gate /* 900Sstevel@tonic-gate * Plugin connection specific data stashed away in clone SVCXPRT 910Sstevel@tonic-gate */ 920Sstevel@tonic-gate struct clone_rdma_data { 93*11967SKaren.Rochford@Sun.COM bool_t cloned; /* xprt cloned for thread processing */ 940Sstevel@tonic-gate CONN *conn; /* RDMA connection */ 950Sstevel@tonic-gate rdma_buf_t rpcbuf; /* RPC req/resp buffer */ 967387SRobert.Gordon@Sun.COM struct clist *cl_reply; /* reply chunk buffer info */ 977387SRobert.Gordon@Sun.COM struct clist *cl_wlist; /* write list clist */ 980Sstevel@tonic-gate }; 990Sstevel@tonic-gate 100*11967SKaren.Rochford@Sun.COM 1010Sstevel@tonic-gate #define MAXADDRLEN 128 /* max length for address mask */ 1020Sstevel@tonic-gate 1030Sstevel@tonic-gate /* 1040Sstevel@tonic-gate * Routines exported through ops vector. 1050Sstevel@tonic-gate */ 1060Sstevel@tonic-gate static bool_t svc_rdma_krecv(SVCXPRT *, mblk_t *, struct rpc_msg *); 1070Sstevel@tonic-gate static bool_t svc_rdma_ksend(SVCXPRT *, struct rpc_msg *); 1080Sstevel@tonic-gate static bool_t svc_rdma_kgetargs(SVCXPRT *, xdrproc_t, caddr_t); 1090Sstevel@tonic-gate static bool_t svc_rdma_kfreeargs(SVCXPRT *, xdrproc_t, caddr_t); 1100Sstevel@tonic-gate void svc_rdma_kdestroy(SVCMASTERXPRT *); 1110Sstevel@tonic-gate static int svc_rdma_kdup(struct svc_req *, caddr_t, int, 1120Sstevel@tonic-gate struct dupreq **, bool_t *); 1130Sstevel@tonic-gate static void svc_rdma_kdupdone(struct dupreq *, caddr_t, 1140Sstevel@tonic-gate void (*)(), int, int); 1150Sstevel@tonic-gate static int32_t *svc_rdma_kgetres(SVCXPRT *, int); 1160Sstevel@tonic-gate static void svc_rdma_kfreeres(SVCXPRT *); 1170Sstevel@tonic-gate static void svc_rdma_kclone_destroy(SVCXPRT *); 1180Sstevel@tonic-gate static void svc_rdma_kstart(SVCMASTERXPRT *); 1190Sstevel@tonic-gate void svc_rdma_kstop(SVCMASTERXPRT *); 120*11967SKaren.Rochford@Sun.COM static void svc_rdma_kclone_xprt(SVCXPRT *, SVCXPRT *); 1210Sstevel@tonic-gate 1227387SRobert.Gordon@Sun.COM static int svc_process_long_reply(SVCXPRT *, xdrproc_t, 1237387SRobert.Gordon@Sun.COM caddr_t, struct rpc_msg *, bool_t, int *, 1247387SRobert.Gordon@Sun.COM int *, int *, unsigned int *); 1257387SRobert.Gordon@Sun.COM 1267387SRobert.Gordon@Sun.COM static int svc_compose_rpcmsg(SVCXPRT *, CONN *, xdrproc_t, 1277387SRobert.Gordon@Sun.COM caddr_t, rdma_buf_t *, XDR **, struct rpc_msg *, 1287387SRobert.Gordon@Sun.COM bool_t, uint_t *); 1297387SRobert.Gordon@Sun.COM static bool_t rpcmsg_length(xdrproc_t, 1307387SRobert.Gordon@Sun.COM caddr_t, 1317387SRobert.Gordon@Sun.COM struct rpc_msg *, bool_t, int); 1327387SRobert.Gordon@Sun.COM 1330Sstevel@tonic-gate /* 1340Sstevel@tonic-gate * Server transport operations vector. 1350Sstevel@tonic-gate */ 1360Sstevel@tonic-gate struct svc_ops rdma_svc_ops = { 1370Sstevel@tonic-gate svc_rdma_krecv, /* Get requests */ 1380Sstevel@tonic-gate svc_rdma_kgetargs, /* Deserialize arguments */ 1390Sstevel@tonic-gate svc_rdma_ksend, /* Send reply */ 1400Sstevel@tonic-gate svc_rdma_kfreeargs, /* Free argument data space */ 1410Sstevel@tonic-gate svc_rdma_kdestroy, /* Destroy transport handle */ 1420Sstevel@tonic-gate svc_rdma_kdup, /* Check entry in dup req cache */ 1430Sstevel@tonic-gate svc_rdma_kdupdone, /* Mark entry in dup req cache as done */ 1440Sstevel@tonic-gate svc_rdma_kgetres, /* Get pointer to response buffer */ 1450Sstevel@tonic-gate svc_rdma_kfreeres, /* Destroy pre-serialized response header */ 1460Sstevel@tonic-gate svc_rdma_kclone_destroy, /* Destroy a clone xprt */ 147*11967SKaren.Rochford@Sun.COM svc_rdma_kstart, /* Tell `ready-to-receive' to rpcmod */ 148*11967SKaren.Rochford@Sun.COM svc_rdma_kclone_xprt /* Transport specific clone xprt */ 1490Sstevel@tonic-gate }; 1500Sstevel@tonic-gate 1510Sstevel@tonic-gate /* 1520Sstevel@tonic-gate * Server statistics 1530Sstevel@tonic-gate * NOTE: This structure type is duplicated in the NFS fast path. 1540Sstevel@tonic-gate */ 1550Sstevel@tonic-gate struct { 1560Sstevel@tonic-gate kstat_named_t rscalls; 1570Sstevel@tonic-gate kstat_named_t rsbadcalls; 1580Sstevel@tonic-gate kstat_named_t rsnullrecv; 1590Sstevel@tonic-gate kstat_named_t rsbadlen; 1600Sstevel@tonic-gate kstat_named_t rsxdrcall; 1610Sstevel@tonic-gate kstat_named_t rsdupchecks; 1620Sstevel@tonic-gate kstat_named_t rsdupreqs; 1630Sstevel@tonic-gate kstat_named_t rslongrpcs; 1647387SRobert.Gordon@Sun.COM kstat_named_t rstotalreplies; 1657387SRobert.Gordon@Sun.COM kstat_named_t rstotallongreplies; 1667387SRobert.Gordon@Sun.COM kstat_named_t rstotalinlinereplies; 1670Sstevel@tonic-gate } rdmarsstat = { 1680Sstevel@tonic-gate { "calls", KSTAT_DATA_UINT64 }, 1690Sstevel@tonic-gate { "badcalls", KSTAT_DATA_UINT64 }, 1700Sstevel@tonic-gate { "nullrecv", KSTAT_DATA_UINT64 }, 1710Sstevel@tonic-gate { "badlen", KSTAT_DATA_UINT64 }, 1720Sstevel@tonic-gate { "xdrcall", KSTAT_DATA_UINT64 }, 1730Sstevel@tonic-gate { "dupchecks", KSTAT_DATA_UINT64 }, 1740Sstevel@tonic-gate { "dupreqs", KSTAT_DATA_UINT64 }, 1757387SRobert.Gordon@Sun.COM { "longrpcs", KSTAT_DATA_UINT64 }, 1767387SRobert.Gordon@Sun.COM { "totalreplies", KSTAT_DATA_UINT64 }, 1777387SRobert.Gordon@Sun.COM { "totallongreplies", KSTAT_DATA_UINT64 }, 1787387SRobert.Gordon@Sun.COM { "totalinlinereplies", KSTAT_DATA_UINT64 }, 1790Sstevel@tonic-gate }; 1800Sstevel@tonic-gate 1810Sstevel@tonic-gate kstat_named_t *rdmarsstat_ptr = (kstat_named_t *)&rdmarsstat; 1820Sstevel@tonic-gate uint_t rdmarsstat_ndata = sizeof (rdmarsstat) / sizeof (kstat_named_t); 1830Sstevel@tonic-gate 1847387SRobert.Gordon@Sun.COM #define RSSTAT_INCR(x) atomic_add_64(&rdmarsstat.x.value.ui64, 1) 1850Sstevel@tonic-gate /* 1860Sstevel@tonic-gate * Create a transport record. 1870Sstevel@tonic-gate * The transport record, output buffer, and private data structure 1880Sstevel@tonic-gate * are allocated. The output buffer is serialized into using xdrmem. 1890Sstevel@tonic-gate * There is one transport record per user process which implements a 1900Sstevel@tonic-gate * set of services. 1910Sstevel@tonic-gate */ 1920Sstevel@tonic-gate /* ARGSUSED */ 1930Sstevel@tonic-gate int 1940Sstevel@tonic-gate svc_rdma_kcreate(char *netid, SVC_CALLOUT_TABLE *sct, int id, 1957387SRobert.Gordon@Sun.COM rdma_xprt_group_t *started_xprts) 1960Sstevel@tonic-gate { 1970Sstevel@tonic-gate int error; 1980Sstevel@tonic-gate SVCMASTERXPRT *xprt; 1990Sstevel@tonic-gate struct rdma_data *rd; 2000Sstevel@tonic-gate rdma_registry_t *rmod; 2010Sstevel@tonic-gate rdma_xprt_record_t *xprt_rec; 2020Sstevel@tonic-gate queue_t *q; 2030Sstevel@tonic-gate /* 2040Sstevel@tonic-gate * modload the RDMA plugins is not already done. 2050Sstevel@tonic-gate */ 2060Sstevel@tonic-gate if (!rdma_modloaded) { 2077387SRobert.Gordon@Sun.COM /*CONSTANTCONDITION*/ 2087387SRobert.Gordon@Sun.COM ASSERT(sizeof (struct clone_rdma_data) <= SVC_P2LEN); 2097387SRobert.Gordon@Sun.COM 2100Sstevel@tonic-gate mutex_enter(&rdma_modload_lock); 2110Sstevel@tonic-gate if (!rdma_modloaded) { 2120Sstevel@tonic-gate error = rdma_modload(); 2130Sstevel@tonic-gate } 2140Sstevel@tonic-gate mutex_exit(&rdma_modload_lock); 2150Sstevel@tonic-gate 2160Sstevel@tonic-gate if (error) 2170Sstevel@tonic-gate return (error); 2180Sstevel@tonic-gate } 2190Sstevel@tonic-gate 2200Sstevel@tonic-gate /* 2210Sstevel@tonic-gate * master_xprt_count is the count of master transport handles 2220Sstevel@tonic-gate * that were successfully created and are ready to recieve for 2230Sstevel@tonic-gate * RDMA based access. 2240Sstevel@tonic-gate */ 2250Sstevel@tonic-gate error = 0; 2260Sstevel@tonic-gate xprt_rec = NULL; 2270Sstevel@tonic-gate rw_enter(&rdma_lock, RW_READER); 2280Sstevel@tonic-gate if (rdma_mod_head == NULL) { 2290Sstevel@tonic-gate started_xprts->rtg_count = 0; 2300Sstevel@tonic-gate rw_exit(&rdma_lock); 2310Sstevel@tonic-gate if (rdma_dev_available) 2320Sstevel@tonic-gate return (EPROTONOSUPPORT); 2330Sstevel@tonic-gate else 2340Sstevel@tonic-gate return (ENODEV); 2350Sstevel@tonic-gate } 2360Sstevel@tonic-gate 2370Sstevel@tonic-gate /* 2380Sstevel@tonic-gate * If we have reached here, then atleast one RDMA plugin has loaded. 2390Sstevel@tonic-gate * Create a master_xprt, make it start listenining on the device, 2400Sstevel@tonic-gate * if an error is generated, record it, we might need to shut 2410Sstevel@tonic-gate * the master_xprt. 2420Sstevel@tonic-gate * SVC_START() calls svc_rdma_kstart which calls plugin binding 2430Sstevel@tonic-gate * routines. 2440Sstevel@tonic-gate */ 2450Sstevel@tonic-gate for (rmod = rdma_mod_head; rmod != NULL; rmod = rmod->r_next) { 2460Sstevel@tonic-gate 2470Sstevel@tonic-gate /* 2480Sstevel@tonic-gate * One SVCMASTERXPRT per RDMA plugin. 2490Sstevel@tonic-gate */ 2500Sstevel@tonic-gate xprt = kmem_zalloc(sizeof (*xprt), KM_SLEEP); 2510Sstevel@tonic-gate xprt->xp_ops = &rdma_svc_ops; 2520Sstevel@tonic-gate xprt->xp_sct = sct; 2530Sstevel@tonic-gate xprt->xp_type = T_RDMA; 2540Sstevel@tonic-gate mutex_init(&xprt->xp_req_lock, NULL, MUTEX_DEFAULT, NULL); 2550Sstevel@tonic-gate mutex_init(&xprt->xp_thread_lock, NULL, MUTEX_DEFAULT, NULL); 2560Sstevel@tonic-gate xprt->xp_req_head = (mblk_t *)0; 2570Sstevel@tonic-gate xprt->xp_req_tail = (mblk_t *)0; 2580Sstevel@tonic-gate xprt->xp_threads = 0; 2590Sstevel@tonic-gate xprt->xp_detached_threads = 0; 2600Sstevel@tonic-gate 2610Sstevel@tonic-gate rd = kmem_zalloc(sizeof (*rd), KM_SLEEP); 2620Sstevel@tonic-gate xprt->xp_p2 = (caddr_t)rd; 2630Sstevel@tonic-gate rd->rd_xprt = xprt; 2640Sstevel@tonic-gate rd->r_mod = rmod->r_mod; 2650Sstevel@tonic-gate 2660Sstevel@tonic-gate q = &rd->rd_data.q; 2670Sstevel@tonic-gate xprt->xp_wq = q; 2680Sstevel@tonic-gate q->q_ptr = &rd->rd_xprt; 2690Sstevel@tonic-gate xprt->xp_netid = NULL; 2700Sstevel@tonic-gate 2710Sstevel@tonic-gate xprt->xp_addrmask.maxlen = 2720Sstevel@tonic-gate xprt->xp_addrmask.len = sizeof (struct sockaddr_in); 2730Sstevel@tonic-gate xprt->xp_addrmask.buf = 2740Sstevel@tonic-gate kmem_zalloc(xprt->xp_addrmask.len, KM_SLEEP); 2750Sstevel@tonic-gate ((struct sockaddr_in *)xprt->xp_addrmask.buf)->sin_addr.s_addr = 2760Sstevel@tonic-gate (uint32_t)~0; 2770Sstevel@tonic-gate ((struct sockaddr_in *)xprt->xp_addrmask.buf)->sin_family = 2780Sstevel@tonic-gate (ushort_t)~0; 2790Sstevel@tonic-gate 2800Sstevel@tonic-gate /* 2810Sstevel@tonic-gate * Each of the plugins will have their own Service ID 2820Sstevel@tonic-gate * to listener specific mapping, like port number for VI 2830Sstevel@tonic-gate * and service name for IB. 2840Sstevel@tonic-gate */ 2850Sstevel@tonic-gate rd->rd_data.svcid = id; 2860Sstevel@tonic-gate error = svc_xprt_register(xprt, id); 2870Sstevel@tonic-gate if (error) { 2887387SRobert.Gordon@Sun.COM DTRACE_PROBE(krpc__e__svcrdma__xprt__reg); 2890Sstevel@tonic-gate goto cleanup; 2900Sstevel@tonic-gate } 2910Sstevel@tonic-gate 2920Sstevel@tonic-gate SVC_START(xprt); 2930Sstevel@tonic-gate if (!rd->rd_data.active) { 2940Sstevel@tonic-gate svc_xprt_unregister(xprt); 2950Sstevel@tonic-gate error = rd->rd_data.err_code; 2960Sstevel@tonic-gate goto cleanup; 2970Sstevel@tonic-gate } 2980Sstevel@tonic-gate 2990Sstevel@tonic-gate /* 3000Sstevel@tonic-gate * This is set only when there is atleast one or more 3010Sstevel@tonic-gate * transports successfully created. We insert the pointer 3020Sstevel@tonic-gate * to the created RDMA master xprt into a separately maintained 3030Sstevel@tonic-gate * list. This way we can easily reference it later to cleanup, 3040Sstevel@tonic-gate * when NFS kRPC service pool is going away/unregistered. 3050Sstevel@tonic-gate */ 3060Sstevel@tonic-gate started_xprts->rtg_count ++; 3070Sstevel@tonic-gate xprt_rec = kmem_alloc(sizeof (*xprt_rec), KM_SLEEP); 3080Sstevel@tonic-gate xprt_rec->rtr_xprt_ptr = xprt; 3090Sstevel@tonic-gate xprt_rec->rtr_next = started_xprts->rtg_listhead; 3100Sstevel@tonic-gate started_xprts->rtg_listhead = xprt_rec; 3110Sstevel@tonic-gate continue; 3120Sstevel@tonic-gate cleanup: 3130Sstevel@tonic-gate SVC_DESTROY(xprt); 3140Sstevel@tonic-gate if (error == RDMA_FAILED) 3150Sstevel@tonic-gate error = EPROTONOSUPPORT; 3160Sstevel@tonic-gate } 3170Sstevel@tonic-gate 3180Sstevel@tonic-gate rw_exit(&rdma_lock); 3190Sstevel@tonic-gate 3200Sstevel@tonic-gate /* 3210Sstevel@tonic-gate * Don't return any error even if a single plugin was started 3220Sstevel@tonic-gate * successfully. 3230Sstevel@tonic-gate */ 3240Sstevel@tonic-gate if (started_xprts->rtg_count == 0) 3250Sstevel@tonic-gate return (error); 3260Sstevel@tonic-gate return (0); 3270Sstevel@tonic-gate } 3280Sstevel@tonic-gate 3290Sstevel@tonic-gate /* 3300Sstevel@tonic-gate * Cleanup routine for freeing up memory allocated by 3310Sstevel@tonic-gate * svc_rdma_kcreate() 3320Sstevel@tonic-gate */ 3330Sstevel@tonic-gate void 3340Sstevel@tonic-gate svc_rdma_kdestroy(SVCMASTERXPRT *xprt) 3350Sstevel@tonic-gate { 3360Sstevel@tonic-gate struct rdma_data *rd = (struct rdma_data *)xprt->xp_p2; 3370Sstevel@tonic-gate 3380Sstevel@tonic-gate 3390Sstevel@tonic-gate mutex_destroy(&xprt->xp_req_lock); 3400Sstevel@tonic-gate mutex_destroy(&xprt->xp_thread_lock); 3410Sstevel@tonic-gate kmem_free(rd, sizeof (*rd)); 3420Sstevel@tonic-gate kmem_free(xprt->xp_addrmask.buf, xprt->xp_addrmask.maxlen); 3430Sstevel@tonic-gate kmem_free(xprt, sizeof (*xprt)); 3440Sstevel@tonic-gate } 3450Sstevel@tonic-gate 3460Sstevel@tonic-gate 3470Sstevel@tonic-gate static void 3480Sstevel@tonic-gate svc_rdma_kstart(SVCMASTERXPRT *xprt) 3490Sstevel@tonic-gate { 3500Sstevel@tonic-gate struct rdma_svc_data *svcdata; 3510Sstevel@tonic-gate rdma_mod_t *rmod; 3520Sstevel@tonic-gate 3530Sstevel@tonic-gate svcdata = &((struct rdma_data *)xprt->xp_p2)->rd_data; 3540Sstevel@tonic-gate rmod = ((struct rdma_data *)xprt->xp_p2)->r_mod; 3550Sstevel@tonic-gate 3560Sstevel@tonic-gate /* 3570Sstevel@tonic-gate * Create a listener for module at this port 3580Sstevel@tonic-gate */ 3590Sstevel@tonic-gate 3608695SRajkumar.Sivaprakasam@Sun.COM if (rmod->rdma_count != 0) 3618695SRajkumar.Sivaprakasam@Sun.COM (*rmod->rdma_ops->rdma_svc_listen)(svcdata); 3628695SRajkumar.Sivaprakasam@Sun.COM else 3638695SRajkumar.Sivaprakasam@Sun.COM svcdata->err_code = RDMA_FAILED; 3640Sstevel@tonic-gate } 3650Sstevel@tonic-gate 3660Sstevel@tonic-gate void 3670Sstevel@tonic-gate svc_rdma_kstop(SVCMASTERXPRT *xprt) 3680Sstevel@tonic-gate { 3690Sstevel@tonic-gate struct rdma_svc_data *svcdata; 3700Sstevel@tonic-gate rdma_mod_t *rmod; 3710Sstevel@tonic-gate 3720Sstevel@tonic-gate svcdata = &((struct rdma_data *)xprt->xp_p2)->rd_data; 3730Sstevel@tonic-gate rmod = ((struct rdma_data *)xprt->xp_p2)->r_mod; 3740Sstevel@tonic-gate 3750Sstevel@tonic-gate /* 3768695SRajkumar.Sivaprakasam@Sun.COM * Call the stop listener routine for each plugin. If rdma_count is 3778695SRajkumar.Sivaprakasam@Sun.COM * already zero set active to zero. 3780Sstevel@tonic-gate */ 3798695SRajkumar.Sivaprakasam@Sun.COM if (rmod->rdma_count != 0) 3808695SRajkumar.Sivaprakasam@Sun.COM (*rmod->rdma_ops->rdma_svc_stop)(svcdata); 3818695SRajkumar.Sivaprakasam@Sun.COM else 3828695SRajkumar.Sivaprakasam@Sun.COM svcdata->active = 0; 3830Sstevel@tonic-gate if (svcdata->active) 3847387SRobert.Gordon@Sun.COM DTRACE_PROBE(krpc__e__svcrdma__kstop); 3850Sstevel@tonic-gate } 3860Sstevel@tonic-gate 3870Sstevel@tonic-gate /* ARGSUSED */ 3880Sstevel@tonic-gate static void 3890Sstevel@tonic-gate svc_rdma_kclone_destroy(SVCXPRT *clone_xprt) 3900Sstevel@tonic-gate { 391*11967SKaren.Rochford@Sun.COM 392*11967SKaren.Rochford@Sun.COM struct clone_rdma_data *cdrp; 393*11967SKaren.Rochford@Sun.COM cdrp = (struct clone_rdma_data *)clone_xprt->xp_p2buf; 394*11967SKaren.Rochford@Sun.COM 395*11967SKaren.Rochford@Sun.COM /* 396*11967SKaren.Rochford@Sun.COM * Only free buffers and release connection when cloned is set. 397*11967SKaren.Rochford@Sun.COM */ 398*11967SKaren.Rochford@Sun.COM if (cdrp->cloned != TRUE) 399*11967SKaren.Rochford@Sun.COM return; 400*11967SKaren.Rochford@Sun.COM 401*11967SKaren.Rochford@Sun.COM rdma_buf_free(cdrp->conn, &cdrp->rpcbuf); 402*11967SKaren.Rochford@Sun.COM if (cdrp->cl_reply) { 403*11967SKaren.Rochford@Sun.COM clist_free(cdrp->cl_reply); 404*11967SKaren.Rochford@Sun.COM cdrp->cl_reply = NULL; 405*11967SKaren.Rochford@Sun.COM } 406*11967SKaren.Rochford@Sun.COM RDMA_REL_CONN(cdrp->conn); 407*11967SKaren.Rochford@Sun.COM 408*11967SKaren.Rochford@Sun.COM cdrp->cloned = 0; 4090Sstevel@tonic-gate } 4100Sstevel@tonic-gate 411*11967SKaren.Rochford@Sun.COM /* 412*11967SKaren.Rochford@Sun.COM * Clone the xprt specific information. It will be freed by 413*11967SKaren.Rochford@Sun.COM * SVC_CLONE_DESTROY. 414*11967SKaren.Rochford@Sun.COM */ 415*11967SKaren.Rochford@Sun.COM static void 416*11967SKaren.Rochford@Sun.COM svc_rdma_kclone_xprt(SVCXPRT *src_xprt, SVCXPRT *dst_xprt) 417*11967SKaren.Rochford@Sun.COM { 418*11967SKaren.Rochford@Sun.COM struct clone_rdma_data *srcp2; 419*11967SKaren.Rochford@Sun.COM struct clone_rdma_data *dstp2; 420*11967SKaren.Rochford@Sun.COM 421*11967SKaren.Rochford@Sun.COM srcp2 = (struct clone_rdma_data *)src_xprt->xp_p2buf; 422*11967SKaren.Rochford@Sun.COM dstp2 = (struct clone_rdma_data *)dst_xprt->xp_p2buf; 423*11967SKaren.Rochford@Sun.COM 424*11967SKaren.Rochford@Sun.COM if (srcp2->conn != NULL) { 425*11967SKaren.Rochford@Sun.COM srcp2->cloned = TRUE; 426*11967SKaren.Rochford@Sun.COM *dstp2 = *srcp2; 427*11967SKaren.Rochford@Sun.COM } 428*11967SKaren.Rochford@Sun.COM } 429*11967SKaren.Rochford@Sun.COM 430*11967SKaren.Rochford@Sun.COM 4310Sstevel@tonic-gate static bool_t 4320Sstevel@tonic-gate svc_rdma_krecv(SVCXPRT *clone_xprt, mblk_t *mp, struct rpc_msg *msg) 4330Sstevel@tonic-gate { 4347387SRobert.Gordon@Sun.COM XDR *xdrs; 4357387SRobert.Gordon@Sun.COM CONN *conn; 4367387SRobert.Gordon@Sun.COM rdma_recv_data_t *rdp = (rdma_recv_data_t *)mp->b_rptr; 4377387SRobert.Gordon@Sun.COM struct clone_rdma_data *crdp; 4387387SRobert.Gordon@Sun.COM struct clist *cl = NULL; 4397387SRobert.Gordon@Sun.COM struct clist *wcl = NULL; 4407387SRobert.Gordon@Sun.COM struct clist *cllong = NULL; 4417387SRobert.Gordon@Sun.COM 4427387SRobert.Gordon@Sun.COM rdma_stat status; 4437387SRobert.Gordon@Sun.COM uint32_t vers, op, pos, xid; 4447387SRobert.Gordon@Sun.COM uint32_t rdma_credit; 4457387SRobert.Gordon@Sun.COM uint32_t wcl_total_length = 0; 4467387SRobert.Gordon@Sun.COM bool_t wwl = FALSE; 4477387SRobert.Gordon@Sun.COM 4487387SRobert.Gordon@Sun.COM crdp = (struct clone_rdma_data *)clone_xprt->xp_p2buf; 4490Sstevel@tonic-gate RSSTAT_INCR(rscalls); 4500Sstevel@tonic-gate conn = rdp->conn; 4510Sstevel@tonic-gate 4520Sstevel@tonic-gate status = rdma_svc_postrecv(conn); 4530Sstevel@tonic-gate if (status != RDMA_SUCCESS) { 4547387SRobert.Gordon@Sun.COM DTRACE_PROBE(krpc__e__svcrdma__krecv__postrecv); 4557387SRobert.Gordon@Sun.COM goto badrpc_call; 4560Sstevel@tonic-gate } 4570Sstevel@tonic-gate 4580Sstevel@tonic-gate xdrs = &clone_xprt->xp_xdrin; 4590Sstevel@tonic-gate xdrmem_create(xdrs, rdp->rpcmsg.addr, rdp->rpcmsg.len, XDR_DECODE); 4607387SRobert.Gordon@Sun.COM xid = *(uint32_t *)rdp->rpcmsg.addr; 4617387SRobert.Gordon@Sun.COM XDR_SETPOS(xdrs, sizeof (uint32_t)); 4620Sstevel@tonic-gate 4630Sstevel@tonic-gate if (! xdr_u_int(xdrs, &vers) || 4647387SRobert.Gordon@Sun.COM ! xdr_u_int(xdrs, &rdma_credit) || 4650Sstevel@tonic-gate ! xdr_u_int(xdrs, &op)) { 4667387SRobert.Gordon@Sun.COM DTRACE_PROBE(krpc__e__svcrdma__krecv__uint); 4677387SRobert.Gordon@Sun.COM goto xdr_err; 4680Sstevel@tonic-gate } 4690Sstevel@tonic-gate 4707387SRobert.Gordon@Sun.COM /* Checking if the status of the recv operation was normal */ 4717387SRobert.Gordon@Sun.COM if (rdp->status != 0) { 4727387SRobert.Gordon@Sun.COM DTRACE_PROBE1(krpc__e__svcrdma__krecv__invalid__status, 4737387SRobert.Gordon@Sun.COM int, rdp->status); 4747387SRobert.Gordon@Sun.COM goto badrpc_call; 4757387SRobert.Gordon@Sun.COM } 4767387SRobert.Gordon@Sun.COM 4770Sstevel@tonic-gate if (! xdr_do_clist(xdrs, &cl)) { 4787387SRobert.Gordon@Sun.COM DTRACE_PROBE(krpc__e__svcrdma__krecv__do__clist); 4797387SRobert.Gordon@Sun.COM goto xdr_err; 4800Sstevel@tonic-gate } 4810Sstevel@tonic-gate 4827387SRobert.Gordon@Sun.COM if (!xdr_decode_wlist_svc(xdrs, &wcl, &wwl, &wcl_total_length, conn)) { 4837387SRobert.Gordon@Sun.COM DTRACE_PROBE(krpc__e__svcrdma__krecv__decode__wlist); 4847387SRobert.Gordon@Sun.COM if (cl) 4857387SRobert.Gordon@Sun.COM clist_free(cl); 4867387SRobert.Gordon@Sun.COM goto xdr_err; 4877387SRobert.Gordon@Sun.COM } 4887387SRobert.Gordon@Sun.COM crdp->cl_wlist = wcl; 4897387SRobert.Gordon@Sun.COM 4907387SRobert.Gordon@Sun.COM crdp->cl_reply = NULL; 4917387SRobert.Gordon@Sun.COM (void) xdr_decode_reply_wchunk(xdrs, &crdp->cl_reply); 4927387SRobert.Gordon@Sun.COM 4930Sstevel@tonic-gate /* 4940Sstevel@tonic-gate * A chunk at 0 offset indicates that the RPC call message 4950Sstevel@tonic-gate * is in a chunk. Get the RPC call message chunk. 4960Sstevel@tonic-gate */ 4970Sstevel@tonic-gate if (cl != NULL && op == RDMA_NOMSG) { 4980Sstevel@tonic-gate 4990Sstevel@tonic-gate /* Remove RPC call message chunk from chunklist */ 5000Sstevel@tonic-gate cllong = cl; 5010Sstevel@tonic-gate cl = cl->c_next; 5020Sstevel@tonic-gate cllong->c_next = NULL; 5030Sstevel@tonic-gate 5047387SRobert.Gordon@Sun.COM 5050Sstevel@tonic-gate /* Allocate and register memory for the RPC call msg chunk */ 5067387SRobert.Gordon@Sun.COM cllong->rb_longbuf.type = RDMA_LONG_BUFFER; 5077387SRobert.Gordon@Sun.COM cllong->rb_longbuf.len = cllong->c_len > LONG_REPLY_LEN ? 5087387SRobert.Gordon@Sun.COM cllong->c_len : LONG_REPLY_LEN; 5097387SRobert.Gordon@Sun.COM 5107387SRobert.Gordon@Sun.COM if (rdma_buf_alloc(conn, &cllong->rb_longbuf)) { 5110Sstevel@tonic-gate clist_free(cllong); 5127387SRobert.Gordon@Sun.COM goto cll_malloc_err; 5130Sstevel@tonic-gate } 5147387SRobert.Gordon@Sun.COM 5157387SRobert.Gordon@Sun.COM cllong->u.c_daddr3 = cllong->rb_longbuf.addr; 5167387SRobert.Gordon@Sun.COM 5177387SRobert.Gordon@Sun.COM if (cllong->u.c_daddr == NULL) { 5187387SRobert.Gordon@Sun.COM DTRACE_PROBE(krpc__e__svcrdma__krecv__nomem); 5197387SRobert.Gordon@Sun.COM rdma_buf_free(conn, &cllong->rb_longbuf); 5207387SRobert.Gordon@Sun.COM clist_free(cllong); 5217387SRobert.Gordon@Sun.COM goto cll_malloc_err; 5227387SRobert.Gordon@Sun.COM } 5237387SRobert.Gordon@Sun.COM 5247387SRobert.Gordon@Sun.COM status = clist_register(conn, cllong, CLIST_REG_DST); 5250Sstevel@tonic-gate if (status) { 5267387SRobert.Gordon@Sun.COM DTRACE_PROBE(krpc__e__svcrdma__krecv__clist__reg); 5277387SRobert.Gordon@Sun.COM rdma_buf_free(conn, &cllong->rb_longbuf); 5280Sstevel@tonic-gate clist_free(cllong); 5297387SRobert.Gordon@Sun.COM goto cll_malloc_err; 5300Sstevel@tonic-gate } 5310Sstevel@tonic-gate 5320Sstevel@tonic-gate /* 5330Sstevel@tonic-gate * Now read the RPC call message in 5340Sstevel@tonic-gate */ 5350Sstevel@tonic-gate status = RDMA_READ(conn, cllong, WAIT); 5360Sstevel@tonic-gate if (status) { 5377387SRobert.Gordon@Sun.COM DTRACE_PROBE(krpc__e__svcrdma__krecv__read); 5389348SSiddheshwar.Mahesh@Sun.COM (void) clist_deregister(conn, cllong); 5397387SRobert.Gordon@Sun.COM rdma_buf_free(conn, &cllong->rb_longbuf); 5400Sstevel@tonic-gate clist_free(cllong); 5417387SRobert.Gordon@Sun.COM goto cll_malloc_err; 5420Sstevel@tonic-gate } 5430Sstevel@tonic-gate 5447387SRobert.Gordon@Sun.COM status = clist_syncmem(conn, cllong, CLIST_REG_DST); 5459348SSiddheshwar.Mahesh@Sun.COM (void) clist_deregister(conn, cllong); 5460Sstevel@tonic-gate 5477387SRobert.Gordon@Sun.COM xdrrdma_create(xdrs, (caddr_t)(uintptr_t)cllong->u.c_daddr3, 5480Sstevel@tonic-gate cllong->c_len, 0, cl, XDR_DECODE, conn); 5490Sstevel@tonic-gate 5507387SRobert.Gordon@Sun.COM crdp->rpcbuf = cllong->rb_longbuf; 5517387SRobert.Gordon@Sun.COM crdp->rpcbuf.len = cllong->c_len; 5520Sstevel@tonic-gate clist_free(cllong); 5530Sstevel@tonic-gate RDMA_BUF_FREE(conn, &rdp->rpcmsg); 5540Sstevel@tonic-gate } else { 5550Sstevel@tonic-gate pos = XDR_GETPOS(xdrs); 5567387SRobert.Gordon@Sun.COM xdrrdma_create(xdrs, rdp->rpcmsg.addr + pos, 5577387SRobert.Gordon@Sun.COM rdp->rpcmsg.len - pos, 0, cl, XDR_DECODE, conn); 5587387SRobert.Gordon@Sun.COM crdp->rpcbuf = rdp->rpcmsg; 5590Sstevel@tonic-gate 5607387SRobert.Gordon@Sun.COM /* Use xdrrdmablk_ops to indicate there is a read chunk list */ 5617387SRobert.Gordon@Sun.COM if (cl != NULL) { 5627387SRobert.Gordon@Sun.COM int32_t flg = XDR_RDMA_RLIST_REG; 5637387SRobert.Gordon@Sun.COM 5647387SRobert.Gordon@Sun.COM XDR_CONTROL(xdrs, XDR_RDMA_SET_FLAGS, &flg); 5657387SRobert.Gordon@Sun.COM xdrs->x_ops = &xdrrdmablk_ops; 5667387SRobert.Gordon@Sun.COM } 5670Sstevel@tonic-gate } 5687387SRobert.Gordon@Sun.COM 5697387SRobert.Gordon@Sun.COM if (crdp->cl_wlist) { 5707387SRobert.Gordon@Sun.COM int32_t flg = XDR_RDMA_WLIST_REG; 5717387SRobert.Gordon@Sun.COM 5727387SRobert.Gordon@Sun.COM XDR_CONTROL(xdrs, XDR_RDMA_SET_WLIST, crdp->cl_wlist); 5737387SRobert.Gordon@Sun.COM XDR_CONTROL(xdrs, XDR_RDMA_SET_FLAGS, &flg); 5747387SRobert.Gordon@Sun.COM } 5757387SRobert.Gordon@Sun.COM 5760Sstevel@tonic-gate if (! xdr_callmsg(xdrs, msg)) { 5777387SRobert.Gordon@Sun.COM DTRACE_PROBE(krpc__e__svcrdma__krecv__callmsg); 5780Sstevel@tonic-gate RSSTAT_INCR(rsxdrcall); 5797387SRobert.Gordon@Sun.COM goto callmsg_err; 5800Sstevel@tonic-gate } 5810Sstevel@tonic-gate 5820Sstevel@tonic-gate /* 5830Sstevel@tonic-gate * Point the remote transport address in the service_transport 5840Sstevel@tonic-gate * handle at the address in the request. 5850Sstevel@tonic-gate */ 5860Sstevel@tonic-gate clone_xprt->xp_rtaddr.buf = conn->c_raddr.buf; 5870Sstevel@tonic-gate clone_xprt->xp_rtaddr.len = conn->c_raddr.len; 5880Sstevel@tonic-gate clone_xprt->xp_rtaddr.maxlen = conn->c_raddr.len; 58910326SSiddheshwar.Mahesh@Sun.COM 59010326SSiddheshwar.Mahesh@Sun.COM clone_xprt->xp_lcladdr.buf = conn->c_laddr.buf; 59110326SSiddheshwar.Mahesh@Sun.COM clone_xprt->xp_lcladdr.len = conn->c_laddr.len; 59210326SSiddheshwar.Mahesh@Sun.COM clone_xprt->xp_lcladdr.maxlen = conn->c_laddr.len; 59310326SSiddheshwar.Mahesh@Sun.COM 59410326SSiddheshwar.Mahesh@Sun.COM /* 59510326SSiddheshwar.Mahesh@Sun.COM * In case of RDMA, connection management is 59610326SSiddheshwar.Mahesh@Sun.COM * entirely done in rpcib module and netid in the 59710326SSiddheshwar.Mahesh@Sun.COM * SVCMASTERXPRT is NULL. Initialize the clone netid 59810326SSiddheshwar.Mahesh@Sun.COM * from the connection. 59910326SSiddheshwar.Mahesh@Sun.COM */ 60010326SSiddheshwar.Mahesh@Sun.COM 60110326SSiddheshwar.Mahesh@Sun.COM clone_xprt->xp_netid = conn->c_netid; 60210326SSiddheshwar.Mahesh@Sun.COM 6037387SRobert.Gordon@Sun.COM clone_xprt->xp_xid = xid; 6047387SRobert.Gordon@Sun.COM crdp->conn = conn; 6050Sstevel@tonic-gate 6067387SRobert.Gordon@Sun.COM freeb(mp); 6077387SRobert.Gordon@Sun.COM 6087387SRobert.Gordon@Sun.COM return (TRUE); 6097387SRobert.Gordon@Sun.COM 6107387SRobert.Gordon@Sun.COM callmsg_err: 6117387SRobert.Gordon@Sun.COM rdma_buf_free(conn, &crdp->rpcbuf); 6127387SRobert.Gordon@Sun.COM 6137387SRobert.Gordon@Sun.COM cll_malloc_err: 6147387SRobert.Gordon@Sun.COM if (cl) 6157387SRobert.Gordon@Sun.COM clist_free(cl); 6167387SRobert.Gordon@Sun.COM xdr_err: 6177387SRobert.Gordon@Sun.COM XDR_DESTROY(xdrs); 6187387SRobert.Gordon@Sun.COM 6197387SRobert.Gordon@Sun.COM badrpc_call: 6207387SRobert.Gordon@Sun.COM RDMA_BUF_FREE(conn, &rdp->rpcmsg); 6217387SRobert.Gordon@Sun.COM RDMA_REL_CONN(conn); 6227387SRobert.Gordon@Sun.COM freeb(mp); 6237387SRobert.Gordon@Sun.COM RSSTAT_INCR(rsbadcalls); 6247387SRobert.Gordon@Sun.COM return (FALSE); 6257387SRobert.Gordon@Sun.COM } 6267387SRobert.Gordon@Sun.COM 6277387SRobert.Gordon@Sun.COM static int 6287387SRobert.Gordon@Sun.COM svc_process_long_reply(SVCXPRT * clone_xprt, 6297387SRobert.Gordon@Sun.COM xdrproc_t xdr_results, caddr_t xdr_location, 6307387SRobert.Gordon@Sun.COM struct rpc_msg *msg, bool_t has_args, int *msglen, 6317387SRobert.Gordon@Sun.COM int *freelen, int *numchunks, unsigned int *final_len) 6327387SRobert.Gordon@Sun.COM { 6337387SRobert.Gordon@Sun.COM int status; 6347387SRobert.Gordon@Sun.COM XDR xdrslong; 6357387SRobert.Gordon@Sun.COM struct clist *wcl = NULL; 6367387SRobert.Gordon@Sun.COM int count = 0; 6377387SRobert.Gordon@Sun.COM int alloc_len; 6387387SRobert.Gordon@Sun.COM char *memp; 6397387SRobert.Gordon@Sun.COM rdma_buf_t long_rpc = {0}; 6407387SRobert.Gordon@Sun.COM struct clone_rdma_data *crdp; 6417387SRobert.Gordon@Sun.COM 6427387SRobert.Gordon@Sun.COM crdp = (struct clone_rdma_data *)clone_xprt->xp_p2buf; 6437387SRobert.Gordon@Sun.COM 6447387SRobert.Gordon@Sun.COM bzero(&xdrslong, sizeof (xdrslong)); 6457387SRobert.Gordon@Sun.COM 6467387SRobert.Gordon@Sun.COM /* Choose a size for the long rpc response */ 6477387SRobert.Gordon@Sun.COM if (MSG_IS_RPCSEC_GSS(msg)) { 6487387SRobert.Gordon@Sun.COM alloc_len = RNDUP(MAX_AUTH_BYTES + *msglen); 6497387SRobert.Gordon@Sun.COM } else { 6507387SRobert.Gordon@Sun.COM alloc_len = RNDUP(*msglen); 6517387SRobert.Gordon@Sun.COM } 6527387SRobert.Gordon@Sun.COM 6537387SRobert.Gordon@Sun.COM if (alloc_len <= 64 * 1024) { 6547387SRobert.Gordon@Sun.COM if (alloc_len > 32 * 1024) { 6557387SRobert.Gordon@Sun.COM alloc_len = 64 * 1024; 6567387SRobert.Gordon@Sun.COM } else { 6577387SRobert.Gordon@Sun.COM if (alloc_len > 16 * 1024) { 6587387SRobert.Gordon@Sun.COM alloc_len = 32 * 1024; 6597387SRobert.Gordon@Sun.COM } else { 6607387SRobert.Gordon@Sun.COM alloc_len = 16 * 1024; 6617387SRobert.Gordon@Sun.COM } 6627387SRobert.Gordon@Sun.COM } 6637387SRobert.Gordon@Sun.COM } 6647387SRobert.Gordon@Sun.COM 6657387SRobert.Gordon@Sun.COM long_rpc.type = RDMA_LONG_BUFFER; 6667387SRobert.Gordon@Sun.COM long_rpc.len = alloc_len; 6677387SRobert.Gordon@Sun.COM if (rdma_buf_alloc(crdp->conn, &long_rpc)) { 6687387SRobert.Gordon@Sun.COM return (SVC_RDMA_FAIL); 6697387SRobert.Gordon@Sun.COM } 6707387SRobert.Gordon@Sun.COM 6717387SRobert.Gordon@Sun.COM memp = long_rpc.addr; 6727387SRobert.Gordon@Sun.COM xdrmem_create(&xdrslong, memp, alloc_len, XDR_ENCODE); 6737387SRobert.Gordon@Sun.COM 6747387SRobert.Gordon@Sun.COM msg->rm_xid = clone_xprt->xp_xid; 6757387SRobert.Gordon@Sun.COM 6767387SRobert.Gordon@Sun.COM if (!(xdr_replymsg(&xdrslong, msg) && 6777387SRobert.Gordon@Sun.COM (!has_args || SVCAUTH_WRAP(&clone_xprt->xp_auth, &xdrslong, 6787387SRobert.Gordon@Sun.COM xdr_results, xdr_location)))) { 6797387SRobert.Gordon@Sun.COM rdma_buf_free(crdp->conn, &long_rpc); 6807387SRobert.Gordon@Sun.COM DTRACE_PROBE(krpc__e__svcrdma__longrep__authwrap); 6817387SRobert.Gordon@Sun.COM return (SVC_RDMA_FAIL); 6827387SRobert.Gordon@Sun.COM } 6837387SRobert.Gordon@Sun.COM 6847387SRobert.Gordon@Sun.COM *final_len = XDR_GETPOS(&xdrslong); 6857387SRobert.Gordon@Sun.COM 6869348SSiddheshwar.Mahesh@Sun.COM DTRACE_PROBE1(krpc__i__replylen, uint_t, *final_len); 6877387SRobert.Gordon@Sun.COM *numchunks = 0; 6887387SRobert.Gordon@Sun.COM *freelen = 0; 6897387SRobert.Gordon@Sun.COM 6907387SRobert.Gordon@Sun.COM wcl = crdp->cl_reply; 6917387SRobert.Gordon@Sun.COM wcl->rb_longbuf = long_rpc; 6927387SRobert.Gordon@Sun.COM 6937387SRobert.Gordon@Sun.COM count = *final_len; 6949348SSiddheshwar.Mahesh@Sun.COM while ((wcl != NULL) && (count > 0)) { 6959348SSiddheshwar.Mahesh@Sun.COM 6967387SRobert.Gordon@Sun.COM if (wcl->c_dmemhandle.mrc_rmr == 0) 6977387SRobert.Gordon@Sun.COM break; 6980Sstevel@tonic-gate 6999348SSiddheshwar.Mahesh@Sun.COM DTRACE_PROBE2(krpc__i__write__chunks, uint32_t, count, 7009348SSiddheshwar.Mahesh@Sun.COM uint32_t, wcl->c_len); 7019348SSiddheshwar.Mahesh@Sun.COM 7027387SRobert.Gordon@Sun.COM if (wcl->c_len > count) { 7037387SRobert.Gordon@Sun.COM wcl->c_len = count; 7047387SRobert.Gordon@Sun.COM } 7057387SRobert.Gordon@Sun.COM wcl->w.c_saddr3 = (caddr_t)memp; 7067387SRobert.Gordon@Sun.COM 7077387SRobert.Gordon@Sun.COM count -= wcl->c_len; 7087387SRobert.Gordon@Sun.COM *numchunks += 1; 7099348SSiddheshwar.Mahesh@Sun.COM memp += wcl->c_len; 7109348SSiddheshwar.Mahesh@Sun.COM wcl = wcl->c_next; 7119348SSiddheshwar.Mahesh@Sun.COM } 7129348SSiddheshwar.Mahesh@Sun.COM 7139348SSiddheshwar.Mahesh@Sun.COM /* 7149348SSiddheshwar.Mahesh@Sun.COM * Make rest of the chunks 0-len 7159348SSiddheshwar.Mahesh@Sun.COM */ 7169348SSiddheshwar.Mahesh@Sun.COM while (wcl != NULL) { 7179348SSiddheshwar.Mahesh@Sun.COM if (wcl->c_dmemhandle.mrc_rmr == 0) 7187387SRobert.Gordon@Sun.COM break; 7199348SSiddheshwar.Mahesh@Sun.COM wcl->c_len = 0; 7207387SRobert.Gordon@Sun.COM wcl = wcl->c_next; 7217387SRobert.Gordon@Sun.COM } 7227387SRobert.Gordon@Sun.COM 7237387SRobert.Gordon@Sun.COM wcl = crdp->cl_reply; 7247387SRobert.Gordon@Sun.COM 7257387SRobert.Gordon@Sun.COM /* 7267387SRobert.Gordon@Sun.COM * MUST fail if there are still more data 7277387SRobert.Gordon@Sun.COM */ 7287387SRobert.Gordon@Sun.COM if (count > 0) { 7297387SRobert.Gordon@Sun.COM rdma_buf_free(crdp->conn, &long_rpc); 7307387SRobert.Gordon@Sun.COM DTRACE_PROBE(krpc__e__svcrdma__longrep__dlen__clist); 7317387SRobert.Gordon@Sun.COM return (SVC_RDMA_FAIL); 7327387SRobert.Gordon@Sun.COM } 7337387SRobert.Gordon@Sun.COM 7347387SRobert.Gordon@Sun.COM if (clist_register(crdp->conn, wcl, CLIST_REG_SOURCE) != RDMA_SUCCESS) { 7357387SRobert.Gordon@Sun.COM rdma_buf_free(crdp->conn, &long_rpc); 7367387SRobert.Gordon@Sun.COM DTRACE_PROBE(krpc__e__svcrdma__longrep__clistreg); 7377387SRobert.Gordon@Sun.COM return (SVC_RDMA_FAIL); 7387387SRobert.Gordon@Sun.COM } 7397387SRobert.Gordon@Sun.COM 7407387SRobert.Gordon@Sun.COM status = clist_syncmem(crdp->conn, wcl, CLIST_REG_SOURCE); 7417387SRobert.Gordon@Sun.COM 7427387SRobert.Gordon@Sun.COM if (status) { 7439348SSiddheshwar.Mahesh@Sun.COM (void) clist_deregister(crdp->conn, wcl); 7447387SRobert.Gordon@Sun.COM rdma_buf_free(crdp->conn, &long_rpc); 7457387SRobert.Gordon@Sun.COM DTRACE_PROBE(krpc__e__svcrdma__longrep__syncmem); 7467387SRobert.Gordon@Sun.COM return (SVC_RDMA_FAIL); 7470Sstevel@tonic-gate } 7487387SRobert.Gordon@Sun.COM 7497387SRobert.Gordon@Sun.COM status = RDMA_WRITE(crdp->conn, wcl, WAIT); 7507387SRobert.Gordon@Sun.COM 7519348SSiddheshwar.Mahesh@Sun.COM (void) clist_deregister(crdp->conn, wcl); 7527387SRobert.Gordon@Sun.COM rdma_buf_free(crdp->conn, &wcl->rb_longbuf); 7537387SRobert.Gordon@Sun.COM 7547387SRobert.Gordon@Sun.COM if (status != RDMA_SUCCESS) { 7557387SRobert.Gordon@Sun.COM DTRACE_PROBE(krpc__e__svcrdma__longrep__write); 7567387SRobert.Gordon@Sun.COM return (SVC_RDMA_FAIL); 7577387SRobert.Gordon@Sun.COM } 7587387SRobert.Gordon@Sun.COM 7597387SRobert.Gordon@Sun.COM return (SVC_RDMA_SUCCESS); 7607387SRobert.Gordon@Sun.COM } 7617387SRobert.Gordon@Sun.COM 7620Sstevel@tonic-gate 7637387SRobert.Gordon@Sun.COM static int 7647387SRobert.Gordon@Sun.COM svc_compose_rpcmsg(SVCXPRT * clone_xprt, CONN * conn, xdrproc_t xdr_results, 7657387SRobert.Gordon@Sun.COM caddr_t xdr_location, rdma_buf_t *rpcreply, XDR ** xdrs, 7667387SRobert.Gordon@Sun.COM struct rpc_msg *msg, bool_t has_args, uint_t *len) 7677387SRobert.Gordon@Sun.COM { 7687387SRobert.Gordon@Sun.COM /* 7697387SRobert.Gordon@Sun.COM * Get a pre-allocated buffer for rpc reply 7707387SRobert.Gordon@Sun.COM */ 7717387SRobert.Gordon@Sun.COM rpcreply->type = SEND_BUFFER; 7727387SRobert.Gordon@Sun.COM if (rdma_buf_alloc(conn, rpcreply)) { 7737387SRobert.Gordon@Sun.COM DTRACE_PROBE(krpc__e__svcrdma__rpcmsg__reply__nofreebufs); 7747387SRobert.Gordon@Sun.COM return (SVC_RDMA_FAIL); 7757387SRobert.Gordon@Sun.COM } 7767387SRobert.Gordon@Sun.COM 7777387SRobert.Gordon@Sun.COM xdrrdma_create(*xdrs, rpcreply->addr, rpcreply->len, 7787387SRobert.Gordon@Sun.COM 0, NULL, XDR_ENCODE, conn); 7797387SRobert.Gordon@Sun.COM 7807387SRobert.Gordon@Sun.COM msg->rm_xid = clone_xprt->xp_xid; 7817387SRobert.Gordon@Sun.COM 7827387SRobert.Gordon@Sun.COM if (has_args) { 7837387SRobert.Gordon@Sun.COM if (!(xdr_replymsg(*xdrs, msg) && 7847387SRobert.Gordon@Sun.COM (!has_args || 7857387SRobert.Gordon@Sun.COM SVCAUTH_WRAP(&clone_xprt->xp_auth, *xdrs, 7867387SRobert.Gordon@Sun.COM xdr_results, xdr_location)))) { 7877387SRobert.Gordon@Sun.COM rdma_buf_free(conn, rpcreply); 7887387SRobert.Gordon@Sun.COM DTRACE_PROBE( 7897387SRobert.Gordon@Sun.COM krpc__e__svcrdma__rpcmsg__reply__authwrap1); 7907387SRobert.Gordon@Sun.COM return (SVC_RDMA_FAIL); 7917387SRobert.Gordon@Sun.COM } 7927387SRobert.Gordon@Sun.COM } else { 7937387SRobert.Gordon@Sun.COM if (!xdr_replymsg(*xdrs, msg)) { 7947387SRobert.Gordon@Sun.COM rdma_buf_free(conn, rpcreply); 7957387SRobert.Gordon@Sun.COM DTRACE_PROBE( 7967387SRobert.Gordon@Sun.COM krpc__e__svcrdma__rpcmsg__reply__authwrap2); 7977387SRobert.Gordon@Sun.COM return (SVC_RDMA_FAIL); 7987387SRobert.Gordon@Sun.COM } 7997387SRobert.Gordon@Sun.COM } 8007387SRobert.Gordon@Sun.COM 8017387SRobert.Gordon@Sun.COM *len = XDR_GETPOS(*xdrs); 8027387SRobert.Gordon@Sun.COM 8037387SRobert.Gordon@Sun.COM return (SVC_RDMA_SUCCESS); 8040Sstevel@tonic-gate } 8050Sstevel@tonic-gate 8060Sstevel@tonic-gate /* 8070Sstevel@tonic-gate * Send rpc reply. 8080Sstevel@tonic-gate */ 8090Sstevel@tonic-gate static bool_t 8107387SRobert.Gordon@Sun.COM svc_rdma_ksend(SVCXPRT * clone_xprt, struct rpc_msg *msg) 8110Sstevel@tonic-gate { 8127387SRobert.Gordon@Sun.COM XDR *xdrs_rpc = &(clone_xprt->xp_xdrout); 8137387SRobert.Gordon@Sun.COM XDR xdrs_rhdr; 8147387SRobert.Gordon@Sun.COM CONN *conn = NULL; 8157387SRobert.Gordon@Sun.COM rdma_buf_t rbuf_resp = {0}, rbuf_rpc_resp = {0}; 8167387SRobert.Gordon@Sun.COM 8177387SRobert.Gordon@Sun.COM struct clone_rdma_data *crdp; 8187387SRobert.Gordon@Sun.COM struct clist *cl_read = NULL; 8197387SRobert.Gordon@Sun.COM struct clist *cl_send = NULL; 8207387SRobert.Gordon@Sun.COM struct clist *cl_write = NULL; 8217387SRobert.Gordon@Sun.COM xdrproc_t xdr_results; /* results XDR encoding function */ 8227387SRobert.Gordon@Sun.COM caddr_t xdr_location; /* response results pointer */ 8237387SRobert.Gordon@Sun.COM 8240Sstevel@tonic-gate int retval = FALSE; 8257387SRobert.Gordon@Sun.COM int status, msglen, num_wreply_segments = 0; 8267387SRobert.Gordon@Sun.COM uint32_t rdma_credit = 0; 8277387SRobert.Gordon@Sun.COM int freelen = 0; 8287387SRobert.Gordon@Sun.COM bool_t has_args; 8297387SRobert.Gordon@Sun.COM uint_t final_resp_len, rdma_response_op, vers; 8300Sstevel@tonic-gate 8317387SRobert.Gordon@Sun.COM bzero(&xdrs_rhdr, sizeof (XDR)); 8327387SRobert.Gordon@Sun.COM crdp = (struct clone_rdma_data *)clone_xprt->xp_p2buf; 8337387SRobert.Gordon@Sun.COM conn = crdp->conn; 8340Sstevel@tonic-gate 8350Sstevel@tonic-gate /* 8360Sstevel@tonic-gate * If there is a result procedure specified in the reply message, 8370Sstevel@tonic-gate * it will be processed in the xdr_replymsg and SVCAUTH_WRAP. 8380Sstevel@tonic-gate * We need to make sure it won't be processed twice, so we null 8390Sstevel@tonic-gate * it for xdr_replymsg here. 8400Sstevel@tonic-gate */ 8410Sstevel@tonic-gate has_args = FALSE; 8420Sstevel@tonic-gate if (msg->rm_reply.rp_stat == MSG_ACCEPTED && 8430Sstevel@tonic-gate msg->rm_reply.rp_acpt.ar_stat == SUCCESS) { 8440Sstevel@tonic-gate if ((xdr_results = msg->acpted_rply.ar_results.proc) != NULL) { 8450Sstevel@tonic-gate has_args = TRUE; 8460Sstevel@tonic-gate xdr_location = msg->acpted_rply.ar_results.where; 8470Sstevel@tonic-gate msg->acpted_rply.ar_results.proc = xdr_void; 8480Sstevel@tonic-gate msg->acpted_rply.ar_results.where = NULL; 8490Sstevel@tonic-gate } 8500Sstevel@tonic-gate } 8510Sstevel@tonic-gate 8520Sstevel@tonic-gate /* 8537387SRobert.Gordon@Sun.COM * Given the limit on the inline response size (RPC_MSG_SZ), 8547387SRobert.Gordon@Sun.COM * there is a need to make a guess as to the overall size of 8557387SRobert.Gordon@Sun.COM * the response. If the resultant size is beyond the inline 8567387SRobert.Gordon@Sun.COM * size, then the server needs to use the "reply chunk list" 8577387SRobert.Gordon@Sun.COM * provided by the client (if the client provided one). An 8587387SRobert.Gordon@Sun.COM * example of this type of response would be a READDIR 8597387SRobert.Gordon@Sun.COM * response (e.g. a small directory read would fit in RPC_MSG_SZ 8607387SRobert.Gordon@Sun.COM * and that is the preference but it may not fit) 8617387SRobert.Gordon@Sun.COM * 8627387SRobert.Gordon@Sun.COM * Combine the encoded size and the size of the true results 8637387SRobert.Gordon@Sun.COM * and then make the decision about where to encode and send results. 8647387SRobert.Gordon@Sun.COM * 8657387SRobert.Gordon@Sun.COM * One important note, this calculation is ignoring the size 8667387SRobert.Gordon@Sun.COM * of the encoding of the authentication overhead. The reason 8677387SRobert.Gordon@Sun.COM * for this is rooted in the complexities of access to the 8687387SRobert.Gordon@Sun.COM * encoded size of RPCSEC_GSS related authentiation, 8697387SRobert.Gordon@Sun.COM * integrity, and privacy. 8707387SRobert.Gordon@Sun.COM * 8717387SRobert.Gordon@Sun.COM * If it turns out that the encoded authentication bumps the 8727387SRobert.Gordon@Sun.COM * response over the RPC_MSG_SZ limit, then it may need to 8737387SRobert.Gordon@Sun.COM * attempt to encode for the reply chunk list. 8747387SRobert.Gordon@Sun.COM */ 8757387SRobert.Gordon@Sun.COM 8767387SRobert.Gordon@Sun.COM /* 8777387SRobert.Gordon@Sun.COM * Calculating the "sizeof" the RPC response header and the 8787387SRobert.Gordon@Sun.COM * encoded results. 8790Sstevel@tonic-gate */ 8800Sstevel@tonic-gate msglen = xdr_sizeof(xdr_replymsg, msg); 8817387SRobert.Gordon@Sun.COM 8827387SRobert.Gordon@Sun.COM if (msglen > 0) { 8837387SRobert.Gordon@Sun.COM RSSTAT_INCR(rstotalreplies); 8847387SRobert.Gordon@Sun.COM } 8857387SRobert.Gordon@Sun.COM if (has_args) 8860Sstevel@tonic-gate msglen += xdrrdma_sizeof(xdr_results, xdr_location, 8877387SRobert.Gordon@Sun.COM rdma_minchunk, NULL, NULL); 8887387SRobert.Gordon@Sun.COM 8897387SRobert.Gordon@Sun.COM DTRACE_PROBE1(krpc__i__svcrdma__ksend__msglen, int, msglen); 8900Sstevel@tonic-gate 8917387SRobert.Gordon@Sun.COM status = SVC_RDMA_SUCCESS; 8920Sstevel@tonic-gate 8937387SRobert.Gordon@Sun.COM if (msglen < RPC_MSG_SZ) { 8940Sstevel@tonic-gate /* 8957387SRobert.Gordon@Sun.COM * Looks like the response will fit in the inline 8967387SRobert.Gordon@Sun.COM * response; let's try 8970Sstevel@tonic-gate */ 8987387SRobert.Gordon@Sun.COM RSSTAT_INCR(rstotalinlinereplies); 8997387SRobert.Gordon@Sun.COM 9007387SRobert.Gordon@Sun.COM rdma_response_op = RDMA_MSG; 9010Sstevel@tonic-gate 9027387SRobert.Gordon@Sun.COM status = svc_compose_rpcmsg(clone_xprt, conn, xdr_results, 9037387SRobert.Gordon@Sun.COM xdr_location, &rbuf_rpc_resp, &xdrs_rpc, msg, 9047387SRobert.Gordon@Sun.COM has_args, &final_resp_len); 9057387SRobert.Gordon@Sun.COM 9067387SRobert.Gordon@Sun.COM DTRACE_PROBE1(krpc__i__srdma__ksend__compose_status, 9077387SRobert.Gordon@Sun.COM int, status); 9087387SRobert.Gordon@Sun.COM DTRACE_PROBE1(krpc__i__srdma__ksend__compose_len, 9097387SRobert.Gordon@Sun.COM int, final_resp_len); 9107387SRobert.Gordon@Sun.COM 9117387SRobert.Gordon@Sun.COM if (status == SVC_RDMA_SUCCESS && crdp->cl_reply) { 9127387SRobert.Gordon@Sun.COM clist_free(crdp->cl_reply); 9137387SRobert.Gordon@Sun.COM crdp->cl_reply = NULL; 9140Sstevel@tonic-gate } 9150Sstevel@tonic-gate } 9160Sstevel@tonic-gate 9177387SRobert.Gordon@Sun.COM /* 9187387SRobert.Gordon@Sun.COM * If the encode failed (size?) or the message really is 9197387SRobert.Gordon@Sun.COM * larger than what is allowed, try the response chunk list. 9207387SRobert.Gordon@Sun.COM */ 9217387SRobert.Gordon@Sun.COM if (status != SVC_RDMA_SUCCESS || msglen >= RPC_MSG_SZ) { 9220Sstevel@tonic-gate /* 9237387SRobert.Gordon@Sun.COM * attempting to use a reply chunk list when there 9247387SRobert.Gordon@Sun.COM * isn't one won't get very far... 9250Sstevel@tonic-gate */ 9267387SRobert.Gordon@Sun.COM if (crdp->cl_reply == NULL) { 9277387SRobert.Gordon@Sun.COM DTRACE_PROBE(krpc__e__svcrdma__ksend__noreplycl); 9280Sstevel@tonic-gate goto out; 9290Sstevel@tonic-gate } 9300Sstevel@tonic-gate 9317387SRobert.Gordon@Sun.COM RSSTAT_INCR(rstotallongreplies); 9327387SRobert.Gordon@Sun.COM 9337387SRobert.Gordon@Sun.COM msglen = xdr_sizeof(xdr_replymsg, msg); 9347387SRobert.Gordon@Sun.COM msglen += xdrrdma_sizeof(xdr_results, xdr_location, 0, 9357387SRobert.Gordon@Sun.COM NULL, NULL); 9367387SRobert.Gordon@Sun.COM 9377387SRobert.Gordon@Sun.COM status = svc_process_long_reply(clone_xprt, xdr_results, 9387387SRobert.Gordon@Sun.COM xdr_location, msg, has_args, &msglen, &freelen, 9397387SRobert.Gordon@Sun.COM &num_wreply_segments, &final_resp_len); 9407387SRobert.Gordon@Sun.COM 9417387SRobert.Gordon@Sun.COM DTRACE_PROBE1(krpc__i__svcrdma__ksend__longreplen, 9427387SRobert.Gordon@Sun.COM int, final_resp_len); 9437387SRobert.Gordon@Sun.COM 9447387SRobert.Gordon@Sun.COM if (status != SVC_RDMA_SUCCESS) { 9457387SRobert.Gordon@Sun.COM DTRACE_PROBE(krpc__e__svcrdma__ksend__compose__failed); 9467387SRobert.Gordon@Sun.COM goto out; 9470Sstevel@tonic-gate } 9480Sstevel@tonic-gate 9497387SRobert.Gordon@Sun.COM rdma_response_op = RDMA_NOMSG; 9500Sstevel@tonic-gate } 9510Sstevel@tonic-gate 9527387SRobert.Gordon@Sun.COM DTRACE_PROBE1(krpc__i__svcrdma__ksend__rdmamsg__len, 9537387SRobert.Gordon@Sun.COM int, final_resp_len); 9540Sstevel@tonic-gate 9557387SRobert.Gordon@Sun.COM rbuf_resp.type = SEND_BUFFER; 9567387SRobert.Gordon@Sun.COM if (rdma_buf_alloc(conn, &rbuf_resp)) { 9577387SRobert.Gordon@Sun.COM rdma_buf_free(conn, &rbuf_rpc_resp); 9587387SRobert.Gordon@Sun.COM DTRACE_PROBE(krpc__e__svcrdma__ksend__nofreebufs); 9597387SRobert.Gordon@Sun.COM goto out; 9600Sstevel@tonic-gate } 9610Sstevel@tonic-gate 9627387SRobert.Gordon@Sun.COM rdma_credit = rdma_bufs_granted; 9637387SRobert.Gordon@Sun.COM 9647387SRobert.Gordon@Sun.COM vers = RPCRDMA_VERS; 9657387SRobert.Gordon@Sun.COM xdrmem_create(&xdrs_rhdr, rbuf_resp.addr, rbuf_resp.len, XDR_ENCODE); 9667387SRobert.Gordon@Sun.COM (*(uint32_t *)rbuf_resp.addr) = msg->rm_xid; 9677387SRobert.Gordon@Sun.COM /* Skip xid and set the xdr position accordingly. */ 9687387SRobert.Gordon@Sun.COM XDR_SETPOS(&xdrs_rhdr, sizeof (uint32_t)); 9697387SRobert.Gordon@Sun.COM if (!xdr_u_int(&xdrs_rhdr, &vers) || 9707387SRobert.Gordon@Sun.COM !xdr_u_int(&xdrs_rhdr, &rdma_credit) || 9717387SRobert.Gordon@Sun.COM !xdr_u_int(&xdrs_rhdr, &rdma_response_op)) { 9727387SRobert.Gordon@Sun.COM rdma_buf_free(conn, &rbuf_rpc_resp); 9737387SRobert.Gordon@Sun.COM rdma_buf_free(conn, &rbuf_resp); 9747387SRobert.Gordon@Sun.COM DTRACE_PROBE(krpc__e__svcrdma__ksend__uint); 9750Sstevel@tonic-gate goto out; 9760Sstevel@tonic-gate } 9770Sstevel@tonic-gate 9780Sstevel@tonic-gate /* 9797387SRobert.Gordon@Sun.COM * Now XDR the read chunk list, actually always NULL 9800Sstevel@tonic-gate */ 9817387SRobert.Gordon@Sun.COM (void) xdr_encode_rlist_svc(&xdrs_rhdr, cl_read); 9820Sstevel@tonic-gate 9830Sstevel@tonic-gate /* 9847387SRobert.Gordon@Sun.COM * encode write list -- we already drove RDMA_WRITEs 9850Sstevel@tonic-gate */ 9867387SRobert.Gordon@Sun.COM cl_write = crdp->cl_wlist; 9877387SRobert.Gordon@Sun.COM if (!xdr_encode_wlist(&xdrs_rhdr, cl_write)) { 9887387SRobert.Gordon@Sun.COM DTRACE_PROBE(krpc__e__svcrdma__ksend__enc__wlist); 9897387SRobert.Gordon@Sun.COM rdma_buf_free(conn, &rbuf_rpc_resp); 9907387SRobert.Gordon@Sun.COM rdma_buf_free(conn, &rbuf_resp); 9910Sstevel@tonic-gate goto out; 9920Sstevel@tonic-gate } 9930Sstevel@tonic-gate 9940Sstevel@tonic-gate /* 9957387SRobert.Gordon@Sun.COM * XDR encode the RDMA_REPLY write chunk 9960Sstevel@tonic-gate */ 9977387SRobert.Gordon@Sun.COM if (!xdr_encode_reply_wchunk(&xdrs_rhdr, crdp->cl_reply, 9987387SRobert.Gordon@Sun.COM num_wreply_segments)) { 9997387SRobert.Gordon@Sun.COM rdma_buf_free(conn, &rbuf_rpc_resp); 10007387SRobert.Gordon@Sun.COM rdma_buf_free(conn, &rbuf_resp); 10017387SRobert.Gordon@Sun.COM goto out; 10020Sstevel@tonic-gate } 10030Sstevel@tonic-gate 10047387SRobert.Gordon@Sun.COM clist_add(&cl_send, 0, XDR_GETPOS(&xdrs_rhdr), &rbuf_resp.handle, 10057387SRobert.Gordon@Sun.COM rbuf_resp.addr, NULL, NULL); 10060Sstevel@tonic-gate 10077387SRobert.Gordon@Sun.COM if (rdma_response_op == RDMA_MSG) { 10087387SRobert.Gordon@Sun.COM clist_add(&cl_send, 0, final_resp_len, &rbuf_rpc_resp.handle, 10097387SRobert.Gordon@Sun.COM rbuf_rpc_resp.addr, NULL, NULL); 10100Sstevel@tonic-gate } 10110Sstevel@tonic-gate 10127387SRobert.Gordon@Sun.COM status = RDMA_SEND(conn, cl_send, msg->rm_xid); 10137387SRobert.Gordon@Sun.COM 10147387SRobert.Gordon@Sun.COM if (status == RDMA_SUCCESS) { 10157387SRobert.Gordon@Sun.COM retval = TRUE; 10160Sstevel@tonic-gate } 10170Sstevel@tonic-gate 10187387SRobert.Gordon@Sun.COM out: 10190Sstevel@tonic-gate /* 10200Sstevel@tonic-gate * Free up sendlist chunks 10210Sstevel@tonic-gate */ 10227387SRobert.Gordon@Sun.COM if (cl_send != NULL) 10237387SRobert.Gordon@Sun.COM clist_free(cl_send); 10240Sstevel@tonic-gate 10250Sstevel@tonic-gate /* 10260Sstevel@tonic-gate * Destroy private data for xdr rdma 10270Sstevel@tonic-gate */ 10287387SRobert.Gordon@Sun.COM if (clone_xprt->xp_xdrout.x_ops != NULL) { 10297387SRobert.Gordon@Sun.COM XDR_DESTROY(&(clone_xprt->xp_xdrout)); 10307387SRobert.Gordon@Sun.COM } 10317387SRobert.Gordon@Sun.COM 10327387SRobert.Gordon@Sun.COM if (crdp->cl_reply) { 10337387SRobert.Gordon@Sun.COM clist_free(crdp->cl_reply); 10347387SRobert.Gordon@Sun.COM crdp->cl_reply = NULL; 10357387SRobert.Gordon@Sun.COM } 10360Sstevel@tonic-gate 10370Sstevel@tonic-gate /* 10380Sstevel@tonic-gate * This is completely disgusting. If public is set it is 10390Sstevel@tonic-gate * a pointer to a structure whose first field is the address 10400Sstevel@tonic-gate * of the function to free that structure and any related 10410Sstevel@tonic-gate * stuff. (see rrokfree in nfs_xdr.c). 10420Sstevel@tonic-gate */ 10437387SRobert.Gordon@Sun.COM if (xdrs_rpc->x_public) { 10440Sstevel@tonic-gate /* LINTED pointer alignment */ 10457387SRobert.Gordon@Sun.COM (**((int (**)()) xdrs_rpc->x_public)) (xdrs_rpc->x_public); 10467387SRobert.Gordon@Sun.COM } 10477387SRobert.Gordon@Sun.COM 10487387SRobert.Gordon@Sun.COM if (xdrs_rhdr.x_ops != NULL) { 10497387SRobert.Gordon@Sun.COM XDR_DESTROY(&xdrs_rhdr); 10500Sstevel@tonic-gate } 10510Sstevel@tonic-gate 10520Sstevel@tonic-gate return (retval); 10530Sstevel@tonic-gate } 10540Sstevel@tonic-gate 10550Sstevel@tonic-gate /* 10560Sstevel@tonic-gate * Deserialize arguments. 10570Sstevel@tonic-gate */ 10580Sstevel@tonic-gate static bool_t 10590Sstevel@tonic-gate svc_rdma_kgetargs(SVCXPRT *clone_xprt, xdrproc_t xdr_args, caddr_t args_ptr) 10600Sstevel@tonic-gate { 10610Sstevel@tonic-gate if ((SVCAUTH_UNWRAP(&clone_xprt->xp_auth, &clone_xprt->xp_xdrin, 10620Sstevel@tonic-gate xdr_args, args_ptr)) != TRUE) 10630Sstevel@tonic-gate return (FALSE); 10640Sstevel@tonic-gate return (TRUE); 10650Sstevel@tonic-gate } 10660Sstevel@tonic-gate 10670Sstevel@tonic-gate static bool_t 10680Sstevel@tonic-gate svc_rdma_kfreeargs(SVCXPRT *clone_xprt, xdrproc_t xdr_args, 10690Sstevel@tonic-gate caddr_t args_ptr) 10700Sstevel@tonic-gate { 10717387SRobert.Gordon@Sun.COM struct clone_rdma_data *crdp; 10720Sstevel@tonic-gate bool_t retval; 10730Sstevel@tonic-gate 1074*11967SKaren.Rochford@Sun.COM /* 1075*11967SKaren.Rochford@Sun.COM * If the cloned bit is true, then this transport specific 1076*11967SKaren.Rochford@Sun.COM * rmda data has been duplicated into another cloned xprt. Do 1077*11967SKaren.Rochford@Sun.COM * not free, or release the connection, it is still in use. The 1078*11967SKaren.Rochford@Sun.COM * buffers will be freed and the connection released later by 1079*11967SKaren.Rochford@Sun.COM * SVC_CLONE_DESTROY(). 1080*11967SKaren.Rochford@Sun.COM */ 10817387SRobert.Gordon@Sun.COM crdp = (struct clone_rdma_data *)clone_xprt->xp_p2buf; 1082*11967SKaren.Rochford@Sun.COM if (crdp->cloned == TRUE) { 1083*11967SKaren.Rochford@Sun.COM crdp->cloned = 0; 1084*11967SKaren.Rochford@Sun.COM return (TRUE); 1085*11967SKaren.Rochford@Sun.COM } 10867387SRobert.Gordon@Sun.COM 10877387SRobert.Gordon@Sun.COM /* 10887387SRobert.Gordon@Sun.COM * Free the args if needed then XDR_DESTROY 10897387SRobert.Gordon@Sun.COM */ 10900Sstevel@tonic-gate if (args_ptr) { 10910Sstevel@tonic-gate XDR *xdrs = &clone_xprt->xp_xdrin; 10920Sstevel@tonic-gate 10930Sstevel@tonic-gate xdrs->x_op = XDR_FREE; 10940Sstevel@tonic-gate retval = (*xdr_args)(xdrs, args_ptr); 10950Sstevel@tonic-gate } 10967387SRobert.Gordon@Sun.COM 10970Sstevel@tonic-gate XDR_DESTROY(&(clone_xprt->xp_xdrin)); 10987387SRobert.Gordon@Sun.COM rdma_buf_free(crdp->conn, &crdp->rpcbuf); 10997387SRobert.Gordon@Sun.COM if (crdp->cl_reply) { 11007387SRobert.Gordon@Sun.COM clist_free(crdp->cl_reply); 11017387SRobert.Gordon@Sun.COM crdp->cl_reply = NULL; 11027387SRobert.Gordon@Sun.COM } 11037387SRobert.Gordon@Sun.COM RDMA_REL_CONN(crdp->conn); 11047387SRobert.Gordon@Sun.COM 11050Sstevel@tonic-gate return (retval); 11060Sstevel@tonic-gate } 11070Sstevel@tonic-gate 11080Sstevel@tonic-gate /* ARGSUSED */ 11090Sstevel@tonic-gate static int32_t * 11100Sstevel@tonic-gate svc_rdma_kgetres(SVCXPRT *clone_xprt, int size) 11110Sstevel@tonic-gate { 11120Sstevel@tonic-gate return (NULL); 11130Sstevel@tonic-gate } 11140Sstevel@tonic-gate 11150Sstevel@tonic-gate /* ARGSUSED */ 11160Sstevel@tonic-gate static void 11170Sstevel@tonic-gate svc_rdma_kfreeres(SVCXPRT *clone_xprt) 11180Sstevel@tonic-gate { 11190Sstevel@tonic-gate } 11200Sstevel@tonic-gate 11210Sstevel@tonic-gate /* 11220Sstevel@tonic-gate * the dup cacheing routines below provide a cache of non-failure 11230Sstevel@tonic-gate * transaction id's. rpc service routines can use this to detect 11240Sstevel@tonic-gate * retransmissions and re-send a non-failure response. 11250Sstevel@tonic-gate */ 11260Sstevel@tonic-gate 11270Sstevel@tonic-gate /* 11280Sstevel@tonic-gate * MAXDUPREQS is the number of cached items. It should be adjusted 11290Sstevel@tonic-gate * to the service load so that there is likely to be a response entry 11300Sstevel@tonic-gate * when the first retransmission comes in. 11310Sstevel@tonic-gate */ 11320Sstevel@tonic-gate #define MAXDUPREQS 1024 11330Sstevel@tonic-gate 11340Sstevel@tonic-gate /* 11350Sstevel@tonic-gate * This should be appropriately scaled to MAXDUPREQS. 11360Sstevel@tonic-gate */ 11370Sstevel@tonic-gate #define DRHASHSZ 257 11380Sstevel@tonic-gate 11390Sstevel@tonic-gate #if ((DRHASHSZ & (DRHASHSZ - 1)) == 0) 11400Sstevel@tonic-gate #define XIDHASH(xid) ((xid) & (DRHASHSZ - 1)) 11410Sstevel@tonic-gate #else 11420Sstevel@tonic-gate #define XIDHASH(xid) ((xid) % DRHASHSZ) 11430Sstevel@tonic-gate #endif 11440Sstevel@tonic-gate #define DRHASH(dr) XIDHASH((dr)->dr_xid) 11450Sstevel@tonic-gate #define REQTOXID(req) ((req)->rq_xprt->xp_xid) 11460Sstevel@tonic-gate 11470Sstevel@tonic-gate static int rdmandupreqs = 0; 11487803Sgt29601@anthrax int rdmamaxdupreqs = MAXDUPREQS; 11490Sstevel@tonic-gate static kmutex_t rdmadupreq_lock; 11500Sstevel@tonic-gate static struct dupreq *rdmadrhashtbl[DRHASHSZ]; 11510Sstevel@tonic-gate static int rdmadrhashstat[DRHASHSZ]; 11520Sstevel@tonic-gate 11530Sstevel@tonic-gate static void unhash(struct dupreq *); 11540Sstevel@tonic-gate 11550Sstevel@tonic-gate /* 11560Sstevel@tonic-gate * rdmadrmru points to the head of a circular linked list in lru order. 11570Sstevel@tonic-gate * rdmadrmru->dr_next == drlru 11580Sstevel@tonic-gate */ 11590Sstevel@tonic-gate struct dupreq *rdmadrmru; 11600Sstevel@tonic-gate 11610Sstevel@tonic-gate /* 11620Sstevel@tonic-gate * svc_rdma_kdup searches the request cache and returns 0 if the 11630Sstevel@tonic-gate * request is not found in the cache. If it is found, then it 11640Sstevel@tonic-gate * returns the state of the request (in progress or done) and 11650Sstevel@tonic-gate * the status or attributes that were part of the original reply. 11660Sstevel@tonic-gate */ 11670Sstevel@tonic-gate static int 11680Sstevel@tonic-gate svc_rdma_kdup(struct svc_req *req, caddr_t res, int size, struct dupreq **drpp, 11690Sstevel@tonic-gate bool_t *dupcachedp) 11700Sstevel@tonic-gate { 11710Sstevel@tonic-gate struct dupreq *dr; 11720Sstevel@tonic-gate uint32_t xid; 11730Sstevel@tonic-gate uint32_t drhash; 11740Sstevel@tonic-gate int status; 11750Sstevel@tonic-gate 11760Sstevel@tonic-gate xid = REQTOXID(req); 11770Sstevel@tonic-gate mutex_enter(&rdmadupreq_lock); 11780Sstevel@tonic-gate RSSTAT_INCR(rsdupchecks); 11790Sstevel@tonic-gate /* 11800Sstevel@tonic-gate * Check to see whether an entry already exists in the cache. 11810Sstevel@tonic-gate */ 11820Sstevel@tonic-gate dr = rdmadrhashtbl[XIDHASH(xid)]; 11830Sstevel@tonic-gate while (dr != NULL) { 11840Sstevel@tonic-gate if (dr->dr_xid == xid && 11850Sstevel@tonic-gate dr->dr_proc == req->rq_proc && 11860Sstevel@tonic-gate dr->dr_prog == req->rq_prog && 11870Sstevel@tonic-gate dr->dr_vers == req->rq_vers && 11880Sstevel@tonic-gate dr->dr_addr.len == req->rq_xprt->xp_rtaddr.len && 11890Sstevel@tonic-gate bcmp((caddr_t)dr->dr_addr.buf, 11900Sstevel@tonic-gate (caddr_t)req->rq_xprt->xp_rtaddr.buf, 11910Sstevel@tonic-gate dr->dr_addr.len) == 0) { 11920Sstevel@tonic-gate status = dr->dr_status; 11930Sstevel@tonic-gate if (status == DUP_DONE) { 11940Sstevel@tonic-gate bcopy(dr->dr_resp.buf, res, size); 11950Sstevel@tonic-gate if (dupcachedp != NULL) 11960Sstevel@tonic-gate *dupcachedp = (dr->dr_resfree != NULL); 11970Sstevel@tonic-gate } else { 11980Sstevel@tonic-gate dr->dr_status = DUP_INPROGRESS; 11990Sstevel@tonic-gate *drpp = dr; 12000Sstevel@tonic-gate } 12010Sstevel@tonic-gate RSSTAT_INCR(rsdupreqs); 12020Sstevel@tonic-gate mutex_exit(&rdmadupreq_lock); 12030Sstevel@tonic-gate return (status); 12040Sstevel@tonic-gate } 12050Sstevel@tonic-gate dr = dr->dr_chain; 12060Sstevel@tonic-gate } 12070Sstevel@tonic-gate 12080Sstevel@tonic-gate /* 12090Sstevel@tonic-gate * There wasn't an entry, either allocate a new one or recycle 12100Sstevel@tonic-gate * an old one. 12110Sstevel@tonic-gate */ 12120Sstevel@tonic-gate if (rdmandupreqs < rdmamaxdupreqs) { 12130Sstevel@tonic-gate dr = kmem_alloc(sizeof (*dr), KM_NOSLEEP); 12140Sstevel@tonic-gate if (dr == NULL) { 12150Sstevel@tonic-gate mutex_exit(&rdmadupreq_lock); 12160Sstevel@tonic-gate return (DUP_ERROR); 12170Sstevel@tonic-gate } 12180Sstevel@tonic-gate dr->dr_resp.buf = NULL; 12190Sstevel@tonic-gate dr->dr_resp.maxlen = 0; 12200Sstevel@tonic-gate dr->dr_addr.buf = NULL; 12210Sstevel@tonic-gate dr->dr_addr.maxlen = 0; 12220Sstevel@tonic-gate if (rdmadrmru) { 12230Sstevel@tonic-gate dr->dr_next = rdmadrmru->dr_next; 12240Sstevel@tonic-gate rdmadrmru->dr_next = dr; 12250Sstevel@tonic-gate } else { 12260Sstevel@tonic-gate dr->dr_next = dr; 12270Sstevel@tonic-gate } 12280Sstevel@tonic-gate rdmandupreqs++; 12290Sstevel@tonic-gate } else { 12300Sstevel@tonic-gate dr = rdmadrmru->dr_next; 12310Sstevel@tonic-gate while (dr->dr_status == DUP_INPROGRESS) { 12320Sstevel@tonic-gate dr = dr->dr_next; 12330Sstevel@tonic-gate if (dr == rdmadrmru->dr_next) { 12340Sstevel@tonic-gate mutex_exit(&rdmadupreq_lock); 12350Sstevel@tonic-gate return (DUP_ERROR); 12360Sstevel@tonic-gate } 12370Sstevel@tonic-gate } 12380Sstevel@tonic-gate unhash(dr); 12390Sstevel@tonic-gate if (dr->dr_resfree) { 12400Sstevel@tonic-gate (*dr->dr_resfree)(dr->dr_resp.buf); 12410Sstevel@tonic-gate } 12420Sstevel@tonic-gate } 12430Sstevel@tonic-gate dr->dr_resfree = NULL; 12440Sstevel@tonic-gate rdmadrmru = dr; 12450Sstevel@tonic-gate 12460Sstevel@tonic-gate dr->dr_xid = REQTOXID(req); 12470Sstevel@tonic-gate dr->dr_prog = req->rq_prog; 12480Sstevel@tonic-gate dr->dr_vers = req->rq_vers; 12490Sstevel@tonic-gate dr->dr_proc = req->rq_proc; 12500Sstevel@tonic-gate if (dr->dr_addr.maxlen < req->rq_xprt->xp_rtaddr.len) { 12510Sstevel@tonic-gate if (dr->dr_addr.buf != NULL) 12520Sstevel@tonic-gate kmem_free(dr->dr_addr.buf, dr->dr_addr.maxlen); 12530Sstevel@tonic-gate dr->dr_addr.maxlen = req->rq_xprt->xp_rtaddr.len; 12540Sstevel@tonic-gate dr->dr_addr.buf = kmem_alloc(dr->dr_addr.maxlen, KM_NOSLEEP); 12550Sstevel@tonic-gate if (dr->dr_addr.buf == NULL) { 12560Sstevel@tonic-gate dr->dr_addr.maxlen = 0; 12570Sstevel@tonic-gate dr->dr_status = DUP_DROP; 12580Sstevel@tonic-gate mutex_exit(&rdmadupreq_lock); 12590Sstevel@tonic-gate return (DUP_ERROR); 12600Sstevel@tonic-gate } 12610Sstevel@tonic-gate } 12620Sstevel@tonic-gate dr->dr_addr.len = req->rq_xprt->xp_rtaddr.len; 12630Sstevel@tonic-gate bcopy(req->rq_xprt->xp_rtaddr.buf, dr->dr_addr.buf, dr->dr_addr.len); 12640Sstevel@tonic-gate if (dr->dr_resp.maxlen < size) { 12650Sstevel@tonic-gate if (dr->dr_resp.buf != NULL) 12660Sstevel@tonic-gate kmem_free(dr->dr_resp.buf, dr->dr_resp.maxlen); 12670Sstevel@tonic-gate dr->dr_resp.maxlen = (unsigned int)size; 12680Sstevel@tonic-gate dr->dr_resp.buf = kmem_alloc(size, KM_NOSLEEP); 12690Sstevel@tonic-gate if (dr->dr_resp.buf == NULL) { 12700Sstevel@tonic-gate dr->dr_resp.maxlen = 0; 12710Sstevel@tonic-gate dr->dr_status = DUP_DROP; 12720Sstevel@tonic-gate mutex_exit(&rdmadupreq_lock); 12730Sstevel@tonic-gate return (DUP_ERROR); 12740Sstevel@tonic-gate } 12750Sstevel@tonic-gate } 12760Sstevel@tonic-gate dr->dr_status = DUP_INPROGRESS; 12770Sstevel@tonic-gate 12780Sstevel@tonic-gate drhash = (uint32_t)DRHASH(dr); 12790Sstevel@tonic-gate dr->dr_chain = rdmadrhashtbl[drhash]; 12800Sstevel@tonic-gate rdmadrhashtbl[drhash] = dr; 12810Sstevel@tonic-gate rdmadrhashstat[drhash]++; 12820Sstevel@tonic-gate mutex_exit(&rdmadupreq_lock); 12830Sstevel@tonic-gate *drpp = dr; 12840Sstevel@tonic-gate return (DUP_NEW); 12850Sstevel@tonic-gate } 12860Sstevel@tonic-gate 12870Sstevel@tonic-gate /* 12880Sstevel@tonic-gate * svc_rdma_kdupdone marks the request done (DUP_DONE or DUP_DROP) 12890Sstevel@tonic-gate * and stores the response. 12900Sstevel@tonic-gate */ 12910Sstevel@tonic-gate static void 12920Sstevel@tonic-gate svc_rdma_kdupdone(struct dupreq *dr, caddr_t res, void (*dis_resfree)(), 12930Sstevel@tonic-gate int size, int status) 12940Sstevel@tonic-gate { 12950Sstevel@tonic-gate ASSERT(dr->dr_resfree == NULL); 12960Sstevel@tonic-gate if (status == DUP_DONE) { 12970Sstevel@tonic-gate bcopy(res, dr->dr_resp.buf, size); 12980Sstevel@tonic-gate dr->dr_resfree = dis_resfree; 12990Sstevel@tonic-gate } 13000Sstevel@tonic-gate dr->dr_status = status; 13010Sstevel@tonic-gate } 13020Sstevel@tonic-gate 13030Sstevel@tonic-gate /* 13040Sstevel@tonic-gate * This routine expects that the mutex, rdmadupreq_lock, is already held. 13050Sstevel@tonic-gate */ 13060Sstevel@tonic-gate static void 13070Sstevel@tonic-gate unhash(struct dupreq *dr) 13080Sstevel@tonic-gate { 13090Sstevel@tonic-gate struct dupreq *drt; 13100Sstevel@tonic-gate struct dupreq *drtprev = NULL; 13110Sstevel@tonic-gate uint32_t drhash; 13120Sstevel@tonic-gate 13130Sstevel@tonic-gate ASSERT(MUTEX_HELD(&rdmadupreq_lock)); 13140Sstevel@tonic-gate 13150Sstevel@tonic-gate drhash = (uint32_t)DRHASH(dr); 13160Sstevel@tonic-gate drt = rdmadrhashtbl[drhash]; 13170Sstevel@tonic-gate while (drt != NULL) { 13180Sstevel@tonic-gate if (drt == dr) { 13190Sstevel@tonic-gate rdmadrhashstat[drhash]--; 13200Sstevel@tonic-gate if (drtprev == NULL) { 13210Sstevel@tonic-gate rdmadrhashtbl[drhash] = drt->dr_chain; 13220Sstevel@tonic-gate } else { 13230Sstevel@tonic-gate drtprev->dr_chain = drt->dr_chain; 13240Sstevel@tonic-gate } 13250Sstevel@tonic-gate return; 13260Sstevel@tonic-gate } 13270Sstevel@tonic-gate drtprev = drt; 13280Sstevel@tonic-gate drt = drt->dr_chain; 13290Sstevel@tonic-gate } 13300Sstevel@tonic-gate } 13317387SRobert.Gordon@Sun.COM 13327387SRobert.Gordon@Sun.COM bool_t 13337387SRobert.Gordon@Sun.COM rdma_get_wchunk(struct svc_req *req, iovec_t *iov, struct clist *wlist) 13347387SRobert.Gordon@Sun.COM { 13357387SRobert.Gordon@Sun.COM struct clist *clist; 13367387SRobert.Gordon@Sun.COM uint32_t tlen; 13377387SRobert.Gordon@Sun.COM 13387387SRobert.Gordon@Sun.COM if (req->rq_xprt->xp_type != T_RDMA) { 13397387SRobert.Gordon@Sun.COM return (FALSE); 13407387SRobert.Gordon@Sun.COM } 13417387SRobert.Gordon@Sun.COM 13427387SRobert.Gordon@Sun.COM tlen = 0; 13437387SRobert.Gordon@Sun.COM clist = wlist; 13447387SRobert.Gordon@Sun.COM while (clist) { 13457387SRobert.Gordon@Sun.COM tlen += clist->c_len; 13467387SRobert.Gordon@Sun.COM clist = clist->c_next; 13477387SRobert.Gordon@Sun.COM } 13487387SRobert.Gordon@Sun.COM 13497387SRobert.Gordon@Sun.COM /* 13507387SRobert.Gordon@Sun.COM * set iov to addr+len of first segment of first wchunk of 13517387SRobert.Gordon@Sun.COM * wlist sent by client. krecv() already malloc'd a buffer 13527387SRobert.Gordon@Sun.COM * large enough, but registration is deferred until we write 13537387SRobert.Gordon@Sun.COM * the buffer back to (NFS) client using RDMA_WRITE. 13547387SRobert.Gordon@Sun.COM */ 13557387SRobert.Gordon@Sun.COM iov->iov_base = (caddr_t)(uintptr_t)wlist->w.c_saddr; 13567387SRobert.Gordon@Sun.COM iov->iov_len = tlen; 13577387SRobert.Gordon@Sun.COM 13587387SRobert.Gordon@Sun.COM return (TRUE); 13597387SRobert.Gordon@Sun.COM } 13609348SSiddheshwar.Mahesh@Sun.COM 13619348SSiddheshwar.Mahesh@Sun.COM /* 13629348SSiddheshwar.Mahesh@Sun.COM * routine to setup the read chunk lists 13639348SSiddheshwar.Mahesh@Sun.COM */ 13649348SSiddheshwar.Mahesh@Sun.COM 13659348SSiddheshwar.Mahesh@Sun.COM int 13669348SSiddheshwar.Mahesh@Sun.COM rdma_setup_read_chunks(struct clist *wcl, uint32_t count, int *wcl_len) 13679348SSiddheshwar.Mahesh@Sun.COM { 13689348SSiddheshwar.Mahesh@Sun.COM int data_len, avail_len; 13699348SSiddheshwar.Mahesh@Sun.COM uint_t round_len; 13709348SSiddheshwar.Mahesh@Sun.COM 13719348SSiddheshwar.Mahesh@Sun.COM data_len = avail_len = 0; 13729348SSiddheshwar.Mahesh@Sun.COM 13739348SSiddheshwar.Mahesh@Sun.COM while (wcl != NULL && count > 0) { 13749348SSiddheshwar.Mahesh@Sun.COM if (wcl->c_dmemhandle.mrc_rmr == 0) 13759348SSiddheshwar.Mahesh@Sun.COM break; 13769348SSiddheshwar.Mahesh@Sun.COM 13779348SSiddheshwar.Mahesh@Sun.COM if (wcl->c_len < count) { 13789348SSiddheshwar.Mahesh@Sun.COM data_len += wcl->c_len; 13799348SSiddheshwar.Mahesh@Sun.COM avail_len = 0; 13809348SSiddheshwar.Mahesh@Sun.COM } else { 13819348SSiddheshwar.Mahesh@Sun.COM data_len += count; 13829348SSiddheshwar.Mahesh@Sun.COM avail_len = wcl->c_len - count; 13839348SSiddheshwar.Mahesh@Sun.COM wcl->c_len = count; 13849348SSiddheshwar.Mahesh@Sun.COM } 13859348SSiddheshwar.Mahesh@Sun.COM count -= wcl->c_len; 13869348SSiddheshwar.Mahesh@Sun.COM 13879348SSiddheshwar.Mahesh@Sun.COM if (count == 0) 13889348SSiddheshwar.Mahesh@Sun.COM break; 13899348SSiddheshwar.Mahesh@Sun.COM 13909348SSiddheshwar.Mahesh@Sun.COM wcl = wcl->c_next; 13919348SSiddheshwar.Mahesh@Sun.COM } 13929348SSiddheshwar.Mahesh@Sun.COM 13939348SSiddheshwar.Mahesh@Sun.COM /* 13949348SSiddheshwar.Mahesh@Sun.COM * MUST fail if there are still more data 13959348SSiddheshwar.Mahesh@Sun.COM */ 13969348SSiddheshwar.Mahesh@Sun.COM if (count > 0) { 13979348SSiddheshwar.Mahesh@Sun.COM DTRACE_PROBE2(krpc__e__rdma_setup_read_chunks_clist_len, 13989348SSiddheshwar.Mahesh@Sun.COM int, data_len, int, count); 13999348SSiddheshwar.Mahesh@Sun.COM return (FALSE); 14009348SSiddheshwar.Mahesh@Sun.COM } 14019348SSiddheshwar.Mahesh@Sun.COM 14029348SSiddheshwar.Mahesh@Sun.COM /* 14039348SSiddheshwar.Mahesh@Sun.COM * Round up the last chunk to 4-byte boundary 14049348SSiddheshwar.Mahesh@Sun.COM */ 14059348SSiddheshwar.Mahesh@Sun.COM *wcl_len = roundup(data_len, BYTES_PER_XDR_UNIT); 14069348SSiddheshwar.Mahesh@Sun.COM round_len = *wcl_len - data_len; 14079348SSiddheshwar.Mahesh@Sun.COM 14089348SSiddheshwar.Mahesh@Sun.COM if (round_len) { 14099348SSiddheshwar.Mahesh@Sun.COM 14109348SSiddheshwar.Mahesh@Sun.COM /* 14119348SSiddheshwar.Mahesh@Sun.COM * If there is space in the current chunk, 14129348SSiddheshwar.Mahesh@Sun.COM * add the roundup to the chunk. 14139348SSiddheshwar.Mahesh@Sun.COM */ 14149348SSiddheshwar.Mahesh@Sun.COM if (avail_len >= round_len) { 14159348SSiddheshwar.Mahesh@Sun.COM wcl->c_len += round_len; 14169348SSiddheshwar.Mahesh@Sun.COM } else { 14179348SSiddheshwar.Mahesh@Sun.COM /* 14189348SSiddheshwar.Mahesh@Sun.COM * try the next one. 14199348SSiddheshwar.Mahesh@Sun.COM */ 14209348SSiddheshwar.Mahesh@Sun.COM wcl = wcl->c_next; 14219348SSiddheshwar.Mahesh@Sun.COM if ((wcl == NULL) || (wcl->c_len < round_len)) { 14229348SSiddheshwar.Mahesh@Sun.COM DTRACE_PROBE1( 14239348SSiddheshwar.Mahesh@Sun.COM krpc__e__rdma_setup_read_chunks_rndup, 14249348SSiddheshwar.Mahesh@Sun.COM int, round_len); 14259348SSiddheshwar.Mahesh@Sun.COM return (FALSE); 14269348SSiddheshwar.Mahesh@Sun.COM } 14279348SSiddheshwar.Mahesh@Sun.COM wcl->c_len = round_len; 14289348SSiddheshwar.Mahesh@Sun.COM } 14299348SSiddheshwar.Mahesh@Sun.COM } 14309348SSiddheshwar.Mahesh@Sun.COM 14319348SSiddheshwar.Mahesh@Sun.COM wcl = wcl->c_next; 14329348SSiddheshwar.Mahesh@Sun.COM 14339348SSiddheshwar.Mahesh@Sun.COM /* 14349348SSiddheshwar.Mahesh@Sun.COM * Make rest of the chunks 0-len 14359348SSiddheshwar.Mahesh@Sun.COM */ 14369348SSiddheshwar.Mahesh@Sun.COM 14379348SSiddheshwar.Mahesh@Sun.COM clist_zero_len(wcl); 14389348SSiddheshwar.Mahesh@Sun.COM 14399348SSiddheshwar.Mahesh@Sun.COM return (TRUE); 14409348SSiddheshwar.Mahesh@Sun.COM } 1441