1*0Sstevel@tonic-gate /* 2*0Sstevel@tonic-gate * CDDL HEADER START 3*0Sstevel@tonic-gate * 4*0Sstevel@tonic-gate * The contents of this file are subject to the terms of the 5*0Sstevel@tonic-gate * Common Development and Distribution License, Version 1.0 only 6*0Sstevel@tonic-gate * (the "License"). You may not use this file except in compliance 7*0Sstevel@tonic-gate * with the License. 8*0Sstevel@tonic-gate * 9*0Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10*0Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing. 11*0Sstevel@tonic-gate * See the License for the specific language governing permissions 12*0Sstevel@tonic-gate * and limitations under the License. 13*0Sstevel@tonic-gate * 14*0Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each 15*0Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16*0Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the 17*0Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying 18*0Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner] 19*0Sstevel@tonic-gate * 20*0Sstevel@tonic-gate * CDDL HEADER END 21*0Sstevel@tonic-gate */ 22*0Sstevel@tonic-gate /* 23*0Sstevel@tonic-gate * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 24*0Sstevel@tonic-gate * Use is subject to license terms. 25*0Sstevel@tonic-gate */ 26*0Sstevel@tonic-gate 27*0Sstevel@tonic-gate #ifndef _IB_H 28*0Sstevel@tonic-gate #define _IB_H 29*0Sstevel@tonic-gate 30*0Sstevel@tonic-gate #pragma ident "%Z%%M% %I% %E% SMI" 31*0Sstevel@tonic-gate 32*0Sstevel@tonic-gate /* 33*0Sstevel@tonic-gate * ib.h, rpcib plugin interface. 34*0Sstevel@tonic-gate */ 35*0Sstevel@tonic-gate 36*0Sstevel@tonic-gate #include <sys/types.h> 37*0Sstevel@tonic-gate #include <sys/ddi.h> 38*0Sstevel@tonic-gate #include <sys/sunddi.h> 39*0Sstevel@tonic-gate #include <sys/conf.h> 40*0Sstevel@tonic-gate #include <sys/stat.h> 41*0Sstevel@tonic-gate #include <rpc/rpc.h> 42*0Sstevel@tonic-gate #include <rpc/rpc_rdma.h> 43*0Sstevel@tonic-gate #include <sys/ib/ibtl/ibti.h> 44*0Sstevel@tonic-gate 45*0Sstevel@tonic-gate #ifdef __cplusplus 46*0Sstevel@tonic-gate extern "C" { 47*0Sstevel@tonic-gate #endif 48*0Sstevel@tonic-gate 49*0Sstevel@tonic-gate #define MAX_BUFS 256 /* max no. of buffers per pool */ 50*0Sstevel@tonic-gate #define DEF_CQ_SIZE 4096 - 1 /* default CQ size */ 51*0Sstevel@tonic-gate /* 52*0Sstevel@tonic-gate * Tavor returns the next higher power of 2 53*0Sstevel@tonic-gate * CQ entries than the requested size. 54*0Sstevel@tonic-gate * For instance, if you request (2^12 - 1) 55*0Sstevel@tonic-gate * CQ entries, Tavor returns 2^12 entries. 56*0Sstevel@tonic-gate * 4K CQ entries suffice. Hence, 4096 - 1. 57*0Sstevel@tonic-gate */ 58*0Sstevel@tonic-gate #define DEF_SQ_SIZE 128 /* default SendQ size */ 59*0Sstevel@tonic-gate #define DEF_RQ_SIZE 256 /* default RecvQ size */ 60*0Sstevel@tonic-gate #define DSEG_MAX 2 61*0Sstevel@tonic-gate #define RQ_DSEG_MAX 1 /* default RQ data seg */ 62*0Sstevel@tonic-gate #define IBSRM_HB 0x8000 /* high order bit of pkey */ 63*0Sstevel@tonic-gate #define NFS_SEC_KEY0 0x6878 /* randomly selected NFS security key */ 64*0Sstevel@tonic-gate #define NFS_SEC_KEY1 0x8679 65*0Sstevel@tonic-gate 66*0Sstevel@tonic-gate /* max no. of refresh attempts on IBT_CM_CONN_STALE error */ 67*0Sstevel@tonic-gate #define REFRESH_ATTEMPTS 3 68*0Sstevel@tonic-gate 69*0Sstevel@tonic-gate typedef struct rib_hca_s rib_hca_t; 70*0Sstevel@tonic-gate typedef struct rib_qp_s rib_qp_t; 71*0Sstevel@tonic-gate typedef struct rib_cq_s rib_cq_t; 72*0Sstevel@tonic-gate 73*0Sstevel@tonic-gate /* 74*0Sstevel@tonic-gate * Notification for RDMA_DONE is based on xid 75*0Sstevel@tonic-gate */ 76*0Sstevel@tonic-gate struct rdma_done_list { 77*0Sstevel@tonic-gate uint32_t xid; /* XID waiting for RDMA_DONE */ 78*0Sstevel@tonic-gate kcondvar_t rdma_done_cv; /* cv for RDMA_DONE */ 79*0Sstevel@tonic-gate struct rdma_done_list *next; 80*0Sstevel@tonic-gate struct rdma_done_list *prev; 81*0Sstevel@tonic-gate }; 82*0Sstevel@tonic-gate 83*0Sstevel@tonic-gate /* 84*0Sstevel@tonic-gate * State of the plugin. 85*0Sstevel@tonic-gate * ACCEPT = accepting new connections and requests 86*0Sstevel@tonic-gate * NO_ACCEPT = not accepting new connection and requests 87*0Sstevel@tonic-gate */ 88*0Sstevel@tonic-gate #define ACCEPT 1 89*0Sstevel@tonic-gate #define NO_ACCEPT 2 90*0Sstevel@tonic-gate 91*0Sstevel@tonic-gate /* 92*0Sstevel@tonic-gate * Send Wait states 93*0Sstevel@tonic-gate */ 94*0Sstevel@tonic-gate #define SEND_WAIT -1 95*0Sstevel@tonic-gate 96*0Sstevel@tonic-gate /* 97*0Sstevel@tonic-gate * Reply states 98*0Sstevel@tonic-gate */ 99*0Sstevel@tonic-gate #define REPLY_WAIT -1 100*0Sstevel@tonic-gate 101*0Sstevel@tonic-gate typedef void * rib_pvoid; 102*0Sstevel@tonic-gate typedef rib_pvoid RIB_SYNCMEM_HANDLE; 103*0Sstevel@tonic-gate 104*0Sstevel@tonic-gate /* 105*0Sstevel@tonic-gate * IB buffer pool management structure 106*0Sstevel@tonic-gate */ 107*0Sstevel@tonic-gate 108*0Sstevel@tonic-gate /* 109*0Sstevel@tonic-gate * Buffer pool info 110*0Sstevel@tonic-gate */ 111*0Sstevel@tonic-gate typedef struct { 112*0Sstevel@tonic-gate kmutex_t buflock; /* lock for this structure */ 113*0Sstevel@tonic-gate caddr_t buf; /* pool address */ 114*0Sstevel@tonic-gate uint32_t bufhandle; /* rkey for this pool */ 115*0Sstevel@tonic-gate ulong_t bufsize; /* size of pool */ 116*0Sstevel@tonic-gate int rsize; /* size of each element */ 117*0Sstevel@tonic-gate int numelems; /* no. of elements allocated */ 118*0Sstevel@tonic-gate int buffree; /* no. of free elements */ 119*0Sstevel@tonic-gate void *buflist[1]; /* free elements in pool */ 120*0Sstevel@tonic-gate } bufpool_t; 121*0Sstevel@tonic-gate 122*0Sstevel@tonic-gate typedef struct { 123*0Sstevel@tonic-gate bufpool_t *bpool; 124*0Sstevel@tonic-gate ibt_mr_hdl_t *mr_hdl; 125*0Sstevel@tonic-gate ibt_mr_desc_t *mr_desc; /* vaddr, lkey, rkey */ 126*0Sstevel@tonic-gate } rib_bufpool_t; 127*0Sstevel@tonic-gate 128*0Sstevel@tonic-gate /* 129*0Sstevel@tonic-gate * ATS relsted defines and structures. 130*0Sstevel@tonic-gate */ 131*0Sstevel@tonic-gate #define ATS_AR_DATA_LEN 16 132*0Sstevel@tonic-gate #define IBD_NAME "ibd" 133*0Sstevel@tonic-gate #define N_IBD_INSTANCES 4 134*0Sstevel@tonic-gate 135*0Sstevel@tonic-gate typedef struct rpcib_ats_s { 136*0Sstevel@tonic-gate int ras_inst; 137*0Sstevel@tonic-gate ib_pkey_t ras_pkey; 138*0Sstevel@tonic-gate ib_gid_t ras_port_gid; 139*0Sstevel@tonic-gate sa_family_t ras_inet_type; 140*0Sstevel@tonic-gate union { 141*0Sstevel@tonic-gate struct sockaddr_in ras_sockaddr; 142*0Sstevel@tonic-gate struct sockaddr_in6 ras_sockaddr6; 143*0Sstevel@tonic-gate } ra_sin; 144*0Sstevel@tonic-gate #define ras_sin ra_sin.ras_sockaddr 145*0Sstevel@tonic-gate #define ras_sin6 ra_sin.ras_sockaddr6 146*0Sstevel@tonic-gate } rpcib_ats_t; 147*0Sstevel@tonic-gate 148*0Sstevel@tonic-gate typedef struct rpcib_ibd_insts_s { 149*0Sstevel@tonic-gate int rib_ibd_alloc; 150*0Sstevel@tonic-gate int rib_ibd_cnt; 151*0Sstevel@tonic-gate rpcib_ats_t *rib_ats; 152*0Sstevel@tonic-gate } rpcib_ibd_insts_t; 153*0Sstevel@tonic-gate 154*0Sstevel@tonic-gate /* 155*0Sstevel@tonic-gate * Service types supported by RPCIB 156*0Sstevel@tonic-gate * For now only NFS is supported. 157*0Sstevel@tonic-gate */ 158*0Sstevel@tonic-gate #define NFS 1 159*0Sstevel@tonic-gate #define NLM 2 160*0Sstevel@tonic-gate 161*0Sstevel@tonic-gate /* 162*0Sstevel@tonic-gate * Tracks consumer state (client or server). 163*0Sstevel@tonic-gate */ 164*0Sstevel@tonic-gate typedef enum { 165*0Sstevel@tonic-gate RIB_SERVER, 166*0Sstevel@tonic-gate RIB_CLIENT 167*0Sstevel@tonic-gate } rib_mode_t; 168*0Sstevel@tonic-gate 169*0Sstevel@tonic-gate /* 170*0Sstevel@tonic-gate * CQ structure 171*0Sstevel@tonic-gate */ 172*0Sstevel@tonic-gate struct rib_cq_s { 173*0Sstevel@tonic-gate rib_hca_t *rib_hca; 174*0Sstevel@tonic-gate ibt_cq_hdl_t rib_cq_hdl; 175*0Sstevel@tonic-gate }; 176*0Sstevel@tonic-gate 177*0Sstevel@tonic-gate /* 178*0Sstevel@tonic-gate * RPCIB plugin state 179*0Sstevel@tonic-gate */ 180*0Sstevel@tonic-gate typedef struct rpcib_state { 181*0Sstevel@tonic-gate ibt_clnt_hdl_t ibt_clnt_hdl; 182*0Sstevel@tonic-gate uint32_t hca_count; 183*0Sstevel@tonic-gate uint32_t nhca_inited; 184*0Sstevel@tonic-gate ib_guid_t *hca_guids; 185*0Sstevel@tonic-gate rib_hca_t *hcas; 186*0Sstevel@tonic-gate int refcount; 187*0Sstevel@tonic-gate kmutex_t open_hca_lock; 188*0Sstevel@tonic-gate rib_hca_t *hca; /* the hca being used */ 189*0Sstevel@tonic-gate queue_t *q; /* up queue for a serv_type */ 190*0Sstevel@tonic-gate uint32_t service_type; /* NFS, NLM, etc */ 191*0Sstevel@tonic-gate void *private; 192*0Sstevel@tonic-gate } rpcib_state_t; 193*0Sstevel@tonic-gate 194*0Sstevel@tonic-gate /* 195*0Sstevel@tonic-gate * Each registered service's data structure. 196*0Sstevel@tonic-gate * Each HCA has a list of these structures, which are the registered 197*0Sstevel@tonic-gate * services on this HCA. 198*0Sstevel@tonic-gate */ 199*0Sstevel@tonic-gate typedef struct rib_service rib_service_t; 200*0Sstevel@tonic-gate struct rib_service { 201*0Sstevel@tonic-gate uint32_t srv_type; /* i.e, NFS, NLM, v4CBD */ 202*0Sstevel@tonic-gate 203*0Sstevel@tonic-gate /* 204*0Sstevel@tonic-gate * service name, i.e, <IP>::NFS or <IP>::NLM. Since 205*0Sstevel@tonic-gate * each type of service can be registered with many 206*0Sstevel@tonic-gate * IP addrs(srv_name) and is running on all ports 207*0Sstevel@tonic-gate * for all HCAs. 208*0Sstevel@tonic-gate */ 209*0Sstevel@tonic-gate char *srv_name; 210*0Sstevel@tonic-gate 211*0Sstevel@tonic-gate uint32_t srv_port; /* port on which registered */ 212*0Sstevel@tonic-gate ib_svc_id_t srv_id; /* from ibt_register call */ 213*0Sstevel@tonic-gate ibt_srv_hdl_t srv_hdl; /* from ibt_register call */ 214*0Sstevel@tonic-gate ibt_sbind_hdl_t *srv_sbind_hdl; /* from ibt_bind call */ 215*0Sstevel@tonic-gate ibt_ar_t srv_ar; 216*0Sstevel@tonic-gate 217*0Sstevel@tonic-gate /* 218*0Sstevel@tonic-gate * pointer to the next service registered on this 219*0Sstevel@tonic-gate * particular HCA 220*0Sstevel@tonic-gate */ 221*0Sstevel@tonic-gate rib_service_t *srv_next; 222*0Sstevel@tonic-gate }; 223*0Sstevel@tonic-gate 224*0Sstevel@tonic-gate /* 225*0Sstevel@tonic-gate * Connection lists 226*0Sstevel@tonic-gate */ 227*0Sstevel@tonic-gate typedef struct { 228*0Sstevel@tonic-gate krwlock_t conn_lock; /* list lock */ 229*0Sstevel@tonic-gate CONN *conn_hd; /* list head */ 230*0Sstevel@tonic-gate } rib_conn_list_t; 231*0Sstevel@tonic-gate 232*0Sstevel@tonic-gate enum hca_state { 233*0Sstevel@tonic-gate HCA_INITED, /* hca in up and running state */ 234*0Sstevel@tonic-gate HCA_DETACHED /* hca in detached state */ 235*0Sstevel@tonic-gate }; 236*0Sstevel@tonic-gate 237*0Sstevel@tonic-gate /* 238*0Sstevel@tonic-gate * RPCIB per HCA structure 239*0Sstevel@tonic-gate */ 240*0Sstevel@tonic-gate struct rib_hca_s { 241*0Sstevel@tonic-gate ibt_clnt_hdl_t ibt_clnt_hdl; 242*0Sstevel@tonic-gate 243*0Sstevel@tonic-gate /* 244*0Sstevel@tonic-gate * per HCA. 245*0Sstevel@tonic-gate */ 246*0Sstevel@tonic-gate ibt_hca_hdl_t hca_hdl; /* HCA handle */ 247*0Sstevel@tonic-gate ibt_hca_attr_t hca_attrs; /* HCA attributes */ 248*0Sstevel@tonic-gate ibt_pd_hdl_t pd_hdl; 249*0Sstevel@tonic-gate ib_guid_t hca_guid; 250*0Sstevel@tonic-gate uint32_t hca_nports; 251*0Sstevel@tonic-gate ibt_hca_portinfo_t *hca_ports; 252*0Sstevel@tonic-gate size_t hca_pinfosz; 253*0Sstevel@tonic-gate enum hca_state state; /* state of HCA */ 254*0Sstevel@tonic-gate krwlock_t state_lock; /* protects state field */ 255*0Sstevel@tonic-gate bool_t inuse; /* indicates HCA usage */ 256*0Sstevel@tonic-gate kmutex_t inuse_lock; /* protects inuse field */ 257*0Sstevel@tonic-gate /* 258*0Sstevel@tonic-gate * List of services registered on all ports available 259*0Sstevel@tonic-gate * on this HCA. Only one consumer of KRPC can register 260*0Sstevel@tonic-gate * its services at one time or tear them down at one 261*0Sstevel@tonic-gate * time. 262*0Sstevel@tonic-gate */ 263*0Sstevel@tonic-gate rib_service_t *service_list; 264*0Sstevel@tonic-gate krwlock_t service_list_lock; 265*0Sstevel@tonic-gate 266*0Sstevel@tonic-gate rib_service_t *ats_list; /* Service list for ATS */ 267*0Sstevel@tonic-gate 268*0Sstevel@tonic-gate rib_conn_list_t cl_conn_list; /* client conn list */ 269*0Sstevel@tonic-gate rib_conn_list_t srv_conn_list; /* server conn list */ 270*0Sstevel@tonic-gate 271*0Sstevel@tonic-gate rib_cq_t *clnt_scq; 272*0Sstevel@tonic-gate rib_cq_t *clnt_rcq; 273*0Sstevel@tonic-gate rib_cq_t *svc_scq; 274*0Sstevel@tonic-gate rib_cq_t *svc_rcq; 275*0Sstevel@tonic-gate kmutex_t cb_lock; 276*0Sstevel@tonic-gate kcondvar_t cb_cv; 277*0Sstevel@tonic-gate 278*0Sstevel@tonic-gate rib_bufpool_t *recv_pool; /* recv buf pool */ 279*0Sstevel@tonic-gate rib_bufpool_t *send_pool; /* send buf pool */ 280*0Sstevel@tonic-gate 281*0Sstevel@tonic-gate void *iblock; /* interrupt cookie */ 282*0Sstevel@tonic-gate }; 283*0Sstevel@tonic-gate 284*0Sstevel@tonic-gate 285*0Sstevel@tonic-gate /* 286*0Sstevel@tonic-gate * Structure on wait state of a post send 287*0Sstevel@tonic-gate */ 288*0Sstevel@tonic-gate struct send_wid { 289*0Sstevel@tonic-gate uint32_t xid; 290*0Sstevel@tonic-gate int cv_sig; 291*0Sstevel@tonic-gate kmutex_t sendwait_lock; 292*0Sstevel@tonic-gate kcondvar_t wait_cv; 293*0Sstevel@tonic-gate uint_t status; 294*0Sstevel@tonic-gate rib_qp_t *qp; 295*0Sstevel@tonic-gate int nsbufs; /* # of send buffers posted */ 296*0Sstevel@tonic-gate uint64_t sbufaddr[DSEG_MAX]; /* posted send buffers */ 297*0Sstevel@tonic-gate }; 298*0Sstevel@tonic-gate 299*0Sstevel@tonic-gate /* 300*0Sstevel@tonic-gate * Structure on reply descriptor for recv queue. 301*0Sstevel@tonic-gate * Different from the above posting of a descriptor. 302*0Sstevel@tonic-gate */ 303*0Sstevel@tonic-gate struct reply { 304*0Sstevel@tonic-gate uint32_t xid; 305*0Sstevel@tonic-gate uint_t status; 306*0Sstevel@tonic-gate uint64_t vaddr_cq; /* buf addr from CQ */ 307*0Sstevel@tonic-gate uint_t bytes_xfer; 308*0Sstevel@tonic-gate kcondvar_t wait_cv; 309*0Sstevel@tonic-gate struct reply *next; 310*0Sstevel@tonic-gate struct reply *prev; 311*0Sstevel@tonic-gate }; 312*0Sstevel@tonic-gate 313*0Sstevel@tonic-gate struct svc_recv { 314*0Sstevel@tonic-gate rib_qp_t *qp; 315*0Sstevel@tonic-gate uint64_t vaddr; 316*0Sstevel@tonic-gate uint_t bytes_xfer; 317*0Sstevel@tonic-gate }; 318*0Sstevel@tonic-gate 319*0Sstevel@tonic-gate struct recv_wid { 320*0Sstevel@tonic-gate uint32_t xid; 321*0Sstevel@tonic-gate rib_qp_t *qp; 322*0Sstevel@tonic-gate uint64_t addr; /* posted buf addr */ 323*0Sstevel@tonic-gate }; 324*0Sstevel@tonic-gate 325*0Sstevel@tonic-gate /* 326*0Sstevel@tonic-gate * Per QP data structure 327*0Sstevel@tonic-gate */ 328*0Sstevel@tonic-gate struct rib_qp_s { 329*0Sstevel@tonic-gate rib_hca_t *hca; 330*0Sstevel@tonic-gate rib_mode_t mode; /* RIB_SERVER or RIB_CLIENT */ 331*0Sstevel@tonic-gate CONN rdmaconn; 332*0Sstevel@tonic-gate ibt_channel_hdl_t qp_hdl; 333*0Sstevel@tonic-gate uint_t port_num; 334*0Sstevel@tonic-gate ib_qpn_t qpn; 335*0Sstevel@tonic-gate int chan_flags; 336*0Sstevel@tonic-gate clock_t timeout; 337*0Sstevel@tonic-gate ibt_rc_chan_query_attr_t qp_q_attrs; 338*0Sstevel@tonic-gate rib_cq_t *send_cq; /* send CQ */ 339*0Sstevel@tonic-gate rib_cq_t *recv_cq; /* recv CQ */ 340*0Sstevel@tonic-gate 341*0Sstevel@tonic-gate /* 342*0Sstevel@tonic-gate * Number of pre-posted rbufs 343*0Sstevel@tonic-gate */ 344*0Sstevel@tonic-gate uint_t n_posted_rbufs; 345*0Sstevel@tonic-gate kcondvar_t posted_rbufs_cv; 346*0Sstevel@tonic-gate kmutex_t posted_rbufs_lock; 347*0Sstevel@tonic-gate 348*0Sstevel@tonic-gate /* 349*0Sstevel@tonic-gate * RPC reply 350*0Sstevel@tonic-gate */ 351*0Sstevel@tonic-gate uint_t rep_list_size; 352*0Sstevel@tonic-gate struct reply *replylist; 353*0Sstevel@tonic-gate kmutex_t replylist_lock; 354*0Sstevel@tonic-gate 355*0Sstevel@tonic-gate /* 356*0Sstevel@tonic-gate * server only, RDMA_DONE 357*0Sstevel@tonic-gate */ 358*0Sstevel@tonic-gate struct rdma_done_list *rdlist; 359*0Sstevel@tonic-gate kmutex_t rdlist_lock; 360*0Sstevel@tonic-gate 361*0Sstevel@tonic-gate kmutex_t cb_lock; 362*0Sstevel@tonic-gate kcondvar_t cb_conn_cv; 363*0Sstevel@tonic-gate 364*0Sstevel@tonic-gate caddr_t q; /* upstream queue */ 365*0Sstevel@tonic-gate }; 366*0Sstevel@tonic-gate 367*0Sstevel@tonic-gate #define ctoqp(conn) ((rib_qp_t *)((conn)->c_private)) 368*0Sstevel@tonic-gate #define qptoc(rqp) ((CONN *)&((rqp)->rdmaconn)) 369*0Sstevel@tonic-gate 370*0Sstevel@tonic-gate /* 371*0Sstevel@tonic-gate * Timeout for various calls 372*0Sstevel@tonic-gate */ 373*0Sstevel@tonic-gate #define CONN_WAIT_TIME 40 374*0Sstevel@tonic-gate #define SEND_WAIT_TIME 40 /* time for send completion */ 375*0Sstevel@tonic-gate 376*0Sstevel@tonic-gate #define REPLY_WAIT_TIME 40 /* time to get reply from remote QP */ 377*0Sstevel@tonic-gate 378*0Sstevel@tonic-gate #ifdef __cplusplus 379*0Sstevel@tonic-gate } 380*0Sstevel@tonic-gate #endif 381*0Sstevel@tonic-gate 382*0Sstevel@tonic-gate #endif /* !_IB_H */ 383