10Sstevel@tonic-gate /* 20Sstevel@tonic-gate * CDDL HEADER START 30Sstevel@tonic-gate * 40Sstevel@tonic-gate * The contents of this file are subject to the terms of the 57387SRobert.Gordon@Sun.COM * Common Development and Distribution License (the "License"). 67387SRobert.Gordon@Sun.COM * You may not use this file except in compliance with the License. 70Sstevel@tonic-gate * 80Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 90Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing. 100Sstevel@tonic-gate * See the License for the specific language governing permissions 110Sstevel@tonic-gate * and limitations under the License. 120Sstevel@tonic-gate * 130Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each 140Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 150Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the 160Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying 170Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner] 180Sstevel@tonic-gate * 190Sstevel@tonic-gate * CDDL HEADER END 200Sstevel@tonic-gate */ 210Sstevel@tonic-gate /* 22*8695SRajkumar.Sivaprakasam@Sun.COM * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 230Sstevel@tonic-gate * Use is subject to license terms. 240Sstevel@tonic-gate */ 257387SRobert.Gordon@Sun.COM /* 267387SRobert.Gordon@Sun.COM * Copyright (c) 2007, The Ohio State University. All rights reserved. 277387SRobert.Gordon@Sun.COM * 287387SRobert.Gordon@Sun.COM * Portions of this source code is developed by the team members of 297387SRobert.Gordon@Sun.COM * The Ohio State University's Network-Based Computing Laboratory (NBCL), 307387SRobert.Gordon@Sun.COM * headed by Professor Dhabaleswar K. (DK) Panda. 317387SRobert.Gordon@Sun.COM * 327387SRobert.Gordon@Sun.COM * Acknowledgements to contributions from developors: 337387SRobert.Gordon@Sun.COM * Ranjit Noronha: noronha@cse.ohio-state.edu 347387SRobert.Gordon@Sun.COM * Lei Chai : chail@cse.ohio-state.edu 357387SRobert.Gordon@Sun.COM * Weikuan Yu : yuw@cse.ohio-state.edu 367387SRobert.Gordon@Sun.COM * 377387SRobert.Gordon@Sun.COM */ 387387SRobert.Gordon@Sun.COM 390Sstevel@tonic-gate 400Sstevel@tonic-gate #ifndef _IB_H 410Sstevel@tonic-gate #define _IB_H 420Sstevel@tonic-gate 430Sstevel@tonic-gate /* 440Sstevel@tonic-gate * ib.h, rpcib plugin interface. 450Sstevel@tonic-gate */ 460Sstevel@tonic-gate 470Sstevel@tonic-gate #include <sys/types.h> 480Sstevel@tonic-gate #include <sys/ddi.h> 490Sstevel@tonic-gate #include <sys/sunddi.h> 500Sstevel@tonic-gate #include <sys/conf.h> 510Sstevel@tonic-gate #include <sys/stat.h> 520Sstevel@tonic-gate #include <rpc/rpc.h> 530Sstevel@tonic-gate #include <rpc/rpc_rdma.h> 540Sstevel@tonic-gate #include <sys/ib/ibtl/ibti.h> 557387SRobert.Gordon@Sun.COM #include <sys/avl.h> 560Sstevel@tonic-gate 570Sstevel@tonic-gate #ifdef __cplusplus 580Sstevel@tonic-gate extern "C" { 590Sstevel@tonic-gate #endif 600Sstevel@tonic-gate 617387SRobert.Gordon@Sun.COM #define MAX_BUFS 1024 /* max no. of buffers per pool */ 627387SRobert.Gordon@Sun.COM 630Sstevel@tonic-gate #define DEF_CQ_SIZE 4096 - 1 /* default CQ size */ 640Sstevel@tonic-gate /* 650Sstevel@tonic-gate * Tavor returns the next higher power of 2 660Sstevel@tonic-gate * CQ entries than the requested size. 670Sstevel@tonic-gate * For instance, if you request (2^12 - 1) 680Sstevel@tonic-gate * CQ entries, Tavor returns 2^12 entries. 690Sstevel@tonic-gate * 4K CQ entries suffice. Hence, 4096 - 1. 700Sstevel@tonic-gate */ 710Sstevel@tonic-gate #define DEF_SQ_SIZE 128 /* default SendQ size */ 720Sstevel@tonic-gate #define DEF_RQ_SIZE 256 /* default RecvQ size */ 730Sstevel@tonic-gate #define DSEG_MAX 2 740Sstevel@tonic-gate #define RQ_DSEG_MAX 1 /* default RQ data seg */ 750Sstevel@tonic-gate #define IBSRM_HB 0x8000 /* high order bit of pkey */ 760Sstevel@tonic-gate 770Sstevel@tonic-gate /* max no. of refresh attempts on IBT_CM_CONN_STALE error */ 780Sstevel@tonic-gate #define REFRESH_ATTEMPTS 3 790Sstevel@tonic-gate 800Sstevel@tonic-gate typedef struct rib_hca_s rib_hca_t; 810Sstevel@tonic-gate typedef struct rib_qp_s rib_qp_t; 820Sstevel@tonic-gate typedef struct rib_cq_s rib_cq_t; 830Sstevel@tonic-gate 840Sstevel@tonic-gate /* 850Sstevel@tonic-gate * Notification for RDMA_DONE is based on xid 860Sstevel@tonic-gate */ 870Sstevel@tonic-gate struct rdma_done_list { 880Sstevel@tonic-gate uint32_t xid; /* XID waiting for RDMA_DONE */ 890Sstevel@tonic-gate kcondvar_t rdma_done_cv; /* cv for RDMA_DONE */ 900Sstevel@tonic-gate struct rdma_done_list *next; 910Sstevel@tonic-gate struct rdma_done_list *prev; 920Sstevel@tonic-gate }; 930Sstevel@tonic-gate 940Sstevel@tonic-gate /* 950Sstevel@tonic-gate * State of the plugin. 960Sstevel@tonic-gate * ACCEPT = accepting new connections and requests 970Sstevel@tonic-gate * NO_ACCEPT = not accepting new connection and requests 980Sstevel@tonic-gate */ 990Sstevel@tonic-gate #define ACCEPT 1 1000Sstevel@tonic-gate #define NO_ACCEPT 2 1010Sstevel@tonic-gate 1020Sstevel@tonic-gate /* 1030Sstevel@tonic-gate * Send Wait states 1040Sstevel@tonic-gate */ 1050Sstevel@tonic-gate #define SEND_WAIT -1 1060Sstevel@tonic-gate 1070Sstevel@tonic-gate /* 1080Sstevel@tonic-gate * Reply states 1090Sstevel@tonic-gate */ 1100Sstevel@tonic-gate #define REPLY_WAIT -1 1110Sstevel@tonic-gate 1120Sstevel@tonic-gate typedef void * rib_pvoid; 1130Sstevel@tonic-gate typedef rib_pvoid RIB_SYNCMEM_HANDLE; 1140Sstevel@tonic-gate 1150Sstevel@tonic-gate /* 1160Sstevel@tonic-gate * IB buffer pool management structure 1170Sstevel@tonic-gate */ 1180Sstevel@tonic-gate 1190Sstevel@tonic-gate /* 1200Sstevel@tonic-gate * Buffer pool info 1210Sstevel@tonic-gate */ 1220Sstevel@tonic-gate typedef struct { 1230Sstevel@tonic-gate kmutex_t buflock; /* lock for this structure */ 1240Sstevel@tonic-gate caddr_t buf; /* pool address */ 1250Sstevel@tonic-gate uint32_t bufhandle; /* rkey for this pool */ 1260Sstevel@tonic-gate ulong_t bufsize; /* size of pool */ 1270Sstevel@tonic-gate int rsize; /* size of each element */ 1280Sstevel@tonic-gate int numelems; /* no. of elements allocated */ 1290Sstevel@tonic-gate int buffree; /* no. of free elements */ 1300Sstevel@tonic-gate void *buflist[1]; /* free elements in pool */ 1310Sstevel@tonic-gate } bufpool_t; 1320Sstevel@tonic-gate 1330Sstevel@tonic-gate typedef struct { 1340Sstevel@tonic-gate bufpool_t *bpool; 1350Sstevel@tonic-gate ibt_mr_hdl_t *mr_hdl; 1360Sstevel@tonic-gate ibt_mr_desc_t *mr_desc; /* vaddr, lkey, rkey */ 1370Sstevel@tonic-gate } rib_bufpool_t; 1380Sstevel@tonic-gate 1390Sstevel@tonic-gate /* 1400Sstevel@tonic-gate * ATS relsted defines and structures. 1410Sstevel@tonic-gate */ 1420Sstevel@tonic-gate #define ATS_AR_DATA_LEN 16 1430Sstevel@tonic-gate #define IBD_NAME "ibd" 1440Sstevel@tonic-gate #define N_IBD_INSTANCES 4 1450Sstevel@tonic-gate 1460Sstevel@tonic-gate 1470Sstevel@tonic-gate /* 1480Sstevel@tonic-gate * Service types supported by RPCIB 1490Sstevel@tonic-gate * For now only NFS is supported. 1500Sstevel@tonic-gate */ 1510Sstevel@tonic-gate #define NFS 1 1520Sstevel@tonic-gate #define NLM 2 1530Sstevel@tonic-gate 1540Sstevel@tonic-gate /* 1550Sstevel@tonic-gate * Tracks consumer state (client or server). 1560Sstevel@tonic-gate */ 1570Sstevel@tonic-gate typedef enum { 1580Sstevel@tonic-gate RIB_SERVER, 1590Sstevel@tonic-gate RIB_CLIENT 1600Sstevel@tonic-gate } rib_mode_t; 1610Sstevel@tonic-gate 1620Sstevel@tonic-gate /* 1630Sstevel@tonic-gate * CQ structure 1640Sstevel@tonic-gate */ 1650Sstevel@tonic-gate struct rib_cq_s { 1660Sstevel@tonic-gate rib_hca_t *rib_hca; 1670Sstevel@tonic-gate ibt_cq_hdl_t rib_cq_hdl; 1680Sstevel@tonic-gate }; 1690Sstevel@tonic-gate 1700Sstevel@tonic-gate /* 1710Sstevel@tonic-gate * RPCIB plugin state 1720Sstevel@tonic-gate */ 1730Sstevel@tonic-gate typedef struct rpcib_state { 1740Sstevel@tonic-gate ibt_clnt_hdl_t ibt_clnt_hdl; 1750Sstevel@tonic-gate uint32_t hca_count; 1760Sstevel@tonic-gate uint32_t nhca_inited; 1770Sstevel@tonic-gate ib_guid_t *hca_guids; 1780Sstevel@tonic-gate rib_hca_t *hcas; 1790Sstevel@tonic-gate int refcount; 1800Sstevel@tonic-gate kmutex_t open_hca_lock; 1810Sstevel@tonic-gate rib_hca_t *hca; /* the hca being used */ 1820Sstevel@tonic-gate queue_t *q; /* up queue for a serv_type */ 1830Sstevel@tonic-gate uint32_t service_type; /* NFS, NLM, etc */ 1840Sstevel@tonic-gate void *private; 1850Sstevel@tonic-gate } rpcib_state_t; 1860Sstevel@tonic-gate 1870Sstevel@tonic-gate /* 1880Sstevel@tonic-gate * Each registered service's data structure. 1890Sstevel@tonic-gate * Each HCA has a list of these structures, which are the registered 1900Sstevel@tonic-gate * services on this HCA. 1910Sstevel@tonic-gate */ 1920Sstevel@tonic-gate typedef struct rib_service rib_service_t; 1930Sstevel@tonic-gate struct rib_service { 1940Sstevel@tonic-gate uint32_t srv_type; /* i.e, NFS, NLM, v4CBD */ 1950Sstevel@tonic-gate ibt_srv_hdl_t srv_hdl; /* from ibt_register call */ 1960Sstevel@tonic-gate rib_service_t *srv_next; 1970Sstevel@tonic-gate }; 1980Sstevel@tonic-gate 1990Sstevel@tonic-gate /* 2000Sstevel@tonic-gate * Connection lists 2010Sstevel@tonic-gate */ 2020Sstevel@tonic-gate typedef struct { 2030Sstevel@tonic-gate krwlock_t conn_lock; /* list lock */ 2040Sstevel@tonic-gate CONN *conn_hd; /* list head */ 2050Sstevel@tonic-gate } rib_conn_list_t; 2060Sstevel@tonic-gate 2070Sstevel@tonic-gate enum hca_state { 208*8695SRajkumar.Sivaprakasam@Sun.COM HCA_DETACHED, /* hca in detached state */ 2090Sstevel@tonic-gate HCA_INITED, /* hca in up and running state */ 2100Sstevel@tonic-gate }; 2110Sstevel@tonic-gate 2120Sstevel@tonic-gate /* 2130Sstevel@tonic-gate * RPCIB per HCA structure 2140Sstevel@tonic-gate */ 2150Sstevel@tonic-gate struct rib_hca_s { 2160Sstevel@tonic-gate ibt_clnt_hdl_t ibt_clnt_hdl; 2170Sstevel@tonic-gate 2180Sstevel@tonic-gate /* 2190Sstevel@tonic-gate * per HCA. 2200Sstevel@tonic-gate */ 2210Sstevel@tonic-gate ibt_hca_hdl_t hca_hdl; /* HCA handle */ 2220Sstevel@tonic-gate ibt_hca_attr_t hca_attrs; /* HCA attributes */ 2230Sstevel@tonic-gate ibt_pd_hdl_t pd_hdl; 2240Sstevel@tonic-gate ib_guid_t hca_guid; 2250Sstevel@tonic-gate uint32_t hca_nports; 2260Sstevel@tonic-gate ibt_hca_portinfo_t *hca_ports; 2270Sstevel@tonic-gate size_t hca_pinfosz; 2280Sstevel@tonic-gate enum hca_state state; /* state of HCA */ 2290Sstevel@tonic-gate krwlock_t state_lock; /* protects state field */ 2300Sstevel@tonic-gate bool_t inuse; /* indicates HCA usage */ 2310Sstevel@tonic-gate kmutex_t inuse_lock; /* protects inuse field */ 2320Sstevel@tonic-gate /* 2330Sstevel@tonic-gate * List of services registered on all ports available 2340Sstevel@tonic-gate * on this HCA. Only one consumer of KRPC can register 2350Sstevel@tonic-gate * its services at one time or tear them down at one 2360Sstevel@tonic-gate * time. 2370Sstevel@tonic-gate */ 2380Sstevel@tonic-gate rib_service_t *service_list; 2390Sstevel@tonic-gate krwlock_t service_list_lock; 2400Sstevel@tonic-gate 2410Sstevel@tonic-gate 2420Sstevel@tonic-gate rib_conn_list_t cl_conn_list; /* client conn list */ 2430Sstevel@tonic-gate rib_conn_list_t srv_conn_list; /* server conn list */ 2440Sstevel@tonic-gate 2450Sstevel@tonic-gate rib_cq_t *clnt_scq; 2460Sstevel@tonic-gate rib_cq_t *clnt_rcq; 2470Sstevel@tonic-gate rib_cq_t *svc_scq; 2480Sstevel@tonic-gate rib_cq_t *svc_rcq; 2490Sstevel@tonic-gate kmutex_t cb_lock; 2500Sstevel@tonic-gate kcondvar_t cb_cv; 2510Sstevel@tonic-gate 2520Sstevel@tonic-gate rib_bufpool_t *recv_pool; /* recv buf pool */ 2530Sstevel@tonic-gate rib_bufpool_t *send_pool; /* send buf pool */ 2540Sstevel@tonic-gate 2550Sstevel@tonic-gate void *iblock; /* interrupt cookie */ 2567387SRobert.Gordon@Sun.COM 2577387SRobert.Gordon@Sun.COM kmem_cache_t *server_side_cache; /* long reply pool */ 2587387SRobert.Gordon@Sun.COM avl_tree_t avl_tree; 2597387SRobert.Gordon@Sun.COM kmutex_t avl_lock; 2607387SRobert.Gordon@Sun.COM krwlock_t avl_rw_lock; 2617387SRobert.Gordon@Sun.COM volatile bool_t avl_init; 2627387SRobert.Gordon@Sun.COM kmutex_t cache_allocation; 2637387SRobert.Gordon@Sun.COM ddi_taskq_t *reg_cache_clean_up; 2647387SRobert.Gordon@Sun.COM ib_svc_id_t srv_id; 2657387SRobert.Gordon@Sun.COM ibt_srv_hdl_t srv_hdl; 2667387SRobert.Gordon@Sun.COM uint_t reg_state; 2677387SRobert.Gordon@Sun.COM 2680Sstevel@tonic-gate }; 2690Sstevel@tonic-gate 2700Sstevel@tonic-gate 2710Sstevel@tonic-gate /* 2720Sstevel@tonic-gate * Structure on wait state of a post send 2730Sstevel@tonic-gate */ 2740Sstevel@tonic-gate struct send_wid { 2750Sstevel@tonic-gate uint32_t xid; 2760Sstevel@tonic-gate int cv_sig; 2770Sstevel@tonic-gate kmutex_t sendwait_lock; 2780Sstevel@tonic-gate kcondvar_t wait_cv; 2790Sstevel@tonic-gate uint_t status; 2800Sstevel@tonic-gate rib_qp_t *qp; 2810Sstevel@tonic-gate int nsbufs; /* # of send buffers posted */ 2820Sstevel@tonic-gate uint64_t sbufaddr[DSEG_MAX]; /* posted send buffers */ 2837387SRobert.Gordon@Sun.COM caddr_t c; 2847387SRobert.Gordon@Sun.COM caddr_t c1; 2857387SRobert.Gordon@Sun.COM int l1; 2867387SRobert.Gordon@Sun.COM caddr_t c2; 2877387SRobert.Gordon@Sun.COM int l2; 2887387SRobert.Gordon@Sun.COM int wl, rl; 2890Sstevel@tonic-gate }; 2900Sstevel@tonic-gate 2910Sstevel@tonic-gate /* 2920Sstevel@tonic-gate * Structure on reply descriptor for recv queue. 2930Sstevel@tonic-gate * Different from the above posting of a descriptor. 2940Sstevel@tonic-gate */ 2950Sstevel@tonic-gate struct reply { 2960Sstevel@tonic-gate uint32_t xid; 2970Sstevel@tonic-gate uint_t status; 2980Sstevel@tonic-gate uint64_t vaddr_cq; /* buf addr from CQ */ 2990Sstevel@tonic-gate uint_t bytes_xfer; 3000Sstevel@tonic-gate kcondvar_t wait_cv; 3010Sstevel@tonic-gate struct reply *next; 3020Sstevel@tonic-gate struct reply *prev; 3030Sstevel@tonic-gate }; 3040Sstevel@tonic-gate 3050Sstevel@tonic-gate struct svc_recv { 3060Sstevel@tonic-gate rib_qp_t *qp; 3070Sstevel@tonic-gate uint64_t vaddr; 3080Sstevel@tonic-gate uint_t bytes_xfer; 3090Sstevel@tonic-gate }; 3100Sstevel@tonic-gate 3110Sstevel@tonic-gate struct recv_wid { 3120Sstevel@tonic-gate uint32_t xid; 3130Sstevel@tonic-gate rib_qp_t *qp; 3140Sstevel@tonic-gate uint64_t addr; /* posted buf addr */ 3150Sstevel@tonic-gate }; 3160Sstevel@tonic-gate 3170Sstevel@tonic-gate /* 3180Sstevel@tonic-gate * Per QP data structure 3190Sstevel@tonic-gate */ 3200Sstevel@tonic-gate struct rib_qp_s { 3210Sstevel@tonic-gate rib_hca_t *hca; 3220Sstevel@tonic-gate rib_mode_t mode; /* RIB_SERVER or RIB_CLIENT */ 3230Sstevel@tonic-gate CONN rdmaconn; 3240Sstevel@tonic-gate ibt_channel_hdl_t qp_hdl; 3250Sstevel@tonic-gate uint_t port_num; 3260Sstevel@tonic-gate ib_qpn_t qpn; 3270Sstevel@tonic-gate int chan_flags; 3280Sstevel@tonic-gate clock_t timeout; 3290Sstevel@tonic-gate ibt_rc_chan_query_attr_t qp_q_attrs; 3300Sstevel@tonic-gate rib_cq_t *send_cq; /* send CQ */ 3310Sstevel@tonic-gate rib_cq_t *recv_cq; /* recv CQ */ 3320Sstevel@tonic-gate 3330Sstevel@tonic-gate /* 3340Sstevel@tonic-gate * Number of pre-posted rbufs 3350Sstevel@tonic-gate */ 3360Sstevel@tonic-gate uint_t n_posted_rbufs; 3370Sstevel@tonic-gate kcondvar_t posted_rbufs_cv; 3380Sstevel@tonic-gate kmutex_t posted_rbufs_lock; 3390Sstevel@tonic-gate 3400Sstevel@tonic-gate /* 3410Sstevel@tonic-gate * RPC reply 3420Sstevel@tonic-gate */ 3430Sstevel@tonic-gate uint_t rep_list_size; 3440Sstevel@tonic-gate struct reply *replylist; 3450Sstevel@tonic-gate kmutex_t replylist_lock; 3460Sstevel@tonic-gate 3470Sstevel@tonic-gate /* 3480Sstevel@tonic-gate * server only, RDMA_DONE 3490Sstevel@tonic-gate */ 3500Sstevel@tonic-gate struct rdma_done_list *rdlist; 3510Sstevel@tonic-gate kmutex_t rdlist_lock; 3520Sstevel@tonic-gate 3530Sstevel@tonic-gate kmutex_t cb_lock; 3540Sstevel@tonic-gate kcondvar_t cb_conn_cv; 3550Sstevel@tonic-gate 3560Sstevel@tonic-gate caddr_t q; /* upstream queue */ 3577387SRobert.Gordon@Sun.COM struct send_wid wd; 3580Sstevel@tonic-gate }; 3590Sstevel@tonic-gate 3600Sstevel@tonic-gate #define ctoqp(conn) ((rib_qp_t *)((conn)->c_private)) 3610Sstevel@tonic-gate #define qptoc(rqp) ((CONN *)&((rqp)->rdmaconn)) 3620Sstevel@tonic-gate 3630Sstevel@tonic-gate /* 3640Sstevel@tonic-gate * Timeout for various calls 3650Sstevel@tonic-gate */ 3660Sstevel@tonic-gate #define CONN_WAIT_TIME 40 3670Sstevel@tonic-gate #define SEND_WAIT_TIME 40 /* time for send completion */ 3680Sstevel@tonic-gate 3690Sstevel@tonic-gate #define REPLY_WAIT_TIME 40 /* time to get reply from remote QP */ 3700Sstevel@tonic-gate 3710Sstevel@tonic-gate #ifdef __cplusplus 3720Sstevel@tonic-gate } 3730Sstevel@tonic-gate #endif 3740Sstevel@tonic-gate 3750Sstevel@tonic-gate #endif /* !_IB_H */ 376