xref: /onnv-gate/usr/src/uts/common/io/ib/clients/rds/rdsib.c (revision 8659:46f97dfd41cf)
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */
253302Sagiri 
263302Sagiri 
273302Sagiri #include <sys/types.h>
283302Sagiri #include <sys/stat.h>
293302Sagiri #include <sys/conf.h>
303302Sagiri #include <sys/ddi.h>
313302Sagiri #include <sys/sunddi.h>
323302Sagiri #include <sys/modctl.h>
333302Sagiri #include <inet/ip.h>
343302Sagiri #include <sys/ib/clients/rds/rdsib_ib.h>
353302Sagiri #include <sys/ib/clients/rds/rdsib_buf.h>
363302Sagiri #include <sys/ib/clients/rds/rdsib_cm.h>
373302Sagiri #include <sys/ib/clients/rds/rdsib_protocol.h>
383302Sagiri #include <sys/ib/clients/rds/rds_transport.h>
393302Sagiri #include <sys/ib/clients/rds/rds_kstat.h>
403302Sagiri 
413302Sagiri /*
423302Sagiri  * Global Configuration Variables
433302Sagiri  * As defined in RDS proposal
443302Sagiri  */
456438Sagiri uint_t		MaxNodes		= RDS_MAX_NODES;
463302Sagiri uint_t		RdsPktSize;
475342Sagiri uint_t		NDataRX;
483302Sagiri uint_t		MaxDataSendBuffers	= RDS_MAX_DATA_SEND_BUFFERS;
493302Sagiri uint_t		MaxDataRecvBuffers	= RDS_MAX_DATA_RECV_BUFFERS;
503302Sagiri uint_t		MaxCtrlSendBuffers	= RDS_MAX_CTRL_SEND_BUFFERS;
513302Sagiri uint_t		MaxCtrlRecvBuffers	= RDS_MAX_CTRL_RECV_BUFFERS;
523302Sagiri uint_t		DataRecvBufferLWM	= RDS_DATA_RECV_BUFFER_LWM;
533302Sagiri uint_t		CtrlRecvBufferLWM	= RDS_CTRL_RECV_BUFFER_LWM;
543302Sagiri uint_t		PendingRxPktsHWM	= RDS_PENDING_RX_PKTS_HWM;
553302Sagiri uint_t		MinRnrRetry		= RDS_IB_RNR_RETRY;
563302Sagiri uint8_t		IBPathRetryCount	= RDS_IB_PATH_RETRY;
573302Sagiri uint8_t		IBPktLifeTime		= RDS_IB_PKT_LT;
583302Sagiri 
593302Sagiri extern int rdsib_open_ib();
603302Sagiri extern void rdsib_close_ib();
613302Sagiri extern void rds_resume_port(in_port_t port);
623302Sagiri extern int rds_sendmsg(uio_t *uiop, ipaddr_t sendip, ipaddr_t recvip,
633302Sagiri     in_port_t sendport, in_port_t recvport, zoneid_t zoneid);
644467Sagiri extern boolean_t rds_if_lookup_by_name(char *devname);
653302Sagiri 
663302Sagiri rds_transport_ops_t rds_ib_transport_ops = {
673302Sagiri 	rdsib_open_ib,
683302Sagiri 	rdsib_close_ib,
693302Sagiri 	rds_sendmsg,
703302Sagiri 	rds_resume_port,
713302Sagiri 	rds_if_lookup_by_name
723302Sagiri };
733302Sagiri 
743302Sagiri /* global */
753302Sagiri rds_state_t	*rdsib_statep = NULL;
764467Sagiri krwlock_t	rds_loopback_portmap_lock;
774467Sagiri uint8_t		rds_loopback_portmap[RDS_PORT_MAP_SIZE];
783302Sagiri ddi_taskq_t	*rds_taskq = NULL;
793302Sagiri dev_info_t	*rdsib_dev_info = NULL;
803302Sagiri uint_t		rds_rx_pkts_pending_hwm;
813302Sagiri 
823302Sagiri #ifdef DEBUG
833302Sagiri uint32_t	rdsdbglvl = RDS_LOG_L3;
843302Sagiri #else
853302Sagiri uint32_t	rdsdbglvl = RDS_LOG_L2;
863302Sagiri #endif
873302Sagiri 
883302Sagiri #define		RDS_NUM_TASKQ_THREADS	4
893302Sagiri 
903302Sagiri static int rdsib_attach(dev_info_t *dip, ddi_attach_cmd_t cmd);
913302Sagiri static int rdsib_detach(dev_info_t *dip, ddi_detach_cmd_t cmd);
923302Sagiri static int rdsib_info(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg,
933302Sagiri     void **result);
943302Sagiri static void rds_read_config_values(dev_info_t *dip);
953302Sagiri 
963302Sagiri /* Driver entry points */
973302Sagiri static struct cb_ops	rdsib_cb_ops = {
983302Sagiri 	nulldev,		/* open */
993302Sagiri 	nulldev,		/* close */
1003302Sagiri 	nodev,			/* strategy */
1013302Sagiri 	nodev,			/* print */
1023302Sagiri 	nodev,			/* dump */
1033302Sagiri 	nodev,			/* read */
1043302Sagiri 	nodev,			/* write */
1053302Sagiri 	nodev,			/* ioctl */
1063302Sagiri 	nodev,			/* devmap */
1073302Sagiri 	nodev,			/* mmap */
1083302Sagiri 	nodev,			/* segmap */
1093302Sagiri 	nochpoll,		/* poll */
1103302Sagiri 	ddi_prop_op,		/* prop_op */
1113302Sagiri 	NULL,			/* stream */
1123302Sagiri 	D_MP,			/* cb_flag */
1133302Sagiri 	CB_REV,			/* rev */
1143302Sagiri 	nodev,			/* int (*cb_aread)() */
1153302Sagiri 	nodev,			/* int (*cb_awrite)() */
1163302Sagiri };
1173302Sagiri 
1183302Sagiri /* Device options */
1193302Sagiri static struct dev_ops rdsib_ops = {
1203302Sagiri 	DEVO_REV,		/* devo_rev, */
1213302Sagiri 	0,			/* refcnt  */
1223302Sagiri 	rdsib_info,		/* info */
1233302Sagiri 	nulldev,		/* identify */
1243302Sagiri 	nulldev,		/* probe */
1253302Sagiri 	rdsib_attach,		/* attach */
1263302Sagiri 	rdsib_detach,		/* detach */
1273302Sagiri 	nodev,			/* reset */
1283302Sagiri 	&rdsib_cb_ops,		/* driver ops - devctl interfaces */
1293302Sagiri 	NULL,			/* bus operations */
1307656SSherry.Moore@Sun.COM 	NULL,			/* power */
131*8659SBill.Taylor@Sun.COM 	ddi_quiesce_not_needed,	/* devo_quiesce */
1323302Sagiri };
1333302Sagiri 
1343302Sagiri /*
1353302Sagiri  * Module linkage information.
1363302Sagiri  */
1377656SSherry.Moore@Sun.COM #define	RDS_DEVDESC	"RDS IB driver"
1383302Sagiri static struct modldrv rdsib_modldrv = {
1393302Sagiri 	&mod_driverops,		/* Driver module */
1403302Sagiri 	RDS_DEVDESC,		/* Driver name and version */
1413302Sagiri 	&rdsib_ops,		/* Driver ops */
1423302Sagiri };
1433302Sagiri 
1443302Sagiri static struct modlinkage rdsib_modlinkage = {
1453302Sagiri 	MODREV_1,
1463302Sagiri 	(void *)&rdsib_modldrv,
1473302Sagiri 	NULL
1483302Sagiri };
1493302Sagiri 
1503302Sagiri /* Called from _init */
1513302Sagiri int
rdsib_init()1523302Sagiri rdsib_init()
1533302Sagiri {
1543302Sagiri 	/* RDS supports only one instance */
1553302Sagiri 	rdsib_statep = kmem_zalloc(sizeof (rds_state_t), KM_SLEEP);
1563302Sagiri 
1573302Sagiri 	rw_init(&rdsib_statep->rds_sessionlock, NULL, RW_DRIVER, NULL);
1583302Sagiri 	rw_init(&rdsib_statep->rds_hca_lock, NULL, RW_DRIVER, NULL);
1593302Sagiri 
1604467Sagiri 	rw_init(&rds_loopback_portmap_lock, NULL, RW_DRIVER, NULL);
1614467Sagiri 	bzero(rds_loopback_portmap, RDS_PORT_MAP_SIZE);
1623302Sagiri 
1633302Sagiri 	mutex_init(&rds_dpool.pool_lock, NULL, MUTEX_DRIVER, NULL);
1643302Sagiri 	cv_init(&rds_dpool.pool_cv, NULL, CV_DRIVER, NULL);
1653302Sagiri 	mutex_init(&rds_cpool.pool_lock, NULL, MUTEX_DRIVER, NULL);
1663302Sagiri 	cv_init(&rds_cpool.pool_cv, NULL, CV_DRIVER, NULL);
1673302Sagiri 
1683302Sagiri 	/* Initialize logging */
1693302Sagiri 	rds_logging_initialization();
1703302Sagiri 
1713302Sagiri 	RDS_SET_NPORT(1); /* this should never be 0 */
1723302Sagiri 
1733302Sagiri 	ASSERT(rds_transport_ops == NULL);
1743302Sagiri 	rds_transport_ops = &rds_ib_transport_ops;
1753302Sagiri 
1763302Sagiri 	return (0);
1773302Sagiri }
1783302Sagiri 
1793302Sagiri /* Called from _fini */
1803302Sagiri void
rdsib_fini()1813302Sagiri rdsib_fini()
1823302Sagiri {
1833302Sagiri 	/* Stop logging */
1843302Sagiri 	rds_logging_destroy();
1853302Sagiri 
1863302Sagiri 	cv_destroy(&rds_dpool.pool_cv);
1873302Sagiri 	mutex_destroy(&rds_dpool.pool_lock);
1883302Sagiri 	cv_destroy(&rds_cpool.pool_cv);
1893302Sagiri 	mutex_destroy(&rds_cpool.pool_lock);
1903302Sagiri 
1914467Sagiri 	rw_destroy(&rds_loopback_portmap_lock);
1923302Sagiri 
1933302Sagiri 	rw_destroy(&rdsib_statep->rds_hca_lock);
1943302Sagiri 	rw_destroy(&rdsib_statep->rds_sessionlock);
1953302Sagiri 	kmem_free(rdsib_statep, sizeof (rds_state_t));
1963302Sagiri 
1973302Sagiri 	rds_transport_ops = NULL;
1983302Sagiri }
1993302Sagiri 
2003302Sagiri int
_init(void)2013302Sagiri _init(void)
2023302Sagiri {
2033302Sagiri 	int	ret;
2043302Sagiri 
2053302Sagiri 	if (ibt_hw_is_present() == 0) {
2063302Sagiri 		return (ENODEV);
2073302Sagiri 	}
2083302Sagiri 
2093302Sagiri 	ret = rdsib_init();
2103302Sagiri 	if (ret != 0) {
2113302Sagiri 		return (ret);
2123302Sagiri 	}
2133302Sagiri 
2143302Sagiri 	ret = mod_install(&rdsib_modlinkage);
2153302Sagiri 	if (ret != 0) {
2163302Sagiri 		/*
2173302Sagiri 		 * Could not load module
2183302Sagiri 		 */
2193302Sagiri 		rdsib_fini();
2203302Sagiri 		return (ret);
2213302Sagiri 	}
2223302Sagiri 
2233302Sagiri 	return (0);
2243302Sagiri }
2253302Sagiri 
2263302Sagiri int
_fini()2273302Sagiri _fini()
2283302Sagiri {
2293302Sagiri 	int	ret;
2303302Sagiri 
2313302Sagiri 	/*
2323302Sagiri 	 * Remove module
2333302Sagiri 	 */
2343302Sagiri 	if ((ret = mod_remove(&rdsib_modlinkage)) != 0) {
2353302Sagiri 		return (ret);
2363302Sagiri 	}
2373302Sagiri 
2383302Sagiri 	rdsib_fini();
2393302Sagiri 
2403302Sagiri 	return (0);
2413302Sagiri }
2423302Sagiri 
2433302Sagiri int
_info(struct modinfo * modinfop)2443302Sagiri _info(struct modinfo *modinfop)
2453302Sagiri {
2463302Sagiri 	return (mod_info(&rdsib_modlinkage, modinfop));
2473302Sagiri }
2483302Sagiri 
2493302Sagiri static int
rdsib_attach(dev_info_t * dip,ddi_attach_cmd_t cmd)2503302Sagiri rdsib_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
2513302Sagiri {
2523302Sagiri 	int	ret;
2533302Sagiri 
2546438Sagiri 	RDS_DPRINTF2("rdsib_attach", "enter");
2553302Sagiri 
2563302Sagiri 	if (cmd != DDI_ATTACH)
2573302Sagiri 		return (DDI_FAILURE);
2583302Sagiri 
2593302Sagiri 	if (rdsib_dev_info != NULL) {
2603302Sagiri 		RDS_DPRINTF1("rdsib_attach", "Multiple RDS instances are"
2613302Sagiri 		    " not supported (rds_dev_info: 0x%p)", rdsib_dev_info);
2623302Sagiri 		return (DDI_FAILURE);
2633302Sagiri 	}
2643302Sagiri 
2653302Sagiri 	rdsib_dev_info = dip;
2663302Sagiri 	rds_read_config_values(dip);
2673302Sagiri 
2683302Sagiri 	rds_taskq = ddi_taskq_create(dip, "rds_taskq", RDS_NUM_TASKQ_THREADS,
2693302Sagiri 	    TASKQ_DEFAULTPRI, 0);
2703302Sagiri 	if (rds_taskq == NULL) {
2716702Sagiri 		RDS_DPRINTF1("rdsib_attach",
2726702Sagiri 		    "ddi_taskq_create failed for rds_taskq");
2734154Sagiri 		rdsib_dev_info = NULL;
2743302Sagiri 		return (DDI_FAILURE);
2753302Sagiri 	}
2763302Sagiri 
2773302Sagiri 	ret = ddi_create_minor_node(dip, "rdsib", S_IFCHR, 0, DDI_PSEUDO, 0);
2783302Sagiri 	if (ret != DDI_SUCCESS) {
2796702Sagiri 		RDS_DPRINTF1("rdsib_attach",
2806702Sagiri 		    "ddi_create_minor_node failed: %d", ret);
2813302Sagiri 		ddi_taskq_destroy(rds_taskq);
2823302Sagiri 		rds_taskq = NULL;
2834154Sagiri 		rdsib_dev_info = NULL;
2843302Sagiri 		return (DDI_FAILURE);
2853302Sagiri 	}
2863302Sagiri 
2873302Sagiri 	/* Max number of receive buffers on the system */
2885342Sagiri 	NDataRX = (MaxNodes - 1) * MaxDataRecvBuffers * 2;
2893302Sagiri 
2903302Sagiri 	/*
2913302Sagiri 	 * High water mark for the receive buffers in the system. If the
2923302Sagiri 	 * number of buffers used crosses this mark then all sockets in
2933302Sagiri 	 * would be stalled. The port quota for the sockets is set based
2943302Sagiri 	 * on this limit.
2953302Sagiri 	 */
2965342Sagiri 	rds_rx_pkts_pending_hwm = (PendingRxPktsHWM * NDataRX)/100;
2973302Sagiri 
2986438Sagiri 	ret = rdsib_initialize_ib();
2996438Sagiri 	if (ret != 0) {
3006702Sagiri 		RDS_DPRINTF1("rdsib_attach",
3016702Sagiri 		    "rdsib_initialize_ib failed: %d", ret);
3026438Sagiri 		ddi_taskq_destroy(rds_taskq);
3036438Sagiri 		rds_taskq = NULL;
3046438Sagiri 		rdsib_dev_info = NULL;
3056438Sagiri 		return (DDI_FAILURE);
3066438Sagiri 	}
3076438Sagiri 
3086438Sagiri 	RDS_DPRINTF2("rdsib_attach", "return");
3093302Sagiri 
3103302Sagiri 	return (DDI_SUCCESS);
3113302Sagiri }
3123302Sagiri 
3133302Sagiri static int
rdsib_detach(dev_info_t * dip,ddi_detach_cmd_t cmd)3143302Sagiri rdsib_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
3153302Sagiri {
3166438Sagiri 	RDS_DPRINTF2("rdsib_detach", "enter");
3173302Sagiri 
3183302Sagiri 	if (cmd != DDI_DETACH)
3193302Sagiri 		return (DDI_FAILURE);
3203302Sagiri 
3216438Sagiri 	rdsib_deinitialize_ib();
3226438Sagiri 
3233302Sagiri 	ddi_remove_minor_node(dip, "rdsib");
3243302Sagiri 
3253302Sagiri 	/* destroy taskq */
3263302Sagiri 	if (rds_taskq != NULL) {
3273302Sagiri 		ddi_taskq_destroy(rds_taskq);
3283302Sagiri 		rds_taskq = NULL;
3293302Sagiri 	}
3303302Sagiri 
3314154Sagiri 	rdsib_dev_info = NULL;
3324154Sagiri 
3336438Sagiri 	RDS_DPRINTF2("rdsib_detach", "return");
3343302Sagiri 
3353302Sagiri 	return (DDI_SUCCESS);
3363302Sagiri }
3373302Sagiri 
3383302Sagiri /* ARGSUSED */
3393302Sagiri static int
rdsib_info(dev_info_t * dip,ddi_info_cmd_t cmd,void * arg,void ** result)3403302Sagiri rdsib_info(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **result)
3413302Sagiri {
3423302Sagiri 	int ret = DDI_FAILURE;
3433302Sagiri 
3443302Sagiri 	switch (cmd) {
3453302Sagiri 	case DDI_INFO_DEVT2DEVINFO:
3463302Sagiri 		if (rdsib_dev_info != NULL) {
3473302Sagiri 			*result = (void *)rdsib_dev_info;
3483302Sagiri 			ret = DDI_SUCCESS;
3493302Sagiri 		}
3503302Sagiri 		break;
3513302Sagiri 
3523302Sagiri 	case DDI_INFO_DEVT2INSTANCE:
3533302Sagiri 		*result = NULL;
3543302Sagiri 		ret = DDI_SUCCESS;
3553302Sagiri 		break;
3563302Sagiri 
3573302Sagiri 	default:
3583302Sagiri 		break;
3593302Sagiri 	}
3603302Sagiri 
3613302Sagiri 	return (ret);
3623302Sagiri }
3633302Sagiri 
3643302Sagiri static void
rds_read_config_values(dev_info_t * dip)3653302Sagiri rds_read_config_values(dev_info_t *dip)
3663302Sagiri {
3675342Sagiri 	MaxNodes = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
3685342Sagiri 	    "MaxNodes", RDS_MAX_NODES);
3695342Sagiri 
3703302Sagiri 	UserBufferSize = ddi_prop_get_int(DDI_DEV_T_ANY, dip,
3713302Sagiri 	    DDI_PROP_DONTPASS, "UserBufferSize", RDS_USER_DATA_BUFFER_SIZE);
3723302Sagiri 
3733302Sagiri 	MaxDataSendBuffers = ddi_prop_get_int(DDI_DEV_T_ANY, dip,
3743302Sagiri 	    DDI_PROP_DONTPASS, "MaxDataSendBuffers", RDS_MAX_DATA_SEND_BUFFERS);
3753302Sagiri 
3763302Sagiri 	MaxDataRecvBuffers = ddi_prop_get_int(DDI_DEV_T_ANY, dip,
3773302Sagiri 	    DDI_PROP_DONTPASS, "MaxDataRecvBuffers", RDS_MAX_DATA_RECV_BUFFERS);
3783302Sagiri 
3793302Sagiri 	MaxCtrlSendBuffers = ddi_prop_get_int(DDI_DEV_T_ANY, dip,
3803302Sagiri 	    DDI_PROP_DONTPASS, "MaxCtrlSendBuffers", RDS_MAX_CTRL_SEND_BUFFERS);
3813302Sagiri 
3823302Sagiri 	MaxCtrlRecvBuffers = ddi_prop_get_int(DDI_DEV_T_ANY, dip,
3833302Sagiri 	    DDI_PROP_DONTPASS, "MaxCtrlRecvBuffers", RDS_MAX_CTRL_RECV_BUFFERS);
3843302Sagiri 
3853302Sagiri 	DataRecvBufferLWM = ddi_prop_get_int(DDI_DEV_T_ANY, dip,
3863302Sagiri 	    DDI_PROP_DONTPASS, "DataRecvBufferLWM", RDS_DATA_RECV_BUFFER_LWM);
3873302Sagiri 
3883302Sagiri 	CtrlRecvBufferLWM = ddi_prop_get_int(DDI_DEV_T_ANY, dip,
3893302Sagiri 	    DDI_PROP_DONTPASS, "CtrlRecvBufferLWM", RDS_CTRL_RECV_BUFFER_LWM);
3903302Sagiri 
3913302Sagiri 	PendingRxPktsHWM = ddi_prop_get_int(DDI_DEV_T_ANY, dip,
3923302Sagiri 	    DDI_PROP_DONTPASS, "PendingRxPktsHWM", RDS_PENDING_RX_PKTS_HWM);
3933302Sagiri 
3943302Sagiri 	MinRnrRetry = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
3953302Sagiri 	    "MinRnrRetry", RDS_IB_RNR_RETRY);
3963302Sagiri 
3973302Sagiri 	IBPathRetryCount = (uint8_t)ddi_prop_get_int(DDI_DEV_T_ANY, dip,
3983302Sagiri 	    DDI_PROP_DONTPASS, "IBPathRetryCount", RDS_IB_PATH_RETRY);
3993302Sagiri 
4003302Sagiri 	IBPktLifeTime = (uint8_t)ddi_prop_get_int(DDI_DEV_T_ANY, dip,
4013302Sagiri 	    DDI_PROP_DONTPASS, "IBPktLifeTime", RDS_IB_PKT_LT);
4023302Sagiri 
4033302Sagiri 	rdsdbglvl = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
4043302Sagiri 	    "rdsdbglvl", RDS_LOG_L2);
4053302Sagiri 
4065342Sagiri 	if (MaxNodes < 2) {
4075342Sagiri 		cmn_err(CE_WARN, "MaxNodes is set to less than 2");
4085342Sagiri 		MaxNodes = 2;
4093302Sagiri 	}
4103302Sagiri }
411