xref: /onnv-gate/usr/src/uts/sun4v/io/vsw_txdring.c (revision 12011:2377022c7a2d)
1*12011SSriharsha.Basavapatna@Sun.COM /*
2*12011SSriharsha.Basavapatna@Sun.COM  * CDDL HEADER START
3*12011SSriharsha.Basavapatna@Sun.COM  *
4*12011SSriharsha.Basavapatna@Sun.COM  * The contents of this file are subject to the terms of the
5*12011SSriharsha.Basavapatna@Sun.COM  * Common Development and Distribution License (the "License").
6*12011SSriharsha.Basavapatna@Sun.COM  * You may not use this file except in compliance with the License.
7*12011SSriharsha.Basavapatna@Sun.COM  *
8*12011SSriharsha.Basavapatna@Sun.COM  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9*12011SSriharsha.Basavapatna@Sun.COM  * or http://www.opensolaris.org/os/licensing.
10*12011SSriharsha.Basavapatna@Sun.COM  * See the License for the specific language governing permissions
11*12011SSriharsha.Basavapatna@Sun.COM  * and limitations under the License.
12*12011SSriharsha.Basavapatna@Sun.COM  *
13*12011SSriharsha.Basavapatna@Sun.COM  * When distributing Covered Code, include this CDDL HEADER in each
14*12011SSriharsha.Basavapatna@Sun.COM  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15*12011SSriharsha.Basavapatna@Sun.COM  * If applicable, add the following below this CDDL HEADER, with the
16*12011SSriharsha.Basavapatna@Sun.COM  * fields enclosed by brackets "[]" replaced with your own identifying
17*12011SSriharsha.Basavapatna@Sun.COM  * information: Portions Copyright [yyyy] [name of copyright owner]
18*12011SSriharsha.Basavapatna@Sun.COM  *
19*12011SSriharsha.Basavapatna@Sun.COM  * CDDL HEADER END
20*12011SSriharsha.Basavapatna@Sun.COM  */
21*12011SSriharsha.Basavapatna@Sun.COM 
22*12011SSriharsha.Basavapatna@Sun.COM /*
23*12011SSriharsha.Basavapatna@Sun.COM  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
24*12011SSriharsha.Basavapatna@Sun.COM  * Use is subject to license terms.
25*12011SSriharsha.Basavapatna@Sun.COM  */
26*12011SSriharsha.Basavapatna@Sun.COM #include <sys/types.h>
27*12011SSriharsha.Basavapatna@Sun.COM #include <sys/errno.h>
28*12011SSriharsha.Basavapatna@Sun.COM #include <sys/sysmacros.h>
29*12011SSriharsha.Basavapatna@Sun.COM #include <sys/param.h>
30*12011SSriharsha.Basavapatna@Sun.COM #include <sys/machsystm.h>
31*12011SSriharsha.Basavapatna@Sun.COM #include <sys/stream.h>
32*12011SSriharsha.Basavapatna@Sun.COM #include <sys/strsubr.h>
33*12011SSriharsha.Basavapatna@Sun.COM #include <sys/kmem.h>
34*12011SSriharsha.Basavapatna@Sun.COM #include <sys/strsun.h>
35*12011SSriharsha.Basavapatna@Sun.COM #include <sys/callb.h>
36*12011SSriharsha.Basavapatna@Sun.COM #include <sys/sdt.h>
37*12011SSriharsha.Basavapatna@Sun.COM #include <sys/mach_descrip.h>
38*12011SSriharsha.Basavapatna@Sun.COM #include <sys/mdeg.h>
39*12011SSriharsha.Basavapatna@Sun.COM #include <net/if.h>
40*12011SSriharsha.Basavapatna@Sun.COM #include <sys/vsw.h>
41*12011SSriharsha.Basavapatna@Sun.COM #include <sys/vio_mailbox.h>
42*12011SSriharsha.Basavapatna@Sun.COM #include <sys/vio_common.h>
43*12011SSriharsha.Basavapatna@Sun.COM #include <sys/vnet_common.h>
44*12011SSriharsha.Basavapatna@Sun.COM #include <sys/vnet_mailbox.h>
45*12011SSriharsha.Basavapatna@Sun.COM #include <sys/vio_util.h>
46*12011SSriharsha.Basavapatna@Sun.COM 
47*12011SSriharsha.Basavapatna@Sun.COM /*
48*12011SSriharsha.Basavapatna@Sun.COM  * This file contains the implementation of TxDring data transfer mode of VIO
49*12011SSriharsha.Basavapatna@Sun.COM  * Protocol in vsw. The functions in this file are invoked from vsw_ldc.c
50*12011SSriharsha.Basavapatna@Sun.COM  * after TxDring mode is negotiated with the peer during attribute phase of
51*12011SSriharsha.Basavapatna@Sun.COM  * handshake. This file contains functions that setup the transmit and receive
52*12011SSriharsha.Basavapatna@Sun.COM  * descriptor rings, and associated resources in TxDring mode. It also contains
53*12011SSriharsha.Basavapatna@Sun.COM  * the transmit and receive data processing functions that are invoked in
54*12011SSriharsha.Basavapatna@Sun.COM  * TxDring mode.
55*12011SSriharsha.Basavapatna@Sun.COM  */
56*12011SSriharsha.Basavapatna@Sun.COM 
57*12011SSriharsha.Basavapatna@Sun.COM /* Functions exported to vsw_ldc.c */
58*12011SSriharsha.Basavapatna@Sun.COM vio_dring_reg_msg_t *vsw_create_tx_dring_info(vsw_ldc_t *);
59*12011SSriharsha.Basavapatna@Sun.COM int vsw_setup_tx_dring(vsw_ldc_t *ldcp, dring_info_t *dp);
60*12011SSriharsha.Basavapatna@Sun.COM void vsw_destroy_tx_dring(vsw_ldc_t *ldcp);
61*12011SSriharsha.Basavapatna@Sun.COM dring_info_t *vsw_map_rx_dring(vsw_ldc_t *ldcp, void *pkt);
62*12011SSriharsha.Basavapatna@Sun.COM void vsw_unmap_rx_dring(vsw_ldc_t *ldcp);
63*12011SSriharsha.Basavapatna@Sun.COM int vsw_dringsend(vsw_ldc_t *, mblk_t *);
64*12011SSriharsha.Basavapatna@Sun.COM void vsw_ldc_msg_worker(void *arg);
65*12011SSriharsha.Basavapatna@Sun.COM void vsw_stop_msg_thread(vsw_ldc_t *ldcp);
66*12011SSriharsha.Basavapatna@Sun.COM void vsw_process_dringdata(void *, void *);
67*12011SSriharsha.Basavapatna@Sun.COM int vsw_send_msg(vsw_ldc_t *, void *, int, boolean_t);
68*12011SSriharsha.Basavapatna@Sun.COM int vsw_reclaim_dring(dring_info_t *dp, int start);
69*12011SSriharsha.Basavapatna@Sun.COM int vsw_dring_find_free_desc(dring_info_t *, vsw_private_desc_t **, int *);
70*12011SSriharsha.Basavapatna@Sun.COM 
71*12011SSriharsha.Basavapatna@Sun.COM /* Internal functions */
72*12011SSriharsha.Basavapatna@Sun.COM static int vsw_init_multipools(vsw_ldc_t *ldcp, vsw_t *vswp);
73*12011SSriharsha.Basavapatna@Sun.COM static dring_info_t *vsw_create_tx_dring(vsw_ldc_t *);
74*12011SSriharsha.Basavapatna@Sun.COM 
75*12011SSriharsha.Basavapatna@Sun.COM /* Functions imported from vsw_ldc.c */
76*12011SSriharsha.Basavapatna@Sun.COM extern void vsw_process_pkt(void *);
77*12011SSriharsha.Basavapatna@Sun.COM extern void vsw_destroy_rxpools(void *);
78*12011SSriharsha.Basavapatna@Sun.COM extern dring_info_t *vsw_map_dring_cmn(vsw_ldc_t *ldcp,
79*12011SSriharsha.Basavapatna@Sun.COM     vio_dring_reg_msg_t *dring_pkt);
80*12011SSriharsha.Basavapatna@Sun.COM extern void vsw_process_conn_evt(vsw_ldc_t *, uint16_t);
81*12011SSriharsha.Basavapatna@Sun.COM extern mblk_t *vsw_vlan_frame_pretag(void *arg, int type, mblk_t *mp);
82*12011SSriharsha.Basavapatna@Sun.COM 
83*12011SSriharsha.Basavapatna@Sun.COM /* Tunables */
84*12011SSriharsha.Basavapatna@Sun.COM extern int vsw_wretries;
85*12011SSriharsha.Basavapatna@Sun.COM extern int vsw_recv_delay;
86*12011SSriharsha.Basavapatna@Sun.COM extern int vsw_recv_retries;
87*12011SSriharsha.Basavapatna@Sun.COM extern boolean_t vsw_jumbo_rxpools;
88*12011SSriharsha.Basavapatna@Sun.COM extern uint32_t vsw_chain_len;
89*12011SSriharsha.Basavapatna@Sun.COM extern uint32_t vsw_num_descriptors;
90*12011SSriharsha.Basavapatna@Sun.COM extern uint32_t vsw_mblk_size1;
91*12011SSriharsha.Basavapatna@Sun.COM extern uint32_t vsw_mblk_size2;
92*12011SSriharsha.Basavapatna@Sun.COM extern uint32_t vsw_mblk_size3;
93*12011SSriharsha.Basavapatna@Sun.COM extern uint32_t vsw_mblk_size4;
94*12011SSriharsha.Basavapatna@Sun.COM extern uint32_t vsw_num_mblks1;
95*12011SSriharsha.Basavapatna@Sun.COM extern uint32_t vsw_num_mblks2;
96*12011SSriharsha.Basavapatna@Sun.COM extern uint32_t vsw_num_mblks3;
97*12011SSriharsha.Basavapatna@Sun.COM extern uint32_t vsw_num_mblks4;
98*12011SSriharsha.Basavapatna@Sun.COM 
99*12011SSriharsha.Basavapatna@Sun.COM #define	VSW_NUM_VMPOOLS		3	/* number of vio mblk pools */
100*12011SSriharsha.Basavapatna@Sun.COM 
/*
 * Send a NACK for the dring message 'pkt' over channel 'ldcp': the message
 * tag is rewritten in place (subtype set to VIO_SUBTYPE_NACK, session id set
 * to the channel's local session) and the message is then passed to
 * vsw_send_msg().
 *
 * The three steps are chained with the comma operator so that the macro
 * expands to a single expression statement; an unbraced 'if' in front of
 * it therefore guards all three steps instead of only the first one. The
 * trailing semicolon is kept deliberately so that existing call sites
 * continue to compile unchanged.
 *
 * Both arguments are evaluated more than once; do not pass expressions
 * with side effects.
 */
#define	SND_DRING_NACK(ldcp, pkt) \
	((pkt)->tag.vio_subtype = VIO_SUBTYPE_NACK, \
	(pkt)->tag.vio_sid = (ldcp)->local_session, \
	(void) vsw_send_msg((ldcp), (void *)(pkt), \
	    sizeof (vio_dring_msg_t), B_TRUE));
106*12011SSriharsha.Basavapatna@Sun.COM 
107*12011SSriharsha.Basavapatna@Sun.COM vio_dring_reg_msg_t *
vsw_create_tx_dring_info(vsw_ldc_t * ldcp)108*12011SSriharsha.Basavapatna@Sun.COM vsw_create_tx_dring_info(vsw_ldc_t *ldcp)
109*12011SSriharsha.Basavapatna@Sun.COM {
110*12011SSriharsha.Basavapatna@Sun.COM 	vio_dring_reg_msg_t	*mp;
111*12011SSriharsha.Basavapatna@Sun.COM 	dring_info_t		*dp;
112*12011SSriharsha.Basavapatna@Sun.COM 	vsw_t			*vswp = ldcp->ldc_vswp;
113*12011SSriharsha.Basavapatna@Sun.COM 
114*12011SSriharsha.Basavapatna@Sun.COM 	D1(vswp, "%s enter\n", __func__);
115*12011SSriharsha.Basavapatna@Sun.COM 
116*12011SSriharsha.Basavapatna@Sun.COM 	/*
117*12011SSriharsha.Basavapatna@Sun.COM 	 * If we can't create a dring, obviously no point sending
118*12011SSriharsha.Basavapatna@Sun.COM 	 * a message.
119*12011SSriharsha.Basavapatna@Sun.COM 	 */
120*12011SSriharsha.Basavapatna@Sun.COM 	if ((dp = vsw_create_tx_dring(ldcp)) == NULL)
121*12011SSriharsha.Basavapatna@Sun.COM 		return (NULL);
122*12011SSriharsha.Basavapatna@Sun.COM 
123*12011SSriharsha.Basavapatna@Sun.COM 	mp = kmem_zalloc(sizeof (vio_dring_reg_msg_t), KM_SLEEP);
124*12011SSriharsha.Basavapatna@Sun.COM 
125*12011SSriharsha.Basavapatna@Sun.COM 	mp->tag.vio_msgtype = VIO_TYPE_CTRL;
126*12011SSriharsha.Basavapatna@Sun.COM 	mp->tag.vio_subtype = VIO_SUBTYPE_INFO;
127*12011SSriharsha.Basavapatna@Sun.COM 	mp->tag.vio_subtype_env = VIO_DRING_REG;
128*12011SSriharsha.Basavapatna@Sun.COM 	mp->tag.vio_sid = ldcp->local_session;
129*12011SSriharsha.Basavapatna@Sun.COM 
130*12011SSriharsha.Basavapatna@Sun.COM 	/* payload */
131*12011SSriharsha.Basavapatna@Sun.COM 	mp->num_descriptors = dp->num_descriptors;
132*12011SSriharsha.Basavapatna@Sun.COM 	mp->descriptor_size = dp->descriptor_size;
133*12011SSriharsha.Basavapatna@Sun.COM 	mp->options = dp->options;
134*12011SSriharsha.Basavapatna@Sun.COM 	mp->ncookies = dp->dring_ncookies;
135*12011SSriharsha.Basavapatna@Sun.COM 	bcopy(&dp->dring_cookie[0], &mp->cookie[0], sizeof (ldc_mem_cookie_t));
136*12011SSriharsha.Basavapatna@Sun.COM 
137*12011SSriharsha.Basavapatna@Sun.COM 	mp->dring_ident = 0;
138*12011SSriharsha.Basavapatna@Sun.COM 
139*12011SSriharsha.Basavapatna@Sun.COM 	D1(vswp, "%s exit\n", __func__);
140*12011SSriharsha.Basavapatna@Sun.COM 
141*12011SSriharsha.Basavapatna@Sun.COM 	return (mp);
142*12011SSriharsha.Basavapatna@Sun.COM }
143*12011SSriharsha.Basavapatna@Sun.COM 
144*12011SSriharsha.Basavapatna@Sun.COM /*
145*12011SSriharsha.Basavapatna@Sun.COM  * Allocate transmit resources for the channel. The resources consist of a
146*12011SSriharsha.Basavapatna@Sun.COM  * transmit descriptor ring and an associated transmit buffer area.
147*12011SSriharsha.Basavapatna@Sun.COM  */
148*12011SSriharsha.Basavapatna@Sun.COM static dring_info_t *
vsw_create_tx_dring(vsw_ldc_t * ldcp)149*12011SSriharsha.Basavapatna@Sun.COM vsw_create_tx_dring(vsw_ldc_t *ldcp)
150*12011SSriharsha.Basavapatna@Sun.COM {
151*12011SSriharsha.Basavapatna@Sun.COM 	vsw_t			*vswp = ldcp->ldc_vswp;
152*12011SSriharsha.Basavapatna@Sun.COM 	ldc_mem_info_t		minfo;
153*12011SSriharsha.Basavapatna@Sun.COM 	dring_info_t		*dp;
154*12011SSriharsha.Basavapatna@Sun.COM 
155*12011SSriharsha.Basavapatna@Sun.COM 	dp = (dring_info_t *)kmem_zalloc(sizeof (dring_info_t), KM_SLEEP);
156*12011SSriharsha.Basavapatna@Sun.COM 	mutex_init(&dp->dlock, NULL, MUTEX_DRIVER, NULL);
157*12011SSriharsha.Basavapatna@Sun.COM 	mutex_init(&dp->restart_lock, NULL, MUTEX_DRIVER, NULL);
158*12011SSriharsha.Basavapatna@Sun.COM 	ldcp->lane_out.dringp = dp;
159*12011SSriharsha.Basavapatna@Sun.COM 
160*12011SSriharsha.Basavapatna@Sun.COM 	/* create public section of ring */
161*12011SSriharsha.Basavapatna@Sun.COM 	if ((ldc_mem_dring_create(vsw_num_descriptors,
162*12011SSriharsha.Basavapatna@Sun.COM 	    sizeof (vnet_public_desc_t), &dp->dring_handle)) != 0) {
163*12011SSriharsha.Basavapatna@Sun.COM 
164*12011SSriharsha.Basavapatna@Sun.COM 		DERR(vswp, "vsw_create_tx_dring(%lld): ldc dring create "
165*12011SSriharsha.Basavapatna@Sun.COM 		    "failed", ldcp->ldc_id);
166*12011SSriharsha.Basavapatna@Sun.COM 		goto fail;
167*12011SSriharsha.Basavapatna@Sun.COM 	}
168*12011SSriharsha.Basavapatna@Sun.COM 	ASSERT(dp->dring_handle != NULL);
169*12011SSriharsha.Basavapatna@Sun.COM 
170*12011SSriharsha.Basavapatna@Sun.COM 	/*
171*12011SSriharsha.Basavapatna@Sun.COM 	 * Get the base address of the public section of the ring.
172*12011SSriharsha.Basavapatna@Sun.COM 	 */
173*12011SSriharsha.Basavapatna@Sun.COM 	if ((ldc_mem_dring_info(dp->dring_handle, &minfo)) != 0) {
174*12011SSriharsha.Basavapatna@Sun.COM 		DERR(vswp, "vsw_create_tx_dring(%lld): dring info failed\n",
175*12011SSriharsha.Basavapatna@Sun.COM 		    ldcp->ldc_id);
176*12011SSriharsha.Basavapatna@Sun.COM 		goto fail;
177*12011SSriharsha.Basavapatna@Sun.COM 	} else {
178*12011SSriharsha.Basavapatna@Sun.COM 		ASSERT(minfo.vaddr != 0);
179*12011SSriharsha.Basavapatna@Sun.COM 		dp->pub_addr = minfo.vaddr;
180*12011SSriharsha.Basavapatna@Sun.COM 	}
181*12011SSriharsha.Basavapatna@Sun.COM 
182*12011SSriharsha.Basavapatna@Sun.COM 	dp->num_descriptors = vsw_num_descriptors;
183*12011SSriharsha.Basavapatna@Sun.COM 	dp->descriptor_size = sizeof (vnet_public_desc_t);
184*12011SSriharsha.Basavapatna@Sun.COM 	dp->options = VIO_TX_DRING;
185*12011SSriharsha.Basavapatna@Sun.COM 	dp->dring_ncookies = 1;	/* guaranteed by ldc */
186*12011SSriharsha.Basavapatna@Sun.COM 
187*12011SSriharsha.Basavapatna@Sun.COM 	/*
188*12011SSriharsha.Basavapatna@Sun.COM 	 * create private portion of ring
189*12011SSriharsha.Basavapatna@Sun.COM 	 */
190*12011SSriharsha.Basavapatna@Sun.COM 	dp->priv_addr = (vsw_private_desc_t *)kmem_zalloc(
191*12011SSriharsha.Basavapatna@Sun.COM 	    (sizeof (vsw_private_desc_t) * vsw_num_descriptors), KM_SLEEP);
192*12011SSriharsha.Basavapatna@Sun.COM 
193*12011SSriharsha.Basavapatna@Sun.COM 	if (vsw_setup_tx_dring(ldcp, dp)) {
194*12011SSriharsha.Basavapatna@Sun.COM 		DERR(vswp, "%s: unable to setup ring", __func__);
195*12011SSriharsha.Basavapatna@Sun.COM 		goto fail;
196*12011SSriharsha.Basavapatna@Sun.COM 	}
197*12011SSriharsha.Basavapatna@Sun.COM 
198*12011SSriharsha.Basavapatna@Sun.COM 	/* bind dring to the channel */
199*12011SSriharsha.Basavapatna@Sun.COM 	if ((ldc_mem_dring_bind(ldcp->ldc_handle, dp->dring_handle,
200*12011SSriharsha.Basavapatna@Sun.COM 	    LDC_DIRECT_MAP | LDC_SHADOW_MAP, LDC_MEM_RW,
201*12011SSriharsha.Basavapatna@Sun.COM 	    &dp->dring_cookie[0], &dp->dring_ncookies)) != 0) {
202*12011SSriharsha.Basavapatna@Sun.COM 		DERR(vswp, "vsw_create_tx_dring: unable to bind to channel "
203*12011SSriharsha.Basavapatna@Sun.COM 		    "%lld", ldcp->ldc_id);
204*12011SSriharsha.Basavapatna@Sun.COM 		goto fail;
205*12011SSriharsha.Basavapatna@Sun.COM 	}
206*12011SSriharsha.Basavapatna@Sun.COM 
207*12011SSriharsha.Basavapatna@Sun.COM 	/* haven't used any descriptors yet */
208*12011SSriharsha.Basavapatna@Sun.COM 	dp->end_idx = 0;
209*12011SSriharsha.Basavapatna@Sun.COM 	dp->last_ack_recv = -1;
210*12011SSriharsha.Basavapatna@Sun.COM 	dp->restart_reqd = B_TRUE;
211*12011SSriharsha.Basavapatna@Sun.COM 
212*12011SSriharsha.Basavapatna@Sun.COM 	return (dp);
213*12011SSriharsha.Basavapatna@Sun.COM 
214*12011SSriharsha.Basavapatna@Sun.COM fail:
215*12011SSriharsha.Basavapatna@Sun.COM 	vsw_destroy_tx_dring(ldcp);
216*12011SSriharsha.Basavapatna@Sun.COM 	return (NULL);
217*12011SSriharsha.Basavapatna@Sun.COM }
218*12011SSriharsha.Basavapatna@Sun.COM 
219*12011SSriharsha.Basavapatna@Sun.COM /*
220*12011SSriharsha.Basavapatna@Sun.COM  * Setup the descriptors in the tx dring.
221*12011SSriharsha.Basavapatna@Sun.COM  * Returns 0 on success, 1 on failure.
222*12011SSriharsha.Basavapatna@Sun.COM  */
223*12011SSriharsha.Basavapatna@Sun.COM int
vsw_setup_tx_dring(vsw_ldc_t * ldcp,dring_info_t * dp)224*12011SSriharsha.Basavapatna@Sun.COM vsw_setup_tx_dring(vsw_ldc_t *ldcp, dring_info_t *dp)
225*12011SSriharsha.Basavapatna@Sun.COM {
226*12011SSriharsha.Basavapatna@Sun.COM 	vnet_public_desc_t	*pub_addr = NULL;
227*12011SSriharsha.Basavapatna@Sun.COM 	vsw_private_desc_t	*priv_addr = NULL;
228*12011SSriharsha.Basavapatna@Sun.COM 	vsw_t			*vswp = ldcp->ldc_vswp;
229*12011SSriharsha.Basavapatna@Sun.COM 	uint64_t		*tmpp;
230*12011SSriharsha.Basavapatna@Sun.COM 	uint64_t		offset = 0;
231*12011SSriharsha.Basavapatna@Sun.COM 	uint32_t		ncookies = 0;
232*12011SSriharsha.Basavapatna@Sun.COM 	static char		*name = "vsw_setup_ring";
233*12011SSriharsha.Basavapatna@Sun.COM 	int			i, j, nc, rv;
234*12011SSriharsha.Basavapatna@Sun.COM 	size_t			data_sz;
235*12011SSriharsha.Basavapatna@Sun.COM 	void			*data_addr;
236*12011SSriharsha.Basavapatna@Sun.COM 
237*12011SSriharsha.Basavapatna@Sun.COM 	priv_addr = dp->priv_addr;
238*12011SSriharsha.Basavapatna@Sun.COM 	pub_addr = dp->pub_addr;
239*12011SSriharsha.Basavapatna@Sun.COM 
240*12011SSriharsha.Basavapatna@Sun.COM 	/* public section may be null but private should never be */
241*12011SSriharsha.Basavapatna@Sun.COM 	ASSERT(priv_addr != NULL);
242*12011SSriharsha.Basavapatna@Sun.COM 
243*12011SSriharsha.Basavapatna@Sun.COM 	/*
244*12011SSriharsha.Basavapatna@Sun.COM 	 * Allocate the region of memory which will be used to hold
245*12011SSriharsha.Basavapatna@Sun.COM 	 * the data the descriptors will refer to.
246*12011SSriharsha.Basavapatna@Sun.COM 	 */
247*12011SSriharsha.Basavapatna@Sun.COM 	data_sz = vswp->max_frame_size + VNET_IPALIGN + VNET_LDCALIGN;
248*12011SSriharsha.Basavapatna@Sun.COM 
249*12011SSriharsha.Basavapatna@Sun.COM 	/*
250*12011SSriharsha.Basavapatna@Sun.COM 	 * In order to ensure that the number of ldc cookies per descriptor is
251*12011SSriharsha.Basavapatna@Sun.COM 	 * limited to be within the default MAX_COOKIES (2), we take the steps
252*12011SSriharsha.Basavapatna@Sun.COM 	 * outlined below:
253*12011SSriharsha.Basavapatna@Sun.COM 	 *
254*12011SSriharsha.Basavapatna@Sun.COM 	 * Align the entire data buffer area to 8K and carve out per descriptor
255*12011SSriharsha.Basavapatna@Sun.COM 	 * data buffers starting from this 8K aligned base address.
256*12011SSriharsha.Basavapatna@Sun.COM 	 *
257*12011SSriharsha.Basavapatna@Sun.COM 	 * We round up the mtu specified to be a multiple of 2K or 4K.
258*12011SSriharsha.Basavapatna@Sun.COM 	 * For sizes up to 12K we round up the size to the next 2K.
259*12011SSriharsha.Basavapatna@Sun.COM 	 * For sizes > 12K we round up to the next 4K (otherwise sizes such as
260*12011SSriharsha.Basavapatna@Sun.COM 	 * 14K could end up needing 3 cookies, with the buffer spread across
261*12011SSriharsha.Basavapatna@Sun.COM 	 * 3 8K pages:  8K+6K, 2K+8K+2K, 6K+8K, ...).
262*12011SSriharsha.Basavapatna@Sun.COM 	 */
263*12011SSriharsha.Basavapatna@Sun.COM 	if (data_sz <= VNET_12K) {
264*12011SSriharsha.Basavapatna@Sun.COM 		data_sz = VNET_ROUNDUP_2K(data_sz);
265*12011SSriharsha.Basavapatna@Sun.COM 	} else {
266*12011SSriharsha.Basavapatna@Sun.COM 		data_sz = VNET_ROUNDUP_4K(data_sz);
267*12011SSriharsha.Basavapatna@Sun.COM 	}
268*12011SSriharsha.Basavapatna@Sun.COM 
269*12011SSriharsha.Basavapatna@Sun.COM 	dp->desc_data_sz = data_sz;
270*12011SSriharsha.Basavapatna@Sun.COM 
271*12011SSriharsha.Basavapatna@Sun.COM 	/* allocate extra 8K bytes for alignment */
272*12011SSriharsha.Basavapatna@Sun.COM 	dp->data_sz = (vsw_num_descriptors * data_sz) + VNET_8K;
273*12011SSriharsha.Basavapatna@Sun.COM 	data_addr = kmem_alloc(dp->data_sz, KM_SLEEP);
274*12011SSriharsha.Basavapatna@Sun.COM 	dp->data_addr = data_addr;
275*12011SSriharsha.Basavapatna@Sun.COM 
276*12011SSriharsha.Basavapatna@Sun.COM 	D2(vswp, "%s: allocated %lld bytes at 0x%llx\n", name,
277*12011SSriharsha.Basavapatna@Sun.COM 	    dp->data_sz, dp->data_addr);
278*12011SSriharsha.Basavapatna@Sun.COM 
279*12011SSriharsha.Basavapatna@Sun.COM 	/* align the starting address of the data area to 8K */
280*12011SSriharsha.Basavapatna@Sun.COM 	data_addr = (void *)VNET_ROUNDUP_8K((uintptr_t)data_addr);
281*12011SSriharsha.Basavapatna@Sun.COM 
282*12011SSriharsha.Basavapatna@Sun.COM 	tmpp = (uint64_t *)data_addr;
283*12011SSriharsha.Basavapatna@Sun.COM 	offset = dp->desc_data_sz/sizeof (tmpp);
284*12011SSriharsha.Basavapatna@Sun.COM 
285*12011SSriharsha.Basavapatna@Sun.COM 	/*
286*12011SSriharsha.Basavapatna@Sun.COM 	 * Initialise some of the private and public (if they exist)
287*12011SSriharsha.Basavapatna@Sun.COM 	 * descriptor fields.
288*12011SSriharsha.Basavapatna@Sun.COM 	 */
289*12011SSriharsha.Basavapatna@Sun.COM 	for (i = 0; i < vsw_num_descriptors; i++) {
290*12011SSriharsha.Basavapatna@Sun.COM 		mutex_init(&priv_addr->dstate_lock, NULL, MUTEX_DRIVER, NULL);
291*12011SSriharsha.Basavapatna@Sun.COM 
292*12011SSriharsha.Basavapatna@Sun.COM 		if ((ldc_mem_alloc_handle(ldcp->ldc_handle,
293*12011SSriharsha.Basavapatna@Sun.COM 		    &priv_addr->memhandle)) != 0) {
294*12011SSriharsha.Basavapatna@Sun.COM 			DERR(vswp, "%s: alloc mem handle failed", name);
295*12011SSriharsha.Basavapatna@Sun.COM 			goto fail;
296*12011SSriharsha.Basavapatna@Sun.COM 		}
297*12011SSriharsha.Basavapatna@Sun.COM 
298*12011SSriharsha.Basavapatna@Sun.COM 		priv_addr->datap = (void *)tmpp;
299*12011SSriharsha.Basavapatna@Sun.COM 
300*12011SSriharsha.Basavapatna@Sun.COM 		rv = ldc_mem_bind_handle(priv_addr->memhandle,
301*12011SSriharsha.Basavapatna@Sun.COM 		    (caddr_t)priv_addr->datap, dp->desc_data_sz,
302*12011SSriharsha.Basavapatna@Sun.COM 		    LDC_SHADOW_MAP, LDC_MEM_R|LDC_MEM_W,
303*12011SSriharsha.Basavapatna@Sun.COM 		    &(priv_addr->memcookie[0]), &ncookies);
304*12011SSriharsha.Basavapatna@Sun.COM 		if (rv != 0) {
305*12011SSriharsha.Basavapatna@Sun.COM 			DERR(vswp, "%s(%lld): ldc_mem_bind_handle failed "
306*12011SSriharsha.Basavapatna@Sun.COM 			    "(rv %d)", name, ldcp->ldc_id, rv);
307*12011SSriharsha.Basavapatna@Sun.COM 			goto fail;
308*12011SSriharsha.Basavapatna@Sun.COM 		}
309*12011SSriharsha.Basavapatna@Sun.COM 		priv_addr->bound = 1;
310*12011SSriharsha.Basavapatna@Sun.COM 
311*12011SSriharsha.Basavapatna@Sun.COM 		D2(vswp, "%s: %d: memcookie 0 : addr 0x%llx : size 0x%llx",
312*12011SSriharsha.Basavapatna@Sun.COM 		    name, i, priv_addr->memcookie[0].addr,
313*12011SSriharsha.Basavapatna@Sun.COM 		    priv_addr->memcookie[0].size);
314*12011SSriharsha.Basavapatna@Sun.COM 
315*12011SSriharsha.Basavapatna@Sun.COM 		if (ncookies >= (uint32_t)(VSW_MAX_COOKIES + 1)) {
316*12011SSriharsha.Basavapatna@Sun.COM 			DERR(vswp, "%s(%lld) ldc_mem_bind_handle returned "
317*12011SSriharsha.Basavapatna@Sun.COM 			    "invalid num of cookies (%d) for size 0x%llx",
318*12011SSriharsha.Basavapatna@Sun.COM 			    name, ldcp->ldc_id, ncookies, VSW_RING_EL_DATA_SZ);
319*12011SSriharsha.Basavapatna@Sun.COM 
320*12011SSriharsha.Basavapatna@Sun.COM 			goto fail;
321*12011SSriharsha.Basavapatna@Sun.COM 		} else {
322*12011SSriharsha.Basavapatna@Sun.COM 			for (j = 1; j < ncookies; j++) {
323*12011SSriharsha.Basavapatna@Sun.COM 				rv = ldc_mem_nextcookie(priv_addr->memhandle,
324*12011SSriharsha.Basavapatna@Sun.COM 				    &(priv_addr->memcookie[j]));
325*12011SSriharsha.Basavapatna@Sun.COM 				if (rv != 0) {
326*12011SSriharsha.Basavapatna@Sun.COM 					DERR(vswp, "%s: ldc_mem_nextcookie "
327*12011SSriharsha.Basavapatna@Sun.COM 					    "failed rv (%d)", name, rv);
328*12011SSriharsha.Basavapatna@Sun.COM 					goto fail;
329*12011SSriharsha.Basavapatna@Sun.COM 				}
330*12011SSriharsha.Basavapatna@Sun.COM 				D3(vswp, "%s: memcookie %d : addr 0x%llx : "
331*12011SSriharsha.Basavapatna@Sun.COM 				    "size 0x%llx", name, j,
332*12011SSriharsha.Basavapatna@Sun.COM 				    priv_addr->memcookie[j].addr,
333*12011SSriharsha.Basavapatna@Sun.COM 				    priv_addr->memcookie[j].size);
334*12011SSriharsha.Basavapatna@Sun.COM 			}
335*12011SSriharsha.Basavapatna@Sun.COM 
336*12011SSriharsha.Basavapatna@Sun.COM 		}
337*12011SSriharsha.Basavapatna@Sun.COM 		priv_addr->ncookies = ncookies;
338*12011SSriharsha.Basavapatna@Sun.COM 		priv_addr->dstate = VIO_DESC_FREE;
339*12011SSriharsha.Basavapatna@Sun.COM 
340*12011SSriharsha.Basavapatna@Sun.COM 		if (pub_addr != NULL) {
341*12011SSriharsha.Basavapatna@Sun.COM 
342*12011SSriharsha.Basavapatna@Sun.COM 			/* link pub and private sides */
343*12011SSriharsha.Basavapatna@Sun.COM 			priv_addr->descp = pub_addr;
344*12011SSriharsha.Basavapatna@Sun.COM 
345*12011SSriharsha.Basavapatna@Sun.COM 			pub_addr->ncookies = priv_addr->ncookies;
346*12011SSriharsha.Basavapatna@Sun.COM 
347*12011SSriharsha.Basavapatna@Sun.COM 			for (nc = 0; nc < pub_addr->ncookies; nc++) {
348*12011SSriharsha.Basavapatna@Sun.COM 				bcopy(&priv_addr->memcookie[nc],
349*12011SSriharsha.Basavapatna@Sun.COM 				    &pub_addr->memcookie[nc],
350*12011SSriharsha.Basavapatna@Sun.COM 				    sizeof (ldc_mem_cookie_t));
351*12011SSriharsha.Basavapatna@Sun.COM 			}
352*12011SSriharsha.Basavapatna@Sun.COM 
353*12011SSriharsha.Basavapatna@Sun.COM 			pub_addr->hdr.dstate = VIO_DESC_FREE;
354*12011SSriharsha.Basavapatna@Sun.COM 			pub_addr++;
355*12011SSriharsha.Basavapatna@Sun.COM 		}
356*12011SSriharsha.Basavapatna@Sun.COM 
357*12011SSriharsha.Basavapatna@Sun.COM 		/*
358*12011SSriharsha.Basavapatna@Sun.COM 		 * move to next element in the dring and the next
359*12011SSriharsha.Basavapatna@Sun.COM 		 * position in the data buffer.
360*12011SSriharsha.Basavapatna@Sun.COM 		 */
361*12011SSriharsha.Basavapatna@Sun.COM 		priv_addr++;
362*12011SSriharsha.Basavapatna@Sun.COM 		tmpp += offset;
363*12011SSriharsha.Basavapatna@Sun.COM 	}
364*12011SSriharsha.Basavapatna@Sun.COM 
365*12011SSriharsha.Basavapatna@Sun.COM 	return (0);
366*12011SSriharsha.Basavapatna@Sun.COM 
367*12011SSriharsha.Basavapatna@Sun.COM fail:
368*12011SSriharsha.Basavapatna@Sun.COM 	/* return failure; caller will cleanup */
369*12011SSriharsha.Basavapatna@Sun.COM 	return (1);
370*12011SSriharsha.Basavapatna@Sun.COM }
371*12011SSriharsha.Basavapatna@Sun.COM 
372*12011SSriharsha.Basavapatna@Sun.COM /*
373*12011SSriharsha.Basavapatna@Sun.COM  * Free transmit resources for the channel.
374*12011SSriharsha.Basavapatna@Sun.COM  */
375*12011SSriharsha.Basavapatna@Sun.COM void
vsw_destroy_tx_dring(vsw_ldc_t * ldcp)376*12011SSriharsha.Basavapatna@Sun.COM vsw_destroy_tx_dring(vsw_ldc_t *ldcp)
377*12011SSriharsha.Basavapatna@Sun.COM {
378*12011SSriharsha.Basavapatna@Sun.COM 	vsw_private_desc_t	*paddr = NULL;
379*12011SSriharsha.Basavapatna@Sun.COM 	int			i;
380*12011SSriharsha.Basavapatna@Sun.COM 	lane_t			*lp = &ldcp->lane_out;
381*12011SSriharsha.Basavapatna@Sun.COM 	dring_info_t		*dp;
382*12011SSriharsha.Basavapatna@Sun.COM 
383*12011SSriharsha.Basavapatna@Sun.COM 	dp = lp->dringp;
384*12011SSriharsha.Basavapatna@Sun.COM 	if (dp == NULL) {
385*12011SSriharsha.Basavapatna@Sun.COM 		return;
386*12011SSriharsha.Basavapatna@Sun.COM 	}
387*12011SSriharsha.Basavapatna@Sun.COM 
388*12011SSriharsha.Basavapatna@Sun.COM 	mutex_enter(&dp->dlock);
389*12011SSriharsha.Basavapatna@Sun.COM 
390*12011SSriharsha.Basavapatna@Sun.COM 	if (dp->priv_addr != NULL) {
391*12011SSriharsha.Basavapatna@Sun.COM 		/*
392*12011SSriharsha.Basavapatna@Sun.COM 		 * First unbind and free the memory handles
393*12011SSriharsha.Basavapatna@Sun.COM 		 * stored in each descriptor within the ring.
394*12011SSriharsha.Basavapatna@Sun.COM 		 */
395*12011SSriharsha.Basavapatna@Sun.COM 		for (i = 0; i < vsw_num_descriptors; i++) {
396*12011SSriharsha.Basavapatna@Sun.COM 			paddr = (vsw_private_desc_t *)dp->priv_addr + i;
397*12011SSriharsha.Basavapatna@Sun.COM 			if (paddr->memhandle != NULL) {
398*12011SSriharsha.Basavapatna@Sun.COM 				if (paddr->bound == 1) {
399*12011SSriharsha.Basavapatna@Sun.COM 					if (ldc_mem_unbind_handle(
400*12011SSriharsha.Basavapatna@Sun.COM 					    paddr->memhandle) != 0) {
401*12011SSriharsha.Basavapatna@Sun.COM 						DERR(NULL, "error "
402*12011SSriharsha.Basavapatna@Sun.COM 						"unbinding handle for "
403*12011SSriharsha.Basavapatna@Sun.COM 						"ring 0x%llx at pos %d",
404*12011SSriharsha.Basavapatna@Sun.COM 						    dp, i);
405*12011SSriharsha.Basavapatna@Sun.COM 						continue;
406*12011SSriharsha.Basavapatna@Sun.COM 					}
407*12011SSriharsha.Basavapatna@Sun.COM 					paddr->bound = 0;
408*12011SSriharsha.Basavapatna@Sun.COM 				}
409*12011SSriharsha.Basavapatna@Sun.COM 
410*12011SSriharsha.Basavapatna@Sun.COM 				if (ldc_mem_free_handle(
411*12011SSriharsha.Basavapatna@Sun.COM 				    paddr->memhandle) != 0) {
412*12011SSriharsha.Basavapatna@Sun.COM 					DERR(NULL, "error freeing "
413*12011SSriharsha.Basavapatna@Sun.COM 					    "handle for ring 0x%llx "
414*12011SSriharsha.Basavapatna@Sun.COM 					    "at pos %d", dp, i);
415*12011SSriharsha.Basavapatna@Sun.COM 					continue;
416*12011SSriharsha.Basavapatna@Sun.COM 				}
417*12011SSriharsha.Basavapatna@Sun.COM 				paddr->memhandle = NULL;
418*12011SSriharsha.Basavapatna@Sun.COM 			}
419*12011SSriharsha.Basavapatna@Sun.COM 			mutex_destroy(&paddr->dstate_lock);
420*12011SSriharsha.Basavapatna@Sun.COM 		}
421*12011SSriharsha.Basavapatna@Sun.COM 		kmem_free(dp->priv_addr,
422*12011SSriharsha.Basavapatna@Sun.COM 		    (sizeof (vsw_private_desc_t) * vsw_num_descriptors));
423*12011SSriharsha.Basavapatna@Sun.COM 	}
424*12011SSriharsha.Basavapatna@Sun.COM 
425*12011SSriharsha.Basavapatna@Sun.COM 	/*
426*12011SSriharsha.Basavapatna@Sun.COM 	 * Now unbind and destroy the ring itself.
427*12011SSriharsha.Basavapatna@Sun.COM 	 */
428*12011SSriharsha.Basavapatna@Sun.COM 	if (dp->dring_handle != NULL) {
429*12011SSriharsha.Basavapatna@Sun.COM 		(void) ldc_mem_dring_unbind(dp->dring_handle);
430*12011SSriharsha.Basavapatna@Sun.COM 		(void) ldc_mem_dring_destroy(dp->dring_handle);
431*12011SSriharsha.Basavapatna@Sun.COM 	}
432*12011SSriharsha.Basavapatna@Sun.COM 
433*12011SSriharsha.Basavapatna@Sun.COM 	if (dp->data_addr != NULL) {
434*12011SSriharsha.Basavapatna@Sun.COM 		kmem_free(dp->data_addr, dp->data_sz);
435*12011SSriharsha.Basavapatna@Sun.COM 	}
436*12011SSriharsha.Basavapatna@Sun.COM 
437*12011SSriharsha.Basavapatna@Sun.COM 	mutex_exit(&dp->dlock);
438*12011SSriharsha.Basavapatna@Sun.COM 	mutex_destroy(&dp->dlock);
439*12011SSriharsha.Basavapatna@Sun.COM 	mutex_destroy(&dp->restart_lock);
440*12011SSriharsha.Basavapatna@Sun.COM 	kmem_free(dp, sizeof (dring_info_t));
441*12011SSriharsha.Basavapatna@Sun.COM 	lp->dringp = NULL;
442*12011SSriharsha.Basavapatna@Sun.COM }
443*12011SSriharsha.Basavapatna@Sun.COM 
444*12011SSriharsha.Basavapatna@Sun.COM /*
445*12011SSriharsha.Basavapatna@Sun.COM  * Map the transmit descriptor ring exported
446*12011SSriharsha.Basavapatna@Sun.COM  * by the peer, as our receive descriptor ring.
447*12011SSriharsha.Basavapatna@Sun.COM  */
448*12011SSriharsha.Basavapatna@Sun.COM dring_info_t *
vsw_map_rx_dring(vsw_ldc_t * ldcp,void * pkt)449*12011SSriharsha.Basavapatna@Sun.COM vsw_map_rx_dring(vsw_ldc_t *ldcp, void *pkt)
450*12011SSriharsha.Basavapatna@Sun.COM {
451*12011SSriharsha.Basavapatna@Sun.COM 	int			rv;
452*12011SSriharsha.Basavapatna@Sun.COM 	dring_info_t		*dp;
453*12011SSriharsha.Basavapatna@Sun.COM 	vio_dring_reg_msg_t	*dring_pkt = pkt;
454*12011SSriharsha.Basavapatna@Sun.COM 	vsw_t			*vswp = ldcp->ldc_vswp;
455*12011SSriharsha.Basavapatna@Sun.COM 
456*12011SSriharsha.Basavapatna@Sun.COM 	dp = vsw_map_dring_cmn(ldcp, dring_pkt);
457*12011SSriharsha.Basavapatna@Sun.COM 	if (dp == NULL) {
458*12011SSriharsha.Basavapatna@Sun.COM 		return (NULL);
459*12011SSriharsha.Basavapatna@Sun.COM 	}
460*12011SSriharsha.Basavapatna@Sun.COM 
461*12011SSriharsha.Basavapatna@Sun.COM 	/* TxDring mode specific initializations */
462*12011SSriharsha.Basavapatna@Sun.COM 	dp->end_idx = 0;
463*12011SSriharsha.Basavapatna@Sun.COM 	ldcp->lane_in.dringp = dp;
464*12011SSriharsha.Basavapatna@Sun.COM 
465*12011SSriharsha.Basavapatna@Sun.COM 	/* Allocate pools of receive mblks */
466*12011SSriharsha.Basavapatna@Sun.COM 	rv = vsw_init_multipools(ldcp, vswp);
467*12011SSriharsha.Basavapatna@Sun.COM 	if (rv != 0) {
468*12011SSriharsha.Basavapatna@Sun.COM 		/*
469*12011SSriharsha.Basavapatna@Sun.COM 		 * We do not return failure if receive mblk pools can't
470*12011SSriharsha.Basavapatna@Sun.COM 		 * be allocated, instead allocb(9F) will be used to
471*12011SSriharsha.Basavapatna@Sun.COM 		 * dynamically allocate buffers during receive.
472*12011SSriharsha.Basavapatna@Sun.COM 		 */
473*12011SSriharsha.Basavapatna@Sun.COM 		DWARN(vswp, "%s: unable to create free mblk pools for"
474*12011SSriharsha.Basavapatna@Sun.COM 		    " channel %ld (rv %d)", __func__, ldcp->ldc_id, rv);
475*12011SSriharsha.Basavapatna@Sun.COM 	}
476*12011SSriharsha.Basavapatna@Sun.COM 
477*12011SSriharsha.Basavapatna@Sun.COM 	return (dp);
478*12011SSriharsha.Basavapatna@Sun.COM }
479*12011SSriharsha.Basavapatna@Sun.COM 
480*12011SSriharsha.Basavapatna@Sun.COM /*
481*12011SSriharsha.Basavapatna@Sun.COM  * Unmap the receive descriptor ring.
482*12011SSriharsha.Basavapatna@Sun.COM  */
483*12011SSriharsha.Basavapatna@Sun.COM void
vsw_unmap_rx_dring(vsw_ldc_t * ldcp)484*12011SSriharsha.Basavapatna@Sun.COM vsw_unmap_rx_dring(vsw_ldc_t *ldcp)
485*12011SSriharsha.Basavapatna@Sun.COM {
486*12011SSriharsha.Basavapatna@Sun.COM 	vio_mblk_pool_t *fvmp = NULL;
487*12011SSriharsha.Basavapatna@Sun.COM 	vsw_t		*vswp = ldcp->ldc_vswp;
488*12011SSriharsha.Basavapatna@Sun.COM 	lane_t		*lp = &ldcp->lane_in;
489*12011SSriharsha.Basavapatna@Sun.COM 	dring_info_t	*dp;
490*12011SSriharsha.Basavapatna@Sun.COM 
491*12011SSriharsha.Basavapatna@Sun.COM 	if ((dp = lp->dringp) == NULL) {
492*12011SSriharsha.Basavapatna@Sun.COM 		return;
493*12011SSriharsha.Basavapatna@Sun.COM 	}
494*12011SSriharsha.Basavapatna@Sun.COM 
495*12011SSriharsha.Basavapatna@Sun.COM 	/*
496*12011SSriharsha.Basavapatna@Sun.COM 	 * If we can't destroy all the rx pools for this channel,
497*12011SSriharsha.Basavapatna@Sun.COM 	 * dispatch a task to retry and clean up those rx pools. Note
498*12011SSriharsha.Basavapatna@Sun.COM 	 * that we don't need to wait for the task to complete. If the
499*12011SSriharsha.Basavapatna@Sun.COM 	 * vsw device itself gets detached (vsw_detach()), it will wait
500*12011SSriharsha.Basavapatna@Sun.COM 	 * for the task to complete implicitly in ddi_taskq_destroy().
501*12011SSriharsha.Basavapatna@Sun.COM 	 */
502*12011SSriharsha.Basavapatna@Sun.COM 	vio_destroy_multipools(&ldcp->vmp, &fvmp);
503*12011SSriharsha.Basavapatna@Sun.COM 	if (fvmp != NULL) {
504*12011SSriharsha.Basavapatna@Sun.COM 		(void) ddi_taskq_dispatch(vswp->rxp_taskq,
505*12011SSriharsha.Basavapatna@Sun.COM 		    vsw_destroy_rxpools, fvmp, DDI_SLEEP);
506*12011SSriharsha.Basavapatna@Sun.COM 	}
507*12011SSriharsha.Basavapatna@Sun.COM 
508*12011SSriharsha.Basavapatna@Sun.COM 	if (dp->dring_handle != NULL) {
509*12011SSriharsha.Basavapatna@Sun.COM 		(void) ldc_mem_dring_unmap(dp->dring_handle);
510*12011SSriharsha.Basavapatna@Sun.COM 	}
511*12011SSriharsha.Basavapatna@Sun.COM 	kmem_free(dp, sizeof (dring_info_t));
512*12011SSriharsha.Basavapatna@Sun.COM 	lp->dringp = NULL;
513*12011SSriharsha.Basavapatna@Sun.COM }
514*12011SSriharsha.Basavapatna@Sun.COM 
515*12011SSriharsha.Basavapatna@Sun.COM static int
vsw_init_multipools(vsw_ldc_t * ldcp,vsw_t * vswp)516*12011SSriharsha.Basavapatna@Sun.COM vsw_init_multipools(vsw_ldc_t *ldcp, vsw_t *vswp)
517*12011SSriharsha.Basavapatna@Sun.COM {
518*12011SSriharsha.Basavapatna@Sun.COM 	size_t		data_sz;
519*12011SSriharsha.Basavapatna@Sun.COM 	int		rv;
520*12011SSriharsha.Basavapatna@Sun.COM 	uint32_t	sz1 = 0;
521*12011SSriharsha.Basavapatna@Sun.COM 	uint32_t	sz2 = 0;
522*12011SSriharsha.Basavapatna@Sun.COM 	uint32_t	sz3 = 0;
523*12011SSriharsha.Basavapatna@Sun.COM 	uint32_t	sz4 = 0;
524*12011SSriharsha.Basavapatna@Sun.COM 
525*12011SSriharsha.Basavapatna@Sun.COM 	/*
526*12011SSriharsha.Basavapatna@Sun.COM 	 * We round up the mtu specified to be a multiple of 2K to limit the
527*12011SSriharsha.Basavapatna@Sun.COM 	 * number of rx buffer pools created for a given mtu.
528*12011SSriharsha.Basavapatna@Sun.COM 	 */
529*12011SSriharsha.Basavapatna@Sun.COM 	data_sz = vswp->max_frame_size + VNET_IPALIGN + VNET_LDCALIGN;
530*12011SSriharsha.Basavapatna@Sun.COM 	data_sz = VNET_ROUNDUP_2K(data_sz);
531*12011SSriharsha.Basavapatna@Sun.COM 
532*12011SSriharsha.Basavapatna@Sun.COM 	/*
533*12011SSriharsha.Basavapatna@Sun.COM 	 * If pool sizes are specified, use them. Note that the presence of
534*12011SSriharsha.Basavapatna@Sun.COM 	 * the first tunable will be used as a hint.
535*12011SSriharsha.Basavapatna@Sun.COM 	 */
536*12011SSriharsha.Basavapatna@Sun.COM 	if (vsw_mblk_size1 != 0) {
537*12011SSriharsha.Basavapatna@Sun.COM 		sz1 = vsw_mblk_size1;
538*12011SSriharsha.Basavapatna@Sun.COM 		sz2 = vsw_mblk_size2;
539*12011SSriharsha.Basavapatna@Sun.COM 		sz3 = vsw_mblk_size3;
540*12011SSriharsha.Basavapatna@Sun.COM 		sz4 = vsw_mblk_size4;
541*12011SSriharsha.Basavapatna@Sun.COM 
542*12011SSriharsha.Basavapatna@Sun.COM 		if (sz4 == 0) { /* need 3 pools */
543*12011SSriharsha.Basavapatna@Sun.COM 
544*12011SSriharsha.Basavapatna@Sun.COM 			ldcp->max_rxpool_size = sz3;
545*12011SSriharsha.Basavapatna@Sun.COM 			rv = vio_init_multipools(&ldcp->vmp,
546*12011SSriharsha.Basavapatna@Sun.COM 			    VSW_NUM_VMPOOLS, sz1, sz2, sz3,
547*12011SSriharsha.Basavapatna@Sun.COM 			    vsw_num_mblks1, vsw_num_mblks2, vsw_num_mblks3);
548*12011SSriharsha.Basavapatna@Sun.COM 
549*12011SSriharsha.Basavapatna@Sun.COM 		} else {
550*12011SSriharsha.Basavapatna@Sun.COM 
551*12011SSriharsha.Basavapatna@Sun.COM 			ldcp->max_rxpool_size = sz4;
552*12011SSriharsha.Basavapatna@Sun.COM 			rv = vio_init_multipools(&ldcp->vmp,
553*12011SSriharsha.Basavapatna@Sun.COM 			    VSW_NUM_VMPOOLS + 1, sz1, sz2, sz3, sz4,
554*12011SSriharsha.Basavapatna@Sun.COM 			    vsw_num_mblks1, vsw_num_mblks2, vsw_num_mblks3,
555*12011SSriharsha.Basavapatna@Sun.COM 			    vsw_num_mblks4);
556*12011SSriharsha.Basavapatna@Sun.COM 
557*12011SSriharsha.Basavapatna@Sun.COM 		}
558*12011SSriharsha.Basavapatna@Sun.COM 
559*12011SSriharsha.Basavapatna@Sun.COM 		return (rv);
560*12011SSriharsha.Basavapatna@Sun.COM 	}
561*12011SSriharsha.Basavapatna@Sun.COM 
562*12011SSriharsha.Basavapatna@Sun.COM 	/*
563*12011SSriharsha.Basavapatna@Sun.COM 	 * Pool sizes are not specified. We select the pool sizes based on the
564*12011SSriharsha.Basavapatna@Sun.COM 	 * mtu if vnet_jumbo_rxpools is enabled.
565*12011SSriharsha.Basavapatna@Sun.COM 	 */
566*12011SSriharsha.Basavapatna@Sun.COM 	if (vsw_jumbo_rxpools == B_FALSE || data_sz == VNET_2K) {
567*12011SSriharsha.Basavapatna@Sun.COM 		/*
568*12011SSriharsha.Basavapatna@Sun.COM 		 * Receive buffer pool allocation based on mtu is disabled.
569*12011SSriharsha.Basavapatna@Sun.COM 		 * Use the default mechanism of standard size pool allocation.
570*12011SSriharsha.Basavapatna@Sun.COM 		 */
571*12011SSriharsha.Basavapatna@Sun.COM 		sz1 = VSW_MBLK_SZ_128;
572*12011SSriharsha.Basavapatna@Sun.COM 		sz2 = VSW_MBLK_SZ_256;
573*12011SSriharsha.Basavapatna@Sun.COM 		sz3 = VSW_MBLK_SZ_2048;
574*12011SSriharsha.Basavapatna@Sun.COM 		ldcp->max_rxpool_size = sz3;
575*12011SSriharsha.Basavapatna@Sun.COM 
576*12011SSriharsha.Basavapatna@Sun.COM 		rv = vio_init_multipools(&ldcp->vmp, VSW_NUM_VMPOOLS,
577*12011SSriharsha.Basavapatna@Sun.COM 		    sz1, sz2, sz3,
578*12011SSriharsha.Basavapatna@Sun.COM 		    vsw_num_mblks1, vsw_num_mblks2, vsw_num_mblks3);
579*12011SSriharsha.Basavapatna@Sun.COM 
580*12011SSriharsha.Basavapatna@Sun.COM 		return (rv);
581*12011SSriharsha.Basavapatna@Sun.COM 	}
582*12011SSriharsha.Basavapatna@Sun.COM 
583*12011SSriharsha.Basavapatna@Sun.COM 	switch (data_sz) {
584*12011SSriharsha.Basavapatna@Sun.COM 
585*12011SSriharsha.Basavapatna@Sun.COM 	case VNET_4K:
586*12011SSriharsha.Basavapatna@Sun.COM 
587*12011SSriharsha.Basavapatna@Sun.COM 		sz1 = VSW_MBLK_SZ_128;
588*12011SSriharsha.Basavapatna@Sun.COM 		sz2 = VSW_MBLK_SZ_256;
589*12011SSriharsha.Basavapatna@Sun.COM 		sz3 = VSW_MBLK_SZ_2048;
590*12011SSriharsha.Basavapatna@Sun.COM 		sz4 = sz3 << 1;			/* 4K */
591*12011SSriharsha.Basavapatna@Sun.COM 		ldcp->max_rxpool_size = sz4;
592*12011SSriharsha.Basavapatna@Sun.COM 
593*12011SSriharsha.Basavapatna@Sun.COM 		rv = vio_init_multipools(&ldcp->vmp, VSW_NUM_VMPOOLS + 1,
594*12011SSriharsha.Basavapatna@Sun.COM 		    sz1, sz2, sz3, sz4,
595*12011SSriharsha.Basavapatna@Sun.COM 		    vsw_num_mblks1, vsw_num_mblks2, vsw_num_mblks3,
596*12011SSriharsha.Basavapatna@Sun.COM 		    vsw_num_mblks4);
597*12011SSriharsha.Basavapatna@Sun.COM 		break;
598*12011SSriharsha.Basavapatna@Sun.COM 
599*12011SSriharsha.Basavapatna@Sun.COM 	default:	/* data_sz:  4K+ to 16K */
600*12011SSriharsha.Basavapatna@Sun.COM 
601*12011SSriharsha.Basavapatna@Sun.COM 		sz1 = VSW_MBLK_SZ_256;
602*12011SSriharsha.Basavapatna@Sun.COM 		sz2 = VSW_MBLK_SZ_2048;
603*12011SSriharsha.Basavapatna@Sun.COM 		sz3 = data_sz >> 1;	/* Jumbo-size/2 */
604*12011SSriharsha.Basavapatna@Sun.COM 		sz4 = data_sz;	/* Jumbo-size */
605*12011SSriharsha.Basavapatna@Sun.COM 		ldcp->max_rxpool_size = sz4;
606*12011SSriharsha.Basavapatna@Sun.COM 
607*12011SSriharsha.Basavapatna@Sun.COM 		rv = vio_init_multipools(&ldcp->vmp, VSW_NUM_VMPOOLS + 1,
608*12011SSriharsha.Basavapatna@Sun.COM 		    sz1, sz2, sz3, sz4,
609*12011SSriharsha.Basavapatna@Sun.COM 		    vsw_num_mblks1, vsw_num_mblks2, vsw_num_mblks3,
610*12011SSriharsha.Basavapatna@Sun.COM 		    vsw_num_mblks4);
611*12011SSriharsha.Basavapatna@Sun.COM 		break;
612*12011SSriharsha.Basavapatna@Sun.COM 	}
613*12011SSriharsha.Basavapatna@Sun.COM 
614*12011SSriharsha.Basavapatna@Sun.COM 	return (rv);
615*12011SSriharsha.Basavapatna@Sun.COM 
616*12011SSriharsha.Basavapatna@Sun.COM }
617*12011SSriharsha.Basavapatna@Sun.COM 
618*12011SSriharsha.Basavapatna@Sun.COM /*
619*12011SSriharsha.Basavapatna@Sun.COM  * Generic routine to send message out over ldc channel.
620*12011SSriharsha.Basavapatna@Sun.COM  *
621*12011SSriharsha.Basavapatna@Sun.COM  * It is possible that when we attempt to write over the ldc channel
622*12011SSriharsha.Basavapatna@Sun.COM  * that we get notified that it has been reset. Depending on the value
623*12011SSriharsha.Basavapatna@Sun.COM  * of the handle_reset flag we either handle that event here or simply
624*12011SSriharsha.Basavapatna@Sun.COM  * notify the caller that the channel was reset.
625*12011SSriharsha.Basavapatna@Sun.COM  */
626*12011SSriharsha.Basavapatna@Sun.COM int
vsw_send_msg(vsw_ldc_t * ldcp,void * msgp,int size,boolean_t handle_reset)627*12011SSriharsha.Basavapatna@Sun.COM vsw_send_msg(vsw_ldc_t *ldcp, void *msgp, int size, boolean_t handle_reset)
628*12011SSriharsha.Basavapatna@Sun.COM {
629*12011SSriharsha.Basavapatna@Sun.COM 	int			rv;
630*12011SSriharsha.Basavapatna@Sun.COM 	size_t			msglen = size;
631*12011SSriharsha.Basavapatna@Sun.COM 	vio_msg_tag_t		*tag = (vio_msg_tag_t *)msgp;
632*12011SSriharsha.Basavapatna@Sun.COM 	vsw_t			*vswp = ldcp->ldc_vswp;
633*12011SSriharsha.Basavapatna@Sun.COM 	vio_dring_msg_t		*dmsg;
634*12011SSriharsha.Basavapatna@Sun.COM 	vio_raw_data_msg_t	*rmsg;
635*12011SSriharsha.Basavapatna@Sun.COM 	vnet_ibnd_desc_t	*imsg;
636*12011SSriharsha.Basavapatna@Sun.COM 	boolean_t		data_msg = B_FALSE;
637*12011SSriharsha.Basavapatna@Sun.COM 	int			retries = vsw_wretries;
638*12011SSriharsha.Basavapatna@Sun.COM 
639*12011SSriharsha.Basavapatna@Sun.COM 	D1(vswp, "vsw_send_msg (%lld) enter : sending %d bytes",
640*12011SSriharsha.Basavapatna@Sun.COM 	    ldcp->ldc_id, size);
641*12011SSriharsha.Basavapatna@Sun.COM 
642*12011SSriharsha.Basavapatna@Sun.COM 	D2(vswp, "send_msg: type 0x%llx", tag->vio_msgtype);
643*12011SSriharsha.Basavapatna@Sun.COM 	D2(vswp, "send_msg: stype 0x%llx", tag->vio_subtype);
644*12011SSriharsha.Basavapatna@Sun.COM 	D2(vswp, "send_msg: senv 0x%llx", tag->vio_subtype_env);
645*12011SSriharsha.Basavapatna@Sun.COM 
646*12011SSriharsha.Basavapatna@Sun.COM 	mutex_enter(&ldcp->ldc_txlock);
647*12011SSriharsha.Basavapatna@Sun.COM 
648*12011SSriharsha.Basavapatna@Sun.COM 	if (tag->vio_subtype == VIO_SUBTYPE_INFO) {
649*12011SSriharsha.Basavapatna@Sun.COM 		if (tag->vio_subtype_env == VIO_DRING_DATA) {
650*12011SSriharsha.Basavapatna@Sun.COM 			dmsg = (vio_dring_msg_t *)tag;
651*12011SSriharsha.Basavapatna@Sun.COM 			dmsg->seq_num = ldcp->lane_out.seq_num;
652*12011SSriharsha.Basavapatna@Sun.COM 			data_msg = B_TRUE;
653*12011SSriharsha.Basavapatna@Sun.COM 		} else if (tag->vio_subtype_env == VIO_PKT_DATA) {
654*12011SSriharsha.Basavapatna@Sun.COM 			rmsg = (vio_raw_data_msg_t *)tag;
655*12011SSriharsha.Basavapatna@Sun.COM 			rmsg->seq_num = ldcp->lane_out.seq_num;
656*12011SSriharsha.Basavapatna@Sun.COM 			data_msg = B_TRUE;
657*12011SSriharsha.Basavapatna@Sun.COM 		} else if (tag->vio_subtype_env == VIO_DESC_DATA) {
658*12011SSriharsha.Basavapatna@Sun.COM 			imsg = (vnet_ibnd_desc_t *)tag;
659*12011SSriharsha.Basavapatna@Sun.COM 			imsg->hdr.seq_num = ldcp->lane_out.seq_num;
660*12011SSriharsha.Basavapatna@Sun.COM 			data_msg = B_TRUE;
661*12011SSriharsha.Basavapatna@Sun.COM 		}
662*12011SSriharsha.Basavapatna@Sun.COM 	}
663*12011SSriharsha.Basavapatna@Sun.COM 
664*12011SSriharsha.Basavapatna@Sun.COM 	do {
665*12011SSriharsha.Basavapatna@Sun.COM 		msglen = size;
666*12011SSriharsha.Basavapatna@Sun.COM 		rv = ldc_write(ldcp->ldc_handle, (caddr_t)msgp, &msglen);
667*12011SSriharsha.Basavapatna@Sun.COM 	} while (rv == EWOULDBLOCK && --retries > 0);
668*12011SSriharsha.Basavapatna@Sun.COM 
669*12011SSriharsha.Basavapatna@Sun.COM 	if (rv == 0 && data_msg == B_TRUE) {
670*12011SSriharsha.Basavapatna@Sun.COM 		ldcp->lane_out.seq_num++;
671*12011SSriharsha.Basavapatna@Sun.COM 	}
672*12011SSriharsha.Basavapatna@Sun.COM 
673*12011SSriharsha.Basavapatna@Sun.COM 	if ((rv != 0) || (msglen != size)) {
674*12011SSriharsha.Basavapatna@Sun.COM 		DERR(vswp, "vsw_send_msg:ldc_write failed: chan(%lld) rv(%d) "
675*12011SSriharsha.Basavapatna@Sun.COM 		    "size (%d) msglen(%d)\n", ldcp->ldc_id, rv, size, msglen);
676*12011SSriharsha.Basavapatna@Sun.COM 		ldcp->ldc_stats.oerrors++;
677*12011SSriharsha.Basavapatna@Sun.COM 	}
678*12011SSriharsha.Basavapatna@Sun.COM 
679*12011SSriharsha.Basavapatna@Sun.COM 	mutex_exit(&ldcp->ldc_txlock);
680*12011SSriharsha.Basavapatna@Sun.COM 
681*12011SSriharsha.Basavapatna@Sun.COM 	/*
682*12011SSriharsha.Basavapatna@Sun.COM 	 * If channel has been reset we either handle it here or
683*12011SSriharsha.Basavapatna@Sun.COM 	 * simply report back that it has been reset and let caller
684*12011SSriharsha.Basavapatna@Sun.COM 	 * decide what to do.
685*12011SSriharsha.Basavapatna@Sun.COM 	 */
686*12011SSriharsha.Basavapatna@Sun.COM 	if (rv == ECONNRESET) {
687*12011SSriharsha.Basavapatna@Sun.COM 		DWARN(vswp, "%s (%lld) channel reset", __func__, ldcp->ldc_id);
688*12011SSriharsha.Basavapatna@Sun.COM 
689*12011SSriharsha.Basavapatna@Sun.COM 		if (handle_reset) {
690*12011SSriharsha.Basavapatna@Sun.COM 			vsw_process_conn_evt(ldcp, VSW_CONN_RESET);
691*12011SSriharsha.Basavapatna@Sun.COM 		}
692*12011SSriharsha.Basavapatna@Sun.COM 	}
693*12011SSriharsha.Basavapatna@Sun.COM 
694*12011SSriharsha.Basavapatna@Sun.COM 	return (rv);
695*12011SSriharsha.Basavapatna@Sun.COM }
696*12011SSriharsha.Basavapatna@Sun.COM 
697*12011SSriharsha.Basavapatna@Sun.COM /*
698*12011SSriharsha.Basavapatna@Sun.COM  * A per LDC worker thread to process ldc messages. This thread is woken up by
699*12011SSriharsha.Basavapatna@Sun.COM  * the LDC interrupt handler to process LDC packets and receive data.
700*12011SSriharsha.Basavapatna@Sun.COM  */
701*12011SSriharsha.Basavapatna@Sun.COM void
vsw_ldc_msg_worker(void * arg)702*12011SSriharsha.Basavapatna@Sun.COM vsw_ldc_msg_worker(void *arg)
703*12011SSriharsha.Basavapatna@Sun.COM {
704*12011SSriharsha.Basavapatna@Sun.COM 	callb_cpr_t	cprinfo;
705*12011SSriharsha.Basavapatna@Sun.COM 	vsw_ldc_t	*ldcp = (vsw_ldc_t *)arg;
706*12011SSriharsha.Basavapatna@Sun.COM 	vsw_t		*vswp = ldcp->ldc_vswp;
707*12011SSriharsha.Basavapatna@Sun.COM 
708*12011SSriharsha.Basavapatna@Sun.COM 	D1(vswp, "%s(%lld):enter\n", __func__, ldcp->ldc_id);
709*12011SSriharsha.Basavapatna@Sun.COM 	CALLB_CPR_INIT(&cprinfo, &ldcp->msg_thr_lock, callb_generic_cpr,
710*12011SSriharsha.Basavapatna@Sun.COM 	    "vsw_msg_thread");
711*12011SSriharsha.Basavapatna@Sun.COM 	mutex_enter(&ldcp->msg_thr_lock);
712*12011SSriharsha.Basavapatna@Sun.COM 	while (!(ldcp->msg_thr_flags & VSW_WTHR_STOP)) {
713*12011SSriharsha.Basavapatna@Sun.COM 
714*12011SSriharsha.Basavapatna@Sun.COM 		CALLB_CPR_SAFE_BEGIN(&cprinfo);
715*12011SSriharsha.Basavapatna@Sun.COM 		/*
716*12011SSriharsha.Basavapatna@Sun.COM 		 * Wait until the data is received or a stop
717*12011SSriharsha.Basavapatna@Sun.COM 		 * request is received.
718*12011SSriharsha.Basavapatna@Sun.COM 		 */
719*12011SSriharsha.Basavapatna@Sun.COM 		while (!(ldcp->msg_thr_flags &
720*12011SSriharsha.Basavapatna@Sun.COM 		    (VSW_WTHR_DATARCVD | VSW_WTHR_STOP))) {
721*12011SSriharsha.Basavapatna@Sun.COM 			cv_wait(&ldcp->msg_thr_cv, &ldcp->msg_thr_lock);
722*12011SSriharsha.Basavapatna@Sun.COM 		}
723*12011SSriharsha.Basavapatna@Sun.COM 		CALLB_CPR_SAFE_END(&cprinfo, &ldcp->msg_thr_lock)
724*12011SSriharsha.Basavapatna@Sun.COM 
725*12011SSriharsha.Basavapatna@Sun.COM 		/*
726*12011SSriharsha.Basavapatna@Sun.COM 		 * First process the stop request.
727*12011SSriharsha.Basavapatna@Sun.COM 		 */
728*12011SSriharsha.Basavapatna@Sun.COM 		if (ldcp->msg_thr_flags & VSW_WTHR_STOP) {
729*12011SSriharsha.Basavapatna@Sun.COM 			D2(vswp, "%s(%lld):Rx thread stopped\n",
730*12011SSriharsha.Basavapatna@Sun.COM 			    __func__, ldcp->ldc_id);
731*12011SSriharsha.Basavapatna@Sun.COM 			break;
732*12011SSriharsha.Basavapatna@Sun.COM 		}
733*12011SSriharsha.Basavapatna@Sun.COM 		ldcp->msg_thr_flags &= ~VSW_WTHR_DATARCVD;
734*12011SSriharsha.Basavapatna@Sun.COM 		mutex_exit(&ldcp->msg_thr_lock);
735*12011SSriharsha.Basavapatna@Sun.COM 		D1(vswp, "%s(%lld):calling vsw_process_pkt\n",
736*12011SSriharsha.Basavapatna@Sun.COM 		    __func__, ldcp->ldc_id);
737*12011SSriharsha.Basavapatna@Sun.COM 		mutex_enter(&ldcp->ldc_cblock);
738*12011SSriharsha.Basavapatna@Sun.COM 		vsw_process_pkt(ldcp);
739*12011SSriharsha.Basavapatna@Sun.COM 		mutex_exit(&ldcp->ldc_cblock);
740*12011SSriharsha.Basavapatna@Sun.COM 		mutex_enter(&ldcp->msg_thr_lock);
741*12011SSriharsha.Basavapatna@Sun.COM 	}
742*12011SSriharsha.Basavapatna@Sun.COM 
743*12011SSriharsha.Basavapatna@Sun.COM 	/*
744*12011SSriharsha.Basavapatna@Sun.COM 	 * Update the run status and wakeup the thread that
745*12011SSriharsha.Basavapatna@Sun.COM 	 * has sent the stop request.
746*12011SSriharsha.Basavapatna@Sun.COM 	 */
747*12011SSriharsha.Basavapatna@Sun.COM 	ldcp->msg_thr_flags &= ~VSW_WTHR_STOP;
748*12011SSriharsha.Basavapatna@Sun.COM 	ldcp->msg_thread = NULL;
749*12011SSriharsha.Basavapatna@Sun.COM 	CALLB_CPR_EXIT(&cprinfo);
750*12011SSriharsha.Basavapatna@Sun.COM 	D1(vswp, "%s(%lld):exit\n", __func__, ldcp->ldc_id);
751*12011SSriharsha.Basavapatna@Sun.COM 	thread_exit();
752*12011SSriharsha.Basavapatna@Sun.COM }
753*12011SSriharsha.Basavapatna@Sun.COM 
754*12011SSriharsha.Basavapatna@Sun.COM /* Co-ordinate with msg processing thread to stop it */
755*12011SSriharsha.Basavapatna@Sun.COM void
vsw_stop_msg_thread(vsw_ldc_t * ldcp)756*12011SSriharsha.Basavapatna@Sun.COM vsw_stop_msg_thread(vsw_ldc_t *ldcp)
757*12011SSriharsha.Basavapatna@Sun.COM {
758*12011SSriharsha.Basavapatna@Sun.COM 	kt_did_t	tid = 0;
759*12011SSriharsha.Basavapatna@Sun.COM 	vsw_t		*vswp = ldcp->ldc_vswp;
760*12011SSriharsha.Basavapatna@Sun.COM 
761*12011SSriharsha.Basavapatna@Sun.COM 	D1(vswp, "%s(%lld):enter\n", __func__, ldcp->ldc_id);
762*12011SSriharsha.Basavapatna@Sun.COM 	/*
763*12011SSriharsha.Basavapatna@Sun.COM 	 * Send a stop request by setting the stop flag and
764*12011SSriharsha.Basavapatna@Sun.COM 	 * wait until the msg process thread stops.
765*12011SSriharsha.Basavapatna@Sun.COM 	 */
766*12011SSriharsha.Basavapatna@Sun.COM 	mutex_enter(&ldcp->msg_thr_lock);
767*12011SSriharsha.Basavapatna@Sun.COM 	if (ldcp->msg_thread != NULL) {
768*12011SSriharsha.Basavapatna@Sun.COM 		tid = ldcp->msg_thread->t_did;
769*12011SSriharsha.Basavapatna@Sun.COM 		ldcp->msg_thr_flags |= VSW_WTHR_STOP;
770*12011SSriharsha.Basavapatna@Sun.COM 		cv_signal(&ldcp->msg_thr_cv);
771*12011SSriharsha.Basavapatna@Sun.COM 	}
772*12011SSriharsha.Basavapatna@Sun.COM 	mutex_exit(&ldcp->msg_thr_lock);
773*12011SSriharsha.Basavapatna@Sun.COM 
774*12011SSriharsha.Basavapatna@Sun.COM 	if (tid != 0) {
775*12011SSriharsha.Basavapatna@Sun.COM 		thread_join(tid);
776*12011SSriharsha.Basavapatna@Sun.COM 	}
777*12011SSriharsha.Basavapatna@Sun.COM 	D1(vswp, "%s(%lld):exit\n", __func__, ldcp->ldc_id);
778*12011SSriharsha.Basavapatna@Sun.COM }
779*12011SSriharsha.Basavapatna@Sun.COM 
780*12011SSriharsha.Basavapatna@Sun.COM /*
781*12011SSriharsha.Basavapatna@Sun.COM  * Send packet out via descriptor ring to a logical device.
782*12011SSriharsha.Basavapatna@Sun.COM  */
783*12011SSriharsha.Basavapatna@Sun.COM int
vsw_dringsend(vsw_ldc_t * ldcp,mblk_t * mp)784*12011SSriharsha.Basavapatna@Sun.COM vsw_dringsend(vsw_ldc_t *ldcp, mblk_t *mp)
785*12011SSriharsha.Basavapatna@Sun.COM {
786*12011SSriharsha.Basavapatna@Sun.COM 	vio_dring_msg_t		dring_pkt;
787*12011SSriharsha.Basavapatna@Sun.COM 	dring_info_t		*dp = NULL;
788*12011SSriharsha.Basavapatna@Sun.COM 	vsw_private_desc_t	*priv_desc = NULL;
789*12011SSriharsha.Basavapatna@Sun.COM 	vnet_public_desc_t	*pub = NULL;
790*12011SSriharsha.Basavapatna@Sun.COM 	vsw_t			*vswp = ldcp->ldc_vswp;
791*12011SSriharsha.Basavapatna@Sun.COM 	mblk_t			*bp;
792*12011SSriharsha.Basavapatna@Sun.COM 	size_t			n, size;
793*12011SSriharsha.Basavapatna@Sun.COM 	caddr_t			bufp;
794*12011SSriharsha.Basavapatna@Sun.COM 	int			idx;
795*12011SSriharsha.Basavapatna@Sun.COM 	int			status = LDC_TX_SUCCESS;
796*12011SSriharsha.Basavapatna@Sun.COM 	struct ether_header	*ehp = (struct ether_header *)mp->b_rptr;
797*12011SSriharsha.Basavapatna@Sun.COM 	lane_t			*lp = &ldcp->lane_out;
798*12011SSriharsha.Basavapatna@Sun.COM 
799*12011SSriharsha.Basavapatna@Sun.COM 	D1(vswp, "%s(%lld): enter\n", __func__, ldcp->ldc_id);
800*12011SSriharsha.Basavapatna@Sun.COM 
801*12011SSriharsha.Basavapatna@Sun.COM 	/* TODO: make test a macro */
802*12011SSriharsha.Basavapatna@Sun.COM 	if ((!(ldcp->lane_out.lstate & VSW_LANE_ACTIVE)) ||
803*12011SSriharsha.Basavapatna@Sun.COM 	    (ldcp->ldc_status != LDC_UP) || (ldcp->ldc_handle == NULL)) {
804*12011SSriharsha.Basavapatna@Sun.COM 		DWARN(vswp, "%s(%lld) status(%d) lstate(0x%llx), dropping "
805*12011SSriharsha.Basavapatna@Sun.COM 		    "packet\n", __func__, ldcp->ldc_id, ldcp->ldc_status,
806*12011SSriharsha.Basavapatna@Sun.COM 		    ldcp->lane_out.lstate);
807*12011SSriharsha.Basavapatna@Sun.COM 		ldcp->ldc_stats.oerrors++;
808*12011SSriharsha.Basavapatna@Sun.COM 		return (LDC_TX_FAILURE);
809*12011SSriharsha.Basavapatna@Sun.COM 	}
810*12011SSriharsha.Basavapatna@Sun.COM 
811*12011SSriharsha.Basavapatna@Sun.COM 	if ((dp = ldcp->lane_out.dringp) == NULL) {
812*12011SSriharsha.Basavapatna@Sun.COM 		DERR(vswp, "%s(%lld): no dring for outbound lane on"
813*12011SSriharsha.Basavapatna@Sun.COM 		    " channel %d", __func__, ldcp->ldc_id, ldcp->ldc_id);
814*12011SSriharsha.Basavapatna@Sun.COM 		ldcp->ldc_stats.oerrors++;
815*12011SSriharsha.Basavapatna@Sun.COM 		return (LDC_TX_FAILURE);
816*12011SSriharsha.Basavapatna@Sun.COM 	}
817*12011SSriharsha.Basavapatna@Sun.COM 
818*12011SSriharsha.Basavapatna@Sun.COM 	size = msgsize(mp);
819*12011SSriharsha.Basavapatna@Sun.COM 	if (size > (size_t)lp->mtu) {
820*12011SSriharsha.Basavapatna@Sun.COM 		DERR(vswp, "%s(%lld) invalid size (%ld)\n", __func__,
821*12011SSriharsha.Basavapatna@Sun.COM 		    ldcp->ldc_id, size);
822*12011SSriharsha.Basavapatna@Sun.COM 		ldcp->ldc_stats.oerrors++;
823*12011SSriharsha.Basavapatna@Sun.COM 		return (LDC_TX_FAILURE);
824*12011SSriharsha.Basavapatna@Sun.COM 	}
825*12011SSriharsha.Basavapatna@Sun.COM 
826*12011SSriharsha.Basavapatna@Sun.COM 	/*
827*12011SSriharsha.Basavapatna@Sun.COM 	 * Find a free descriptor
828*12011SSriharsha.Basavapatna@Sun.COM 	 *
829*12011SSriharsha.Basavapatna@Sun.COM 	 * Note: for the moment we are assuming that we will only
830*12011SSriharsha.Basavapatna@Sun.COM 	 * have one dring going from the switch to each of its
831*12011SSriharsha.Basavapatna@Sun.COM 	 * peers. This may change in the future.
832*12011SSriharsha.Basavapatna@Sun.COM 	 */
833*12011SSriharsha.Basavapatna@Sun.COM 	if (vsw_dring_find_free_desc(dp, &priv_desc, &idx) != 0) {
834*12011SSriharsha.Basavapatna@Sun.COM 		D2(vswp, "%s(%lld): no descriptor available for ring "
835*12011SSriharsha.Basavapatna@Sun.COM 		    "at 0x%llx", __func__, ldcp->ldc_id, dp);
836*12011SSriharsha.Basavapatna@Sun.COM 
837*12011SSriharsha.Basavapatna@Sun.COM 		/* nothing more we can do */
838*12011SSriharsha.Basavapatna@Sun.COM 		status = LDC_TX_NORESOURCES;
839*12011SSriharsha.Basavapatna@Sun.COM 		ldcp->ldc_stats.tx_no_desc++;
840*12011SSriharsha.Basavapatna@Sun.COM 		goto vsw_dringsend_free_exit;
841*12011SSriharsha.Basavapatna@Sun.COM 	} else {
842*12011SSriharsha.Basavapatna@Sun.COM 		D2(vswp, "%s(%lld): free private descriptor found at pos %ld "
843*12011SSriharsha.Basavapatna@Sun.COM 		    "addr 0x%llx\n", __func__, ldcp->ldc_id, idx, priv_desc);
844*12011SSriharsha.Basavapatna@Sun.COM 	}
845*12011SSriharsha.Basavapatna@Sun.COM 
846*12011SSriharsha.Basavapatna@Sun.COM 	/* copy data into the descriptor */
847*12011SSriharsha.Basavapatna@Sun.COM 	bufp = priv_desc->datap;
848*12011SSriharsha.Basavapatna@Sun.COM 	bufp += VNET_IPALIGN;
849*12011SSriharsha.Basavapatna@Sun.COM 	for (bp = mp, n = 0; bp != NULL; bp = bp->b_cont) {
850*12011SSriharsha.Basavapatna@Sun.COM 		n = MBLKL(bp);
851*12011SSriharsha.Basavapatna@Sun.COM 		bcopy(bp->b_rptr, bufp, n);
852*12011SSriharsha.Basavapatna@Sun.COM 		bufp += n;
853*12011SSriharsha.Basavapatna@Sun.COM 	}
854*12011SSriharsha.Basavapatna@Sun.COM 
855*12011SSriharsha.Basavapatna@Sun.COM 	priv_desc->datalen = (size < (size_t)ETHERMIN) ? ETHERMIN : size;
856*12011SSriharsha.Basavapatna@Sun.COM 
857*12011SSriharsha.Basavapatna@Sun.COM 	pub = priv_desc->descp;
858*12011SSriharsha.Basavapatna@Sun.COM 	pub->nbytes = priv_desc->datalen;
859*12011SSriharsha.Basavapatna@Sun.COM 
860*12011SSriharsha.Basavapatna@Sun.COM 	/* update statistics */
861*12011SSriharsha.Basavapatna@Sun.COM 	if (IS_BROADCAST(ehp))
862*12011SSriharsha.Basavapatna@Sun.COM 		ldcp->ldc_stats.brdcstxmt++;
863*12011SSriharsha.Basavapatna@Sun.COM 	else if (IS_MULTICAST(ehp))
864*12011SSriharsha.Basavapatna@Sun.COM 		ldcp->ldc_stats.multixmt++;
865*12011SSriharsha.Basavapatna@Sun.COM 	ldcp->ldc_stats.opackets++;
866*12011SSriharsha.Basavapatna@Sun.COM 	ldcp->ldc_stats.obytes += priv_desc->datalen;
867*12011SSriharsha.Basavapatna@Sun.COM 
868*12011SSriharsha.Basavapatna@Sun.COM 	mutex_enter(&priv_desc->dstate_lock);
869*12011SSriharsha.Basavapatna@Sun.COM 	pub->hdr.dstate = VIO_DESC_READY;
870*12011SSriharsha.Basavapatna@Sun.COM 	mutex_exit(&priv_desc->dstate_lock);
871*12011SSriharsha.Basavapatna@Sun.COM 
872*12011SSriharsha.Basavapatna@Sun.COM 	/*
873*12011SSriharsha.Basavapatna@Sun.COM 	 * Determine whether or not we need to send a message to our
874*12011SSriharsha.Basavapatna@Sun.COM 	 * peer prompting them to read our newly updated descriptor(s).
875*12011SSriharsha.Basavapatna@Sun.COM 	 */
876*12011SSriharsha.Basavapatna@Sun.COM 	mutex_enter(&dp->restart_lock);
877*12011SSriharsha.Basavapatna@Sun.COM 	if (dp->restart_reqd) {
878*12011SSriharsha.Basavapatna@Sun.COM 		dp->restart_reqd = B_FALSE;
879*12011SSriharsha.Basavapatna@Sun.COM 		ldcp->ldc_stats.dring_data_msgs_sent++;
880*12011SSriharsha.Basavapatna@Sun.COM 		mutex_exit(&dp->restart_lock);
881*12011SSriharsha.Basavapatna@Sun.COM 
882*12011SSriharsha.Basavapatna@Sun.COM 		/*
883*12011SSriharsha.Basavapatna@Sun.COM 		 * Send a vio_dring_msg to peer to prompt them to read
884*12011SSriharsha.Basavapatna@Sun.COM 		 * the updated descriptor ring.
885*12011SSriharsha.Basavapatna@Sun.COM 		 */
886*12011SSriharsha.Basavapatna@Sun.COM 		dring_pkt.tag.vio_msgtype = VIO_TYPE_DATA;
887*12011SSriharsha.Basavapatna@Sun.COM 		dring_pkt.tag.vio_subtype = VIO_SUBTYPE_INFO;
888*12011SSriharsha.Basavapatna@Sun.COM 		dring_pkt.tag.vio_subtype_env = VIO_DRING_DATA;
889*12011SSriharsha.Basavapatna@Sun.COM 		dring_pkt.tag.vio_sid = ldcp->local_session;
890*12011SSriharsha.Basavapatna@Sun.COM 
891*12011SSriharsha.Basavapatna@Sun.COM 		/* Note - for now using first ring */
892*12011SSriharsha.Basavapatna@Sun.COM 		dring_pkt.dring_ident = dp->ident;
893*12011SSriharsha.Basavapatna@Sun.COM 
894*12011SSriharsha.Basavapatna@Sun.COM 		/*
895*12011SSriharsha.Basavapatna@Sun.COM 		 * If last_ack_recv is -1 then we know we've not
896*12011SSriharsha.Basavapatna@Sun.COM 		 * received any ack's yet, so this must be the first
897*12011SSriharsha.Basavapatna@Sun.COM 		 * msg sent, so set the start to the begining of the ring.
898*12011SSriharsha.Basavapatna@Sun.COM 		 */
899*12011SSriharsha.Basavapatna@Sun.COM 		mutex_enter(&dp->dlock);
900*12011SSriharsha.Basavapatna@Sun.COM 		if (dp->last_ack_recv == -1) {
901*12011SSriharsha.Basavapatna@Sun.COM 			dring_pkt.start_idx = 0;
902*12011SSriharsha.Basavapatna@Sun.COM 		} else {
903*12011SSriharsha.Basavapatna@Sun.COM 			dring_pkt.start_idx =
904*12011SSriharsha.Basavapatna@Sun.COM 			    (dp->last_ack_recv + 1) % dp->num_descriptors;
905*12011SSriharsha.Basavapatna@Sun.COM 		}
906*12011SSriharsha.Basavapatna@Sun.COM 		dring_pkt.end_idx = -1;
907*12011SSriharsha.Basavapatna@Sun.COM 		mutex_exit(&dp->dlock);
908*12011SSriharsha.Basavapatna@Sun.COM 
909*12011SSriharsha.Basavapatna@Sun.COM 		D3(vswp, "%s(%lld): dring 0x%llx : ident 0x%llx\n", __func__,
910*12011SSriharsha.Basavapatna@Sun.COM 		    ldcp->ldc_id, dp, dring_pkt.dring_ident);
911*12011SSriharsha.Basavapatna@Sun.COM 		D3(vswp, "%s(%lld): start %lld : end %lld :\n",
912*12011SSriharsha.Basavapatna@Sun.COM 		    __func__, ldcp->ldc_id, dring_pkt.start_idx,
913*12011SSriharsha.Basavapatna@Sun.COM 		    dring_pkt.end_idx);
914*12011SSriharsha.Basavapatna@Sun.COM 
915*12011SSriharsha.Basavapatna@Sun.COM 		(void) vsw_send_msg(ldcp, (void *)&dring_pkt,
916*12011SSriharsha.Basavapatna@Sun.COM 		    sizeof (vio_dring_msg_t), B_TRUE);
917*12011SSriharsha.Basavapatna@Sun.COM 
918*12011SSriharsha.Basavapatna@Sun.COM 		return (status);
919*12011SSriharsha.Basavapatna@Sun.COM 
920*12011SSriharsha.Basavapatna@Sun.COM 	} else {
921*12011SSriharsha.Basavapatna@Sun.COM 		mutex_exit(&dp->restart_lock);
922*12011SSriharsha.Basavapatna@Sun.COM 		D2(vswp, "%s(%lld): updating descp %d", __func__,
923*12011SSriharsha.Basavapatna@Sun.COM 		    ldcp->ldc_id, idx);
924*12011SSriharsha.Basavapatna@Sun.COM 	}
925*12011SSriharsha.Basavapatna@Sun.COM 
926*12011SSriharsha.Basavapatna@Sun.COM vsw_dringsend_free_exit:
927*12011SSriharsha.Basavapatna@Sun.COM 
928*12011SSriharsha.Basavapatna@Sun.COM 	D1(vswp, "%s(%lld): exit\n", __func__, ldcp->ldc_id);
929*12011SSriharsha.Basavapatna@Sun.COM 	return (status);
930*12011SSriharsha.Basavapatna@Sun.COM }
931*12011SSriharsha.Basavapatna@Sun.COM 
932*12011SSriharsha.Basavapatna@Sun.COM /*
933*12011SSriharsha.Basavapatna@Sun.COM  * Searches the private section of a ring for a free descriptor,
934*12011SSriharsha.Basavapatna@Sun.COM  * starting at the location of the last free descriptor found
935*12011SSriharsha.Basavapatna@Sun.COM  * previously.
936*12011SSriharsha.Basavapatna@Sun.COM  *
937*12011SSriharsha.Basavapatna@Sun.COM  * Returns 0 if free descriptor is available, and updates state
938*12011SSriharsha.Basavapatna@Sun.COM  * of private descriptor to VIO_DESC_READY,  otherwise returns 1.
939*12011SSriharsha.Basavapatna@Sun.COM  *
940*12011SSriharsha.Basavapatna@Sun.COM  * FUTURE: might need to return contiguous range of descriptors
941*12011SSriharsha.Basavapatna@Sun.COM  * as dring info msg assumes all will be contiguous.
942*12011SSriharsha.Basavapatna@Sun.COM  */
943*12011SSriharsha.Basavapatna@Sun.COM int
vsw_dring_find_free_desc(dring_info_t * dringp,vsw_private_desc_t ** priv_p,int * idx)944*12011SSriharsha.Basavapatna@Sun.COM vsw_dring_find_free_desc(dring_info_t *dringp,
945*12011SSriharsha.Basavapatna@Sun.COM 		vsw_private_desc_t **priv_p, int *idx)
946*12011SSriharsha.Basavapatna@Sun.COM {
947*12011SSriharsha.Basavapatna@Sun.COM 	vsw_private_desc_t	*addr = NULL;
948*12011SSriharsha.Basavapatna@Sun.COM 	int			num = vsw_num_descriptors;
949*12011SSriharsha.Basavapatna@Sun.COM 	int			ret = 1;
950*12011SSriharsha.Basavapatna@Sun.COM 
951*12011SSriharsha.Basavapatna@Sun.COM 	D1(NULL, "%s enter\n", __func__);
952*12011SSriharsha.Basavapatna@Sun.COM 
953*12011SSriharsha.Basavapatna@Sun.COM 	ASSERT(dringp->priv_addr != NULL);
954*12011SSriharsha.Basavapatna@Sun.COM 
955*12011SSriharsha.Basavapatna@Sun.COM 	D2(NULL, "%s: searching ring, dringp 0x%llx : start pos %lld",
956*12011SSriharsha.Basavapatna@Sun.COM 	    __func__, dringp, dringp->end_idx);
957*12011SSriharsha.Basavapatna@Sun.COM 
958*12011SSriharsha.Basavapatna@Sun.COM 	addr = (vsw_private_desc_t *)dringp->priv_addr + dringp->end_idx;
959*12011SSriharsha.Basavapatna@Sun.COM 
960*12011SSriharsha.Basavapatna@Sun.COM 	mutex_enter(&addr->dstate_lock);
961*12011SSriharsha.Basavapatna@Sun.COM 	if (addr->dstate == VIO_DESC_FREE) {
962*12011SSriharsha.Basavapatna@Sun.COM 		addr->dstate = VIO_DESC_READY;
963*12011SSriharsha.Basavapatna@Sun.COM 		*priv_p = addr;
964*12011SSriharsha.Basavapatna@Sun.COM 		*idx = dringp->end_idx;
965*12011SSriharsha.Basavapatna@Sun.COM 		dringp->end_idx = (dringp->end_idx + 1) % num;
966*12011SSriharsha.Basavapatna@Sun.COM 		ret = 0;
967*12011SSriharsha.Basavapatna@Sun.COM 
968*12011SSriharsha.Basavapatna@Sun.COM 	}
969*12011SSriharsha.Basavapatna@Sun.COM 	mutex_exit(&addr->dstate_lock);
970*12011SSriharsha.Basavapatna@Sun.COM 
971*12011SSriharsha.Basavapatna@Sun.COM 	/* ring full */
972*12011SSriharsha.Basavapatna@Sun.COM 	if (ret == 1) {
973*12011SSriharsha.Basavapatna@Sun.COM 		D2(NULL, "%s: no desp free: started at %d", __func__,
974*12011SSriharsha.Basavapatna@Sun.COM 		    dringp->end_idx);
975*12011SSriharsha.Basavapatna@Sun.COM 	}
976*12011SSriharsha.Basavapatna@Sun.COM 
977*12011SSriharsha.Basavapatna@Sun.COM 	D1(NULL, "%s: exit\n", __func__);
978*12011SSriharsha.Basavapatna@Sun.COM 
979*12011SSriharsha.Basavapatna@Sun.COM 	return (ret);
980*12011SSriharsha.Basavapatna@Sun.COM }
981*12011SSriharsha.Basavapatna@Sun.COM 
982*12011SSriharsha.Basavapatna@Sun.COM /* vsw_reclaim_dring -- reclaim descriptors */
983*12011SSriharsha.Basavapatna@Sun.COM int
vsw_reclaim_dring(dring_info_t * dp,int start)984*12011SSriharsha.Basavapatna@Sun.COM vsw_reclaim_dring(dring_info_t *dp, int start)
985*12011SSriharsha.Basavapatna@Sun.COM {
986*12011SSriharsha.Basavapatna@Sun.COM 	int i, j, len;
987*12011SSriharsha.Basavapatna@Sun.COM 	vsw_private_desc_t *priv_addr;
988*12011SSriharsha.Basavapatna@Sun.COM 	vnet_public_desc_t *pub_addr;
989*12011SSriharsha.Basavapatna@Sun.COM 
990*12011SSriharsha.Basavapatna@Sun.COM 	pub_addr = (vnet_public_desc_t *)dp->pub_addr;
991*12011SSriharsha.Basavapatna@Sun.COM 	priv_addr = (vsw_private_desc_t *)dp->priv_addr;
992*12011SSriharsha.Basavapatna@Sun.COM 	len = dp->num_descriptors;
993*12011SSriharsha.Basavapatna@Sun.COM 
994*12011SSriharsha.Basavapatna@Sun.COM 	D2(NULL, "%s: start index %ld\n", __func__, start);
995*12011SSriharsha.Basavapatna@Sun.COM 
996*12011SSriharsha.Basavapatna@Sun.COM 	j = 0;
997*12011SSriharsha.Basavapatna@Sun.COM 	for (i = start; j < len; i = (i + 1) % len, j++) {
998*12011SSriharsha.Basavapatna@Sun.COM 		pub_addr = (vnet_public_desc_t *)dp->pub_addr + i;
999*12011SSriharsha.Basavapatna@Sun.COM 		priv_addr = (vsw_private_desc_t *)dp->priv_addr + i;
1000*12011SSriharsha.Basavapatna@Sun.COM 
1001*12011SSriharsha.Basavapatna@Sun.COM 		mutex_enter(&priv_addr->dstate_lock);
1002*12011SSriharsha.Basavapatna@Sun.COM 		if (pub_addr->hdr.dstate != VIO_DESC_DONE) {
1003*12011SSriharsha.Basavapatna@Sun.COM 			mutex_exit(&priv_addr->dstate_lock);
1004*12011SSriharsha.Basavapatna@Sun.COM 			break;
1005*12011SSriharsha.Basavapatna@Sun.COM 		}
1006*12011SSriharsha.Basavapatna@Sun.COM 		pub_addr->hdr.dstate = VIO_DESC_FREE;
1007*12011SSriharsha.Basavapatna@Sun.COM 		priv_addr->dstate = VIO_DESC_FREE;
1008*12011SSriharsha.Basavapatna@Sun.COM 		/* clear all the fields */
1009*12011SSriharsha.Basavapatna@Sun.COM 		priv_addr->datalen = 0;
1010*12011SSriharsha.Basavapatna@Sun.COM 		pub_addr->hdr.ack = 0;
1011*12011SSriharsha.Basavapatna@Sun.COM 		mutex_exit(&priv_addr->dstate_lock);
1012*12011SSriharsha.Basavapatna@Sun.COM 
1013*12011SSriharsha.Basavapatna@Sun.COM 		D3(NULL, "claiming descp:%d pub state:0x%llx priv state 0x%llx",
1014*12011SSriharsha.Basavapatna@Sun.COM 		    i, pub_addr->hdr.dstate, priv_addr->dstate);
1015*12011SSriharsha.Basavapatna@Sun.COM 	}
1016*12011SSriharsha.Basavapatna@Sun.COM 	return (j);
1017*12011SSriharsha.Basavapatna@Sun.COM }
1018*12011SSriharsha.Basavapatna@Sun.COM 
1019*12011SSriharsha.Basavapatna@Sun.COM void
vsw_process_dringdata(void * arg,void * dpkt)1020*12011SSriharsha.Basavapatna@Sun.COM vsw_process_dringdata(void *arg, void *dpkt)
1021*12011SSriharsha.Basavapatna@Sun.COM {
1022*12011SSriharsha.Basavapatna@Sun.COM 	vsw_ldc_t		*ldcp = arg;
1023*12011SSriharsha.Basavapatna@Sun.COM 	vio_dring_msg_t		*dring_pkt;
1024*12011SSriharsha.Basavapatna@Sun.COM 	vnet_public_desc_t	desc, *pub_addr = NULL;
1025*12011SSriharsha.Basavapatna@Sun.COM 	vsw_private_desc_t	*priv_addr = NULL;
1026*12011SSriharsha.Basavapatna@Sun.COM 	dring_info_t		*dp = NULL;
1027*12011SSriharsha.Basavapatna@Sun.COM 	vsw_t			*vswp = ldcp->ldc_vswp;
1028*12011SSriharsha.Basavapatna@Sun.COM 	mblk_t			*mp = NULL;
1029*12011SSriharsha.Basavapatna@Sun.COM 	vio_mblk_t		*vmp = NULL;
1030*12011SSriharsha.Basavapatna@Sun.COM 	mblk_t			*bp = NULL;
1031*12011SSriharsha.Basavapatna@Sun.COM 	mblk_t			*bpt = NULL;
1032*12011SSriharsha.Basavapatna@Sun.COM 	size_t			nbytes = 0;
1033*12011SSriharsha.Basavapatna@Sun.COM 	uint64_t		chain = 0;
1034*12011SSriharsha.Basavapatna@Sun.COM 	uint64_t		len;
1035*12011SSriharsha.Basavapatna@Sun.COM 	uint32_t		pos, start;
1036*12011SSriharsha.Basavapatna@Sun.COM 	uint32_t		range_start, range_end;
1037*12011SSriharsha.Basavapatna@Sun.COM 	int32_t			end, num, cnt = 0;
1038*12011SSriharsha.Basavapatna@Sun.COM 	int			i, rv, rng_rv = 0, msg_rv = 0;
1039*12011SSriharsha.Basavapatna@Sun.COM 	boolean_t		prev_desc_ack = B_FALSE;
1040*12011SSriharsha.Basavapatna@Sun.COM 	int			read_attempts = 0;
1041*12011SSriharsha.Basavapatna@Sun.COM 	struct ether_header	*ehp;
1042*12011SSriharsha.Basavapatna@Sun.COM 	lane_t			*lp = &ldcp->lane_out;
1043*12011SSriharsha.Basavapatna@Sun.COM 
1044*12011SSriharsha.Basavapatna@Sun.COM 	D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id);
1045*12011SSriharsha.Basavapatna@Sun.COM 
1046*12011SSriharsha.Basavapatna@Sun.COM 	/*
1047*12011SSriharsha.Basavapatna@Sun.COM 	 * We know this is a data/dring packet so
1048*12011SSriharsha.Basavapatna@Sun.COM 	 * cast it into the correct structure.
1049*12011SSriharsha.Basavapatna@Sun.COM 	 */
1050*12011SSriharsha.Basavapatna@Sun.COM 	dring_pkt = (vio_dring_msg_t *)dpkt;
1051*12011SSriharsha.Basavapatna@Sun.COM 
1052*12011SSriharsha.Basavapatna@Sun.COM 	/*
1053*12011SSriharsha.Basavapatna@Sun.COM 	 * Switch on the vio_subtype. If its INFO then we need to
1054*12011SSriharsha.Basavapatna@Sun.COM 	 * process the data. If its an ACK we need to make sure
1055*12011SSriharsha.Basavapatna@Sun.COM 	 * it makes sense (i.e did we send an earlier data/info),
1056*12011SSriharsha.Basavapatna@Sun.COM 	 * and if its a NACK then we maybe attempt a retry.
1057*12011SSriharsha.Basavapatna@Sun.COM 	 */
1058*12011SSriharsha.Basavapatna@Sun.COM 	switch (dring_pkt->tag.vio_subtype) {
1059*12011SSriharsha.Basavapatna@Sun.COM 	case VIO_SUBTYPE_INFO:
1060*12011SSriharsha.Basavapatna@Sun.COM 		D2(vswp, "%s(%lld): VIO_SUBTYPE_INFO", __func__, ldcp->ldc_id);
1061*12011SSriharsha.Basavapatna@Sun.COM 
1062*12011SSriharsha.Basavapatna@Sun.COM 		dp = ldcp->lane_in.dringp;
1063*12011SSriharsha.Basavapatna@Sun.COM 		if (dp->ident != dring_pkt->dring_ident) {
1064*12011SSriharsha.Basavapatna@Sun.COM 			DERR(vswp, "%s(%lld): unable to find dring from "
1065*12011SSriharsha.Basavapatna@Sun.COM 			    "ident 0x%llx", __func__, ldcp->ldc_id,
1066*12011SSriharsha.Basavapatna@Sun.COM 			    dring_pkt->dring_ident);
1067*12011SSriharsha.Basavapatna@Sun.COM 
1068*12011SSriharsha.Basavapatna@Sun.COM 			SND_DRING_NACK(ldcp, dring_pkt);
1069*12011SSriharsha.Basavapatna@Sun.COM 			return;
1070*12011SSriharsha.Basavapatna@Sun.COM 		}
1071*12011SSriharsha.Basavapatna@Sun.COM 
1072*12011SSriharsha.Basavapatna@Sun.COM 		ldcp->ldc_stats.dring_data_msgs_rcvd++;
1073*12011SSriharsha.Basavapatna@Sun.COM 
1074*12011SSriharsha.Basavapatna@Sun.COM 		start = pos = dring_pkt->start_idx;
1075*12011SSriharsha.Basavapatna@Sun.COM 		end = dring_pkt->end_idx;
1076*12011SSriharsha.Basavapatna@Sun.COM 		len = dp->num_descriptors;
1077*12011SSriharsha.Basavapatna@Sun.COM 
1078*12011SSriharsha.Basavapatna@Sun.COM 		range_start = range_end = pos;
1079*12011SSriharsha.Basavapatna@Sun.COM 
1080*12011SSriharsha.Basavapatna@Sun.COM 		D2(vswp, "%s(%lld): start index %ld : end %ld\n",
1081*12011SSriharsha.Basavapatna@Sun.COM 		    __func__, ldcp->ldc_id, start, end);
1082*12011SSriharsha.Basavapatna@Sun.COM 
1083*12011SSriharsha.Basavapatna@Sun.COM 		if (end == -1) {
1084*12011SSriharsha.Basavapatna@Sun.COM 			num = -1;
1085*12011SSriharsha.Basavapatna@Sun.COM 		} else if (end >= 0) {
1086*12011SSriharsha.Basavapatna@Sun.COM 			num = end >= pos ? end - pos + 1: (len - pos + 1) + end;
1087*12011SSriharsha.Basavapatna@Sun.COM 
1088*12011SSriharsha.Basavapatna@Sun.COM 			/* basic sanity check */
1089*12011SSriharsha.Basavapatna@Sun.COM 			if (end > len) {
1090*12011SSriharsha.Basavapatna@Sun.COM 				DERR(vswp, "%s(%lld): endpoint %lld outside "
1091*12011SSriharsha.Basavapatna@Sun.COM 				    "ring length %lld", __func__,
1092*12011SSriharsha.Basavapatna@Sun.COM 				    ldcp->ldc_id, end, len);
1093*12011SSriharsha.Basavapatna@Sun.COM 
1094*12011SSriharsha.Basavapatna@Sun.COM 				SND_DRING_NACK(ldcp, dring_pkt);
1095*12011SSriharsha.Basavapatna@Sun.COM 				return;
1096*12011SSriharsha.Basavapatna@Sun.COM 			}
1097*12011SSriharsha.Basavapatna@Sun.COM 		} else {
1098*12011SSriharsha.Basavapatna@Sun.COM 			DERR(vswp, "%s(%lld): invalid endpoint %lld",
1099*12011SSriharsha.Basavapatna@Sun.COM 			    __func__, ldcp->ldc_id, end);
1100*12011SSriharsha.Basavapatna@Sun.COM 			SND_DRING_NACK(ldcp, dring_pkt);
1101*12011SSriharsha.Basavapatna@Sun.COM 			return;
1102*12011SSriharsha.Basavapatna@Sun.COM 		}
1103*12011SSriharsha.Basavapatna@Sun.COM 
1104*12011SSriharsha.Basavapatna@Sun.COM 		while (cnt != num) {
1105*12011SSriharsha.Basavapatna@Sun.COM vsw_recheck_desc:
1106*12011SSriharsha.Basavapatna@Sun.COM 			pub_addr = (vnet_public_desc_t *)dp->pub_addr + pos;
1107*12011SSriharsha.Basavapatna@Sun.COM 
1108*12011SSriharsha.Basavapatna@Sun.COM 			if ((rng_rv = vnet_dring_entry_copy(pub_addr,
1109*12011SSriharsha.Basavapatna@Sun.COM 			    &desc, dp->dring_mtype, dp->dring_handle,
1110*12011SSriharsha.Basavapatna@Sun.COM 			    pos, pos)) != 0) {
1111*12011SSriharsha.Basavapatna@Sun.COM 				DERR(vswp, "%s(%lld): unable to copy "
1112*12011SSriharsha.Basavapatna@Sun.COM 				    "descriptor at pos %d: err %d",
1113*12011SSriharsha.Basavapatna@Sun.COM 				    __func__, pos, ldcp->ldc_id, rng_rv);
1114*12011SSriharsha.Basavapatna@Sun.COM 				ldcp->ldc_stats.ierrors++;
1115*12011SSriharsha.Basavapatna@Sun.COM 				break;
1116*12011SSriharsha.Basavapatna@Sun.COM 			}
1117*12011SSriharsha.Basavapatna@Sun.COM 
1118*12011SSriharsha.Basavapatna@Sun.COM 			/*
1119*12011SSriharsha.Basavapatna@Sun.COM 			 * When given a bounded range of descriptors
1120*12011SSriharsha.Basavapatna@Sun.COM 			 * to process, its an error to hit a descriptor
1121*12011SSriharsha.Basavapatna@Sun.COM 			 * which is not ready. In the non-bounded case
1122*12011SSriharsha.Basavapatna@Sun.COM 			 * (end_idx == -1) this simply indicates we have
1123*12011SSriharsha.Basavapatna@Sun.COM 			 * reached the end of the current active range.
1124*12011SSriharsha.Basavapatna@Sun.COM 			 */
1125*12011SSriharsha.Basavapatna@Sun.COM 			if (desc.hdr.dstate != VIO_DESC_READY) {
1126*12011SSriharsha.Basavapatna@Sun.COM 				/* unbound - no error */
1127*12011SSriharsha.Basavapatna@Sun.COM 				if (end == -1) {
1128*12011SSriharsha.Basavapatna@Sun.COM 					if (read_attempts == vsw_recv_retries)
1129*12011SSriharsha.Basavapatna@Sun.COM 						break;
1130*12011SSriharsha.Basavapatna@Sun.COM 
1131*12011SSriharsha.Basavapatna@Sun.COM 					delay(drv_usectohz(vsw_recv_delay));
1132*12011SSriharsha.Basavapatna@Sun.COM 					read_attempts++;
1133*12011SSriharsha.Basavapatna@Sun.COM 					goto vsw_recheck_desc;
1134*12011SSriharsha.Basavapatna@Sun.COM 				}
1135*12011SSriharsha.Basavapatna@Sun.COM 
1136*12011SSriharsha.Basavapatna@Sun.COM 				/* bounded - error - so NACK back */
1137*12011SSriharsha.Basavapatna@Sun.COM 				DERR(vswp, "%s(%lld): descriptor not READY "
1138*12011SSriharsha.Basavapatna@Sun.COM 				    "(%d)", __func__, ldcp->ldc_id,
1139*12011SSriharsha.Basavapatna@Sun.COM 				    desc.hdr.dstate);
1140*12011SSriharsha.Basavapatna@Sun.COM 				SND_DRING_NACK(ldcp, dring_pkt);
1141*12011SSriharsha.Basavapatna@Sun.COM 				return;
1142*12011SSriharsha.Basavapatna@Sun.COM 			}
1143*12011SSriharsha.Basavapatna@Sun.COM 
1144*12011SSriharsha.Basavapatna@Sun.COM 			DTRACE_PROBE1(read_attempts, int, read_attempts);
1145*12011SSriharsha.Basavapatna@Sun.COM 
1146*12011SSriharsha.Basavapatna@Sun.COM 			range_end = pos;
1147*12011SSriharsha.Basavapatna@Sun.COM 
1148*12011SSriharsha.Basavapatna@Sun.COM 			/*
1149*12011SSriharsha.Basavapatna@Sun.COM 			 * If we ACK'd the previous descriptor then now
1150*12011SSriharsha.Basavapatna@Sun.COM 			 * record the new range start position for later
1151*12011SSriharsha.Basavapatna@Sun.COM 			 * ACK's.
1152*12011SSriharsha.Basavapatna@Sun.COM 			 */
1153*12011SSriharsha.Basavapatna@Sun.COM 			if (prev_desc_ack) {
1154*12011SSriharsha.Basavapatna@Sun.COM 				range_start = pos;
1155*12011SSriharsha.Basavapatna@Sun.COM 
1156*12011SSriharsha.Basavapatna@Sun.COM 				D2(vswp, "%s(%lld): updating range start to be "
1157*12011SSriharsha.Basavapatna@Sun.COM 				    "%d", __func__, ldcp->ldc_id, range_start);
1158*12011SSriharsha.Basavapatna@Sun.COM 
1159*12011SSriharsha.Basavapatna@Sun.COM 				prev_desc_ack = B_FALSE;
1160*12011SSriharsha.Basavapatna@Sun.COM 			}
1161*12011SSriharsha.Basavapatna@Sun.COM 
1162*12011SSriharsha.Basavapatna@Sun.COM 			D2(vswp, "%s(%lld): processing desc %lld at pos"
1163*12011SSriharsha.Basavapatna@Sun.COM 			    " 0x%llx : dstate 0x%lx : datalen 0x%lx",
1164*12011SSriharsha.Basavapatna@Sun.COM 			    __func__, ldcp->ldc_id, pos, &desc,
1165*12011SSriharsha.Basavapatna@Sun.COM 			    desc.hdr.dstate, desc.nbytes);
1166*12011SSriharsha.Basavapatna@Sun.COM 
1167*12011SSriharsha.Basavapatna@Sun.COM 			if ((desc.nbytes < ETHERMIN) ||
1168*12011SSriharsha.Basavapatna@Sun.COM 			    (desc.nbytes > lp->mtu)) {
1169*12011SSriharsha.Basavapatna@Sun.COM 				/* invalid size; drop the packet */
1170*12011SSriharsha.Basavapatna@Sun.COM 				ldcp->ldc_stats.ierrors++;
1171*12011SSriharsha.Basavapatna@Sun.COM 				goto vsw_process_desc_done;
1172*12011SSriharsha.Basavapatna@Sun.COM 			}
1173*12011SSriharsha.Basavapatna@Sun.COM 
1174*12011SSriharsha.Basavapatna@Sun.COM 			/*
1175*12011SSriharsha.Basavapatna@Sun.COM 			 * Ensure that we ask ldc for an aligned
1176*12011SSriharsha.Basavapatna@Sun.COM 			 * number of bytes. Data is padded to align on 8
1177*12011SSriharsha.Basavapatna@Sun.COM 			 * byte boundary, desc.nbytes is actual data length,
1178*12011SSriharsha.Basavapatna@Sun.COM 			 * i.e. minus that padding.
1179*12011SSriharsha.Basavapatna@Sun.COM 			 */
1180*12011SSriharsha.Basavapatna@Sun.COM 			nbytes = (desc.nbytes + VNET_IPALIGN + 7) & ~7;
1181*12011SSriharsha.Basavapatna@Sun.COM 			if (nbytes > ldcp->max_rxpool_size) {
1182*12011SSriharsha.Basavapatna@Sun.COM 				mp = allocb(desc.nbytes + VNET_IPALIGN + 8,
1183*12011SSriharsha.Basavapatna@Sun.COM 				    BPRI_MED);
1184*12011SSriharsha.Basavapatna@Sun.COM 				vmp = NULL;
1185*12011SSriharsha.Basavapatna@Sun.COM 			} else {
1186*12011SSriharsha.Basavapatna@Sun.COM 				vmp = vio_multipool_allocb(&ldcp->vmp, nbytes);
1187*12011SSriharsha.Basavapatna@Sun.COM 				if (vmp == NULL) {
1188*12011SSriharsha.Basavapatna@Sun.COM 					ldcp->ldc_stats.rx_vio_allocb_fail++;
1189*12011SSriharsha.Basavapatna@Sun.COM 					/*
1190*12011SSriharsha.Basavapatna@Sun.COM 					 * No free receive buffers available,
1191*12011SSriharsha.Basavapatna@Sun.COM 					 * so fallback onto allocb(9F). Make
1192*12011SSriharsha.Basavapatna@Sun.COM 					 * sure that we get a data buffer which
1193*12011SSriharsha.Basavapatna@Sun.COM 					 * is a multiple of 8 as this is
1194*12011SSriharsha.Basavapatna@Sun.COM 					 * required by ldc_mem_copy.
1195*12011SSriharsha.Basavapatna@Sun.COM 					 */
1196*12011SSriharsha.Basavapatna@Sun.COM 					DTRACE_PROBE(allocb);
1197*12011SSriharsha.Basavapatna@Sun.COM 					mp = allocb(desc.nbytes +
1198*12011SSriharsha.Basavapatna@Sun.COM 					    VNET_IPALIGN + 8, BPRI_MED);
1199*12011SSriharsha.Basavapatna@Sun.COM 				} else {
1200*12011SSriharsha.Basavapatna@Sun.COM 					mp = vmp->mp;
1201*12011SSriharsha.Basavapatna@Sun.COM 				}
1202*12011SSriharsha.Basavapatna@Sun.COM 			}
1203*12011SSriharsha.Basavapatna@Sun.COM 			if (mp == NULL) {
1204*12011SSriharsha.Basavapatna@Sun.COM 				DERR(vswp, "%s(%ld): allocb failed",
1205*12011SSriharsha.Basavapatna@Sun.COM 				    __func__, ldcp->ldc_id);
1206*12011SSriharsha.Basavapatna@Sun.COM 				rng_rv = vnet_dring_entry_set_dstate(pub_addr,
1207*12011SSriharsha.Basavapatna@Sun.COM 				    dp->dring_mtype, dp->dring_handle, pos, pos,
1208*12011SSriharsha.Basavapatna@Sun.COM 				    VIO_DESC_DONE);
1209*12011SSriharsha.Basavapatna@Sun.COM 				ldcp->ldc_stats.ierrors++;
1210*12011SSriharsha.Basavapatna@Sun.COM 				ldcp->ldc_stats.rx_allocb_fail++;
1211*12011SSriharsha.Basavapatna@Sun.COM 				break;
1212*12011SSriharsha.Basavapatna@Sun.COM 			}
1213*12011SSriharsha.Basavapatna@Sun.COM 
1214*12011SSriharsha.Basavapatna@Sun.COM 			rv = ldc_mem_copy(ldcp->ldc_handle,
1215*12011SSriharsha.Basavapatna@Sun.COM 			    (caddr_t)mp->b_rptr, 0, &nbytes,
1216*12011SSriharsha.Basavapatna@Sun.COM 			    desc.memcookie, desc.ncookies, LDC_COPY_IN);
1217*12011SSriharsha.Basavapatna@Sun.COM 			if (rv != 0) {
1218*12011SSriharsha.Basavapatna@Sun.COM 				DERR(vswp, "%s(%d): unable to copy in data "
1219*12011SSriharsha.Basavapatna@Sun.COM 				    "from %d cookies in desc %d (rv %d)",
1220*12011SSriharsha.Basavapatna@Sun.COM 				    __func__, ldcp->ldc_id, desc.ncookies,
1221*12011SSriharsha.Basavapatna@Sun.COM 				    pos, rv);
1222*12011SSriharsha.Basavapatna@Sun.COM 				freemsg(mp);
1223*12011SSriharsha.Basavapatna@Sun.COM 
1224*12011SSriharsha.Basavapatna@Sun.COM 				rng_rv = vnet_dring_entry_set_dstate(pub_addr,
1225*12011SSriharsha.Basavapatna@Sun.COM 				    dp->dring_mtype, dp->dring_handle, pos, pos,
1226*12011SSriharsha.Basavapatna@Sun.COM 				    VIO_DESC_DONE);
1227*12011SSriharsha.Basavapatna@Sun.COM 				ldcp->ldc_stats.ierrors++;
1228*12011SSriharsha.Basavapatna@Sun.COM 				break;
1229*12011SSriharsha.Basavapatna@Sun.COM 			} else {
1230*12011SSriharsha.Basavapatna@Sun.COM 				D2(vswp, "%s(%d): copied in %ld bytes"
1231*12011SSriharsha.Basavapatna@Sun.COM 				    " using %d cookies", __func__,
1232*12011SSriharsha.Basavapatna@Sun.COM 				    ldcp->ldc_id, nbytes, desc.ncookies);
1233*12011SSriharsha.Basavapatna@Sun.COM 			}
1234*12011SSriharsha.Basavapatna@Sun.COM 
1235*12011SSriharsha.Basavapatna@Sun.COM 			/* adjust the read pointer to skip over the padding */
1236*12011SSriharsha.Basavapatna@Sun.COM 			mp->b_rptr += VNET_IPALIGN;
1237*12011SSriharsha.Basavapatna@Sun.COM 
1238*12011SSriharsha.Basavapatna@Sun.COM 			/* point to the actual end of data */
1239*12011SSriharsha.Basavapatna@Sun.COM 			mp->b_wptr = mp->b_rptr + desc.nbytes;
1240*12011SSriharsha.Basavapatna@Sun.COM 
1241*12011SSriharsha.Basavapatna@Sun.COM 			if (vmp != NULL) {
1242*12011SSriharsha.Basavapatna@Sun.COM 				vmp->state = VIO_MBLK_HAS_DATA;
1243*12011SSriharsha.Basavapatna@Sun.COM 			}
1244*12011SSriharsha.Basavapatna@Sun.COM 
1245*12011SSriharsha.Basavapatna@Sun.COM 			/* update statistics */
1246*12011SSriharsha.Basavapatna@Sun.COM 			ehp = (struct ether_header *)mp->b_rptr;
1247*12011SSriharsha.Basavapatna@Sun.COM 			if (IS_BROADCAST(ehp))
1248*12011SSriharsha.Basavapatna@Sun.COM 				ldcp->ldc_stats.brdcstrcv++;
1249*12011SSriharsha.Basavapatna@Sun.COM 			else if (IS_MULTICAST(ehp))
1250*12011SSriharsha.Basavapatna@Sun.COM 				ldcp->ldc_stats.multircv++;
1251*12011SSriharsha.Basavapatna@Sun.COM 
1252*12011SSriharsha.Basavapatna@Sun.COM 			ldcp->ldc_stats.ipackets++;
1253*12011SSriharsha.Basavapatna@Sun.COM 			ldcp->ldc_stats.rbytes += desc.nbytes;
1254*12011SSriharsha.Basavapatna@Sun.COM 
1255*12011SSriharsha.Basavapatna@Sun.COM 			/*
1256*12011SSriharsha.Basavapatna@Sun.COM 			 * IPALIGN space can be used for VLAN_TAG
1257*12011SSriharsha.Basavapatna@Sun.COM 			 */
1258*12011SSriharsha.Basavapatna@Sun.COM 			(void) vsw_vlan_frame_pretag(ldcp->ldc_port,
1259*12011SSriharsha.Basavapatna@Sun.COM 			    VSW_VNETPORT, mp);
1260*12011SSriharsha.Basavapatna@Sun.COM 
1261*12011SSriharsha.Basavapatna@Sun.COM 			/* build a chain of received packets */
1262*12011SSriharsha.Basavapatna@Sun.COM 			if (bp == NULL) {
1263*12011SSriharsha.Basavapatna@Sun.COM 				/* first pkt */
1264*12011SSriharsha.Basavapatna@Sun.COM 				bp = mp;
1265*12011SSriharsha.Basavapatna@Sun.COM 				bp->b_next = bp->b_prev = NULL;
1266*12011SSriharsha.Basavapatna@Sun.COM 				bpt = bp;
1267*12011SSriharsha.Basavapatna@Sun.COM 				chain = 1;
1268*12011SSriharsha.Basavapatna@Sun.COM 			} else {
1269*12011SSriharsha.Basavapatna@Sun.COM 				mp->b_next = mp->b_prev = NULL;
1270*12011SSriharsha.Basavapatna@Sun.COM 				bpt->b_next = mp;
1271*12011SSriharsha.Basavapatna@Sun.COM 				bpt = mp;
1272*12011SSriharsha.Basavapatna@Sun.COM 				chain++;
1273*12011SSriharsha.Basavapatna@Sun.COM 			}
1274*12011SSriharsha.Basavapatna@Sun.COM 
1275*12011SSriharsha.Basavapatna@Sun.COM vsw_process_desc_done:
1276*12011SSriharsha.Basavapatna@Sun.COM 			/* mark we are finished with this descriptor */
1277*12011SSriharsha.Basavapatna@Sun.COM 			if ((rng_rv = vnet_dring_entry_set_dstate(pub_addr,
1278*12011SSriharsha.Basavapatna@Sun.COM 			    dp->dring_mtype, dp->dring_handle, pos, pos,
1279*12011SSriharsha.Basavapatna@Sun.COM 			    VIO_DESC_DONE)) != 0) {
1280*12011SSriharsha.Basavapatna@Sun.COM 				DERR(vswp, "%s(%lld): unable to update "
1281*12011SSriharsha.Basavapatna@Sun.COM 				    "dstate at pos %d: err %d",
1282*12011SSriharsha.Basavapatna@Sun.COM 				    __func__, pos, ldcp->ldc_id, rng_rv);
1283*12011SSriharsha.Basavapatna@Sun.COM 				ldcp->ldc_stats.ierrors++;
1284*12011SSriharsha.Basavapatna@Sun.COM 				break;
1285*12011SSriharsha.Basavapatna@Sun.COM 			}
1286*12011SSriharsha.Basavapatna@Sun.COM 
1287*12011SSriharsha.Basavapatna@Sun.COM 			/*
1288*12011SSriharsha.Basavapatna@Sun.COM 			 * Send an ACK back to peer if requested.
1289*12011SSriharsha.Basavapatna@Sun.COM 			 */
1290*12011SSriharsha.Basavapatna@Sun.COM 			if (desc.hdr.ack) {
1291*12011SSriharsha.Basavapatna@Sun.COM 				dring_pkt->start_idx = range_start;
1292*12011SSriharsha.Basavapatna@Sun.COM 				dring_pkt->end_idx = range_end;
1293*12011SSriharsha.Basavapatna@Sun.COM 
1294*12011SSriharsha.Basavapatna@Sun.COM 				DERR(vswp, "%s(%lld): processed %d %d, ACK"
1295*12011SSriharsha.Basavapatna@Sun.COM 				    " requested", __func__, ldcp->ldc_id,
1296*12011SSriharsha.Basavapatna@Sun.COM 				    dring_pkt->start_idx, dring_pkt->end_idx);
1297*12011SSriharsha.Basavapatna@Sun.COM 
1298*12011SSriharsha.Basavapatna@Sun.COM 				dring_pkt->dring_process_state = VIO_DP_ACTIVE;
1299*12011SSriharsha.Basavapatna@Sun.COM 				dring_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK;
1300*12011SSriharsha.Basavapatna@Sun.COM 				dring_pkt->tag.vio_sid = ldcp->local_session;
1301*12011SSriharsha.Basavapatna@Sun.COM 
1302*12011SSriharsha.Basavapatna@Sun.COM 				msg_rv = vsw_send_msg(ldcp, (void *)dring_pkt,
1303*12011SSriharsha.Basavapatna@Sun.COM 				    sizeof (vio_dring_msg_t), B_FALSE);
1304*12011SSriharsha.Basavapatna@Sun.COM 
1305*12011SSriharsha.Basavapatna@Sun.COM 				/*
1306*12011SSriharsha.Basavapatna@Sun.COM 				 * Check if ACK was successfully sent. If not
1307*12011SSriharsha.Basavapatna@Sun.COM 				 * we break and deal with that below.
1308*12011SSriharsha.Basavapatna@Sun.COM 				 */
1309*12011SSriharsha.Basavapatna@Sun.COM 				if (msg_rv != 0)
1310*12011SSriharsha.Basavapatna@Sun.COM 					break;
1311*12011SSriharsha.Basavapatna@Sun.COM 
1312*12011SSriharsha.Basavapatna@Sun.COM 				prev_desc_ack = B_TRUE;
1313*12011SSriharsha.Basavapatna@Sun.COM 				range_start = pos;
1314*12011SSriharsha.Basavapatna@Sun.COM 			}
1315*12011SSriharsha.Basavapatna@Sun.COM 
1316*12011SSriharsha.Basavapatna@Sun.COM 			/* next descriptor */
1317*12011SSriharsha.Basavapatna@Sun.COM 			pos = (pos + 1) % len;
1318*12011SSriharsha.Basavapatna@Sun.COM 			cnt++;
1319*12011SSriharsha.Basavapatna@Sun.COM 
1320*12011SSriharsha.Basavapatna@Sun.COM 			/*
1321*12011SSriharsha.Basavapatna@Sun.COM 			 * Break out of loop here and stop processing to
1322*12011SSriharsha.Basavapatna@Sun.COM 			 * allow some other network device (or disk) to
1323*12011SSriharsha.Basavapatna@Sun.COM 			 * get access to the cpu.
1324*12011SSriharsha.Basavapatna@Sun.COM 			 */
1325*12011SSriharsha.Basavapatna@Sun.COM 			if (chain > vsw_chain_len) {
1326*12011SSriharsha.Basavapatna@Sun.COM 				D3(vswp, "%s(%lld): switching chain of %d "
1327*12011SSriharsha.Basavapatna@Sun.COM 				    "msgs", __func__, ldcp->ldc_id, chain);
1328*12011SSriharsha.Basavapatna@Sun.COM 				break;
1329*12011SSriharsha.Basavapatna@Sun.COM 			}
1330*12011SSriharsha.Basavapatna@Sun.COM 		}
1331*12011SSriharsha.Basavapatna@Sun.COM 
1332*12011SSriharsha.Basavapatna@Sun.COM 		/* send the chain of packets to be switched */
1333*12011SSriharsha.Basavapatna@Sun.COM 		if (bp != NULL) {
1334*12011SSriharsha.Basavapatna@Sun.COM 			DTRACE_PROBE1(vsw_rcv_msgs, int, chain);
1335*12011SSriharsha.Basavapatna@Sun.COM 			D3(vswp, "%s(%lld): switching chain of %d msgs",
1336*12011SSriharsha.Basavapatna@Sun.COM 			    __func__, ldcp->ldc_id, chain);
1337*12011SSriharsha.Basavapatna@Sun.COM 			vswp->vsw_switch_frame(vswp, bp, VSW_VNETPORT,
1338*12011SSriharsha.Basavapatna@Sun.COM 			    ldcp->ldc_port, NULL);
1339*12011SSriharsha.Basavapatna@Sun.COM 		}
1340*12011SSriharsha.Basavapatna@Sun.COM 
1341*12011SSriharsha.Basavapatna@Sun.COM 		/*
1342*12011SSriharsha.Basavapatna@Sun.COM 		 * If when we encountered an error when attempting to
1343*12011SSriharsha.Basavapatna@Sun.COM 		 * access an imported dring, initiate a connection reset.
1344*12011SSriharsha.Basavapatna@Sun.COM 		 */
1345*12011SSriharsha.Basavapatna@Sun.COM 		if (rng_rv != 0) {
1346*12011SSriharsha.Basavapatna@Sun.COM 			vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
1347*12011SSriharsha.Basavapatna@Sun.COM 			break;
1348*12011SSriharsha.Basavapatna@Sun.COM 		}
1349*12011SSriharsha.Basavapatna@Sun.COM 
1350*12011SSriharsha.Basavapatna@Sun.COM 		/*
1351*12011SSriharsha.Basavapatna@Sun.COM 		 * If when we attempted to send the ACK we found that the
1352*12011SSriharsha.Basavapatna@Sun.COM 		 * channel had been reset then now handle this.
1353*12011SSriharsha.Basavapatna@Sun.COM 		 */
1354*12011SSriharsha.Basavapatna@Sun.COM 		if (msg_rv == ECONNRESET) {
1355*12011SSriharsha.Basavapatna@Sun.COM 			vsw_process_conn_evt(ldcp, VSW_CONN_RESET);
1356*12011SSriharsha.Basavapatna@Sun.COM 			break;
1357*12011SSriharsha.Basavapatna@Sun.COM 		}
1358*12011SSriharsha.Basavapatna@Sun.COM 
1359*12011SSriharsha.Basavapatna@Sun.COM 		DTRACE_PROBE1(msg_cnt, int, cnt);
1360*12011SSriharsha.Basavapatna@Sun.COM 
1361*12011SSriharsha.Basavapatna@Sun.COM 		/*
1362*12011SSriharsha.Basavapatna@Sun.COM 		 * We are now finished so ACK back with the state
1363*12011SSriharsha.Basavapatna@Sun.COM 		 * set to STOPPING so our peer knows we are finished
1364*12011SSriharsha.Basavapatna@Sun.COM 		 */
1365*12011SSriharsha.Basavapatna@Sun.COM 		dring_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK;
1366*12011SSriharsha.Basavapatna@Sun.COM 		dring_pkt->tag.vio_sid = ldcp->local_session;
1367*12011SSriharsha.Basavapatna@Sun.COM 
1368*12011SSriharsha.Basavapatna@Sun.COM 		dring_pkt->dring_process_state = VIO_DP_STOPPED;
1369*12011SSriharsha.Basavapatna@Sun.COM 
1370*12011SSriharsha.Basavapatna@Sun.COM 		DTRACE_PROBE(stop_process_sent);
1371*12011SSriharsha.Basavapatna@Sun.COM 
1372*12011SSriharsha.Basavapatna@Sun.COM 		/*
1373*12011SSriharsha.Basavapatna@Sun.COM 		 * We have not processed any more descriptors beyond
1374*12011SSriharsha.Basavapatna@Sun.COM 		 * the last one we ACK'd.
1375*12011SSriharsha.Basavapatna@Sun.COM 		 */
1376*12011SSriharsha.Basavapatna@Sun.COM 		if (prev_desc_ack)
1377*12011SSriharsha.Basavapatna@Sun.COM 			range_start = range_end;
1378*12011SSriharsha.Basavapatna@Sun.COM 
1379*12011SSriharsha.Basavapatna@Sun.COM 		dring_pkt->start_idx = range_start;
1380*12011SSriharsha.Basavapatna@Sun.COM 		dring_pkt->end_idx = range_end;
1381*12011SSriharsha.Basavapatna@Sun.COM 
1382*12011SSriharsha.Basavapatna@Sun.COM 		D2(vswp, "%s(%lld) processed : %d : %d, now stopping",
1383*12011SSriharsha.Basavapatna@Sun.COM 		    __func__, ldcp->ldc_id, dring_pkt->start_idx,
1384*12011SSriharsha.Basavapatna@Sun.COM 		    dring_pkt->end_idx);
1385*12011SSriharsha.Basavapatna@Sun.COM 
1386*12011SSriharsha.Basavapatna@Sun.COM 		(void) vsw_send_msg(ldcp, (void *)dring_pkt,
1387*12011SSriharsha.Basavapatna@Sun.COM 		    sizeof (vio_dring_msg_t), B_TRUE);
1388*12011SSriharsha.Basavapatna@Sun.COM 		ldcp->ldc_stats.dring_data_acks_sent++;
1389*12011SSriharsha.Basavapatna@Sun.COM 		ldcp->ldc_stats.dring_stopped_acks_sent++;
1390*12011SSriharsha.Basavapatna@Sun.COM 		break;
1391*12011SSriharsha.Basavapatna@Sun.COM 
1392*12011SSriharsha.Basavapatna@Sun.COM 	case VIO_SUBTYPE_ACK:
1393*12011SSriharsha.Basavapatna@Sun.COM 		D2(vswp, "%s(%lld): VIO_SUBTYPE_ACK", __func__, ldcp->ldc_id);
1394*12011SSriharsha.Basavapatna@Sun.COM 		/*
1395*12011SSriharsha.Basavapatna@Sun.COM 		 * Verify that the relevant descriptors are all
1396*12011SSriharsha.Basavapatna@Sun.COM 		 * marked as DONE
1397*12011SSriharsha.Basavapatna@Sun.COM 		 */
1398*12011SSriharsha.Basavapatna@Sun.COM 		dp = ldcp->lane_out.dringp;
1399*12011SSriharsha.Basavapatna@Sun.COM 		if (dp->ident != dring_pkt->dring_ident) {
1400*12011SSriharsha.Basavapatna@Sun.COM 			DERR(vswp, "%s: unknown ident in ACK", __func__);
1401*12011SSriharsha.Basavapatna@Sun.COM 			return;
1402*12011SSriharsha.Basavapatna@Sun.COM 		}
1403*12011SSriharsha.Basavapatna@Sun.COM 
1404*12011SSriharsha.Basavapatna@Sun.COM 		start = end = 0;
1405*12011SSriharsha.Basavapatna@Sun.COM 		start = dring_pkt->start_idx;
1406*12011SSriharsha.Basavapatna@Sun.COM 		end = dring_pkt->end_idx;
1407*12011SSriharsha.Basavapatna@Sun.COM 		len = dp->num_descriptors;
1408*12011SSriharsha.Basavapatna@Sun.COM 
1409*12011SSriharsha.Basavapatna@Sun.COM 
1410*12011SSriharsha.Basavapatna@Sun.COM 		mutex_enter(&dp->dlock);
1411*12011SSriharsha.Basavapatna@Sun.COM 		dp->last_ack_recv = end;
1412*12011SSriharsha.Basavapatna@Sun.COM 		ldcp->ldc_stats.dring_data_acks_rcvd++;
1413*12011SSriharsha.Basavapatna@Sun.COM 		mutex_exit(&dp->dlock);
1414*12011SSriharsha.Basavapatna@Sun.COM 
1415*12011SSriharsha.Basavapatna@Sun.COM 		(void) vsw_reclaim_dring(dp, start);
1416*12011SSriharsha.Basavapatna@Sun.COM 
1417*12011SSriharsha.Basavapatna@Sun.COM 		/*
1418*12011SSriharsha.Basavapatna@Sun.COM 		 * If our peer is stopping processing descriptors then
1419*12011SSriharsha.Basavapatna@Sun.COM 		 * we check to make sure it has processed all the descriptors
1420*12011SSriharsha.Basavapatna@Sun.COM 		 * we have updated. If not then we send it a new message
1421*12011SSriharsha.Basavapatna@Sun.COM 		 * to prompt it to restart.
1422*12011SSriharsha.Basavapatna@Sun.COM 		 */
1423*12011SSriharsha.Basavapatna@Sun.COM 		if (dring_pkt->dring_process_state == VIO_DP_STOPPED) {
1424*12011SSriharsha.Basavapatna@Sun.COM 			DTRACE_PROBE(stop_process_recv);
1425*12011SSriharsha.Basavapatna@Sun.COM 			D2(vswp, "%s(%lld): got stopping msg : %d : %d",
1426*12011SSriharsha.Basavapatna@Sun.COM 			    __func__, ldcp->ldc_id, dring_pkt->start_idx,
1427*12011SSriharsha.Basavapatna@Sun.COM 			    dring_pkt->end_idx);
1428*12011SSriharsha.Basavapatna@Sun.COM 
1429*12011SSriharsha.Basavapatna@Sun.COM 			/*
1430*12011SSriharsha.Basavapatna@Sun.COM 			 * Check next descriptor in public section of ring.
1431*12011SSriharsha.Basavapatna@Sun.COM 			 * If it's marked as READY then we need to prompt our
1432*12011SSriharsha.Basavapatna@Sun.COM 			 * peer to start processing the ring again.
1433*12011SSriharsha.Basavapatna@Sun.COM 			 */
1434*12011SSriharsha.Basavapatna@Sun.COM 			i = (end + 1) % len;
1435*12011SSriharsha.Basavapatna@Sun.COM 			pub_addr = (vnet_public_desc_t *)dp->pub_addr + i;
1436*12011SSriharsha.Basavapatna@Sun.COM 			priv_addr = (vsw_private_desc_t *)dp->priv_addr + i;
1437*12011SSriharsha.Basavapatna@Sun.COM 
1438*12011SSriharsha.Basavapatna@Sun.COM 			/*
1439*12011SSriharsha.Basavapatna@Sun.COM 			 * Hold the restart lock across all of this to
1440*12011SSriharsha.Basavapatna@Sun.COM 			 * make sure that it's not possible for us to
1441*12011SSriharsha.Basavapatna@Sun.COM 			 * decide that a msg needs to be sent in the future
1442*12011SSriharsha.Basavapatna@Sun.COM 			 * but the sending code having already checked is
1443*12011SSriharsha.Basavapatna@Sun.COM 			 * about to exit.
1444*12011SSriharsha.Basavapatna@Sun.COM 			 */
1445*12011SSriharsha.Basavapatna@Sun.COM 			mutex_enter(&dp->restart_lock);
1446*12011SSriharsha.Basavapatna@Sun.COM 			ldcp->ldc_stats.dring_stopped_acks_rcvd++;
1447*12011SSriharsha.Basavapatna@Sun.COM 			mutex_enter(&priv_addr->dstate_lock);
1448*12011SSriharsha.Basavapatna@Sun.COM 			if (pub_addr->hdr.dstate == VIO_DESC_READY) {
1449*12011SSriharsha.Basavapatna@Sun.COM 
1450*12011SSriharsha.Basavapatna@Sun.COM 				mutex_exit(&priv_addr->dstate_lock);
1451*12011SSriharsha.Basavapatna@Sun.COM 
1452*12011SSriharsha.Basavapatna@Sun.COM 				dring_pkt->tag.vio_subtype = VIO_SUBTYPE_INFO;
1453*12011SSriharsha.Basavapatna@Sun.COM 				dring_pkt->tag.vio_sid = ldcp->local_session;
1454*12011SSriharsha.Basavapatna@Sun.COM 
1455*12011SSriharsha.Basavapatna@Sun.COM 				dring_pkt->start_idx = (end + 1) % len;
1456*12011SSriharsha.Basavapatna@Sun.COM 				dring_pkt->end_idx = -1;
1457*12011SSriharsha.Basavapatna@Sun.COM 
1458*12011SSriharsha.Basavapatna@Sun.COM 				D2(vswp, "%s(%lld) : sending restart msg:"
1459*12011SSriharsha.Basavapatna@Sun.COM 				    " %d : %d", __func__, ldcp->ldc_id,
1460*12011SSriharsha.Basavapatna@Sun.COM 				    dring_pkt->start_idx, dring_pkt->end_idx);
1461*12011SSriharsha.Basavapatna@Sun.COM 
1462*12011SSriharsha.Basavapatna@Sun.COM 				msg_rv = vsw_send_msg(ldcp, (void *)dring_pkt,
1463*12011SSriharsha.Basavapatna@Sun.COM 				    sizeof (vio_dring_msg_t), B_FALSE);
1464*12011SSriharsha.Basavapatna@Sun.COM 				ldcp->ldc_stats.dring_data_msgs_sent++;
1465*12011SSriharsha.Basavapatna@Sun.COM 
1466*12011SSriharsha.Basavapatna@Sun.COM 			} else {
1467*12011SSriharsha.Basavapatna@Sun.COM 				mutex_exit(&priv_addr->dstate_lock);
1468*12011SSriharsha.Basavapatna@Sun.COM 				dp->restart_reqd = B_TRUE;
1469*12011SSriharsha.Basavapatna@Sun.COM 			}
1470*12011SSriharsha.Basavapatna@Sun.COM 			mutex_exit(&dp->restart_lock);
1471*12011SSriharsha.Basavapatna@Sun.COM 		}
1472*12011SSriharsha.Basavapatna@Sun.COM 
1473*12011SSriharsha.Basavapatna@Sun.COM 		if (msg_rv == ECONNRESET)
1474*12011SSriharsha.Basavapatna@Sun.COM 			vsw_process_conn_evt(ldcp, VSW_CONN_RESET);
1475*12011SSriharsha.Basavapatna@Sun.COM 
1476*12011SSriharsha.Basavapatna@Sun.COM 		break;
1477*12011SSriharsha.Basavapatna@Sun.COM 
1478*12011SSriharsha.Basavapatna@Sun.COM 	case VIO_SUBTYPE_NACK:
1479*12011SSriharsha.Basavapatna@Sun.COM 		DWARN(vswp, "%s(%lld): VIO_SUBTYPE_NACK",
1480*12011SSriharsha.Basavapatna@Sun.COM 		    __func__, ldcp->ldc_id);
1481*12011SSriharsha.Basavapatna@Sun.COM 		/*
1482*12011SSriharsha.Basavapatna@Sun.COM 		 * Something is badly wrong if we are getting NACK's
1483*12011SSriharsha.Basavapatna@Sun.COM 		 * for our data pkts. So reset the channel.
1484*12011SSriharsha.Basavapatna@Sun.COM 		 */
1485*12011SSriharsha.Basavapatna@Sun.COM 		vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
1486*12011SSriharsha.Basavapatna@Sun.COM 
1487*12011SSriharsha.Basavapatna@Sun.COM 		break;
1488*12011SSriharsha.Basavapatna@Sun.COM 
1489*12011SSriharsha.Basavapatna@Sun.COM 	default:
1490*12011SSriharsha.Basavapatna@Sun.COM 		DERR(vswp, "%s(%lld): Unknown vio_subtype %x\n", __func__,
1491*12011SSriharsha.Basavapatna@Sun.COM 		    ldcp->ldc_id, dring_pkt->tag.vio_subtype);
1492*12011SSriharsha.Basavapatna@Sun.COM 	}
1493*12011SSriharsha.Basavapatna@Sun.COM 
1494*12011SSriharsha.Basavapatna@Sun.COM 	D1(vswp, "%s(%lld) exit", __func__, ldcp->ldc_id);
1495*12011SSriharsha.Basavapatna@Sun.COM }
1496