/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
 */
25*13101SVenki.Rajagopalan@Sun.COM
26*13101SVenki.Rajagopalan@Sun.COM #include <sys/types.h>
27*13101SVenki.Rajagopalan@Sun.COM #include <sys/kmem.h>
28*13101SVenki.Rajagopalan@Sun.COM #include <sys/conf.h>
29*13101SVenki.Rajagopalan@Sun.COM #include <sys/ddi.h>
30*13101SVenki.Rajagopalan@Sun.COM #include <sys/sunddi.h>
31*13101SVenki.Rajagopalan@Sun.COM #include <sys/ksynch.h>
32*13101SVenki.Rajagopalan@Sun.COM #include <sys/pattr.h> /* HCK_* */
33*13101SVenki.Rajagopalan@Sun.COM #include <inet/ip.h> /* ipha_t */
34*13101SVenki.Rajagopalan@Sun.COM #include <inet/tcp.h> /* tcph_t */
35*13101SVenki.Rajagopalan@Sun.COM #include <sys/mac_provider.h> /* mac_* */
36*13101SVenki.Rajagopalan@Sun.COM #include <sys/strsun.h> /* MBLKL */
37*13101SVenki.Rajagopalan@Sun.COM
38*13101SVenki.Rajagopalan@Sun.COM #include <sys/ib/clients/eoib/eib_impl.h>
39*13101SVenki.Rajagopalan@Sun.COM
40*13101SVenki.Rajagopalan@Sun.COM /*
41*13101SVenki.Rajagopalan@Sun.COM * Declarations private to this file
42*13101SVenki.Rajagopalan@Sun.COM */
43*13101SVenki.Rajagopalan@Sun.COM static int eib_data_setup_cqs(eib_t *, eib_vnic_t *);
44*13101SVenki.Rajagopalan@Sun.COM static int eib_data_setup_ud_channel(eib_t *, eib_vnic_t *);
45*13101SVenki.Rajagopalan@Sun.COM static void eib_data_setup_lso(eib_wqe_t *, mblk_t *, uint32_t,
46*13101SVenki.Rajagopalan@Sun.COM eib_ether_hdr_t *);
47*13101SVenki.Rajagopalan@Sun.COM static int eib_data_prepare_sgl(eib_vnic_t *, eib_wqe_t *, mblk_t *);
48*13101SVenki.Rajagopalan@Sun.COM static int eib_data_is_mcast_pkt_ok(eib_vnic_t *, uint8_t *, uint64_t *,
49*13101SVenki.Rajagopalan@Sun.COM uint64_t *);
50*13101SVenki.Rajagopalan@Sun.COM static void eib_data_rx_comp_intr(ibt_cq_hdl_t, void *);
51*13101SVenki.Rajagopalan@Sun.COM static void eib_data_tx_comp_intr(ibt_cq_hdl_t, void *);
52*13101SVenki.Rajagopalan@Sun.COM static mblk_t *eib_data_rx_comp(eib_vnic_t *, eib_wqe_t *, ibt_wc_t *);
53*13101SVenki.Rajagopalan@Sun.COM static void eib_data_tx_comp(eib_vnic_t *, eib_wqe_t *, eib_chan_t *);
54*13101SVenki.Rajagopalan@Sun.COM static void eib_data_err_comp(eib_vnic_t *, eib_wqe_t *, ibt_wc_t *);
55*13101SVenki.Rajagopalan@Sun.COM static void eib_rb_data_setup_cqs(eib_t *, eib_vnic_t *);
56*13101SVenki.Rajagopalan@Sun.COM static void eib_rb_data_setup_ud_channel(eib_t *, eib_vnic_t *);
57*13101SVenki.Rajagopalan@Sun.COM
58*13101SVenki.Rajagopalan@Sun.COM
59*13101SVenki.Rajagopalan@Sun.COM int
eib_data_create_qp(eib_t * ss,eib_vnic_t * vnic,int * err)60*13101SVenki.Rajagopalan@Sun.COM eib_data_create_qp(eib_t *ss, eib_vnic_t *vnic, int *err)
61*13101SVenki.Rajagopalan@Sun.COM {
62*13101SVenki.Rajagopalan@Sun.COM eib_chan_t *chan = NULL;
63*13101SVenki.Rajagopalan@Sun.COM
64*13101SVenki.Rajagopalan@Sun.COM /*
65*13101SVenki.Rajagopalan@Sun.COM * Allocate a eib_chan_t to store stuff about this vnic's data qp
66*13101SVenki.Rajagopalan@Sun.COM * and initialize it with default admin qp pkey parameters. We'll
67*13101SVenki.Rajagopalan@Sun.COM * re-associate this with the pkey we receive from the gw once we
68*13101SVenki.Rajagopalan@Sun.COM * receive the login ack.
69*13101SVenki.Rajagopalan@Sun.COM */
70*13101SVenki.Rajagopalan@Sun.COM vnic->vn_data_chan = eib_chan_init();
71*13101SVenki.Rajagopalan@Sun.COM
72*13101SVenki.Rajagopalan@Sun.COM chan = vnic->vn_data_chan;
73*13101SVenki.Rajagopalan@Sun.COM chan->ch_pkey = ss->ei_admin_chan->ch_pkey;
74*13101SVenki.Rajagopalan@Sun.COM chan->ch_pkey_ix = ss->ei_admin_chan->ch_pkey_ix;
75*13101SVenki.Rajagopalan@Sun.COM chan->ch_vnic_inst = vnic->vn_instance;
76*13101SVenki.Rajagopalan@Sun.COM
77*13101SVenki.Rajagopalan@Sun.COM /*
78*13101SVenki.Rajagopalan@Sun.COM * Setup tx/rx CQs and completion handlers
79*13101SVenki.Rajagopalan@Sun.COM */
80*13101SVenki.Rajagopalan@Sun.COM if (eib_data_setup_cqs(ss, vnic) != EIB_E_SUCCESS) {
81*13101SVenki.Rajagopalan@Sun.COM EIB_DPRINTF_ERR(ss->ei_instance, "eib_data_create_qp: "
82*13101SVenki.Rajagopalan@Sun.COM "eib_data_setup_cqs(vn_inst=0x%x) failed",
83*13101SVenki.Rajagopalan@Sun.COM vnic->vn_instance);
84*13101SVenki.Rajagopalan@Sun.COM *err = ENOMEM;
85*13101SVenki.Rajagopalan@Sun.COM goto data_create_qp_fail;
86*13101SVenki.Rajagopalan@Sun.COM }
87*13101SVenki.Rajagopalan@Sun.COM
88*13101SVenki.Rajagopalan@Sun.COM /*
89*13101SVenki.Rajagopalan@Sun.COM * Setup UD channel
90*13101SVenki.Rajagopalan@Sun.COM */
91*13101SVenki.Rajagopalan@Sun.COM if (eib_data_setup_ud_channel(ss, vnic) != EIB_E_SUCCESS) {
92*13101SVenki.Rajagopalan@Sun.COM EIB_DPRINTF_ERR(ss->ei_instance, "eib_data_create_qp: "
93*13101SVenki.Rajagopalan@Sun.COM "eib_data_setup_ud_channel(vn_inst=0x%x) failed",
94*13101SVenki.Rajagopalan@Sun.COM vnic->vn_instance);
95*13101SVenki.Rajagopalan@Sun.COM *err = ENOMEM;
96*13101SVenki.Rajagopalan@Sun.COM goto data_create_qp_fail;
97*13101SVenki.Rajagopalan@Sun.COM }
98*13101SVenki.Rajagopalan@Sun.COM
99*13101SVenki.Rajagopalan@Sun.COM return (EIB_E_SUCCESS);
100*13101SVenki.Rajagopalan@Sun.COM
101*13101SVenki.Rajagopalan@Sun.COM data_create_qp_fail:
102*13101SVenki.Rajagopalan@Sun.COM eib_rb_data_create_qp(ss, vnic);
103*13101SVenki.Rajagopalan@Sun.COM return (EIB_E_FAILURE);
104*13101SVenki.Rajagopalan@Sun.COM }
105*13101SVenki.Rajagopalan@Sun.COM
106*13101SVenki.Rajagopalan@Sun.COM /*ARGSUSED*/
107*13101SVenki.Rajagopalan@Sun.COM uint_t
eib_data_rx_comp_handler(caddr_t arg1,caddr_t arg2)108*13101SVenki.Rajagopalan@Sun.COM eib_data_rx_comp_handler(caddr_t arg1, caddr_t arg2)
109*13101SVenki.Rajagopalan@Sun.COM {
110*13101SVenki.Rajagopalan@Sun.COM eib_vnic_t *vnic = (eib_vnic_t *)(void *)arg1;
111*13101SVenki.Rajagopalan@Sun.COM eib_t *ss = vnic->vn_ss;
112*13101SVenki.Rajagopalan@Sun.COM eib_chan_t *chan = vnic->vn_data_chan;
113*13101SVenki.Rajagopalan@Sun.COM eib_stats_t *stats = ss->ei_stats;
114*13101SVenki.Rajagopalan@Sun.COM ibt_wc_t *wc;
115*13101SVenki.Rajagopalan@Sun.COM eib_wqe_t *wqe;
116*13101SVenki.Rajagopalan@Sun.COM mblk_t *mp;
117*13101SVenki.Rajagopalan@Sun.COM mblk_t *head = NULL;
118*13101SVenki.Rajagopalan@Sun.COM mblk_t *tail = NULL;
119*13101SVenki.Rajagopalan@Sun.COM ibt_status_t ret;
120*13101SVenki.Rajagopalan@Sun.COM uint_t pkts_per_call = 0;
121*13101SVenki.Rajagopalan@Sun.COM uint_t polled;
122*13101SVenki.Rajagopalan@Sun.COM uint_t rbytes;
123*13101SVenki.Rajagopalan@Sun.COM uint_t ipkts;
124*13101SVenki.Rajagopalan@Sun.COM uint_t num_wc;
125*13101SVenki.Rajagopalan@Sun.COM int i;
126*13101SVenki.Rajagopalan@Sun.COM
127*13101SVenki.Rajagopalan@Sun.COM /*
128*13101SVenki.Rajagopalan@Sun.COM * Re-arm the rx notification callback before we start polling
129*13101SVenki.Rajagopalan@Sun.COM * the completion queue. There's nothing much we can do if the
130*13101SVenki.Rajagopalan@Sun.COM * enable_cq_notify fails - we issue a warning and move on.
131*13101SVenki.Rajagopalan@Sun.COM */
132*13101SVenki.Rajagopalan@Sun.COM ret = ibt_enable_cq_notify(chan->ch_rcv_cq_hdl, IBT_NEXT_COMPLETION);
133*13101SVenki.Rajagopalan@Sun.COM if (ret != IBT_SUCCESS) {
134*13101SVenki.Rajagopalan@Sun.COM EIB_DPRINTF_WARN(ss->ei_instance, "eib_data_rx_comp_handler: "
135*13101SVenki.Rajagopalan@Sun.COM "ibt_enable_cq_notify() failed, ret=%d", ret);
136*13101SVenki.Rajagopalan@Sun.COM }
137*13101SVenki.Rajagopalan@Sun.COM
138*13101SVenki.Rajagopalan@Sun.COM /*
139*13101SVenki.Rajagopalan@Sun.COM * We don't want to be stuck in receive processing for too long without
140*13101SVenki.Rajagopalan@Sun.COM * giving others a chance.
141*13101SVenki.Rajagopalan@Sun.COM */
142*13101SVenki.Rajagopalan@Sun.COM num_wc = (chan->ch_rcv_cq_sz < EIB_MAX_RX_PKTS_ONINTR) ?
143*13101SVenki.Rajagopalan@Sun.COM chan->ch_rcv_cq_sz : EIB_MAX_RX_PKTS_ONINTR;
144*13101SVenki.Rajagopalan@Sun.COM
145*13101SVenki.Rajagopalan@Sun.COM /*
146*13101SVenki.Rajagopalan@Sun.COM * Handle rx completions
147*13101SVenki.Rajagopalan@Sun.COM */
148*13101SVenki.Rajagopalan@Sun.COM while ((ret = ibt_poll_cq(chan->ch_rcv_cq_hdl, chan->ch_rcv_wc,
149*13101SVenki.Rajagopalan@Sun.COM num_wc, &polled)) == IBT_SUCCESS) {
150*13101SVenki.Rajagopalan@Sun.COM
151*13101SVenki.Rajagopalan@Sun.COM rbytes = ipkts = 0;
152*13101SVenki.Rajagopalan@Sun.COM head = tail = NULL;
153*13101SVenki.Rajagopalan@Sun.COM
154*13101SVenki.Rajagopalan@Sun.COM for (wc = chan->ch_rcv_wc, i = 0; i < polled; i++, wc++) {
155*13101SVenki.Rajagopalan@Sun.COM wqe = (eib_wqe_t *)(uintptr_t)wc->wc_id;
156*13101SVenki.Rajagopalan@Sun.COM
157*13101SVenki.Rajagopalan@Sun.COM ASSERT(EIB_WQE_TYPE(wqe->qe_info) == EIB_WQE_RX);
158*13101SVenki.Rajagopalan@Sun.COM
159*13101SVenki.Rajagopalan@Sun.COM /*
160*13101SVenki.Rajagopalan@Sun.COM * Clear the posted-to-hca flag and reduce the number
161*13101SVenki.Rajagopalan@Sun.COM * of posted-rwqes count
162*13101SVenki.Rajagopalan@Sun.COM */
163*13101SVenki.Rajagopalan@Sun.COM wqe->qe_info &= (~EIB_WQE_FLG_POSTED_TO_HCA);
164*13101SVenki.Rajagopalan@Sun.COM eib_rsrc_decr_posted_rwqe(ss, chan);
165*13101SVenki.Rajagopalan@Sun.COM
166*13101SVenki.Rajagopalan@Sun.COM rbytes += wc->wc_bytes_xfer;
167*13101SVenki.Rajagopalan@Sun.COM if (wc->wc_status != IBT_WC_SUCCESS) {
168*13101SVenki.Rajagopalan@Sun.COM EIB_INCR_COUNTER(&stats->st_ierrors);
169*13101SVenki.Rajagopalan@Sun.COM eib_data_err_comp(vnic, wqe, wc);
170*13101SVenki.Rajagopalan@Sun.COM } else {
171*13101SVenki.Rajagopalan@Sun.COM ipkts++;
172*13101SVenki.Rajagopalan@Sun.COM mp = eib_data_rx_comp(vnic, wqe, wc);
173*13101SVenki.Rajagopalan@Sun.COM if (mp == NULL) {
174*13101SVenki.Rajagopalan@Sun.COM continue;
175*13101SVenki.Rajagopalan@Sun.COM } else {
176*13101SVenki.Rajagopalan@Sun.COM /*
177*13101SVenki.Rajagopalan@Sun.COM * Add this mp to the list to
178*13101SVenki.Rajagopalan@Sun.COM * send it to the nw layer. Note
179*13101SVenki.Rajagopalan@Sun.COM * that the wqe could've been
180*13101SVenki.Rajagopalan@Sun.COM * returned to the pool if we're
181*13101SVenki.Rajagopalan@Sun.COM * running low, so don't process
182*13101SVenki.Rajagopalan@Sun.COM * wqe after this point.
183*13101SVenki.Rajagopalan@Sun.COM */
184*13101SVenki.Rajagopalan@Sun.COM if (head)
185*13101SVenki.Rajagopalan@Sun.COM tail->b_next = mp;
186*13101SVenki.Rajagopalan@Sun.COM else
187*13101SVenki.Rajagopalan@Sun.COM head = mp;
188*13101SVenki.Rajagopalan@Sun.COM tail = mp;
189*13101SVenki.Rajagopalan@Sun.COM }
190*13101SVenki.Rajagopalan@Sun.COM }
191*13101SVenki.Rajagopalan@Sun.COM }
192*13101SVenki.Rajagopalan@Sun.COM
193*13101SVenki.Rajagopalan@Sun.COM /*
194*13101SVenki.Rajagopalan@Sun.COM * We reduce the number of atomic updates to key statistics
195*13101SVenki.Rajagopalan@Sun.COM * by pooling them here, once per ibt_poll_cq(). The accuracy
196*13101SVenki.Rajagopalan@Sun.COM * and consistency of the published statistics within a cq
197*13101SVenki.Rajagopalan@Sun.COM * polling cycle will be compromised a little bit, but that
198*13101SVenki.Rajagopalan@Sun.COM * should be ok, given that we probably gain a little bit by
199*13101SVenki.Rajagopalan@Sun.COM * not having to do these atomic operations per packet.
200*13101SVenki.Rajagopalan@Sun.COM */
201*13101SVenki.Rajagopalan@Sun.COM EIB_UPDATE_COUNTER(&stats->st_rbytes, rbytes);
202*13101SVenki.Rajagopalan@Sun.COM EIB_UPDATE_COUNTER(&stats->st_ipkts, ipkts);
203*13101SVenki.Rajagopalan@Sun.COM
204*13101SVenki.Rajagopalan@Sun.COM pkts_per_call += ipkts;
205*13101SVenki.Rajagopalan@Sun.COM
206*13101SVenki.Rajagopalan@Sun.COM if (head) {
207*13101SVenki.Rajagopalan@Sun.COM mac_rx(ss->ei_mac_hdl, NULL, head);
208*13101SVenki.Rajagopalan@Sun.COM }
209*13101SVenki.Rajagopalan@Sun.COM
210*13101SVenki.Rajagopalan@Sun.COM /*
211*13101SVenki.Rajagopalan@Sun.COM * If we have processed too many packets in one attempt, we'll
212*13101SVenki.Rajagopalan@Sun.COM * have to come back here later.
213*13101SVenki.Rajagopalan@Sun.COM */
214*13101SVenki.Rajagopalan@Sun.COM if (pkts_per_call >= EIB_MAX_RX_PKTS_ONINTR) {
215*13101SVenki.Rajagopalan@Sun.COM (void) ddi_intr_trigger_softint(vnic->vn_data_rx_si_hdl,
216*13101SVenki.Rajagopalan@Sun.COM NULL);
217*13101SVenki.Rajagopalan@Sun.COM break;
218*13101SVenki.Rajagopalan@Sun.COM }
219*13101SVenki.Rajagopalan@Sun.COM
220*13101SVenki.Rajagopalan@Sun.COM num_wc -= polled;
221*13101SVenki.Rajagopalan@Sun.COM }
222*13101SVenki.Rajagopalan@Sun.COM
223*13101SVenki.Rajagopalan@Sun.COM return (DDI_INTR_CLAIMED);
224*13101SVenki.Rajagopalan@Sun.COM }
225*13101SVenki.Rajagopalan@Sun.COM
226*13101SVenki.Rajagopalan@Sun.COM /*ARGSUSED*/
227*13101SVenki.Rajagopalan@Sun.COM uint_t
eib_data_tx_comp_handler(caddr_t arg1,caddr_t arg2)228*13101SVenki.Rajagopalan@Sun.COM eib_data_tx_comp_handler(caddr_t arg1, caddr_t arg2)
229*13101SVenki.Rajagopalan@Sun.COM {
230*13101SVenki.Rajagopalan@Sun.COM eib_vnic_t *vnic = (eib_vnic_t *)(void *)arg1;
231*13101SVenki.Rajagopalan@Sun.COM eib_t *ss = vnic->vn_ss;
232*13101SVenki.Rajagopalan@Sun.COM eib_chan_t *chan = vnic->vn_data_chan;
233*13101SVenki.Rajagopalan@Sun.COM eib_stats_t *stats = ss->ei_stats;
234*13101SVenki.Rajagopalan@Sun.COM ibt_wc_t *wc;
235*13101SVenki.Rajagopalan@Sun.COM eib_wqe_t *wqe;
236*13101SVenki.Rajagopalan@Sun.COM ibt_status_t ret;
237*13101SVenki.Rajagopalan@Sun.COM uint_t polled;
238*13101SVenki.Rajagopalan@Sun.COM int i;
239*13101SVenki.Rajagopalan@Sun.COM
240*13101SVenki.Rajagopalan@Sun.COM /*
241*13101SVenki.Rajagopalan@Sun.COM * Re-arm the tx notification callback before we start polling
242*13101SVenki.Rajagopalan@Sun.COM * the completion queue. There's nothing much we can do if the
243*13101SVenki.Rajagopalan@Sun.COM * enable_cq_notify fails - we issue a warning and move on.
244*13101SVenki.Rajagopalan@Sun.COM */
245*13101SVenki.Rajagopalan@Sun.COM ret = ibt_enable_cq_notify(chan->ch_cq_hdl, IBT_NEXT_COMPLETION);
246*13101SVenki.Rajagopalan@Sun.COM if (ret != IBT_SUCCESS) {
247*13101SVenki.Rajagopalan@Sun.COM EIB_DPRINTF_WARN(ss->ei_instance, "eib_data_tx_comp_handler: "
248*13101SVenki.Rajagopalan@Sun.COM "ibt_enable_cq_notify() failed, ret=%d", ret);
249*13101SVenki.Rajagopalan@Sun.COM }
250*13101SVenki.Rajagopalan@Sun.COM
251*13101SVenki.Rajagopalan@Sun.COM /*
252*13101SVenki.Rajagopalan@Sun.COM * Handle tx completions
253*13101SVenki.Rajagopalan@Sun.COM */
254*13101SVenki.Rajagopalan@Sun.COM while ((ret = ibt_poll_cq(chan->ch_cq_hdl, chan->ch_wc, chan->ch_cq_sz,
255*13101SVenki.Rajagopalan@Sun.COM &polled)) == IBT_SUCCESS) {
256*13101SVenki.Rajagopalan@Sun.COM for (wc = chan->ch_wc, i = 0; i < polled; i++, wc++) {
257*13101SVenki.Rajagopalan@Sun.COM wqe = (eib_wqe_t *)(uintptr_t)wc->wc_id;
258*13101SVenki.Rajagopalan@Sun.COM
259*13101SVenki.Rajagopalan@Sun.COM ASSERT(EIB_WQE_TYPE(wqe->qe_info) == EIB_WQE_TX);
260*13101SVenki.Rajagopalan@Sun.COM
261*13101SVenki.Rajagopalan@Sun.COM if (wc->wc_status != IBT_WC_SUCCESS) {
262*13101SVenki.Rajagopalan@Sun.COM EIB_INCR_COUNTER(&stats->st_oerrors);
263*13101SVenki.Rajagopalan@Sun.COM eib_data_err_comp(vnic, wqe, wc);
264*13101SVenki.Rajagopalan@Sun.COM } else {
265*13101SVenki.Rajagopalan@Sun.COM eib_data_tx_comp(vnic, wqe, vnic->vn_data_chan);
266*13101SVenki.Rajagopalan@Sun.COM }
267*13101SVenki.Rajagopalan@Sun.COM }
268*13101SVenki.Rajagopalan@Sun.COM }
269*13101SVenki.Rajagopalan@Sun.COM
270*13101SVenki.Rajagopalan@Sun.COM return (DDI_INTR_CLAIMED);
271*13101SVenki.Rajagopalan@Sun.COM }
272*13101SVenki.Rajagopalan@Sun.COM
273*13101SVenki.Rajagopalan@Sun.COM void
eib_data_rx_recycle(caddr_t arg)274*13101SVenki.Rajagopalan@Sun.COM eib_data_rx_recycle(caddr_t arg)
275*13101SVenki.Rajagopalan@Sun.COM {
276*13101SVenki.Rajagopalan@Sun.COM eib_wqe_t *rwqe = (eib_wqe_t *)(void *)arg;
277*13101SVenki.Rajagopalan@Sun.COM eib_t *ss = rwqe->qe_pool->wp_ss;
278*13101SVenki.Rajagopalan@Sun.COM eib_chan_t *vn_chan;
279*13101SVenki.Rajagopalan@Sun.COM uint_t nic_state;
280*13101SVenki.Rajagopalan@Sun.COM int ret;
281*13101SVenki.Rajagopalan@Sun.COM
282*13101SVenki.Rajagopalan@Sun.COM /*
283*13101SVenki.Rajagopalan@Sun.COM * We come here from three places - (a) from the nw layer if the
284*13101SVenki.Rajagopalan@Sun.COM * rx mblk we handed to it has been done with and the nw layer is
285*13101SVenki.Rajagopalan@Sun.COM * calling the freemsg() (b) from eib_data_rx_comp() if the rx
286*13101SVenki.Rajagopalan@Sun.COM * completion processing discovers that the received EoIB packet
287*13101SVenki.Rajagopalan@Sun.COM * has a problem and (c) from eib_data_err_comp() if we're tearing
288*13101SVenki.Rajagopalan@Sun.COM * down this channel. We only need to repost the rwqe if we're
289*13101SVenki.Rajagopalan@Sun.COM * being called back from the nw layer. For the other two cases,
290*13101SVenki.Rajagopalan@Sun.COM * we'll simply return the rwqe to the pool. Also, since we would've
291*13101SVenki.Rajagopalan@Sun.COM * already updated the ch_rx_posted counters in the rx completion
292*13101SVenki.Rajagopalan@Sun.COM * handler, we don't pass the chan pointer to eib_rsrc_return_rwqe
293*13101SVenki.Rajagopalan@Sun.COM * from within this routine.
294*13101SVenki.Rajagopalan@Sun.COM */
295*13101SVenki.Rajagopalan@Sun.COM rwqe->qe_mp = NULL;
296*13101SVenki.Rajagopalan@Sun.COM if ((rwqe->qe_info & EIB_WQE_FLG_WITH_NW) == 0) {
297*13101SVenki.Rajagopalan@Sun.COM eib_rsrc_return_rwqe(ss, rwqe, NULL);
298*13101SVenki.Rajagopalan@Sun.COM return;
299*13101SVenki.Rajagopalan@Sun.COM }
300*13101SVenki.Rajagopalan@Sun.COM
301*13101SVenki.Rajagopalan@Sun.COM rwqe->qe_info &= (~EIB_WQE_FLG_WITH_NW);
302*13101SVenki.Rajagopalan@Sun.COM
303*13101SVenki.Rajagopalan@Sun.COM /*
304*13101SVenki.Rajagopalan@Sun.COM * If the buffers are being returned by nw layer after a long
305*13101SVenki.Rajagopalan@Sun.COM * time, this eoib instance could've even been stopped by now.
306*13101SVenki.Rajagopalan@Sun.COM * If so, simply return the rwqe to the pool.
307*13101SVenki.Rajagopalan@Sun.COM */
308*13101SVenki.Rajagopalan@Sun.COM nic_state = eib_mac_get_nic_state(ss);
309*13101SVenki.Rajagopalan@Sun.COM if ((nic_state & EIB_NIC_STARTED) != EIB_NIC_STARTED) {
310*13101SVenki.Rajagopalan@Sun.COM eib_rsrc_return_rwqe(ss, rwqe, NULL);
311*13101SVenki.Rajagopalan@Sun.COM return;
312*13101SVenki.Rajagopalan@Sun.COM }
313*13101SVenki.Rajagopalan@Sun.COM
314*13101SVenki.Rajagopalan@Sun.COM /*
315*13101SVenki.Rajagopalan@Sun.COM * Or it could've taken even longer, and the nic has even been
316*13101SVenki.Rajagopalan@Sun.COM * restarted. Only thing we can do is to make sure that the
317*13101SVenki.Rajagopalan@Sun.COM * original channel pointer we passed corresponds to what's in
318*13101SVenki.Rajagopalan@Sun.COM * the instance of the vnic currently.
319*13101SVenki.Rajagopalan@Sun.COM */
320*13101SVenki.Rajagopalan@Sun.COM vn_chan = eib_vnic_get_data_chan(ss, rwqe->qe_vnic_inst);
321*13101SVenki.Rajagopalan@Sun.COM if (vn_chan == NULL || vn_chan != rwqe->qe_chan) {
322*13101SVenki.Rajagopalan@Sun.COM eib_rsrc_return_rwqe(ss, rwqe, NULL);
323*13101SVenki.Rajagopalan@Sun.COM return;
324*13101SVenki.Rajagopalan@Sun.COM }
325*13101SVenki.Rajagopalan@Sun.COM
326*13101SVenki.Rajagopalan@Sun.COM /*
327*13101SVenki.Rajagopalan@Sun.COM * Try to repost the rwqe if we're not tearing down this channel
328*13101SVenki.Rajagopalan@Sun.COM */
329*13101SVenki.Rajagopalan@Sun.COM if (vn_chan->ch_tear_down) {
330*13101SVenki.Rajagopalan@Sun.COM eib_rsrc_return_rwqe(ss, rwqe, NULL);
331*13101SVenki.Rajagopalan@Sun.COM } else {
332*13101SVenki.Rajagopalan@Sun.COM ret = eib_chan_post_recv(ss, vn_chan, rwqe);
333*13101SVenki.Rajagopalan@Sun.COM if (ret != EIB_E_SUCCESS) {
334*13101SVenki.Rajagopalan@Sun.COM if (rwqe->qe_mp)
335*13101SVenki.Rajagopalan@Sun.COM freemsg(rwqe->qe_mp);
336*13101SVenki.Rajagopalan@Sun.COM else
337*13101SVenki.Rajagopalan@Sun.COM eib_rsrc_return_rwqe(ss, rwqe, NULL);
338*13101SVenki.Rajagopalan@Sun.COM }
339*13101SVenki.Rajagopalan@Sun.COM }
340*13101SVenki.Rajagopalan@Sun.COM }
341*13101SVenki.Rajagopalan@Sun.COM
342*13101SVenki.Rajagopalan@Sun.COM void
eib_data_post_tx(eib_vnic_t * vnic,eib_wqe_t * swqe)343*13101SVenki.Rajagopalan@Sun.COM eib_data_post_tx(eib_vnic_t *vnic, eib_wqe_t *swqe)
344*13101SVenki.Rajagopalan@Sun.COM {
345*13101SVenki.Rajagopalan@Sun.COM eib_chan_t *chan = vnic->vn_data_chan;
346*13101SVenki.Rajagopalan@Sun.COM eib_t *ss = vnic->vn_ss;
347*13101SVenki.Rajagopalan@Sun.COM eib_stats_t *stats = vnic->vn_ss->ei_stats;
348*13101SVenki.Rajagopalan@Sun.COM ibt_send_wr_t wrs[EIB_MAX_POST_MULTIPLE];
349*13101SVenki.Rajagopalan@Sun.COM eib_wqe_t *wqes[EIB_MAX_POST_MULTIPLE];
350*13101SVenki.Rajagopalan@Sun.COM eib_wqe_t *elem;
351*13101SVenki.Rajagopalan@Sun.COM ibt_status_t ret;
352*13101SVenki.Rajagopalan@Sun.COM uint_t n_wrs;
353*13101SVenki.Rajagopalan@Sun.COM uint_t n_posted;
354*13101SVenki.Rajagopalan@Sun.COM uint_t total_failed = 0;
355*13101SVenki.Rajagopalan@Sun.COM uint_t n_failed = 0;
356*13101SVenki.Rajagopalan@Sun.COM uint_t i;
357*13101SVenki.Rajagopalan@Sun.COM
358*13101SVenki.Rajagopalan@Sun.COM /*
359*13101SVenki.Rajagopalan@Sun.COM * See if we have room for this wqe and then add it to the
360*13101SVenki.Rajagopalan@Sun.COM * list of tx wrs to post in this channel.
361*13101SVenki.Rajagopalan@Sun.COM */
362*13101SVenki.Rajagopalan@Sun.COM mutex_enter(&chan->ch_tx_lock);
363*13101SVenki.Rajagopalan@Sun.COM
364*13101SVenki.Rajagopalan@Sun.COM if ((chan->ch_tx_posted + 1) >= (chan->ch_max_swqes - 1)) {
365*13101SVenki.Rajagopalan@Sun.COM EIB_DPRINTF_WARN(ss->ei_instance, "eib_data_post_tx: "
366*13101SVenki.Rajagopalan@Sun.COM "too many swqes posted already, posted=0x%lx, "
367*13101SVenki.Rajagopalan@Sun.COM "max=0x%lx", chan->ch_tx_posted, chan->ch_max_swqes);
368*13101SVenki.Rajagopalan@Sun.COM mutex_exit(&chan->ch_tx_lock);
369*13101SVenki.Rajagopalan@Sun.COM return;
370*13101SVenki.Rajagopalan@Sun.COM }
371*13101SVenki.Rajagopalan@Sun.COM
372*13101SVenki.Rajagopalan@Sun.COM swqe->qe_nxt_post = NULL;
373*13101SVenki.Rajagopalan@Sun.COM if (chan->ch_tx) {
374*13101SVenki.Rajagopalan@Sun.COM chan->ch_tx_tail->qe_nxt_post = swqe;
375*13101SVenki.Rajagopalan@Sun.COM } else {
376*13101SVenki.Rajagopalan@Sun.COM chan->ch_tx = swqe;
377*13101SVenki.Rajagopalan@Sun.COM }
378*13101SVenki.Rajagopalan@Sun.COM chan->ch_tx_tail = swqe;
379*13101SVenki.Rajagopalan@Sun.COM chan->ch_tx_posted++; /* pre-increment */
380*13101SVenki.Rajagopalan@Sun.COM
381*13101SVenki.Rajagopalan@Sun.COM /*
382*13101SVenki.Rajagopalan@Sun.COM * If someone's already posting tx wqes in this channel, let
383*13101SVenki.Rajagopalan@Sun.COM * them post ours as well.
384*13101SVenki.Rajagopalan@Sun.COM */
385*13101SVenki.Rajagopalan@Sun.COM if (chan->ch_tx_busy == B_TRUE) {
386*13101SVenki.Rajagopalan@Sun.COM mutex_exit(&chan->ch_tx_lock);
387*13101SVenki.Rajagopalan@Sun.COM return;
388*13101SVenki.Rajagopalan@Sun.COM }
389*13101SVenki.Rajagopalan@Sun.COM chan->ch_tx_busy = B_TRUE;
390*13101SVenki.Rajagopalan@Sun.COM
391*13101SVenki.Rajagopalan@Sun.COM while (chan->ch_tx) {
392*13101SVenki.Rajagopalan@Sun.COM /*
393*13101SVenki.Rajagopalan@Sun.COM * Post EIB_MAX_POST_MULTIPLE wrs at a time
394*13101SVenki.Rajagopalan@Sun.COM */
395*13101SVenki.Rajagopalan@Sun.COM for (n_wrs = 0, elem = chan->ch_tx;
396*13101SVenki.Rajagopalan@Sun.COM (elem) && (n_wrs < EIB_MAX_POST_MULTIPLE);
397*13101SVenki.Rajagopalan@Sun.COM elem = elem->qe_nxt_post, n_wrs++) {
398*13101SVenki.Rajagopalan@Sun.COM wqes[n_wrs] = elem;
399*13101SVenki.Rajagopalan@Sun.COM wrs[n_wrs] = (elem->qe_wr).send;
400*13101SVenki.Rajagopalan@Sun.COM }
401*13101SVenki.Rajagopalan@Sun.COM chan->ch_tx = elem;
402*13101SVenki.Rajagopalan@Sun.COM if (elem == NULL) {
403*13101SVenki.Rajagopalan@Sun.COM chan->ch_tx_tail = NULL;
404*13101SVenki.Rajagopalan@Sun.COM }
405*13101SVenki.Rajagopalan@Sun.COM mutex_exit(&chan->ch_tx_lock);
406*13101SVenki.Rajagopalan@Sun.COM
407*13101SVenki.Rajagopalan@Sun.COM ASSERT(n_wrs != 0);
408*13101SVenki.Rajagopalan@Sun.COM
409*13101SVenki.Rajagopalan@Sun.COM /*
410*13101SVenki.Rajagopalan@Sun.COM * If multiple wrs posting fails for some reason, we'll try
411*13101SVenki.Rajagopalan@Sun.COM * posting the unposted ones one by one. If even that fails,
412*13101SVenki.Rajagopalan@Sun.COM * we'll release any mappings/buffers/mblks associated with
413*13101SVenki.Rajagopalan@Sun.COM * this wqe and return it to the pool.
414*13101SVenki.Rajagopalan@Sun.COM */
415*13101SVenki.Rajagopalan@Sun.COM n_posted = n_failed = 0;
416*13101SVenki.Rajagopalan@Sun.COM ret = ibt_post_send(chan->ch_chan, wrs, n_wrs, &n_posted);
417*13101SVenki.Rajagopalan@Sun.COM if (ret != IBT_SUCCESS) {
418*13101SVenki.Rajagopalan@Sun.COM EIB_DPRINTF_WARN(ss->ei_instance, "eib_data_post_tx: "
419*13101SVenki.Rajagopalan@Sun.COM "ibt_post_send(n_wrs=0x%lx, n_posted=0x%lx) "
420*13101SVenki.Rajagopalan@Sun.COM "failed, ret=%d", n_wrs, n_posted, ret);
421*13101SVenki.Rajagopalan@Sun.COM
422*13101SVenki.Rajagopalan@Sun.COM for (i = n_posted; i < n_wrs; i++) {
423*13101SVenki.Rajagopalan@Sun.COM ret = ibt_post_send(chan->ch_chan, &wrs[i],
424*13101SVenki.Rajagopalan@Sun.COM 1, NULL);
425*13101SVenki.Rajagopalan@Sun.COM if (ret != IBT_SUCCESS) {
426*13101SVenki.Rajagopalan@Sun.COM n_failed++;
427*13101SVenki.Rajagopalan@Sun.COM eib_data_tx_comp(vnic, wqes[i], chan);
428*13101SVenki.Rajagopalan@Sun.COM
429*13101SVenki.Rajagopalan@Sun.COM EIB_DPRINTF_WARN(ss->ei_instance,
430*13101SVenki.Rajagopalan@Sun.COM "eib_data_post_tx: "
431*13101SVenki.Rajagopalan@Sun.COM "ibt_post_send(n_wrs=1) failed, "
432*13101SVenki.Rajagopalan@Sun.COM "ret=%d", ret);
433*13101SVenki.Rajagopalan@Sun.COM }
434*13101SVenki.Rajagopalan@Sun.COM }
435*13101SVenki.Rajagopalan@Sun.COM }
436*13101SVenki.Rajagopalan@Sun.COM total_failed += n_failed;
437*13101SVenki.Rajagopalan@Sun.COM
438*13101SVenki.Rajagopalan@Sun.COM mutex_enter(&chan->ch_tx_lock);
439*13101SVenki.Rajagopalan@Sun.COM }
440*13101SVenki.Rajagopalan@Sun.COM
441*13101SVenki.Rajagopalan@Sun.COM chan->ch_tx_busy = B_FALSE;
442*13101SVenki.Rajagopalan@Sun.COM mutex_exit(&chan->ch_tx_lock);
443*13101SVenki.Rajagopalan@Sun.COM
444*13101SVenki.Rajagopalan@Sun.COM /*
445*13101SVenki.Rajagopalan@Sun.COM * If we failed to post something, update error stats
446*13101SVenki.Rajagopalan@Sun.COM */
447*13101SVenki.Rajagopalan@Sun.COM if (total_failed) {
448*13101SVenki.Rajagopalan@Sun.COM EIB_UPDATE_COUNTER(&stats->st_oerrors, total_failed);
449*13101SVenki.Rajagopalan@Sun.COM }
450*13101SVenki.Rajagopalan@Sun.COM }
451*13101SVenki.Rajagopalan@Sun.COM
452*13101SVenki.Rajagopalan@Sun.COM void
eib_data_parse_ether_hdr(mblk_t * mp,eib_ether_hdr_t * evh)453*13101SVenki.Rajagopalan@Sun.COM eib_data_parse_ether_hdr(mblk_t *mp, eib_ether_hdr_t *evh)
454*13101SVenki.Rajagopalan@Sun.COM {
455*13101SVenki.Rajagopalan@Sun.COM struct ether_vlan_header *vl_hdr;
456*13101SVenki.Rajagopalan@Sun.COM struct ether_header *hdr;
457*13101SVenki.Rajagopalan@Sun.COM
458*13101SVenki.Rajagopalan@Sun.COM /*
459*13101SVenki.Rajagopalan@Sun.COM * Assume that the ether header (with or without vlan tag) is
460*13101SVenki.Rajagopalan@Sun.COM * contained in one fragment
461*13101SVenki.Rajagopalan@Sun.COM */
462*13101SVenki.Rajagopalan@Sun.COM hdr = (struct ether_header *)(void *)mp->b_rptr;
463*13101SVenki.Rajagopalan@Sun.COM vl_hdr = (struct ether_vlan_header *)(void *)mp->b_rptr;
464*13101SVenki.Rajagopalan@Sun.COM
465*13101SVenki.Rajagopalan@Sun.COM evh->eh_ether_type = ntohs(hdr->ether_type);
466*13101SVenki.Rajagopalan@Sun.COM if (evh->eh_ether_type != ETHERTYPE_VLAN) {
467*13101SVenki.Rajagopalan@Sun.COM evh->eh_tagless = 1;
468*13101SVenki.Rajagopalan@Sun.COM evh->eh_vlan = 0;
469*13101SVenki.Rajagopalan@Sun.COM ether_copy((void *)hdr->ether_dhost.ether_addr_octet,
470*13101SVenki.Rajagopalan@Sun.COM (void *)evh->eh_dmac);
471*13101SVenki.Rajagopalan@Sun.COM ether_copy((void *)hdr->ether_shost.ether_addr_octet,
472*13101SVenki.Rajagopalan@Sun.COM (void *)evh->eh_smac);
473*13101SVenki.Rajagopalan@Sun.COM } else {
474*13101SVenki.Rajagopalan@Sun.COM evh->eh_ether_type = ntohs(vl_hdr->ether_type);
475*13101SVenki.Rajagopalan@Sun.COM evh->eh_tagless = 0;
476*13101SVenki.Rajagopalan@Sun.COM evh->eh_vlan = VLAN_ID(ntohs(vl_hdr->ether_tci));
477*13101SVenki.Rajagopalan@Sun.COM ether_copy((void *)vl_hdr->ether_dhost.ether_addr_octet,
478*13101SVenki.Rajagopalan@Sun.COM (void *)evh->eh_dmac);
479*13101SVenki.Rajagopalan@Sun.COM ether_copy((void *)vl_hdr->ether_shost.ether_addr_octet,
480*13101SVenki.Rajagopalan@Sun.COM (void *)evh->eh_smac);
481*13101SVenki.Rajagopalan@Sun.COM }
482*13101SVenki.Rajagopalan@Sun.COM }
483*13101SVenki.Rajagopalan@Sun.COM
484*13101SVenki.Rajagopalan@Sun.COM int
eib_data_lookup_vnic(eib_t * ss,uint8_t * mac,uint16_t vlan,eib_vnic_t ** vnicp,boolean_t * failed)485*13101SVenki.Rajagopalan@Sun.COM eib_data_lookup_vnic(eib_t *ss, uint8_t *mac, uint16_t vlan, eib_vnic_t **vnicp,
486*13101SVenki.Rajagopalan@Sun.COM boolean_t *failed)
487*13101SVenki.Rajagopalan@Sun.COM {
488*13101SVenki.Rajagopalan@Sun.COM eib_vnic_t *vnic;
489*13101SVenki.Rajagopalan@Sun.COM eib_vnic_req_t *vrq;
490*13101SVenki.Rajagopalan@Sun.COM uint8_t *vn_mac;
491*13101SVenki.Rajagopalan@Sun.COM uint16_t vn_vlan;
492*13101SVenki.Rajagopalan@Sun.COM uint64_t av;
493*13101SVenki.Rajagopalan@Sun.COM int inst = 0;
494*13101SVenki.Rajagopalan@Sun.COM
495*13101SVenki.Rajagopalan@Sun.COM if (mac == NULL)
496*13101SVenki.Rajagopalan@Sun.COM return (EIB_E_FAILURE);
497*13101SVenki.Rajagopalan@Sun.COM
498*13101SVenki.Rajagopalan@Sun.COM /*
499*13101SVenki.Rajagopalan@Sun.COM * For now, a simple search (but only what we've allocated). Note that
500*13101SVenki.Rajagopalan@Sun.COM * if we're in the process of creating a vnic, the instance might've
501*13101SVenki.Rajagopalan@Sun.COM * been allocated, but the vnic entry would be NULL.
502*13101SVenki.Rajagopalan@Sun.COM */
503*13101SVenki.Rajagopalan@Sun.COM mutex_enter(&ss->ei_vnic_lock);
504*13101SVenki.Rajagopalan@Sun.COM av = ss->ei_active_vnics;
505*13101SVenki.Rajagopalan@Sun.COM while ((inst = EIB_FIND_LSB_SET(av)) != -1) {
506*13101SVenki.Rajagopalan@Sun.COM if ((vnic = ss->ei_vnic[inst]) != NULL) {
507*13101SVenki.Rajagopalan@Sun.COM vn_mac = vnic->vn_login_data.ld_assigned_mac;
508*13101SVenki.Rajagopalan@Sun.COM vn_vlan = vnic->vn_login_data.ld_assigned_vlan;
509*13101SVenki.Rajagopalan@Sun.COM
510*13101SVenki.Rajagopalan@Sun.COM if ((vn_vlan == vlan) &&
511*13101SVenki.Rajagopalan@Sun.COM (bcmp(vn_mac, mac, ETHERADDRL) == 0)) {
512*13101SVenki.Rajagopalan@Sun.COM if (vnicp) {
513*13101SVenki.Rajagopalan@Sun.COM *vnicp = vnic;
514*13101SVenki.Rajagopalan@Sun.COM }
515*13101SVenki.Rajagopalan@Sun.COM mutex_exit(&ss->ei_vnic_lock);
516*13101SVenki.Rajagopalan@Sun.COM return (EIB_E_SUCCESS);
517*13101SVenki.Rajagopalan@Sun.COM }
518*13101SVenki.Rajagopalan@Sun.COM }
519*13101SVenki.Rajagopalan@Sun.COM
520*13101SVenki.Rajagopalan@Sun.COM av &= (~((uint64_t)1 << inst));
521*13101SVenki.Rajagopalan@Sun.COM }
522*13101SVenki.Rajagopalan@Sun.COM mutex_exit(&ss->ei_vnic_lock);
523*13101SVenki.Rajagopalan@Sun.COM
524*13101SVenki.Rajagopalan@Sun.COM /*
525*13101SVenki.Rajagopalan@Sun.COM * If we haven't been able to locate a vnic for this {mac,vlan} tuple,
526*13101SVenki.Rajagopalan@Sun.COM * see if we've already failed a creation request for this vnic, and
527*13101SVenki.Rajagopalan@Sun.COM * return that information.
528*13101SVenki.Rajagopalan@Sun.COM */
529*13101SVenki.Rajagopalan@Sun.COM if (failed) {
530*13101SVenki.Rajagopalan@Sun.COM mutex_enter(&ss->ei_vnic_req_lock);
531*13101SVenki.Rajagopalan@Sun.COM *failed = B_FALSE;
532*13101SVenki.Rajagopalan@Sun.COM for (vrq = ss->ei_failed_vnic_req; vrq; vrq = vrq->vr_next) {
533*13101SVenki.Rajagopalan@Sun.COM if ((vrq->vr_vlan == vlan) &&
534*13101SVenki.Rajagopalan@Sun.COM (bcmp(vrq->vr_mac, mac, ETHERADDRL) == 0)) {
535*13101SVenki.Rajagopalan@Sun.COM *failed = B_TRUE;
536*13101SVenki.Rajagopalan@Sun.COM }
537*13101SVenki.Rajagopalan@Sun.COM }
538*13101SVenki.Rajagopalan@Sun.COM mutex_exit(&ss->ei_vnic_req_lock);
539*13101SVenki.Rajagopalan@Sun.COM }
540*13101SVenki.Rajagopalan@Sun.COM
541*13101SVenki.Rajagopalan@Sun.COM return (EIB_E_FAILURE);
542*13101SVenki.Rajagopalan@Sun.COM }
543*13101SVenki.Rajagopalan@Sun.COM
544*13101SVenki.Rajagopalan@Sun.COM int
eib_data_prepare_frame(eib_vnic_t * vnic,eib_wqe_t * swqe,mblk_t * mp,eib_ether_hdr_t * evh)545*13101SVenki.Rajagopalan@Sun.COM eib_data_prepare_frame(eib_vnic_t *vnic, eib_wqe_t *swqe, mblk_t *mp,
546*13101SVenki.Rajagopalan@Sun.COM eib_ether_hdr_t *evh)
547*13101SVenki.Rajagopalan@Sun.COM {
548*13101SVenki.Rajagopalan@Sun.COM uint32_t mss;
549*13101SVenki.Rajagopalan@Sun.COM uint32_t lsoflags;
550*13101SVenki.Rajagopalan@Sun.COM uint32_t hckflags;
551*13101SVenki.Rajagopalan@Sun.COM
552*13101SVenki.Rajagopalan@Sun.COM /*
553*13101SVenki.Rajagopalan@Sun.COM * The swqe defaults are set to use the regular ud work request
554*13101SVenki.Rajagopalan@Sun.COM * member and the IBT_WRC_SEND opcode, so we don't need to do
555*13101SVenki.Rajagopalan@Sun.COM * anything here if this isn't an LSO packet.
556*13101SVenki.Rajagopalan@Sun.COM */
557*13101SVenki.Rajagopalan@Sun.COM mac_lso_get(mp, &mss, &lsoflags);
558*13101SVenki.Rajagopalan@Sun.COM if ((lsoflags & HW_LSO) == HW_LSO)
559*13101SVenki.Rajagopalan@Sun.COM eib_data_setup_lso(swqe, mp, mss, evh);
560*13101SVenki.Rajagopalan@Sun.COM
561*13101SVenki.Rajagopalan@Sun.COM mac_hcksum_get(mp, NULL, NULL, NULL, NULL, &hckflags);
562*13101SVenki.Rajagopalan@Sun.COM if ((hckflags & HCK_FULLCKSUM) == HCK_FULLCKSUM) {
563*13101SVenki.Rajagopalan@Sun.COM swqe->qe_wr.send.wr_flags |= IBT_WR_SEND_CKSUM;
564*13101SVenki.Rajagopalan@Sun.COM } else {
565*13101SVenki.Rajagopalan@Sun.COM swqe->qe_wr.send.wr_flags &= (~IBT_WR_SEND_CKSUM);
566*13101SVenki.Rajagopalan@Sun.COM }
567*13101SVenki.Rajagopalan@Sun.COM
568*13101SVenki.Rajagopalan@Sun.COM if (eib_data_prepare_sgl(vnic, swqe, mp) != 0)
569*13101SVenki.Rajagopalan@Sun.COM return (EIB_E_FAILURE);
570*13101SVenki.Rajagopalan@Sun.COM
571*13101SVenki.Rajagopalan@Sun.COM swqe->qe_mp = mp;
572*13101SVenki.Rajagopalan@Sun.COM
573*13101SVenki.Rajagopalan@Sun.COM return (EIB_E_SUCCESS);
574*13101SVenki.Rajagopalan@Sun.COM }
575*13101SVenki.Rajagopalan@Sun.COM
576*13101SVenki.Rajagopalan@Sun.COM void
eib_rb_data_create_qp(eib_t * ss,eib_vnic_t * vnic)577*13101SVenki.Rajagopalan@Sun.COM eib_rb_data_create_qp(eib_t *ss, eib_vnic_t *vnic)
578*13101SVenki.Rajagopalan@Sun.COM {
579*13101SVenki.Rajagopalan@Sun.COM eib_rb_data_setup_ud_channel(ss, vnic);
580*13101SVenki.Rajagopalan@Sun.COM
581*13101SVenki.Rajagopalan@Sun.COM eib_rb_data_setup_cqs(ss, vnic);
582*13101SVenki.Rajagopalan@Sun.COM
583*13101SVenki.Rajagopalan@Sun.COM eib_chan_fini(vnic->vn_data_chan);
584*13101SVenki.Rajagopalan@Sun.COM vnic->vn_data_chan = NULL;
585*13101SVenki.Rajagopalan@Sun.COM }
586*13101SVenki.Rajagopalan@Sun.COM
587*13101SVenki.Rajagopalan@Sun.COM static int
eib_data_setup_cqs(eib_t * ss,eib_vnic_t * vnic)588*13101SVenki.Rajagopalan@Sun.COM eib_data_setup_cqs(eib_t *ss, eib_vnic_t *vnic)
589*13101SVenki.Rajagopalan@Sun.COM {
590*13101SVenki.Rajagopalan@Sun.COM eib_chan_t *chan = vnic->vn_data_chan;
591*13101SVenki.Rajagopalan@Sun.COM ibt_cq_attr_t cq_attr;
592*13101SVenki.Rajagopalan@Sun.COM ibt_status_t ret;
593*13101SVenki.Rajagopalan@Sun.COM uint_t snd_sz;
594*13101SVenki.Rajagopalan@Sun.COM uint_t rcv_sz;
595*13101SVenki.Rajagopalan@Sun.COM int rv;
596*13101SVenki.Rajagopalan@Sun.COM
597*13101SVenki.Rajagopalan@Sun.COM /*
598*13101SVenki.Rajagopalan@Sun.COM * Allocate send completion queue. Note that we've already verified
599*13101SVenki.Rajagopalan@Sun.COM * that cp_max_swqe and cp_max_rwqe meet the max cq size requirements
600*13101SVenki.Rajagopalan@Sun.COM * of the hca.
601*13101SVenki.Rajagopalan@Sun.COM */
602*13101SVenki.Rajagopalan@Sun.COM cq_attr.cq_sched = NULL;
603*13101SVenki.Rajagopalan@Sun.COM cq_attr.cq_flags = IBT_CQ_NO_FLAGS;
604*13101SVenki.Rajagopalan@Sun.COM cq_attr.cq_size = ss->ei_caps->cp_max_swqe + 1;
605*13101SVenki.Rajagopalan@Sun.COM
606*13101SVenki.Rajagopalan@Sun.COM ret = ibt_alloc_cq(ss->ei_hca_hdl, &cq_attr, &chan->ch_cq_hdl, &snd_sz);
607*13101SVenki.Rajagopalan@Sun.COM if (ret != IBT_SUCCESS) {
608*13101SVenki.Rajagopalan@Sun.COM EIB_DPRINTF_ERR(ss->ei_instance, "eib_data_setup_cqs: "
609*13101SVenki.Rajagopalan@Sun.COM "ibt_alloc_cq(snd_cq_sz=0x%lx) failed, ret=%d",
610*13101SVenki.Rajagopalan@Sun.COM cq_attr.cq_size, ret);
611*13101SVenki.Rajagopalan@Sun.COM goto setup_data_cqs_fail;
612*13101SVenki.Rajagopalan@Sun.COM }
613*13101SVenki.Rajagopalan@Sun.COM ret = ibt_modify_cq(chan->ch_cq_hdl, EIB_TX_COMP_COUNT,
614*13101SVenki.Rajagopalan@Sun.COM EIB_TX_COMP_USEC, 0);
615*13101SVenki.Rajagopalan@Sun.COM if (ret != IBT_SUCCESS) {
616*13101SVenki.Rajagopalan@Sun.COM EIB_DPRINTF_WARN(ss->ei_instance, "eib_data_setup_cqs: "
617*13101SVenki.Rajagopalan@Sun.COM "ibt_modify_cq(snd_comp_count=0x%lx, snd_comp_usec=0x%lx) "
618*13101SVenki.Rajagopalan@Sun.COM "failed, ret=%d",
619*13101SVenki.Rajagopalan@Sun.COM EIB_TX_COMP_COUNT, EIB_TX_COMP_USEC, ret);
620*13101SVenki.Rajagopalan@Sun.COM }
621*13101SVenki.Rajagopalan@Sun.COM
622*13101SVenki.Rajagopalan@Sun.COM /*
623*13101SVenki.Rajagopalan@Sun.COM * Allocate receive completion queue
624*13101SVenki.Rajagopalan@Sun.COM */
625*13101SVenki.Rajagopalan@Sun.COM cq_attr.cq_sched = NULL;
626*13101SVenki.Rajagopalan@Sun.COM cq_attr.cq_flags = IBT_CQ_NO_FLAGS;
627*13101SVenki.Rajagopalan@Sun.COM cq_attr.cq_size = ss->ei_caps->cp_max_rwqe + 1;
628*13101SVenki.Rajagopalan@Sun.COM
629*13101SVenki.Rajagopalan@Sun.COM ret = ibt_alloc_cq(ss->ei_hca_hdl, &cq_attr, &chan->ch_rcv_cq_hdl,
630*13101SVenki.Rajagopalan@Sun.COM &rcv_sz);
631*13101SVenki.Rajagopalan@Sun.COM if (ret != IBT_SUCCESS) {
632*13101SVenki.Rajagopalan@Sun.COM EIB_DPRINTF_ERR(ss->ei_instance, "eib_data_setup_cqs: "
633*13101SVenki.Rajagopalan@Sun.COM "ibt_alloc_cq(rcv_cq_sz=0x%lx) failed, ret=%d",
634*13101SVenki.Rajagopalan@Sun.COM cq_attr.cq_size, ret);
635*13101SVenki.Rajagopalan@Sun.COM goto setup_data_cqs_fail;
636*13101SVenki.Rajagopalan@Sun.COM }
637*13101SVenki.Rajagopalan@Sun.COM ret = ibt_modify_cq(chan->ch_rcv_cq_hdl, EIB_RX_COMP_COUNT,
638*13101SVenki.Rajagopalan@Sun.COM EIB_RX_COMP_USEC, 0);
639*13101SVenki.Rajagopalan@Sun.COM if (ret != IBT_SUCCESS) {
640*13101SVenki.Rajagopalan@Sun.COM EIB_DPRINTF_WARN(ss->ei_instance, "eib_data_setup_cqs: "
641*13101SVenki.Rajagopalan@Sun.COM "ibt_modify_cq(rcv_comp_count=0x%lx, rcv_comp_usec=0x%lx) "
642*13101SVenki.Rajagopalan@Sun.COM "failed, ret=%d",
643*13101SVenki.Rajagopalan@Sun.COM EIB_RX_COMP_COUNT, EIB_RX_COMP_USEC, ret);
644*13101SVenki.Rajagopalan@Sun.COM }
645*13101SVenki.Rajagopalan@Sun.COM
646*13101SVenki.Rajagopalan@Sun.COM /*
647*13101SVenki.Rajagopalan@Sun.COM * Set up parameters for collecting tx and rx completion information
648*13101SVenki.Rajagopalan@Sun.COM */
649*13101SVenki.Rajagopalan@Sun.COM chan->ch_cq_sz = snd_sz;
650*13101SVenki.Rajagopalan@Sun.COM chan->ch_wc = kmem_zalloc(sizeof (ibt_wc_t) * snd_sz, KM_SLEEP);
651*13101SVenki.Rajagopalan@Sun.COM chan->ch_rcv_cq_sz = rcv_sz;
652*13101SVenki.Rajagopalan@Sun.COM chan->ch_rcv_wc = kmem_zalloc(sizeof (ibt_wc_t) * rcv_sz, KM_SLEEP);
653*13101SVenki.Rajagopalan@Sun.COM
654*13101SVenki.Rajagopalan@Sun.COM /*
655*13101SVenki.Rajagopalan@Sun.COM * Set up the vnic's data tx completion queue handler and allocate
656*13101SVenki.Rajagopalan@Sun.COM * a softint for it as well.
657*13101SVenki.Rajagopalan@Sun.COM */
658*13101SVenki.Rajagopalan@Sun.COM if ((rv = ddi_intr_add_softint(ss->ei_dip, &vnic->vn_data_tx_si_hdl,
659*13101SVenki.Rajagopalan@Sun.COM EIB_SOFTPRI_DATA, eib_data_tx_comp_handler, vnic)) != DDI_SUCCESS) {
660*13101SVenki.Rajagopalan@Sun.COM EIB_DPRINTF_ERR(ss->ei_instance, "eib_data_setup_cqs: "
661*13101SVenki.Rajagopalan@Sun.COM "ddi_intr_add_softint() failed for data tx qp, ret=%d", rv);
662*13101SVenki.Rajagopalan@Sun.COM goto setup_data_cqs_fail;
663*13101SVenki.Rajagopalan@Sun.COM }
664*13101SVenki.Rajagopalan@Sun.COM ibt_set_cq_handler(chan->ch_cq_hdl, eib_data_tx_comp_intr, vnic);
665*13101SVenki.Rajagopalan@Sun.COM ret = ibt_enable_cq_notify(chan->ch_cq_hdl, IBT_NEXT_COMPLETION);
666*13101SVenki.Rajagopalan@Sun.COM if (ret != IBT_SUCCESS) {
667*13101SVenki.Rajagopalan@Sun.COM EIB_DPRINTF_ERR(ss->ei_instance, "eib_data_setup_cqs: "
668*13101SVenki.Rajagopalan@Sun.COM "ibt_enable_cq_notify() failed for tx cq, ret=%d", ret);
669*13101SVenki.Rajagopalan@Sun.COM goto setup_data_cqs_fail;
670*13101SVenki.Rajagopalan@Sun.COM }
671*13101SVenki.Rajagopalan@Sun.COM
672*13101SVenki.Rajagopalan@Sun.COM /*
673*13101SVenki.Rajagopalan@Sun.COM * And then the data rx completion queue handler
674*13101SVenki.Rajagopalan@Sun.COM */
675*13101SVenki.Rajagopalan@Sun.COM if ((rv = ddi_intr_add_softint(ss->ei_dip, &vnic->vn_data_rx_si_hdl,
676*13101SVenki.Rajagopalan@Sun.COM EIB_SOFTPRI_DATA, eib_data_rx_comp_handler, vnic)) != DDI_SUCCESS) {
677*13101SVenki.Rajagopalan@Sun.COM EIB_DPRINTF_ERR(ss->ei_instance, "eib_data_setup_cqs: "
678*13101SVenki.Rajagopalan@Sun.COM "ddi_intr_add_softint() failed for data rx qp, ret=%d", rv);
679*13101SVenki.Rajagopalan@Sun.COM goto setup_data_cqs_fail;
680*13101SVenki.Rajagopalan@Sun.COM }
681*13101SVenki.Rajagopalan@Sun.COM ibt_set_cq_handler(chan->ch_rcv_cq_hdl, eib_data_rx_comp_intr, vnic);
682*13101SVenki.Rajagopalan@Sun.COM ret = ibt_enable_cq_notify(chan->ch_rcv_cq_hdl, IBT_NEXT_COMPLETION);
683*13101SVenki.Rajagopalan@Sun.COM if (ret != IBT_SUCCESS) {
684*13101SVenki.Rajagopalan@Sun.COM EIB_DPRINTF_ERR(ss->ei_instance, "eib_data_setup_cqs: "
685*13101SVenki.Rajagopalan@Sun.COM "ibt_enable_cq_notify() failed for rx cq, ret=%d", ret);
686*13101SVenki.Rajagopalan@Sun.COM goto setup_data_cqs_fail;
687*13101SVenki.Rajagopalan@Sun.COM }
688*13101SVenki.Rajagopalan@Sun.COM
689*13101SVenki.Rajagopalan@Sun.COM return (EIB_E_SUCCESS);
690*13101SVenki.Rajagopalan@Sun.COM
691*13101SVenki.Rajagopalan@Sun.COM setup_data_cqs_fail:
692*13101SVenki.Rajagopalan@Sun.COM eib_rb_data_setup_cqs(ss, vnic);
693*13101SVenki.Rajagopalan@Sun.COM return (EIB_E_FAILURE);
694*13101SVenki.Rajagopalan@Sun.COM }
695*13101SVenki.Rajagopalan@Sun.COM
696*13101SVenki.Rajagopalan@Sun.COM static int
eib_data_setup_ud_channel(eib_t * ss,eib_vnic_t * vnic)697*13101SVenki.Rajagopalan@Sun.COM eib_data_setup_ud_channel(eib_t *ss, eib_vnic_t *vnic)
698*13101SVenki.Rajagopalan@Sun.COM {
699*13101SVenki.Rajagopalan@Sun.COM eib_chan_t *chan = vnic->vn_data_chan;
700*13101SVenki.Rajagopalan@Sun.COM ibt_ud_chan_alloc_args_t alloc_attr;
701*13101SVenki.Rajagopalan@Sun.COM ibt_ud_chan_query_attr_t query_attr;
702*13101SVenki.Rajagopalan@Sun.COM ibt_status_t ret;
703*13101SVenki.Rajagopalan@Sun.COM
704*13101SVenki.Rajagopalan@Sun.COM bzero(&alloc_attr, sizeof (ibt_ud_chan_alloc_args_t));
705*13101SVenki.Rajagopalan@Sun.COM bzero(&query_attr, sizeof (ibt_ud_chan_query_attr_t));
706*13101SVenki.Rajagopalan@Sun.COM
707*13101SVenki.Rajagopalan@Sun.COM alloc_attr.ud_flags = IBT_ALL_SIGNALED;
708*13101SVenki.Rajagopalan@Sun.COM if (ss->ei_caps->cp_resv_lkey_capab)
709*13101SVenki.Rajagopalan@Sun.COM alloc_attr.ud_flags |= IBT_FAST_REG_RES_LKEY;
710*13101SVenki.Rajagopalan@Sun.COM if (ss->ei_caps->cp_lso_maxlen)
711*13101SVenki.Rajagopalan@Sun.COM alloc_attr.ud_flags |= IBT_USES_LSO;
712*13101SVenki.Rajagopalan@Sun.COM
713*13101SVenki.Rajagopalan@Sun.COM alloc_attr.ud_hca_port_num = ss->ei_props->ep_port_num;
714*13101SVenki.Rajagopalan@Sun.COM alloc_attr.ud_pkey_ix = chan->ch_pkey_ix;
715*13101SVenki.Rajagopalan@Sun.COM alloc_attr.ud_sizes.cs_sq = ss->ei_caps->cp_max_swqe;
716*13101SVenki.Rajagopalan@Sun.COM alloc_attr.ud_sizes.cs_rq = ss->ei_caps->cp_max_rwqe;
717*13101SVenki.Rajagopalan@Sun.COM alloc_attr.ud_sizes.cs_sq_sgl = ss->ei_caps->cp_max_sgl;
718*13101SVenki.Rajagopalan@Sun.COM alloc_attr.ud_sizes.cs_rq_sgl = 1;
719*13101SVenki.Rajagopalan@Sun.COM alloc_attr.ud_sizes.cs_inline = 0;
720*13101SVenki.Rajagopalan@Sun.COM
721*13101SVenki.Rajagopalan@Sun.COM alloc_attr.ud_qkey = EIB_DATA_QKEY;
722*13101SVenki.Rajagopalan@Sun.COM alloc_attr.ud_scq = chan->ch_cq_hdl;
723*13101SVenki.Rajagopalan@Sun.COM alloc_attr.ud_rcq = chan->ch_rcv_cq_hdl;
724*13101SVenki.Rajagopalan@Sun.COM alloc_attr.ud_pd = ss->ei_pd_hdl;
725*13101SVenki.Rajagopalan@Sun.COM
726*13101SVenki.Rajagopalan@Sun.COM ret = ibt_alloc_ud_channel(ss->ei_hca_hdl, IBT_ACHAN_NO_FLAGS,
727*13101SVenki.Rajagopalan@Sun.COM &alloc_attr, &chan->ch_chan, NULL);
728*13101SVenki.Rajagopalan@Sun.COM if (ret != IBT_SUCCESS) {
729*13101SVenki.Rajagopalan@Sun.COM EIB_DPRINTF_ERR(ss->ei_instance, "eib_data_setup_ud_channel: "
730*13101SVenki.Rajagopalan@Sun.COM "ibt_alloc_ud_channel(port=0x%x, pkey_ix=0x%x, "
731*13101SVenki.Rajagopalan@Sun.COM "cs_sq=0x%lx, cs_rq=0x%lx, sq_sgl=0x%lx) failed, ret=%d",
732*13101SVenki.Rajagopalan@Sun.COM alloc_attr.ud_hca_port_num, chan->ch_pkey_ix,
733*13101SVenki.Rajagopalan@Sun.COM alloc_attr.ud_sizes.cs_sq, alloc_attr.ud_sizes.cs_rq,
734*13101SVenki.Rajagopalan@Sun.COM alloc_attr.ud_sizes.cs_sq_sgl, ret);
735*13101SVenki.Rajagopalan@Sun.COM
736*13101SVenki.Rajagopalan@Sun.COM goto setup_data_ud_channel_fail;
737*13101SVenki.Rajagopalan@Sun.COM }
738*13101SVenki.Rajagopalan@Sun.COM
739*13101SVenki.Rajagopalan@Sun.COM ret = ibt_query_ud_channel(chan->ch_chan, &query_attr);
740*13101SVenki.Rajagopalan@Sun.COM if (ret != IBT_SUCCESS) {
741*13101SVenki.Rajagopalan@Sun.COM EIB_DPRINTF_ERR(ss->ei_instance, "eib_data_setup_ud_channel: "
742*13101SVenki.Rajagopalan@Sun.COM "ibt_query_ud_channel() failed, ret=%d", ret);
743*13101SVenki.Rajagopalan@Sun.COM goto setup_data_ud_channel_fail;
744*13101SVenki.Rajagopalan@Sun.COM }
745*13101SVenki.Rajagopalan@Sun.COM
746*13101SVenki.Rajagopalan@Sun.COM chan->ch_qpn = query_attr.ud_qpn;
747*13101SVenki.Rajagopalan@Sun.COM chan->ch_max_swqes = query_attr.ud_chan_sizes.cs_sq;
748*13101SVenki.Rajagopalan@Sun.COM chan->ch_max_rwqes = query_attr.ud_chan_sizes.cs_rq;
749*13101SVenki.Rajagopalan@Sun.COM chan->ch_lwm_rwqes = chan->ch_max_rwqes >> 2;
750*13101SVenki.Rajagopalan@Sun.COM chan->ch_rwqe_bktsz = (chan->ch_max_rwqes < EIB_DATA_RWQE_BKT) ?
751*13101SVenki.Rajagopalan@Sun.COM chan->ch_max_rwqes : EIB_DATA_RWQE_BKT;
752*13101SVenki.Rajagopalan@Sun.COM chan->ch_ip_hdr_align = EIB_IP_HDR_ALIGN;
753*13101SVenki.Rajagopalan@Sun.COM chan->ch_alloc_mp = B_TRUE;
754*13101SVenki.Rajagopalan@Sun.COM chan->ch_tear_down = B_FALSE;
755*13101SVenki.Rajagopalan@Sun.COM
756*13101SVenki.Rajagopalan@Sun.COM return (EIB_E_SUCCESS);
757*13101SVenki.Rajagopalan@Sun.COM
758*13101SVenki.Rajagopalan@Sun.COM setup_data_ud_channel_fail:
759*13101SVenki.Rajagopalan@Sun.COM eib_rb_data_setup_ud_channel(ss, vnic);
760*13101SVenki.Rajagopalan@Sun.COM return (EIB_E_FAILURE);
761*13101SVenki.Rajagopalan@Sun.COM }
762*13101SVenki.Rajagopalan@Sun.COM
763*13101SVenki.Rajagopalan@Sun.COM static void
eib_data_setup_lso(eib_wqe_t * swqe,mblk_t * mp,uint32_t mss,eib_ether_hdr_t * evh)764*13101SVenki.Rajagopalan@Sun.COM eib_data_setup_lso(eib_wqe_t *swqe, mblk_t *mp, uint32_t mss,
765*13101SVenki.Rajagopalan@Sun.COM eib_ether_hdr_t *evh)
766*13101SVenki.Rajagopalan@Sun.COM {
767*13101SVenki.Rajagopalan@Sun.COM ibt_wr_lso_t *lso;
768*13101SVenki.Rajagopalan@Sun.COM mblk_t *nmp;
769*13101SVenki.Rajagopalan@Sun.COM uint8_t *dst;
770*13101SVenki.Rajagopalan@Sun.COM uintptr_t ip_start;
771*13101SVenki.Rajagopalan@Sun.COM uintptr_t tcp_start;
772*13101SVenki.Rajagopalan@Sun.COM uint_t pending;
773*13101SVenki.Rajagopalan@Sun.COM uint_t mblen;
774*13101SVenki.Rajagopalan@Sun.COM uint_t eth_hdr_len;
775*13101SVenki.Rajagopalan@Sun.COM uint_t ip_hdr_len;
776*13101SVenki.Rajagopalan@Sun.COM uint_t tcp_hdr_len;
777*13101SVenki.Rajagopalan@Sun.COM
778*13101SVenki.Rajagopalan@Sun.COM /*
779*13101SVenki.Rajagopalan@Sun.COM * When the swqe was grabbed, it would've had its wr_opcode and
780*13101SVenki.Rajagopalan@Sun.COM * wr.ud.udwr_dest set to default values. Since we're now going
781*13101SVenki.Rajagopalan@Sun.COM * to use LSO, we need to change these.
782*13101SVenki.Rajagopalan@Sun.COM */
783*13101SVenki.Rajagopalan@Sun.COM swqe->qe_wr.send.wr_opcode = IBT_WRC_SEND_LSO;
784*13101SVenki.Rajagopalan@Sun.COM lso = &(swqe->qe_wr.send.wr.ud_lso);
785*13101SVenki.Rajagopalan@Sun.COM lso->lso_ud_dest = swqe->qe_dest;
786*13101SVenki.Rajagopalan@Sun.COM lso->lso_mss = mss;
787*13101SVenki.Rajagopalan@Sun.COM
788*13101SVenki.Rajagopalan@Sun.COM /*
789*13101SVenki.Rajagopalan@Sun.COM * Details on the ethernet header in the mp is already known to us
790*13101SVenki.Rajagopalan@Sun.COM */
791*13101SVenki.Rajagopalan@Sun.COM eth_hdr_len = (evh->eh_tagless) ? (sizeof (struct ether_header)) :
792*13101SVenki.Rajagopalan@Sun.COM (sizeof (struct ether_vlan_header));
793*13101SVenki.Rajagopalan@Sun.COM
794*13101SVenki.Rajagopalan@Sun.COM /*
795*13101SVenki.Rajagopalan@Sun.COM * Calculate the LSO header size and set it in the UD LSO structure.
796*13101SVenki.Rajagopalan@Sun.COM * Note that the only assumption we make is that each of the Ethernet,
797*13101SVenki.Rajagopalan@Sun.COM * IP and TCP headers will be contained in a single mblk fragment;
798*13101SVenki.Rajagopalan@Sun.COM * together, the headers may span multiple mblk fragments. Note also
799*13101SVenki.Rajagopalan@Sun.COM * that since the EoIB encapsulation header is not part of the message
800*13101SVenki.Rajagopalan@Sun.COM * block we receive, we'll need to account space for inserting it later.
801*13101SVenki.Rajagopalan@Sun.COM */
802*13101SVenki.Rajagopalan@Sun.COM nmp = mp;
803*13101SVenki.Rajagopalan@Sun.COM ip_start = (uintptr_t)(nmp->b_rptr) + eth_hdr_len;
804*13101SVenki.Rajagopalan@Sun.COM if (ip_start >= (uintptr_t)(nmp->b_wptr)) {
805*13101SVenki.Rajagopalan@Sun.COM ip_start = (uintptr_t)nmp->b_cont->b_rptr
806*13101SVenki.Rajagopalan@Sun.COM + (ip_start - (uintptr_t)(nmp->b_wptr));
807*13101SVenki.Rajagopalan@Sun.COM nmp = nmp->b_cont;
808*13101SVenki.Rajagopalan@Sun.COM }
809*13101SVenki.Rajagopalan@Sun.COM ip_hdr_len = IPH_HDR_LENGTH((ipha_t *)ip_start);
810*13101SVenki.Rajagopalan@Sun.COM
811*13101SVenki.Rajagopalan@Sun.COM tcp_start = ip_start + ip_hdr_len;
812*13101SVenki.Rajagopalan@Sun.COM if (tcp_start >= (uintptr_t)(nmp->b_wptr)) {
813*13101SVenki.Rajagopalan@Sun.COM tcp_start = (uintptr_t)nmp->b_cont->b_rptr
814*13101SVenki.Rajagopalan@Sun.COM + (tcp_start - (uintptr_t)(nmp->b_wptr));
815*13101SVenki.Rajagopalan@Sun.COM nmp = nmp->b_cont;
816*13101SVenki.Rajagopalan@Sun.COM }
817*13101SVenki.Rajagopalan@Sun.COM tcp_hdr_len = TCP_HDR_LENGTH((tcph_t *)tcp_start);
818*13101SVenki.Rajagopalan@Sun.COM
819*13101SVenki.Rajagopalan@Sun.COM /*
820*13101SVenki.Rajagopalan@Sun.COM * Since the passed mp fragment never contains the EoIB encapsulation
821*13101SVenki.Rajagopalan@Sun.COM * header, we always have to copy the lso header. Sigh.
822*13101SVenki.Rajagopalan@Sun.COM */
823*13101SVenki.Rajagopalan@Sun.COM lso->lso_hdr = swqe->qe_payload_hdr;
824*13101SVenki.Rajagopalan@Sun.COM lso->lso_hdr_sz = EIB_ENCAP_HDR_SZ + eth_hdr_len +
825*13101SVenki.Rajagopalan@Sun.COM ip_hdr_len + tcp_hdr_len;
826*13101SVenki.Rajagopalan@Sun.COM
827*13101SVenki.Rajagopalan@Sun.COM /*
828*13101SVenki.Rajagopalan@Sun.COM * We already have the EoIB encapsulation header written at the
829*13101SVenki.Rajagopalan@Sun.COM * start of wqe->qe_payload_hdr during swqe acquisition. Only
830*13101SVenki.Rajagopalan@Sun.COM * copy the remaining headers.
831*13101SVenki.Rajagopalan@Sun.COM */
832*13101SVenki.Rajagopalan@Sun.COM dst = lso->lso_hdr + EIB_ENCAP_HDR_SZ;
833*13101SVenki.Rajagopalan@Sun.COM pending = lso->lso_hdr_sz - EIB_ENCAP_HDR_SZ;
834*13101SVenki.Rajagopalan@Sun.COM
835*13101SVenki.Rajagopalan@Sun.COM for (nmp = mp; nmp && pending; nmp = nmp->b_cont) {
836*13101SVenki.Rajagopalan@Sun.COM mblen = MBLKL(nmp);
837*13101SVenki.Rajagopalan@Sun.COM if (pending > mblen) {
838*13101SVenki.Rajagopalan@Sun.COM bcopy(nmp->b_rptr, dst, mblen);
839*13101SVenki.Rajagopalan@Sun.COM dst += mblen;
840*13101SVenki.Rajagopalan@Sun.COM pending -= mblen;
841*13101SVenki.Rajagopalan@Sun.COM } else {
842*13101SVenki.Rajagopalan@Sun.COM bcopy(nmp->b_rptr, dst, pending);
843*13101SVenki.Rajagopalan@Sun.COM break;
844*13101SVenki.Rajagopalan@Sun.COM }
845*13101SVenki.Rajagopalan@Sun.COM }
846*13101SVenki.Rajagopalan@Sun.COM }
847*13101SVenki.Rajagopalan@Sun.COM
848*13101SVenki.Rajagopalan@Sun.COM static int
eib_data_prepare_sgl(eib_vnic_t * vnic,eib_wqe_t * swqe,mblk_t * mp)849*13101SVenki.Rajagopalan@Sun.COM eib_data_prepare_sgl(eib_vnic_t *vnic, eib_wqe_t *swqe, mblk_t *mp)
850*13101SVenki.Rajagopalan@Sun.COM {
851*13101SVenki.Rajagopalan@Sun.COM eib_t *ss = vnic->vn_ss;
852*13101SVenki.Rajagopalan@Sun.COM eib_stats_t *stats = vnic->vn_ss->ei_stats;
853*13101SVenki.Rajagopalan@Sun.COM ibt_iov_t iov_arr[EIB_MAX_SGL];
854*13101SVenki.Rajagopalan@Sun.COM ibt_iov_attr_t iov_attr;
855*13101SVenki.Rajagopalan@Sun.COM ibt_wr_ds_t *sgl;
856*13101SVenki.Rajagopalan@Sun.COM ibt_status_t ret;
857*13101SVenki.Rajagopalan@Sun.COM mblk_t *nmp;
858*13101SVenki.Rajagopalan@Sun.COM mblk_t *data_mp;
859*13101SVenki.Rajagopalan@Sun.COM uchar_t *bufp;
860*13101SVenki.Rajagopalan@Sun.COM size_t blksize;
861*13101SVenki.Rajagopalan@Sun.COM size_t skip;
862*13101SVenki.Rajagopalan@Sun.COM size_t avail;
863*13101SVenki.Rajagopalan@Sun.COM uint_t lsohdr_sz;
864*13101SVenki.Rajagopalan@Sun.COM uint_t pktsz;
865*13101SVenki.Rajagopalan@Sun.COM ptrdiff_t frag_len;
866*13101SVenki.Rajagopalan@Sun.COM uint_t pending_hdr;
867*13101SVenki.Rajagopalan@Sun.COM uint_t nblks;
868*13101SVenki.Rajagopalan@Sun.COM uint_t i;
869*13101SVenki.Rajagopalan@Sun.COM
870*13101SVenki.Rajagopalan@Sun.COM /*
871*13101SVenki.Rajagopalan@Sun.COM * Let's skip ahead to the TCP data if this is LSO. Note that while
872*13101SVenki.Rajagopalan@Sun.COM * the lso header size in the swqe includes the EoIB encapsulation
873*13101SVenki.Rajagopalan@Sun.COM * header size, that encapsulation header itself won't be found in
874*13101SVenki.Rajagopalan@Sun.COM * the mblk.
875*13101SVenki.Rajagopalan@Sun.COM */
876*13101SVenki.Rajagopalan@Sun.COM lsohdr_sz = (swqe->qe_wr.send.wr_opcode == IBT_WRC_SEND) ? 0 :
877*13101SVenki.Rajagopalan@Sun.COM swqe->qe_wr.send.wr.ud_lso.lso_hdr_sz;
878*13101SVenki.Rajagopalan@Sun.COM
879*13101SVenki.Rajagopalan@Sun.COM data_mp = mp;
880*13101SVenki.Rajagopalan@Sun.COM pending_hdr = 0;
881*13101SVenki.Rajagopalan@Sun.COM if (lsohdr_sz) {
882*13101SVenki.Rajagopalan@Sun.COM pending_hdr = lsohdr_sz - EIB_ENCAP_HDR_SZ;
883*13101SVenki.Rajagopalan@Sun.COM for (nmp = mp; nmp; nmp = nmp->b_cont) {
884*13101SVenki.Rajagopalan@Sun.COM frag_len =
885*13101SVenki.Rajagopalan@Sun.COM (uintptr_t)nmp->b_wptr - (uintptr_t)nmp->b_rptr;
886*13101SVenki.Rajagopalan@Sun.COM if (frag_len > pending_hdr)
887*13101SVenki.Rajagopalan@Sun.COM break;
888*13101SVenki.Rajagopalan@Sun.COM pending_hdr -= frag_len;
889*13101SVenki.Rajagopalan@Sun.COM }
890*13101SVenki.Rajagopalan@Sun.COM data_mp = nmp; /* start of data past lso header */
891*13101SVenki.Rajagopalan@Sun.COM ASSERT(data_mp != NULL);
892*13101SVenki.Rajagopalan@Sun.COM }
893*13101SVenki.Rajagopalan@Sun.COM
894*13101SVenki.Rajagopalan@Sun.COM /*
895*13101SVenki.Rajagopalan@Sun.COM * If this is an LSO packet, we want pktsz to hold the size of the
896*13101SVenki.Rajagopalan@Sun.COM * data following the eoib/ethernet/tcp/ip headers. If this is a
897*13101SVenki.Rajagopalan@Sun.COM * non-LSO packet, we want pktsz to refer to the size of the entire
898*13101SVenki.Rajagopalan@Sun.COM * packet with all the headers, and nblks to hold the number of
899*13101SVenki.Rajagopalan@Sun.COM * mappings we'll need to iov map this (for reserved lkey request).
900*13101SVenki.Rajagopalan@Sun.COM */
901*13101SVenki.Rajagopalan@Sun.COM if (lsohdr_sz == 0) {
902*13101SVenki.Rajagopalan@Sun.COM nblks = 1;
903*13101SVenki.Rajagopalan@Sun.COM pktsz = EIB_ENCAP_HDR_SZ;
904*13101SVenki.Rajagopalan@Sun.COM } else {
905*13101SVenki.Rajagopalan@Sun.COM nblks = 0;
906*13101SVenki.Rajagopalan@Sun.COM pktsz = 0;
907*13101SVenki.Rajagopalan@Sun.COM }
908*13101SVenki.Rajagopalan@Sun.COM for (nmp = data_mp; nmp != NULL; nmp = nmp->b_cont) {
909*13101SVenki.Rajagopalan@Sun.COM pktsz += MBLKL(nmp);
910*13101SVenki.Rajagopalan@Sun.COM nblks++;
911*13101SVenki.Rajagopalan@Sun.COM }
912*13101SVenki.Rajagopalan@Sun.COM pktsz -= pending_hdr;
913*13101SVenki.Rajagopalan@Sun.COM
914*13101SVenki.Rajagopalan@Sun.COM EIB_UPDATE_COUNTER(&stats->st_obytes, pktsz);
915*13101SVenki.Rajagopalan@Sun.COM EIB_INCR_COUNTER(&stats->st_opkts);
916*13101SVenki.Rajagopalan@Sun.COM
917*13101SVenki.Rajagopalan@Sun.COM /*
918*13101SVenki.Rajagopalan@Sun.COM * We only do ibt_map_mem_iov() if the pktsz is above the tx copy
919*13101SVenki.Rajagopalan@Sun.COM * threshold and if the number of mp fragments is less than the
920*13101SVenki.Rajagopalan@Sun.COM * maximum acceptable.
921*13101SVenki.Rajagopalan@Sun.COM */
922*13101SVenki.Rajagopalan@Sun.COM if ((ss->ei_caps->cp_resv_lkey_capab) && (pktsz > EIB_TX_COPY_THRESH) &&
923*13101SVenki.Rajagopalan@Sun.COM (nblks < ss->ei_caps->cp_hiwm_sgl)) {
924*13101SVenki.Rajagopalan@Sun.COM
925*13101SVenki.Rajagopalan@Sun.COM iov_attr.iov_as = NULL;
926*13101SVenki.Rajagopalan@Sun.COM iov_attr.iov = iov_arr;
927*13101SVenki.Rajagopalan@Sun.COM iov_attr.iov_buf = NULL;
928*13101SVenki.Rajagopalan@Sun.COM iov_attr.iov_list_len = nblks;
929*13101SVenki.Rajagopalan@Sun.COM iov_attr.iov_wr_nds = ss->ei_caps->cp_max_sgl;
930*13101SVenki.Rajagopalan@Sun.COM iov_attr.iov_lso_hdr_sz = lsohdr_sz;
931*13101SVenki.Rajagopalan@Sun.COM iov_attr.iov_flags = IBT_IOV_SLEEP;
932*13101SVenki.Rajagopalan@Sun.COM
933*13101SVenki.Rajagopalan@Sun.COM i = 0;
934*13101SVenki.Rajagopalan@Sun.COM if (lsohdr_sz == 0) {
935*13101SVenki.Rajagopalan@Sun.COM iov_arr[i].iov_addr = (caddr_t)swqe->qe_payload_hdr;
936*13101SVenki.Rajagopalan@Sun.COM iov_arr[i].iov_len = EIB_ENCAP_HDR_SZ;
937*13101SVenki.Rajagopalan@Sun.COM i++;
938*13101SVenki.Rajagopalan@Sun.COM }
939*13101SVenki.Rajagopalan@Sun.COM for (nmp = data_mp; i < nblks; i++, nmp = nmp->b_cont) {
940*13101SVenki.Rajagopalan@Sun.COM iov_arr[i].iov_addr = (caddr_t)(void *)nmp->b_rptr;
941*13101SVenki.Rajagopalan@Sun.COM iov_arr[i].iov_len = MBLKL(nmp);
942*13101SVenki.Rajagopalan@Sun.COM if (nmp == data_mp) {
943*13101SVenki.Rajagopalan@Sun.COM iov_arr[i].iov_addr += pending_hdr;
944*13101SVenki.Rajagopalan@Sun.COM iov_arr[i].iov_len -= pending_hdr;
945*13101SVenki.Rajagopalan@Sun.COM }
946*13101SVenki.Rajagopalan@Sun.COM }
947*13101SVenki.Rajagopalan@Sun.COM swqe->qe_info |= EIB_WQE_FLG_BUFTYPE_MAPPED;
948*13101SVenki.Rajagopalan@Sun.COM swqe->qe_wr.send.wr_sgl = swqe->qe_big_sgl;
949*13101SVenki.Rajagopalan@Sun.COM
950*13101SVenki.Rajagopalan@Sun.COM ret = ibt_map_mem_iov(ss->ei_hca_hdl, &iov_attr,
951*13101SVenki.Rajagopalan@Sun.COM &swqe->qe_wr, &swqe->qe_iov_hdl);
952*13101SVenki.Rajagopalan@Sun.COM if (ret != IBT_SUCCESS) {
953*13101SVenki.Rajagopalan@Sun.COM EIB_DPRINTF_WARN(ss->ei_instance,
954*13101SVenki.Rajagopalan@Sun.COM "eib_data_prepare_sgl: "
955*13101SVenki.Rajagopalan@Sun.COM "ibt_map_mem_iov(nblks=0x%lx) failed, ret=%d ",
956*13101SVenki.Rajagopalan@Sun.COM "attempting to use copy path", nblks, ret);
957*13101SVenki.Rajagopalan@Sun.COM goto prepare_sgl_copy_path;
958*13101SVenki.Rajagopalan@Sun.COM }
959*13101SVenki.Rajagopalan@Sun.COM
960*13101SVenki.Rajagopalan@Sun.COM return (EIB_E_SUCCESS);
961*13101SVenki.Rajagopalan@Sun.COM }
962*13101SVenki.Rajagopalan@Sun.COM
963*13101SVenki.Rajagopalan@Sun.COM prepare_sgl_copy_path:
964*13101SVenki.Rajagopalan@Sun.COM if (pktsz <= swqe->qe_bufsz) {
965*13101SVenki.Rajagopalan@Sun.COM swqe->qe_wr.send.wr_nds = 1;
966*13101SVenki.Rajagopalan@Sun.COM swqe->qe_wr.send.wr_sgl = &swqe->qe_sgl;
967*13101SVenki.Rajagopalan@Sun.COM swqe->qe_sgl.ds_len = pktsz;
968*13101SVenki.Rajagopalan@Sun.COM
969*13101SVenki.Rajagopalan@Sun.COM /*
970*13101SVenki.Rajagopalan@Sun.COM * Even though this is the copy path for transfers less than
971*13101SVenki.Rajagopalan@Sun.COM * qe_bufsz, it could still be an LSO packet. If so, we only
972*13101SVenki.Rajagopalan@Sun.COM * have to write the data following all the headers into the
973*13101SVenki.Rajagopalan@Sun.COM * work request buffer, since we'll be sending the lso header
974*13101SVenki.Rajagopalan@Sun.COM * itself separately. If this is not an LSO send (but pkt size
975*13101SVenki.Rajagopalan@Sun.COM * greater than mtu, say for a jumbo frame), then we need
976*13101SVenki.Rajagopalan@Sun.COM * to write all the headers including EoIB encapsulation,
977*13101SVenki.Rajagopalan@Sun.COM * into the work request buffer.
978*13101SVenki.Rajagopalan@Sun.COM */
979*13101SVenki.Rajagopalan@Sun.COM bufp = (uchar_t *)(uintptr_t)swqe->qe_sgl.ds_va;
980*13101SVenki.Rajagopalan@Sun.COM if (lsohdr_sz == 0) {
981*13101SVenki.Rajagopalan@Sun.COM *(uint32_t *)((void *)bufp) = htonl(EIB_TX_ENCAP_HDR);
982*13101SVenki.Rajagopalan@Sun.COM bufp += EIB_ENCAP_HDR_SZ;
983*13101SVenki.Rajagopalan@Sun.COM }
984*13101SVenki.Rajagopalan@Sun.COM for (nmp = data_mp; nmp != NULL; nmp = nmp->b_cont) {
985*13101SVenki.Rajagopalan@Sun.COM blksize = MBLKL(nmp) - pending_hdr;
986*13101SVenki.Rajagopalan@Sun.COM bcopy(nmp->b_rptr + pending_hdr, bufp, blksize);
987*13101SVenki.Rajagopalan@Sun.COM bufp += blksize;
988*13101SVenki.Rajagopalan@Sun.COM pending_hdr = 0;
989*13101SVenki.Rajagopalan@Sun.COM }
990*13101SVenki.Rajagopalan@Sun.COM
991*13101SVenki.Rajagopalan@Sun.COM /*
992*13101SVenki.Rajagopalan@Sun.COM * If the ethernet frame we're going to send is less than
993*13101SVenki.Rajagopalan@Sun.COM * ETHERMIN, pad up the buffer to ETHERMIN (with zeros)
994*13101SVenki.Rajagopalan@Sun.COM */
995*13101SVenki.Rajagopalan@Sun.COM if ((pktsz + lsohdr_sz) < (ETHERMIN + EIB_ENCAP_HDR_SZ)) {
996*13101SVenki.Rajagopalan@Sun.COM bzero(bufp, (ETHERMIN + EIB_ENCAP_HDR_SZ) -
997*13101SVenki.Rajagopalan@Sun.COM (pktsz + lsohdr_sz));
998*13101SVenki.Rajagopalan@Sun.COM swqe->qe_sgl.ds_len = ETHERMIN + EIB_ENCAP_HDR_SZ;
999*13101SVenki.Rajagopalan@Sun.COM }
1000*13101SVenki.Rajagopalan@Sun.COM return (EIB_E_SUCCESS);
1001*13101SVenki.Rajagopalan@Sun.COM }
1002*13101SVenki.Rajagopalan@Sun.COM
1003*13101SVenki.Rajagopalan@Sun.COM /*
1004*13101SVenki.Rajagopalan@Sun.COM * Copy path for transfers greater than swqe->qe_bufsz
1005*13101SVenki.Rajagopalan@Sun.COM */
1006*13101SVenki.Rajagopalan@Sun.COM swqe->qe_wr.send.wr_sgl = swqe->qe_big_sgl;
1007*13101SVenki.Rajagopalan@Sun.COM if (eib_rsrc_grab_lsobufs(ss, pktsz, swqe->qe_wr.send.wr_sgl,
1008*13101SVenki.Rajagopalan@Sun.COM &(swqe->qe_wr.send.wr_nds)) != EIB_E_SUCCESS) {
1009*13101SVenki.Rajagopalan@Sun.COM EIB_DPRINTF_ERR(ss->ei_instance, "eib_data_prepare_sgl: "
1010*13101SVenki.Rajagopalan@Sun.COM "eib_rsrc_grab_lsobufs() failed");
1011*13101SVenki.Rajagopalan@Sun.COM return (EIB_E_FAILURE);
1012*13101SVenki.Rajagopalan@Sun.COM }
1013*13101SVenki.Rajagopalan@Sun.COM swqe->qe_info |= EIB_WQE_FLG_BUFTYPE_LSO;
1014*13101SVenki.Rajagopalan@Sun.COM
1015*13101SVenki.Rajagopalan@Sun.COM /*
1016*13101SVenki.Rajagopalan@Sun.COM * Copy the larger-than-qe_buf_sz packet into a set of fixed-sized,
1017*13101SVenki.Rajagopalan@Sun.COM * pre-mapped LSO buffers. Note that we might need to skip part of
1018*13101SVenki.Rajagopalan@Sun.COM * the LSO header in the first fragment as before.
1019*13101SVenki.Rajagopalan@Sun.COM */
1020*13101SVenki.Rajagopalan@Sun.COM nmp = data_mp;
1021*13101SVenki.Rajagopalan@Sun.COM skip = pending_hdr;
1022*13101SVenki.Rajagopalan@Sun.COM for (i = 0; i < swqe->qe_wr.send.wr_nds; i++) {
1023*13101SVenki.Rajagopalan@Sun.COM sgl = swqe->qe_wr.send.wr_sgl + i;
1024*13101SVenki.Rajagopalan@Sun.COM bufp = (uchar_t *)(uintptr_t)sgl->ds_va;
1025*13101SVenki.Rajagopalan@Sun.COM avail = EIB_LSO_BUFSZ;
1026*13101SVenki.Rajagopalan@Sun.COM
1027*13101SVenki.Rajagopalan@Sun.COM /*
1028*13101SVenki.Rajagopalan@Sun.COM * If this is a non-LSO packet (perhaps a jumbo frame?)
1029*13101SVenki.Rajagopalan@Sun.COM * we may still need to prefix the EoIB header in the
1030*13101SVenki.Rajagopalan@Sun.COM * wr buffer.
1031*13101SVenki.Rajagopalan@Sun.COM */
1032*13101SVenki.Rajagopalan@Sun.COM if ((i == 0) && (lsohdr_sz == 0)) {
1033*13101SVenki.Rajagopalan@Sun.COM *(uint32_t *)((void *)bufp) = htonl(EIB_TX_ENCAP_HDR);
1034*13101SVenki.Rajagopalan@Sun.COM bufp += EIB_ENCAP_HDR_SZ;
1035*13101SVenki.Rajagopalan@Sun.COM avail -= EIB_ENCAP_HDR_SZ;
1036*13101SVenki.Rajagopalan@Sun.COM }
1037*13101SVenki.Rajagopalan@Sun.COM
1038*13101SVenki.Rajagopalan@Sun.COM while (nmp && avail) {
1039*13101SVenki.Rajagopalan@Sun.COM blksize = MBLKL(nmp) - skip;
1040*13101SVenki.Rajagopalan@Sun.COM if (blksize > avail) {
1041*13101SVenki.Rajagopalan@Sun.COM bcopy(nmp->b_rptr + skip, bufp, avail);
1042*13101SVenki.Rajagopalan@Sun.COM skip += avail;
1043*13101SVenki.Rajagopalan@Sun.COM avail = 0;
1044*13101SVenki.Rajagopalan@Sun.COM } else {
1045*13101SVenki.Rajagopalan@Sun.COM bcopy(nmp->b_rptr + skip, bufp, blksize);
1046*13101SVenki.Rajagopalan@Sun.COM skip = 0;
1047*13101SVenki.Rajagopalan@Sun.COM bufp += blksize;
1048*13101SVenki.Rajagopalan@Sun.COM avail -= blksize;
1049*13101SVenki.Rajagopalan@Sun.COM nmp = nmp->b_cont;
1050*13101SVenki.Rajagopalan@Sun.COM }
1051*13101SVenki.Rajagopalan@Sun.COM }
1052*13101SVenki.Rajagopalan@Sun.COM }
1053*13101SVenki.Rajagopalan@Sun.COM
1054*13101SVenki.Rajagopalan@Sun.COM return (EIB_E_SUCCESS);
1055*13101SVenki.Rajagopalan@Sun.COM }
1056*13101SVenki.Rajagopalan@Sun.COM
1057*13101SVenki.Rajagopalan@Sun.COM /*ARGSUSED*/
1058*13101SVenki.Rajagopalan@Sun.COM static int
eib_data_is_mcast_pkt_ok(eib_vnic_t * vnic,uint8_t * macaddr,uint64_t * brdcst,uint64_t * multicst)1059*13101SVenki.Rajagopalan@Sun.COM eib_data_is_mcast_pkt_ok(eib_vnic_t *vnic, uint8_t *macaddr, uint64_t *brdcst,
1060*13101SVenki.Rajagopalan@Sun.COM uint64_t *multicst)
1061*13101SVenki.Rajagopalan@Sun.COM {
1062*13101SVenki.Rajagopalan@Sun.COM /*
1063*13101SVenki.Rajagopalan@Sun.COM * If the dmac is a broadcast packet, let it through. Otherwise, either
1064*13101SVenki.Rajagopalan@Sun.COM * we should be in promiscuous mode or the dmac should be in our list of
1065*13101SVenki.Rajagopalan@Sun.COM * joined multicast addresses. Currently we only update the stat
1066*13101SVenki.Rajagopalan@Sun.COM * counters and always let things through.
1067*13101SVenki.Rajagopalan@Sun.COM */
1068*13101SVenki.Rajagopalan@Sun.COM if (bcmp(macaddr, eib_broadcast_mac, ETHERADDRL) == 0)
1069*13101SVenki.Rajagopalan@Sun.COM EIB_INCR_COUNTER(brdcst);
1070*13101SVenki.Rajagopalan@Sun.COM else
1071*13101SVenki.Rajagopalan@Sun.COM EIB_INCR_COUNTER(multicst);
1072*13101SVenki.Rajagopalan@Sun.COM
1073*13101SVenki.Rajagopalan@Sun.COM return (1);
1074*13101SVenki.Rajagopalan@Sun.COM }
1075*13101SVenki.Rajagopalan@Sun.COM
1076*13101SVenki.Rajagopalan@Sun.COM static void
eib_data_rx_comp_intr(ibt_cq_hdl_t cq_hdl,void * arg)1077*13101SVenki.Rajagopalan@Sun.COM eib_data_rx_comp_intr(ibt_cq_hdl_t cq_hdl, void *arg)
1078*13101SVenki.Rajagopalan@Sun.COM {
1079*13101SVenki.Rajagopalan@Sun.COM eib_vnic_t *vnic = arg;
1080*13101SVenki.Rajagopalan@Sun.COM eib_chan_t *chan = vnic->vn_data_chan;
1081*13101SVenki.Rajagopalan@Sun.COM eib_t *ss = vnic->vn_ss;
1082*13101SVenki.Rajagopalan@Sun.COM
1083*13101SVenki.Rajagopalan@Sun.COM if (cq_hdl != chan->ch_rcv_cq_hdl) {
1084*13101SVenki.Rajagopalan@Sun.COM EIB_DPRINTF_DEBUG(ss->ei_instance, "eib_data_rx_comp_intr: "
1085*13101SVenki.Rajagopalan@Sun.COM "cq_hdl(0x%llx) != chan->ch_cq_hdl(0x%llx), "
1086*13101SVenki.Rajagopalan@Sun.COM "ignoring completion", cq_hdl, chan->ch_cq_hdl);
1087*13101SVenki.Rajagopalan@Sun.COM return;
1088*13101SVenki.Rajagopalan@Sun.COM }
1089*13101SVenki.Rajagopalan@Sun.COM
1090*13101SVenki.Rajagopalan@Sun.COM ASSERT(vnic->vn_data_rx_si_hdl != NULL);
1091*13101SVenki.Rajagopalan@Sun.COM
1092*13101SVenki.Rajagopalan@Sun.COM (void) ddi_intr_trigger_softint(vnic->vn_data_rx_si_hdl, NULL);
1093*13101SVenki.Rajagopalan@Sun.COM }
1094*13101SVenki.Rajagopalan@Sun.COM
1095*13101SVenki.Rajagopalan@Sun.COM static void
eib_data_tx_comp_intr(ibt_cq_hdl_t cq_hdl,void * arg)1096*13101SVenki.Rajagopalan@Sun.COM eib_data_tx_comp_intr(ibt_cq_hdl_t cq_hdl, void *arg)
1097*13101SVenki.Rajagopalan@Sun.COM {
1098*13101SVenki.Rajagopalan@Sun.COM eib_vnic_t *vnic = arg;
1099*13101SVenki.Rajagopalan@Sun.COM eib_chan_t *chan = vnic->vn_data_chan;
1100*13101SVenki.Rajagopalan@Sun.COM eib_t *ss = vnic->vn_ss;
1101*13101SVenki.Rajagopalan@Sun.COM
1102*13101SVenki.Rajagopalan@Sun.COM if (cq_hdl != chan->ch_cq_hdl) {
1103*13101SVenki.Rajagopalan@Sun.COM EIB_DPRINTF_DEBUG(ss->ei_instance, "eib_data_tx_comp_intr: "
1104*13101SVenki.Rajagopalan@Sun.COM "cq_hdl(0x%llx) != chan->ch_cq_hdl(0x%llx), "
1105*13101SVenki.Rajagopalan@Sun.COM "ignoring completion", cq_hdl, chan->ch_cq_hdl);
1106*13101SVenki.Rajagopalan@Sun.COM return;
1107*13101SVenki.Rajagopalan@Sun.COM }
1108*13101SVenki.Rajagopalan@Sun.COM
1109*13101SVenki.Rajagopalan@Sun.COM ASSERT(vnic->vn_data_tx_si_hdl != NULL);
1110*13101SVenki.Rajagopalan@Sun.COM
1111*13101SVenki.Rajagopalan@Sun.COM (void) ddi_intr_trigger_softint(vnic->vn_data_tx_si_hdl, NULL);
1112*13101SVenki.Rajagopalan@Sun.COM }
1113*13101SVenki.Rajagopalan@Sun.COM
1114*13101SVenki.Rajagopalan@Sun.COM static mblk_t *
eib_data_rx_comp(eib_vnic_t * vnic,eib_wqe_t * wqe,ibt_wc_t * wc)1115*13101SVenki.Rajagopalan@Sun.COM eib_data_rx_comp(eib_vnic_t *vnic, eib_wqe_t *wqe, ibt_wc_t *wc)
1116*13101SVenki.Rajagopalan@Sun.COM {
1117*13101SVenki.Rajagopalan@Sun.COM eib_t *ss = vnic->vn_ss;
1118*13101SVenki.Rajagopalan@Sun.COM eib_chan_t *chan = vnic->vn_data_chan;
1119*13101SVenki.Rajagopalan@Sun.COM eib_login_data_t *ld = &vnic->vn_login_data;
1120*13101SVenki.Rajagopalan@Sun.COM eib_stats_t *stats = ss->ei_stats;
1121*13101SVenki.Rajagopalan@Sun.COM eib_ether_hdr_t evh;
1122*13101SVenki.Rajagopalan@Sun.COM mblk_t *mp;
1123*13101SVenki.Rajagopalan@Sun.COM boolean_t allocd_mp = B_FALSE;
1124*13101SVenki.Rajagopalan@Sun.COM uint_t ec_hdr;
1125*13101SVenki.Rajagopalan@Sun.COM uint_t ec_sign;
1126*13101SVenki.Rajagopalan@Sun.COM uint_t ec_ver;
1127*13101SVenki.Rajagopalan@Sun.COM uint_t ec_tu_cs;
1128*13101SVenki.Rajagopalan@Sun.COM uint_t ec_ip_cs;
1129*13101SVenki.Rajagopalan@Sun.COM
1130*13101SVenki.Rajagopalan@Sun.COM /*
1131*13101SVenki.Rajagopalan@Sun.COM * Before we process this mblk and send it up to network layer, see
1132*13101SVenki.Rajagopalan@Sun.COM * if we're running low on rwqes in the wqe pool. If so, allocate a
1133*13101SVenki.Rajagopalan@Sun.COM * new mblk, copy the received data into it and send it up (and return
1134*13101SVenki.Rajagopalan@Sun.COM * the current rwqe back to the pool immediately by calling freemsg()
1135*13101SVenki.Rajagopalan@Sun.COM * on the original mblk).
1136*13101SVenki.Rajagopalan@Sun.COM */
1137*13101SVenki.Rajagopalan@Sun.COM if (!eib_rsrc_rxpool_low(wqe)) {
1138*13101SVenki.Rajagopalan@Sun.COM mp = wqe->qe_mp;
1139*13101SVenki.Rajagopalan@Sun.COM } else {
1140*13101SVenki.Rajagopalan@Sun.COM if ((mp = allocb(wc->wc_bytes_xfer, BPRI_HI)) != NULL) {
1141*13101SVenki.Rajagopalan@Sun.COM bcopy(wqe->qe_mp->b_rptr, mp->b_rptr,
1142*13101SVenki.Rajagopalan@Sun.COM wc->wc_bytes_xfer);
1143*13101SVenki.Rajagopalan@Sun.COM freemsg(wqe->qe_mp);
1144*13101SVenki.Rajagopalan@Sun.COM allocd_mp = B_TRUE;
1145*13101SVenki.Rajagopalan@Sun.COM } else {
1146*13101SVenki.Rajagopalan@Sun.COM EIB_DPRINTF_WARN(ss->ei_instance, "eib_data_rx_comp: "
1147*13101SVenki.Rajagopalan@Sun.COM "wqe level below watermark, dropping rx pkt");
1148*13101SVenki.Rajagopalan@Sun.COM EIB_INCR_COUNTER(&stats->st_norcvbuf);
1149*13101SVenki.Rajagopalan@Sun.COM freemsg(wqe->qe_mp);
1150*13101SVenki.Rajagopalan@Sun.COM return (NULL);
1151*13101SVenki.Rajagopalan@Sun.COM }
1152*13101SVenki.Rajagopalan@Sun.COM }
1153*13101SVenki.Rajagopalan@Sun.COM
1154*13101SVenki.Rajagopalan@Sun.COM /*
1155*13101SVenki.Rajagopalan@Sun.COM * Adjust write pointer depending on how much data came in. Note that
1156*13101SVenki.Rajagopalan@Sun.COM * since the nw layer will expect us to hand over the mp with the
1157*13101SVenki.Rajagopalan@Sun.COM * ethernet header starting at mp->b_rptr, update the b_rptr as well.
1158*13101SVenki.Rajagopalan@Sun.COM */
1159*13101SVenki.Rajagopalan@Sun.COM mp->b_wptr = mp->b_rptr + wc->wc_bytes_xfer;
1160*13101SVenki.Rajagopalan@Sun.COM
1161*13101SVenki.Rajagopalan@Sun.COM /*
1162*13101SVenki.Rajagopalan@Sun.COM * We have a problem if this really happens!
1163*13101SVenki.Rajagopalan@Sun.COM */
1164*13101SVenki.Rajagopalan@Sun.COM if (mp->b_next != NULL) {
1165*13101SVenki.Rajagopalan@Sun.COM EIB_DPRINTF_WARN(ss->ei_instance, "eib_data_rx_comp: "
1166*13101SVenki.Rajagopalan@Sun.COM "received packet's b_next not NULL, possible dup from cq");
1167*13101SVenki.Rajagopalan@Sun.COM mp->b_next = NULL;
1168*13101SVenki.Rajagopalan@Sun.COM }
1169*13101SVenki.Rajagopalan@Sun.COM
1170*13101SVenki.Rajagopalan@Sun.COM /*
1171*13101SVenki.Rajagopalan@Sun.COM * Drop loopback packets ?
1172*13101SVenki.Rajagopalan@Sun.COM */
1173*13101SVenki.Rajagopalan@Sun.COM if ((wc->wc_slid == ss->ei_props->ep_blid) &&
1174*13101SVenki.Rajagopalan@Sun.COM (wc->wc_qpn == chan->ch_qpn)) {
1175*13101SVenki.Rajagopalan@Sun.COM goto data_rx_comp_fail;
1176*13101SVenki.Rajagopalan@Sun.COM }
1177*13101SVenki.Rajagopalan@Sun.COM
1178*13101SVenki.Rajagopalan@Sun.COM mp->b_rptr += EIB_GRH_SZ;
1179*13101SVenki.Rajagopalan@Sun.COM
1180*13101SVenki.Rajagopalan@Sun.COM /*
1181*13101SVenki.Rajagopalan@Sun.COM * Since the recv buffer has been aligned for IP header to start on
1182*13101SVenki.Rajagopalan@Sun.COM * a word boundary, it is safe to say that the EoIB and ethernet
1183*13101SVenki.Rajagopalan@Sun.COM * headers won't start on a word boundary.
1184*13101SVenki.Rajagopalan@Sun.COM */
1185*13101SVenki.Rajagopalan@Sun.COM bcopy(mp->b_rptr, &ec_hdr, EIB_ENCAP_HDR_SZ);
1186*13101SVenki.Rajagopalan@Sun.COM
1187*13101SVenki.Rajagopalan@Sun.COM /*
1188*13101SVenki.Rajagopalan@Sun.COM * Check EoIB signature and version
1189*13101SVenki.Rajagopalan@Sun.COM */
1190*13101SVenki.Rajagopalan@Sun.COM ec_hdr = ntohl(ec_hdr);
1191*13101SVenki.Rajagopalan@Sun.COM
1192*13101SVenki.Rajagopalan@Sun.COM ec_sign = (ec_hdr >> EIB_ENCAP_SIGN_SHIFT) & EIB_ENCAP_SIGN_MASK;
1193*13101SVenki.Rajagopalan@Sun.COM if (ec_sign != EIB_EH_SIGNATURE) {
1194*13101SVenki.Rajagopalan@Sun.COM EIB_DPRINTF_WARN(ss->ei_instance, "eib_data_rx_comp: "
1195*13101SVenki.Rajagopalan@Sun.COM "EoIB encapsulation header signature (0x%lx) unknown",
1196*13101SVenki.Rajagopalan@Sun.COM ec_sign);
1197*13101SVenki.Rajagopalan@Sun.COM goto data_rx_comp_fail;
1198*13101SVenki.Rajagopalan@Sun.COM }
1199*13101SVenki.Rajagopalan@Sun.COM
1200*13101SVenki.Rajagopalan@Sun.COM ec_ver = (ec_hdr >> EIB_ENCAP_VER_SHIFT) & EIB_ENCAP_VER_MASK;
1201*13101SVenki.Rajagopalan@Sun.COM if (ec_ver != EIB_EH_VERSION) {
1202*13101SVenki.Rajagopalan@Sun.COM EIB_DPRINTF_WARN(ss->ei_instance, "eib_data_rx_comp: "
1203*13101SVenki.Rajagopalan@Sun.COM "EoIB encapsulation header version (0x%lx) unknown",
1204*13101SVenki.Rajagopalan@Sun.COM ec_ver);
1205*13101SVenki.Rajagopalan@Sun.COM goto data_rx_comp_fail;
1206*13101SVenki.Rajagopalan@Sun.COM }
1207*13101SVenki.Rajagopalan@Sun.COM
1208*13101SVenki.Rajagopalan@Sun.COM /*
1209*13101SVenki.Rajagopalan@Sun.COM * Check TCP/UDP and IP checksum
1210*13101SVenki.Rajagopalan@Sun.COM */
1211*13101SVenki.Rajagopalan@Sun.COM ec_tu_cs = (ec_hdr >> EIB_ENCAP_TCPCHK_SHIFT) & EIB_ENCAP_TCPCHK_MASK;
1212*13101SVenki.Rajagopalan@Sun.COM ec_ip_cs = (ec_hdr >> EIB_ENCAP_IPCHK_SHIFT) & EIB_ENCAP_IPCHK_MASK;
1213*13101SVenki.Rajagopalan@Sun.COM
1214*13101SVenki.Rajagopalan@Sun.COM if ((ec_tu_cs == EIB_EH_UDPCSUM_OK || ec_tu_cs == EIB_EH_TCPCSUM_OK) &&
1215*13101SVenki.Rajagopalan@Sun.COM (ec_ip_cs == EIB_EH_IPCSUM_OK)) {
1216*13101SVenki.Rajagopalan@Sun.COM mac_hcksum_set(mp, 0, 0, 0, 0, HCK_FULLCKSUM_OK);
1217*13101SVenki.Rajagopalan@Sun.COM } else if (ec_tu_cs == EIB_EH_CSUM_BAD || ec_ip_cs == EIB_EH_CSUM_BAD) {
1218*13101SVenki.Rajagopalan@Sun.COM EIB_DPRINTF_WARN(ss->ei_instance, "eib_data_rx_comp: "
1219*13101SVenki.Rajagopalan@Sun.COM "EoIB encapsulation header tcp/udp checksum (0x%lx) or"
1220*13101SVenki.Rajagopalan@Sun.COM "ip checksum (0x%lx) is bad", ec_tu_cs, ec_ip_cs);
1221*13101SVenki.Rajagopalan@Sun.COM }
1222*13101SVenki.Rajagopalan@Sun.COM
1223*13101SVenki.Rajagopalan@Sun.COM /*
1224*13101SVenki.Rajagopalan@Sun.COM * Update the message block's b_rptr to the start of ethernet header
1225*13101SVenki.Rajagopalan@Sun.COM * and parse the header information
1226*13101SVenki.Rajagopalan@Sun.COM */
1227*13101SVenki.Rajagopalan@Sun.COM mp->b_rptr += EIB_ENCAP_HDR_SZ;
1228*13101SVenki.Rajagopalan@Sun.COM eib_data_parse_ether_hdr(mp, &evh);
1229*13101SVenki.Rajagopalan@Sun.COM
1230*13101SVenki.Rajagopalan@Sun.COM /*
1231*13101SVenki.Rajagopalan@Sun.COM * If the incoming packet is vlan-tagged, but the tag doesn't match
1232*13101SVenki.Rajagopalan@Sun.COM * this vnic's vlan, drop it.
1233*13101SVenki.Rajagopalan@Sun.COM */
1234*13101SVenki.Rajagopalan@Sun.COM if ((evh.eh_tagless == 0) && (evh.eh_vlan != ld->ld_assigned_vlan)) {
1235*13101SVenki.Rajagopalan@Sun.COM EIB_DPRINTF_WARN(ss->ei_instance, "eib_data_rx_comp: "
1236*13101SVenki.Rajagopalan@Sun.COM "received packet's vlan unknown, expected=0x%x, got=0x%x",
1237*13101SVenki.Rajagopalan@Sun.COM ld->ld_assigned_vlan, evh.eh_vlan);
1238*13101SVenki.Rajagopalan@Sun.COM goto data_rx_comp_fail;
1239*13101SVenki.Rajagopalan@Sun.COM }
1240*13101SVenki.Rajagopalan@Sun.COM
1241*13101SVenki.Rajagopalan@Sun.COM /*
1242*13101SVenki.Rajagopalan@Sun.COM * Final checks to see if the unicast destination is indeed correct
1243*13101SVenki.Rajagopalan@Sun.COM * and to see if the multicast address is ok for us.
1244*13101SVenki.Rajagopalan@Sun.COM */
1245*13101SVenki.Rajagopalan@Sun.COM if (EIB_UNICAST_MAC(evh.eh_dmac)) {
1246*13101SVenki.Rajagopalan@Sun.COM if (bcmp(evh.eh_dmac, ld->ld_assigned_mac, ETHERADDRL) != 0) {
1247*13101SVenki.Rajagopalan@Sun.COM uint8_t *exp;
1248*13101SVenki.Rajagopalan@Sun.COM uint8_t *got;
1249*13101SVenki.Rajagopalan@Sun.COM
1250*13101SVenki.Rajagopalan@Sun.COM exp = ld->ld_assigned_mac;
1251*13101SVenki.Rajagopalan@Sun.COM got = evh.eh_dmac;
1252*13101SVenki.Rajagopalan@Sun.COM
1253*13101SVenki.Rajagopalan@Sun.COM EIB_DPRINTF_WARN(ss->ei_instance, "eib_data_rx_comp: "
1254*13101SVenki.Rajagopalan@Sun.COM "received packet's macaddr mismatch, "
1255*13101SVenki.Rajagopalan@Sun.COM "expected=%x:%x:%x:%x:%x:%x, got=%x:%x:%x:%x:%x:%x",
1256*13101SVenki.Rajagopalan@Sun.COM exp[0], exp[1], exp[2], exp[3], exp[4], exp[5],
1257*13101SVenki.Rajagopalan@Sun.COM got[0], got[1], got[2], got[3], got[4], got[5]);
1258*13101SVenki.Rajagopalan@Sun.COM
1259*13101SVenki.Rajagopalan@Sun.COM goto data_rx_comp_fail;
1260*13101SVenki.Rajagopalan@Sun.COM }
1261*13101SVenki.Rajagopalan@Sun.COM } else {
1262*13101SVenki.Rajagopalan@Sun.COM if (!eib_data_is_mcast_pkt_ok(vnic, evh.eh_dmac,
1263*13101SVenki.Rajagopalan@Sun.COM &stats->st_brdcstrcv, &stats->st_multircv)) {
1264*13101SVenki.Rajagopalan@Sun.COM EIB_DPRINTF_WARN(ss->ei_instance, "eib_data_rx_comp: "
1265*13101SVenki.Rajagopalan@Sun.COM "multicast packet not ok");
1266*13101SVenki.Rajagopalan@Sun.COM goto data_rx_comp_fail;
1267*13101SVenki.Rajagopalan@Sun.COM }
1268*13101SVenki.Rajagopalan@Sun.COM }
1269*13101SVenki.Rajagopalan@Sun.COM
1270*13101SVenki.Rajagopalan@Sun.COM /*
1271*13101SVenki.Rajagopalan@Sun.COM * Strip ethernet FCS if present in the packet. ConnectX-2 doesn't
1272*13101SVenki.Rajagopalan@Sun.COM * support ethernet FCS, so this shouldn't happen anyway.
1273*13101SVenki.Rajagopalan@Sun.COM */
1274*13101SVenki.Rajagopalan@Sun.COM if ((ec_hdr >> EIB_ENCAP_FCS_B_SHIFT) & 0x1) {
1275*13101SVenki.Rajagopalan@Sun.COM EIB_DPRINTF_WARN(ss->ei_instance, "eib_data_rx_comp: "
1276*13101SVenki.Rajagopalan@Sun.COM "ethernet FCS present (ec_hdr=0%lx), ignoring",
1277*13101SVenki.Rajagopalan@Sun.COM ec_hdr);
1278*13101SVenki.Rajagopalan@Sun.COM
1279*13101SVenki.Rajagopalan@Sun.COM mp->b_wptr -= ETHERFCSL;
1280*13101SVenki.Rajagopalan@Sun.COM }
1281*13101SVenki.Rajagopalan@Sun.COM
1282*13101SVenki.Rajagopalan@Sun.COM /*
1283*13101SVenki.Rajagopalan@Sun.COM * If this is the same mp as was in the original rwqe (i.e. we didn't
1284*13101SVenki.Rajagopalan@Sun.COM * do any allocb()), then mark the rwqe flag so we know that its mblk
1285*13101SVenki.Rajagopalan@Sun.COM * is with the network layer.
1286*13101SVenki.Rajagopalan@Sun.COM */
1287*13101SVenki.Rajagopalan@Sun.COM if (!allocd_mp) {
1288*13101SVenki.Rajagopalan@Sun.COM wqe->qe_info |= EIB_WQE_FLG_WITH_NW;
1289*13101SVenki.Rajagopalan@Sun.COM }
1290*13101SVenki.Rajagopalan@Sun.COM
1291*13101SVenki.Rajagopalan@Sun.COM return (mp);
1292*13101SVenki.Rajagopalan@Sun.COM
1293*13101SVenki.Rajagopalan@Sun.COM data_rx_comp_fail:
1294*13101SVenki.Rajagopalan@Sun.COM freemsg(mp);
1295*13101SVenki.Rajagopalan@Sun.COM return (NULL);
1296*13101SVenki.Rajagopalan@Sun.COM }
1297*13101SVenki.Rajagopalan@Sun.COM
1298*13101SVenki.Rajagopalan@Sun.COM static void
eib_data_tx_comp(eib_vnic_t * vnic,eib_wqe_t * wqe,eib_chan_t * chan)1299*13101SVenki.Rajagopalan@Sun.COM eib_data_tx_comp(eib_vnic_t *vnic, eib_wqe_t *wqe, eib_chan_t *chan)
1300*13101SVenki.Rajagopalan@Sun.COM {
1301*13101SVenki.Rajagopalan@Sun.COM eib_t *ss = vnic->vn_ss;
1302*13101SVenki.Rajagopalan@Sun.COM ibt_status_t ret;
1303*13101SVenki.Rajagopalan@Sun.COM
1304*13101SVenki.Rajagopalan@Sun.COM if (wqe->qe_mp) {
1305*13101SVenki.Rajagopalan@Sun.COM if (wqe->qe_info & EIB_WQE_FLG_BUFTYPE_MAPPED) {
1306*13101SVenki.Rajagopalan@Sun.COM ret = ibt_unmap_mem_iov(ss->ei_hca_hdl,
1307*13101SVenki.Rajagopalan@Sun.COM wqe->qe_iov_hdl);
1308*13101SVenki.Rajagopalan@Sun.COM if (ret != IBT_SUCCESS) {
1309*13101SVenki.Rajagopalan@Sun.COM EIB_DPRINTF_WARN(ss->ei_instance,
1310*13101SVenki.Rajagopalan@Sun.COM "eib_data_tx_comp: "
1311*13101SVenki.Rajagopalan@Sun.COM "ibt_unmap_mem_iov() failed, ret=%d", ret);
1312*13101SVenki.Rajagopalan@Sun.COM }
1313*13101SVenki.Rajagopalan@Sun.COM wqe->qe_iov_hdl = NULL;
1314*13101SVenki.Rajagopalan@Sun.COM } else if (wqe->qe_info & EIB_WQE_FLG_BUFTYPE_LSO) {
1315*13101SVenki.Rajagopalan@Sun.COM eib_rsrc_return_lsobufs(ss, wqe->qe_big_sgl,
1316*13101SVenki.Rajagopalan@Sun.COM wqe->qe_wr.send.wr_nds);
1317*13101SVenki.Rajagopalan@Sun.COM }
1318*13101SVenki.Rajagopalan@Sun.COM freemsg(wqe->qe_mp);
1319*13101SVenki.Rajagopalan@Sun.COM wqe->qe_mp = NULL;
1320*13101SVenki.Rajagopalan@Sun.COM }
1321*13101SVenki.Rajagopalan@Sun.COM
1322*13101SVenki.Rajagopalan@Sun.COM eib_rsrc_return_swqe(ss, wqe, chan);
1323*13101SVenki.Rajagopalan@Sun.COM }
1324*13101SVenki.Rajagopalan@Sun.COM
1325*13101SVenki.Rajagopalan@Sun.COM static void
eib_data_err_comp(eib_vnic_t * vnic,eib_wqe_t * wqe,ibt_wc_t * wc)1326*13101SVenki.Rajagopalan@Sun.COM eib_data_err_comp(eib_vnic_t *vnic, eib_wqe_t *wqe, ibt_wc_t *wc)
1327*13101SVenki.Rajagopalan@Sun.COM {
1328*13101SVenki.Rajagopalan@Sun.COM eib_t *ss = vnic->vn_ss;
1329*13101SVenki.Rajagopalan@Sun.COM
1330*13101SVenki.Rajagopalan@Sun.COM /*
1331*13101SVenki.Rajagopalan@Sun.COM * Currently, all we do is report
1332*13101SVenki.Rajagopalan@Sun.COM */
1333*13101SVenki.Rajagopalan@Sun.COM switch (wc->wc_status) {
1334*13101SVenki.Rajagopalan@Sun.COM case IBT_WC_WR_FLUSHED_ERR:
1335*13101SVenki.Rajagopalan@Sun.COM break;
1336*13101SVenki.Rajagopalan@Sun.COM
1337*13101SVenki.Rajagopalan@Sun.COM case IBT_WC_LOCAL_CHAN_OP_ERR:
1338*13101SVenki.Rajagopalan@Sun.COM EIB_DPRINTF_ERR(ss->ei_instance, "eib_data_err_comp: "
1339*13101SVenki.Rajagopalan@Sun.COM "IBT_WC_LOCAL_CHAN_OP_ERR seen, wqe_info=0x%lx ",
1340*13101SVenki.Rajagopalan@Sun.COM wqe->qe_info);
1341*13101SVenki.Rajagopalan@Sun.COM break;
1342*13101SVenki.Rajagopalan@Sun.COM
1343*13101SVenki.Rajagopalan@Sun.COM case IBT_WC_LOCAL_PROTECT_ERR:
1344*13101SVenki.Rajagopalan@Sun.COM EIB_DPRINTF_ERR(ss->ei_instance, "eib_data_err_comp: "
1345*13101SVenki.Rajagopalan@Sun.COM "IBT_WC_LOCAL_PROTECT_ERR seen, wqe_info=0x%lx ",
1346*13101SVenki.Rajagopalan@Sun.COM wqe->qe_info);
1347*13101SVenki.Rajagopalan@Sun.COM break;
1348*13101SVenki.Rajagopalan@Sun.COM }
1349*13101SVenki.Rajagopalan@Sun.COM
1350*13101SVenki.Rajagopalan@Sun.COM /*
1351*13101SVenki.Rajagopalan@Sun.COM * When a wc indicates error, we do not attempt to repost the
1352*13101SVenki.Rajagopalan@Sun.COM * rwqe but simply return it to the wqe pool. Also for rwqes,
1353*13101SVenki.Rajagopalan@Sun.COM * attempting to free the mblk in the wqe invokes the
1354*13101SVenki.Rajagopalan@Sun.COM * eib_data_rx_recycle() callback. For tx wqes, error handling
1355*13101SVenki.Rajagopalan@Sun.COM * is the same as successful completion handling. We still
1356*13101SVenki.Rajagopalan@Sun.COM * have to unmap iov/free lsobufs/free mblk and then return the
1357*13101SVenki.Rajagopalan@Sun.COM * swqe to the pool.
1358*13101SVenki.Rajagopalan@Sun.COM */
1359*13101SVenki.Rajagopalan@Sun.COM if (EIB_WQE_TYPE(wqe->qe_info) == EIB_WQE_RX) {
1360*13101SVenki.Rajagopalan@Sun.COM ASSERT(wqe->qe_mp != NULL);
1361*13101SVenki.Rajagopalan@Sun.COM freemsg(wqe->qe_mp);
1362*13101SVenki.Rajagopalan@Sun.COM } else {
1363*13101SVenki.Rajagopalan@Sun.COM eib_data_tx_comp(vnic, wqe, vnic->vn_data_chan);
1364*13101SVenki.Rajagopalan@Sun.COM }
1365*13101SVenki.Rajagopalan@Sun.COM }
1366*13101SVenki.Rajagopalan@Sun.COM
1367*13101SVenki.Rajagopalan@Sun.COM /*ARGSUSED*/
1368*13101SVenki.Rajagopalan@Sun.COM static void
eib_rb_data_setup_cqs(eib_t * ss,eib_vnic_t * vnic)1369*13101SVenki.Rajagopalan@Sun.COM eib_rb_data_setup_cqs(eib_t *ss, eib_vnic_t *vnic)
1370*13101SVenki.Rajagopalan@Sun.COM {
1371*13101SVenki.Rajagopalan@Sun.COM eib_chan_t *chan = vnic->vn_data_chan;
1372*13101SVenki.Rajagopalan@Sun.COM ibt_status_t ret;
1373*13101SVenki.Rajagopalan@Sun.COM
1374*13101SVenki.Rajagopalan@Sun.COM if (chan == NULL)
1375*13101SVenki.Rajagopalan@Sun.COM return;
1376*13101SVenki.Rajagopalan@Sun.COM
1377*13101SVenki.Rajagopalan@Sun.COM /*
1378*13101SVenki.Rajagopalan@Sun.COM * Reset any completion handlers we may have set up
1379*13101SVenki.Rajagopalan@Sun.COM */
1380*13101SVenki.Rajagopalan@Sun.COM if (chan->ch_rcv_cq_hdl) {
1381*13101SVenki.Rajagopalan@Sun.COM ibt_set_cq_handler(chan->ch_rcv_cq_hdl, NULL, NULL);
1382*13101SVenki.Rajagopalan@Sun.COM }
1383*13101SVenki.Rajagopalan@Sun.COM if (chan->ch_cq_hdl) {
1384*13101SVenki.Rajagopalan@Sun.COM ibt_set_cq_handler(chan->ch_cq_hdl, NULL, NULL);
1385*13101SVenki.Rajagopalan@Sun.COM }
1386*13101SVenki.Rajagopalan@Sun.COM
1387*13101SVenki.Rajagopalan@Sun.COM /*
1388*13101SVenki.Rajagopalan@Sun.COM * Remove any softints that were added
1389*13101SVenki.Rajagopalan@Sun.COM */
1390*13101SVenki.Rajagopalan@Sun.COM if (vnic->vn_data_rx_si_hdl) {
1391*13101SVenki.Rajagopalan@Sun.COM (void) ddi_intr_remove_softint(vnic->vn_data_rx_si_hdl);
1392*13101SVenki.Rajagopalan@Sun.COM vnic->vn_data_rx_si_hdl = NULL;
1393*13101SVenki.Rajagopalan@Sun.COM }
1394*13101SVenki.Rajagopalan@Sun.COM if (vnic->vn_data_tx_si_hdl) {
1395*13101SVenki.Rajagopalan@Sun.COM (void) ddi_intr_remove_softint(vnic->vn_data_tx_si_hdl);
1396*13101SVenki.Rajagopalan@Sun.COM vnic->vn_data_tx_si_hdl = NULL;
1397*13101SVenki.Rajagopalan@Sun.COM }
1398*13101SVenki.Rajagopalan@Sun.COM
1399*13101SVenki.Rajagopalan@Sun.COM /*
1400*13101SVenki.Rajagopalan@Sun.COM * Release any work completion buffers we may have allocated
1401*13101SVenki.Rajagopalan@Sun.COM */
1402*13101SVenki.Rajagopalan@Sun.COM if (chan->ch_rcv_wc && chan->ch_rcv_cq_sz) {
1403*13101SVenki.Rajagopalan@Sun.COM kmem_free(chan->ch_rcv_wc,
1404*13101SVenki.Rajagopalan@Sun.COM sizeof (ibt_wc_t) * chan->ch_rcv_cq_sz);
1405*13101SVenki.Rajagopalan@Sun.COM }
1406*13101SVenki.Rajagopalan@Sun.COM chan->ch_rcv_cq_sz = 0;
1407*13101SVenki.Rajagopalan@Sun.COM chan->ch_rcv_wc = NULL;
1408*13101SVenki.Rajagopalan@Sun.COM
1409*13101SVenki.Rajagopalan@Sun.COM if (chan->ch_wc && chan->ch_cq_sz) {
1410*13101SVenki.Rajagopalan@Sun.COM kmem_free(chan->ch_wc, sizeof (ibt_wc_t) * chan->ch_cq_sz);
1411*13101SVenki.Rajagopalan@Sun.COM }
1412*13101SVenki.Rajagopalan@Sun.COM chan->ch_cq_sz = 0;
1413*13101SVenki.Rajagopalan@Sun.COM chan->ch_wc = NULL;
1414*13101SVenki.Rajagopalan@Sun.COM
1415*13101SVenki.Rajagopalan@Sun.COM /*
1416*13101SVenki.Rajagopalan@Sun.COM * Free any completion queues we may have allocated
1417*13101SVenki.Rajagopalan@Sun.COM */
1418*13101SVenki.Rajagopalan@Sun.COM if (chan->ch_rcv_cq_hdl) {
1419*13101SVenki.Rajagopalan@Sun.COM ret = ibt_free_cq(chan->ch_rcv_cq_hdl);
1420*13101SVenki.Rajagopalan@Sun.COM if (ret != IBT_SUCCESS) {
1421*13101SVenki.Rajagopalan@Sun.COM EIB_DPRINTF_WARN(ss->ei_instance,
1422*13101SVenki.Rajagopalan@Sun.COM "eib_rb_data_setup_cqs: "
1423*13101SVenki.Rajagopalan@Sun.COM "ibt_free_cq(rcv_cq) failed, ret=%d", ret);
1424*13101SVenki.Rajagopalan@Sun.COM }
1425*13101SVenki.Rajagopalan@Sun.COM chan->ch_rcv_cq_hdl = NULL;
1426*13101SVenki.Rajagopalan@Sun.COM }
1427*13101SVenki.Rajagopalan@Sun.COM if (chan->ch_cq_hdl) {
1428*13101SVenki.Rajagopalan@Sun.COM ret = ibt_free_cq(chan->ch_cq_hdl);
1429*13101SVenki.Rajagopalan@Sun.COM if (ret != IBT_SUCCESS) {
1430*13101SVenki.Rajagopalan@Sun.COM EIB_DPRINTF_WARN(ss->ei_instance,
1431*13101SVenki.Rajagopalan@Sun.COM "eib_rb_data_setup_cqs: "
1432*13101SVenki.Rajagopalan@Sun.COM "ibt_free_cq(snd_cq) failed, ret=%d", ret);
1433*13101SVenki.Rajagopalan@Sun.COM }
1434*13101SVenki.Rajagopalan@Sun.COM chan->ch_cq_hdl = NULL;
1435*13101SVenki.Rajagopalan@Sun.COM }
1436*13101SVenki.Rajagopalan@Sun.COM }
1437*13101SVenki.Rajagopalan@Sun.COM
1438*13101SVenki.Rajagopalan@Sun.COM /*ARGSUSED*/
1439*13101SVenki.Rajagopalan@Sun.COM static void
eib_rb_data_setup_ud_channel(eib_t * ss,eib_vnic_t * vnic)1440*13101SVenki.Rajagopalan@Sun.COM eib_rb_data_setup_ud_channel(eib_t *ss, eib_vnic_t *vnic)
1441*13101SVenki.Rajagopalan@Sun.COM {
1442*13101SVenki.Rajagopalan@Sun.COM eib_chan_t *chan = vnic->vn_data_chan;
1443*13101SVenki.Rajagopalan@Sun.COM ibt_status_t ret;
1444*13101SVenki.Rajagopalan@Sun.COM
1445*13101SVenki.Rajagopalan@Sun.COM if (chan == NULL)
1446*13101SVenki.Rajagopalan@Sun.COM return;
1447*13101SVenki.Rajagopalan@Sun.COM
1448*13101SVenki.Rajagopalan@Sun.COM if (chan->ch_chan) {
1449*13101SVenki.Rajagopalan@Sun.COM /*
1450*13101SVenki.Rajagopalan@Sun.COM * We're trying to tear down this UD channel. Make sure that
1451*13101SVenki.Rajagopalan@Sun.COM * we don't attempt to refill (repost) at any point from now on.
1452*13101SVenki.Rajagopalan@Sun.COM */
1453*13101SVenki.Rajagopalan@Sun.COM chan->ch_tear_down = B_TRUE;
1454*13101SVenki.Rajagopalan@Sun.COM if ((ret = ibt_flush_channel(chan->ch_chan)) != IBT_SUCCESS) {
1455*13101SVenki.Rajagopalan@Sun.COM EIB_DPRINTF_WARN(ss->ei_instance,
1456*13101SVenki.Rajagopalan@Sun.COM "eib_rb_data_setup_ud_channel: "
1457*13101SVenki.Rajagopalan@Sun.COM "ibt_flush_channel() failed, ret=%d", ret);
1458*13101SVenki.Rajagopalan@Sun.COM }
1459*13101SVenki.Rajagopalan@Sun.COM
1460*13101SVenki.Rajagopalan@Sun.COM /*
1461*13101SVenki.Rajagopalan@Sun.COM * Wait until all posted tx wqes on this channel are back with
1462*13101SVenki.Rajagopalan@Sun.COM * the wqe pool.
1463*13101SVenki.Rajagopalan@Sun.COM */
1464*13101SVenki.Rajagopalan@Sun.COM mutex_enter(&chan->ch_tx_lock);
1465*13101SVenki.Rajagopalan@Sun.COM while (chan->ch_tx_posted > 0)
1466*13101SVenki.Rajagopalan@Sun.COM cv_wait(&chan->ch_tx_cv, &chan->ch_tx_lock);
1467*13101SVenki.Rajagopalan@Sun.COM mutex_exit(&chan->ch_tx_lock);
1468*13101SVenki.Rajagopalan@Sun.COM
1469*13101SVenki.Rajagopalan@Sun.COM /*
1470*13101SVenki.Rajagopalan@Sun.COM * Wait until all posted rx wqes on this channel are back with
1471*13101SVenki.Rajagopalan@Sun.COM * the wqe pool.
1472*13101SVenki.Rajagopalan@Sun.COM */
1473*13101SVenki.Rajagopalan@Sun.COM mutex_enter(&chan->ch_rx_lock);
1474*13101SVenki.Rajagopalan@Sun.COM while (chan->ch_rx_posted > 0)
1475*13101SVenki.Rajagopalan@Sun.COM cv_wait(&chan->ch_rx_cv, &chan->ch_rx_lock);
1476*13101SVenki.Rajagopalan@Sun.COM mutex_exit(&chan->ch_rx_lock);
1477*13101SVenki.Rajagopalan@Sun.COM
1478*13101SVenki.Rajagopalan@Sun.COM /*
1479*13101SVenki.Rajagopalan@Sun.COM * Now we're ready to free this channel
1480*13101SVenki.Rajagopalan@Sun.COM */
1481*13101SVenki.Rajagopalan@Sun.COM if ((ret = ibt_free_channel(chan->ch_chan)) != IBT_SUCCESS) {
1482*13101SVenki.Rajagopalan@Sun.COM EIB_DPRINTF_WARN(ss->ei_instance,
1483*13101SVenki.Rajagopalan@Sun.COM "eib_rb_data_setup_ud_channel: "
1484*13101SVenki.Rajagopalan@Sun.COM "ibt_free_channel() failed, ret=%d", ret);
1485*13101SVenki.Rajagopalan@Sun.COM }
1486*13101SVenki.Rajagopalan@Sun.COM
1487*13101SVenki.Rajagopalan@Sun.COM chan->ch_alloc_mp = B_FALSE;
1488*13101SVenki.Rajagopalan@Sun.COM chan->ch_ip_hdr_align = 0;
1489*13101SVenki.Rajagopalan@Sun.COM chan->ch_rwqe_bktsz = 0;
1490*13101SVenki.Rajagopalan@Sun.COM chan->ch_lwm_rwqes = 0;
1491*13101SVenki.Rajagopalan@Sun.COM chan->ch_max_rwqes = 0;
1492*13101SVenki.Rajagopalan@Sun.COM chan->ch_max_swqes = 0;
1493*13101SVenki.Rajagopalan@Sun.COM chan->ch_qpn = 0;
1494*13101SVenki.Rajagopalan@Sun.COM chan->ch_chan = NULL;
1495*13101SVenki.Rajagopalan@Sun.COM }
1496*13101SVenki.Rajagopalan@Sun.COM }
1497