xref: /dpdk/drivers/net/cxgbe/cxgbe_ethdev.c (revision a997a33b2a0145ad3e6320ea1fc7df8d51a2fcdf)
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright(c) 2014-2016 Chelsio Communications.
5  *   All rights reserved.
6  *
7  *   Redistribution and use in source and binary forms, with or without
8  *   modification, are permitted provided that the following conditions
9  *   are met:
10  *
11  *     * Redistributions of source code must retain the above copyright
12  *       notice, this list of conditions and the following disclaimer.
13  *     * Redistributions in binary form must reproduce the above copyright
14  *       notice, this list of conditions and the following disclaimer in
15  *       the documentation and/or other materials provided with the
16  *       distribution.
17  *     * Neither the name of Chelsio Communications nor the names of its
18  *       contributors may be used to endorse or promote products derived
19  *       from this software without specific prior written permission.
20  *
21  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33 
34 #include <sys/queue.h>
35 #include <stdio.h>
36 #include <errno.h>
37 #include <stdint.h>
38 #include <string.h>
39 #include <unistd.h>
40 #include <stdarg.h>
41 #include <inttypes.h>
42 #include <netinet/in.h>
43 
44 #include <rte_byteorder.h>
45 #include <rte_common.h>
46 #include <rte_cycles.h>
47 #include <rte_interrupts.h>
48 #include <rte_log.h>
49 #include <rte_debug.h>
50 #include <rte_pci.h>
51 #include <rte_atomic.h>
52 #include <rte_branch_prediction.h>
53 #include <rte_memory.h>
54 #include <rte_memzone.h>
55 #include <rte_tailq.h>
56 #include <rte_eal.h>
57 #include <rte_alarm.h>
58 #include <rte_ether.h>
59 #include <rte_ethdev.h>
60 #include <rte_atomic.h>
61 #include <rte_malloc.h>
62 #include <rte_random.h>
63 #include <rte_dev.h>
64 
65 #include "cxgbe.h"
66 
/*
 * Macros needed to support the PCI Device ID Table ...
 *
 * The header included below supplies the table contents; the macros defined
 * here give it the shape of a static array of struct rte_pci_id named
 * cxgb4_pci_tbl (presumably the header is written purely in terms of these
 * macros -- confirm against t4_pci_id_tbl.h).
 */
/* Opens the definition of the cxgb4_pci_tbl[] array. */
#define CH_PCI_DEVICE_ID_TABLE_DEFINE_BEGIN \
	static struct rte_pci_id cxgb4_pci_tbl[] = {
#define CH_PCI_DEVICE_ID_FUNCTION 0x4

#define PCI_VENDOR_ID_CHELSIO 0x1425

/* One table entry: the Chelsio vendor ID paired with the given device ID. */
#define CH_PCI_ID_TABLE_ENTRY(devid) \
		{ RTE_PCI_DEVICE(PCI_VENDOR_ID_CHELSIO, (devid)) }

/* Appends an entry with vendor_id 0 and closes the array. */
#define CH_PCI_DEVICE_ID_TABLE_DEFINE_END \
		{ .vendor_id = 0, } \
	}

/*
 *... and the PCI ID Table itself ...
 */
#include "t4_pci_id_tbl.h"
87 
88 static uint16_t cxgbe_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
89 				uint16_t nb_pkts)
90 {
91 	struct sge_eth_txq *txq = (struct sge_eth_txq *)tx_queue;
92 	uint16_t pkts_sent, pkts_remain;
93 	uint16_t total_sent = 0;
94 	int ret = 0;
95 
96 	CXGBE_DEBUG_TX(adapter, "%s: txq = %p; tx_pkts = %p; nb_pkts = %d\n",
97 		       __func__, txq, tx_pkts, nb_pkts);
98 
99 	t4_os_lock(&txq->txq_lock);
100 	/* free up desc from already completed tx */
101 	reclaim_completed_tx(&txq->q);
102 	while (total_sent < nb_pkts) {
103 		pkts_remain = nb_pkts - total_sent;
104 
105 		for (pkts_sent = 0; pkts_sent < pkts_remain; pkts_sent++) {
106 			ret = t4_eth_xmit(txq, tx_pkts[total_sent + pkts_sent]);
107 			if (ret < 0)
108 				break;
109 		}
110 		if (!pkts_sent)
111 			break;
112 		total_sent += pkts_sent;
113 		/* reclaim as much as possible */
114 		reclaim_completed_tx(&txq->q);
115 	}
116 
117 	t4_os_unlock(&txq->txq_lock);
118 	return total_sent;
119 }
120 
121 static uint16_t cxgbe_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
122 				uint16_t nb_pkts)
123 {
124 	struct sge_eth_rxq *rxq = (struct sge_eth_rxq *)rx_queue;
125 	unsigned int work_done;
126 
127 	CXGBE_DEBUG_RX(adapter, "%s: rxq->rspq.cntxt_id = %u; nb_pkts = %d\n",
128 		       __func__, rxq->rspq.cntxt_id, nb_pkts);
129 
130 	if (cxgbe_poll(&rxq->rspq, rx_pkts, (unsigned int)nb_pkts, &work_done))
131 		dev_err(adapter, "error in cxgbe poll\n");
132 
133 	CXGBE_DEBUG_RX(adapter, "%s: work_done = %u\n", __func__, work_done);
134 	return work_done;
135 }
136 
137 static void cxgbe_dev_info_get(struct rte_eth_dev *eth_dev,
138 			       struct rte_eth_dev_info *device_info)
139 {
140 	struct port_info *pi = (struct port_info *)(eth_dev->data->dev_private);
141 	struct adapter *adapter = pi->adapter;
142 	int max_queues = adapter->sge.max_ethqsets / adapter->params.nports;
143 
144 	static const struct rte_eth_desc_lim cxgbe_desc_lim = {
145 		.nb_max = CXGBE_MAX_RING_DESC_SIZE,
146 		.nb_min = CXGBE_MIN_RING_DESC_SIZE,
147 		.nb_align = 1,
148 	};
149 
150 	device_info->min_rx_bufsize = CXGBE_MIN_RX_BUFSIZE;
151 	device_info->max_rx_pktlen = CXGBE_MAX_RX_PKTLEN;
152 	device_info->max_rx_queues = max_queues;
153 	device_info->max_tx_queues = max_queues;
154 	device_info->max_mac_addrs = 1;
155 	/* XXX: For now we support one MAC/port */
156 	device_info->max_vfs = adapter->params.arch.vfcount;
157 	device_info->max_vmdq_pools = 0; /* XXX: For now no support for VMDQ */
158 
159 	device_info->rx_offload_capa = DEV_RX_OFFLOAD_VLAN_STRIP |
160 				       DEV_RX_OFFLOAD_IPV4_CKSUM |
161 				       DEV_RX_OFFLOAD_UDP_CKSUM |
162 				       DEV_RX_OFFLOAD_TCP_CKSUM;
163 
164 	device_info->tx_offload_capa = DEV_TX_OFFLOAD_VLAN_INSERT |
165 				       DEV_TX_OFFLOAD_IPV4_CKSUM |
166 				       DEV_TX_OFFLOAD_UDP_CKSUM |
167 				       DEV_TX_OFFLOAD_TCP_CKSUM |
168 				       DEV_TX_OFFLOAD_TCP_TSO;
169 
170 	device_info->reta_size = pi->rss_size;
171 
172 	device_info->rx_desc_lim = cxgbe_desc_lim;
173 	device_info->tx_desc_lim = cxgbe_desc_lim;
174 	device_info->speed_capa = ETH_LINK_SPEED_10G | ETH_LINK_SPEED_40G;
175 }
176 
177 static void cxgbe_dev_promiscuous_enable(struct rte_eth_dev *eth_dev)
178 {
179 	struct port_info *pi = (struct port_info *)(eth_dev->data->dev_private);
180 	struct adapter *adapter = pi->adapter;
181 
182 	t4_set_rxmode(adapter, adapter->mbox, pi->viid, -1,
183 		      1, -1, 1, -1, false);
184 }
185 
186 static void cxgbe_dev_promiscuous_disable(struct rte_eth_dev *eth_dev)
187 {
188 	struct port_info *pi = (struct port_info *)(eth_dev->data->dev_private);
189 	struct adapter *adapter = pi->adapter;
190 
191 	t4_set_rxmode(adapter, adapter->mbox, pi->viid, -1,
192 		      0, -1, 1, -1, false);
193 }
194 
195 static void cxgbe_dev_allmulticast_enable(struct rte_eth_dev *eth_dev)
196 {
197 	struct port_info *pi = (struct port_info *)(eth_dev->data->dev_private);
198 	struct adapter *adapter = pi->adapter;
199 
200 	/* TODO: address filters ?? */
201 
202 	t4_set_rxmode(adapter, adapter->mbox, pi->viid, -1,
203 		      -1, 1, 1, -1, false);
204 }
205 
206 static void cxgbe_dev_allmulticast_disable(struct rte_eth_dev *eth_dev)
207 {
208 	struct port_info *pi = (struct port_info *)(eth_dev->data->dev_private);
209 	struct adapter *adapter = pi->adapter;
210 
211 	/* TODO: address filters ?? */
212 
213 	t4_set_rxmode(adapter, adapter->mbox, pi->viid, -1,
214 		      -1, 0, 1, -1, false);
215 }
216 
217 static int cxgbe_dev_link_update(struct rte_eth_dev *eth_dev,
218 				 __rte_unused int wait_to_complete)
219 {
220 	struct port_info *pi = (struct port_info *)(eth_dev->data->dev_private);
221 	struct adapter *adapter = pi->adapter;
222 	struct sge *s = &adapter->sge;
223 	struct rte_eth_link *old_link = &eth_dev->data->dev_link;
224 	unsigned int work_done, budget = 4;
225 
226 	cxgbe_poll(&s->fw_evtq, NULL, budget, &work_done);
227 	if (old_link->link_status == pi->link_cfg.link_ok)
228 		return -1;  /* link not changed */
229 
230 	eth_dev->data->dev_link.link_status = pi->link_cfg.link_ok;
231 	eth_dev->data->dev_link.link_duplex = ETH_LINK_FULL_DUPLEX;
232 	eth_dev->data->dev_link.link_speed = pi->link_cfg.speed;
233 
234 	/* link has changed */
235 	return 0;
236 }
237 
238 static int cxgbe_dev_mtu_set(struct rte_eth_dev *eth_dev, uint16_t mtu)
239 {
240 	struct port_info *pi = (struct port_info *)(eth_dev->data->dev_private);
241 	struct adapter *adapter = pi->adapter;
242 	struct rte_eth_dev_info dev_info;
243 	int err;
244 	uint16_t new_mtu = mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
245 
246 	cxgbe_dev_info_get(eth_dev, &dev_info);
247 
248 	/* Must accommodate at least ETHER_MIN_MTU */
249 	if ((new_mtu < ETHER_MIN_MTU) || (new_mtu > dev_info.max_rx_pktlen))
250 		return -EINVAL;
251 
252 	/* set to jumbo mode if needed */
253 	if (new_mtu > ETHER_MAX_LEN)
254 		eth_dev->data->dev_conf.rxmode.jumbo_frame = 1;
255 	else
256 		eth_dev->data->dev_conf.rxmode.jumbo_frame = 0;
257 
258 	err = t4_set_rxmode(adapter, adapter->mbox, pi->viid, new_mtu, -1, -1,
259 			    -1, -1, true);
260 	if (!err)
261 		eth_dev->data->dev_conf.rxmode.max_rx_pkt_len = new_mtu;
262 
263 	return err;
264 }
265 
/*
 * Forward declarations: these queue handlers are defined further down but
 * are called by the start and queue-setup routines that precede them.
 */
static int cxgbe_dev_tx_queue_start(struct rte_eth_dev *eth_dev,
				    uint16_t tx_queue_id);
static int cxgbe_dev_rx_queue_start(struct rte_eth_dev *eth_dev,
				    uint16_t rx_queue_id);
static void cxgbe_dev_tx_queue_release(void *q);
static void cxgbe_dev_rx_queue_release(void *q);
272 
273 /*
274  * Stop device.
275  */
276 static void cxgbe_dev_close(struct rte_eth_dev *eth_dev)
277 {
278 	struct port_info *pi = (struct port_info *)(eth_dev->data->dev_private);
279 	struct adapter *adapter = pi->adapter;
280 	int i, dev_down = 0;
281 
282 	CXGBE_FUNC_TRACE();
283 
284 	if (!(adapter->flags & FULL_INIT_DONE))
285 		return;
286 
287 	cxgbe_down(pi);
288 
289 	/*
290 	 *  We clear queues only if both tx and rx path of the port
291 	 *  have been disabled
292 	 */
293 	t4_sge_eth_clear_queues(pi);
294 
295 	/*  See if all ports are down */
296 	for_each_port(adapter, i) {
297 		pi = adap2pinfo(adapter, i);
298 		/*
299 		 * Skip first port of the adapter since it will be closed
300 		 * by DPDK
301 		 */
302 		if (i == 0)
303 			continue;
304 		dev_down += (pi->eth_dev->data->dev_started == 0) ? 1 : 0;
305 	}
306 
307 	/* If rest of the ports are stopped, then free up resources */
308 	if (dev_down == (adapter->params.nports - 1))
309 		cxgbe_close(adapter);
310 }
311 
312 /* Start the device.
313  * It returns 0 on success.
314  */
315 static int cxgbe_dev_start(struct rte_eth_dev *eth_dev)
316 {
317 	struct port_info *pi = (struct port_info *)(eth_dev->data->dev_private);
318 	struct adapter *adapter = pi->adapter;
319 	int err = 0, i;
320 
321 	CXGBE_FUNC_TRACE();
322 
323 	/*
324 	 * If we don't have a connection to the firmware there's nothing we
325 	 * can do.
326 	 */
327 	if (!(adapter->flags & FW_OK)) {
328 		err = -ENXIO;
329 		goto out;
330 	}
331 
332 	if (!(adapter->flags & FULL_INIT_DONE)) {
333 		err = cxgbe_up(adapter);
334 		if (err < 0)
335 			goto out;
336 	}
337 
338 	err = setup_rss(pi);
339 	if (err)
340 		goto out;
341 
342 	for (i = 0; i < pi->n_tx_qsets; i++) {
343 		err = cxgbe_dev_tx_queue_start(eth_dev, i);
344 		if (err)
345 			goto out;
346 	}
347 
348 	for (i = 0; i < pi->n_rx_qsets; i++) {
349 		err = cxgbe_dev_rx_queue_start(eth_dev, i);
350 		if (err)
351 			goto out;
352 	}
353 
354 	err = link_start(pi);
355 	if (err)
356 		goto out;
357 
358 out:
359 	return err;
360 }
361 
362 /*
363  * Stop device: disable rx and tx functions to allow for reconfiguring.
364  */
365 static void cxgbe_dev_stop(struct rte_eth_dev *eth_dev)
366 {
367 	struct port_info *pi = (struct port_info *)(eth_dev->data->dev_private);
368 	struct adapter *adapter = pi->adapter;
369 
370 	CXGBE_FUNC_TRACE();
371 
372 	if (!(adapter->flags & FULL_INIT_DONE))
373 		return;
374 
375 	cxgbe_down(pi);
376 
377 	/*
378 	 *  We clear queues only if both tx and rx path of the port
379 	 *  have been disabled
380 	 */
381 	t4_sge_eth_clear_queues(pi);
382 }
383 
384 static int cxgbe_dev_configure(struct rte_eth_dev *eth_dev)
385 {
386 	struct port_info *pi = (struct port_info *)(eth_dev->data->dev_private);
387 	struct adapter *adapter = pi->adapter;
388 	int err;
389 
390 	CXGBE_FUNC_TRACE();
391 
392 	if (!(adapter->flags & FW_QUEUE_BOUND)) {
393 		err = setup_sge_fwevtq(adapter);
394 		if (err)
395 			return err;
396 		adapter->flags |= FW_QUEUE_BOUND;
397 	}
398 
399 	err = cfg_queue_count(eth_dev);
400 	if (err)
401 		return err;
402 
403 	return 0;
404 }
405 
406 static int cxgbe_dev_tx_queue_start(struct rte_eth_dev *eth_dev,
407 				    uint16_t tx_queue_id)
408 {
409 	int ret;
410 	struct sge_eth_txq *txq = (struct sge_eth_txq *)
411 				  (eth_dev->data->tx_queues[tx_queue_id]);
412 
413 	dev_debug(NULL, "%s: tx_queue_id = %d\n", __func__, tx_queue_id);
414 
415 	ret = t4_sge_eth_txq_start(txq);
416 	if (ret == 0)
417 		eth_dev->data->tx_queue_state[tx_queue_id] = RTE_ETH_QUEUE_STATE_STARTED;
418 
419 	return ret;
420 }
421 
422 static int cxgbe_dev_tx_queue_stop(struct rte_eth_dev *eth_dev,
423 				   uint16_t tx_queue_id)
424 {
425 	int ret;
426 	struct sge_eth_txq *txq = (struct sge_eth_txq *)
427 				  (eth_dev->data->tx_queues[tx_queue_id]);
428 
429 	dev_debug(NULL, "%s: tx_queue_id = %d\n", __func__, tx_queue_id);
430 
431 	ret = t4_sge_eth_txq_stop(txq);
432 	if (ret == 0)
433 		eth_dev->data->tx_queue_state[tx_queue_id] = RTE_ETH_QUEUE_STATE_STOPPED;
434 
435 	return ret;
436 }
437 
438 static int cxgbe_dev_tx_queue_setup(struct rte_eth_dev *eth_dev,
439 				    uint16_t queue_idx,	uint16_t nb_desc,
440 				    unsigned int socket_id,
441 				    const struct rte_eth_txconf *tx_conf)
442 {
443 	struct port_info *pi = (struct port_info *)(eth_dev->data->dev_private);
444 	struct adapter *adapter = pi->adapter;
445 	struct sge *s = &adapter->sge;
446 	struct sge_eth_txq *txq = &s->ethtxq[pi->first_qset + queue_idx];
447 	int err = 0;
448 	unsigned int temp_nb_desc;
449 
450 	RTE_SET_USED(tx_conf);
451 
452 	dev_debug(adapter, "%s: eth_dev->data->nb_tx_queues = %d; queue_idx = %d; nb_desc = %d; socket_id = %d; pi->first_qset = %u\n",
453 		  __func__, eth_dev->data->nb_tx_queues, queue_idx, nb_desc,
454 		  socket_id, pi->first_qset);
455 
456 	/*  Free up the existing queue  */
457 	if (eth_dev->data->tx_queues[queue_idx]) {
458 		cxgbe_dev_tx_queue_release(eth_dev->data->tx_queues[queue_idx]);
459 		eth_dev->data->tx_queues[queue_idx] = NULL;
460 	}
461 
462 	eth_dev->data->tx_queues[queue_idx] = (void *)txq;
463 
464 	/* Sanity Checking
465 	 *
466 	 * nb_desc should be > 1023 and <= CXGBE_MAX_RING_DESC_SIZE
467 	 */
468 	temp_nb_desc = nb_desc;
469 	if (nb_desc < CXGBE_MIN_RING_DESC_SIZE) {
470 		dev_warn(adapter, "%s: number of descriptors must be >= %d. Using default [%d]\n",
471 			 __func__, CXGBE_MIN_RING_DESC_SIZE,
472 			 CXGBE_DEFAULT_TX_DESC_SIZE);
473 		temp_nb_desc = CXGBE_DEFAULT_TX_DESC_SIZE;
474 	} else if (nb_desc > CXGBE_MAX_RING_DESC_SIZE) {
475 		dev_err(adapter, "%s: number of descriptors must be between %d and %d inclusive. Default [%d]\n",
476 			__func__, CXGBE_MIN_RING_DESC_SIZE,
477 			CXGBE_MAX_RING_DESC_SIZE, CXGBE_DEFAULT_TX_DESC_SIZE);
478 		return -(EINVAL);
479 	}
480 
481 	txq->q.size = temp_nb_desc;
482 
483 	err = t4_sge_alloc_eth_txq(adapter, txq, eth_dev, queue_idx,
484 				   s->fw_evtq.cntxt_id, socket_id);
485 
486 	dev_debug(adapter, "%s: txq->q.cntxt_id= %d err = %d\n",
487 		  __func__, txq->q.cntxt_id, err);
488 
489 	return err;
490 }
491 
492 static void cxgbe_dev_tx_queue_release(void *q)
493 {
494 	struct sge_eth_txq *txq = (struct sge_eth_txq *)q;
495 
496 	if (txq) {
497 		struct port_info *pi = (struct port_info *)
498 				       (txq->eth_dev->data->dev_private);
499 		struct adapter *adap = pi->adapter;
500 
501 		dev_debug(adapter, "%s: pi->port_id = %d; tx_queue_id = %d\n",
502 			  __func__, pi->port_id, txq->q.cntxt_id);
503 
504 		t4_sge_eth_txq_release(adap, txq);
505 	}
506 }
507 
508 static int cxgbe_dev_rx_queue_start(struct rte_eth_dev *eth_dev,
509 				    uint16_t rx_queue_id)
510 {
511 	int ret;
512 	struct port_info *pi = (struct port_info *)(eth_dev->data->dev_private);
513 	struct adapter *adap = pi->adapter;
514 	struct sge_rspq *q;
515 
516 	dev_debug(adapter, "%s: pi->port_id = %d; rx_queue_id = %d\n",
517 		  __func__, pi->port_id, rx_queue_id);
518 
519 	q = eth_dev->data->rx_queues[rx_queue_id];
520 
521 	ret = t4_sge_eth_rxq_start(adap, q);
522 	if (ret == 0)
523 		eth_dev->data->rx_queue_state[rx_queue_id] = RTE_ETH_QUEUE_STATE_STARTED;
524 
525 	return ret;
526 }
527 
528 static int cxgbe_dev_rx_queue_stop(struct rte_eth_dev *eth_dev,
529 				   uint16_t rx_queue_id)
530 {
531 	int ret;
532 	struct port_info *pi = (struct port_info *)(eth_dev->data->dev_private);
533 	struct adapter *adap = pi->adapter;
534 	struct sge_rspq *q;
535 
536 	dev_debug(adapter, "%s: pi->port_id = %d; rx_queue_id = %d\n",
537 		  __func__, pi->port_id, rx_queue_id);
538 
539 	q = eth_dev->data->rx_queues[rx_queue_id];
540 	ret = t4_sge_eth_rxq_stop(adap, q);
541 	if (ret == 0)
542 		eth_dev->data->rx_queue_state[rx_queue_id] = RTE_ETH_QUEUE_STATE_STOPPED;
543 
544 	return ret;
545 }
546 
547 static int cxgbe_dev_rx_queue_setup(struct rte_eth_dev *eth_dev,
548 				    uint16_t queue_idx,	uint16_t nb_desc,
549 				    unsigned int socket_id,
550 				    const struct rte_eth_rxconf *rx_conf,
551 				    struct rte_mempool *mp)
552 {
553 	struct port_info *pi = (struct port_info *)(eth_dev->data->dev_private);
554 	struct adapter *adapter = pi->adapter;
555 	struct sge *s = &adapter->sge;
556 	struct sge_eth_rxq *rxq = &s->ethrxq[pi->first_qset + queue_idx];
557 	int err = 0;
558 	int msi_idx = 0;
559 	unsigned int temp_nb_desc;
560 	struct rte_eth_dev_info dev_info;
561 	unsigned int pkt_len = eth_dev->data->dev_conf.rxmode.max_rx_pkt_len;
562 
563 	RTE_SET_USED(rx_conf);
564 
565 	dev_debug(adapter, "%s: eth_dev->data->nb_rx_queues = %d; queue_idx = %d; nb_desc = %d; socket_id = %d; mp = %p\n",
566 		  __func__, eth_dev->data->nb_rx_queues, queue_idx, nb_desc,
567 		  socket_id, mp);
568 
569 	cxgbe_dev_info_get(eth_dev, &dev_info);
570 
571 	/* Must accommodate at least ETHER_MIN_MTU */
572 	if ((pkt_len < dev_info.min_rx_bufsize) ||
573 	    (pkt_len > dev_info.max_rx_pktlen)) {
574 		dev_err(adap, "%s: max pkt len must be > %d and <= %d\n",
575 			__func__, dev_info.min_rx_bufsize,
576 			dev_info.max_rx_pktlen);
577 		return -EINVAL;
578 	}
579 
580 	/*  Free up the existing queue  */
581 	if (eth_dev->data->rx_queues[queue_idx]) {
582 		cxgbe_dev_rx_queue_release(eth_dev->data->rx_queues[queue_idx]);
583 		eth_dev->data->rx_queues[queue_idx] = NULL;
584 	}
585 
586 	eth_dev->data->rx_queues[queue_idx] = (void *)rxq;
587 
588 	/* Sanity Checking
589 	 *
590 	 * nb_desc should be > 0 and <= CXGBE_MAX_RING_DESC_SIZE
591 	 */
592 	temp_nb_desc = nb_desc;
593 	if (nb_desc < CXGBE_MIN_RING_DESC_SIZE) {
594 		dev_warn(adapter, "%s: number of descriptors must be >= %d. Using default [%d]\n",
595 			 __func__, CXGBE_MIN_RING_DESC_SIZE,
596 			 CXGBE_DEFAULT_RX_DESC_SIZE);
597 		temp_nb_desc = CXGBE_DEFAULT_RX_DESC_SIZE;
598 	} else if (nb_desc > CXGBE_MAX_RING_DESC_SIZE) {
599 		dev_err(adapter, "%s: number of descriptors must be between %d and %d inclusive. Default [%d]\n",
600 			__func__, CXGBE_MIN_RING_DESC_SIZE,
601 			CXGBE_MAX_RING_DESC_SIZE, CXGBE_DEFAULT_RX_DESC_SIZE);
602 		return -(EINVAL);
603 	}
604 
605 	rxq->rspq.size = temp_nb_desc;
606 	if ((&rxq->fl) != NULL)
607 		rxq->fl.size = temp_nb_desc;
608 
609 	/* Set to jumbo mode if necessary */
610 	if (pkt_len > ETHER_MAX_LEN)
611 		eth_dev->data->dev_conf.rxmode.jumbo_frame = 1;
612 	else
613 		eth_dev->data->dev_conf.rxmode.jumbo_frame = 0;
614 
615 	err = t4_sge_alloc_rxq(adapter, &rxq->rspq, false, eth_dev, msi_idx,
616 			       &rxq->fl, t4_ethrx_handler,
617 			       t4_get_mps_bg_map(adapter, pi->tx_chan), mp,
618 			       queue_idx, socket_id);
619 
620 	dev_debug(adapter, "%s: err = %d; port_id = %d; cntxt_id = %u\n",
621 		  __func__, err, pi->port_id, rxq->rspq.cntxt_id);
622 	return err;
623 }
624 
625 static void cxgbe_dev_rx_queue_release(void *q)
626 {
627 	struct sge_eth_rxq *rxq = (struct sge_eth_rxq *)q;
628 	struct sge_rspq *rq = &rxq->rspq;
629 
630 	if (rq) {
631 		struct port_info *pi = (struct port_info *)
632 				       (rq->eth_dev->data->dev_private);
633 		struct adapter *adap = pi->adapter;
634 
635 		dev_debug(adapter, "%s: pi->port_id = %d; rx_queue_id = %d\n",
636 			  __func__, pi->port_id, rxq->rspq.cntxt_id);
637 
638 		t4_sge_eth_rxq_release(adap, rxq);
639 	}
640 }
641 
642 /*
643  * Get port statistics.
644  */
645 static void cxgbe_dev_stats_get(struct rte_eth_dev *eth_dev,
646 				struct rte_eth_stats *eth_stats)
647 {
648 	struct port_info *pi = (struct port_info *)(eth_dev->data->dev_private);
649 	struct adapter *adapter = pi->adapter;
650 	struct sge *s = &adapter->sge;
651 	struct port_stats ps;
652 	unsigned int i;
653 
654 	cxgbe_stats_get(pi, &ps);
655 
656 	/* RX Stats */
657 	eth_stats->ipackets = ps.rx_frames;
658 	eth_stats->ibytes   = ps.rx_octets;
659 	eth_stats->imissed  = ps.rx_ovflow0 + ps.rx_ovflow1 +
660 			      ps.rx_ovflow2 + ps.rx_ovflow3 +
661 			      ps.rx_trunc0 + ps.rx_trunc1 +
662 			      ps.rx_trunc2 + ps.rx_trunc3;
663 	eth_stats->ierrors  = ps.rx_symbol_err + ps.rx_fcs_err +
664 			      ps.rx_jabber + ps.rx_too_long + ps.rx_runt +
665 			      ps.rx_len_err;
666 
667 	/* TX Stats */
668 	eth_stats->opackets = ps.tx_frames;
669 	eth_stats->obytes   = ps.tx_octets;
670 	eth_stats->oerrors  = ps.tx_error_frames;
671 
672 	for (i = 0; i < pi->n_rx_qsets; i++) {
673 		struct sge_eth_rxq *rxq =
674 			&s->ethrxq[pi->first_qset + i];
675 
676 		eth_stats->q_ipackets[i] = rxq->stats.pkts;
677 		eth_stats->q_ibytes[i] = rxq->stats.rx_bytes;
678 	}
679 
680 	for (i = 0; i < pi->n_tx_qsets; i++) {
681 		struct sge_eth_txq *txq =
682 			&s->ethtxq[pi->first_qset + i];
683 
684 		eth_stats->q_opackets[i] = txq->stats.pkts;
685 		eth_stats->q_obytes[i] = txq->stats.tx_bytes;
686 		eth_stats->q_errors[i] = txq->stats.mapping_err;
687 	}
688 }
689 
690 /*
691  * Reset port statistics.
692  */
693 static void cxgbe_dev_stats_reset(struct rte_eth_dev *eth_dev)
694 {
695 	struct port_info *pi = (struct port_info *)(eth_dev->data->dev_private);
696 	struct adapter *adapter = pi->adapter;
697 	struct sge *s = &adapter->sge;
698 	unsigned int i;
699 
700 	cxgbe_stats_reset(pi);
701 	for (i = 0; i < pi->n_rx_qsets; i++) {
702 		struct sge_eth_rxq *rxq =
703 			&s->ethrxq[pi->first_qset + i];
704 
705 		rxq->stats.pkts = 0;
706 		rxq->stats.rx_bytes = 0;
707 	}
708 	for (i = 0; i < pi->n_tx_qsets; i++) {
709 		struct sge_eth_txq *txq =
710 			&s->ethtxq[pi->first_qset + i];
711 
712 		txq->stats.pkts = 0;
713 		txq->stats.tx_bytes = 0;
714 		txq->stats.mapping_err = 0;
715 	}
716 }
717 
718 static int cxgbe_flow_ctrl_get(struct rte_eth_dev *eth_dev,
719 			       struct rte_eth_fc_conf *fc_conf)
720 {
721 	struct port_info *pi = (struct port_info *)(eth_dev->data->dev_private);
722 	struct link_config *lc = &pi->link_cfg;
723 	int rx_pause, tx_pause;
724 
725 	fc_conf->autoneg = lc->fc & PAUSE_AUTONEG;
726 	rx_pause = lc->fc & PAUSE_RX;
727 	tx_pause = lc->fc & PAUSE_TX;
728 
729 	if (rx_pause && tx_pause)
730 		fc_conf->mode = RTE_FC_FULL;
731 	else if (rx_pause)
732 		fc_conf->mode = RTE_FC_RX_PAUSE;
733 	else if (tx_pause)
734 		fc_conf->mode = RTE_FC_TX_PAUSE;
735 	else
736 		fc_conf->mode = RTE_FC_NONE;
737 	return 0;
738 }
739 
740 static int cxgbe_flow_ctrl_set(struct rte_eth_dev *eth_dev,
741 			       struct rte_eth_fc_conf *fc_conf)
742 {
743 	struct port_info *pi = (struct port_info *)(eth_dev->data->dev_private);
744 	struct adapter *adapter = pi->adapter;
745 	struct link_config *lc = &pi->link_cfg;
746 
747 	if (lc->supported & FW_PORT_CAP_ANEG) {
748 		if (fc_conf->autoneg)
749 			lc->requested_fc |= PAUSE_AUTONEG;
750 		else
751 			lc->requested_fc &= ~PAUSE_AUTONEG;
752 	}
753 
754 	if (((fc_conf->mode & RTE_FC_FULL) == RTE_FC_FULL) ||
755 	    (fc_conf->mode & RTE_FC_RX_PAUSE))
756 		lc->requested_fc |= PAUSE_RX;
757 	else
758 		lc->requested_fc &= ~PAUSE_RX;
759 
760 	if (((fc_conf->mode & RTE_FC_FULL) == RTE_FC_FULL) ||
761 	    (fc_conf->mode & RTE_FC_TX_PAUSE))
762 		lc->requested_fc |= PAUSE_TX;
763 	else
764 		lc->requested_fc &= ~PAUSE_TX;
765 
766 	return t4_link_l1cfg(adapter, adapter->mbox, pi->tx_chan,
767 			     &pi->link_cfg);
768 }
769 
770 static const uint32_t *
771 cxgbe_dev_supported_ptypes_get(struct rte_eth_dev *eth_dev)
772 {
773 	static const uint32_t ptypes[] = {
774 		RTE_PTYPE_L3_IPV4,
775 		RTE_PTYPE_L3_IPV6,
776 		RTE_PTYPE_UNKNOWN
777 	};
778 
779 	if (eth_dev->rx_pkt_burst == cxgbe_recv_pkts)
780 		return ptypes;
781 	return NULL;
782 }
783 
784 static int cxgbe_get_eeprom_length(struct rte_eth_dev *dev)
785 {
786 	RTE_SET_USED(dev);
787 	return EEPROMSIZE;
788 }
789 
790 /**
791  * eeprom_ptov - translate a physical EEPROM address to virtual
792  * @phys_addr: the physical EEPROM address
793  * @fn: the PCI function number
794  * @sz: size of function-specific area
795  *
796  * Translate a physical EEPROM address to virtual.  The first 1K is
797  * accessed through virtual addresses starting at 31K, the rest is
798  * accessed through virtual addresses starting at 0.
799  *
800  * The mapping is as follows:
801  * [0..1K) -> [31K..32K)
802  * [1K..1K+A) -> [31K-A..31K)
803  * [1K+A..ES) -> [0..ES-A-1K)
804  *
805  * where A = @fn * @sz, and ES = EEPROM size.
806  */
807 static int eeprom_ptov(unsigned int phys_addr, unsigned int fn, unsigned int sz)
808 {
809 	fn *= sz;
810 	if (phys_addr < 1024)
811 		return phys_addr + (31 << 10);
812 	if (phys_addr < 1024 + fn)
813 		return fn + phys_addr - 1024;
814 	if (phys_addr < EEPROMSIZE)
815 		return phys_addr - 1024 - fn;
816 	if (phys_addr < EEPROMVSIZE)
817 		return phys_addr - 1024;
818 	return -EINVAL;
819 }
820 
821 /* The next two routines implement eeprom read/write from physical addresses.
822  */
823 static int eeprom_rd_phys(struct adapter *adap, unsigned int phys_addr, u32 *v)
824 {
825 	int vaddr = eeprom_ptov(phys_addr, adap->pf, EEPROMPFSIZE);
826 
827 	if (vaddr >= 0)
828 		vaddr = t4_seeprom_read(adap, vaddr, v);
829 	return vaddr < 0 ? vaddr : 0;
830 }
831 
832 static int eeprom_wr_phys(struct adapter *adap, unsigned int phys_addr, u32 v)
833 {
834 	int vaddr = eeprom_ptov(phys_addr, adap->pf, EEPROMPFSIZE);
835 
836 	if (vaddr >= 0)
837 		vaddr = t4_seeprom_write(adap, vaddr, v);
838 	return vaddr < 0 ? vaddr : 0;
839 }
840 
841 #define EEPROM_MAGIC 0x38E2F10C
842 
843 static int cxgbe_get_eeprom(struct rte_eth_dev *dev,
844 			    struct rte_dev_eeprom_info *e)
845 {
846 	struct port_info *pi = (struct port_info *)(dev->data->dev_private);
847 	struct adapter *adapter = pi->adapter;
848 	u32 i, err = 0;
849 	u8 *buf = rte_zmalloc(NULL, EEPROMSIZE, 0);
850 
851 	if (!buf)
852 		return -ENOMEM;
853 
854 	e->magic = EEPROM_MAGIC;
855 	for (i = e->offset & ~3; !err && i < e->offset + e->length; i += 4)
856 		err = eeprom_rd_phys(adapter, i, (u32 *)&buf[i]);
857 
858 	if (!err)
859 		rte_memcpy(e->data, buf + e->offset, e->length);
860 	rte_free(buf);
861 	return err;
862 }
863 
864 static int cxgbe_set_eeprom(struct rte_eth_dev *dev,
865 			    struct rte_dev_eeprom_info *eeprom)
866 {
867 	struct port_info *pi = (struct port_info *)(dev->data->dev_private);
868 	struct adapter *adapter = pi->adapter;
869 	u8 *buf;
870 	int err = 0;
871 	u32 aligned_offset, aligned_len, *p;
872 
873 	if (eeprom->magic != EEPROM_MAGIC)
874 		return -EINVAL;
875 
876 	aligned_offset = eeprom->offset & ~3;
877 	aligned_len = (eeprom->length + (eeprom->offset & 3) + 3) & ~3;
878 
879 	if (adapter->pf > 0) {
880 		u32 start = 1024 + adapter->pf * EEPROMPFSIZE;
881 
882 		if (aligned_offset < start ||
883 		    aligned_offset + aligned_len > start + EEPROMPFSIZE)
884 			return -EPERM;
885 	}
886 
887 	if (aligned_offset != eeprom->offset || aligned_len != eeprom->length) {
888 		/* RMW possibly needed for first or last words.
889 		 */
890 		buf = rte_zmalloc(NULL, aligned_len, 0);
891 		if (!buf)
892 			return -ENOMEM;
893 		err = eeprom_rd_phys(adapter, aligned_offset, (u32 *)buf);
894 		if (!err && aligned_len > 4)
895 			err = eeprom_rd_phys(adapter,
896 					     aligned_offset + aligned_len - 4,
897 					     (u32 *)&buf[aligned_len - 4]);
898 		if (err)
899 			goto out;
900 		rte_memcpy(buf + (eeprom->offset & 3), eeprom->data,
901 			   eeprom->length);
902 	} else {
903 		buf = eeprom->data;
904 	}
905 
906 	err = t4_seeprom_wp(adapter, false);
907 	if (err)
908 		goto out;
909 
910 	for (p = (u32 *)buf; !err && aligned_len; aligned_len -= 4, p++) {
911 		err = eeprom_wr_phys(adapter, aligned_offset, *p);
912 		aligned_offset += 4;
913 	}
914 
915 	if (!err)
916 		err = t4_seeprom_wp(adapter, true);
917 out:
918 	if (buf != eeprom->data)
919 		rte_free(buf);
920 	return err;
921 }
922 
923 static int cxgbe_get_regs_len(struct rte_eth_dev *eth_dev)
924 {
925 	struct port_info *pi = (struct port_info *)(eth_dev->data->dev_private);
926 	struct adapter *adapter = pi->adapter;
927 
928 	return t4_get_regs_len(adapter) / sizeof(uint32_t);
929 }
930 
931 static int cxgbe_get_regs(struct rte_eth_dev *eth_dev,
932 			  struct rte_dev_reg_info *regs)
933 {
934 	struct port_info *pi = (struct port_info *)(eth_dev->data->dev_private);
935 	struct adapter *adapter = pi->adapter;
936 
937 	regs->version = CHELSIO_CHIP_VERSION(adapter->params.chip) |
938 		(CHELSIO_CHIP_RELEASE(adapter->params.chip) << 10) |
939 		(1 << 16);
940 
941 	if (regs->data == NULL) {
942 		regs->length = cxgbe_get_regs_len(eth_dev);
943 		regs->width = sizeof(uint32_t);
944 
945 		return 0;
946 	}
947 
948 	t4_get_regs(adapter, regs->data, (regs->length * sizeof(uint32_t)));
949 
950 	return 0;
951 }
952 
953 static const struct eth_dev_ops cxgbe_eth_dev_ops = {
954 	.dev_start		= cxgbe_dev_start,
955 	.dev_stop		= cxgbe_dev_stop,
956 	.dev_close		= cxgbe_dev_close,
957 	.promiscuous_enable	= cxgbe_dev_promiscuous_enable,
958 	.promiscuous_disable	= cxgbe_dev_promiscuous_disable,
959 	.allmulticast_enable	= cxgbe_dev_allmulticast_enable,
960 	.allmulticast_disable	= cxgbe_dev_allmulticast_disable,
961 	.dev_configure		= cxgbe_dev_configure,
962 	.dev_infos_get		= cxgbe_dev_info_get,
963 	.dev_supported_ptypes_get = cxgbe_dev_supported_ptypes_get,
964 	.link_update		= cxgbe_dev_link_update,
965 	.mtu_set		= cxgbe_dev_mtu_set,
966 	.tx_queue_setup         = cxgbe_dev_tx_queue_setup,
967 	.tx_queue_start		= cxgbe_dev_tx_queue_start,
968 	.tx_queue_stop		= cxgbe_dev_tx_queue_stop,
969 	.tx_queue_release	= cxgbe_dev_tx_queue_release,
970 	.rx_queue_setup         = cxgbe_dev_rx_queue_setup,
971 	.rx_queue_start		= cxgbe_dev_rx_queue_start,
972 	.rx_queue_stop		= cxgbe_dev_rx_queue_stop,
973 	.rx_queue_release	= cxgbe_dev_rx_queue_release,
974 	.stats_get		= cxgbe_dev_stats_get,
975 	.stats_reset		= cxgbe_dev_stats_reset,
976 	.flow_ctrl_get		= cxgbe_flow_ctrl_get,
977 	.flow_ctrl_set		= cxgbe_flow_ctrl_set,
978 	.get_eeprom_length	= cxgbe_get_eeprom_length,
979 	.get_eeprom		= cxgbe_get_eeprom,
980 	.set_eeprom		= cxgbe_set_eeprom,
981 	.get_reg		= cxgbe_get_regs,
982 };
983 
984 /*
985  * Initialize driver
986  * It returns 0 on success.
987  */
988 static int eth_cxgbe_dev_init(struct rte_eth_dev *eth_dev)
989 {
990 	struct rte_pci_device *pci_dev;
991 	struct port_info *pi = (struct port_info *)(eth_dev->data->dev_private);
992 	struct adapter *adapter = NULL;
993 	char name[RTE_ETH_NAME_MAX_LEN];
994 	int err = 0;
995 
996 	CXGBE_FUNC_TRACE();
997 
998 	eth_dev->dev_ops = &cxgbe_eth_dev_ops;
999 	eth_dev->rx_pkt_burst = &cxgbe_recv_pkts;
1000 	eth_dev->tx_pkt_burst = &cxgbe_xmit_pkts;
1001 
1002 	/* for secondary processes, we don't initialise any further as primary
1003 	 * has already done this work.
1004 	 */
1005 	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
1006 		return 0;
1007 
1008 	pci_dev = eth_dev->pci_dev;
1009 
1010 	snprintf(name, sizeof(name), "cxgbeadapter%d", eth_dev->data->port_id);
1011 	adapter = rte_zmalloc(name, sizeof(*adapter), 0);
1012 	if (!adapter)
1013 		return -1;
1014 
1015 	adapter->use_unpacked_mode = 1;
1016 	adapter->regs = (void *)pci_dev->mem_resource[0].addr;
1017 	if (!adapter->regs) {
1018 		dev_err(adapter, "%s: cannot map device registers\n", __func__);
1019 		err = -ENOMEM;
1020 		goto out_free_adapter;
1021 	}
1022 	adapter->pdev = pci_dev;
1023 	adapter->eth_dev = eth_dev;
1024 	pi->adapter = adapter;
1025 
1026 	err = cxgbe_probe(adapter);
1027 	if (err) {
1028 		dev_err(adapter, "%s: cxgbe probe failed with err %d\n",
1029 			__func__, err);
1030 		goto out_free_adapter;
1031 	}
1032 
1033 	return 0;
1034 
1035 out_free_adapter:
1036 	rte_free(adapter);
1037 	return err;
1038 }
1039 
1040 static struct eth_driver rte_cxgbe_pmd = {
1041 	.pci_drv = {
1042 		.id_table = cxgb4_pci_tbl,
1043 		.drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC,
1044 		.probe = rte_eth_dev_pci_probe,
1045 		.remove = rte_eth_dev_pci_remove,
1046 	},
1047 	.eth_dev_init = eth_cxgbe_dev_init,
1048 	.dev_private_size = sizeof(struct port_info),
1049 };
1050 
1051 RTE_PMD_REGISTER_PCI(net_cxgbe, rte_cxgbe_pmd.pci_drv);
1052 RTE_PMD_REGISTER_PCI_TABLE(net_cxgbe, cxgb4_pci_tbl);
1053 RTE_PMD_REGISTER_KMOD_DEP(net_cxgbe, "* igb_uio | uio_pci_generic | vfio");
1054