xref: /dpdk/drivers/net/cxgbe/cxgbe_ethdev.c (revision 0857b942113874c69dc3db5df11a828ee3cc9b6b)
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright(c) 2014-2016 Chelsio Communications.
5  *   All rights reserved.
6  *
7  *   Redistribution and use in source and binary forms, with or without
8  *   modification, are permitted provided that the following conditions
9  *   are met:
10  *
11  *     * Redistributions of source code must retain the above copyright
12  *       notice, this list of conditions and the following disclaimer.
13  *     * Redistributions in binary form must reproduce the above copyright
14  *       notice, this list of conditions and the following disclaimer in
15  *       the documentation and/or other materials provided with the
16  *       distribution.
17  *     * Neither the name of Chelsio Communications nor the names of its
18  *       contributors may be used to endorse or promote products derived
19  *       from this software without specific prior written permission.
20  *
21  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33 
34 #include <sys/queue.h>
35 #include <stdio.h>
36 #include <errno.h>
37 #include <stdint.h>
38 #include <string.h>
39 #include <unistd.h>
40 #include <stdarg.h>
41 #include <inttypes.h>
42 #include <netinet/in.h>
43 
44 #include <rte_byteorder.h>
45 #include <rte_common.h>
46 #include <rte_cycles.h>
47 #include <rte_interrupts.h>
48 #include <rte_log.h>
49 #include <rte_debug.h>
50 #include <rte_pci.h>
51 #include <rte_atomic.h>
52 #include <rte_branch_prediction.h>
53 #include <rte_memory.h>
54 #include <rte_memzone.h>
55 #include <rte_tailq.h>
56 #include <rte_eal.h>
57 #include <rte_alarm.h>
58 #include <rte_ether.h>
59 #include <rte_ethdev.h>
60 #include <rte_atomic.h>
61 #include <rte_malloc.h>
62 #include <rte_random.h>
63 #include <rte_dev.h>
64 
65 #include "cxgbe.h"
66 
/*
 * Macros needed to support the PCI Device ID Table ...
 *
 * They are defined before "t4_pci_id_tbl.h" is included below; presumably
 * that header expands them to emit the table (header not visible here --
 * TODO confirm).
 */
/* Opens the definition of the static table cxgb4_pci_tbl[]. */
#define CH_PCI_DEVICE_ID_TABLE_DEFINE_BEGIN \
	static const struct rte_pci_id cxgb4_pci_tbl[] = {
#define CH_PCI_DEVICE_ID_FUNCTION 0x4

#define PCI_VENDOR_ID_CHELSIO 0x1425

/* One table entry: a Chelsio-vendor device with the given device ID. */
#define CH_PCI_ID_TABLE_ENTRY(devid) \
		{ RTE_PCI_DEVICE(PCI_VENDOR_ID_CHELSIO, (devid)) }

/* Appends an entry with vendor_id 0 and closes the table initializer. */
#define CH_PCI_DEVICE_ID_TABLE_DEFINE_END \
		{ .vendor_id = 0, } \
	}

/*
 *... and the PCI ID Table itself ...
 */
#include "t4_pci_id_tbl.h"
87 
88 static uint16_t cxgbe_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
89 				uint16_t nb_pkts)
90 {
91 	struct sge_eth_txq *txq = (struct sge_eth_txq *)tx_queue;
92 	uint16_t pkts_sent, pkts_remain;
93 	uint16_t total_sent = 0;
94 	int ret = 0;
95 
96 	CXGBE_DEBUG_TX(adapter, "%s: txq = %p; tx_pkts = %p; nb_pkts = %d\n",
97 		       __func__, txq, tx_pkts, nb_pkts);
98 
99 	t4_os_lock(&txq->txq_lock);
100 	/* free up desc from already completed tx */
101 	reclaim_completed_tx(&txq->q);
102 	while (total_sent < nb_pkts) {
103 		pkts_remain = nb_pkts - total_sent;
104 
105 		for (pkts_sent = 0; pkts_sent < pkts_remain; pkts_sent++) {
106 			ret = t4_eth_xmit(txq, tx_pkts[total_sent + pkts_sent]);
107 			if (ret < 0)
108 				break;
109 		}
110 		if (!pkts_sent)
111 			break;
112 		total_sent += pkts_sent;
113 		/* reclaim as much as possible */
114 		reclaim_completed_tx(&txq->q);
115 	}
116 
117 	t4_os_unlock(&txq->txq_lock);
118 	return total_sent;
119 }
120 
121 static uint16_t cxgbe_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
122 				uint16_t nb_pkts)
123 {
124 	struct sge_eth_rxq *rxq = (struct sge_eth_rxq *)rx_queue;
125 	unsigned int work_done;
126 
127 	CXGBE_DEBUG_RX(adapter, "%s: rxq->rspq.cntxt_id = %u; nb_pkts = %d\n",
128 		       __func__, rxq->rspq.cntxt_id, nb_pkts);
129 
130 	if (cxgbe_poll(&rxq->rspq, rx_pkts, (unsigned int)nb_pkts, &work_done))
131 		dev_err(adapter, "error in cxgbe poll\n");
132 
133 	CXGBE_DEBUG_RX(adapter, "%s: work_done = %u\n", __func__, work_done);
134 	return work_done;
135 }
136 
137 static void cxgbe_dev_info_get(struct rte_eth_dev *eth_dev,
138 			       struct rte_eth_dev_info *device_info)
139 {
140 	struct port_info *pi = (struct port_info *)(eth_dev->data->dev_private);
141 	struct adapter *adapter = pi->adapter;
142 	int max_queues = adapter->sge.max_ethqsets / adapter->params.nports;
143 
144 	static const struct rte_eth_desc_lim cxgbe_desc_lim = {
145 		.nb_max = CXGBE_MAX_RING_DESC_SIZE,
146 		.nb_min = CXGBE_MIN_RING_DESC_SIZE,
147 		.nb_align = 1,
148 	};
149 
150 	device_info->pci_dev = RTE_DEV_TO_PCI(eth_dev->device);
151 
152 	device_info->min_rx_bufsize = CXGBE_MIN_RX_BUFSIZE;
153 	device_info->max_rx_pktlen = CXGBE_MAX_RX_PKTLEN;
154 	device_info->max_rx_queues = max_queues;
155 	device_info->max_tx_queues = max_queues;
156 	device_info->max_mac_addrs = 1;
157 	/* XXX: For now we support one MAC/port */
158 	device_info->max_vfs = adapter->params.arch.vfcount;
159 	device_info->max_vmdq_pools = 0; /* XXX: For now no support for VMDQ */
160 
161 	device_info->rx_offload_capa = DEV_RX_OFFLOAD_VLAN_STRIP |
162 				       DEV_RX_OFFLOAD_IPV4_CKSUM |
163 				       DEV_RX_OFFLOAD_UDP_CKSUM |
164 				       DEV_RX_OFFLOAD_TCP_CKSUM;
165 
166 	device_info->tx_offload_capa = DEV_TX_OFFLOAD_VLAN_INSERT |
167 				       DEV_TX_OFFLOAD_IPV4_CKSUM |
168 				       DEV_TX_OFFLOAD_UDP_CKSUM |
169 				       DEV_TX_OFFLOAD_TCP_CKSUM |
170 				       DEV_TX_OFFLOAD_TCP_TSO;
171 
172 	device_info->reta_size = pi->rss_size;
173 
174 	device_info->rx_desc_lim = cxgbe_desc_lim;
175 	device_info->tx_desc_lim = cxgbe_desc_lim;
176 	device_info->speed_capa = ETH_LINK_SPEED_10G | ETH_LINK_SPEED_40G;
177 }
178 
179 static void cxgbe_dev_promiscuous_enable(struct rte_eth_dev *eth_dev)
180 {
181 	struct port_info *pi = (struct port_info *)(eth_dev->data->dev_private);
182 	struct adapter *adapter = pi->adapter;
183 
184 	t4_set_rxmode(adapter, adapter->mbox, pi->viid, -1,
185 		      1, -1, 1, -1, false);
186 }
187 
188 static void cxgbe_dev_promiscuous_disable(struct rte_eth_dev *eth_dev)
189 {
190 	struct port_info *pi = (struct port_info *)(eth_dev->data->dev_private);
191 	struct adapter *adapter = pi->adapter;
192 
193 	t4_set_rxmode(adapter, adapter->mbox, pi->viid, -1,
194 		      0, -1, 1, -1, false);
195 }
196 
197 static void cxgbe_dev_allmulticast_enable(struct rte_eth_dev *eth_dev)
198 {
199 	struct port_info *pi = (struct port_info *)(eth_dev->data->dev_private);
200 	struct adapter *adapter = pi->adapter;
201 
202 	/* TODO: address filters ?? */
203 
204 	t4_set_rxmode(adapter, adapter->mbox, pi->viid, -1,
205 		      -1, 1, 1, -1, false);
206 }
207 
208 static void cxgbe_dev_allmulticast_disable(struct rte_eth_dev *eth_dev)
209 {
210 	struct port_info *pi = (struct port_info *)(eth_dev->data->dev_private);
211 	struct adapter *adapter = pi->adapter;
212 
213 	/* TODO: address filters ?? */
214 
215 	t4_set_rxmode(adapter, adapter->mbox, pi->viid, -1,
216 		      -1, 0, 1, -1, false);
217 }
218 
219 static int cxgbe_dev_link_update(struct rte_eth_dev *eth_dev,
220 				 __rte_unused int wait_to_complete)
221 {
222 	struct port_info *pi = (struct port_info *)(eth_dev->data->dev_private);
223 	struct adapter *adapter = pi->adapter;
224 	struct sge *s = &adapter->sge;
225 	struct rte_eth_link *old_link = &eth_dev->data->dev_link;
226 	unsigned int work_done, budget = 4;
227 
228 	cxgbe_poll(&s->fw_evtq, NULL, budget, &work_done);
229 	if (old_link->link_status == pi->link_cfg.link_ok)
230 		return -1;  /* link not changed */
231 
232 	eth_dev->data->dev_link.link_status = pi->link_cfg.link_ok;
233 	eth_dev->data->dev_link.link_duplex = ETH_LINK_FULL_DUPLEX;
234 	eth_dev->data->dev_link.link_speed = pi->link_cfg.speed;
235 
236 	/* link has changed */
237 	return 0;
238 }
239 
240 static int cxgbe_dev_mtu_set(struct rte_eth_dev *eth_dev, uint16_t mtu)
241 {
242 	struct port_info *pi = (struct port_info *)(eth_dev->data->dev_private);
243 	struct adapter *adapter = pi->adapter;
244 	struct rte_eth_dev_info dev_info;
245 	int err;
246 	uint16_t new_mtu = mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
247 
248 	cxgbe_dev_info_get(eth_dev, &dev_info);
249 
250 	/* Must accommodate at least ETHER_MIN_MTU */
251 	if ((new_mtu < ETHER_MIN_MTU) || (new_mtu > dev_info.max_rx_pktlen))
252 		return -EINVAL;
253 
254 	/* set to jumbo mode if needed */
255 	if (new_mtu > ETHER_MAX_LEN)
256 		eth_dev->data->dev_conf.rxmode.jumbo_frame = 1;
257 	else
258 		eth_dev->data->dev_conf.rxmode.jumbo_frame = 0;
259 
260 	err = t4_set_rxmode(adapter, adapter->mbox, pi->viid, new_mtu, -1, -1,
261 			    -1, -1, true);
262 	if (!err)
263 		eth_dev->data->dev_conf.rxmode.max_rx_pkt_len = new_mtu;
264 
265 	return err;
266 }
267 
/*
 * Forward declarations: these queue helpers are used by the setup and
 * start paths before their definitions appear later in this file.
 */
static int cxgbe_dev_tx_queue_start(struct rte_eth_dev *eth_dev,
				    uint16_t tx_queue_id);
static int cxgbe_dev_rx_queue_start(struct rte_eth_dev *eth_dev,
				    uint16_t rx_queue_id);
static void cxgbe_dev_tx_queue_release(void *q);
static void cxgbe_dev_rx_queue_release(void *q);
274 
275 /*
276  * Stop device.
277  */
278 static void cxgbe_dev_close(struct rte_eth_dev *eth_dev)
279 {
280 	struct port_info *pi = (struct port_info *)(eth_dev->data->dev_private);
281 	struct adapter *adapter = pi->adapter;
282 	int i, dev_down = 0;
283 
284 	CXGBE_FUNC_TRACE();
285 
286 	if (!(adapter->flags & FULL_INIT_DONE))
287 		return;
288 
289 	cxgbe_down(pi);
290 
291 	/*
292 	 *  We clear queues only if both tx and rx path of the port
293 	 *  have been disabled
294 	 */
295 	t4_sge_eth_clear_queues(pi);
296 
297 	/*  See if all ports are down */
298 	for_each_port(adapter, i) {
299 		pi = adap2pinfo(adapter, i);
300 		/*
301 		 * Skip first port of the adapter since it will be closed
302 		 * by DPDK
303 		 */
304 		if (i == 0)
305 			continue;
306 		dev_down += (pi->eth_dev->data->dev_started == 0) ? 1 : 0;
307 	}
308 
309 	/* If rest of the ports are stopped, then free up resources */
310 	if (dev_down == (adapter->params.nports - 1))
311 		cxgbe_close(adapter);
312 }
313 
314 /* Start the device.
315  * It returns 0 on success.
316  */
317 static int cxgbe_dev_start(struct rte_eth_dev *eth_dev)
318 {
319 	struct port_info *pi = (struct port_info *)(eth_dev->data->dev_private);
320 	struct adapter *adapter = pi->adapter;
321 	int err = 0, i;
322 
323 	CXGBE_FUNC_TRACE();
324 
325 	/*
326 	 * If we don't have a connection to the firmware there's nothing we
327 	 * can do.
328 	 */
329 	if (!(adapter->flags & FW_OK)) {
330 		err = -ENXIO;
331 		goto out;
332 	}
333 
334 	if (!(adapter->flags & FULL_INIT_DONE)) {
335 		err = cxgbe_up(adapter);
336 		if (err < 0)
337 			goto out;
338 	}
339 
340 	err = setup_rss(pi);
341 	if (err)
342 		goto out;
343 
344 	for (i = 0; i < pi->n_tx_qsets; i++) {
345 		err = cxgbe_dev_tx_queue_start(eth_dev, i);
346 		if (err)
347 			goto out;
348 	}
349 
350 	for (i = 0; i < pi->n_rx_qsets; i++) {
351 		err = cxgbe_dev_rx_queue_start(eth_dev, i);
352 		if (err)
353 			goto out;
354 	}
355 
356 	err = link_start(pi);
357 	if (err)
358 		goto out;
359 
360 out:
361 	return err;
362 }
363 
364 /*
365  * Stop device: disable rx and tx functions to allow for reconfiguring.
366  */
367 static void cxgbe_dev_stop(struct rte_eth_dev *eth_dev)
368 {
369 	struct port_info *pi = (struct port_info *)(eth_dev->data->dev_private);
370 	struct adapter *adapter = pi->adapter;
371 
372 	CXGBE_FUNC_TRACE();
373 
374 	if (!(adapter->flags & FULL_INIT_DONE))
375 		return;
376 
377 	cxgbe_down(pi);
378 
379 	/*
380 	 *  We clear queues only if both tx and rx path of the port
381 	 *  have been disabled
382 	 */
383 	t4_sge_eth_clear_queues(pi);
384 }
385 
386 static int cxgbe_dev_configure(struct rte_eth_dev *eth_dev)
387 {
388 	struct port_info *pi = (struct port_info *)(eth_dev->data->dev_private);
389 	struct adapter *adapter = pi->adapter;
390 	int err;
391 
392 	CXGBE_FUNC_TRACE();
393 
394 	if (!(adapter->flags & FW_QUEUE_BOUND)) {
395 		err = setup_sge_fwevtq(adapter);
396 		if (err)
397 			return err;
398 		adapter->flags |= FW_QUEUE_BOUND;
399 	}
400 
401 	err = cfg_queue_count(eth_dev);
402 	if (err)
403 		return err;
404 
405 	return 0;
406 }
407 
408 static int cxgbe_dev_tx_queue_start(struct rte_eth_dev *eth_dev,
409 				    uint16_t tx_queue_id)
410 {
411 	int ret;
412 	struct sge_eth_txq *txq = (struct sge_eth_txq *)
413 				  (eth_dev->data->tx_queues[tx_queue_id]);
414 
415 	dev_debug(NULL, "%s: tx_queue_id = %d\n", __func__, tx_queue_id);
416 
417 	ret = t4_sge_eth_txq_start(txq);
418 	if (ret == 0)
419 		eth_dev->data->tx_queue_state[tx_queue_id] = RTE_ETH_QUEUE_STATE_STARTED;
420 
421 	return ret;
422 }
423 
424 static int cxgbe_dev_tx_queue_stop(struct rte_eth_dev *eth_dev,
425 				   uint16_t tx_queue_id)
426 {
427 	int ret;
428 	struct sge_eth_txq *txq = (struct sge_eth_txq *)
429 				  (eth_dev->data->tx_queues[tx_queue_id]);
430 
431 	dev_debug(NULL, "%s: tx_queue_id = %d\n", __func__, tx_queue_id);
432 
433 	ret = t4_sge_eth_txq_stop(txq);
434 	if (ret == 0)
435 		eth_dev->data->tx_queue_state[tx_queue_id] = RTE_ETH_QUEUE_STATE_STOPPED;
436 
437 	return ret;
438 }
439 
440 static int cxgbe_dev_tx_queue_setup(struct rte_eth_dev *eth_dev,
441 				    uint16_t queue_idx,	uint16_t nb_desc,
442 				    unsigned int socket_id,
443 				    const struct rte_eth_txconf *tx_conf)
444 {
445 	struct port_info *pi = (struct port_info *)(eth_dev->data->dev_private);
446 	struct adapter *adapter = pi->adapter;
447 	struct sge *s = &adapter->sge;
448 	struct sge_eth_txq *txq = &s->ethtxq[pi->first_qset + queue_idx];
449 	int err = 0;
450 	unsigned int temp_nb_desc;
451 
452 	RTE_SET_USED(tx_conf);
453 
454 	dev_debug(adapter, "%s: eth_dev->data->nb_tx_queues = %d; queue_idx = %d; nb_desc = %d; socket_id = %d; pi->first_qset = %u\n",
455 		  __func__, eth_dev->data->nb_tx_queues, queue_idx, nb_desc,
456 		  socket_id, pi->first_qset);
457 
458 	/*  Free up the existing queue  */
459 	if (eth_dev->data->tx_queues[queue_idx]) {
460 		cxgbe_dev_tx_queue_release(eth_dev->data->tx_queues[queue_idx]);
461 		eth_dev->data->tx_queues[queue_idx] = NULL;
462 	}
463 
464 	eth_dev->data->tx_queues[queue_idx] = (void *)txq;
465 
466 	/* Sanity Checking
467 	 *
468 	 * nb_desc should be > 1023 and <= CXGBE_MAX_RING_DESC_SIZE
469 	 */
470 	temp_nb_desc = nb_desc;
471 	if (nb_desc < CXGBE_MIN_RING_DESC_SIZE) {
472 		dev_warn(adapter, "%s: number of descriptors must be >= %d. Using default [%d]\n",
473 			 __func__, CXGBE_MIN_RING_DESC_SIZE,
474 			 CXGBE_DEFAULT_TX_DESC_SIZE);
475 		temp_nb_desc = CXGBE_DEFAULT_TX_DESC_SIZE;
476 	} else if (nb_desc > CXGBE_MAX_RING_DESC_SIZE) {
477 		dev_err(adapter, "%s: number of descriptors must be between %d and %d inclusive. Default [%d]\n",
478 			__func__, CXGBE_MIN_RING_DESC_SIZE,
479 			CXGBE_MAX_RING_DESC_SIZE, CXGBE_DEFAULT_TX_DESC_SIZE);
480 		return -(EINVAL);
481 	}
482 
483 	txq->q.size = temp_nb_desc;
484 
485 	err = t4_sge_alloc_eth_txq(adapter, txq, eth_dev, queue_idx,
486 				   s->fw_evtq.cntxt_id, socket_id);
487 
488 	dev_debug(adapter, "%s: txq->q.cntxt_id= %d err = %d\n",
489 		  __func__, txq->q.cntxt_id, err);
490 
491 	return err;
492 }
493 
494 static void cxgbe_dev_tx_queue_release(void *q)
495 {
496 	struct sge_eth_txq *txq = (struct sge_eth_txq *)q;
497 
498 	if (txq) {
499 		struct port_info *pi = (struct port_info *)
500 				       (txq->eth_dev->data->dev_private);
501 		struct adapter *adap = pi->adapter;
502 
503 		dev_debug(adapter, "%s: pi->port_id = %d; tx_queue_id = %d\n",
504 			  __func__, pi->port_id, txq->q.cntxt_id);
505 
506 		t4_sge_eth_txq_release(adap, txq);
507 	}
508 }
509 
510 static int cxgbe_dev_rx_queue_start(struct rte_eth_dev *eth_dev,
511 				    uint16_t rx_queue_id)
512 {
513 	int ret;
514 	struct port_info *pi = (struct port_info *)(eth_dev->data->dev_private);
515 	struct adapter *adap = pi->adapter;
516 	struct sge_rspq *q;
517 
518 	dev_debug(adapter, "%s: pi->port_id = %d; rx_queue_id = %d\n",
519 		  __func__, pi->port_id, rx_queue_id);
520 
521 	q = eth_dev->data->rx_queues[rx_queue_id];
522 
523 	ret = t4_sge_eth_rxq_start(adap, q);
524 	if (ret == 0)
525 		eth_dev->data->rx_queue_state[rx_queue_id] = RTE_ETH_QUEUE_STATE_STARTED;
526 
527 	return ret;
528 }
529 
530 static int cxgbe_dev_rx_queue_stop(struct rte_eth_dev *eth_dev,
531 				   uint16_t rx_queue_id)
532 {
533 	int ret;
534 	struct port_info *pi = (struct port_info *)(eth_dev->data->dev_private);
535 	struct adapter *adap = pi->adapter;
536 	struct sge_rspq *q;
537 
538 	dev_debug(adapter, "%s: pi->port_id = %d; rx_queue_id = %d\n",
539 		  __func__, pi->port_id, rx_queue_id);
540 
541 	q = eth_dev->data->rx_queues[rx_queue_id];
542 	ret = t4_sge_eth_rxq_stop(adap, q);
543 	if (ret == 0)
544 		eth_dev->data->rx_queue_state[rx_queue_id] = RTE_ETH_QUEUE_STATE_STOPPED;
545 
546 	return ret;
547 }
548 
549 static int cxgbe_dev_rx_queue_setup(struct rte_eth_dev *eth_dev,
550 				    uint16_t queue_idx,	uint16_t nb_desc,
551 				    unsigned int socket_id,
552 				    const struct rte_eth_rxconf *rx_conf,
553 				    struct rte_mempool *mp)
554 {
555 	struct port_info *pi = (struct port_info *)(eth_dev->data->dev_private);
556 	struct adapter *adapter = pi->adapter;
557 	struct sge *s = &adapter->sge;
558 	struct sge_eth_rxq *rxq = &s->ethrxq[pi->first_qset + queue_idx];
559 	int err = 0;
560 	int msi_idx = 0;
561 	unsigned int temp_nb_desc;
562 	struct rte_eth_dev_info dev_info;
563 	unsigned int pkt_len = eth_dev->data->dev_conf.rxmode.max_rx_pkt_len;
564 
565 	RTE_SET_USED(rx_conf);
566 
567 	dev_debug(adapter, "%s: eth_dev->data->nb_rx_queues = %d; queue_idx = %d; nb_desc = %d; socket_id = %d; mp = %p\n",
568 		  __func__, eth_dev->data->nb_rx_queues, queue_idx, nb_desc,
569 		  socket_id, mp);
570 
571 	cxgbe_dev_info_get(eth_dev, &dev_info);
572 
573 	/* Must accommodate at least ETHER_MIN_MTU */
574 	if ((pkt_len < dev_info.min_rx_bufsize) ||
575 	    (pkt_len > dev_info.max_rx_pktlen)) {
576 		dev_err(adap, "%s: max pkt len must be > %d and <= %d\n",
577 			__func__, dev_info.min_rx_bufsize,
578 			dev_info.max_rx_pktlen);
579 		return -EINVAL;
580 	}
581 
582 	/*  Free up the existing queue  */
583 	if (eth_dev->data->rx_queues[queue_idx]) {
584 		cxgbe_dev_rx_queue_release(eth_dev->data->rx_queues[queue_idx]);
585 		eth_dev->data->rx_queues[queue_idx] = NULL;
586 	}
587 
588 	eth_dev->data->rx_queues[queue_idx] = (void *)rxq;
589 
590 	/* Sanity Checking
591 	 *
592 	 * nb_desc should be > 0 and <= CXGBE_MAX_RING_DESC_SIZE
593 	 */
594 	temp_nb_desc = nb_desc;
595 	if (nb_desc < CXGBE_MIN_RING_DESC_SIZE) {
596 		dev_warn(adapter, "%s: number of descriptors must be >= %d. Using default [%d]\n",
597 			 __func__, CXGBE_MIN_RING_DESC_SIZE,
598 			 CXGBE_DEFAULT_RX_DESC_SIZE);
599 		temp_nb_desc = CXGBE_DEFAULT_RX_DESC_SIZE;
600 	} else if (nb_desc > CXGBE_MAX_RING_DESC_SIZE) {
601 		dev_err(adapter, "%s: number of descriptors must be between %d and %d inclusive. Default [%d]\n",
602 			__func__, CXGBE_MIN_RING_DESC_SIZE,
603 			CXGBE_MAX_RING_DESC_SIZE, CXGBE_DEFAULT_RX_DESC_SIZE);
604 		return -(EINVAL);
605 	}
606 
607 	rxq->rspq.size = temp_nb_desc;
608 	if ((&rxq->fl) != NULL)
609 		rxq->fl.size = temp_nb_desc;
610 
611 	/* Set to jumbo mode if necessary */
612 	if (pkt_len > ETHER_MAX_LEN)
613 		eth_dev->data->dev_conf.rxmode.jumbo_frame = 1;
614 	else
615 		eth_dev->data->dev_conf.rxmode.jumbo_frame = 0;
616 
617 	err = t4_sge_alloc_rxq(adapter, &rxq->rspq, false, eth_dev, msi_idx,
618 			       &rxq->fl, t4_ethrx_handler,
619 			       t4_get_mps_bg_map(adapter, pi->tx_chan), mp,
620 			       queue_idx, socket_id);
621 
622 	dev_debug(adapter, "%s: err = %d; port_id = %d; cntxt_id = %u\n",
623 		  __func__, err, pi->port_id, rxq->rspq.cntxt_id);
624 	return err;
625 }
626 
627 static void cxgbe_dev_rx_queue_release(void *q)
628 {
629 	struct sge_eth_rxq *rxq = (struct sge_eth_rxq *)q;
630 	struct sge_rspq *rq = &rxq->rspq;
631 
632 	if (rq) {
633 		struct port_info *pi = (struct port_info *)
634 				       (rq->eth_dev->data->dev_private);
635 		struct adapter *adap = pi->adapter;
636 
637 		dev_debug(adapter, "%s: pi->port_id = %d; rx_queue_id = %d\n",
638 			  __func__, pi->port_id, rxq->rspq.cntxt_id);
639 
640 		t4_sge_eth_rxq_release(adap, rxq);
641 	}
642 }
643 
644 /*
645  * Get port statistics.
646  */
647 static void cxgbe_dev_stats_get(struct rte_eth_dev *eth_dev,
648 				struct rte_eth_stats *eth_stats)
649 {
650 	struct port_info *pi = (struct port_info *)(eth_dev->data->dev_private);
651 	struct adapter *adapter = pi->adapter;
652 	struct sge *s = &adapter->sge;
653 	struct port_stats ps;
654 	unsigned int i;
655 
656 	cxgbe_stats_get(pi, &ps);
657 
658 	/* RX Stats */
659 	eth_stats->ipackets = ps.rx_frames;
660 	eth_stats->ibytes   = ps.rx_octets;
661 	eth_stats->imissed  = ps.rx_ovflow0 + ps.rx_ovflow1 +
662 			      ps.rx_ovflow2 + ps.rx_ovflow3 +
663 			      ps.rx_trunc0 + ps.rx_trunc1 +
664 			      ps.rx_trunc2 + ps.rx_trunc3;
665 	eth_stats->ierrors  = ps.rx_symbol_err + ps.rx_fcs_err +
666 			      ps.rx_jabber + ps.rx_too_long + ps.rx_runt +
667 			      ps.rx_len_err;
668 
669 	/* TX Stats */
670 	eth_stats->opackets = ps.tx_frames;
671 	eth_stats->obytes   = ps.tx_octets;
672 	eth_stats->oerrors  = ps.tx_error_frames;
673 
674 	for (i = 0; i < pi->n_rx_qsets; i++) {
675 		struct sge_eth_rxq *rxq =
676 			&s->ethrxq[pi->first_qset + i];
677 
678 		eth_stats->q_ipackets[i] = rxq->stats.pkts;
679 		eth_stats->q_ibytes[i] = rxq->stats.rx_bytes;
680 	}
681 
682 	for (i = 0; i < pi->n_tx_qsets; i++) {
683 		struct sge_eth_txq *txq =
684 			&s->ethtxq[pi->first_qset + i];
685 
686 		eth_stats->q_opackets[i] = txq->stats.pkts;
687 		eth_stats->q_obytes[i] = txq->stats.tx_bytes;
688 		eth_stats->q_errors[i] = txq->stats.mapping_err;
689 	}
690 }
691 
692 /*
693  * Reset port statistics.
694  */
695 static void cxgbe_dev_stats_reset(struct rte_eth_dev *eth_dev)
696 {
697 	struct port_info *pi = (struct port_info *)(eth_dev->data->dev_private);
698 	struct adapter *adapter = pi->adapter;
699 	struct sge *s = &adapter->sge;
700 	unsigned int i;
701 
702 	cxgbe_stats_reset(pi);
703 	for (i = 0; i < pi->n_rx_qsets; i++) {
704 		struct sge_eth_rxq *rxq =
705 			&s->ethrxq[pi->first_qset + i];
706 
707 		rxq->stats.pkts = 0;
708 		rxq->stats.rx_bytes = 0;
709 	}
710 	for (i = 0; i < pi->n_tx_qsets; i++) {
711 		struct sge_eth_txq *txq =
712 			&s->ethtxq[pi->first_qset + i];
713 
714 		txq->stats.pkts = 0;
715 		txq->stats.tx_bytes = 0;
716 		txq->stats.mapping_err = 0;
717 	}
718 }
719 
720 static int cxgbe_flow_ctrl_get(struct rte_eth_dev *eth_dev,
721 			       struct rte_eth_fc_conf *fc_conf)
722 {
723 	struct port_info *pi = (struct port_info *)(eth_dev->data->dev_private);
724 	struct link_config *lc = &pi->link_cfg;
725 	int rx_pause, tx_pause;
726 
727 	fc_conf->autoneg = lc->fc & PAUSE_AUTONEG;
728 	rx_pause = lc->fc & PAUSE_RX;
729 	tx_pause = lc->fc & PAUSE_TX;
730 
731 	if (rx_pause && tx_pause)
732 		fc_conf->mode = RTE_FC_FULL;
733 	else if (rx_pause)
734 		fc_conf->mode = RTE_FC_RX_PAUSE;
735 	else if (tx_pause)
736 		fc_conf->mode = RTE_FC_TX_PAUSE;
737 	else
738 		fc_conf->mode = RTE_FC_NONE;
739 	return 0;
740 }
741 
742 static int cxgbe_flow_ctrl_set(struct rte_eth_dev *eth_dev,
743 			       struct rte_eth_fc_conf *fc_conf)
744 {
745 	struct port_info *pi = (struct port_info *)(eth_dev->data->dev_private);
746 	struct adapter *adapter = pi->adapter;
747 	struct link_config *lc = &pi->link_cfg;
748 
749 	if (lc->supported & FW_PORT_CAP_ANEG) {
750 		if (fc_conf->autoneg)
751 			lc->requested_fc |= PAUSE_AUTONEG;
752 		else
753 			lc->requested_fc &= ~PAUSE_AUTONEG;
754 	}
755 
756 	if (((fc_conf->mode & RTE_FC_FULL) == RTE_FC_FULL) ||
757 	    (fc_conf->mode & RTE_FC_RX_PAUSE))
758 		lc->requested_fc |= PAUSE_RX;
759 	else
760 		lc->requested_fc &= ~PAUSE_RX;
761 
762 	if (((fc_conf->mode & RTE_FC_FULL) == RTE_FC_FULL) ||
763 	    (fc_conf->mode & RTE_FC_TX_PAUSE))
764 		lc->requested_fc |= PAUSE_TX;
765 	else
766 		lc->requested_fc &= ~PAUSE_TX;
767 
768 	return t4_link_l1cfg(adapter, adapter->mbox, pi->tx_chan,
769 			     &pi->link_cfg);
770 }
771 
772 static const uint32_t *
773 cxgbe_dev_supported_ptypes_get(struct rte_eth_dev *eth_dev)
774 {
775 	static const uint32_t ptypes[] = {
776 		RTE_PTYPE_L3_IPV4,
777 		RTE_PTYPE_L3_IPV6,
778 		RTE_PTYPE_UNKNOWN
779 	};
780 
781 	if (eth_dev->rx_pkt_burst == cxgbe_recv_pkts)
782 		return ptypes;
783 	return NULL;
784 }
785 
786 static int cxgbe_get_eeprom_length(struct rte_eth_dev *dev)
787 {
788 	RTE_SET_USED(dev);
789 	return EEPROMSIZE;
790 }
791 
792 /**
793  * eeprom_ptov - translate a physical EEPROM address to virtual
794  * @phys_addr: the physical EEPROM address
795  * @fn: the PCI function number
796  * @sz: size of function-specific area
797  *
798  * Translate a physical EEPROM address to virtual.  The first 1K is
799  * accessed through virtual addresses starting at 31K, the rest is
800  * accessed through virtual addresses starting at 0.
801  *
802  * The mapping is as follows:
803  * [0..1K) -> [31K..32K)
804  * [1K..1K+A) -> [31K-A..31K)
805  * [1K+A..ES) -> [0..ES-A-1K)
806  *
807  * where A = @fn * @sz, and ES = EEPROM size.
808  */
809 static int eeprom_ptov(unsigned int phys_addr, unsigned int fn, unsigned int sz)
810 {
811 	fn *= sz;
812 	if (phys_addr < 1024)
813 		return phys_addr + (31 << 10);
814 	if (phys_addr < 1024 + fn)
815 		return fn + phys_addr - 1024;
816 	if (phys_addr < EEPROMSIZE)
817 		return phys_addr - 1024 - fn;
818 	if (phys_addr < EEPROMVSIZE)
819 		return phys_addr - 1024;
820 	return -EINVAL;
821 }
822 
823 /* The next two routines implement eeprom read/write from physical addresses.
824  */
825 static int eeprom_rd_phys(struct adapter *adap, unsigned int phys_addr, u32 *v)
826 {
827 	int vaddr = eeprom_ptov(phys_addr, adap->pf, EEPROMPFSIZE);
828 
829 	if (vaddr >= 0)
830 		vaddr = t4_seeprom_read(adap, vaddr, v);
831 	return vaddr < 0 ? vaddr : 0;
832 }
833 
834 static int eeprom_wr_phys(struct adapter *adap, unsigned int phys_addr, u32 v)
835 {
836 	int vaddr = eeprom_ptov(phys_addr, adap->pf, EEPROMPFSIZE);
837 
838 	if (vaddr >= 0)
839 		vaddr = t4_seeprom_write(adap, vaddr, v);
840 	return vaddr < 0 ? vaddr : 0;
841 }
842 
843 #define EEPROM_MAGIC 0x38E2F10C
844 
845 static int cxgbe_get_eeprom(struct rte_eth_dev *dev,
846 			    struct rte_dev_eeprom_info *e)
847 {
848 	struct port_info *pi = (struct port_info *)(dev->data->dev_private);
849 	struct adapter *adapter = pi->adapter;
850 	u32 i, err = 0;
851 	u8 *buf = rte_zmalloc(NULL, EEPROMSIZE, 0);
852 
853 	if (!buf)
854 		return -ENOMEM;
855 
856 	e->magic = EEPROM_MAGIC;
857 	for (i = e->offset & ~3; !err && i < e->offset + e->length; i += 4)
858 		err = eeprom_rd_phys(adapter, i, (u32 *)&buf[i]);
859 
860 	if (!err)
861 		rte_memcpy(e->data, buf + e->offset, e->length);
862 	rte_free(buf);
863 	return err;
864 }
865 
866 static int cxgbe_set_eeprom(struct rte_eth_dev *dev,
867 			    struct rte_dev_eeprom_info *eeprom)
868 {
869 	struct port_info *pi = (struct port_info *)(dev->data->dev_private);
870 	struct adapter *adapter = pi->adapter;
871 	u8 *buf;
872 	int err = 0;
873 	u32 aligned_offset, aligned_len, *p;
874 
875 	if (eeprom->magic != EEPROM_MAGIC)
876 		return -EINVAL;
877 
878 	aligned_offset = eeprom->offset & ~3;
879 	aligned_len = (eeprom->length + (eeprom->offset & 3) + 3) & ~3;
880 
881 	if (adapter->pf > 0) {
882 		u32 start = 1024 + adapter->pf * EEPROMPFSIZE;
883 
884 		if (aligned_offset < start ||
885 		    aligned_offset + aligned_len > start + EEPROMPFSIZE)
886 			return -EPERM;
887 	}
888 
889 	if (aligned_offset != eeprom->offset || aligned_len != eeprom->length) {
890 		/* RMW possibly needed for first or last words.
891 		 */
892 		buf = rte_zmalloc(NULL, aligned_len, 0);
893 		if (!buf)
894 			return -ENOMEM;
895 		err = eeprom_rd_phys(adapter, aligned_offset, (u32 *)buf);
896 		if (!err && aligned_len > 4)
897 			err = eeprom_rd_phys(adapter,
898 					     aligned_offset + aligned_len - 4,
899 					     (u32 *)&buf[aligned_len - 4]);
900 		if (err)
901 			goto out;
902 		rte_memcpy(buf + (eeprom->offset & 3), eeprom->data,
903 			   eeprom->length);
904 	} else {
905 		buf = eeprom->data;
906 	}
907 
908 	err = t4_seeprom_wp(adapter, false);
909 	if (err)
910 		goto out;
911 
912 	for (p = (u32 *)buf; !err && aligned_len; aligned_len -= 4, p++) {
913 		err = eeprom_wr_phys(adapter, aligned_offset, *p);
914 		aligned_offset += 4;
915 	}
916 
917 	if (!err)
918 		err = t4_seeprom_wp(adapter, true);
919 out:
920 	if (buf != eeprom->data)
921 		rte_free(buf);
922 	return err;
923 }
924 
925 static int cxgbe_get_regs_len(struct rte_eth_dev *eth_dev)
926 {
927 	struct port_info *pi = (struct port_info *)(eth_dev->data->dev_private);
928 	struct adapter *adapter = pi->adapter;
929 
930 	return t4_get_regs_len(adapter) / sizeof(uint32_t);
931 }
932 
933 static int cxgbe_get_regs(struct rte_eth_dev *eth_dev,
934 			  struct rte_dev_reg_info *regs)
935 {
936 	struct port_info *pi = (struct port_info *)(eth_dev->data->dev_private);
937 	struct adapter *adapter = pi->adapter;
938 
939 	regs->version = CHELSIO_CHIP_VERSION(adapter->params.chip) |
940 		(CHELSIO_CHIP_RELEASE(adapter->params.chip) << 10) |
941 		(1 << 16);
942 
943 	if (regs->data == NULL) {
944 		regs->length = cxgbe_get_regs_len(eth_dev);
945 		regs->width = sizeof(uint32_t);
946 
947 		return 0;
948 	}
949 
950 	t4_get_regs(adapter, regs->data, (regs->length * sizeof(uint32_t)));
951 
952 	return 0;
953 }
954 
955 static const struct eth_dev_ops cxgbe_eth_dev_ops = {
956 	.dev_start		= cxgbe_dev_start,
957 	.dev_stop		= cxgbe_dev_stop,
958 	.dev_close		= cxgbe_dev_close,
959 	.promiscuous_enable	= cxgbe_dev_promiscuous_enable,
960 	.promiscuous_disable	= cxgbe_dev_promiscuous_disable,
961 	.allmulticast_enable	= cxgbe_dev_allmulticast_enable,
962 	.allmulticast_disable	= cxgbe_dev_allmulticast_disable,
963 	.dev_configure		= cxgbe_dev_configure,
964 	.dev_infos_get		= cxgbe_dev_info_get,
965 	.dev_supported_ptypes_get = cxgbe_dev_supported_ptypes_get,
966 	.link_update		= cxgbe_dev_link_update,
967 	.mtu_set		= cxgbe_dev_mtu_set,
968 	.tx_queue_setup         = cxgbe_dev_tx_queue_setup,
969 	.tx_queue_start		= cxgbe_dev_tx_queue_start,
970 	.tx_queue_stop		= cxgbe_dev_tx_queue_stop,
971 	.tx_queue_release	= cxgbe_dev_tx_queue_release,
972 	.rx_queue_setup         = cxgbe_dev_rx_queue_setup,
973 	.rx_queue_start		= cxgbe_dev_rx_queue_start,
974 	.rx_queue_stop		= cxgbe_dev_rx_queue_stop,
975 	.rx_queue_release	= cxgbe_dev_rx_queue_release,
976 	.stats_get		= cxgbe_dev_stats_get,
977 	.stats_reset		= cxgbe_dev_stats_reset,
978 	.flow_ctrl_get		= cxgbe_flow_ctrl_get,
979 	.flow_ctrl_set		= cxgbe_flow_ctrl_set,
980 	.get_eeprom_length	= cxgbe_get_eeprom_length,
981 	.get_eeprom		= cxgbe_get_eeprom,
982 	.set_eeprom		= cxgbe_set_eeprom,
983 	.get_reg		= cxgbe_get_regs,
984 };
985 
986 /*
987  * Initialize driver
988  * It returns 0 on success.
989  */
990 static int eth_cxgbe_dev_init(struct rte_eth_dev *eth_dev)
991 {
992 	struct rte_pci_device *pci_dev;
993 	struct port_info *pi = (struct port_info *)(eth_dev->data->dev_private);
994 	struct adapter *adapter = NULL;
995 	char name[RTE_ETH_NAME_MAX_LEN];
996 	int err = 0;
997 
998 	CXGBE_FUNC_TRACE();
999 
1000 	eth_dev->dev_ops = &cxgbe_eth_dev_ops;
1001 	eth_dev->rx_pkt_burst = &cxgbe_recv_pkts;
1002 	eth_dev->tx_pkt_burst = &cxgbe_xmit_pkts;
1003 
1004 	/* for secondary processes, we don't initialise any further as primary
1005 	 * has already done this work.
1006 	 */
1007 	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
1008 		return 0;
1009 
1010 	pci_dev = RTE_DEV_TO_PCI(eth_dev->device);
1011 
1012 	snprintf(name, sizeof(name), "cxgbeadapter%d", eth_dev->data->port_id);
1013 	adapter = rte_zmalloc(name, sizeof(*adapter), 0);
1014 	if (!adapter)
1015 		return -1;
1016 
1017 	adapter->use_unpacked_mode = 1;
1018 	adapter->regs = (void *)pci_dev->mem_resource[0].addr;
1019 	if (!adapter->regs) {
1020 		dev_err(adapter, "%s: cannot map device registers\n", __func__);
1021 		err = -ENOMEM;
1022 		goto out_free_adapter;
1023 	}
1024 	adapter->pdev = pci_dev;
1025 	adapter->eth_dev = eth_dev;
1026 	pi->adapter = adapter;
1027 
1028 	err = cxgbe_probe(adapter);
1029 	if (err) {
1030 		dev_err(adapter, "%s: cxgbe probe failed with err %d\n",
1031 			__func__, err);
1032 		goto out_free_adapter;
1033 	}
1034 
1035 	return 0;
1036 
1037 out_free_adapter:
1038 	rte_free(adapter);
1039 	return err;
1040 }
1041 
/*
 * Driver descriptor: ties the Chelsio PCI ID table and the generic ethdev
 * PCI probe/remove helpers to this driver's init routine.
 */
static struct eth_driver rte_cxgbe_pmd = {
	.pci_drv = {
		.id_table = cxgb4_pci_tbl,
		.drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC,
		.probe = rte_eth_dev_pci_probe,
		.remove = rte_eth_dev_pci_remove,
	},
	.eth_dev_init = eth_cxgbe_dev_init,
	/* the per-port private area holds one struct port_info */
	.dev_private_size = sizeof(struct port_info),
};

/*
 * Register the PCI driver, its device ID table and its kernel-module
 * dependency string under the name net_cxgbe.
 */
RTE_PMD_REGISTER_PCI(net_cxgbe, rte_cxgbe_pmd.pci_drv);
RTE_PMD_REGISTER_PCI_TABLE(net_cxgbe, cxgb4_pci_tbl);
RTE_PMD_REGISTER_KMOD_DEP(net_cxgbe, "* igb_uio | uio_pci_generic | vfio");
1056