xref: /dpdk/drivers/net/enic/enic_main.c (revision c103585df76017fedd5b0ea2f4769fb9ee42f31f)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright 2008-2017 Cisco Systems, Inc.  All rights reserved.
3  * Copyright 2007 Nuova Systems, Inc.  All rights reserved.
4  */
5 
6 #include <stdio.h>
7 
8 #include <sys/stat.h>
9 #include <sys/mman.h>
10 #include <fcntl.h>
11 
12 #include <rte_pci.h>
13 #include <bus_pci_driver.h>
14 #include <rte_memzone.h>
15 #include <rte_malloc.h>
16 #include <rte_mbuf.h>
17 #include <rte_string_fns.h>
18 #include <ethdev_driver.h>
19 #include <rte_geneve.h>
20 
21 #include "enic_compat.h"
22 #include "enic.h"
23 #include "enic_sriov.h"
24 #include "wq_enet_desc.h"
25 #include "rq_enet_desc.h"
26 #include "cq_enet_desc.h"
27 #include "vnic_enet.h"
28 #include "vnic_dev.h"
29 #include "vnic_wq.h"
30 #include "vnic_rq.h"
31 #include "vnic_cq.h"
32 #include "vnic_intr.h"
33 #include "vnic_nic.h"
34 
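/*
 * Free any mbufs still attached to an RQ's mbuf ring, e.g. when the queue is
 * torn down. Does nothing if the RQ or its mbuf ring is not allocated.
 */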
35 void
36 enic_rxmbuf_queue_release(__rte_unused struct enic *enic, struct vnic_rq *rq)
37 {
38 	uint16_t i;
39 
40 	if (!rq || !rq->mbuf_ring) {
41 		dev_debug(enic, "Pointer to rq or mbuf_ring is NULL");
42 		return;
43 	}
44 
45 	for (i = 0; i < rq->ring.desc_count; i++) {
46 		if (rq->mbuf_ring[i]) {
47 			rte_pktmbuf_free_seg(rq->mbuf_ring[i]);
48 			rq->mbuf_ring[i] = NULL;
49 		}
50 	}
51 }
52 
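/* Callback used with vnic_wq_clean(): free a Tx mbuf segment and clear its slot */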
53 void enic_free_wq_buf(struct rte_mbuf **buf)
54 {
55 	struct rte_mbuf *mbuf = *buf;
56 
57 	rte_pktmbuf_free_seg(mbuf);
58 	*buf = NULL;
59 }
60 
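/* Log any error status the hardware has latched on the WQs and RQs */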
61 static void enic_log_q_error(struct enic *enic)
62 {
63 	unsigned int i;
64 	uint32_t error_status;
65 
66 	for (i = 0; i < enic->wq_count; i++) {
67 		error_status = vnic_wq_error_status(&enic->wq[i]);
68 		if (error_status)
69 			dev_err(enic, "WQ[%d] error_status %d\n", i,
70 				error_status);
71 	}
72 
73 	for (i = 0; i < enic_vnic_rq_count(enic); i++) {
74 		if (!enic->rq[i].in_use)
75 			continue;
76 		error_status = vnic_rq_error_status(&enic->rq[i]);
77 		if (error_status)
78 			dev_err(enic, "RQ[%d] error_status %d\n", i,
79 				error_status);
80 	}
81 }
82 
83 static void enic_clear_soft_stats(struct enic *enic)
84 {
85 	struct enic_soft_stats *soft_stats = &enic->soft_stats;
86 	rte_atomic64_clear(&soft_stats->rx_nombuf);
87 	rte_atomic64_clear(&soft_stats->rx_packet_errors);
88 	rte_atomic64_clear(&soft_stats->tx_oversized);
89 }
90 
91 static void enic_init_soft_stats(struct enic *enic)
92 {
93 	struct enic_soft_stats *soft_stats = &enic->soft_stats;
94 	rte_atomic64_init(&soft_stats->rx_nombuf);
95 	rte_atomic64_init(&soft_stats->rx_packet_errors);
96 	rte_atomic64_init(&soft_stats->tx_oversized);
97 	enic_clear_soft_stats(enic);
98 }
99 
100 int enic_dev_stats_clear(struct enic *enic)
101 {
102 	int ret;
103 
104 	ret = vnic_dev_stats_clear(enic->vdev);
105 	if (ret != 0) {
106 		dev_err(enic, "Error in clearing stats\n");
107 		return ret;
108 	}
109 	enic_clear_soft_stats(enic);
110 
111 	return 0;
112 }
113 
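/*
 * Fill rte_eth_stats from the hardware counters (vnic_dev_stats_dump)
 * combined with the driver's soft stats (rx_nombuf, rx_packet_errors,
 * tx_oversized).
 */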
114 int enic_dev_stats_get(struct enic *enic, struct rte_eth_stats *r_stats)
115 {
116 	struct vnic_stats *stats;
117 	struct enic_soft_stats *soft_stats = &enic->soft_stats;
118 	int64_t rx_truncated;
119 	uint64_t rx_packet_errors;
120 	int ret = vnic_dev_stats_dump(enic->vdev, &stats);
121 
122 	if (ret) {
123 		dev_err(enic, "Error in getting stats\n");
124 		return ret;
125 	}
126 
127 	/* The number of truncated packets can only be calculated by
128 	 * subtracting a hardware counter from error packets received by
129 	 * the driver. Note: this causes transient inaccuracies in the
130 	 * ipackets count. Also, the lengths of truncated packets are
131 	 * counted in ibytes even though truncated packets are dropped,
132 	 * which can make ibytes slightly higher than it should be.
133 	 */
134 	rx_packet_errors = rte_atomic64_read(&soft_stats->rx_packet_errors);
135 	rx_truncated = rx_packet_errors - stats->rx.rx_errors;
136 
137 	r_stats->ipackets = stats->rx.rx_frames_ok - rx_truncated;
138 	r_stats->opackets = stats->tx.tx_frames_ok;
139 
140 	r_stats->ibytes = stats->rx.rx_bytes_ok;
141 	r_stats->obytes = stats->tx.tx_bytes_ok;
142 
143 	r_stats->ierrors = stats->rx.rx_errors + stats->rx.rx_drop;
144 	r_stats->oerrors = stats->tx.tx_errors
145 			   + rte_atomic64_read(&soft_stats->tx_oversized);
146 
147 	r_stats->imissed = stats->rx.rx_no_bufs + rx_truncated;
148 
149 	r_stats->rx_nombuf = rte_atomic64_read(&soft_stats->rx_nombuf);
150 	return 0;
151 }
152 
153 int enic_del_mac_address(struct enic *enic, int mac_index)
154 {
155 	struct rte_eth_dev *eth_dev = enic->rte_dev;
156 	uint8_t *mac_addr = eth_dev->data->mac_addrs[mac_index].addr_bytes;
157 
158 	return enic_dev_del_addr(enic, mac_addr);
159 }
160 
161 int enic_set_mac_address(struct enic *enic, uint8_t *mac_addr)
162 {
163 	int err;
164 
165 	err = enic_dev_add_addr(enic, mac_addr);
166 	if (err)
167 		dev_err(enic, "add mac addr failed\n");
168 	return err;
169 }
170 
171 void enic_free_rq_buf(struct rte_mbuf **mbuf)
172 {
173 	if (*mbuf == NULL)
174 		return;
175 
176 	rte_pktmbuf_free(*mbuf);
177 	*mbuf = NULL;
178 }
179 
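/*
 * Program the vNIC's RQs, WQs, CQs, and interrupts with their runtime
 * parameters (CQ mapping, interrupt offsets, coalescing timer).
 */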
180 void enic_init_vnic_resources(struct enic *enic)
181 {
182 	unsigned int error_interrupt_enable = 1;
183 	unsigned int error_interrupt_offset = 0;
184 	unsigned int rxq_interrupt_enable = 0;
185 	unsigned int rxq_interrupt_offset = ENICPMD_RXQ_INTR_OFFSET;
186 	unsigned int index = 0;
187 	unsigned int cq_idx;
188 	struct vnic_rq *data_rq;
189 
190 	if (enic->rte_dev->data->dev_conf.intr_conf.rxq)
191 		rxq_interrupt_enable = 1;
192 
193 	for (index = 0; index < enic->rq_count; index++) {
194 		cq_idx = enic_cq_rq(enic, enic_rte_rq_idx_to_sop_idx(index));
195 
196 		vnic_rq_init(&enic->rq[enic_rte_rq_idx_to_sop_idx(index)],
197 			cq_idx,
198 			error_interrupt_enable,
199 			error_interrupt_offset);
200 
201 		data_rq = &enic->rq[enic_rte_rq_idx_to_data_idx(index, enic)];
202 		if (data_rq->in_use)
203 			vnic_rq_init(data_rq,
204 				     cq_idx,
205 				     error_interrupt_enable,
206 				     error_interrupt_offset);
207 		vnic_cq_init(&enic->cq[cq_idx],
208 			0 /* flow_control_enable */,
209 			1 /* color_enable */,
210 			0 /* cq_head */,
211 			0 /* cq_tail */,
212 			1 /* cq_tail_color */,
213 			rxq_interrupt_enable,
214 			1 /* cq_entry_enable */,
215 			0 /* cq_message_enable */,
216 			rxq_interrupt_offset,
217 			0 /* cq_message_addr */);
218 		if (rxq_interrupt_enable)
219 			rxq_interrupt_offset++;
220 	}
221 
222 	for (index = 0; index < enic->wq_count; index++) {
223 		vnic_wq_init(&enic->wq[index],
224 			enic_cq_wq(enic, index),
225 			error_interrupt_enable,
226 			error_interrupt_offset);
227 		/* Compute unsupported ol flags for enic_prep_pkts() */
228 		enic->wq[index].tx_offload_notsup_mask =
229 			RTE_MBUF_F_TX_OFFLOAD_MASK ^ enic->tx_offload_mask;
230 
231 		cq_idx = enic_cq_wq(enic, index);
232 		vnic_cq_init(&enic->cq[cq_idx],
233 			0 /* flow_control_enable */,
234 			1 /* color_enable */,
235 			0 /* cq_head */,
236 			0 /* cq_tail */,
237 			1 /* cq_tail_color */,
238 			0 /* interrupt_enable */,
239 			0 /* cq_entry_enable */,
240 			1 /* cq_message_enable */,
241 			0 /* interrupt offset */,
242 			(uint64_t)enic->wq[index].cqmsg_rz->iova);
243 	}
244 
245 	for (index = 0; index < enic->intr_count; index++) {
246 		vnic_intr_init(&enic->intr[index],
247 			       enic->config.intr_timer_usec,
248 			       enic->config.intr_timer_type,
249 			       /*mask_on_assertion*/1);
250 	}
251 }
252 
253 
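/*
 * Allocate an mbuf for every descriptor of an RQ and write the buffer
 * addresses into the descriptor ring. The buffers are not posted to the NIC
 * here; that happens later in enic_initial_post_rx().
 */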
254 int
255 enic_alloc_rx_queue_mbufs(struct enic *enic, struct vnic_rq *rq)
256 {
257 	struct rte_mbuf *mb;
258 	struct rq_enet_desc *rqd = rq->ring.descs;
259 	unsigned i;
260 	dma_addr_t dma_addr;
261 	uint32_t max_rx_pktlen;
262 	uint16_t rq_buf_len;
263 
264 	if (!rq->in_use)
265 		return 0;
266 
267 	dev_debug(enic, "queue %u, allocating %u rx queue mbufs\n", rq->index,
268 		  rq->ring.desc_count);
269 
270 	/*
271 	 * If *not* using scatter and the mbuf size is greater than the
272 	 * requested max packet size (mtu + eth overhead), then reduce the
273 	 * posted buffer size to max packet size. HW still receives packets
274 	 * larger than max packet size, but they will be truncated, which we
275 	 * drop in the rx handler. Not ideal, but better than returning
276 	 * large packets when the user is not expecting them.
277 	 */
278 	max_rx_pktlen = enic_mtu_to_max_rx_pktlen(enic->rte_dev->data->mtu);
279 	rq_buf_len = rte_pktmbuf_data_room_size(rq->mp) - RTE_PKTMBUF_HEADROOM;
280 	if (max_rx_pktlen < rq_buf_len && !rq->data_queue_enable)
281 		rq_buf_len = max_rx_pktlen;
282 	for (i = 0; i < rq->ring.desc_count; i++, rqd++) {
283 		mb = rte_mbuf_raw_alloc(rq->mp);
284 		if (mb == NULL) {
285 			dev_err(enic, "RX mbuf alloc failed queue_id=%u\n",
286 			(unsigned)rq->index);
287 			return -ENOMEM;
288 		}
289 
290 		mb->data_off = RTE_PKTMBUF_HEADROOM;
291 		dma_addr = (dma_addr_t)(mb->buf_iova
292 			   + RTE_PKTMBUF_HEADROOM);
293 		rq_enet_desc_enc(rqd, dma_addr,
294 				(rq->is_sop ? RQ_ENET_TYPE_ONLY_SOP
295 				: RQ_ENET_TYPE_NOT_SOP),
296 				rq_buf_len);
297 		rq->mbuf_ring[i] = mb;
298 	}
299 	/*
300 	 * Do not post the buffers to the NIC until we enable the RQ via
301 	 * enic_start_rq().
302 	 */
303 	rq->need_initial_post = true;
304 	/* Initialize fetch index while RQ is disabled */
305 	iowrite32(0, &rq->ctrl->fetch_index);
306 	return 0;
307 }
308 
309 /*
310  * Post the Rx buffers for the first time. enic_alloc_rx_queue_mbufs() has
311  * allocated the buffers and filled the RQ descriptor ring. Just need to push
312  * the post index to the NIC.
313  */
314 static void
315 enic_initial_post_rx(struct enic *enic, struct vnic_rq *rq)
316 {
317 	if (!rq->in_use || !rq->need_initial_post)
318 		return;
319 
320 	/* make sure all prior writes are complete before doing the PIO write */
321 	rte_rmb();
322 
323 	/* Post all but the last buffer to VIC. */
324 	rq->posted_index = rq->ring.desc_count - 1;
325 
326 	rq->rx_nb_hold = 0;
327 
328 	dev_debug(enic, "port=%u, qidx=%u, Write %u posted idx, %u sw held\n",
329 		enic->port_id, rq->index, rq->posted_index, rq->rx_nb_hold);
330 	iowrite32(rq->posted_index, &rq->ctrl->posted_index);
331 	rte_rmb();
332 	rq->need_initial_post = false;
333 }
334 
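/*
 * Allocate DMA-consistent memory backed by an IOVA-contiguous rte_memzone.
 * The zone is tracked in enic->memzone_list so enic_free_consistent() can
 * release it later by address.
 */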
335 void *
336 enic_alloc_consistent(void *priv, size_t size,
337 	dma_addr_t *dma_handle, uint8_t *name)
338 {
339 	void *vaddr;
340 	const struct rte_memzone *rz;
341 	*dma_handle = 0;
342 	struct enic *enic = (struct enic *)priv;
343 	struct enic_memzone_entry *mze;
344 
345 	rz = rte_memzone_reserve_aligned((const char *)name, size,
346 			SOCKET_ID_ANY, RTE_MEMZONE_IOVA_CONTIG, ENIC_PAGE_SIZE);
347 	if (!rz) {
348 		pr_err("%s : Failed to allocate memory requested for %s\n",
349 			__func__, name);
350 		return NULL;
351 	}
352 
353 	vaddr = rz->addr;
354 	*dma_handle = (dma_addr_t)rz->iova;
355 
356 	mze = rte_malloc("enic memzone entry",
357 			 sizeof(struct enic_memzone_entry), 0);
358 
359 	if (!mze) {
360 		pr_err("%s : Failed to allocate memory for memzone list\n",
361 		       __func__);
362 		rte_memzone_free(rz);
363 		return NULL;
364 	}
365 
366 	mze->rz = rz;
367 
368 	rte_spinlock_lock(&enic->memzone_list_lock);
369 	LIST_INSERT_HEAD(&enic->memzone_list, mze, entries);
370 	rte_spinlock_unlock(&enic->memzone_list_lock);
371 
372 	return vaddr;
373 }
374 
375 void
376 enic_free_consistent(void *priv,
377 		     __rte_unused size_t size,
378 		     void *vaddr,
379 		     dma_addr_t dma_handle)
380 {
381 	struct enic_memzone_entry *mze;
382 	struct enic *enic = (struct enic *)priv;
383 
384 	rte_spinlock_lock(&enic->memzone_list_lock);
385 	LIST_FOREACH(mze, &enic->memzone_list, entries) {
386 		if (mze->rz->addr == vaddr &&
387 		    mze->rz->iova == dma_handle)
388 			break;
389 	}
390 	if (mze == NULL) {
391 		rte_spinlock_unlock(&enic->memzone_list_lock);
392 		dev_warning(enic,
393 			    "Tried to free memory, but couldn't find it in the memzone list\n");
394 		return;
395 	}
396 	LIST_REMOVE(mze, entries);
397 	rte_spinlock_unlock(&enic->memzone_list_lock);
398 	rte_memzone_free(mze->rz);
399 	rte_free(mze);
400 }
401 
402 int enic_link_update(struct rte_eth_dev *eth_dev)
403 {
404 	struct enic *enic = pmd_priv(eth_dev);
405 	struct rte_eth_link link;
406 
407 	memset(&link, 0, sizeof(link));
408 	link.link_status = enic_get_link_status(enic);
409 	link.link_duplex = RTE_ETH_LINK_FULL_DUPLEX;
410 	link.link_speed = vnic_dev_port_speed(enic->vdev);
411 
412 	return rte_eth_linkstatus_set(eth_dev, &link);
413 }
414 
415 static void
416 enic_intr_handler(void *arg)
417 {
418 	struct rte_eth_dev *dev = (struct rte_eth_dev *)arg;
419 	struct enic *enic = pmd_priv(dev);
420 
421 	ENICPMD_FUNC_TRACE();
422 
423 	vnic_intr_return_all_credits(&enic->intr[ENICPMD_LSC_INTR_OFFSET]);
424 
425 	if (enic_is_vf(enic)) {
426 		/*
427 		 * When using the admin channel, the VF receives link
428 		 * status changes from the PF. enic_poll_vf_admin_chan()
429 		 * processes them and raises RTE_ETH_EVENT_INTR_LSC.
430 		 */
431 		enic_poll_vf_admin_chan(enic);
432 		return;
433 	}
434 
435 	enic_link_update(dev);
436 	rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_LSC, NULL);
437 	enic_log_q_error(enic);
438 	/* Re-enable irq in case of INTx */
439 	rte_intr_ack(enic->pdev->intr_handle);
440 }
441 
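/*
 * Set up MSI-X event fds and the interrupt vector list for per-Rx-queue
 * interrupts, when the application requests intr_conf.rxq.
 */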
442 static int enic_rxq_intr_init(struct enic *enic)
443 {
444 	struct rte_intr_handle *intr_handle;
445 	uint32_t rxq_intr_count, i;
446 	int err;
447 
448 	intr_handle = enic->rte_dev->intr_handle;
449 	if (!enic->rte_dev->data->dev_conf.intr_conf.rxq)
450 		return 0;
451 	/*
452 	 * Rx queue interrupts only work when we have MSI-X interrupts,
453 	 * one per queue. Sharing one interrupt is technically
454 	 * possible with VIC, but it is not worth the complications it brings.
455 	 */
456 	if (!rte_intr_cap_multiple(intr_handle)) {
457 		dev_err(enic, "Rx queue interrupts require MSI-X interrupts"
458 			" (vfio-pci driver)\n");
459 		return -ENOTSUP;
460 	}
461 	rxq_intr_count = enic->intr_count - ENICPMD_RXQ_INTR_OFFSET;
462 	err = rte_intr_efd_enable(intr_handle, rxq_intr_count);
463 	if (err) {
464 		dev_err(enic, "Failed to enable event fds for Rx queue"
465 			" interrupts\n");
466 		return err;
467 	}
468 
469 	if (rte_intr_vec_list_alloc(intr_handle, "enic_intr_vec",
470 					   rxq_intr_count)) {
471 		dev_err(enic, "Failed to allocate intr_vec\n");
472 		return -ENOMEM;
473 	}
474 	for (i = 0; i < rxq_intr_count; i++)
475 		if (rte_intr_vec_list_index_set(intr_handle, i,
476 						   i + ENICPMD_RXQ_INTR_OFFSET))
477 			return -rte_errno;
478 	return 0;
479 }
480 
481 static void enic_rxq_intr_deinit(struct enic *enic)
482 {
483 	struct rte_intr_handle *intr_handle;
484 
485 	intr_handle = enic->rte_dev->intr_handle;
486 	rte_intr_efd_disable(intr_handle);
487 
488 	rte_intr_vec_list_free(intr_handle);
489 }
490 
491 static void enic_prep_wq_for_simple_tx(struct enic *enic, uint16_t queue_idx)
492 {
493 	struct wq_enet_desc *desc;
494 	struct vnic_wq *wq;
495 	unsigned int i;
496 
497 	/*
498 	 * Fill WQ descriptor fields that never change. Every descriptor is
499 	 * one packet, so set EOP. Also set CQ_ENTRY every ENIC_WQ_CQ_THRESH
500 	 * descriptors (i.e. request one completion update every 32 packets).
501 	 */
502 	wq = &enic->wq[queue_idx];
503 	desc = (struct wq_enet_desc *)wq->ring.descs;
504 	for (i = 0; i < wq->ring.desc_count; i++, desc++) {
505 		desc->header_length_flags = 1 << WQ_ENET_FLAGS_EOP_SHIFT;
506 		if (i % ENIC_WQ_CQ_THRESH == ENIC_WQ_CQ_THRESH - 1)
507 			desc->header_length_flags |=
508 				(1 << WQ_ENET_FLAGS_CQ_ENTRY_SHIFT);
509 	}
510 }
511 
512 /*
513  * The 'strong' version is in enic_rxtx_vec_avx2.c. This weak version is
514  * used when that file is not compiled.
515  */
516 __rte_weak bool
517 enic_use_vector_rx_handler(__rte_unused struct rte_eth_dev *eth_dev)
518 {
519 	return false;
520 }
521 
522 void enic_pick_rx_handler(struct rte_eth_dev *eth_dev)
523 {
524 	struct enic *enic = pmd_priv(eth_dev);
525 
526 	if (enic->cq64) {
527 		ENICPMD_LOG(DEBUG, " use the normal Rx handler for 64B CQ entry");
528 		eth_dev->rx_pkt_burst = &enic_recv_pkts_64;
529 		return;
530 	}
531 	/*
532 	 * Preference order:
533 	 * 1. The vectorized handler if possible and requested.
534 	 * 2. The non-scatter, simplified handler if scatter Rx is not used.
535 	 * 3. The default handler as a fallback.
536 	 */
537 	if (enic_use_vector_rx_handler(eth_dev))
538 		return;
539 	if (enic->rq_count > 0 && enic->rq[0].data_queue_enable == 0) {
540 		ENICPMD_LOG(DEBUG, " use the non-scatter Rx handler");
541 		eth_dev->rx_pkt_burst = &enic_noscatter_recv_pkts;
542 	} else {
543 		ENICPMD_LOG(DEBUG, " use the normal Rx handler");
544 		eth_dev->rx_pkt_burst = &enic_recv_pkts;
545 	}
546 }
547 
548 /* Secondary process uses this to set the Tx handler */
549 void enic_pick_tx_handler(struct rte_eth_dev *eth_dev)
550 {
551 	struct enic *enic = pmd_priv(eth_dev);
552 
553 	if (enic->use_simple_tx_handler) {
554 		ENICPMD_LOG(DEBUG, " use the simple tx handler");
555 		eth_dev->tx_pkt_burst = &enic_simple_xmit_pkts;
556 	} else {
557 		ENICPMD_LOG(DEBUG, " use the default tx handler");
558 		eth_dev->tx_pkt_burst = &enic_xmit_pkts;
559 	}
560 }
561 
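/*
 * Start the device: fill the RQs with mbufs, choose the Tx/Rx burst
 * handlers, enable the WQs/RQs, and register the LSC/error interrupt.
 */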
562 int enic_enable(struct enic *enic)
563 {
564 	unsigned int index;
565 	int err;
566 	struct rte_eth_dev *eth_dev = enic->rte_dev;
567 	uint64_t simple_tx_offloads;
568 	uintptr_t p;
569 
570 	if (enic->enable_avx2_rx) {
571 		struct rte_mbuf mb_def = { .buf_addr = 0 };
572 
573 		/*
574 		 * mbuf_initializer contains const-after-init fields of
575 		 * receive mbufs (i.e. 64 bits of fields from rearm_data).
576 		 * It is currently used by the vectorized handler.
577 		 */
578 		mb_def.nb_segs = 1;
579 		mb_def.data_off = RTE_PKTMBUF_HEADROOM;
580 		mb_def.port = enic->port_id;
581 		rte_mbuf_refcnt_set(&mb_def, 1);
582 		rte_compiler_barrier();
583 		p = (uintptr_t)&mb_def.rearm_data;
584 		enic->mbuf_initializer = *(uint64_t *)p;
585 	}
586 
587 	eth_dev->data->dev_link.link_speed = vnic_dev_port_speed(enic->vdev);
588 	eth_dev->data->dev_link.link_duplex = RTE_ETH_LINK_FULL_DUPLEX;
589 
590 	/* vNIC notification of link status has already been turned on in
591 	 * enic_dev_init(), which is called at probe time. Here we just
592 	 * turn on interrupt vector 0 if needed.
593 	 */
594 	if (eth_dev->data->dev_conf.intr_conf.lsc)
595 		vnic_dev_notify_set(enic->vdev, 0);
596 
597 	err = enic_rxq_intr_init(enic);
598 	if (err)
599 		return err;
600 
601 	/* Initialize flowman if not already initialized during probe */
602 	if (enic->fm == NULL && enic_fm_init(enic))
603 		dev_warning(enic, "Init of flowman failed.\n");
604 
605 	for (index = 0; index < enic->rq_count; index++) {
606 		err = enic_alloc_rx_queue_mbufs(enic,
607 			&enic->rq[enic_rte_rq_idx_to_sop_idx(index)]);
608 		if (err) {
609 			dev_err(enic, "Failed to alloc sop RX queue mbufs\n");
610 			return err;
611 		}
612 		err = enic_alloc_rx_queue_mbufs(enic,
613 			&enic->rq[enic_rte_rq_idx_to_data_idx(index, enic)]);
614 		if (err) {
615 			/* release the allocated mbufs for the sop rq*/
616 			enic_rxmbuf_queue_release(enic,
617 				&enic->rq[enic_rte_rq_idx_to_sop_idx(index)]);
618 
619 			dev_err(enic, "Failed to alloc data RX queue mbufs\n");
620 			return err;
621 		}
622 	}
623 
624 	/*
625 	 * Use the simple TX handler if possible. Only checksum offloads
626 	 * and vlan insertion are supported.
627 	 */
628 	simple_tx_offloads = enic->tx_offload_capa &
629 		(RTE_ETH_TX_OFFLOAD_OUTER_IPV4_CKSUM |
630 		 RTE_ETH_TX_OFFLOAD_VLAN_INSERT |
631 		 RTE_ETH_TX_OFFLOAD_IPV4_CKSUM |
632 		 RTE_ETH_TX_OFFLOAD_UDP_CKSUM |
633 		 RTE_ETH_TX_OFFLOAD_TCP_CKSUM);
634 	if ((eth_dev->data->dev_conf.txmode.offloads &
635 	     ~simple_tx_offloads) == 0) {
636 		ENICPMD_LOG(DEBUG, " use the simple tx handler");
637 		eth_dev->tx_pkt_burst = &enic_simple_xmit_pkts;
638 		for (index = 0; index < enic->wq_count; index++)
639 			enic_prep_wq_for_simple_tx(enic, index);
640 		enic->use_simple_tx_handler = 1;
641 	} else {
642 		ENICPMD_LOG(DEBUG, " use the default tx handler");
643 		eth_dev->tx_pkt_burst = &enic_xmit_pkts;
644 	}
645 
646 	enic_pick_rx_handler(eth_dev);
647 
648 	for (index = 0; index < enic->wq_count; index++)
649 		enic_start_wq(enic, index);
650 	for (index = 0; index < enic->rq_count; index++)
651 		enic_start_rq(enic, index);
652 
653 	enic_dev_add_addr(enic, enic->mac_addr);
654 
655 	vnic_dev_enable_wait(enic->vdev);
656 
657 	/* Register and enable error interrupt */
658 	rte_intr_callback_register(enic->pdev->intr_handle,
659 		enic_intr_handler, (void *)enic->rte_dev);
660 	rte_intr_enable(enic->pdev->intr_handle);
661 	/* Unmask LSC interrupt */
662 	vnic_intr_unmask(&enic->intr[ENICPMD_LSC_INTR_OFFSET]);
663 
664 	return 0;
665 }
666 
667 int enic_alloc_intr_resources(struct enic *enic)
668 {
669 	int err;
670 	unsigned int i;
671 
672 	dev_info(enic, "vNIC resources used:  "
673 		"wq %d rq %d cq %d intr %d\n",
674 		enic->wq_count, enic_vnic_rq_count(enic),
675 		enic->cq_count, enic->intr_count);
676 
677 	if (enic_is_vf(enic)) {
678 		dev_info(enic, "vNIC admin channel resources used: wq %d rq %d cq %d\n",
679 			 enic->conf_admin_wq_count, enic->conf_admin_rq_count,
680 			 enic->conf_admin_cq_count);
681 	}
682 
683 	for (i = 0; i < enic->intr_count; i++) {
684 		err = vnic_intr_alloc(enic->vdev, &enic->intr[i], i);
685 		if (err) {
686 			enic_free_vnic_resources(enic);
687 			return err;
688 		}
689 	}
690 
691 	return 0;
692 }
693 
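/*
 * Free an Rx queue: release cached and posted mbufs, the mbuf rings, the
 * SOP/data RQ pair, and the associated CQ.
 */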
694 void enic_free_rq(void *rxq)
695 {
696 	struct vnic_rq *rq_sop, *rq_data;
697 	struct enic *enic;
698 
699 	if (rxq == NULL)
700 		return;
701 
702 	rq_sop = (struct vnic_rq *)rxq;
703 	enic = vnic_dev_priv(rq_sop->vdev);
704 	rq_data = &enic->rq[rq_sop->data_queue_idx];
705 
706 	if (rq_sop->free_mbufs) {
707 		struct rte_mbuf **mb;
708 		int i;
709 
710 		mb = rq_sop->free_mbufs;
711 		for (i = ENIC_RX_BURST_MAX - rq_sop->num_free_mbufs;
712 		     i < ENIC_RX_BURST_MAX; i++)
713 			rte_pktmbuf_free(mb[i]);
714 		rte_free(rq_sop->free_mbufs);
715 		rq_sop->free_mbufs = NULL;
716 		rq_sop->num_free_mbufs = 0;
717 	}
718 
719 	enic_rxmbuf_queue_release(enic, rq_sop);
720 	if (rq_data->in_use)
721 		enic_rxmbuf_queue_release(enic, rq_data);
722 
723 	rte_free(rq_sop->mbuf_ring);
724 	if (rq_data->in_use)
725 		rte_free(rq_data->mbuf_ring);
726 
727 	rq_sop->mbuf_ring = NULL;
728 	rq_data->mbuf_ring = NULL;
729 
730 	vnic_rq_free(rq_sop);
731 	if (rq_data->in_use)
732 		vnic_rq_free(rq_data);
733 
734 	vnic_cq_free(&enic->cq[enic_sop_rq_idx_to_cq_idx(rq_sop->index)]);
735 
736 	rq_sop->in_use = 0;
737 	rq_data->in_use = 0;
738 }
739 
740 void enic_start_wq(struct enic *enic, uint16_t queue_idx)
741 {
742 	struct rte_eth_dev_data *data = enic->dev_data;
743 	vnic_wq_enable(&enic->wq[queue_idx]);
744 	data->tx_queue_state[queue_idx] = RTE_ETH_QUEUE_STATE_STARTED;
745 }
746 
747 int enic_stop_wq(struct enic *enic, uint16_t queue_idx)
748 {
749 	struct rte_eth_dev_data *data = enic->dev_data;
750 	int ret;
751 
752 	ret = vnic_wq_disable(&enic->wq[queue_idx]);
753 	if (ret)
754 		return ret;
755 
756 	data->tx_queue_state[queue_idx] = RTE_ETH_QUEUE_STATE_STOPPED;
757 	return 0;
758 }
759 
760 void enic_start_rq(struct enic *enic, uint16_t queue_idx)
761 {
762 	struct rte_eth_dev_data *data = enic->dev_data;
763 	struct vnic_rq *rq_sop;
764 	struct vnic_rq *rq_data;
765 	rq_sop = &enic->rq[enic_rte_rq_idx_to_sop_idx(queue_idx)];
766 	rq_data = &enic->rq[rq_sop->data_queue_idx];
767 
768 	if (rq_data->in_use) {
769 		vnic_rq_enable(rq_data);
770 		enic_initial_post_rx(enic, rq_data);
771 	}
772 	rte_mb();
773 	vnic_rq_enable(rq_sop);
774 	enic_initial_post_rx(enic, rq_sop);
775 	data->rx_queue_state[queue_idx] = RTE_ETH_QUEUE_STATE_STARTED;
776 }
777 
778 int enic_stop_rq(struct enic *enic, uint16_t queue_idx)
779 {
780 	struct rte_eth_dev_data *data = enic->dev_data;
781 	int ret1 = 0, ret2 = 0;
782 	struct vnic_rq *rq_sop;
783 	struct vnic_rq *rq_data;
784 	rq_sop = &enic->rq[enic_rte_rq_idx_to_sop_idx(queue_idx)];
785 	rq_data = &enic->rq[rq_sop->data_queue_idx];
786 
787 	ret2 = vnic_rq_disable(rq_sop);
788 	rte_mb();
789 	if (rq_data->in_use)
790 		ret1 = vnic_rq_disable(rq_data);
791 
792 	if (ret2)
793 		return ret2;
794 	else if (ret1)
795 		return ret1;
796 
797 	data->rx_queue_state[queue_idx] = RTE_ETH_QUEUE_STATE_STOPPED;
798 	return 0;
799 }
800 
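/*
 * Allocate an Rx queue: a SOP RQ, an optional data RQ (used when Rx scatter
 * needs more than one mbuf per packet), the shared CQ, and the mbuf rings.
 * The requested descriptor count is split between the two RQs.
 */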
801 int enic_alloc_rq(struct enic *enic, uint16_t queue_idx,
802 	unsigned int socket_id, struct rte_mempool *mp,
803 	uint16_t nb_desc, uint16_t free_thresh)
804 {
805 	struct enic_vf_representor *vf;
806 	int rc;
807 	uint16_t sop_queue_idx;
808 	uint16_t data_queue_idx;
809 	uint16_t cq_idx;
810 	struct vnic_rq *rq_sop;
811 	struct vnic_rq *rq_data;
812 	unsigned int mbuf_size, mbufs_per_pkt;
813 	unsigned int nb_sop_desc, nb_data_desc;
814 	uint16_t min_sop, max_sop, min_data, max_data;
815 	uint32_t max_rx_pktlen;
816 
817 	/*
818 	 * Representor uses a reserved PF queue. Translate representor
819 	 * queue number to PF queue number.
820 	 */
821 	if (rte_eth_dev_is_repr(enic->rte_dev)) {
822 		RTE_ASSERT(queue_idx == 0);
823 		vf = VF_ENIC_TO_VF_REP(enic);
824 		sop_queue_idx = vf->pf_rq_sop_idx;
825 		data_queue_idx = vf->pf_rq_data_idx;
826 		enic = vf->pf;
827 		queue_idx = sop_queue_idx;
828 	} else {
829 		sop_queue_idx = enic_rte_rq_idx_to_sop_idx(queue_idx);
830 		data_queue_idx = enic_rte_rq_idx_to_data_idx(queue_idx, enic);
831 	}
832 	cq_idx = enic_cq_rq(enic, sop_queue_idx);
833 	rq_sop = &enic->rq[sop_queue_idx];
834 	rq_data = &enic->rq[data_queue_idx];
835 	rq_sop->is_sop = 1;
836 	rq_sop->data_queue_idx = data_queue_idx;
837 	rq_data->is_sop = 0;
838 	rq_data->data_queue_idx = 0;
839 	rq_sop->socket_id = socket_id;
840 	rq_sop->mp = mp;
841 	rq_data->socket_id = socket_id;
842 	rq_data->mp = mp;
843 	rq_sop->in_use = 1;
844 	rq_sop->rx_free_thresh = free_thresh;
845 	rq_data->rx_free_thresh = free_thresh;
846 	dev_debug(enic, "Set queue_id:%u free thresh:%u\n", queue_idx,
847 		  free_thresh);
848 
849 	mbuf_size = (uint16_t)(rte_pktmbuf_data_room_size(mp) -
850 			       RTE_PKTMBUF_HEADROOM);
851 	/* max_rx_pktlen includes the ethernet header and CRC. */
852 	max_rx_pktlen = enic_mtu_to_max_rx_pktlen(enic->rte_dev->data->mtu);
853 
854 	if (enic->rte_dev->data->dev_conf.rxmode.offloads &
855 	    RTE_ETH_RX_OFFLOAD_SCATTER) {
856 		dev_info(enic, "Rq %u Scatter rx mode enabled\n", queue_idx);
857 		/* ceil((max pkt len)/mbuf_size) */
858 		mbufs_per_pkt = (max_rx_pktlen + mbuf_size - 1) / mbuf_size;
859 	} else {
860 		dev_info(enic, "Scatter rx mode disabled\n");
861 		mbufs_per_pkt = 1;
862 		if (max_rx_pktlen > mbuf_size) {
863 			dev_warning(enic, "The maximum Rx packet size (%u) is"
864 				    " larger than the mbuf size (%u), and"
865 				    " scatter is disabled. Larger packets will"
866 				    " be truncated.\n",
867 				    max_rx_pktlen, mbuf_size);
868 		}
869 	}
870 
871 	if (mbufs_per_pkt > 1) {
872 		dev_info(enic, "Rq %u Scatter rx mode in use\n", queue_idx);
873 		rq_sop->data_queue_enable = 1;
874 		rq_data->in_use = 1;
875 		/*
876 		 * HW does not directly support MTU. HW always
877 		 * receives packet sizes up to the "max" MTU.
878 		 * If not using scatter, we can achieve the effect of dropping
879 		 * larger packets by reducing the size of posted buffers.
880 		 * See enic_alloc_rx_queue_mbufs().
881 		 */
882 		if (enic->rte_dev->data->mtu < enic->max_mtu) {
883 			dev_warning(enic,
884 				"mtu is ignored when scatter rx mode is in use.\n");
885 		}
886 	} else {
887 		dev_info(enic, "Rq %u Scatter rx mode not being used\n",
888 			 queue_idx);
889 		rq_sop->data_queue_enable = 0;
890 		rq_data->in_use = 0;
891 	}
892 
893 	/* The number of descriptors has to be a multiple of 32 */
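	/*
	 * Illustrative example, assuming ENIC_ALIGN_DESCS_MASK rounds down to
	 * a multiple of 32: nb_desc = 512 and mbufs_per_pkt = 3 give
	 * nb_sop_desc = (512 / 3) & ~31 = 160 and
	 * nb_data_desc = (512 - 160) & ~31 = 352.
	 */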
894 	nb_sop_desc = (nb_desc / mbufs_per_pkt) & ENIC_ALIGN_DESCS_MASK;
895 	nb_data_desc = (nb_desc - nb_sop_desc) & ENIC_ALIGN_DESCS_MASK;
896 
897 	rq_sop->max_mbufs_per_pkt = mbufs_per_pkt;
898 	rq_data->max_mbufs_per_pkt = mbufs_per_pkt;
899 
900 	if (mbufs_per_pkt > 1) {
901 		min_sop = ENIC_RX_BURST_MAX;
902 		max_sop = ((enic->config.rq_desc_count /
903 			    (mbufs_per_pkt - 1)) & ENIC_ALIGN_DESCS_MASK);
904 		min_data = min_sop * (mbufs_per_pkt - 1);
905 		max_data = enic->config.rq_desc_count;
906 	} else {
907 		min_sop = ENIC_RX_BURST_MAX;
908 		max_sop = enic->config.rq_desc_count;
909 		min_data = 0;
910 		max_data = 0;
911 	}
912 
913 	if (nb_desc < (min_sop + min_data)) {
914 		dev_warning(enic,
915 			    "Number of rx descs too low, adjusting to minimum\n");
916 		nb_sop_desc = min_sop;
917 		nb_data_desc = min_data;
918 	} else if (nb_desc > (max_sop + max_data)) {
919 		dev_warning(enic,
920 			    "Number of rx_descs too high, adjusting to maximum\n");
921 		nb_sop_desc = max_sop;
922 		nb_data_desc = max_data;
923 	}
924 	if (mbufs_per_pkt > 1) {
925 		dev_info(enic, "For max packet size %u and mbuf size %u valid"
926 			 " rx descriptor range is %u to %u\n",
927 			 max_rx_pktlen, mbuf_size, min_sop + min_data,
928 			 max_sop + max_data);
929 	}
930 	dev_info(enic, "Using %d rx descriptors (sop %d, data %d)\n",
931 		 nb_sop_desc + nb_data_desc, nb_sop_desc, nb_data_desc);
932 
933 	/* Allocate sop queue resources */
934 	rc = vnic_rq_alloc(enic->vdev, rq_sop, sop_queue_idx,
935 		nb_sop_desc, sizeof(struct rq_enet_desc));
936 	if (rc) {
937 		dev_err(enic, "error in allocation of sop rq\n");
938 		goto err_exit;
939 	}
940 	nb_sop_desc = rq_sop->ring.desc_count;
941 
942 	if (rq_data->in_use) {
943 		/* Allocate data queue resources */
944 		rc = vnic_rq_alloc(enic->vdev, rq_data, data_queue_idx,
945 				   nb_data_desc,
946 				   sizeof(struct rq_enet_desc));
947 		if (rc) {
948 			dev_err(enic, "error in allocation of data rq\n");
949 			goto err_free_rq_sop;
950 		}
951 		nb_data_desc = rq_data->ring.desc_count;
952 	}
953 	/* Enable 64B CQ entry if requested */
954 	if (enic->cq64 && vnic_dev_set_cq_entry_size(enic->vdev,
955 				sop_queue_idx, VNIC_RQ_CQ_ENTRY_SIZE_64)) {
956 		dev_err(enic, "failed to enable 64B CQ entry on sop rq\n");
957 		goto err_free_rq_data;
958 	}
959 	if (rq_data->in_use && enic->cq64 &&
960 	    vnic_dev_set_cq_entry_size(enic->vdev, data_queue_idx,
961 		VNIC_RQ_CQ_ENTRY_SIZE_64)) {
962 		dev_err(enic, "failed to enable 64B CQ entry on data rq\n");
963 		goto err_free_rq_data;
964 	}
965 
966 	rc = vnic_cq_alloc(enic->vdev, &enic->cq[cq_idx], cq_idx,
967 			   socket_id, nb_sop_desc + nb_data_desc,
968 			   enic->cq64 ?	sizeof(struct cq_enet_rq_desc_64) :
969 			   sizeof(struct cq_enet_rq_desc));
970 	if (rc) {
971 		dev_err(enic, "error in allocation of cq for rq\n");
972 		goto err_free_rq_data;
973 	}
974 
975 	/* Allocate the mbuf rings */
976 	rq_sop->mbuf_ring = (struct rte_mbuf **)
977 		rte_zmalloc_socket("rq->mbuf_ring",
978 				   sizeof(struct rte_mbuf *) * nb_sop_desc,
979 				   RTE_CACHE_LINE_SIZE, rq_sop->socket_id);
980 	if (rq_sop->mbuf_ring == NULL)
981 		goto err_free_cq;
982 
983 	if (rq_data->in_use) {
984 		rq_data->mbuf_ring = (struct rte_mbuf **)
985 			rte_zmalloc_socket("rq->mbuf_ring",
986 				sizeof(struct rte_mbuf *) * nb_data_desc,
987 				RTE_CACHE_LINE_SIZE, rq_sop->socket_id);
988 		if (rq_data->mbuf_ring == NULL)
989 			goto err_free_sop_mbuf;
990 	}
991 
992 	rq_sop->free_mbufs = (struct rte_mbuf **)
993 		rte_zmalloc_socket("rq->free_mbufs",
994 				   sizeof(struct rte_mbuf *) *
995 				   ENIC_RX_BURST_MAX,
996 				   RTE_CACHE_LINE_SIZE, rq_sop->socket_id);
997 	if (rq_sop->free_mbufs == NULL)
998 		goto err_free_data_mbuf;
999 	rq_sop->num_free_mbufs = 0;
1000 
1001 	rq_sop->tot_nb_desc = nb_desc; /* squirrel away for the MTU update function */
1002 
1003 	return 0;
1004 
1005 err_free_data_mbuf:
1006 	rte_free(rq_data->mbuf_ring);
1007 err_free_sop_mbuf:
1008 	rte_free(rq_sop->mbuf_ring);
1009 err_free_cq:
1010 	/* cleanup on error */
1011 	vnic_cq_free(&enic->cq[cq_idx]);
1012 err_free_rq_data:
1013 	if (rq_data->in_use)
1014 		vnic_rq_free(rq_data);
1015 err_free_rq_sop:
1016 	vnic_rq_free(rq_sop);
1017 err_exit:
1018 	return -ENOMEM;
1019 }
1020 
1021 void enic_free_wq(void *txq)
1022 {
1023 	struct vnic_wq *wq;
1024 	struct enic *enic;
1025 
1026 	if (txq == NULL)
1027 		return;
1028 
1029 	wq = (struct vnic_wq *)txq;
1030 	enic = vnic_dev_priv(wq->vdev);
1031 	rte_memzone_free(wq->cqmsg_rz);
1032 	vnic_wq_free(wq);
1033 	vnic_cq_free(&enic->cq[enic->rq_count + wq->index]);
1034 }
1035 
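/*
 * Allocate a Tx queue: the WQ, its CQ, and the memzone the CQ uses to post
 * completion messages (cqmsg).
 */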
1036 int enic_alloc_wq(struct enic *enic, uint16_t queue_idx,
1037 	unsigned int socket_id, uint16_t nb_desc)
1038 {
1039 	struct enic_vf_representor *vf;
1040 	int err;
1041 	struct vnic_wq *wq;
1042 	unsigned int cq_index;
1043 	char name[RTE_MEMZONE_NAMESIZE];
1044 	static int instance;
1045 
1046 	/*
1047 	 * Representor uses a reserved PF queue. Translate representor
1048 	 * queue number to PF queue number.
1049 	 */
1050 	if (rte_eth_dev_is_repr(enic->rte_dev)) {
1051 		RTE_ASSERT(queue_idx == 0);
1052 		vf = VF_ENIC_TO_VF_REP(enic);
1053 		queue_idx = vf->pf_wq_idx;
1054 		cq_index = vf->pf_wq_cq_idx;
1055 		enic = vf->pf;
1056 	} else {
1057 		cq_index = enic_cq_wq(enic, queue_idx);
1058 	}
1059 	wq = &enic->wq[queue_idx];
1060 	wq->socket_id = socket_id;
1061 	/*
1062 	 * rte_eth_tx_queue_setup() checks min, max, and alignment. So just
1063 	 * print an info message for diagnostics.
1064 	 */
1065 	dev_info(enic, "TX Queues - effective number of descs:%d\n", nb_desc);
1066 
1067 	/* Allocate queue resources */
1068 	err = vnic_wq_alloc(enic->vdev, &enic->wq[queue_idx], queue_idx,
1069 		nb_desc,
1070 		sizeof(struct wq_enet_desc));
1071 	if (err) {
1072 		dev_err(enic, "error in allocation of wq\n");
1073 		return err;
1074 	}
1075 
1076 	err = vnic_cq_alloc(enic->vdev, &enic->cq[cq_index], cq_index,
1077 		socket_id, nb_desc,
1078 		sizeof(struct cq_enet_wq_desc));
1079 	if (err) {
1080 		vnic_wq_free(wq);
1081 		dev_err(enic, "error in allocation of cq for wq\n");
1082 	}
1083 
1084 	/* Set up the CQ completion message buffer */
1085 	snprintf((char *)name, sizeof(name),
1086 		 "vnic_cqmsg-%s-%d-%d", enic->bdf_name, queue_idx,
1087 		instance++);
1088 
1089 	wq->cqmsg_rz = rte_memzone_reserve_aligned((const char *)name,
1090 			sizeof(uint32_t), SOCKET_ID_ANY,
1091 			RTE_MEMZONE_IOVA_CONTIG, ENIC_PAGE_SIZE);
1092 	if (!wq->cqmsg_rz)
1093 		return -ENOMEM;
1094 
1095 	return err;
1096 }
1097 
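/*
 * Stop the device: mask interrupts, disable and clean all WQs/RQs/CQs, and
 * tear down the error/LSC interrupt handler.
 */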
1098 int enic_disable(struct enic *enic)
1099 {
1100 	unsigned int i;
1101 	int err;
1102 
1103 	for (i = 0; i < enic->intr_count; i++) {
1104 		vnic_intr_mask(&enic->intr[i]);
1105 		(void)vnic_intr_masked(&enic->intr[i]); /* flush write */
1106 	}
1107 	enic_rxq_intr_deinit(enic);
1108 	rte_intr_disable(enic->pdev->intr_handle);
1109 	rte_intr_callback_unregister(enic->pdev->intr_handle,
1110 				     enic_intr_handler,
1111 				     (void *)enic->rte_dev);
1112 
1113 	vnic_dev_disable(enic->vdev);
1114 
1115 	enic_fm_destroy(enic);
1116 
1117 	enic_dev_del_addr(enic, enic->mac_addr);
1118 
1119 	for (i = 0; i < enic->wq_count; i++) {
1120 		err = vnic_wq_disable(&enic->wq[i]);
1121 		if (err)
1122 			return err;
1123 	}
1124 	for (i = 0; i < enic_vnic_rq_count(enic); i++) {
1125 		if (enic->rq[i].in_use) {
1126 			err = vnic_rq_disable(&enic->rq[i]);
1127 			if (err)
1128 				return err;
1129 		}
1130 	}
1131 
1132 	/* If we were using interrupts, set the interrupt vector to -1
1133 	 * to disable interrupts.  We are not disabling link notifications,
1134 	 * though, as we want the polling of link status to continue working.
1135 	 */
1136 	if (enic->rte_dev->data->dev_conf.intr_conf.lsc)
1137 		vnic_dev_notify_set(enic->vdev, -1);
1138 
1139 	vnic_dev_set_reset_flag(enic->vdev, 1);
1140 
1141 	for (i = 0; i < enic->wq_count; i++)
1142 		vnic_wq_clean(&enic->wq[i], enic_free_wq_buf);
1143 
1144 	for (i = 0; i < enic_vnic_rq_count(enic); i++)
1145 		if (enic->rq[i].in_use)
1146 			vnic_rq_clean(&enic->rq[i], enic_free_rq_buf);
1147 	for (i = 0; i < enic->cq_count; i++)
1148 		vnic_cq_clean(&enic->cq[i]);
1149 	for (i = 0; i < enic->intr_count; i++)
1150 		vnic_intr_clean(&enic->intr[i]);
1151 
1152 	if (enic_is_vf(enic))
1153 		enic_disable_vf_admin_chan(enic, true);
1154 	return 0;
1155 }
1156 
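/* Issue a devcmd via 'start' and poll 'finished' for up to ~2 seconds */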
1157 static int enic_dev_wait(struct vnic_dev *vdev,
1158 	int (*start)(struct vnic_dev *, int),
1159 	int (*finished)(struct vnic_dev *, int *),
1160 	int arg)
1161 {
1162 	int done;
1163 	int err;
1164 	int i;
1165 
1166 	err = start(vdev, arg);
1167 	if (err)
1168 		return err;
1169 
1170 	/* Wait for func to complete...2 seconds max */
1171 	for (i = 0; i < 2000; i++) {
1172 		err = finished(vdev, &done);
1173 		if (err)
1174 			return err;
1175 		if (done)
1176 			return 0;
1177 		usleep(1000);
1178 	}
1179 	return -ETIMEDOUT;
1180 }
1181 
1182 static int enic_dev_open(struct enic *enic)
1183 {
1184 	int err;
1185 	int flags = CMD_OPENF_IG_DESCCACHE;
1186 
1187 	err = enic_dev_wait(enic->vdev, vnic_dev_open,
1188 		vnic_dev_open_done, flags);
1189 	if (err)
1190 		dev_err(enic_get_dev(enic),
1191 			"vNIC device open failed, err %d\n", err);
1192 
1193 	return err;
1194 }
1195 
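/*
 * Push an RSS hash key to the NIC. The key is copied into a DMA-consistent
 * buffer in the VIC layout (10-byte sub-keys, see the key[i / 10].b[i % 10]
 * copy below) and then handed to enic_set_rss_key().
 */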
1196 static int enic_set_rsskey(struct enic *enic, uint8_t *user_key)
1197 {
1198 	dma_addr_t rss_key_buf_pa;
1199 	union vnic_rss_key *rss_key_buf_va = NULL;
1200 	int err, i;
1201 	uint8_t name[RTE_MEMZONE_NAMESIZE];
1202 
1203 	RTE_ASSERT(user_key != NULL);
1204 	snprintf((char *)name, sizeof(name), "rss_key-%s", enic->bdf_name);
1205 	rss_key_buf_va = enic_alloc_consistent(enic, sizeof(union vnic_rss_key),
1206 		&rss_key_buf_pa, name);
1207 	if (!rss_key_buf_va)
1208 		return -ENOMEM;
1209 
1210 	for (i = 0; i < ENIC_RSS_HASH_KEY_SIZE; i++)
1211 		rss_key_buf_va->key[i / 10].b[i % 10] = user_key[i];
1212 
1213 	err = enic_set_rss_key(enic,
1214 		rss_key_buf_pa,
1215 		sizeof(union vnic_rss_key));
1216 
1217 	/* Save for later queries */
1218 	if (!err) {
1219 		rte_memcpy(&enic->rss_key, rss_key_buf_va,
1220 			   sizeof(union vnic_rss_key));
1221 	}
1222 	enic_free_consistent(enic, sizeof(union vnic_rss_key),
1223 		rss_key_buf_va, rss_key_buf_pa);
1224 
1225 	return err;
1226 }
1227 
1228 int enic_set_rss_reta(struct enic *enic, union vnic_rss_cpu *rss_cpu)
1229 {
1230 	dma_addr_t rss_cpu_buf_pa;
1231 	union vnic_rss_cpu *rss_cpu_buf_va = NULL;
1232 	int err;
1233 	uint8_t name[RTE_MEMZONE_NAMESIZE];
1234 
1235 	snprintf((char *)name, sizeof(name), "rss_cpu-%s", enic->bdf_name);
1236 	rss_cpu_buf_va = enic_alloc_consistent(enic, sizeof(union vnic_rss_cpu),
1237 		&rss_cpu_buf_pa, name);
1238 	if (!rss_cpu_buf_va)
1239 		return -ENOMEM;
1240 
1241 	rte_memcpy(rss_cpu_buf_va, rss_cpu, sizeof(union vnic_rss_cpu));
1242 
1243 	err = enic_set_rss_cpu(enic,
1244 		rss_cpu_buf_pa,
1245 		sizeof(union vnic_rss_cpu));
1246 
1247 	enic_free_consistent(enic, sizeof(union vnic_rss_cpu),
1248 		rss_cpu_buf_va, rss_cpu_buf_pa);
1249 
1250 	/* Save for later queries */
1251 	if (!err)
1252 		rte_memcpy(&enic->rss_cpu, rss_cpu, sizeof(union vnic_rss_cpu));
1253 	return err;
1254 }
1255 
1256 static int enic_set_niccfg(struct enic *enic, uint8_t rss_default_cpu,
1257 	uint8_t rss_hash_type, uint8_t rss_hash_bits, uint8_t rss_base_cpu,
1258 	uint8_t rss_enable)
1259 {
1260 	const uint8_t tso_ipid_split_en = 0;
1261 	int err;
1262 
1263 	err = enic_set_nic_cfg(enic,
1264 		rss_default_cpu, rss_hash_type,
1265 		rss_hash_bits, rss_base_cpu,
1266 		rss_enable, tso_ipid_split_en,
1267 		enic->ig_vlan_strip_en);
1268 
1269 	return err;
1270 }
1271 
1272 /* Initialize RSS with defaults, called from dev_configure */
1273 int enic_init_rss_nic_cfg(struct enic *enic)
1274 {
1275 	static uint8_t default_rss_key[] = {
1276 		85, 67, 83, 97, 119, 101, 115, 111, 109, 101,
1277 		80, 65, 76, 79, 117, 110, 105, 113, 117, 101,
1278 		76, 73, 78, 85, 88, 114, 111, 99, 107, 115,
1279 		69, 78, 73, 67, 105, 115, 99, 111, 111, 108,
1280 	};
1281 	struct rte_eth_rss_conf rss_conf;
1282 	union vnic_rss_cpu rss_cpu;
1283 	int ret, i;
1284 
1285 	rss_conf = enic->rte_dev->data->dev_conf.rx_adv_conf.rss_conf;
1286 	/*
1287 	 * If setting key for the first time, and the user gives us none, then
1288 	 * push the default key to NIC.
1289 	 */
1290 	if (rss_conf.rss_key == NULL) {
1291 		rss_conf.rss_key = default_rss_key;
1292 		rss_conf.rss_key_len = ENIC_RSS_HASH_KEY_SIZE;
1293 	}
1294 	ret = enic_set_rss_conf(enic, &rss_conf);
1295 	if (ret) {
1296 		dev_err(enic, "Failed to configure RSS\n");
1297 		return ret;
1298 	}
1299 	if (enic->rss_enable) {
1300 		/* If enabling RSS, use the default reta */
1301 		for (i = 0; i < ENIC_RSS_RETA_SIZE; i++) {
1302 			rss_cpu.cpu[i / 4].b[i % 4] =
1303 				enic_rte_rq_idx_to_sop_idx(i % enic->rq_count);
1304 		}
1305 		ret = enic_set_rss_reta(enic, &rss_cpu);
1306 		if (ret)
1307 			dev_err(enic, "Failed to set RSS indirection table\n");
1308 	}
1309 	return ret;
1310 }
1311 
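/*
 * Final setup step: initialize soft stats, enable the VF admin channel if
 * needed, and program the default packet filter.
 */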
1312 int enic_setup_finish(struct enic *enic)
1313 {
1314 	int err;
1315 
1316 	ENICPMD_FUNC_TRACE();
1317 	enic_init_soft_stats(enic);
1318 
1319 	/*
1320 	 * Enable the admin channel so that certain devcmds, such as
1321 	 * vnic_dev_packet_filter(), can be issued through it.
1322 	 */
1323 	if (enic_is_vf(enic)) {
1324 		err = enic_enable_vf_admin_chan(enic);
1325 		if (err)
1326 			return err;
1327 	}
1328 
1329 	/* switchdev: enable promisc mode on PF */
1330 	if (enic->switchdev_mode) {
1331 		RTE_VERIFY(!enic_is_vf(enic));
1332 		vnic_dev_packet_filter(enic->vdev,
1333 				       0 /* directed  */,
1334 				       0 /* multicast */,
1335 				       0 /* broadcast */,
1336 				       1 /* promisc   */,
1337 				       0 /* allmulti  */);
1338 		enic->promisc = 1;
1339 		enic->allmulti = 0;
1340 		return 0;
1341 	}
1342 	/* Default conf */
1343 	err = enic_dev_packet_filter(enic,
1344 		1 /* directed  */,
1345 		1 /* multicast */,
1346 		1 /* broadcast */,
1347 		0 /* promisc   */,
1348 		1 /* allmulti  */);
1349 
1350 	enic->promisc = 0;
1351 	enic->allmulti = 1;
1352 
1353 	return err;
1354 }
1355 
1356 static int enic_rss_conf_valid(struct enic *enic,
1357 			       struct rte_eth_rss_conf *rss_conf)
1358 {
1359 	/* RSS is disabled per VIC settings. Ignore rss_conf. */
1360 	if (enic->flow_type_rss_offloads == 0)
1361 		return 0;
1362 	if (rss_conf->rss_key != NULL &&
1363 	    rss_conf->rss_key_len != ENIC_RSS_HASH_KEY_SIZE) {
1364 		dev_err(enic, "Given rss_key is %d bytes, it must be %d\n",
1365 			rss_conf->rss_key_len, ENIC_RSS_HASH_KEY_SIZE);
1366 		return -EINVAL;
1367 	}
1368 	if (rss_conf->rss_hf != 0 &&
1369 	    (rss_conf->rss_hf & enic->flow_type_rss_offloads) == 0) {
1370 		dev_err(enic, "Given rss_hf contains none of the supported"
1371 			" types\n");
1372 		return -EINVAL;
1373 	}
1374 	return 0;
1375 }
1376 
1377 /* Set hash type and key according to rss_conf */
1378 int enic_set_rss_conf(struct enic *enic, struct rte_eth_rss_conf *rss_conf)
1379 {
1380 	struct rte_eth_dev *eth_dev;
1381 	uint64_t rss_hf;
1382 	uint8_t rss_hash_type;
1383 	uint8_t rss_enable;
1384 	int ret;
1385 
1386 	RTE_ASSERT(rss_conf != NULL);
1387 	ret = enic_rss_conf_valid(enic, rss_conf);
1388 	if (ret) {
1389 		dev_err(enic, "RSS configuration (rss_conf) is invalid\n");
1390 		return ret;
1391 	}
1392 
1393 	eth_dev = enic->rte_dev;
1394 	rss_hash_type = 0;
1395 	rss_hf = rss_conf->rss_hf & enic->flow_type_rss_offloads;
1396 	if (enic->rq_count > 1 &&
1397 	    (eth_dev->data->dev_conf.rxmode.mq_mode & RTE_ETH_MQ_RX_RSS_FLAG) &&
1398 	    rss_hf != 0) {
1399 		rss_enable = 1;
1400 		if (rss_hf & (RTE_ETH_RSS_IPV4 | RTE_ETH_RSS_FRAG_IPV4 |
1401 			      RTE_ETH_RSS_NONFRAG_IPV4_OTHER))
1402 			rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_IPV4;
1403 		if (rss_hf & RTE_ETH_RSS_NONFRAG_IPV4_TCP)
1404 			rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_TCP_IPV4;
1405 		if (rss_hf & RTE_ETH_RSS_NONFRAG_IPV4_UDP) {
1406 			rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_UDP_IPV4;
1407 			if (enic->udp_rss_weak) {
1408 				/*
1409 				 * 'TCP' is not a typo. The "weak" version of
1410 				 * UDP RSS requires both the TCP and UDP bits
1411 				 * be set. It does enable TCP RSS as well.
1412 				 */
1413 				rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_TCP_IPV4;
1414 			}
1415 		}
1416 		if (rss_hf & (RTE_ETH_RSS_IPV6 | RTE_ETH_RSS_IPV6_EX |
1417 			      RTE_ETH_RSS_FRAG_IPV6 | RTE_ETH_RSS_NONFRAG_IPV6_OTHER))
1418 			rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_IPV6;
1419 		if (rss_hf & (RTE_ETH_RSS_NONFRAG_IPV6_TCP | RTE_ETH_RSS_IPV6_TCP_EX))
1420 			rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_TCP_IPV6;
1421 		if (rss_hf & (RTE_ETH_RSS_NONFRAG_IPV6_UDP | RTE_ETH_RSS_IPV6_UDP_EX)) {
1422 			rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_UDP_IPV6;
1423 			if (enic->udp_rss_weak)
1424 				rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_TCP_IPV6;
1425 		}
1426 	} else {
1427 		rss_enable = 0;
1428 		rss_hf = 0;
1429 	}
1430 
1431 	/* Set the hash key if provided */
1432 	if (rss_enable && rss_conf->rss_key) {
1433 		ret = enic_set_rsskey(enic, rss_conf->rss_key);
1434 		if (ret) {
1435 			dev_err(enic, "Failed to set RSS key\n");
1436 			return ret;
1437 		}
1438 	}
1439 
1440 	ret = enic_set_niccfg(enic, ENIC_RSS_DEFAULT_CPU, rss_hash_type,
1441 			      ENIC_RSS_HASH_BITS, ENIC_RSS_BASE_CPU,
1442 			      rss_enable);
1443 	if (!ret) {
1444 		enic->rss_hf = rss_hf;
1445 		enic->rss_hash_type = rss_hash_type;
1446 		enic->rss_enable = rss_enable;
1447 	} else {
1448 		dev_err(enic, "Failed to update RSS configurations."
1449 			" hash=0x%x\n", rss_hash_type);
1450 	}
1451 	return ret;
1452 }
1453 
1454 int enic_set_vlan_strip(struct enic *enic)
1455 {
1456 	/*
1457 	 * Unfortunately, VLAN strip on/off and RSS on/off are configured
1458 	 * together. So, re-do niccfg, preserving the current RSS settings.
1459 	 */
1460 	return enic_set_niccfg(enic, ENIC_RSS_DEFAULT_CPU, enic->rss_hash_type,
1461 			       ENIC_RSS_HASH_BITS, ENIC_RSS_BASE_CPU,
1462 			       enic->rss_enable);
1463 }
1464 
1465 int enic_add_packet_filter(struct enic *enic)
1466 {
1467 	ENICPMD_FUNC_TRACE();
1468 	/* switchdev ignores packet filters */
1469 	if (enic->switchdev_mode) {
1470 		ENICPMD_LOG(DEBUG, " switchdev: ignore packet filter");
1471 		return 0;
1472 	}
1473 	/* Args -> directed, multicast, broadcast, promisc, allmulti */
1474 	return enic_dev_packet_filter(enic, 1, 1, 1,
1475 		enic->promisc, enic->allmulti);
1476 }
1477 
1478 int enic_get_link_status(struct enic *enic)
1479 {
1480 	return vnic_dev_link_status(enic->vdev);
1481 }
1482 
1483 static void enic_dev_deinit(struct enic *enic)
1484 {
1485 	/* stop link status checking */
1486 	vnic_dev_notify_unset(enic->vdev);
1487 
1488 	/* mac_addrs is freed by rte_eth_dev_release_port() */
1489 	rte_free(enic->cq);
1490 	rte_free(enic->intr);
1491 	rte_free(enic->rq);
1492 	rte_free(enic->wq);
1493 }
1494 
1495 
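/*
 * Check that the vNIC provides enough RQs, WQs, CQs, and interrupts for the
 * configuration requested by the application (plus any VF representors),
 * and record the counts that will be used.
 */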
1496 int enic_set_vnic_res(struct enic *enic)
1497 {
1498 	struct rte_eth_dev *eth_dev = enic->rte_dev;
1499 	int rc = 0;
1500 	unsigned int required_rq, required_wq, required_cq, required_intr;
1501 
1502 	/* Always use two vNIC RQs per eth_dev RQ, regardless of Rx scatter. */
1503 	required_rq = eth_dev->data->nb_rx_queues * 2;
1504 	required_wq = eth_dev->data->nb_tx_queues;
1505 	required_cq = eth_dev->data->nb_rx_queues + eth_dev->data->nb_tx_queues;
1506 	required_intr = 1; /* 1 for LSC even if intr_conf.lsc is 0 */
1507 	if (eth_dev->data->dev_conf.intr_conf.rxq) {
1508 		required_intr += eth_dev->data->nb_rx_queues;
1509 	}
1510 	/* FW adds 2 interrupts for admin chan. Use 1 for RQ */
1511 	if (enic_is_vf(enic))
1512 		required_intr += 1;
1513 	ENICPMD_LOG(DEBUG, "Required queues for PF: rq %u wq %u cq %u",
1514 		    required_rq, required_wq, required_cq);
1515 	if (enic->vf_required_rq) {
1516 		/* Queues needed for VF representors */
1517 		required_rq += enic->vf_required_rq;
1518 		required_wq += enic->vf_required_wq;
1519 		required_cq += enic->vf_required_cq;
1520 		ENICPMD_LOG(DEBUG, "Required queues for VF representors: rq %u wq %u cq %u",
1521 			    enic->vf_required_rq, enic->vf_required_wq,
1522 			    enic->vf_required_cq);
1523 	}
1524 
1525 	if (enic->conf_rq_count < required_rq) {
1526 		dev_err(dev, "Not enough Receive queues. Requested:%u which uses %d RQs on VIC, Configured:%u\n",
1527 			eth_dev->data->nb_rx_queues,
1528 			required_rq, enic->conf_rq_count);
1529 		rc = -EINVAL;
1530 	}
1531 	if (enic->conf_wq_count < required_wq) {
1532 		dev_err(dev, "Not enough Transmit queues. Requested:%u, Configured:%u\n",
1533 			eth_dev->data->nb_tx_queues, enic->conf_wq_count);
1534 		rc = -EINVAL;
1535 	}
1536 
1537 	if (enic->conf_cq_count < required_cq) {
1538 		dev_err(dev, "Not enough Completion queues. Required:%u, Configured:%u\n",
1539 			required_cq, enic->conf_cq_count);
1540 		rc = -EINVAL;
1541 	}
1542 	if (enic->conf_intr_count < required_intr) {
1543 		dev_err(dev, "Not enough Interrupts to support Rx queue"
1544 			" interrupts. Required:%u, Configured:%u\n",
1545 			required_intr, enic->conf_intr_count);
1546 		rc = -EINVAL;
1547 	}
1548 
1549 	if (rc == 0) {
1550 		enic->rq_count = eth_dev->data->nb_rx_queues;
1551 		enic->wq_count = eth_dev->data->nb_tx_queues;
1552 		enic->cq_count = enic->rq_count + enic->wq_count;
1553 		enic->intr_count = required_intr;
1554 	}
1555 
1556 	return rc;
1557 }
1558 
1559 /* Re-initialize the CQ and RQs of an Rx queue and refill its buffers */
1560 static int
1561 enic_reinit_rq(struct enic *enic, unsigned int rq_idx)
1562 {
1563 	struct vnic_rq *sop_rq, *data_rq;
1564 	unsigned int cq_idx;
1565 	int rc = 0;
1566 
1567 	sop_rq = &enic->rq[enic_rte_rq_idx_to_sop_idx(rq_idx)];
1568 	data_rq = &enic->rq[enic_rte_rq_idx_to_data_idx(rq_idx, enic)];
1569 	cq_idx = enic_cq_rq(enic, rq_idx);
1570 
1571 	vnic_cq_clean(&enic->cq[cq_idx]);
1572 	vnic_cq_init(&enic->cq[cq_idx],
1573 		     0 /* flow_control_enable */,
1574 		     1 /* color_enable */,
1575 		     0 /* cq_head */,
1576 		     0 /* cq_tail */,
1577 		     1 /* cq_tail_color */,
1578 		     0 /* interrupt_enable */,
1579 		     1 /* cq_entry_enable */,
1580 		     0 /* cq_message_enable */,
1581 		     0 /* interrupt offset */,
1582 		     0 /* cq_message_addr */);
1583 
1584 
1585 	vnic_rq_init_start(sop_rq, enic_cq_rq(enic,
1586 			   enic_rte_rq_idx_to_sop_idx(rq_idx)), 0,
1587 			   sop_rq->ring.desc_count - 1, 1, 0);
1588 	if (data_rq->in_use) {
1589 		vnic_rq_init_start(data_rq,
1590 				   enic_cq_rq(enic,
1591 				   enic_rte_rq_idx_to_data_idx(rq_idx, enic)),
1592 				   0, data_rq->ring.desc_count - 1, 1, 0);
1593 	}
1594 
1595 	rc = enic_alloc_rx_queue_mbufs(enic, sop_rq);
1596 	if (rc)
1597 		return rc;
1598 
1599 	if (data_rq->in_use) {
1600 		rc = enic_alloc_rx_queue_mbufs(enic, data_rq);
1601 		if (rc) {
1602 			enic_rxmbuf_queue_release(enic, sop_rq);
1603 			return rc;
1604 		}
1605 	}
1606 
1607 	return 0;
1608 }
1609 
1610 /* The Cisco NIC can send and receive packets up to a max packet size
1611  * determined by the NIC type and firmware. There is also an MTU
1612  * configured into the NIC via the CIMC/UCSM management interface
1613  * which can be overridden by this function (up to the max packet size).
1614  * Depending on the network setup, doing so may cause packet drops
1615  * and unexpected behavior.
1616  */
1617 int enic_set_mtu(struct enic *enic, uint16_t new_mtu)
1618 {
1619 	unsigned int rq_idx;
1620 	struct vnic_rq *rq;
1621 	int rc = 0;
1622 	uint16_t old_mtu;	/* previous setting */
1623 	uint16_t config_mtu;	/* Value configured into NIC via CIMC/UCSM */
1624 	struct rte_eth_dev *eth_dev = enic->rte_dev;
1625 
1626 	old_mtu = eth_dev->data->mtu;
1627 	config_mtu = enic->config.mtu;
1628 
1629 	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
1630 		return -E_RTE_SECONDARY;
1631 
1632 	if (new_mtu > enic->max_mtu) {
1633 		dev_err(enic,
1634 			"MTU not updated: requested (%u) greater than max (%u)\n",
1635 			new_mtu, enic->max_mtu);
1636 		return -EINVAL;
1637 	}
1638 	if (new_mtu < ENIC_MIN_MTU) {
1639 		dev_info(enic,
1640 			"MTU not updated: requested (%u) less than min (%u)\n",
1641 			new_mtu, ENIC_MIN_MTU);
1642 		return -EINVAL;
1643 	}
1644 	if (new_mtu > config_mtu)
1645 		dev_warning(enic,
1646 			"MTU (%u) is greater than value configured in NIC (%u)\n",
1647 			new_mtu, config_mtu);
1648 
1649 	/*
1650 	 * If the device has not started (enic_enable), nothing to do.
1651 	 * Later, enic_enable() will set up RQs reflecting the new maximum
1652 	 * packet length.
1653 	 */
1654 	if (!eth_dev->data->dev_started)
1655 		return rc;
1656 
1657 	/*
1658 	 * The device has started, re-do RQs on the fly. In the process, we
1659 	 * pick up the new maximum packet length.
1660 	 *
1661 	 * Some applications rely on the ability to change MTU without stopping
1662 	 * the device. So keep this behavior for now.
1663 	 */
1664 	rte_spinlock_lock(&enic->mtu_lock);
1665 
1666 	/* Stop traffic on all RQs */
1667 	for (rq_idx = 0; rq_idx < enic->rq_count * 2; rq_idx++) {
1668 		rq = &enic->rq[rq_idx];
1669 		if (rq->is_sop && rq->in_use) {
1670 			rc = enic_stop_rq(enic,
1671 					  enic_sop_rq_idx_to_rte_idx(rq_idx));
1672 			if (rc) {
1673 				dev_err(enic, "Failed to stop Rq %u\n", rq_idx);
1674 				goto set_mtu_done;
1675 			}
1676 		}
1677 	}
1678 
1679 	/* replace Rx function with a no-op to avoid getting stale pkts */
1680 	eth_dev->rx_pkt_burst = rte_eth_pkt_burst_dummy;
1681 	rte_eth_fp_ops[enic->port_id].rx_pkt_burst = eth_dev->rx_pkt_burst;
1682 	rte_mb();
1683 
1684 	/* Allow time for threads to exit the real Rx function. */
1685 	usleep(100000);
1686 
1687 	/* now it is safe to reconfigure the RQs */
1688 
1689 
1690 	/* free and reallocate RQs with the new MTU */
1691 	for (rq_idx = 0; rq_idx < enic->rq_count; rq_idx++) {
1692 		rq = &enic->rq[enic_rte_rq_idx_to_sop_idx(rq_idx)];
1693 		if (!rq->in_use)
1694 			continue;
1695 
1696 		enic_free_rq(rq);
1697 		rc = enic_alloc_rq(enic, rq_idx, rq->socket_id, rq->mp,
1698 				   rq->tot_nb_desc, rq->rx_free_thresh);
1699 		if (rc) {
1700 			dev_err(enic,
1701 				"Fatal MTU alloc error- No traffic will pass\n");
1702 			goto set_mtu_done;
1703 		}
1704 
1705 		rc = enic_reinit_rq(enic, rq_idx);
1706 		if (rc) {
1707 			dev_err(enic,
1708 				"Fatal MTU RQ reinit- No traffic will pass\n");
1709 			goto set_mtu_done;
1710 		}
1711 	}
1712 
1713 	/* put back the real receive function */
1714 	rte_mb();
1715 	enic_pick_rx_handler(eth_dev);
1716 	rte_eth_fp_ops[enic->port_id].rx_pkt_burst = eth_dev->rx_pkt_burst;
1717 	rte_mb();
1718 
1719 	/* restart Rx traffic */
1720 	for (rq_idx = 0; rq_idx < enic->rq_count; rq_idx++) {
1721 		rq = &enic->rq[enic_rte_rq_idx_to_sop_idx(rq_idx)];
1722 		if (rq->is_sop && rq->in_use)
1723 			enic_start_rq(enic, rq_idx);
1724 	}
1725 
1726 set_mtu_done:
1727 	dev_info(enic, "MTU changed from %u to %u\n",  old_mtu, new_mtu);
1728 	rte_spinlock_unlock(&enic->mtu_lock);
1729 	return rc;
1730 }
1731 
1732 static void
1733 enic_disable_overlay_offload(struct enic *enic)
1734 {
1735 	/*
1736 	 * Disabling fails if the feature is provisioned but
1737 	 * not enabled. So ignore result and do not log error.
1738 	 */
1739 	if (enic->vxlan) {
1740 		vnic_dev_overlay_offload_ctrl(enic->vdev,
1741 			OVERLAY_FEATURE_VXLAN, OVERLAY_OFFLOAD_DISABLE);
1742 	}
1743 	if (enic->geneve) {
1744 		vnic_dev_overlay_offload_ctrl(enic->vdev,
1745 			OVERLAY_FEATURE_GENEVE, OVERLAY_OFFLOAD_DISABLE);
1746 	}
1747 }
1748 
1749 static int
1750 enic_enable_overlay_offload(struct enic *enic)
1751 {
1752 	if (enic->vxlan && vnic_dev_overlay_offload_ctrl(enic->vdev,
1753 			OVERLAY_FEATURE_VXLAN, OVERLAY_OFFLOAD_ENABLE) != 0) {
1754 		dev_err(NULL, "failed to enable VXLAN offload\n");
1755 		return -EINVAL;
1756 	}
1757 	if (enic->geneve && vnic_dev_overlay_offload_ctrl(enic->vdev,
1758 			OVERLAY_FEATURE_GENEVE, OVERLAY_OFFLOAD_ENABLE) != 0) {
1759 		dev_err(NULL, "failed to enable Geneve offload\n");
1760 		return -EINVAL;
1761 	}
1762 	enic->tx_offload_capa |=
1763 		RTE_ETH_TX_OFFLOAD_OUTER_IPV4_CKSUM |
1764 		(enic->geneve ? RTE_ETH_TX_OFFLOAD_GENEVE_TNL_TSO : 0) |
1765 		(enic->vxlan ? RTE_ETH_TX_OFFLOAD_VXLAN_TNL_TSO : 0);
1766 	enic->tx_offload_mask |=
1767 		RTE_MBUF_F_TX_OUTER_IPV6 |
1768 		RTE_MBUF_F_TX_OUTER_IPV4 |
1769 		RTE_MBUF_F_TX_OUTER_IP_CKSUM |
1770 		RTE_MBUF_F_TX_TUNNEL_MASK;
1771 	enic->overlay_offload = true;
1772 
1773 	if (enic->vxlan && enic->geneve)
1774 		dev_info(NULL, "Overlay offload is enabled (VxLAN, Geneve)\n");
1775 	else if (enic->vxlan)
1776 		dev_info(NULL, "Overlay offload is enabled (VxLAN)\n");
1777 	else
1778 		dev_info(NULL, "Overlay offload is enabled (Geneve)\n");
1779 
1780 	return 0;
1781 }
1782 
1783 static int
1784 enic_reset_overlay_port(struct enic *enic)
1785 {
1786 	if (enic->vxlan) {
1787 		enic->vxlan_port = RTE_VXLAN_DEFAULT_PORT;
1788 		/*
1789 		 * Reset the vxlan port to the default, as the NIC firmware
1790 		 * does not reset it automatically and keeps the old setting.
1791 		 */
1792 		if (vnic_dev_overlay_offload_cfg(enic->vdev,
1793 						 OVERLAY_CFG_VXLAN_PORT_UPDATE,
1794 						 RTE_VXLAN_DEFAULT_PORT)) {
1795 			dev_err(enic, "failed to update vxlan port\n");
1796 			return -EINVAL;
1797 		}
1798 	}
1799 	if (enic->geneve) {
1800 		enic->geneve_port = RTE_GENEVE_DEFAULT_PORT;
1801 		if (vnic_dev_overlay_offload_cfg(enic->vdev,
1802 						 OVERLAY_CFG_GENEVE_PORT_UPDATE,
1803 						 RTE_GENEVE_DEFAULT_PORT)) {
1804 			dev_err(enic, "failed to update vxlan port\n");
1805 			return -EINVAL;
1806 		}
1807 	}
1808 	return 0;
1809 }
1810 
1811 static int enic_dev_init(struct enic *enic)
1812 {
1813 	int err;
1814 	struct rte_eth_dev *eth_dev = enic->rte_dev;
1815 
1816 	vnic_dev_intr_coal_timer_info_default(enic->vdev);
1817 
1818 	/* Get vNIC configuration */
1820 	err = enic_get_vnic_config(enic);
1821 	if (err) {
1822 		dev_err(enic, "Get vNIC configuration failed, aborting\n");
1823 		return err;
1824 	}
1825 
1826 	/* Get available resource counts */
1827 	enic_get_res_counts(enic);
1828 	if (enic->conf_rq_count == 1) {
1829 		dev_err(enic, "Running with only 1 RQ configured in the vNIC is not supported.\n");
1830 		dev_err(enic, "Please configure 2 RQs in the vNIC for each Rx queue used by DPDK.\n");
1831 		dev_err(enic, "See the ENIC PMD guide for more information.\n");
1832 		return -EINVAL;
1833 	}
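	/*
	 * Each DPDK Rx queue uses a pair of vNIC RQs (a start-of-packet RQ
	 * plus a data RQ), which is why a vNIC provisioned with a single RQ
	 * cannot carry traffic.
	 */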
1834 	/* Queue counts may be zero. rte_zmalloc returns NULL in that case. */
1835 	enic->cq = rte_zmalloc("enic_vnic_cq", sizeof(struct vnic_cq) *
1836 			       enic->conf_cq_count, 8);
1837 	enic->intr = rte_zmalloc("enic_vnic_intr", sizeof(struct vnic_intr) *
1838 				 enic->conf_intr_count, 8);
1839 	enic->rq = rte_zmalloc("enic_vnic_rq", sizeof(struct vnic_rq) *
1840 			       enic->conf_rq_count, 8);
1841 	enic->wq = rte_zmalloc("enic_vnic_wq", sizeof(struct vnic_wq) *
1842 			       enic->conf_wq_count, 8);
1843 	if (enic->conf_cq_count > 0 && enic->cq == NULL) {
1844 		dev_err(enic, "failed to allocate vnic_cq, aborting.\n");
1845 		return -1;
1846 	}
1847 	if (enic->conf_intr_count > 0 && enic->intr == NULL) {
1848 		dev_err(enic, "failed to allocate vnic_intr, aborting.\n");
1849 		return -1;
1850 	}
1851 	if (enic->conf_rq_count > 0 && enic->rq == NULL) {
1852 		dev_err(enic, "failed to allocate vnic_rq, aborting.\n");
1853 		return -1;
1854 	}
1855 	if (enic->conf_wq_count > 0 && enic->wq == NULL) {
1856 		dev_err(enic, "failed to allocate vnic_wq, aborting.\n");
1857 		return -1;
1858 	}
1859 
1860 	eth_dev->data->mac_addrs = rte_zmalloc("enic_mac_addr",
1861 					sizeof(struct rte_ether_addr) *
1862 					ENIC_UNICAST_PERFECT_FILTERS, 0);
1863 	if (!eth_dev->data->mac_addrs) {
1864 		dev_err(enic, "mac addr storage alloc failed, aborting.\n");
1865 		return -1;
1866 	}
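	/*
	 * The MAC address table is sized for the adapter's unicast perfect
	 * filters; additional addresses can be installed later through the
	 * ethdev mac_addr_add op.
	 */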
1867 
1868 	/*
1869 	 * If the PF has not assigned a MAC address to the VF, generate a random one.
1870 	 */
1871 	if (enic_is_vf(enic)) {
1872 		struct rte_ether_addr ea;
1873 
1874 		memcpy(ea.addr_bytes, enic->mac_addr, RTE_ETHER_ADDR_LEN);
1875 		if (!rte_is_valid_assigned_ether_addr(&ea)) {
1876 			rte_eth_random_addr(ea.addr_bytes);
1877 			ENICPMD_LOG(INFO, "assigned random MAC address " RTE_ETHER_ADDR_PRT_FMT,
1878 				    RTE_ETHER_ADDR_BYTES(&ea));
1879 			memcpy(enic->mac_addr, ea.addr_bytes, RTE_ETHER_ADDR_LEN);
1880 		}
1881 	}
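	/*
	 * rte_eth_random_addr() produces a locally administered, unicast
	 * address, so the generated MAC always passes the validity check
	 * above.
	 */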
1882 
1883 	rte_ether_addr_copy((struct rte_ether_addr *)enic->mac_addr,
1884 			eth_dev->data->mac_addrs);
1885 
1886 	vnic_dev_set_reset_flag(enic->vdev, 0);
1887 
1888 	LIST_INIT(&enic->flows);
1889 
1890 	/* set up link status checking */
1891 	vnic_dev_notify_set(enic->vdev, -1); /* No Intr for notify */
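	/*
	 * The notify area reports asynchronous device state such as link
	 * status; passing -1 means no dedicated interrupt resource is
	 * assigned to it.
	 */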
1892 
1893 	enic->overlay_offload = false;
1894 	/*
1895 	 * First, explicitly disable overlay offload as the setting is
1896 	 * sticky, and resetting the vNIC may not disable it.
1897 	 */
1898 	enic_disable_overlay_offload(enic);
1899 	/* Then, enable overlay offload according to vNIC flags */
1900 	if (!enic->disable_overlay && (enic->vxlan || enic->geneve)) {
1901 		err = enic_enable_overlay_offload(enic);
1902 		if (err) {
1903 			dev_info(NULL, "failed to enable overlay offload\n");
1904 			return err;
1905 		}
1906 	}
1907 	/*
1908 	 * Reset the vxlan/geneve port if HW parsing is available. HW
1909 	 * parsing is always enabled regardless of whether overlay offload
1910 	 * is enabled or disabled.
1911 	 */
1912 	err = enic_reset_overlay_port(enic);
1913 	if (err)
1914 		return err;
1915 
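	/*
	 * Flowman (the Flow Manager API) backs rte_flow on newer VIC
	 * adapters. Failure here is not fatal; only rte_flow functionality
	 * is affected.
	 */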
1916 	if (enic_fm_init(enic))
1917 		dev_warning(enic, "Init of flowman failed.\n");
1918 	return 0;
1919 }
1920 
1921 static void lock_devcmd(void *priv)
1922 {
1923 	struct enic *enic = priv;
1924 
1925 	rte_spinlock_lock(&enic->devcmd_lock);
1926 }
1927 
1928 static void unlock_devcmd(void *priv)
1929 {
1930 	struct enic *enic = priv;
1931 
1932 	rte_spinlock_unlock(&enic->devcmd_lock);
1933 }
1934 
1935 int enic_probe(struct enic *enic)
1936 {
1937 	struct rte_pci_device *pdev = enic->pdev;
1938 	int err = -1;
1939 
1940 	dev_debug(enic, "Initializing ENIC PMD\n");
1941 
1942 	/* If this is a secondary process, the hardware is already initialized */
1943 	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
1944 		return 0;
1945 
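	/*
	 * BAR 0 exposes the vNIC resources; it is handed to the vNIC library
	 * below for device registration and resource discovery.
	 */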
1946 	enic->bar0.vaddr = (void *)pdev->mem_resource[0].addr;
1947 	enic->bar0.len = pdev->mem_resource[0].len;
1948 
1949 	/* Register vNIC device */
1950 	enic->vdev = vnic_dev_register(NULL, enic, enic->pdev, &enic->bar0, 1);
1951 	if (!enic->vdev) {
1952 		dev_err(enic, "vNIC registration failed, aborting\n");
1953 		goto err_out;
1954 	}
1955 
1956 	LIST_INIT(&enic->memzone_list);
1957 	rte_spinlock_init(&enic->memzone_list_lock);
1958 
1959 	vnic_register_cbacks(enic->vdev,
1960 		enic_alloc_consistent,
1961 		enic_free_consistent);
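	/*
	 * These callbacks hand the vNIC library DMA-able memory backed by
	 * rte_memzone allocations, tracked on memzone_list for later release.
	 */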
1962 
1963 	/*
1964 	 * Allocate the consistent memory for stats upfront so both primary and
1965 	 * secondary processes can dump stats.
1966 	 */
1967 	err = vnic_dev_alloc_stats_mem(enic->vdev);
1968 	if (err) {
1969 		dev_err(enic, "Failed to allocate cmd memory, aborting\n");
1970 		goto err_out_unregister;
1971 	}
1972 	/* Issue device open to get device in known state */
1973 	err = enic_dev_open(enic);
1974 	if (err) {
1975 		dev_err(enic, "vNIC dev open failed, aborting\n");
1976 		goto err_out_unregister;
1977 	}
1978 
1979 	/* Set ingress vlan rewrite mode before vnic initialization */
1980 	dev_debug(enic, "Set ig_vlan_rewrite_mode=%u\n",
1981 		  enic->ig_vlan_rewrite_mode);
1982 	err = vnic_dev_set_ig_vlan_rewrite_mode(enic->vdev,
1983 		enic->ig_vlan_rewrite_mode);
1984 	if (err) {
1985 		dev_err(enic,
1986 			"Failed to set ingress vlan rewrite mode, aborting.\n");
1987 		goto err_out_dev_close;
1988 	}
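	/*
	 * The rewrite mode above is taken from the "ig-vlan-rewrite" devarg;
	 * see the ENIC PMD guide for the accepted values.
	 */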
1989 
1990 	/* Issue device init to initialize the vnic-to-switch link.
1991 	 * We'll start with carrier off and wait for link UP
1992 	 * notification later to turn on carrier.  We don't need
1993 	 * to wait here for the vnic-to-switch link initialization
1994 	 * to complete; link UP notification is the indication that
1995 	 * the process is complete.
1996 	 */
1997 
1998 	err = vnic_dev_init(enic->vdev, 0);
1999 	if (err) {
2000 		dev_err(enic, "vNIC dev init failed, aborting\n");
2001 		goto err_out_dev_close;
2002 	}
2003 
2004 	err = enic_dev_init(enic);
2005 	if (err) {
2006 		dev_err(enic, "Device initialization failed, aborting\n");
2007 		goto err_out_dev_close;
2008 	}
2009 
2010 	/* Use a PF spinlock to serialize devcmd from PF and VF representors */
2011 	if (enic->switchdev_mode) {
2012 		rte_spinlock_init(&enic->devcmd_lock);
2013 		vnic_register_lock(enic->vdev, lock_devcmd, unlock_devcmd);
2014 	}
2015 	return 0;
2016 
2017 err_out_dev_close:
2018 	vnic_dev_close(enic->vdev);
2019 err_out_unregister:
2020 	vnic_dev_unregister(enic->vdev);
2021 err_out:
2022 	return err;
2023 }
2024 
2025 void enic_remove(struct enic *enic)
2026 {
2027 	enic_dev_deinit(enic);
2028 	vnic_dev_close(enic->vdev);
2029 	vnic_dev_unregister(enic->vdev);
2030 }
2031