xref: /dpdk/drivers/net/enic/enic_main.c (revision 68a03efeed657e6e05f281479b33b51102797e15)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright 2008-2017 Cisco Systems, Inc.  All rights reserved.
3  * Copyright 2007 Nuova Systems, Inc.  All rights reserved.
4  */
5 
6 #include <stdio.h>
7 
8 #include <sys/stat.h>
9 #include <sys/mman.h>
10 #include <fcntl.h>
11 
12 #include <rte_pci.h>
13 #include <rte_bus_pci.h>
14 #include <rte_memzone.h>
15 #include <rte_malloc.h>
16 #include <rte_mbuf.h>
17 #include <rte_string_fns.h>
18 #include <ethdev_driver.h>
19 
20 #include "enic_compat.h"
21 #include "enic.h"
22 #include "wq_enet_desc.h"
23 #include "rq_enet_desc.h"
24 #include "cq_enet_desc.h"
25 #include "vnic_enet.h"
26 #include "vnic_dev.h"
27 #include "vnic_wq.h"
28 #include "vnic_rq.h"
29 #include "vnic_cq.h"
30 #include "vnic_intr.h"
31 #include "vnic_nic.h"
32 
33 static inline int enic_is_sriov_vf(struct enic *enic)
34 {
35 	return enic->pdev->id.device_id == PCI_DEVICE_ID_CISCO_VIC_ENET_VF;
36 }
37 
38 static int is_zero_addr(uint8_t *addr)
39 {
40 	return !(addr[0] |  addr[1] | addr[2] | addr[3] | addr[4] | addr[5]);
41 }
42 
43 static int is_mcast_addr(uint8_t *addr)
44 {
45 	return addr[0] & 1;
46 }
47 
48 static int is_eth_addr_valid(uint8_t *addr)
49 {
50 	return !is_mcast_addr(addr) && !is_zero_addr(addr);
51 }
52 
53 void
54 enic_rxmbuf_queue_release(__rte_unused struct enic *enic, struct vnic_rq *rq)
55 {
56 	uint16_t i;
57 
58 	if (!rq || !rq->mbuf_ring) {
59 		dev_debug(enic, "Pointer to rq or mbuf_ring is NULL");
60 		return;
61 	}
62 
63 	for (i = 0; i < rq->ring.desc_count; i++) {
64 		if (rq->mbuf_ring[i]) {
65 			rte_pktmbuf_free_seg(rq->mbuf_ring[i]);
66 			rq->mbuf_ring[i] = NULL;
67 		}
68 	}
69 }
70 
71 void enic_free_wq_buf(struct rte_mbuf **buf)
72 {
73 	struct rte_mbuf *mbuf = *buf;
74 
75 	rte_pktmbuf_free_seg(mbuf);
76 	*buf = NULL;
77 }
78 
79 static void enic_log_q_error(struct enic *enic)
80 {
81 	unsigned int i;
82 	uint32_t error_status;
83 
84 	for (i = 0; i < enic->wq_count; i++) {
85 		error_status = vnic_wq_error_status(&enic->wq[i]);
86 		if (error_status)
87 			dev_err(enic, "WQ[%d] error_status %d\n", i,
88 				error_status);
89 	}
90 
91 	for (i = 0; i < enic_vnic_rq_count(enic); i++) {
92 		if (!enic->rq[i].in_use)
93 			continue;
94 		error_status = vnic_rq_error_status(&enic->rq[i]);
95 		if (error_status)
96 			dev_err(enic, "RQ[%d] error_status %d\n", i,
97 				error_status);
98 	}
99 }
100 
101 static void enic_clear_soft_stats(struct enic *enic)
102 {
103 	struct enic_soft_stats *soft_stats = &enic->soft_stats;
104 	rte_atomic64_clear(&soft_stats->rx_nombuf);
105 	rte_atomic64_clear(&soft_stats->rx_packet_errors);
106 	rte_atomic64_clear(&soft_stats->tx_oversized);
107 }
108 
109 static void enic_init_soft_stats(struct enic *enic)
110 {
111 	struct enic_soft_stats *soft_stats = &enic->soft_stats;
112 	rte_atomic64_init(&soft_stats->rx_nombuf);
113 	rte_atomic64_init(&soft_stats->rx_packet_errors);
114 	rte_atomic64_init(&soft_stats->tx_oversized);
115 	enic_clear_soft_stats(enic);
116 }
117 
118 int enic_dev_stats_clear(struct enic *enic)
119 {
120 	int ret;
121 
122 	ret = vnic_dev_stats_clear(enic->vdev);
123 	if (ret != 0) {
124 		dev_err(enic, "Error in clearing stats\n");
125 		return ret;
126 	}
127 	enic_clear_soft_stats(enic);
128 
129 	return 0;
130 }
131 
132 int enic_dev_stats_get(struct enic *enic, struct rte_eth_stats *r_stats)
133 {
134 	struct vnic_stats *stats;
135 	struct enic_soft_stats *soft_stats = &enic->soft_stats;
136 	int64_t rx_truncated;
137 	uint64_t rx_packet_errors;
138 	int ret = vnic_dev_stats_dump(enic->vdev, &stats);
139 
140 	if (ret) {
141 		dev_err(enic, "Error in getting stats\n");
142 		return ret;
143 	}
144 
145 	/* The number of truncated packets can only be calculated by
146 	 * subtracting a hardware counter from error packets received by
147 	 * the driver. Note: this causes transient inaccuracies in the
148 	 * ipackets count. Also, the lengths of truncated packets are
149 	 * counted in ibytes even though truncated packets are dropped,
150 	 * which can make ibytes slightly higher than it should be.
151 	 */
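	/*
	 * For illustration (hypothetical counter values): if the driver has
	 * counted 12 rx_packet_errors and the hardware reports rx_errors = 9,
	 * then rx_truncated = 12 - 9 = 3; those 3 frames are subtracted from
	 * rx_frames_ok below and reported in imissed instead of ipackets.
	 */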
152 	rx_packet_errors = rte_atomic64_read(&soft_stats->rx_packet_errors);
153 	rx_truncated = rx_packet_errors - stats->rx.rx_errors;
154 
155 	r_stats->ipackets = stats->rx.rx_frames_ok - rx_truncated;
156 	r_stats->opackets = stats->tx.tx_frames_ok;
157 
158 	r_stats->ibytes = stats->rx.rx_bytes_ok;
159 	r_stats->obytes = stats->tx.tx_bytes_ok;
160 
161 	r_stats->ierrors = stats->rx.rx_errors + stats->rx.rx_drop;
162 	r_stats->oerrors = stats->tx.tx_errors
163 			   + rte_atomic64_read(&soft_stats->tx_oversized);
164 
165 	r_stats->imissed = stats->rx.rx_no_bufs + rx_truncated;
166 
167 	r_stats->rx_nombuf = rte_atomic64_read(&soft_stats->rx_nombuf);
168 	return 0;
169 }
170 
171 int enic_del_mac_address(struct enic *enic, int mac_index)
172 {
173 	struct rte_eth_dev *eth_dev = enic->rte_dev;
174 	uint8_t *mac_addr = eth_dev->data->mac_addrs[mac_index].addr_bytes;
175 
176 	return vnic_dev_del_addr(enic->vdev, mac_addr);
177 }
178 
179 int enic_set_mac_address(struct enic *enic, uint8_t *mac_addr)
180 {
181 	int err;
182 
183 	if (!is_eth_addr_valid(mac_addr)) {
184 		dev_err(enic, "invalid mac address\n");
185 		return -EINVAL;
186 	}
187 
188 	err = vnic_dev_add_addr(enic->vdev, mac_addr);
189 	if (err)
190 		dev_err(enic, "add mac addr failed\n");
191 	return err;
192 }
193 
194 void enic_free_rq_buf(struct rte_mbuf **mbuf)
195 {
196 	if (*mbuf == NULL)
197 		return;
198 
199 	rte_pktmbuf_free(*mbuf);
200 	*mbuf = NULL;
201 }
202 
203 void enic_init_vnic_resources(struct enic *enic)
204 {
205 	unsigned int error_interrupt_enable = 1;
206 	unsigned int error_interrupt_offset = 0;
207 	unsigned int rxq_interrupt_enable = 0;
208 	unsigned int rxq_interrupt_offset = ENICPMD_RXQ_INTR_OFFSET;
209 	unsigned int index = 0;
210 	unsigned int cq_idx;
211 	struct vnic_rq *data_rq;
212 
213 	if (enic->rte_dev->data->dev_conf.intr_conf.rxq)
214 		rxq_interrupt_enable = 1;
215 
216 	for (index = 0; index < enic->rq_count; index++) {
217 		cq_idx = enic_cq_rq(enic, enic_rte_rq_idx_to_sop_idx(index));
218 
219 		vnic_rq_init(&enic->rq[enic_rte_rq_idx_to_sop_idx(index)],
220 			cq_idx,
221 			error_interrupt_enable,
222 			error_interrupt_offset);
223 
224 		data_rq = &enic->rq[enic_rte_rq_idx_to_data_idx(index, enic)];
225 		if (data_rq->in_use)
226 			vnic_rq_init(data_rq,
227 				     cq_idx,
228 				     error_interrupt_enable,
229 				     error_interrupt_offset);
230 		vnic_cq_init(&enic->cq[cq_idx],
231 			0 /* flow_control_enable */,
232 			1 /* color_enable */,
233 			0 /* cq_head */,
234 			0 /* cq_tail */,
235 			1 /* cq_tail_color */,
236 			rxq_interrupt_enable,
237 			1 /* cq_entry_enable */,
238 			0 /* cq_message_enable */,
239 			rxq_interrupt_offset,
240 			0 /* cq_message_addr */);
241 		if (rxq_interrupt_enable)
242 			rxq_interrupt_offset++;
243 	}
244 
245 	for (index = 0; index < enic->wq_count; index++) {
246 		vnic_wq_init(&enic->wq[index],
247 			enic_cq_wq(enic, index),
248 			error_interrupt_enable,
249 			error_interrupt_offset);
250 		/* Compute unsupported offload flags for enic_prep_pkts() */
251 		enic->wq[index].tx_offload_notsup_mask =
252 			PKT_TX_OFFLOAD_MASK ^ enic->tx_offload_mask;
253 
254 		cq_idx = enic_cq_wq(enic, index);
255 		vnic_cq_init(&enic->cq[cq_idx],
256 			0 /* flow_control_enable */,
257 			1 /* color_enable */,
258 			0 /* cq_head */,
259 			0 /* cq_tail */,
260 			1 /* cq_tail_color */,
261 			0 /* interrupt_enable */,
262 			0 /* cq_entry_enable */,
263 			1 /* cq_message_enable */,
264 			0 /* interrupt offset */,
265 			(uint64_t)enic->wq[index].cqmsg_rz->iova);
266 	}
267 
268 	for (index = 0; index < enic->intr_count; index++) {
269 		vnic_intr_init(&enic->intr[index],
270 			       enic->config.intr_timer_usec,
271 			       enic->config.intr_timer_type,
272 			       /*mask_on_assertion*/1);
273 	}
274 }
275 
277 int
278 enic_alloc_rx_queue_mbufs(struct enic *enic, struct vnic_rq *rq)
279 {
280 	struct rte_mbuf *mb;
281 	struct rq_enet_desc *rqd = rq->ring.descs;
282 	unsigned i;
283 	dma_addr_t dma_addr;
284 	uint32_t max_rx_pkt_len;
285 	uint16_t rq_buf_len;
286 
287 	if (!rq->in_use)
288 		return 0;
289 
290 	dev_debug(enic, "queue %u, allocating %u rx queue mbufs\n", rq->index,
291 		  rq->ring.desc_count);
292 
293 	/*
294 	 * If *not* using scatter and the mbuf size is greater than the
295 	 * requested max packet size (max_rx_pkt_len), then reduce the
296 	 * posted buffer size to max_rx_pkt_len. HW still receives packets
297 	 * larger than max_rx_pkt_len, but they will be truncated, which we
298 	 * larger than max_rx_pkt_len, but they will be truncated and then
299 	 * dropped in the Rx handler. Not ideal, but better than returning
300 	 */
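	/*
	 * Example (mbuf and packet sizes assumed for illustration): with 2048
	 * bytes of buffer space left after the headroom and a max_rx_pkt_len
	 * of 1526, each buffer is posted as 1526 bytes, so longer frames
	 * arrive truncated and are dropped by the Rx handler.
	 */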
301 	max_rx_pkt_len = enic->rte_dev->data->dev_conf.rxmode.max_rx_pkt_len;
302 	rq_buf_len = rte_pktmbuf_data_room_size(rq->mp) - RTE_PKTMBUF_HEADROOM;
303 	if (max_rx_pkt_len < rq_buf_len && !rq->data_queue_enable)
304 		rq_buf_len = max_rx_pkt_len;
305 	for (i = 0; i < rq->ring.desc_count; i++, rqd++) {
306 		mb = rte_mbuf_raw_alloc(rq->mp);
307 		if (mb == NULL) {
308 			dev_err(enic, "RX mbuf alloc failed queue_id=%u\n",
309 			(unsigned)rq->index);
310 			return -ENOMEM;
311 		}
312 
313 		mb->data_off = RTE_PKTMBUF_HEADROOM;
314 		dma_addr = (dma_addr_t)(mb->buf_iova
315 			   + RTE_PKTMBUF_HEADROOM);
316 		rq_enet_desc_enc(rqd, dma_addr,
317 				(rq->is_sop ? RQ_ENET_TYPE_ONLY_SOP
318 				: RQ_ENET_TYPE_NOT_SOP),
319 				rq_buf_len);
320 		rq->mbuf_ring[i] = mb;
321 	}
322 	/*
323 	 * Do not post the buffers to the NIC until we enable the RQ via
324 	 * enic_start_rq().
325 	 */
326 	rq->need_initial_post = true;
327 	/* Initialize fetch index while RQ is disabled */
328 	iowrite32(0, &rq->ctrl->fetch_index);
329 	return 0;
330 }
331 
332 /*
333  * Post the Rx buffers for the first time. enic_alloc_rx_queue_mbufs() has
334  * allocated the buffers and filled the RQ descriptor ring. Just need to push
335  * the post index to the NIC.
336  */
337 static void
338 enic_initial_post_rx(struct enic *enic, struct vnic_rq *rq)
339 {
340 	if (!rq->in_use || !rq->need_initial_post)
341 		return;
342 
343 	/* make sure all prior writes are complete before doing the PIO write */
344 	rte_rmb();
345 
346 	/* Post all but the last buffer to VIC. */
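	/*
	 * Keeping one descriptor unposted presumably keeps a completely full
	 * ring distinguishable from an empty one (posted_index never wraps
	 * around onto fetch_index); this is inferred from the ring index
	 * handling rather than documented VIC behavior.
	 */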
347 	rq->posted_index = rq->ring.desc_count - 1;
348 
349 	rq->rx_nb_hold = 0;
350 
351 	dev_debug(enic, "port=%u, qidx=%u, Write %u posted idx, %u sw held\n",
352 		enic->port_id, rq->index, rq->posted_index, rq->rx_nb_hold);
353 	iowrite32(rq->posted_index, &rq->ctrl->posted_index);
354 	rte_rmb();
355 	rq->need_initial_post = false;
356 }
357 
358 void *
359 enic_alloc_consistent(void *priv, size_t size,
360 	dma_addr_t *dma_handle, uint8_t *name)
361 {
362 	void *vaddr;
363 	const struct rte_memzone *rz;
364 	*dma_handle = 0;
365 	struct enic *enic = (struct enic *)priv;
366 	struct enic_memzone_entry *mze;
367 
368 	rz = rte_memzone_reserve_aligned((const char *)name, size,
369 			SOCKET_ID_ANY, RTE_MEMZONE_IOVA_CONTIG, ENIC_PAGE_SIZE);
370 	if (!rz) {
371 		pr_err("%s : Failed to allocate memory requested for %s\n",
372 			__func__, name);
373 		return NULL;
374 	}
375 
376 	vaddr = rz->addr;
377 	*dma_handle = (dma_addr_t)rz->iova;
378 
379 	mze = rte_malloc("enic memzone entry",
380 			 sizeof(struct enic_memzone_entry), 0);
381 
382 	if (!mze) {
383 		pr_err("%s : Failed to allocate memory for memzone list\n",
384 		       __func__);
385 		rte_memzone_free(rz);
386 		return NULL;
387 	}
388 
389 	mze->rz = rz;
390 
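	/*
	 * Track the memzone on the per-device list so enic_free_consistent()
	 * can later look it up by virtual address and IOVA.
	 */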
391 	rte_spinlock_lock(&enic->memzone_list_lock);
392 	LIST_INSERT_HEAD(&enic->memzone_list, mze, entries);
393 	rte_spinlock_unlock(&enic->memzone_list_lock);
394 
395 	return vaddr;
396 }
397 
398 void
399 enic_free_consistent(void *priv,
400 		     __rte_unused size_t size,
401 		     void *vaddr,
402 		     dma_addr_t dma_handle)
403 {
404 	struct enic_memzone_entry *mze;
405 	struct enic *enic = (struct enic *)priv;
406 
407 	rte_spinlock_lock(&enic->memzone_list_lock);
408 	LIST_FOREACH(mze, &enic->memzone_list, entries) {
409 		if (mze->rz->addr == vaddr &&
410 		    mze->rz->iova == dma_handle)
411 			break;
412 	}
413 	if (mze == NULL) {
414 		rte_spinlock_unlock(&enic->memzone_list_lock);
415 		dev_warning(enic,
416 			    "Tried to free memory, but couldn't find it in the memzone list\n");
417 		return;
418 	}
419 	LIST_REMOVE(mze, entries);
420 	rte_spinlock_unlock(&enic->memzone_list_lock);
421 	rte_memzone_free(mze->rz);
422 	rte_free(mze);
423 }
424 
425 int enic_link_update(struct rte_eth_dev *eth_dev)
426 {
427 	struct enic *enic = pmd_priv(eth_dev);
428 	struct rte_eth_link link;
429 
430 	memset(&link, 0, sizeof(link));
431 	link.link_status = enic_get_link_status(enic);
432 	link.link_duplex = ETH_LINK_FULL_DUPLEX;
433 	link.link_speed = vnic_dev_port_speed(enic->vdev);
434 
435 	return rte_eth_linkstatus_set(eth_dev, &link);
436 }
437 
438 static void
439 enic_intr_handler(void *arg)
440 {
441 	struct rte_eth_dev *dev = (struct rte_eth_dev *)arg;
442 	struct enic *enic = pmd_priv(dev);
443 
444 	vnic_intr_return_all_credits(&enic->intr[ENICPMD_LSC_INTR_OFFSET]);
445 
446 	enic_link_update(dev);
447 	rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_LSC, NULL);
448 	enic_log_q_error(enic);
449 	/* Re-enable irq in case of INTx */
450 	rte_intr_ack(&enic->pdev->intr_handle);
451 }
452 
453 static int enic_rxq_intr_init(struct enic *enic)
454 {
455 	struct rte_intr_handle *intr_handle;
456 	uint32_t rxq_intr_count, i;
457 	int err;
458 
459 	intr_handle = enic->rte_dev->intr_handle;
460 	if (!enic->rte_dev->data->dev_conf.intr_conf.rxq)
461 		return 0;
462 	/*
463 	 * Rx queue interrupts only work when we have MSI-X interrupts,
464 	 * one per queue. Sharing one interrupt is technically
465 	 * possible with VIC, but it is not worth the complications it brings.
466 	 */
467 	if (!rte_intr_cap_multiple(intr_handle)) {
468 		dev_err(enic, "Rx queue interrupts require MSI-X interrupts"
469 			" (vfio-pci driver)\n");
470 		return -ENOTSUP;
471 	}
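	/*
	 * Interrupt vector layout: vector 0 (ENICPMD_LSC_INTR_OFFSET) carries
	 * link-state and queue-error notifications, and the per-Rx-queue
	 * vectors start at ENICPMD_RXQ_INTR_OFFSET, one vector per Rx queue.
	 */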
472 	rxq_intr_count = enic->intr_count - ENICPMD_RXQ_INTR_OFFSET;
473 	err = rte_intr_efd_enable(intr_handle, rxq_intr_count);
474 	if (err) {
475 		dev_err(enic, "Failed to enable event fds for Rx queue"
476 			" interrupts\n");
477 		return err;
478 	}
479 	intr_handle->intr_vec = rte_zmalloc("enic_intr_vec",
480 					    rxq_intr_count * sizeof(int), 0);
481 	if (intr_handle->intr_vec == NULL) {
482 		dev_err(enic, "Failed to allocate intr_vec\n");
483 		return -ENOMEM;
484 	}
485 	for (i = 0; i < rxq_intr_count; i++)
486 		intr_handle->intr_vec[i] = i + ENICPMD_RXQ_INTR_OFFSET;
487 	return 0;
488 }
489 
490 static void enic_rxq_intr_deinit(struct enic *enic)
491 {
492 	struct rte_intr_handle *intr_handle;
493 
494 	intr_handle = enic->rte_dev->intr_handle;
495 	rte_intr_efd_disable(intr_handle);
496 	if (intr_handle->intr_vec != NULL) {
497 		rte_free(intr_handle->intr_vec);
498 		intr_handle->intr_vec = NULL;
499 	}
500 }
501 
502 static void enic_prep_wq_for_simple_tx(struct enic *enic, uint16_t queue_idx)
503 {
504 	struct wq_enet_desc *desc;
505 	struct vnic_wq *wq;
506 	unsigned int i;
507 
508 	/*
509 	 * Fill WQ descriptor fields that never change. Every descriptor is
510 	 * one packet, so set EOP. Also set CQ_ENTRY every ENIC_WQ_CQ_THRESH
511 	 * descriptors (i.e. request one completion update every 32 packets).
512 	 */
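	/*
	 * For example, with ENIC_WQ_CQ_THRESH equal to 32, descriptors 31,
	 * 63, 95, ... have CQ_ENTRY set, i.e. one completion is generated
	 * for every 32 transmitted packets.
	 */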
513 	wq = &enic->wq[queue_idx];
514 	desc = (struct wq_enet_desc *)wq->ring.descs;
515 	for (i = 0; i < wq->ring.desc_count; i++, desc++) {
516 		desc->header_length_flags = 1 << WQ_ENET_FLAGS_EOP_SHIFT;
517 		if (i % ENIC_WQ_CQ_THRESH == ENIC_WQ_CQ_THRESH - 1)
518 			desc->header_length_flags |=
519 				(1 << WQ_ENET_FLAGS_CQ_ENTRY_SHIFT);
520 	}
521 }
522 
523 /*
524  * The 'strong' version is in enic_rxtx_vec_avx2.c. This weak version is
525  * used when that file is not compiled.
526  */
527 __rte_weak bool
528 enic_use_vector_rx_handler(__rte_unused struct rte_eth_dev *eth_dev)
529 {
530 	return false;
531 }
532 
533 void enic_pick_rx_handler(struct rte_eth_dev *eth_dev)
534 {
535 	struct enic *enic = pmd_priv(eth_dev);
536 
537 	if (enic->cq64) {
538 		ENICPMD_LOG(DEBUG, " use the normal Rx handler for 64B CQ entry");
539 		eth_dev->rx_pkt_burst = &enic_recv_pkts_64;
540 		return;
541 	}
542 	/*
543 	 * Preference order:
544 	 * 1. The vectorized handler if possible and requested.
545 	 * 2. The non-scatter, simplified handler if scatter Rx is not used.
546 	 * 3. The default handler as a fallback.
547 	 */
548 	if (enic_use_vector_rx_handler(eth_dev))
549 		return;
550 	if (enic->rq_count > 0 && enic->rq[0].data_queue_enable == 0) {
551 		ENICPMD_LOG(DEBUG, " use the non-scatter Rx handler");
552 		eth_dev->rx_pkt_burst = &enic_noscatter_recv_pkts;
553 	} else {
554 		ENICPMD_LOG(DEBUG, " use the normal Rx handler");
555 		eth_dev->rx_pkt_burst = &enic_recv_pkts;
556 	}
557 }
558 
559 /* Secondary process uses this to set the Tx handler */
560 void enic_pick_tx_handler(struct rte_eth_dev *eth_dev)
561 {
562 	struct enic *enic = pmd_priv(eth_dev);
563 
564 	if (enic->use_simple_tx_handler) {
565 		ENICPMD_LOG(DEBUG, " use the simple tx handler");
566 		eth_dev->tx_pkt_burst = &enic_simple_xmit_pkts;
567 	} else {
568 		ENICPMD_LOG(DEBUG, " use the default tx handler");
569 		eth_dev->tx_pkt_burst = &enic_xmit_pkts;
570 	}
571 }
572 
573 int enic_enable(struct enic *enic)
574 {
575 	unsigned int index;
576 	int err;
577 	struct rte_eth_dev *eth_dev = enic->rte_dev;
578 	uint64_t simple_tx_offloads;
579 	uintptr_t p;
580 
581 	if (enic->enable_avx2_rx) {
582 		struct rte_mbuf mb_def = { .buf_addr = 0 };
583 
584 		/*
585 		 * mbuf_initializer contains const-after-init fields of
586 		 * receive mbufs (i.e. 64 bits of fields from rearm_data).
587 		 * It is currently used by the vectorized handler.
588 		 */
589 		mb_def.nb_segs = 1;
590 		mb_def.data_off = RTE_PKTMBUF_HEADROOM;
591 		mb_def.port = enic->port_id;
592 		rte_mbuf_refcnt_set(&mb_def, 1);
593 		rte_compiler_barrier();
594 		p = (uintptr_t)&mb_def.rearm_data;
595 		enic->mbuf_initializer = *(uint64_t *)p;
596 	}
597 
598 	eth_dev->data->dev_link.link_speed = vnic_dev_port_speed(enic->vdev);
599 	eth_dev->data->dev_link.link_duplex = ETH_LINK_FULL_DUPLEX;
600 
601 	/* vnic notification of link status has already been turned on in
602 	 * enic_dev_init(), which is called at probe time.  Here we are
603 	 * just turning on interrupt vector 0 if needed.
604 	 */
605 	if (eth_dev->data->dev_conf.intr_conf.lsc)
606 		vnic_dev_notify_set(enic->vdev, 0);
607 
608 	err = enic_rxq_intr_init(enic);
609 	if (err)
610 		return err;
611 
612 	/* Initialize flowman if not already initialized during probe */
613 	if (enic->fm == NULL && enic_fm_init(enic))
614 		dev_warning(enic, "Init of flowman failed.\n");
615 
616 	for (index = 0; index < enic->rq_count; index++) {
617 		err = enic_alloc_rx_queue_mbufs(enic,
618 			&enic->rq[enic_rte_rq_idx_to_sop_idx(index)]);
619 		if (err) {
620 			dev_err(enic, "Failed to alloc sop RX queue mbufs\n");
621 			return err;
622 		}
623 		err = enic_alloc_rx_queue_mbufs(enic,
624 			&enic->rq[enic_rte_rq_idx_to_data_idx(index, enic)]);
625 		if (err) {
626 			/* release the allocated mbufs for the sop rq */
627 			enic_rxmbuf_queue_release(enic,
628 				&enic->rq[enic_rte_rq_idx_to_sop_idx(index)]);
629 
630 			dev_err(enic, "Failed to alloc data RX queue mbufs\n");
631 			return err;
632 		}
633 	}
634 
635 	/*
636 	 * Use the simple TX handler if possible. Only checksum offloads
637 	 * and vlan insertion are supported.
638 	 */
639 	simple_tx_offloads = enic->tx_offload_capa &
640 		(DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM |
641 		 DEV_TX_OFFLOAD_VLAN_INSERT |
642 		 DEV_TX_OFFLOAD_IPV4_CKSUM |
643 		 DEV_TX_OFFLOAD_UDP_CKSUM |
644 		 DEV_TX_OFFLOAD_TCP_CKSUM);
645 	if ((eth_dev->data->dev_conf.txmode.offloads &
646 	     ~simple_tx_offloads) == 0) {
647 		ENICPMD_LOG(DEBUG, " use the simple tx handler");
648 		eth_dev->tx_pkt_burst = &enic_simple_xmit_pkts;
649 		for (index = 0; index < enic->wq_count; index++)
650 			enic_prep_wq_for_simple_tx(enic, index);
651 		enic->use_simple_tx_handler = 1;
652 	} else {
653 		ENICPMD_LOG(DEBUG, " use the default tx handler");
654 		eth_dev->tx_pkt_burst = &enic_xmit_pkts;
655 	}
656 
657 	enic_pick_rx_handler(eth_dev);
658 
659 	for (index = 0; index < enic->wq_count; index++)
660 		enic_start_wq(enic, index);
661 	for (index = 0; index < enic->rq_count; index++)
662 		enic_start_rq(enic, index);
663 
664 	vnic_dev_add_addr(enic->vdev, enic->mac_addr);
665 
666 	vnic_dev_enable_wait(enic->vdev);
667 
668 	/* Register and enable error interrupt */
669 	rte_intr_callback_register(&(enic->pdev->intr_handle),
670 		enic_intr_handler, (void *)enic->rte_dev);
671 
672 	rte_intr_enable(&(enic->pdev->intr_handle));
673 	/* Unmask LSC interrupt */
674 	vnic_intr_unmask(&enic->intr[ENICPMD_LSC_INTR_OFFSET]);
675 
676 	return 0;
677 }
678 
679 int enic_alloc_intr_resources(struct enic *enic)
680 {
681 	int err;
682 	unsigned int i;
683 
684 	dev_info(enic, "vNIC resources used:  "
685 		"wq %d rq %d cq %d intr %d\n",
686 		enic->wq_count, enic_vnic_rq_count(enic),
687 		enic->cq_count, enic->intr_count);
688 
689 	for (i = 0; i < enic->intr_count; i++) {
690 		err = vnic_intr_alloc(enic->vdev, &enic->intr[i], i);
691 		if (err) {
692 			enic_free_vnic_resources(enic);
693 			return err;
694 		}
695 	}
696 	return 0;
697 }
698 
699 void enic_free_rq(void *rxq)
700 {
701 	struct vnic_rq *rq_sop, *rq_data;
702 	struct enic *enic;
703 
704 	if (rxq == NULL)
705 		return;
706 
707 	rq_sop = (struct vnic_rq *)rxq;
708 	enic = vnic_dev_priv(rq_sop->vdev);
709 	rq_data = &enic->rq[rq_sop->data_queue_idx];
710 
711 	if (rq_sop->free_mbufs) {
712 		struct rte_mbuf **mb;
713 		int i;
714 
715 		mb = rq_sop->free_mbufs;
716 		for (i = ENIC_RX_BURST_MAX - rq_sop->num_free_mbufs;
717 		     i < ENIC_RX_BURST_MAX; i++)
718 			rte_pktmbuf_free(mb[i]);
719 		rte_free(rq_sop->free_mbufs);
720 		rq_sop->free_mbufs = NULL;
721 		rq_sop->num_free_mbufs = 0;
722 	}
723 
724 	enic_rxmbuf_queue_release(enic, rq_sop);
725 	if (rq_data->in_use)
726 		enic_rxmbuf_queue_release(enic, rq_data);
727 
728 	rte_free(rq_sop->mbuf_ring);
729 	if (rq_data->in_use)
730 		rte_free(rq_data->mbuf_ring);
731 
732 	rq_sop->mbuf_ring = NULL;
733 	rq_data->mbuf_ring = NULL;
734 
735 	vnic_rq_free(rq_sop);
736 	if (rq_data->in_use)
737 		vnic_rq_free(rq_data);
738 
739 	vnic_cq_free(&enic->cq[enic_sop_rq_idx_to_cq_idx(rq_sop->index)]);
740 
741 	rq_sop->in_use = 0;
742 	rq_data->in_use = 0;
743 }
744 
745 void enic_start_wq(struct enic *enic, uint16_t queue_idx)
746 {
747 	struct rte_eth_dev_data *data = enic->dev_data;
748 	vnic_wq_enable(&enic->wq[queue_idx]);
749 	data->tx_queue_state[queue_idx] = RTE_ETH_QUEUE_STATE_STARTED;
750 }
751 
752 int enic_stop_wq(struct enic *enic, uint16_t queue_idx)
753 {
754 	struct rte_eth_dev_data *data = enic->dev_data;
755 	int ret;
756 
757 	ret = vnic_wq_disable(&enic->wq[queue_idx]);
758 	if (ret)
759 		return ret;
760 
761 	data->tx_queue_state[queue_idx] = RTE_ETH_QUEUE_STATE_STOPPED;
762 	return 0;
763 }
764 
765 void enic_start_rq(struct enic *enic, uint16_t queue_idx)
766 {
767 	struct rte_eth_dev_data *data = enic->dev_data;
768 	struct vnic_rq *rq_sop;
769 	struct vnic_rq *rq_data;
770 	rq_sop = &enic->rq[enic_rte_rq_idx_to_sop_idx(queue_idx)];
771 	rq_data = &enic->rq[rq_sop->data_queue_idx];
772 
773 	if (rq_data->in_use) {
774 		vnic_rq_enable(rq_data);
775 		enic_initial_post_rx(enic, rq_data);
776 	}
777 	rte_mb();
778 	vnic_rq_enable(rq_sop);
779 	enic_initial_post_rx(enic, rq_sop);
780 	data->rx_queue_state[queue_idx] = RTE_ETH_QUEUE_STATE_STARTED;
781 }
782 
783 int enic_stop_rq(struct enic *enic, uint16_t queue_idx)
784 {
785 	struct rte_eth_dev_data *data = enic->dev_data;
786 	int ret1 = 0, ret2 = 0;
787 	struct vnic_rq *rq_sop;
788 	struct vnic_rq *rq_data;
789 	rq_sop = &enic->rq[enic_rte_rq_idx_to_sop_idx(queue_idx)];
790 	rq_data = &enic->rq[rq_sop->data_queue_idx];
791 
792 	ret2 = vnic_rq_disable(rq_sop);
793 	rte_mb();
794 	if (rq_data->in_use)
795 		ret1 = vnic_rq_disable(rq_data);
796 
797 	if (ret2)
798 		return ret2;
799 	else if (ret1)
800 		return ret1;
801 
802 	data->rx_queue_state[queue_idx] = RTE_ETH_QUEUE_STATE_STOPPED;
803 	return 0;
804 }
805 
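/*
 * Each eth_dev Rx queue is backed by a pair of vNIC RQs: a start-of-packet
 * (SOP) RQ that is always used and a data RQ that is enabled only when
 * scatter Rx is in use (mbufs_per_pkt > 1). Both RQs complete into a single
 * CQ, and the requested descriptor count is split between them below.
 */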
806 int enic_alloc_rq(struct enic *enic, uint16_t queue_idx,
807 	unsigned int socket_id, struct rte_mempool *mp,
808 	uint16_t nb_desc, uint16_t free_thresh)
809 {
810 	struct enic_vf_representor *vf;
811 	int rc;
812 	uint16_t sop_queue_idx;
813 	uint16_t data_queue_idx;
814 	uint16_t cq_idx;
815 	struct vnic_rq *rq_sop;
816 	struct vnic_rq *rq_data;
817 	unsigned int mbuf_size, mbufs_per_pkt;
818 	unsigned int nb_sop_desc, nb_data_desc;
819 	uint16_t min_sop, max_sop, min_data, max_data;
820 	uint32_t max_rx_pkt_len;
821 
822 	/*
823 	 * Representor uses a reserved PF queue. Translate representor
824 	 * queue number to PF queue number.
825 	 */
826 	if (enic_is_vf_rep(enic)) {
827 		RTE_ASSERT(queue_idx == 0);
828 		vf = VF_ENIC_TO_VF_REP(enic);
829 		sop_queue_idx = vf->pf_rq_sop_idx;
830 		data_queue_idx = vf->pf_rq_data_idx;
831 		enic = vf->pf;
832 		queue_idx = sop_queue_idx;
833 	} else {
834 		sop_queue_idx = enic_rte_rq_idx_to_sop_idx(queue_idx);
835 		data_queue_idx = enic_rte_rq_idx_to_data_idx(queue_idx, enic);
836 	}
837 	cq_idx = enic_cq_rq(enic, sop_queue_idx);
838 	rq_sop = &enic->rq[sop_queue_idx];
839 	rq_data = &enic->rq[data_queue_idx];
840 	rq_sop->is_sop = 1;
841 	rq_sop->data_queue_idx = data_queue_idx;
842 	rq_data->is_sop = 0;
843 	rq_data->data_queue_idx = 0;
844 	rq_sop->socket_id = socket_id;
845 	rq_sop->mp = mp;
846 	rq_data->socket_id = socket_id;
847 	rq_data->mp = mp;
848 	rq_sop->in_use = 1;
849 	rq_sop->rx_free_thresh = free_thresh;
850 	rq_data->rx_free_thresh = free_thresh;
851 	dev_debug(enic, "Set queue_id:%u free thresh:%u\n", queue_idx,
852 		  free_thresh);
853 
854 	mbuf_size = (uint16_t)(rte_pktmbuf_data_room_size(mp) -
855 			       RTE_PKTMBUF_HEADROOM);
856 	/* max_rx_pkt_len includes the ethernet header and CRC. */
857 	max_rx_pkt_len = enic->rte_dev->data->dev_conf.rxmode.max_rx_pkt_len;
858 
859 	if (enic->rte_dev->data->dev_conf.rxmode.offloads &
860 	    DEV_RX_OFFLOAD_SCATTER) {
861 		dev_info(enic, "Rq %u Scatter rx mode enabled\n", queue_idx);
862 		/* ceil((max pkt len)/mbuf_size) */
863 		mbufs_per_pkt = (max_rx_pkt_len + mbuf_size - 1) / mbuf_size;
864 	} else {
865 		dev_info(enic, "Scatter rx mode disabled\n");
866 		mbufs_per_pkt = 1;
867 		if (max_rx_pkt_len > mbuf_size) {
868 			dev_warning(enic, "The maximum Rx packet size (%u) is"
869 				    " larger than the mbuf size (%u), and"
870 				    " scatter is disabled. Larger packets will"
871 				    " be truncated.\n",
872 				    max_rx_pkt_len, mbuf_size);
873 		}
874 	}
875 
876 	if (mbufs_per_pkt > 1) {
877 		dev_info(enic, "Rq %u Scatter rx mode in use\n", queue_idx);
878 		rq_sop->data_queue_enable = 1;
879 		rq_data->in_use = 1;
880 		/*
881 		 * HW does not directly support rxmode.max_rx_pkt_len. HW always
882 		 * receives packet sizes up to the "max" MTU.
883 		 * If not using scatter, we can achieve the effect of dropping
884 		 * larger packets by reducing the size of posted buffers.
885 		 * See enic_alloc_rx_queue_mbufs().
886 		 */
887 		if (max_rx_pkt_len <
888 		    enic_mtu_to_max_rx_pktlen(enic->max_mtu)) {
889 			dev_warning(enic, "rxmode.max_rx_pkt_len is ignored"
890 				    " when scatter rx mode is in use.\n");
891 		}
892 	} else {
893 		dev_info(enic, "Rq %u Scatter rx mode not being used\n",
894 			 queue_idx);
895 		rq_sop->data_queue_enable = 0;
896 		rq_data->in_use = 0;
897 	}
898 
899 	/* The number of descriptors has to be a multiple of 32 */
900 	nb_sop_desc = (nb_desc / mbufs_per_pkt) & ENIC_ALIGN_DESCS_MASK;
901 	nb_data_desc = (nb_desc - nb_sop_desc) & ENIC_ALIGN_DESCS_MASK;
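	/*
	 * Illustration (values assumed): nb_desc = 512 with mbufs_per_pkt = 3
	 * gives nb_sop_desc = 512 / 3 = 170, rounded down to 160, and
	 * nb_data_desc = 512 - 160 = 352, both multiples of 32 before the
	 * min/max clamping below.
	 */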
902 
903 	rq_sop->max_mbufs_per_pkt = mbufs_per_pkt;
904 	rq_data->max_mbufs_per_pkt = mbufs_per_pkt;
905 
906 	if (mbufs_per_pkt > 1) {
907 		min_sop = ENIC_RX_BURST_MAX;
908 		max_sop = ((enic->config.rq_desc_count /
909 			    (mbufs_per_pkt - 1)) & ENIC_ALIGN_DESCS_MASK);
910 		min_data = min_sop * (mbufs_per_pkt - 1);
911 		max_data = enic->config.rq_desc_count;
912 	} else {
913 		min_sop = ENIC_RX_BURST_MAX;
914 		max_sop = enic->config.rq_desc_count;
915 		min_data = 0;
916 		max_data = 0;
917 	}
918 
919 	if (nb_desc < (min_sop + min_data)) {
920 		dev_warning(enic,
921 			    "Number of rx descs too low, adjusting to minimum\n");
922 		nb_sop_desc = min_sop;
923 		nb_data_desc = min_data;
924 	} else if (nb_desc > (max_sop + max_data)) {
925 		dev_warning(enic,
926 			    "Number of rx_descs too high, adjusting to maximum\n");
927 		nb_sop_desc = max_sop;
928 		nb_data_desc = max_data;
929 	}
930 	if (mbufs_per_pkt > 1) {
931 		dev_info(enic, "For max packet size %u and mbuf size %u valid"
932 			 " rx descriptor range is %u to %u\n",
933 			 max_rx_pkt_len, mbuf_size, min_sop + min_data,
934 			 max_sop + max_data);
935 	}
936 	dev_info(enic, "Using %d rx descriptors (sop %d, data %d)\n",
937 		 nb_sop_desc + nb_data_desc, nb_sop_desc, nb_data_desc);
938 
939 	/* Allocate sop queue resources */
940 	rc = vnic_rq_alloc(enic->vdev, rq_sop, sop_queue_idx,
941 		nb_sop_desc, sizeof(struct rq_enet_desc));
942 	if (rc) {
943 		dev_err(enic, "error in allocation of sop rq\n");
944 		goto err_exit;
945 	}
946 	nb_sop_desc = rq_sop->ring.desc_count;
947 
948 	if (rq_data->in_use) {
949 		/* Allocate data queue resources */
950 		rc = vnic_rq_alloc(enic->vdev, rq_data, data_queue_idx,
951 				   nb_data_desc,
952 				   sizeof(struct rq_enet_desc));
953 		if (rc) {
954 			dev_err(enic, "error in allocation of data rq\n");
955 			goto err_free_rq_sop;
956 		}
957 		nb_data_desc = rq_data->ring.desc_count;
958 	}
959 	/* Enable 64B CQ entry if requested */
960 	if (enic->cq64 && vnic_dev_set_cq_entry_size(enic->vdev,
961 				sop_queue_idx, VNIC_RQ_CQ_ENTRY_SIZE_64)) {
962 		dev_err(enic, "failed to enable 64B CQ entry on sop rq\n");
963 		goto err_free_rq_data;
964 	}
965 	if (rq_data->in_use && enic->cq64 &&
966 	    vnic_dev_set_cq_entry_size(enic->vdev, data_queue_idx,
967 		VNIC_RQ_CQ_ENTRY_SIZE_64)) {
968 		dev_err(enic, "failed to enable 64B CQ entry on data rq\n");
969 		goto err_free_rq_data;
970 	}
971 
972 	rc = vnic_cq_alloc(enic->vdev, &enic->cq[cq_idx], cq_idx,
973 			   socket_id, nb_sop_desc + nb_data_desc,
974 			   enic->cq64 ?	sizeof(struct cq_enet_rq_desc_64) :
975 			   sizeof(struct cq_enet_rq_desc));
976 	if (rc) {
977 		dev_err(enic, "error in allocation of cq for rq\n");
978 		goto err_free_rq_data;
979 	}
980 
981 	/* Allocate the mbuf rings */
982 	rq_sop->mbuf_ring = (struct rte_mbuf **)
983 		rte_zmalloc_socket("rq->mbuf_ring",
984 				   sizeof(struct rte_mbuf *) * nb_sop_desc,
985 				   RTE_CACHE_LINE_SIZE, rq_sop->socket_id);
986 	if (rq_sop->mbuf_ring == NULL)
987 		goto err_free_cq;
988 
989 	if (rq_data->in_use) {
990 		rq_data->mbuf_ring = (struct rte_mbuf **)
991 			rte_zmalloc_socket("rq->mbuf_ring",
992 				sizeof(struct rte_mbuf *) * nb_data_desc,
993 				RTE_CACHE_LINE_SIZE, rq_sop->socket_id);
994 		if (rq_data->mbuf_ring == NULL)
995 			goto err_free_sop_mbuf;
996 	}
997 
998 	rq_sop->free_mbufs = (struct rte_mbuf **)
999 		rte_zmalloc_socket("rq->free_mbufs",
1000 				   sizeof(struct rte_mbuf *) *
1001 				   ENIC_RX_BURST_MAX,
1002 				   RTE_CACHE_LINE_SIZE, rq_sop->socket_id);
1003 	if (rq_sop->free_mbufs == NULL)
1004 		goto err_free_data_mbuf;
1005 	rq_sop->num_free_mbufs = 0;
1006 
1007 	rq_sop->tot_nb_desc = nb_desc; /* squirrel away for the MTU update function */
1008 
1009 	return 0;
1010 
1011 err_free_data_mbuf:
1012 	rte_free(rq_data->mbuf_ring);
1013 err_free_sop_mbuf:
1014 	rte_free(rq_sop->mbuf_ring);
1015 err_free_cq:
1016 	/* cleanup on error */
1017 	vnic_cq_free(&enic->cq[cq_idx]);
1018 err_free_rq_data:
1019 	if (rq_data->in_use)
1020 		vnic_rq_free(rq_data);
1021 err_free_rq_sop:
1022 	vnic_rq_free(rq_sop);
1023 err_exit:
1024 	return -ENOMEM;
1025 }
1026 
1027 void enic_free_wq(void *txq)
1028 {
1029 	struct vnic_wq *wq;
1030 	struct enic *enic;
1031 
1032 	if (txq == NULL)
1033 		return;
1034 
1035 	wq = (struct vnic_wq *)txq;
1036 	enic = vnic_dev_priv(wq->vdev);
1037 	rte_memzone_free(wq->cqmsg_rz);
1038 	vnic_wq_free(wq);
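	/* The CQ paired with this WQ sits after all of the RQ CQs in the cq
	 * array, hence the rq_count + wq->index lookup.
	 */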
1039 	vnic_cq_free(&enic->cq[enic->rq_count + wq->index]);
1040 }
1041 
1042 int enic_alloc_wq(struct enic *enic, uint16_t queue_idx,
1043 	unsigned int socket_id, uint16_t nb_desc)
1044 {
1045 	struct enic_vf_representor *vf;
1046 	int err;
1047 	struct vnic_wq *wq;
1048 	unsigned int cq_index;
1049 	char name[RTE_MEMZONE_NAMESIZE];
1050 	static int instance;
1051 
1052 	/*
1053 	 * Representor uses a reserved PF queue. Translate representor
1054 	 * queue number to PF queue number.
1055 	 */
1056 	if (enic_is_vf_rep(enic)) {
1057 		RTE_ASSERT(queue_idx == 0);
1058 		vf = VF_ENIC_TO_VF_REP(enic);
1059 		queue_idx = vf->pf_wq_idx;
1060 		cq_index = vf->pf_wq_cq_idx;
1061 		enic = vf->pf;
1062 	} else {
1063 		cq_index = enic_cq_wq(enic, queue_idx);
1064 	}
1065 	wq = &enic->wq[queue_idx];
1066 	wq->socket_id = socket_id;
1067 	/*
1068 	 * rte_eth_tx_queue_setup() checks min, max, and alignment. So just
1069 	 * print an info message for diagnostics.
1070 	 */
1071 	dev_info(enic, "TX Queues - effective number of descs:%d\n", nb_desc);
1072 
1073 	/* Allocate queue resources */
1074 	err = vnic_wq_alloc(enic->vdev, &enic->wq[queue_idx], queue_idx,
1075 		nb_desc,
1076 		sizeof(struct wq_enet_desc));
1077 	if (err) {
1078 		dev_err(enic, "error in allocation of wq\n");
1079 		return err;
1080 	}
1081 
1082 	err = vnic_cq_alloc(enic->vdev, &enic->cq[cq_index], cq_index,
1083 		socket_id, nb_desc,
1084 		sizeof(struct cq_enet_wq_desc));
1085 	if (err) {
1086 		vnic_wq_free(wq);
1087 		dev_err(enic, "error in allocation of cq for wq\n");
		return err;
1088 	}
1089 
1090 	/* set up the CQ message area */
1091 	snprintf((char *)name, sizeof(name),
1092 		 "vnic_cqmsg-%s-%d-%d", enic->bdf_name, queue_idx,
1093 		instance++);
1094 
1095 	wq->cqmsg_rz = rte_memzone_reserve_aligned((const char *)name,
1096 			sizeof(uint32_t), SOCKET_ID_ANY,
1097 			RTE_MEMZONE_IOVA_CONTIG, ENIC_PAGE_SIZE);
1098 	if (!wq->cqmsg_rz)
1099 		return -ENOMEM;
1100 
1101 	return err;
1102 }
1103 
1104 int enic_disable(struct enic *enic)
1105 {
1106 	unsigned int i;
1107 	int err;
1108 
1109 	for (i = 0; i < enic->intr_count; i++) {
1110 		vnic_intr_mask(&enic->intr[i]);
1111 		(void)vnic_intr_masked(&enic->intr[i]); /* flush write */
1112 	}
1113 	enic_rxq_intr_deinit(enic);
1114 	rte_intr_disable(&enic->pdev->intr_handle);
1115 	rte_intr_callback_unregister(&enic->pdev->intr_handle,
1116 				     enic_intr_handler,
1117 				     (void *)enic->rte_dev);
1118 
1119 	vnic_dev_disable(enic->vdev);
1120 
1121 	enic_fm_destroy(enic);
1122 
1123 	if (!enic_is_sriov_vf(enic))
1124 		vnic_dev_del_addr(enic->vdev, enic->mac_addr);
1125 
1126 	for (i = 0; i < enic->wq_count; i++) {
1127 		err = vnic_wq_disable(&enic->wq[i]);
1128 		if (err)
1129 			return err;
1130 	}
1131 	for (i = 0; i < enic_vnic_rq_count(enic); i++) {
1132 		if (enic->rq[i].in_use) {
1133 			err = vnic_rq_disable(&enic->rq[i]);
1134 			if (err)
1135 				return err;
1136 		}
1137 	}
1138 
1139 	/* If we were using interrupts, set the interrupt vector to -1
1140 	 * to disable interrupts.  We are not disabling link notifications,
1141 	 * though, as we want the polling of link status to continue working.
1142 	 */
1143 	if (enic->rte_dev->data->dev_conf.intr_conf.lsc)
1144 		vnic_dev_notify_set(enic->vdev, -1);
1145 
1146 	vnic_dev_set_reset_flag(enic->vdev, 1);
1147 
1148 	for (i = 0; i < enic->wq_count; i++)
1149 		vnic_wq_clean(&enic->wq[i], enic_free_wq_buf);
1150 
1151 	for (i = 0; i < enic_vnic_rq_count(enic); i++)
1152 		if (enic->rq[i].in_use)
1153 			vnic_rq_clean(&enic->rq[i], enic_free_rq_buf);
1154 	for (i = 0; i < enic->cq_count; i++)
1155 		vnic_cq_clean(&enic->cq[i]);
1156 	for (i = 0; i < enic->intr_count; i++)
1157 		vnic_intr_clean(&enic->intr[i]);
1158 
1159 	return 0;
1160 }
1161 
1162 static int enic_dev_wait(struct vnic_dev *vdev,
1163 	int (*start)(struct vnic_dev *, int),
1164 	int (*finished)(struct vnic_dev *, int *),
1165 	int arg)
1166 {
1167 	int done;
1168 	int err;
1169 	int i;
1170 
1171 	err = start(vdev, arg);
1172 	if (err)
1173 		return err;
1174 
1175 	/* Wait for func to complete...2 seconds max */
1176 	for (i = 0; i < 2000; i++) {
1177 		err = finished(vdev, &done);
1178 		if (err)
1179 			return err;
1180 		if (done)
1181 			return 0;
1182 		usleep(1000);
1183 	}
1184 	return -ETIMEDOUT;
1185 }
1186 
1187 static int enic_dev_open(struct enic *enic)
1188 {
1189 	int err;
1190 	int flags = CMD_OPENF_IG_DESCCACHE;
1191 
1192 	err = enic_dev_wait(enic->vdev, vnic_dev_open,
1193 		vnic_dev_open_done, flags);
1194 	if (err)
1195 		dev_err(enic_get_dev(enic),
1196 			"vNIC device open failed, err %d\n", err);
1197 
1198 	return err;
1199 }
1200 
1201 static int enic_set_rsskey(struct enic *enic, uint8_t *user_key)
1202 {
1203 	dma_addr_t rss_key_buf_pa;
1204 	union vnic_rss_key *rss_key_buf_va = NULL;
1205 	int err, i;
1206 	uint8_t name[RTE_MEMZONE_NAMESIZE];
1207 
1208 	RTE_ASSERT(user_key != NULL);
1209 	snprintf((char *)name, sizeof(name), "rss_key-%s", enic->bdf_name);
1210 	rss_key_buf_va = enic_alloc_consistent(enic, sizeof(union vnic_rss_key),
1211 		&rss_key_buf_pa, name);
1212 	if (!rss_key_buf_va)
1213 		return -ENOMEM;
1214 
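	/*
	 * The 40-byte RSS key is stored as four 10-byte sub-keys in
	 * union vnic_rss_key, hence the i / 10 and i % 10 indexing below.
	 */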
1215 	for (i = 0; i < ENIC_RSS_HASH_KEY_SIZE; i++)
1216 		rss_key_buf_va->key[i / 10].b[i % 10] = user_key[i];
1217 
1218 	err = enic_set_rss_key(enic,
1219 		rss_key_buf_pa,
1220 		sizeof(union vnic_rss_key));
1221 
1222 	/* Save for later queries */
1223 	if (!err) {
1224 		rte_memcpy(&enic->rss_key, rss_key_buf_va,
1225 			   sizeof(union vnic_rss_key));
1226 	}
1227 	enic_free_consistent(enic, sizeof(union vnic_rss_key),
1228 		rss_key_buf_va, rss_key_buf_pa);
1229 
1230 	return err;
1231 }
1232 
1233 int enic_set_rss_reta(struct enic *enic, union vnic_rss_cpu *rss_cpu)
1234 {
1235 	dma_addr_t rss_cpu_buf_pa;
1236 	union vnic_rss_cpu *rss_cpu_buf_va = NULL;
1237 	int err;
1238 	uint8_t name[RTE_MEMZONE_NAMESIZE];
1239 
1240 	snprintf((char *)name, sizeof(name), "rss_cpu-%s", enic->bdf_name);
1241 	rss_cpu_buf_va = enic_alloc_consistent(enic, sizeof(union vnic_rss_cpu),
1242 		&rss_cpu_buf_pa, name);
1243 	if (!rss_cpu_buf_va)
1244 		return -ENOMEM;
1245 
1246 	rte_memcpy(rss_cpu_buf_va, rss_cpu, sizeof(union vnic_rss_cpu));
1247 
1248 	err = enic_set_rss_cpu(enic,
1249 		rss_cpu_buf_pa,
1250 		sizeof(union vnic_rss_cpu));
1251 
1252 	enic_free_consistent(enic, sizeof(union vnic_rss_cpu),
1253 		rss_cpu_buf_va, rss_cpu_buf_pa);
1254 
1255 	/* Save for later queries */
1256 	if (!err)
1257 		rte_memcpy(&enic->rss_cpu, rss_cpu, sizeof(union vnic_rss_cpu));
1258 	return err;
1259 }
1260 
1261 static int enic_set_niccfg(struct enic *enic, uint8_t rss_default_cpu,
1262 	uint8_t rss_hash_type, uint8_t rss_hash_bits, uint8_t rss_base_cpu,
1263 	uint8_t rss_enable)
1264 {
1265 	const uint8_t tso_ipid_split_en = 0;
1266 	int err;
1267 
1268 	err = enic_set_nic_cfg(enic,
1269 		rss_default_cpu, rss_hash_type,
1270 		rss_hash_bits, rss_base_cpu,
1271 		rss_enable, tso_ipid_split_en,
1272 		enic->ig_vlan_strip_en);
1273 
1274 	return err;
1275 }
1276 
1277 /* Initialize RSS with defaults, called from dev_configure */
1278 int enic_init_rss_nic_cfg(struct enic *enic)
1279 {
1280 	static uint8_t default_rss_key[] = {
1281 		85, 67, 83, 97, 119, 101, 115, 111, 109, 101,
1282 		80, 65, 76, 79, 117, 110, 105, 113, 117, 101,
1283 		76, 73, 78, 85, 88, 114, 111, 99, 107, 115,
1284 		69, 78, 73, 67, 105, 115, 99, 111, 111, 108,
1285 	};
1286 	struct rte_eth_rss_conf rss_conf;
1287 	union vnic_rss_cpu rss_cpu;
1288 	int ret, i;
1289 
1290 	rss_conf = enic->rte_dev->data->dev_conf.rx_adv_conf.rss_conf;
1291 	/*
1292 	 * If setting key for the first time, and the user gives us none, then
1293 	 * push the default key to NIC.
1294 	 */
1295 	if (rss_conf.rss_key == NULL) {
1296 		rss_conf.rss_key = default_rss_key;
1297 		rss_conf.rss_key_len = ENIC_RSS_HASH_KEY_SIZE;
1298 	}
1299 	ret = enic_set_rss_conf(enic, &rss_conf);
1300 	if (ret) {
1301 		dev_err(enic, "Failed to configure RSS\n");
1302 		return ret;
1303 	}
1304 	if (enic->rss_enable) {
1305 		/* If enabling RSS, use the default reta */
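		/*
		 * Each RETA entry holds a SOP RQ index; entries are filled
		 * round-robin across the configured Rx queues and packed
		 * four per vnic_rss_cpu element (cpu[i / 4].b[i % 4]).
		 */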
1306 		for (i = 0; i < ENIC_RSS_RETA_SIZE; i++) {
1307 			rss_cpu.cpu[i / 4].b[i % 4] =
1308 				enic_rte_rq_idx_to_sop_idx(i % enic->rq_count);
1309 		}
1310 		ret = enic_set_rss_reta(enic, &rss_cpu);
1311 		if (ret)
1312 			dev_err(enic, "Failed to set RSS indirection table\n");
1313 	}
1314 	return ret;
1315 }
1316 
1317 int enic_setup_finish(struct enic *enic)
1318 {
1319 	enic_init_soft_stats(enic);
1320 
1321 	/* switchdev: enable promisc mode on PF */
1322 	if (enic->switchdev_mode) {
1323 		vnic_dev_packet_filter(enic->vdev,
1324 				       0 /* directed  */,
1325 				       0 /* multicast */,
1326 				       0 /* broadcast */,
1327 				       1 /* promisc   */,
1328 				       0 /* allmulti  */);
1329 		enic->promisc = 1;
1330 		enic->allmulti = 0;
1331 		return 0;
1332 	}
1333 	/* Default conf */
1334 	vnic_dev_packet_filter(enic->vdev,
1335 		1 /* directed  */,
1336 		1 /* multicast */,
1337 		1 /* broadcast */,
1338 		0 /* promisc   */,
1339 		1 /* allmulti  */);
1340 
1341 	enic->promisc = 0;
1342 	enic->allmulti = 1;
1343 
1344 	return 0;
1345 }
1346 
1347 static int enic_rss_conf_valid(struct enic *enic,
1348 			       struct rte_eth_rss_conf *rss_conf)
1349 {
1350 	/* RSS is disabled per VIC settings. Ignore rss_conf. */
1351 	if (enic->flow_type_rss_offloads == 0)
1352 		return 0;
1353 	if (rss_conf->rss_key != NULL &&
1354 	    rss_conf->rss_key_len != ENIC_RSS_HASH_KEY_SIZE) {
1355 		dev_err(enic, "Given rss_key is %d bytes, it must be %d\n",
1356 			rss_conf->rss_key_len, ENIC_RSS_HASH_KEY_SIZE);
1357 		return -EINVAL;
1358 	}
1359 	if (rss_conf->rss_hf != 0 &&
1360 	    (rss_conf->rss_hf & enic->flow_type_rss_offloads) == 0) {
1361 		dev_err(enic, "Given rss_hf contains none of the supported"
1362 			" types\n");
1363 		return -EINVAL;
1364 	}
1365 	return 0;
1366 }
1367 
1368 /* Set hash type and key according to rss_conf */
1369 int enic_set_rss_conf(struct enic *enic, struct rte_eth_rss_conf *rss_conf)
1370 {
1371 	struct rte_eth_dev *eth_dev;
1372 	uint64_t rss_hf;
1373 	uint8_t rss_hash_type;
1374 	uint8_t rss_enable;
1375 	int ret;
1376 
1377 	RTE_ASSERT(rss_conf != NULL);
1378 	ret = enic_rss_conf_valid(enic, rss_conf);
1379 	if (ret) {
1380 		dev_err(enic, "RSS configuration (rss_conf) is invalid\n");
1381 		return ret;
1382 	}
1383 
1384 	eth_dev = enic->rte_dev;
1385 	rss_hash_type = 0;
1386 	rss_hf = rss_conf->rss_hf & enic->flow_type_rss_offloads;
1387 	if (enic->rq_count > 1 &&
1388 	    (eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS_FLAG) &&
1389 	    rss_hf != 0) {
1390 		rss_enable = 1;
1391 		if (rss_hf & (ETH_RSS_IPV4 | ETH_RSS_FRAG_IPV4 |
1392 			      ETH_RSS_NONFRAG_IPV4_OTHER))
1393 			rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_IPV4;
1394 		if (rss_hf & ETH_RSS_NONFRAG_IPV4_TCP)
1395 			rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_TCP_IPV4;
1396 		if (rss_hf & ETH_RSS_NONFRAG_IPV4_UDP) {
1397 			rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_UDP_IPV4;
1398 			if (enic->udp_rss_weak) {
1399 				/*
1400 				 * 'TCP' is not a typo. The "weak" version of
1401 				 * UDP RSS requires both the TCP and UDP bits
1402 				 * be set. It does enable TCP RSS as well.
1403 				 */
1404 				rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_TCP_IPV4;
1405 			}
1406 		}
1407 		if (rss_hf & (ETH_RSS_IPV6 | ETH_RSS_IPV6_EX |
1408 			      ETH_RSS_FRAG_IPV6 | ETH_RSS_NONFRAG_IPV6_OTHER))
1409 			rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_IPV6;
1410 		if (rss_hf & (ETH_RSS_NONFRAG_IPV6_TCP | ETH_RSS_IPV6_TCP_EX))
1411 			rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_TCP_IPV6;
1412 		if (rss_hf & (ETH_RSS_NONFRAG_IPV6_UDP | ETH_RSS_IPV6_UDP_EX)) {
1413 			rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_UDP_IPV6;
1414 			if (enic->udp_rss_weak)
1415 				rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_TCP_IPV6;
1416 		}
1417 	} else {
1418 		rss_enable = 0;
1419 		rss_hf = 0;
1420 	}
1421 
1422 	/* Set the hash key if provided */
1423 	if (rss_enable && rss_conf->rss_key) {
1424 		ret = enic_set_rsskey(enic, rss_conf->rss_key);
1425 		if (ret) {
1426 			dev_err(enic, "Failed to set RSS key\n");
1427 			return ret;
1428 		}
1429 	}
1430 
1431 	ret = enic_set_niccfg(enic, ENIC_RSS_DEFAULT_CPU, rss_hash_type,
1432 			      ENIC_RSS_HASH_BITS, ENIC_RSS_BASE_CPU,
1433 			      rss_enable);
1434 	if (!ret) {
1435 		enic->rss_hf = rss_hf;
1436 		enic->rss_hash_type = rss_hash_type;
1437 		enic->rss_enable = rss_enable;
1438 	} else {
1439 		dev_err(enic, "Failed to update RSS configurations."
1440 			" hash=0x%x\n", rss_hash_type);
1441 	}
1442 	return ret;
1443 }
1444 
1445 int enic_set_vlan_strip(struct enic *enic)
1446 {
1447 	/*
1448 	 * Unfortunately, VLAN strip on/off and RSS on/off are configured
1449 	 * together. So, re-do niccfg, preserving the current RSS settings.
1450 	 */
1451 	return enic_set_niccfg(enic, ENIC_RSS_DEFAULT_CPU, enic->rss_hash_type,
1452 			       ENIC_RSS_HASH_BITS, ENIC_RSS_BASE_CPU,
1453 			       enic->rss_enable);
1454 }
1455 
1456 int enic_add_packet_filter(struct enic *enic)
1457 {
1458 	/* switchdev ignores packet filters */
1459 	if (enic->switchdev_mode) {
1460 		ENICPMD_LOG(DEBUG, " switchdev: ignore packet filter");
1461 		return 0;
1462 	}
1463 	/* Args -> directed, multicast, broadcast, promisc, allmulti */
1464 	return vnic_dev_packet_filter(enic->vdev, 1, 1, 1,
1465 		enic->promisc, enic->allmulti);
1466 }
1467 
1468 int enic_get_link_status(struct enic *enic)
1469 {
1470 	return vnic_dev_link_status(enic->vdev);
1471 }
1472 
1473 static void enic_dev_deinit(struct enic *enic)
1474 {
1475 	/* stop link status checking */
1476 	vnic_dev_notify_unset(enic->vdev);
1477 
1478 	/* mac_addrs is freed by rte_eth_dev_release_port() */
1479 	rte_free(enic->cq);
1480 	rte_free(enic->intr);
1481 	rte_free(enic->rq);
1482 	rte_free(enic->wq);
1483 }
1484 
1486 int enic_set_vnic_res(struct enic *enic)
1487 {
1488 	struct rte_eth_dev *eth_dev = enic->rte_dev;
1489 	int rc = 0;
1490 	unsigned int required_rq, required_wq, required_cq, required_intr;
1491 
1492 	/* Always use two vNIC RQs per eth_dev RQ, regardless of Rx scatter. */
1493 	required_rq = eth_dev->data->nb_rx_queues * 2;
1494 	required_wq = eth_dev->data->nb_tx_queues;
1495 	required_cq = eth_dev->data->nb_rx_queues + eth_dev->data->nb_tx_queues;
1496 	required_intr = 1; /* 1 for LSC even if intr_conf.lsc is 0 */
1497 	if (eth_dev->data->dev_conf.intr_conf.rxq) {
1498 		required_intr += eth_dev->data->nb_rx_queues;
1499 	}
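	/*
	 * Example (illustrative counts): 4 Rx queues and 2 Tx queues need
	 * 4 * 2 = 8 RQs, 2 WQs, 4 + 2 = 6 CQs, and 1 + 4 = 5 interrupts when
	 * per-queue Rx interrupts are enabled.
	 */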
1500 	ENICPMD_LOG(DEBUG, "Required queues for PF: rq %u wq %u cq %u",
1501 		    required_rq, required_wq, required_cq);
1502 	if (enic->vf_required_rq) {
1503 		/* Queues needed for VF representors */
1504 		required_rq += enic->vf_required_rq;
1505 		required_wq += enic->vf_required_wq;
1506 		required_cq += enic->vf_required_cq;
1507 		ENICPMD_LOG(DEBUG, "Required queues for VF representors: rq %u wq %u cq %u",
1508 			    enic->vf_required_rq, enic->vf_required_wq,
1509 			    enic->vf_required_cq);
1510 	}
1511 
1512 	if (enic->conf_rq_count < required_rq) {
1513 		dev_err(dev, "Not enough Receive queues. Requested:%u which uses %d RQs on VIC, Configured:%u\n",
1514 			eth_dev->data->nb_rx_queues,
1515 			required_rq, enic->conf_rq_count);
1516 		rc = -EINVAL;
1517 	}
1518 	if (enic->conf_wq_count < required_wq) {
1519 		dev_err(dev, "Not enough Transmit queues. Requested:%u, Configured:%u\n",
1520 			eth_dev->data->nb_tx_queues, enic->conf_wq_count);
1521 		rc = -EINVAL;
1522 	}
1523 
1524 	if (enic->conf_cq_count < required_cq) {
1525 		dev_err(dev, "Not enough Completion queues. Required:%u, Configured:%u\n",
1526 			required_cq, enic->conf_cq_count);
1527 		rc = -EINVAL;
1528 	}
1529 	if (enic->conf_intr_count < required_intr) {
1530 		dev_err(dev, "Not enough Interrupts to support Rx queue"
1531 			" interrupts. Required:%u, Configured:%u\n",
1532 			required_intr, enic->conf_intr_count);
1533 		rc = -EINVAL;
1534 	}
1535 
1536 	if (rc == 0) {
1537 		enic->rq_count = eth_dev->data->nb_rx_queues;
1538 		enic->wq_count = eth_dev->data->nb_tx_queues;
1539 		enic->cq_count = enic->rq_count + enic->wq_count;
1540 		enic->intr_count = required_intr;
1541 	}
1542 
1543 	return rc;
1544 }
1545 
1546 /* Re-initialize the RQs (SOP and data) and the CQ for an Rx queue; used by the MTU update path */
1547 static int
1548 enic_reinit_rq(struct enic *enic, unsigned int rq_idx)
1549 {
1550 	struct vnic_rq *sop_rq, *data_rq;
1551 	unsigned int cq_idx;
1552 	int rc = 0;
1553 
1554 	sop_rq = &enic->rq[enic_rte_rq_idx_to_sop_idx(rq_idx)];
1555 	data_rq = &enic->rq[enic_rte_rq_idx_to_data_idx(rq_idx, enic)];
1556 	cq_idx = enic_cq_rq(enic, rq_idx);
1557 
1558 	vnic_cq_clean(&enic->cq[cq_idx]);
1559 	vnic_cq_init(&enic->cq[cq_idx],
1560 		     0 /* flow_control_enable */,
1561 		     1 /* color_enable */,
1562 		     0 /* cq_head */,
1563 		     0 /* cq_tail */,
1564 		     1 /* cq_tail_color */,
1565 		     0 /* interrupt_enable */,
1566 		     1 /* cq_entry_enable */,
1567 		     0 /* cq_message_enable */,
1568 		     0 /* interrupt offset */,
1569 		     0 /* cq_message_addr */);
1570 
1572 	vnic_rq_init_start(sop_rq, enic_cq_rq(enic,
1573 			   enic_rte_rq_idx_to_sop_idx(rq_idx)), 0,
1574 			   sop_rq->ring.desc_count - 1, 1, 0);
1575 	if (data_rq->in_use) {
1576 		vnic_rq_init_start(data_rq,
1577 				   enic_cq_rq(enic,
1578 				   enic_rte_rq_idx_to_data_idx(rq_idx, enic)),
1579 				   0, data_rq->ring.desc_count - 1, 1, 0);
1580 	}
1581 
1582 	rc = enic_alloc_rx_queue_mbufs(enic, sop_rq);
1583 	if (rc)
1584 		return rc;
1585 
1586 	if (data_rq->in_use) {
1587 		rc = enic_alloc_rx_queue_mbufs(enic, data_rq);
1588 		if (rc) {
1589 			enic_rxmbuf_queue_release(enic, sop_rq);
1590 			return rc;
1591 		}
1592 	}
1593 
1594 	return 0;
1595 }
1596 
1597 /* The Cisco NIC can send and receive packets up to a max packet size
1598  * determined by the NIC type and firmware. There is also an MTU
1599  * configured into the NIC via the CIMC/UCSM management interface
1600  * which can be overridden by this function (up to the max packet size).
1601  * Depending on the network setup, doing so may cause packet drops
1602  * and unexpected behavior.
1603  */
1604 int enic_set_mtu(struct enic *enic, uint16_t new_mtu)
1605 {
1606 	unsigned int rq_idx;
1607 	struct vnic_rq *rq;
1608 	int rc = 0;
1609 	uint16_t old_mtu;	/* previous setting */
1610 	uint16_t config_mtu;	/* Value configured into NIC via CIMC/UCSM */
1611 	struct rte_eth_dev *eth_dev = enic->rte_dev;
1612 
1613 	old_mtu = eth_dev->data->mtu;
1614 	config_mtu = enic->config.mtu;
1615 
1616 	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
1617 		return -E_RTE_SECONDARY;
1618 
1619 	if (new_mtu > enic->max_mtu) {
1620 		dev_err(enic,
1621 			"MTU not updated: requested (%u) greater than max (%u)\n",
1622 			new_mtu, enic->max_mtu);
1623 		return -EINVAL;
1624 	}
1625 	if (new_mtu < ENIC_MIN_MTU) {
1626 		dev_info(enic,
1627 			"MTU not updated: requested (%u) less than min (%u)\n",
1628 			new_mtu, ENIC_MIN_MTU);
1629 		return -EINVAL;
1630 	}
1631 	if (new_mtu > config_mtu)
1632 		dev_warning(enic,
1633 			"MTU (%u) is greater than value configured in NIC (%u)\n",
1634 			new_mtu, config_mtu);
1635 
1636 	/* Update the MTU and maximum packet length */
1637 	eth_dev->data->mtu = new_mtu;
1638 	eth_dev->data->dev_conf.rxmode.max_rx_pkt_len =
1639 		enic_mtu_to_max_rx_pktlen(new_mtu);
1640 
1641 	/*
1642 	 * If the device has not started (enic_enable), nothing to do.
1643 	 * Later, enic_enable() will set up RQs reflecting the new maximum
1644 	 * packet length.
1645 	 */
1646 	if (!eth_dev->data->dev_started)
1647 		goto set_mtu_done;
1648 
1649 	/*
1650 	 * The device has started, re-do RQs on the fly. In the process, we
1651 	 * pick up the new maximum packet length.
1652 	 *
1653 	 * Some applications rely on the ability to change MTU without stopping
1654 	 * the device. So keep this behavior for now.
1655 	 */
1656 	rte_spinlock_lock(&enic->mtu_lock);
1657 
1658 	/* Stop traffic on all RQs */
1659 	for (rq_idx = 0; rq_idx < enic->rq_count * 2; rq_idx++) {
1660 		rq = &enic->rq[rq_idx];
1661 		if (rq->is_sop && rq->in_use) {
1662 			rc = enic_stop_rq(enic,
1663 					  enic_sop_rq_idx_to_rte_idx(rq_idx));
1664 			if (rc) {
1665 				dev_err(enic, "Failed to stop Rq %u\n", rq_idx);
1666 				goto set_mtu_done;
1667 			}
1668 		}
1669 	}
1670 
1671 	/* replace Rx function with a no-op to avoid getting stale pkts */
1672 	eth_dev->rx_pkt_burst = enic_dummy_recv_pkts;
1673 	rte_mb();
1674 
1675 	/* Allow time for threads to exit the real Rx function. */
1676 	usleep(100000);
1677 
1678 	/* now it is safe to reconfigure the RQs */
1679 
1681 	/* free and reallocate RQs with the new MTU */
1682 	for (rq_idx = 0; rq_idx < enic->rq_count; rq_idx++) {
1683 		rq = &enic->rq[enic_rte_rq_idx_to_sop_idx(rq_idx)];
1684 		if (!rq->in_use)
1685 			continue;
1686 
1687 		enic_free_rq(rq);
1688 		rc = enic_alloc_rq(enic, rq_idx, rq->socket_id, rq->mp,
1689 				   rq->tot_nb_desc, rq->rx_free_thresh);
1690 		if (rc) {
1691 			dev_err(enic,
1692 				"Fatal MTU alloc error- No traffic will pass\n");
1693 			goto set_mtu_done;
1694 		}
1695 
1696 		rc = enic_reinit_rq(enic, rq_idx);
1697 		if (rc) {
1698 			dev_err(enic,
1699 				"Fatal MTU RQ reinit- No traffic will pass\n");
1700 			goto set_mtu_done;
1701 		}
1702 	}
1703 
1704 	/* put back the real receive function */
1705 	rte_mb();
1706 	enic_pick_rx_handler(eth_dev);
1707 	rte_mb();
1708 
1709 	/* restart Rx traffic */
1710 	for (rq_idx = 0; rq_idx < enic->rq_count; rq_idx++) {
1711 		rq = &enic->rq[enic_rte_rq_idx_to_sop_idx(rq_idx)];
1712 		if (rq->is_sop && rq->in_use)
1713 			enic_start_rq(enic, rq_idx);
1714 	}
1715 
1716 set_mtu_done:
1717 	dev_info(enic, "MTU changed from %u to %u\n",  old_mtu, new_mtu);
1718 	rte_spinlock_unlock(&enic->mtu_lock);
1719 	return rc;
1720 }
1721 
1722 static int enic_dev_init(struct enic *enic)
1723 {
1724 	int err;
1725 	struct rte_eth_dev *eth_dev = enic->rte_dev;
1726 
1727 	vnic_dev_intr_coal_timer_info_default(enic->vdev);
1728 
1729 	/* Get the vNIC configuration */
1731 	err = enic_get_vnic_config(enic);
1732 	if (err) {
1733 		dev_err(dev, "Get vNIC configuration failed, aborting\n");
1734 		return err;
1735 	}
1736 
1737 	/* Get available resource counts */
1738 	enic_get_res_counts(enic);
1739 	if (enic->conf_rq_count == 1) {
1740 		dev_err(enic, "Running with only 1 RQ configured in the vNIC is not supported.\n");
1741 		dev_err(enic, "Please configure 2 RQs in the vNIC for each Rx queue used by DPDK.\n");
1742 		dev_err(enic, "See the ENIC PMD guide for more information.\n");
1743 		return -EINVAL;
1744 	}
1745 	/* Queue counts may be zero. rte_zmalloc returns NULL in that case. */
1746 	enic->cq = rte_zmalloc("enic_vnic_cq", sizeof(struct vnic_cq) *
1747 			       enic->conf_cq_count, 8);
1748 	enic->intr = rte_zmalloc("enic_vnic_intr", sizeof(struct vnic_intr) *
1749 				 enic->conf_intr_count, 8);
1750 	enic->rq = rte_zmalloc("enic_vnic_rq", sizeof(struct vnic_rq) *
1751 			       enic->conf_rq_count, 8);
1752 	enic->wq = rte_zmalloc("enic_vnic_wq", sizeof(struct vnic_wq) *
1753 			       enic->conf_wq_count, 8);
1754 	if (enic->conf_cq_count > 0 && enic->cq == NULL) {
1755 		dev_err(enic, "failed to allocate vnic_cq, aborting.\n");
1756 		return -1;
1757 	}
1758 	if (enic->conf_intr_count > 0 && enic->intr == NULL) {
1759 		dev_err(enic, "failed to allocate vnic_intr, aborting.\n");
1760 		return -1;
1761 	}
1762 	if (enic->conf_rq_count > 0 && enic->rq == NULL) {
1763 		dev_err(enic, "failed to allocate vnic_rq, aborting.\n");
1764 		return -1;
1765 	}
1766 	if (enic->conf_wq_count > 0 && enic->wq == NULL) {
1767 		dev_err(enic, "failed to allocate vnic_wq, aborting.\n");
1768 		return -1;
1769 	}
1770 
1771 	eth_dev->data->mac_addrs = rte_zmalloc("enic_mac_addr",
1772 					sizeof(struct rte_ether_addr) *
1773 					ENIC_UNICAST_PERFECT_FILTERS, 0);
1774 	if (!eth_dev->data->mac_addrs) {
1775 		dev_err(enic, "mac addr storage alloc failed, aborting.\n");
1776 		return -1;
1777 	}
1778 	rte_ether_addr_copy((struct rte_ether_addr *)enic->mac_addr,
1779 			eth_dev->data->mac_addrs);
1780 
1781 	vnic_dev_set_reset_flag(enic->vdev, 0);
1782 
1783 	LIST_INIT(&enic->flows);
1784 
1785 	/* set up link status checking */
1786 	vnic_dev_notify_set(enic->vdev, -1); /* No Intr for notify */
1787 
1788 	/*
1789 	 * When Geneve with options offload is available, always disable it
1790 	 * first as it can interfere with user flow rules.
1791 	 */
1792 	if (enic->geneve_opt_avail) {
1793 		/*
1794 		 * Disabling fails if the feature is provisioned but
1795 		 * not enabled. So ignore result and do not log error.
1796 		 */
1797 		vnic_dev_overlay_offload_ctrl(enic->vdev,
1798 			OVERLAY_FEATURE_GENEVE,
1799 			OVERLAY_OFFLOAD_DISABLE);
1800 	}
1801 	enic->overlay_offload = false;
1802 	if (enic->disable_overlay && enic->vxlan) {
1803 		/*
1804 		 * Explicitly disable overlay offload as the setting is
1805 		 * sticky, and resetting vNIC does not disable it.
1806 		 */
1807 		if (vnic_dev_overlay_offload_ctrl(enic->vdev,
1808 						  OVERLAY_FEATURE_VXLAN,
1809 						  OVERLAY_OFFLOAD_DISABLE)) {
1810 			dev_err(enic, "failed to disable overlay offload\n");
1811 		} else {
1812 			dev_info(enic, "Overlay offload is disabled\n");
1813 		}
1814 	}
1815 	if (!enic->disable_overlay && enic->vxlan &&
1816 	    /* 'VXLAN feature' enables VXLAN, NVGRE, and GENEVE. */
1817 	    vnic_dev_overlay_offload_ctrl(enic->vdev,
1818 					  OVERLAY_FEATURE_VXLAN,
1819 					  OVERLAY_OFFLOAD_ENABLE) == 0) {
1820 		enic->tx_offload_capa |=
1821 			DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM |
1822 			DEV_TX_OFFLOAD_GENEVE_TNL_TSO |
1823 			DEV_TX_OFFLOAD_VXLAN_TNL_TSO;
1824 		enic->tx_offload_mask |=
1825 			PKT_TX_OUTER_IPV6 |
1826 			PKT_TX_OUTER_IPV4 |
1827 			PKT_TX_OUTER_IP_CKSUM |
1828 			PKT_TX_TUNNEL_MASK;
1829 		enic->overlay_offload = true;
1830 		dev_info(enic, "Overlay offload is enabled\n");
1831 	}
1832 	/* Geneve with options offload requires overlay offload */
1833 	if (enic->overlay_offload && enic->geneve_opt_avail &&
1834 	    enic->geneve_opt_request) {
1835 		if (vnic_dev_overlay_offload_ctrl(enic->vdev,
1836 				OVERLAY_FEATURE_GENEVE,
1837 				OVERLAY_OFFLOAD_ENABLE)) {
1838 			dev_err(enic, "failed to enable geneve+option\n");
1839 		} else {
1840 			enic->geneve_opt_enabled = 1;
1841 			dev_info(enic, "Geneve with options is enabled\n");
1842 		}
1843 	}
1844 	/*
1845 	 * Reset the vxlan port if HW vxlan parsing is available. It
1846 	 * is always enabled regardless of overlay offload
1847 	 * enable/disable.
1848 	 */
1849 	if (enic->vxlan) {
1850 		enic->vxlan_port = RTE_VXLAN_DEFAULT_PORT;
1851 		/*
1852 		 * Reset the vxlan port to the default, as the NIC firmware
1853 		 * does not reset it automatically and keeps the old setting.
1854 		 */
1855 		if (vnic_dev_overlay_offload_cfg(enic->vdev,
1856 						 OVERLAY_CFG_VXLAN_PORT_UPDATE,
1857 						 RTE_VXLAN_DEFAULT_PORT)) {
1858 			dev_err(enic, "failed to update vxlan port\n");
1859 			return -EINVAL;
1860 		}
1861 	}
1862 
1863 	if (enic_fm_init(enic))
1864 		dev_warning(enic, "Init of flowman failed.\n");
1865 	return 0;
1866 
1867 }
1868 
1869 static void lock_devcmd(void *priv)
1870 {
1871 	struct enic *enic = priv;
1872 
1873 	rte_spinlock_lock(&enic->devcmd_lock);
1874 }
1875 
1876 static void unlock_devcmd(void *priv)
1877 {
1878 	struct enic *enic = priv;
1879 
1880 	rte_spinlock_unlock(&enic->devcmd_lock);
1881 }
1882 
1883 int enic_probe(struct enic *enic)
1884 {
1885 	struct rte_pci_device *pdev = enic->pdev;
1886 	int err = -1;
1887 
1888 	dev_debug(enic, "Initializing ENIC PMD\n");
1889 
1890 	/* if this is a secondary process the hardware is already initialized */
1891 	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
1892 		return 0;
1893 
1894 	enic->bar0.vaddr = (void *)pdev->mem_resource[0].addr;
1895 	enic->bar0.len = pdev->mem_resource[0].len;
1896 
1897 	/* Register vNIC device */
1898 	enic->vdev = vnic_dev_register(NULL, enic, enic->pdev, &enic->bar0, 1);
1899 	if (!enic->vdev) {
1900 		dev_err(enic, "vNIC registration failed, aborting\n");
1901 		goto err_out;
1902 	}
1903 
1904 	LIST_INIT(&enic->memzone_list);
1905 	rte_spinlock_init(&enic->memzone_list_lock);
1906 
1907 	vnic_register_cbacks(enic->vdev,
1908 		enic_alloc_consistent,
1909 		enic_free_consistent);
1910 
1911 	/*
1912 	 * Allocate the consistent memory for stats upfront so both primary and
1913 	 * secondary processes can dump stats.
1914 	 */
1915 	err = vnic_dev_alloc_stats_mem(enic->vdev);
1916 	if (err) {
1917 		dev_err(enic, "Failed to allocate cmd memory, aborting\n");
1918 		goto err_out_unregister;
1919 	}
1920 	/* Issue device open to get device in known state */
1921 	err = enic_dev_open(enic);
1922 	if (err) {
1923 		dev_err(enic, "vNIC dev open failed, aborting\n");
1924 		goto err_out_unregister;
1925 	}
1926 
1927 	/* Set ingress vlan rewrite mode before vnic initialization */
1928 	dev_debug(enic, "Set ig_vlan_rewrite_mode=%u\n",
1929 		  enic->ig_vlan_rewrite_mode);
1930 	err = vnic_dev_set_ig_vlan_rewrite_mode(enic->vdev,
1931 		enic->ig_vlan_rewrite_mode);
1932 	if (err) {
1933 		dev_err(enic,
1934 			"Failed to set ingress vlan rewrite mode, aborting.\n");
1935 		goto err_out_dev_close;
1936 	}
1937 
1938 	/* Issue device init to initialize the vnic-to-switch link.
1939 	 * We'll start with carrier off and wait for link UP
1940 	 * notification later to turn on carrier.  We don't need
1941 	 * to wait here for the vnic-to-switch link initialization
1942 	 * to complete; link UP notification is the indication that
1943 	 * the process is complete.
1944 	 */
1945 
1946 	err = vnic_dev_init(enic->vdev, 0);
1947 	if (err) {
1948 		dev_err(enic, "vNIC dev init failed, aborting\n");
1949 		goto err_out_dev_close;
1950 	}
1951 
1952 	err = enic_dev_init(enic);
1953 	if (err) {
1954 		dev_err(enic, "Device initialization failed, aborting\n");
1955 		goto err_out_dev_close;
1956 	}
1957 
1958 	/* Use a PF spinlock to serialize devcmd from PF and VF representors */
1959 	if (enic->switchdev_mode) {
1960 		rte_spinlock_init(&enic->devcmd_lock);
1961 		vnic_register_lock(enic->vdev, lock_devcmd, unlock_devcmd);
1962 	}
1963 	return 0;
1964 
1965 err_out_dev_close:
1966 	vnic_dev_close(enic->vdev);
1967 err_out_unregister:
1968 	vnic_dev_unregister(enic->vdev);
1969 err_out:
1970 	return err;
1971 }
1972 
1973 void enic_remove(struct enic *enic)
1974 {
1975 	enic_dev_deinit(enic);
1976 	vnic_dev_close(enic->vdev);
1977 	vnic_dev_unregister(enic->vdev);
1978 }
1979