xref: /dpdk/drivers/net/enic/enic_main.c (revision bbbe38a6d59ccdda25917712701e629d0b10af6f)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright 2008-2017 Cisco Systems, Inc.  All rights reserved.
3  * Copyright 2007 Nuova Systems, Inc.  All rights reserved.
4  */
5 
6 #include <stdio.h>
7 
8 #include <sys/stat.h>
9 #include <sys/mman.h>
10 #include <fcntl.h>
11 
12 #include <rte_pci.h>
13 #include <rte_bus_pci.h>
14 #include <rte_memzone.h>
15 #include <rte_malloc.h>
16 #include <rte_mbuf.h>
17 #include <rte_string_fns.h>
18 #include <ethdev_driver.h>
19 #include <rte_geneve.h>
20 
21 #include "enic_compat.h"
22 #include "enic.h"
23 #include "wq_enet_desc.h"
24 #include "rq_enet_desc.h"
25 #include "cq_enet_desc.h"
26 #include "vnic_enet.h"
27 #include "vnic_dev.h"
28 #include "vnic_wq.h"
29 #include "vnic_rq.h"
30 #include "vnic_cq.h"
31 #include "vnic_intr.h"
32 #include "vnic_nic.h"
33 
/* Return non-zero when this device is the SR-IOV VF variant of the VIC. */
static inline int enic_is_sriov_vf(struct enic *enic)
{
	return enic->pdev->id.device_id == PCI_DEVICE_ID_CISCO_VIC_ENET_VF;
}
38 
/* Return non-zero when all six bytes of the MAC address are zero. */
static int is_zero_addr(uint8_t *addr)
{
	int i;

	for (i = 0; i < 6; i++) {
		if (addr[i] != 0)
			return 0;
	}
	return 1;
}
43 
/*
 * Return non-zero when the address is multicast/broadcast, i.e. the
 * least significant bit of the first byte is set.
 */
static int is_mcast_addr(uint8_t *addr)
{
	return (addr[0] & 0x1) != 0;
}
48 
/*
 * A usable unicast MAC must be neither a multicast/broadcast address
 * nor all zeroes.
 */
static int is_eth_addr_valid(uint8_t *addr)
{
	if (is_mcast_addr(addr))
		return 0;
	return !is_zero_addr(addr);
}
53 
54 void
55 enic_rxmbuf_queue_release(__rte_unused struct enic *enic, struct vnic_rq *rq)
56 {
57 	uint16_t i;
58 
59 	if (!rq || !rq->mbuf_ring) {
60 		dev_debug(enic, "Pointer to rq or mbuf_ring is NULL");
61 		return;
62 	}
63 
64 	for (i = 0; i < rq->ring.desc_count; i++) {
65 		if (rq->mbuf_ring[i]) {
66 			rte_pktmbuf_free_seg(rq->mbuf_ring[i]);
67 			rq->mbuf_ring[i] = NULL;
68 		}
69 	}
70 }
71 
72 void enic_free_wq_buf(struct rte_mbuf **buf)
73 {
74 	struct rte_mbuf *mbuf = *buf;
75 
76 	rte_pktmbuf_free_seg(mbuf);
77 	*buf = NULL;
78 }
79 
80 static void enic_log_q_error(struct enic *enic)
81 {
82 	unsigned int i;
83 	uint32_t error_status;
84 
85 	for (i = 0; i < enic->wq_count; i++) {
86 		error_status = vnic_wq_error_status(&enic->wq[i]);
87 		if (error_status)
88 			dev_err(enic, "WQ[%d] error_status %d\n", i,
89 				error_status);
90 	}
91 
92 	for (i = 0; i < enic_vnic_rq_count(enic); i++) {
93 		if (!enic->rq[i].in_use)
94 			continue;
95 		error_status = vnic_rq_error_status(&enic->rq[i]);
96 		if (error_status)
97 			dev_err(enic, "RQ[%d] error_status %d\n", i,
98 				error_status);
99 	}
100 }
101 
102 static void enic_clear_soft_stats(struct enic *enic)
103 {
104 	struct enic_soft_stats *soft_stats = &enic->soft_stats;
105 	rte_atomic64_clear(&soft_stats->rx_nombuf);
106 	rte_atomic64_clear(&soft_stats->rx_packet_errors);
107 	rte_atomic64_clear(&soft_stats->tx_oversized);
108 }
109 
110 static void enic_init_soft_stats(struct enic *enic)
111 {
112 	struct enic_soft_stats *soft_stats = &enic->soft_stats;
113 	rte_atomic64_init(&soft_stats->rx_nombuf);
114 	rte_atomic64_init(&soft_stats->rx_packet_errors);
115 	rte_atomic64_init(&soft_stats->tx_oversized);
116 	enic_clear_soft_stats(enic);
117 }
118 
119 int enic_dev_stats_clear(struct enic *enic)
120 {
121 	int ret;
122 
123 	ret = vnic_dev_stats_clear(enic->vdev);
124 	if (ret != 0) {
125 		dev_err(enic, "Error in clearing stats\n");
126 		return ret;
127 	}
128 	enic_clear_soft_stats(enic);
129 
130 	return 0;
131 }
132 
/*
 * Fill *r_stats from the hardware counter dump combined with the PMD's
 * software counters. Returns 0 on success or the vnic_dev_stats_dump()
 * error code.
 */
int enic_dev_stats_get(struct enic *enic, struct rte_eth_stats *r_stats)
{
	struct vnic_stats *stats;
	struct enic_soft_stats *soft_stats = &enic->soft_stats;
	int64_t rx_truncated;
	uint64_t rx_packet_errors;
	int ret = vnic_dev_stats_dump(enic->vdev, &stats);

	if (ret) {
		dev_err(enic, "Error in getting stats\n");
		return ret;
	}

	/* The number of truncated packets can only be calculated by
	 * subtracting a hardware counter from error packets received by
	 * the driver. Note: this causes transient inaccuracies in the
	 * ipackets count. Also, the length of truncated packets are
	 * counted in ibytes even though truncated packets are dropped
	 * which can make ibytes be slightly higher than it should be.
	 */
	rx_packet_errors = rte_atomic64_read(&soft_stats->rx_packet_errors);
	rx_truncated = rx_packet_errors - stats->rx.rx_errors;

	r_stats->ipackets = stats->rx.rx_frames_ok - rx_truncated;
	r_stats->opackets = stats->tx.tx_frames_ok;

	r_stats->ibytes = stats->rx.rx_bytes_ok;
	r_stats->obytes = stats->tx.tx_bytes_ok;

	/* Oversized packets rejected by the PMD count as Tx errors */
	r_stats->ierrors = stats->rx.rx_errors + stats->rx.rx_drop;
	r_stats->oerrors = stats->tx.tx_errors
			   + rte_atomic64_read(&soft_stats->tx_oversized);

	/* Truncated packets were received but dropped, so report as missed */
	r_stats->imissed = stats->rx.rx_no_bufs + rx_truncated;

	r_stats->rx_nombuf = rte_atomic64_read(&soft_stats->rx_nombuf);
	return 0;
}
171 
172 int enic_del_mac_address(struct enic *enic, int mac_index)
173 {
174 	struct rte_eth_dev *eth_dev = enic->rte_dev;
175 	uint8_t *mac_addr = eth_dev->data->mac_addrs[mac_index].addr_bytes;
176 
177 	return vnic_dev_del_addr(enic->vdev, mac_addr);
178 }
179 
180 int enic_set_mac_address(struct enic *enic, uint8_t *mac_addr)
181 {
182 	int err;
183 
184 	if (!is_eth_addr_valid(mac_addr)) {
185 		dev_err(enic, "invalid mac address\n");
186 		return -EINVAL;
187 	}
188 
189 	err = vnic_dev_add_addr(enic->vdev, mac_addr);
190 	if (err)
191 		dev_err(enic, "add mac addr failed\n");
192 	return err;
193 }
194 
195 void enic_free_rq_buf(struct rte_mbuf **mbuf)
196 {
197 	if (*mbuf == NULL)
198 		return;
199 
200 	rte_pktmbuf_free(*mbuf);
201 	*mbuf = NULL;
202 }
203 
/*
 * Program the vNIC hardware resources (RQs, WQs, CQs, interrupts) with
 * their initial configuration. Run at start time, before any queue is
 * enabled.
 */
void enic_init_vnic_resources(struct enic *enic)
{
	unsigned int error_interrupt_enable = 1;
	unsigned int error_interrupt_offset = 0;
	unsigned int rxq_interrupt_enable = 0;
	unsigned int rxq_interrupt_offset = ENICPMD_RXQ_INTR_OFFSET;
	unsigned int index = 0;
	unsigned int cq_idx;
	struct vnic_rq *data_rq;

	if (enic->rte_dev->data->dev_conf.intr_conf.rxq)
		rxq_interrupt_enable = 1;

	for (index = 0; index < enic->rq_count; index++) {
		cq_idx = enic_cq_rq(enic, enic_rte_rq_idx_to_sop_idx(index));

		/* Each ethdev Rx queue maps to a SOP RQ plus, when scatter
		 * Rx is in use, a data RQ; both share one completion queue.
		 */
		vnic_rq_init(&enic->rq[enic_rte_rq_idx_to_sop_idx(index)],
			cq_idx,
			error_interrupt_enable,
			error_interrupt_offset);

		data_rq = &enic->rq[enic_rte_rq_idx_to_data_idx(index, enic)];
		if (data_rq->in_use)
			vnic_rq_init(data_rq,
				     cq_idx,
				     error_interrupt_enable,
				     error_interrupt_offset);
		vnic_cq_init(&enic->cq[cq_idx],
			0 /* flow_control_enable */,
			1 /* color_enable */,
			0 /* cq_head */,
			0 /* cq_tail */,
			1 /* cq_tail_color */,
			rxq_interrupt_enable,
			1 /* cq_entry_enable */,
			0 /* cq_message_enable */,
			rxq_interrupt_offset,
			0 /* cq_message_addr */);
		/* One MSI-X vector per Rx queue when rxq interrupts are on */
		if (rxq_interrupt_enable)
			rxq_interrupt_offset++;
	}

	for (index = 0; index < enic->wq_count; index++) {
		vnic_wq_init(&enic->wq[index],
			enic_cq_wq(enic, index),
			error_interrupt_enable,
			error_interrupt_offset);
		/* Compute unsupported ol flags for enic_prep_pkts() */
		enic->wq[index].tx_offload_notsup_mask =
			PKT_TX_OFFLOAD_MASK ^ enic->tx_offload_mask;

		cq_idx = enic_cq_wq(enic, index);
		/* Tx completions are reported via a DMA'd message to
		 * cqmsg_rz rather than CQ entries (cq_message_enable=1).
		 */
		vnic_cq_init(&enic->cq[cq_idx],
			0 /* flow_control_enable */,
			1 /* color_enable */,
			0 /* cq_head */,
			0 /* cq_tail */,
			1 /* cq_tail_color */,
			0 /* interrupt_enable */,
			0 /* cq_entry_enable */,
			1 /* cq_message_enable */,
			0 /* interrupt offset */,
			(uint64_t)enic->wq[index].cqmsg_rz->iova);
	}

	for (index = 0; index < enic->intr_count; index++) {
		vnic_intr_init(&enic->intr[index],
			       enic->config.intr_timer_usec,
			       enic->config.intr_timer_type,
			       /*mask_on_assertion*/1);
	}
}
276 
277 
/*
 * Fill every descriptor of the RQ ring with a freshly allocated receive
 * mbuf. The buffers are not handed to the NIC here; the posted index is
 * pushed later by enic_initial_post_rx() once the RQ is enabled.
 * Returns 0 on success (or for an unused RQ), -ENOMEM if an mbuf
 * allocation fails.
 */
int
enic_alloc_rx_queue_mbufs(struct enic *enic, struct vnic_rq *rq)
{
	struct rte_mbuf *mb;
	struct rq_enet_desc *rqd = rq->ring.descs;
	unsigned i;
	dma_addr_t dma_addr;
	uint32_t max_rx_pkt_len;
	uint16_t rq_buf_len;

	if (!rq->in_use)
		return 0;

	dev_debug(enic, "queue %u, allocating %u rx queue mbufs\n", rq->index,
		  rq->ring.desc_count);

	/*
	 * If *not* using scatter and the mbuf size is greater than the
	 * requested max packet size (max_rx_pkt_len), then reduce the
	 * posted buffer size to max_rx_pkt_len. HW still receives packets
	 * larger than max_rx_pkt_len, but they will be truncated, which we
	 * drop in the rx handler. Not ideal, but better than returning
	 * large packets when the user is not expecting them.
	 */
	max_rx_pkt_len = enic->rte_dev->data->dev_conf.rxmode.max_rx_pkt_len;
	rq_buf_len = rte_pktmbuf_data_room_size(rq->mp) - RTE_PKTMBUF_HEADROOM;
	if (max_rx_pkt_len < rq_buf_len && !rq->data_queue_enable)
		rq_buf_len = max_rx_pkt_len;
	for (i = 0; i < rq->ring.desc_count; i++, rqd++) {
		mb = rte_mbuf_raw_alloc(rq->mp);
		if (mb == NULL) {
			dev_err(enic, "RX mbuf alloc failed queue_id=%u\n",
			(unsigned)rq->index);
			return -ENOMEM;
		}

		mb->data_off = RTE_PKTMBUF_HEADROOM;
		dma_addr = (dma_addr_t)(mb->buf_iova
			   + RTE_PKTMBUF_HEADROOM);
		/* SOP RQ buffers start packets; data RQ buffers continue them */
		rq_enet_desc_enc(rqd, dma_addr,
				(rq->is_sop ? RQ_ENET_TYPE_ONLY_SOP
				: RQ_ENET_TYPE_NOT_SOP),
				rq_buf_len);
		rq->mbuf_ring[i] = mb;
	}
	/*
	 * Do not post the buffers to the NIC until we enable the RQ via
	 * enic_start_rq().
	 */
	rq->need_initial_post = true;
	/* Initialize fetch index while RQ is disabled */
	iowrite32(0, &rq->ctrl->fetch_index);
	return 0;
}
332 
/*
 * Post the Rx buffers for the first time. enic_alloc_rx_queue_mbufs() has
 * allocated the buffers and filled the RQ descriptor ring. Just need to push
 * the post index to the NIC.
 */
static void
enic_initial_post_rx(struct enic *enic, struct vnic_rq *rq)
{
	if (!rq->in_use || !rq->need_initial_post)
		return;

	/* make sure all prior writes are complete before doing the PIO write */
	/* NOTE(review): rte_rmb() is a read barrier; ordering prior *writes*
	 * before PIO would normally call for rte_wmb() — confirm intent.
	 */
	rte_rmb();

	/* Post all but the last buffer to VIC. */
	rq->posted_index = rq->ring.desc_count - 1;

	rq->rx_nb_hold = 0;

	dev_debug(enic, "port=%u, qidx=%u, Write %u posted idx, %u sw held\n",
		enic->port_id, rq->index, rq->posted_index, rq->rx_nb_hold);
	iowrite32(rq->posted_index, &rq->ctrl->posted_index);
	rte_rmb();
	rq->need_initial_post = false;
}
358 
/*
 * vnic_dev DMA-memory allocator callback. Reserves an IOVA-contiguous,
 * page-aligned memzone, returns its virtual address, and stores the IOVA
 * in *dma_handle. Each allocation is recorded on enic->memzone_list so
 * enic_free_consistent() can find and release it later.
 * Returns NULL (with *dma_handle == 0) on failure.
 */
void *
enic_alloc_consistent(void *priv, size_t size,
	dma_addr_t *dma_handle, uint8_t *name)
{
	void *vaddr;
	const struct rte_memzone *rz;
	*dma_handle = 0;
	struct enic *enic = (struct enic *)priv;
	struct enic_memzone_entry *mze;

	rz = rte_memzone_reserve_aligned((const char *)name, size,
			SOCKET_ID_ANY, RTE_MEMZONE_IOVA_CONTIG, ENIC_PAGE_SIZE);
	if (!rz) {
		pr_err("%s : Failed to allocate memory requested for %s\n",
			__func__, name);
		return NULL;
	}

	vaddr = rz->addr;
	*dma_handle = (dma_addr_t)rz->iova;

	/* Track the zone so enic_free_consistent() can match vaddr/iova */
	mze = rte_malloc("enic memzone entry",
			 sizeof(struct enic_memzone_entry), 0);

	if (!mze) {
		pr_err("%s : Failed to allocate memory for memzone list\n",
		       __func__);
		/* Undo the reservation so the failure leaks nothing */
		rte_memzone_free(rz);
		return NULL;
	}

	mze->rz = rz;

	rte_spinlock_lock(&enic->memzone_list_lock);
	LIST_INSERT_HEAD(&enic->memzone_list, mze, entries);
	rte_spinlock_unlock(&enic->memzone_list_lock);

	return vaddr;
}
398 
/*
 * Counterpart of enic_alloc_consistent(): look up the memzone matching
 * both the virtual address and the IOVA on enic->memzone_list, unlink it
 * under the list lock, then free the memzone and its tracking entry.
 * Logs a warning and returns if no matching entry is found.
 */
void
enic_free_consistent(void *priv,
		     __rte_unused size_t size,
		     void *vaddr,
		     dma_addr_t dma_handle)
{
	struct enic_memzone_entry *mze;
	struct enic *enic = (struct enic *)priv;

	rte_spinlock_lock(&enic->memzone_list_lock);
	LIST_FOREACH(mze, &enic->memzone_list, entries) {
		if (mze->rz->addr == vaddr &&
		    mze->rz->iova == dma_handle)
			break;
	}
	if (mze == NULL) {
		rte_spinlock_unlock(&enic->memzone_list_lock);
		dev_warning(enic,
			    "Tried to free memory, but couldn't find it in the memzone list\n");
		return;
	}
	LIST_REMOVE(mze, entries);
	rte_spinlock_unlock(&enic->memzone_list_lock);
	/* Free outside the lock; the entry is already unlinked */
	rte_memzone_free(mze->rz);
	rte_free(mze);
}
425 
426 int enic_link_update(struct rte_eth_dev *eth_dev)
427 {
428 	struct enic *enic = pmd_priv(eth_dev);
429 	struct rte_eth_link link;
430 
431 	memset(&link, 0, sizeof(link));
432 	link.link_status = enic_get_link_status(enic);
433 	link.link_duplex = ETH_LINK_FULL_DUPLEX;
434 	link.link_speed = vnic_dev_port_speed(enic->vdev);
435 
436 	return rte_eth_linkstatus_set(eth_dev, &link);
437 }
438 
/*
 * Handler for the LSC/error interrupt (vector 0): refresh link state,
 * notify registered LSC callbacks, and log any queue hardware errors.
 */
static void
enic_intr_handler(void *arg)
{
	struct rte_eth_dev *dev = (struct rte_eth_dev *)arg;
	struct enic *enic = pmd_priv(dev);

	/* Return credits so the device can raise this interrupt again */
	vnic_intr_return_all_credits(&enic->intr[ENICPMD_LSC_INTR_OFFSET]);

	enic_link_update(dev);
	rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_LSC, NULL);
	enic_log_q_error(enic);
	/* Re-enable irq in case of INTx */
	rte_intr_ack(&enic->pdev->intr_handle);
}
453 
/*
 * Set up per-Rx-queue interrupts (event fds plus the queue-to-vector
 * map) when rxmode requests them. Returns 0 on success (or when rxq
 * interrupts are not requested), -ENOTSUP without MSI-X, -ENOMEM or a
 * negative errno on failure.
 */
static int enic_rxq_intr_init(struct enic *enic)
{
	struct rte_intr_handle *intr_handle;
	uint32_t rxq_intr_count, i;
	int err;

	intr_handle = enic->rte_dev->intr_handle;
	if (!enic->rte_dev->data->dev_conf.intr_conf.rxq)
		return 0;
	/*
	 * Rx queue interrupts only work when we have MSI-X interrupts,
	 * one per queue. Sharing one interrupt is technically
	 * possible with VIC, but it is not worth the complications it brings.
	 */
	if (!rte_intr_cap_multiple(intr_handle)) {
		dev_err(enic, "Rx queue interrupts require MSI-X interrupts"
			" (vfio-pci driver)\n");
		return -ENOTSUP;
	}
	/* Vectors below ENICPMD_RXQ_INTR_OFFSET are reserved (LSC/error) */
	rxq_intr_count = enic->intr_count - ENICPMD_RXQ_INTR_OFFSET;
	err = rte_intr_efd_enable(intr_handle, rxq_intr_count);
	if (err) {
		dev_err(enic, "Failed to enable event fds for Rx queue"
			" interrupts\n");
		return err;
	}
	intr_handle->intr_vec = rte_zmalloc("enic_intr_vec",
					    rxq_intr_count * sizeof(int), 0);
	if (intr_handle->intr_vec == NULL) {
		dev_err(enic, "Failed to allocate intr_vec\n");
		return -ENOMEM;
	}
	/* Rx queue i uses MSI-X vector i + ENICPMD_RXQ_INTR_OFFSET */
	for (i = 0; i < rxq_intr_count; i++)
		intr_handle->intr_vec[i] = i + ENICPMD_RXQ_INTR_OFFSET;
	return 0;
}
490 
491 static void enic_rxq_intr_deinit(struct enic *enic)
492 {
493 	struct rte_intr_handle *intr_handle;
494 
495 	intr_handle = enic->rte_dev->intr_handle;
496 	rte_intr_efd_disable(intr_handle);
497 	if (intr_handle->intr_vec != NULL) {
498 		rte_free(intr_handle->intr_vec);
499 		intr_handle->intr_vec = NULL;
500 	}
501 }
502 
/*
 * Pre-fill the constant WQ descriptor fields used by the simple Tx
 * handler for the given queue. Run once before the queue is used.
 */
static void enic_prep_wq_for_simple_tx(struct enic *enic, uint16_t queue_idx)
{
	struct wq_enet_desc *desc;
	struct vnic_wq *wq;
	unsigned int i;

	/*
	 * Fill WQ descriptor fields that never change. Every descriptor is
	 * one packet, so set EOP. Also set CQ_ENTRY every ENIC_WQ_CQ_THRESH
	 * descriptors (i.e. request one completion update every 32 packets).
	 */
	wq = &enic->wq[queue_idx];
	desc = (struct wq_enet_desc *)wq->ring.descs;
	for (i = 0; i < wq->ring.desc_count; i++, desc++) {
		desc->header_length_flags = 1 << WQ_ENET_FLAGS_EOP_SHIFT;
		if (i % ENIC_WQ_CQ_THRESH == ENIC_WQ_CQ_THRESH - 1)
			desc->header_length_flags |=
				(1 << WQ_ENET_FLAGS_CQ_ENTRY_SHIFT);
	}
}
523 
/*
 * The 'strong' version is in enic_rxtx_vec_avx2.c. This weak version is
 * used when that file is not compiled.
 */
528 __rte_weak bool
529 enic_use_vector_rx_handler(__rte_unused struct rte_eth_dev *eth_dev)
530 {
531 	return false;
532 }
533 
/*
 * Select the Rx burst handler for the device: 64B-CQ handler when cq64
 * is enabled, otherwise the vectorized, non-scatter, or default handler
 * in that order of preference.
 */
void enic_pick_rx_handler(struct rte_eth_dev *eth_dev)
{
	struct enic *enic = pmd_priv(eth_dev);

	/* 64B CQ entries require their own handler; no other choice applies */
	if (enic->cq64) {
		ENICPMD_LOG(DEBUG, " use the normal Rx handler for 64B CQ entry");
		eth_dev->rx_pkt_burst = &enic_recv_pkts_64;
		return;
	}
	/*
	 * Preference order:
	 * 1. The vectorized handler if possible and requested.
	 * 2. The non-scatter, simplified handler if scatter Rx is not used.
	 * 3. The default handler as a fallback.
	 */
	if (enic_use_vector_rx_handler(eth_dev))
		return;
	if (enic->rq_count > 0 && enic->rq[0].data_queue_enable == 0) {
		ENICPMD_LOG(DEBUG, " use the non-scatter Rx handler");
		eth_dev->rx_pkt_burst = &enic_noscatter_recv_pkts;
	} else {
		ENICPMD_LOG(DEBUG, " use the normal Rx handler");
		eth_dev->rx_pkt_burst = &enic_recv_pkts;
	}
}
559 
560 /* Secondary process uses this to set the Tx handler */
561 void enic_pick_tx_handler(struct rte_eth_dev *eth_dev)
562 {
563 	struct enic *enic = pmd_priv(eth_dev);
564 
565 	if (enic->use_simple_tx_handler) {
566 		ENICPMD_LOG(DEBUG, " use the simple tx handler");
567 		eth_dev->tx_pkt_burst = &enic_simple_xmit_pkts;
568 	} else {
569 		ENICPMD_LOG(DEBUG, " use the default tx handler");
570 		eth_dev->tx_pkt_burst = &enic_xmit_pkts;
571 	}
572 }
573 
/*
 * Device start: prepare the vectorized-Rx mbuf template, turn on LSC
 * and Rx queue interrupts, fill all RQs with mbufs, pick the Rx/Tx
 * burst handlers, start every queue, and enable the vNIC. Returns 0 on
 * success or a negative errno from the failing step.
 */
int enic_enable(struct enic *enic)
{
	unsigned int index;
	int err;
	struct rte_eth_dev *eth_dev = enic->rte_dev;
	uint64_t simple_tx_offloads;
	uintptr_t p;

	if (enic->enable_avx2_rx) {
		struct rte_mbuf mb_def = { .buf_addr = 0 };

		/*
		 * mbuf_initializer contains const-after-init fields of
		 * receive mbufs (i.e. 64 bits of fields from rearm_data).
		 * It is currently used by the vectorized handler.
		 */
		mb_def.nb_segs = 1;
		mb_def.data_off = RTE_PKTMBUF_HEADROOM;
		mb_def.port = enic->port_id;
		rte_mbuf_refcnt_set(&mb_def, 1);
		rte_compiler_barrier();
		p = (uintptr_t)&mb_def.rearm_data;
		enic->mbuf_initializer = *(uint64_t *)p;
	}

	eth_dev->data->dev_link.link_speed = vnic_dev_port_speed(enic->vdev);
	eth_dev->data->dev_link.link_duplex = ETH_LINK_FULL_DUPLEX;

	/* vnic notification of link status has already been turned on in
	 * enic_dev_init() which is called during probe time.  Here we are
	 * just turning on interrupt vector 0 if needed.
	 */
	if (eth_dev->data->dev_conf.intr_conf.lsc)
		vnic_dev_notify_set(enic->vdev, 0);

	err = enic_rxq_intr_init(enic);
	if (err)
		return err;

	/* Initialize flowman if not already initialized during probe */
	if (enic->fm == NULL && enic_fm_init(enic))
		dev_warning(enic, "Init of flowman failed.\n");

	for (index = 0; index < enic->rq_count; index++) {
		err = enic_alloc_rx_queue_mbufs(enic,
			&enic->rq[enic_rte_rq_idx_to_sop_idx(index)]);
		if (err) {
			dev_err(enic, "Failed to alloc sop RX queue mbufs\n");
			return err;
		}
		err = enic_alloc_rx_queue_mbufs(enic,
			&enic->rq[enic_rte_rq_idx_to_data_idx(index, enic)]);
		if (err) {
			/* release the allocated mbufs for the sop rq*/
			enic_rxmbuf_queue_release(enic,
				&enic->rq[enic_rte_rq_idx_to_sop_idx(index)]);

			dev_err(enic, "Failed to alloc data RX queue mbufs\n");
			return err;
		}
	}

	/*
	 * Use the simple TX handler if possible. Only checksum offloads
	 * and vlan insertion are supported.
	 */
	simple_tx_offloads = enic->tx_offload_capa &
		(DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM |
		 DEV_TX_OFFLOAD_VLAN_INSERT |
		 DEV_TX_OFFLOAD_IPV4_CKSUM |
		 DEV_TX_OFFLOAD_UDP_CKSUM |
		 DEV_TX_OFFLOAD_TCP_CKSUM);
	if ((eth_dev->data->dev_conf.txmode.offloads &
	     ~simple_tx_offloads) == 0) {
		ENICPMD_LOG(DEBUG, " use the simple tx handler");
		eth_dev->tx_pkt_burst = &enic_simple_xmit_pkts;
		for (index = 0; index < enic->wq_count; index++)
			enic_prep_wq_for_simple_tx(enic, index);
		/* Remember the choice for secondary processes */
		enic->use_simple_tx_handler = 1;
	} else {
		ENICPMD_LOG(DEBUG, " use the default tx handler");
		eth_dev->tx_pkt_burst = &enic_xmit_pkts;
	}

	enic_pick_rx_handler(eth_dev);

	for (index = 0; index < enic->wq_count; index++)
		enic_start_wq(enic, index);
	for (index = 0; index < enic->rq_count; index++)
		enic_start_rq(enic, index);

	/* Program the port MAC address before enabling the vNIC */
	vnic_dev_add_addr(enic->vdev, enic->mac_addr);

	vnic_dev_enable_wait(enic->vdev);

	/* Register and enable error interrupt */
	rte_intr_callback_register(&(enic->pdev->intr_handle),
		enic_intr_handler, (void *)enic->rte_dev);

	rte_intr_enable(&(enic->pdev->intr_handle));
	/* Unmask LSC interrupt */
	vnic_intr_unmask(&enic->intr[ENICPMD_LSC_INTR_OFFSET]);

	return 0;
}
679 
680 int enic_alloc_intr_resources(struct enic *enic)
681 {
682 	int err;
683 	unsigned int i;
684 
685 	dev_info(enic, "vNIC resources used:  "\
686 		"wq %d rq %d cq %d intr %d\n",
687 		enic->wq_count, enic_vnic_rq_count(enic),
688 		enic->cq_count, enic->intr_count);
689 
690 	for (i = 0; i < enic->intr_count; i++) {
691 		err = vnic_intr_alloc(enic->vdev, &enic->intr[i], i);
692 		if (err) {
693 			enic_free_vnic_resources(enic);
694 			return err;
695 		}
696 	}
697 	return 0;
698 }
699 
/*
 * Release everything attached to one ethdev Rx queue: the Rx-burst
 * free-mbuf stash, both SOP and data mbuf rings, both vnic RQs, and the
 * shared CQ. Safe to call with NULL. rxq is the SOP RQ pointer stored
 * as the ethdev queue.
 */
void enic_free_rq(void *rxq)
{
	struct vnic_rq *rq_sop, *rq_data;
	struct enic *enic;

	if (rxq == NULL)
		return;

	rq_sop = (struct vnic_rq *)rxq;
	enic = vnic_dev_priv(rq_sop->vdev);
	rq_data = &enic->rq[rq_sop->data_queue_idx];

	if (rq_sop->free_mbufs) {
		struct rte_mbuf **mb;
		int i;

		mb = rq_sop->free_mbufs;
		/* Only the tail num_free_mbufs entries of the stash are live */
		for (i = ENIC_RX_BURST_MAX - rq_sop->num_free_mbufs;
		     i < ENIC_RX_BURST_MAX; i++)
			rte_pktmbuf_free(mb[i]);
		rte_free(rq_sop->free_mbufs);
		rq_sop->free_mbufs = NULL;
		rq_sop->num_free_mbufs = 0;
	}

	enic_rxmbuf_queue_release(enic, rq_sop);
	if (rq_data->in_use)
		enic_rxmbuf_queue_release(enic, rq_data);

	rte_free(rq_sop->mbuf_ring);
	if (rq_data->in_use)
		rte_free(rq_data->mbuf_ring);

	rq_sop->mbuf_ring = NULL;
	rq_data->mbuf_ring = NULL;

	vnic_rq_free(rq_sop);
	if (rq_data->in_use)
		vnic_rq_free(rq_data);

	/* SOP and data RQs share one completion queue */
	vnic_cq_free(&enic->cq[enic_sop_rq_idx_to_cq_idx(rq_sop->index)]);

	rq_sop->in_use = 0;
	rq_data->in_use = 0;
}
745 
746 void enic_start_wq(struct enic *enic, uint16_t queue_idx)
747 {
748 	struct rte_eth_dev_data *data = enic->dev_data;
749 	vnic_wq_enable(&enic->wq[queue_idx]);
750 	data->tx_queue_state[queue_idx] = RTE_ETH_QUEUE_STATE_STARTED;
751 }
752 
753 int enic_stop_wq(struct enic *enic, uint16_t queue_idx)
754 {
755 	struct rte_eth_dev_data *data = enic->dev_data;
756 	int ret;
757 
758 	ret = vnic_wq_disable(&enic->wq[queue_idx]);
759 	if (ret)
760 		return ret;
761 
762 	data->tx_queue_state[queue_idx] = RTE_ETH_QUEUE_STATE_STOPPED;
763 	return 0;
764 }
765 
/*
 * Enable one ethdev Rx queue: the data RQ (when scatter is in use) is
 * enabled and posted before the SOP RQ, with rte_mb() ordering the two
 * PIO enables, then the queue is marked started.
 */
void enic_start_rq(struct enic *enic, uint16_t queue_idx)
{
	struct rte_eth_dev_data *data = enic->dev_data;
	struct vnic_rq *rq_sop;
	struct vnic_rq *rq_data;
	rq_sop = &enic->rq[enic_rte_rq_idx_to_sop_idx(queue_idx)];
	rq_data = &enic->rq[rq_sop->data_queue_idx];

	if (rq_data->in_use) {
		vnic_rq_enable(rq_data);
		enic_initial_post_rx(enic, rq_data);
	}
	rte_mb();
	vnic_rq_enable(rq_sop);
	enic_initial_post_rx(enic, rq_sop);
	data->rx_queue_state[queue_idx] = RTE_ETH_QUEUE_STATE_STARTED;
}
783 
/*
 * Disable one ethdev Rx queue: the SOP RQ is disabled first, then the
 * data RQ (if in use), with rte_mb() between the two PIO writes. Both
 * disables are attempted even if the first fails; the SOP error takes
 * precedence in the return value. Returns 0 on success.
 */
int enic_stop_rq(struct enic *enic, uint16_t queue_idx)
{
	struct rte_eth_dev_data *data = enic->dev_data;
	int ret1 = 0, ret2 = 0;
	struct vnic_rq *rq_sop;
	struct vnic_rq *rq_data;
	rq_sop = &enic->rq[enic_rte_rq_idx_to_sop_idx(queue_idx)];
	rq_data = &enic->rq[rq_sop->data_queue_idx];

	ret2 = vnic_rq_disable(rq_sop);
	rte_mb();
	if (rq_data->in_use)
		ret1 = vnic_rq_disable(rq_data);

	if (ret2)
		return ret2;
	else if (ret1)
		return ret1;

	data->rx_queue_state[queue_idx] = RTE_ETH_QUEUE_STATE_STOPPED;
	return 0;
}
806 
/*
 * Allocate one ethdev Rx queue: the SOP RQ, the data RQ (when scatter
 * Rx is in use), the shared completion queue, and the software mbuf
 * rings.
 *
 * @param enic        device private data (may be a VF representor; the
 *                    representor queue is translated to a reserved PF queue)
 * @param queue_idx   ethdev-level Rx queue index
 * @param socket_id   NUMA socket for the ring allocations
 * @param mp          mempool that receive mbufs are taken from
 * @param nb_desc     requested descriptor count, split across sop/data
 * @param free_thresh Rx free threshold applied to both RQs
 * @return 0 on success, -ENOMEM on any allocation/setup failure
 */
int enic_alloc_rq(struct enic *enic, uint16_t queue_idx,
	unsigned int socket_id, struct rte_mempool *mp,
	uint16_t nb_desc, uint16_t free_thresh)
{
	struct enic_vf_representor *vf;
	int rc;
	uint16_t sop_queue_idx;
	uint16_t data_queue_idx;
	uint16_t cq_idx;
	struct vnic_rq *rq_sop;
	struct vnic_rq *rq_data;
	unsigned int mbuf_size, mbufs_per_pkt;
	unsigned int nb_sop_desc, nb_data_desc;
	uint16_t min_sop, max_sop, min_data, max_data;
	uint32_t max_rx_pkt_len;

	/*
	 * Representor uses a reserved PF queue. Translate representor
	 * queue number to PF queue number.
	 */
	if (enic_is_vf_rep(enic)) {
		RTE_ASSERT(queue_idx == 0);
		vf = VF_ENIC_TO_VF_REP(enic);
		sop_queue_idx = vf->pf_rq_sop_idx;
		data_queue_idx = vf->pf_rq_data_idx;
		enic = vf->pf;
		queue_idx = sop_queue_idx;
	} else {
		sop_queue_idx = enic_rte_rq_idx_to_sop_idx(queue_idx);
		data_queue_idx = enic_rte_rq_idx_to_data_idx(queue_idx, enic);
	}
	cq_idx = enic_cq_rq(enic, sop_queue_idx);
	rq_sop = &enic->rq[sop_queue_idx];
	rq_data = &enic->rq[data_queue_idx];
	rq_sop->is_sop = 1;
	rq_sop->data_queue_idx = data_queue_idx;
	rq_data->is_sop = 0;
	rq_data->data_queue_idx = 0;
	rq_sop->socket_id = socket_id;
	rq_sop->mp = mp;
	rq_data->socket_id = socket_id;
	rq_data->mp = mp;
	rq_sop->in_use = 1;
	rq_sop->rx_free_thresh = free_thresh;
	rq_data->rx_free_thresh = free_thresh;
	dev_debug(enic, "Set queue_id:%u free thresh:%u\n", queue_idx,
		  free_thresh);

	mbuf_size = (uint16_t)(rte_pktmbuf_data_room_size(mp) -
			       RTE_PKTMBUF_HEADROOM);
	/* max_rx_pkt_len includes the ethernet header and CRC. */
	max_rx_pkt_len = enic->rte_dev->data->dev_conf.rxmode.max_rx_pkt_len;

	if (enic->rte_dev->data->dev_conf.rxmode.offloads &
	    DEV_RX_OFFLOAD_SCATTER) {
		dev_info(enic, "Rq %u Scatter rx mode enabled\n", queue_idx);
		/* ceil((max pkt len)/mbuf_size) */
		mbufs_per_pkt = (max_rx_pkt_len + mbuf_size - 1) / mbuf_size;
	} else {
		dev_info(enic, "Scatter rx mode disabled\n");
		mbufs_per_pkt = 1;
		if (max_rx_pkt_len > mbuf_size) {
			dev_warning(enic, "The maximum Rx packet size (%u) is"
				    " larger than the mbuf size (%u), and"
				    " scatter is disabled. Larger packets will"
				    " be truncated.\n",
				    max_rx_pkt_len, mbuf_size);
		}
	}

	/* A packet needing more than one mbuf requires the data RQ */
	if (mbufs_per_pkt > 1) {
		dev_info(enic, "Rq %u Scatter rx mode in use\n", queue_idx);
		rq_sop->data_queue_enable = 1;
		rq_data->in_use = 1;
		/*
		 * HW does not directly support rxmode.max_rx_pkt_len. HW always
		 * receives packet sizes up to the "max" MTU.
		 * If not using scatter, we can achieve the effect of dropping
		 * larger packets by reducing the size of posted buffers.
		 * See enic_alloc_rx_queue_mbufs().
		 */
		if (max_rx_pkt_len <
		    enic_mtu_to_max_rx_pktlen(enic->max_mtu)) {
			dev_warning(enic, "rxmode.max_rx_pkt_len is ignored"
				    " when scatter rx mode is in use.\n");
		}
	} else {
		dev_info(enic, "Rq %u Scatter rx mode not being used\n",
			 queue_idx);
		rq_sop->data_queue_enable = 0;
		rq_data->in_use = 0;
	}

	/* number of descriptors have to be a multiple of 32 */
	nb_sop_desc = (nb_desc / mbufs_per_pkt) & ENIC_ALIGN_DESCS_MASK;
	nb_data_desc = (nb_desc - nb_sop_desc) & ENIC_ALIGN_DESCS_MASK;

	rq_sop->max_mbufs_per_pkt = mbufs_per_pkt;
	rq_data->max_mbufs_per_pkt = mbufs_per_pkt;

	/* Clamp the sop/data split into the range the vNIC config allows */
	if (mbufs_per_pkt > 1) {
		min_sop = ENIC_RX_BURST_MAX;
		max_sop = ((enic->config.rq_desc_count /
			    (mbufs_per_pkt - 1)) & ENIC_ALIGN_DESCS_MASK);
		min_data = min_sop * (mbufs_per_pkt - 1);
		max_data = enic->config.rq_desc_count;
	} else {
		min_sop = ENIC_RX_BURST_MAX;
		max_sop = enic->config.rq_desc_count;
		min_data = 0;
		max_data = 0;
	}

	if (nb_desc < (min_sop + min_data)) {
		dev_warning(enic,
			    "Number of rx descs too low, adjusting to minimum\n");
		nb_sop_desc = min_sop;
		nb_data_desc = min_data;
	} else if (nb_desc > (max_sop + max_data)) {
		dev_warning(enic,
			    "Number of rx_descs too high, adjusting to maximum\n");
		nb_sop_desc = max_sop;
		nb_data_desc = max_data;
	}
	if (mbufs_per_pkt > 1) {
		dev_info(enic, "For max packet size %u and mbuf size %u valid"
			 " rx descriptor range is %u to %u\n",
			 max_rx_pkt_len, mbuf_size, min_sop + min_data,
			 max_sop + max_data);
	}
	dev_info(enic, "Using %d rx descriptors (sop %d, data %d)\n",
		 nb_sop_desc + nb_data_desc, nb_sop_desc, nb_data_desc);

	/* Allocate sop queue resources */
	rc = vnic_rq_alloc(enic->vdev, rq_sop, sop_queue_idx,
		nb_sop_desc, sizeof(struct rq_enet_desc));
	if (rc) {
		dev_err(enic, "error in allocation of sop rq\n");
		goto err_exit;
	}
	nb_sop_desc = rq_sop->ring.desc_count;

	if (rq_data->in_use) {
		/* Allocate data queue resources */
		rc = vnic_rq_alloc(enic->vdev, rq_data, data_queue_idx,
				   nb_data_desc,
				   sizeof(struct rq_enet_desc));
		if (rc) {
			dev_err(enic, "error in allocation of data rq\n");
			goto err_free_rq_sop;
		}
		nb_data_desc = rq_data->ring.desc_count;
	}
	/* Enable 64B CQ entry if requested */
	if (enic->cq64 && vnic_dev_set_cq_entry_size(enic->vdev,
				sop_queue_idx, VNIC_RQ_CQ_ENTRY_SIZE_64)) {
		dev_err(enic, "failed to enable 64B CQ entry on sop rq\n");
		goto err_free_rq_data;
	}
	if (rq_data->in_use && enic->cq64 &&
	    vnic_dev_set_cq_entry_size(enic->vdev, data_queue_idx,
		VNIC_RQ_CQ_ENTRY_SIZE_64)) {
		dev_err(enic, "failed to enable 64B CQ entry on data rq\n");
		goto err_free_rq_data;
	}

	/* One CQ serves both the sop and data RQs */
	rc = vnic_cq_alloc(enic->vdev, &enic->cq[cq_idx], cq_idx,
			   socket_id, nb_sop_desc + nb_data_desc,
			   enic->cq64 ?	sizeof(struct cq_enet_rq_desc_64) :
			   sizeof(struct cq_enet_rq_desc));
	if (rc) {
		dev_err(enic, "error in allocation of cq for rq\n");
		goto err_free_rq_data;
	}

	/* Allocate the mbuf rings */
	rq_sop->mbuf_ring = (struct rte_mbuf **)
		rte_zmalloc_socket("rq->mbuf_ring",
				   sizeof(struct rte_mbuf *) * nb_sop_desc,
				   RTE_CACHE_LINE_SIZE, rq_sop->socket_id);
	if (rq_sop->mbuf_ring == NULL)
		goto err_free_cq;

	if (rq_data->in_use) {
		rq_data->mbuf_ring = (struct rte_mbuf **)
			rte_zmalloc_socket("rq->mbuf_ring",
				sizeof(struct rte_mbuf *) * nb_data_desc,
				RTE_CACHE_LINE_SIZE, rq_sop->socket_id);
		if (rq_data->mbuf_ring == NULL)
			goto err_free_sop_mbuf;
	}

	rq_sop->free_mbufs = (struct rte_mbuf **)
		rte_zmalloc_socket("rq->free_mbufs",
				   sizeof(struct rte_mbuf *) *
				   ENIC_RX_BURST_MAX,
				   RTE_CACHE_LINE_SIZE, rq_sop->socket_id);
	if (rq_sop->free_mbufs == NULL)
		goto err_free_data_mbuf;
	rq_sop->num_free_mbufs = 0;

	rq_sop->tot_nb_desc = nb_desc; /* squirl away for MTU update function */

	return 0;

	/* Unwind ladder: each label intentionally falls through to the next */
err_free_data_mbuf:
	rte_free(rq_data->mbuf_ring);
err_free_sop_mbuf:
	rte_free(rq_sop->mbuf_ring);
err_free_cq:
	/* cleanup on error */
	vnic_cq_free(&enic->cq[cq_idx]);
err_free_rq_data:
	if (rq_data->in_use)
		vnic_rq_free(rq_data);
err_free_rq_sop:
	vnic_rq_free(rq_sop);
err_exit:
	return -ENOMEM;
}
1027 
1028 void enic_free_wq(void *txq)
1029 {
1030 	struct vnic_wq *wq;
1031 	struct enic *enic;
1032 
1033 	if (txq == NULL)
1034 		return;
1035 
1036 	wq = (struct vnic_wq *)txq;
1037 	enic = vnic_dev_priv(wq->vdev);
1038 	rte_memzone_free(wq->cqmsg_rz);
1039 	vnic_wq_free(wq);
1040 	vnic_cq_free(&enic->cq[enic->rq_count + wq->index]);
1041 }
1042 
1043 int enic_alloc_wq(struct enic *enic, uint16_t queue_idx,
1044 	unsigned int socket_id, uint16_t nb_desc)
1045 {
1046 	struct enic_vf_representor *vf;
1047 	int err;
1048 	struct vnic_wq *wq;
1049 	unsigned int cq_index;
1050 	char name[RTE_MEMZONE_NAMESIZE];
1051 	static int instance;
1052 
1053 	/*
1054 	 * Representor uses a reserved PF queue. Translate representor
1055 	 * queue number to PF queue number.
1056 	 */
1057 	if (enic_is_vf_rep(enic)) {
1058 		RTE_ASSERT(queue_idx == 0);
1059 		vf = VF_ENIC_TO_VF_REP(enic);
1060 		queue_idx = vf->pf_wq_idx;
1061 		cq_index = vf->pf_wq_cq_idx;
1062 		enic = vf->pf;
1063 	} else {
1064 		cq_index = enic_cq_wq(enic, queue_idx);
1065 	}
1066 	wq = &enic->wq[queue_idx];
1067 	wq->socket_id = socket_id;
1068 	/*
1069 	 * rte_eth_tx_queue_setup() checks min, max, and alignment. So just
1070 	 * print an info message for diagnostics.
1071 	 */
1072 	dev_info(enic, "TX Queues - effective number of descs:%d\n", nb_desc);
1073 
1074 	/* Allocate queue resources */
1075 	err = vnic_wq_alloc(enic->vdev, &enic->wq[queue_idx], queue_idx,
1076 		nb_desc,
1077 		sizeof(struct wq_enet_desc));
1078 	if (err) {
1079 		dev_err(enic, "error in allocation of wq\n");
1080 		return err;
1081 	}
1082 
1083 	err = vnic_cq_alloc(enic->vdev, &enic->cq[cq_index], cq_index,
1084 		socket_id, nb_desc,
1085 		sizeof(struct cq_enet_wq_desc));
1086 	if (err) {
1087 		vnic_wq_free(wq);
1088 		dev_err(enic, "error in allocation of cq for wq\n");
1089 	}
1090 
1091 	/* setup up CQ message */
1092 	snprintf((char *)name, sizeof(name),
1093 		 "vnic_cqmsg-%s-%d-%d", enic->bdf_name, queue_idx,
1094 		instance++);
1095 
1096 	wq->cqmsg_rz = rte_memzone_reserve_aligned((const char *)name,
1097 			sizeof(uint32_t), SOCKET_ID_ANY,
1098 			RTE_MEMZONE_IOVA_CONTIG, ENIC_PAGE_SIZE);
1099 	if (!wq->cqmsg_rz)
1100 		return -ENOMEM;
1101 
1102 	return err;
1103 }
1104 
/*
 * Quiesce the adapter for device stop: mask and flush all interrupt
 * sources, unregister the interrupt handler, disable the device and all
 * WQs/RQs, then clean the rings so any buffers still posted are freed.
 * Returns 0 on success or the first vnic_wq/rq_disable error (which
 * leaves the teardown partially done).
 */
int enic_disable(struct enic *enic)
{
	unsigned int i;
	int err;

	/* Mask every interrupt; the read-back flushes the posted write */
	for (i = 0; i < enic->intr_count; i++) {
		vnic_intr_mask(&enic->intr[i]);
		(void)vnic_intr_masked(&enic->intr[i]); /* flush write */
	}
	enic_rxq_intr_deinit(enic);
	rte_intr_disable(&enic->pdev->intr_handle);
	rte_intr_callback_unregister(&enic->pdev->intr_handle,
				     enic_intr_handler,
				     (void *)enic->rte_dev);

	vnic_dev_disable(enic->vdev);

	enic_fm_destroy(enic);

	/* NOTE(review): VFs skip removing the station MAC filter here —
	 * presumably it is not owned by the VF; confirm against host behavior.
	 */
	if (!enic_is_sriov_vf(enic))
		vnic_dev_del_addr(enic->vdev, enic->mac_addr);

	for (i = 0; i < enic->wq_count; i++) {
		err = vnic_wq_disable(&enic->wq[i]);
		if (err)
			return err;
	}
	/* Walk both SOP and data RQs; data RQs may be unused (no scatter) */
	for (i = 0; i < enic_vnic_rq_count(enic); i++) {
		if (enic->rq[i].in_use) {
			err = vnic_rq_disable(&enic->rq[i]);
			if (err)
				return err;
		}
	}

	/* If we were using interrupts, set the interrupt vector to -1
	 * to disable interrupts.  We are not disabling link notifications,
	 * though, as we want the polling of link status to continue working.
	 */
	if (enic->rte_dev->data->dev_conf.intr_conf.lsc)
		vnic_dev_notify_set(enic->vdev, -1);

	vnic_dev_set_reset_flag(enic->vdev, 1);

	/* Free any mbufs still held in the WQ/RQ rings */
	for (i = 0; i < enic->wq_count; i++)
		vnic_wq_clean(&enic->wq[i], enic_free_wq_buf);

	for (i = 0; i < enic_vnic_rq_count(enic); i++)
		if (enic->rq[i].in_use)
			vnic_rq_clean(&enic->rq[i], enic_free_rq_buf);
	for (i = 0; i < enic->cq_count; i++)
		vnic_cq_clean(&enic->cq[i]);
	for (i = 0; i < enic->intr_count; i++)
		vnic_intr_clean(&enic->intr[i]);

	return 0;
}
1162 
/*
 * Kick off an asynchronous devcmd via @start, then poll @finished once
 * per millisecond until it reports completion or ~2 seconds elapse.
 * Returns 0 on completion, a negative error propagated from either
 * callback, or -ETIMEDOUT.
 */
static int enic_dev_wait(struct vnic_dev *vdev,
	int (*start)(struct vnic_dev *, int),
	int (*finished)(struct vnic_dev *, int *),
	int arg)
{
	int attempt;
	int rc;

	rc = start(vdev, arg);
	if (rc)
		return rc;

	/* Poll for completion...2 seconds max */
	for (attempt = 0; attempt < 2000; attempt++) {
		int done = 0;

		rc = finished(vdev, &done);
		if (rc)
			return rc;
		if (done)
			return 0;
		usleep(1000);
	}
	return -ETIMEDOUT;
}
1187 
1188 static int enic_dev_open(struct enic *enic)
1189 {
1190 	int err;
1191 	int flags = CMD_OPENF_IG_DESCCACHE;
1192 
1193 	err = enic_dev_wait(enic->vdev, vnic_dev_open,
1194 		vnic_dev_open_done, flags);
1195 	if (err)
1196 		dev_err(enic_get_dev(enic),
1197 			"vNIC device open failed, err %d\n", err);
1198 
1199 	return err;
1200 }
1201 
1202 static int enic_set_rsskey(struct enic *enic, uint8_t *user_key)
1203 {
1204 	dma_addr_t rss_key_buf_pa;
1205 	union vnic_rss_key *rss_key_buf_va = NULL;
1206 	int err, i;
1207 	uint8_t name[RTE_MEMZONE_NAMESIZE];
1208 
1209 	RTE_ASSERT(user_key != NULL);
1210 	snprintf((char *)name, sizeof(name), "rss_key-%s", enic->bdf_name);
1211 	rss_key_buf_va = enic_alloc_consistent(enic, sizeof(union vnic_rss_key),
1212 		&rss_key_buf_pa, name);
1213 	if (!rss_key_buf_va)
1214 		return -ENOMEM;
1215 
1216 	for (i = 0; i < ENIC_RSS_HASH_KEY_SIZE; i++)
1217 		rss_key_buf_va->key[i / 10].b[i % 10] = user_key[i];
1218 
1219 	err = enic_set_rss_key(enic,
1220 		rss_key_buf_pa,
1221 		sizeof(union vnic_rss_key));
1222 
1223 	/* Save for later queries */
1224 	if (!err) {
1225 		rte_memcpy(&enic->rss_key, rss_key_buf_va,
1226 			   sizeof(union vnic_rss_key));
1227 	}
1228 	enic_free_consistent(enic, sizeof(union vnic_rss_key),
1229 		rss_key_buf_va, rss_key_buf_pa);
1230 
1231 	return err;
1232 }
1233 
1234 int enic_set_rss_reta(struct enic *enic, union vnic_rss_cpu *rss_cpu)
1235 {
1236 	dma_addr_t rss_cpu_buf_pa;
1237 	union vnic_rss_cpu *rss_cpu_buf_va = NULL;
1238 	int err;
1239 	uint8_t name[RTE_MEMZONE_NAMESIZE];
1240 
1241 	snprintf((char *)name, sizeof(name), "rss_cpu-%s", enic->bdf_name);
1242 	rss_cpu_buf_va = enic_alloc_consistent(enic, sizeof(union vnic_rss_cpu),
1243 		&rss_cpu_buf_pa, name);
1244 	if (!rss_cpu_buf_va)
1245 		return -ENOMEM;
1246 
1247 	rte_memcpy(rss_cpu_buf_va, rss_cpu, sizeof(union vnic_rss_cpu));
1248 
1249 	err = enic_set_rss_cpu(enic,
1250 		rss_cpu_buf_pa,
1251 		sizeof(union vnic_rss_cpu));
1252 
1253 	enic_free_consistent(enic, sizeof(union vnic_rss_cpu),
1254 		rss_cpu_buf_va, rss_cpu_buf_pa);
1255 
1256 	/* Save for later queries */
1257 	if (!err)
1258 		rte_memcpy(&enic->rss_cpu, rss_cpu, sizeof(union vnic_rss_cpu));
1259 	return err;
1260 }
1261 
1262 static int enic_set_niccfg(struct enic *enic, uint8_t rss_default_cpu,
1263 	uint8_t rss_hash_type, uint8_t rss_hash_bits, uint8_t rss_base_cpu,
1264 	uint8_t rss_enable)
1265 {
1266 	const uint8_t tso_ipid_split_en = 0;
1267 	int err;
1268 
1269 	err = enic_set_nic_cfg(enic,
1270 		rss_default_cpu, rss_hash_type,
1271 		rss_hash_bits, rss_base_cpu,
1272 		rss_enable, tso_ipid_split_en,
1273 		enic->ig_vlan_strip_en);
1274 
1275 	return err;
1276 }
1277 
1278 /* Initialize RSS with defaults, called from dev_configure */
1279 int enic_init_rss_nic_cfg(struct enic *enic)
1280 {
1281 	static uint8_t default_rss_key[] = {
1282 		85, 67, 83, 97, 119, 101, 115, 111, 109, 101,
1283 		80, 65, 76, 79, 117, 110, 105, 113, 117, 101,
1284 		76, 73, 78, 85, 88, 114, 111, 99, 107, 115,
1285 		69, 78, 73, 67, 105, 115, 99, 111, 111, 108,
1286 	};
1287 	struct rte_eth_rss_conf rss_conf;
1288 	union vnic_rss_cpu rss_cpu;
1289 	int ret, i;
1290 
1291 	rss_conf = enic->rte_dev->data->dev_conf.rx_adv_conf.rss_conf;
1292 	/*
1293 	 * If setting key for the first time, and the user gives us none, then
1294 	 * push the default key to NIC.
1295 	 */
1296 	if (rss_conf.rss_key == NULL) {
1297 		rss_conf.rss_key = default_rss_key;
1298 		rss_conf.rss_key_len = ENIC_RSS_HASH_KEY_SIZE;
1299 	}
1300 	ret = enic_set_rss_conf(enic, &rss_conf);
1301 	if (ret) {
1302 		dev_err(enic, "Failed to configure RSS\n");
1303 		return ret;
1304 	}
1305 	if (enic->rss_enable) {
1306 		/* If enabling RSS, use the default reta */
1307 		for (i = 0; i < ENIC_RSS_RETA_SIZE; i++) {
1308 			rss_cpu.cpu[i / 4].b[i % 4] =
1309 				enic_rte_rq_idx_to_sop_idx(i % enic->rq_count);
1310 		}
1311 		ret = enic_set_rss_reta(enic, &rss_cpu);
1312 		if (ret)
1313 			dev_err(enic, "Failed to set RSS indirection table\n");
1314 	}
1315 	return ret;
1316 }
1317 
1318 int enic_setup_finish(struct enic *enic)
1319 {
1320 	enic_init_soft_stats(enic);
1321 
1322 	/* switchdev: enable promisc mode on PF */
1323 	if (enic->switchdev_mode) {
1324 		vnic_dev_packet_filter(enic->vdev,
1325 				       0 /* directed  */,
1326 				       0 /* multicast */,
1327 				       0 /* broadcast */,
1328 				       1 /* promisc   */,
1329 				       0 /* allmulti  */);
1330 		enic->promisc = 1;
1331 		enic->allmulti = 0;
1332 		return 0;
1333 	}
1334 	/* Default conf */
1335 	vnic_dev_packet_filter(enic->vdev,
1336 		1 /* directed  */,
1337 		1 /* multicast */,
1338 		1 /* broadcast */,
1339 		0 /* promisc   */,
1340 		1 /* allmulti  */);
1341 
1342 	enic->promisc = 0;
1343 	enic->allmulti = 1;
1344 
1345 	return 0;
1346 }
1347 
1348 static int enic_rss_conf_valid(struct enic *enic,
1349 			       struct rte_eth_rss_conf *rss_conf)
1350 {
1351 	/* RSS is disabled per VIC settings. Ignore rss_conf. */
1352 	if (enic->flow_type_rss_offloads == 0)
1353 		return 0;
1354 	if (rss_conf->rss_key != NULL &&
1355 	    rss_conf->rss_key_len != ENIC_RSS_HASH_KEY_SIZE) {
1356 		dev_err(enic, "Given rss_key is %d bytes, it must be %d\n",
1357 			rss_conf->rss_key_len, ENIC_RSS_HASH_KEY_SIZE);
1358 		return -EINVAL;
1359 	}
1360 	if (rss_conf->rss_hf != 0 &&
1361 	    (rss_conf->rss_hf & enic->flow_type_rss_offloads) == 0) {
1362 		dev_err(enic, "Given rss_hf contains none of the supported"
1363 			" types\n");
1364 		return -EINVAL;
1365 	}
1366 	return 0;
1367 }
1368 
/* Set hash type and key according to rss_conf */
int enic_set_rss_conf(struct enic *enic, struct rte_eth_rss_conf *rss_conf)
{
	struct rte_eth_dev *eth_dev;
	uint64_t rss_hf;
	uint8_t rss_hash_type;
	uint8_t rss_enable;
	int ret;

	RTE_ASSERT(rss_conf != NULL);
	ret = enic_rss_conf_valid(enic, rss_conf);
	if (ret) {
		dev_err(enic, "RSS configuration (rss_conf) is invalid\n");
		return ret;
	}

	eth_dev = enic->rte_dev;
	rss_hash_type = 0;
	/* Only consider hash types the VIC actually supports */
	rss_hf = rss_conf->rss_hf & enic->flow_type_rss_offloads;
	/* RSS needs >1 queue, RSS mq_mode, and at least one usable type */
	if (enic->rq_count > 1 &&
	    (eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS_FLAG) &&
	    rss_hf != 0) {
		rss_enable = 1;
		/* Translate DPDK rss_hf bits to VIC NIC_CFG hash-type bits */
		if (rss_hf & (ETH_RSS_IPV4 | ETH_RSS_FRAG_IPV4 |
			      ETH_RSS_NONFRAG_IPV4_OTHER))
			rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_IPV4;
		if (rss_hf & ETH_RSS_NONFRAG_IPV4_TCP)
			rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_TCP_IPV4;
		if (rss_hf & ETH_RSS_NONFRAG_IPV4_UDP) {
			rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_UDP_IPV4;
			if (enic->udp_rss_weak) {
				/*
				 * 'TCP' is not a typo. The "weak" version of
				 * UDP RSS requires both the TCP and UDP bits
				 * be set. It does enable TCP RSS as well.
				 */
				rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_TCP_IPV4;
			}
		}
		if (rss_hf & (ETH_RSS_IPV6 | ETH_RSS_IPV6_EX |
			      ETH_RSS_FRAG_IPV6 | ETH_RSS_NONFRAG_IPV6_OTHER))
			rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_IPV6;
		if (rss_hf & (ETH_RSS_NONFRAG_IPV6_TCP | ETH_RSS_IPV6_TCP_EX))
			rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_TCP_IPV6;
		if (rss_hf & (ETH_RSS_NONFRAG_IPV6_UDP | ETH_RSS_IPV6_UDP_EX)) {
			rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_UDP_IPV6;
			if (enic->udp_rss_weak)
				rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_TCP_IPV6;
		}
	} else {
		/* RSS off: report no active hash types */
		rss_enable = 0;
		rss_hf = 0;
	}

	/* Set the hash key if provided */
	if (rss_enable && rss_conf->rss_key) {
		ret = enic_set_rsskey(enic, rss_conf->rss_key);
		if (ret) {
			dev_err(enic, "Failed to set RSS key\n");
			return ret;
		}
	}

	ret = enic_set_niccfg(enic, ENIC_RSS_DEFAULT_CPU, rss_hash_type,
			      ENIC_RSS_HASH_BITS, ENIC_RSS_BASE_CPU,
			      rss_enable);
	/* Cache the settings actually applied, for later queries */
	if (!ret) {
		enic->rss_hf = rss_hf;
		enic->rss_hash_type = rss_hash_type;
		enic->rss_enable = rss_enable;
	} else {
		dev_err(enic, "Failed to update RSS configurations."
			" hash=0x%x\n", rss_hash_type);
	}
	return ret;
}
1445 
1446 int enic_set_vlan_strip(struct enic *enic)
1447 {
1448 	/*
1449 	 * Unfortunately, VLAN strip on/off and RSS on/off are configured
1450 	 * together. So, re-do niccfg, preserving the current RSS settings.
1451 	 */
1452 	return enic_set_niccfg(enic, ENIC_RSS_DEFAULT_CPU, enic->rss_hash_type,
1453 			       ENIC_RSS_HASH_BITS, ENIC_RSS_BASE_CPU,
1454 			       enic->rss_enable);
1455 }
1456 
1457 int enic_add_packet_filter(struct enic *enic)
1458 {
1459 	/* switchdev ignores packet filters */
1460 	if (enic->switchdev_mode) {
1461 		ENICPMD_LOG(DEBUG, " switchdev: ignore packet filter");
1462 		return 0;
1463 	}
1464 	/* Args -> directed, multicast, broadcast, promisc, allmulti */
1465 	return vnic_dev_packet_filter(enic->vdev, 1, 1, 1,
1466 		enic->promisc, enic->allmulti);
1467 }
1468 
/* Return the link status reported by the vNIC device. */
int enic_get_link_status(struct enic *enic)
{
	return vnic_dev_link_status(enic->vdev);
}
1473 
1474 static void enic_dev_deinit(struct enic *enic)
1475 {
1476 	/* stop link status checking */
1477 	vnic_dev_notify_unset(enic->vdev);
1478 
1479 	/* mac_addrs is freed by rte_eth_dev_release_port() */
1480 	rte_free(enic->cq);
1481 	rte_free(enic->intr);
1482 	rte_free(enic->rq);
1483 	rte_free(enic->wq);
1484 }
1485 
1486 
1487 int enic_set_vnic_res(struct enic *enic)
1488 {
1489 	struct rte_eth_dev *eth_dev = enic->rte_dev;
1490 	int rc = 0;
1491 	unsigned int required_rq, required_wq, required_cq, required_intr;
1492 
1493 	/* Always use two vNIC RQs per eth_dev RQ, regardless of Rx scatter. */
1494 	required_rq = eth_dev->data->nb_rx_queues * 2;
1495 	required_wq = eth_dev->data->nb_tx_queues;
1496 	required_cq = eth_dev->data->nb_rx_queues + eth_dev->data->nb_tx_queues;
1497 	required_intr = 1; /* 1 for LSC even if intr_conf.lsc is 0 */
1498 	if (eth_dev->data->dev_conf.intr_conf.rxq) {
1499 		required_intr += eth_dev->data->nb_rx_queues;
1500 	}
1501 	ENICPMD_LOG(DEBUG, "Required queues for PF: rq %u wq %u cq %u",
1502 		    required_rq, required_wq, required_cq);
1503 	if (enic->vf_required_rq) {
1504 		/* Queues needed for VF representors */
1505 		required_rq += enic->vf_required_rq;
1506 		required_wq += enic->vf_required_wq;
1507 		required_cq += enic->vf_required_cq;
1508 		ENICPMD_LOG(DEBUG, "Required queues for VF representors: rq %u wq %u cq %u",
1509 			    enic->vf_required_rq, enic->vf_required_wq,
1510 			    enic->vf_required_cq);
1511 	}
1512 
1513 	if (enic->conf_rq_count < required_rq) {
1514 		dev_err(dev, "Not enough Receive queues. Requested:%u which uses %d RQs on VIC, Configured:%u\n",
1515 			eth_dev->data->nb_rx_queues,
1516 			required_rq, enic->conf_rq_count);
1517 		rc = -EINVAL;
1518 	}
1519 	if (enic->conf_wq_count < required_wq) {
1520 		dev_err(dev, "Not enough Transmit queues. Requested:%u, Configured:%u\n",
1521 			eth_dev->data->nb_tx_queues, enic->conf_wq_count);
1522 		rc = -EINVAL;
1523 	}
1524 
1525 	if (enic->conf_cq_count < required_cq) {
1526 		dev_err(dev, "Not enough Completion queues. Required:%u, Configured:%u\n",
1527 			required_cq, enic->conf_cq_count);
1528 		rc = -EINVAL;
1529 	}
1530 	if (enic->conf_intr_count < required_intr) {
1531 		dev_err(dev, "Not enough Interrupts to support Rx queue"
1532 			" interrupts. Required:%u, Configured:%u\n",
1533 			required_intr, enic->conf_intr_count);
1534 		rc = -EINVAL;
1535 	}
1536 
1537 	if (rc == 0) {
1538 		enic->rq_count = eth_dev->data->nb_rx_queues;
1539 		enic->wq_count = eth_dev->data->nb_tx_queues;
1540 		enic->cq_count = enic->rq_count + enic->wq_count;
1541 		enic->intr_count = required_intr;
1542 	}
1543 
1544 	return rc;
1545 }
1546 
1547 /* Initialize the completion queue for an RQ */
1548 static int
1549 enic_reinit_rq(struct enic *enic, unsigned int rq_idx)
1550 {
1551 	struct vnic_rq *sop_rq, *data_rq;
1552 	unsigned int cq_idx;
1553 	int rc = 0;
1554 
1555 	sop_rq = &enic->rq[enic_rte_rq_idx_to_sop_idx(rq_idx)];
1556 	data_rq = &enic->rq[enic_rte_rq_idx_to_data_idx(rq_idx, enic)];
1557 	cq_idx = enic_cq_rq(enic, rq_idx);
1558 
1559 	vnic_cq_clean(&enic->cq[cq_idx]);
1560 	vnic_cq_init(&enic->cq[cq_idx],
1561 		     0 /* flow_control_enable */,
1562 		     1 /* color_enable */,
1563 		     0 /* cq_head */,
1564 		     0 /* cq_tail */,
1565 		     1 /* cq_tail_color */,
1566 		     0 /* interrupt_enable */,
1567 		     1 /* cq_entry_enable */,
1568 		     0 /* cq_message_enable */,
1569 		     0 /* interrupt offset */,
1570 		     0 /* cq_message_addr */);
1571 
1572 
1573 	vnic_rq_init_start(sop_rq, enic_cq_rq(enic,
1574 			   enic_rte_rq_idx_to_sop_idx(rq_idx)), 0,
1575 			   sop_rq->ring.desc_count - 1, 1, 0);
1576 	if (data_rq->in_use) {
1577 		vnic_rq_init_start(data_rq,
1578 				   enic_cq_rq(enic,
1579 				   enic_rte_rq_idx_to_data_idx(rq_idx, enic)),
1580 				   0, data_rq->ring.desc_count - 1, 1, 0);
1581 	}
1582 
1583 	rc = enic_alloc_rx_queue_mbufs(enic, sop_rq);
1584 	if (rc)
1585 		return rc;
1586 
1587 	if (data_rq->in_use) {
1588 		rc = enic_alloc_rx_queue_mbufs(enic, data_rq);
1589 		if (rc) {
1590 			enic_rxmbuf_queue_release(enic, sop_rq);
1591 			return rc;
1592 		}
1593 	}
1594 
1595 	return 0;
1596 }
1597 
/* The Cisco NIC can send and receive packets up to a max packet size
 * determined by the NIC type and firmware. There is also an MTU
 * configured into the NIC via the CIMC/UCSM management interface
 * which can be overridden by this function (up to the max packet size).
 * Depending on the network setup, doing so may cause packet drops
 * and unexpected behavior.
 *
 * If the device is already started, the RQs are rebuilt on the fly:
 * stop all RQs, swap in a no-op Rx handler, wait for in-flight Rx
 * threads to drain, re-allocate the RQs with the new size, then restore
 * the real handler and restart the RQs. The statement ordering here is
 * load-bearing; do not reorder.
 */
int enic_set_mtu(struct enic *enic, uint16_t new_mtu)
{
	unsigned int rq_idx;
	struct vnic_rq *rq;
	int rc = 0;
	uint16_t old_mtu;	/* previous setting */
	uint16_t config_mtu;	/* Value configured into NIC via CIMC/UCSM */
	struct rte_eth_dev *eth_dev = enic->rte_dev;

	old_mtu = eth_dev->data->mtu;
	config_mtu = enic->config.mtu;

	/* Only the primary process may rebuild queues */
	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
		return -E_RTE_SECONDARY;

	if (new_mtu > enic->max_mtu) {
		dev_err(enic,
			"MTU not updated: requested (%u) greater than max (%u)\n",
			new_mtu, enic->max_mtu);
		return -EINVAL;
	}
	if (new_mtu < ENIC_MIN_MTU) {
		dev_info(enic,
			"MTU not updated: requested (%u) less than min (%u)\n",
			new_mtu, ENIC_MIN_MTU);
		return -EINVAL;
	}
	/* Exceeding the CIMC/UCSM-configured MTU is allowed but warned */
	if (new_mtu > config_mtu)
		dev_warning(enic,
			"MTU (%u) is greater than value configured in NIC (%u)\n",
			new_mtu, config_mtu);

	/* Update the MTU and maximum packet length */
	eth_dev->data->mtu = new_mtu;
	eth_dev->data->dev_conf.rxmode.max_rx_pkt_len =
		enic_mtu_to_max_rx_pktlen(new_mtu);

	/*
	 * If the device has not started (enic_enable), nothing to do.
	 * Later, enic_enable() will set up RQs reflecting the new maximum
	 * packet length.
	 */
	if (!eth_dev->data->dev_started)
		goto set_mtu_done;

	/*
	 * The device has started, re-do RQs on the fly. In the process, we
	 * pick up the new maximum packet length.
	 *
	 * Some applications rely on the ability to change MTU without stopping
	 * the device. So keep this behavior for now.
	 */
	rte_spinlock_lock(&enic->mtu_lock);

	/* Stop traffic on all RQs */
	for (rq_idx = 0; rq_idx < enic->rq_count * 2; rq_idx++) {
		rq = &enic->rq[rq_idx];
		if (rq->is_sop && rq->in_use) {
			rc = enic_stop_rq(enic,
					  enic_sop_rq_idx_to_rte_idx(rq_idx));
			if (rc) {
				dev_err(enic, "Failed to stop Rq %u\n", rq_idx);
				goto set_mtu_done;
			}
		}
	}

	/* replace Rx function with a no-op to avoid getting stale pkts */
	eth_dev->rx_pkt_burst = enic_dummy_recv_pkts;
	rte_mb();

	/* Allow time for threads to exit the real Rx function. */
	usleep(100000);

	/* now it is safe to reconfigure the RQs */


	/* free and reallocate RQs with the new MTU */
	for (rq_idx = 0; rq_idx < enic->rq_count; rq_idx++) {
		rq = &enic->rq[enic_rte_rq_idx_to_sop_idx(rq_idx)];
		if (!rq->in_use)
			continue;

		enic_free_rq(rq);
		rc = enic_alloc_rq(enic, rq_idx, rq->socket_id, rq->mp,
				   rq->tot_nb_desc, rq->rx_free_thresh);
		if (rc) {
			dev_err(enic,
				"Fatal MTU alloc error- No traffic will pass\n");
			goto set_mtu_done;
		}

		rc = enic_reinit_rq(enic, rq_idx);
		if (rc) {
			dev_err(enic,
				"Fatal MTU RQ reinit- No traffic will pass\n");
			goto set_mtu_done;
		}
	}

	/* put back the real receive function */
	rte_mb();
	enic_pick_rx_handler(eth_dev);
	rte_mb();

	/* restart Rx traffic */
	for (rq_idx = 0; rq_idx < enic->rq_count; rq_idx++) {
		rq = &enic->rq[enic_rte_rq_idx_to_sop_idx(rq_idx)];
		if (rq->is_sop && rq->in_use)
			enic_start_rq(enic, rq_idx);
	}

set_mtu_done:
	dev_info(enic, "MTU changed from %u to %u\n",  old_mtu, new_mtu);
	rte_spinlock_unlock(&enic->mtu_lock);
	return rc;
}
1722 
1723 static void
1724 enic_disable_overlay_offload(struct enic *enic)
1725 {
1726 	/*
1727 	 * Disabling fails if the feature is provisioned but
1728 	 * not enabled. So ignore result and do not log error.
1729 	 */
1730 	if (enic->vxlan) {
1731 		vnic_dev_overlay_offload_ctrl(enic->vdev,
1732 			OVERLAY_FEATURE_VXLAN, OVERLAY_OFFLOAD_DISABLE);
1733 	}
1734 	if (enic->geneve) {
1735 		vnic_dev_overlay_offload_ctrl(enic->vdev,
1736 			OVERLAY_FEATURE_GENEVE, OVERLAY_OFFLOAD_DISABLE);
1737 	}
1738 }
1739 
1740 static int
1741 enic_enable_overlay_offload(struct enic *enic)
1742 {
1743 	if (enic->vxlan && vnic_dev_overlay_offload_ctrl(enic->vdev,
1744 			OVERLAY_FEATURE_VXLAN, OVERLAY_OFFLOAD_ENABLE) != 0) {
1745 		dev_err(NULL, "failed to enable VXLAN offload\n");
1746 		return -EINVAL;
1747 	}
1748 	if (enic->geneve && vnic_dev_overlay_offload_ctrl(enic->vdev,
1749 			OVERLAY_FEATURE_GENEVE, OVERLAY_OFFLOAD_ENABLE) != 0) {
1750 		dev_err(NULL, "failed to enable Geneve offload\n");
1751 		return -EINVAL;
1752 	}
1753 	enic->tx_offload_capa |=
1754 		DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM |
1755 		(enic->geneve ? DEV_TX_OFFLOAD_GENEVE_TNL_TSO : 0) |
1756 		(enic->vxlan ? DEV_TX_OFFLOAD_VXLAN_TNL_TSO : 0);
1757 	enic->tx_offload_mask |=
1758 		PKT_TX_OUTER_IPV6 |
1759 		PKT_TX_OUTER_IPV4 |
1760 		PKT_TX_OUTER_IP_CKSUM |
1761 		PKT_TX_TUNNEL_MASK;
1762 	enic->overlay_offload = true;
1763 
1764 	if (enic->vxlan && enic->geneve)
1765 		dev_info(NULL, "Overlay offload is enabled (VxLAN, Geneve)\n");
1766 	else if (enic->vxlan)
1767 		dev_info(NULL, "Overlay offload is enabled (VxLAN)\n");
1768 	else
1769 		dev_info(NULL, "Overlay offload is enabled (Geneve)\n");
1770 
1771 	return 0;
1772 }
1773 
1774 static int
1775 enic_reset_overlay_port(struct enic *enic)
1776 {
1777 	if (enic->vxlan) {
1778 		enic->vxlan_port = RTE_VXLAN_DEFAULT_PORT;
1779 		/*
1780 		 * Reset the vxlan port to the default, as the NIC firmware
1781 		 * does not reset it automatically and keeps the old setting.
1782 		 */
1783 		if (vnic_dev_overlay_offload_cfg(enic->vdev,
1784 						 OVERLAY_CFG_VXLAN_PORT_UPDATE,
1785 						 RTE_VXLAN_DEFAULT_PORT)) {
1786 			dev_err(enic, "failed to update vxlan port\n");
1787 			return -EINVAL;
1788 		}
1789 	}
1790 	if (enic->geneve) {
1791 		enic->geneve_port = RTE_GENEVE_DEFAULT_PORT;
1792 		if (vnic_dev_overlay_offload_cfg(enic->vdev,
1793 						 OVERLAY_CFG_GENEVE_PORT_UPDATE,
1794 						 RTE_GENEVE_DEFAULT_PORT)) {
1795 			dev_err(enic, "failed to update vxlan port\n");
1796 			return -EINVAL;
1797 		}
1798 	}
1799 	return 0;
1800 }
1801 
/*
 * One-time device initialization: read the vNIC config and resource
 * counts, allocate the queue/interrupt bookkeeping arrays and MAC
 * address table, set up link-status notification, and configure
 * overlay (VXLAN/Geneve) offload and flowman.
 * Returns 0 on success, a negative errno or -1 on failure.
 */
static int enic_dev_init(struct enic *enic)
{
	int err;
	struct rte_eth_dev *eth_dev = enic->rte_dev;

	vnic_dev_intr_coal_timer_info_default(enic->vdev);

	/* Get vNIC configuration */
	err = enic_get_vnic_config(enic);
	if (err) {
		dev_err(dev, "Get vNIC configuration failed, aborting\n");
		return err;
	}

	/* Get available resource counts */
	enic_get_res_counts(enic);
	/* A single RQ cannot be split into the required SOP/data pair */
	if (enic->conf_rq_count == 1) {
		dev_err(enic, "Running with only 1 RQ configured in the vNIC is not supported.\n");
		dev_err(enic, "Please configure 2 RQs in the vNIC for each Rx queue used by DPDK.\n");
		dev_err(enic, "See the ENIC PMD guide for more information.\n");
		return -EINVAL;
	}
	/* Queue counts may be zeros. rte_zmalloc returns NULL in that case. */
	enic->cq = rte_zmalloc("enic_vnic_cq", sizeof(struct vnic_cq) *
			       enic->conf_cq_count, 8);
	enic->intr = rte_zmalloc("enic_vnic_intr", sizeof(struct vnic_intr) *
				 enic->conf_intr_count, 8);
	enic->rq = rte_zmalloc("enic_vnic_rq", sizeof(struct vnic_rq) *
			       enic->conf_rq_count, 8);
	enic->wq = rte_zmalloc("enic_vnic_wq", sizeof(struct vnic_wq) *
			       enic->conf_wq_count, 8);
	if (enic->conf_cq_count > 0 && enic->cq == NULL) {
		dev_err(enic, "failed to allocate vnic_cq, aborting.\n");
		return -1;
	}
	if (enic->conf_intr_count > 0 && enic->intr == NULL) {
		dev_err(enic, "failed to allocate vnic_intr, aborting.\n");
		return -1;
	}
	if (enic->conf_rq_count > 0 && enic->rq == NULL) {
		dev_err(enic, "failed to allocate vnic_rq, aborting.\n");
		return -1;
	}
	if (enic->conf_wq_count > 0 && enic->wq == NULL) {
		dev_err(enic, "failed to allocate vnic_wq, aborting.\n");
		return -1;
	}

	/* Room for the station MAC plus unicast perfect-filter entries */
	eth_dev->data->mac_addrs = rte_zmalloc("enic_mac_addr",
					sizeof(struct rte_ether_addr) *
					ENIC_UNICAST_PERFECT_FILTERS, 0);
	if (!eth_dev->data->mac_addrs) {
		dev_err(enic, "mac addr storage alloc failed, aborting.\n");
		return -1;
	}
	rte_ether_addr_copy((struct rte_ether_addr *)enic->mac_addr,
			eth_dev->data->mac_addrs);

	vnic_dev_set_reset_flag(enic->vdev, 0);

	LIST_INIT(&enic->flows);

	/* set up link status checking */
	vnic_dev_notify_set(enic->vdev, -1); /* No Intr for notify */

	enic->overlay_offload = false;
	/*
	 * First, explicitly disable overlay offload as the setting is
	 * sticky, and resetting vNIC may not disable it.
	 */
	enic_disable_overlay_offload(enic);
	/* Then, enable overlay offload according to vNIC flags */
	if (!enic->disable_overlay && (enic->vxlan || enic->geneve)) {
		err = enic_enable_overlay_offload(enic);
		if (err) {
			dev_info(NULL, "failed to enable overlay offload\n");
			return err;
		}
	}
	/*
	 * Reset the vxlan/geneve port if HW parsing is available. It
	 * is always enabled regardless of overlay offload
	 * enable/disable.
	 */
	err = enic_reset_overlay_port(enic);
	if (err)
		return err;

	/* Flowman failure is non-fatal; rte_flow support degrades */
	if (enic_fm_init(enic))
		dev_warning(enic, "Init of flowman failed.\n");
	return 0;
}
1895 
1896 static void lock_devcmd(void *priv)
1897 {
1898 	struct enic *enic = priv;
1899 
1900 	rte_spinlock_lock(&enic->devcmd_lock);
1901 }
1902 
1903 static void unlock_devcmd(void *priv)
1904 {
1905 	struct enic *enic = priv;
1906 
1907 	rte_spinlock_unlock(&enic->devcmd_lock);
1908 }
1909 
/*
 * Probe the adapter: map BAR0, register the vNIC device, allocate
 * shared stats memory, open the device, set the ingress VLAN rewrite
 * mode, run device init, then driver-level init (enic_dev_init).
 * On failure, unwinds via the goto chain in reverse order of setup.
 * Returns 0 on success (and immediately for secondary processes),
 * or a negative error.
 */
int enic_probe(struct enic *enic)
{
	struct rte_pci_device *pdev = enic->pdev;
	int err = -1;

	dev_debug(enic, "Initializing ENIC PMD\n");

	/* if this is a secondary process the hardware is already initialized */
	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
		return 0;

	enic->bar0.vaddr = (void *)pdev->mem_resource[0].addr;
	enic->bar0.len = pdev->mem_resource[0].len;

	/* Register vNIC device */
	enic->vdev = vnic_dev_register(NULL, enic, enic->pdev, &enic->bar0, 1);
	if (!enic->vdev) {
		dev_err(enic, "vNIC registration failed, aborting\n");
		goto err_out;
	}

	LIST_INIT(&enic->memzone_list);
	rte_spinlock_init(&enic->memzone_list_lock);

	/* Route devcmd DMA allocations through our memzone allocator */
	vnic_register_cbacks(enic->vdev,
		enic_alloc_consistent,
		enic_free_consistent);

	/*
	 * Allocate the consistent memory for stats upfront so both primary and
	 * secondary processes can dump stats.
	 */
	err = vnic_dev_alloc_stats_mem(enic->vdev);
	if (err) {
		dev_err(enic, "Failed to allocate cmd memory, aborting\n");
		goto err_out_unregister;
	}
	/* Issue device open to get device in known state */
	err = enic_dev_open(enic);
	if (err) {
		dev_err(enic, "vNIC dev open failed, aborting\n");
		goto err_out_unregister;
	}

	/* Set ingress vlan rewrite mode before vnic initialization */
	dev_debug(enic, "Set ig_vlan_rewrite_mode=%u\n",
		  enic->ig_vlan_rewrite_mode);
	err = vnic_dev_set_ig_vlan_rewrite_mode(enic->vdev,
		enic->ig_vlan_rewrite_mode);
	if (err) {
		dev_err(enic,
			"Failed to set ingress vlan rewrite mode, aborting.\n");
		goto err_out_dev_close;
	}

	/* Issue device init to initialize the vnic-to-switch link.
	 * We'll start with carrier off and wait for link UP
	 * notification later to turn on carrier.  We don't need
	 * to wait here for the vnic-to-switch link initialization
	 * to complete; link UP notification is the indication that
	 * the process is complete.
	 */

	err = vnic_dev_init(enic->vdev, 0);
	if (err) {
		dev_err(enic, "vNIC dev init failed, aborting\n");
		goto err_out_dev_close;
	}

	err = enic_dev_init(enic);
	if (err) {
		dev_err(enic, "Device initialization failed, aborting\n");
		goto err_out_dev_close;
	}

	/* Use a PF spinlock to serialize devcmd from PF and VF representors */
	if (enic->switchdev_mode) {
		rte_spinlock_init(&enic->devcmd_lock);
		vnic_register_lock(enic->vdev, lock_devcmd, unlock_devcmd);
	}
	return 0;

err_out_dev_close:
	vnic_dev_close(enic->vdev);
err_out_unregister:
	vnic_dev_unregister(enic->vdev);
err_out:
	return err;
}
1999 
/*
 * Tear down the adapter: release driver state, then close and
 * unregister the underlying vNIC device (reverse of enic_probe order).
 */
void enic_remove(struct enic *enic)
{
	enic_dev_deinit(enic);
	vnic_dev_close(enic->vdev);
	vnic_dev_unregister(enic->vdev);
}
2006