xref: /dpdk/drivers/net/enic/enic_main.c (revision db4e81351fb85ff623bd0438d1b5a8fb55fe9fee)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright 2008-2017 Cisco Systems, Inc.  All rights reserved.
3  * Copyright 2007 Nuova Systems, Inc.  All rights reserved.
4  */
5 
6 #include <stdio.h>
7 
8 #include <sys/stat.h>
9 #include <sys/mman.h>
10 #include <fcntl.h>
11 
12 #include <rte_pci.h>
13 #include <rte_bus_pci.h>
14 #include <rte_memzone.h>
15 #include <rte_malloc.h>
16 #include <rte_mbuf.h>
17 #include <rte_string_fns.h>
18 #include <rte_ethdev_driver.h>
19 
20 #include "enic_compat.h"
21 #include "enic.h"
22 #include "wq_enet_desc.h"
23 #include "rq_enet_desc.h"
24 #include "cq_enet_desc.h"
25 #include "vnic_enet.h"
26 #include "vnic_dev.h"
27 #include "vnic_wq.h"
28 #include "vnic_rq.h"
29 #include "vnic_cq.h"
30 #include "vnic_intr.h"
31 #include "vnic_nic.h"
32 
33 static inline int enic_is_sriov_vf(struct enic *enic)
34 {
35 	return enic->pdev->id.device_id == PCI_DEVICE_ID_CISCO_VIC_ENET_VF;
36 }
37 
38 static int is_zero_addr(uint8_t *addr)
39 {
40 	return !(addr[0] |  addr[1] | addr[2] | addr[3] | addr[4] | addr[5]);
41 }
42 
43 static int is_mcast_addr(uint8_t *addr)
44 {
45 	return addr[0] & 1;
46 }
47 
48 static int is_eth_addr_valid(uint8_t *addr)
49 {
50 	return !is_mcast_addr(addr) && !is_zero_addr(addr);
51 }
52 
53 static void
54 enic_rxmbuf_queue_release(__rte_unused struct enic *enic, struct vnic_rq *rq)
55 {
56 	uint16_t i;
57 
58 	if (!rq || !rq->mbuf_ring) {
59 		dev_debug(enic, "Pointer to rq or mbuf_ring is NULL");
60 		return;
61 	}
62 
63 	for (i = 0; i < rq->ring.desc_count; i++) {
64 		if (rq->mbuf_ring[i]) {
65 			rte_pktmbuf_free_seg(rq->mbuf_ring[i]);
66 			rq->mbuf_ring[i] = NULL;
67 		}
68 	}
69 }
70 
71 static void enic_free_wq_buf(struct rte_mbuf **buf)
72 {
73 	struct rte_mbuf *mbuf = *buf;
74 
75 	rte_pktmbuf_free_seg(mbuf);
76 	*buf = NULL;
77 }
78 
79 static void enic_log_q_error(struct enic *enic)
80 {
81 	unsigned int i;
82 	uint32_t error_status;
83 
84 	for (i = 0; i < enic->wq_count; i++) {
85 		error_status = vnic_wq_error_status(&enic->wq[i]);
86 		if (error_status)
87 			dev_err(enic, "WQ[%d] error_status %d\n", i,
88 				error_status);
89 	}
90 
91 	for (i = 0; i < enic_vnic_rq_count(enic); i++) {
92 		if (!enic->rq[i].in_use)
93 			continue;
94 		error_status = vnic_rq_error_status(&enic->rq[i]);
95 		if (error_status)
96 			dev_err(enic, "RQ[%d] error_status %d\n", i,
97 				error_status);
98 	}
99 }
100 
101 static void enic_clear_soft_stats(struct enic *enic)
102 {
103 	struct enic_soft_stats *soft_stats = &enic->soft_stats;
104 	rte_atomic64_clear(&soft_stats->rx_nombuf);
105 	rte_atomic64_clear(&soft_stats->rx_packet_errors);
106 	rte_atomic64_clear(&soft_stats->tx_oversized);
107 }
108 
109 static void enic_init_soft_stats(struct enic *enic)
110 {
111 	struct enic_soft_stats *soft_stats = &enic->soft_stats;
112 	rte_atomic64_init(&soft_stats->rx_nombuf);
113 	rte_atomic64_init(&soft_stats->rx_packet_errors);
114 	rte_atomic64_init(&soft_stats->tx_oversized);
115 	enic_clear_soft_stats(enic);
116 }
117 
118 int enic_dev_stats_clear(struct enic *enic)
119 {
120 	int ret;
121 
122 	ret = vnic_dev_stats_clear(enic->vdev);
123 	if (ret != 0) {
124 		dev_err(enic, "Error in clearing stats\n");
125 		return ret;
126 	}
127 	enic_clear_soft_stats(enic);
128 
129 	return 0;
130 }
131 
132 int enic_dev_stats_get(struct enic *enic, struct rte_eth_stats *r_stats)
133 {
134 	struct vnic_stats *stats;
135 	struct enic_soft_stats *soft_stats = &enic->soft_stats;
136 	int64_t rx_truncated;
137 	uint64_t rx_packet_errors;
138 	int ret = vnic_dev_stats_dump(enic->vdev, &stats);
139 
140 	if (ret) {
141 		dev_err(enic, "Error in getting stats\n");
142 		return ret;
143 	}
144 
145 	/* The number of truncated packets can only be calculated by
146 	 * subtracting a hardware counter from error packets received by
147 	 * the driver. Note: this causes transient inaccuracies in the
148 	 * ipackets count. Also, the lengths of truncated packets are
149 	 * counted in ibytes even though truncated packets are dropped,
150 	 * which can make ibytes slightly higher than it should be.
151 	 */
152 	rx_packet_errors = rte_atomic64_read(&soft_stats->rx_packet_errors);
153 	rx_truncated = rx_packet_errors - stats->rx.rx_errors;
154 
155 	r_stats->ipackets = stats->rx.rx_frames_ok - rx_truncated;
156 	r_stats->opackets = stats->tx.tx_frames_ok;
157 
158 	r_stats->ibytes = stats->rx.rx_bytes_ok;
159 	r_stats->obytes = stats->tx.tx_bytes_ok;
160 
161 	r_stats->ierrors = stats->rx.rx_errors + stats->rx.rx_drop;
162 	r_stats->oerrors = stats->tx.tx_errors
163 			   + rte_atomic64_read(&soft_stats->tx_oversized);
164 
165 	r_stats->imissed = stats->rx.rx_no_bufs + rx_truncated;
166 
167 	r_stats->rx_nombuf = rte_atomic64_read(&soft_stats->rx_nombuf);
168 	return 0;
169 }
170 
171 int enic_del_mac_address(struct enic *enic, int mac_index)
172 {
173 	struct rte_eth_dev *eth_dev = enic->rte_dev;
174 	uint8_t *mac_addr = eth_dev->data->mac_addrs[mac_index].addr_bytes;
175 
176 	return vnic_dev_del_addr(enic->vdev, mac_addr);
177 }
178 
179 int enic_set_mac_address(struct enic *enic, uint8_t *mac_addr)
180 {
181 	int err;
182 
183 	if (!is_eth_addr_valid(mac_addr)) {
184 		dev_err(enic, "invalid mac address\n");
185 		return -EINVAL;
186 	}
187 
188 	err = vnic_dev_add_addr(enic->vdev, mac_addr);
189 	if (err)
190 		dev_err(enic, "add mac addr failed\n");
191 	return err;
192 }
193 
194 static void
195 enic_free_rq_buf(struct rte_mbuf **mbuf)
196 {
197 	if (*mbuf == NULL)
198 		return;
199 
200 	rte_pktmbuf_free(*mbuf);
201 	*mbuf = NULL;
202 }
203 
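/*
 * Program the initial state of every vNIC resource: the RQs (SOP and, when
 * in use, data), their CQs, the WQs and their completion-message CQs, and
 * the interrupt timers. When Rx queue interrupts are configured, each Rx CQ
 * is bound to its own vector starting at ENICPMD_RXQ_INTR_OFFSET.
 */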
204 void enic_init_vnic_resources(struct enic *enic)
205 {
206 	unsigned int error_interrupt_enable = 1;
207 	unsigned int error_interrupt_offset = 0;
208 	unsigned int rxq_interrupt_enable = 0;
209 	unsigned int rxq_interrupt_offset = ENICPMD_RXQ_INTR_OFFSET;
210 	unsigned int index = 0;
211 	unsigned int cq_idx;
212 	struct vnic_rq *data_rq;
213 
214 	if (enic->rte_dev->data->dev_conf.intr_conf.rxq)
215 		rxq_interrupt_enable = 1;
216 
217 	for (index = 0; index < enic->rq_count; index++) {
218 		cq_idx = enic_cq_rq(enic, enic_rte_rq_idx_to_sop_idx(index));
219 
220 		vnic_rq_init(&enic->rq[enic_rte_rq_idx_to_sop_idx(index)],
221 			cq_idx,
222 			error_interrupt_enable,
223 			error_interrupt_offset);
224 
225 		data_rq = &enic->rq[enic_rte_rq_idx_to_data_idx(index, enic)];
226 		if (data_rq->in_use)
227 			vnic_rq_init(data_rq,
228 				     cq_idx,
229 				     error_interrupt_enable,
230 				     error_interrupt_offset);
231 		vnic_cq_init(&enic->cq[cq_idx],
232 			0 /* flow_control_enable */,
233 			1 /* color_enable */,
234 			0 /* cq_head */,
235 			0 /* cq_tail */,
236 			1 /* cq_tail_color */,
237 			rxq_interrupt_enable,
238 			1 /* cq_entry_enable */,
239 			0 /* cq_message_enable */,
240 			rxq_interrupt_offset,
241 			0 /* cq_message_addr */);
242 		if (rxq_interrupt_enable)
243 			rxq_interrupt_offset++;
244 	}
245 
246 	for (index = 0; index < enic->wq_count; index++) {
247 		vnic_wq_init(&enic->wq[index],
248 			enic_cq_wq(enic, index),
249 			error_interrupt_enable,
250 			error_interrupt_offset);
251 		/* Compute unsupported offload flags for enic_prep_pkts() */
252 		enic->wq[index].tx_offload_notsup_mask =
253 			PKT_TX_OFFLOAD_MASK ^ enic->tx_offload_mask;
254 
255 		cq_idx = enic_cq_wq(enic, index);
256 		vnic_cq_init(&enic->cq[cq_idx],
257 			0 /* flow_control_enable */,
258 			1 /* color_enable */,
259 			0 /* cq_head */,
260 			0 /* cq_tail */,
261 			1 /* cq_tail_color */,
262 			0 /* interrupt_enable */,
263 			0 /* cq_entry_enable */,
264 			1 /* cq_message_enable */,
265 			0 /* interrupt offset */,
266 			(uint64_t)enic->wq[index].cqmsg_rz->iova);
267 	}
268 
269 	for (index = 0; index < enic->intr_count; index++) {
270 		vnic_intr_init(&enic->intr[index],
271 			       enic->config.intr_timer_usec,
272 			       enic->config.intr_timer_type,
273 			       /*mask_on_assertion*/1);
274 	}
275 }
276 
277 
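/*
 * Fill an RQ descriptor ring with freshly allocated mbufs from the queue's
 * mempool. The buffers are only posted to the NIC later, when the RQ is
 * enabled (see enic_initial_post_rx(), called from enic_start_rq()).
 */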
278 static int
279 enic_alloc_rx_queue_mbufs(struct enic *enic, struct vnic_rq *rq)
280 {
281 	struct rte_mbuf *mb;
282 	struct rq_enet_desc *rqd = rq->ring.descs;
283 	unsigned i;
284 	dma_addr_t dma_addr;
285 	uint32_t max_rx_pkt_len;
286 	uint16_t rq_buf_len;
287 
288 	if (!rq->in_use)
289 		return 0;
290 
291 	dev_debug(enic, "queue %u, allocating %u rx queue mbufs\n", rq->index,
292 		  rq->ring.desc_count);
293 
294 	/*
295 	 * If *not* using scatter and the mbuf size is greater than the
296 	 * requested max packet size (max_rx_pkt_len), then reduce the
297 	 * posted buffer size to max_rx_pkt_len. HW still receives packets
298 	 * larger than max_rx_pkt_len, but they will be truncated and then
299 	 * dropped in the rx handler. Not ideal, but better than returning
300 	 * large packets when the user is not expecting them.
301 	 */
302 	max_rx_pkt_len = enic->rte_dev->data->dev_conf.rxmode.max_rx_pkt_len;
303 	rq_buf_len = rte_pktmbuf_data_room_size(rq->mp) - RTE_PKTMBUF_HEADROOM;
304 	if (max_rx_pkt_len < rq_buf_len && !rq->data_queue_enable)
305 		rq_buf_len = max_rx_pkt_len;
306 	for (i = 0; i < rq->ring.desc_count; i++, rqd++) {
307 		mb = rte_mbuf_raw_alloc(rq->mp);
308 		if (mb == NULL) {
309 			dev_err(enic, "RX mbuf alloc failed queue_id=%u\n",
310 			(unsigned)rq->index);
311 			return -ENOMEM;
312 		}
313 
314 		mb->data_off = RTE_PKTMBUF_HEADROOM;
315 		dma_addr = (dma_addr_t)(mb->buf_iova
316 			   + RTE_PKTMBUF_HEADROOM);
317 		rq_enet_desc_enc(rqd, dma_addr,
318 				(rq->is_sop ? RQ_ENET_TYPE_ONLY_SOP
319 				: RQ_ENET_TYPE_NOT_SOP),
320 				rq_buf_len);
321 		rq->mbuf_ring[i] = mb;
322 	}
323 	/*
324 	 * Do not post the buffers to the NIC until we enable the RQ via
325 	 * enic_start_rq().
326 	 */
327 	rq->need_initial_post = true;
328 	/* Initialize fetch index while RQ is disabled */
329 	iowrite32(0, &rq->ctrl->fetch_index);
330 	return 0;
331 }
332 
333 /*
334  * Post the Rx buffers for the first time. enic_alloc_rx_queue_mbufs() has
335  * allocated the buffers and filled the RQ descriptor ring. Just need to push
336  * the post index to the NIC.
337  */
338 static void
339 enic_initial_post_rx(struct enic *enic, struct vnic_rq *rq)
340 {
341 	if (!rq->in_use || !rq->need_initial_post)
342 		return;
343 
344 	/* make sure all prior writes are complete before doing the PIO write */
345 	rte_rmb();
346 
347 	/* Post all but the last buffer to VIC. */
348 	rq->posted_index = rq->ring.desc_count - 1;
349 
350 	rq->rx_nb_hold = 0;
351 
352 	dev_debug(enic, "port=%u, qidx=%u, Write %u posted idx, %u sw held\n",
353 		enic->port_id, rq->index, rq->posted_index, rq->rx_nb_hold);
354 	iowrite32(rq->posted_index, &rq->ctrl->posted_index);
355 	rte_rmb();
356 	rq->need_initial_post = false;
357 }
358 
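/*
 * DMA-coherent allocation callback registered with the vNIC library (see
 * vnic_register_cbacks() in enic_probe()). Each allocation is backed by an
 * IOVA-contiguous memzone and tracked on enic->memzone_list so that
 * enic_free_consistent() can find it again by virtual and DMA address.
 */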
359 void *
360 enic_alloc_consistent(void *priv, size_t size,
361 	dma_addr_t *dma_handle, uint8_t *name)
362 {
363 	void *vaddr;
364 	const struct rte_memzone *rz;
365 	*dma_handle = 0;
366 	struct enic *enic = (struct enic *)priv;
367 	struct enic_memzone_entry *mze;
368 
369 	rz = rte_memzone_reserve_aligned((const char *)name, size,
370 			SOCKET_ID_ANY, RTE_MEMZONE_IOVA_CONTIG, ENIC_PAGE_SIZE);
371 	if (!rz) {
372 		pr_err("%s : Failed to allocate memory requested for %s\n",
373 			__func__, name);
374 		return NULL;
375 	}
376 
377 	vaddr = rz->addr;
378 	*dma_handle = (dma_addr_t)rz->iova;
379 
380 	mze = rte_malloc("enic memzone entry",
381 			 sizeof(struct enic_memzone_entry), 0);
382 
383 	if (!mze) {
384 		pr_err("%s : Failed to allocate memory for memzone list\n",
385 		       __func__);
386 		rte_memzone_free(rz);
387 		return NULL;
388 	}
389 
390 	mze->rz = rz;
391 
392 	rte_spinlock_lock(&enic->memzone_list_lock);
393 	LIST_INSERT_HEAD(&enic->memzone_list, mze, entries);
394 	rte_spinlock_unlock(&enic->memzone_list_lock);
395 
396 	return vaddr;
397 }
398 
399 void
400 enic_free_consistent(void *priv,
401 		     __rte_unused size_t size,
402 		     void *vaddr,
403 		     dma_addr_t dma_handle)
404 {
405 	struct enic_memzone_entry *mze;
406 	struct enic *enic = (struct enic *)priv;
407 
408 	rte_spinlock_lock(&enic->memzone_list_lock);
409 	LIST_FOREACH(mze, &enic->memzone_list, entries) {
410 		if (mze->rz->addr == vaddr &&
411 		    mze->rz->iova == dma_handle)
412 			break;
413 	}
414 	if (mze == NULL) {
415 		rte_spinlock_unlock(&enic->memzone_list_lock);
416 		dev_warning(enic,
417 			    "Tried to free memory, but couldn't find it in the memzone list\n");
418 		return;
419 	}
420 	LIST_REMOVE(mze, entries);
421 	rte_spinlock_unlock(&enic->memzone_list_lock);
422 	rte_memzone_free(mze->rz);
423 	rte_free(mze);
424 }
425 
426 int enic_link_update(struct rte_eth_dev *eth_dev)
427 {
428 	struct enic *enic = pmd_priv(eth_dev);
429 	struct rte_eth_link link;
430 
431 	memset(&link, 0, sizeof(link));
432 	link.link_status = enic_get_link_status(enic);
433 	link.link_duplex = ETH_LINK_FULL_DUPLEX;
434 	link.link_speed = vnic_dev_port_speed(enic->vdev);
435 
436 	return rte_eth_linkstatus_set(eth_dev, &link);
437 }
438 
439 static void
440 enic_intr_handler(void *arg)
441 {
442 	struct rte_eth_dev *dev = (struct rte_eth_dev *)arg;
443 	struct enic *enic = pmd_priv(dev);
444 
445 	vnic_intr_return_all_credits(&enic->intr[ENICPMD_LSC_INTR_OFFSET]);
446 
447 	enic_link_update(dev);
448 	rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_LSC, NULL);
449 	enic_log_q_error(enic);
450 	/* Re-enable irq in case of INTx */
451 	rte_intr_ack(&enic->pdev->intr_handle);
452 }
453 
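/*
 * Set up the event fds and the interrupt vector mapping used for per-Rx-queue
 * interrupts: Rx queue i is assigned MSI-X vector i + ENICPMD_RXQ_INTR_OFFSET.
 */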
454 static int enic_rxq_intr_init(struct enic *enic)
455 {
456 	struct rte_intr_handle *intr_handle;
457 	uint32_t rxq_intr_count, i;
458 	int err;
459 
460 	intr_handle = enic->rte_dev->intr_handle;
461 	if (!enic->rte_dev->data->dev_conf.intr_conf.rxq)
462 		return 0;
463 	/*
464 	 * Rx queue interrupts only work when we have MSI-X interrupts,
465 	 * one per queue. Sharing one interrupt is technically
466 	 * possible with VIC, but it is not worth the complications it brings.
467 	 */
468 	if (!rte_intr_cap_multiple(intr_handle)) {
469 		dev_err(enic, "Rx queue interrupts require MSI-X interrupts"
470 			" (vfio-pci driver)\n");
471 		return -ENOTSUP;
472 	}
473 	rxq_intr_count = enic->intr_count - ENICPMD_RXQ_INTR_OFFSET;
474 	err = rte_intr_efd_enable(intr_handle, rxq_intr_count);
475 	if (err) {
476 		dev_err(enic, "Failed to enable event fds for Rx queue"
477 			" interrupts\n");
478 		return err;
479 	}
480 	intr_handle->intr_vec = rte_zmalloc("enic_intr_vec",
481 					    rxq_intr_count * sizeof(int), 0);
482 	if (intr_handle->intr_vec == NULL) {
483 		dev_err(enic, "Failed to allocate intr_vec\n");
484 		return -ENOMEM;
485 	}
486 	for (i = 0; i < rxq_intr_count; i++)
487 		intr_handle->intr_vec[i] = i + ENICPMD_RXQ_INTR_OFFSET;
488 	return 0;
489 }
490 
491 static void enic_rxq_intr_deinit(struct enic *enic)
492 {
493 	struct rte_intr_handle *intr_handle;
494 
495 	intr_handle = enic->rte_dev->intr_handle;
496 	rte_intr_efd_disable(intr_handle);
497 	if (intr_handle->intr_vec != NULL) {
498 		rte_free(intr_handle->intr_vec);
499 		intr_handle->intr_vec = NULL;
500 	}
501 }
502 
503 static void enic_prep_wq_for_simple_tx(struct enic *enic, uint16_t queue_idx)
504 {
505 	struct wq_enet_desc *desc;
506 	struct vnic_wq *wq;
507 	unsigned int i;
508 
509 	/*
510 	 * Fill WQ descriptor fields that never change. Every descriptor is
511 	 * one packet, so set EOP. Also set CQ_ENTRY every ENIC_WQ_CQ_THRESH
512 	 * descriptors (i.e. request one completion update every 32 packets).
513 	 */
514 	wq = &enic->wq[queue_idx];
515 	desc = (struct wq_enet_desc *)wq->ring.descs;
516 	for (i = 0; i < wq->ring.desc_count; i++, desc++) {
517 		desc->header_length_flags = 1 << WQ_ENET_FLAGS_EOP_SHIFT;
518 		if (i % ENIC_WQ_CQ_THRESH == ENIC_WQ_CQ_THRESH - 1)
519 			desc->header_length_flags |=
520 				(1 << WQ_ENET_FLAGS_CQ_ENTRY_SHIFT);
521 	}
522 }
523 
524 /*
525  * The 'strong' version is in enic_rxtx_vec_avx2.c. This weak version is
526  * used when that file is not compiled.
527  */
528 __rte_weak bool
529 enic_use_vector_rx_handler(__rte_unused struct rte_eth_dev *eth_dev)
530 {
531 	return false;
532 }
533 
534 void enic_pick_rx_handler(struct rte_eth_dev *eth_dev)
535 {
536 	struct enic *enic = pmd_priv(eth_dev);
537 
538 	/*
539 	 * Preference order:
540 	 * 1. The vectorized handler if possible and requested.
541 	 * 2. The non-scatter, simplified handler if scatter Rx is not used.
542 	 * 3. The default handler as a fallback.
543 	 */
544 	if (enic_use_vector_rx_handler(eth_dev))
545 		return;
546 	if (enic->rq_count > 0 && enic->rq[0].data_queue_enable == 0) {
547 		ENICPMD_LOG(DEBUG, " use the non-scatter Rx handler");
548 		eth_dev->rx_pkt_burst = &enic_noscatter_recv_pkts;
549 	} else {
550 		ENICPMD_LOG(DEBUG, " use the normal Rx handler");
551 		eth_dev->rx_pkt_burst = &enic_recv_pkts;
552 	}
553 }
554 
555 /* Secondary process uses this to set the Tx handler */
556 void enic_pick_tx_handler(struct rte_eth_dev *eth_dev)
557 {
558 	struct enic *enic = pmd_priv(eth_dev);
559 
560 	if (enic->use_simple_tx_handler) {
561 		ENICPMD_LOG(DEBUG, " use the simple tx handler");
562 		eth_dev->tx_pkt_burst = &enic_simple_xmit_pkts;
563 	} else {
564 		ENICPMD_LOG(DEBUG, " use the default tx handler");
565 		eth_dev->tx_pkt_burst = &enic_xmit_pkts;
566 	}
567 }
568 
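/*
 * Bring the port up: populate the RQs with mbufs, choose the Rx and Tx burst
 * handlers, start all WQs and RQs, program the unicast MAC address, and
 * register and unmask the error/link-state-change interrupt.
 */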
569 int enic_enable(struct enic *enic)
570 {
571 	unsigned int index;
572 	int err;
573 	struct rte_eth_dev *eth_dev = enic->rte_dev;
574 	uint64_t simple_tx_offloads;
575 	uintptr_t p;
576 
577 	if (enic->enable_avx2_rx) {
578 		struct rte_mbuf mb_def = { .buf_addr = 0 };
579 
580 		/*
581 		 * mbuf_initializer contains const-after-init fields of
582 		 * receive mbufs (i.e. 64 bits of fields from rearm_data).
583 		 * It is currently used by the vectorized handler.
584 		 */
585 		mb_def.nb_segs = 1;
586 		mb_def.data_off = RTE_PKTMBUF_HEADROOM;
587 		mb_def.port = enic->port_id;
588 		rte_mbuf_refcnt_set(&mb_def, 1);
589 		rte_compiler_barrier();
590 		p = (uintptr_t)&mb_def.rearm_data;
591 		enic->mbuf_initializer = *(uint64_t *)p;
592 	}
593 
594 	eth_dev->data->dev_link.link_speed = vnic_dev_port_speed(enic->vdev);
595 	eth_dev->data->dev_link.link_duplex = ETH_LINK_FULL_DUPLEX;
596 
597 	/* vnic notification of link status has already been turned on in
598 	 * enic_dev_init() which is called during probe time.  Here we are
599 	 * just turning on interrupt vector 0 if needed.
600 	 */
601 	if (eth_dev->data->dev_conf.intr_conf.lsc)
602 		vnic_dev_notify_set(enic->vdev, 0);
603 
604 	err = enic_rxq_intr_init(enic);
605 	if (err)
606 		return err;
607 	if (enic_clsf_init(enic))
608 		dev_warning(enic, "Init of hash table for clsf failed. "
609 			"Flow director feature will not work\n");
610 
611 	if (enic_fm_init(enic))
612 		dev_warning(enic, "Init of flowman failed.\n");
613 
614 	for (index = 0; index < enic->rq_count; index++) {
615 		err = enic_alloc_rx_queue_mbufs(enic,
616 			&enic->rq[enic_rte_rq_idx_to_sop_idx(index)]);
617 		if (err) {
618 			dev_err(enic, "Failed to alloc sop RX queue mbufs\n");
619 			return err;
620 		}
621 		err = enic_alloc_rx_queue_mbufs(enic,
622 			&enic->rq[enic_rte_rq_idx_to_data_idx(index, enic)]);
623 		if (err) {
624 			/* release the allocated mbufs for the sop rq */
625 			enic_rxmbuf_queue_release(enic,
626 				&enic->rq[enic_rte_rq_idx_to_sop_idx(index)]);
627 
628 			dev_err(enic, "Failed to alloc data RX queue mbufs\n");
629 			return err;
630 		}
631 	}
632 
633 	/*
634 	 * Use the simple TX handler if possible. Only checksum offloads
635 	 * and vlan insertion are supported.
636 	 */
637 	simple_tx_offloads = enic->tx_offload_capa &
638 		(DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM |
639 		 DEV_TX_OFFLOAD_VLAN_INSERT |
640 		 DEV_TX_OFFLOAD_IPV4_CKSUM |
641 		 DEV_TX_OFFLOAD_UDP_CKSUM |
642 		 DEV_TX_OFFLOAD_TCP_CKSUM);
643 	if ((eth_dev->data->dev_conf.txmode.offloads &
644 	     ~simple_tx_offloads) == 0) {
645 		ENICPMD_LOG(DEBUG, " use the simple tx handler");
646 		eth_dev->tx_pkt_burst = &enic_simple_xmit_pkts;
647 		for (index = 0; index < enic->wq_count; index++)
648 			enic_prep_wq_for_simple_tx(enic, index);
649 		enic->use_simple_tx_handler = 1;
650 	} else {
651 		ENICPMD_LOG(DEBUG, " use the default tx handler");
652 		eth_dev->tx_pkt_burst = &enic_xmit_pkts;
653 	}
654 
655 	enic_pick_rx_handler(eth_dev);
656 
657 	for (index = 0; index < enic->wq_count; index++)
658 		enic_start_wq(enic, index);
659 	for (index = 0; index < enic->rq_count; index++)
660 		enic_start_rq(enic, index);
661 
662 	vnic_dev_add_addr(enic->vdev, enic->mac_addr);
663 
664 	vnic_dev_enable_wait(enic->vdev);
665 
666 	/* Register and enable error interrupt */
667 	rte_intr_callback_register(&(enic->pdev->intr_handle),
668 		enic_intr_handler, (void *)enic->rte_dev);
669 
670 	rte_intr_enable(&(enic->pdev->intr_handle));
671 	/* Unmask LSC interrupt */
672 	vnic_intr_unmask(&enic->intr[ENICPMD_LSC_INTR_OFFSET]);
673 
674 	return 0;
675 }
676 
677 int enic_alloc_intr_resources(struct enic *enic)
678 {
679 	int err;
680 	unsigned int i;
681 
682 	dev_info(enic, "vNIC resources used: "
683 		"wq %d rq %d cq %d intr %d\n",
684 		enic->wq_count, enic_vnic_rq_count(enic),
685 		enic->cq_count, enic->intr_count);
686 
687 	for (i = 0; i < enic->intr_count; i++) {
688 		err = vnic_intr_alloc(enic->vdev, &enic->intr[i], i);
689 		if (err) {
690 			enic_free_vnic_resources(enic);
691 			return err;
692 		}
693 	}
694 	return 0;
695 }
696 
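/*
 * Release an Rx queue. Each rte Rx queue is backed by a start-of-packet (SOP)
 * RQ and, when scatter Rx is in use, a data RQ; free the mbufs and rings of
 * both, plus the CQ they share.
 */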
697 void enic_free_rq(void *rxq)
698 {
699 	struct vnic_rq *rq_sop, *rq_data;
700 	struct enic *enic;
701 
702 	if (rxq == NULL)
703 		return;
704 
705 	rq_sop = (struct vnic_rq *)rxq;
706 	enic = vnic_dev_priv(rq_sop->vdev);
707 	rq_data = &enic->rq[rq_sop->data_queue_idx];
708 
709 	if (rq_sop->free_mbufs) {
710 		struct rte_mbuf **mb;
711 		int i;
712 
713 		mb = rq_sop->free_mbufs;
714 		for (i = ENIC_RX_BURST_MAX - rq_sop->num_free_mbufs;
715 		     i < ENIC_RX_BURST_MAX; i++)
716 			rte_pktmbuf_free(mb[i]);
717 		rte_free(rq_sop->free_mbufs);
718 		rq_sop->free_mbufs = NULL;
719 		rq_sop->num_free_mbufs = 0;
720 	}
721 
722 	enic_rxmbuf_queue_release(enic, rq_sop);
723 	if (rq_data->in_use)
724 		enic_rxmbuf_queue_release(enic, rq_data);
725 
726 	rte_free(rq_sop->mbuf_ring);
727 	if (rq_data->in_use)
728 		rte_free(rq_data->mbuf_ring);
729 
730 	rq_sop->mbuf_ring = NULL;
731 	rq_data->mbuf_ring = NULL;
732 
733 	vnic_rq_free(rq_sop);
734 	if (rq_data->in_use)
735 		vnic_rq_free(rq_data);
736 
737 	vnic_cq_free(&enic->cq[enic_sop_rq_idx_to_cq_idx(rq_sop->index)]);
738 
739 	rq_sop->in_use = 0;
740 	rq_data->in_use = 0;
741 }
742 
743 void enic_start_wq(struct enic *enic, uint16_t queue_idx)
744 {
745 	struct rte_eth_dev_data *data = enic->dev_data;
746 	vnic_wq_enable(&enic->wq[queue_idx]);
747 	data->tx_queue_state[queue_idx] = RTE_ETH_QUEUE_STATE_STARTED;
748 }
749 
750 int enic_stop_wq(struct enic *enic, uint16_t queue_idx)
751 {
752 	struct rte_eth_dev_data *data = enic->dev_data;
753 	int ret;
754 
755 	ret = vnic_wq_disable(&enic->wq[queue_idx]);
756 	if (ret)
757 		return ret;
758 
759 	data->tx_queue_state[queue_idx] = RTE_ETH_QUEUE_STATE_STOPPED;
760 	return 0;
761 }
762 
763 void enic_start_rq(struct enic *enic, uint16_t queue_idx)
764 {
765 	struct rte_eth_dev_data *data = enic->dev_data;
766 	struct vnic_rq *rq_sop;
767 	struct vnic_rq *rq_data;
768 	rq_sop = &enic->rq[enic_rte_rq_idx_to_sop_idx(queue_idx)];
769 	rq_data = &enic->rq[rq_sop->data_queue_idx];
770 
771 	if (rq_data->in_use) {
772 		vnic_rq_enable(rq_data);
773 		enic_initial_post_rx(enic, rq_data);
774 	}
775 	rte_mb();
776 	vnic_rq_enable(rq_sop);
777 	enic_initial_post_rx(enic, rq_sop);
778 	data->rx_queue_state[queue_idx] = RTE_ETH_QUEUE_STATE_STARTED;
779 }
780 
781 int enic_stop_rq(struct enic *enic, uint16_t queue_idx)
782 {
783 	struct rte_eth_dev_data *data = enic->dev_data;
784 	int ret1 = 0, ret2 = 0;
785 	struct vnic_rq *rq_sop;
786 	struct vnic_rq *rq_data;
787 	rq_sop = &enic->rq[enic_rte_rq_idx_to_sop_idx(queue_idx)];
788 	rq_data = &enic->rq[rq_sop->data_queue_idx];
789 
790 	ret2 = vnic_rq_disable(rq_sop);
791 	rte_mb();
792 	if (rq_data->in_use)
793 		ret1 = vnic_rq_disable(rq_data);
794 
795 	if (ret2)
796 		return ret2;
797 	else if (ret1)
798 		return ret1;
799 
800 	data->rx_queue_state[queue_idx] = RTE_ETH_QUEUE_STATE_STOPPED;
801 	return 0;
802 }
803 
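/*
 * Allocate the SOP RQ, the optional data RQ (used only when a received frame
 * can span more than one mbuf, i.e. scatter Rx), the shared CQ, and the mbuf
 * bookkeeping arrays for one rte Rx queue. Descriptor counts are rounded to
 * multiples of 32 via ENIC_ALIGN_DESCS_MASK.
 */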
804 int enic_alloc_rq(struct enic *enic, uint16_t queue_idx,
805 	unsigned int socket_id, struct rte_mempool *mp,
806 	uint16_t nb_desc, uint16_t free_thresh)
807 {
808 	int rc;
809 	uint16_t sop_queue_idx = enic_rte_rq_idx_to_sop_idx(queue_idx);
810 	uint16_t data_queue_idx = enic_rte_rq_idx_to_data_idx(queue_idx, enic);
811 	struct vnic_rq *rq_sop = &enic->rq[sop_queue_idx];
812 	struct vnic_rq *rq_data = &enic->rq[data_queue_idx];
813 	unsigned int mbuf_size, mbufs_per_pkt;
814 	unsigned int nb_sop_desc, nb_data_desc;
815 	uint16_t min_sop, max_sop, min_data, max_data;
816 	uint32_t max_rx_pkt_len;
817 
818 	rq_sop->is_sop = 1;
819 	rq_sop->data_queue_idx = data_queue_idx;
820 	rq_data->is_sop = 0;
821 	rq_data->data_queue_idx = 0;
822 	rq_sop->socket_id = socket_id;
823 	rq_sop->mp = mp;
824 	rq_data->socket_id = socket_id;
825 	rq_data->mp = mp;
826 	rq_sop->in_use = 1;
827 	rq_sop->rx_free_thresh = free_thresh;
828 	rq_data->rx_free_thresh = free_thresh;
829 	dev_debug(enic, "Set queue_id:%u free thresh:%u\n", queue_idx,
830 		  free_thresh);
831 
832 	mbuf_size = (uint16_t)(rte_pktmbuf_data_room_size(mp) -
833 			       RTE_PKTMBUF_HEADROOM);
834 	/* max_rx_pkt_len includes the ethernet header and CRC. */
835 	max_rx_pkt_len = enic->rte_dev->data->dev_conf.rxmode.max_rx_pkt_len;
836 
837 	if (enic->rte_dev->data->dev_conf.rxmode.offloads &
838 	    DEV_RX_OFFLOAD_SCATTER) {
839 		dev_info(enic, "Rq %u Scatter rx mode enabled\n", queue_idx);
840 		/* ceil((max pkt len)/mbuf_size) */
841 		mbufs_per_pkt = (max_rx_pkt_len + mbuf_size - 1) / mbuf_size;
842 	} else {
843 		dev_info(enic, "Scatter rx mode disabled\n");
844 		mbufs_per_pkt = 1;
845 		if (max_rx_pkt_len > mbuf_size) {
846 			dev_warning(enic, "The maximum Rx packet size (%u) is"
847 				    " larger than the mbuf size (%u), and"
848 				    " scatter is disabled. Larger packets will"
849 				    " be truncated.\n",
850 				    max_rx_pkt_len, mbuf_size);
851 		}
852 	}
853 
854 	if (mbufs_per_pkt > 1) {
855 		dev_info(enic, "Rq %u Scatter rx mode in use\n", queue_idx);
856 		rq_sop->data_queue_enable = 1;
857 		rq_data->in_use = 1;
858 		/*
859 		 * HW does not directly support rxmode.max_rx_pkt_len. HW always
860 		 * receives packet sizes up to the "max" MTU.
861 		 * If not using scatter, we can achieve the effect of dropping
862 		 * larger packets by reducing the size of posted buffers.
863 		 * See enic_alloc_rx_queue_mbufs().
864 		 */
865 		if (max_rx_pkt_len <
866 		    enic_mtu_to_max_rx_pktlen(enic->max_mtu)) {
867 			dev_warning(enic, "rxmode.max_rx_pkt_len is ignored"
868 				    " when scatter rx mode is in use.\n");
869 		}
870 	} else {
871 		dev_info(enic, "Rq %u Scatter rx mode not being used\n",
872 			 queue_idx);
873 		rq_sop->data_queue_enable = 0;
874 		rq_data->in_use = 0;
875 	}
876 
877 	/* The number of descriptors must be a multiple of 32 */
878 	nb_sop_desc = (nb_desc / mbufs_per_pkt) & ENIC_ALIGN_DESCS_MASK;
879 	nb_data_desc = (nb_desc - nb_sop_desc) & ENIC_ALIGN_DESCS_MASK;
880 
881 	rq_sop->max_mbufs_per_pkt = mbufs_per_pkt;
882 	rq_data->max_mbufs_per_pkt = mbufs_per_pkt;
883 
884 	if (mbufs_per_pkt > 1) {
885 		min_sop = ENIC_RX_BURST_MAX;
886 		max_sop = ((enic->config.rq_desc_count /
887 			    (mbufs_per_pkt - 1)) & ENIC_ALIGN_DESCS_MASK);
888 		min_data = min_sop * (mbufs_per_pkt - 1);
889 		max_data = enic->config.rq_desc_count;
890 	} else {
891 		min_sop = ENIC_RX_BURST_MAX;
892 		max_sop = enic->config.rq_desc_count;
893 		min_data = 0;
894 		max_data = 0;
895 	}
896 
897 	if (nb_desc < (min_sop + min_data)) {
898 		dev_warning(enic,
899 			    "Number of rx descs too low, adjusting to minimum\n");
900 		nb_sop_desc = min_sop;
901 		nb_data_desc = min_data;
902 	} else if (nb_desc > (max_sop + max_data)) {
903 		dev_warning(enic,
904 			    "Number of rx_descs too high, adjusting to maximum\n");
905 		nb_sop_desc = max_sop;
906 		nb_data_desc = max_data;
907 	}
908 	if (mbufs_per_pkt > 1) {
909 		dev_info(enic, "For max packet size %u and mbuf size %u valid"
910 			 " rx descriptor range is %u to %u\n",
911 			 max_rx_pkt_len, mbuf_size, min_sop + min_data,
912 			 max_sop + max_data);
913 	}
914 	dev_info(enic, "Using %d rx descriptors (sop %d, data %d)\n",
915 		 nb_sop_desc + nb_data_desc, nb_sop_desc, nb_data_desc);
916 
917 	/* Allocate sop queue resources */
918 	rc = vnic_rq_alloc(enic->vdev, rq_sop, sop_queue_idx,
919 		nb_sop_desc, sizeof(struct rq_enet_desc));
920 	if (rc) {
921 		dev_err(enic, "error in allocation of sop rq\n");
922 		goto err_exit;
923 	}
924 	nb_sop_desc = rq_sop->ring.desc_count;
925 
926 	if (rq_data->in_use) {
927 		/* Allocate data queue resources */
928 		rc = vnic_rq_alloc(enic->vdev, rq_data, data_queue_idx,
929 				   nb_data_desc,
930 				   sizeof(struct rq_enet_desc));
931 		if (rc) {
932 			dev_err(enic, "error in allocation of data rq\n");
933 			goto err_free_rq_sop;
934 		}
935 		nb_data_desc = rq_data->ring.desc_count;
936 	}
937 	rc = vnic_cq_alloc(enic->vdev, &enic->cq[queue_idx], queue_idx,
938 			   socket_id, nb_sop_desc + nb_data_desc,
939 			   sizeof(struct cq_enet_rq_desc));
940 	if (rc) {
941 		dev_err(enic, "error in allocation of cq for rq\n");
942 		goto err_free_rq_data;
943 	}
944 
945 	/* Allocate the mbuf rings */
946 	rq_sop->mbuf_ring = (struct rte_mbuf **)
947 		rte_zmalloc_socket("rq->mbuf_ring",
948 				   sizeof(struct rte_mbuf *) * nb_sop_desc,
949 				   RTE_CACHE_LINE_SIZE, rq_sop->socket_id);
950 	if (rq_sop->mbuf_ring == NULL)
951 		goto err_free_cq;
952 
953 	if (rq_data->in_use) {
954 		rq_data->mbuf_ring = (struct rte_mbuf **)
955 			rte_zmalloc_socket("rq->mbuf_ring",
956 				sizeof(struct rte_mbuf *) * nb_data_desc,
957 				RTE_CACHE_LINE_SIZE, rq_sop->socket_id);
958 		if (rq_data->mbuf_ring == NULL)
959 			goto err_free_sop_mbuf;
960 	}
961 
962 	rq_sop->free_mbufs = (struct rte_mbuf **)
963 		rte_zmalloc_socket("rq->free_mbufs",
964 				   sizeof(struct rte_mbuf *) *
965 				   ENIC_RX_BURST_MAX,
966 				   RTE_CACHE_LINE_SIZE, rq_sop->socket_id);
967 	if (rq_sop->free_mbufs == NULL)
968 		goto err_free_data_mbuf;
969 	rq_sop->num_free_mbufs = 0;
970 
971 	rq_sop->tot_nb_desc = nb_desc; /* squirrel away for MTU update function */
972 
973 	return 0;
974 
975 err_free_data_mbuf:
976 	rte_free(rq_data->mbuf_ring);
977 err_free_sop_mbuf:
978 	rte_free(rq_sop->mbuf_ring);
979 err_free_cq:
980 	/* cleanup on error */
981 	vnic_cq_free(&enic->cq[queue_idx]);
982 err_free_rq_data:
983 	if (rq_data->in_use)
984 		vnic_rq_free(rq_data);
985 err_free_rq_sop:
986 	vnic_rq_free(rq_sop);
987 err_exit:
988 	return -ENOMEM;
989 }
990 
991 void enic_free_wq(void *txq)
992 {
993 	struct vnic_wq *wq;
994 	struct enic *enic;
995 
996 	if (txq == NULL)
997 		return;
998 
999 	wq = (struct vnic_wq *)txq;
1000 	enic = vnic_dev_priv(wq->vdev);
1001 	rte_memzone_free(wq->cqmsg_rz);
1002 	vnic_wq_free(wq);
1003 	vnic_cq_free(&enic->cq[enic->rq_count + wq->index]);
1004 }
1005 
1006 int enic_alloc_wq(struct enic *enic, uint16_t queue_idx,
1007 	unsigned int socket_id, uint16_t nb_desc)
1008 {
1009 	int err;
1010 	struct vnic_wq *wq = &enic->wq[queue_idx];
1011 	unsigned int cq_index = enic_cq_wq(enic, queue_idx);
1012 	char name[RTE_MEMZONE_NAMESIZE];
1013 	static int instance;
1014 
1015 	wq->socket_id = socket_id;
1016 	/*
1017 	 * rte_eth_tx_queue_setup() checks min, max, and alignment. So just
1018 	 * print an info message for diagnostics.
1019 	 */
1020 	dev_info(enic, "TX Queues - effective number of descs:%d\n", nb_desc);
1021 
1022 	/* Allocate queue resources */
1023 	err = vnic_wq_alloc(enic->vdev, &enic->wq[queue_idx], queue_idx,
1024 		nb_desc,
1025 		sizeof(struct wq_enet_desc));
1026 	if (err) {
1027 		dev_err(enic, "error in allocation of wq\n");
1028 		return err;
1029 	}
1030 
1031 	err = vnic_cq_alloc(enic->vdev, &enic->cq[cq_index], cq_index,
1032 		socket_id, nb_desc,
1033 		sizeof(struct cq_enet_wq_desc));
1034 	if (err) {
1035 		vnic_wq_free(wq);
1036 		dev_err(enic, "error in allocation of cq for wq\n");
1037 	}
1038 
1039 	/* Set up the CQ message */
1040 	snprintf((char *)name, sizeof(name),
1041 		 "vnic_cqmsg-%s-%d-%d", enic->bdf_name, queue_idx,
1042 		instance++);
1043 
1044 	wq->cqmsg_rz = rte_memzone_reserve_aligned((const char *)name,
1045 			sizeof(uint32_t), SOCKET_ID_ANY,
1046 			RTE_MEMZONE_IOVA_CONTIG, ENIC_PAGE_SIZE);
1047 	if (!wq->cqmsg_rz)
1048 		return -ENOMEM;
1049 
1050 	return err;
1051 }
1052 
1053 int enic_disable(struct enic *enic)
1054 {
1055 	unsigned int i;
1056 	int err;
1057 
1058 	for (i = 0; i < enic->intr_count; i++) {
1059 		vnic_intr_mask(&enic->intr[i]);
1060 		(void)vnic_intr_masked(&enic->intr[i]); /* flush write */
1061 	}
1062 	enic_rxq_intr_deinit(enic);
1063 	rte_intr_disable(&enic->pdev->intr_handle);
1064 	rte_intr_callback_unregister(&enic->pdev->intr_handle,
1065 				     enic_intr_handler,
1066 				     (void *)enic->rte_dev);
1067 
1068 	vnic_dev_disable(enic->vdev);
1069 
1070 	enic_clsf_destroy(enic);
1071 	enic_fm_destroy(enic);
1072 
1073 	if (!enic_is_sriov_vf(enic))
1074 		vnic_dev_del_addr(enic->vdev, enic->mac_addr);
1075 
1076 	for (i = 0; i < enic->wq_count; i++) {
1077 		err = vnic_wq_disable(&enic->wq[i]);
1078 		if (err)
1079 			return err;
1080 	}
1081 	for (i = 0; i < enic_vnic_rq_count(enic); i++) {
1082 		if (enic->rq[i].in_use) {
1083 			err = vnic_rq_disable(&enic->rq[i]);
1084 			if (err)
1085 				return err;
1086 		}
1087 	}
1088 
1089 	/* If we were using interrupts, set the interrupt vector to -1
1090 	 * to disable interrupts.  We are not disabling link notifications,
1091 	 * though, as we want the polling of link status to continue working.
1092 	 */
1093 	if (enic->rte_dev->data->dev_conf.intr_conf.lsc)
1094 		vnic_dev_notify_set(enic->vdev, -1);
1095 
1096 	vnic_dev_set_reset_flag(enic->vdev, 1);
1097 
1098 	for (i = 0; i < enic->wq_count; i++)
1099 		vnic_wq_clean(&enic->wq[i], enic_free_wq_buf);
1100 
1101 	for (i = 0; i < enic_vnic_rq_count(enic); i++)
1102 		if (enic->rq[i].in_use)
1103 			vnic_rq_clean(&enic->rq[i], enic_free_rq_buf);
1104 	for (i = 0; i < enic->cq_count; i++)
1105 		vnic_cq_clean(&enic->cq[i]);
1106 	for (i = 0; i < enic->intr_count; i++)
1107 		vnic_intr_clean(&enic->intr[i]);
1108 
1109 	return 0;
1110 }
1111 
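/*
 * Helper for asynchronous devcmds: issue the command via 'start', then poll
 * 'finished' once per millisecond for up to two seconds before giving up
 * with -ETIMEDOUT.
 */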
1112 static int enic_dev_wait(struct vnic_dev *vdev,
1113 	int (*start)(struct vnic_dev *, int),
1114 	int (*finished)(struct vnic_dev *, int *),
1115 	int arg)
1116 {
1117 	int done;
1118 	int err;
1119 	int i;
1120 
1121 	err = start(vdev, arg);
1122 	if (err)
1123 		return err;
1124 
1125 	/* Wait for func to complete...2 seconds max */
1126 	for (i = 0; i < 2000; i++) {
1127 		err = finished(vdev, &done);
1128 		if (err)
1129 			return err;
1130 		if (done)
1131 			return 0;
1132 		usleep(1000);
1133 	}
1134 	return -ETIMEDOUT;
1135 }
1136 
1137 static int enic_dev_open(struct enic *enic)
1138 {
1139 	int err;
1140 	int flags = CMD_OPENF_IG_DESCCACHE;
1141 
1142 	err = enic_dev_wait(enic->vdev, vnic_dev_open,
1143 		vnic_dev_open_done, flags);
1144 	if (err)
1145 		dev_err(enic_get_dev(enic),
1146 			"vNIC device open failed, err %d\n", err);
1147 
1148 	return err;
1149 }
1150 
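/*
 * Copy the 40-byte RSS hash key into the 10-byte-per-segment vnic_rss_key
 * layout in DMA-able memory and program it into the NIC. On success the key
 * is also cached in enic->rss_key for later hash configuration queries.
 */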
1151 static int enic_set_rsskey(struct enic *enic, uint8_t *user_key)
1152 {
1153 	dma_addr_t rss_key_buf_pa;
1154 	union vnic_rss_key *rss_key_buf_va = NULL;
1155 	int err, i;
1156 	uint8_t name[RTE_MEMZONE_NAMESIZE];
1157 
1158 	RTE_ASSERT(user_key != NULL);
1159 	snprintf((char *)name, sizeof(name), "rss_key-%s", enic->bdf_name);
1160 	rss_key_buf_va = enic_alloc_consistent(enic, sizeof(union vnic_rss_key),
1161 		&rss_key_buf_pa, name);
1162 	if (!rss_key_buf_va)
1163 		return -ENOMEM;
1164 
1165 	for (i = 0; i < ENIC_RSS_HASH_KEY_SIZE; i++)
1166 		rss_key_buf_va->key[i / 10].b[i % 10] = user_key[i];
1167 
1168 	err = enic_set_rss_key(enic,
1169 		rss_key_buf_pa,
1170 		sizeof(union vnic_rss_key));
1171 
1172 	/* Save for later queries */
1173 	if (!err) {
1174 		rte_memcpy(&enic->rss_key, rss_key_buf_va,
1175 			   sizeof(union vnic_rss_key));
1176 	}
1177 	enic_free_consistent(enic, sizeof(union vnic_rss_key),
1178 		rss_key_buf_va, rss_key_buf_pa);
1179 
1180 	return err;
1181 }
1182 
1183 int enic_set_rss_reta(struct enic *enic, union vnic_rss_cpu *rss_cpu)
1184 {
1185 	dma_addr_t rss_cpu_buf_pa;
1186 	union vnic_rss_cpu *rss_cpu_buf_va = NULL;
1187 	int err;
1188 	uint8_t name[RTE_MEMZONE_NAMESIZE];
1189 
1190 	snprintf((char *)name, sizeof(name), "rss_cpu-%s", enic->bdf_name);
1191 	rss_cpu_buf_va = enic_alloc_consistent(enic, sizeof(union vnic_rss_cpu),
1192 		&rss_cpu_buf_pa, name);
1193 	if (!rss_cpu_buf_va)
1194 		return -ENOMEM;
1195 
1196 	rte_memcpy(rss_cpu_buf_va, rss_cpu, sizeof(union vnic_rss_cpu));
1197 
1198 	err = enic_set_rss_cpu(enic,
1199 		rss_cpu_buf_pa,
1200 		sizeof(union vnic_rss_cpu));
1201 
1202 	enic_free_consistent(enic, sizeof(union vnic_rss_cpu),
1203 		rss_cpu_buf_va, rss_cpu_buf_pa);
1204 
1205 	/* Save for later queries */
1206 	if (!err)
1207 		rte_memcpy(&enic->rss_cpu, rss_cpu, sizeof(union vnic_rss_cpu));
1208 	return err;
1209 }
1210 
1211 static int enic_set_niccfg(struct enic *enic, uint8_t rss_default_cpu,
1212 	uint8_t rss_hash_type, uint8_t rss_hash_bits, uint8_t rss_base_cpu,
1213 	uint8_t rss_enable)
1214 {
1215 	const uint8_t tso_ipid_split_en = 0;
1216 	int err;
1217 
1218 	err = enic_set_nic_cfg(enic,
1219 		rss_default_cpu, rss_hash_type,
1220 		rss_hash_bits, rss_base_cpu,
1221 		rss_enable, tso_ipid_split_en,
1222 		enic->ig_vlan_strip_en);
1223 
1224 	return err;
1225 }
1226 
1227 /* Initialize RSS with defaults, called from dev_configure */
1228 int enic_init_rss_nic_cfg(struct enic *enic)
1229 {
1230 	static uint8_t default_rss_key[] = {
1231 		85, 67, 83, 97, 119, 101, 115, 111, 109, 101,
1232 		80, 65, 76, 79, 117, 110, 105, 113, 117, 101,
1233 		76, 73, 78, 85, 88, 114, 111, 99, 107, 115,
1234 		69, 78, 73, 67, 105, 115, 99, 111, 111, 108,
1235 	};
1236 	struct rte_eth_rss_conf rss_conf;
1237 	union vnic_rss_cpu rss_cpu;
1238 	int ret, i;
1239 
1240 	rss_conf = enic->rte_dev->data->dev_conf.rx_adv_conf.rss_conf;
1241 	/*
1242 	 * If setting key for the first time, and the user gives us none, then
1243 	 * push the default key to NIC.
1244 	 */
1245 	if (rss_conf.rss_key == NULL) {
1246 		rss_conf.rss_key = default_rss_key;
1247 		rss_conf.rss_key_len = ENIC_RSS_HASH_KEY_SIZE;
1248 	}
1249 	ret = enic_set_rss_conf(enic, &rss_conf);
1250 	if (ret) {
1251 		dev_err(enic, "Failed to configure RSS\n");
1252 		return ret;
1253 	}
1254 	if (enic->rss_enable) {
1255 		/* If enabling RSS, use the default reta */
1256 		for (i = 0; i < ENIC_RSS_RETA_SIZE; i++) {
1257 			rss_cpu.cpu[i / 4].b[i % 4] =
1258 				enic_rte_rq_idx_to_sop_idx(i % enic->rq_count);
1259 		}
1260 		ret = enic_set_rss_reta(enic, &rss_cpu);
1261 		if (ret)
1262 			dev_err(enic, "Failed to set RSS indirection table\n");
1263 	}
1264 	return ret;
1265 }
1266 
1267 int enic_setup_finish(struct enic *enic)
1268 {
1269 	enic_init_soft_stats(enic);
1270 
1271 	/* Default conf */
1272 	vnic_dev_packet_filter(enic->vdev,
1273 		1 /* directed  */,
1274 		1 /* multicast */,
1275 		1 /* broadcast */,
1276 		0 /* promisc   */,
1277 		1 /* allmulti  */);
1278 
1279 	enic->promisc = 0;
1280 	enic->allmulti = 1;
1281 
1282 	return 0;
1283 }
1284 
1285 static int enic_rss_conf_valid(struct enic *enic,
1286 			       struct rte_eth_rss_conf *rss_conf)
1287 {
1288 	/* RSS is disabled per VIC settings. Ignore rss_conf. */
1289 	if (enic->flow_type_rss_offloads == 0)
1290 		return 0;
1291 	if (rss_conf->rss_key != NULL &&
1292 	    rss_conf->rss_key_len != ENIC_RSS_HASH_KEY_SIZE) {
1293 		dev_err(enic, "Given rss_key is %d bytes, it must be %d\n",
1294 			rss_conf->rss_key_len, ENIC_RSS_HASH_KEY_SIZE);
1295 		return -EINVAL;
1296 	}
1297 	if (rss_conf->rss_hf != 0 &&
1298 	    (rss_conf->rss_hf & enic->flow_type_rss_offloads) == 0) {
1299 		dev_err(enic, "Given rss_hf contains none of the supported"
1300 			" types\n");
1301 		return -EINVAL;
1302 	}
1303 	return 0;
1304 }
1305 
1306 /* Set hash type and key according to rss_conf */
1307 int enic_set_rss_conf(struct enic *enic, struct rte_eth_rss_conf *rss_conf)
1308 {
1309 	struct rte_eth_dev *eth_dev;
1310 	uint64_t rss_hf;
1311 	uint8_t rss_hash_type;
1312 	uint8_t rss_enable;
1313 	int ret;
1314 
1315 	RTE_ASSERT(rss_conf != NULL);
1316 	ret = enic_rss_conf_valid(enic, rss_conf);
1317 	if (ret) {
1318 		dev_err(enic, "RSS configuration (rss_conf) is invalid\n");
1319 		return ret;
1320 	}
1321 
1322 	eth_dev = enic->rte_dev;
1323 	rss_hash_type = 0;
1324 	rss_hf = rss_conf->rss_hf & enic->flow_type_rss_offloads;
1325 	if (enic->rq_count > 1 &&
1326 	    (eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS_FLAG) &&
1327 	    rss_hf != 0) {
1328 		rss_enable = 1;
1329 		if (rss_hf & (ETH_RSS_IPV4 | ETH_RSS_FRAG_IPV4 |
1330 			      ETH_RSS_NONFRAG_IPV4_OTHER))
1331 			rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_IPV4;
1332 		if (rss_hf & ETH_RSS_NONFRAG_IPV4_TCP)
1333 			rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_TCP_IPV4;
1334 		if (rss_hf & ETH_RSS_NONFRAG_IPV4_UDP) {
1335 			rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_UDP_IPV4;
1336 			if (enic->udp_rss_weak) {
1337 				/*
1338 				 * 'TCP' is not a typo. The "weak" version of
1339 				 * UDP RSS requires both the TCP and UDP bits
1340 				 * be set. It does enable TCP RSS as well.
1341 				 */
1342 				rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_TCP_IPV4;
1343 			}
1344 		}
1345 		if (rss_hf & (ETH_RSS_IPV6 | ETH_RSS_IPV6_EX |
1346 			      ETH_RSS_FRAG_IPV6 | ETH_RSS_NONFRAG_IPV6_OTHER))
1347 			rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_IPV6;
1348 		if (rss_hf & (ETH_RSS_NONFRAG_IPV6_TCP | ETH_RSS_IPV6_TCP_EX))
1349 			rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_TCP_IPV6;
1350 		if (rss_hf & (ETH_RSS_NONFRAG_IPV6_UDP | ETH_RSS_IPV6_UDP_EX)) {
1351 			rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_UDP_IPV6;
1352 			if (enic->udp_rss_weak)
1353 				rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_TCP_IPV6;
1354 		}
1355 	} else {
1356 		rss_enable = 0;
1357 		rss_hf = 0;
1358 	}
1359 
1360 	/* Set the hash key if provided */
1361 	if (rss_enable && rss_conf->rss_key) {
1362 		ret = enic_set_rsskey(enic, rss_conf->rss_key);
1363 		if (ret) {
1364 			dev_err(enic, "Failed to set RSS key\n");
1365 			return ret;
1366 		}
1367 	}
1368 
1369 	ret = enic_set_niccfg(enic, ENIC_RSS_DEFAULT_CPU, rss_hash_type,
1370 			      ENIC_RSS_HASH_BITS, ENIC_RSS_BASE_CPU,
1371 			      rss_enable);
1372 	if (!ret) {
1373 		enic->rss_hf = rss_hf;
1374 		enic->rss_hash_type = rss_hash_type;
1375 		enic->rss_enable = rss_enable;
1376 	} else {
1377 		dev_err(enic, "Failed to update RSS configurations."
1378 			" hash=0x%x\n", rss_hash_type);
1379 	}
1380 	return ret;
1381 }
1382 
1383 int enic_set_vlan_strip(struct enic *enic)
1384 {
1385 	/*
1386 	 * Unfortunately, VLAN strip on/off and RSS on/off are configured
1387 	 * together. So, re-do niccfg, preserving the current RSS settings.
1388 	 */
1389 	return enic_set_niccfg(enic, ENIC_RSS_DEFAULT_CPU, enic->rss_hash_type,
1390 			       ENIC_RSS_HASH_BITS, ENIC_RSS_BASE_CPU,
1391 			       enic->rss_enable);
1392 }
1393 
1394 int enic_add_packet_filter(struct enic *enic)
1395 {
1396 	/* Args -> directed, multicast, broadcast, promisc, allmulti */
1397 	return vnic_dev_packet_filter(enic->vdev, 1, 1, 1,
1398 		enic->promisc, enic->allmulti);
1399 }
1400 
1401 int enic_get_link_status(struct enic *enic)
1402 {
1403 	return vnic_dev_link_status(enic->vdev);
1404 }
1405 
1406 static void enic_dev_deinit(struct enic *enic)
1407 {
1408 	/* stop link status checking */
1409 	vnic_dev_notify_unset(enic->vdev);
1410 
1411 	/* mac_addrs is freed by rte_eth_dev_release_port() */
1412 	rte_free(enic->cq);
1413 	rte_free(enic->intr);
1414 	rte_free(enic->rq);
1415 	rte_free(enic->wq);
1416 }
1417 
1418 
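/*
 * Verify that the vNIC provides enough RQs, WQs, CQs and interrupt resources
 * for the queue counts requested through the ethdev API. Every rte Rx queue
 * consumes two vNIC RQs (SOP + data), and one extra interrupt is reserved
 * for link state changes.
 */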
1419 int enic_set_vnic_res(struct enic *enic)
1420 {
1421 	struct rte_eth_dev *eth_dev = enic->rte_dev;
1422 	int rc = 0;
1423 	unsigned int required_rq, required_wq, required_cq, required_intr;
1424 
1425 	/* Always use two vNIC RQs per eth_dev RQ, regardless of Rx scatter. */
1426 	required_rq = eth_dev->data->nb_rx_queues * 2;
1427 	required_wq = eth_dev->data->nb_tx_queues;
1428 	required_cq = eth_dev->data->nb_rx_queues + eth_dev->data->nb_tx_queues;
1429 	required_intr = 1; /* 1 for LSC even if intr_conf.lsc is 0 */
1430 	if (eth_dev->data->dev_conf.intr_conf.rxq) {
1431 		required_intr += eth_dev->data->nb_rx_queues;
1432 	}
1433 
1434 	if (enic->conf_rq_count < required_rq) {
1435 		dev_err(dev, "Not enough Receive queues. Requested:%u which uses %d RQs on VIC, Configured:%u\n",
1436 			eth_dev->data->nb_rx_queues,
1437 			required_rq, enic->conf_rq_count);
1438 		rc = -EINVAL;
1439 	}
1440 	if (enic->conf_wq_count < required_wq) {
1441 		dev_err(dev, "Not enough Transmit queues. Requested:%u, Configured:%u\n",
1442 			eth_dev->data->nb_tx_queues, enic->conf_wq_count);
1443 		rc = -EINVAL;
1444 	}
1445 
1446 	if (enic->conf_cq_count < required_cq) {
1447 		dev_err(dev, "Not enough Completion queues. Required:%u, Configured:%u\n",
1448 			required_cq, enic->conf_cq_count);
1449 		rc = -EINVAL;
1450 	}
1451 	if (enic->conf_intr_count < required_intr) {
1452 		dev_err(dev, "Not enough Interrupts to support Rx queue"
1453 			" interrupts. Required:%u, Configured:%u\n",
1454 			required_intr, enic->conf_intr_count);
1455 		rc = -EINVAL;
1456 	}
1457 
1458 	if (rc == 0) {
1459 		enic->rq_count = eth_dev->data->nb_rx_queues;
1460 		enic->wq_count = eth_dev->data->nb_tx_queues;
1461 		enic->cq_count = enic->rq_count + enic->wq_count;
1462 		enic->intr_count = required_intr;
1463 	}
1464 
1465 	return rc;
1466 }
1467 
1468 /* Re-initialize the CQ and RQs for an Rx queue and refill its mbufs */
1469 static int
1470 enic_reinit_rq(struct enic *enic, unsigned int rq_idx)
1471 {
1472 	struct vnic_rq *sop_rq, *data_rq;
1473 	unsigned int cq_idx;
1474 	int rc = 0;
1475 
1476 	sop_rq = &enic->rq[enic_rte_rq_idx_to_sop_idx(rq_idx)];
1477 	data_rq = &enic->rq[enic_rte_rq_idx_to_data_idx(rq_idx, enic)];
1478 	cq_idx = rq_idx;
1479 
1480 	vnic_cq_clean(&enic->cq[cq_idx]);
1481 	vnic_cq_init(&enic->cq[cq_idx],
1482 		     0 /* flow_control_enable */,
1483 		     1 /* color_enable */,
1484 		     0 /* cq_head */,
1485 		     0 /* cq_tail */,
1486 		     1 /* cq_tail_color */,
1487 		     0 /* interrupt_enable */,
1488 		     1 /* cq_entry_enable */,
1489 		     0 /* cq_message_enable */,
1490 		     0 /* interrupt offset */,
1491 		     0 /* cq_message_addr */);
1492 
1493 
1494 	vnic_rq_init_start(sop_rq, enic_cq_rq(enic,
1495 			   enic_rte_rq_idx_to_sop_idx(rq_idx)), 0,
1496 			   sop_rq->ring.desc_count - 1, 1, 0);
1497 	if (data_rq->in_use) {
1498 		vnic_rq_init_start(data_rq,
1499 				   enic_cq_rq(enic,
1500 				   enic_rte_rq_idx_to_data_idx(rq_idx, enic)),
1501 				   0, data_rq->ring.desc_count - 1, 1, 0);
1502 	}
1503 
1504 	rc = enic_alloc_rx_queue_mbufs(enic, sop_rq);
1505 	if (rc)
1506 		return rc;
1507 
1508 	if (data_rq->in_use) {
1509 		rc = enic_alloc_rx_queue_mbufs(enic, data_rq);
1510 		if (rc) {
1511 			enic_rxmbuf_queue_release(enic, sop_rq);
1512 			return rc;
1513 		}
1514 	}
1515 
1516 	return 0;
1517 }
1518 
1519 /* The Cisco NIC can send and receive packets up to a max packet size
1520  * determined by the NIC type and firmware. There is also an MTU
1521  * configured into the NIC via the CIMC/UCSM management interface
1522  * which can be overridden by this function (up to the max packet size).
1523  * Depending on the network setup, doing so may cause packet drops
1524  * and unexpected behavior.
1525  */
1526 int enic_set_mtu(struct enic *enic, uint16_t new_mtu)
1527 {
1528 	unsigned int rq_idx;
1529 	struct vnic_rq *rq;
1530 	int rc = 0;
1531 	uint16_t old_mtu;	/* previous setting */
1532 	uint16_t config_mtu;	/* Value configured into NIC via CIMC/UCSM */
1533 	struct rte_eth_dev *eth_dev = enic->rte_dev;
1534 
1535 	old_mtu = eth_dev->data->mtu;
1536 	config_mtu = enic->config.mtu;
1537 
1538 	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
1539 		return -E_RTE_SECONDARY;
1540 
1541 	if (new_mtu > enic->max_mtu) {
1542 		dev_err(enic,
1543 			"MTU not updated: requested (%u) greater than max (%u)\n",
1544 			new_mtu, enic->max_mtu);
1545 		return -EINVAL;
1546 	}
1547 	if (new_mtu < ENIC_MIN_MTU) {
1548 		dev_info(enic,
1549 			"MTU not updated: requested (%u) less than min (%u)\n",
1550 			new_mtu, ENIC_MIN_MTU);
1551 		return -EINVAL;
1552 	}
1553 	if (new_mtu > config_mtu)
1554 		dev_warning(enic,
1555 			"MTU (%u) is greater than value configured in NIC (%u)\n",
1556 			new_mtu, config_mtu);
1557 
1558 	/* Update the MTU and maximum packet length */
1559 	eth_dev->data->mtu = new_mtu;
1560 	eth_dev->data->dev_conf.rxmode.max_rx_pkt_len =
1561 		enic_mtu_to_max_rx_pktlen(new_mtu);
1562 
1563 	/*
1564 	 * If the device has not started (enic_enable), nothing to do.
1565 	 * Later, enic_enable() will set up RQs reflecting the new maximum
1566 	 * packet length.
1567 	 */
1568 	if (!eth_dev->data->dev_started)
1569 		goto set_mtu_done;
1570 
1571 	/*
1572 	 * The device has started, re-do RQs on the fly. In the process, we
1573 	 * pick up the new maximum packet length.
1574 	 *
1575 	 * Some applications rely on the ability to change MTU without stopping
1576 	 * the device. So keep this behavior for now.
1577 	 */
1578 	rte_spinlock_lock(&enic->mtu_lock);
1579 
1580 	/* Stop traffic on all RQs */
1581 	for (rq_idx = 0; rq_idx < enic->rq_count * 2; rq_idx++) {
1582 		rq = &enic->rq[rq_idx];
1583 		if (rq->is_sop && rq->in_use) {
1584 			rc = enic_stop_rq(enic,
1585 					  enic_sop_rq_idx_to_rte_idx(rq_idx));
1586 			if (rc) {
1587 				dev_err(enic, "Failed to stop Rq %u\n", rq_idx);
1588 				goto set_mtu_done;
1589 			}
1590 		}
1591 	}
1592 
1593 	/* replace Rx function with a no-op to avoid getting stale pkts */
1594 	eth_dev->rx_pkt_burst = enic_dummy_recv_pkts;
1595 	rte_mb();
1596 
1597 	/* Allow time for threads to exit the real Rx function. */
1598 	usleep(100000);
1599 
1600 	/* now it is safe to reconfigure the RQs */
1601 
1602 
1603 	/* free and reallocate RQs with the new MTU */
1604 	for (rq_idx = 0; rq_idx < enic->rq_count; rq_idx++) {
1605 		rq = &enic->rq[enic_rte_rq_idx_to_sop_idx(rq_idx)];
1606 		if (!rq->in_use)
1607 			continue;
1608 
1609 		enic_free_rq(rq);
1610 		rc = enic_alloc_rq(enic, rq_idx, rq->socket_id, rq->mp,
1611 				   rq->tot_nb_desc, rq->rx_free_thresh);
1612 		if (rc) {
1613 			dev_err(enic,
1614 				"Fatal MTU alloc error- No traffic will pass\n");
1615 			goto set_mtu_done;
1616 		}
1617 
1618 		rc = enic_reinit_rq(enic, rq_idx);
1619 		if (rc) {
1620 			dev_err(enic,
1621 				"Fatal MTU RQ reinit- No traffic will pass\n");
1622 			goto set_mtu_done;
1623 		}
1624 	}
1625 
1626 	/* put back the real receive function */
1627 	rte_mb();
1628 	enic_pick_rx_handler(eth_dev);
1629 	rte_mb();
1630 
1631 	/* restart Rx traffic */
1632 	for (rq_idx = 0; rq_idx < enic->rq_count; rq_idx++) {
1633 		rq = &enic->rq[enic_rte_rq_idx_to_sop_idx(rq_idx)];
1634 		if (rq->is_sop && rq->in_use)
1635 			enic_start_rq(enic, rq_idx);
1636 	}
1637 
1638 set_mtu_done:
1639 	dev_info(enic, "MTU changed from %u to %u\n",  old_mtu, new_mtu);
1640 	rte_spinlock_unlock(&enic->mtu_lock);
1641 	return rc;
1642 }
1643 
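/*
 * One-time device initialization done at probe: fetch the vNIC configuration
 * and resource counts, allocate the CQ/interrupt/RQ/WQ bookkeeping arrays and
 * the MAC address table, enable link notification, and set up the overlay
 * (VXLAN/Geneve) offload state.
 */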
1644 static int enic_dev_init(struct enic *enic)
1645 {
1646 	int err;
1647 	struct rte_eth_dev *eth_dev = enic->rte_dev;
1648 
1649 	vnic_dev_intr_coal_timer_info_default(enic->vdev);
1650 
1651 	/* Get vNIC configuration */
1653 	err = enic_get_vnic_config(enic);
1654 	if (err) {
1655 		dev_err(dev, "Get vNIC configuration failed, aborting\n");
1656 		return err;
1657 	}
1658 
1659 	/* Get available resource counts */
1660 	enic_get_res_counts(enic);
1661 	if (enic->conf_rq_count == 1) {
1662 		dev_err(enic, "Running with only 1 RQ configured in the vNIC is not supported.\n");
1663 		dev_err(enic, "Please configure 2 RQs in the vNIC for each Rx queue used by DPDK.\n");
1664 		dev_err(enic, "See the ENIC PMD guide for more information.\n");
1665 		return -EINVAL;
1666 	}
1667 	/* Queue counts may be zero. rte_zmalloc returns NULL in that case. */
1668 	enic->cq = rte_zmalloc("enic_vnic_cq", sizeof(struct vnic_cq) *
1669 			       enic->conf_cq_count, 8);
1670 	enic->intr = rte_zmalloc("enic_vnic_intr", sizeof(struct vnic_intr) *
1671 				 enic->conf_intr_count, 8);
1672 	enic->rq = rte_zmalloc("enic_vnic_rq", sizeof(struct vnic_rq) *
1673 			       enic->conf_rq_count, 8);
1674 	enic->wq = rte_zmalloc("enic_vnic_wq", sizeof(struct vnic_wq) *
1675 			       enic->conf_wq_count, 8);
1676 	if (enic->conf_cq_count > 0 && enic->cq == NULL) {
1677 		dev_err(enic, "failed to allocate vnic_cq, aborting.\n");
1678 		return -1;
1679 	}
1680 	if (enic->conf_intr_count > 0 && enic->intr == NULL) {
1681 		dev_err(enic, "failed to allocate vnic_intr, aborting.\n");
1682 		return -1;
1683 	}
1684 	if (enic->conf_rq_count > 0 && enic->rq == NULL) {
1685 		dev_err(enic, "failed to allocate vnic_rq, aborting.\n");
1686 		return -1;
1687 	}
1688 	if (enic->conf_wq_count > 0 && enic->wq == NULL) {
1689 		dev_err(enic, "failed to allocate vnic_wq, aborting.\n");
1690 		return -1;
1691 	}
1692 
1693 	/* Get the supported filters */
1694 	enic_fdir_info(enic);
1695 
1696 	eth_dev->data->mac_addrs = rte_zmalloc("enic_mac_addr",
1697 					sizeof(struct rte_ether_addr) *
1698 					ENIC_UNICAST_PERFECT_FILTERS, 0);
1699 	if (!eth_dev->data->mac_addrs) {
1700 		dev_err(enic, "mac addr storage alloc failed, aborting.\n");
1701 		return -1;
1702 	}
1703 	rte_ether_addr_copy((struct rte_ether_addr *)enic->mac_addr,
1704 			eth_dev->data->mac_addrs);
1705 
1706 	vnic_dev_set_reset_flag(enic->vdev, 0);
1707 
1708 	LIST_INIT(&enic->flows);
1709 
1710 	/* set up link status checking */
1711 	vnic_dev_notify_set(enic->vdev, -1); /* No Intr for notify */
1712 
1713 	/*
1714 	 * When Geneve with options offload is available, always disable it
1715 	 * first as it can interfere with user flow rules.
1716 	 */
1717 	if (enic->geneve_opt_avail) {
1718 		/*
1719 		 * Disabling fails if the feature is provisioned but
1720 		 * not enabled. So ignore result and do not log error.
1721 		 */
1722 		vnic_dev_overlay_offload_ctrl(enic->vdev,
1723 			OVERLAY_FEATURE_GENEVE,
1724 			OVERLAY_OFFLOAD_DISABLE);
1725 	}
1726 	enic->overlay_offload = false;
1727 	if (enic->disable_overlay && enic->vxlan) {
1728 		/*
1729 		 * Explicitly disable overlay offload as the setting is
1730 		 * sticky, and resetting vNIC does not disable it.
1731 		 */
1732 		if (vnic_dev_overlay_offload_ctrl(enic->vdev,
1733 						  OVERLAY_FEATURE_VXLAN,
1734 						  OVERLAY_OFFLOAD_DISABLE)) {
1735 			dev_err(enic, "failed to disable overlay offload\n");
1736 		} else {
1737 			dev_info(enic, "Overlay offload is disabled\n");
1738 		}
1739 	}
1740 	if (!enic->disable_overlay && enic->vxlan &&
1741 	    /* 'VXLAN feature' enables VXLAN, NVGRE, and GENEVE. */
1742 	    vnic_dev_overlay_offload_ctrl(enic->vdev,
1743 					  OVERLAY_FEATURE_VXLAN,
1744 					  OVERLAY_OFFLOAD_ENABLE) == 0) {
1745 		enic->tx_offload_capa |=
1746 			DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM |
1747 			DEV_TX_OFFLOAD_GENEVE_TNL_TSO |
1748 			DEV_TX_OFFLOAD_VXLAN_TNL_TSO;
1749 		enic->tx_offload_mask |=
1750 			PKT_TX_OUTER_IPV6 |
1751 			PKT_TX_OUTER_IPV4 |
1752 			PKT_TX_OUTER_IP_CKSUM |
1753 			PKT_TX_TUNNEL_MASK;
1754 		enic->overlay_offload = true;
1755 		dev_info(enic, "Overlay offload is enabled\n");
1756 	}
1757 	/* Geneve with options offload requires overlay offload */
1758 	if (enic->overlay_offload && enic->geneve_opt_avail &&
1759 	    enic->geneve_opt_request) {
1760 		if (vnic_dev_overlay_offload_ctrl(enic->vdev,
1761 				OVERLAY_FEATURE_GENEVE,
1762 				OVERLAY_OFFLOAD_ENABLE)) {
1763 			dev_err(enic, "failed to enable geneve+option\n");
1764 		} else {
1765 			enic->geneve_opt_enabled = 1;
1766 			dev_info(enic, "Geneve with options is enabled\n");
1767 		}
1768 	}
1769 	/*
1770 	 * Reset the vxlan port if HW vxlan parsing is available. It
1771 	 * is always enabled regardless of overlay offload
1772 	 * enable/disable.
1773 	 */
1774 	if (enic->vxlan) {
1775 		enic->vxlan_port = RTE_VXLAN_DEFAULT_PORT;
1776 		/*
1777 		 * Reset the vxlan port to the default, as the NIC firmware
1778 		 * does not reset it automatically and keeps the old setting.
1779 		 */
1780 		if (vnic_dev_overlay_offload_cfg(enic->vdev,
1781 						 OVERLAY_CFG_VXLAN_PORT_UPDATE,
1782 						 RTE_VXLAN_DEFAULT_PORT)) {
1783 			dev_err(enic, "failed to update vxlan port\n");
1784 			return -EINVAL;
1785 		}
1786 	}
1787 
1788 	return 0;
1789 
1790 }
1791 
1792 int enic_probe(struct enic *enic)
1793 {
1794 	struct rte_pci_device *pdev = enic->pdev;
1795 	int err = -1;
1796 
1797 	dev_debug(enic, "Initializing ENIC PMD\n");
1798 
1799 	/* If this is a secondary process, the hardware is already initialized */
1800 	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
1801 		return 0;
1802 
1803 	enic->bar0.vaddr = (void *)pdev->mem_resource[0].addr;
1804 	enic->bar0.len = pdev->mem_resource[0].len;
1805 
1806 	/* Register vNIC device */
1807 	enic->vdev = vnic_dev_register(NULL, enic, enic->pdev, &enic->bar0, 1);
1808 	if (!enic->vdev) {
1809 		dev_err(enic, "vNIC registration failed, aborting\n");
1810 		goto err_out;
1811 	}
1812 
1813 	LIST_INIT(&enic->memzone_list);
1814 	rte_spinlock_init(&enic->memzone_list_lock);
1815 
1816 	vnic_register_cbacks(enic->vdev,
1817 		enic_alloc_consistent,
1818 		enic_free_consistent);
1819 
1820 	/*
1821 	 * Allocate the consistent memory for stats upfront so both primary and
1822 	 * secondary processes can dump stats.
1823 	 */
1824 	err = vnic_dev_alloc_stats_mem(enic->vdev);
1825 	if (err) {
1826 		dev_err(enic, "Failed to allocate cmd memory, aborting\n");
1827 		goto err_out_unregister;
1828 	}
1829 	/* Issue device open to get device in known state */
1830 	err = enic_dev_open(enic);
1831 	if (err) {
1832 		dev_err(enic, "vNIC dev open failed, aborting\n");
1833 		goto err_out_unregister;
1834 	}
1835 
1836 	/* Set ingress vlan rewrite mode before vnic initialization */
1837 	dev_debug(enic, "Set ig_vlan_rewrite_mode=%u\n",
1838 		  enic->ig_vlan_rewrite_mode);
1839 	err = vnic_dev_set_ig_vlan_rewrite_mode(enic->vdev,
1840 		enic->ig_vlan_rewrite_mode);
1841 	if (err) {
1842 		dev_err(enic,
1843 			"Failed to set ingress vlan rewrite mode, aborting.\n");
1844 		goto err_out_dev_close;
1845 	}
1846 
1847 	/* Issue device init to initialize the vnic-to-switch link.
1848 	 * We'll start with carrier off and wait for link UP
1849 	 * notification later to turn on carrier.  We don't need
1850 	 * to wait here for the vnic-to-switch link initialization
1851 	 * to complete; link UP notification is the indication that
1852 	 * the process is complete.
1853 	 */
1854 
1855 	err = vnic_dev_init(enic->vdev, 0);
1856 	if (err) {
1857 		dev_err(enic, "vNIC dev init failed, aborting\n");
1858 		goto err_out_dev_close;
1859 	}
1860 
1861 	err = enic_dev_init(enic);
1862 	if (err) {
1863 		dev_err(enic, "Device initialization failed, aborting\n");
1864 		goto err_out_dev_close;
1865 	}
1866 
1867 	return 0;
1868 
1869 err_out_dev_close:
1870 	vnic_dev_close(enic->vdev);
1871 err_out_unregister:
1872 	vnic_dev_unregister(enic->vdev);
1873 err_out:
1874 	return err;
1875 }
1876 
1877 void enic_remove(struct enic *enic)
1878 {
1879 	enic_dev_deinit(enic);
1880 	vnic_dev_close(enic->vdev);
1881 	vnic_dev_unregister(enic->vdev);
1882 }
1883