xref: /dpdk/drivers/net/enic/enic_main.c (revision 1cde1b9a9b4dbf31cb5e5ccdfc5da3cb079f43a2)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright 2008-2017 Cisco Systems, Inc.  All rights reserved.
3  * Copyright 2007 Nuova Systems, Inc.  All rights reserved.
4  */
5 
6 #include <stdio.h>
7 
8 #include <sys/stat.h>
9 #include <sys/mman.h>
10 #include <fcntl.h>
11 
12 #include <rte_pci.h>
13 #include <rte_bus_pci.h>
14 #include <rte_memzone.h>
15 #include <rte_malloc.h>
16 #include <rte_mbuf.h>
17 #include <rte_string_fns.h>
18 #include <rte_ethdev_driver.h>
19 
20 #include "enic_compat.h"
21 #include "enic.h"
22 #include "wq_enet_desc.h"
23 #include "rq_enet_desc.h"
24 #include "cq_enet_desc.h"
25 #include "vnic_enet.h"
26 #include "vnic_dev.h"
27 #include "vnic_wq.h"
28 #include "vnic_rq.h"
29 #include "vnic_cq.h"
30 #include "vnic_intr.h"
31 #include "vnic_nic.h"
32 
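/* True when the adapter is the SR-IOV VF variant of the Cisco VIC. */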
33 static inline int enic_is_sriov_vf(struct enic *enic)
34 {
35 	return enic->pdev->id.device_id == PCI_DEVICE_ID_CISCO_VIC_ENET_VF;
36 }
37 
38 static int is_zero_addr(uint8_t *addr)
39 {
40 	return !(addr[0] |  addr[1] | addr[2] | addr[3] | addr[4] | addr[5]);
41 }
42 
43 static int is_mcast_addr(uint8_t *addr)
44 {
45 	return addr[0] & 1;
46 }
47 
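/* A usable unicast address: neither multicast nor all zeros. */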
48 static int is_eth_addr_valid(uint8_t *addr)
49 {
50 	return !is_mcast_addr(addr) && !is_zero_addr(addr);
51 }
52 
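/* Free all mbufs held in the RQ's mbuf ring (teardown and error paths). */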
53 static void
54 enic_rxmbuf_queue_release(__rte_unused struct enic *enic, struct vnic_rq *rq)
55 {
56 	uint16_t i;
57 
58 	if (!rq || !rq->mbuf_ring) {
59 		dev_debug(enic, "Pointer to rq or mbuf_ring is NULL");
60 		return;
61 	}
62 
63 	for (i = 0; i < rq->ring.desc_count; i++) {
64 		if (rq->mbuf_ring[i]) {
65 			rte_pktmbuf_free_seg(rq->mbuf_ring[i]);
66 			rq->mbuf_ring[i] = NULL;
67 		}
68 	}
69 }
70 
71 static void enic_free_wq_buf(struct rte_mbuf **buf)
72 {
73 	struct rte_mbuf *mbuf = *buf;
74 
75 	rte_pktmbuf_free_seg(mbuf);
76 	*buf = NULL;
77 }
78 
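/* Log any WQ/RQ error status reported by the hardware; called from the
 * error/link interrupt handler.
 */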
79 static void enic_log_q_error(struct enic *enic)
80 {
81 	unsigned int i;
82 	u32 error_status;
83 
84 	for (i = 0; i < enic->wq_count; i++) {
85 		error_status = vnic_wq_error_status(&enic->wq[i]);
86 		if (error_status)
87 			dev_err(enic, "WQ[%d] error_status %d\n", i,
88 				error_status);
89 	}
90 
91 	for (i = 0; i < enic_vnic_rq_count(enic); i++) {
92 		if (!enic->rq[i].in_use)
93 			continue;
94 		error_status = vnic_rq_error_status(&enic->rq[i]);
95 		if (error_status)
96 			dev_err(enic, "RQ[%d] error_status %d\n", i,
97 				error_status);
98 	}
99 }
100 
101 static void enic_clear_soft_stats(struct enic *enic)
102 {
103 	struct enic_soft_stats *soft_stats = &enic->soft_stats;
104 	rte_atomic64_clear(&soft_stats->rx_nombuf);
105 	rte_atomic64_clear(&soft_stats->rx_packet_errors);
106 	rte_atomic64_clear(&soft_stats->tx_oversized);
107 }
108 
109 static void enic_init_soft_stats(struct enic *enic)
110 {
111 	struct enic_soft_stats *soft_stats = &enic->soft_stats;
112 	rte_atomic64_init(&soft_stats->rx_nombuf);
113 	rte_atomic64_init(&soft_stats->rx_packet_errors);
114 	rte_atomic64_init(&soft_stats->tx_oversized);
115 	enic_clear_soft_stats(enic);
116 }
117 
118 int enic_dev_stats_clear(struct enic *enic)
119 {
120 	int ret;
121 
122 	ret = vnic_dev_stats_clear(enic->vdev);
123 	if (ret != 0) {
124 		dev_err(enic, "Error in clearing stats\n");
125 		return ret;
126 	}
127 	enic_clear_soft_stats(enic);
128 
129 	return 0;
130 }
131 
132 int enic_dev_stats_get(struct enic *enic, struct rte_eth_stats *r_stats)
133 {
134 	struct vnic_stats *stats;
135 	struct enic_soft_stats *soft_stats = &enic->soft_stats;
136 	int64_t rx_truncated;
137 	uint64_t rx_packet_errors;
138 	int ret = vnic_dev_stats_dump(enic->vdev, &stats);
139 
140 	if (ret) {
141 		dev_err(enic, "Error in getting stats\n");
142 		return ret;
143 	}
144 
145 	/* The number of truncated packets can only be calculated by
146 	 * subtracting a hardware counter from error packets received by
147 	 * the driver. Note: this causes transient inaccuracies in the
148 	 * ipackets count. Also, the lengths of truncated packets are
149 	 * counted in ibytes even though truncated packets are dropped,
150 	 * which can make ibytes slightly higher than it should be.
151 	 */
152 	rx_packet_errors = rte_atomic64_read(&soft_stats->rx_packet_errors);
153 	rx_truncated = rx_packet_errors - stats->rx.rx_errors;
154 
155 	r_stats->ipackets = stats->rx.rx_frames_ok - rx_truncated;
156 	r_stats->opackets = stats->tx.tx_frames_ok;
157 
158 	r_stats->ibytes = stats->rx.rx_bytes_ok;
159 	r_stats->obytes = stats->tx.tx_bytes_ok;
160 
161 	r_stats->ierrors = stats->rx.rx_errors + stats->rx.rx_drop;
162 	r_stats->oerrors = stats->tx.tx_errors
163 			   + rte_atomic64_read(&soft_stats->tx_oversized);
164 
165 	r_stats->imissed = stats->rx.rx_no_bufs + rx_truncated;
166 
167 	r_stats->rx_nombuf = rte_atomic64_read(&soft_stats->rx_nombuf);
168 	return 0;
169 }
170 
171 int enic_del_mac_address(struct enic *enic, int mac_index)
172 {
173 	struct rte_eth_dev *eth_dev = enic->rte_dev;
174 	uint8_t *mac_addr = eth_dev->data->mac_addrs[mac_index].addr_bytes;
175 
176 	return vnic_dev_del_addr(enic->vdev, mac_addr);
177 }
178 
179 int enic_set_mac_address(struct enic *enic, uint8_t *mac_addr)
180 {
181 	int err;
182 
183 	if (!is_eth_addr_valid(mac_addr)) {
184 		dev_err(enic, "invalid mac address\n");
185 		return -EINVAL;
186 	}
187 
188 	err = vnic_dev_add_addr(enic->vdev, mac_addr);
189 	if (err)
190 		dev_err(enic, "add mac addr failed\n");
191 	return err;
192 }
193 
194 static void
195 enic_free_rq_buf(struct rte_mbuf **mbuf)
196 {
197 	if (*mbuf == NULL)
198 		return;
199 
200 	rte_pktmbuf_free(*mbuf);
201 	*mbuf = NULL;
202 }
203 
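/*
 * Program the vNIC hardware resources: the SOP (and, when scatter Rx is in
 * use, data) RQ and CQ for each Rx queue, the WQ and CQ for each Tx queue,
 * and the interrupt timers.
 */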
204 void enic_init_vnic_resources(struct enic *enic)
205 {
206 	unsigned int error_interrupt_enable = 1;
207 	unsigned int error_interrupt_offset = 0;
208 	unsigned int rxq_interrupt_enable = 0;
209 	unsigned int rxq_interrupt_offset = ENICPMD_RXQ_INTR_OFFSET;
210 	unsigned int index = 0;
211 	unsigned int cq_idx;
212 	struct vnic_rq *data_rq;
213 
214 	if (enic->rte_dev->data->dev_conf.intr_conf.rxq)
215 		rxq_interrupt_enable = 1;
216 
217 	for (index = 0; index < enic->rq_count; index++) {
218 		cq_idx = enic_cq_rq(enic, enic_rte_rq_idx_to_sop_idx(index));
219 
220 		vnic_rq_init(&enic->rq[enic_rte_rq_idx_to_sop_idx(index)],
221 			cq_idx,
222 			error_interrupt_enable,
223 			error_interrupt_offset);
224 
225 		data_rq = &enic->rq[enic_rte_rq_idx_to_data_idx(index)];
226 		if (data_rq->in_use)
227 			vnic_rq_init(data_rq,
228 				     cq_idx,
229 				     error_interrupt_enable,
230 				     error_interrupt_offset);
231 
232 		vnic_cq_init(&enic->cq[cq_idx],
233 			0 /* flow_control_enable */,
234 			1 /* color_enable */,
235 			0 /* cq_head */,
236 			0 /* cq_tail */,
237 			1 /* cq_tail_color */,
238 			rxq_interrupt_enable,
239 			1 /* cq_entry_enable */,
240 			0 /* cq_message_enable */,
241 			rxq_interrupt_offset,
242 			0 /* cq_message_addr */);
243 		if (rxq_interrupt_enable)
244 			rxq_interrupt_offset++;
245 	}
246 
247 	for (index = 0; index < enic->wq_count; index++) {
248 		vnic_wq_init(&enic->wq[index],
249 			enic_cq_wq(enic, index),
250 			error_interrupt_enable,
251 			error_interrupt_offset);
252 		/* Compute unsupported ol flags for enic_prep_pkts() */
253 		enic->wq[index].tx_offload_notsup_mask =
254 			PKT_TX_OFFLOAD_MASK ^ enic->tx_offload_mask;
255 
256 		cq_idx = enic_cq_wq(enic, index);
257 		vnic_cq_init(&enic->cq[cq_idx],
258 			0 /* flow_control_enable */,
259 			1 /* color_enable */,
260 			0 /* cq_head */,
261 			0 /* cq_tail */,
262 			1 /* cq_tail_color */,
263 			0 /* interrupt_enable */,
264 			0 /* cq_entry_enable */,
265 			1 /* cq_message_enable */,
266 			0 /* interrupt offset */,
267 			(u64)enic->wq[index].cqmsg_rz->iova);
268 	}
269 
270 	for (index = 0; index < enic->intr_count; index++) {
271 		vnic_intr_init(&enic->intr[index],
272 			       enic->config.intr_timer_usec,
273 			       enic->config.intr_timer_type,
274 			       /*mask_on_assertion*/1);
275 	}
276 }
277 
278 
279 static int
280 enic_alloc_rx_queue_mbufs(struct enic *enic, struct vnic_rq *rq)
281 {
282 	struct rte_mbuf *mb;
283 	struct rq_enet_desc *rqd = rq->ring.descs;
284 	unsigned i;
285 	dma_addr_t dma_addr;
286 	uint32_t max_rx_pkt_len;
287 	uint16_t rq_buf_len;
288 
289 	if (!rq->in_use)
290 		return 0;
291 
292 	dev_debug(enic, "queue %u, allocating %u rx queue mbufs\n", rq->index,
293 		  rq->ring.desc_count);
294 
295 	/*
296 	 * If *not* using scatter and the mbuf size is greater than the
297 	 * requested max packet size (max_rx_pkt_len), then reduce the
298 	 * posted buffer size to max_rx_pkt_len. HW still receives packets
299 	 * larger than max_rx_pkt_len, but they will be truncated, which we
300 	 * drop in the rx handler. Not ideal, but better than returning
301 	 * large packets when the user is not expecting them.
302 	 */
303 	max_rx_pkt_len = enic->rte_dev->data->dev_conf.rxmode.max_rx_pkt_len;
304 	rq_buf_len = rte_pktmbuf_data_room_size(rq->mp) - RTE_PKTMBUF_HEADROOM;
305 	if (max_rx_pkt_len < rq_buf_len && !rq->data_queue_enable)
306 		rq_buf_len = max_rx_pkt_len;
307 	for (i = 0; i < rq->ring.desc_count; i++, rqd++) {
308 		mb = rte_mbuf_raw_alloc(rq->mp);
309 		if (mb == NULL) {
310 			dev_err(enic, "RX mbuf alloc failed queue_id=%u\n",
311 			(unsigned)rq->index);
312 			return -ENOMEM;
313 		}
314 
315 		mb->data_off = RTE_PKTMBUF_HEADROOM;
316 		dma_addr = (dma_addr_t)(mb->buf_iova
317 			   + RTE_PKTMBUF_HEADROOM);
318 		rq_enet_desc_enc(rqd, dma_addr,
319 				(rq->is_sop ? RQ_ENET_TYPE_ONLY_SOP
320 				: RQ_ENET_TYPE_NOT_SOP),
321 				rq_buf_len);
322 		rq->mbuf_ring[i] = mb;
323 	}
324 	/*
325 	 * Do not post the buffers to the NIC until we enable the RQ via
326 	 * enic_start_rq().
327 	 */
328 	rq->need_initial_post = true;
329 	/* Initialize fetch index while RQ is disabled */
330 	iowrite32(0, &rq->ctrl->fetch_index);
331 	return 0;
332 }
333 
334 /*
335  * Post the Rx buffers for the first time. enic_alloc_rx_queue_mbufs() has
336  * allocated the buffers and filled the RQ descriptor ring. Just need to push
337  * the post index to the NIC.
338  */
339 static void
340 enic_initial_post_rx(struct enic *enic, struct vnic_rq *rq)
341 {
342 	if (!rq->in_use || !rq->need_initial_post)
343 		return;
344 
345 	/* make sure all prior writes are complete before doing the PIO write */
346 	rte_rmb();
347 
348 	/* Post all but the last buffer to VIC. */
349 	rq->posted_index = rq->ring.desc_count - 1;
350 
351 	rq->rx_nb_hold = 0;
352 
353 	dev_debug(enic, "port=%u, qidx=%u, Write %u posted idx, %u sw held\n",
354 		enic->port_id, rq->index, rq->posted_index, rq->rx_nb_hold);
355 	iowrite32(rq->posted_index, &rq->ctrl->posted_index);
356 	rte_rmb();
357 	rq->need_initial_post = false;
358 }
359 
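/*
 * DMA-coherent allocator registered with the vNIC library. Memory comes from
 * an IOVA-contiguous memzone and is tracked on enic->memzone_list so that
 * enic_free_consistent() can find and release it later.
 */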
360 static void *
361 enic_alloc_consistent(void *priv, size_t size,
362 	dma_addr_t *dma_handle, u8 *name)
363 {
364 	void *vaddr;
365 	const struct rte_memzone *rz;
366 	*dma_handle = 0;
367 	struct enic *enic = (struct enic *)priv;
368 	struct enic_memzone_entry *mze;
369 
370 	rz = rte_memzone_reserve_aligned((const char *)name, size,
371 			SOCKET_ID_ANY, RTE_MEMZONE_IOVA_CONTIG, ENIC_ALIGN);
372 	if (!rz) {
373 		pr_err("%s : Failed to allocate memory requested for %s\n",
374 			__func__, name);
375 		return NULL;
376 	}
377 
378 	vaddr = rz->addr;
379 	*dma_handle = (dma_addr_t)rz->iova;
380 
381 	mze = rte_malloc("enic memzone entry",
382 			 sizeof(struct enic_memzone_entry), 0);
383 
384 	if (!mze) {
385 		pr_err("%s : Failed to allocate memory for memzone list\n",
386 		       __func__);
387 		rte_memzone_free(rz);
388 		return NULL;
389 	}
390 
391 	mze->rz = rz;
392 
393 	rte_spinlock_lock(&enic->memzone_list_lock);
394 	LIST_INSERT_HEAD(&enic->memzone_list, mze, entries);
395 	rte_spinlock_unlock(&enic->memzone_list_lock);
396 
397 	return vaddr;
398 }
399 
400 static void
401 enic_free_consistent(void *priv,
402 		     __rte_unused size_t size,
403 		     void *vaddr,
404 		     dma_addr_t dma_handle)
405 {
406 	struct enic_memzone_entry *mze;
407 	struct enic *enic = (struct enic *)priv;
408 
409 	rte_spinlock_lock(&enic->memzone_list_lock);
410 	LIST_FOREACH(mze, &enic->memzone_list, entries) {
411 		if (mze->rz->addr == vaddr &&
412 		    mze->rz->iova == dma_handle)
413 			break;
414 	}
415 	if (mze == NULL) {
416 		rte_spinlock_unlock(&enic->memzone_list_lock);
417 		dev_warning(enic,
418 			    "Tried to free memory, but couldn't find it in the memzone list\n");
419 		return;
420 	}
421 	LIST_REMOVE(mze, entries);
422 	rte_spinlock_unlock(&enic->memzone_list_lock);
423 	rte_memzone_free(mze->rz);
424 	rte_free(mze);
425 }
426 
427 int enic_link_update(struct enic *enic)
428 {
429 	struct rte_eth_dev *eth_dev = enic->rte_dev;
430 	struct rte_eth_link link;
431 
432 	memset(&link, 0, sizeof(link));
433 	link.link_status = enic_get_link_status(enic);
434 	link.link_duplex = ETH_LINK_FULL_DUPLEX;
435 	link.link_speed = vnic_dev_port_speed(enic->vdev);
436 
437 	return rte_eth_linkstatus_set(eth_dev, &link);
438 }
439 
440 static void
441 enic_intr_handler(void *arg)
442 {
443 	struct rte_eth_dev *dev = (struct rte_eth_dev *)arg;
444 	struct enic *enic = pmd_priv(dev);
445 
446 	vnic_intr_return_all_credits(&enic->intr[ENICPMD_LSC_INTR_OFFSET]);
447 
448 	enic_link_update(enic);
449 	_rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_LSC, NULL);
450 	enic_log_q_error(enic);
451 }
452 
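/*
 * Set up per-Rx-queue interrupt vectors. Does nothing unless Rx interrupts
 * are requested, and requires MSI-X (one vector per queue).
 */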
453 static int enic_rxq_intr_init(struct enic *enic)
454 {
455 	struct rte_intr_handle *intr_handle;
456 	uint32_t rxq_intr_count, i;
457 	int err;
458 
459 	intr_handle = enic->rte_dev->intr_handle;
460 	if (!enic->rte_dev->data->dev_conf.intr_conf.rxq)
461 		return 0;
462 	/*
463 	 * Rx queue interrupts only work when we have MSI-X interrupts,
464 	 * one per queue. Sharing one interrupt is technically
465 	 * possible with VIC, but it is not worth the complications it brings.
466 	 */
467 	if (!rte_intr_cap_multiple(intr_handle)) {
468 		dev_err(enic, "Rx queue interrupts require MSI-X interrupts"
469 			" (vfio-pci driver)\n");
470 		return -ENOTSUP;
471 	}
472 	rxq_intr_count = enic->intr_count - ENICPMD_RXQ_INTR_OFFSET;
473 	err = rte_intr_efd_enable(intr_handle, rxq_intr_count);
474 	if (err) {
475 		dev_err(enic, "Failed to enable event fds for Rx queue"
476 			" interrupts\n");
477 		return err;
478 	}
479 	intr_handle->intr_vec = rte_zmalloc("enic_intr_vec",
480 					    rxq_intr_count * sizeof(int), 0);
481 	if (intr_handle->intr_vec == NULL) {
482 		dev_err(enic, "Failed to allocate intr_vec\n");
483 		return -ENOMEM;
484 	}
485 	for (i = 0; i < rxq_intr_count; i++)
486 		intr_handle->intr_vec[i] = i + ENICPMD_RXQ_INTR_OFFSET;
487 	return 0;
488 }
489 
490 static void enic_rxq_intr_deinit(struct enic *enic)
491 {
492 	struct rte_intr_handle *intr_handle;
493 
494 	intr_handle = enic->rte_dev->intr_handle;
495 	rte_intr_efd_disable(intr_handle);
496 	if (intr_handle->intr_vec != NULL) {
497 		rte_free(intr_handle->intr_vec);
498 		intr_handle->intr_vec = NULL;
499 	}
500 }
501 
502 static void enic_prep_wq_for_simple_tx(struct enic *enic, uint16_t queue_idx)
503 {
504 	struct wq_enet_desc *desc;
505 	struct vnic_wq *wq;
506 	unsigned int i;
507 
508 	/*
509 	 * Fill WQ descriptor fields that never change. Every descriptor is
510 	 * one packet, so set EOP. Also set CQ_ENTRY every ENIC_WQ_CQ_THRESH
511 	 * descriptors (i.e. request one completion update every 32 packets).
512 	 */
513 	wq = &enic->wq[queue_idx];
514 	desc = (struct wq_enet_desc *)wq->ring.descs;
515 	for (i = 0; i < wq->ring.desc_count; i++, desc++) {
516 		desc->header_length_flags = 1 << WQ_ENET_FLAGS_EOP_SHIFT;
517 		if (i % ENIC_WQ_CQ_THRESH == ENIC_WQ_CQ_THRESH - 1)
518 			desc->header_length_flags |=
519 				(1 << WQ_ENET_FLAGS_CQ_ENTRY_SHIFT);
520 	}
521 }
522 
523 /*
524  * The 'strong' version is in enic_rxtx_vec_avx2.c. This weak version is used
525  * when that file is not compiled.
526  */
527 __rte_weak bool
528 enic_use_vector_rx_handler(__rte_unused struct enic *enic)
529 {
530 	return false;
531 }
532 
533 static void pick_rx_handler(struct enic *enic)
534 {
535 	struct rte_eth_dev *eth_dev;
536 
537 	/*
538 	 * Preference order:
539 	 * 1. The vectorized handler if possible and requested.
540 	 * 2. The non-scatter, simplified handler if scatter Rx is not used.
541 	 * 3. The default handler as a fallback.
542 	 */
543 	eth_dev = enic->rte_dev;
544 	if (enic_use_vector_rx_handler(enic))
545 		return;
546 	if (enic->rq_count > 0 && enic->rq[0].data_queue_enable == 0) {
547 		ENICPMD_LOG(DEBUG, " use the non-scatter Rx handler");
548 		eth_dev->rx_pkt_burst = &enic_noscatter_recv_pkts;
549 	} else {
550 		ENICPMD_LOG(DEBUG, " use the normal Rx handler");
551 		eth_dev->rx_pkt_burst = &enic_recv_pkts;
552 	}
553 }
554 
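/*
 * Start the port: fill the RQs with mbufs, pick the Rx/Tx burst handlers,
 * enable the queues and the vNIC, and hook up the link/error interrupt.
 */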
555 int enic_enable(struct enic *enic)
556 {
557 	unsigned int index;
558 	int err;
559 	struct rte_eth_dev *eth_dev = enic->rte_dev;
560 	uint64_t simple_tx_offloads;
561 	uintptr_t p;
562 
563 	if (enic->enable_avx2_rx) {
564 		struct rte_mbuf mb_def = { .buf_addr = 0 };
565 
566 		/*
567 		 * mbuf_initializer contains const-after-init fields of
568 		 * receive mbufs (i.e. 64 bits of fields from rearm_data).
569 		 * It is currently used by the vectorized handler.
570 		 */
571 		mb_def.nb_segs = 1;
572 		mb_def.data_off = RTE_PKTMBUF_HEADROOM;
573 		mb_def.port = enic->port_id;
574 		rte_mbuf_refcnt_set(&mb_def, 1);
575 		rte_compiler_barrier();
576 		p = (uintptr_t)&mb_def.rearm_data;
577 		enic->mbuf_initializer = *(uint64_t *)p;
578 	}
579 
580 	eth_dev->data->dev_link.link_speed = vnic_dev_port_speed(enic->vdev);
581 	eth_dev->data->dev_link.link_duplex = ETH_LINK_FULL_DUPLEX;
582 
583 	/* vnic notification of link status has already been turned on in
584 	 * enic_dev_init() which is called during probe time.  Here we are
585 	 * just turning on interrupt vector 0 if needed.
586 	 */
587 	if (eth_dev->data->dev_conf.intr_conf.lsc)
588 		vnic_dev_notify_set(enic->vdev, 0);
589 
590 	err = enic_rxq_intr_init(enic);
591 	if (err)
592 		return err;
593 	if (enic_clsf_init(enic))
594 		dev_warning(enic, "Init of hash table for clsf failed. "\
595 			"Flow director feature will not work\n");
596 
597 	for (index = 0; index < enic->rq_count; index++) {
598 		err = enic_alloc_rx_queue_mbufs(enic,
599 			&enic->rq[enic_rte_rq_idx_to_sop_idx(index)]);
600 		if (err) {
601 			dev_err(enic, "Failed to alloc sop RX queue mbufs\n");
602 			return err;
603 		}
604 		err = enic_alloc_rx_queue_mbufs(enic,
605 			&enic->rq[enic_rte_rq_idx_to_data_idx(index)]);
606 		if (err) {
607 			/* release the allocated mbufs for the sop rq */
608 			enic_rxmbuf_queue_release(enic,
609 				&enic->rq[enic_rte_rq_idx_to_sop_idx(index)]);
610 
611 			dev_err(enic, "Failed to alloc data RX queue mbufs\n");
612 			return err;
613 		}
614 	}
615 
616 	/*
617 	 * Use the simple TX handler if possible. Only checksum offloads
618 	 * and vlan insertion are supported.
619 	 */
620 	simple_tx_offloads = enic->tx_offload_capa &
621 		(DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM |
622 		 DEV_TX_OFFLOAD_VLAN_INSERT |
623 		 DEV_TX_OFFLOAD_IPV4_CKSUM |
624 		 DEV_TX_OFFLOAD_UDP_CKSUM |
625 		 DEV_TX_OFFLOAD_TCP_CKSUM);
626 	if ((eth_dev->data->dev_conf.txmode.offloads &
627 	     ~simple_tx_offloads) == 0) {
628 		ENICPMD_LOG(DEBUG, " use the simple tx handler");
629 		eth_dev->tx_pkt_burst = &enic_simple_xmit_pkts;
630 		for (index = 0; index < enic->wq_count; index++)
631 			enic_prep_wq_for_simple_tx(enic, index);
632 	} else {
633 		ENICPMD_LOG(DEBUG, " use the default tx handler");
634 		eth_dev->tx_pkt_burst = &enic_xmit_pkts;
635 	}
636 
637 	pick_rx_handler(enic);
638 
639 	for (index = 0; index < enic->wq_count; index++)
640 		enic_start_wq(enic, index);
641 	for (index = 0; index < enic->rq_count; index++)
642 		enic_start_rq(enic, index);
643 
644 	vnic_dev_add_addr(enic->vdev, enic->mac_addr);
645 
646 	vnic_dev_enable_wait(enic->vdev);
647 
648 	/* Register and enable error interrupt */
649 	rte_intr_callback_register(&(enic->pdev->intr_handle),
650 		enic_intr_handler, (void *)enic->rte_dev);
651 
652 	rte_intr_enable(&(enic->pdev->intr_handle));
653 	/* Unmask LSC interrupt */
654 	vnic_intr_unmask(&enic->intr[ENICPMD_LSC_INTR_OFFSET]);
655 
656 	return 0;
657 }
658 
659 int enic_alloc_intr_resources(struct enic *enic)
660 {
661 	int err;
662 	unsigned int i;
663 
664 	dev_info(enic, "vNIC resources used:  "\
665 		"wq %d rq %d cq %d intr %d\n",
666 		enic->wq_count, enic_vnic_rq_count(enic),
667 		enic->cq_count, enic->intr_count);
668 
669 	for (i = 0; i < enic->intr_count; i++) {
670 		err = vnic_intr_alloc(enic->vdev, &enic->intr[i], i);
671 		if (err) {
672 			enic_free_vnic_resources(enic);
673 			return err;
674 		}
675 	}
676 	return 0;
677 }
678 
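/*
 * Release an Rx queue: free any held mbufs, the mbuf rings, the SOP and data
 * RQs, and the associated CQ.
 */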
679 void enic_free_rq(void *rxq)
680 {
681 	struct vnic_rq *rq_sop, *rq_data;
682 	struct enic *enic;
683 
684 	if (rxq == NULL)
685 		return;
686 
687 	rq_sop = (struct vnic_rq *)rxq;
688 	enic = vnic_dev_priv(rq_sop->vdev);
689 	rq_data = &enic->rq[rq_sop->data_queue_idx];
690 
691 	if (rq_sop->free_mbufs) {
692 		struct rte_mbuf **mb;
693 		int i;
694 
695 		mb = rq_sop->free_mbufs;
696 		for (i = ENIC_RX_BURST_MAX - rq_sop->num_free_mbufs;
697 		     i < ENIC_RX_BURST_MAX; i++)
698 			rte_pktmbuf_free(mb[i]);
699 		rte_free(rq_sop->free_mbufs);
700 		rq_sop->free_mbufs = NULL;
701 		rq_sop->num_free_mbufs = 0;
702 	}
703 
704 	enic_rxmbuf_queue_release(enic, rq_sop);
705 	if (rq_data->in_use)
706 		enic_rxmbuf_queue_release(enic, rq_data);
707 
708 	rte_free(rq_sop->mbuf_ring);
709 	if (rq_data->in_use)
710 		rte_free(rq_data->mbuf_ring);
711 
712 	rq_sop->mbuf_ring = NULL;
713 	rq_data->mbuf_ring = NULL;
714 
715 	vnic_rq_free(rq_sop);
716 	if (rq_data->in_use)
717 		vnic_rq_free(rq_data);
718 
719 	vnic_cq_free(&enic->cq[enic_sop_rq_idx_to_cq_idx(rq_sop->index)]);
720 
721 	rq_sop->in_use = 0;
722 	rq_data->in_use = 0;
723 }
724 
725 void enic_start_wq(struct enic *enic, uint16_t queue_idx)
726 {
727 	struct rte_eth_dev *eth_dev = enic->rte_dev;
728 	vnic_wq_enable(&enic->wq[queue_idx]);
729 	eth_dev->data->tx_queue_state[queue_idx] = RTE_ETH_QUEUE_STATE_STARTED;
730 }
731 
732 int enic_stop_wq(struct enic *enic, uint16_t queue_idx)
733 {
734 	struct rte_eth_dev *eth_dev = enic->rte_dev;
735 	int ret;
736 
737 	ret = vnic_wq_disable(&enic->wq[queue_idx]);
738 	if (ret)
739 		return ret;
740 
741 	eth_dev->data->tx_queue_state[queue_idx] = RTE_ETH_QUEUE_STATE_STOPPED;
742 	return 0;
743 }
744 
745 void enic_start_rq(struct enic *enic, uint16_t queue_idx)
746 {
747 	struct vnic_rq *rq_sop;
748 	struct vnic_rq *rq_data;
749 	rq_sop = &enic->rq[enic_rte_rq_idx_to_sop_idx(queue_idx)];
750 	rq_data = &enic->rq[rq_sop->data_queue_idx];
751 	struct rte_eth_dev *eth_dev = enic->rte_dev;
752 
753 	if (rq_data->in_use) {
754 		vnic_rq_enable(rq_data);
755 		enic_initial_post_rx(enic, rq_data);
756 	}
757 	rte_mb();
758 	vnic_rq_enable(rq_sop);
759 	enic_initial_post_rx(enic, rq_sop);
760 	eth_dev->data->rx_queue_state[queue_idx] = RTE_ETH_QUEUE_STATE_STARTED;
761 }
762 
763 int enic_stop_rq(struct enic *enic, uint16_t queue_idx)
764 {
765 	int ret1 = 0, ret2 = 0;
766 	struct rte_eth_dev *eth_dev = enic->rte_dev;
767 	struct vnic_rq *rq_sop;
768 	struct vnic_rq *rq_data;
769 	rq_sop = &enic->rq[enic_rte_rq_idx_to_sop_idx(queue_idx)];
770 	rq_data = &enic->rq[rq_sop->data_queue_idx];
771 
772 	ret2 = vnic_rq_disable(rq_sop);
773 	rte_mb();
774 	if (rq_data->in_use)
775 		ret1 = vnic_rq_disable(rq_data);
776 
777 	if (ret2)
778 		return ret2;
779 	else if (ret1)
780 		return ret1;
781 
782 	eth_dev->data->rx_queue_state[queue_idx] = RTE_ETH_QUEUE_STATE_STOPPED;
783 	return 0;
784 }
785 
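/*
 * Allocate the vNIC resources behind one eth_dev Rx queue: a SOP RQ, a data
 * RQ (used only when scatter Rx needs more than one mbuf per packet), a
 * shared CQ, and the mbuf rings. The requested descriptor count is split
 * between the two RQs.
 */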
786 int enic_alloc_rq(struct enic *enic, uint16_t queue_idx,
787 	unsigned int socket_id, struct rte_mempool *mp,
788 	uint16_t nb_desc, uint16_t free_thresh)
789 {
790 	int rc;
791 	uint16_t sop_queue_idx = enic_rte_rq_idx_to_sop_idx(queue_idx);
792 	uint16_t data_queue_idx = enic_rte_rq_idx_to_data_idx(queue_idx);
793 	struct vnic_rq *rq_sop = &enic->rq[sop_queue_idx];
794 	struct vnic_rq *rq_data = &enic->rq[data_queue_idx];
795 	unsigned int mbuf_size, mbufs_per_pkt;
796 	unsigned int nb_sop_desc, nb_data_desc;
797 	uint16_t min_sop, max_sop, min_data, max_data;
798 	uint32_t max_rx_pkt_len;
799 
800 	rq_sop->is_sop = 1;
801 	rq_sop->data_queue_idx = data_queue_idx;
802 	rq_data->is_sop = 0;
803 	rq_data->data_queue_idx = 0;
804 	rq_sop->socket_id = socket_id;
805 	rq_sop->mp = mp;
806 	rq_data->socket_id = socket_id;
807 	rq_data->mp = mp;
808 	rq_sop->in_use = 1;
809 	rq_sop->rx_free_thresh = free_thresh;
810 	rq_data->rx_free_thresh = free_thresh;
811 	dev_debug(enic, "Set queue_id:%u free thresh:%u\n", queue_idx,
812 		  free_thresh);
813 
814 	mbuf_size = (uint16_t)(rte_pktmbuf_data_room_size(mp) -
815 			       RTE_PKTMBUF_HEADROOM);
816 	/* max_rx_pkt_len includes the ethernet header and CRC. */
817 	max_rx_pkt_len = enic->rte_dev->data->dev_conf.rxmode.max_rx_pkt_len;
818 
819 	if (enic->rte_dev->data->dev_conf.rxmode.offloads &
820 	    DEV_RX_OFFLOAD_SCATTER) {
821 		dev_info(enic, "Rq %u Scatter rx mode enabled\n", queue_idx);
822 		/* ceil((max pkt len)/mbuf_size) */
823 		mbufs_per_pkt = (max_rx_pkt_len + mbuf_size - 1) / mbuf_size;
824 	} else {
825 		dev_info(enic, "Scatter rx mode disabled\n");
826 		mbufs_per_pkt = 1;
827 		if (max_rx_pkt_len > mbuf_size) {
828 			dev_warning(enic, "The maximum Rx packet size (%u) is"
829 				    " larger than the mbuf size (%u), and"
830 				    " scatter is disabled. Larger packets will"
831 				    " be truncated.\n",
832 				    max_rx_pkt_len, mbuf_size);
833 		}
834 	}
835 
836 	if (mbufs_per_pkt > 1) {
837 		dev_info(enic, "Rq %u Scatter rx mode in use\n", queue_idx);
838 		rq_sop->data_queue_enable = 1;
839 		rq_data->in_use = 1;
840 		/*
841 		 * HW does not directly support rxmode.max_rx_pkt_len. HW always
842 		 * receives packet sizes up to the "max" MTU.
843 		 * If not using scatter, we can achieve the effect of dropping
844 		 * larger packets by reducing the size of posted buffers.
845 		 * See enic_alloc_rx_queue_mbufs().
846 		 */
847 		if (max_rx_pkt_len <
848 		    enic_mtu_to_max_rx_pktlen(enic->max_mtu)) {
849 			dev_warning(enic, "rxmode.max_rx_pkt_len is ignored"
850 				    " when scatter rx mode is in use.\n");
851 		}
852 	} else {
853 		dev_info(enic, "Rq %u Scatter rx mode not being used\n",
854 			 queue_idx);
855 		rq_sop->data_queue_enable = 0;
856 		rq_data->in_use = 0;
857 	}
858 
859 	/* the number of descriptors has to be a multiple of 32 */
860 	nb_sop_desc = (nb_desc / mbufs_per_pkt) & ENIC_ALIGN_DESCS_MASK;
861 	nb_data_desc = (nb_desc - nb_sop_desc) & ENIC_ALIGN_DESCS_MASK;
862 
863 	rq_sop->max_mbufs_per_pkt = mbufs_per_pkt;
864 	rq_data->max_mbufs_per_pkt = mbufs_per_pkt;
865 
866 	if (mbufs_per_pkt > 1) {
867 		min_sop = ENIC_RX_BURST_MAX;
868 		max_sop = ((enic->config.rq_desc_count /
869 			    (mbufs_per_pkt - 1)) & ENIC_ALIGN_DESCS_MASK);
870 		min_data = min_sop * (mbufs_per_pkt - 1);
871 		max_data = enic->config.rq_desc_count;
872 	} else {
873 		min_sop = ENIC_RX_BURST_MAX;
874 		max_sop = enic->config.rq_desc_count;
875 		min_data = 0;
876 		max_data = 0;
877 	}
878 
879 	if (nb_desc < (min_sop + min_data)) {
880 		dev_warning(enic,
881 			    "Number of rx descs too low, adjusting to minimum\n");
882 		nb_sop_desc = min_sop;
883 		nb_data_desc = min_data;
884 	} else if (nb_desc > (max_sop + max_data)) {
885 		dev_warning(enic,
886 			    "Number of rx_descs too high, adjusting to maximum\n");
887 		nb_sop_desc = max_sop;
888 		nb_data_desc = max_data;
889 	}
890 	if (mbufs_per_pkt > 1) {
891 		dev_info(enic, "For max packet size %u and mbuf size %u valid"
892 			 " rx descriptor range is %u to %u\n",
893 			 max_rx_pkt_len, mbuf_size, min_sop + min_data,
894 			 max_sop + max_data);
895 	}
896 	dev_info(enic, "Using %d rx descriptors (sop %d, data %d)\n",
897 		 nb_sop_desc + nb_data_desc, nb_sop_desc, nb_data_desc);
898 
899 	/* Allocate sop queue resources */
900 	rc = vnic_rq_alloc(enic->vdev, rq_sop, sop_queue_idx,
901 		nb_sop_desc, sizeof(struct rq_enet_desc));
902 	if (rc) {
903 		dev_err(enic, "error in allocation of sop rq\n");
904 		goto err_exit;
905 	}
906 	nb_sop_desc = rq_sop->ring.desc_count;
907 
908 	if (rq_data->in_use) {
909 		/* Allocate data queue resources */
910 		rc = vnic_rq_alloc(enic->vdev, rq_data, data_queue_idx,
911 				   nb_data_desc,
912 				   sizeof(struct rq_enet_desc));
913 		if (rc) {
914 			dev_err(enic, "error in allocation of data rq\n");
915 			goto err_free_rq_sop;
916 		}
917 		nb_data_desc = rq_data->ring.desc_count;
918 	}
919 	rc = vnic_cq_alloc(enic->vdev, &enic->cq[queue_idx], queue_idx,
920 			   socket_id, nb_sop_desc + nb_data_desc,
921 			   sizeof(struct cq_enet_rq_desc));
922 	if (rc) {
923 		dev_err(enic, "error in allocation of cq for rq\n");
924 		goto err_free_rq_data;
925 	}
926 
927 	/* Allocate the mbuf rings */
928 	rq_sop->mbuf_ring = (struct rte_mbuf **)
929 		rte_zmalloc_socket("rq->mbuf_ring",
930 				   sizeof(struct rte_mbuf *) * nb_sop_desc,
931 				   RTE_CACHE_LINE_SIZE, rq_sop->socket_id);
932 	if (rq_sop->mbuf_ring == NULL)
933 		goto err_free_cq;
934 
935 	if (rq_data->in_use) {
936 		rq_data->mbuf_ring = (struct rte_mbuf **)
937 			rte_zmalloc_socket("rq->mbuf_ring",
938 				sizeof(struct rte_mbuf *) * nb_data_desc,
939 				RTE_CACHE_LINE_SIZE, rq_sop->socket_id);
940 		if (rq_data->mbuf_ring == NULL)
941 			goto err_free_sop_mbuf;
942 	}
943 
944 	rq_sop->free_mbufs = (struct rte_mbuf **)
945 		rte_zmalloc_socket("rq->free_mbufs",
946 				   sizeof(struct rte_mbuf *) *
947 				   ENIC_RX_BURST_MAX,
948 				   RTE_CACHE_LINE_SIZE, rq_sop->socket_id);
949 	if (rq_sop->free_mbufs == NULL)
950 		goto err_free_data_mbuf;
951 	rq_sop->num_free_mbufs = 0;
952 
953 	rq_sop->tot_nb_desc = nb_desc; /* squirrel away for the MTU update function */
954 
955 	return 0;
956 
957 err_free_data_mbuf:
958 	rte_free(rq_data->mbuf_ring);
959 err_free_sop_mbuf:
960 	rte_free(rq_sop->mbuf_ring);
961 err_free_cq:
962 	/* cleanup on error */
963 	vnic_cq_free(&enic->cq[queue_idx]);
964 err_free_rq_data:
965 	if (rq_data->in_use)
966 		vnic_rq_free(rq_data);
967 err_free_rq_sop:
968 	vnic_rq_free(rq_sop);
969 err_exit:
970 	return -ENOMEM;
971 }
972 
973 void enic_free_wq(void *txq)
974 {
975 	struct vnic_wq *wq;
976 	struct enic *enic;
977 
978 	if (txq == NULL)
979 		return;
980 
981 	wq = (struct vnic_wq *)txq;
982 	enic = vnic_dev_priv(wq->vdev);
983 	rte_memzone_free(wq->cqmsg_rz);
984 	vnic_wq_free(wq);
985 	vnic_cq_free(&enic->cq[enic->rq_count + wq->index]);
986 }
987 
988 int enic_alloc_wq(struct enic *enic, uint16_t queue_idx,
989 	unsigned int socket_id, uint16_t nb_desc)
990 {
991 	int err;
992 	struct vnic_wq *wq = &enic->wq[queue_idx];
993 	unsigned int cq_index = enic_cq_wq(enic, queue_idx);
994 	char name[NAME_MAX];
995 	static int instance;
996 
997 	wq->socket_id = socket_id;
998 	/*
999 	 * rte_eth_tx_queue_setup() checks min, max, and alignment. So just
1000 	 * print an info message for diagnostics.
1001 	 */
1002 	dev_info(enic, "TX Queues - effective number of descs:%d\n", nb_desc);
1003 
1004 	/* Allocate queue resources */
1005 	err = vnic_wq_alloc(enic->vdev, &enic->wq[queue_idx], queue_idx,
1006 		nb_desc,
1007 		sizeof(struct wq_enet_desc));
1008 	if (err) {
1009 		dev_err(enic, "error in allocation of wq\n");
1010 		return err;
1011 	}
1012 
1013 	err = vnic_cq_alloc(enic->vdev, &enic->cq[cq_index], cq_index,
1014 		socket_id, nb_desc,
1015 		sizeof(struct cq_enet_wq_desc));
1016 	if (err) {
1017 		vnic_wq_free(wq);
1018 		dev_err(enic, "error in allocation of cq for wq\n");
1019 	}
1020 
1021 	/* set up the CQ message */
1022 	snprintf((char *)name, sizeof(name),
1023 		 "vnic_cqmsg-%s-%d-%d", enic->bdf_name, queue_idx,
1024 		instance++);
1025 
1026 	wq->cqmsg_rz = rte_memzone_reserve_aligned((const char *)name,
1027 			sizeof(uint32_t), SOCKET_ID_ANY,
1028 			RTE_MEMZONE_IOVA_CONTIG, ENIC_ALIGN);
1029 	if (!wq->cqmsg_rz)
1030 		return -ENOMEM;
1031 
1032 	return err;
1033 }
1034 
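/*
 * Stop the port: mask interrupts, disable the vNIC and all WQs/RQs, and
 * clean the rings, freeing any buffers still posted.
 */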
1035 int enic_disable(struct enic *enic)
1036 {
1037 	unsigned int i;
1038 	int err;
1039 
1040 	for (i = 0; i < enic->intr_count; i++) {
1041 		vnic_intr_mask(&enic->intr[i]);
1042 		(void)vnic_intr_masked(&enic->intr[i]); /* flush write */
1043 	}
1044 	enic_rxq_intr_deinit(enic);
1045 	rte_intr_disable(&enic->pdev->intr_handle);
1046 	rte_intr_callback_unregister(&enic->pdev->intr_handle,
1047 				     enic_intr_handler,
1048 				     (void *)enic->rte_dev);
1049 
1050 	vnic_dev_disable(enic->vdev);
1051 
1052 	enic_clsf_destroy(enic);
1053 
1054 	if (!enic_is_sriov_vf(enic))
1055 		vnic_dev_del_addr(enic->vdev, enic->mac_addr);
1056 
1057 	for (i = 0; i < enic->wq_count; i++) {
1058 		err = vnic_wq_disable(&enic->wq[i]);
1059 		if (err)
1060 			return err;
1061 	}
1062 	for (i = 0; i < enic_vnic_rq_count(enic); i++) {
1063 		if (enic->rq[i].in_use) {
1064 			err = vnic_rq_disable(&enic->rq[i]);
1065 			if (err)
1066 				return err;
1067 		}
1068 	}
1069 
1070 	/* If we were using interrupts, set the interrupt vector to -1
1071 	 * to disable interrupts.  We are not disabling link notifications,
1072 	 * though, as we want the polling of link status to continue working.
1073 	 */
1074 	if (enic->rte_dev->data->dev_conf.intr_conf.lsc)
1075 		vnic_dev_notify_set(enic->vdev, -1);
1076 
1077 	vnic_dev_set_reset_flag(enic->vdev, 1);
1078 
1079 	for (i = 0; i < enic->wq_count; i++)
1080 		vnic_wq_clean(&enic->wq[i], enic_free_wq_buf);
1081 
1082 	for (i = 0; i < enic_vnic_rq_count(enic); i++)
1083 		if (enic->rq[i].in_use)
1084 			vnic_rq_clean(&enic->rq[i], enic_free_rq_buf);
1085 	for (i = 0; i < enic->cq_count; i++)
1086 		vnic_cq_clean(&enic->cq[i]);
1087 	for (i = 0; i < enic->intr_count; i++)
1088 		vnic_intr_clean(&enic->intr[i]);
1089 
1090 	return 0;
1091 }
1092 
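/* Issue a devcmd via 'start' and poll 'finished' until it completes, for up
 * to 2 seconds.
 */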
1093 static int enic_dev_wait(struct vnic_dev *vdev,
1094 	int (*start)(struct vnic_dev *, int),
1095 	int (*finished)(struct vnic_dev *, int *),
1096 	int arg)
1097 {
1098 	int done;
1099 	int err;
1100 	int i;
1101 
1102 	err = start(vdev, arg);
1103 	if (err)
1104 		return err;
1105 
1106 	/* Wait for func to complete...2 seconds max */
1107 	for (i = 0; i < 2000; i++) {
1108 		err = finished(vdev, &done);
1109 		if (err)
1110 			return err;
1111 		if (done)
1112 			return 0;
1113 		usleep(1000);
1114 	}
1115 	return -ETIMEDOUT;
1116 }
1117 
1118 static int enic_dev_open(struct enic *enic)
1119 {
1120 	int err;
1121 	int flags = CMD_OPENF_IG_DESCCACHE;
1122 
1123 	err = enic_dev_wait(enic->vdev, vnic_dev_open,
1124 		vnic_dev_open_done, flags);
1125 	if (err)
1126 		dev_err(enic_get_dev(enic),
1127 			"vNIC device open failed, err %d\n", err);
1128 
1129 	return err;
1130 }
1131 
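/*
 * Copy the RSS hash key into DMA-able memory and hand it to the NIC. A copy
 * is kept in enic->rss_key for later queries.
 */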
1132 static int enic_set_rsskey(struct enic *enic, uint8_t *user_key)
1133 {
1134 	dma_addr_t rss_key_buf_pa;
1135 	union vnic_rss_key *rss_key_buf_va = NULL;
1136 	int err, i;
1137 	u8 name[NAME_MAX];
1138 
1139 	RTE_ASSERT(user_key != NULL);
1140 	snprintf((char *)name, NAME_MAX, "rss_key-%s", enic->bdf_name);
1141 	rss_key_buf_va = enic_alloc_consistent(enic, sizeof(union vnic_rss_key),
1142 		&rss_key_buf_pa, name);
1143 	if (!rss_key_buf_va)
1144 		return -ENOMEM;
1145 
1146 	for (i = 0; i < ENIC_RSS_HASH_KEY_SIZE; i++)
1147 		rss_key_buf_va->key[i / 10].b[i % 10] = user_key[i];
1148 
1149 	err = enic_set_rss_key(enic,
1150 		rss_key_buf_pa,
1151 		sizeof(union vnic_rss_key));
1152 
1153 	/* Save for later queries */
1154 	if (!err) {
1155 		rte_memcpy(&enic->rss_key, rss_key_buf_va,
1156 			   sizeof(union vnic_rss_key));
1157 	}
1158 	enic_free_consistent(enic, sizeof(union vnic_rss_key),
1159 		rss_key_buf_va, rss_key_buf_pa);
1160 
1161 	return err;
1162 }
1163 
1164 int enic_set_rss_reta(struct enic *enic, union vnic_rss_cpu *rss_cpu)
1165 {
1166 	dma_addr_t rss_cpu_buf_pa;
1167 	union vnic_rss_cpu *rss_cpu_buf_va = NULL;
1168 	int err;
1169 	u8 name[NAME_MAX];
1170 
1171 	snprintf((char *)name, NAME_MAX, "rss_cpu-%s", enic->bdf_name);
1172 	rss_cpu_buf_va = enic_alloc_consistent(enic, sizeof(union vnic_rss_cpu),
1173 		&rss_cpu_buf_pa, name);
1174 	if (!rss_cpu_buf_va)
1175 		return -ENOMEM;
1176 
1177 	rte_memcpy(rss_cpu_buf_va, rss_cpu, sizeof(union vnic_rss_cpu));
1178 
1179 	err = enic_set_rss_cpu(enic,
1180 		rss_cpu_buf_pa,
1181 		sizeof(union vnic_rss_cpu));
1182 
1183 	enic_free_consistent(enic, sizeof(union vnic_rss_cpu),
1184 		rss_cpu_buf_va, rss_cpu_buf_pa);
1185 
1186 	/* Save for later queries */
1187 	if (!err)
1188 		rte_memcpy(&enic->rss_cpu, rss_cpu, sizeof(union vnic_rss_cpu));
1189 	return err;
1190 }
1191 
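/*
 * Apply the RSS and ingress VLAN-strip settings to the NIC in a single
 * enic_set_nic_cfg() call.
 */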
1192 static int enic_set_niccfg(struct enic *enic, u8 rss_default_cpu,
1193 	u8 rss_hash_type, u8 rss_hash_bits, u8 rss_base_cpu, u8 rss_enable)
1194 {
1195 	const u8 tso_ipid_split_en = 0;
1196 	int err;
1197 
1198 	err = enic_set_nic_cfg(enic,
1199 		rss_default_cpu, rss_hash_type,
1200 		rss_hash_bits, rss_base_cpu,
1201 		rss_enable, tso_ipid_split_en,
1202 		enic->ig_vlan_strip_en);
1203 
1204 	return err;
1205 }
1206 
1207 /* Initialize RSS with defaults, called from dev_configure */
1208 int enic_init_rss_nic_cfg(struct enic *enic)
1209 {
1210 	static uint8_t default_rss_key[] = {
1211 		85, 67, 83, 97, 119, 101, 115, 111, 109, 101,
1212 		80, 65, 76, 79, 117, 110, 105, 113, 117, 101,
1213 		76, 73, 78, 85, 88, 114, 111, 99, 107, 115,
1214 		69, 78, 73, 67, 105, 115, 99, 111, 111, 108,
1215 	};
1216 	struct rte_eth_rss_conf rss_conf;
1217 	union vnic_rss_cpu rss_cpu;
1218 	int ret, i;
1219 
1220 	rss_conf = enic->rte_dev->data->dev_conf.rx_adv_conf.rss_conf;
1221 	/*
1222 	 * If setting key for the first time, and the user gives us none, then
1223 	 * push the default key to NIC.
1224 	 */
1225 	if (rss_conf.rss_key == NULL) {
1226 		rss_conf.rss_key = default_rss_key;
1227 		rss_conf.rss_key_len = ENIC_RSS_HASH_KEY_SIZE;
1228 	}
1229 	ret = enic_set_rss_conf(enic, &rss_conf);
1230 	if (ret) {
1231 		dev_err(enic, "Failed to configure RSS\n");
1232 		return ret;
1233 	}
1234 	if (enic->rss_enable) {
1235 		/* If enabling RSS, use the default reta */
1236 		for (i = 0; i < ENIC_RSS_RETA_SIZE; i++) {
1237 			rss_cpu.cpu[i / 4].b[i % 4] =
1238 				enic_rte_rq_idx_to_sop_idx(i % enic->rq_count);
1239 		}
1240 		ret = enic_set_rss_reta(enic, &rss_cpu);
1241 		if (ret)
1242 			dev_err(enic, "Failed to set RSS indirection table\n");
1243 	}
1244 	return ret;
1245 }
1246 
1247 int enic_setup_finish(struct enic *enic)
1248 {
1249 	enic_init_soft_stats(enic);
1250 
1251 	/* Default conf */
1252 	vnic_dev_packet_filter(enic->vdev,
1253 		1 /* directed  */,
1254 		1 /* multicast */,
1255 		1 /* broadcast */,
1256 		0 /* promisc   */,
1257 		1 /* allmulti  */);
1258 
1259 	enic->promisc = 0;
1260 	enic->allmulti = 1;
1261 
1262 	return 0;
1263 }
1264 
1265 static int enic_rss_conf_valid(struct enic *enic,
1266 			       struct rte_eth_rss_conf *rss_conf)
1267 {
1268 	/* RSS is disabled per VIC settings. Ignore rss_conf. */
1269 	if (enic->flow_type_rss_offloads == 0)
1270 		return 0;
1271 	if (rss_conf->rss_key != NULL &&
1272 	    rss_conf->rss_key_len != ENIC_RSS_HASH_KEY_SIZE) {
1273 		dev_err(enic, "Given rss_key is %d bytes, it must be %d\n",
1274 			rss_conf->rss_key_len, ENIC_RSS_HASH_KEY_SIZE);
1275 		return -EINVAL;
1276 	}
1277 	if (rss_conf->rss_hf != 0 &&
1278 	    (rss_conf->rss_hf & enic->flow_type_rss_offloads) == 0) {
1279 		dev_err(enic, "Given rss_hf contains none of the supported"
1280 			" types\n");
1281 		return -EINVAL;
1282 	}
1283 	return 0;
1284 }
1285 
1286 /* Set hash type and key according to rss_conf */
1287 int enic_set_rss_conf(struct enic *enic, struct rte_eth_rss_conf *rss_conf)
1288 {
1289 	struct rte_eth_dev *eth_dev;
1290 	uint64_t rss_hf;
1291 	u8 rss_hash_type;
1292 	u8 rss_enable;
1293 	int ret;
1294 
1295 	RTE_ASSERT(rss_conf != NULL);
1296 	ret = enic_rss_conf_valid(enic, rss_conf);
1297 	if (ret) {
1298 		dev_err(enic, "RSS configuration (rss_conf) is invalid\n");
1299 		return ret;
1300 	}
1301 
1302 	eth_dev = enic->rte_dev;
1303 	rss_hash_type = 0;
1304 	rss_hf = rss_conf->rss_hf & enic->flow_type_rss_offloads;
1305 	if (enic->rq_count > 1 &&
1306 	    (eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS_FLAG) &&
1307 	    rss_hf != 0) {
1308 		rss_enable = 1;
1309 		if (rss_hf & (ETH_RSS_IPV4 | ETH_RSS_FRAG_IPV4 |
1310 			      ETH_RSS_NONFRAG_IPV4_OTHER))
1311 			rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_IPV4;
1312 		if (rss_hf & ETH_RSS_NONFRAG_IPV4_TCP)
1313 			rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_TCP_IPV4;
1314 		if (rss_hf & ETH_RSS_NONFRAG_IPV4_UDP) {
1315 			rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_UDP_IPV4;
1316 			if (enic->udp_rss_weak) {
1317 				/*
1318 				 * 'TCP' is not a typo. The "weak" version of
1319 				 * UDP RSS requires both the TCP and UDP bits
1320 				 * be set. It does enable TCP RSS as well.
1321 				 */
1322 				rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_TCP_IPV4;
1323 			}
1324 		}
1325 		if (rss_hf & (ETH_RSS_IPV6 | ETH_RSS_IPV6_EX |
1326 			      ETH_RSS_FRAG_IPV6 | ETH_RSS_NONFRAG_IPV6_OTHER))
1327 			rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_IPV6;
1328 		if (rss_hf & (ETH_RSS_NONFRAG_IPV6_TCP | ETH_RSS_IPV6_TCP_EX))
1329 			rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_TCP_IPV6;
1330 		if (rss_hf & (ETH_RSS_NONFRAG_IPV6_UDP | ETH_RSS_IPV6_UDP_EX)) {
1331 			rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_UDP_IPV6;
1332 			if (enic->udp_rss_weak)
1333 				rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_TCP_IPV6;
1334 		}
1335 	} else {
1336 		rss_enable = 0;
1337 		rss_hf = 0;
1338 	}
1339 
1340 	/* Set the hash key if provided */
1341 	if (rss_enable && rss_conf->rss_key) {
1342 		ret = enic_set_rsskey(enic, rss_conf->rss_key);
1343 		if (ret) {
1344 			dev_err(enic, "Failed to set RSS key\n");
1345 			return ret;
1346 		}
1347 	}
1348 
1349 	ret = enic_set_niccfg(enic, ENIC_RSS_DEFAULT_CPU, rss_hash_type,
1350 			      ENIC_RSS_HASH_BITS, ENIC_RSS_BASE_CPU,
1351 			      rss_enable);
1352 	if (!ret) {
1353 		enic->rss_hf = rss_hf;
1354 		enic->rss_hash_type = rss_hash_type;
1355 		enic->rss_enable = rss_enable;
1356 	} else {
1357 		dev_err(enic, "Failed to update RSS configurations."
1358 			" hash=0x%x\n", rss_hash_type);
1359 	}
1360 	return ret;
1361 }
1362 
1363 int enic_set_vlan_strip(struct enic *enic)
1364 {
1365 	/*
1366 	 * Unfortunately, VLAN strip on/off and RSS on/off are configured
1367 	 * together. So, re-do niccfg, preserving the current RSS settings.
1368 	 */
1369 	return enic_set_niccfg(enic, ENIC_RSS_DEFAULT_CPU, enic->rss_hash_type,
1370 			       ENIC_RSS_HASH_BITS, ENIC_RSS_BASE_CPU,
1371 			       enic->rss_enable);
1372 }
1373 
1374 int enic_add_packet_filter(struct enic *enic)
1375 {
1376 	/* Args -> directed, multicast, broadcast, promisc, allmulti */
1377 	return vnic_dev_packet_filter(enic->vdev, 1, 1, 1,
1378 		enic->promisc, enic->allmulti);
1379 }
1380 
1381 int enic_get_link_status(struct enic *enic)
1382 {
1383 	return vnic_dev_link_status(enic->vdev);
1384 }
1385 
1386 static void enic_dev_deinit(struct enic *enic)
1387 {
1388 	/* stop link status checking */
1389 	vnic_dev_notify_unset(enic->vdev);
1390 
1391 	/* mac_addrs is freed by rte_eth_dev_release_port() */
1392 	rte_free(enic->cq);
1393 	rte_free(enic->intr);
1394 	rte_free(enic->rq);
1395 	rte_free(enic->wq);
1396 }
1397 
1398 
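/*
 * Check that the vNIC provides enough RQs, WQs, CQs and interrupts for the
 * configuration requested through rte_eth_dev_configure().
 */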
1399 int enic_set_vnic_res(struct enic *enic)
1400 {
1401 	struct rte_eth_dev *eth_dev = enic->rte_dev;
1402 	int rc = 0;
1403 	unsigned int required_rq, required_wq, required_cq, required_intr;
1404 
1405 	/* Always use two vNIC RQs per eth_dev RQ, regardless of Rx scatter. */
1406 	required_rq = eth_dev->data->nb_rx_queues * 2;
1407 	required_wq = eth_dev->data->nb_tx_queues;
1408 	required_cq = eth_dev->data->nb_rx_queues + eth_dev->data->nb_tx_queues;
1409 	required_intr = 1; /* 1 for LSC even if intr_conf.lsc is 0 */
1410 	if (eth_dev->data->dev_conf.intr_conf.rxq) {
1411 		required_intr += eth_dev->data->nb_rx_queues;
1412 	}
1413 
1414 	if (enic->conf_rq_count < required_rq) {
1415 		dev_err(dev, "Not enough Receive queues. Requested:%u which uses %d RQs on VIC, Configured:%u\n",
1416 			eth_dev->data->nb_rx_queues,
1417 			required_rq, enic->conf_rq_count);
1418 		rc = -EINVAL;
1419 	}
1420 	if (enic->conf_wq_count < required_wq) {
1421 		dev_err(dev, "Not enough Transmit queues. Requested:%u, Configured:%u\n",
1422 			eth_dev->data->nb_tx_queues, enic->conf_wq_count);
1423 		rc = -EINVAL;
1424 	}
1425 
1426 	if (enic->conf_cq_count < required_cq) {
1427 		dev_err(dev, "Not enough Completion queues. Required:%u, Configured:%u\n",
1428 			required_cq, enic->conf_cq_count);
1429 		rc = -EINVAL;
1430 	}
1431 	if (enic->conf_intr_count < required_intr) {
1432 		dev_err(dev, "Not enough Interrupts to support Rx queue"
1433 			" interrupts. Required:%u, Configured:%u\n",
1434 			required_intr, enic->conf_intr_count);
1435 		rc = -EINVAL;
1436 	}
1437 
1438 	if (rc == 0) {
1439 		enic->rq_count = eth_dev->data->nb_rx_queues;
1440 		enic->wq_count = eth_dev->data->nb_tx_queues;
1441 		enic->cq_count = enic->rq_count + enic->wq_count;
1442 		enic->intr_count = required_intr;
1443 	}
1444 
1445 	return rc;
1446 }
1447 
1448 /* Re-initialize the CQ and RQs behind an Rx queue and refill them with mbufs */
1449 static int
1450 enic_reinit_rq(struct enic *enic, unsigned int rq_idx)
1451 {
1452 	struct vnic_rq *sop_rq, *data_rq;
1453 	unsigned int cq_idx;
1454 	int rc = 0;
1455 
1456 	sop_rq = &enic->rq[enic_rte_rq_idx_to_sop_idx(rq_idx)];
1457 	data_rq = &enic->rq[enic_rte_rq_idx_to_data_idx(rq_idx)];
1458 	cq_idx = rq_idx;
1459 
1460 	vnic_cq_clean(&enic->cq[cq_idx]);
1461 	vnic_cq_init(&enic->cq[cq_idx],
1462 		     0 /* flow_control_enable */,
1463 		     1 /* color_enable */,
1464 		     0 /* cq_head */,
1465 		     0 /* cq_tail */,
1466 		     1 /* cq_tail_color */,
1467 		     0 /* interrupt_enable */,
1468 		     1 /* cq_entry_enable */,
1469 		     0 /* cq_message_enable */,
1470 		     0 /* interrupt offset */,
1471 		     0 /* cq_message_addr */);
1472 
1473 
1474 	vnic_rq_init_start(sop_rq, enic_cq_rq(enic,
1475 			   enic_rte_rq_idx_to_sop_idx(rq_idx)), 0,
1476 			   sop_rq->ring.desc_count - 1, 1, 0);
1477 	if (data_rq->in_use) {
1478 		vnic_rq_init_start(data_rq,
1479 				   enic_cq_rq(enic,
1480 				   enic_rte_rq_idx_to_data_idx(rq_idx)), 0,
1481 				   data_rq->ring.desc_count - 1, 1, 0);
1482 	}
1483 
1484 	rc = enic_alloc_rx_queue_mbufs(enic, sop_rq);
1485 	if (rc)
1486 		return rc;
1487 
1488 	if (data_rq->in_use) {
1489 		rc = enic_alloc_rx_queue_mbufs(enic, data_rq);
1490 		if (rc) {
1491 			enic_rxmbuf_queue_release(enic, sop_rq);
1492 			return rc;
1493 		}
1494 	}
1495 
1496 	return 0;
1497 }
1498 
1499 /* The Cisco NIC can send and receive packets up to a max packet size
1500  * determined by the NIC type and firmware. There is also an MTU
1501  * configured into the NIC via the CIMC/UCSM management interface
1502  * which can be overridden by this function (up to the max packet size).
1503  * Depending on the network setup, doing so may cause packet drops
1504  * and unexpected behavior.
1505  */
1506 int enic_set_mtu(struct enic *enic, uint16_t new_mtu)
1507 {
1508 	unsigned int rq_idx;
1509 	struct vnic_rq *rq;
1510 	int rc = 0;
1511 	uint16_t old_mtu;	/* previous setting */
1512 	uint16_t config_mtu;	/* Value configured into NIC via CIMC/UCSM */
1513 	struct rte_eth_dev *eth_dev = enic->rte_dev;
1514 
1515 	old_mtu = eth_dev->data->mtu;
1516 	config_mtu = enic->config.mtu;
1517 
1518 	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
1519 		return -E_RTE_SECONDARY;
1520 
1521 	if (new_mtu > enic->max_mtu) {
1522 		dev_err(enic,
1523 			"MTU not updated: requested (%u) greater than max (%u)\n",
1524 			new_mtu, enic->max_mtu);
1525 		return -EINVAL;
1526 	}
1527 	if (new_mtu < ENIC_MIN_MTU) {
1528 		dev_info(enic,
1529 			"MTU not updated: requested (%u) less than min (%u)\n",
1530 			new_mtu, ENIC_MIN_MTU);
1531 		return -EINVAL;
1532 	}
1533 	if (new_mtu > config_mtu)
1534 		dev_warning(enic,
1535 			"MTU (%u) is greater than value configured in NIC (%u)\n",
1536 			new_mtu, config_mtu);
1537 
1538 	/* Update the MTU and maximum packet length */
1539 	eth_dev->data->mtu = new_mtu;
1540 	eth_dev->data->dev_conf.rxmode.max_rx_pkt_len =
1541 		enic_mtu_to_max_rx_pktlen(new_mtu);
1542 
1543 	/*
1544 	 * If the device has not started (enic_enable), nothing to do.
1545 	 * Later, enic_enable() will set up RQs reflecting the new maximum
1546 	 * packet length.
1547 	 */
1548 	if (!eth_dev->data->dev_started)
1549 		goto set_mtu_done;
1550 
1551 	/*
1552 	 * The device has started, re-do RQs on the fly. In the process, we
1553 	 * pick up the new maximum packet length.
1554 	 *
1555 	 * Some applications rely on the ability to change MTU without stopping
1556 	 * the device. So keep this behavior for now.
1557 	 */
1558 	rte_spinlock_lock(&enic->mtu_lock);
1559 
1560 	/* Stop traffic on all RQs */
1561 	for (rq_idx = 0; rq_idx < enic->rq_count * 2; rq_idx++) {
1562 		rq = &enic->rq[rq_idx];
1563 		if (rq->is_sop && rq->in_use) {
1564 			rc = enic_stop_rq(enic,
1565 					  enic_sop_rq_idx_to_rte_idx(rq_idx));
1566 			if (rc) {
1567 				dev_err(enic, "Failed to stop Rq %u\n", rq_idx);
1568 				goto set_mtu_done;
1569 			}
1570 		}
1571 	}
1572 
1573 	/* replace Rx function with a no-op to avoid getting stale pkts */
1574 	eth_dev->rx_pkt_burst = enic_dummy_recv_pkts;
1575 	rte_mb();
1576 
1577 	/* Allow time for threads to exit the real Rx function. */
1578 	usleep(100000);
1579 
1580 	/* now it is safe to reconfigure the RQs */
1581 
1582 
1583 	/* free and reallocate RQs with the new MTU */
1584 	for (rq_idx = 0; rq_idx < enic->rq_count; rq_idx++) {
1585 		rq = &enic->rq[enic_rte_rq_idx_to_sop_idx(rq_idx)];
1586 		if (!rq->in_use)
1587 			continue;
1588 
1589 		enic_free_rq(rq);
1590 		rc = enic_alloc_rq(enic, rq_idx, rq->socket_id, rq->mp,
1591 				   rq->tot_nb_desc, rq->rx_free_thresh);
1592 		if (rc) {
1593 			dev_err(enic,
1594 				"Fatal MTU alloc error- No traffic will pass\n");
1595 			goto set_mtu_done;
1596 		}
1597 
1598 		rc = enic_reinit_rq(enic, rq_idx);
1599 		if (rc) {
1600 			dev_err(enic,
1601 				"Fatal MTU RQ reinit- No traffic will pass\n");
1602 			goto set_mtu_done;
1603 		}
1604 	}
1605 
1606 	/* put back the real receive function */
1607 	rte_mb();
1608 	pick_rx_handler(enic);
1609 	rte_mb();
1610 
1611 	/* restart Rx traffic */
1612 	for (rq_idx = 0; rq_idx < enic->rq_count; rq_idx++) {
1613 		rq = &enic->rq[enic_rte_rq_idx_to_sop_idx(rq_idx)];
1614 		if (rq->is_sop && rq->in_use)
1615 			enic_start_rq(enic, rq_idx);
1616 	}
1617 
1618 set_mtu_done:
1619 	dev_info(enic, "MTU changed from %u to %u\n",  old_mtu, new_mtu);
1620 	rte_spinlock_unlock(&enic->mtu_lock);
1621 	return rc;
1622 }
1623 
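/*
 * One-time initialization at probe: read the vNIC config and resource
 * counts, allocate the queue/interrupt arrays and MAC address table, start
 * link notification, and set up overlay (VXLAN) offload state.
 */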
1624 static int enic_dev_init(struct enic *enic)
1625 {
1626 	int err;
1627 	struct rte_eth_dev *eth_dev = enic->rte_dev;
1628 
1629 	vnic_dev_intr_coal_timer_info_default(enic->vdev);
1630 
1631 	/* Get vNIC configuration */
1633 	err = enic_get_vnic_config(enic);
1634 	if (err) {
1635 		dev_err(dev, "Get vNIC configuration failed, aborting\n");
1636 		return err;
1637 	}
1638 
1639 	/* Get available resource counts */
1640 	enic_get_res_counts(enic);
1641 	if (enic->conf_rq_count == 1) {
1642 		dev_err(enic, "Running with only 1 RQ configured in the vNIC is not supported.\n");
1643 		dev_err(enic, "Please configure 2 RQs in the vNIC for each Rx queue used by DPDK.\n");
1644 		dev_err(enic, "See the ENIC PMD guide for more information.\n");
1645 		return -EINVAL;
1646 	}
1647 	/* Queue counts may be zeros. rte_zmalloc returns NULL in that case. */
1648 	enic->cq = rte_zmalloc("enic_vnic_cq", sizeof(struct vnic_cq) *
1649 			       enic->conf_cq_count, 8);
1650 	enic->intr = rte_zmalloc("enic_vnic_intr", sizeof(struct vnic_intr) *
1651 				 enic->conf_intr_count, 8);
1652 	enic->rq = rte_zmalloc("enic_vnic_rq", sizeof(struct vnic_rq) *
1653 			       enic->conf_rq_count, 8);
1654 	enic->wq = rte_zmalloc("enic_vnic_wq", sizeof(struct vnic_wq) *
1655 			       enic->conf_wq_count, 8);
1656 	if (enic->conf_cq_count > 0 && enic->cq == NULL) {
1657 		dev_err(enic, "failed to allocate vnic_cq, aborting.\n");
1658 		return -1;
1659 	}
1660 	if (enic->conf_intr_count > 0 && enic->intr == NULL) {
1661 		dev_err(enic, "failed to allocate vnic_intr, aborting.\n");
1662 		return -1;
1663 	}
1664 	if (enic->conf_rq_count > 0 && enic->rq == NULL) {
1665 		dev_err(enic, "failed to allocate vnic_rq, aborting.\n");
1666 		return -1;
1667 	}
1668 	if (enic->conf_wq_count > 0 && enic->wq == NULL) {
1669 		dev_err(enic, "failed to allocate vnic_wq, aborting.\n");
1670 		return -1;
1671 	}
1672 
1673 	/* Get the supported filters */
1674 	enic_fdir_info(enic);
1675 
1676 	eth_dev->data->mac_addrs = rte_zmalloc("enic_mac_addr",
1677 					sizeof(struct rte_ether_addr) *
1678 					ENIC_UNICAST_PERFECT_FILTERS, 0);
1679 	if (!eth_dev->data->mac_addrs) {
1680 		dev_err(enic, "mac addr storage alloc failed, aborting.\n");
1681 		return -1;
1682 	}
1683 	rte_ether_addr_copy((struct rte_ether_addr *)enic->mac_addr,
1684 			eth_dev->data->mac_addrs);
1685 
1686 	vnic_dev_set_reset_flag(enic->vdev, 0);
1687 
1688 	LIST_INIT(&enic->flows);
1689 
1690 	/* set up link status checking */
1691 	vnic_dev_notify_set(enic->vdev, -1); /* No Intr for notify */
1692 
1693 	enic->overlay_offload = false;
1694 	if (enic->disable_overlay && enic->vxlan) {
1695 		/*
1696 		 * Explicitly disable overlay offload as the setting is
1697 		 * sticky, and resetting vNIC does not disable it.
1698 		 */
1699 		if (vnic_dev_overlay_offload_ctrl(enic->vdev,
1700 						  OVERLAY_FEATURE_VXLAN,
1701 						  OVERLAY_OFFLOAD_DISABLE)) {
1702 			dev_err(enic, "failed to disable overlay offload\n");
1703 		} else {
1704 			dev_info(enic, "Overlay offload is disabled\n");
1705 		}
1706 	}
1707 	if (!enic->disable_overlay && enic->vxlan &&
1708 	    /* 'VXLAN feature' enables VXLAN, NVGRE, and GENEVE. */
1709 	    vnic_dev_overlay_offload_ctrl(enic->vdev,
1710 					  OVERLAY_FEATURE_VXLAN,
1711 					  OVERLAY_OFFLOAD_ENABLE) == 0) {
1712 		enic->tx_offload_capa |=
1713 			DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM |
1714 			DEV_TX_OFFLOAD_GENEVE_TNL_TSO |
1715 			DEV_TX_OFFLOAD_VXLAN_TNL_TSO;
1716 		enic->tx_offload_mask |=
1717 			PKT_TX_OUTER_IPV6 |
1718 			PKT_TX_OUTER_IPV4 |
1719 			PKT_TX_OUTER_IP_CKSUM |
1720 			PKT_TX_TUNNEL_MASK;
1721 		enic->overlay_offload = true;
1722 		dev_info(enic, "Overlay offload is enabled\n");
1723 	}
1724 	/*
1725 	 * Reset the vxlan port if HW vxlan parsing is available. It
1726 	 * is always enabled regardless of overlay offload
1727 	 * enable/disable.
1728 	 */
1729 	if (enic->vxlan) {
1730 		enic->vxlan_port = ENIC_DEFAULT_VXLAN_PORT;
1731 		/*
1732 		 * Reset the vxlan port to the default, as the NIC firmware
1733 		 * does not reset it automatically and keeps the old setting.
1734 		 */
1735 		if (vnic_dev_overlay_offload_cfg(enic->vdev,
1736 						 OVERLAY_CFG_VXLAN_PORT_UPDATE,
1737 						 ENIC_DEFAULT_VXLAN_PORT)) {
1738 			dev_err(enic, "failed to update vxlan port\n");
1739 			return -EINVAL;
1740 		}
1741 	}
1742 
1743 	return 0;
1744 
1745 }
1746 
1747 int enic_probe(struct enic *enic)
1748 {
1749 	struct rte_pci_device *pdev = enic->pdev;
1750 	int err = -1;
1751 
1752 	dev_debug(enic, "Initializing ENIC PMD\n");
1753 
1754 	/* if this is a secondary process, the hardware is already initialized */
1755 	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
1756 		return 0;
1757 
1758 	enic->bar0.vaddr = (void *)pdev->mem_resource[0].addr;
1759 	enic->bar0.len = pdev->mem_resource[0].len;
1760 
1761 	/* Register vNIC device */
1762 	enic->vdev = vnic_dev_register(NULL, enic, enic->pdev, &enic->bar0, 1);
1763 	if (!enic->vdev) {
1764 		dev_err(enic, "vNIC registration failed, aborting\n");
1765 		goto err_out;
1766 	}
1767 
1768 	LIST_INIT(&enic->memzone_list);
1769 	rte_spinlock_init(&enic->memzone_list_lock);
1770 
1771 	vnic_register_cbacks(enic->vdev,
1772 		enic_alloc_consistent,
1773 		enic_free_consistent);
1774 
1775 	/*
1776 	 * Allocate the consistent memory for stats upfront so both primary and
1777 	 * secondary processes can dump stats.
1778 	 */
1779 	err = vnic_dev_alloc_stats_mem(enic->vdev);
1780 	if (err) {
1781 		dev_err(enic, "Failed to allocate cmd memory, aborting\n");
1782 		goto err_out_unregister;
1783 	}
1784 	/* Issue device open to get device in known state */
1785 	err = enic_dev_open(enic);
1786 	if (err) {
1787 		dev_err(enic, "vNIC dev open failed, aborting\n");
1788 		goto err_out_unregister;
1789 	}
1790 
1791 	/* Set ingress vlan rewrite mode before vnic initialization */
1792 	dev_debug(enic, "Set ig_vlan_rewrite_mode=%u\n",
1793 		  enic->ig_vlan_rewrite_mode);
1794 	err = vnic_dev_set_ig_vlan_rewrite_mode(enic->vdev,
1795 		enic->ig_vlan_rewrite_mode);
1796 	if (err) {
1797 		dev_err(enic,
1798 			"Failed to set ingress vlan rewrite mode, aborting.\n");
1799 		goto err_out_dev_close;
1800 	}
1801 
1802 	/* Issue device init to initialize the vnic-to-switch link.
1803 	 * We'll start with carrier off and wait for link UP
1804 	 * notification later to turn on carrier.  We don't need
1805 	 * to wait here for the vnic-to-switch link initialization
1806 	 * to complete; link UP notification is the indication that
1807 	 * the process is complete.
1808 	 */
1809 
1810 	err = vnic_dev_init(enic->vdev, 0);
1811 	if (err) {
1812 		dev_err(enic, "vNIC dev init failed, aborting\n");
1813 		goto err_out_dev_close;
1814 	}
1815 
1816 	err = enic_dev_init(enic);
1817 	if (err) {
1818 		dev_err(enic, "Device initialization failed, aborting\n");
1819 		goto err_out_dev_close;
1820 	}
1821 
1822 	return 0;
1823 
1824 err_out_dev_close:
1825 	vnic_dev_close(enic->vdev);
1826 err_out_unregister:
1827 	vnic_dev_unregister(enic->vdev);
1828 err_out:
1829 	return err;
1830 }
1831 
1832 void enic_remove(struct enic *enic)
1833 {
1834 	enic_dev_deinit(enic);
1835 	vnic_dev_close(enic->vdev);
1836 	vnic_dev_unregister(enic->vdev);
1837 }
1838