xref: /dpdk/drivers/net/enic/enic_main.c (revision f5057be340e44f3edc0fe90fa875eb89a4c49b4f)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright 2008-2017 Cisco Systems, Inc.  All rights reserved.
3  * Copyright 2007 Nuova Systems, Inc.  All rights reserved.
4  */
5 
6 #include <stdio.h>
7 
8 #include <sys/stat.h>
9 #include <sys/mman.h>
10 #include <fcntl.h>
11 
12 #include <rte_pci.h>
13 #include <rte_bus_pci.h>
14 #include <rte_memzone.h>
15 #include <rte_malloc.h>
16 #include <rte_mbuf.h>
17 #include <rte_string_fns.h>
18 #include <rte_ethdev_driver.h>
19 
20 #include "enic_compat.h"
21 #include "enic.h"
22 #include "wq_enet_desc.h"
23 #include "rq_enet_desc.h"
24 #include "cq_enet_desc.h"
25 #include "vnic_enet.h"
26 #include "vnic_dev.h"
27 #include "vnic_wq.h"
28 #include "vnic_rq.h"
29 #include "vnic_cq.h"
30 #include "vnic_intr.h"
31 #include "vnic_nic.h"
32 
33 static inline int enic_is_sriov_vf(struct enic *enic)
34 {
35 	return enic->pdev->id.device_id == PCI_DEVICE_ID_CISCO_VIC_ENET_VF;
36 }
37 
38 static int is_zero_addr(uint8_t *addr)
39 {
40 	return !(addr[0] |  addr[1] | addr[2] | addr[3] | addr[4] | addr[5]);
41 }
42 
43 static int is_mcast_addr(uint8_t *addr)
44 {
45 	return addr[0] & 1;
46 }
47 
48 static int is_eth_addr_valid(uint8_t *addr)
49 {
50 	return !is_mcast_addr(addr) && !is_zero_addr(addr);
51 }
52 
53 void
54 enic_rxmbuf_queue_release(__rte_unused struct enic *enic, struct vnic_rq *rq)
55 {
56 	uint16_t i;
57 
58 	if (!rq || !rq->mbuf_ring) {
59 		dev_debug(enic, "Pointer to rq or mbuf_ring is NULL");
60 		return;
61 	}
62 
63 	for (i = 0; i < rq->ring.desc_count; i++) {
64 		if (rq->mbuf_ring[i]) {
65 			rte_pktmbuf_free_seg(rq->mbuf_ring[i]);
66 			rq->mbuf_ring[i] = NULL;
67 		}
68 	}
69 }
70 
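/*
 * Per-descriptor callback handed to vnic_wq_clean() in enic_disable() to
 * release a completed Tx mbuf segment and clear the ring slot.
 */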
71 void enic_free_wq_buf(struct rte_mbuf **buf)
72 {
73 	struct rte_mbuf *mbuf = *buf;
74 
75 	rte_pktmbuf_free_seg(mbuf);
76 	*buf = NULL;
77 }
78 
79 static void enic_log_q_error(struct enic *enic)
80 {
81 	unsigned int i;
82 	uint32_t error_status;
83 
84 	for (i = 0; i < enic->wq_count; i++) {
85 		error_status = vnic_wq_error_status(&enic->wq[i]);
86 		if (error_status)
87 			dev_err(enic, "WQ[%d] error_status %d\n", i,
88 				error_status);
89 	}
90 
91 	for (i = 0; i < enic_vnic_rq_count(enic); i++) {
92 		if (!enic->rq[i].in_use)
93 			continue;
94 		error_status = vnic_rq_error_status(&enic->rq[i]);
95 		if (error_status)
96 			dev_err(enic, "RQ[%d] error_status %d\n", i,
97 				error_status);
98 	}
99 }
100 
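/*
 * "Soft" stats are counters kept by the driver itself (mbuf allocation
 * failures, Rx packet errors, oversized Tx packets). They supplement the
 * hardware counters reported by enic_dev_stats_get().
 */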
101 static void enic_clear_soft_stats(struct enic *enic)
102 {
103 	struct enic_soft_stats *soft_stats = &enic->soft_stats;
104 	rte_atomic64_clear(&soft_stats->rx_nombuf);
105 	rte_atomic64_clear(&soft_stats->rx_packet_errors);
106 	rte_atomic64_clear(&soft_stats->tx_oversized);
107 }
108 
109 static void enic_init_soft_stats(struct enic *enic)
110 {
111 	struct enic_soft_stats *soft_stats = &enic->soft_stats;
112 	rte_atomic64_init(&soft_stats->rx_nombuf);
113 	rte_atomic64_init(&soft_stats->rx_packet_errors);
114 	rte_atomic64_init(&soft_stats->tx_oversized);
115 	enic_clear_soft_stats(enic);
116 }
117 
118 int enic_dev_stats_clear(struct enic *enic)
119 {
120 	int ret;
121 
122 	ret = vnic_dev_stats_clear(enic->vdev);
123 	if (ret != 0) {
124 		dev_err(enic, "Error in clearing stats\n");
125 		return ret;
126 	}
127 	enic_clear_soft_stats(enic);
128 
129 	return 0;
130 }
131 
132 int enic_dev_stats_get(struct enic *enic, struct rte_eth_stats *r_stats)
133 {
134 	struct vnic_stats *stats;
135 	struct enic_soft_stats *soft_stats = &enic->soft_stats;
136 	int64_t rx_truncated;
137 	uint64_t rx_packet_errors;
138 	int ret = vnic_dev_stats_dump(enic->vdev, &stats);
139 
140 	if (ret) {
141 		dev_err(enic, "Error in getting stats\n");
142 		return ret;
143 	}
144 
145 	/* The number of truncated packets can only be calculated by
146 	 * subtracting a hardware counter from error packets received by
147 	 * the driver. Note: this causes transient inaccuracies in the
148 	 * ipackets count. Also, the length of truncated packets is
149 	 * counted in ibytes even though truncated packets are dropped,
150 	 * which can make ibytes slightly higher than it should be.
151 	 */
152 	rx_packet_errors = rte_atomic64_read(&soft_stats->rx_packet_errors);
153 	rx_truncated = rx_packet_errors - stats->rx.rx_errors;
154 
155 	r_stats->ipackets = stats->rx.rx_frames_ok - rx_truncated;
156 	r_stats->opackets = stats->tx.tx_frames_ok;
157 
158 	r_stats->ibytes = stats->rx.rx_bytes_ok;
159 	r_stats->obytes = stats->tx.tx_bytes_ok;
160 
161 	r_stats->ierrors = stats->rx.rx_errors + stats->rx.rx_drop;
162 	r_stats->oerrors = stats->tx.tx_errors
163 			   + rte_atomic64_read(&soft_stats->tx_oversized);
164 
165 	r_stats->imissed = stats->rx.rx_no_bufs + rx_truncated;
166 
167 	r_stats->rx_nombuf = rte_atomic64_read(&soft_stats->rx_nombuf);
168 	return 0;
169 }
170 
171 int enic_del_mac_address(struct enic *enic, int mac_index)
172 {
173 	struct rte_eth_dev *eth_dev = enic->rte_dev;
174 	uint8_t *mac_addr = eth_dev->data->mac_addrs[mac_index].addr_bytes;
175 
176 	return vnic_dev_del_addr(enic->vdev, mac_addr);
177 }
178 
179 int enic_set_mac_address(struct enic *enic, uint8_t *mac_addr)
180 {
181 	int err;
182 
183 	if (!is_eth_addr_valid(mac_addr)) {
184 		dev_err(enic, "invalid mac address\n");
185 		return -EINVAL;
186 	}
187 
188 	err = vnic_dev_add_addr(enic->vdev, mac_addr);
189 	if (err)
190 		dev_err(enic, "add mac addr failed\n");
191 	return err;
192 }
193 
194 void enic_free_rq_buf(struct rte_mbuf **mbuf)
195 {
196 	if (*mbuf == NULL)
197 		return;
198 
199 	rte_pktmbuf_free(*mbuf);
200 	*mbuf = NULL;
201 }
202 
203 void enic_init_vnic_resources(struct enic *enic)
204 {
205 	unsigned int error_interrupt_enable = 1;
206 	unsigned int error_interrupt_offset = 0;
207 	unsigned int rxq_interrupt_enable = 0;
208 	unsigned int rxq_interrupt_offset = ENICPMD_RXQ_INTR_OFFSET;
209 	unsigned int index = 0;
210 	unsigned int cq_idx;
211 	struct vnic_rq *data_rq;
212 
213 	if (enic->rte_dev->data->dev_conf.intr_conf.rxq)
214 		rxq_interrupt_enable = 1;
215 
216 	for (index = 0; index < enic->rq_count; index++) {
217 		cq_idx = enic_cq_rq(enic, enic_rte_rq_idx_to_sop_idx(index));
218 
219 		vnic_rq_init(&enic->rq[enic_rte_rq_idx_to_sop_idx(index)],
220 			cq_idx,
221 			error_interrupt_enable,
222 			error_interrupt_offset);
223 
224 		data_rq = &enic->rq[enic_rte_rq_idx_to_data_idx(index, enic)];
225 		if (data_rq->in_use)
226 			vnic_rq_init(data_rq,
227 				     cq_idx,
228 				     error_interrupt_enable,
229 				     error_interrupt_offset);
230 		vnic_cq_init(&enic->cq[cq_idx],
231 			0 /* flow_control_enable */,
232 			1 /* color_enable */,
233 			0 /* cq_head */,
234 			0 /* cq_tail */,
235 			1 /* cq_tail_color */,
236 			rxq_interrupt_enable,
237 			1 /* cq_entry_enable */,
238 			0 /* cq_message_enable */,
239 			rxq_interrupt_offset,
240 			0 /* cq_message_addr */);
241 		if (rxq_interrupt_enable)
242 			rxq_interrupt_offset++;
243 	}
244 
245 	for (index = 0; index < enic->wq_count; index++) {
246 		vnic_wq_init(&enic->wq[index],
247 			enic_cq_wq(enic, index),
248 			error_interrupt_enable,
249 			error_interrupt_offset);
250 		/* Compute unsupported ol flags for enic_prep_pkts() */
251 		enic->wq[index].tx_offload_notsup_mask =
252 			PKT_TX_OFFLOAD_MASK ^ enic->tx_offload_mask;
253 
254 		cq_idx = enic_cq_wq(enic, index);
255 		vnic_cq_init(&enic->cq[cq_idx],
256 			0 /* flow_control_enable */,
257 			1 /* color_enable */,
258 			0 /* cq_head */,
259 			0 /* cq_tail */,
260 			1 /* cq_tail_color */,
261 			0 /* interrupt_enable */,
262 			0 /* cq_entry_enable */,
263 			1 /* cq_message_enable */,
264 			0 /* interrupt offset */,
265 			(uint64_t)enic->wq[index].cqmsg_rz->iova);
266 	}
267 
268 	for (index = 0; index < enic->intr_count; index++) {
269 		vnic_intr_init(&enic->intr[index],
270 			       enic->config.intr_timer_usec,
271 			       enic->config.intr_timer_type,
272 			       /*mask_on_assertion*/1);
273 	}
274 }
275 
276 
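/*
 * Allocate one mbuf per RQ descriptor and fill in the matching receive
 * descriptors. The buffers are not made visible to the NIC here;
 * enic_start_rq() -> enic_initial_post_rx() writes the posted index once
 * the RQ has been enabled.
 */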
277 int
278 enic_alloc_rx_queue_mbufs(struct enic *enic, struct vnic_rq *rq)
279 {
280 	struct rte_mbuf *mb;
281 	struct rq_enet_desc *rqd = rq->ring.descs;
282 	unsigned i;
283 	dma_addr_t dma_addr;
284 	uint32_t max_rx_pkt_len;
285 	uint16_t rq_buf_len;
286 
287 	if (!rq->in_use)
288 		return 0;
289 
290 	dev_debug(enic, "queue %u, allocating %u rx queue mbufs\n", rq->index,
291 		  rq->ring.desc_count);
292 
293 	/*
294 	 * If *not* using scatter and the mbuf size is greater than the
295 	 * requested max packet size (max_rx_pkt_len), then reduce the
296 	 * posted buffer size to max_rx_pkt_len. HW still receives packets
297 	 * larger than max_rx_pkt_len, but they will be truncated, which we
298 	 * drop in the rx handler. Not ideal, but better than returning
299 	 * large packets when the user is not expecting them.
300 	 */
301 	max_rx_pkt_len = enic->rte_dev->data->dev_conf.rxmode.max_rx_pkt_len;
302 	rq_buf_len = rte_pktmbuf_data_room_size(rq->mp) - RTE_PKTMBUF_HEADROOM;
303 	if (max_rx_pkt_len < rq_buf_len && !rq->data_queue_enable)
304 		rq_buf_len = max_rx_pkt_len;
305 	for (i = 0; i < rq->ring.desc_count; i++, rqd++) {
306 		mb = rte_mbuf_raw_alloc(rq->mp);
307 		if (mb == NULL) {
308 			dev_err(enic, "RX mbuf alloc failed queue_id=%u\n",
309 			(unsigned)rq->index);
310 			return -ENOMEM;
311 		}
312 
313 		mb->data_off = RTE_PKTMBUF_HEADROOM;
314 		dma_addr = (dma_addr_t)(mb->buf_iova
315 			   + RTE_PKTMBUF_HEADROOM);
316 		rq_enet_desc_enc(rqd, dma_addr,
317 				(rq->is_sop ? RQ_ENET_TYPE_ONLY_SOP
318 				: RQ_ENET_TYPE_NOT_SOP),
319 				rq_buf_len);
320 		rq->mbuf_ring[i] = mb;
321 	}
322 	/*
323 	 * Do not post the buffers to the NIC until we enable the RQ via
324 	 * enic_start_rq().
325 	 */
326 	rq->need_initial_post = true;
327 	/* Initialize fetch index while RQ is disabled */
328 	iowrite32(0, &rq->ctrl->fetch_index);
329 	return 0;
330 }
331 
332 /*
333  * Post the Rx buffers for the first time. enic_alloc_rx_queue_mbufs() has
334  * allocated the buffers and filled the RQ descriptor ring. Just need to push
335  * the post index to the NIC.
336  */
337 static void
338 enic_initial_post_rx(struct enic *enic, struct vnic_rq *rq)
339 {
340 	if (!rq->in_use || !rq->need_initial_post)
341 		return;
342 
343 	/* make sure all prior writes are complete before doing the PIO write */
344 	rte_rmb();
345 
346 	/* Post all but the last buffer to VIC. */
347 	rq->posted_index = rq->ring.desc_count - 1;
348 
349 	rq->rx_nb_hold = 0;
350 
351 	dev_debug(enic, "port=%u, qidx=%u, Write %u posted idx, %u sw held\n",
352 		enic->port_id, rq->index, rq->posted_index, rq->rx_nb_hold);
353 	iowrite32(rq->posted_index, &rq->ctrl->posted_index);
354 	rte_rmb();
355 	rq->need_initial_post = false;
356 }
357 
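/*
 * DMA-coherent allocator callback registered with the vNIC library in
 * enic_probe() via vnic_register_cbacks(). Allocations come from an
 * IOVA-contiguous rte_memzone and are tracked on enic->memzone_list so
 * enic_free_consistent() can locate and release them by address.
 */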
358 void *
359 enic_alloc_consistent(void *priv, size_t size,
360 	dma_addr_t *dma_handle, uint8_t *name)
361 {
362 	void *vaddr;
363 	const struct rte_memzone *rz;
364 	*dma_handle = 0;
365 	struct enic *enic = (struct enic *)priv;
366 	struct enic_memzone_entry *mze;
367 
368 	rz = rte_memzone_reserve_aligned((const char *)name, size,
369 			SOCKET_ID_ANY, RTE_MEMZONE_IOVA_CONTIG, ENIC_PAGE_SIZE);
370 	if (!rz) {
371 		pr_err("%s : Failed to allocate memory requested for %s\n",
372 			__func__, name);
373 		return NULL;
374 	}
375 
376 	vaddr = rz->addr;
377 	*dma_handle = (dma_addr_t)rz->iova;
378 
379 	mze = rte_malloc("enic memzone entry",
380 			 sizeof(struct enic_memzone_entry), 0);
381 
382 	if (!mze) {
383 		pr_err("%s : Failed to allocate memory for memzone list\n",
384 		       __func__);
385 		rte_memzone_free(rz);
386 		return NULL;
387 	}
388 
389 	mze->rz = rz;
390 
391 	rte_spinlock_lock(&enic->memzone_list_lock);
392 	LIST_INSERT_HEAD(&enic->memzone_list, mze, entries);
393 	rte_spinlock_unlock(&enic->memzone_list_lock);
394 
395 	return vaddr;
396 }
397 
398 void
399 enic_free_consistent(void *priv,
400 		     __rte_unused size_t size,
401 		     void *vaddr,
402 		     dma_addr_t dma_handle)
403 {
404 	struct enic_memzone_entry *mze;
405 	struct enic *enic = (struct enic *)priv;
406 
407 	rte_spinlock_lock(&enic->memzone_list_lock);
408 	LIST_FOREACH(mze, &enic->memzone_list, entries) {
409 		if (mze->rz->addr == vaddr &&
410 		    mze->rz->iova == dma_handle)
411 			break;
412 	}
413 	if (mze == NULL) {
414 		rte_spinlock_unlock(&enic->memzone_list_lock);
415 		dev_warning(enic,
416 			    "Tried to free memory, but couldn't find it in the memzone list\n");
417 		return;
418 	}
419 	LIST_REMOVE(mze, entries);
420 	rte_spinlock_unlock(&enic->memzone_list_lock);
421 	rte_memzone_free(mze->rz);
422 	rte_free(mze);
423 }
424 
425 int enic_link_update(struct rte_eth_dev *eth_dev)
426 {
427 	struct enic *enic = pmd_priv(eth_dev);
428 	struct rte_eth_link link;
429 
430 	memset(&link, 0, sizeof(link));
431 	link.link_status = enic_get_link_status(enic);
432 	link.link_duplex = ETH_LINK_FULL_DUPLEX;
433 	link.link_speed = vnic_dev_port_speed(enic->vdev);
434 
435 	return rte_eth_linkstatus_set(eth_dev, &link);
436 }
437 
438 static void
439 enic_intr_handler(void *arg)
440 {
441 	struct rte_eth_dev *dev = (struct rte_eth_dev *)arg;
442 	struct enic *enic = pmd_priv(dev);
443 
444 	vnic_intr_return_all_credits(&enic->intr[ENICPMD_LSC_INTR_OFFSET]);
445 
446 	enic_link_update(dev);
447 	rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_LSC, NULL);
448 	enic_log_q_error(enic);
449 	/* Re-enable irq in case of INTx */
450 	rte_intr_ack(&enic->pdev->intr_handle);
451 }
452 
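/*
 * Set up event fds and the interrupt vector map for per-Rx-queue MSI-X
 * interrupts. The first interrupt resource (ENICPMD_LSC_INTR_OFFSET) stays
 * dedicated to link-state and queue-error reporting; Rx queue vectors start
 * at ENICPMD_RXQ_INTR_OFFSET.
 */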
453 static int enic_rxq_intr_init(struct enic *enic)
454 {
455 	struct rte_intr_handle *intr_handle;
456 	uint32_t rxq_intr_count, i;
457 	int err;
458 
459 	intr_handle = enic->rte_dev->intr_handle;
460 	if (!enic->rte_dev->data->dev_conf.intr_conf.rxq)
461 		return 0;
462 	/*
463 	 * Rx queue interrupts only work when we have MSI-X interrupts,
464 	 * one per queue. Sharing one interrupt is technically
465 	 * possible with VIC, but it is not worth the complications it brings.
466 	 */
467 	if (!rte_intr_cap_multiple(intr_handle)) {
468 		dev_err(enic, "Rx queue interrupts require MSI-X interrupts"
469 			" (vfio-pci driver)\n");
470 		return -ENOTSUP;
471 	}
472 	rxq_intr_count = enic->intr_count - ENICPMD_RXQ_INTR_OFFSET;
473 	err = rte_intr_efd_enable(intr_handle, rxq_intr_count);
474 	if (err) {
475 		dev_err(enic, "Failed to enable event fds for Rx queue"
476 			" interrupts\n");
477 		return err;
478 	}
479 	intr_handle->intr_vec = rte_zmalloc("enic_intr_vec",
480 					    rxq_intr_count * sizeof(int), 0);
481 	if (intr_handle->intr_vec == NULL) {
482 		dev_err(enic, "Failed to allocate intr_vec\n");
483 		return -ENOMEM;
484 	}
485 	for (i = 0; i < rxq_intr_count; i++)
486 		intr_handle->intr_vec[i] = i + ENICPMD_RXQ_INTR_OFFSET;
487 	return 0;
488 }
489 
490 static void enic_rxq_intr_deinit(struct enic *enic)
491 {
492 	struct rte_intr_handle *intr_handle;
493 
494 	intr_handle = enic->rte_dev->intr_handle;
495 	rte_intr_efd_disable(intr_handle);
496 	if (intr_handle->intr_vec != NULL) {
497 		rte_free(intr_handle->intr_vec);
498 		intr_handle->intr_vec = NULL;
499 	}
500 }
501 
502 static void enic_prep_wq_for_simple_tx(struct enic *enic, uint16_t queue_idx)
503 {
504 	struct wq_enet_desc *desc;
505 	struct vnic_wq *wq;
506 	unsigned int i;
507 
508 	/*
509 	 * Fill WQ descriptor fields that never change. Every descriptor is
510 	 * one packet, so set EOP. Also set CQ_ENTRY every ENIC_WQ_CQ_THRESH
511 	 * descriptors (i.e. request one completion update every 32 packets).
512 	 */
513 	wq = &enic->wq[queue_idx];
514 	desc = (struct wq_enet_desc *)wq->ring.descs;
515 	for (i = 0; i < wq->ring.desc_count; i++, desc++) {
516 		desc->header_length_flags = 1 << WQ_ENET_FLAGS_EOP_SHIFT;
517 		if (i % ENIC_WQ_CQ_THRESH == ENIC_WQ_CQ_THRESH - 1)
518 			desc->header_length_flags |=
519 				(1 << WQ_ENET_FLAGS_CQ_ENTRY_SHIFT);
520 	}
521 }
522 
523 /*
524  * The 'strong' version is in enic_rxtx_vec_avx2.c. This weak version is used
525  * used when that file is not compiled.
526  */
527 __rte_weak bool
528 enic_use_vector_rx_handler(__rte_unused struct rte_eth_dev *eth_dev)
529 {
530 	return false;
531 }
532 
533 void enic_pick_rx_handler(struct rte_eth_dev *eth_dev)
534 {
535 	struct enic *enic = pmd_priv(eth_dev);
536 
537 	/*
538 	 * Preference order:
539 	 * 1. The vectorized handler if possible and requested.
540 	 * 2. The non-scatter, simplified handler if scatter Rx is not used.
541 	 * 3. The default handler as a fallback.
542 	 */
543 	if (enic_use_vector_rx_handler(eth_dev))
544 		return;
545 	if (enic->rq_count > 0 && enic->rq[0].data_queue_enable == 0) {
546 		ENICPMD_LOG(DEBUG, " use the non-scatter Rx handler");
547 		eth_dev->rx_pkt_burst = &enic_noscatter_recv_pkts;
548 	} else {
549 		ENICPMD_LOG(DEBUG, " use the normal Rx handler");
550 		eth_dev->rx_pkt_burst = &enic_recv_pkts;
551 	}
552 }
553 
554 /* Secondary process uses this to set the Tx handler */
555 void enic_pick_tx_handler(struct rte_eth_dev *eth_dev)
556 {
557 	struct enic *enic = pmd_priv(eth_dev);
558 
559 	if (enic->use_simple_tx_handler) {
560 		ENICPMD_LOG(DEBUG, " use the simple tx handler");
561 		eth_dev->tx_pkt_burst = &enic_simple_xmit_pkts;
562 	} else {
563 		ENICPMD_LOG(DEBUG, " use the default tx handler");
564 		eth_dev->tx_pkt_burst = &enic_xmit_pkts;
565 	}
566 }
567 
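/*
 * Device start: when AVX2 Rx is enabled, compute the mbuf initializer used
 * by the vectorized handler; fill the Rx queues with buffers, pick the
 * Rx/Tx burst handlers, enable all WQs and RQs, and register the LSC/error
 * interrupt handler.
 */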
568 int enic_enable(struct enic *enic)
569 {
570 	unsigned int index;
571 	int err;
572 	struct rte_eth_dev *eth_dev = enic->rte_dev;
573 	uint64_t simple_tx_offloads;
574 	uintptr_t p;
575 
576 	if (enic->enable_avx2_rx) {
577 		struct rte_mbuf mb_def = { .buf_addr = 0 };
578 
579 		/*
580 		 * mbuf_initializer contains const-after-init fields of
581 		 * receive mbufs (i.e. 64 bits of fields from rearm_data).
582 		 * It is currently used by the vectorized handler.
583 		 */
584 		mb_def.nb_segs = 1;
585 		mb_def.data_off = RTE_PKTMBUF_HEADROOM;
586 		mb_def.port = enic->port_id;
587 		rte_mbuf_refcnt_set(&mb_def, 1);
588 		rte_compiler_barrier();
589 		p = (uintptr_t)&mb_def.rearm_data;
590 		enic->mbuf_initializer = *(uint64_t *)p;
591 	}
592 
593 	eth_dev->data->dev_link.link_speed = vnic_dev_port_speed(enic->vdev);
594 	eth_dev->data->dev_link.link_duplex = ETH_LINK_FULL_DUPLEX;
595 
596 	/* vnic notification of link status has already been turned on in
597 	 * enic_dev_init() which is called during probe time.  Here we are
598 	 * just turning on interrupt vector 0 if needed.
599 	 */
600 	if (eth_dev->data->dev_conf.intr_conf.lsc)
601 		vnic_dev_notify_set(enic->vdev, 0);
602 
603 	err = enic_rxq_intr_init(enic);
604 	if (err)
605 		return err;
606 	if (enic_clsf_init(enic))
607 		dev_warning(enic, "Init of hash table for clsf failed. "
608 			"Flow director feature will not work\n");
609 
610 	/* Initialize flowman if not already initialized during probe */
611 	if (enic->fm == NULL && enic_fm_init(enic))
612 		dev_warning(enic, "Init of flowman failed.\n");
613 
614 	for (index = 0; index < enic->rq_count; index++) {
615 		err = enic_alloc_rx_queue_mbufs(enic,
616 			&enic->rq[enic_rte_rq_idx_to_sop_idx(index)]);
617 		if (err) {
618 			dev_err(enic, "Failed to alloc sop RX queue mbufs\n");
619 			return err;
620 		}
621 		err = enic_alloc_rx_queue_mbufs(enic,
622 			&enic->rq[enic_rte_rq_idx_to_data_idx(index, enic)]);
623 		if (err) {
624 			/* release the allocated mbufs for the sop rq*/
625 			enic_rxmbuf_queue_release(enic,
626 				&enic->rq[enic_rte_rq_idx_to_sop_idx(index)]);
627 
628 			dev_err(enic, "Failed to alloc data RX queue mbufs\n");
629 			return err;
630 		}
631 	}
632 
633 	/*
634 	 * Use the simple TX handler if possible. Only checksum offloads
635 	 * and vlan insertion are supported.
636 	 */
637 	simple_tx_offloads = enic->tx_offload_capa &
638 		(DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM |
639 		 DEV_TX_OFFLOAD_VLAN_INSERT |
640 		 DEV_TX_OFFLOAD_IPV4_CKSUM |
641 		 DEV_TX_OFFLOAD_UDP_CKSUM |
642 		 DEV_TX_OFFLOAD_TCP_CKSUM);
643 	if ((eth_dev->data->dev_conf.txmode.offloads &
644 	     ~simple_tx_offloads) == 0) {
645 		ENICPMD_LOG(DEBUG, " use the simple tx handler");
646 		eth_dev->tx_pkt_burst = &enic_simple_xmit_pkts;
647 		for (index = 0; index < enic->wq_count; index++)
648 			enic_prep_wq_for_simple_tx(enic, index);
649 		enic->use_simple_tx_handler = 1;
650 	} else {
651 		ENICPMD_LOG(DEBUG, " use the default tx handler");
652 		eth_dev->tx_pkt_burst = &enic_xmit_pkts;
653 	}
654 
655 	enic_pick_rx_handler(eth_dev);
656 
657 	for (index = 0; index < enic->wq_count; index++)
658 		enic_start_wq(enic, index);
659 	for (index = 0; index < enic->rq_count; index++)
660 		enic_start_rq(enic, index);
661 
662 	vnic_dev_add_addr(enic->vdev, enic->mac_addr);
663 
664 	vnic_dev_enable_wait(enic->vdev);
665 
666 	/* Register and enable error interrupt */
667 	rte_intr_callback_register(&(enic->pdev->intr_handle),
668 		enic_intr_handler, (void *)enic->rte_dev);
669 
670 	rte_intr_enable(&(enic->pdev->intr_handle));
671 	/* Unmask LSC interrupt */
672 	vnic_intr_unmask(&enic->intr[ENICPMD_LSC_INTR_OFFSET]);
673 
674 	return 0;
675 }
676 
677 int enic_alloc_intr_resources(struct enic *enic)
678 {
679 	int err;
680 	unsigned int i;
681 
682 	dev_info(enic, "vNIC resources used:  "
683 		"wq %d rq %d cq %d intr %d\n",
684 		enic->wq_count, enic_vnic_rq_count(enic),
685 		enic->cq_count, enic->intr_count);
686 
687 	for (i = 0; i < enic->intr_count; i++) {
688 		err = vnic_intr_alloc(enic->vdev, &enic->intr[i], i);
689 		if (err) {
690 			enic_free_vnic_resources(enic);
691 			return err;
692 		}
693 	}
694 	return 0;
695 }
696 
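/*
 * Free an Rx queue: release the mbufs held in the burst Rx free-mbuf cache,
 * release the posted mbufs and mbuf rings of both the SOP and data RQs,
 * then free the underlying vNIC RQ and CQ resources.
 */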
697 void enic_free_rq(void *rxq)
698 {
699 	struct vnic_rq *rq_sop, *rq_data;
700 	struct enic *enic;
701 
702 	if (rxq == NULL)
703 		return;
704 
705 	rq_sop = (struct vnic_rq *)rxq;
706 	enic = vnic_dev_priv(rq_sop->vdev);
707 	rq_data = &enic->rq[rq_sop->data_queue_idx];
708 
709 	if (rq_sop->free_mbufs) {
710 		struct rte_mbuf **mb;
711 		int i;
712 
713 		mb = rq_sop->free_mbufs;
714 		for (i = ENIC_RX_BURST_MAX - rq_sop->num_free_mbufs;
715 		     i < ENIC_RX_BURST_MAX; i++)
716 			rte_pktmbuf_free(mb[i]);
717 		rte_free(rq_sop->free_mbufs);
718 		rq_sop->free_mbufs = NULL;
719 		rq_sop->num_free_mbufs = 0;
720 	}
721 
722 	enic_rxmbuf_queue_release(enic, rq_sop);
723 	if (rq_data->in_use)
724 		enic_rxmbuf_queue_release(enic, rq_data);
725 
726 	rte_free(rq_sop->mbuf_ring);
727 	if (rq_data->in_use)
728 		rte_free(rq_data->mbuf_ring);
729 
730 	rq_sop->mbuf_ring = NULL;
731 	rq_data->mbuf_ring = NULL;
732 
733 	vnic_rq_free(rq_sop);
734 	if (rq_data->in_use)
735 		vnic_rq_free(rq_data);
736 
737 	vnic_cq_free(&enic->cq[enic_sop_rq_idx_to_cq_idx(rq_sop->index)]);
738 
739 	rq_sop->in_use = 0;
740 	rq_data->in_use = 0;
741 }
742 
743 void enic_start_wq(struct enic *enic, uint16_t queue_idx)
744 {
745 	struct rte_eth_dev_data *data = enic->dev_data;
746 	vnic_wq_enable(&enic->wq[queue_idx]);
747 	data->tx_queue_state[queue_idx] = RTE_ETH_QUEUE_STATE_STARTED;
748 }
749 
750 int enic_stop_wq(struct enic *enic, uint16_t queue_idx)
751 {
752 	struct rte_eth_dev_data *data = enic->dev_data;
753 	int ret;
754 
755 	ret = vnic_wq_disable(&enic->wq[queue_idx]);
756 	if (ret)
757 		return ret;
758 
759 	data->tx_queue_state[queue_idx] = RTE_ETH_QUEUE_STATE_STOPPED;
760 	return 0;
761 }
762 
763 void enic_start_rq(struct enic *enic, uint16_t queue_idx)
764 {
765 	struct rte_eth_dev_data *data = enic->dev_data;
766 	struct vnic_rq *rq_sop;
767 	struct vnic_rq *rq_data;
768 	rq_sop = &enic->rq[enic_rte_rq_idx_to_sop_idx(queue_idx)];
769 	rq_data = &enic->rq[rq_sop->data_queue_idx];
770 
771 	if (rq_data->in_use) {
772 		vnic_rq_enable(rq_data);
773 		enic_initial_post_rx(enic, rq_data);
774 	}
775 	rte_mb();
776 	vnic_rq_enable(rq_sop);
777 	enic_initial_post_rx(enic, rq_sop);
778 	data->rx_queue_state[queue_idx] = RTE_ETH_QUEUE_STATE_STARTED;
779 }
780 
781 int enic_stop_rq(struct enic *enic, uint16_t queue_idx)
782 {
783 	struct rte_eth_dev_data *data = enic->dev_data;
784 	int ret1 = 0, ret2 = 0;
785 	struct vnic_rq *rq_sop;
786 	struct vnic_rq *rq_data;
787 	rq_sop = &enic->rq[enic_rte_rq_idx_to_sop_idx(queue_idx)];
788 	rq_data = &enic->rq[rq_sop->data_queue_idx];
789 
790 	ret2 = vnic_rq_disable(rq_sop);
791 	rte_mb();
792 	if (rq_data->in_use)
793 		ret1 = vnic_rq_disable(rq_data);
794 
795 	if (ret2)
796 		return ret2;
797 	else if (ret1)
798 		return ret1;
799 
800 	data->rx_queue_state[queue_idx] = RTE_ETH_QUEUE_STATE_STOPPED;
801 	return 0;
802 }
803 
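/*
 * Each rte_eth Rx queue is backed by two vNIC RQs: a start-of-packet (SOP)
 * RQ and a data RQ. The data RQ is enabled only when scatter Rx needs more
 * than one mbuf per packet; otherwise it is left unused.
 */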
804 int enic_alloc_rq(struct enic *enic, uint16_t queue_idx,
805 	unsigned int socket_id, struct rte_mempool *mp,
806 	uint16_t nb_desc, uint16_t free_thresh)
807 {
808 	struct enic_vf_representor *vf;
809 	int rc;
810 	uint16_t sop_queue_idx;
811 	uint16_t data_queue_idx;
812 	uint16_t cq_idx;
813 	struct vnic_rq *rq_sop;
814 	struct vnic_rq *rq_data;
815 	unsigned int mbuf_size, mbufs_per_pkt;
816 	unsigned int nb_sop_desc, nb_data_desc;
817 	uint16_t min_sop, max_sop, min_data, max_data;
818 	uint32_t max_rx_pkt_len;
819 
820 	/*
821 	 * Representor uses a reserved PF queue. Translate representor
822 	 * queue number to PF queue number.
823 	 */
824 	if (enic_is_vf_rep(enic)) {
825 		RTE_ASSERT(queue_idx == 0);
826 		vf = VF_ENIC_TO_VF_REP(enic);
827 		sop_queue_idx = vf->pf_rq_sop_idx;
828 		data_queue_idx = vf->pf_rq_data_idx;
829 		enic = vf->pf;
830 		queue_idx = sop_queue_idx;
831 	} else {
832 		sop_queue_idx = enic_rte_rq_idx_to_sop_idx(queue_idx);
833 		data_queue_idx = enic_rte_rq_idx_to_data_idx(queue_idx, enic);
834 	}
835 	cq_idx = enic_cq_rq(enic, sop_queue_idx);
836 	rq_sop = &enic->rq[sop_queue_idx];
837 	rq_data = &enic->rq[data_queue_idx];
838 	rq_sop->is_sop = 1;
839 	rq_sop->data_queue_idx = data_queue_idx;
840 	rq_data->is_sop = 0;
841 	rq_data->data_queue_idx = 0;
842 	rq_sop->socket_id = socket_id;
843 	rq_sop->mp = mp;
844 	rq_data->socket_id = socket_id;
845 	rq_data->mp = mp;
846 	rq_sop->in_use = 1;
847 	rq_sop->rx_free_thresh = free_thresh;
848 	rq_data->rx_free_thresh = free_thresh;
849 	dev_debug(enic, "Set queue_id:%u free thresh:%u\n", queue_idx,
850 		  free_thresh);
851 
852 	mbuf_size = (uint16_t)(rte_pktmbuf_data_room_size(mp) -
853 			       RTE_PKTMBUF_HEADROOM);
854 	/* max_rx_pkt_len includes the ethernet header and CRC. */
855 	max_rx_pkt_len = enic->rte_dev->data->dev_conf.rxmode.max_rx_pkt_len;
856 
857 	if (enic->rte_dev->data->dev_conf.rxmode.offloads &
858 	    DEV_RX_OFFLOAD_SCATTER) {
859 		dev_info(enic, "Rq %u Scatter rx mode enabled\n", queue_idx);
860 		/* ceil((max pkt len)/mbuf_size) */
861 		mbufs_per_pkt = (max_rx_pkt_len + mbuf_size - 1) / mbuf_size;
862 	} else {
863 		dev_info(enic, "Scatter rx mode disabled\n");
864 		mbufs_per_pkt = 1;
865 		if (max_rx_pkt_len > mbuf_size) {
866 			dev_warning(enic, "The maximum Rx packet size (%u) is"
867 				    " larger than the mbuf size (%u), and"
868 				    " scatter is disabled. Larger packets will"
869 				    " be truncated.\n",
870 				    max_rx_pkt_len, mbuf_size);
871 		}
872 	}
873 
874 	if (mbufs_per_pkt > 1) {
875 		dev_info(enic, "Rq %u Scatter rx mode in use\n", queue_idx);
876 		rq_sop->data_queue_enable = 1;
877 		rq_data->in_use = 1;
878 		/*
879 		 * HW does not directly support rxmode.max_rx_pkt_len. HW always
880 		 * receives packet sizes up to the "max" MTU.
881 		 * If not using scatter, we can achieve the effect of dropping
882 		 * larger packets by reducing the size of posted buffers.
883 		 * See enic_alloc_rx_queue_mbufs().
884 		 */
885 		if (max_rx_pkt_len <
886 		    enic_mtu_to_max_rx_pktlen(enic->max_mtu)) {
887 			dev_warning(enic, "rxmode.max_rx_pkt_len is ignored"
888 				    " when scatter rx mode is in use.\n");
889 		}
890 	} else {
891 		dev_info(enic, "Rq %u Scatter rx mode not being used\n",
892 			 queue_idx);
893 		rq_sop->data_queue_enable = 0;
894 		rq_data->in_use = 0;
895 	}
896 
897 	/* The number of descriptors has to be a multiple of 32 */
898 	nb_sop_desc = (nb_desc / mbufs_per_pkt) & ENIC_ALIGN_DESCS_MASK;
899 	nb_data_desc = (nb_desc - nb_sop_desc) & ENIC_ALIGN_DESCS_MASK;
900 
901 	rq_sop->max_mbufs_per_pkt = mbufs_per_pkt;
902 	rq_data->max_mbufs_per_pkt = mbufs_per_pkt;
903 
904 	if (mbufs_per_pkt > 1) {
905 		min_sop = ENIC_RX_BURST_MAX;
906 		max_sop = ((enic->config.rq_desc_count /
907 			    (mbufs_per_pkt - 1)) & ENIC_ALIGN_DESCS_MASK);
908 		min_data = min_sop * (mbufs_per_pkt - 1);
909 		max_data = enic->config.rq_desc_count;
910 	} else {
911 		min_sop = ENIC_RX_BURST_MAX;
912 		max_sop = enic->config.rq_desc_count;
913 		min_data = 0;
914 		max_data = 0;
915 	}
916 
917 	if (nb_desc < (min_sop + min_data)) {
918 		dev_warning(enic,
919 			    "Number of rx descs too low, adjusting to minimum\n");
920 		nb_sop_desc = min_sop;
921 		nb_data_desc = min_data;
922 	} else if (nb_desc > (max_sop + max_data)) {
923 		dev_warning(enic,
924 			    "Number of rx_descs too high, adjusting to maximum\n");
925 		nb_sop_desc = max_sop;
926 		nb_data_desc = max_data;
927 	}
928 	if (mbufs_per_pkt > 1) {
929 		dev_info(enic, "For max packet size %u and mbuf size %u valid"
930 			 " rx descriptor range is %u to %u\n",
931 			 max_rx_pkt_len, mbuf_size, min_sop + min_data,
932 			 max_sop + max_data);
933 	}
934 	dev_info(enic, "Using %d rx descriptors (sop %d, data %d)\n",
935 		 nb_sop_desc + nb_data_desc, nb_sop_desc, nb_data_desc);
936 
937 	/* Allocate sop queue resources */
938 	rc = vnic_rq_alloc(enic->vdev, rq_sop, sop_queue_idx,
939 		nb_sop_desc, sizeof(struct rq_enet_desc));
940 	if (rc) {
941 		dev_err(enic, "error in allocation of sop rq\n");
942 		goto err_exit;
943 	}
944 	nb_sop_desc = rq_sop->ring.desc_count;
945 
946 	if (rq_data->in_use) {
947 		/* Allocate data queue resources */
948 		rc = vnic_rq_alloc(enic->vdev, rq_data, data_queue_idx,
949 				   nb_data_desc,
950 				   sizeof(struct rq_enet_desc));
951 		if (rc) {
952 			dev_err(enic, "error in allocation of data rq\n");
953 			goto err_free_rq_sop;
954 		}
955 		nb_data_desc = rq_data->ring.desc_count;
956 	}
957 	rc = vnic_cq_alloc(enic->vdev, &enic->cq[cq_idx], cq_idx,
958 			   socket_id, nb_sop_desc + nb_data_desc,
959 			   sizeof(struct cq_enet_rq_desc));
960 	if (rc) {
961 		dev_err(enic, "error in allocation of cq for rq\n");
962 		goto err_free_rq_data;
963 	}
964 
965 	/* Allocate the mbuf rings */
966 	rq_sop->mbuf_ring = (struct rte_mbuf **)
967 		rte_zmalloc_socket("rq->mbuf_ring",
968 				   sizeof(struct rte_mbuf *) * nb_sop_desc,
969 				   RTE_CACHE_LINE_SIZE, rq_sop->socket_id);
970 	if (rq_sop->mbuf_ring == NULL)
971 		goto err_free_cq;
972 
973 	if (rq_data->in_use) {
974 		rq_data->mbuf_ring = (struct rte_mbuf **)
975 			rte_zmalloc_socket("rq->mbuf_ring",
976 				sizeof(struct rte_mbuf *) * nb_data_desc,
977 				RTE_CACHE_LINE_SIZE, rq_sop->socket_id);
978 		if (rq_data->mbuf_ring == NULL)
979 			goto err_free_sop_mbuf;
980 	}
981 
982 	rq_sop->free_mbufs = (struct rte_mbuf **)
983 		rte_zmalloc_socket("rq->free_mbufs",
984 				   sizeof(struct rte_mbuf *) *
985 				   ENIC_RX_BURST_MAX,
986 				   RTE_CACHE_LINE_SIZE, rq_sop->socket_id);
987 	if (rq_sop->free_mbufs == NULL)
988 		goto err_free_data_mbuf;
989 	rq_sop->num_free_mbufs = 0;
990 
991 	rq_sop->tot_nb_desc = nb_desc; /* squirrel away for MTU update function */
992 
993 	return 0;
994 
995 err_free_data_mbuf:
996 	rte_free(rq_data->mbuf_ring);
997 err_free_sop_mbuf:
998 	rte_free(rq_sop->mbuf_ring);
999 err_free_cq:
1000 	/* cleanup on error */
1001 	vnic_cq_free(&enic->cq[cq_idx]);
1002 err_free_rq_data:
1003 	if (rq_data->in_use)
1004 		vnic_rq_free(rq_data);
1005 err_free_rq_sop:
1006 	vnic_rq_free(rq_sop);
1007 err_exit:
1008 	return -ENOMEM;
1009 }
1010 
1011 void enic_free_wq(void *txq)
1012 {
1013 	struct vnic_wq *wq;
1014 	struct enic *enic;
1015 
1016 	if (txq == NULL)
1017 		return;
1018 
1019 	wq = (struct vnic_wq *)txq;
1020 	enic = vnic_dev_priv(wq->vdev);
1021 	rte_memzone_free(wq->cqmsg_rz);
1022 	vnic_wq_free(wq);
1023 	vnic_cq_free(&enic->cq[enic->rq_count + wq->index]);
1024 }
1025 
1026 int enic_alloc_wq(struct enic *enic, uint16_t queue_idx,
1027 	unsigned int socket_id, uint16_t nb_desc)
1028 {
1029 	struct enic_vf_representor *vf;
1030 	int err;
1031 	struct vnic_wq *wq;
1032 	unsigned int cq_index;
1033 	char name[RTE_MEMZONE_NAMESIZE];
1034 	static int instance;
1035 
1036 	/*
1037 	 * Representor uses a reserved PF queue. Translate representor
1038 	 * queue number to PF queue number.
1039 	 */
1040 	if (enic_is_vf_rep(enic)) {
1041 		RTE_ASSERT(queue_idx == 0);
1042 		vf = VF_ENIC_TO_VF_REP(enic);
1043 		queue_idx = vf->pf_wq_idx;
1044 		cq_index = vf->pf_wq_cq_idx;
1045 		enic = vf->pf;
1046 	} else {
1047 		cq_index = enic_cq_wq(enic, queue_idx);
1048 	}
1049 	wq = &enic->wq[queue_idx];
1050 	wq->socket_id = socket_id;
1051 	/*
1052 	 * rte_eth_tx_queue_setup() checks min, max, and alignment. So just
1053 	 * print an info message for diagnostics.
1054 	 */
1055 	dev_info(enic, "TX Queues - effective number of descs:%d\n", nb_desc);
1056 
1057 	/* Allocate queue resources */
1058 	err = vnic_wq_alloc(enic->vdev, &enic->wq[queue_idx], queue_idx,
1059 		nb_desc,
1060 		sizeof(struct wq_enet_desc));
1061 	if (err) {
1062 		dev_err(enic, "error in allocation of wq\n");
1063 		return err;
1064 	}
1065 
1066 	err = vnic_cq_alloc(enic->vdev, &enic->cq[cq_index], cq_index,
1067 		socket_id, nb_desc,
1068 		sizeof(struct cq_enet_wq_desc));
1069 	if (err) {
1070 		vnic_wq_free(wq);
1071 		dev_err(enic, "error in allocation of cq for wq\n");
1072 	}
1073 
1074 	/* set up CQ message */
1075 	snprintf((char *)name, sizeof(name),
1076 		 "vnic_cqmsg-%s-%d-%d", enic->bdf_name, queue_idx,
1077 		instance++);
1078 
1079 	wq->cqmsg_rz = rte_memzone_reserve_aligned((const char *)name,
1080 			sizeof(uint32_t), SOCKET_ID_ANY,
1081 			RTE_MEMZONE_IOVA_CONTIG, ENIC_PAGE_SIZE);
1082 	if (!wq->cqmsg_rz)
1083 		return -ENOMEM;
1084 
1085 	return err;
1086 }
1087 
1088 int enic_disable(struct enic *enic)
1089 {
1090 	unsigned int i;
1091 	int err;
1092 
1093 	for (i = 0; i < enic->intr_count; i++) {
1094 		vnic_intr_mask(&enic->intr[i]);
1095 		(void)vnic_intr_masked(&enic->intr[i]); /* flush write */
1096 	}
1097 	enic_rxq_intr_deinit(enic);
1098 	rte_intr_disable(&enic->pdev->intr_handle);
1099 	rte_intr_callback_unregister(&enic->pdev->intr_handle,
1100 				     enic_intr_handler,
1101 				     (void *)enic->rte_dev);
1102 
1103 	vnic_dev_disable(enic->vdev);
1104 
1105 	enic_clsf_destroy(enic);
1106 	enic_fm_destroy(enic);
1107 
1108 	if (!enic_is_sriov_vf(enic))
1109 		vnic_dev_del_addr(enic->vdev, enic->mac_addr);
1110 
1111 	for (i = 0; i < enic->wq_count; i++) {
1112 		err = vnic_wq_disable(&enic->wq[i]);
1113 		if (err)
1114 			return err;
1115 	}
1116 	for (i = 0; i < enic_vnic_rq_count(enic); i++) {
1117 		if (enic->rq[i].in_use) {
1118 			err = vnic_rq_disable(&enic->rq[i]);
1119 			if (err)
1120 				return err;
1121 		}
1122 	}
1123 
1124 	/* If we were using interrupts, set the interrupt vector to -1
1125 	 * to disable interrupts.  We are not disabling link notifications,
1126 	 * though, as we want the polling of link status to continue working.
1127 	 */
1128 	if (enic->rte_dev->data->dev_conf.intr_conf.lsc)
1129 		vnic_dev_notify_set(enic->vdev, -1);
1130 
1131 	vnic_dev_set_reset_flag(enic->vdev, 1);
1132 
1133 	for (i = 0; i < enic->wq_count; i++)
1134 		vnic_wq_clean(&enic->wq[i], enic_free_wq_buf);
1135 
1136 	for (i = 0; i < enic_vnic_rq_count(enic); i++)
1137 		if (enic->rq[i].in_use)
1138 			vnic_rq_clean(&enic->rq[i], enic_free_rq_buf);
1139 	for (i = 0; i < enic->cq_count; i++)
1140 		vnic_cq_clean(&enic->cq[i]);
1141 	for (i = 0; i < enic->intr_count; i++)
1142 		vnic_intr_clean(&enic->intr[i]);
1143 
1144 	return 0;
1145 }
1146 
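/*
 * Issue a devcmd via 'start', then poll 'finished' every millisecond for up
 * to two seconds, returning -ETIMEDOUT if the firmware never completes.
 */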
1147 static int enic_dev_wait(struct vnic_dev *vdev,
1148 	int (*start)(struct vnic_dev *, int),
1149 	int (*finished)(struct vnic_dev *, int *),
1150 	int arg)
1151 {
1152 	int done;
1153 	int err;
1154 	int i;
1155 
1156 	err = start(vdev, arg);
1157 	if (err)
1158 		return err;
1159 
1160 	/* Wait for func to complete...2 seconds max */
1161 	for (i = 0; i < 2000; i++) {
1162 		err = finished(vdev, &done);
1163 		if (err)
1164 			return err;
1165 		if (done)
1166 			return 0;
1167 		usleep(1000);
1168 	}
1169 	return -ETIMEDOUT;
1170 }
1171 
1172 static int enic_dev_open(struct enic *enic)
1173 {
1174 	int err;
1175 	int flags = CMD_OPENF_IG_DESCCACHE;
1176 
1177 	err = enic_dev_wait(enic->vdev, vnic_dev_open,
1178 		vnic_dev_open_done, flags);
1179 	if (err)
1180 		dev_err(enic_get_dev(enic),
1181 			"vNIC device open failed, err %d\n", err);
1182 
1183 	return err;
1184 }
1185 
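/*
 * Copy the 40-byte RSS hash key into the union vnic_rss_key layout (10-byte
 * sub-keys) in DMA-able memory, push it to the NIC, and cache it in
 * enic->rss_key on success for later queries.
 */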
1186 static int enic_set_rsskey(struct enic *enic, uint8_t *user_key)
1187 {
1188 	dma_addr_t rss_key_buf_pa;
1189 	union vnic_rss_key *rss_key_buf_va = NULL;
1190 	int err, i;
1191 	uint8_t name[RTE_MEMZONE_NAMESIZE];
1192 
1193 	RTE_ASSERT(user_key != NULL);
1194 	snprintf((char *)name, sizeof(name), "rss_key-%s", enic->bdf_name);
1195 	rss_key_buf_va = enic_alloc_consistent(enic, sizeof(union vnic_rss_key),
1196 		&rss_key_buf_pa, name);
1197 	if (!rss_key_buf_va)
1198 		return -ENOMEM;
1199 
1200 	for (i = 0; i < ENIC_RSS_HASH_KEY_SIZE; i++)
1201 		rss_key_buf_va->key[i / 10].b[i % 10] = user_key[i];
1202 
1203 	err = enic_set_rss_key(enic,
1204 		rss_key_buf_pa,
1205 		sizeof(union vnic_rss_key));
1206 
1207 	/* Save for later queries */
1208 	if (!err) {
1209 		rte_memcpy(&enic->rss_key, rss_key_buf_va,
1210 			   sizeof(union vnic_rss_key));
1211 	}
1212 	enic_free_consistent(enic, sizeof(union vnic_rss_key),
1213 		rss_key_buf_va, rss_key_buf_pa);
1214 
1215 	return err;
1216 }
1217 
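/*
 * Program the RSS indirection table: copy the caller's table into DMA-able
 * memory, push it to the NIC, and cache it in enic->rss_cpu on success.
 */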
1218 int enic_set_rss_reta(struct enic *enic, union vnic_rss_cpu *rss_cpu)
1219 {
1220 	dma_addr_t rss_cpu_buf_pa;
1221 	union vnic_rss_cpu *rss_cpu_buf_va = NULL;
1222 	int err;
1223 	uint8_t name[RTE_MEMZONE_NAMESIZE];
1224 
1225 	snprintf((char *)name, sizeof(name), "rss_cpu-%s", enic->bdf_name);
1226 	rss_cpu_buf_va = enic_alloc_consistent(enic, sizeof(union vnic_rss_cpu),
1227 		&rss_cpu_buf_pa, name);
1228 	if (!rss_cpu_buf_va)
1229 		return -ENOMEM;
1230 
1231 	rte_memcpy(rss_cpu_buf_va, rss_cpu, sizeof(union vnic_rss_cpu));
1232 
1233 	err = enic_set_rss_cpu(enic,
1234 		rss_cpu_buf_pa,
1235 		sizeof(union vnic_rss_cpu));
1236 
1237 	enic_free_consistent(enic, sizeof(union vnic_rss_cpu),
1238 		rss_cpu_buf_va, rss_cpu_buf_pa);
1239 
1240 	/* Save for later queries */
1241 	if (!err)
1242 		rte_memcpy(&enic->rss_cpu, rss_cpu, sizeof(union vnic_rss_cpu));
1243 	return err;
1244 }
1245 
1246 static int enic_set_niccfg(struct enic *enic, uint8_t rss_default_cpu,
1247 	uint8_t rss_hash_type, uint8_t rss_hash_bits, uint8_t rss_base_cpu,
1248 	uint8_t rss_enable)
1249 {
1250 	const uint8_t tso_ipid_split_en = 0;
1251 	int err;
1252 
1253 	err = enic_set_nic_cfg(enic,
1254 		rss_default_cpu, rss_hash_type,
1255 		rss_hash_bits, rss_base_cpu,
1256 		rss_enable, tso_ipid_split_en,
1257 		enic->ig_vlan_strip_en);
1258 
1259 	return err;
1260 }
1261 
1262 /* Initialize RSS with defaults, called from dev_configure */
1263 int enic_init_rss_nic_cfg(struct enic *enic)
1264 {
1265 	static uint8_t default_rss_key[] = {
1266 		85, 67, 83, 97, 119, 101, 115, 111, 109, 101,
1267 		80, 65, 76, 79, 117, 110, 105, 113, 117, 101,
1268 		76, 73, 78, 85, 88, 114, 111, 99, 107, 115,
1269 		69, 78, 73, 67, 105, 115, 99, 111, 111, 108,
1270 	};
1271 	struct rte_eth_rss_conf rss_conf;
1272 	union vnic_rss_cpu rss_cpu;
1273 	int ret, i;
1274 
1275 	rss_conf = enic->rte_dev->data->dev_conf.rx_adv_conf.rss_conf;
1276 	/*
1277 	 * If setting key for the first time, and the user gives us none, then
1278 	 * push the default key to NIC.
1279 	 */
1280 	if (rss_conf.rss_key == NULL) {
1281 		rss_conf.rss_key = default_rss_key;
1282 		rss_conf.rss_key_len = ENIC_RSS_HASH_KEY_SIZE;
1283 	}
1284 	ret = enic_set_rss_conf(enic, &rss_conf);
1285 	if (ret) {
1286 		dev_err(enic, "Failed to configure RSS\n");
1287 		return ret;
1288 	}
1289 	if (enic->rss_enable) {
1290 		/* If enabling RSS, use the default reta */
1291 		for (i = 0; i < ENIC_RSS_RETA_SIZE; i++) {
1292 			rss_cpu.cpu[i / 4].b[i % 4] =
1293 				enic_rte_rq_idx_to_sop_idx(i % enic->rq_count);
1294 		}
1295 		ret = enic_set_rss_reta(enic, &rss_cpu);
1296 		if (ret)
1297 			dev_err(enic, "Failed to set RSS indirection table\n");
1298 	}
1299 	return ret;
1300 }
1301 
1302 int enic_setup_finish(struct enic *enic)
1303 {
1304 	enic_init_soft_stats(enic);
1305 
1306 	/* switchdev: enable promisc mode on PF */
1307 	if (enic->switchdev_mode) {
1308 		vnic_dev_packet_filter(enic->vdev,
1309 				       0 /* directed  */,
1310 				       0 /* multicast */,
1311 				       0 /* broadcast */,
1312 				       1 /* promisc   */,
1313 				       0 /* allmulti  */);
1314 		enic->promisc = 1;
1315 		enic->allmulti = 0;
1316 		return 0;
1317 	}
1318 	/* Default conf */
1319 	vnic_dev_packet_filter(enic->vdev,
1320 		1 /* directed  */,
1321 		1 /* multicast */,
1322 		1 /* broadcast */,
1323 		0 /* promisc   */,
1324 		1 /* allmulti  */);
1325 
1326 	enic->promisc = 0;
1327 	enic->allmulti = 1;
1328 
1329 	return 0;
1330 }
1331 
1332 static int enic_rss_conf_valid(struct enic *enic,
1333 			       struct rte_eth_rss_conf *rss_conf)
1334 {
1335 	/* RSS is disabled per VIC settings. Ignore rss_conf. */
1336 	if (enic->flow_type_rss_offloads == 0)
1337 		return 0;
1338 	if (rss_conf->rss_key != NULL &&
1339 	    rss_conf->rss_key_len != ENIC_RSS_HASH_KEY_SIZE) {
1340 		dev_err(enic, "Given rss_key is %d bytes, it must be %d\n",
1341 			rss_conf->rss_key_len, ENIC_RSS_HASH_KEY_SIZE);
1342 		return -EINVAL;
1343 	}
1344 	if (rss_conf->rss_hf != 0 &&
1345 	    (rss_conf->rss_hf & enic->flow_type_rss_offloads) == 0) {
1346 		dev_err(enic, "Given rss_hf contains none of the supported"
1347 			" types\n");
1348 		return -EINVAL;
1349 	}
1350 	return 0;
1351 }
1352 
1353 /* Set hash type and key according to rss_conf */
1354 int enic_set_rss_conf(struct enic *enic, struct rte_eth_rss_conf *rss_conf)
1355 {
1356 	struct rte_eth_dev *eth_dev;
1357 	uint64_t rss_hf;
1358 	uint8_t rss_hash_type;
1359 	uint8_t rss_enable;
1360 	int ret;
1361 
1362 	RTE_ASSERT(rss_conf != NULL);
1363 	ret = enic_rss_conf_valid(enic, rss_conf);
1364 	if (ret) {
1365 		dev_err(enic, "RSS configuration (rss_conf) is invalid\n");
1366 		return ret;
1367 	}
1368 
1369 	eth_dev = enic->rte_dev;
1370 	rss_hash_type = 0;
1371 	rss_hf = rss_conf->rss_hf & enic->flow_type_rss_offloads;
1372 	if (enic->rq_count > 1 &&
1373 	    (eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS_FLAG) &&
1374 	    rss_hf != 0) {
1375 		rss_enable = 1;
1376 		if (rss_hf & (ETH_RSS_IPV4 | ETH_RSS_FRAG_IPV4 |
1377 			      ETH_RSS_NONFRAG_IPV4_OTHER))
1378 			rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_IPV4;
1379 		if (rss_hf & ETH_RSS_NONFRAG_IPV4_TCP)
1380 			rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_TCP_IPV4;
1381 		if (rss_hf & ETH_RSS_NONFRAG_IPV4_UDP) {
1382 			rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_UDP_IPV4;
1383 			if (enic->udp_rss_weak) {
1384 				/*
1385 				 * 'TCP' is not a typo. The "weak" version of
1386 				 * UDP RSS requires both the TCP and UDP bits
1387 				 * be set. It does enable TCP RSS as well.
1388 				 */
1389 				rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_TCP_IPV4;
1390 			}
1391 		}
1392 		if (rss_hf & (ETH_RSS_IPV6 | ETH_RSS_IPV6_EX |
1393 			      ETH_RSS_FRAG_IPV6 | ETH_RSS_NONFRAG_IPV6_OTHER))
1394 			rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_IPV6;
1395 		if (rss_hf & (ETH_RSS_NONFRAG_IPV6_TCP | ETH_RSS_IPV6_TCP_EX))
1396 			rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_TCP_IPV6;
1397 		if (rss_hf & (ETH_RSS_NONFRAG_IPV6_UDP | ETH_RSS_IPV6_UDP_EX)) {
1398 			rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_UDP_IPV6;
1399 			if (enic->udp_rss_weak)
1400 				rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_TCP_IPV6;
1401 		}
1402 	} else {
1403 		rss_enable = 0;
1404 		rss_hf = 0;
1405 	}
1406 
1407 	/* Set the hash key if provided */
1408 	if (rss_enable && rss_conf->rss_key) {
1409 		ret = enic_set_rsskey(enic, rss_conf->rss_key);
1410 		if (ret) {
1411 			dev_err(enic, "Failed to set RSS key\n");
1412 			return ret;
1413 		}
1414 	}
1415 
1416 	ret = enic_set_niccfg(enic, ENIC_RSS_DEFAULT_CPU, rss_hash_type,
1417 			      ENIC_RSS_HASH_BITS, ENIC_RSS_BASE_CPU,
1418 			      rss_enable);
1419 	if (!ret) {
1420 		enic->rss_hf = rss_hf;
1421 		enic->rss_hash_type = rss_hash_type;
1422 		enic->rss_enable = rss_enable;
1423 	} else {
1424 		dev_err(enic, "Failed to update RSS configurations."
1425 			" hash=0x%x\n", rss_hash_type);
1426 	}
1427 	return ret;
1428 }
1429 
1430 int enic_set_vlan_strip(struct enic *enic)
1431 {
1432 	/*
1433 	 * Unfortunately, VLAN strip on/off and RSS on/off are configured
1434 	 * together. So, re-do niccfg, preserving the current RSS settings.
1435 	 */
1436 	return enic_set_niccfg(enic, ENIC_RSS_DEFAULT_CPU, enic->rss_hash_type,
1437 			       ENIC_RSS_HASH_BITS, ENIC_RSS_BASE_CPU,
1438 			       enic->rss_enable);
1439 }
1440 
1441 int enic_add_packet_filter(struct enic *enic)
1442 {
1443 	/* switchdev ignores packet filters */
1444 	if (enic->switchdev_mode) {
1445 		ENICPMD_LOG(DEBUG, " switchdev: ignore packet filter");
1446 		return 0;
1447 	}
1448 	/* Args -> directed, multicast, broadcast, promisc, allmulti */
1449 	return vnic_dev_packet_filter(enic->vdev, 1, 1, 1,
1450 		enic->promisc, enic->allmulti);
1451 }
1452 
1453 int enic_get_link_status(struct enic *enic)
1454 {
1455 	return vnic_dev_link_status(enic->vdev);
1456 }
1457 
1458 static void enic_dev_deinit(struct enic *enic)
1459 {
1460 	/* stop link status checking */
1461 	vnic_dev_notify_unset(enic->vdev);
1462 
1463 	/* mac_addrs is freed by rte_eth_dev_release_port() */
1464 	rte_free(enic->cq);
1465 	rte_free(enic->intr);
1466 	rte_free(enic->rq);
1467 	rte_free(enic->wq);
1468 }
1469 
1470 
1471 int enic_set_vnic_res(struct enic *enic)
1472 {
1473 	struct rte_eth_dev *eth_dev = enic->rte_dev;
1474 	int rc = 0;
1475 	unsigned int required_rq, required_wq, required_cq, required_intr;
1476 
1477 	/* Always use two vNIC RQs per eth_dev RQ, regardless of Rx scatter. */
1478 	required_rq = eth_dev->data->nb_rx_queues * 2;
1479 	required_wq = eth_dev->data->nb_tx_queues;
1480 	required_cq = eth_dev->data->nb_rx_queues + eth_dev->data->nb_tx_queues;
1481 	required_intr = 1; /* 1 for LSC even if intr_conf.lsc is 0 */
1482 	if (eth_dev->data->dev_conf.intr_conf.rxq) {
1483 		required_intr += eth_dev->data->nb_rx_queues;
1484 	}
1485 	ENICPMD_LOG(DEBUG, "Required queues for PF: rq %u wq %u cq %u",
1486 		    required_rq, required_wq, required_cq);
1487 	if (enic->vf_required_rq) {
1488 		/* Queues needed for VF representors */
1489 		required_rq += enic->vf_required_rq;
1490 		required_wq += enic->vf_required_wq;
1491 		required_cq += enic->vf_required_cq;
1492 		ENICPMD_LOG(DEBUG, "Required queues for VF representors: rq %u wq %u cq %u",
1493 			    enic->vf_required_rq, enic->vf_required_wq,
1494 			    enic->vf_required_cq);
1495 	}
1496 
1497 	if (enic->conf_rq_count < required_rq) {
1498 		dev_err(dev, "Not enough Receive queues. Requested:%u which uses %d RQs on VIC, Configured:%u\n",
1499 			eth_dev->data->nb_rx_queues,
1500 			required_rq, enic->conf_rq_count);
1501 		rc = -EINVAL;
1502 	}
1503 	if (enic->conf_wq_count < required_wq) {
1504 		dev_err(dev, "Not enough Transmit queues. Requested:%u, Configured:%u\n",
1505 			eth_dev->data->nb_tx_queues, enic->conf_wq_count);
1506 		rc = -EINVAL;
1507 	}
1508 
1509 	if (enic->conf_cq_count < required_cq) {
1510 		dev_err(dev, "Not enough Completion queues. Required:%u, Configured:%u\n",
1511 			required_cq, enic->conf_cq_count);
1512 		rc = -EINVAL;
1513 	}
1514 	if (enic->conf_intr_count < required_intr) {
1515 		dev_err(dev, "Not enough Interrupts to support Rx queue"
1516 			" interrupts. Required:%u, Configured:%u\n",
1517 			required_intr, enic->conf_intr_count);
1518 		rc = -EINVAL;
1519 	}
1520 
1521 	if (rc == 0) {
1522 		enic->rq_count = eth_dev->data->nb_rx_queues;
1523 		enic->wq_count = eth_dev->data->nb_tx_queues;
1524 		enic->cq_count = enic->rq_count + enic->wq_count;
1525 		enic->intr_count = required_intr;
1526 	}
1527 
1528 	return rc;
1529 }
1530 
1531 /* Reinitialize the CQ and RQs for an Rx queue and refill its Rx buffers */
1532 static int
1533 enic_reinit_rq(struct enic *enic, unsigned int rq_idx)
1534 {
1535 	struct vnic_rq *sop_rq, *data_rq;
1536 	unsigned int cq_idx;
1537 	int rc = 0;
1538 
1539 	sop_rq = &enic->rq[enic_rte_rq_idx_to_sop_idx(rq_idx)];
1540 	data_rq = &enic->rq[enic_rte_rq_idx_to_data_idx(rq_idx, enic)];
1541 	cq_idx = enic_cq_rq(enic, rq_idx);
1542 
1543 	vnic_cq_clean(&enic->cq[cq_idx]);
1544 	vnic_cq_init(&enic->cq[cq_idx],
1545 		     0 /* flow_control_enable */,
1546 		     1 /* color_enable */,
1547 		     0 /* cq_head */,
1548 		     0 /* cq_tail */,
1549 		     1 /* cq_tail_color */,
1550 		     0 /* interrupt_enable */,
1551 		     1 /* cq_entry_enable */,
1552 		     0 /* cq_message_enable */,
1553 		     0 /* interrupt offset */,
1554 		     0 /* cq_message_addr */);
1555 
1556 
1557 	vnic_rq_init_start(sop_rq, enic_cq_rq(enic,
1558 			   enic_rte_rq_idx_to_sop_idx(rq_idx)), 0,
1559 			   sop_rq->ring.desc_count - 1, 1, 0);
1560 	if (data_rq->in_use) {
1561 		vnic_rq_init_start(data_rq,
1562 				   enic_cq_rq(enic,
1563 				   enic_rte_rq_idx_to_data_idx(rq_idx, enic)),
1564 				   0, data_rq->ring.desc_count - 1, 1, 0);
1565 	}
1566 
1567 	rc = enic_alloc_rx_queue_mbufs(enic, sop_rq);
1568 	if (rc)
1569 		return rc;
1570 
1571 	if (data_rq->in_use) {
1572 		rc = enic_alloc_rx_queue_mbufs(enic, data_rq);
1573 		if (rc) {
1574 			enic_rxmbuf_queue_release(enic, sop_rq);
1575 			return rc;
1576 		}
1577 	}
1578 
1579 	return 0;
1580 }
1581 
1582 /* The Cisco NIC can send and receive packets up to a max packet size
1583  * determined by the NIC type and firmware. There is also an MTU
1584  * configured into the NIC via the CIMC/UCSM management interface
1585  * which can be overridden by this function (up to the max packet size).
1586  * Depending on the network setup, doing so may cause packet drops
1587  * and unexpected behavior.
1588  */
1589 int enic_set_mtu(struct enic *enic, uint16_t new_mtu)
1590 {
1591 	unsigned int rq_idx;
1592 	struct vnic_rq *rq;
1593 	int rc = 0;
1594 	uint16_t old_mtu;	/* previous setting */
1595 	uint16_t config_mtu;	/* Value configured into NIC via CIMC/UCSM */
1596 	struct rte_eth_dev *eth_dev = enic->rte_dev;
1597 
1598 	old_mtu = eth_dev->data->mtu;
1599 	config_mtu = enic->config.mtu;
1600 
1601 	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
1602 		return -E_RTE_SECONDARY;
1603 
1604 	if (new_mtu > enic->max_mtu) {
1605 		dev_err(enic,
1606 			"MTU not updated: requested (%u) greater than max (%u)\n",
1607 			new_mtu, enic->max_mtu);
1608 		return -EINVAL;
1609 	}
1610 	if (new_mtu < ENIC_MIN_MTU) {
1611 		dev_info(enic,
1612 			"MTU not updated: requested (%u) less than min (%u)\n",
1613 			new_mtu, ENIC_MIN_MTU);
1614 		return -EINVAL;
1615 	}
1616 	if (new_mtu > config_mtu)
1617 		dev_warning(enic,
1618 			"MTU (%u) is greater than value configured in NIC (%u)\n",
1619 			new_mtu, config_mtu);
1620 
1621 	/* Update the MTU and maximum packet length */
1622 	eth_dev->data->mtu = new_mtu;
1623 	eth_dev->data->dev_conf.rxmode.max_rx_pkt_len =
1624 		enic_mtu_to_max_rx_pktlen(new_mtu);
1625 
1626 	/*
1627 	 * If the device has not started (enic_enable), nothing to do.
1628 	 * Later, enic_enable() will set up RQs reflecting the new maximum
1629 	 * packet length.
1630 	 */
1631 	if (!eth_dev->data->dev_started)
1632 		goto set_mtu_done;
1633 
1634 	/*
1635 	 * The device has started, re-do RQs on the fly. In the process, we
1636 	 * pick up the new maximum packet length.
1637 	 *
1638 	 * Some applications rely on the ability to change MTU without stopping
1639 	 * the device. So keep this behavior for now.
1640 	 */
1641 	rte_spinlock_lock(&enic->mtu_lock);
1642 
1643 	/* Stop traffic on all RQs */
1644 	for (rq_idx = 0; rq_idx < enic->rq_count * 2; rq_idx++) {
1645 		rq = &enic->rq[rq_idx];
1646 		if (rq->is_sop && rq->in_use) {
1647 			rc = enic_stop_rq(enic,
1648 					  enic_sop_rq_idx_to_rte_idx(rq_idx));
1649 			if (rc) {
1650 				dev_err(enic, "Failed to stop Rq %u\n", rq_idx);
1651 				goto set_mtu_done;
1652 			}
1653 		}
1654 	}
1655 
1656 	/* replace Rx function with a no-op to avoid getting stale pkts */
1657 	eth_dev->rx_pkt_burst = enic_dummy_recv_pkts;
1658 	rte_mb();
1659 
1660 	/* Allow time for threads to exit the real Rx function. */
1661 	usleep(100000);
1662 
1663 	/* now it is safe to reconfigure the RQs */
1664 
1665 
1666 	/* free and reallocate RQs with the new MTU */
1667 	for (rq_idx = 0; rq_idx < enic->rq_count; rq_idx++) {
1668 		rq = &enic->rq[enic_rte_rq_idx_to_sop_idx(rq_idx)];
1669 		if (!rq->in_use)
1670 			continue;
1671 
1672 		enic_free_rq(rq);
1673 		rc = enic_alloc_rq(enic, rq_idx, rq->socket_id, rq->mp,
1674 				   rq->tot_nb_desc, rq->rx_free_thresh);
1675 		if (rc) {
1676 			dev_err(enic,
1677 				"Fatal MTU alloc error- No traffic will pass\n");
1678 			goto set_mtu_done;
1679 		}
1680 
1681 		rc = enic_reinit_rq(enic, rq_idx);
1682 		if (rc) {
1683 			dev_err(enic,
1684 				"Fatal MTU RQ reinit- No traffic will pass\n");
1685 			goto set_mtu_done;
1686 		}
1687 	}
1688 
1689 	/* put back the real receive function */
1690 	rte_mb();
1691 	enic_pick_rx_handler(eth_dev);
1692 	rte_mb();
1693 
1694 	/* restart Rx traffic */
1695 	for (rq_idx = 0; rq_idx < enic->rq_count; rq_idx++) {
1696 		rq = &enic->rq[enic_rte_rq_idx_to_sop_idx(rq_idx)];
1697 		if (rq->is_sop && rq->in_use)
1698 			enic_start_rq(enic, rq_idx);
1699 	}
1700 
1701 set_mtu_done:
1702 	dev_info(enic, "MTU changed from %u to %u\n",  old_mtu, new_mtu);
1703 	rte_spinlock_unlock(&enic->mtu_lock);
1704 	return rc;
1705 }
1706 
1707 static int enic_dev_init(struct enic *enic)
1708 {
1709 	int err;
1710 	struct rte_eth_dev *eth_dev = enic->rte_dev;
1711 
1712 	vnic_dev_intr_coal_timer_info_default(enic->vdev);
1713 
1714 	/* Get vNIC configuration */
1716 	err = enic_get_vnic_config(enic);
1717 	if (err) {
1718 		dev_err(dev, "Get vNIC configuration failed, aborting\n");
1719 		return err;
1720 	}
1721 
1722 	/* Get available resource counts */
1723 	enic_get_res_counts(enic);
1724 	if (enic->conf_rq_count == 1) {
1725 		dev_err(enic, "Running with only 1 RQ configured in the vNIC is not supported.\n");
1726 		dev_err(enic, "Please configure 2 RQs in the vNIC for each Rx queue used by DPDK.\n");
1727 		dev_err(enic, "See the ENIC PMD guide for more information.\n");
1728 		return -EINVAL;
1729 	}
1730 	/* Queue counts may be zeros. rte_zmalloc returns NULL in that case. */
1731 	enic->cq = rte_zmalloc("enic_vnic_cq", sizeof(struct vnic_cq) *
1732 			       enic->conf_cq_count, 8);
1733 	enic->intr = rte_zmalloc("enic_vnic_intr", sizeof(struct vnic_intr) *
1734 				 enic->conf_intr_count, 8);
1735 	enic->rq = rte_zmalloc("enic_vnic_rq", sizeof(struct vnic_rq) *
1736 			       enic->conf_rq_count, 8);
1737 	enic->wq = rte_zmalloc("enic_vnic_wq", sizeof(struct vnic_wq) *
1738 			       enic->conf_wq_count, 8);
1739 	if (enic->conf_cq_count > 0 && enic->cq == NULL) {
1740 		dev_err(enic, "failed to allocate vnic_cq, aborting.\n");
1741 		return -1;
1742 	}
1743 	if (enic->conf_intr_count > 0 && enic->intr == NULL) {
1744 		dev_err(enic, "failed to allocate vnic_intr, aborting.\n");
1745 		return -1;
1746 	}
1747 	if (enic->conf_rq_count > 0 && enic->rq == NULL) {
1748 		dev_err(enic, "failed to allocate vnic_rq, aborting.\n");
1749 		return -1;
1750 	}
1751 	if (enic->conf_wq_count > 0 && enic->wq == NULL) {
1752 		dev_err(enic, "failed to allocate vnic_wq, aborting.\n");
1753 		return -1;
1754 	}
1755 
1756 	/* Get the supported filters */
1757 	enic_fdir_info(enic);
1758 
1759 	eth_dev->data->mac_addrs = rte_zmalloc("enic_mac_addr",
1760 					sizeof(struct rte_ether_addr) *
1761 					ENIC_UNICAST_PERFECT_FILTERS, 0);
1762 	if (!eth_dev->data->mac_addrs) {
1763 		dev_err(enic, "mac addr storage alloc failed, aborting.\n");
1764 		return -1;
1765 	}
1766 	rte_ether_addr_copy((struct rte_ether_addr *)enic->mac_addr,
1767 			eth_dev->data->mac_addrs);
1768 
1769 	vnic_dev_set_reset_flag(enic->vdev, 0);
1770 
1771 	LIST_INIT(&enic->flows);
1772 
1773 	/* set up link status checking */
1774 	vnic_dev_notify_set(enic->vdev, -1); /* No Intr for notify */
1775 
1776 	/*
1777 	 * When Geneve with options offload is available, always disable it
1778 	 * first as it can interfere with user flow rules.
1779 	 */
1780 	if (enic->geneve_opt_avail) {
1781 		/*
1782 		 * Disabling fails if the feature is provisioned but
1783 		 * not enabled. So ignore result and do not log error.
1784 		 */
1785 		vnic_dev_overlay_offload_ctrl(enic->vdev,
1786 			OVERLAY_FEATURE_GENEVE,
1787 			OVERLAY_OFFLOAD_DISABLE);
1788 	}
1789 	enic->overlay_offload = false;
1790 	if (enic->disable_overlay && enic->vxlan) {
1791 		/*
1792 		 * Explicitly disable overlay offload as the setting is
1793 		 * sticky, and resetting vNIC does not disable it.
1794 		 */
1795 		if (vnic_dev_overlay_offload_ctrl(enic->vdev,
1796 						  OVERLAY_FEATURE_VXLAN,
1797 						  OVERLAY_OFFLOAD_DISABLE)) {
1798 			dev_err(enic, "failed to disable overlay offload\n");
1799 		} else {
1800 			dev_info(enic, "Overlay offload is disabled\n");
1801 		}
1802 	}
1803 	if (!enic->disable_overlay && enic->vxlan &&
1804 	    /* 'VXLAN feature' enables VXLAN, NVGRE, and GENEVE. */
1805 	    vnic_dev_overlay_offload_ctrl(enic->vdev,
1806 					  OVERLAY_FEATURE_VXLAN,
1807 					  OVERLAY_OFFLOAD_ENABLE) == 0) {
1808 		enic->tx_offload_capa |=
1809 			DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM |
1810 			DEV_TX_OFFLOAD_GENEVE_TNL_TSO |
1811 			DEV_TX_OFFLOAD_VXLAN_TNL_TSO;
1812 		enic->tx_offload_mask |=
1813 			PKT_TX_OUTER_IPV6 |
1814 			PKT_TX_OUTER_IPV4 |
1815 			PKT_TX_OUTER_IP_CKSUM |
1816 			PKT_TX_TUNNEL_MASK;
1817 		enic->overlay_offload = true;
1818 		dev_info(enic, "Overlay offload is enabled\n");
1819 	}
1820 	/* Geneve with options offload requires overlay offload */
1821 	if (enic->overlay_offload && enic->geneve_opt_avail &&
1822 	    enic->geneve_opt_request) {
1823 		if (vnic_dev_overlay_offload_ctrl(enic->vdev,
1824 				OVERLAY_FEATURE_GENEVE,
1825 				OVERLAY_OFFLOAD_ENABLE)) {
1826 			dev_err(enic, "failed to enable geneve+option\n");
1827 		} else {
1828 			enic->geneve_opt_enabled = 1;
1829 			dev_info(enic, "Geneve with options is enabled\n");
1830 		}
1831 	}
1832 	/*
1833 	 * Reset the vxlan port if HW vxlan parsing is available. It
1834 	 * is always enabled regardless of overlay offload
1835 	 * enable/disable.
1836 	 */
1837 	if (enic->vxlan) {
1838 		enic->vxlan_port = RTE_VXLAN_DEFAULT_PORT;
1839 		/*
1840 		 * Reset the vxlan port to the default, as the NIC firmware
1841 		 * does not reset it automatically and keeps the old setting.
1842 		 */
1843 		if (vnic_dev_overlay_offload_cfg(enic->vdev,
1844 						 OVERLAY_CFG_VXLAN_PORT_UPDATE,
1845 						 RTE_VXLAN_DEFAULT_PORT)) {
1846 			dev_err(enic, "failed to update vxlan port\n");
1847 			return -EINVAL;
1848 		}
1849 	}
1850 
1851 	if (enic_fm_init(enic))
1852 		dev_warning(enic, "Init of flowman failed.\n");
1853 	return 0;
1854 
1855 }
1856 
1857 static void lock_devcmd(void *priv)
1858 {
1859 	struct enic *enic = priv;
1860 
1861 	rte_spinlock_lock(&enic->devcmd_lock);
1862 }
1863 
1864 static void unlock_devcmd(void *priv)
1865 {
1866 	struct enic *enic = priv;
1867 
1868 	rte_spinlock_unlock(&enic->devcmd_lock);
1869 }
1870 
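/*
 * One-time probe: record BAR0, register the vNIC device with the vNIC
 * library, open the device, set the ingress VLAN rewrite mode, and run
 * enic_dev_init(). Secondary processes skip this; the hardware is already
 * initialized by the primary process.
 */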
1871 int enic_probe(struct enic *enic)
1872 {
1873 	struct rte_pci_device *pdev = enic->pdev;
1874 	int err = -1;
1875 
1876 	dev_debug(enic, "Initializing ENIC PMD\n");
1877 
1878 	/* if this is a secondary process the hardware is already initialized */
1879 	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
1880 		return 0;
1881 
1882 	enic->bar0.vaddr = (void *)pdev->mem_resource[0].addr;
1883 	enic->bar0.len = pdev->mem_resource[0].len;
1884 
1885 	/* Register vNIC device */
1886 	enic->vdev = vnic_dev_register(NULL, enic, enic->pdev, &enic->bar0, 1);
1887 	if (!enic->vdev) {
1888 		dev_err(enic, "vNIC registration failed, aborting\n");
1889 		goto err_out;
1890 	}
1891 
1892 	LIST_INIT(&enic->memzone_list);
1893 	rte_spinlock_init(&enic->memzone_list_lock);
1894 
1895 	vnic_register_cbacks(enic->vdev,
1896 		enic_alloc_consistent,
1897 		enic_free_consistent);
1898 
1899 	/*
1900 	 * Allocate the consistent memory for stats upfront so both primary and
1901 	 * secondary processes can dump stats.
1902 	 */
1903 	err = vnic_dev_alloc_stats_mem(enic->vdev);
1904 	if (err) {
1905 		dev_err(enic, "Failed to allocate cmd memory, aborting\n");
1906 		goto err_out_unregister;
1907 	}
1908 	/* Issue device open to get device in known state */
1909 	err = enic_dev_open(enic);
1910 	if (err) {
1911 		dev_err(enic, "vNIC dev open failed, aborting\n");
1912 		goto err_out_unregister;
1913 	}
1914 
1915 	/* Set ingress vlan rewrite mode before vnic initialization */
1916 	dev_debug(enic, "Set ig_vlan_rewrite_mode=%u\n",
1917 		  enic->ig_vlan_rewrite_mode);
1918 	err = vnic_dev_set_ig_vlan_rewrite_mode(enic->vdev,
1919 		enic->ig_vlan_rewrite_mode);
1920 	if (err) {
1921 		dev_err(enic,
1922 			"Failed to set ingress vlan rewrite mode, aborting.\n");
1923 		goto err_out_dev_close;
1924 	}
1925 
1926 	/* Issue device init to initialize the vnic-to-switch link.
1927 	 * We'll start with carrier off and wait for link UP
1928 	 * notification later to turn on carrier.  We don't need
1929 	 * to wait here for the vnic-to-switch link initialization
1930 	 * to complete; link UP notification is the indication that
1931 	 * the process is complete.
1932 	 */
1933 
1934 	err = vnic_dev_init(enic->vdev, 0);
1935 	if (err) {
1936 		dev_err(enic, "vNIC dev init failed, aborting\n");
1937 		goto err_out_dev_close;
1938 	}
1939 
1940 	err = enic_dev_init(enic);
1941 	if (err) {
1942 		dev_err(enic, "Device initialization failed, aborting\n");
1943 		goto err_out_dev_close;
1944 	}
1945 
1946 	/* Use a PF spinlock to serialize devcmd from PF and VF representors */
1947 	if (enic->switchdev_mode) {
1948 		rte_spinlock_init(&enic->devcmd_lock);
1949 		vnic_register_lock(enic->vdev, lock_devcmd, unlock_devcmd);
1950 	}
1951 	return 0;
1952 
1953 err_out_dev_close:
1954 	vnic_dev_close(enic->vdev);
1955 err_out_unregister:
1956 	vnic_dev_unregister(enic->vdev);
1957 err_out:
1958 	return err;
1959 }
1960 
1961 void enic_remove(struct enic *enic)
1962 {
1963 	enic_dev_deinit(enic);
1964 	vnic_dev_close(enic->vdev);
1965 	vnic_dev_unregister(enic->vdev);
1966 }
1967