xref: /dpdk/drivers/net/ena/ena_ethdev.c (revision 980d0ba4b14de13dd61fc3e77aceef53f1036375)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright (c) 2015-2020 Amazon.com, Inc. or its affiliates.
3  * All rights reserved.
4  */
5 
6 #include <rte_string_fns.h>
7 #include <rte_errno.h>
8 #include <rte_version.h>
9 #include <rte_net.h>
10 #include <rte_kvargs.h>
11 
12 #include "ena_ethdev.h"
13 #include "ena_logs.h"
14 #include "ena_platform.h"
15 #include "ena_com.h"
16 #include "ena_eth_com.h"
17 
18 #include <ena_common_defs.h>
19 #include <ena_regs_defs.h>
20 #include <ena_admin_defs.h>
21 #include <ena_eth_io_defs.h>
22 
23 #define DRV_MODULE_VER_MAJOR	2
24 #define DRV_MODULE_VER_MINOR	8
25 #define DRV_MODULE_VER_SUBMINOR	0
26 
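/* Merge the high and low 32-bit halves of a 64-bit counter reported by the device. */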
27 #define __MERGE_64B_H_L(h, l) (((uint64_t)h << 32) | l)
28 
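/*
 * Extract the TCP data offset field, i.e. the L4 header length expressed in
 * 32-bit words; this is the value assigned to ena_meta->l4_hdr_len in
 * ena_tx_mbuf_prepare().
 */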
29 #define GET_L4_HDR_LEN(mbuf)					\
30 	((rte_pktmbuf_mtod_offset(mbuf,	struct rte_tcp_hdr *,	\
31 		mbuf->l3_len + mbuf->l2_len)->data_off) >> 4)
32 
33 #define ETH_GSTRING_LEN	32
34 
35 #define ARRAY_SIZE(x) RTE_DIM(x)
36 
37 #define ENA_MIN_RING_DESC	128
38 
39 /*
40  * ENA_CLEANUP_BUF_SIZE should be kept lower than RTE_MEMPOOL_CACHE_MAX_SIZE,
41  * so the cleanup buffer fits in the mempool's local cache.
42  */
43 #define ENA_CLEANUP_BUF_SIZE	256
44 
45 #define ENA_PTYPE_HAS_HASH	(RTE_PTYPE_L4_TCP | RTE_PTYPE_L4_UDP)
46 
47 struct ena_stats {
48 	char name[ETH_GSTRING_LEN];
49 	int stat_offset;
50 };
51 
52 #define ENA_STAT_ENTRY(stat, stat_type) { \
53 	.name = #stat, \
54 	.stat_offset = offsetof(struct ena_stats_##stat_type, stat) \
55 }
56 
57 #define ENA_STAT_RX_ENTRY(stat) \
58 	ENA_STAT_ENTRY(stat, rx)
59 
60 #define ENA_STAT_TX_ENTRY(stat) \
61 	ENA_STAT_ENTRY(stat, tx)
62 
63 #define ENA_STAT_METRICS_ENTRY(stat) \
64 	ENA_STAT_ENTRY(stat, metrics)
65 
66 #define ENA_STAT_GLOBAL_ENTRY(stat) \
67 	ENA_STAT_ENTRY(stat, dev)
68 
69 #define ENA_STAT_ENA_SRD_ENTRY(stat) \
70 	ENA_STAT_ENTRY(stat, srd)
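
/*
 * For example, ENA_STAT_RX_ENTRY(cnt) expands to
 * { .name = "cnt", .stat_offset = offsetof(struct ena_stats_rx, cnt) }.
 */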
71 
72 /* Device arguments */
73 #define ENA_DEVARG_LARGE_LLQ_HDR "large_llq_hdr"
74 /* Timeout in seconds after which a single uncompleted Tx packet should be
75  * considered as missing.
76  */
77 #define ENA_DEVARG_MISS_TXC_TO "miss_txc_to"
78 /*
79  * Controls whether LLQ should be used (if available). Enabled by default.
80  * NOTE: Disabling LLQ is strongly discouraged, as it may lead to severe
81  * performance degradation on 6th generation AWS instances.
82  */
83 #define ENA_DEVARG_ENABLE_LLQ "enable_llq"
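
/*
 * Illustrative usage (values are examples only): the device arguments above are
 * passed through EAL, e.g.:
 *   --allow <PCI BDF>,large_llq_hdr=1,miss_txc_to=5,enable_llq=1
 */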
84 
85 /*
86  * Each rte_memzone should have a unique name.
87  * To satisfy this, count the number of allocations and append it to the name.
88  */
89 rte_atomic64_t ena_alloc_cnt;
90 
91 static const struct ena_stats ena_stats_global_strings[] = {
92 	ENA_STAT_GLOBAL_ENTRY(wd_expired),
93 	ENA_STAT_GLOBAL_ENTRY(dev_start),
94 	ENA_STAT_GLOBAL_ENTRY(dev_stop),
95 	ENA_STAT_GLOBAL_ENTRY(tx_drops),
96 	ENA_STAT_GLOBAL_ENTRY(rx_overruns),
97 };
98 
99 /*
100  * The legacy metrics (also known as eni stats) consist of 5 stats, while the
101  * reworked metrics (also known as customer metrics) support one additional stat.
102  */
103 static struct ena_stats ena_stats_metrics_strings[] = {
104 	ENA_STAT_METRICS_ENTRY(bw_in_allowance_exceeded),
105 	ENA_STAT_METRICS_ENTRY(bw_out_allowance_exceeded),
106 	ENA_STAT_METRICS_ENTRY(pps_allowance_exceeded),
107 	ENA_STAT_METRICS_ENTRY(conntrack_allowance_exceeded),
108 	ENA_STAT_METRICS_ENTRY(linklocal_allowance_exceeded),
109 	ENA_STAT_METRICS_ENTRY(conntrack_allowance_available),
110 };
111 
112 static const struct ena_stats ena_stats_srd_strings[] = {
113 	ENA_STAT_ENA_SRD_ENTRY(ena_srd_mode),
114 	ENA_STAT_ENA_SRD_ENTRY(ena_srd_tx_pkts),
115 	ENA_STAT_ENA_SRD_ENTRY(ena_srd_eligible_tx_pkts),
116 	ENA_STAT_ENA_SRD_ENTRY(ena_srd_rx_pkts),
117 	ENA_STAT_ENA_SRD_ENTRY(ena_srd_resource_utilization),
118 };
119 
120 static const struct ena_stats ena_stats_tx_strings[] = {
121 	ENA_STAT_TX_ENTRY(cnt),
122 	ENA_STAT_TX_ENTRY(bytes),
123 	ENA_STAT_TX_ENTRY(prepare_ctx_err),
124 	ENA_STAT_TX_ENTRY(tx_poll),
125 	ENA_STAT_TX_ENTRY(doorbells),
126 	ENA_STAT_TX_ENTRY(bad_req_id),
127 	ENA_STAT_TX_ENTRY(available_desc),
128 	ENA_STAT_TX_ENTRY(missed_tx),
129 };
130 
131 static const struct ena_stats ena_stats_rx_strings[] = {
132 	ENA_STAT_RX_ENTRY(cnt),
133 	ENA_STAT_RX_ENTRY(bytes),
134 	ENA_STAT_RX_ENTRY(refill_partial),
135 	ENA_STAT_RX_ENTRY(l3_csum_bad),
136 	ENA_STAT_RX_ENTRY(l4_csum_bad),
137 	ENA_STAT_RX_ENTRY(l4_csum_good),
138 	ENA_STAT_RX_ENTRY(mbuf_alloc_fail),
139 	ENA_STAT_RX_ENTRY(bad_desc_num),
140 	ENA_STAT_RX_ENTRY(bad_req_id),
141 };
142 
143 #define ENA_STATS_ARRAY_GLOBAL	ARRAY_SIZE(ena_stats_global_strings)
144 #define ENA_STATS_ARRAY_METRICS	ARRAY_SIZE(ena_stats_metrics_strings)
145 #define ENA_STATS_ARRAY_METRICS_LEGACY	(ENA_STATS_ARRAY_METRICS - 1)
146 #define ENA_STATS_ARRAY_ENA_SRD	ARRAY_SIZE(ena_stats_srd_strings)
147 #define ENA_STATS_ARRAY_TX	ARRAY_SIZE(ena_stats_tx_strings)
148 #define ENA_STATS_ARRAY_RX	ARRAY_SIZE(ena_stats_rx_strings)
149 
150 #define QUEUE_OFFLOADS (RTE_ETH_TX_OFFLOAD_TCP_CKSUM |\
151 			RTE_ETH_TX_OFFLOAD_UDP_CKSUM |\
152 			RTE_ETH_TX_OFFLOAD_IPV4_CKSUM |\
153 			RTE_ETH_TX_OFFLOAD_TCP_TSO)
154 #define MBUF_OFFLOADS (RTE_MBUF_F_TX_L4_MASK |\
155 		       RTE_MBUF_F_TX_IP_CKSUM |\
156 		       RTE_MBUF_F_TX_TCP_SEG)
157 
158 /** Vendor ID used by Amazon devices */
159 #define PCI_VENDOR_ID_AMAZON 0x1D0F
160 /** Amazon devices */
161 #define PCI_DEVICE_ID_ENA_VF		0xEC20
162 #define PCI_DEVICE_ID_ENA_VF_RSERV0	0xEC21
163 
164 #define	ENA_TX_OFFLOAD_MASK	(RTE_MBUF_F_TX_L4_MASK |         \
165 	RTE_MBUF_F_TX_IPV6 |            \
166 	RTE_MBUF_F_TX_IPV4 |            \
167 	RTE_MBUF_F_TX_IP_CKSUM |        \
168 	RTE_MBUF_F_TX_TCP_SEG)
169 
170 #define	ENA_TX_OFFLOAD_NOTSUP_MASK	\
171 	(RTE_MBUF_F_TX_OFFLOAD_MASK ^ ENA_TX_OFFLOAD_MASK)
172 
173 /** HW specific offloads capabilities. */
174 /* IPv4 checksum offload. */
175 #define ENA_L3_IPV4_CSUM		0x0001
176 /* TCP/UDP checksum offload for IPv4 packets. */
177 #define ENA_L4_IPV4_CSUM		0x0002
178 /* TCP/UDP checksum offload for IPv4 packets with pseudo header checksum. */
179 #define ENA_L4_IPV4_CSUM_PARTIAL	0x0004
180 /* TCP/UDP checksum offload for IPv6 packets. */
181 #define ENA_L4_IPV6_CSUM		0x0008
182 /* TCP/UDP checksum offload for IPv6 packets with pseudo header checksum. */
183 #define ENA_L4_IPV6_CSUM_PARTIAL	0x0010
184 /* TSO support for IPv4 packets. */
185 #define ENA_IPV4_TSO			0x0020
186 
187 /* Device supports setting RSS hash. */
188 #define ENA_RX_RSS_HASH			0x0040
189 
190 static const struct rte_pci_id pci_id_ena_map[] = {
191 	{ RTE_PCI_DEVICE(PCI_VENDOR_ID_AMAZON, PCI_DEVICE_ID_ENA_VF) },
192 	{ RTE_PCI_DEVICE(PCI_VENDOR_ID_AMAZON, PCI_DEVICE_ID_ENA_VF_RSERV0) },
193 	{ .device_id = 0 },
194 };
195 
196 static struct ena_aenq_handlers aenq_handlers;
197 
198 static int ena_device_init(struct ena_adapter *adapter,
199 			   struct rte_pci_device *pdev,
200 			   struct ena_com_dev_get_features_ctx *get_feat_ctx);
201 static int ena_dev_configure(struct rte_eth_dev *dev);
202 static void ena_tx_map_mbuf(struct ena_ring *tx_ring,
203 	struct ena_tx_buffer *tx_info,
204 	struct rte_mbuf *mbuf,
205 	void **push_header,
206 	uint16_t *header_len);
207 static int ena_xmit_mbuf(struct ena_ring *tx_ring, struct rte_mbuf *mbuf);
208 static int ena_tx_cleanup(void *txp, uint32_t free_pkt_cnt);
209 static uint16_t eth_ena_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
210 				  uint16_t nb_pkts);
211 static uint16_t eth_ena_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
212 		uint16_t nb_pkts);
213 static int ena_tx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
214 			      uint16_t nb_desc, unsigned int socket_id,
215 			      const struct rte_eth_txconf *tx_conf);
216 static int ena_rx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
217 			      uint16_t nb_desc, unsigned int socket_id,
218 			      const struct rte_eth_rxconf *rx_conf,
219 			      struct rte_mempool *mp);
220 static inline void ena_init_rx_mbuf(struct rte_mbuf *mbuf, uint16_t len);
221 static struct rte_mbuf *ena_rx_mbuf(struct ena_ring *rx_ring,
222 				    struct ena_com_rx_buf_info *ena_bufs,
223 				    uint32_t descs,
224 				    uint16_t *next_to_clean,
225 				    uint8_t offset);
226 static uint16_t eth_ena_recv_pkts(void *rx_queue,
227 				  struct rte_mbuf **rx_pkts, uint16_t nb_pkts);
228 static int ena_add_single_rx_desc(struct ena_com_io_sq *io_sq,
229 				  struct rte_mbuf *mbuf, uint16_t id);
230 static int ena_populate_rx_queue(struct ena_ring *rxq, unsigned int count);
231 static void ena_init_rings(struct ena_adapter *adapter,
232 			   bool disable_meta_caching);
233 static int ena_mtu_set(struct rte_eth_dev *dev, uint16_t mtu);
234 static int ena_start(struct rte_eth_dev *dev);
235 static int ena_stop(struct rte_eth_dev *dev);
236 static int ena_close(struct rte_eth_dev *dev);
237 static int ena_dev_reset(struct rte_eth_dev *dev);
238 static int ena_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats);
239 static void ena_rx_queue_release_all(struct rte_eth_dev *dev);
240 static void ena_tx_queue_release_all(struct rte_eth_dev *dev);
241 static void ena_rx_queue_release(struct rte_eth_dev *dev, uint16_t qid);
242 static void ena_tx_queue_release(struct rte_eth_dev *dev, uint16_t qid);
243 static void ena_rx_queue_release_bufs(struct ena_ring *ring);
244 static void ena_tx_queue_release_bufs(struct ena_ring *ring);
245 static int ena_link_update(struct rte_eth_dev *dev,
246 			   int wait_to_complete);
247 static int ena_create_io_queue(struct rte_eth_dev *dev, struct ena_ring *ring);
248 static void ena_queue_stop(struct ena_ring *ring);
249 static void ena_queue_stop_all(struct rte_eth_dev *dev,
250 			      enum ena_ring_type ring_type);
251 static int ena_queue_start(struct rte_eth_dev *dev, struct ena_ring *ring);
252 static int ena_queue_start_all(struct rte_eth_dev *dev,
253 			       enum ena_ring_type ring_type);
254 static void ena_stats_restart(struct rte_eth_dev *dev);
255 static uint64_t ena_get_rx_port_offloads(struct ena_adapter *adapter);
256 static uint64_t ena_get_tx_port_offloads(struct ena_adapter *adapter);
257 static uint64_t ena_get_rx_queue_offloads(struct ena_adapter *adapter);
258 static uint64_t ena_get_tx_queue_offloads(struct ena_adapter *adapter);
259 static int ena_infos_get(struct rte_eth_dev *dev,
260 			 struct rte_eth_dev_info *dev_info);
261 static void ena_interrupt_handler_rte(void *cb_arg);
262 static void ena_timer_wd_callback(struct rte_timer *timer, void *arg);
263 static void ena_destroy_device(struct rte_eth_dev *eth_dev);
264 static int eth_ena_dev_init(struct rte_eth_dev *eth_dev);
265 static int ena_xstats_get_names(struct rte_eth_dev *dev,
266 				struct rte_eth_xstat_name *xstats_names,
267 				unsigned int n);
268 static int ena_xstats_get_names_by_id(struct rte_eth_dev *dev,
269 				      const uint64_t *ids,
270 				      struct rte_eth_xstat_name *xstats_names,
271 				      unsigned int size);
272 static int ena_xstats_get(struct rte_eth_dev *dev,
273 			  struct rte_eth_xstat *stats,
274 			  unsigned int n);
275 static int ena_xstats_get_by_id(struct rte_eth_dev *dev,
276 				const uint64_t *ids,
277 				uint64_t *values,
278 				unsigned int n);
279 static int ena_process_bool_devarg(const char *key,
280 				   const char *value,
281 				   void *opaque);
282 static int ena_parse_devargs(struct ena_adapter *adapter,
283 			     struct rte_devargs *devargs);
284 static void ena_copy_customer_metrics(struct ena_adapter *adapter,
285 					uint64_t *buf,
286 					size_t buf_size);
287 static void ena_copy_ena_srd_info(struct ena_adapter *adapter,
288 				  struct ena_stats_srd *srd_info);
289 static int ena_setup_rx_intr(struct rte_eth_dev *dev);
290 static int ena_rx_queue_intr_enable(struct rte_eth_dev *dev,
291 				    uint16_t queue_id);
292 static int ena_rx_queue_intr_disable(struct rte_eth_dev *dev,
293 				     uint16_t queue_id);
294 static int ena_configure_aenq(struct ena_adapter *adapter);
295 static int ena_mp_primary_handle(const struct rte_mp_msg *mp_msg,
296 				 const void *peer);
297 
298 static const struct eth_dev_ops ena_dev_ops = {
299 	.dev_configure          = ena_dev_configure,
300 	.dev_infos_get          = ena_infos_get,
301 	.rx_queue_setup         = ena_rx_queue_setup,
302 	.tx_queue_setup         = ena_tx_queue_setup,
303 	.dev_start              = ena_start,
304 	.dev_stop               = ena_stop,
305 	.link_update            = ena_link_update,
306 	.stats_get              = ena_stats_get,
307 	.xstats_get_names       = ena_xstats_get_names,
308 	.xstats_get_names_by_id = ena_xstats_get_names_by_id,
309 	.xstats_get             = ena_xstats_get,
310 	.xstats_get_by_id       = ena_xstats_get_by_id,
311 	.mtu_set                = ena_mtu_set,
312 	.rx_queue_release       = ena_rx_queue_release,
313 	.tx_queue_release       = ena_tx_queue_release,
314 	.dev_close              = ena_close,
315 	.dev_reset              = ena_dev_reset,
316 	.reta_update            = ena_rss_reta_update,
317 	.reta_query             = ena_rss_reta_query,
318 	.rx_queue_intr_enable   = ena_rx_queue_intr_enable,
319 	.rx_queue_intr_disable  = ena_rx_queue_intr_disable,
320 	.rss_hash_update        = ena_rss_hash_update,
321 	.rss_hash_conf_get      = ena_rss_hash_conf_get,
322 	.tx_done_cleanup        = ena_tx_cleanup,
323 };
324 
325 /*********************************************************************
326  *  Multi-Process communication bits
327  *********************************************************************/
328 /* rte_mp IPC message name */
329 #define ENA_MP_NAME	"net_ena_mp"
330 /* Request timeout in seconds */
331 #define ENA_MP_REQ_TMO	5
332 
333 /** Proxy request type */
334 enum ena_mp_req {
335 	ENA_MP_DEV_STATS_GET,
336 	ENA_MP_ENI_STATS_GET,
337 	ENA_MP_MTU_SET,
338 	ENA_MP_IND_TBL_GET,
339 	ENA_MP_IND_TBL_SET,
340 	ENA_MP_CUSTOMER_METRICS_GET,
341 	ENA_MP_SRD_STATS_GET,
342 };
343 
344 /** Proxy message body. Shared between requests and responses. */
345 struct ena_mp_body {
346 	/* Message type */
347 	enum ena_mp_req type;
348 	int port_id;
349 	/* Processing result. Set in replies. 0 if message succeeded, negative
350 	 * error code otherwise.
351 	 */
352 	int result;
353 	union {
354 		int mtu; /* For ENA_MP_MTU_SET */
355 	} args;
356 };
357 
358 /**
359  * Initialize IPC message.
360  *
361  * @param[out] msg
362  *   Pointer to the message to initialize.
363  * @param[in] type
364  *   Message type.
365  * @param[in] port_id
366  *   Port ID of target device.
367  *
368  */
369 static void
370 mp_msg_init(struct rte_mp_msg *msg, enum ena_mp_req type, int port_id)
371 {
372 	struct ena_mp_body *body = (struct ena_mp_body *)&msg->param;
373 
374 	memset(msg, 0, sizeof(*msg));
375 	strlcpy(msg->name, ENA_MP_NAME, sizeof(msg->name));
376 	msg->len_param = sizeof(*body);
377 	body->type = type;
378 	body->port_id = port_id;
379 }
380 
381 /*********************************************************************
382  *  Multi-Process communication PMD API
383  *********************************************************************/
384 /**
385  * Define proxy request descriptor
386  *
387  * Used to define all structures and functions required for proxying a given
388  * function to the primary process, including the code that prepares the
389  * request and processes the response.
390  *
391  * @param[in] f
392  *   Name of the function to proxy
393  * @param[in] t
394  *   Message type to use
395  * @param[in] prep
396  *   Body of a function to prepare the request in the form of a statement
397  *   expression. It is passed all the original function arguments along with two
398  *   extra ones:
399  *   - struct ena_adapter *adapter - PMD data of the device calling the proxy.
400  *   - struct ena_mp_body *req - body of a request to prepare.
401  * @param[in] proc
402  *   Body of a function to process the response in the form of a statement
403  *   expression. It is passed all the original function arguments along with two
404  *   extra ones:
405  *   - struct ena_adapter *adapter - PMD data of the device calling the proxy.
406  *   - struct ena_mp_body *rsp - body of a response to process.
407  * @param ...
408  *   Proxied function's arguments
409  *
410  * @note Inside prep and proc any parameters which aren't used should be marked
411  *       as such (with ENA_TOUCH or __rte_unused).
412  */
413 #define ENA_PROXY_DESC(f, t, prep, proc, ...)			\
414 	static const enum ena_mp_req mp_type_ ## f =  t;	\
415 	static const char *mp_name_ ## f = #t;			\
416 	static void mp_prep_ ## f(struct ena_adapter *adapter,	\
417 				  struct ena_mp_body *req,	\
418 				  __VA_ARGS__)			\
419 	{							\
420 		prep;						\
421 	}							\
422 	static void mp_proc_ ## f(struct ena_adapter *adapter,	\
423 				  struct ena_mp_body *rsp,	\
424 				  __VA_ARGS__)			\
425 	{							\
426 		proc;						\
427 	}
428 
429 /**
430  * Proxy wrapper for calling primary functions in a secondary process.
431  *
432  * Depending on whether called in primary or secondary process, calls the
433  * @p func directly or proxies the call to the primary process via rte_mp IPC.
434  * This macro requires a proxy request descriptor to be defined for @p func
435  * using ENA_PROXY_DESC() macro.
436  *
437  * @param[in,out] a
438  *   Device PMD data. Used for sending the message and sharing message results
439  *   between primary and secondary.
440  * @param[in] f
441  *   Function to proxy.
442  * @param ...
443  *   Arguments of @p func.
444  *
445  * @return
446  *   - 0: Processing succeeded and response handler was called.
447  *   - -EPERM: IPC is unavailable on this platform. This means only the
448  *             primary process may call the proxied function.
449  *   - -EIO:   IPC returned an error on request send. Inspect rte_errno for
450  *             the detailed error code.
451  *   - Negative error code from the proxied function.
452  *
453  * @note This mechanism is geared towards control-path tasks. Avoid calling it
454  *       in the fast path unless unbounded delays are allowed. This is due to
455  *       the IPC mechanism itself (socket based).
456  * @note Due to IPC parameter size limitations the proxy logic shares call
457  *       results through the struct ena_adapter shared memory. This makes the
458  *       proxy mechanism strictly single-threaded. Therefore be sure to make all
459  *       calls to the same proxied function under the same lock.
460  */
461 #define ENA_PROXY(a, f, ...)						\
462 __extension__ ({							\
463 	struct ena_adapter *_a = (a);					\
464 	struct timespec ts = { .tv_sec = ENA_MP_REQ_TMO };		\
465 	struct ena_mp_body *req, *rsp;					\
466 	struct rte_mp_reply mp_rep;					\
467 	struct rte_mp_msg mp_req;					\
468 	int ret;							\
469 									\
470 	if (rte_eal_process_type() == RTE_PROC_PRIMARY) {		\
471 		ret = f(__VA_ARGS__);					\
472 	} else {							\
473 		/* Prepare and send request */				\
474 		req = (struct ena_mp_body *)&mp_req.param;		\
475 		mp_msg_init(&mp_req, mp_type_ ## f, _a->edev_data->port_id); \
476 		mp_prep_ ## f(_a, req, ## __VA_ARGS__);			\
477 									\
478 		ret = rte_mp_request_sync(&mp_req, &mp_rep, &ts);	\
479 		if (likely(!ret)) {					\
480 			RTE_ASSERT(mp_rep.nb_received == 1);		\
481 			rsp = (struct ena_mp_body *)&mp_rep.msgs[0].param; \
482 			ret = rsp->result;				\
483 			if (ret == 0) {					\
484 				mp_proc_##f(_a, rsp, ## __VA_ARGS__);	\
485 			} else {					\
486 				PMD_DRV_LOG(ERR,			\
487 					    "%s returned error: %d\n",	\
488 					    mp_name_ ## f, rsp->result);\
489 			}						\
490 			free(mp_rep.msgs);				\
491 		} else if (rte_errno == ENOTSUP) {			\
492 			PMD_DRV_LOG(ERR,				\
493 				    "No IPC, can't proxy to primary\n");\
494 			ret = -rte_errno;				\
495 		} else {						\
496 			PMD_DRV_LOG(ERR, "Request %s failed: %s\n",	\
497 				    mp_name_ ## f,			\
498 				    rte_strerror(rte_errno));		\
499 			ret = -EIO;					\
500 		}							\
501 	}								\
502 	ret;								\
503 })
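
/*
 * Usage sketch (see e.g. ena_mtu_set() below):
 *   rc = ENA_PROXY(adapter, ena_com_set_dev_mtu, ena_dev, mtu);
 * In the primary process this calls ena_com_set_dev_mtu() directly; in a
 * secondary process it sends an ENA_MP_MTU_SET request to the primary instead.
 */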
504 
505 /*********************************************************************
506  *  Multi-Process communication request descriptors
507  *********************************************************************/
508 
509 ENA_PROXY_DESC(ena_com_get_dev_basic_stats, ENA_MP_DEV_STATS_GET,
510 __extension__ ({
511 	ENA_TOUCH(adapter);
512 	ENA_TOUCH(req);
513 	ENA_TOUCH(ena_dev);
514 	ENA_TOUCH(stats);
515 }),
516 __extension__ ({
517 	ENA_TOUCH(rsp);
518 	ENA_TOUCH(ena_dev);
519 	if (stats != &adapter->basic_stats)
520 		rte_memcpy(stats, &adapter->basic_stats, sizeof(*stats));
521 }),
522 	struct ena_com_dev *ena_dev, struct ena_admin_basic_stats *stats);
523 
524 ENA_PROXY_DESC(ena_com_get_eni_stats, ENA_MP_ENI_STATS_GET,
525 __extension__ ({
526 	ENA_TOUCH(adapter);
527 	ENA_TOUCH(req);
528 	ENA_TOUCH(ena_dev);
529 	ENA_TOUCH(stats);
530 }),
531 __extension__ ({
532 	ENA_TOUCH(rsp);
533 	ENA_TOUCH(ena_dev);
534 	if (stats != (struct ena_admin_eni_stats *)adapter->metrics_stats)
535 		rte_memcpy(stats, adapter->metrics_stats, sizeof(*stats));
536 }),
537 	struct ena_com_dev *ena_dev, struct ena_admin_eni_stats *stats);
538 
539 ENA_PROXY_DESC(ena_com_set_dev_mtu, ENA_MP_MTU_SET,
540 __extension__ ({
541 	ENA_TOUCH(adapter);
542 	ENA_TOUCH(ena_dev);
543 	req->args.mtu = mtu;
544 }),
545 __extension__ ({
546 	ENA_TOUCH(adapter);
547 	ENA_TOUCH(rsp);
548 	ENA_TOUCH(ena_dev);
549 	ENA_TOUCH(mtu);
550 }),
551 	struct ena_com_dev *ena_dev, int mtu);
552 
553 ENA_PROXY_DESC(ena_com_indirect_table_set, ENA_MP_IND_TBL_SET,
554 __extension__ ({
555 	ENA_TOUCH(adapter);
556 	ENA_TOUCH(req);
557 	ENA_TOUCH(ena_dev);
558 }),
559 __extension__ ({
560 	ENA_TOUCH(adapter);
561 	ENA_TOUCH(rsp);
562 	ENA_TOUCH(ena_dev);
563 }),
564 	struct ena_com_dev *ena_dev);
565 
566 ENA_PROXY_DESC(ena_com_indirect_table_get, ENA_MP_IND_TBL_GET,
567 __extension__ ({
568 	ENA_TOUCH(adapter);
569 	ENA_TOUCH(req);
570 	ENA_TOUCH(ena_dev);
571 	ENA_TOUCH(ind_tbl);
572 }),
573 __extension__ ({
574 	ENA_TOUCH(rsp);
575 	ENA_TOUCH(ena_dev);
576 	if (ind_tbl != adapter->indirect_table)
577 		rte_memcpy(ind_tbl, adapter->indirect_table,
578 			   sizeof(adapter->indirect_table));
579 }),
580 	struct ena_com_dev *ena_dev, u32 *ind_tbl);
581 
582 ENA_PROXY_DESC(ena_com_get_customer_metrics, ENA_MP_CUSTOMER_METRICS_GET,
583 __extension__ ({
584 	ENA_TOUCH(adapter);
585 	ENA_TOUCH(req);
586 	ENA_TOUCH(ena_dev);
587 	ENA_TOUCH(buf);
588 	ENA_TOUCH(buf_size);
589 }),
590 __extension__ ({
591 	ENA_TOUCH(rsp);
592 	ENA_TOUCH(ena_dev);
593 	if (buf != (char *)adapter->metrics_stats)
594 		rte_memcpy(buf, adapter->metrics_stats, buf_size);
595 }),
596 	struct ena_com_dev *ena_dev, char *buf, size_t buf_size);
597 
598 ENA_PROXY_DESC(ena_com_get_ena_srd_info, ENA_MP_SRD_STATS_GET,
599 __extension__ ({
600 	ENA_TOUCH(adapter);
601 	ENA_TOUCH(req);
602 	ENA_TOUCH(ena_dev);
603 	ENA_TOUCH(info);
604 }),
605 __extension__ ({
606 	ENA_TOUCH(rsp);
607 	ENA_TOUCH(ena_dev);
608 	if ((struct ena_stats_srd *)info != &adapter->srd_stats)
609 		rte_memcpy((struct ena_stats_srd *)info,
610 				&adapter->srd_stats,
611 				sizeof(struct ena_stats_srd));
612 }),
613 	struct ena_com_dev *ena_dev, struct ena_admin_ena_srd_info *info);
614 
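/*
 * Record a pending device reset. The flag is checked in ena_stop(), which
 * performs ena_com_dev_reset() with the recorded reason.
 */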
615 static inline void ena_trigger_reset(struct ena_adapter *adapter,
616 				     enum ena_regs_reset_reason_types reason)
617 {
618 	if (likely(!adapter->trigger_reset)) {
619 		adapter->reset_reason = reason;
620 		adapter->trigger_reset = true;
621 	}
622 }
623 
624 static inline void ena_rx_mbuf_prepare(struct ena_ring *rx_ring,
625 				       struct rte_mbuf *mbuf,
626 				       struct ena_com_rx_ctx *ena_rx_ctx,
627 				       bool fill_hash)
628 {
629 	struct ena_stats_rx *rx_stats = &rx_ring->rx_stats;
630 	uint64_t ol_flags = 0;
631 	uint32_t packet_type = 0;
632 
633 	if (ena_rx_ctx->l4_proto == ENA_ETH_IO_L4_PROTO_TCP)
634 		packet_type |= RTE_PTYPE_L4_TCP;
635 	else if (ena_rx_ctx->l4_proto == ENA_ETH_IO_L4_PROTO_UDP)
636 		packet_type |= RTE_PTYPE_L4_UDP;
637 
638 	if (ena_rx_ctx->l3_proto == ENA_ETH_IO_L3_PROTO_IPV4) {
639 		packet_type |= RTE_PTYPE_L3_IPV4;
640 		if (unlikely(ena_rx_ctx->l3_csum_err)) {
641 			++rx_stats->l3_csum_bad;
642 			ol_flags |= RTE_MBUF_F_RX_IP_CKSUM_BAD;
643 		} else {
644 			ol_flags |= RTE_MBUF_F_RX_IP_CKSUM_GOOD;
645 		}
646 	} else if (ena_rx_ctx->l3_proto == ENA_ETH_IO_L3_PROTO_IPV6) {
647 		packet_type |= RTE_PTYPE_L3_IPV6;
648 	}
649 
650 	if (!ena_rx_ctx->l4_csum_checked || ena_rx_ctx->frag) {
651 		ol_flags |= RTE_MBUF_F_RX_L4_CKSUM_UNKNOWN;
652 	} else {
653 		if (unlikely(ena_rx_ctx->l4_csum_err)) {
654 			++rx_stats->l4_csum_bad;
655 			/*
656 			 * For the L4 Rx checksum offload the HW may indicate
657 			 * a bad checksum even though it is valid. Because of that,
658 			 * we're setting the UNKNOWN flag to let the app
659 			 * re-verify the checksum.
660 			 */
661 			ol_flags |= RTE_MBUF_F_RX_L4_CKSUM_UNKNOWN;
662 		} else {
663 			++rx_stats->l4_csum_good;
664 			ol_flags |= RTE_MBUF_F_RX_L4_CKSUM_GOOD;
665 		}
666 	}
667 
668 	if (fill_hash &&
669 	    likely((packet_type & ENA_PTYPE_HAS_HASH) && !ena_rx_ctx->frag)) {
670 		ol_flags |= RTE_MBUF_F_RX_RSS_HASH;
671 		mbuf->hash.rss = ena_rx_ctx->hash;
672 	}
673 
674 	mbuf->ol_flags = ol_flags;
675 	mbuf->packet_type = packet_type;
676 }
677 
678 static inline void ena_tx_mbuf_prepare(struct rte_mbuf *mbuf,
679 				       struct ena_com_tx_ctx *ena_tx_ctx,
680 				       uint64_t queue_offloads,
681 				       bool disable_meta_caching)
682 {
683 	struct ena_com_tx_meta *ena_meta = &ena_tx_ctx->ena_meta;
684 
685 	if ((mbuf->ol_flags & MBUF_OFFLOADS) &&
686 	    (queue_offloads & QUEUE_OFFLOADS)) {
687 		/* check if TSO is required */
688 		if ((mbuf->ol_flags & RTE_MBUF_F_TX_TCP_SEG) &&
689 		    (queue_offloads & RTE_ETH_TX_OFFLOAD_TCP_TSO)) {
690 			ena_tx_ctx->tso_enable = true;
691 
692 			ena_meta->l4_hdr_len = GET_L4_HDR_LEN(mbuf);
693 		}
694 
695 		/* check if L3 checksum is needed */
696 		if ((mbuf->ol_flags & RTE_MBUF_F_TX_IP_CKSUM) &&
697 		    (queue_offloads & RTE_ETH_TX_OFFLOAD_IPV4_CKSUM))
698 			ena_tx_ctx->l3_csum_enable = true;
699 
700 		if (mbuf->ol_flags & RTE_MBUF_F_TX_IPV6) {
701 			ena_tx_ctx->l3_proto = ENA_ETH_IO_L3_PROTO_IPV6;
702 			/* For IPv6 packets, DF always needs to be set. */
703 			ena_tx_ctx->df = 1;
704 		} else {
705 			ena_tx_ctx->l3_proto = ENA_ETH_IO_L3_PROTO_IPV4;
706 
707 			/* set don't fragment (DF) flag */
708 			if (mbuf->packet_type &
709 				(RTE_PTYPE_L4_NONFRAG
710 				 | RTE_PTYPE_INNER_L4_NONFRAG))
711 				ena_tx_ctx->df = 1;
712 		}
713 
714 		/* check if L4 checksum is needed */
715 		if (((mbuf->ol_flags & RTE_MBUF_F_TX_L4_MASK) == RTE_MBUF_F_TX_TCP_CKSUM) &&
716 		    (queue_offloads & RTE_ETH_TX_OFFLOAD_TCP_CKSUM)) {
717 			ena_tx_ctx->l4_proto = ENA_ETH_IO_L4_PROTO_TCP;
718 			ena_tx_ctx->l4_csum_enable = true;
719 		} else if (((mbuf->ol_flags & RTE_MBUF_F_TX_L4_MASK) ==
720 				RTE_MBUF_F_TX_UDP_CKSUM) &&
721 				(queue_offloads & RTE_ETH_TX_OFFLOAD_UDP_CKSUM)) {
722 			ena_tx_ctx->l4_proto = ENA_ETH_IO_L4_PROTO_UDP;
723 			ena_tx_ctx->l4_csum_enable = true;
724 		} else {
725 			ena_tx_ctx->l4_proto = ENA_ETH_IO_L4_PROTO_UNKNOWN;
726 			ena_tx_ctx->l4_csum_enable = false;
727 		}
728 
729 		ena_meta->mss = mbuf->tso_segsz;
730 		ena_meta->l3_hdr_len = mbuf->l3_len;
731 		ena_meta->l3_hdr_offset = mbuf->l2_len;
732 
733 		ena_tx_ctx->meta_valid = true;
734 	} else if (disable_meta_caching) {
735 		memset(ena_meta, 0, sizeof(*ena_meta));
736 		ena_tx_ctx->meta_valid = true;
737 	} else {
738 		ena_tx_ctx->meta_valid = false;
739 	}
740 }
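
/*
 * Illustrative example (hypothetical values) of the mbuf fields an application
 * sets so that the checks above enable TSO and the checksum offloads:
 *   mbuf->ol_flags  = RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_IP_CKSUM |
 *                     RTE_MBUF_F_TX_TCP_SEG;
 *   mbuf->l2_len    = sizeof(struct rte_ether_hdr);
 *   mbuf->l3_len    = sizeof(struct rte_ipv4_hdr);
 *   mbuf->tso_segsz = 1448;
 */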
741 
742 static int validate_tx_req_id(struct ena_ring *tx_ring, u16 req_id)
743 {
744 	struct ena_tx_buffer *tx_info = NULL;
745 
746 	if (likely(req_id < tx_ring->ring_size)) {
747 		tx_info = &tx_ring->tx_buffer_info[req_id];
748 		if (likely(tx_info->mbuf))
749 			return 0;
750 	}
751 
752 	if (tx_info)
753 		PMD_TX_LOG(ERR, "tx_info doesn't have valid mbuf. queue %d:%d req_id %u\n",
754 			tx_ring->port_id, tx_ring->id, req_id);
755 	else
756 		PMD_TX_LOG(ERR, "Invalid req_id: %hu in queue %d:%d\n",
757 			req_id, tx_ring->port_id, tx_ring->id);
758 
759 	/* Trigger device reset */
760 	++tx_ring->tx_stats.bad_req_id;
761 	ena_trigger_reset(tx_ring->adapter, ENA_REGS_RESET_INV_TX_REQ_ID);
762 	return -EFAULT;
763 }
764 
765 static void ena_config_host_info(struct ena_com_dev *ena_dev)
766 {
767 	struct ena_admin_host_info *host_info;
768 	int rc;
769 
770 	/* Allocate only the host info */
771 	rc = ena_com_allocate_host_info(ena_dev);
772 	if (rc) {
773 		PMD_DRV_LOG(ERR, "Cannot allocate host info\n");
774 		return;
775 	}
776 
777 	host_info = ena_dev->host_attr.host_info;
778 
779 	host_info->os_type = ENA_ADMIN_OS_DPDK;
780 	host_info->kernel_ver = RTE_VERSION;
781 	strlcpy((char *)host_info->kernel_ver_str, rte_version(),
782 		sizeof(host_info->kernel_ver_str));
783 	host_info->os_dist = RTE_VERSION;
784 	strlcpy((char *)host_info->os_dist_str, rte_version(),
785 		sizeof(host_info->os_dist_str));
786 	host_info->driver_version =
787 		(DRV_MODULE_VER_MAJOR) |
788 		(DRV_MODULE_VER_MINOR << ENA_ADMIN_HOST_INFO_MINOR_SHIFT) |
789 		(DRV_MODULE_VER_SUBMINOR <<
790 			ENA_ADMIN_HOST_INFO_SUB_MINOR_SHIFT);
791 	host_info->num_cpus = rte_lcore_count();
792 
793 	host_info->driver_supported_features =
794 		ENA_ADMIN_HOST_INFO_RX_OFFSET_MASK |
795 		ENA_ADMIN_HOST_INFO_RSS_CONFIGURABLE_FUNCTION_KEY_MASK;
796 
797 	rc = ena_com_set_host_attributes(ena_dev);
798 	if (rc) {
799 		if (rc == -ENA_COM_UNSUPPORTED)
800 			PMD_DRV_LOG(WARNING, "Cannot set host attributes\n");
801 		else
802 			PMD_DRV_LOG(ERR, "Cannot set host attributes\n");
803 
804 		goto err;
805 	}
806 
807 	return;
808 
809 err:
810 	ena_com_delete_host_info(ena_dev);
811 }
812 
813 /* This function calculates the number of xstats based on the current config */
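/*
 * For example, with 4 Rx and 4 Tx queues and the full (non-legacy) metrics set
 * it returns 5 + 6 + 5 + 4 * 8 + 4 * 9 = 84.
 */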
814 static unsigned int ena_xstats_calc_num(struct rte_eth_dev_data *data)
815 {
816 	struct ena_adapter *adapter = data->dev_private;
817 
818 	return ENA_STATS_ARRAY_GLOBAL +
819 		adapter->metrics_num +
820 		ENA_STATS_ARRAY_ENA_SRD +
821 		(data->nb_tx_queues * ENA_STATS_ARRAY_TX) +
822 		(data->nb_rx_queues * ENA_STATS_ARRAY_RX);
823 }
824 
825 static void ena_config_debug_area(struct ena_adapter *adapter)
826 {
827 	u32 debug_area_size;
828 	int rc, ss_count;
829 
830 	ss_count = ena_xstats_calc_num(adapter->edev_data);
831 
832 	/* Allocate 32 bytes for each string and 64 bits for each value. */
833 	debug_area_size = ss_count * ETH_GSTRING_LEN + sizeof(u64) * ss_count;
834 
835 	rc = ena_com_allocate_debug_area(&adapter->ena_dev, debug_area_size);
836 	if (rc) {
837 		PMD_DRV_LOG(ERR, "Cannot allocate debug area\n");
838 		return;
839 	}
840 
841 	rc = ena_com_set_host_attributes(&adapter->ena_dev);
842 	if (rc) {
843 		if (rc == -ENA_COM_UNSUPPORTED)
844 			PMD_DRV_LOG(WARNING, "Cannot set host attributes\n");
845 		else
846 			PMD_DRV_LOG(ERR, "Cannot set host attributes\n");
847 
848 		goto err;
849 	}
850 
851 	return;
852 err:
853 	ena_com_delete_debug_area(&adapter->ena_dev);
854 }
855 
856 static int ena_close(struct rte_eth_dev *dev)
857 {
858 	struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev);
859 	struct rte_intr_handle *intr_handle = pci_dev->intr_handle;
860 	struct ena_adapter *adapter = dev->data->dev_private;
861 	int ret = 0;
862 
863 	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
864 		return 0;
865 
866 	if (adapter->state == ENA_ADAPTER_STATE_RUNNING)
867 		ret = ena_stop(dev);
868 	adapter->state = ENA_ADAPTER_STATE_CLOSED;
869 
870 	ena_rx_queue_release_all(dev);
871 	ena_tx_queue_release_all(dev);
872 
873 	rte_free(adapter->drv_stats);
874 	adapter->drv_stats = NULL;
875 
876 	rte_intr_disable(intr_handle);
877 	rte_intr_callback_unregister(intr_handle,
878 				     ena_interrupt_handler_rte,
879 				     dev);
880 
881 	/*
882 	 * MAC is not allocated dynamically. Setting it to NULL prevents
883 	 * rte_eth_dev_release_port() from releasing the resource.
884 	 */
885 	dev->data->mac_addrs = NULL;
886 
887 	return ret;
888 }
889 
890 static int
891 ena_dev_reset(struct rte_eth_dev *dev)
892 {
893 	int rc = 0;
894 
895 	/* Cannot release memory in secondary process */
896 	if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
897 		PMD_DRV_LOG(WARNING, "dev_reset not supported in secondary.\n");
898 		return -EPERM;
899 	}
900 
901 	ena_destroy_device(dev);
902 	rc = eth_ena_dev_init(dev);
903 	if (rc)
904 		PMD_INIT_LOG(CRIT, "Cannot initialize device\n");
905 
906 	return rc;
907 }
908 
909 static void ena_rx_queue_release_all(struct rte_eth_dev *dev)
910 {
911 	int nb_queues = dev->data->nb_rx_queues;
912 	int i;
913 
914 	for (i = 0; i < nb_queues; i++)
915 		ena_rx_queue_release(dev, i);
916 }
917 
918 static void ena_tx_queue_release_all(struct rte_eth_dev *dev)
919 {
920 	int nb_queues = dev->data->nb_tx_queues;
921 	int i;
922 
923 	for (i = 0; i < nb_queues; i++)
924 		ena_tx_queue_release(dev, i);
925 }
926 
927 static void ena_rx_queue_release(struct rte_eth_dev *dev, uint16_t qid)
928 {
929 	struct ena_ring *ring = dev->data->rx_queues[qid];
930 
931 	/* Free ring resources */
932 	rte_free(ring->rx_buffer_info);
933 	ring->rx_buffer_info = NULL;
934 
935 	rte_free(ring->rx_refill_buffer);
936 	ring->rx_refill_buffer = NULL;
937 
938 	rte_free(ring->empty_rx_reqs);
939 	ring->empty_rx_reqs = NULL;
940 
941 	ring->configured = 0;
942 
943 	PMD_DRV_LOG(NOTICE, "Rx queue %d:%d released\n",
944 		ring->port_id, ring->id);
945 }
946 
947 static void ena_tx_queue_release(struct rte_eth_dev *dev, uint16_t qid)
948 {
949 	struct ena_ring *ring = dev->data->tx_queues[qid];
950 
951 	/* Free ring resources */
952 	rte_free(ring->push_buf_intermediate_buf);
953 
954 	rte_free(ring->tx_buffer_info);
955 
956 	rte_free(ring->empty_tx_reqs);
957 
958 	ring->empty_tx_reqs = NULL;
959 	ring->tx_buffer_info = NULL;
960 	ring->push_buf_intermediate_buf = NULL;
961 
962 	ring->configured = 0;
963 
964 	PMD_DRV_LOG(NOTICE, "Tx queue %d:%d released\n",
965 		ring->port_id, ring->id);
966 }
967 
968 static void ena_rx_queue_release_bufs(struct ena_ring *ring)
969 {
970 	unsigned int i;
971 
972 	for (i = 0; i < ring->ring_size; ++i) {
973 		struct ena_rx_buffer *rx_info = &ring->rx_buffer_info[i];
974 		if (rx_info->mbuf) {
975 			rte_mbuf_raw_free(rx_info->mbuf);
976 			rx_info->mbuf = NULL;
977 		}
978 	}
979 }
980 
981 static void ena_tx_queue_release_bufs(struct ena_ring *ring)
982 {
983 	unsigned int i;
984 
985 	for (i = 0; i < ring->ring_size; ++i) {
986 		struct ena_tx_buffer *tx_buf = &ring->tx_buffer_info[i];
987 
988 		if (tx_buf->mbuf) {
989 			rte_pktmbuf_free(tx_buf->mbuf);
990 			tx_buf->mbuf = NULL;
991 		}
992 	}
993 }
994 
995 static int ena_link_update(struct rte_eth_dev *dev,
996 			   __rte_unused int wait_to_complete)
997 {
998 	struct rte_eth_link *link = &dev->data->dev_link;
999 	struct ena_adapter *adapter = dev->data->dev_private;
1000 
1001 	link->link_status = adapter->link_status ? RTE_ETH_LINK_UP : RTE_ETH_LINK_DOWN;
1002 	link->link_speed = RTE_ETH_SPEED_NUM_NONE;
1003 	link->link_duplex = RTE_ETH_LINK_FULL_DUPLEX;
1004 
1005 	return 0;
1006 }
1007 
1008 static int ena_queue_start_all(struct rte_eth_dev *dev,
1009 			       enum ena_ring_type ring_type)
1010 {
1011 	struct ena_adapter *adapter = dev->data->dev_private;
1012 	struct ena_ring *queues = NULL;
1013 	int nb_queues;
1014 	int i = 0;
1015 	int rc = 0;
1016 
1017 	if (ring_type == ENA_RING_TYPE_RX) {
1018 		queues = adapter->rx_ring;
1019 		nb_queues = dev->data->nb_rx_queues;
1020 	} else {
1021 		queues = adapter->tx_ring;
1022 		nb_queues = dev->data->nb_tx_queues;
1023 	}
1024 	for (i = 0; i < nb_queues; i++) {
1025 		if (queues[i].configured) {
1026 			if (ring_type == ENA_RING_TYPE_RX) {
1027 				ena_assert_msg(
1028 					dev->data->rx_queues[i] == &queues[i],
1029 					"Inconsistent state of Rx queues\n");
1030 			} else {
1031 				ena_assert_msg(
1032 					dev->data->tx_queues[i] == &queues[i],
1033 					"Inconsistent state of Tx queues\n");
1034 			}
1035 
1036 			rc = ena_queue_start(dev, &queues[i]);
1037 
1038 			if (rc) {
1039 				PMD_INIT_LOG(ERR,
1040 					"Failed to start queue[%d] of type(%d)\n",
1041 					i, ring_type);
1042 				goto err;
1043 			}
1044 		}
1045 	}
1046 
1047 	return 0;
1048 
1049 err:
1050 	while (i--)
1051 		if (queues[i].configured)
1052 			ena_queue_stop(&queues[i]);
1053 
1054 	return rc;
1055 }
1056 
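/*
 * Derive the maximum Rx/Tx queue sizes and SGL sizes from the device features.
 * For example, a device reporting 1000-entry Rx SQ and CQ depths yields
 * max_rx_queue_size = 512 after rounding down to a power of 2.
 */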
1057 static int
1058 ena_calc_io_queue_size(struct ena_calc_queue_size_ctx *ctx,
1059 		       bool use_large_llq_hdr)
1060 {
1061 	struct ena_admin_feature_llq_desc *llq = &ctx->get_feat_ctx->llq;
1062 	struct ena_com_dev *ena_dev = ctx->ena_dev;
1063 	uint32_t max_tx_queue_size;
1064 	uint32_t max_rx_queue_size;
1065 
1066 	if (ena_dev->supported_features & BIT(ENA_ADMIN_MAX_QUEUES_EXT)) {
1067 		struct ena_admin_queue_ext_feature_fields *max_queue_ext =
1068 			&ctx->get_feat_ctx->max_queue_ext.max_queue_ext;
1069 		max_rx_queue_size = RTE_MIN(max_queue_ext->max_rx_cq_depth,
1070 			max_queue_ext->max_rx_sq_depth);
1071 		max_tx_queue_size = max_queue_ext->max_tx_cq_depth;
1072 
1073 		if (ena_dev->tx_mem_queue_type ==
1074 		    ENA_ADMIN_PLACEMENT_POLICY_DEV) {
1075 			max_tx_queue_size = RTE_MIN(max_tx_queue_size,
1076 				llq->max_llq_depth);
1077 		} else {
1078 			max_tx_queue_size = RTE_MIN(max_tx_queue_size,
1079 				max_queue_ext->max_tx_sq_depth);
1080 		}
1081 
1082 		ctx->max_rx_sgl_size = RTE_MIN(ENA_PKT_MAX_BUFS,
1083 			max_queue_ext->max_per_packet_rx_descs);
1084 		ctx->max_tx_sgl_size = RTE_MIN(ENA_PKT_MAX_BUFS,
1085 			max_queue_ext->max_per_packet_tx_descs);
1086 	} else {
1087 		struct ena_admin_queue_feature_desc *max_queues =
1088 			&ctx->get_feat_ctx->max_queues;
1089 		max_rx_queue_size = RTE_MIN(max_queues->max_cq_depth,
1090 			max_queues->max_sq_depth);
1091 		max_tx_queue_size = max_queues->max_cq_depth;
1092 
1093 		if (ena_dev->tx_mem_queue_type ==
1094 		    ENA_ADMIN_PLACEMENT_POLICY_DEV) {
1095 			max_tx_queue_size = RTE_MIN(max_tx_queue_size,
1096 				llq->max_llq_depth);
1097 		} else {
1098 			max_tx_queue_size = RTE_MIN(max_tx_queue_size,
1099 				max_queues->max_sq_depth);
1100 		}
1101 
1102 		ctx->max_rx_sgl_size = RTE_MIN(ENA_PKT_MAX_BUFS,
1103 			max_queues->max_packet_rx_descs);
1104 		ctx->max_tx_sgl_size = RTE_MIN(ENA_PKT_MAX_BUFS,
1105 			max_queues->max_packet_tx_descs);
1106 	}
1107 
1108 	/* Round down to the nearest power of 2 */
1109 	max_rx_queue_size = rte_align32prevpow2(max_rx_queue_size);
1110 	max_tx_queue_size = rte_align32prevpow2(max_tx_queue_size);
1111 
1112 	if (use_large_llq_hdr) {
1113 		if ((llq->entry_size_ctrl_supported &
1114 		     ENA_ADMIN_LIST_ENTRY_SIZE_256B) &&
1115 		    (ena_dev->tx_mem_queue_type ==
1116 		     ENA_ADMIN_PLACEMENT_POLICY_DEV)) {
1117 			max_tx_queue_size /= 2;
1118 			PMD_INIT_LOG(INFO,
1119 				"Forcing large headers and decreasing maximum Tx queue size to %d\n",
1120 				max_tx_queue_size);
1121 		} else {
1122 			PMD_INIT_LOG(ERR,
1123 				"Forcing large headers failed: LLQ is disabled or device does not support large headers\n");
1124 		}
1125 	}
1126 
1127 	if (unlikely(max_rx_queue_size == 0 || max_tx_queue_size == 0)) {
1128 		PMD_INIT_LOG(ERR, "Invalid queue size\n");
1129 		return -EFAULT;
1130 	}
1131 
1132 	ctx->max_tx_queue_size = max_tx_queue_size;
1133 	ctx->max_rx_queue_size = max_rx_queue_size;
1134 
1135 	return 0;
1136 }
1137 
1138 static void ena_stats_restart(struct rte_eth_dev *dev)
1139 {
1140 	struct ena_adapter *adapter = dev->data->dev_private;
1141 
1142 	rte_atomic64_init(&adapter->drv_stats->ierrors);
1143 	rte_atomic64_init(&adapter->drv_stats->oerrors);
1144 	rte_atomic64_init(&adapter->drv_stats->rx_nombuf);
1145 	adapter->drv_stats->rx_drops = 0;
1146 }
1147 
1148 static int ena_stats_get(struct rte_eth_dev *dev,
1149 			  struct rte_eth_stats *stats)
1150 {
1151 	struct ena_admin_basic_stats ena_stats;
1152 	struct ena_adapter *adapter = dev->data->dev_private;
1153 	struct ena_com_dev *ena_dev = &adapter->ena_dev;
1154 	int rc;
1155 	int i;
1156 	int max_rings_stats;
1157 
1158 	memset(&ena_stats, 0, sizeof(ena_stats));
1159 
1160 	rte_spinlock_lock(&adapter->admin_lock);
1161 	rc = ENA_PROXY(adapter, ena_com_get_dev_basic_stats, ena_dev,
1162 		       &ena_stats);
1163 	rte_spinlock_unlock(&adapter->admin_lock);
1164 	if (unlikely(rc)) {
1165 		PMD_DRV_LOG(ERR, "Could not retrieve statistics from ENA\n");
1166 		return rc;
1167 	}
1168 
1169 	/* Set of basic statistics from ENA */
1170 	stats->ipackets = __MERGE_64B_H_L(ena_stats.rx_pkts_high,
1171 					  ena_stats.rx_pkts_low);
1172 	stats->opackets = __MERGE_64B_H_L(ena_stats.tx_pkts_high,
1173 					  ena_stats.tx_pkts_low);
1174 	stats->ibytes = __MERGE_64B_H_L(ena_stats.rx_bytes_high,
1175 					ena_stats.rx_bytes_low);
1176 	stats->obytes = __MERGE_64B_H_L(ena_stats.tx_bytes_high,
1177 					ena_stats.tx_bytes_low);
1178 
1179 	/* Driver related stats */
1180 	stats->imissed = adapter->drv_stats->rx_drops;
1181 	stats->ierrors = rte_atomic64_read(&adapter->drv_stats->ierrors);
1182 	stats->oerrors = rte_atomic64_read(&adapter->drv_stats->oerrors);
1183 	stats->rx_nombuf = rte_atomic64_read(&adapter->drv_stats->rx_nombuf);
1184 
1185 	max_rings_stats = RTE_MIN(dev->data->nb_rx_queues,
1186 		RTE_ETHDEV_QUEUE_STAT_CNTRS);
1187 	for (i = 0; i < max_rings_stats; ++i) {
1188 		struct ena_stats_rx *rx_stats = &adapter->rx_ring[i].rx_stats;
1189 
1190 		stats->q_ibytes[i] = rx_stats->bytes;
1191 		stats->q_ipackets[i] = rx_stats->cnt;
1192 		stats->q_errors[i] = rx_stats->bad_desc_num +
1193 			rx_stats->bad_req_id;
1194 	}
1195 
1196 	max_rings_stats = RTE_MIN(dev->data->nb_tx_queues,
1197 		RTE_ETHDEV_QUEUE_STAT_CNTRS);
1198 	for (i = 0; i < max_rings_stats; ++i) {
1199 		struct ena_stats_tx *tx_stats = &adapter->tx_ring[i].tx_stats;
1200 
1201 		stats->q_obytes[i] = tx_stats->bytes;
1202 		stats->q_opackets[i] = tx_stats->cnt;
1203 	}
1204 
1205 	return 0;
1206 }
1207 
1208 static int ena_mtu_set(struct rte_eth_dev *dev, uint16_t mtu)
1209 {
1210 	struct ena_adapter *adapter;
1211 	struct ena_com_dev *ena_dev;
1212 	int rc = 0;
1213 
1214 	ena_assert_msg(dev->data != NULL, "Uninitialized device\n");
1215 	ena_assert_msg(dev->data->dev_private != NULL, "Uninitialized device\n");
1216 	adapter = dev->data->dev_private;
1217 
1218 	ena_dev = &adapter->ena_dev;
1219 	ena_assert_msg(ena_dev != NULL, "Uninitialized device\n");
1220 
1221 	rc = ENA_PROXY(adapter, ena_com_set_dev_mtu, ena_dev, mtu);
1222 	if (rc)
1223 		PMD_DRV_LOG(ERR, "Could not set MTU: %d\n", mtu);
1224 	else
1225 		PMD_DRV_LOG(NOTICE, "MTU set to: %d\n", mtu);
1226 
1227 	return rc;
1228 }
1229 
1230 static int ena_start(struct rte_eth_dev *dev)
1231 {
1232 	struct ena_adapter *adapter = dev->data->dev_private;
1233 	uint64_t ticks;
1234 	int rc = 0;
1235 	uint16_t i;
1236 
1237 	/* Cannot allocate memory in secondary process */
1238 	if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
1239 		PMD_DRV_LOG(WARNING, "dev_start not supported in secondary.\n");
1240 		return -EPERM;
1241 	}
1242 
1243 	rc = ena_setup_rx_intr(dev);
1244 	if (rc)
1245 		return rc;
1246 
1247 	rc = ena_queue_start_all(dev, ENA_RING_TYPE_RX);
1248 	if (rc)
1249 		return rc;
1250 
1251 	rc = ena_queue_start_all(dev, ENA_RING_TYPE_TX);
1252 	if (rc)
1253 		goto err_start_tx;
1254 
1255 	if (adapter->edev_data->dev_conf.rxmode.mq_mode & RTE_ETH_MQ_RX_RSS_FLAG) {
1256 		rc = ena_rss_configure(adapter);
1257 		if (rc)
1258 			goto err_rss_init;
1259 	}
1260 
1261 	ena_stats_restart(dev);
1262 
1263 	adapter->timestamp_wd = rte_get_timer_cycles();
1264 	adapter->keep_alive_timeout = ENA_DEVICE_KALIVE_TIMEOUT;
1265 
1266 	ticks = rte_get_timer_hz();
1267 	rte_timer_reset(&adapter->timer_wd, ticks, PERIODICAL, rte_lcore_id(),
1268 			ena_timer_wd_callback, dev);
1269 
1270 	++adapter->dev_stats.dev_start;
1271 	adapter->state = ENA_ADAPTER_STATE_RUNNING;
1272 
1273 	for (i = 0; i < dev->data->nb_rx_queues; i++)
1274 		dev->data->rx_queue_state[i] = RTE_ETH_QUEUE_STATE_STARTED;
1275 	for (i = 0; i < dev->data->nb_tx_queues; i++)
1276 		dev->data->tx_queue_state[i] = RTE_ETH_QUEUE_STATE_STARTED;
1277 
1278 	return 0;
1279 
1280 err_rss_init:
1281 	ena_queue_stop_all(dev, ENA_RING_TYPE_TX);
1282 err_start_tx:
1283 	ena_queue_stop_all(dev, ENA_RING_TYPE_RX);
1284 	return rc;
1285 }
1286 
1287 static int ena_stop(struct rte_eth_dev *dev)
1288 {
1289 	struct ena_adapter *adapter = dev->data->dev_private;
1290 	struct ena_com_dev *ena_dev = &adapter->ena_dev;
1291 	struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev);
1292 	struct rte_intr_handle *intr_handle = pci_dev->intr_handle;
1293 	uint16_t i;
1294 	int rc;
1295 
1296 	/* Cannot free memory in secondary process */
1297 	if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
1298 		PMD_DRV_LOG(WARNING, "dev_stop not supported in secondary.\n");
1299 		return -EPERM;
1300 	}
1301 
1302 	rte_timer_stop_sync(&adapter->timer_wd);
1303 	ena_queue_stop_all(dev, ENA_RING_TYPE_TX);
1304 	ena_queue_stop_all(dev, ENA_RING_TYPE_RX);
1305 
1306 	if (adapter->trigger_reset) {
1307 		rc = ena_com_dev_reset(ena_dev, adapter->reset_reason);
1308 		if (rc)
1309 			PMD_DRV_LOG(ERR, "Device reset failed, rc: %d\n", rc);
1310 	}
1311 
1312 	rte_intr_disable(intr_handle);
1313 
1314 	rte_intr_efd_disable(intr_handle);
1315 
1316 	/* Cleanup vector list */
1317 	rte_intr_vec_list_free(intr_handle);
1318 
1319 	rte_intr_enable(intr_handle);
1320 
1321 	++adapter->dev_stats.dev_stop;
1322 	adapter->state = ENA_ADAPTER_STATE_STOPPED;
1323 	dev->data->dev_started = 0;
1324 
1325 	for (i = 0; i < dev->data->nb_rx_queues; i++)
1326 		dev->data->rx_queue_state[i] = RTE_ETH_QUEUE_STATE_STOPPED;
1327 	for (i = 0; i < dev->data->nb_tx_queues; i++)
1328 		dev->data->tx_queue_state[i] = RTE_ETH_QUEUE_STATE_STOPPED;
1329 
1330 	return 0;
1331 }
1332 
1333 static int ena_create_io_queue(struct rte_eth_dev *dev, struct ena_ring *ring)
1334 {
1335 	struct ena_adapter *adapter = ring->adapter;
1336 	struct ena_com_dev *ena_dev = &adapter->ena_dev;
1337 	struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev);
1338 	struct rte_intr_handle *intr_handle = pci_dev->intr_handle;
1339 	struct ena_com_create_io_ctx ctx =
1340 		/* policy set to _HOST just to satisfy icc compiler */
1341 		{ ENA_ADMIN_PLACEMENT_POLICY_HOST,
1342 		  0, 0, 0, 0, 0 };
1343 	uint16_t ena_qid;
1344 	unsigned int i;
1345 	int rc;
1346 
1347 	ctx.msix_vector = -1;
1348 	if (ring->type == ENA_RING_TYPE_TX) {
1349 		ena_qid = ENA_IO_TXQ_IDX(ring->id);
1350 		ctx.direction = ENA_COM_IO_QUEUE_DIRECTION_TX;
1351 		ctx.mem_queue_type = ena_dev->tx_mem_queue_type;
1352 		for (i = 0; i < ring->ring_size; i++)
1353 			ring->empty_tx_reqs[i] = i;
1354 	} else {
1355 		ena_qid = ENA_IO_RXQ_IDX(ring->id);
1356 		ctx.direction = ENA_COM_IO_QUEUE_DIRECTION_RX;
1357 		if (rte_intr_dp_is_en(intr_handle))
1358 			ctx.msix_vector =
1359 				rte_intr_vec_list_index_get(intr_handle,
1360 								   ring->id);
1361 
1362 		for (i = 0; i < ring->ring_size; i++)
1363 			ring->empty_rx_reqs[i] = i;
1364 	}
1365 	ctx.queue_size = ring->ring_size;
1366 	ctx.qid = ena_qid;
1367 	ctx.numa_node = ring->numa_socket_id;
1368 
1369 	rc = ena_com_create_io_queue(ena_dev, &ctx);
1370 	if (rc) {
1371 		PMD_DRV_LOG(ERR,
1372 			"Failed to create IO queue[%d] (qid:%d), rc: %d\n",
1373 			ring->id, ena_qid, rc);
1374 		return rc;
1375 	}
1376 
1377 	rc = ena_com_get_io_handlers(ena_dev, ena_qid,
1378 				     &ring->ena_com_io_sq,
1379 				     &ring->ena_com_io_cq);
1380 	if (rc) {
1381 		PMD_DRV_LOG(ERR,
1382 			"Failed to get IO queue[%d] handlers, rc: %d\n",
1383 			ring->id, rc);
1384 		ena_com_destroy_io_queue(ena_dev, ena_qid);
1385 		return rc;
1386 	}
1387 
1388 	if (ring->type == ENA_RING_TYPE_TX)
1389 		ena_com_update_numa_node(ring->ena_com_io_cq, ctx.numa_node);
1390 
1391 	/* Start with Rx interrupts being masked. */
1392 	if (ring->type == ENA_RING_TYPE_RX && rte_intr_dp_is_en(intr_handle))
1393 		ena_rx_queue_intr_disable(dev, ring->id);
1394 
1395 	return 0;
1396 }
1397 
1398 static void ena_queue_stop(struct ena_ring *ring)
1399 {
1400 	struct ena_com_dev *ena_dev = &ring->adapter->ena_dev;
1401 
1402 	if (ring->type == ENA_RING_TYPE_RX) {
1403 		ena_com_destroy_io_queue(ena_dev, ENA_IO_RXQ_IDX(ring->id));
1404 		ena_rx_queue_release_bufs(ring);
1405 	} else {
1406 		ena_com_destroy_io_queue(ena_dev, ENA_IO_TXQ_IDX(ring->id));
1407 		ena_tx_queue_release_bufs(ring);
1408 	}
1409 }
1410 
1411 static void ena_queue_stop_all(struct rte_eth_dev *dev,
1412 			      enum ena_ring_type ring_type)
1413 {
1414 	struct ena_adapter *adapter = dev->data->dev_private;
1415 	struct ena_ring *queues = NULL;
1416 	uint16_t nb_queues, i;
1417 
1418 	if (ring_type == ENA_RING_TYPE_RX) {
1419 		queues = adapter->rx_ring;
1420 		nb_queues = dev->data->nb_rx_queues;
1421 	} else {
1422 		queues = adapter->tx_ring;
1423 		nb_queues = dev->data->nb_tx_queues;
1424 	}
1425 
1426 	for (i = 0; i < nb_queues; ++i)
1427 		if (queues[i].configured)
1428 			ena_queue_stop(&queues[i]);
1429 }
1430 
1431 static int ena_queue_start(struct rte_eth_dev *dev, struct ena_ring *ring)
1432 {
1433 	int rc, bufs_num;
1434 
1435 	ena_assert_msg(ring->configured == 1,
1436 		       "Trying to start unconfigured queue\n");
1437 
1438 	rc = ena_create_io_queue(dev, ring);
1439 	if (rc) {
1440 		PMD_INIT_LOG(ERR, "Failed to create IO queue\n");
1441 		return rc;
1442 	}
1443 
1444 	ring->next_to_clean = 0;
1445 	ring->next_to_use = 0;
1446 
1447 	if (ring->type == ENA_RING_TYPE_TX) {
1448 		ring->tx_stats.available_desc =
1449 			ena_com_free_q_entries(ring->ena_com_io_sq);
1450 		return 0;
1451 	}
1452 
1453 	bufs_num = ring->ring_size - 1;
1454 	rc = ena_populate_rx_queue(ring, bufs_num);
1455 	if (rc != bufs_num) {
1456 		ena_com_destroy_io_queue(&ring->adapter->ena_dev,
1457 					 ENA_IO_RXQ_IDX(ring->id));
1458 		PMD_INIT_LOG(ERR, "Failed to populate Rx ring\n");
1459 		return ENA_COM_FAULT;
1460 	}
1461 	/* Flush the per-core Rx mbuf pool caches, as the buffers can be used on
1462 	 * other cores as well.
1463 	 */
1464 	rte_mempool_cache_flush(NULL, ring->mb_pool);
1465 
1466 	return 0;
1467 }
1468 
1469 static int ena_tx_queue_setup(struct rte_eth_dev *dev,
1470 			      uint16_t queue_idx,
1471 			      uint16_t nb_desc,
1472 			      unsigned int socket_id,
1473 			      const struct rte_eth_txconf *tx_conf)
1474 {
1475 	struct ena_ring *txq = NULL;
1476 	struct ena_adapter *adapter = dev->data->dev_private;
1477 	unsigned int i;
1478 	uint16_t dyn_thresh;
1479 
1480 	txq = &adapter->tx_ring[queue_idx];
1481 
1482 	if (txq->configured) {
1483 		PMD_DRV_LOG(CRIT,
1484 			"API violation. Queue[%d] is already configured\n",
1485 			queue_idx);
1486 		return ENA_COM_FAULT;
1487 	}
1488 
1489 	if (!rte_is_power_of_2(nb_desc)) {
1490 		PMD_DRV_LOG(ERR,
1491 			"Unsupported size of Tx queue: %d is not a power of 2.\n",
1492 			nb_desc);
1493 		return -EINVAL;
1494 	}
1495 
1496 	if (nb_desc > adapter->max_tx_ring_size) {
1497 		PMD_DRV_LOG(ERR,
1498 			"Unsupported size of Tx queue (max size: %d)\n",
1499 			adapter->max_tx_ring_size);
1500 		return -EINVAL;
1501 	}
1502 
1503 	txq->port_id = dev->data->port_id;
1504 	txq->next_to_clean = 0;
1505 	txq->next_to_use = 0;
1506 	txq->ring_size = nb_desc;
1507 	txq->size_mask = nb_desc - 1;
1508 	txq->numa_socket_id = socket_id;
1509 	txq->pkts_without_db = false;
1510 	txq->last_cleanup_ticks = 0;
1511 
1512 	txq->tx_buffer_info = rte_zmalloc_socket("txq->tx_buffer_info",
1513 		sizeof(struct ena_tx_buffer) * txq->ring_size,
1514 		RTE_CACHE_LINE_SIZE,
1515 		socket_id);
1516 	if (!txq->tx_buffer_info) {
1517 		PMD_DRV_LOG(ERR,
1518 			"Failed to allocate memory for Tx buffer info\n");
1519 		return -ENOMEM;
1520 	}
1521 
1522 	txq->empty_tx_reqs = rte_zmalloc_socket("txq->empty_tx_reqs",
1523 		sizeof(uint16_t) * txq->ring_size,
1524 		RTE_CACHE_LINE_SIZE,
1525 		socket_id);
1526 	if (!txq->empty_tx_reqs) {
1527 		PMD_DRV_LOG(ERR,
1528 			"Failed to allocate memory for empty Tx requests\n");
1529 		rte_free(txq->tx_buffer_info);
1530 		return -ENOMEM;
1531 	}
1532 
1533 	txq->push_buf_intermediate_buf =
1534 		rte_zmalloc_socket("txq->push_buf_intermediate_buf",
1535 			txq->tx_max_header_size,
1536 			RTE_CACHE_LINE_SIZE,
1537 			socket_id);
1538 	if (!txq->push_buf_intermediate_buf) {
1539 		PMD_DRV_LOG(ERR, "Failed to alloc push buffer for LLQ\n");
1540 		rte_free(txq->tx_buffer_info);
1541 		rte_free(txq->empty_tx_reqs);
1542 		return -ENOMEM;
1543 	}
1544 
1545 	for (i = 0; i < txq->ring_size; i++)
1546 		txq->empty_tx_reqs[i] = i;
1547 
1548 	txq->offloads = tx_conf->offloads | dev->data->dev_conf.txmode.offloads;
1549 
1550 	/* Check if caller provided the Tx cleanup threshold value. */
1551 	if (tx_conf->tx_free_thresh != 0) {
1552 		txq->tx_free_thresh = tx_conf->tx_free_thresh;
1553 	} else {
1554 		dyn_thresh = txq->ring_size -
1555 			txq->ring_size / ENA_REFILL_THRESH_DIVIDER;
1556 		txq->tx_free_thresh = RTE_MAX(dyn_thresh,
1557 			txq->ring_size - ENA_REFILL_THRESH_PACKET);
1558 	}
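	/*
	 * Illustrative example, assuming ENA_REFILL_THRESH_DIVIDER is 8 and
	 * ENA_REFILL_THRESH_PACKET is 256: a 1024-descriptor ring gets a
	 * default tx_free_thresh of RTE_MAX(1024 - 128, 1024 - 256) = 896.
	 */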
1559 
1560 	txq->missing_tx_completion_threshold =
1561 		RTE_MIN(txq->ring_size / 2, ENA_DEFAULT_MISSING_COMP);
1562 
1563 	/* Store pointer to this queue in upper layer */
1564 	txq->configured = 1;
1565 	dev->data->tx_queues[queue_idx] = txq;
1566 
1567 	return 0;
1568 }
1569 
1570 static int ena_rx_queue_setup(struct rte_eth_dev *dev,
1571 			      uint16_t queue_idx,
1572 			      uint16_t nb_desc,
1573 			      unsigned int socket_id,
1574 			      const struct rte_eth_rxconf *rx_conf,
1575 			      struct rte_mempool *mp)
1576 {
1577 	struct ena_adapter *adapter = dev->data->dev_private;
1578 	struct ena_ring *rxq = NULL;
1579 	size_t buffer_size;
1580 	int i;
1581 	uint16_t dyn_thresh;
1582 
1583 	rxq = &adapter->rx_ring[queue_idx];
1584 	if (rxq->configured) {
1585 		PMD_DRV_LOG(CRIT,
1586 			"API violation. Queue[%d] is already configured\n",
1587 			queue_idx);
1588 		return ENA_COM_FAULT;
1589 	}
1590 
1591 	if (!rte_is_power_of_2(nb_desc)) {
1592 		PMD_DRV_LOG(ERR,
1593 			"Unsupported size of Rx queue: %d is not a power of 2.\n",
1594 			nb_desc);
1595 		return -EINVAL;
1596 	}
1597 
1598 	if (nb_desc > adapter->max_rx_ring_size) {
1599 		PMD_DRV_LOG(ERR,
1600 			"Unsupported size of Rx queue (max size: %d)\n",
1601 			adapter->max_rx_ring_size);
1602 		return -EINVAL;
1603 	}
1604 
1605 	/* ENA doesn't support buffers smaller than 1400 bytes */
1606 	buffer_size = rte_pktmbuf_data_room_size(mp) - RTE_PKTMBUF_HEADROOM;
1607 	if (buffer_size < ENA_RX_BUF_MIN_SIZE) {
1608 		PMD_DRV_LOG(ERR,
1609 			"Unsupported size of Rx buffer: %zu (min size: %d)\n",
1610 			buffer_size, ENA_RX_BUF_MIN_SIZE);
1611 		return -EINVAL;
1612 	}
1613 
1614 	rxq->port_id = dev->data->port_id;
1615 	rxq->next_to_clean = 0;
1616 	rxq->next_to_use = 0;
1617 	rxq->ring_size = nb_desc;
1618 	rxq->size_mask = nb_desc - 1;
1619 	rxq->numa_socket_id = socket_id;
1620 	rxq->mb_pool = mp;
1621 
1622 	rxq->rx_buffer_info = rte_zmalloc_socket("rxq->buffer_info",
1623 		sizeof(struct ena_rx_buffer) * nb_desc,
1624 		RTE_CACHE_LINE_SIZE,
1625 		socket_id);
1626 	if (!rxq->rx_buffer_info) {
1627 		PMD_DRV_LOG(ERR,
1628 			"Failed to allocate memory for Rx buffer info\n");
1629 		return -ENOMEM;
1630 	}
1631 
1632 	rxq->rx_refill_buffer = rte_zmalloc_socket("rxq->rx_refill_buffer",
1633 		sizeof(struct rte_mbuf *) * nb_desc,
1634 		RTE_CACHE_LINE_SIZE,
1635 		socket_id);
1636 	if (!rxq->rx_refill_buffer) {
1637 		PMD_DRV_LOG(ERR,
1638 			"Failed to allocate memory for Rx refill buffer\n");
1639 		rte_free(rxq->rx_buffer_info);
1640 		rxq->rx_buffer_info = NULL;
1641 		return -ENOMEM;
1642 	}
1643 
1644 	rxq->empty_rx_reqs = rte_zmalloc_socket("rxq->empty_rx_reqs",
1645 		sizeof(uint16_t) * nb_desc,
1646 		RTE_CACHE_LINE_SIZE,
1647 		socket_id);
1648 	if (!rxq->empty_rx_reqs) {
1649 		PMD_DRV_LOG(ERR,
1650 			"Failed to allocate memory for empty Rx requests\n");
1651 		rte_free(rxq->rx_buffer_info);
1652 		rxq->rx_buffer_info = NULL;
1653 		rte_free(rxq->rx_refill_buffer);
1654 		rxq->rx_refill_buffer = NULL;
1655 		return -ENOMEM;
1656 	}
1657 
1658 	for (i = 0; i < nb_desc; i++)
1659 		rxq->empty_rx_reqs[i] = i;
1660 
1661 	rxq->offloads = rx_conf->offloads | dev->data->dev_conf.rxmode.offloads;
1662 
1663 	if (rx_conf->rx_free_thresh != 0) {
1664 		rxq->rx_free_thresh = rx_conf->rx_free_thresh;
1665 	} else {
1666 		dyn_thresh = rxq->ring_size / ENA_REFILL_THRESH_DIVIDER;
1667 		rxq->rx_free_thresh = RTE_MIN(dyn_thresh,
1668 			(uint16_t)(ENA_REFILL_THRESH_PACKET));
1669 	}
1670 
1671 	/* Store pointer to this queue in upper layer */
1672 	rxq->configured = 1;
1673 	dev->data->rx_queues[queue_idx] = rxq;
1674 
1675 	return 0;
1676 }
1677 
1678 static int ena_add_single_rx_desc(struct ena_com_io_sq *io_sq,
1679 				  struct rte_mbuf *mbuf, uint16_t id)
1680 {
1681 	struct ena_com_buf ebuf;
1682 	int rc;
1683 
1684 	/* prepare physical address for DMA transaction */
1685 	ebuf.paddr = mbuf->buf_iova + RTE_PKTMBUF_HEADROOM;
1686 	ebuf.len = mbuf->buf_len - RTE_PKTMBUF_HEADROOM;
1687 
1688 	/* pass resource to device */
1689 	rc = ena_com_add_single_rx_desc(io_sq, &ebuf, id);
1690 	if (unlikely(rc != 0))
1691 		PMD_RX_LOG(WARNING, "Failed adding Rx desc\n");
1692 
1693 	return rc;
1694 }
1695 
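/*
 * Refill the Rx submission queue with up to `count` buffers taken from the
 * queue's mempool. Returns the number of buffers actually submitted, which
 * may be lower than `count` if the mempool is exhausted or a descriptor
 * cannot be added to the device.
 */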
1696 static int ena_populate_rx_queue(struct ena_ring *rxq, unsigned int count)
1697 {
1698 	unsigned int i;
1699 	int rc;
1700 	uint16_t next_to_use = rxq->next_to_use;
1701 	uint16_t req_id;
1702 #ifdef RTE_ETHDEV_DEBUG_RX
1703 	uint16_t in_use;
1704 #endif
1705 	struct rte_mbuf **mbufs = rxq->rx_refill_buffer;
1706 
1707 	if (unlikely(!count))
1708 		return 0;
1709 
1710 #ifdef RTE_ETHDEV_DEBUG_RX
1711 	in_use = rxq->ring_size - 1 -
1712 		ena_com_free_q_entries(rxq->ena_com_io_sq);
1713 	if (unlikely((in_use + count) >= rxq->ring_size))
1714 		PMD_RX_LOG(ERR, "Bad Rx ring state\n");
1715 #endif
1716 
1717 	/* get resources for incoming packets */
1718 	rc = rte_pktmbuf_alloc_bulk(rxq->mb_pool, mbufs, count);
1719 	if (unlikely(rc < 0)) {
1720 		rte_atomic64_inc(&rxq->adapter->drv_stats->rx_nombuf);
1721 		++rxq->rx_stats.mbuf_alloc_fail;
1722 		PMD_RX_LOG(DEBUG, "There are not enough free buffers\n");
1723 		return 0;
1724 	}
1725 
1726 	for (i = 0; i < count; i++) {
1727 		struct rte_mbuf *mbuf = mbufs[i];
1728 		struct ena_rx_buffer *rx_info;
1729 
1730 		if (likely((i + 4) < count))
1731 			rte_prefetch0(mbufs[i + 4]);
1732 
1733 		req_id = rxq->empty_rx_reqs[next_to_use];
1734 		rx_info = &rxq->rx_buffer_info[req_id];
1735 
1736 		rc = ena_add_single_rx_desc(rxq->ena_com_io_sq, mbuf, req_id);
1737 		if (unlikely(rc != 0))
1738 			break;
1739 
1740 		rx_info->mbuf = mbuf;
1741 		next_to_use = ENA_IDX_NEXT_MASKED(next_to_use, rxq->size_mask);
1742 	}
1743 
1744 	if (unlikely(i < count)) {
1745 		PMD_RX_LOG(WARNING,
1746 			"Refilled Rx queue[%d] with only %d/%d buffers\n",
1747 			rxq->id, i, count);
1748 		rte_pktmbuf_free_bulk(&mbufs[i], count - i);
1749 		++rxq->rx_stats.refill_partial;
1750 	}
1751 
1752 	/* When we submitted free resources to device... */
1753 	if (likely(i > 0)) {
1754 		/* ...let HW know that it can fill buffers with data. */
1755 		ena_com_write_sq_doorbell(rxq->ena_com_io_sq);
1756 
1757 		rxq->next_to_use = next_to_use;
1758 	}
1759 
1760 	return i;
1761 }
1762 
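/*
 * Pick the number of customer metrics entries to expose: the reworked
 * customer metrics when the device supports them, otherwise the legacy ENI
 * stats, capped at ENA_MAX_CUSTOMER_METRICS.
 */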
1763 static size_t ena_get_metrics_entries(struct ena_adapter *adapter)
1764 {
1765 	struct ena_com_dev *ena_dev = &adapter->ena_dev;
1766 	size_t metrics_num = 0;
1767 
1768 	if (ena_com_get_cap(ena_dev, ENA_ADMIN_CUSTOMER_METRICS))
1769 		metrics_num = ENA_STATS_ARRAY_METRICS;
1770 	else if (ena_com_get_cap(ena_dev, ENA_ADMIN_ENI_STATS))
1771 		metrics_num = ENA_STATS_ARRAY_METRICS_LEGACY;
1772 	PMD_DRV_LOG(NOTICE, "%u customer metrics are supported\n", (unsigned int)metrics_num);
1773 	if (metrics_num > ENA_MAX_CUSTOMER_METRICS) {
1774 		PMD_DRV_LOG(NOTICE, "Not enough space for the requested customer metrics\n");
1775 		metrics_num = ENA_MAX_CUSTOMER_METRICS;
1776 	}
1777 	return metrics_num;
1778 }
1779 
1780 static int ena_device_init(struct ena_adapter *adapter,
1781 			   struct rte_pci_device *pdev,
1782 			   struct ena_com_dev_get_features_ctx *get_feat_ctx)
1783 {
1784 	struct ena_com_dev *ena_dev = &adapter->ena_dev;
1785 	uint32_t aenq_groups;
1786 	int rc;
1787 	bool readless_supported;
1788 
1789 	/* Initialize mmio registers */
1790 	rc = ena_com_mmio_reg_read_request_init(ena_dev);
1791 	if (rc) {
1792 		PMD_DRV_LOG(ERR, "Failed to init MMIO read-less mode\n");
1793 		return rc;
1794 	}
1795 
1796 	/* The PCIe configuration space revision ID indicates whether MMIO
1797 	 * register read is disabled.
1798 	 */
1799 	readless_supported = !(pdev->id.class_id & ENA_MMIO_DISABLE_REG_READ);
1800 	ena_com_set_mmio_read_mode(ena_dev, readless_supported);
1801 
1802 	/* reset device */
1803 	rc = ena_com_dev_reset(ena_dev, ENA_REGS_RESET_NORMAL);
1804 	if (rc) {
1805 		PMD_DRV_LOG(ERR, "Cannot reset device\n");
1806 		goto err_mmio_read_less;
1807 	}
1808 
1809 	/* check FW version */
1810 	rc = ena_com_validate_version(ena_dev);
1811 	if (rc) {
1812 		PMD_DRV_LOG(ERR, "Device version is too low\n");
1813 		goto err_mmio_read_less;
1814 	}
1815 
1816 	ena_dev->dma_addr_bits = ena_com_get_dma_width(ena_dev);
1817 
1818 	/* ENA device administration layer init */
1819 	rc = ena_com_admin_init(ena_dev, &aenq_handlers);
1820 	if (rc) {
1821 		PMD_DRV_LOG(ERR,
1822 			"Cannot initialize ENA admin queue\n");
1823 		goto err_mmio_read_less;
1824 	}
1825 
1826 	/* To enable the MSI-X interrupts, the driver needs to know the number
1827 	 * of queues. So the driver uses polling mode to retrieve this
1828 	 * information.
1829 	 */
1830 	ena_com_set_admin_polling_mode(ena_dev, true);
1831 
1832 	ena_config_host_info(ena_dev);
1833 
1834 	/* Get Device Attributes and features */
1835 	rc = ena_com_get_dev_attr_feat(ena_dev, get_feat_ctx);
1836 	if (rc) {
1837 		PMD_DRV_LOG(ERR,
1838 			"Cannot get attribute for ENA device, rc: %d\n", rc);
1839 		goto err_admin_init;
1840 	}
1841 
1842 	aenq_groups = BIT(ENA_ADMIN_LINK_CHANGE) |
1843 		      BIT(ENA_ADMIN_NOTIFICATION) |
1844 		      BIT(ENA_ADMIN_KEEP_ALIVE) |
1845 		      BIT(ENA_ADMIN_FATAL_ERROR) |
1846 		      BIT(ENA_ADMIN_WARNING);
1847 
1848 	aenq_groups &= get_feat_ctx->aenq.supported_groups;
1849 
1850 	adapter->all_aenq_groups = aenq_groups;
1851 	/* The actual supported number of metrics is negotiated with the device at runtime */
1852 	adapter->metrics_num = ena_get_metrics_entries(adapter);
1853 
1854 	return 0;
1855 
1856 err_admin_init:
1857 	ena_com_admin_destroy(ena_dev);
1858 
1859 err_mmio_read_less:
1860 	ena_com_mmio_reg_read_request_destroy(ena_dev);
1861 
1862 	return rc;
1863 }
1864 
1865 static void ena_interrupt_handler_rte(void *cb_arg)
1866 {
1867 	struct rte_eth_dev *dev = cb_arg;
1868 	struct ena_adapter *adapter = dev->data->dev_private;
1869 	struct ena_com_dev *ena_dev = &adapter->ena_dev;
1870 
1871 	ena_com_admin_q_comp_intr_handler(ena_dev);
1872 	if (likely(adapter->state != ENA_ADAPTER_STATE_CLOSED))
1873 		ena_com_aenq_intr_handler(ena_dev, dev);
1874 }
1875 
1876 static void check_for_missing_keep_alive(struct ena_adapter *adapter)
1877 {
1878 	if (!(adapter->active_aenq_groups & BIT(ENA_ADMIN_KEEP_ALIVE)))
1879 		return;
1880 
1881 	if (adapter->keep_alive_timeout == ENA_HW_HINTS_NO_TIMEOUT)
1882 		return;
1883 
1884 	if (unlikely((rte_get_timer_cycles() - adapter->timestamp_wd) >=
1885 	    adapter->keep_alive_timeout)) {
1886 		PMD_DRV_LOG(ERR, "Keep alive timeout\n");
1887 		ena_trigger_reset(adapter, ENA_REGS_RESET_KEEP_ALIVE_TO);
1888 		++adapter->dev_stats.wd_expired;
1889 	}
1890 }
1891 
1892 /* Check if the admin queue is in running state */
1893 static void check_for_admin_com_state(struct ena_adapter *adapter)
1894 {
1895 	if (unlikely(!ena_com_get_admin_running_state(&adapter->ena_dev))) {
1896 		PMD_DRV_LOG(ERR, "ENA admin queue is not in running state\n");
1897 		ena_trigger_reset(adapter, ENA_REGS_RESET_ADMIN_TO);
1898 	}
1899 }
1900 
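/*
 * Scan the Tx ring for packets whose completion is overdue (older than
 * missing_tx_completion_to). If the number of such packets exceeds the
 * per-ring threshold, request a device reset with the MISS_TX_CMPL reason.
 */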
1901 static int check_for_tx_completion_in_queue(struct ena_adapter *adapter,
1902 					    struct ena_ring *tx_ring)
1903 {
1904 	struct ena_tx_buffer *tx_buf;
1905 	uint64_t timestamp;
1906 	uint64_t completion_delay;
1907 	uint32_t missed_tx = 0;
1908 	unsigned int i;
1909 	int rc = 0;
1910 
1911 	for (i = 0; i < tx_ring->ring_size; ++i) {
1912 		tx_buf = &tx_ring->tx_buffer_info[i];
1913 		timestamp = tx_buf->timestamp;
1914 
1915 		if (timestamp == 0)
1916 			continue;
1917 
1918 		completion_delay = rte_get_timer_cycles() - timestamp;
1919 		if (completion_delay > adapter->missing_tx_completion_to) {
1920 			if (unlikely(!tx_buf->print_once)) {
1921 				PMD_TX_LOG(WARNING,
1922 					"Found a Tx that wasn't completed on time, qid %d, index %d. "
1923 					"Missing Tx outstanding for %" PRIu64 " msecs.\n",
1924 					tx_ring->id, i,	completion_delay /
1925 					rte_get_timer_hz() * 1000);
1926 				tx_buf->print_once = true;
1927 			}
1928 			++missed_tx;
1929 		}
1930 	}
1931 
1932 	if (unlikely(missed_tx > tx_ring->missing_tx_completion_threshold)) {
1933 		PMD_DRV_LOG(ERR,
1934 			"The number of lost Tx completions is above the threshold (%d > %d). "
1935 			"Trigger the device reset.\n",
1936 			missed_tx,
1937 			tx_ring->missing_tx_completion_threshold);
1938 		adapter->reset_reason = ENA_REGS_RESET_MISS_TX_CMPL;
1939 		adapter->trigger_reset = true;
1940 		rc = -EIO;
1941 	}
1942 
1943 	tx_ring->tx_stats.missed_tx += missed_tx;
1944 
1945 	return rc;
1946 }
1947 
1948 static void check_for_tx_completions(struct ena_adapter *adapter)
1949 {
1950 	struct ena_ring *tx_ring;
1951 	uint64_t tx_cleanup_delay;
1952 	size_t qid;
1953 	int budget;
1954 	uint16_t nb_tx_queues;
1955 
1956 	if (adapter->missing_tx_completion_to == ENA_HW_HINTS_NO_TIMEOUT)
1957 		return;
1958 
1959 	nb_tx_queues = adapter->edev_data->nb_tx_queues;
1960 	budget = adapter->missing_tx_completion_budget;
1961 
1962 	qid = adapter->last_tx_comp_qid;
1963 	while (budget-- > 0) {
1964 		tx_ring = &adapter->tx_ring[qid];
1965 
1966 		/* Tx cleanup is called from the burst function and can also be
1967 		 * called directly by the application. Cleanup is additionally
1968 		 * limited by the free threshold. To avoid falsely detecting a
1969 		 * missing HW Tx completion, check the delay since the cleanup
1970 		 * function was last called.
1971 		 */
1972 		tx_cleanup_delay = rte_get_timer_cycles() -
1973 			tx_ring->last_cleanup_ticks;
1974 		if (tx_cleanup_delay < adapter->tx_cleanup_stall_delay)
1975 			check_for_tx_completion_in_queue(adapter, tx_ring);
1976 		qid = (qid + 1) % nb_tx_queues;
1977 	}
1978 
1979 	adapter->last_tx_comp_qid = qid;
1980 }
1981 
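/*
 * Watchdog timer callback. Periodically verifies the keep-alive AENQ
 * messages, the admin queue state and the Tx completions; if any check
 * requests a reset, the RTE_ETH_EVENT_INTR_RESET event is raised so the
 * application can recover the port.
 */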
1982 static void ena_timer_wd_callback(__rte_unused struct rte_timer *timer,
1983 				  void *arg)
1984 {
1985 	struct rte_eth_dev *dev = arg;
1986 	struct ena_adapter *adapter = dev->data->dev_private;
1987 
1988 	if (unlikely(adapter->trigger_reset))
1989 		return;
1990 
1991 	check_for_missing_keep_alive(adapter);
1992 	check_for_admin_com_state(adapter);
1993 	check_for_tx_completions(adapter);
1994 
1995 	if (unlikely(adapter->trigger_reset)) {
1996 		PMD_DRV_LOG(ERR, "Trigger reset is on\n");
1997 		rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_RESET,
1998 			NULL);
1999 	}
2000 }
2001 
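/*
 * Fill the default LLQ configuration: inline header placement with two
 * descriptors before the header, and 256B ring entries when large LLQ
 * headers were requested and the device supports them, 128B otherwise.
 */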
2002 static inline void
2003 set_default_llq_configurations(struct ena_llq_configurations *llq_config,
2004 			       struct ena_admin_feature_llq_desc *llq,
2005 			       bool use_large_llq_hdr)
2006 {
2007 	llq_config->llq_header_location = ENA_ADMIN_INLINE_HEADER;
2008 	llq_config->llq_stride_ctrl = ENA_ADMIN_MULTIPLE_DESCS_PER_ENTRY;
2009 	llq_config->llq_num_decs_before_header =
2010 		ENA_ADMIN_LLQ_NUM_DESCS_BEFORE_HEADER_2;
2011 
2012 	if (use_large_llq_hdr &&
2013 	    (llq->entry_size_ctrl_supported & ENA_ADMIN_LIST_ENTRY_SIZE_256B)) {
2014 		llq_config->llq_ring_entry_size =
2015 			ENA_ADMIN_LIST_ENTRY_SIZE_256B;
2016 		llq_config->llq_ring_entry_size_value = 256;
2017 	} else {
2018 		llq_config->llq_ring_entry_size =
2019 			ENA_ADMIN_LIST_ENTRY_SIZE_128B;
2020 		llq_config->llq_ring_entry_size_value = 128;
2021 	}
2022 }
2023 
2024 static int
2025 ena_set_queues_placement_policy(struct ena_adapter *adapter,
2026 				struct ena_com_dev *ena_dev,
2027 				struct ena_admin_feature_llq_desc *llq,
2028 				struct ena_llq_configurations *llq_default_configurations)
2029 {
2030 	int rc;
2031 	u32 llq_feature_mask;
2032 
2033 	if (!adapter->enable_llq) {
2034 		PMD_DRV_LOG(WARNING,
2035 			"NOTE: LLQ has been disabled as per user's request. "
2036 			"This may lead to a huge performance degradation!\n");
2037 		ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST;
2038 		return 0;
2039 	}
2040 
2041 	llq_feature_mask = 1 << ENA_ADMIN_LLQ;
2042 	if (!(ena_dev->supported_features & llq_feature_mask)) {
2043 		PMD_DRV_LOG(INFO,
2044 			"LLQ is not supported. Fallback to host mode policy.\n");
2045 		ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST;
2046 		return 0;
2047 	}
2048 
2049 	if (adapter->dev_mem_base == NULL) {
2050 		PMD_DRV_LOG(ERR,
2051 			"LLQ is advertised as supported, but device doesn't expose mem bar\n");
2052 		ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST;
2053 		return 0;
2054 	}
2055 
2056 	rc = ena_com_config_dev_mode(ena_dev, llq, llq_default_configurations);
2057 	if (unlikely(rc)) {
2058 		PMD_INIT_LOG(WARNING,
2059 			"Failed to config dev mode. Fallback to host mode policy.\n");
2060 		ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST;
2061 		return 0;
2062 	}
2063 
2064 	/* Nothing to config, exit */
2065 	if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_HOST)
2066 		return 0;
2067 
2068 	ena_dev->mem_bar = adapter->dev_mem_base;
2069 
2070 	return 0;
2071 }
2072 
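/*
 * Compute the maximum number of IO queues as the minimum of the device Rx
 * and Tx SQ/CQ limits and ENA_MAX_NUM_IO_QUEUES; in LLQ mode the Tx SQ limit
 * is taken from the LLQ capability instead.
 */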
2073 static uint32_t ena_calc_max_io_queue_num(struct ena_com_dev *ena_dev,
2074 	struct ena_com_dev_get_features_ctx *get_feat_ctx)
2075 {
2076 	uint32_t io_tx_sq_num, io_tx_cq_num, io_rx_num, max_num_io_queues;
2077 
2078 	/* Regular queues capabilities */
2079 	if (ena_dev->supported_features & BIT(ENA_ADMIN_MAX_QUEUES_EXT)) {
2080 		struct ena_admin_queue_ext_feature_fields *max_queue_ext =
2081 			&get_feat_ctx->max_queue_ext.max_queue_ext;
2082 		io_rx_num = RTE_MIN(max_queue_ext->max_rx_sq_num,
2083 				    max_queue_ext->max_rx_cq_num);
2084 		io_tx_sq_num = max_queue_ext->max_tx_sq_num;
2085 		io_tx_cq_num = max_queue_ext->max_tx_cq_num;
2086 	} else {
2087 		struct ena_admin_queue_feature_desc *max_queues =
2088 			&get_feat_ctx->max_queues;
2089 		io_tx_sq_num = max_queues->max_sq_num;
2090 		io_tx_cq_num = max_queues->max_cq_num;
2091 		io_rx_num = RTE_MIN(io_tx_sq_num, io_tx_cq_num);
2092 	}
2093 
2094 	/* In case of LLQ use the llq number in the get feature cmd */
2095 	if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV)
2096 		io_tx_sq_num = get_feat_ctx->llq.max_llq_num;
2097 
2098 	max_num_io_queues = RTE_MIN(ENA_MAX_NUM_IO_QUEUES, io_rx_num);
2099 	max_num_io_queues = RTE_MIN(max_num_io_queues, io_tx_sq_num);
2100 	max_num_io_queues = RTE_MIN(max_num_io_queues, io_tx_cq_num);
2101 
2102 	if (unlikely(max_num_io_queues == 0)) {
2103 		PMD_DRV_LOG(ERR, "Number of IO queues cannot be 0\n");
2104 		return -EFAULT;
2105 	}
2106 
2107 	return max_num_io_queues;
2108 }
2109 
2110 static void
2111 ena_set_offloads(struct ena_offloads *offloads,
2112 		 struct ena_admin_feature_offload_desc *offload_desc)
2113 {
2114 	if (offload_desc->tx & ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_IPV4_MASK)
2115 		offloads->tx_offloads |= ENA_IPV4_TSO;
2116 
2117 	/* Tx IPv4 checksum offloads */
2118 	if (offload_desc->tx &
2119 	    ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L3_CSUM_IPV4_MASK)
2120 		offloads->tx_offloads |= ENA_L3_IPV4_CSUM;
2121 	if (offload_desc->tx &
2122 	    ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV4_CSUM_FULL_MASK)
2123 		offloads->tx_offloads |= ENA_L4_IPV4_CSUM;
2124 	if (offload_desc->tx &
2125 	    ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV4_CSUM_PART_MASK)
2126 		offloads->tx_offloads |= ENA_L4_IPV4_CSUM_PARTIAL;
2127 
2128 	/* Tx IPv6 checksum offloads */
2129 	if (offload_desc->tx &
2130 	    ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV6_CSUM_FULL_MASK)
2131 		offloads->tx_offloads |= ENA_L4_IPV6_CSUM;
2132 	if (offload_desc->tx &
2133 	     ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV6_CSUM_PART_MASK)
2134 		offloads->tx_offloads |= ENA_L4_IPV6_CSUM_PARTIAL;
2135 
2136 	/* Rx IPv4 checksum offloads */
2137 	if (offload_desc->rx_supported &
2138 	    ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L3_CSUM_IPV4_MASK)
2139 		offloads->rx_offloads |= ENA_L3_IPV4_CSUM;
2140 	if (offload_desc->rx_supported &
2141 	    ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L4_IPV4_CSUM_MASK)
2142 		offloads->rx_offloads |= ENA_L4_IPV4_CSUM;
2143 
2144 	/* Rx IPv6 checksum offloads */
2145 	if (offload_desc->rx_supported &
2146 	    ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L4_IPV6_CSUM_MASK)
2147 		offloads->rx_offloads |= ENA_L4_IPV6_CSUM;
2148 
2149 	if (offload_desc->rx_supported &
2150 	    ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_HASH_MASK)
2151 		offloads->rx_offloads |= ENA_RX_RSS_HASH;
2152 }
2153 
2154 static int ena_init_once(void)
2155 {
2156 	static bool init_done;
2157 
2158 	if (init_done)
2159 		return 0;
2160 
2161 	if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
2162 		/* Init timer subsystem for the ENA timer service. */
2163 		rte_timer_subsystem_init();
2164 		/* Register handler for requests from secondary processes. */
2165 		rte_mp_action_register(ENA_MP_NAME, ena_mp_primary_handle);
2166 	}
2167 
2168 	init_done = true;
2169 	return 0;
2170 }
2171 
2172 static int eth_ena_dev_init(struct rte_eth_dev *eth_dev)
2173 {
2174 	struct ena_calc_queue_size_ctx calc_queue_ctx = { 0 };
2175 	struct rte_pci_device *pci_dev;
2176 	struct rte_intr_handle *intr_handle;
2177 	struct ena_adapter *adapter = eth_dev->data->dev_private;
2178 	struct ena_com_dev *ena_dev = &adapter->ena_dev;
2179 	struct ena_com_dev_get_features_ctx get_feat_ctx;
2180 	struct ena_llq_configurations llq_config;
2181 	const char *queue_type_str;
2182 	uint32_t max_num_io_queues;
2183 	int rc;
2184 	static int adapters_found;
2185 	bool disable_meta_caching;
2186 
2187 	eth_dev->dev_ops = &ena_dev_ops;
2188 	eth_dev->rx_pkt_burst = &eth_ena_recv_pkts;
2189 	eth_dev->tx_pkt_burst = &eth_ena_xmit_pkts;
2190 	eth_dev->tx_pkt_prepare = &eth_ena_prep_pkts;
2191 
2192 	rc = ena_init_once();
2193 	if (rc != 0)
2194 		return rc;
2195 
2196 	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
2197 		return 0;
2198 
2199 	eth_dev->data->dev_flags |= RTE_ETH_DEV_AUTOFILL_QUEUE_XSTATS;
2200 
2201 	memset(adapter, 0, sizeof(struct ena_adapter));
2202 	ena_dev = &adapter->ena_dev;
2203 
2204 	adapter->edev_data = eth_dev->data;
2205 
2206 	pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev);
2207 
2208 	PMD_INIT_LOG(INFO, "Initializing " PCI_PRI_FMT "\n",
2209 		     pci_dev->addr.domain,
2210 		     pci_dev->addr.bus,
2211 		     pci_dev->addr.devid,
2212 		     pci_dev->addr.function);
2213 
2214 	intr_handle = pci_dev->intr_handle;
2215 
2216 	adapter->regs = pci_dev->mem_resource[ENA_REGS_BAR].addr;
2217 	adapter->dev_mem_base = pci_dev->mem_resource[ENA_MEM_BAR].addr;
2218 
2219 	if (!adapter->regs) {
2220 		PMD_INIT_LOG(CRIT, "Failed to access registers BAR(%d)\n",
2221 			     ENA_REGS_BAR);
2222 		return -ENXIO;
2223 	}
2224 
2225 	ena_dev->reg_bar = adapter->regs;
2226 	/* Pass the device data as a pointer which ena_com can hand to the IO
2227 	 * functions (for example, for memory allocation).
2228 	 */
2229 	ena_dev->dmadev = eth_dev->data;
2230 
2231 	adapter->id_number = adapters_found;
2232 
2233 	snprintf(adapter->name, ENA_NAME_MAX_LEN, "ena_%d",
2234 		 adapter->id_number);
2235 
2236 	/* Assign default devargs values */
2237 	adapter->missing_tx_completion_to = ENA_TX_TIMEOUT;
2238 	adapter->enable_llq = true;
2239 	adapter->use_large_llq_hdr = false;
2240 
2241 	rc = ena_parse_devargs(adapter, pci_dev->device.devargs);
2242 	if (rc != 0) {
2243 		PMD_INIT_LOG(CRIT, "Failed to parse devargs\n");
2244 		goto err;
2245 	}
2246 	rc = ena_com_allocate_customer_metrics_buffer(ena_dev);
2247 	if (rc != 0) {
2248 		PMD_INIT_LOG(CRIT, "Failed to allocate customer metrics buffer\n");
2249 		goto err;
2250 	}
2251 
2252 	/* device specific initialization routine */
2253 	rc = ena_device_init(adapter, pci_dev, &get_feat_ctx);
2254 	if (rc) {
2255 		PMD_INIT_LOG(CRIT, "Failed to init ENA device\n");
2256 		goto err_metrics_delete;
2257 	}
2258 
2259 	/* Check if device supports LSC */
2260 	if (!(adapter->all_aenq_groups & BIT(ENA_ADMIN_LINK_CHANGE)))
2261 		adapter->edev_data->dev_flags &= ~RTE_ETH_DEV_INTR_LSC;
2262 
2263 	set_default_llq_configurations(&llq_config, &get_feat_ctx.llq,
2264 		adapter->use_large_llq_hdr);
2265 	rc = ena_set_queues_placement_policy(adapter, ena_dev,
2266 					     &get_feat_ctx.llq, &llq_config);
2267 	if (unlikely(rc)) {
2268 		PMD_INIT_LOG(CRIT, "Failed to set placement policy\n");
2269 		return rc;
2270 	}
2271 
2272 	if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_HOST)
2273 		queue_type_str = "Regular";
2274 	else
2275 		queue_type_str = "Low latency";
2276 	PMD_DRV_LOG(INFO, "Placement policy: %s\n", queue_type_str);
2277 
2278 	calc_queue_ctx.ena_dev = ena_dev;
2279 	calc_queue_ctx.get_feat_ctx = &get_feat_ctx;
2280 
2281 	max_num_io_queues = ena_calc_max_io_queue_num(ena_dev, &get_feat_ctx);
2282 	rc = ena_calc_io_queue_size(&calc_queue_ctx,
2283 		adapter->use_large_llq_hdr);
2284 	if (unlikely((rc != 0) || (max_num_io_queues == 0))) {
2285 		rc = -EFAULT;
2286 		goto err_device_destroy;
2287 	}
2288 
2289 	adapter->max_tx_ring_size = calc_queue_ctx.max_tx_queue_size;
2290 	adapter->max_rx_ring_size = calc_queue_ctx.max_rx_queue_size;
2291 	adapter->max_tx_sgl_size = calc_queue_ctx.max_tx_sgl_size;
2292 	adapter->max_rx_sgl_size = calc_queue_ctx.max_rx_sgl_size;
2293 	adapter->max_num_io_queues = max_num_io_queues;
2294 
2295 	if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) {
2296 		disable_meta_caching =
2297 			!!(get_feat_ctx.llq.accel_mode.u.get.supported_flags &
2298 			BIT(ENA_ADMIN_DISABLE_META_CACHING));
2299 	} else {
2300 		disable_meta_caching = false;
2301 	}
2302 
2303 	/* prepare ring structures */
2304 	ena_init_rings(adapter, disable_meta_caching);
2305 
2306 	ena_config_debug_area(adapter);
2307 
2308 	/* Set max MTU for this device */
2309 	adapter->max_mtu = get_feat_ctx.dev_attr.max_mtu;
2310 
2311 	ena_set_offloads(&adapter->offloads, &get_feat_ctx.offload);
2312 
2313 	/* Copy MAC address and point DPDK to it */
2314 	eth_dev->data->mac_addrs = (struct rte_ether_addr *)adapter->mac_addr;
2315 	rte_ether_addr_copy((struct rte_ether_addr *)
2316 			get_feat_ctx.dev_attr.mac_addr,
2317 			(struct rte_ether_addr *)adapter->mac_addr);
2318 
2319 	rc = ena_com_rss_init(ena_dev, ENA_RX_RSS_TABLE_LOG_SIZE);
2320 	if (unlikely(rc != 0)) {
2321 		PMD_DRV_LOG(ERR, "Failed to initialize RSS in ENA device\n");
2322 		goto err_delete_debug_area;
2323 	}
2324 
2325 	adapter->drv_stats = rte_zmalloc("adapter stats",
2326 					 sizeof(*adapter->drv_stats),
2327 					 RTE_CACHE_LINE_SIZE);
2328 	if (!adapter->drv_stats) {
2329 		PMD_DRV_LOG(ERR,
2330 			"Failed to allocate memory for adapter statistics\n");
2331 		rc = -ENOMEM;
2332 		goto err_rss_destroy;
2333 	}
2334 
2335 	rte_spinlock_init(&adapter->admin_lock);
2336 
2337 	rte_intr_callback_register(intr_handle,
2338 				   ena_interrupt_handler_rte,
2339 				   eth_dev);
2340 	rte_intr_enable(intr_handle);
2341 	ena_com_set_admin_polling_mode(ena_dev, false);
2342 	ena_com_admin_aenq_enable(ena_dev);
2343 
2344 	rte_timer_init(&adapter->timer_wd);
2345 
2346 	adapters_found++;
2347 	adapter->state = ENA_ADAPTER_STATE_INIT;
2348 
2349 	return 0;
2350 
2351 err_rss_destroy:
2352 	ena_com_rss_destroy(ena_dev);
2353 err_delete_debug_area:
2354 	ena_com_delete_debug_area(ena_dev);
2355 
2356 err_device_destroy:
2357 	ena_com_delete_host_info(ena_dev);
2358 	ena_com_admin_destroy(ena_dev);
2359 err_metrics_delete:
2360 	ena_com_delete_customer_metrics_buffer(ena_dev);
2361 err:
2362 	return rc;
2363 }
2364 
2365 static void ena_destroy_device(struct rte_eth_dev *eth_dev)
2366 {
2367 	struct ena_adapter *adapter = eth_dev->data->dev_private;
2368 	struct ena_com_dev *ena_dev = &adapter->ena_dev;
2369 
2370 	if (adapter->state == ENA_ADAPTER_STATE_FREE)
2371 		return;
2372 
2373 	ena_com_set_admin_running_state(ena_dev, false);
2374 
2375 	if (adapter->state != ENA_ADAPTER_STATE_CLOSED)
2376 		ena_close(eth_dev);
2377 
2378 	ena_com_rss_destroy(ena_dev);
2379 
2380 	ena_com_delete_debug_area(ena_dev);
2381 	ena_com_delete_host_info(ena_dev);
2382 
2383 	ena_com_abort_admin_commands(ena_dev);
2384 	ena_com_wait_for_abort_completion(ena_dev);
2385 	ena_com_admin_destroy(ena_dev);
2386 	ena_com_mmio_reg_read_request_destroy(ena_dev);
2387 	ena_com_delete_customer_metrics_buffer(ena_dev);
2388 
2389 	adapter->state = ENA_ADAPTER_STATE_FREE;
2390 }
2391 
2392 static int eth_ena_dev_uninit(struct rte_eth_dev *eth_dev)
2393 {
2394 	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
2395 		return 0;
2396 
2397 	ena_destroy_device(eth_dev);
2398 
2399 	return 0;
2400 }
2401 
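/*
 * dev_configure callback. Forces scattered Rx and multi-segment Tx (which
 * cannot be turned off in HW), enables the RSS hash offload when RSS is
 * requested, derives the missing Tx completion monitoring parameters and
 * configures the AENQ groups.
 */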
2402 static int ena_dev_configure(struct rte_eth_dev *dev)
2403 {
2404 	struct ena_adapter *adapter = dev->data->dev_private;
2405 	int rc;
2406 
2407 	adapter->state = ENA_ADAPTER_STATE_CONFIG;
2408 
2409 	if (dev->data->dev_conf.rxmode.mq_mode & RTE_ETH_MQ_RX_RSS_FLAG)
2410 		dev->data->dev_conf.rxmode.offloads |= RTE_ETH_RX_OFFLOAD_RSS_HASH;
2411 	dev->data->dev_conf.txmode.offloads |= RTE_ETH_TX_OFFLOAD_MULTI_SEGS;
2412 
2413 	/* Scattered Rx cannot be turned off in the HW, so this capability must
2414 	 * be forced.
2415 	 */
2416 	dev->data->scattered_rx = 1;
2417 
2418 	adapter->last_tx_comp_qid = 0;
2419 
2420 	adapter->missing_tx_completion_budget =
2421 		RTE_MIN(ENA_MONITORED_TX_QUEUES, dev->data->nb_tx_queues);
2422 
2423 	/* To avoid detecting a spurious Tx completion timeout when the
2424 	 * application does not call the Tx cleanup function, set the Tx queue
2425 	 * stall timeout to half of the missing completion timeout as a safety
2426 	 * margin. If there are many missing Tx completions in the queue, they
2427 	 * will be detected sooner or later anyway.
2428 	 */
2429 	adapter->tx_cleanup_stall_delay = adapter->missing_tx_completion_to / 2;
2430 
2431 	rc = ena_configure_aenq(adapter);
2432 
2433 	return rc;
2434 }
2435 
2436 static void ena_init_rings(struct ena_adapter *adapter,
2437 			   bool disable_meta_caching)
2438 {
2439 	size_t i;
2440 
2441 	for (i = 0; i < adapter->max_num_io_queues; i++) {
2442 		struct ena_ring *ring = &adapter->tx_ring[i];
2443 
2444 		ring->configured = 0;
2445 		ring->type = ENA_RING_TYPE_TX;
2446 		ring->adapter = adapter;
2447 		ring->id = i;
2448 		ring->tx_mem_queue_type = adapter->ena_dev.tx_mem_queue_type;
2449 		ring->tx_max_header_size = adapter->ena_dev.tx_max_header_size;
2450 		ring->sgl_size = adapter->max_tx_sgl_size;
2451 		ring->disable_meta_caching = disable_meta_caching;
2452 	}
2453 
2454 	for (i = 0; i < adapter->max_num_io_queues; i++) {
2455 		struct ena_ring *ring = &adapter->rx_ring[i];
2456 
2457 		ring->configured = 0;
2458 		ring->type = ENA_RING_TYPE_RX;
2459 		ring->adapter = adapter;
2460 		ring->id = i;
2461 		ring->sgl_size = adapter->max_rx_sgl_size;
2462 	}
2463 }
2464 
2465 static uint64_t ena_get_rx_port_offloads(struct ena_adapter *adapter)
2466 {
2467 	uint64_t port_offloads = 0;
2468 
2469 	if (adapter->offloads.rx_offloads & ENA_L3_IPV4_CSUM)
2470 		port_offloads |= RTE_ETH_RX_OFFLOAD_IPV4_CKSUM;
2471 
2472 	if (adapter->offloads.rx_offloads &
2473 	    (ENA_L4_IPV4_CSUM | ENA_L4_IPV6_CSUM))
2474 		port_offloads |=
2475 			RTE_ETH_RX_OFFLOAD_UDP_CKSUM | RTE_ETH_RX_OFFLOAD_TCP_CKSUM;
2476 
2477 	if (adapter->offloads.rx_offloads & ENA_RX_RSS_HASH)
2478 		port_offloads |= RTE_ETH_RX_OFFLOAD_RSS_HASH;
2479 
2480 	port_offloads |= RTE_ETH_RX_OFFLOAD_SCATTER;
2481 
2482 	return port_offloads;
2483 }
2484 
2485 static uint64_t ena_get_tx_port_offloads(struct ena_adapter *adapter)
2486 {
2487 	uint64_t port_offloads = 0;
2488 
2489 	if (adapter->offloads.tx_offloads & ENA_IPV4_TSO)
2490 		port_offloads |= RTE_ETH_TX_OFFLOAD_TCP_TSO;
2491 
2492 	if (adapter->offloads.tx_offloads & ENA_L3_IPV4_CSUM)
2493 		port_offloads |= RTE_ETH_TX_OFFLOAD_IPV4_CKSUM;
2494 	if (adapter->offloads.tx_offloads &
2495 	    (ENA_L4_IPV4_CSUM_PARTIAL | ENA_L4_IPV4_CSUM |
2496 	     ENA_L4_IPV6_CSUM | ENA_L4_IPV6_CSUM_PARTIAL))
2497 		port_offloads |=
2498 			RTE_ETH_TX_OFFLOAD_UDP_CKSUM | RTE_ETH_TX_OFFLOAD_TCP_CKSUM;
2499 
2500 	port_offloads |= RTE_ETH_TX_OFFLOAD_MULTI_SEGS;
2501 
2502 	port_offloads |= RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE;
2503 
2504 	return port_offloads;
2505 }
2506 
2507 static uint64_t ena_get_rx_queue_offloads(struct ena_adapter *adapter)
2508 {
2509 	RTE_SET_USED(adapter);
2510 
2511 	return 0;
2512 }
2513 
2514 static uint64_t ena_get_tx_queue_offloads(struct ena_adapter *adapter)
2515 {
2516 	uint64_t queue_offloads = 0;
2517 	RTE_SET_USED(adapter);
2518 
2519 	queue_offloads |= RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE;
2520 
2521 	return queue_offloads;
2522 }
2523 
2524 static int ena_infos_get(struct rte_eth_dev *dev,
2525 			  struct rte_eth_dev_info *dev_info)
2526 {
2527 	struct ena_adapter *adapter;
2528 	struct ena_com_dev *ena_dev;
2529 
2530 	ena_assert_msg(dev->data != NULL, "Uninitialized device\n");
2531 	ena_assert_msg(dev->data->dev_private != NULL, "Uninitialized device\n");
2532 	adapter = dev->data->dev_private;
2533 
2534 	ena_dev = &adapter->ena_dev;
2535 	ena_assert_msg(ena_dev != NULL, "Uninitialized device\n");
2536 
2537 	dev_info->speed_capa =
2538 			RTE_ETH_LINK_SPEED_1G   |
2539 			RTE_ETH_LINK_SPEED_2_5G |
2540 			RTE_ETH_LINK_SPEED_5G   |
2541 			RTE_ETH_LINK_SPEED_10G  |
2542 			RTE_ETH_LINK_SPEED_25G  |
2543 			RTE_ETH_LINK_SPEED_40G  |
2544 			RTE_ETH_LINK_SPEED_50G  |
2545 			RTE_ETH_LINK_SPEED_100G;
2546 
2547 	/* Inform framework about available features */
2548 	dev_info->rx_offload_capa = ena_get_rx_port_offloads(adapter);
2549 	dev_info->tx_offload_capa = ena_get_tx_port_offloads(adapter);
2550 	dev_info->rx_queue_offload_capa = ena_get_rx_queue_offloads(adapter);
2551 	dev_info->tx_queue_offload_capa = ena_get_tx_queue_offloads(adapter);
2552 
2553 	dev_info->flow_type_rss_offloads = ENA_ALL_RSS_HF;
2554 	dev_info->hash_key_size = ENA_HASH_KEY_SIZE;
2555 
2556 	dev_info->min_rx_bufsize = ENA_MIN_FRAME_LEN;
2557 	dev_info->max_rx_pktlen  = adapter->max_mtu + RTE_ETHER_HDR_LEN +
2558 		RTE_ETHER_CRC_LEN;
2559 	dev_info->min_mtu = ENA_MIN_MTU;
2560 	dev_info->max_mtu = adapter->max_mtu;
2561 	dev_info->max_mac_addrs = 1;
2562 
2563 	dev_info->max_rx_queues = adapter->max_num_io_queues;
2564 	dev_info->max_tx_queues = adapter->max_num_io_queues;
2565 	dev_info->reta_size = ENA_RX_RSS_TABLE_SIZE;
2566 
2567 	dev_info->rx_desc_lim.nb_max = adapter->max_rx_ring_size;
2568 	dev_info->rx_desc_lim.nb_min = ENA_MIN_RING_DESC;
2569 	dev_info->rx_desc_lim.nb_seg_max = RTE_MIN(ENA_PKT_MAX_BUFS,
2570 					adapter->max_rx_sgl_size);
2571 	dev_info->rx_desc_lim.nb_mtu_seg_max = RTE_MIN(ENA_PKT_MAX_BUFS,
2572 					adapter->max_rx_sgl_size);
2573 
2574 	dev_info->tx_desc_lim.nb_max = adapter->max_tx_ring_size;
2575 	dev_info->tx_desc_lim.nb_min = ENA_MIN_RING_DESC;
2576 	dev_info->tx_desc_lim.nb_seg_max = RTE_MIN(ENA_PKT_MAX_BUFS,
2577 					adapter->max_tx_sgl_size);
2578 	dev_info->tx_desc_lim.nb_mtu_seg_max = RTE_MIN(ENA_PKT_MAX_BUFS,
2579 					adapter->max_tx_sgl_size);
2580 
2581 	dev_info->default_rxportconf.ring_size = ENA_DEFAULT_RING_SIZE;
2582 	dev_info->default_txportconf.ring_size = ENA_DEFAULT_RING_SIZE;
2583 
2584 	dev_info->err_handle_mode = RTE_ETH_ERROR_HANDLE_MODE_PASSIVE;
2585 
2586 	return 0;
2587 }
2588 
2589 static inline void ena_init_rx_mbuf(struct rte_mbuf *mbuf, uint16_t len)
2590 {
2591 	mbuf->data_len = len;
2592 	mbuf->data_off = RTE_PKTMBUF_HEADROOM;
2593 	mbuf->refcnt = 1;
2594 	mbuf->next = NULL;
2595 }
2596 
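/*
 * Assemble a single mbuf (or an mbuf chain for multi-descriptor packets)
 * from `descs` completed Rx buffers. Consumed req_ids are returned to
 * empty_rx_reqs and *next_to_clean is advanced; zero-length descriptors are
 * re-posted to the device instead of being chained.
 */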
2597 static struct rte_mbuf *ena_rx_mbuf(struct ena_ring *rx_ring,
2598 				    struct ena_com_rx_buf_info *ena_bufs,
2599 				    uint32_t descs,
2600 				    uint16_t *next_to_clean,
2601 				    uint8_t offset)
2602 {
2603 	struct rte_mbuf *mbuf;
2604 	struct rte_mbuf *mbuf_head;
2605 	struct ena_rx_buffer *rx_info;
2606 	int rc;
2607 	uint16_t ntc, len, req_id, buf = 0;
2608 
2609 	if (unlikely(descs == 0))
2610 		return NULL;
2611 
2612 	ntc = *next_to_clean;
2613 
2614 	len = ena_bufs[buf].len;
2615 	req_id = ena_bufs[buf].req_id;
2616 
2617 	rx_info = &rx_ring->rx_buffer_info[req_id];
2618 
2619 	mbuf = rx_info->mbuf;
2620 	RTE_ASSERT(mbuf != NULL);
2621 
2622 	ena_init_rx_mbuf(mbuf, len);
2623 
2624 	/* Fill the mbuf head with the data specific for 1st segment. */
2625 	mbuf_head = mbuf;
2626 	mbuf_head->nb_segs = descs;
2627 	mbuf_head->port = rx_ring->port_id;
2628 	mbuf_head->pkt_len = len;
2629 	mbuf_head->data_off += offset;
2630 
2631 	rx_info->mbuf = NULL;
2632 	rx_ring->empty_rx_reqs[ntc] = req_id;
2633 	ntc = ENA_IDX_NEXT_MASKED(ntc, rx_ring->size_mask);
2634 
2635 	while (--descs) {
2636 		++buf;
2637 		len = ena_bufs[buf].len;
2638 		req_id = ena_bufs[buf].req_id;
2639 
2640 		rx_info = &rx_ring->rx_buffer_info[req_id];
2641 		RTE_ASSERT(rx_info->mbuf != NULL);
2642 
2643 		if (unlikely(len == 0)) {
2644 			/*
2645 			 * Some devices can pass a descriptor with length 0.
2646 			 * To avoid confusion, the PMD simply puts the
2647 			 * descriptor back, as it was never used; this also
2648 			 * avoids an extra mbuf allocation.
2649 			 */
2650 			rc = ena_add_single_rx_desc(rx_ring->ena_com_io_sq,
2651 				rx_info->mbuf, req_id);
2652 			if (unlikely(rc != 0)) {
2653 				/* Free the mbuf in case of an error. */
2654 				rte_mbuf_raw_free(rx_info->mbuf);
2655 			} else {
2656 				/*
2657 				 * If there was no error, just exit the loop as
2658 				 * 0 length descriptor is always the last one.
2659 				 */
2660 				break;
2661 			}
2662 		} else {
2663 			/* Create an mbuf chain. */
2664 			mbuf->next = rx_info->mbuf;
2665 			mbuf = mbuf->next;
2666 
2667 			ena_init_rx_mbuf(mbuf, len);
2668 			mbuf_head->pkt_len += len;
2669 		}
2670 
2671 		/*
2672 		 * Mark the descriptor as depleted and perform necessary
2673 		 * cleanup.
2674 		 * This code will execute in two cases:
2675 		 *  1. Descriptor len was greater than 0 - normal situation.
2676 		 *  2. Descriptor len was 0 and we failed to add the descriptor
2677 		 *     to the device. In that situation, we should try to add
2678 		 *     the mbuf again in the populate routine and mark the
2679 		 *     descriptor as used up by the device.
2680 		 */
2681 		rx_info->mbuf = NULL;
2682 		rx_ring->empty_rx_reqs[ntc] = req_id;
2683 		ntc = ENA_IDX_NEXT_MASKED(ntc, rx_ring->size_mask);
2684 	}
2685 
2686 	*next_to_clean = ntc;
2687 
2688 	return mbuf_head;
2689 }
2690 
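/*
 * Rx burst handler. The number of packets is bounded by the descriptors
 * currently owned by the device; each completed Rx context is turned into an
 * mbuf (chain), offload flags are filled in, and the ring is refilled once
 * the number of free entries reaches rx_free_thresh.
 */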
2691 static uint16_t eth_ena_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
2692 				  uint16_t nb_pkts)
2693 {
2694 	struct ena_ring *rx_ring = (struct ena_ring *)(rx_queue);
2695 	unsigned int free_queue_entries;
2696 	uint16_t next_to_clean = rx_ring->next_to_clean;
2697 	uint16_t descs_in_use;
2698 	struct rte_mbuf *mbuf;
2699 	uint16_t completed;
2700 	struct ena_com_rx_ctx ena_rx_ctx;
2701 	int i, rc = 0;
2702 	bool fill_hash;
2703 
2704 #ifdef RTE_ETHDEV_DEBUG_RX
2705 	/* Check adapter state */
2706 	if (unlikely(rx_ring->adapter->state != ENA_ADAPTER_STATE_RUNNING)) {
2707 		PMD_RX_LOG(ALERT,
2708 			"Trying to receive pkts while device is NOT running\n");
2709 		return 0;
2710 	}
2711 #endif
2712 
2713 	fill_hash = rx_ring->offloads & RTE_ETH_RX_OFFLOAD_RSS_HASH;
2714 
2715 	descs_in_use = rx_ring->ring_size -
2716 		ena_com_free_q_entries(rx_ring->ena_com_io_sq) - 1;
2717 	nb_pkts = RTE_MIN(descs_in_use, nb_pkts);
2718 
2719 	for (completed = 0; completed < nb_pkts; completed++) {
2720 		ena_rx_ctx.max_bufs = rx_ring->sgl_size;
2721 		ena_rx_ctx.ena_bufs = rx_ring->ena_bufs;
2722 		ena_rx_ctx.descs = 0;
2723 		ena_rx_ctx.pkt_offset = 0;
2724 		/* receive packet context */
2725 		rc = ena_com_rx_pkt(rx_ring->ena_com_io_cq,
2726 				    rx_ring->ena_com_io_sq,
2727 				    &ena_rx_ctx);
2728 		if (unlikely(rc)) {
2729 			PMD_RX_LOG(ERR,
2730 				"Failed to get the packet from the device, rc: %d\n",
2731 				rc);
2732 			if (rc == ENA_COM_NO_SPACE) {
2733 				++rx_ring->rx_stats.bad_desc_num;
2734 				ena_trigger_reset(rx_ring->adapter,
2735 					ENA_REGS_RESET_TOO_MANY_RX_DESCS);
2736 			} else {
2737 				++rx_ring->rx_stats.bad_req_id;
2738 				ena_trigger_reset(rx_ring->adapter,
2739 					ENA_REGS_RESET_INV_RX_REQ_ID);
2740 			}
2741 			return 0;
2742 		}
2743 
2744 		mbuf = ena_rx_mbuf(rx_ring,
2745 			ena_rx_ctx.ena_bufs,
2746 			ena_rx_ctx.descs,
2747 			&next_to_clean,
2748 			ena_rx_ctx.pkt_offset);
2749 		if (unlikely(mbuf == NULL)) {
2750 			for (i = 0; i < ena_rx_ctx.descs; ++i) {
2751 				rx_ring->empty_rx_reqs[next_to_clean] =
2752 					rx_ring->ena_bufs[i].req_id;
2753 				next_to_clean = ENA_IDX_NEXT_MASKED(
2754 					next_to_clean, rx_ring->size_mask);
2755 			}
2756 			break;
2757 		}
2758 
2759 		/* fill mbuf attributes if any */
2760 		ena_rx_mbuf_prepare(rx_ring, mbuf, &ena_rx_ctx, fill_hash);
2761 
2762 		if (unlikely(mbuf->ol_flags &
2763 				(RTE_MBUF_F_RX_IP_CKSUM_BAD | RTE_MBUF_F_RX_L4_CKSUM_BAD)))
2764 			rte_atomic64_inc(&rx_ring->adapter->drv_stats->ierrors);
2765 
2766 		rx_pkts[completed] = mbuf;
2767 		rx_ring->rx_stats.bytes += mbuf->pkt_len;
2768 	}
2769 
2770 	rx_ring->rx_stats.cnt += completed;
2771 	rx_ring->next_to_clean = next_to_clean;
2772 
2773 	free_queue_entries = ena_com_free_q_entries(rx_ring->ena_com_io_sq);
2774 
2775 	/* Burst refill to save doorbells, memory barriers, const interval */
2776 	if (free_queue_entries >= rx_ring->rx_free_thresh) {
2777 		ena_populate_rx_queue(rx_ring, free_queue_entries);
2778 	}
2779 
2780 	return completed;
2781 }
2782 
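/*
 * Tx prepare handler (rte_eth_tx_prepare()). Validates the requested
 * offloads against the device capabilities and computes the pseudo-header
 * checksum in SW when the device supports only partial L4 checksum.
 * Illustrative call pattern from an application (port/queue values are just
 * placeholders):
 *
 *   nb_prep = rte_eth_tx_prepare(port_id, queue_id, pkts, nb_pkts);
 *   nb_sent = rte_eth_tx_burst(port_id, queue_id, pkts, nb_prep);
 */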
2783 static uint16_t
2784 eth_ena_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
2785 		uint16_t nb_pkts)
2786 {
2787 	int32_t ret;
2788 	uint32_t i;
2789 	struct rte_mbuf *m;
2790 	struct ena_ring *tx_ring = (struct ena_ring *)(tx_queue);
2791 	struct ena_adapter *adapter = tx_ring->adapter;
2792 	struct rte_ipv4_hdr *ip_hdr;
2793 	uint64_t ol_flags;
2794 	uint64_t l4_csum_flag;
2795 	uint64_t dev_offload_capa;
2796 	uint16_t frag_field;
2797 	bool need_pseudo_csum;
2798 
2799 	dev_offload_capa = adapter->offloads.tx_offloads;
2800 	for (i = 0; i != nb_pkts; i++) {
2801 		m = tx_pkts[i];
2802 		ol_flags = m->ol_flags;
2803 
2804 		/* Check if any offload flag was set */
2805 		if (ol_flags == 0)
2806 			continue;
2807 
2808 		l4_csum_flag = ol_flags & RTE_MBUF_F_TX_L4_MASK;
2809 		/* SCTP checksum offload is not supported by the ENA. */
2810 		if ((ol_flags & ENA_TX_OFFLOAD_NOTSUP_MASK) ||
2811 		    l4_csum_flag == RTE_MBUF_F_TX_SCTP_CKSUM) {
2812 			PMD_TX_LOG(DEBUG,
2813 				"mbuf[%" PRIu32 "] has unsupported offload flags set: 0x%" PRIx64 "\n",
2814 				i, ol_flags);
2815 			rte_errno = ENOTSUP;
2816 			return i;
2817 		}
2818 
2819 		if (unlikely(m->nb_segs >= tx_ring->sgl_size &&
2820 		    !(tx_ring->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV &&
2821 		      m->nb_segs == tx_ring->sgl_size &&
2822 		      m->data_len < tx_ring->tx_max_header_size))) {
2823 			PMD_TX_LOG(DEBUG,
2824 				"mbuf[%" PRIu32 "] has too many segments: %" PRIu16 "\n",
2825 				i, m->nb_segs);
2826 			rte_errno = EINVAL;
2827 			return i;
2828 		}
2829 
2830 #ifdef RTE_LIBRTE_ETHDEV_DEBUG
2831 		/* Check if requested offload is also enabled for the queue */
2832 		if ((ol_flags & RTE_MBUF_F_TX_IP_CKSUM &&
2833 		     !(tx_ring->offloads & RTE_ETH_TX_OFFLOAD_IPV4_CKSUM)) ||
2834 		    (l4_csum_flag == RTE_MBUF_F_TX_TCP_CKSUM &&
2835 		     !(tx_ring->offloads & RTE_ETH_TX_OFFLOAD_TCP_CKSUM)) ||
2836 		    (l4_csum_flag == RTE_MBUF_F_TX_UDP_CKSUM &&
2837 		     !(tx_ring->offloads & RTE_ETH_TX_OFFLOAD_UDP_CKSUM))) {
2838 			PMD_TX_LOG(DEBUG,
2839 				"mbuf[%" PRIu32 "]: requested offloads 0x%" PRIx64 " are not enabled for the queue[%u]\n",
2840 				i, ol_flags, tx_ring->id);
2841 			rte_errno = EINVAL;
2842 			return i;
2843 		}
2844 
2845 		/* The caller is obligated to set l2 and l3 len if any cksum
2846 		 * offload is enabled.
2847 		 */
2848 		if (unlikely(ol_flags & (RTE_MBUF_F_TX_IP_CKSUM | RTE_MBUF_F_TX_L4_MASK) &&
2849 		    (m->l2_len == 0 || m->l3_len == 0))) {
2850 			PMD_TX_LOG(DEBUG,
2851 				"mbuf[%" PRIu32 "]: l2_len or l3_len values are 0 while the offload was requested\n",
2852 				i);
2853 			rte_errno = EINVAL;
2854 			return i;
2855 		}
2856 		ret = rte_validate_tx_offload(m);
2857 		if (ret != 0) {
2858 			rte_errno = -ret;
2859 			return i;
2860 		}
2861 #endif
2862 
2863 		/* Verify HW support for requested offloads and determine if
2864 		 * pseudo header checksum is needed.
2865 		 */
2866 		need_pseudo_csum = false;
2867 		if (ol_flags & RTE_MBUF_F_TX_IPV4) {
2868 			if (ol_flags & RTE_MBUF_F_TX_IP_CKSUM &&
2869 			    !(dev_offload_capa & ENA_L3_IPV4_CSUM)) {
2870 				rte_errno = ENOTSUP;
2871 				return i;
2872 			}
2873 
2874 			if (ol_flags & RTE_MBUF_F_TX_TCP_SEG &&
2875 			    !(dev_offload_capa & ENA_IPV4_TSO)) {
2876 				rte_errno = ENOTSUP;
2877 				return i;
2878 			}
2879 
2880 			/* Check HW capabilities and if pseudo csum is needed
2881 			 * for L4 offloads.
2882 			 */
2883 			if (l4_csum_flag != RTE_MBUF_F_TX_L4_NO_CKSUM &&
2884 			    !(dev_offload_capa & ENA_L4_IPV4_CSUM)) {
2885 				if (dev_offload_capa &
2886 				    ENA_L4_IPV4_CSUM_PARTIAL) {
2887 					need_pseudo_csum = true;
2888 				} else {
2889 					rte_errno = ENOTSUP;
2890 					return i;
2891 				}
2892 			}
2893 
2894 			/* Parse the DF flag */
2895 			ip_hdr = rte_pktmbuf_mtod_offset(m,
2896 				struct rte_ipv4_hdr *, m->l2_len);
2897 			frag_field = rte_be_to_cpu_16(ip_hdr->fragment_offset);
2898 			if (frag_field & RTE_IPV4_HDR_DF_FLAG) {
2899 				m->packet_type |= RTE_PTYPE_L4_NONFRAG;
2900 			} else if (ol_flags & RTE_MBUF_F_TX_TCP_SEG) {
2901 				/* When TSO is requested and DF is not set
2902 				 * (DF=0), the hardware must be provided with
2903 				 * the partial (pseudo-header) checksum.
2904 				 */
2905 				need_pseudo_csum = true;
2906 			}
2907 		} else if (ol_flags & RTE_MBUF_F_TX_IPV6) {
2908 			/* There is no support for IPv6 TSO for now. */
2909 			if (ol_flags & RTE_MBUF_F_TX_TCP_SEG) {
2910 				rte_errno = ENOTSUP;
2911 				return i;
2912 			}
2913 
2914 			/* Check HW capabilities and if pseudo csum is needed */
2915 			if (l4_csum_flag != RTE_MBUF_F_TX_L4_NO_CKSUM &&
2916 			    !(dev_offload_capa & ENA_L4_IPV6_CSUM)) {
2917 				if (dev_offload_capa &
2918 				    ENA_L4_IPV6_CSUM_PARTIAL) {
2919 					need_pseudo_csum = true;
2920 				} else {
2921 					rte_errno = ENOTSUP;
2922 					return i;
2923 				}
2924 			}
2925 		}
2926 
2927 		if (need_pseudo_csum) {
2928 			ret = rte_net_intel_cksum_flags_prepare(m, ol_flags);
2929 			if (ret != 0) {
2930 				rte_errno = -ret;
2931 				return i;
2932 			}
2933 		}
2934 	}
2935 
2936 	return i;
2937 }
2938 
2939 static void ena_update_hints(struct ena_adapter *adapter,
2940 			     struct ena_admin_ena_hw_hints *hints)
2941 {
2942 	if (hints->admin_completion_tx_timeout)
2943 		adapter->ena_dev.admin_queue.completion_timeout =
2944 			hints->admin_completion_tx_timeout * 1000;
2945 
2946 	if (hints->mmio_read_timeout)
2947 		/* convert to usec */
2948 		adapter->ena_dev.mmio_read.reg_read_to =
2949 			hints->mmio_read_timeout * 1000;
2950 
2951 	if (hints->driver_watchdog_timeout) {
2952 		if (hints->driver_watchdog_timeout == ENA_HW_HINTS_NO_TIMEOUT)
2953 			adapter->keep_alive_timeout = ENA_HW_HINTS_NO_TIMEOUT;
2954 		else
2955 			/* Convert msecs to ticks */
2956 			adapter->keep_alive_timeout =
2957 				(hints->driver_watchdog_timeout *
2958 				rte_get_timer_hz()) / 1000;
2959 	}
2960 }
2961 
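/*
 * Map the mbuf segments into the ena_com_buf array of tx_info. In LLQ mode
 * up to tx_max_header_size bytes are pushed inline to the device; when the
 * push header spans more than one segment it is first copied into the
 * intermediate buffer.
 */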
2962 static void ena_tx_map_mbuf(struct ena_ring *tx_ring,
2963 	struct ena_tx_buffer *tx_info,
2964 	struct rte_mbuf *mbuf,
2965 	void **push_header,
2966 	uint16_t *header_len)
2967 {
2968 	struct ena_com_buf *ena_buf;
2969 	uint16_t delta, seg_len, push_len;
2970 
2971 	delta = 0;
2972 	seg_len = mbuf->data_len;
2973 
2974 	tx_info->mbuf = mbuf;
2975 	ena_buf = tx_info->bufs;
2976 
2977 	if (tx_ring->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) {
2978 		/*
2979 		 * The Tx header might be (and in most cases will be) smaller
2980 		 * than tx_max_header_size. It's not an issue to send more data
2981 		 * to the device than actually needed when the mbuf is larger
2982 		 * than tx_max_header_size.
2983 		 */
2984 		push_len = RTE_MIN(mbuf->pkt_len, tx_ring->tx_max_header_size);
2985 		*header_len = push_len;
2986 
2987 		if (likely(push_len <= seg_len)) {
2988 			/* If the push header is in the single segment, then
2989 			 * just point it to the 1st mbuf data.
2990 			 */
2991 			*push_header = rte_pktmbuf_mtod(mbuf, uint8_t *);
2992 		} else {
2993 			/* If the push header spans several segments, copy
2994 			 * it to the intermediate buffer.
2995 			 */
2996 			rte_pktmbuf_read(mbuf, 0, push_len,
2997 				tx_ring->push_buf_intermediate_buf);
2998 			*push_header = tx_ring->push_buf_intermediate_buf;
2999 			delta = push_len - seg_len;
3000 		}
3001 	} else {
3002 		*push_header = NULL;
3003 		*header_len = 0;
3004 		push_len = 0;
3005 	}
3006 
3007 	/* Process first segment taking into consideration pushed header */
3008 	if (seg_len > push_len) {
3009 		ena_buf->paddr = mbuf->buf_iova +
3010 				mbuf->data_off +
3011 				push_len;
3012 		ena_buf->len = seg_len - push_len;
3013 		ena_buf++;
3014 		tx_info->num_of_bufs++;
3015 	}
3016 
3017 	while ((mbuf = mbuf->next) != NULL) {
3018 		seg_len = mbuf->data_len;
3019 
3020 		/* Skip mbufs if whole data is pushed as a header */
3021 		if (unlikely(delta > seg_len)) {
3022 			delta -= seg_len;
3023 			continue;
3024 		}
3025 
3026 		ena_buf->paddr = mbuf->buf_iova + mbuf->data_off + delta;
3027 		ena_buf->len = seg_len - delta;
3028 		ena_buf++;
3029 		tx_info->num_of_bufs++;
3030 
3031 		delta = 0;
3032 	}
3033 }
3034 
3035 static int ena_xmit_mbuf(struct ena_ring *tx_ring, struct rte_mbuf *mbuf)
3036 {
3037 	struct ena_tx_buffer *tx_info;
3038 	struct ena_com_tx_ctx ena_tx_ctx = { { 0 } };
3039 	uint16_t next_to_use;
3040 	uint16_t header_len;
3041 	uint16_t req_id;
3042 	void *push_header;
3043 	int nb_hw_desc;
3044 	int rc;
3045 
3046 	/* Check for space for 2 additional descriptors: one for a possible
3047 	 * header split and one for the metadata descriptor.
3048 	 */
3049 	if (!ena_com_sq_have_enough_space(tx_ring->ena_com_io_sq,
3050 					  mbuf->nb_segs + 2)) {
3051 		PMD_DRV_LOG(DEBUG, "Not enough space in the tx queue\n");
3052 		return ENA_COM_NO_MEM;
3053 	}
3054 
3055 	next_to_use = tx_ring->next_to_use;
3056 
3057 	req_id = tx_ring->empty_tx_reqs[next_to_use];
3058 	tx_info = &tx_ring->tx_buffer_info[req_id];
3059 	tx_info->num_of_bufs = 0;
3060 	RTE_ASSERT(tx_info->mbuf == NULL);
3061 
3062 	ena_tx_map_mbuf(tx_ring, tx_info, mbuf, &push_header, &header_len);
3063 
3064 	ena_tx_ctx.ena_bufs = tx_info->bufs;
3065 	ena_tx_ctx.push_header = push_header;
3066 	ena_tx_ctx.num_bufs = tx_info->num_of_bufs;
3067 	ena_tx_ctx.req_id = req_id;
3068 	ena_tx_ctx.header_len = header_len;
3069 
3070 	/* Set Tx offloads flags, if applicable */
3071 	ena_tx_mbuf_prepare(mbuf, &ena_tx_ctx, tx_ring->offloads,
3072 		tx_ring->disable_meta_caching);
3073 
3074 	if (unlikely(ena_com_is_doorbell_needed(tx_ring->ena_com_io_sq,
3075 			&ena_tx_ctx))) {
3076 		PMD_TX_LOG(DEBUG,
3077 			"LLQ Tx max burst size of queue %d achieved, writing doorbell to send burst\n",
3078 			tx_ring->id);
3079 		ena_com_write_sq_doorbell(tx_ring->ena_com_io_sq);
3080 		tx_ring->tx_stats.doorbells++;
3081 		tx_ring->pkts_without_db = false;
3082 	}
3083 
3084 	/* Prepare the packet's descriptors for the DMA engine */
3085 	rc = ena_com_prepare_tx(tx_ring->ena_com_io_sq,	&ena_tx_ctx,
3086 		&nb_hw_desc);
3087 	if (unlikely(rc)) {
3088 		PMD_DRV_LOG(ERR, "Failed to prepare Tx buffers, rc: %d\n", rc);
3089 		++tx_ring->tx_stats.prepare_ctx_err;
3090 		ena_trigger_reset(tx_ring->adapter,
3091 			ENA_REGS_RESET_DRIVER_INVALID_STATE);
3092 		return rc;
3093 	}
3094 
3095 	tx_info->tx_descs = nb_hw_desc;
3096 	tx_info->timestamp = rte_get_timer_cycles();
3097 
3098 	tx_ring->tx_stats.cnt++;
3099 	tx_ring->tx_stats.bytes += mbuf->pkt_len;
3100 
3101 	tx_ring->next_to_use = ENA_IDX_NEXT_MASKED(next_to_use,
3102 		tx_ring->size_mask);
3103 
3104 	return 0;
3105 }
3106 
3107 static __rte_always_inline size_t
3108 ena_tx_cleanup_mbuf_fast(struct rte_mbuf **mbufs_to_clean,
3109 			 struct rte_mbuf *mbuf,
3110 			 size_t mbuf_cnt,
3111 			 size_t buf_size)
3112 {
3113 	struct rte_mbuf *m_next;
3114 
3115 	while (mbuf != NULL) {
3116 		m_next = mbuf->next;
3117 		mbufs_to_clean[mbuf_cnt++] = mbuf;
3118 		if (mbuf_cnt == buf_size) {
3119 			rte_mempool_put_bulk(mbufs_to_clean[0]->pool, (void **)mbufs_to_clean,
3120 				(unsigned int)mbuf_cnt);
3121 			mbuf_cnt = 0;
3122 		}
3123 		mbuf = m_next;
3124 	}
3125 
3126 	return mbuf_cnt;
3127 }
3128 
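/*
 * Release completed Tx packets. Called from the Tx burst path when the
 * number of free descriptors drops below tx_free_thresh, and registered as
 * the tx_done_cleanup dev op so an application can trigger it directly, e.g.:
 *
 *   rte_eth_tx_done_cleanup(port_id, queue_id, 0);
 *
 * free_pkt_cnt == 0 requests a full cleanup. With MBUF_FAST_FREE enabled,
 * mbufs are returned to their mempool in bulk via the local cleanup buffer.
 */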
3129 static int ena_tx_cleanup(void *txp, uint32_t free_pkt_cnt)
3130 {
3131 	struct rte_mbuf *mbufs_to_clean[ENA_CLEANUP_BUF_SIZE];
3132 	struct ena_ring *tx_ring = (struct ena_ring *)txp;
3133 	size_t mbuf_cnt = 0;
3134 	unsigned int total_tx_descs = 0;
3135 	unsigned int total_tx_pkts = 0;
3136 	uint16_t cleanup_budget;
3137 	uint16_t next_to_clean = tx_ring->next_to_clean;
3138 	bool fast_free = tx_ring->offloads & RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE;
3139 
3140 	/*
3141 	 * If free_pkt_cnt is equal to 0, it means that the user requested
3142 	 * full cleanup, so attempt to release all Tx descriptors
3143 	 * (ring_size - 1 -> size_mask)
3144 	 */
3145 	cleanup_budget = (free_pkt_cnt == 0) ? tx_ring->size_mask : free_pkt_cnt;
3146 
3147 	while (likely(total_tx_pkts < cleanup_budget)) {
3148 		struct rte_mbuf *mbuf;
3149 		struct ena_tx_buffer *tx_info;
3150 		uint16_t req_id;
3151 
3152 		if (ena_com_tx_comp_req_id_get(tx_ring->ena_com_io_cq, &req_id) != 0)
3153 			break;
3154 
3155 		if (unlikely(validate_tx_req_id(tx_ring, req_id) != 0))
3156 			break;
3157 
3158 		/* Get Tx info & store how many descs were processed  */
3159 		tx_info = &tx_ring->tx_buffer_info[req_id];
3160 		tx_info->timestamp = 0;
3161 
3162 		mbuf = tx_info->mbuf;
3163 		if (fast_free) {
3164 			mbuf_cnt = ena_tx_cleanup_mbuf_fast(mbufs_to_clean, mbuf, mbuf_cnt,
3165 				ENA_CLEANUP_BUF_SIZE);
3166 		} else {
3167 			rte_pktmbuf_free(mbuf);
3168 		}
3169 
3170 		tx_info->mbuf = NULL;
3171 		tx_ring->empty_tx_reqs[next_to_clean] = req_id;
3172 
3173 		total_tx_descs += tx_info->tx_descs;
3174 		total_tx_pkts++;
3175 
3176 		/* Put back descriptor to the ring for reuse */
3177 		next_to_clean = ENA_IDX_NEXT_MASKED(next_to_clean,
3178 			tx_ring->size_mask);
3179 	}
3180 
3181 	if (likely(total_tx_descs > 0)) {
3182 		/* acknowledge completion of sent packets */
3183 		tx_ring->next_to_clean = next_to_clean;
3184 		ena_com_comp_ack(tx_ring->ena_com_io_sq, total_tx_descs);
3185 	}
3186 
3187 	if (mbuf_cnt != 0)
3188 		rte_mempool_put_bulk(mbufs_to_clean[0]->pool,
3189 			(void **)mbufs_to_clean, mbuf_cnt);
3190 
3191 	/* Record the time of the full cleanup for the missing Tx completion watchdog */
3192 	if (free_pkt_cnt == 0 || total_tx_pkts < cleanup_budget)
3193 		tx_ring->last_cleanup_ticks = rte_get_timer_cycles();
3194 
3195 	return total_tx_pkts;
3196 }
3197 
3198 static uint16_t eth_ena_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
3199 				  uint16_t nb_pkts)
3200 {
3201 	struct ena_ring *tx_ring = (struct ena_ring *)(tx_queue);
3202 	int available_desc;
3203 	uint16_t sent_idx = 0;
3204 
3205 #ifdef RTE_ETHDEV_DEBUG_TX
3206 	/* Check adapter state */
3207 	if (unlikely(tx_ring->adapter->state != ENA_ADAPTER_STATE_RUNNING)) {
3208 		PMD_TX_LOG(ALERT,
3209 			"Trying to xmit pkts while device is NOT running\n");
3210 		return 0;
3211 	}
3212 #endif
3213 
3214 	available_desc = ena_com_free_q_entries(tx_ring->ena_com_io_sq);
3215 	if (available_desc < tx_ring->tx_free_thresh)
3216 		ena_tx_cleanup((void *)tx_ring, 0);
3217 
3218 	for (sent_idx = 0; sent_idx < nb_pkts; sent_idx++) {
3219 		if (ena_xmit_mbuf(tx_ring, tx_pkts[sent_idx]))
3220 			break;
3221 		tx_ring->pkts_without_db = true;
3222 		rte_prefetch0(tx_pkts[ENA_IDX_ADD_MASKED(sent_idx, 4,
3223 			tx_ring->size_mask)]);
3224 	}
3225 
3226 	/* If there are ready packets to be xmitted... */
3227 	if (likely(tx_ring->pkts_without_db)) {
3228 		/* ...let HW do its best :-) */
3229 		ena_com_write_sq_doorbell(tx_ring->ena_com_io_sq);
3230 		tx_ring->tx_stats.doorbells++;
3231 		tx_ring->pkts_without_db = false;
3232 	}
3233 
3234 	tx_ring->tx_stats.available_desc =
3235 		ena_com_free_q_entries(tx_ring->ena_com_io_sq);
3236 	tx_ring->tx_stats.tx_poll++;
3237 
3238 	return sent_idx;
3239 }
3240 
3241 static void ena_copy_customer_metrics(struct ena_adapter *adapter, uint64_t *buf,
3242 					     size_t num_metrics)
3243 {
3244 	struct ena_com_dev *ena_dev = &adapter->ena_dev;
3245 	int rc;
3246 
3247 	if (ena_com_get_cap(ena_dev, ENA_ADMIN_CUSTOMER_METRICS)) {
3248 		if (num_metrics != ENA_STATS_ARRAY_METRICS) {
3249 			PMD_DRV_LOG(ERR, "Detected discrepancy in the number of customer metrics\n");
3250 			return;
3251 		}
3252 		rte_spinlock_lock(&adapter->admin_lock);
3253 		rc = ENA_PROXY(adapter,
3254 					ena_com_get_customer_metrics,
3255 					&adapter->ena_dev,
3256 					(char *)buf,
3257 					num_metrics * sizeof(uint64_t));
3258 		rte_spinlock_unlock(&adapter->admin_lock);
3259 		if (rc != 0) {
3260 			PMD_DRV_LOG(WARNING, "Failed to get customer metrics, rc: %d\n", rc);
3261 			return;
3262 		}
3263 
3264 	} else if (ena_com_get_cap(ena_dev, ENA_ADMIN_ENI_STATS)) {
3265 		if (num_metrics != ENA_STATS_ARRAY_METRICS_LEGACY) {
3266 			PMD_DRV_LOG(ERR, "Detected discrepancy in the number of legacy metrics\n");
3267 			return;
3268 		}
3269 
3270 		rte_spinlock_lock(&adapter->admin_lock);
3271 		rc = ENA_PROXY(adapter,
3272 			       ena_com_get_eni_stats,
3273 			       &adapter->ena_dev,
3274 			       (struct ena_admin_eni_stats *)buf);
3275 		rte_spinlock_unlock(&adapter->admin_lock);
3276 		if (rc != 0) {
3277 			PMD_DRV_LOG(WARNING,
3278 				"Failed to get ENI metrics, rc: %d\n", rc);
3279 			return;
3280 		}
3281 	}
3282 }
3283 
3284 static void ena_copy_ena_srd_info(struct ena_adapter *adapter,
3285 		struct ena_stats_srd *srd_info)
3286 {
3287 	int rc;
3288 
3289 	if (!ena_com_get_cap(&adapter->ena_dev, ENA_ADMIN_ENA_SRD_INFO))
3290 		return;
3291 
3292 	rte_spinlock_lock(&adapter->admin_lock);
3293 	rc = ENA_PROXY(adapter,
3294 		       ena_com_get_ena_srd_info,
3295 		       &adapter->ena_dev,
3296 		       (struct ena_admin_ena_srd_info *)srd_info);
3297 	rte_spinlock_unlock(&adapter->admin_lock);
3298 	if (rc != ENA_COM_OK && rc != ENA_COM_UNSUPPORTED) {
3299 		PMD_DRV_LOG(WARNING,
3300 				"Failed to get ENA express srd info, rc: %d\n", rc);
3301 		return;
3302 	}
3303 }
3304 
3305 /**
3306  * DPDK callback to retrieve names of extended device statistics
3307  *
3308  * @param dev
3309  *   Pointer to Ethernet device structure.
3310  * @param[out] xstats_names
3311  *   Buffer to insert names into.
3312  * @param n
3313  *   Number of names.
3314  *
3315  * @return
3316  *   Number of xstats names.
3317  */
3318 static int ena_xstats_get_names(struct rte_eth_dev *dev,
3319 				struct rte_eth_xstat_name *xstats_names,
3320 				unsigned int n)
3321 {
3322 	struct ena_adapter *adapter = dev->data->dev_private;
3323 	unsigned int xstats_count = ena_xstats_calc_num(dev->data);
3324 	unsigned int stat, i, count = 0;
3325 
3326 	if (n < xstats_count || !xstats_names)
3327 		return xstats_count;
3328 
3329 	for (stat = 0; stat < ENA_STATS_ARRAY_GLOBAL; stat++, count++)
3330 		strcpy(xstats_names[count].name,
3331 			ena_stats_global_strings[stat].name);
3332 
3333 	for (stat = 0; stat < adapter->metrics_num; stat++, count++)
3334 		rte_strscpy(xstats_names[count].name,
3335 			    ena_stats_metrics_strings[stat].name,
3336 			    RTE_ETH_XSTATS_NAME_SIZE);
3337 	for (stat = 0; stat < ENA_STATS_ARRAY_ENA_SRD; stat++, count++)
3338 		rte_strscpy(xstats_names[count].name,
3339 			    ena_stats_srd_strings[stat].name,
3340 			    RTE_ETH_XSTATS_NAME_SIZE);
3341 
3342 	for (stat = 0; stat < ENA_STATS_ARRAY_RX; stat++)
3343 		for (i = 0; i < dev->data->nb_rx_queues; i++, count++)
3344 			snprintf(xstats_names[count].name,
3345 				sizeof(xstats_names[count].name),
3346 				"rx_q%d_%s", i,
3347 				ena_stats_rx_strings[stat].name);
3348 
3349 	for (stat = 0; stat < ENA_STATS_ARRAY_TX; stat++)
3350 		for (i = 0; i < dev->data->nb_tx_queues; i++, count++)
3351 			snprintf(xstats_names[count].name,
3352 				sizeof(xstats_names[count].name),
3353 				"tx_q%d_%s", i,
3354 				ena_stats_tx_strings[stat].name);
3355 
3356 	return xstats_count;
3357 }
3358 
3359 /**
3360  * DPDK callback to retrieve names of extended device statistics for the given
3361  * ids.
3362  *
3363  * @param dev
3364  *   Pointer to Ethernet device structure.
3365  * @param[out] xstats_names
3366  *   Buffer to insert names into.
3367  * @param ids
3368  *   IDs array for which the names should be retrieved.
3369  * @param size
3370  *   Number of ids.
3371  *
3372  * @return
3373  *   Positive value: number of xstats names. Negative value: error code.
3374  */
3375 static int ena_xstats_get_names_by_id(struct rte_eth_dev *dev,
3376 				      const uint64_t *ids,
3377 				      struct rte_eth_xstat_name *xstats_names,
3378 				      unsigned int size)
3379 {
3380 	struct ena_adapter *adapter = dev->data->dev_private;
3381 	uint64_t xstats_count = ena_xstats_calc_num(dev->data);
3382 	uint64_t id, qid;
3383 	unsigned int i;
3384 
3385 	if (xstats_names == NULL)
3386 		return xstats_count;
3387 
3388 	for (i = 0; i < size; ++i) {
3389 		id = ids[i];
3390 		if (id >= xstats_count) {
3391 			PMD_DRV_LOG(ERR,
3392 				"ID value out of range: id=%" PRIu64 ", xstats_num=%" PRIu64 "\n",
3393 				 id, xstats_count);
3394 			return -EINVAL;
3395 		}
3396 
3397 		if (id < ENA_STATS_ARRAY_GLOBAL) {
3398 			strcpy(xstats_names[i].name,
3399 			       ena_stats_global_strings[id].name);
3400 			continue;
3401 		}
3402 
3403 		id -= ENA_STATS_ARRAY_GLOBAL;
3404 		if (id < adapter->metrics_num) {
3405 			rte_strscpy(xstats_names[i].name,
3406 				    ena_stats_metrics_strings[id].name,
3407 				    RTE_ETH_XSTATS_NAME_SIZE);
3408 			continue;
3409 		}
3410 
3411 		id -= adapter->metrics_num;
3412 
3413 		if (id < ENA_STATS_ARRAY_ENA_SRD) {
3414 			rte_strscpy(xstats_names[i].name,
3415 				    ena_stats_srd_strings[id].name,
3416 				    RTE_ETH_XSTATS_NAME_SIZE);
3417 			continue;
3418 		}
3419 		id -= ENA_STATS_ARRAY_ENA_SRD;
3420 
3421 		if (id < ENA_STATS_ARRAY_RX) {
3422 			qid = id % dev->data->nb_rx_queues;
3423 			id /= dev->data->nb_rx_queues;
3424 			snprintf(xstats_names[i].name,
3425 				 sizeof(xstats_names[i].name),
3426 				 "rx_q%" PRIu64 "d_%s",
3427 				 qid, ena_stats_rx_strings[id].name);
3428 			continue;
3429 		}
3430 
3431 		id -= ENA_STATS_ARRAY_RX;
3432 		/* Although this condition is not needed, it was added for
3433 		 * compatibility if new xstat structure would be ever added.
3434 		 */
3435 		if (id < ENA_STATS_ARRAY_TX) {
3436 			qid = id % dev->data->nb_tx_queues;
3437 			id /= dev->data->nb_tx_queues;
3438 			snprintf(xstats_names[i].name,
3439 				 sizeof(xstats_names[i].name),
3440 				 "tx_q%" PRIu64 "_%s",
3441 				 qid, ena_stats_tx_strings[id].name);
3442 			continue;
3443 		}
3444 	}
3445 
3446 	return i;
3447 }
3448 
3449 /**
3450  * DPDK callback to get extended device statistics.
3451  *
3452  * @param dev
3453  *   Pointer to Ethernet device structure.
3454  * @param[out] stats
3455  *   Stats table output buffer.
3456  * @param n
3457  *   The size of the stats table.
3458  *
3459  * @return
3460  *   Number of xstats on success, negative on failure.
3461  */
3462 static int ena_xstats_get(struct rte_eth_dev *dev,
3463 			  struct rte_eth_xstat *xstats,
3464 			  unsigned int n)
3465 {
3466 	struct ena_adapter *adapter = dev->data->dev_private;
3467 	unsigned int xstats_count = ena_xstats_calc_num(dev->data);
3468 	unsigned int stat, i, count = 0;
3469 	int stat_offset;
3470 	void *stats_begin;
3471 	uint64_t metrics_stats[ENA_MAX_CUSTOMER_METRICS];
3472 	struct ena_stats_srd srd_info = {0};
3473 
3474 	if (n < xstats_count)
3475 		return xstats_count;
3476 
3477 	if (!xstats)
3478 		return 0;
3479 
3480 	for (stat = 0; stat < ENA_STATS_ARRAY_GLOBAL; stat++, count++) {
3481 		stat_offset = ena_stats_global_strings[stat].stat_offset;
3482 		stats_begin = &adapter->dev_stats;
3483 
3484 		xstats[count].id = count;
3485 		xstats[count].value = *((uint64_t *)
3486 			((char *)stats_begin + stat_offset));
3487 	}
3488 
3489 	ena_copy_customer_metrics(adapter, metrics_stats, adapter->metrics_num);
3490 	stats_begin = metrics_stats;
3491 	for (stat = 0; stat < adapter->metrics_num; stat++, count++) {
3492 		stat_offset = ena_stats_metrics_strings[stat].stat_offset;
3493 
3494 		xstats[count].id = count;
3495 		xstats[count].value = *((uint64_t *)
3496 		    ((char *)stats_begin + stat_offset));
3497 	}
3498 
3499 	ena_copy_ena_srd_info(adapter, &srd_info);
3500 	stats_begin = &srd_info;
3501 	for (stat = 0; stat < ENA_STATS_ARRAY_ENA_SRD; stat++, count++) {
3502 		stat_offset = ena_stats_srd_strings[stat].stat_offset;
3503 		xstats[count].id = count;
3504 		xstats[count].value = *((uint64_t *)
3505 		    ((char *)stats_begin + stat_offset));
3506 	}
3507 
3508 	for (stat = 0; stat < ENA_STATS_ARRAY_RX; stat++) {
3509 		for (i = 0; i < dev->data->nb_rx_queues; i++, count++) {
3510 			stat_offset = ena_stats_rx_strings[stat].stat_offset;
3511 			stats_begin = &adapter->rx_ring[i].rx_stats;
3512 
3513 			xstats[count].id = count;
3514 			xstats[count].value = *((uint64_t *)
3515 				((char *)stats_begin + stat_offset));
3516 		}
3517 	}
3518 
3519 	for (stat = 0; stat < ENA_STATS_ARRAY_TX; stat++) {
3520 		for (i = 0; i < dev->data->nb_tx_queues; i++, count++) {
3521 			stat_offset = ena_stats_tx_strings[stat].stat_offset;
3522 			stats_begin = &adapter->tx_ring[i].tx_stats;
3523 
3524 			xstats[count].id = count;
3525 			xstats[count].value = *((uint64_t *)
3526 				((char *)stats_begin + stat_offset));
3527 		}
3528 	}
3529 
3530 	return count;
3531 }
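
/*
 * Illustrative, application-side sketch (not part of this driver): fetching
 * the values filled in by the callback above. As implemented here, a buffer
 * smaller than the full set makes the callback return the required count, so
 * the buffer is usually sized first. port_id is an assumption for the example.
 *
 *	int nb = rte_eth_xstats_get(port_id, NULL, 0);
 *	struct rte_eth_xstat *xstats = calloc(nb, sizeof(*xstats));
 *
 *	if (xstats != NULL)
 *		nb = rte_eth_xstats_get(port_id, xstats, nb);
 */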
3532 
3533 static int ena_xstats_get_by_id(struct rte_eth_dev *dev,
3534 				const uint64_t *ids,
3535 				uint64_t *values,
3536 				unsigned int n)
3537 {
3538 	struct ena_adapter *adapter = dev->data->dev_private;
3539 	uint64_t id;
3540 	uint64_t rx_entries, tx_entries;
3541 	unsigned int i;
3542 	int qid;
3543 	int valid = 0;
3544 	bool were_metrics_copied = false;
3545 	bool was_srd_info_copied = false;
3546 	uint64_t metrics_stats[ENA_MAX_CUSTOMER_METRICS];
3547 	struct ena_stats_srd srd_info = {0};
3548 
3549 	for (i = 0; i < n; ++i) {
3550 		id = ids[i];
3551 		/* Check if id belongs to global statistics */
3552 		if (id < ENA_STATS_ARRAY_GLOBAL) {
3553 			values[i] = *((uint64_t *)&adapter->dev_stats + id);
3554 			++valid;
3555 			continue;
3556 		}
3557 
3558 		/* Check if id belongs to the metrics statistics */
3559 		id -= ENA_STATS_ARRAY_GLOBAL;
3560 		if (id < adapter->metrics_num) {
3561 			/* Avoid reading metrics multiple times in a single
3562 			 * function call, as it requires communication with the
3563 			 * admin queue.
3564 			 */
3565 			if (!were_metrics_copied) {
3566 				were_metrics_copied = true;
3567 				ena_copy_customer_metrics(adapter,
3568 						metrics_stats,
3569 						adapter->metrics_num);
3570 			}
3571 
3572 			values[i] = *((uint64_t *)&metrics_stats + id);
3573 			++valid;
3574 			continue;
3575 		}
3576 
3577 		/* Check if id belongs to SRD info statistics */
3578 		id -= adapter->metrics_num;
3579 
3580 		if (id < ENA_STATS_ARRAY_ENA_SRD) {
3581 			/*
3582 			 * Avoid reading srd info multiple times in a single
3583 			 * function call, as it requires communication with the
3584 			 * admin queue.
3585 			 */
3586 			if (!was_srd_info_copied) {
3587 				was_srd_info_copied = true;
3588 				ena_copy_ena_srd_info(adapter, &srd_info);
3589 			}
3590 			values[i] = *((uint64_t *)&adapter->srd_stats + id);
3591 			++valid;
3592 			continue;
3593 		}
3594 
3595 		/* Check if id belongs to rx queue statistics */
3596 		id -= ENA_STATS_ARRAY_ENA_SRD;
3597 
3598 		rx_entries = ENA_STATS_ARRAY_RX * dev->data->nb_rx_queues;
3599 		if (id < rx_entries) {
3600 			qid = id % dev->data->nb_rx_queues;
3601 			id /= dev->data->nb_rx_queues;
3602 			values[i] = *((uint64_t *)
3603 				&adapter->rx_ring[qid].rx_stats + id);
3604 			++valid;
3605 			continue;
3606 		}
3607 		/* Check if id belongs to tx queue statistics */
3608 		id -= rx_entries;
3609 		tx_entries = ENA_STATS_ARRAY_TX * dev->data->nb_tx_queues;
3610 		if (id < tx_entries) {
3611 			qid = id % dev->data->nb_tx_queues;
3612 			id /= dev->data->nb_tx_queues;
3613 			values[i] = *((uint64_t *)
3614 				&adapter->tx_ring[qid].tx_stats + id);
3615 			++valid;
3616 			continue;
3617 		}
3618 	}
3619 
3620 	return valid;
3621 }
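
/*
 * Illustrative, application-side sketch (not part of this driver): reading a
 * single statistic through the by-id path handled above. The xstat name and
 * port_id are example assumptions; the id resolved by
 * rte_eth_xstats_get_id_by_name() is the same flat id this callback decodes.
 *
 *	uint64_t id, value;
 *
 *	if (rte_eth_xstats_get_id_by_name(port_id, "tx_drops", &id) == 0)
 *		rte_eth_xstats_get_by_id(port_id, &id, &value, 1);
 */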
3622 
3623 static int ena_process_uint_devarg(const char *key,
3624 				  const char *value,
3625 				  void *opaque)
3626 {
3627 	struct ena_adapter *adapter = opaque;
3628 	char *str_end;
3629 	uint64_t uint_value;
3630 
3631 	uint_value = strtoull(value, &str_end, 10);
3632 	if (value == str_end) {
3633 		PMD_INIT_LOG(ERR,
3634 			"Invalid value for key '%s'. Only uint values are accepted.\n",
3635 			key);
3636 		return -EINVAL;
3637 	}
3638 
3639 	if (strcmp(key, ENA_DEVARG_MISS_TXC_TO) == 0) {
3640 		if (uint_value > ENA_MAX_TX_TIMEOUT_SECONDS) {
3641 			PMD_INIT_LOG(ERR,
3642 				"Tx timeout too high: %" PRIu64 " sec. Maximum allowed: %d sec.\n",
3643 				uint_value, ENA_MAX_TX_TIMEOUT_SECONDS);
3644 			return -EINVAL;
3645 		} else if (uint_value == 0) {
3646 			PMD_INIT_LOG(INFO,
3647 				"Check for missing Tx completions has been disabled.\n");
3648 			adapter->missing_tx_completion_to =
3649 				ENA_HW_HINTS_NO_TIMEOUT;
3650 		} else {
3651 			PMD_INIT_LOG(INFO,
3652 				"Tx packet completion timeout set to %" PRIu64 " seconds.\n",
3653 				uint_value);
3654 			adapter->missing_tx_completion_to =
3655 				uint_value * rte_get_timer_hz();
3656 		}
3657 	}
3658 
3659 	return 0;
3660 }
3661 
3662 static int ena_process_bool_devarg(const char *key,
3663 				   const char *value,
3664 				   void *opaque)
3665 {
3666 	struct ena_adapter *adapter = opaque;
3667 	bool bool_value;
3668 
3669 	/* Parse the value. */
3670 	if (strcmp(value, "1") == 0) {
3671 		bool_value = true;
3672 	} else if (strcmp(value, "0") == 0) {
3673 		bool_value = false;
3674 	} else {
3675 		PMD_INIT_LOG(ERR,
3676 			"Invalid value: '%s' for key '%s'. Accepted: '0' or '1'\n",
3677 			value, key);
3678 		return -EINVAL;
3679 	}
3680 
3681 	/* Now, assign it to the proper adapter field. */
3682 	if (strcmp(key, ENA_DEVARG_LARGE_LLQ_HDR) == 0)
3683 		adapter->use_large_llq_hdr = bool_value;
3684 	else if (strcmp(key, ENA_DEVARG_ENABLE_LLQ) == 0)
3685 		adapter->enable_llq = bool_value;
3686 
3687 	return 0;
3688 }
3689 
3690 static int ena_parse_devargs(struct ena_adapter *adapter,
3691 			     struct rte_devargs *devargs)
3692 {
3693 	static const char * const allowed_args[] = {
3694 		ENA_DEVARG_LARGE_LLQ_HDR,
3695 		ENA_DEVARG_MISS_TXC_TO,
3696 		ENA_DEVARG_ENABLE_LLQ,
3697 		NULL,
3698 	};
3699 	struct rte_kvargs *kvlist;
3700 	int rc;
3701 
3702 	if (devargs == NULL)
3703 		return 0;
3704 
3705 	kvlist = rte_kvargs_parse(devargs->args, allowed_args);
3706 	if (kvlist == NULL) {
3707 		PMD_INIT_LOG(ERR, "Invalid device arguments: %s\n",
3708 			devargs->args);
3709 		return -EINVAL;
3710 	}
3711 
3712 	rc = rte_kvargs_process(kvlist, ENA_DEVARG_LARGE_LLQ_HDR,
3713 		ena_process_bool_devarg, adapter);
3714 	if (rc != 0)
3715 		goto exit;
3716 	rc = rte_kvargs_process(kvlist, ENA_DEVARG_MISS_TXC_TO,
3717 		ena_process_uint_devarg, adapter);
3718 	if (rc != 0)
3719 		goto exit;
3720 	rc = rte_kvargs_process(kvlist, ENA_DEVARG_ENABLE_LLQ,
3721 		ena_process_bool_devarg, adapter);
3722 
3723 exit:
3724 	rte_kvargs_free(kvlist);
3725 
3726 	return rc;
3727 }
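
/*
 * Illustrative usage note (EAL allow-list syntax and the PCI address are
 * example assumptions): the device arguments parsed above are provided per
 * device on the EAL command line, e.g.:
 *
 *	-a 00:05.0,large_llq_hdr=1,enable_llq=1,miss_txc_to=3
 *
 * Keys outside of allowed_args are rejected by rte_kvargs_parse().
 */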
3728 
3729 static int ena_setup_rx_intr(struct rte_eth_dev *dev)
3730 {
3731 	struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev);
3732 	struct rte_intr_handle *intr_handle = pci_dev->intr_handle;
3733 	int rc;
3734 	uint16_t vectors_nb, i;
3735 	bool rx_intr_requested = dev->data->dev_conf.intr_conf.rxq;
3736 
3737 	if (!rx_intr_requested)
3738 		return 0;
3739 
3740 	if (!rte_intr_cap_multiple(intr_handle)) {
3741 		PMD_DRV_LOG(ERR,
3742 			"Rx interrupt requested, but it isn't supported by the PCI driver\n");
3743 		return -ENOTSUP;
3744 	}
3745 
3746 	/* Disable interrupt mapping before the configuration starts. */
3747 	rte_intr_disable(intr_handle);
3748 
3749 	/* Verify if there are enough vectors available. */
3750 	vectors_nb = dev->data->nb_rx_queues;
3751 	if (vectors_nb > RTE_MAX_RXTX_INTR_VEC_ID) {
3752 		PMD_DRV_LOG(ERR,
3753 			"Too many Rx interrupts requested, maximum number: %d\n",
3754 			RTE_MAX_RXTX_INTR_VEC_ID);
3755 		rc = -ENOTSUP;
3756 		goto enable_intr;
3757 	}
3758 
3759 	/* Allocate the vector list */
3760 	if (rte_intr_vec_list_alloc(intr_handle, "intr_vec",
3761 					   dev->data->nb_rx_queues)) {
3762 		PMD_DRV_LOG(ERR,
3763 			"Failed to allocate interrupt vector for %d queues\n",
3764 			dev->data->nb_rx_queues);
3765 		rc = -ENOMEM;
3766 		goto enable_intr;
3767 	}
3768 
3769 	rc = rte_intr_efd_enable(intr_handle, vectors_nb);
3770 	if (rc != 0)
3771 		goto free_intr_vec;
3772 
3773 	if (!rte_intr_allow_others(intr_handle)) {
3774 		PMD_DRV_LOG(ERR,
3775 			"Not enough interrupts available to use both ENA Admin and Rx interrupts\n");
3776 		goto disable_intr_efd;
3777 	}
3778 
3779 	for (i = 0; i < vectors_nb; ++i)
3780 		if (rte_intr_vec_list_index_set(intr_handle, i,
3781 					   RTE_INTR_VEC_RXTX_OFFSET + i))
3782 			goto disable_intr_efd;
3783 
3784 	rte_intr_enable(intr_handle);
3785 	return 0;
3786 
3787 disable_intr_efd:
3788 	rte_intr_efd_disable(intr_handle);
3789 free_intr_vec:
3790 	rte_intr_vec_list_free(intr_handle);
3791 enable_intr:
3792 	rte_intr_enable(intr_handle);
3793 	return rc;
3794 }
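
/*
 * Illustrative, application-side note (not part of this driver): the setup
 * above only runs when the application requested Rx interrupts by setting
 * dev_conf.intr_conf.rxq = 1 before rte_eth_dev_configure(). Once the port is
 * started, per-queue interrupts are armed through the generic API, which ends
 * up in the callbacks below. port_id and queue_id are example assumptions.
 *
 *	rte_eth_dev_rx_intr_enable(port_id, queue_id);
 *	... wait for the interrupt event, then process the queue ...
 *	rte_eth_dev_rx_intr_disable(port_id, queue_id);
 */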
3795 
3796 static void ena_rx_queue_intr_set(struct rte_eth_dev *dev,
3797 				 uint16_t queue_id,
3798 				 bool unmask)
3799 {
3800 	struct ena_adapter *adapter = dev->data->dev_private;
3801 	struct ena_ring *rxq = &adapter->rx_ring[queue_id];
3802 	struct ena_eth_io_intr_reg intr_reg;
3803 
3804 	ena_com_update_intr_reg(&intr_reg, 0, 0, unmask, 1);
3805 	ena_com_unmask_intr(rxq->ena_com_io_cq, &intr_reg);
3806 }
3807 
3808 static int ena_rx_queue_intr_enable(struct rte_eth_dev *dev,
3809 				    uint16_t queue_id)
3810 {
3811 	ena_rx_queue_intr_set(dev, queue_id, true);
3812 
3813 	return 0;
3814 }
3815 
3816 static int ena_rx_queue_intr_disable(struct rte_eth_dev *dev,
3817 				     uint16_t queue_id)
3818 {
3819 	ena_rx_queue_intr_set(dev, queue_id, false);
3820 
3821 	return 0;
3822 }
3823 
3824 static int ena_configure_aenq(struct ena_adapter *adapter)
3825 {
3826 	uint32_t aenq_groups = adapter->all_aenq_groups;
3827 	int rc;
3828 
3829 	/* all_aenq_groups holds all AENQ groups supported by both the device and
3830 	 * the driver, so first we need to make sure the LSC request is valid.
3831 	 */
3832 	if (adapter->edev_data->dev_conf.intr_conf.lsc != 0) {
3833 		if (!(aenq_groups & BIT(ENA_ADMIN_LINK_CHANGE))) {
3834 			PMD_DRV_LOG(ERR,
3835 				"LSC requested, but it's not supported by the AENQ\n");
3836 			return -EINVAL;
3837 		}
3838 	} else {
3839 		/* If LSC wasn't enabled by the app, let's enable all supported
3840 		 * AENQ procedures except the LSC.
3841 		 */
3842 		aenq_groups &= ~BIT(ENA_ADMIN_LINK_CHANGE);
3843 	}
3844 
3845 	rc = ena_com_set_aenq_config(&adapter->ena_dev, aenq_groups);
3846 	if (rc != 0) {
3847 		PMD_DRV_LOG(ERR, "Cannot configure AENQ groups, rc=%d\n", rc);
3848 		return rc;
3849 	}
3850 
3851 	adapter->active_aenq_groups = aenq_groups;
3852 
3853 	return 0;
3854 }
3855 
3856 int ena_mp_indirect_table_set(struct ena_adapter *adapter)
3857 {
3858 	return ENA_PROXY(adapter, ena_com_indirect_table_set, &adapter->ena_dev);
3859 }
3860 
3861 int ena_mp_indirect_table_get(struct ena_adapter *adapter,
3862 			      uint32_t *indirect_table)
3863 {
3864 	return ENA_PROXY(adapter, ena_com_indirect_table_get, &adapter->ena_dev,
3865 		indirect_table);
3866 }
3867 
3868 /*********************************************************************
3869  *  ena_plat_dpdk.h functions implementations
3870  *********************************************************************/
3871 
3872 const struct rte_memzone *
3873 ena_mem_alloc_coherent(struct rte_eth_dev_data *data, size_t size,
3874 		       int socket_id, unsigned int alignment, void **virt_addr,
3875 		       dma_addr_t *phys_addr)
3876 {
3877 	char z_name[RTE_MEMZONE_NAMESIZE];
3878 	struct ena_adapter *adapter = data->dev_private;
3879 	const struct rte_memzone *memzone;
3880 	int rc;
3881 
3882 	rc = snprintf(z_name, RTE_MEMZONE_NAMESIZE, "ena_p%d_mz%" PRIu64 "",
3883 		data->port_id, adapter->memzone_cnt);
3884 	if (rc >= RTE_MEMZONE_NAMESIZE) {
3885 		PMD_DRV_LOG(ERR,
3886 			"Name for the ena_com memzone is too long. Port: %d, mz_num: %" PRIu64 "\n",
3887 			data->port_id, adapter->memzone_cnt);
3888 		goto error;
3889 	}
3890 	adapter->memzone_cnt++;
3891 
3892 	memzone = rte_memzone_reserve_aligned(z_name, size, socket_id,
3893 		RTE_MEMZONE_IOVA_CONTIG, alignment);
3894 	if (memzone == NULL) {
3895 		PMD_DRV_LOG(ERR, "Failed to allocate ena_com memzone: %s\n",
3896 			z_name);
3897 		goto error;
3898 	}
3899 
3900 	memset(memzone->addr, 0, size);
3901 	*virt_addr = memzone->addr;
3902 	*phys_addr = memzone->iova;
3903 
3904 	return memzone;
3905 
3906 error:
3907 	*virt_addr = NULL;
3908 	*phys_addr = 0;
3909 
3910 	return NULL;
3911 }
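
/*
 * Note: with the naming scheme above, the memzones reserved for ena_com get
 * names such as "ena_p0_mz15" (port 0, allocation counter 15; the concrete
 * values are only an example), which keeps every rte_memzone name unique.
 */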
3912 
3913 
3914 /*********************************************************************
3915  *  PMD configuration
3916  *********************************************************************/
3917 static int eth_ena_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
3918 	struct rte_pci_device *pci_dev)
3919 {
3920 	return rte_eth_dev_pci_generic_probe(pci_dev,
3921 		sizeof(struct ena_adapter), eth_ena_dev_init);
3922 }
3923 
3924 static int eth_ena_pci_remove(struct rte_pci_device *pci_dev)
3925 {
3926 	return rte_eth_dev_pci_generic_remove(pci_dev, eth_ena_dev_uninit);
3927 }
3928 
3929 static struct rte_pci_driver rte_ena_pmd = {
3930 	.id_table = pci_id_ena_map,
3931 	.drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC |
3932 		     RTE_PCI_DRV_WC_ACTIVATE,
3933 	.probe = eth_ena_pci_probe,
3934 	.remove = eth_ena_pci_remove,
3935 };
3936 
3937 RTE_PMD_REGISTER_PCI(net_ena, rte_ena_pmd);
3938 RTE_PMD_REGISTER_PCI_TABLE(net_ena, pci_id_ena_map);
3939 RTE_PMD_REGISTER_KMOD_DEP(net_ena, "* igb_uio | uio_pci_generic | vfio-pci");
3940 RTE_PMD_REGISTER_PARAM_STRING(net_ena,
3941 	ENA_DEVARG_LARGE_LLQ_HDR "=<0|1> "
3942 	ENA_DEVARG_ENABLE_LLQ "=<0|1> "
3943 	ENA_DEVARG_MISS_TXC_TO "=<uint>");
3944 RTE_LOG_REGISTER_SUFFIX(ena_logtype_init, init, NOTICE);
3945 RTE_LOG_REGISTER_SUFFIX(ena_logtype_driver, driver, NOTICE);
3946 #ifdef RTE_ETHDEV_DEBUG_RX
3947 RTE_LOG_REGISTER_SUFFIX(ena_logtype_rx, rx, DEBUG);
3948 #endif
3949 #ifdef RTE_ETHDEV_DEBUG_TX
3950 RTE_LOG_REGISTER_SUFFIX(ena_logtype_tx, tx, DEBUG);
3951 #endif
3952 RTE_LOG_REGISTER_SUFFIX(ena_logtype_com, com, WARNING);
3953 
3954 /******************************************************************************
3955  ******************************** AENQ Handlers *******************************
3956  *****************************************************************************/
3957 static void ena_update_on_link_change(void *adapter_data,
3958 				      struct ena_admin_aenq_entry *aenq_e)
3959 {
3960 	struct rte_eth_dev *eth_dev = adapter_data;
3961 	struct ena_adapter *adapter = eth_dev->data->dev_private;
3962 	struct ena_admin_aenq_link_change_desc *aenq_link_desc;
3963 	uint32_t status;
3964 
3965 	aenq_link_desc = (struct ena_admin_aenq_link_change_desc *)aenq_e;
3966 
3967 	status = get_ena_admin_aenq_link_change_desc_link_status(aenq_link_desc);
3968 	adapter->link_status = status;
3969 
3970 	ena_link_update(eth_dev, 0);
3971 	rte_eth_dev_callback_process(eth_dev, RTE_ETH_EVENT_INTR_LSC, NULL);
3972 }
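
/*
 * Illustrative, application-side sketch (not part of this driver): consuming
 * the RTE_ETH_EVENT_INTR_LSC event emitted above. The callback name and
 * port_id are assumptions for the example; LSC interrupts must also be
 * enabled by the application via dev_conf.intr_conf.lsc = 1.
 *
 *	static int on_lsc(uint16_t port_id, enum rte_eth_event_type type,
 *			  void *cb_arg, void *ret_param)
 *	{
 *		struct rte_eth_link link;
 *
 *		rte_eth_link_get_nowait(port_id, &link);
 *		return 0;
 *	}
 *
 *	rte_eth_dev_callback_register(port_id, RTE_ETH_EVENT_INTR_LSC,
 *				      on_lsc, NULL);
 */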
3973 
3974 static void ena_notification(void *adapter_data,
3975 			     struct ena_admin_aenq_entry *aenq_e)
3976 {
3977 	struct rte_eth_dev *eth_dev = adapter_data;
3978 	struct ena_adapter *adapter = eth_dev->data->dev_private;
3979 	struct ena_admin_ena_hw_hints *hints;
3980 
3981 	if (aenq_e->aenq_common_desc.group != ENA_ADMIN_NOTIFICATION)
3982 		PMD_DRV_LOG(WARNING, "Invalid AENQ group: %x. Expected: %x\n",
3983 			aenq_e->aenq_common_desc.group,
3984 			ENA_ADMIN_NOTIFICATION);
3985 
3986 	switch (aenq_e->aenq_common_desc.syndrome) {
3987 	case ENA_ADMIN_UPDATE_HINTS:
3988 		hints = (struct ena_admin_ena_hw_hints *)
3989 			(&aenq_e->inline_data_w4);
3990 		ena_update_hints(adapter, hints);
3991 		break;
3992 	default:
3993 		PMD_DRV_LOG(ERR, "Invalid AENQ notification link state: %d\n",
3994 			aenq_e->aenq_common_desc.syndrome);
3995 	}
3996 }
3997 
3998 static void ena_keep_alive(void *adapter_data,
3999 			   __rte_unused struct ena_admin_aenq_entry *aenq_e)
4000 {
4001 	struct rte_eth_dev *eth_dev = adapter_data;
4002 	struct ena_adapter *adapter = eth_dev->data->dev_private;
4003 	struct ena_admin_aenq_keep_alive_desc *desc;
4004 	uint64_t rx_drops;
4005 	uint64_t tx_drops;
4006 	uint64_t rx_overruns;
4007 
4008 	adapter->timestamp_wd = rte_get_timer_cycles();
4009 
4010 	desc = (struct ena_admin_aenq_keep_alive_desc *)aenq_e;
4011 	rx_drops = ((uint64_t)desc->rx_drops_high << 32) | desc->rx_drops_low;
4012 	tx_drops = ((uint64_t)desc->tx_drops_high << 32) | desc->tx_drops_low;
4013 	rx_overruns = ((uint64_t)desc->rx_overruns_high << 32) | desc->rx_overruns_low;
4014 
4015 	adapter->drv_stats->rx_drops = rx_drops;
4016 	adapter->dev_stats.tx_drops = tx_drops;
4017 	adapter->dev_stats.rx_overruns = rx_overruns;
4018 }
4019 
4020 /**
4021  * This handler will be called for an unknown event group or an unimplemented handler
4022  **/
4023 static void unimplemented_aenq_handler(__rte_unused void *data,
4024 				       __rte_unused struct ena_admin_aenq_entry *aenq_e)
4025 {
4026 	PMD_DRV_LOG(ERR,
4027 		"Unknown event was received or event with unimplemented handler\n");
4028 }
4029 
4030 static struct ena_aenq_handlers aenq_handlers = {
4031 	.handlers = {
4032 		[ENA_ADMIN_LINK_CHANGE] = ena_update_on_link_change,
4033 		[ENA_ADMIN_NOTIFICATION] = ena_notification,
4034 		[ENA_ADMIN_KEEP_ALIVE] = ena_keep_alive
4035 	},
4036 	.unimplemented_handler = unimplemented_aenq_handler
4037 };
4038 
4039 /*********************************************************************
4040  *  Multi-Process communication request handling (in primary)
4041  *********************************************************************/
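/*
 * Behavioral note (summary only, no new functionality): secondary processes
 * cannot access the admin queue directly, so the ENA_PROXY() calls used
 * throughout this file are expected to forward such requests over the
 * rte_mp_* IPC channel to the handler below, which runs in the primary
 * process, performs the real ena_com call and returns the result in the
 * reply message.
 */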
4042 static int
4043 ena_mp_primary_handle(const struct rte_mp_msg *mp_msg, const void *peer)
4044 {
4045 	const struct ena_mp_body *req =
4046 		(const struct ena_mp_body *)mp_msg->param;
4047 	struct ena_adapter *adapter;
4048 	struct ena_com_dev *ena_dev;
4049 	struct ena_mp_body *rsp;
4050 	struct rte_mp_msg mp_rsp;
4051 	struct rte_eth_dev *dev;
4052 	int res = 0;
4053 
4054 	rsp = (struct ena_mp_body *)&mp_rsp.param;
4055 	mp_msg_init(&mp_rsp, req->type, req->port_id);
4056 
4057 	if (!rte_eth_dev_is_valid_port(req->port_id)) {
4058 		rte_errno = ENODEV;
4059 		res = -rte_errno;
4060 		PMD_DRV_LOG(ERR, "Unknown port %d in request %d\n",
4061 			    req->port_id, req->type);
4062 		goto end;
4063 	}
4064 	dev = &rte_eth_devices[req->port_id];
4065 	adapter = dev->data->dev_private;
4066 	ena_dev = &adapter->ena_dev;
4067 
4068 	switch (req->type) {
4069 	case ENA_MP_DEV_STATS_GET:
4070 		res = ena_com_get_dev_basic_stats(ena_dev,
4071 						  &adapter->basic_stats);
4072 		break;
4073 	case ENA_MP_ENI_STATS_GET:
4074 		res = ena_com_get_eni_stats(ena_dev,
4075 			(struct ena_admin_eni_stats *)&adapter->metrics_stats);
4076 		break;
4077 	case ENA_MP_MTU_SET:
4078 		res = ena_com_set_dev_mtu(ena_dev, req->args.mtu);
4079 		break;
4080 	case ENA_MP_IND_TBL_GET:
4081 		res = ena_com_indirect_table_get(ena_dev,
4082 						 adapter->indirect_table);
4083 		break;
4084 	case ENA_MP_IND_TBL_SET:
4085 		res = ena_com_indirect_table_set(ena_dev);
4086 		break;
4087 	case ENA_MP_CUSTOMER_METRICS_GET:
4088 		res = ena_com_get_customer_metrics(ena_dev,
4089 				(char *)adapter->metrics_stats,
4090 				adapter->metrics_num * sizeof(uint64_t));
4091 		break;
4092 	case ENA_MP_SRD_STATS_GET:
4093 		res = ena_com_get_ena_srd_info(ena_dev,
4094 				(struct ena_admin_ena_srd_info *)&adapter->srd_stats);
4095 		break;
4096 	default:
4097 		PMD_DRV_LOG(ERR, "Unknown request type %d\n", req->type);
4098 		res = -EINVAL;
4099 		break;
4100 	}
4101 
4102 end:
4103 	/* Save processing result in the reply */
4104 	rsp->result = res;
4105 	/* Return just IPC processing status */
4106 	return rte_mp_reply(&mp_rsp, peer);
4107 }
4108