xref: /dpdk/drivers/net/ena/ena_ethdev.c (revision a52b317e7d8b89a12d2e8b08e19f380fe61c5220)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright (c) 2015-2020 Amazon.com, Inc. or its affiliates.
3  * All rights reserved.
4  */
5 
6 #include <rte_string_fns.h>
7 #include <rte_errno.h>
8 #include <rte_version.h>
9 #include <rte_net.h>
10 #include <rte_kvargs.h>
11 
12 #include "ena_ethdev.h"
13 #include "ena_logs.h"
14 #include "ena_platform.h"
15 #include "ena_com.h"
16 #include "ena_eth_com.h"
17 
18 #include <ena_common_defs.h>
19 #include <ena_regs_defs.h>
20 #include <ena_admin_defs.h>
21 #include <ena_eth_io_defs.h>
22 
23 #define DRV_MODULE_VER_MAJOR	2
24 #define DRV_MODULE_VER_MINOR	5
25 #define DRV_MODULE_VER_SUBMINOR	0
26 
27 #define __MERGE_64B_H_L(h, l) (((uint64_t)h << 32) | l)
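/*
 * The device reports 64-bit counters as two 32-bit halves (e.g.
 * rx_pkts_high/rx_pkts_low); ena_stats_get() below merges them with
 * __MERGE_64B_H_L(ena_stats.rx_pkts_high, ena_stats.rx_pkts_low).
 */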
28 
29 #define GET_L4_HDR_LEN(mbuf)					\
30 	((rte_pktmbuf_mtod_offset(mbuf,	struct rte_tcp_hdr *,	\
31 		mbuf->l3_len + mbuf->l2_len)->data_off) >> 4)
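/*
 * The upper nibble of the TCP data_off field holds the header length in
 * 32-bit words, so shifting it right by 4 extracts that length. The offset
 * is only valid when the application has set l2_len/l3_len, as required for
 * the TSO path that uses this macro.
 */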
32 
33 #define ETH_GSTRING_LEN	32
34 
35 #define ARRAY_SIZE(x) RTE_DIM(x)
36 
37 #define ENA_MIN_RING_DESC	128
38 
39 #define ENA_PTYPE_HAS_HASH	(RTE_PTYPE_L4_TCP | RTE_PTYPE_L4_UDP)
40 
41 struct ena_stats {
42 	char name[ETH_GSTRING_LEN];
43 	int stat_offset;
44 };
45 
46 #define ENA_STAT_ENTRY(stat, stat_type) { \
47 	.name = #stat, \
48 	.stat_offset = offsetof(struct ena_stats_##stat_type, stat) \
49 }
50 
51 #define ENA_STAT_RX_ENTRY(stat) \
52 	ENA_STAT_ENTRY(stat, rx)
53 
54 #define ENA_STAT_TX_ENTRY(stat) \
55 	ENA_STAT_ENTRY(stat, tx)
56 
57 #define ENA_STAT_ENI_ENTRY(stat) \
58 	ENA_STAT_ENTRY(stat, eni)
59 
60 #define ENA_STAT_GLOBAL_ENTRY(stat) \
61 	ENA_STAT_ENTRY(stat, dev)
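/*
 * For example, ENA_STAT_RX_ENTRY(cnt) expands to
 * { .name = "cnt", .stat_offset = offsetof(struct ena_stats_rx, cnt) },
 * tying each xstat name to its field inside the per-ring stats structure.
 */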
62 
63 /* Device arguments */
64 #define ENA_DEVARG_LARGE_LLQ_HDR "large_llq_hdr"
65 
66 /*
67  * Each rte_memzone should have a unique name.
68  * To satisfy this, count the number of allocations and add the count to the name.
69  */
70 rte_atomic64_t ena_alloc_cnt;
71 
72 static const struct ena_stats ena_stats_global_strings[] = {
73 	ENA_STAT_GLOBAL_ENTRY(wd_expired),
74 	ENA_STAT_GLOBAL_ENTRY(dev_start),
75 	ENA_STAT_GLOBAL_ENTRY(dev_stop),
76 	ENA_STAT_GLOBAL_ENTRY(tx_drops),
77 };
78 
79 static const struct ena_stats ena_stats_eni_strings[] = {
80 	ENA_STAT_ENI_ENTRY(bw_in_allowance_exceeded),
81 	ENA_STAT_ENI_ENTRY(bw_out_allowance_exceeded),
82 	ENA_STAT_ENI_ENTRY(pps_allowance_exceeded),
83 	ENA_STAT_ENI_ENTRY(conntrack_allowance_exceeded),
84 	ENA_STAT_ENI_ENTRY(linklocal_allowance_exceeded),
85 };
86 
87 static const struct ena_stats ena_stats_tx_strings[] = {
88 	ENA_STAT_TX_ENTRY(cnt),
89 	ENA_STAT_TX_ENTRY(bytes),
90 	ENA_STAT_TX_ENTRY(prepare_ctx_err),
91 	ENA_STAT_TX_ENTRY(tx_poll),
92 	ENA_STAT_TX_ENTRY(doorbells),
93 	ENA_STAT_TX_ENTRY(bad_req_id),
94 	ENA_STAT_TX_ENTRY(available_desc),
95 	ENA_STAT_TX_ENTRY(missed_tx),
96 };
97 
98 static const struct ena_stats ena_stats_rx_strings[] = {
99 	ENA_STAT_RX_ENTRY(cnt),
100 	ENA_STAT_RX_ENTRY(bytes),
101 	ENA_STAT_RX_ENTRY(refill_partial),
102 	ENA_STAT_RX_ENTRY(l3_csum_bad),
103 	ENA_STAT_RX_ENTRY(l4_csum_bad),
104 	ENA_STAT_RX_ENTRY(l4_csum_good),
105 	ENA_STAT_RX_ENTRY(mbuf_alloc_fail),
106 	ENA_STAT_RX_ENTRY(bad_desc_num),
107 	ENA_STAT_RX_ENTRY(bad_req_id),
108 };
109 
110 #define ENA_STATS_ARRAY_GLOBAL	ARRAY_SIZE(ena_stats_global_strings)
111 #define ENA_STATS_ARRAY_ENI	ARRAY_SIZE(ena_stats_eni_strings)
112 #define ENA_STATS_ARRAY_TX	ARRAY_SIZE(ena_stats_tx_strings)
113 #define ENA_STATS_ARRAY_RX	ARRAY_SIZE(ena_stats_rx_strings)
114 
115 #define QUEUE_OFFLOADS (RTE_ETH_TX_OFFLOAD_TCP_CKSUM |\
116 			RTE_ETH_TX_OFFLOAD_UDP_CKSUM |\
117 			RTE_ETH_TX_OFFLOAD_IPV4_CKSUM |\
118 			RTE_ETH_TX_OFFLOAD_TCP_TSO)
119 #define MBUF_OFFLOADS (RTE_MBUF_F_TX_L4_MASK |\
120 		       RTE_MBUF_F_TX_IP_CKSUM |\
121 		       RTE_MBUF_F_TX_TCP_SEG)
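/*
 * ena_tx_mbuf_prepare() checks these two masks together: the Tx context is
 * only filled in when the mbuf requests one of MBUF_OFFLOADS and the queue
 * has the matching QUEUE_OFFLOADS capability enabled.
 */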
122 
123 /** Vendor ID used by Amazon devices */
124 #define PCI_VENDOR_ID_AMAZON 0x1D0F
125 /** Amazon devices */
126 #define PCI_DEVICE_ID_ENA_VF		0xEC20
127 #define PCI_DEVICE_ID_ENA_VF_RSERV0	0xEC21
128 
129 #define	ENA_TX_OFFLOAD_MASK	(RTE_MBUF_F_TX_L4_MASK |         \
130 	RTE_MBUF_F_TX_IPV6 |            \
131 	RTE_MBUF_F_TX_IPV4 |            \
132 	RTE_MBUF_F_TX_IP_CKSUM |        \
133 	RTE_MBUF_F_TX_TCP_SEG)
134 
135 #define	ENA_TX_OFFLOAD_NOTSUP_MASK	\
136 	(RTE_MBUF_F_TX_OFFLOAD_MASK ^ ENA_TX_OFFLOAD_MASK)
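/*
 * XOR-ing the supported mask out of RTE_MBUF_F_TX_OFFLOAD_MASK leaves the
 * Tx offload flags this PMD cannot handle, so the Tx prepare path can
 * reject mbufs that request them.
 */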
137 
138 /** HW-specific offload capabilities. */
139 /* IPv4 checksum offload. */
140 #define ENA_L3_IPV4_CSUM		0x0001
141 /* TCP/UDP checksum offload for IPv4 packets. */
142 #define ENA_L4_IPV4_CSUM		0x0002
143 /* TCP/UDP checksum offload for IPv4 packets with pseudo header checksum. */
144 #define ENA_L4_IPV4_CSUM_PARTIAL	0x0004
145 /* TCP/UDP checksum offload for IPv6 packets. */
146 #define ENA_L4_IPV6_CSUM		0x0008
147 /* TCP/UDP checksum offload for IPv6 packets with pseudo header checksum. */
148 #define ENA_L4_IPV6_CSUM_PARTIAL	0x0010
149 /* TSO support for IPv4 packets. */
150 #define ENA_IPV4_TSO			0x0020
151 
152 /* Device supports setting RSS hash. */
153 #define ENA_RX_RSS_HASH			0x0040
154 
155 static const struct rte_pci_id pci_id_ena_map[] = {
156 	{ RTE_PCI_DEVICE(PCI_VENDOR_ID_AMAZON, PCI_DEVICE_ID_ENA_VF) },
157 	{ RTE_PCI_DEVICE(PCI_VENDOR_ID_AMAZON, PCI_DEVICE_ID_ENA_VF_RSERV0) },
158 	{ .device_id = 0 },
159 };
160 
161 static struct ena_aenq_handlers aenq_handlers;
162 
163 static int ena_device_init(struct ena_adapter *adapter,
164 			   struct rte_pci_device *pdev,
165 			   struct ena_com_dev_get_features_ctx *get_feat_ctx);
166 static int ena_dev_configure(struct rte_eth_dev *dev);
167 static void ena_tx_map_mbuf(struct ena_ring *tx_ring,
168 	struct ena_tx_buffer *tx_info,
169 	struct rte_mbuf *mbuf,
170 	void **push_header,
171 	uint16_t *header_len);
172 static int ena_xmit_mbuf(struct ena_ring *tx_ring, struct rte_mbuf *mbuf);
173 static int ena_tx_cleanup(void *txp, uint32_t free_pkt_cnt);
174 static uint16_t eth_ena_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
175 				  uint16_t nb_pkts);
176 static uint16_t eth_ena_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
177 		uint16_t nb_pkts);
178 static int ena_tx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
179 			      uint16_t nb_desc, unsigned int socket_id,
180 			      const struct rte_eth_txconf *tx_conf);
181 static int ena_rx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
182 			      uint16_t nb_desc, unsigned int socket_id,
183 			      const struct rte_eth_rxconf *rx_conf,
184 			      struct rte_mempool *mp);
185 static inline void ena_init_rx_mbuf(struct rte_mbuf *mbuf, uint16_t len);
186 static struct rte_mbuf *ena_rx_mbuf(struct ena_ring *rx_ring,
187 				    struct ena_com_rx_buf_info *ena_bufs,
188 				    uint32_t descs,
189 				    uint16_t *next_to_clean,
190 				    uint8_t offset);
191 static uint16_t eth_ena_recv_pkts(void *rx_queue,
192 				  struct rte_mbuf **rx_pkts, uint16_t nb_pkts);
193 static int ena_add_single_rx_desc(struct ena_com_io_sq *io_sq,
194 				  struct rte_mbuf *mbuf, uint16_t id);
195 static int ena_populate_rx_queue(struct ena_ring *rxq, unsigned int count);
196 static void ena_init_rings(struct ena_adapter *adapter,
197 			   bool disable_meta_caching);
198 static int ena_mtu_set(struct rte_eth_dev *dev, uint16_t mtu);
199 static int ena_start(struct rte_eth_dev *dev);
200 static int ena_stop(struct rte_eth_dev *dev);
201 static int ena_close(struct rte_eth_dev *dev);
202 static int ena_dev_reset(struct rte_eth_dev *dev);
203 static int ena_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats);
204 static void ena_rx_queue_release_all(struct rte_eth_dev *dev);
205 static void ena_tx_queue_release_all(struct rte_eth_dev *dev);
206 static void ena_rx_queue_release(struct rte_eth_dev *dev, uint16_t qid);
207 static void ena_tx_queue_release(struct rte_eth_dev *dev, uint16_t qid);
208 static void ena_rx_queue_release_bufs(struct ena_ring *ring);
209 static void ena_tx_queue_release_bufs(struct ena_ring *ring);
210 static int ena_link_update(struct rte_eth_dev *dev,
211 			   int wait_to_complete);
212 static int ena_create_io_queue(struct rte_eth_dev *dev, struct ena_ring *ring);
213 static void ena_queue_stop(struct ena_ring *ring);
214 static void ena_queue_stop_all(struct rte_eth_dev *dev,
215 			      enum ena_ring_type ring_type);
216 static int ena_queue_start(struct rte_eth_dev *dev, struct ena_ring *ring);
217 static int ena_queue_start_all(struct rte_eth_dev *dev,
218 			       enum ena_ring_type ring_type);
219 static void ena_stats_restart(struct rte_eth_dev *dev);
220 static uint64_t ena_get_rx_port_offloads(struct ena_adapter *adapter);
221 static uint64_t ena_get_tx_port_offloads(struct ena_adapter *adapter);
222 static uint64_t ena_get_rx_queue_offloads(struct ena_adapter *adapter);
223 static uint64_t ena_get_tx_queue_offloads(struct ena_adapter *adapter);
224 static int ena_infos_get(struct rte_eth_dev *dev,
225 			 struct rte_eth_dev_info *dev_info);
226 static void ena_interrupt_handler_rte(void *cb_arg);
227 static void ena_timer_wd_callback(struct rte_timer *timer, void *arg);
228 static void ena_destroy_device(struct rte_eth_dev *eth_dev);
229 static int eth_ena_dev_init(struct rte_eth_dev *eth_dev);
230 static int ena_xstats_get_names(struct rte_eth_dev *dev,
231 				struct rte_eth_xstat_name *xstats_names,
232 				unsigned int n);
233 static int ena_xstats_get(struct rte_eth_dev *dev,
234 			  struct rte_eth_xstat *stats,
235 			  unsigned int n);
236 static int ena_xstats_get_by_id(struct rte_eth_dev *dev,
237 				const uint64_t *ids,
238 				uint64_t *values,
239 				unsigned int n);
240 static int ena_process_bool_devarg(const char *key,
241 				   const char *value,
242 				   void *opaque);
243 static int ena_parse_devargs(struct ena_adapter *adapter,
244 			     struct rte_devargs *devargs);
245 static int ena_copy_eni_stats(struct ena_adapter *adapter,
246 			      struct ena_stats_eni *stats);
247 static int ena_setup_rx_intr(struct rte_eth_dev *dev);
248 static int ena_rx_queue_intr_enable(struct rte_eth_dev *dev,
249 				    uint16_t queue_id);
250 static int ena_rx_queue_intr_disable(struct rte_eth_dev *dev,
251 				     uint16_t queue_id);
252 static int ena_configure_aenq(struct ena_adapter *adapter);
253 static int ena_mp_primary_handle(const struct rte_mp_msg *mp_msg,
254 				 const void *peer);
255 
256 static const struct eth_dev_ops ena_dev_ops = {
257 	.dev_configure        = ena_dev_configure,
258 	.dev_infos_get        = ena_infos_get,
259 	.rx_queue_setup       = ena_rx_queue_setup,
260 	.tx_queue_setup       = ena_tx_queue_setup,
261 	.dev_start            = ena_start,
262 	.dev_stop             = ena_stop,
263 	.link_update          = ena_link_update,
264 	.stats_get            = ena_stats_get,
265 	.xstats_get_names     = ena_xstats_get_names,
266 	.xstats_get	      = ena_xstats_get,
267 	.xstats_get_by_id     = ena_xstats_get_by_id,
268 	.mtu_set              = ena_mtu_set,
269 	.rx_queue_release     = ena_rx_queue_release,
270 	.tx_queue_release     = ena_tx_queue_release,
271 	.dev_close            = ena_close,
272 	.dev_reset            = ena_dev_reset,
273 	.reta_update          = ena_rss_reta_update,
274 	.reta_query           = ena_rss_reta_query,
275 	.rx_queue_intr_enable = ena_rx_queue_intr_enable,
276 	.rx_queue_intr_disable = ena_rx_queue_intr_disable,
277 	.rss_hash_update      = ena_rss_hash_update,
278 	.rss_hash_conf_get    = ena_rss_hash_conf_get,
279 	.tx_done_cleanup      = ena_tx_cleanup,
280 };
281 
282 /*********************************************************************
283  *  Multi-Process communication bits
284  *********************************************************************/
285 /* rte_mp IPC message name */
286 #define ENA_MP_NAME	"net_ena_mp"
287 /* Request timeout in seconds */
288 #define ENA_MP_REQ_TMO	5
289 
290 /** Proxy request type */
291 enum ena_mp_req {
292 	ENA_MP_DEV_STATS_GET,
293 	ENA_MP_ENI_STATS_GET,
294 	ENA_MP_MTU_SET,
295 	ENA_MP_IND_TBL_GET,
296 	ENA_MP_IND_TBL_SET
297 };
298 
299 /** Proxy message body. Shared between requests and responses. */
300 struct ena_mp_body {
301 	/* Message type */
302 	enum ena_mp_req type;
303 	int port_id;
304 	/* Processing result. Set in replies. 0 if message succeeded, negative
305 	 * error code otherwise.
306 	 */
307 	int result;
308 	union {
309 		int mtu; /* For ENA_MP_MTU_SET */
310 	} args;
311 };
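/*
 * The body must fit into rte_mp_msg::param, which is limited to
 * RTE_MP_MAX_PARAM_LEN bytes. Bulky results such as statistics or the RSS
 * indirection table are therefore shared through struct ena_adapter instead
 * of being carried in the reply itself (see the ENA_PROXY() note below).
 */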
312 
313 /**
314  * Initialize IPC message.
315  *
316  * @param[out] msg
317  *   Pointer to the message to initialize.
318  * @param[in] type
319  *   Message type.
320  * @param[in] port_id
321  *   Port ID of target device.
322  *
323  */
324 static void
325 mp_msg_init(struct rte_mp_msg *msg, enum ena_mp_req type, int port_id)
326 {
327 	struct ena_mp_body *body = (struct ena_mp_body *)&msg->param;
328 
329 	memset(msg, 0, sizeof(*msg));
330 	strlcpy(msg->name, ENA_MP_NAME, sizeof(msg->name));
331 	msg->len_param = sizeof(*body);
332 	body->type = type;
333 	body->port_id = port_id;
334 }
335 
336 /*********************************************************************
337  *  Multi-Process communication PMD API
338  *********************************************************************/
339 /**
340  * Define proxy request descriptor
341  *
342  * Used to define all structures and functions required for proxying a given
343  * function to the primary process, including the code to prepare the
344  * request and to process the response.
345  *
346  * @param[in] f
347  *   Name of the function to proxy
348  * @param[in] t
349  *   Message type to use
350  * @param[in] prep
351  *   Body of a function to prepare the request in the form of a statement
352  *   expression. It is passed all the original function arguments along with two
353  *   extra ones:
354  *   - struct ena_adapter *adapter - PMD data of the device calling the proxy.
355  *   - struct ena_mp_body *req - body of a request to prepare.
356  * @param[in] proc
357  *   Body of a function to process the response in the form of a statement
358  *   expression. It is passed all the original function arguments along with two
359  *   extra ones:
360  *   - struct ena_adapter *adapter - PMD data of the device calling the proxy.
361  *   - struct ena_mp_body *rsp - body of a response to process.
362  * @param ...
363  *   Proxied function's arguments
364  *
365  * @note Inside prep and proc any parameters which aren't used should be marked
366  *       as such (with ENA_TOUCH or __rte_unused).
367  */
368 #define ENA_PROXY_DESC(f, t, prep, proc, ...)			\
369 	static const enum ena_mp_req mp_type_ ## f =  t;	\
370 	static const char *mp_name_ ## f = #t;			\
371 	static void mp_prep_ ## f(struct ena_adapter *adapter,	\
372 				  struct ena_mp_body *req,	\
373 				  __VA_ARGS__)			\
374 	{							\
375 		prep;						\
376 	}							\
377 	static void mp_proc_ ## f(struct ena_adapter *adapter,	\
378 				  struct ena_mp_body *rsp,	\
379 				  __VA_ARGS__)			\
380 	{							\
381 		proc;						\
382 	}
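/*
 * Illustrative sketch only (hypothetical names): ENA_PROXY_DESC(foo,
 * ENA_MP_FOO, prep, proc, int arg) emits mp_type_foo, mp_name_foo and the
 * helpers mp_prep_foo()/mp_proc_foo() wrapping the given statement
 * expressions. The real descriptors are defined further below.
 */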
383 
384 /**
385  * Proxy wrapper for calling primary functions in a secondary process.
386  *
387  * Depending on whether it is called in the primary or a secondary process,
388  * it calls @p func directly or proxies the call to the primary process via
389  * rte_mp IPC. This macro requires a proxy request descriptor to be defined
390  * for @p func using the ENA_PROXY_DESC() macro.
391  *
392  * @param[in/out] a
393  *   Device PMD data. Used for sending the message and sharing message results
394  *   between primary and secondary.
395  * @param[in] f
396  *   Function to proxy.
397  * @param ...
398  *   Arguments of @p func.
399  *
400  * @return
401  *   - 0: Processing succeeded and response handler was called.
402  *   - -EPERM: IPC is unavailable on this platform. This means only the
403  *             primary process may call the proxied function.
404  *   - -EIO:   IPC returned an error on request send. Inspect rte_errno for
405  *             the detailed error code.
406  *   - Negative error code from the proxied function.
407  *
408  * @note This mechanism is geared towards control-path tasks. Avoid calling it
409  *       in the fast path unless unbounded delays are acceptable. This is due
410  *       to the socket-based IPC mechanism itself.
411  * @note Due to IPC parameter size limitations, the proxy logic shares call
412  *       results through the struct ena_adapter shared memory. This makes the
413  *       proxy mechanism strictly single-threaded. Therefore, be sure to make
414  *       all calls to the same proxied function under the same lock.
415  */
416 #define ENA_PROXY(a, f, ...)						\
417 ({									\
418 	struct ena_adapter *_a = (a);					\
419 	struct timespec ts = { .tv_sec = ENA_MP_REQ_TMO };		\
420 	struct ena_mp_body *req, *rsp;					\
421 	struct rte_mp_reply mp_rep;					\
422 	struct rte_mp_msg mp_req;					\
423 	int ret;							\
424 									\
425 	if (rte_eal_process_type() == RTE_PROC_PRIMARY) {		\
426 		ret = f(__VA_ARGS__);					\
427 	} else {							\
428 		/* Prepare and send request */				\
429 		req = (struct ena_mp_body *)&mp_req.param;		\
430 		mp_msg_init(&mp_req, mp_type_ ## f, _a->edev_data->port_id); \
431 		mp_prep_ ## f(_a, req, ## __VA_ARGS__);			\
432 									\
433 		ret = rte_mp_request_sync(&mp_req, &mp_rep, &ts);	\
434 		if (likely(!ret)) {					\
435 			RTE_ASSERT(mp_rep.nb_received == 1);		\
436 			rsp = (struct ena_mp_body *)&mp_rep.msgs[0].param; \
437 			ret = rsp->result;				\
438 			if (ret == 0) {					\
439 				mp_proc_##f(_a, rsp, ## __VA_ARGS__);	\
440 			} else {					\
441 				PMD_DRV_LOG(ERR,			\
442 					    "%s returned error: %d\n",	\
443 					    mp_name_ ## f, rsp->result);\
444 			}						\
445 			free(mp_rep.msgs);				\
446 		} else if (rte_errno == ENOTSUP) {			\
447 			PMD_DRV_LOG(ERR,				\
448 				    "No IPC, can't proxy to primary\n");\
449 			ret = -rte_errno;				\
450 		} else {						\
451 			PMD_DRV_LOG(ERR, "Request %s failed: %s\n",	\
452 				    mp_name_ ## f,			\
453 				    rte_strerror(rte_errno));		\
454 			ret = -EIO;					\
455 		}							\
456 	}								\
457 	ret;								\
458 })
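/*
 * Typical call site (see ena_mtu_set() below):
 *   rc = ENA_PROXY(adapter, ena_com_set_dev_mtu, ena_dev, mtu);
 * In the primary process this collapses to a direct ena_com_set_dev_mtu()
 * call; in a secondary process it is turned into an ENA_MP_MTU_SET request
 * handled by the primary.
 */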
459 
460 /*********************************************************************
461  *  Multi-Process communication request descriptors
462  *********************************************************************/
463 
464 ENA_PROXY_DESC(ena_com_get_dev_basic_stats, ENA_MP_DEV_STATS_GET,
465 ({
466 	ENA_TOUCH(adapter);
467 	ENA_TOUCH(req);
468 	ENA_TOUCH(ena_dev);
469 	ENA_TOUCH(stats);
470 }),
471 ({
472 	ENA_TOUCH(rsp);
473 	ENA_TOUCH(ena_dev);
474 	if (stats != &adapter->basic_stats)
475 		rte_memcpy(stats, &adapter->basic_stats, sizeof(*stats));
476 }),
477 	struct ena_com_dev *ena_dev, struct ena_admin_basic_stats *stats);
478 
479 ENA_PROXY_DESC(ena_com_get_eni_stats, ENA_MP_ENI_STATS_GET,
480 ({
481 	ENA_TOUCH(adapter);
482 	ENA_TOUCH(req);
483 	ENA_TOUCH(ena_dev);
484 	ENA_TOUCH(stats);
485 }),
486 ({
487 	ENA_TOUCH(rsp);
488 	ENA_TOUCH(ena_dev);
489 	if (stats != (struct ena_admin_eni_stats *)&adapter->eni_stats)
490 		rte_memcpy(stats, &adapter->eni_stats, sizeof(*stats));
491 }),
492 	struct ena_com_dev *ena_dev, struct ena_admin_eni_stats *stats);
493 
494 ENA_PROXY_DESC(ena_com_set_dev_mtu, ENA_MP_MTU_SET,
495 ({
496 	ENA_TOUCH(adapter);
497 	ENA_TOUCH(ena_dev);
498 	req->args.mtu = mtu;
499 }),
500 ({
501 	ENA_TOUCH(adapter);
502 	ENA_TOUCH(rsp);
503 	ENA_TOUCH(ena_dev);
504 	ENA_TOUCH(mtu);
505 }),
506 	struct ena_com_dev *ena_dev, int mtu);
507 
508 ENA_PROXY_DESC(ena_com_indirect_table_set, ENA_MP_IND_TBL_SET,
509 ({
510 	ENA_TOUCH(adapter);
511 	ENA_TOUCH(req);
512 	ENA_TOUCH(ena_dev);
513 }),
514 ({
515 	ENA_TOUCH(adapter);
516 	ENA_TOUCH(rsp);
517 	ENA_TOUCH(ena_dev);
518 }),
519 	struct ena_com_dev *ena_dev);
520 
521 ENA_PROXY_DESC(ena_com_indirect_table_get, ENA_MP_IND_TBL_GET,
522 ({
523 	ENA_TOUCH(adapter);
524 	ENA_TOUCH(req);
525 	ENA_TOUCH(ena_dev);
526 	ENA_TOUCH(ind_tbl);
527 }),
528 ({
529 	ENA_TOUCH(rsp);
530 	ENA_TOUCH(ena_dev);
531 	if (ind_tbl != adapter->indirect_table)
532 		rte_memcpy(ind_tbl, adapter->indirect_table,
533 			   sizeof(adapter->indirect_table));
534 }),
535 	struct ena_com_dev *ena_dev, u32 *ind_tbl);
536 
537 static inline void ena_rx_mbuf_prepare(struct ena_ring *rx_ring,
538 				       struct rte_mbuf *mbuf,
539 				       struct ena_com_rx_ctx *ena_rx_ctx,
540 				       bool fill_hash)
541 {
542 	struct ena_stats_rx *rx_stats = &rx_ring->rx_stats;
543 	uint64_t ol_flags = 0;
544 	uint32_t packet_type = 0;
545 
546 	if (ena_rx_ctx->l4_proto == ENA_ETH_IO_L4_PROTO_TCP)
547 		packet_type |= RTE_PTYPE_L4_TCP;
548 	else if (ena_rx_ctx->l4_proto == ENA_ETH_IO_L4_PROTO_UDP)
549 		packet_type |= RTE_PTYPE_L4_UDP;
550 
551 	if (ena_rx_ctx->l3_proto == ENA_ETH_IO_L3_PROTO_IPV4) {
552 		packet_type |= RTE_PTYPE_L3_IPV4;
553 		if (unlikely(ena_rx_ctx->l3_csum_err)) {
554 			++rx_stats->l3_csum_bad;
555 			ol_flags |= RTE_MBUF_F_RX_IP_CKSUM_BAD;
556 		} else {
557 			ol_flags |= RTE_MBUF_F_RX_IP_CKSUM_GOOD;
558 		}
559 	} else if (ena_rx_ctx->l3_proto == ENA_ETH_IO_L3_PROTO_IPV6) {
560 		packet_type |= RTE_PTYPE_L3_IPV6;
561 	}
562 
563 	if (!ena_rx_ctx->l4_csum_checked || ena_rx_ctx->frag) {
564 		ol_flags |= RTE_MBUF_F_RX_L4_CKSUM_UNKNOWN;
565 	} else {
566 		if (unlikely(ena_rx_ctx->l4_csum_err)) {
567 			++rx_stats->l4_csum_bad;
568 			ol_flags |= RTE_MBUF_F_RX_L4_CKSUM_BAD;
569 		} else {
570 			++rx_stats->l4_csum_good;
571 			ol_flags |= RTE_MBUF_F_RX_L4_CKSUM_GOOD;
572 		}
573 	}
574 
575 	if (fill_hash &&
576 	    likely((packet_type & ENA_PTYPE_HAS_HASH) && !ena_rx_ctx->frag)) {
577 		ol_flags |= RTE_MBUF_F_RX_RSS_HASH;
578 		mbuf->hash.rss = ena_rx_ctx->hash;
579 	}
580 
581 	mbuf->ol_flags = ol_flags;
582 	mbuf->packet_type = packet_type;
583 }
584 
585 static inline void ena_tx_mbuf_prepare(struct rte_mbuf *mbuf,
586 				       struct ena_com_tx_ctx *ena_tx_ctx,
587 				       uint64_t queue_offloads,
588 				       bool disable_meta_caching)
589 {
590 	struct ena_com_tx_meta *ena_meta = &ena_tx_ctx->ena_meta;
591 
592 	if ((mbuf->ol_flags & MBUF_OFFLOADS) &&
593 	    (queue_offloads & QUEUE_OFFLOADS)) {
594 		/* check if TSO is required */
595 		if ((mbuf->ol_flags & RTE_MBUF_F_TX_TCP_SEG) &&
596 		    (queue_offloads & RTE_ETH_TX_OFFLOAD_TCP_TSO)) {
597 			ena_tx_ctx->tso_enable = true;
598 
599 			ena_meta->l4_hdr_len = GET_L4_HDR_LEN(mbuf);
600 		}
601 
602 		/* check if L3 checksum is needed */
603 		if ((mbuf->ol_flags & RTE_MBUF_F_TX_IP_CKSUM) &&
604 		    (queue_offloads & RTE_ETH_TX_OFFLOAD_IPV4_CKSUM))
605 			ena_tx_ctx->l3_csum_enable = true;
606 
607 		if (mbuf->ol_flags & RTE_MBUF_F_TX_IPV6) {
608 			ena_tx_ctx->l3_proto = ENA_ETH_IO_L3_PROTO_IPV6;
609 		} else {
610 			ena_tx_ctx->l3_proto = ENA_ETH_IO_L3_PROTO_IPV4;
611 
612 			/* set don't fragment (DF) flag */
613 			if (mbuf->packet_type &
614 				(RTE_PTYPE_L4_NONFRAG
615 				 | RTE_PTYPE_INNER_L4_NONFRAG))
616 				ena_tx_ctx->df = true;
617 		}
618 
619 		/* check if L4 checksum is needed */
620 		if (((mbuf->ol_flags & RTE_MBUF_F_TX_L4_MASK) == RTE_MBUF_F_TX_TCP_CKSUM) &&
621 		    (queue_offloads & RTE_ETH_TX_OFFLOAD_TCP_CKSUM)) {
622 			ena_tx_ctx->l4_proto = ENA_ETH_IO_L4_PROTO_TCP;
623 			ena_tx_ctx->l4_csum_enable = true;
624 		} else if (((mbuf->ol_flags & RTE_MBUF_F_TX_L4_MASK) ==
625 				RTE_MBUF_F_TX_UDP_CKSUM) &&
626 				(queue_offloads & RTE_ETH_TX_OFFLOAD_UDP_CKSUM)) {
627 			ena_tx_ctx->l4_proto = ENA_ETH_IO_L4_PROTO_UDP;
628 			ena_tx_ctx->l4_csum_enable = true;
629 		} else {
630 			ena_tx_ctx->l4_proto = ENA_ETH_IO_L4_PROTO_UNKNOWN;
631 			ena_tx_ctx->l4_csum_enable = false;
632 		}
633 
634 		ena_meta->mss = mbuf->tso_segsz;
635 		ena_meta->l3_hdr_len = mbuf->l3_len;
636 		ena_meta->l3_hdr_offset = mbuf->l2_len;
637 
638 		ena_tx_ctx->meta_valid = true;
639 	} else if (disable_meta_caching) {
640 		memset(ena_meta, 0, sizeof(*ena_meta));
641 		ena_tx_ctx->meta_valid = true;
642 	} else {
643 		ena_tx_ctx->meta_valid = false;
644 	}
645 }
646 
647 static int validate_tx_req_id(struct ena_ring *tx_ring, u16 req_id)
648 {
649 	struct ena_tx_buffer *tx_info = NULL;
650 
651 	if (likely(req_id < tx_ring->ring_size)) {
652 		tx_info = &tx_ring->tx_buffer_info[req_id];
653 		if (likely(tx_info->mbuf))
654 			return 0;
655 	}
656 
657 	if (tx_info)
658 		PMD_TX_LOG(ERR, "tx_info doesn't have valid mbuf\n");
659 	else
660 		PMD_TX_LOG(ERR, "Invalid req_id: %hu\n", req_id);
661 
662 	/* Trigger device reset */
663 	++tx_ring->tx_stats.bad_req_id;
664 	tx_ring->adapter->reset_reason = ENA_REGS_RESET_INV_TX_REQ_ID;
665 	tx_ring->adapter->trigger_reset	= true;
666 	return -EFAULT;
667 }
668 
669 static void ena_config_host_info(struct ena_com_dev *ena_dev)
670 {
671 	struct ena_admin_host_info *host_info;
672 	int rc;
673 
674 	/* Allocate only the host info */
675 	rc = ena_com_allocate_host_info(ena_dev);
676 	if (rc) {
677 		PMD_DRV_LOG(ERR, "Cannot allocate host info\n");
678 		return;
679 	}
680 
681 	host_info = ena_dev->host_attr.host_info;
682 
683 	host_info->os_type = ENA_ADMIN_OS_DPDK;
684 	host_info->kernel_ver = RTE_VERSION;
685 	strlcpy((char *)host_info->kernel_ver_str, rte_version(),
686 		sizeof(host_info->kernel_ver_str));
687 	host_info->os_dist = RTE_VERSION;
688 	strlcpy((char *)host_info->os_dist_str, rte_version(),
689 		sizeof(host_info->os_dist_str));
690 	host_info->driver_version =
691 		(DRV_MODULE_VER_MAJOR) |
692 		(DRV_MODULE_VER_MINOR << ENA_ADMIN_HOST_INFO_MINOR_SHIFT) |
693 		(DRV_MODULE_VER_SUBMINOR <<
694 			ENA_ADMIN_HOST_INFO_SUB_MINOR_SHIFT);
695 	host_info->num_cpus = rte_lcore_count();
696 
697 	host_info->driver_supported_features =
698 		ENA_ADMIN_HOST_INFO_RX_OFFSET_MASK |
699 		ENA_ADMIN_HOST_INFO_RSS_CONFIGURABLE_FUNCTION_KEY_MASK;
700 
701 	rc = ena_com_set_host_attributes(ena_dev);
702 	if (rc) {
703 		if (rc == -ENA_COM_UNSUPPORTED)
704 			PMD_DRV_LOG(WARNING, "Cannot set host attributes\n");
705 		else
706 			PMD_DRV_LOG(ERR, "Cannot set host attributes\n");
707 
708 		goto err;
709 	}
710 
711 	return;
712 
713 err:
714 	ena_com_delete_host_info(ena_dev);
715 }
716 
717 /* This function calculates the number of xstats based on the current config */
718 static unsigned int ena_xstats_calc_num(struct rte_eth_dev_data *data)
719 {
720 	return ENA_STATS_ARRAY_GLOBAL + ENA_STATS_ARRAY_ENI +
721 		(data->nb_tx_queues * ENA_STATS_ARRAY_TX) +
722 		(data->nb_rx_queues * ENA_STATS_ARRAY_RX);
723 }
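/*
 * E.g. with 2 Rx and 2 Tx queues configured this is 4 (global) + 5 (eni) +
 * 2 * 8 (per Tx queue) + 2 * 9 (per Rx queue) = 43 xstats, matching the
 * string arrays defined near the top of this file.
 */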
724 
725 static void ena_config_debug_area(struct ena_adapter *adapter)
726 {
727 	u32 debug_area_size;
728 	int rc, ss_count;
729 
730 	ss_count = ena_xstats_calc_num(adapter->edev_data);
731 
732 	/* Allocate 32 bytes for each string and 64 bits for the value */
733 	debug_area_size = ss_count * ETH_GSTRING_LEN + sizeof(u64) * ss_count;
734 
735 	rc = ena_com_allocate_debug_area(&adapter->ena_dev, debug_area_size);
736 	if (rc) {
737 		PMD_DRV_LOG(ERR, "Cannot allocate debug area\n");
738 		return;
739 	}
740 
741 	rc = ena_com_set_host_attributes(&adapter->ena_dev);
742 	if (rc) {
743 		if (rc == -ENA_COM_UNSUPPORTED)
744 			PMD_DRV_LOG(WARNING, "Cannot set host attributes\n");
745 		else
746 			PMD_DRV_LOG(ERR, "Cannot set host attributes\n");
747 
748 		goto err;
749 	}
750 
751 	return;
752 err:
753 	ena_com_delete_debug_area(&adapter->ena_dev);
754 }
755 
756 static int ena_close(struct rte_eth_dev *dev)
757 {
758 	struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev);
759 	struct rte_intr_handle *intr_handle = pci_dev->intr_handle;
760 	struct ena_adapter *adapter = dev->data->dev_private;
761 	int ret = 0;
762 
763 	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
764 		return 0;
765 
766 	if (adapter->state == ENA_ADAPTER_STATE_RUNNING)
767 		ret = ena_stop(dev);
768 	adapter->state = ENA_ADAPTER_STATE_CLOSED;
769 
770 	ena_rx_queue_release_all(dev);
771 	ena_tx_queue_release_all(dev);
772 
773 	rte_free(adapter->drv_stats);
774 	adapter->drv_stats = NULL;
775 
776 	rte_intr_disable(intr_handle);
777 	rte_intr_callback_unregister(intr_handle,
778 				     ena_interrupt_handler_rte,
779 				     dev);
780 
781 	/*
782 	 * The MAC address is not allocated dynamically. Setting it to NULL prevents
783 	 * the resource from being released in rte_eth_dev_release_port().
784 	 */
785 	dev->data->mac_addrs = NULL;
786 
787 	return ret;
788 }
789 
790 static int
791 ena_dev_reset(struct rte_eth_dev *dev)
792 {
793 	int rc = 0;
794 
795 	/* Cannot release memory in secondary process */
796 	if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
797 		PMD_DRV_LOG(WARNING, "dev_reset not supported in secondary.\n");
798 		return -EPERM;
799 	}
800 
801 	ena_destroy_device(dev);
802 	rc = eth_ena_dev_init(dev);
803 	if (rc)
804 		PMD_INIT_LOG(CRIT, "Cannot initialize device\n");
805 
806 	return rc;
807 }
808 
809 static void ena_rx_queue_release_all(struct rte_eth_dev *dev)
810 {
811 	int nb_queues = dev->data->nb_rx_queues;
812 	int i;
813 
814 	for (i = 0; i < nb_queues; i++)
815 		ena_rx_queue_release(dev, i);
816 }
817 
818 static void ena_tx_queue_release_all(struct rte_eth_dev *dev)
819 {
820 	int nb_queues = dev->data->nb_tx_queues;
821 	int i;
822 
823 	for (i = 0; i < nb_queues; i++)
824 		ena_tx_queue_release(dev, i);
825 }
826 
827 static void ena_rx_queue_release(struct rte_eth_dev *dev, uint16_t qid)
828 {
829 	struct ena_ring *ring = dev->data->rx_queues[qid];
830 
831 	/* Free ring resources */
832 	rte_free(ring->rx_buffer_info);
833 	ring->rx_buffer_info = NULL;
834 
835 	rte_free(ring->rx_refill_buffer);
836 	ring->rx_refill_buffer = NULL;
837 
838 	rte_free(ring->empty_rx_reqs);
839 	ring->empty_rx_reqs = NULL;
840 
841 	ring->configured = 0;
842 
843 	PMD_DRV_LOG(NOTICE, "Rx queue %d:%d released\n",
844 		ring->port_id, ring->id);
845 }
846 
847 static void ena_tx_queue_release(struct rte_eth_dev *dev, uint16_t qid)
848 {
849 	struct ena_ring *ring = dev->data->tx_queues[qid];
850 
851 	/* Free ring resources */
852 	rte_free(ring->push_buf_intermediate_buf);
853 
854 	rte_free(ring->tx_buffer_info);
855 
856 	rte_free(ring->empty_tx_reqs);
857 
858 	ring->empty_tx_reqs = NULL;
859 	ring->tx_buffer_info = NULL;
860 	ring->push_buf_intermediate_buf = NULL;
861 
862 	ring->configured = 0;
863 
864 	PMD_DRV_LOG(NOTICE, "Tx queue %d:%d released\n",
865 		ring->port_id, ring->id);
866 }
867 
868 static void ena_rx_queue_release_bufs(struct ena_ring *ring)
869 {
870 	unsigned int i;
871 
872 	for (i = 0; i < ring->ring_size; ++i) {
873 		struct ena_rx_buffer *rx_info = &ring->rx_buffer_info[i];
874 		if (rx_info->mbuf) {
875 			rte_mbuf_raw_free(rx_info->mbuf);
876 			rx_info->mbuf = NULL;
877 		}
878 	}
879 }
880 
881 static void ena_tx_queue_release_bufs(struct ena_ring *ring)
882 {
883 	unsigned int i;
884 
885 	for (i = 0; i < ring->ring_size; ++i) {
886 		struct ena_tx_buffer *tx_buf = &ring->tx_buffer_info[i];
887 
888 		if (tx_buf->mbuf) {
889 			rte_pktmbuf_free(tx_buf->mbuf);
890 			tx_buf->mbuf = NULL;
891 		}
892 	}
893 }
894 
895 static int ena_link_update(struct rte_eth_dev *dev,
896 			   __rte_unused int wait_to_complete)
897 {
898 	struct rte_eth_link *link = &dev->data->dev_link;
899 	struct ena_adapter *adapter = dev->data->dev_private;
900 
901 	link->link_status = adapter->link_status ? RTE_ETH_LINK_UP : RTE_ETH_LINK_DOWN;
902 	link->link_speed = RTE_ETH_SPEED_NUM_NONE;
903 	link->link_duplex = RTE_ETH_LINK_FULL_DUPLEX;
904 
905 	return 0;
906 }
907 
908 static int ena_queue_start_all(struct rte_eth_dev *dev,
909 			       enum ena_ring_type ring_type)
910 {
911 	struct ena_adapter *adapter = dev->data->dev_private;
912 	struct ena_ring *queues = NULL;
913 	int nb_queues;
914 	int i = 0;
915 	int rc = 0;
916 
917 	if (ring_type == ENA_RING_TYPE_RX) {
918 		queues = adapter->rx_ring;
919 		nb_queues = dev->data->nb_rx_queues;
920 	} else {
921 		queues = adapter->tx_ring;
922 		nb_queues = dev->data->nb_tx_queues;
923 	}
924 	for (i = 0; i < nb_queues; i++) {
925 		if (queues[i].configured) {
926 			if (ring_type == ENA_RING_TYPE_RX) {
927 				ena_assert_msg(
928 					dev->data->rx_queues[i] == &queues[i],
929 					"Inconsistent state of Rx queues\n");
930 			} else {
931 				ena_assert_msg(
932 					dev->data->tx_queues[i] == &queues[i],
933 					"Inconsistent state of Tx queues\n");
934 			}
935 
936 			rc = ena_queue_start(dev, &queues[i]);
937 
938 			if (rc) {
939 				PMD_INIT_LOG(ERR,
940 					"Failed to start queue[%d] of type(%d)\n",
941 					i, ring_type);
942 				goto err;
943 			}
944 		}
945 	}
946 
947 	return 0;
948 
949 err:
950 	while (i--)
951 		if (queues[i].configured)
952 			ena_queue_stop(&queues[i]);
953 
954 	return rc;
955 }
956 
957 static int ena_check_valid_conf(struct ena_adapter *adapter)
958 {
959 	uint32_t mtu = adapter->edev_data->mtu;
960 
961 	if (mtu > adapter->max_mtu || mtu < ENA_MIN_MTU) {
962 		PMD_INIT_LOG(ERR,
963 			"Unsupported MTU of %d. Max MTU: %d, min MTU: %d\n",
964 			mtu, adapter->max_mtu, ENA_MIN_MTU);
965 		return ENA_COM_UNSUPPORTED;
966 	}
967 
968 	return 0;
969 }
970 
971 static int
972 ena_calc_io_queue_size(struct ena_calc_queue_size_ctx *ctx,
973 		       bool use_large_llq_hdr)
974 {
975 	struct ena_admin_feature_llq_desc *llq = &ctx->get_feat_ctx->llq;
976 	struct ena_com_dev *ena_dev = ctx->ena_dev;
977 	uint32_t max_tx_queue_size;
978 	uint32_t max_rx_queue_size;
979 
980 	if (ena_dev->supported_features & BIT(ENA_ADMIN_MAX_QUEUES_EXT)) {
981 		struct ena_admin_queue_ext_feature_fields *max_queue_ext =
982 			&ctx->get_feat_ctx->max_queue_ext.max_queue_ext;
983 		max_rx_queue_size = RTE_MIN(max_queue_ext->max_rx_cq_depth,
984 			max_queue_ext->max_rx_sq_depth);
985 		max_tx_queue_size = max_queue_ext->max_tx_cq_depth;
986 
987 		if (ena_dev->tx_mem_queue_type ==
988 		    ENA_ADMIN_PLACEMENT_POLICY_DEV) {
989 			max_tx_queue_size = RTE_MIN(max_tx_queue_size,
990 				llq->max_llq_depth);
991 		} else {
992 			max_tx_queue_size = RTE_MIN(max_tx_queue_size,
993 				max_queue_ext->max_tx_sq_depth);
994 		}
995 
996 		ctx->max_rx_sgl_size = RTE_MIN(ENA_PKT_MAX_BUFS,
997 			max_queue_ext->max_per_packet_rx_descs);
998 		ctx->max_tx_sgl_size = RTE_MIN(ENA_PKT_MAX_BUFS,
999 			max_queue_ext->max_per_packet_tx_descs);
1000 	} else {
1001 		struct ena_admin_queue_feature_desc *max_queues =
1002 			&ctx->get_feat_ctx->max_queues;
1003 		max_rx_queue_size = RTE_MIN(max_queues->max_cq_depth,
1004 			max_queues->max_sq_depth);
1005 		max_tx_queue_size = max_queues->max_cq_depth;
1006 
1007 		if (ena_dev->tx_mem_queue_type ==
1008 		    ENA_ADMIN_PLACEMENT_POLICY_DEV) {
1009 			max_tx_queue_size = RTE_MIN(max_tx_queue_size,
1010 				llq->max_llq_depth);
1011 		} else {
1012 			max_tx_queue_size = RTE_MIN(max_tx_queue_size,
1013 				max_queues->max_sq_depth);
1014 		}
1015 
1016 		ctx->max_rx_sgl_size = RTE_MIN(ENA_PKT_MAX_BUFS,
1017 			max_queues->max_packet_rx_descs);
1018 		ctx->max_tx_sgl_size = RTE_MIN(ENA_PKT_MAX_BUFS,
1019 			max_queues->max_packet_tx_descs);
1020 	}
1021 
1022 	/* Round down to the nearest power of 2 */
1023 	max_rx_queue_size = rte_align32prevpow2(max_rx_queue_size);
1024 	max_tx_queue_size = rte_align32prevpow2(max_tx_queue_size);
1025 
1026 	if (use_large_llq_hdr) {
1027 		if ((llq->entry_size_ctrl_supported &
1028 		     ENA_ADMIN_LIST_ENTRY_SIZE_256B) &&
1029 		    (ena_dev->tx_mem_queue_type ==
1030 		     ENA_ADMIN_PLACEMENT_POLICY_DEV)) {
1031 			max_tx_queue_size /= 2;
1032 			PMD_INIT_LOG(INFO,
1033 				"Forcing large headers and decreasing maximum Tx queue size to %d\n",
1034 				max_tx_queue_size);
1035 		} else {
1036 			PMD_INIT_LOG(ERR,
1037 				"Forcing large headers failed: LLQ is disabled or device does not support large headers\n");
1038 		}
1039 	}
1040 
1041 	if (unlikely(max_rx_queue_size == 0 || max_tx_queue_size == 0)) {
1042 		PMD_INIT_LOG(ERR, "Invalid queue size\n");
1043 		return -EFAULT;
1044 	}
1045 
1046 	ctx->max_tx_queue_size = max_tx_queue_size;
1047 	ctx->max_rx_queue_size = max_rx_queue_size;
1048 
1049 	return 0;
1050 }
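/*
 * Illustrative example for ena_calc_io_queue_size() above (made-up device
 * capabilities): with max_rx_cq_depth = 4096 and max_rx_sq_depth = 1000,
 * the Rx limit becomes min(4096, 1000) = 1000, which rte_align32prevpow2()
 * rounds down to 512 before it is stored in ctx->max_rx_queue_size.
 */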
1051 
1052 static void ena_stats_restart(struct rte_eth_dev *dev)
1053 {
1054 	struct ena_adapter *adapter = dev->data->dev_private;
1055 
1056 	rte_atomic64_init(&adapter->drv_stats->ierrors);
1057 	rte_atomic64_init(&adapter->drv_stats->oerrors);
1058 	rte_atomic64_init(&adapter->drv_stats->rx_nombuf);
1059 	adapter->drv_stats->rx_drops = 0;
1060 }
1061 
1062 static int ena_stats_get(struct rte_eth_dev *dev,
1063 			  struct rte_eth_stats *stats)
1064 {
1065 	struct ena_admin_basic_stats ena_stats;
1066 	struct ena_adapter *adapter = dev->data->dev_private;
1067 	struct ena_com_dev *ena_dev = &adapter->ena_dev;
1068 	int rc;
1069 	int i;
1070 	int max_rings_stats;
1071 
1072 	memset(&ena_stats, 0, sizeof(ena_stats));
1073 
1074 	rte_spinlock_lock(&adapter->admin_lock);
1075 	rc = ENA_PROXY(adapter, ena_com_get_dev_basic_stats, ena_dev,
1076 		       &ena_stats);
1077 	rte_spinlock_unlock(&adapter->admin_lock);
1078 	if (unlikely(rc)) {
1079 		PMD_DRV_LOG(ERR, "Could not retrieve statistics from ENA\n");
1080 		return rc;
1081 	}
1082 
1083 	/* Set of basic statistics from ENA */
1084 	stats->ipackets = __MERGE_64B_H_L(ena_stats.rx_pkts_high,
1085 					  ena_stats.rx_pkts_low);
1086 	stats->opackets = __MERGE_64B_H_L(ena_stats.tx_pkts_high,
1087 					  ena_stats.tx_pkts_low);
1088 	stats->ibytes = __MERGE_64B_H_L(ena_stats.rx_bytes_high,
1089 					ena_stats.rx_bytes_low);
1090 	stats->obytes = __MERGE_64B_H_L(ena_stats.tx_bytes_high,
1091 					ena_stats.tx_bytes_low);
1092 
1093 	/* Driver related stats */
1094 	stats->imissed = adapter->drv_stats->rx_drops;
1095 	stats->ierrors = rte_atomic64_read(&adapter->drv_stats->ierrors);
1096 	stats->oerrors = rte_atomic64_read(&adapter->drv_stats->oerrors);
1097 	stats->rx_nombuf = rte_atomic64_read(&adapter->drv_stats->rx_nombuf);
1098 
1099 	max_rings_stats = RTE_MIN(dev->data->nb_rx_queues,
1100 		RTE_ETHDEV_QUEUE_STAT_CNTRS);
1101 	for (i = 0; i < max_rings_stats; ++i) {
1102 		struct ena_stats_rx *rx_stats = &adapter->rx_ring[i].rx_stats;
1103 
1104 		stats->q_ibytes[i] = rx_stats->bytes;
1105 		stats->q_ipackets[i] = rx_stats->cnt;
1106 		stats->q_errors[i] = rx_stats->bad_desc_num +
1107 			rx_stats->bad_req_id;
1108 	}
1109 
1110 	max_rings_stats = RTE_MIN(dev->data->nb_tx_queues,
1111 		RTE_ETHDEV_QUEUE_STAT_CNTRS);
1112 	for (i = 0; i < max_rings_stats; ++i) {
1113 		struct ena_stats_tx *tx_stats = &adapter->tx_ring[i].tx_stats;
1114 
1115 		stats->q_obytes[i] = tx_stats->bytes;
1116 		stats->q_opackets[i] = tx_stats->cnt;
1117 	}
1118 
1119 	return 0;
1120 }
1121 
1122 static int ena_mtu_set(struct rte_eth_dev *dev, uint16_t mtu)
1123 {
1124 	struct ena_adapter *adapter;
1125 	struct ena_com_dev *ena_dev;
1126 	int rc = 0;
1127 
1128 	ena_assert_msg(dev->data != NULL, "Uninitialized device\n");
1129 	ena_assert_msg(dev->data->dev_private != NULL, "Uninitialized device\n");
1130 	adapter = dev->data->dev_private;
1131 
1132 	ena_dev = &adapter->ena_dev;
1133 	ena_assert_msg(ena_dev != NULL, "Uninitialized device\n");
1134 
1135 	if (mtu > adapter->max_mtu || mtu < ENA_MIN_MTU) {
1136 		PMD_DRV_LOG(ERR,
1137 			"Invalid MTU setting. New MTU: %d, max MTU: %d, min MTU: %d\n",
1138 			mtu, adapter->max_mtu, ENA_MIN_MTU);
1139 		return -EINVAL;
1140 	}
1141 
1142 	rc = ENA_PROXY(adapter, ena_com_set_dev_mtu, ena_dev, mtu);
1143 	if (rc)
1144 		PMD_DRV_LOG(ERR, "Could not set MTU: %d\n", mtu);
1145 	else
1146 		PMD_DRV_LOG(NOTICE, "MTU set to: %d\n", mtu);
1147 
1148 	return rc;
1149 }
1150 
1151 static int ena_start(struct rte_eth_dev *dev)
1152 {
1153 	struct ena_adapter *adapter = dev->data->dev_private;
1154 	uint64_t ticks;
1155 	int rc = 0;
1156 
1157 	/* Cannot allocate memory in secondary process */
1158 	if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
1159 		PMD_DRV_LOG(WARNING, "dev_start not supported in secondary.\n");
1160 		return -EPERM;
1161 	}
1162 
1163 	rc = ena_check_valid_conf(adapter);
1164 	if (rc)
1165 		return rc;
1166 
1167 	rc = ena_setup_rx_intr(dev);
1168 	if (rc)
1169 		return rc;
1170 
1171 	rc = ena_queue_start_all(dev, ENA_RING_TYPE_RX);
1172 	if (rc)
1173 		return rc;
1174 
1175 	rc = ena_queue_start_all(dev, ENA_RING_TYPE_TX);
1176 	if (rc)
1177 		goto err_start_tx;
1178 
1179 	if (adapter->edev_data->dev_conf.rxmode.mq_mode & RTE_ETH_MQ_RX_RSS_FLAG) {
1180 		rc = ena_rss_configure(adapter);
1181 		if (rc)
1182 			goto err_rss_init;
1183 	}
1184 
1185 	ena_stats_restart(dev);
1186 
1187 	adapter->timestamp_wd = rte_get_timer_cycles();
1188 	adapter->keep_alive_timeout = ENA_DEVICE_KALIVE_TIMEOUT;
1189 
1190 	ticks = rte_get_timer_hz();
1191 	rte_timer_reset(&adapter->timer_wd, ticks, PERIODICAL, rte_lcore_id(),
1192 			ena_timer_wd_callback, dev);
1193 
1194 	++adapter->dev_stats.dev_start;
1195 	adapter->state = ENA_ADAPTER_STATE_RUNNING;
1196 
1197 	return 0;
1198 
1199 err_rss_init:
1200 	ena_queue_stop_all(dev, ENA_RING_TYPE_TX);
1201 err_start_tx:
1202 	ena_queue_stop_all(dev, ENA_RING_TYPE_RX);
1203 	return rc;
1204 }
1205 
1206 static int ena_stop(struct rte_eth_dev *dev)
1207 {
1208 	struct ena_adapter *adapter = dev->data->dev_private;
1209 	struct ena_com_dev *ena_dev = &adapter->ena_dev;
1210 	struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev);
1211 	struct rte_intr_handle *intr_handle = pci_dev->intr_handle;
1212 	int rc;
1213 
1214 	/* Cannot free memory in secondary process */
1215 	if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
1216 		PMD_DRV_LOG(WARNING, "dev_stop not supported in secondary.\n");
1217 		return -EPERM;
1218 	}
1219 
1220 	rte_timer_stop_sync(&adapter->timer_wd);
1221 	ena_queue_stop_all(dev, ENA_RING_TYPE_TX);
1222 	ena_queue_stop_all(dev, ENA_RING_TYPE_RX);
1223 
1224 	if (adapter->trigger_reset) {
1225 		rc = ena_com_dev_reset(ena_dev, adapter->reset_reason);
1226 		if (rc)
1227 			PMD_DRV_LOG(ERR, "Device reset failed, rc: %d\n", rc);
1228 	}
1229 
1230 	rte_intr_disable(intr_handle);
1231 
1232 	rte_intr_efd_disable(intr_handle);
1233 
1234 	/* Cleanup vector list */
1235 	rte_intr_vec_list_free(intr_handle);
1236 
1237 	rte_intr_enable(intr_handle);
1238 
1239 	++adapter->dev_stats.dev_stop;
1240 	adapter->state = ENA_ADAPTER_STATE_STOPPED;
1241 	dev->data->dev_started = 0;
1242 
1243 	return 0;
1244 }
1245 
1246 static int ena_create_io_queue(struct rte_eth_dev *dev, struct ena_ring *ring)
1247 {
1248 	struct ena_adapter *adapter = ring->adapter;
1249 	struct ena_com_dev *ena_dev = &adapter->ena_dev;
1250 	struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev);
1251 	struct rte_intr_handle *intr_handle = pci_dev->intr_handle;
1252 	struct ena_com_create_io_ctx ctx =
1253 		/* policy set to _HOST just to satisfy icc compiler */
1254 		{ ENA_ADMIN_PLACEMENT_POLICY_HOST,
1255 		  0, 0, 0, 0, 0 };
1256 	uint16_t ena_qid;
1257 	unsigned int i;
1258 	int rc;
1259 
1260 	ctx.msix_vector = -1;
1261 	if (ring->type == ENA_RING_TYPE_TX) {
1262 		ena_qid = ENA_IO_TXQ_IDX(ring->id);
1263 		ctx.direction = ENA_COM_IO_QUEUE_DIRECTION_TX;
1264 		ctx.mem_queue_type = ena_dev->tx_mem_queue_type;
1265 		for (i = 0; i < ring->ring_size; i++)
1266 			ring->empty_tx_reqs[i] = i;
1267 	} else {
1268 		ena_qid = ENA_IO_RXQ_IDX(ring->id);
1269 		ctx.direction = ENA_COM_IO_QUEUE_DIRECTION_RX;
1270 		if (rte_intr_dp_is_en(intr_handle))
1271 			ctx.msix_vector =
1272 				rte_intr_vec_list_index_get(intr_handle,
1273 								   ring->id);
1274 
1275 		for (i = 0; i < ring->ring_size; i++)
1276 			ring->empty_rx_reqs[i] = i;
1277 	}
1278 	ctx.queue_size = ring->ring_size;
1279 	ctx.qid = ena_qid;
1280 	ctx.numa_node = ring->numa_socket_id;
1281 
1282 	rc = ena_com_create_io_queue(ena_dev, &ctx);
1283 	if (rc) {
1284 		PMD_DRV_LOG(ERR,
1285 			"Failed to create IO queue[%d] (qid:%d), rc: %d\n",
1286 			ring->id, ena_qid, rc);
1287 		return rc;
1288 	}
1289 
1290 	rc = ena_com_get_io_handlers(ena_dev, ena_qid,
1291 				     &ring->ena_com_io_sq,
1292 				     &ring->ena_com_io_cq);
1293 	if (rc) {
1294 		PMD_DRV_LOG(ERR,
1295 			"Failed to get IO queue[%d] handlers, rc: %d\n",
1296 			ring->id, rc);
1297 		ena_com_destroy_io_queue(ena_dev, ena_qid);
1298 		return rc;
1299 	}
1300 
1301 	if (ring->type == ENA_RING_TYPE_TX)
1302 		ena_com_update_numa_node(ring->ena_com_io_cq, ctx.numa_node);
1303 
1304 	/* Start with Rx interrupts being masked. */
1305 	if (ring->type == ENA_RING_TYPE_RX && rte_intr_dp_is_en(intr_handle))
1306 		ena_rx_queue_intr_disable(dev, ring->id);
1307 
1308 	return 0;
1309 }
1310 
1311 static void ena_queue_stop(struct ena_ring *ring)
1312 {
1313 	struct ena_com_dev *ena_dev = &ring->adapter->ena_dev;
1314 
1315 	if (ring->type == ENA_RING_TYPE_RX) {
1316 		ena_com_destroy_io_queue(ena_dev, ENA_IO_RXQ_IDX(ring->id));
1317 		ena_rx_queue_release_bufs(ring);
1318 	} else {
1319 		ena_com_destroy_io_queue(ena_dev, ENA_IO_TXQ_IDX(ring->id));
1320 		ena_tx_queue_release_bufs(ring);
1321 	}
1322 }
1323 
1324 static void ena_queue_stop_all(struct rte_eth_dev *dev,
1325 			      enum ena_ring_type ring_type)
1326 {
1327 	struct ena_adapter *adapter = dev->data->dev_private;
1328 	struct ena_ring *queues = NULL;
1329 	uint16_t nb_queues, i;
1330 
1331 	if (ring_type == ENA_RING_TYPE_RX) {
1332 		queues = adapter->rx_ring;
1333 		nb_queues = dev->data->nb_rx_queues;
1334 	} else {
1335 		queues = adapter->tx_ring;
1336 		nb_queues = dev->data->nb_tx_queues;
1337 	}
1338 
1339 	for (i = 0; i < nb_queues; ++i)
1340 		if (queues[i].configured)
1341 			ena_queue_stop(&queues[i]);
1342 }
1343 
1344 static int ena_queue_start(struct rte_eth_dev *dev, struct ena_ring *ring)
1345 {
1346 	int rc, bufs_num;
1347 
1348 	ena_assert_msg(ring->configured == 1,
1349 		       "Trying to start unconfigured queue\n");
1350 
1351 	rc = ena_create_io_queue(dev, ring);
1352 	if (rc) {
1353 		PMD_INIT_LOG(ERR, "Failed to create IO queue\n");
1354 		return rc;
1355 	}
1356 
1357 	ring->next_to_clean = 0;
1358 	ring->next_to_use = 0;
1359 
1360 	if (ring->type == ENA_RING_TYPE_TX) {
1361 		ring->tx_stats.available_desc =
1362 			ena_com_free_q_entries(ring->ena_com_io_sq);
1363 		return 0;
1364 	}
1365 
1366 	bufs_num = ring->ring_size - 1;
1367 	rc = ena_populate_rx_queue(ring, bufs_num);
1368 	if (rc != bufs_num) {
1369 		ena_com_destroy_io_queue(&ring->adapter->ena_dev,
1370 					 ENA_IO_RXQ_IDX(ring->id));
1371 		PMD_INIT_LOG(ERR, "Failed to populate Rx ring\n");
1372 		return ENA_COM_FAULT;
1373 	}
1374 	/* Flush the per-core RX buffer pool cache as the buffers can be used on
1375 	 * other cores as well.
1376 	 */
1377 	rte_mempool_cache_flush(NULL, ring->mb_pool);
1378 
1379 	return 0;
1380 }
1381 
1382 static int ena_tx_queue_setup(struct rte_eth_dev *dev,
1383 			      uint16_t queue_idx,
1384 			      uint16_t nb_desc,
1385 			      unsigned int socket_id,
1386 			      const struct rte_eth_txconf *tx_conf)
1387 {
1388 	struct ena_ring *txq = NULL;
1389 	struct ena_adapter *adapter = dev->data->dev_private;
1390 	unsigned int i;
1391 	uint16_t dyn_thresh;
1392 
1393 	txq = &adapter->tx_ring[queue_idx];
1394 
1395 	if (txq->configured) {
1396 		PMD_DRV_LOG(CRIT,
1397 			"API violation. Queue[%d] is already configured\n",
1398 			queue_idx);
1399 		return ENA_COM_FAULT;
1400 	}
1401 
1402 	if (!rte_is_power_of_2(nb_desc)) {
1403 		PMD_DRV_LOG(ERR,
1404 			"Unsupported size of Tx queue: %d is not a power of 2.\n",
1405 			nb_desc);
1406 		return -EINVAL;
1407 	}
1408 
1409 	if (nb_desc > adapter->max_tx_ring_size) {
1410 		PMD_DRV_LOG(ERR,
1411 			"Unsupported size of Tx queue (max size: %d)\n",
1412 			adapter->max_tx_ring_size);
1413 		return -EINVAL;
1414 	}
1415 
1416 	txq->port_id = dev->data->port_id;
1417 	txq->next_to_clean = 0;
1418 	txq->next_to_use = 0;
1419 	txq->ring_size = nb_desc;
1420 	txq->size_mask = nb_desc - 1;
1421 	txq->numa_socket_id = socket_id;
1422 	txq->pkts_without_db = false;
1423 	txq->last_cleanup_ticks = 0;
1424 
1425 	txq->tx_buffer_info = rte_zmalloc_socket("txq->tx_buffer_info",
1426 		sizeof(struct ena_tx_buffer) * txq->ring_size,
1427 		RTE_CACHE_LINE_SIZE,
1428 		socket_id);
1429 	if (!txq->tx_buffer_info) {
1430 		PMD_DRV_LOG(ERR,
1431 			"Failed to allocate memory for Tx buffer info\n");
1432 		return -ENOMEM;
1433 	}
1434 
1435 	txq->empty_tx_reqs = rte_zmalloc_socket("txq->empty_tx_reqs",
1436 		sizeof(uint16_t) * txq->ring_size,
1437 		RTE_CACHE_LINE_SIZE,
1438 		socket_id);
1439 	if (!txq->empty_tx_reqs) {
1440 		PMD_DRV_LOG(ERR,
1441 			"Failed to allocate memory for empty Tx requests\n");
1442 		rte_free(txq->tx_buffer_info);
1443 		return -ENOMEM;
1444 	}
1445 
1446 	txq->push_buf_intermediate_buf =
1447 		rte_zmalloc_socket("txq->push_buf_intermediate_buf",
1448 			txq->tx_max_header_size,
1449 			RTE_CACHE_LINE_SIZE,
1450 			socket_id);
1451 	if (!txq->push_buf_intermediate_buf) {
1452 		PMD_DRV_LOG(ERR, "Failed to alloc push buffer for LLQ\n");
1453 		rte_free(txq->tx_buffer_info);
1454 		rte_free(txq->empty_tx_reqs);
1455 		return -ENOMEM;
1456 	}
1457 
1458 	for (i = 0; i < txq->ring_size; i++)
1459 		txq->empty_tx_reqs[i] = i;
1460 
1461 	txq->offloads = tx_conf->offloads | dev->data->dev_conf.txmode.offloads;
1462 
1463 	/* Check if caller provided the Tx cleanup threshold value. */
1464 	if (tx_conf->tx_free_thresh != 0) {
1465 		txq->tx_free_thresh = tx_conf->tx_free_thresh;
1466 	} else {
1467 		dyn_thresh = txq->ring_size -
1468 			txq->ring_size / ENA_REFILL_THRESH_DIVIDER;
1469 		txq->tx_free_thresh = RTE_MAX(dyn_thresh,
1470 			txq->ring_size - ENA_REFILL_THRESH_PACKET);
1471 	}
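	/*
	 * Illustrative default, assuming ENA_REFILL_THRESH_DIVIDER is 8 and
	 * ENA_REFILL_THRESH_PACKET is 256 (both defined in ena_ethdev.h): for a
	 * 1024-entry ring this selects max(1024 - 128, 1024 - 256) = 896.
	 */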
1472 
1473 	txq->missing_tx_completion_threshold =
1474 		RTE_MIN(txq->ring_size / 2, ENA_DEFAULT_MISSING_COMP);
1475 
1476 	/* Store pointer to this queue in upper layer */
1477 	txq->configured = 1;
1478 	dev->data->tx_queues[queue_idx] = txq;
1479 
1480 	return 0;
1481 }
1482 
1483 static int ena_rx_queue_setup(struct rte_eth_dev *dev,
1484 			      uint16_t queue_idx,
1485 			      uint16_t nb_desc,
1486 			      unsigned int socket_id,
1487 			      const struct rte_eth_rxconf *rx_conf,
1488 			      struct rte_mempool *mp)
1489 {
1490 	struct ena_adapter *adapter = dev->data->dev_private;
1491 	struct ena_ring *rxq = NULL;
1492 	size_t buffer_size;
1493 	int i;
1494 	uint16_t dyn_thresh;
1495 
1496 	rxq = &adapter->rx_ring[queue_idx];
1497 	if (rxq->configured) {
1498 		PMD_DRV_LOG(CRIT,
1499 			"API violation. Queue[%d] is already configured\n",
1500 			queue_idx);
1501 		return ENA_COM_FAULT;
1502 	}
1503 
1504 	if (!rte_is_power_of_2(nb_desc)) {
1505 		PMD_DRV_LOG(ERR,
1506 			"Unsupported size of Rx queue: %d is not a power of 2.\n",
1507 			nb_desc);
1508 		return -EINVAL;
1509 	}
1510 
1511 	if (nb_desc > adapter->max_rx_ring_size) {
1512 		PMD_DRV_LOG(ERR,
1513 			"Unsupported size of Rx queue (max size: %d)\n",
1514 			adapter->max_rx_ring_size);
1515 		return -EINVAL;
1516 	}
1517 
1518 	/* ENA doesn't support buffers smaller than 1400 bytes */
1519 	buffer_size = rte_pktmbuf_data_room_size(mp) - RTE_PKTMBUF_HEADROOM;
1520 	if (buffer_size < ENA_RX_BUF_MIN_SIZE) {
1521 		PMD_DRV_LOG(ERR,
1522 			"Unsupported size of Rx buffer: %zu (min size: %d)\n",
1523 			buffer_size, ENA_RX_BUF_MIN_SIZE);
1524 		return -EINVAL;
1525 	}
1526 
1527 	rxq->port_id = dev->data->port_id;
1528 	rxq->next_to_clean = 0;
1529 	rxq->next_to_use = 0;
1530 	rxq->ring_size = nb_desc;
1531 	rxq->size_mask = nb_desc - 1;
1532 	rxq->numa_socket_id = socket_id;
1533 	rxq->mb_pool = mp;
1534 
1535 	rxq->rx_buffer_info = rte_zmalloc_socket("rxq->buffer_info",
1536 		sizeof(struct ena_rx_buffer) * nb_desc,
1537 		RTE_CACHE_LINE_SIZE,
1538 		socket_id);
1539 	if (!rxq->rx_buffer_info) {
1540 		PMD_DRV_LOG(ERR,
1541 			"Failed to allocate memory for Rx buffer info\n");
1542 		return -ENOMEM;
1543 	}
1544 
1545 	rxq->rx_refill_buffer = rte_zmalloc_socket("rxq->rx_refill_buffer",
1546 		sizeof(struct rte_mbuf *) * nb_desc,
1547 		RTE_CACHE_LINE_SIZE,
1548 		socket_id);
1549 	if (!rxq->rx_refill_buffer) {
1550 		PMD_DRV_LOG(ERR,
1551 			"Failed to allocate memory for Rx refill buffer\n");
1552 		rte_free(rxq->rx_buffer_info);
1553 		rxq->rx_buffer_info = NULL;
1554 		return -ENOMEM;
1555 	}
1556 
1557 	rxq->empty_rx_reqs = rte_zmalloc_socket("rxq->empty_rx_reqs",
1558 		sizeof(uint16_t) * nb_desc,
1559 		RTE_CACHE_LINE_SIZE,
1560 		socket_id);
1561 	if (!rxq->empty_rx_reqs) {
1562 		PMD_DRV_LOG(ERR,
1563 			"Failed to allocate memory for empty Rx requests\n");
1564 		rte_free(rxq->rx_buffer_info);
1565 		rxq->rx_buffer_info = NULL;
1566 		rte_free(rxq->rx_refill_buffer);
1567 		rxq->rx_refill_buffer = NULL;
1568 		return -ENOMEM;
1569 	}
1570 
1571 	for (i = 0; i < nb_desc; i++)
1572 		rxq->empty_rx_reqs[i] = i;
1573 
1574 	rxq->offloads = rx_conf->offloads | dev->data->dev_conf.rxmode.offloads;
1575 
1576 	if (rx_conf->rx_free_thresh != 0) {
1577 		rxq->rx_free_thresh = rx_conf->rx_free_thresh;
1578 	} else {
1579 		dyn_thresh = rxq->ring_size / ENA_REFILL_THRESH_DIVIDER;
1580 		rxq->rx_free_thresh = RTE_MIN(dyn_thresh,
1581 			(uint16_t)(ENA_REFILL_THRESH_PACKET));
1582 	}
1583 
1584 	/* Store pointer to this queue in upper layer */
1585 	rxq->configured = 1;
1586 	dev->data->rx_queues[queue_idx] = rxq;
1587 
1588 	return 0;
1589 }
1590 
1591 static int ena_add_single_rx_desc(struct ena_com_io_sq *io_sq,
1592 				  struct rte_mbuf *mbuf, uint16_t id)
1593 {
1594 	struct ena_com_buf ebuf;
1595 	int rc;
1596 
1597 	/* prepare physical address for DMA transaction */
1598 	ebuf.paddr = mbuf->buf_iova + RTE_PKTMBUF_HEADROOM;
1599 	ebuf.len = mbuf->buf_len - RTE_PKTMBUF_HEADROOM;
1600 
1601 	/* pass resource to device */
1602 	rc = ena_com_add_single_rx_desc(io_sq, &ebuf, id);
1603 	if (unlikely(rc != 0))
1604 		PMD_RX_LOG(WARNING, "Failed adding Rx desc\n");
1605 
1606 	return rc;
1607 }
1608 
1609 static int ena_populate_rx_queue(struct ena_ring *rxq, unsigned int count)
1610 {
1611 	unsigned int i;
1612 	int rc;
1613 	uint16_t next_to_use = rxq->next_to_use;
1614 	uint16_t req_id;
1615 #ifdef RTE_ETHDEV_DEBUG_RX
1616 	uint16_t in_use;
1617 #endif
1618 	struct rte_mbuf **mbufs = rxq->rx_refill_buffer;
1619 
1620 	if (unlikely(!count))
1621 		return 0;
1622 
1623 #ifdef RTE_ETHDEV_DEBUG_RX
1624 	in_use = rxq->ring_size - 1 -
1625 		ena_com_free_q_entries(rxq->ena_com_io_sq);
1626 	if (unlikely((in_use + count) >= rxq->ring_size))
1627 		PMD_RX_LOG(ERR, "Bad Rx ring state\n");
1628 #endif
1629 
1630 	/* get resources for incoming packets */
1631 	rc = rte_pktmbuf_alloc_bulk(rxq->mb_pool, mbufs, count);
1632 	if (unlikely(rc < 0)) {
1633 		rte_atomic64_inc(&rxq->adapter->drv_stats->rx_nombuf);
1634 		++rxq->rx_stats.mbuf_alloc_fail;
1635 		PMD_RX_LOG(DEBUG, "There are not enough free buffers\n");
1636 		return 0;
1637 	}
1638 
1639 	for (i = 0; i < count; i++) {
1640 		struct rte_mbuf *mbuf = mbufs[i];
1641 		struct ena_rx_buffer *rx_info;
1642 
1643 		if (likely((i + 4) < count))
1644 			rte_prefetch0(mbufs[i + 4]);
1645 
1646 		req_id = rxq->empty_rx_reqs[next_to_use];
1647 		rx_info = &rxq->rx_buffer_info[req_id];
1648 
1649 		rc = ena_add_single_rx_desc(rxq->ena_com_io_sq, mbuf, req_id);
1650 		if (unlikely(rc != 0))
1651 			break;
1652 
1653 		rx_info->mbuf = mbuf;
1654 		next_to_use = ENA_IDX_NEXT_MASKED(next_to_use, rxq->size_mask);
1655 	}
1656 
1657 	if (unlikely(i < count)) {
1658 		PMD_RX_LOG(WARNING,
1659 			"Refilled Rx queue[%d] with only %d/%d buffers\n",
1660 			rxq->id, i, count);
1661 		rte_pktmbuf_free_bulk(&mbufs[i], count - i);
1662 		++rxq->rx_stats.refill_partial;
1663 	}
1664 
1665 	/* When we have submitted free resources to the device... */
1666 	if (likely(i > 0)) {
1667 		/* ...let HW know that it can fill buffers with data. */
1668 		ena_com_write_sq_doorbell(rxq->ena_com_io_sq);
1669 
1670 		rxq->next_to_use = next_to_use;
1671 	}
1672 
1673 	return i;
1674 }
1675 
1676 static int ena_device_init(struct ena_adapter *adapter,
1677 			   struct rte_pci_device *pdev,
1678 			   struct ena_com_dev_get_features_ctx *get_feat_ctx)
1679 {
1680 	struct ena_com_dev *ena_dev = &adapter->ena_dev;
1681 	uint32_t aenq_groups;
1682 	int rc;
1683 	bool readless_supported;
1684 
1685 	/* Initialize mmio registers */
1686 	rc = ena_com_mmio_reg_read_request_init(ena_dev);
1687 	if (rc) {
1688 		PMD_DRV_LOG(ERR, "Failed to init MMIO read less\n");
1689 		return rc;
1690 	}
1691 
1692 	/* The PCIe configuration space revision ID indicates whether MMIO register
1693 	 * read is disabled.
1694 	 */
1695 	readless_supported = !(pdev->id.class_id & ENA_MMIO_DISABLE_REG_READ);
1696 	ena_com_set_mmio_read_mode(ena_dev, readless_supported);
1697 
1698 	/* reset device */
1699 	rc = ena_com_dev_reset(ena_dev, ENA_REGS_RESET_NORMAL);
1700 	if (rc) {
1701 		PMD_DRV_LOG(ERR, "Cannot reset device\n");
1702 		goto err_mmio_read_less;
1703 	}
1704 
1705 	/* check FW version */
1706 	rc = ena_com_validate_version(ena_dev);
1707 	if (rc) {
1708 		PMD_DRV_LOG(ERR, "Device version is too low\n");
1709 		goto err_mmio_read_less;
1710 	}
1711 
1712 	ena_dev->dma_addr_bits = ena_com_get_dma_width(ena_dev);
1713 
1714 	/* ENA device administration layer init */
1715 	rc = ena_com_admin_init(ena_dev, &aenq_handlers);
1716 	if (rc) {
1717 		PMD_DRV_LOG(ERR,
1718 			"Cannot initialize ENA admin queue\n");
1719 		goto err_mmio_read_less;
1720 	}
1721 
1722 	/* The number of queues must be known before the MSI-X interrupts can
1723 	 * be enabled, so the driver uses admin polling mode to retrieve this
1724 	 * information.
1725 	 */
1726 	ena_com_set_admin_polling_mode(ena_dev, true);
1727 
1728 	ena_config_host_info(ena_dev);
1729 
1730 	/* Get Device Attributes and features */
1731 	rc = ena_com_get_dev_attr_feat(ena_dev, get_feat_ctx);
1732 	if (rc) {
1733 		PMD_DRV_LOG(ERR,
1734 			"Cannot get attribute for ENA device, rc: %d\n", rc);
1735 		goto err_admin_init;
1736 	}
1737 
1738 	aenq_groups = BIT(ENA_ADMIN_LINK_CHANGE) |
1739 		      BIT(ENA_ADMIN_NOTIFICATION) |
1740 		      BIT(ENA_ADMIN_KEEP_ALIVE) |
1741 		      BIT(ENA_ADMIN_FATAL_ERROR) |
1742 		      BIT(ENA_ADMIN_WARNING);
1743 
1744 	aenq_groups &= get_feat_ctx->aenq.supported_groups;
1745 
1746 	adapter->all_aenq_groups = aenq_groups;
1747 
1748 	return 0;
1749 
1750 err_admin_init:
1751 	ena_com_admin_destroy(ena_dev);
1752 
1753 err_mmio_read_less:
1754 	ena_com_mmio_reg_read_request_destroy(ena_dev);
1755 
1756 	return rc;
1757 }
1758 
1759 static void ena_interrupt_handler_rte(void *cb_arg)
1760 {
1761 	struct rte_eth_dev *dev = cb_arg;
1762 	struct ena_adapter *adapter = dev->data->dev_private;
1763 	struct ena_com_dev *ena_dev = &adapter->ena_dev;
1764 
1765 	ena_com_admin_q_comp_intr_handler(ena_dev);
1766 	if (likely(adapter->state != ENA_ADAPTER_STATE_CLOSED))
1767 		ena_com_aenq_intr_handler(ena_dev, dev);
1768 }
1769 
1770 static void check_for_missing_keep_alive(struct ena_adapter *adapter)
1771 {
1772 	if (!(adapter->active_aenq_groups & BIT(ENA_ADMIN_KEEP_ALIVE)))
1773 		return;
1774 
1775 	if (adapter->keep_alive_timeout == ENA_HW_HINTS_NO_TIMEOUT)
1776 		return;
1777 
1778 	if (unlikely((rte_get_timer_cycles() - adapter->timestamp_wd) >=
1779 	    adapter->keep_alive_timeout)) {
1780 		PMD_DRV_LOG(ERR, "Keep alive timeout\n");
1781 		adapter->reset_reason = ENA_REGS_RESET_KEEP_ALIVE_TO;
1782 		adapter->trigger_reset = true;
1783 		++adapter->dev_stats.wd_expired;
1784 	}
1785 }
1786 
1787 /* Check if admin queue is enabled */
1788 static void check_for_admin_com_state(struct ena_adapter *adapter)
1789 {
1790 	if (unlikely(!ena_com_get_admin_running_state(&adapter->ena_dev))) {
1791 		PMD_DRV_LOG(ERR, "ENA admin queue is not in running state\n");
1792 		adapter->reset_reason = ENA_REGS_RESET_ADMIN_TO;
1793 		adapter->trigger_reset = true;
1794 	}
1795 }
1796 
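/* Scan a single Tx ring for packets whose completion has been outstanding
 * longer than the missing Tx completion timeout. If the number of such
 * packets exceeds the ring's threshold, a device reset is scheduled and
 * -EIO is returned.
 */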
1797 static int check_for_tx_completion_in_queue(struct ena_adapter *adapter,
1798 					    struct ena_ring *tx_ring)
1799 {
1800 	struct ena_tx_buffer *tx_buf;
1801 	uint64_t timestamp;
1802 	uint64_t completion_delay;
1803 	uint32_t missed_tx = 0;
1804 	unsigned int i;
1805 	int rc = 0;
1806 
1807 	for (i = 0; i < tx_ring->ring_size; ++i) {
1808 		tx_buf = &tx_ring->tx_buffer_info[i];
1809 		timestamp = tx_buf->timestamp;
1810 
1811 		if (timestamp == 0)
1812 			continue;
1813 
1814 		completion_delay = rte_get_timer_cycles() - timestamp;
1815 		if (completion_delay > adapter->missing_tx_completion_to) {
1816 			if (unlikely(!tx_buf->print_once)) {
1817 				PMD_TX_LOG(WARNING,
1818 					"Found a Tx that wasn't completed on time, qid %d, index %d. "
1819 					"Missing Tx outstanding for %" PRIu64 " msecs.\n",
1820 					tx_ring->id, i, completion_delay *
1821 					1000 / rte_get_timer_hz());
1822 				tx_buf->print_once = true;
1823 			}
1824 			++missed_tx;
1825 		}
1826 	}
1827 
1828 	if (unlikely(missed_tx > tx_ring->missing_tx_completion_threshold)) {
1829 		PMD_DRV_LOG(ERR,
1830 			"The number of lost Tx completions is above the threshold (%d > %d). "
1831 			"Triggering the device reset.\n",
1832 			missed_tx,
1833 			tx_ring->missing_tx_completion_threshold);
1834 		adapter->reset_reason = ENA_REGS_RESET_MISS_TX_CMPL;
1835 		adapter->trigger_reset = true;
1836 		rc = -EIO;
1837 	}
1838 
1839 	tx_ring->tx_stats.missed_tx += missed_tx;
1840 
1841 	return rc;
1842 }
1843 
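/* Check up to missing_tx_completion_budget Tx queues for missing Tx
 * completions, resuming round-robin from the last checked queue. Queues
 * whose cleanup has not been called recently are skipped to avoid false
 * positives.
 */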
1844 static void check_for_tx_completions(struct ena_adapter *adapter)
1845 {
1846 	struct ena_ring *tx_ring;
1847 	uint64_t tx_cleanup_delay;
1848 	size_t qid;
1849 	int budget;
1850 	uint16_t nb_tx_queues = adapter->edev_data->nb_tx_queues;
1851 
1852 	if (adapter->missing_tx_completion_to == ENA_HW_HINTS_NO_TIMEOUT)
1853 		return;
1854 
1856 	budget = adapter->missing_tx_completion_budget;
1857 
1858 	qid = adapter->last_tx_comp_qid;
1859 	while (budget-- > 0) {
1860 		tx_ring = &adapter->tx_ring[qid];
1861 
1862 		/* Tx cleanup is called only from the Tx burst function, so the
1863 		 * application controls when and how often it runs, and each
1864 		 * call is limited by the cleanup budget. To avoid falsely
1865 		 * detecting a missing HW Tx completion, check how much time
1866 		 * has passed since the cleanup was last called for this queue.
1867 		 */
1868 		tx_cleanup_delay = rte_get_timer_cycles() -
1869 			tx_ring->last_cleanup_ticks;
1870 		if (tx_cleanup_delay < adapter->tx_cleanup_stall_delay)
1871 			check_for_tx_completion_in_queue(adapter, tx_ring);
1872 		qid = (qid + 1) % nb_tx_queues;
1873 	}
1874 
1875 	adapter->last_tx_comp_qid = qid;
1876 }
1877 
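/* Watchdog timer callback: checks the keep-alive timestamp, the admin queue
 * state and the Tx completions. If any check requests a reset, the
 * RTE_ETH_EVENT_INTR_RESET event is raised so the application can recover
 * the port.
 */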
1878 static void ena_timer_wd_callback(__rte_unused struct rte_timer *timer,
1879 				  void *arg)
1880 {
1881 	struct rte_eth_dev *dev = arg;
1882 	struct ena_adapter *adapter = dev->data->dev_private;
1883 
1884 	if (unlikely(adapter->trigger_reset))
1885 		return;
1886 
1887 	check_for_missing_keep_alive(adapter);
1888 	check_for_admin_com_state(adapter);
1889 	check_for_tx_completions(adapter);
1890 
1891 	if (unlikely(adapter->trigger_reset)) {
1892 		PMD_DRV_LOG(ERR, "Trigger reset is on\n");
1893 		rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_RESET,
1894 			NULL);
1895 	}
1896 }
1897 
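/* Fill the default LLQ configuration. A 256B ring entry is requested only
 * when the large LLQ header option is enabled and the device supports it;
 * otherwise the default 128B entry size is used.
 */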
1898 static inline void
1899 set_default_llq_configurations(struct ena_llq_configurations *llq_config,
1900 			       struct ena_admin_feature_llq_desc *llq,
1901 			       bool use_large_llq_hdr)
1902 {
1903 	llq_config->llq_header_location = ENA_ADMIN_INLINE_HEADER;
1904 	llq_config->llq_stride_ctrl = ENA_ADMIN_MULTIPLE_DESCS_PER_ENTRY;
1905 	llq_config->llq_num_decs_before_header =
1906 		ENA_ADMIN_LLQ_NUM_DESCS_BEFORE_HEADER_2;
1907 
1908 	if (use_large_llq_hdr &&
1909 	    (llq->entry_size_ctrl_supported & ENA_ADMIN_LIST_ENTRY_SIZE_256B)) {
1910 		llq_config->llq_ring_entry_size =
1911 			ENA_ADMIN_LIST_ENTRY_SIZE_256B;
1912 		llq_config->llq_ring_entry_size_value = 256;
1913 	} else {
1914 		llq_config->llq_ring_entry_size =
1915 			ENA_ADMIN_LIST_ENTRY_SIZE_128B;
1916 		llq_config->llq_ring_entry_size_value = 128;
1917 	}
1918 }
1919 
1920 static int
1921 ena_set_queues_placement_policy(struct ena_adapter *adapter,
1922 				struct ena_com_dev *ena_dev,
1923 				struct ena_admin_feature_llq_desc *llq,
1924 				struct ena_llq_configurations *llq_default_configurations)
1925 {
1926 	int rc;
1927 	u32 llq_feature_mask;
1928 
1929 	llq_feature_mask = 1 << ENA_ADMIN_LLQ;
1930 	if (!(ena_dev->supported_features & llq_feature_mask)) {
1931 		PMD_DRV_LOG(INFO,
1932 			"LLQ is not supported. Fallback to host mode policy.\n");
1933 		ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST;
1934 		return 0;
1935 	}
1936 
1937 	rc = ena_com_config_dev_mode(ena_dev, llq, llq_default_configurations);
1938 	if (unlikely(rc)) {
1939 		PMD_INIT_LOG(WARNING,
1940 			"Failed to config dev mode. Fallback to host mode policy.\n");
1941 		ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST;
1942 		return 0;
1943 	}
1944 
1945 	/* Nothing to config, exit */
1946 	if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_HOST)
1947 		return 0;
1948 
1949 	if (!adapter->dev_mem_base) {
1950 		PMD_DRV_LOG(ERR,
1951 			"Unable to access LLQ BAR resource. Fallback to host mode policy.\n");
1952 		ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST;
1953 		return 0;
1954 	}
1955 
1956 	ena_dev->mem_bar = adapter->dev_mem_base;
1957 
1958 	return 0;
1959 }
1960 
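/* Derive the maximum number of usable IO queues from the device feature
 * context: the minimum of the Rx/Tx SQ/CQ limits reported by the device,
 * capped by ENA_MAX_NUM_IO_QUEUES. In LLQ mode the Tx SQ limit comes from
 * the LLQ feature descriptor instead.
 */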
1961 static uint32_t ena_calc_max_io_queue_num(struct ena_com_dev *ena_dev,
1962 	struct ena_com_dev_get_features_ctx *get_feat_ctx)
1963 {
1964 	uint32_t io_tx_sq_num, io_tx_cq_num, io_rx_num, max_num_io_queues;
1965 
1966 	/* Regular queues capabilities */
1967 	if (ena_dev->supported_features & BIT(ENA_ADMIN_MAX_QUEUES_EXT)) {
1968 		struct ena_admin_queue_ext_feature_fields *max_queue_ext =
1969 			&get_feat_ctx->max_queue_ext.max_queue_ext;
1970 		io_rx_num = RTE_MIN(max_queue_ext->max_rx_sq_num,
1971 				    max_queue_ext->max_rx_cq_num);
1972 		io_tx_sq_num = max_queue_ext->max_tx_sq_num;
1973 		io_tx_cq_num = max_queue_ext->max_tx_cq_num;
1974 	} else {
1975 		struct ena_admin_queue_feature_desc *max_queues =
1976 			&get_feat_ctx->max_queues;
1977 		io_tx_sq_num = max_queues->max_sq_num;
1978 		io_tx_cq_num = max_queues->max_cq_num;
1979 		io_rx_num = RTE_MIN(io_tx_sq_num, io_tx_cq_num);
1980 	}
1981 
1982 	/* In case of LLQ use the llq number in the get feature cmd */
1983 	if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV)
1984 		io_tx_sq_num = get_feat_ctx->llq.max_llq_num;
1985 
1986 	max_num_io_queues = RTE_MIN(ENA_MAX_NUM_IO_QUEUES, io_rx_num);
1987 	max_num_io_queues = RTE_MIN(max_num_io_queues, io_tx_sq_num);
1988 	max_num_io_queues = RTE_MIN(max_num_io_queues, io_tx_cq_num);
1989 
1990 	if (unlikely(max_num_io_queues == 0)) {
1991 		PMD_DRV_LOG(ERR, "Number of IO queues cannot be 0\n");
1992 		return -EFAULT;
1993 	}
1994 
1995 	return max_num_io_queues;
1996 }
1997 
1998 static void
1999 ena_set_offloads(struct ena_offloads *offloads,
2000 		 struct ena_admin_feature_offload_desc *offload_desc)
2001 {
2002 	if (offload_desc->tx & ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_IPV4_MASK)
2003 		offloads->tx_offloads |= ENA_IPV4_TSO;
2004 
2005 	/* Tx IPv4 checksum offloads */
2006 	if (offload_desc->tx &
2007 	    ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L3_CSUM_IPV4_MASK)
2008 		offloads->tx_offloads |= ENA_L3_IPV4_CSUM;
2009 	if (offload_desc->tx &
2010 	    ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV4_CSUM_FULL_MASK)
2011 		offloads->tx_offloads |= ENA_L4_IPV4_CSUM;
2012 	if (offload_desc->tx &
2013 	    ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV4_CSUM_PART_MASK)
2014 		offloads->tx_offloads |= ENA_L4_IPV4_CSUM_PARTIAL;
2015 
2016 	/* Tx IPv6 checksum offloads */
2017 	if (offload_desc->tx &
2018 	    ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV6_CSUM_FULL_MASK)
2019 		offloads->tx_offloads |= ENA_L4_IPV6_CSUM;
2020 	if (offload_desc->tx &
2021 	     ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV6_CSUM_PART_MASK)
2022 		offloads->tx_offloads |= ENA_L4_IPV6_CSUM_PARTIAL;
2023 
2024 	/* Rx IPv4 checksum offloads */
2025 	if (offload_desc->rx_supported &
2026 	    ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L3_CSUM_IPV4_MASK)
2027 		offloads->rx_offloads |= ENA_L3_IPV4_CSUM;
2028 	if (offload_desc->rx_supported &
2029 	    ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L4_IPV4_CSUM_MASK)
2030 		offloads->rx_offloads |= ENA_L4_IPV4_CSUM;
2031 
2032 	/* Rx IPv6 checksum offloads */
2033 	if (offload_desc->rx_supported &
2034 	    ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L4_IPV6_CSUM_MASK)
2035 		offloads->rx_offloads |= ENA_L4_IPV6_CSUM;
2036 
2037 	if (offload_desc->rx_supported &
2038 	    ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_HASH_MASK)
2039 		offloads->rx_offloads |= ENA_RX_RSS_HASH;
2040 }
2041 
2042 static int ena_init_once(void)
2043 {
2044 	static bool init_done;
2045 
2046 	if (init_done)
2047 		return 0;
2048 
2049 	if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
2050 		/* Init timer subsystem for the ENA timer service. */
2051 		rte_timer_subsystem_init();
2052 		/* Register handler for requests from secondary processes. */
2053 		rte_mp_action_register(ENA_MP_NAME, ena_mp_primary_handle);
2054 	}
2055 
2056 	init_done = true;
2057 	return 0;
2058 }
2059 
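/* Probe-time initialization of a single ENA port: map the BARs, parse the
 * device arguments, initialize the ena_com layer, pick the Tx placement
 * policy (LLQ or host memory), size the IO queues and register the admin
 * interrupt handler. The full initialization runs only in the primary
 * process.
 */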
2060 static int eth_ena_dev_init(struct rte_eth_dev *eth_dev)
2061 {
2062 	struct ena_calc_queue_size_ctx calc_queue_ctx = { 0 };
2063 	struct rte_pci_device *pci_dev;
2064 	struct rte_intr_handle *intr_handle;
2065 	struct ena_adapter *adapter = eth_dev->data->dev_private;
2066 	struct ena_com_dev *ena_dev = &adapter->ena_dev;
2067 	struct ena_com_dev_get_features_ctx get_feat_ctx;
2068 	struct ena_llq_configurations llq_config;
2069 	const char *queue_type_str;
2070 	uint32_t max_num_io_queues;
2071 	int rc;
2072 	static int adapters_found;
2073 	bool disable_meta_caching;
2074 
2075 	eth_dev->dev_ops = &ena_dev_ops;
2076 	eth_dev->rx_pkt_burst = &eth_ena_recv_pkts;
2077 	eth_dev->tx_pkt_burst = &eth_ena_xmit_pkts;
2078 	eth_dev->tx_pkt_prepare = &eth_ena_prep_pkts;
2079 
2080 	rc = ena_init_once();
2081 	if (rc != 0)
2082 		return rc;
2083 
2084 	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
2085 		return 0;
2086 
2087 	eth_dev->data->dev_flags |= RTE_ETH_DEV_AUTOFILL_QUEUE_XSTATS;
2088 
2089 	memset(adapter, 0, sizeof(struct ena_adapter));
2090 	ena_dev = &adapter->ena_dev;
2091 
2092 	adapter->edev_data = eth_dev->data;
2093 
2094 	pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev);
2095 
2096 	PMD_INIT_LOG(INFO, "Initializing %x:%x:%x.%d\n",
2097 		     pci_dev->addr.domain,
2098 		     pci_dev->addr.bus,
2099 		     pci_dev->addr.devid,
2100 		     pci_dev->addr.function);
2101 
2102 	intr_handle = pci_dev->intr_handle;
2103 
2104 	adapter->regs = pci_dev->mem_resource[ENA_REGS_BAR].addr;
2105 	adapter->dev_mem_base = pci_dev->mem_resource[ENA_MEM_BAR].addr;
2106 
2107 	if (!adapter->regs) {
2108 		PMD_INIT_LOG(CRIT, "Failed to access registers BAR(%d)\n",
2109 			     ENA_REGS_BAR);
2110 		return -ENXIO;
2111 	}
2112 
2113 	ena_dev->reg_bar = adapter->regs;
2114 	/* Store the device data pointer so that ena_com can pass it to the
2115 	 * platform helpers (for example, the memory allocation callbacks).
2116 	 */
2117 	ena_dev->dmadev = eth_dev->data;
2118 
2119 	adapter->id_number = adapters_found;
2120 
2121 	snprintf(adapter->name, ENA_NAME_MAX_LEN, "ena_%d",
2122 		 adapter->id_number);
2123 
2124 	rc = ena_parse_devargs(adapter, pci_dev->device.devargs);
2125 	if (rc != 0) {
2126 		PMD_INIT_LOG(CRIT, "Failed to parse devargs\n");
2127 		goto err;
2128 	}
2129 
2130 	/* device specific initialization routine */
2131 	rc = ena_device_init(adapter, pci_dev, &get_feat_ctx);
2132 	if (rc) {
2133 		PMD_INIT_LOG(CRIT, "Failed to init ENA device\n");
2134 		goto err;
2135 	}
2136 
2137 	/* Check if device supports LSC */
2138 	if (!(adapter->all_aenq_groups & BIT(ENA_ADMIN_LINK_CHANGE)))
2139 		adapter->edev_data->dev_flags &= ~RTE_ETH_DEV_INTR_LSC;
2140 
2141 	set_default_llq_configurations(&llq_config, &get_feat_ctx.llq,
2142 		adapter->use_large_llq_hdr);
2143 	rc = ena_set_queues_placement_policy(adapter, ena_dev,
2144 					     &get_feat_ctx.llq, &llq_config);
2145 	if (unlikely(rc)) {
2146 		PMD_INIT_LOG(CRIT, "Failed to set placement policy\n");
2147 		return rc;
2148 	}
2149 
2150 	if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_HOST)
2151 		queue_type_str = "Regular";
2152 	else
2153 		queue_type_str = "Low latency";
2154 	PMD_DRV_LOG(INFO, "Placement policy: %s\n", queue_type_str);
2155 
2156 	calc_queue_ctx.ena_dev = ena_dev;
2157 	calc_queue_ctx.get_feat_ctx = &get_feat_ctx;
2158 
2159 	max_num_io_queues = ena_calc_max_io_queue_num(ena_dev, &get_feat_ctx);
2160 	rc = ena_calc_io_queue_size(&calc_queue_ctx,
2161 		adapter->use_large_llq_hdr);
2162 	if (unlikely((rc != 0) || (max_num_io_queues == 0))) {
2163 		rc = -EFAULT;
2164 		goto err_device_destroy;
2165 	}
2166 
2167 	adapter->max_tx_ring_size = calc_queue_ctx.max_tx_queue_size;
2168 	adapter->max_rx_ring_size = calc_queue_ctx.max_rx_queue_size;
2169 	adapter->max_tx_sgl_size = calc_queue_ctx.max_tx_sgl_size;
2170 	adapter->max_rx_sgl_size = calc_queue_ctx.max_rx_sgl_size;
2171 	adapter->max_num_io_queues = max_num_io_queues;
2172 
2173 	if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) {
2174 		disable_meta_caching =
2175 			!!(get_feat_ctx.llq.accel_mode.u.get.supported_flags &
2176 			BIT(ENA_ADMIN_DISABLE_META_CACHING));
2177 	} else {
2178 		disable_meta_caching = false;
2179 	}
2180 
2181 	/* prepare ring structures */
2182 	ena_init_rings(adapter, disable_meta_caching);
2183 
2184 	ena_config_debug_area(adapter);
2185 
2186 	/* Set max MTU for this device */
2187 	adapter->max_mtu = get_feat_ctx.dev_attr.max_mtu;
2188 
2189 	ena_set_offloads(&adapter->offloads, &get_feat_ctx.offload);
2190 
2191 	/* Copy MAC address and point DPDK to it */
2192 	eth_dev->data->mac_addrs = (struct rte_ether_addr *)adapter->mac_addr;
2193 	rte_ether_addr_copy((struct rte_ether_addr *)
2194 			get_feat_ctx.dev_attr.mac_addr,
2195 			(struct rte_ether_addr *)adapter->mac_addr);
2196 
2197 	rc = ena_com_rss_init(ena_dev, ENA_RX_RSS_TABLE_LOG_SIZE);
2198 	if (unlikely(rc != 0)) {
2199 		PMD_DRV_LOG(ERR, "Failed to initialize RSS in ENA device\n");
2200 		goto err_delete_debug_area;
2201 	}
2202 
2203 	adapter->drv_stats = rte_zmalloc("adapter stats",
2204 					 sizeof(*adapter->drv_stats),
2205 					 RTE_CACHE_LINE_SIZE);
2206 	if (!adapter->drv_stats) {
2207 		PMD_DRV_LOG(ERR,
2208 			"Failed to allocate memory for adapter statistics\n");
2209 		rc = -ENOMEM;
2210 		goto err_rss_destroy;
2211 	}
2212 
2213 	rte_spinlock_init(&adapter->admin_lock);
2214 
2215 	rte_intr_callback_register(intr_handle,
2216 				   ena_interrupt_handler_rte,
2217 				   eth_dev);
2218 	rte_intr_enable(intr_handle);
2219 	ena_com_set_admin_polling_mode(ena_dev, false);
2220 	ena_com_admin_aenq_enable(ena_dev);
2221 
2222 	rte_timer_init(&adapter->timer_wd);
2223 
2224 	adapters_found++;
2225 	adapter->state = ENA_ADAPTER_STATE_INIT;
2226 
2227 	return 0;
2228 
2229 err_rss_destroy:
2230 	ena_com_rss_destroy(ena_dev);
2231 err_delete_debug_area:
2232 	ena_com_delete_debug_area(ena_dev);
2233 
2234 err_device_destroy:
2235 	ena_com_delete_host_info(ena_dev);
2236 	ena_com_admin_destroy(ena_dev);
2237 
2238 err:
2239 	return rc;
2240 }
2241 
2242 static void ena_destroy_device(struct rte_eth_dev *eth_dev)
2243 {
2244 	struct ena_adapter *adapter = eth_dev->data->dev_private;
2245 	struct ena_com_dev *ena_dev = &adapter->ena_dev;
2246 
2247 	if (adapter->state == ENA_ADAPTER_STATE_FREE)
2248 		return;
2249 
2250 	ena_com_set_admin_running_state(ena_dev, false);
2251 
2252 	if (adapter->state != ENA_ADAPTER_STATE_CLOSED)
2253 		ena_close(eth_dev);
2254 
2255 	ena_com_rss_destroy(ena_dev);
2256 
2257 	ena_com_delete_debug_area(ena_dev);
2258 	ena_com_delete_host_info(ena_dev);
2259 
2260 	ena_com_abort_admin_commands(ena_dev);
2261 	ena_com_wait_for_abort_completion(ena_dev);
2262 	ena_com_admin_destroy(ena_dev);
2263 	ena_com_mmio_reg_read_request_destroy(ena_dev);
2264 
2265 	adapter->state = ENA_ADAPTER_STATE_FREE;
2266 }
2267 
2268 static int eth_ena_dev_uninit(struct rte_eth_dev *eth_dev)
2269 {
2270 	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
2271 		return 0;
2272 
2273 	ena_destroy_device(eth_dev);
2274 
2275 	return 0;
2276 }
2277 
2278 static int ena_dev_configure(struct rte_eth_dev *dev)
2279 {
2280 	struct ena_adapter *adapter = dev->data->dev_private;
2281 	int rc;
2282 
2283 	adapter->state = ENA_ADAPTER_STATE_CONFIG;
2284 
2285 	if (dev->data->dev_conf.rxmode.mq_mode & RTE_ETH_MQ_RX_RSS_FLAG)
2286 		dev->data->dev_conf.rxmode.offloads |= RTE_ETH_RX_OFFLOAD_RSS_HASH;
2287 	dev->data->dev_conf.txmode.offloads |= RTE_ETH_TX_OFFLOAD_MULTI_SEGS;
2288 
2289 	/* Scattered Rx cannot be turned off in the HW, so this capability must
2290 	 * be forced.
2291 	 */
2292 	dev->data->scattered_rx = 1;
2293 
2294 	adapter->last_tx_comp_qid = 0;
2295 
2296 	adapter->missing_tx_completion_budget =
2297 		RTE_MIN(ENA_MONITORED_TX_QUEUES, dev->data->nb_tx_queues);
2298 
2299 	adapter->missing_tx_completion_to = ENA_TX_TIMEOUT;
2300 	/* To avoid reporting a spurious Tx completion timeout when the
2301 	 * application simply has not called the Tx cleanup function recently,
2302 	 * a queue is checked only if its cleanup ran within half of the
2303 	 * missing completion timeout. If many Tx completions are really
2304 	 * missing, they will still be detected sooner or later.
2305 	 */
2306 	adapter->tx_cleanup_stall_delay = adapter->missing_tx_completion_to / 2;
2307 
2308 	rc = ena_configure_aenq(adapter);
2309 
2310 	return rc;
2311 }
2312 
2313 static void ena_init_rings(struct ena_adapter *adapter,
2314 			   bool disable_meta_caching)
2315 {
2316 	size_t i;
2317 
2318 	for (i = 0; i < adapter->max_num_io_queues; i++) {
2319 		struct ena_ring *ring = &adapter->tx_ring[i];
2320 
2321 		ring->configured = 0;
2322 		ring->type = ENA_RING_TYPE_TX;
2323 		ring->adapter = adapter;
2324 		ring->id = i;
2325 		ring->tx_mem_queue_type = adapter->ena_dev.tx_mem_queue_type;
2326 		ring->tx_max_header_size = adapter->ena_dev.tx_max_header_size;
2327 		ring->sgl_size = adapter->max_tx_sgl_size;
2328 		ring->disable_meta_caching = disable_meta_caching;
2329 	}
2330 
2331 	for (i = 0; i < adapter->max_num_io_queues; i++) {
2332 		struct ena_ring *ring = &adapter->rx_ring[i];
2333 
2334 		ring->configured = 0;
2335 		ring->type = ENA_RING_TYPE_RX;
2336 		ring->adapter = adapter;
2337 		ring->id = i;
2338 		ring->sgl_size = adapter->max_rx_sgl_size;
2339 	}
2340 }
2341 
2342 static uint64_t ena_get_rx_port_offloads(struct ena_adapter *adapter)
2343 {
2344 	uint64_t port_offloads = 0;
2345 
2346 	if (adapter->offloads.rx_offloads & ENA_L3_IPV4_CSUM)
2347 		port_offloads |= RTE_ETH_RX_OFFLOAD_IPV4_CKSUM;
2348 
2349 	if (adapter->offloads.rx_offloads &
2350 	    (ENA_L4_IPV4_CSUM | ENA_L4_IPV6_CSUM))
2351 		port_offloads |=
2352 			RTE_ETH_RX_OFFLOAD_UDP_CKSUM | RTE_ETH_RX_OFFLOAD_TCP_CKSUM;
2353 
2354 	if (adapter->offloads.rx_offloads & ENA_RX_RSS_HASH)
2355 		port_offloads |= RTE_ETH_RX_OFFLOAD_RSS_HASH;
2356 
2357 	port_offloads |= RTE_ETH_RX_OFFLOAD_SCATTER;
2358 
2359 	return port_offloads;
2360 }
2361 
2362 static uint64_t ena_get_tx_port_offloads(struct ena_adapter *adapter)
2363 {
2364 	uint64_t port_offloads = 0;
2365 
2366 	if (adapter->offloads.tx_offloads & ENA_IPV4_TSO)
2367 		port_offloads |= RTE_ETH_TX_OFFLOAD_TCP_TSO;
2368 
2369 	if (adapter->offloads.tx_offloads & ENA_L3_IPV4_CSUM)
2370 		port_offloads |= RTE_ETH_TX_OFFLOAD_IPV4_CKSUM;
2371 	if (adapter->offloads.tx_offloads &
2372 	    (ENA_L4_IPV4_CSUM_PARTIAL | ENA_L4_IPV4_CSUM |
2373 	     ENA_L4_IPV6_CSUM | ENA_L4_IPV6_CSUM_PARTIAL))
2374 		port_offloads |=
2375 			RTE_ETH_TX_OFFLOAD_UDP_CKSUM | RTE_ETH_TX_OFFLOAD_TCP_CKSUM;
2376 
2377 	port_offloads |= RTE_ETH_TX_OFFLOAD_MULTI_SEGS;
2378 
2379 	return port_offloads;
2380 }
2381 
2382 static uint64_t ena_get_rx_queue_offloads(struct ena_adapter *adapter)
2383 {
2384 	RTE_SET_USED(adapter);
2385 
2386 	return 0;
2387 }
2388 
2389 static uint64_t ena_get_tx_queue_offloads(struct ena_adapter *adapter)
2390 {
2391 	RTE_SET_USED(adapter);
2392 
2393 	return 0;
2394 }
2395 
2396 static int ena_infos_get(struct rte_eth_dev *dev,
2397 			  struct rte_eth_dev_info *dev_info)
2398 {
2399 	struct ena_adapter *adapter;
2400 	struct ena_com_dev *ena_dev;
2401 
2402 	ena_assert_msg(dev->data != NULL, "Uninitialized device\n");
2403 	ena_assert_msg(dev->data->dev_private != NULL, "Uninitialized device\n");
2404 	adapter = dev->data->dev_private;
2405 
2406 	ena_dev = &adapter->ena_dev;
2407 	ena_assert_msg(ena_dev != NULL, "Uninitialized device\n");
2408 
2409 	dev_info->speed_capa =
2410 			RTE_ETH_LINK_SPEED_1G   |
2411 			RTE_ETH_LINK_SPEED_2_5G |
2412 			RTE_ETH_LINK_SPEED_5G   |
2413 			RTE_ETH_LINK_SPEED_10G  |
2414 			RTE_ETH_LINK_SPEED_25G  |
2415 			RTE_ETH_LINK_SPEED_40G  |
2416 			RTE_ETH_LINK_SPEED_50G  |
2417 			RTE_ETH_LINK_SPEED_100G;
2418 
2419 	/* Inform framework about available features */
2420 	dev_info->rx_offload_capa = ena_get_rx_port_offloads(adapter);
2421 	dev_info->tx_offload_capa = ena_get_tx_port_offloads(adapter);
2422 	dev_info->rx_queue_offload_capa = ena_get_rx_queue_offloads(adapter);
2423 	dev_info->tx_queue_offload_capa = ena_get_tx_queue_offloads(adapter);
2424 
2425 	dev_info->flow_type_rss_offloads = ENA_ALL_RSS_HF;
2426 	dev_info->hash_key_size = ENA_HASH_KEY_SIZE;
2427 
2428 	dev_info->min_rx_bufsize = ENA_MIN_FRAME_LEN;
2429 	dev_info->max_rx_pktlen  = adapter->max_mtu + RTE_ETHER_HDR_LEN +
2430 		RTE_ETHER_CRC_LEN;
2431 	dev_info->min_mtu = ENA_MIN_MTU;
2432 	dev_info->max_mtu = adapter->max_mtu;
2433 	dev_info->max_mac_addrs = 1;
2434 
2435 	dev_info->max_rx_queues = adapter->max_num_io_queues;
2436 	dev_info->max_tx_queues = adapter->max_num_io_queues;
2437 	dev_info->reta_size = ENA_RX_RSS_TABLE_SIZE;
2438 
2439 	dev_info->rx_desc_lim.nb_max = adapter->max_rx_ring_size;
2440 	dev_info->rx_desc_lim.nb_min = ENA_MIN_RING_DESC;
2441 	dev_info->rx_desc_lim.nb_seg_max = RTE_MIN(ENA_PKT_MAX_BUFS,
2442 					adapter->max_rx_sgl_size);
2443 	dev_info->rx_desc_lim.nb_mtu_seg_max = RTE_MIN(ENA_PKT_MAX_BUFS,
2444 					adapter->max_rx_sgl_size);
2445 
2446 	dev_info->tx_desc_lim.nb_max = adapter->max_tx_ring_size;
2447 	dev_info->tx_desc_lim.nb_min = ENA_MIN_RING_DESC;
2448 	dev_info->tx_desc_lim.nb_seg_max = RTE_MIN(ENA_PKT_MAX_BUFS,
2449 					adapter->max_tx_sgl_size);
2450 	dev_info->tx_desc_lim.nb_mtu_seg_max = RTE_MIN(ENA_PKT_MAX_BUFS,
2451 					adapter->max_tx_sgl_size);
2452 
2453 	dev_info->default_rxportconf.ring_size = ENA_DEFAULT_RING_SIZE;
2454 	dev_info->default_txportconf.ring_size = ENA_DEFAULT_RING_SIZE;
2455 
2456 	return 0;
2457 }
2458 
2459 static inline void ena_init_rx_mbuf(struct rte_mbuf *mbuf, uint16_t len)
2460 {
2461 	mbuf->data_len = len;
2462 	mbuf->data_off = RTE_PKTMBUF_HEADROOM;
2463 	mbuf->refcnt = 1;
2464 	mbuf->next = NULL;
2465 }
2466 
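/* Assemble a received packet from 'descs' Rx descriptors into an mbuf chain.
 * The first descriptor becomes the mbuf head; used req_ids are returned to
 * the empty_rx_reqs array and *next_to_clean is advanced accordingly.
 * Zero-length descriptors are handed back to the device without consuming a
 * new mbuf.
 */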
2467 static struct rte_mbuf *ena_rx_mbuf(struct ena_ring *rx_ring,
2468 				    struct ena_com_rx_buf_info *ena_bufs,
2469 				    uint32_t descs,
2470 				    uint16_t *next_to_clean,
2471 				    uint8_t offset)
2472 {
2473 	struct rte_mbuf *mbuf;
2474 	struct rte_mbuf *mbuf_head;
2475 	struct ena_rx_buffer *rx_info;
2476 	int rc;
2477 	uint16_t ntc, len, req_id, buf = 0;
2478 
2479 	if (unlikely(descs == 0))
2480 		return NULL;
2481 
2482 	ntc = *next_to_clean;
2483 
2484 	len = ena_bufs[buf].len;
2485 	req_id = ena_bufs[buf].req_id;
2486 
2487 	rx_info = &rx_ring->rx_buffer_info[req_id];
2488 
2489 	mbuf = rx_info->mbuf;
2490 	RTE_ASSERT(mbuf != NULL);
2491 
2492 	ena_init_rx_mbuf(mbuf, len);
2493 
2494 	/* Fill the mbuf head with the data specific for 1st segment. */
2495 	mbuf_head = mbuf;
2496 	mbuf_head->nb_segs = descs;
2497 	mbuf_head->port = rx_ring->port_id;
2498 	mbuf_head->pkt_len = len;
2499 	mbuf_head->data_off += offset;
2500 
2501 	rx_info->mbuf = NULL;
2502 	rx_ring->empty_rx_reqs[ntc] = req_id;
2503 	ntc = ENA_IDX_NEXT_MASKED(ntc, rx_ring->size_mask);
2504 
2505 	while (--descs) {
2506 		++buf;
2507 		len = ena_bufs[buf].len;
2508 		req_id = ena_bufs[buf].req_id;
2509 
2510 		rx_info = &rx_ring->rx_buffer_info[req_id];
2511 		RTE_ASSERT(rx_info->mbuf != NULL);
2512 
2513 		if (unlikely(len == 0)) {
2514 			/*
2515 			 * Some devices can pass a descriptor with length 0.
2516 			 * To avoid confusion, the PMD simply puts the
2517 			 * descriptor back as it was never used, which also
2518 			 * avoids allocating a new mbuf for it.
2519 			 */
2520 			rc = ena_add_single_rx_desc(rx_ring->ena_com_io_sq,
2521 				rx_info->mbuf, req_id);
2522 			if (unlikely(rc != 0)) {
2523 				/* Free the mbuf in case of an error. */
2524 				rte_mbuf_raw_free(rx_info->mbuf);
2525 			} else {
2526 				/*
2527 				 * If there was no error, just exit the loop as
2528 				 * 0 length descriptor is always the last one.
2529 				 */
2530 				break;
2531 			}
2532 		} else {
2533 			/* Create an mbuf chain. */
2534 			mbuf->next = rx_info->mbuf;
2535 			mbuf = mbuf->next;
2536 
2537 			ena_init_rx_mbuf(mbuf, len);
2538 			mbuf_head->pkt_len += len;
2539 		}
2540 
2541 		/*
2542 		 * Mark the descriptor as depleted and perform necessary
2543 		 * cleanup.
2544 		 * This code will execute in two cases:
2545 		 *  1. Descriptor len was greater than 0 - normal situation.
2546 		 *  2. Descriptor len was 0 and we failed to add the descriptor
2547 		 *     to the device. In that situation, we should try to add
2548 		 *     the mbuf again in the populate routine and mark the
2549 		 *     descriptor as used up by the device.
2550 		 */
2551 		rx_info->mbuf = NULL;
2552 		rx_ring->empty_rx_reqs[ntc] = req_id;
2553 		ntc = ENA_IDX_NEXT_MASKED(ntc, rx_ring->size_mask);
2554 	}
2555 
2556 	*next_to_clean = ntc;
2557 
2558 	return mbuf_head;
2559 }
2560 
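/* Rx burst callback: fetch up to nb_pkts completed packets from the device,
 * fill in the mbuf offload flags and refill the Rx ring once enough free
 * descriptors have accumulated.
 */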
2561 static uint16_t eth_ena_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
2562 				  uint16_t nb_pkts)
2563 {
2564 	struct ena_ring *rx_ring = (struct ena_ring *)(rx_queue);
2565 	unsigned int free_queue_entries;
2566 	uint16_t next_to_clean = rx_ring->next_to_clean;
2567 	uint16_t descs_in_use;
2568 	struct rte_mbuf *mbuf;
2569 	uint16_t completed;
2570 	struct ena_com_rx_ctx ena_rx_ctx;
2571 	int i, rc = 0;
2572 	bool fill_hash;
2573 
2574 #ifdef RTE_ETHDEV_DEBUG_RX
2575 	/* Check adapter state */
2576 	if (unlikely(rx_ring->adapter->state != ENA_ADAPTER_STATE_RUNNING)) {
2577 		PMD_RX_LOG(ALERT,
2578 			"Trying to receive pkts while device is NOT running\n");
2579 		return 0;
2580 	}
2581 #endif
2582 
2583 	fill_hash = rx_ring->offloads & RTE_ETH_RX_OFFLOAD_RSS_HASH;
2584 
2585 	descs_in_use = rx_ring->ring_size -
2586 		ena_com_free_q_entries(rx_ring->ena_com_io_sq) - 1;
2587 	nb_pkts = RTE_MIN(descs_in_use, nb_pkts);
2588 
2589 	for (completed = 0; completed < nb_pkts; completed++) {
2590 		ena_rx_ctx.max_bufs = rx_ring->sgl_size;
2591 		ena_rx_ctx.ena_bufs = rx_ring->ena_bufs;
2592 		ena_rx_ctx.descs = 0;
2593 		ena_rx_ctx.pkt_offset = 0;
2594 		/* receive packet context */
2595 		rc = ena_com_rx_pkt(rx_ring->ena_com_io_cq,
2596 				    rx_ring->ena_com_io_sq,
2597 				    &ena_rx_ctx);
2598 		if (unlikely(rc)) {
2599 			PMD_RX_LOG(ERR,
2600 				"Failed to get the packet from the device, rc: %d\n",
2601 				rc);
2602 			if (rc == ENA_COM_NO_SPACE) {
2603 				++rx_ring->rx_stats.bad_desc_num;
2604 				rx_ring->adapter->reset_reason =
2605 					ENA_REGS_RESET_TOO_MANY_RX_DESCS;
2606 			} else {
2607 				++rx_ring->rx_stats.bad_req_id;
2608 				rx_ring->adapter->reset_reason =
2609 					ENA_REGS_RESET_INV_RX_REQ_ID;
2610 			}
2611 			rx_ring->adapter->trigger_reset = true;
2612 			return 0;
2613 		}
2614 
2615 		mbuf = ena_rx_mbuf(rx_ring,
2616 			ena_rx_ctx.ena_bufs,
2617 			ena_rx_ctx.descs,
2618 			&next_to_clean,
2619 			ena_rx_ctx.pkt_offset);
2620 		if (unlikely(mbuf == NULL)) {
2621 			for (i = 0; i < ena_rx_ctx.descs; ++i) {
2622 				rx_ring->empty_rx_reqs[next_to_clean] =
2623 					rx_ring->ena_bufs[i].req_id;
2624 				next_to_clean = ENA_IDX_NEXT_MASKED(
2625 					next_to_clean, rx_ring->size_mask);
2626 			}
2627 			break;
2628 		}
2629 
2630 		/* fill mbuf attributes if any */
2631 		ena_rx_mbuf_prepare(rx_ring, mbuf, &ena_rx_ctx, fill_hash);
2632 
2633 		if (unlikely(mbuf->ol_flags &
2634 				(RTE_MBUF_F_RX_IP_CKSUM_BAD | RTE_MBUF_F_RX_L4_CKSUM_BAD)))
2635 			rte_atomic64_inc(&rx_ring->adapter->drv_stats->ierrors);
2636 
2637 		rx_pkts[completed] = mbuf;
2638 		rx_ring->rx_stats.bytes += mbuf->pkt_len;
2639 	}
2640 
2641 	rx_ring->rx_stats.cnt += completed;
2642 	rx_ring->next_to_clean = next_to_clean;
2643 
2644 	free_queue_entries = ena_com_free_q_entries(rx_ring->ena_com_io_sq);
2645 
2646 	/* Burst refill to save doorbells, memory barriers, const interval */
2647 	if (free_queue_entries >= rx_ring->rx_free_thresh) {
2648 		ena_com_update_dev_comp_head(rx_ring->ena_com_io_cq);
2649 		ena_populate_rx_queue(rx_ring, free_queue_entries);
2650 	}
2651 
2652 	return completed;
2653 }
2654 
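/* Tx prepare callback: validate the offload requests of each mbuf against
 * the device capabilities and, when only partial L4 checksum offload is
 * available, compute the pseudo-header checksum in software. Returns the
 * number of packets that can be safely passed to the Tx burst function.
 */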
2655 static uint16_t
2656 eth_ena_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
2657 		uint16_t nb_pkts)
2658 {
2659 	int32_t ret;
2660 	uint32_t i;
2661 	struct rte_mbuf *m;
2662 	struct ena_ring *tx_ring = (struct ena_ring *)(tx_queue);
2663 	struct ena_adapter *adapter = tx_ring->adapter;
2664 	struct rte_ipv4_hdr *ip_hdr;
2665 	uint64_t ol_flags;
2666 	uint64_t l4_csum_flag;
2667 	uint64_t dev_offload_capa;
2668 	uint16_t frag_field;
2669 	bool need_pseudo_csum;
2670 
2671 	dev_offload_capa = adapter->offloads.tx_offloads;
2672 	for (i = 0; i != nb_pkts; i++) {
2673 		m = tx_pkts[i];
2674 		ol_flags = m->ol_flags;
2675 
2676 		/* Check if any offload flag was set */
2677 		if (ol_flags == 0)
2678 			continue;
2679 
2680 		l4_csum_flag = ol_flags & RTE_MBUF_F_TX_L4_MASK;
2681 		/* SCTP checksum offload is not supported by the ENA. */
2682 		if ((ol_flags & ENA_TX_OFFLOAD_NOTSUP_MASK) ||
2683 		    l4_csum_flag == RTE_MBUF_F_TX_SCTP_CKSUM) {
2684 			PMD_TX_LOG(DEBUG,
2685 				"mbuf[%" PRIu32 "] has unsupported offload flags set: 0x%" PRIx64 "\n",
2686 				i, ol_flags);
2687 			rte_errno = ENOTSUP;
2688 			return i;
2689 		}
2690 
2691 		if (unlikely(m->nb_segs >= tx_ring->sgl_size &&
2692 		    !(tx_ring->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV &&
2693 		      m->nb_segs == tx_ring->sgl_size &&
2694 		      m->data_len < tx_ring->tx_max_header_size))) {
2695 			PMD_TX_LOG(DEBUG,
2696 				"mbuf[%" PRIu32 "] has too many segments: %" PRIu16 "\n",
2697 				i, m->nb_segs);
2698 			rte_errno = EINVAL;
2699 			return i;
2700 		}
2701 
2702 #ifdef RTE_LIBRTE_ETHDEV_DEBUG
2703 		/* Check if requested offload is also enabled for the queue */
2704 		if ((ol_flags & RTE_MBUF_F_TX_IP_CKSUM &&
2705 		     !(tx_ring->offloads & RTE_ETH_TX_OFFLOAD_IPV4_CKSUM)) ||
2706 		    (l4_csum_flag == RTE_MBUF_F_TX_TCP_CKSUM &&
2707 		     !(tx_ring->offloads & RTE_ETH_TX_OFFLOAD_TCP_CKSUM)) ||
2708 		    (l4_csum_flag == RTE_MBUF_F_TX_UDP_CKSUM &&
2709 		     !(tx_ring->offloads & RTE_ETH_TX_OFFLOAD_UDP_CKSUM))) {
2710 			PMD_TX_LOG(DEBUG,
2711 				"mbuf[%" PRIu32 "]: requested offloads 0x%" PRIx64 " are not enabled for the queue[%u]\n",
2712 				i, ol_flags, tx_ring->id);
2713 			rte_errno = EINVAL;
2714 			return i;
2715 		}
2716 
2717 		/* The caller is obligated to set l2 and l3 len if any cksum
2718 		 * offload is enabled.
2719 		 */
2720 		if (unlikely(ol_flags & (RTE_MBUF_F_TX_IP_CKSUM | RTE_MBUF_F_TX_L4_MASK) &&
2721 		    (m->l2_len == 0 || m->l3_len == 0))) {
2722 			PMD_TX_LOG(DEBUG,
2723 				"mbuf[%" PRIu32 "]: l2_len or l3_len values are 0 while the offload was requested\n",
2724 				i);
2725 			rte_errno = EINVAL;
2726 			return i;
2727 		}
2728 		ret = rte_validate_tx_offload(m);
2729 		if (ret != 0) {
2730 			rte_errno = -ret;
2731 			return i;
2732 		}
2733 #endif
2734 
2735 		/* Verify HW support for requested offloads and determine if
2736 		 * pseudo header checksum is needed.
2737 		 */
2738 		need_pseudo_csum = false;
2739 		if (ol_flags & RTE_MBUF_F_TX_IPV4) {
2740 			if (ol_flags & RTE_MBUF_F_TX_IP_CKSUM &&
2741 			    !(dev_offload_capa & ENA_L3_IPV4_CSUM)) {
2742 				rte_errno = ENOTSUP;
2743 				return i;
2744 			}
2745 
2746 			if (ol_flags & RTE_MBUF_F_TX_TCP_SEG &&
2747 			    !(dev_offload_capa & ENA_IPV4_TSO)) {
2748 				rte_errno = ENOTSUP;
2749 				return i;
2750 			}
2751 
2752 			/* Check HW capabilities and if pseudo csum is needed
2753 			 * for L4 offloads.
2754 			 */
2755 			if (l4_csum_flag != RTE_MBUF_F_TX_L4_NO_CKSUM &&
2756 			    !(dev_offload_capa & ENA_L4_IPV4_CSUM)) {
2757 				if (dev_offload_capa &
2758 				    ENA_L4_IPV4_CSUM_PARTIAL) {
2759 					need_pseudo_csum = true;
2760 				} else {
2761 					rte_errno = ENOTSUP;
2762 					return i;
2763 				}
2764 			}
2765 
2766 			/* Parse the DF flag */
2767 			ip_hdr = rte_pktmbuf_mtod_offset(m,
2768 				struct rte_ipv4_hdr *, m->l2_len);
2769 			frag_field = rte_be_to_cpu_16(ip_hdr->fragment_offset);
2770 			if (frag_field & RTE_IPV4_HDR_DF_FLAG) {
2771 				m->packet_type |= RTE_PTYPE_L4_NONFRAG;
2772 			} else if (ol_flags & RTE_MBUF_F_TX_TCP_SEG) {
2773 				/* In case we are supposed to TSO and have DF
2774 				 * not set (DF=0) hardware must be provided with
2775 				 * partial checksum.
2776 				 */
2777 				need_pseudo_csum = true;
2778 			}
2779 		} else if (ol_flags & RTE_MBUF_F_TX_IPV6) {
2780 			/* There is no support for IPv6 TSO as for now. */
2781 			if (ol_flags & RTE_MBUF_F_TX_TCP_SEG) {
2782 				rte_errno = ENOTSUP;
2783 				return i;
2784 			}
2785 
2786 			/* Check HW capabilities and if pseudo csum is needed */
2787 			if (l4_csum_flag != RTE_MBUF_F_TX_L4_NO_CKSUM &&
2788 			    !(dev_offload_capa & ENA_L4_IPV6_CSUM)) {
2789 				if (dev_offload_capa &
2790 				    ENA_L4_IPV6_CSUM_PARTIAL) {
2791 					need_pseudo_csum = true;
2792 				} else {
2793 					rte_errno = ENOTSUP;
2794 					return i;
2795 				}
2796 			}
2797 		}
2798 
2799 		if (need_pseudo_csum) {
2800 			ret = rte_net_intel_cksum_flags_prepare(m, ol_flags);
2801 			if (ret != 0) {
2802 				rte_errno = -ret;
2803 				return i;
2804 			}
2805 		}
2806 	}
2807 
2808 	return i;
2809 }
2810 
2811 static void ena_update_hints(struct ena_adapter *adapter,
2812 			     struct ena_admin_ena_hw_hints *hints)
2813 {
2814 	if (hints->admin_completion_tx_timeout)
2815 		adapter->ena_dev.admin_queue.completion_timeout =
2816 			hints->admin_completion_tx_timeout * 1000;
2817 
2818 	if (hints->mmio_read_timeout)
2819 		/* convert to usec */
2820 		adapter->ena_dev.mmio_read.reg_read_to =
2821 			hints->mmio_read_timeout * 1000;
2822 
2823 	if (hints->missing_tx_completion_timeout) {
2824 		if (hints->missing_tx_completion_timeout ==
2825 		    ENA_HW_HINTS_NO_TIMEOUT) {
2826 			adapter->missing_tx_completion_to =
2827 				ENA_HW_HINTS_NO_TIMEOUT;
2828 		} else {
2829 			/* Convert from msecs to ticks */
2830 			adapter->missing_tx_completion_to = rte_get_timer_hz() *
2831 				hints->missing_tx_completion_timeout / 1000;
2832 			adapter->tx_cleanup_stall_delay =
2833 				adapter->missing_tx_completion_to / 2;
2834 		}
2835 	}
2836 
2837 	if (hints->driver_watchdog_timeout) {
2838 		if (hints->driver_watchdog_timeout == ENA_HW_HINTS_NO_TIMEOUT)
2839 			adapter->keep_alive_timeout = ENA_HW_HINTS_NO_TIMEOUT;
2840 		else
2841 			/* Convert msecs to ticks */
2842 			adapter->keep_alive_timeout =
2843 				(hints->driver_watchdog_timeout *
2844 				rte_get_timer_hz()) / 1000;
2845 	}
2846 }
2847 
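/* Translate an mbuf chain into ena_com buffer descriptors. For the LLQ
 * (push) mode the packet header is either pointed at directly in the first
 * segment or copied into the intermediate buffer when it spans several
 * segments; the remaining data is described by scatter-gather entries.
 */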
2848 static void ena_tx_map_mbuf(struct ena_ring *tx_ring,
2849 	struct ena_tx_buffer *tx_info,
2850 	struct rte_mbuf *mbuf,
2851 	void **push_header,
2852 	uint16_t *header_len)
2853 {
2854 	struct ena_com_buf *ena_buf;
2855 	uint16_t delta, seg_len, push_len;
2856 
2857 	delta = 0;
2858 	seg_len = mbuf->data_len;
2859 
2860 	tx_info->mbuf = mbuf;
2861 	ena_buf = tx_info->bufs;
2862 
2863 	if (tx_ring->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) {
2864 		/*
2865 		 * The Tx header is usually smaller than tx_max_header_size.
2866 		 * However, pushing more data than strictly needed is not an
2867 		 * issue when the mbuf holds more data than
2868 		 * tx_max_header_size.
2869 		 */
2870 		push_len = RTE_MIN(mbuf->pkt_len, tx_ring->tx_max_header_size);
2871 		*header_len = push_len;
2872 
2873 		if (likely(push_len <= seg_len)) {
2874 			/* If the push header is in the single segment, then
2875 			 * just point it to the 1st mbuf data.
2876 			 */
2877 			*push_header = rte_pktmbuf_mtod(mbuf, uint8_t *);
2878 		} else {
2879 			/* If the push header spans several segments, copy
2880 			 * it to the intermediate buffer.
2881 			 */
2882 			rte_pktmbuf_read(mbuf, 0, push_len,
2883 				tx_ring->push_buf_intermediate_buf);
2884 			*push_header = tx_ring->push_buf_intermediate_buf;
2885 			delta = push_len - seg_len;
2886 		}
2887 	} else {
2888 		*push_header = NULL;
2889 		*header_len = 0;
2890 		push_len = 0;
2891 	}
2892 
2893 	/* Process first segment taking into consideration pushed header */
2894 	if (seg_len > push_len) {
2895 		ena_buf->paddr = mbuf->buf_iova +
2896 				mbuf->data_off +
2897 				push_len;
2898 		ena_buf->len = seg_len - push_len;
2899 		ena_buf++;
2900 		tx_info->num_of_bufs++;
2901 	}
2902 
2903 	while ((mbuf = mbuf->next) != NULL) {
2904 		seg_len = mbuf->data_len;
2905 
2906 		/* Skip mbufs if whole data is pushed as a header */
2907 		if (unlikely(delta > seg_len)) {
2908 			delta -= seg_len;
2909 			continue;
2910 		}
2911 
2912 		ena_buf->paddr = mbuf->buf_iova + mbuf->data_off + delta;
2913 		ena_buf->len = seg_len - delta;
2914 		ena_buf++;
2915 		tx_info->num_of_bufs++;
2916 
2917 		delta = 0;
2918 	}
2919 }
2920 
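/* Post a single mbuf to the Tx submission queue: reserve a req_id, map the
 * mbuf, set the offload flags and hand the descriptors to ena_com. The
 * doorbell is written early only when the LLQ burst limit would otherwise
 * be exceeded.
 */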
2921 static int ena_xmit_mbuf(struct ena_ring *tx_ring, struct rte_mbuf *mbuf)
2922 {
2923 	struct ena_tx_buffer *tx_info;
2924 	struct ena_com_tx_ctx ena_tx_ctx = { { 0 } };
2925 	uint16_t next_to_use;
2926 	uint16_t header_len;
2927 	uint16_t req_id;
2928 	void *push_header;
2929 	int nb_hw_desc;
2930 	int rc;
2931 
2932 	/* Check for space for 2 additional descriptors: one for a possible
2933 	 * header split and one for the metadata descriptor.
2934 	 */
2935 	if (!ena_com_sq_have_enough_space(tx_ring->ena_com_io_sq,
2936 					  mbuf->nb_segs + 2)) {
2937 		PMD_DRV_LOG(DEBUG, "Not enough space in the tx queue\n");
2938 		return ENA_COM_NO_MEM;
2939 	}
2940 
2941 	next_to_use = tx_ring->next_to_use;
2942 
2943 	req_id = tx_ring->empty_tx_reqs[next_to_use];
2944 	tx_info = &tx_ring->tx_buffer_info[req_id];
2945 	tx_info->num_of_bufs = 0;
2946 	RTE_ASSERT(tx_info->mbuf == NULL);
2947 
2948 	ena_tx_map_mbuf(tx_ring, tx_info, mbuf, &push_header, &header_len);
2949 
2950 	ena_tx_ctx.ena_bufs = tx_info->bufs;
2951 	ena_tx_ctx.push_header = push_header;
2952 	ena_tx_ctx.num_bufs = tx_info->num_of_bufs;
2953 	ena_tx_ctx.req_id = req_id;
2954 	ena_tx_ctx.header_len = header_len;
2955 
2956 	/* Set Tx offloads flags, if applicable */
2957 	ena_tx_mbuf_prepare(mbuf, &ena_tx_ctx, tx_ring->offloads,
2958 		tx_ring->disable_meta_caching);
2959 
2960 	if (unlikely(ena_com_is_doorbell_needed(tx_ring->ena_com_io_sq,
2961 			&ena_tx_ctx))) {
2962 		PMD_TX_LOG(DEBUG,
2963 			"LLQ Tx max burst size of queue %d achieved, writing doorbell to send burst\n",
2964 			tx_ring->id);
2965 		ena_com_write_sq_doorbell(tx_ring->ena_com_io_sq);
2966 		tx_ring->tx_stats.doorbells++;
2967 		tx_ring->pkts_without_db = false;
2968 	}
2969 
2970 	/* prepare the packet's descriptors to dma engine */
2971 	rc = ena_com_prepare_tx(tx_ring->ena_com_io_sq,	&ena_tx_ctx,
2972 		&nb_hw_desc);
2973 	if (unlikely(rc)) {
2974 		PMD_DRV_LOG(ERR, "Failed to prepare Tx buffers, rc: %d\n", rc);
2975 		++tx_ring->tx_stats.prepare_ctx_err;
2976 		tx_ring->adapter->reset_reason =
2977 		    ENA_REGS_RESET_DRIVER_INVALID_STATE;
2978 		tx_ring->adapter->trigger_reset = true;
2979 		return rc;
2980 	}
2981 
2982 	tx_info->tx_descs = nb_hw_desc;
2983 	tx_info->timestamp = rte_get_timer_cycles();
2984 
2985 	tx_ring->tx_stats.cnt++;
2986 	tx_ring->tx_stats.bytes += mbuf->pkt_len;
2987 
2988 	tx_ring->next_to_use = ENA_IDX_NEXT_MASKED(next_to_use,
2989 		tx_ring->size_mask);
2990 
2991 	return 0;
2992 }
2993 
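/* Release up to free_pkt_cnt transmitted mbufs (0 means "as many as
 * possible"), return their req_ids to the free list and acknowledge the
 * processed descriptors to ena_com. Returns the number of packets cleaned.
 */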
2994 static int ena_tx_cleanup(void *txp, uint32_t free_pkt_cnt)
2995 {
2996 	struct ena_ring *tx_ring = (struct ena_ring *)txp;
2997 	unsigned int total_tx_descs = 0;
2998 	unsigned int total_tx_pkts = 0;
2999 	uint16_t cleanup_budget;
3000 	uint16_t next_to_clean = tx_ring->next_to_clean;
3001 
3002 	/*
3003 	 * If free_pkt_cnt is equal to 0, it means that the user requested
3004 	 * full cleanup, so attempt to release all Tx descriptors
3005 	 * (ring_size - 1 -> size_mask)
3006 	 */
3007 	cleanup_budget = (free_pkt_cnt == 0) ? tx_ring->size_mask : free_pkt_cnt;
3008 
3009 	while (likely(total_tx_pkts < cleanup_budget)) {
3010 		struct rte_mbuf *mbuf;
3011 		struct ena_tx_buffer *tx_info;
3012 		uint16_t req_id;
3013 
3014 		if (ena_com_tx_comp_req_id_get(tx_ring->ena_com_io_cq, &req_id) != 0)
3015 			break;
3016 
3017 		if (unlikely(validate_tx_req_id(tx_ring, req_id) != 0))
3018 			break;
3019 
3020 		/* Get Tx info & store how many descs were processed  */
3021 		tx_info = &tx_ring->tx_buffer_info[req_id];
3022 		tx_info->timestamp = 0;
3023 
3024 		mbuf = tx_info->mbuf;
3025 		rte_pktmbuf_free(mbuf);
3026 
3027 		tx_info->mbuf = NULL;
3028 		tx_ring->empty_tx_reqs[next_to_clean] = req_id;
3029 
3030 		total_tx_descs += tx_info->tx_descs;
3031 		total_tx_pkts++;
3032 
3033 		/* Put back descriptor to the ring for reuse */
3034 		next_to_clean = ENA_IDX_NEXT_MASKED(next_to_clean,
3035 			tx_ring->size_mask);
3036 	}
3037 
3038 	if (likely(total_tx_descs > 0)) {
3039 		/* acknowledge completion of sent packets */
3040 		tx_ring->next_to_clean = next_to_clean;
3041 		ena_com_comp_ack(tx_ring->ena_com_io_sq, total_tx_descs);
3042 		ena_com_update_dev_comp_head(tx_ring->ena_com_io_cq);
3043 	}
3044 
3045 	/* Record the time of the last full cleanup for the missing Tx completion detector */
3046 	if (free_pkt_cnt == 0 || total_tx_pkts < cleanup_budget)
3047 		tx_ring->last_cleanup_ticks = rte_get_timer_cycles();
3048 
3049 	return total_tx_pkts;
3050 }
3051 
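/* Tx burst callback: clean up completed packets when the number of free
 * descriptors drops below the threshold, enqueue as many mbufs as possible
 * and write the doorbell once for the whole burst.
 */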
3052 static uint16_t eth_ena_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
3053 				  uint16_t nb_pkts)
3054 {
3055 	struct ena_ring *tx_ring = (struct ena_ring *)(tx_queue);
3056 	int available_desc;
3057 	uint16_t sent_idx = 0;
3058 
3059 #ifdef RTE_ETHDEV_DEBUG_TX
3060 	/* Check adapter state */
3061 	if (unlikely(tx_ring->adapter->state != ENA_ADAPTER_STATE_RUNNING)) {
3062 		PMD_TX_LOG(ALERT,
3063 			"Trying to xmit pkts while device is NOT running\n");
3064 		return 0;
3065 	}
3066 #endif
3067 
3068 	available_desc = ena_com_free_q_entries(tx_ring->ena_com_io_sq);
3069 	if (available_desc < tx_ring->tx_free_thresh)
3070 		ena_tx_cleanup((void *)tx_ring, 0);
3071 
3072 	for (sent_idx = 0; sent_idx < nb_pkts; sent_idx++) {
3073 		if (ena_xmit_mbuf(tx_ring, tx_pkts[sent_idx]))
3074 			break;
3075 		tx_ring->pkts_without_db = true;
3076 		rte_prefetch0(tx_pkts[ENA_IDX_ADD_MASKED(sent_idx, 4,
3077 			tx_ring->size_mask)]);
3078 	}
3079 
3080 	/* If there are ready packets to be xmitted... */
3081 	if (likely(tx_ring->pkts_without_db)) {
3082 		/* ...let HW do its best :-) */
3083 		ena_com_write_sq_doorbell(tx_ring->ena_com_io_sq);
3084 		tx_ring->tx_stats.doorbells++;
3085 		tx_ring->pkts_without_db = false;
3086 	}
3087 
3088 	tx_ring->tx_stats.available_desc =
3089 		ena_com_free_q_entries(tx_ring->ena_com_io_sq);
3090 	tx_ring->tx_stats.tx_poll++;
3091 
3092 	return sent_idx;
3093 }
3094 
3095 int ena_copy_eni_stats(struct ena_adapter *adapter, struct ena_stats_eni *stats)
3096 {
3097 	int rc;
3098 
3099 	rte_spinlock_lock(&adapter->admin_lock);
3100 	/* Retrieve and store the latest statistics from the AQ. This ensures
3101 	 * that the previous value is returned in case of a communication error.
3102 	 */
3103 	rc = ENA_PROXY(adapter, ena_com_get_eni_stats, &adapter->ena_dev,
3104 		(struct ena_admin_eni_stats *)stats);
3105 	rte_spinlock_unlock(&adapter->admin_lock);
3106 	if (rc != 0) {
3107 		if (rc == ENA_COM_UNSUPPORTED) {
3108 			PMD_DRV_LOG(DEBUG,
3109 				"Retrieving ENI metrics is not supported\n");
3110 		} else {
3111 			PMD_DRV_LOG(WARNING,
3112 				"Failed to get ENI metrics, rc: %d\n", rc);
3113 		}
3114 		return rc;
3115 	}
3116 
3117 	return 0;
3118 }
3119 
3120 /**
3121  * DPDK callback to retrieve names of extended device statistics
3122  *
3123  * @param dev
3124  *   Pointer to Ethernet device structure.
3125  * @param[out] xstats_names
3126  *   Buffer to insert names into.
3127  * @param n
3128  *   Number of names.
3129  *
3130  * @return
3131  *   Number of xstats names.
3132  */
3133 static int ena_xstats_get_names(struct rte_eth_dev *dev,
3134 				struct rte_eth_xstat_name *xstats_names,
3135 				unsigned int n)
3136 {
3137 	unsigned int xstats_count = ena_xstats_calc_num(dev->data);
3138 	unsigned int stat, i, count = 0;
3139 
3140 	if (n < xstats_count || !xstats_names)
3141 		return xstats_count;
3142 
3143 	for (stat = 0; stat < ENA_STATS_ARRAY_GLOBAL; stat++, count++)
3144 		strcpy(xstats_names[count].name,
3145 			ena_stats_global_strings[stat].name);
3146 
3147 	for (stat = 0; stat < ENA_STATS_ARRAY_ENI; stat++, count++)
3148 		strcpy(xstats_names[count].name,
3149 			ena_stats_eni_strings[stat].name);
3150 
3151 	for (stat = 0; stat < ENA_STATS_ARRAY_RX; stat++)
3152 		for (i = 0; i < dev->data->nb_rx_queues; i++, count++)
3153 			snprintf(xstats_names[count].name,
3154 				sizeof(xstats_names[count].name),
3155 				"rx_q%d_%s", i,
3156 				ena_stats_rx_strings[stat].name);
3157 
3158 	for (stat = 0; stat < ENA_STATS_ARRAY_TX; stat++)
3159 		for (i = 0; i < dev->data->nb_tx_queues; i++, count++)
3160 			snprintf(xstats_names[count].name,
3161 				sizeof(xstats_names[count].name),
3162 				"tx_q%d_%s", i,
3163 				ena_stats_tx_strings[stat].name);
3164 
3165 	return xstats_count;
3166 }
3167 
3168 /**
3169  * DPDK callback to get extended device statistics.
3170  *
3171  * @param dev
3172  *   Pointer to Ethernet device structure.
3173  * @param[out] stats
3174  *   Stats table output buffer.
3175  * @param n
3176  *   The size of the stats table.
3177  *
3178  * @return
3179  *   Number of xstats on success, negative on failure.
3180  */
3181 static int ena_xstats_get(struct rte_eth_dev *dev,
3182 			  struct rte_eth_xstat *xstats,
3183 			  unsigned int n)
3184 {
3185 	struct ena_adapter *adapter = dev->data->dev_private;
3186 	unsigned int xstats_count = ena_xstats_calc_num(dev->data);
3187 	struct ena_stats_eni eni_stats;
3188 	unsigned int stat, i, count = 0;
3189 	int stat_offset;
3190 	void *stats_begin;
3191 
3192 	if (n < xstats_count)
3193 		return xstats_count;
3194 
3195 	if (!xstats)
3196 		return 0;
3197 
3198 	for (stat = 0; stat < ENA_STATS_ARRAY_GLOBAL; stat++, count++) {
3199 		stat_offset = ena_stats_global_strings[stat].stat_offset;
3200 		stats_begin = &adapter->dev_stats;
3201 
3202 		xstats[count].id = count;
3203 		xstats[count].value = *((uint64_t *)
3204 			((char *)stats_begin + stat_offset));
3205 	}
3206 
3207 	/* Even if the function below fails, copy the previous (or initial)
3208 	 * values to keep the rte_eth_xstat structure consistent.
3209 	 */
3210 	ena_copy_eni_stats(adapter, &eni_stats);
3211 	for (stat = 0; stat < ENA_STATS_ARRAY_ENI; stat++, count++) {
3212 		stat_offset = ena_stats_eni_strings[stat].stat_offset;
3213 		stats_begin = &eni_stats;
3214 
3215 		xstats[count].id = count;
3216 		xstats[count].value = *((uint64_t *)
3217 		    ((char *)stats_begin + stat_offset));
3218 	}
3219 
3220 	for (stat = 0; stat < ENA_STATS_ARRAY_RX; stat++) {
3221 		for (i = 0; i < dev->data->nb_rx_queues; i++, count++) {
3222 			stat_offset = ena_stats_rx_strings[stat].stat_offset;
3223 			stats_begin = &adapter->rx_ring[i].rx_stats;
3224 
3225 			xstats[count].id = count;
3226 			xstats[count].value = *((uint64_t *)
3227 				((char *)stats_begin + stat_offset));
3228 		}
3229 	}
3230 
3231 	for (stat = 0; stat < ENA_STATS_ARRAY_TX; stat++) {
3232 		for (i = 0; i < dev->data->nb_tx_queues; i++, count++) {
3233 			stat_offset = ena_stats_tx_strings[stat].stat_offset;
3234 			stats_begin = &adapter->tx_ring[i].tx_stats;
3235 
3236 			xstats[count].id = count;
3237 			xstats[count].value = *((uint64_t *)
3238 				((char *)stats_begin + stat_offset));
3239 		}
3240 	}
3241 
3242 	return count;
3243 }
3244 
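/**
 * DPDK callback to retrieve extended device statistics by id.
 *
 * The ids are decoded in the same order as produced by
 * ena_xstats_get_names(): global, ENI, per-Rx-queue and then per-Tx-queue
 * statistics. ENI statistics are read from the device at most once per call.
 *
 * @return
 *   Number of successfully retrieved values.
 */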
3245 static int ena_xstats_get_by_id(struct rte_eth_dev *dev,
3246 				const uint64_t *ids,
3247 				uint64_t *values,
3248 				unsigned int n)
3249 {
3250 	struct ena_adapter *adapter = dev->data->dev_private;
3251 	struct ena_stats_eni eni_stats;
3252 	uint64_t id;
3253 	uint64_t rx_entries, tx_entries;
3254 	unsigned int i;
3255 	int qid;
3256 	int valid = 0;
3257 	bool was_eni_copied = false;
3258 
3259 	for (i = 0; i < n; ++i) {
3260 		id = ids[i];
3261 		/* Check if id belongs to global statistics */
3262 		if (id < ENA_STATS_ARRAY_GLOBAL) {
3263 			values[i] = *((uint64_t *)&adapter->dev_stats + id);
3264 			++valid;
3265 			continue;
3266 		}
3267 
3268 		/* Check if id belongs to ENI statistics */
3269 		id -= ENA_STATS_ARRAY_GLOBAL;
3270 		if (id < ENA_STATS_ARRAY_ENI) {
3271 			/* Avoid reading ENI stats multiple times in a single
3272 			 * function call, as it requires communication with the
3273 			 * admin queue.
3274 			 */
3275 			if (!was_eni_copied) {
3276 				was_eni_copied = true;
3277 				ena_copy_eni_stats(adapter, &eni_stats);
3278 			}
3279 			values[i] = *((uint64_t *)&eni_stats + id);
3280 			++valid;
3281 			continue;
3282 		}
3283 
3284 		/* Check if id belongs to rx queue statistics */
3285 		id -= ENA_STATS_ARRAY_ENI;
3286 		rx_entries = ENA_STATS_ARRAY_RX * dev->data->nb_rx_queues;
3287 		if (id < rx_entries) {
3288 			qid = id % dev->data->nb_rx_queues;
3289 			id /= dev->data->nb_rx_queues;
3290 			values[i] = *((uint64_t *)
3291 				&adapter->rx_ring[qid].rx_stats + id);
3292 			++valid;
3293 			continue;
3294 		}
3295 		/* Check if id belongs to tx queue statistics */
3296 		id -= rx_entries;
3297 		tx_entries = ENA_STATS_ARRAY_TX * dev->data->nb_tx_queues;
3298 		if (id < tx_entries) {
3299 			qid = id % dev->data->nb_tx_queues;
3300 			id /= dev->data->nb_tx_queues;
3301 			values[i] = *((uint64_t *)
3302 				&adapter->tx_ring[qid].tx_stats + id);
3303 			++valid;
3304 			continue;
3305 		}
3306 	}
3307 
3308 	return valid;
3309 }
3310 
3311 static int ena_process_bool_devarg(const char *key,
3312 				   const char *value,
3313 				   void *opaque)
3314 {
3315 	struct ena_adapter *adapter = opaque;
3316 	bool bool_value;
3317 
3318 	/* Parse the value. */
3319 	if (strcmp(value, "1") == 0) {
3320 		bool_value = true;
3321 	} else if (strcmp(value, "0") == 0) {
3322 		bool_value = false;
3323 	} else {
3324 		PMD_INIT_LOG(ERR,
3325 			"Invalid value: '%s' for key '%s'. Accepted: '0' or '1'\n",
3326 			value, key);
3327 		return -EINVAL;
3328 	}
3329 
3330 	/* Now, assign it to the proper adapter field. */
3331 	if (strcmp(key, ENA_DEVARG_LARGE_LLQ_HDR) == 0)
3332 		adapter->use_large_llq_hdr = bool_value;
3333 
3334 	return 0;
3335 }
3336 
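/* Parse the PMD-specific device arguments. Currently only the boolean
 * "large_llq_hdr" key is recognized, accepting the values '0' and '1',
 * e.g. appended to the device's EAL allow option (illustrative address):
 *   -a 00:05.0,large_llq_hdr=1
 */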
3337 static int ena_parse_devargs(struct ena_adapter *adapter,
3338 			     struct rte_devargs *devargs)
3339 {
3340 	static const char * const allowed_args[] = {
3341 		ENA_DEVARG_LARGE_LLQ_HDR,
3342 		NULL,
3343 	};
3344 	struct rte_kvargs *kvlist;
3345 	int rc;
3346 
3347 	if (devargs == NULL)
3348 		return 0;
3349 
3350 	kvlist = rte_kvargs_parse(devargs->args, allowed_args);
3351 	if (kvlist == NULL) {
3352 		PMD_INIT_LOG(ERR, "Invalid device arguments: %s\n",
3353 			devargs->args);
3354 		return -EINVAL;
3355 	}
3356 
3357 	rc = rte_kvargs_process(kvlist, ENA_DEVARG_LARGE_LLQ_HDR,
3358 		ena_process_bool_devarg, adapter);
3359 
3360 	rte_kvargs_free(kvlist);
3361 
3362 	return rc;
3363 }
3364 
3365 static int ena_setup_rx_intr(struct rte_eth_dev *dev)
3366 {
3367 	struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev);
3368 	struct rte_intr_handle *intr_handle = pci_dev->intr_handle;
3369 	int rc;
3370 	uint16_t vectors_nb, i;
3371 	bool rx_intr_requested = dev->data->dev_conf.intr_conf.rxq;
3372 
3373 	if (!rx_intr_requested)
3374 		return 0;
3375 
3376 	if (!rte_intr_cap_multiple(intr_handle)) {
3377 		PMD_DRV_LOG(ERR,
3378 			"Rx interrupt requested, but it isn't supported by the PCI driver\n");
3379 		return -ENOTSUP;
3380 	}
3381 
3382 	/* Disable interrupt mapping before the configuration starts. */
3383 	rte_intr_disable(intr_handle);
3384 
3385 	/* Verify if there are enough vectors available. */
3386 	vectors_nb = dev->data->nb_rx_queues;
3387 	if (vectors_nb > RTE_MAX_RXTX_INTR_VEC_ID) {
3388 		PMD_DRV_LOG(ERR,
3389 			"Too many Rx interrupts requested, maximum number: %d\n",
3390 			RTE_MAX_RXTX_INTR_VEC_ID);
3391 		rc = -ENOTSUP;
3392 		goto enable_intr;
3393 	}
3394 
3395 	/* Allocate the vector list */
3396 	if (rte_intr_vec_list_alloc(intr_handle, "intr_vec",
3397 					   dev->data->nb_rx_queues)) {
3398 		PMD_DRV_LOG(ERR,
3399 			"Failed to allocate interrupt vector for %d queues\n",
3400 			dev->data->nb_rx_queues);
3401 		rc = -ENOMEM;
3402 		goto enable_intr;
3403 	}
3404 
3405 	rc = rte_intr_efd_enable(intr_handle, vectors_nb);
3406 	if (rc != 0)
3407 		goto free_intr_vec;
3408 
3409 	if (!rte_intr_allow_others(intr_handle)) {
3410 		PMD_DRV_LOG(ERR,
3411 			"Not enough interrupts available to use both ENA Admin and Rx interrupts\n");
3412 		goto disable_intr_efd;
3413 	}
3414 
3415 	for (i = 0; i < vectors_nb; ++i)
3416 		if (rte_intr_vec_list_index_set(intr_handle, i,
3417 					   RTE_INTR_VEC_RXTX_OFFSET + i))
3418 			goto disable_intr_efd;
3419 
3420 	rte_intr_enable(intr_handle);
3421 	return 0;
3422 
3423 disable_intr_efd:
3424 	rte_intr_efd_disable(intr_handle);
3425 free_intr_vec:
3426 	rte_intr_vec_list_free(intr_handle);
3427 enable_intr:
3428 	rte_intr_enable(intr_handle);
3429 	return rc;
3430 }
3431 
3432 static void ena_rx_queue_intr_set(struct rte_eth_dev *dev,
3433 				 uint16_t queue_id,
3434 				 bool unmask)
3435 {
3436 	struct ena_adapter *adapter = dev->data->dev_private;
3437 	struct ena_ring *rxq = &adapter->rx_ring[queue_id];
3438 	struct ena_eth_io_intr_reg intr_reg;
3439 
3440 	ena_com_update_intr_reg(&intr_reg, 0, 0, unmask);
3441 	ena_com_unmask_intr(rxq->ena_com_io_cq, &intr_reg);
3442 }
3443 
3444 static int ena_rx_queue_intr_enable(struct rte_eth_dev *dev,
3445 				    uint16_t queue_id)
3446 {
3447 	ena_rx_queue_intr_set(dev, queue_id, true);
3448 
3449 	return 0;
3450 }
3451 
3452 static int ena_rx_queue_intr_disable(struct rte_eth_dev *dev,
3453 				     uint16_t queue_id)
3454 {
3455 	ena_rx_queue_intr_set(dev, queue_id, false);
3456 
3457 	return 0;
3458 }
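
/* Illustrative application-side usage of the two callbacks above (assumes the
 * port was configured with intr_conf.rxq = 1 as sketched earlier; port and
 * queue ids are placeholders):
 *
 *   rte_eth_dev_rx_intr_enable(port_id, queue_id);  // -> ena_rx_queue_intr_enable()
 *   // block on the queue's event fd, e.g. via rte_epoll_wait(), until
 *   // traffic arrives, then switch back to polling:
 *   rte_eth_dev_rx_intr_disable(port_id, queue_id); // -> ena_rx_queue_intr_disable()
 */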
3459 
3460 static int ena_configure_aenq(struct ena_adapter *adapter)
3461 {
3462 	uint32_t aenq_groups = adapter->all_aenq_groups;
3463 	int rc;
3464 
3465 	/* all_aenq_groups holds every AENQ event group supported by both the
3466 	 * driver and the device, so first make sure the LSC request is valid.
3467 	 */
3468 	if (adapter->edev_data->dev_conf.intr_conf.lsc != 0) {
3469 		if (!(aenq_groups & BIT(ENA_ADMIN_LINK_CHANGE))) {
3470 			PMD_DRV_LOG(ERR,
3471 				"LSC requested, but it's not supported by the AENQ\n");
3472 			return -EINVAL;
3473 		}
3474 	} else {
3475 		/* If LSC wasn't enabled by the app, let's enable all supported
3476 		 * AENQ procedures except the LSC.
3477 		 */
3478 		aenq_groups &= ~BIT(ENA_ADMIN_LINK_CHANGE);
3479 	}
3480 
3481 	rc = ena_com_set_aenq_config(&adapter->ena_dev, aenq_groups);
3482 	if (rc != 0) {
3483 		PMD_DRV_LOG(ERR, "Cannot configure AENQ groups, rc=%d\n", rc);
3484 		return rc;
3485 	}
3486 
3487 	adapter->active_aenq_groups = aenq_groups;
3488 
3489 	return 0;
3490 }
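
/* Illustrative only: for the LSC branch above to be taken, the application
 * enables link-state-change interrupts and registers a callback. Names and
 * ids below are placeholders:
 *
 *   static int
 *   lsc_event_cb(uint16_t port_id, enum rte_eth_event_type type,
 *                void *cb_arg, void *ret_param)
 *   {
 *       struct rte_eth_link link;
 *
 *       RTE_SET_USED(type);
 *       RTE_SET_USED(cb_arg);
 *       RTE_SET_USED(ret_param);
 *       rte_eth_link_get_nowait(port_id, &link);
 *       printf("Port %u link is %s\n", port_id,
 *              link.link_status ? "up" : "down");
 *       return 0;
 *   }
 *
 *   conf.intr_conf.lsc = 1;
 *   rte_eth_dev_callback_register(port_id, RTE_ETH_EVENT_INTR_LSC,
 *                                 lsc_event_cb, NULL);
 *
 * The callback is eventually invoked from ena_update_on_link_change() below
 * via rte_eth_dev_callback_process().
 */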
3491 
3492 int ena_mp_indirect_table_set(struct ena_adapter *adapter)
3493 {
3494 	return ENA_PROXY(adapter, ena_com_indirect_table_set, &adapter->ena_dev);
3495 }
3496 
3497 int ena_mp_indirect_table_get(struct ena_adapter *adapter,
3498 			      uint32_t *indirect_table)
3499 {
3500 	return ENA_PROXY(adapter, ena_com_indirect_table_get, &adapter->ena_dev,
3501 		indirect_table);
3502 }
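
/* These wrappers let a secondary process reach the RSS indirection table even
 * though only the primary owns the admin queue: ENA_PROXY() either runs the
 * ena_com call directly (primary) or forwards it as an IPC request that is
 * serviced by ena_mp_primary_handle() at the bottom of this file
 * (ENA_MP_IND_TBL_SET / ENA_MP_IND_TBL_GET).
 */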
3503 
3504 /*********************************************************************
3505  *  ena_plat_dpdk.h functions implementations
3506  *********************************************************************/
3507 
3508 const struct rte_memzone *
3509 ena_mem_alloc_coherent(struct rte_eth_dev_data *data, size_t size,
3510 		       int socket_id, unsigned int alignment, void **virt_addr,
3511 		       dma_addr_t *phys_addr)
3512 {
3513 	char z_name[RTE_MEMZONE_NAMESIZE];
3514 	struct ena_adapter *adapter = data->dev_private;
3515 	const struct rte_memzone *memzone;
3516 	int rc;
3517 
3518 	rc = snprintf(z_name, RTE_MEMZONE_NAMESIZE, "ena_p%d_mz%" PRIu64 "",
3519 		data->port_id, adapter->memzone_cnt);
3520 	if (rc >= RTE_MEMZONE_NAMESIZE) {
3521 		PMD_DRV_LOG(ERR,
3522 			"Name for the ena_com memzone is too long. Port: %d, mz_num: %" PRIu64 "\n",
3523 			data->port_id, adapter->memzone_cnt);
3524 		goto error;
3525 	}
3526 	adapter->memzone_cnt++;
3527 
3528 	memzone = rte_memzone_reserve_aligned(z_name, size, socket_id,
3529 		RTE_MEMZONE_IOVA_CONTIG, alignment);
3530 	if (memzone == NULL) {
3531 		PMD_DRV_LOG(ERR, "Failed to allocate ena_com memzone: %s\n",
3532 			z_name);
3533 		goto error;
3534 	}
3535 
3536 	memset(memzone->addr, 0, size);
3537 	*virt_addr = memzone->addr;
3538 	*phys_addr = memzone->iova;
3539 
3540 	return memzone;
3541 
3542 error:
3543 	*virt_addr = NULL;
3544 	*phys_addr = 0;
3545 
3546 	return NULL;
3547 }
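
/* The memzone name built above follows the "ena_p<port>_mz<counter>" pattern,
 * e.g. "ena_p0_mz12" (example name only). The per-adapter memzone_cnt keeps
 * every name unique, as rte_memzone requires, and RTE_MEMZONE_IOVA_CONTIG
 * makes the buffer IOVA-contiguous so it can be handed to the device as a
 * single DMA region.
 */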
3548 
3549 
3550 /*********************************************************************
3551  *  PMD configuration
3552  *********************************************************************/
3553 static int eth_ena_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
3554 	struct rte_pci_device *pci_dev)
3555 {
3556 	return rte_eth_dev_pci_generic_probe(pci_dev,
3557 		sizeof(struct ena_adapter), eth_ena_dev_init);
3558 }
3559 
3560 static int eth_ena_pci_remove(struct rte_pci_device *pci_dev)
3561 {
3562 	return rte_eth_dev_pci_generic_remove(pci_dev, eth_ena_dev_uninit);
3563 }
3564 
3565 static struct rte_pci_driver rte_ena_pmd = {
3566 	.id_table = pci_id_ena_map,
3567 	.drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC |
3568 		     RTE_PCI_DRV_WC_ACTIVATE,
3569 	.probe = eth_ena_pci_probe,
3570 	.remove = eth_ena_pci_remove,
3571 };
3572 
3573 RTE_PMD_REGISTER_PCI(net_ena, rte_ena_pmd);
3574 RTE_PMD_REGISTER_PCI_TABLE(net_ena, pci_id_ena_map);
3575 RTE_PMD_REGISTER_KMOD_DEP(net_ena, "* igb_uio | uio_pci_generic | vfio-pci");
3576 RTE_PMD_REGISTER_PARAM_STRING(net_ena, ENA_DEVARG_LARGE_LLQ_HDR "=<0|1>");
3577 RTE_LOG_REGISTER_SUFFIX(ena_logtype_init, init, NOTICE);
3578 RTE_LOG_REGISTER_SUFFIX(ena_logtype_driver, driver, NOTICE);
3579 #ifdef RTE_ETHDEV_DEBUG_RX
3580 RTE_LOG_REGISTER_SUFFIX(ena_logtype_rx, rx, DEBUG);
3581 #endif
3582 #ifdef RTE_ETHDEV_DEBUG_TX
3583 RTE_LOG_REGISTER_SUFFIX(ena_logtype_tx, tx, DEBUG);
3584 #endif
3585 RTE_LOG_REGISTER_SUFFIX(ena_logtype_com, com, WARNING);
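
/* Illustrative only: assuming the default logtype prefix "pmd.net.ena" set by
 * the build system, the log levels registered above can be raised at run time
 * from the EAL command line, e.g.:
 *
 *   dpdk-testpmd --log-level=pmd.net.ena.driver:debug -a 0000:00:06.0 -- -i
 */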
3586 
3587 /******************************************************************************
3588  ******************************** AENQ Handlers *******************************
3589  *****************************************************************************/
3590 static void ena_update_on_link_change(void *adapter_data,
3591 				      struct ena_admin_aenq_entry *aenq_e)
3592 {
3593 	struct rte_eth_dev *eth_dev = adapter_data;
3594 	struct ena_adapter *adapter = eth_dev->data->dev_private;
3595 	struct ena_admin_aenq_link_change_desc *aenq_link_desc;
3596 	uint32_t status;
3597 
3598 	aenq_link_desc = (struct ena_admin_aenq_link_change_desc *)aenq_e;
3599 
3600 	status = get_ena_admin_aenq_link_change_desc_link_status(aenq_link_desc);
3601 	adapter->link_status = status;
3602 
3603 	ena_link_update(eth_dev, 0);
3604 	rte_eth_dev_callback_process(eth_dev, RTE_ETH_EVENT_INTR_LSC, NULL);
3605 }
3606 
3607 static void ena_notification(void *adapter_data,
3608 			     struct ena_admin_aenq_entry *aenq_e)
3609 {
3610 	struct rte_eth_dev *eth_dev = adapter_data;
3611 	struct ena_adapter *adapter = eth_dev->data->dev_private;
3612 	struct ena_admin_ena_hw_hints *hints;
3613 
3614 	if (aenq_e->aenq_common_desc.group != ENA_ADMIN_NOTIFICATION)
3615 		PMD_DRV_LOG(WARNING, "Invalid AENQ group: %x. Expected: %x\n",
3616 			aenq_e->aenq_common_desc.group,
3617 			ENA_ADMIN_NOTIFICATION);
3618 
3619 	switch (aenq_e->aenq_common_desc.syndrome) {
3620 	case ENA_ADMIN_UPDATE_HINTS:
3621 		hints = (struct ena_admin_ena_hw_hints *)
3622 			(&aenq_e->inline_data_w4);
3623 		ena_update_hints(adapter, hints);
3624 		break;
3625 	default:
3626 		PMD_DRV_LOG(ERR, "Invalid AENQ notification syndrome: %d\n",
3627 			aenq_e->aenq_common_desc.syndrome);
3628 	}
3629 }
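
/* The hints delivered here are device-suggested runtime parameters (mostly
 * timeouts); ena_update_hints() applies them to the adapter on the fly. The
 * exact set of fields comes from struct ena_admin_ena_hw_hints.
 */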
3630 
3631 static void ena_keep_alive(void *adapter_data,
3632 			   __rte_unused struct ena_admin_aenq_entry *aenq_e)
3633 {
3634 	struct rte_eth_dev *eth_dev = adapter_data;
3635 	struct ena_adapter *adapter = eth_dev->data->dev_private;
3636 	struct ena_admin_aenq_keep_alive_desc *desc;
3637 	uint64_t rx_drops;
3638 	uint64_t tx_drops;
3639 
3640 	adapter->timestamp_wd = rte_get_timer_cycles();
3641 
3642 	desc = (struct ena_admin_aenq_keep_alive_desc *)aenq_e;
3643 	rx_drops = ((uint64_t)desc->rx_drops_high << 32) | desc->rx_drops_low;
3644 	tx_drops = ((uint64_t)desc->tx_drops_high << 32) | desc->tx_drops_low;
3645 
3646 	adapter->drv_stats->rx_drops = rx_drops;
3647 	adapter->dev_stats.tx_drops = tx_drops;
3648 }
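
/* The keep-alive event doubles as a statistics push: the device reports the
 * Rx/Tx drop counters split into 32-bit halves, which are merged back into
 * 64-bit values above. Refreshing timestamp_wd records that the device is
 * still responsive; the driver's keep-alive watchdog elsewhere in this file
 * relies on it.
 */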
3649 
3650 /*
3651  * This handler is called for unknown event groups or unimplemented handlers.
3652  */
3653 static void unimplemented_aenq_handler(__rte_unused void *data,
3654 				       __rte_unused struct ena_admin_aenq_entry *aenq_e)
3655 {
3656 	PMD_DRV_LOG(ERR,
3657 		"Unknown event was received or event with unimplemented handler\n");
3658 }
3659 
3660 static struct ena_aenq_handlers aenq_handlers = {
3661 	.handlers = {
3662 		[ENA_ADMIN_LINK_CHANGE] = ena_update_on_link_change,
3663 		[ENA_ADMIN_NOTIFICATION] = ena_notification,
3664 		[ENA_ADMIN_KEEP_ALIVE] = ena_keep_alive
3665 	},
3666 	.unimplemented_handler = unimplemented_aenq_handler
3667 };
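
/* Only the groups enabled in ena_configure_aenq() are delivered by the device;
 * any event outside the table above ends up in unimplemented_aenq_handler().
 */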
3668 
3669 /*********************************************************************
3670  *  Multi-Process communication request handling (in primary)
3671  *********************************************************************/
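/* Rough shape of the flow handled below (illustrative, not a literal trace):
 * a secondary process cannot use the admin queue itself, so its ENA_PROXY()
 * call packs the request type and arguments into an rte_mp_msg and sends it
 * to the primary; ena_mp_primary_handle() runs the matching ena_com call on
 * the shared adapter and returns the outcome in rsp->result via
 * rte_mp_reply().
 */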
3672 static int
3673 ena_mp_primary_handle(const struct rte_mp_msg *mp_msg, const void *peer)
3674 {
3675 	const struct ena_mp_body *req =
3676 		(const struct ena_mp_body *)mp_msg->param;
3677 	struct ena_adapter *adapter;
3678 	struct ena_com_dev *ena_dev;
3679 	struct ena_mp_body *rsp;
3680 	struct rte_mp_msg mp_rsp;
3681 	struct rte_eth_dev *dev;
3682 	int res = 0;
3683 
3684 	rsp = (struct ena_mp_body *)&mp_rsp.param;
3685 	mp_msg_init(&mp_rsp, req->type, req->port_id);
3686 
3687 	if (!rte_eth_dev_is_valid_port(req->port_id)) {
3688 		rte_errno = ENODEV;
3689 		res = -rte_errno;
3690 		PMD_DRV_LOG(ERR, "Unknown port %d in request %d\n",
3691 			    req->port_id, req->type);
3692 		goto end;
3693 	}
3694 	dev = &rte_eth_devices[req->port_id];
3695 	adapter = dev->data->dev_private;
3696 	ena_dev = &adapter->ena_dev;
3697 
3698 	switch (req->type) {
3699 	case ENA_MP_DEV_STATS_GET:
3700 		res = ena_com_get_dev_basic_stats(ena_dev,
3701 						  &adapter->basic_stats);
3702 		break;
3703 	case ENA_MP_ENI_STATS_GET:
3704 		res = ena_com_get_eni_stats(ena_dev,
3705 			(struct ena_admin_eni_stats *)&adapter->eni_stats);
3706 		break;
3707 	case ENA_MP_MTU_SET:
3708 		res = ena_com_set_dev_mtu(ena_dev, req->args.mtu);
3709 		break;
3710 	case ENA_MP_IND_TBL_GET:
3711 		res = ena_com_indirect_table_get(ena_dev,
3712 						 adapter->indirect_table);
3713 		break;
3714 	case ENA_MP_IND_TBL_SET:
3715 		res = ena_com_indirect_table_set(ena_dev);
3716 		break;
3717 	default:
3718 		PMD_DRV_LOG(ERR, "Unknown request type %d\n", req->type);
3719 		res = -EINVAL;
3720 		break;
3721 	}
3722 
3723 end:
3724 	/* Save processing result in the reply */
3725 	rsp->result = res;
3726 	/* Return only the IPC processing status; the operation result is in rsp->result */
3727 	return rte_mp_reply(&mp_rsp, peer);
3728 }
3729