/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright (c) 2015-2020 Amazon.com, Inc. or its affiliates.
 * All rights reserved.
 */

#include <rte_alarm.h>
#include <rte_string_fns.h>
#include <rte_errno.h>
#include <rte_version.h>
#include <rte_net.h>
#include <rte_kvargs.h>

#include "ena_ethdev.h"
#include "ena_logs.h"
#include "ena_platform.h"
#include "ena_com.h"
#include "ena_eth_com.h"

#include <ena_common_defs.h>
#include <ena_regs_defs.h>
#include <ena_admin_defs.h>
#include <ena_eth_io_defs.h>

#define DRV_MODULE_VER_MAJOR	2
#define DRV_MODULE_VER_MINOR	11
#define DRV_MODULE_VER_SUBMINOR	0

#define __MERGE_64B_H_L(h, l) (((uint64_t)h << 32) | l)

#define GET_L4_HDR_LEN(mbuf) \
	((rte_pktmbuf_mtod_offset(mbuf, struct rte_tcp_hdr *, \
		mbuf->l3_len + mbuf->l2_len)->data_off) >> 4)

#define ETH_GSTRING_LEN	32

#define ARRAY_SIZE(x) RTE_DIM(x)

#define ENA_MIN_RING_DESC	128

#define USEC_PER_MSEC	1000UL

#define BITS_PER_BYTE 8

#define BITS_PER_TYPE(type) (sizeof(type) * BITS_PER_BYTE)

#define DECIMAL_BASE 10

#define MAX_WIDE_LLQ_DEPTH_UNSUPPORTED 0

/*
 * We should try to keep ENA_CLEANUP_BUF_THRESH lower than
 * RTE_MEMPOOL_CACHE_MAX_SIZE, so we can fit this in the mempool local cache.
 */
#define ENA_CLEANUP_BUF_THRESH	256

struct ena_stats {
	char name[ETH_GSTRING_LEN];
	int stat_offset;
};

#define ENA_STAT_ENTRY(stat, stat_type) { \
	.name = #stat, \
	.stat_offset = offsetof(struct ena_stats_##stat_type, stat) \
}

#define ENA_STAT_RX_ENTRY(stat) \
	ENA_STAT_ENTRY(stat, rx)

#define ENA_STAT_TX_ENTRY(stat) \
	ENA_STAT_ENTRY(stat, tx)

#define ENA_STAT_METRICS_ENTRY(stat) \
	ENA_STAT_ENTRY(stat, metrics)

#define ENA_STAT_GLOBAL_ENTRY(stat) \
	ENA_STAT_ENTRY(stat, dev)

#define ENA_STAT_ENA_SRD_ENTRY(stat) \
	ENA_STAT_ENTRY(stat, srd)

/* Device arguments */

/* llq_policy controls whether to disable LLQ, use the device-recommended
 * header policy, or override the device recommendation.
 * 0 - Disable LLQ. Use with extreme caution as it leads to a huge
 *     performance degradation on AWS instances built with Nitro v4 onwards.
 * 1 - Accept the device-recommended LLQ policy (default).
 *     The device can recommend either the normal or the large LLQ policy.
 * 2 - Enforce the normal LLQ policy.
 * 3 - Enforce the large LLQ policy.
 *     Required for packets with headers that exceed 96 bytes on
 *     AWS instances built with Nitro v2 and Nitro v1.
 */
#define ENA_DEVARG_LLQ_POLICY "llq_policy"

/* Timeout in seconds after which a single uncompleted Tx packet should be
 * considered as missing.
 */
#define ENA_DEVARG_MISS_TXC_TO "miss_txc_to"

/*
 * Controls the period of time (in milliseconds) between two consecutive inspections of
 * the control queues when the driver is in poll mode and not using interrupts.
 * By default, this value is zero, indicating that the driver will not be in poll mode and will
 * use interrupts. A non-zero value for this argument is mandatory when using the
 * uio_pci_generic driver.
 */
#define ENA_DEVARG_CONTROL_PATH_POLL_INTERVAL "control_path_poll_interval"
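/*
 * Illustrative example (not part of the driver): the device arguments above
 * are passed through the EAL device allow-list option, e.g. for a
 * hypothetical ENA device at PCI address 00:05.0:
 *
 *   dpdk-testpmd -a 00:05.0,llq_policy=3,miss_txc_to=5,control_path_poll_interval=500 -- -i
 *
 * This would enforce the large LLQ policy, treat an uncompleted Tx packet as
 * missing after 5 seconds, and poll the control queues every 500 ms instead
 * of relying on interrupts.
 */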
/*
 * Each rte_memzone should have a unique name.
 * To satisfy this, count the number of allocations and add it to the name.
 */
rte_atomic64_t ena_alloc_cnt;

static const struct ena_stats ena_stats_global_strings[] = {
	ENA_STAT_GLOBAL_ENTRY(wd_expired),
	ENA_STAT_GLOBAL_ENTRY(dev_start),
	ENA_STAT_GLOBAL_ENTRY(dev_stop),
	ENA_STAT_GLOBAL_ENTRY(tx_drops),
};

/*
 * The legacy metrics (also known as eni stats) consisted of 5 stats, while the reworked
 * metrics (also known as customer metrics) support an additional stat.
 */
static struct ena_stats ena_stats_metrics_strings[] = {
	ENA_STAT_METRICS_ENTRY(bw_in_allowance_exceeded),
	ENA_STAT_METRICS_ENTRY(bw_out_allowance_exceeded),
	ENA_STAT_METRICS_ENTRY(pps_allowance_exceeded),
	ENA_STAT_METRICS_ENTRY(conntrack_allowance_exceeded),
	ENA_STAT_METRICS_ENTRY(linklocal_allowance_exceeded),
	ENA_STAT_METRICS_ENTRY(conntrack_allowance_available),
};

static const struct ena_stats ena_stats_srd_strings[] = {
	ENA_STAT_ENA_SRD_ENTRY(ena_srd_mode),
	ENA_STAT_ENA_SRD_ENTRY(ena_srd_tx_pkts),
	ENA_STAT_ENA_SRD_ENTRY(ena_srd_eligible_tx_pkts),
	ENA_STAT_ENA_SRD_ENTRY(ena_srd_rx_pkts),
	ENA_STAT_ENA_SRD_ENTRY(ena_srd_resource_utilization),
};

static const struct ena_stats ena_stats_tx_strings[] = {
	ENA_STAT_TX_ENTRY(cnt),
	ENA_STAT_TX_ENTRY(bytes),
	ENA_STAT_TX_ENTRY(prepare_ctx_err),
	ENA_STAT_TX_ENTRY(tx_poll),
	ENA_STAT_TX_ENTRY(doorbells),
	ENA_STAT_TX_ENTRY(bad_req_id),
	ENA_STAT_TX_ENTRY(available_desc),
	ENA_STAT_TX_ENTRY(missed_tx),
};

static const struct ena_stats ena_stats_rx_strings[] = {
	ENA_STAT_RX_ENTRY(cnt),
	ENA_STAT_RX_ENTRY(bytes),
	ENA_STAT_RX_ENTRY(refill_partial),
	ENA_STAT_RX_ENTRY(l3_csum_bad),
	ENA_STAT_RX_ENTRY(l4_csum_bad),
	ENA_STAT_RX_ENTRY(l4_csum_good),
	ENA_STAT_RX_ENTRY(mbuf_alloc_fail),
	ENA_STAT_RX_ENTRY(bad_desc_num),
	ENA_STAT_RX_ENTRY(bad_req_id),
	ENA_STAT_RX_ENTRY(bad_desc),
	ENA_STAT_RX_ENTRY(unknown_error),
};

#define ENA_STATS_ARRAY_GLOBAL		ARRAY_SIZE(ena_stats_global_strings)
#define ENA_STATS_ARRAY_METRICS		ARRAY_SIZE(ena_stats_metrics_strings)
#define ENA_STATS_ARRAY_METRICS_LEGACY	(ENA_STATS_ARRAY_METRICS - 1)
#define ENA_STATS_ARRAY_ENA_SRD		ARRAY_SIZE(ena_stats_srd_strings)
#define ENA_STATS_ARRAY_TX		ARRAY_SIZE(ena_stats_tx_strings)
#define ENA_STATS_ARRAY_RX		ARRAY_SIZE(ena_stats_rx_strings)

#define QUEUE_OFFLOADS (RTE_ETH_TX_OFFLOAD_TCP_CKSUM |\
			RTE_ETH_TX_OFFLOAD_UDP_CKSUM |\
			RTE_ETH_TX_OFFLOAD_IPV4_CKSUM |\
			RTE_ETH_TX_OFFLOAD_TCP_TSO)
#define MBUF_OFFLOADS (RTE_MBUF_F_TX_L4_MASK |\
		       RTE_MBUF_F_TX_IP_CKSUM |\
		       RTE_MBUF_F_TX_TCP_SEG)

/** Vendor ID used by Amazon devices */
#define PCI_VENDOR_ID_AMAZON 0x1D0F
/** Amazon devices */
#define PCI_DEVICE_ID_ENA_VF		0xEC20
#define PCI_DEVICE_ID_ENA_VF_RSERV0	0xEC21

#define ENA_TX_OFFLOAD_MASK	(RTE_MBUF_F_TX_L4_MASK | \
	RTE_MBUF_F_TX_IPV6 | \
	RTE_MBUF_F_TX_IPV4 | \
	RTE_MBUF_F_TX_IP_CKSUM | \
	RTE_MBUF_F_TX_TCP_SEG)

#define ENA_TX_OFFLOAD_NOTSUP_MASK	\
	(RTE_MBUF_F_TX_OFFLOAD_MASK ^ ENA_TX_OFFLOAD_MASK)

/** HW specific offloads capabilities. */
/* IPv4 checksum offload. */
#define ENA_L3_IPV4_CSUM		0x0001
/* TCP/UDP checksum offload for IPv4 packets. */
#define ENA_L4_IPV4_CSUM		0x0002
/* TCP/UDP checksum offload for IPv4 packets with pseudo header checksum. */
#define ENA_L4_IPV4_CSUM_PARTIAL	0x0004
/* TCP/UDP checksum offload for IPv6 packets. */
#define ENA_L4_IPV6_CSUM		0x0008
/* TCP/UDP checksum offload for IPv6 packets with pseudo header checksum. */
#define ENA_L4_IPV6_CSUM_PARTIAL	0x0010
/* TSO support for IPv4 packets. */
#define ENA_IPV4_TSO			0x0020

/* Device supports setting RSS hash. */
#define ENA_RX_RSS_HASH			0x0040

static const struct rte_pci_id pci_id_ena_map[] = {
	{ RTE_PCI_DEVICE(PCI_VENDOR_ID_AMAZON, PCI_DEVICE_ID_ENA_VF) },
	{ RTE_PCI_DEVICE(PCI_VENDOR_ID_AMAZON, PCI_DEVICE_ID_ENA_VF_RSERV0) },
	{ .device_id = 0 },
};

static struct ena_aenq_handlers aenq_handlers;

static int ena_device_init(struct ena_adapter *adapter,
			   struct rte_pci_device *pdev,
			   struct ena_com_dev_get_features_ctx *get_feat_ctx);
static int ena_dev_configure(struct rte_eth_dev *dev);
static void ena_tx_map_mbuf(struct ena_ring *tx_ring,
			    struct ena_tx_buffer *tx_info,
			    struct rte_mbuf *mbuf,
			    void **push_header,
			    uint16_t *header_len);
static int ena_xmit_mbuf(struct ena_ring *tx_ring, struct rte_mbuf *mbuf);
static int ena_tx_cleanup(void *txp, uint32_t free_pkt_cnt);
static uint16_t eth_ena_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
				  uint16_t nb_pkts);
static uint16_t eth_ena_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
				  uint16_t nb_pkts);
static int ena_tx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
			      uint16_t nb_desc, unsigned int socket_id,
			      const struct rte_eth_txconf *tx_conf);
static int ena_rx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
			      uint16_t nb_desc, unsigned int socket_id,
			      const struct rte_eth_rxconf *rx_conf,
			      struct rte_mempool *mp);
static inline void ena_init_rx_mbuf(struct rte_mbuf *mbuf, uint16_t len);
static struct rte_mbuf *ena_rx_mbuf(struct ena_ring *rx_ring,
				    struct ena_com_rx_buf_info *ena_bufs,
				    uint32_t descs,
				    uint16_t *next_to_clean,
				    uint8_t offset);
static uint16_t eth_ena_recv_pkts(void *rx_queue,
				  struct rte_mbuf **rx_pkts, uint16_t nb_pkts);
static int ena_add_single_rx_desc(struct ena_com_io_sq *io_sq,
				  struct rte_mbuf *mbuf, uint16_t id);
static int ena_populate_rx_queue(struct ena_ring *rxq, unsigned int count);
static void ena_init_rings(struct ena_adapter *adapter,
			   bool disable_meta_caching);
static int ena_mtu_set(struct rte_eth_dev *dev, uint16_t mtu);
static int ena_start(struct rte_eth_dev *dev);
static int ena_stop(struct rte_eth_dev *dev);
static int ena_close(struct rte_eth_dev *dev);
static int ena_dev_reset(struct rte_eth_dev *dev);
static int ena_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats);
static void ena_rx_queue_release_all(struct rte_eth_dev *dev);
static void ena_tx_queue_release_all(struct rte_eth_dev *dev);
static void ena_rx_queue_release(struct rte_eth_dev *dev, uint16_t qid);
static void ena_tx_queue_release(struct rte_eth_dev *dev, uint16_t qid);
static void ena_rx_queue_release_bufs(struct ena_ring *ring);
static void ena_tx_queue_release_bufs(struct ena_ring *ring);
static int ena_link_update(struct rte_eth_dev *dev,
			   int wait_to_complete);
static int ena_create_io_queue(struct rte_eth_dev *dev, struct ena_ring *ring);
static void ena_queue_stop(struct ena_ring *ring);
static void ena_queue_stop_all(struct rte_eth_dev *dev,
			       enum ena_ring_type ring_type);
static int ena_queue_start(struct rte_eth_dev *dev, struct ena_ring *ring);
static int ena_queue_start_all(struct rte_eth_dev *dev,
			       enum ena_ring_type ring_type);
static void ena_stats_restart(struct rte_eth_dev *dev);
static uint64_t ena_get_rx_port_offloads(struct ena_adapter *adapter);
static uint64_t ena_get_tx_port_offloads(struct ena_adapter *adapter);
static uint64_t ena_get_rx_queue_offloads(struct ena_adapter *adapter);
static uint64_t ena_get_tx_queue_offloads(struct ena_adapter *adapter);
static int ena_infos_get(struct rte_eth_dev *dev,
			 struct rte_eth_dev_info *dev_info);
static void ena_control_path_handler(void *cb_arg);
static void ena_control_path_poll_handler(void *cb_arg);
static void ena_timer_wd_callback(struct rte_timer *timer, void *arg);
static int eth_ena_dev_init(struct rte_eth_dev *eth_dev);
static int eth_ena_dev_uninit(struct rte_eth_dev *eth_dev);
static int ena_xstats_get_names(struct rte_eth_dev *dev,
				struct rte_eth_xstat_name *xstats_names,
				unsigned int n);
static int ena_xstats_get_names_by_id(struct rte_eth_dev *dev,
				      const uint64_t *ids,
				      struct rte_eth_xstat_name *xstats_names,
				      unsigned int size);
static int ena_xstats_get(struct rte_eth_dev *dev,
			  struct rte_eth_xstat *stats,
			  unsigned int n);
static int ena_xstats_get_by_id(struct rte_eth_dev *dev,
				const uint64_t *ids,
				uint64_t *values,
				unsigned int n);
static int ena_process_llq_policy_devarg(const char *key,
					 const char *value,
					 void *opaque);
static int ena_parse_devargs(struct ena_adapter *adapter,
			     struct rte_devargs *devargs);
static void ena_copy_customer_metrics(struct ena_adapter *adapter,
				      uint64_t *buf,
				      size_t buf_size);
static void ena_copy_ena_srd_info(struct ena_adapter *adapter,
				  struct ena_stats_srd *srd_info);
static int ena_setup_rx_intr(struct rte_eth_dev *dev);
static int ena_rx_queue_intr_enable(struct rte_eth_dev *dev,
				    uint16_t queue_id);
static int ena_rx_queue_intr_disable(struct rte_eth_dev *dev,
				     uint16_t queue_id);
static int ena_configure_aenq(struct ena_adapter *adapter);
static int ena_mp_primary_handle(const struct rte_mp_msg *mp_msg,
				 const void *peer);
static bool ena_use_large_llq_hdr(struct ena_adapter *adapter, uint8_t recommended_entry_size);

static const struct eth_dev_ops ena_dev_ops = {
	.dev_configure          = ena_dev_configure,
	.dev_infos_get          = ena_infos_get,
	.rx_queue_setup         = ena_rx_queue_setup,
	.tx_queue_setup         = ena_tx_queue_setup,
	.dev_start              = ena_start,
	.dev_stop               = ena_stop,
	.link_update            = ena_link_update,
	.stats_get              = ena_stats_get,
	.xstats_get_names       = ena_xstats_get_names,
	.xstats_get_names_by_id = ena_xstats_get_names_by_id,
	.xstats_get             = ena_xstats_get,
	.xstats_get_by_id       = ena_xstats_get_by_id,
	.mtu_set                = ena_mtu_set,
	.rx_queue_release       = ena_rx_queue_release,
	.tx_queue_release       = ena_tx_queue_release,
	.dev_close              = ena_close,
	.dev_reset              = ena_dev_reset,
	.reta_update            = ena_rss_reta_update,
	.reta_query             = ena_rss_reta_query,
	.rx_queue_intr_enable   = ena_rx_queue_intr_enable,
	.rx_queue_intr_disable  = ena_rx_queue_intr_disable,
	.rss_hash_update        = ena_rss_hash_update,
	.rss_hash_conf_get      = ena_rss_hash_conf_get,
	.tx_done_cleanup        = ena_tx_cleanup,
};

/*********************************************************************
 *  Multi-Process communication bits
 *********************************************************************/
/* rte_mp IPC message name */
#define ENA_MP_NAME	"net_ena_mp"
/* Request timeout in seconds */
#define ENA_MP_REQ_TMO	5

/** Proxy request type */
enum ena_mp_req {
	ENA_MP_DEV_STATS_GET,
	ENA_MP_ENI_STATS_GET,
	ENA_MP_MTU_SET,
	ENA_MP_IND_TBL_GET,
	ENA_MP_IND_TBL_SET,
	ENA_MP_CUSTOMER_METRICS_GET,
	ENA_MP_SRD_STATS_GET,
};

/** Proxy message body. Shared between requests and responses. */
struct ena_mp_body {
	/* Message type */
	enum ena_mp_req type;
	int port_id;
	/* Processing result. Set in replies. 0 if message succeeded, negative
	 * error code otherwise.
	 */
	int result;
	union {
		int mtu; /* For ENA_MP_MTU_SET */
	} args;
};

/**
 * Initialize IPC message.
 *
 * @param[out] msg
 *   Pointer to the message to initialize.
 * @param[in] type
 *   Message type.
 * @param[in] port_id
 *   Port ID of target device.
 *
 */
static void
mp_msg_init(struct rte_mp_msg *msg, enum ena_mp_req type, int port_id)
{
	struct ena_mp_body *body = (struct ena_mp_body *)&msg->param;

	memset(msg, 0, sizeof(*msg));
	strlcpy(msg->name, ENA_MP_NAME, sizeof(msg->name));
	msg->len_param = sizeof(*body);
	body->type = type;
	body->port_id = port_id;
}

/*********************************************************************
 *  Multi-Process communication PMD API
 *********************************************************************/
/**
 * Define proxy request descriptor
 *
 * Used to define all structures and functions required for proxying a given
 * function to the primary process including the code to perform to prepare the
 * request and process the response.
 *
 * @param[in] f
 *   Name of the function to proxy
 * @param[in] t
 *   Message type to use
 * @param[in] prep
 *   Body of a function to prepare the request in form of a statement
 *   expression. It is passed all the original function arguments along with two
 *   extra ones:
 *   - struct ena_adapter *adapter - PMD data of the device calling the proxy.
 *   - struct ena_mp_body *req - body of a request to prepare.
 * @param[in] proc
 *   Body of a function to process the response in form of a statement
 *   expression. It is passed all the original function arguments along with two
 *   extra ones:
 *   - struct ena_adapter *adapter - PMD data of the device calling the proxy.
 *   - struct ena_mp_body *rsp - body of a response to process.
 * @param ...
 *   Proxied function's arguments
 *
 * @note Inside prep and proc any parameters which aren't used should be marked
 *       as such (with ENA_TOUCH or __rte_unused).
 */
#define ENA_PROXY_DESC(f, t, prep, proc, ...)			\
	static const enum ena_mp_req mp_type_ ## f = t;		\
	static const char *mp_name_ ## f = #t;			\
	static void mp_prep_ ## f(struct ena_adapter *adapter,	\
				  struct ena_mp_body *req,	\
				  __VA_ARGS__)			\
	{							\
		prep;						\
	}							\
	static void mp_proc_ ## f(struct ena_adapter *adapter,	\
				  struct ena_mp_body *rsp,	\
				  __VA_ARGS__)			\
	{							\
		proc;						\
	}

/**
 * Proxy wrapper for calling primary functions in a secondary process.
 *
 * Depending on whether called in primary or secondary process, calls the
 * @p func directly or proxies the call to the primary process via rte_mp IPC.
 * This macro requires a proxy request descriptor to be defined for @p func
 * using ENA_PROXY_DESC() macro.
 *
 * @param[in/out] a
 *   Device PMD data. Used for sending the message and sharing message results
 *   between primary and secondary.
 * @param[in] f
 *   Function to proxy.
 * @param ...
 *   Arguments of @p func.
 *
 * @return
 *   - 0: Processing succeeded and response handler was called.
 *   - -EPERM: IPC is unavailable on this platform. This means only primary
 *             process may call the proxied function.
 *   - -EIO:   IPC returned error on request send. Inspect rte_errno for the
 *             detailed error code.
 *   - Negative error code from the proxied function.
 *
 * @note This mechanism is geared towards control-path tasks. Avoid calling it
 *       in fast-path unless unbound delays are allowed. This is due to the IPC
 *       mechanism itself (socket based).
 * @note Due to IPC parameter size limitations the proxy logic shares call
 *       results through the struct ena_adapter shared memory. This makes the
 *       proxy mechanism strictly single-threaded. Therefore be sure to make all
 *       calls to the same proxied function under the same lock.
 */
#define ENA_PROXY(a, f, ...)						\
__extension__ ({							\
	struct ena_adapter *_a = (a);					\
	struct timespec ts = { .tv_sec = ENA_MP_REQ_TMO };		\
	struct ena_mp_body *req, *rsp;					\
	struct rte_mp_reply mp_rep;					\
	struct rte_mp_msg mp_req;					\
	int ret;							\
									\
	if (rte_eal_process_type() == RTE_PROC_PRIMARY) {		\
		ret = f(__VA_ARGS__);					\
	} else {							\
		/* Prepare and send request */				\
		req = (struct ena_mp_body *)&mp_req.param;		\
		mp_msg_init(&mp_req, mp_type_ ## f, _a->edev_data->port_id); \
		mp_prep_ ## f(_a, req, ## __VA_ARGS__);			\
									\
		ret = rte_mp_request_sync(&mp_req, &mp_rep, &ts);	\
		if (likely(!ret)) {					\
			RTE_ASSERT(mp_rep.nb_received == 1);		\
			rsp = (struct ena_mp_body *)&mp_rep.msgs[0].param; \
			ret = rsp->result;				\
			if (ret == 0) {					\
				mp_proc_##f(_a, rsp, ## __VA_ARGS__);	\
			} else {					\
				PMD_DRV_LOG_LINE(ERR,			\
					"%s returned error: %d",	\
					mp_name_ ## f, rsp->result);	\
			}						\
			free(mp_rep.msgs);				\
		} else if (rte_errno == ENOTSUP) {			\
			PMD_DRV_LOG_LINE(ERR,				\
				"No IPC, can't proxy to primary");	\
			ret = -rte_errno;				\
		} else {						\
			PMD_DRV_LOG_LINE(ERR, "Request %s failed: %s",	\
				mp_name_ ## f,				\
				rte_strerror(rte_errno));		\
			ret = -EIO;					\
		}							\
	}								\
	ret;								\
})
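/*
 * Example usage (as done later in this file for the MTU and basic-stats
 * requests): the caller invokes the proxied ena_com function through the
 * macro and receives either the direct or the proxied result, e.g.:
 *
 *   rc = ENA_PROXY(adapter, ena_com_set_dev_mtu, ena_dev, mtu);
 *
 * In the primary process this expands to a direct ena_com_set_dev_mtu()
 * call; in a secondary process it sends an ENA_MP_MTU_SET request and
 * returns the result reported by the primary.
 */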
/*********************************************************************
 *  Multi-Process communication request descriptors
 *********************************************************************/

ENA_PROXY_DESC(ena_com_get_dev_basic_stats, ENA_MP_DEV_STATS_GET,
__extension__ ({
	ENA_TOUCH(adapter);
	ENA_TOUCH(req);
	ENA_TOUCH(ena_dev);
	ENA_TOUCH(stats);
}),
__extension__ ({
	ENA_TOUCH(rsp);
	ENA_TOUCH(ena_dev);
	if (stats != &adapter->basic_stats)
		rte_memcpy(stats, &adapter->basic_stats, sizeof(*stats));
}),
	struct ena_com_dev *ena_dev, struct ena_admin_basic_stats *stats);

ENA_PROXY_DESC(ena_com_get_eni_stats, ENA_MP_ENI_STATS_GET,
__extension__ ({
	ENA_TOUCH(adapter);
	ENA_TOUCH(req);
	ENA_TOUCH(ena_dev);
	ENA_TOUCH(stats);
}),
__extension__ ({
	ENA_TOUCH(rsp);
	ENA_TOUCH(ena_dev);
	if (stats != (struct ena_admin_eni_stats *)adapter->metrics_stats)
		rte_memcpy(stats, adapter->metrics_stats, sizeof(*stats));
}),
	struct ena_com_dev *ena_dev, struct ena_admin_eni_stats *stats);

ENA_PROXY_DESC(ena_com_set_dev_mtu, ENA_MP_MTU_SET,
__extension__ ({
	ENA_TOUCH(adapter);
	ENA_TOUCH(ena_dev);
	req->args.mtu = mtu;
}),
__extension__ ({
	ENA_TOUCH(adapter);
	ENA_TOUCH(rsp);
	ENA_TOUCH(ena_dev);
	ENA_TOUCH(mtu);
}),
	struct ena_com_dev *ena_dev, int mtu);

ENA_PROXY_DESC(ena_com_indirect_table_set, ENA_MP_IND_TBL_SET,
__extension__ ({
	ENA_TOUCH(adapter);
	ENA_TOUCH(req);
	ENA_TOUCH(ena_dev);
}),
__extension__ ({
	ENA_TOUCH(adapter);
	ENA_TOUCH(rsp);
	ENA_TOUCH(ena_dev);
}),
	struct ena_com_dev *ena_dev);

ENA_PROXY_DESC(ena_com_indirect_table_get, ENA_MP_IND_TBL_GET,
__extension__ ({
	ENA_TOUCH(adapter);
	ENA_TOUCH(req);
	ENA_TOUCH(ena_dev);
	ENA_TOUCH(ind_tbl);
}),
__extension__ ({
	ENA_TOUCH(rsp);
	ENA_TOUCH(ena_dev);
	if (ind_tbl != adapter->indirect_table)
		rte_memcpy(ind_tbl, adapter->indirect_table,
			sizeof(adapter->indirect_table));
}),
	struct ena_com_dev *ena_dev, u32 *ind_tbl);

ENA_PROXY_DESC(ena_com_get_customer_metrics, ENA_MP_CUSTOMER_METRICS_GET,
__extension__ ({
	ENA_TOUCH(adapter);
	ENA_TOUCH(req);
	ENA_TOUCH(ena_dev);
	ENA_TOUCH(buf);
	ENA_TOUCH(buf_size);
}),
__extension__ ({
	ENA_TOUCH(rsp);
	ENA_TOUCH(ena_dev);
	if (buf != (char *)adapter->metrics_stats)
		rte_memcpy(buf, adapter->metrics_stats, buf_size);
}),
	struct ena_com_dev *ena_dev, char *buf, size_t buf_size);

ENA_PROXY_DESC(ena_com_get_ena_srd_info, ENA_MP_SRD_STATS_GET,
__extension__ ({
	ENA_TOUCH(adapter);
	ENA_TOUCH(req);
	ENA_TOUCH(ena_dev);
	ENA_TOUCH(info);
}),
__extension__ ({
	ENA_TOUCH(rsp);
	ENA_TOUCH(ena_dev);
	if ((struct ena_stats_srd *)info != &adapter->srd_stats)
		rte_memcpy((struct ena_stats_srd *)info,
			&adapter->srd_stats,
			sizeof(struct ena_stats_srd));
}),
	struct ena_com_dev *ena_dev, struct ena_admin_ena_srd_info *info);

static inline void ena_trigger_reset(struct ena_adapter *adapter,
				     enum ena_regs_reset_reason_types reason)
{
	if (likely(!adapter->trigger_reset)) {
		adapter->reset_reason = reason;
		adapter->trigger_reset = true;
	}
}

static inline void ena_rx_mbuf_prepare(struct ena_ring *rx_ring,
				       struct rte_mbuf *mbuf,
				       struct ena_com_rx_ctx *ena_rx_ctx)
{
	struct ena_stats_rx *rx_stats = &rx_ring->rx_stats;
	uint64_t ol_flags = 0;
	uint32_t packet_type = 0;

	switch (ena_rx_ctx->l3_proto) {
	case ENA_ETH_IO_L3_PROTO_IPV4:
		packet_type |= RTE_PTYPE_L3_IPV4;
		if (unlikely(ena_rx_ctx->l3_csum_err)) {
			++rx_stats->l3_csum_bad;
			ol_flags |= RTE_MBUF_F_RX_IP_CKSUM_BAD;
		} else {
			ol_flags |= RTE_MBUF_F_RX_IP_CKSUM_GOOD;
		}
		break;
	case ENA_ETH_IO_L3_PROTO_IPV6:
		packet_type |= RTE_PTYPE_L3_IPV6;
		break;
	default:
		break;
	}

	switch (ena_rx_ctx->l4_proto) {
	case ENA_ETH_IO_L4_PROTO_TCP:
		packet_type |= RTE_PTYPE_L4_TCP;
		break;
	case ENA_ETH_IO_L4_PROTO_UDP:
		packet_type |= RTE_PTYPE_L4_UDP;
		break;
	default:
		break;
	}

	/* L4 csum is relevant only for TCP/UDP packets */
	if ((packet_type & (RTE_PTYPE_L4_TCP | RTE_PTYPE_L4_UDP)) && !ena_rx_ctx->frag) {
		if (ena_rx_ctx->l4_csum_checked) {
			if (likely(!ena_rx_ctx->l4_csum_err)) {
				++rx_stats->l4_csum_good;
				ol_flags |= RTE_MBUF_F_RX_L4_CKSUM_GOOD;
			} else {
				++rx_stats->l4_csum_bad;
				ol_flags |= RTE_MBUF_F_RX_L4_CKSUM_BAD;
			}
		} else {
			ol_flags |= RTE_MBUF_F_RX_L4_CKSUM_UNKNOWN;
		}

		if (rx_ring->offloads & RTE_ETH_RX_OFFLOAD_RSS_HASH) {
			ol_flags |= RTE_MBUF_F_RX_RSS_HASH;
			mbuf->hash.rss = ena_rx_ctx->hash;
		}
	} else {
		ol_flags |= RTE_MBUF_F_RX_L4_CKSUM_UNKNOWN;
	}

	mbuf->ol_flags = ol_flags;
	mbuf->packet_type = packet_type;
}

static inline void ena_tx_mbuf_prepare(struct rte_mbuf *mbuf,
				       struct ena_com_tx_ctx *ena_tx_ctx,
				       uint64_t queue_offloads,
				       bool disable_meta_caching)
{
	struct ena_com_tx_meta *ena_meta = &ena_tx_ctx->ena_meta;

	if ((mbuf->ol_flags & MBUF_OFFLOADS) &&
	    (queue_offloads & QUEUE_OFFLOADS)) {
		/* check if TSO is required */
		if ((mbuf->ol_flags & RTE_MBUF_F_TX_TCP_SEG) &&
		    (queue_offloads & RTE_ETH_TX_OFFLOAD_TCP_TSO)) {
			ena_tx_ctx->tso_enable = true;

			ena_meta->l4_hdr_len = GET_L4_HDR_LEN(mbuf);
		}

		/* check if L3 checksum is needed */
		if ((mbuf->ol_flags & RTE_MBUF_F_TX_IP_CKSUM) &&
		    (queue_offloads & RTE_ETH_TX_OFFLOAD_IPV4_CKSUM))
			ena_tx_ctx->l3_csum_enable = true;

		if (mbuf->ol_flags & RTE_MBUF_F_TX_IPV6) {
			ena_tx_ctx->l3_proto = ENA_ETH_IO_L3_PROTO_IPV6;
			/* For the IPv6 packets, DF always needs to be true. */
			ena_tx_ctx->df = 1;
		} else {
			ena_tx_ctx->l3_proto = ENA_ETH_IO_L3_PROTO_IPV4;

			/* set don't fragment (DF) flag */
			if (mbuf->packet_type &
				(RTE_PTYPE_L4_NONFRAG
				 | RTE_PTYPE_INNER_L4_NONFRAG))
				ena_tx_ctx->df = 1;
		}

		/* check if L4 checksum is needed */
		if (((mbuf->ol_flags & RTE_MBUF_F_TX_L4_MASK) == RTE_MBUF_F_TX_TCP_CKSUM) &&
		    (queue_offloads & RTE_ETH_TX_OFFLOAD_TCP_CKSUM)) {
			ena_tx_ctx->l4_proto = ENA_ETH_IO_L4_PROTO_TCP;
			ena_tx_ctx->l4_csum_enable = true;
		} else if (((mbuf->ol_flags & RTE_MBUF_F_TX_L4_MASK) ==
				RTE_MBUF_F_TX_UDP_CKSUM) &&
				(queue_offloads & RTE_ETH_TX_OFFLOAD_UDP_CKSUM)) {
			ena_tx_ctx->l4_proto = ENA_ETH_IO_L4_PROTO_UDP;
			ena_tx_ctx->l4_csum_enable = true;
		} else {
			ena_tx_ctx->l4_proto = ENA_ETH_IO_L4_PROTO_UNKNOWN;
			ena_tx_ctx->l4_csum_enable = false;
		}

		ena_meta->mss = mbuf->tso_segsz;
		ena_meta->l3_hdr_len = mbuf->l3_len;
		ena_meta->l3_hdr_offset = mbuf->l2_len;

		ena_tx_ctx->meta_valid = true;
	} else if (disable_meta_caching) {
		memset(ena_meta, 0, sizeof(*ena_meta));
		ena_tx_ctx->meta_valid = true;
	} else {
		ena_tx_ctx->meta_valid = false;
	}
}

static int validate_tx_req_id(struct ena_ring *tx_ring, u16 req_id)
{
	struct ena_tx_buffer *tx_info = NULL;

	if (likely(req_id < tx_ring->ring_size)) {
		tx_info = &tx_ring->tx_buffer_info[req_id];
		if (likely(tx_info->mbuf))
			return 0;
	}

	if (tx_info)
		PMD_TX_LOG_LINE(ERR, "tx_info doesn't have valid mbuf. queue %d:%d req_id %u",
			tx_ring->port_id, tx_ring->id, req_id);
	else
		PMD_TX_LOG_LINE(ERR, "Invalid req_id: %hu in queue %d:%d",
			req_id, tx_ring->port_id, tx_ring->id);

	/* Trigger device reset */
	++tx_ring->tx_stats.bad_req_id;
	ena_trigger_reset(tx_ring->adapter, ENA_REGS_RESET_INV_TX_REQ_ID);
	return -EFAULT;
}

static void ena_config_host_info(struct ena_com_dev *ena_dev)
{
	struct ena_admin_host_info *host_info;
	int rc;

	/* Allocate only the host info */
	rc = ena_com_allocate_host_info(ena_dev);
	if (rc) {
		PMD_DRV_LOG_LINE(ERR, "Cannot allocate host info");
		return;
	}

	host_info = ena_dev->host_attr.host_info;

	host_info->os_type = ENA_ADMIN_OS_DPDK;
	host_info->kernel_ver = RTE_VERSION;
	strlcpy((char *)host_info->kernel_ver_str, rte_version(),
		sizeof(host_info->kernel_ver_str));
	host_info->os_dist = RTE_VERSION;
	strlcpy((char *)host_info->os_dist_str, rte_version(),
		sizeof(host_info->os_dist_str));
	host_info->driver_version =
		(DRV_MODULE_VER_MAJOR) |
		(DRV_MODULE_VER_MINOR << ENA_ADMIN_HOST_INFO_MINOR_SHIFT) |
		(DRV_MODULE_VER_SUBMINOR <<
			ENA_ADMIN_HOST_INFO_SUB_MINOR_SHIFT);
	host_info->num_cpus = rte_lcore_count();

	host_info->driver_supported_features =
		ENA_ADMIN_HOST_INFO_RX_OFFSET_MASK |
		ENA_ADMIN_HOST_INFO_RSS_CONFIGURABLE_FUNCTION_KEY_MASK;

	rc = ena_com_set_host_attributes(ena_dev);
	if (rc) {
		if (rc == ENA_COM_UNSUPPORTED)
			PMD_DRV_LOG_LINE(WARNING, "Cannot set host attributes");
		else
			PMD_DRV_LOG_LINE(ERR, "Cannot set host attributes");

		goto err;
	}

	return;

err:
	ena_com_delete_host_info(ena_dev);
}

/* This function calculates the number of xstats based on the current config */
static unsigned int ena_xstats_calc_num(struct rte_eth_dev_data *data)
{
	struct ena_adapter *adapter = data->dev_private;

	return ENA_STATS_ARRAY_GLOBAL +
		adapter->metrics_num +
		ENA_STATS_ARRAY_ENA_SRD +
		(data->nb_tx_queues * ENA_STATS_ARRAY_TX) +
		(data->nb_rx_queues * ENA_STATS_ARRAY_RX);
}

static void ena_config_debug_area(struct ena_adapter *adapter)
{
	u32 debug_area_size;
	int rc, ss_count;

	ss_count = ena_xstats_calc_num(adapter->edev_data);

	/* allocate 32 bytes for each string and 64bit for the value */
	debug_area_size = ss_count * ETH_GSTRING_LEN + sizeof(u64) * ss_count;

	rc = ena_com_allocate_debug_area(&adapter->ena_dev, debug_area_size);
	if (rc) {
		PMD_DRV_LOG_LINE(ERR, "Cannot allocate debug area");
		return;
	}

	rc = ena_com_set_host_attributes(&adapter->ena_dev);
	if (rc) {
		if (rc == ENA_COM_UNSUPPORTED)
			PMD_DRV_LOG_LINE(WARNING, "Cannot set host attributes");
		else
			PMD_DRV_LOG_LINE(ERR, "Cannot set host attributes");

		goto err;
	}

	return;
err:
	ena_com_delete_debug_area(&adapter->ena_dev);
}

static int ena_close(struct rte_eth_dev *dev)
{
	struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev);
	struct rte_intr_handle *intr_handle = pci_dev->intr_handle;
	struct ena_adapter *adapter = dev->data->dev_private;
	struct ena_com_dev *ena_dev = &adapter->ena_dev;
	int ret = 0;
	int rc;

	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
		return 0;

	if (adapter->state == ENA_ADAPTER_STATE_CLOSED)
		return 0;

	if (adapter->state == ENA_ADAPTER_STATE_RUNNING)
		ret = ena_stop(dev);
	adapter->state = ENA_ADAPTER_STATE_CLOSED;

	if (!adapter->control_path_poll_interval) {
		rte_intr_disable(intr_handle);
		rc = rte_intr_callback_unregister_sync(intr_handle, ena_control_path_handler, dev);
		if (unlikely(rc != 0))
			PMD_INIT_LOG_LINE(ERR, "Failed to unregister interrupt handler");
	} else {
		rte_eal_alarm_cancel(ena_control_path_poll_handler, dev);
	}

	ena_rx_queue_release_all(dev);
	ena_tx_queue_release_all(dev);

	rte_free(adapter->drv_stats);
	adapter->drv_stats = NULL;

	ena_com_set_admin_running_state(ena_dev, false);

	ena_com_rss_destroy(ena_dev);

	ena_com_delete_debug_area(ena_dev);
	ena_com_delete_host_info(ena_dev);

	ena_com_abort_admin_commands(ena_dev);
	ena_com_wait_for_abort_completion(ena_dev);
	ena_com_admin_destroy(ena_dev);
	ena_com_mmio_reg_read_request_destroy(ena_dev);
	ena_com_delete_customer_metrics_buffer(ena_dev);

	/*
	 * MAC is not allocated dynamically. Setting it to NULL should prevent
	 * the release of the resource in rte_eth_dev_release_port().
	 */
	dev->data->mac_addrs = NULL;

	return ret;
}

static int
ena_dev_reset(struct rte_eth_dev *dev)
{
	int rc = 0;

	/* Cannot release memory in secondary process */
	if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
		PMD_DRV_LOG_LINE(WARNING, "dev_reset not supported in secondary.");
		return -EPERM;
	}

	rc = eth_ena_dev_uninit(dev);
	if (rc) {
		PMD_INIT_LOG_LINE(CRIT, "Failed to un-initialize device");
		return rc;
	}

	rc = eth_ena_dev_init(dev);
	if (rc)
		PMD_INIT_LOG_LINE(CRIT, "Cannot initialize device");

	return rc;
}

static void ena_rx_queue_release_all(struct rte_eth_dev *dev)
{
	int nb_queues = dev->data->nb_rx_queues;
	int i;

	for (i = 0; i < nb_queues; i++)
		ena_rx_queue_release(dev, i);
}

static void ena_tx_queue_release_all(struct rte_eth_dev *dev)
{
	int nb_queues = dev->data->nb_tx_queues;
	int i;

	for (i = 0; i < nb_queues; i++)
		ena_tx_queue_release(dev, i);
}

static void ena_rx_queue_release(struct rte_eth_dev *dev, uint16_t qid)
{
	struct ena_ring *ring = dev->data->rx_queues[qid];

	/* Free ring resources */
	rte_free(ring->rx_buffer_info);
	ring->rx_buffer_info = NULL;

	rte_free(ring->rx_refill_buffer);
	ring->rx_refill_buffer = NULL;

	rte_free(ring->empty_rx_reqs);
	ring->empty_rx_reqs = NULL;

	ring->configured = 0;

	PMD_DRV_LOG_LINE(NOTICE, "Rx queue %d:%d released",
		ring->port_id, ring->id);
}

static void ena_tx_queue_release(struct rte_eth_dev *dev, uint16_t qid)
{
	struct ena_ring *ring = dev->data->tx_queues[qid];

	/* Free ring resources */
	rte_free(ring->push_buf_intermediate_buf);

	rte_free(ring->tx_buffer_info);

	rte_free(ring->empty_tx_reqs);

	ring->empty_tx_reqs = NULL;
	ring->tx_buffer_info = NULL;
	ring->push_buf_intermediate_buf = NULL;

	ring->configured = 0;

	PMD_DRV_LOG_LINE(NOTICE, "Tx queue %d:%d released",
		ring->port_id, ring->id);
}

static void ena_rx_queue_release_bufs(struct ena_ring *ring)
{
	unsigned int i;

	for (i = 0; i < ring->ring_size; ++i) {
		struct ena_rx_buffer *rx_info = &ring->rx_buffer_info[i];

		if (rx_info->mbuf) {
			rte_mbuf_raw_free(rx_info->mbuf);
			rx_info->mbuf = NULL;
		}
	}
}

static void ena_tx_queue_release_bufs(struct ena_ring *ring)
{
	unsigned int i;

	for (i = 0; i < ring->ring_size; ++i) {
		struct ena_tx_buffer *tx_buf = &ring->tx_buffer_info[i];

		if (tx_buf->mbuf) {
			rte_pktmbuf_free(tx_buf->mbuf);
			tx_buf->mbuf = NULL;
		}
	}
}

static int ena_link_update(struct rte_eth_dev *dev,
			   __rte_unused int wait_to_complete)
{
	struct rte_eth_link *link = &dev->data->dev_link;
	struct ena_adapter *adapter = dev->data->dev_private;

	link->link_status = adapter->link_status ? RTE_ETH_LINK_UP : RTE_ETH_LINK_DOWN;
	link->link_speed = RTE_ETH_SPEED_NUM_NONE;
	link->link_duplex = RTE_ETH_LINK_FULL_DUPLEX;

	return 0;
}

static int ena_queue_start_all(struct rte_eth_dev *dev,
			       enum ena_ring_type ring_type)
{
	struct ena_adapter *adapter = dev->data->dev_private;
	struct ena_ring *queues = NULL;
	int nb_queues;
	int i = 0;
	int rc = 0;

	if (ring_type == ENA_RING_TYPE_RX) {
		queues = adapter->rx_ring;
		nb_queues = dev->data->nb_rx_queues;
	} else {
		queues = adapter->tx_ring;
		nb_queues = dev->data->nb_tx_queues;
	}
	for (i = 0; i < nb_queues; i++) {
		if (queues[i].configured) {
			if (ring_type == ENA_RING_TYPE_RX) {
				ena_assert_msg(
					dev->data->rx_queues[i] == &queues[i],
					"Inconsistent state of Rx queues\n");
			} else {
				ena_assert_msg(
					dev->data->tx_queues[i] == &queues[i],
					"Inconsistent state of Tx queues\n");
			}

			rc = ena_queue_start(dev, &queues[i]);

			if (rc) {
				PMD_INIT_LOG_LINE(ERR,
					"Failed to start queue[%d] of type(%d)",
					i, ring_type);
				goto err;
			}
		}
	}

	return 0;

err:
	while (i--)
		if (queues[i].configured)
			ena_queue_stop(&queues[i]);

	return rc;
}

static int
ena_calc_io_queue_size(struct ena_calc_queue_size_ctx *ctx,
		       bool use_large_llq_hdr)
{
	struct ena_admin_feature_llq_desc *dev = &ctx->get_feat_ctx->llq;
	struct ena_com_dev *ena_dev = ctx->ena_dev;
	uint32_t max_tx_queue_size;
	uint32_t max_rx_queue_size;

	if (ena_dev->supported_features & BIT(ENA_ADMIN_MAX_QUEUES_EXT)) {
		struct ena_admin_queue_ext_feature_fields *max_queue_ext =
			&ctx->get_feat_ctx->max_queue_ext.max_queue_ext;
		max_rx_queue_size = RTE_MIN(max_queue_ext->max_rx_cq_depth,
			max_queue_ext->max_rx_sq_depth);
		max_tx_queue_size = max_queue_ext->max_tx_cq_depth;

		if (ena_dev->tx_mem_queue_type ==
		    ENA_ADMIN_PLACEMENT_POLICY_DEV) {
			max_tx_queue_size = RTE_MIN(max_tx_queue_size,
				dev->max_llq_depth);
		} else {
			max_tx_queue_size = RTE_MIN(max_tx_queue_size,
				max_queue_ext->max_tx_sq_depth);
		}

		ctx->max_rx_sgl_size = RTE_MIN(ENA_PKT_MAX_BUFS,
			max_queue_ext->max_per_packet_rx_descs);
		ctx->max_tx_sgl_size = RTE_MIN(ENA_PKT_MAX_BUFS,
			max_queue_ext->max_per_packet_tx_descs);
	} else {
		struct ena_admin_queue_feature_desc *max_queues =
			&ctx->get_feat_ctx->max_queues;
		max_rx_queue_size = RTE_MIN(max_queues->max_cq_depth,
			max_queues->max_sq_depth);
		max_tx_queue_size = max_queues->max_cq_depth;

		if (ena_dev->tx_mem_queue_type ==
		    ENA_ADMIN_PLACEMENT_POLICY_DEV) {
			max_tx_queue_size = RTE_MIN(max_tx_queue_size,
				dev->max_llq_depth);
		} else {
			max_tx_queue_size = RTE_MIN(max_tx_queue_size,
				max_queues->max_sq_depth);
		}

		ctx->max_rx_sgl_size = RTE_MIN(ENA_PKT_MAX_BUFS,
			max_queues->max_packet_rx_descs);
		ctx->max_tx_sgl_size = RTE_MIN(ENA_PKT_MAX_BUFS,
			max_queues->max_packet_tx_descs);
	}

	/* Round down to the nearest power of 2 */
	max_rx_queue_size = rte_align32prevpow2(max_rx_queue_size);
	max_tx_queue_size = rte_align32prevpow2(max_tx_queue_size);

	if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV && use_large_llq_hdr) {
		/* intersection between driver configuration and device capabilities */
		if (dev->entry_size_ctrl_supported & ENA_ADMIN_LIST_ENTRY_SIZE_256B) {
			if (dev->max_wide_llq_depth == MAX_WIDE_LLQ_DEPTH_UNSUPPORTED) {
				/* Devices that do not support the double-sized ENA memory BAR will
				 * report max_wide_llq_depth as 0. In such a case, the driver halves
				 * the queue depth when working in the large llq policy.
				 */
				max_tx_queue_size >>= 1;
				PMD_INIT_LOG_LINE(INFO,
					"large LLQ policy requires limiting Tx queue size to %u entries",
					max_tx_queue_size);
			} else if (dev->max_wide_llq_depth < max_tx_queue_size) {
				/* In case the queue depth that the driver calculated exceeds
				 * the maximal value that the device allows, it will be limited
				 * to that maximal value
				 */
				max_tx_queue_size = dev->max_wide_llq_depth;
			}
		} else {
			PMD_INIT_LOG_LINE(INFO,
				"Forcing large LLQ headers failed since device lacks this support");
		}
	}

	if (unlikely(max_rx_queue_size == 0 || max_tx_queue_size == 0)) {
		PMD_INIT_LOG_LINE(ERR, "Invalid queue size");
		return -EFAULT;
	}

	ctx->max_tx_queue_size = max_tx_queue_size;
	ctx->max_rx_queue_size = max_rx_queue_size;

	PMD_DRV_LOG_LINE(INFO, "tx queue size %u", max_tx_queue_size);
	return 0;
}

static void ena_stats_restart(struct rte_eth_dev *dev)
{
	struct ena_adapter *adapter = dev->data->dev_private;

	rte_atomic64_init(&adapter->drv_stats->ierrors);
	rte_atomic64_init(&adapter->drv_stats->oerrors);
	rte_atomic64_init(&adapter->drv_stats->rx_nombuf);
	adapter->drv_stats->rx_drops = 0;
}

static int ena_stats_get(struct rte_eth_dev *dev,
			 struct rte_eth_stats *stats)
{
	struct ena_admin_basic_stats ena_stats;
	struct ena_adapter *adapter = dev->data->dev_private;
	struct ena_com_dev *ena_dev = &adapter->ena_dev;
	int rc;
	int i;
	int max_rings_stats;

	memset(&ena_stats, 0, sizeof(ena_stats));

	rte_spinlock_lock(&adapter->admin_lock);
	rc = ENA_PROXY(adapter, ena_com_get_dev_basic_stats, ena_dev,
		&ena_stats);
	rte_spinlock_unlock(&adapter->admin_lock);
	if (unlikely(rc)) {
		PMD_DRV_LOG_LINE(ERR, "Could not retrieve statistics from ENA");
		return rc;
	}

	/* Set of basic statistics from ENA */
	stats->ipackets = __MERGE_64B_H_L(ena_stats.rx_pkts_high,
		ena_stats.rx_pkts_low);
	stats->opackets = __MERGE_64B_H_L(ena_stats.tx_pkts_high,
		ena_stats.tx_pkts_low);
	stats->ibytes = __MERGE_64B_H_L(ena_stats.rx_bytes_high,
		ena_stats.rx_bytes_low);
	stats->obytes = __MERGE_64B_H_L(ena_stats.tx_bytes_high,
		ena_stats.tx_bytes_low);

	/* Driver related stats */
	stats->imissed = adapter->drv_stats->rx_drops;
	stats->ierrors = rte_atomic64_read(&adapter->drv_stats->ierrors);
	stats->oerrors = rte_atomic64_read(&adapter->drv_stats->oerrors);
	stats->rx_nombuf = rte_atomic64_read(&adapter->drv_stats->rx_nombuf);

	max_rings_stats = RTE_MIN(dev->data->nb_rx_queues,
		RTE_ETHDEV_QUEUE_STAT_CNTRS);
	for (i = 0; i < max_rings_stats; ++i) {
		struct ena_stats_rx *rx_stats = &adapter->rx_ring[i].rx_stats;

		stats->q_ibytes[i] = rx_stats->bytes;
		stats->q_ipackets[i] = rx_stats->cnt;
		stats->q_errors[i] = rx_stats->bad_desc_num +
			rx_stats->bad_req_id +
			rx_stats->bad_desc +
			rx_stats->unknown_error;
	}

	max_rings_stats = RTE_MIN(dev->data->nb_tx_queues,
		RTE_ETHDEV_QUEUE_STAT_CNTRS);
	for (i = 0; i < max_rings_stats; ++i) {
		struct ena_stats_tx *tx_stats = &adapter->tx_ring[i].tx_stats;

		stats->q_obytes[i] = tx_stats->bytes;
		stats->q_opackets[i] = tx_stats->cnt;
	}

	return 0;
}

static int ena_mtu_set(struct rte_eth_dev *dev, uint16_t mtu)
{
	struct ena_adapter *adapter;
	struct ena_com_dev *ena_dev;
	int rc = 0;

	ena_assert_msg(dev->data != NULL, "Uninitialized device\n");
	ena_assert_msg(dev->data->dev_private != NULL, "Uninitialized device\n");
	adapter = dev->data->dev_private;

	ena_dev = &adapter->ena_dev;
	ena_assert_msg(ena_dev != NULL, "Uninitialized device\n");

	rc = ENA_PROXY(adapter, ena_com_set_dev_mtu, ena_dev, mtu);
	if (rc)
		PMD_DRV_LOG_LINE(ERR, "Could not set MTU: %d", mtu);
	else
		PMD_DRV_LOG_LINE(NOTICE, "MTU set to: %d", mtu);

	return rc;
}

static int ena_start(struct rte_eth_dev *dev)
{
	struct ena_adapter *adapter = dev->data->dev_private;
	uint64_t ticks;
	int rc = 0;
	uint16_t i;

	/* Cannot allocate memory in secondary process */
	if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
		PMD_DRV_LOG_LINE(WARNING, "dev_start not supported in secondary.");
		return -EPERM;
	}

	rc = ena_setup_rx_intr(dev);
	if (rc)
		return rc;

	rc = ena_queue_start_all(dev, ENA_RING_TYPE_RX);
	if (rc)
		return rc;

	rc = ena_queue_start_all(dev, ENA_RING_TYPE_TX);
	if (rc)
		goto err_start_tx;

	if (adapter->edev_data->dev_conf.rxmode.mq_mode & RTE_ETH_MQ_RX_RSS_FLAG) {
		rc = ena_rss_configure(adapter);
		if (rc)
			goto err_rss_init;
	}

	ena_stats_restart(dev);

	adapter->timestamp_wd = rte_get_timer_cycles();
	adapter->keep_alive_timeout = ENA_DEVICE_KALIVE_TIMEOUT;

	ticks = rte_get_timer_hz();
	rte_timer_reset(&adapter->timer_wd, ticks, PERIODICAL, rte_lcore_id(),
		ena_timer_wd_callback, dev);

	++adapter->dev_stats.dev_start;
	adapter->state = ENA_ADAPTER_STATE_RUNNING;

	for (i = 0; i < dev->data->nb_rx_queues; i++)
		dev->data->rx_queue_state[i] = RTE_ETH_QUEUE_STATE_STARTED;
	for (i = 0; i < dev->data->nb_tx_queues; i++)
		dev->data->tx_queue_state[i] = RTE_ETH_QUEUE_STATE_STARTED;

	return 0;

err_rss_init:
	ena_queue_stop_all(dev, ENA_RING_TYPE_TX);
err_start_tx:
	ena_queue_stop_all(dev, ENA_RING_TYPE_RX);
	return rc;
}

static int ena_stop(struct rte_eth_dev *dev)
{
	struct ena_adapter *adapter = dev->data->dev_private;
	struct ena_com_dev *ena_dev = &adapter->ena_dev;
	struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev);
	struct rte_intr_handle *intr_handle = pci_dev->intr_handle;
	uint16_t i;
	int rc;
	/* Cannot free memory in secondary process */
	if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
		PMD_DRV_LOG_LINE(WARNING, "dev_stop not supported in secondary.");
		return -EPERM;
	}

	rte_timer_stop_sync(&adapter->timer_wd);
	ena_queue_stop_all(dev, ENA_RING_TYPE_TX);
	ena_queue_stop_all(dev, ENA_RING_TYPE_RX);

	if (adapter->trigger_reset) {
		rc = ena_com_dev_reset(ena_dev, adapter->reset_reason);
		if (rc)
			PMD_DRV_LOG_LINE(ERR, "Device reset failed, rc: %d", rc);
	}

	rte_intr_disable(intr_handle);

	rte_intr_efd_disable(intr_handle);

	/* Cleanup vector list */
	rte_intr_vec_list_free(intr_handle);

	rte_intr_enable(intr_handle);

	++adapter->dev_stats.dev_stop;
	adapter->state = ENA_ADAPTER_STATE_STOPPED;
	dev->data->dev_started = 0;

	for (i = 0; i < dev->data->nb_rx_queues; i++)
		dev->data->rx_queue_state[i] = RTE_ETH_QUEUE_STATE_STOPPED;
	for (i = 0; i < dev->data->nb_tx_queues; i++)
		dev->data->tx_queue_state[i] = RTE_ETH_QUEUE_STATE_STOPPED;

	return 0;
}

static int ena_create_io_queue(struct rte_eth_dev *dev, struct ena_ring *ring)
{
	struct ena_adapter *adapter = ring->adapter;
	struct ena_com_dev *ena_dev = &adapter->ena_dev;
	struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev);
	struct rte_intr_handle *intr_handle = pci_dev->intr_handle;
	struct ena_com_create_io_ctx ctx =
		/* policy set to _HOST just to satisfy icc compiler */
		{ ENA_ADMIN_PLACEMENT_POLICY_HOST,
		  0, 0, 0, 0, 0 };
	uint16_t ena_qid;
	unsigned int i;
	int rc;

	ctx.msix_vector = -1;
	if (ring->type == ENA_RING_TYPE_TX) {
		ena_qid = ENA_IO_TXQ_IDX(ring->id);
		ctx.direction = ENA_COM_IO_QUEUE_DIRECTION_TX;
		ctx.mem_queue_type = ena_dev->tx_mem_queue_type;
		for (i = 0; i < ring->ring_size; i++)
			ring->empty_tx_reqs[i] = i;
	} else {
		ena_qid = ENA_IO_RXQ_IDX(ring->id);
		ctx.direction = ENA_COM_IO_QUEUE_DIRECTION_RX;
		if (rte_intr_dp_is_en(intr_handle))
			ctx.msix_vector =
				rte_intr_vec_list_index_get(intr_handle,
					ring->id);

		for (i = 0; i < ring->ring_size; i++)
			ring->empty_rx_reqs[i] = i;
	}
	ctx.queue_size = ring->ring_size;
	ctx.qid = ena_qid;
	ctx.numa_node = ring->numa_socket_id;

	rc = ena_com_create_io_queue(ena_dev, &ctx);
	if (rc) {
		PMD_DRV_LOG_LINE(ERR,
			"Failed to create IO queue[%d] (qid:%d), rc: %d",
			ring->id, ena_qid, rc);
		return rc;
	}

	rc = ena_com_get_io_handlers(ena_dev, ena_qid,
		&ring->ena_com_io_sq,
		&ring->ena_com_io_cq);
	if (rc) {
		PMD_DRV_LOG_LINE(ERR,
			"Failed to get IO queue[%d] handlers, rc: %d",
			ring->id, rc);
		ena_com_destroy_io_queue(ena_dev, ena_qid);
		return rc;
	}

	if (ring->type == ENA_RING_TYPE_TX)
		ena_com_update_numa_node(ring->ena_com_io_cq, ctx.numa_node);

	/* Start with Rx interrupts being masked. */
	if (ring->type == ENA_RING_TYPE_RX && rte_intr_dp_is_en(intr_handle))
		ena_rx_queue_intr_disable(dev, ring->id);

	return 0;
}

static void ena_queue_stop(struct ena_ring *ring)
{
	struct ena_com_dev *ena_dev = &ring->adapter->ena_dev;

	if (ring->type == ENA_RING_TYPE_RX) {
		ena_com_destroy_io_queue(ena_dev, ENA_IO_RXQ_IDX(ring->id));
		ena_rx_queue_release_bufs(ring);
	} else {
		ena_com_destroy_io_queue(ena_dev, ENA_IO_TXQ_IDX(ring->id));
		ena_tx_queue_release_bufs(ring);
	}
}

static void ena_queue_stop_all(struct rte_eth_dev *dev,
			       enum ena_ring_type ring_type)
{
	struct ena_adapter *adapter = dev->data->dev_private;
	struct ena_ring *queues = NULL;
	uint16_t nb_queues, i;

	if (ring_type == ENA_RING_TYPE_RX) {
		queues = adapter->rx_ring;
		nb_queues = dev->data->nb_rx_queues;
	} else {
		queues = adapter->tx_ring;
		nb_queues = dev->data->nb_tx_queues;
	}

	for (i = 0; i < nb_queues; ++i)
		if (queues[i].configured)
			ena_queue_stop(&queues[i]);
}

static int ena_queue_start(struct rte_eth_dev *dev, struct ena_ring *ring)
{
	int rc, bufs_num;

	ena_assert_msg(ring->configured == 1,
		       "Trying to start unconfigured queue\n");

	rc = ena_create_io_queue(dev, ring);
	if (rc) {
		PMD_INIT_LOG_LINE(ERR, "Failed to create IO queue");
		return rc;
	}

	ring->next_to_clean = 0;
	ring->next_to_use = 0;

	if (ring->type == ENA_RING_TYPE_TX) {
		ring->tx_stats.available_desc =
			ena_com_free_q_entries(ring->ena_com_io_sq);
		return 0;
	}

	bufs_num = ring->ring_size - 1;
	rc = ena_populate_rx_queue(ring, bufs_num);
	if (rc != bufs_num) {
		ena_com_destroy_io_queue(&ring->adapter->ena_dev,
			ENA_IO_RXQ_IDX(ring->id));
		PMD_INIT_LOG_LINE(ERR, "Failed to populate Rx ring");
		return ENA_COM_FAULT;
	}
	/* Flush the per-core Rx buffer pool caches, as the buffers can be
	 * used on other cores as well.
	 */
	rte_mempool_cache_flush(NULL, ring->mb_pool);

	return 0;
}

static int ena_tx_queue_setup(struct rte_eth_dev *dev,
			      uint16_t queue_idx,
			      uint16_t nb_desc,
			      unsigned int socket_id,
			      const struct rte_eth_txconf *tx_conf)
{
	struct ena_ring *txq = NULL;
	struct ena_adapter *adapter = dev->data->dev_private;
	unsigned int i;
	uint16_t dyn_thresh;

	txq = &adapter->tx_ring[queue_idx];

	if (txq->configured) {
		PMD_DRV_LOG_LINE(CRIT,
			"API violation. Queue[%d] is already configured",
			queue_idx);
		return ENA_COM_FAULT;
	}

	if (!rte_is_power_of_2(nb_desc)) {
		PMD_DRV_LOG_LINE(ERR,
			"Unsupported size of Tx queue: %d is not a power of 2.",
			nb_desc);
		return -EINVAL;
	}

	if (nb_desc > adapter->max_tx_ring_size) {
		PMD_DRV_LOG_LINE(ERR,
			"Unsupported size of Tx queue (max size: %d)",
			adapter->max_tx_ring_size);
		return -EINVAL;
	}

	txq->port_id = dev->data->port_id;
	txq->next_to_clean = 0;
	txq->next_to_use = 0;
	txq->ring_size = nb_desc;
	txq->size_mask = nb_desc - 1;
	txq->numa_socket_id = socket_id;
	txq->pkts_without_db = false;
	txq->last_cleanup_ticks = 0;

	txq->tx_buffer_info = rte_zmalloc_socket("txq->tx_buffer_info",
		sizeof(struct ena_tx_buffer) * txq->ring_size,
		RTE_CACHE_LINE_SIZE,
		socket_id);
	if (!txq->tx_buffer_info) {
		PMD_DRV_LOG_LINE(ERR,
			"Failed to allocate memory for Tx buffer info");
		return -ENOMEM;
	}

	txq->empty_tx_reqs = rte_zmalloc_socket("txq->empty_tx_reqs",
		sizeof(uint16_t) * txq->ring_size,
		RTE_CACHE_LINE_SIZE,
		socket_id);
	if (!txq->empty_tx_reqs) {
		PMD_DRV_LOG_LINE(ERR,
			"Failed to allocate memory for empty Tx requests");
		rte_free(txq->tx_buffer_info);
		return -ENOMEM;
	}

	txq->push_buf_intermediate_buf =
		rte_zmalloc_socket("txq->push_buf_intermediate_buf",
			txq->tx_max_header_size,
			RTE_CACHE_LINE_SIZE,
			socket_id);
	if (!txq->push_buf_intermediate_buf) {
		PMD_DRV_LOG_LINE(ERR, "Failed to alloc push buffer for LLQ");
		rte_free(txq->tx_buffer_info);
		rte_free(txq->empty_tx_reqs);
		return -ENOMEM;
	}

	for (i = 0; i < txq->ring_size; i++)
		txq->empty_tx_reqs[i] = i;

	txq->offloads = tx_conf->offloads | dev->data->dev_conf.txmode.offloads;

	/* Check if caller provided the Tx cleanup threshold value. */
	if (tx_conf->tx_free_thresh != 0) {
		txq->tx_free_thresh = tx_conf->tx_free_thresh;
	} else {
		dyn_thresh = txq->ring_size -
			txq->ring_size / ENA_REFILL_THRESH_DIVIDER;
		txq->tx_free_thresh = RTE_MAX(dyn_thresh,
			txq->ring_size - ENA_REFILL_THRESH_PACKET);
	}

	txq->missing_tx_completion_threshold =
		RTE_MIN(txq->ring_size / 2, ENA_DEFAULT_MISSING_COMP);

	/* Store pointer to this queue in upper layer */
	txq->configured = 1;
	dev->data->tx_queues[queue_idx] = txq;

	return 0;
}

static int ena_rx_queue_setup(struct rte_eth_dev *dev,
			      uint16_t queue_idx,
			      uint16_t nb_desc,
			      unsigned int socket_id,
			      const struct rte_eth_rxconf *rx_conf,
			      struct rte_mempool *mp)
{
	struct ena_adapter *adapter = dev->data->dev_private;
	struct ena_ring *rxq = NULL;
	size_t buffer_size;
	int i;
	uint16_t dyn_thresh;

	rxq = &adapter->rx_ring[queue_idx];
	if (rxq->configured) {
		PMD_DRV_LOG_LINE(CRIT,
			"API violation. Queue[%d] is already configured",
			queue_idx);
		return ENA_COM_FAULT;
	}

	if (!rte_is_power_of_2(nb_desc)) {
		PMD_DRV_LOG_LINE(ERR,
			"Unsupported size of Rx queue: %d is not a power of 2.",
			nb_desc);
		return -EINVAL;
	}

	if (nb_desc > adapter->max_rx_ring_size) {
		PMD_DRV_LOG_LINE(ERR,
			"Unsupported size of Rx queue (max size: %d)",
			adapter->max_rx_ring_size);
		return -EINVAL;
	}

	/* ENA doesn't support buffers smaller than 1400 bytes */
	buffer_size = rte_pktmbuf_data_room_size(mp) - RTE_PKTMBUF_HEADROOM;
	if (buffer_size < ENA_RX_BUF_MIN_SIZE) {
		PMD_DRV_LOG_LINE(ERR,
			"Unsupported size of Rx buffer: %zu (min size: %d)",
			buffer_size, ENA_RX_BUF_MIN_SIZE);
		return -EINVAL;
	}

	rxq->port_id = dev->data->port_id;
	rxq->next_to_clean = 0;
	rxq->next_to_use = 0;
	rxq->ring_size = nb_desc;
	rxq->size_mask = nb_desc - 1;
	rxq->numa_socket_id = socket_id;
	rxq->mb_pool = mp;

	rxq->rx_buffer_info = rte_zmalloc_socket("rxq->buffer_info",
		sizeof(struct ena_rx_buffer) * nb_desc,
		RTE_CACHE_LINE_SIZE,
		socket_id);
	if (!rxq->rx_buffer_info) {
		PMD_DRV_LOG_LINE(ERR,
			"Failed to allocate memory for Rx buffer info");
		return -ENOMEM;
	}

	rxq->rx_refill_buffer = rte_zmalloc_socket("rxq->rx_refill_buffer",
		sizeof(struct rte_mbuf *) * nb_desc,
		RTE_CACHE_LINE_SIZE,
		socket_id);
	if (!rxq->rx_refill_buffer) {
		PMD_DRV_LOG_LINE(ERR,
			"Failed to allocate memory for Rx refill buffer");
		rte_free(rxq->rx_buffer_info);
		rxq->rx_buffer_info = NULL;
		return -ENOMEM;
	}

	rxq->empty_rx_reqs = rte_zmalloc_socket("rxq->empty_rx_reqs",
		sizeof(uint16_t) * nb_desc,
		RTE_CACHE_LINE_SIZE,
		socket_id);
	if (!rxq->empty_rx_reqs) {
		PMD_DRV_LOG_LINE(ERR,
			"Failed to allocate memory for empty Rx requests");
		rte_free(rxq->rx_buffer_info);
		rxq->rx_buffer_info = NULL;
		rte_free(rxq->rx_refill_buffer);
		rxq->rx_refill_buffer = NULL;
		return -ENOMEM;
	}

	for (i = 0; i < nb_desc; i++)
		rxq->empty_rx_reqs[i] = i;

	rxq->offloads = rx_conf->offloads | dev->data->dev_conf.rxmode.offloads;

	if (rx_conf->rx_free_thresh != 0) {
		rxq->rx_free_thresh = rx_conf->rx_free_thresh;
	} else {
		dyn_thresh = rxq->ring_size / ENA_REFILL_THRESH_DIVIDER;
		rxq->rx_free_thresh = RTE_MIN(dyn_thresh,
			(uint16_t)(ENA_REFILL_THRESH_PACKET));
	}

	/* Store pointer to this queue in upper layer */
	rxq->configured = 1;
	dev->data->rx_queues[queue_idx] = rxq;

	return 0;
}

static int ena_add_single_rx_desc(struct ena_com_io_sq *io_sq,
				  struct rte_mbuf *mbuf, uint16_t id)
{
	struct ena_com_buf ebuf;
	int rc;

	/* prepare physical address for DMA transaction */
	ebuf.paddr = mbuf->buf_iova + RTE_PKTMBUF_HEADROOM;
	ebuf.len = mbuf->buf_len - RTE_PKTMBUF_HEADROOM;

	/* pass resource to device */
	rc = ena_com_add_single_rx_desc(io_sq, &ebuf, id);
	if (unlikely(rc != 0))
		PMD_RX_LOG_LINE(WARNING, "Failed adding Rx desc");

	return rc;
}

static int ena_populate_rx_queue(struct ena_ring *rxq, unsigned int count)
{
	unsigned int i;
	int rc;
	uint16_t next_to_use = rxq->next_to_use;
	uint16_t req_id;
#ifdef RTE_ETHDEV_DEBUG_RX
	uint16_t in_use;
#endif
	struct rte_mbuf **mbufs = rxq->rx_refill_buffer;
= rxq->rx_refill_buffer; 1782 1783 if (unlikely(!count)) 1784 return 0; 1785 1786 #ifdef RTE_ETHDEV_DEBUG_RX 1787 in_use = rxq->ring_size - 1 - 1788 ena_com_free_q_entries(rxq->ena_com_io_sq); 1789 if (unlikely((in_use + count) >= rxq->ring_size)) 1790 PMD_RX_LOG_LINE(ERR, "Bad Rx ring state"); 1791 #endif 1792 1793 /* get resources for incoming packets */ 1794 rc = rte_pktmbuf_alloc_bulk(rxq->mb_pool, mbufs, count); 1795 if (unlikely(rc < 0)) { 1796 rte_atomic64_inc(&rxq->adapter->drv_stats->rx_nombuf); 1797 ++rxq->rx_stats.mbuf_alloc_fail; 1798 PMD_RX_LOG_LINE(DEBUG, "There are not enough free buffers"); 1799 return 0; 1800 } 1801 1802 for (i = 0; i < count; i++) { 1803 struct rte_mbuf *mbuf = mbufs[i]; 1804 struct ena_rx_buffer *rx_info; 1805 1806 if (likely((i + 4) < count)) 1807 rte_prefetch0(mbufs[i + 4]); 1808 1809 req_id = rxq->empty_rx_reqs[next_to_use]; 1810 rx_info = &rxq->rx_buffer_info[req_id]; 1811 1812 rc = ena_add_single_rx_desc(rxq->ena_com_io_sq, mbuf, req_id); 1813 if (unlikely(rc != 0)) 1814 break; 1815 1816 rx_info->mbuf = mbuf; 1817 next_to_use = ENA_IDX_NEXT_MASKED(next_to_use, rxq->size_mask); 1818 } 1819 1820 if (unlikely(i < count)) { 1821 PMD_RX_LOG_LINE(WARNING, 1822 "Refilled Rx queue[%d] with only %d/%d buffers", 1823 rxq->id, i, count); 1824 rte_pktmbuf_free_bulk(&mbufs[i], count - i); 1825 ++rxq->rx_stats.refill_partial; 1826 } 1827 1828 /* When we submitted free resources to device... */ 1829 if (likely(i > 0)) { 1830 /* ...let HW know that it can fill buffers with data. */ 1831 ena_com_write_sq_doorbell(rxq->ena_com_io_sq); 1832 1833 rxq->next_to_use = next_to_use; 1834 } 1835 1836 return i; 1837 } 1838 1839 static size_t ena_get_metrics_entries(struct ena_adapter *adapter) 1840 { 1841 struct ena_com_dev *ena_dev = &adapter->ena_dev; 1842 size_t metrics_num = 0; 1843 1844 if (ena_com_get_cap(ena_dev, ENA_ADMIN_CUSTOMER_METRICS)) 1845 metrics_num = ENA_STATS_ARRAY_METRICS; 1846 else if (ena_com_get_cap(ena_dev, ENA_ADMIN_ENI_STATS)) 1847 metrics_num = ENA_STATS_ARRAY_METRICS_LEGACY; 1848 PMD_DRV_LOG_LINE(NOTICE, "0x%x customer metrics are supported", (unsigned int)metrics_num); 1849 if (metrics_num > ENA_MAX_CUSTOMER_METRICS) { 1850 PMD_DRV_LOG_LINE(NOTICE, "Not enough space for the requested customer metrics"); 1851 metrics_num = ENA_MAX_CUSTOMER_METRICS; 1852 } 1853 return metrics_num; 1854 } 1855 1856 static int ena_device_init(struct ena_adapter *adapter, 1857 struct rte_pci_device *pdev, 1858 struct ena_com_dev_get_features_ctx *get_feat_ctx) 1859 { 1860 struct ena_com_dev *ena_dev = &adapter->ena_dev; 1861 uint32_t aenq_groups; 1862 int rc; 1863 bool readless_supported; 1864 1865 /* Initialize mmio registers */ 1866 rc = ena_com_mmio_reg_read_request_init(ena_dev); 1867 if (rc) { 1868 PMD_DRV_LOG_LINE(ERR, "Failed to init MMIO read less"); 1869 return rc; 1870 } 1871 1872 /* The PCIe configuration space revision id indicate if mmio reg 1873 * read is disabled. 
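 * When read-less access is enabled, ena_com serves register reads through
 * a host-memory response buffer written by the device instead of issuing
 * direct MMIO reads.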
1874 */ 1875 readless_supported = !(pdev->id.class_id & ENA_MMIO_DISABLE_REG_READ); 1876 ena_com_set_mmio_read_mode(ena_dev, readless_supported); 1877 1878 /* reset device */ 1879 rc = ena_com_dev_reset(ena_dev, ENA_REGS_RESET_NORMAL); 1880 if (rc) { 1881 PMD_DRV_LOG_LINE(ERR, "Cannot reset device"); 1882 goto err_mmio_read_less; 1883 } 1884 1885 /* check FW version */ 1886 rc = ena_com_validate_version(ena_dev); 1887 if (rc) { 1888 PMD_DRV_LOG_LINE(ERR, "Device version is too low"); 1889 goto err_mmio_read_less; 1890 } 1891 1892 ena_dev->dma_addr_bits = ena_com_get_dma_width(ena_dev); 1893 1894 /* ENA device administration layer init */ 1895 rc = ena_com_admin_init(ena_dev, &aenq_handlers); 1896 if (rc) { 1897 PMD_DRV_LOG_LINE(ERR, 1898 "Cannot initialize ENA admin queue"); 1899 goto err_mmio_read_less; 1900 } 1901 1902 /* To enable the msix interrupts the driver needs to know the number 1903 * of queues. So the driver uses polling mode to retrieve this 1904 * information. 1905 */ 1906 ena_com_set_admin_polling_mode(ena_dev, true); 1907 1908 ena_config_host_info(ena_dev); 1909 1910 /* Get Device Attributes and features */ 1911 rc = ena_com_get_dev_attr_feat(ena_dev, get_feat_ctx); 1912 if (rc) { 1913 PMD_DRV_LOG_LINE(ERR, 1914 "Cannot get attribute for ENA device, rc: %d", rc); 1915 goto err_admin_init; 1916 } 1917 1918 aenq_groups = BIT(ENA_ADMIN_LINK_CHANGE) | 1919 BIT(ENA_ADMIN_NOTIFICATION) | 1920 BIT(ENA_ADMIN_KEEP_ALIVE) | 1921 BIT(ENA_ADMIN_FATAL_ERROR) | 1922 BIT(ENA_ADMIN_WARNING) | 1923 BIT(ENA_ADMIN_CONF_NOTIFICATIONS); 1924 1925 aenq_groups &= get_feat_ctx->aenq.supported_groups; 1926 1927 adapter->all_aenq_groups = aenq_groups; 1928 /* The actual supported number of metrics is negotiated with the device at runtime */ 1929 adapter->metrics_num = ena_get_metrics_entries(adapter); 1930 1931 return 0; 1932 1933 err_admin_init: 1934 ena_com_admin_destroy(ena_dev); 1935 1936 err_mmio_read_less: 1937 ena_com_mmio_reg_read_request_destroy(ena_dev); 1938 1939 return rc; 1940 } 1941 1942 static void ena_control_path_handler(void *cb_arg) 1943 { 1944 struct rte_eth_dev *dev = cb_arg; 1945 struct ena_adapter *adapter = dev->data->dev_private; 1946 struct ena_com_dev *ena_dev = &adapter->ena_dev; 1947 1948 if (likely(adapter->state != ENA_ADAPTER_STATE_CLOSED)) { 1949 ena_com_admin_q_comp_intr_handler(ena_dev); 1950 ena_com_aenq_intr_handler(ena_dev, dev); 1951 } 1952 } 1953 1954 static void ena_control_path_poll_handler(void *cb_arg) 1955 { 1956 struct rte_eth_dev *dev = cb_arg; 1957 struct ena_adapter *adapter = dev->data->dev_private; 1958 int rc; 1959 1960 if (likely(adapter->state != ENA_ADAPTER_STATE_CLOSED)) { 1961 ena_control_path_handler(cb_arg); 1962 rc = rte_eal_alarm_set(adapter->control_path_poll_interval, 1963 ena_control_path_poll_handler, cb_arg); 1964 if (unlikely(rc != 0)) { 1965 PMD_DRV_LOG_LINE(ERR, "Failed to retrigger control path alarm"); 1966 ena_trigger_reset(adapter, ENA_REGS_RESET_GENERIC); 1967 } 1968 } 1969 } 1970 1971 static void check_for_missing_keep_alive(struct ena_adapter *adapter) 1972 { 1973 if (!(adapter->active_aenq_groups & BIT(ENA_ADMIN_KEEP_ALIVE))) 1974 return; 1975 1976 if (adapter->keep_alive_timeout == ENA_HW_HINTS_NO_TIMEOUT) 1977 return; 1978 1979 if (unlikely((rte_get_timer_cycles() - adapter->timestamp_wd) >= 1980 adapter->keep_alive_timeout)) { 1981 PMD_DRV_LOG_LINE(ERR, "Keep alive timeout"); 1982 ena_trigger_reset(adapter, ENA_REGS_RESET_KEEP_ALIVE_TO); 1983 ++adapter->dev_stats.wd_expired; 1984 } 1985 } 1986 1987 /* Check if admin 
queue is enabled */ 1988 static void check_for_admin_com_state(struct ena_adapter *adapter) 1989 { 1990 if (unlikely(!ena_com_get_admin_running_state(&adapter->ena_dev))) { 1991 PMD_DRV_LOG_LINE(ERR, "ENA admin queue is not in running state"); 1992 ena_trigger_reset(adapter, ENA_REGS_RESET_ADMIN_TO); 1993 } 1994 } 1995 1996 static int check_for_tx_completion_in_queue(struct ena_adapter *adapter, 1997 struct ena_ring *tx_ring) 1998 { 1999 struct ena_tx_buffer *tx_buf; 2000 uint64_t timestamp; 2001 uint64_t completion_delay; 2002 uint32_t missed_tx = 0; 2003 unsigned int i; 2004 int rc = 0; 2005 2006 for (i = 0; i < tx_ring->ring_size; ++i) { 2007 tx_buf = &tx_ring->tx_buffer_info[i]; 2008 timestamp = tx_buf->timestamp; 2009 2010 if (timestamp == 0) 2011 continue; 2012 2013 completion_delay = rte_get_timer_cycles() - timestamp; 2014 if (completion_delay > adapter->missing_tx_completion_to) { 2015 if (unlikely(!tx_buf->print_once)) { 2016 PMD_TX_LOG_LINE(WARNING, 2017 "Found a Tx that wasn't completed on time, qid %d, index %d. " 2018 "Missing Tx outstanding for %" PRIu64 " msecs.", 2019 tx_ring->id, i, completion_delay / 2020 rte_get_timer_hz() * 1000); 2021 tx_buf->print_once = true; 2022 } 2023 ++missed_tx; 2024 } 2025 } 2026 2027 if (unlikely(missed_tx > tx_ring->missing_tx_completion_threshold)) { 2028 PMD_DRV_LOG_LINE(ERR, 2029 "The number of lost Tx completions is above the threshold (%d > %d). " 2030 "Trigger the device reset.", 2031 missed_tx, 2032 tx_ring->missing_tx_completion_threshold); 2033 adapter->reset_reason = ENA_REGS_RESET_MISS_TX_CMPL; 2034 adapter->trigger_reset = true; 2035 rc = -EIO; 2036 } 2037 2038 tx_ring->tx_stats.missed_tx += missed_tx; 2039 2040 return rc; 2041 } 2042 2043 static void check_for_tx_completions(struct ena_adapter *adapter) 2044 { 2045 struct ena_ring *tx_ring; 2046 uint64_t tx_cleanup_delay; 2047 size_t qid; 2048 int budget; 2049 uint16_t nb_tx_queues = adapter->edev_data->nb_tx_queues; 2050 2051 if (adapter->missing_tx_completion_to == ENA_HW_HINTS_NO_TIMEOUT) 2052 return; 2053 2054 nb_tx_queues = adapter->edev_data->nb_tx_queues; 2055 budget = adapter->missing_tx_completion_budget; 2056 2057 qid = adapter->last_tx_comp_qid; 2058 while (budget-- > 0) { 2059 tx_ring = &adapter->tx_ring[qid]; 2060 2061 /* Tx cleanup is called only by the burst function and can be 2062 * called dynamically by the application. Also cleanup is 2063 * limited by the threshold. To avoid false detection of the 2064 * missing HW Tx completion, get the delay since last cleanup 2065 * function was called. 
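 * If the cleanup has not run within tx_cleanup_stall_delay, the
 * completions may simply not have been reaped yet, so the per-queue check
 * below is skipped for this round.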
2066 */ 2067 tx_cleanup_delay = rte_get_timer_cycles() - 2068 tx_ring->last_cleanup_ticks; 2069 if (tx_cleanup_delay < adapter->tx_cleanup_stall_delay) 2070 check_for_tx_completion_in_queue(adapter, tx_ring); 2071 qid = (qid + 1) % nb_tx_queues; 2072 } 2073 2074 adapter->last_tx_comp_qid = qid; 2075 } 2076 2077 static void ena_timer_wd_callback(__rte_unused struct rte_timer *timer, 2078 void *arg) 2079 { 2080 struct rte_eth_dev *dev = arg; 2081 struct ena_adapter *adapter = dev->data->dev_private; 2082 2083 if (unlikely(adapter->trigger_reset)) 2084 return; 2085 2086 check_for_missing_keep_alive(adapter); 2087 check_for_admin_com_state(adapter); 2088 check_for_tx_completions(adapter); 2089 2090 if (unlikely(adapter->trigger_reset)) { 2091 PMD_DRV_LOG_LINE(ERR, "Trigger reset is on"); 2092 rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_RESET, 2093 NULL); 2094 } 2095 } 2096 2097 static inline void 2098 set_default_llq_configurations(struct ena_llq_configurations *llq_config, 2099 struct ena_admin_feature_llq_desc *llq, 2100 bool use_large_llq_hdr) 2101 { 2102 llq_config->llq_header_location = ENA_ADMIN_INLINE_HEADER; 2103 llq_config->llq_stride_ctrl = ENA_ADMIN_MULTIPLE_DESCS_PER_ENTRY; 2104 llq_config->llq_num_decs_before_header = 2105 ENA_ADMIN_LLQ_NUM_DESCS_BEFORE_HEADER_2; 2106 2107 if (use_large_llq_hdr && 2108 (llq->entry_size_ctrl_supported & ENA_ADMIN_LIST_ENTRY_SIZE_256B)) { 2109 llq_config->llq_ring_entry_size = 2110 ENA_ADMIN_LIST_ENTRY_SIZE_256B; 2111 llq_config->llq_ring_entry_size_value = 256; 2112 } else { 2113 llq_config->llq_ring_entry_size = 2114 ENA_ADMIN_LIST_ENTRY_SIZE_128B; 2115 llq_config->llq_ring_entry_size_value = 128; 2116 } 2117 } 2118 2119 static int 2120 ena_set_queues_placement_policy(struct ena_adapter *adapter, 2121 struct ena_com_dev *ena_dev, 2122 struct ena_admin_feature_llq_desc *llq, 2123 struct ena_llq_configurations *llq_default_configurations) 2124 { 2125 int rc; 2126 u32 llq_feature_mask; 2127 2128 if (adapter->llq_header_policy == ENA_LLQ_POLICY_DISABLED) { 2129 PMD_DRV_LOG_LINE(WARNING, 2130 "NOTE: LLQ has been disabled as per user's request. " 2131 "This may lead to a huge performance degradation!"); 2132 ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST; 2133 return 0; 2134 } 2135 2136 llq_feature_mask = 1 << ENA_ADMIN_LLQ; 2137 if (!(ena_dev->supported_features & llq_feature_mask)) { 2138 PMD_DRV_LOG_LINE(INFO, 2139 "LLQ is not supported. Fallback to host mode policy."); 2140 ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST; 2141 return 0; 2142 } 2143 2144 if (adapter->dev_mem_base == NULL) { 2145 PMD_DRV_LOG_LINE(ERR, 2146 "LLQ is advertised as supported, but device doesn't expose mem bar"); 2147 ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST; 2148 return 0; 2149 } 2150 2151 rc = ena_com_config_dev_mode(ena_dev, llq, llq_default_configurations); 2152 if (unlikely(rc)) { 2153 PMD_INIT_LOG_LINE(WARNING, 2154 "Failed to config dev mode. 
Fallback to host mode policy.");
2155 		ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST;
2156 		return 0;
2157 	}
2158 
2159 	/* Nothing to config, exit */
2160 	if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_HOST)
2161 		return 0;
2162 
2163 	ena_dev->mem_bar = adapter->dev_mem_base;
2164 
2165 	return 0;
2166 }
2167 
2168 static uint32_t ena_calc_max_io_queue_num(struct ena_com_dev *ena_dev,
2169 	struct ena_com_dev_get_features_ctx *get_feat_ctx)
2170 {
2171 	uint32_t io_tx_sq_num, io_tx_cq_num, io_rx_num, max_num_io_queues;
2172 
2173 	/* Regular queues capabilities */
2174 	if (ena_dev->supported_features & BIT(ENA_ADMIN_MAX_QUEUES_EXT)) {
2175 		struct ena_admin_queue_ext_feature_fields *max_queue_ext =
2176 			&get_feat_ctx->max_queue_ext.max_queue_ext;
2177 		io_rx_num = RTE_MIN(max_queue_ext->max_rx_sq_num,
2178 			max_queue_ext->max_rx_cq_num);
2179 		io_tx_sq_num = max_queue_ext->max_tx_sq_num;
2180 		io_tx_cq_num = max_queue_ext->max_tx_cq_num;
2181 	} else {
2182 		struct ena_admin_queue_feature_desc *max_queues =
2183 			&get_feat_ctx->max_queues;
2184 		io_tx_sq_num = max_queues->max_sq_num;
2185 		io_tx_cq_num = max_queues->max_cq_num;
2186 		io_rx_num = RTE_MIN(io_tx_sq_num, io_tx_cq_num);
2187 	}
2188 
2189 	/* In case of LLQ use the llq number in the get feature cmd */
2190 	if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV)
2191 		io_tx_sq_num = get_feat_ctx->llq.max_llq_num;
2192 
2193 	max_num_io_queues = RTE_MIN(ENA_MAX_NUM_IO_QUEUES, io_rx_num);
2194 	max_num_io_queues = RTE_MIN(max_num_io_queues, io_tx_sq_num);
2195 	max_num_io_queues = RTE_MIN(max_num_io_queues, io_tx_cq_num);
2196 
2197 	if (unlikely(max_num_io_queues == 0)) {
2198 		PMD_DRV_LOG_LINE(ERR, "Number of IO queues cannot be 0");
2199 		return -EFAULT;
2200 	}
2201 
2202 	return max_num_io_queues;
2203 }
2204 
2205 static void
2206 ena_set_offloads(struct ena_offloads *offloads,
2207 	struct ena_admin_feature_offload_desc *offload_desc)
2208 {
2209 	if (offload_desc->tx & ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_IPV4_MASK)
2210 		offloads->tx_offloads |= ENA_IPV4_TSO;
2211 
2212 	/* Tx IPv4 checksum offloads */
2213 	if (offload_desc->tx &
2214 	    ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L3_CSUM_IPV4_MASK)
2215 		offloads->tx_offloads |= ENA_L3_IPV4_CSUM;
2216 	if (offload_desc->tx &
2217 	    ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV4_CSUM_FULL_MASK)
2218 		offloads->tx_offloads |= ENA_L4_IPV4_CSUM;
2219 	if (offload_desc->tx &
2220 	    ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV4_CSUM_PART_MASK)
2221 		offloads->tx_offloads |= ENA_L4_IPV4_CSUM_PARTIAL;
2222 
2223 	/* Tx IPv6 checksum offloads */
2224 	if (offload_desc->tx &
2225 	    ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV6_CSUM_FULL_MASK)
2226 		offloads->tx_offloads |= ENA_L4_IPV6_CSUM;
2227 	if (offload_desc->tx &
2228 	    ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV6_CSUM_PART_MASK)
2229 		offloads->tx_offloads |= ENA_L4_IPV6_CSUM_PARTIAL;
2230 
2231 	/* Rx IPv4 checksum offloads */
2232 	if (offload_desc->rx_supported &
2233 	    ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L3_CSUM_IPV4_MASK)
2234 		offloads->rx_offloads |= ENA_L3_IPV4_CSUM;
2235 	if (offload_desc->rx_supported &
2236 	    ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L4_IPV4_CSUM_MASK)
2237 		offloads->rx_offloads |= ENA_L4_IPV4_CSUM;
2238 
2239 	/* Rx IPv6 checksum offloads */
2240 	if (offload_desc->rx_supported &
2241 	    ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L4_IPV6_CSUM_MASK)
2242 		offloads->rx_offloads |= ENA_L4_IPV6_CSUM;
2243 
2244 	if (offload_desc->rx_supported &
2245 	    ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_HASH_MASK)
2246 		offloads->rx_offloads |= ENA_RX_RSS_HASH;
2247 }
2248 
2249 static int
ena_init_once(void)
2250 {
2251 	static bool init_done;
2252 
2253 	if (init_done)
2254 		return 0;
2255 
2256 	if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
2257 		/* Init timer subsystem for the ENA timer service. */
2258 		rte_timer_subsystem_init();
2259 		/* Register handler for requests from secondary processes. */
2260 		rte_mp_action_register(ENA_MP_NAME, ena_mp_primary_handle);
2261 	}
2262 
2263 	init_done = true;
2264 	return 0;
2265 }
2266 
2267 static int eth_ena_dev_init(struct rte_eth_dev *eth_dev)
2268 {
2269 	struct ena_calc_queue_size_ctx calc_queue_ctx = { 0 };
2270 	struct rte_pci_device *pci_dev;
2271 	struct rte_intr_handle *intr_handle;
2272 	struct ena_adapter *adapter = eth_dev->data->dev_private;
2273 	struct ena_com_dev *ena_dev = &adapter->ena_dev;
2274 	struct ena_com_dev_get_features_ctx get_feat_ctx;
2275 	struct ena_llq_configurations llq_config;
2276 	const char *queue_type_str;
2277 	uint32_t max_num_io_queues;
2278 	int rc;
2279 	static int adapters_found;
2280 	bool disable_meta_caching;
2281 
2282 	eth_dev->dev_ops = &ena_dev_ops;
2283 	eth_dev->rx_pkt_burst = &eth_ena_recv_pkts;
2284 	eth_dev->tx_pkt_burst = &eth_ena_xmit_pkts;
2285 	eth_dev->tx_pkt_prepare = &eth_ena_prep_pkts;
2286 
2287 	rc = ena_init_once();
2288 	if (rc != 0)
2289 		return rc;
2290 
2291 	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
2292 		return 0;
2293 
2294 	eth_dev->data->dev_flags |= RTE_ETH_DEV_AUTOFILL_QUEUE_XSTATS;
2295 
2296 	memset(adapter, 0, sizeof(struct ena_adapter));
2297 	ena_dev = &adapter->ena_dev;
2298 
2299 	adapter->edev_data = eth_dev->data;
2300 
2301 	pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev);
2302 
2303 	PMD_INIT_LOG_LINE(INFO, "Initializing " PCI_PRI_FMT,
2304 		pci_dev->addr.domain,
2305 		pci_dev->addr.bus,
2306 		pci_dev->addr.devid,
2307 		pci_dev->addr.function);
2308 
2309 	intr_handle = pci_dev->intr_handle;
2310 
2311 	adapter->regs = pci_dev->mem_resource[ENA_REGS_BAR].addr;
2312 	adapter->dev_mem_base = pci_dev->mem_resource[ENA_MEM_BAR].addr;
2313 
2314 	if (!adapter->regs) {
2315 		PMD_INIT_LOG_LINE(CRIT, "Failed to access registers BAR(%d)",
2316 			ENA_REGS_BAR);
2317 		return -ENXIO;
2318 	}
2319 
2320 	ena_dev->reg_bar = adapter->regs;
2321 	/* Pass device data as a pointer which can be passed to the IO functions
2322 	 * by the ena_com (for example - the memory allocation).
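	 * From the ena_com perspective the handle is opaque; it is only passed
	 * back to the platform memory allocation helpers.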
2323 */ 2324 ena_dev->dmadev = eth_dev->data; 2325 2326 adapter->id_number = adapters_found; 2327 2328 snprintf(adapter->name, ENA_NAME_MAX_LEN, "ena_%d", 2329 adapter->id_number); 2330 2331 /* Assign default devargs values */ 2332 adapter->missing_tx_completion_to = ENA_TX_TIMEOUT; 2333 2334 /* Get user bypass */ 2335 rc = ena_parse_devargs(adapter, pci_dev->device.devargs); 2336 if (rc != 0) { 2337 PMD_INIT_LOG_LINE(CRIT, "Failed to parse devargs"); 2338 goto err; 2339 } 2340 rc = ena_com_allocate_customer_metrics_buffer(ena_dev); 2341 if (rc != 0) { 2342 PMD_INIT_LOG_LINE(CRIT, "Failed to allocate customer metrics buffer"); 2343 goto err; 2344 } 2345 2346 /* device specific initialization routine */ 2347 rc = ena_device_init(adapter, pci_dev, &get_feat_ctx); 2348 if (rc) { 2349 PMD_INIT_LOG_LINE(CRIT, "Failed to init ENA device"); 2350 goto err_metrics_delete; 2351 } 2352 2353 /* Check if device supports LSC */ 2354 if (!(adapter->all_aenq_groups & BIT(ENA_ADMIN_LINK_CHANGE))) 2355 adapter->edev_data->dev_flags &= ~RTE_ETH_DEV_INTR_LSC; 2356 2357 bool use_large_llq_hdr = ena_use_large_llq_hdr(adapter, 2358 get_feat_ctx.llq.entry_size_recommended); 2359 set_default_llq_configurations(&llq_config, &get_feat_ctx.llq, use_large_llq_hdr); 2360 rc = ena_set_queues_placement_policy(adapter, ena_dev, 2361 &get_feat_ctx.llq, &llq_config); 2362 if (unlikely(rc)) { 2363 PMD_INIT_LOG_LINE(CRIT, "Failed to set placement policy"); 2364 return rc; 2365 } 2366 2367 if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_HOST) { 2368 queue_type_str = "Regular"; 2369 } else { 2370 queue_type_str = "Low latency"; 2371 PMD_DRV_LOG_LINE(INFO, "LLQ entry size %uB", llq_config.llq_ring_entry_size_value); 2372 } 2373 PMD_DRV_LOG_LINE(INFO, "Placement policy: %s", queue_type_str); 2374 2375 calc_queue_ctx.ena_dev = ena_dev; 2376 calc_queue_ctx.get_feat_ctx = &get_feat_ctx; 2377 2378 max_num_io_queues = ena_calc_max_io_queue_num(ena_dev, &get_feat_ctx); 2379 rc = ena_calc_io_queue_size(&calc_queue_ctx, use_large_llq_hdr); 2380 if (unlikely((rc != 0) || (max_num_io_queues == 0))) { 2381 rc = -EFAULT; 2382 goto err_device_destroy; 2383 } 2384 2385 adapter->max_tx_ring_size = calc_queue_ctx.max_tx_queue_size; 2386 adapter->max_rx_ring_size = calc_queue_ctx.max_rx_queue_size; 2387 adapter->max_tx_sgl_size = calc_queue_ctx.max_tx_sgl_size; 2388 adapter->max_rx_sgl_size = calc_queue_ctx.max_rx_sgl_size; 2389 adapter->max_num_io_queues = max_num_io_queues; 2390 2391 if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) { 2392 disable_meta_caching = 2393 !!(get_feat_ctx.llq.accel_mode.u.get.supported_flags & 2394 BIT(ENA_ADMIN_DISABLE_META_CACHING)); 2395 } else { 2396 disable_meta_caching = false; 2397 } 2398 2399 /* prepare ring structures */ 2400 ena_init_rings(adapter, disable_meta_caching); 2401 2402 ena_config_debug_area(adapter); 2403 2404 /* Set max MTU for this device */ 2405 adapter->max_mtu = get_feat_ctx.dev_attr.max_mtu; 2406 2407 ena_set_offloads(&adapter->offloads, &get_feat_ctx.offload); 2408 2409 /* Copy MAC address and point DPDK to it */ 2410 eth_dev->data->mac_addrs = (struct rte_ether_addr *)adapter->mac_addr; 2411 rte_ether_addr_copy((struct rte_ether_addr *) 2412 get_feat_ctx.dev_attr.mac_addr, 2413 (struct rte_ether_addr *)adapter->mac_addr); 2414 2415 rc = ena_com_rss_init(ena_dev, ENA_RX_RSS_TABLE_LOG_SIZE); 2416 if (unlikely(rc != 0)) { 2417 PMD_DRV_LOG_LINE(ERR, "Failed to initialize RSS in ENA device"); 2418 goto err_delete_debug_area; 2419 } 2420 2421 adapter->drv_stats = 
rte_zmalloc("adapter stats", 2422 sizeof(*adapter->drv_stats), 2423 RTE_CACHE_LINE_SIZE); 2424 if (!adapter->drv_stats) { 2425 PMD_DRV_LOG_LINE(ERR, 2426 "Failed to allocate memory for adapter statistics"); 2427 rc = -ENOMEM; 2428 goto err_rss_destroy; 2429 } 2430 2431 rte_spinlock_init(&adapter->admin_lock); 2432 2433 if (!adapter->control_path_poll_interval) { 2434 /* Control path interrupt mode */ 2435 rte_intr_callback_register(intr_handle, ena_control_path_handler, eth_dev); 2436 rte_intr_enable(intr_handle); 2437 ena_com_set_admin_polling_mode(ena_dev, false); 2438 } else { 2439 /* Control path polling mode */ 2440 rc = rte_eal_alarm_set(adapter->control_path_poll_interval, 2441 ena_control_path_poll_handler, eth_dev); 2442 if (unlikely(rc != 0)) { 2443 PMD_DRV_LOG_LINE(ERR, "Failed to set control path alarm"); 2444 goto err_control_path_destroy; 2445 } 2446 } 2447 ena_com_admin_aenq_enable(ena_dev); 2448 rte_timer_init(&adapter->timer_wd); 2449 2450 adapters_found++; 2451 adapter->state = ENA_ADAPTER_STATE_INIT; 2452 2453 return 0; 2454 err_control_path_destroy: 2455 rte_free(adapter->drv_stats); 2456 err_rss_destroy: 2457 ena_com_rss_destroy(ena_dev); 2458 err_delete_debug_area: 2459 ena_com_delete_debug_area(ena_dev); 2460 2461 err_device_destroy: 2462 ena_com_delete_host_info(ena_dev); 2463 ena_com_admin_destroy(ena_dev); 2464 err_metrics_delete: 2465 ena_com_delete_customer_metrics_buffer(ena_dev); 2466 err: 2467 return rc; 2468 } 2469 2470 static int eth_ena_dev_uninit(struct rte_eth_dev *eth_dev) 2471 { 2472 if (rte_eal_process_type() != RTE_PROC_PRIMARY) 2473 return 0; 2474 2475 ena_close(eth_dev); 2476 2477 return 0; 2478 } 2479 2480 static int ena_dev_configure(struct rte_eth_dev *dev) 2481 { 2482 struct ena_adapter *adapter = dev->data->dev_private; 2483 int rc; 2484 2485 adapter->state = ENA_ADAPTER_STATE_CONFIG; 2486 2487 if (dev->data->dev_conf.rxmode.mq_mode & RTE_ETH_MQ_RX_RSS_FLAG) 2488 dev->data->dev_conf.rxmode.offloads |= RTE_ETH_RX_OFFLOAD_RSS_HASH; 2489 dev->data->dev_conf.txmode.offloads |= RTE_ETH_TX_OFFLOAD_MULTI_SEGS; 2490 2491 /* Scattered Rx cannot be turned off in the HW, so this capability must 2492 * be forced. 2493 */ 2494 dev->data->scattered_rx = 1; 2495 2496 adapter->last_tx_comp_qid = 0; 2497 2498 adapter->missing_tx_completion_budget = 2499 RTE_MIN(ENA_MONITORED_TX_QUEUES, dev->data->nb_tx_queues); 2500 2501 /* To avoid detection of the spurious Tx completion timeout due to 2502 * application not calling the Tx cleanup function, set timeout for the 2503 * Tx queue which should be half of the missing completion timeout for a 2504 * safety. If there will be a lot of missing Tx completions in the 2505 * queue, they will be detected sooner or later. 
2506 */ 2507 adapter->tx_cleanup_stall_delay = adapter->missing_tx_completion_to / 2; 2508 2509 rc = ena_configure_aenq(adapter); 2510 2511 return rc; 2512 } 2513 2514 static void ena_init_rings(struct ena_adapter *adapter, 2515 bool disable_meta_caching) 2516 { 2517 size_t i; 2518 2519 for (i = 0; i < adapter->max_num_io_queues; i++) { 2520 struct ena_ring *ring = &adapter->tx_ring[i]; 2521 2522 ring->configured = 0; 2523 ring->type = ENA_RING_TYPE_TX; 2524 ring->adapter = adapter; 2525 ring->id = i; 2526 ring->tx_mem_queue_type = adapter->ena_dev.tx_mem_queue_type; 2527 ring->tx_max_header_size = adapter->ena_dev.tx_max_header_size; 2528 ring->sgl_size = adapter->max_tx_sgl_size; 2529 ring->disable_meta_caching = disable_meta_caching; 2530 } 2531 2532 for (i = 0; i < adapter->max_num_io_queues; i++) { 2533 struct ena_ring *ring = &adapter->rx_ring[i]; 2534 2535 ring->configured = 0; 2536 ring->type = ENA_RING_TYPE_RX; 2537 ring->adapter = adapter; 2538 ring->id = i; 2539 ring->sgl_size = adapter->max_rx_sgl_size; 2540 } 2541 } 2542 2543 static uint64_t ena_get_rx_port_offloads(struct ena_adapter *adapter) 2544 { 2545 uint64_t port_offloads = 0; 2546 2547 if (adapter->offloads.rx_offloads & ENA_L3_IPV4_CSUM) 2548 port_offloads |= RTE_ETH_RX_OFFLOAD_IPV4_CKSUM; 2549 2550 if (adapter->offloads.rx_offloads & 2551 (ENA_L4_IPV4_CSUM | ENA_L4_IPV6_CSUM)) 2552 port_offloads |= 2553 RTE_ETH_RX_OFFLOAD_UDP_CKSUM | RTE_ETH_RX_OFFLOAD_TCP_CKSUM; 2554 2555 if (adapter->offloads.rx_offloads & ENA_RX_RSS_HASH) 2556 port_offloads |= RTE_ETH_RX_OFFLOAD_RSS_HASH; 2557 2558 port_offloads |= RTE_ETH_RX_OFFLOAD_SCATTER; 2559 2560 return port_offloads; 2561 } 2562 2563 static uint64_t ena_get_tx_port_offloads(struct ena_adapter *adapter) 2564 { 2565 uint64_t port_offloads = 0; 2566 2567 if (adapter->offloads.tx_offloads & ENA_IPV4_TSO) 2568 port_offloads |= RTE_ETH_TX_OFFLOAD_TCP_TSO; 2569 2570 if (adapter->offloads.tx_offloads & ENA_L3_IPV4_CSUM) 2571 port_offloads |= RTE_ETH_TX_OFFLOAD_IPV4_CKSUM; 2572 if (adapter->offloads.tx_offloads & 2573 (ENA_L4_IPV4_CSUM_PARTIAL | ENA_L4_IPV4_CSUM | 2574 ENA_L4_IPV6_CSUM | ENA_L4_IPV6_CSUM_PARTIAL)) 2575 port_offloads |= 2576 RTE_ETH_TX_OFFLOAD_UDP_CKSUM | RTE_ETH_TX_OFFLOAD_TCP_CKSUM; 2577 2578 port_offloads |= RTE_ETH_TX_OFFLOAD_MULTI_SEGS; 2579 2580 port_offloads |= RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE; 2581 2582 return port_offloads; 2583 } 2584 2585 static uint64_t ena_get_rx_queue_offloads(struct ena_adapter *adapter) 2586 { 2587 RTE_SET_USED(adapter); 2588 2589 return 0; 2590 } 2591 2592 static uint64_t ena_get_tx_queue_offloads(struct ena_adapter *adapter) 2593 { 2594 uint64_t queue_offloads = 0; 2595 RTE_SET_USED(adapter); 2596 2597 queue_offloads |= RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE; 2598 2599 return queue_offloads; 2600 } 2601 2602 static int ena_infos_get(struct rte_eth_dev *dev, 2603 struct rte_eth_dev_info *dev_info) 2604 { 2605 struct ena_adapter *adapter; 2606 struct ena_com_dev *ena_dev; 2607 2608 ena_assert_msg(dev->data != NULL, "Uninitialized device\n"); 2609 ena_assert_msg(dev->data->dev_private != NULL, "Uninitialized device\n"); 2610 adapter = dev->data->dev_private; 2611 2612 ena_dev = &adapter->ena_dev; 2613 ena_assert_msg(ena_dev != NULL, "Uninitialized device\n"); 2614 2615 dev_info->speed_capa = 2616 RTE_ETH_LINK_SPEED_1G | 2617 RTE_ETH_LINK_SPEED_2_5G | 2618 RTE_ETH_LINK_SPEED_5G | 2619 RTE_ETH_LINK_SPEED_10G | 2620 RTE_ETH_LINK_SPEED_25G | 2621 RTE_ETH_LINK_SPEED_40G | 2622 RTE_ETH_LINK_SPEED_50G | 2623 RTE_ETH_LINK_SPEED_100G | 2624 
RTE_ETH_LINK_SPEED_200G | 2625 RTE_ETH_LINK_SPEED_400G; 2626 2627 /* Inform framework about available features */ 2628 dev_info->rx_offload_capa = ena_get_rx_port_offloads(adapter); 2629 dev_info->tx_offload_capa = ena_get_tx_port_offloads(adapter); 2630 dev_info->rx_queue_offload_capa = ena_get_rx_queue_offloads(adapter); 2631 dev_info->tx_queue_offload_capa = ena_get_tx_queue_offloads(adapter); 2632 2633 dev_info->flow_type_rss_offloads = ENA_ALL_RSS_HF; 2634 dev_info->hash_key_size = ENA_HASH_KEY_SIZE; 2635 2636 dev_info->min_rx_bufsize = ENA_MIN_FRAME_LEN; 2637 dev_info->max_rx_pktlen = adapter->max_mtu + RTE_ETHER_HDR_LEN + 2638 RTE_ETHER_CRC_LEN; 2639 dev_info->min_mtu = ENA_MIN_MTU; 2640 dev_info->max_mtu = adapter->max_mtu; 2641 dev_info->max_mac_addrs = 1; 2642 2643 dev_info->max_rx_queues = adapter->max_num_io_queues; 2644 dev_info->max_tx_queues = adapter->max_num_io_queues; 2645 dev_info->reta_size = ENA_RX_RSS_TABLE_SIZE; 2646 2647 dev_info->rx_desc_lim.nb_max = adapter->max_rx_ring_size; 2648 dev_info->rx_desc_lim.nb_min = ENA_MIN_RING_DESC; 2649 dev_info->rx_desc_lim.nb_seg_max = RTE_MIN(ENA_PKT_MAX_BUFS, 2650 adapter->max_rx_sgl_size); 2651 dev_info->rx_desc_lim.nb_mtu_seg_max = RTE_MIN(ENA_PKT_MAX_BUFS, 2652 adapter->max_rx_sgl_size); 2653 2654 dev_info->tx_desc_lim.nb_max = adapter->max_tx_ring_size; 2655 dev_info->tx_desc_lim.nb_min = ENA_MIN_RING_DESC; 2656 dev_info->tx_desc_lim.nb_seg_max = RTE_MIN(ENA_PKT_MAX_BUFS, 2657 adapter->max_tx_sgl_size); 2658 dev_info->tx_desc_lim.nb_mtu_seg_max = RTE_MIN(ENA_PKT_MAX_BUFS, 2659 adapter->max_tx_sgl_size); 2660 2661 dev_info->default_rxportconf.ring_size = RTE_MIN(ENA_DEFAULT_RING_SIZE, 2662 dev_info->rx_desc_lim.nb_max); 2663 dev_info->default_txportconf.ring_size = RTE_MIN(ENA_DEFAULT_RING_SIZE, 2664 dev_info->tx_desc_lim.nb_max); 2665 2666 dev_info->err_handle_mode = RTE_ETH_ERROR_HANDLE_MODE_PASSIVE; 2667 2668 return 0; 2669 } 2670 2671 static inline void ena_init_rx_mbuf(struct rte_mbuf *mbuf, uint16_t len) 2672 { 2673 mbuf->data_len = len; 2674 mbuf->data_off = RTE_PKTMBUF_HEADROOM; 2675 mbuf->refcnt = 1; 2676 mbuf->next = NULL; 2677 } 2678 2679 static struct rte_mbuf *ena_rx_mbuf(struct ena_ring *rx_ring, 2680 struct ena_com_rx_buf_info *ena_bufs, 2681 uint32_t descs, 2682 uint16_t *next_to_clean, 2683 uint8_t offset) 2684 { 2685 struct rte_mbuf *mbuf; 2686 struct rte_mbuf *mbuf_head; 2687 struct ena_rx_buffer *rx_info; 2688 int rc; 2689 uint16_t ntc, len, req_id, buf = 0; 2690 2691 if (unlikely(descs == 0)) 2692 return NULL; 2693 2694 ntc = *next_to_clean; 2695 2696 len = ena_bufs[buf].len; 2697 req_id = ena_bufs[buf].req_id; 2698 2699 rx_info = &rx_ring->rx_buffer_info[req_id]; 2700 2701 mbuf = rx_info->mbuf; 2702 RTE_ASSERT(mbuf != NULL); 2703 2704 ena_init_rx_mbuf(mbuf, len); 2705 2706 /* Fill the mbuf head with the data specific for 1st segment. */ 2707 mbuf_head = mbuf; 2708 mbuf_head->nb_segs = descs; 2709 mbuf_head->port = rx_ring->port_id; 2710 mbuf_head->pkt_len = len; 2711 mbuf_head->data_off += offset; 2712 2713 rx_info->mbuf = NULL; 2714 rx_ring->empty_rx_reqs[ntc] = req_id; 2715 ntc = ENA_IDX_NEXT_MASKED(ntc, rx_ring->size_mask); 2716 2717 while (--descs) { 2718 ++buf; 2719 len = ena_bufs[buf].len; 2720 req_id = ena_bufs[buf].req_id; 2721 2722 rx_info = &rx_ring->rx_buffer_info[req_id]; 2723 RTE_ASSERT(rx_info->mbuf != NULL); 2724 2725 if (unlikely(len == 0)) { 2726 /* 2727 * Some devices can pass descriptor with the length 0. 
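 * Such a descriptor carries no data and, as the code below relies on,
 * is always the last one reported for the packet.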
2728 * To avoid confusion, the PMD is simply putting the 2729 * descriptor back, as it was never used. We'll avoid 2730 * mbuf allocation that way. 2731 */ 2732 rc = ena_add_single_rx_desc(rx_ring->ena_com_io_sq, 2733 rx_info->mbuf, req_id); 2734 if (unlikely(rc != 0)) { 2735 /* Free the mbuf in case of an error. */ 2736 rte_mbuf_raw_free(rx_info->mbuf); 2737 } else { 2738 /* 2739 * If there was no error, just exit the loop as 2740 * 0 length descriptor is always the last one. 2741 */ 2742 break; 2743 } 2744 } else { 2745 /* Create an mbuf chain. */ 2746 mbuf->next = rx_info->mbuf; 2747 mbuf = mbuf->next; 2748 2749 ena_init_rx_mbuf(mbuf, len); 2750 mbuf_head->pkt_len += len; 2751 } 2752 2753 /* 2754 * Mark the descriptor as depleted and perform necessary 2755 * cleanup. 2756 * This code will execute in two cases: 2757 * 1. Descriptor len was greater than 0 - normal situation. 2758 * 2. Descriptor len was 0 and we failed to add the descriptor 2759 * to the device. In that situation, we should try to add 2760 * the mbuf again in the populate routine and mark the 2761 * descriptor as used up by the device. 2762 */ 2763 rx_info->mbuf = NULL; 2764 rx_ring->empty_rx_reqs[ntc] = req_id; 2765 ntc = ENA_IDX_NEXT_MASKED(ntc, rx_ring->size_mask); 2766 } 2767 2768 *next_to_clean = ntc; 2769 2770 return mbuf_head; 2771 } 2772 2773 static uint16_t eth_ena_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, 2774 uint16_t nb_pkts) 2775 { 2776 struct ena_ring *rx_ring = (struct ena_ring *)(rx_queue); 2777 unsigned int free_queue_entries; 2778 uint16_t next_to_clean = rx_ring->next_to_clean; 2779 enum ena_regs_reset_reason_types reset_reason; 2780 uint16_t descs_in_use; 2781 struct rte_mbuf *mbuf; 2782 uint16_t completed; 2783 struct ena_com_rx_ctx ena_rx_ctx; 2784 int i, rc = 0; 2785 2786 #ifdef RTE_ETHDEV_DEBUG_RX 2787 /* Check adapter state */ 2788 if (unlikely(rx_ring->adapter->state != ENA_ADAPTER_STATE_RUNNING)) { 2789 PMD_RX_LOG_LINE(ALERT, 2790 "Trying to receive pkts while device is NOT running"); 2791 return 0; 2792 } 2793 #endif 2794 2795 descs_in_use = rx_ring->ring_size - 2796 ena_com_free_q_entries(rx_ring->ena_com_io_sq) - 1; 2797 nb_pkts = RTE_MIN(descs_in_use, nb_pkts); 2798 2799 for (completed = 0; completed < nb_pkts; completed++) { 2800 ena_rx_ctx.max_bufs = rx_ring->sgl_size; 2801 ena_rx_ctx.ena_bufs = rx_ring->ena_bufs; 2802 ena_rx_ctx.descs = 0; 2803 ena_rx_ctx.pkt_offset = 0; 2804 /* receive packet context */ 2805 rc = ena_com_rx_pkt(rx_ring->ena_com_io_cq, 2806 rx_ring->ena_com_io_sq, 2807 &ena_rx_ctx); 2808 if (unlikely(rc)) { 2809 PMD_RX_LOG_LINE(ERR, 2810 "Failed to get the packet from the device, rc: %d", 2811 rc); 2812 switch (rc) { 2813 case ENA_COM_NO_SPACE: 2814 ++rx_ring->rx_stats.bad_desc_num; 2815 reset_reason = ENA_REGS_RESET_TOO_MANY_RX_DESCS; 2816 break; 2817 case ENA_COM_FAULT: 2818 ++rx_ring->rx_stats.bad_desc; 2819 reset_reason = ENA_REGS_RESET_RX_DESCRIPTOR_MALFORMED; 2820 break; 2821 case ENA_COM_EIO: 2822 ++rx_ring->rx_stats.bad_req_id; 2823 reset_reason = ENA_REGS_RESET_INV_RX_REQ_ID; 2824 break; 2825 default: 2826 ++rx_ring->rx_stats.unknown_error; 2827 reset_reason = ENA_REGS_RESET_DRIVER_INVALID_STATE; 2828 break; 2829 } 2830 ena_trigger_reset(rx_ring->adapter, reset_reason); 2831 return 0; 2832 } 2833 2834 mbuf = ena_rx_mbuf(rx_ring, 2835 ena_rx_ctx.ena_bufs, 2836 ena_rx_ctx.descs, 2837 &next_to_clean, 2838 ena_rx_ctx.pkt_offset); 2839 if (unlikely(mbuf == NULL)) { 2840 for (i = 0; i < ena_rx_ctx.descs; ++i) { 2841 rx_ring->empty_rx_reqs[next_to_clean] = 
2842 rx_ring->ena_bufs[i].req_id; 2843 next_to_clean = ENA_IDX_NEXT_MASKED( 2844 next_to_clean, rx_ring->size_mask); 2845 } 2846 break; 2847 } 2848 2849 /* fill mbuf attributes if any */ 2850 ena_rx_mbuf_prepare(rx_ring, mbuf, &ena_rx_ctx); 2851 2852 if (unlikely(mbuf->ol_flags & 2853 (RTE_MBUF_F_RX_IP_CKSUM_BAD | RTE_MBUF_F_RX_L4_CKSUM_BAD))) 2854 rte_atomic64_inc(&rx_ring->adapter->drv_stats->ierrors); 2855 2856 rx_pkts[completed] = mbuf; 2857 rx_ring->rx_stats.bytes += mbuf->pkt_len; 2858 } 2859 2860 rx_ring->rx_stats.cnt += completed; 2861 rx_ring->next_to_clean = next_to_clean; 2862 2863 free_queue_entries = ena_com_free_q_entries(rx_ring->ena_com_io_sq); 2864 2865 /* Burst refill to save doorbells, memory barriers, const interval */ 2866 if (free_queue_entries >= rx_ring->rx_free_thresh) { 2867 ena_populate_rx_queue(rx_ring, free_queue_entries); 2868 } 2869 2870 return completed; 2871 } 2872 2873 static uint16_t 2874 eth_ena_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, 2875 uint16_t nb_pkts) 2876 { 2877 int32_t ret; 2878 uint32_t i; 2879 struct rte_mbuf *m; 2880 struct ena_ring *tx_ring = (struct ena_ring *)(tx_queue); 2881 struct ena_adapter *adapter = tx_ring->adapter; 2882 struct rte_ipv4_hdr *ip_hdr; 2883 uint64_t ol_flags; 2884 uint64_t l4_csum_flag; 2885 uint64_t dev_offload_capa; 2886 uint16_t frag_field; 2887 bool need_pseudo_csum; 2888 2889 dev_offload_capa = adapter->offloads.tx_offloads; 2890 for (i = 0; i != nb_pkts; i++) { 2891 m = tx_pkts[i]; 2892 ol_flags = m->ol_flags; 2893 2894 /* Check if any offload flag was set */ 2895 if (ol_flags == 0) 2896 continue; 2897 2898 l4_csum_flag = ol_flags & RTE_MBUF_F_TX_L4_MASK; 2899 /* SCTP checksum offload is not supported by the ENA. */ 2900 if ((ol_flags & ENA_TX_OFFLOAD_NOTSUP_MASK) || 2901 l4_csum_flag == RTE_MBUF_F_TX_SCTP_CKSUM) { 2902 PMD_TX_LOG_LINE(DEBUG, 2903 "mbuf[%" PRIu32 "] has unsupported offloads flags set: 0x%" PRIu64, 2904 i, ol_flags); 2905 rte_errno = ENOTSUP; 2906 return i; 2907 } 2908 2909 if (unlikely(m->nb_segs >= tx_ring->sgl_size && 2910 !(tx_ring->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV && 2911 m->nb_segs == tx_ring->sgl_size && 2912 m->data_len < tx_ring->tx_max_header_size))) { 2913 PMD_TX_LOG_LINE(DEBUG, 2914 "mbuf[%" PRIu32 "] has too many segments: %" PRIu16, 2915 i, m->nb_segs); 2916 rte_errno = EINVAL; 2917 return i; 2918 } 2919 2920 #ifdef RTE_LIBRTE_ETHDEV_DEBUG 2921 /* Check if requested offload is also enabled for the queue */ 2922 if ((ol_flags & RTE_MBUF_F_TX_IP_CKSUM && 2923 !(tx_ring->offloads & RTE_ETH_TX_OFFLOAD_IPV4_CKSUM)) || 2924 (l4_csum_flag == RTE_MBUF_F_TX_TCP_CKSUM && 2925 !(tx_ring->offloads & RTE_ETH_TX_OFFLOAD_TCP_CKSUM)) || 2926 (l4_csum_flag == RTE_MBUF_F_TX_UDP_CKSUM && 2927 !(tx_ring->offloads & RTE_ETH_TX_OFFLOAD_UDP_CKSUM))) { 2928 PMD_TX_LOG_LINE(DEBUG, 2929 "mbuf[%" PRIu32 "]: requested offloads: %" PRIu16 " are not enabled for the queue[%u]", 2930 i, m->nb_segs, tx_ring->id); 2931 rte_errno = EINVAL; 2932 return i; 2933 } 2934 2935 /* The caller is obligated to set l2 and l3 len if any cksum 2936 * offload is enabled. 
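 * For example, a plain (non-VLAN) Ethernet/IPv4/TCP frame would use
 * l2_len = 14 and l3_len = 20, assuming an IPv4 header without options.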
2937 */ 2938 if (unlikely(ol_flags & (RTE_MBUF_F_TX_IP_CKSUM | RTE_MBUF_F_TX_L4_MASK) && 2939 (m->l2_len == 0 || m->l3_len == 0))) { 2940 PMD_TX_LOG_LINE(DEBUG, 2941 "mbuf[%" PRIu32 "]: l2_len or l3_len values are 0 while the offload was requested", 2942 i); 2943 rte_errno = EINVAL; 2944 return i; 2945 } 2946 ret = rte_validate_tx_offload(m); 2947 if (ret != 0) { 2948 rte_errno = -ret; 2949 return i; 2950 } 2951 #endif 2952 2953 /* Verify HW support for requested offloads and determine if 2954 * pseudo header checksum is needed. 2955 */ 2956 need_pseudo_csum = false; 2957 if (ol_flags & RTE_MBUF_F_TX_IPV4) { 2958 if (ol_flags & RTE_MBUF_F_TX_IP_CKSUM && 2959 !(dev_offload_capa & ENA_L3_IPV4_CSUM)) { 2960 rte_errno = ENOTSUP; 2961 return i; 2962 } 2963 2964 if (ol_flags & RTE_MBUF_F_TX_TCP_SEG && 2965 !(dev_offload_capa & ENA_IPV4_TSO)) { 2966 rte_errno = ENOTSUP; 2967 return i; 2968 } 2969 2970 /* Check HW capabilities and if pseudo csum is needed 2971 * for L4 offloads. 2972 */ 2973 if (l4_csum_flag != RTE_MBUF_F_TX_L4_NO_CKSUM && 2974 !(dev_offload_capa & ENA_L4_IPV4_CSUM)) { 2975 if (dev_offload_capa & 2976 ENA_L4_IPV4_CSUM_PARTIAL) { 2977 need_pseudo_csum = true; 2978 } else { 2979 rte_errno = ENOTSUP; 2980 return i; 2981 } 2982 } 2983 2984 /* Parse the DF flag */ 2985 ip_hdr = rte_pktmbuf_mtod_offset(m, 2986 struct rte_ipv4_hdr *, m->l2_len); 2987 frag_field = rte_be_to_cpu_16(ip_hdr->fragment_offset); 2988 if (frag_field & RTE_IPV4_HDR_DF_FLAG) { 2989 m->packet_type |= RTE_PTYPE_L4_NONFRAG; 2990 } else if (ol_flags & RTE_MBUF_F_TX_TCP_SEG) { 2991 /* In case we are supposed to TSO and have DF 2992 * not set (DF=0) hardware must be provided with 2993 * partial checksum. 2994 */ 2995 need_pseudo_csum = true; 2996 } 2997 } else if (ol_flags & RTE_MBUF_F_TX_IPV6) { 2998 /* There is no support for IPv6 TSO as for now. 
*/ 2999 if (ol_flags & RTE_MBUF_F_TX_TCP_SEG) { 3000 rte_errno = ENOTSUP; 3001 return i; 3002 } 3003 3004 /* Check HW capabilities and if pseudo csum is needed */ 3005 if (l4_csum_flag != RTE_MBUF_F_TX_L4_NO_CKSUM && 3006 !(dev_offload_capa & ENA_L4_IPV6_CSUM)) { 3007 if (dev_offload_capa & 3008 ENA_L4_IPV6_CSUM_PARTIAL) { 3009 need_pseudo_csum = true; 3010 } else { 3011 rte_errno = ENOTSUP; 3012 return i; 3013 } 3014 } 3015 } 3016 3017 if (need_pseudo_csum) { 3018 ret = rte_net_intel_cksum_flags_prepare(m, ol_flags); 3019 if (ret != 0) { 3020 rte_errno = -ret; 3021 return i; 3022 } 3023 } 3024 } 3025 3026 return i; 3027 } 3028 3029 static void ena_update_hints(struct ena_adapter *adapter, 3030 struct ena_admin_ena_hw_hints *hints) 3031 { 3032 if (hints->admin_completion_tx_timeout) 3033 adapter->ena_dev.admin_queue.completion_timeout = 3034 hints->admin_completion_tx_timeout * 1000; 3035 3036 if (hints->mmio_read_timeout) 3037 /* convert to usec */ 3038 adapter->ena_dev.mmio_read.reg_read_to = 3039 hints->mmio_read_timeout * 1000; 3040 3041 if (hints->driver_watchdog_timeout) { 3042 if (hints->driver_watchdog_timeout == ENA_HW_HINTS_NO_TIMEOUT) 3043 adapter->keep_alive_timeout = ENA_HW_HINTS_NO_TIMEOUT; 3044 else 3045 // Convert msecs to ticks 3046 adapter->keep_alive_timeout = 3047 (hints->driver_watchdog_timeout * 3048 rte_get_timer_hz()) / 1000; 3049 } 3050 } 3051 3052 static void ena_tx_map_mbuf(struct ena_ring *tx_ring, 3053 struct ena_tx_buffer *tx_info, 3054 struct rte_mbuf *mbuf, 3055 void **push_header, 3056 uint16_t *header_len) 3057 { 3058 struct ena_com_buf *ena_buf; 3059 uint16_t delta, seg_len, push_len; 3060 3061 delta = 0; 3062 seg_len = mbuf->data_len; 3063 3064 tx_info->mbuf = mbuf; 3065 ena_buf = tx_info->bufs; 3066 3067 if (tx_ring->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) { 3068 /* 3069 * Tx header might be (and will be in most cases) smaller than 3070 * tx_max_header_size. But it's not an issue to send more data 3071 * to the device, than actually needed if the mbuf size is 3072 * greater than tx_max_header_size. 3073 */ 3074 push_len = RTE_MIN(mbuf->pkt_len, tx_ring->tx_max_header_size); 3075 *header_len = push_len; 3076 3077 if (likely(push_len <= seg_len)) { 3078 /* If the push header is in the single segment, then 3079 * just point it to the 1st mbuf data. 3080 */ 3081 *push_header = rte_pktmbuf_mtod(mbuf, uint8_t *); 3082 } else { 3083 /* If the push header lays in the several segments, copy 3084 * it to the intermediate buffer. 
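 * The part of the header taken from the following segments is tracked
 * in 'delta', so those bytes are not mapped again below.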
3085 */ 3086 rte_pktmbuf_read(mbuf, 0, push_len, 3087 tx_ring->push_buf_intermediate_buf); 3088 *push_header = tx_ring->push_buf_intermediate_buf; 3089 delta = push_len - seg_len; 3090 } 3091 } else { 3092 *push_header = NULL; 3093 *header_len = 0; 3094 push_len = 0; 3095 } 3096 3097 /* Process first segment taking into consideration pushed header */ 3098 if (seg_len > push_len) { 3099 ena_buf->paddr = mbuf->buf_iova + 3100 mbuf->data_off + 3101 push_len; 3102 ena_buf->len = seg_len - push_len; 3103 ena_buf++; 3104 tx_info->num_of_bufs++; 3105 } 3106 3107 while ((mbuf = mbuf->next) != NULL) { 3108 seg_len = mbuf->data_len; 3109 3110 /* Skip mbufs if whole data is pushed as a header */ 3111 if (unlikely(delta > seg_len)) { 3112 delta -= seg_len; 3113 continue; 3114 } 3115 3116 ena_buf->paddr = mbuf->buf_iova + mbuf->data_off + delta; 3117 ena_buf->len = seg_len - delta; 3118 ena_buf++; 3119 tx_info->num_of_bufs++; 3120 3121 delta = 0; 3122 } 3123 } 3124 3125 static int ena_xmit_mbuf(struct ena_ring *tx_ring, struct rte_mbuf *mbuf) 3126 { 3127 struct ena_tx_buffer *tx_info; 3128 struct ena_com_tx_ctx ena_tx_ctx = { { 0 } }; 3129 uint16_t next_to_use; 3130 uint16_t header_len; 3131 uint16_t req_id; 3132 void *push_header; 3133 int nb_hw_desc; 3134 int rc; 3135 3136 /* Checking for space for 2 additional metadata descriptors due to 3137 * possible header split and metadata descriptor 3138 */ 3139 if (!ena_com_sq_have_enough_space(tx_ring->ena_com_io_sq, 3140 mbuf->nb_segs + 2)) { 3141 PMD_TX_LOG_LINE(DEBUG, "Not enough space in the tx queue"); 3142 return ENA_COM_NO_MEM; 3143 } 3144 3145 next_to_use = tx_ring->next_to_use; 3146 3147 req_id = tx_ring->empty_tx_reqs[next_to_use]; 3148 tx_info = &tx_ring->tx_buffer_info[req_id]; 3149 tx_info->num_of_bufs = 0; 3150 RTE_ASSERT(tx_info->mbuf == NULL); 3151 3152 ena_tx_map_mbuf(tx_ring, tx_info, mbuf, &push_header, &header_len); 3153 3154 ena_tx_ctx.ena_bufs = tx_info->bufs; 3155 ena_tx_ctx.push_header = push_header; 3156 ena_tx_ctx.num_bufs = tx_info->num_of_bufs; 3157 ena_tx_ctx.req_id = req_id; 3158 ena_tx_ctx.header_len = header_len; 3159 3160 /* Set Tx offloads flags, if applicable */ 3161 ena_tx_mbuf_prepare(mbuf, &ena_tx_ctx, tx_ring->offloads, 3162 tx_ring->disable_meta_caching); 3163 3164 if (unlikely(ena_com_is_doorbell_needed(tx_ring->ena_com_io_sq, 3165 &ena_tx_ctx))) { 3166 PMD_TX_LOG_LINE(DEBUG, 3167 "LLQ Tx max burst size of queue %d achieved, writing doorbell to send burst", 3168 tx_ring->id); 3169 ena_com_write_sq_doorbell(tx_ring->ena_com_io_sq); 3170 tx_ring->tx_stats.doorbells++; 3171 tx_ring->pkts_without_db = false; 3172 } 3173 3174 /* prepare the packet's descriptors to dma engine */ 3175 rc = ena_com_prepare_tx(tx_ring->ena_com_io_sq, &ena_tx_ctx, 3176 &nb_hw_desc); 3177 if (unlikely(rc)) { 3178 PMD_DRV_LOG_LINE(ERR, "Failed to prepare Tx buffers, rc: %d", rc); 3179 ++tx_ring->tx_stats.prepare_ctx_err; 3180 ena_trigger_reset(tx_ring->adapter, 3181 ENA_REGS_RESET_DRIVER_INVALID_STATE); 3182 return rc; 3183 } 3184 3185 tx_info->tx_descs = nb_hw_desc; 3186 tx_info->timestamp = rte_get_timer_cycles(); 3187 3188 tx_ring->tx_stats.cnt++; 3189 tx_ring->tx_stats.bytes += mbuf->pkt_len; 3190 3191 tx_ring->next_to_use = ENA_IDX_NEXT_MASKED(next_to_use, 3192 tx_ring->size_mask); 3193 3194 return 0; 3195 } 3196 3197 static int ena_tx_cleanup(void *txp, uint32_t free_pkt_cnt) 3198 { 3199 struct rte_mbuf *pkts_to_clean[ENA_CLEANUP_BUF_THRESH]; 3200 struct ena_ring *tx_ring = (struct ena_ring *)txp; 3201 size_t mbuf_cnt = 0; 3202 size_t 
pkt_cnt = 0; 3203 unsigned int total_tx_descs = 0; 3204 unsigned int total_tx_pkts = 0; 3205 uint16_t cleanup_budget; 3206 uint16_t next_to_clean = tx_ring->next_to_clean; 3207 bool fast_free = tx_ring->offloads & RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE; 3208 3209 /* 3210 * If free_pkt_cnt is equal to 0, it means that the user requested 3211 * full cleanup, so attempt to release all Tx descriptors 3212 * (ring_size - 1 -> size_mask) 3213 */ 3214 cleanup_budget = (free_pkt_cnt == 0) ? tx_ring->size_mask : free_pkt_cnt; 3215 3216 while (likely(total_tx_pkts < cleanup_budget)) { 3217 struct rte_mbuf *mbuf; 3218 struct ena_tx_buffer *tx_info; 3219 uint16_t req_id; 3220 3221 if (ena_com_tx_comp_req_id_get(tx_ring->ena_com_io_cq, &req_id) != 0) 3222 break; 3223 3224 if (unlikely(validate_tx_req_id(tx_ring, req_id) != 0)) 3225 break; 3226 3227 /* Get Tx info & store how many descs were processed */ 3228 tx_info = &tx_ring->tx_buffer_info[req_id]; 3229 tx_info->timestamp = 0; 3230 3231 mbuf = tx_info->mbuf; 3232 if (fast_free) { 3233 pkts_to_clean[pkt_cnt++] = mbuf; 3234 mbuf_cnt += mbuf->nb_segs; 3235 if (mbuf_cnt >= ENA_CLEANUP_BUF_THRESH) { 3236 rte_pktmbuf_free_bulk(pkts_to_clean, pkt_cnt); 3237 mbuf_cnt = 0; 3238 pkt_cnt = 0; 3239 } 3240 } else { 3241 rte_pktmbuf_free(mbuf); 3242 } 3243 3244 tx_info->mbuf = NULL; 3245 tx_ring->empty_tx_reqs[next_to_clean] = req_id; 3246 3247 total_tx_descs += tx_info->tx_descs; 3248 total_tx_pkts++; 3249 3250 /* Put back descriptor to the ring for reuse */ 3251 next_to_clean = ENA_IDX_NEXT_MASKED(next_to_clean, 3252 tx_ring->size_mask); 3253 } 3254 3255 if (likely(total_tx_descs > 0)) { 3256 /* acknowledge completion of sent packets */ 3257 tx_ring->next_to_clean = next_to_clean; 3258 ena_com_comp_ack(tx_ring->ena_com_io_sq, total_tx_descs); 3259 } 3260 3261 if (mbuf_cnt != 0) 3262 rte_pktmbuf_free_bulk(pkts_to_clean, pkt_cnt); 3263 3264 /* Notify completion handler that full cleanup was performed */ 3265 if (free_pkt_cnt == 0 || total_tx_pkts < cleanup_budget) 3266 tx_ring->last_cleanup_ticks = rte_get_timer_cycles(); 3267 3268 return total_tx_pkts; 3269 } 3270 3271 static uint16_t eth_ena_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, 3272 uint16_t nb_pkts) 3273 { 3274 struct ena_ring *tx_ring = (struct ena_ring *)(tx_queue); 3275 int available_desc; 3276 uint16_t sent_idx = 0; 3277 3278 #ifdef RTE_ETHDEV_DEBUG_TX 3279 /* Check adapter state */ 3280 if (unlikely(tx_ring->adapter->state != ENA_ADAPTER_STATE_RUNNING)) { 3281 PMD_TX_LOG_LINE(ALERT, 3282 "Trying to xmit pkts while device is NOT running"); 3283 return 0; 3284 } 3285 #endif 3286 3287 available_desc = ena_com_free_q_entries(tx_ring->ena_com_io_sq); 3288 if (available_desc < tx_ring->tx_free_thresh) 3289 ena_tx_cleanup((void *)tx_ring, 0); 3290 3291 for (sent_idx = 0; sent_idx < nb_pkts; sent_idx++) { 3292 if (ena_xmit_mbuf(tx_ring, tx_pkts[sent_idx])) 3293 break; 3294 tx_ring->pkts_without_db = true; 3295 rte_prefetch0(tx_pkts[ENA_IDX_ADD_MASKED(sent_idx, 4, 3296 tx_ring->size_mask)]); 3297 } 3298 3299 /* If there are ready packets to be xmitted... 
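 * (pkts_without_db was set in the loop above for each successfully queued
 * packet and is cleared once a doorbell is written.)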
*/ 3300 if (likely(tx_ring->pkts_without_db)) { 3301 /* ...let HW do its best :-) */ 3302 ena_com_write_sq_doorbell(tx_ring->ena_com_io_sq); 3303 tx_ring->tx_stats.doorbells++; 3304 tx_ring->pkts_without_db = false; 3305 } 3306 3307 tx_ring->tx_stats.available_desc = 3308 ena_com_free_q_entries(tx_ring->ena_com_io_sq); 3309 tx_ring->tx_stats.tx_poll++; 3310 3311 return sent_idx; 3312 } 3313 3314 static void ena_copy_customer_metrics(struct ena_adapter *adapter, uint64_t *buf, 3315 size_t num_metrics) 3316 { 3317 struct ena_com_dev *ena_dev = &adapter->ena_dev; 3318 int rc; 3319 3320 if (ena_com_get_cap(ena_dev, ENA_ADMIN_CUSTOMER_METRICS)) { 3321 if (num_metrics != ENA_STATS_ARRAY_METRICS) { 3322 PMD_DRV_LOG_LINE(ERR, "Detected discrepancy in the number of customer metrics"); 3323 return; 3324 } 3325 rte_spinlock_lock(&adapter->admin_lock); 3326 rc = ENA_PROXY(adapter, 3327 ena_com_get_customer_metrics, 3328 &adapter->ena_dev, 3329 (char *)buf, 3330 num_metrics * sizeof(uint64_t)); 3331 rte_spinlock_unlock(&adapter->admin_lock); 3332 if (rc != 0) { 3333 PMD_DRV_LOG_LINE(WARNING, "Failed to get customer metrics, rc: %d", rc); 3334 return; 3335 } 3336 3337 } else if (ena_com_get_cap(ena_dev, ENA_ADMIN_ENI_STATS)) { 3338 if (num_metrics != ENA_STATS_ARRAY_METRICS_LEGACY) { 3339 PMD_DRV_LOG_LINE(ERR, "Detected discrepancy in the number of legacy metrics"); 3340 return; 3341 } 3342 3343 rte_spinlock_lock(&adapter->admin_lock); 3344 rc = ENA_PROXY(adapter, 3345 ena_com_get_eni_stats, 3346 &adapter->ena_dev, 3347 (struct ena_admin_eni_stats *)buf); 3348 rte_spinlock_unlock(&adapter->admin_lock); 3349 if (rc != 0) { 3350 PMD_DRV_LOG_LINE(WARNING, 3351 "Failed to get ENI metrics, rc: %d", rc); 3352 return; 3353 } 3354 } 3355 } 3356 3357 static void ena_copy_ena_srd_info(struct ena_adapter *adapter, 3358 struct ena_stats_srd *srd_info) 3359 { 3360 int rc; 3361 3362 if (!ena_com_get_cap(&adapter->ena_dev, ENA_ADMIN_ENA_SRD_INFO)) 3363 return; 3364 3365 rte_spinlock_lock(&adapter->admin_lock); 3366 rc = ENA_PROXY(adapter, 3367 ena_com_get_ena_srd_info, 3368 &adapter->ena_dev, 3369 (struct ena_admin_ena_srd_info *)srd_info); 3370 rte_spinlock_unlock(&adapter->admin_lock); 3371 if (rc != ENA_COM_OK && rc != ENA_COM_UNSUPPORTED) { 3372 PMD_DRV_LOG_LINE(WARNING, 3373 "Failed to get ENA express srd info, rc: %d", rc); 3374 return; 3375 } 3376 } 3377 3378 /** 3379 * DPDK callback to retrieve names of extended device statistics 3380 * 3381 * @param dev 3382 * Pointer to Ethernet device structure. 3383 * @param[out] xstats_names 3384 * Buffer to insert names into. 3385 * @param n 3386 * Number of names. 3387 * 3388 * @return 3389 * Number of xstats names. 
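 *   If @p n is lower than the number of available xstats or @p xstats_names
 *   is NULL, only the required number of entries is returned.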
3390 */ 3391 static int ena_xstats_get_names(struct rte_eth_dev *dev, 3392 struct rte_eth_xstat_name *xstats_names, 3393 unsigned int n) 3394 { 3395 struct ena_adapter *adapter = dev->data->dev_private; 3396 unsigned int xstats_count = ena_xstats_calc_num(dev->data); 3397 unsigned int stat, i, count = 0; 3398 3399 if (n < xstats_count || !xstats_names) 3400 return xstats_count; 3401 3402 for (stat = 0; stat < ENA_STATS_ARRAY_GLOBAL; stat++, count++) 3403 strcpy(xstats_names[count].name, 3404 ena_stats_global_strings[stat].name); 3405 3406 for (stat = 0; stat < adapter->metrics_num; stat++, count++) 3407 rte_strscpy(xstats_names[count].name, 3408 ena_stats_metrics_strings[stat].name, 3409 RTE_ETH_XSTATS_NAME_SIZE); 3410 for (stat = 0; stat < ENA_STATS_ARRAY_ENA_SRD; stat++, count++) 3411 rte_strscpy(xstats_names[count].name, 3412 ena_stats_srd_strings[stat].name, 3413 RTE_ETH_XSTATS_NAME_SIZE); 3414 3415 for (stat = 0; stat < ENA_STATS_ARRAY_RX; stat++) 3416 for (i = 0; i < dev->data->nb_rx_queues; i++, count++) 3417 snprintf(xstats_names[count].name, 3418 sizeof(xstats_names[count].name), 3419 "rx_q%d_%s", i, 3420 ena_stats_rx_strings[stat].name); 3421 3422 for (stat = 0; stat < ENA_STATS_ARRAY_TX; stat++) 3423 for (i = 0; i < dev->data->nb_tx_queues; i++, count++) 3424 snprintf(xstats_names[count].name, 3425 sizeof(xstats_names[count].name), 3426 "tx_q%d_%s", i, 3427 ena_stats_tx_strings[stat].name); 3428 3429 return xstats_count; 3430 } 3431 3432 /** 3433 * DPDK callback to retrieve names of extended device statistics for the given 3434 * ids. 3435 * 3436 * @param dev 3437 * Pointer to Ethernet device structure. 3438 * @param[out] xstats_names 3439 * Buffer to insert names into. 3440 * @param ids 3441 * IDs array for which the names should be retrieved. 3442 * @param size 3443 * Number of ids. 3444 * 3445 * @return 3446 * Positive value: number of xstats names. Negative value: error code. 
3447 */
3448 static int ena_xstats_get_names_by_id(struct rte_eth_dev *dev,
3449 	const uint64_t *ids,
3450 	struct rte_eth_xstat_name *xstats_names,
3451 	unsigned int size)
3452 {
3453 	struct ena_adapter *adapter = dev->data->dev_private;
3454 	uint64_t xstats_count = ena_xstats_calc_num(dev->data);
3455 	uint64_t id, qid;
3456 	unsigned int i;
3457 
3458 	if (xstats_names == NULL)
3459 		return xstats_count;
3460 
3461 	for (i = 0; i < size; ++i) {
3462 		id = ids[i];
3463 		if (id >= xstats_count) {
3464 			PMD_DRV_LOG_LINE(ERR,
3465 				"ID value out of range: id=%" PRIu64 ", xstats_num=%" PRIu64,
3466 				id, xstats_count);
3467 			return -EINVAL;
3468 		}
3469 
3470 		if (id < ENA_STATS_ARRAY_GLOBAL) {
3471 			strcpy(xstats_names[i].name,
3472 				ena_stats_global_strings[id].name);
3473 			continue;
3474 		}
3475 
3476 		id -= ENA_STATS_ARRAY_GLOBAL;
3477 		if (id < adapter->metrics_num) {
3478 			rte_strscpy(xstats_names[i].name,
3479 				ena_stats_metrics_strings[id].name,
3480 				RTE_ETH_XSTATS_NAME_SIZE);
3481 			continue;
3482 		}
3483 
3484 		id -= adapter->metrics_num;
3485 
3486 		if (id < ENA_STATS_ARRAY_ENA_SRD) {
3487 			rte_strscpy(xstats_names[i].name,
3488 				ena_stats_srd_strings[id].name,
3489 				RTE_ETH_XSTATS_NAME_SIZE);
3490 			continue;
3491 		}
3492 		id -= ENA_STATS_ARRAY_ENA_SRD;
3493 
3494 		if (id < ENA_STATS_ARRAY_RX * dev->data->nb_rx_queues) {
3495 			qid = id % dev->data->nb_rx_queues;
3496 			id /= dev->data->nb_rx_queues;
3497 			snprintf(xstats_names[i].name,
3498 				sizeof(xstats_names[i].name),
3499 				"rx_q%" PRIu64 "_%s",
3500 				qid, ena_stats_rx_strings[id].name);
3501 			continue;
3502 		}
3503 
3504 		id -= ENA_STATS_ARRAY_RX * dev->data->nb_rx_queues;
3505 		/* Although this condition is not needed, it was added for
3506 		 * compatibility in case a new xstat structure is ever added.
3507 		 */
3508 		if (id < ENA_STATS_ARRAY_TX * dev->data->nb_tx_queues) {
3509 			qid = id % dev->data->nb_tx_queues;
3510 			id /= dev->data->nb_tx_queues;
3511 			snprintf(xstats_names[i].name,
3512 				sizeof(xstats_names[i].name),
3513 				"tx_q%" PRIu64 "_%s",
3514 				qid, ena_stats_tx_strings[id].name);
3515 			continue;
3516 		}
3517 	}
3518 
3519 	return i;
3520 }
3521 
3522 /**
3523  * DPDK callback to get extended device statistics.
3524  *
3525  * @param dev
3526  *   Pointer to Ethernet device structure.
3527  * @param[out] xstats
3528  *   Stats table output buffer.
3529  * @param n
3530  *   The size of the stats table.
3531  *
3532  * @return
3533  *   Number of xstats on success, negative on failure.
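 *   If @p n is lower than the number of available xstats, only the
 *   required number of entries is returned and @p xstats is not written.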
3534 */ 3535 static int ena_xstats_get(struct rte_eth_dev *dev, 3536 struct rte_eth_xstat *xstats, 3537 unsigned int n) 3538 { 3539 struct ena_adapter *adapter = dev->data->dev_private; 3540 unsigned int xstats_count = ena_xstats_calc_num(dev->data); 3541 unsigned int stat, i, count = 0; 3542 int stat_offset; 3543 void *stats_begin; 3544 uint64_t metrics_stats[ENA_MAX_CUSTOMER_METRICS]; 3545 struct ena_stats_srd srd_info = {0}; 3546 3547 if (n < xstats_count) 3548 return xstats_count; 3549 3550 if (!xstats) 3551 return 0; 3552 3553 for (stat = 0; stat < ENA_STATS_ARRAY_GLOBAL; stat++, count++) { 3554 stat_offset = ena_stats_global_strings[stat].stat_offset; 3555 stats_begin = &adapter->dev_stats; 3556 3557 xstats[count].id = count; 3558 xstats[count].value = *((uint64_t *) 3559 ((char *)stats_begin + stat_offset)); 3560 } 3561 3562 ena_copy_customer_metrics(adapter, metrics_stats, adapter->metrics_num); 3563 stats_begin = metrics_stats; 3564 for (stat = 0; stat < adapter->metrics_num; stat++, count++) { 3565 stat_offset = ena_stats_metrics_strings[stat].stat_offset; 3566 3567 xstats[count].id = count; 3568 xstats[count].value = *((uint64_t *) 3569 ((char *)stats_begin + stat_offset)); 3570 } 3571 3572 ena_copy_ena_srd_info(adapter, &srd_info); 3573 stats_begin = &srd_info; 3574 for (stat = 0; stat < ENA_STATS_ARRAY_ENA_SRD; stat++, count++) { 3575 stat_offset = ena_stats_srd_strings[stat].stat_offset; 3576 xstats[count].id = count; 3577 xstats[count].value = *((uint64_t *) 3578 ((char *)stats_begin + stat_offset)); 3579 } 3580 3581 for (stat = 0; stat < ENA_STATS_ARRAY_RX; stat++) { 3582 for (i = 0; i < dev->data->nb_rx_queues; i++, count++) { 3583 stat_offset = ena_stats_rx_strings[stat].stat_offset; 3584 stats_begin = &adapter->rx_ring[i].rx_stats; 3585 3586 xstats[count].id = count; 3587 xstats[count].value = *((uint64_t *) 3588 ((char *)stats_begin + stat_offset)); 3589 } 3590 } 3591 3592 for (stat = 0; stat < ENA_STATS_ARRAY_TX; stat++) { 3593 for (i = 0; i < dev->data->nb_tx_queues; i++, count++) { 3594 stat_offset = ena_stats_tx_strings[stat].stat_offset; 3595 stats_begin = &adapter->tx_ring[i].rx_stats; 3596 3597 xstats[count].id = count; 3598 xstats[count].value = *((uint64_t *) 3599 ((char *)stats_begin + stat_offset)); 3600 } 3601 } 3602 3603 return count; 3604 } 3605 3606 static int ena_xstats_get_by_id(struct rte_eth_dev *dev, 3607 const uint64_t *ids, 3608 uint64_t *values, 3609 unsigned int n) 3610 { 3611 struct ena_adapter *adapter = dev->data->dev_private; 3612 uint64_t id; 3613 uint64_t rx_entries, tx_entries; 3614 unsigned int i; 3615 int qid; 3616 int valid = 0; 3617 bool were_metrics_copied = false; 3618 bool was_srd_info_copied = false; 3619 uint64_t metrics_stats[ENA_MAX_CUSTOMER_METRICS]; 3620 struct ena_stats_srd srd_info = {0}; 3621 3622 for (i = 0; i < n; ++i) { 3623 id = ids[i]; 3624 /* Check if id belongs to global statistics */ 3625 if (id < ENA_STATS_ARRAY_GLOBAL) { 3626 values[i] = *((uint64_t *)&adapter->dev_stats + id); 3627 ++valid; 3628 continue; 3629 } 3630 3631 /* Check if id belongs to ENI statistics */ 3632 id -= ENA_STATS_ARRAY_GLOBAL; 3633 if (id < adapter->metrics_num) { 3634 /* Avoid reading metrics multiple times in a single 3635 * function call, as it requires communication with the 3636 * admin queue. 
             */
            if (!were_metrics_copied) {
                were_metrics_copied = true;
                ena_copy_customer_metrics(adapter,
                    metrics_stats,
                    adapter->metrics_num);
            }

            values[i] = *((uint64_t *)&metrics_stats + id);
            ++valid;
            continue;
        }

        /* Check if id belongs to SRD info statistics */
        id -= adapter->metrics_num;
        if (id < ENA_STATS_ARRAY_ENA_SRD) {
            /*
             * Avoid reading srd info multiple times in a single
             * function call, as it requires communication with the
             * admin queue.
             */
            if (!was_srd_info_copied) {
                was_srd_info_copied = true;
                ena_copy_ena_srd_info(adapter, &srd_info);
            }
            values[i] = *((uint64_t *)&adapter->srd_stats + id);
            ++valid;
            continue;
        }

        /* Check if id belongs to rx queue statistics */
        id -= ENA_STATS_ARRAY_ENA_SRD;
        rx_entries = ENA_STATS_ARRAY_RX * dev->data->nb_rx_queues;
        if (id < rx_entries) {
            qid = id % dev->data->nb_rx_queues;
            id /= dev->data->nb_rx_queues;
            values[i] = *((uint64_t *)
                &adapter->rx_ring[qid].rx_stats + id);
            ++valid;
            continue;
        }

        /* Check if id belongs to tx queue statistics */
        id -= rx_entries;
        tx_entries = ENA_STATS_ARRAY_TX * dev->data->nb_tx_queues;
        if (id < tx_entries) {
            qid = id % dev->data->nb_tx_queues;
            id /= dev->data->nb_tx_queues;
            values[i] = *((uint64_t *)
                &adapter->tx_ring[qid].tx_stats + id);
            ++valid;
            continue;
        }
    }

    return valid;
}

static int ena_process_uint_devarg(const char *key,
                                   const char *value,
                                   void *opaque)
{
    struct ena_adapter *adapter = opaque;
    char *str_end;
    uint64_t uint64_value;

    uint64_value = strtoull(value, &str_end, DECIMAL_BASE);
    if (value == str_end) {
        PMD_INIT_LOG_LINE(ERR,
            "Invalid value for key '%s'. Only uint values are accepted.",
            key);
        return -EINVAL;
    }

    if (strcmp(key, ENA_DEVARG_MISS_TXC_TO) == 0) {
        if (uint64_value > ENA_MAX_TX_TIMEOUT_SECONDS) {
            PMD_INIT_LOG_LINE(ERR,
                "Tx timeout too high: %" PRIu64 " sec. Maximum allowed: %d sec.",
                uint64_value, ENA_MAX_TX_TIMEOUT_SECONDS);
            return -EINVAL;
        } else if (uint64_value == 0) {
            PMD_INIT_LOG_LINE(INFO,
                "Check for missing Tx completions has been disabled.");
            adapter->missing_tx_completion_to =
                ENA_HW_HINTS_NO_TIMEOUT;
        } else {
            PMD_INIT_LOG_LINE(INFO,
                "Tx packet completion timeout set to %" PRIu64 " seconds.",
                uint64_value);
            adapter->missing_tx_completion_to =
                uint64_value * rte_get_timer_hz();
        }
    } else if (strcmp(key, ENA_DEVARG_CONTROL_PATH_POLL_INTERVAL) == 0) {
        if (uint64_value > ENA_MAX_CONTROL_PATH_POLL_INTERVAL_MSEC) {
            PMD_INIT_LOG_LINE(ERR,
                "Control path polling interval is too long: %" PRIu64 " msecs. "
                "Maximum allowed: %d msecs.",
                uint64_value, ENA_MAX_CONTROL_PATH_POLL_INTERVAL_MSEC);
            return -EINVAL;
        } else if (uint64_value == 0) {
            PMD_INIT_LOG_LINE(INFO,
                "Control path polling interval is set to zero. Operating in "
                "interrupt mode.");
            adapter->control_path_poll_interval = 0;
        } else {
            PMD_INIT_LOG_LINE(INFO,
                "Control path polling interval is set to %" PRIu64 " msecs.",
                uint64_value);
            adapter->control_path_poll_interval = uint64_value * USEC_PER_MSEC;
        }
    }

    return 0;
}

static int ena_process_llq_policy_devarg(const char *key, const char *value, void *opaque)
{
    struct ena_adapter *adapter = opaque;
    uint32_t policy;

    policy = strtoul(value, NULL, DECIMAL_BASE);
    if (policy < ENA_LLQ_POLICY_LAST) {
        adapter->llq_header_policy = policy;
    } else {
        PMD_INIT_LOG_LINE(ERR,
            "Invalid value: '%s' for key '%s'. Valid values: [0-3]",
            value, key);
        return -EINVAL;
    }
    PMD_INIT_LOG_LINE(INFO,
        "LLQ policy is %u [0 - disabled, 1 - device recommended, 2 - normal, 3 - large]",
        adapter->llq_header_policy);
    return 0;
}

static int ena_parse_devargs(struct ena_adapter *adapter, struct rte_devargs *devargs)
{
    static const char * const allowed_args[] = {
        ENA_DEVARG_LLQ_POLICY,
        ENA_DEVARG_MISS_TXC_TO,
        ENA_DEVARG_CONTROL_PATH_POLL_INTERVAL,
        NULL,
    };
    struct rte_kvargs *kvlist;
    int rc;

    if (devargs == NULL)
        return 0;

    kvlist = rte_kvargs_parse(devargs->args, allowed_args);
    if (kvlist == NULL) {
        PMD_INIT_LOG_LINE(ERR, "Invalid device arguments: %s",
            devargs->args);
        return -EINVAL;
    }

    rc = rte_kvargs_process(kvlist, ENA_DEVARG_LLQ_POLICY,
        ena_process_llq_policy_devarg, adapter);
    if (rc != 0)
        goto exit;
    rc = rte_kvargs_process(kvlist, ENA_DEVARG_MISS_TXC_TO,
        ena_process_uint_devarg, adapter);
    if (rc != 0)
        goto exit;
    rc = rte_kvargs_process(kvlist, ENA_DEVARG_CONTROL_PATH_POLL_INTERVAL,
        ena_process_uint_devarg, adapter);
    if (rc != 0)
        goto exit;

exit:
    rte_kvargs_free(kvlist);

    return rc;
}

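/*
 * Example usage of the device arguments handled above, assuming a testpmd
 * run and a hypothetical PCI address (0000:00:05.0); any EAL application
 * accepts the same comma-separated key=value list after the device address:
 *
 *     dpdk-testpmd -a 0000:00:05.0,llq_policy=3,miss_txc_to=5,control_path_poll_interval=500 -- -i
 *
 * Only the keys listed in allowed_args are accepted; an unknown key makes
 * rte_kvargs_parse() return NULL, which ena_parse_devargs() reports as -EINVAL.
 */
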
static int ena_setup_rx_intr(struct rte_eth_dev *dev)
{
    struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev);
    struct rte_intr_handle *intr_handle = pci_dev->intr_handle;
    int rc;
    uint16_t vectors_nb, i;
    bool rx_intr_requested = dev->data->dev_conf.intr_conf.rxq;

    if (!rx_intr_requested)
        return 0;

    if (!rte_intr_cap_multiple(intr_handle)) {
        PMD_DRV_LOG_LINE(ERR,
            "Rx interrupt requested, but it isn't supported by the PCI driver");
        return -ENOTSUP;
    }

    /* Disable interrupt mapping before the configuration starts. */
    rte_intr_disable(intr_handle);

    /* Verify that there are enough vectors available. */
    vectors_nb = dev->data->nb_rx_queues;
    if (vectors_nb > RTE_MAX_RXTX_INTR_VEC_ID) {
        PMD_DRV_LOG_LINE(ERR,
            "Too many Rx interrupts requested, maximum number: %d",
            RTE_MAX_RXTX_INTR_VEC_ID);
        rc = -ENOTSUP;
        goto enable_intr;
    }

    /* Allocate the vector list */
    if (rte_intr_vec_list_alloc(intr_handle, "intr_vec",
                                dev->data->nb_rx_queues)) {
        PMD_DRV_LOG_LINE(ERR,
            "Failed to allocate interrupt vector for %d queues",
            dev->data->nb_rx_queues);
        rc = -ENOMEM;
        goto enable_intr;
    }

    rc = rte_intr_efd_enable(intr_handle, vectors_nb);
    if (rc != 0)
        goto free_intr_vec;

    if (!rte_intr_allow_others(intr_handle)) {
        PMD_DRV_LOG_LINE(ERR,
            "Not enough interrupts available to use both ENA Admin and Rx interrupts");
        rc = -ENOTSUP;
        goto disable_intr_efd;
    }

    for (i = 0; i < vectors_nb; ++i) {
        if (rte_intr_vec_list_index_set(intr_handle, i,
                                        RTE_INTR_VEC_RXTX_OFFSET + i)) {
            rc = -EINVAL;
            goto disable_intr_efd;
        }
    }

    rte_intr_enable(intr_handle);
    return 0;

disable_intr_efd:
    rte_intr_efd_disable(intr_handle);
free_intr_vec:
    rte_intr_vec_list_free(intr_handle);
enable_intr:
    rte_intr_enable(intr_handle);
    return rc;
}

static void ena_rx_queue_intr_set(struct rte_eth_dev *dev,
                                  uint16_t queue_id,
                                  bool unmask)
{
    struct ena_adapter *adapter = dev->data->dev_private;
    struct ena_ring *rxq = &adapter->rx_ring[queue_id];
    struct ena_eth_io_intr_reg intr_reg;

    ena_com_update_intr_reg(&intr_reg, 0, 0, unmask, 1);
    ena_com_unmask_intr(rxq->ena_com_io_cq, &intr_reg);
}

static int ena_rx_queue_intr_enable(struct rte_eth_dev *dev,
                                    uint16_t queue_id)
{
    ena_rx_queue_intr_set(dev, queue_id, true);

    return 0;
}

static int ena_rx_queue_intr_disable(struct rte_eth_dev *dev,
                                     uint16_t queue_id)
{
    ena_rx_queue_intr_set(dev, queue_id, false);

    return 0;
}

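/*
 * Application-side sketch of the Rx interrupt path wired up above. It is
 * illustrative only (the port id, the single queue and the wait loop are
 * hypothetical); the generic ethdev calls shown are the ones expected to
 * land in ena_rx_queue_intr_enable() and ena_rx_queue_intr_disable():
 *
 *     struct rte_eth_conf conf = {0};
 *
 *     conf.intr_conf.rxq = 1;                  // request Rx queue interrupts
 *     rte_eth_dev_configure(port_id, 1, 1, &conf);
 *     ...
 *     rte_eth_dev_rx_intr_enable(port_id, 0);  // arm before sleeping
 *     // block on the interrupt event (e.g. rte_epoll_wait()), then:
 *     rte_eth_dev_rx_intr_disable(port_id, 0); // back to busy polling
 *     // drain the queue with rte_eth_rx_burst(port_id, 0, ...)
 */
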
static int ena_configure_aenq(struct ena_adapter *adapter)
{
    uint32_t aenq_groups = adapter->all_aenq_groups;
    int rc;

    /* all_aenq_groups holds all AENQ functions supported by both the driver
     * and the device, so first make sure the LSC request is valid.
     */
    if (adapter->edev_data->dev_conf.intr_conf.lsc != 0) {
        if (!(aenq_groups & BIT(ENA_ADMIN_LINK_CHANGE))) {
            PMD_DRV_LOG_LINE(ERR,
                "LSC requested, but it's not supported by the AENQ");
            return -EINVAL;
        }
    } else {
        /* If LSC wasn't enabled by the app, let's enable all supported
         * AENQ procedures except the LSC.
         */
        aenq_groups &= ~BIT(ENA_ADMIN_LINK_CHANGE);
    }

    rc = ena_com_set_aenq_config(&adapter->ena_dev, aenq_groups);
    if (rc != 0) {
        PMD_DRV_LOG_LINE(ERR, "Cannot configure AENQ groups, rc=%d", rc);
        return rc;
    }

    adapter->active_aenq_groups = aenq_groups;

    return 0;
}

int ena_mp_indirect_table_set(struct ena_adapter *adapter)
{
    return ENA_PROXY(adapter, ena_com_indirect_table_set, &adapter->ena_dev);
}

int ena_mp_indirect_table_get(struct ena_adapter *adapter,
                              uint32_t *indirect_table)
{
    return ENA_PROXY(adapter, ena_com_indirect_table_get, &adapter->ena_dev,
        indirect_table);
}

/*********************************************************************
 *  ena_plat_dpdk.h function implementations
 *********************************************************************/

const struct rte_memzone *
ena_mem_alloc_coherent(struct rte_eth_dev_data *data, size_t size,
                       int socket_id, unsigned int alignment, void **virt_addr,
                       dma_addr_t *phys_addr)
{
    char z_name[RTE_MEMZONE_NAMESIZE];
    struct ena_adapter *adapter = data->dev_private;
    const struct rte_memzone *memzone;
    int rc;

    rc = snprintf(z_name, RTE_MEMZONE_NAMESIZE, "ena_p%d_mz%" PRIu64 "",
        data->port_id, adapter->memzone_cnt);
    if (rc >= RTE_MEMZONE_NAMESIZE) {
        PMD_DRV_LOG_LINE(ERR,
            "Name for the ena_com memzone is too long. Port: %d, mz_num: %" PRIu64,
            data->port_id, adapter->memzone_cnt);
        goto error;
    }
    adapter->memzone_cnt++;

    memzone = rte_memzone_reserve_aligned(z_name, size, socket_id,
        RTE_MEMZONE_IOVA_CONTIG, alignment);
    if (memzone == NULL) {
        PMD_DRV_LOG_LINE(ERR, "Failed to allocate ena_com memzone: %s",
            z_name);
        goto error;
    }

    memset(memzone->addr, 0, size);
    *virt_addr = memzone->addr;
    *phys_addr = memzone->iova;

    return memzone;

error:
    *virt_addr = NULL;
    *phys_addr = 0;

    return NULL;
}

/*********************************************************************
 *  PMD configuration
 *********************************************************************/
static int eth_ena_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
                             struct rte_pci_device *pci_dev)
{
    return rte_eth_dev_pci_generic_probe(pci_dev,
        sizeof(struct ena_adapter), eth_ena_dev_init);
}

static int eth_ena_pci_remove(struct rte_pci_device *pci_dev)
{
    return rte_eth_dev_pci_generic_remove(pci_dev, eth_ena_dev_uninit);
}

static struct rte_pci_driver rte_ena_pmd = {
    .id_table = pci_id_ena_map,
    .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC |
                 RTE_PCI_DRV_WC_ACTIVATE,
    .probe = eth_ena_pci_probe,
    .remove = eth_ena_pci_remove,
};

RTE_PMD_REGISTER_PCI(net_ena, rte_ena_pmd);
RTE_PMD_REGISTER_PCI_TABLE(net_ena, pci_id_ena_map);
RTE_PMD_REGISTER_KMOD_DEP(net_ena, "* igb_uio | uio_pci_generic | vfio-pci");
RTE_PMD_REGISTER_PARAM_STRING(net_ena,
    ENA_DEVARG_LLQ_POLICY "=<0|1|2|3> "
    ENA_DEVARG_MISS_TXC_TO "=<uint> "
    ENA_DEVARG_CONTROL_PATH_POLL_INTERVAL "=<0-1000>");
RTE_LOG_REGISTER_SUFFIX(ena_logtype_init, init, NOTICE);
RTE_LOG_REGISTER_SUFFIX(ena_logtype_driver, driver, NOTICE);
#ifdef RTE_ETHDEV_DEBUG_RX
RTE_LOG_REGISTER_SUFFIX(ena_logtype_rx, rx, DEBUG);
#endif
#ifdef RTE_ETHDEV_DEBUG_TX
RTE_LOG_REGISTER_SUFFIX(ena_logtype_tx, tx, DEBUG);
#endif
RTE_LOG_REGISTER_SUFFIX(ena_logtype_com, com, WARNING);

/******************************************************************************
 ******************************** AENQ Handlers *******************************
 *****************************************************************************/
static void ena_update_on_link_change(void *adapter_data,
                                      struct ena_admin_aenq_entry *aenq_e)
{
    struct rte_eth_dev *eth_dev = adapter_data;
    struct ena_adapter *adapter = eth_dev->data->dev_private;
    struct ena_admin_aenq_link_change_desc *aenq_link_desc;
    uint32_t status;

    aenq_link_desc = (struct ena_admin_aenq_link_change_desc *)aenq_e;

    status = get_ena_admin_aenq_link_change_desc_link_status(aenq_link_desc);
    adapter->link_status = status;

    ena_link_update(eth_dev, 0);
    rte_eth_dev_callback_process(eth_dev, RTE_ETH_EVENT_INTR_LSC, NULL);
}

static void ena_notification(void *adapter_data,
                             struct ena_admin_aenq_entry *aenq_e)
{
    struct rte_eth_dev *eth_dev = adapter_data;
    struct ena_adapter *adapter = eth_dev->data->dev_private;
    struct ena_admin_ena_hw_hints *hints;

    if (aenq_e->aenq_common_desc.group != ENA_ADMIN_NOTIFICATION)
        PMD_DRV_LOG_LINE(WARNING, "Invalid AENQ group: %x. Expected: %x",
            aenq_e->aenq_common_desc.group,
            ENA_ADMIN_NOTIFICATION);

    switch (aenq_e->aenq_common_desc.syndrome) {
    case ENA_ADMIN_UPDATE_HINTS:
        hints = (struct ena_admin_ena_hw_hints *)
            (&aenq_e->inline_data_w4);
        ena_update_hints(adapter, hints);
        break;
    default:
        PMD_DRV_LOG_LINE(ERR, "Invalid AENQ notification syndrome: %d",
            aenq_e->aenq_common_desc.syndrome);
    }
}

static void ena_keep_alive(void *adapter_data,
                           struct ena_admin_aenq_entry *aenq_e)
{
    struct rte_eth_dev *eth_dev = adapter_data;
    struct ena_adapter *adapter = eth_dev->data->dev_private;
    struct ena_admin_aenq_keep_alive_desc *desc;
    uint64_t rx_drops;
    uint64_t tx_drops;
    uint64_t rx_overruns;

    adapter->timestamp_wd = rte_get_timer_cycles();

    desc = (struct ena_admin_aenq_keep_alive_desc *)aenq_e;
    rx_drops = ((uint64_t)desc->rx_drops_high << 32) | desc->rx_drops_low;
    tx_drops = ((uint64_t)desc->tx_drops_high << 32) | desc->tx_drops_low;
    rx_overruns = ((uint64_t)desc->rx_overruns_high << 32) | desc->rx_overruns_low;

    /*
     * Depending on its acceleration support, the device updates a different
     * statistic when an Rx packet is dropped because there are no available
     * buffers to accommodate it.
     */
    adapter->drv_stats->rx_drops = rx_drops + rx_overruns;
    adapter->dev_stats.tx_drops = tx_drops;
}

static void ena_suboptimal_configuration(__rte_unused void *adapter_data,
                                         struct ena_admin_aenq_entry *aenq_e)
{
    struct ena_admin_aenq_conf_notifications_desc *desc;
    int bit, num_bits;

    desc = (struct ena_admin_aenq_conf_notifications_desc *)aenq_e;
    num_bits = BITS_PER_TYPE(desc->notifications_bitmap);
    for (bit = 0; bit < num_bits; bit++) {
        if (desc->notifications_bitmap & RTE_BIT64(bit)) {
            PMD_DRV_LOG_LINE(WARNING,
                "Sub-optimal configuration notification code: %d", bit + 1);
        }
    }
}

/**
 * This handler will be called for an unknown event group or for events with
 * unimplemented handlers.
 **/
static void unimplemented_aenq_handler(__rte_unused void *data,
                                       __rte_unused struct ena_admin_aenq_entry *aenq_e)
{
    PMD_DRV_LOG_LINE(ERR,
        "Unknown event was received or event with unimplemented handler");
}

static struct ena_aenq_handlers aenq_handlers = {
    .handlers = {
        [ENA_ADMIN_LINK_CHANGE] = ena_update_on_link_change,
        [ENA_ADMIN_NOTIFICATION] = ena_notification,
        [ENA_ADMIN_KEEP_ALIVE] = ena_keep_alive,
        [ENA_ADMIN_CONF_NOTIFICATIONS] = ena_suboptimal_configuration
    },
    .unimplemented_handler = unimplemented_aenq_handler
};

/*********************************************************************
 *  Multi-Process communication request handling (in primary)
 *********************************************************************/
static int
ena_mp_primary_handle(const struct rte_mp_msg *mp_msg, const void *peer)
{
    const struct ena_mp_body *req =
        (const struct ena_mp_body *)mp_msg->param;
    struct ena_adapter *adapter;
    struct ena_com_dev *ena_dev;
    struct ena_mp_body *rsp;
    struct rte_mp_msg mp_rsp;
    struct rte_eth_dev *dev;
    int res = 0;

    rsp = (struct ena_mp_body *)&mp_rsp.param;
    mp_msg_init(&mp_rsp, req->type, req->port_id);

    if (!rte_eth_dev_is_valid_port(req->port_id)) {
        rte_errno = ENODEV;
        res = -rte_errno;
        PMD_DRV_LOG_LINE(ERR, "Unknown port %d in request %d",
            req->port_id, req->type);
        goto end;
    }
    dev = &rte_eth_devices[req->port_id];
    adapter = dev->data->dev_private;
    ena_dev = &adapter->ena_dev;

    switch (req->type) {
    case ENA_MP_DEV_STATS_GET:
        res = ena_com_get_dev_basic_stats(ena_dev,
            &adapter->basic_stats);
        break;
    case ENA_MP_ENI_STATS_GET:
        res = ena_com_get_eni_stats(ena_dev,
            (struct ena_admin_eni_stats *)&adapter->metrics_stats);
        break;
    case ENA_MP_MTU_SET:
        res = ena_com_set_dev_mtu(ena_dev, req->args.mtu);
        break;
    case ENA_MP_IND_TBL_GET:
        res = ena_com_indirect_table_get(ena_dev,
            adapter->indirect_table);
        break;
    case ENA_MP_IND_TBL_SET:
        res = ena_com_indirect_table_set(ena_dev);
        break;
    case ENA_MP_CUSTOMER_METRICS_GET:
        res = ena_com_get_customer_metrics(ena_dev,
            (char *)adapter->metrics_stats,
            adapter->metrics_num * sizeof(uint64_t));
        break;
    case ENA_MP_SRD_STATS_GET:
        res = ena_com_get_ena_srd_info(ena_dev,
            (struct ena_admin_ena_srd_info *)&adapter->srd_stats);
        break;
    default:
        PMD_DRV_LOG_LINE(ERR, "Unknown request type %d", req->type);
        res = -EINVAL;
        break;
    }

end:
    /* Save processing result in the reply */
    rsp->result = res;
    /* Return just IPC processing status */
    return rte_mp_reply(&mp_rsp, peer);
}

static bool ena_use_large_llq_hdr(struct ena_adapter *adapter, uint8_t recommended_entry_size)
{
    if (adapter->llq_header_policy == ENA_LLQ_POLICY_LARGE) {
        return true;
    } else if (adapter->llq_header_policy == ENA_LLQ_POLICY_RECOMMENDED) {
        PMD_DRV_LOG_LINE(INFO, "Recommended device entry size policy %u",
            recommended_entry_size);
        if (recommended_entry_size == ENA_ADMIN_LIST_ENTRY_SIZE_256B)
            return true;
    }
    return false;
}
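
/*
 * Rough sizing sketch behind the large-LLQ decision above. The exact
 * per-entry descriptor overhead is device dependent, so treat the numbers as
 * an assumption rather than a specification: a regular 128 B LLQ entry
 * leaves roughly 96 B for the pushed packet headers, so a header chain such
 * as Ethernet (14 B) + IPv6 (40 B) + TCP with a full option block (60 B) =
 * 114 B no longer fits and calls for the 256 B (large) entries selected by
 * ena_use_large_llq_hdr().
 */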