1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright (c) 2015-2020 Amazon.com, Inc. or its affiliates. 3 * All rights reserved. 4 */ 5 6 #include <rte_alarm.h> 7 #include <rte_string_fns.h> 8 #include <rte_errno.h> 9 #include <rte_version.h> 10 #include <rte_net.h> 11 #include <rte_kvargs.h> 12 13 #include "ena_ethdev.h" 14 #include "ena_logs.h" 15 #include "ena_platform.h" 16 #include "ena_com.h" 17 #include "ena_eth_com.h" 18 19 #include <ena_common_defs.h> 20 #include <ena_regs_defs.h> 21 #include <ena_admin_defs.h> 22 #include <ena_eth_io_defs.h> 23 24 #define DRV_MODULE_VER_MAJOR 2 25 #define DRV_MODULE_VER_MINOR 11 26 #define DRV_MODULE_VER_SUBMINOR 0 27 28 #define __MERGE_64B_H_L(h, l) (((uint64_t)h << 32) | l) 29 30 #define GET_L4_HDR_LEN(mbuf) \ 31 ((rte_pktmbuf_mtod_offset(mbuf, struct rte_tcp_hdr *, \ 32 mbuf->l3_len + mbuf->l2_len)->data_off) >> 4) 33 34 #define ETH_GSTRING_LEN 32 35 36 #define ARRAY_SIZE(x) RTE_DIM(x) 37 38 #define ENA_MIN_RING_DESC 128 39 40 #define USEC_PER_MSEC 1000UL 41 42 #define BITS_PER_BYTE 8 43 44 #define BITS_PER_TYPE(type) (sizeof(type) * BITS_PER_BYTE) 45 46 #define DECIMAL_BASE 10 47 48 #define MAX_WIDE_LLQ_DEPTH_UNSUPPORTED 0 49 50 /* 51 * We should try to keep ENA_CLEANUP_BUF_THRESH lower than 52 * RTE_MEMPOOL_CACHE_MAX_SIZE, so we can fit this in mempool local cache. 53 */ 54 #define ENA_CLEANUP_BUF_THRESH 256 55 56 struct ena_stats { 57 char name[ETH_GSTRING_LEN]; 58 int stat_offset; 59 }; 60 61 #define ENA_STAT_ENTRY(stat, stat_type) { \ 62 .name = #stat, \ 63 .stat_offset = offsetof(struct ena_stats_##stat_type, stat) \ 64 } 65 66 #define ENA_STAT_RX_ENTRY(stat) \ 67 ENA_STAT_ENTRY(stat, rx) 68 69 #define ENA_STAT_TX_ENTRY(stat) \ 70 ENA_STAT_ENTRY(stat, tx) 71 72 #define ENA_STAT_METRICS_ENTRY(stat) \ 73 ENA_STAT_ENTRY(stat, metrics) 74 75 #define ENA_STAT_GLOBAL_ENTRY(stat) \ 76 ENA_STAT_ENTRY(stat, dev) 77 78 #define ENA_STAT_ENA_SRD_ENTRY(stat) \ 79 ENA_STAT_ENTRY(stat, srd) 80 81 /* Device arguments */ 82 83 /* llq_policy Controls whether to disable LLQ, use device recommended 84 * header policy or overriding the device recommendation. 85 * 0 - Disable LLQ. Use with extreme caution as it leads to a huge 86 * performance degradation on AWS instances built with Nitro v4 onwards. 87 * 1 - Accept device recommended LLQ policy (Default). 88 * Device can recommend normal or large LLQ policy. 89 * 2 - Enforce normal LLQ policy. 90 * 3 - Enforce large LLQ policy. 91 * Required for packets with header that exceed 96 bytes on 92 * AWS instances built with Nitro v2 and Nitro v1. 93 */ 94 #define ENA_DEVARG_LLQ_POLICY "llq_policy" 95 96 /* Timeout in seconds after which a single uncompleted Tx packet should be 97 * considered as a missing. 98 */ 99 #define ENA_DEVARG_MISS_TXC_TO "miss_txc_to" 100 101 /* 102 * Controls the period of time (in milliseconds) between two consecutive inspections of 103 * the control queues when the driver is in poll mode and not using interrupts. 104 * By default, this value is zero, indicating that the driver will not be in poll mode and will 105 * use interrupts. A non-zero value for this argument is mandatory when using uio_pci_generic 106 * driver. 107 */ 108 #define ENA_DEVARG_CONTROL_PATH_POLL_INTERVAL "control_path_poll_interval" 109 110 /* 111 * Each rte_memzone should have unique name. 112 * To satisfy it, count number of allocation and add it to name. 113 */ 114 rte_atomic64_t ena_alloc_cnt; 115 116 static const struct ena_stats ena_stats_global_strings[] = { 117 ENA_STAT_GLOBAL_ENTRY(wd_expired), 118 ENA_STAT_GLOBAL_ENTRY(dev_start), 119 ENA_STAT_GLOBAL_ENTRY(dev_stop), 120 ENA_STAT_GLOBAL_ENTRY(tx_drops), 121 }; 122 123 /* 124 * The legacy metrics (also known as eni stats) consisted of 5 stats, while the reworked 125 * metrics (also known as customer metrics) support an additional stat. 126 */ 127 static struct ena_stats ena_stats_metrics_strings[] = { 128 ENA_STAT_METRICS_ENTRY(bw_in_allowance_exceeded), 129 ENA_STAT_METRICS_ENTRY(bw_out_allowance_exceeded), 130 ENA_STAT_METRICS_ENTRY(pps_allowance_exceeded), 131 ENA_STAT_METRICS_ENTRY(conntrack_allowance_exceeded), 132 ENA_STAT_METRICS_ENTRY(linklocal_allowance_exceeded), 133 ENA_STAT_METRICS_ENTRY(conntrack_allowance_available), 134 }; 135 136 static const struct ena_stats ena_stats_srd_strings[] = { 137 ENA_STAT_ENA_SRD_ENTRY(ena_srd_mode), 138 ENA_STAT_ENA_SRD_ENTRY(ena_srd_tx_pkts), 139 ENA_STAT_ENA_SRD_ENTRY(ena_srd_eligible_tx_pkts), 140 ENA_STAT_ENA_SRD_ENTRY(ena_srd_rx_pkts), 141 ENA_STAT_ENA_SRD_ENTRY(ena_srd_resource_utilization), 142 }; 143 144 static const struct ena_stats ena_stats_tx_strings[] = { 145 ENA_STAT_TX_ENTRY(cnt), 146 ENA_STAT_TX_ENTRY(bytes), 147 ENA_STAT_TX_ENTRY(prepare_ctx_err), 148 ENA_STAT_TX_ENTRY(tx_poll), 149 ENA_STAT_TX_ENTRY(doorbells), 150 ENA_STAT_TX_ENTRY(bad_req_id), 151 ENA_STAT_TX_ENTRY(available_desc), 152 ENA_STAT_TX_ENTRY(missed_tx), 153 }; 154 155 static const struct ena_stats ena_stats_rx_strings[] = { 156 ENA_STAT_RX_ENTRY(cnt), 157 ENA_STAT_RX_ENTRY(bytes), 158 ENA_STAT_RX_ENTRY(refill_partial), 159 ENA_STAT_RX_ENTRY(l3_csum_bad), 160 ENA_STAT_RX_ENTRY(l4_csum_bad), 161 ENA_STAT_RX_ENTRY(l4_csum_good), 162 ENA_STAT_RX_ENTRY(mbuf_alloc_fail), 163 ENA_STAT_RX_ENTRY(bad_desc_num), 164 ENA_STAT_RX_ENTRY(bad_req_id), 165 ENA_STAT_RX_ENTRY(bad_desc), 166 ENA_STAT_RX_ENTRY(unknown_error), 167 }; 168 169 #define ENA_STATS_ARRAY_GLOBAL ARRAY_SIZE(ena_stats_global_strings) 170 #define ENA_STATS_ARRAY_METRICS ARRAY_SIZE(ena_stats_metrics_strings) 171 #define ENA_STATS_ARRAY_METRICS_LEGACY (ENA_STATS_ARRAY_METRICS - 1) 172 #define ENA_STATS_ARRAY_ENA_SRD ARRAY_SIZE(ena_stats_srd_strings) 173 #define ENA_STATS_ARRAY_TX ARRAY_SIZE(ena_stats_tx_strings) 174 #define ENA_STATS_ARRAY_RX ARRAY_SIZE(ena_stats_rx_strings) 175 176 #define QUEUE_OFFLOADS (RTE_ETH_TX_OFFLOAD_TCP_CKSUM |\ 177 RTE_ETH_TX_OFFLOAD_UDP_CKSUM |\ 178 RTE_ETH_TX_OFFLOAD_IPV4_CKSUM |\ 179 RTE_ETH_TX_OFFLOAD_TCP_TSO) 180 #define MBUF_OFFLOADS (RTE_MBUF_F_TX_L4_MASK |\ 181 RTE_MBUF_F_TX_IP_CKSUM |\ 182 RTE_MBUF_F_TX_TCP_SEG) 183 184 /** Vendor ID used by Amazon devices */ 185 #define PCI_VENDOR_ID_AMAZON 0x1D0F 186 /** Amazon devices */ 187 #define PCI_DEVICE_ID_ENA_VF 0xEC20 188 #define PCI_DEVICE_ID_ENA_VF_RSERV0 0xEC21 189 190 #define ENA_TX_OFFLOAD_MASK (RTE_MBUF_F_TX_L4_MASK | \ 191 RTE_MBUF_F_TX_IPV6 | \ 192 RTE_MBUF_F_TX_IPV4 | \ 193 RTE_MBUF_F_TX_IP_CKSUM | \ 194 RTE_MBUF_F_TX_TCP_SEG) 195 196 #define ENA_TX_OFFLOAD_NOTSUP_MASK \ 197 (RTE_MBUF_F_TX_OFFLOAD_MASK ^ ENA_TX_OFFLOAD_MASK) 198 199 /** HW specific offloads capabilities. */ 200 /* IPv4 checksum offload. */ 201 #define ENA_L3_IPV4_CSUM 0x0001 202 /* TCP/UDP checksum offload for IPv4 packets. */ 203 #define ENA_L4_IPV4_CSUM 0x0002 204 /* TCP/UDP checksum offload for IPv4 packets with pseudo header checksum. */ 205 #define ENA_L4_IPV4_CSUM_PARTIAL 0x0004 206 /* TCP/UDP checksum offload for IPv6 packets. */ 207 #define ENA_L4_IPV6_CSUM 0x0008 208 /* TCP/UDP checksum offload for IPv6 packets with pseudo header checksum. */ 209 #define ENA_L4_IPV6_CSUM_PARTIAL 0x0010 210 /* TSO support for IPv4 packets. */ 211 #define ENA_IPV4_TSO 0x0020 212 213 /* Device supports setting RSS hash. */ 214 #define ENA_RX_RSS_HASH 0x0040 215 216 static const struct rte_pci_id pci_id_ena_map[] = { 217 { RTE_PCI_DEVICE(PCI_VENDOR_ID_AMAZON, PCI_DEVICE_ID_ENA_VF) }, 218 { RTE_PCI_DEVICE(PCI_VENDOR_ID_AMAZON, PCI_DEVICE_ID_ENA_VF_RSERV0) }, 219 { .device_id = 0 }, 220 }; 221 222 static struct ena_aenq_handlers aenq_handlers; 223 224 static int ena_device_init(struct ena_adapter *adapter, 225 struct rte_pci_device *pdev, 226 struct ena_com_dev_get_features_ctx *get_feat_ctx); 227 static int ena_dev_configure(struct rte_eth_dev *dev); 228 static void ena_tx_map_mbuf(struct ena_ring *tx_ring, 229 struct ena_tx_buffer *tx_info, 230 struct rte_mbuf *mbuf, 231 void **push_header, 232 uint16_t *header_len); 233 static int ena_xmit_mbuf(struct ena_ring *tx_ring, struct rte_mbuf *mbuf); 234 static int ena_tx_cleanup(void *txp, uint32_t free_pkt_cnt); 235 static uint16_t eth_ena_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, 236 uint16_t nb_pkts); 237 static uint16_t eth_ena_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, 238 uint16_t nb_pkts); 239 static int ena_tx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx, 240 uint16_t nb_desc, unsigned int socket_id, 241 const struct rte_eth_txconf *tx_conf); 242 static int ena_rx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx, 243 uint16_t nb_desc, unsigned int socket_id, 244 const struct rte_eth_rxconf *rx_conf, 245 struct rte_mempool *mp); 246 static inline void ena_init_rx_mbuf(struct rte_mbuf *mbuf, uint16_t len); 247 static struct rte_mbuf *ena_rx_mbuf(struct ena_ring *rx_ring, 248 struct ena_com_rx_buf_info *ena_bufs, 249 uint32_t descs, 250 uint16_t *next_to_clean, 251 uint8_t offset); 252 static uint16_t eth_ena_recv_pkts(void *rx_queue, 253 struct rte_mbuf **rx_pkts, uint16_t nb_pkts); 254 static int ena_add_single_rx_desc(struct ena_com_io_sq *io_sq, 255 struct rte_mbuf *mbuf, uint16_t id); 256 static int ena_populate_rx_queue(struct ena_ring *rxq, unsigned int count); 257 static void ena_init_rings(struct ena_adapter *adapter, 258 bool disable_meta_caching); 259 static int ena_mtu_set(struct rte_eth_dev *dev, uint16_t mtu); 260 static int ena_start(struct rte_eth_dev *dev); 261 static int ena_stop(struct rte_eth_dev *dev); 262 static int ena_close(struct rte_eth_dev *dev); 263 static int ena_dev_reset(struct rte_eth_dev *dev); 264 static int ena_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats); 265 static void ena_rx_queue_release_all(struct rte_eth_dev *dev); 266 static void ena_tx_queue_release_all(struct rte_eth_dev *dev); 267 static void ena_rx_queue_release(struct rte_eth_dev *dev, uint16_t qid); 268 static void ena_tx_queue_release(struct rte_eth_dev *dev, uint16_t qid); 269 static void ena_rx_queue_release_bufs(struct ena_ring *ring); 270 static void ena_tx_queue_release_bufs(struct ena_ring *ring); 271 static int ena_link_update(struct rte_eth_dev *dev, 272 int wait_to_complete); 273 static int ena_create_io_queue(struct rte_eth_dev *dev, struct ena_ring *ring); 274 static void ena_queue_stop(struct ena_ring *ring); 275 static void ena_queue_stop_all(struct rte_eth_dev *dev, 276 enum ena_ring_type ring_type); 277 static int ena_queue_start(struct rte_eth_dev *dev, struct ena_ring *ring); 278 static int ena_queue_start_all(struct rte_eth_dev *dev, 279 enum ena_ring_type ring_type); 280 static void ena_stats_restart(struct rte_eth_dev *dev); 281 static uint64_t ena_get_rx_port_offloads(struct ena_adapter *adapter); 282 static uint64_t ena_get_tx_port_offloads(struct ena_adapter *adapter); 283 static uint64_t ena_get_rx_queue_offloads(struct ena_adapter *adapter); 284 static uint64_t ena_get_tx_queue_offloads(struct ena_adapter *adapter); 285 static int ena_infos_get(struct rte_eth_dev *dev, 286 struct rte_eth_dev_info *dev_info); 287 static void ena_control_path_handler(void *cb_arg); 288 static void ena_control_path_poll_handler(void *cb_arg); 289 static void ena_timer_wd_callback(struct rte_timer *timer, void *arg); 290 static int eth_ena_dev_init(struct rte_eth_dev *eth_dev); 291 static int eth_ena_dev_uninit(struct rte_eth_dev *eth_dev); 292 static int ena_xstats_get_names(struct rte_eth_dev *dev, 293 struct rte_eth_xstat_name *xstats_names, 294 unsigned int n); 295 static int ena_xstats_get_names_by_id(struct rte_eth_dev *dev, 296 const uint64_t *ids, 297 struct rte_eth_xstat_name *xstats_names, 298 unsigned int size); 299 static int ena_xstats_get(struct rte_eth_dev *dev, 300 struct rte_eth_xstat *stats, 301 unsigned int n); 302 static int ena_xstats_get_by_id(struct rte_eth_dev *dev, 303 const uint64_t *ids, 304 uint64_t *values, 305 unsigned int n); 306 static int ena_process_llq_policy_devarg(const char *key, 307 const char *value, 308 void *opaque); 309 static int ena_parse_devargs(struct ena_adapter *adapter, 310 struct rte_devargs *devargs); 311 static void ena_copy_customer_metrics(struct ena_adapter *adapter, 312 uint64_t *buf, 313 size_t buf_size); 314 static void ena_copy_ena_srd_info(struct ena_adapter *adapter, 315 struct ena_stats_srd *srd_info); 316 static int ena_setup_rx_intr(struct rte_eth_dev *dev); 317 static int ena_rx_queue_intr_enable(struct rte_eth_dev *dev, 318 uint16_t queue_id); 319 static int ena_rx_queue_intr_disable(struct rte_eth_dev *dev, 320 uint16_t queue_id); 321 static int ena_configure_aenq(struct ena_adapter *adapter); 322 static int ena_mp_primary_handle(const struct rte_mp_msg *mp_msg, 323 const void *peer); 324 static bool ena_use_large_llq_hdr(struct ena_adapter *adapter, uint8_t recommended_entry_size); 325 326 static const struct eth_dev_ops ena_dev_ops = { 327 .dev_configure = ena_dev_configure, 328 .dev_infos_get = ena_infos_get, 329 .rx_queue_setup = ena_rx_queue_setup, 330 .tx_queue_setup = ena_tx_queue_setup, 331 .dev_start = ena_start, 332 .dev_stop = ena_stop, 333 .link_update = ena_link_update, 334 .stats_get = ena_stats_get, 335 .xstats_get_names = ena_xstats_get_names, 336 .xstats_get_names_by_id = ena_xstats_get_names_by_id, 337 .xstats_get = ena_xstats_get, 338 .xstats_get_by_id = ena_xstats_get_by_id, 339 .mtu_set = ena_mtu_set, 340 .rx_queue_release = ena_rx_queue_release, 341 .tx_queue_release = ena_tx_queue_release, 342 .dev_close = ena_close, 343 .dev_reset = ena_dev_reset, 344 .reta_update = ena_rss_reta_update, 345 .reta_query = ena_rss_reta_query, 346 .rx_queue_intr_enable = ena_rx_queue_intr_enable, 347 .rx_queue_intr_disable = ena_rx_queue_intr_disable, 348 .rss_hash_update = ena_rss_hash_update, 349 .rss_hash_conf_get = ena_rss_hash_conf_get, 350 .tx_done_cleanup = ena_tx_cleanup, 351 }; 352 353 /********************************************************************* 354 * Multi-Process communication bits 355 *********************************************************************/ 356 /* rte_mp IPC message name */ 357 #define ENA_MP_NAME "net_ena_mp" 358 /* Request timeout in seconds */ 359 #define ENA_MP_REQ_TMO 5 360 361 /** Proxy request type */ 362 enum ena_mp_req { 363 ENA_MP_DEV_STATS_GET, 364 ENA_MP_ENI_STATS_GET, 365 ENA_MP_MTU_SET, 366 ENA_MP_IND_TBL_GET, 367 ENA_MP_IND_TBL_SET, 368 ENA_MP_CUSTOMER_METRICS_GET, 369 ENA_MP_SRD_STATS_GET, 370 }; 371 372 /** Proxy message body. Shared between requests and responses. */ 373 struct ena_mp_body { 374 /* Message type */ 375 enum ena_mp_req type; 376 int port_id; 377 /* Processing result. Set in replies. 0 if message succeeded, negative 378 * error code otherwise. 379 */ 380 int result; 381 union { 382 int mtu; /* For ENA_MP_MTU_SET */ 383 } args; 384 }; 385 386 /** 387 * Initialize IPC message. 388 * 389 * @param[out] msg 390 * Pointer to the message to initialize. 391 * @param[in] type 392 * Message type. 393 * @param[in] port_id 394 * Port ID of target device. 395 * 396 */ 397 static void 398 mp_msg_init(struct rte_mp_msg *msg, enum ena_mp_req type, int port_id) 399 { 400 struct ena_mp_body *body = (struct ena_mp_body *)&msg->param; 401 402 memset(msg, 0, sizeof(*msg)); 403 strlcpy(msg->name, ENA_MP_NAME, sizeof(msg->name)); 404 msg->len_param = sizeof(*body); 405 body->type = type; 406 body->port_id = port_id; 407 } 408 409 /********************************************************************* 410 * Multi-Process communication PMD API 411 *********************************************************************/ 412 /** 413 * Define proxy request descriptor 414 * 415 * Used to define all structures and functions required for proxying a given 416 * function to the primary process including the code to perform to prepare the 417 * request and process the response. 418 * 419 * @param[in] f 420 * Name of the function to proxy 421 * @param[in] t 422 * Message type to use 423 * @param[in] prep 424 * Body of a function to prepare the request in form of a statement 425 * expression. It is passed all the original function arguments along with two 426 * extra ones: 427 * - struct ena_adapter *adapter - PMD data of the device calling the proxy. 428 * - struct ena_mp_body *req - body of a request to prepare. 429 * @param[in] proc 430 * Body of a function to process the response in form of a statement 431 * expression. It is passed all the original function arguments along with two 432 * extra ones: 433 * - struct ena_adapter *adapter - PMD data of the device calling the proxy. 434 * - struct ena_mp_body *rsp - body of a response to process. 435 * @param ... 436 * Proxied function's arguments 437 * 438 * @note Inside prep and proc any parameters which aren't used should be marked 439 * as such (with ENA_TOUCH or __rte_unused). 440 */ 441 #define ENA_PROXY_DESC(f, t, prep, proc, ...) \ 442 static const enum ena_mp_req mp_type_ ## f = t; \ 443 static const char *mp_name_ ## f = #t; \ 444 static void mp_prep_ ## f(struct ena_adapter *adapter, \ 445 struct ena_mp_body *req, \ 446 __VA_ARGS__) \ 447 { \ 448 prep; \ 449 } \ 450 static void mp_proc_ ## f(struct ena_adapter *adapter, \ 451 struct ena_mp_body *rsp, \ 452 __VA_ARGS__) \ 453 { \ 454 proc; \ 455 } 456 457 /** 458 * Proxy wrapper for calling primary functions in a secondary process. 459 * 460 * Depending on whether called in primary or secondary process, calls the 461 * @p func directly or proxies the call to the primary process via rte_mp IPC. 462 * This macro requires a proxy request descriptor to be defined for @p func 463 * using ENA_PROXY_DESC() macro. 464 * 465 * @param[in/out] a 466 * Device PMD data. Used for sending the message and sharing message results 467 * between primary and secondary. 468 * @param[in] f 469 * Function to proxy. 470 * @param ... 471 * Arguments of @p func. 472 * 473 * @return 474 * - 0: Processing succeeded and response handler was called. 475 * - -EPERM: IPC is unavailable on this platform. This means only primary 476 * process may call the proxied function. 477 * - -EIO: IPC returned error on request send. Inspect rte_errno detailed 478 * error code. 479 * - Negative error code from the proxied function. 480 * 481 * @note This mechanism is geared towards control-path tasks. Avoid calling it 482 * in fast-path unless unbound delays are allowed. This is due to the IPC 483 * mechanism itself (socket based). 484 * @note Due to IPC parameter size limitations the proxy logic shares call 485 * results through the struct ena_adapter shared memory. This makes the 486 * proxy mechanism strictly single-threaded. Therefore be sure to make all 487 * calls to the same proxied function under the same lock. 488 */ 489 #define ENA_PROXY(a, f, ...) \ 490 __extension__ ({ \ 491 struct ena_adapter *_a = (a); \ 492 struct timespec ts = { .tv_sec = ENA_MP_REQ_TMO }; \ 493 struct ena_mp_body *req, *rsp; \ 494 struct rte_mp_reply mp_rep; \ 495 struct rte_mp_msg mp_req; \ 496 int ret; \ 497 \ 498 if (rte_eal_process_type() == RTE_PROC_PRIMARY) { \ 499 ret = f(__VA_ARGS__); \ 500 } else { \ 501 /* Prepare and send request */ \ 502 req = (struct ena_mp_body *)&mp_req.param; \ 503 mp_msg_init(&mp_req, mp_type_ ## f, _a->edev_data->port_id); \ 504 mp_prep_ ## f(_a, req, ## __VA_ARGS__); \ 505 \ 506 ret = rte_mp_request_sync(&mp_req, &mp_rep, &ts); \ 507 if (likely(!ret)) { \ 508 RTE_ASSERT(mp_rep.nb_received == 1); \ 509 rsp = (struct ena_mp_body *)&mp_rep.msgs[0].param; \ 510 ret = rsp->result; \ 511 if (ret == 0) { \ 512 mp_proc_##f(_a, rsp, ## __VA_ARGS__); \ 513 } else { \ 514 PMD_DRV_LOG_LINE(ERR, \ 515 "%s returned error: %d", \ 516 mp_name_ ## f, rsp->result);\ 517 } \ 518 free(mp_rep.msgs); \ 519 } else if (rte_errno == ENOTSUP) { \ 520 PMD_DRV_LOG_LINE(ERR, \ 521 "No IPC, can't proxy to primary");\ 522 ret = -rte_errno; \ 523 } else { \ 524 PMD_DRV_LOG_LINE(ERR, "Request %s failed: %s", \ 525 mp_name_ ## f, \ 526 rte_strerror(rte_errno)); \ 527 ret = -EIO; \ 528 } \ 529 } \ 530 ret; \ 531 }) 532 533 /********************************************************************* 534 * Multi-Process communication request descriptors 535 *********************************************************************/ 536 537 ENA_PROXY_DESC(ena_com_get_dev_basic_stats, ENA_MP_DEV_STATS_GET, 538 __extension__ ({ 539 ENA_TOUCH(adapter); 540 ENA_TOUCH(req); 541 ENA_TOUCH(ena_dev); 542 ENA_TOUCH(stats); 543 }), 544 __extension__ ({ 545 ENA_TOUCH(rsp); 546 ENA_TOUCH(ena_dev); 547 if (stats != &adapter->basic_stats) 548 rte_memcpy(stats, &adapter->basic_stats, sizeof(*stats)); 549 }), 550 struct ena_com_dev *ena_dev, struct ena_admin_basic_stats *stats); 551 552 ENA_PROXY_DESC(ena_com_get_eni_stats, ENA_MP_ENI_STATS_GET, 553 __extension__ ({ 554 ENA_TOUCH(adapter); 555 ENA_TOUCH(req); 556 ENA_TOUCH(ena_dev); 557 ENA_TOUCH(stats); 558 }), 559 __extension__ ({ 560 ENA_TOUCH(rsp); 561 ENA_TOUCH(ena_dev); 562 if (stats != (struct ena_admin_eni_stats *)adapter->metrics_stats) 563 rte_memcpy(stats, adapter->metrics_stats, sizeof(*stats)); 564 }), 565 struct ena_com_dev *ena_dev, struct ena_admin_eni_stats *stats); 566 567 ENA_PROXY_DESC(ena_com_set_dev_mtu, ENA_MP_MTU_SET, 568 __extension__ ({ 569 ENA_TOUCH(adapter); 570 ENA_TOUCH(ena_dev); 571 req->args.mtu = mtu; 572 }), 573 __extension__ ({ 574 ENA_TOUCH(adapter); 575 ENA_TOUCH(rsp); 576 ENA_TOUCH(ena_dev); 577 ENA_TOUCH(mtu); 578 }), 579 struct ena_com_dev *ena_dev, int mtu); 580 581 ENA_PROXY_DESC(ena_com_indirect_table_set, ENA_MP_IND_TBL_SET, 582 __extension__ ({ 583 ENA_TOUCH(adapter); 584 ENA_TOUCH(req); 585 ENA_TOUCH(ena_dev); 586 }), 587 __extension__ ({ 588 ENA_TOUCH(adapter); 589 ENA_TOUCH(rsp); 590 ENA_TOUCH(ena_dev); 591 }), 592 struct ena_com_dev *ena_dev); 593 594 ENA_PROXY_DESC(ena_com_indirect_table_get, ENA_MP_IND_TBL_GET, 595 __extension__ ({ 596 ENA_TOUCH(adapter); 597 ENA_TOUCH(req); 598 ENA_TOUCH(ena_dev); 599 ENA_TOUCH(ind_tbl); 600 }), 601 __extension__ ({ 602 ENA_TOUCH(rsp); 603 ENA_TOUCH(ena_dev); 604 if (ind_tbl != adapter->indirect_table) 605 rte_memcpy(ind_tbl, adapter->indirect_table, 606 sizeof(adapter->indirect_table)); 607 }), 608 struct ena_com_dev *ena_dev, u32 *ind_tbl); 609 610 ENA_PROXY_DESC(ena_com_get_customer_metrics, ENA_MP_CUSTOMER_METRICS_GET, 611 __extension__ ({ 612 ENA_TOUCH(adapter); 613 ENA_TOUCH(req); 614 ENA_TOUCH(ena_dev); 615 ENA_TOUCH(buf); 616 ENA_TOUCH(buf_size); 617 }), 618 __extension__ ({ 619 ENA_TOUCH(rsp); 620 ENA_TOUCH(ena_dev); 621 if (buf != (char *)adapter->metrics_stats) 622 rte_memcpy(buf, adapter->metrics_stats, buf_size); 623 }), 624 struct ena_com_dev *ena_dev, char *buf, size_t buf_size); 625 626 ENA_PROXY_DESC(ena_com_get_ena_srd_info, ENA_MP_SRD_STATS_GET, 627 __extension__ ({ 628 ENA_TOUCH(adapter); 629 ENA_TOUCH(req); 630 ENA_TOUCH(ena_dev); 631 ENA_TOUCH(info); 632 }), 633 __extension__ ({ 634 ENA_TOUCH(rsp); 635 ENA_TOUCH(ena_dev); 636 if ((struct ena_stats_srd *)info != &adapter->srd_stats) 637 rte_memcpy((struct ena_stats_srd *)info, 638 &adapter->srd_stats, 639 sizeof(struct ena_stats_srd)); 640 }), 641 struct ena_com_dev *ena_dev, struct ena_admin_ena_srd_info *info); 642 643 static inline void ena_trigger_reset(struct ena_adapter *adapter, 644 enum ena_regs_reset_reason_types reason) 645 { 646 if (likely(!adapter->trigger_reset)) { 647 adapter->reset_reason = reason; 648 adapter->trigger_reset = true; 649 } 650 } 651 652 static inline void ena_rx_mbuf_prepare(struct ena_ring *rx_ring, 653 struct rte_mbuf *mbuf, 654 struct ena_com_rx_ctx *ena_rx_ctx) 655 { 656 struct ena_stats_rx *rx_stats = &rx_ring->rx_stats; 657 uint64_t ol_flags = 0; 658 uint32_t packet_type = 0; 659 660 switch (ena_rx_ctx->l3_proto) { 661 case ENA_ETH_IO_L3_PROTO_IPV4: 662 packet_type |= RTE_PTYPE_L3_IPV4; 663 if (unlikely(ena_rx_ctx->l3_csum_err)) { 664 ++rx_stats->l3_csum_bad; 665 ol_flags |= RTE_MBUF_F_RX_IP_CKSUM_BAD; 666 } else { 667 ol_flags |= RTE_MBUF_F_RX_IP_CKSUM_GOOD; 668 } 669 break; 670 case ENA_ETH_IO_L3_PROTO_IPV6: 671 packet_type |= RTE_PTYPE_L3_IPV6; 672 break; 673 default: 674 break; 675 } 676 677 switch (ena_rx_ctx->l4_proto) { 678 case ENA_ETH_IO_L4_PROTO_TCP: 679 packet_type |= RTE_PTYPE_L4_TCP; 680 break; 681 case ENA_ETH_IO_L4_PROTO_UDP: 682 packet_type |= RTE_PTYPE_L4_UDP; 683 break; 684 default: 685 break; 686 } 687 688 /* L4 csum is relevant only for TCP/UDP packets */ 689 if ((packet_type & (RTE_PTYPE_L4_TCP | RTE_PTYPE_L4_UDP)) && !ena_rx_ctx->frag) { 690 if (ena_rx_ctx->l4_csum_checked) { 691 if (likely(!ena_rx_ctx->l4_csum_err)) { 692 ++rx_stats->l4_csum_good; 693 ol_flags |= RTE_MBUF_F_RX_L4_CKSUM_GOOD; 694 } else { 695 ++rx_stats->l4_csum_bad; 696 ol_flags |= RTE_MBUF_F_RX_L4_CKSUM_BAD; 697 } 698 } else { 699 ol_flags |= RTE_MBUF_F_RX_L4_CKSUM_UNKNOWN; 700 } 701 702 if (rx_ring->offloads & RTE_ETH_RX_OFFLOAD_RSS_HASH) { 703 ol_flags |= RTE_MBUF_F_RX_RSS_HASH; 704 mbuf->hash.rss = ena_rx_ctx->hash; 705 } 706 } else { 707 ol_flags |= RTE_MBUF_F_RX_L4_CKSUM_UNKNOWN; 708 } 709 710 mbuf->ol_flags = ol_flags; 711 mbuf->packet_type = packet_type; 712 } 713 714 static inline void ena_tx_mbuf_prepare(struct rte_mbuf *mbuf, 715 struct ena_com_tx_ctx *ena_tx_ctx, 716 uint64_t queue_offloads, 717 bool disable_meta_caching) 718 { 719 struct ena_com_tx_meta *ena_meta = &ena_tx_ctx->ena_meta; 720 721 if ((mbuf->ol_flags & MBUF_OFFLOADS) && 722 (queue_offloads & QUEUE_OFFLOADS)) { 723 /* check if TSO is required */ 724 if ((mbuf->ol_flags & RTE_MBUF_F_TX_TCP_SEG) && 725 (queue_offloads & RTE_ETH_TX_OFFLOAD_TCP_TSO)) { 726 ena_tx_ctx->tso_enable = true; 727 728 ena_meta->l4_hdr_len = GET_L4_HDR_LEN(mbuf); 729 } 730 731 /* check if L3 checksum is needed */ 732 if ((mbuf->ol_flags & RTE_MBUF_F_TX_IP_CKSUM) && 733 (queue_offloads & RTE_ETH_TX_OFFLOAD_IPV4_CKSUM)) 734 ena_tx_ctx->l3_csum_enable = true; 735 736 if (mbuf->ol_flags & RTE_MBUF_F_TX_IPV6) { 737 ena_tx_ctx->l3_proto = ENA_ETH_IO_L3_PROTO_IPV6; 738 /* For the IPv6 packets, DF always needs to be true. */ 739 ena_tx_ctx->df = 1; 740 } else { 741 ena_tx_ctx->l3_proto = ENA_ETH_IO_L3_PROTO_IPV4; 742 743 /* set don't fragment (DF) flag */ 744 if (mbuf->packet_type & 745 (RTE_PTYPE_L4_NONFRAG 746 | RTE_PTYPE_INNER_L4_NONFRAG)) 747 ena_tx_ctx->df = 1; 748 } 749 750 /* check if L4 checksum is needed */ 751 if (((mbuf->ol_flags & RTE_MBUF_F_TX_L4_MASK) == RTE_MBUF_F_TX_TCP_CKSUM) && 752 (queue_offloads & RTE_ETH_TX_OFFLOAD_TCP_CKSUM)) { 753 ena_tx_ctx->l4_proto = ENA_ETH_IO_L4_PROTO_TCP; 754 ena_tx_ctx->l4_csum_enable = true; 755 } else if (((mbuf->ol_flags & RTE_MBUF_F_TX_L4_MASK) == 756 RTE_MBUF_F_TX_UDP_CKSUM) && 757 (queue_offloads & RTE_ETH_TX_OFFLOAD_UDP_CKSUM)) { 758 ena_tx_ctx->l4_proto = ENA_ETH_IO_L4_PROTO_UDP; 759 ena_tx_ctx->l4_csum_enable = true; 760 } else { 761 ena_tx_ctx->l4_proto = ENA_ETH_IO_L4_PROTO_UNKNOWN; 762 ena_tx_ctx->l4_csum_enable = false; 763 } 764 765 ena_meta->mss = mbuf->tso_segsz; 766 ena_meta->l3_hdr_len = mbuf->l3_len; 767 ena_meta->l3_hdr_offset = mbuf->l2_len; 768 769 ena_tx_ctx->meta_valid = true; 770 } else if (disable_meta_caching) { 771 memset(ena_meta, 0, sizeof(*ena_meta)); 772 ena_tx_ctx->meta_valid = true; 773 } else { 774 ena_tx_ctx->meta_valid = false; 775 } 776 } 777 778 static int validate_tx_req_id(struct ena_ring *tx_ring, u16 req_id) 779 { 780 struct ena_tx_buffer *tx_info = NULL; 781 782 if (likely(req_id < tx_ring->ring_size)) { 783 tx_info = &tx_ring->tx_buffer_info[req_id]; 784 if (likely(tx_info->mbuf)) 785 return 0; 786 } 787 788 if (tx_info) 789 PMD_TX_LOG_LINE(ERR, "tx_info doesn't have valid mbuf. queue %d:%d req_id %u", 790 tx_ring->port_id, tx_ring->id, req_id); 791 else 792 PMD_TX_LOG_LINE(ERR, "Invalid req_id: %hu in queue %d:%d", 793 req_id, tx_ring->port_id, tx_ring->id); 794 795 /* Trigger device reset */ 796 ++tx_ring->tx_stats.bad_req_id; 797 ena_trigger_reset(tx_ring->adapter, ENA_REGS_RESET_INV_TX_REQ_ID); 798 return -EFAULT; 799 } 800 801 static void ena_config_host_info(struct ena_com_dev *ena_dev) 802 { 803 struct ena_admin_host_info *host_info; 804 int rc; 805 806 /* Allocate only the host info */ 807 rc = ena_com_allocate_host_info(ena_dev); 808 if (rc) { 809 PMD_DRV_LOG_LINE(ERR, "Cannot allocate host info"); 810 return; 811 } 812 813 host_info = ena_dev->host_attr.host_info; 814 815 host_info->os_type = ENA_ADMIN_OS_DPDK; 816 host_info->kernel_ver = RTE_VERSION; 817 strlcpy((char *)host_info->kernel_ver_str, rte_version(), 818 sizeof(host_info->kernel_ver_str)); 819 host_info->os_dist = RTE_VERSION; 820 strlcpy((char *)host_info->os_dist_str, rte_version(), 821 sizeof(host_info->os_dist_str)); 822 host_info->driver_version = 823 (DRV_MODULE_VER_MAJOR) | 824 (DRV_MODULE_VER_MINOR << ENA_ADMIN_HOST_INFO_MINOR_SHIFT) | 825 (DRV_MODULE_VER_SUBMINOR << 826 ENA_ADMIN_HOST_INFO_SUB_MINOR_SHIFT); 827 host_info->num_cpus = rte_lcore_count(); 828 829 host_info->driver_supported_features = 830 ENA_ADMIN_HOST_INFO_RX_OFFSET_MASK | 831 ENA_ADMIN_HOST_INFO_RSS_CONFIGURABLE_FUNCTION_KEY_MASK; 832 833 rc = ena_com_set_host_attributes(ena_dev); 834 if (rc) { 835 if (rc == ENA_COM_UNSUPPORTED) 836 PMD_DRV_LOG_LINE(WARNING, "Cannot set host attributes"); 837 else 838 PMD_DRV_LOG_LINE(ERR, "Cannot set host attributes"); 839 840 goto err; 841 } 842 843 return; 844 845 err: 846 ena_com_delete_host_info(ena_dev); 847 } 848 849 /* This function calculates the number of xstats based on the current config */ 850 static unsigned int ena_xstats_calc_num(struct rte_eth_dev_data *data) 851 { 852 struct ena_adapter *adapter = data->dev_private; 853 854 return ENA_STATS_ARRAY_GLOBAL + 855 adapter->metrics_num + 856 ENA_STATS_ARRAY_ENA_SRD + 857 (data->nb_tx_queues * ENA_STATS_ARRAY_TX) + 858 (data->nb_rx_queues * ENA_STATS_ARRAY_RX); 859 } 860 861 static void ena_config_debug_area(struct ena_adapter *adapter) 862 { 863 u32 debug_area_size; 864 int rc, ss_count; 865 866 ss_count = ena_xstats_calc_num(adapter->edev_data); 867 868 /* allocate 32 bytes for each string and 64bit for the value */ 869 debug_area_size = ss_count * ETH_GSTRING_LEN + sizeof(u64) * ss_count; 870 871 rc = ena_com_allocate_debug_area(&adapter->ena_dev, debug_area_size); 872 if (rc) { 873 PMD_DRV_LOG_LINE(ERR, "Cannot allocate debug area"); 874 return; 875 } 876 877 rc = ena_com_set_host_attributes(&adapter->ena_dev); 878 if (rc) { 879 if (rc == ENA_COM_UNSUPPORTED) 880 PMD_DRV_LOG_LINE(WARNING, "Cannot set host attributes"); 881 else 882 PMD_DRV_LOG_LINE(ERR, "Cannot set host attributes"); 883 884 goto err; 885 } 886 887 return; 888 err: 889 ena_com_delete_debug_area(&adapter->ena_dev); 890 } 891 892 static int ena_close(struct rte_eth_dev *dev) 893 { 894 struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev); 895 struct rte_intr_handle *intr_handle = pci_dev->intr_handle; 896 struct ena_adapter *adapter = dev->data->dev_private; 897 struct ena_com_dev *ena_dev = &adapter->ena_dev; 898 int ret = 0; 899 int rc; 900 901 if (rte_eal_process_type() != RTE_PROC_PRIMARY) 902 return 0; 903 904 if (adapter->state == ENA_ADAPTER_STATE_CLOSED) 905 return 0; 906 907 if (adapter->state == ENA_ADAPTER_STATE_RUNNING) 908 ret = ena_stop(dev); 909 adapter->state = ENA_ADAPTER_STATE_CLOSED; 910 911 if (!adapter->control_path_poll_interval) { 912 rte_intr_disable(intr_handle); 913 rc = rte_intr_callback_unregister_sync(intr_handle, ena_control_path_handler, dev); 914 if (unlikely(rc != 0)) 915 PMD_INIT_LOG_LINE(ERR, "Failed to unregister interrupt handler"); 916 } else { 917 rte_eal_alarm_cancel(ena_control_path_poll_handler, dev); 918 } 919 920 ena_rx_queue_release_all(dev); 921 ena_tx_queue_release_all(dev); 922 923 rte_free(adapter->drv_stats); 924 adapter->drv_stats = NULL; 925 926 ena_com_set_admin_running_state(ena_dev, false); 927 928 ena_com_rss_destroy(ena_dev); 929 930 ena_com_delete_debug_area(ena_dev); 931 ena_com_delete_host_info(ena_dev); 932 933 ena_com_abort_admin_commands(ena_dev); 934 ena_com_wait_for_abort_completion(ena_dev); 935 ena_com_admin_destroy(ena_dev); 936 ena_com_mmio_reg_read_request_destroy(ena_dev); 937 ena_com_delete_customer_metrics_buffer(ena_dev); 938 939 /* 940 * MAC is not allocated dynamically. Setting NULL should prevent from 941 * release of the resource in the rte_eth_dev_release_port(). 942 */ 943 dev->data->mac_addrs = NULL; 944 945 return ret; 946 } 947 948 static int 949 ena_dev_reset(struct rte_eth_dev *dev) 950 { 951 int rc = 0; 952 953 /* Cannot release memory in secondary process */ 954 if (rte_eal_process_type() != RTE_PROC_PRIMARY) { 955 PMD_DRV_LOG_LINE(WARNING, "dev_reset not supported in secondary."); 956 return -EPERM; 957 } 958 959 rc = eth_ena_dev_uninit(dev); 960 if (rc) { 961 PMD_INIT_LOG_LINE(CRIT, "Failed to un-initialize device"); 962 return rc; 963 } 964 965 rc = eth_ena_dev_init(dev); 966 if (rc) 967 PMD_INIT_LOG_LINE(CRIT, "Cannot initialize device"); 968 969 return rc; 970 } 971 972 static void ena_rx_queue_release_all(struct rte_eth_dev *dev) 973 { 974 int nb_queues = dev->data->nb_rx_queues; 975 int i; 976 977 for (i = 0; i < nb_queues; i++) 978 ena_rx_queue_release(dev, i); 979 } 980 981 static void ena_tx_queue_release_all(struct rte_eth_dev *dev) 982 { 983 int nb_queues = dev->data->nb_tx_queues; 984 int i; 985 986 for (i = 0; i < nb_queues; i++) 987 ena_tx_queue_release(dev, i); 988 } 989 990 static void ena_rx_queue_release(struct rte_eth_dev *dev, uint16_t qid) 991 { 992 struct ena_ring *ring = dev->data->rx_queues[qid]; 993 994 /* Free ring resources */ 995 rte_free(ring->rx_buffer_info); 996 ring->rx_buffer_info = NULL; 997 998 rte_free(ring->rx_refill_buffer); 999 ring->rx_refill_buffer = NULL; 1000 1001 rte_free(ring->empty_rx_reqs); 1002 ring->empty_rx_reqs = NULL; 1003 1004 ring->configured = 0; 1005 1006 PMD_DRV_LOG_LINE(NOTICE, "Rx queue %d:%d released", 1007 ring->port_id, ring->id); 1008 } 1009 1010 static void ena_tx_queue_release(struct rte_eth_dev *dev, uint16_t qid) 1011 { 1012 struct ena_ring *ring = dev->data->tx_queues[qid]; 1013 1014 /* Free ring resources */ 1015 rte_free(ring->push_buf_intermediate_buf); 1016 1017 rte_free(ring->tx_buffer_info); 1018 1019 rte_free(ring->empty_tx_reqs); 1020 1021 ring->empty_tx_reqs = NULL; 1022 ring->tx_buffer_info = NULL; 1023 ring->push_buf_intermediate_buf = NULL; 1024 1025 ring->configured = 0; 1026 1027 PMD_DRV_LOG_LINE(NOTICE, "Tx queue %d:%d released", 1028 ring->port_id, ring->id); 1029 } 1030 1031 static void ena_rx_queue_release_bufs(struct ena_ring *ring) 1032 { 1033 unsigned int i; 1034 1035 for (i = 0; i < ring->ring_size; ++i) { 1036 struct ena_rx_buffer *rx_info = &ring->rx_buffer_info[i]; 1037 if (rx_info->mbuf) { 1038 rte_mbuf_raw_free(rx_info->mbuf); 1039 rx_info->mbuf = NULL; 1040 } 1041 } 1042 } 1043 1044 static void ena_tx_queue_release_bufs(struct ena_ring *ring) 1045 { 1046 unsigned int i; 1047 1048 for (i = 0; i < ring->ring_size; ++i) { 1049 struct ena_tx_buffer *tx_buf = &ring->tx_buffer_info[i]; 1050 1051 if (tx_buf->mbuf) { 1052 rte_pktmbuf_free(tx_buf->mbuf); 1053 tx_buf->mbuf = NULL; 1054 } 1055 } 1056 } 1057 1058 static int ena_link_update(struct rte_eth_dev *dev, 1059 __rte_unused int wait_to_complete) 1060 { 1061 struct rte_eth_link *link = &dev->data->dev_link; 1062 struct ena_adapter *adapter = dev->data->dev_private; 1063 1064 link->link_status = adapter->link_status ? RTE_ETH_LINK_UP : RTE_ETH_LINK_DOWN; 1065 link->link_speed = RTE_ETH_SPEED_NUM_NONE; 1066 link->link_duplex = RTE_ETH_LINK_FULL_DUPLEX; 1067 1068 return 0; 1069 } 1070 1071 static int ena_queue_start_all(struct rte_eth_dev *dev, 1072 enum ena_ring_type ring_type) 1073 { 1074 struct ena_adapter *adapter = dev->data->dev_private; 1075 struct ena_ring *queues = NULL; 1076 int nb_queues; 1077 int i = 0; 1078 int rc = 0; 1079 1080 if (ring_type == ENA_RING_TYPE_RX) { 1081 queues = adapter->rx_ring; 1082 nb_queues = dev->data->nb_rx_queues; 1083 } else { 1084 queues = adapter->tx_ring; 1085 nb_queues = dev->data->nb_tx_queues; 1086 } 1087 for (i = 0; i < nb_queues; i++) { 1088 if (queues[i].configured) { 1089 if (ring_type == ENA_RING_TYPE_RX) { 1090 ena_assert_msg( 1091 dev->data->rx_queues[i] == &queues[i], 1092 "Inconsistent state of Rx queues\n"); 1093 } else { 1094 ena_assert_msg( 1095 dev->data->tx_queues[i] == &queues[i], 1096 "Inconsistent state of Tx queues\n"); 1097 } 1098 1099 rc = ena_queue_start(dev, &queues[i]); 1100 1101 if (rc) { 1102 PMD_INIT_LOG_LINE(ERR, 1103 "Failed to start queue[%d] of type(%d)", 1104 i, ring_type); 1105 goto err; 1106 } 1107 } 1108 } 1109 1110 return 0; 1111 1112 err: 1113 while (i--) 1114 if (queues[i].configured) 1115 ena_queue_stop(&queues[i]); 1116 1117 return rc; 1118 } 1119 1120 static int 1121 ena_calc_io_queue_size(struct ena_calc_queue_size_ctx *ctx, 1122 bool use_large_llq_hdr) 1123 { 1124 struct ena_admin_feature_llq_desc *dev = &ctx->get_feat_ctx->llq; 1125 struct ena_com_dev *ena_dev = ctx->ena_dev; 1126 uint32_t max_tx_queue_size; 1127 uint32_t max_rx_queue_size; 1128 1129 if (ena_dev->supported_features & BIT(ENA_ADMIN_MAX_QUEUES_EXT)) { 1130 struct ena_admin_queue_ext_feature_fields *max_queue_ext = 1131 &ctx->get_feat_ctx->max_queue_ext.max_queue_ext; 1132 max_rx_queue_size = RTE_MIN(max_queue_ext->max_rx_cq_depth, 1133 max_queue_ext->max_rx_sq_depth); 1134 max_tx_queue_size = max_queue_ext->max_tx_cq_depth; 1135 1136 if (ena_dev->tx_mem_queue_type == 1137 ENA_ADMIN_PLACEMENT_POLICY_DEV) { 1138 max_tx_queue_size = RTE_MIN(max_tx_queue_size, 1139 dev->max_llq_depth); 1140 } else { 1141 max_tx_queue_size = RTE_MIN(max_tx_queue_size, 1142 max_queue_ext->max_tx_sq_depth); 1143 } 1144 1145 ctx->max_rx_sgl_size = RTE_MIN(ENA_PKT_MAX_BUFS, 1146 max_queue_ext->max_per_packet_rx_descs); 1147 ctx->max_tx_sgl_size = RTE_MIN(ENA_PKT_MAX_BUFS, 1148 max_queue_ext->max_per_packet_tx_descs); 1149 } else { 1150 struct ena_admin_queue_feature_desc *max_queues = 1151 &ctx->get_feat_ctx->max_queues; 1152 max_rx_queue_size = RTE_MIN(max_queues->max_cq_depth, 1153 max_queues->max_sq_depth); 1154 max_tx_queue_size = max_queues->max_cq_depth; 1155 1156 if (ena_dev->tx_mem_queue_type == 1157 ENA_ADMIN_PLACEMENT_POLICY_DEV) { 1158 max_tx_queue_size = RTE_MIN(max_tx_queue_size, 1159 dev->max_llq_depth); 1160 } else { 1161 max_tx_queue_size = RTE_MIN(max_tx_queue_size, 1162 max_queues->max_sq_depth); 1163 } 1164 1165 ctx->max_rx_sgl_size = RTE_MIN(ENA_PKT_MAX_BUFS, 1166 max_queues->max_packet_rx_descs); 1167 ctx->max_tx_sgl_size = RTE_MIN(ENA_PKT_MAX_BUFS, 1168 max_queues->max_packet_tx_descs); 1169 } 1170 1171 /* Round down to the nearest power of 2 */ 1172 max_rx_queue_size = rte_align32prevpow2(max_rx_queue_size); 1173 max_tx_queue_size = rte_align32prevpow2(max_tx_queue_size); 1174 1175 if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV && use_large_llq_hdr) { 1176 /* intersection between driver configuration and device capabilities */ 1177 if (dev->entry_size_ctrl_supported & ENA_ADMIN_LIST_ENTRY_SIZE_256B) { 1178 if (dev->max_wide_llq_depth == MAX_WIDE_LLQ_DEPTH_UNSUPPORTED) { 1179 /* Devices that do not support the double-sized ENA memory BAR will 1180 * report max_wide_llq_depth as 0. In such case, driver halves the 1181 * queue depth when working in large llq policy. 1182 */ 1183 max_tx_queue_size >>= 1; 1184 PMD_INIT_LOG_LINE(INFO, 1185 "large LLQ policy requires limiting Tx queue size to %u entries", 1186 max_tx_queue_size); 1187 } else if (dev->max_wide_llq_depth < max_tx_queue_size) { 1188 /* In case the queue depth that the driver calculated exceeds 1189 * the maximal value that the device allows, it will be limited 1190 * to that maximal value 1191 */ 1192 max_tx_queue_size = dev->max_wide_llq_depth; 1193 } 1194 } else { 1195 PMD_INIT_LOG_LINE(INFO, 1196 "Forcing large LLQ headers failed since device lacks this support"); 1197 } 1198 } 1199 1200 if (unlikely(max_rx_queue_size == 0 || max_tx_queue_size == 0)) { 1201 PMD_INIT_LOG_LINE(ERR, "Invalid queue size"); 1202 return -EFAULT; 1203 } 1204 1205 ctx->max_tx_queue_size = max_tx_queue_size; 1206 ctx->max_rx_queue_size = max_rx_queue_size; 1207 1208 PMD_DRV_LOG_LINE(INFO, "tx queue size %u", max_tx_queue_size); 1209 return 0; 1210 } 1211 1212 static void ena_stats_restart(struct rte_eth_dev *dev) 1213 { 1214 struct ena_adapter *adapter = dev->data->dev_private; 1215 1216 rte_atomic64_init(&adapter->drv_stats->ierrors); 1217 rte_atomic64_init(&adapter->drv_stats->oerrors); 1218 rte_atomic64_init(&adapter->drv_stats->rx_nombuf); 1219 adapter->drv_stats->rx_drops = 0; 1220 } 1221 1222 static int ena_stats_get(struct rte_eth_dev *dev, 1223 struct rte_eth_stats *stats) 1224 { 1225 struct ena_admin_basic_stats ena_stats; 1226 struct ena_adapter *adapter = dev->data->dev_private; 1227 struct ena_com_dev *ena_dev = &adapter->ena_dev; 1228 int rc; 1229 int i; 1230 int max_rings_stats; 1231 1232 memset(&ena_stats, 0, sizeof(ena_stats)); 1233 1234 rte_spinlock_lock(&adapter->admin_lock); 1235 rc = ENA_PROXY(adapter, ena_com_get_dev_basic_stats, ena_dev, 1236 &ena_stats); 1237 rte_spinlock_unlock(&adapter->admin_lock); 1238 if (unlikely(rc)) { 1239 PMD_DRV_LOG_LINE(ERR, "Could not retrieve statistics from ENA"); 1240 return rc; 1241 } 1242 1243 /* Set of basic statistics from ENA */ 1244 stats->ipackets = __MERGE_64B_H_L(ena_stats.rx_pkts_high, 1245 ena_stats.rx_pkts_low); 1246 stats->opackets = __MERGE_64B_H_L(ena_stats.tx_pkts_high, 1247 ena_stats.tx_pkts_low); 1248 stats->ibytes = __MERGE_64B_H_L(ena_stats.rx_bytes_high, 1249 ena_stats.rx_bytes_low); 1250 stats->obytes = __MERGE_64B_H_L(ena_stats.tx_bytes_high, 1251 ena_stats.tx_bytes_low); 1252 1253 /* Driver related stats */ 1254 stats->imissed = adapter->drv_stats->rx_drops; 1255 stats->ierrors = rte_atomic64_read(&adapter->drv_stats->ierrors); 1256 stats->oerrors = rte_atomic64_read(&adapter->drv_stats->oerrors); 1257 stats->rx_nombuf = rte_atomic64_read(&adapter->drv_stats->rx_nombuf); 1258 1259 max_rings_stats = RTE_MIN(dev->data->nb_rx_queues, 1260 RTE_ETHDEV_QUEUE_STAT_CNTRS); 1261 for (i = 0; i < max_rings_stats; ++i) { 1262 struct ena_stats_rx *rx_stats = &adapter->rx_ring[i].rx_stats; 1263 1264 stats->q_ibytes[i] = rx_stats->bytes; 1265 stats->q_ipackets[i] = rx_stats->cnt; 1266 stats->q_errors[i] = rx_stats->bad_desc_num + 1267 rx_stats->bad_req_id + 1268 rx_stats->bad_desc + 1269 rx_stats->unknown_error; 1270 } 1271 1272 max_rings_stats = RTE_MIN(dev->data->nb_tx_queues, 1273 RTE_ETHDEV_QUEUE_STAT_CNTRS); 1274 for (i = 0; i < max_rings_stats; ++i) { 1275 struct ena_stats_tx *tx_stats = &adapter->tx_ring[i].tx_stats; 1276 1277 stats->q_obytes[i] = tx_stats->bytes; 1278 stats->q_opackets[i] = tx_stats->cnt; 1279 } 1280 1281 return 0; 1282 } 1283 1284 static int ena_mtu_set(struct rte_eth_dev *dev, uint16_t mtu) 1285 { 1286 struct ena_adapter *adapter; 1287 struct ena_com_dev *ena_dev; 1288 int rc = 0; 1289 1290 ena_assert_msg(dev->data != NULL, "Uninitialized device\n"); 1291 ena_assert_msg(dev->data->dev_private != NULL, "Uninitialized device\n"); 1292 adapter = dev->data->dev_private; 1293 1294 ena_dev = &adapter->ena_dev; 1295 ena_assert_msg(ena_dev != NULL, "Uninitialized device\n"); 1296 1297 rc = ENA_PROXY(adapter, ena_com_set_dev_mtu, ena_dev, mtu); 1298 if (rc) 1299 PMD_DRV_LOG_LINE(ERR, "Could not set MTU: %d", mtu); 1300 else 1301 PMD_DRV_LOG_LINE(NOTICE, "MTU set to: %d", mtu); 1302 1303 return rc; 1304 } 1305 1306 static int ena_start(struct rte_eth_dev *dev) 1307 { 1308 struct ena_adapter *adapter = dev->data->dev_private; 1309 uint64_t ticks; 1310 int rc = 0; 1311 uint16_t i; 1312 1313 /* Cannot allocate memory in secondary process */ 1314 if (rte_eal_process_type() != RTE_PROC_PRIMARY) { 1315 PMD_DRV_LOG_LINE(WARNING, "dev_start not supported in secondary."); 1316 return -EPERM; 1317 } 1318 1319 rc = ena_setup_rx_intr(dev); 1320 if (rc) 1321 return rc; 1322 1323 rc = ena_queue_start_all(dev, ENA_RING_TYPE_RX); 1324 if (rc) 1325 return rc; 1326 1327 rc = ena_queue_start_all(dev, ENA_RING_TYPE_TX); 1328 if (rc) 1329 goto err_start_tx; 1330 1331 if (adapter->edev_data->dev_conf.rxmode.mq_mode & RTE_ETH_MQ_RX_RSS_FLAG) { 1332 rc = ena_rss_configure(adapter); 1333 if (rc) 1334 goto err_rss_init; 1335 } 1336 1337 ena_stats_restart(dev); 1338 1339 adapter->timestamp_wd = rte_get_timer_cycles(); 1340 adapter->keep_alive_timeout = ENA_DEVICE_KALIVE_TIMEOUT; 1341 1342 ticks = rte_get_timer_hz(); 1343 rte_timer_reset(&adapter->timer_wd, ticks, PERIODICAL, rte_lcore_id(), 1344 ena_timer_wd_callback, dev); 1345 1346 ++adapter->dev_stats.dev_start; 1347 adapter->state = ENA_ADAPTER_STATE_RUNNING; 1348 1349 for (i = 0; i < dev->data->nb_rx_queues; i++) 1350 dev->data->rx_queue_state[i] = RTE_ETH_QUEUE_STATE_STARTED; 1351 for (i = 0; i < dev->data->nb_tx_queues; i++) 1352 dev->data->tx_queue_state[i] = RTE_ETH_QUEUE_STATE_STARTED; 1353 1354 return 0; 1355 1356 err_rss_init: 1357 ena_queue_stop_all(dev, ENA_RING_TYPE_TX); 1358 err_start_tx: 1359 ena_queue_stop_all(dev, ENA_RING_TYPE_RX); 1360 return rc; 1361 } 1362 1363 static int ena_stop(struct rte_eth_dev *dev) 1364 { 1365 struct ena_adapter *adapter = dev->data->dev_private; 1366 struct ena_com_dev *ena_dev = &adapter->ena_dev; 1367 struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev); 1368 struct rte_intr_handle *intr_handle = pci_dev->intr_handle; 1369 uint16_t i; 1370 int rc; 1371 1372 /* Cannot free memory in secondary process */ 1373 if (rte_eal_process_type() != RTE_PROC_PRIMARY) { 1374 PMD_DRV_LOG_LINE(WARNING, "dev_stop not supported in secondary."); 1375 return -EPERM; 1376 } 1377 1378 rte_timer_stop_sync(&adapter->timer_wd); 1379 ena_queue_stop_all(dev, ENA_RING_TYPE_TX); 1380 ena_queue_stop_all(dev, ENA_RING_TYPE_RX); 1381 1382 if (adapter->trigger_reset) { 1383 rc = ena_com_dev_reset(ena_dev, adapter->reset_reason); 1384 if (rc) 1385 PMD_DRV_LOG_LINE(ERR, "Device reset failed, rc: %d", rc); 1386 } 1387 1388 rte_intr_disable(intr_handle); 1389 1390 rte_intr_efd_disable(intr_handle); 1391 1392 /* Cleanup vector list */ 1393 rte_intr_vec_list_free(intr_handle); 1394 1395 rte_intr_enable(intr_handle); 1396 1397 ++adapter->dev_stats.dev_stop; 1398 adapter->state = ENA_ADAPTER_STATE_STOPPED; 1399 dev->data->dev_started = 0; 1400 1401 for (i = 0; i < dev->data->nb_rx_queues; i++) 1402 dev->data->rx_queue_state[i] = RTE_ETH_QUEUE_STATE_STOPPED; 1403 for (i = 0; i < dev->data->nb_tx_queues; i++) 1404 dev->data->tx_queue_state[i] = RTE_ETH_QUEUE_STATE_STOPPED; 1405 1406 return 0; 1407 } 1408 1409 static int ena_create_io_queue(struct rte_eth_dev *dev, struct ena_ring *ring) 1410 { 1411 struct ena_adapter *adapter = ring->adapter; 1412 struct ena_com_dev *ena_dev = &adapter->ena_dev; 1413 struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev); 1414 struct rte_intr_handle *intr_handle = pci_dev->intr_handle; 1415 struct ena_com_create_io_ctx ctx = 1416 /* policy set to _HOST just to satisfy icc compiler */ 1417 { ENA_ADMIN_PLACEMENT_POLICY_HOST, 1418 0, 0, 0, 0, 0 }; 1419 uint16_t ena_qid; 1420 unsigned int i; 1421 int rc; 1422 1423 ctx.msix_vector = -1; 1424 if (ring->type == ENA_RING_TYPE_TX) { 1425 ena_qid = ENA_IO_TXQ_IDX(ring->id); 1426 ctx.direction = ENA_COM_IO_QUEUE_DIRECTION_TX; 1427 ctx.mem_queue_type = ena_dev->tx_mem_queue_type; 1428 for (i = 0; i < ring->ring_size; i++) 1429 ring->empty_tx_reqs[i] = i; 1430 } else { 1431 ena_qid = ENA_IO_RXQ_IDX(ring->id); 1432 ctx.direction = ENA_COM_IO_QUEUE_DIRECTION_RX; 1433 if (rte_intr_dp_is_en(intr_handle)) 1434 ctx.msix_vector = 1435 rte_intr_vec_list_index_get(intr_handle, 1436 ring->id); 1437 1438 for (i = 0; i < ring->ring_size; i++) 1439 ring->empty_rx_reqs[i] = i; 1440 } 1441 ctx.queue_size = ring->ring_size; 1442 ctx.qid = ena_qid; 1443 ctx.numa_node = ring->numa_socket_id; 1444 1445 rc = ena_com_create_io_queue(ena_dev, &ctx); 1446 if (rc) { 1447 PMD_DRV_LOG_LINE(ERR, 1448 "Failed to create IO queue[%d] (qid:%d), rc: %d", 1449 ring->id, ena_qid, rc); 1450 return rc; 1451 } 1452 1453 rc = ena_com_get_io_handlers(ena_dev, ena_qid, 1454 &ring->ena_com_io_sq, 1455 &ring->ena_com_io_cq); 1456 if (rc) { 1457 PMD_DRV_LOG_LINE(ERR, 1458 "Failed to get IO queue[%d] handlers, rc: %d", 1459 ring->id, rc); 1460 ena_com_destroy_io_queue(ena_dev, ena_qid); 1461 return rc; 1462 } 1463 1464 if (ring->type == ENA_RING_TYPE_TX) 1465 ena_com_update_numa_node(ring->ena_com_io_cq, ctx.numa_node); 1466 1467 /* Start with Rx interrupts being masked. */ 1468 if (ring->type == ENA_RING_TYPE_RX && rte_intr_dp_is_en(intr_handle)) 1469 ena_rx_queue_intr_disable(dev, ring->id); 1470 1471 return 0; 1472 } 1473 1474 static void ena_queue_stop(struct ena_ring *ring) 1475 { 1476 struct ena_com_dev *ena_dev = &ring->adapter->ena_dev; 1477 1478 if (ring->type == ENA_RING_TYPE_RX) { 1479 ena_com_destroy_io_queue(ena_dev, ENA_IO_RXQ_IDX(ring->id)); 1480 ena_rx_queue_release_bufs(ring); 1481 } else { 1482 ena_com_destroy_io_queue(ena_dev, ENA_IO_TXQ_IDX(ring->id)); 1483 ena_tx_queue_release_bufs(ring); 1484 } 1485 } 1486 1487 static void ena_queue_stop_all(struct rte_eth_dev *dev, 1488 enum ena_ring_type ring_type) 1489 { 1490 struct ena_adapter *adapter = dev->data->dev_private; 1491 struct ena_ring *queues = NULL; 1492 uint16_t nb_queues, i; 1493 1494 if (ring_type == ENA_RING_TYPE_RX) { 1495 queues = adapter->rx_ring; 1496 nb_queues = dev->data->nb_rx_queues; 1497 } else { 1498 queues = adapter->tx_ring; 1499 nb_queues = dev->data->nb_tx_queues; 1500 } 1501 1502 for (i = 0; i < nb_queues; ++i) 1503 if (queues[i].configured) 1504 ena_queue_stop(&queues[i]); 1505 } 1506 1507 static int ena_queue_start(struct rte_eth_dev *dev, struct ena_ring *ring) 1508 { 1509 int rc, bufs_num; 1510 1511 ena_assert_msg(ring->configured == 1, 1512 "Trying to start unconfigured queue\n"); 1513 1514 rc = ena_create_io_queue(dev, ring); 1515 if (rc) { 1516 PMD_INIT_LOG_LINE(ERR, "Failed to create IO queue"); 1517 return rc; 1518 } 1519 1520 ring->next_to_clean = 0; 1521 ring->next_to_use = 0; 1522 1523 if (ring->type == ENA_RING_TYPE_TX) { 1524 ring->tx_stats.available_desc = 1525 ena_com_free_q_entries(ring->ena_com_io_sq); 1526 return 0; 1527 } 1528 1529 bufs_num = ring->ring_size - 1; 1530 rc = ena_populate_rx_queue(ring, bufs_num); 1531 if (rc != bufs_num) { 1532 ena_com_destroy_io_queue(&ring->adapter->ena_dev, 1533 ENA_IO_RXQ_IDX(ring->id)); 1534 PMD_INIT_LOG_LINE(ERR, "Failed to populate Rx ring"); 1535 return ENA_COM_FAULT; 1536 } 1537 /* Flush per-core RX buffers pools cache as they can be used on other 1538 * cores as well. 1539 */ 1540 rte_mempool_cache_flush(NULL, ring->mb_pool); 1541 1542 return 0; 1543 } 1544 1545 static int ena_tx_queue_setup(struct rte_eth_dev *dev, 1546 uint16_t queue_idx, 1547 uint16_t nb_desc, 1548 unsigned int socket_id, 1549 const struct rte_eth_txconf *tx_conf) 1550 { 1551 struct ena_ring *txq = NULL; 1552 struct ena_adapter *adapter = dev->data->dev_private; 1553 unsigned int i; 1554 uint16_t dyn_thresh; 1555 1556 txq = &adapter->tx_ring[queue_idx]; 1557 1558 if (txq->configured) { 1559 PMD_DRV_LOG_LINE(CRIT, 1560 "API violation. Queue[%d] is already configured", 1561 queue_idx); 1562 return ENA_COM_FAULT; 1563 } 1564 1565 if (!rte_is_power_of_2(nb_desc)) { 1566 PMD_DRV_LOG_LINE(ERR, 1567 "Unsupported size of Tx queue: %d is not a power of 2.", 1568 nb_desc); 1569 return -EINVAL; 1570 } 1571 1572 if (nb_desc > adapter->max_tx_ring_size) { 1573 PMD_DRV_LOG_LINE(ERR, 1574 "Unsupported size of Tx queue (max size: %d)", 1575 adapter->max_tx_ring_size); 1576 return -EINVAL; 1577 } 1578 1579 txq->port_id = dev->data->port_id; 1580 txq->next_to_clean = 0; 1581 txq->next_to_use = 0; 1582 txq->ring_size = nb_desc; 1583 txq->size_mask = nb_desc - 1; 1584 txq->numa_socket_id = socket_id; 1585 txq->pkts_without_db = false; 1586 txq->last_cleanup_ticks = 0; 1587 1588 txq->tx_buffer_info = rte_zmalloc_socket("txq->tx_buffer_info", 1589 sizeof(struct ena_tx_buffer) * txq->ring_size, 1590 RTE_CACHE_LINE_SIZE, 1591 socket_id); 1592 if (!txq->tx_buffer_info) { 1593 PMD_DRV_LOG_LINE(ERR, 1594 "Failed to allocate memory for Tx buffer info"); 1595 return -ENOMEM; 1596 } 1597 1598 txq->empty_tx_reqs = rte_zmalloc_socket("txq->empty_tx_reqs", 1599 sizeof(uint16_t) * txq->ring_size, 1600 RTE_CACHE_LINE_SIZE, 1601 socket_id); 1602 if (!txq->empty_tx_reqs) { 1603 PMD_DRV_LOG_LINE(ERR, 1604 "Failed to allocate memory for empty Tx requests"); 1605 rte_free(txq->tx_buffer_info); 1606 return -ENOMEM; 1607 } 1608 1609 txq->push_buf_intermediate_buf = 1610 rte_zmalloc_socket("txq->push_buf_intermediate_buf", 1611 txq->tx_max_header_size, 1612 RTE_CACHE_LINE_SIZE, 1613 socket_id); 1614 if (!txq->push_buf_intermediate_buf) { 1615 PMD_DRV_LOG_LINE(ERR, "Failed to alloc push buffer for LLQ"); 1616 rte_free(txq->tx_buffer_info); 1617 rte_free(txq->empty_tx_reqs); 1618 return -ENOMEM; 1619 } 1620 1621 for (i = 0; i < txq->ring_size; i++) 1622 txq->empty_tx_reqs[i] = i; 1623 1624 txq->offloads = tx_conf->offloads | dev->data->dev_conf.txmode.offloads; 1625 1626 /* Check if caller provided the Tx cleanup threshold value. */ 1627 if (tx_conf->tx_free_thresh != 0) { 1628 txq->tx_free_thresh = tx_conf->tx_free_thresh; 1629 } else { 1630 dyn_thresh = txq->ring_size - 1631 txq->ring_size / ENA_REFILL_THRESH_DIVIDER; 1632 txq->tx_free_thresh = RTE_MAX(dyn_thresh, 1633 txq->ring_size - ENA_REFILL_THRESH_PACKET); 1634 } 1635 1636 txq->missing_tx_completion_threshold = 1637 RTE_MIN(txq->ring_size / 2, ENA_DEFAULT_MISSING_COMP); 1638 1639 /* Store pointer to this queue in upper layer */ 1640 txq->configured = 1; 1641 dev->data->tx_queues[queue_idx] = txq; 1642 1643 return 0; 1644 } 1645 1646 static int ena_rx_queue_setup(struct rte_eth_dev *dev, 1647 uint16_t queue_idx, 1648 uint16_t nb_desc, 1649 unsigned int socket_id, 1650 const struct rte_eth_rxconf *rx_conf, 1651 struct rte_mempool *mp) 1652 { 1653 struct ena_adapter *adapter = dev->data->dev_private; 1654 struct ena_ring *rxq = NULL; 1655 size_t buffer_size; 1656 int i; 1657 uint16_t dyn_thresh; 1658 1659 rxq = &adapter->rx_ring[queue_idx]; 1660 if (rxq->configured) { 1661 PMD_DRV_LOG_LINE(CRIT, 1662 "API violation. Queue[%d] is already configured", 1663 queue_idx); 1664 return ENA_COM_FAULT; 1665 } 1666 1667 if (!rte_is_power_of_2(nb_desc)) { 1668 PMD_DRV_LOG_LINE(ERR, 1669 "Unsupported size of Rx queue: %d is not a power of 2.", 1670 nb_desc); 1671 return -EINVAL; 1672 } 1673 1674 if (nb_desc > adapter->max_rx_ring_size) { 1675 PMD_DRV_LOG_LINE(ERR, 1676 "Unsupported size of Rx queue (max size: %d)", 1677 adapter->max_rx_ring_size); 1678 return -EINVAL; 1679 } 1680 1681 /* ENA isn't supporting buffers smaller than 1400 bytes */ 1682 buffer_size = rte_pktmbuf_data_room_size(mp) - RTE_PKTMBUF_HEADROOM; 1683 if (buffer_size < ENA_RX_BUF_MIN_SIZE) { 1684 PMD_DRV_LOG_LINE(ERR, 1685 "Unsupported size of Rx buffer: %zu (min size: %d)", 1686 buffer_size, ENA_RX_BUF_MIN_SIZE); 1687 return -EINVAL; 1688 } 1689 1690 rxq->port_id = dev->data->port_id; 1691 rxq->next_to_clean = 0; 1692 rxq->next_to_use = 0; 1693 rxq->ring_size = nb_desc; 1694 rxq->size_mask = nb_desc - 1; 1695 rxq->numa_socket_id = socket_id; 1696 rxq->mb_pool = mp; 1697 1698 rxq->rx_buffer_info = rte_zmalloc_socket("rxq->buffer_info", 1699 sizeof(struct ena_rx_buffer) * nb_desc, 1700 RTE_CACHE_LINE_SIZE, 1701 socket_id); 1702 if (!rxq->rx_buffer_info) { 1703 PMD_DRV_LOG_LINE(ERR, 1704 "Failed to allocate memory for Rx buffer info"); 1705 return -ENOMEM; 1706 } 1707 1708 rxq->rx_refill_buffer = rte_zmalloc_socket("rxq->rx_refill_buffer", 1709 sizeof(struct rte_mbuf *) * nb_desc, 1710 RTE_CACHE_LINE_SIZE, 1711 socket_id); 1712 if (!rxq->rx_refill_buffer) { 1713 PMD_DRV_LOG_LINE(ERR, 1714 "Failed to allocate memory for Rx refill buffer"); 1715 rte_free(rxq->rx_buffer_info); 1716 rxq->rx_buffer_info = NULL; 1717 return -ENOMEM; 1718 } 1719 1720 rxq->empty_rx_reqs = rte_zmalloc_socket("rxq->empty_rx_reqs", 1721 sizeof(uint16_t) * nb_desc, 1722 RTE_CACHE_LINE_SIZE, 1723 socket_id); 1724 if (!rxq->empty_rx_reqs) { 1725 PMD_DRV_LOG_LINE(ERR, 1726 "Failed to allocate memory for empty Rx requests"); 1727 rte_free(rxq->rx_buffer_info); 1728 rxq->rx_buffer_info = NULL; 1729 rte_free(rxq->rx_refill_buffer); 1730 rxq->rx_refill_buffer = NULL; 1731 return -ENOMEM; 1732 } 1733 1734 for (i = 0; i < nb_desc; i++) 1735 rxq->empty_rx_reqs[i] = i; 1736 1737 rxq->offloads = rx_conf->offloads | dev->data->dev_conf.rxmode.offloads; 1738 1739 if (rx_conf->rx_free_thresh != 0) { 1740 rxq->rx_free_thresh = rx_conf->rx_free_thresh; 1741 } else { 1742 dyn_thresh = rxq->ring_size / ENA_REFILL_THRESH_DIVIDER; 1743 rxq->rx_free_thresh = RTE_MIN(dyn_thresh, 1744 (uint16_t)(ENA_REFILL_THRESH_PACKET)); 1745 } 1746 1747 /* Store pointer to this queue in upper layer */ 1748 rxq->configured = 1; 1749 dev->data->rx_queues[queue_idx] = rxq; 1750 1751 return 0; 1752 } 1753 1754 static int ena_add_single_rx_desc(struct ena_com_io_sq *io_sq, 1755 struct rte_mbuf *mbuf, uint16_t id) 1756 { 1757 struct ena_com_buf ebuf; 1758 int rc; 1759 1760 /* prepare physical address for DMA transaction */ 1761 ebuf.paddr = mbuf->buf_iova + RTE_PKTMBUF_HEADROOM; 1762 ebuf.len = mbuf->buf_len - RTE_PKTMBUF_HEADROOM; 1763 1764 /* pass resource to device */ 1765 rc = ena_com_add_single_rx_desc(io_sq, &ebuf, id); 1766 if (unlikely(rc != 0)) 1767 PMD_RX_LOG_LINE(WARNING, "Failed adding Rx desc"); 1768 1769 return rc; 1770 } 1771 1772 static int ena_populate_rx_queue(struct ena_ring *rxq, unsigned int count) 1773 { 1774 unsigned int i; 1775 int rc; 1776 uint16_t next_to_use = rxq->next_to_use; 1777 uint16_t req_id; 1778 #ifdef RTE_ETHDEV_DEBUG_RX 1779 uint16_t in_use; 1780 #endif 1781 struct rte_mbuf **mbufs = rxq->rx_refill_buffer; 1782 1783 if (unlikely(!count)) 1784 return 0; 1785 1786 #ifdef RTE_ETHDEV_DEBUG_RX 1787 in_use = rxq->ring_size - 1 - 1788 ena_com_free_q_entries(rxq->ena_com_io_sq); 1789 if (unlikely((in_use + count) >= rxq->ring_size)) 1790 PMD_RX_LOG_LINE(ERR, "Bad Rx ring state"); 1791 #endif 1792 1793 /* get resources for incoming packets */ 1794 rc = rte_pktmbuf_alloc_bulk(rxq->mb_pool, mbufs, count); 1795 if (unlikely(rc < 0)) { 1796 rte_atomic64_inc(&rxq->adapter->drv_stats->rx_nombuf); 1797 ++rxq->rx_stats.mbuf_alloc_fail; 1798 PMD_RX_LOG_LINE(DEBUG, "There are not enough free buffers"); 1799 return 0; 1800 } 1801 1802 for (i = 0; i < count; i++) { 1803 struct rte_mbuf *mbuf = mbufs[i]; 1804 struct ena_rx_buffer *rx_info; 1805 1806 if (likely((i + 4) < count)) 1807 rte_prefetch0(mbufs[i + 4]); 1808 1809 req_id = rxq->empty_rx_reqs[next_to_use]; 1810 rx_info = &rxq->rx_buffer_info[req_id]; 1811 1812 rc = ena_add_single_rx_desc(rxq->ena_com_io_sq, mbuf, req_id); 1813 if (unlikely(rc != 0)) 1814 break; 1815 1816 rx_info->mbuf = mbuf; 1817 next_to_use = ENA_IDX_NEXT_MASKED(next_to_use, rxq->size_mask); 1818 } 1819 1820 if (unlikely(i < count)) { 1821 PMD_RX_LOG_LINE(WARNING, 1822 "Refilled Rx queue[%d] with only %d/%d buffers", 1823 rxq->id, i, count); 1824 rte_pktmbuf_free_bulk(&mbufs[i], count - i); 1825 ++rxq->rx_stats.refill_partial; 1826 } 1827 1828 /* When we submitted free resources to device... */ 1829 if (likely(i > 0)) { 1830 /* ...let HW know that it can fill buffers with data. */ 1831 ena_com_write_sq_doorbell(rxq->ena_com_io_sq); 1832 1833 rxq->next_to_use = next_to_use; 1834 } 1835 1836 return i; 1837 } 1838 1839 static size_t ena_get_metrics_entries(struct ena_adapter *adapter) 1840 { 1841 struct ena_com_dev *ena_dev = &adapter->ena_dev; 1842 size_t metrics_num = 0; 1843 1844 if (ena_com_get_cap(ena_dev, ENA_ADMIN_CUSTOMER_METRICS)) 1845 metrics_num = ENA_STATS_ARRAY_METRICS; 1846 else if (ena_com_get_cap(ena_dev, ENA_ADMIN_ENI_STATS)) 1847 metrics_num = ENA_STATS_ARRAY_METRICS_LEGACY; 1848 PMD_DRV_LOG_LINE(NOTICE, "0x%x customer metrics are supported", (unsigned int)metrics_num); 1849 if (metrics_num > ENA_MAX_CUSTOMER_METRICS) { 1850 PMD_DRV_LOG_LINE(NOTICE, "Not enough space for the requested customer metrics"); 1851 metrics_num = ENA_MAX_CUSTOMER_METRICS; 1852 } 1853 return metrics_num; 1854 } 1855 1856 static int ena_device_init(struct ena_adapter *adapter, 1857 struct rte_pci_device *pdev, 1858 struct ena_com_dev_get_features_ctx *get_feat_ctx) 1859 { 1860 struct ena_com_dev *ena_dev = &adapter->ena_dev; 1861 uint32_t aenq_groups; 1862 int rc; 1863 bool readless_supported; 1864 1865 /* Initialize mmio registers */ 1866 rc = ena_com_mmio_reg_read_request_init(ena_dev); 1867 if (rc) { 1868 PMD_DRV_LOG_LINE(ERR, "Failed to init MMIO read less"); 1869 return rc; 1870 } 1871 1872 /* The PCIe configuration space revision id indicate if mmio reg 1873 * read is disabled. 1874 */ 1875 readless_supported = !(pdev->id.class_id & ENA_MMIO_DISABLE_REG_READ); 1876 ena_com_set_mmio_read_mode(ena_dev, readless_supported); 1877 1878 /* reset device */ 1879 rc = ena_com_dev_reset(ena_dev, ENA_REGS_RESET_NORMAL); 1880 if (rc) { 1881 PMD_DRV_LOG_LINE(ERR, "Cannot reset device"); 1882 goto err_mmio_read_less; 1883 } 1884 1885 /* check FW version */ 1886 rc = ena_com_validate_version(ena_dev); 1887 if (rc) { 1888 PMD_DRV_LOG_LINE(ERR, "Device version is too low"); 1889 goto err_mmio_read_less; 1890 } 1891 1892 ena_dev->dma_addr_bits = ena_com_get_dma_width(ena_dev); 1893 1894 /* ENA device administration layer init */ 1895 rc = ena_com_admin_init(ena_dev, &aenq_handlers); 1896 if (rc) { 1897 PMD_DRV_LOG_LINE(ERR, 1898 "Cannot initialize ENA admin queue"); 1899 goto err_mmio_read_less; 1900 } 1901 1902 /* To enable the msix interrupts the driver needs to know the number 1903 * of queues. So the driver uses polling mode to retrieve this 1904 * information. 1905 */ 1906 ena_com_set_admin_polling_mode(ena_dev, true); 1907 1908 ena_config_host_info(ena_dev); 1909 1910 /* Get Device Attributes and features */ 1911 rc = ena_com_get_dev_attr_feat(ena_dev, get_feat_ctx); 1912 if (rc) { 1913 PMD_DRV_LOG_LINE(ERR, 1914 "Cannot get attribute for ENA device, rc: %d", rc); 1915 goto err_admin_init; 1916 } 1917 1918 aenq_groups = BIT(ENA_ADMIN_LINK_CHANGE) | 1919 BIT(ENA_ADMIN_NOTIFICATION) | 1920 BIT(ENA_ADMIN_KEEP_ALIVE) | 1921 BIT(ENA_ADMIN_FATAL_ERROR) | 1922 BIT(ENA_ADMIN_WARNING) | 1923 BIT(ENA_ADMIN_CONF_NOTIFICATIONS); 1924 1925 aenq_groups &= get_feat_ctx->aenq.supported_groups; 1926 1927 adapter->all_aenq_groups = aenq_groups; 1928 /* The actual supported number of metrics is negotiated with the device at runtime */ 1929 adapter->metrics_num = ena_get_metrics_entries(adapter); 1930 1931 return 0; 1932 1933 err_admin_init: 1934 ena_com_admin_destroy(ena_dev); 1935 1936 err_mmio_read_less: 1937 ena_com_mmio_reg_read_request_destroy(ena_dev); 1938 1939 return rc; 1940 } 1941 1942 static void ena_control_path_handler(void *cb_arg) 1943 { 1944 struct rte_eth_dev *dev = cb_arg; 1945 struct ena_adapter *adapter = dev->data->dev_private; 1946 struct ena_com_dev *ena_dev = &adapter->ena_dev; 1947 1948 if (likely(adapter->state != ENA_ADAPTER_STATE_CLOSED)) { 1949 ena_com_admin_q_comp_intr_handler(ena_dev); 1950 ena_com_aenq_intr_handler(ena_dev, dev); 1951 } 1952 } 1953 1954 static void ena_control_path_poll_handler(void *cb_arg) 1955 { 1956 struct rte_eth_dev *dev = cb_arg; 1957 struct ena_adapter *adapter = dev->data->dev_private; 1958 int rc; 1959 1960 if (likely(adapter->state != ENA_ADAPTER_STATE_CLOSED)) { 1961 ena_control_path_handler(cb_arg); 1962 rc = rte_eal_alarm_set(adapter->control_path_poll_interval, 1963 ena_control_path_poll_handler, cb_arg); 1964 if (unlikely(rc != 0)) { 1965 PMD_DRV_LOG_LINE(ERR, "Failed to retrigger control path alarm"); 1966 ena_trigger_reset(adapter, ENA_REGS_RESET_GENERIC); 1967 } 1968 } 1969 } 1970 1971 static void check_for_missing_keep_alive(struct ena_adapter *adapter) 1972 { 1973 if (!(adapter->active_aenq_groups & BIT(ENA_ADMIN_KEEP_ALIVE))) 1974 return; 1975 1976 if (adapter->keep_alive_timeout == ENA_HW_HINTS_NO_TIMEOUT) 1977 return; 1978 1979 if (unlikely((rte_get_timer_cycles() - adapter->timestamp_wd) >= 1980 adapter->keep_alive_timeout)) { 1981 PMD_DRV_LOG_LINE(ERR, "Keep alive timeout"); 1982 ena_trigger_reset(adapter, ENA_REGS_RESET_KEEP_ALIVE_TO); 1983 ++adapter->dev_stats.wd_expired; 1984 } 1985 } 1986 1987 /* Check if admin queue is enabled */ 1988 static void check_for_admin_com_state(struct ena_adapter *adapter) 1989 { 1990 if (unlikely(!ena_com_get_admin_running_state(&adapter->ena_dev))) { 1991 PMD_DRV_LOG_LINE(ERR, "ENA admin queue is not in running state"); 1992 ena_trigger_reset(adapter, ENA_REGS_RESET_ADMIN_TO); 1993 } 1994 } 1995 1996 static int check_for_tx_completion_in_queue(struct ena_adapter *adapter, 1997 struct ena_ring *tx_ring) 1998 { 1999 struct ena_tx_buffer *tx_buf; 2000 uint64_t timestamp; 2001 uint64_t completion_delay; 2002 uint32_t missed_tx = 0; 2003 unsigned int i; 2004 int rc = 0; 2005 2006 for (i = 0; i < tx_ring->ring_size; ++i) { 2007 tx_buf = &tx_ring->tx_buffer_info[i]; 2008 timestamp = tx_buf->timestamp; 2009 2010 if (timestamp == 0) 2011 continue; 2012 2013 completion_delay = rte_get_timer_cycles() - timestamp; 2014 if (completion_delay > adapter->missing_tx_completion_to) { 2015 if (unlikely(!tx_buf->print_once)) { 2016 PMD_TX_LOG_LINE(WARNING, 2017 "Found a Tx that wasn't completed on time, qid %d, index %d. " 2018 "Missing Tx outstanding for %" PRIu64 " msecs.", 2019 tx_ring->id, i, completion_delay / 2020 rte_get_timer_hz() * 1000); 2021 tx_buf->print_once = true; 2022 } 2023 ++missed_tx; 2024 } 2025 } 2026 2027 if (unlikely(missed_tx > tx_ring->missing_tx_completion_threshold)) { 2028 PMD_DRV_LOG_LINE(ERR, 2029 "The number of lost Tx completions is above the threshold (%d > %d). " 2030 "Trigger the device reset.", 2031 missed_tx, 2032 tx_ring->missing_tx_completion_threshold); 2033 adapter->reset_reason = ENA_REGS_RESET_MISS_TX_CMPL; 2034 adapter->trigger_reset = true; 2035 rc = -EIO; 2036 } 2037 2038 tx_ring->tx_stats.missed_tx += missed_tx; 2039 2040 return rc; 2041 } 2042 2043 static void check_for_tx_completions(struct ena_adapter *adapter) 2044 { 2045 struct ena_ring *tx_ring; 2046 uint64_t tx_cleanup_delay; 2047 size_t qid; 2048 int budget; 2049 uint16_t nb_tx_queues = adapter->edev_data->nb_tx_queues; 2050 2051 if (adapter->missing_tx_completion_to == ENA_HW_HINTS_NO_TIMEOUT) 2052 return; 2053 2054 nb_tx_queues = adapter->edev_data->nb_tx_queues; 2055 budget = adapter->missing_tx_completion_budget; 2056 2057 qid = adapter->last_tx_comp_qid; 2058 while (budget-- > 0) { 2059 tx_ring = &adapter->tx_ring[qid]; 2060 2061 /* Tx cleanup is called only by the burst function and can be 2062 * called dynamically by the application. Also cleanup is 2063 * limited by the threshold. To avoid false detection of the 2064 * missing HW Tx completion, get the delay since last cleanup 2065 * function was called. 2066 */ 2067 tx_cleanup_delay = rte_get_timer_cycles() - 2068 tx_ring->last_cleanup_ticks; 2069 if (tx_cleanup_delay < adapter->tx_cleanup_stall_delay) 2070 check_for_tx_completion_in_queue(adapter, tx_ring); 2071 qid = (qid + 1) % nb_tx_queues; 2072 } 2073 2074 adapter->last_tx_comp_qid = qid; 2075 } 2076 2077 static void ena_timer_wd_callback(__rte_unused struct rte_timer *timer, 2078 void *arg) 2079 { 2080 struct rte_eth_dev *dev = arg; 2081 struct ena_adapter *adapter = dev->data->dev_private; 2082 2083 if (unlikely(adapter->trigger_reset)) 2084 return; 2085 2086 check_for_missing_keep_alive(adapter); 2087 check_for_admin_com_state(adapter); 2088 check_for_tx_completions(adapter); 2089 2090 if (unlikely(adapter->trigger_reset)) { 2091 PMD_DRV_LOG_LINE(ERR, "Trigger reset is on"); 2092 rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_RESET, 2093 NULL); 2094 } 2095 } 2096 2097 static inline void 2098 set_default_llq_configurations(struct ena_llq_configurations *llq_config, 2099 struct ena_admin_feature_llq_desc *llq, 2100 bool use_large_llq_hdr) 2101 { 2102 llq_config->llq_header_location = ENA_ADMIN_INLINE_HEADER; 2103 llq_config->llq_stride_ctrl = ENA_ADMIN_MULTIPLE_DESCS_PER_ENTRY; 2104 llq_config->llq_num_decs_before_header = 2105 ENA_ADMIN_LLQ_NUM_DESCS_BEFORE_HEADER_2; 2106 2107 if (use_large_llq_hdr && 2108 (llq->entry_size_ctrl_supported & ENA_ADMIN_LIST_ENTRY_SIZE_256B)) { 2109 llq_config->llq_ring_entry_size = 2110 ENA_ADMIN_LIST_ENTRY_SIZE_256B; 2111 llq_config->llq_ring_entry_size_value = 256; 2112 } else { 2113 llq_config->llq_ring_entry_size = 2114 ENA_ADMIN_LIST_ENTRY_SIZE_128B; 2115 llq_config->llq_ring_entry_size_value = 128; 2116 } 2117 } 2118 2119 static int 2120 ena_set_queues_placement_policy(struct ena_adapter *adapter, 2121 struct ena_com_dev *ena_dev, 2122 struct ena_admin_feature_llq_desc *llq, 2123 struct ena_llq_configurations *llq_default_configurations) 2124 { 2125 int rc; 2126 u32 llq_feature_mask; 2127 2128 if (adapter->llq_header_policy == ENA_LLQ_POLICY_DISABLED) { 2129 PMD_DRV_LOG_LINE(WARNING, 2130 "NOTE: LLQ has been disabled as per user's request. " 2131 "This may lead to a huge performance degradation!"); 2132 ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST; 2133 return 0; 2134 } 2135 2136 llq_feature_mask = 1 << ENA_ADMIN_LLQ; 2137 if (!(ena_dev->supported_features & llq_feature_mask)) { 2138 PMD_DRV_LOG_LINE(INFO, 2139 "LLQ is not supported. Fallback to host mode policy."); 2140 ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST; 2141 return 0; 2142 } 2143 2144 if (adapter->dev_mem_base == NULL) { 2145 PMD_DRV_LOG_LINE(ERR, 2146 "LLQ is advertised as supported, but device doesn't expose mem bar"); 2147 ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST; 2148 return 0; 2149 } 2150 2151 rc = ena_com_config_dev_mode(ena_dev, llq, llq_default_configurations); 2152 if (unlikely(rc)) { 2153 PMD_INIT_LOG_LINE(WARNING, 2154 "Failed to config dev mode. Fallback to host mode policy."); 2155 ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST; 2156 return 0; 2157 } 2158 2159 /* Nothing to config, exit */ 2160 if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_HOST) 2161 return 0; 2162 2163 ena_dev->mem_bar = adapter->dev_mem_base; 2164 2165 return 0; 2166 } 2167 2168 static uint32_t ena_calc_max_io_queue_num(struct ena_com_dev *ena_dev, 2169 struct ena_com_dev_get_features_ctx *get_feat_ctx) 2170 { 2171 uint32_t io_tx_sq_num, io_tx_cq_num, io_rx_num, max_num_io_queues; 2172 2173 /* Regular queues capabilities */ 2174 if (ena_dev->supported_features & BIT(ENA_ADMIN_MAX_QUEUES_EXT)) { 2175 struct ena_admin_queue_ext_feature_fields *max_queue_ext = 2176 &get_feat_ctx->max_queue_ext.max_queue_ext; 2177 io_rx_num = RTE_MIN(max_queue_ext->max_rx_sq_num, 2178 max_queue_ext->max_rx_cq_num); 2179 io_tx_sq_num = max_queue_ext->max_tx_sq_num; 2180 io_tx_cq_num = max_queue_ext->max_tx_cq_num; 2181 } else { 2182 struct ena_admin_queue_feature_desc *max_queues = 2183 &get_feat_ctx->max_queues; 2184 io_tx_sq_num = max_queues->max_sq_num; 2185 io_tx_cq_num = max_queues->max_cq_num; 2186 io_rx_num = RTE_MIN(io_tx_sq_num, io_tx_cq_num); 2187 } 2188 2189 /* In case of LLQ use the llq number in the get feature cmd */ 2190 if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) 2191 io_tx_sq_num = get_feat_ctx->llq.max_llq_num; 2192 2193 max_num_io_queues = RTE_MIN(ENA_MAX_NUM_IO_QUEUES, io_rx_num); 2194 max_num_io_queues = RTE_MIN(max_num_io_queues, io_tx_sq_num); 2195 max_num_io_queues = RTE_MIN(max_num_io_queues, io_tx_cq_num); 2196 2197 if (unlikely(max_num_io_queues == 0)) { 2198 PMD_DRV_LOG_LINE(ERR, "Number of IO queues cannot not be 0"); 2199 return -EFAULT; 2200 } 2201 2202 return max_num_io_queues; 2203 } 2204 2205 static void 2206 ena_set_offloads(struct ena_offloads *offloads, 2207 struct ena_admin_feature_offload_desc *offload_desc) 2208 { 2209 if (offload_desc->tx & ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_IPV4_MASK) 2210 offloads->tx_offloads |= ENA_IPV4_TSO; 2211 2212 /* Tx IPv4 checksum offloads */ 2213 if (offload_desc->tx & 2214 ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L3_CSUM_IPV4_MASK) 2215 offloads->tx_offloads |= ENA_L3_IPV4_CSUM; 2216 if (offload_desc->tx & 2217 ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV4_CSUM_FULL_MASK) 2218 offloads->tx_offloads |= ENA_L4_IPV4_CSUM; 2219 if (offload_desc->tx & 2220 ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV4_CSUM_PART_MASK) 2221 offloads->tx_offloads |= ENA_L4_IPV4_CSUM_PARTIAL; 2222 2223 /* Tx IPv6 checksum offloads */ 2224 if (offload_desc->tx & 2225 ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV6_CSUM_FULL_MASK) 2226 offloads->tx_offloads |= ENA_L4_IPV6_CSUM; 2227 if (offload_desc->tx & 2228 ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV6_CSUM_PART_MASK) 2229 offloads->tx_offloads |= ENA_L4_IPV6_CSUM_PARTIAL; 2230 2231 /* Rx IPv4 checksum offloads */ 2232 if (offload_desc->rx_supported & 2233 ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L3_CSUM_IPV4_MASK) 2234 offloads->rx_offloads |= ENA_L3_IPV4_CSUM; 2235 if (offload_desc->rx_supported & 2236 ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L4_IPV4_CSUM_MASK) 2237 offloads->rx_offloads |= ENA_L4_IPV4_CSUM; 2238 2239 /* Rx IPv6 checksum offloads */ 2240 if (offload_desc->rx_supported & 2241 ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L4_IPV6_CSUM_MASK) 2242 offloads->rx_offloads |= ENA_L4_IPV6_CSUM; 2243 2244 if (offload_desc->rx_supported & 2245 ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_HASH_MASK) 2246 offloads->rx_offloads |= ENA_RX_RSS_HASH; 2247 } 2248 2249 static int ena_init_once(void) 2250 { 2251 static bool init_done; 2252 2253 if (init_done) 2254 return 0; 2255 2256 if (rte_eal_process_type() == RTE_PROC_PRIMARY) { 2257 /* Init timer subsystem for the ENA timer service. */ 2258 rte_timer_subsystem_init(); 2259 /* Register handler for requests from secondary processes. */ 2260 rte_mp_action_register(ENA_MP_NAME, ena_mp_primary_handle); 2261 } 2262 2263 init_done = true; 2264 return 0; 2265 } 2266 2267 static int eth_ena_dev_init(struct rte_eth_dev *eth_dev) 2268 { 2269 struct ena_calc_queue_size_ctx calc_queue_ctx = { 0 }; 2270 struct rte_pci_device *pci_dev; 2271 struct rte_intr_handle *intr_handle; 2272 struct ena_adapter *adapter = eth_dev->data->dev_private; 2273 struct ena_com_dev *ena_dev = &adapter->ena_dev; 2274 struct ena_com_dev_get_features_ctx get_feat_ctx; 2275 struct ena_llq_configurations llq_config; 2276 const char *queue_type_str; 2277 uint32_t max_num_io_queues; 2278 int rc; 2279 static int adapters_found; 2280 bool disable_meta_caching; 2281 2282 eth_dev->dev_ops = &ena_dev_ops; 2283 eth_dev->rx_pkt_burst = ð_ena_recv_pkts; 2284 eth_dev->tx_pkt_burst = ð_ena_xmit_pkts; 2285 eth_dev->tx_pkt_prepare = ð_ena_prep_pkts; 2286 2287 rc = ena_init_once(); 2288 if (rc != 0) 2289 return rc; 2290 2291 if (rte_eal_process_type() != RTE_PROC_PRIMARY) 2292 return 0; 2293 2294 eth_dev->data->dev_flags |= RTE_ETH_DEV_AUTOFILL_QUEUE_XSTATS; 2295 2296 memset(adapter, 0, sizeof(struct ena_adapter)); 2297 ena_dev = &adapter->ena_dev; 2298 2299 adapter->edev_data = eth_dev->data; 2300 2301 pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev); 2302 2303 PMD_INIT_LOG_LINE(INFO, "Initializing " PCI_PRI_FMT, 2304 pci_dev->addr.domain, 2305 pci_dev->addr.bus, 2306 pci_dev->addr.devid, 2307 pci_dev->addr.function); 2308 2309 intr_handle = pci_dev->intr_handle; 2310 2311 adapter->regs = pci_dev->mem_resource[ENA_REGS_BAR].addr; 2312 adapter->dev_mem_base = pci_dev->mem_resource[ENA_MEM_BAR].addr; 2313 2314 if (!adapter->regs) { 2315 PMD_INIT_LOG_LINE(CRIT, "Failed to access registers BAR(%d)", 2316 ENA_REGS_BAR); 2317 return -ENXIO; 2318 } 2319 2320 ena_dev->reg_bar = adapter->regs; 2321 /* Pass device data as a pointer which can be passed to the IO functions 2322 * by the ena_com (for example - the memory allocation). 2323 */ 2324 ena_dev->dmadev = eth_dev->data; 2325 2326 adapter->id_number = adapters_found; 2327 2328 snprintf(adapter->name, ENA_NAME_MAX_LEN, "ena_%d", 2329 adapter->id_number); 2330 2331 /* Assign default devargs values */ 2332 adapter->missing_tx_completion_to = ENA_TX_TIMEOUT; 2333 adapter->llq_header_policy = ENA_LLQ_POLICY_RECOMMENDED; 2334 2335 /* Get user bypass */ 2336 rc = ena_parse_devargs(adapter, pci_dev->device.devargs); 2337 if (rc != 0) { 2338 PMD_INIT_LOG_LINE(CRIT, "Failed to parse devargs"); 2339 goto err; 2340 } 2341 rc = ena_com_allocate_customer_metrics_buffer(ena_dev); 2342 if (rc != 0) { 2343 PMD_INIT_LOG_LINE(CRIT, "Failed to allocate customer metrics buffer"); 2344 goto err; 2345 } 2346 2347 /* device specific initialization routine */ 2348 rc = ena_device_init(adapter, pci_dev, &get_feat_ctx); 2349 if (rc) { 2350 PMD_INIT_LOG_LINE(CRIT, "Failed to init ENA device"); 2351 goto err_metrics_delete; 2352 } 2353 2354 /* Check if device supports LSC */ 2355 if (!(adapter->all_aenq_groups & BIT(ENA_ADMIN_LINK_CHANGE))) 2356 adapter->edev_data->dev_flags &= ~RTE_ETH_DEV_INTR_LSC; 2357 2358 bool use_large_llq_hdr = ena_use_large_llq_hdr(adapter, 2359 get_feat_ctx.llq.entry_size_recommended); 2360 set_default_llq_configurations(&llq_config, &get_feat_ctx.llq, use_large_llq_hdr); 2361 rc = ena_set_queues_placement_policy(adapter, ena_dev, 2362 &get_feat_ctx.llq, &llq_config); 2363 if (unlikely(rc)) { 2364 PMD_INIT_LOG_LINE(CRIT, "Failed to set placement policy"); 2365 return rc; 2366 } 2367 2368 if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_HOST) { 2369 queue_type_str = "Regular"; 2370 } else { 2371 queue_type_str = "Low latency"; 2372 PMD_DRV_LOG_LINE(INFO, "LLQ entry size %uB", llq_config.llq_ring_entry_size_value); 2373 } 2374 PMD_DRV_LOG_LINE(INFO, "Placement policy: %s", queue_type_str); 2375 2376 calc_queue_ctx.ena_dev = ena_dev; 2377 calc_queue_ctx.get_feat_ctx = &get_feat_ctx; 2378 2379 max_num_io_queues = ena_calc_max_io_queue_num(ena_dev, &get_feat_ctx); 2380 rc = ena_calc_io_queue_size(&calc_queue_ctx, use_large_llq_hdr); 2381 if (unlikely((rc != 0) || (max_num_io_queues == 0))) { 2382 rc = -EFAULT; 2383 goto err_device_destroy; 2384 } 2385 2386 adapter->max_tx_ring_size = calc_queue_ctx.max_tx_queue_size; 2387 adapter->max_rx_ring_size = calc_queue_ctx.max_rx_queue_size; 2388 adapter->max_tx_sgl_size = calc_queue_ctx.max_tx_sgl_size; 2389 adapter->max_rx_sgl_size = calc_queue_ctx.max_rx_sgl_size; 2390 adapter->max_num_io_queues = max_num_io_queues; 2391 2392 if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) { 2393 disable_meta_caching = 2394 !!(get_feat_ctx.llq.accel_mode.u.get.supported_flags & 2395 BIT(ENA_ADMIN_DISABLE_META_CACHING)); 2396 } else { 2397 disable_meta_caching = false; 2398 } 2399 2400 /* prepare ring structures */ 2401 ena_init_rings(adapter, disable_meta_caching); 2402 2403 ena_config_debug_area(adapter); 2404 2405 /* Set max MTU for this device */ 2406 adapter->max_mtu = get_feat_ctx.dev_attr.max_mtu; 2407 2408 ena_set_offloads(&adapter->offloads, &get_feat_ctx.offload); 2409 2410 /* Copy MAC address and point DPDK to it */ 2411 eth_dev->data->mac_addrs = (struct rte_ether_addr *)adapter->mac_addr; 2412 rte_ether_addr_copy((struct rte_ether_addr *) 2413 get_feat_ctx.dev_attr.mac_addr, 2414 (struct rte_ether_addr *)adapter->mac_addr); 2415 2416 rc = ena_com_rss_init(ena_dev, ENA_RX_RSS_TABLE_LOG_SIZE); 2417 if (unlikely(rc != 0)) { 2418 PMD_DRV_LOG_LINE(ERR, "Failed to initialize RSS in ENA device"); 2419 goto err_delete_debug_area; 2420 } 2421 2422 adapter->drv_stats = rte_zmalloc("adapter stats", 2423 sizeof(*adapter->drv_stats), 2424 RTE_CACHE_LINE_SIZE); 2425 if (!adapter->drv_stats) { 2426 PMD_DRV_LOG_LINE(ERR, 2427 "Failed to allocate memory for adapter statistics"); 2428 rc = -ENOMEM; 2429 goto err_rss_destroy; 2430 } 2431 2432 rte_spinlock_init(&adapter->admin_lock); 2433 2434 if (!adapter->control_path_poll_interval) { 2435 /* Control path interrupt mode */ 2436 rte_intr_callback_register(intr_handle, ena_control_path_handler, eth_dev); 2437 rte_intr_enable(intr_handle); 2438 ena_com_set_admin_polling_mode(ena_dev, false); 2439 } else { 2440 /* Control path polling mode */ 2441 rc = rte_eal_alarm_set(adapter->control_path_poll_interval, 2442 ena_control_path_poll_handler, eth_dev); 2443 if (unlikely(rc != 0)) { 2444 PMD_DRV_LOG_LINE(ERR, "Failed to set control path alarm"); 2445 goto err_control_path_destroy; 2446 } 2447 } 2448 ena_com_admin_aenq_enable(ena_dev); 2449 rte_timer_init(&adapter->timer_wd); 2450 2451 adapters_found++; 2452 adapter->state = ENA_ADAPTER_STATE_INIT; 2453 2454 return 0; 2455 err_control_path_destroy: 2456 rte_free(adapter->drv_stats); 2457 err_rss_destroy: 2458 ena_com_rss_destroy(ena_dev); 2459 err_delete_debug_area: 2460 ena_com_delete_debug_area(ena_dev); 2461 2462 err_device_destroy: 2463 ena_com_delete_host_info(ena_dev); 2464 ena_com_admin_destroy(ena_dev); 2465 err_metrics_delete: 2466 ena_com_delete_customer_metrics_buffer(ena_dev); 2467 err: 2468 return rc; 2469 } 2470 2471 static int eth_ena_dev_uninit(struct rte_eth_dev *eth_dev) 2472 { 2473 if (rte_eal_process_type() != RTE_PROC_PRIMARY) 2474 return 0; 2475 2476 ena_close(eth_dev); 2477 2478 return 0; 2479 } 2480 2481 static int ena_dev_configure(struct rte_eth_dev *dev) 2482 { 2483 struct ena_adapter *adapter = dev->data->dev_private; 2484 int rc; 2485 2486 adapter->state = ENA_ADAPTER_STATE_CONFIG; 2487 2488 if (dev->data->dev_conf.rxmode.mq_mode & RTE_ETH_MQ_RX_RSS_FLAG) 2489 dev->data->dev_conf.rxmode.offloads |= RTE_ETH_RX_OFFLOAD_RSS_HASH; 2490 dev->data->dev_conf.txmode.offloads |= RTE_ETH_TX_OFFLOAD_MULTI_SEGS; 2491 2492 /* Scattered Rx cannot be turned off in the HW, so this capability must 2493 * be forced. 2494 */ 2495 dev->data->scattered_rx = 1; 2496 2497 adapter->last_tx_comp_qid = 0; 2498 2499 adapter->missing_tx_completion_budget = 2500 RTE_MIN(ENA_MONITORED_TX_QUEUES, dev->data->nb_tx_queues); 2501 2502 /* To avoid detection of the spurious Tx completion timeout due to 2503 * application not calling the Tx cleanup function, set timeout for the 2504 * Tx queue which should be half of the missing completion timeout for a 2505 * safety. If there will be a lot of missing Tx completions in the 2506 * queue, they will be detected sooner or later. 2507 */ 2508 adapter->tx_cleanup_stall_delay = adapter->missing_tx_completion_to / 2; 2509 2510 rc = ena_configure_aenq(adapter); 2511 2512 return rc; 2513 } 2514 2515 static void ena_init_rings(struct ena_adapter *adapter, 2516 bool disable_meta_caching) 2517 { 2518 size_t i; 2519 2520 for (i = 0; i < adapter->max_num_io_queues; i++) { 2521 struct ena_ring *ring = &adapter->tx_ring[i]; 2522 2523 ring->configured = 0; 2524 ring->type = ENA_RING_TYPE_TX; 2525 ring->adapter = adapter; 2526 ring->id = i; 2527 ring->tx_mem_queue_type = adapter->ena_dev.tx_mem_queue_type; 2528 ring->tx_max_header_size = adapter->ena_dev.tx_max_header_size; 2529 ring->sgl_size = adapter->max_tx_sgl_size; 2530 ring->disable_meta_caching = disable_meta_caching; 2531 } 2532 2533 for (i = 0; i < adapter->max_num_io_queues; i++) { 2534 struct ena_ring *ring = &adapter->rx_ring[i]; 2535 2536 ring->configured = 0; 2537 ring->type = ENA_RING_TYPE_RX; 2538 ring->adapter = adapter; 2539 ring->id = i; 2540 ring->sgl_size = adapter->max_rx_sgl_size; 2541 } 2542 } 2543 2544 static uint64_t ena_get_rx_port_offloads(struct ena_adapter *adapter) 2545 { 2546 uint64_t port_offloads = 0; 2547 2548 if (adapter->offloads.rx_offloads & ENA_L3_IPV4_CSUM) 2549 port_offloads |= RTE_ETH_RX_OFFLOAD_IPV4_CKSUM; 2550 2551 if (adapter->offloads.rx_offloads & 2552 (ENA_L4_IPV4_CSUM | ENA_L4_IPV6_CSUM)) 2553 port_offloads |= 2554 RTE_ETH_RX_OFFLOAD_UDP_CKSUM | RTE_ETH_RX_OFFLOAD_TCP_CKSUM; 2555 2556 if (adapter->offloads.rx_offloads & ENA_RX_RSS_HASH) 2557 port_offloads |= RTE_ETH_RX_OFFLOAD_RSS_HASH; 2558 2559 port_offloads |= RTE_ETH_RX_OFFLOAD_SCATTER; 2560 2561 return port_offloads; 2562 } 2563 2564 static uint64_t ena_get_tx_port_offloads(struct ena_adapter *adapter) 2565 { 2566 uint64_t port_offloads = 0; 2567 2568 if (adapter->offloads.tx_offloads & ENA_IPV4_TSO) 2569 port_offloads |= RTE_ETH_TX_OFFLOAD_TCP_TSO; 2570 2571 if (adapter->offloads.tx_offloads & ENA_L3_IPV4_CSUM) 2572 port_offloads |= RTE_ETH_TX_OFFLOAD_IPV4_CKSUM; 2573 if (adapter->offloads.tx_offloads & 2574 (ENA_L4_IPV4_CSUM_PARTIAL | ENA_L4_IPV4_CSUM | 2575 ENA_L4_IPV6_CSUM | ENA_L4_IPV6_CSUM_PARTIAL)) 2576 port_offloads |= 2577 RTE_ETH_TX_OFFLOAD_UDP_CKSUM | RTE_ETH_TX_OFFLOAD_TCP_CKSUM; 2578 2579 port_offloads |= RTE_ETH_TX_OFFLOAD_MULTI_SEGS; 2580 2581 port_offloads |= RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE; 2582 2583 return port_offloads; 2584 } 2585 2586 static uint64_t ena_get_rx_queue_offloads(struct ena_adapter *adapter) 2587 { 2588 RTE_SET_USED(adapter); 2589 2590 return 0; 2591 } 2592 2593 static uint64_t ena_get_tx_queue_offloads(struct ena_adapter *adapter) 2594 { 2595 uint64_t queue_offloads = 0; 2596 RTE_SET_USED(adapter); 2597 2598 queue_offloads |= RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE; 2599 2600 return queue_offloads; 2601 } 2602 2603 static int ena_infos_get(struct rte_eth_dev *dev, 2604 struct rte_eth_dev_info *dev_info) 2605 { 2606 struct ena_adapter *adapter; 2607 struct ena_com_dev *ena_dev; 2608 2609 ena_assert_msg(dev->data != NULL, "Uninitialized device\n"); 2610 ena_assert_msg(dev->data->dev_private != NULL, "Uninitialized device\n"); 2611 adapter = dev->data->dev_private; 2612 2613 ena_dev = &adapter->ena_dev; 2614 ena_assert_msg(ena_dev != NULL, "Uninitialized device\n"); 2615 2616 dev_info->speed_capa = 2617 RTE_ETH_LINK_SPEED_1G | 2618 RTE_ETH_LINK_SPEED_2_5G | 2619 RTE_ETH_LINK_SPEED_5G | 2620 RTE_ETH_LINK_SPEED_10G | 2621 RTE_ETH_LINK_SPEED_25G | 2622 RTE_ETH_LINK_SPEED_40G | 2623 RTE_ETH_LINK_SPEED_50G | 2624 RTE_ETH_LINK_SPEED_100G | 2625 RTE_ETH_LINK_SPEED_200G | 2626 RTE_ETH_LINK_SPEED_400G; 2627 2628 /* Inform framework about available features */ 2629 dev_info->rx_offload_capa = ena_get_rx_port_offloads(adapter); 2630 dev_info->tx_offload_capa = ena_get_tx_port_offloads(adapter); 2631 dev_info->rx_queue_offload_capa = ena_get_rx_queue_offloads(adapter); 2632 dev_info->tx_queue_offload_capa = ena_get_tx_queue_offloads(adapter); 2633 2634 dev_info->flow_type_rss_offloads = ENA_ALL_RSS_HF; 2635 dev_info->hash_key_size = ENA_HASH_KEY_SIZE; 2636 2637 dev_info->min_rx_bufsize = ENA_MIN_FRAME_LEN; 2638 dev_info->max_rx_pktlen = adapter->max_mtu + RTE_ETHER_HDR_LEN + 2639 RTE_ETHER_CRC_LEN; 2640 dev_info->min_mtu = ENA_MIN_MTU; 2641 dev_info->max_mtu = adapter->max_mtu; 2642 dev_info->max_mac_addrs = 1; 2643 2644 dev_info->max_rx_queues = adapter->max_num_io_queues; 2645 dev_info->max_tx_queues = adapter->max_num_io_queues; 2646 dev_info->reta_size = ENA_RX_RSS_TABLE_SIZE; 2647 2648 dev_info->rx_desc_lim.nb_max = adapter->max_rx_ring_size; 2649 dev_info->rx_desc_lim.nb_min = ENA_MIN_RING_DESC; 2650 dev_info->rx_desc_lim.nb_seg_max = RTE_MIN(ENA_PKT_MAX_BUFS, 2651 adapter->max_rx_sgl_size); 2652 dev_info->rx_desc_lim.nb_mtu_seg_max = RTE_MIN(ENA_PKT_MAX_BUFS, 2653 adapter->max_rx_sgl_size); 2654 2655 dev_info->tx_desc_lim.nb_max = adapter->max_tx_ring_size; 2656 dev_info->tx_desc_lim.nb_min = ENA_MIN_RING_DESC; 2657 dev_info->tx_desc_lim.nb_seg_max = RTE_MIN(ENA_PKT_MAX_BUFS, 2658 adapter->max_tx_sgl_size); 2659 dev_info->tx_desc_lim.nb_mtu_seg_max = RTE_MIN(ENA_PKT_MAX_BUFS, 2660 adapter->max_tx_sgl_size); 2661 2662 dev_info->default_rxportconf.ring_size = RTE_MIN(ENA_DEFAULT_RING_SIZE, 2663 dev_info->rx_desc_lim.nb_max); 2664 dev_info->default_txportconf.ring_size = RTE_MIN(ENA_DEFAULT_RING_SIZE, 2665 dev_info->tx_desc_lim.nb_max); 2666 2667 dev_info->err_handle_mode = RTE_ETH_ERROR_HANDLE_MODE_PASSIVE; 2668 2669 return 0; 2670 } 2671 2672 static inline void ena_init_rx_mbuf(struct rte_mbuf *mbuf, uint16_t len) 2673 { 2674 mbuf->data_len = len; 2675 mbuf->data_off = RTE_PKTMBUF_HEADROOM; 2676 mbuf->refcnt = 1; 2677 mbuf->next = NULL; 2678 } 2679 2680 static struct rte_mbuf *ena_rx_mbuf(struct ena_ring *rx_ring, 2681 struct ena_com_rx_buf_info *ena_bufs, 2682 uint32_t descs, 2683 uint16_t *next_to_clean, 2684 uint8_t offset) 2685 { 2686 struct rte_mbuf *mbuf; 2687 struct rte_mbuf *mbuf_head; 2688 struct ena_rx_buffer *rx_info; 2689 int rc; 2690 uint16_t ntc, len, req_id, buf = 0; 2691 2692 if (unlikely(descs == 0)) 2693 return NULL; 2694 2695 ntc = *next_to_clean; 2696 2697 len = ena_bufs[buf].len; 2698 req_id = ena_bufs[buf].req_id; 2699 2700 rx_info = &rx_ring->rx_buffer_info[req_id]; 2701 2702 mbuf = rx_info->mbuf; 2703 RTE_ASSERT(mbuf != NULL); 2704 2705 ena_init_rx_mbuf(mbuf, len); 2706 2707 /* Fill the mbuf head with the data specific for 1st segment. */ 2708 mbuf_head = mbuf; 2709 mbuf_head->nb_segs = descs; 2710 mbuf_head->port = rx_ring->port_id; 2711 mbuf_head->pkt_len = len; 2712 mbuf_head->data_off += offset; 2713 2714 rx_info->mbuf = NULL; 2715 rx_ring->empty_rx_reqs[ntc] = req_id; 2716 ntc = ENA_IDX_NEXT_MASKED(ntc, rx_ring->size_mask); 2717 2718 while (--descs) { 2719 ++buf; 2720 len = ena_bufs[buf].len; 2721 req_id = ena_bufs[buf].req_id; 2722 2723 rx_info = &rx_ring->rx_buffer_info[req_id]; 2724 RTE_ASSERT(rx_info->mbuf != NULL); 2725 2726 if (unlikely(len == 0)) { 2727 /* 2728 * Some devices can pass descriptor with the length 0. 2729 * To avoid confusion, the PMD is simply putting the 2730 * descriptor back, as it was never used. We'll avoid 2731 * mbuf allocation that way. 2732 */ 2733 rc = ena_add_single_rx_desc(rx_ring->ena_com_io_sq, 2734 rx_info->mbuf, req_id); 2735 if (unlikely(rc != 0)) { 2736 /* Free the mbuf in case of an error. */ 2737 rte_mbuf_raw_free(rx_info->mbuf); 2738 } else { 2739 /* 2740 * If there was no error, just exit the loop as 2741 * 0 length descriptor is always the last one. 2742 */ 2743 break; 2744 } 2745 } else { 2746 /* Create an mbuf chain. */ 2747 mbuf->next = rx_info->mbuf; 2748 mbuf = mbuf->next; 2749 2750 ena_init_rx_mbuf(mbuf, len); 2751 mbuf_head->pkt_len += len; 2752 } 2753 2754 /* 2755 * Mark the descriptor as depleted and perform necessary 2756 * cleanup. 2757 * This code will execute in two cases: 2758 * 1. Descriptor len was greater than 0 - normal situation. 2759 * 2. Descriptor len was 0 and we failed to add the descriptor 2760 * to the device. In that situation, we should try to add 2761 * the mbuf again in the populate routine and mark the 2762 * descriptor as used up by the device. 2763 */ 2764 rx_info->mbuf = NULL; 2765 rx_ring->empty_rx_reqs[ntc] = req_id; 2766 ntc = ENA_IDX_NEXT_MASKED(ntc, rx_ring->size_mask); 2767 } 2768 2769 *next_to_clean = ntc; 2770 2771 return mbuf_head; 2772 } 2773 2774 static uint16_t eth_ena_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, 2775 uint16_t nb_pkts) 2776 { 2777 struct ena_ring *rx_ring = (struct ena_ring *)(rx_queue); 2778 unsigned int free_queue_entries; 2779 uint16_t next_to_clean = rx_ring->next_to_clean; 2780 enum ena_regs_reset_reason_types reset_reason; 2781 uint16_t descs_in_use; 2782 struct rte_mbuf *mbuf; 2783 uint16_t completed; 2784 struct ena_com_rx_ctx ena_rx_ctx; 2785 int i, rc = 0; 2786 2787 #ifdef RTE_ETHDEV_DEBUG_RX 2788 /* Check adapter state */ 2789 if (unlikely(rx_ring->adapter->state != ENA_ADAPTER_STATE_RUNNING)) { 2790 PMD_RX_LOG_LINE(ALERT, 2791 "Trying to receive pkts while device is NOT running"); 2792 return 0; 2793 } 2794 #endif 2795 2796 descs_in_use = rx_ring->ring_size - 2797 ena_com_free_q_entries(rx_ring->ena_com_io_sq) - 1; 2798 nb_pkts = RTE_MIN(descs_in_use, nb_pkts); 2799 2800 for (completed = 0; completed < nb_pkts; completed++) { 2801 ena_rx_ctx.max_bufs = rx_ring->sgl_size; 2802 ena_rx_ctx.ena_bufs = rx_ring->ena_bufs; 2803 ena_rx_ctx.descs = 0; 2804 ena_rx_ctx.pkt_offset = 0; 2805 /* receive packet context */ 2806 rc = ena_com_rx_pkt(rx_ring->ena_com_io_cq, 2807 rx_ring->ena_com_io_sq, 2808 &ena_rx_ctx); 2809 if (unlikely(rc)) { 2810 PMD_RX_LOG_LINE(ERR, 2811 "Failed to get the packet from the device, rc: %d", 2812 rc); 2813 switch (rc) { 2814 case ENA_COM_NO_SPACE: 2815 ++rx_ring->rx_stats.bad_desc_num; 2816 reset_reason = ENA_REGS_RESET_TOO_MANY_RX_DESCS; 2817 break; 2818 case ENA_COM_FAULT: 2819 ++rx_ring->rx_stats.bad_desc; 2820 reset_reason = ENA_REGS_RESET_RX_DESCRIPTOR_MALFORMED; 2821 break; 2822 case ENA_COM_EIO: 2823 ++rx_ring->rx_stats.bad_req_id; 2824 reset_reason = ENA_REGS_RESET_INV_RX_REQ_ID; 2825 break; 2826 default: 2827 ++rx_ring->rx_stats.unknown_error; 2828 reset_reason = ENA_REGS_RESET_DRIVER_INVALID_STATE; 2829 break; 2830 } 2831 ena_trigger_reset(rx_ring->adapter, reset_reason); 2832 return 0; 2833 } 2834 2835 mbuf = ena_rx_mbuf(rx_ring, 2836 ena_rx_ctx.ena_bufs, 2837 ena_rx_ctx.descs, 2838 &next_to_clean, 2839 ena_rx_ctx.pkt_offset); 2840 if (unlikely(mbuf == NULL)) { 2841 for (i = 0; i < ena_rx_ctx.descs; ++i) { 2842 rx_ring->empty_rx_reqs[next_to_clean] = 2843 rx_ring->ena_bufs[i].req_id; 2844 next_to_clean = ENA_IDX_NEXT_MASKED( 2845 next_to_clean, rx_ring->size_mask); 2846 } 2847 break; 2848 } 2849 2850 /* fill mbuf attributes if any */ 2851 ena_rx_mbuf_prepare(rx_ring, mbuf, &ena_rx_ctx); 2852 2853 if (unlikely(mbuf->ol_flags & 2854 (RTE_MBUF_F_RX_IP_CKSUM_BAD | RTE_MBUF_F_RX_L4_CKSUM_BAD))) 2855 rte_atomic64_inc(&rx_ring->adapter->drv_stats->ierrors); 2856 2857 rx_pkts[completed] = mbuf; 2858 rx_ring->rx_stats.bytes += mbuf->pkt_len; 2859 } 2860 2861 rx_ring->rx_stats.cnt += completed; 2862 rx_ring->next_to_clean = next_to_clean; 2863 2864 free_queue_entries = ena_com_free_q_entries(rx_ring->ena_com_io_sq); 2865 2866 /* Burst refill to save doorbells, memory barriers, const interval */ 2867 if (free_queue_entries >= rx_ring->rx_free_thresh) { 2868 ena_populate_rx_queue(rx_ring, free_queue_entries); 2869 } 2870 2871 return completed; 2872 } 2873 2874 static uint16_t 2875 eth_ena_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, 2876 uint16_t nb_pkts) 2877 { 2878 int32_t ret; 2879 uint32_t i; 2880 struct rte_mbuf *m; 2881 struct ena_ring *tx_ring = (struct ena_ring *)(tx_queue); 2882 struct ena_adapter *adapter = tx_ring->adapter; 2883 struct rte_ipv4_hdr *ip_hdr; 2884 uint64_t ol_flags; 2885 uint64_t l4_csum_flag; 2886 uint64_t dev_offload_capa; 2887 uint16_t frag_field; 2888 bool need_pseudo_csum; 2889 2890 dev_offload_capa = adapter->offloads.tx_offloads; 2891 for (i = 0; i != nb_pkts; i++) { 2892 m = tx_pkts[i]; 2893 ol_flags = m->ol_flags; 2894 2895 /* Check if any offload flag was set */ 2896 if (ol_flags == 0) 2897 continue; 2898 2899 l4_csum_flag = ol_flags & RTE_MBUF_F_TX_L4_MASK; 2900 /* SCTP checksum offload is not supported by the ENA. */ 2901 if ((ol_flags & ENA_TX_OFFLOAD_NOTSUP_MASK) || 2902 l4_csum_flag == RTE_MBUF_F_TX_SCTP_CKSUM) { 2903 PMD_TX_LOG_LINE(DEBUG, 2904 "mbuf[%" PRIu32 "] has unsupported offloads flags set: 0x%" PRIu64, 2905 i, ol_flags); 2906 rte_errno = ENOTSUP; 2907 return i; 2908 } 2909 2910 if (unlikely(m->nb_segs >= tx_ring->sgl_size && 2911 !(tx_ring->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV && 2912 m->nb_segs == tx_ring->sgl_size && 2913 m->data_len < tx_ring->tx_max_header_size))) { 2914 PMD_TX_LOG_LINE(DEBUG, 2915 "mbuf[%" PRIu32 "] has too many segments: %" PRIu16, 2916 i, m->nb_segs); 2917 rte_errno = EINVAL; 2918 return i; 2919 } 2920 2921 #ifdef RTE_LIBRTE_ETHDEV_DEBUG 2922 /* Check if requested offload is also enabled for the queue */ 2923 if ((ol_flags & RTE_MBUF_F_TX_IP_CKSUM && 2924 !(tx_ring->offloads & RTE_ETH_TX_OFFLOAD_IPV4_CKSUM)) || 2925 (l4_csum_flag == RTE_MBUF_F_TX_TCP_CKSUM && 2926 !(tx_ring->offloads & RTE_ETH_TX_OFFLOAD_TCP_CKSUM)) || 2927 (l4_csum_flag == RTE_MBUF_F_TX_UDP_CKSUM && 2928 !(tx_ring->offloads & RTE_ETH_TX_OFFLOAD_UDP_CKSUM))) { 2929 PMD_TX_LOG_LINE(DEBUG, 2930 "mbuf[%" PRIu32 "]: requested offloads: %" PRIu16 " are not enabled for the queue[%u]", 2931 i, m->nb_segs, tx_ring->id); 2932 rte_errno = EINVAL; 2933 return i; 2934 } 2935 2936 /* The caller is obligated to set l2 and l3 len if any cksum 2937 * offload is enabled. 2938 */ 2939 if (unlikely(ol_flags & (RTE_MBUF_F_TX_IP_CKSUM | RTE_MBUF_F_TX_L4_MASK) && 2940 (m->l2_len == 0 || m->l3_len == 0))) { 2941 PMD_TX_LOG_LINE(DEBUG, 2942 "mbuf[%" PRIu32 "]: l2_len or l3_len values are 0 while the offload was requested", 2943 i); 2944 rte_errno = EINVAL; 2945 return i; 2946 } 2947 ret = rte_validate_tx_offload(m); 2948 if (ret != 0) { 2949 rte_errno = -ret; 2950 return i; 2951 } 2952 #endif 2953 2954 /* Verify HW support for requested offloads and determine if 2955 * pseudo header checksum is needed. 2956 */ 2957 need_pseudo_csum = false; 2958 if (ol_flags & RTE_MBUF_F_TX_IPV4) { 2959 if (ol_flags & RTE_MBUF_F_TX_IP_CKSUM && 2960 !(dev_offload_capa & ENA_L3_IPV4_CSUM)) { 2961 rte_errno = ENOTSUP; 2962 return i; 2963 } 2964 2965 if (ol_flags & RTE_MBUF_F_TX_TCP_SEG && 2966 !(dev_offload_capa & ENA_IPV4_TSO)) { 2967 rte_errno = ENOTSUP; 2968 return i; 2969 } 2970 2971 /* Check HW capabilities and if pseudo csum is needed 2972 * for L4 offloads. 2973 */ 2974 if (l4_csum_flag != RTE_MBUF_F_TX_L4_NO_CKSUM && 2975 !(dev_offload_capa & ENA_L4_IPV4_CSUM)) { 2976 if (dev_offload_capa & 2977 ENA_L4_IPV4_CSUM_PARTIAL) { 2978 need_pseudo_csum = true; 2979 } else { 2980 rte_errno = ENOTSUP; 2981 return i; 2982 } 2983 } 2984 2985 /* Parse the DF flag */ 2986 ip_hdr = rte_pktmbuf_mtod_offset(m, 2987 struct rte_ipv4_hdr *, m->l2_len); 2988 frag_field = rte_be_to_cpu_16(ip_hdr->fragment_offset); 2989 if (frag_field & RTE_IPV4_HDR_DF_FLAG) { 2990 m->packet_type |= RTE_PTYPE_L4_NONFRAG; 2991 } else if (ol_flags & RTE_MBUF_F_TX_TCP_SEG) { 2992 /* In case we are supposed to TSO and have DF 2993 * not set (DF=0) hardware must be provided with 2994 * partial checksum. 2995 */ 2996 need_pseudo_csum = true; 2997 } 2998 } else if (ol_flags & RTE_MBUF_F_TX_IPV6) { 2999 /* There is no support for IPv6 TSO as for now. */ 3000 if (ol_flags & RTE_MBUF_F_TX_TCP_SEG) { 3001 rte_errno = ENOTSUP; 3002 return i; 3003 } 3004 3005 /* Check HW capabilities and if pseudo csum is needed */ 3006 if (l4_csum_flag != RTE_MBUF_F_TX_L4_NO_CKSUM && 3007 !(dev_offload_capa & ENA_L4_IPV6_CSUM)) { 3008 if (dev_offload_capa & 3009 ENA_L4_IPV6_CSUM_PARTIAL) { 3010 need_pseudo_csum = true; 3011 } else { 3012 rte_errno = ENOTSUP; 3013 return i; 3014 } 3015 } 3016 } 3017 3018 if (need_pseudo_csum) { 3019 ret = rte_net_intel_cksum_flags_prepare(m, ol_flags); 3020 if (ret != 0) { 3021 rte_errno = -ret; 3022 return i; 3023 } 3024 } 3025 } 3026 3027 return i; 3028 } 3029 3030 static void ena_update_hints(struct ena_adapter *adapter, 3031 struct ena_admin_ena_hw_hints *hints) 3032 { 3033 if (hints->admin_completion_tx_timeout) 3034 adapter->ena_dev.admin_queue.completion_timeout = 3035 hints->admin_completion_tx_timeout * 1000; 3036 3037 if (hints->mmio_read_timeout) 3038 /* convert to usec */ 3039 adapter->ena_dev.mmio_read.reg_read_to = 3040 hints->mmio_read_timeout * 1000; 3041 3042 if (hints->driver_watchdog_timeout) { 3043 if (hints->driver_watchdog_timeout == ENA_HW_HINTS_NO_TIMEOUT) 3044 adapter->keep_alive_timeout = ENA_HW_HINTS_NO_TIMEOUT; 3045 else 3046 // Convert msecs to ticks 3047 adapter->keep_alive_timeout = 3048 (hints->driver_watchdog_timeout * 3049 rte_get_timer_hz()) / 1000; 3050 } 3051 } 3052 3053 static void ena_tx_map_mbuf(struct ena_ring *tx_ring, 3054 struct ena_tx_buffer *tx_info, 3055 struct rte_mbuf *mbuf, 3056 void **push_header, 3057 uint16_t *header_len) 3058 { 3059 struct ena_com_buf *ena_buf; 3060 uint16_t delta, seg_len, push_len; 3061 3062 delta = 0; 3063 seg_len = mbuf->data_len; 3064 3065 tx_info->mbuf = mbuf; 3066 ena_buf = tx_info->bufs; 3067 3068 if (tx_ring->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) { 3069 /* 3070 * Tx header might be (and will be in most cases) smaller than 3071 * tx_max_header_size. But it's not an issue to send more data 3072 * to the device, than actually needed if the mbuf size is 3073 * greater than tx_max_header_size. 3074 */ 3075 push_len = RTE_MIN(mbuf->pkt_len, tx_ring->tx_max_header_size); 3076 *header_len = push_len; 3077 3078 if (likely(push_len <= seg_len)) { 3079 /* If the push header is in the single segment, then 3080 * just point it to the 1st mbuf data. 3081 */ 3082 *push_header = rte_pktmbuf_mtod(mbuf, uint8_t *); 3083 } else { 3084 /* If the push header lays in the several segments, copy 3085 * it to the intermediate buffer. 3086 */ 3087 rte_pktmbuf_read(mbuf, 0, push_len, 3088 tx_ring->push_buf_intermediate_buf); 3089 *push_header = tx_ring->push_buf_intermediate_buf; 3090 delta = push_len - seg_len; 3091 } 3092 } else { 3093 *push_header = NULL; 3094 *header_len = 0; 3095 push_len = 0; 3096 } 3097 3098 /* Process first segment taking into consideration pushed header */ 3099 if (seg_len > push_len) { 3100 ena_buf->paddr = mbuf->buf_iova + 3101 mbuf->data_off + 3102 push_len; 3103 ena_buf->len = seg_len - push_len; 3104 ena_buf++; 3105 tx_info->num_of_bufs++; 3106 } 3107 3108 while ((mbuf = mbuf->next) != NULL) { 3109 seg_len = mbuf->data_len; 3110 3111 /* Skip mbufs if whole data is pushed as a header */ 3112 if (unlikely(delta > seg_len)) { 3113 delta -= seg_len; 3114 continue; 3115 } 3116 3117 ena_buf->paddr = mbuf->buf_iova + mbuf->data_off + delta; 3118 ena_buf->len = seg_len - delta; 3119 ena_buf++; 3120 tx_info->num_of_bufs++; 3121 3122 delta = 0; 3123 } 3124 } 3125 3126 static int ena_xmit_mbuf(struct ena_ring *tx_ring, struct rte_mbuf *mbuf) 3127 { 3128 struct ena_tx_buffer *tx_info; 3129 struct ena_com_tx_ctx ena_tx_ctx = { { 0 } }; 3130 uint16_t next_to_use; 3131 uint16_t header_len; 3132 uint16_t req_id; 3133 void *push_header; 3134 int nb_hw_desc; 3135 int rc; 3136 3137 /* Checking for space for 2 additional metadata descriptors due to 3138 * possible header split and metadata descriptor 3139 */ 3140 if (!ena_com_sq_have_enough_space(tx_ring->ena_com_io_sq, 3141 mbuf->nb_segs + 2)) { 3142 PMD_TX_LOG_LINE(DEBUG, "Not enough space in the tx queue"); 3143 return ENA_COM_NO_MEM; 3144 } 3145 3146 next_to_use = tx_ring->next_to_use; 3147 3148 req_id = tx_ring->empty_tx_reqs[next_to_use]; 3149 tx_info = &tx_ring->tx_buffer_info[req_id]; 3150 tx_info->num_of_bufs = 0; 3151 RTE_ASSERT(tx_info->mbuf == NULL); 3152 3153 ena_tx_map_mbuf(tx_ring, tx_info, mbuf, &push_header, &header_len); 3154 3155 ena_tx_ctx.ena_bufs = tx_info->bufs; 3156 ena_tx_ctx.push_header = push_header; 3157 ena_tx_ctx.num_bufs = tx_info->num_of_bufs; 3158 ena_tx_ctx.req_id = req_id; 3159 ena_tx_ctx.header_len = header_len; 3160 3161 /* Set Tx offloads flags, if applicable */ 3162 ena_tx_mbuf_prepare(mbuf, &ena_tx_ctx, tx_ring->offloads, 3163 tx_ring->disable_meta_caching); 3164 3165 if (unlikely(ena_com_is_doorbell_needed(tx_ring->ena_com_io_sq, 3166 &ena_tx_ctx))) { 3167 PMD_TX_LOG_LINE(DEBUG, 3168 "LLQ Tx max burst size of queue %d achieved, writing doorbell to send burst", 3169 tx_ring->id); 3170 ena_com_write_sq_doorbell(tx_ring->ena_com_io_sq); 3171 tx_ring->tx_stats.doorbells++; 3172 tx_ring->pkts_without_db = false; 3173 } 3174 3175 /* prepare the packet's descriptors to dma engine */ 3176 rc = ena_com_prepare_tx(tx_ring->ena_com_io_sq, &ena_tx_ctx, 3177 &nb_hw_desc); 3178 if (unlikely(rc)) { 3179 PMD_DRV_LOG_LINE(ERR, "Failed to prepare Tx buffers, rc: %d", rc); 3180 ++tx_ring->tx_stats.prepare_ctx_err; 3181 ena_trigger_reset(tx_ring->adapter, 3182 ENA_REGS_RESET_DRIVER_INVALID_STATE); 3183 return rc; 3184 } 3185 3186 tx_info->tx_descs = nb_hw_desc; 3187 tx_info->timestamp = rte_get_timer_cycles(); 3188 3189 tx_ring->tx_stats.cnt++; 3190 tx_ring->tx_stats.bytes += mbuf->pkt_len; 3191 3192 tx_ring->next_to_use = ENA_IDX_NEXT_MASKED(next_to_use, 3193 tx_ring->size_mask); 3194 3195 return 0; 3196 } 3197 3198 static int ena_tx_cleanup(void *txp, uint32_t free_pkt_cnt) 3199 { 3200 struct rte_mbuf *pkts_to_clean[ENA_CLEANUP_BUF_THRESH]; 3201 struct ena_ring *tx_ring = (struct ena_ring *)txp; 3202 size_t mbuf_cnt = 0; 3203 size_t pkt_cnt = 0; 3204 unsigned int total_tx_descs = 0; 3205 unsigned int total_tx_pkts = 0; 3206 uint16_t cleanup_budget; 3207 uint16_t next_to_clean = tx_ring->next_to_clean; 3208 bool fast_free = tx_ring->offloads & RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE; 3209 3210 /* 3211 * If free_pkt_cnt is equal to 0, it means that the user requested 3212 * full cleanup, so attempt to release all Tx descriptors 3213 * (ring_size - 1 -> size_mask) 3214 */ 3215 cleanup_budget = (free_pkt_cnt == 0) ? tx_ring->size_mask : free_pkt_cnt; 3216 3217 while (likely(total_tx_pkts < cleanup_budget)) { 3218 struct rte_mbuf *mbuf; 3219 struct ena_tx_buffer *tx_info; 3220 uint16_t req_id; 3221 3222 if (ena_com_tx_comp_req_id_get(tx_ring->ena_com_io_cq, &req_id) != 0) 3223 break; 3224 3225 if (unlikely(validate_tx_req_id(tx_ring, req_id) != 0)) 3226 break; 3227 3228 /* Get Tx info & store how many descs were processed */ 3229 tx_info = &tx_ring->tx_buffer_info[req_id]; 3230 tx_info->timestamp = 0; 3231 3232 mbuf = tx_info->mbuf; 3233 if (fast_free) { 3234 pkts_to_clean[pkt_cnt++] = mbuf; 3235 mbuf_cnt += mbuf->nb_segs; 3236 if (mbuf_cnt >= ENA_CLEANUP_BUF_THRESH) { 3237 rte_pktmbuf_free_bulk(pkts_to_clean, pkt_cnt); 3238 mbuf_cnt = 0; 3239 pkt_cnt = 0; 3240 } 3241 } else { 3242 rte_pktmbuf_free(mbuf); 3243 } 3244 3245 tx_info->mbuf = NULL; 3246 tx_ring->empty_tx_reqs[next_to_clean] = req_id; 3247 3248 total_tx_descs += tx_info->tx_descs; 3249 total_tx_pkts++; 3250 3251 /* Put back descriptor to the ring for reuse */ 3252 next_to_clean = ENA_IDX_NEXT_MASKED(next_to_clean, 3253 tx_ring->size_mask); 3254 } 3255 3256 if (likely(total_tx_descs > 0)) { 3257 /* acknowledge completion of sent packets */ 3258 tx_ring->next_to_clean = next_to_clean; 3259 ena_com_comp_ack(tx_ring->ena_com_io_sq, total_tx_descs); 3260 } 3261 3262 if (mbuf_cnt != 0) 3263 rte_pktmbuf_free_bulk(pkts_to_clean, pkt_cnt); 3264 3265 /* Notify completion handler that full cleanup was performed */ 3266 if (free_pkt_cnt == 0 || total_tx_pkts < cleanup_budget) 3267 tx_ring->last_cleanup_ticks = rte_get_timer_cycles(); 3268 3269 return total_tx_pkts; 3270 } 3271 3272 static uint16_t eth_ena_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, 3273 uint16_t nb_pkts) 3274 { 3275 struct ena_ring *tx_ring = (struct ena_ring *)(tx_queue); 3276 int available_desc; 3277 uint16_t sent_idx = 0; 3278 3279 #ifdef RTE_ETHDEV_DEBUG_TX 3280 /* Check adapter state */ 3281 if (unlikely(tx_ring->adapter->state != ENA_ADAPTER_STATE_RUNNING)) { 3282 PMD_TX_LOG_LINE(ALERT, 3283 "Trying to xmit pkts while device is NOT running"); 3284 return 0; 3285 } 3286 #endif 3287 3288 available_desc = ena_com_free_q_entries(tx_ring->ena_com_io_sq); 3289 if (available_desc < tx_ring->tx_free_thresh) 3290 ena_tx_cleanup((void *)tx_ring, 0); 3291 3292 for (sent_idx = 0; sent_idx < nb_pkts; sent_idx++) { 3293 if (ena_xmit_mbuf(tx_ring, tx_pkts[sent_idx])) 3294 break; 3295 tx_ring->pkts_without_db = true; 3296 rte_prefetch0(tx_pkts[ENA_IDX_ADD_MASKED(sent_idx, 4, 3297 tx_ring->size_mask)]); 3298 } 3299 3300 /* If there are ready packets to be xmitted... */ 3301 if (likely(tx_ring->pkts_without_db)) { 3302 /* ...let HW do its best :-) */ 3303 ena_com_write_sq_doorbell(tx_ring->ena_com_io_sq); 3304 tx_ring->tx_stats.doorbells++; 3305 tx_ring->pkts_without_db = false; 3306 } 3307 3308 tx_ring->tx_stats.available_desc = 3309 ena_com_free_q_entries(tx_ring->ena_com_io_sq); 3310 tx_ring->tx_stats.tx_poll++; 3311 3312 return sent_idx; 3313 } 3314 3315 static void ena_copy_customer_metrics(struct ena_adapter *adapter, uint64_t *buf, 3316 size_t num_metrics) 3317 { 3318 struct ena_com_dev *ena_dev = &adapter->ena_dev; 3319 int rc; 3320 3321 if (ena_com_get_cap(ena_dev, ENA_ADMIN_CUSTOMER_METRICS)) { 3322 if (num_metrics != ENA_STATS_ARRAY_METRICS) { 3323 PMD_DRV_LOG_LINE(ERR, "Detected discrepancy in the number of customer metrics"); 3324 return; 3325 } 3326 rte_spinlock_lock(&adapter->admin_lock); 3327 rc = ENA_PROXY(adapter, 3328 ena_com_get_customer_metrics, 3329 &adapter->ena_dev, 3330 (char *)buf, 3331 num_metrics * sizeof(uint64_t)); 3332 rte_spinlock_unlock(&adapter->admin_lock); 3333 if (rc != 0) { 3334 PMD_DRV_LOG_LINE(WARNING, "Failed to get customer metrics, rc: %d", rc); 3335 return; 3336 } 3337 3338 } else if (ena_com_get_cap(ena_dev, ENA_ADMIN_ENI_STATS)) { 3339 if (num_metrics != ENA_STATS_ARRAY_METRICS_LEGACY) { 3340 PMD_DRV_LOG_LINE(ERR, "Detected discrepancy in the number of legacy metrics"); 3341 return; 3342 } 3343 3344 rte_spinlock_lock(&adapter->admin_lock); 3345 rc = ENA_PROXY(adapter, 3346 ena_com_get_eni_stats, 3347 &adapter->ena_dev, 3348 (struct ena_admin_eni_stats *)buf); 3349 rte_spinlock_unlock(&adapter->admin_lock); 3350 if (rc != 0) { 3351 PMD_DRV_LOG_LINE(WARNING, 3352 "Failed to get ENI metrics, rc: %d", rc); 3353 return; 3354 } 3355 } 3356 } 3357 3358 static void ena_copy_ena_srd_info(struct ena_adapter *adapter, 3359 struct ena_stats_srd *srd_info) 3360 { 3361 int rc; 3362 3363 if (!ena_com_get_cap(&adapter->ena_dev, ENA_ADMIN_ENA_SRD_INFO)) 3364 return; 3365 3366 rte_spinlock_lock(&adapter->admin_lock); 3367 rc = ENA_PROXY(adapter, 3368 ena_com_get_ena_srd_info, 3369 &adapter->ena_dev, 3370 (struct ena_admin_ena_srd_info *)srd_info); 3371 rte_spinlock_unlock(&adapter->admin_lock); 3372 if (rc != ENA_COM_OK && rc != ENA_COM_UNSUPPORTED) { 3373 PMD_DRV_LOG_LINE(WARNING, 3374 "Failed to get ENA express srd info, rc: %d", rc); 3375 return; 3376 } 3377 } 3378 3379 /** 3380 * DPDK callback to retrieve names of extended device statistics 3381 * 3382 * @param dev 3383 * Pointer to Ethernet device structure. 3384 * @param[out] xstats_names 3385 * Buffer to insert names into. 3386 * @param n 3387 * Number of names. 3388 * 3389 * @return 3390 * Number of xstats names. 3391 */ 3392 static int ena_xstats_get_names(struct rte_eth_dev *dev, 3393 struct rte_eth_xstat_name *xstats_names, 3394 unsigned int n) 3395 { 3396 struct ena_adapter *adapter = dev->data->dev_private; 3397 unsigned int xstats_count = ena_xstats_calc_num(dev->data); 3398 unsigned int stat, i, count = 0; 3399 3400 if (n < xstats_count || !xstats_names) 3401 return xstats_count; 3402 3403 for (stat = 0; stat < ENA_STATS_ARRAY_GLOBAL; stat++, count++) 3404 strcpy(xstats_names[count].name, 3405 ena_stats_global_strings[stat].name); 3406 3407 for (stat = 0; stat < adapter->metrics_num; stat++, count++) 3408 rte_strscpy(xstats_names[count].name, 3409 ena_stats_metrics_strings[stat].name, 3410 RTE_ETH_XSTATS_NAME_SIZE); 3411 for (stat = 0; stat < ENA_STATS_ARRAY_ENA_SRD; stat++, count++) 3412 rte_strscpy(xstats_names[count].name, 3413 ena_stats_srd_strings[stat].name, 3414 RTE_ETH_XSTATS_NAME_SIZE); 3415 3416 for (stat = 0; stat < ENA_STATS_ARRAY_RX; stat++) 3417 for (i = 0; i < dev->data->nb_rx_queues; i++, count++) 3418 snprintf(xstats_names[count].name, 3419 sizeof(xstats_names[count].name), 3420 "rx_q%d_%s", i, 3421 ena_stats_rx_strings[stat].name); 3422 3423 for (stat = 0; stat < ENA_STATS_ARRAY_TX; stat++) 3424 for (i = 0; i < dev->data->nb_tx_queues; i++, count++) 3425 snprintf(xstats_names[count].name, 3426 sizeof(xstats_names[count].name), 3427 "tx_q%d_%s", i, 3428 ena_stats_tx_strings[stat].name); 3429 3430 return xstats_count; 3431 } 3432 3433 /** 3434 * DPDK callback to retrieve names of extended device statistics for the given 3435 * ids. 3436 * 3437 * @param dev 3438 * Pointer to Ethernet device structure. 3439 * @param[out] xstats_names 3440 * Buffer to insert names into. 3441 * @param ids 3442 * IDs array for which the names should be retrieved. 3443 * @param size 3444 * Number of ids. 3445 * 3446 * @return 3447 * Positive value: number of xstats names. Negative value: error code. 3448 */ 3449 static int ena_xstats_get_names_by_id(struct rte_eth_dev *dev, 3450 const uint64_t *ids, 3451 struct rte_eth_xstat_name *xstats_names, 3452 unsigned int size) 3453 { 3454 struct ena_adapter *adapter = dev->data->dev_private; 3455 uint64_t xstats_count = ena_xstats_calc_num(dev->data); 3456 uint64_t id, qid; 3457 unsigned int i; 3458 3459 if (xstats_names == NULL) 3460 return xstats_count; 3461 3462 for (i = 0; i < size; ++i) { 3463 id = ids[i]; 3464 if (id > xstats_count) { 3465 PMD_DRV_LOG_LINE(ERR, 3466 "ID value out of range: id=%" PRIu64 ", xstats_num=%" PRIu64, 3467 id, xstats_count); 3468 return -EINVAL; 3469 } 3470 3471 if (id < ENA_STATS_ARRAY_GLOBAL) { 3472 strcpy(xstats_names[i].name, 3473 ena_stats_global_strings[id].name); 3474 continue; 3475 } 3476 3477 id -= ENA_STATS_ARRAY_GLOBAL; 3478 if (id < adapter->metrics_num) { 3479 rte_strscpy(xstats_names[i].name, 3480 ena_stats_metrics_strings[id].name, 3481 RTE_ETH_XSTATS_NAME_SIZE); 3482 continue; 3483 } 3484 3485 id -= adapter->metrics_num; 3486 3487 if (id < ENA_STATS_ARRAY_ENA_SRD) { 3488 rte_strscpy(xstats_names[i].name, 3489 ena_stats_srd_strings[id].name, 3490 RTE_ETH_XSTATS_NAME_SIZE); 3491 continue; 3492 } 3493 id -= ENA_STATS_ARRAY_ENA_SRD; 3494 3495 if (id < ENA_STATS_ARRAY_RX) { 3496 qid = id / dev->data->nb_rx_queues; 3497 id %= dev->data->nb_rx_queues; 3498 snprintf(xstats_names[i].name, 3499 sizeof(xstats_names[i].name), 3500 "rx_q%" PRIu64 "d_%s", 3501 qid, ena_stats_rx_strings[id].name); 3502 continue; 3503 } 3504 3505 id -= ENA_STATS_ARRAY_RX; 3506 /* Although this condition is not needed, it was added for 3507 * compatibility if new xstat structure would be ever added. 3508 */ 3509 if (id < ENA_STATS_ARRAY_TX) { 3510 qid = id / dev->data->nb_tx_queues; 3511 id %= dev->data->nb_tx_queues; 3512 snprintf(xstats_names[i].name, 3513 sizeof(xstats_names[i].name), 3514 "tx_q%" PRIu64 "_%s", 3515 qid, ena_stats_tx_strings[id].name); 3516 continue; 3517 } 3518 } 3519 3520 return i; 3521 } 3522 3523 /** 3524 * DPDK callback to get extended device statistics. 3525 * 3526 * @param dev 3527 * Pointer to Ethernet device structure. 3528 * @param[out] stats 3529 * Stats table output buffer. 3530 * @param n 3531 * The size of the stats table. 3532 * 3533 * @return 3534 * Number of xstats on success, negative on failure. 3535 */ 3536 static int ena_xstats_get(struct rte_eth_dev *dev, 3537 struct rte_eth_xstat *xstats, 3538 unsigned int n) 3539 { 3540 struct ena_adapter *adapter = dev->data->dev_private; 3541 unsigned int xstats_count = ena_xstats_calc_num(dev->data); 3542 unsigned int stat, i, count = 0; 3543 int stat_offset; 3544 void *stats_begin; 3545 uint64_t metrics_stats[ENA_MAX_CUSTOMER_METRICS]; 3546 struct ena_stats_srd srd_info = {0}; 3547 3548 if (n < xstats_count) 3549 return xstats_count; 3550 3551 if (!xstats) 3552 return 0; 3553 3554 for (stat = 0; stat < ENA_STATS_ARRAY_GLOBAL; stat++, count++) { 3555 stat_offset = ena_stats_global_strings[stat].stat_offset; 3556 stats_begin = &adapter->dev_stats; 3557 3558 xstats[count].id = count; 3559 xstats[count].value = *((uint64_t *) 3560 ((char *)stats_begin + stat_offset)); 3561 } 3562 3563 ena_copy_customer_metrics(adapter, metrics_stats, adapter->metrics_num); 3564 stats_begin = metrics_stats; 3565 for (stat = 0; stat < adapter->metrics_num; stat++, count++) { 3566 stat_offset = ena_stats_metrics_strings[stat].stat_offset; 3567 3568 xstats[count].id = count; 3569 xstats[count].value = *((uint64_t *) 3570 ((char *)stats_begin + stat_offset)); 3571 } 3572 3573 ena_copy_ena_srd_info(adapter, &srd_info); 3574 stats_begin = &srd_info; 3575 for (stat = 0; stat < ENA_STATS_ARRAY_ENA_SRD; stat++, count++) { 3576 stat_offset = ena_stats_srd_strings[stat].stat_offset; 3577 xstats[count].id = count; 3578 xstats[count].value = *((uint64_t *) 3579 ((char *)stats_begin + stat_offset)); 3580 } 3581 3582 for (stat = 0; stat < ENA_STATS_ARRAY_RX; stat++) { 3583 for (i = 0; i < dev->data->nb_rx_queues; i++, count++) { 3584 stat_offset = ena_stats_rx_strings[stat].stat_offset; 3585 stats_begin = &adapter->rx_ring[i].rx_stats; 3586 3587 xstats[count].id = count; 3588 xstats[count].value = *((uint64_t *) 3589 ((char *)stats_begin + stat_offset)); 3590 } 3591 } 3592 3593 for (stat = 0; stat < ENA_STATS_ARRAY_TX; stat++) { 3594 for (i = 0; i < dev->data->nb_tx_queues; i++, count++) { 3595 stat_offset = ena_stats_tx_strings[stat].stat_offset; 3596 stats_begin = &adapter->tx_ring[i].rx_stats; 3597 3598 xstats[count].id = count; 3599 xstats[count].value = *((uint64_t *) 3600 ((char *)stats_begin + stat_offset)); 3601 } 3602 } 3603 3604 return count; 3605 } 3606 3607 static int ena_xstats_get_by_id(struct rte_eth_dev *dev, 3608 const uint64_t *ids, 3609 uint64_t *values, 3610 unsigned int n) 3611 { 3612 struct ena_adapter *adapter = dev->data->dev_private; 3613 uint64_t id; 3614 uint64_t rx_entries, tx_entries; 3615 unsigned int i; 3616 int qid; 3617 int valid = 0; 3618 bool were_metrics_copied = false; 3619 bool was_srd_info_copied = false; 3620 uint64_t metrics_stats[ENA_MAX_CUSTOMER_METRICS]; 3621 struct ena_stats_srd srd_info = {0}; 3622 3623 for (i = 0; i < n; ++i) { 3624 id = ids[i]; 3625 /* Check if id belongs to global statistics */ 3626 if (id < ENA_STATS_ARRAY_GLOBAL) { 3627 values[i] = *((uint64_t *)&adapter->dev_stats + id); 3628 ++valid; 3629 continue; 3630 } 3631 3632 /* Check if id belongs to ENI statistics */ 3633 id -= ENA_STATS_ARRAY_GLOBAL; 3634 if (id < adapter->metrics_num) { 3635 /* Avoid reading metrics multiple times in a single 3636 * function call, as it requires communication with the 3637 * admin queue. 3638 */ 3639 if (!were_metrics_copied) { 3640 were_metrics_copied = true; 3641 ena_copy_customer_metrics(adapter, 3642 metrics_stats, 3643 adapter->metrics_num); 3644 } 3645 3646 values[i] = *((uint64_t *)&metrics_stats + id); 3647 ++valid; 3648 continue; 3649 } 3650 3651 /* Check if id belongs to SRD info statistics */ 3652 id -= adapter->metrics_num; 3653 3654 if (id < ENA_STATS_ARRAY_ENA_SRD) { 3655 /* 3656 * Avoid reading srd info multiple times in a single 3657 * function call, as it requires communication with the 3658 * admin queue. 3659 */ 3660 if (!was_srd_info_copied) { 3661 was_srd_info_copied = true; 3662 ena_copy_ena_srd_info(adapter, &srd_info); 3663 } 3664 values[i] = *((uint64_t *)&adapter->srd_stats + id); 3665 ++valid; 3666 continue; 3667 } 3668 3669 /* Check if id belongs to rx queue statistics */ 3670 id -= ENA_STATS_ARRAY_ENA_SRD; 3671 3672 rx_entries = ENA_STATS_ARRAY_RX * dev->data->nb_rx_queues; 3673 if (id < rx_entries) { 3674 qid = id % dev->data->nb_rx_queues; 3675 id /= dev->data->nb_rx_queues; 3676 values[i] = *((uint64_t *) 3677 &adapter->rx_ring[qid].rx_stats + id); 3678 ++valid; 3679 continue; 3680 } 3681 /* Check if id belongs to rx queue statistics */ 3682 id -= rx_entries; 3683 tx_entries = ENA_STATS_ARRAY_TX * dev->data->nb_tx_queues; 3684 if (id < tx_entries) { 3685 qid = id % dev->data->nb_tx_queues; 3686 id /= dev->data->nb_tx_queues; 3687 values[i] = *((uint64_t *) 3688 &adapter->tx_ring[qid].tx_stats + id); 3689 ++valid; 3690 continue; 3691 } 3692 } 3693 3694 return valid; 3695 } 3696 3697 static int ena_process_uint_devarg(const char *key, 3698 const char *value, 3699 void *opaque) 3700 { 3701 struct ena_adapter *adapter = opaque; 3702 char *str_end; 3703 uint64_t uint64_value; 3704 3705 uint64_value = strtoull(value, &str_end, DECIMAL_BASE); 3706 if (value == str_end) { 3707 PMD_INIT_LOG_LINE(ERR, 3708 "Invalid value for key '%s'. Only uint values are accepted.", 3709 key); 3710 return -EINVAL; 3711 } 3712 3713 if (strcmp(key, ENA_DEVARG_MISS_TXC_TO) == 0) { 3714 if (uint64_value > ENA_MAX_TX_TIMEOUT_SECONDS) { 3715 PMD_INIT_LOG_LINE(ERR, 3716 "Tx timeout too high: %" PRIu64 " sec. Maximum allowed: %d sec.", 3717 uint64_value, ENA_MAX_TX_TIMEOUT_SECONDS); 3718 return -EINVAL; 3719 } else if (uint64_value == 0) { 3720 PMD_INIT_LOG_LINE(INFO, 3721 "Check for missing Tx completions has been disabled."); 3722 adapter->missing_tx_completion_to = 3723 ENA_HW_HINTS_NO_TIMEOUT; 3724 } else { 3725 PMD_INIT_LOG_LINE(INFO, 3726 "Tx packet completion timeout set to %" PRIu64 " seconds.", 3727 uint64_value); 3728 adapter->missing_tx_completion_to = 3729 uint64_value * rte_get_timer_hz(); 3730 } 3731 } else if (strcmp(key, ENA_DEVARG_CONTROL_PATH_POLL_INTERVAL) == 0) { 3732 if (uint64_value > ENA_MAX_CONTROL_PATH_POLL_INTERVAL_MSEC) { 3733 PMD_INIT_LOG_LINE(ERR, 3734 "Control path polling interval is too long: %" PRIu64 " msecs. " 3735 "Maximum allowed: %d msecs.", 3736 uint64_value, ENA_MAX_CONTROL_PATH_POLL_INTERVAL_MSEC); 3737 return -EINVAL; 3738 } else if (uint64_value == 0) { 3739 PMD_INIT_LOG_LINE(INFO, 3740 "Control path polling interval is set to zero. Operating in " 3741 "interrupt mode."); 3742 adapter->control_path_poll_interval = 0; 3743 } else { 3744 PMD_INIT_LOG_LINE(INFO, 3745 "Control path polling interval is set to %" PRIu64 " msecs.", 3746 uint64_value); 3747 adapter->control_path_poll_interval = uint64_value * USEC_PER_MSEC; 3748 } 3749 } 3750 3751 return 0; 3752 } 3753 3754 static int ena_process_llq_policy_devarg(const char *key, const char *value, void *opaque) 3755 { 3756 struct ena_adapter *adapter = opaque; 3757 uint32_t policy; 3758 3759 policy = strtoul(value, NULL, DECIMAL_BASE); 3760 if (policy < ENA_LLQ_POLICY_LAST) { 3761 adapter->llq_header_policy = policy; 3762 } else { 3763 PMD_INIT_LOG_LINE(ERR, 3764 "Invalid value: '%s' for key '%s'. valid [0-3]", 3765 value, key); 3766 return -EINVAL; 3767 } 3768 PMD_INIT_LOG_LINE(INFO, 3769 "LLQ policy is %u [0 - disabled, 1 - device recommended, 2 - normal, 3 - large]", 3770 adapter->llq_header_policy); 3771 return 0; 3772 } 3773 3774 static int ena_parse_devargs(struct ena_adapter *adapter, struct rte_devargs *devargs) 3775 { 3776 static const char * const allowed_args[] = { 3777 ENA_DEVARG_LLQ_POLICY, 3778 ENA_DEVARG_MISS_TXC_TO, 3779 ENA_DEVARG_CONTROL_PATH_POLL_INTERVAL, 3780 NULL, 3781 }; 3782 struct rte_kvargs *kvlist; 3783 int rc; 3784 3785 if (devargs == NULL) 3786 return 0; 3787 3788 kvlist = rte_kvargs_parse(devargs->args, allowed_args); 3789 if (kvlist == NULL) { 3790 PMD_INIT_LOG_LINE(ERR, "Invalid device arguments: %s", 3791 devargs->args); 3792 return -EINVAL; 3793 } 3794 rc = rte_kvargs_process(kvlist, ENA_DEVARG_LLQ_POLICY, 3795 ena_process_llq_policy_devarg, adapter); 3796 if (rc != 0) 3797 goto exit; 3798 rc = rte_kvargs_process(kvlist, ENA_DEVARG_MISS_TXC_TO, 3799 ena_process_uint_devarg, adapter); 3800 if (rc != 0) 3801 goto exit; 3802 rc = rte_kvargs_process(kvlist, ENA_DEVARG_CONTROL_PATH_POLL_INTERVAL, 3803 ena_process_uint_devarg, adapter); 3804 if (rc != 0) 3805 goto exit; 3806 3807 exit: 3808 rte_kvargs_free(kvlist); 3809 3810 return rc; 3811 } 3812 3813 static int ena_setup_rx_intr(struct rte_eth_dev *dev) 3814 { 3815 struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev); 3816 struct rte_intr_handle *intr_handle = pci_dev->intr_handle; 3817 int rc; 3818 uint16_t vectors_nb, i; 3819 bool rx_intr_requested = dev->data->dev_conf.intr_conf.rxq; 3820 3821 if (!rx_intr_requested) 3822 return 0; 3823 3824 if (!rte_intr_cap_multiple(intr_handle)) { 3825 PMD_DRV_LOG_LINE(ERR, 3826 "Rx interrupt requested, but it isn't supported by the PCI driver"); 3827 return -ENOTSUP; 3828 } 3829 3830 /* Disable interrupt mapping before the configuration starts. */ 3831 rte_intr_disable(intr_handle); 3832 3833 /* Verify if there are enough vectors available. */ 3834 vectors_nb = dev->data->nb_rx_queues; 3835 if (vectors_nb > RTE_MAX_RXTX_INTR_VEC_ID) { 3836 PMD_DRV_LOG_LINE(ERR, 3837 "Too many Rx interrupts requested, maximum number: %d", 3838 RTE_MAX_RXTX_INTR_VEC_ID); 3839 rc = -ENOTSUP; 3840 goto enable_intr; 3841 } 3842 3843 /* Allocate the vector list */ 3844 if (rte_intr_vec_list_alloc(intr_handle, "intr_vec", 3845 dev->data->nb_rx_queues)) { 3846 PMD_DRV_LOG_LINE(ERR, 3847 "Failed to allocate interrupt vector for %d queues", 3848 dev->data->nb_rx_queues); 3849 rc = -ENOMEM; 3850 goto enable_intr; 3851 } 3852 3853 rc = rte_intr_efd_enable(intr_handle, vectors_nb); 3854 if (rc != 0) 3855 goto free_intr_vec; 3856 3857 if (!rte_intr_allow_others(intr_handle)) { 3858 PMD_DRV_LOG_LINE(ERR, 3859 "Not enough interrupts available to use both ENA Admin and Rx interrupts"); 3860 goto disable_intr_efd; 3861 } 3862 3863 for (i = 0; i < vectors_nb; ++i) 3864 if (rte_intr_vec_list_index_set(intr_handle, i, 3865 RTE_INTR_VEC_RXTX_OFFSET + i)) 3866 goto disable_intr_efd; 3867 3868 rte_intr_enable(intr_handle); 3869 return 0; 3870 3871 disable_intr_efd: 3872 rte_intr_efd_disable(intr_handle); 3873 free_intr_vec: 3874 rte_intr_vec_list_free(intr_handle); 3875 enable_intr: 3876 rte_intr_enable(intr_handle); 3877 return rc; 3878 } 3879 3880 static void ena_rx_queue_intr_set(struct rte_eth_dev *dev, 3881 uint16_t queue_id, 3882 bool unmask) 3883 { 3884 struct ena_adapter *adapter = dev->data->dev_private; 3885 struct ena_ring *rxq = &adapter->rx_ring[queue_id]; 3886 struct ena_eth_io_intr_reg intr_reg; 3887 3888 ena_com_update_intr_reg(&intr_reg, 0, 0, unmask, 1); 3889 ena_com_unmask_intr(rxq->ena_com_io_cq, &intr_reg); 3890 } 3891 3892 static int ena_rx_queue_intr_enable(struct rte_eth_dev *dev, 3893 uint16_t queue_id) 3894 { 3895 ena_rx_queue_intr_set(dev, queue_id, true); 3896 3897 return 0; 3898 } 3899 3900 static int ena_rx_queue_intr_disable(struct rte_eth_dev *dev, 3901 uint16_t queue_id) 3902 { 3903 ena_rx_queue_intr_set(dev, queue_id, false); 3904 3905 return 0; 3906 } 3907 3908 static int ena_configure_aenq(struct ena_adapter *adapter) 3909 { 3910 uint32_t aenq_groups = adapter->all_aenq_groups; 3911 int rc; 3912 3913 /* All_aenq_groups holds all AENQ functions supported by the device and 3914 * the HW, so at first we need to be sure the LSC request is valid. 3915 */ 3916 if (adapter->edev_data->dev_conf.intr_conf.lsc != 0) { 3917 if (!(aenq_groups & BIT(ENA_ADMIN_LINK_CHANGE))) { 3918 PMD_DRV_LOG_LINE(ERR, 3919 "LSC requested, but it's not supported by the AENQ"); 3920 return -EINVAL; 3921 } 3922 } else { 3923 /* If LSC wasn't enabled by the app, let's enable all supported 3924 * AENQ procedures except the LSC. 3925 */ 3926 aenq_groups &= ~BIT(ENA_ADMIN_LINK_CHANGE); 3927 } 3928 3929 rc = ena_com_set_aenq_config(&adapter->ena_dev, aenq_groups); 3930 if (rc != 0) { 3931 PMD_DRV_LOG_LINE(ERR, "Cannot configure AENQ groups, rc=%d", rc); 3932 return rc; 3933 } 3934 3935 adapter->active_aenq_groups = aenq_groups; 3936 3937 return 0; 3938 } 3939 3940 int ena_mp_indirect_table_set(struct ena_adapter *adapter) 3941 { 3942 return ENA_PROXY(adapter, ena_com_indirect_table_set, &adapter->ena_dev); 3943 } 3944 3945 int ena_mp_indirect_table_get(struct ena_adapter *adapter, 3946 uint32_t *indirect_table) 3947 { 3948 return ENA_PROXY(adapter, ena_com_indirect_table_get, &adapter->ena_dev, 3949 indirect_table); 3950 } 3951 3952 /********************************************************************* 3953 * ena_plat_dpdk.h functions implementations 3954 *********************************************************************/ 3955 3956 const struct rte_memzone * 3957 ena_mem_alloc_coherent(struct rte_eth_dev_data *data, size_t size, 3958 int socket_id, unsigned int alignment, void **virt_addr, 3959 dma_addr_t *phys_addr) 3960 { 3961 char z_name[RTE_MEMZONE_NAMESIZE]; 3962 struct ena_adapter *adapter = data->dev_private; 3963 const struct rte_memzone *memzone; 3964 int rc; 3965 3966 rc = snprintf(z_name, RTE_MEMZONE_NAMESIZE, "ena_p%d_mz%" PRIu64 "", 3967 data->port_id, adapter->memzone_cnt); 3968 if (rc >= RTE_MEMZONE_NAMESIZE) { 3969 PMD_DRV_LOG_LINE(ERR, 3970 "Name for the ena_com memzone is too long. Port: %d, mz_num: %" PRIu64, 3971 data->port_id, adapter->memzone_cnt); 3972 goto error; 3973 } 3974 adapter->memzone_cnt++; 3975 3976 memzone = rte_memzone_reserve_aligned(z_name, size, socket_id, 3977 RTE_MEMZONE_IOVA_CONTIG, alignment); 3978 if (memzone == NULL) { 3979 PMD_DRV_LOG_LINE(ERR, "Failed to allocate ena_com memzone: %s", 3980 z_name); 3981 goto error; 3982 } 3983 3984 memset(memzone->addr, 0, size); 3985 *virt_addr = memzone->addr; 3986 *phys_addr = memzone->iova; 3987 3988 return memzone; 3989 3990 error: 3991 *virt_addr = NULL; 3992 *phys_addr = 0; 3993 3994 return NULL; 3995 } 3996 3997 3998 /********************************************************************* 3999 * PMD configuration 4000 *********************************************************************/ 4001 static int eth_ena_pci_probe(struct rte_pci_driver *pci_drv __rte_unused, 4002 struct rte_pci_device *pci_dev) 4003 { 4004 return rte_eth_dev_pci_generic_probe(pci_dev, 4005 sizeof(struct ena_adapter), eth_ena_dev_init); 4006 } 4007 4008 static int eth_ena_pci_remove(struct rte_pci_device *pci_dev) 4009 { 4010 return rte_eth_dev_pci_generic_remove(pci_dev, eth_ena_dev_uninit); 4011 } 4012 4013 static struct rte_pci_driver rte_ena_pmd = { 4014 .id_table = pci_id_ena_map, 4015 .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC | 4016 RTE_PCI_DRV_WC_ACTIVATE, 4017 .probe = eth_ena_pci_probe, 4018 .remove = eth_ena_pci_remove, 4019 }; 4020 4021 RTE_PMD_REGISTER_PCI(net_ena, rte_ena_pmd); 4022 RTE_PMD_REGISTER_PCI_TABLE(net_ena, pci_id_ena_map); 4023 RTE_PMD_REGISTER_KMOD_DEP(net_ena, "* igb_uio | uio_pci_generic | vfio-pci"); 4024 RTE_PMD_REGISTER_PARAM_STRING(net_ena, 4025 ENA_DEVARG_LLQ_POLICY "=<0|1|2|3> " 4026 ENA_DEVARG_MISS_TXC_TO "=<uint>" 4027 ENA_DEVARG_CONTROL_PATH_POLL_INTERVAL "=<0-1000>"); 4028 RTE_LOG_REGISTER_SUFFIX(ena_logtype_init, init, NOTICE); 4029 RTE_LOG_REGISTER_SUFFIX(ena_logtype_driver, driver, NOTICE); 4030 #ifdef RTE_ETHDEV_DEBUG_RX 4031 RTE_LOG_REGISTER_SUFFIX(ena_logtype_rx, rx, DEBUG); 4032 #endif 4033 #ifdef RTE_ETHDEV_DEBUG_TX 4034 RTE_LOG_REGISTER_SUFFIX(ena_logtype_tx, tx, DEBUG); 4035 #endif 4036 RTE_LOG_REGISTER_SUFFIX(ena_logtype_com, com, WARNING); 4037 4038 /****************************************************************************** 4039 ******************************** AENQ Handlers ******************************* 4040 *****************************************************************************/ 4041 static void ena_update_on_link_change(void *adapter_data, 4042 struct ena_admin_aenq_entry *aenq_e) 4043 { 4044 struct rte_eth_dev *eth_dev = adapter_data; 4045 struct ena_adapter *adapter = eth_dev->data->dev_private; 4046 struct ena_admin_aenq_link_change_desc *aenq_link_desc; 4047 uint32_t status; 4048 4049 aenq_link_desc = (struct ena_admin_aenq_link_change_desc *)aenq_e; 4050 4051 status = get_ena_admin_aenq_link_change_desc_link_status(aenq_link_desc); 4052 adapter->link_status = status; 4053 4054 ena_link_update(eth_dev, 0); 4055 rte_eth_dev_callback_process(eth_dev, RTE_ETH_EVENT_INTR_LSC, NULL); 4056 } 4057 4058 static void ena_notification(void *adapter_data, 4059 struct ena_admin_aenq_entry *aenq_e) 4060 { 4061 struct rte_eth_dev *eth_dev = adapter_data; 4062 struct ena_adapter *adapter = eth_dev->data->dev_private; 4063 struct ena_admin_ena_hw_hints *hints; 4064 4065 if (aenq_e->aenq_common_desc.group != ENA_ADMIN_NOTIFICATION) 4066 PMD_DRV_LOG_LINE(WARNING, "Invalid AENQ group: %x. Expected: %x", 4067 aenq_e->aenq_common_desc.group, 4068 ENA_ADMIN_NOTIFICATION); 4069 4070 switch (aenq_e->aenq_common_desc.syndrome) { 4071 case ENA_ADMIN_UPDATE_HINTS: 4072 hints = (struct ena_admin_ena_hw_hints *) 4073 (&aenq_e->inline_data_w4); 4074 ena_update_hints(adapter, hints); 4075 break; 4076 default: 4077 PMD_DRV_LOG_LINE(ERR, "Invalid AENQ notification link state: %d", 4078 aenq_e->aenq_common_desc.syndrome); 4079 } 4080 } 4081 4082 static void ena_keep_alive(void *adapter_data, 4083 __rte_unused struct ena_admin_aenq_entry *aenq_e) 4084 { 4085 struct rte_eth_dev *eth_dev = adapter_data; 4086 struct ena_adapter *adapter = eth_dev->data->dev_private; 4087 struct ena_admin_aenq_keep_alive_desc *desc; 4088 uint64_t rx_drops; 4089 uint64_t tx_drops; 4090 uint64_t rx_overruns; 4091 4092 adapter->timestamp_wd = rte_get_timer_cycles(); 4093 4094 desc = (struct ena_admin_aenq_keep_alive_desc *)aenq_e; 4095 rx_drops = ((uint64_t)desc->rx_drops_high << 32) | desc->rx_drops_low; 4096 tx_drops = ((uint64_t)desc->tx_drops_high << 32) | desc->tx_drops_low; 4097 rx_overruns = ((uint64_t)desc->rx_overruns_high << 32) | desc->rx_overruns_low; 4098 4099 /* 4100 * Depending on its acceleration support, the device updates a different statistic when 4101 * Rx packet is dropped because there are no available buffers to accommodate it. 4102 */ 4103 adapter->drv_stats->rx_drops = rx_drops + rx_overruns; 4104 adapter->dev_stats.tx_drops = tx_drops; 4105 } 4106 4107 static void ena_suboptimal_configuration(__rte_unused void *adapter_data, 4108 struct ena_admin_aenq_entry *aenq_e) 4109 { 4110 struct ena_admin_aenq_conf_notifications_desc *desc; 4111 int bit, num_bits; 4112 4113 desc = (struct ena_admin_aenq_conf_notifications_desc *)aenq_e; 4114 num_bits = BITS_PER_TYPE(desc->notifications_bitmap); 4115 for (bit = 0; bit < num_bits; bit++) { 4116 if (desc->notifications_bitmap & RTE_BIT64(bit)) { 4117 PMD_DRV_LOG_LINE(WARNING, 4118 "Sub-optimal configuration notification code: %d", bit + 1); 4119 } 4120 } 4121 } 4122 4123 /** 4124 * This handler will called for unknown event group or unimplemented handlers 4125 **/ 4126 static void unimplemented_aenq_handler(__rte_unused void *data, 4127 __rte_unused struct ena_admin_aenq_entry *aenq_e) 4128 { 4129 PMD_DRV_LOG_LINE(ERR, 4130 "Unknown event was received or event with unimplemented handler"); 4131 } 4132 4133 static struct ena_aenq_handlers aenq_handlers = { 4134 .handlers = { 4135 [ENA_ADMIN_LINK_CHANGE] = ena_update_on_link_change, 4136 [ENA_ADMIN_NOTIFICATION] = ena_notification, 4137 [ENA_ADMIN_KEEP_ALIVE] = ena_keep_alive, 4138 [ENA_ADMIN_CONF_NOTIFICATIONS] = ena_suboptimal_configuration 4139 }, 4140 .unimplemented_handler = unimplemented_aenq_handler 4141 }; 4142 4143 /********************************************************************* 4144 * Multi-Process communication request handling (in primary) 4145 *********************************************************************/ 4146 static int 4147 ena_mp_primary_handle(const struct rte_mp_msg *mp_msg, const void *peer) 4148 { 4149 const struct ena_mp_body *req = 4150 (const struct ena_mp_body *)mp_msg->param; 4151 struct ena_adapter *adapter; 4152 struct ena_com_dev *ena_dev; 4153 struct ena_mp_body *rsp; 4154 struct rte_mp_msg mp_rsp; 4155 struct rte_eth_dev *dev; 4156 int res = 0; 4157 4158 rsp = (struct ena_mp_body *)&mp_rsp.param; 4159 mp_msg_init(&mp_rsp, req->type, req->port_id); 4160 4161 if (!rte_eth_dev_is_valid_port(req->port_id)) { 4162 rte_errno = ENODEV; 4163 res = -rte_errno; 4164 PMD_DRV_LOG_LINE(ERR, "Unknown port %d in request %d", 4165 req->port_id, req->type); 4166 goto end; 4167 } 4168 dev = &rte_eth_devices[req->port_id]; 4169 adapter = dev->data->dev_private; 4170 ena_dev = &adapter->ena_dev; 4171 4172 switch (req->type) { 4173 case ENA_MP_DEV_STATS_GET: 4174 res = ena_com_get_dev_basic_stats(ena_dev, 4175 &adapter->basic_stats); 4176 break; 4177 case ENA_MP_ENI_STATS_GET: 4178 res = ena_com_get_eni_stats(ena_dev, 4179 (struct ena_admin_eni_stats *)&adapter->metrics_stats); 4180 break; 4181 case ENA_MP_MTU_SET: 4182 res = ena_com_set_dev_mtu(ena_dev, req->args.mtu); 4183 break; 4184 case ENA_MP_IND_TBL_GET: 4185 res = ena_com_indirect_table_get(ena_dev, 4186 adapter->indirect_table); 4187 break; 4188 case ENA_MP_IND_TBL_SET: 4189 res = ena_com_indirect_table_set(ena_dev); 4190 break; 4191 case ENA_MP_CUSTOMER_METRICS_GET: 4192 res = ena_com_get_customer_metrics(ena_dev, 4193 (char *)adapter->metrics_stats, 4194 adapter->metrics_num * sizeof(uint64_t)); 4195 break; 4196 case ENA_MP_SRD_STATS_GET: 4197 res = ena_com_get_ena_srd_info(ena_dev, 4198 (struct ena_admin_ena_srd_info *)&adapter->srd_stats); 4199 break; 4200 default: 4201 PMD_DRV_LOG_LINE(ERR, "Unknown request type %d", req->type); 4202 res = -EINVAL; 4203 break; 4204 } 4205 4206 end: 4207 /* Save processing result in the reply */ 4208 rsp->result = res; 4209 /* Return just IPC processing status */ 4210 return rte_mp_reply(&mp_rsp, peer); 4211 } 4212 4213 static bool ena_use_large_llq_hdr(struct ena_adapter *adapter, uint8_t recommended_entry_size) 4214 { 4215 if (adapter->llq_header_policy == ENA_LLQ_POLICY_LARGE) { 4216 return true; 4217 } else if (adapter->llq_header_policy == ENA_LLQ_POLICY_RECOMMENDED) { 4218 PMD_DRV_LOG_LINE(INFO, "Recommended device entry size policy %u", 4219 recommended_entry_size); 4220 if (recommended_entry_size == ENA_ADMIN_LIST_ENTRY_SIZE_256B) 4221 return true; 4222 } 4223 return false; 4224 } 4225