/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright (c) 2015-2020 Amazon.com, Inc. or its affiliates.
 * All rights reserved.
 */

#include <rte_alarm.h>
#include <rte_string_fns.h>
#include <rte_errno.h>
#include <rte_version.h>
#include <rte_net.h>
#include <rte_kvargs.h>

#include "ena_ethdev.h"
#include "ena_logs.h"
#include "ena_platform.h"
#include "ena_com.h"
#include "ena_eth_com.h"

#include <ena_common_defs.h>
#include <ena_regs_defs.h>
#include <ena_admin_defs.h>
#include <ena_eth_io_defs.h>

#define DRV_MODULE_VER_MAJOR	2
#define DRV_MODULE_VER_MINOR	10
#define DRV_MODULE_VER_SUBMINOR	0

#define __MERGE_64B_H_L(h, l) (((uint64_t)h << 32) | l)

#define GET_L4_HDR_LEN(mbuf)					\
	((rte_pktmbuf_mtod_offset(mbuf, struct rte_tcp_hdr *,	\
		mbuf->l3_len + mbuf->l2_len)->data_off) >> 4)

#define ETH_GSTRING_LEN	32

#define ARRAY_SIZE(x) RTE_DIM(x)

#define ENA_MIN_RING_DESC	128

#define USEC_PER_MSEC	1000UL

#define BITS_PER_BYTE 8

#define BITS_PER_TYPE(type) (sizeof(type) * BITS_PER_BYTE)

#define DECIMAL_BASE 10

#define MAX_WIDE_LLQ_DEPTH_UNSUPPORTED 0

/*
 * We should try to keep ENA_CLEANUP_BUF_THRESH lower than
 * RTE_MEMPOOL_CACHE_MAX_SIZE, so we can fit this in the mempool local cache.
 */
#define ENA_CLEANUP_BUF_THRESH	256

struct ena_stats {
	char name[ETH_GSTRING_LEN];
	int stat_offset;
};

#define ENA_STAT_ENTRY(stat, stat_type) { \
	.name = #stat, \
	.stat_offset = offsetof(struct ena_stats_##stat_type, stat) \
}

#define ENA_STAT_RX_ENTRY(stat) \
	ENA_STAT_ENTRY(stat, rx)

#define ENA_STAT_TX_ENTRY(stat) \
	ENA_STAT_ENTRY(stat, tx)

#define ENA_STAT_METRICS_ENTRY(stat) \
	ENA_STAT_ENTRY(stat, metrics)

#define ENA_STAT_GLOBAL_ENTRY(stat) \
	ENA_STAT_ENTRY(stat, dev)

#define ENA_STAT_ENA_SRD_ENTRY(stat) \
	ENA_STAT_ENTRY(stat, srd)

/* Device arguments */

/* llq_policy Controls whether to disable LLQ, use the device-recommended
 * header policy, or override the device recommendation.
 * 0 - Disable LLQ. Use with extreme caution as it leads to a huge
 *     performance degradation on AWS instances built with Nitro v4 onwards.
 * 1 - Accept the device-recommended LLQ policy (default).
 *     The device can recommend either the normal or the large LLQ policy.
 * 2 - Enforce the normal LLQ policy.
 * 3 - Enforce the large LLQ policy.
 *     Required for packets with headers that exceed 96 bytes on
 *     AWS instances built with Nitro v2 and Nitro v1.
 */
#define ENA_DEVARG_LLQ_POLICY	"llq_policy"

/* Timeout in seconds after which a single uncompleted Tx packet should be
 * considered as missing.
 */
#define ENA_DEVARG_MISS_TXC_TO	"miss_txc_to"

/*
 * Controls the period of time (in milliseconds) between two consecutive
 * inspections of the control queues when the driver is in poll mode and not
 * using interrupts. By default, this value is zero, indicating that the driver
 * will not be in poll mode and will use interrupts. A non-zero value for this
 * argument is mandatory when using the uio_pci_generic driver.
 */
#define ENA_DEVARG_CONTROL_PATH_POLL_INTERVAL	"control_path_poll_interval"
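/*
 * Illustrative (not authoritative) example of passing the device arguments
 * above on the EAL command line; the PCI address and values shown are
 * placeholders only:
 *
 *   dpdk-testpmd -a 00:06.0,llq_policy=1,miss_txc_to=5,control_path_poll_interval=500 -- -i
 */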
/*
 * Each rte_memzone should have a unique name.
 * To satisfy this, count the number of allocations and append it to the name.
 */
rte_atomic64_t ena_alloc_cnt;

static const struct ena_stats ena_stats_global_strings[] = {
	ENA_STAT_GLOBAL_ENTRY(wd_expired),
	ENA_STAT_GLOBAL_ENTRY(dev_start),
	ENA_STAT_GLOBAL_ENTRY(dev_stop),
	ENA_STAT_GLOBAL_ENTRY(tx_drops),
};

/*
 * The legacy metrics (also known as eni stats) consist of 5 stats, while the
 * reworked metrics (also known as customer metrics) support an additional stat.
 */
static struct ena_stats ena_stats_metrics_strings[] = {
	ENA_STAT_METRICS_ENTRY(bw_in_allowance_exceeded),
	ENA_STAT_METRICS_ENTRY(bw_out_allowance_exceeded),
	ENA_STAT_METRICS_ENTRY(pps_allowance_exceeded),
	ENA_STAT_METRICS_ENTRY(conntrack_allowance_exceeded),
	ENA_STAT_METRICS_ENTRY(linklocal_allowance_exceeded),
	ENA_STAT_METRICS_ENTRY(conntrack_allowance_available),
};

static const struct ena_stats ena_stats_srd_strings[] = {
	ENA_STAT_ENA_SRD_ENTRY(ena_srd_mode),
	ENA_STAT_ENA_SRD_ENTRY(ena_srd_tx_pkts),
	ENA_STAT_ENA_SRD_ENTRY(ena_srd_eligible_tx_pkts),
	ENA_STAT_ENA_SRD_ENTRY(ena_srd_rx_pkts),
	ENA_STAT_ENA_SRD_ENTRY(ena_srd_resource_utilization),
};

static const struct ena_stats ena_stats_tx_strings[] = {
	ENA_STAT_TX_ENTRY(cnt),
	ENA_STAT_TX_ENTRY(bytes),
	ENA_STAT_TX_ENTRY(prepare_ctx_err),
	ENA_STAT_TX_ENTRY(tx_poll),
	ENA_STAT_TX_ENTRY(doorbells),
	ENA_STAT_TX_ENTRY(bad_req_id),
	ENA_STAT_TX_ENTRY(available_desc),
	ENA_STAT_TX_ENTRY(missed_tx),
};

static const struct ena_stats ena_stats_rx_strings[] = {
	ENA_STAT_RX_ENTRY(cnt),
	ENA_STAT_RX_ENTRY(bytes),
	ENA_STAT_RX_ENTRY(refill_partial),
	ENA_STAT_RX_ENTRY(l3_csum_bad),
	ENA_STAT_RX_ENTRY(l4_csum_bad),
	ENA_STAT_RX_ENTRY(l4_csum_good),
	ENA_STAT_RX_ENTRY(mbuf_alloc_fail),
	ENA_STAT_RX_ENTRY(bad_desc_num),
	ENA_STAT_RX_ENTRY(bad_req_id),
};

#define ENA_STATS_ARRAY_GLOBAL	ARRAY_SIZE(ena_stats_global_strings)
#define ENA_STATS_ARRAY_METRICS	ARRAY_SIZE(ena_stats_metrics_strings)
#define ENA_STATS_ARRAY_METRICS_LEGACY	(ENA_STATS_ARRAY_METRICS - 1)
#define ENA_STATS_ARRAY_ENA_SRD	ARRAY_SIZE(ena_stats_srd_strings)
#define ENA_STATS_ARRAY_TX	ARRAY_SIZE(ena_stats_tx_strings)
#define ENA_STATS_ARRAY_RX	ARRAY_SIZE(ena_stats_rx_strings)

#define QUEUE_OFFLOADS (RTE_ETH_TX_OFFLOAD_TCP_CKSUM |\
	RTE_ETH_TX_OFFLOAD_UDP_CKSUM |\
	RTE_ETH_TX_OFFLOAD_IPV4_CKSUM |\
	RTE_ETH_TX_OFFLOAD_TCP_TSO)
#define MBUF_OFFLOADS (RTE_MBUF_F_TX_L4_MASK |\
	RTE_MBUF_F_TX_IP_CKSUM |\
	RTE_MBUF_F_TX_TCP_SEG)

/** Vendor ID used by Amazon devices */
#define PCI_VENDOR_ID_AMAZON	0x1D0F
/** Amazon devices */
#define PCI_DEVICE_ID_ENA_VF		0xEC20
#define PCI_DEVICE_ID_ENA_VF_RSERV0	0xEC21

#define ENA_TX_OFFLOAD_MASK (RTE_MBUF_F_TX_L4_MASK | \
	RTE_MBUF_F_TX_IPV6 | \
	RTE_MBUF_F_TX_IPV4 | \
	RTE_MBUF_F_TX_IP_CKSUM | \
	RTE_MBUF_F_TX_TCP_SEG)

#define ENA_TX_OFFLOAD_NOTSUP_MASK	\
	(RTE_MBUF_F_TX_OFFLOAD_MASK ^ ENA_TX_OFFLOAD_MASK)

/** HW specific offloads capabilities. */
/* IPv4 checksum offload. */
#define ENA_L3_IPV4_CSUM	0x0001
/* TCP/UDP checksum offload for IPv4 packets. */
#define ENA_L4_IPV4_CSUM	0x0002
/* TCP/UDP checksum offload for IPv4 packets with pseudo header checksum. */
#define ENA_L4_IPV4_CSUM_PARTIAL	0x0004
/* TCP/UDP checksum offload for IPv6 packets.
 */
#define ENA_L4_IPV6_CSUM	0x0008
/* TCP/UDP checksum offload for IPv6 packets with pseudo header checksum. */
#define ENA_L4_IPV6_CSUM_PARTIAL	0x0010
/* TSO support for IPv4 packets. */
#define ENA_IPV4_TSO	0x0020

/* Device supports setting RSS hash. */
#define ENA_RX_RSS_HASH	0x0040

static const struct rte_pci_id pci_id_ena_map[] = {
	{ RTE_PCI_DEVICE(PCI_VENDOR_ID_AMAZON, PCI_DEVICE_ID_ENA_VF) },
	{ RTE_PCI_DEVICE(PCI_VENDOR_ID_AMAZON, PCI_DEVICE_ID_ENA_VF_RSERV0) },
	{ .device_id = 0 },
};

static struct ena_aenq_handlers aenq_handlers;

static int ena_device_init(struct ena_adapter *adapter,
			   struct rte_pci_device *pdev,
			   struct ena_com_dev_get_features_ctx *get_feat_ctx);
static int ena_dev_configure(struct rte_eth_dev *dev);
static void ena_tx_map_mbuf(struct ena_ring *tx_ring,
			    struct ena_tx_buffer *tx_info,
			    struct rte_mbuf *mbuf,
			    void **push_header,
			    uint16_t *header_len);
static int ena_xmit_mbuf(struct ena_ring *tx_ring, struct rte_mbuf *mbuf);
static int ena_tx_cleanup(void *txp, uint32_t free_pkt_cnt);
static uint16_t eth_ena_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
				  uint16_t nb_pkts);
static uint16_t eth_ena_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
				  uint16_t nb_pkts);
static int ena_tx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
			      uint16_t nb_desc, unsigned int socket_id,
			      const struct rte_eth_txconf *tx_conf);
static int ena_rx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
			      uint16_t nb_desc, unsigned int socket_id,
			      const struct rte_eth_rxconf *rx_conf,
			      struct rte_mempool *mp);
static inline void ena_init_rx_mbuf(struct rte_mbuf *mbuf, uint16_t len);
static struct rte_mbuf *ena_rx_mbuf(struct ena_ring *rx_ring,
				    struct ena_com_rx_buf_info *ena_bufs,
				    uint32_t descs,
				    uint16_t *next_to_clean,
				    uint8_t offset);
static uint16_t eth_ena_recv_pkts(void *rx_queue,
				  struct rte_mbuf **rx_pkts, uint16_t nb_pkts);
static int ena_add_single_rx_desc(struct ena_com_io_sq *io_sq,
				  struct rte_mbuf *mbuf, uint16_t id);
static int ena_populate_rx_queue(struct ena_ring *rxq, unsigned int count);
static void ena_init_rings(struct ena_adapter *adapter,
			   bool disable_meta_caching);
static int ena_mtu_set(struct rte_eth_dev *dev, uint16_t mtu);
static int ena_start(struct rte_eth_dev *dev);
static int ena_stop(struct rte_eth_dev *dev);
static int ena_close(struct rte_eth_dev *dev);
static int ena_dev_reset(struct rte_eth_dev *dev);
static int ena_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats);
static void ena_rx_queue_release_all(struct rte_eth_dev *dev);
static void ena_tx_queue_release_all(struct rte_eth_dev *dev);
static void ena_rx_queue_release(struct rte_eth_dev *dev, uint16_t qid);
static void ena_tx_queue_release(struct rte_eth_dev *dev, uint16_t qid);
static void ena_rx_queue_release_bufs(struct ena_ring *ring);
static void ena_tx_queue_release_bufs(struct ena_ring *ring);
static int ena_link_update(struct rte_eth_dev *dev,
			   int wait_to_complete);
static int ena_create_io_queue(struct rte_eth_dev *dev, struct ena_ring *ring);
static void ena_queue_stop(struct ena_ring *ring);
static void ena_queue_stop_all(struct rte_eth_dev *dev,
			       enum ena_ring_type ring_type);
static int ena_queue_start(struct rte_eth_dev *dev, struct ena_ring *ring);
static int
ena_queue_start_all(struct rte_eth_dev *dev,
		    enum ena_ring_type ring_type);
static void ena_stats_restart(struct rte_eth_dev *dev);
static uint64_t ena_get_rx_port_offloads(struct ena_adapter *adapter);
static uint64_t ena_get_tx_port_offloads(struct ena_adapter *adapter);
static uint64_t ena_get_rx_queue_offloads(struct ena_adapter *adapter);
static uint64_t ena_get_tx_queue_offloads(struct ena_adapter *adapter);
static int ena_infos_get(struct rte_eth_dev *dev,
			 struct rte_eth_dev_info *dev_info);
static void ena_control_path_handler(void *cb_arg);
static void ena_control_path_poll_handler(void *cb_arg);
static void ena_timer_wd_callback(struct rte_timer *timer, void *arg);
static int eth_ena_dev_init(struct rte_eth_dev *eth_dev);
static int eth_ena_dev_uninit(struct rte_eth_dev *eth_dev);
static int ena_xstats_get_names(struct rte_eth_dev *dev,
				struct rte_eth_xstat_name *xstats_names,
				unsigned int n);
static int ena_xstats_get_names_by_id(struct rte_eth_dev *dev,
				      const uint64_t *ids,
				      struct rte_eth_xstat_name *xstats_names,
				      unsigned int size);
static int ena_xstats_get(struct rte_eth_dev *dev,
			  struct rte_eth_xstat *stats,
			  unsigned int n);
static int ena_xstats_get_by_id(struct rte_eth_dev *dev,
				const uint64_t *ids,
				uint64_t *values,
				unsigned int n);
static int ena_process_llq_policy_devarg(const char *key,
					 const char *value,
					 void *opaque);
static int ena_parse_devargs(struct ena_adapter *adapter,
			     struct rte_devargs *devargs);
static void ena_copy_customer_metrics(struct ena_adapter *adapter,
				      uint64_t *buf,
				      size_t buf_size);
static void ena_copy_ena_srd_info(struct ena_adapter *adapter,
				  struct ena_stats_srd *srd_info);
static int ena_setup_rx_intr(struct rte_eth_dev *dev);
static int ena_rx_queue_intr_enable(struct rte_eth_dev *dev,
				    uint16_t queue_id);
static int ena_rx_queue_intr_disable(struct rte_eth_dev *dev,
				     uint16_t queue_id);
static int ena_configure_aenq(struct ena_adapter *adapter);
static int ena_mp_primary_handle(const struct rte_mp_msg *mp_msg,
				 const void *peer);
static bool ena_use_large_llq_hdr(struct ena_adapter *adapter, uint8_t recommended_entry_size);

static const struct eth_dev_ops ena_dev_ops = {
	.dev_configure          = ena_dev_configure,
	.dev_infos_get          = ena_infos_get,
	.rx_queue_setup         = ena_rx_queue_setup,
	.tx_queue_setup         = ena_tx_queue_setup,
	.dev_start              = ena_start,
	.dev_stop               = ena_stop,
	.link_update            = ena_link_update,
	.stats_get              = ena_stats_get,
	.xstats_get_names       = ena_xstats_get_names,
	.xstats_get_names_by_id = ena_xstats_get_names_by_id,
	.xstats_get             = ena_xstats_get,
	.xstats_get_by_id       = ena_xstats_get_by_id,
	.mtu_set                = ena_mtu_set,
	.rx_queue_release       = ena_rx_queue_release,
	.tx_queue_release       = ena_tx_queue_release,
	.dev_close              = ena_close,
	.dev_reset              = ena_dev_reset,
	.reta_update            = ena_rss_reta_update,
	.reta_query             = ena_rss_reta_query,
	.rx_queue_intr_enable   = ena_rx_queue_intr_enable,
	.rx_queue_intr_disable  = ena_rx_queue_intr_disable,
	.rss_hash_update        = ena_rss_hash_update,
	.rss_hash_conf_get      = ena_rss_hash_conf_get,
	.tx_done_cleanup        = ena_tx_cleanup,
};
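/*
 * Illustrative note (not part of the original sources): applications never
 * invoke the ops above directly; they are reached through the generic ethdev
 * API, for example:
 *
 *   rte_eth_dev_configure(port_id, nb_rxq, nb_txq, &port_conf);  // -> ena_dev_configure
 *   rte_eth_dev_set_mtu(port_id, mtu);                           // -> ena_mtu_set
 *   rte_eth_dev_start(port_id);                                  // -> ena_start
 */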
/*********************************************************************
 *  Multi-Process communication bits
 *********************************************************************/
/* rte_mp IPC message name */
#define ENA_MP_NAME	"net_ena_mp"
/* Request timeout in seconds */
#define ENA_MP_REQ_TMO	5

/** Proxy request type */
enum ena_mp_req {
	ENA_MP_DEV_STATS_GET,
	ENA_MP_ENI_STATS_GET,
	ENA_MP_MTU_SET,
	ENA_MP_IND_TBL_GET,
	ENA_MP_IND_TBL_SET,
	ENA_MP_CUSTOMER_METRICS_GET,
	ENA_MP_SRD_STATS_GET,
};

/** Proxy message body. Shared between requests and responses. */
struct ena_mp_body {
	/* Message type */
	enum ena_mp_req type;
	int port_id;
	/* Processing result. Set in replies. 0 if message succeeded, negative
	 * error code otherwise.
	 */
	int result;
	union {
		int mtu; /* For ENA_MP_MTU_SET */
	} args;
};

/**
 * Initialize IPC message.
 *
 * @param[out] msg
 *   Pointer to the message to initialize.
 * @param[in] type
 *   Message type.
 * @param[in] port_id
 *   Port ID of target device.
 *
 */
static void
mp_msg_init(struct rte_mp_msg *msg, enum ena_mp_req type, int port_id)
{
	struct ena_mp_body *body = (struct ena_mp_body *)&msg->param;

	memset(msg, 0, sizeof(*msg));
	strlcpy(msg->name, ENA_MP_NAME, sizeof(msg->name));
	msg->len_param = sizeof(*body);
	body->type = type;
	body->port_id = port_id;
}

/*********************************************************************
 *  Multi-Process communication PMD API
 *********************************************************************/
/**
 * Define proxy request descriptor
 *
 * Used to define all structures and functions required for proxying a given
 * function to the primary process including the code to perform to prepare the
 * request and process the response.
 *
 * @param[in] f
 *   Name of the function to proxy
 * @param[in] t
 *   Message type to use
 * @param[in] prep
 *   Body of a function to prepare the request in form of a statement
 *   expression. It is passed all the original function arguments along with
 *   two extra ones:
 *   - struct ena_adapter *adapter - PMD data of the device calling the proxy.
 *   - struct ena_mp_body *req - body of a request to prepare.
 * @param[in] proc
 *   Body of a function to process the response in form of a statement
 *   expression. It is passed all the original function arguments along with
 *   two extra ones:
 *   - struct ena_adapter *adapter - PMD data of the device calling the proxy.
 *   - struct ena_mp_body *rsp - body of a response to process.
 * @param ...
 *   Proxied function's arguments
 *
 * @note Inside prep and proc any parameters which aren't used should be marked
 *       as such (with ENA_TOUCH or __rte_unused).
 */
#define ENA_PROXY_DESC(f, t, prep, proc, ...)			\
	static const enum ena_mp_req mp_type_ ## f = t;		\
	static const char *mp_name_ ## f = #t;			\
	static void mp_prep_ ## f(struct ena_adapter *adapter,	\
				  struct ena_mp_body *req,	\
				  __VA_ARGS__)			\
	{							\
		prep;						\
	}							\
	static void mp_proc_ ## f(struct ena_adapter *adapter,	\
				  struct ena_mp_body *rsp,	\
				  __VA_ARGS__)			\
	{							\
		proc;						\
	}

/**
 * Proxy wrapper for calling primary functions in a secondary process.
 *
 * Depending on whether called in primary or secondary process, calls the
 * @p func directly or proxies the call to the primary process via rte_mp IPC.
 * This macro requires a proxy request descriptor to be defined for @p func
 * using ENA_PROXY_DESC() macro.
 *
 * @param[in/out] a
 *   Device PMD data.
 *   Used for sending the message and sharing message results between primary
 *   and secondary.
 * @param[in] f
 *   Function to proxy.
 * @param ...
 *   Arguments of @p func.
 *
 * @return
 *   - 0: Processing succeeded and response handler was called.
 *   - -EPERM: IPC is unavailable on this platform. This means only primary
 *             process may call the proxied function.
 *   - -EIO:   IPC returned error on request send. Inspect rte_errno for the
 *             detailed error code.
 *   - Negative error code from the proxied function.
 *
 * @note This mechanism is geared towards control-path tasks. Avoid calling it
 *       in fast-path unless unbound delays are allowed. This is due to the IPC
 *       mechanism itself (socket based).
 * @note Due to IPC parameter size limitations the proxy logic shares call
 *       results through the struct ena_adapter shared memory. This makes the
 *       proxy mechanism strictly single-threaded. Therefore be sure to make
 *       all calls to the same proxied function under the same lock.
 */
#define ENA_PROXY(a, f, ...)						\
__extension__ ({							\
	struct ena_adapter *_a = (a);					\
	struct timespec ts = { .tv_sec = ENA_MP_REQ_TMO };		\
	struct ena_mp_body *req, *rsp;					\
	struct rte_mp_reply mp_rep;					\
	struct rte_mp_msg mp_req;					\
	int ret;							\
									\
	if (rte_eal_process_type() == RTE_PROC_PRIMARY) {		\
		ret = f(__VA_ARGS__);					\
	} else {							\
		/* Prepare and send request */				\
		req = (struct ena_mp_body *)&mp_req.param;		\
		mp_msg_init(&mp_req, mp_type_ ## f, _a->edev_data->port_id); \
		mp_prep_ ## f(_a, req, ## __VA_ARGS__);			\
									\
		ret = rte_mp_request_sync(&mp_req, &mp_rep, &ts);	\
		if (likely(!ret)) {					\
			RTE_ASSERT(mp_rep.nb_received == 1);		\
			rsp = (struct ena_mp_body *)&mp_rep.msgs[0].param; \
			ret = rsp->result;				\
			if (ret == 0) {					\
				mp_proc_##f(_a, rsp, ## __VA_ARGS__);	\
			} else {					\
				PMD_DRV_LOG_LINE(ERR,			\
					    "%s returned error: %d",	\
					    mp_name_ ## f, rsp->result);\
			}						\
			free(mp_rep.msgs);				\
		} else if (rte_errno == ENOTSUP) {			\
			PMD_DRV_LOG_LINE(ERR,				\
				    "No IPC, can't proxy to primary");	\
			ret = -rte_errno;				\
		} else {						\
			PMD_DRV_LOG_LINE(ERR, "Request %s failed: %s",	\
				    mp_name_ ## f,			\
				    rte_strerror(rte_errno));		\
			ret = -EIO;					\
		}							\
	}								\
	ret;								\
})
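/*
 * Illustrative usage of ENA_PROXY() (a sketch only; the real call sites appear
 * further down in this file, e.g. in ena_mtu_set() and ena_stats_get()):
 *
 *   rte_spinlock_lock(&adapter->admin_lock);
 *   rc = ENA_PROXY(adapter, ena_com_set_dev_mtu, &adapter->ena_dev, mtu);
 *   rte_spinlock_unlock(&adapter->admin_lock);
 *
 * In the primary process this expands to a direct ena_com_set_dev_mtu() call;
 * in a secondary process the request is serialized and sent over rte_mp IPC.
 */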
/*********************************************************************
 *  Multi-Process communication request descriptors
 *********************************************************************/

ENA_PROXY_DESC(ena_com_get_dev_basic_stats, ENA_MP_DEV_STATS_GET,
__extension__ ({
	ENA_TOUCH(adapter);
	ENA_TOUCH(req);
	ENA_TOUCH(ena_dev);
	ENA_TOUCH(stats);
}),
__extension__ ({
	ENA_TOUCH(rsp);
	ENA_TOUCH(ena_dev);
	if (stats != &adapter->basic_stats)
		rte_memcpy(stats, &adapter->basic_stats, sizeof(*stats));
}),
	struct ena_com_dev *ena_dev, struct ena_admin_basic_stats *stats);

ENA_PROXY_DESC(ena_com_get_eni_stats, ENA_MP_ENI_STATS_GET,
__extension__ ({
	ENA_TOUCH(adapter);
	ENA_TOUCH(req);
	ENA_TOUCH(ena_dev);
	ENA_TOUCH(stats);
}),
__extension__ ({
	ENA_TOUCH(rsp);
	ENA_TOUCH(ena_dev);
	if (stats != (struct ena_admin_eni_stats *)adapter->metrics_stats)
		rte_memcpy(stats, adapter->metrics_stats, sizeof(*stats));
}),
	struct ena_com_dev *ena_dev, struct ena_admin_eni_stats *stats);

ENA_PROXY_DESC(ena_com_set_dev_mtu, ENA_MP_MTU_SET,
__extension__ ({
	ENA_TOUCH(adapter);
	ENA_TOUCH(ena_dev);
	req->args.mtu = mtu;
}),
__extension__ ({
	ENA_TOUCH(adapter);
	ENA_TOUCH(rsp);
	ENA_TOUCH(ena_dev);
	ENA_TOUCH(mtu);
}),
	struct ena_com_dev *ena_dev, int mtu);

ENA_PROXY_DESC(ena_com_indirect_table_set, ENA_MP_IND_TBL_SET,
__extension__ ({
	ENA_TOUCH(adapter);
	ENA_TOUCH(req);
	ENA_TOUCH(ena_dev);
}),
__extension__ ({
	ENA_TOUCH(adapter);
	ENA_TOUCH(rsp);
	ENA_TOUCH(ena_dev);
}),
	struct ena_com_dev *ena_dev);

ENA_PROXY_DESC(ena_com_indirect_table_get, ENA_MP_IND_TBL_GET,
__extension__ ({
	ENA_TOUCH(adapter);
	ENA_TOUCH(req);
	ENA_TOUCH(ena_dev);
	ENA_TOUCH(ind_tbl);
}),
__extension__ ({
	ENA_TOUCH(rsp);
	ENA_TOUCH(ena_dev);
	if (ind_tbl != adapter->indirect_table)
		rte_memcpy(ind_tbl, adapter->indirect_table,
			   sizeof(adapter->indirect_table));
}),
	struct ena_com_dev *ena_dev, u32 *ind_tbl);

ENA_PROXY_DESC(ena_com_get_customer_metrics, ENA_MP_CUSTOMER_METRICS_GET,
__extension__ ({
	ENA_TOUCH(adapter);
	ENA_TOUCH(req);
	ENA_TOUCH(ena_dev);
	ENA_TOUCH(buf);
	ENA_TOUCH(buf_size);
}),
__extension__ ({
	ENA_TOUCH(rsp);
	ENA_TOUCH(ena_dev);
	if (buf != (char *)adapter->metrics_stats)
		rte_memcpy(buf, adapter->metrics_stats, buf_size);
}),
	struct ena_com_dev *ena_dev, char *buf, size_t buf_size);

ENA_PROXY_DESC(ena_com_get_ena_srd_info, ENA_MP_SRD_STATS_GET,
__extension__ ({
	ENA_TOUCH(adapter);
	ENA_TOUCH(req);
	ENA_TOUCH(ena_dev);
	ENA_TOUCH(info);
}),
__extension__ ({
	ENA_TOUCH(rsp);
	ENA_TOUCH(ena_dev);
	if ((struct ena_stats_srd *)info != &adapter->srd_stats)
		rte_memcpy((struct ena_stats_srd *)info,
			   &adapter->srd_stats,
			   sizeof(struct ena_stats_srd));
}),
	struct ena_com_dev *ena_dev, struct ena_admin_ena_srd_info *info);

static inline void ena_trigger_reset(struct ena_adapter *adapter,
				     enum ena_regs_reset_reason_types reason)
{
	if (likely(!adapter->trigger_reset)) {
		adapter->reset_reason = reason;
		adapter->trigger_reset = true;
	}
}

static inline void ena_rx_mbuf_prepare(struct ena_ring *rx_ring,
				       struct rte_mbuf *mbuf,
				       struct ena_com_rx_ctx *ena_rx_ctx)
{
	struct ena_stats_rx *rx_stats = &rx_ring->rx_stats;
	uint64_t ol_flags = 0;
	uint32_t packet_type = 0;

	switch (ena_rx_ctx->l3_proto) {
	case ENA_ETH_IO_L3_PROTO_IPV4:
		packet_type |= RTE_PTYPE_L3_IPV4;
		if (unlikely(ena_rx_ctx->l3_csum_err)) {
			++rx_stats->l3_csum_bad;
			ol_flags |= RTE_MBUF_F_RX_IP_CKSUM_BAD;
		} else {
			ol_flags |= RTE_MBUF_F_RX_IP_CKSUM_GOOD;
		}
		break;
	case ENA_ETH_IO_L3_PROTO_IPV6:
		packet_type |= RTE_PTYPE_L3_IPV6;
		break;
	default:
		break;
	}

	switch (ena_rx_ctx->l4_proto) {
	case ENA_ETH_IO_L4_PROTO_TCP:
		packet_type |= RTE_PTYPE_L4_TCP;
		break;
	case ENA_ETH_IO_L4_PROTO_UDP:
		packet_type |= RTE_PTYPE_L4_UDP;
		break;
	default:
		break;
	}

	/* L4 csum is relevant only for TCP/UDP packets */
	if ((packet_type & (RTE_PTYPE_L4_TCP | RTE_PTYPE_L4_UDP)) && !ena_rx_ctx->frag) {
		if (ena_rx_ctx->l4_csum_checked) {
			if (likely(!ena_rx_ctx->l4_csum_err)) {
				++rx_stats->l4_csum_good;
				ol_flags |= RTE_MBUF_F_RX_L4_CKSUM_GOOD;
			} else {
				++rx_stats->l4_csum_bad;
				ol_flags |= RTE_MBUF_F_RX_L4_CKSUM_BAD;
			}
		} else {
			ol_flags |=
				RTE_MBUF_F_RX_L4_CKSUM_UNKNOWN;
		}

		if (rx_ring->offloads & RTE_ETH_RX_OFFLOAD_RSS_HASH) {
			ol_flags |= RTE_MBUF_F_RX_RSS_HASH;
			mbuf->hash.rss = ena_rx_ctx->hash;
		}
	} else {
		ol_flags |= RTE_MBUF_F_RX_L4_CKSUM_UNKNOWN;
	}

	mbuf->ol_flags = ol_flags;
	mbuf->packet_type = packet_type;
}

static inline void ena_tx_mbuf_prepare(struct rte_mbuf *mbuf,
				       struct ena_com_tx_ctx *ena_tx_ctx,
				       uint64_t queue_offloads,
				       bool disable_meta_caching)
{
	struct ena_com_tx_meta *ena_meta = &ena_tx_ctx->ena_meta;

	if ((mbuf->ol_flags & MBUF_OFFLOADS) &&
	    (queue_offloads & QUEUE_OFFLOADS)) {
		/* check if TSO is required */
		if ((mbuf->ol_flags & RTE_MBUF_F_TX_TCP_SEG) &&
		    (queue_offloads & RTE_ETH_TX_OFFLOAD_TCP_TSO)) {
			ena_tx_ctx->tso_enable = true;

			ena_meta->l4_hdr_len = GET_L4_HDR_LEN(mbuf);
		}

		/* check if L3 checksum is needed */
		if ((mbuf->ol_flags & RTE_MBUF_F_TX_IP_CKSUM) &&
		    (queue_offloads & RTE_ETH_TX_OFFLOAD_IPV4_CKSUM))
			ena_tx_ctx->l3_csum_enable = true;

		if (mbuf->ol_flags & RTE_MBUF_F_TX_IPV6) {
			ena_tx_ctx->l3_proto = ENA_ETH_IO_L3_PROTO_IPV6;
			/* For IPv6 packets, DF always needs to be true. */
			ena_tx_ctx->df = 1;
		} else {
			ena_tx_ctx->l3_proto = ENA_ETH_IO_L3_PROTO_IPV4;

			/* set don't fragment (DF) flag */
			if (mbuf->packet_type &
				(RTE_PTYPE_L4_NONFRAG
				 | RTE_PTYPE_INNER_L4_NONFRAG))
				ena_tx_ctx->df = 1;
		}

		/* check if L4 checksum is needed */
		if (((mbuf->ol_flags & RTE_MBUF_F_TX_L4_MASK) == RTE_MBUF_F_TX_TCP_CKSUM) &&
		    (queue_offloads & RTE_ETH_TX_OFFLOAD_TCP_CKSUM)) {
			ena_tx_ctx->l4_proto = ENA_ETH_IO_L4_PROTO_TCP;
			ena_tx_ctx->l4_csum_enable = true;
		} else if (((mbuf->ol_flags & RTE_MBUF_F_TX_L4_MASK) ==
				RTE_MBUF_F_TX_UDP_CKSUM) &&
				(queue_offloads & RTE_ETH_TX_OFFLOAD_UDP_CKSUM)) {
			ena_tx_ctx->l4_proto = ENA_ETH_IO_L4_PROTO_UDP;
			ena_tx_ctx->l4_csum_enable = true;
		} else {
			ena_tx_ctx->l4_proto = ENA_ETH_IO_L4_PROTO_UNKNOWN;
			ena_tx_ctx->l4_csum_enable = false;
		}

		ena_meta->mss = mbuf->tso_segsz;
		ena_meta->l3_hdr_len = mbuf->l3_len;
		ena_meta->l3_hdr_offset = mbuf->l2_len;

		ena_tx_ctx->meta_valid = true;
	} else if (disable_meta_caching) {
		memset(ena_meta, 0, sizeof(*ena_meta));
		ena_tx_ctx->meta_valid = true;
	} else {
		ena_tx_ctx->meta_valid = false;
	}
}
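/*
 * Illustrative application-side setup consumed by ena_tx_mbuf_prepare() above
 * (a sketch only, not part of the original sources). For checksum offload the
 * application is expected to fill the header lengths and offload flags, e.g.:
 *
 *   mbuf->l2_len = sizeof(struct rte_ether_hdr);
 *   mbuf->l3_len = sizeof(struct rte_ipv4_hdr);
 *   mbuf->ol_flags |= RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_IP_CKSUM |
 *                     RTE_MBUF_F_TX_TCP_CKSUM;
 */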
static int validate_tx_req_id(struct ena_ring *tx_ring, u16 req_id)
{
	struct ena_tx_buffer *tx_info = NULL;

	if (likely(req_id < tx_ring->ring_size)) {
		tx_info = &tx_ring->tx_buffer_info[req_id];
		if (likely(tx_info->mbuf))
			return 0;
	}

	if (tx_info)
		PMD_TX_LOG_LINE(ERR, "tx_info doesn't have valid mbuf. queue %d:%d req_id %u",
			tx_ring->port_id, tx_ring->id, req_id);
	else
		PMD_TX_LOG_LINE(ERR, "Invalid req_id: %hu in queue %d:%d",
			req_id, tx_ring->port_id, tx_ring->id);

	/* Trigger device reset */
	++tx_ring->tx_stats.bad_req_id;
	ena_trigger_reset(tx_ring->adapter, ENA_REGS_RESET_INV_TX_REQ_ID);
	return -EFAULT;
}

static void ena_config_host_info(struct ena_com_dev *ena_dev)
{
	struct ena_admin_host_info *host_info;
	int rc;

	/* Allocate only the host info */
	rc = ena_com_allocate_host_info(ena_dev);
	if (rc) {
		PMD_DRV_LOG_LINE(ERR, "Cannot allocate host info");
		return;
	}

	host_info = ena_dev->host_attr.host_info;

	host_info->os_type = ENA_ADMIN_OS_DPDK;
	host_info->kernel_ver = RTE_VERSION;
	strlcpy((char *)host_info->kernel_ver_str, rte_version(),
		sizeof(host_info->kernel_ver_str));
	host_info->os_dist = RTE_VERSION;
	strlcpy((char *)host_info->os_dist_str, rte_version(),
		sizeof(host_info->os_dist_str));
	host_info->driver_version =
		(DRV_MODULE_VER_MAJOR) |
		(DRV_MODULE_VER_MINOR << ENA_ADMIN_HOST_INFO_MINOR_SHIFT) |
		(DRV_MODULE_VER_SUBMINOR <<
			ENA_ADMIN_HOST_INFO_SUB_MINOR_SHIFT);
	host_info->num_cpus = rte_lcore_count();

	host_info->driver_supported_features =
		ENA_ADMIN_HOST_INFO_RX_OFFSET_MASK |
		ENA_ADMIN_HOST_INFO_RSS_CONFIGURABLE_FUNCTION_KEY_MASK;

	rc = ena_com_set_host_attributes(ena_dev);
	if (rc) {
		if (rc == ENA_COM_UNSUPPORTED)
			PMD_DRV_LOG_LINE(WARNING, "Cannot set host attributes");
		else
			PMD_DRV_LOG_LINE(ERR, "Cannot set host attributes");

		goto err;
	}

	return;

err:
	ena_com_delete_host_info(ena_dev);
}

/* This function calculates the number of xstats based on the current config */
static unsigned int ena_xstats_calc_num(struct rte_eth_dev_data *data)
{
	struct ena_adapter *adapter = data->dev_private;

	return ENA_STATS_ARRAY_GLOBAL +
		adapter->metrics_num +
		ENA_STATS_ARRAY_ENA_SRD +
		(data->nb_tx_queues * ENA_STATS_ARRAY_TX) +
		(data->nb_rx_queues * ENA_STATS_ARRAY_RX);
}

static void ena_config_debug_area(struct ena_adapter *adapter)
{
	u32 debug_area_size;
	int rc, ss_count;

	ss_count = ena_xstats_calc_num(adapter->edev_data);

	/* allocate 32 bytes for each string and 64 bits for the value */
	debug_area_size = ss_count * ETH_GSTRING_LEN + sizeof(u64) * ss_count;

	rc = ena_com_allocate_debug_area(&adapter->ena_dev, debug_area_size);
	if (rc) {
		PMD_DRV_LOG_LINE(ERR, "Cannot allocate debug area");
		return;
	}

	rc = ena_com_set_host_attributes(&adapter->ena_dev);
	if (rc) {
		if (rc == ENA_COM_UNSUPPORTED)
			PMD_DRV_LOG_LINE(WARNING, "Cannot set host attributes");
		else
			PMD_DRV_LOG_LINE(ERR, "Cannot set host attributes");

		goto err;
	}

	return;
err:
	ena_com_delete_debug_area(&adapter->ena_dev);
}

static int ena_close(struct rte_eth_dev *dev)
{
	struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev);
	struct rte_intr_handle *intr_handle = pci_dev->intr_handle;
	struct ena_adapter *adapter = dev->data->dev_private;
	struct ena_com_dev *ena_dev = &adapter->ena_dev;
	int ret = 0;
	int rc;

	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
		return 0;

	if (adapter->state == ENA_ADAPTER_STATE_CLOSED)
		return 0;

	if (adapter->state == ENA_ADAPTER_STATE_RUNNING)
		ret =
			ena_stop(dev);
	adapter->state = ENA_ADAPTER_STATE_CLOSED;

	if (!adapter->control_path_poll_interval) {
		rte_intr_disable(intr_handle);
		rc = rte_intr_callback_unregister_sync(intr_handle, ena_control_path_handler, dev);
		if (unlikely(rc != 0))
			PMD_INIT_LOG_LINE(ERR, "Failed to unregister interrupt handler");
	} else {
		rte_eal_alarm_cancel(ena_control_path_poll_handler, dev);
	}

	ena_rx_queue_release_all(dev);
	ena_tx_queue_release_all(dev);

	rte_free(adapter->drv_stats);
	adapter->drv_stats = NULL;

	ena_com_set_admin_running_state(ena_dev, false);

	ena_com_rss_destroy(ena_dev);

	ena_com_delete_debug_area(ena_dev);
	ena_com_delete_host_info(ena_dev);

	ena_com_abort_admin_commands(ena_dev);
	ena_com_wait_for_abort_completion(ena_dev);
	ena_com_admin_destroy(ena_dev);
	ena_com_mmio_reg_read_request_destroy(ena_dev);
	ena_com_delete_customer_metrics_buffer(ena_dev);

	/*
	 * MAC is not allocated dynamically. Setting NULL should prevent from
	 * release of the resource in the rte_eth_dev_release_port().
	 */
	dev->data->mac_addrs = NULL;

	return ret;
}

static int
ena_dev_reset(struct rte_eth_dev *dev)
{
	int rc = 0;

	/* Cannot release memory in secondary process */
	if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
		PMD_DRV_LOG_LINE(WARNING, "dev_reset not supported in secondary.");
		return -EPERM;
	}

	rc = eth_ena_dev_uninit(dev);
	if (rc) {
		PMD_INIT_LOG_LINE(CRIT, "Failed to un-initialize device");
		return rc;
	}

	rc = eth_ena_dev_init(dev);
	if (rc)
		PMD_INIT_LOG_LINE(CRIT, "Cannot initialize device");

	return rc;
}

static void ena_rx_queue_release_all(struct rte_eth_dev *dev)
{
	int nb_queues = dev->data->nb_rx_queues;
	int i;

	for (i = 0; i < nb_queues; i++)
		ena_rx_queue_release(dev, i);
}

static void ena_tx_queue_release_all(struct rte_eth_dev *dev)
{
	int nb_queues = dev->data->nb_tx_queues;
	int i;

	for (i = 0; i < nb_queues; i++)
		ena_tx_queue_release(dev, i);
}

static void ena_rx_queue_release(struct rte_eth_dev *dev, uint16_t qid)
{
	struct ena_ring *ring = dev->data->rx_queues[qid];

	/* Free ring resources */
	rte_free(ring->rx_buffer_info);
	ring->rx_buffer_info = NULL;

	rte_free(ring->rx_refill_buffer);
	ring->rx_refill_buffer = NULL;

	rte_free(ring->empty_rx_reqs);
	ring->empty_rx_reqs = NULL;

	ring->configured = 0;

	PMD_DRV_LOG_LINE(NOTICE, "Rx queue %d:%d released",
		ring->port_id, ring->id);
}

static void ena_tx_queue_release(struct rte_eth_dev *dev, uint16_t qid)
{
	struct ena_ring *ring = dev->data->tx_queues[qid];

	/* Free ring resources */
	rte_free(ring->push_buf_intermediate_buf);

	rte_free(ring->tx_buffer_info);

	rte_free(ring->empty_tx_reqs);

	ring->empty_tx_reqs = NULL;
	ring->tx_buffer_info = NULL;
	ring->push_buf_intermediate_buf = NULL;

	ring->configured = 0;

	PMD_DRV_LOG_LINE(NOTICE, "Tx queue %d:%d released",
		ring->port_id, ring->id);
}

static void ena_rx_queue_release_bufs(struct ena_ring *ring)
{
	unsigned int i;

	for (i = 0; i < ring->ring_size; ++i) {
		struct ena_rx_buffer *rx_info = &ring->rx_buffer_info[i];

		if (rx_info->mbuf) {
			rte_mbuf_raw_free(rx_info->mbuf);
			rx_info->mbuf =
				NULL;
		}
	}
}

static void ena_tx_queue_release_bufs(struct ena_ring *ring)
{
	unsigned int i;

	for (i = 0; i < ring->ring_size; ++i) {
		struct ena_tx_buffer *tx_buf = &ring->tx_buffer_info[i];

		if (tx_buf->mbuf) {
			rte_pktmbuf_free(tx_buf->mbuf);
			tx_buf->mbuf = NULL;
		}
	}
}

static int ena_link_update(struct rte_eth_dev *dev,
			   __rte_unused int wait_to_complete)
{
	struct rte_eth_link *link = &dev->data->dev_link;
	struct ena_adapter *adapter = dev->data->dev_private;

	link->link_status = adapter->link_status ? RTE_ETH_LINK_UP : RTE_ETH_LINK_DOWN;
	link->link_speed = RTE_ETH_SPEED_NUM_NONE;
	link->link_duplex = RTE_ETH_LINK_FULL_DUPLEX;

	return 0;
}

static int ena_queue_start_all(struct rte_eth_dev *dev,
			       enum ena_ring_type ring_type)
{
	struct ena_adapter *adapter = dev->data->dev_private;
	struct ena_ring *queues = NULL;
	int nb_queues;
	int i = 0;
	int rc = 0;

	if (ring_type == ENA_RING_TYPE_RX) {
		queues = adapter->rx_ring;
		nb_queues = dev->data->nb_rx_queues;
	} else {
		queues = adapter->tx_ring;
		nb_queues = dev->data->nb_tx_queues;
	}
	for (i = 0; i < nb_queues; i++) {
		if (queues[i].configured) {
			if (ring_type == ENA_RING_TYPE_RX) {
				ena_assert_msg(
					dev->data->rx_queues[i] == &queues[i],
					"Inconsistent state of Rx queues\n");
			} else {
				ena_assert_msg(
					dev->data->tx_queues[i] == &queues[i],
					"Inconsistent state of Tx queues\n");
			}

			rc = ena_queue_start(dev, &queues[i]);

			if (rc) {
				PMD_INIT_LOG_LINE(ERR,
					"Failed to start queue[%d] of type(%d)",
					i, ring_type);
				goto err;
			}
		}
	}

	return 0;

err:
	while (i--)
		if (queues[i].configured)
			ena_queue_stop(&queues[i]);

	return rc;
}

static int
ena_calc_io_queue_size(struct ena_calc_queue_size_ctx *ctx,
		       bool use_large_llq_hdr)
{
	struct ena_admin_feature_llq_desc *dev = &ctx->get_feat_ctx->llq;
	struct ena_com_dev *ena_dev = ctx->ena_dev;
	uint32_t max_tx_queue_size;
	uint32_t max_rx_queue_size;

	if (ena_dev->supported_features & BIT(ENA_ADMIN_MAX_QUEUES_EXT)) {
		struct ena_admin_queue_ext_feature_fields *max_queue_ext =
			&ctx->get_feat_ctx->max_queue_ext.max_queue_ext;
		max_rx_queue_size = RTE_MIN(max_queue_ext->max_rx_cq_depth,
			max_queue_ext->max_rx_sq_depth);
		max_tx_queue_size = max_queue_ext->max_tx_cq_depth;

		if (ena_dev->tx_mem_queue_type ==
		    ENA_ADMIN_PLACEMENT_POLICY_DEV) {
			max_tx_queue_size = RTE_MIN(max_tx_queue_size,
				dev->max_llq_depth);
		} else {
			max_tx_queue_size = RTE_MIN(max_tx_queue_size,
				max_queue_ext->max_tx_sq_depth);
		}

		ctx->max_rx_sgl_size = RTE_MIN(ENA_PKT_MAX_BUFS,
			max_queue_ext->max_per_packet_rx_descs);
		ctx->max_tx_sgl_size = RTE_MIN(ENA_PKT_MAX_BUFS,
			max_queue_ext->max_per_packet_tx_descs);
	} else {
		struct ena_admin_queue_feature_desc *max_queues =
			&ctx->get_feat_ctx->max_queues;
		max_rx_queue_size = RTE_MIN(max_queues->max_cq_depth,
			max_queues->max_sq_depth);
		max_tx_queue_size = max_queues->max_cq_depth;

		if (ena_dev->tx_mem_queue_type ==
		    ENA_ADMIN_PLACEMENT_POLICY_DEV) {
			max_tx_queue_size = RTE_MIN(max_tx_queue_size,
				dev->max_llq_depth);
		} else {
			max_tx_queue_size =
				RTE_MIN(max_tx_queue_size,
					max_queues->max_sq_depth);
		}

		ctx->max_rx_sgl_size = RTE_MIN(ENA_PKT_MAX_BUFS,
			max_queues->max_packet_rx_descs);
		ctx->max_tx_sgl_size = RTE_MIN(ENA_PKT_MAX_BUFS,
			max_queues->max_packet_tx_descs);
	}

	/* Round down to the nearest power of 2 */
	max_rx_queue_size = rte_align32prevpow2(max_rx_queue_size);
	max_tx_queue_size = rte_align32prevpow2(max_tx_queue_size);

	if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV && use_large_llq_hdr) {
		/* intersection between driver configuration and device capabilities */
		if (dev->entry_size_ctrl_supported & ENA_ADMIN_LIST_ENTRY_SIZE_256B) {
			if (dev->max_wide_llq_depth == MAX_WIDE_LLQ_DEPTH_UNSUPPORTED) {
				/* Devices that do not support the double-sized ENA memory BAR will
				 * report max_wide_llq_depth as 0. In such a case, the driver halves
				 * the queue depth when working in the large llq policy.
				 */
				max_tx_queue_size >>= 1;
				PMD_INIT_LOG_LINE(INFO,
					"large LLQ policy requires limiting Tx queue size to %u entries",
					max_tx_queue_size);
			} else if (dev->max_wide_llq_depth < max_tx_queue_size) {
				/* In case the queue depth that the driver calculated exceeds
				 * the maximal value that the device allows, it will be limited
				 * to that maximal value.
				 */
				max_tx_queue_size = dev->max_wide_llq_depth;
			}
		} else {
			PMD_INIT_LOG_LINE(INFO,
				"Forcing large LLQ headers failed since device lacks this support");
		}
	}

	if (unlikely(max_rx_queue_size == 0 || max_tx_queue_size == 0)) {
		PMD_INIT_LOG_LINE(ERR, "Invalid queue size");
		return -EFAULT;
	}

	ctx->max_tx_queue_size = max_tx_queue_size;
	ctx->max_rx_queue_size = max_rx_queue_size;

	PMD_DRV_LOG_LINE(INFO, "tx queue size %u", max_tx_queue_size);
	return 0;
}

static void ena_stats_restart(struct rte_eth_dev *dev)
{
	struct ena_adapter *adapter = dev->data->dev_private;

	rte_atomic64_init(&adapter->drv_stats->ierrors);
	rte_atomic64_init(&adapter->drv_stats->oerrors);
	rte_atomic64_init(&adapter->drv_stats->rx_nombuf);
	adapter->drv_stats->rx_drops = 0;
}

static int ena_stats_get(struct rte_eth_dev *dev,
			 struct rte_eth_stats *stats)
{
	struct ena_admin_basic_stats ena_stats;
	struct ena_adapter *adapter = dev->data->dev_private;
	struct ena_com_dev *ena_dev = &adapter->ena_dev;
	int rc;
	int i;
	int max_rings_stats;

	memset(&ena_stats, 0, sizeof(ena_stats));

	rte_spinlock_lock(&adapter->admin_lock);
	rc = ENA_PROXY(adapter, ena_com_get_dev_basic_stats, ena_dev,
		       &ena_stats);
	rte_spinlock_unlock(&adapter->admin_lock);
	if (unlikely(rc)) {
		PMD_DRV_LOG_LINE(ERR, "Could not retrieve statistics from ENA");
		return rc;
	}

	/* Set of basic statistics from ENA */
	stats->ipackets = __MERGE_64B_H_L(ena_stats.rx_pkts_high,
					  ena_stats.rx_pkts_low);
	stats->opackets = __MERGE_64B_H_L(ena_stats.tx_pkts_high,
					  ena_stats.tx_pkts_low);
	stats->ibytes = __MERGE_64B_H_L(ena_stats.rx_bytes_high,
					ena_stats.rx_bytes_low);
	stats->obytes = __MERGE_64B_H_L(ena_stats.tx_bytes_high,
					ena_stats.tx_bytes_low);

	/* Driver related stats */
	stats->imissed = adapter->drv_stats->rx_drops;
	stats->ierrors = rte_atomic64_read(&adapter->drv_stats->ierrors);
	stats->oerrors =
		rte_atomic64_read(&adapter->drv_stats->oerrors);
	stats->rx_nombuf = rte_atomic64_read(&adapter->drv_stats->rx_nombuf);

	max_rings_stats = RTE_MIN(dev->data->nb_rx_queues,
		RTE_ETHDEV_QUEUE_STAT_CNTRS);
	for (i = 0; i < max_rings_stats; ++i) {
		struct ena_stats_rx *rx_stats = &adapter->rx_ring[i].rx_stats;

		stats->q_ibytes[i] = rx_stats->bytes;
		stats->q_ipackets[i] = rx_stats->cnt;
		stats->q_errors[i] = rx_stats->bad_desc_num +
			rx_stats->bad_req_id;
	}

	max_rings_stats = RTE_MIN(dev->data->nb_tx_queues,
		RTE_ETHDEV_QUEUE_STAT_CNTRS);
	for (i = 0; i < max_rings_stats; ++i) {
		struct ena_stats_tx *tx_stats = &adapter->tx_ring[i].tx_stats;

		stats->q_obytes[i] = tx_stats->bytes;
		stats->q_opackets[i] = tx_stats->cnt;
	}

	return 0;
}

static int ena_mtu_set(struct rte_eth_dev *dev, uint16_t mtu)
{
	struct ena_adapter *adapter;
	struct ena_com_dev *ena_dev;
	int rc = 0;

	ena_assert_msg(dev->data != NULL, "Uninitialized device\n");
	ena_assert_msg(dev->data->dev_private != NULL, "Uninitialized device\n");
	adapter = dev->data->dev_private;

	ena_dev = &adapter->ena_dev;
	ena_assert_msg(ena_dev != NULL, "Uninitialized device\n");

	rc = ENA_PROXY(adapter, ena_com_set_dev_mtu, ena_dev, mtu);
	if (rc)
		PMD_DRV_LOG_LINE(ERR, "Could not set MTU: %d", mtu);
	else
		PMD_DRV_LOG_LINE(NOTICE, "MTU set to: %d", mtu);

	return rc;
}

static int ena_start(struct rte_eth_dev *dev)
{
	struct ena_adapter *adapter = dev->data->dev_private;
	uint64_t ticks;
	int rc = 0;
	uint16_t i;

	/* Cannot allocate memory in secondary process */
	if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
		PMD_DRV_LOG_LINE(WARNING, "dev_start not supported in secondary.");
		return -EPERM;
	}

	rc = ena_setup_rx_intr(dev);
	if (rc)
		return rc;

	rc = ena_queue_start_all(dev, ENA_RING_TYPE_RX);
	if (rc)
		return rc;

	rc = ena_queue_start_all(dev, ENA_RING_TYPE_TX);
	if (rc)
		goto err_start_tx;

	if (adapter->edev_data->dev_conf.rxmode.mq_mode & RTE_ETH_MQ_RX_RSS_FLAG) {
		rc = ena_rss_configure(adapter);
		if (rc)
			goto err_rss_init;
	}

	ena_stats_restart(dev);

	adapter->timestamp_wd = rte_get_timer_cycles();
	adapter->keep_alive_timeout = ENA_DEVICE_KALIVE_TIMEOUT;

	ticks = rte_get_timer_hz();
	rte_timer_reset(&adapter->timer_wd, ticks, PERIODICAL, rte_lcore_id(),
			ena_timer_wd_callback, dev);

	++adapter->dev_stats.dev_start;
	adapter->state = ENA_ADAPTER_STATE_RUNNING;

	for (i = 0; i < dev->data->nb_rx_queues; i++)
		dev->data->rx_queue_state[i] = RTE_ETH_QUEUE_STATE_STARTED;
	for (i = 0; i < dev->data->nb_tx_queues; i++)
		dev->data->tx_queue_state[i] = RTE_ETH_QUEUE_STATE_STARTED;

	return 0;

err_rss_init:
	ena_queue_stop_all(dev, ENA_RING_TYPE_TX);
err_start_tx:
	ena_queue_stop_all(dev, ENA_RING_TYPE_RX);
	return rc;
}

static int ena_stop(struct rte_eth_dev *dev)
{
	struct ena_adapter *adapter = dev->data->dev_private;
	struct ena_com_dev *ena_dev = &adapter->ena_dev;
	struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev);
	struct rte_intr_handle *intr_handle = pci_dev->intr_handle;
	uint16_t i;
	int rc;

	/* Cannot free memory in secondary process */
	if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
		PMD_DRV_LOG_LINE(WARNING, "dev_stop not supported in secondary.");
		return -EPERM;
	}

	rte_timer_stop_sync(&adapter->timer_wd);
	ena_queue_stop_all(dev, ENA_RING_TYPE_TX);
	ena_queue_stop_all(dev, ENA_RING_TYPE_RX);

	if (adapter->trigger_reset) {
		rc = ena_com_dev_reset(ena_dev, adapter->reset_reason);
		if (rc)
			PMD_DRV_LOG_LINE(ERR, "Device reset failed, rc: %d", rc);
	}

	rte_intr_disable(intr_handle);

	rte_intr_efd_disable(intr_handle);

	/* Cleanup vector list */
	rte_intr_vec_list_free(intr_handle);

	rte_intr_enable(intr_handle);

	++adapter->dev_stats.dev_stop;
	adapter->state = ENA_ADAPTER_STATE_STOPPED;
	dev->data->dev_started = 0;

	for (i = 0; i < dev->data->nb_rx_queues; i++)
		dev->data->rx_queue_state[i] = RTE_ETH_QUEUE_STATE_STOPPED;
	for (i = 0; i < dev->data->nb_tx_queues; i++)
		dev->data->tx_queue_state[i] = RTE_ETH_QUEUE_STATE_STOPPED;

	return 0;
}

static int ena_create_io_queue(struct rte_eth_dev *dev, struct ena_ring *ring)
{
	struct ena_adapter *adapter = ring->adapter;
	struct ena_com_dev *ena_dev = &adapter->ena_dev;
	struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev);
	struct rte_intr_handle *intr_handle = pci_dev->intr_handle;
	struct ena_com_create_io_ctx ctx =
		/* policy set to _HOST just to satisfy icc compiler */
		{ ENA_ADMIN_PLACEMENT_POLICY_HOST,
		  0, 0, 0, 0, 0 };
	uint16_t ena_qid;
	unsigned int i;
	int rc;

	ctx.msix_vector = -1;
	if (ring->type == ENA_RING_TYPE_TX) {
		ena_qid = ENA_IO_TXQ_IDX(ring->id);
		ctx.direction = ENA_COM_IO_QUEUE_DIRECTION_TX;
		ctx.mem_queue_type = ena_dev->tx_mem_queue_type;
		for (i = 0; i < ring->ring_size; i++)
			ring->empty_tx_reqs[i] = i;
	} else {
		ena_qid = ENA_IO_RXQ_IDX(ring->id);
		ctx.direction = ENA_COM_IO_QUEUE_DIRECTION_RX;
		if (rte_intr_dp_is_en(intr_handle))
			ctx.msix_vector =
				rte_intr_vec_list_index_get(intr_handle,
							    ring->id);

		for (i = 0; i < ring->ring_size; i++)
			ring->empty_rx_reqs[i] = i;
	}
	ctx.queue_size = ring->ring_size;
	ctx.qid = ena_qid;
	ctx.numa_node = ring->numa_socket_id;

	rc = ena_com_create_io_queue(ena_dev, &ctx);
	if (rc) {
		PMD_DRV_LOG_LINE(ERR,
			"Failed to create IO queue[%d] (qid:%d), rc: %d",
			ring->id, ena_qid, rc);
		return rc;
	}

	rc = ena_com_get_io_handlers(ena_dev, ena_qid,
				     &ring->ena_com_io_sq,
				     &ring->ena_com_io_cq);
	if (rc) {
		PMD_DRV_LOG_LINE(ERR,
			"Failed to get IO queue[%d] handlers, rc: %d",
			ring->id, rc);
		ena_com_destroy_io_queue(ena_dev, ena_qid);
		return rc;
	}

	if (ring->type == ENA_RING_TYPE_TX)
		ena_com_update_numa_node(ring->ena_com_io_cq, ctx.numa_node);
	/* Start with Rx interrupts being masked. */
	if (ring->type == ENA_RING_TYPE_RX && rte_intr_dp_is_en(intr_handle))
		ena_rx_queue_intr_disable(dev, ring->id);

	return 0;
}

static void ena_queue_stop(struct ena_ring *ring)
{
	struct ena_com_dev *ena_dev = &ring->adapter->ena_dev;

	if (ring->type == ENA_RING_TYPE_RX) {
		ena_com_destroy_io_queue(ena_dev, ENA_IO_RXQ_IDX(ring->id));
		ena_rx_queue_release_bufs(ring);
	} else {
		ena_com_destroy_io_queue(ena_dev, ENA_IO_TXQ_IDX(ring->id));
		ena_tx_queue_release_bufs(ring);
	}
}

static void ena_queue_stop_all(struct rte_eth_dev *dev,
			       enum ena_ring_type ring_type)
{
	struct ena_adapter *adapter = dev->data->dev_private;
	struct ena_ring *queues = NULL;
	uint16_t nb_queues, i;

	if (ring_type == ENA_RING_TYPE_RX) {
		queues = adapter->rx_ring;
		nb_queues = dev->data->nb_rx_queues;
	} else {
		queues = adapter->tx_ring;
		nb_queues = dev->data->nb_tx_queues;
	}

	for (i = 0; i < nb_queues; ++i)
		if (queues[i].configured)
			ena_queue_stop(&queues[i]);
}

static int ena_queue_start(struct rte_eth_dev *dev, struct ena_ring *ring)
{
	int rc, bufs_num;

	ena_assert_msg(ring->configured == 1,
		       "Trying to start unconfigured queue\n");

	rc = ena_create_io_queue(dev, ring);
	if (rc) {
		PMD_INIT_LOG_LINE(ERR, "Failed to create IO queue");
		return rc;
	}

	ring->next_to_clean = 0;
	ring->next_to_use = 0;

	if (ring->type == ENA_RING_TYPE_TX) {
		ring->tx_stats.available_desc =
			ena_com_free_q_entries(ring->ena_com_io_sq);
		return 0;
	}

	bufs_num = ring->ring_size - 1;
	rc = ena_populate_rx_queue(ring, bufs_num);
	if (rc != bufs_num) {
		ena_com_destroy_io_queue(&ring->adapter->ena_dev,
					 ENA_IO_RXQ_IDX(ring->id));
		PMD_INIT_LOG_LINE(ERR, "Failed to populate Rx ring");
		return ENA_COM_FAULT;
	}
	/* Flush per-core RX buffers pools cache as they can be used on other
	 * cores as well.
	 */
	rte_mempool_cache_flush(NULL, ring->mb_pool);

	return 0;
}

static int ena_tx_queue_setup(struct rte_eth_dev *dev,
			      uint16_t queue_idx,
			      uint16_t nb_desc,
			      unsigned int socket_id,
			      const struct rte_eth_txconf *tx_conf)
{
	struct ena_ring *txq = NULL;
	struct ena_adapter *adapter = dev->data->dev_private;
	unsigned int i;
	uint16_t dyn_thresh;

	txq = &adapter->tx_ring[queue_idx];

	if (txq->configured) {
		PMD_DRV_LOG_LINE(CRIT,
			"API violation. Queue[%d] is already configured",
			queue_idx);
		return ENA_COM_FAULT;
	}

	if (!rte_is_power_of_2(nb_desc)) {
		PMD_DRV_LOG_LINE(ERR,
			"Unsupported size of Tx queue: %d is not a power of 2.",
			nb_desc);
		return -EINVAL;
	}

	if (nb_desc > adapter->max_tx_ring_size) {
		PMD_DRV_LOG_LINE(ERR,
			"Unsupported size of Tx queue (max size: %d)",
			adapter->max_tx_ring_size);
		return -EINVAL;
	}

	txq->port_id = dev->data->port_id;
	txq->next_to_clean = 0;
	txq->next_to_use = 0;
	txq->ring_size = nb_desc;
	txq->size_mask = nb_desc - 1;
	txq->numa_socket_id = socket_id;
	txq->pkts_without_db = false;
	txq->last_cleanup_ticks = 0;

	txq->tx_buffer_info = rte_zmalloc_socket("txq->tx_buffer_info",
		sizeof(struct ena_tx_buffer) * txq->ring_size,
		RTE_CACHE_LINE_SIZE,
		socket_id);
	if (!txq->tx_buffer_info) {
		PMD_DRV_LOG_LINE(ERR,
			"Failed to allocate memory for Tx buffer info");
		return -ENOMEM;
	}

	txq->empty_tx_reqs = rte_zmalloc_socket("txq->empty_tx_reqs",
		sizeof(uint16_t) * txq->ring_size,
		RTE_CACHE_LINE_SIZE,
		socket_id);
	if (!txq->empty_tx_reqs) {
		PMD_DRV_LOG_LINE(ERR,
			"Failed to allocate memory for empty Tx requests");
		rte_free(txq->tx_buffer_info);
		return -ENOMEM;
	}

	txq->push_buf_intermediate_buf =
		rte_zmalloc_socket("txq->push_buf_intermediate_buf",
			txq->tx_max_header_size,
			RTE_CACHE_LINE_SIZE,
			socket_id);
	if (!txq->push_buf_intermediate_buf) {
		PMD_DRV_LOG_LINE(ERR, "Failed to alloc push buffer for LLQ");
		rte_free(txq->tx_buffer_info);
		rte_free(txq->empty_tx_reqs);
		return -ENOMEM;
	}

	for (i = 0; i < txq->ring_size; i++)
		txq->empty_tx_reqs[i] = i;

	txq->offloads = tx_conf->offloads | dev->data->dev_conf.txmode.offloads;

	/* Check if caller provided the Tx cleanup threshold value. */
	if (tx_conf->tx_free_thresh != 0) {
		txq->tx_free_thresh = tx_conf->tx_free_thresh;
	} else {
		dyn_thresh = txq->ring_size -
			txq->ring_size / ENA_REFILL_THRESH_DIVIDER;
		txq->tx_free_thresh = RTE_MAX(dyn_thresh,
			txq->ring_size - ENA_REFILL_THRESH_PACKET);
	}

	txq->missing_tx_completion_threshold =
		RTE_MIN(txq->ring_size / 2, ENA_DEFAULT_MISSING_COMP);

	/* Store pointer to this queue in upper layer */
	txq->configured = 1;
	dev->data->tx_queues[queue_idx] = txq;

	return 0;
}
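/*
 * Illustrative application-side calls that end up in ena_tx_queue_setup() and
 * ena_rx_queue_setup() via the ops table (a sketch only; "mbuf_pool" is a
 * placeholder):
 *
 *   rte_eth_tx_queue_setup(port_id, 0, 1024, rte_socket_id(), NULL);
 *   rte_eth_rx_queue_setup(port_id, 0, 1024, rte_socket_id(), NULL, mbuf_pool);
 *
 * Note that nb_desc must be a power of two and must not exceed the adapter's
 * max_tx_ring_size / max_rx_ring_size, as checked above and below.
 */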
static int ena_rx_queue_setup(struct rte_eth_dev *dev,
			      uint16_t queue_idx,
			      uint16_t nb_desc,
			      unsigned int socket_id,
			      const struct rte_eth_rxconf *rx_conf,
			      struct rte_mempool *mp)
{
	struct ena_adapter *adapter = dev->data->dev_private;
	struct ena_ring *rxq = NULL;
	size_t buffer_size;
	int i;
	uint16_t dyn_thresh;

	rxq = &adapter->rx_ring[queue_idx];
	if (rxq->configured) {
		PMD_DRV_LOG_LINE(CRIT,
			"API violation. Queue[%d] is already configured",
			queue_idx);
		return ENA_COM_FAULT;
	}

	if (!rte_is_power_of_2(nb_desc)) {
		PMD_DRV_LOG_LINE(ERR,
			"Unsupported size of Rx queue: %d is not a power of 2.",
			nb_desc);
		return -EINVAL;
	}

	if (nb_desc > adapter->max_rx_ring_size) {
		PMD_DRV_LOG_LINE(ERR,
			"Unsupported size of Rx queue (max size: %d)",
			adapter->max_rx_ring_size);
		return -EINVAL;
	}

	/* ENA doesn't support buffers smaller than 1400 bytes */
	buffer_size = rte_pktmbuf_data_room_size(mp) - RTE_PKTMBUF_HEADROOM;
	if (buffer_size < ENA_RX_BUF_MIN_SIZE) {
		PMD_DRV_LOG_LINE(ERR,
			"Unsupported size of Rx buffer: %zu (min size: %d)",
			buffer_size, ENA_RX_BUF_MIN_SIZE);
		return -EINVAL;
	}

	rxq->port_id = dev->data->port_id;
	rxq->next_to_clean = 0;
	rxq->next_to_use = 0;
	rxq->ring_size = nb_desc;
	rxq->size_mask = nb_desc - 1;
	rxq->numa_socket_id = socket_id;
	rxq->mb_pool = mp;

	rxq->rx_buffer_info = rte_zmalloc_socket("rxq->buffer_info",
		sizeof(struct ena_rx_buffer) * nb_desc,
		RTE_CACHE_LINE_SIZE,
		socket_id);
	if (!rxq->rx_buffer_info) {
		PMD_DRV_LOG_LINE(ERR,
			"Failed to allocate memory for Rx buffer info");
		return -ENOMEM;
	}

	rxq->rx_refill_buffer = rte_zmalloc_socket("rxq->rx_refill_buffer",
		sizeof(struct rte_mbuf *) * nb_desc,
		RTE_CACHE_LINE_SIZE,
		socket_id);
	if (!rxq->rx_refill_buffer) {
		PMD_DRV_LOG_LINE(ERR,
			"Failed to allocate memory for Rx refill buffer");
		rte_free(rxq->rx_buffer_info);
		rxq->rx_buffer_info = NULL;
		return -ENOMEM;
	}

	rxq->empty_rx_reqs = rte_zmalloc_socket("rxq->empty_rx_reqs",
		sizeof(uint16_t) * nb_desc,
		RTE_CACHE_LINE_SIZE,
		socket_id);
	if (!rxq->empty_rx_reqs) {
		PMD_DRV_LOG_LINE(ERR,
			"Failed to allocate memory for empty Rx requests");
		rte_free(rxq->rx_buffer_info);
		rxq->rx_buffer_info = NULL;
		rte_free(rxq->rx_refill_buffer);
		rxq->rx_refill_buffer = NULL;
		return -ENOMEM;
	}

	for (i = 0; i < nb_desc; i++)
		rxq->empty_rx_reqs[i] = i;

	rxq->offloads = rx_conf->offloads | dev->data->dev_conf.rxmode.offloads;

	if (rx_conf->rx_free_thresh != 0) {
		rxq->rx_free_thresh = rx_conf->rx_free_thresh;
	} else {
		dyn_thresh = rxq->ring_size / ENA_REFILL_THRESH_DIVIDER;
		rxq->rx_free_thresh = RTE_MIN(dyn_thresh,
			(uint16_t)(ENA_REFILL_THRESH_PACKET));
	}

	/* Store pointer to this queue in upper layer */
	rxq->configured = 1;
	dev->data->rx_queues[queue_idx] = rxq;

	return 0;
}

static int ena_add_single_rx_desc(struct ena_com_io_sq *io_sq,
				  struct rte_mbuf *mbuf, uint16_t id)
{
	struct ena_com_buf ebuf;
	int rc;

	/* prepare physical address for DMA transaction */
	ebuf.paddr = mbuf->buf_iova + RTE_PKTMBUF_HEADROOM;
	ebuf.len = mbuf->buf_len - RTE_PKTMBUF_HEADROOM;

	/* pass resource to device */
	rc = ena_com_add_single_rx_desc(io_sq, &ebuf, id);
	if (unlikely(rc != 0))
		PMD_RX_LOG_LINE(WARNING, "Failed adding Rx desc");

	return rc;
}

static int ena_populate_rx_queue(struct ena_ring *rxq, unsigned int count)
{
	unsigned int i;
	int rc;
	uint16_t next_to_use = rxq->next_to_use;
	uint16_t req_id;
#ifdef RTE_ETHDEV_DEBUG_RX
	uint16_t in_use;
#endif
	struct rte_mbuf **mbufs
= rxq->rx_refill_buffer; 1778 1779 if (unlikely(!count)) 1780 return 0; 1781 1782 #ifdef RTE_ETHDEV_DEBUG_RX 1783 in_use = rxq->ring_size - 1 - 1784 ena_com_free_q_entries(rxq->ena_com_io_sq); 1785 if (unlikely((in_use + count) >= rxq->ring_size)) 1786 PMD_RX_LOG_LINE(ERR, "Bad Rx ring state"); 1787 #endif 1788 1789 /* get resources for incoming packets */ 1790 rc = rte_pktmbuf_alloc_bulk(rxq->mb_pool, mbufs, count); 1791 if (unlikely(rc < 0)) { 1792 rte_atomic64_inc(&rxq->adapter->drv_stats->rx_nombuf); 1793 ++rxq->rx_stats.mbuf_alloc_fail; 1794 PMD_RX_LOG_LINE(DEBUG, "There are not enough free buffers"); 1795 return 0; 1796 } 1797 1798 for (i = 0; i < count; i++) { 1799 struct rte_mbuf *mbuf = mbufs[i]; 1800 struct ena_rx_buffer *rx_info; 1801 1802 if (likely((i + 4) < count)) 1803 rte_prefetch0(mbufs[i + 4]); 1804 1805 req_id = rxq->empty_rx_reqs[next_to_use]; 1806 rx_info = &rxq->rx_buffer_info[req_id]; 1807 1808 rc = ena_add_single_rx_desc(rxq->ena_com_io_sq, mbuf, req_id); 1809 if (unlikely(rc != 0)) 1810 break; 1811 1812 rx_info->mbuf = mbuf; 1813 next_to_use = ENA_IDX_NEXT_MASKED(next_to_use, rxq->size_mask); 1814 } 1815 1816 if (unlikely(i < count)) { 1817 PMD_RX_LOG_LINE(WARNING, 1818 "Refilled Rx queue[%d] with only %d/%d buffers", 1819 rxq->id, i, count); 1820 rte_pktmbuf_free_bulk(&mbufs[i], count - i); 1821 ++rxq->rx_stats.refill_partial; 1822 } 1823 1824 /* When we submitted free resources to device... */ 1825 if (likely(i > 0)) { 1826 /* ...let HW know that it can fill buffers with data. */ 1827 ena_com_write_sq_doorbell(rxq->ena_com_io_sq); 1828 1829 rxq->next_to_use = next_to_use; 1830 } 1831 1832 return i; 1833 } 1834 1835 static size_t ena_get_metrics_entries(struct ena_adapter *adapter) 1836 { 1837 struct ena_com_dev *ena_dev = &adapter->ena_dev; 1838 size_t metrics_num = 0; 1839 1840 if (ena_com_get_cap(ena_dev, ENA_ADMIN_CUSTOMER_METRICS)) 1841 metrics_num = ENA_STATS_ARRAY_METRICS; 1842 else if (ena_com_get_cap(ena_dev, ENA_ADMIN_ENI_STATS)) 1843 metrics_num = ENA_STATS_ARRAY_METRICS_LEGACY; 1844 PMD_DRV_LOG_LINE(NOTICE, "0x%x customer metrics are supported", (unsigned int)metrics_num); 1845 if (metrics_num > ENA_MAX_CUSTOMER_METRICS) { 1846 PMD_DRV_LOG_LINE(NOTICE, "Not enough space for the requested customer metrics"); 1847 metrics_num = ENA_MAX_CUSTOMER_METRICS; 1848 } 1849 return metrics_num; 1850 } 1851 1852 static int ena_device_init(struct ena_adapter *adapter, 1853 struct rte_pci_device *pdev, 1854 struct ena_com_dev_get_features_ctx *get_feat_ctx) 1855 { 1856 struct ena_com_dev *ena_dev = &adapter->ena_dev; 1857 uint32_t aenq_groups; 1858 int rc; 1859 bool readless_supported; 1860 1861 /* Initialize mmio registers */ 1862 rc = ena_com_mmio_reg_read_request_init(ena_dev); 1863 if (rc) { 1864 PMD_DRV_LOG_LINE(ERR, "Failed to init MMIO read less"); 1865 return rc; 1866 } 1867 1868 /* The PCIe configuration space revision id indicate if mmio reg 1869 * read is disabled. 
1870 */ 1871 readless_supported = !(pdev->id.class_id & ENA_MMIO_DISABLE_REG_READ); 1872 ena_com_set_mmio_read_mode(ena_dev, readless_supported); 1873 1874 /* reset device */ 1875 rc = ena_com_dev_reset(ena_dev, ENA_REGS_RESET_NORMAL); 1876 if (rc) { 1877 PMD_DRV_LOG_LINE(ERR, "Cannot reset device"); 1878 goto err_mmio_read_less; 1879 } 1880 1881 /* check FW version */ 1882 rc = ena_com_validate_version(ena_dev); 1883 if (rc) { 1884 PMD_DRV_LOG_LINE(ERR, "Device version is too low"); 1885 goto err_mmio_read_less; 1886 } 1887 1888 ena_dev->dma_addr_bits = ena_com_get_dma_width(ena_dev); 1889 1890 /* ENA device administration layer init */ 1891 rc = ena_com_admin_init(ena_dev, &aenq_handlers); 1892 if (rc) { 1893 PMD_DRV_LOG_LINE(ERR, 1894 "Cannot initialize ENA admin queue"); 1895 goto err_mmio_read_less; 1896 } 1897 1898 /* To enable the msix interrupts the driver needs to know the number 1899 * of queues. So the driver uses polling mode to retrieve this 1900 * information. 1901 */ 1902 ena_com_set_admin_polling_mode(ena_dev, true); 1903 1904 ena_config_host_info(ena_dev); 1905 1906 /* Get Device Attributes and features */ 1907 rc = ena_com_get_dev_attr_feat(ena_dev, get_feat_ctx); 1908 if (rc) { 1909 PMD_DRV_LOG_LINE(ERR, 1910 "Cannot get attribute for ENA device, rc: %d", rc); 1911 goto err_admin_init; 1912 } 1913 1914 aenq_groups = BIT(ENA_ADMIN_LINK_CHANGE) | 1915 BIT(ENA_ADMIN_NOTIFICATION) | 1916 BIT(ENA_ADMIN_KEEP_ALIVE) | 1917 BIT(ENA_ADMIN_FATAL_ERROR) | 1918 BIT(ENA_ADMIN_WARNING) | 1919 BIT(ENA_ADMIN_CONF_NOTIFICATIONS); 1920 1921 aenq_groups &= get_feat_ctx->aenq.supported_groups; 1922 1923 adapter->all_aenq_groups = aenq_groups; 1924 /* The actual supported number of metrics is negotiated with the device at runtime */ 1925 adapter->metrics_num = ena_get_metrics_entries(adapter); 1926 1927 return 0; 1928 1929 err_admin_init: 1930 ena_com_admin_destroy(ena_dev); 1931 1932 err_mmio_read_less: 1933 ena_com_mmio_reg_read_request_destroy(ena_dev); 1934 1935 return rc; 1936 } 1937 1938 static void ena_control_path_handler(void *cb_arg) 1939 { 1940 struct rte_eth_dev *dev = cb_arg; 1941 struct ena_adapter *adapter = dev->data->dev_private; 1942 struct ena_com_dev *ena_dev = &adapter->ena_dev; 1943 1944 if (likely(adapter->state != ENA_ADAPTER_STATE_CLOSED)) { 1945 ena_com_admin_q_comp_intr_handler(ena_dev); 1946 ena_com_aenq_intr_handler(ena_dev, dev); 1947 } 1948 } 1949 1950 static void ena_control_path_poll_handler(void *cb_arg) 1951 { 1952 struct rte_eth_dev *dev = cb_arg; 1953 struct ena_adapter *adapter = dev->data->dev_private; 1954 int rc; 1955 1956 if (likely(adapter->state != ENA_ADAPTER_STATE_CLOSED)) { 1957 ena_control_path_handler(cb_arg); 1958 rc = rte_eal_alarm_set(adapter->control_path_poll_interval, 1959 ena_control_path_poll_handler, cb_arg); 1960 if (unlikely(rc != 0)) { 1961 PMD_DRV_LOG_LINE(ERR, "Failed to retrigger control path alarm"); 1962 ena_trigger_reset(adapter, ENA_REGS_RESET_GENERIC); 1963 } 1964 } 1965 } 1966 1967 static void check_for_missing_keep_alive(struct ena_adapter *adapter) 1968 { 1969 if (!(adapter->active_aenq_groups & BIT(ENA_ADMIN_KEEP_ALIVE))) 1970 return; 1971 1972 if (adapter->keep_alive_timeout == ENA_HW_HINTS_NO_TIMEOUT) 1973 return; 1974 1975 if (unlikely((rte_get_timer_cycles() - adapter->timestamp_wd) >= 1976 adapter->keep_alive_timeout)) { 1977 PMD_DRV_LOG_LINE(ERR, "Keep alive timeout"); 1978 ena_trigger_reset(adapter, ENA_REGS_RESET_KEEP_ALIVE_TO); 1979 ++adapter->dev_stats.wd_expired; 1980 } 1981 } 1982 1983 /* Check if admin 
queue is enabled */ 1984 static void check_for_admin_com_state(struct ena_adapter *adapter) 1985 { 1986 if (unlikely(!ena_com_get_admin_running_state(&adapter->ena_dev))) { 1987 PMD_DRV_LOG_LINE(ERR, "ENA admin queue is not in running state"); 1988 ena_trigger_reset(adapter, ENA_REGS_RESET_ADMIN_TO); 1989 } 1990 } 1991 1992 static int check_for_tx_completion_in_queue(struct ena_adapter *adapter, 1993 struct ena_ring *tx_ring) 1994 { 1995 struct ena_tx_buffer *tx_buf; 1996 uint64_t timestamp; 1997 uint64_t completion_delay; 1998 uint32_t missed_tx = 0; 1999 unsigned int i; 2000 int rc = 0; 2001 2002 for (i = 0; i < tx_ring->ring_size; ++i) { 2003 tx_buf = &tx_ring->tx_buffer_info[i]; 2004 timestamp = tx_buf->timestamp; 2005 2006 if (timestamp == 0) 2007 continue; 2008 2009 completion_delay = rte_get_timer_cycles() - timestamp; 2010 if (completion_delay > adapter->missing_tx_completion_to) { 2011 if (unlikely(!tx_buf->print_once)) { 2012 PMD_TX_LOG_LINE(WARNING, 2013 "Found a Tx that wasn't completed on time, qid %d, index %d. " 2014 "Missing Tx outstanding for %" PRIu64 " msecs.", 2015 tx_ring->id, i, completion_delay / 2016 rte_get_timer_hz() * 1000); 2017 tx_buf->print_once = true; 2018 } 2019 ++missed_tx; 2020 } 2021 } 2022 2023 if (unlikely(missed_tx > tx_ring->missing_tx_completion_threshold)) { 2024 PMD_DRV_LOG_LINE(ERR, 2025 "The number of lost Tx completions is above the threshold (%d > %d). " 2026 "Trigger the device reset.", 2027 missed_tx, 2028 tx_ring->missing_tx_completion_threshold); 2029 adapter->reset_reason = ENA_REGS_RESET_MISS_TX_CMPL; 2030 adapter->trigger_reset = true; 2031 rc = -EIO; 2032 } 2033 2034 tx_ring->tx_stats.missed_tx += missed_tx; 2035 2036 return rc; 2037 } 2038 2039 static void check_for_tx_completions(struct ena_adapter *adapter) 2040 { 2041 struct ena_ring *tx_ring; 2042 uint64_t tx_cleanup_delay; 2043 size_t qid; 2044 int budget; 2045 uint16_t nb_tx_queues = adapter->edev_data->nb_tx_queues; 2046 2047 if (adapter->missing_tx_completion_to == ENA_HW_HINTS_NO_TIMEOUT) 2048 return; 2049 2050 nb_tx_queues = adapter->edev_data->nb_tx_queues; 2051 budget = adapter->missing_tx_completion_budget; 2052 2053 qid = adapter->last_tx_comp_qid; 2054 while (budget-- > 0) { 2055 tx_ring = &adapter->tx_ring[qid]; 2056 2057 /* Tx cleanup is called only by the burst function and can be 2058 * called dynamically by the application. Also cleanup is 2059 * limited by the threshold. To avoid false detection of the 2060 * missing HW Tx completion, get the delay since last cleanup 2061 * function was called. 
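 *
 * Illustrative note: the delay check below only works if the application
 * lets the PMD reclaim Tx descriptors regularly, either via the Tx burst
 * path or the tx_done_cleanup ethdev hook. Assuming ena_tx_cleanup() is
 * wired up as that hook (an assumption, its registration is not shown in
 * this section), a minimal application-side sketch keeping the watchdog
 * satisfied could be:
 *
 *	while (keep_running) {
 *		nb_tx = rte_eth_tx_burst(port_id, queue_id, pkts, nb_pkts);
 *		// Periodically release completed descriptors so that
 *		// last_cleanup_ticks stays fresh (0 means "as many as possible").
 *		rte_eth_tx_done_cleanup(port_id, queue_id, 0);
 *	}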
2062 */ 2063 tx_cleanup_delay = rte_get_timer_cycles() - 2064 tx_ring->last_cleanup_ticks; 2065 if (tx_cleanup_delay < adapter->tx_cleanup_stall_delay) 2066 check_for_tx_completion_in_queue(adapter, tx_ring); 2067 qid = (qid + 1) % nb_tx_queues; 2068 } 2069 2070 adapter->last_tx_comp_qid = qid; 2071 } 2072 2073 static void ena_timer_wd_callback(__rte_unused struct rte_timer *timer, 2074 void *arg) 2075 { 2076 struct rte_eth_dev *dev = arg; 2077 struct ena_adapter *adapter = dev->data->dev_private; 2078 2079 if (unlikely(adapter->trigger_reset)) 2080 return; 2081 2082 check_for_missing_keep_alive(adapter); 2083 check_for_admin_com_state(adapter); 2084 check_for_tx_completions(adapter); 2085 2086 if (unlikely(adapter->trigger_reset)) { 2087 PMD_DRV_LOG_LINE(ERR, "Trigger reset is on"); 2088 rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_RESET, 2089 NULL); 2090 } 2091 } 2092 2093 static inline void 2094 set_default_llq_configurations(struct ena_llq_configurations *llq_config, 2095 struct ena_admin_feature_llq_desc *llq, 2096 bool use_large_llq_hdr) 2097 { 2098 llq_config->llq_header_location = ENA_ADMIN_INLINE_HEADER; 2099 llq_config->llq_stride_ctrl = ENA_ADMIN_MULTIPLE_DESCS_PER_ENTRY; 2100 llq_config->llq_num_decs_before_header = 2101 ENA_ADMIN_LLQ_NUM_DESCS_BEFORE_HEADER_2; 2102 2103 if (use_large_llq_hdr && 2104 (llq->entry_size_ctrl_supported & ENA_ADMIN_LIST_ENTRY_SIZE_256B)) { 2105 llq_config->llq_ring_entry_size = 2106 ENA_ADMIN_LIST_ENTRY_SIZE_256B; 2107 llq_config->llq_ring_entry_size_value = 256; 2108 } else { 2109 llq_config->llq_ring_entry_size = 2110 ENA_ADMIN_LIST_ENTRY_SIZE_128B; 2111 llq_config->llq_ring_entry_size_value = 128; 2112 } 2113 } 2114 2115 static int 2116 ena_set_queues_placement_policy(struct ena_adapter *adapter, 2117 struct ena_com_dev *ena_dev, 2118 struct ena_admin_feature_llq_desc *llq, 2119 struct ena_llq_configurations *llq_default_configurations) 2120 { 2121 int rc; 2122 u32 llq_feature_mask; 2123 2124 if (adapter->llq_header_policy == ENA_LLQ_POLICY_DISABLED) { 2125 PMD_DRV_LOG_LINE(WARNING, 2126 "NOTE: LLQ has been disabled as per user's request. " 2127 "This may lead to a huge performance degradation!"); 2128 ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST; 2129 return 0; 2130 } 2131 2132 llq_feature_mask = 1 << ENA_ADMIN_LLQ; 2133 if (!(ena_dev->supported_features & llq_feature_mask)) { 2134 PMD_DRV_LOG_LINE(INFO, 2135 "LLQ is not supported. Fallback to host mode policy."); 2136 ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST; 2137 return 0; 2138 } 2139 2140 if (adapter->dev_mem_base == NULL) { 2141 PMD_DRV_LOG_LINE(ERR, 2142 "LLQ is advertised as supported, but device doesn't expose mem bar"); 2143 ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST; 2144 return 0; 2145 } 2146 2147 rc = ena_com_config_dev_mode(ena_dev, llq, llq_default_configurations); 2148 if (unlikely(rc)) { 2149 PMD_INIT_LOG_LINE(WARNING, 2150 "Failed to config dev mode. 
Fallback to host mode policy."); 2151 ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST; 2152 return 0; 2153 } 2154 2155 /* Nothing to config, exit */ 2156 if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_HOST) 2157 return 0; 2158 2159 ena_dev->mem_bar = adapter->dev_mem_base; 2160 2161 return 0; 2162 } 2163 2164 static uint32_t ena_calc_max_io_queue_num(struct ena_com_dev *ena_dev, 2165 struct ena_com_dev_get_features_ctx *get_feat_ctx) 2166 { 2167 uint32_t io_tx_sq_num, io_tx_cq_num, io_rx_num, max_num_io_queues; 2168 2169 /* Regular queues capabilities */ 2170 if (ena_dev->supported_features & BIT(ENA_ADMIN_MAX_QUEUES_EXT)) { 2171 struct ena_admin_queue_ext_feature_fields *max_queue_ext = 2172 &get_feat_ctx->max_queue_ext.max_queue_ext; 2173 io_rx_num = RTE_MIN(max_queue_ext->max_rx_sq_num, 2174 max_queue_ext->max_rx_cq_num); 2175 io_tx_sq_num = max_queue_ext->max_tx_sq_num; 2176 io_tx_cq_num = max_queue_ext->max_tx_cq_num; 2177 } else { 2178 struct ena_admin_queue_feature_desc *max_queues = 2179 &get_feat_ctx->max_queues; 2180 io_tx_sq_num = max_queues->max_sq_num; 2181 io_tx_cq_num = max_queues->max_cq_num; 2182 io_rx_num = RTE_MIN(io_tx_sq_num, io_tx_cq_num); 2183 } 2184 2185 /* In case of LLQ use the llq number in the get feature cmd */ 2186 if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) 2187 io_tx_sq_num = get_feat_ctx->llq.max_llq_num; 2188 2189 max_num_io_queues = RTE_MIN(ENA_MAX_NUM_IO_QUEUES, io_rx_num); 2190 max_num_io_queues = RTE_MIN(max_num_io_queues, io_tx_sq_num); 2191 max_num_io_queues = RTE_MIN(max_num_io_queues, io_tx_cq_num); 2192 2193 if (unlikely(max_num_io_queues == 0)) { 2194 PMD_DRV_LOG_LINE(ERR, "Number of IO queues cannot not be 0"); 2195 return -EFAULT; 2196 } 2197 2198 return max_num_io_queues; 2199 } 2200 2201 static void 2202 ena_set_offloads(struct ena_offloads *offloads, 2203 struct ena_admin_feature_offload_desc *offload_desc) 2204 { 2205 if (offload_desc->tx & ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_IPV4_MASK) 2206 offloads->tx_offloads |= ENA_IPV4_TSO; 2207 2208 /* Tx IPv4 checksum offloads */ 2209 if (offload_desc->tx & 2210 ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L3_CSUM_IPV4_MASK) 2211 offloads->tx_offloads |= ENA_L3_IPV4_CSUM; 2212 if (offload_desc->tx & 2213 ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV4_CSUM_FULL_MASK) 2214 offloads->tx_offloads |= ENA_L4_IPV4_CSUM; 2215 if (offload_desc->tx & 2216 ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV4_CSUM_PART_MASK) 2217 offloads->tx_offloads |= ENA_L4_IPV4_CSUM_PARTIAL; 2218 2219 /* Tx IPv6 checksum offloads */ 2220 if (offload_desc->tx & 2221 ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV6_CSUM_FULL_MASK) 2222 offloads->tx_offloads |= ENA_L4_IPV6_CSUM; 2223 if (offload_desc->tx & 2224 ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV6_CSUM_PART_MASK) 2225 offloads->tx_offloads |= ENA_L4_IPV6_CSUM_PARTIAL; 2226 2227 /* Rx IPv4 checksum offloads */ 2228 if (offload_desc->rx_supported & 2229 ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L3_CSUM_IPV4_MASK) 2230 offloads->rx_offloads |= ENA_L3_IPV4_CSUM; 2231 if (offload_desc->rx_supported & 2232 ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L4_IPV4_CSUM_MASK) 2233 offloads->rx_offloads |= ENA_L4_IPV4_CSUM; 2234 2235 /* Rx IPv6 checksum offloads */ 2236 if (offload_desc->rx_supported & 2237 ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L4_IPV6_CSUM_MASK) 2238 offloads->rx_offloads |= ENA_L4_IPV6_CSUM; 2239 2240 if (offload_desc->rx_supported & 2241 ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_HASH_MASK) 2242 offloads->rx_offloads |= ENA_RX_RSS_HASH; 2243 } 2244 2245 static int 
ena_init_once(void) 2246 { 2247 static bool init_done; 2248 2249 if (init_done) 2250 return 0; 2251 2252 if (rte_eal_process_type() == RTE_PROC_PRIMARY) { 2253 /* Init timer subsystem for the ENA timer service. */ 2254 rte_timer_subsystem_init(); 2255 /* Register handler for requests from secondary processes. */ 2256 rte_mp_action_register(ENA_MP_NAME, ena_mp_primary_handle); 2257 } 2258 2259 init_done = true; 2260 return 0; 2261 } 2262 2263 static int eth_ena_dev_init(struct rte_eth_dev *eth_dev) 2264 { 2265 struct ena_calc_queue_size_ctx calc_queue_ctx = { 0 }; 2266 struct rte_pci_device *pci_dev; 2267 struct rte_intr_handle *intr_handle; 2268 struct ena_adapter *adapter = eth_dev->data->dev_private; 2269 struct ena_com_dev *ena_dev = &adapter->ena_dev; 2270 struct ena_com_dev_get_features_ctx get_feat_ctx; 2271 struct ena_llq_configurations llq_config; 2272 const char *queue_type_str; 2273 uint32_t max_num_io_queues; 2274 int rc; 2275 static int adapters_found; 2276 bool disable_meta_caching; 2277 2278 eth_dev->dev_ops = &ena_dev_ops; 2279 eth_dev->rx_pkt_burst = &eth_ena_recv_pkts; 2280 eth_dev->tx_pkt_burst = &eth_ena_xmit_pkts; 2281 eth_dev->tx_pkt_prepare = &eth_ena_prep_pkts; 2282 2283 rc = ena_init_once(); 2284 if (rc != 0) 2285 return rc; 2286 2287 if (rte_eal_process_type() != RTE_PROC_PRIMARY) 2288 return 0; 2289 2290 eth_dev->data->dev_flags |= RTE_ETH_DEV_AUTOFILL_QUEUE_XSTATS; 2291 2292 memset(adapter, 0, sizeof(struct ena_adapter)); 2293 ena_dev = &adapter->ena_dev; 2294 2295 adapter->edev_data = eth_dev->data; 2296 2297 pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev); 2298 2299 PMD_INIT_LOG_LINE(INFO, "Initializing " PCI_PRI_FMT, 2300 pci_dev->addr.domain, 2301 pci_dev->addr.bus, 2302 pci_dev->addr.devid, 2303 pci_dev->addr.function); 2304 2305 intr_handle = pci_dev->intr_handle; 2306 2307 adapter->regs = pci_dev->mem_resource[ENA_REGS_BAR].addr; 2308 adapter->dev_mem_base = pci_dev->mem_resource[ENA_MEM_BAR].addr; 2309 2310 if (!adapter->regs) { 2311 PMD_INIT_LOG_LINE(CRIT, "Failed to access registers BAR(%d)", 2312 ENA_REGS_BAR); 2313 return -ENXIO; 2314 } 2315 2316 ena_dev->reg_bar = adapter->regs; 2317 /* Pass device data as a pointer which can be passed to the IO functions 2318 * by the ena_com (for example - the memory allocation).
2319 */ 2320 ena_dev->dmadev = eth_dev->data; 2321 2322 adapter->id_number = adapters_found; 2323 2324 snprintf(adapter->name, ENA_NAME_MAX_LEN, "ena_%d", 2325 adapter->id_number); 2326 2327 /* Assign default devargs values */ 2328 adapter->missing_tx_completion_to = ENA_TX_TIMEOUT; 2329 2330 /* Get user bypass */ 2331 rc = ena_parse_devargs(adapter, pci_dev->device.devargs); 2332 if (rc != 0) { 2333 PMD_INIT_LOG_LINE(CRIT, "Failed to parse devargs"); 2334 goto err; 2335 } 2336 rc = ena_com_allocate_customer_metrics_buffer(ena_dev); 2337 if (rc != 0) { 2338 PMD_INIT_LOG_LINE(CRIT, "Failed to allocate customer metrics buffer"); 2339 goto err; 2340 } 2341 2342 /* device specific initialization routine */ 2343 rc = ena_device_init(adapter, pci_dev, &get_feat_ctx); 2344 if (rc) { 2345 PMD_INIT_LOG_LINE(CRIT, "Failed to init ENA device"); 2346 goto err_metrics_delete; 2347 } 2348 2349 /* Check if device supports LSC */ 2350 if (!(adapter->all_aenq_groups & BIT(ENA_ADMIN_LINK_CHANGE))) 2351 adapter->edev_data->dev_flags &= ~RTE_ETH_DEV_INTR_LSC; 2352 2353 bool use_large_llq_hdr = ena_use_large_llq_hdr(adapter, 2354 get_feat_ctx.llq.entry_size_recommended); 2355 set_default_llq_configurations(&llq_config, &get_feat_ctx.llq, use_large_llq_hdr); 2356 rc = ena_set_queues_placement_policy(adapter, ena_dev, 2357 &get_feat_ctx.llq, &llq_config); 2358 if (unlikely(rc)) { 2359 PMD_INIT_LOG_LINE(CRIT, "Failed to set placement policy"); 2360 return rc; 2361 } 2362 2363 if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_HOST) { 2364 queue_type_str = "Regular"; 2365 } else { 2366 queue_type_str = "Low latency"; 2367 PMD_DRV_LOG_LINE(INFO, "LLQ entry size %uB", llq_config.llq_ring_entry_size_value); 2368 } 2369 PMD_DRV_LOG_LINE(INFO, "Placement policy: %s", queue_type_str); 2370 2371 calc_queue_ctx.ena_dev = ena_dev; 2372 calc_queue_ctx.get_feat_ctx = &get_feat_ctx; 2373 2374 max_num_io_queues = ena_calc_max_io_queue_num(ena_dev, &get_feat_ctx); 2375 rc = ena_calc_io_queue_size(&calc_queue_ctx, use_large_llq_hdr); 2376 if (unlikely((rc != 0) || (max_num_io_queues == 0))) { 2377 rc = -EFAULT; 2378 goto err_device_destroy; 2379 } 2380 2381 adapter->max_tx_ring_size = calc_queue_ctx.max_tx_queue_size; 2382 adapter->max_rx_ring_size = calc_queue_ctx.max_rx_queue_size; 2383 adapter->max_tx_sgl_size = calc_queue_ctx.max_tx_sgl_size; 2384 adapter->max_rx_sgl_size = calc_queue_ctx.max_rx_sgl_size; 2385 adapter->max_num_io_queues = max_num_io_queues; 2386 2387 if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) { 2388 disable_meta_caching = 2389 !!(get_feat_ctx.llq.accel_mode.u.get.supported_flags & 2390 BIT(ENA_ADMIN_DISABLE_META_CACHING)); 2391 } else { 2392 disable_meta_caching = false; 2393 } 2394 2395 /* prepare ring structures */ 2396 ena_init_rings(adapter, disable_meta_caching); 2397 2398 ena_config_debug_area(adapter); 2399 2400 /* Set max MTU for this device */ 2401 adapter->max_mtu = get_feat_ctx.dev_attr.max_mtu; 2402 2403 ena_set_offloads(&adapter->offloads, &get_feat_ctx.offload); 2404 2405 /* Copy MAC address and point DPDK to it */ 2406 eth_dev->data->mac_addrs = (struct rte_ether_addr *)adapter->mac_addr; 2407 rte_ether_addr_copy((struct rte_ether_addr *) 2408 get_feat_ctx.dev_attr.mac_addr, 2409 (struct rte_ether_addr *)adapter->mac_addr); 2410 2411 rc = ena_com_rss_init(ena_dev, ENA_RX_RSS_TABLE_LOG_SIZE); 2412 if (unlikely(rc != 0)) { 2413 PMD_DRV_LOG_LINE(ERR, "Failed to initialize RSS in ENA device"); 2414 goto err_delete_debug_area; 2415 } 2416 2417 adapter->drv_stats = 
rte_zmalloc("adapter stats", 2418 sizeof(*adapter->drv_stats), 2419 RTE_CACHE_LINE_SIZE); 2420 if (!adapter->drv_stats) { 2421 PMD_DRV_LOG_LINE(ERR, 2422 "Failed to allocate memory for adapter statistics"); 2423 rc = -ENOMEM; 2424 goto err_rss_destroy; 2425 } 2426 2427 rte_spinlock_init(&adapter->admin_lock); 2428 2429 if (!adapter->control_path_poll_interval) { 2430 /* Control path interrupt mode */ 2431 rte_intr_callback_register(intr_handle, ena_control_path_handler, eth_dev); 2432 rte_intr_enable(intr_handle); 2433 ena_com_set_admin_polling_mode(ena_dev, false); 2434 } else { 2435 /* Control path polling mode */ 2436 rc = rte_eal_alarm_set(adapter->control_path_poll_interval, 2437 ena_control_path_poll_handler, eth_dev); 2438 if (unlikely(rc != 0)) { 2439 PMD_DRV_LOG_LINE(ERR, "Failed to set control path alarm"); 2440 goto err_control_path_destroy; 2441 } 2442 } 2443 ena_com_admin_aenq_enable(ena_dev); 2444 rte_timer_init(&adapter->timer_wd); 2445 2446 adapters_found++; 2447 adapter->state = ENA_ADAPTER_STATE_INIT; 2448 2449 return 0; 2450 err_control_path_destroy: 2451 rte_free(adapter->drv_stats); 2452 err_rss_destroy: 2453 ena_com_rss_destroy(ena_dev); 2454 err_delete_debug_area: 2455 ena_com_delete_debug_area(ena_dev); 2456 2457 err_device_destroy: 2458 ena_com_delete_host_info(ena_dev); 2459 ena_com_admin_destroy(ena_dev); 2460 err_metrics_delete: 2461 ena_com_delete_customer_metrics_buffer(ena_dev); 2462 err: 2463 return rc; 2464 } 2465 2466 static int eth_ena_dev_uninit(struct rte_eth_dev *eth_dev) 2467 { 2468 if (rte_eal_process_type() != RTE_PROC_PRIMARY) 2469 return 0; 2470 2471 ena_close(eth_dev); 2472 2473 return 0; 2474 } 2475 2476 static int ena_dev_configure(struct rte_eth_dev *dev) 2477 { 2478 struct ena_adapter *adapter = dev->data->dev_private; 2479 int rc; 2480 2481 adapter->state = ENA_ADAPTER_STATE_CONFIG; 2482 2483 if (dev->data->dev_conf.rxmode.mq_mode & RTE_ETH_MQ_RX_RSS_FLAG) 2484 dev->data->dev_conf.rxmode.offloads |= RTE_ETH_RX_OFFLOAD_RSS_HASH; 2485 dev->data->dev_conf.txmode.offloads |= RTE_ETH_TX_OFFLOAD_MULTI_SEGS; 2486 2487 /* Scattered Rx cannot be turned off in the HW, so this capability must 2488 * be forced. 2489 */ 2490 dev->data->scattered_rx = 1; 2491 2492 adapter->last_tx_comp_qid = 0; 2493 2494 adapter->missing_tx_completion_budget = 2495 RTE_MIN(ENA_MONITORED_TX_QUEUES, dev->data->nb_tx_queues); 2496 2497 /* To avoid detection of the spurious Tx completion timeout due to 2498 * application not calling the Tx cleanup function, set timeout for the 2499 * Tx queue which should be half of the missing completion timeout for a 2500 * safety. If there will be a lot of missing Tx completions in the 2501 * queue, they will be detected sooner or later. 
2502 */ 2503 adapter->tx_cleanup_stall_delay = adapter->missing_tx_completion_to / 2; 2504 2505 rc = ena_configure_aenq(adapter); 2506 2507 return rc; 2508 } 2509 2510 static void ena_init_rings(struct ena_adapter *adapter, 2511 bool disable_meta_caching) 2512 { 2513 size_t i; 2514 2515 for (i = 0; i < adapter->max_num_io_queues; i++) { 2516 struct ena_ring *ring = &adapter->tx_ring[i]; 2517 2518 ring->configured = 0; 2519 ring->type = ENA_RING_TYPE_TX; 2520 ring->adapter = adapter; 2521 ring->id = i; 2522 ring->tx_mem_queue_type = adapter->ena_dev.tx_mem_queue_type; 2523 ring->tx_max_header_size = adapter->ena_dev.tx_max_header_size; 2524 ring->sgl_size = adapter->max_tx_sgl_size; 2525 ring->disable_meta_caching = disable_meta_caching; 2526 } 2527 2528 for (i = 0; i < adapter->max_num_io_queues; i++) { 2529 struct ena_ring *ring = &adapter->rx_ring[i]; 2530 2531 ring->configured = 0; 2532 ring->type = ENA_RING_TYPE_RX; 2533 ring->adapter = adapter; 2534 ring->id = i; 2535 ring->sgl_size = adapter->max_rx_sgl_size; 2536 } 2537 } 2538 2539 static uint64_t ena_get_rx_port_offloads(struct ena_adapter *adapter) 2540 { 2541 uint64_t port_offloads = 0; 2542 2543 if (adapter->offloads.rx_offloads & ENA_L3_IPV4_CSUM) 2544 port_offloads |= RTE_ETH_RX_OFFLOAD_IPV4_CKSUM; 2545 2546 if (adapter->offloads.rx_offloads & 2547 (ENA_L4_IPV4_CSUM | ENA_L4_IPV6_CSUM)) 2548 port_offloads |= 2549 RTE_ETH_RX_OFFLOAD_UDP_CKSUM | RTE_ETH_RX_OFFLOAD_TCP_CKSUM; 2550 2551 if (adapter->offloads.rx_offloads & ENA_RX_RSS_HASH) 2552 port_offloads |= RTE_ETH_RX_OFFLOAD_RSS_HASH; 2553 2554 port_offloads |= RTE_ETH_RX_OFFLOAD_SCATTER; 2555 2556 return port_offloads; 2557 } 2558 2559 static uint64_t ena_get_tx_port_offloads(struct ena_adapter *adapter) 2560 { 2561 uint64_t port_offloads = 0; 2562 2563 if (adapter->offloads.tx_offloads & ENA_IPV4_TSO) 2564 port_offloads |= RTE_ETH_TX_OFFLOAD_TCP_TSO; 2565 2566 if (adapter->offloads.tx_offloads & ENA_L3_IPV4_CSUM) 2567 port_offloads |= RTE_ETH_TX_OFFLOAD_IPV4_CKSUM; 2568 if (adapter->offloads.tx_offloads & 2569 (ENA_L4_IPV4_CSUM_PARTIAL | ENA_L4_IPV4_CSUM | 2570 ENA_L4_IPV6_CSUM | ENA_L4_IPV6_CSUM_PARTIAL)) 2571 port_offloads |= 2572 RTE_ETH_TX_OFFLOAD_UDP_CKSUM | RTE_ETH_TX_OFFLOAD_TCP_CKSUM; 2573 2574 port_offloads |= RTE_ETH_TX_OFFLOAD_MULTI_SEGS; 2575 2576 port_offloads |= RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE; 2577 2578 return port_offloads; 2579 } 2580 2581 static uint64_t ena_get_rx_queue_offloads(struct ena_adapter *adapter) 2582 { 2583 RTE_SET_USED(adapter); 2584 2585 return 0; 2586 } 2587 2588 static uint64_t ena_get_tx_queue_offloads(struct ena_adapter *adapter) 2589 { 2590 uint64_t queue_offloads = 0; 2591 RTE_SET_USED(adapter); 2592 2593 queue_offloads |= RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE; 2594 2595 return queue_offloads; 2596 } 2597 2598 static int ena_infos_get(struct rte_eth_dev *dev, 2599 struct rte_eth_dev_info *dev_info) 2600 { 2601 struct ena_adapter *adapter; 2602 struct ena_com_dev *ena_dev; 2603 2604 ena_assert_msg(dev->data != NULL, "Uninitialized device\n"); 2605 ena_assert_msg(dev->data->dev_private != NULL, "Uninitialized device\n"); 2606 adapter = dev->data->dev_private; 2607 2608 ena_dev = &adapter->ena_dev; 2609 ena_assert_msg(ena_dev != NULL, "Uninitialized device\n"); 2610 2611 dev_info->speed_capa = 2612 RTE_ETH_LINK_SPEED_1G | 2613 RTE_ETH_LINK_SPEED_2_5G | 2614 RTE_ETH_LINK_SPEED_5G | 2615 RTE_ETH_LINK_SPEED_10G | 2616 RTE_ETH_LINK_SPEED_25G | 2617 RTE_ETH_LINK_SPEED_40G | 2618 RTE_ETH_LINK_SPEED_50G | 2619 RTE_ETH_LINK_SPEED_100G | 2620 
RTE_ETH_LINK_SPEED_200G | 2621 RTE_ETH_LINK_SPEED_400G; 2622 2623 /* Inform framework about available features */ 2624 dev_info->rx_offload_capa = ena_get_rx_port_offloads(adapter); 2625 dev_info->tx_offload_capa = ena_get_tx_port_offloads(adapter); 2626 dev_info->rx_queue_offload_capa = ena_get_rx_queue_offloads(adapter); 2627 dev_info->tx_queue_offload_capa = ena_get_tx_queue_offloads(adapter); 2628 2629 dev_info->flow_type_rss_offloads = ENA_ALL_RSS_HF; 2630 dev_info->hash_key_size = ENA_HASH_KEY_SIZE; 2631 2632 dev_info->min_rx_bufsize = ENA_MIN_FRAME_LEN; 2633 dev_info->max_rx_pktlen = adapter->max_mtu + RTE_ETHER_HDR_LEN + 2634 RTE_ETHER_CRC_LEN; 2635 dev_info->min_mtu = ENA_MIN_MTU; 2636 dev_info->max_mtu = adapter->max_mtu; 2637 dev_info->max_mac_addrs = 1; 2638 2639 dev_info->max_rx_queues = adapter->max_num_io_queues; 2640 dev_info->max_tx_queues = adapter->max_num_io_queues; 2641 dev_info->reta_size = ENA_RX_RSS_TABLE_SIZE; 2642 2643 dev_info->rx_desc_lim.nb_max = adapter->max_rx_ring_size; 2644 dev_info->rx_desc_lim.nb_min = ENA_MIN_RING_DESC; 2645 dev_info->rx_desc_lim.nb_seg_max = RTE_MIN(ENA_PKT_MAX_BUFS, 2646 adapter->max_rx_sgl_size); 2647 dev_info->rx_desc_lim.nb_mtu_seg_max = RTE_MIN(ENA_PKT_MAX_BUFS, 2648 adapter->max_rx_sgl_size); 2649 2650 dev_info->tx_desc_lim.nb_max = adapter->max_tx_ring_size; 2651 dev_info->tx_desc_lim.nb_min = ENA_MIN_RING_DESC; 2652 dev_info->tx_desc_lim.nb_seg_max = RTE_MIN(ENA_PKT_MAX_BUFS, 2653 adapter->max_tx_sgl_size); 2654 dev_info->tx_desc_lim.nb_mtu_seg_max = RTE_MIN(ENA_PKT_MAX_BUFS, 2655 adapter->max_tx_sgl_size); 2656 2657 dev_info->default_rxportconf.ring_size = RTE_MIN(ENA_DEFAULT_RING_SIZE, 2658 dev_info->rx_desc_lim.nb_max); 2659 dev_info->default_txportconf.ring_size = RTE_MIN(ENA_DEFAULT_RING_SIZE, 2660 dev_info->tx_desc_lim.nb_max); 2661 2662 dev_info->err_handle_mode = RTE_ETH_ERROR_HANDLE_MODE_PASSIVE; 2663 2664 return 0; 2665 } 2666 2667 static inline void ena_init_rx_mbuf(struct rte_mbuf *mbuf, uint16_t len) 2668 { 2669 mbuf->data_len = len; 2670 mbuf->data_off = RTE_PKTMBUF_HEADROOM; 2671 mbuf->refcnt = 1; 2672 mbuf->next = NULL; 2673 } 2674 2675 static struct rte_mbuf *ena_rx_mbuf(struct ena_ring *rx_ring, 2676 struct ena_com_rx_buf_info *ena_bufs, 2677 uint32_t descs, 2678 uint16_t *next_to_clean, 2679 uint8_t offset) 2680 { 2681 struct rte_mbuf *mbuf; 2682 struct rte_mbuf *mbuf_head; 2683 struct ena_rx_buffer *rx_info; 2684 int rc; 2685 uint16_t ntc, len, req_id, buf = 0; 2686 2687 if (unlikely(descs == 0)) 2688 return NULL; 2689 2690 ntc = *next_to_clean; 2691 2692 len = ena_bufs[buf].len; 2693 req_id = ena_bufs[buf].req_id; 2694 2695 rx_info = &rx_ring->rx_buffer_info[req_id]; 2696 2697 mbuf = rx_info->mbuf; 2698 RTE_ASSERT(mbuf != NULL); 2699 2700 ena_init_rx_mbuf(mbuf, len); 2701 2702 /* Fill the mbuf head with the data specific for 1st segment. */ 2703 mbuf_head = mbuf; 2704 mbuf_head->nb_segs = descs; 2705 mbuf_head->port = rx_ring->port_id; 2706 mbuf_head->pkt_len = len; 2707 mbuf_head->data_off += offset; 2708 2709 rx_info->mbuf = NULL; 2710 rx_ring->empty_rx_reqs[ntc] = req_id; 2711 ntc = ENA_IDX_NEXT_MASKED(ntc, rx_ring->size_mask); 2712 2713 while (--descs) { 2714 ++buf; 2715 len = ena_bufs[buf].len; 2716 req_id = ena_bufs[buf].req_id; 2717 2718 rx_info = &rx_ring->rx_buffer_info[req_id]; 2719 RTE_ASSERT(rx_info->mbuf != NULL); 2720 2721 if (unlikely(len == 0)) { 2722 /* 2723 * Some devices can pass descriptor with the length 0. 
2724 * To avoid confusion, the PMD is simply putting the 2725 * descriptor back, as it was never used. We'll avoid 2726 * mbuf allocation that way. 2727 */ 2728 rc = ena_add_single_rx_desc(rx_ring->ena_com_io_sq, 2729 rx_info->mbuf, req_id); 2730 if (unlikely(rc != 0)) { 2731 /* Free the mbuf in case of an error. */ 2732 rte_mbuf_raw_free(rx_info->mbuf); 2733 } else { 2734 /* 2735 * If there was no error, just exit the loop as 2736 * 0 length descriptor is always the last one. 2737 */ 2738 break; 2739 } 2740 } else { 2741 /* Create an mbuf chain. */ 2742 mbuf->next = rx_info->mbuf; 2743 mbuf = mbuf->next; 2744 2745 ena_init_rx_mbuf(mbuf, len); 2746 mbuf_head->pkt_len += len; 2747 } 2748 2749 /* 2750 * Mark the descriptor as depleted and perform necessary 2751 * cleanup. 2752 * This code will execute in two cases: 2753 * 1. Descriptor len was greater than 0 - normal situation. 2754 * 2. Descriptor len was 0 and we failed to add the descriptor 2755 * to the device. In that situation, we should try to add 2756 * the mbuf again in the populate routine and mark the 2757 * descriptor as used up by the device. 2758 */ 2759 rx_info->mbuf = NULL; 2760 rx_ring->empty_rx_reqs[ntc] = req_id; 2761 ntc = ENA_IDX_NEXT_MASKED(ntc, rx_ring->size_mask); 2762 } 2763 2764 *next_to_clean = ntc; 2765 2766 return mbuf_head; 2767 } 2768 2769 static uint16_t eth_ena_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, 2770 uint16_t nb_pkts) 2771 { 2772 struct ena_ring *rx_ring = (struct ena_ring *)(rx_queue); 2773 unsigned int free_queue_entries; 2774 uint16_t next_to_clean = rx_ring->next_to_clean; 2775 uint16_t descs_in_use; 2776 struct rte_mbuf *mbuf; 2777 uint16_t completed; 2778 struct ena_com_rx_ctx ena_rx_ctx; 2779 int i, rc = 0; 2780 2781 #ifdef RTE_ETHDEV_DEBUG_RX 2782 /* Check adapter state */ 2783 if (unlikely(rx_ring->adapter->state != ENA_ADAPTER_STATE_RUNNING)) { 2784 PMD_RX_LOG_LINE(ALERT, 2785 "Trying to receive pkts while device is NOT running"); 2786 return 0; 2787 } 2788 #endif 2789 2790 descs_in_use = rx_ring->ring_size - 2791 ena_com_free_q_entries(rx_ring->ena_com_io_sq) - 1; 2792 nb_pkts = RTE_MIN(descs_in_use, nb_pkts); 2793 2794 for (completed = 0; completed < nb_pkts; completed++) { 2795 ena_rx_ctx.max_bufs = rx_ring->sgl_size; 2796 ena_rx_ctx.ena_bufs = rx_ring->ena_bufs; 2797 ena_rx_ctx.descs = 0; 2798 ena_rx_ctx.pkt_offset = 0; 2799 /* receive packet context */ 2800 rc = ena_com_rx_pkt(rx_ring->ena_com_io_cq, 2801 rx_ring->ena_com_io_sq, 2802 &ena_rx_ctx); 2803 if (unlikely(rc)) { 2804 PMD_RX_LOG_LINE(ERR, 2805 "Failed to get the packet from the device, rc: %d", 2806 rc); 2807 if (rc == ENA_COM_NO_SPACE) { 2808 ++rx_ring->rx_stats.bad_desc_num; 2809 ena_trigger_reset(rx_ring->adapter, 2810 ENA_REGS_RESET_TOO_MANY_RX_DESCS); 2811 } else { 2812 ++rx_ring->rx_stats.bad_req_id; 2813 ena_trigger_reset(rx_ring->adapter, 2814 ENA_REGS_RESET_INV_RX_REQ_ID); 2815 } 2816 return 0; 2817 } 2818 2819 mbuf = ena_rx_mbuf(rx_ring, 2820 ena_rx_ctx.ena_bufs, 2821 ena_rx_ctx.descs, 2822 &next_to_clean, 2823 ena_rx_ctx.pkt_offset); 2824 if (unlikely(mbuf == NULL)) { 2825 for (i = 0; i < ena_rx_ctx.descs; ++i) { 2826 rx_ring->empty_rx_reqs[next_to_clean] = 2827 rx_ring->ena_bufs[i].req_id; 2828 next_to_clean = ENA_IDX_NEXT_MASKED( 2829 next_to_clean, rx_ring->size_mask); 2830 } 2831 break; 2832 } 2833 2834 /* fill mbuf attributes if any */ 2835 ena_rx_mbuf_prepare(rx_ring, mbuf, &ena_rx_ctx); 2836 2837 if (unlikely(mbuf->ol_flags & 2838 (RTE_MBUF_F_RX_IP_CKSUM_BAD | RTE_MBUF_F_RX_L4_CKSUM_BAD))) 2839 
rte_atomic64_inc(&rx_ring->adapter->drv_stats->ierrors); 2840 2841 rx_pkts[completed] = mbuf; 2842 rx_ring->rx_stats.bytes += mbuf->pkt_len; 2843 } 2844 2845 rx_ring->rx_stats.cnt += completed; 2846 rx_ring->next_to_clean = next_to_clean; 2847 2848 free_queue_entries = ena_com_free_q_entries(rx_ring->ena_com_io_sq); 2849 2850 /* Burst refill to save doorbells, memory barriers, const interval */ 2851 if (free_queue_entries >= rx_ring->rx_free_thresh) { 2852 ena_populate_rx_queue(rx_ring, free_queue_entries); 2853 } 2854 2855 return completed; 2856 } 2857 2858 static uint16_t 2859 eth_ena_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, 2860 uint16_t nb_pkts) 2861 { 2862 int32_t ret; 2863 uint32_t i; 2864 struct rte_mbuf *m; 2865 struct ena_ring *tx_ring = (struct ena_ring *)(tx_queue); 2866 struct ena_adapter *adapter = tx_ring->adapter; 2867 struct rte_ipv4_hdr *ip_hdr; 2868 uint64_t ol_flags; 2869 uint64_t l4_csum_flag; 2870 uint64_t dev_offload_capa; 2871 uint16_t frag_field; 2872 bool need_pseudo_csum; 2873 2874 dev_offload_capa = adapter->offloads.tx_offloads; 2875 for (i = 0; i != nb_pkts; i++) { 2876 m = tx_pkts[i]; 2877 ol_flags = m->ol_flags; 2878 2879 /* Check if any offload flag was set */ 2880 if (ol_flags == 0) 2881 continue; 2882 2883 l4_csum_flag = ol_flags & RTE_MBUF_F_TX_L4_MASK; 2884 /* SCTP checksum offload is not supported by the ENA. */ 2885 if ((ol_flags & ENA_TX_OFFLOAD_NOTSUP_MASK) || 2886 l4_csum_flag == RTE_MBUF_F_TX_SCTP_CKSUM) { 2887 PMD_TX_LOG_LINE(DEBUG, 2888 "mbuf[%" PRIu32 "] has unsupported offloads flags set: 0x%" PRIu64, 2889 i, ol_flags); 2890 rte_errno = ENOTSUP; 2891 return i; 2892 } 2893 2894 if (unlikely(m->nb_segs >= tx_ring->sgl_size && 2895 !(tx_ring->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV && 2896 m->nb_segs == tx_ring->sgl_size && 2897 m->data_len < tx_ring->tx_max_header_size))) { 2898 PMD_TX_LOG_LINE(DEBUG, 2899 "mbuf[%" PRIu32 "] has too many segments: %" PRIu16, 2900 i, m->nb_segs); 2901 rte_errno = EINVAL; 2902 return i; 2903 } 2904 2905 #ifdef RTE_LIBRTE_ETHDEV_DEBUG 2906 /* Check if requested offload is also enabled for the queue */ 2907 if ((ol_flags & RTE_MBUF_F_TX_IP_CKSUM && 2908 !(tx_ring->offloads & RTE_ETH_TX_OFFLOAD_IPV4_CKSUM)) || 2909 (l4_csum_flag == RTE_MBUF_F_TX_TCP_CKSUM && 2910 !(tx_ring->offloads & RTE_ETH_TX_OFFLOAD_TCP_CKSUM)) || 2911 (l4_csum_flag == RTE_MBUF_F_TX_UDP_CKSUM && 2912 !(tx_ring->offloads & RTE_ETH_TX_OFFLOAD_UDP_CKSUM))) { 2913 PMD_TX_LOG_LINE(DEBUG, 2914 "mbuf[%" PRIu32 "]: requested offloads: %" PRIu16 " are not enabled for the queue[%u]", 2915 i, m->nb_segs, tx_ring->id); 2916 rte_errno = EINVAL; 2917 return i; 2918 } 2919 2920 /* The caller is obligated to set l2 and l3 len if any cksum 2921 * offload is enabled. 2922 */ 2923 if (unlikely(ol_flags & (RTE_MBUF_F_TX_IP_CKSUM | RTE_MBUF_F_TX_L4_MASK) && 2924 (m->l2_len == 0 || m->l3_len == 0))) { 2925 PMD_TX_LOG_LINE(DEBUG, 2926 "mbuf[%" PRIu32 "]: l2_len or l3_len values are 0 while the offload was requested", 2927 i); 2928 rte_errno = EINVAL; 2929 return i; 2930 } 2931 ret = rte_validate_tx_offload(m); 2932 if (ret != 0) { 2933 rte_errno = -ret; 2934 return i; 2935 } 2936 #endif 2937 2938 /* Verify HW support for requested offloads and determine if 2939 * pseudo header checksum is needed. 
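 *
 * Application-side sketch (illustrative only, not part of the driver): for
 * the checks below to pass, the caller must describe the packet through the
 * usual mbuf Tx offload fields before calling tx_prepare/tx_burst, e.g. for
 * an IPv4/TCP frame:
 *
 *	m->l2_len   = sizeof(struct rte_ether_hdr);
 *	m->l3_len   = sizeof(struct rte_ipv4_hdr);
 *	m->ol_flags = RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_IP_CKSUM |
 *		      RTE_MBUF_F_TX_TCP_CKSUM;
 *
 * When the device only advertises the "partial" L4 checksum capability,
 * need_pseudo_csum is set and rte_net_intel_cksum_flags_prepare() fills in
 * the pseudo-header checksum in place before the packet is handed to HW.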
2940 */ 2941 need_pseudo_csum = false; 2942 if (ol_flags & RTE_MBUF_F_TX_IPV4) { 2943 if (ol_flags & RTE_MBUF_F_TX_IP_CKSUM && 2944 !(dev_offload_capa & ENA_L3_IPV4_CSUM)) { 2945 rte_errno = ENOTSUP; 2946 return i; 2947 } 2948 2949 if (ol_flags & RTE_MBUF_F_TX_TCP_SEG && 2950 !(dev_offload_capa & ENA_IPV4_TSO)) { 2951 rte_errno = ENOTSUP; 2952 return i; 2953 } 2954 2955 /* Check HW capabilities and if pseudo csum is needed 2956 * for L4 offloads. 2957 */ 2958 if (l4_csum_flag != RTE_MBUF_F_TX_L4_NO_CKSUM && 2959 !(dev_offload_capa & ENA_L4_IPV4_CSUM)) { 2960 if (dev_offload_capa & 2961 ENA_L4_IPV4_CSUM_PARTIAL) { 2962 need_pseudo_csum = true; 2963 } else { 2964 rte_errno = ENOTSUP; 2965 return i; 2966 } 2967 } 2968 2969 /* Parse the DF flag */ 2970 ip_hdr = rte_pktmbuf_mtod_offset(m, 2971 struct rte_ipv4_hdr *, m->l2_len); 2972 frag_field = rte_be_to_cpu_16(ip_hdr->fragment_offset); 2973 if (frag_field & RTE_IPV4_HDR_DF_FLAG) { 2974 m->packet_type |= RTE_PTYPE_L4_NONFRAG; 2975 } else if (ol_flags & RTE_MBUF_F_TX_TCP_SEG) { 2976 /* In case we are supposed to TSO and have DF 2977 * not set (DF=0) hardware must be provided with 2978 * partial checksum. 2979 */ 2980 need_pseudo_csum = true; 2981 } 2982 } else if (ol_flags & RTE_MBUF_F_TX_IPV6) { 2983 /* There is no support for IPv6 TSO as for now. */ 2984 if (ol_flags & RTE_MBUF_F_TX_TCP_SEG) { 2985 rte_errno = ENOTSUP; 2986 return i; 2987 } 2988 2989 /* Check HW capabilities and if pseudo csum is needed */ 2990 if (l4_csum_flag != RTE_MBUF_F_TX_L4_NO_CKSUM && 2991 !(dev_offload_capa & ENA_L4_IPV6_CSUM)) { 2992 if (dev_offload_capa & 2993 ENA_L4_IPV6_CSUM_PARTIAL) { 2994 need_pseudo_csum = true; 2995 } else { 2996 rte_errno = ENOTSUP; 2997 return i; 2998 } 2999 } 3000 } 3001 3002 if (need_pseudo_csum) { 3003 ret = rte_net_intel_cksum_flags_prepare(m, ol_flags); 3004 if (ret != 0) { 3005 rte_errno = -ret; 3006 return i; 3007 } 3008 } 3009 } 3010 3011 return i; 3012 } 3013 3014 static void ena_update_hints(struct ena_adapter *adapter, 3015 struct ena_admin_ena_hw_hints *hints) 3016 { 3017 if (hints->admin_completion_tx_timeout) 3018 adapter->ena_dev.admin_queue.completion_timeout = 3019 hints->admin_completion_tx_timeout * 1000; 3020 3021 if (hints->mmio_read_timeout) 3022 /* convert to usec */ 3023 adapter->ena_dev.mmio_read.reg_read_to = 3024 hints->mmio_read_timeout * 1000; 3025 3026 if (hints->driver_watchdog_timeout) { 3027 if (hints->driver_watchdog_timeout == ENA_HW_HINTS_NO_TIMEOUT) 3028 adapter->keep_alive_timeout = ENA_HW_HINTS_NO_TIMEOUT; 3029 else 3030 // Convert msecs to ticks 3031 adapter->keep_alive_timeout = 3032 (hints->driver_watchdog_timeout * 3033 rte_get_timer_hz()) / 1000; 3034 } 3035 } 3036 3037 static void ena_tx_map_mbuf(struct ena_ring *tx_ring, 3038 struct ena_tx_buffer *tx_info, 3039 struct rte_mbuf *mbuf, 3040 void **push_header, 3041 uint16_t *header_len) 3042 { 3043 struct ena_com_buf *ena_buf; 3044 uint16_t delta, seg_len, push_len; 3045 3046 delta = 0; 3047 seg_len = mbuf->data_len; 3048 3049 tx_info->mbuf = mbuf; 3050 ena_buf = tx_info->bufs; 3051 3052 if (tx_ring->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) { 3053 /* 3054 * Tx header might be (and will be in most cases) smaller than 3055 * tx_max_header_size. But it's not an issue to send more data 3056 * to the device, than actually needed if the mbuf size is 3057 * greater than tx_max_header_size. 
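 *
 * Worked example (sizes are hypothetical): with tx_max_header_size of 96
 * bytes, a packet of at least 96 bytes and a first segment holding only 60
 * of them, push_len becomes 96, the header is gathered from the chain with
 * rte_pktmbuf_read() and delta = 96 - 60 = 36, so the first 36 bytes of the
 * second segment are skipped when the DMA descriptors are built below.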
3058 */ 3059 push_len = RTE_MIN(mbuf->pkt_len, tx_ring->tx_max_header_size); 3060 *header_len = push_len; 3061 3062 if (likely(push_len <= seg_len)) { 3063 /* If the push header is in the single segment, then 3064 * just point it to the 1st mbuf data. 3065 */ 3066 *push_header = rte_pktmbuf_mtod(mbuf, uint8_t *); 3067 } else { 3068 /* If the push header lays in the several segments, copy 3069 * it to the intermediate buffer. 3070 */ 3071 rte_pktmbuf_read(mbuf, 0, push_len, 3072 tx_ring->push_buf_intermediate_buf); 3073 *push_header = tx_ring->push_buf_intermediate_buf; 3074 delta = push_len - seg_len; 3075 } 3076 } else { 3077 *push_header = NULL; 3078 *header_len = 0; 3079 push_len = 0; 3080 } 3081 3082 /* Process first segment taking into consideration pushed header */ 3083 if (seg_len > push_len) { 3084 ena_buf->paddr = mbuf->buf_iova + 3085 mbuf->data_off + 3086 push_len; 3087 ena_buf->len = seg_len - push_len; 3088 ena_buf++; 3089 tx_info->num_of_bufs++; 3090 } 3091 3092 while ((mbuf = mbuf->next) != NULL) { 3093 seg_len = mbuf->data_len; 3094 3095 /* Skip mbufs if whole data is pushed as a header */ 3096 if (unlikely(delta > seg_len)) { 3097 delta -= seg_len; 3098 continue; 3099 } 3100 3101 ena_buf->paddr = mbuf->buf_iova + mbuf->data_off + delta; 3102 ena_buf->len = seg_len - delta; 3103 ena_buf++; 3104 tx_info->num_of_bufs++; 3105 3106 delta = 0; 3107 } 3108 } 3109 3110 static int ena_xmit_mbuf(struct ena_ring *tx_ring, struct rte_mbuf *mbuf) 3111 { 3112 struct ena_tx_buffer *tx_info; 3113 struct ena_com_tx_ctx ena_tx_ctx = { { 0 } }; 3114 uint16_t next_to_use; 3115 uint16_t header_len; 3116 uint16_t req_id; 3117 void *push_header; 3118 int nb_hw_desc; 3119 int rc; 3120 3121 /* Checking for space for 2 additional metadata descriptors due to 3122 * possible header split and metadata descriptor 3123 */ 3124 if (!ena_com_sq_have_enough_space(tx_ring->ena_com_io_sq, 3125 mbuf->nb_segs + 2)) { 3126 PMD_TX_LOG_LINE(DEBUG, "Not enough space in the tx queue"); 3127 return ENA_COM_NO_MEM; 3128 } 3129 3130 next_to_use = tx_ring->next_to_use; 3131 3132 req_id = tx_ring->empty_tx_reqs[next_to_use]; 3133 tx_info = &tx_ring->tx_buffer_info[req_id]; 3134 tx_info->num_of_bufs = 0; 3135 RTE_ASSERT(tx_info->mbuf == NULL); 3136 3137 ena_tx_map_mbuf(tx_ring, tx_info, mbuf, &push_header, &header_len); 3138 3139 ena_tx_ctx.ena_bufs = tx_info->bufs; 3140 ena_tx_ctx.push_header = push_header; 3141 ena_tx_ctx.num_bufs = tx_info->num_of_bufs; 3142 ena_tx_ctx.req_id = req_id; 3143 ena_tx_ctx.header_len = header_len; 3144 3145 /* Set Tx offloads flags, if applicable */ 3146 ena_tx_mbuf_prepare(mbuf, &ena_tx_ctx, tx_ring->offloads, 3147 tx_ring->disable_meta_caching); 3148 3149 if (unlikely(ena_com_is_doorbell_needed(tx_ring->ena_com_io_sq, 3150 &ena_tx_ctx))) { 3151 PMD_TX_LOG_LINE(DEBUG, 3152 "LLQ Tx max burst size of queue %d achieved, writing doorbell to send burst", 3153 tx_ring->id); 3154 ena_com_write_sq_doorbell(tx_ring->ena_com_io_sq); 3155 tx_ring->tx_stats.doorbells++; 3156 tx_ring->pkts_without_db = false; 3157 } 3158 3159 /* prepare the packet's descriptors to dma engine */ 3160 rc = ena_com_prepare_tx(tx_ring->ena_com_io_sq, &ena_tx_ctx, 3161 &nb_hw_desc); 3162 if (unlikely(rc)) { 3163 PMD_DRV_LOG_LINE(ERR, "Failed to prepare Tx buffers, rc: %d", rc); 3164 ++tx_ring->tx_stats.prepare_ctx_err; 3165 ena_trigger_reset(tx_ring->adapter, 3166 ENA_REGS_RESET_DRIVER_INVALID_STATE); 3167 return rc; 3168 } 3169 3170 tx_info->tx_descs = nb_hw_desc; 3171 tx_info->timestamp = rte_get_timer_cycles(); 3172 
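	/* The timestamp recorded above is what check_for_tx_completion_in_queue()
	 * later compares against missing_tx_completion_to, conceptually
	 * (illustrative restatement of the watchdog code earlier in this file):
	 *
	 *	completion_delay = rte_get_timer_cycles() - tx_info->timestamp;
	 *	if (completion_delay > adapter->missing_tx_completion_to)
	 *		++missed_tx;
	 *
	 * ena_tx_cleanup() resets the timestamp to 0 once the completion is
	 * reaped, which excludes the slot from that check.
	 */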
3173 tx_ring->tx_stats.cnt++; 3174 tx_ring->tx_stats.bytes += mbuf->pkt_len; 3175 3176 tx_ring->next_to_use = ENA_IDX_NEXT_MASKED(next_to_use, 3177 tx_ring->size_mask); 3178 3179 return 0; 3180 } 3181 3182 static int ena_tx_cleanup(void *txp, uint32_t free_pkt_cnt) 3183 { 3184 struct rte_mbuf *pkts_to_clean[ENA_CLEANUP_BUF_THRESH]; 3185 struct ena_ring *tx_ring = (struct ena_ring *)txp; 3186 size_t mbuf_cnt = 0; 3187 size_t pkt_cnt = 0; 3188 unsigned int total_tx_descs = 0; 3189 unsigned int total_tx_pkts = 0; 3190 uint16_t cleanup_budget; 3191 uint16_t next_to_clean = tx_ring->next_to_clean; 3192 bool fast_free = tx_ring->offloads & RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE; 3193 3194 /* 3195 * If free_pkt_cnt is equal to 0, it means that the user requested 3196 * full cleanup, so attempt to release all Tx descriptors 3197 * (ring_size - 1 -> size_mask) 3198 */ 3199 cleanup_budget = (free_pkt_cnt == 0) ? tx_ring->size_mask : free_pkt_cnt; 3200 3201 while (likely(total_tx_pkts < cleanup_budget)) { 3202 struct rte_mbuf *mbuf; 3203 struct ena_tx_buffer *tx_info; 3204 uint16_t req_id; 3205 3206 if (ena_com_tx_comp_req_id_get(tx_ring->ena_com_io_cq, &req_id) != 0) 3207 break; 3208 3209 if (unlikely(validate_tx_req_id(tx_ring, req_id) != 0)) 3210 break; 3211 3212 /* Get Tx info & store how many descs were processed */ 3213 tx_info = &tx_ring->tx_buffer_info[req_id]; 3214 tx_info->timestamp = 0; 3215 3216 mbuf = tx_info->mbuf; 3217 if (fast_free) { 3218 pkts_to_clean[pkt_cnt++] = mbuf; 3219 mbuf_cnt += mbuf->nb_segs; 3220 if (mbuf_cnt >= ENA_CLEANUP_BUF_THRESH) { 3221 rte_pktmbuf_free_bulk(pkts_to_clean, pkt_cnt); 3222 mbuf_cnt = 0; 3223 pkt_cnt = 0; 3224 } 3225 } else { 3226 rte_pktmbuf_free(mbuf); 3227 } 3228 3229 tx_info->mbuf = NULL; 3230 tx_ring->empty_tx_reqs[next_to_clean] = req_id; 3231 3232 total_tx_descs += tx_info->tx_descs; 3233 total_tx_pkts++; 3234 3235 /* Put back descriptor to the ring for reuse */ 3236 next_to_clean = ENA_IDX_NEXT_MASKED(next_to_clean, 3237 tx_ring->size_mask); 3238 } 3239 3240 if (likely(total_tx_descs > 0)) { 3241 /* acknowledge completion of sent packets */ 3242 tx_ring->next_to_clean = next_to_clean; 3243 ena_com_comp_ack(tx_ring->ena_com_io_sq, total_tx_descs); 3244 } 3245 3246 if (mbuf_cnt != 0) 3247 rte_pktmbuf_free_bulk(pkts_to_clean, pkt_cnt); 3248 3249 /* Notify completion handler that full cleanup was performed */ 3250 if (free_pkt_cnt == 0 || total_tx_pkts < cleanup_budget) 3251 tx_ring->last_cleanup_ticks = rte_get_timer_cycles(); 3252 3253 return total_tx_pkts; 3254 } 3255 3256 static uint16_t eth_ena_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, 3257 uint16_t nb_pkts) 3258 { 3259 struct ena_ring *tx_ring = (struct ena_ring *)(tx_queue); 3260 int available_desc; 3261 uint16_t sent_idx = 0; 3262 3263 #ifdef RTE_ETHDEV_DEBUG_TX 3264 /* Check adapter state */ 3265 if (unlikely(tx_ring->adapter->state != ENA_ADAPTER_STATE_RUNNING)) { 3266 PMD_TX_LOG_LINE(ALERT, 3267 "Trying to xmit pkts while device is NOT running"); 3268 return 0; 3269 } 3270 #endif 3271 3272 available_desc = ena_com_free_q_entries(tx_ring->ena_com_io_sq); 3273 if (available_desc < tx_ring->tx_free_thresh) 3274 ena_tx_cleanup((void *)tx_ring, 0); 3275 3276 for (sent_idx = 0; sent_idx < nb_pkts; sent_idx++) { 3277 if (ena_xmit_mbuf(tx_ring, tx_pkts[sent_idx])) 3278 break; 3279 tx_ring->pkts_without_db = true; 3280 rte_prefetch0(tx_pkts[ENA_IDX_ADD_MASKED(sent_idx, 4, 3281 tx_ring->size_mask)]); 3282 } 3283 3284 /* If there are ready packets to be xmitted... 
*/ 3285 if (likely(tx_ring->pkts_without_db)) { 3286 /* ...let HW do its best :-) */ 3287 ena_com_write_sq_doorbell(tx_ring->ena_com_io_sq); 3288 tx_ring->tx_stats.doorbells++; 3289 tx_ring->pkts_without_db = false; 3290 } 3291 3292 tx_ring->tx_stats.available_desc = 3293 ena_com_free_q_entries(tx_ring->ena_com_io_sq); 3294 tx_ring->tx_stats.tx_poll++; 3295 3296 return sent_idx; 3297 } 3298 3299 static void ena_copy_customer_metrics(struct ena_adapter *adapter, uint64_t *buf, 3300 size_t num_metrics) 3301 { 3302 struct ena_com_dev *ena_dev = &adapter->ena_dev; 3303 int rc; 3304 3305 if (ena_com_get_cap(ena_dev, ENA_ADMIN_CUSTOMER_METRICS)) { 3306 if (num_metrics != ENA_STATS_ARRAY_METRICS) { 3307 PMD_DRV_LOG_LINE(ERR, "Detected discrepancy in the number of customer metrics"); 3308 return; 3309 } 3310 rte_spinlock_lock(&adapter->admin_lock); 3311 rc = ENA_PROXY(adapter, 3312 ena_com_get_customer_metrics, 3313 &adapter->ena_dev, 3314 (char *)buf, 3315 num_metrics * sizeof(uint64_t)); 3316 rte_spinlock_unlock(&adapter->admin_lock); 3317 if (rc != 0) { 3318 PMD_DRV_LOG_LINE(WARNING, "Failed to get customer metrics, rc: %d", rc); 3319 return; 3320 } 3321 3322 } else if (ena_com_get_cap(ena_dev, ENA_ADMIN_ENI_STATS)) { 3323 if (num_metrics != ENA_STATS_ARRAY_METRICS_LEGACY) { 3324 PMD_DRV_LOG_LINE(ERR, "Detected discrepancy in the number of legacy metrics"); 3325 return; 3326 } 3327 3328 rte_spinlock_lock(&adapter->admin_lock); 3329 rc = ENA_PROXY(adapter, 3330 ena_com_get_eni_stats, 3331 &adapter->ena_dev, 3332 (struct ena_admin_eni_stats *)buf); 3333 rte_spinlock_unlock(&adapter->admin_lock); 3334 if (rc != 0) { 3335 PMD_DRV_LOG_LINE(WARNING, 3336 "Failed to get ENI metrics, rc: %d", rc); 3337 return; 3338 } 3339 } 3340 } 3341 3342 static void ena_copy_ena_srd_info(struct ena_adapter *adapter, 3343 struct ena_stats_srd *srd_info) 3344 { 3345 int rc; 3346 3347 if (!ena_com_get_cap(&adapter->ena_dev, ENA_ADMIN_ENA_SRD_INFO)) 3348 return; 3349 3350 rte_spinlock_lock(&adapter->admin_lock); 3351 rc = ENA_PROXY(adapter, 3352 ena_com_get_ena_srd_info, 3353 &adapter->ena_dev, 3354 (struct ena_admin_ena_srd_info *)srd_info); 3355 rte_spinlock_unlock(&adapter->admin_lock); 3356 if (rc != ENA_COM_OK && rc != ENA_COM_UNSUPPORTED) { 3357 PMD_DRV_LOG_LINE(WARNING, 3358 "Failed to get ENA express srd info, rc: %d", rc); 3359 return; 3360 } 3361 } 3362 3363 /** 3364 * DPDK callback to retrieve names of extended device statistics 3365 * 3366 * @param dev 3367 * Pointer to Ethernet device structure. 3368 * @param[out] xstats_names 3369 * Buffer to insert names into. 3370 * @param n 3371 * Number of names. 3372 * 3373 * @return 3374 * Number of xstats names. 
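 *
 * Usage sketch (illustrative only; error handling omitted and "port_id" is
 * assumed to be a valid ENA port): the callback follows the usual two-call
 * ethdev pattern, probing for the size first and then filling the table:
 *
 *	int nb = rte_eth_xstats_get_names(port_id, NULL, 0);
 *	struct rte_eth_xstat_name *names = calloc(nb, sizeof(*names));
 *	rte_eth_xstats_get_names(port_id, names, nb);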
3375 */ 3376 static int ena_xstats_get_names(struct rte_eth_dev *dev, 3377 struct rte_eth_xstat_name *xstats_names, 3378 unsigned int n) 3379 { 3380 struct ena_adapter *adapter = dev->data->dev_private; 3381 unsigned int xstats_count = ena_xstats_calc_num(dev->data); 3382 unsigned int stat, i, count = 0; 3383 3384 if (n < xstats_count || !xstats_names) 3385 return xstats_count; 3386 3387 for (stat = 0; stat < ENA_STATS_ARRAY_GLOBAL; stat++, count++) 3388 strcpy(xstats_names[count].name, 3389 ena_stats_global_strings[stat].name); 3390 3391 for (stat = 0; stat < adapter->metrics_num; stat++, count++) 3392 rte_strscpy(xstats_names[count].name, 3393 ena_stats_metrics_strings[stat].name, 3394 RTE_ETH_XSTATS_NAME_SIZE); 3395 for (stat = 0; stat < ENA_STATS_ARRAY_ENA_SRD; stat++, count++) 3396 rte_strscpy(xstats_names[count].name, 3397 ena_stats_srd_strings[stat].name, 3398 RTE_ETH_XSTATS_NAME_SIZE); 3399 3400 for (stat = 0; stat < ENA_STATS_ARRAY_RX; stat++) 3401 for (i = 0; i < dev->data->nb_rx_queues; i++, count++) 3402 snprintf(xstats_names[count].name, 3403 sizeof(xstats_names[count].name), 3404 "rx_q%d_%s", i, 3405 ena_stats_rx_strings[stat].name); 3406 3407 for (stat = 0; stat < ENA_STATS_ARRAY_TX; stat++) 3408 for (i = 0; i < dev->data->nb_tx_queues; i++, count++) 3409 snprintf(xstats_names[count].name, 3410 sizeof(xstats_names[count].name), 3411 "tx_q%d_%s", i, 3412 ena_stats_tx_strings[stat].name); 3413 3414 return xstats_count; 3415 } 3416 3417 /** 3418 * DPDK callback to retrieve names of extended device statistics for the given 3419 * ids. 3420 * 3421 * @param dev 3422 * Pointer to Ethernet device structure. 3423 * @param[out] xstats_names 3424 * Buffer to insert names into. 3425 * @param ids 3426 * IDs array for which the names should be retrieved. 3427 * @param size 3428 * Number of ids. 3429 * 3430 * @return 3431 * Positive value: number of xstats names. Negative value: error code. 
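 *
 * Usage sketch (illustrative only, "port_id" assumed valid): resolving the
 * names of a few selected counters without fetching the whole table:
 *
 *	uint64_t ids[] = { 0, 5 };
 *	struct rte_eth_xstat_name names[RTE_DIM(ids)];
 *	rte_eth_xstats_get_names_by_id(port_id, names, RTE_DIM(ids), ids);
 *
 * The ids follow the same flat layout as ena_xstats_get_names(): global
 * stats first, then customer metrics, ENA SRD info and finally the
 * per-queue Rx and Tx statistics.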
3432 */ 3433 static int ena_xstats_get_names_by_id(struct rte_eth_dev *dev, 3434 const uint64_t *ids, 3435 struct rte_eth_xstat_name *xstats_names, 3436 unsigned int size) 3437 { 3438 struct ena_adapter *adapter = dev->data->dev_private; 3439 uint64_t xstats_count = ena_xstats_calc_num(dev->data); 3440 uint64_t id, qid; 3441 unsigned int i; 3442 3443 if (xstats_names == NULL) 3444 return xstats_count; 3445 3446 for (i = 0; i < size; ++i) { 3447 id = ids[i]; 3448 if (id > xstats_count) { 3449 PMD_DRV_LOG_LINE(ERR, 3450 "ID value out of range: id=%" PRIu64 ", xstats_num=%" PRIu64, 3451 id, xstats_count); 3452 return -EINVAL; 3453 } 3454 3455 if (id < ENA_STATS_ARRAY_GLOBAL) { 3456 strcpy(xstats_names[i].name, 3457 ena_stats_global_strings[id].name); 3458 continue; 3459 } 3460 3461 id -= ENA_STATS_ARRAY_GLOBAL; 3462 if (id < adapter->metrics_num) { 3463 rte_strscpy(xstats_names[i].name, 3464 ena_stats_metrics_strings[id].name, 3465 RTE_ETH_XSTATS_NAME_SIZE); 3466 continue; 3467 } 3468 3469 id -= adapter->metrics_num; 3470 3471 if (id < ENA_STATS_ARRAY_ENA_SRD) { 3472 rte_strscpy(xstats_names[i].name, 3473 ena_stats_srd_strings[id].name, 3474 RTE_ETH_XSTATS_NAME_SIZE); 3475 continue; 3476 } 3477 id -= ENA_STATS_ARRAY_ENA_SRD; 3478 3479 if (id < ENA_STATS_ARRAY_RX) { 3480 qid = id / dev->data->nb_rx_queues; 3481 id %= dev->data->nb_rx_queues; 3482 snprintf(xstats_names[i].name, 3483 sizeof(xstats_names[i].name), 3484 "rx_q%" PRIu64 "d_%s", 3485 qid, ena_stats_rx_strings[id].name); 3486 continue; 3487 } 3488 3489 id -= ENA_STATS_ARRAY_RX; 3490 /* Although this condition is not needed, it was added for 3491 * compatibility if new xstat structure would be ever added. 3492 */ 3493 if (id < ENA_STATS_ARRAY_TX) { 3494 qid = id / dev->data->nb_tx_queues; 3495 id %= dev->data->nb_tx_queues; 3496 snprintf(xstats_names[i].name, 3497 sizeof(xstats_names[i].name), 3498 "tx_q%" PRIu64 "_%s", 3499 qid, ena_stats_tx_strings[id].name); 3500 continue; 3501 } 3502 } 3503 3504 return i; 3505 } 3506 3507 /** 3508 * DPDK callback to get extended device statistics. 3509 * 3510 * @param dev 3511 * Pointer to Ethernet device structure. 3512 * @param[out] stats 3513 * Stats table output buffer. 3514 * @param n 3515 * The size of the stats table. 3516 * 3517 * @return 3518 * Number of xstats on success, negative on failure. 
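 *
 * Usage sketch (illustrative only; "port_id" is assumed valid and "nb" was
 * obtained beforehand with a NULL/0 probe call):
 *
 *	struct rte_eth_xstat *xstats = calloc(nb, sizeof(*xstats));
 *	int ret = rte_eth_xstats_get(port_id, xstats, nb);
 *
 * Note that, as implemented below, every call refreshes the customer
 * metrics and ENA SRD info through the admin queue, so polling xstats at a
 * very high rate has a control-path cost.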
3519 */ 3520 static int ena_xstats_get(struct rte_eth_dev *dev, 3521 struct rte_eth_xstat *xstats, 3522 unsigned int n) 3523 { 3524 struct ena_adapter *adapter = dev->data->dev_private; 3525 unsigned int xstats_count = ena_xstats_calc_num(dev->data); 3526 unsigned int stat, i, count = 0; 3527 int stat_offset; 3528 void *stats_begin; 3529 uint64_t metrics_stats[ENA_MAX_CUSTOMER_METRICS]; 3530 struct ena_stats_srd srd_info = {0}; 3531 3532 if (n < xstats_count) 3533 return xstats_count; 3534 3535 if (!xstats) 3536 return 0; 3537 3538 for (stat = 0; stat < ENA_STATS_ARRAY_GLOBAL; stat++, count++) { 3539 stat_offset = ena_stats_global_strings[stat].stat_offset; 3540 stats_begin = &adapter->dev_stats; 3541 3542 xstats[count].id = count; 3543 xstats[count].value = *((uint64_t *) 3544 ((char *)stats_begin + stat_offset)); 3545 } 3546 3547 ena_copy_customer_metrics(adapter, metrics_stats, adapter->metrics_num); 3548 stats_begin = metrics_stats; 3549 for (stat = 0; stat < adapter->metrics_num; stat++, count++) { 3550 stat_offset = ena_stats_metrics_strings[stat].stat_offset; 3551 3552 xstats[count].id = count; 3553 xstats[count].value = *((uint64_t *) 3554 ((char *)stats_begin + stat_offset)); 3555 } 3556 3557 ena_copy_ena_srd_info(adapter, &srd_info); 3558 stats_begin = &srd_info; 3559 for (stat = 0; stat < ENA_STATS_ARRAY_ENA_SRD; stat++, count++) { 3560 stat_offset = ena_stats_srd_strings[stat].stat_offset; 3561 xstats[count].id = count; 3562 xstats[count].value = *((uint64_t *) 3563 ((char *)stats_begin + stat_offset)); 3564 } 3565 3566 for (stat = 0; stat < ENA_STATS_ARRAY_RX; stat++) { 3567 for (i = 0; i < dev->data->nb_rx_queues; i++, count++) { 3568 stat_offset = ena_stats_rx_strings[stat].stat_offset; 3569 stats_begin = &adapter->rx_ring[i].rx_stats; 3570 3571 xstats[count].id = count; 3572 xstats[count].value = *((uint64_t *) 3573 ((char *)stats_begin + stat_offset)); 3574 } 3575 } 3576 3577 for (stat = 0; stat < ENA_STATS_ARRAY_TX; stat++) { 3578 for (i = 0; i < dev->data->nb_tx_queues; i++, count++) { 3579 stat_offset = ena_stats_tx_strings[stat].stat_offset; 3580 stats_begin = &adapter->tx_ring[i].rx_stats; 3581 3582 xstats[count].id = count; 3583 xstats[count].value = *((uint64_t *) 3584 ((char *)stats_begin + stat_offset)); 3585 } 3586 } 3587 3588 return count; 3589 } 3590 3591 static int ena_xstats_get_by_id(struct rte_eth_dev *dev, 3592 const uint64_t *ids, 3593 uint64_t *values, 3594 unsigned int n) 3595 { 3596 struct ena_adapter *adapter = dev->data->dev_private; 3597 uint64_t id; 3598 uint64_t rx_entries, tx_entries; 3599 unsigned int i; 3600 int qid; 3601 int valid = 0; 3602 bool were_metrics_copied = false; 3603 bool was_srd_info_copied = false; 3604 uint64_t metrics_stats[ENA_MAX_CUSTOMER_METRICS]; 3605 struct ena_stats_srd srd_info = {0}; 3606 3607 for (i = 0; i < n; ++i) { 3608 id = ids[i]; 3609 /* Check if id belongs to global statistics */ 3610 if (id < ENA_STATS_ARRAY_GLOBAL) { 3611 values[i] = *((uint64_t *)&adapter->dev_stats + id); 3612 ++valid; 3613 continue; 3614 } 3615 3616 /* Check if id belongs to ENI statistics */ 3617 id -= ENA_STATS_ARRAY_GLOBAL; 3618 if (id < adapter->metrics_num) { 3619 /* Avoid reading metrics multiple times in a single 3620 * function call, as it requires communication with the 3621 * admin queue. 
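 *
 * Worked example of the id decomposition used below (counts are purely
 * illustrative): with 4 global stats, 6 customer metrics, 5 SRD entries and
 * 2 Rx queues, a flat id of 17 survives the successive subtractions as
 * 17 - 4 - 6 - 5 = 2, lands in the Rx range and resolves to
 * qid = 2 % 2 = 0 and stat index 2 / 2 = 1, i.e. the second rx_stats field
 * of queue 0.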
static int ena_xstats_get_by_id(struct rte_eth_dev *dev,
				const uint64_t *ids,
				uint64_t *values,
				unsigned int n)
{
	struct ena_adapter *adapter = dev->data->dev_private;
	uint64_t id;
	uint64_t rx_entries, tx_entries;
	unsigned int i;
	int qid;
	int valid = 0;
	bool were_metrics_copied = false;
	bool was_srd_info_copied = false;
	uint64_t metrics_stats[ENA_MAX_CUSTOMER_METRICS];
	struct ena_stats_srd srd_info = {0};

	for (i = 0; i < n; ++i) {
		id = ids[i];
		/* Check if id belongs to global statistics */
		if (id < ENA_STATS_ARRAY_GLOBAL) {
			values[i] = *((uint64_t *)&adapter->dev_stats + id);
			++valid;
			continue;
		}

		/* Check if id belongs to metrics (legacy ENI) statistics */
		id -= ENA_STATS_ARRAY_GLOBAL;
		if (id < adapter->metrics_num) {
			/* Avoid reading metrics multiple times in a single
			 * function call, as it requires communication with the
			 * admin queue.
			 */
			if (!were_metrics_copied) {
				were_metrics_copied = true;
				ena_copy_customer_metrics(adapter,
					metrics_stats,
					adapter->metrics_num);
			}

			values[i] = *((uint64_t *)&metrics_stats + id);
			++valid;
			continue;
		}

		/* Check if id belongs to SRD info statistics */
		id -= adapter->metrics_num;

		if (id < ENA_STATS_ARRAY_ENA_SRD) {
			/*
			 * Avoid reading srd info multiple times in a single
			 * function call, as it requires communication with the
			 * admin queue.
			 */
			if (!was_srd_info_copied) {
				was_srd_info_copied = true;
				ena_copy_ena_srd_info(adapter, &srd_info);
			}
			values[i] = *((uint64_t *)&adapter->srd_stats + id);
			++valid;
			continue;
		}

		/* Check if id belongs to rx queue statistics */
		id -= ENA_STATS_ARRAY_ENA_SRD;

		rx_entries = ENA_STATS_ARRAY_RX * dev->data->nb_rx_queues;
		if (id < rx_entries) {
			qid = id % dev->data->nb_rx_queues;
			id /= dev->data->nb_rx_queues;
			values[i] = *((uint64_t *)
				&adapter->rx_ring[qid].rx_stats + id);
			++valid;
			continue;
		}
		/* Check if id belongs to tx queue statistics */
		id -= rx_entries;
		tx_entries = ENA_STATS_ARRAY_TX * dev->data->nb_tx_queues;
		if (id < tx_entries) {
			qid = id % dev->data->nb_tx_queues;
			id /= dev->data->nb_tx_queues;
			values[i] = *((uint64_t *)
				&adapter->tx_ring[qid].tx_stats + id);
			++valid;
			continue;
		}
	}

	return valid;
}

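/*
 * Illustrative only: how the numeric device arguments handled by
 * ena_process_uint_devarg() below translate into driver state. The
 * seconds-to-cycles conversion uses rte_get_timer_hz().
 *
 *   miss_txc_to=5                  -> missing_tx_completion_to =
 *                                     5 * rte_get_timer_hz() timer cycles
 *   miss_txc_to=0                  -> missing Tx completion check disabled
 *   control_path_poll_interval=500 -> control_path_poll_interval =
 *                                     500 * USEC_PER_MSEC = 500000 us
 *   control_path_poll_interval=0   -> control path driven by interrupts
 */
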
static int ena_process_uint_devarg(const char *key,
				   const char *value,
				   void *opaque)
{
	struct ena_adapter *adapter = opaque;
	char *str_end;
	uint64_t uint64_value;

	uint64_value = strtoull(value, &str_end, DECIMAL_BASE);
	if (value == str_end) {
		PMD_INIT_LOG_LINE(ERR,
			"Invalid value for key '%s'. Only uint values are accepted.",
			key);
		return -EINVAL;
	}

	if (strcmp(key, ENA_DEVARG_MISS_TXC_TO) == 0) {
		if (uint64_value > ENA_MAX_TX_TIMEOUT_SECONDS) {
			PMD_INIT_LOG_LINE(ERR,
				"Tx timeout too high: %" PRIu64 " sec. Maximum allowed: %d sec.",
				uint64_value, ENA_MAX_TX_TIMEOUT_SECONDS);
			return -EINVAL;
		} else if (uint64_value == 0) {
			PMD_INIT_LOG_LINE(INFO,
				"Check for missing Tx completions has been disabled.");
			adapter->missing_tx_completion_to =
				ENA_HW_HINTS_NO_TIMEOUT;
		} else {
			PMD_INIT_LOG_LINE(INFO,
				"Tx packet completion timeout set to %" PRIu64 " seconds.",
				uint64_value);
			adapter->missing_tx_completion_to =
				uint64_value * rte_get_timer_hz();
		}
	} else if (strcmp(key, ENA_DEVARG_CONTROL_PATH_POLL_INTERVAL) == 0) {
		if (uint64_value > ENA_MAX_CONTROL_PATH_POLL_INTERVAL_MSEC) {
			PMD_INIT_LOG_LINE(ERR,
				"Control path polling interval is too long: %" PRIu64 " msecs. "
				"Maximum allowed: %d msecs.",
				uint64_value, ENA_MAX_CONTROL_PATH_POLL_INTERVAL_MSEC);
			return -EINVAL;
		} else if (uint64_value == 0) {
			PMD_INIT_LOG_LINE(INFO,
				"Control path polling interval is set to zero. Operating in "
				"interrupt mode.");
			adapter->control_path_poll_interval = 0;
		} else {
			PMD_INIT_LOG_LINE(INFO,
				"Control path polling interval is set to %" PRIu64 " msecs.",
				uint64_value);
			adapter->control_path_poll_interval = uint64_value * USEC_PER_MSEC;
		}
	}

	return 0;
}

static int ena_process_llq_policy_devarg(const char *key, const char *value, void *opaque)
{
	struct ena_adapter *adapter = opaque;
	char *str_end;
	uint32_t policy;

	policy = strtoul(value, &str_end, DECIMAL_BASE);
	if (value != str_end && policy < ENA_LLQ_POLICY_LAST) {
		adapter->llq_header_policy = policy;
	} else {
		PMD_INIT_LOG_LINE(ERR,
			"Invalid value '%s' for key '%s'. Valid values: [0-3].",
			value, key);
		return -EINVAL;
	}
	PMD_INIT_LOG_LINE(INFO,
		"LLQ policy is %u [0 - disabled, 1 - device recommended, 2 - normal, 3 - large]",
		adapter->llq_header_policy);
	return 0;
}

static int ena_parse_devargs(struct ena_adapter *adapter, struct rte_devargs *devargs)
{
	static const char * const allowed_args[] = {
		ENA_DEVARG_LLQ_POLICY,
		ENA_DEVARG_MISS_TXC_TO,
		ENA_DEVARG_CONTROL_PATH_POLL_INTERVAL,
		NULL,
	};
	struct rte_kvargs *kvlist;
	int rc;

	if (devargs == NULL)
		return 0;

	kvlist = rte_kvargs_parse(devargs->args, allowed_args);
	if (kvlist == NULL) {
		PMD_INIT_LOG_LINE(ERR, "Invalid device arguments: %s",
			devargs->args);
		return -EINVAL;
	}

	rc = rte_kvargs_process(kvlist, ENA_DEVARG_LLQ_POLICY,
		ena_process_llq_policy_devarg, adapter);
	if (rc != 0)
		goto exit;
	rc = rte_kvargs_process(kvlist, ENA_DEVARG_MISS_TXC_TO,
		ena_process_uint_devarg, adapter);
	if (rc != 0)
		goto exit;
	rc = rte_kvargs_process(kvlist, ENA_DEVARG_CONTROL_PATH_POLL_INTERVAL,
		ena_process_uint_devarg, adapter);
	if (rc != 0)
		goto exit;

exit:
	rte_kvargs_free(kvlist);

	return rc;
}

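/*
 * Illustrative only: the device arguments parsed above are supplied through
 * the EAL allow-list option, for example (the PCI address is a placeholder):
 *
 *	dpdk-testpmd -a 00:05.0,llq_policy=1,miss_txc_to=5,control_path_poll_interval=500 -- -i
 *
 * Keys not present in allowed_args are rejected by rte_kvargs_parse().
 */
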
static int ena_setup_rx_intr(struct rte_eth_dev *dev)
{
	struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev);
	struct rte_intr_handle *intr_handle = pci_dev->intr_handle;
	int rc;
	uint16_t vectors_nb, i;
	bool rx_intr_requested = dev->data->dev_conf.intr_conf.rxq;

	if (!rx_intr_requested)
		return 0;

	if (!rte_intr_cap_multiple(intr_handle)) {
		PMD_DRV_LOG_LINE(ERR,
			"Rx interrupt requested, but it isn't supported by the PCI driver");
		return -ENOTSUP;
	}

	/* Disable interrupt mapping before the configuration starts. */
	rte_intr_disable(intr_handle);

	/* Verify if there are enough vectors available. */
	vectors_nb = dev->data->nb_rx_queues;
	if (vectors_nb > RTE_MAX_RXTX_INTR_VEC_ID) {
		PMD_DRV_LOG_LINE(ERR,
			"Too many Rx interrupts requested, maximum number: %d",
			RTE_MAX_RXTX_INTR_VEC_ID);
		rc = -ENOTSUP;
		goto enable_intr;
	}

	/* Allocate the vector list */
	if (rte_intr_vec_list_alloc(intr_handle, "intr_vec",
				    dev->data->nb_rx_queues)) {
		PMD_DRV_LOG_LINE(ERR,
			"Failed to allocate interrupt vector for %d queues",
			dev->data->nb_rx_queues);
		rc = -ENOMEM;
		goto enable_intr;
	}

	rc = rte_intr_efd_enable(intr_handle, vectors_nb);
	if (rc != 0)
		goto free_intr_vec;

	if (!rte_intr_allow_others(intr_handle)) {
		PMD_DRV_LOG_LINE(ERR,
			"Not enough interrupts available to use both ENA Admin and Rx interrupts");
		rc = -ENOTSUP;
		goto disable_intr_efd;
	}

	for (i = 0; i < vectors_nb; ++i) {
		rc = rte_intr_vec_list_index_set(intr_handle, i,
			RTE_INTR_VEC_RXTX_OFFSET + i);
		if (rc != 0)
			goto disable_intr_efd;
	}

	rte_intr_enable(intr_handle);
	return 0;

disable_intr_efd:
	rte_intr_efd_disable(intr_handle);
free_intr_vec:
	rte_intr_vec_list_free(intr_handle);
enable_intr:
	rte_intr_enable(intr_handle);
	return rc;
}

static void ena_rx_queue_intr_set(struct rte_eth_dev *dev,
				  uint16_t queue_id,
				  bool unmask)
{
	struct ena_adapter *adapter = dev->data->dev_private;
	struct ena_ring *rxq = &adapter->rx_ring[queue_id];
	struct ena_eth_io_intr_reg intr_reg;

	ena_com_update_intr_reg(&intr_reg, 0, 0, unmask, 1);
	ena_com_unmask_intr(rxq->ena_com_io_cq, &intr_reg);
}

static int ena_rx_queue_intr_enable(struct rte_eth_dev *dev,
				    uint16_t queue_id)
{
	ena_rx_queue_intr_set(dev, queue_id, true);

	return 0;
}

static int ena_rx_queue_intr_disable(struct rte_eth_dev *dev,
				     uint16_t queue_id)
{
	ena_rx_queue_intr_set(dev, queue_id, false);

	return 0;
}

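/*
 * Illustrative only: a minimal sketch of how an application would consume the
 * Rx interrupts wired up above, assuming intr_conf.rxq was set to 1 before
 * rte_eth_dev_configure() and "port_id"/"queue_id" describe a started ENA
 * queue. Error handling is omitted.
 *
 *	struct rte_epoll_event ev;
 *
 *	rte_eth_dev_rx_intr_ctl_q(port_id, queue_id, RTE_EPOLL_PER_THREAD,
 *				  RTE_INTR_EVENT_ADD, NULL);
 *	while (keep_running) {
 *		rte_eth_dev_rx_intr_enable(port_id, queue_id);
 *		rte_epoll_wait(RTE_EPOLL_PER_THREAD, &ev, 1, -1);
 *		rte_eth_dev_rx_intr_disable(port_id, queue_id);
 *		// drain the queue with rte_eth_rx_burst() here
 *	}
 */
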
static int ena_configure_aenq(struct ena_adapter *adapter)
{
	uint32_t aenq_groups = adapter->all_aenq_groups;
	int rc;

	/* all_aenq_groups holds the AENQ groups supported by both the device
	 * and the driver, so first make sure the LSC request can actually be
	 * satisfied.
	 */
	if (adapter->edev_data->dev_conf.intr_conf.lsc != 0) {
		if (!(aenq_groups & BIT(ENA_ADMIN_LINK_CHANGE))) {
			PMD_DRV_LOG_LINE(ERR,
				"LSC requested, but it's not supported by the AENQ");
			return -EINVAL;
		}
	} else {
		/* If LSC wasn't enabled by the app, let's enable all supported
		 * AENQ procedures except the LSC.
		 */
		aenq_groups &= ~BIT(ENA_ADMIN_LINK_CHANGE);
	}

	rc = ena_com_set_aenq_config(&adapter->ena_dev, aenq_groups);
	if (rc != 0) {
		PMD_DRV_LOG_LINE(ERR, "Cannot configure AENQ groups, rc=%d", rc);
		return rc;
	}

	adapter->active_aenq_groups = aenq_groups;

	return 0;
}

int ena_mp_indirect_table_set(struct ena_adapter *adapter)
{
	return ENA_PROXY(adapter, ena_com_indirect_table_set, &adapter->ena_dev);
}

int ena_mp_indirect_table_get(struct ena_adapter *adapter,
			      uint32_t *indirect_table)
{
	return ENA_PROXY(adapter, ena_com_indirect_table_get, &adapter->ena_dev,
		indirect_table);
}

/*********************************************************************
 *  ena_plat_dpdk.h functions implementations
 *********************************************************************/

const struct rte_memzone *
ena_mem_alloc_coherent(struct rte_eth_dev_data *data, size_t size,
		       int socket_id, unsigned int alignment, void **virt_addr,
		       dma_addr_t *phys_addr)
{
	char z_name[RTE_MEMZONE_NAMESIZE];
	struct ena_adapter *adapter = data->dev_private;
	const struct rte_memzone *memzone;
	int rc;

	rc = snprintf(z_name, RTE_MEMZONE_NAMESIZE, "ena_p%d_mz%" PRIu64,
		data->port_id, adapter->memzone_cnt);
	if (rc >= RTE_MEMZONE_NAMESIZE) {
		PMD_DRV_LOG_LINE(ERR,
			"Name for the ena_com memzone is too long. Port: %d, mz_num: %" PRIu64,
			data->port_id, adapter->memzone_cnt);
		goto error;
	}
	adapter->memzone_cnt++;

	memzone = rte_memzone_reserve_aligned(z_name, size, socket_id,
		RTE_MEMZONE_IOVA_CONTIG, alignment);
	if (memzone == NULL) {
		PMD_DRV_LOG_LINE(ERR, "Failed to allocate ena_com memzone: %s",
			z_name);
		goto error;
	}

	memset(memzone->addr, 0, size);
	*virt_addr = memzone->addr;
	*phys_addr = memzone->iova;

	return memzone;

error:
	*virt_addr = NULL;
	*phys_addr = 0;

	return NULL;
}

/*********************************************************************
 *  PMD configuration
 *********************************************************************/
static int eth_ena_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
			     struct rte_pci_device *pci_dev)
{
	return rte_eth_dev_pci_generic_probe(pci_dev,
		sizeof(struct ena_adapter), eth_ena_dev_init);
}

static int eth_ena_pci_remove(struct rte_pci_device *pci_dev)
{
	return rte_eth_dev_pci_generic_remove(pci_dev, eth_ena_dev_uninit);
}

static struct rte_pci_driver rte_ena_pmd = {
	.id_table = pci_id_ena_map,
	.drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC |
		     RTE_PCI_DRV_WC_ACTIVATE,
	.probe = eth_ena_pci_probe,
	.remove = eth_ena_pci_remove,
};

RTE_PMD_REGISTER_PCI(net_ena, rte_ena_pmd);
RTE_PMD_REGISTER_PCI_TABLE(net_ena, pci_id_ena_map);
RTE_PMD_REGISTER_KMOD_DEP(net_ena, "* igb_uio | uio_pci_generic | vfio-pci");
RTE_PMD_REGISTER_PARAM_STRING(net_ena,
	ENA_DEVARG_LLQ_POLICY "=<0|1|2|3> "
	ENA_DEVARG_MISS_TXC_TO "=<uint> "
	ENA_DEVARG_CONTROL_PATH_POLL_INTERVAL "=<0-1000>");
RTE_LOG_REGISTER_SUFFIX(ena_logtype_init, init, NOTICE);
RTE_LOG_REGISTER_SUFFIX(ena_logtype_driver, driver, NOTICE);
#ifdef RTE_ETHDEV_DEBUG_RX
RTE_LOG_REGISTER_SUFFIX(ena_logtype_rx, rx, DEBUG);
#endif
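/*
 * Illustrative only: the dynamic log types registered here can be raised at
 * runtime from the EAL command line, e.g. (assuming the default "pmd.net.ena"
 * log name prefix used by the build system):
 *
 *	--log-level=pmd.net.ena.driver:debug --log-level=pmd.net.ena.init:info
 */
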
#ifdef RTE_ETHDEV_DEBUG_TX
RTE_LOG_REGISTER_SUFFIX(ena_logtype_tx, tx, DEBUG);
#endif
RTE_LOG_REGISTER_SUFFIX(ena_logtype_com, com, WARNING);

/******************************************************************************
 ******************************** AENQ Handlers *******************************
 *****************************************************************************/
static void ena_update_on_link_change(void *adapter_data,
				      struct ena_admin_aenq_entry *aenq_e)
{
	struct rte_eth_dev *eth_dev = adapter_data;
	struct ena_adapter *adapter = eth_dev->data->dev_private;
	struct ena_admin_aenq_link_change_desc *aenq_link_desc;
	uint32_t status;

	aenq_link_desc = (struct ena_admin_aenq_link_change_desc *)aenq_e;

	status = get_ena_admin_aenq_link_change_desc_link_status(aenq_link_desc);
	adapter->link_status = status;

	ena_link_update(eth_dev, 0);
	rte_eth_dev_callback_process(eth_dev, RTE_ETH_EVENT_INTR_LSC, NULL);
}

static void ena_notification(void *adapter_data,
			     struct ena_admin_aenq_entry *aenq_e)
{
	struct rte_eth_dev *eth_dev = adapter_data;
	struct ena_adapter *adapter = eth_dev->data->dev_private;
	struct ena_admin_ena_hw_hints *hints;

	if (aenq_e->aenq_common_desc.group != ENA_ADMIN_NOTIFICATION)
		PMD_DRV_LOG_LINE(WARNING, "Invalid AENQ group: %x. Expected: %x",
			aenq_e->aenq_common_desc.group,
			ENA_ADMIN_NOTIFICATION);

	switch (aenq_e->aenq_common_desc.syndrome) {
	case ENA_ADMIN_UPDATE_HINTS:
		hints = (struct ena_admin_ena_hw_hints *)
			(&aenq_e->inline_data_w4);
		ena_update_hints(adapter, hints);
		break;
	default:
		PMD_DRV_LOG_LINE(ERR, "Invalid AENQ notification syndrome: %d",
			aenq_e->aenq_common_desc.syndrome);
	}
}

static void ena_keep_alive(void *adapter_data,
			   struct ena_admin_aenq_entry *aenq_e)
{
	struct rte_eth_dev *eth_dev = adapter_data;
	struct ena_adapter *adapter = eth_dev->data->dev_private;
	struct ena_admin_aenq_keep_alive_desc *desc;
	uint64_t rx_drops;
	uint64_t tx_drops;
	uint64_t rx_overruns;

	adapter->timestamp_wd = rte_get_timer_cycles();

	desc = (struct ena_admin_aenq_keep_alive_desc *)aenq_e;
	rx_drops = ((uint64_t)desc->rx_drops_high << 32) | desc->rx_drops_low;
	tx_drops = ((uint64_t)desc->tx_drops_high << 32) | desc->tx_drops_low;
	rx_overruns = ((uint64_t)desc->rx_overruns_high << 32) | desc->rx_overruns_low;

	/*
	 * Depending on its acceleration support, the device updates a different
	 * statistic when an Rx packet is dropped because there are no available
	 * buffers to accommodate it, so both counters are folded into rx_drops.
	 */
	adapter->drv_stats->rx_drops = rx_drops + rx_overruns;
	adapter->dev_stats.tx_drops = tx_drops;
}

static void ena_suboptimal_configuration(__rte_unused void *adapter_data,
					 struct ena_admin_aenq_entry *aenq_e)
{
	struct ena_admin_aenq_conf_notifications_desc *desc;
	int bit, num_bits;

	desc = (struct ena_admin_aenq_conf_notifications_desc *)aenq_e;
	num_bits = BITS_PER_TYPE(desc->notifications_bitmap);
	for (bit = 0; bit < num_bits; bit++) {
		if (desc->notifications_bitmap & RTE_BIT64(bit)) {
			PMD_DRV_LOG_LINE(WARNING,
				"Sub-optimal configuration notification code: %d", bit + 1);
		}
	}
}

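/*
 * Illustrative only: notifications_bitmap is a plain bit mask, so a value of
 * 0x5 delivered in the descriptor handled above would produce two warnings,
 * for notification codes 1 (bit 0) and 3 (bit 2).
 */
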
/**
 * This handler is called for events from an unknown group or for events whose
 * handler is not implemented.
 **/
static void unimplemented_aenq_handler(__rte_unused void *data,
				       __rte_unused struct ena_admin_aenq_entry *aenq_e)
{
	PMD_DRV_LOG_LINE(ERR,
		"Unknown event was received or event with unimplemented handler");
}

static struct ena_aenq_handlers aenq_handlers = {
	.handlers = {
		[ENA_ADMIN_LINK_CHANGE] = ena_update_on_link_change,
		[ENA_ADMIN_NOTIFICATION] = ena_notification,
		[ENA_ADMIN_KEEP_ALIVE] = ena_keep_alive,
		[ENA_ADMIN_CONF_NOTIFICATIONS] = ena_suboptimal_configuration
	},
	.unimplemented_handler = unimplemented_aenq_handler
};

/*********************************************************************
 *  Multi-Process communication request handling (in primary)
 *********************************************************************/
static int
ena_mp_primary_handle(const struct rte_mp_msg *mp_msg, const void *peer)
{
	const struct ena_mp_body *req =
		(const struct ena_mp_body *)mp_msg->param;
	struct ena_adapter *adapter;
	struct ena_com_dev *ena_dev;
	struct ena_mp_body *rsp;
	struct rte_mp_msg mp_rsp;
	struct rte_eth_dev *dev;
	int res = 0;

	rsp = (struct ena_mp_body *)&mp_rsp.param;
	mp_msg_init(&mp_rsp, req->type, req->port_id);

	if (!rte_eth_dev_is_valid_port(req->port_id)) {
		rte_errno = ENODEV;
		res = -rte_errno;
		PMD_DRV_LOG_LINE(ERR, "Unknown port %d in request %d",
			req->port_id, req->type);
		goto end;
	}
	dev = &rte_eth_devices[req->port_id];
	adapter = dev->data->dev_private;
	ena_dev = &adapter->ena_dev;

	switch (req->type) {
	case ENA_MP_DEV_STATS_GET:
		res = ena_com_get_dev_basic_stats(ena_dev,
			&adapter->basic_stats);
		break;
	case ENA_MP_ENI_STATS_GET:
		res = ena_com_get_eni_stats(ena_dev,
			(struct ena_admin_eni_stats *)&adapter->metrics_stats);
		break;
	case ENA_MP_MTU_SET:
		res = ena_com_set_dev_mtu(ena_dev, req->args.mtu);
		break;
	case ENA_MP_IND_TBL_GET:
		res = ena_com_indirect_table_get(ena_dev,
			adapter->indirect_table);
		break;
	case ENA_MP_IND_TBL_SET:
		res = ena_com_indirect_table_set(ena_dev);
		break;
	case ENA_MP_CUSTOMER_METRICS_GET:
		res = ena_com_get_customer_metrics(ena_dev,
			(char *)adapter->metrics_stats,
			adapter->metrics_num * sizeof(uint64_t));
		break;
	case ENA_MP_SRD_STATS_GET:
		res = ena_com_get_ena_srd_info(ena_dev,
			(struct ena_admin_ena_srd_info *)&adapter->srd_stats);
		break;
	default:
		PMD_DRV_LOG_LINE(ERR, "Unknown request type %d", req->type);
		res = -EINVAL;
		break;
	}

end:
	/* Save processing result in the reply */
	rsp->result = res;
	/* Return just IPC processing status */
	return rte_mp_reply(&mp_rsp, peer);
}

static bool ena_use_large_llq_hdr(struct ena_adapter *adapter, uint8_t recommended_entry_size)
{
	if (adapter->llq_header_policy == ENA_LLQ_POLICY_LARGE) {
		return true;
	} else if (adapter->llq_header_policy == ENA_LLQ_POLICY_RECOMMENDED) {
		PMD_DRV_LOG_LINE(INFO, "Recommended device entry size policy %u",
			recommended_entry_size);
		if (recommended_entry_size == ENA_ADMIN_LIST_ENTRY_SIZE_256B)
			return true;
	}
	return false;
}
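
/*
 * Illustrative only: how the llq_policy devarg maps onto the decision made by
 * ena_use_large_llq_hdr() above.
 *
 *   ENA_LLQ_POLICY_LARGE       -> ena_use_large_llq_hdr() returns true
 *   ENA_LLQ_POLICY_RECOMMENDED -> returns true only when the device recommends
 *                                 ENA_ADMIN_LIST_ENTRY_SIZE_256B entries
 *   other accepted policies    -> returns false (normal LLQ headers)
 */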