/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright (c) 2015-2020 Amazon.com, Inc. or its affiliates.
 * All rights reserved.
 */

#include <rte_string_fns.h>
#include <rte_errno.h>
#include <rte_version.h>
#include <rte_net.h>
#include <rte_kvargs.h>

#include "ena_ethdev.h"
#include "ena_logs.h"
#include "ena_platform.h"
#include "ena_com.h"
#include "ena_eth_com.h"

#include <ena_common_defs.h>
#include <ena_regs_defs.h>
#include <ena_admin_defs.h>
#include <ena_eth_io_defs.h>

#define DRV_MODULE_VER_MAJOR	2
#define DRV_MODULE_VER_MINOR	8
#define DRV_MODULE_VER_SUBMINOR	0

#define __MERGE_64B_H_L(h, l) (((uint64_t)h << 32) | l)

#define GET_L4_HDR_LEN(mbuf)					\
	((rte_pktmbuf_mtod_offset(mbuf, struct rte_tcp_hdr *,	\
		mbuf->l3_len + mbuf->l2_len)->data_off) >> 4)

#define ETH_GSTRING_LEN	32

#define ARRAY_SIZE(x) RTE_DIM(x)

#define ENA_MIN_RING_DESC	128

#define BITS_PER_BYTE 8

#define BITS_PER_TYPE(type) (sizeof(type) * BITS_PER_BYTE)

#define DECIMAL_BASE 10

#define MAX_WIDE_LLQ_DEPTH_UNSUPPORTED 0

/*
 * We should try to keep ENA_CLEANUP_BUF_SIZE lower than
 * RTE_MEMPOOL_CACHE_MAX_SIZE, so it can fit in the mempool local cache.
 */
#define ENA_CLEANUP_BUF_SIZE	256

#define ENA_PTYPE_HAS_HASH	(RTE_PTYPE_L4_TCP | RTE_PTYPE_L4_UDP)

struct ena_stats {
	char name[ETH_GSTRING_LEN];
	int stat_offset;
};

#define ENA_STAT_ENTRY(stat, stat_type) { \
	.name = #stat, \
	.stat_offset = offsetof(struct ena_stats_##stat_type, stat) \
}

#define ENA_STAT_RX_ENTRY(stat) \
	ENA_STAT_ENTRY(stat, rx)

#define ENA_STAT_TX_ENTRY(stat) \
	ENA_STAT_ENTRY(stat, tx)

#define ENA_STAT_METRICS_ENTRY(stat) \
	ENA_STAT_ENTRY(stat, metrics)

#define ENA_STAT_GLOBAL_ENTRY(stat) \
	ENA_STAT_ENTRY(stat, dev)

#define ENA_STAT_ENA_SRD_ENTRY(stat) \
	ENA_STAT_ENTRY(stat, srd)

/* Device arguments */
#define ENA_DEVARG_LARGE_LLQ_HDR "large_llq_hdr"
#define ENA_DEVARG_NORMAL_LLQ_HDR "normal_llq_hdr"
/* Timeout in seconds after which a single uncompleted Tx packet should be
 * considered as missing.
 */
#define ENA_DEVARG_MISS_TXC_TO "miss_txc_to"
/*
 * Controls whether LLQ should be used (if available). Enabled by default.
 * NOTE: Disabling the LLQ is highly discouraged, as it may lead to a huge
 * performance degradation on 6th generation AWS instances.
 */
#define ENA_DEVARG_ENABLE_LLQ "enable_llq"

/*
 * Each rte_memzone should have a unique name.
 * To satisfy this, count the number of allocations and append it to the name.
 */
rte_atomic64_t ena_alloc_cnt;

static const struct ena_stats ena_stats_global_strings[] = {
	ENA_STAT_GLOBAL_ENTRY(wd_expired),
	ENA_STAT_GLOBAL_ENTRY(dev_start),
	ENA_STAT_GLOBAL_ENTRY(dev_stop),
	ENA_STAT_GLOBAL_ENTRY(tx_drops),
};

/*
 * The legacy metrics (also known as eni stats) consisted of 5 stats, while the
 * reworked metrics (also known as customer metrics) support an additional stat.
110 */ 111 static struct ena_stats ena_stats_metrics_strings[] = { 112 ENA_STAT_METRICS_ENTRY(bw_in_allowance_exceeded), 113 ENA_STAT_METRICS_ENTRY(bw_out_allowance_exceeded), 114 ENA_STAT_METRICS_ENTRY(pps_allowance_exceeded), 115 ENA_STAT_METRICS_ENTRY(conntrack_allowance_exceeded), 116 ENA_STAT_METRICS_ENTRY(linklocal_allowance_exceeded), 117 ENA_STAT_METRICS_ENTRY(conntrack_allowance_available), 118 }; 119 120 static const struct ena_stats ena_stats_srd_strings[] = { 121 ENA_STAT_ENA_SRD_ENTRY(ena_srd_mode), 122 ENA_STAT_ENA_SRD_ENTRY(ena_srd_tx_pkts), 123 ENA_STAT_ENA_SRD_ENTRY(ena_srd_eligible_tx_pkts), 124 ENA_STAT_ENA_SRD_ENTRY(ena_srd_rx_pkts), 125 ENA_STAT_ENA_SRD_ENTRY(ena_srd_resource_utilization), 126 }; 127 128 static const struct ena_stats ena_stats_tx_strings[] = { 129 ENA_STAT_TX_ENTRY(cnt), 130 ENA_STAT_TX_ENTRY(bytes), 131 ENA_STAT_TX_ENTRY(prepare_ctx_err), 132 ENA_STAT_TX_ENTRY(tx_poll), 133 ENA_STAT_TX_ENTRY(doorbells), 134 ENA_STAT_TX_ENTRY(bad_req_id), 135 ENA_STAT_TX_ENTRY(available_desc), 136 ENA_STAT_TX_ENTRY(missed_tx), 137 }; 138 139 static const struct ena_stats ena_stats_rx_strings[] = { 140 ENA_STAT_RX_ENTRY(cnt), 141 ENA_STAT_RX_ENTRY(bytes), 142 ENA_STAT_RX_ENTRY(refill_partial), 143 ENA_STAT_RX_ENTRY(l3_csum_bad), 144 ENA_STAT_RX_ENTRY(l4_csum_bad), 145 ENA_STAT_RX_ENTRY(l4_csum_good), 146 ENA_STAT_RX_ENTRY(mbuf_alloc_fail), 147 ENA_STAT_RX_ENTRY(bad_desc_num), 148 ENA_STAT_RX_ENTRY(bad_req_id), 149 }; 150 151 #define ENA_STATS_ARRAY_GLOBAL ARRAY_SIZE(ena_stats_global_strings) 152 #define ENA_STATS_ARRAY_METRICS ARRAY_SIZE(ena_stats_metrics_strings) 153 #define ENA_STATS_ARRAY_METRICS_LEGACY (ENA_STATS_ARRAY_METRICS - 1) 154 #define ENA_STATS_ARRAY_ENA_SRD ARRAY_SIZE(ena_stats_srd_strings) 155 #define ENA_STATS_ARRAY_TX ARRAY_SIZE(ena_stats_tx_strings) 156 #define ENA_STATS_ARRAY_RX ARRAY_SIZE(ena_stats_rx_strings) 157 158 #define QUEUE_OFFLOADS (RTE_ETH_TX_OFFLOAD_TCP_CKSUM |\ 159 RTE_ETH_TX_OFFLOAD_UDP_CKSUM |\ 160 RTE_ETH_TX_OFFLOAD_IPV4_CKSUM |\ 161 RTE_ETH_TX_OFFLOAD_TCP_TSO) 162 #define MBUF_OFFLOADS (RTE_MBUF_F_TX_L4_MASK |\ 163 RTE_MBUF_F_TX_IP_CKSUM |\ 164 RTE_MBUF_F_TX_TCP_SEG) 165 166 /** Vendor ID used by Amazon devices */ 167 #define PCI_VENDOR_ID_AMAZON 0x1D0F 168 /** Amazon devices */ 169 #define PCI_DEVICE_ID_ENA_VF 0xEC20 170 #define PCI_DEVICE_ID_ENA_VF_RSERV0 0xEC21 171 172 #define ENA_TX_OFFLOAD_MASK (RTE_MBUF_F_TX_L4_MASK | \ 173 RTE_MBUF_F_TX_IPV6 | \ 174 RTE_MBUF_F_TX_IPV4 | \ 175 RTE_MBUF_F_TX_IP_CKSUM | \ 176 RTE_MBUF_F_TX_TCP_SEG) 177 178 #define ENA_TX_OFFLOAD_NOTSUP_MASK \ 179 (RTE_MBUF_F_TX_OFFLOAD_MASK ^ ENA_TX_OFFLOAD_MASK) 180 181 /** HW specific offloads capabilities. */ 182 /* IPv4 checksum offload. */ 183 #define ENA_L3_IPV4_CSUM 0x0001 184 /* TCP/UDP checksum offload for IPv4 packets. */ 185 #define ENA_L4_IPV4_CSUM 0x0002 186 /* TCP/UDP checksum offload for IPv4 packets with pseudo header checksum. */ 187 #define ENA_L4_IPV4_CSUM_PARTIAL 0x0004 188 /* TCP/UDP checksum offload for IPv6 packets. */ 189 #define ENA_L4_IPV6_CSUM 0x0008 190 /* TCP/UDP checksum offload for IPv6 packets with pseudo header checksum. */ 191 #define ENA_L4_IPV6_CSUM_PARTIAL 0x0010 192 /* TSO support for IPv4 packets. */ 193 #define ENA_IPV4_TSO 0x0020 194 195 /* Device supports setting RSS hash. 
*/ 196 #define ENA_RX_RSS_HASH 0x0040 197 198 static const struct rte_pci_id pci_id_ena_map[] = { 199 { RTE_PCI_DEVICE(PCI_VENDOR_ID_AMAZON, PCI_DEVICE_ID_ENA_VF) }, 200 { RTE_PCI_DEVICE(PCI_VENDOR_ID_AMAZON, PCI_DEVICE_ID_ENA_VF_RSERV0) }, 201 { .device_id = 0 }, 202 }; 203 204 static struct ena_aenq_handlers aenq_handlers; 205 206 static int ena_device_init(struct ena_adapter *adapter, 207 struct rte_pci_device *pdev, 208 struct ena_com_dev_get_features_ctx *get_feat_ctx); 209 static int ena_dev_configure(struct rte_eth_dev *dev); 210 static void ena_tx_map_mbuf(struct ena_ring *tx_ring, 211 struct ena_tx_buffer *tx_info, 212 struct rte_mbuf *mbuf, 213 void **push_header, 214 uint16_t *header_len); 215 static int ena_xmit_mbuf(struct ena_ring *tx_ring, struct rte_mbuf *mbuf); 216 static int ena_tx_cleanup(void *txp, uint32_t free_pkt_cnt); 217 static uint16_t eth_ena_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, 218 uint16_t nb_pkts); 219 static uint16_t eth_ena_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, 220 uint16_t nb_pkts); 221 static int ena_tx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx, 222 uint16_t nb_desc, unsigned int socket_id, 223 const struct rte_eth_txconf *tx_conf); 224 static int ena_rx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx, 225 uint16_t nb_desc, unsigned int socket_id, 226 const struct rte_eth_rxconf *rx_conf, 227 struct rte_mempool *mp); 228 static inline void ena_init_rx_mbuf(struct rte_mbuf *mbuf, uint16_t len); 229 static struct rte_mbuf *ena_rx_mbuf(struct ena_ring *rx_ring, 230 struct ena_com_rx_buf_info *ena_bufs, 231 uint32_t descs, 232 uint16_t *next_to_clean, 233 uint8_t offset); 234 static uint16_t eth_ena_recv_pkts(void *rx_queue, 235 struct rte_mbuf **rx_pkts, uint16_t nb_pkts); 236 static int ena_add_single_rx_desc(struct ena_com_io_sq *io_sq, 237 struct rte_mbuf *mbuf, uint16_t id); 238 static int ena_populate_rx_queue(struct ena_ring *rxq, unsigned int count); 239 static void ena_init_rings(struct ena_adapter *adapter, 240 bool disable_meta_caching); 241 static int ena_mtu_set(struct rte_eth_dev *dev, uint16_t mtu); 242 static int ena_start(struct rte_eth_dev *dev); 243 static int ena_stop(struct rte_eth_dev *dev); 244 static int ena_close(struct rte_eth_dev *dev); 245 static int ena_dev_reset(struct rte_eth_dev *dev); 246 static int ena_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats); 247 static void ena_rx_queue_release_all(struct rte_eth_dev *dev); 248 static void ena_tx_queue_release_all(struct rte_eth_dev *dev); 249 static void ena_rx_queue_release(struct rte_eth_dev *dev, uint16_t qid); 250 static void ena_tx_queue_release(struct rte_eth_dev *dev, uint16_t qid); 251 static void ena_rx_queue_release_bufs(struct ena_ring *ring); 252 static void ena_tx_queue_release_bufs(struct ena_ring *ring); 253 static int ena_link_update(struct rte_eth_dev *dev, 254 int wait_to_complete); 255 static int ena_create_io_queue(struct rte_eth_dev *dev, struct ena_ring *ring); 256 static void ena_queue_stop(struct ena_ring *ring); 257 static void ena_queue_stop_all(struct rte_eth_dev *dev, 258 enum ena_ring_type ring_type); 259 static int ena_queue_start(struct rte_eth_dev *dev, struct ena_ring *ring); 260 static int ena_queue_start_all(struct rte_eth_dev *dev, 261 enum ena_ring_type ring_type); 262 static void ena_stats_restart(struct rte_eth_dev *dev); 263 static uint64_t ena_get_rx_port_offloads(struct ena_adapter *adapter); 264 static uint64_t ena_get_tx_port_offloads(struct ena_adapter *adapter); 265 
static uint64_t ena_get_rx_queue_offloads(struct ena_adapter *adapter); 266 static uint64_t ena_get_tx_queue_offloads(struct ena_adapter *adapter); 267 static int ena_infos_get(struct rte_eth_dev *dev, 268 struct rte_eth_dev_info *dev_info); 269 static void ena_interrupt_handler_rte(void *cb_arg); 270 static void ena_timer_wd_callback(struct rte_timer *timer, void *arg); 271 static void ena_destroy_device(struct rte_eth_dev *eth_dev); 272 static int eth_ena_dev_init(struct rte_eth_dev *eth_dev); 273 static int ena_xstats_get_names(struct rte_eth_dev *dev, 274 struct rte_eth_xstat_name *xstats_names, 275 unsigned int n); 276 static int ena_xstats_get_names_by_id(struct rte_eth_dev *dev, 277 const uint64_t *ids, 278 struct rte_eth_xstat_name *xstats_names, 279 unsigned int size); 280 static int ena_xstats_get(struct rte_eth_dev *dev, 281 struct rte_eth_xstat *stats, 282 unsigned int n); 283 static int ena_xstats_get_by_id(struct rte_eth_dev *dev, 284 const uint64_t *ids, 285 uint64_t *values, 286 unsigned int n); 287 static int ena_process_bool_devarg(const char *key, 288 const char *value, 289 void *opaque); 290 static int ena_parse_devargs(struct ena_adapter *adapter, 291 struct rte_devargs *devargs); 292 static void ena_copy_customer_metrics(struct ena_adapter *adapter, 293 uint64_t *buf, 294 size_t buf_size); 295 static void ena_copy_ena_srd_info(struct ena_adapter *adapter, 296 struct ena_stats_srd *srd_info); 297 static int ena_setup_rx_intr(struct rte_eth_dev *dev); 298 static int ena_rx_queue_intr_enable(struct rte_eth_dev *dev, 299 uint16_t queue_id); 300 static int ena_rx_queue_intr_disable(struct rte_eth_dev *dev, 301 uint16_t queue_id); 302 static int ena_configure_aenq(struct ena_adapter *adapter); 303 static int ena_mp_primary_handle(const struct rte_mp_msg *mp_msg, 304 const void *peer); 305 static ena_llq_policy ena_define_llq_hdr_policy(struct ena_adapter *adapter); 306 static bool ena_use_large_llq_hdr(struct ena_adapter *adapter, uint8_t recommended_entry_size); 307 308 static const struct eth_dev_ops ena_dev_ops = { 309 .dev_configure = ena_dev_configure, 310 .dev_infos_get = ena_infos_get, 311 .rx_queue_setup = ena_rx_queue_setup, 312 .tx_queue_setup = ena_tx_queue_setup, 313 .dev_start = ena_start, 314 .dev_stop = ena_stop, 315 .link_update = ena_link_update, 316 .stats_get = ena_stats_get, 317 .xstats_get_names = ena_xstats_get_names, 318 .xstats_get_names_by_id = ena_xstats_get_names_by_id, 319 .xstats_get = ena_xstats_get, 320 .xstats_get_by_id = ena_xstats_get_by_id, 321 .mtu_set = ena_mtu_set, 322 .rx_queue_release = ena_rx_queue_release, 323 .tx_queue_release = ena_tx_queue_release, 324 .dev_close = ena_close, 325 .dev_reset = ena_dev_reset, 326 .reta_update = ena_rss_reta_update, 327 .reta_query = ena_rss_reta_query, 328 .rx_queue_intr_enable = ena_rx_queue_intr_enable, 329 .rx_queue_intr_disable = ena_rx_queue_intr_disable, 330 .rss_hash_update = ena_rss_hash_update, 331 .rss_hash_conf_get = ena_rss_hash_conf_get, 332 .tx_done_cleanup = ena_tx_cleanup, 333 }; 334 335 /********************************************************************* 336 * Multi-Process communication bits 337 *********************************************************************/ 338 /* rte_mp IPC message name */ 339 #define ENA_MP_NAME "net_ena_mp" 340 /* Request timeout in seconds */ 341 #define ENA_MP_REQ_TMO 5 342 343 /** Proxy request type */ 344 enum ena_mp_req { 345 ENA_MP_DEV_STATS_GET, 346 ENA_MP_ENI_STATS_GET, 347 ENA_MP_MTU_SET, 348 ENA_MP_IND_TBL_GET, 349 ENA_MP_IND_TBL_SET, 350 
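	/* Requests serving the reworked (customer) metrics and ENA SRD statistics */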
ENA_MP_CUSTOMER_METRICS_GET, 351 ENA_MP_SRD_STATS_GET, 352 }; 353 354 /** Proxy message body. Shared between requests and responses. */ 355 struct ena_mp_body { 356 /* Message type */ 357 enum ena_mp_req type; 358 int port_id; 359 /* Processing result. Set in replies. 0 if message succeeded, negative 360 * error code otherwise. 361 */ 362 int result; 363 union { 364 int mtu; /* For ENA_MP_MTU_SET */ 365 } args; 366 }; 367 368 /** 369 * Initialize IPC message. 370 * 371 * @param[out] msg 372 * Pointer to the message to initialize. 373 * @param[in] type 374 * Message type. 375 * @param[in] port_id 376 * Port ID of target device. 377 * 378 */ 379 static void 380 mp_msg_init(struct rte_mp_msg *msg, enum ena_mp_req type, int port_id) 381 { 382 struct ena_mp_body *body = (struct ena_mp_body *)&msg->param; 383 384 memset(msg, 0, sizeof(*msg)); 385 strlcpy(msg->name, ENA_MP_NAME, sizeof(msg->name)); 386 msg->len_param = sizeof(*body); 387 body->type = type; 388 body->port_id = port_id; 389 } 390 391 /********************************************************************* 392 * Multi-Process communication PMD API 393 *********************************************************************/ 394 /** 395 * Define proxy request descriptor 396 * 397 * Used to define all structures and functions required for proxying a given 398 * function to the primary process including the code to perform to prepare the 399 * request and process the response. 400 * 401 * @param[in] f 402 * Name of the function to proxy 403 * @param[in] t 404 * Message type to use 405 * @param[in] prep 406 * Body of a function to prepare the request in form of a statement 407 * expression. It is passed all the original function arguments along with two 408 * extra ones: 409 * - struct ena_adapter *adapter - PMD data of the device calling the proxy. 410 * - struct ena_mp_body *req - body of a request to prepare. 411 * @param[in] proc 412 * Body of a function to process the response in form of a statement 413 * expression. It is passed all the original function arguments along with two 414 * extra ones: 415 * - struct ena_adapter *adapter - PMD data of the device calling the proxy. 416 * - struct ena_mp_body *rsp - body of a response to process. 417 * @param ... 418 * Proxied function's arguments 419 * 420 * @note Inside prep and proc any parameters which aren't used should be marked 421 * as such (with ENA_TOUCH or __rte_unused). 422 */ 423 #define ENA_PROXY_DESC(f, t, prep, proc, ...) \ 424 static const enum ena_mp_req mp_type_ ## f = t; \ 425 static const char *mp_name_ ## f = #t; \ 426 static void mp_prep_ ## f(struct ena_adapter *adapter, \ 427 struct ena_mp_body *req, \ 428 __VA_ARGS__) \ 429 { \ 430 prep; \ 431 } \ 432 static void mp_proc_ ## f(struct ena_adapter *adapter, \ 433 struct ena_mp_body *rsp, \ 434 __VA_ARGS__) \ 435 { \ 436 proc; \ 437 } 438 439 /** 440 * Proxy wrapper for calling primary functions in a secondary process. 441 * 442 * Depending on whether called in primary or secondary process, calls the 443 * @p func directly or proxies the call to the primary process via rte_mp IPC. 444 * This macro requires a proxy request descriptor to be defined for @p func 445 * using ENA_PROXY_DESC() macro. 446 * 447 * @param[in/out] a 448 * Device PMD data. Used for sending the message and sharing message results 449 * between primary and secondary. 450 * @param[in] f 451 * Function to proxy. 452 * @param ... 453 * Arguments of @p func. 454 * 455 * @return 456 * - 0: Processing succeeded and response handler was called. 
457 * - -EPERM: IPC is unavailable on this platform. This means only primary 458 * process may call the proxied function. 459 * - -EIO: IPC returned error on request send. Inspect rte_errno detailed 460 * error code. 461 * - Negative error code from the proxied function. 462 * 463 * @note This mechanism is geared towards control-path tasks. Avoid calling it 464 * in fast-path unless unbound delays are allowed. This is due to the IPC 465 * mechanism itself (socket based). 466 * @note Due to IPC parameter size limitations the proxy logic shares call 467 * results through the struct ena_adapter shared memory. This makes the 468 * proxy mechanism strictly single-threaded. Therefore be sure to make all 469 * calls to the same proxied function under the same lock. 470 */ 471 #define ENA_PROXY(a, f, ...) \ 472 __extension__ ({ \ 473 struct ena_adapter *_a = (a); \ 474 struct timespec ts = { .tv_sec = ENA_MP_REQ_TMO }; \ 475 struct ena_mp_body *req, *rsp; \ 476 struct rte_mp_reply mp_rep; \ 477 struct rte_mp_msg mp_req; \ 478 int ret; \ 479 \ 480 if (rte_eal_process_type() == RTE_PROC_PRIMARY) { \ 481 ret = f(__VA_ARGS__); \ 482 } else { \ 483 /* Prepare and send request */ \ 484 req = (struct ena_mp_body *)&mp_req.param; \ 485 mp_msg_init(&mp_req, mp_type_ ## f, _a->edev_data->port_id); \ 486 mp_prep_ ## f(_a, req, ## __VA_ARGS__); \ 487 \ 488 ret = rte_mp_request_sync(&mp_req, &mp_rep, &ts); \ 489 if (likely(!ret)) { \ 490 RTE_ASSERT(mp_rep.nb_received == 1); \ 491 rsp = (struct ena_mp_body *)&mp_rep.msgs[0].param; \ 492 ret = rsp->result; \ 493 if (ret == 0) { \ 494 mp_proc_##f(_a, rsp, ## __VA_ARGS__); \ 495 } else { \ 496 PMD_DRV_LOG(ERR, \ 497 "%s returned error: %d\n", \ 498 mp_name_ ## f, rsp->result);\ 499 } \ 500 free(mp_rep.msgs); \ 501 } else if (rte_errno == ENOTSUP) { \ 502 PMD_DRV_LOG(ERR, \ 503 "No IPC, can't proxy to primary\n");\ 504 ret = -rte_errno; \ 505 } else { \ 506 PMD_DRV_LOG(ERR, "Request %s failed: %s\n", \ 507 mp_name_ ## f, \ 508 rte_strerror(rte_errno)); \ 509 ret = -EIO; \ 510 } \ 511 } \ 512 ret; \ 513 }) 514 515 /********************************************************************* 516 * Multi-Process communication request descriptors 517 *********************************************************************/ 518 519 ENA_PROXY_DESC(ena_com_get_dev_basic_stats, ENA_MP_DEV_STATS_GET, 520 __extension__ ({ 521 ENA_TOUCH(adapter); 522 ENA_TOUCH(req); 523 ENA_TOUCH(ena_dev); 524 ENA_TOUCH(stats); 525 }), 526 __extension__ ({ 527 ENA_TOUCH(rsp); 528 ENA_TOUCH(ena_dev); 529 if (stats != &adapter->basic_stats) 530 rte_memcpy(stats, &adapter->basic_stats, sizeof(*stats)); 531 }), 532 struct ena_com_dev *ena_dev, struct ena_admin_basic_stats *stats); 533 534 ENA_PROXY_DESC(ena_com_get_eni_stats, ENA_MP_ENI_STATS_GET, 535 __extension__ ({ 536 ENA_TOUCH(adapter); 537 ENA_TOUCH(req); 538 ENA_TOUCH(ena_dev); 539 ENA_TOUCH(stats); 540 }), 541 __extension__ ({ 542 ENA_TOUCH(rsp); 543 ENA_TOUCH(ena_dev); 544 if (stats != (struct ena_admin_eni_stats *)adapter->metrics_stats) 545 rte_memcpy(stats, adapter->metrics_stats, sizeof(*stats)); 546 }), 547 struct ena_com_dev *ena_dev, struct ena_admin_eni_stats *stats); 548 549 ENA_PROXY_DESC(ena_com_set_dev_mtu, ENA_MP_MTU_SET, 550 __extension__ ({ 551 ENA_TOUCH(adapter); 552 ENA_TOUCH(ena_dev); 553 req->args.mtu = mtu; 554 }), 555 __extension__ ({ 556 ENA_TOUCH(adapter); 557 ENA_TOUCH(rsp); 558 ENA_TOUCH(ena_dev); 559 ENA_TOUCH(mtu); 560 }), 561 struct ena_com_dev *ena_dev, int mtu); 562 563 ENA_PROXY_DESC(ena_com_indirect_table_set, 
ENA_MP_IND_TBL_SET, 564 __extension__ ({ 565 ENA_TOUCH(adapter); 566 ENA_TOUCH(req); 567 ENA_TOUCH(ena_dev); 568 }), 569 __extension__ ({ 570 ENA_TOUCH(adapter); 571 ENA_TOUCH(rsp); 572 ENA_TOUCH(ena_dev); 573 }), 574 struct ena_com_dev *ena_dev); 575 576 ENA_PROXY_DESC(ena_com_indirect_table_get, ENA_MP_IND_TBL_GET, 577 __extension__ ({ 578 ENA_TOUCH(adapter); 579 ENA_TOUCH(req); 580 ENA_TOUCH(ena_dev); 581 ENA_TOUCH(ind_tbl); 582 }), 583 __extension__ ({ 584 ENA_TOUCH(rsp); 585 ENA_TOUCH(ena_dev); 586 if (ind_tbl != adapter->indirect_table) 587 rte_memcpy(ind_tbl, adapter->indirect_table, 588 sizeof(adapter->indirect_table)); 589 }), 590 struct ena_com_dev *ena_dev, u32 *ind_tbl); 591 592 ENA_PROXY_DESC(ena_com_get_customer_metrics, ENA_MP_CUSTOMER_METRICS_GET, 593 __extension__ ({ 594 ENA_TOUCH(adapter); 595 ENA_TOUCH(req); 596 ENA_TOUCH(ena_dev); 597 ENA_TOUCH(buf); 598 ENA_TOUCH(buf_size); 599 }), 600 __extension__ ({ 601 ENA_TOUCH(rsp); 602 ENA_TOUCH(ena_dev); 603 if (buf != (char *)adapter->metrics_stats) 604 rte_memcpy(buf, adapter->metrics_stats, buf_size); 605 }), 606 struct ena_com_dev *ena_dev, char *buf, size_t buf_size); 607 608 ENA_PROXY_DESC(ena_com_get_ena_srd_info, ENA_MP_SRD_STATS_GET, 609 __extension__ ({ 610 ENA_TOUCH(adapter); 611 ENA_TOUCH(req); 612 ENA_TOUCH(ena_dev); 613 ENA_TOUCH(info); 614 }), 615 __extension__ ({ 616 ENA_TOUCH(rsp); 617 ENA_TOUCH(ena_dev); 618 if ((struct ena_stats_srd *)info != &adapter->srd_stats) 619 rte_memcpy((struct ena_stats_srd *)info, 620 &adapter->srd_stats, 621 sizeof(struct ena_stats_srd)); 622 }), 623 struct ena_com_dev *ena_dev, struct ena_admin_ena_srd_info *info); 624 625 static inline void ena_trigger_reset(struct ena_adapter *adapter, 626 enum ena_regs_reset_reason_types reason) 627 { 628 if (likely(!adapter->trigger_reset)) { 629 adapter->reset_reason = reason; 630 adapter->trigger_reset = true; 631 } 632 } 633 634 static inline void ena_rx_mbuf_prepare(struct ena_ring *rx_ring, 635 struct rte_mbuf *mbuf, 636 struct ena_com_rx_ctx *ena_rx_ctx, 637 bool fill_hash) 638 { 639 struct ena_stats_rx *rx_stats = &rx_ring->rx_stats; 640 uint64_t ol_flags = 0; 641 uint32_t packet_type = 0; 642 643 if (ena_rx_ctx->l4_proto == ENA_ETH_IO_L4_PROTO_TCP) 644 packet_type |= RTE_PTYPE_L4_TCP; 645 else if (ena_rx_ctx->l4_proto == ENA_ETH_IO_L4_PROTO_UDP) 646 packet_type |= RTE_PTYPE_L4_UDP; 647 648 if (ena_rx_ctx->l3_proto == ENA_ETH_IO_L3_PROTO_IPV4) { 649 packet_type |= RTE_PTYPE_L3_IPV4; 650 if (unlikely(ena_rx_ctx->l3_csum_err)) { 651 ++rx_stats->l3_csum_bad; 652 ol_flags |= RTE_MBUF_F_RX_IP_CKSUM_BAD; 653 } else { 654 ol_flags |= RTE_MBUF_F_RX_IP_CKSUM_GOOD; 655 } 656 } else if (ena_rx_ctx->l3_proto == ENA_ETH_IO_L3_PROTO_IPV6) { 657 packet_type |= RTE_PTYPE_L3_IPV6; 658 } 659 660 if (!ena_rx_ctx->l4_csum_checked || ena_rx_ctx->frag) { 661 ol_flags |= RTE_MBUF_F_RX_L4_CKSUM_UNKNOWN; 662 } else { 663 if (unlikely(ena_rx_ctx->l4_csum_err)) { 664 ++rx_stats->l4_csum_bad; 665 /* 666 * For the L4 Rx checksum offload the HW may indicate 667 * bad checksum although it's valid. Because of that, 668 * we're setting the UNKNOWN flag to let the app 669 * re-verify the checksum. 
670 */ 671 ol_flags |= RTE_MBUF_F_RX_L4_CKSUM_UNKNOWN; 672 } else { 673 ++rx_stats->l4_csum_good; 674 ol_flags |= RTE_MBUF_F_RX_L4_CKSUM_GOOD; 675 } 676 } 677 678 if (fill_hash && 679 likely((packet_type & ENA_PTYPE_HAS_HASH) && !ena_rx_ctx->frag)) { 680 ol_flags |= RTE_MBUF_F_RX_RSS_HASH; 681 mbuf->hash.rss = ena_rx_ctx->hash; 682 } 683 684 mbuf->ol_flags = ol_flags; 685 mbuf->packet_type = packet_type; 686 } 687 688 static inline void ena_tx_mbuf_prepare(struct rte_mbuf *mbuf, 689 struct ena_com_tx_ctx *ena_tx_ctx, 690 uint64_t queue_offloads, 691 bool disable_meta_caching) 692 { 693 struct ena_com_tx_meta *ena_meta = &ena_tx_ctx->ena_meta; 694 695 if ((mbuf->ol_flags & MBUF_OFFLOADS) && 696 (queue_offloads & QUEUE_OFFLOADS)) { 697 /* check if TSO is required */ 698 if ((mbuf->ol_flags & RTE_MBUF_F_TX_TCP_SEG) && 699 (queue_offloads & RTE_ETH_TX_OFFLOAD_TCP_TSO)) { 700 ena_tx_ctx->tso_enable = true; 701 702 ena_meta->l4_hdr_len = GET_L4_HDR_LEN(mbuf); 703 } 704 705 /* check if L3 checksum is needed */ 706 if ((mbuf->ol_flags & RTE_MBUF_F_TX_IP_CKSUM) && 707 (queue_offloads & RTE_ETH_TX_OFFLOAD_IPV4_CKSUM)) 708 ena_tx_ctx->l3_csum_enable = true; 709 710 if (mbuf->ol_flags & RTE_MBUF_F_TX_IPV6) { 711 ena_tx_ctx->l3_proto = ENA_ETH_IO_L3_PROTO_IPV6; 712 /* For the IPv6 packets, DF always needs to be true. */ 713 ena_tx_ctx->df = 1; 714 } else { 715 ena_tx_ctx->l3_proto = ENA_ETH_IO_L3_PROTO_IPV4; 716 717 /* set don't fragment (DF) flag */ 718 if (mbuf->packet_type & 719 (RTE_PTYPE_L4_NONFRAG 720 | RTE_PTYPE_INNER_L4_NONFRAG)) 721 ena_tx_ctx->df = 1; 722 } 723 724 /* check if L4 checksum is needed */ 725 if (((mbuf->ol_flags & RTE_MBUF_F_TX_L4_MASK) == RTE_MBUF_F_TX_TCP_CKSUM) && 726 (queue_offloads & RTE_ETH_TX_OFFLOAD_TCP_CKSUM)) { 727 ena_tx_ctx->l4_proto = ENA_ETH_IO_L4_PROTO_TCP; 728 ena_tx_ctx->l4_csum_enable = true; 729 } else if (((mbuf->ol_flags & RTE_MBUF_F_TX_L4_MASK) == 730 RTE_MBUF_F_TX_UDP_CKSUM) && 731 (queue_offloads & RTE_ETH_TX_OFFLOAD_UDP_CKSUM)) { 732 ena_tx_ctx->l4_proto = ENA_ETH_IO_L4_PROTO_UDP; 733 ena_tx_ctx->l4_csum_enable = true; 734 } else { 735 ena_tx_ctx->l4_proto = ENA_ETH_IO_L4_PROTO_UNKNOWN; 736 ena_tx_ctx->l4_csum_enable = false; 737 } 738 739 ena_meta->mss = mbuf->tso_segsz; 740 ena_meta->l3_hdr_len = mbuf->l3_len; 741 ena_meta->l3_hdr_offset = mbuf->l2_len; 742 743 ena_tx_ctx->meta_valid = true; 744 } else if (disable_meta_caching) { 745 memset(ena_meta, 0, sizeof(*ena_meta)); 746 ena_tx_ctx->meta_valid = true; 747 } else { 748 ena_tx_ctx->meta_valid = false; 749 } 750 } 751 752 static int validate_tx_req_id(struct ena_ring *tx_ring, u16 req_id) 753 { 754 struct ena_tx_buffer *tx_info = NULL; 755 756 if (likely(req_id < tx_ring->ring_size)) { 757 tx_info = &tx_ring->tx_buffer_info[req_id]; 758 if (likely(tx_info->mbuf)) 759 return 0; 760 } 761 762 if (tx_info) 763 PMD_TX_LOG(ERR, "tx_info doesn't have valid mbuf. 
queue %d:%d req_id %u\n", 764 tx_ring->port_id, tx_ring->id, req_id); 765 else 766 PMD_TX_LOG(ERR, "Invalid req_id: %hu in queue %d:%d\n", 767 req_id, tx_ring->port_id, tx_ring->id); 768 769 /* Trigger device reset */ 770 ++tx_ring->tx_stats.bad_req_id; 771 ena_trigger_reset(tx_ring->adapter, ENA_REGS_RESET_INV_TX_REQ_ID); 772 return -EFAULT; 773 } 774 775 static void ena_config_host_info(struct ena_com_dev *ena_dev) 776 { 777 struct ena_admin_host_info *host_info; 778 int rc; 779 780 /* Allocate only the host info */ 781 rc = ena_com_allocate_host_info(ena_dev); 782 if (rc) { 783 PMD_DRV_LOG(ERR, "Cannot allocate host info\n"); 784 return; 785 } 786 787 host_info = ena_dev->host_attr.host_info; 788 789 host_info->os_type = ENA_ADMIN_OS_DPDK; 790 host_info->kernel_ver = RTE_VERSION; 791 strlcpy((char *)host_info->kernel_ver_str, rte_version(), 792 sizeof(host_info->kernel_ver_str)); 793 host_info->os_dist = RTE_VERSION; 794 strlcpy((char *)host_info->os_dist_str, rte_version(), 795 sizeof(host_info->os_dist_str)); 796 host_info->driver_version = 797 (DRV_MODULE_VER_MAJOR) | 798 (DRV_MODULE_VER_MINOR << ENA_ADMIN_HOST_INFO_MINOR_SHIFT) | 799 (DRV_MODULE_VER_SUBMINOR << 800 ENA_ADMIN_HOST_INFO_SUB_MINOR_SHIFT); 801 host_info->num_cpus = rte_lcore_count(); 802 803 host_info->driver_supported_features = 804 ENA_ADMIN_HOST_INFO_RX_OFFSET_MASK | 805 ENA_ADMIN_HOST_INFO_RSS_CONFIGURABLE_FUNCTION_KEY_MASK; 806 807 rc = ena_com_set_host_attributes(ena_dev); 808 if (rc) { 809 if (rc == -ENA_COM_UNSUPPORTED) 810 PMD_DRV_LOG(WARNING, "Cannot set host attributes\n"); 811 else 812 PMD_DRV_LOG(ERR, "Cannot set host attributes\n"); 813 814 goto err; 815 } 816 817 return; 818 819 err: 820 ena_com_delete_host_info(ena_dev); 821 } 822 823 /* This function calculates the number of xstats based on the current config */ 824 static unsigned int ena_xstats_calc_num(struct rte_eth_dev_data *data) 825 { 826 struct ena_adapter *adapter = data->dev_private; 827 828 return ENA_STATS_ARRAY_GLOBAL + 829 adapter->metrics_num + 830 ENA_STATS_ARRAY_ENA_SRD + 831 (data->nb_tx_queues * ENA_STATS_ARRAY_TX) + 832 (data->nb_rx_queues * ENA_STATS_ARRAY_RX); 833 } 834 835 static void ena_config_debug_area(struct ena_adapter *adapter) 836 { 837 u32 debug_area_size; 838 int rc, ss_count; 839 840 ss_count = ena_xstats_calc_num(adapter->edev_data); 841 842 /* allocate 32 bytes for each string and 64bit for the value */ 843 debug_area_size = ss_count * ETH_GSTRING_LEN + sizeof(u64) * ss_count; 844 845 rc = ena_com_allocate_debug_area(&adapter->ena_dev, debug_area_size); 846 if (rc) { 847 PMD_DRV_LOG(ERR, "Cannot allocate debug area\n"); 848 return; 849 } 850 851 rc = ena_com_set_host_attributes(&adapter->ena_dev); 852 if (rc) { 853 if (rc == -ENA_COM_UNSUPPORTED) 854 PMD_DRV_LOG(WARNING, "Cannot set host attributes\n"); 855 else 856 PMD_DRV_LOG(ERR, "Cannot set host attributes\n"); 857 858 goto err; 859 } 860 861 return; 862 err: 863 ena_com_delete_debug_area(&adapter->ena_dev); 864 } 865 866 static int ena_close(struct rte_eth_dev *dev) 867 { 868 struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev); 869 struct rte_intr_handle *intr_handle = pci_dev->intr_handle; 870 struct ena_adapter *adapter = dev->data->dev_private; 871 int ret = 0; 872 int rc; 873 874 if (rte_eal_process_type() != RTE_PROC_PRIMARY) 875 return 0; 876 877 if (adapter->state == ENA_ADAPTER_STATE_RUNNING) 878 ret = ena_stop(dev); 879 adapter->state = ENA_ADAPTER_STATE_CLOSED; 880 881 rte_intr_disable(intr_handle); 882 rc = 
rte_intr_callback_unregister_sync(intr_handle, ena_interrupt_handler_rte, dev); 883 if (unlikely(rc != 0)) 884 PMD_INIT_LOG(ERR, "Failed to unregister interrupt handler\n"); 885 886 ena_rx_queue_release_all(dev); 887 ena_tx_queue_release_all(dev); 888 889 rte_free(adapter->drv_stats); 890 adapter->drv_stats = NULL; 891 892 /* 893 * MAC is not allocated dynamically. Setting NULL should prevent from 894 * release of the resource in the rte_eth_dev_release_port(). 895 */ 896 dev->data->mac_addrs = NULL; 897 898 return ret; 899 } 900 901 static int 902 ena_dev_reset(struct rte_eth_dev *dev) 903 { 904 int rc = 0; 905 906 /* Cannot release memory in secondary process */ 907 if (rte_eal_process_type() != RTE_PROC_PRIMARY) { 908 PMD_DRV_LOG(WARNING, "dev_reset not supported in secondary.\n"); 909 return -EPERM; 910 } 911 912 ena_destroy_device(dev); 913 rc = eth_ena_dev_init(dev); 914 if (rc) 915 PMD_INIT_LOG(CRIT, "Cannot initialize device\n"); 916 917 return rc; 918 } 919 920 static void ena_rx_queue_release_all(struct rte_eth_dev *dev) 921 { 922 int nb_queues = dev->data->nb_rx_queues; 923 int i; 924 925 for (i = 0; i < nb_queues; i++) 926 ena_rx_queue_release(dev, i); 927 } 928 929 static void ena_tx_queue_release_all(struct rte_eth_dev *dev) 930 { 931 int nb_queues = dev->data->nb_tx_queues; 932 int i; 933 934 for (i = 0; i < nb_queues; i++) 935 ena_tx_queue_release(dev, i); 936 } 937 938 static void ena_rx_queue_release(struct rte_eth_dev *dev, uint16_t qid) 939 { 940 struct ena_ring *ring = dev->data->rx_queues[qid]; 941 942 /* Free ring resources */ 943 rte_free(ring->rx_buffer_info); 944 ring->rx_buffer_info = NULL; 945 946 rte_free(ring->rx_refill_buffer); 947 ring->rx_refill_buffer = NULL; 948 949 rte_free(ring->empty_rx_reqs); 950 ring->empty_rx_reqs = NULL; 951 952 ring->configured = 0; 953 954 PMD_DRV_LOG(NOTICE, "Rx queue %d:%d released\n", 955 ring->port_id, ring->id); 956 } 957 958 static void ena_tx_queue_release(struct rte_eth_dev *dev, uint16_t qid) 959 { 960 struct ena_ring *ring = dev->data->tx_queues[qid]; 961 962 /* Free ring resources */ 963 rte_free(ring->push_buf_intermediate_buf); 964 965 rte_free(ring->tx_buffer_info); 966 967 rte_free(ring->empty_tx_reqs); 968 969 ring->empty_tx_reqs = NULL; 970 ring->tx_buffer_info = NULL; 971 ring->push_buf_intermediate_buf = NULL; 972 973 ring->configured = 0; 974 975 PMD_DRV_LOG(NOTICE, "Tx queue %d:%d released\n", 976 ring->port_id, ring->id); 977 } 978 979 static void ena_rx_queue_release_bufs(struct ena_ring *ring) 980 { 981 unsigned int i; 982 983 for (i = 0; i < ring->ring_size; ++i) { 984 struct ena_rx_buffer *rx_info = &ring->rx_buffer_info[i]; 985 if (rx_info->mbuf) { 986 rte_mbuf_raw_free(rx_info->mbuf); 987 rx_info->mbuf = NULL; 988 } 989 } 990 } 991 992 static void ena_tx_queue_release_bufs(struct ena_ring *ring) 993 { 994 unsigned int i; 995 996 for (i = 0; i < ring->ring_size; ++i) { 997 struct ena_tx_buffer *tx_buf = &ring->tx_buffer_info[i]; 998 999 if (tx_buf->mbuf) { 1000 rte_pktmbuf_free(tx_buf->mbuf); 1001 tx_buf->mbuf = NULL; 1002 } 1003 } 1004 } 1005 1006 static int ena_link_update(struct rte_eth_dev *dev, 1007 __rte_unused int wait_to_complete) 1008 { 1009 struct rte_eth_link *link = &dev->data->dev_link; 1010 struct ena_adapter *adapter = dev->data->dev_private; 1011 1012 link->link_status = adapter->link_status ? 
RTE_ETH_LINK_UP : RTE_ETH_LINK_DOWN; 1013 link->link_speed = RTE_ETH_SPEED_NUM_NONE; 1014 link->link_duplex = RTE_ETH_LINK_FULL_DUPLEX; 1015 1016 return 0; 1017 } 1018 1019 static int ena_queue_start_all(struct rte_eth_dev *dev, 1020 enum ena_ring_type ring_type) 1021 { 1022 struct ena_adapter *adapter = dev->data->dev_private; 1023 struct ena_ring *queues = NULL; 1024 int nb_queues; 1025 int i = 0; 1026 int rc = 0; 1027 1028 if (ring_type == ENA_RING_TYPE_RX) { 1029 queues = adapter->rx_ring; 1030 nb_queues = dev->data->nb_rx_queues; 1031 } else { 1032 queues = adapter->tx_ring; 1033 nb_queues = dev->data->nb_tx_queues; 1034 } 1035 for (i = 0; i < nb_queues; i++) { 1036 if (queues[i].configured) { 1037 if (ring_type == ENA_RING_TYPE_RX) { 1038 ena_assert_msg( 1039 dev->data->rx_queues[i] == &queues[i], 1040 "Inconsistent state of Rx queues\n"); 1041 } else { 1042 ena_assert_msg( 1043 dev->data->tx_queues[i] == &queues[i], 1044 "Inconsistent state of Tx queues\n"); 1045 } 1046 1047 rc = ena_queue_start(dev, &queues[i]); 1048 1049 if (rc) { 1050 PMD_INIT_LOG(ERR, 1051 "Failed to start queue[%d] of type(%d)\n", 1052 i, ring_type); 1053 goto err; 1054 } 1055 } 1056 } 1057 1058 return 0; 1059 1060 err: 1061 while (i--) 1062 if (queues[i].configured) 1063 ena_queue_stop(&queues[i]); 1064 1065 return rc; 1066 } 1067 1068 static int 1069 ena_calc_io_queue_size(struct ena_calc_queue_size_ctx *ctx, 1070 bool use_large_llq_hdr) 1071 { 1072 struct ena_admin_feature_llq_desc *dev = &ctx->get_feat_ctx->llq; 1073 struct ena_com_dev *ena_dev = ctx->ena_dev; 1074 uint32_t max_tx_queue_size; 1075 uint32_t max_rx_queue_size; 1076 1077 if (ena_dev->supported_features & BIT(ENA_ADMIN_MAX_QUEUES_EXT)) { 1078 struct ena_admin_queue_ext_feature_fields *max_queue_ext = 1079 &ctx->get_feat_ctx->max_queue_ext.max_queue_ext; 1080 max_rx_queue_size = RTE_MIN(max_queue_ext->max_rx_cq_depth, 1081 max_queue_ext->max_rx_sq_depth); 1082 max_tx_queue_size = max_queue_ext->max_tx_cq_depth; 1083 1084 if (ena_dev->tx_mem_queue_type == 1085 ENA_ADMIN_PLACEMENT_POLICY_DEV) { 1086 max_tx_queue_size = RTE_MIN(max_tx_queue_size, 1087 dev->max_llq_depth); 1088 } else { 1089 max_tx_queue_size = RTE_MIN(max_tx_queue_size, 1090 max_queue_ext->max_tx_sq_depth); 1091 } 1092 1093 ctx->max_rx_sgl_size = RTE_MIN(ENA_PKT_MAX_BUFS, 1094 max_queue_ext->max_per_packet_rx_descs); 1095 ctx->max_tx_sgl_size = RTE_MIN(ENA_PKT_MAX_BUFS, 1096 max_queue_ext->max_per_packet_tx_descs); 1097 } else { 1098 struct ena_admin_queue_feature_desc *max_queues = 1099 &ctx->get_feat_ctx->max_queues; 1100 max_rx_queue_size = RTE_MIN(max_queues->max_cq_depth, 1101 max_queues->max_sq_depth); 1102 max_tx_queue_size = max_queues->max_cq_depth; 1103 1104 if (ena_dev->tx_mem_queue_type == 1105 ENA_ADMIN_PLACEMENT_POLICY_DEV) { 1106 max_tx_queue_size = RTE_MIN(max_tx_queue_size, 1107 dev->max_llq_depth); 1108 } else { 1109 max_tx_queue_size = RTE_MIN(max_tx_queue_size, 1110 max_queues->max_sq_depth); 1111 } 1112 1113 ctx->max_rx_sgl_size = RTE_MIN(ENA_PKT_MAX_BUFS, 1114 max_queues->max_packet_rx_descs); 1115 ctx->max_tx_sgl_size = RTE_MIN(ENA_PKT_MAX_BUFS, 1116 max_queues->max_packet_tx_descs); 1117 } 1118 1119 /* Round down to the nearest power of 2 */ 1120 max_rx_queue_size = rte_align32prevpow2(max_rx_queue_size); 1121 max_tx_queue_size = rte_align32prevpow2(max_tx_queue_size); 1122 1123 if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV && use_large_llq_hdr) { 1124 /* intersection between driver configuration and device capabilities */ 1125 if 
(dev->entry_size_ctrl_supported & ENA_ADMIN_LIST_ENTRY_SIZE_256B) { 1126 if (dev->max_wide_llq_depth == MAX_WIDE_LLQ_DEPTH_UNSUPPORTED) { 1127 /* Devices that do not support the double-sized ENA memory BAR will 1128 * report max_wide_llq_depth as 0. In such case, driver halves the 1129 * queue depth when working in large llq policy. 1130 */ 1131 max_tx_queue_size >>= 1; 1132 PMD_INIT_LOG(INFO, 1133 "large LLQ policy requires limiting Tx queue size to %u entries\n", 1134 max_tx_queue_size); 1135 } else if (dev->max_wide_llq_depth < max_tx_queue_size) { 1136 /* In case the queue depth that the driver calculated exceeds 1137 * the maximal value that the device allows, it will be limited 1138 * to that maximal value 1139 */ 1140 max_tx_queue_size = dev->max_wide_llq_depth; 1141 } 1142 } else { 1143 PMD_INIT_LOG(INFO, 1144 "Forcing large LLQ headers failed since device lacks this support\n"); 1145 } 1146 } 1147 1148 if (unlikely(max_rx_queue_size == 0 || max_tx_queue_size == 0)) { 1149 PMD_INIT_LOG(ERR, "Invalid queue size\n"); 1150 return -EFAULT; 1151 } 1152 1153 ctx->max_tx_queue_size = max_tx_queue_size; 1154 ctx->max_rx_queue_size = max_rx_queue_size; 1155 1156 PMD_DRV_LOG(INFO, "tx queue size %u\n", max_tx_queue_size); 1157 return 0; 1158 } 1159 1160 static void ena_stats_restart(struct rte_eth_dev *dev) 1161 { 1162 struct ena_adapter *adapter = dev->data->dev_private; 1163 1164 rte_atomic64_init(&adapter->drv_stats->ierrors); 1165 rte_atomic64_init(&adapter->drv_stats->oerrors); 1166 rte_atomic64_init(&adapter->drv_stats->rx_nombuf); 1167 adapter->drv_stats->rx_drops = 0; 1168 } 1169 1170 static int ena_stats_get(struct rte_eth_dev *dev, 1171 struct rte_eth_stats *stats) 1172 { 1173 struct ena_admin_basic_stats ena_stats; 1174 struct ena_adapter *adapter = dev->data->dev_private; 1175 struct ena_com_dev *ena_dev = &adapter->ena_dev; 1176 int rc; 1177 int i; 1178 int max_rings_stats; 1179 1180 memset(&ena_stats, 0, sizeof(ena_stats)); 1181 1182 rte_spinlock_lock(&adapter->admin_lock); 1183 rc = ENA_PROXY(adapter, ena_com_get_dev_basic_stats, ena_dev, 1184 &ena_stats); 1185 rte_spinlock_unlock(&adapter->admin_lock); 1186 if (unlikely(rc)) { 1187 PMD_DRV_LOG(ERR, "Could not retrieve statistics from ENA\n"); 1188 return rc; 1189 } 1190 1191 /* Set of basic statistics from ENA */ 1192 stats->ipackets = __MERGE_64B_H_L(ena_stats.rx_pkts_high, 1193 ena_stats.rx_pkts_low); 1194 stats->opackets = __MERGE_64B_H_L(ena_stats.tx_pkts_high, 1195 ena_stats.tx_pkts_low); 1196 stats->ibytes = __MERGE_64B_H_L(ena_stats.rx_bytes_high, 1197 ena_stats.rx_bytes_low); 1198 stats->obytes = __MERGE_64B_H_L(ena_stats.tx_bytes_high, 1199 ena_stats.tx_bytes_low); 1200 1201 /* Driver related stats */ 1202 stats->imissed = adapter->drv_stats->rx_drops; 1203 stats->ierrors = rte_atomic64_read(&adapter->drv_stats->ierrors); 1204 stats->oerrors = rte_atomic64_read(&adapter->drv_stats->oerrors); 1205 stats->rx_nombuf = rte_atomic64_read(&adapter->drv_stats->rx_nombuf); 1206 1207 max_rings_stats = RTE_MIN(dev->data->nb_rx_queues, 1208 RTE_ETHDEV_QUEUE_STAT_CNTRS); 1209 for (i = 0; i < max_rings_stats; ++i) { 1210 struct ena_stats_rx *rx_stats = &adapter->rx_ring[i].rx_stats; 1211 1212 stats->q_ibytes[i] = rx_stats->bytes; 1213 stats->q_ipackets[i] = rx_stats->cnt; 1214 stats->q_errors[i] = rx_stats->bad_desc_num + 1215 rx_stats->bad_req_id; 1216 } 1217 1218 max_rings_stats = RTE_MIN(dev->data->nb_tx_queues, 1219 RTE_ETHDEV_QUEUE_STAT_CNTRS); 1220 for (i = 0; i < max_rings_stats; ++i) { 1221 struct ena_stats_tx *tx_stats = 
&adapter->tx_ring[i].tx_stats; 1222 1223 stats->q_obytes[i] = tx_stats->bytes; 1224 stats->q_opackets[i] = tx_stats->cnt; 1225 } 1226 1227 return 0; 1228 } 1229 1230 static int ena_mtu_set(struct rte_eth_dev *dev, uint16_t mtu) 1231 { 1232 struct ena_adapter *adapter; 1233 struct ena_com_dev *ena_dev; 1234 int rc = 0; 1235 1236 ena_assert_msg(dev->data != NULL, "Uninitialized device\n"); 1237 ena_assert_msg(dev->data->dev_private != NULL, "Uninitialized device\n"); 1238 adapter = dev->data->dev_private; 1239 1240 ena_dev = &adapter->ena_dev; 1241 ena_assert_msg(ena_dev != NULL, "Uninitialized device\n"); 1242 1243 rc = ENA_PROXY(adapter, ena_com_set_dev_mtu, ena_dev, mtu); 1244 if (rc) 1245 PMD_DRV_LOG(ERR, "Could not set MTU: %d\n", mtu); 1246 else 1247 PMD_DRV_LOG(NOTICE, "MTU set to: %d\n", mtu); 1248 1249 return rc; 1250 } 1251 1252 static int ena_start(struct rte_eth_dev *dev) 1253 { 1254 struct ena_adapter *adapter = dev->data->dev_private; 1255 uint64_t ticks; 1256 int rc = 0; 1257 uint16_t i; 1258 1259 /* Cannot allocate memory in secondary process */ 1260 if (rte_eal_process_type() != RTE_PROC_PRIMARY) { 1261 PMD_DRV_LOG(WARNING, "dev_start not supported in secondary.\n"); 1262 return -EPERM; 1263 } 1264 1265 rc = ena_setup_rx_intr(dev); 1266 if (rc) 1267 return rc; 1268 1269 rc = ena_queue_start_all(dev, ENA_RING_TYPE_RX); 1270 if (rc) 1271 return rc; 1272 1273 rc = ena_queue_start_all(dev, ENA_RING_TYPE_TX); 1274 if (rc) 1275 goto err_start_tx; 1276 1277 if (adapter->edev_data->dev_conf.rxmode.mq_mode & RTE_ETH_MQ_RX_RSS_FLAG) { 1278 rc = ena_rss_configure(adapter); 1279 if (rc) 1280 goto err_rss_init; 1281 } 1282 1283 ena_stats_restart(dev); 1284 1285 adapter->timestamp_wd = rte_get_timer_cycles(); 1286 adapter->keep_alive_timeout = ENA_DEVICE_KALIVE_TIMEOUT; 1287 1288 ticks = rte_get_timer_hz(); 1289 rte_timer_reset(&adapter->timer_wd, ticks, PERIODICAL, rte_lcore_id(), 1290 ena_timer_wd_callback, dev); 1291 1292 ++adapter->dev_stats.dev_start; 1293 adapter->state = ENA_ADAPTER_STATE_RUNNING; 1294 1295 for (i = 0; i < dev->data->nb_rx_queues; i++) 1296 dev->data->rx_queue_state[i] = RTE_ETH_QUEUE_STATE_STARTED; 1297 for (i = 0; i < dev->data->nb_tx_queues; i++) 1298 dev->data->tx_queue_state[i] = RTE_ETH_QUEUE_STATE_STARTED; 1299 1300 return 0; 1301 1302 err_rss_init: 1303 ena_queue_stop_all(dev, ENA_RING_TYPE_TX); 1304 err_start_tx: 1305 ena_queue_stop_all(dev, ENA_RING_TYPE_RX); 1306 return rc; 1307 } 1308 1309 static int ena_stop(struct rte_eth_dev *dev) 1310 { 1311 struct ena_adapter *adapter = dev->data->dev_private; 1312 struct ena_com_dev *ena_dev = &adapter->ena_dev; 1313 struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev); 1314 struct rte_intr_handle *intr_handle = pci_dev->intr_handle; 1315 uint16_t i; 1316 int rc; 1317 1318 /* Cannot free memory in secondary process */ 1319 if (rte_eal_process_type() != RTE_PROC_PRIMARY) { 1320 PMD_DRV_LOG(WARNING, "dev_stop not supported in secondary.\n"); 1321 return -EPERM; 1322 } 1323 1324 rte_timer_stop_sync(&adapter->timer_wd); 1325 ena_queue_stop_all(dev, ENA_RING_TYPE_TX); 1326 ena_queue_stop_all(dev, ENA_RING_TYPE_RX); 1327 1328 if (adapter->trigger_reset) { 1329 rc = ena_com_dev_reset(ena_dev, adapter->reset_reason); 1330 if (rc) 1331 PMD_DRV_LOG(ERR, "Device reset failed, rc: %d\n", rc); 1332 } 1333 1334 rte_intr_disable(intr_handle); 1335 1336 rte_intr_efd_disable(intr_handle); 1337 1338 /* Cleanup vector list */ 1339 rte_intr_vec_list_free(intr_handle); 1340 1341 rte_intr_enable(intr_handle); 1342 1343 
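	/* The data path is fully stopped at this point; record the stop in the
	 * driver statistics and mark the adapter and all of its queues as
	 * stopped.
	 */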
++adapter->dev_stats.dev_stop; 1344 adapter->state = ENA_ADAPTER_STATE_STOPPED; 1345 dev->data->dev_started = 0; 1346 1347 for (i = 0; i < dev->data->nb_rx_queues; i++) 1348 dev->data->rx_queue_state[i] = RTE_ETH_QUEUE_STATE_STOPPED; 1349 for (i = 0; i < dev->data->nb_tx_queues; i++) 1350 dev->data->tx_queue_state[i] = RTE_ETH_QUEUE_STATE_STOPPED; 1351 1352 return 0; 1353 } 1354 1355 static int ena_create_io_queue(struct rte_eth_dev *dev, struct ena_ring *ring) 1356 { 1357 struct ena_adapter *adapter = ring->adapter; 1358 struct ena_com_dev *ena_dev = &adapter->ena_dev; 1359 struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev); 1360 struct rte_intr_handle *intr_handle = pci_dev->intr_handle; 1361 struct ena_com_create_io_ctx ctx = 1362 /* policy set to _HOST just to satisfy icc compiler */ 1363 { ENA_ADMIN_PLACEMENT_POLICY_HOST, 1364 0, 0, 0, 0, 0 }; 1365 uint16_t ena_qid; 1366 unsigned int i; 1367 int rc; 1368 1369 ctx.msix_vector = -1; 1370 if (ring->type == ENA_RING_TYPE_TX) { 1371 ena_qid = ENA_IO_TXQ_IDX(ring->id); 1372 ctx.direction = ENA_COM_IO_QUEUE_DIRECTION_TX; 1373 ctx.mem_queue_type = ena_dev->tx_mem_queue_type; 1374 for (i = 0; i < ring->ring_size; i++) 1375 ring->empty_tx_reqs[i] = i; 1376 } else { 1377 ena_qid = ENA_IO_RXQ_IDX(ring->id); 1378 ctx.direction = ENA_COM_IO_QUEUE_DIRECTION_RX; 1379 if (rte_intr_dp_is_en(intr_handle)) 1380 ctx.msix_vector = 1381 rte_intr_vec_list_index_get(intr_handle, 1382 ring->id); 1383 1384 for (i = 0; i < ring->ring_size; i++) 1385 ring->empty_rx_reqs[i] = i; 1386 } 1387 ctx.queue_size = ring->ring_size; 1388 ctx.qid = ena_qid; 1389 ctx.numa_node = ring->numa_socket_id; 1390 1391 rc = ena_com_create_io_queue(ena_dev, &ctx); 1392 if (rc) { 1393 PMD_DRV_LOG(ERR, 1394 "Failed to create IO queue[%d] (qid:%d), rc: %d\n", 1395 ring->id, ena_qid, rc); 1396 return rc; 1397 } 1398 1399 rc = ena_com_get_io_handlers(ena_dev, ena_qid, 1400 &ring->ena_com_io_sq, 1401 &ring->ena_com_io_cq); 1402 if (rc) { 1403 PMD_DRV_LOG(ERR, 1404 "Failed to get IO queue[%d] handlers, rc: %d\n", 1405 ring->id, rc); 1406 ena_com_destroy_io_queue(ena_dev, ena_qid); 1407 return rc; 1408 } 1409 1410 if (ring->type == ENA_RING_TYPE_TX) 1411 ena_com_update_numa_node(ring->ena_com_io_cq, ctx.numa_node); 1412 1413 /* Start with Rx interrupts being masked. 
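	 * The application can unmask them later via rte_eth_dev_rx_intr_enable(),
	 * which invokes ena_rx_queue_intr_enable() registered in ena_dev_ops.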
*/ 1414 if (ring->type == ENA_RING_TYPE_RX && rte_intr_dp_is_en(intr_handle)) 1415 ena_rx_queue_intr_disable(dev, ring->id); 1416 1417 return 0; 1418 } 1419 1420 static void ena_queue_stop(struct ena_ring *ring) 1421 { 1422 struct ena_com_dev *ena_dev = &ring->adapter->ena_dev; 1423 1424 if (ring->type == ENA_RING_TYPE_RX) { 1425 ena_com_destroy_io_queue(ena_dev, ENA_IO_RXQ_IDX(ring->id)); 1426 ena_rx_queue_release_bufs(ring); 1427 } else { 1428 ena_com_destroy_io_queue(ena_dev, ENA_IO_TXQ_IDX(ring->id)); 1429 ena_tx_queue_release_bufs(ring); 1430 } 1431 } 1432 1433 static void ena_queue_stop_all(struct rte_eth_dev *dev, 1434 enum ena_ring_type ring_type) 1435 { 1436 struct ena_adapter *adapter = dev->data->dev_private; 1437 struct ena_ring *queues = NULL; 1438 uint16_t nb_queues, i; 1439 1440 if (ring_type == ENA_RING_TYPE_RX) { 1441 queues = adapter->rx_ring; 1442 nb_queues = dev->data->nb_rx_queues; 1443 } else { 1444 queues = adapter->tx_ring; 1445 nb_queues = dev->data->nb_tx_queues; 1446 } 1447 1448 for (i = 0; i < nb_queues; ++i) 1449 if (queues[i].configured) 1450 ena_queue_stop(&queues[i]); 1451 } 1452 1453 static int ena_queue_start(struct rte_eth_dev *dev, struct ena_ring *ring) 1454 { 1455 int rc, bufs_num; 1456 1457 ena_assert_msg(ring->configured == 1, 1458 "Trying to start unconfigured queue\n"); 1459 1460 rc = ena_create_io_queue(dev, ring); 1461 if (rc) { 1462 PMD_INIT_LOG(ERR, "Failed to create IO queue\n"); 1463 return rc; 1464 } 1465 1466 ring->next_to_clean = 0; 1467 ring->next_to_use = 0; 1468 1469 if (ring->type == ENA_RING_TYPE_TX) { 1470 ring->tx_stats.available_desc = 1471 ena_com_free_q_entries(ring->ena_com_io_sq); 1472 return 0; 1473 } 1474 1475 bufs_num = ring->ring_size - 1; 1476 rc = ena_populate_rx_queue(ring, bufs_num); 1477 if (rc != bufs_num) { 1478 ena_com_destroy_io_queue(&ring->adapter->ena_dev, 1479 ENA_IO_RXQ_IDX(ring->id)); 1480 PMD_INIT_LOG(ERR, "Failed to populate Rx ring\n"); 1481 return ENA_COM_FAULT; 1482 } 1483 /* Flush per-core RX buffers pools cache as they can be used on other 1484 * cores as well. 1485 */ 1486 rte_mempool_cache_flush(NULL, ring->mb_pool); 1487 1488 return 0; 1489 } 1490 1491 static int ena_tx_queue_setup(struct rte_eth_dev *dev, 1492 uint16_t queue_idx, 1493 uint16_t nb_desc, 1494 unsigned int socket_id, 1495 const struct rte_eth_txconf *tx_conf) 1496 { 1497 struct ena_ring *txq = NULL; 1498 struct ena_adapter *adapter = dev->data->dev_private; 1499 unsigned int i; 1500 uint16_t dyn_thresh; 1501 1502 txq = &adapter->tx_ring[queue_idx]; 1503 1504 if (txq->configured) { 1505 PMD_DRV_LOG(CRIT, 1506 "API violation. 
Queue[%d] is already configured\n", 1507 queue_idx); 1508 return ENA_COM_FAULT; 1509 } 1510 1511 if (!rte_is_power_of_2(nb_desc)) { 1512 PMD_DRV_LOG(ERR, 1513 "Unsupported size of Tx queue: %d is not a power of 2.\n", 1514 nb_desc); 1515 return -EINVAL; 1516 } 1517 1518 if (nb_desc > adapter->max_tx_ring_size) { 1519 PMD_DRV_LOG(ERR, 1520 "Unsupported size of Tx queue (max size: %d)\n", 1521 adapter->max_tx_ring_size); 1522 return -EINVAL; 1523 } 1524 1525 txq->port_id = dev->data->port_id; 1526 txq->next_to_clean = 0; 1527 txq->next_to_use = 0; 1528 txq->ring_size = nb_desc; 1529 txq->size_mask = nb_desc - 1; 1530 txq->numa_socket_id = socket_id; 1531 txq->pkts_without_db = false; 1532 txq->last_cleanup_ticks = 0; 1533 1534 txq->tx_buffer_info = rte_zmalloc_socket("txq->tx_buffer_info", 1535 sizeof(struct ena_tx_buffer) * txq->ring_size, 1536 RTE_CACHE_LINE_SIZE, 1537 socket_id); 1538 if (!txq->tx_buffer_info) { 1539 PMD_DRV_LOG(ERR, 1540 "Failed to allocate memory for Tx buffer info\n"); 1541 return -ENOMEM; 1542 } 1543 1544 txq->empty_tx_reqs = rte_zmalloc_socket("txq->empty_tx_reqs", 1545 sizeof(uint16_t) * txq->ring_size, 1546 RTE_CACHE_LINE_SIZE, 1547 socket_id); 1548 if (!txq->empty_tx_reqs) { 1549 PMD_DRV_LOG(ERR, 1550 "Failed to allocate memory for empty Tx requests\n"); 1551 rte_free(txq->tx_buffer_info); 1552 return -ENOMEM; 1553 } 1554 1555 txq->push_buf_intermediate_buf = 1556 rte_zmalloc_socket("txq->push_buf_intermediate_buf", 1557 txq->tx_max_header_size, 1558 RTE_CACHE_LINE_SIZE, 1559 socket_id); 1560 if (!txq->push_buf_intermediate_buf) { 1561 PMD_DRV_LOG(ERR, "Failed to alloc push buffer for LLQ\n"); 1562 rte_free(txq->tx_buffer_info); 1563 rte_free(txq->empty_tx_reqs); 1564 return -ENOMEM; 1565 } 1566 1567 for (i = 0; i < txq->ring_size; i++) 1568 txq->empty_tx_reqs[i] = i; 1569 1570 txq->offloads = tx_conf->offloads | dev->data->dev_conf.txmode.offloads; 1571 1572 /* Check if caller provided the Tx cleanup threshold value. */ 1573 if (tx_conf->tx_free_thresh != 0) { 1574 txq->tx_free_thresh = tx_conf->tx_free_thresh; 1575 } else { 1576 dyn_thresh = txq->ring_size - 1577 txq->ring_size / ENA_REFILL_THRESH_DIVIDER; 1578 txq->tx_free_thresh = RTE_MAX(dyn_thresh, 1579 txq->ring_size - ENA_REFILL_THRESH_PACKET); 1580 } 1581 1582 txq->missing_tx_completion_threshold = 1583 RTE_MIN(txq->ring_size / 2, ENA_DEFAULT_MISSING_COMP); 1584 1585 /* Store pointer to this queue in upper layer */ 1586 txq->configured = 1; 1587 dev->data->tx_queues[queue_idx] = txq; 1588 1589 return 0; 1590 } 1591 1592 static int ena_rx_queue_setup(struct rte_eth_dev *dev, 1593 uint16_t queue_idx, 1594 uint16_t nb_desc, 1595 unsigned int socket_id, 1596 const struct rte_eth_rxconf *rx_conf, 1597 struct rte_mempool *mp) 1598 { 1599 struct ena_adapter *adapter = dev->data->dev_private; 1600 struct ena_ring *rxq = NULL; 1601 size_t buffer_size; 1602 int i; 1603 uint16_t dyn_thresh; 1604 1605 rxq = &adapter->rx_ring[queue_idx]; 1606 if (rxq->configured) { 1607 PMD_DRV_LOG(CRIT, 1608 "API violation. 
Queue[%d] is already configured\n", 1609 queue_idx); 1610 return ENA_COM_FAULT; 1611 } 1612 1613 if (!rte_is_power_of_2(nb_desc)) { 1614 PMD_DRV_LOG(ERR, 1615 "Unsupported size of Rx queue: %d is not a power of 2.\n", 1616 nb_desc); 1617 return -EINVAL; 1618 } 1619 1620 if (nb_desc > adapter->max_rx_ring_size) { 1621 PMD_DRV_LOG(ERR, 1622 "Unsupported size of Rx queue (max size: %d)\n", 1623 adapter->max_rx_ring_size); 1624 return -EINVAL; 1625 } 1626 1627 /* ENA isn't supporting buffers smaller than 1400 bytes */ 1628 buffer_size = rte_pktmbuf_data_room_size(mp) - RTE_PKTMBUF_HEADROOM; 1629 if (buffer_size < ENA_RX_BUF_MIN_SIZE) { 1630 PMD_DRV_LOG(ERR, 1631 "Unsupported size of Rx buffer: %zu (min size: %d)\n", 1632 buffer_size, ENA_RX_BUF_MIN_SIZE); 1633 return -EINVAL; 1634 } 1635 1636 rxq->port_id = dev->data->port_id; 1637 rxq->next_to_clean = 0; 1638 rxq->next_to_use = 0; 1639 rxq->ring_size = nb_desc; 1640 rxq->size_mask = nb_desc - 1; 1641 rxq->numa_socket_id = socket_id; 1642 rxq->mb_pool = mp; 1643 1644 rxq->rx_buffer_info = rte_zmalloc_socket("rxq->buffer_info", 1645 sizeof(struct ena_rx_buffer) * nb_desc, 1646 RTE_CACHE_LINE_SIZE, 1647 socket_id); 1648 if (!rxq->rx_buffer_info) { 1649 PMD_DRV_LOG(ERR, 1650 "Failed to allocate memory for Rx buffer info\n"); 1651 return -ENOMEM; 1652 } 1653 1654 rxq->rx_refill_buffer = rte_zmalloc_socket("rxq->rx_refill_buffer", 1655 sizeof(struct rte_mbuf *) * nb_desc, 1656 RTE_CACHE_LINE_SIZE, 1657 socket_id); 1658 if (!rxq->rx_refill_buffer) { 1659 PMD_DRV_LOG(ERR, 1660 "Failed to allocate memory for Rx refill buffer\n"); 1661 rte_free(rxq->rx_buffer_info); 1662 rxq->rx_buffer_info = NULL; 1663 return -ENOMEM; 1664 } 1665 1666 rxq->empty_rx_reqs = rte_zmalloc_socket("rxq->empty_rx_reqs", 1667 sizeof(uint16_t) * nb_desc, 1668 RTE_CACHE_LINE_SIZE, 1669 socket_id); 1670 if (!rxq->empty_rx_reqs) { 1671 PMD_DRV_LOG(ERR, 1672 "Failed to allocate memory for empty Rx requests\n"); 1673 rte_free(rxq->rx_buffer_info); 1674 rxq->rx_buffer_info = NULL; 1675 rte_free(rxq->rx_refill_buffer); 1676 rxq->rx_refill_buffer = NULL; 1677 return -ENOMEM; 1678 } 1679 1680 for (i = 0; i < nb_desc; i++) 1681 rxq->empty_rx_reqs[i] = i; 1682 1683 rxq->offloads = rx_conf->offloads | dev->data->dev_conf.rxmode.offloads; 1684 1685 if (rx_conf->rx_free_thresh != 0) { 1686 rxq->rx_free_thresh = rx_conf->rx_free_thresh; 1687 } else { 1688 dyn_thresh = rxq->ring_size / ENA_REFILL_THRESH_DIVIDER; 1689 rxq->rx_free_thresh = RTE_MIN(dyn_thresh, 1690 (uint16_t)(ENA_REFILL_THRESH_PACKET)); 1691 } 1692 1693 /* Store pointer to this queue in upper layer */ 1694 rxq->configured = 1; 1695 dev->data->rx_queues[queue_idx] = rxq; 1696 1697 return 0; 1698 } 1699 1700 static int ena_add_single_rx_desc(struct ena_com_io_sq *io_sq, 1701 struct rte_mbuf *mbuf, uint16_t id) 1702 { 1703 struct ena_com_buf ebuf; 1704 int rc; 1705 1706 /* prepare physical address for DMA transaction */ 1707 ebuf.paddr = mbuf->buf_iova + RTE_PKTMBUF_HEADROOM; 1708 ebuf.len = mbuf->buf_len - RTE_PKTMBUF_HEADROOM; 1709 1710 /* pass resource to device */ 1711 rc = ena_com_add_single_rx_desc(io_sq, &ebuf, id); 1712 if (unlikely(rc != 0)) 1713 PMD_RX_LOG(WARNING, "Failed adding Rx desc\n"); 1714 1715 return rc; 1716 } 1717 1718 static int ena_populate_rx_queue(struct ena_ring *rxq, unsigned int count) 1719 { 1720 unsigned int i; 1721 int rc; 1722 uint16_t next_to_use = rxq->next_to_use; 1723 uint16_t req_id; 1724 #ifdef RTE_ETHDEV_DEBUG_RX 1725 uint16_t in_use; 1726 #endif 1727 struct rte_mbuf **mbufs = 
rxq->rx_refill_buffer; 1728 1729 if (unlikely(!count)) 1730 return 0; 1731 1732 #ifdef RTE_ETHDEV_DEBUG_RX 1733 in_use = rxq->ring_size - 1 - 1734 ena_com_free_q_entries(rxq->ena_com_io_sq); 1735 if (unlikely((in_use + count) >= rxq->ring_size)) 1736 PMD_RX_LOG(ERR, "Bad Rx ring state\n"); 1737 #endif 1738 1739 /* get resources for incoming packets */ 1740 rc = rte_pktmbuf_alloc_bulk(rxq->mb_pool, mbufs, count); 1741 if (unlikely(rc < 0)) { 1742 rte_atomic64_inc(&rxq->adapter->drv_stats->rx_nombuf); 1743 ++rxq->rx_stats.mbuf_alloc_fail; 1744 PMD_RX_LOG(DEBUG, "There are not enough free buffers\n"); 1745 return 0; 1746 } 1747 1748 for (i = 0; i < count; i++) { 1749 struct rte_mbuf *mbuf = mbufs[i]; 1750 struct ena_rx_buffer *rx_info; 1751 1752 if (likely((i + 4) < count)) 1753 rte_prefetch0(mbufs[i + 4]); 1754 1755 req_id = rxq->empty_rx_reqs[next_to_use]; 1756 rx_info = &rxq->rx_buffer_info[req_id]; 1757 1758 rc = ena_add_single_rx_desc(rxq->ena_com_io_sq, mbuf, req_id); 1759 if (unlikely(rc != 0)) 1760 break; 1761 1762 rx_info->mbuf = mbuf; 1763 next_to_use = ENA_IDX_NEXT_MASKED(next_to_use, rxq->size_mask); 1764 } 1765 1766 if (unlikely(i < count)) { 1767 PMD_RX_LOG(WARNING, 1768 "Refilled Rx queue[%d] with only %d/%d buffers\n", 1769 rxq->id, i, count); 1770 rte_pktmbuf_free_bulk(&mbufs[i], count - i); 1771 ++rxq->rx_stats.refill_partial; 1772 } 1773 1774 /* When we submitted free resources to device... */ 1775 if (likely(i > 0)) { 1776 /* ...let HW know that it can fill buffers with data. */ 1777 ena_com_write_sq_doorbell(rxq->ena_com_io_sq); 1778 1779 rxq->next_to_use = next_to_use; 1780 } 1781 1782 return i; 1783 } 1784 1785 static size_t ena_get_metrics_entries(struct ena_adapter *adapter) 1786 { 1787 struct ena_com_dev *ena_dev = &adapter->ena_dev; 1788 size_t metrics_num = 0; 1789 1790 if (ena_com_get_cap(ena_dev, ENA_ADMIN_CUSTOMER_METRICS)) 1791 metrics_num = ENA_STATS_ARRAY_METRICS; 1792 else if (ena_com_get_cap(ena_dev, ENA_ADMIN_ENI_STATS)) 1793 metrics_num = ENA_STATS_ARRAY_METRICS_LEGACY; 1794 PMD_DRV_LOG(NOTICE, "0x%x customer metrics are supported\n", (unsigned int)metrics_num); 1795 if (metrics_num > ENA_MAX_CUSTOMER_METRICS) { 1796 PMD_DRV_LOG(NOTICE, "Not enough space for the requested customer metrics\n"); 1797 metrics_num = ENA_MAX_CUSTOMER_METRICS; 1798 } 1799 return metrics_num; 1800 } 1801 1802 static int ena_device_init(struct ena_adapter *adapter, 1803 struct rte_pci_device *pdev, 1804 struct ena_com_dev_get_features_ctx *get_feat_ctx) 1805 { 1806 struct ena_com_dev *ena_dev = &adapter->ena_dev; 1807 uint32_t aenq_groups; 1808 int rc; 1809 bool readless_supported; 1810 1811 /* Initialize mmio registers */ 1812 rc = ena_com_mmio_reg_read_request_init(ena_dev); 1813 if (rc) { 1814 PMD_DRV_LOG(ERR, "Failed to init MMIO read less\n"); 1815 return rc; 1816 } 1817 1818 /* The PCIe configuration space revision id indicate if mmio reg 1819 * read is disabled. 
1820 */ 1821 readless_supported = !(pdev->id.class_id & ENA_MMIO_DISABLE_REG_READ); 1822 ena_com_set_mmio_read_mode(ena_dev, readless_supported); 1823 1824 /* reset device */ 1825 rc = ena_com_dev_reset(ena_dev, ENA_REGS_RESET_NORMAL); 1826 if (rc) { 1827 PMD_DRV_LOG(ERR, "Cannot reset device\n"); 1828 goto err_mmio_read_less; 1829 } 1830 1831 /* check FW version */ 1832 rc = ena_com_validate_version(ena_dev); 1833 if (rc) { 1834 PMD_DRV_LOG(ERR, "Device version is too low\n"); 1835 goto err_mmio_read_less; 1836 } 1837 1838 ena_dev->dma_addr_bits = ena_com_get_dma_width(ena_dev); 1839 1840 /* ENA device administration layer init */ 1841 rc = ena_com_admin_init(ena_dev, &aenq_handlers); 1842 if (rc) { 1843 PMD_DRV_LOG(ERR, 1844 "Cannot initialize ENA admin queue\n"); 1845 goto err_mmio_read_less; 1846 } 1847 1848 /* To enable the msix interrupts the driver needs to know the number 1849 * of queues. So the driver uses polling mode to retrieve this 1850 * information. 1851 */ 1852 ena_com_set_admin_polling_mode(ena_dev, true); 1853 1854 ena_config_host_info(ena_dev); 1855 1856 /* Get Device Attributes and features */ 1857 rc = ena_com_get_dev_attr_feat(ena_dev, get_feat_ctx); 1858 if (rc) { 1859 PMD_DRV_LOG(ERR, 1860 "Cannot get attribute for ENA device, rc: %d\n", rc); 1861 goto err_admin_init; 1862 } 1863 1864 aenq_groups = BIT(ENA_ADMIN_LINK_CHANGE) | 1865 BIT(ENA_ADMIN_NOTIFICATION) | 1866 BIT(ENA_ADMIN_KEEP_ALIVE) | 1867 BIT(ENA_ADMIN_FATAL_ERROR) | 1868 BIT(ENA_ADMIN_WARNING) | 1869 BIT(ENA_ADMIN_CONF_NOTIFICATIONS); 1870 1871 aenq_groups &= get_feat_ctx->aenq.supported_groups; 1872 1873 adapter->all_aenq_groups = aenq_groups; 1874 /* The actual supported number of metrics is negotiated with the device at runtime */ 1875 adapter->metrics_num = ena_get_metrics_entries(adapter); 1876 1877 return 0; 1878 1879 err_admin_init: 1880 ena_com_admin_destroy(ena_dev); 1881 1882 err_mmio_read_less: 1883 ena_com_mmio_reg_read_request_destroy(ena_dev); 1884 1885 return rc; 1886 } 1887 1888 static void ena_interrupt_handler_rte(void *cb_arg) 1889 { 1890 struct rte_eth_dev *dev = cb_arg; 1891 struct ena_adapter *adapter = dev->data->dev_private; 1892 struct ena_com_dev *ena_dev = &adapter->ena_dev; 1893 1894 ena_com_admin_q_comp_intr_handler(ena_dev); 1895 if (likely(adapter->state != ENA_ADAPTER_STATE_CLOSED)) 1896 ena_com_aenq_intr_handler(ena_dev, dev); 1897 } 1898 1899 static void check_for_missing_keep_alive(struct ena_adapter *adapter) 1900 { 1901 if (!(adapter->active_aenq_groups & BIT(ENA_ADMIN_KEEP_ALIVE))) 1902 return; 1903 1904 if (adapter->keep_alive_timeout == ENA_HW_HINTS_NO_TIMEOUT) 1905 return; 1906 1907 if (unlikely((rte_get_timer_cycles() - adapter->timestamp_wd) >= 1908 adapter->keep_alive_timeout)) { 1909 PMD_DRV_LOG(ERR, "Keep alive timeout\n"); 1910 ena_trigger_reset(adapter, ENA_REGS_RESET_KEEP_ALIVE_TO); 1911 ++adapter->dev_stats.wd_expired; 1912 } 1913 } 1914 1915 /* Check if admin queue is enabled */ 1916 static void check_for_admin_com_state(struct ena_adapter *adapter) 1917 { 1918 if (unlikely(!ena_com_get_admin_running_state(&adapter->ena_dev))) { 1919 PMD_DRV_LOG(ERR, "ENA admin queue is not in running state\n"); 1920 ena_trigger_reset(adapter, ENA_REGS_RESET_ADMIN_TO); 1921 } 1922 } 1923 1924 static int check_for_tx_completion_in_queue(struct ena_adapter *adapter, 1925 struct ena_ring *tx_ring) 1926 { 1927 struct ena_tx_buffer *tx_buf; 1928 uint64_t timestamp; 1929 uint64_t completion_delay; 1930 uint32_t missed_tx = 0; 1931 unsigned int i; 1932 int rc = 0; 1933 1934 for 
(i = 0; i < tx_ring->ring_size; ++i) { 1935 tx_buf = &tx_ring->tx_buffer_info[i]; 1936 timestamp = tx_buf->timestamp; 1937 1938 if (timestamp == 0) 1939 continue; 1940 1941 completion_delay = rte_get_timer_cycles() - timestamp; 1942 if (completion_delay > adapter->missing_tx_completion_to) { 1943 if (unlikely(!tx_buf->print_once)) { 1944 PMD_TX_LOG(WARNING, 1945 "Found a Tx that wasn't completed on time, qid %d, index %d. " 1946 "Missing Tx outstanding for %" PRIu64 " msecs.\n", 1947 tx_ring->id, i, completion_delay / 1948 rte_get_timer_hz() * 1000); 1949 tx_buf->print_once = true; 1950 } 1951 ++missed_tx; 1952 } 1953 } 1954 1955 if (unlikely(missed_tx > tx_ring->missing_tx_completion_threshold)) { 1956 PMD_DRV_LOG(ERR, 1957 "The number of lost Tx completions is above the threshold (%d > %d). " 1958 "Trigger the device reset.\n", 1959 missed_tx, 1960 tx_ring->missing_tx_completion_threshold); 1961 adapter->reset_reason = ENA_REGS_RESET_MISS_TX_CMPL; 1962 adapter->trigger_reset = true; 1963 rc = -EIO; 1964 } 1965 1966 tx_ring->tx_stats.missed_tx += missed_tx; 1967 1968 return rc; 1969 } 1970 1971 static void check_for_tx_completions(struct ena_adapter *adapter) 1972 { 1973 struct ena_ring *tx_ring; 1974 uint64_t tx_cleanup_delay; 1975 size_t qid; 1976 int budget; 1977 uint16_t nb_tx_queues = adapter->edev_data->nb_tx_queues; 1978 1979 if (adapter->missing_tx_completion_to == ENA_HW_HINTS_NO_TIMEOUT) 1980 return; 1981 1982 nb_tx_queues = adapter->edev_data->nb_tx_queues; 1983 budget = adapter->missing_tx_completion_budget; 1984 1985 qid = adapter->last_tx_comp_qid; 1986 while (budget-- > 0) { 1987 tx_ring = &adapter->tx_ring[qid]; 1988 1989 /* Tx cleanup is called only by the burst function and can be 1990 * called dynamically by the application. Also cleanup is 1991 * limited by the threshold. To avoid false detection of the 1992 * missing HW Tx completion, get the delay since last cleanup 1993 * function was called. 
1994 */ 1995 tx_cleanup_delay = rte_get_timer_cycles() - 1996 tx_ring->last_cleanup_ticks; 1997 if (tx_cleanup_delay < adapter->tx_cleanup_stall_delay) 1998 check_for_tx_completion_in_queue(adapter, tx_ring); 1999 qid = (qid + 1) % nb_tx_queues; 2000 } 2001 2002 adapter->last_tx_comp_qid = qid; 2003 } 2004 2005 static void ena_timer_wd_callback(__rte_unused struct rte_timer *timer, 2006 void *arg) 2007 { 2008 struct rte_eth_dev *dev = arg; 2009 struct ena_adapter *adapter = dev->data->dev_private; 2010 2011 if (unlikely(adapter->trigger_reset)) 2012 return; 2013 2014 check_for_missing_keep_alive(adapter); 2015 check_for_admin_com_state(adapter); 2016 check_for_tx_completions(adapter); 2017 2018 if (unlikely(adapter->trigger_reset)) { 2019 PMD_DRV_LOG(ERR, "Trigger reset is on\n"); 2020 rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_RESET, 2021 NULL); 2022 } 2023 } 2024 2025 static inline void 2026 set_default_llq_configurations(struct ena_llq_configurations *llq_config, 2027 struct ena_admin_feature_llq_desc *llq, 2028 bool use_large_llq_hdr) 2029 { 2030 llq_config->llq_header_location = ENA_ADMIN_INLINE_HEADER; 2031 llq_config->llq_stride_ctrl = ENA_ADMIN_MULTIPLE_DESCS_PER_ENTRY; 2032 llq_config->llq_num_decs_before_header = 2033 ENA_ADMIN_LLQ_NUM_DESCS_BEFORE_HEADER_2; 2034 2035 if (use_large_llq_hdr && 2036 (llq->entry_size_ctrl_supported & ENA_ADMIN_LIST_ENTRY_SIZE_256B)) { 2037 llq_config->llq_ring_entry_size = 2038 ENA_ADMIN_LIST_ENTRY_SIZE_256B; 2039 llq_config->llq_ring_entry_size_value = 256; 2040 } else { 2041 llq_config->llq_ring_entry_size = 2042 ENA_ADMIN_LIST_ENTRY_SIZE_128B; 2043 llq_config->llq_ring_entry_size_value = 128; 2044 } 2045 } 2046 2047 static int 2048 ena_set_queues_placement_policy(struct ena_adapter *adapter, 2049 struct ena_com_dev *ena_dev, 2050 struct ena_admin_feature_llq_desc *llq, 2051 struct ena_llq_configurations *llq_default_configurations) 2052 { 2053 int rc; 2054 u32 llq_feature_mask; 2055 2056 if (adapter->llq_header_policy == ENA_LLQ_POLICY_DISABLED) { 2057 PMD_DRV_LOG(WARNING, 2058 "NOTE: LLQ has been disabled as per user's request. " 2059 "This may lead to a huge performance degradation!\n"); 2060 ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST; 2061 return 0; 2062 } 2063 2064 llq_feature_mask = 1 << ENA_ADMIN_LLQ; 2065 if (!(ena_dev->supported_features & llq_feature_mask)) { 2066 PMD_DRV_LOG(INFO, 2067 "LLQ is not supported. Fallback to host mode policy.\n"); 2068 ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST; 2069 return 0; 2070 } 2071 2072 if (adapter->dev_mem_base == NULL) { 2073 PMD_DRV_LOG(ERR, 2074 "LLQ is advertised as supported, but device doesn't expose mem bar\n"); 2075 ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST; 2076 return 0; 2077 } 2078 2079 rc = ena_com_config_dev_mode(ena_dev, llq, llq_default_configurations); 2080 if (unlikely(rc)) { 2081 PMD_INIT_LOG(WARNING, 2082 "Failed to config dev mode. 
Fallback to host mode policy.\n");
2083 		ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST;
2084 		return 0;
2085 	}
2086 
2087 	/* Nothing to config, exit */
2088 	if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_HOST)
2089 		return 0;
2090 
2091 	ena_dev->mem_bar = adapter->dev_mem_base;
2092 
2093 	return 0;
2094 }
2095 
2096 static uint32_t ena_calc_max_io_queue_num(struct ena_com_dev *ena_dev,
2097 	struct ena_com_dev_get_features_ctx *get_feat_ctx)
2098 {
2099 	uint32_t io_tx_sq_num, io_tx_cq_num, io_rx_num, max_num_io_queues;
2100 
2101 	/* Regular queues capabilities */
2102 	if (ena_dev->supported_features & BIT(ENA_ADMIN_MAX_QUEUES_EXT)) {
2103 		struct ena_admin_queue_ext_feature_fields *max_queue_ext =
2104 			&get_feat_ctx->max_queue_ext.max_queue_ext;
2105 		io_rx_num = RTE_MIN(max_queue_ext->max_rx_sq_num,
2106 			max_queue_ext->max_rx_cq_num);
2107 		io_tx_sq_num = max_queue_ext->max_tx_sq_num;
2108 		io_tx_cq_num = max_queue_ext->max_tx_cq_num;
2109 	} else {
2110 		struct ena_admin_queue_feature_desc *max_queues =
2111 			&get_feat_ctx->max_queues;
2112 		io_tx_sq_num = max_queues->max_sq_num;
2113 		io_tx_cq_num = max_queues->max_cq_num;
2114 		io_rx_num = RTE_MIN(io_tx_sq_num, io_tx_cq_num);
2115 	}
2116 
2117 	/* In case of LLQ use the llq number in the get feature cmd */
2118 	if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV)
2119 		io_tx_sq_num = get_feat_ctx->llq.max_llq_num;
2120 
2121 	max_num_io_queues = RTE_MIN(ENA_MAX_NUM_IO_QUEUES, io_rx_num);
2122 	max_num_io_queues = RTE_MIN(max_num_io_queues, io_tx_sq_num);
2123 	max_num_io_queues = RTE_MIN(max_num_io_queues, io_tx_cq_num);
2124 
2125 	if (unlikely(max_num_io_queues == 0)) {
2126 		PMD_DRV_LOG(ERR, "Number of IO queues cannot be 0\n");
2127 		return -EFAULT;
2128 	}
2129 
2130 	return max_num_io_queues;
2131 }
2132 
2133 static void
2134 ena_set_offloads(struct ena_offloads *offloads,
2135 	struct ena_admin_feature_offload_desc *offload_desc)
2136 {
2137 	if (offload_desc->tx & ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_IPV4_MASK)
2138 		offloads->tx_offloads |= ENA_IPV4_TSO;
2139 
2140 	/* Tx IPv4 checksum offloads */
2141 	if (offload_desc->tx &
2142 	    ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L3_CSUM_IPV4_MASK)
2143 		offloads->tx_offloads |= ENA_L3_IPV4_CSUM;
2144 	if (offload_desc->tx &
2145 	    ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV4_CSUM_FULL_MASK)
2146 		offloads->tx_offloads |= ENA_L4_IPV4_CSUM;
2147 	if (offload_desc->tx &
2148 	    ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV4_CSUM_PART_MASK)
2149 		offloads->tx_offloads |= ENA_L4_IPV4_CSUM_PARTIAL;
2150 
2151 	/* Tx IPv6 checksum offloads */
2152 	if (offload_desc->tx &
2153 	    ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV6_CSUM_FULL_MASK)
2154 		offloads->tx_offloads |= ENA_L4_IPV6_CSUM;
2155 	if (offload_desc->tx &
2156 	    ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV6_CSUM_PART_MASK)
2157 		offloads->tx_offloads |= ENA_L4_IPV6_CSUM_PARTIAL;
2158 
2159 	/* Rx IPv4 checksum offloads */
2160 	if (offload_desc->rx_supported &
2161 	    ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L3_CSUM_IPV4_MASK)
2162 		offloads->rx_offloads |= ENA_L3_IPV4_CSUM;
2163 	if (offload_desc->rx_supported &
2164 	    ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L4_IPV4_CSUM_MASK)
2165 		offloads->rx_offloads |= ENA_L4_IPV4_CSUM;
2166 
2167 	/* Rx IPv6 checksum offloads */
2168 	if (offload_desc->rx_supported &
2169 	    ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L4_IPV6_CSUM_MASK)
2170 		offloads->rx_offloads |= ENA_L4_IPV6_CSUM;
2171 
2172 	if (offload_desc->rx_supported &
2173 	    ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_HASH_MASK)
2174 		offloads->rx_offloads |= ENA_RX_RSS_HASH;
2175 }
2176 
2177 static int ena_init_once(void)
2178 {
2179 	static bool init_done;
2180 
2181 	if (init_done)
2182 		return 0;
2183 
2184 	if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
2185 		/* Init timer subsystem for the ENA timer service. */
2186 		rte_timer_subsystem_init();
2187 		/* Register handler for requests from secondary processes. */
2188 		rte_mp_action_register(ENA_MP_NAME, ena_mp_primary_handle);
2189 	}
2190 
2191 	init_done = true;
2192 	return 0;
2193 }
2194 
2195 static int eth_ena_dev_init(struct rte_eth_dev *eth_dev)
2196 {
2197 	struct ena_calc_queue_size_ctx calc_queue_ctx = { 0 };
2198 	struct rte_pci_device *pci_dev;
2199 	struct rte_intr_handle *intr_handle;
2200 	struct ena_adapter *adapter = eth_dev->data->dev_private;
2201 	struct ena_com_dev *ena_dev = &adapter->ena_dev;
2202 	struct ena_com_dev_get_features_ctx get_feat_ctx;
2203 	struct ena_llq_configurations llq_config;
2204 	const char *queue_type_str;
2205 	uint32_t max_num_io_queues;
2206 	int rc;
2207 	static int adapters_found;
2208 	bool disable_meta_caching;
2209 
2210 	eth_dev->dev_ops = &ena_dev_ops;
2211 	eth_dev->rx_pkt_burst = &eth_ena_recv_pkts;
2212 	eth_dev->tx_pkt_burst = &eth_ena_xmit_pkts;
2213 	eth_dev->tx_pkt_prepare = &eth_ena_prep_pkts;
2214 
2215 	rc = ena_init_once();
2216 	if (rc != 0)
2217 		return rc;
2218 
2219 	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
2220 		return 0;
2221 
2222 	eth_dev->data->dev_flags |= RTE_ETH_DEV_AUTOFILL_QUEUE_XSTATS;
2223 
2224 	memset(adapter, 0, sizeof(struct ena_adapter));
2225 	ena_dev = &adapter->ena_dev;
2226 
2227 	adapter->edev_data = eth_dev->data;
2228 
2229 	pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev);
2230 
2231 	PMD_INIT_LOG(INFO, "Initializing " PCI_PRI_FMT "\n",
2232 		pci_dev->addr.domain,
2233 		pci_dev->addr.bus,
2234 		pci_dev->addr.devid,
2235 		pci_dev->addr.function);
2236 
2237 	intr_handle = pci_dev->intr_handle;
2238 
2239 	adapter->regs = pci_dev->mem_resource[ENA_REGS_BAR].addr;
2240 	adapter->dev_mem_base = pci_dev->mem_resource[ENA_MEM_BAR].addr;
2241 
2242 	if (!adapter->regs) {
2243 		PMD_INIT_LOG(CRIT, "Failed to access registers BAR(%d)\n",
2244 			ENA_REGS_BAR);
2245 		return -ENXIO;
2246 	}
2247 
2248 	ena_dev->reg_bar = adapter->regs;
2249 	/* Pass device data as a pointer which can be passed to the IO functions
2250 	 * by the ena_com (for example - the memory allocation).
2251 */ 2252 ena_dev->dmadev = eth_dev->data; 2253 2254 adapter->id_number = adapters_found; 2255 2256 snprintf(adapter->name, ENA_NAME_MAX_LEN, "ena_%d", 2257 adapter->id_number); 2258 2259 /* Assign default devargs values */ 2260 adapter->missing_tx_completion_to = ENA_TX_TIMEOUT; 2261 adapter->enable_llq = true; 2262 adapter->use_large_llq_hdr = false; 2263 adapter->use_normal_llq_hdr = false; 2264 2265 /* Get user bypass */ 2266 rc = ena_parse_devargs(adapter, pci_dev->device.devargs); 2267 if (rc != 0) { 2268 PMD_INIT_LOG(CRIT, "Failed to parse devargs\n"); 2269 goto err; 2270 } 2271 adapter->llq_header_policy = ena_define_llq_hdr_policy(adapter); 2272 2273 rc = ena_com_allocate_customer_metrics_buffer(ena_dev); 2274 if (rc != 0) { 2275 PMD_INIT_LOG(CRIT, "Failed to allocate customer metrics buffer\n"); 2276 goto err; 2277 } 2278 2279 /* device specific initialization routine */ 2280 rc = ena_device_init(adapter, pci_dev, &get_feat_ctx); 2281 if (rc) { 2282 PMD_INIT_LOG(CRIT, "Failed to init ENA device\n"); 2283 goto err_metrics_delete; 2284 } 2285 2286 /* Check if device supports LSC */ 2287 if (!(adapter->all_aenq_groups & BIT(ENA_ADMIN_LINK_CHANGE))) 2288 adapter->edev_data->dev_flags &= ~RTE_ETH_DEV_INTR_LSC; 2289 2290 bool use_large_llq_hdr = ena_use_large_llq_hdr(adapter, 2291 get_feat_ctx.llq.entry_size_recommended); 2292 set_default_llq_configurations(&llq_config, &get_feat_ctx.llq, use_large_llq_hdr); 2293 rc = ena_set_queues_placement_policy(adapter, ena_dev, 2294 &get_feat_ctx.llq, &llq_config); 2295 if (unlikely(rc)) { 2296 PMD_INIT_LOG(CRIT, "Failed to set placement policy\n"); 2297 return rc; 2298 } 2299 2300 if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_HOST) { 2301 queue_type_str = "Regular"; 2302 } else { 2303 queue_type_str = "Low latency"; 2304 PMD_DRV_LOG(INFO, "LLQ entry size %uB\n", llq_config.llq_ring_entry_size_value); 2305 } 2306 PMD_DRV_LOG(INFO, "Placement policy: %s\n", queue_type_str); 2307 2308 calc_queue_ctx.ena_dev = ena_dev; 2309 calc_queue_ctx.get_feat_ctx = &get_feat_ctx; 2310 2311 max_num_io_queues = ena_calc_max_io_queue_num(ena_dev, &get_feat_ctx); 2312 rc = ena_calc_io_queue_size(&calc_queue_ctx, use_large_llq_hdr); 2313 if (unlikely((rc != 0) || (max_num_io_queues == 0))) { 2314 rc = -EFAULT; 2315 goto err_device_destroy; 2316 } 2317 2318 adapter->max_tx_ring_size = calc_queue_ctx.max_tx_queue_size; 2319 adapter->max_rx_ring_size = calc_queue_ctx.max_rx_queue_size; 2320 adapter->max_tx_sgl_size = calc_queue_ctx.max_tx_sgl_size; 2321 adapter->max_rx_sgl_size = calc_queue_ctx.max_rx_sgl_size; 2322 adapter->max_num_io_queues = max_num_io_queues; 2323 2324 if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) { 2325 disable_meta_caching = 2326 !!(get_feat_ctx.llq.accel_mode.u.get.supported_flags & 2327 BIT(ENA_ADMIN_DISABLE_META_CACHING)); 2328 } else { 2329 disable_meta_caching = false; 2330 } 2331 2332 /* prepare ring structures */ 2333 ena_init_rings(adapter, disable_meta_caching); 2334 2335 ena_config_debug_area(adapter); 2336 2337 /* Set max MTU for this device */ 2338 adapter->max_mtu = get_feat_ctx.dev_attr.max_mtu; 2339 2340 ena_set_offloads(&adapter->offloads, &get_feat_ctx.offload); 2341 2342 /* Copy MAC address and point DPDK to it */ 2343 eth_dev->data->mac_addrs = (struct rte_ether_addr *)adapter->mac_addr; 2344 rte_ether_addr_copy((struct rte_ether_addr *) 2345 get_feat_ctx.dev_attr.mac_addr, 2346 (struct rte_ether_addr *)adapter->mac_addr); 2347 2348 rc = ena_com_rss_init(ena_dev, ENA_RX_RSS_TABLE_LOG_SIZE); 
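	/* ena_com_rss_init() takes the log2 of the RSS indirection table size,
	 * so the table is created with 1 << ENA_RX_RSS_TABLE_LOG_SIZE entries.
	 * This is expected to be the same value that is later reported to the
	 * application as reta_size (ENA_RX_RSS_TABLE_SIZE) in ena_infos_get().
	 */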
2349 if (unlikely(rc != 0)) { 2350 PMD_DRV_LOG(ERR, "Failed to initialize RSS in ENA device\n"); 2351 goto err_delete_debug_area; 2352 } 2353 2354 adapter->drv_stats = rte_zmalloc("adapter stats", 2355 sizeof(*adapter->drv_stats), 2356 RTE_CACHE_LINE_SIZE); 2357 if (!adapter->drv_stats) { 2358 PMD_DRV_LOG(ERR, 2359 "Failed to allocate memory for adapter statistics\n"); 2360 rc = -ENOMEM; 2361 goto err_rss_destroy; 2362 } 2363 2364 rte_spinlock_init(&adapter->admin_lock); 2365 2366 rte_intr_callback_register(intr_handle, 2367 ena_interrupt_handler_rte, 2368 eth_dev); 2369 rte_intr_enable(intr_handle); 2370 ena_com_set_admin_polling_mode(ena_dev, false); 2371 ena_com_admin_aenq_enable(ena_dev); 2372 2373 rte_timer_init(&adapter->timer_wd); 2374 2375 adapters_found++; 2376 adapter->state = ENA_ADAPTER_STATE_INIT; 2377 2378 return 0; 2379 2380 err_rss_destroy: 2381 ena_com_rss_destroy(ena_dev); 2382 err_delete_debug_area: 2383 ena_com_delete_debug_area(ena_dev); 2384 2385 err_device_destroy: 2386 ena_com_delete_host_info(ena_dev); 2387 ena_com_admin_destroy(ena_dev); 2388 err_metrics_delete: 2389 ena_com_delete_customer_metrics_buffer(ena_dev); 2390 err: 2391 return rc; 2392 } 2393 2394 static void ena_destroy_device(struct rte_eth_dev *eth_dev) 2395 { 2396 struct ena_adapter *adapter = eth_dev->data->dev_private; 2397 struct ena_com_dev *ena_dev = &adapter->ena_dev; 2398 2399 if (adapter->state == ENA_ADAPTER_STATE_FREE) 2400 return; 2401 2402 ena_com_set_admin_running_state(ena_dev, false); 2403 2404 if (adapter->state != ENA_ADAPTER_STATE_CLOSED) 2405 ena_close(eth_dev); 2406 2407 ena_com_rss_destroy(ena_dev); 2408 2409 ena_com_delete_debug_area(ena_dev); 2410 ena_com_delete_host_info(ena_dev); 2411 2412 ena_com_abort_admin_commands(ena_dev); 2413 ena_com_wait_for_abort_completion(ena_dev); 2414 ena_com_admin_destroy(ena_dev); 2415 ena_com_mmio_reg_read_request_destroy(ena_dev); 2416 ena_com_delete_customer_metrics_buffer(ena_dev); 2417 2418 adapter->state = ENA_ADAPTER_STATE_FREE; 2419 } 2420 2421 static int eth_ena_dev_uninit(struct rte_eth_dev *eth_dev) 2422 { 2423 if (rte_eal_process_type() != RTE_PROC_PRIMARY) 2424 return 0; 2425 2426 ena_destroy_device(eth_dev); 2427 2428 return 0; 2429 } 2430 2431 static int ena_dev_configure(struct rte_eth_dev *dev) 2432 { 2433 struct ena_adapter *adapter = dev->data->dev_private; 2434 int rc; 2435 2436 adapter->state = ENA_ADAPTER_STATE_CONFIG; 2437 2438 if (dev->data->dev_conf.rxmode.mq_mode & RTE_ETH_MQ_RX_RSS_FLAG) 2439 dev->data->dev_conf.rxmode.offloads |= RTE_ETH_RX_OFFLOAD_RSS_HASH; 2440 dev->data->dev_conf.txmode.offloads |= RTE_ETH_TX_OFFLOAD_MULTI_SEGS; 2441 2442 /* Scattered Rx cannot be turned off in the HW, so this capability must 2443 * be forced. 2444 */ 2445 dev->data->scattered_rx = 1; 2446 2447 adapter->last_tx_comp_qid = 0; 2448 2449 adapter->missing_tx_completion_budget = 2450 RTE_MIN(ENA_MONITORED_TX_QUEUES, dev->data->nb_tx_queues); 2451 2452 /* To avoid detection of the spurious Tx completion timeout due to 2453 * application not calling the Tx cleanup function, set timeout for the 2454 * Tx queue which should be half of the missing completion timeout for a 2455 * safety. If there will be a lot of missing Tx completions in the 2456 * queue, they will be detected sooner or later. 
2457 */ 2458 adapter->tx_cleanup_stall_delay = adapter->missing_tx_completion_to / 2; 2459 2460 rc = ena_configure_aenq(adapter); 2461 2462 return rc; 2463 } 2464 2465 static void ena_init_rings(struct ena_adapter *adapter, 2466 bool disable_meta_caching) 2467 { 2468 size_t i; 2469 2470 for (i = 0; i < adapter->max_num_io_queues; i++) { 2471 struct ena_ring *ring = &adapter->tx_ring[i]; 2472 2473 ring->configured = 0; 2474 ring->type = ENA_RING_TYPE_TX; 2475 ring->adapter = adapter; 2476 ring->id = i; 2477 ring->tx_mem_queue_type = adapter->ena_dev.tx_mem_queue_type; 2478 ring->tx_max_header_size = adapter->ena_dev.tx_max_header_size; 2479 ring->sgl_size = adapter->max_tx_sgl_size; 2480 ring->disable_meta_caching = disable_meta_caching; 2481 } 2482 2483 for (i = 0; i < adapter->max_num_io_queues; i++) { 2484 struct ena_ring *ring = &adapter->rx_ring[i]; 2485 2486 ring->configured = 0; 2487 ring->type = ENA_RING_TYPE_RX; 2488 ring->adapter = adapter; 2489 ring->id = i; 2490 ring->sgl_size = adapter->max_rx_sgl_size; 2491 } 2492 } 2493 2494 static uint64_t ena_get_rx_port_offloads(struct ena_adapter *adapter) 2495 { 2496 uint64_t port_offloads = 0; 2497 2498 if (adapter->offloads.rx_offloads & ENA_L3_IPV4_CSUM) 2499 port_offloads |= RTE_ETH_RX_OFFLOAD_IPV4_CKSUM; 2500 2501 if (adapter->offloads.rx_offloads & 2502 (ENA_L4_IPV4_CSUM | ENA_L4_IPV6_CSUM)) 2503 port_offloads |= 2504 RTE_ETH_RX_OFFLOAD_UDP_CKSUM | RTE_ETH_RX_OFFLOAD_TCP_CKSUM; 2505 2506 if (adapter->offloads.rx_offloads & ENA_RX_RSS_HASH) 2507 port_offloads |= RTE_ETH_RX_OFFLOAD_RSS_HASH; 2508 2509 port_offloads |= RTE_ETH_RX_OFFLOAD_SCATTER; 2510 2511 return port_offloads; 2512 } 2513 2514 static uint64_t ena_get_tx_port_offloads(struct ena_adapter *adapter) 2515 { 2516 uint64_t port_offloads = 0; 2517 2518 if (adapter->offloads.tx_offloads & ENA_IPV4_TSO) 2519 port_offloads |= RTE_ETH_TX_OFFLOAD_TCP_TSO; 2520 2521 if (adapter->offloads.tx_offloads & ENA_L3_IPV4_CSUM) 2522 port_offloads |= RTE_ETH_TX_OFFLOAD_IPV4_CKSUM; 2523 if (adapter->offloads.tx_offloads & 2524 (ENA_L4_IPV4_CSUM_PARTIAL | ENA_L4_IPV4_CSUM | 2525 ENA_L4_IPV6_CSUM | ENA_L4_IPV6_CSUM_PARTIAL)) 2526 port_offloads |= 2527 RTE_ETH_TX_OFFLOAD_UDP_CKSUM | RTE_ETH_TX_OFFLOAD_TCP_CKSUM; 2528 2529 port_offloads |= RTE_ETH_TX_OFFLOAD_MULTI_SEGS; 2530 2531 port_offloads |= RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE; 2532 2533 return port_offloads; 2534 } 2535 2536 static uint64_t ena_get_rx_queue_offloads(struct ena_adapter *adapter) 2537 { 2538 RTE_SET_USED(adapter); 2539 2540 return 0; 2541 } 2542 2543 static uint64_t ena_get_tx_queue_offloads(struct ena_adapter *adapter) 2544 { 2545 uint64_t queue_offloads = 0; 2546 RTE_SET_USED(adapter); 2547 2548 queue_offloads |= RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE; 2549 2550 return queue_offloads; 2551 } 2552 2553 static int ena_infos_get(struct rte_eth_dev *dev, 2554 struct rte_eth_dev_info *dev_info) 2555 { 2556 struct ena_adapter *adapter; 2557 struct ena_com_dev *ena_dev; 2558 2559 ena_assert_msg(dev->data != NULL, "Uninitialized device\n"); 2560 ena_assert_msg(dev->data->dev_private != NULL, "Uninitialized device\n"); 2561 adapter = dev->data->dev_private; 2562 2563 ena_dev = &adapter->ena_dev; 2564 ena_assert_msg(ena_dev != NULL, "Uninitialized device\n"); 2565 2566 dev_info->speed_capa = 2567 RTE_ETH_LINK_SPEED_1G | 2568 RTE_ETH_LINK_SPEED_2_5G | 2569 RTE_ETH_LINK_SPEED_5G | 2570 RTE_ETH_LINK_SPEED_10G | 2571 RTE_ETH_LINK_SPEED_25G | 2572 RTE_ETH_LINK_SPEED_40G | 2573 RTE_ETH_LINK_SPEED_50G | 2574 RTE_ETH_LINK_SPEED_100G | 2575 
RTE_ETH_LINK_SPEED_200G | 2576 RTE_ETH_LINK_SPEED_400G; 2577 2578 /* Inform framework about available features */ 2579 dev_info->rx_offload_capa = ena_get_rx_port_offloads(adapter); 2580 dev_info->tx_offload_capa = ena_get_tx_port_offloads(adapter); 2581 dev_info->rx_queue_offload_capa = ena_get_rx_queue_offloads(adapter); 2582 dev_info->tx_queue_offload_capa = ena_get_tx_queue_offloads(adapter); 2583 2584 dev_info->flow_type_rss_offloads = ENA_ALL_RSS_HF; 2585 dev_info->hash_key_size = ENA_HASH_KEY_SIZE; 2586 2587 dev_info->min_rx_bufsize = ENA_MIN_FRAME_LEN; 2588 dev_info->max_rx_pktlen = adapter->max_mtu + RTE_ETHER_HDR_LEN + 2589 RTE_ETHER_CRC_LEN; 2590 dev_info->min_mtu = ENA_MIN_MTU; 2591 dev_info->max_mtu = adapter->max_mtu; 2592 dev_info->max_mac_addrs = 1; 2593 2594 dev_info->max_rx_queues = adapter->max_num_io_queues; 2595 dev_info->max_tx_queues = adapter->max_num_io_queues; 2596 dev_info->reta_size = ENA_RX_RSS_TABLE_SIZE; 2597 2598 dev_info->rx_desc_lim.nb_max = adapter->max_rx_ring_size; 2599 dev_info->rx_desc_lim.nb_min = ENA_MIN_RING_DESC; 2600 dev_info->rx_desc_lim.nb_seg_max = RTE_MIN(ENA_PKT_MAX_BUFS, 2601 adapter->max_rx_sgl_size); 2602 dev_info->rx_desc_lim.nb_mtu_seg_max = RTE_MIN(ENA_PKT_MAX_BUFS, 2603 adapter->max_rx_sgl_size); 2604 2605 dev_info->tx_desc_lim.nb_max = adapter->max_tx_ring_size; 2606 dev_info->tx_desc_lim.nb_min = ENA_MIN_RING_DESC; 2607 dev_info->tx_desc_lim.nb_seg_max = RTE_MIN(ENA_PKT_MAX_BUFS, 2608 adapter->max_tx_sgl_size); 2609 dev_info->tx_desc_lim.nb_mtu_seg_max = RTE_MIN(ENA_PKT_MAX_BUFS, 2610 adapter->max_tx_sgl_size); 2611 2612 dev_info->default_rxportconf.ring_size = RTE_MIN(ENA_DEFAULT_RING_SIZE, 2613 dev_info->rx_desc_lim.nb_max); 2614 dev_info->default_txportconf.ring_size = RTE_MIN(ENA_DEFAULT_RING_SIZE, 2615 dev_info->tx_desc_lim.nb_max); 2616 2617 dev_info->err_handle_mode = RTE_ETH_ERROR_HANDLE_MODE_PASSIVE; 2618 2619 return 0; 2620 } 2621 2622 static inline void ena_init_rx_mbuf(struct rte_mbuf *mbuf, uint16_t len) 2623 { 2624 mbuf->data_len = len; 2625 mbuf->data_off = RTE_PKTMBUF_HEADROOM; 2626 mbuf->refcnt = 1; 2627 mbuf->next = NULL; 2628 } 2629 2630 static struct rte_mbuf *ena_rx_mbuf(struct ena_ring *rx_ring, 2631 struct ena_com_rx_buf_info *ena_bufs, 2632 uint32_t descs, 2633 uint16_t *next_to_clean, 2634 uint8_t offset) 2635 { 2636 struct rte_mbuf *mbuf; 2637 struct rte_mbuf *mbuf_head; 2638 struct ena_rx_buffer *rx_info; 2639 int rc; 2640 uint16_t ntc, len, req_id, buf = 0; 2641 2642 if (unlikely(descs == 0)) 2643 return NULL; 2644 2645 ntc = *next_to_clean; 2646 2647 len = ena_bufs[buf].len; 2648 req_id = ena_bufs[buf].req_id; 2649 2650 rx_info = &rx_ring->rx_buffer_info[req_id]; 2651 2652 mbuf = rx_info->mbuf; 2653 RTE_ASSERT(mbuf != NULL); 2654 2655 ena_init_rx_mbuf(mbuf, len); 2656 2657 /* Fill the mbuf head with the data specific for 1st segment. */ 2658 mbuf_head = mbuf; 2659 mbuf_head->nb_segs = descs; 2660 mbuf_head->port = rx_ring->port_id; 2661 mbuf_head->pkt_len = len; 2662 mbuf_head->data_off += offset; 2663 2664 rx_info->mbuf = NULL; 2665 rx_ring->empty_rx_reqs[ntc] = req_id; 2666 ntc = ENA_IDX_NEXT_MASKED(ntc, rx_ring->size_mask); 2667 2668 while (--descs) { 2669 ++buf; 2670 len = ena_bufs[buf].len; 2671 req_id = ena_bufs[buf].req_id; 2672 2673 rx_info = &rx_ring->rx_buffer_info[req_id]; 2674 RTE_ASSERT(rx_info->mbuf != NULL); 2675 2676 if (unlikely(len == 0)) { 2677 /* 2678 * Some devices can pass descriptor with the length 0. 
2679 * To avoid confusion, the PMD is simply putting the 2680 * descriptor back, as it was never used. We'll avoid 2681 * mbuf allocation that way. 2682 */ 2683 rc = ena_add_single_rx_desc(rx_ring->ena_com_io_sq, 2684 rx_info->mbuf, req_id); 2685 if (unlikely(rc != 0)) { 2686 /* Free the mbuf in case of an error. */ 2687 rte_mbuf_raw_free(rx_info->mbuf); 2688 } else { 2689 /* 2690 * If there was no error, just exit the loop as 2691 * 0 length descriptor is always the last one. 2692 */ 2693 break; 2694 } 2695 } else { 2696 /* Create an mbuf chain. */ 2697 mbuf->next = rx_info->mbuf; 2698 mbuf = mbuf->next; 2699 2700 ena_init_rx_mbuf(mbuf, len); 2701 mbuf_head->pkt_len += len; 2702 } 2703 2704 /* 2705 * Mark the descriptor as depleted and perform necessary 2706 * cleanup. 2707 * This code will execute in two cases: 2708 * 1. Descriptor len was greater than 0 - normal situation. 2709 * 2. Descriptor len was 0 and we failed to add the descriptor 2710 * to the device. In that situation, we should try to add 2711 * the mbuf again in the populate routine and mark the 2712 * descriptor as used up by the device. 2713 */ 2714 rx_info->mbuf = NULL; 2715 rx_ring->empty_rx_reqs[ntc] = req_id; 2716 ntc = ENA_IDX_NEXT_MASKED(ntc, rx_ring->size_mask); 2717 } 2718 2719 *next_to_clean = ntc; 2720 2721 return mbuf_head; 2722 } 2723 2724 static uint16_t eth_ena_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, 2725 uint16_t nb_pkts) 2726 { 2727 struct ena_ring *rx_ring = (struct ena_ring *)(rx_queue); 2728 unsigned int free_queue_entries; 2729 uint16_t next_to_clean = rx_ring->next_to_clean; 2730 uint16_t descs_in_use; 2731 struct rte_mbuf *mbuf; 2732 uint16_t completed; 2733 struct ena_com_rx_ctx ena_rx_ctx; 2734 int i, rc = 0; 2735 bool fill_hash; 2736 2737 #ifdef RTE_ETHDEV_DEBUG_RX 2738 /* Check adapter state */ 2739 if (unlikely(rx_ring->adapter->state != ENA_ADAPTER_STATE_RUNNING)) { 2740 PMD_RX_LOG(ALERT, 2741 "Trying to receive pkts while device is NOT running\n"); 2742 return 0; 2743 } 2744 #endif 2745 2746 fill_hash = rx_ring->offloads & RTE_ETH_RX_OFFLOAD_RSS_HASH; 2747 2748 descs_in_use = rx_ring->ring_size - 2749 ena_com_free_q_entries(rx_ring->ena_com_io_sq) - 1; 2750 nb_pkts = RTE_MIN(descs_in_use, nb_pkts); 2751 2752 for (completed = 0; completed < nb_pkts; completed++) { 2753 ena_rx_ctx.max_bufs = rx_ring->sgl_size; 2754 ena_rx_ctx.ena_bufs = rx_ring->ena_bufs; 2755 ena_rx_ctx.descs = 0; 2756 ena_rx_ctx.pkt_offset = 0; 2757 /* receive packet context */ 2758 rc = ena_com_rx_pkt(rx_ring->ena_com_io_cq, 2759 rx_ring->ena_com_io_sq, 2760 &ena_rx_ctx); 2761 if (unlikely(rc)) { 2762 PMD_RX_LOG(ERR, 2763 "Failed to get the packet from the device, rc: %d\n", 2764 rc); 2765 if (rc == ENA_COM_NO_SPACE) { 2766 ++rx_ring->rx_stats.bad_desc_num; 2767 ena_trigger_reset(rx_ring->adapter, 2768 ENA_REGS_RESET_TOO_MANY_RX_DESCS); 2769 } else { 2770 ++rx_ring->rx_stats.bad_req_id; 2771 ena_trigger_reset(rx_ring->adapter, 2772 ENA_REGS_RESET_INV_RX_REQ_ID); 2773 } 2774 return 0; 2775 } 2776 2777 mbuf = ena_rx_mbuf(rx_ring, 2778 ena_rx_ctx.ena_bufs, 2779 ena_rx_ctx.descs, 2780 &next_to_clean, 2781 ena_rx_ctx.pkt_offset); 2782 if (unlikely(mbuf == NULL)) { 2783 for (i = 0; i < ena_rx_ctx.descs; ++i) { 2784 rx_ring->empty_rx_reqs[next_to_clean] = 2785 rx_ring->ena_bufs[i].req_id; 2786 next_to_clean = ENA_IDX_NEXT_MASKED( 2787 next_to_clean, rx_ring->size_mask); 2788 } 2789 break; 2790 } 2791 2792 /* fill mbuf attributes if any */ 2793 ena_rx_mbuf_prepare(rx_ring, mbuf, &ena_rx_ctx, fill_hash); 2794 2795 if 
(unlikely(mbuf->ol_flags & 2796 (RTE_MBUF_F_RX_IP_CKSUM_BAD | RTE_MBUF_F_RX_L4_CKSUM_BAD))) 2797 rte_atomic64_inc(&rx_ring->adapter->drv_stats->ierrors); 2798 2799 rx_pkts[completed] = mbuf; 2800 rx_ring->rx_stats.bytes += mbuf->pkt_len; 2801 } 2802 2803 rx_ring->rx_stats.cnt += completed; 2804 rx_ring->next_to_clean = next_to_clean; 2805 2806 free_queue_entries = ena_com_free_q_entries(rx_ring->ena_com_io_sq); 2807 2808 /* Burst refill to save doorbells, memory barriers, const interval */ 2809 if (free_queue_entries >= rx_ring->rx_free_thresh) { 2810 ena_populate_rx_queue(rx_ring, free_queue_entries); 2811 } 2812 2813 return completed; 2814 } 2815 2816 static uint16_t 2817 eth_ena_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, 2818 uint16_t nb_pkts) 2819 { 2820 int32_t ret; 2821 uint32_t i; 2822 struct rte_mbuf *m; 2823 struct ena_ring *tx_ring = (struct ena_ring *)(tx_queue); 2824 struct ena_adapter *adapter = tx_ring->adapter; 2825 struct rte_ipv4_hdr *ip_hdr; 2826 uint64_t ol_flags; 2827 uint64_t l4_csum_flag; 2828 uint64_t dev_offload_capa; 2829 uint16_t frag_field; 2830 bool need_pseudo_csum; 2831 2832 dev_offload_capa = adapter->offloads.tx_offloads; 2833 for (i = 0; i != nb_pkts; i++) { 2834 m = tx_pkts[i]; 2835 ol_flags = m->ol_flags; 2836 2837 /* Check if any offload flag was set */ 2838 if (ol_flags == 0) 2839 continue; 2840 2841 l4_csum_flag = ol_flags & RTE_MBUF_F_TX_L4_MASK; 2842 /* SCTP checksum offload is not supported by the ENA. */ 2843 if ((ol_flags & ENA_TX_OFFLOAD_NOTSUP_MASK) || 2844 l4_csum_flag == RTE_MBUF_F_TX_SCTP_CKSUM) { 2845 PMD_TX_LOG(DEBUG, 2846 "mbuf[%" PRIu32 "] has unsupported offloads flags set: 0x%" PRIu64 "\n", 2847 i, ol_flags); 2848 rte_errno = ENOTSUP; 2849 return i; 2850 } 2851 2852 if (unlikely(m->nb_segs >= tx_ring->sgl_size && 2853 !(tx_ring->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV && 2854 m->nb_segs == tx_ring->sgl_size && 2855 m->data_len < tx_ring->tx_max_header_size))) { 2856 PMD_TX_LOG(DEBUG, 2857 "mbuf[%" PRIu32 "] has too many segments: %" PRIu16 "\n", 2858 i, m->nb_segs); 2859 rte_errno = EINVAL; 2860 return i; 2861 } 2862 2863 #ifdef RTE_LIBRTE_ETHDEV_DEBUG 2864 /* Check if requested offload is also enabled for the queue */ 2865 if ((ol_flags & RTE_MBUF_F_TX_IP_CKSUM && 2866 !(tx_ring->offloads & RTE_ETH_TX_OFFLOAD_IPV4_CKSUM)) || 2867 (l4_csum_flag == RTE_MBUF_F_TX_TCP_CKSUM && 2868 !(tx_ring->offloads & RTE_ETH_TX_OFFLOAD_TCP_CKSUM)) || 2869 (l4_csum_flag == RTE_MBUF_F_TX_UDP_CKSUM && 2870 !(tx_ring->offloads & RTE_ETH_TX_OFFLOAD_UDP_CKSUM))) { 2871 PMD_TX_LOG(DEBUG, 2872 "mbuf[%" PRIu32 "]: requested offloads: %" PRIu16 " are not enabled for the queue[%u]\n", 2873 i, m->nb_segs, tx_ring->id); 2874 rte_errno = EINVAL; 2875 return i; 2876 } 2877 2878 /* The caller is obligated to set l2 and l3 len if any cksum 2879 * offload is enabled. 2880 */ 2881 if (unlikely(ol_flags & (RTE_MBUF_F_TX_IP_CKSUM | RTE_MBUF_F_TX_L4_MASK) && 2882 (m->l2_len == 0 || m->l3_len == 0))) { 2883 PMD_TX_LOG(DEBUG, 2884 "mbuf[%" PRIu32 "]: l2_len or l3_len values are 0 while the offload was requested\n", 2885 i); 2886 rte_errno = EINVAL; 2887 return i; 2888 } 2889 ret = rte_validate_tx_offload(m); 2890 if (ret != 0) { 2891 rte_errno = -ret; 2892 return i; 2893 } 2894 #endif 2895 2896 /* Verify HW support for requested offloads and determine if 2897 * pseudo header checksum is needed. 
2898 */ 2899 need_pseudo_csum = false; 2900 if (ol_flags & RTE_MBUF_F_TX_IPV4) { 2901 if (ol_flags & RTE_MBUF_F_TX_IP_CKSUM && 2902 !(dev_offload_capa & ENA_L3_IPV4_CSUM)) { 2903 rte_errno = ENOTSUP; 2904 return i; 2905 } 2906 2907 if (ol_flags & RTE_MBUF_F_TX_TCP_SEG && 2908 !(dev_offload_capa & ENA_IPV4_TSO)) { 2909 rte_errno = ENOTSUP; 2910 return i; 2911 } 2912 2913 /* Check HW capabilities and if pseudo csum is needed 2914 * for L4 offloads. 2915 */ 2916 if (l4_csum_flag != RTE_MBUF_F_TX_L4_NO_CKSUM && 2917 !(dev_offload_capa & ENA_L4_IPV4_CSUM)) { 2918 if (dev_offload_capa & 2919 ENA_L4_IPV4_CSUM_PARTIAL) { 2920 need_pseudo_csum = true; 2921 } else { 2922 rte_errno = ENOTSUP; 2923 return i; 2924 } 2925 } 2926 2927 /* Parse the DF flag */ 2928 ip_hdr = rte_pktmbuf_mtod_offset(m, 2929 struct rte_ipv4_hdr *, m->l2_len); 2930 frag_field = rte_be_to_cpu_16(ip_hdr->fragment_offset); 2931 if (frag_field & RTE_IPV4_HDR_DF_FLAG) { 2932 m->packet_type |= RTE_PTYPE_L4_NONFRAG; 2933 } else if (ol_flags & RTE_MBUF_F_TX_TCP_SEG) { 2934 /* In case we are supposed to TSO and have DF 2935 * not set (DF=0) hardware must be provided with 2936 * partial checksum. 2937 */ 2938 need_pseudo_csum = true; 2939 } 2940 } else if (ol_flags & RTE_MBUF_F_TX_IPV6) { 2941 /* There is no support for IPv6 TSO as for now. */ 2942 if (ol_flags & RTE_MBUF_F_TX_TCP_SEG) { 2943 rte_errno = ENOTSUP; 2944 return i; 2945 } 2946 2947 /* Check HW capabilities and if pseudo csum is needed */ 2948 if (l4_csum_flag != RTE_MBUF_F_TX_L4_NO_CKSUM && 2949 !(dev_offload_capa & ENA_L4_IPV6_CSUM)) { 2950 if (dev_offload_capa & 2951 ENA_L4_IPV6_CSUM_PARTIAL) { 2952 need_pseudo_csum = true; 2953 } else { 2954 rte_errno = ENOTSUP; 2955 return i; 2956 } 2957 } 2958 } 2959 2960 if (need_pseudo_csum) { 2961 ret = rte_net_intel_cksum_flags_prepare(m, ol_flags); 2962 if (ret != 0) { 2963 rte_errno = -ret; 2964 return i; 2965 } 2966 } 2967 } 2968 2969 return i; 2970 } 2971 2972 static void ena_update_hints(struct ena_adapter *adapter, 2973 struct ena_admin_ena_hw_hints *hints) 2974 { 2975 if (hints->admin_completion_tx_timeout) 2976 adapter->ena_dev.admin_queue.completion_timeout = 2977 hints->admin_completion_tx_timeout * 1000; 2978 2979 if (hints->mmio_read_timeout) 2980 /* convert to usec */ 2981 adapter->ena_dev.mmio_read.reg_read_to = 2982 hints->mmio_read_timeout * 1000; 2983 2984 if (hints->driver_watchdog_timeout) { 2985 if (hints->driver_watchdog_timeout == ENA_HW_HINTS_NO_TIMEOUT) 2986 adapter->keep_alive_timeout = ENA_HW_HINTS_NO_TIMEOUT; 2987 else 2988 // Convert msecs to ticks 2989 adapter->keep_alive_timeout = 2990 (hints->driver_watchdog_timeout * 2991 rte_get_timer_hz()) / 1000; 2992 } 2993 } 2994 2995 static void ena_tx_map_mbuf(struct ena_ring *tx_ring, 2996 struct ena_tx_buffer *tx_info, 2997 struct rte_mbuf *mbuf, 2998 void **push_header, 2999 uint16_t *header_len) 3000 { 3001 struct ena_com_buf *ena_buf; 3002 uint16_t delta, seg_len, push_len; 3003 3004 delta = 0; 3005 seg_len = mbuf->data_len; 3006 3007 tx_info->mbuf = mbuf; 3008 ena_buf = tx_info->bufs; 3009 3010 if (tx_ring->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) { 3011 /* 3012 * Tx header might be (and will be in most cases) smaller than 3013 * tx_max_header_size. But it's not an issue to send more data 3014 * to the device, than actually needed if the mbuf size is 3015 * greater than tx_max_header_size. 
3016 */ 3017 push_len = RTE_MIN(mbuf->pkt_len, tx_ring->tx_max_header_size); 3018 *header_len = push_len; 3019 3020 if (likely(push_len <= seg_len)) { 3021 /* If the push header is in the single segment, then 3022 * just point it to the 1st mbuf data. 3023 */ 3024 *push_header = rte_pktmbuf_mtod(mbuf, uint8_t *); 3025 } else { 3026 /* If the push header lays in the several segments, copy 3027 * it to the intermediate buffer. 3028 */ 3029 rte_pktmbuf_read(mbuf, 0, push_len, 3030 tx_ring->push_buf_intermediate_buf); 3031 *push_header = tx_ring->push_buf_intermediate_buf; 3032 delta = push_len - seg_len; 3033 } 3034 } else { 3035 *push_header = NULL; 3036 *header_len = 0; 3037 push_len = 0; 3038 } 3039 3040 /* Process first segment taking into consideration pushed header */ 3041 if (seg_len > push_len) { 3042 ena_buf->paddr = mbuf->buf_iova + 3043 mbuf->data_off + 3044 push_len; 3045 ena_buf->len = seg_len - push_len; 3046 ena_buf++; 3047 tx_info->num_of_bufs++; 3048 } 3049 3050 while ((mbuf = mbuf->next) != NULL) { 3051 seg_len = mbuf->data_len; 3052 3053 /* Skip mbufs if whole data is pushed as a header */ 3054 if (unlikely(delta > seg_len)) { 3055 delta -= seg_len; 3056 continue; 3057 } 3058 3059 ena_buf->paddr = mbuf->buf_iova + mbuf->data_off + delta; 3060 ena_buf->len = seg_len - delta; 3061 ena_buf++; 3062 tx_info->num_of_bufs++; 3063 3064 delta = 0; 3065 } 3066 } 3067 3068 static int ena_xmit_mbuf(struct ena_ring *tx_ring, struct rte_mbuf *mbuf) 3069 { 3070 struct ena_tx_buffer *tx_info; 3071 struct ena_com_tx_ctx ena_tx_ctx = { { 0 } }; 3072 uint16_t next_to_use; 3073 uint16_t header_len; 3074 uint16_t req_id; 3075 void *push_header; 3076 int nb_hw_desc; 3077 int rc; 3078 3079 /* Checking for space for 2 additional metadata descriptors due to 3080 * possible header split and metadata descriptor 3081 */ 3082 if (!ena_com_sq_have_enough_space(tx_ring->ena_com_io_sq, 3083 mbuf->nb_segs + 2)) { 3084 PMD_DRV_LOG(DEBUG, "Not enough space in the tx queue\n"); 3085 return ENA_COM_NO_MEM; 3086 } 3087 3088 next_to_use = tx_ring->next_to_use; 3089 3090 req_id = tx_ring->empty_tx_reqs[next_to_use]; 3091 tx_info = &tx_ring->tx_buffer_info[req_id]; 3092 tx_info->num_of_bufs = 0; 3093 RTE_ASSERT(tx_info->mbuf == NULL); 3094 3095 ena_tx_map_mbuf(tx_ring, tx_info, mbuf, &push_header, &header_len); 3096 3097 ena_tx_ctx.ena_bufs = tx_info->bufs; 3098 ena_tx_ctx.push_header = push_header; 3099 ena_tx_ctx.num_bufs = tx_info->num_of_bufs; 3100 ena_tx_ctx.req_id = req_id; 3101 ena_tx_ctx.header_len = header_len; 3102 3103 /* Set Tx offloads flags, if applicable */ 3104 ena_tx_mbuf_prepare(mbuf, &ena_tx_ctx, tx_ring->offloads, 3105 tx_ring->disable_meta_caching); 3106 3107 if (unlikely(ena_com_is_doorbell_needed(tx_ring->ena_com_io_sq, 3108 &ena_tx_ctx))) { 3109 PMD_TX_LOG(DEBUG, 3110 "LLQ Tx max burst size of queue %d achieved, writing doorbell to send burst\n", 3111 tx_ring->id); 3112 ena_com_write_sq_doorbell(tx_ring->ena_com_io_sq); 3113 tx_ring->tx_stats.doorbells++; 3114 tx_ring->pkts_without_db = false; 3115 } 3116 3117 /* prepare the packet's descriptors to dma engine */ 3118 rc = ena_com_prepare_tx(tx_ring->ena_com_io_sq, &ena_tx_ctx, 3119 &nb_hw_desc); 3120 if (unlikely(rc)) { 3121 PMD_DRV_LOG(ERR, "Failed to prepare Tx buffers, rc: %d\n", rc); 3122 ++tx_ring->tx_stats.prepare_ctx_err; 3123 ena_trigger_reset(tx_ring->adapter, 3124 ENA_REGS_RESET_DRIVER_INVALID_STATE); 3125 return rc; 3126 } 3127 3128 tx_info->tx_descs = nb_hw_desc; 3129 tx_info->timestamp = rte_get_timer_cycles(); 3130 3131 
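	/* The timestamp stored above is what check_for_tx_completion_in_queue()
	 * compares against missing_tx_completion_to in order to detect Tx
	 * requests that were never completed by the device; ena_tx_cleanup()
	 * clears it back to 0 once the completion is reaped.
	 */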
tx_ring->tx_stats.cnt++; 3132 tx_ring->tx_stats.bytes += mbuf->pkt_len; 3133 3134 tx_ring->next_to_use = ENA_IDX_NEXT_MASKED(next_to_use, 3135 tx_ring->size_mask); 3136 3137 return 0; 3138 } 3139 3140 static __rte_always_inline size_t 3141 ena_tx_cleanup_mbuf_fast(struct rte_mbuf **mbufs_to_clean, 3142 struct rte_mbuf *mbuf, 3143 size_t mbuf_cnt, 3144 size_t buf_size) 3145 { 3146 struct rte_mbuf *m_next; 3147 3148 while (mbuf != NULL) { 3149 m_next = mbuf->next; 3150 mbufs_to_clean[mbuf_cnt++] = mbuf; 3151 if (mbuf_cnt == buf_size) { 3152 rte_pktmbuf_free_bulk(mbufs_to_clean, mbuf_cnt); 3153 mbuf_cnt = 0; 3154 } 3155 mbuf = m_next; 3156 } 3157 3158 return mbuf_cnt; 3159 } 3160 3161 static int ena_tx_cleanup(void *txp, uint32_t free_pkt_cnt) 3162 { 3163 struct rte_mbuf *mbufs_to_clean[ENA_CLEANUP_BUF_SIZE]; 3164 struct ena_ring *tx_ring = (struct ena_ring *)txp; 3165 size_t mbuf_cnt = 0; 3166 unsigned int total_tx_descs = 0; 3167 unsigned int total_tx_pkts = 0; 3168 uint16_t cleanup_budget; 3169 uint16_t next_to_clean = tx_ring->next_to_clean; 3170 bool fast_free = tx_ring->offloads & RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE; 3171 3172 /* 3173 * If free_pkt_cnt is equal to 0, it means that the user requested 3174 * full cleanup, so attempt to release all Tx descriptors 3175 * (ring_size - 1 -> size_mask) 3176 */ 3177 cleanup_budget = (free_pkt_cnt == 0) ? tx_ring->size_mask : free_pkt_cnt; 3178 3179 while (likely(total_tx_pkts < cleanup_budget)) { 3180 struct rte_mbuf *mbuf; 3181 struct ena_tx_buffer *tx_info; 3182 uint16_t req_id; 3183 3184 if (ena_com_tx_comp_req_id_get(tx_ring->ena_com_io_cq, &req_id) != 0) 3185 break; 3186 3187 if (unlikely(validate_tx_req_id(tx_ring, req_id) != 0)) 3188 break; 3189 3190 /* Get Tx info & store how many descs were processed */ 3191 tx_info = &tx_ring->tx_buffer_info[req_id]; 3192 tx_info->timestamp = 0; 3193 3194 mbuf = tx_info->mbuf; 3195 if (fast_free) { 3196 mbuf_cnt = ena_tx_cleanup_mbuf_fast(mbufs_to_clean, mbuf, mbuf_cnt, 3197 ENA_CLEANUP_BUF_SIZE); 3198 } else { 3199 rte_pktmbuf_free(mbuf); 3200 } 3201 3202 tx_info->mbuf = NULL; 3203 tx_ring->empty_tx_reqs[next_to_clean] = req_id; 3204 3205 total_tx_descs += tx_info->tx_descs; 3206 total_tx_pkts++; 3207 3208 /* Put back descriptor to the ring for reuse */ 3209 next_to_clean = ENA_IDX_NEXT_MASKED(next_to_clean, 3210 tx_ring->size_mask); 3211 } 3212 3213 if (likely(total_tx_descs > 0)) { 3214 /* acknowledge completion of sent packets */ 3215 tx_ring->next_to_clean = next_to_clean; 3216 ena_com_comp_ack(tx_ring->ena_com_io_sq, total_tx_descs); 3217 } 3218 3219 if (mbuf_cnt != 0) 3220 rte_pktmbuf_free_bulk(mbufs_to_clean, mbuf_cnt); 3221 3222 /* Notify completion handler that full cleanup was performed */ 3223 if (free_pkt_cnt == 0 || total_tx_pkts < cleanup_budget) 3224 tx_ring->last_cleanup_ticks = rte_get_timer_cycles(); 3225 3226 return total_tx_pkts; 3227 } 3228 3229 static uint16_t eth_ena_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, 3230 uint16_t nb_pkts) 3231 { 3232 struct ena_ring *tx_ring = (struct ena_ring *)(tx_queue); 3233 int available_desc; 3234 uint16_t sent_idx = 0; 3235 3236 #ifdef RTE_ETHDEV_DEBUG_TX 3237 /* Check adapter state */ 3238 if (unlikely(tx_ring->adapter->state != ENA_ADAPTER_STATE_RUNNING)) { 3239 PMD_TX_LOG(ALERT, 3240 "Trying to xmit pkts while device is NOT running\n"); 3241 return 0; 3242 } 3243 #endif 3244 3245 available_desc = ena_com_free_q_entries(tx_ring->ena_com_io_sq); 3246 if (available_desc < tx_ring->tx_free_thresh) 3247 ena_tx_cleanup((void *)tx_ring, 0); 
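	/* Passing 0 as free_pkt_cnt above requests a full cleanup: per the
	 * comment in ena_tx_cleanup(), the budget then becomes size_mask
	 * (ring_size - 1), so every already completed descriptor is reclaimed
	 * before new packets are queued below.
	 */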
3248 3249 for (sent_idx = 0; sent_idx < nb_pkts; sent_idx++) { 3250 if (ena_xmit_mbuf(tx_ring, tx_pkts[sent_idx])) 3251 break; 3252 tx_ring->pkts_without_db = true; 3253 rte_prefetch0(tx_pkts[ENA_IDX_ADD_MASKED(sent_idx, 4, 3254 tx_ring->size_mask)]); 3255 } 3256 3257 /* If there are ready packets to be xmitted... */ 3258 if (likely(tx_ring->pkts_without_db)) { 3259 /* ...let HW do its best :-) */ 3260 ena_com_write_sq_doorbell(tx_ring->ena_com_io_sq); 3261 tx_ring->tx_stats.doorbells++; 3262 tx_ring->pkts_without_db = false; 3263 } 3264 3265 tx_ring->tx_stats.available_desc = 3266 ena_com_free_q_entries(tx_ring->ena_com_io_sq); 3267 tx_ring->tx_stats.tx_poll++; 3268 3269 return sent_idx; 3270 } 3271 3272 static void ena_copy_customer_metrics(struct ena_adapter *adapter, uint64_t *buf, 3273 size_t num_metrics) 3274 { 3275 struct ena_com_dev *ena_dev = &adapter->ena_dev; 3276 int rc; 3277 3278 if (ena_com_get_cap(ena_dev, ENA_ADMIN_CUSTOMER_METRICS)) { 3279 if (num_metrics != ENA_STATS_ARRAY_METRICS) { 3280 PMD_DRV_LOG(ERR, "Detected discrepancy in the number of customer metrics"); 3281 return; 3282 } 3283 rte_spinlock_lock(&adapter->admin_lock); 3284 rc = ENA_PROXY(adapter, 3285 ena_com_get_customer_metrics, 3286 &adapter->ena_dev, 3287 (char *)buf, 3288 num_metrics * sizeof(uint64_t)); 3289 rte_spinlock_unlock(&adapter->admin_lock); 3290 if (rc != 0) { 3291 PMD_DRV_LOG(WARNING, "Failed to get customer metrics, rc: %d\n", rc); 3292 return; 3293 } 3294 3295 } else if (ena_com_get_cap(ena_dev, ENA_ADMIN_ENI_STATS)) { 3296 if (num_metrics != ENA_STATS_ARRAY_METRICS_LEGACY) { 3297 PMD_DRV_LOG(ERR, "Detected discrepancy in the number of legacy metrics"); 3298 return; 3299 } 3300 3301 rte_spinlock_lock(&adapter->admin_lock); 3302 rc = ENA_PROXY(adapter, 3303 ena_com_get_eni_stats, 3304 &adapter->ena_dev, 3305 (struct ena_admin_eni_stats *)buf); 3306 rte_spinlock_unlock(&adapter->admin_lock); 3307 if (rc != 0) { 3308 PMD_DRV_LOG(WARNING, 3309 "Failed to get ENI metrics, rc: %d\n", rc); 3310 return; 3311 } 3312 } 3313 } 3314 3315 static void ena_copy_ena_srd_info(struct ena_adapter *adapter, 3316 struct ena_stats_srd *srd_info) 3317 { 3318 int rc; 3319 3320 if (!ena_com_get_cap(&adapter->ena_dev, ENA_ADMIN_ENA_SRD_INFO)) 3321 return; 3322 3323 rte_spinlock_lock(&adapter->admin_lock); 3324 rc = ENA_PROXY(adapter, 3325 ena_com_get_ena_srd_info, 3326 &adapter->ena_dev, 3327 (struct ena_admin_ena_srd_info *)srd_info); 3328 rte_spinlock_unlock(&adapter->admin_lock); 3329 if (rc != ENA_COM_OK && rc != ENA_COM_UNSUPPORTED) { 3330 PMD_DRV_LOG(WARNING, 3331 "Failed to get ENA express srd info, rc: %d\n", rc); 3332 return; 3333 } 3334 } 3335 3336 /** 3337 * DPDK callback to retrieve names of extended device statistics 3338 * 3339 * @param dev 3340 * Pointer to Ethernet device structure. 3341 * @param[out] xstats_names 3342 * Buffer to insert names into. 3343 * @param n 3344 * Number of names. 3345 * 3346 * @return 3347 * Number of xstats names. 
3348 */ 3349 static int ena_xstats_get_names(struct rte_eth_dev *dev, 3350 struct rte_eth_xstat_name *xstats_names, 3351 unsigned int n) 3352 { 3353 struct ena_adapter *adapter = dev->data->dev_private; 3354 unsigned int xstats_count = ena_xstats_calc_num(dev->data); 3355 unsigned int stat, i, count = 0; 3356 3357 if (n < xstats_count || !xstats_names) 3358 return xstats_count; 3359 3360 for (stat = 0; stat < ENA_STATS_ARRAY_GLOBAL; stat++, count++) 3361 strcpy(xstats_names[count].name, 3362 ena_stats_global_strings[stat].name); 3363 3364 for (stat = 0; stat < adapter->metrics_num; stat++, count++) 3365 rte_strscpy(xstats_names[count].name, 3366 ena_stats_metrics_strings[stat].name, 3367 RTE_ETH_XSTATS_NAME_SIZE); 3368 for (stat = 0; stat < ENA_STATS_ARRAY_ENA_SRD; stat++, count++) 3369 rte_strscpy(xstats_names[count].name, 3370 ena_stats_srd_strings[stat].name, 3371 RTE_ETH_XSTATS_NAME_SIZE); 3372 3373 for (stat = 0; stat < ENA_STATS_ARRAY_RX; stat++) 3374 for (i = 0; i < dev->data->nb_rx_queues; i++, count++) 3375 snprintf(xstats_names[count].name, 3376 sizeof(xstats_names[count].name), 3377 "rx_q%d_%s", i, 3378 ena_stats_rx_strings[stat].name); 3379 3380 for (stat = 0; stat < ENA_STATS_ARRAY_TX; stat++) 3381 for (i = 0; i < dev->data->nb_tx_queues; i++, count++) 3382 snprintf(xstats_names[count].name, 3383 sizeof(xstats_names[count].name), 3384 "tx_q%d_%s", i, 3385 ena_stats_tx_strings[stat].name); 3386 3387 return xstats_count; 3388 } 3389 3390 /** 3391 * DPDK callback to retrieve names of extended device statistics for the given 3392 * ids. 3393 * 3394 * @param dev 3395 * Pointer to Ethernet device structure. 3396 * @param[out] xstats_names 3397 * Buffer to insert names into. 3398 * @param ids 3399 * IDs array for which the names should be retrieved. 3400 * @param size 3401 * Number of ids. 3402 * 3403 * @return 3404 * Positive value: number of xstats names. Negative value: error code. 
3405  */
3406 static int ena_xstats_get_names_by_id(struct rte_eth_dev *dev,
3407 				      const uint64_t *ids,
3408 				      struct rte_eth_xstat_name *xstats_names,
3409 				      unsigned int size)
3410 {
3411 	struct ena_adapter *adapter = dev->data->dev_private;
3412 	uint64_t xstats_count = ena_xstats_calc_num(dev->data);
3413 	uint64_t id, qid;
3414 	unsigned int i;
3415 
3416 	if (xstats_names == NULL)
3417 		return xstats_count;
3418 
3419 	for (i = 0; i < size; ++i) {
3420 		id = ids[i];
3421 		if (id >= xstats_count) {
3422 			PMD_DRV_LOG(ERR,
3423 				"ID value out of range: id=%" PRIu64 ", xstats_num=%" PRIu64 "\n",
3424 				id, xstats_count);
3425 			return -EINVAL;
3426 		}
3427 
3428 		if (id < ENA_STATS_ARRAY_GLOBAL) {
3429 			strcpy(xstats_names[i].name,
3430 				ena_stats_global_strings[id].name);
3431 			continue;
3432 		}
3433 
3434 		id -= ENA_STATS_ARRAY_GLOBAL;
3435 		if (id < adapter->metrics_num) {
3436 			rte_strscpy(xstats_names[i].name,
3437 				ena_stats_metrics_strings[id].name,
3438 				RTE_ETH_XSTATS_NAME_SIZE);
3439 			continue;
3440 		}
3441 
3442 		id -= adapter->metrics_num;
3443 
3444 		if (id < ENA_STATS_ARRAY_ENA_SRD) {
3445 			rte_strscpy(xstats_names[i].name,
3446 				ena_stats_srd_strings[id].name,
3447 				RTE_ETH_XSTATS_NAME_SIZE);
3448 			continue;
3449 		}
3450 		id -= ENA_STATS_ARRAY_ENA_SRD;
3451 
3452 		if (id < ENA_STATS_ARRAY_RX) {
3453 			qid = id / dev->data->nb_rx_queues;
3454 			id %= dev->data->nb_rx_queues;
3455 			snprintf(xstats_names[i].name,
3456 				sizeof(xstats_names[i].name),
3457 				"rx_q%" PRIu64 "_%s",
3458 				qid, ena_stats_rx_strings[id].name);
3459 			continue;
3460 		}
3461 
3462 		id -= ENA_STATS_ARRAY_RX;
3463 		/* Although this condition is not needed, it was added for
3464 		 * compatibility in case a new xstat structure is ever added.
3465 		 */
3466 		if (id < ENA_STATS_ARRAY_TX) {
3467 			qid = id / dev->data->nb_tx_queues;
3468 			id %= dev->data->nb_tx_queues;
3469 			snprintf(xstats_names[i].name,
3470 				sizeof(xstats_names[i].name),
3471 				"tx_q%" PRIu64 "_%s",
3472 				qid, ena_stats_tx_strings[id].name);
3473 			continue;
3474 		}
3475 	}
3476 
3477 	return i;
3478 }
3479 
3480 /**
3481  * DPDK callback to get extended device statistics.
3482  *
3483  * @param dev
3484  *   Pointer to Ethernet device structure.
3485  * @param[out] xstats
3486  *   Stats table output buffer.
3487  * @param n
3488  *   The size of the stats table.
3489  *
3490  * @return
3491  *   Number of xstats on success, negative on failure.
3492 */ 3493 static int ena_xstats_get(struct rte_eth_dev *dev, 3494 struct rte_eth_xstat *xstats, 3495 unsigned int n) 3496 { 3497 struct ena_adapter *adapter = dev->data->dev_private; 3498 unsigned int xstats_count = ena_xstats_calc_num(dev->data); 3499 unsigned int stat, i, count = 0; 3500 int stat_offset; 3501 void *stats_begin; 3502 uint64_t metrics_stats[ENA_MAX_CUSTOMER_METRICS]; 3503 struct ena_stats_srd srd_info = {0}; 3504 3505 if (n < xstats_count) 3506 return xstats_count; 3507 3508 if (!xstats) 3509 return 0; 3510 3511 for (stat = 0; stat < ENA_STATS_ARRAY_GLOBAL; stat++, count++) { 3512 stat_offset = ena_stats_global_strings[stat].stat_offset; 3513 stats_begin = &adapter->dev_stats; 3514 3515 xstats[count].id = count; 3516 xstats[count].value = *((uint64_t *) 3517 ((char *)stats_begin + stat_offset)); 3518 } 3519 3520 ena_copy_customer_metrics(adapter, metrics_stats, adapter->metrics_num); 3521 stats_begin = metrics_stats; 3522 for (stat = 0; stat < adapter->metrics_num; stat++, count++) { 3523 stat_offset = ena_stats_metrics_strings[stat].stat_offset; 3524 3525 xstats[count].id = count; 3526 xstats[count].value = *((uint64_t *) 3527 ((char *)stats_begin + stat_offset)); 3528 } 3529 3530 ena_copy_ena_srd_info(adapter, &srd_info); 3531 stats_begin = &srd_info; 3532 for (stat = 0; stat < ENA_STATS_ARRAY_ENA_SRD; stat++, count++) { 3533 stat_offset = ena_stats_srd_strings[stat].stat_offset; 3534 xstats[count].id = count; 3535 xstats[count].value = *((uint64_t *) 3536 ((char *)stats_begin + stat_offset)); 3537 } 3538 3539 for (stat = 0; stat < ENA_STATS_ARRAY_RX; stat++) { 3540 for (i = 0; i < dev->data->nb_rx_queues; i++, count++) { 3541 stat_offset = ena_stats_rx_strings[stat].stat_offset; 3542 stats_begin = &adapter->rx_ring[i].rx_stats; 3543 3544 xstats[count].id = count; 3545 xstats[count].value = *((uint64_t *) 3546 ((char *)stats_begin + stat_offset)); 3547 } 3548 } 3549 3550 for (stat = 0; stat < ENA_STATS_ARRAY_TX; stat++) { 3551 for (i = 0; i < dev->data->nb_tx_queues; i++, count++) { 3552 stat_offset = ena_stats_tx_strings[stat].stat_offset; 3553 stats_begin = &adapter->tx_ring[i].rx_stats; 3554 3555 xstats[count].id = count; 3556 xstats[count].value = *((uint64_t *) 3557 ((char *)stats_begin + stat_offset)); 3558 } 3559 } 3560 3561 return count; 3562 } 3563 3564 static int ena_xstats_get_by_id(struct rte_eth_dev *dev, 3565 const uint64_t *ids, 3566 uint64_t *values, 3567 unsigned int n) 3568 { 3569 struct ena_adapter *adapter = dev->data->dev_private; 3570 uint64_t id; 3571 uint64_t rx_entries, tx_entries; 3572 unsigned int i; 3573 int qid; 3574 int valid = 0; 3575 bool were_metrics_copied = false; 3576 bool was_srd_info_copied = false; 3577 uint64_t metrics_stats[ENA_MAX_CUSTOMER_METRICS]; 3578 struct ena_stats_srd srd_info = {0}; 3579 3580 for (i = 0; i < n; ++i) { 3581 id = ids[i]; 3582 /* Check if id belongs to global statistics */ 3583 if (id < ENA_STATS_ARRAY_GLOBAL) { 3584 values[i] = *((uint64_t *)&adapter->dev_stats + id); 3585 ++valid; 3586 continue; 3587 } 3588 3589 /* Check if id belongs to ENI statistics */ 3590 id -= ENA_STATS_ARRAY_GLOBAL; 3591 if (id < adapter->metrics_num) { 3592 /* Avoid reading metrics multiple times in a single 3593 * function call, as it requires communication with the 3594 * admin queue. 
3595 */ 3596 if (!were_metrics_copied) { 3597 were_metrics_copied = true; 3598 ena_copy_customer_metrics(adapter, 3599 metrics_stats, 3600 adapter->metrics_num); 3601 } 3602 3603 values[i] = *((uint64_t *)&metrics_stats + id); 3604 ++valid; 3605 continue; 3606 } 3607 3608 /* Check if id belongs to SRD info statistics */ 3609 id -= adapter->metrics_num; 3610 3611 if (id < ENA_STATS_ARRAY_ENA_SRD) { 3612 /* 3613 * Avoid reading srd info multiple times in a single 3614 * function call, as it requires communication with the 3615 * admin queue. 3616 */ 3617 if (!was_srd_info_copied) { 3618 was_srd_info_copied = true; 3619 ena_copy_ena_srd_info(adapter, &srd_info); 3620 } 3621 values[i] = *((uint64_t *)&adapter->srd_stats + id); 3622 ++valid; 3623 continue; 3624 } 3625 3626 /* Check if id belongs to rx queue statistics */ 3627 id -= ENA_STATS_ARRAY_ENA_SRD; 3628 3629 rx_entries = ENA_STATS_ARRAY_RX * dev->data->nb_rx_queues; 3630 if (id < rx_entries) { 3631 qid = id % dev->data->nb_rx_queues; 3632 id /= dev->data->nb_rx_queues; 3633 values[i] = *((uint64_t *) 3634 &adapter->rx_ring[qid].rx_stats + id); 3635 ++valid; 3636 continue; 3637 } 3638 /* Check if id belongs to rx queue statistics */ 3639 id -= rx_entries; 3640 tx_entries = ENA_STATS_ARRAY_TX * dev->data->nb_tx_queues; 3641 if (id < tx_entries) { 3642 qid = id % dev->data->nb_tx_queues; 3643 id /= dev->data->nb_tx_queues; 3644 values[i] = *((uint64_t *) 3645 &adapter->tx_ring[qid].tx_stats + id); 3646 ++valid; 3647 continue; 3648 } 3649 } 3650 3651 return valid; 3652 } 3653 3654 static int ena_process_uint_devarg(const char *key, 3655 const char *value, 3656 void *opaque) 3657 { 3658 struct ena_adapter *adapter = opaque; 3659 char *str_end; 3660 uint64_t uint_value; 3661 3662 uint_value = strtoull(value, &str_end, DECIMAL_BASE); 3663 if (value == str_end) { 3664 PMD_INIT_LOG(ERR, 3665 "Invalid value for key '%s'. Only uint values are accepted.\n", 3666 key); 3667 return -EINVAL; 3668 } 3669 3670 if (strcmp(key, ENA_DEVARG_MISS_TXC_TO) == 0) { 3671 if (uint_value > ENA_MAX_TX_TIMEOUT_SECONDS) { 3672 PMD_INIT_LOG(ERR, 3673 "Tx timeout too high: %" PRIu64 " sec. Maximum allowed: %d sec.\n", 3674 uint_value, ENA_MAX_TX_TIMEOUT_SECONDS); 3675 return -EINVAL; 3676 } else if (uint_value == 0) { 3677 PMD_INIT_LOG(INFO, 3678 "Check for missing Tx completions has been disabled.\n"); 3679 adapter->missing_tx_completion_to = 3680 ENA_HW_HINTS_NO_TIMEOUT; 3681 } else { 3682 PMD_INIT_LOG(INFO, 3683 "Tx packet completion timeout set to %" PRIu64 " seconds.\n", 3684 uint_value); 3685 adapter->missing_tx_completion_to = 3686 uint_value * rte_get_timer_hz(); 3687 } 3688 } 3689 3690 return 0; 3691 } 3692 3693 static int ena_process_bool_devarg(const char *key, 3694 const char *value, 3695 void *opaque) 3696 { 3697 struct ena_adapter *adapter = opaque; 3698 bool bool_value; 3699 3700 /* Parse the value. */ 3701 if (strcmp(value, "1") == 0) { 3702 bool_value = true; 3703 } else if (strcmp(value, "0") == 0) { 3704 bool_value = false; 3705 } else { 3706 PMD_INIT_LOG(ERR, 3707 "Invalid value: '%s' for key '%s'. Accepted: '0' or '1'\n", 3708 value, key); 3709 return -EINVAL; 3710 } 3711 3712 /* Now, assign it to the proper adapter field. 
static int ena_parse_devargs(struct ena_adapter *adapter,
			     struct rte_devargs *devargs)
{
	static const char * const allowed_args[] = {
		ENA_DEVARG_LARGE_LLQ_HDR,
		ENA_DEVARG_NORMAL_LLQ_HDR,
		ENA_DEVARG_MISS_TXC_TO,
		ENA_DEVARG_ENABLE_LLQ,
		NULL,
	};
	struct rte_kvargs *kvlist;
	int rc;

	if (devargs == NULL)
		return 0;

	kvlist = rte_kvargs_parse(devargs->args, allowed_args);
	if (kvlist == NULL) {
		PMD_INIT_LOG(ERR, "Invalid device arguments: %s\n",
			devargs->args);
		return -EINVAL;
	}

	rc = rte_kvargs_process(kvlist, ENA_DEVARG_LARGE_LLQ_HDR,
		ena_process_bool_devarg, adapter);
	if (rc != 0)
		goto exit;
	rc = rte_kvargs_process(kvlist, ENA_DEVARG_NORMAL_LLQ_HDR,
		ena_process_bool_devarg, adapter);
	if (rc != 0)
		goto exit;
	rc = rte_kvargs_process(kvlist, ENA_DEVARG_MISS_TXC_TO,
		ena_process_uint_devarg, adapter);
	if (rc != 0)
		goto exit;
	rc = rte_kvargs_process(kvlist, ENA_DEVARG_ENABLE_LLQ,
		ena_process_bool_devarg, adapter);

exit:
	rte_kvargs_free(kvlist);

	return rc;
}
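
/*
 * Rx queue interrupts: one event fd vector is requested per Rx queue and the
 * queues are mapped starting at RTE_INTR_VEC_RXTX_OFFSET, so that one
 * interrupt remains available for the ENA admin/AENQ handling (hence the
 * rte_intr_allow_others() check below).
 */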
static int ena_setup_rx_intr(struct rte_eth_dev *dev)
{
	struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev);
	struct rte_intr_handle *intr_handle = pci_dev->intr_handle;
	int rc;
	uint16_t vectors_nb, i;
	bool rx_intr_requested = dev->data->dev_conf.intr_conf.rxq;

	if (!rx_intr_requested)
		return 0;

	if (!rte_intr_cap_multiple(intr_handle)) {
		PMD_DRV_LOG(ERR,
			"Rx interrupt requested, but it isn't supported by the PCI driver\n");
		return -ENOTSUP;
	}

	/* Disable interrupt mapping before the configuration starts. */
	rte_intr_disable(intr_handle);

	/* Verify if there are enough vectors available. */
	vectors_nb = dev->data->nb_rx_queues;
	if (vectors_nb > RTE_MAX_RXTX_INTR_VEC_ID) {
		PMD_DRV_LOG(ERR,
			"Too many Rx interrupts requested, maximum number: %d\n",
			RTE_MAX_RXTX_INTR_VEC_ID);
		rc = -ENOTSUP;
		goto enable_intr;
	}

	/* Allocate the vector list */
	if (rte_intr_vec_list_alloc(intr_handle, "intr_vec",
			dev->data->nb_rx_queues)) {
		PMD_DRV_LOG(ERR,
			"Failed to allocate interrupt vector for %d queues\n",
			dev->data->nb_rx_queues);
		rc = -ENOMEM;
		goto enable_intr;
	}

	rc = rte_intr_efd_enable(intr_handle, vectors_nb);
	if (rc != 0)
		goto free_intr_vec;

	if (!rte_intr_allow_others(intr_handle)) {
		PMD_DRV_LOG(ERR,
			"Not enough interrupts available to use both ENA Admin and Rx interrupts\n");
		goto disable_intr_efd;
	}

	for (i = 0; i < vectors_nb; ++i)
		if (rte_intr_vec_list_index_set(intr_handle, i,
			RTE_INTR_VEC_RXTX_OFFSET + i))
			goto disable_intr_efd;

	rte_intr_enable(intr_handle);
	return 0;

disable_intr_efd:
	rte_intr_efd_disable(intr_handle);
free_intr_vec:
	rte_intr_vec_list_free(intr_handle);
enable_intr:
	rte_intr_enable(intr_handle);
	return rc;
}

static void ena_rx_queue_intr_set(struct rte_eth_dev *dev,
				  uint16_t queue_id,
				  bool unmask)
{
	struct ena_adapter *adapter = dev->data->dev_private;
	struct ena_ring *rxq = &adapter->rx_ring[queue_id];
	struct ena_eth_io_intr_reg intr_reg;

	ena_com_update_intr_reg(&intr_reg, 0, 0, unmask, 1);
	ena_com_unmask_intr(rxq->ena_com_io_cq, &intr_reg);
}

static int ena_rx_queue_intr_enable(struct rte_eth_dev *dev,
				    uint16_t queue_id)
{
	ena_rx_queue_intr_set(dev, queue_id, true);

	return 0;
}

static int ena_rx_queue_intr_disable(struct rte_eth_dev *dev,
				     uint16_t queue_id)
{
	ena_rx_queue_intr_set(dev, queue_id, false);

	return 0;
}
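
/*
 * The AENQ (asynchronous event notification queue) delivers device events
 * such as link changes, notifications and keep-alive heartbeats. Only the
 * groups enabled below are reported by the device; their handlers are
 * registered in aenq_handlers further down in this file.
 */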
static int ena_configure_aenq(struct ena_adapter *adapter)
{
	uint32_t aenq_groups = adapter->all_aenq_groups;
	int rc;

	/* All_aenq_groups holds all AENQ functions supported by the device and
	 * the HW, so at first we need to be sure the LSC request is valid.
	 */
	if (adapter->edev_data->dev_conf.intr_conf.lsc != 0) {
		if (!(aenq_groups & BIT(ENA_ADMIN_LINK_CHANGE))) {
			PMD_DRV_LOG(ERR,
				"LSC requested, but it's not supported by the AENQ\n");
			return -EINVAL;
		}
	} else {
		/* If LSC wasn't enabled by the app, let's enable all supported
		 * AENQ procedures except the LSC.
		 */
		aenq_groups &= ~BIT(ENA_ADMIN_LINK_CHANGE);
	}

	rc = ena_com_set_aenq_config(&adapter->ena_dev, aenq_groups);
	if (rc != 0) {
		PMD_DRV_LOG(ERR, "Cannot configure AENQ groups, rc=%d\n", rc);
		return rc;
	}

	adapter->active_aenq_groups = aenq_groups;

	return 0;
}

int ena_mp_indirect_table_set(struct ena_adapter *adapter)
{
	return ENA_PROXY(adapter, ena_com_indirect_table_set, &adapter->ena_dev);
}

int ena_mp_indirect_table_get(struct ena_adapter *adapter,
			      uint32_t *indirect_table)
{
	return ENA_PROXY(adapter, ena_com_indirect_table_get, &adapter->ena_dev,
		indirect_table);
}

/*********************************************************************
 *  Implementations of the functions from ena_plat_dpdk.h
 *********************************************************************/

const struct rte_memzone *
ena_mem_alloc_coherent(struct rte_eth_dev_data *data, size_t size,
		       int socket_id, unsigned int alignment, void **virt_addr,
		       dma_addr_t *phys_addr)
{
	char z_name[RTE_MEMZONE_NAMESIZE];
	struct ena_adapter *adapter = data->dev_private;
	const struct rte_memzone *memzone;
	int rc;

	rc = snprintf(z_name, RTE_MEMZONE_NAMESIZE, "ena_p%d_mz%" PRIu64 "",
		data->port_id, adapter->memzone_cnt);
	if (rc >= RTE_MEMZONE_NAMESIZE) {
		PMD_DRV_LOG(ERR,
			"Name for the ena_com memzone is too long. Port: %d, mz_num: %" PRIu64 "\n",
			data->port_id, adapter->memzone_cnt);
		goto error;
	}
	adapter->memzone_cnt++;

	memzone = rte_memzone_reserve_aligned(z_name, size, socket_id,
		RTE_MEMZONE_IOVA_CONTIG, alignment);
	if (memzone == NULL) {
		PMD_DRV_LOG(ERR, "Failed to allocate ena_com memzone: %s\n",
			z_name);
		goto error;
	}

	memset(memzone->addr, 0, size);
	*virt_addr = memzone->addr;
	*phys_addr = memzone->iova;

	return memzone;

error:
	*virt_addr = NULL;
	*phys_addr = 0;

	return NULL;
}

/*********************************************************************
 *  PMD configuration
 *********************************************************************/
static int eth_ena_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
	struct rte_pci_device *pci_dev)
{
	return rte_eth_dev_pci_generic_probe(pci_dev,
		sizeof(struct ena_adapter), eth_ena_dev_init);
}

static int eth_ena_pci_remove(struct rte_pci_device *pci_dev)
{
	return rte_eth_dev_pci_generic_remove(pci_dev, eth_ena_dev_uninit);
}

static struct rte_pci_driver rte_ena_pmd = {
	.id_table = pci_id_ena_map,
	.drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC |
		     RTE_PCI_DRV_WC_ACTIVATE,
	.probe = eth_ena_pci_probe,
	.remove = eth_ena_pci_remove,
};

RTE_PMD_REGISTER_PCI(net_ena, rte_ena_pmd);
RTE_PMD_REGISTER_PCI_TABLE(net_ena, pci_id_ena_map);
RTE_PMD_REGISTER_KMOD_DEP(net_ena, "* igb_uio | uio_pci_generic | vfio-pci");
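
/*
 * Illustrative example of passing the run-time parameters declared below
 * (the PCI address is hypothetical):
 *   dpdk-testpmd -a 00:06.0,large_llq_hdr=1,miss_txc_to=5 -- -i
 */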
RTE_PMD_REGISTER_PARAM_STRING(net_ena,
	ENA_DEVARG_LARGE_LLQ_HDR "=<0|1> "
	ENA_DEVARG_NORMAL_LLQ_HDR "=<0|1> "
	ENA_DEVARG_ENABLE_LLQ "=<0|1> "
	ENA_DEVARG_MISS_TXC_TO "=<uint>");
RTE_LOG_REGISTER_SUFFIX(ena_logtype_init, init, NOTICE);
RTE_LOG_REGISTER_SUFFIX(ena_logtype_driver, driver, NOTICE);
#ifdef RTE_ETHDEV_DEBUG_RX
RTE_LOG_REGISTER_SUFFIX(ena_logtype_rx, rx, DEBUG);
#endif
#ifdef RTE_ETHDEV_DEBUG_TX
RTE_LOG_REGISTER_SUFFIX(ena_logtype_tx, tx, DEBUG);
#endif
RTE_LOG_REGISTER_SUFFIX(ena_logtype_com, com, WARNING);

/******************************************************************************
 ******************************** AENQ Handlers *******************************
 *****************************************************************************/
static void ena_update_on_link_change(void *adapter_data,
				      struct ena_admin_aenq_entry *aenq_e)
{
	struct rte_eth_dev *eth_dev = adapter_data;
	struct ena_adapter *adapter = eth_dev->data->dev_private;
	struct ena_admin_aenq_link_change_desc *aenq_link_desc;
	uint32_t status;

	aenq_link_desc = (struct ena_admin_aenq_link_change_desc *)aenq_e;

	status = get_ena_admin_aenq_link_change_desc_link_status(aenq_link_desc);
	adapter->link_status = status;

	ena_link_update(eth_dev, 0);
	rte_eth_dev_callback_process(eth_dev, RTE_ETH_EVENT_INTR_LSC, NULL);
}

static void ena_notification(void *adapter_data,
			     struct ena_admin_aenq_entry *aenq_e)
{
	struct rte_eth_dev *eth_dev = adapter_data;
	struct ena_adapter *adapter = eth_dev->data->dev_private;
	struct ena_admin_ena_hw_hints *hints;

	if (aenq_e->aenq_common_desc.group != ENA_ADMIN_NOTIFICATION)
		PMD_DRV_LOG(WARNING, "Invalid AENQ group: %x. Expected: %x\n",
			aenq_e->aenq_common_desc.group,
			ENA_ADMIN_NOTIFICATION);

	switch (aenq_e->aenq_common_desc.syndrome) {
	case ENA_ADMIN_UPDATE_HINTS:
		hints = (struct ena_admin_ena_hw_hints *)
			(&aenq_e->inline_data_w4);
		ena_update_hints(adapter, hints);
		break;
	default:
		PMD_DRV_LOG(ERR, "Invalid AENQ notification syndrome: %d\n",
			aenq_e->aenq_common_desc.syndrome);
	}
}
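
/*
 * The keep-alive event doubles as the watchdog heartbeat and carries the
 * device-side drop counters, which are folded into the driver statistics
 * below.
 */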
static void ena_keep_alive(void *adapter_data,
			   __rte_unused struct ena_admin_aenq_entry *aenq_e)
{
	struct rte_eth_dev *eth_dev = adapter_data;
	struct ena_adapter *adapter = eth_dev->data->dev_private;
	struct ena_admin_aenq_keep_alive_desc *desc;
	uint64_t rx_drops;
	uint64_t tx_drops;
	uint64_t rx_overruns;

	adapter->timestamp_wd = rte_get_timer_cycles();

	desc = (struct ena_admin_aenq_keep_alive_desc *)aenq_e;
	rx_drops = ((uint64_t)desc->rx_drops_high << 32) | desc->rx_drops_low;
	tx_drops = ((uint64_t)desc->tx_drops_high << 32) | desc->tx_drops_low;
	rx_overruns = ((uint64_t)desc->rx_overruns_high << 32) | desc->rx_overruns_low;

	/*
	 * Depending on its acceleration support, the device updates a different statistic when
	 * an Rx packet is dropped because there are no available buffers to accommodate it.
	 */
	adapter->drv_stats->rx_drops = rx_drops + rx_overruns;
	adapter->dev_stats.tx_drops = tx_drops;
}

static void ena_suboptimal_configuration(__rte_unused void *adapter_data,
					 struct ena_admin_aenq_entry *aenq_e)
{
	struct ena_admin_aenq_conf_notifications_desc *desc;
	int bit, num_bits;

	desc = (struct ena_admin_aenq_conf_notifications_desc *)aenq_e;
	num_bits = BITS_PER_TYPE(desc->notifications_bitmap);
	for (bit = 0; bit < num_bits; bit++) {
		if (desc->notifications_bitmap & RTE_BIT64(bit)) {
			PMD_DRV_LOG(WARNING,
				"Sub-optimal configuration notification code: %d\n", bit + 1);
		}
	}
}

/**
 * This handler will be called for any unknown AENQ event group or any event
 * with an unimplemented handler.
 **/
static void unimplemented_aenq_handler(__rte_unused void *data,
				       __rte_unused struct ena_admin_aenq_entry *aenq_e)
{
	PMD_DRV_LOG(ERR,
		"Received an unknown event or an event with an unimplemented handler\n");
}

static struct ena_aenq_handlers aenq_handlers = {
	.handlers = {
		[ENA_ADMIN_LINK_CHANGE] = ena_update_on_link_change,
		[ENA_ADMIN_NOTIFICATION] = ena_notification,
		[ENA_ADMIN_KEEP_ALIVE] = ena_keep_alive,
		[ENA_ADMIN_CONF_NOTIFICATIONS] = ena_suboptimal_configuration
	},
	.unimplemented_handler = unimplemented_aenq_handler
};

/*********************************************************************
 *  Multi-Process communication request handling (in primary)
 *********************************************************************/
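/*
 * Requests which need the device admin queue are forwarded by secondary
 * processes over the rte_mp IPC channel (see ENA_PROXY above) and are
 * executed here in the primary process; the result is passed back in the
 * reply body.
 */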
static int
ena_mp_primary_handle(const struct rte_mp_msg *mp_msg, const void *peer)
{
	const struct ena_mp_body *req =
		(const struct ena_mp_body *)mp_msg->param;
	struct ena_adapter *adapter;
	struct ena_com_dev *ena_dev;
	struct ena_mp_body *rsp;
	struct rte_mp_msg mp_rsp;
	struct rte_eth_dev *dev;
	int res = 0;

	rsp = (struct ena_mp_body *)&mp_rsp.param;
	mp_msg_init(&mp_rsp, req->type, req->port_id);

	if (!rte_eth_dev_is_valid_port(req->port_id)) {
		rte_errno = ENODEV;
		res = -rte_errno;
		PMD_DRV_LOG(ERR, "Unknown port %d in request %d\n",
			req->port_id, req->type);
		goto end;
	}
	dev = &rte_eth_devices[req->port_id];
	adapter = dev->data->dev_private;
	ena_dev = &adapter->ena_dev;

	switch (req->type) {
	case ENA_MP_DEV_STATS_GET:
		res = ena_com_get_dev_basic_stats(ena_dev,
			&adapter->basic_stats);
		break;
	case ENA_MP_ENI_STATS_GET:
		res = ena_com_get_eni_stats(ena_dev,
			(struct ena_admin_eni_stats *)&adapter->metrics_stats);
		break;
	case ENA_MP_MTU_SET:
		res = ena_com_set_dev_mtu(ena_dev, req->args.mtu);
		break;
	case ENA_MP_IND_TBL_GET:
		res = ena_com_indirect_table_get(ena_dev,
			adapter->indirect_table);
		break;
	case ENA_MP_IND_TBL_SET:
		res = ena_com_indirect_table_set(ena_dev);
		break;
	case ENA_MP_CUSTOMER_METRICS_GET:
		res = ena_com_get_customer_metrics(ena_dev,
			(char *)adapter->metrics_stats,
			adapter->metrics_num * sizeof(uint64_t));
		break;
	case ENA_MP_SRD_STATS_GET:
		res = ena_com_get_ena_srd_info(ena_dev,
			(struct ena_admin_ena_srd_info *)&adapter->srd_stats);
		break;
	default:
		PMD_DRV_LOG(ERR, "Unknown request type %d\n", req->type);
		res = -EINVAL;
		break;
	}

end:
	/* Save processing result in the reply */
	rsp->result = res;
	/* Return just IPC processing status */
	return rte_mp_reply(&mp_rsp, peer);
}

static ena_llq_policy ena_define_llq_hdr_policy(struct ena_adapter *adapter)
{
	if (!adapter->enable_llq)
		return ENA_LLQ_POLICY_DISABLED;
	if (adapter->use_large_llq_hdr)
		return ENA_LLQ_POLICY_LARGE;
	if (adapter->use_normal_llq_hdr)
		return ENA_LLQ_POLICY_NORMAL;
	return ENA_LLQ_POLICY_RECOMMENDED;
}

static bool ena_use_large_llq_hdr(struct ena_adapter *adapter, uint8_t recommended_entry_size)
{
	if (adapter->llq_header_policy == ENA_LLQ_POLICY_LARGE) {
		return true;
	} else if (adapter->llq_header_policy == ENA_LLQ_POLICY_RECOMMENDED) {
		PMD_DRV_LOG(INFO, "Recommended device entry size policy %u\n",
			recommended_entry_size);
		if (recommended_entry_size == ENA_ADMIN_LIST_ENTRY_SIZE_256B)
			return true;
	}
	return false;
}