/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright (c) 2015-2020 Amazon.com, Inc. or its affiliates.
 * All rights reserved.
 */

#include <rte_alarm.h>
#include <rte_string_fns.h>
#include <rte_errno.h>
#include <rte_version.h>
#include <rte_net.h>
#include <rte_kvargs.h>

#include "ena_ethdev.h"
#include "ena_logs.h"
#include "ena_platform.h"
#include "ena_com.h"
#include "ena_eth_com.h"

#include <ena_common_defs.h>
#include <ena_regs_defs.h>
#include <ena_admin_defs.h>
#include <ena_eth_io_defs.h>

#define DRV_MODULE_VER_MAJOR	2
#define DRV_MODULE_VER_MINOR	10
#define DRV_MODULE_VER_SUBMINOR	0

#define __MERGE_64B_H_L(h, l) (((uint64_t)h << 32) | l)

#define GET_L4_HDR_LEN(mbuf) \
	((rte_pktmbuf_mtod_offset(mbuf, struct rte_tcp_hdr *, \
		mbuf->l3_len + mbuf->l2_len)->data_off) >> 4)

#define ETH_GSTRING_LEN	32

#define ARRAY_SIZE(x) RTE_DIM(x)

#define ENA_MIN_RING_DESC	128

#define USEC_PER_MSEC	1000UL

#define BITS_PER_BYTE 8

#define BITS_PER_TYPE(type) (sizeof(type) * BITS_PER_BYTE)

#define DECIMAL_BASE 10

#define MAX_WIDE_LLQ_DEPTH_UNSUPPORTED 0

/*
 * Keep ENA_CLEANUP_BUF_THRESH below RTE_MEMPOOL_CACHE_MAX_SIZE, so that a
 * full cleanup batch fits in the mempool's local cache.
 */
#define ENA_CLEANUP_BUF_THRESH	256

struct ena_stats {
	char name[ETH_GSTRING_LEN];
	int stat_offset;
};

#define ENA_STAT_ENTRY(stat, stat_type) { \
	.name = #stat, \
	.stat_offset = offsetof(struct ena_stats_##stat_type, stat) \
}

#define ENA_STAT_RX_ENTRY(stat) \
	ENA_STAT_ENTRY(stat, rx)

#define ENA_STAT_TX_ENTRY(stat) \
	ENA_STAT_ENTRY(stat, tx)

#define ENA_STAT_METRICS_ENTRY(stat) \
	ENA_STAT_ENTRY(stat, metrics)

#define ENA_STAT_GLOBAL_ENTRY(stat) \
	ENA_STAT_ENTRY(stat, dev)

#define ENA_STAT_ENA_SRD_ENTRY(stat) \
	ENA_STAT_ENTRY(stat, srd)

/* Device arguments */
#define ENA_DEVARG_LARGE_LLQ_HDR "large_llq_hdr"
#define ENA_DEVARG_NORMAL_LLQ_HDR "normal_llq_hdr"
/* Timeout in seconds after which a single uncompleted Tx packet should be
 * considered as missing.
 */
#define ENA_DEVARG_MISS_TXC_TO "miss_txc_to"
/*
 * Controls whether LLQ should be used (if available). Enabled by default.
 * NOTE: Disabling LLQ is strongly discouraged, as it may lead to a huge
 * performance degradation on 6th generation AWS instances.
 */
#define ENA_DEVARG_ENABLE_LLQ "enable_llq"
/*
 * Controls the period of time (in milliseconds) between two consecutive
 * inspections of the control queues when the driver is in poll mode and not
 * using interrupts. By default, this value is zero, indicating that the
 * driver will not be in poll mode and will use interrupts. A non-zero value
 * for this argument is mandatory when using the uio_pci_generic driver.
 */
#define ENA_DEVARG_CONTROL_PATH_POLL_INTERVAL "control_path_poll_interval"

/*
 * Each rte_memzone should have a unique name.
 * To satisfy this, count the number of allocations and append it to the name.
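 *
 * A minimal sketch of that naming scheme, for illustration only (assumption:
 * the real allocation helpers are macros in the driver headers and may
 * differ in detail; "size" and "socket_id" are placeholders):
 *
 *   char mz_name[RTE_MEMZONE_NAMESIZE];
 *   snprintf(mz_name, sizeof(mz_name), "ena_mz_%" PRIu64,
 *            (uint64_t)rte_atomic64_add_return(&ena_alloc_cnt, 1));
 *   const struct rte_memzone *mz =
 *       rte_memzone_reserve(mz_name, size, socket_id, 0);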
106 */ 107 rte_atomic64_t ena_alloc_cnt; 108 109 static const struct ena_stats ena_stats_global_strings[] = { 110 ENA_STAT_GLOBAL_ENTRY(wd_expired), 111 ENA_STAT_GLOBAL_ENTRY(dev_start), 112 ENA_STAT_GLOBAL_ENTRY(dev_stop), 113 ENA_STAT_GLOBAL_ENTRY(tx_drops), 114 }; 115 116 /* 117 * The legacy metrics (also known as eni stats) consisted of 5 stats, while the reworked 118 * metrics (also known as customer metrics) support an additional stat. 119 */ 120 static struct ena_stats ena_stats_metrics_strings[] = { 121 ENA_STAT_METRICS_ENTRY(bw_in_allowance_exceeded), 122 ENA_STAT_METRICS_ENTRY(bw_out_allowance_exceeded), 123 ENA_STAT_METRICS_ENTRY(pps_allowance_exceeded), 124 ENA_STAT_METRICS_ENTRY(conntrack_allowance_exceeded), 125 ENA_STAT_METRICS_ENTRY(linklocal_allowance_exceeded), 126 ENA_STAT_METRICS_ENTRY(conntrack_allowance_available), 127 }; 128 129 static const struct ena_stats ena_stats_srd_strings[] = { 130 ENA_STAT_ENA_SRD_ENTRY(ena_srd_mode), 131 ENA_STAT_ENA_SRD_ENTRY(ena_srd_tx_pkts), 132 ENA_STAT_ENA_SRD_ENTRY(ena_srd_eligible_tx_pkts), 133 ENA_STAT_ENA_SRD_ENTRY(ena_srd_rx_pkts), 134 ENA_STAT_ENA_SRD_ENTRY(ena_srd_resource_utilization), 135 }; 136 137 static const struct ena_stats ena_stats_tx_strings[] = { 138 ENA_STAT_TX_ENTRY(cnt), 139 ENA_STAT_TX_ENTRY(bytes), 140 ENA_STAT_TX_ENTRY(prepare_ctx_err), 141 ENA_STAT_TX_ENTRY(tx_poll), 142 ENA_STAT_TX_ENTRY(doorbells), 143 ENA_STAT_TX_ENTRY(bad_req_id), 144 ENA_STAT_TX_ENTRY(available_desc), 145 ENA_STAT_TX_ENTRY(missed_tx), 146 }; 147 148 static const struct ena_stats ena_stats_rx_strings[] = { 149 ENA_STAT_RX_ENTRY(cnt), 150 ENA_STAT_RX_ENTRY(bytes), 151 ENA_STAT_RX_ENTRY(refill_partial), 152 ENA_STAT_RX_ENTRY(l3_csum_bad), 153 ENA_STAT_RX_ENTRY(l4_csum_bad), 154 ENA_STAT_RX_ENTRY(l4_csum_good), 155 ENA_STAT_RX_ENTRY(mbuf_alloc_fail), 156 ENA_STAT_RX_ENTRY(bad_desc_num), 157 ENA_STAT_RX_ENTRY(bad_req_id), 158 }; 159 160 #define ENA_STATS_ARRAY_GLOBAL ARRAY_SIZE(ena_stats_global_strings) 161 #define ENA_STATS_ARRAY_METRICS ARRAY_SIZE(ena_stats_metrics_strings) 162 #define ENA_STATS_ARRAY_METRICS_LEGACY (ENA_STATS_ARRAY_METRICS - 1) 163 #define ENA_STATS_ARRAY_ENA_SRD ARRAY_SIZE(ena_stats_srd_strings) 164 #define ENA_STATS_ARRAY_TX ARRAY_SIZE(ena_stats_tx_strings) 165 #define ENA_STATS_ARRAY_RX ARRAY_SIZE(ena_stats_rx_strings) 166 167 #define QUEUE_OFFLOADS (RTE_ETH_TX_OFFLOAD_TCP_CKSUM |\ 168 RTE_ETH_TX_OFFLOAD_UDP_CKSUM |\ 169 RTE_ETH_TX_OFFLOAD_IPV4_CKSUM |\ 170 RTE_ETH_TX_OFFLOAD_TCP_TSO) 171 #define MBUF_OFFLOADS (RTE_MBUF_F_TX_L4_MASK |\ 172 RTE_MBUF_F_TX_IP_CKSUM |\ 173 RTE_MBUF_F_TX_TCP_SEG) 174 175 /** Vendor ID used by Amazon devices */ 176 #define PCI_VENDOR_ID_AMAZON 0x1D0F 177 /** Amazon devices */ 178 #define PCI_DEVICE_ID_ENA_VF 0xEC20 179 #define PCI_DEVICE_ID_ENA_VF_RSERV0 0xEC21 180 181 #define ENA_TX_OFFLOAD_MASK (RTE_MBUF_F_TX_L4_MASK | \ 182 RTE_MBUF_F_TX_IPV6 | \ 183 RTE_MBUF_F_TX_IPV4 | \ 184 RTE_MBUF_F_TX_IP_CKSUM | \ 185 RTE_MBUF_F_TX_TCP_SEG) 186 187 #define ENA_TX_OFFLOAD_NOTSUP_MASK \ 188 (RTE_MBUF_F_TX_OFFLOAD_MASK ^ ENA_TX_OFFLOAD_MASK) 189 190 /** HW specific offloads capabilities. */ 191 /* IPv4 checksum offload. */ 192 #define ENA_L3_IPV4_CSUM 0x0001 193 /* TCP/UDP checksum offload for IPv4 packets. */ 194 #define ENA_L4_IPV4_CSUM 0x0002 195 /* TCP/UDP checksum offload for IPv4 packets with pseudo header checksum. */ 196 #define ENA_L4_IPV4_CSUM_PARTIAL 0x0004 197 /* TCP/UDP checksum offload for IPv6 packets. 
*/ 198 #define ENA_L4_IPV6_CSUM 0x0008 199 /* TCP/UDP checksum offload for IPv6 packets with pseudo header checksum. */ 200 #define ENA_L4_IPV6_CSUM_PARTIAL 0x0010 201 /* TSO support for IPv4 packets. */ 202 #define ENA_IPV4_TSO 0x0020 203 204 /* Device supports setting RSS hash. */ 205 #define ENA_RX_RSS_HASH 0x0040 206 207 static const struct rte_pci_id pci_id_ena_map[] = { 208 { RTE_PCI_DEVICE(PCI_VENDOR_ID_AMAZON, PCI_DEVICE_ID_ENA_VF) }, 209 { RTE_PCI_DEVICE(PCI_VENDOR_ID_AMAZON, PCI_DEVICE_ID_ENA_VF_RSERV0) }, 210 { .device_id = 0 }, 211 }; 212 213 static struct ena_aenq_handlers aenq_handlers; 214 215 static int ena_device_init(struct ena_adapter *adapter, 216 struct rte_pci_device *pdev, 217 struct ena_com_dev_get_features_ctx *get_feat_ctx); 218 static int ena_dev_configure(struct rte_eth_dev *dev); 219 static void ena_tx_map_mbuf(struct ena_ring *tx_ring, 220 struct ena_tx_buffer *tx_info, 221 struct rte_mbuf *mbuf, 222 void **push_header, 223 uint16_t *header_len); 224 static int ena_xmit_mbuf(struct ena_ring *tx_ring, struct rte_mbuf *mbuf); 225 static int ena_tx_cleanup(void *txp, uint32_t free_pkt_cnt); 226 static uint16_t eth_ena_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, 227 uint16_t nb_pkts); 228 static uint16_t eth_ena_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, 229 uint16_t nb_pkts); 230 static int ena_tx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx, 231 uint16_t nb_desc, unsigned int socket_id, 232 const struct rte_eth_txconf *tx_conf); 233 static int ena_rx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx, 234 uint16_t nb_desc, unsigned int socket_id, 235 const struct rte_eth_rxconf *rx_conf, 236 struct rte_mempool *mp); 237 static inline void ena_init_rx_mbuf(struct rte_mbuf *mbuf, uint16_t len); 238 static struct rte_mbuf *ena_rx_mbuf(struct ena_ring *rx_ring, 239 struct ena_com_rx_buf_info *ena_bufs, 240 uint32_t descs, 241 uint16_t *next_to_clean, 242 uint8_t offset); 243 static uint16_t eth_ena_recv_pkts(void *rx_queue, 244 struct rte_mbuf **rx_pkts, uint16_t nb_pkts); 245 static int ena_add_single_rx_desc(struct ena_com_io_sq *io_sq, 246 struct rte_mbuf *mbuf, uint16_t id); 247 static int ena_populate_rx_queue(struct ena_ring *rxq, unsigned int count); 248 static void ena_init_rings(struct ena_adapter *adapter, 249 bool disable_meta_caching); 250 static int ena_mtu_set(struct rte_eth_dev *dev, uint16_t mtu); 251 static int ena_start(struct rte_eth_dev *dev); 252 static int ena_stop(struct rte_eth_dev *dev); 253 static int ena_close(struct rte_eth_dev *dev); 254 static int ena_dev_reset(struct rte_eth_dev *dev); 255 static int ena_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats); 256 static void ena_rx_queue_release_all(struct rte_eth_dev *dev); 257 static void ena_tx_queue_release_all(struct rte_eth_dev *dev); 258 static void ena_rx_queue_release(struct rte_eth_dev *dev, uint16_t qid); 259 static void ena_tx_queue_release(struct rte_eth_dev *dev, uint16_t qid); 260 static void ena_rx_queue_release_bufs(struct ena_ring *ring); 261 static void ena_tx_queue_release_bufs(struct ena_ring *ring); 262 static int ena_link_update(struct rte_eth_dev *dev, 263 int wait_to_complete); 264 static int ena_create_io_queue(struct rte_eth_dev *dev, struct ena_ring *ring); 265 static void ena_queue_stop(struct ena_ring *ring); 266 static void ena_queue_stop_all(struct rte_eth_dev *dev, 267 enum ena_ring_type ring_type); 268 static int ena_queue_start(struct rte_eth_dev *dev, struct ena_ring *ring); 269 static int 
ena_queue_start_all(struct rte_eth_dev *dev, 270 enum ena_ring_type ring_type); 271 static void ena_stats_restart(struct rte_eth_dev *dev); 272 static uint64_t ena_get_rx_port_offloads(struct ena_adapter *adapter); 273 static uint64_t ena_get_tx_port_offloads(struct ena_adapter *adapter); 274 static uint64_t ena_get_rx_queue_offloads(struct ena_adapter *adapter); 275 static uint64_t ena_get_tx_queue_offloads(struct ena_adapter *adapter); 276 static int ena_infos_get(struct rte_eth_dev *dev, 277 struct rte_eth_dev_info *dev_info); 278 static void ena_control_path_handler(void *cb_arg); 279 static void ena_control_path_poll_handler(void *cb_arg); 280 static void ena_timer_wd_callback(struct rte_timer *timer, void *arg); 281 static int eth_ena_dev_init(struct rte_eth_dev *eth_dev); 282 static int eth_ena_dev_uninit(struct rte_eth_dev *eth_dev); 283 static int ena_xstats_get_names(struct rte_eth_dev *dev, 284 struct rte_eth_xstat_name *xstats_names, 285 unsigned int n); 286 static int ena_xstats_get_names_by_id(struct rte_eth_dev *dev, 287 const uint64_t *ids, 288 struct rte_eth_xstat_name *xstats_names, 289 unsigned int size); 290 static int ena_xstats_get(struct rte_eth_dev *dev, 291 struct rte_eth_xstat *stats, 292 unsigned int n); 293 static int ena_xstats_get_by_id(struct rte_eth_dev *dev, 294 const uint64_t *ids, 295 uint64_t *values, 296 unsigned int n); 297 static int ena_process_bool_devarg(const char *key, 298 const char *value, 299 void *opaque); 300 static int ena_parse_devargs(struct ena_adapter *adapter, 301 struct rte_devargs *devargs); 302 static void ena_copy_customer_metrics(struct ena_adapter *adapter, 303 uint64_t *buf, 304 size_t buf_size); 305 static void ena_copy_ena_srd_info(struct ena_adapter *adapter, 306 struct ena_stats_srd *srd_info); 307 static int ena_setup_rx_intr(struct rte_eth_dev *dev); 308 static int ena_rx_queue_intr_enable(struct rte_eth_dev *dev, 309 uint16_t queue_id); 310 static int ena_rx_queue_intr_disable(struct rte_eth_dev *dev, 311 uint16_t queue_id); 312 static int ena_configure_aenq(struct ena_adapter *adapter); 313 static int ena_mp_primary_handle(const struct rte_mp_msg *mp_msg, 314 const void *peer); 315 static ena_llq_policy ena_define_llq_hdr_policy(struct ena_adapter *adapter); 316 static bool ena_use_large_llq_hdr(struct ena_adapter *adapter, uint8_t recommended_entry_size); 317 318 static const struct eth_dev_ops ena_dev_ops = { 319 .dev_configure = ena_dev_configure, 320 .dev_infos_get = ena_infos_get, 321 .rx_queue_setup = ena_rx_queue_setup, 322 .tx_queue_setup = ena_tx_queue_setup, 323 .dev_start = ena_start, 324 .dev_stop = ena_stop, 325 .link_update = ena_link_update, 326 .stats_get = ena_stats_get, 327 .xstats_get_names = ena_xstats_get_names, 328 .xstats_get_names_by_id = ena_xstats_get_names_by_id, 329 .xstats_get = ena_xstats_get, 330 .xstats_get_by_id = ena_xstats_get_by_id, 331 .mtu_set = ena_mtu_set, 332 .rx_queue_release = ena_rx_queue_release, 333 .tx_queue_release = ena_tx_queue_release, 334 .dev_close = ena_close, 335 .dev_reset = ena_dev_reset, 336 .reta_update = ena_rss_reta_update, 337 .reta_query = ena_rss_reta_query, 338 .rx_queue_intr_enable = ena_rx_queue_intr_enable, 339 .rx_queue_intr_disable = ena_rx_queue_intr_disable, 340 .rss_hash_update = ena_rss_hash_update, 341 .rss_hash_conf_get = ena_rss_hash_conf_get, 342 .tx_done_cleanup = ena_tx_cleanup, 343 }; 344 345 /********************************************************************* 346 * Multi-Process communication bits 347 
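 *
 * Only the primary process talks to the device's admin queue directly;
 * control-path calls issued from secondary processes are proxied to the
 * primary over rte_mp IPC. See ENA_PROXY()/ENA_PROXY_DESC() and the request
 * descriptors below.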
*********************************************************************/ 348 /* rte_mp IPC message name */ 349 #define ENA_MP_NAME "net_ena_mp" 350 /* Request timeout in seconds */ 351 #define ENA_MP_REQ_TMO 5 352 353 /** Proxy request type */ 354 enum ena_mp_req { 355 ENA_MP_DEV_STATS_GET, 356 ENA_MP_ENI_STATS_GET, 357 ENA_MP_MTU_SET, 358 ENA_MP_IND_TBL_GET, 359 ENA_MP_IND_TBL_SET, 360 ENA_MP_CUSTOMER_METRICS_GET, 361 ENA_MP_SRD_STATS_GET, 362 }; 363 364 /** Proxy message body. Shared between requests and responses. */ 365 struct ena_mp_body { 366 /* Message type */ 367 enum ena_mp_req type; 368 int port_id; 369 /* Processing result. Set in replies. 0 if message succeeded, negative 370 * error code otherwise. 371 */ 372 int result; 373 union { 374 int mtu; /* For ENA_MP_MTU_SET */ 375 } args; 376 }; 377 378 /** 379 * Initialize IPC message. 380 * 381 * @param[out] msg 382 * Pointer to the message to initialize. 383 * @param[in] type 384 * Message type. 385 * @param[in] port_id 386 * Port ID of target device. 387 * 388 */ 389 static void 390 mp_msg_init(struct rte_mp_msg *msg, enum ena_mp_req type, int port_id) 391 { 392 struct ena_mp_body *body = (struct ena_mp_body *)&msg->param; 393 394 memset(msg, 0, sizeof(*msg)); 395 strlcpy(msg->name, ENA_MP_NAME, sizeof(msg->name)); 396 msg->len_param = sizeof(*body); 397 body->type = type; 398 body->port_id = port_id; 399 } 400 401 /********************************************************************* 402 * Multi-Process communication PMD API 403 *********************************************************************/ 404 /** 405 * Define proxy request descriptor 406 * 407 * Used to define all structures and functions required for proxying a given 408 * function to the primary process including the code to perform to prepare the 409 * request and process the response. 410 * 411 * @param[in] f 412 * Name of the function to proxy 413 * @param[in] t 414 * Message type to use 415 * @param[in] prep 416 * Body of a function to prepare the request in form of a statement 417 * expression. It is passed all the original function arguments along with two 418 * extra ones: 419 * - struct ena_adapter *adapter - PMD data of the device calling the proxy. 420 * - struct ena_mp_body *req - body of a request to prepare. 421 * @param[in] proc 422 * Body of a function to process the response in form of a statement 423 * expression. It is passed all the original function arguments along with two 424 * extra ones: 425 * - struct ena_adapter *adapter - PMD data of the device calling the proxy. 426 * - struct ena_mp_body *rsp - body of a response to process. 427 * @param ... 428 * Proxied function's arguments 429 * 430 * @note Inside prep and proc any parameters which aren't used should be marked 431 * as such (with ENA_TOUCH or __rte_unused). 432 */ 433 #define ENA_PROXY_DESC(f, t, prep, proc, ...) \ 434 static const enum ena_mp_req mp_type_ ## f = t; \ 435 static const char *mp_name_ ## f = #t; \ 436 static void mp_prep_ ## f(struct ena_adapter *adapter, \ 437 struct ena_mp_body *req, \ 438 __VA_ARGS__) \ 439 { \ 440 prep; \ 441 } \ 442 static void mp_proc_ ## f(struct ena_adapter *adapter, \ 443 struct ena_mp_body *rsp, \ 444 __VA_ARGS__) \ 445 { \ 446 proc; \ 447 } 448 449 /** 450 * Proxy wrapper for calling primary functions in a secondary process. 451 * 452 * Depending on whether called in primary or secondary process, calls the 453 * @p func directly or proxies the call to the primary process via rte_mp IPC. 
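 *
 * For example, the MTU path later in this file calls:
 *
 *   rc = ENA_PROXY(adapter, ena_com_set_dev_mtu, ena_dev, mtu);
 *
 * In the primary process this collapses to a direct ena_com_set_dev_mtu()
 * call; in a secondary process it sends an ENA_MP_MTU_SET request prepared
 * by the generated mp_prep_ena_com_set_dev_mtu() helper and returns the
 * result reported by the primary.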
 * This macro requires a proxy request descriptor to be defined for @p func
 * using the ENA_PROXY_DESC() macro.
 *
 * @param[in,out] a
 *   Device PMD data. Used for sending the message and sharing message results
 *   between primary and secondary.
 * @param[in] f
 *   Function to proxy.
 * @param ...
 *   Arguments of @p func.
 *
 * @return
 *   - 0: Processing succeeded and response handler was called.
 *   - -EPERM: IPC is unavailable on this platform. This means only primary
 *             process may call the proxied function.
 *   - -EIO:   IPC returned error on request send. Inspect rte_errno for the
 *             detailed error code.
 *   - Negative error code from the proxied function.
 *
 * @note This mechanism is geared towards control-path tasks. Avoid calling it
 *       in fast-path unless unbounded delays are allowed. This is due to the
 *       IPC mechanism itself (socket based).
 * @note Due to IPC parameter size limitations the proxy logic shares call
 *       results through the struct ena_adapter shared memory. This makes the
 *       proxy mechanism strictly single-threaded. Therefore be sure to make
 *       all calls to the same proxied function under the same lock.
 */
#define ENA_PROXY(a, f, ...) \
__extension__ ({ \
	struct ena_adapter *_a = (a); \
	struct timespec ts = { .tv_sec = ENA_MP_REQ_TMO }; \
	struct ena_mp_body *req, *rsp; \
	struct rte_mp_reply mp_rep; \
	struct rte_mp_msg mp_req; \
	int ret; \
 \
	if (rte_eal_process_type() == RTE_PROC_PRIMARY) { \
		ret = f(__VA_ARGS__); \
	} else { \
		/* Prepare and send request */ \
		req = (struct ena_mp_body *)&mp_req.param; \
		mp_msg_init(&mp_req, mp_type_ ## f, _a->edev_data->port_id); \
		mp_prep_ ## f(_a, req, ## __VA_ARGS__); \
 \
		ret = rte_mp_request_sync(&mp_req, &mp_rep, &ts); \
		if (likely(!ret)) { \
			RTE_ASSERT(mp_rep.nb_received == 1); \
			rsp = (struct ena_mp_body *)&mp_rep.msgs[0].param; \
			ret = rsp->result; \
			if (ret == 0) { \
				mp_proc_##f(_a, rsp, ## __VA_ARGS__); \
			} else { \
				PMD_DRV_LOG_LINE(ERR, \
					"%s returned error: %d", \
					mp_name_ ## f, rsp->result); \
			} \
			free(mp_rep.msgs); \
		} else if (rte_errno == ENOTSUP) { \
			PMD_DRV_LOG_LINE(ERR, \
				"No IPC, can't proxy to primary"); \
			ret = -rte_errno; \
		} else { \
			PMD_DRV_LOG_LINE(ERR, "Request %s failed: %s", \
				mp_name_ ## f, \
				rte_strerror(rte_errno)); \
			ret = -EIO; \
		} \
	} \
	ret; \
})

/*********************************************************************
 * Multi-Process communication request descriptors
 *********************************************************************/

ENA_PROXY_DESC(ena_com_get_dev_basic_stats, ENA_MP_DEV_STATS_GET,
__extension__ ({
	ENA_TOUCH(adapter);
	ENA_TOUCH(req);
	ENA_TOUCH(ena_dev);
	ENA_TOUCH(stats);
}),
__extension__ ({
	ENA_TOUCH(rsp);
	ENA_TOUCH(ena_dev);
	if (stats != &adapter->basic_stats)
		rte_memcpy(stats, &adapter->basic_stats, sizeof(*stats));
}),
	struct ena_com_dev *ena_dev, struct ena_admin_basic_stats *stats);

ENA_PROXY_DESC(ena_com_get_eni_stats, ENA_MP_ENI_STATS_GET,
__extension__ ({
	ENA_TOUCH(adapter);
	ENA_TOUCH(req);
	ENA_TOUCH(ena_dev);
	ENA_TOUCH(stats);
}),
__extension__ ({
	ENA_TOUCH(rsp);
	ENA_TOUCH(ena_dev);
	if (stats != (struct ena_admin_eni_stats *)adapter->metrics_stats)
		rte_memcpy(stats, adapter->metrics_stats, sizeof(*stats));
}),
struct ena_com_dev *ena_dev, struct ena_admin_eni_stats *stats); 558 559 ENA_PROXY_DESC(ena_com_set_dev_mtu, ENA_MP_MTU_SET, 560 __extension__ ({ 561 ENA_TOUCH(adapter); 562 ENA_TOUCH(ena_dev); 563 req->args.mtu = mtu; 564 }), 565 __extension__ ({ 566 ENA_TOUCH(adapter); 567 ENA_TOUCH(rsp); 568 ENA_TOUCH(ena_dev); 569 ENA_TOUCH(mtu); 570 }), 571 struct ena_com_dev *ena_dev, int mtu); 572 573 ENA_PROXY_DESC(ena_com_indirect_table_set, ENA_MP_IND_TBL_SET, 574 __extension__ ({ 575 ENA_TOUCH(adapter); 576 ENA_TOUCH(req); 577 ENA_TOUCH(ena_dev); 578 }), 579 __extension__ ({ 580 ENA_TOUCH(adapter); 581 ENA_TOUCH(rsp); 582 ENA_TOUCH(ena_dev); 583 }), 584 struct ena_com_dev *ena_dev); 585 586 ENA_PROXY_DESC(ena_com_indirect_table_get, ENA_MP_IND_TBL_GET, 587 __extension__ ({ 588 ENA_TOUCH(adapter); 589 ENA_TOUCH(req); 590 ENA_TOUCH(ena_dev); 591 ENA_TOUCH(ind_tbl); 592 }), 593 __extension__ ({ 594 ENA_TOUCH(rsp); 595 ENA_TOUCH(ena_dev); 596 if (ind_tbl != adapter->indirect_table) 597 rte_memcpy(ind_tbl, adapter->indirect_table, 598 sizeof(adapter->indirect_table)); 599 }), 600 struct ena_com_dev *ena_dev, u32 *ind_tbl); 601 602 ENA_PROXY_DESC(ena_com_get_customer_metrics, ENA_MP_CUSTOMER_METRICS_GET, 603 __extension__ ({ 604 ENA_TOUCH(adapter); 605 ENA_TOUCH(req); 606 ENA_TOUCH(ena_dev); 607 ENA_TOUCH(buf); 608 ENA_TOUCH(buf_size); 609 }), 610 __extension__ ({ 611 ENA_TOUCH(rsp); 612 ENA_TOUCH(ena_dev); 613 if (buf != (char *)adapter->metrics_stats) 614 rte_memcpy(buf, adapter->metrics_stats, buf_size); 615 }), 616 struct ena_com_dev *ena_dev, char *buf, size_t buf_size); 617 618 ENA_PROXY_DESC(ena_com_get_ena_srd_info, ENA_MP_SRD_STATS_GET, 619 __extension__ ({ 620 ENA_TOUCH(adapter); 621 ENA_TOUCH(req); 622 ENA_TOUCH(ena_dev); 623 ENA_TOUCH(info); 624 }), 625 __extension__ ({ 626 ENA_TOUCH(rsp); 627 ENA_TOUCH(ena_dev); 628 if ((struct ena_stats_srd *)info != &adapter->srd_stats) 629 rte_memcpy((struct ena_stats_srd *)info, 630 &adapter->srd_stats, 631 sizeof(struct ena_stats_srd)); 632 }), 633 struct ena_com_dev *ena_dev, struct ena_admin_ena_srd_info *info); 634 635 static inline void ena_trigger_reset(struct ena_adapter *adapter, 636 enum ena_regs_reset_reason_types reason) 637 { 638 if (likely(!adapter->trigger_reset)) { 639 adapter->reset_reason = reason; 640 adapter->trigger_reset = true; 641 } 642 } 643 644 static inline void ena_rx_mbuf_prepare(struct ena_ring *rx_ring, 645 struct rte_mbuf *mbuf, 646 struct ena_com_rx_ctx *ena_rx_ctx) 647 { 648 struct ena_stats_rx *rx_stats = &rx_ring->rx_stats; 649 uint64_t ol_flags = 0; 650 uint32_t packet_type = 0; 651 652 switch (ena_rx_ctx->l3_proto) { 653 case ENA_ETH_IO_L3_PROTO_IPV4: 654 packet_type |= RTE_PTYPE_L3_IPV4; 655 if (unlikely(ena_rx_ctx->l3_csum_err)) { 656 ++rx_stats->l3_csum_bad; 657 ol_flags |= RTE_MBUF_F_RX_IP_CKSUM_BAD; 658 } else { 659 ol_flags |= RTE_MBUF_F_RX_IP_CKSUM_GOOD; 660 } 661 break; 662 case ENA_ETH_IO_L3_PROTO_IPV6: 663 packet_type |= RTE_PTYPE_L3_IPV6; 664 break; 665 default: 666 break; 667 } 668 669 switch (ena_rx_ctx->l4_proto) { 670 case ENA_ETH_IO_L4_PROTO_TCP: 671 packet_type |= RTE_PTYPE_L4_TCP; 672 break; 673 case ENA_ETH_IO_L4_PROTO_UDP: 674 packet_type |= RTE_PTYPE_L4_UDP; 675 break; 676 default: 677 break; 678 } 679 680 /* L4 csum is relevant only for TCP/UDP packets */ 681 if ((packet_type & (RTE_PTYPE_L4_TCP | RTE_PTYPE_L4_UDP)) && !ena_rx_ctx->frag) { 682 if (ena_rx_ctx->l4_csum_checked) { 683 if (likely(!ena_rx_ctx->l4_csum_err)) { 684 ++rx_stats->l4_csum_good; 685 ol_flags |= 
RTE_MBUF_F_RX_L4_CKSUM_GOOD; 686 } else { 687 ++rx_stats->l4_csum_bad; 688 ol_flags |= RTE_MBUF_F_RX_L4_CKSUM_BAD; 689 } 690 } else { 691 ol_flags |= RTE_MBUF_F_RX_L4_CKSUM_UNKNOWN; 692 } 693 694 if (rx_ring->offloads & RTE_ETH_RX_OFFLOAD_RSS_HASH) { 695 ol_flags |= RTE_MBUF_F_RX_RSS_HASH; 696 mbuf->hash.rss = ena_rx_ctx->hash; 697 } 698 } else { 699 ol_flags |= RTE_MBUF_F_RX_L4_CKSUM_UNKNOWN; 700 } 701 702 mbuf->ol_flags = ol_flags; 703 mbuf->packet_type = packet_type; 704 } 705 706 static inline void ena_tx_mbuf_prepare(struct rte_mbuf *mbuf, 707 struct ena_com_tx_ctx *ena_tx_ctx, 708 uint64_t queue_offloads, 709 bool disable_meta_caching) 710 { 711 struct ena_com_tx_meta *ena_meta = &ena_tx_ctx->ena_meta; 712 713 if ((mbuf->ol_flags & MBUF_OFFLOADS) && 714 (queue_offloads & QUEUE_OFFLOADS)) { 715 /* check if TSO is required */ 716 if ((mbuf->ol_flags & RTE_MBUF_F_TX_TCP_SEG) && 717 (queue_offloads & RTE_ETH_TX_OFFLOAD_TCP_TSO)) { 718 ena_tx_ctx->tso_enable = true; 719 720 ena_meta->l4_hdr_len = GET_L4_HDR_LEN(mbuf); 721 } 722 723 /* check if L3 checksum is needed */ 724 if ((mbuf->ol_flags & RTE_MBUF_F_TX_IP_CKSUM) && 725 (queue_offloads & RTE_ETH_TX_OFFLOAD_IPV4_CKSUM)) 726 ena_tx_ctx->l3_csum_enable = true; 727 728 if (mbuf->ol_flags & RTE_MBUF_F_TX_IPV6) { 729 ena_tx_ctx->l3_proto = ENA_ETH_IO_L3_PROTO_IPV6; 730 /* For the IPv6 packets, DF always needs to be true. */ 731 ena_tx_ctx->df = 1; 732 } else { 733 ena_tx_ctx->l3_proto = ENA_ETH_IO_L3_PROTO_IPV4; 734 735 /* set don't fragment (DF) flag */ 736 if (mbuf->packet_type & 737 (RTE_PTYPE_L4_NONFRAG 738 | RTE_PTYPE_INNER_L4_NONFRAG)) 739 ena_tx_ctx->df = 1; 740 } 741 742 /* check if L4 checksum is needed */ 743 if (((mbuf->ol_flags & RTE_MBUF_F_TX_L4_MASK) == RTE_MBUF_F_TX_TCP_CKSUM) && 744 (queue_offloads & RTE_ETH_TX_OFFLOAD_TCP_CKSUM)) { 745 ena_tx_ctx->l4_proto = ENA_ETH_IO_L4_PROTO_TCP; 746 ena_tx_ctx->l4_csum_enable = true; 747 } else if (((mbuf->ol_flags & RTE_MBUF_F_TX_L4_MASK) == 748 RTE_MBUF_F_TX_UDP_CKSUM) && 749 (queue_offloads & RTE_ETH_TX_OFFLOAD_UDP_CKSUM)) { 750 ena_tx_ctx->l4_proto = ENA_ETH_IO_L4_PROTO_UDP; 751 ena_tx_ctx->l4_csum_enable = true; 752 } else { 753 ena_tx_ctx->l4_proto = ENA_ETH_IO_L4_PROTO_UNKNOWN; 754 ena_tx_ctx->l4_csum_enable = false; 755 } 756 757 ena_meta->mss = mbuf->tso_segsz; 758 ena_meta->l3_hdr_len = mbuf->l3_len; 759 ena_meta->l3_hdr_offset = mbuf->l2_len; 760 761 ena_tx_ctx->meta_valid = true; 762 } else if (disable_meta_caching) { 763 memset(ena_meta, 0, sizeof(*ena_meta)); 764 ena_tx_ctx->meta_valid = true; 765 } else { 766 ena_tx_ctx->meta_valid = false; 767 } 768 } 769 770 static int validate_tx_req_id(struct ena_ring *tx_ring, u16 req_id) 771 { 772 struct ena_tx_buffer *tx_info = NULL; 773 774 if (likely(req_id < tx_ring->ring_size)) { 775 tx_info = &tx_ring->tx_buffer_info[req_id]; 776 if (likely(tx_info->mbuf)) 777 return 0; 778 } 779 780 if (tx_info) 781 PMD_TX_LOG_LINE(ERR, "tx_info doesn't have valid mbuf. 
queue %d:%d req_id %u", 782 tx_ring->port_id, tx_ring->id, req_id); 783 else 784 PMD_TX_LOG_LINE(ERR, "Invalid req_id: %hu in queue %d:%d", 785 req_id, tx_ring->port_id, tx_ring->id); 786 787 /* Trigger device reset */ 788 ++tx_ring->tx_stats.bad_req_id; 789 ena_trigger_reset(tx_ring->adapter, ENA_REGS_RESET_INV_TX_REQ_ID); 790 return -EFAULT; 791 } 792 793 static void ena_config_host_info(struct ena_com_dev *ena_dev) 794 { 795 struct ena_admin_host_info *host_info; 796 int rc; 797 798 /* Allocate only the host info */ 799 rc = ena_com_allocate_host_info(ena_dev); 800 if (rc) { 801 PMD_DRV_LOG_LINE(ERR, "Cannot allocate host info"); 802 return; 803 } 804 805 host_info = ena_dev->host_attr.host_info; 806 807 host_info->os_type = ENA_ADMIN_OS_DPDK; 808 host_info->kernel_ver = RTE_VERSION; 809 strlcpy((char *)host_info->kernel_ver_str, rte_version(), 810 sizeof(host_info->kernel_ver_str)); 811 host_info->os_dist = RTE_VERSION; 812 strlcpy((char *)host_info->os_dist_str, rte_version(), 813 sizeof(host_info->os_dist_str)); 814 host_info->driver_version = 815 (DRV_MODULE_VER_MAJOR) | 816 (DRV_MODULE_VER_MINOR << ENA_ADMIN_HOST_INFO_MINOR_SHIFT) | 817 (DRV_MODULE_VER_SUBMINOR << 818 ENA_ADMIN_HOST_INFO_SUB_MINOR_SHIFT); 819 host_info->num_cpus = rte_lcore_count(); 820 821 host_info->driver_supported_features = 822 ENA_ADMIN_HOST_INFO_RX_OFFSET_MASK | 823 ENA_ADMIN_HOST_INFO_RSS_CONFIGURABLE_FUNCTION_KEY_MASK; 824 825 rc = ena_com_set_host_attributes(ena_dev); 826 if (rc) { 827 if (rc == ENA_COM_UNSUPPORTED) 828 PMD_DRV_LOG_LINE(WARNING, "Cannot set host attributes"); 829 else 830 PMD_DRV_LOG_LINE(ERR, "Cannot set host attributes"); 831 832 goto err; 833 } 834 835 return; 836 837 err: 838 ena_com_delete_host_info(ena_dev); 839 } 840 841 /* This function calculates the number of xstats based on the current config */ 842 static unsigned int ena_xstats_calc_num(struct rte_eth_dev_data *data) 843 { 844 struct ena_adapter *adapter = data->dev_private; 845 846 return ENA_STATS_ARRAY_GLOBAL + 847 adapter->metrics_num + 848 ENA_STATS_ARRAY_ENA_SRD + 849 (data->nb_tx_queues * ENA_STATS_ARRAY_TX) + 850 (data->nb_rx_queues * ENA_STATS_ARRAY_RX); 851 } 852 853 static void ena_config_debug_area(struct ena_adapter *adapter) 854 { 855 u32 debug_area_size; 856 int rc, ss_count; 857 858 ss_count = ena_xstats_calc_num(adapter->edev_data); 859 860 /* allocate 32 bytes for each string and 64bit for the value */ 861 debug_area_size = ss_count * ETH_GSTRING_LEN + sizeof(u64) * ss_count; 862 863 rc = ena_com_allocate_debug_area(&adapter->ena_dev, debug_area_size); 864 if (rc) { 865 PMD_DRV_LOG_LINE(ERR, "Cannot allocate debug area"); 866 return; 867 } 868 869 rc = ena_com_set_host_attributes(&adapter->ena_dev); 870 if (rc) { 871 if (rc == ENA_COM_UNSUPPORTED) 872 PMD_DRV_LOG_LINE(WARNING, "Cannot set host attributes"); 873 else 874 PMD_DRV_LOG_LINE(ERR, "Cannot set host attributes"); 875 876 goto err; 877 } 878 879 return; 880 err: 881 ena_com_delete_debug_area(&adapter->ena_dev); 882 } 883 884 static int ena_close(struct rte_eth_dev *dev) 885 { 886 struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev); 887 struct rte_intr_handle *intr_handle = pci_dev->intr_handle; 888 struct ena_adapter *adapter = dev->data->dev_private; 889 struct ena_com_dev *ena_dev = &adapter->ena_dev; 890 int ret = 0; 891 int rc; 892 893 if (rte_eal_process_type() != RTE_PROC_PRIMARY) 894 return 0; 895 896 if (adapter->state == ENA_ADAPTER_STATE_CLOSED) 897 return 0; 898 899 if (adapter->state == ENA_ADAPTER_STATE_RUNNING) 900 ret = 
ena_stop(dev); 901 adapter->state = ENA_ADAPTER_STATE_CLOSED; 902 903 if (!adapter->control_path_poll_interval) { 904 rte_intr_disable(intr_handle); 905 rc = rte_intr_callback_unregister_sync(intr_handle, ena_control_path_handler, dev); 906 if (unlikely(rc != 0)) 907 PMD_INIT_LOG_LINE(ERR, "Failed to unregister interrupt handler"); 908 } else { 909 rte_eal_alarm_cancel(ena_control_path_poll_handler, dev); 910 } 911 912 ena_rx_queue_release_all(dev); 913 ena_tx_queue_release_all(dev); 914 915 rte_free(adapter->drv_stats); 916 adapter->drv_stats = NULL; 917 918 ena_com_set_admin_running_state(ena_dev, false); 919 920 ena_com_rss_destroy(ena_dev); 921 922 ena_com_delete_debug_area(ena_dev); 923 ena_com_delete_host_info(ena_dev); 924 925 ena_com_abort_admin_commands(ena_dev); 926 ena_com_wait_for_abort_completion(ena_dev); 927 ena_com_admin_destroy(ena_dev); 928 ena_com_mmio_reg_read_request_destroy(ena_dev); 929 ena_com_delete_customer_metrics_buffer(ena_dev); 930 931 /* 932 * MAC is not allocated dynamically. Setting NULL should prevent from 933 * release of the resource in the rte_eth_dev_release_port(). 934 */ 935 dev->data->mac_addrs = NULL; 936 937 return ret; 938 } 939 940 static int 941 ena_dev_reset(struct rte_eth_dev *dev) 942 { 943 int rc = 0; 944 945 /* Cannot release memory in secondary process */ 946 if (rte_eal_process_type() != RTE_PROC_PRIMARY) { 947 PMD_DRV_LOG_LINE(WARNING, "dev_reset not supported in secondary."); 948 return -EPERM; 949 } 950 951 rc = eth_ena_dev_uninit(dev); 952 if (rc) { 953 PMD_INIT_LOG_LINE(CRIT, "Failed to un-initialize device"); 954 return rc; 955 } 956 957 rc = eth_ena_dev_init(dev); 958 if (rc) 959 PMD_INIT_LOG_LINE(CRIT, "Cannot initialize device"); 960 961 return rc; 962 } 963 964 static void ena_rx_queue_release_all(struct rte_eth_dev *dev) 965 { 966 int nb_queues = dev->data->nb_rx_queues; 967 int i; 968 969 for (i = 0; i < nb_queues; i++) 970 ena_rx_queue_release(dev, i); 971 } 972 973 static void ena_tx_queue_release_all(struct rte_eth_dev *dev) 974 { 975 int nb_queues = dev->data->nb_tx_queues; 976 int i; 977 978 for (i = 0; i < nb_queues; i++) 979 ena_tx_queue_release(dev, i); 980 } 981 982 static void ena_rx_queue_release(struct rte_eth_dev *dev, uint16_t qid) 983 { 984 struct ena_ring *ring = dev->data->rx_queues[qid]; 985 986 /* Free ring resources */ 987 rte_free(ring->rx_buffer_info); 988 ring->rx_buffer_info = NULL; 989 990 rte_free(ring->rx_refill_buffer); 991 ring->rx_refill_buffer = NULL; 992 993 rte_free(ring->empty_rx_reqs); 994 ring->empty_rx_reqs = NULL; 995 996 ring->configured = 0; 997 998 PMD_DRV_LOG_LINE(NOTICE, "Rx queue %d:%d released", 999 ring->port_id, ring->id); 1000 } 1001 1002 static void ena_tx_queue_release(struct rte_eth_dev *dev, uint16_t qid) 1003 { 1004 struct ena_ring *ring = dev->data->tx_queues[qid]; 1005 1006 /* Free ring resources */ 1007 rte_free(ring->push_buf_intermediate_buf); 1008 1009 rte_free(ring->tx_buffer_info); 1010 1011 rte_free(ring->empty_tx_reqs); 1012 1013 ring->empty_tx_reqs = NULL; 1014 ring->tx_buffer_info = NULL; 1015 ring->push_buf_intermediate_buf = NULL; 1016 1017 ring->configured = 0; 1018 1019 PMD_DRV_LOG_LINE(NOTICE, "Tx queue %d:%d released", 1020 ring->port_id, ring->id); 1021 } 1022 1023 static void ena_rx_queue_release_bufs(struct ena_ring *ring) 1024 { 1025 unsigned int i; 1026 1027 for (i = 0; i < ring->ring_size; ++i) { 1028 struct ena_rx_buffer *rx_info = &ring->rx_buffer_info[i]; 1029 if (rx_info->mbuf) { 1030 rte_mbuf_raw_free(rx_info->mbuf); 1031 rx_info->mbuf = NULL; 
1032 } 1033 } 1034 } 1035 1036 static void ena_tx_queue_release_bufs(struct ena_ring *ring) 1037 { 1038 unsigned int i; 1039 1040 for (i = 0; i < ring->ring_size; ++i) { 1041 struct ena_tx_buffer *tx_buf = &ring->tx_buffer_info[i]; 1042 1043 if (tx_buf->mbuf) { 1044 rte_pktmbuf_free(tx_buf->mbuf); 1045 tx_buf->mbuf = NULL; 1046 } 1047 } 1048 } 1049 1050 static int ena_link_update(struct rte_eth_dev *dev, 1051 __rte_unused int wait_to_complete) 1052 { 1053 struct rte_eth_link *link = &dev->data->dev_link; 1054 struct ena_adapter *adapter = dev->data->dev_private; 1055 1056 link->link_status = adapter->link_status ? RTE_ETH_LINK_UP : RTE_ETH_LINK_DOWN; 1057 link->link_speed = RTE_ETH_SPEED_NUM_NONE; 1058 link->link_duplex = RTE_ETH_LINK_FULL_DUPLEX; 1059 1060 return 0; 1061 } 1062 1063 static int ena_queue_start_all(struct rte_eth_dev *dev, 1064 enum ena_ring_type ring_type) 1065 { 1066 struct ena_adapter *adapter = dev->data->dev_private; 1067 struct ena_ring *queues = NULL; 1068 int nb_queues; 1069 int i = 0; 1070 int rc = 0; 1071 1072 if (ring_type == ENA_RING_TYPE_RX) { 1073 queues = adapter->rx_ring; 1074 nb_queues = dev->data->nb_rx_queues; 1075 } else { 1076 queues = adapter->tx_ring; 1077 nb_queues = dev->data->nb_tx_queues; 1078 } 1079 for (i = 0; i < nb_queues; i++) { 1080 if (queues[i].configured) { 1081 if (ring_type == ENA_RING_TYPE_RX) { 1082 ena_assert_msg( 1083 dev->data->rx_queues[i] == &queues[i], 1084 "Inconsistent state of Rx queues\n"); 1085 } else { 1086 ena_assert_msg( 1087 dev->data->tx_queues[i] == &queues[i], 1088 "Inconsistent state of Tx queues\n"); 1089 } 1090 1091 rc = ena_queue_start(dev, &queues[i]); 1092 1093 if (rc) { 1094 PMD_INIT_LOG_LINE(ERR, 1095 "Failed to start queue[%d] of type(%d)", 1096 i, ring_type); 1097 goto err; 1098 } 1099 } 1100 } 1101 1102 return 0; 1103 1104 err: 1105 while (i--) 1106 if (queues[i].configured) 1107 ena_queue_stop(&queues[i]); 1108 1109 return rc; 1110 } 1111 1112 static int 1113 ena_calc_io_queue_size(struct ena_calc_queue_size_ctx *ctx, 1114 bool use_large_llq_hdr) 1115 { 1116 struct ena_admin_feature_llq_desc *dev = &ctx->get_feat_ctx->llq; 1117 struct ena_com_dev *ena_dev = ctx->ena_dev; 1118 uint32_t max_tx_queue_size; 1119 uint32_t max_rx_queue_size; 1120 1121 if (ena_dev->supported_features & BIT(ENA_ADMIN_MAX_QUEUES_EXT)) { 1122 struct ena_admin_queue_ext_feature_fields *max_queue_ext = 1123 &ctx->get_feat_ctx->max_queue_ext.max_queue_ext; 1124 max_rx_queue_size = RTE_MIN(max_queue_ext->max_rx_cq_depth, 1125 max_queue_ext->max_rx_sq_depth); 1126 max_tx_queue_size = max_queue_ext->max_tx_cq_depth; 1127 1128 if (ena_dev->tx_mem_queue_type == 1129 ENA_ADMIN_PLACEMENT_POLICY_DEV) { 1130 max_tx_queue_size = RTE_MIN(max_tx_queue_size, 1131 dev->max_llq_depth); 1132 } else { 1133 max_tx_queue_size = RTE_MIN(max_tx_queue_size, 1134 max_queue_ext->max_tx_sq_depth); 1135 } 1136 1137 ctx->max_rx_sgl_size = RTE_MIN(ENA_PKT_MAX_BUFS, 1138 max_queue_ext->max_per_packet_rx_descs); 1139 ctx->max_tx_sgl_size = RTE_MIN(ENA_PKT_MAX_BUFS, 1140 max_queue_ext->max_per_packet_tx_descs); 1141 } else { 1142 struct ena_admin_queue_feature_desc *max_queues = 1143 &ctx->get_feat_ctx->max_queues; 1144 max_rx_queue_size = RTE_MIN(max_queues->max_cq_depth, 1145 max_queues->max_sq_depth); 1146 max_tx_queue_size = max_queues->max_cq_depth; 1147 1148 if (ena_dev->tx_mem_queue_type == 1149 ENA_ADMIN_PLACEMENT_POLICY_DEV) { 1150 max_tx_queue_size = RTE_MIN(max_tx_queue_size, 1151 dev->max_llq_depth); 1152 } else { 1153 max_tx_queue_size = 
RTE_MIN(max_tx_queue_size, 1154 max_queues->max_sq_depth); 1155 } 1156 1157 ctx->max_rx_sgl_size = RTE_MIN(ENA_PKT_MAX_BUFS, 1158 max_queues->max_packet_rx_descs); 1159 ctx->max_tx_sgl_size = RTE_MIN(ENA_PKT_MAX_BUFS, 1160 max_queues->max_packet_tx_descs); 1161 } 1162 1163 /* Round down to the nearest power of 2 */ 1164 max_rx_queue_size = rte_align32prevpow2(max_rx_queue_size); 1165 max_tx_queue_size = rte_align32prevpow2(max_tx_queue_size); 1166 1167 if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV && use_large_llq_hdr) { 1168 /* intersection between driver configuration and device capabilities */ 1169 if (dev->entry_size_ctrl_supported & ENA_ADMIN_LIST_ENTRY_SIZE_256B) { 1170 if (dev->max_wide_llq_depth == MAX_WIDE_LLQ_DEPTH_UNSUPPORTED) { 1171 /* Devices that do not support the double-sized ENA memory BAR will 1172 * report max_wide_llq_depth as 0. In such case, driver halves the 1173 * queue depth when working in large llq policy. 1174 */ 1175 max_tx_queue_size >>= 1; 1176 PMD_INIT_LOG_LINE(INFO, 1177 "large LLQ policy requires limiting Tx queue size to %u entries", 1178 max_tx_queue_size); 1179 } else if (dev->max_wide_llq_depth < max_tx_queue_size) { 1180 /* In case the queue depth that the driver calculated exceeds 1181 * the maximal value that the device allows, it will be limited 1182 * to that maximal value 1183 */ 1184 max_tx_queue_size = dev->max_wide_llq_depth; 1185 } 1186 } else { 1187 PMD_INIT_LOG_LINE(INFO, 1188 "Forcing large LLQ headers failed since device lacks this support"); 1189 } 1190 } 1191 1192 if (unlikely(max_rx_queue_size == 0 || max_tx_queue_size == 0)) { 1193 PMD_INIT_LOG_LINE(ERR, "Invalid queue size"); 1194 return -EFAULT; 1195 } 1196 1197 ctx->max_tx_queue_size = max_tx_queue_size; 1198 ctx->max_rx_queue_size = max_rx_queue_size; 1199 1200 PMD_DRV_LOG_LINE(INFO, "tx queue size %u", max_tx_queue_size); 1201 return 0; 1202 } 1203 1204 static void ena_stats_restart(struct rte_eth_dev *dev) 1205 { 1206 struct ena_adapter *adapter = dev->data->dev_private; 1207 1208 rte_atomic64_init(&adapter->drv_stats->ierrors); 1209 rte_atomic64_init(&adapter->drv_stats->oerrors); 1210 rte_atomic64_init(&adapter->drv_stats->rx_nombuf); 1211 adapter->drv_stats->rx_drops = 0; 1212 } 1213 1214 static int ena_stats_get(struct rte_eth_dev *dev, 1215 struct rte_eth_stats *stats) 1216 { 1217 struct ena_admin_basic_stats ena_stats; 1218 struct ena_adapter *adapter = dev->data->dev_private; 1219 struct ena_com_dev *ena_dev = &adapter->ena_dev; 1220 int rc; 1221 int i; 1222 int max_rings_stats; 1223 1224 memset(&ena_stats, 0, sizeof(ena_stats)); 1225 1226 rte_spinlock_lock(&adapter->admin_lock); 1227 rc = ENA_PROXY(adapter, ena_com_get_dev_basic_stats, ena_dev, 1228 &ena_stats); 1229 rte_spinlock_unlock(&adapter->admin_lock); 1230 if (unlikely(rc)) { 1231 PMD_DRV_LOG_LINE(ERR, "Could not retrieve statistics from ENA"); 1232 return rc; 1233 } 1234 1235 /* Set of basic statistics from ENA */ 1236 stats->ipackets = __MERGE_64B_H_L(ena_stats.rx_pkts_high, 1237 ena_stats.rx_pkts_low); 1238 stats->opackets = __MERGE_64B_H_L(ena_stats.tx_pkts_high, 1239 ena_stats.tx_pkts_low); 1240 stats->ibytes = __MERGE_64B_H_L(ena_stats.rx_bytes_high, 1241 ena_stats.rx_bytes_low); 1242 stats->obytes = __MERGE_64B_H_L(ena_stats.tx_bytes_high, 1243 ena_stats.tx_bytes_low); 1244 1245 /* Driver related stats */ 1246 stats->imissed = adapter->drv_stats->rx_drops; 1247 stats->ierrors = rte_atomic64_read(&adapter->drv_stats->ierrors); 1248 stats->oerrors = 
rte_atomic64_read(&adapter->drv_stats->oerrors); 1249 stats->rx_nombuf = rte_atomic64_read(&adapter->drv_stats->rx_nombuf); 1250 1251 max_rings_stats = RTE_MIN(dev->data->nb_rx_queues, 1252 RTE_ETHDEV_QUEUE_STAT_CNTRS); 1253 for (i = 0; i < max_rings_stats; ++i) { 1254 struct ena_stats_rx *rx_stats = &adapter->rx_ring[i].rx_stats; 1255 1256 stats->q_ibytes[i] = rx_stats->bytes; 1257 stats->q_ipackets[i] = rx_stats->cnt; 1258 stats->q_errors[i] = rx_stats->bad_desc_num + 1259 rx_stats->bad_req_id; 1260 } 1261 1262 max_rings_stats = RTE_MIN(dev->data->nb_tx_queues, 1263 RTE_ETHDEV_QUEUE_STAT_CNTRS); 1264 for (i = 0; i < max_rings_stats; ++i) { 1265 struct ena_stats_tx *tx_stats = &adapter->tx_ring[i].tx_stats; 1266 1267 stats->q_obytes[i] = tx_stats->bytes; 1268 stats->q_opackets[i] = tx_stats->cnt; 1269 } 1270 1271 return 0; 1272 } 1273 1274 static int ena_mtu_set(struct rte_eth_dev *dev, uint16_t mtu) 1275 { 1276 struct ena_adapter *adapter; 1277 struct ena_com_dev *ena_dev; 1278 int rc = 0; 1279 1280 ena_assert_msg(dev->data != NULL, "Uninitialized device\n"); 1281 ena_assert_msg(dev->data->dev_private != NULL, "Uninitialized device\n"); 1282 adapter = dev->data->dev_private; 1283 1284 ena_dev = &adapter->ena_dev; 1285 ena_assert_msg(ena_dev != NULL, "Uninitialized device\n"); 1286 1287 rc = ENA_PROXY(adapter, ena_com_set_dev_mtu, ena_dev, mtu); 1288 if (rc) 1289 PMD_DRV_LOG_LINE(ERR, "Could not set MTU: %d", mtu); 1290 else 1291 PMD_DRV_LOG_LINE(NOTICE, "MTU set to: %d", mtu); 1292 1293 return rc; 1294 } 1295 1296 static int ena_start(struct rte_eth_dev *dev) 1297 { 1298 struct ena_adapter *adapter = dev->data->dev_private; 1299 uint64_t ticks; 1300 int rc = 0; 1301 uint16_t i; 1302 1303 /* Cannot allocate memory in secondary process */ 1304 if (rte_eal_process_type() != RTE_PROC_PRIMARY) { 1305 PMD_DRV_LOG_LINE(WARNING, "dev_start not supported in secondary."); 1306 return -EPERM; 1307 } 1308 1309 rc = ena_setup_rx_intr(dev); 1310 if (rc) 1311 return rc; 1312 1313 rc = ena_queue_start_all(dev, ENA_RING_TYPE_RX); 1314 if (rc) 1315 return rc; 1316 1317 rc = ena_queue_start_all(dev, ENA_RING_TYPE_TX); 1318 if (rc) 1319 goto err_start_tx; 1320 1321 if (adapter->edev_data->dev_conf.rxmode.mq_mode & RTE_ETH_MQ_RX_RSS_FLAG) { 1322 rc = ena_rss_configure(adapter); 1323 if (rc) 1324 goto err_rss_init; 1325 } 1326 1327 ena_stats_restart(dev); 1328 1329 adapter->timestamp_wd = rte_get_timer_cycles(); 1330 adapter->keep_alive_timeout = ENA_DEVICE_KALIVE_TIMEOUT; 1331 1332 ticks = rte_get_timer_hz(); 1333 rte_timer_reset(&adapter->timer_wd, ticks, PERIODICAL, rte_lcore_id(), 1334 ena_timer_wd_callback, dev); 1335 1336 ++adapter->dev_stats.dev_start; 1337 adapter->state = ENA_ADAPTER_STATE_RUNNING; 1338 1339 for (i = 0; i < dev->data->nb_rx_queues; i++) 1340 dev->data->rx_queue_state[i] = RTE_ETH_QUEUE_STATE_STARTED; 1341 for (i = 0; i < dev->data->nb_tx_queues; i++) 1342 dev->data->tx_queue_state[i] = RTE_ETH_QUEUE_STATE_STARTED; 1343 1344 return 0; 1345 1346 err_rss_init: 1347 ena_queue_stop_all(dev, ENA_RING_TYPE_TX); 1348 err_start_tx: 1349 ena_queue_stop_all(dev, ENA_RING_TYPE_RX); 1350 return rc; 1351 } 1352 1353 static int ena_stop(struct rte_eth_dev *dev) 1354 { 1355 struct ena_adapter *adapter = dev->data->dev_private; 1356 struct ena_com_dev *ena_dev = &adapter->ena_dev; 1357 struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev); 1358 struct rte_intr_handle *intr_handle = pci_dev->intr_handle; 1359 uint16_t i; 1360 int rc; 1361 1362 /* Cannot free memory in secondary process */ 1363 if 
(rte_eal_process_type() != RTE_PROC_PRIMARY) { 1364 PMD_DRV_LOG_LINE(WARNING, "dev_stop not supported in secondary."); 1365 return -EPERM; 1366 } 1367 1368 rte_timer_stop_sync(&adapter->timer_wd); 1369 ena_queue_stop_all(dev, ENA_RING_TYPE_TX); 1370 ena_queue_stop_all(dev, ENA_RING_TYPE_RX); 1371 1372 if (adapter->trigger_reset) { 1373 rc = ena_com_dev_reset(ena_dev, adapter->reset_reason); 1374 if (rc) 1375 PMD_DRV_LOG_LINE(ERR, "Device reset failed, rc: %d", rc); 1376 } 1377 1378 rte_intr_disable(intr_handle); 1379 1380 rte_intr_efd_disable(intr_handle); 1381 1382 /* Cleanup vector list */ 1383 rte_intr_vec_list_free(intr_handle); 1384 1385 rte_intr_enable(intr_handle); 1386 1387 ++adapter->dev_stats.dev_stop; 1388 adapter->state = ENA_ADAPTER_STATE_STOPPED; 1389 dev->data->dev_started = 0; 1390 1391 for (i = 0; i < dev->data->nb_rx_queues; i++) 1392 dev->data->rx_queue_state[i] = RTE_ETH_QUEUE_STATE_STOPPED; 1393 for (i = 0; i < dev->data->nb_tx_queues; i++) 1394 dev->data->tx_queue_state[i] = RTE_ETH_QUEUE_STATE_STOPPED; 1395 1396 return 0; 1397 } 1398 1399 static int ena_create_io_queue(struct rte_eth_dev *dev, struct ena_ring *ring) 1400 { 1401 struct ena_adapter *adapter = ring->adapter; 1402 struct ena_com_dev *ena_dev = &adapter->ena_dev; 1403 struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev); 1404 struct rte_intr_handle *intr_handle = pci_dev->intr_handle; 1405 struct ena_com_create_io_ctx ctx = 1406 /* policy set to _HOST just to satisfy icc compiler */ 1407 { ENA_ADMIN_PLACEMENT_POLICY_HOST, 1408 0, 0, 0, 0, 0 }; 1409 uint16_t ena_qid; 1410 unsigned int i; 1411 int rc; 1412 1413 ctx.msix_vector = -1; 1414 if (ring->type == ENA_RING_TYPE_TX) { 1415 ena_qid = ENA_IO_TXQ_IDX(ring->id); 1416 ctx.direction = ENA_COM_IO_QUEUE_DIRECTION_TX; 1417 ctx.mem_queue_type = ena_dev->tx_mem_queue_type; 1418 for (i = 0; i < ring->ring_size; i++) 1419 ring->empty_tx_reqs[i] = i; 1420 } else { 1421 ena_qid = ENA_IO_RXQ_IDX(ring->id); 1422 ctx.direction = ENA_COM_IO_QUEUE_DIRECTION_RX; 1423 if (rte_intr_dp_is_en(intr_handle)) 1424 ctx.msix_vector = 1425 rte_intr_vec_list_index_get(intr_handle, 1426 ring->id); 1427 1428 for (i = 0; i < ring->ring_size; i++) 1429 ring->empty_rx_reqs[i] = i; 1430 } 1431 ctx.queue_size = ring->ring_size; 1432 ctx.qid = ena_qid; 1433 ctx.numa_node = ring->numa_socket_id; 1434 1435 rc = ena_com_create_io_queue(ena_dev, &ctx); 1436 if (rc) { 1437 PMD_DRV_LOG_LINE(ERR, 1438 "Failed to create IO queue[%d] (qid:%d), rc: %d", 1439 ring->id, ena_qid, rc); 1440 return rc; 1441 } 1442 1443 rc = ena_com_get_io_handlers(ena_dev, ena_qid, 1444 &ring->ena_com_io_sq, 1445 &ring->ena_com_io_cq); 1446 if (rc) { 1447 PMD_DRV_LOG_LINE(ERR, 1448 "Failed to get IO queue[%d] handlers, rc: %d", 1449 ring->id, rc); 1450 ena_com_destroy_io_queue(ena_dev, ena_qid); 1451 return rc; 1452 } 1453 1454 if (ring->type == ENA_RING_TYPE_TX) 1455 ena_com_update_numa_node(ring->ena_com_io_cq, ctx.numa_node); 1456 1457 /* Start with Rx interrupts being masked. 
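 * They can be unmasked later on a per-queue basis through the
 * rx_queue_intr_enable dev op (ena_rx_queue_intr_enable()).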
*/ 1458 if (ring->type == ENA_RING_TYPE_RX && rte_intr_dp_is_en(intr_handle)) 1459 ena_rx_queue_intr_disable(dev, ring->id); 1460 1461 return 0; 1462 } 1463 1464 static void ena_queue_stop(struct ena_ring *ring) 1465 { 1466 struct ena_com_dev *ena_dev = &ring->adapter->ena_dev; 1467 1468 if (ring->type == ENA_RING_TYPE_RX) { 1469 ena_com_destroy_io_queue(ena_dev, ENA_IO_RXQ_IDX(ring->id)); 1470 ena_rx_queue_release_bufs(ring); 1471 } else { 1472 ena_com_destroy_io_queue(ena_dev, ENA_IO_TXQ_IDX(ring->id)); 1473 ena_tx_queue_release_bufs(ring); 1474 } 1475 } 1476 1477 static void ena_queue_stop_all(struct rte_eth_dev *dev, 1478 enum ena_ring_type ring_type) 1479 { 1480 struct ena_adapter *adapter = dev->data->dev_private; 1481 struct ena_ring *queues = NULL; 1482 uint16_t nb_queues, i; 1483 1484 if (ring_type == ENA_RING_TYPE_RX) { 1485 queues = adapter->rx_ring; 1486 nb_queues = dev->data->nb_rx_queues; 1487 } else { 1488 queues = adapter->tx_ring; 1489 nb_queues = dev->data->nb_tx_queues; 1490 } 1491 1492 for (i = 0; i < nb_queues; ++i) 1493 if (queues[i].configured) 1494 ena_queue_stop(&queues[i]); 1495 } 1496 1497 static int ena_queue_start(struct rte_eth_dev *dev, struct ena_ring *ring) 1498 { 1499 int rc, bufs_num; 1500 1501 ena_assert_msg(ring->configured == 1, 1502 "Trying to start unconfigured queue\n"); 1503 1504 rc = ena_create_io_queue(dev, ring); 1505 if (rc) { 1506 PMD_INIT_LOG_LINE(ERR, "Failed to create IO queue"); 1507 return rc; 1508 } 1509 1510 ring->next_to_clean = 0; 1511 ring->next_to_use = 0; 1512 1513 if (ring->type == ENA_RING_TYPE_TX) { 1514 ring->tx_stats.available_desc = 1515 ena_com_free_q_entries(ring->ena_com_io_sq); 1516 return 0; 1517 } 1518 1519 bufs_num = ring->ring_size - 1; 1520 rc = ena_populate_rx_queue(ring, bufs_num); 1521 if (rc != bufs_num) { 1522 ena_com_destroy_io_queue(&ring->adapter->ena_dev, 1523 ENA_IO_RXQ_IDX(ring->id)); 1524 PMD_INIT_LOG_LINE(ERR, "Failed to populate Rx ring"); 1525 return ENA_COM_FAULT; 1526 } 1527 /* Flush per-core RX buffers pools cache as they can be used on other 1528 * cores as well. 1529 */ 1530 rte_mempool_cache_flush(NULL, ring->mb_pool); 1531 1532 return 0; 1533 } 1534 1535 static int ena_tx_queue_setup(struct rte_eth_dev *dev, 1536 uint16_t queue_idx, 1537 uint16_t nb_desc, 1538 unsigned int socket_id, 1539 const struct rte_eth_txconf *tx_conf) 1540 { 1541 struct ena_ring *txq = NULL; 1542 struct ena_adapter *adapter = dev->data->dev_private; 1543 unsigned int i; 1544 uint16_t dyn_thresh; 1545 1546 txq = &adapter->tx_ring[queue_idx]; 1547 1548 if (txq->configured) { 1549 PMD_DRV_LOG_LINE(CRIT, 1550 "API violation. 
Queue[%d] is already configured", 1551 queue_idx); 1552 return ENA_COM_FAULT; 1553 } 1554 1555 if (!rte_is_power_of_2(nb_desc)) { 1556 PMD_DRV_LOG_LINE(ERR, 1557 "Unsupported size of Tx queue: %d is not a power of 2.", 1558 nb_desc); 1559 return -EINVAL; 1560 } 1561 1562 if (nb_desc > adapter->max_tx_ring_size) { 1563 PMD_DRV_LOG_LINE(ERR, 1564 "Unsupported size of Tx queue (max size: %d)", 1565 adapter->max_tx_ring_size); 1566 return -EINVAL; 1567 } 1568 1569 txq->port_id = dev->data->port_id; 1570 txq->next_to_clean = 0; 1571 txq->next_to_use = 0; 1572 txq->ring_size = nb_desc; 1573 txq->size_mask = nb_desc - 1; 1574 txq->numa_socket_id = socket_id; 1575 txq->pkts_without_db = false; 1576 txq->last_cleanup_ticks = 0; 1577 1578 txq->tx_buffer_info = rte_zmalloc_socket("txq->tx_buffer_info", 1579 sizeof(struct ena_tx_buffer) * txq->ring_size, 1580 RTE_CACHE_LINE_SIZE, 1581 socket_id); 1582 if (!txq->tx_buffer_info) { 1583 PMD_DRV_LOG_LINE(ERR, 1584 "Failed to allocate memory for Tx buffer info"); 1585 return -ENOMEM; 1586 } 1587 1588 txq->empty_tx_reqs = rte_zmalloc_socket("txq->empty_tx_reqs", 1589 sizeof(uint16_t) * txq->ring_size, 1590 RTE_CACHE_LINE_SIZE, 1591 socket_id); 1592 if (!txq->empty_tx_reqs) { 1593 PMD_DRV_LOG_LINE(ERR, 1594 "Failed to allocate memory for empty Tx requests"); 1595 rte_free(txq->tx_buffer_info); 1596 return -ENOMEM; 1597 } 1598 1599 txq->push_buf_intermediate_buf = 1600 rte_zmalloc_socket("txq->push_buf_intermediate_buf", 1601 txq->tx_max_header_size, 1602 RTE_CACHE_LINE_SIZE, 1603 socket_id); 1604 if (!txq->push_buf_intermediate_buf) { 1605 PMD_DRV_LOG_LINE(ERR, "Failed to alloc push buffer for LLQ"); 1606 rte_free(txq->tx_buffer_info); 1607 rte_free(txq->empty_tx_reqs); 1608 return -ENOMEM; 1609 } 1610 1611 for (i = 0; i < txq->ring_size; i++) 1612 txq->empty_tx_reqs[i] = i; 1613 1614 txq->offloads = tx_conf->offloads | dev->data->dev_conf.txmode.offloads; 1615 1616 /* Check if caller provided the Tx cleanup threshold value. */ 1617 if (tx_conf->tx_free_thresh != 0) { 1618 txq->tx_free_thresh = tx_conf->tx_free_thresh; 1619 } else { 1620 dyn_thresh = txq->ring_size - 1621 txq->ring_size / ENA_REFILL_THRESH_DIVIDER; 1622 txq->tx_free_thresh = RTE_MAX(dyn_thresh, 1623 txq->ring_size - ENA_REFILL_THRESH_PACKET); 1624 } 1625 1626 txq->missing_tx_completion_threshold = 1627 RTE_MIN(txq->ring_size / 2, ENA_DEFAULT_MISSING_COMP); 1628 1629 /* Store pointer to this queue in upper layer */ 1630 txq->configured = 1; 1631 dev->data->tx_queues[queue_idx] = txq; 1632 1633 return 0; 1634 } 1635 1636 static int ena_rx_queue_setup(struct rte_eth_dev *dev, 1637 uint16_t queue_idx, 1638 uint16_t nb_desc, 1639 unsigned int socket_id, 1640 const struct rte_eth_rxconf *rx_conf, 1641 struct rte_mempool *mp) 1642 { 1643 struct ena_adapter *adapter = dev->data->dev_private; 1644 struct ena_ring *rxq = NULL; 1645 size_t buffer_size; 1646 int i; 1647 uint16_t dyn_thresh; 1648 1649 rxq = &adapter->rx_ring[queue_idx]; 1650 if (rxq->configured) { 1651 PMD_DRV_LOG_LINE(CRIT, 1652 "API violation. 
Queue[%d] is already configured", 1653 queue_idx); 1654 return ENA_COM_FAULT; 1655 } 1656 1657 if (!rte_is_power_of_2(nb_desc)) { 1658 PMD_DRV_LOG_LINE(ERR, 1659 "Unsupported size of Rx queue: %d is not a power of 2.", 1660 nb_desc); 1661 return -EINVAL; 1662 } 1663 1664 if (nb_desc > adapter->max_rx_ring_size) { 1665 PMD_DRV_LOG_LINE(ERR, 1666 "Unsupported size of Rx queue (max size: %d)", 1667 adapter->max_rx_ring_size); 1668 return -EINVAL; 1669 } 1670 1671 /* ENA isn't supporting buffers smaller than 1400 bytes */ 1672 buffer_size = rte_pktmbuf_data_room_size(mp) - RTE_PKTMBUF_HEADROOM; 1673 if (buffer_size < ENA_RX_BUF_MIN_SIZE) { 1674 PMD_DRV_LOG_LINE(ERR, 1675 "Unsupported size of Rx buffer: %zu (min size: %d)", 1676 buffer_size, ENA_RX_BUF_MIN_SIZE); 1677 return -EINVAL; 1678 } 1679 1680 rxq->port_id = dev->data->port_id; 1681 rxq->next_to_clean = 0; 1682 rxq->next_to_use = 0; 1683 rxq->ring_size = nb_desc; 1684 rxq->size_mask = nb_desc - 1; 1685 rxq->numa_socket_id = socket_id; 1686 rxq->mb_pool = mp; 1687 1688 rxq->rx_buffer_info = rte_zmalloc_socket("rxq->buffer_info", 1689 sizeof(struct ena_rx_buffer) * nb_desc, 1690 RTE_CACHE_LINE_SIZE, 1691 socket_id); 1692 if (!rxq->rx_buffer_info) { 1693 PMD_DRV_LOG_LINE(ERR, 1694 "Failed to allocate memory for Rx buffer info"); 1695 return -ENOMEM; 1696 } 1697 1698 rxq->rx_refill_buffer = rte_zmalloc_socket("rxq->rx_refill_buffer", 1699 sizeof(struct rte_mbuf *) * nb_desc, 1700 RTE_CACHE_LINE_SIZE, 1701 socket_id); 1702 if (!rxq->rx_refill_buffer) { 1703 PMD_DRV_LOG_LINE(ERR, 1704 "Failed to allocate memory for Rx refill buffer"); 1705 rte_free(rxq->rx_buffer_info); 1706 rxq->rx_buffer_info = NULL; 1707 return -ENOMEM; 1708 } 1709 1710 rxq->empty_rx_reqs = rte_zmalloc_socket("rxq->empty_rx_reqs", 1711 sizeof(uint16_t) * nb_desc, 1712 RTE_CACHE_LINE_SIZE, 1713 socket_id); 1714 if (!rxq->empty_rx_reqs) { 1715 PMD_DRV_LOG_LINE(ERR, 1716 "Failed to allocate memory for empty Rx requests"); 1717 rte_free(rxq->rx_buffer_info); 1718 rxq->rx_buffer_info = NULL; 1719 rte_free(rxq->rx_refill_buffer); 1720 rxq->rx_refill_buffer = NULL; 1721 return -ENOMEM; 1722 } 1723 1724 for (i = 0; i < nb_desc; i++) 1725 rxq->empty_rx_reqs[i] = i; 1726 1727 rxq->offloads = rx_conf->offloads | dev->data->dev_conf.rxmode.offloads; 1728 1729 if (rx_conf->rx_free_thresh != 0) { 1730 rxq->rx_free_thresh = rx_conf->rx_free_thresh; 1731 } else { 1732 dyn_thresh = rxq->ring_size / ENA_REFILL_THRESH_DIVIDER; 1733 rxq->rx_free_thresh = RTE_MIN(dyn_thresh, 1734 (uint16_t)(ENA_REFILL_THRESH_PACKET)); 1735 } 1736 1737 /* Store pointer to this queue in upper layer */ 1738 rxq->configured = 1; 1739 dev->data->rx_queues[queue_idx] = rxq; 1740 1741 return 0; 1742 } 1743 1744 static int ena_add_single_rx_desc(struct ena_com_io_sq *io_sq, 1745 struct rte_mbuf *mbuf, uint16_t id) 1746 { 1747 struct ena_com_buf ebuf; 1748 int rc; 1749 1750 /* prepare physical address for DMA transaction */ 1751 ebuf.paddr = mbuf->buf_iova + RTE_PKTMBUF_HEADROOM; 1752 ebuf.len = mbuf->buf_len - RTE_PKTMBUF_HEADROOM; 1753 1754 /* pass resource to device */ 1755 rc = ena_com_add_single_rx_desc(io_sq, &ebuf, id); 1756 if (unlikely(rc != 0)) 1757 PMD_RX_LOG_LINE(WARNING, "Failed adding Rx desc"); 1758 1759 return rc; 1760 } 1761 1762 static int ena_populate_rx_queue(struct ena_ring *rxq, unsigned int count) 1763 { 1764 unsigned int i; 1765 int rc; 1766 uint16_t next_to_use = rxq->next_to_use; 1767 uint16_t req_id; 1768 #ifdef RTE_ETHDEV_DEBUG_RX 1769 uint16_t in_use; 1770 #endif 1771 struct rte_mbuf **mbufs 
= rxq->rx_refill_buffer; 1772 1773 if (unlikely(!count)) 1774 return 0; 1775 1776 #ifdef RTE_ETHDEV_DEBUG_RX 1777 in_use = rxq->ring_size - 1 - 1778 ena_com_free_q_entries(rxq->ena_com_io_sq); 1779 if (unlikely((in_use + count) >= rxq->ring_size)) 1780 PMD_RX_LOG_LINE(ERR, "Bad Rx ring state"); 1781 #endif 1782 1783 /* get resources for incoming packets */ 1784 rc = rte_pktmbuf_alloc_bulk(rxq->mb_pool, mbufs, count); 1785 if (unlikely(rc < 0)) { 1786 rte_atomic64_inc(&rxq->adapter->drv_stats->rx_nombuf); 1787 ++rxq->rx_stats.mbuf_alloc_fail; 1788 PMD_RX_LOG_LINE(DEBUG, "There are not enough free buffers"); 1789 return 0; 1790 } 1791 1792 for (i = 0; i < count; i++) { 1793 struct rte_mbuf *mbuf = mbufs[i]; 1794 struct ena_rx_buffer *rx_info; 1795 1796 if (likely((i + 4) < count)) 1797 rte_prefetch0(mbufs[i + 4]); 1798 1799 req_id = rxq->empty_rx_reqs[next_to_use]; 1800 rx_info = &rxq->rx_buffer_info[req_id]; 1801 1802 rc = ena_add_single_rx_desc(rxq->ena_com_io_sq, mbuf, req_id); 1803 if (unlikely(rc != 0)) 1804 break; 1805 1806 rx_info->mbuf = mbuf; 1807 next_to_use = ENA_IDX_NEXT_MASKED(next_to_use, rxq->size_mask); 1808 } 1809 1810 if (unlikely(i < count)) { 1811 PMD_RX_LOG_LINE(WARNING, 1812 "Refilled Rx queue[%d] with only %d/%d buffers", 1813 rxq->id, i, count); 1814 rte_pktmbuf_free_bulk(&mbufs[i], count - i); 1815 ++rxq->rx_stats.refill_partial; 1816 } 1817 1818 /* When we submitted free resources to device... */ 1819 if (likely(i > 0)) { 1820 /* ...let HW know that it can fill buffers with data. */ 1821 ena_com_write_sq_doorbell(rxq->ena_com_io_sq); 1822 1823 rxq->next_to_use = next_to_use; 1824 } 1825 1826 return i; 1827 } 1828 1829 static size_t ena_get_metrics_entries(struct ena_adapter *adapter) 1830 { 1831 struct ena_com_dev *ena_dev = &adapter->ena_dev; 1832 size_t metrics_num = 0; 1833 1834 if (ena_com_get_cap(ena_dev, ENA_ADMIN_CUSTOMER_METRICS)) 1835 metrics_num = ENA_STATS_ARRAY_METRICS; 1836 else if (ena_com_get_cap(ena_dev, ENA_ADMIN_ENI_STATS)) 1837 metrics_num = ENA_STATS_ARRAY_METRICS_LEGACY; 1838 PMD_DRV_LOG_LINE(NOTICE, "0x%x customer metrics are supported", (unsigned int)metrics_num); 1839 if (metrics_num > ENA_MAX_CUSTOMER_METRICS) { 1840 PMD_DRV_LOG_LINE(NOTICE, "Not enough space for the requested customer metrics"); 1841 metrics_num = ENA_MAX_CUSTOMER_METRICS; 1842 } 1843 return metrics_num; 1844 } 1845 1846 static int ena_device_init(struct ena_adapter *adapter, 1847 struct rte_pci_device *pdev, 1848 struct ena_com_dev_get_features_ctx *get_feat_ctx) 1849 { 1850 struct ena_com_dev *ena_dev = &adapter->ena_dev; 1851 uint32_t aenq_groups; 1852 int rc; 1853 bool readless_supported; 1854 1855 /* Initialize mmio registers */ 1856 rc = ena_com_mmio_reg_read_request_init(ena_dev); 1857 if (rc) { 1858 PMD_DRV_LOG_LINE(ERR, "Failed to init MMIO read less"); 1859 return rc; 1860 } 1861 1862 /* The PCIe configuration space revision id indicate if mmio reg 1863 * read is disabled. 
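 * When that bit is set, the read-less approach is not used and the registers are read directly.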
1864 */ 1865 readless_supported = !(pdev->id.class_id & ENA_MMIO_DISABLE_REG_READ); 1866 ena_com_set_mmio_read_mode(ena_dev, readless_supported); 1867 1868 /* reset device */ 1869 rc = ena_com_dev_reset(ena_dev, ENA_REGS_RESET_NORMAL); 1870 if (rc) { 1871 PMD_DRV_LOG_LINE(ERR, "Cannot reset device"); 1872 goto err_mmio_read_less; 1873 } 1874 1875 /* check FW version */ 1876 rc = ena_com_validate_version(ena_dev); 1877 if (rc) { 1878 PMD_DRV_LOG_LINE(ERR, "Device version is too low"); 1879 goto err_mmio_read_less; 1880 } 1881 1882 ena_dev->dma_addr_bits = ena_com_get_dma_width(ena_dev); 1883 1884 /* ENA device administration layer init */ 1885 rc = ena_com_admin_init(ena_dev, &aenq_handlers); 1886 if (rc) { 1887 PMD_DRV_LOG_LINE(ERR, 1888 "Cannot initialize ENA admin queue"); 1889 goto err_mmio_read_less; 1890 } 1891 1892 /* To enable the msix interrupts the driver needs to know the number 1893 * of queues. So the driver uses polling mode to retrieve this 1894 * information. 1895 */ 1896 ena_com_set_admin_polling_mode(ena_dev, true); 1897 1898 ena_config_host_info(ena_dev); 1899 1900 /* Get Device Attributes and features */ 1901 rc = ena_com_get_dev_attr_feat(ena_dev, get_feat_ctx); 1902 if (rc) { 1903 PMD_DRV_LOG_LINE(ERR, 1904 "Cannot get attribute for ENA device, rc: %d", rc); 1905 goto err_admin_init; 1906 } 1907 1908 aenq_groups = BIT(ENA_ADMIN_LINK_CHANGE) | 1909 BIT(ENA_ADMIN_NOTIFICATION) | 1910 BIT(ENA_ADMIN_KEEP_ALIVE) | 1911 BIT(ENA_ADMIN_FATAL_ERROR) | 1912 BIT(ENA_ADMIN_WARNING) | 1913 BIT(ENA_ADMIN_CONF_NOTIFICATIONS); 1914 1915 aenq_groups &= get_feat_ctx->aenq.supported_groups; 1916 1917 adapter->all_aenq_groups = aenq_groups; 1918 /* The actual supported number of metrics is negotiated with the device at runtime */ 1919 adapter->metrics_num = ena_get_metrics_entries(adapter); 1920 1921 return 0; 1922 1923 err_admin_init: 1924 ena_com_admin_destroy(ena_dev); 1925 1926 err_mmio_read_less: 1927 ena_com_mmio_reg_read_request_destroy(ena_dev); 1928 1929 return rc; 1930 } 1931 1932 static void ena_control_path_handler(void *cb_arg) 1933 { 1934 struct rte_eth_dev *dev = cb_arg; 1935 struct ena_adapter *adapter = dev->data->dev_private; 1936 struct ena_com_dev *ena_dev = &adapter->ena_dev; 1937 1938 if (likely(adapter->state != ENA_ADAPTER_STATE_CLOSED)) { 1939 ena_com_admin_q_comp_intr_handler(ena_dev); 1940 ena_com_aenq_intr_handler(ena_dev, dev); 1941 } 1942 } 1943 1944 static void ena_control_path_poll_handler(void *cb_arg) 1945 { 1946 struct rte_eth_dev *dev = cb_arg; 1947 struct ena_adapter *adapter = dev->data->dev_private; 1948 int rc; 1949 1950 if (likely(adapter->state != ENA_ADAPTER_STATE_CLOSED)) { 1951 ena_control_path_handler(cb_arg); 1952 rc = rte_eal_alarm_set(adapter->control_path_poll_interval, 1953 ena_control_path_poll_handler, cb_arg); 1954 if (unlikely(rc != 0)) { 1955 PMD_DRV_LOG_LINE(ERR, "Failed to retrigger control path alarm"); 1956 ena_trigger_reset(adapter, ENA_REGS_RESET_GENERIC); 1957 } 1958 } 1959 } 1960 1961 static void check_for_missing_keep_alive(struct ena_adapter *adapter) 1962 { 1963 if (!(adapter->active_aenq_groups & BIT(ENA_ADMIN_KEEP_ALIVE))) 1964 return; 1965 1966 if (adapter->keep_alive_timeout == ENA_HW_HINTS_NO_TIMEOUT) 1967 return; 1968 1969 if (unlikely((rte_get_timer_cycles() - adapter->timestamp_wd) >= 1970 adapter->keep_alive_timeout)) { 1971 PMD_DRV_LOG_LINE(ERR, "Keep alive timeout"); 1972 ena_trigger_reset(adapter, ENA_REGS_RESET_KEEP_ALIVE_TO); 1973 ++adapter->dev_stats.wd_expired; 1974 } 1975 } 1976 1977 /* Check if admin 
queue is enabled */ 1978 static void check_for_admin_com_state(struct ena_adapter *adapter) 1979 { 1980 if (unlikely(!ena_com_get_admin_running_state(&adapter->ena_dev))) { 1981 PMD_DRV_LOG_LINE(ERR, "ENA admin queue is not in running state"); 1982 ena_trigger_reset(adapter, ENA_REGS_RESET_ADMIN_TO); 1983 } 1984 } 1985 1986 static int check_for_tx_completion_in_queue(struct ena_adapter *adapter, 1987 struct ena_ring *tx_ring) 1988 { 1989 struct ena_tx_buffer *tx_buf; 1990 uint64_t timestamp; 1991 uint64_t completion_delay; 1992 uint32_t missed_tx = 0; 1993 unsigned int i; 1994 int rc = 0; 1995 1996 for (i = 0; i < tx_ring->ring_size; ++i) { 1997 tx_buf = &tx_ring->tx_buffer_info[i]; 1998 timestamp = tx_buf->timestamp; 1999 2000 if (timestamp == 0) 2001 continue; 2002 2003 completion_delay = rte_get_timer_cycles() - timestamp; 2004 if (completion_delay > adapter->missing_tx_completion_to) { 2005 if (unlikely(!tx_buf->print_once)) { 2006 PMD_TX_LOG_LINE(WARNING, 2007 "Found a Tx that wasn't completed on time, qid %d, index %d. " 2008 "Missing Tx outstanding for %" PRIu64 " msecs.", 2009 tx_ring->id, i, completion_delay / 2010 rte_get_timer_hz() * 1000); 2011 tx_buf->print_once = true; 2012 } 2013 ++missed_tx; 2014 } 2015 } 2016 2017 if (unlikely(missed_tx > tx_ring->missing_tx_completion_threshold)) { 2018 PMD_DRV_LOG_LINE(ERR, 2019 "The number of lost Tx completions is above the threshold (%d > %d). " 2020 "Trigger the device reset.", 2021 missed_tx, 2022 tx_ring->missing_tx_completion_threshold); 2023 adapter->reset_reason = ENA_REGS_RESET_MISS_TX_CMPL; 2024 adapter->trigger_reset = true; 2025 rc = -EIO; 2026 } 2027 2028 tx_ring->tx_stats.missed_tx += missed_tx; 2029 2030 return rc; 2031 } 2032 2033 static void check_for_tx_completions(struct ena_adapter *adapter) 2034 { 2035 struct ena_ring *tx_ring; 2036 uint64_t tx_cleanup_delay; 2037 size_t qid; 2038 int budget; 2039 uint16_t nb_tx_queues = adapter->edev_data->nb_tx_queues; 2040 2041 if (adapter->missing_tx_completion_to == ENA_HW_HINTS_NO_TIMEOUT) 2042 return; 2043 2044 nb_tx_queues = adapter->edev_data->nb_tx_queues; 2045 budget = adapter->missing_tx_completion_budget; 2046 2047 qid = adapter->last_tx_comp_qid; 2048 while (budget-- > 0) { 2049 tx_ring = &adapter->tx_ring[qid]; 2050 2051 /* Tx cleanup is called only by the burst function and can be 2052 * called dynamically by the application. Also cleanup is 2053 * limited by the threshold. To avoid false detection of the 2054 * missing HW Tx completion, get the delay since last cleanup 2055 * function was called. 
2056 */ 2057 tx_cleanup_delay = rte_get_timer_cycles() - 2058 tx_ring->last_cleanup_ticks; 2059 if (tx_cleanup_delay < adapter->tx_cleanup_stall_delay) 2060 check_for_tx_completion_in_queue(adapter, tx_ring); 2061 qid = (qid + 1) % nb_tx_queues; 2062 } 2063 2064 adapter->last_tx_comp_qid = qid; 2065 } 2066 2067 static void ena_timer_wd_callback(__rte_unused struct rte_timer *timer, 2068 void *arg) 2069 { 2070 struct rte_eth_dev *dev = arg; 2071 struct ena_adapter *adapter = dev->data->dev_private; 2072 2073 if (unlikely(adapter->trigger_reset)) 2074 return; 2075 2076 check_for_missing_keep_alive(adapter); 2077 check_for_admin_com_state(adapter); 2078 check_for_tx_completions(adapter); 2079 2080 if (unlikely(adapter->trigger_reset)) { 2081 PMD_DRV_LOG_LINE(ERR, "Trigger reset is on"); 2082 rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_RESET, 2083 NULL); 2084 } 2085 } 2086 2087 static inline void 2088 set_default_llq_configurations(struct ena_llq_configurations *llq_config, 2089 struct ena_admin_feature_llq_desc *llq, 2090 bool use_large_llq_hdr) 2091 { 2092 llq_config->llq_header_location = ENA_ADMIN_INLINE_HEADER; 2093 llq_config->llq_stride_ctrl = ENA_ADMIN_MULTIPLE_DESCS_PER_ENTRY; 2094 llq_config->llq_num_decs_before_header = 2095 ENA_ADMIN_LLQ_NUM_DESCS_BEFORE_HEADER_2; 2096 2097 if (use_large_llq_hdr && 2098 (llq->entry_size_ctrl_supported & ENA_ADMIN_LIST_ENTRY_SIZE_256B)) { 2099 llq_config->llq_ring_entry_size = 2100 ENA_ADMIN_LIST_ENTRY_SIZE_256B; 2101 llq_config->llq_ring_entry_size_value = 256; 2102 } else { 2103 llq_config->llq_ring_entry_size = 2104 ENA_ADMIN_LIST_ENTRY_SIZE_128B; 2105 llq_config->llq_ring_entry_size_value = 128; 2106 } 2107 } 2108 2109 static int 2110 ena_set_queues_placement_policy(struct ena_adapter *adapter, 2111 struct ena_com_dev *ena_dev, 2112 struct ena_admin_feature_llq_desc *llq, 2113 struct ena_llq_configurations *llq_default_configurations) 2114 { 2115 int rc; 2116 u32 llq_feature_mask; 2117 2118 if (adapter->llq_header_policy == ENA_LLQ_POLICY_DISABLED) { 2119 PMD_DRV_LOG_LINE(WARNING, 2120 "NOTE: LLQ has been disabled as per user's request. " 2121 "This may lead to a huge performance degradation!"); 2122 ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST; 2123 return 0; 2124 } 2125 2126 llq_feature_mask = 1 << ENA_ADMIN_LLQ; 2127 if (!(ena_dev->supported_features & llq_feature_mask)) { 2128 PMD_DRV_LOG_LINE(INFO, 2129 "LLQ is not supported. Fallback to host mode policy."); 2130 ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST; 2131 return 0; 2132 } 2133 2134 if (adapter->dev_mem_base == NULL) { 2135 PMD_DRV_LOG_LINE(ERR, 2136 "LLQ is advertised as supported, but device doesn't expose mem bar"); 2137 ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST; 2138 return 0; 2139 } 2140 2141 rc = ena_com_config_dev_mode(ena_dev, llq, llq_default_configurations); 2142 if (unlikely(rc)) { 2143 PMD_INIT_LOG_LINE(WARNING, 2144 "Failed to config dev mode. 
Fallback to host mode policy."); 2145 ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST; 2146 return 0; 2147 } 2148 2149 /* Nothing to config, exit */ 2150 if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_HOST) 2151 return 0; 2152 2153 ena_dev->mem_bar = adapter->dev_mem_base; 2154 2155 return 0; 2156 } 2157 2158 static uint32_t ena_calc_max_io_queue_num(struct ena_com_dev *ena_dev, 2159 struct ena_com_dev_get_features_ctx *get_feat_ctx) 2160 { 2161 uint32_t io_tx_sq_num, io_tx_cq_num, io_rx_num, max_num_io_queues; 2162 2163 /* Regular queues capabilities */ 2164 if (ena_dev->supported_features & BIT(ENA_ADMIN_MAX_QUEUES_EXT)) { 2165 struct ena_admin_queue_ext_feature_fields *max_queue_ext = 2166 &get_feat_ctx->max_queue_ext.max_queue_ext; 2167 io_rx_num = RTE_MIN(max_queue_ext->max_rx_sq_num, 2168 max_queue_ext->max_rx_cq_num); 2169 io_tx_sq_num = max_queue_ext->max_tx_sq_num; 2170 io_tx_cq_num = max_queue_ext->max_tx_cq_num; 2171 } else { 2172 struct ena_admin_queue_feature_desc *max_queues = 2173 &get_feat_ctx->max_queues; 2174 io_tx_sq_num = max_queues->max_sq_num; 2175 io_tx_cq_num = max_queues->max_cq_num; 2176 io_rx_num = RTE_MIN(io_tx_sq_num, io_tx_cq_num); 2177 } 2178 2179 /* In case of LLQ use the llq number in the get feature cmd */ 2180 if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) 2181 io_tx_sq_num = get_feat_ctx->llq.max_llq_num; 2182 2183 max_num_io_queues = RTE_MIN(ENA_MAX_NUM_IO_QUEUES, io_rx_num); 2184 max_num_io_queues = RTE_MIN(max_num_io_queues, io_tx_sq_num); 2185 max_num_io_queues = RTE_MIN(max_num_io_queues, io_tx_cq_num); 2186 2187 if (unlikely(max_num_io_queues == 0)) { 2188 PMD_DRV_LOG_LINE(ERR, "Number of IO queues cannot not be 0"); 2189 return -EFAULT; 2190 } 2191 2192 return max_num_io_queues; 2193 } 2194 2195 static void 2196 ena_set_offloads(struct ena_offloads *offloads, 2197 struct ena_admin_feature_offload_desc *offload_desc) 2198 { 2199 if (offload_desc->tx & ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_IPV4_MASK) 2200 offloads->tx_offloads |= ENA_IPV4_TSO; 2201 2202 /* Tx IPv4 checksum offloads */ 2203 if (offload_desc->tx & 2204 ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L3_CSUM_IPV4_MASK) 2205 offloads->tx_offloads |= ENA_L3_IPV4_CSUM; 2206 if (offload_desc->tx & 2207 ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV4_CSUM_FULL_MASK) 2208 offloads->tx_offloads |= ENA_L4_IPV4_CSUM; 2209 if (offload_desc->tx & 2210 ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV4_CSUM_PART_MASK) 2211 offloads->tx_offloads |= ENA_L4_IPV4_CSUM_PARTIAL; 2212 2213 /* Tx IPv6 checksum offloads */ 2214 if (offload_desc->tx & 2215 ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV6_CSUM_FULL_MASK) 2216 offloads->tx_offloads |= ENA_L4_IPV6_CSUM; 2217 if (offload_desc->tx & 2218 ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV6_CSUM_PART_MASK) 2219 offloads->tx_offloads |= ENA_L4_IPV6_CSUM_PARTIAL; 2220 2221 /* Rx IPv4 checksum offloads */ 2222 if (offload_desc->rx_supported & 2223 ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L3_CSUM_IPV4_MASK) 2224 offloads->rx_offloads |= ENA_L3_IPV4_CSUM; 2225 if (offload_desc->rx_supported & 2226 ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L4_IPV4_CSUM_MASK) 2227 offloads->rx_offloads |= ENA_L4_IPV4_CSUM; 2228 2229 /* Rx IPv6 checksum offloads */ 2230 if (offload_desc->rx_supported & 2231 ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L4_IPV6_CSUM_MASK) 2232 offloads->rx_offloads |= ENA_L4_IPV6_CSUM; 2233 2234 if (offload_desc->rx_supported & 2235 ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_HASH_MASK) 2236 offloads->rx_offloads |= ENA_RX_RSS_HASH; 2237 } 2238 2239 static int 
ena_init_once(void) 2240 { 2241 static bool init_done; 2242 2243 if (init_done) 2244 return 0; 2245 2246 if (rte_eal_process_type() == RTE_PROC_PRIMARY) { 2247 /* Init timer subsystem for the ENA timer service. */ 2248 rte_timer_subsystem_init(); 2249 /* Register handler for requests from secondary processes. */ 2250 rte_mp_action_register(ENA_MP_NAME, ena_mp_primary_handle); 2251 } 2252 2253 init_done = true; 2254 return 0; 2255 } 2256 2257 static int eth_ena_dev_init(struct rte_eth_dev *eth_dev) 2258 { 2259 struct ena_calc_queue_size_ctx calc_queue_ctx = { 0 }; 2260 struct rte_pci_device *pci_dev; 2261 struct rte_intr_handle *intr_handle; 2262 struct ena_adapter *adapter = eth_dev->data->dev_private; 2263 struct ena_com_dev *ena_dev = &adapter->ena_dev; 2264 struct ena_com_dev_get_features_ctx get_feat_ctx; 2265 struct ena_llq_configurations llq_config; 2266 const char *queue_type_str; 2267 uint32_t max_num_io_queues; 2268 int rc; 2269 static int adapters_found; 2270 bool disable_meta_caching; 2271 2272 eth_dev->dev_ops = &ena_dev_ops; 2273 eth_dev->rx_pkt_burst = &eth_ena_recv_pkts; 2274 eth_dev->tx_pkt_burst = &eth_ena_xmit_pkts; 2275 eth_dev->tx_pkt_prepare = &eth_ena_prep_pkts; 2276 2277 rc = ena_init_once(); 2278 if (rc != 0) 2279 return rc; 2280 2281 if (rte_eal_process_type() != RTE_PROC_PRIMARY) 2282 return 0; 2283 2284 eth_dev->data->dev_flags |= RTE_ETH_DEV_AUTOFILL_QUEUE_XSTATS; 2285 2286 memset(adapter, 0, sizeof(struct ena_adapter)); 2287 ena_dev = &adapter->ena_dev; 2288 2289 adapter->edev_data = eth_dev->data; 2290 2291 pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev); 2292 2293 PMD_INIT_LOG_LINE(INFO, "Initializing " PCI_PRI_FMT, 2294 pci_dev->addr.domain, 2295 pci_dev->addr.bus, 2296 pci_dev->addr.devid, 2297 pci_dev->addr.function); 2298 2299 intr_handle = pci_dev->intr_handle; 2300 2301 adapter->regs = pci_dev->mem_resource[ENA_REGS_BAR].addr; 2302 adapter->dev_mem_base = pci_dev->mem_resource[ENA_MEM_BAR].addr; 2303 2304 if (!adapter->regs) { 2305 PMD_INIT_LOG_LINE(CRIT, "Failed to access registers BAR(%d)", 2306 ENA_REGS_BAR); 2307 return -ENXIO; 2308 } 2309 2310 ena_dev->reg_bar = adapter->regs; 2311 /* Pass device data as a pointer which can be passed to the IO functions 2312 * by the ena_com (for example - the memory allocation).
2313 */ 2314 ena_dev->dmadev = eth_dev->data; 2315 2316 adapter->id_number = adapters_found; 2317 2318 snprintf(adapter->name, ENA_NAME_MAX_LEN, "ena_%d", 2319 adapter->id_number); 2320 2321 /* Assign default devargs values */ 2322 adapter->missing_tx_completion_to = ENA_TX_TIMEOUT; 2323 adapter->enable_llq = true; 2324 adapter->use_large_llq_hdr = false; 2325 adapter->use_normal_llq_hdr = false; 2326 2327 /* Get user bypass */ 2328 rc = ena_parse_devargs(adapter, pci_dev->device.devargs); 2329 if (rc != 0) { 2330 PMD_INIT_LOG_LINE(CRIT, "Failed to parse devargs"); 2331 goto err; 2332 } 2333 adapter->llq_header_policy = ena_define_llq_hdr_policy(adapter); 2334 2335 rc = ena_com_allocate_customer_metrics_buffer(ena_dev); 2336 if (rc != 0) { 2337 PMD_INIT_LOG_LINE(CRIT, "Failed to allocate customer metrics buffer"); 2338 goto err; 2339 } 2340 2341 /* device specific initialization routine */ 2342 rc = ena_device_init(adapter, pci_dev, &get_feat_ctx); 2343 if (rc) { 2344 PMD_INIT_LOG_LINE(CRIT, "Failed to init ENA device"); 2345 goto err_metrics_delete; 2346 } 2347 2348 /* Check if device supports LSC */ 2349 if (!(adapter->all_aenq_groups & BIT(ENA_ADMIN_LINK_CHANGE))) 2350 adapter->edev_data->dev_flags &= ~RTE_ETH_DEV_INTR_LSC; 2351 2352 bool use_large_llq_hdr = ena_use_large_llq_hdr(adapter, 2353 get_feat_ctx.llq.entry_size_recommended); 2354 set_default_llq_configurations(&llq_config, &get_feat_ctx.llq, use_large_llq_hdr); 2355 rc = ena_set_queues_placement_policy(adapter, ena_dev, 2356 &get_feat_ctx.llq, &llq_config); 2357 if (unlikely(rc)) { 2358 PMD_INIT_LOG_LINE(CRIT, "Failed to set placement policy"); 2359 return rc; 2360 } 2361 2362 if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_HOST) { 2363 queue_type_str = "Regular"; 2364 } else { 2365 queue_type_str = "Low latency"; 2366 PMD_DRV_LOG_LINE(INFO, "LLQ entry size %uB", llq_config.llq_ring_entry_size_value); 2367 } 2368 PMD_DRV_LOG_LINE(INFO, "Placement policy: %s", queue_type_str); 2369 2370 calc_queue_ctx.ena_dev = ena_dev; 2371 calc_queue_ctx.get_feat_ctx = &get_feat_ctx; 2372 2373 max_num_io_queues = ena_calc_max_io_queue_num(ena_dev, &get_feat_ctx); 2374 rc = ena_calc_io_queue_size(&calc_queue_ctx, use_large_llq_hdr); 2375 if (unlikely((rc != 0) || (max_num_io_queues == 0))) { 2376 rc = -EFAULT; 2377 goto err_device_destroy; 2378 } 2379 2380 adapter->max_tx_ring_size = calc_queue_ctx.max_tx_queue_size; 2381 adapter->max_rx_ring_size = calc_queue_ctx.max_rx_queue_size; 2382 adapter->max_tx_sgl_size = calc_queue_ctx.max_tx_sgl_size; 2383 adapter->max_rx_sgl_size = calc_queue_ctx.max_rx_sgl_size; 2384 adapter->max_num_io_queues = max_num_io_queues; 2385 2386 if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) { 2387 disable_meta_caching = 2388 !!(get_feat_ctx.llq.accel_mode.u.get.supported_flags & 2389 BIT(ENA_ADMIN_DISABLE_META_CACHING)); 2390 } else { 2391 disable_meta_caching = false; 2392 } 2393 2394 /* prepare ring structures */ 2395 ena_init_rings(adapter, disable_meta_caching); 2396 2397 ena_config_debug_area(adapter); 2398 2399 /* Set max MTU for this device */ 2400 adapter->max_mtu = get_feat_ctx.dev_attr.max_mtu; 2401 2402 ena_set_offloads(&adapter->offloads, &get_feat_ctx.offload); 2403 2404 /* Copy MAC address and point DPDK to it */ 2405 eth_dev->data->mac_addrs = (struct rte_ether_addr *)adapter->mac_addr; 2406 rte_ether_addr_copy((struct rte_ether_addr *) 2407 get_feat_ctx.dev_attr.mac_addr, 2408 (struct rte_ether_addr *)adapter->mac_addr); 2409 2410 rc = ena_com_rss_init(ena_dev, 
ENA_RX_RSS_TABLE_LOG_SIZE); 2411 if (unlikely(rc != 0)) { 2412 PMD_DRV_LOG_LINE(ERR, "Failed to initialize RSS in ENA device"); 2413 goto err_delete_debug_area; 2414 } 2415 2416 adapter->drv_stats = rte_zmalloc("adapter stats", 2417 sizeof(*adapter->drv_stats), 2418 RTE_CACHE_LINE_SIZE); 2419 if (!adapter->drv_stats) { 2420 PMD_DRV_LOG_LINE(ERR, 2421 "Failed to allocate memory for adapter statistics"); 2422 rc = -ENOMEM; 2423 goto err_rss_destroy; 2424 } 2425 2426 rte_spinlock_init(&adapter->admin_lock); 2427 2428 if (!adapter->control_path_poll_interval) { 2429 /* Control path interrupt mode */ 2430 rte_intr_callback_register(intr_handle, ena_control_path_handler, eth_dev); 2431 rte_intr_enable(intr_handle); 2432 ena_com_set_admin_polling_mode(ena_dev, false); 2433 } else { 2434 /* Control path polling mode */ 2435 rc = rte_eal_alarm_set(adapter->control_path_poll_interval, 2436 ena_control_path_poll_handler, eth_dev); 2437 if (unlikely(rc != 0)) { 2438 PMD_DRV_LOG_LINE(ERR, "Failed to set control path alarm"); 2439 goto err_control_path_destroy; 2440 } 2441 } 2442 ena_com_admin_aenq_enable(ena_dev); 2443 rte_timer_init(&adapter->timer_wd); 2444 2445 adapters_found++; 2446 adapter->state = ENA_ADAPTER_STATE_INIT; 2447 2448 return 0; 2449 err_control_path_destroy: 2450 rte_free(adapter->drv_stats); 2451 err_rss_destroy: 2452 ena_com_rss_destroy(ena_dev); 2453 err_delete_debug_area: 2454 ena_com_delete_debug_area(ena_dev); 2455 2456 err_device_destroy: 2457 ena_com_delete_host_info(ena_dev); 2458 ena_com_admin_destroy(ena_dev); 2459 err_metrics_delete: 2460 ena_com_delete_customer_metrics_buffer(ena_dev); 2461 err: 2462 return rc; 2463 } 2464 2465 static int eth_ena_dev_uninit(struct rte_eth_dev *eth_dev) 2466 { 2467 if (rte_eal_process_type() != RTE_PROC_PRIMARY) 2468 return 0; 2469 2470 ena_close(eth_dev); 2471 2472 return 0; 2473 } 2474 2475 static int ena_dev_configure(struct rte_eth_dev *dev) 2476 { 2477 struct ena_adapter *adapter = dev->data->dev_private; 2478 int rc; 2479 2480 adapter->state = ENA_ADAPTER_STATE_CONFIG; 2481 2482 if (dev->data->dev_conf.rxmode.mq_mode & RTE_ETH_MQ_RX_RSS_FLAG) 2483 dev->data->dev_conf.rxmode.offloads |= RTE_ETH_RX_OFFLOAD_RSS_HASH; 2484 dev->data->dev_conf.txmode.offloads |= RTE_ETH_TX_OFFLOAD_MULTI_SEGS; 2485 2486 /* Scattered Rx cannot be turned off in the HW, so this capability must 2487 * be forced. 2488 */ 2489 dev->data->scattered_rx = 1; 2490 2491 adapter->last_tx_comp_qid = 0; 2492 2493 adapter->missing_tx_completion_budget = 2494 RTE_MIN(ENA_MONITORED_TX_QUEUES, dev->data->nb_tx_queues); 2495 2496 /* To avoid detection of the spurious Tx completion timeout due to 2497 * application not calling the Tx cleanup function, set timeout for the 2498 * Tx queue which should be half of the missing completion timeout for a 2499 * safety. If there will be a lot of missing Tx completions in the 2500 * queue, they will be detected sooner or later. 
2501 */ 2502 adapter->tx_cleanup_stall_delay = adapter->missing_tx_completion_to / 2; 2503 2504 rc = ena_configure_aenq(adapter); 2505 2506 return rc; 2507 } 2508 2509 static void ena_init_rings(struct ena_adapter *adapter, 2510 bool disable_meta_caching) 2511 { 2512 size_t i; 2513 2514 for (i = 0; i < adapter->max_num_io_queues; i++) { 2515 struct ena_ring *ring = &adapter->tx_ring[i]; 2516 2517 ring->configured = 0; 2518 ring->type = ENA_RING_TYPE_TX; 2519 ring->adapter = adapter; 2520 ring->id = i; 2521 ring->tx_mem_queue_type = adapter->ena_dev.tx_mem_queue_type; 2522 ring->tx_max_header_size = adapter->ena_dev.tx_max_header_size; 2523 ring->sgl_size = adapter->max_tx_sgl_size; 2524 ring->disable_meta_caching = disable_meta_caching; 2525 } 2526 2527 for (i = 0; i < adapter->max_num_io_queues; i++) { 2528 struct ena_ring *ring = &adapter->rx_ring[i]; 2529 2530 ring->configured = 0; 2531 ring->type = ENA_RING_TYPE_RX; 2532 ring->adapter = adapter; 2533 ring->id = i; 2534 ring->sgl_size = adapter->max_rx_sgl_size; 2535 } 2536 } 2537 2538 static uint64_t ena_get_rx_port_offloads(struct ena_adapter *adapter) 2539 { 2540 uint64_t port_offloads = 0; 2541 2542 if (adapter->offloads.rx_offloads & ENA_L3_IPV4_CSUM) 2543 port_offloads |= RTE_ETH_RX_OFFLOAD_IPV4_CKSUM; 2544 2545 if (adapter->offloads.rx_offloads & 2546 (ENA_L4_IPV4_CSUM | ENA_L4_IPV6_CSUM)) 2547 port_offloads |= 2548 RTE_ETH_RX_OFFLOAD_UDP_CKSUM | RTE_ETH_RX_OFFLOAD_TCP_CKSUM; 2549 2550 if (adapter->offloads.rx_offloads & ENA_RX_RSS_HASH) 2551 port_offloads |= RTE_ETH_RX_OFFLOAD_RSS_HASH; 2552 2553 port_offloads |= RTE_ETH_RX_OFFLOAD_SCATTER; 2554 2555 return port_offloads; 2556 } 2557 2558 static uint64_t ena_get_tx_port_offloads(struct ena_adapter *adapter) 2559 { 2560 uint64_t port_offloads = 0; 2561 2562 if (adapter->offloads.tx_offloads & ENA_IPV4_TSO) 2563 port_offloads |= RTE_ETH_TX_OFFLOAD_TCP_TSO; 2564 2565 if (adapter->offloads.tx_offloads & ENA_L3_IPV4_CSUM) 2566 port_offloads |= RTE_ETH_TX_OFFLOAD_IPV4_CKSUM; 2567 if (adapter->offloads.tx_offloads & 2568 (ENA_L4_IPV4_CSUM_PARTIAL | ENA_L4_IPV4_CSUM | 2569 ENA_L4_IPV6_CSUM | ENA_L4_IPV6_CSUM_PARTIAL)) 2570 port_offloads |= 2571 RTE_ETH_TX_OFFLOAD_UDP_CKSUM | RTE_ETH_TX_OFFLOAD_TCP_CKSUM; 2572 2573 port_offloads |= RTE_ETH_TX_OFFLOAD_MULTI_SEGS; 2574 2575 port_offloads |= RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE; 2576 2577 return port_offloads; 2578 } 2579 2580 static uint64_t ena_get_rx_queue_offloads(struct ena_adapter *adapter) 2581 { 2582 RTE_SET_USED(adapter); 2583 2584 return 0; 2585 } 2586 2587 static uint64_t ena_get_tx_queue_offloads(struct ena_adapter *adapter) 2588 { 2589 uint64_t queue_offloads = 0; 2590 RTE_SET_USED(adapter); 2591 2592 queue_offloads |= RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE; 2593 2594 return queue_offloads; 2595 } 2596 2597 static int ena_infos_get(struct rte_eth_dev *dev, 2598 struct rte_eth_dev_info *dev_info) 2599 { 2600 struct ena_adapter *adapter; 2601 struct ena_com_dev *ena_dev; 2602 2603 ena_assert_msg(dev->data != NULL, "Uninitialized device\n"); 2604 ena_assert_msg(dev->data->dev_private != NULL, "Uninitialized device\n"); 2605 adapter = dev->data->dev_private; 2606 2607 ena_dev = &adapter->ena_dev; 2608 ena_assert_msg(ena_dev != NULL, "Uninitialized device\n"); 2609 2610 dev_info->speed_capa = 2611 RTE_ETH_LINK_SPEED_1G | 2612 RTE_ETH_LINK_SPEED_2_5G | 2613 RTE_ETH_LINK_SPEED_5G | 2614 RTE_ETH_LINK_SPEED_10G | 2615 RTE_ETH_LINK_SPEED_25G | 2616 RTE_ETH_LINK_SPEED_40G | 2617 RTE_ETH_LINK_SPEED_50G | 2618 RTE_ETH_LINK_SPEED_100G | 2619 
RTE_ETH_LINK_SPEED_200G | 2620 RTE_ETH_LINK_SPEED_400G; 2621 2622 /* Inform framework about available features */ 2623 dev_info->rx_offload_capa = ena_get_rx_port_offloads(adapter); 2624 dev_info->tx_offload_capa = ena_get_tx_port_offloads(adapter); 2625 dev_info->rx_queue_offload_capa = ena_get_rx_queue_offloads(adapter); 2626 dev_info->tx_queue_offload_capa = ena_get_tx_queue_offloads(adapter); 2627 2628 dev_info->flow_type_rss_offloads = ENA_ALL_RSS_HF; 2629 dev_info->hash_key_size = ENA_HASH_KEY_SIZE; 2630 2631 dev_info->min_rx_bufsize = ENA_MIN_FRAME_LEN; 2632 dev_info->max_rx_pktlen = adapter->max_mtu + RTE_ETHER_HDR_LEN + 2633 RTE_ETHER_CRC_LEN; 2634 dev_info->min_mtu = ENA_MIN_MTU; 2635 dev_info->max_mtu = adapter->max_mtu; 2636 dev_info->max_mac_addrs = 1; 2637 2638 dev_info->max_rx_queues = adapter->max_num_io_queues; 2639 dev_info->max_tx_queues = adapter->max_num_io_queues; 2640 dev_info->reta_size = ENA_RX_RSS_TABLE_SIZE; 2641 2642 dev_info->rx_desc_lim.nb_max = adapter->max_rx_ring_size; 2643 dev_info->rx_desc_lim.nb_min = ENA_MIN_RING_DESC; 2644 dev_info->rx_desc_lim.nb_seg_max = RTE_MIN(ENA_PKT_MAX_BUFS, 2645 adapter->max_rx_sgl_size); 2646 dev_info->rx_desc_lim.nb_mtu_seg_max = RTE_MIN(ENA_PKT_MAX_BUFS, 2647 adapter->max_rx_sgl_size); 2648 2649 dev_info->tx_desc_lim.nb_max = adapter->max_tx_ring_size; 2650 dev_info->tx_desc_lim.nb_min = ENA_MIN_RING_DESC; 2651 dev_info->tx_desc_lim.nb_seg_max = RTE_MIN(ENA_PKT_MAX_BUFS, 2652 adapter->max_tx_sgl_size); 2653 dev_info->tx_desc_lim.nb_mtu_seg_max = RTE_MIN(ENA_PKT_MAX_BUFS, 2654 adapter->max_tx_sgl_size); 2655 2656 dev_info->default_rxportconf.ring_size = RTE_MIN(ENA_DEFAULT_RING_SIZE, 2657 dev_info->rx_desc_lim.nb_max); 2658 dev_info->default_txportconf.ring_size = RTE_MIN(ENA_DEFAULT_RING_SIZE, 2659 dev_info->tx_desc_lim.nb_max); 2660 2661 dev_info->err_handle_mode = RTE_ETH_ERROR_HANDLE_MODE_PASSIVE; 2662 2663 return 0; 2664 } 2665 2666 static inline void ena_init_rx_mbuf(struct rte_mbuf *mbuf, uint16_t len) 2667 { 2668 mbuf->data_len = len; 2669 mbuf->data_off = RTE_PKTMBUF_HEADROOM; 2670 mbuf->refcnt = 1; 2671 mbuf->next = NULL; 2672 } 2673 2674 static struct rte_mbuf *ena_rx_mbuf(struct ena_ring *rx_ring, 2675 struct ena_com_rx_buf_info *ena_bufs, 2676 uint32_t descs, 2677 uint16_t *next_to_clean, 2678 uint8_t offset) 2679 { 2680 struct rte_mbuf *mbuf; 2681 struct rte_mbuf *mbuf_head; 2682 struct ena_rx_buffer *rx_info; 2683 int rc; 2684 uint16_t ntc, len, req_id, buf = 0; 2685 2686 if (unlikely(descs == 0)) 2687 return NULL; 2688 2689 ntc = *next_to_clean; 2690 2691 len = ena_bufs[buf].len; 2692 req_id = ena_bufs[buf].req_id; 2693 2694 rx_info = &rx_ring->rx_buffer_info[req_id]; 2695 2696 mbuf = rx_info->mbuf; 2697 RTE_ASSERT(mbuf != NULL); 2698 2699 ena_init_rx_mbuf(mbuf, len); 2700 2701 /* Fill the mbuf head with the data specific for 1st segment. */ 2702 mbuf_head = mbuf; 2703 mbuf_head->nb_segs = descs; 2704 mbuf_head->port = rx_ring->port_id; 2705 mbuf_head->pkt_len = len; 2706 mbuf_head->data_off += offset; 2707 2708 rx_info->mbuf = NULL; 2709 rx_ring->empty_rx_reqs[ntc] = req_id; 2710 ntc = ENA_IDX_NEXT_MASKED(ntc, rx_ring->size_mask); 2711 2712 while (--descs) { 2713 ++buf; 2714 len = ena_bufs[buf].len; 2715 req_id = ena_bufs[buf].req_id; 2716 2717 rx_info = &rx_ring->rx_buffer_info[req_id]; 2718 RTE_ASSERT(rx_info->mbuf != NULL); 2719 2720 if (unlikely(len == 0)) { 2721 /* 2722 * Some devices can pass descriptor with the length 0. 
2723 * To avoid confusion, the PMD is simply putting the 2724 * descriptor back, as it was never used. We'll avoid 2725 * mbuf allocation that way. 2726 */ 2727 rc = ena_add_single_rx_desc(rx_ring->ena_com_io_sq, 2728 rx_info->mbuf, req_id); 2729 if (unlikely(rc != 0)) { 2730 /* Free the mbuf in case of an error. */ 2731 rte_mbuf_raw_free(rx_info->mbuf); 2732 } else { 2733 /* 2734 * If there was no error, just exit the loop as 2735 * 0 length descriptor is always the last one. 2736 */ 2737 break; 2738 } 2739 } else { 2740 /* Create an mbuf chain. */ 2741 mbuf->next = rx_info->mbuf; 2742 mbuf = mbuf->next; 2743 2744 ena_init_rx_mbuf(mbuf, len); 2745 mbuf_head->pkt_len += len; 2746 } 2747 2748 /* 2749 * Mark the descriptor as depleted and perform necessary 2750 * cleanup. 2751 * This code will execute in two cases: 2752 * 1. Descriptor len was greater than 0 - normal situation. 2753 * 2. Descriptor len was 0 and we failed to add the descriptor 2754 * to the device. In that situation, we should try to add 2755 * the mbuf again in the populate routine and mark the 2756 * descriptor as used up by the device. 2757 */ 2758 rx_info->mbuf = NULL; 2759 rx_ring->empty_rx_reqs[ntc] = req_id; 2760 ntc = ENA_IDX_NEXT_MASKED(ntc, rx_ring->size_mask); 2761 } 2762 2763 *next_to_clean = ntc; 2764 2765 return mbuf_head; 2766 } 2767 2768 static uint16_t eth_ena_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, 2769 uint16_t nb_pkts) 2770 { 2771 struct ena_ring *rx_ring = (struct ena_ring *)(rx_queue); 2772 unsigned int free_queue_entries; 2773 uint16_t next_to_clean = rx_ring->next_to_clean; 2774 uint16_t descs_in_use; 2775 struct rte_mbuf *mbuf; 2776 uint16_t completed; 2777 struct ena_com_rx_ctx ena_rx_ctx; 2778 int i, rc = 0; 2779 2780 #ifdef RTE_ETHDEV_DEBUG_RX 2781 /* Check adapter state */ 2782 if (unlikely(rx_ring->adapter->state != ENA_ADAPTER_STATE_RUNNING)) { 2783 PMD_RX_LOG_LINE(ALERT, 2784 "Trying to receive pkts while device is NOT running"); 2785 return 0; 2786 } 2787 #endif 2788 2789 descs_in_use = rx_ring->ring_size - 2790 ena_com_free_q_entries(rx_ring->ena_com_io_sq) - 1; 2791 nb_pkts = RTE_MIN(descs_in_use, nb_pkts); 2792 2793 for (completed = 0; completed < nb_pkts; completed++) { 2794 ena_rx_ctx.max_bufs = rx_ring->sgl_size; 2795 ena_rx_ctx.ena_bufs = rx_ring->ena_bufs; 2796 ena_rx_ctx.descs = 0; 2797 ena_rx_ctx.pkt_offset = 0; 2798 /* receive packet context */ 2799 rc = ena_com_rx_pkt(rx_ring->ena_com_io_cq, 2800 rx_ring->ena_com_io_sq, 2801 &ena_rx_ctx); 2802 if (unlikely(rc)) { 2803 PMD_RX_LOG_LINE(ERR, 2804 "Failed to get the packet from the device, rc: %d", 2805 rc); 2806 if (rc == ENA_COM_NO_SPACE) { 2807 ++rx_ring->rx_stats.bad_desc_num; 2808 ena_trigger_reset(rx_ring->adapter, 2809 ENA_REGS_RESET_TOO_MANY_RX_DESCS); 2810 } else { 2811 ++rx_ring->rx_stats.bad_req_id; 2812 ena_trigger_reset(rx_ring->adapter, 2813 ENA_REGS_RESET_INV_RX_REQ_ID); 2814 } 2815 return 0; 2816 } 2817 2818 mbuf = ena_rx_mbuf(rx_ring, 2819 ena_rx_ctx.ena_bufs, 2820 ena_rx_ctx.descs, 2821 &next_to_clean, 2822 ena_rx_ctx.pkt_offset); 2823 if (unlikely(mbuf == NULL)) { 2824 for (i = 0; i < ena_rx_ctx.descs; ++i) { 2825 rx_ring->empty_rx_reqs[next_to_clean] = 2826 rx_ring->ena_bufs[i].req_id; 2827 next_to_clean = ENA_IDX_NEXT_MASKED( 2828 next_to_clean, rx_ring->size_mask); 2829 } 2830 break; 2831 } 2832 2833 /* fill mbuf attributes if any */ 2834 ena_rx_mbuf_prepare(rx_ring, mbuf, &ena_rx_ctx); 2835 2836 if (unlikely(mbuf->ol_flags & 2837 (RTE_MBUF_F_RX_IP_CKSUM_BAD | RTE_MBUF_F_RX_L4_CKSUM_BAD))) 2838 
rte_atomic64_inc(&rx_ring->adapter->drv_stats->ierrors); 2839 2840 rx_pkts[completed] = mbuf; 2841 rx_ring->rx_stats.bytes += mbuf->pkt_len; 2842 } 2843 2844 rx_ring->rx_stats.cnt += completed; 2845 rx_ring->next_to_clean = next_to_clean; 2846 2847 free_queue_entries = ena_com_free_q_entries(rx_ring->ena_com_io_sq); 2848 2849 /* Burst refill to save doorbells, memory barriers, const interval */ 2850 if (free_queue_entries >= rx_ring->rx_free_thresh) { 2851 ena_populate_rx_queue(rx_ring, free_queue_entries); 2852 } 2853 2854 return completed; 2855 } 2856 2857 static uint16_t 2858 eth_ena_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, 2859 uint16_t nb_pkts) 2860 { 2861 int32_t ret; 2862 uint32_t i; 2863 struct rte_mbuf *m; 2864 struct ena_ring *tx_ring = (struct ena_ring *)(tx_queue); 2865 struct ena_adapter *adapter = tx_ring->adapter; 2866 struct rte_ipv4_hdr *ip_hdr; 2867 uint64_t ol_flags; 2868 uint64_t l4_csum_flag; 2869 uint64_t dev_offload_capa; 2870 uint16_t frag_field; 2871 bool need_pseudo_csum; 2872 2873 dev_offload_capa = adapter->offloads.tx_offloads; 2874 for (i = 0; i != nb_pkts; i++) { 2875 m = tx_pkts[i]; 2876 ol_flags = m->ol_flags; 2877 2878 /* Check if any offload flag was set */ 2879 if (ol_flags == 0) 2880 continue; 2881 2882 l4_csum_flag = ol_flags & RTE_MBUF_F_TX_L4_MASK; 2883 /* SCTP checksum offload is not supported by the ENA. */ 2884 if ((ol_flags & ENA_TX_OFFLOAD_NOTSUP_MASK) || 2885 l4_csum_flag == RTE_MBUF_F_TX_SCTP_CKSUM) { 2886 PMD_TX_LOG_LINE(DEBUG, 2887 "mbuf[%" PRIu32 "] has unsupported offloads flags set: 0x%" PRIu64, 2888 i, ol_flags); 2889 rte_errno = ENOTSUP; 2890 return i; 2891 } 2892 2893 if (unlikely(m->nb_segs >= tx_ring->sgl_size && 2894 !(tx_ring->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV && 2895 m->nb_segs == tx_ring->sgl_size && 2896 m->data_len < tx_ring->tx_max_header_size))) { 2897 PMD_TX_LOG_LINE(DEBUG, 2898 "mbuf[%" PRIu32 "] has too many segments: %" PRIu16, 2899 i, m->nb_segs); 2900 rte_errno = EINVAL; 2901 return i; 2902 } 2903 2904 #ifdef RTE_LIBRTE_ETHDEV_DEBUG 2905 /* Check if requested offload is also enabled for the queue */ 2906 if ((ol_flags & RTE_MBUF_F_TX_IP_CKSUM && 2907 !(tx_ring->offloads & RTE_ETH_TX_OFFLOAD_IPV4_CKSUM)) || 2908 (l4_csum_flag == RTE_MBUF_F_TX_TCP_CKSUM && 2909 !(tx_ring->offloads & RTE_ETH_TX_OFFLOAD_TCP_CKSUM)) || 2910 (l4_csum_flag == RTE_MBUF_F_TX_UDP_CKSUM && 2911 !(tx_ring->offloads & RTE_ETH_TX_OFFLOAD_UDP_CKSUM))) { 2912 PMD_TX_LOG_LINE(DEBUG, 2913 "mbuf[%" PRIu32 "]: requested offloads: %" PRIu16 " are not enabled for the queue[%u]", 2914 i, m->nb_segs, tx_ring->id); 2915 rte_errno = EINVAL; 2916 return i; 2917 } 2918 2919 /* The caller is obligated to set l2 and l3 len if any cksum 2920 * offload is enabled. 2921 */ 2922 if (unlikely(ol_flags & (RTE_MBUF_F_TX_IP_CKSUM | RTE_MBUF_F_TX_L4_MASK) && 2923 (m->l2_len == 0 || m->l3_len == 0))) { 2924 PMD_TX_LOG_LINE(DEBUG, 2925 "mbuf[%" PRIu32 "]: l2_len or l3_len values are 0 while the offload was requested", 2926 i); 2927 rte_errno = EINVAL; 2928 return i; 2929 } 2930 ret = rte_validate_tx_offload(m); 2931 if (ret != 0) { 2932 rte_errno = -ret; 2933 return i; 2934 } 2935 #endif 2936 2937 /* Verify HW support for requested offloads and determine if 2938 * pseudo header checksum is needed. 
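 * The *_CSUM_PARTIAL capabilities indicate that the device expects the pseudo
 * header checksum to be pre-filled by the driver; when that is the case,
 * rte_net_intel_cksum_flags_prepare() is called below to compute it.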
2939 */ 2940 need_pseudo_csum = false; 2941 if (ol_flags & RTE_MBUF_F_TX_IPV4) { 2942 if (ol_flags & RTE_MBUF_F_TX_IP_CKSUM && 2943 !(dev_offload_capa & ENA_L3_IPV4_CSUM)) { 2944 rte_errno = ENOTSUP; 2945 return i; 2946 } 2947 2948 if (ol_flags & RTE_MBUF_F_TX_TCP_SEG && 2949 !(dev_offload_capa & ENA_IPV4_TSO)) { 2950 rte_errno = ENOTSUP; 2951 return i; 2952 } 2953 2954 /* Check HW capabilities and if pseudo csum is needed 2955 * for L4 offloads. 2956 */ 2957 if (l4_csum_flag != RTE_MBUF_F_TX_L4_NO_CKSUM && 2958 !(dev_offload_capa & ENA_L4_IPV4_CSUM)) { 2959 if (dev_offload_capa & 2960 ENA_L4_IPV4_CSUM_PARTIAL) { 2961 need_pseudo_csum = true; 2962 } else { 2963 rte_errno = ENOTSUP; 2964 return i; 2965 } 2966 } 2967 2968 /* Parse the DF flag */ 2969 ip_hdr = rte_pktmbuf_mtod_offset(m, 2970 struct rte_ipv4_hdr *, m->l2_len); 2971 frag_field = rte_be_to_cpu_16(ip_hdr->fragment_offset); 2972 if (frag_field & RTE_IPV4_HDR_DF_FLAG) { 2973 m->packet_type |= RTE_PTYPE_L4_NONFRAG; 2974 } else if (ol_flags & RTE_MBUF_F_TX_TCP_SEG) { 2975 /* In case we are supposed to TSO and have DF 2976 * not set (DF=0) hardware must be provided with 2977 * partial checksum. 2978 */ 2979 need_pseudo_csum = true; 2980 } 2981 } else if (ol_flags & RTE_MBUF_F_TX_IPV6) { 2982 /* There is no support for IPv6 TSO as for now. */ 2983 if (ol_flags & RTE_MBUF_F_TX_TCP_SEG) { 2984 rte_errno = ENOTSUP; 2985 return i; 2986 } 2987 2988 /* Check HW capabilities and if pseudo csum is needed */ 2989 if (l4_csum_flag != RTE_MBUF_F_TX_L4_NO_CKSUM && 2990 !(dev_offload_capa & ENA_L4_IPV6_CSUM)) { 2991 if (dev_offload_capa & 2992 ENA_L4_IPV6_CSUM_PARTIAL) { 2993 need_pseudo_csum = true; 2994 } else { 2995 rte_errno = ENOTSUP; 2996 return i; 2997 } 2998 } 2999 } 3000 3001 if (need_pseudo_csum) { 3002 ret = rte_net_intel_cksum_flags_prepare(m, ol_flags); 3003 if (ret != 0) { 3004 rte_errno = -ret; 3005 return i; 3006 } 3007 } 3008 } 3009 3010 return i; 3011 } 3012 3013 static void ena_update_hints(struct ena_adapter *adapter, 3014 struct ena_admin_ena_hw_hints *hints) 3015 { 3016 if (hints->admin_completion_tx_timeout) 3017 adapter->ena_dev.admin_queue.completion_timeout = 3018 hints->admin_completion_tx_timeout * 1000; 3019 3020 if (hints->mmio_read_timeout) 3021 /* convert to usec */ 3022 adapter->ena_dev.mmio_read.reg_read_to = 3023 hints->mmio_read_timeout * 1000; 3024 3025 if (hints->driver_watchdog_timeout) { 3026 if (hints->driver_watchdog_timeout == ENA_HW_HINTS_NO_TIMEOUT) 3027 adapter->keep_alive_timeout = ENA_HW_HINTS_NO_TIMEOUT; 3028 else 3029 // Convert msecs to ticks 3030 adapter->keep_alive_timeout = 3031 (hints->driver_watchdog_timeout * 3032 rte_get_timer_hz()) / 1000; 3033 } 3034 } 3035 3036 static void ena_tx_map_mbuf(struct ena_ring *tx_ring, 3037 struct ena_tx_buffer *tx_info, 3038 struct rte_mbuf *mbuf, 3039 void **push_header, 3040 uint16_t *header_len) 3041 { 3042 struct ena_com_buf *ena_buf; 3043 uint16_t delta, seg_len, push_len; 3044 3045 delta = 0; 3046 seg_len = mbuf->data_len; 3047 3048 tx_info->mbuf = mbuf; 3049 ena_buf = tx_info->bufs; 3050 3051 if (tx_ring->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) { 3052 /* 3053 * Tx header might be (and will be in most cases) smaller than 3054 * tx_max_header_size. But it's not an issue to send more data 3055 * to the device, than actually needed if the mbuf size is 3056 * greater than tx_max_header_size. 
3057 */ 3058 push_len = RTE_MIN(mbuf->pkt_len, tx_ring->tx_max_header_size); 3059 *header_len = push_len; 3060 3061 if (likely(push_len <= seg_len)) { 3062 /* If the push header is in the single segment, then 3063 * just point it to the 1st mbuf data. 3064 */ 3065 *push_header = rte_pktmbuf_mtod(mbuf, uint8_t *); 3066 } else { 3067 /* If the push header lays in the several segments, copy 3068 * it to the intermediate buffer. 3069 */ 3070 rte_pktmbuf_read(mbuf, 0, push_len, 3071 tx_ring->push_buf_intermediate_buf); 3072 *push_header = tx_ring->push_buf_intermediate_buf; 3073 delta = push_len - seg_len; 3074 } 3075 } else { 3076 *push_header = NULL; 3077 *header_len = 0; 3078 push_len = 0; 3079 } 3080 3081 /* Process first segment taking into consideration pushed header */ 3082 if (seg_len > push_len) { 3083 ena_buf->paddr = mbuf->buf_iova + 3084 mbuf->data_off + 3085 push_len; 3086 ena_buf->len = seg_len - push_len; 3087 ena_buf++; 3088 tx_info->num_of_bufs++; 3089 } 3090 3091 while ((mbuf = mbuf->next) != NULL) { 3092 seg_len = mbuf->data_len; 3093 3094 /* Skip mbufs if whole data is pushed as a header */ 3095 if (unlikely(delta > seg_len)) { 3096 delta -= seg_len; 3097 continue; 3098 } 3099 3100 ena_buf->paddr = mbuf->buf_iova + mbuf->data_off + delta; 3101 ena_buf->len = seg_len - delta; 3102 ena_buf++; 3103 tx_info->num_of_bufs++; 3104 3105 delta = 0; 3106 } 3107 } 3108 3109 static int ena_xmit_mbuf(struct ena_ring *tx_ring, struct rte_mbuf *mbuf) 3110 { 3111 struct ena_tx_buffer *tx_info; 3112 struct ena_com_tx_ctx ena_tx_ctx = { { 0 } }; 3113 uint16_t next_to_use; 3114 uint16_t header_len; 3115 uint16_t req_id; 3116 void *push_header; 3117 int nb_hw_desc; 3118 int rc; 3119 3120 /* Checking for space for 2 additional metadata descriptors due to 3121 * possible header split and metadata descriptor 3122 */ 3123 if (!ena_com_sq_have_enough_space(tx_ring->ena_com_io_sq, 3124 mbuf->nb_segs + 2)) { 3125 PMD_TX_LOG_LINE(DEBUG, "Not enough space in the tx queue"); 3126 return ENA_COM_NO_MEM; 3127 } 3128 3129 next_to_use = tx_ring->next_to_use; 3130 3131 req_id = tx_ring->empty_tx_reqs[next_to_use]; 3132 tx_info = &tx_ring->tx_buffer_info[req_id]; 3133 tx_info->num_of_bufs = 0; 3134 RTE_ASSERT(tx_info->mbuf == NULL); 3135 3136 ena_tx_map_mbuf(tx_ring, tx_info, mbuf, &push_header, &header_len); 3137 3138 ena_tx_ctx.ena_bufs = tx_info->bufs; 3139 ena_tx_ctx.push_header = push_header; 3140 ena_tx_ctx.num_bufs = tx_info->num_of_bufs; 3141 ena_tx_ctx.req_id = req_id; 3142 ena_tx_ctx.header_len = header_len; 3143 3144 /* Set Tx offloads flags, if applicable */ 3145 ena_tx_mbuf_prepare(mbuf, &ena_tx_ctx, tx_ring->offloads, 3146 tx_ring->disable_meta_caching); 3147 3148 if (unlikely(ena_com_is_doorbell_needed(tx_ring->ena_com_io_sq, 3149 &ena_tx_ctx))) { 3150 PMD_TX_LOG_LINE(DEBUG, 3151 "LLQ Tx max burst size of queue %d achieved, writing doorbell to send burst", 3152 tx_ring->id); 3153 ena_com_write_sq_doorbell(tx_ring->ena_com_io_sq); 3154 tx_ring->tx_stats.doorbells++; 3155 tx_ring->pkts_without_db = false; 3156 } 3157 3158 /* prepare the packet's descriptors to dma engine */ 3159 rc = ena_com_prepare_tx(tx_ring->ena_com_io_sq, &ena_tx_ctx, 3160 &nb_hw_desc); 3161 if (unlikely(rc)) { 3162 PMD_DRV_LOG_LINE(ERR, "Failed to prepare Tx buffers, rc: %d", rc); 3163 ++tx_ring->tx_stats.prepare_ctx_err; 3164 ena_trigger_reset(tx_ring->adapter, 3165 ENA_REGS_RESET_DRIVER_INVALID_STATE); 3166 return rc; 3167 } 3168 3169 tx_info->tx_descs = nb_hw_desc; 3170 tx_info->timestamp = rte_get_timer_cycles(); 3171 
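 /* The timestamp stored above is read by check_for_tx_completion_in_queue()
  * from the watchdog context to detect Tx requests that were not completed
  * within missing_tx_completion_to.
  */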
3172 tx_ring->tx_stats.cnt++; 3173 tx_ring->tx_stats.bytes += mbuf->pkt_len; 3174 3175 tx_ring->next_to_use = ENA_IDX_NEXT_MASKED(next_to_use, 3176 tx_ring->size_mask); 3177 3178 return 0; 3179 } 3180 3181 static int ena_tx_cleanup(void *txp, uint32_t free_pkt_cnt) 3182 { 3183 struct rte_mbuf *pkts_to_clean[ENA_CLEANUP_BUF_THRESH]; 3184 struct ena_ring *tx_ring = (struct ena_ring *)txp; 3185 size_t mbuf_cnt = 0; 3186 size_t pkt_cnt = 0; 3187 unsigned int total_tx_descs = 0; 3188 unsigned int total_tx_pkts = 0; 3189 uint16_t cleanup_budget; 3190 uint16_t next_to_clean = tx_ring->next_to_clean; 3191 bool fast_free = tx_ring->offloads & RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE; 3192 3193 /* 3194 * If free_pkt_cnt is equal to 0, it means that the user requested 3195 * full cleanup, so attempt to release all Tx descriptors 3196 * (ring_size - 1 -> size_mask) 3197 */ 3198 cleanup_budget = (free_pkt_cnt == 0) ? tx_ring->size_mask : free_pkt_cnt; 3199 3200 while (likely(total_tx_pkts < cleanup_budget)) { 3201 struct rte_mbuf *mbuf; 3202 struct ena_tx_buffer *tx_info; 3203 uint16_t req_id; 3204 3205 if (ena_com_tx_comp_req_id_get(tx_ring->ena_com_io_cq, &req_id) != 0) 3206 break; 3207 3208 if (unlikely(validate_tx_req_id(tx_ring, req_id) != 0)) 3209 break; 3210 3211 /* Get Tx info & store how many descs were processed */ 3212 tx_info = &tx_ring->tx_buffer_info[req_id]; 3213 tx_info->timestamp = 0; 3214 3215 mbuf = tx_info->mbuf; 3216 if (fast_free) { 3217 pkts_to_clean[pkt_cnt++] = mbuf; 3218 mbuf_cnt += mbuf->nb_segs; 3219 if (mbuf_cnt >= ENA_CLEANUP_BUF_THRESH) { 3220 rte_pktmbuf_free_bulk(pkts_to_clean, pkt_cnt); 3221 mbuf_cnt = 0; 3222 pkt_cnt = 0; 3223 } 3224 } else { 3225 rte_pktmbuf_free(mbuf); 3226 } 3227 3228 tx_info->mbuf = NULL; 3229 tx_ring->empty_tx_reqs[next_to_clean] = req_id; 3230 3231 total_tx_descs += tx_info->tx_descs; 3232 total_tx_pkts++; 3233 3234 /* Put back descriptor to the ring for reuse */ 3235 next_to_clean = ENA_IDX_NEXT_MASKED(next_to_clean, 3236 tx_ring->size_mask); 3237 } 3238 3239 if (likely(total_tx_descs > 0)) { 3240 /* acknowledge completion of sent packets */ 3241 tx_ring->next_to_clean = next_to_clean; 3242 ena_com_comp_ack(tx_ring->ena_com_io_sq, total_tx_descs); 3243 } 3244 3245 if (mbuf_cnt != 0) 3246 rte_pktmbuf_free_bulk(pkts_to_clean, pkt_cnt); 3247 3248 /* Notify completion handler that full cleanup was performed */ 3249 if (free_pkt_cnt == 0 || total_tx_pkts < cleanup_budget) 3250 tx_ring->last_cleanup_ticks = rte_get_timer_cycles(); 3251 3252 return total_tx_pkts; 3253 } 3254 3255 static uint16_t eth_ena_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, 3256 uint16_t nb_pkts) 3257 { 3258 struct ena_ring *tx_ring = (struct ena_ring *)(tx_queue); 3259 int available_desc; 3260 uint16_t sent_idx = 0; 3261 3262 #ifdef RTE_ETHDEV_DEBUG_TX 3263 /* Check adapter state */ 3264 if (unlikely(tx_ring->adapter->state != ENA_ADAPTER_STATE_RUNNING)) { 3265 PMD_TX_LOG_LINE(ALERT, 3266 "Trying to xmit pkts while device is NOT running"); 3267 return 0; 3268 } 3269 #endif 3270 3271 available_desc = ena_com_free_q_entries(tx_ring->ena_com_io_sq); 3272 if (available_desc < tx_ring->tx_free_thresh) 3273 ena_tx_cleanup((void *)tx_ring, 0); 3274 3275 for (sent_idx = 0; sent_idx < nb_pkts; sent_idx++) { 3276 if (ena_xmit_mbuf(tx_ring, tx_pkts[sent_idx])) 3277 break; 3278 tx_ring->pkts_without_db = true; 3279 rte_prefetch0(tx_pkts[ENA_IDX_ADD_MASKED(sent_idx, 4, 3280 tx_ring->size_mask)]); 3281 } 3282 3283 /* If there are ready packets to be xmitted... 
*/ 3284 if (likely(tx_ring->pkts_without_db)) { 3285 /* ...let HW do its best :-) */ 3286 ena_com_write_sq_doorbell(tx_ring->ena_com_io_sq); 3287 tx_ring->tx_stats.doorbells++; 3288 tx_ring->pkts_without_db = false; 3289 } 3290 3291 tx_ring->tx_stats.available_desc = 3292 ena_com_free_q_entries(tx_ring->ena_com_io_sq); 3293 tx_ring->tx_stats.tx_poll++; 3294 3295 return sent_idx; 3296 } 3297 3298 static void ena_copy_customer_metrics(struct ena_adapter *adapter, uint64_t *buf, 3299 size_t num_metrics) 3300 { 3301 struct ena_com_dev *ena_dev = &adapter->ena_dev; 3302 int rc; 3303 3304 if (ena_com_get_cap(ena_dev, ENA_ADMIN_CUSTOMER_METRICS)) { 3305 if (num_metrics != ENA_STATS_ARRAY_METRICS) { 3306 PMD_DRV_LOG_LINE(ERR, "Detected discrepancy in the number of customer metrics"); 3307 return; 3308 } 3309 rte_spinlock_lock(&adapter->admin_lock); 3310 rc = ENA_PROXY(adapter, 3311 ena_com_get_customer_metrics, 3312 &adapter->ena_dev, 3313 (char *)buf, 3314 num_metrics * sizeof(uint64_t)); 3315 rte_spinlock_unlock(&adapter->admin_lock); 3316 if (rc != 0) { 3317 PMD_DRV_LOG_LINE(WARNING, "Failed to get customer metrics, rc: %d", rc); 3318 return; 3319 } 3320 3321 } else if (ena_com_get_cap(ena_dev, ENA_ADMIN_ENI_STATS)) { 3322 if (num_metrics != ENA_STATS_ARRAY_METRICS_LEGACY) { 3323 PMD_DRV_LOG_LINE(ERR, "Detected discrepancy in the number of legacy metrics"); 3324 return; 3325 } 3326 3327 rte_spinlock_lock(&adapter->admin_lock); 3328 rc = ENA_PROXY(adapter, 3329 ena_com_get_eni_stats, 3330 &adapter->ena_dev, 3331 (struct ena_admin_eni_stats *)buf); 3332 rte_spinlock_unlock(&adapter->admin_lock); 3333 if (rc != 0) { 3334 PMD_DRV_LOG_LINE(WARNING, 3335 "Failed to get ENI metrics, rc: %d", rc); 3336 return; 3337 } 3338 } 3339 } 3340 3341 static void ena_copy_ena_srd_info(struct ena_adapter *adapter, 3342 struct ena_stats_srd *srd_info) 3343 { 3344 int rc; 3345 3346 if (!ena_com_get_cap(&adapter->ena_dev, ENA_ADMIN_ENA_SRD_INFO)) 3347 return; 3348 3349 rte_spinlock_lock(&adapter->admin_lock); 3350 rc = ENA_PROXY(adapter, 3351 ena_com_get_ena_srd_info, 3352 &adapter->ena_dev, 3353 (struct ena_admin_ena_srd_info *)srd_info); 3354 rte_spinlock_unlock(&adapter->admin_lock); 3355 if (rc != ENA_COM_OK && rc != ENA_COM_UNSUPPORTED) { 3356 PMD_DRV_LOG_LINE(WARNING, 3357 "Failed to get ENA express srd info, rc: %d", rc); 3358 return; 3359 } 3360 } 3361 3362 /** 3363 * DPDK callback to retrieve names of extended device statistics 3364 * 3365 * @param dev 3366 * Pointer to Ethernet device structure. 3367 * @param[out] xstats_names 3368 * Buffer to insert names into. 3369 * @param n 3370 * Number of names. 3371 * 3372 * @return 3373 * Number of xstats names. 
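 *   If the supplied buffer is too small or xstats_names is NULL, no names are
 *   copied and the required number of entries is returned instead.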
3374 */ 3375 static int ena_xstats_get_names(struct rte_eth_dev *dev, 3376 struct rte_eth_xstat_name *xstats_names, 3377 unsigned int n) 3378 { 3379 struct ena_adapter *adapter = dev->data->dev_private; 3380 unsigned int xstats_count = ena_xstats_calc_num(dev->data); 3381 unsigned int stat, i, count = 0; 3382 3383 if (n < xstats_count || !xstats_names) 3384 return xstats_count; 3385 3386 for (stat = 0; stat < ENA_STATS_ARRAY_GLOBAL; stat++, count++) 3387 strcpy(xstats_names[count].name, 3388 ena_stats_global_strings[stat].name); 3389 3390 for (stat = 0; stat < adapter->metrics_num; stat++, count++) 3391 rte_strscpy(xstats_names[count].name, 3392 ena_stats_metrics_strings[stat].name, 3393 RTE_ETH_XSTATS_NAME_SIZE); 3394 for (stat = 0; stat < ENA_STATS_ARRAY_ENA_SRD; stat++, count++) 3395 rte_strscpy(xstats_names[count].name, 3396 ena_stats_srd_strings[stat].name, 3397 RTE_ETH_XSTATS_NAME_SIZE); 3398 3399 for (stat = 0; stat < ENA_STATS_ARRAY_RX; stat++) 3400 for (i = 0; i < dev->data->nb_rx_queues; i++, count++) 3401 snprintf(xstats_names[count].name, 3402 sizeof(xstats_names[count].name), 3403 "rx_q%d_%s", i, 3404 ena_stats_rx_strings[stat].name); 3405 3406 for (stat = 0; stat < ENA_STATS_ARRAY_TX; stat++) 3407 for (i = 0; i < dev->data->nb_tx_queues; i++, count++) 3408 snprintf(xstats_names[count].name, 3409 sizeof(xstats_names[count].name), 3410 "tx_q%d_%s", i, 3411 ena_stats_tx_strings[stat].name); 3412 3413 return xstats_count; 3414 } 3415 3416 /** 3417 * DPDK callback to retrieve names of extended device statistics for the given 3418 * ids. 3419 * 3420 * @param dev 3421 * Pointer to Ethernet device structure. 3422 * @param[out] xstats_names 3423 * Buffer to insert names into. 3424 * @param ids 3425 * IDs array for which the names should be retrieved. 3426 * @param size 3427 * Number of ids. 3428 * 3429 * @return 3430 * Positive value: number of xstats names. Negative value: error code. 
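 *   Passing NULL as xstats_names returns the total number of available xstats
 *   without copying any names.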
3431 */ 3432 static int ena_xstats_get_names_by_id(struct rte_eth_dev *dev, 3433 const uint64_t *ids, 3434 struct rte_eth_xstat_name *xstats_names, 3435 unsigned int size) 3436 { 3437 struct ena_adapter *adapter = dev->data->dev_private; 3438 uint64_t xstats_count = ena_xstats_calc_num(dev->data); 3439 uint64_t id, qid; 3440 unsigned int i; 3441 3442 if (xstats_names == NULL) 3443 return xstats_count; 3444 3445 for (i = 0; i < size; ++i) { 3446 id = ids[i]; 3447 if (id > xstats_count) { 3448 PMD_DRV_LOG_LINE(ERR, 3449 "ID value out of range: id=%" PRIu64 ", xstats_num=%" PRIu64, 3450 id, xstats_count); 3451 return -EINVAL; 3452 } 3453 3454 if (id < ENA_STATS_ARRAY_GLOBAL) { 3455 strcpy(xstats_names[i].name, 3456 ena_stats_global_strings[id].name); 3457 continue; 3458 } 3459 3460 id -= ENA_STATS_ARRAY_GLOBAL; 3461 if (id < adapter->metrics_num) { 3462 rte_strscpy(xstats_names[i].name, 3463 ena_stats_metrics_strings[id].name, 3464 RTE_ETH_XSTATS_NAME_SIZE); 3465 continue; 3466 } 3467 3468 id -= adapter->metrics_num; 3469 3470 if (id < ENA_STATS_ARRAY_ENA_SRD) { 3471 rte_strscpy(xstats_names[i].name, 3472 ena_stats_srd_strings[id].name, 3473 RTE_ETH_XSTATS_NAME_SIZE); 3474 continue; 3475 } 3476 id -= ENA_STATS_ARRAY_ENA_SRD; 3477 3478 if (id < ENA_STATS_ARRAY_RX) { 3479 qid = id / dev->data->nb_rx_queues; 3480 id %= dev->data->nb_rx_queues; 3481 snprintf(xstats_names[i].name, 3482 sizeof(xstats_names[i].name), 3483 "rx_q%" PRIu64 "d_%s", 3484 qid, ena_stats_rx_strings[id].name); 3485 continue; 3486 } 3487 3488 id -= ENA_STATS_ARRAY_RX; 3489 /* Although this condition is not needed, it was added for 3490 * compatibility if new xstat structure would be ever added. 3491 */ 3492 if (id < ENA_STATS_ARRAY_TX) { 3493 qid = id / dev->data->nb_tx_queues; 3494 id %= dev->data->nb_tx_queues; 3495 snprintf(xstats_names[i].name, 3496 sizeof(xstats_names[i].name), 3497 "tx_q%" PRIu64 "_%s", 3498 qid, ena_stats_tx_strings[id].name); 3499 continue; 3500 } 3501 } 3502 3503 return i; 3504 } 3505 3506 /** 3507 * DPDK callback to get extended device statistics. 3508 * 3509 * @param dev 3510 * Pointer to Ethernet device structure. 3511 * @param[out] stats 3512 * Stats table output buffer. 3513 * @param n 3514 * The size of the stats table. 3515 * 3516 * @return 3517 * Number of xstats on success, negative on failure. 
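 *   If the table is smaller than the number of available xstats, the required
 *   size is returned and no values are copied; a NULL table makes the function
 *   return 0.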
3518 */ 3519 static int ena_xstats_get(struct rte_eth_dev *dev, 3520 struct rte_eth_xstat *xstats, 3521 unsigned int n) 3522 { 3523 struct ena_adapter *adapter = dev->data->dev_private; 3524 unsigned int xstats_count = ena_xstats_calc_num(dev->data); 3525 unsigned int stat, i, count = 0; 3526 int stat_offset; 3527 void *stats_begin; 3528 uint64_t metrics_stats[ENA_MAX_CUSTOMER_METRICS]; 3529 struct ena_stats_srd srd_info = {0}; 3530 3531 if (n < xstats_count) 3532 return xstats_count; 3533 3534 if (!xstats) 3535 return 0; 3536 3537 for (stat = 0; stat < ENA_STATS_ARRAY_GLOBAL; stat++, count++) { 3538 stat_offset = ena_stats_global_strings[stat].stat_offset; 3539 stats_begin = &adapter->dev_stats; 3540 3541 xstats[count].id = count; 3542 xstats[count].value = *((uint64_t *) 3543 ((char *)stats_begin + stat_offset)); 3544 } 3545 3546 ena_copy_customer_metrics(adapter, metrics_stats, adapter->metrics_num); 3547 stats_begin = metrics_stats; 3548 for (stat = 0; stat < adapter->metrics_num; stat++, count++) { 3549 stat_offset = ena_stats_metrics_strings[stat].stat_offset; 3550 3551 xstats[count].id = count; 3552 xstats[count].value = *((uint64_t *) 3553 ((char *)stats_begin + stat_offset)); 3554 } 3555 3556 ena_copy_ena_srd_info(adapter, &srd_info); 3557 stats_begin = &srd_info; 3558 for (stat = 0; stat < ENA_STATS_ARRAY_ENA_SRD; stat++, count++) { 3559 stat_offset = ena_stats_srd_strings[stat].stat_offset; 3560 xstats[count].id = count; 3561 xstats[count].value = *((uint64_t *) 3562 ((char *)stats_begin + stat_offset)); 3563 } 3564 3565 for (stat = 0; stat < ENA_STATS_ARRAY_RX; stat++) { 3566 for (i = 0; i < dev->data->nb_rx_queues; i++, count++) { 3567 stat_offset = ena_stats_rx_strings[stat].stat_offset; 3568 stats_begin = &adapter->rx_ring[i].rx_stats; 3569 3570 xstats[count].id = count; 3571 xstats[count].value = *((uint64_t *) 3572 ((char *)stats_begin + stat_offset)); 3573 } 3574 } 3575 3576 for (stat = 0; stat < ENA_STATS_ARRAY_TX; stat++) { 3577 for (i = 0; i < dev->data->nb_tx_queues; i++, count++) { 3578 stat_offset = ena_stats_tx_strings[stat].stat_offset; 3579 stats_begin = &adapter->tx_ring[i].tx_stats; 3580 3581 xstats[count].id = count; 3582 xstats[count].value = *((uint64_t *) 3583 ((char *)stats_begin + stat_offset)); 3584 } 3585 } 3586 3587 return count; 3588 } 3589 3590 static int ena_xstats_get_by_id(struct rte_eth_dev *dev, 3591 const uint64_t *ids, 3592 uint64_t *values, 3593 unsigned int n) 3594 { 3595 struct ena_adapter *adapter = dev->data->dev_private; 3596 uint64_t id; 3597 uint64_t rx_entries, tx_entries; 3598 unsigned int i; 3599 int qid; 3600 int valid = 0; 3601 bool were_metrics_copied = false; 3602 bool was_srd_info_copied = false; 3603 uint64_t metrics_stats[ENA_MAX_CUSTOMER_METRICS]; 3604 struct ena_stats_srd srd_info = {0}; 3605 3606 for (i = 0; i < n; ++i) { 3607 id = ids[i]; 3608 /* Check if id belongs to global statistics */ 3609 if (id < ENA_STATS_ARRAY_GLOBAL) { 3610 values[i] = *((uint64_t *)&adapter->dev_stats + id); 3611 ++valid; 3612 continue; 3613 } 3614 3615 /* Check if id belongs to ENI statistics */ 3616 id -= ENA_STATS_ARRAY_GLOBAL; 3617 if (id < adapter->metrics_num) { 3618 /* Avoid reading metrics multiple times in a single 3619 * function call, as it requires communication with the 3620 * admin queue.
static int ena_xstats_get_by_id(struct rte_eth_dev *dev,
				const uint64_t *ids,
				uint64_t *values,
				unsigned int n)
{
	struct ena_adapter *adapter = dev->data->dev_private;
	uint64_t id;
	uint64_t rx_entries, tx_entries;
	unsigned int i;
	int qid;
	int valid = 0;
	bool were_metrics_copied = false;
	bool was_srd_info_copied = false;
	uint64_t metrics_stats[ENA_MAX_CUSTOMER_METRICS];
	struct ena_stats_srd srd_info = {0};

	for (i = 0; i < n; ++i) {
		id = ids[i];
		/* Check if id belongs to global statistics */
		if (id < ENA_STATS_ARRAY_GLOBAL) {
			values[i] = *((uint64_t *)&adapter->dev_stats + id);
			++valid;
			continue;
		}

		/* Check if id belongs to ENI statistics */
		id -= ENA_STATS_ARRAY_GLOBAL;
		if (id < adapter->metrics_num) {
			/* Avoid reading metrics multiple times in a single
			 * function call, as it requires communication with the
			 * admin queue.
			 */
			if (!were_metrics_copied) {
				were_metrics_copied = true;
				ena_copy_customer_metrics(adapter,
					metrics_stats,
					adapter->metrics_num);
			}

			values[i] = *((uint64_t *)&metrics_stats + id);
			++valid;
			continue;
		}

		/* Check if id belongs to SRD info statistics */
		id -= adapter->metrics_num;

		if (id < ENA_STATS_ARRAY_ENA_SRD) {
			/*
			 * Avoid reading srd info multiple times in a single
			 * function call, as it requires communication with the
			 * admin queue.
			 */
			if (!was_srd_info_copied) {
				was_srd_info_copied = true;
				ena_copy_ena_srd_info(adapter, &srd_info);
			}
			values[i] = *((uint64_t *)&adapter->srd_stats + id);
			++valid;
			continue;
		}

		/* Check if id belongs to rx queue statistics */
		id -= ENA_STATS_ARRAY_ENA_SRD;

		rx_entries = ENA_STATS_ARRAY_RX * dev->data->nb_rx_queues;
		if (id < rx_entries) {
			qid = id % dev->data->nb_rx_queues;
			id /= dev->data->nb_rx_queues;
			values[i] = *((uint64_t *)
				&adapter->rx_ring[qid].rx_stats + id);
			++valid;
			continue;
		}
		/* Check if id belongs to tx queue statistics */
		id -= rx_entries;
		tx_entries = ENA_STATS_ARRAY_TX * dev->data->nb_tx_queues;
		if (id < tx_entries) {
			qid = id % dev->data->nb_tx_queues;
			id /= dev->data->nb_tx_queues;
			values[i] = *((uint64_t *)
				&adapter->tx_ring[qid].tx_stats + id);
			++valid;
			continue;
		}
	}

	return valid;
}

static int ena_process_uint_devarg(const char *key,
				   const char *value,
				   void *opaque)
{
	struct ena_adapter *adapter = opaque;
	char *str_end;
	uint64_t uint64_value;

	uint64_value = strtoull(value, &str_end, DECIMAL_BASE);
	if (value == str_end) {
		PMD_INIT_LOG_LINE(ERR,
			"Invalid value for key '%s'. Only uint values are accepted.",
			key);
		return -EINVAL;
	}

	if (strcmp(key, ENA_DEVARG_MISS_TXC_TO) == 0) {
		if (uint64_value > ENA_MAX_TX_TIMEOUT_SECONDS) {
			PMD_INIT_LOG_LINE(ERR,
				"Tx timeout too high: %" PRIu64 " sec. Maximum allowed: %d sec.",
				uint64_value, ENA_MAX_TX_TIMEOUT_SECONDS);
			return -EINVAL;
		} else if (uint64_value == 0) {
			PMD_INIT_LOG_LINE(INFO,
				"Check for missing Tx completions has been disabled.");
			adapter->missing_tx_completion_to =
				ENA_HW_HINTS_NO_TIMEOUT;
		} else {
			PMD_INIT_LOG_LINE(INFO,
				"Tx packet completion timeout set to %" PRIu64 " seconds.",
				uint64_value);
			adapter->missing_tx_completion_to =
				uint64_value * rte_get_timer_hz();
		}
	} else if (strcmp(key, ENA_DEVARG_CONTROL_PATH_POLL_INTERVAL) == 0) {
		if (uint64_value > ENA_MAX_CONTROL_PATH_POLL_INTERVAL_MSEC) {
			PMD_INIT_LOG_LINE(ERR,
				"Control path polling interval is too long: %" PRIu64 " msecs. "
				"Maximum allowed: %d msecs.",
				uint64_value, ENA_MAX_CONTROL_PATH_POLL_INTERVAL_MSEC);
			return -EINVAL;
		} else if (uint64_value == 0) {
			PMD_INIT_LOG_LINE(INFO,
				"Control path polling interval is set to zero. Operating in "
				"interrupt mode.");
			adapter->control_path_poll_interval = 0;
		} else {
			PMD_INIT_LOG_LINE(INFO,
				"Control path polling interval is set to %" PRIu64 " msecs.",
				uint64_value);
			adapter->control_path_poll_interval = uint64_value * USEC_PER_MSEC;
		}
	}

	return 0;
}

static int ena_process_bool_devarg(const char *key,
				   const char *value,
				   void *opaque)
{
	struct ena_adapter *adapter = opaque;
	bool bool_value;

	/* Parse the value. */
	if (strcmp(value, "1") == 0) {
		bool_value = true;
	} else if (strcmp(value, "0") == 0) {
		bool_value = false;
	} else {
		PMD_INIT_LOG_LINE(ERR,
			"Invalid value: '%s' for key '%s'. Accepted: '0' or '1'",
			value, key);
		return -EINVAL;
	}

	/* Now, assign it to the proper adapter field. */
	if (strcmp(key, ENA_DEVARG_LARGE_LLQ_HDR) == 0)
		adapter->use_large_llq_hdr = bool_value;
	else if (strcmp(key, ENA_DEVARG_NORMAL_LLQ_HDR) == 0)
		adapter->use_normal_llq_hdr = bool_value;
	else if (strcmp(key, ENA_DEVARG_ENABLE_LLQ) == 0)
		adapter->enable_llq = bool_value;

	return 0;
}

static int ena_parse_devargs(struct ena_adapter *adapter,
			     struct rte_devargs *devargs)
{
	static const char * const allowed_args[] = {
		ENA_DEVARG_LARGE_LLQ_HDR,
		ENA_DEVARG_NORMAL_LLQ_HDR,
		ENA_DEVARG_MISS_TXC_TO,
		ENA_DEVARG_ENABLE_LLQ,
		ENA_DEVARG_CONTROL_PATH_POLL_INTERVAL,
		NULL,
	};
	struct rte_kvargs *kvlist;
	int rc;

	if (devargs == NULL)
		return 0;

	kvlist = rte_kvargs_parse(devargs->args, allowed_args);
	if (kvlist == NULL) {
		PMD_INIT_LOG_LINE(ERR, "Invalid device arguments: %s",
			devargs->args);
		return -EINVAL;
	}

	rc = rte_kvargs_process(kvlist, ENA_DEVARG_LARGE_LLQ_HDR,
		ena_process_bool_devarg, adapter);
	if (rc != 0)
		goto exit;
	rc = rte_kvargs_process(kvlist, ENA_DEVARG_NORMAL_LLQ_HDR,
		ena_process_bool_devarg, adapter);
	if (rc != 0)
		goto exit;
	rc = rte_kvargs_process(kvlist, ENA_DEVARG_MISS_TXC_TO,
		ena_process_uint_devarg, adapter);
	if (rc != 0)
		goto exit;
	rc = rte_kvargs_process(kvlist, ENA_DEVARG_ENABLE_LLQ,
		ena_process_bool_devarg, adapter);
	if (rc != 0)
		goto exit;
	rc = rte_kvargs_process(kvlist, ENA_DEVARG_CONTROL_PATH_POLL_INTERVAL,
		ena_process_uint_devarg, adapter);
	if (rc != 0)
		goto exit;

exit:
	rte_kvargs_free(kvlist);

	return rc;
}

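/*
 * Example (illustrative only): the device arguments handled above are passed
 * per device through the EAL allowlist option. The PCI address and the values
 * below are placeholders chosen for this example.
 *
 *	dpdk-testpmd -a 0000:00:06.0,large_llq_hdr=1,miss_txc_to=5,control_path_poll_interval=500 -- -i
 */
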
static int ena_setup_rx_intr(struct rte_eth_dev *dev)
{
	struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev);
	struct rte_intr_handle *intr_handle = pci_dev->intr_handle;
	int rc;
	uint16_t vectors_nb, i;
	bool rx_intr_requested = dev->data->dev_conf.intr_conf.rxq;

	if (!rx_intr_requested)
		return 0;

	if (!rte_intr_cap_multiple(intr_handle)) {
		PMD_DRV_LOG_LINE(ERR,
			"Rx interrupt requested, but it isn't supported by the PCI driver");
		return -ENOTSUP;
	}

	/* Disable interrupt mapping before the configuration starts. */
	rte_intr_disable(intr_handle);

	/* Verify if there are enough vectors available. */
	vectors_nb = dev->data->nb_rx_queues;
	if (vectors_nb > RTE_MAX_RXTX_INTR_VEC_ID) {
		PMD_DRV_LOG_LINE(ERR,
			"Too many Rx interrupts requested, maximum number: %d",
			RTE_MAX_RXTX_INTR_VEC_ID);
		rc = -ENOTSUP;
		goto enable_intr;
	}

	/* Allocate the vector list */
	if (rte_intr_vec_list_alloc(intr_handle, "intr_vec",
				    dev->data->nb_rx_queues)) {
		PMD_DRV_LOG_LINE(ERR,
			"Failed to allocate interrupt vector for %d queues",
			dev->data->nb_rx_queues);
		rc = -ENOMEM;
		goto enable_intr;
	}

	rc = rte_intr_efd_enable(intr_handle, vectors_nb);
	if (rc != 0)
		goto free_intr_vec;

	if (!rte_intr_allow_others(intr_handle)) {
		PMD_DRV_LOG_LINE(ERR,
			"Not enough interrupts available to use both ENA Admin and Rx interrupts");
		goto disable_intr_efd;
	}

	for (i = 0; i < vectors_nb; ++i)
		if (rte_intr_vec_list_index_set(intr_handle, i,
						RTE_INTR_VEC_RXTX_OFFSET + i))
			goto disable_intr_efd;

	rte_intr_enable(intr_handle);
	return 0;

disable_intr_efd:
	rte_intr_efd_disable(intr_handle);
free_intr_vec:
	rte_intr_vec_list_free(intr_handle);
enable_intr:
	rte_intr_enable(intr_handle);
	return rc;
}

static void ena_rx_queue_intr_set(struct rte_eth_dev *dev,
				  uint16_t queue_id,
				  bool unmask)
{
	struct ena_adapter *adapter = dev->data->dev_private;
	struct ena_ring *rxq = &adapter->rx_ring[queue_id];
	struct ena_eth_io_intr_reg intr_reg;

	ena_com_update_intr_reg(&intr_reg, 0, 0, unmask, 1);
	ena_com_unmask_intr(rxq->ena_com_io_cq, &intr_reg);
}

static int ena_rx_queue_intr_enable(struct rte_eth_dev *dev,
				    uint16_t queue_id)
{
	ena_rx_queue_intr_set(dev, queue_id, true);

	return 0;
}

static int ena_rx_queue_intr_disable(struct rte_eth_dev *dev,
				     uint16_t queue_id)
{
	ena_rx_queue_intr_set(dev, queue_id, false);

	return 0;
}

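/*
 * Illustrative sketch (not part of the driver): to reach the Rx interrupt
 * callbacks above, an application requests per-queue Rx interrupts at
 * configuration time and arms them around its polling loop. The port and
 * queue identifiers are assumptions and error handling is omitted.
 *
 *	struct rte_eth_conf conf = { .intr_conf = { .rxq = 1 } };
 *
 *	rte_eth_dev_configure(port_id, nb_rxq, nb_txq, &conf);
 *	...
 *	rte_eth_dev_rx_intr_enable(port_id, queue_id);
 *	(wait for the event, e.g. with rte_epoll_wait() on the queue's fd)
 *	rte_eth_dev_rx_intr_disable(port_id, queue_id);
 */
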
static int ena_configure_aenq(struct ena_adapter *adapter)
{
	uint32_t aenq_groups = adapter->all_aenq_groups;
	int rc;

	/* All_aenq_groups holds all AENQ functions supported by the device and
	 * the HW, so at first we need to be sure the LSC request is valid.
	 */
	if (adapter->edev_data->dev_conf.intr_conf.lsc != 0) {
		if (!(aenq_groups & BIT(ENA_ADMIN_LINK_CHANGE))) {
			PMD_DRV_LOG_LINE(ERR,
				"LSC requested, but it's not supported by the AENQ");
			return -EINVAL;
		}
	} else {
		/* If LSC wasn't enabled by the app, let's enable all supported
		 * AENQ procedures except the LSC.
		 */
		aenq_groups &= ~BIT(ENA_ADMIN_LINK_CHANGE);
	}

	rc = ena_com_set_aenq_config(&adapter->ena_dev, aenq_groups);
	if (rc != 0) {
		PMD_DRV_LOG_LINE(ERR, "Cannot configure AENQ groups, rc=%d", rc);
		return rc;
	}

	adapter->active_aenq_groups = aenq_groups;

	return 0;
}

int ena_mp_indirect_table_set(struct ena_adapter *adapter)
{
	return ENA_PROXY(adapter, ena_com_indirect_table_set, &adapter->ena_dev);
}

int ena_mp_indirect_table_get(struct ena_adapter *adapter,
			      uint32_t *indirect_table)
{
	return ENA_PROXY(adapter, ena_com_indirect_table_get, &adapter->ena_dev,
		indirect_table);
}

/*********************************************************************
 *  ena_plat_dpdk.h functions implementations
 *********************************************************************/

const struct rte_memzone *
ena_mem_alloc_coherent(struct rte_eth_dev_data *data, size_t size,
		       int socket_id, unsigned int alignment, void **virt_addr,
		       dma_addr_t *phys_addr)
{
	char z_name[RTE_MEMZONE_NAMESIZE];
	struct ena_adapter *adapter = data->dev_private;
	const struct rte_memzone *memzone;
	int rc;

	rc = snprintf(z_name, RTE_MEMZONE_NAMESIZE, "ena_p%d_mz%" PRIu64 "",
		data->port_id, adapter->memzone_cnt);
	if (rc >= RTE_MEMZONE_NAMESIZE) {
		PMD_DRV_LOG_LINE(ERR,
			"Name for the ena_com memzone is too long. Port: %d, mz_num: %" PRIu64,
			data->port_id, adapter->memzone_cnt);
		goto error;
	}
	adapter->memzone_cnt++;

	memzone = rte_memzone_reserve_aligned(z_name, size, socket_id,
		RTE_MEMZONE_IOVA_CONTIG, alignment);
	if (memzone == NULL) {
		PMD_DRV_LOG_LINE(ERR, "Failed to allocate ena_com memzone: %s",
			z_name);
		goto error;
	}

	memset(memzone->addr, 0, size);
	*virt_addr = memzone->addr;
	*phys_addr = memzone->iova;

	return memzone;

error:
	*virt_addr = NULL;
	*phys_addr = 0;

	return NULL;
}

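/*
 * Example of the name generated above, assuming port 0 and a current
 * memzone_cnt of 42: "ena_p0_mz42". The per-adapter counter only increases,
 * so each ena_com allocation gets a unique memzone name.
 */
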
/*********************************************************************
 *  PMD configuration
 *********************************************************************/
static int eth_ena_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
	struct rte_pci_device *pci_dev)
{
	return rte_eth_dev_pci_generic_probe(pci_dev,
		sizeof(struct ena_adapter), eth_ena_dev_init);
}

static int eth_ena_pci_remove(struct rte_pci_device *pci_dev)
{
	return rte_eth_dev_pci_generic_remove(pci_dev, eth_ena_dev_uninit);
}

static struct rte_pci_driver rte_ena_pmd = {
	.id_table = pci_id_ena_map,
	.drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC |
		     RTE_PCI_DRV_WC_ACTIVATE,
	.probe = eth_ena_pci_probe,
	.remove = eth_ena_pci_remove,
};

RTE_PMD_REGISTER_PCI(net_ena, rte_ena_pmd);
RTE_PMD_REGISTER_PCI_TABLE(net_ena, pci_id_ena_map);
RTE_PMD_REGISTER_KMOD_DEP(net_ena, "* igb_uio | uio_pci_generic | vfio-pci");
RTE_PMD_REGISTER_PARAM_STRING(net_ena,
	ENA_DEVARG_LARGE_LLQ_HDR "=<0|1> "
	ENA_DEVARG_NORMAL_LLQ_HDR "=<0|1> "
	ENA_DEVARG_ENABLE_LLQ "=<0|1> "
	ENA_DEVARG_MISS_TXC_TO "=<uint> "
	ENA_DEVARG_CONTROL_PATH_POLL_INTERVAL "=<0-1000>");
RTE_LOG_REGISTER_SUFFIX(ena_logtype_init, init, NOTICE);
RTE_LOG_REGISTER_SUFFIX(ena_logtype_driver, driver, NOTICE);
#ifdef RTE_ETHDEV_DEBUG_RX
RTE_LOG_REGISTER_SUFFIX(ena_logtype_rx, rx, DEBUG);
#endif
#ifdef RTE_ETHDEV_DEBUG_TX
RTE_LOG_REGISTER_SUFFIX(ena_logtype_tx, tx, DEBUG);
#endif
RTE_LOG_REGISTER_SUFFIX(ena_logtype_com, com, WARNING);

/******************************************************************************
 ******************************** AENQ Handlers *******************************
 *****************************************************************************/
static void ena_update_on_link_change(void *adapter_data,
				      struct ena_admin_aenq_entry *aenq_e)
{
	struct rte_eth_dev *eth_dev = adapter_data;
	struct ena_adapter *adapter = eth_dev->data->dev_private;
	struct ena_admin_aenq_link_change_desc *aenq_link_desc;
	uint32_t status;

	aenq_link_desc = (struct ena_admin_aenq_link_change_desc *)aenq_e;

	status = get_ena_admin_aenq_link_change_desc_link_status(aenq_link_desc);
	adapter->link_status = status;

	ena_link_update(eth_dev, 0);
	rte_eth_dev_callback_process(eth_dev, RTE_ETH_EVENT_INTR_LSC, NULL);
}

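/*
 * Illustrative sketch (not part of the driver): the RTE_ETH_EVENT_INTR_LSC
 * event raised above is consumed by an application callback registered via
 * the ethdev API (LSC must also be requested with dev_conf.intr_conf.lsc = 1).
 * The callback name below is an assumption for this example.
 *
 *	static int
 *	link_cb(uint16_t port_id, enum rte_eth_event_type type,
 *		void *cb_arg, void *ret_param)
 *	{
 *		struct rte_eth_link link;
 *
 *		RTE_SET_USED(type);
 *		RTE_SET_USED(cb_arg);
 *		RTE_SET_USED(ret_param);
 *		rte_eth_link_get_nowait(port_id, &link);
 *		printf("Port %u link is %s\n", port_id,
 *		       link.link_status ? "up" : "down");
 *		return 0;
 *	}
 *
 *	rte_eth_dev_callback_register(port_id, RTE_ETH_EVENT_INTR_LSC, link_cb, NULL);
 */
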
static void ena_notification(void *adapter_data,
			     struct ena_admin_aenq_entry *aenq_e)
{
	struct rte_eth_dev *eth_dev = adapter_data;
	struct ena_adapter *adapter = eth_dev->data->dev_private;
	struct ena_admin_ena_hw_hints *hints;

	if (aenq_e->aenq_common_desc.group != ENA_ADMIN_NOTIFICATION)
		PMD_DRV_LOG_LINE(WARNING, "Invalid AENQ group: %x. Expected: %x",
			aenq_e->aenq_common_desc.group,
			ENA_ADMIN_NOTIFICATION);

	switch (aenq_e->aenq_common_desc.syndrome) {
	case ENA_ADMIN_UPDATE_HINTS:
		hints = (struct ena_admin_ena_hw_hints *)
			(&aenq_e->inline_data_w4);
		ena_update_hints(adapter, hints);
		break;
	default:
		PMD_DRV_LOG_LINE(ERR, "Invalid AENQ notification syndrome: %d",
			aenq_e->aenq_common_desc.syndrome);
	}
}

static void ena_keep_alive(void *adapter_data,
			   __rte_unused struct ena_admin_aenq_entry *aenq_e)
{
	struct rte_eth_dev *eth_dev = adapter_data;
	struct ena_adapter *adapter = eth_dev->data->dev_private;
	struct ena_admin_aenq_keep_alive_desc *desc;
	uint64_t rx_drops;
	uint64_t tx_drops;
	uint64_t rx_overruns;

	adapter->timestamp_wd = rte_get_timer_cycles();

	desc = (struct ena_admin_aenq_keep_alive_desc *)aenq_e;
	rx_drops = ((uint64_t)desc->rx_drops_high << 32) | desc->rx_drops_low;
	tx_drops = ((uint64_t)desc->tx_drops_high << 32) | desc->tx_drops_low;
	rx_overruns = ((uint64_t)desc->rx_overruns_high << 32) | desc->rx_overruns_low;

	/*
	 * Depending on its acceleration support, the device updates a different
	 * statistic when an Rx packet is dropped because there are no available
	 * buffers to accommodate it.
	 */
	adapter->drv_stats->rx_drops = rx_drops + rx_overruns;
	adapter->dev_stats.tx_drops = tx_drops;
}

static void ena_suboptimal_configuration(__rte_unused void *adapter_data,
					 struct ena_admin_aenq_entry *aenq_e)
{
	struct ena_admin_aenq_conf_notifications_desc *desc;
	int bit, num_bits;

	desc = (struct ena_admin_aenq_conf_notifications_desc *)aenq_e;
	num_bits = BITS_PER_TYPE(desc->notifications_bitmap);
	for (bit = 0; bit < num_bits; bit++) {
		if (desc->notifications_bitmap & RTE_BIT64(bit)) {
			PMD_DRV_LOG_LINE(WARNING,
				"Sub-optimal configuration notification code: %d", bit + 1);
		}
	}
}

/**
 * This handler will be called for an unknown event group or for events with
 * unimplemented handlers.
 **/
static void unimplemented_aenq_handler(__rte_unused void *data,
				       __rte_unused struct ena_admin_aenq_entry *aenq_e)
{
	PMD_DRV_LOG_LINE(ERR,
		"Unknown event was received or event with unimplemented handler");
}

static struct ena_aenq_handlers aenq_handlers = {
	.handlers = {
		[ENA_ADMIN_LINK_CHANGE] = ena_update_on_link_change,
		[ENA_ADMIN_NOTIFICATION] = ena_notification,
		[ENA_ADMIN_KEEP_ALIVE] = ena_keep_alive,
		[ENA_ADMIN_CONF_NOTIFICATIONS] = ena_suboptimal_configuration
	},
	.unimplemented_handler = unimplemented_aenq_handler
};

/*********************************************************************
 *  Multi-Process communication request handling (in primary)
 *********************************************************************/
static int
ena_mp_primary_handle(const struct rte_mp_msg *mp_msg, const void *peer)
{
	const struct ena_mp_body *req =
		(const struct ena_mp_body *)mp_msg->param;
	struct ena_adapter *adapter;
	struct ena_com_dev *ena_dev;
	struct ena_mp_body *rsp;
	struct rte_mp_msg mp_rsp;
	struct rte_eth_dev *dev;
	int res = 0;

	rsp = (struct ena_mp_body *)&mp_rsp.param;
	mp_msg_init(&mp_rsp, req->type, req->port_id);

	if (!rte_eth_dev_is_valid_port(req->port_id)) {
		rte_errno = ENODEV;
		res = -rte_errno;
		PMD_DRV_LOG_LINE(ERR, "Unknown port %d in request %d",
			req->port_id, req->type);
		goto end;
	}
	dev = &rte_eth_devices[req->port_id];
	adapter = dev->data->dev_private;
	ena_dev = &adapter->ena_dev;

	switch (req->type) {
	case ENA_MP_DEV_STATS_GET:
		res = ena_com_get_dev_basic_stats(ena_dev,
						  &adapter->basic_stats);
		break;
	case ENA_MP_ENI_STATS_GET:
		res = ena_com_get_eni_stats(ena_dev,
			(struct ena_admin_eni_stats *)&adapter->metrics_stats);
		break;
	case ENA_MP_MTU_SET:
		res = ena_com_set_dev_mtu(ena_dev, req->args.mtu);
		break;
	case ENA_MP_IND_TBL_GET:
		res = ena_com_indirect_table_get(ena_dev,
						 adapter->indirect_table);
		break;
	case ENA_MP_IND_TBL_SET:
		res = ena_com_indirect_table_set(ena_dev);
		break;
	case ENA_MP_CUSTOMER_METRICS_GET:
		res = ena_com_get_customer_metrics(ena_dev,
			(char *)adapter->metrics_stats,
			adapter->metrics_num * sizeof(uint64_t));
		break;
	case ENA_MP_SRD_STATS_GET:
		res = ena_com_get_ena_srd_info(ena_dev,
			(struct ena_admin_ena_srd_info *)&adapter->srd_stats);
		break;
	default:
		PMD_DRV_LOG_LINE(ERR, "Unknown request type %d", req->type);
		res = -EINVAL;
		break;
	}

end:
	/* Save processing result in the reply */
	rsp->result = res;
	/* Return just IPC processing status */
	return rte_mp_reply(&mp_rsp, peer);
}

static ena_llq_policy ena_define_llq_hdr_policy(struct ena_adapter *adapter)
{
	if (!adapter->enable_llq)
		return ENA_LLQ_POLICY_DISABLED;
	if (adapter->use_large_llq_hdr)
		return ENA_LLQ_POLICY_LARGE;
	if (adapter->use_normal_llq_hdr)
		return ENA_LLQ_POLICY_NORMAL;
	return ENA_LLQ_POLICY_RECOMMENDED;
}

static bool ena_use_large_llq_hdr(struct ena_adapter *adapter, uint8_t recommended_entry_size)
{
	if (adapter->llq_header_policy == ENA_LLQ_POLICY_LARGE) {
		return true;
	} else if (adapter->llq_header_policy == ENA_LLQ_POLICY_RECOMMENDED) {
		PMD_DRV_LOG_LINE(INFO, "Recommended device entry size policy %u",
			recommended_entry_size);
		if (recommended_entry_size == ENA_ADMIN_LIST_ENTRY_SIZE_256B)
			return true;
	}
	return false;
}

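/*
 * For reference, the devarg-to-policy mapping implemented by
 * ena_define_llq_hdr_policy() above (first match wins):
 *
 *	enable_llq=0      -> ENA_LLQ_POLICY_DISABLED
 *	large_llq_hdr=1   -> ENA_LLQ_POLICY_LARGE
 *	normal_llq_hdr=1  -> ENA_LLQ_POLICY_NORMAL
 *	otherwise         -> ENA_LLQ_POLICY_RECOMMENDED
 *
 * With the recommended policy, ena_use_large_llq_hdr() follows the entry size
 * suggested by the device and enables large LLQ headers only when 256B entries
 * are recommended.
 */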