/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright (c) 2015-2020 Amazon.com, Inc. or its affiliates.
 * All rights reserved.
 */

#include <rte_alarm.h>
#include <rte_string_fns.h>
#include <rte_errno.h>
#include <rte_version.h>
#include <rte_net.h>
#include <rte_kvargs.h>

#include "ena_ethdev.h"
#include "ena_logs.h"
#include "ena_platform.h"
#include "ena_com.h"
#include "ena_eth_com.h"

#include <ena_common_defs.h>
#include <ena_regs_defs.h>
#include <ena_admin_defs.h>
#include <ena_eth_io_defs.h>

#define DRV_MODULE_VER_MAJOR	2
#define DRV_MODULE_VER_MINOR	9
#define DRV_MODULE_VER_SUBMINOR	0

#define __MERGE_64B_H_L(h, l) (((uint64_t)h << 32) | l)

#define GET_L4_HDR_LEN(mbuf) \
	((rte_pktmbuf_mtod_offset(mbuf, struct rte_tcp_hdr *, \
		mbuf->l3_len + mbuf->l2_len)->data_off) >> 4)

#define ETH_GSTRING_LEN	32

#define ARRAY_SIZE(x) RTE_DIM(x)

#define ENA_MIN_RING_DESC	128

#define USEC_PER_MSEC	1000UL

#define BITS_PER_BYTE 8

#define BITS_PER_TYPE(type) (sizeof(type) * BITS_PER_BYTE)

#define DECIMAL_BASE 10

#define MAX_WIDE_LLQ_DEPTH_UNSUPPORTED 0

/*
 * We should try to keep ENA_CLEANUP_BUF_THRESH lower than
 * RTE_MEMPOOL_CACHE_MAX_SIZE, so we can fit this in the mempool local cache.
 */
#define ENA_CLEANUP_BUF_THRESH	256

#define ENA_PTYPE_HAS_HASH	(RTE_PTYPE_L4_TCP | RTE_PTYPE_L4_UDP)

struct ena_stats {
	char name[ETH_GSTRING_LEN];
	int stat_offset;
};

#define ENA_STAT_ENTRY(stat, stat_type) { \
	.name = #stat, \
	.stat_offset = offsetof(struct ena_stats_##stat_type, stat) \
}

#define ENA_STAT_RX_ENTRY(stat) \
	ENA_STAT_ENTRY(stat, rx)

#define ENA_STAT_TX_ENTRY(stat) \
	ENA_STAT_ENTRY(stat, tx)

#define ENA_STAT_METRICS_ENTRY(stat) \
	ENA_STAT_ENTRY(stat, metrics)

#define ENA_STAT_GLOBAL_ENTRY(stat) \
	ENA_STAT_ENTRY(stat, dev)

#define ENA_STAT_ENA_SRD_ENTRY(stat) \
	ENA_STAT_ENTRY(stat, srd)

/* Device arguments */
#define ENA_DEVARG_LARGE_LLQ_HDR "large_llq_hdr"
#define ENA_DEVARG_NORMAL_LLQ_HDR "normal_llq_hdr"
/* Timeout in seconds after which a single uncompleted Tx packet should be
 * considered as missing.
 */
#define ENA_DEVARG_MISS_TXC_TO "miss_txc_to"
/*
 * Controls whether LLQ should be used (if available). Enabled by default.
 * NOTE: Disabling the LLQ is highly discouraged, as it may lead to a huge
 * performance degradation on 6th generation AWS instances.
 */
#define ENA_DEVARG_ENABLE_LLQ "enable_llq"
/*
 * Controls the period of time (in milliseconds) between two consecutive inspections of
 * the control queues when the driver is in poll mode and not using interrupts.
 * By default, this value is zero, indicating that the driver will not be in poll mode and will
 * use interrupts. A non-zero value for this argument is mandatory when using the
 * uio_pci_generic driver.
 */
#define ENA_DEVARG_CONTROL_PATH_POLL_INTERVAL "control_path_poll_interval"
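
/*
 * For illustration only: the device arguments above are passed through the
 * usual DPDK devargs syntax, so an allow-list entry along the lines of
 * "-a <PCI BDF>,large_llq_hdr=1,control_path_poll_interval=500" would request
 * large LLQ headers and a 500 ms control path polling period. The exact
 * application-side option depends on how the EAL is invoked.
 */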

/*
 * Each rte_memzone should have a unique name.
 * To satisfy this requirement, count the number of allocations and add it to
 * the name.
 */
rte_atomic64_t ena_alloc_cnt;

static const struct ena_stats ena_stats_global_strings[] = {
	ENA_STAT_GLOBAL_ENTRY(wd_expired),
	ENA_STAT_GLOBAL_ENTRY(dev_start),
	ENA_STAT_GLOBAL_ENTRY(dev_stop),
	ENA_STAT_GLOBAL_ENTRY(tx_drops),
};

/*
 * The legacy metrics (also known as ENI stats) consisted of 5 stats, while the
 * reworked metrics (also known as customer metrics) support an additional stat.
 */
static struct ena_stats ena_stats_metrics_strings[] = {
	ENA_STAT_METRICS_ENTRY(bw_in_allowance_exceeded),
	ENA_STAT_METRICS_ENTRY(bw_out_allowance_exceeded),
	ENA_STAT_METRICS_ENTRY(pps_allowance_exceeded),
	ENA_STAT_METRICS_ENTRY(conntrack_allowance_exceeded),
	ENA_STAT_METRICS_ENTRY(linklocal_allowance_exceeded),
	ENA_STAT_METRICS_ENTRY(conntrack_allowance_available),
};

static const struct ena_stats ena_stats_srd_strings[] = {
	ENA_STAT_ENA_SRD_ENTRY(ena_srd_mode),
	ENA_STAT_ENA_SRD_ENTRY(ena_srd_tx_pkts),
	ENA_STAT_ENA_SRD_ENTRY(ena_srd_eligible_tx_pkts),
	ENA_STAT_ENA_SRD_ENTRY(ena_srd_rx_pkts),
	ENA_STAT_ENA_SRD_ENTRY(ena_srd_resource_utilization),
};

static const struct ena_stats ena_stats_tx_strings[] = {
	ENA_STAT_TX_ENTRY(cnt),
	ENA_STAT_TX_ENTRY(bytes),
	ENA_STAT_TX_ENTRY(prepare_ctx_err),
	ENA_STAT_TX_ENTRY(tx_poll),
	ENA_STAT_TX_ENTRY(doorbells),
	ENA_STAT_TX_ENTRY(bad_req_id),
	ENA_STAT_TX_ENTRY(available_desc),
	ENA_STAT_TX_ENTRY(missed_tx),
};

static const struct ena_stats ena_stats_rx_strings[] = {
	ENA_STAT_RX_ENTRY(cnt),
	ENA_STAT_RX_ENTRY(bytes),
	ENA_STAT_RX_ENTRY(refill_partial),
	ENA_STAT_RX_ENTRY(l3_csum_bad),
	ENA_STAT_RX_ENTRY(l4_csum_bad),
	ENA_STAT_RX_ENTRY(l4_csum_good),
	ENA_STAT_RX_ENTRY(mbuf_alloc_fail),
	ENA_STAT_RX_ENTRY(bad_desc_num),
	ENA_STAT_RX_ENTRY(bad_req_id),
};

#define ENA_STATS_ARRAY_GLOBAL	ARRAY_SIZE(ena_stats_global_strings)
#define ENA_STATS_ARRAY_METRICS	ARRAY_SIZE(ena_stats_metrics_strings)
#define ENA_STATS_ARRAY_METRICS_LEGACY	(ENA_STATS_ARRAY_METRICS - 1)
#define ENA_STATS_ARRAY_ENA_SRD	ARRAY_SIZE(ena_stats_srd_strings)
#define ENA_STATS_ARRAY_TX	ARRAY_SIZE(ena_stats_tx_strings)
#define ENA_STATS_ARRAY_RX	ARRAY_SIZE(ena_stats_rx_strings)

#define QUEUE_OFFLOADS (RTE_ETH_TX_OFFLOAD_TCP_CKSUM |\
			RTE_ETH_TX_OFFLOAD_UDP_CKSUM |\
			RTE_ETH_TX_OFFLOAD_IPV4_CKSUM |\
			RTE_ETH_TX_OFFLOAD_TCP_TSO)
#define MBUF_OFFLOADS (RTE_MBUF_F_TX_L4_MASK |\
		       RTE_MBUF_F_TX_IP_CKSUM |\
		       RTE_MBUF_F_TX_TCP_SEG)

/** Vendor ID used by Amazon devices */
#define PCI_VENDOR_ID_AMAZON 0x1D0F
/** Amazon devices */
#define PCI_DEVICE_ID_ENA_VF		0xEC20
#define PCI_DEVICE_ID_ENA_VF_RSERV0	0xEC21

#define ENA_TX_OFFLOAD_MASK	(RTE_MBUF_F_TX_L4_MASK | \
				 RTE_MBUF_F_TX_IPV6 | \
				 RTE_MBUF_F_TX_IPV4 | \
				 RTE_MBUF_F_TX_IP_CKSUM | \
				 RTE_MBUF_F_TX_TCP_SEG)

#define ENA_TX_OFFLOAD_NOTSUP_MASK \
	(RTE_MBUF_F_TX_OFFLOAD_MASK ^ ENA_TX_OFFLOAD_MASK)
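
/*
 * Presumably (based on the masks above), mbufs requesting any Tx offload flag
 * from ENA_TX_OFFLOAD_NOTSUP_MASK are rejected by the Tx prepare callback
 * (eth_ena_prep_pkts()), since only the flags in ENA_TX_OFFLOAD_MASK are
 * handled by this PMD.
 */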

/** HW specific offloads capabilities. */
/* IPv4 checksum offload. */
#define ENA_L3_IPV4_CSUM	0x0001
/* TCP/UDP checksum offload for IPv4 packets. */
#define ENA_L4_IPV4_CSUM	0x0002
/* TCP/UDP checksum offload for IPv4 packets with pseudo header checksum. */
#define ENA_L4_IPV4_CSUM_PARTIAL	0x0004
/* TCP/UDP checksum offload for IPv6 packets. */
#define ENA_L4_IPV6_CSUM	0x0008
/* TCP/UDP checksum offload for IPv6 packets with pseudo header checksum. */
#define ENA_L4_IPV6_CSUM_PARTIAL	0x0010
/* TSO support for IPv4 packets. */
#define ENA_IPV4_TSO	0x0020

/* Device supports setting RSS hash. */
#define ENA_RX_RSS_HASH	0x0040

static const struct rte_pci_id pci_id_ena_map[] = {
	{ RTE_PCI_DEVICE(PCI_VENDOR_ID_AMAZON, PCI_DEVICE_ID_ENA_VF) },
	{ RTE_PCI_DEVICE(PCI_VENDOR_ID_AMAZON, PCI_DEVICE_ID_ENA_VF_RSERV0) },
	{ .device_id = 0 },
};

static struct ena_aenq_handlers aenq_handlers;

static int ena_device_init(struct ena_adapter *adapter,
			   struct rte_pci_device *pdev,
			   struct ena_com_dev_get_features_ctx *get_feat_ctx);
static int ena_dev_configure(struct rte_eth_dev *dev);
static void ena_tx_map_mbuf(struct ena_ring *tx_ring,
			    struct ena_tx_buffer *tx_info,
			    struct rte_mbuf *mbuf,
			    void **push_header,
			    uint16_t *header_len);
static int ena_xmit_mbuf(struct ena_ring *tx_ring, struct rte_mbuf *mbuf);
static int ena_tx_cleanup(void *txp, uint32_t free_pkt_cnt);
static uint16_t eth_ena_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
				  uint16_t nb_pkts);
static uint16_t eth_ena_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
				  uint16_t nb_pkts);
static int ena_tx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
			      uint16_t nb_desc, unsigned int socket_id,
			      const struct rte_eth_txconf *tx_conf);
static int ena_rx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
			      uint16_t nb_desc, unsigned int socket_id,
			      const struct rte_eth_rxconf *rx_conf,
			      struct rte_mempool *mp);
static inline void ena_init_rx_mbuf(struct rte_mbuf *mbuf, uint16_t len);
static struct rte_mbuf *ena_rx_mbuf(struct ena_ring *rx_ring,
				    struct ena_com_rx_buf_info *ena_bufs,
				    uint32_t descs,
				    uint16_t *next_to_clean,
				    uint8_t offset);
static uint16_t eth_ena_recv_pkts(void *rx_queue,
				  struct rte_mbuf **rx_pkts, uint16_t nb_pkts);
static int ena_add_single_rx_desc(struct ena_com_io_sq *io_sq,
				  struct rte_mbuf *mbuf, uint16_t id);
static int ena_populate_rx_queue(struct ena_ring *rxq, unsigned int count);
static void ena_init_rings(struct ena_adapter *adapter,
			   bool disable_meta_caching);
static int ena_mtu_set(struct rte_eth_dev *dev, uint16_t mtu);
static int ena_start(struct rte_eth_dev *dev);
static int ena_stop(struct rte_eth_dev *dev);
static int ena_close(struct rte_eth_dev *dev);
static int ena_dev_reset(struct rte_eth_dev *dev);
static int ena_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats);
static void ena_rx_queue_release_all(struct rte_eth_dev *dev);
static void ena_tx_queue_release_all(struct rte_eth_dev *dev);
static void ena_rx_queue_release(struct rte_eth_dev *dev, uint16_t qid);
static void ena_tx_queue_release(struct rte_eth_dev *dev, uint16_t qid);
static void ena_rx_queue_release_bufs(struct ena_ring *ring);
static void ena_tx_queue_release_bufs(struct ena_ring *ring);
static int ena_link_update(struct rte_eth_dev *dev,
			   int wait_to_complete);
static int ena_create_io_queue(struct rte_eth_dev *dev, struct ena_ring *ring);
static void ena_queue_stop(struct ena_ring *ring);
static void ena_queue_stop_all(struct rte_eth_dev *dev,
			       enum ena_ring_type ring_type);
static int ena_queue_start(struct rte_eth_dev *dev, struct ena_ring *ring);
static int ena_queue_start_all(struct rte_eth_dev *dev,
			       enum ena_ring_type ring_type);
static void ena_stats_restart(struct rte_eth_dev *dev);
static uint64_t ena_get_rx_port_offloads(struct ena_adapter *adapter);
static uint64_t ena_get_tx_port_offloads(struct ena_adapter *adapter);
static uint64_t ena_get_rx_queue_offloads(struct ena_adapter *adapter);
static uint64_t ena_get_tx_queue_offloads(struct ena_adapter *adapter);
static int ena_infos_get(struct rte_eth_dev *dev,
			 struct rte_eth_dev_info *dev_info);
static void ena_control_path_handler(void *cb_arg);
static void ena_control_path_poll_handler(void *cb_arg);
static void ena_timer_wd_callback(struct rte_timer *timer, void *arg);
static void ena_destroy_device(struct rte_eth_dev *eth_dev);
static int eth_ena_dev_init(struct rte_eth_dev *eth_dev);
static int ena_xstats_get_names(struct rte_eth_dev *dev,
				struct rte_eth_xstat_name *xstats_names,
				unsigned int n);
static int ena_xstats_get_names_by_id(struct rte_eth_dev *dev,
				      const uint64_t *ids,
				      struct rte_eth_xstat_name *xstats_names,
				      unsigned int size);
static int ena_xstats_get(struct rte_eth_dev *dev,
			  struct rte_eth_xstat *stats,
			  unsigned int n);
static int ena_xstats_get_by_id(struct rte_eth_dev *dev,
				const uint64_t *ids,
				uint64_t *values,
				unsigned int n);
static int ena_process_bool_devarg(const char *key,
				   const char *value,
				   void *opaque);
static int ena_parse_devargs(struct ena_adapter *adapter,
			     struct rte_devargs *devargs);
static void ena_copy_customer_metrics(struct ena_adapter *adapter,
				      uint64_t *buf,
				      size_t buf_size);
static void ena_copy_ena_srd_info(struct ena_adapter *adapter,
				  struct ena_stats_srd *srd_info);
static int ena_setup_rx_intr(struct rte_eth_dev *dev);
static int ena_rx_queue_intr_enable(struct rte_eth_dev *dev,
				    uint16_t queue_id);
static int ena_rx_queue_intr_disable(struct rte_eth_dev *dev,
				     uint16_t queue_id);
static int ena_configure_aenq(struct ena_adapter *adapter);
static int ena_mp_primary_handle(const struct rte_mp_msg *mp_msg,
				 const void *peer);
static ena_llq_policy ena_define_llq_hdr_policy(struct ena_adapter *adapter);
static bool ena_use_large_llq_hdr(struct ena_adapter *adapter, uint8_t recommended_entry_size);

static const struct eth_dev_ops ena_dev_ops = {
	.dev_configure = ena_dev_configure,
	.dev_infos_get = ena_infos_get,
	.rx_queue_setup = ena_rx_queue_setup,
	.tx_queue_setup = ena_tx_queue_setup,
	.dev_start = ena_start,
	.dev_stop = ena_stop,
	.link_update = ena_link_update,
	.stats_get = ena_stats_get,
	.xstats_get_names = ena_xstats_get_names,
	.xstats_get_names_by_id = ena_xstats_get_names_by_id,
	.xstats_get = ena_xstats_get,
	.xstats_get_by_id = ena_xstats_get_by_id,
	.mtu_set = ena_mtu_set,
	.rx_queue_release = ena_rx_queue_release,
	.tx_queue_release = ena_tx_queue_release,
	.dev_close = ena_close,
	.dev_reset = ena_dev_reset,
	.reta_update = ena_rss_reta_update,
	.reta_query = ena_rss_reta_query,
	.rx_queue_intr_enable = ena_rx_queue_intr_enable,
	.rx_queue_intr_disable = ena_rx_queue_intr_disable,
	.rss_hash_update = ena_rss_hash_update,
	.rss_hash_conf_get = ena_rss_hash_conf_get,
	.tx_done_cleanup = ena_tx_cleanup,
};

/*********************************************************************
 *  Multi-Process communication bits
 *********************************************************************/
/* rte_mp IPC message name */
#define ENA_MP_NAME	"net_ena_mp"
/* Request timeout in seconds */
#define ENA_MP_REQ_TMO	5

/** Proxy request type */
enum ena_mp_req {
	ENA_MP_DEV_STATS_GET,
	ENA_MP_ENI_STATS_GET,
	ENA_MP_MTU_SET,
	ENA_MP_IND_TBL_GET,
	ENA_MP_IND_TBL_SET,
	ENA_MP_CUSTOMER_METRICS_GET,
	ENA_MP_SRD_STATS_GET,
};

/** Proxy message body. Shared between requests and responses. */
struct ena_mp_body {
	/* Message type */
	enum ena_mp_req type;
	int port_id;
	/* Processing result. Set in replies. 0 if message succeeded, negative
	 * error code otherwise.
	 */
	int result;
	union {
		int mtu; /* For ENA_MP_MTU_SET */
	} args;
};

/**
 * Initialize IPC message.
 *
 * @param[out] msg
 *   Pointer to the message to initialize.
 * @param[in] type
 *   Message type.
 * @param[in] port_id
 *   Port ID of target device.
 *
 */
static void
mp_msg_init(struct rte_mp_msg *msg, enum ena_mp_req type, int port_id)
{
	struct ena_mp_body *body = (struct ena_mp_body *)&msg->param;

	memset(msg, 0, sizeof(*msg));
	strlcpy(msg->name, ENA_MP_NAME, sizeof(msg->name));
	msg->len_param = sizeof(*body);
	body->type = type;
	body->port_id = port_id;
}

/*********************************************************************
 *  Multi-Process communication PMD API
 *********************************************************************/
/**
 * Define proxy request descriptor
 *
 * Used to define all structures and functions required for proxying a given
 * function to the primary process, including the code to prepare the request
 * and process the response.
 *
 * @param[in] f
 *   Name of the function to proxy
 * @param[in] t
 *   Message type to use
 * @param[in] prep
 *   Body of a function to prepare the request in the form of a statement
 *   expression. It is passed all the original function arguments along with
 *   two extra ones:
 *   - struct ena_adapter *adapter - PMD data of the device calling the proxy.
 *   - struct ena_mp_body *req - body of a request to prepare.
 * @param[in] proc
 *   Body of a function to process the response in the form of a statement
 *   expression. It is passed all the original function arguments along with
 *   two extra ones:
 *   - struct ena_adapter *adapter - PMD data of the device calling the proxy.
 *   - struct ena_mp_body *rsp - body of a response to process.
 * @param ...
 *   Proxied function's arguments
 *
 * @note Inside prep and proc any parameters which aren't used should be marked
 *       as such (with ENA_TOUCH or __rte_unused).
 */
#define ENA_PROXY_DESC(f, t, prep, proc, ...) \
	static const enum ena_mp_req mp_type_ ## f = t; \
	static const char *mp_name_ ## f = #t; \
	static void mp_prep_ ## f(struct ena_adapter *adapter, \
				  struct ena_mp_body *req, \
				  __VA_ARGS__) \
	{ \
		prep; \
	} \
	static void mp_proc_ ## f(struct ena_adapter *adapter, \
				  struct ena_mp_body *rsp, \
				  __VA_ARGS__) \
	{ \
		proc; \
	}

/**
 * Proxy wrapper for calling primary functions in a secondary process.
 *
 * Depending on whether called in primary or secondary process, calls the
 * @p func directly or proxies the call to the primary process via rte_mp IPC.
 * This macro requires a proxy request descriptor to be defined for @p func
 * using ENA_PROXY_DESC() macro.
 *
 * @param[in/out] a
 *   Device PMD data. Used for sending the message and sharing message results
 *   between primary and secondary.
 * @param[in] f
 *   Function to proxy.
 * @param ...
 *   Arguments of @p func.
 *
 * @return
 *   - 0: Processing succeeded and response handler was called.
 *   - -EPERM: IPC is unavailable on this platform. This means only the primary
 *             process may call the proxied function.
 *   - -EIO:   IPC returned error on request send. Inspect rte_errno for the
 *             detailed error code.
 *   - Negative error code from the proxied function.
 *
 * @note This mechanism is geared towards control-path tasks. Avoid calling it
 *       in fast-path unless unbounded delays are allowed. This is due to the
 *       IPC mechanism itself (socket based).
 * @note Due to IPC parameter size limitations the proxy logic shares call
 *       results through the struct ena_adapter shared memory. This makes the
 *       proxy mechanism strictly single-threaded. Therefore be sure to make
 *       all calls to the same proxied function under the same lock.
 */
#define ENA_PROXY(a, f, ...) \
__extension__ ({ \
	struct ena_adapter *_a = (a); \
	struct timespec ts = { .tv_sec = ENA_MP_REQ_TMO }; \
	struct ena_mp_body *req, *rsp; \
	struct rte_mp_reply mp_rep; \
	struct rte_mp_msg mp_req; \
	int ret; \
 \
	if (rte_eal_process_type() == RTE_PROC_PRIMARY) { \
		ret = f(__VA_ARGS__); \
	} else { \
		/* Prepare and send request */ \
		req = (struct ena_mp_body *)&mp_req.param; \
		mp_msg_init(&mp_req, mp_type_ ## f, _a->edev_data->port_id); \
		mp_prep_ ## f(_a, req, ## __VA_ARGS__); \
 \
		ret = rte_mp_request_sync(&mp_req, &mp_rep, &ts); \
		if (likely(!ret)) { \
			RTE_ASSERT(mp_rep.nb_received == 1); \
			rsp = (struct ena_mp_body *)&mp_rep.msgs[0].param; \
			ret = rsp->result; \
			if (ret == 0) { \
				mp_proc_##f(_a, rsp, ## __VA_ARGS__); \
			} else { \
				PMD_DRV_LOG(ERR, \
					    "%s returned error: %d\n", \
					    mp_name_ ## f, rsp->result);\
			} \
			free(mp_rep.msgs); \
		} else if (rte_errno == ENOTSUP) { \
			PMD_DRV_LOG(ERR, \
				    "No IPC, can't proxy to primary\n");\
			ret = -rte_errno; \
		} else { \
			PMD_DRV_LOG(ERR, "Request %s failed: %s\n", \
				    mp_name_ ## f, \
				    rte_strerror(rte_errno)); \
			ret = -EIO; \
		} \
	} \
	ret; \
})
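
/*
 * Typical usage, as in ena_stats_get() below: the basic stats query is routed
 * through the proxy so it works from both the primary and secondary process:
 *
 *	rc = ENA_PROXY(adapter, ena_com_get_dev_basic_stats, ena_dev,
 *		       &ena_stats);
 */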

/*********************************************************************
 *  Multi-Process communication request descriptors
 *********************************************************************/

ENA_PROXY_DESC(ena_com_get_dev_basic_stats, ENA_MP_DEV_STATS_GET,
__extension__ ({
	ENA_TOUCH(adapter);
	ENA_TOUCH(req);
	ENA_TOUCH(ena_dev);
	ENA_TOUCH(stats);
}),
__extension__ ({
	ENA_TOUCH(rsp);
	ENA_TOUCH(ena_dev);
	if (stats != &adapter->basic_stats)
		rte_memcpy(stats, &adapter->basic_stats, sizeof(*stats));
}),
	struct ena_com_dev *ena_dev, struct ena_admin_basic_stats *stats);

ENA_PROXY_DESC(ena_com_get_eni_stats, ENA_MP_ENI_STATS_GET,
__extension__ ({
	ENA_TOUCH(adapter);
	ENA_TOUCH(req);
	ENA_TOUCH(ena_dev);
	ENA_TOUCH(stats);
}),
__extension__ ({
	ENA_TOUCH(rsp);
	ENA_TOUCH(ena_dev);
	if (stats != (struct ena_admin_eni_stats *)adapter->metrics_stats)
		rte_memcpy(stats, adapter->metrics_stats, sizeof(*stats));
}),
	struct ena_com_dev *ena_dev, struct ena_admin_eni_stats *stats);

ENA_PROXY_DESC(ena_com_set_dev_mtu, ENA_MP_MTU_SET,
__extension__ ({
	ENA_TOUCH(adapter);
	ENA_TOUCH(ena_dev);
	req->args.mtu = mtu;
}),
__extension__ ({
	ENA_TOUCH(adapter);
	ENA_TOUCH(rsp);
	ENA_TOUCH(ena_dev);
	ENA_TOUCH(mtu);
}),
	struct ena_com_dev *ena_dev, int mtu);

ENA_PROXY_DESC(ena_com_indirect_table_set, ENA_MP_IND_TBL_SET,
__extension__ ({
	ENA_TOUCH(adapter);
	ENA_TOUCH(req);
	ENA_TOUCH(ena_dev);
}),
__extension__ ({
	ENA_TOUCH(adapter);
	ENA_TOUCH(rsp);
	ENA_TOUCH(ena_dev);
}),
	struct ena_com_dev *ena_dev);

ENA_PROXY_DESC(ena_com_indirect_table_get, ENA_MP_IND_TBL_GET,
__extension__ ({
	ENA_TOUCH(adapter);
	ENA_TOUCH(req);
	ENA_TOUCH(ena_dev);
	ENA_TOUCH(ind_tbl);
}),
__extension__ ({
	ENA_TOUCH(rsp);
	ENA_TOUCH(ena_dev);
	if (ind_tbl != adapter->indirect_table)
		rte_memcpy(ind_tbl, adapter->indirect_table,
			   sizeof(adapter->indirect_table));
}),
	struct ena_com_dev *ena_dev, u32 *ind_tbl);

ENA_PROXY_DESC(ena_com_get_customer_metrics, ENA_MP_CUSTOMER_METRICS_GET,
__extension__ ({
	ENA_TOUCH(adapter);
	ENA_TOUCH(req);
	ENA_TOUCH(ena_dev);
	ENA_TOUCH(buf);
	ENA_TOUCH(buf_size);
}),
__extension__ ({
	ENA_TOUCH(rsp);
	ENA_TOUCH(ena_dev);
	if (buf != (char *)adapter->metrics_stats)
		rte_memcpy(buf, adapter->metrics_stats, buf_size);
}),
	struct ena_com_dev *ena_dev, char *buf, size_t buf_size);

ENA_PROXY_DESC(ena_com_get_ena_srd_info, ENA_MP_SRD_STATS_GET,
__extension__ ({
	ENA_TOUCH(adapter);
	ENA_TOUCH(req);
	ENA_TOUCH(ena_dev);
	ENA_TOUCH(info);
}),
__extension__ ({
	ENA_TOUCH(rsp);
	ENA_TOUCH(ena_dev);
	if ((struct ena_stats_srd *)info != &adapter->srd_stats)
		rte_memcpy((struct ena_stats_srd *)info,
			   &adapter->srd_stats,
			   sizeof(struct ena_stats_srd));
}),
	struct ena_com_dev *ena_dev, struct ena_admin_ena_srd_info *info);

static inline void ena_trigger_reset(struct ena_adapter *adapter,
				     enum ena_regs_reset_reason_types reason)
{
	if (likely(!adapter->trigger_reset)) {
		adapter->reset_reason = reason;
		adapter->trigger_reset = true;
	}
}

static inline void ena_rx_mbuf_prepare(struct ena_ring *rx_ring,
				       struct rte_mbuf *mbuf,
				       struct ena_com_rx_ctx *ena_rx_ctx,
				       bool fill_hash)
{
	struct ena_stats_rx *rx_stats = &rx_ring->rx_stats;
	uint64_t ol_flags = 0;
	uint32_t packet_type = 0;

	if (ena_rx_ctx->l4_proto == ENA_ETH_IO_L4_PROTO_TCP)
		packet_type |= RTE_PTYPE_L4_TCP;
	else if (ena_rx_ctx->l4_proto == ENA_ETH_IO_L4_PROTO_UDP)
		packet_type |= RTE_PTYPE_L4_UDP;

	if (ena_rx_ctx->l3_proto == ENA_ETH_IO_L3_PROTO_IPV4) {
		packet_type |= RTE_PTYPE_L3_IPV4;
		if (unlikely(ena_rx_ctx->l3_csum_err)) {
			++rx_stats->l3_csum_bad;
			ol_flags |= RTE_MBUF_F_RX_IP_CKSUM_BAD;
		} else {
			ol_flags |= RTE_MBUF_F_RX_IP_CKSUM_GOOD;
		}
	} else if (ena_rx_ctx->l3_proto == ENA_ETH_IO_L3_PROTO_IPV6) {
		packet_type |= RTE_PTYPE_L3_IPV6;
	}

	if (!ena_rx_ctx->l4_csum_checked || ena_rx_ctx->frag) {
		ol_flags |= RTE_MBUF_F_RX_L4_CKSUM_UNKNOWN;
	} else {
		if (unlikely(ena_rx_ctx->l4_csum_err)) {
			++rx_stats->l4_csum_bad;
			/*
			 * For the L4 Rx checksum offload the HW may indicate a
			 * bad checksum even though it is valid. Because of
			 * that, we're setting the UNKNOWN flag to let the app
			 * re-verify the checksum.
			 */
			ol_flags |= RTE_MBUF_F_RX_L4_CKSUM_UNKNOWN;
		} else {
			++rx_stats->l4_csum_good;
			ol_flags |= RTE_MBUF_F_RX_L4_CKSUM_GOOD;
		}
	}

	if (fill_hash &&
	    likely((packet_type & ENA_PTYPE_HAS_HASH) && !ena_rx_ctx->frag)) {
		ol_flags |= RTE_MBUF_F_RX_RSS_HASH;
		mbuf->hash.rss = ena_rx_ctx->hash;
	}

	mbuf->ol_flags = ol_flags;
	mbuf->packet_type = packet_type;
}

static inline void ena_tx_mbuf_prepare(struct rte_mbuf *mbuf,
				       struct ena_com_tx_ctx *ena_tx_ctx,
				       uint64_t queue_offloads,
				       bool disable_meta_caching)
{
	struct ena_com_tx_meta *ena_meta = &ena_tx_ctx->ena_meta;

	if ((mbuf->ol_flags & MBUF_OFFLOADS) &&
	    (queue_offloads & QUEUE_OFFLOADS)) {
		/* check if TSO is required */
		if ((mbuf->ol_flags & RTE_MBUF_F_TX_TCP_SEG) &&
		    (queue_offloads & RTE_ETH_TX_OFFLOAD_TCP_TSO)) {
			ena_tx_ctx->tso_enable = true;

			ena_meta->l4_hdr_len = GET_L4_HDR_LEN(mbuf);
		}

		/* check if L3 checksum is needed */
		if ((mbuf->ol_flags & RTE_MBUF_F_TX_IP_CKSUM) &&
		    (queue_offloads & RTE_ETH_TX_OFFLOAD_IPV4_CKSUM))
			ena_tx_ctx->l3_csum_enable = true;

		if (mbuf->ol_flags & RTE_MBUF_F_TX_IPV6) {
			ena_tx_ctx->l3_proto = ENA_ETH_IO_L3_PROTO_IPV6;
			/* For IPv6 packets, DF always needs to be true. */
			ena_tx_ctx->df = 1;
		} else {
			ena_tx_ctx->l3_proto = ENA_ETH_IO_L3_PROTO_IPV4;

			/* set don't fragment (DF) flag */
			if (mbuf->packet_type &
				(RTE_PTYPE_L4_NONFRAG
				 | RTE_PTYPE_INNER_L4_NONFRAG))
				ena_tx_ctx->df = 1;
		}

		/* check if L4 checksum is needed */
		if (((mbuf->ol_flags & RTE_MBUF_F_TX_L4_MASK) == RTE_MBUF_F_TX_TCP_CKSUM) &&
		    (queue_offloads & RTE_ETH_TX_OFFLOAD_TCP_CKSUM)) {
			ena_tx_ctx->l4_proto = ENA_ETH_IO_L4_PROTO_TCP;
			ena_tx_ctx->l4_csum_enable = true;
		} else if (((mbuf->ol_flags & RTE_MBUF_F_TX_L4_MASK) ==
				RTE_MBUF_F_TX_UDP_CKSUM) &&
				(queue_offloads & RTE_ETH_TX_OFFLOAD_UDP_CKSUM)) {
			ena_tx_ctx->l4_proto = ENA_ETH_IO_L4_PROTO_UDP;
			ena_tx_ctx->l4_csum_enable = true;
		} else {
			ena_tx_ctx->l4_proto = ENA_ETH_IO_L4_PROTO_UNKNOWN;
			ena_tx_ctx->l4_csum_enable = false;
		}

		ena_meta->mss = mbuf->tso_segsz;
		ena_meta->l3_hdr_len = mbuf->l3_len;
		ena_meta->l3_hdr_offset = mbuf->l2_len;

		ena_tx_ctx->meta_valid = true;
	} else if (disable_meta_caching) {
		memset(ena_meta, 0, sizeof(*ena_meta));
		ena_tx_ctx->meta_valid = true;
	} else {
		ena_tx_ctx->meta_valid = false;
	}
}

static int validate_tx_req_id(struct ena_ring *tx_ring, u16 req_id)
{
	struct ena_tx_buffer *tx_info = NULL;

	if (likely(req_id < tx_ring->ring_size)) {
		tx_info = &tx_ring->tx_buffer_info[req_id];
		if (likely(tx_info->mbuf))
			return 0;
	}

	if (tx_info)
		PMD_TX_LOG(ERR, "tx_info doesn't have valid mbuf. queue %d:%d req_id %u\n",
			tx_ring->port_id, tx_ring->id, req_id);
	else
		PMD_TX_LOG(ERR, "Invalid req_id: %hu in queue %d:%d\n",
			req_id, tx_ring->port_id, tx_ring->id);

	/* Trigger device reset */
	++tx_ring->tx_stats.bad_req_id;
	ena_trigger_reset(tx_ring->adapter, ENA_REGS_RESET_INV_TX_REQ_ID);
	return -EFAULT;
}

static void ena_config_host_info(struct ena_com_dev *ena_dev)
{
	struct ena_admin_host_info *host_info;
	int rc;

	/* Allocate only the host info */
	rc = ena_com_allocate_host_info(ena_dev);
	if (rc) {
		PMD_DRV_LOG(ERR, "Cannot allocate host info\n");
		return;
	}

	host_info = ena_dev->host_attr.host_info;

	host_info->os_type = ENA_ADMIN_OS_DPDK;
	host_info->kernel_ver = RTE_VERSION;
	strlcpy((char *)host_info->kernel_ver_str, rte_version(),
		sizeof(host_info->kernel_ver_str));
	host_info->os_dist = RTE_VERSION;
	strlcpy((char *)host_info->os_dist_str, rte_version(),
		sizeof(host_info->os_dist_str));
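	/*
	 * Pack the driver version into a single word: the major version sits
	 * in the lowest bits, with the minor and sub-minor parts shifted by
	 * the ENA_ADMIN_HOST_INFO_*_SHIFT values. With the DRV_MODULE_VER_*
	 * macros defined above, this reports driver version 2.9.0 to the
	 * device.
	 */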
	host_info->driver_version =
		(DRV_MODULE_VER_MAJOR) |
		(DRV_MODULE_VER_MINOR << ENA_ADMIN_HOST_INFO_MINOR_SHIFT) |
		(DRV_MODULE_VER_SUBMINOR <<
			ENA_ADMIN_HOST_INFO_SUB_MINOR_SHIFT);
	host_info->num_cpus = rte_lcore_count();

	host_info->driver_supported_features =
		ENA_ADMIN_HOST_INFO_RX_OFFSET_MASK |
		ENA_ADMIN_HOST_INFO_RSS_CONFIGURABLE_FUNCTION_KEY_MASK;

	rc = ena_com_set_host_attributes(ena_dev);
	if (rc) {
		if (rc == -ENA_COM_UNSUPPORTED)
			PMD_DRV_LOG(WARNING, "Cannot set host attributes\n");
		else
			PMD_DRV_LOG(ERR, "Cannot set host attributes\n");

		goto err;
	}

	return;

err:
	ena_com_delete_host_info(ena_dev);
}

/* This function calculates the number of xstats based on the current config */
static unsigned int ena_xstats_calc_num(struct rte_eth_dev_data *data)
{
	struct ena_adapter *adapter = data->dev_private;

	return ENA_STATS_ARRAY_GLOBAL +
		adapter->metrics_num +
		ENA_STATS_ARRAY_ENA_SRD +
		(data->nb_tx_queues * ENA_STATS_ARRAY_TX) +
		(data->nb_rx_queues * ENA_STATS_ARRAY_RX);
}
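
/*
 * For example, with one Rx and one Tx queue configured and all six customer
 * metrics negotiated, the formula above yields
 * 4 (global) + 6 (metrics) + 5 (ENA SRD) + 8 (Tx) + 9 (Rx) = 32 xstats.
 */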

static void ena_config_debug_area(struct ena_adapter *adapter)
{
	u32 debug_area_size;
	int rc, ss_count;

	ss_count = ena_xstats_calc_num(adapter->edev_data);

	/* allocate 32 bytes for each string and 64 bits for the value */
	debug_area_size = ss_count * ETH_GSTRING_LEN + sizeof(u64) * ss_count;

	rc = ena_com_allocate_debug_area(&adapter->ena_dev, debug_area_size);
	if (rc) {
		PMD_DRV_LOG(ERR, "Cannot allocate debug area\n");
		return;
	}

	rc = ena_com_set_host_attributes(&adapter->ena_dev);
	if (rc) {
		if (rc == -ENA_COM_UNSUPPORTED)
			PMD_DRV_LOG(WARNING, "Cannot set host attributes\n");
		else
			PMD_DRV_LOG(ERR, "Cannot set host attributes\n");

		goto err;
	}

	return;
err:
	ena_com_delete_debug_area(&adapter->ena_dev);
}

static int ena_close(struct rte_eth_dev *dev)
{
	struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev);
	struct rte_intr_handle *intr_handle = pci_dev->intr_handle;
	struct ena_adapter *adapter = dev->data->dev_private;
	int ret = 0;
	int rc;

	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
		return 0;

	if (adapter->state == ENA_ADAPTER_STATE_RUNNING)
		ret = ena_stop(dev);
	adapter->state = ENA_ADAPTER_STATE_CLOSED;

	if (!adapter->control_path_poll_interval) {
		rte_intr_disable(intr_handle);
		rc = rte_intr_callback_unregister_sync(intr_handle, ena_control_path_handler, dev);
		if (unlikely(rc != 0))
			PMD_INIT_LOG(ERR, "Failed to unregister interrupt handler\n");
	} else {
		rte_eal_alarm_cancel(ena_control_path_poll_handler, dev);
	}

	ena_rx_queue_release_all(dev);
	ena_tx_queue_release_all(dev);

	rte_free(adapter->drv_stats);
	adapter->drv_stats = NULL;

	/*
	 * MAC is not allocated dynamically. Setting NULL should prevent the
	 * release of the resource in rte_eth_dev_release_port().
	 */
	dev->data->mac_addrs = NULL;

	return ret;
}

static int
ena_dev_reset(struct rte_eth_dev *dev)
{
	int rc = 0;

	/* Cannot release memory in secondary process */
	if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
		PMD_DRV_LOG(WARNING, "dev_reset not supported in secondary.\n");
		return -EPERM;
	}

	ena_destroy_device(dev);
	rc = eth_ena_dev_init(dev);
	if (rc)
		PMD_INIT_LOG(CRIT, "Cannot initialize device\n");

	return rc;
}

static void ena_rx_queue_release_all(struct rte_eth_dev *dev)
{
	int nb_queues = dev->data->nb_rx_queues;
	int i;

	for (i = 0; i < nb_queues; i++)
		ena_rx_queue_release(dev, i);
}

static void ena_tx_queue_release_all(struct rte_eth_dev *dev)
{
	int nb_queues = dev->data->nb_tx_queues;
	int i;

	for (i = 0; i < nb_queues; i++)
		ena_tx_queue_release(dev, i);
}

static void ena_rx_queue_release(struct rte_eth_dev *dev, uint16_t qid)
{
	struct ena_ring *ring = dev->data->rx_queues[qid];

	/* Free ring resources */
	rte_free(ring->rx_buffer_info);
	ring->rx_buffer_info = NULL;

	rte_free(ring->rx_refill_buffer);
	ring->rx_refill_buffer = NULL;

	rte_free(ring->empty_rx_reqs);
	ring->empty_rx_reqs = NULL;

	ring->configured = 0;

	PMD_DRV_LOG(NOTICE, "Rx queue %d:%d released\n",
		ring->port_id, ring->id);
}

static void ena_tx_queue_release(struct rte_eth_dev *dev, uint16_t qid)
{
	struct ena_ring *ring = dev->data->tx_queues[qid];

	/* Free ring resources */
	rte_free(ring->push_buf_intermediate_buf);

	rte_free(ring->tx_buffer_info);

	rte_free(ring->empty_tx_reqs);

	ring->empty_tx_reqs = NULL;
	ring->tx_buffer_info = NULL;
	ring->push_buf_intermediate_buf = NULL;

	ring->configured = 0;

	PMD_DRV_LOG(NOTICE, "Tx queue %d:%d released\n",
		ring->port_id, ring->id);
}

static void ena_rx_queue_release_bufs(struct ena_ring *ring)
{
	unsigned int i;

	for (i = 0; i < ring->ring_size; ++i) {
		struct ena_rx_buffer *rx_info = &ring->rx_buffer_info[i];
		if (rx_info->mbuf) {
			rte_mbuf_raw_free(rx_info->mbuf);
			rx_info->mbuf = NULL;
		}
	}
}

static void ena_tx_queue_release_bufs(struct ena_ring *ring)
{
	unsigned int i;

	for (i = 0; i < ring->ring_size; ++i) {
		struct ena_tx_buffer *tx_buf = &ring->tx_buffer_info[i];

		if (tx_buf->mbuf) {
			rte_pktmbuf_free(tx_buf->mbuf);
			tx_buf->mbuf = NULL;
		}
	}
}

static int ena_link_update(struct rte_eth_dev *dev,
			   __rte_unused int wait_to_complete)
{
	struct rte_eth_link *link = &dev->data->dev_link;
	struct ena_adapter *adapter = dev->data->dev_private;

	link->link_status = adapter->link_status ?
		RTE_ETH_LINK_UP : RTE_ETH_LINK_DOWN;
	link->link_speed = RTE_ETH_SPEED_NUM_NONE;
	link->link_duplex = RTE_ETH_LINK_FULL_DUPLEX;

	return 0;
}

static int ena_queue_start_all(struct rte_eth_dev *dev,
			       enum ena_ring_type ring_type)
{
	struct ena_adapter *adapter = dev->data->dev_private;
	struct ena_ring *queues = NULL;
	int nb_queues;
	int i = 0;
	int rc = 0;

	if (ring_type == ENA_RING_TYPE_RX) {
		queues = adapter->rx_ring;
		nb_queues = dev->data->nb_rx_queues;
	} else {
		queues = adapter->tx_ring;
		nb_queues = dev->data->nb_tx_queues;
	}
	for (i = 0; i < nb_queues; i++) {
		if (queues[i].configured) {
			if (ring_type == ENA_RING_TYPE_RX) {
				ena_assert_msg(
					dev->data->rx_queues[i] == &queues[i],
					"Inconsistent state of Rx queues\n");
			} else {
				ena_assert_msg(
					dev->data->tx_queues[i] == &queues[i],
					"Inconsistent state of Tx queues\n");
			}

			rc = ena_queue_start(dev, &queues[i]);

			if (rc) {
				PMD_INIT_LOG(ERR,
					"Failed to start queue[%d] of type(%d)\n",
					i, ring_type);
				goto err;
			}
		}
	}

	return 0;

err:
	while (i--)
		if (queues[i].configured)
			ena_queue_stop(&queues[i]);

	return rc;
}

static int
ena_calc_io_queue_size(struct ena_calc_queue_size_ctx *ctx,
		       bool use_large_llq_hdr)
{
	struct ena_admin_feature_llq_desc *dev = &ctx->get_feat_ctx->llq;
	struct ena_com_dev *ena_dev = ctx->ena_dev;
	uint32_t max_tx_queue_size;
	uint32_t max_rx_queue_size;

	if (ena_dev->supported_features & BIT(ENA_ADMIN_MAX_QUEUES_EXT)) {
		struct ena_admin_queue_ext_feature_fields *max_queue_ext =
			&ctx->get_feat_ctx->max_queue_ext.max_queue_ext;
		max_rx_queue_size = RTE_MIN(max_queue_ext->max_rx_cq_depth,
			max_queue_ext->max_rx_sq_depth);
		max_tx_queue_size = max_queue_ext->max_tx_cq_depth;

		if (ena_dev->tx_mem_queue_type ==
		    ENA_ADMIN_PLACEMENT_POLICY_DEV) {
			max_tx_queue_size = RTE_MIN(max_tx_queue_size,
				dev->max_llq_depth);
		} else {
			max_tx_queue_size = RTE_MIN(max_tx_queue_size,
				max_queue_ext->max_tx_sq_depth);
		}

		ctx->max_rx_sgl_size = RTE_MIN(ENA_PKT_MAX_BUFS,
			max_queue_ext->max_per_packet_rx_descs);
		ctx->max_tx_sgl_size = RTE_MIN(ENA_PKT_MAX_BUFS,
			max_queue_ext->max_per_packet_tx_descs);
	} else {
		struct ena_admin_queue_feature_desc *max_queues =
			&ctx->get_feat_ctx->max_queues;
		max_rx_queue_size = RTE_MIN(max_queues->max_cq_depth,
			max_queues->max_sq_depth);
		max_tx_queue_size = max_queues->max_cq_depth;

		if (ena_dev->tx_mem_queue_type ==
		    ENA_ADMIN_PLACEMENT_POLICY_DEV) {
			max_tx_queue_size = RTE_MIN(max_tx_queue_size,
				dev->max_llq_depth);
		} else {
			max_tx_queue_size = RTE_MIN(max_tx_queue_size,
				max_queues->max_sq_depth);
		}

		ctx->max_rx_sgl_size = RTE_MIN(ENA_PKT_MAX_BUFS,
			max_queues->max_packet_rx_descs);
		ctx->max_tx_sgl_size = RTE_MIN(ENA_PKT_MAX_BUFS,
			max_queues->max_packet_tx_descs);
	}

	/* Round down to the nearest power of 2 */
	max_rx_queue_size = rte_align32prevpow2(max_rx_queue_size);
	max_tx_queue_size = rte_align32prevpow2(max_tx_queue_size);

	if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV && use_large_llq_hdr) {
		/* Intersection between driver configuration and device capabilities */
		if (dev->entry_size_ctrl_supported & ENA_ADMIN_LIST_ENTRY_SIZE_256B) {
			if (dev->max_wide_llq_depth == MAX_WIDE_LLQ_DEPTH_UNSUPPORTED) {
				/* Devices that do not support the double-sized ENA memory BAR will
				 * report max_wide_llq_depth as 0. In such a case, the driver halves
				 * the queue depth when working in large llq policy.
				 */
				max_tx_queue_size >>= 1;
				PMD_INIT_LOG(INFO,
					"large LLQ policy requires limiting Tx queue size to %u entries\n",
					max_tx_queue_size);
			} else if (dev->max_wide_llq_depth < max_tx_queue_size) {
				/* In case the queue depth that the driver calculated exceeds
				 * the maximal value that the device allows, it will be limited
				 * to that maximal value.
				 */
				max_tx_queue_size = dev->max_wide_llq_depth;
			}
		} else {
			PMD_INIT_LOG(INFO,
				"Forcing large LLQ headers failed since device lacks this support\n");
		}
	}

	if (unlikely(max_rx_queue_size == 0 || max_tx_queue_size == 0)) {
		PMD_INIT_LOG(ERR, "Invalid queue size\n");
		return -EFAULT;
	}

	ctx->max_tx_queue_size = max_tx_queue_size;
	ctx->max_rx_queue_size = max_rx_queue_size;

	PMD_DRV_LOG(INFO, "tx queue size %u\n", max_tx_queue_size);
	return 0;
}

static void ena_stats_restart(struct rte_eth_dev *dev)
{
	struct ena_adapter *adapter = dev->data->dev_private;

	rte_atomic64_init(&adapter->drv_stats->ierrors);
	rte_atomic64_init(&adapter->drv_stats->oerrors);
	rte_atomic64_init(&adapter->drv_stats->rx_nombuf);
	adapter->drv_stats->rx_drops = 0;
}

static int ena_stats_get(struct rte_eth_dev *dev,
			 struct rte_eth_stats *stats)
{
	struct ena_admin_basic_stats ena_stats;
	struct ena_adapter *adapter = dev->data->dev_private;
	struct ena_com_dev *ena_dev = &adapter->ena_dev;
	int rc;
	int i;
	int max_rings_stats;

	memset(&ena_stats, 0, sizeof(ena_stats));

	rte_spinlock_lock(&adapter->admin_lock);
	rc = ENA_PROXY(adapter, ena_com_get_dev_basic_stats, ena_dev,
		       &ena_stats);
	rte_spinlock_unlock(&adapter->admin_lock);
	if (unlikely(rc)) {
		PMD_DRV_LOG(ERR, "Could not retrieve statistics from ENA\n");
		return rc;
	}

	/* Set of basic statistics from ENA */
	stats->ipackets = __MERGE_64B_H_L(ena_stats.rx_pkts_high,
		ena_stats.rx_pkts_low);
	stats->opackets = __MERGE_64B_H_L(ena_stats.tx_pkts_high,
		ena_stats.tx_pkts_low);
	stats->ibytes = __MERGE_64B_H_L(ena_stats.rx_bytes_high,
		ena_stats.rx_bytes_low);
	stats->obytes = __MERGE_64B_H_L(ena_stats.tx_bytes_high,
		ena_stats.tx_bytes_low);

	/* Driver related stats */
	stats->imissed = adapter->drv_stats->rx_drops;
	stats->ierrors = rte_atomic64_read(&adapter->drv_stats->ierrors);
	stats->oerrors = rte_atomic64_read(&adapter->drv_stats->oerrors);
	stats->rx_nombuf = rte_atomic64_read(&adapter->drv_stats->rx_nombuf);

	max_rings_stats = RTE_MIN(dev->data->nb_rx_queues,
		RTE_ETHDEV_QUEUE_STAT_CNTRS);
	for (i = 0; i < max_rings_stats; ++i) {
		struct ena_stats_rx *rx_stats = &adapter->rx_ring[i].rx_stats;

		stats->q_ibytes[i] = rx_stats->bytes;
		stats->q_ipackets[i] = rx_stats->cnt;
		stats->q_errors[i] = rx_stats->bad_desc_num +
			rx_stats->bad_req_id;
	}

	max_rings_stats = RTE_MIN(dev->data->nb_tx_queues,
		RTE_ETHDEV_QUEUE_STAT_CNTRS);
	for (i = 0; i < max_rings_stats; ++i) {
		struct ena_stats_tx *tx_stats = &adapter->tx_ring[i].tx_stats;

		stats->q_obytes[i] = tx_stats->bytes;
		stats->q_opackets[i] = tx_stats->cnt;
	}

	return 0;
}

static int ena_mtu_set(struct rte_eth_dev *dev, uint16_t mtu)
{
	struct ena_adapter *adapter;
	struct ena_com_dev *ena_dev;
	int rc = 0;

	ena_assert_msg(dev->data != NULL, "Uninitialized device\n");
	ena_assert_msg(dev->data->dev_private != NULL, "Uninitialized device\n");
	adapter = dev->data->dev_private;

	ena_dev = &adapter->ena_dev;
	ena_assert_msg(ena_dev != NULL, "Uninitialized device\n");

	rc = ENA_PROXY(adapter, ena_com_set_dev_mtu, ena_dev, mtu);
	if (rc)
		PMD_DRV_LOG(ERR, "Could not set MTU: %d\n", mtu);
	else
		PMD_DRV_LOG(NOTICE, "MTU set to: %d\n", mtu);

	return rc;
}

static int ena_start(struct rte_eth_dev *dev)
{
	struct ena_adapter *adapter = dev->data->dev_private;
	uint64_t ticks;
	int rc = 0;
	uint16_t i;

	/* Cannot allocate memory in secondary process */
	if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
		PMD_DRV_LOG(WARNING, "dev_start not supported in secondary.\n");
		return -EPERM;
	}

	rc = ena_setup_rx_intr(dev);
	if (rc)
		return rc;

	rc = ena_queue_start_all(dev, ENA_RING_TYPE_RX);
	if (rc)
		return rc;

	rc = ena_queue_start_all(dev, ENA_RING_TYPE_TX);
	if (rc)
		goto err_start_tx;

	if (adapter->edev_data->dev_conf.rxmode.mq_mode & RTE_ETH_MQ_RX_RSS_FLAG) {
		rc = ena_rss_configure(adapter);
		if (rc)
			goto err_rss_init;
	}

	ena_stats_restart(dev);

	adapter->timestamp_wd = rte_get_timer_cycles();
	adapter->keep_alive_timeout = ENA_DEVICE_KALIVE_TIMEOUT;

	ticks = rte_get_timer_hz();
	rte_timer_reset(&adapter->timer_wd, ticks, PERIODICAL, rte_lcore_id(),
			ena_timer_wd_callback, dev);

	++adapter->dev_stats.dev_start;
	adapter->state = ENA_ADAPTER_STATE_RUNNING;

	for (i = 0; i < dev->data->nb_rx_queues; i++)
		dev->data->rx_queue_state[i] = RTE_ETH_QUEUE_STATE_STARTED;
	for (i = 0; i < dev->data->nb_tx_queues; i++)
		dev->data->tx_queue_state[i] = RTE_ETH_QUEUE_STATE_STARTED;

	return 0;

err_rss_init:
	ena_queue_stop_all(dev, ENA_RING_TYPE_TX);
err_start_tx:
	ena_queue_stop_all(dev, ENA_RING_TYPE_RX);
	return rc;
}

static int ena_stop(struct rte_eth_dev *dev)
{
	struct ena_adapter *adapter = dev->data->dev_private;
	struct ena_com_dev *ena_dev = &adapter->ena_dev;
	struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev);
	struct rte_intr_handle *intr_handle = pci_dev->intr_handle;
	uint16_t i;
	int rc;

	/* Cannot free memory in secondary process */
	if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
		PMD_DRV_LOG(WARNING, "dev_stop not supported in secondary.\n");
		return -EPERM;
	}

	rte_timer_stop_sync(&adapter->timer_wd);
	ena_queue_stop_all(dev, ENA_RING_TYPE_TX);
	ena_queue_stop_all(dev, ENA_RING_TYPE_RX);

	if (adapter->trigger_reset) {
		rc = ena_com_dev_reset(ena_dev, adapter->reset_reason);
		if (rc)
			PMD_DRV_LOG(ERR, "Device reset failed, rc: %d\n", rc);
	}

	rte_intr_disable(intr_handle);

	rte_intr_efd_disable(intr_handle);

	/* Cleanup vector list */
	rte_intr_vec_list_free(intr_handle);

	rte_intr_enable(intr_handle);

	++adapter->dev_stats.dev_stop;
	adapter->state = ENA_ADAPTER_STATE_STOPPED;
	dev->data->dev_started = 0;

	for (i = 0; i < dev->data->nb_rx_queues; i++)
		dev->data->rx_queue_state[i] = RTE_ETH_QUEUE_STATE_STOPPED;
	for (i = 0; i < dev->data->nb_tx_queues; i++)
		dev->data->tx_queue_state[i] = RTE_ETH_QUEUE_STATE_STOPPED;

	return 0;
}

static int ena_create_io_queue(struct rte_eth_dev *dev, struct ena_ring *ring)
{
	struct ena_adapter *adapter = ring->adapter;
	struct ena_com_dev *ena_dev = &adapter->ena_dev;
	struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev);
	struct rte_intr_handle *intr_handle = pci_dev->intr_handle;
	struct ena_com_create_io_ctx ctx =
		/* policy set to _HOST just to satisfy icc compiler */
		{ ENA_ADMIN_PLACEMENT_POLICY_HOST,
		  0, 0, 0, 0, 0 };
	uint16_t ena_qid;
	unsigned int i;
	int rc;

	ctx.msix_vector = -1;
	if (ring->type == ENA_RING_TYPE_TX) {
		ena_qid = ENA_IO_TXQ_IDX(ring->id);
		ctx.direction = ENA_COM_IO_QUEUE_DIRECTION_TX;
		ctx.mem_queue_type = ena_dev->tx_mem_queue_type;
		for (i = 0; i < ring->ring_size; i++)
			ring->empty_tx_reqs[i] = i;
	} else {
		ena_qid = ENA_IO_RXQ_IDX(ring->id);
		ctx.direction = ENA_COM_IO_QUEUE_DIRECTION_RX;
		if (rte_intr_dp_is_en(intr_handle))
			ctx.msix_vector =
				rte_intr_vec_list_index_get(intr_handle,
							    ring->id);

		for (i = 0; i < ring->ring_size; i++)
			ring->empty_rx_reqs[i] = i;
	}
	ctx.queue_size = ring->ring_size;
	ctx.qid = ena_qid;
	ctx.numa_node = ring->numa_socket_id;

	rc = ena_com_create_io_queue(ena_dev, &ctx);
	if (rc) {
		PMD_DRV_LOG(ERR,
			"Failed to create IO queue[%d] (qid:%d), rc: %d\n",
			ring->id, ena_qid, rc);
		return rc;
	}

	rc = ena_com_get_io_handlers(ena_dev, ena_qid,
				     &ring->ena_com_io_sq,
				     &ring->ena_com_io_cq);
	if (rc) {
		PMD_DRV_LOG(ERR,
			"Failed to get IO queue[%d] handlers, rc: %d\n",
			ring->id, rc);
		ena_com_destroy_io_queue(ena_dev, ena_qid);
		return rc;
	}

	if (ring->type == ENA_RING_TYPE_TX)
		ena_com_update_numa_node(ring->ena_com_io_cq, ctx.numa_node);

	/* Start with Rx interrupts being masked. */
	if (ring->type == ENA_RING_TYPE_RX && rte_intr_dp_is_en(intr_handle))
		ena_rx_queue_intr_disable(dev, ring->id);

	return 0;
}

static void ena_queue_stop(struct ena_ring *ring)
{
	struct ena_com_dev *ena_dev = &ring->adapter->ena_dev;

	if (ring->type == ENA_RING_TYPE_RX) {
		ena_com_destroy_io_queue(ena_dev, ENA_IO_RXQ_IDX(ring->id));
		ena_rx_queue_release_bufs(ring);
	} else {
		ena_com_destroy_io_queue(ena_dev, ENA_IO_TXQ_IDX(ring->id));
		ena_tx_queue_release_bufs(ring);
	}
}

static void ena_queue_stop_all(struct rte_eth_dev *dev,
			       enum ena_ring_type ring_type)
{
	struct ena_adapter *adapter = dev->data->dev_private;
	struct ena_ring *queues = NULL;
	uint16_t nb_queues, i;

	if (ring_type == ENA_RING_TYPE_RX) {
		queues = adapter->rx_ring;
		nb_queues = dev->data->nb_rx_queues;
	} else {
		queues = adapter->tx_ring;
		nb_queues = dev->data->nb_tx_queues;
	}

	for (i = 0; i < nb_queues; ++i)
		if (queues[i].configured)
			ena_queue_stop(&queues[i]);
}

static int ena_queue_start(struct rte_eth_dev *dev, struct ena_ring *ring)
{
	int rc, bufs_num;

	ena_assert_msg(ring->configured == 1,
		       "Trying to start unconfigured queue\n");

	rc = ena_create_io_queue(dev, ring);
	if (rc) {
		PMD_INIT_LOG(ERR, "Failed to create IO queue\n");
		return rc;
	}

	ring->next_to_clean = 0;
	ring->next_to_use = 0;

	if (ring->type == ENA_RING_TYPE_TX) {
		ring->tx_stats.available_desc =
			ena_com_free_q_entries(ring->ena_com_io_sq);
		return 0;
	}

	bufs_num = ring->ring_size - 1;
	rc = ena_populate_rx_queue(ring, bufs_num);
	if (rc != bufs_num) {
		ena_com_destroy_io_queue(&ring->adapter->ena_dev,
					 ENA_IO_RXQ_IDX(ring->id));
		PMD_INIT_LOG(ERR, "Failed to populate Rx ring\n");
		return ENA_COM_FAULT;
	}
	/* Flush the per-core Rx buffer pool caches as they can be used on
	 * other cores as well.
	 */
	rte_mempool_cache_flush(NULL, ring->mb_pool);

	return 0;
}

static int ena_tx_queue_setup(struct rte_eth_dev *dev,
			      uint16_t queue_idx,
			      uint16_t nb_desc,
			      unsigned int socket_id,
			      const struct rte_eth_txconf *tx_conf)
{
	struct ena_ring *txq = NULL;
	struct ena_adapter *adapter = dev->data->dev_private;
	unsigned int i;
	uint16_t dyn_thresh;

	txq = &adapter->tx_ring[queue_idx];

	if (txq->configured) {
		PMD_DRV_LOG(CRIT,
			"API violation. Queue[%d] is already configured\n",
			queue_idx);
		return ENA_COM_FAULT;
	}

	if (!rte_is_power_of_2(nb_desc)) {
		PMD_DRV_LOG(ERR,
			"Unsupported size of Tx queue: %d is not a power of 2.\n",
			nb_desc);
		return -EINVAL;
	}

	if (nb_desc > adapter->max_tx_ring_size) {
		PMD_DRV_LOG(ERR,
			"Unsupported size of Tx queue (max size: %d)\n",
			adapter->max_tx_ring_size);
		return -EINVAL;
	}

	txq->port_id = dev->data->port_id;
	txq->next_to_clean = 0;
	txq->next_to_use = 0;
	txq->ring_size = nb_desc;
	txq->size_mask = nb_desc - 1;
	txq->numa_socket_id = socket_id;
	txq->pkts_without_db = false;
	txq->last_cleanup_ticks = 0;

	txq->tx_buffer_info = rte_zmalloc_socket("txq->tx_buffer_info",
		sizeof(struct ena_tx_buffer) * txq->ring_size,
		RTE_CACHE_LINE_SIZE,
		socket_id);
	if (!txq->tx_buffer_info) {
		PMD_DRV_LOG(ERR,
			"Failed to allocate memory for Tx buffer info\n");
		return -ENOMEM;
	}

	txq->empty_tx_reqs = rte_zmalloc_socket("txq->empty_tx_reqs",
		sizeof(uint16_t) * txq->ring_size,
		RTE_CACHE_LINE_SIZE,
		socket_id);
	if (!txq->empty_tx_reqs) {
		PMD_DRV_LOG(ERR,
			"Failed to allocate memory for empty Tx requests\n");
		rte_free(txq->tx_buffer_info);
		return -ENOMEM;
	}

	txq->push_buf_intermediate_buf =
		rte_zmalloc_socket("txq->push_buf_intermediate_buf",
			txq->tx_max_header_size,
			RTE_CACHE_LINE_SIZE,
			socket_id);
	if (!txq->push_buf_intermediate_buf) {
		PMD_DRV_LOG(ERR, "Failed to alloc push buffer for LLQ\n");
		rte_free(txq->tx_buffer_info);
		rte_free(txq->empty_tx_reqs);
		return -ENOMEM;
	}

	for (i = 0; i < txq->ring_size; i++)
		txq->empty_tx_reqs[i] = i;

	txq->offloads = tx_conf->offloads | dev->data->dev_conf.txmode.offloads;

	/* Check if caller provided the Tx cleanup threshold value. */
	if (tx_conf->tx_free_thresh != 0) {
		txq->tx_free_thresh = tx_conf->tx_free_thresh;
	} else {
		dyn_thresh = txq->ring_size -
			txq->ring_size / ENA_REFILL_THRESH_DIVIDER;
		txq->tx_free_thresh = RTE_MAX(dyn_thresh,
			txq->ring_size - ENA_REFILL_THRESH_PACKET);
	}

	txq->missing_tx_completion_threshold =
		RTE_MIN(txq->ring_size / 2, ENA_DEFAULT_MISSING_COMP);

	/* Store pointer to this queue in upper layer */
	txq->configured = 1;
	dev->data->tx_queues[queue_idx] = txq;

	return 0;
}

static int ena_rx_queue_setup(struct rte_eth_dev *dev,
			      uint16_t queue_idx,
			      uint16_t nb_desc,
			      unsigned int socket_id,
			      const struct rte_eth_rxconf *rx_conf,
			      struct rte_mempool *mp)
{
	struct ena_adapter *adapter = dev->data->dev_private;
	struct ena_ring *rxq = NULL;
	size_t buffer_size;
	int i;
	uint16_t dyn_thresh;

	rxq = &adapter->rx_ring[queue_idx];
	if (rxq->configured) {
		PMD_DRV_LOG(CRIT,
			"API violation. Queue[%d] is already configured\n",
			queue_idx);
		return ENA_COM_FAULT;
	}

	if (!rte_is_power_of_2(nb_desc)) {
		PMD_DRV_LOG(ERR,
			"Unsupported size of Rx queue: %d is not a power of 2.\n",
			nb_desc);
		return -EINVAL;
	}

	if (nb_desc > adapter->max_rx_ring_size) {
		PMD_DRV_LOG(ERR,
			"Unsupported size of Rx queue (max size: %d)\n",
			adapter->max_rx_ring_size);
		return -EINVAL;
	}

	/* ENA doesn't support buffers smaller than 1400 bytes */
	buffer_size = rte_pktmbuf_data_room_size(mp) - RTE_PKTMBUF_HEADROOM;
	if (buffer_size < ENA_RX_BUF_MIN_SIZE) {
		PMD_DRV_LOG(ERR,
			"Unsupported size of Rx buffer: %zu (min size: %d)\n",
			buffer_size, ENA_RX_BUF_MIN_SIZE);
		return -EINVAL;
	}

	rxq->port_id = dev->data->port_id;
	rxq->next_to_clean = 0;
	rxq->next_to_use = 0;
	rxq->ring_size = nb_desc;
	rxq->size_mask = nb_desc - 1;
	rxq->numa_socket_id = socket_id;
	rxq->mb_pool = mp;

	rxq->rx_buffer_info = rte_zmalloc_socket("rxq->buffer_info",
		sizeof(struct ena_rx_buffer) * nb_desc,
		RTE_CACHE_LINE_SIZE,
		socket_id);
	if (!rxq->rx_buffer_info) {
		PMD_DRV_LOG(ERR,
			"Failed to allocate memory for Rx buffer info\n");
		return -ENOMEM;
	}

	rxq->rx_refill_buffer = rte_zmalloc_socket("rxq->rx_refill_buffer",
		sizeof(struct rte_mbuf *) * nb_desc,
		RTE_CACHE_LINE_SIZE,
		socket_id);
	if (!rxq->rx_refill_buffer) {
		PMD_DRV_LOG(ERR,
			"Failed to allocate memory for Rx refill buffer\n");
		rte_free(rxq->rx_buffer_info);
		rxq->rx_buffer_info = NULL;
		return -ENOMEM;
	}

	rxq->empty_rx_reqs = rte_zmalloc_socket("rxq->empty_rx_reqs",
		sizeof(uint16_t) * nb_desc,
		RTE_CACHE_LINE_SIZE,
		socket_id);
	if (!rxq->empty_rx_reqs) {
		PMD_DRV_LOG(ERR,
			"Failed to allocate memory for empty Rx requests\n");
		rte_free(rxq->rx_buffer_info);
		rxq->rx_buffer_info = NULL;
		rte_free(rxq->rx_refill_buffer);
		rxq->rx_refill_buffer = NULL;
		return -ENOMEM;
	}

	for (i = 0; i < nb_desc; i++)
		rxq->empty_rx_reqs[i] = i;

	rxq->offloads = rx_conf->offloads | dev->data->dev_conf.rxmode.offloads;

	if (rx_conf->rx_free_thresh != 0) {
		rxq->rx_free_thresh = rx_conf->rx_free_thresh;
	} else {
		dyn_thresh = rxq->ring_size / ENA_REFILL_THRESH_DIVIDER;
		rxq->rx_free_thresh = RTE_MIN(dyn_thresh,
			(uint16_t)(ENA_REFILL_THRESH_PACKET));
	}

	/* Store pointer to this queue in upper layer */
	rxq->configured = 1;
	dev->data->rx_queues[queue_idx] = rxq;

	return 0;
}

static int ena_add_single_rx_desc(struct ena_com_io_sq *io_sq,
				  struct rte_mbuf *mbuf, uint16_t id)
{
	struct ena_com_buf ebuf;
	int rc;

	/* prepare physical address for DMA transaction */
	ebuf.paddr = mbuf->buf_iova + RTE_PKTMBUF_HEADROOM;
	ebuf.len = mbuf->buf_len - RTE_PKTMBUF_HEADROOM;

	/* pass resource to device */
	rc = ena_com_add_single_rx_desc(io_sq, &ebuf, id);
	if (unlikely(rc != 0))
		PMD_RX_LOG(WARNING, "Failed adding Rx desc\n");

	return rc;
}

static int ena_populate_rx_queue(struct ena_ring *rxq, unsigned int count)
{
	unsigned int i;
	int rc;
	uint16_t next_to_use = rxq->next_to_use;
	uint16_t req_id;
#ifdef RTE_ETHDEV_DEBUG_RX
	uint16_t in_use;
#endif
	struct rte_mbuf **mbufs = rxq->rx_refill_buffer;
rxq->rx_refill_buffer; 1744 1745 if (unlikely(!count)) 1746 return 0; 1747 1748 #ifdef RTE_ETHDEV_DEBUG_RX 1749 in_use = rxq->ring_size - 1 - 1750 ena_com_free_q_entries(rxq->ena_com_io_sq); 1751 if (unlikely((in_use + count) >= rxq->ring_size)) 1752 PMD_RX_LOG(ERR, "Bad Rx ring state\n"); 1753 #endif 1754 1755 /* get resources for incoming packets */ 1756 rc = rte_pktmbuf_alloc_bulk(rxq->mb_pool, mbufs, count); 1757 if (unlikely(rc < 0)) { 1758 rte_atomic64_inc(&rxq->adapter->drv_stats->rx_nombuf); 1759 ++rxq->rx_stats.mbuf_alloc_fail; 1760 PMD_RX_LOG(DEBUG, "There are not enough free buffers\n"); 1761 return 0; 1762 } 1763 1764 for (i = 0; i < count; i++) { 1765 struct rte_mbuf *mbuf = mbufs[i]; 1766 struct ena_rx_buffer *rx_info; 1767 1768 if (likely((i + 4) < count)) 1769 rte_prefetch0(mbufs[i + 4]); 1770 1771 req_id = rxq->empty_rx_reqs[next_to_use]; 1772 rx_info = &rxq->rx_buffer_info[req_id]; 1773 1774 rc = ena_add_single_rx_desc(rxq->ena_com_io_sq, mbuf, req_id); 1775 if (unlikely(rc != 0)) 1776 break; 1777 1778 rx_info->mbuf = mbuf; 1779 next_to_use = ENA_IDX_NEXT_MASKED(next_to_use, rxq->size_mask); 1780 } 1781 1782 if (unlikely(i < count)) { 1783 PMD_RX_LOG(WARNING, 1784 "Refilled Rx queue[%d] with only %d/%d buffers\n", 1785 rxq->id, i, count); 1786 rte_pktmbuf_free_bulk(&mbufs[i], count - i); 1787 ++rxq->rx_stats.refill_partial; 1788 } 1789 1790 /* When we submitted free resources to device... */ 1791 if (likely(i > 0)) { 1792 /* ...let HW know that it can fill buffers with data. */ 1793 ena_com_write_sq_doorbell(rxq->ena_com_io_sq); 1794 1795 rxq->next_to_use = next_to_use; 1796 } 1797 1798 return i; 1799 } 1800 1801 static size_t ena_get_metrics_entries(struct ena_adapter *adapter) 1802 { 1803 struct ena_com_dev *ena_dev = &adapter->ena_dev; 1804 size_t metrics_num = 0; 1805 1806 if (ena_com_get_cap(ena_dev, ENA_ADMIN_CUSTOMER_METRICS)) 1807 metrics_num = ENA_STATS_ARRAY_METRICS; 1808 else if (ena_com_get_cap(ena_dev, ENA_ADMIN_ENI_STATS)) 1809 metrics_num = ENA_STATS_ARRAY_METRICS_LEGACY; 1810 PMD_DRV_LOG(NOTICE, "0x%x customer metrics are supported\n", (unsigned int)metrics_num); 1811 if (metrics_num > ENA_MAX_CUSTOMER_METRICS) { 1812 PMD_DRV_LOG(NOTICE, "Not enough space for the requested customer metrics\n"); 1813 metrics_num = ENA_MAX_CUSTOMER_METRICS; 1814 } 1815 return metrics_num; 1816 } 1817 1818 static int ena_device_init(struct ena_adapter *adapter, 1819 struct rte_pci_device *pdev, 1820 struct ena_com_dev_get_features_ctx *get_feat_ctx) 1821 { 1822 struct ena_com_dev *ena_dev = &adapter->ena_dev; 1823 uint32_t aenq_groups; 1824 int rc; 1825 bool readless_supported; 1826 1827 /* Initialize mmio registers */ 1828 rc = ena_com_mmio_reg_read_request_init(ena_dev); 1829 if (rc) { 1830 PMD_DRV_LOG(ERR, "Failed to init MMIO read less\n"); 1831 return rc; 1832 } 1833 1834 /* The PCIe configuration space revision id indicate if mmio reg 1835 * read is disabled. 
1836 */ 1837 readless_supported = !(pdev->id.class_id & ENA_MMIO_DISABLE_REG_READ); 1838 ena_com_set_mmio_read_mode(ena_dev, readless_supported); 1839 1840 /* reset device */ 1841 rc = ena_com_dev_reset(ena_dev, ENA_REGS_RESET_NORMAL); 1842 if (rc) { 1843 PMD_DRV_LOG(ERR, "Cannot reset device\n"); 1844 goto err_mmio_read_less; 1845 } 1846 1847 /* check FW version */ 1848 rc = ena_com_validate_version(ena_dev); 1849 if (rc) { 1850 PMD_DRV_LOG(ERR, "Device version is too low\n"); 1851 goto err_mmio_read_less; 1852 } 1853 1854 ena_dev->dma_addr_bits = ena_com_get_dma_width(ena_dev); 1855 1856 /* ENA device administration layer init */ 1857 rc = ena_com_admin_init(ena_dev, &aenq_handlers); 1858 if (rc) { 1859 PMD_DRV_LOG(ERR, 1860 "Cannot initialize ENA admin queue\n"); 1861 goto err_mmio_read_less; 1862 } 1863 1864 /* To enable the msix interrupts the driver needs to know the number 1865 * of queues. So the driver uses polling mode to retrieve this 1866 * information. 1867 */ 1868 ena_com_set_admin_polling_mode(ena_dev, true); 1869 1870 ena_config_host_info(ena_dev); 1871 1872 /* Get Device Attributes and features */ 1873 rc = ena_com_get_dev_attr_feat(ena_dev, get_feat_ctx); 1874 if (rc) { 1875 PMD_DRV_LOG(ERR, 1876 "Cannot get attribute for ENA device, rc: %d\n", rc); 1877 goto err_admin_init; 1878 } 1879 1880 aenq_groups = BIT(ENA_ADMIN_LINK_CHANGE) | 1881 BIT(ENA_ADMIN_NOTIFICATION) | 1882 BIT(ENA_ADMIN_KEEP_ALIVE) | 1883 BIT(ENA_ADMIN_FATAL_ERROR) | 1884 BIT(ENA_ADMIN_WARNING) | 1885 BIT(ENA_ADMIN_CONF_NOTIFICATIONS); 1886 1887 aenq_groups &= get_feat_ctx->aenq.supported_groups; 1888 1889 adapter->all_aenq_groups = aenq_groups; 1890 /* The actual supported number of metrics is negotiated with the device at runtime */ 1891 adapter->metrics_num = ena_get_metrics_entries(adapter); 1892 1893 return 0; 1894 1895 err_admin_init: 1896 ena_com_admin_destroy(ena_dev); 1897 1898 err_mmio_read_less: 1899 ena_com_mmio_reg_read_request_destroy(ena_dev); 1900 1901 return rc; 1902 } 1903 1904 static void ena_control_path_handler(void *cb_arg) 1905 { 1906 struct rte_eth_dev *dev = cb_arg; 1907 struct ena_adapter *adapter = dev->data->dev_private; 1908 struct ena_com_dev *ena_dev = &adapter->ena_dev; 1909 1910 if (likely(adapter->state != ENA_ADAPTER_STATE_CLOSED)) { 1911 ena_com_admin_q_comp_intr_handler(ena_dev); 1912 ena_com_aenq_intr_handler(ena_dev, dev); 1913 } 1914 } 1915 1916 static void ena_control_path_poll_handler(void *cb_arg) 1917 { 1918 struct rte_eth_dev *dev = cb_arg; 1919 struct ena_adapter *adapter = dev->data->dev_private; 1920 int rc; 1921 1922 if (likely(adapter->state != ENA_ADAPTER_STATE_CLOSED)) { 1923 ena_control_path_handler(cb_arg); 1924 rc = rte_eal_alarm_set(adapter->control_path_poll_interval, 1925 ena_control_path_poll_handler, cb_arg); 1926 if (unlikely(rc != 0)) { 1927 PMD_DRV_LOG(ERR, "Failed to retrigger control path alarm\n"); 1928 ena_trigger_reset(adapter, ENA_REGS_RESET_GENERIC); 1929 } 1930 } 1931 } 1932 1933 static void check_for_missing_keep_alive(struct ena_adapter *adapter) 1934 { 1935 if (!(adapter->active_aenq_groups & BIT(ENA_ADMIN_KEEP_ALIVE))) 1936 return; 1937 1938 if (adapter->keep_alive_timeout == ENA_HW_HINTS_NO_TIMEOUT) 1939 return; 1940 1941 if (unlikely((rte_get_timer_cycles() - adapter->timestamp_wd) >= 1942 adapter->keep_alive_timeout)) { 1943 PMD_DRV_LOG(ERR, "Keep alive timeout\n"); 1944 ena_trigger_reset(adapter, ENA_REGS_RESET_KEEP_ALIVE_TO); 1945 ++adapter->dev_stats.wd_expired; 1946 } 1947 } 1948 1949 /* Check if admin queue is enabled */ 
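/* Called periodically from ena_timer_wd_callback(); a stopped admin queue triggers an ENA_REGS_RESET_ADMIN_TO reset. */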
1950 static void check_for_admin_com_state(struct ena_adapter *adapter) 1951 { 1952 if (unlikely(!ena_com_get_admin_running_state(&adapter->ena_dev))) { 1953 PMD_DRV_LOG(ERR, "ENA admin queue is not in running state\n"); 1954 ena_trigger_reset(adapter, ENA_REGS_RESET_ADMIN_TO); 1955 } 1956 } 1957 1958 static int check_for_tx_completion_in_queue(struct ena_adapter *adapter, 1959 struct ena_ring *tx_ring) 1960 { 1961 struct ena_tx_buffer *tx_buf; 1962 uint64_t timestamp; 1963 uint64_t completion_delay; 1964 uint32_t missed_tx = 0; 1965 unsigned int i; 1966 int rc = 0; 1967 1968 for (i = 0; i < tx_ring->ring_size; ++i) { 1969 tx_buf = &tx_ring->tx_buffer_info[i]; 1970 timestamp = tx_buf->timestamp; 1971 1972 if (timestamp == 0) 1973 continue; 1974 1975 completion_delay = rte_get_timer_cycles() - timestamp; 1976 if (completion_delay > adapter->missing_tx_completion_to) { 1977 if (unlikely(!tx_buf->print_once)) { 1978 PMD_TX_LOG(WARNING, 1979 "Found a Tx that wasn't completed on time, qid %d, index %d. " 1980 "Missing Tx outstanding for %" PRIu64 " msecs.\n", 1981 tx_ring->id, i, completion_delay / 1982 rte_get_timer_hz() * 1000); 1983 tx_buf->print_once = true; 1984 } 1985 ++missed_tx; 1986 } 1987 } 1988 1989 if (unlikely(missed_tx > tx_ring->missing_tx_completion_threshold)) { 1990 PMD_DRV_LOG(ERR, 1991 "The number of lost Tx completions is above the threshold (%d > %d). " 1992 "Trigger the device reset.\n", 1993 missed_tx, 1994 tx_ring->missing_tx_completion_threshold); 1995 adapter->reset_reason = ENA_REGS_RESET_MISS_TX_CMPL; 1996 adapter->trigger_reset = true; 1997 rc = -EIO; 1998 } 1999 2000 tx_ring->tx_stats.missed_tx += missed_tx; 2001 2002 return rc; 2003 } 2004 2005 static void check_for_tx_completions(struct ena_adapter *adapter) 2006 { 2007 struct ena_ring *tx_ring; 2008 uint64_t tx_cleanup_delay; 2009 size_t qid; 2010 int budget; 2011 uint16_t nb_tx_queues = adapter->edev_data->nb_tx_queues; 2012 2013 if (adapter->missing_tx_completion_to == ENA_HW_HINTS_NO_TIMEOUT) 2014 return; 2015 2016 nb_tx_queues = adapter->edev_data->nb_tx_queues; 2017 budget = adapter->missing_tx_completion_budget; 2018 2019 qid = adapter->last_tx_comp_qid; 2020 while (budget-- > 0) { 2021 tx_ring = &adapter->tx_ring[qid]; 2022 2023 /* Tx cleanup is called only by the burst function and can be 2024 * called dynamically by the application. Also cleanup is 2025 * limited by the threshold. To avoid false detection of the 2026 * missing HW Tx completion, get the delay since last cleanup 2027 * function was called. 
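 * Queues whose last cleanup happened too long ago are skipped, since stale completions
 * there are most likely caused by the application not polling the queue rather than by
 * the device.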
2028 */ 2029 tx_cleanup_delay = rte_get_timer_cycles() - 2030 tx_ring->last_cleanup_ticks; 2031 if (tx_cleanup_delay < adapter->tx_cleanup_stall_delay) 2032 check_for_tx_completion_in_queue(adapter, tx_ring); 2033 qid = (qid + 1) % nb_tx_queues; 2034 } 2035 2036 adapter->last_tx_comp_qid = qid; 2037 } 2038 2039 static void ena_timer_wd_callback(__rte_unused struct rte_timer *timer, 2040 void *arg) 2041 { 2042 struct rte_eth_dev *dev = arg; 2043 struct ena_adapter *adapter = dev->data->dev_private; 2044 2045 if (unlikely(adapter->trigger_reset)) 2046 return; 2047 2048 check_for_missing_keep_alive(adapter); 2049 check_for_admin_com_state(adapter); 2050 check_for_tx_completions(adapter); 2051 2052 if (unlikely(adapter->trigger_reset)) { 2053 PMD_DRV_LOG(ERR, "Trigger reset is on\n"); 2054 rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_RESET, 2055 NULL); 2056 } 2057 } 2058 2059 static inline void 2060 set_default_llq_configurations(struct ena_llq_configurations *llq_config, 2061 struct ena_admin_feature_llq_desc *llq, 2062 bool use_large_llq_hdr) 2063 { 2064 llq_config->llq_header_location = ENA_ADMIN_INLINE_HEADER; 2065 llq_config->llq_stride_ctrl = ENA_ADMIN_MULTIPLE_DESCS_PER_ENTRY; 2066 llq_config->llq_num_decs_before_header = 2067 ENA_ADMIN_LLQ_NUM_DESCS_BEFORE_HEADER_2; 2068 2069 if (use_large_llq_hdr && 2070 (llq->entry_size_ctrl_supported & ENA_ADMIN_LIST_ENTRY_SIZE_256B)) { 2071 llq_config->llq_ring_entry_size = 2072 ENA_ADMIN_LIST_ENTRY_SIZE_256B; 2073 llq_config->llq_ring_entry_size_value = 256; 2074 } else { 2075 llq_config->llq_ring_entry_size = 2076 ENA_ADMIN_LIST_ENTRY_SIZE_128B; 2077 llq_config->llq_ring_entry_size_value = 128; 2078 } 2079 } 2080 2081 static int 2082 ena_set_queues_placement_policy(struct ena_adapter *adapter, 2083 struct ena_com_dev *ena_dev, 2084 struct ena_admin_feature_llq_desc *llq, 2085 struct ena_llq_configurations *llq_default_configurations) 2086 { 2087 int rc; 2088 u32 llq_feature_mask; 2089 2090 if (adapter->llq_header_policy == ENA_LLQ_POLICY_DISABLED) { 2091 PMD_DRV_LOG(WARNING, 2092 "NOTE: LLQ has been disabled as per user's request. " 2093 "This may lead to a huge performance degradation!\n"); 2094 ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST; 2095 return 0; 2096 } 2097 2098 llq_feature_mask = 1 << ENA_ADMIN_LLQ; 2099 if (!(ena_dev->supported_features & llq_feature_mask)) { 2100 PMD_DRV_LOG(INFO, 2101 "LLQ is not supported. Fallback to host mode policy.\n"); 2102 ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST; 2103 return 0; 2104 } 2105 2106 if (adapter->dev_mem_base == NULL) { 2107 PMD_DRV_LOG(ERR, 2108 "LLQ is advertised as supported, but device doesn't expose mem bar\n"); 2109 ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST; 2110 return 0; 2111 } 2112 2113 rc = ena_com_config_dev_mode(ena_dev, llq, llq_default_configurations); 2114 if (unlikely(rc)) { 2115 PMD_INIT_LOG(WARNING, 2116 "Failed to config dev mode. 
Fallback to host mode policy.\n"); 2117 ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST; 2118 return 0; 2119 } 2120 2121 /* Nothing to config, exit */ 2122 if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_HOST) 2123 return 0; 2124 2125 ena_dev->mem_bar = adapter->dev_mem_base; 2126 2127 return 0; 2128 } 2129 2130 static uint32_t ena_calc_max_io_queue_num(struct ena_com_dev *ena_dev, 2131 struct ena_com_dev_get_features_ctx *get_feat_ctx) 2132 { 2133 uint32_t io_tx_sq_num, io_tx_cq_num, io_rx_num, max_num_io_queues; 2134 2135 /* Regular queues capabilities */ 2136 if (ena_dev->supported_features & BIT(ENA_ADMIN_MAX_QUEUES_EXT)) { 2137 struct ena_admin_queue_ext_feature_fields *max_queue_ext = 2138 &get_feat_ctx->max_queue_ext.max_queue_ext; 2139 io_rx_num = RTE_MIN(max_queue_ext->max_rx_sq_num, 2140 max_queue_ext->max_rx_cq_num); 2141 io_tx_sq_num = max_queue_ext->max_tx_sq_num; 2142 io_tx_cq_num = max_queue_ext->max_tx_cq_num; 2143 } else { 2144 struct ena_admin_queue_feature_desc *max_queues = 2145 &get_feat_ctx->max_queues; 2146 io_tx_sq_num = max_queues->max_sq_num; 2147 io_tx_cq_num = max_queues->max_cq_num; 2148 io_rx_num = RTE_MIN(io_tx_sq_num, io_tx_cq_num); 2149 } 2150 2151 /* In case of LLQ use the llq number in the get feature cmd */ 2152 if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) 2153 io_tx_sq_num = get_feat_ctx->llq.max_llq_num; 2154 2155 max_num_io_queues = RTE_MIN(ENA_MAX_NUM_IO_QUEUES, io_rx_num); 2156 max_num_io_queues = RTE_MIN(max_num_io_queues, io_tx_sq_num); 2157 max_num_io_queues = RTE_MIN(max_num_io_queues, io_tx_cq_num); 2158 2159 if (unlikely(max_num_io_queues == 0)) { 2160 PMD_DRV_LOG(ERR, "Number of IO queues cannot be 0\n"); 2161 return -EFAULT; 2162 } 2163 2164 return max_num_io_queues; 2165 } 2166 2167 static void 2168 ena_set_offloads(struct ena_offloads *offloads, 2169 struct ena_admin_feature_offload_desc *offload_desc) 2170 { 2171 if (offload_desc->tx & ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_IPV4_MASK) 2172 offloads->tx_offloads |= ENA_IPV4_TSO; 2173 2174 /* Tx IPv4 checksum offloads */ 2175 if (offload_desc->tx & 2176 ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L3_CSUM_IPV4_MASK) 2177 offloads->tx_offloads |= ENA_L3_IPV4_CSUM; 2178 if (offload_desc->tx & 2179 ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV4_CSUM_FULL_MASK) 2180 offloads->tx_offloads |= ENA_L4_IPV4_CSUM; 2181 if (offload_desc->tx & 2182 ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV4_CSUM_PART_MASK) 2183 offloads->tx_offloads |= ENA_L4_IPV4_CSUM_PARTIAL; 2184 2185 /* Tx IPv6 checksum offloads */ 2186 if (offload_desc->tx & 2187 ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV6_CSUM_FULL_MASK) 2188 offloads->tx_offloads |= ENA_L4_IPV6_CSUM; 2189 if (offload_desc->tx & 2190 ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV6_CSUM_PART_MASK) 2191 offloads->tx_offloads |= ENA_L4_IPV6_CSUM_PARTIAL; 2192 2193 /* Rx IPv4 checksum offloads */ 2194 if (offload_desc->rx_supported & 2195 ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L3_CSUM_IPV4_MASK) 2196 offloads->rx_offloads |= ENA_L3_IPV4_CSUM; 2197 if (offload_desc->rx_supported & 2198 ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L4_IPV4_CSUM_MASK) 2199 offloads->rx_offloads |= ENA_L4_IPV4_CSUM; 2200 2201 /* Rx IPv6 checksum offloads */ 2202 if (offload_desc->rx_supported & 2203 ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L4_IPV6_CSUM_MASK) 2204 offloads->rx_offloads |= ENA_L4_IPV6_CSUM; 2205 2206 if (offload_desc->rx_supported & 2207 ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_HASH_MASK) 2208 offloads->rx_offloads |= ENA_RX_RSS_HASH; 2209 } 2210 2211 static int ena_init_once(void)
2212 { 2213 static bool init_done; 2214 2215 if (init_done) 2216 return 0; 2217 2218 if (rte_eal_process_type() == RTE_PROC_PRIMARY) { 2219 /* Init timer subsystem for the ENA timer service. */ 2220 rte_timer_subsystem_init(); 2221 /* Register handler for requests from secondary processes. */ 2222 rte_mp_action_register(ENA_MP_NAME, ena_mp_primary_handle); 2223 } 2224 2225 init_done = true; 2226 return 0; 2227 } 2228 2229 static int eth_ena_dev_init(struct rte_eth_dev *eth_dev) 2230 { 2231 struct ena_calc_queue_size_ctx calc_queue_ctx = { 0 }; 2232 struct rte_pci_device *pci_dev; 2233 struct rte_intr_handle *intr_handle; 2234 struct ena_adapter *adapter = eth_dev->data->dev_private; 2235 struct ena_com_dev *ena_dev = &adapter->ena_dev; 2236 struct ena_com_dev_get_features_ctx get_feat_ctx; 2237 struct ena_llq_configurations llq_config; 2238 const char *queue_type_str; 2239 uint32_t max_num_io_queues; 2240 int rc; 2241 static int adapters_found; 2242 bool disable_meta_caching; 2243 2244 eth_dev->dev_ops = &ena_dev_ops; 2245 eth_dev->rx_pkt_burst = &eth_ena_recv_pkts; 2246 eth_dev->tx_pkt_burst = &eth_ena_xmit_pkts; 2247 eth_dev->tx_pkt_prepare = &eth_ena_prep_pkts; 2248 2249 rc = ena_init_once(); 2250 if (rc != 0) 2251 return rc; 2252 2253 if (rte_eal_process_type() != RTE_PROC_PRIMARY) 2254 return 0; 2255 2256 eth_dev->data->dev_flags |= RTE_ETH_DEV_AUTOFILL_QUEUE_XSTATS; 2257 2258 memset(adapter, 0, sizeof(struct ena_adapter)); 2259 ena_dev = &adapter->ena_dev; 2260 2261 adapter->edev_data = eth_dev->data; 2262 2263 pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev); 2264 2265 PMD_INIT_LOG(INFO, "Initializing " PCI_PRI_FMT "\n", 2266 pci_dev->addr.domain, 2267 pci_dev->addr.bus, 2268 pci_dev->addr.devid, 2269 pci_dev->addr.function); 2270 2271 intr_handle = pci_dev->intr_handle; 2272 2273 adapter->regs = pci_dev->mem_resource[ENA_REGS_BAR].addr; 2274 adapter->dev_mem_base = pci_dev->mem_resource[ENA_MEM_BAR].addr; 2275 2276 if (!adapter->regs) { 2277 PMD_INIT_LOG(CRIT, "Failed to access registers BAR(%d)\n", 2278 ENA_REGS_BAR); 2279 return -ENXIO; 2280 } 2281 2282 ena_dev->reg_bar = adapter->regs; 2283 /* Pass device data as a pointer which can be passed to the IO functions 2284 * by the ena_com (for example - the memory allocation).
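 * ena_com keeps this handle in ena_dev->dmadev and hands it back to the platform
 * memory-allocation helpers invoked on behalf of this port.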
2285 */ 2286 ena_dev->dmadev = eth_dev->data; 2287 2288 adapter->id_number = adapters_found; 2289 2290 snprintf(adapter->name, ENA_NAME_MAX_LEN, "ena_%d", 2291 adapter->id_number); 2292 2293 /* Assign default devargs values */ 2294 adapter->missing_tx_completion_to = ENA_TX_TIMEOUT; 2295 adapter->enable_llq = true; 2296 adapter->use_large_llq_hdr = false; 2297 adapter->use_normal_llq_hdr = false; 2298 2299 /* Get user bypass */ 2300 rc = ena_parse_devargs(adapter, pci_dev->device.devargs); 2301 if (rc != 0) { 2302 PMD_INIT_LOG(CRIT, "Failed to parse devargs\n"); 2303 goto err; 2304 } 2305 adapter->llq_header_policy = ena_define_llq_hdr_policy(adapter); 2306 2307 rc = ena_com_allocate_customer_metrics_buffer(ena_dev); 2308 if (rc != 0) { 2309 PMD_INIT_LOG(CRIT, "Failed to allocate customer metrics buffer\n"); 2310 goto err; 2311 } 2312 2313 /* device specific initialization routine */ 2314 rc = ena_device_init(adapter, pci_dev, &get_feat_ctx); 2315 if (rc) { 2316 PMD_INIT_LOG(CRIT, "Failed to init ENA device\n"); 2317 goto err_metrics_delete; 2318 } 2319 2320 /* Check if device supports LSC */ 2321 if (!(adapter->all_aenq_groups & BIT(ENA_ADMIN_LINK_CHANGE))) 2322 adapter->edev_data->dev_flags &= ~RTE_ETH_DEV_INTR_LSC; 2323 2324 bool use_large_llq_hdr = ena_use_large_llq_hdr(adapter, 2325 get_feat_ctx.llq.entry_size_recommended); 2326 set_default_llq_configurations(&llq_config, &get_feat_ctx.llq, use_large_llq_hdr); 2327 rc = ena_set_queues_placement_policy(adapter, ena_dev, 2328 &get_feat_ctx.llq, &llq_config); 2329 if (unlikely(rc)) { 2330 PMD_INIT_LOG(CRIT, "Failed to set placement policy\n"); 2331 return rc; 2332 } 2333 2334 if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_HOST) { 2335 queue_type_str = "Regular"; 2336 } else { 2337 queue_type_str = "Low latency"; 2338 PMD_DRV_LOG(INFO, "LLQ entry size %uB\n", llq_config.llq_ring_entry_size_value); 2339 } 2340 PMD_DRV_LOG(INFO, "Placement policy: %s\n", queue_type_str); 2341 2342 calc_queue_ctx.ena_dev = ena_dev; 2343 calc_queue_ctx.get_feat_ctx = &get_feat_ctx; 2344 2345 max_num_io_queues = ena_calc_max_io_queue_num(ena_dev, &get_feat_ctx); 2346 rc = ena_calc_io_queue_size(&calc_queue_ctx, use_large_llq_hdr); 2347 if (unlikely((rc != 0) || (max_num_io_queues == 0))) { 2348 rc = -EFAULT; 2349 goto err_device_destroy; 2350 } 2351 2352 adapter->max_tx_ring_size = calc_queue_ctx.max_tx_queue_size; 2353 adapter->max_rx_ring_size = calc_queue_ctx.max_rx_queue_size; 2354 adapter->max_tx_sgl_size = calc_queue_ctx.max_tx_sgl_size; 2355 adapter->max_rx_sgl_size = calc_queue_ctx.max_rx_sgl_size; 2356 adapter->max_num_io_queues = max_num_io_queues; 2357 2358 if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) { 2359 disable_meta_caching = 2360 !!(get_feat_ctx.llq.accel_mode.u.get.supported_flags & 2361 BIT(ENA_ADMIN_DISABLE_META_CACHING)); 2362 } else { 2363 disable_meta_caching = false; 2364 } 2365 2366 /* prepare ring structures */ 2367 ena_init_rings(adapter, disable_meta_caching); 2368 2369 ena_config_debug_area(adapter); 2370 2371 /* Set max MTU for this device */ 2372 adapter->max_mtu = get_feat_ctx.dev_attr.max_mtu; 2373 2374 ena_set_offloads(&adapter->offloads, &get_feat_ctx.offload); 2375 2376 /* Copy MAC address and point DPDK to it */ 2377 eth_dev->data->mac_addrs = (struct rte_ether_addr *)adapter->mac_addr; 2378 rte_ether_addr_copy((struct rte_ether_addr *) 2379 get_feat_ctx.dev_attr.mac_addr, 2380 (struct rte_ether_addr *)adapter->mac_addr); 2381 2382 rc = ena_com_rss_init(ena_dev, ENA_RX_RSS_TABLE_LOG_SIZE); 
2383 if (unlikely(rc != 0)) { 2384 PMD_DRV_LOG(ERR, "Failed to initialize RSS in ENA device\n"); 2385 goto err_delete_debug_area; 2386 } 2387 2388 adapter->drv_stats = rte_zmalloc("adapter stats", 2389 sizeof(*adapter->drv_stats), 2390 RTE_CACHE_LINE_SIZE); 2391 if (!adapter->drv_stats) { 2392 PMD_DRV_LOG(ERR, 2393 "Failed to allocate memory for adapter statistics\n"); 2394 rc = -ENOMEM; 2395 goto err_rss_destroy; 2396 } 2397 2398 rte_spinlock_init(&adapter->admin_lock); 2399 2400 if (!adapter->control_path_poll_interval) { 2401 /* Control path interrupt mode */ 2402 rte_intr_callback_register(intr_handle, ena_control_path_handler, eth_dev); 2403 rte_intr_enable(intr_handle); 2404 ena_com_set_admin_polling_mode(ena_dev, false); 2405 } else { 2406 /* Control path polling mode */ 2407 rc = rte_eal_alarm_set(adapter->control_path_poll_interval, 2408 ena_control_path_poll_handler, eth_dev); 2409 if (unlikely(rc != 0)) { 2410 PMD_DRV_LOG(ERR, "Failed to set control path alarm\n"); 2411 goto err_control_path_destroy; 2412 } 2413 } 2414 ena_com_admin_aenq_enable(ena_dev); 2415 rte_timer_init(&adapter->timer_wd); 2416 2417 adapters_found++; 2418 adapter->state = ENA_ADAPTER_STATE_INIT; 2419 2420 return 0; 2421 err_control_path_destroy: 2422 rte_free(adapter->drv_stats); 2423 err_rss_destroy: 2424 ena_com_rss_destroy(ena_dev); 2425 err_delete_debug_area: 2426 ena_com_delete_debug_area(ena_dev); 2427 2428 err_device_destroy: 2429 ena_com_delete_host_info(ena_dev); 2430 ena_com_admin_destroy(ena_dev); 2431 err_metrics_delete: 2432 ena_com_delete_customer_metrics_buffer(ena_dev); 2433 err: 2434 return rc; 2435 } 2436 2437 static void ena_destroy_device(struct rte_eth_dev *eth_dev) 2438 { 2439 struct ena_adapter *adapter = eth_dev->data->dev_private; 2440 struct ena_com_dev *ena_dev = &adapter->ena_dev; 2441 2442 if (adapter->state == ENA_ADAPTER_STATE_FREE) 2443 return; 2444 2445 ena_com_set_admin_running_state(ena_dev, false); 2446 2447 if (adapter->state != ENA_ADAPTER_STATE_CLOSED) 2448 ena_close(eth_dev); 2449 2450 ena_com_rss_destroy(ena_dev); 2451 2452 ena_com_delete_debug_area(ena_dev); 2453 ena_com_delete_host_info(ena_dev); 2454 2455 ena_com_abort_admin_commands(ena_dev); 2456 ena_com_wait_for_abort_completion(ena_dev); 2457 ena_com_admin_destroy(ena_dev); 2458 ena_com_mmio_reg_read_request_destroy(ena_dev); 2459 ena_com_delete_customer_metrics_buffer(ena_dev); 2460 2461 adapter->state = ENA_ADAPTER_STATE_FREE; 2462 } 2463 2464 static int eth_ena_dev_uninit(struct rte_eth_dev *eth_dev) 2465 { 2466 if (rte_eal_process_type() != RTE_PROC_PRIMARY) 2467 return 0; 2468 2469 ena_destroy_device(eth_dev); 2470 2471 return 0; 2472 } 2473 2474 static int ena_dev_configure(struct rte_eth_dev *dev) 2475 { 2476 struct ena_adapter *adapter = dev->data->dev_private; 2477 int rc; 2478 2479 adapter->state = ENA_ADAPTER_STATE_CONFIG; 2480 2481 if (dev->data->dev_conf.rxmode.mq_mode & RTE_ETH_MQ_RX_RSS_FLAG) 2482 dev->data->dev_conf.rxmode.offloads |= RTE_ETH_RX_OFFLOAD_RSS_HASH; 2483 dev->data->dev_conf.txmode.offloads |= RTE_ETH_TX_OFFLOAD_MULTI_SEGS; 2484 2485 /* Scattered Rx cannot be turned off in the HW, so this capability must 2486 * be forced. 
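 * The device may always spread a frame over several Rx buffers, which is also why
 * RTE_ETH_RX_OFFLOAD_SCATTER is reported unconditionally in ena_get_rx_port_offloads().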
2487 */ 2488 dev->data->scattered_rx = 1; 2489 2490 adapter->last_tx_comp_qid = 0; 2491 2492 adapter->missing_tx_completion_budget = 2493 RTE_MIN(ENA_MONITORED_TX_QUEUES, dev->data->nb_tx_queues); 2494 2495 /* To avoid detection of the spurious Tx completion timeout due to 2496 * application not calling the Tx cleanup function, set timeout for the 2497 * Tx queue which should be half of the missing completion timeout for a 2498 * safety. If there will be a lot of missing Tx completions in the 2499 * queue, they will be detected sooner or later. 2500 */ 2501 adapter->tx_cleanup_stall_delay = adapter->missing_tx_completion_to / 2; 2502 2503 rc = ena_configure_aenq(adapter); 2504 2505 return rc; 2506 } 2507 2508 static void ena_init_rings(struct ena_adapter *adapter, 2509 bool disable_meta_caching) 2510 { 2511 size_t i; 2512 2513 for (i = 0; i < adapter->max_num_io_queues; i++) { 2514 struct ena_ring *ring = &adapter->tx_ring[i]; 2515 2516 ring->configured = 0; 2517 ring->type = ENA_RING_TYPE_TX; 2518 ring->adapter = adapter; 2519 ring->id = i; 2520 ring->tx_mem_queue_type = adapter->ena_dev.tx_mem_queue_type; 2521 ring->tx_max_header_size = adapter->ena_dev.tx_max_header_size; 2522 ring->sgl_size = adapter->max_tx_sgl_size; 2523 ring->disable_meta_caching = disable_meta_caching; 2524 } 2525 2526 for (i = 0; i < adapter->max_num_io_queues; i++) { 2527 struct ena_ring *ring = &adapter->rx_ring[i]; 2528 2529 ring->configured = 0; 2530 ring->type = ENA_RING_TYPE_RX; 2531 ring->adapter = adapter; 2532 ring->id = i; 2533 ring->sgl_size = adapter->max_rx_sgl_size; 2534 } 2535 } 2536 2537 static uint64_t ena_get_rx_port_offloads(struct ena_adapter *adapter) 2538 { 2539 uint64_t port_offloads = 0; 2540 2541 if (adapter->offloads.rx_offloads & ENA_L3_IPV4_CSUM) 2542 port_offloads |= RTE_ETH_RX_OFFLOAD_IPV4_CKSUM; 2543 2544 if (adapter->offloads.rx_offloads & 2545 (ENA_L4_IPV4_CSUM | ENA_L4_IPV6_CSUM)) 2546 port_offloads |= 2547 RTE_ETH_RX_OFFLOAD_UDP_CKSUM | RTE_ETH_RX_OFFLOAD_TCP_CKSUM; 2548 2549 if (adapter->offloads.rx_offloads & ENA_RX_RSS_HASH) 2550 port_offloads |= RTE_ETH_RX_OFFLOAD_RSS_HASH; 2551 2552 port_offloads |= RTE_ETH_RX_OFFLOAD_SCATTER; 2553 2554 return port_offloads; 2555 } 2556 2557 static uint64_t ena_get_tx_port_offloads(struct ena_adapter *adapter) 2558 { 2559 uint64_t port_offloads = 0; 2560 2561 if (adapter->offloads.tx_offloads & ENA_IPV4_TSO) 2562 port_offloads |= RTE_ETH_TX_OFFLOAD_TCP_TSO; 2563 2564 if (adapter->offloads.tx_offloads & ENA_L3_IPV4_CSUM) 2565 port_offloads |= RTE_ETH_TX_OFFLOAD_IPV4_CKSUM; 2566 if (adapter->offloads.tx_offloads & 2567 (ENA_L4_IPV4_CSUM_PARTIAL | ENA_L4_IPV4_CSUM | 2568 ENA_L4_IPV6_CSUM | ENA_L4_IPV6_CSUM_PARTIAL)) 2569 port_offloads |= 2570 RTE_ETH_TX_OFFLOAD_UDP_CKSUM | RTE_ETH_TX_OFFLOAD_TCP_CKSUM; 2571 2572 port_offloads |= RTE_ETH_TX_OFFLOAD_MULTI_SEGS; 2573 2574 port_offloads |= RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE; 2575 2576 return port_offloads; 2577 } 2578 2579 static uint64_t ena_get_rx_queue_offloads(struct ena_adapter *adapter) 2580 { 2581 RTE_SET_USED(adapter); 2582 2583 return 0; 2584 } 2585 2586 static uint64_t ena_get_tx_queue_offloads(struct ena_adapter *adapter) 2587 { 2588 uint64_t queue_offloads = 0; 2589 RTE_SET_USED(adapter); 2590 2591 queue_offloads |= RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE; 2592 2593 return queue_offloads; 2594 } 2595 2596 static int ena_infos_get(struct rte_eth_dev *dev, 2597 struct rte_eth_dev_info *dev_info) 2598 { 2599 struct ena_adapter *adapter; 2600 struct ena_com_dev *ena_dev; 2601 2602 
ena_assert_msg(dev->data != NULL, "Uninitialized device\n"); 2603 ena_assert_msg(dev->data->dev_private != NULL, "Uninitialized device\n"); 2604 adapter = dev->data->dev_private; 2605 2606 ena_dev = &adapter->ena_dev; 2607 ena_assert_msg(ena_dev != NULL, "Uninitialized device\n"); 2608 2609 dev_info->speed_capa = 2610 RTE_ETH_LINK_SPEED_1G | 2611 RTE_ETH_LINK_SPEED_2_5G | 2612 RTE_ETH_LINK_SPEED_5G | 2613 RTE_ETH_LINK_SPEED_10G | 2614 RTE_ETH_LINK_SPEED_25G | 2615 RTE_ETH_LINK_SPEED_40G | 2616 RTE_ETH_LINK_SPEED_50G | 2617 RTE_ETH_LINK_SPEED_100G | 2618 RTE_ETH_LINK_SPEED_200G | 2619 RTE_ETH_LINK_SPEED_400G; 2620 2621 /* Inform framework about available features */ 2622 dev_info->rx_offload_capa = ena_get_rx_port_offloads(adapter); 2623 dev_info->tx_offload_capa = ena_get_tx_port_offloads(adapter); 2624 dev_info->rx_queue_offload_capa = ena_get_rx_queue_offloads(adapter); 2625 dev_info->tx_queue_offload_capa = ena_get_tx_queue_offloads(adapter); 2626 2627 dev_info->flow_type_rss_offloads = ENA_ALL_RSS_HF; 2628 dev_info->hash_key_size = ENA_HASH_KEY_SIZE; 2629 2630 dev_info->min_rx_bufsize = ENA_MIN_FRAME_LEN; 2631 dev_info->max_rx_pktlen = adapter->max_mtu + RTE_ETHER_HDR_LEN + 2632 RTE_ETHER_CRC_LEN; 2633 dev_info->min_mtu = ENA_MIN_MTU; 2634 dev_info->max_mtu = adapter->max_mtu; 2635 dev_info->max_mac_addrs = 1; 2636 2637 dev_info->max_rx_queues = adapter->max_num_io_queues; 2638 dev_info->max_tx_queues = adapter->max_num_io_queues; 2639 dev_info->reta_size = ENA_RX_RSS_TABLE_SIZE; 2640 2641 dev_info->rx_desc_lim.nb_max = adapter->max_rx_ring_size; 2642 dev_info->rx_desc_lim.nb_min = ENA_MIN_RING_DESC; 2643 dev_info->rx_desc_lim.nb_seg_max = RTE_MIN(ENA_PKT_MAX_BUFS, 2644 adapter->max_rx_sgl_size); 2645 dev_info->rx_desc_lim.nb_mtu_seg_max = RTE_MIN(ENA_PKT_MAX_BUFS, 2646 adapter->max_rx_sgl_size); 2647 2648 dev_info->tx_desc_lim.nb_max = adapter->max_tx_ring_size; 2649 dev_info->tx_desc_lim.nb_min = ENA_MIN_RING_DESC; 2650 dev_info->tx_desc_lim.nb_seg_max = RTE_MIN(ENA_PKT_MAX_BUFS, 2651 adapter->max_tx_sgl_size); 2652 dev_info->tx_desc_lim.nb_mtu_seg_max = RTE_MIN(ENA_PKT_MAX_BUFS, 2653 adapter->max_tx_sgl_size); 2654 2655 dev_info->default_rxportconf.ring_size = RTE_MIN(ENA_DEFAULT_RING_SIZE, 2656 dev_info->rx_desc_lim.nb_max); 2657 dev_info->default_txportconf.ring_size = RTE_MIN(ENA_DEFAULT_RING_SIZE, 2658 dev_info->tx_desc_lim.nb_max); 2659 2660 dev_info->err_handle_mode = RTE_ETH_ERROR_HANDLE_MODE_PASSIVE; 2661 2662 return 0; 2663 } 2664 2665 static inline void ena_init_rx_mbuf(struct rte_mbuf *mbuf, uint16_t len) 2666 { 2667 mbuf->data_len = len; 2668 mbuf->data_off = RTE_PKTMBUF_HEADROOM; 2669 mbuf->refcnt = 1; 2670 mbuf->next = NULL; 2671 } 2672 2673 static struct rte_mbuf *ena_rx_mbuf(struct ena_ring *rx_ring, 2674 struct ena_com_rx_buf_info *ena_bufs, 2675 uint32_t descs, 2676 uint16_t *next_to_clean, 2677 uint8_t offset) 2678 { 2679 struct rte_mbuf *mbuf; 2680 struct rte_mbuf *mbuf_head; 2681 struct ena_rx_buffer *rx_info; 2682 int rc; 2683 uint16_t ntc, len, req_id, buf = 0; 2684 2685 if (unlikely(descs == 0)) 2686 return NULL; 2687 2688 ntc = *next_to_clean; 2689 2690 len = ena_bufs[buf].len; 2691 req_id = ena_bufs[buf].req_id; 2692 2693 rx_info = &rx_ring->rx_buffer_info[req_id]; 2694 2695 mbuf = rx_info->mbuf; 2696 RTE_ASSERT(mbuf != NULL); 2697 2698 ena_init_rx_mbuf(mbuf, len); 2699 2700 /* Fill the mbuf head with the data specific for 1st segment. 
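 * pkt_len on the head accumulates the length of the whole chain, while each segment keeps
 * only its own data_len; the Rx offset reported by the device is applied to the head
 * segment only.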
*/ 2701 mbuf_head = mbuf; 2702 mbuf_head->nb_segs = descs; 2703 mbuf_head->port = rx_ring->port_id; 2704 mbuf_head->pkt_len = len; 2705 mbuf_head->data_off += offset; 2706 2707 rx_info->mbuf = NULL; 2708 rx_ring->empty_rx_reqs[ntc] = req_id; 2709 ntc = ENA_IDX_NEXT_MASKED(ntc, rx_ring->size_mask); 2710 2711 while (--descs) { 2712 ++buf; 2713 len = ena_bufs[buf].len; 2714 req_id = ena_bufs[buf].req_id; 2715 2716 rx_info = &rx_ring->rx_buffer_info[req_id]; 2717 RTE_ASSERT(rx_info->mbuf != NULL); 2718 2719 if (unlikely(len == 0)) { 2720 /* 2721 * Some devices can pass descriptor with the length 0. 2722 * To avoid confusion, the PMD is simply putting the 2723 * descriptor back, as it was never used. We'll avoid 2724 * mbuf allocation that way. 2725 */ 2726 rc = ena_add_single_rx_desc(rx_ring->ena_com_io_sq, 2727 rx_info->mbuf, req_id); 2728 if (unlikely(rc != 0)) { 2729 /* Free the mbuf in case of an error. */ 2730 rte_mbuf_raw_free(rx_info->mbuf); 2731 } else { 2732 /* 2733 * If there was no error, just exit the loop as 2734 * 0 length descriptor is always the last one. 2735 */ 2736 break; 2737 } 2738 } else { 2739 /* Create an mbuf chain. */ 2740 mbuf->next = rx_info->mbuf; 2741 mbuf = mbuf->next; 2742 2743 ena_init_rx_mbuf(mbuf, len); 2744 mbuf_head->pkt_len += len; 2745 } 2746 2747 /* 2748 * Mark the descriptor as depleted and perform necessary 2749 * cleanup. 2750 * This code will execute in two cases: 2751 * 1. Descriptor len was greater than 0 - normal situation. 2752 * 2. Descriptor len was 0 and we failed to add the descriptor 2753 * to the device. In that situation, we should try to add 2754 * the mbuf again in the populate routine and mark the 2755 * descriptor as used up by the device. 2756 */ 2757 rx_info->mbuf = NULL; 2758 rx_ring->empty_rx_reqs[ntc] = req_id; 2759 ntc = ENA_IDX_NEXT_MASKED(ntc, rx_ring->size_mask); 2760 } 2761 2762 *next_to_clean = ntc; 2763 2764 return mbuf_head; 2765 } 2766 2767 static uint16_t eth_ena_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, 2768 uint16_t nb_pkts) 2769 { 2770 struct ena_ring *rx_ring = (struct ena_ring *)(rx_queue); 2771 unsigned int free_queue_entries; 2772 uint16_t next_to_clean = rx_ring->next_to_clean; 2773 uint16_t descs_in_use; 2774 struct rte_mbuf *mbuf; 2775 uint16_t completed; 2776 struct ena_com_rx_ctx ena_rx_ctx; 2777 int i, rc = 0; 2778 bool fill_hash; 2779 2780 #ifdef RTE_ETHDEV_DEBUG_RX 2781 /* Check adapter state */ 2782 if (unlikely(rx_ring->adapter->state != ENA_ADAPTER_STATE_RUNNING)) { 2783 PMD_RX_LOG(ALERT, 2784 "Trying to receive pkts while device is NOT running\n"); 2785 return 0; 2786 } 2787 #endif 2788 2789 fill_hash = rx_ring->offloads & RTE_ETH_RX_OFFLOAD_RSS_HASH; 2790 2791 descs_in_use = rx_ring->ring_size - 2792 ena_com_free_q_entries(rx_ring->ena_com_io_sq) - 1; 2793 nb_pkts = RTE_MIN(descs_in_use, nb_pkts); 2794 2795 for (completed = 0; completed < nb_pkts; completed++) { 2796 ena_rx_ctx.max_bufs = rx_ring->sgl_size; 2797 ena_rx_ctx.ena_bufs = rx_ring->ena_bufs; 2798 ena_rx_ctx.descs = 0; 2799 ena_rx_ctx.pkt_offset = 0; 2800 /* receive packet context */ 2801 rc = ena_com_rx_pkt(rx_ring->ena_com_io_cq, 2802 rx_ring->ena_com_io_sq, 2803 &ena_rx_ctx); 2804 if (unlikely(rc)) { 2805 PMD_RX_LOG(ERR, 2806 "Failed to get the packet from the device, rc: %d\n", 2807 rc); 2808 if (rc == ENA_COM_NO_SPACE) { 2809 ++rx_ring->rx_stats.bad_desc_num; 2810 ena_trigger_reset(rx_ring->adapter, 2811 ENA_REGS_RESET_TOO_MANY_RX_DESCS); 2812 } else { 2813 ++rx_ring->rx_stats.bad_req_id; 2814 
ena_trigger_reset(rx_ring->adapter, 2815 ENA_REGS_RESET_INV_RX_REQ_ID); 2816 } 2817 return 0; 2818 } 2819 2820 mbuf = ena_rx_mbuf(rx_ring, 2821 ena_rx_ctx.ena_bufs, 2822 ena_rx_ctx.descs, 2823 &next_to_clean, 2824 ena_rx_ctx.pkt_offset); 2825 if (unlikely(mbuf == NULL)) { 2826 for (i = 0; i < ena_rx_ctx.descs; ++i) { 2827 rx_ring->empty_rx_reqs[next_to_clean] = 2828 rx_ring->ena_bufs[i].req_id; 2829 next_to_clean = ENA_IDX_NEXT_MASKED( 2830 next_to_clean, rx_ring->size_mask); 2831 } 2832 break; 2833 } 2834 2835 /* fill mbuf attributes if any */ 2836 ena_rx_mbuf_prepare(rx_ring, mbuf, &ena_rx_ctx, fill_hash); 2837 2838 if (unlikely(mbuf->ol_flags & 2839 (RTE_MBUF_F_RX_IP_CKSUM_BAD | RTE_MBUF_F_RX_L4_CKSUM_BAD))) 2840 rte_atomic64_inc(&rx_ring->adapter->drv_stats->ierrors); 2841 2842 rx_pkts[completed] = mbuf; 2843 rx_ring->rx_stats.bytes += mbuf->pkt_len; 2844 } 2845 2846 rx_ring->rx_stats.cnt += completed; 2847 rx_ring->next_to_clean = next_to_clean; 2848 2849 free_queue_entries = ena_com_free_q_entries(rx_ring->ena_com_io_sq); 2850 2851 /* Burst refill to save doorbells, memory barriers, const interval */ 2852 if (free_queue_entries >= rx_ring->rx_free_thresh) { 2853 ena_populate_rx_queue(rx_ring, free_queue_entries); 2854 } 2855 2856 return completed; 2857 } 2858 2859 static uint16_t 2860 eth_ena_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, 2861 uint16_t nb_pkts) 2862 { 2863 int32_t ret; 2864 uint32_t i; 2865 struct rte_mbuf *m; 2866 struct ena_ring *tx_ring = (struct ena_ring *)(tx_queue); 2867 struct ena_adapter *adapter = tx_ring->adapter; 2868 struct rte_ipv4_hdr *ip_hdr; 2869 uint64_t ol_flags; 2870 uint64_t l4_csum_flag; 2871 uint64_t dev_offload_capa; 2872 uint16_t frag_field; 2873 bool need_pseudo_csum; 2874 2875 dev_offload_capa = adapter->offloads.tx_offloads; 2876 for (i = 0; i != nb_pkts; i++) { 2877 m = tx_pkts[i]; 2878 ol_flags = m->ol_flags; 2879 2880 /* Check if any offload flag was set */ 2881 if (ol_flags == 0) 2882 continue; 2883 2884 l4_csum_flag = ol_flags & RTE_MBUF_F_TX_L4_MASK; 2885 /* SCTP checksum offload is not supported by the ENA. */ 2886 if ((ol_flags & ENA_TX_OFFLOAD_NOTSUP_MASK) || 2887 l4_csum_flag == RTE_MBUF_F_TX_SCTP_CKSUM) { 2888 PMD_TX_LOG(DEBUG, 2889 "mbuf[%" PRIu32 "] has unsupported offloads flags set: 0x%" PRIu64 "\n", 2890 i, ol_flags); 2891 rte_errno = ENOTSUP; 2892 return i; 2893 } 2894 2895 if (unlikely(m->nb_segs >= tx_ring->sgl_size && 2896 !(tx_ring->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV && 2897 m->nb_segs == tx_ring->sgl_size && 2898 m->data_len < tx_ring->tx_max_header_size))) { 2899 PMD_TX_LOG(DEBUG, 2900 "mbuf[%" PRIu32 "] has too many segments: %" PRIu16 "\n", 2901 i, m->nb_segs); 2902 rte_errno = EINVAL; 2903 return i; 2904 } 2905 2906 #ifdef RTE_LIBRTE_ETHDEV_DEBUG 2907 /* Check if requested offload is also enabled for the queue */ 2908 if ((ol_flags & RTE_MBUF_F_TX_IP_CKSUM && 2909 !(tx_ring->offloads & RTE_ETH_TX_OFFLOAD_IPV4_CKSUM)) || 2910 (l4_csum_flag == RTE_MBUF_F_TX_TCP_CKSUM && 2911 !(tx_ring->offloads & RTE_ETH_TX_OFFLOAD_TCP_CKSUM)) || 2912 (l4_csum_flag == RTE_MBUF_F_TX_UDP_CKSUM && 2913 !(tx_ring->offloads & RTE_ETH_TX_OFFLOAD_UDP_CKSUM))) { 2914 PMD_TX_LOG(DEBUG, 2915 "mbuf[%" PRIu32 "]: requested offloads: %" PRIu16 " are not enabled for the queue[%u]\n", 2916 i, m->nb_segs, tx_ring->id); 2917 rte_errno = EINVAL; 2918 return i; 2919 } 2920 2921 /* The caller is obligated to set l2 and l3 len if any cksum 2922 * offload is enabled. 
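 * Without valid l2_len/l3_len the device cannot locate the headers it has to patch, so
 * such packets are rejected here instead of being sent with a broken checksum.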
2923 */ 2924 if (unlikely(ol_flags & (RTE_MBUF_F_TX_IP_CKSUM | RTE_MBUF_F_TX_L4_MASK) && 2925 (m->l2_len == 0 || m->l3_len == 0))) { 2926 PMD_TX_LOG(DEBUG, 2927 "mbuf[%" PRIu32 "]: l2_len or l3_len values are 0 while the offload was requested\n", 2928 i); 2929 rte_errno = EINVAL; 2930 return i; 2931 } 2932 ret = rte_validate_tx_offload(m); 2933 if (ret != 0) { 2934 rte_errno = -ret; 2935 return i; 2936 } 2937 #endif 2938 2939 /* Verify HW support for requested offloads and determine if 2940 * pseudo header checksum is needed. 2941 */ 2942 need_pseudo_csum = false; 2943 if (ol_flags & RTE_MBUF_F_TX_IPV4) { 2944 if (ol_flags & RTE_MBUF_F_TX_IP_CKSUM && 2945 !(dev_offload_capa & ENA_L3_IPV4_CSUM)) { 2946 rte_errno = ENOTSUP; 2947 return i; 2948 } 2949 2950 if (ol_flags & RTE_MBUF_F_TX_TCP_SEG && 2951 !(dev_offload_capa & ENA_IPV4_TSO)) { 2952 rte_errno = ENOTSUP; 2953 return i; 2954 } 2955 2956 /* Check HW capabilities and if pseudo csum is needed 2957 * for L4 offloads. 2958 */ 2959 if (l4_csum_flag != RTE_MBUF_F_TX_L4_NO_CKSUM && 2960 !(dev_offload_capa & ENA_L4_IPV4_CSUM)) { 2961 if (dev_offload_capa & 2962 ENA_L4_IPV4_CSUM_PARTIAL) { 2963 need_pseudo_csum = true; 2964 } else { 2965 rte_errno = ENOTSUP; 2966 return i; 2967 } 2968 } 2969 2970 /* Parse the DF flag */ 2971 ip_hdr = rte_pktmbuf_mtod_offset(m, 2972 struct rte_ipv4_hdr *, m->l2_len); 2973 frag_field = rte_be_to_cpu_16(ip_hdr->fragment_offset); 2974 if (frag_field & RTE_IPV4_HDR_DF_FLAG) { 2975 m->packet_type |= RTE_PTYPE_L4_NONFRAG; 2976 } else if (ol_flags & RTE_MBUF_F_TX_TCP_SEG) { 2977 /* In case we are supposed to TSO and have DF 2978 * not set (DF=0) hardware must be provided with 2979 * partial checksum. 2980 */ 2981 need_pseudo_csum = true; 2982 } 2983 } else if (ol_flags & RTE_MBUF_F_TX_IPV6) { 2984 /* There is no support for IPv6 TSO as for now. 
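 * Requests combining RTE_MBUF_F_TX_TCP_SEG with RTE_MBUF_F_TX_IPV6 are therefore
 * rejected with ENOTSUP below.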
*/ 2985 if (ol_flags & RTE_MBUF_F_TX_TCP_SEG) { 2986 rte_errno = ENOTSUP; 2987 return i; 2988 } 2989 2990 /* Check HW capabilities and if pseudo csum is needed */ 2991 if (l4_csum_flag != RTE_MBUF_F_TX_L4_NO_CKSUM && 2992 !(dev_offload_capa & ENA_L4_IPV6_CSUM)) { 2993 if (dev_offload_capa & 2994 ENA_L4_IPV6_CSUM_PARTIAL) { 2995 need_pseudo_csum = true; 2996 } else { 2997 rte_errno = ENOTSUP; 2998 return i; 2999 } 3000 } 3001 } 3002 3003 if (need_pseudo_csum) { 3004 ret = rte_net_intel_cksum_flags_prepare(m, ol_flags); 3005 if (ret != 0) { 3006 rte_errno = -ret; 3007 return i; 3008 } 3009 } 3010 } 3011 3012 return i; 3013 } 3014 3015 static void ena_update_hints(struct ena_adapter *adapter, 3016 struct ena_admin_ena_hw_hints *hints) 3017 { 3018 if (hints->admin_completion_tx_timeout) 3019 adapter->ena_dev.admin_queue.completion_timeout = 3020 hints->admin_completion_tx_timeout * 1000; 3021 3022 if (hints->mmio_read_timeout) 3023 /* convert to usec */ 3024 adapter->ena_dev.mmio_read.reg_read_to = 3025 hints->mmio_read_timeout * 1000; 3026 3027 if (hints->driver_watchdog_timeout) { 3028 if (hints->driver_watchdog_timeout == ENA_HW_HINTS_NO_TIMEOUT) 3029 adapter->keep_alive_timeout = ENA_HW_HINTS_NO_TIMEOUT; 3030 else 3031 // Convert msecs to ticks 3032 adapter->keep_alive_timeout = 3033 (hints->driver_watchdog_timeout * 3034 rte_get_timer_hz()) / 1000; 3035 } 3036 } 3037 3038 static void ena_tx_map_mbuf(struct ena_ring *tx_ring, 3039 struct ena_tx_buffer *tx_info, 3040 struct rte_mbuf *mbuf, 3041 void **push_header, 3042 uint16_t *header_len) 3043 { 3044 struct ena_com_buf *ena_buf; 3045 uint16_t delta, seg_len, push_len; 3046 3047 delta = 0; 3048 seg_len = mbuf->data_len; 3049 3050 tx_info->mbuf = mbuf; 3051 ena_buf = tx_info->bufs; 3052 3053 if (tx_ring->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) { 3054 /* 3055 * Tx header might be (and will be in most cases) smaller than 3056 * tx_max_header_size. But it's not an issue to send more data 3057 * to the device, than actually needed if the mbuf size is 3058 * greater than tx_max_header_size. 3059 */ 3060 push_len = RTE_MIN(mbuf->pkt_len, tx_ring->tx_max_header_size); 3061 *header_len = push_len; 3062 3063 if (likely(push_len <= seg_len)) { 3064 /* If the push header is in the single segment, then 3065 * just point it to the 1st mbuf data. 3066 */ 3067 *push_header = rte_pktmbuf_mtod(mbuf, uint8_t *); 3068 } else { 3069 /* If the push header lays in the several segments, copy 3070 * it to the intermediate buffer. 
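 * delta then records how many bytes of the following segments were already consumed by
 * the pushed header and have to be skipped when the SGL entries are built below.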
3071 */ 3072 rte_pktmbuf_read(mbuf, 0, push_len, 3073 tx_ring->push_buf_intermediate_buf); 3074 *push_header = tx_ring->push_buf_intermediate_buf; 3075 delta = push_len - seg_len; 3076 } 3077 } else { 3078 *push_header = NULL; 3079 *header_len = 0; 3080 push_len = 0; 3081 } 3082 3083 /* Process first segment taking into consideration pushed header */ 3084 if (seg_len > push_len) { 3085 ena_buf->paddr = mbuf->buf_iova + 3086 mbuf->data_off + 3087 push_len; 3088 ena_buf->len = seg_len - push_len; 3089 ena_buf++; 3090 tx_info->num_of_bufs++; 3091 } 3092 3093 while ((mbuf = mbuf->next) != NULL) { 3094 seg_len = mbuf->data_len; 3095 3096 /* Skip mbufs if whole data is pushed as a header */ 3097 if (unlikely(delta > seg_len)) { 3098 delta -= seg_len; 3099 continue; 3100 } 3101 3102 ena_buf->paddr = mbuf->buf_iova + mbuf->data_off + delta; 3103 ena_buf->len = seg_len - delta; 3104 ena_buf++; 3105 tx_info->num_of_bufs++; 3106 3107 delta = 0; 3108 } 3109 } 3110 3111 static int ena_xmit_mbuf(struct ena_ring *tx_ring, struct rte_mbuf *mbuf) 3112 { 3113 struct ena_tx_buffer *tx_info; 3114 struct ena_com_tx_ctx ena_tx_ctx = { { 0 } }; 3115 uint16_t next_to_use; 3116 uint16_t header_len; 3117 uint16_t req_id; 3118 void *push_header; 3119 int nb_hw_desc; 3120 int rc; 3121 3122 /* Checking for space for 2 additional metadata descriptors due to 3123 * possible header split and metadata descriptor 3124 */ 3125 if (!ena_com_sq_have_enough_space(tx_ring->ena_com_io_sq, 3126 mbuf->nb_segs + 2)) { 3127 PMD_TX_LOG(DEBUG, "Not enough space in the tx queue\n"); 3128 return ENA_COM_NO_MEM; 3129 } 3130 3131 next_to_use = tx_ring->next_to_use; 3132 3133 req_id = tx_ring->empty_tx_reqs[next_to_use]; 3134 tx_info = &tx_ring->tx_buffer_info[req_id]; 3135 tx_info->num_of_bufs = 0; 3136 RTE_ASSERT(tx_info->mbuf == NULL); 3137 3138 ena_tx_map_mbuf(tx_ring, tx_info, mbuf, &push_header, &header_len); 3139 3140 ena_tx_ctx.ena_bufs = tx_info->bufs; 3141 ena_tx_ctx.push_header = push_header; 3142 ena_tx_ctx.num_bufs = tx_info->num_of_bufs; 3143 ena_tx_ctx.req_id = req_id; 3144 ena_tx_ctx.header_len = header_len; 3145 3146 /* Set Tx offloads flags, if applicable */ 3147 ena_tx_mbuf_prepare(mbuf, &ena_tx_ctx, tx_ring->offloads, 3148 tx_ring->disable_meta_caching); 3149 3150 if (unlikely(ena_com_is_doorbell_needed(tx_ring->ena_com_io_sq, 3151 &ena_tx_ctx))) { 3152 PMD_TX_LOG(DEBUG, 3153 "LLQ Tx max burst size of queue %d achieved, writing doorbell to send burst\n", 3154 tx_ring->id); 3155 ena_com_write_sq_doorbell(tx_ring->ena_com_io_sq); 3156 tx_ring->tx_stats.doorbells++; 3157 tx_ring->pkts_without_db = false; 3158 } 3159 3160 /* prepare the packet's descriptors to dma engine */ 3161 rc = ena_com_prepare_tx(tx_ring->ena_com_io_sq, &ena_tx_ctx, 3162 &nb_hw_desc); 3163 if (unlikely(rc)) { 3164 PMD_DRV_LOG(ERR, "Failed to prepare Tx buffers, rc: %d\n", rc); 3165 ++tx_ring->tx_stats.prepare_ctx_err; 3166 ena_trigger_reset(tx_ring->adapter, 3167 ENA_REGS_RESET_DRIVER_INVALID_STATE); 3168 return rc; 3169 } 3170 3171 tx_info->tx_descs = nb_hw_desc; 3172 tx_info->timestamp = rte_get_timer_cycles(); 3173 3174 tx_ring->tx_stats.cnt++; 3175 tx_ring->tx_stats.bytes += mbuf->pkt_len; 3176 3177 tx_ring->next_to_use = ENA_IDX_NEXT_MASKED(next_to_use, 3178 tx_ring->size_mask); 3179 3180 return 0; 3181 } 3182 3183 static int ena_tx_cleanup(void *txp, uint32_t free_pkt_cnt) 3184 { 3185 struct rte_mbuf *pkts_to_clean[ENA_CLEANUP_BUF_THRESH]; 3186 struct ena_ring *tx_ring = (struct ena_ring *)txp; 3187 size_t mbuf_cnt = 0; 3188 size_t pkt_cnt = 0; 
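	/* When RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE is enabled, mbufs are batched in pkts_to_clean and released with rte_pktmbuf_free_bulk() once ENA_CLEANUP_BUF_THRESH segments have been gathered. */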
3189 unsigned int total_tx_descs = 0; 3190 unsigned int total_tx_pkts = 0; 3191 uint16_t cleanup_budget; 3192 uint16_t next_to_clean = tx_ring->next_to_clean; 3193 bool fast_free = tx_ring->offloads & RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE; 3194 3195 /* 3196 * If free_pkt_cnt is equal to 0, it means that the user requested 3197 * full cleanup, so attempt to release all Tx descriptors 3198 * (ring_size - 1 -> size_mask) 3199 */ 3200 cleanup_budget = (free_pkt_cnt == 0) ? tx_ring->size_mask : free_pkt_cnt; 3201 3202 while (likely(total_tx_pkts < cleanup_budget)) { 3203 struct rte_mbuf *mbuf; 3204 struct ena_tx_buffer *tx_info; 3205 uint16_t req_id; 3206 3207 if (ena_com_tx_comp_req_id_get(tx_ring->ena_com_io_cq, &req_id) != 0) 3208 break; 3209 3210 if (unlikely(validate_tx_req_id(tx_ring, req_id) != 0)) 3211 break; 3212 3213 /* Get Tx info & store how many descs were processed */ 3214 tx_info = &tx_ring->tx_buffer_info[req_id]; 3215 tx_info->timestamp = 0; 3216 3217 mbuf = tx_info->mbuf; 3218 if (fast_free) { 3219 pkts_to_clean[pkt_cnt++] = mbuf; 3220 mbuf_cnt += mbuf->nb_segs; 3221 if (mbuf_cnt >= ENA_CLEANUP_BUF_THRESH) { 3222 rte_pktmbuf_free_bulk(pkts_to_clean, pkt_cnt); 3223 mbuf_cnt = 0; 3224 pkt_cnt = 0; 3225 } 3226 } else { 3227 rte_pktmbuf_free(mbuf); 3228 } 3229 3230 tx_info->mbuf = NULL; 3231 tx_ring->empty_tx_reqs[next_to_clean] = req_id; 3232 3233 total_tx_descs += tx_info->tx_descs; 3234 total_tx_pkts++; 3235 3236 /* Put back descriptor to the ring for reuse */ 3237 next_to_clean = ENA_IDX_NEXT_MASKED(next_to_clean, 3238 tx_ring->size_mask); 3239 } 3240 3241 if (likely(total_tx_descs > 0)) { 3242 /* acknowledge completion of sent packets */ 3243 tx_ring->next_to_clean = next_to_clean; 3244 ena_com_comp_ack(tx_ring->ena_com_io_sq, total_tx_descs); 3245 } 3246 3247 if (mbuf_cnt != 0) 3248 rte_pktmbuf_free_bulk(pkts_to_clean, pkt_cnt); 3249 3250 /* Notify completion handler that full cleanup was performed */ 3251 if (free_pkt_cnt == 0 || total_tx_pkts < cleanup_budget) 3252 tx_ring->last_cleanup_ticks = rte_get_timer_cycles(); 3253 3254 return total_tx_pkts; 3255 } 3256 3257 static uint16_t eth_ena_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, 3258 uint16_t nb_pkts) 3259 { 3260 struct ena_ring *tx_ring = (struct ena_ring *)(tx_queue); 3261 int available_desc; 3262 uint16_t sent_idx = 0; 3263 3264 #ifdef RTE_ETHDEV_DEBUG_TX 3265 /* Check adapter state */ 3266 if (unlikely(tx_ring->adapter->state != ENA_ADAPTER_STATE_RUNNING)) { 3267 PMD_TX_LOG(ALERT, 3268 "Trying to xmit pkts while device is NOT running\n"); 3269 return 0; 3270 } 3271 #endif 3272 3273 available_desc = ena_com_free_q_entries(tx_ring->ena_com_io_sq); 3274 if (available_desc < tx_ring->tx_free_thresh) 3275 ena_tx_cleanup((void *)tx_ring, 0); 3276 3277 for (sent_idx = 0; sent_idx < nb_pkts; sent_idx++) { 3278 if (ena_xmit_mbuf(tx_ring, tx_pkts[sent_idx])) 3279 break; 3280 tx_ring->pkts_without_db = true; 3281 rte_prefetch0(tx_pkts[ENA_IDX_ADD_MASKED(sent_idx, 4, 3282 tx_ring->size_mask)]); 3283 } 3284 3285 /* If there are ready packets to be xmitted... 
*/ 3286 if (likely(tx_ring->pkts_without_db)) { 3287 /* ...let HW do its best :-) */ 3288 ena_com_write_sq_doorbell(tx_ring->ena_com_io_sq); 3289 tx_ring->tx_stats.doorbells++; 3290 tx_ring->pkts_without_db = false; 3291 } 3292 3293 tx_ring->tx_stats.available_desc = 3294 ena_com_free_q_entries(tx_ring->ena_com_io_sq); 3295 tx_ring->tx_stats.tx_poll++; 3296 3297 return sent_idx; 3298 } 3299 3300 static void ena_copy_customer_metrics(struct ena_adapter *adapter, uint64_t *buf, 3301 size_t num_metrics) 3302 { 3303 struct ena_com_dev *ena_dev = &adapter->ena_dev; 3304 int rc; 3305 3306 if (ena_com_get_cap(ena_dev, ENA_ADMIN_CUSTOMER_METRICS)) { 3307 if (num_metrics != ENA_STATS_ARRAY_METRICS) { 3308 PMD_DRV_LOG(ERR, "Detected discrepancy in the number of customer metrics"); 3309 return; 3310 } 3311 rte_spinlock_lock(&adapter->admin_lock); 3312 rc = ENA_PROXY(adapter, 3313 ena_com_get_customer_metrics, 3314 &adapter->ena_dev, 3315 (char *)buf, 3316 num_metrics * sizeof(uint64_t)); 3317 rte_spinlock_unlock(&adapter->admin_lock); 3318 if (rc != 0) { 3319 PMD_DRV_LOG(WARNING, "Failed to get customer metrics, rc: %d\n", rc); 3320 return; 3321 } 3322 3323 } else if (ena_com_get_cap(ena_dev, ENA_ADMIN_ENI_STATS)) { 3324 if (num_metrics != ENA_STATS_ARRAY_METRICS_LEGACY) { 3325 PMD_DRV_LOG(ERR, "Detected discrepancy in the number of legacy metrics"); 3326 return; 3327 } 3328 3329 rte_spinlock_lock(&adapter->admin_lock); 3330 rc = ENA_PROXY(adapter, 3331 ena_com_get_eni_stats, 3332 &adapter->ena_dev, 3333 (struct ena_admin_eni_stats *)buf); 3334 rte_spinlock_unlock(&adapter->admin_lock); 3335 if (rc != 0) { 3336 PMD_DRV_LOG(WARNING, 3337 "Failed to get ENI metrics, rc: %d\n", rc); 3338 return; 3339 } 3340 } 3341 } 3342 3343 static void ena_copy_ena_srd_info(struct ena_adapter *adapter, 3344 struct ena_stats_srd *srd_info) 3345 { 3346 int rc; 3347 3348 if (!ena_com_get_cap(&adapter->ena_dev, ENA_ADMIN_ENA_SRD_INFO)) 3349 return; 3350 3351 rte_spinlock_lock(&adapter->admin_lock); 3352 rc = ENA_PROXY(adapter, 3353 ena_com_get_ena_srd_info, 3354 &adapter->ena_dev, 3355 (struct ena_admin_ena_srd_info *)srd_info); 3356 rte_spinlock_unlock(&adapter->admin_lock); 3357 if (rc != ENA_COM_OK && rc != ENA_COM_UNSUPPORTED) { 3358 PMD_DRV_LOG(WARNING, 3359 "Failed to get ENA express srd info, rc: %d\n", rc); 3360 return; 3361 } 3362 } 3363 3364 /** 3365 * DPDK callback to retrieve names of extended device statistics 3366 * 3367 * @param dev 3368 * Pointer to Ethernet device structure. 3369 * @param[out] xstats_names 3370 * Buffer to insert names into. 3371 * @param n 3372 * Number of names. 3373 * 3374 * @return 3375 * Number of xstats names. 
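 *   If @p n is lower than the required number of entries or @p xstats_names is NULL,
 *   only the required count is returned and no names are written.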
3376 */ 3377 static int ena_xstats_get_names(struct rte_eth_dev *dev, 3378 struct rte_eth_xstat_name *xstats_names, 3379 unsigned int n) 3380 { 3381 struct ena_adapter *adapter = dev->data->dev_private; 3382 unsigned int xstats_count = ena_xstats_calc_num(dev->data); 3383 unsigned int stat, i, count = 0; 3384 3385 if (n < xstats_count || !xstats_names) 3386 return xstats_count; 3387 3388 for (stat = 0; stat < ENA_STATS_ARRAY_GLOBAL; stat++, count++) 3389 strcpy(xstats_names[count].name, 3390 ena_stats_global_strings[stat].name); 3391 3392 for (stat = 0; stat < adapter->metrics_num; stat++, count++) 3393 rte_strscpy(xstats_names[count].name, 3394 ena_stats_metrics_strings[stat].name, 3395 RTE_ETH_XSTATS_NAME_SIZE); 3396 for (stat = 0; stat < ENA_STATS_ARRAY_ENA_SRD; stat++, count++) 3397 rte_strscpy(xstats_names[count].name, 3398 ena_stats_srd_strings[stat].name, 3399 RTE_ETH_XSTATS_NAME_SIZE); 3400 3401 for (stat = 0; stat < ENA_STATS_ARRAY_RX; stat++) 3402 for (i = 0; i < dev->data->nb_rx_queues; i++, count++) 3403 snprintf(xstats_names[count].name, 3404 sizeof(xstats_names[count].name), 3405 "rx_q%d_%s", i, 3406 ena_stats_rx_strings[stat].name); 3407 3408 for (stat = 0; stat < ENA_STATS_ARRAY_TX; stat++) 3409 for (i = 0; i < dev->data->nb_tx_queues; i++, count++) 3410 snprintf(xstats_names[count].name, 3411 sizeof(xstats_names[count].name), 3412 "tx_q%d_%s", i, 3413 ena_stats_tx_strings[stat].name); 3414 3415 return xstats_count; 3416 } 3417 3418 /** 3419 * DPDK callback to retrieve names of extended device statistics for the given 3420 * ids. 3421 * 3422 * @param dev 3423 * Pointer to Ethernet device structure. 3424 * @param[out] xstats_names 3425 * Buffer to insert names into. 3426 * @param ids 3427 * IDs array for which the names should be retrieved. 3428 * @param size 3429 * Number of ids. 3430 * 3431 * @return 3432 * Positive value: number of xstats names. Negative value: error code. 
3433 */ 3434 static int ena_xstats_get_names_by_id(struct rte_eth_dev *dev, 3435 const uint64_t *ids, 3436 struct rte_eth_xstat_name *xstats_names, 3437 unsigned int size) 3438 { 3439 struct ena_adapter *adapter = dev->data->dev_private; 3440 uint64_t xstats_count = ena_xstats_calc_num(dev->data); 3441 uint64_t id, qid; 3442 unsigned int i; 3443 3444 if (xstats_names == NULL) 3445 return xstats_count; 3446 3447 for (i = 0; i < size; ++i) { 3448 id = ids[i]; 3449 if (id > xstats_count) { 3450 PMD_DRV_LOG(ERR, 3451 "ID value out of range: id=%" PRIu64 ", xstats_num=%" PRIu64 "\n", 3452 id, xstats_count); 3453 return -EINVAL; 3454 } 3455 3456 if (id < ENA_STATS_ARRAY_GLOBAL) { 3457 strcpy(xstats_names[i].name, 3458 ena_stats_global_strings[id].name); 3459 continue; 3460 } 3461 3462 id -= ENA_STATS_ARRAY_GLOBAL; 3463 if (id < adapter->metrics_num) { 3464 rte_strscpy(xstats_names[i].name, 3465 ena_stats_metrics_strings[id].name, 3466 RTE_ETH_XSTATS_NAME_SIZE); 3467 continue; 3468 } 3469 3470 id -= adapter->metrics_num; 3471 3472 if (id < ENA_STATS_ARRAY_ENA_SRD) { 3473 rte_strscpy(xstats_names[i].name, 3474 ena_stats_srd_strings[id].name, 3475 RTE_ETH_XSTATS_NAME_SIZE); 3476 continue; 3477 } 3478 id -= ENA_STATS_ARRAY_ENA_SRD; 3479 3480 if (id < ENA_STATS_ARRAY_RX) { 3481 qid = id / dev->data->nb_rx_queues; 3482 id %= dev->data->nb_rx_queues; 3483 snprintf(xstats_names[i].name, 3484 sizeof(xstats_names[i].name), 3485 "rx_q%" PRIu64 "_%s", 3486 qid, ena_stats_rx_strings[id].name); 3487 continue; 3488 } 3489 3490 id -= ENA_STATS_ARRAY_RX; 3491 /* Although this condition is not needed, it was added for 3492 * compatibility in case a new xstat structure is ever added. 3493 */ 3494 if (id < ENA_STATS_ARRAY_TX) { 3495 qid = id / dev->data->nb_tx_queues; 3496 id %= dev->data->nb_tx_queues; 3497 snprintf(xstats_names[i].name, 3498 sizeof(xstats_names[i].name), 3499 "tx_q%" PRIu64 "_%s", 3500 qid, ena_stats_tx_strings[id].name); 3501 continue; 3502 } 3503 } 3504 3505 return i; 3506 } 3507 3508 /** 3509 * DPDK callback to get extended device statistics. 3510 * 3511 * @param dev 3512 * Pointer to Ethernet device structure. 3513 * @param[out] xstats 3514 * Stats table output buffer. 3515 * @param n 3516 * The size of the stats table. 3517 * 3518 * @return 3519 * Number of xstats on success, negative on failure.
3520 */ 3521 static int ena_xstats_get(struct rte_eth_dev *dev, 3522 struct rte_eth_xstat *xstats, 3523 unsigned int n) 3524 { 3525 struct ena_adapter *adapter = dev->data->dev_private; 3526 unsigned int xstats_count = ena_xstats_calc_num(dev->data); 3527 unsigned int stat, i, count = 0; 3528 int stat_offset; 3529 void *stats_begin; 3530 uint64_t metrics_stats[ENA_MAX_CUSTOMER_METRICS]; 3531 struct ena_stats_srd srd_info = {0}; 3532 3533 if (n < xstats_count) 3534 return xstats_count; 3535 3536 if (!xstats) 3537 return 0; 3538 3539 for (stat = 0; stat < ENA_STATS_ARRAY_GLOBAL; stat++, count++) { 3540 stat_offset = ena_stats_global_strings[stat].stat_offset; 3541 stats_begin = &adapter->dev_stats; 3542 3543 xstats[count].id = count; 3544 xstats[count].value = *((uint64_t *) 3545 ((char *)stats_begin + stat_offset)); 3546 } 3547 3548 ena_copy_customer_metrics(adapter, metrics_stats, adapter->metrics_num); 3549 stats_begin = metrics_stats; 3550 for (stat = 0; stat < adapter->metrics_num; stat++, count++) { 3551 stat_offset = ena_stats_metrics_strings[stat].stat_offset; 3552 3553 xstats[count].id = count; 3554 xstats[count].value = *((uint64_t *) 3555 ((char *)stats_begin + stat_offset)); 3556 } 3557 3558 ena_copy_ena_srd_info(adapter, &srd_info); 3559 stats_begin = &srd_info; 3560 for (stat = 0; stat < ENA_STATS_ARRAY_ENA_SRD; stat++, count++) { 3561 stat_offset = ena_stats_srd_strings[stat].stat_offset; 3562 xstats[count].id = count; 3563 xstats[count].value = *((uint64_t *) 3564 ((char *)stats_begin + stat_offset)); 3565 } 3566 3567 for (stat = 0; stat < ENA_STATS_ARRAY_RX; stat++) { 3568 for (i = 0; i < dev->data->nb_rx_queues; i++, count++) { 3569 stat_offset = ena_stats_rx_strings[stat].stat_offset; 3570 stats_begin = &adapter->rx_ring[i].rx_stats; 3571 3572 xstats[count].id = count; 3573 xstats[count].value = *((uint64_t *) 3574 ((char *)stats_begin + stat_offset)); 3575 } 3576 } 3577 3578 for (stat = 0; stat < ENA_STATS_ARRAY_TX; stat++) { 3579 for (i = 0; i < dev->data->nb_tx_queues; i++, count++) { 3580 stat_offset = ena_stats_tx_strings[stat].stat_offset; 3581 stats_begin = &adapter->tx_ring[i].tx_stats; 3582 3583 xstats[count].id = count; 3584 xstats[count].value = *((uint64_t *) 3585 ((char *)stats_begin + stat_offset)); 3586 } 3587 } 3588 3589 return count; 3590 } 3591 3592 static int ena_xstats_get_by_id(struct rte_eth_dev *dev, 3593 const uint64_t *ids, 3594 uint64_t *values, 3595 unsigned int n) 3596 { 3597 struct ena_adapter *adapter = dev->data->dev_private; 3598 uint64_t id; 3599 uint64_t rx_entries, tx_entries; 3600 unsigned int i; 3601 int qid; 3602 int valid = 0; 3603 bool were_metrics_copied = false; 3604 bool was_srd_info_copied = false; 3605 uint64_t metrics_stats[ENA_MAX_CUSTOMER_METRICS]; 3606 struct ena_stats_srd srd_info = {0}; 3607 3608 for (i = 0; i < n; ++i) { 3609 id = ids[i]; 3610 /* Check if id belongs to global statistics */ 3611 if (id < ENA_STATS_ARRAY_GLOBAL) { 3612 values[i] = *((uint64_t *)&adapter->dev_stats + id); 3613 ++valid; 3614 continue; 3615 } 3616 3617 /* Check if id belongs to ENI statistics */ 3618 id -= ENA_STATS_ARRAY_GLOBAL; 3619 if (id < adapter->metrics_num) { 3620 /* Avoid reading metrics multiple times in a single 3621 * function call, as it requires communication with the 3622 * admin queue.
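 * The values are cached in the local metrics_stats buffer and reused for the
 * remaining ids handled by this call.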
3623 */ 3624 if (!were_metrics_copied) { 3625 were_metrics_copied = true; 3626 ena_copy_customer_metrics(adapter, 3627 metrics_stats, 3628 adapter->metrics_num); 3629 } 3630 3631 values[i] = *((uint64_t *)&metrics_stats + id); 3632 ++valid; 3633 continue; 3634 } 3635 3636 /* Check if id belongs to SRD info statistics */ 3637 id -= adapter->metrics_num; 3638 3639 if (id < ENA_STATS_ARRAY_ENA_SRD) { 3640 /* 3641 * Avoid reading srd info multiple times in a single 3642 * function call, as it requires communication with the 3643 * admin queue. 3644 */ 3645 if (!was_srd_info_copied) { 3646 was_srd_info_copied = true; 3647 ena_copy_ena_srd_info(adapter, &srd_info); 3648 } 3649 values[i] = *((uint64_t *)&srd_info + id); 3650 ++valid; 3651 continue; 3652 } 3653 3654 /* Check if id belongs to rx queue statistics */ 3655 id -= ENA_STATS_ARRAY_ENA_SRD; 3656 3657 rx_entries = ENA_STATS_ARRAY_RX * dev->data->nb_rx_queues; 3658 if (id < rx_entries) { 3659 qid = id % dev->data->nb_rx_queues; 3660 id /= dev->data->nb_rx_queues; 3661 values[i] = *((uint64_t *) 3662 &adapter->rx_ring[qid].rx_stats + id); 3663 ++valid; 3664 continue; 3665 } 3666 /* Check if id belongs to Tx queue statistics */ 3667 id -= rx_entries; 3668 tx_entries = ENA_STATS_ARRAY_TX * dev->data->nb_tx_queues; 3669 if (id < tx_entries) { 3670 qid = id % dev->data->nb_tx_queues; 3671 id /= dev->data->nb_tx_queues; 3672 values[i] = *((uint64_t *) 3673 &adapter->tx_ring[qid].tx_stats + id); 3674 ++valid; 3675 continue; 3676 } 3677 } 3678 3679 return valid; 3680 } 3681 3682 static int ena_process_uint_devarg(const char *key, 3683 const char *value, 3684 void *opaque) 3685 { 3686 struct ena_adapter *adapter = opaque; 3687 char *str_end; 3688 uint64_t uint64_value; 3689 3690 uint64_value = strtoull(value, &str_end, DECIMAL_BASE); 3691 if (value == str_end) { 3692 PMD_INIT_LOG(ERR, 3693 "Invalid value for key '%s'. Only uint values are accepted.\n", 3694 key); 3695 return -EINVAL; 3696 } 3697 3698 if (strcmp(key, ENA_DEVARG_MISS_TXC_TO) == 0) { 3699 if (uint64_value > ENA_MAX_TX_TIMEOUT_SECONDS) { 3700 PMD_INIT_LOG(ERR, 3701 "Tx timeout too high: %" PRIu64 " sec. Maximum allowed: %d sec.\n", 3702 uint64_value, ENA_MAX_TX_TIMEOUT_SECONDS); 3703 return -EINVAL; 3704 } else if (uint64_value == 0) { 3705 PMD_INIT_LOG(INFO, 3706 "Check for missing Tx completions has been disabled.\n"); 3707 adapter->missing_tx_completion_to = 3708 ENA_HW_HINTS_NO_TIMEOUT; 3709 } else { 3710 PMD_INIT_LOG(INFO, 3711 "Tx packet completion timeout set to %" PRIu64 " seconds.\n", 3712 uint64_value); 3713 adapter->missing_tx_completion_to = 3714 uint64_value * rte_get_timer_hz(); 3715 } 3716 } else if (strcmp(key, ENA_DEVARG_CONTROL_PATH_POLL_INTERVAL) == 0) { 3717 if (uint64_value > ENA_MAX_CONTROL_PATH_POLL_INTERVAL_MSEC) { 3718 PMD_INIT_LOG(ERR, 3719 "Control path polling interval is too long: %" PRIu64 " msecs. " 3720 "Maximum allowed: %d msecs.\n", 3721 uint64_value, ENA_MAX_CONTROL_PATH_POLL_INTERVAL_MSEC); 3722 return -EINVAL; 3723 } else if (uint64_value == 0) { 3724 PMD_INIT_LOG(INFO, 3725 "Control path polling interval is set to zero.
static int ena_process_uint_devarg(const char *key,
				   const char *value,
				   void *opaque)
{
	struct ena_adapter *adapter = opaque;
	char *str_end;
	uint64_t uint64_value;

	uint64_value = strtoull(value, &str_end, DECIMAL_BASE);
	if (value == str_end) {
		PMD_INIT_LOG(ERR,
			"Invalid value for key '%s'. Only uint values are accepted.\n",
			key);
		return -EINVAL;
	}

	if (strcmp(key, ENA_DEVARG_MISS_TXC_TO) == 0) {
		if (uint64_value > ENA_MAX_TX_TIMEOUT_SECONDS) {
			PMD_INIT_LOG(ERR,
				"Tx timeout too high: %" PRIu64 " sec. Maximum allowed: %d sec.\n",
				uint64_value, ENA_MAX_TX_TIMEOUT_SECONDS);
			return -EINVAL;
		} else if (uint64_value == 0) {
			PMD_INIT_LOG(INFO,
				"Check for missing Tx completions has been disabled.\n");
			adapter->missing_tx_completion_to =
				ENA_HW_HINTS_NO_TIMEOUT;
		} else {
			PMD_INIT_LOG(INFO,
				"Tx packet completion timeout set to %" PRIu64 " seconds.\n",
				uint64_value);
			adapter->missing_tx_completion_to =
				uint64_value * rte_get_timer_hz();
		}
	} else if (strcmp(key, ENA_DEVARG_CONTROL_PATH_POLL_INTERVAL) == 0) {
		if (uint64_value > ENA_MAX_CONTROL_PATH_POLL_INTERVAL_MSEC) {
			PMD_INIT_LOG(ERR,
				"Control path polling interval is too long: %" PRIu64 " msecs. "
				"Maximum allowed: %d msecs.\n",
				uint64_value, ENA_MAX_CONTROL_PATH_POLL_INTERVAL_MSEC);
			return -EINVAL;
		} else if (uint64_value == 0) {
			PMD_INIT_LOG(INFO,
				"Control path polling interval is set to zero. Operating in "
				"interrupt mode.\n");
			adapter->control_path_poll_interval = 0;
		} else {
			PMD_INIT_LOG(INFO,
				"Control path polling interval is set to %" PRIu64 " msecs.\n",
				uint64_value);
			adapter->control_path_poll_interval = uint64_value * USEC_PER_MSEC;
		}
	}

	return 0;
}

static int ena_process_bool_devarg(const char *key,
				   const char *value,
				   void *opaque)
{
	struct ena_adapter *adapter = opaque;
	bool bool_value;

	/* Parse the value. */
	if (strcmp(value, "1") == 0) {
		bool_value = true;
	} else if (strcmp(value, "0") == 0) {
		bool_value = false;
	} else {
		PMD_INIT_LOG(ERR,
			"Invalid value: '%s' for key '%s'. Accepted: '0' or '1'\n",
			value, key);
		return -EINVAL;
	}

	/* Now, assign it to the proper adapter field. */
	if (strcmp(key, ENA_DEVARG_LARGE_LLQ_HDR) == 0)
		adapter->use_large_llq_hdr = bool_value;
	else if (strcmp(key, ENA_DEVARG_NORMAL_LLQ_HDR) == 0)
		adapter->use_normal_llq_hdr = bool_value;
	else if (strcmp(key, ENA_DEVARG_ENABLE_LLQ) == 0)
		adapter->enable_llq = bool_value;

	return 0;
}

static int ena_parse_devargs(struct ena_adapter *adapter,
			     struct rte_devargs *devargs)
{
	static const char * const allowed_args[] = {
		ENA_DEVARG_LARGE_LLQ_HDR,
		ENA_DEVARG_NORMAL_LLQ_HDR,
		ENA_DEVARG_MISS_TXC_TO,
		ENA_DEVARG_ENABLE_LLQ,
		ENA_DEVARG_CONTROL_PATH_POLL_INTERVAL,
		NULL,
	};
	struct rte_kvargs *kvlist;
	int rc;

	if (devargs == NULL)
		return 0;

	kvlist = rte_kvargs_parse(devargs->args, allowed_args);
	if (kvlist == NULL) {
		PMD_INIT_LOG(ERR, "Invalid device arguments: %s\n",
			devargs->args);
		return -EINVAL;
	}

	rc = rte_kvargs_process(kvlist, ENA_DEVARG_LARGE_LLQ_HDR,
		ena_process_bool_devarg, adapter);
	if (rc != 0)
		goto exit;
	rc = rte_kvargs_process(kvlist, ENA_DEVARG_NORMAL_LLQ_HDR,
		ena_process_bool_devarg, adapter);
	if (rc != 0)
		goto exit;
	rc = rte_kvargs_process(kvlist, ENA_DEVARG_MISS_TXC_TO,
		ena_process_uint_devarg, adapter);
	if (rc != 0)
		goto exit;
	rc = rte_kvargs_process(kvlist, ENA_DEVARG_ENABLE_LLQ,
		ena_process_bool_devarg, adapter);
	if (rc != 0)
		goto exit;
	rc = rte_kvargs_process(kvlist, ENA_DEVARG_CONTROL_PATH_POLL_INTERVAL,
		ena_process_uint_devarg, adapter);
	if (rc != 0)
		goto exit;

exit:
	rte_kvargs_free(kvlist);

	return rc;
}
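/*
 * Illustrative only: the runtime options handled above are passed as comma
 * separated key=value pairs appended to the PCI allow-list entry. The BDF
 * below is a placeholder, for example:
 *
 *	dpdk-testpmd -a <ena-pci-bdf>,enable_llq=1,miss_txc_to=5,control_path_poll_interval=500 -- -i
 *
 * Unlisted keys are rejected by rte_kvargs_parse() above, and each value is
 * validated by the matching ena_process_*_devarg() callback.
 */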
static int ena_setup_rx_intr(struct rte_eth_dev *dev)
{
	struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev);
	struct rte_intr_handle *intr_handle = pci_dev->intr_handle;
	int rc;
	uint16_t vectors_nb, i;
	bool rx_intr_requested = dev->data->dev_conf.intr_conf.rxq;

	if (!rx_intr_requested)
		return 0;

	if (!rte_intr_cap_multiple(intr_handle)) {
		PMD_DRV_LOG(ERR,
			"Rx interrupt requested, but it isn't supported by the PCI driver\n");
		return -ENOTSUP;
	}

	/* Disable interrupt mapping before the configuration starts. */
	rte_intr_disable(intr_handle);

	/* Verify if there are enough vectors available. */
	vectors_nb = dev->data->nb_rx_queues;
	if (vectors_nb > RTE_MAX_RXTX_INTR_VEC_ID) {
		PMD_DRV_LOG(ERR,
			"Too many Rx interrupts requested, maximum number: %d\n",
			RTE_MAX_RXTX_INTR_VEC_ID);
		rc = -ENOTSUP;
		goto enable_intr;
	}

	/* Allocate the vector list */
	if (rte_intr_vec_list_alloc(intr_handle, "intr_vec",
			dev->data->nb_rx_queues)) {
		PMD_DRV_LOG(ERR,
			"Failed to allocate interrupt vector for %d queues\n",
			dev->data->nb_rx_queues);
		rc = -ENOMEM;
		goto enable_intr;
	}

	rc = rte_intr_efd_enable(intr_handle, vectors_nb);
	if (rc != 0)
		goto free_intr_vec;

	if (!rte_intr_allow_others(intr_handle)) {
		PMD_DRV_LOG(ERR,
			"Not enough interrupts available to use both ENA Admin and Rx interrupts\n");
		goto disable_intr_efd;
	}

	for (i = 0; i < vectors_nb; ++i)
		if (rte_intr_vec_list_index_set(intr_handle, i,
				RTE_INTR_VEC_RXTX_OFFSET + i))
			goto disable_intr_efd;

	rte_intr_enable(intr_handle);
	return 0;

disable_intr_efd:
	rte_intr_efd_disable(intr_handle);
free_intr_vec:
	rte_intr_vec_list_free(intr_handle);
enable_intr:
	rte_intr_enable(intr_handle);
	return rc;
}

static void ena_rx_queue_intr_set(struct rte_eth_dev *dev,
				  uint16_t queue_id,
				  bool unmask)
{
	struct ena_adapter *adapter = dev->data->dev_private;
	struct ena_ring *rxq = &adapter->rx_ring[queue_id];
	struct ena_eth_io_intr_reg intr_reg;

	ena_com_update_intr_reg(&intr_reg, 0, 0, unmask, 1);
	ena_com_unmask_intr(rxq->ena_com_io_cq, &intr_reg);
}

static int ena_rx_queue_intr_enable(struct rte_eth_dev *dev,
				    uint16_t queue_id)
{
	ena_rx_queue_intr_set(dev, queue_id, true);

	return 0;
}

static int ena_rx_queue_intr_disable(struct rte_eth_dev *dev,
				     uint16_t queue_id)
{
	ena_rx_queue_intr_set(dev, queue_id, false);

	return 0;
}
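/*
 * Illustrative only: Rx queue interrupts are exercised from the application
 * side, not from within the PMD. A minimal sketch, assuming 'port_id' and
 * 'queue_id' describe a queue configured with intr_conf.rxq = 1:
 *
 *	rte_eth_dev_rx_intr_enable(port_id, queue_id);
 *	... wait for the interrupt event, e.g. with rte_epoll_wait() ...
 *	rte_eth_dev_rx_intr_disable(port_id, queue_id);
 *
 * which lands in ena_rx_queue_intr_enable()/ena_rx_queue_intr_disable()
 * above and unmasks or masks the queue's IO completion interrupt.
 */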
static int ena_configure_aenq(struct ena_adapter *adapter)
{
	uint32_t aenq_groups = adapter->all_aenq_groups;
	int rc;

	/* All_aenq_groups holds all AENQ functions supported by the device and
	 * the HW, so at first we need to be sure the LSC request is valid.
	 */
	if (adapter->edev_data->dev_conf.intr_conf.lsc != 0) {
		if (!(aenq_groups & BIT(ENA_ADMIN_LINK_CHANGE))) {
			PMD_DRV_LOG(ERR,
				"LSC requested, but it's not supported by the AENQ\n");
			return -EINVAL;
		}
	} else {
		/* If LSC wasn't enabled by the app, let's enable all supported
		 * AENQ procedures except the LSC.
		 */
		aenq_groups &= ~BIT(ENA_ADMIN_LINK_CHANGE);
	}

	rc = ena_com_set_aenq_config(&adapter->ena_dev, aenq_groups);
	if (rc != 0) {
		PMD_DRV_LOG(ERR, "Cannot configure AENQ groups, rc=%d\n", rc);
		return rc;
	}

	adapter->active_aenq_groups = aenq_groups;

	return 0;
}

int ena_mp_indirect_table_set(struct ena_adapter *adapter)
{
	return ENA_PROXY(adapter, ena_com_indirect_table_set, &adapter->ena_dev);
}

int ena_mp_indirect_table_get(struct ena_adapter *adapter,
			      uint32_t *indirect_table)
{
	return ENA_PROXY(adapter, ena_com_indirect_table_get, &adapter->ena_dev,
		indirect_table);
}

/*********************************************************************
 *  ena_plat_dpdk.h function implementations
 *********************************************************************/

const struct rte_memzone *
ena_mem_alloc_coherent(struct rte_eth_dev_data *data, size_t size,
		       int socket_id, unsigned int alignment, void **virt_addr,
		       dma_addr_t *phys_addr)
{
	char z_name[RTE_MEMZONE_NAMESIZE];
	struct ena_adapter *adapter = data->dev_private;
	const struct rte_memzone *memzone;
	int rc;

	rc = snprintf(z_name, RTE_MEMZONE_NAMESIZE, "ena_p%d_mz%" PRIu64 "",
		data->port_id, adapter->memzone_cnt);
	if (rc >= RTE_MEMZONE_NAMESIZE) {
		PMD_DRV_LOG(ERR,
			"Name for the ena_com memzone is too long. Port: %d, mz_num: %" PRIu64 "\n",
			data->port_id, adapter->memzone_cnt);
		goto error;
	}
	adapter->memzone_cnt++;

	memzone = rte_memzone_reserve_aligned(z_name, size, socket_id,
		RTE_MEMZONE_IOVA_CONTIG, alignment);
	if (memzone == NULL) {
		PMD_DRV_LOG(ERR, "Failed to allocate ena_com memzone: %s\n",
			z_name);
		goto error;
	}

	memset(memzone->addr, 0, size);
	*virt_addr = memzone->addr;
	*phys_addr = memzone->iova;

	return memzone;

error:
	*virt_addr = NULL;
	*phys_addr = 0;

	return NULL;
}

/*********************************************************************
 *  PMD configuration
 *********************************************************************/
static int eth_ena_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
			     struct rte_pci_device *pci_dev)
{
	return rte_eth_dev_pci_generic_probe(pci_dev,
		sizeof(struct ena_adapter), eth_ena_dev_init);
}

static int eth_ena_pci_remove(struct rte_pci_device *pci_dev)
{
	return rte_eth_dev_pci_generic_remove(pci_dev, eth_ena_dev_uninit);
}

static struct rte_pci_driver rte_ena_pmd = {
	.id_table = pci_id_ena_map,
	.drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC |
		     RTE_PCI_DRV_WC_ACTIVATE,
	.probe = eth_ena_pci_probe,
	.remove = eth_ena_pci_remove,
};

RTE_PMD_REGISTER_PCI(net_ena, rte_ena_pmd);
RTE_PMD_REGISTER_PCI_TABLE(net_ena, pci_id_ena_map);
RTE_PMD_REGISTER_KMOD_DEP(net_ena, "* igb_uio | uio_pci_generic | vfio-pci");
RTE_PMD_REGISTER_PARAM_STRING(net_ena,
	ENA_DEVARG_LARGE_LLQ_HDR "=<0|1> "
	ENA_DEVARG_NORMAL_LLQ_HDR "=<0|1> "
	ENA_DEVARG_ENABLE_LLQ "=<0|1> "
	ENA_DEVARG_MISS_TXC_TO "=<uint> "
	ENA_DEVARG_CONTROL_PATH_POLL_INTERVAL "=<0-1000>");
RTE_LOG_REGISTER_SUFFIX(ena_logtype_init, init, NOTICE);
RTE_LOG_REGISTER_SUFFIX(ena_logtype_driver, driver, NOTICE);
#ifdef RTE_ETHDEV_DEBUG_RX
RTE_LOG_REGISTER_SUFFIX(ena_logtype_rx, rx, DEBUG);
#endif
#ifdef RTE_ETHDEV_DEBUG_TX
RTE_LOG_REGISTER_SUFFIX(ena_logtype_tx, tx, DEBUG);
#endif
RTE_LOG_REGISTER_SUFFIX(ena_logtype_com, com, WARNING);
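/*
 * Illustrative only: the log types registered above can be tuned at runtime
 * through the standard EAL option, for example (the exact log names depend
 * on the logtype prefix used by RTE_LOG_REGISTER_SUFFIX() for this driver):
 *
 *	dpdk-testpmd --log-level=pmd.net.ena.driver:debug -a <ena-pci-bdf> -- -i
 *
 * The rx/tx log types are only compiled in when RTE_ETHDEV_DEBUG_RX or
 * RTE_ETHDEV_DEBUG_TX is defined at build time.
 */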
/******************************************************************************
 ******************************** AENQ Handlers *******************************
 *****************************************************************************/
static void ena_update_on_link_change(void *adapter_data,
				      struct ena_admin_aenq_entry *aenq_e)
{
	struct rte_eth_dev *eth_dev = adapter_data;
	struct ena_adapter *adapter = eth_dev->data->dev_private;
	struct ena_admin_aenq_link_change_desc *aenq_link_desc;
	uint32_t status;

	aenq_link_desc = (struct ena_admin_aenq_link_change_desc *)aenq_e;

	status = get_ena_admin_aenq_link_change_desc_link_status(aenq_link_desc);
	adapter->link_status = status;

	ena_link_update(eth_dev, 0);
	rte_eth_dev_callback_process(eth_dev, RTE_ETH_EVENT_INTR_LSC, NULL);
}

static void ena_notification(void *adapter_data,
			     struct ena_admin_aenq_entry *aenq_e)
{
	struct rte_eth_dev *eth_dev = adapter_data;
	struct ena_adapter *adapter = eth_dev->data->dev_private;
	struct ena_admin_ena_hw_hints *hints;

	if (aenq_e->aenq_common_desc.group != ENA_ADMIN_NOTIFICATION)
		PMD_DRV_LOG(WARNING, "Invalid AENQ group: %x. Expected: %x\n",
			aenq_e->aenq_common_desc.group,
			ENA_ADMIN_NOTIFICATION);

	switch (aenq_e->aenq_common_desc.syndrome) {
	case ENA_ADMIN_UPDATE_HINTS:
		hints = (struct ena_admin_ena_hw_hints *)
			(&aenq_e->inline_data_w4);
		ena_update_hints(adapter, hints);
		break;
	default:
		PMD_DRV_LOG(ERR, "Invalid AENQ notification syndrome: %d\n",
			aenq_e->aenq_common_desc.syndrome);
	}
}

static void ena_keep_alive(void *adapter_data,
			   __rte_unused struct ena_admin_aenq_entry *aenq_e)
{
	struct rte_eth_dev *eth_dev = adapter_data;
	struct ena_adapter *adapter = eth_dev->data->dev_private;
	struct ena_admin_aenq_keep_alive_desc *desc;
	uint64_t rx_drops;
	uint64_t tx_drops;
	uint64_t rx_overruns;

	adapter->timestamp_wd = rte_get_timer_cycles();

	desc = (struct ena_admin_aenq_keep_alive_desc *)aenq_e;
	rx_drops = ((uint64_t)desc->rx_drops_high << 32) | desc->rx_drops_low;
	tx_drops = ((uint64_t)desc->tx_drops_high << 32) | desc->tx_drops_low;
	rx_overruns = ((uint64_t)desc->rx_overruns_high << 32) | desc->rx_overruns_low;

	/*
	 * Depending on its acceleration support, the device updates a different
	 * statistic when an Rx packet is dropped because there are no available
	 * buffers to accommodate it, so both counters are summed here.
	 */
	adapter->drv_stats->rx_drops = rx_drops + rx_overruns;
	adapter->dev_stats.tx_drops = tx_drops;
}
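/*
 * Note: the keep-alive AENQ event serves two purposes here. First, it
 * refreshes adapter->timestamp_wd, which the driver's watchdog logic is
 * expected to compare against the keep-alive timeout to detect an
 * unresponsive device. Second, it carries the device-side drop counters,
 * delivered as 32-bit high/low halves and merged into 64-bit values above.
 */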
static void ena_suboptimal_configuration(__rte_unused void *adapter_data,
					 struct ena_admin_aenq_entry *aenq_e)
{
	struct ena_admin_aenq_conf_notifications_desc *desc;
	int bit, num_bits;

	desc = (struct ena_admin_aenq_conf_notifications_desc *)aenq_e;
	num_bits = BITS_PER_TYPE(desc->notifications_bitmap);
	for (bit = 0; bit < num_bits; bit++) {
		if (desc->notifications_bitmap & RTE_BIT64(bit)) {
			PMD_DRV_LOG(WARNING,
				"Sub-optimal configuration notification code: %d\n", bit + 1);
		}
	}
}

/**
 * This handler is called for an unknown event group or for an event with an
 * unimplemented handler.
 **/
static void unimplemented_aenq_handler(__rte_unused void *data,
				       __rte_unused struct ena_admin_aenq_entry *aenq_e)
{
	PMD_DRV_LOG(ERR,
		"Unknown event was received or event with unimplemented handler\n");
}

static struct ena_aenq_handlers aenq_handlers = {
	.handlers = {
		[ENA_ADMIN_LINK_CHANGE] = ena_update_on_link_change,
		[ENA_ADMIN_NOTIFICATION] = ena_notification,
		[ENA_ADMIN_KEEP_ALIVE] = ena_keep_alive,
		[ENA_ADMIN_CONF_NOTIFICATIONS] = ena_suboptimal_configuration
	},
	.unimplemented_handler = unimplemented_aenq_handler
};

/*********************************************************************
 *  Multi-Process communication request handling (in primary)
 *********************************************************************/
static int
ena_mp_primary_handle(const struct rte_mp_msg *mp_msg, const void *peer)
{
	const struct ena_mp_body *req =
		(const struct ena_mp_body *)mp_msg->param;
	struct ena_adapter *adapter;
	struct ena_com_dev *ena_dev;
	struct ena_mp_body *rsp;
	struct rte_mp_msg mp_rsp;
	struct rte_eth_dev *dev;
	int res = 0;

	rsp = (struct ena_mp_body *)&mp_rsp.param;
	mp_msg_init(&mp_rsp, req->type, req->port_id);

	if (!rte_eth_dev_is_valid_port(req->port_id)) {
		rte_errno = ENODEV;
		res = -rte_errno;
		PMD_DRV_LOG(ERR, "Unknown port %d in request %d\n",
			req->port_id, req->type);
		goto end;
	}
	dev = &rte_eth_devices[req->port_id];
	adapter = dev->data->dev_private;
	ena_dev = &adapter->ena_dev;

	switch (req->type) {
	case ENA_MP_DEV_STATS_GET:
		res = ena_com_get_dev_basic_stats(ena_dev,
			&adapter->basic_stats);
		break;
	case ENA_MP_ENI_STATS_GET:
		res = ena_com_get_eni_stats(ena_dev,
			(struct ena_admin_eni_stats *)&adapter->metrics_stats);
		break;
	case ENA_MP_MTU_SET:
		res = ena_com_set_dev_mtu(ena_dev, req->args.mtu);
		break;
	case ENA_MP_IND_TBL_GET:
		res = ena_com_indirect_table_get(ena_dev,
			adapter->indirect_table);
		break;
	case ENA_MP_IND_TBL_SET:
		res = ena_com_indirect_table_set(ena_dev);
		break;
	case ENA_MP_CUSTOMER_METRICS_GET:
		res = ena_com_get_customer_metrics(ena_dev,
			(char *)adapter->metrics_stats,
			adapter->metrics_num * sizeof(uint64_t));
		break;
	case ENA_MP_SRD_STATS_GET:
		res = ena_com_get_ena_srd_info(ena_dev,
			(struct ena_admin_ena_srd_info *)&adapter->srd_stats);
		break;
	default:
		PMD_DRV_LOG(ERR, "Unknown request type %d\n", req->type);
		res = -EINVAL;
		break;
	}

end:
	/* Save the processing result in the reply. */
	rsp->result = res;
	/* Return just the IPC processing status. */
	return rte_mp_reply(&mp_rsp, peer);
}
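/*
 * Note: ena_mp_primary_handle() runs in the primary process only. Secondary
 * processes cannot drive the admin queue themselves, so requests such as the
 * ENA_MP_*_GET/SET types above are sent over the rte_mp IPC channel, executed
 * here against the shared adapter state, and answered with rsp->result
 * carrying the admin-queue return code.
 */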
static ena_llq_policy ena_define_llq_hdr_policy(struct ena_adapter *adapter)
{
	if (!adapter->enable_llq)
		return ENA_LLQ_POLICY_DISABLED;
	if (adapter->use_large_llq_hdr)
		return ENA_LLQ_POLICY_LARGE;
	if (adapter->use_normal_llq_hdr)
		return ENA_LLQ_POLICY_NORMAL;
	return ENA_LLQ_POLICY_RECOMMENDED;
}

static bool ena_use_large_llq_hdr(struct ena_adapter *adapter, uint8_t recommended_entry_size)
{
	if (adapter->llq_header_policy == ENA_LLQ_POLICY_LARGE) {
		return true;
	} else if (adapter->llq_header_policy == ENA_LLQ_POLICY_RECOMMENDED) {
		PMD_DRV_LOG(INFO, "Recommended device entry size policy %u\n",
			recommended_entry_size);
		if (recommended_entry_size == ENA_ADMIN_LIST_ENTRY_SIZE_256B)
			return true;
	}
	return false;
}
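/*
 * Note on the LLQ header policy resolution above: the device arguments are
 * reduced to a single policy with the precedence disabled > large > normal >
 * recommended. With ENA_LLQ_POLICY_RECOMMENDED, large LLQ headers are used
 * only when the device itself recommends 256B LLQ entries
 * (ENA_ADMIN_LIST_ENTRY_SIZE_256B); otherwise the normal entry size is kept.
 */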