/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright (c) 2015-2020 Amazon.com, Inc. or its affiliates.
 * All rights reserved.
 */

#include <rte_string_fns.h>
#include <rte_errno.h>
#include <rte_version.h>
#include <rte_net.h>
#include <rte_kvargs.h>

#include "ena_ethdev.h"
#include "ena_logs.h"
#include "ena_platform.h"
#include "ena_com.h"
#include "ena_eth_com.h"

#include <ena_common_defs.h>
#include <ena_regs_defs.h>
#include <ena_admin_defs.h>
#include <ena_eth_io_defs.h>

#define DRV_MODULE_VER_MAJOR	2
#define DRV_MODULE_VER_MINOR	7
#define DRV_MODULE_VER_SUBMINOR	0

#define __MERGE_64B_H_L(h, l) (((uint64_t)h << 32) | l)

#define GET_L4_HDR_LEN(mbuf)					\
	((rte_pktmbuf_mtod_offset(mbuf, struct rte_tcp_hdr *,	\
		mbuf->l3_len + mbuf->l2_len)->data_off) >> 4)

#define ETH_GSTRING_LEN	32

#define ARRAY_SIZE(x) RTE_DIM(x)

#define ENA_MIN_RING_DESC	128

/*
 * We should try to keep ENA_CLEANUP_BUF_SIZE lower than
 * RTE_MEMPOOL_CACHE_MAX_SIZE, so it can fit into the mempool local cache.
 */
#define ENA_CLEANUP_BUF_SIZE	256

#define ENA_PTYPE_HAS_HASH	(RTE_PTYPE_L4_TCP | RTE_PTYPE_L4_UDP)

struct ena_stats {
	char name[ETH_GSTRING_LEN];
	int stat_offset;
};

#define ENA_STAT_ENTRY(stat, stat_type) { \
	.name = #stat, \
	.stat_offset = offsetof(struct ena_stats_##stat_type, stat) \
}

#define ENA_STAT_RX_ENTRY(stat) \
	ENA_STAT_ENTRY(stat, rx)

#define ENA_STAT_TX_ENTRY(stat) \
	ENA_STAT_ENTRY(stat, tx)

#define ENA_STAT_ENI_ENTRY(stat) \
	ENA_STAT_ENTRY(stat, eni)

#define ENA_STAT_GLOBAL_ENTRY(stat) \
	ENA_STAT_ENTRY(stat, dev)

/* Device arguments */
#define ENA_DEVARG_LARGE_LLQ_HDR "large_llq_hdr"
/* Timeout in seconds after which a single uncompleted Tx packet should be
 * considered missing.
 */
#define ENA_DEVARG_MISS_TXC_TO "miss_txc_to"
/*
 * Controls whether LLQ should be used (if available). Enabled by default.
 * NOTE: Disabling LLQ is strongly discouraged, as it may lead to a huge
 * performance degradation on 6th generation AWS instances.
 */
#define ENA_DEVARG_ENABLE_LLQ "enable_llq"

/*
 * Each rte_memzone should have a unique name.
 * To satisfy this, count the number of allocations and append it to the name.
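 *
 * A minimal illustrative sketch of the idea (hypothetical; the exact name
 * format is an implementation detail of the allocation macros):
 *
 *	char z_name[RTE_MEMZONE_NAMESIZE];
 *
 *	snprintf(z_name, sizeof(z_name), "ena_alloc_%" PRIi64,
 *		 rte_atomic64_add_return(&ena_alloc_cnt, 1));
 *	mz = rte_memzone_reserve(z_name, size, socket_id, 0);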
85 */ 86 rte_atomic64_t ena_alloc_cnt; 87 88 static const struct ena_stats ena_stats_global_strings[] = { 89 ENA_STAT_GLOBAL_ENTRY(wd_expired), 90 ENA_STAT_GLOBAL_ENTRY(dev_start), 91 ENA_STAT_GLOBAL_ENTRY(dev_stop), 92 ENA_STAT_GLOBAL_ENTRY(tx_drops), 93 }; 94 95 static const struct ena_stats ena_stats_eni_strings[] = { 96 ENA_STAT_ENI_ENTRY(bw_in_allowance_exceeded), 97 ENA_STAT_ENI_ENTRY(bw_out_allowance_exceeded), 98 ENA_STAT_ENI_ENTRY(pps_allowance_exceeded), 99 ENA_STAT_ENI_ENTRY(conntrack_allowance_exceeded), 100 ENA_STAT_ENI_ENTRY(linklocal_allowance_exceeded), 101 }; 102 103 static const struct ena_stats ena_stats_tx_strings[] = { 104 ENA_STAT_TX_ENTRY(cnt), 105 ENA_STAT_TX_ENTRY(bytes), 106 ENA_STAT_TX_ENTRY(prepare_ctx_err), 107 ENA_STAT_TX_ENTRY(tx_poll), 108 ENA_STAT_TX_ENTRY(doorbells), 109 ENA_STAT_TX_ENTRY(bad_req_id), 110 ENA_STAT_TX_ENTRY(available_desc), 111 ENA_STAT_TX_ENTRY(missed_tx), 112 }; 113 114 static const struct ena_stats ena_stats_rx_strings[] = { 115 ENA_STAT_RX_ENTRY(cnt), 116 ENA_STAT_RX_ENTRY(bytes), 117 ENA_STAT_RX_ENTRY(refill_partial), 118 ENA_STAT_RX_ENTRY(l3_csum_bad), 119 ENA_STAT_RX_ENTRY(l4_csum_bad), 120 ENA_STAT_RX_ENTRY(l4_csum_good), 121 ENA_STAT_RX_ENTRY(mbuf_alloc_fail), 122 ENA_STAT_RX_ENTRY(bad_desc_num), 123 ENA_STAT_RX_ENTRY(bad_req_id), 124 }; 125 126 #define ENA_STATS_ARRAY_GLOBAL ARRAY_SIZE(ena_stats_global_strings) 127 #define ENA_STATS_ARRAY_ENI ARRAY_SIZE(ena_stats_eni_strings) 128 #define ENA_STATS_ARRAY_TX ARRAY_SIZE(ena_stats_tx_strings) 129 #define ENA_STATS_ARRAY_RX ARRAY_SIZE(ena_stats_rx_strings) 130 131 #define QUEUE_OFFLOADS (RTE_ETH_TX_OFFLOAD_TCP_CKSUM |\ 132 RTE_ETH_TX_OFFLOAD_UDP_CKSUM |\ 133 RTE_ETH_TX_OFFLOAD_IPV4_CKSUM |\ 134 RTE_ETH_TX_OFFLOAD_TCP_TSO) 135 #define MBUF_OFFLOADS (RTE_MBUF_F_TX_L4_MASK |\ 136 RTE_MBUF_F_TX_IP_CKSUM |\ 137 RTE_MBUF_F_TX_TCP_SEG) 138 139 /** Vendor ID used by Amazon devices */ 140 #define PCI_VENDOR_ID_AMAZON 0x1D0F 141 /** Amazon devices */ 142 #define PCI_DEVICE_ID_ENA_VF 0xEC20 143 #define PCI_DEVICE_ID_ENA_VF_RSERV0 0xEC21 144 145 #define ENA_TX_OFFLOAD_MASK (RTE_MBUF_F_TX_L4_MASK | \ 146 RTE_MBUF_F_TX_IPV6 | \ 147 RTE_MBUF_F_TX_IPV4 | \ 148 RTE_MBUF_F_TX_IP_CKSUM | \ 149 RTE_MBUF_F_TX_TCP_SEG) 150 151 #define ENA_TX_OFFLOAD_NOTSUP_MASK \ 152 (RTE_MBUF_F_TX_OFFLOAD_MASK ^ ENA_TX_OFFLOAD_MASK) 153 154 /** HW specific offloads capabilities. */ 155 /* IPv4 checksum offload. */ 156 #define ENA_L3_IPV4_CSUM 0x0001 157 /* TCP/UDP checksum offload for IPv4 packets. */ 158 #define ENA_L4_IPV4_CSUM 0x0002 159 /* TCP/UDP checksum offload for IPv4 packets with pseudo header checksum. */ 160 #define ENA_L4_IPV4_CSUM_PARTIAL 0x0004 161 /* TCP/UDP checksum offload for IPv6 packets. */ 162 #define ENA_L4_IPV6_CSUM 0x0008 163 /* TCP/UDP checksum offload for IPv6 packets with pseudo header checksum. */ 164 #define ENA_L4_IPV6_CSUM_PARTIAL 0x0010 165 /* TSO support for IPv4 packets. */ 166 #define ENA_IPV4_TSO 0x0020 167 168 /* Device supports setting RSS hash. 
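 * Like the other ENA_* capability bits above, this flag is gathered into
 * struct ena_offloads by ena_set_offloads() based on the device's offload
 * descriptor, and is later reflected in the RTE_ETH_*_OFFLOAD_* capabilities
 * reported by ena_infos_get().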
*/ 169 #define ENA_RX_RSS_HASH 0x0040 170 171 static const struct rte_pci_id pci_id_ena_map[] = { 172 { RTE_PCI_DEVICE(PCI_VENDOR_ID_AMAZON, PCI_DEVICE_ID_ENA_VF) }, 173 { RTE_PCI_DEVICE(PCI_VENDOR_ID_AMAZON, PCI_DEVICE_ID_ENA_VF_RSERV0) }, 174 { .device_id = 0 }, 175 }; 176 177 static struct ena_aenq_handlers aenq_handlers; 178 179 static int ena_device_init(struct ena_adapter *adapter, 180 struct rte_pci_device *pdev, 181 struct ena_com_dev_get_features_ctx *get_feat_ctx); 182 static int ena_dev_configure(struct rte_eth_dev *dev); 183 static void ena_tx_map_mbuf(struct ena_ring *tx_ring, 184 struct ena_tx_buffer *tx_info, 185 struct rte_mbuf *mbuf, 186 void **push_header, 187 uint16_t *header_len); 188 static int ena_xmit_mbuf(struct ena_ring *tx_ring, struct rte_mbuf *mbuf); 189 static int ena_tx_cleanup(void *txp, uint32_t free_pkt_cnt); 190 static uint16_t eth_ena_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, 191 uint16_t nb_pkts); 192 static uint16_t eth_ena_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, 193 uint16_t nb_pkts); 194 static int ena_tx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx, 195 uint16_t nb_desc, unsigned int socket_id, 196 const struct rte_eth_txconf *tx_conf); 197 static int ena_rx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx, 198 uint16_t nb_desc, unsigned int socket_id, 199 const struct rte_eth_rxconf *rx_conf, 200 struct rte_mempool *mp); 201 static inline void ena_init_rx_mbuf(struct rte_mbuf *mbuf, uint16_t len); 202 static struct rte_mbuf *ena_rx_mbuf(struct ena_ring *rx_ring, 203 struct ena_com_rx_buf_info *ena_bufs, 204 uint32_t descs, 205 uint16_t *next_to_clean, 206 uint8_t offset); 207 static uint16_t eth_ena_recv_pkts(void *rx_queue, 208 struct rte_mbuf **rx_pkts, uint16_t nb_pkts); 209 static int ena_add_single_rx_desc(struct ena_com_io_sq *io_sq, 210 struct rte_mbuf *mbuf, uint16_t id); 211 static int ena_populate_rx_queue(struct ena_ring *rxq, unsigned int count); 212 static void ena_init_rings(struct ena_adapter *adapter, 213 bool disable_meta_caching); 214 static int ena_mtu_set(struct rte_eth_dev *dev, uint16_t mtu); 215 static int ena_start(struct rte_eth_dev *dev); 216 static int ena_stop(struct rte_eth_dev *dev); 217 static int ena_close(struct rte_eth_dev *dev); 218 static int ena_dev_reset(struct rte_eth_dev *dev); 219 static int ena_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats); 220 static void ena_rx_queue_release_all(struct rte_eth_dev *dev); 221 static void ena_tx_queue_release_all(struct rte_eth_dev *dev); 222 static void ena_rx_queue_release(struct rte_eth_dev *dev, uint16_t qid); 223 static void ena_tx_queue_release(struct rte_eth_dev *dev, uint16_t qid); 224 static void ena_rx_queue_release_bufs(struct ena_ring *ring); 225 static void ena_tx_queue_release_bufs(struct ena_ring *ring); 226 static int ena_link_update(struct rte_eth_dev *dev, 227 int wait_to_complete); 228 static int ena_create_io_queue(struct rte_eth_dev *dev, struct ena_ring *ring); 229 static void ena_queue_stop(struct ena_ring *ring); 230 static void ena_queue_stop_all(struct rte_eth_dev *dev, 231 enum ena_ring_type ring_type); 232 static int ena_queue_start(struct rte_eth_dev *dev, struct ena_ring *ring); 233 static int ena_queue_start_all(struct rte_eth_dev *dev, 234 enum ena_ring_type ring_type); 235 static void ena_stats_restart(struct rte_eth_dev *dev); 236 static uint64_t ena_get_rx_port_offloads(struct ena_adapter *adapter); 237 static uint64_t ena_get_tx_port_offloads(struct ena_adapter *adapter); 238 
static uint64_t ena_get_rx_queue_offloads(struct ena_adapter *adapter); 239 static uint64_t ena_get_tx_queue_offloads(struct ena_adapter *adapter); 240 static int ena_infos_get(struct rte_eth_dev *dev, 241 struct rte_eth_dev_info *dev_info); 242 static void ena_interrupt_handler_rte(void *cb_arg); 243 static void ena_timer_wd_callback(struct rte_timer *timer, void *arg); 244 static void ena_destroy_device(struct rte_eth_dev *eth_dev); 245 static int eth_ena_dev_init(struct rte_eth_dev *eth_dev); 246 static int ena_xstats_get_names(struct rte_eth_dev *dev, 247 struct rte_eth_xstat_name *xstats_names, 248 unsigned int n); 249 static int ena_xstats_get_names_by_id(struct rte_eth_dev *dev, 250 const uint64_t *ids, 251 struct rte_eth_xstat_name *xstats_names, 252 unsigned int size); 253 static int ena_xstats_get(struct rte_eth_dev *dev, 254 struct rte_eth_xstat *stats, 255 unsigned int n); 256 static int ena_xstats_get_by_id(struct rte_eth_dev *dev, 257 const uint64_t *ids, 258 uint64_t *values, 259 unsigned int n); 260 static int ena_process_bool_devarg(const char *key, 261 const char *value, 262 void *opaque); 263 static int ena_parse_devargs(struct ena_adapter *adapter, 264 struct rte_devargs *devargs); 265 static int ena_copy_eni_stats(struct ena_adapter *adapter, 266 struct ena_stats_eni *stats); 267 static int ena_setup_rx_intr(struct rte_eth_dev *dev); 268 static int ena_rx_queue_intr_enable(struct rte_eth_dev *dev, 269 uint16_t queue_id); 270 static int ena_rx_queue_intr_disable(struct rte_eth_dev *dev, 271 uint16_t queue_id); 272 static int ena_configure_aenq(struct ena_adapter *adapter); 273 static int ena_mp_primary_handle(const struct rte_mp_msg *mp_msg, 274 const void *peer); 275 276 static const struct eth_dev_ops ena_dev_ops = { 277 .dev_configure = ena_dev_configure, 278 .dev_infos_get = ena_infos_get, 279 .rx_queue_setup = ena_rx_queue_setup, 280 .tx_queue_setup = ena_tx_queue_setup, 281 .dev_start = ena_start, 282 .dev_stop = ena_stop, 283 .link_update = ena_link_update, 284 .stats_get = ena_stats_get, 285 .xstats_get_names = ena_xstats_get_names, 286 .xstats_get_names_by_id = ena_xstats_get_names_by_id, 287 .xstats_get = ena_xstats_get, 288 .xstats_get_by_id = ena_xstats_get_by_id, 289 .mtu_set = ena_mtu_set, 290 .rx_queue_release = ena_rx_queue_release, 291 .tx_queue_release = ena_tx_queue_release, 292 .dev_close = ena_close, 293 .dev_reset = ena_dev_reset, 294 .reta_update = ena_rss_reta_update, 295 .reta_query = ena_rss_reta_query, 296 .rx_queue_intr_enable = ena_rx_queue_intr_enable, 297 .rx_queue_intr_disable = ena_rx_queue_intr_disable, 298 .rss_hash_update = ena_rss_hash_update, 299 .rss_hash_conf_get = ena_rss_hash_conf_get, 300 .tx_done_cleanup = ena_tx_cleanup, 301 }; 302 303 /********************************************************************* 304 * Multi-Process communication bits 305 *********************************************************************/ 306 /* rte_mp IPC message name */ 307 #define ENA_MP_NAME "net_ena_mp" 308 /* Request timeout in seconds */ 309 #define ENA_MP_REQ_TMO 5 310 311 /** Proxy request type */ 312 enum ena_mp_req { 313 ENA_MP_DEV_STATS_GET, 314 ENA_MP_ENI_STATS_GET, 315 ENA_MP_MTU_SET, 316 ENA_MP_IND_TBL_GET, 317 ENA_MP_IND_TBL_SET 318 }; 319 320 /** Proxy message body. Shared between requests and responses. */ 321 struct ena_mp_body { 322 /* Message type */ 323 enum ena_mp_req type; 324 int port_id; 325 /* Processing result. Set in replies. 0 if message succeeded, negative 326 * error code otherwise. 
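	 * For example, a reply to an ENA_MP_MTU_SET request is expected to
	 * carry 0 when ena_com_set_dev_mtu() succeeded in the primary process,
	 * or the negative error code it returned otherwise.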
327 */ 328 int result; 329 union { 330 int mtu; /* For ENA_MP_MTU_SET */ 331 } args; 332 }; 333 334 /** 335 * Initialize IPC message. 336 * 337 * @param[out] msg 338 * Pointer to the message to initialize. 339 * @param[in] type 340 * Message type. 341 * @param[in] port_id 342 * Port ID of target device. 343 * 344 */ 345 static void 346 mp_msg_init(struct rte_mp_msg *msg, enum ena_mp_req type, int port_id) 347 { 348 struct ena_mp_body *body = (struct ena_mp_body *)&msg->param; 349 350 memset(msg, 0, sizeof(*msg)); 351 strlcpy(msg->name, ENA_MP_NAME, sizeof(msg->name)); 352 msg->len_param = sizeof(*body); 353 body->type = type; 354 body->port_id = port_id; 355 } 356 357 /********************************************************************* 358 * Multi-Process communication PMD API 359 *********************************************************************/ 360 /** 361 * Define proxy request descriptor 362 * 363 * Used to define all structures and functions required for proxying a given 364 * function to the primary process including the code to perform to prepare the 365 * request and process the response. 366 * 367 * @param[in] f 368 * Name of the function to proxy 369 * @param[in] t 370 * Message type to use 371 * @param[in] prep 372 * Body of a function to prepare the request in form of a statement 373 * expression. It is passed all the original function arguments along with two 374 * extra ones: 375 * - struct ena_adapter *adapter - PMD data of the device calling the proxy. 376 * - struct ena_mp_body *req - body of a request to prepare. 377 * @param[in] proc 378 * Body of a function to process the response in form of a statement 379 * expression. It is passed all the original function arguments along with two 380 * extra ones: 381 * - struct ena_adapter *adapter - PMD data of the device calling the proxy. 382 * - struct ena_mp_body *rsp - body of a response to process. 383 * @param ... 384 * Proxied function's arguments 385 * 386 * @note Inside prep and proc any parameters which aren't used should be marked 387 * as such (with ENA_TOUCH or __rte_unused). 388 */ 389 #define ENA_PROXY_DESC(f, t, prep, proc, ...) \ 390 static const enum ena_mp_req mp_type_ ## f = t; \ 391 static const char *mp_name_ ## f = #t; \ 392 static void mp_prep_ ## f(struct ena_adapter *adapter, \ 393 struct ena_mp_body *req, \ 394 __VA_ARGS__) \ 395 { \ 396 prep; \ 397 } \ 398 static void mp_proc_ ## f(struct ena_adapter *adapter, \ 399 struct ena_mp_body *rsp, \ 400 __VA_ARGS__) \ 401 { \ 402 proc; \ 403 } 404 405 /** 406 * Proxy wrapper for calling primary functions in a secondary process. 407 * 408 * Depending on whether called in primary or secondary process, calls the 409 * @p func directly or proxies the call to the primary process via rte_mp IPC. 410 * This macro requires a proxy request descriptor to be defined for @p func 411 * using ENA_PROXY_DESC() macro. 412 * 413 * @param[in/out] a 414 * Device PMD data. Used for sending the message and sharing message results 415 * between primary and secondary. 416 * @param[in] f 417 * Function to proxy. 418 * @param ... 419 * Arguments of @p func. 420 * 421 * @return 422 * - 0: Processing succeeded and response handler was called. 423 * - -EPERM: IPC is unavailable on this platform. This means only primary 424 * process may call the proxied function. 425 * - -EIO: IPC returned error on request send. Inspect rte_errno detailed 426 * error code. 427 * - Negative error code from the proxied function. 428 * 429 * @note This mechanism is geared towards control-path tasks. 
Avoid calling it 430 * in fast-path unless unbound delays are allowed. This is due to the IPC 431 * mechanism itself (socket based). 432 * @note Due to IPC parameter size limitations the proxy logic shares call 433 * results through the struct ena_adapter shared memory. This makes the 434 * proxy mechanism strictly single-threaded. Therefore be sure to make all 435 * calls to the same proxied function under the same lock. 436 */ 437 #define ENA_PROXY(a, f, ...) \ 438 ({ \ 439 struct ena_adapter *_a = (a); \ 440 struct timespec ts = { .tv_sec = ENA_MP_REQ_TMO }; \ 441 struct ena_mp_body *req, *rsp; \ 442 struct rte_mp_reply mp_rep; \ 443 struct rte_mp_msg mp_req; \ 444 int ret; \ 445 \ 446 if (rte_eal_process_type() == RTE_PROC_PRIMARY) { \ 447 ret = f(__VA_ARGS__); \ 448 } else { \ 449 /* Prepare and send request */ \ 450 req = (struct ena_mp_body *)&mp_req.param; \ 451 mp_msg_init(&mp_req, mp_type_ ## f, _a->edev_data->port_id); \ 452 mp_prep_ ## f(_a, req, ## __VA_ARGS__); \ 453 \ 454 ret = rte_mp_request_sync(&mp_req, &mp_rep, &ts); \ 455 if (likely(!ret)) { \ 456 RTE_ASSERT(mp_rep.nb_received == 1); \ 457 rsp = (struct ena_mp_body *)&mp_rep.msgs[0].param; \ 458 ret = rsp->result; \ 459 if (ret == 0) { \ 460 mp_proc_##f(_a, rsp, ## __VA_ARGS__); \ 461 } else { \ 462 PMD_DRV_LOG(ERR, \ 463 "%s returned error: %d\n", \ 464 mp_name_ ## f, rsp->result);\ 465 } \ 466 free(mp_rep.msgs); \ 467 } else if (rte_errno == ENOTSUP) { \ 468 PMD_DRV_LOG(ERR, \ 469 "No IPC, can't proxy to primary\n");\ 470 ret = -rte_errno; \ 471 } else { \ 472 PMD_DRV_LOG(ERR, "Request %s failed: %s\n", \ 473 mp_name_ ## f, \ 474 rte_strerror(rte_errno)); \ 475 ret = -EIO; \ 476 } \ 477 } \ 478 ret; \ 479 }) 480 481 /********************************************************************* 482 * Multi-Process communication request descriptors 483 *********************************************************************/ 484 485 ENA_PROXY_DESC(ena_com_get_dev_basic_stats, ENA_MP_DEV_STATS_GET, 486 ({ 487 ENA_TOUCH(adapter); 488 ENA_TOUCH(req); 489 ENA_TOUCH(ena_dev); 490 ENA_TOUCH(stats); 491 }), 492 ({ 493 ENA_TOUCH(rsp); 494 ENA_TOUCH(ena_dev); 495 if (stats != &adapter->basic_stats) 496 rte_memcpy(stats, &adapter->basic_stats, sizeof(*stats)); 497 }), 498 struct ena_com_dev *ena_dev, struct ena_admin_basic_stats *stats); 499 500 ENA_PROXY_DESC(ena_com_get_eni_stats, ENA_MP_ENI_STATS_GET, 501 ({ 502 ENA_TOUCH(adapter); 503 ENA_TOUCH(req); 504 ENA_TOUCH(ena_dev); 505 ENA_TOUCH(stats); 506 }), 507 ({ 508 ENA_TOUCH(rsp); 509 ENA_TOUCH(ena_dev); 510 if (stats != (struct ena_admin_eni_stats *)&adapter->eni_stats) 511 rte_memcpy(stats, &adapter->eni_stats, sizeof(*stats)); 512 }), 513 struct ena_com_dev *ena_dev, struct ena_admin_eni_stats *stats); 514 515 ENA_PROXY_DESC(ena_com_set_dev_mtu, ENA_MP_MTU_SET, 516 ({ 517 ENA_TOUCH(adapter); 518 ENA_TOUCH(ena_dev); 519 req->args.mtu = mtu; 520 }), 521 ({ 522 ENA_TOUCH(adapter); 523 ENA_TOUCH(rsp); 524 ENA_TOUCH(ena_dev); 525 ENA_TOUCH(mtu); 526 }), 527 struct ena_com_dev *ena_dev, int mtu); 528 529 ENA_PROXY_DESC(ena_com_indirect_table_set, ENA_MP_IND_TBL_SET, 530 ({ 531 ENA_TOUCH(adapter); 532 ENA_TOUCH(req); 533 ENA_TOUCH(ena_dev); 534 }), 535 ({ 536 ENA_TOUCH(adapter); 537 ENA_TOUCH(rsp); 538 ENA_TOUCH(ena_dev); 539 }), 540 struct ena_com_dev *ena_dev); 541 542 ENA_PROXY_DESC(ena_com_indirect_table_get, ENA_MP_IND_TBL_GET, 543 ({ 544 ENA_TOUCH(adapter); 545 ENA_TOUCH(req); 546 ENA_TOUCH(ena_dev); 547 ENA_TOUCH(ind_tbl); 548 }), 549 ({ 550 ENA_TOUCH(rsp); 551 ENA_TOUCH(ena_dev); 552 
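		/* Per the ENA_PROXY() note above, call results are shared
		 * through the adapter structure: the primary process fills
		 * adapter->indirect_table and the copy below hands it to the
		 * caller's buffer.
		 */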
if (ind_tbl != adapter->indirect_table) 553 rte_memcpy(ind_tbl, adapter->indirect_table, 554 sizeof(adapter->indirect_table)); 555 }), 556 struct ena_com_dev *ena_dev, u32 *ind_tbl); 557 558 static inline void ena_trigger_reset(struct ena_adapter *adapter, 559 enum ena_regs_reset_reason_types reason) 560 { 561 if (likely(!adapter->trigger_reset)) { 562 adapter->reset_reason = reason; 563 adapter->trigger_reset = true; 564 } 565 } 566 567 static inline void ena_rx_mbuf_prepare(struct ena_ring *rx_ring, 568 struct rte_mbuf *mbuf, 569 struct ena_com_rx_ctx *ena_rx_ctx, 570 bool fill_hash) 571 { 572 struct ena_stats_rx *rx_stats = &rx_ring->rx_stats; 573 uint64_t ol_flags = 0; 574 uint32_t packet_type = 0; 575 576 if (ena_rx_ctx->l4_proto == ENA_ETH_IO_L4_PROTO_TCP) 577 packet_type |= RTE_PTYPE_L4_TCP; 578 else if (ena_rx_ctx->l4_proto == ENA_ETH_IO_L4_PROTO_UDP) 579 packet_type |= RTE_PTYPE_L4_UDP; 580 581 if (ena_rx_ctx->l3_proto == ENA_ETH_IO_L3_PROTO_IPV4) { 582 packet_type |= RTE_PTYPE_L3_IPV4; 583 if (unlikely(ena_rx_ctx->l3_csum_err)) { 584 ++rx_stats->l3_csum_bad; 585 ol_flags |= RTE_MBUF_F_RX_IP_CKSUM_BAD; 586 } else { 587 ol_flags |= RTE_MBUF_F_RX_IP_CKSUM_GOOD; 588 } 589 } else if (ena_rx_ctx->l3_proto == ENA_ETH_IO_L3_PROTO_IPV6) { 590 packet_type |= RTE_PTYPE_L3_IPV6; 591 } 592 593 if (!ena_rx_ctx->l4_csum_checked || ena_rx_ctx->frag) { 594 ol_flags |= RTE_MBUF_F_RX_L4_CKSUM_UNKNOWN; 595 } else { 596 if (unlikely(ena_rx_ctx->l4_csum_err)) { 597 ++rx_stats->l4_csum_bad; 598 /* 599 * For the L4 Rx checksum offload the HW may indicate 600 * bad checksum although it's valid. Because of that, 601 * we're setting the UNKNOWN flag to let the app 602 * re-verify the checksum. 603 */ 604 ol_flags |= RTE_MBUF_F_RX_L4_CKSUM_UNKNOWN; 605 } else { 606 ++rx_stats->l4_csum_good; 607 ol_flags |= RTE_MBUF_F_RX_L4_CKSUM_GOOD; 608 } 609 } 610 611 if (fill_hash && 612 likely((packet_type & ENA_PTYPE_HAS_HASH) && !ena_rx_ctx->frag)) { 613 ol_flags |= RTE_MBUF_F_RX_RSS_HASH; 614 mbuf->hash.rss = ena_rx_ctx->hash; 615 } 616 617 mbuf->ol_flags = ol_flags; 618 mbuf->packet_type = packet_type; 619 } 620 621 static inline void ena_tx_mbuf_prepare(struct rte_mbuf *mbuf, 622 struct ena_com_tx_ctx *ena_tx_ctx, 623 uint64_t queue_offloads, 624 bool disable_meta_caching) 625 { 626 struct ena_com_tx_meta *ena_meta = &ena_tx_ctx->ena_meta; 627 628 if ((mbuf->ol_flags & MBUF_OFFLOADS) && 629 (queue_offloads & QUEUE_OFFLOADS)) { 630 /* check if TSO is required */ 631 if ((mbuf->ol_flags & RTE_MBUF_F_TX_TCP_SEG) && 632 (queue_offloads & RTE_ETH_TX_OFFLOAD_TCP_TSO)) { 633 ena_tx_ctx->tso_enable = true; 634 635 ena_meta->l4_hdr_len = GET_L4_HDR_LEN(mbuf); 636 } 637 638 /* check if L3 checksum is needed */ 639 if ((mbuf->ol_flags & RTE_MBUF_F_TX_IP_CKSUM) && 640 (queue_offloads & RTE_ETH_TX_OFFLOAD_IPV4_CKSUM)) 641 ena_tx_ctx->l3_csum_enable = true; 642 643 if (mbuf->ol_flags & RTE_MBUF_F_TX_IPV6) { 644 ena_tx_ctx->l3_proto = ENA_ETH_IO_L3_PROTO_IPV6; 645 /* For the IPv6 packets, DF always needs to be true. 
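			 * IPv6 has no DF bit and is never fragmented in
			 * transit, so the descriptor can always request
			 * "don't fragment" here.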
*/ 646 ena_tx_ctx->df = 1; 647 } else { 648 ena_tx_ctx->l3_proto = ENA_ETH_IO_L3_PROTO_IPV4; 649 650 /* set don't fragment (DF) flag */ 651 if (mbuf->packet_type & 652 (RTE_PTYPE_L4_NONFRAG 653 | RTE_PTYPE_INNER_L4_NONFRAG)) 654 ena_tx_ctx->df = 1; 655 } 656 657 /* check if L4 checksum is needed */ 658 if (((mbuf->ol_flags & RTE_MBUF_F_TX_L4_MASK) == RTE_MBUF_F_TX_TCP_CKSUM) && 659 (queue_offloads & RTE_ETH_TX_OFFLOAD_TCP_CKSUM)) { 660 ena_tx_ctx->l4_proto = ENA_ETH_IO_L4_PROTO_TCP; 661 ena_tx_ctx->l4_csum_enable = true; 662 } else if (((mbuf->ol_flags & RTE_MBUF_F_TX_L4_MASK) == 663 RTE_MBUF_F_TX_UDP_CKSUM) && 664 (queue_offloads & RTE_ETH_TX_OFFLOAD_UDP_CKSUM)) { 665 ena_tx_ctx->l4_proto = ENA_ETH_IO_L4_PROTO_UDP; 666 ena_tx_ctx->l4_csum_enable = true; 667 } else { 668 ena_tx_ctx->l4_proto = ENA_ETH_IO_L4_PROTO_UNKNOWN; 669 ena_tx_ctx->l4_csum_enable = false; 670 } 671 672 ena_meta->mss = mbuf->tso_segsz; 673 ena_meta->l3_hdr_len = mbuf->l3_len; 674 ena_meta->l3_hdr_offset = mbuf->l2_len; 675 676 ena_tx_ctx->meta_valid = true; 677 } else if (disable_meta_caching) { 678 memset(ena_meta, 0, sizeof(*ena_meta)); 679 ena_tx_ctx->meta_valid = true; 680 } else { 681 ena_tx_ctx->meta_valid = false; 682 } 683 } 684 685 static int validate_tx_req_id(struct ena_ring *tx_ring, u16 req_id) 686 { 687 struct ena_tx_buffer *tx_info = NULL; 688 689 if (likely(req_id < tx_ring->ring_size)) { 690 tx_info = &tx_ring->tx_buffer_info[req_id]; 691 if (likely(tx_info->mbuf)) 692 return 0; 693 } 694 695 if (tx_info) 696 PMD_TX_LOG(ERR, "tx_info doesn't have valid mbuf. queue %d:%d req_id %u\n", 697 tx_ring->port_id, tx_ring->id, req_id); 698 else 699 PMD_TX_LOG(ERR, "Invalid req_id: %hu in queue %d:%d\n", 700 req_id, tx_ring->port_id, tx_ring->id); 701 702 /* Trigger device reset */ 703 ++tx_ring->tx_stats.bad_req_id; 704 ena_trigger_reset(tx_ring->adapter, ENA_REGS_RESET_INV_TX_REQ_ID); 705 return -EFAULT; 706 } 707 708 static void ena_config_host_info(struct ena_com_dev *ena_dev) 709 { 710 struct ena_admin_host_info *host_info; 711 int rc; 712 713 /* Allocate only the host info */ 714 rc = ena_com_allocate_host_info(ena_dev); 715 if (rc) { 716 PMD_DRV_LOG(ERR, "Cannot allocate host info\n"); 717 return; 718 } 719 720 host_info = ena_dev->host_attr.host_info; 721 722 host_info->os_type = ENA_ADMIN_OS_DPDK; 723 host_info->kernel_ver = RTE_VERSION; 724 strlcpy((char *)host_info->kernel_ver_str, rte_version(), 725 sizeof(host_info->kernel_ver_str)); 726 host_info->os_dist = RTE_VERSION; 727 strlcpy((char *)host_info->os_dist_str, rte_version(), 728 sizeof(host_info->os_dist_str)); 729 host_info->driver_version = 730 (DRV_MODULE_VER_MAJOR) | 731 (DRV_MODULE_VER_MINOR << ENA_ADMIN_HOST_INFO_MINOR_SHIFT) | 732 (DRV_MODULE_VER_SUBMINOR << 733 ENA_ADMIN_HOST_INFO_SUB_MINOR_SHIFT); 734 host_info->num_cpus = rte_lcore_count(); 735 736 host_info->driver_supported_features = 737 ENA_ADMIN_HOST_INFO_RX_OFFSET_MASK | 738 ENA_ADMIN_HOST_INFO_RSS_CONFIGURABLE_FUNCTION_KEY_MASK; 739 740 rc = ena_com_set_host_attributes(ena_dev); 741 if (rc) { 742 if (rc == -ENA_COM_UNSUPPORTED) 743 PMD_DRV_LOG(WARNING, "Cannot set host attributes\n"); 744 else 745 PMD_DRV_LOG(ERR, "Cannot set host attributes\n"); 746 747 goto err; 748 } 749 750 return; 751 752 err: 753 ena_com_delete_host_info(ena_dev); 754 } 755 756 /* This function calculates the number of xstats based on the current config */ 757 static unsigned int ena_xstats_calc_num(struct rte_eth_dev_data *data) 758 { 759 return ENA_STATS_ARRAY_GLOBAL + ENA_STATS_ARRAY_ENI + 760 
		(data->nb_tx_queues * ENA_STATS_ARRAY_TX) +
		(data->nb_rx_queues * ENA_STATS_ARRAY_RX);
}

static void ena_config_debug_area(struct ena_adapter *adapter)
{
	u32 debug_area_size;
	int rc, ss_count;

	ss_count = ena_xstats_calc_num(adapter->edev_data);

	/* Allocate 32 bytes for each string and 64 bits for the value */
	debug_area_size = ss_count * ETH_GSTRING_LEN + sizeof(u64) * ss_count;

	rc = ena_com_allocate_debug_area(&adapter->ena_dev, debug_area_size);
	if (rc) {
		PMD_DRV_LOG(ERR, "Cannot allocate debug area\n");
		return;
	}

	rc = ena_com_set_host_attributes(&adapter->ena_dev);
	if (rc) {
		if (rc == -ENA_COM_UNSUPPORTED)
			PMD_DRV_LOG(WARNING, "Cannot set host attributes\n");
		else
			PMD_DRV_LOG(ERR, "Cannot set host attributes\n");

		goto err;
	}

	return;
err:
	ena_com_delete_debug_area(&adapter->ena_dev);
}

static int ena_close(struct rte_eth_dev *dev)
{
	struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev);
	struct rte_intr_handle *intr_handle = pci_dev->intr_handle;
	struct ena_adapter *adapter = dev->data->dev_private;
	int ret = 0;

	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
		return 0;

	if (adapter->state == ENA_ADAPTER_STATE_RUNNING)
		ret = ena_stop(dev);
	adapter->state = ENA_ADAPTER_STATE_CLOSED;

	ena_rx_queue_release_all(dev);
	ena_tx_queue_release_all(dev);

	rte_free(adapter->drv_stats);
	adapter->drv_stats = NULL;

	rte_intr_disable(intr_handle);
	rte_intr_callback_unregister(intr_handle,
				     ena_interrupt_handler_rte,
				     dev);

	/*
	 * The MAC address is not allocated dynamically. Setting it to NULL
	 * prevents the release of the resource in rte_eth_dev_release_port().
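	 * (rte_eth_dev_release_port() calls rte_free() on dev->data->mac_addrs,
	 * which would be invalid for an address that was never allocated with
	 * rte_malloc().)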
823 */ 824 dev->data->mac_addrs = NULL; 825 826 return ret; 827 } 828 829 static int 830 ena_dev_reset(struct rte_eth_dev *dev) 831 { 832 int rc = 0; 833 834 /* Cannot release memory in secondary process */ 835 if (rte_eal_process_type() != RTE_PROC_PRIMARY) { 836 PMD_DRV_LOG(WARNING, "dev_reset not supported in secondary.\n"); 837 return -EPERM; 838 } 839 840 ena_destroy_device(dev); 841 rc = eth_ena_dev_init(dev); 842 if (rc) 843 PMD_INIT_LOG(CRIT, "Cannot initialize device\n"); 844 845 return rc; 846 } 847 848 static void ena_rx_queue_release_all(struct rte_eth_dev *dev) 849 { 850 int nb_queues = dev->data->nb_rx_queues; 851 int i; 852 853 for (i = 0; i < nb_queues; i++) 854 ena_rx_queue_release(dev, i); 855 } 856 857 static void ena_tx_queue_release_all(struct rte_eth_dev *dev) 858 { 859 int nb_queues = dev->data->nb_tx_queues; 860 int i; 861 862 for (i = 0; i < nb_queues; i++) 863 ena_tx_queue_release(dev, i); 864 } 865 866 static void ena_rx_queue_release(struct rte_eth_dev *dev, uint16_t qid) 867 { 868 struct ena_ring *ring = dev->data->rx_queues[qid]; 869 870 /* Free ring resources */ 871 rte_free(ring->rx_buffer_info); 872 ring->rx_buffer_info = NULL; 873 874 rte_free(ring->rx_refill_buffer); 875 ring->rx_refill_buffer = NULL; 876 877 rte_free(ring->empty_rx_reqs); 878 ring->empty_rx_reqs = NULL; 879 880 ring->configured = 0; 881 882 PMD_DRV_LOG(NOTICE, "Rx queue %d:%d released\n", 883 ring->port_id, ring->id); 884 } 885 886 static void ena_tx_queue_release(struct rte_eth_dev *dev, uint16_t qid) 887 { 888 struct ena_ring *ring = dev->data->tx_queues[qid]; 889 890 /* Free ring resources */ 891 rte_free(ring->push_buf_intermediate_buf); 892 893 rte_free(ring->tx_buffer_info); 894 895 rte_free(ring->empty_tx_reqs); 896 897 ring->empty_tx_reqs = NULL; 898 ring->tx_buffer_info = NULL; 899 ring->push_buf_intermediate_buf = NULL; 900 901 ring->configured = 0; 902 903 PMD_DRV_LOG(NOTICE, "Tx queue %d:%d released\n", 904 ring->port_id, ring->id); 905 } 906 907 static void ena_rx_queue_release_bufs(struct ena_ring *ring) 908 { 909 unsigned int i; 910 911 for (i = 0; i < ring->ring_size; ++i) { 912 struct ena_rx_buffer *rx_info = &ring->rx_buffer_info[i]; 913 if (rx_info->mbuf) { 914 rte_mbuf_raw_free(rx_info->mbuf); 915 rx_info->mbuf = NULL; 916 } 917 } 918 } 919 920 static void ena_tx_queue_release_bufs(struct ena_ring *ring) 921 { 922 unsigned int i; 923 924 for (i = 0; i < ring->ring_size; ++i) { 925 struct ena_tx_buffer *tx_buf = &ring->tx_buffer_info[i]; 926 927 if (tx_buf->mbuf) { 928 rte_pktmbuf_free(tx_buf->mbuf); 929 tx_buf->mbuf = NULL; 930 } 931 } 932 } 933 934 static int ena_link_update(struct rte_eth_dev *dev, 935 __rte_unused int wait_to_complete) 936 { 937 struct rte_eth_link *link = &dev->data->dev_link; 938 struct ena_adapter *adapter = dev->data->dev_private; 939 940 link->link_status = adapter->link_status ? 
RTE_ETH_LINK_UP : RTE_ETH_LINK_DOWN; 941 link->link_speed = RTE_ETH_SPEED_NUM_NONE; 942 link->link_duplex = RTE_ETH_LINK_FULL_DUPLEX; 943 944 return 0; 945 } 946 947 static int ena_queue_start_all(struct rte_eth_dev *dev, 948 enum ena_ring_type ring_type) 949 { 950 struct ena_adapter *adapter = dev->data->dev_private; 951 struct ena_ring *queues = NULL; 952 int nb_queues; 953 int i = 0; 954 int rc = 0; 955 956 if (ring_type == ENA_RING_TYPE_RX) { 957 queues = adapter->rx_ring; 958 nb_queues = dev->data->nb_rx_queues; 959 } else { 960 queues = adapter->tx_ring; 961 nb_queues = dev->data->nb_tx_queues; 962 } 963 for (i = 0; i < nb_queues; i++) { 964 if (queues[i].configured) { 965 if (ring_type == ENA_RING_TYPE_RX) { 966 ena_assert_msg( 967 dev->data->rx_queues[i] == &queues[i], 968 "Inconsistent state of Rx queues\n"); 969 } else { 970 ena_assert_msg( 971 dev->data->tx_queues[i] == &queues[i], 972 "Inconsistent state of Tx queues\n"); 973 } 974 975 rc = ena_queue_start(dev, &queues[i]); 976 977 if (rc) { 978 PMD_INIT_LOG(ERR, 979 "Failed to start queue[%d] of type(%d)\n", 980 i, ring_type); 981 goto err; 982 } 983 } 984 } 985 986 return 0; 987 988 err: 989 while (i--) 990 if (queues[i].configured) 991 ena_queue_stop(&queues[i]); 992 993 return rc; 994 } 995 996 static int 997 ena_calc_io_queue_size(struct ena_calc_queue_size_ctx *ctx, 998 bool use_large_llq_hdr) 999 { 1000 struct ena_admin_feature_llq_desc *llq = &ctx->get_feat_ctx->llq; 1001 struct ena_com_dev *ena_dev = ctx->ena_dev; 1002 uint32_t max_tx_queue_size; 1003 uint32_t max_rx_queue_size; 1004 1005 if (ena_dev->supported_features & BIT(ENA_ADMIN_MAX_QUEUES_EXT)) { 1006 struct ena_admin_queue_ext_feature_fields *max_queue_ext = 1007 &ctx->get_feat_ctx->max_queue_ext.max_queue_ext; 1008 max_rx_queue_size = RTE_MIN(max_queue_ext->max_rx_cq_depth, 1009 max_queue_ext->max_rx_sq_depth); 1010 max_tx_queue_size = max_queue_ext->max_tx_cq_depth; 1011 1012 if (ena_dev->tx_mem_queue_type == 1013 ENA_ADMIN_PLACEMENT_POLICY_DEV) { 1014 max_tx_queue_size = RTE_MIN(max_tx_queue_size, 1015 llq->max_llq_depth); 1016 } else { 1017 max_tx_queue_size = RTE_MIN(max_tx_queue_size, 1018 max_queue_ext->max_tx_sq_depth); 1019 } 1020 1021 ctx->max_rx_sgl_size = RTE_MIN(ENA_PKT_MAX_BUFS, 1022 max_queue_ext->max_per_packet_rx_descs); 1023 ctx->max_tx_sgl_size = RTE_MIN(ENA_PKT_MAX_BUFS, 1024 max_queue_ext->max_per_packet_tx_descs); 1025 } else { 1026 struct ena_admin_queue_feature_desc *max_queues = 1027 &ctx->get_feat_ctx->max_queues; 1028 max_rx_queue_size = RTE_MIN(max_queues->max_cq_depth, 1029 max_queues->max_sq_depth); 1030 max_tx_queue_size = max_queues->max_cq_depth; 1031 1032 if (ena_dev->tx_mem_queue_type == 1033 ENA_ADMIN_PLACEMENT_POLICY_DEV) { 1034 max_tx_queue_size = RTE_MIN(max_tx_queue_size, 1035 llq->max_llq_depth); 1036 } else { 1037 max_tx_queue_size = RTE_MIN(max_tx_queue_size, 1038 max_queues->max_sq_depth); 1039 } 1040 1041 ctx->max_rx_sgl_size = RTE_MIN(ENA_PKT_MAX_BUFS, 1042 max_queues->max_packet_rx_descs); 1043 ctx->max_tx_sgl_size = RTE_MIN(ENA_PKT_MAX_BUFS, 1044 max_queues->max_packet_tx_descs); 1045 } 1046 1047 /* Round down to the nearest power of 2 */ 1048 max_rx_queue_size = rte_align32prevpow2(max_rx_queue_size); 1049 max_tx_queue_size = rte_align32prevpow2(max_tx_queue_size); 1050 1051 if (use_large_llq_hdr) { 1052 if ((llq->entry_size_ctrl_supported & 1053 ENA_ADMIN_LIST_ENTRY_SIZE_256B) && 1054 (ena_dev->tx_mem_queue_type == 1055 ENA_ADMIN_PLACEMENT_POLICY_DEV)) { 1056 max_tx_queue_size /= 2; 1057 PMD_INIT_LOG(INFO, 1058 
"Forcing large headers and decreasing maximum Tx queue size to %d\n", 1059 max_tx_queue_size); 1060 } else { 1061 PMD_INIT_LOG(ERR, 1062 "Forcing large headers failed: LLQ is disabled or device does not support large headers\n"); 1063 } 1064 } 1065 1066 if (unlikely(max_rx_queue_size == 0 || max_tx_queue_size == 0)) { 1067 PMD_INIT_LOG(ERR, "Invalid queue size\n"); 1068 return -EFAULT; 1069 } 1070 1071 ctx->max_tx_queue_size = max_tx_queue_size; 1072 ctx->max_rx_queue_size = max_rx_queue_size; 1073 1074 return 0; 1075 } 1076 1077 static void ena_stats_restart(struct rte_eth_dev *dev) 1078 { 1079 struct ena_adapter *adapter = dev->data->dev_private; 1080 1081 rte_atomic64_init(&adapter->drv_stats->ierrors); 1082 rte_atomic64_init(&adapter->drv_stats->oerrors); 1083 rte_atomic64_init(&adapter->drv_stats->rx_nombuf); 1084 adapter->drv_stats->rx_drops = 0; 1085 } 1086 1087 static int ena_stats_get(struct rte_eth_dev *dev, 1088 struct rte_eth_stats *stats) 1089 { 1090 struct ena_admin_basic_stats ena_stats; 1091 struct ena_adapter *adapter = dev->data->dev_private; 1092 struct ena_com_dev *ena_dev = &adapter->ena_dev; 1093 int rc; 1094 int i; 1095 int max_rings_stats; 1096 1097 memset(&ena_stats, 0, sizeof(ena_stats)); 1098 1099 rte_spinlock_lock(&adapter->admin_lock); 1100 rc = ENA_PROXY(adapter, ena_com_get_dev_basic_stats, ena_dev, 1101 &ena_stats); 1102 rte_spinlock_unlock(&adapter->admin_lock); 1103 if (unlikely(rc)) { 1104 PMD_DRV_LOG(ERR, "Could not retrieve statistics from ENA\n"); 1105 return rc; 1106 } 1107 1108 /* Set of basic statistics from ENA */ 1109 stats->ipackets = __MERGE_64B_H_L(ena_stats.rx_pkts_high, 1110 ena_stats.rx_pkts_low); 1111 stats->opackets = __MERGE_64B_H_L(ena_stats.tx_pkts_high, 1112 ena_stats.tx_pkts_low); 1113 stats->ibytes = __MERGE_64B_H_L(ena_stats.rx_bytes_high, 1114 ena_stats.rx_bytes_low); 1115 stats->obytes = __MERGE_64B_H_L(ena_stats.tx_bytes_high, 1116 ena_stats.tx_bytes_low); 1117 1118 /* Driver related stats */ 1119 stats->imissed = adapter->drv_stats->rx_drops; 1120 stats->ierrors = rte_atomic64_read(&adapter->drv_stats->ierrors); 1121 stats->oerrors = rte_atomic64_read(&adapter->drv_stats->oerrors); 1122 stats->rx_nombuf = rte_atomic64_read(&adapter->drv_stats->rx_nombuf); 1123 1124 max_rings_stats = RTE_MIN(dev->data->nb_rx_queues, 1125 RTE_ETHDEV_QUEUE_STAT_CNTRS); 1126 for (i = 0; i < max_rings_stats; ++i) { 1127 struct ena_stats_rx *rx_stats = &adapter->rx_ring[i].rx_stats; 1128 1129 stats->q_ibytes[i] = rx_stats->bytes; 1130 stats->q_ipackets[i] = rx_stats->cnt; 1131 stats->q_errors[i] = rx_stats->bad_desc_num + 1132 rx_stats->bad_req_id; 1133 } 1134 1135 max_rings_stats = RTE_MIN(dev->data->nb_tx_queues, 1136 RTE_ETHDEV_QUEUE_STAT_CNTRS); 1137 for (i = 0; i < max_rings_stats; ++i) { 1138 struct ena_stats_tx *tx_stats = &adapter->tx_ring[i].tx_stats; 1139 1140 stats->q_obytes[i] = tx_stats->bytes; 1141 stats->q_opackets[i] = tx_stats->cnt; 1142 } 1143 1144 return 0; 1145 } 1146 1147 static int ena_mtu_set(struct rte_eth_dev *dev, uint16_t mtu) 1148 { 1149 struct ena_adapter *adapter; 1150 struct ena_com_dev *ena_dev; 1151 int rc = 0; 1152 1153 ena_assert_msg(dev->data != NULL, "Uninitialized device\n"); 1154 ena_assert_msg(dev->data->dev_private != NULL, "Uninitialized device\n"); 1155 adapter = dev->data->dev_private; 1156 1157 ena_dev = &adapter->ena_dev; 1158 ena_assert_msg(ena_dev != NULL, "Uninitialized device\n"); 1159 1160 rc = ENA_PROXY(adapter, ena_com_set_dev_mtu, ena_dev, mtu); 1161 if (rc) 1162 PMD_DRV_LOG(ERR, "Could not set 
MTU: %d\n", mtu); 1163 else 1164 PMD_DRV_LOG(NOTICE, "MTU set to: %d\n", mtu); 1165 1166 return rc; 1167 } 1168 1169 static int ena_start(struct rte_eth_dev *dev) 1170 { 1171 struct ena_adapter *adapter = dev->data->dev_private; 1172 uint64_t ticks; 1173 int rc = 0; 1174 1175 /* Cannot allocate memory in secondary process */ 1176 if (rte_eal_process_type() != RTE_PROC_PRIMARY) { 1177 PMD_DRV_LOG(WARNING, "dev_start not supported in secondary.\n"); 1178 return -EPERM; 1179 } 1180 1181 rc = ena_setup_rx_intr(dev); 1182 if (rc) 1183 return rc; 1184 1185 rc = ena_queue_start_all(dev, ENA_RING_TYPE_RX); 1186 if (rc) 1187 return rc; 1188 1189 rc = ena_queue_start_all(dev, ENA_RING_TYPE_TX); 1190 if (rc) 1191 goto err_start_tx; 1192 1193 if (adapter->edev_data->dev_conf.rxmode.mq_mode & RTE_ETH_MQ_RX_RSS_FLAG) { 1194 rc = ena_rss_configure(adapter); 1195 if (rc) 1196 goto err_rss_init; 1197 } 1198 1199 ena_stats_restart(dev); 1200 1201 adapter->timestamp_wd = rte_get_timer_cycles(); 1202 adapter->keep_alive_timeout = ENA_DEVICE_KALIVE_TIMEOUT; 1203 1204 ticks = rte_get_timer_hz(); 1205 rte_timer_reset(&adapter->timer_wd, ticks, PERIODICAL, rte_lcore_id(), 1206 ena_timer_wd_callback, dev); 1207 1208 ++adapter->dev_stats.dev_start; 1209 adapter->state = ENA_ADAPTER_STATE_RUNNING; 1210 1211 return 0; 1212 1213 err_rss_init: 1214 ena_queue_stop_all(dev, ENA_RING_TYPE_TX); 1215 err_start_tx: 1216 ena_queue_stop_all(dev, ENA_RING_TYPE_RX); 1217 return rc; 1218 } 1219 1220 static int ena_stop(struct rte_eth_dev *dev) 1221 { 1222 struct ena_adapter *adapter = dev->data->dev_private; 1223 struct ena_com_dev *ena_dev = &adapter->ena_dev; 1224 struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev); 1225 struct rte_intr_handle *intr_handle = pci_dev->intr_handle; 1226 int rc; 1227 1228 /* Cannot free memory in secondary process */ 1229 if (rte_eal_process_type() != RTE_PROC_PRIMARY) { 1230 PMD_DRV_LOG(WARNING, "dev_stop not supported in secondary.\n"); 1231 return -EPERM; 1232 } 1233 1234 rte_timer_stop_sync(&adapter->timer_wd); 1235 ena_queue_stop_all(dev, ENA_RING_TYPE_TX); 1236 ena_queue_stop_all(dev, ENA_RING_TYPE_RX); 1237 1238 if (adapter->trigger_reset) { 1239 rc = ena_com_dev_reset(ena_dev, adapter->reset_reason); 1240 if (rc) 1241 PMD_DRV_LOG(ERR, "Device reset failed, rc: %d\n", rc); 1242 } 1243 1244 rte_intr_disable(intr_handle); 1245 1246 rte_intr_efd_disable(intr_handle); 1247 1248 /* Cleanup vector list */ 1249 rte_intr_vec_list_free(intr_handle); 1250 1251 rte_intr_enable(intr_handle); 1252 1253 ++adapter->dev_stats.dev_stop; 1254 adapter->state = ENA_ADAPTER_STATE_STOPPED; 1255 dev->data->dev_started = 0; 1256 1257 return 0; 1258 } 1259 1260 static int ena_create_io_queue(struct rte_eth_dev *dev, struct ena_ring *ring) 1261 { 1262 struct ena_adapter *adapter = ring->adapter; 1263 struct ena_com_dev *ena_dev = &adapter->ena_dev; 1264 struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev); 1265 struct rte_intr_handle *intr_handle = pci_dev->intr_handle; 1266 struct ena_com_create_io_ctx ctx = 1267 /* policy set to _HOST just to satisfy icc compiler */ 1268 { ENA_ADMIN_PLACEMENT_POLICY_HOST, 1269 0, 0, 0, 0, 0 }; 1270 uint16_t ena_qid; 1271 unsigned int i; 1272 int rc; 1273 1274 ctx.msix_vector = -1; 1275 if (ring->type == ENA_RING_TYPE_TX) { 1276 ena_qid = ENA_IO_TXQ_IDX(ring->id); 1277 ctx.direction = ENA_COM_IO_QUEUE_DIRECTION_TX; 1278 ctx.mem_queue_type = ena_dev->tx_mem_queue_type; 1279 for (i = 0; i < ring->ring_size; i++) 1280 ring->empty_tx_reqs[i] = i; 1281 } else { 1282 ena_qid = 
ENA_IO_RXQ_IDX(ring->id); 1283 ctx.direction = ENA_COM_IO_QUEUE_DIRECTION_RX; 1284 if (rte_intr_dp_is_en(intr_handle)) 1285 ctx.msix_vector = 1286 rte_intr_vec_list_index_get(intr_handle, 1287 ring->id); 1288 1289 for (i = 0; i < ring->ring_size; i++) 1290 ring->empty_rx_reqs[i] = i; 1291 } 1292 ctx.queue_size = ring->ring_size; 1293 ctx.qid = ena_qid; 1294 ctx.numa_node = ring->numa_socket_id; 1295 1296 rc = ena_com_create_io_queue(ena_dev, &ctx); 1297 if (rc) { 1298 PMD_DRV_LOG(ERR, 1299 "Failed to create IO queue[%d] (qid:%d), rc: %d\n", 1300 ring->id, ena_qid, rc); 1301 return rc; 1302 } 1303 1304 rc = ena_com_get_io_handlers(ena_dev, ena_qid, 1305 &ring->ena_com_io_sq, 1306 &ring->ena_com_io_cq); 1307 if (rc) { 1308 PMD_DRV_LOG(ERR, 1309 "Failed to get IO queue[%d] handlers, rc: %d\n", 1310 ring->id, rc); 1311 ena_com_destroy_io_queue(ena_dev, ena_qid); 1312 return rc; 1313 } 1314 1315 if (ring->type == ENA_RING_TYPE_TX) 1316 ena_com_update_numa_node(ring->ena_com_io_cq, ctx.numa_node); 1317 1318 /* Start with Rx interrupts being masked. */ 1319 if (ring->type == ENA_RING_TYPE_RX && rte_intr_dp_is_en(intr_handle)) 1320 ena_rx_queue_intr_disable(dev, ring->id); 1321 1322 return 0; 1323 } 1324 1325 static void ena_queue_stop(struct ena_ring *ring) 1326 { 1327 struct ena_com_dev *ena_dev = &ring->adapter->ena_dev; 1328 1329 if (ring->type == ENA_RING_TYPE_RX) { 1330 ena_com_destroy_io_queue(ena_dev, ENA_IO_RXQ_IDX(ring->id)); 1331 ena_rx_queue_release_bufs(ring); 1332 } else { 1333 ena_com_destroy_io_queue(ena_dev, ENA_IO_TXQ_IDX(ring->id)); 1334 ena_tx_queue_release_bufs(ring); 1335 } 1336 } 1337 1338 static void ena_queue_stop_all(struct rte_eth_dev *dev, 1339 enum ena_ring_type ring_type) 1340 { 1341 struct ena_adapter *adapter = dev->data->dev_private; 1342 struct ena_ring *queues = NULL; 1343 uint16_t nb_queues, i; 1344 1345 if (ring_type == ENA_RING_TYPE_RX) { 1346 queues = adapter->rx_ring; 1347 nb_queues = dev->data->nb_rx_queues; 1348 } else { 1349 queues = adapter->tx_ring; 1350 nb_queues = dev->data->nb_tx_queues; 1351 } 1352 1353 for (i = 0; i < nb_queues; ++i) 1354 if (queues[i].configured) 1355 ena_queue_stop(&queues[i]); 1356 } 1357 1358 static int ena_queue_start(struct rte_eth_dev *dev, struct ena_ring *ring) 1359 { 1360 int rc, bufs_num; 1361 1362 ena_assert_msg(ring->configured == 1, 1363 "Trying to start unconfigured queue\n"); 1364 1365 rc = ena_create_io_queue(dev, ring); 1366 if (rc) { 1367 PMD_INIT_LOG(ERR, "Failed to create IO queue\n"); 1368 return rc; 1369 } 1370 1371 ring->next_to_clean = 0; 1372 ring->next_to_use = 0; 1373 1374 if (ring->type == ENA_RING_TYPE_TX) { 1375 ring->tx_stats.available_desc = 1376 ena_com_free_q_entries(ring->ena_com_io_sq); 1377 return 0; 1378 } 1379 1380 bufs_num = ring->ring_size - 1; 1381 rc = ena_populate_rx_queue(ring, bufs_num); 1382 if (rc != bufs_num) { 1383 ena_com_destroy_io_queue(&ring->adapter->ena_dev, 1384 ENA_IO_RXQ_IDX(ring->id)); 1385 PMD_INIT_LOG(ERR, "Failed to populate Rx ring\n"); 1386 return ENA_COM_FAULT; 1387 } 1388 /* Flush per-core RX buffers pools cache as they can be used on other 1389 * cores as well. 
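	 * Passing a NULL cache pointer to rte_mempool_cache_flush() flushes
	 * the default per-lcore cache of the calling lcore back into the
	 * common pool.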
1390 */ 1391 rte_mempool_cache_flush(NULL, ring->mb_pool); 1392 1393 return 0; 1394 } 1395 1396 static int ena_tx_queue_setup(struct rte_eth_dev *dev, 1397 uint16_t queue_idx, 1398 uint16_t nb_desc, 1399 unsigned int socket_id, 1400 const struct rte_eth_txconf *tx_conf) 1401 { 1402 struct ena_ring *txq = NULL; 1403 struct ena_adapter *adapter = dev->data->dev_private; 1404 unsigned int i; 1405 uint16_t dyn_thresh; 1406 1407 txq = &adapter->tx_ring[queue_idx]; 1408 1409 if (txq->configured) { 1410 PMD_DRV_LOG(CRIT, 1411 "API violation. Queue[%d] is already configured\n", 1412 queue_idx); 1413 return ENA_COM_FAULT; 1414 } 1415 1416 if (!rte_is_power_of_2(nb_desc)) { 1417 PMD_DRV_LOG(ERR, 1418 "Unsupported size of Tx queue: %d is not a power of 2.\n", 1419 nb_desc); 1420 return -EINVAL; 1421 } 1422 1423 if (nb_desc > adapter->max_tx_ring_size) { 1424 PMD_DRV_LOG(ERR, 1425 "Unsupported size of Tx queue (max size: %d)\n", 1426 adapter->max_tx_ring_size); 1427 return -EINVAL; 1428 } 1429 1430 txq->port_id = dev->data->port_id; 1431 txq->next_to_clean = 0; 1432 txq->next_to_use = 0; 1433 txq->ring_size = nb_desc; 1434 txq->size_mask = nb_desc - 1; 1435 txq->numa_socket_id = socket_id; 1436 txq->pkts_without_db = false; 1437 txq->last_cleanup_ticks = 0; 1438 1439 txq->tx_buffer_info = rte_zmalloc_socket("txq->tx_buffer_info", 1440 sizeof(struct ena_tx_buffer) * txq->ring_size, 1441 RTE_CACHE_LINE_SIZE, 1442 socket_id); 1443 if (!txq->tx_buffer_info) { 1444 PMD_DRV_LOG(ERR, 1445 "Failed to allocate memory for Tx buffer info\n"); 1446 return -ENOMEM; 1447 } 1448 1449 txq->empty_tx_reqs = rte_zmalloc_socket("txq->empty_tx_reqs", 1450 sizeof(uint16_t) * txq->ring_size, 1451 RTE_CACHE_LINE_SIZE, 1452 socket_id); 1453 if (!txq->empty_tx_reqs) { 1454 PMD_DRV_LOG(ERR, 1455 "Failed to allocate memory for empty Tx requests\n"); 1456 rte_free(txq->tx_buffer_info); 1457 return -ENOMEM; 1458 } 1459 1460 txq->push_buf_intermediate_buf = 1461 rte_zmalloc_socket("txq->push_buf_intermediate_buf", 1462 txq->tx_max_header_size, 1463 RTE_CACHE_LINE_SIZE, 1464 socket_id); 1465 if (!txq->push_buf_intermediate_buf) { 1466 PMD_DRV_LOG(ERR, "Failed to alloc push buffer for LLQ\n"); 1467 rte_free(txq->tx_buffer_info); 1468 rte_free(txq->empty_tx_reqs); 1469 return -ENOMEM; 1470 } 1471 1472 for (i = 0; i < txq->ring_size; i++) 1473 txq->empty_tx_reqs[i] = i; 1474 1475 txq->offloads = tx_conf->offloads | dev->data->dev_conf.txmode.offloads; 1476 1477 /* Check if caller provided the Tx cleanup threshold value. 
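	 * If it was not provided, fall back to a ring-size based default of
	 * ring_size - ring_size / ENA_REFILL_THRESH_DIVIDER, but never lower
	 * than ring_size - ENA_REFILL_THRESH_PACKET (see the RTE_MAX() below).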
*/ 1478 if (tx_conf->tx_free_thresh != 0) { 1479 txq->tx_free_thresh = tx_conf->tx_free_thresh; 1480 } else { 1481 dyn_thresh = txq->ring_size - 1482 txq->ring_size / ENA_REFILL_THRESH_DIVIDER; 1483 txq->tx_free_thresh = RTE_MAX(dyn_thresh, 1484 txq->ring_size - ENA_REFILL_THRESH_PACKET); 1485 } 1486 1487 txq->missing_tx_completion_threshold = 1488 RTE_MIN(txq->ring_size / 2, ENA_DEFAULT_MISSING_COMP); 1489 1490 /* Store pointer to this queue in upper layer */ 1491 txq->configured = 1; 1492 dev->data->tx_queues[queue_idx] = txq; 1493 1494 return 0; 1495 } 1496 1497 static int ena_rx_queue_setup(struct rte_eth_dev *dev, 1498 uint16_t queue_idx, 1499 uint16_t nb_desc, 1500 unsigned int socket_id, 1501 const struct rte_eth_rxconf *rx_conf, 1502 struct rte_mempool *mp) 1503 { 1504 struct ena_adapter *adapter = dev->data->dev_private; 1505 struct ena_ring *rxq = NULL; 1506 size_t buffer_size; 1507 int i; 1508 uint16_t dyn_thresh; 1509 1510 rxq = &adapter->rx_ring[queue_idx]; 1511 if (rxq->configured) { 1512 PMD_DRV_LOG(CRIT, 1513 "API violation. Queue[%d] is already configured\n", 1514 queue_idx); 1515 return ENA_COM_FAULT; 1516 } 1517 1518 if (!rte_is_power_of_2(nb_desc)) { 1519 PMD_DRV_LOG(ERR, 1520 "Unsupported size of Rx queue: %d is not a power of 2.\n", 1521 nb_desc); 1522 return -EINVAL; 1523 } 1524 1525 if (nb_desc > adapter->max_rx_ring_size) { 1526 PMD_DRV_LOG(ERR, 1527 "Unsupported size of Rx queue (max size: %d)\n", 1528 adapter->max_rx_ring_size); 1529 return -EINVAL; 1530 } 1531 1532 /* ENA isn't supporting buffers smaller than 1400 bytes */ 1533 buffer_size = rte_pktmbuf_data_room_size(mp) - RTE_PKTMBUF_HEADROOM; 1534 if (buffer_size < ENA_RX_BUF_MIN_SIZE) { 1535 PMD_DRV_LOG(ERR, 1536 "Unsupported size of Rx buffer: %zu (min size: %d)\n", 1537 buffer_size, ENA_RX_BUF_MIN_SIZE); 1538 return -EINVAL; 1539 } 1540 1541 rxq->port_id = dev->data->port_id; 1542 rxq->next_to_clean = 0; 1543 rxq->next_to_use = 0; 1544 rxq->ring_size = nb_desc; 1545 rxq->size_mask = nb_desc - 1; 1546 rxq->numa_socket_id = socket_id; 1547 rxq->mb_pool = mp; 1548 1549 rxq->rx_buffer_info = rte_zmalloc_socket("rxq->buffer_info", 1550 sizeof(struct ena_rx_buffer) * nb_desc, 1551 RTE_CACHE_LINE_SIZE, 1552 socket_id); 1553 if (!rxq->rx_buffer_info) { 1554 PMD_DRV_LOG(ERR, 1555 "Failed to allocate memory for Rx buffer info\n"); 1556 return -ENOMEM; 1557 } 1558 1559 rxq->rx_refill_buffer = rte_zmalloc_socket("rxq->rx_refill_buffer", 1560 sizeof(struct rte_mbuf *) * nb_desc, 1561 RTE_CACHE_LINE_SIZE, 1562 socket_id); 1563 if (!rxq->rx_refill_buffer) { 1564 PMD_DRV_LOG(ERR, 1565 "Failed to allocate memory for Rx refill buffer\n"); 1566 rte_free(rxq->rx_buffer_info); 1567 rxq->rx_buffer_info = NULL; 1568 return -ENOMEM; 1569 } 1570 1571 rxq->empty_rx_reqs = rte_zmalloc_socket("rxq->empty_rx_reqs", 1572 sizeof(uint16_t) * nb_desc, 1573 RTE_CACHE_LINE_SIZE, 1574 socket_id); 1575 if (!rxq->empty_rx_reqs) { 1576 PMD_DRV_LOG(ERR, 1577 "Failed to allocate memory for empty Rx requests\n"); 1578 rte_free(rxq->rx_buffer_info); 1579 rxq->rx_buffer_info = NULL; 1580 rte_free(rxq->rx_refill_buffer); 1581 rxq->rx_refill_buffer = NULL; 1582 return -ENOMEM; 1583 } 1584 1585 for (i = 0; i < nb_desc; i++) 1586 rxq->empty_rx_reqs[i] = i; 1587 1588 rxq->offloads = rx_conf->offloads | dev->data->dev_conf.rxmode.offloads; 1589 1590 if (rx_conf->rx_free_thresh != 0) { 1591 rxq->rx_free_thresh = rx_conf->rx_free_thresh; 1592 } else { 1593 dyn_thresh = rxq->ring_size / ENA_REFILL_THRESH_DIVIDER; 1594 rxq->rx_free_thresh = 
RTE_MIN(dyn_thresh, 1595 (uint16_t)(ENA_REFILL_THRESH_PACKET)); 1596 } 1597 1598 /* Store pointer to this queue in upper layer */ 1599 rxq->configured = 1; 1600 dev->data->rx_queues[queue_idx] = rxq; 1601 1602 return 0; 1603 } 1604 1605 static int ena_add_single_rx_desc(struct ena_com_io_sq *io_sq, 1606 struct rte_mbuf *mbuf, uint16_t id) 1607 { 1608 struct ena_com_buf ebuf; 1609 int rc; 1610 1611 /* prepare physical address for DMA transaction */ 1612 ebuf.paddr = mbuf->buf_iova + RTE_PKTMBUF_HEADROOM; 1613 ebuf.len = mbuf->buf_len - RTE_PKTMBUF_HEADROOM; 1614 1615 /* pass resource to device */ 1616 rc = ena_com_add_single_rx_desc(io_sq, &ebuf, id); 1617 if (unlikely(rc != 0)) 1618 PMD_RX_LOG(WARNING, "Failed adding Rx desc\n"); 1619 1620 return rc; 1621 } 1622 1623 static int ena_populate_rx_queue(struct ena_ring *rxq, unsigned int count) 1624 { 1625 unsigned int i; 1626 int rc; 1627 uint16_t next_to_use = rxq->next_to_use; 1628 uint16_t req_id; 1629 #ifdef RTE_ETHDEV_DEBUG_RX 1630 uint16_t in_use; 1631 #endif 1632 struct rte_mbuf **mbufs = rxq->rx_refill_buffer; 1633 1634 if (unlikely(!count)) 1635 return 0; 1636 1637 #ifdef RTE_ETHDEV_DEBUG_RX 1638 in_use = rxq->ring_size - 1 - 1639 ena_com_free_q_entries(rxq->ena_com_io_sq); 1640 if (unlikely((in_use + count) >= rxq->ring_size)) 1641 PMD_RX_LOG(ERR, "Bad Rx ring state\n"); 1642 #endif 1643 1644 /* get resources for incoming packets */ 1645 rc = rte_pktmbuf_alloc_bulk(rxq->mb_pool, mbufs, count); 1646 if (unlikely(rc < 0)) { 1647 rte_atomic64_inc(&rxq->adapter->drv_stats->rx_nombuf); 1648 ++rxq->rx_stats.mbuf_alloc_fail; 1649 PMD_RX_LOG(DEBUG, "There are not enough free buffers\n"); 1650 return 0; 1651 } 1652 1653 for (i = 0; i < count; i++) { 1654 struct rte_mbuf *mbuf = mbufs[i]; 1655 struct ena_rx_buffer *rx_info; 1656 1657 if (likely((i + 4) < count)) 1658 rte_prefetch0(mbufs[i + 4]); 1659 1660 req_id = rxq->empty_rx_reqs[next_to_use]; 1661 rx_info = &rxq->rx_buffer_info[req_id]; 1662 1663 rc = ena_add_single_rx_desc(rxq->ena_com_io_sq, mbuf, req_id); 1664 if (unlikely(rc != 0)) 1665 break; 1666 1667 rx_info->mbuf = mbuf; 1668 next_to_use = ENA_IDX_NEXT_MASKED(next_to_use, rxq->size_mask); 1669 } 1670 1671 if (unlikely(i < count)) { 1672 PMD_RX_LOG(WARNING, 1673 "Refilled Rx queue[%d] with only %d/%d buffers\n", 1674 rxq->id, i, count); 1675 rte_pktmbuf_free_bulk(&mbufs[i], count - i); 1676 ++rxq->rx_stats.refill_partial; 1677 } 1678 1679 /* When we submitted free resources to device... */ 1680 if (likely(i > 0)) { 1681 /* ...let HW know that it can fill buffers with data. */ 1682 ena_com_write_sq_doorbell(rxq->ena_com_io_sq); 1683 1684 rxq->next_to_use = next_to_use; 1685 } 1686 1687 return i; 1688 } 1689 1690 static int ena_device_init(struct ena_adapter *adapter, 1691 struct rte_pci_device *pdev, 1692 struct ena_com_dev_get_features_ctx *get_feat_ctx) 1693 { 1694 struct ena_com_dev *ena_dev = &adapter->ena_dev; 1695 uint32_t aenq_groups; 1696 int rc; 1697 bool readless_supported; 1698 1699 /* Initialize mmio registers */ 1700 rc = ena_com_mmio_reg_read_request_init(ena_dev); 1701 if (rc) { 1702 PMD_DRV_LOG(ERR, "Failed to init MMIO read less\n"); 1703 return rc; 1704 } 1705 1706 /* The PCIe configuration space revision id indicate if mmio reg 1707 * read is disabled. 
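	 * Readless (MMIO-read-less) register access therefore stays enabled
	 * only when the ENA_MMIO_DISABLE_REG_READ bit is clear in
	 * pdev->id.class_id, as evaluated right below.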
1708 */ 1709 readless_supported = !(pdev->id.class_id & ENA_MMIO_DISABLE_REG_READ); 1710 ena_com_set_mmio_read_mode(ena_dev, readless_supported); 1711 1712 /* reset device */ 1713 rc = ena_com_dev_reset(ena_dev, ENA_REGS_RESET_NORMAL); 1714 if (rc) { 1715 PMD_DRV_LOG(ERR, "Cannot reset device\n"); 1716 goto err_mmio_read_less; 1717 } 1718 1719 /* check FW version */ 1720 rc = ena_com_validate_version(ena_dev); 1721 if (rc) { 1722 PMD_DRV_LOG(ERR, "Device version is too low\n"); 1723 goto err_mmio_read_less; 1724 } 1725 1726 ena_dev->dma_addr_bits = ena_com_get_dma_width(ena_dev); 1727 1728 /* ENA device administration layer init */ 1729 rc = ena_com_admin_init(ena_dev, &aenq_handlers); 1730 if (rc) { 1731 PMD_DRV_LOG(ERR, 1732 "Cannot initialize ENA admin queue\n"); 1733 goto err_mmio_read_less; 1734 } 1735 1736 /* To enable the msix interrupts the driver needs to know the number 1737 * of queues. So the driver uses polling mode to retrieve this 1738 * information. 1739 */ 1740 ena_com_set_admin_polling_mode(ena_dev, true); 1741 1742 ena_config_host_info(ena_dev); 1743 1744 /* Get Device Attributes and features */ 1745 rc = ena_com_get_dev_attr_feat(ena_dev, get_feat_ctx); 1746 if (rc) { 1747 PMD_DRV_LOG(ERR, 1748 "Cannot get attribute for ENA device, rc: %d\n", rc); 1749 goto err_admin_init; 1750 } 1751 1752 aenq_groups = BIT(ENA_ADMIN_LINK_CHANGE) | 1753 BIT(ENA_ADMIN_NOTIFICATION) | 1754 BIT(ENA_ADMIN_KEEP_ALIVE) | 1755 BIT(ENA_ADMIN_FATAL_ERROR) | 1756 BIT(ENA_ADMIN_WARNING); 1757 1758 aenq_groups &= get_feat_ctx->aenq.supported_groups; 1759 1760 adapter->all_aenq_groups = aenq_groups; 1761 1762 return 0; 1763 1764 err_admin_init: 1765 ena_com_admin_destroy(ena_dev); 1766 1767 err_mmio_read_less: 1768 ena_com_mmio_reg_read_request_destroy(ena_dev); 1769 1770 return rc; 1771 } 1772 1773 static void ena_interrupt_handler_rte(void *cb_arg) 1774 { 1775 struct rte_eth_dev *dev = cb_arg; 1776 struct ena_adapter *adapter = dev->data->dev_private; 1777 struct ena_com_dev *ena_dev = &adapter->ena_dev; 1778 1779 ena_com_admin_q_comp_intr_handler(ena_dev); 1780 if (likely(adapter->state != ENA_ADAPTER_STATE_CLOSED)) 1781 ena_com_aenq_intr_handler(ena_dev, dev); 1782 } 1783 1784 static void check_for_missing_keep_alive(struct ena_adapter *adapter) 1785 { 1786 if (!(adapter->active_aenq_groups & BIT(ENA_ADMIN_KEEP_ALIVE))) 1787 return; 1788 1789 if (adapter->keep_alive_timeout == ENA_HW_HINTS_NO_TIMEOUT) 1790 return; 1791 1792 if (unlikely((rte_get_timer_cycles() - adapter->timestamp_wd) >= 1793 adapter->keep_alive_timeout)) { 1794 PMD_DRV_LOG(ERR, "Keep alive timeout\n"); 1795 ena_trigger_reset(adapter, ENA_REGS_RESET_KEEP_ALIVE_TO); 1796 ++adapter->dev_stats.wd_expired; 1797 } 1798 } 1799 1800 /* Check if admin queue is enabled */ 1801 static void check_for_admin_com_state(struct ena_adapter *adapter) 1802 { 1803 if (unlikely(!ena_com_get_admin_running_state(&adapter->ena_dev))) { 1804 PMD_DRV_LOG(ERR, "ENA admin queue is not in running state\n"); 1805 ena_trigger_reset(adapter, ENA_REGS_RESET_ADMIN_TO); 1806 } 1807 } 1808 1809 static int check_for_tx_completion_in_queue(struct ena_adapter *adapter, 1810 struct ena_ring *tx_ring) 1811 { 1812 struct ena_tx_buffer *tx_buf; 1813 uint64_t timestamp; 1814 uint64_t completion_delay; 1815 uint32_t missed_tx = 0; 1816 unsigned int i; 1817 int rc = 0; 1818 1819 for (i = 0; i < tx_ring->ring_size; ++i) { 1820 tx_buf = &tx_ring->tx_buffer_info[i]; 1821 timestamp = tx_buf->timestamp; 1822 1823 if (timestamp == 0) 1824 continue; 1825 1826 completion_delay = 
rte_get_timer_cycles() - timestamp; 1827 if (completion_delay > adapter->missing_tx_completion_to) { 1828 if (unlikely(!tx_buf->print_once)) { 1829 PMD_TX_LOG(WARNING, 1830 "Found a Tx that wasn't completed on time, qid %d, index %d. " 1831 "Missing Tx outstanding for %" PRIu64 " msecs.\n", 1832 tx_ring->id, i, completion_delay / 1833 rte_get_timer_hz() * 1000); 1834 tx_buf->print_once = true; 1835 } 1836 ++missed_tx; 1837 } 1838 } 1839 1840 if (unlikely(missed_tx > tx_ring->missing_tx_completion_threshold)) { 1841 PMD_DRV_LOG(ERR, 1842 "The number of lost Tx completions is above the threshold (%d > %d). " 1843 "Trigger the device reset.\n", 1844 missed_tx, 1845 tx_ring->missing_tx_completion_threshold); 1846 adapter->reset_reason = ENA_REGS_RESET_MISS_TX_CMPL; 1847 adapter->trigger_reset = true; 1848 rc = -EIO; 1849 } 1850 1851 tx_ring->tx_stats.missed_tx += missed_tx; 1852 1853 return rc; 1854 } 1855 1856 static void check_for_tx_completions(struct ena_adapter *adapter) 1857 { 1858 struct ena_ring *tx_ring; 1859 uint64_t tx_cleanup_delay; 1860 size_t qid; 1861 int budget; 1862 uint16_t nb_tx_queues = adapter->edev_data->nb_tx_queues; 1863 1864 if (adapter->missing_tx_completion_to == ENA_HW_HINTS_NO_TIMEOUT) 1865 return; 1866 1867 nb_tx_queues = adapter->edev_data->nb_tx_queues; 1868 budget = adapter->missing_tx_completion_budget; 1869 1870 qid = adapter->last_tx_comp_qid; 1871 while (budget-- > 0) { 1872 tx_ring = &adapter->tx_ring[qid]; 1873 1874 /* Tx cleanup is called only by the burst function and can be 1875 * called dynamically by the application. Also cleanup is 1876 * limited by the threshold. To avoid false detection of the 1877 * missing HW Tx completion, get the delay since last cleanup 1878 * function was called. 1879 */ 1880 tx_cleanup_delay = rte_get_timer_cycles() - 1881 tx_ring->last_cleanup_ticks; 1882 if (tx_cleanup_delay < adapter->tx_cleanup_stall_delay) 1883 check_for_tx_completion_in_queue(adapter, tx_ring); 1884 qid = (qid + 1) % nb_tx_queues; 1885 } 1886 1887 adapter->last_tx_comp_qid = qid; 1888 } 1889 1890 static void ena_timer_wd_callback(__rte_unused struct rte_timer *timer, 1891 void *arg) 1892 { 1893 struct rte_eth_dev *dev = arg; 1894 struct ena_adapter *adapter = dev->data->dev_private; 1895 1896 if (unlikely(adapter->trigger_reset)) 1897 return; 1898 1899 check_for_missing_keep_alive(adapter); 1900 check_for_admin_com_state(adapter); 1901 check_for_tx_completions(adapter); 1902 1903 if (unlikely(adapter->trigger_reset)) { 1904 PMD_DRV_LOG(ERR, "Trigger reset is on\n"); 1905 rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_RESET, 1906 NULL); 1907 } 1908 } 1909 1910 static inline void 1911 set_default_llq_configurations(struct ena_llq_configurations *llq_config, 1912 struct ena_admin_feature_llq_desc *llq, 1913 bool use_large_llq_hdr) 1914 { 1915 llq_config->llq_header_location = ENA_ADMIN_INLINE_HEADER; 1916 llq_config->llq_stride_ctrl = ENA_ADMIN_MULTIPLE_DESCS_PER_ENTRY; 1917 llq_config->llq_num_decs_before_header = 1918 ENA_ADMIN_LLQ_NUM_DESCS_BEFORE_HEADER_2; 1919 1920 if (use_large_llq_hdr && 1921 (llq->entry_size_ctrl_supported & ENA_ADMIN_LIST_ENTRY_SIZE_256B)) { 1922 llq_config->llq_ring_entry_size = 1923 ENA_ADMIN_LIST_ENTRY_SIZE_256B; 1924 llq_config->llq_ring_entry_size_value = 256; 1925 } else { 1926 llq_config->llq_ring_entry_size = 1927 ENA_ADMIN_LIST_ENTRY_SIZE_128B; 1928 llq_config->llq_ring_entry_size_value = 128; 1929 } 1930 } 1931 1932 static int 1933 ena_set_queues_placement_policy(struct ena_adapter *adapter, 1934 struct 
ena_com_dev *ena_dev, 1935 struct ena_admin_feature_llq_desc *llq, 1936 struct ena_llq_configurations *llq_default_configurations) 1937 { 1938 int rc; 1939 u32 llq_feature_mask; 1940 1941 if (!adapter->enable_llq) { 1942 PMD_DRV_LOG(WARNING, 1943 "NOTE: LLQ has been disabled as per user's request. " 1944 "This may lead to a huge performance degradation!\n"); 1945 ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST; 1946 return 0; 1947 } 1948 1949 llq_feature_mask = 1 << ENA_ADMIN_LLQ; 1950 if (!(ena_dev->supported_features & llq_feature_mask)) { 1951 PMD_DRV_LOG(INFO, 1952 "LLQ is not supported. Fallback to host mode policy.\n"); 1953 ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST; 1954 return 0; 1955 } 1956 1957 if (adapter->dev_mem_base == NULL) { 1958 PMD_DRV_LOG(ERR, 1959 "LLQ is advertised as supported, but device doesn't expose mem bar\n"); 1960 ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST; 1961 return 0; 1962 } 1963 1964 rc = ena_com_config_dev_mode(ena_dev, llq, llq_default_configurations); 1965 if (unlikely(rc)) { 1966 PMD_INIT_LOG(WARNING, 1967 "Failed to config dev mode. Fallback to host mode policy.\n"); 1968 ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST; 1969 return 0; 1970 } 1971 1972 /* Nothing to config, exit */ 1973 if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_HOST) 1974 return 0; 1975 1976 ena_dev->mem_bar = adapter->dev_mem_base; 1977 1978 return 0; 1979 } 1980 1981 static uint32_t ena_calc_max_io_queue_num(struct ena_com_dev *ena_dev, 1982 struct ena_com_dev_get_features_ctx *get_feat_ctx) 1983 { 1984 uint32_t io_tx_sq_num, io_tx_cq_num, io_rx_num, max_num_io_queues; 1985 1986 /* Regular queues capabilities */ 1987 if (ena_dev->supported_features & BIT(ENA_ADMIN_MAX_QUEUES_EXT)) { 1988 struct ena_admin_queue_ext_feature_fields *max_queue_ext = 1989 &get_feat_ctx->max_queue_ext.max_queue_ext; 1990 io_rx_num = RTE_MIN(max_queue_ext->max_rx_sq_num, 1991 max_queue_ext->max_rx_cq_num); 1992 io_tx_sq_num = max_queue_ext->max_tx_sq_num; 1993 io_tx_cq_num = max_queue_ext->max_tx_cq_num; 1994 } else { 1995 struct ena_admin_queue_feature_desc *max_queues = 1996 &get_feat_ctx->max_queues; 1997 io_tx_sq_num = max_queues->max_sq_num; 1998 io_tx_cq_num = max_queues->max_cq_num; 1999 io_rx_num = RTE_MIN(io_tx_sq_num, io_tx_cq_num); 2000 } 2001 2002 /* In case of LLQ use the llq number in the get feature cmd */ 2003 if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) 2004 io_tx_sq_num = get_feat_ctx->llq.max_llq_num; 2005 2006 max_num_io_queues = RTE_MIN(ENA_MAX_NUM_IO_QUEUES, io_rx_num); 2007 max_num_io_queues = RTE_MIN(max_num_io_queues, io_tx_sq_num); 2008 max_num_io_queues = RTE_MIN(max_num_io_queues, io_tx_cq_num); 2009 2010 if (unlikely(max_num_io_queues == 0)) { 2011 PMD_DRV_LOG(ERR, "Number of IO queues cannot not be 0\n"); 2012 return -EFAULT; 2013 } 2014 2015 return max_num_io_queues; 2016 } 2017 2018 static void 2019 ena_set_offloads(struct ena_offloads *offloads, 2020 struct ena_admin_feature_offload_desc *offload_desc) 2021 { 2022 if (offload_desc->tx & ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_IPV4_MASK) 2023 offloads->tx_offloads |= ENA_IPV4_TSO; 2024 2025 /* Tx IPv4 checksum offloads */ 2026 if (offload_desc->tx & 2027 ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L3_CSUM_IPV4_MASK) 2028 offloads->tx_offloads |= ENA_L3_IPV4_CSUM; 2029 if (offload_desc->tx & 2030 ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV4_CSUM_FULL_MASK) 2031 offloads->tx_offloads |= ENA_L4_IPV4_CSUM; 2032 if (offload_desc->tx & 2033 
ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV4_CSUM_PART_MASK)
2034 		offloads->tx_offloads |= ENA_L4_IPV4_CSUM_PARTIAL;
2035 
2036 	/* Tx IPv6 checksum offloads */
2037 	if (offload_desc->tx &
2038 	    ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV6_CSUM_FULL_MASK)
2039 		offloads->tx_offloads |= ENA_L4_IPV6_CSUM;
2040 	if (offload_desc->tx &
2041 	    ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV6_CSUM_PART_MASK)
2042 		offloads->tx_offloads |= ENA_L4_IPV6_CSUM_PARTIAL;
2043 
2044 	/* Rx IPv4 checksum offloads */
2045 	if (offload_desc->rx_supported &
2046 	    ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L3_CSUM_IPV4_MASK)
2047 		offloads->rx_offloads |= ENA_L3_IPV4_CSUM;
2048 	if (offload_desc->rx_supported &
2049 	    ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L4_IPV4_CSUM_MASK)
2050 		offloads->rx_offloads |= ENA_L4_IPV4_CSUM;
2051 
2052 	/* Rx IPv6 checksum offloads */
2053 	if (offload_desc->rx_supported &
2054 	    ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L4_IPV6_CSUM_MASK)
2055 		offloads->rx_offloads |= ENA_L4_IPV6_CSUM;
2056 
2057 	if (offload_desc->rx_supported &
2058 	    ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_HASH_MASK)
2059 		offloads->rx_offloads |= ENA_RX_RSS_HASH;
2060 }
2061 
2062 static int ena_init_once(void)
2063 {
2064 	static bool init_done;
2065 
2066 	if (init_done)
2067 		return 0;
2068 
2069 	if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
2070 		/* Init timer subsystem for the ENA timer service. */
2071 		rte_timer_subsystem_init();
2072 		/* Register handler for requests from secondary processes. */
2073 		rte_mp_action_register(ENA_MP_NAME, ena_mp_primary_handle);
2074 	}
2075 
2076 	init_done = true;
2077 	return 0;
2078 }
2079 
2080 static int eth_ena_dev_init(struct rte_eth_dev *eth_dev)
2081 {
2082 	struct ena_calc_queue_size_ctx calc_queue_ctx = { 0 };
2083 	struct rte_pci_device *pci_dev;
2084 	struct rte_intr_handle *intr_handle;
2085 	struct ena_adapter *adapter = eth_dev->data->dev_private;
2086 	struct ena_com_dev *ena_dev = &adapter->ena_dev;
2087 	struct ena_com_dev_get_features_ctx get_feat_ctx;
2088 	struct ena_llq_configurations llq_config;
2089 	const char *queue_type_str;
2090 	uint32_t max_num_io_queues;
2091 	int rc;
2092 	static int adapters_found;
2093 	bool disable_meta_caching;
2094 
2095 	eth_dev->dev_ops = &ena_dev_ops;
2096 	eth_dev->rx_pkt_burst = &eth_ena_recv_pkts;
2097 	eth_dev->tx_pkt_burst = &eth_ena_xmit_pkts;
2098 	eth_dev->tx_pkt_prepare = &eth_ena_prep_pkts;
2099 
2100 	rc = ena_init_once();
2101 	if (rc != 0)
2102 		return rc;
2103 
2104 	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
2105 		return 0;
2106 
2107 	eth_dev->data->dev_flags |= RTE_ETH_DEV_AUTOFILL_QUEUE_XSTATS;
2108 
2109 	memset(adapter, 0, sizeof(struct ena_adapter));
2110 	ena_dev = &adapter->ena_dev;
2111 
2112 	adapter->edev_data = eth_dev->data;
2113 
2114 	pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev);
2115 
2116 	PMD_INIT_LOG(INFO, "Initializing %x:%x:%x.%d\n",
2117 		     pci_dev->addr.domain,
2118 		     pci_dev->addr.bus,
2119 		     pci_dev->addr.devid,
2120 		     pci_dev->addr.function);
2121 
2122 	intr_handle = pci_dev->intr_handle;
2123 
2124 	adapter->regs = pci_dev->mem_resource[ENA_REGS_BAR].addr;
2125 	adapter->dev_mem_base = pci_dev->mem_resource[ENA_MEM_BAR].addr;
2126 
2127 	if (!adapter->regs) {
2128 		PMD_INIT_LOG(CRIT, "Failed to access registers BAR(%d)\n",
2129 			     ENA_REGS_BAR);
2130 		return -ENXIO;
2131 	}
2132 
2133 	ena_dev->reg_bar = adapter->regs;
2134 	/* Pass device data as a pointer which can be passed to the IO functions
2135 	 * by the ena_com (for example - the memory allocation).
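	 * Note added for clarity: one consumer of that pointer is
	 * ena_mem_alloc_coherent() near the end of this file, which derives a
	 * unique memzone name from data->port_id and the adapter's
	 * memzone_cnt.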
2136 */ 2137 ena_dev->dmadev = eth_dev->data; 2138 2139 adapter->id_number = adapters_found; 2140 2141 snprintf(adapter->name, ENA_NAME_MAX_LEN, "ena_%d", 2142 adapter->id_number); 2143 2144 /* Assign default devargs values */ 2145 adapter->missing_tx_completion_to = ENA_TX_TIMEOUT; 2146 adapter->enable_llq = true; 2147 adapter->use_large_llq_hdr = false; 2148 2149 rc = ena_parse_devargs(adapter, pci_dev->device.devargs); 2150 if (rc != 0) { 2151 PMD_INIT_LOG(CRIT, "Failed to parse devargs\n"); 2152 goto err; 2153 } 2154 2155 /* device specific initialization routine */ 2156 rc = ena_device_init(adapter, pci_dev, &get_feat_ctx); 2157 if (rc) { 2158 PMD_INIT_LOG(CRIT, "Failed to init ENA device\n"); 2159 goto err; 2160 } 2161 2162 /* Check if device supports LSC */ 2163 if (!(adapter->all_aenq_groups & BIT(ENA_ADMIN_LINK_CHANGE))) 2164 adapter->edev_data->dev_flags &= ~RTE_ETH_DEV_INTR_LSC; 2165 2166 set_default_llq_configurations(&llq_config, &get_feat_ctx.llq, 2167 adapter->use_large_llq_hdr); 2168 rc = ena_set_queues_placement_policy(adapter, ena_dev, 2169 &get_feat_ctx.llq, &llq_config); 2170 if (unlikely(rc)) { 2171 PMD_INIT_LOG(CRIT, "Failed to set placement policy\n"); 2172 return rc; 2173 } 2174 2175 if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_HOST) 2176 queue_type_str = "Regular"; 2177 else 2178 queue_type_str = "Low latency"; 2179 PMD_DRV_LOG(INFO, "Placement policy: %s\n", queue_type_str); 2180 2181 calc_queue_ctx.ena_dev = ena_dev; 2182 calc_queue_ctx.get_feat_ctx = &get_feat_ctx; 2183 2184 max_num_io_queues = ena_calc_max_io_queue_num(ena_dev, &get_feat_ctx); 2185 rc = ena_calc_io_queue_size(&calc_queue_ctx, 2186 adapter->use_large_llq_hdr); 2187 if (unlikely((rc != 0) || (max_num_io_queues == 0))) { 2188 rc = -EFAULT; 2189 goto err_device_destroy; 2190 } 2191 2192 adapter->max_tx_ring_size = calc_queue_ctx.max_tx_queue_size; 2193 adapter->max_rx_ring_size = calc_queue_ctx.max_rx_queue_size; 2194 adapter->max_tx_sgl_size = calc_queue_ctx.max_tx_sgl_size; 2195 adapter->max_rx_sgl_size = calc_queue_ctx.max_rx_sgl_size; 2196 adapter->max_num_io_queues = max_num_io_queues; 2197 2198 if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) { 2199 disable_meta_caching = 2200 !!(get_feat_ctx.llq.accel_mode.u.get.supported_flags & 2201 BIT(ENA_ADMIN_DISABLE_META_CACHING)); 2202 } else { 2203 disable_meta_caching = false; 2204 } 2205 2206 /* prepare ring structures */ 2207 ena_init_rings(adapter, disable_meta_caching); 2208 2209 ena_config_debug_area(adapter); 2210 2211 /* Set max MTU for this device */ 2212 adapter->max_mtu = get_feat_ctx.dev_attr.max_mtu; 2213 2214 ena_set_offloads(&adapter->offloads, &get_feat_ctx.offload); 2215 2216 /* Copy MAC address and point DPDK to it */ 2217 eth_dev->data->mac_addrs = (struct rte_ether_addr *)adapter->mac_addr; 2218 rte_ether_addr_copy((struct rte_ether_addr *) 2219 get_feat_ctx.dev_attr.mac_addr, 2220 (struct rte_ether_addr *)adapter->mac_addr); 2221 2222 rc = ena_com_rss_init(ena_dev, ENA_RX_RSS_TABLE_LOG_SIZE); 2223 if (unlikely(rc != 0)) { 2224 PMD_DRV_LOG(ERR, "Failed to initialize RSS in ENA device\n"); 2225 goto err_delete_debug_area; 2226 } 2227 2228 adapter->drv_stats = rte_zmalloc("adapter stats", 2229 sizeof(*adapter->drv_stats), 2230 RTE_CACHE_LINE_SIZE); 2231 if (!adapter->drv_stats) { 2232 PMD_DRV_LOG(ERR, 2233 "Failed to allocate memory for adapter statistics\n"); 2234 rc = -ENOMEM; 2235 goto err_rss_destroy; 2236 } 2237 2238 rte_spinlock_init(&adapter->admin_lock); 2239 2240 
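	/*
	 * Illustrative usage note (added; not part of the original flow): the
	 * devargs parsed earlier in this function are supplied per device on
	 * the EAL command line, for example (the PCI address is hypothetical):
	 *
	 *   dpdk-testpmd -a 0000:00:06.0,enable_llq=0,large_llq_hdr=1,miss_txc_to=5 -- -i
	 *
	 * Unknown keys are rejected by rte_kvargs_parse() in
	 * ena_parse_devargs() further below.
	 */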
rte_intr_callback_register(intr_handle, 2241 ena_interrupt_handler_rte, 2242 eth_dev); 2243 rte_intr_enable(intr_handle); 2244 ena_com_set_admin_polling_mode(ena_dev, false); 2245 ena_com_admin_aenq_enable(ena_dev); 2246 2247 rte_timer_init(&adapter->timer_wd); 2248 2249 adapters_found++; 2250 adapter->state = ENA_ADAPTER_STATE_INIT; 2251 2252 return 0; 2253 2254 err_rss_destroy: 2255 ena_com_rss_destroy(ena_dev); 2256 err_delete_debug_area: 2257 ena_com_delete_debug_area(ena_dev); 2258 2259 err_device_destroy: 2260 ena_com_delete_host_info(ena_dev); 2261 ena_com_admin_destroy(ena_dev); 2262 2263 err: 2264 return rc; 2265 } 2266 2267 static void ena_destroy_device(struct rte_eth_dev *eth_dev) 2268 { 2269 struct ena_adapter *adapter = eth_dev->data->dev_private; 2270 struct ena_com_dev *ena_dev = &adapter->ena_dev; 2271 2272 if (adapter->state == ENA_ADAPTER_STATE_FREE) 2273 return; 2274 2275 ena_com_set_admin_running_state(ena_dev, false); 2276 2277 if (adapter->state != ENA_ADAPTER_STATE_CLOSED) 2278 ena_close(eth_dev); 2279 2280 ena_com_rss_destroy(ena_dev); 2281 2282 ena_com_delete_debug_area(ena_dev); 2283 ena_com_delete_host_info(ena_dev); 2284 2285 ena_com_abort_admin_commands(ena_dev); 2286 ena_com_wait_for_abort_completion(ena_dev); 2287 ena_com_admin_destroy(ena_dev); 2288 ena_com_mmio_reg_read_request_destroy(ena_dev); 2289 2290 adapter->state = ENA_ADAPTER_STATE_FREE; 2291 } 2292 2293 static int eth_ena_dev_uninit(struct rte_eth_dev *eth_dev) 2294 { 2295 if (rte_eal_process_type() != RTE_PROC_PRIMARY) 2296 return 0; 2297 2298 ena_destroy_device(eth_dev); 2299 2300 return 0; 2301 } 2302 2303 static int ena_dev_configure(struct rte_eth_dev *dev) 2304 { 2305 struct ena_adapter *adapter = dev->data->dev_private; 2306 int rc; 2307 2308 adapter->state = ENA_ADAPTER_STATE_CONFIG; 2309 2310 if (dev->data->dev_conf.rxmode.mq_mode & RTE_ETH_MQ_RX_RSS_FLAG) 2311 dev->data->dev_conf.rxmode.offloads |= RTE_ETH_RX_OFFLOAD_RSS_HASH; 2312 dev->data->dev_conf.txmode.offloads |= RTE_ETH_TX_OFFLOAD_MULTI_SEGS; 2313 2314 /* Scattered Rx cannot be turned off in the HW, so this capability must 2315 * be forced. 2316 */ 2317 dev->data->scattered_rx = 1; 2318 2319 adapter->last_tx_comp_qid = 0; 2320 2321 adapter->missing_tx_completion_budget = 2322 RTE_MIN(ENA_MONITORED_TX_QUEUES, dev->data->nb_tx_queues); 2323 2324 /* To avoid detection of the spurious Tx completion timeout due to 2325 * application not calling the Tx cleanup function, set timeout for the 2326 * Tx queue which should be half of the missing completion timeout for a 2327 * safety. If there will be a lot of missing Tx completions in the 2328 * queue, they will be detected sooner or later. 
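	 * Illustrative numbers only: with the miss_txc_to devarg set to 4, the
	 * parser below stores missing_tx_completion_to = 4 * rte_get_timer_hz()
	 * timer cycles, so tx_cleanup_stall_delay ends up being roughly two
	 * seconds' worth of cycles.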
2329 */ 2330 adapter->tx_cleanup_stall_delay = adapter->missing_tx_completion_to / 2; 2331 2332 rc = ena_configure_aenq(adapter); 2333 2334 return rc; 2335 } 2336 2337 static void ena_init_rings(struct ena_adapter *adapter, 2338 bool disable_meta_caching) 2339 { 2340 size_t i; 2341 2342 for (i = 0; i < adapter->max_num_io_queues; i++) { 2343 struct ena_ring *ring = &adapter->tx_ring[i]; 2344 2345 ring->configured = 0; 2346 ring->type = ENA_RING_TYPE_TX; 2347 ring->adapter = adapter; 2348 ring->id = i; 2349 ring->tx_mem_queue_type = adapter->ena_dev.tx_mem_queue_type; 2350 ring->tx_max_header_size = adapter->ena_dev.tx_max_header_size; 2351 ring->sgl_size = adapter->max_tx_sgl_size; 2352 ring->disable_meta_caching = disable_meta_caching; 2353 } 2354 2355 for (i = 0; i < adapter->max_num_io_queues; i++) { 2356 struct ena_ring *ring = &adapter->rx_ring[i]; 2357 2358 ring->configured = 0; 2359 ring->type = ENA_RING_TYPE_RX; 2360 ring->adapter = adapter; 2361 ring->id = i; 2362 ring->sgl_size = adapter->max_rx_sgl_size; 2363 } 2364 } 2365 2366 static uint64_t ena_get_rx_port_offloads(struct ena_adapter *adapter) 2367 { 2368 uint64_t port_offloads = 0; 2369 2370 if (adapter->offloads.rx_offloads & ENA_L3_IPV4_CSUM) 2371 port_offloads |= RTE_ETH_RX_OFFLOAD_IPV4_CKSUM; 2372 2373 if (adapter->offloads.rx_offloads & 2374 (ENA_L4_IPV4_CSUM | ENA_L4_IPV6_CSUM)) 2375 port_offloads |= 2376 RTE_ETH_RX_OFFLOAD_UDP_CKSUM | RTE_ETH_RX_OFFLOAD_TCP_CKSUM; 2377 2378 if (adapter->offloads.rx_offloads & ENA_RX_RSS_HASH) 2379 port_offloads |= RTE_ETH_RX_OFFLOAD_RSS_HASH; 2380 2381 port_offloads |= RTE_ETH_RX_OFFLOAD_SCATTER; 2382 2383 return port_offloads; 2384 } 2385 2386 static uint64_t ena_get_tx_port_offloads(struct ena_adapter *adapter) 2387 { 2388 uint64_t port_offloads = 0; 2389 2390 if (adapter->offloads.tx_offloads & ENA_IPV4_TSO) 2391 port_offloads |= RTE_ETH_TX_OFFLOAD_TCP_TSO; 2392 2393 if (adapter->offloads.tx_offloads & ENA_L3_IPV4_CSUM) 2394 port_offloads |= RTE_ETH_TX_OFFLOAD_IPV4_CKSUM; 2395 if (adapter->offloads.tx_offloads & 2396 (ENA_L4_IPV4_CSUM_PARTIAL | ENA_L4_IPV4_CSUM | 2397 ENA_L4_IPV6_CSUM | ENA_L4_IPV6_CSUM_PARTIAL)) 2398 port_offloads |= 2399 RTE_ETH_TX_OFFLOAD_UDP_CKSUM | RTE_ETH_TX_OFFLOAD_TCP_CKSUM; 2400 2401 port_offloads |= RTE_ETH_TX_OFFLOAD_MULTI_SEGS; 2402 2403 port_offloads |= RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE; 2404 2405 return port_offloads; 2406 } 2407 2408 static uint64_t ena_get_rx_queue_offloads(struct ena_adapter *adapter) 2409 { 2410 RTE_SET_USED(adapter); 2411 2412 return 0; 2413 } 2414 2415 static uint64_t ena_get_tx_queue_offloads(struct ena_adapter *adapter) 2416 { 2417 uint64_t queue_offloads = 0; 2418 RTE_SET_USED(adapter); 2419 2420 queue_offloads |= RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE; 2421 2422 return queue_offloads; 2423 } 2424 2425 static int ena_infos_get(struct rte_eth_dev *dev, 2426 struct rte_eth_dev_info *dev_info) 2427 { 2428 struct ena_adapter *adapter; 2429 struct ena_com_dev *ena_dev; 2430 2431 ena_assert_msg(dev->data != NULL, "Uninitialized device\n"); 2432 ena_assert_msg(dev->data->dev_private != NULL, "Uninitialized device\n"); 2433 adapter = dev->data->dev_private; 2434 2435 ena_dev = &adapter->ena_dev; 2436 ena_assert_msg(ena_dev != NULL, "Uninitialized device\n"); 2437 2438 dev_info->speed_capa = 2439 RTE_ETH_LINK_SPEED_1G | 2440 RTE_ETH_LINK_SPEED_2_5G | 2441 RTE_ETH_LINK_SPEED_5G | 2442 RTE_ETH_LINK_SPEED_10G | 2443 RTE_ETH_LINK_SPEED_25G | 2444 RTE_ETH_LINK_SPEED_40G | 2445 RTE_ETH_LINK_SPEED_50G | 2446 RTE_ETH_LINK_SPEED_100G; 2447 2448 /* 
Inform framework about available features */ 2449 dev_info->rx_offload_capa = ena_get_rx_port_offloads(adapter); 2450 dev_info->tx_offload_capa = ena_get_tx_port_offloads(adapter); 2451 dev_info->rx_queue_offload_capa = ena_get_rx_queue_offloads(adapter); 2452 dev_info->tx_queue_offload_capa = ena_get_tx_queue_offloads(adapter); 2453 2454 dev_info->flow_type_rss_offloads = ENA_ALL_RSS_HF; 2455 dev_info->hash_key_size = ENA_HASH_KEY_SIZE; 2456 2457 dev_info->min_rx_bufsize = ENA_MIN_FRAME_LEN; 2458 dev_info->max_rx_pktlen = adapter->max_mtu + RTE_ETHER_HDR_LEN + 2459 RTE_ETHER_CRC_LEN; 2460 dev_info->min_mtu = ENA_MIN_MTU; 2461 dev_info->max_mtu = adapter->max_mtu; 2462 dev_info->max_mac_addrs = 1; 2463 2464 dev_info->max_rx_queues = adapter->max_num_io_queues; 2465 dev_info->max_tx_queues = adapter->max_num_io_queues; 2466 dev_info->reta_size = ENA_RX_RSS_TABLE_SIZE; 2467 2468 dev_info->rx_desc_lim.nb_max = adapter->max_rx_ring_size; 2469 dev_info->rx_desc_lim.nb_min = ENA_MIN_RING_DESC; 2470 dev_info->rx_desc_lim.nb_seg_max = RTE_MIN(ENA_PKT_MAX_BUFS, 2471 adapter->max_rx_sgl_size); 2472 dev_info->rx_desc_lim.nb_mtu_seg_max = RTE_MIN(ENA_PKT_MAX_BUFS, 2473 adapter->max_rx_sgl_size); 2474 2475 dev_info->tx_desc_lim.nb_max = adapter->max_tx_ring_size; 2476 dev_info->tx_desc_lim.nb_min = ENA_MIN_RING_DESC; 2477 dev_info->tx_desc_lim.nb_seg_max = RTE_MIN(ENA_PKT_MAX_BUFS, 2478 adapter->max_tx_sgl_size); 2479 dev_info->tx_desc_lim.nb_mtu_seg_max = RTE_MIN(ENA_PKT_MAX_BUFS, 2480 adapter->max_tx_sgl_size); 2481 2482 dev_info->default_rxportconf.ring_size = ENA_DEFAULT_RING_SIZE; 2483 dev_info->default_txportconf.ring_size = ENA_DEFAULT_RING_SIZE; 2484 2485 return 0; 2486 } 2487 2488 static inline void ena_init_rx_mbuf(struct rte_mbuf *mbuf, uint16_t len) 2489 { 2490 mbuf->data_len = len; 2491 mbuf->data_off = RTE_PKTMBUF_HEADROOM; 2492 mbuf->refcnt = 1; 2493 mbuf->next = NULL; 2494 } 2495 2496 static struct rte_mbuf *ena_rx_mbuf(struct ena_ring *rx_ring, 2497 struct ena_com_rx_buf_info *ena_bufs, 2498 uint32_t descs, 2499 uint16_t *next_to_clean, 2500 uint8_t offset) 2501 { 2502 struct rte_mbuf *mbuf; 2503 struct rte_mbuf *mbuf_head; 2504 struct ena_rx_buffer *rx_info; 2505 int rc; 2506 uint16_t ntc, len, req_id, buf = 0; 2507 2508 if (unlikely(descs == 0)) 2509 return NULL; 2510 2511 ntc = *next_to_clean; 2512 2513 len = ena_bufs[buf].len; 2514 req_id = ena_bufs[buf].req_id; 2515 2516 rx_info = &rx_ring->rx_buffer_info[req_id]; 2517 2518 mbuf = rx_info->mbuf; 2519 RTE_ASSERT(mbuf != NULL); 2520 2521 ena_init_rx_mbuf(mbuf, len); 2522 2523 /* Fill the mbuf head with the data specific for 1st segment. */ 2524 mbuf_head = mbuf; 2525 mbuf_head->nb_segs = descs; 2526 mbuf_head->port = rx_ring->port_id; 2527 mbuf_head->pkt_len = len; 2528 mbuf_head->data_off += offset; 2529 2530 rx_info->mbuf = NULL; 2531 rx_ring->empty_rx_reqs[ntc] = req_id; 2532 ntc = ENA_IDX_NEXT_MASKED(ntc, rx_ring->size_mask); 2533 2534 while (--descs) { 2535 ++buf; 2536 len = ena_bufs[buf].len; 2537 req_id = ena_bufs[buf].req_id; 2538 2539 rx_info = &rx_ring->rx_buffer_info[req_id]; 2540 RTE_ASSERT(rx_info->mbuf != NULL); 2541 2542 if (unlikely(len == 0)) { 2543 /* 2544 * Some devices can pass descriptor with the length 0. 2545 * To avoid confusion, the PMD is simply putting the 2546 * descriptor back, as it was never used. We'll avoid 2547 * mbuf allocation that way. 
2548 */ 2549 rc = ena_add_single_rx_desc(rx_ring->ena_com_io_sq, 2550 rx_info->mbuf, req_id); 2551 if (unlikely(rc != 0)) { 2552 /* Free the mbuf in case of an error. */ 2553 rte_mbuf_raw_free(rx_info->mbuf); 2554 } else { 2555 /* 2556 * If there was no error, just exit the loop as 2557 * 0 length descriptor is always the last one. 2558 */ 2559 break; 2560 } 2561 } else { 2562 /* Create an mbuf chain. */ 2563 mbuf->next = rx_info->mbuf; 2564 mbuf = mbuf->next; 2565 2566 ena_init_rx_mbuf(mbuf, len); 2567 mbuf_head->pkt_len += len; 2568 } 2569 2570 /* 2571 * Mark the descriptor as depleted and perform necessary 2572 * cleanup. 2573 * This code will execute in two cases: 2574 * 1. Descriptor len was greater than 0 - normal situation. 2575 * 2. Descriptor len was 0 and we failed to add the descriptor 2576 * to the device. In that situation, we should try to add 2577 * the mbuf again in the populate routine and mark the 2578 * descriptor as used up by the device. 2579 */ 2580 rx_info->mbuf = NULL; 2581 rx_ring->empty_rx_reqs[ntc] = req_id; 2582 ntc = ENA_IDX_NEXT_MASKED(ntc, rx_ring->size_mask); 2583 } 2584 2585 *next_to_clean = ntc; 2586 2587 return mbuf_head; 2588 } 2589 2590 static uint16_t eth_ena_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, 2591 uint16_t nb_pkts) 2592 { 2593 struct ena_ring *rx_ring = (struct ena_ring *)(rx_queue); 2594 unsigned int free_queue_entries; 2595 uint16_t next_to_clean = rx_ring->next_to_clean; 2596 uint16_t descs_in_use; 2597 struct rte_mbuf *mbuf; 2598 uint16_t completed; 2599 struct ena_com_rx_ctx ena_rx_ctx; 2600 int i, rc = 0; 2601 bool fill_hash; 2602 2603 #ifdef RTE_ETHDEV_DEBUG_RX 2604 /* Check adapter state */ 2605 if (unlikely(rx_ring->adapter->state != ENA_ADAPTER_STATE_RUNNING)) { 2606 PMD_RX_LOG(ALERT, 2607 "Trying to receive pkts while device is NOT running\n"); 2608 return 0; 2609 } 2610 #endif 2611 2612 fill_hash = rx_ring->offloads & RTE_ETH_RX_OFFLOAD_RSS_HASH; 2613 2614 descs_in_use = rx_ring->ring_size - 2615 ena_com_free_q_entries(rx_ring->ena_com_io_sq) - 1; 2616 nb_pkts = RTE_MIN(descs_in_use, nb_pkts); 2617 2618 for (completed = 0; completed < nb_pkts; completed++) { 2619 ena_rx_ctx.max_bufs = rx_ring->sgl_size; 2620 ena_rx_ctx.ena_bufs = rx_ring->ena_bufs; 2621 ena_rx_ctx.descs = 0; 2622 ena_rx_ctx.pkt_offset = 0; 2623 /* receive packet context */ 2624 rc = ena_com_rx_pkt(rx_ring->ena_com_io_cq, 2625 rx_ring->ena_com_io_sq, 2626 &ena_rx_ctx); 2627 if (unlikely(rc)) { 2628 PMD_RX_LOG(ERR, 2629 "Failed to get the packet from the device, rc: %d\n", 2630 rc); 2631 if (rc == ENA_COM_NO_SPACE) { 2632 ++rx_ring->rx_stats.bad_desc_num; 2633 ena_trigger_reset(rx_ring->adapter, 2634 ENA_REGS_RESET_TOO_MANY_RX_DESCS); 2635 } else { 2636 ++rx_ring->rx_stats.bad_req_id; 2637 ena_trigger_reset(rx_ring->adapter, 2638 ENA_REGS_RESET_INV_RX_REQ_ID); 2639 } 2640 return 0; 2641 } 2642 2643 mbuf = ena_rx_mbuf(rx_ring, 2644 ena_rx_ctx.ena_bufs, 2645 ena_rx_ctx.descs, 2646 &next_to_clean, 2647 ena_rx_ctx.pkt_offset); 2648 if (unlikely(mbuf == NULL)) { 2649 for (i = 0; i < ena_rx_ctx.descs; ++i) { 2650 rx_ring->empty_rx_reqs[next_to_clean] = 2651 rx_ring->ena_bufs[i].req_id; 2652 next_to_clean = ENA_IDX_NEXT_MASKED( 2653 next_to_clean, rx_ring->size_mask); 2654 } 2655 break; 2656 } 2657 2658 /* fill mbuf attributes if any */ 2659 ena_rx_mbuf_prepare(rx_ring, mbuf, &ena_rx_ctx, fill_hash); 2660 2661 if (unlikely(mbuf->ol_flags & 2662 (RTE_MBUF_F_RX_IP_CKSUM_BAD | RTE_MBUF_F_RX_L4_CKSUM_BAD))) 2663 rte_atomic64_inc(&rx_ring->adapter->drv_stats->ierrors); 
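		/*
		 * Application-side sketch (added for illustration; port_id,
		 * queue_id, BURST and drop() are placeholders, not symbols
		 * defined in this file). The flags set by ena_rx_mbuf_prepare()
		 * above can be consumed like this:
		 *
		 *   uint16_t nb = rte_eth_rx_burst(port_id, queue_id, pkts, BURST);
		 *   for (uint16_t k = 0; k < nb; k++)
		 *       if (pkts[k]->ol_flags & RTE_MBUF_F_RX_L4_CKSUM_BAD)
		 *           drop(pkts[k]);
		 */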
2664 2665 rx_pkts[completed] = mbuf; 2666 rx_ring->rx_stats.bytes += mbuf->pkt_len; 2667 } 2668 2669 rx_ring->rx_stats.cnt += completed; 2670 rx_ring->next_to_clean = next_to_clean; 2671 2672 free_queue_entries = ena_com_free_q_entries(rx_ring->ena_com_io_sq); 2673 2674 /* Burst refill to save doorbells, memory barriers, const interval */ 2675 if (free_queue_entries >= rx_ring->rx_free_thresh) { 2676 ena_com_update_dev_comp_head(rx_ring->ena_com_io_cq); 2677 ena_populate_rx_queue(rx_ring, free_queue_entries); 2678 } 2679 2680 return completed; 2681 } 2682 2683 static uint16_t 2684 eth_ena_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, 2685 uint16_t nb_pkts) 2686 { 2687 int32_t ret; 2688 uint32_t i; 2689 struct rte_mbuf *m; 2690 struct ena_ring *tx_ring = (struct ena_ring *)(tx_queue); 2691 struct ena_adapter *adapter = tx_ring->adapter; 2692 struct rte_ipv4_hdr *ip_hdr; 2693 uint64_t ol_flags; 2694 uint64_t l4_csum_flag; 2695 uint64_t dev_offload_capa; 2696 uint16_t frag_field; 2697 bool need_pseudo_csum; 2698 2699 dev_offload_capa = adapter->offloads.tx_offloads; 2700 for (i = 0; i != nb_pkts; i++) { 2701 m = tx_pkts[i]; 2702 ol_flags = m->ol_flags; 2703 2704 /* Check if any offload flag was set */ 2705 if (ol_flags == 0) 2706 continue; 2707 2708 l4_csum_flag = ol_flags & RTE_MBUF_F_TX_L4_MASK; 2709 /* SCTP checksum offload is not supported by the ENA. */ 2710 if ((ol_flags & ENA_TX_OFFLOAD_NOTSUP_MASK) || 2711 l4_csum_flag == RTE_MBUF_F_TX_SCTP_CKSUM) { 2712 PMD_TX_LOG(DEBUG, 2713 "mbuf[%" PRIu32 "] has unsupported offloads flags set: 0x%" PRIu64 "\n", 2714 i, ol_flags); 2715 rte_errno = ENOTSUP; 2716 return i; 2717 } 2718 2719 if (unlikely(m->nb_segs >= tx_ring->sgl_size && 2720 !(tx_ring->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV && 2721 m->nb_segs == tx_ring->sgl_size && 2722 m->data_len < tx_ring->tx_max_header_size))) { 2723 PMD_TX_LOG(DEBUG, 2724 "mbuf[%" PRIu32 "] has too many segments: %" PRIu16 "\n", 2725 i, m->nb_segs); 2726 rte_errno = EINVAL; 2727 return i; 2728 } 2729 2730 #ifdef RTE_LIBRTE_ETHDEV_DEBUG 2731 /* Check if requested offload is also enabled for the queue */ 2732 if ((ol_flags & RTE_MBUF_F_TX_IP_CKSUM && 2733 !(tx_ring->offloads & RTE_ETH_TX_OFFLOAD_IPV4_CKSUM)) || 2734 (l4_csum_flag == RTE_MBUF_F_TX_TCP_CKSUM && 2735 !(tx_ring->offloads & RTE_ETH_TX_OFFLOAD_TCP_CKSUM)) || 2736 (l4_csum_flag == RTE_MBUF_F_TX_UDP_CKSUM && 2737 !(tx_ring->offloads & RTE_ETH_TX_OFFLOAD_UDP_CKSUM))) { 2738 PMD_TX_LOG(DEBUG, 2739 "mbuf[%" PRIu32 "]: requested offloads: %" PRIu16 " are not enabled for the queue[%u]\n", 2740 i, m->nb_segs, tx_ring->id); 2741 rte_errno = EINVAL; 2742 return i; 2743 } 2744 2745 /* The caller is obligated to set l2 and l3 len if any cksum 2746 * offload is enabled. 2747 */ 2748 if (unlikely(ol_flags & (RTE_MBUF_F_TX_IP_CKSUM | RTE_MBUF_F_TX_L4_MASK) && 2749 (m->l2_len == 0 || m->l3_len == 0))) { 2750 PMD_TX_LOG(DEBUG, 2751 "mbuf[%" PRIu32 "]: l2_len or l3_len values are 0 while the offload was requested\n", 2752 i); 2753 rte_errno = EINVAL; 2754 return i; 2755 } 2756 ret = rte_validate_tx_offload(m); 2757 if (ret != 0) { 2758 rte_errno = -ret; 2759 return i; 2760 } 2761 #endif 2762 2763 /* Verify HW support for requested offloads and determine if 2764 * pseudo header checksum is needed. 
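		 * For instance, a TCP/IPv4 mbuf carrying RTE_MBUF_F_TX_TCP_CKSUM
		 * on a device that only advertises ENA_L4_IPV4_CSUM_PARTIAL must
		 * have the pseudo header checksum filled in first, which
		 * rte_net_intel_cksum_flags_prepare() does at the end of this
		 * loop.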
2765 */ 2766 need_pseudo_csum = false; 2767 if (ol_flags & RTE_MBUF_F_TX_IPV4) { 2768 if (ol_flags & RTE_MBUF_F_TX_IP_CKSUM && 2769 !(dev_offload_capa & ENA_L3_IPV4_CSUM)) { 2770 rte_errno = ENOTSUP; 2771 return i; 2772 } 2773 2774 if (ol_flags & RTE_MBUF_F_TX_TCP_SEG && 2775 !(dev_offload_capa & ENA_IPV4_TSO)) { 2776 rte_errno = ENOTSUP; 2777 return i; 2778 } 2779 2780 /* Check HW capabilities and if pseudo csum is needed 2781 * for L4 offloads. 2782 */ 2783 if (l4_csum_flag != RTE_MBUF_F_TX_L4_NO_CKSUM && 2784 !(dev_offload_capa & ENA_L4_IPV4_CSUM)) { 2785 if (dev_offload_capa & 2786 ENA_L4_IPV4_CSUM_PARTIAL) { 2787 need_pseudo_csum = true; 2788 } else { 2789 rte_errno = ENOTSUP; 2790 return i; 2791 } 2792 } 2793 2794 /* Parse the DF flag */ 2795 ip_hdr = rte_pktmbuf_mtod_offset(m, 2796 struct rte_ipv4_hdr *, m->l2_len); 2797 frag_field = rte_be_to_cpu_16(ip_hdr->fragment_offset); 2798 if (frag_field & RTE_IPV4_HDR_DF_FLAG) { 2799 m->packet_type |= RTE_PTYPE_L4_NONFRAG; 2800 } else if (ol_flags & RTE_MBUF_F_TX_TCP_SEG) { 2801 /* In case we are supposed to TSO and have DF 2802 * not set (DF=0) hardware must be provided with 2803 * partial checksum. 2804 */ 2805 need_pseudo_csum = true; 2806 } 2807 } else if (ol_flags & RTE_MBUF_F_TX_IPV6) { 2808 /* There is no support for IPv6 TSO as for now. */ 2809 if (ol_flags & RTE_MBUF_F_TX_TCP_SEG) { 2810 rte_errno = ENOTSUP; 2811 return i; 2812 } 2813 2814 /* Check HW capabilities and if pseudo csum is needed */ 2815 if (l4_csum_flag != RTE_MBUF_F_TX_L4_NO_CKSUM && 2816 !(dev_offload_capa & ENA_L4_IPV6_CSUM)) { 2817 if (dev_offload_capa & 2818 ENA_L4_IPV6_CSUM_PARTIAL) { 2819 need_pseudo_csum = true; 2820 } else { 2821 rte_errno = ENOTSUP; 2822 return i; 2823 } 2824 } 2825 } 2826 2827 if (need_pseudo_csum) { 2828 ret = rte_net_intel_cksum_flags_prepare(m, ol_flags); 2829 if (ret != 0) { 2830 rte_errno = -ret; 2831 return i; 2832 } 2833 } 2834 } 2835 2836 return i; 2837 } 2838 2839 static void ena_update_hints(struct ena_adapter *adapter, 2840 struct ena_admin_ena_hw_hints *hints) 2841 { 2842 if (hints->admin_completion_tx_timeout) 2843 adapter->ena_dev.admin_queue.completion_timeout = 2844 hints->admin_completion_tx_timeout * 1000; 2845 2846 if (hints->mmio_read_timeout) 2847 /* convert to usec */ 2848 adapter->ena_dev.mmio_read.reg_read_to = 2849 hints->mmio_read_timeout * 1000; 2850 2851 if (hints->driver_watchdog_timeout) { 2852 if (hints->driver_watchdog_timeout == ENA_HW_HINTS_NO_TIMEOUT) 2853 adapter->keep_alive_timeout = ENA_HW_HINTS_NO_TIMEOUT; 2854 else 2855 // Convert msecs to ticks 2856 adapter->keep_alive_timeout = 2857 (hints->driver_watchdog_timeout * 2858 rte_get_timer_hz()) / 1000; 2859 } 2860 } 2861 2862 static void ena_tx_map_mbuf(struct ena_ring *tx_ring, 2863 struct ena_tx_buffer *tx_info, 2864 struct rte_mbuf *mbuf, 2865 void **push_header, 2866 uint16_t *header_len) 2867 { 2868 struct ena_com_buf *ena_buf; 2869 uint16_t delta, seg_len, push_len; 2870 2871 delta = 0; 2872 seg_len = mbuf->data_len; 2873 2874 tx_info->mbuf = mbuf; 2875 ena_buf = tx_info->bufs; 2876 2877 if (tx_ring->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) { 2878 /* 2879 * Tx header might be (and will be in most cases) smaller than 2880 * tx_max_header_size. But it's not an issue to send more data 2881 * to the device, than actually needed if the mbuf size is 2882 * greater than tx_max_header_size. 
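		 * Illustrative example (sizes are hypothetical): with a 96 B
		 * tx_max_header_size and a 1500 B single-segment packet,
		 * push_len below becomes 96 and the remaining 1404 B are
		 * described by the first ena_com_buf entry.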
2883 */ 2884 push_len = RTE_MIN(mbuf->pkt_len, tx_ring->tx_max_header_size); 2885 *header_len = push_len; 2886 2887 if (likely(push_len <= seg_len)) { 2888 /* If the push header is in the single segment, then 2889 * just point it to the 1st mbuf data. 2890 */ 2891 *push_header = rte_pktmbuf_mtod(mbuf, uint8_t *); 2892 } else { 2893 /* If the push header lays in the several segments, copy 2894 * it to the intermediate buffer. 2895 */ 2896 rte_pktmbuf_read(mbuf, 0, push_len, 2897 tx_ring->push_buf_intermediate_buf); 2898 *push_header = tx_ring->push_buf_intermediate_buf; 2899 delta = push_len - seg_len; 2900 } 2901 } else { 2902 *push_header = NULL; 2903 *header_len = 0; 2904 push_len = 0; 2905 } 2906 2907 /* Process first segment taking into consideration pushed header */ 2908 if (seg_len > push_len) { 2909 ena_buf->paddr = mbuf->buf_iova + 2910 mbuf->data_off + 2911 push_len; 2912 ena_buf->len = seg_len - push_len; 2913 ena_buf++; 2914 tx_info->num_of_bufs++; 2915 } 2916 2917 while ((mbuf = mbuf->next) != NULL) { 2918 seg_len = mbuf->data_len; 2919 2920 /* Skip mbufs if whole data is pushed as a header */ 2921 if (unlikely(delta > seg_len)) { 2922 delta -= seg_len; 2923 continue; 2924 } 2925 2926 ena_buf->paddr = mbuf->buf_iova + mbuf->data_off + delta; 2927 ena_buf->len = seg_len - delta; 2928 ena_buf++; 2929 tx_info->num_of_bufs++; 2930 2931 delta = 0; 2932 } 2933 } 2934 2935 static int ena_xmit_mbuf(struct ena_ring *tx_ring, struct rte_mbuf *mbuf) 2936 { 2937 struct ena_tx_buffer *tx_info; 2938 struct ena_com_tx_ctx ena_tx_ctx = { { 0 } }; 2939 uint16_t next_to_use; 2940 uint16_t header_len; 2941 uint16_t req_id; 2942 void *push_header; 2943 int nb_hw_desc; 2944 int rc; 2945 2946 /* Checking for space for 2 additional metadata descriptors due to 2947 * possible header split and metadata descriptor 2948 */ 2949 if (!ena_com_sq_have_enough_space(tx_ring->ena_com_io_sq, 2950 mbuf->nb_segs + 2)) { 2951 PMD_DRV_LOG(DEBUG, "Not enough space in the tx queue\n"); 2952 return ENA_COM_NO_MEM; 2953 } 2954 2955 next_to_use = tx_ring->next_to_use; 2956 2957 req_id = tx_ring->empty_tx_reqs[next_to_use]; 2958 tx_info = &tx_ring->tx_buffer_info[req_id]; 2959 tx_info->num_of_bufs = 0; 2960 RTE_ASSERT(tx_info->mbuf == NULL); 2961 2962 ena_tx_map_mbuf(tx_ring, tx_info, mbuf, &push_header, &header_len); 2963 2964 ena_tx_ctx.ena_bufs = tx_info->bufs; 2965 ena_tx_ctx.push_header = push_header; 2966 ena_tx_ctx.num_bufs = tx_info->num_of_bufs; 2967 ena_tx_ctx.req_id = req_id; 2968 ena_tx_ctx.header_len = header_len; 2969 2970 /* Set Tx offloads flags, if applicable */ 2971 ena_tx_mbuf_prepare(mbuf, &ena_tx_ctx, tx_ring->offloads, 2972 tx_ring->disable_meta_caching); 2973 2974 if (unlikely(ena_com_is_doorbell_needed(tx_ring->ena_com_io_sq, 2975 &ena_tx_ctx))) { 2976 PMD_TX_LOG(DEBUG, 2977 "LLQ Tx max burst size of queue %d achieved, writing doorbell to send burst\n", 2978 tx_ring->id); 2979 ena_com_write_sq_doorbell(tx_ring->ena_com_io_sq); 2980 tx_ring->tx_stats.doorbells++; 2981 tx_ring->pkts_without_db = false; 2982 } 2983 2984 /* prepare the packet's descriptors to dma engine */ 2985 rc = ena_com_prepare_tx(tx_ring->ena_com_io_sq, &ena_tx_ctx, 2986 &nb_hw_desc); 2987 if (unlikely(rc)) { 2988 PMD_DRV_LOG(ERR, "Failed to prepare Tx buffers, rc: %d\n", rc); 2989 ++tx_ring->tx_stats.prepare_ctx_err; 2990 ena_trigger_reset(tx_ring->adapter, 2991 ENA_REGS_RESET_DRIVER_INVALID_STATE); 2992 return rc; 2993 } 2994 2995 tx_info->tx_descs = nb_hw_desc; 2996 tx_info->timestamp = rte_get_timer_cycles(); 2997 2998 
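	/*
	 * Note added for clarity: the timestamp stored above is what
	 * check_for_tx_completion_in_queue() compares against
	 * missing_tx_completion_to; ena_tx_cleanup() resets it to 0 once the
	 * completion is reaped, so only packets still owned by the HW can
	 * trigger the missing-Tx-completion reset path.
	 */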
tx_ring->tx_stats.cnt++; 2999 tx_ring->tx_stats.bytes += mbuf->pkt_len; 3000 3001 tx_ring->next_to_use = ENA_IDX_NEXT_MASKED(next_to_use, 3002 tx_ring->size_mask); 3003 3004 return 0; 3005 } 3006 3007 static __rte_always_inline size_t 3008 ena_tx_cleanup_mbuf_fast(struct rte_mbuf **mbufs_to_clean, 3009 struct rte_mbuf *mbuf, 3010 size_t mbuf_cnt, 3011 size_t buf_size) 3012 { 3013 struct rte_mbuf *m_next; 3014 3015 while (mbuf != NULL) { 3016 m_next = mbuf->next; 3017 mbufs_to_clean[mbuf_cnt++] = mbuf; 3018 if (mbuf_cnt == buf_size) { 3019 rte_mempool_put_bulk(mbufs_to_clean[0]->pool, (void **)mbufs_to_clean, 3020 (unsigned int)mbuf_cnt); 3021 mbuf_cnt = 0; 3022 } 3023 mbuf = m_next; 3024 } 3025 3026 return mbuf_cnt; 3027 } 3028 3029 static int ena_tx_cleanup(void *txp, uint32_t free_pkt_cnt) 3030 { 3031 struct rte_mbuf *mbufs_to_clean[ENA_CLEANUP_BUF_SIZE]; 3032 struct ena_ring *tx_ring = (struct ena_ring *)txp; 3033 size_t mbuf_cnt = 0; 3034 unsigned int total_tx_descs = 0; 3035 unsigned int total_tx_pkts = 0; 3036 uint16_t cleanup_budget; 3037 uint16_t next_to_clean = tx_ring->next_to_clean; 3038 bool fast_free = tx_ring->offloads & RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE; 3039 3040 /* 3041 * If free_pkt_cnt is equal to 0, it means that the user requested 3042 * full cleanup, so attempt to release all Tx descriptors 3043 * (ring_size - 1 -> size_mask) 3044 */ 3045 cleanup_budget = (free_pkt_cnt == 0) ? tx_ring->size_mask : free_pkt_cnt; 3046 3047 while (likely(total_tx_pkts < cleanup_budget)) { 3048 struct rte_mbuf *mbuf; 3049 struct ena_tx_buffer *tx_info; 3050 uint16_t req_id; 3051 3052 if (ena_com_tx_comp_req_id_get(tx_ring->ena_com_io_cq, &req_id) != 0) 3053 break; 3054 3055 if (unlikely(validate_tx_req_id(tx_ring, req_id) != 0)) 3056 break; 3057 3058 /* Get Tx info & store how many descs were processed */ 3059 tx_info = &tx_ring->tx_buffer_info[req_id]; 3060 tx_info->timestamp = 0; 3061 3062 mbuf = tx_info->mbuf; 3063 if (fast_free) { 3064 mbuf_cnt = ena_tx_cleanup_mbuf_fast(mbufs_to_clean, mbuf, mbuf_cnt, 3065 ENA_CLEANUP_BUF_SIZE); 3066 } else { 3067 rte_pktmbuf_free(mbuf); 3068 } 3069 3070 tx_info->mbuf = NULL; 3071 tx_ring->empty_tx_reqs[next_to_clean] = req_id; 3072 3073 total_tx_descs += tx_info->tx_descs; 3074 total_tx_pkts++; 3075 3076 /* Put back descriptor to the ring for reuse */ 3077 next_to_clean = ENA_IDX_NEXT_MASKED(next_to_clean, 3078 tx_ring->size_mask); 3079 } 3080 3081 if (likely(total_tx_descs > 0)) { 3082 /* acknowledge completion of sent packets */ 3083 tx_ring->next_to_clean = next_to_clean; 3084 ena_com_comp_ack(tx_ring->ena_com_io_sq, total_tx_descs); 3085 ena_com_update_dev_comp_head(tx_ring->ena_com_io_cq); 3086 } 3087 3088 if (mbuf_cnt != 0) 3089 rte_mempool_put_bulk(mbufs_to_clean[0]->pool, 3090 (void **)mbufs_to_clean, mbuf_cnt); 3091 3092 /* Notify completion handler that full cleanup was performed */ 3093 if (free_pkt_cnt == 0 || total_tx_pkts < cleanup_budget) 3094 tx_ring->last_cleanup_ticks = rte_get_timer_cycles(); 3095 3096 return total_tx_pkts; 3097 } 3098 3099 static uint16_t eth_ena_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, 3100 uint16_t nb_pkts) 3101 { 3102 struct ena_ring *tx_ring = (struct ena_ring *)(tx_queue); 3103 int available_desc; 3104 uint16_t sent_idx = 0; 3105 3106 #ifdef RTE_ETHDEV_DEBUG_TX 3107 /* Check adapter state */ 3108 if (unlikely(tx_ring->adapter->state != ENA_ADAPTER_STATE_RUNNING)) { 3109 PMD_TX_LOG(ALERT, 3110 "Trying to xmit pkts while device is NOT running\n"); 3111 return 0; 3112 } 3113 #endif 3114 3115 
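	/*
	 * Illustrative application-side sketch (added): besides the
	 * threshold-driven cleanup right below, a full Tx queue cleanup can be
	 * requested through the generic ethdev API, which is expected to land
	 * in ena_tx_cleanup() with free_pkt_cnt == 0, assuming that function
	 * is wired up as the tx_done_cleanup dev op elsewhere in this file:
	 *
	 *   ret = rte_eth_tx_done_cleanup(port_id, queue_id, 0);
	 *
	 * port_id, queue_id and ret are placeholders.
	 */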
available_desc = ena_com_free_q_entries(tx_ring->ena_com_io_sq); 3116 if (available_desc < tx_ring->tx_free_thresh) 3117 ena_tx_cleanup((void *)tx_ring, 0); 3118 3119 for (sent_idx = 0; sent_idx < nb_pkts; sent_idx++) { 3120 if (ena_xmit_mbuf(tx_ring, tx_pkts[sent_idx])) 3121 break; 3122 tx_ring->pkts_without_db = true; 3123 rte_prefetch0(tx_pkts[ENA_IDX_ADD_MASKED(sent_idx, 4, 3124 tx_ring->size_mask)]); 3125 } 3126 3127 /* If there are ready packets to be xmitted... */ 3128 if (likely(tx_ring->pkts_without_db)) { 3129 /* ...let HW do its best :-) */ 3130 ena_com_write_sq_doorbell(tx_ring->ena_com_io_sq); 3131 tx_ring->tx_stats.doorbells++; 3132 tx_ring->pkts_without_db = false; 3133 } 3134 3135 tx_ring->tx_stats.available_desc = 3136 ena_com_free_q_entries(tx_ring->ena_com_io_sq); 3137 tx_ring->tx_stats.tx_poll++; 3138 3139 return sent_idx; 3140 } 3141 3142 int ena_copy_eni_stats(struct ena_adapter *adapter, struct ena_stats_eni *stats) 3143 { 3144 int rc; 3145 3146 rte_spinlock_lock(&adapter->admin_lock); 3147 /* Retrieve and store the latest statistics from the AQ. This ensures 3148 * that previous value is returned in case of a com error. 3149 */ 3150 rc = ENA_PROXY(adapter, ena_com_get_eni_stats, &adapter->ena_dev, 3151 (struct ena_admin_eni_stats *)stats); 3152 rte_spinlock_unlock(&adapter->admin_lock); 3153 if (rc != 0) { 3154 if (rc == ENA_COM_UNSUPPORTED) { 3155 PMD_DRV_LOG(DEBUG, 3156 "Retrieving ENI metrics is not supported\n"); 3157 } else { 3158 PMD_DRV_LOG(WARNING, 3159 "Failed to get ENI metrics, rc: %d\n", rc); 3160 } 3161 return rc; 3162 } 3163 3164 return 0; 3165 } 3166 3167 /** 3168 * DPDK callback to retrieve names of extended device statistics 3169 * 3170 * @param dev 3171 * Pointer to Ethernet device structure. 3172 * @param[out] xstats_names 3173 * Buffer to insert names into. 3174 * @param n 3175 * Number of names. 3176 * 3177 * @return 3178 * Number of xstats names. 3179 */ 3180 static int ena_xstats_get_names(struct rte_eth_dev *dev, 3181 struct rte_eth_xstat_name *xstats_names, 3182 unsigned int n) 3183 { 3184 unsigned int xstats_count = ena_xstats_calc_num(dev->data); 3185 unsigned int stat, i, count = 0; 3186 3187 if (n < xstats_count || !xstats_names) 3188 return xstats_count; 3189 3190 for (stat = 0; stat < ENA_STATS_ARRAY_GLOBAL; stat++, count++) 3191 strcpy(xstats_names[count].name, 3192 ena_stats_global_strings[stat].name); 3193 3194 for (stat = 0; stat < ENA_STATS_ARRAY_ENI; stat++, count++) 3195 strcpy(xstats_names[count].name, 3196 ena_stats_eni_strings[stat].name); 3197 3198 for (stat = 0; stat < ENA_STATS_ARRAY_RX; stat++) 3199 for (i = 0; i < dev->data->nb_rx_queues; i++, count++) 3200 snprintf(xstats_names[count].name, 3201 sizeof(xstats_names[count].name), 3202 "rx_q%d_%s", i, 3203 ena_stats_rx_strings[stat].name); 3204 3205 for (stat = 0; stat < ENA_STATS_ARRAY_TX; stat++) 3206 for (i = 0; i < dev->data->nb_tx_queues; i++, count++) 3207 snprintf(xstats_names[count].name, 3208 sizeof(xstats_names[count].name), 3209 "tx_q%d_%s", i, 3210 ena_stats_tx_strings[stat].name); 3211 3212 return xstats_count; 3213 } 3214 3215 /** 3216 * DPDK callback to retrieve names of extended device statistics for the given 3217 * ids. 3218 * 3219 * @param dev 3220 * Pointer to Ethernet device structure. 3221 * @param[out] xstats_names 3222 * Buffer to insert names into. 3223 * @param ids 3224 * IDs array for which the names should be retrieved. 3225 * @param size 3226 * Number of ids. 3227 * 3228 * @return 3229 * Positive value: number of xstats names. 
Negative value: error code.
3230  */
3231 static int ena_xstats_get_names_by_id(struct rte_eth_dev *dev,
3232 				      const uint64_t *ids,
3233 				      struct rte_eth_xstat_name *xstats_names,
3234 				      unsigned int size)
3235 {
3236 	uint64_t xstats_count = ena_xstats_calc_num(dev->data);
3237 	uint64_t id, qid;
3238 	unsigned int i;
3239 
3240 	if (xstats_names == NULL)
3241 		return xstats_count;
3242 
3243 	for (i = 0; i < size; ++i) {
3244 		id = ids[i];
3245 		if (id >= xstats_count) {
3246 			PMD_DRV_LOG(ERR,
3247 				"ID value out of range: id=%" PRIu64 ", xstats_num=%" PRIu64 "\n",
3248 				id, xstats_count);
3249 			return -EINVAL;
3250 		}
3251 
3252 		if (id < ENA_STATS_ARRAY_GLOBAL) {
3253 			strcpy(xstats_names[i].name,
3254 			       ena_stats_global_strings[id].name);
3255 			continue;
3256 		}
3257 
3258 		id -= ENA_STATS_ARRAY_GLOBAL;
3259 		if (id < ENA_STATS_ARRAY_ENI) {
3260 			strcpy(xstats_names[i].name,
3261 			       ena_stats_eni_strings[id].name);
3262 			continue;
3263 		}
3264 
3265 		id -= ENA_STATS_ARRAY_ENI;
3266 		if (id < ENA_STATS_ARRAY_RX * dev->data->nb_rx_queues) {
3267 			qid = id % dev->data->nb_rx_queues;
3268 			id /= dev->data->nb_rx_queues;
3269 			snprintf(xstats_names[i].name,
3270 				 sizeof(xstats_names[i].name),
3271 				 "rx_q%" PRIu64 "_%s",
3272 				 qid, ena_stats_rx_strings[id].name);
3273 			continue;
3274 		}
3275 
3276 		id -= ENA_STATS_ARRAY_RX * dev->data->nb_rx_queues;
3277 		/* Although this condition is not needed, it is kept for
3278 		 * compatibility in case a new xstats structure is ever added.
3279 		 */
3280 		if (id < ENA_STATS_ARRAY_TX * dev->data->nb_tx_queues) {
3281 			qid = id % dev->data->nb_tx_queues;
3282 			id /= dev->data->nb_tx_queues;
3283 			snprintf(xstats_names[i].name,
3284 				 sizeof(xstats_names[i].name),
3285 				 "tx_q%" PRIu64 "_%s",
3286 				 qid, ena_stats_tx_strings[id].name);
3287 			continue;
3288 		}
3289 	}
3290 
3291 	return i;
3292 }
3293 
3294 /**
3295  * DPDK callback to get extended device statistics.
3296  *
3297  * @param dev
3298  *   Pointer to Ethernet device structure.
3299  * @param[out] xstats
3300  *   Stats table output buffer.
3301  * @param n
3302  *   The size of the stats table.
3303  *
3304  * @return
3305  *   Number of xstats on success, negative on failure.
3306  */
3307 static int ena_xstats_get(struct rte_eth_dev *dev,
3308 			  struct rte_eth_xstat *xstats,
3309 			  unsigned int n)
3310 {
3311 	struct ena_adapter *adapter = dev->data->dev_private;
3312 	unsigned int xstats_count = ena_xstats_calc_num(dev->data);
3313 	struct ena_stats_eni eni_stats;
3314 	unsigned int stat, i, count = 0;
3315 	int stat_offset;
3316 	void *stats_begin;
3317 
3318 	if (n < xstats_count)
3319 		return xstats_count;
3320 
3321 	if (!xstats)
3322 		return 0;
3323 
3324 	for (stat = 0; stat < ENA_STATS_ARRAY_GLOBAL; stat++, count++) {
3325 		stat_offset = ena_stats_global_strings[stat].stat_offset;
3326 		stats_begin = &adapter->dev_stats;
3327 
3328 		xstats[count].id = count;
3329 		xstats[count].value = *((uint64_t *)
3330 			((char *)stats_begin + stat_offset));
3331 	}
3332 
3333 	/* Even if the function below fails, we should copy previous (or initial
3334 	 * values) to keep the structure of rte_eth_xstat consistent.
3335 */ 3336 ena_copy_eni_stats(adapter, &eni_stats); 3337 for (stat = 0; stat < ENA_STATS_ARRAY_ENI; stat++, count++) { 3338 stat_offset = ena_stats_eni_strings[stat].stat_offset; 3339 stats_begin = &eni_stats; 3340 3341 xstats[count].id = count; 3342 xstats[count].value = *((uint64_t *) 3343 ((char *)stats_begin + stat_offset)); 3344 } 3345 3346 for (stat = 0; stat < ENA_STATS_ARRAY_RX; stat++) { 3347 for (i = 0; i < dev->data->nb_rx_queues; i++, count++) { 3348 stat_offset = ena_stats_rx_strings[stat].stat_offset; 3349 stats_begin = &adapter->rx_ring[i].rx_stats; 3350 3351 xstats[count].id = count; 3352 xstats[count].value = *((uint64_t *) 3353 ((char *)stats_begin + stat_offset)); 3354 } 3355 } 3356 3357 for (stat = 0; stat < ENA_STATS_ARRAY_TX; stat++) { 3358 for (i = 0; i < dev->data->nb_tx_queues; i++, count++) { 3359 stat_offset = ena_stats_tx_strings[stat].stat_offset; 3360 stats_begin = &adapter->tx_ring[i].rx_stats; 3361 3362 xstats[count].id = count; 3363 xstats[count].value = *((uint64_t *) 3364 ((char *)stats_begin + stat_offset)); 3365 } 3366 } 3367 3368 return count; 3369 } 3370 3371 static int ena_xstats_get_by_id(struct rte_eth_dev *dev, 3372 const uint64_t *ids, 3373 uint64_t *values, 3374 unsigned int n) 3375 { 3376 struct ena_adapter *adapter = dev->data->dev_private; 3377 struct ena_stats_eni eni_stats; 3378 uint64_t id; 3379 uint64_t rx_entries, tx_entries; 3380 unsigned int i; 3381 int qid; 3382 int valid = 0; 3383 bool was_eni_copied = false; 3384 3385 for (i = 0; i < n; ++i) { 3386 id = ids[i]; 3387 /* Check if id belongs to global statistics */ 3388 if (id < ENA_STATS_ARRAY_GLOBAL) { 3389 values[i] = *((uint64_t *)&adapter->dev_stats + id); 3390 ++valid; 3391 continue; 3392 } 3393 3394 /* Check if id belongs to ENI statistics */ 3395 id -= ENA_STATS_ARRAY_GLOBAL; 3396 if (id < ENA_STATS_ARRAY_ENI) { 3397 /* Avoid reading ENI stats multiple times in a single 3398 * function call, as it requires communication with the 3399 * admin queue. 3400 */ 3401 if (!was_eni_copied) { 3402 was_eni_copied = true; 3403 ena_copy_eni_stats(adapter, &eni_stats); 3404 } 3405 values[i] = *((uint64_t *)&eni_stats + id); 3406 ++valid; 3407 continue; 3408 } 3409 3410 /* Check if id belongs to rx queue statistics */ 3411 id -= ENA_STATS_ARRAY_ENI; 3412 rx_entries = ENA_STATS_ARRAY_RX * dev->data->nb_rx_queues; 3413 if (id < rx_entries) { 3414 qid = id % dev->data->nb_rx_queues; 3415 id /= dev->data->nb_rx_queues; 3416 values[i] = *((uint64_t *) 3417 &adapter->rx_ring[qid].rx_stats + id); 3418 ++valid; 3419 continue; 3420 } 3421 /* Check if id belongs to rx queue statistics */ 3422 id -= rx_entries; 3423 tx_entries = ENA_STATS_ARRAY_TX * dev->data->nb_tx_queues; 3424 if (id < tx_entries) { 3425 qid = id % dev->data->nb_tx_queues; 3426 id /= dev->data->nb_tx_queues; 3427 values[i] = *((uint64_t *) 3428 &adapter->tx_ring[qid].tx_stats + id); 3429 ++valid; 3430 continue; 3431 } 3432 } 3433 3434 return valid; 3435 } 3436 3437 static int ena_process_uint_devarg(const char *key, 3438 const char *value, 3439 void *opaque) 3440 { 3441 struct ena_adapter *adapter = opaque; 3442 char *str_end; 3443 uint64_t uint_value; 3444 3445 uint_value = strtoull(value, &str_end, 10); 3446 if (value == str_end) { 3447 PMD_INIT_LOG(ERR, 3448 "Invalid value for key '%s'. 
Only uint values are accepted.\n", 3449 key); 3450 return -EINVAL; 3451 } 3452 3453 if (strcmp(key, ENA_DEVARG_MISS_TXC_TO) == 0) { 3454 if (uint_value > ENA_MAX_TX_TIMEOUT_SECONDS) { 3455 PMD_INIT_LOG(ERR, 3456 "Tx timeout too high: %" PRIu64 " sec. Maximum allowed: %d sec.\n", 3457 uint_value, ENA_MAX_TX_TIMEOUT_SECONDS); 3458 return -EINVAL; 3459 } else if (uint_value == 0) { 3460 PMD_INIT_LOG(INFO, 3461 "Check for missing Tx completions has been disabled.\n"); 3462 adapter->missing_tx_completion_to = 3463 ENA_HW_HINTS_NO_TIMEOUT; 3464 } else { 3465 PMD_INIT_LOG(INFO, 3466 "Tx packet completion timeout set to %" PRIu64 " seconds.\n", 3467 uint_value); 3468 adapter->missing_tx_completion_to = 3469 uint_value * rte_get_timer_hz(); 3470 } 3471 } 3472 3473 return 0; 3474 } 3475 3476 static int ena_process_bool_devarg(const char *key, 3477 const char *value, 3478 void *opaque) 3479 { 3480 struct ena_adapter *adapter = opaque; 3481 bool bool_value; 3482 3483 /* Parse the value. */ 3484 if (strcmp(value, "1") == 0) { 3485 bool_value = true; 3486 } else if (strcmp(value, "0") == 0) { 3487 bool_value = false; 3488 } else { 3489 PMD_INIT_LOG(ERR, 3490 "Invalid value: '%s' for key '%s'. Accepted: '0' or '1'\n", 3491 value, key); 3492 return -EINVAL; 3493 } 3494 3495 /* Now, assign it to the proper adapter field. */ 3496 if (strcmp(key, ENA_DEVARG_LARGE_LLQ_HDR) == 0) 3497 adapter->use_large_llq_hdr = bool_value; 3498 else if (strcmp(key, ENA_DEVARG_ENABLE_LLQ) == 0) 3499 adapter->enable_llq = bool_value; 3500 3501 return 0; 3502 } 3503 3504 static int ena_parse_devargs(struct ena_adapter *adapter, 3505 struct rte_devargs *devargs) 3506 { 3507 static const char * const allowed_args[] = { 3508 ENA_DEVARG_LARGE_LLQ_HDR, 3509 ENA_DEVARG_MISS_TXC_TO, 3510 ENA_DEVARG_ENABLE_LLQ, 3511 NULL, 3512 }; 3513 struct rte_kvargs *kvlist; 3514 int rc; 3515 3516 if (devargs == NULL) 3517 return 0; 3518 3519 kvlist = rte_kvargs_parse(devargs->args, allowed_args); 3520 if (kvlist == NULL) { 3521 PMD_INIT_LOG(ERR, "Invalid device arguments: %s\n", 3522 devargs->args); 3523 return -EINVAL; 3524 } 3525 3526 rc = rte_kvargs_process(kvlist, ENA_DEVARG_LARGE_LLQ_HDR, 3527 ena_process_bool_devarg, adapter); 3528 if (rc != 0) 3529 goto exit; 3530 rc = rte_kvargs_process(kvlist, ENA_DEVARG_MISS_TXC_TO, 3531 ena_process_uint_devarg, adapter); 3532 if (rc != 0) 3533 goto exit; 3534 rc = rte_kvargs_process(kvlist, ENA_DEVARG_ENABLE_LLQ, 3535 ena_process_bool_devarg, adapter); 3536 3537 exit: 3538 rte_kvargs_free(kvlist); 3539 3540 return rc; 3541 } 3542 3543 static int ena_setup_rx_intr(struct rte_eth_dev *dev) 3544 { 3545 struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev); 3546 struct rte_intr_handle *intr_handle = pci_dev->intr_handle; 3547 int rc; 3548 uint16_t vectors_nb, i; 3549 bool rx_intr_requested = dev->data->dev_conf.intr_conf.rxq; 3550 3551 if (!rx_intr_requested) 3552 return 0; 3553 3554 if (!rte_intr_cap_multiple(intr_handle)) { 3555 PMD_DRV_LOG(ERR, 3556 "Rx interrupt requested, but it isn't supported by the PCI driver\n"); 3557 return -ENOTSUP; 3558 } 3559 3560 /* Disable interrupt mapping before the configuration starts. */ 3561 rte_intr_disable(intr_handle); 3562 3563 /* Verify if there are enough vectors available. 
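	 * One event fd per Rx queue is requested below via
	 * rte_intr_efd_enable(); the admin/AENQ interrupt needs a separate
	 * slot, which is why rte_intr_allow_others() is also checked.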
*/ 3564 vectors_nb = dev->data->nb_rx_queues; 3565 if (vectors_nb > RTE_MAX_RXTX_INTR_VEC_ID) { 3566 PMD_DRV_LOG(ERR, 3567 "Too many Rx interrupts requested, maximum number: %d\n", 3568 RTE_MAX_RXTX_INTR_VEC_ID); 3569 rc = -ENOTSUP; 3570 goto enable_intr; 3571 } 3572 3573 /* Allocate the vector list */ 3574 if (rte_intr_vec_list_alloc(intr_handle, "intr_vec", 3575 dev->data->nb_rx_queues)) { 3576 PMD_DRV_LOG(ERR, 3577 "Failed to allocate interrupt vector for %d queues\n", 3578 dev->data->nb_rx_queues); 3579 rc = -ENOMEM; 3580 goto enable_intr; 3581 } 3582 3583 rc = rte_intr_efd_enable(intr_handle, vectors_nb); 3584 if (rc != 0) 3585 goto free_intr_vec; 3586 3587 if (!rte_intr_allow_others(intr_handle)) { 3588 PMD_DRV_LOG(ERR, 3589 "Not enough interrupts available to use both ENA Admin and Rx interrupts\n"); 3590 goto disable_intr_efd; 3591 } 3592 3593 for (i = 0; i < vectors_nb; ++i) 3594 if (rte_intr_vec_list_index_set(intr_handle, i, 3595 RTE_INTR_VEC_RXTX_OFFSET + i)) 3596 goto disable_intr_efd; 3597 3598 rte_intr_enable(intr_handle); 3599 return 0; 3600 3601 disable_intr_efd: 3602 rte_intr_efd_disable(intr_handle); 3603 free_intr_vec: 3604 rte_intr_vec_list_free(intr_handle); 3605 enable_intr: 3606 rte_intr_enable(intr_handle); 3607 return rc; 3608 } 3609 3610 static void ena_rx_queue_intr_set(struct rte_eth_dev *dev, 3611 uint16_t queue_id, 3612 bool unmask) 3613 { 3614 struct ena_adapter *adapter = dev->data->dev_private; 3615 struct ena_ring *rxq = &adapter->rx_ring[queue_id]; 3616 struct ena_eth_io_intr_reg intr_reg; 3617 3618 ena_com_update_intr_reg(&intr_reg, 0, 0, unmask); 3619 ena_com_unmask_intr(rxq->ena_com_io_cq, &intr_reg); 3620 } 3621 3622 static int ena_rx_queue_intr_enable(struct rte_eth_dev *dev, 3623 uint16_t queue_id) 3624 { 3625 ena_rx_queue_intr_set(dev, queue_id, true); 3626 3627 return 0; 3628 } 3629 3630 static int ena_rx_queue_intr_disable(struct rte_eth_dev *dev, 3631 uint16_t queue_id) 3632 { 3633 ena_rx_queue_intr_set(dev, queue_id, false); 3634 3635 return 0; 3636 } 3637 3638 static int ena_configure_aenq(struct ena_adapter *adapter) 3639 { 3640 uint32_t aenq_groups = adapter->all_aenq_groups; 3641 int rc; 3642 3643 /* All_aenq_groups holds all AENQ functions supported by the device and 3644 * the HW, so at first we need to be sure the LSC request is valid. 3645 */ 3646 if (adapter->edev_data->dev_conf.intr_conf.lsc != 0) { 3647 if (!(aenq_groups & BIT(ENA_ADMIN_LINK_CHANGE))) { 3648 PMD_DRV_LOG(ERR, 3649 "LSC requested, but it's not supported by the AENQ\n"); 3650 return -EINVAL; 3651 } 3652 } else { 3653 /* If LSC wasn't enabled by the app, let's enable all supported 3654 * AENQ procedures except the LSC. 
3655 */ 3656 aenq_groups &= ~BIT(ENA_ADMIN_LINK_CHANGE); 3657 } 3658 3659 rc = ena_com_set_aenq_config(&adapter->ena_dev, aenq_groups); 3660 if (rc != 0) { 3661 PMD_DRV_LOG(ERR, "Cannot configure AENQ groups, rc=%d\n", rc); 3662 return rc; 3663 } 3664 3665 adapter->active_aenq_groups = aenq_groups; 3666 3667 return 0; 3668 } 3669 3670 int ena_mp_indirect_table_set(struct ena_adapter *adapter) 3671 { 3672 return ENA_PROXY(adapter, ena_com_indirect_table_set, &adapter->ena_dev); 3673 } 3674 3675 int ena_mp_indirect_table_get(struct ena_adapter *adapter, 3676 uint32_t *indirect_table) 3677 { 3678 return ENA_PROXY(adapter, ena_com_indirect_table_get, &adapter->ena_dev, 3679 indirect_table); 3680 } 3681 3682 /********************************************************************* 3683 * ena_plat_dpdk.h functions implementations 3684 *********************************************************************/ 3685 3686 const struct rte_memzone * 3687 ena_mem_alloc_coherent(struct rte_eth_dev_data *data, size_t size, 3688 int socket_id, unsigned int alignment, void **virt_addr, 3689 dma_addr_t *phys_addr) 3690 { 3691 char z_name[RTE_MEMZONE_NAMESIZE]; 3692 struct ena_adapter *adapter = data->dev_private; 3693 const struct rte_memzone *memzone; 3694 int rc; 3695 3696 rc = snprintf(z_name, RTE_MEMZONE_NAMESIZE, "ena_p%d_mz%" PRIu64 "", 3697 data->port_id, adapter->memzone_cnt); 3698 if (rc >= RTE_MEMZONE_NAMESIZE) { 3699 PMD_DRV_LOG(ERR, 3700 "Name for the ena_com memzone is too long. Port: %d, mz_num: %" PRIu64 "\n", 3701 data->port_id, adapter->memzone_cnt); 3702 goto error; 3703 } 3704 adapter->memzone_cnt++; 3705 3706 memzone = rte_memzone_reserve_aligned(z_name, size, socket_id, 3707 RTE_MEMZONE_IOVA_CONTIG, alignment); 3708 if (memzone == NULL) { 3709 PMD_DRV_LOG(ERR, "Failed to allocate ena_com memzone: %s\n", 3710 z_name); 3711 goto error; 3712 } 3713 3714 memset(memzone->addr, 0, size); 3715 *virt_addr = memzone->addr; 3716 *phys_addr = memzone->iova; 3717 3718 return memzone; 3719 3720 error: 3721 *virt_addr = NULL; 3722 *phys_addr = 0; 3723 3724 return NULL; 3725 } 3726 3727 3728 /********************************************************************* 3729 * PMD configuration 3730 *********************************************************************/ 3731 static int eth_ena_pci_probe(struct rte_pci_driver *pci_drv __rte_unused, 3732 struct rte_pci_device *pci_dev) 3733 { 3734 return rte_eth_dev_pci_generic_probe(pci_dev, 3735 sizeof(struct ena_adapter), eth_ena_dev_init); 3736 } 3737 3738 static int eth_ena_pci_remove(struct rte_pci_device *pci_dev) 3739 { 3740 return rte_eth_dev_pci_generic_remove(pci_dev, eth_ena_dev_uninit); 3741 } 3742 3743 static struct rte_pci_driver rte_ena_pmd = { 3744 .id_table = pci_id_ena_map, 3745 .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC | 3746 RTE_PCI_DRV_WC_ACTIVATE, 3747 .probe = eth_ena_pci_probe, 3748 .remove = eth_ena_pci_remove, 3749 }; 3750 3751 RTE_PMD_REGISTER_PCI(net_ena, rte_ena_pmd); 3752 RTE_PMD_REGISTER_PCI_TABLE(net_ena, pci_id_ena_map); 3753 RTE_PMD_REGISTER_KMOD_DEP(net_ena, "* igb_uio | uio_pci_generic | vfio-pci"); 3754 RTE_PMD_REGISTER_PARAM_STRING(net_ena, 3755 ENA_DEVARG_LARGE_LLQ_HDR "=<0|1> " 3756 ENA_DEVARG_ENABLE_LLQ "=<0|1> " 3757 ENA_DEVARG_MISS_TXC_TO "=<uint>"); 3758 RTE_LOG_REGISTER_SUFFIX(ena_logtype_init, init, NOTICE); 3759 RTE_LOG_REGISTER_SUFFIX(ena_logtype_driver, driver, NOTICE); 3760 #ifdef RTE_ETHDEV_DEBUG_RX 3761 RTE_LOG_REGISTER_SUFFIX(ena_logtype_rx, rx, DEBUG); 3762 #endif 3763 #ifdef RTE_ETHDEV_DEBUG_TX 
RTE_LOG_REGISTER_SUFFIX(ena_logtype_tx, tx, DEBUG);
#endif
RTE_LOG_REGISTER_SUFFIX(ena_logtype_com, com, WARNING);

/******************************************************************************
 ******************************** AENQ Handlers *******************************
 *****************************************************************************/
static void ena_update_on_link_change(void *adapter_data,
				      struct ena_admin_aenq_entry *aenq_e)
{
	struct rte_eth_dev *eth_dev = adapter_data;
	struct ena_adapter *adapter = eth_dev->data->dev_private;
	struct ena_admin_aenq_link_change_desc *aenq_link_desc;
	uint32_t status;

	aenq_link_desc = (struct ena_admin_aenq_link_change_desc *)aenq_e;

	status = get_ena_admin_aenq_link_change_desc_link_status(aenq_link_desc);
	adapter->link_status = status;

	ena_link_update(eth_dev, 0);
	rte_eth_dev_callback_process(eth_dev, RTE_ETH_EVENT_INTR_LSC, NULL);
}

static void ena_notification(void *adapter_data,
			     struct ena_admin_aenq_entry *aenq_e)
{
	struct rte_eth_dev *eth_dev = adapter_data;
	struct ena_adapter *adapter = eth_dev->data->dev_private;
	struct ena_admin_ena_hw_hints *hints;

	if (aenq_e->aenq_common_desc.group != ENA_ADMIN_NOTIFICATION)
		PMD_DRV_LOG(WARNING, "Invalid AENQ group: %x. Expected: %x\n",
			aenq_e->aenq_common_desc.group,
			ENA_ADMIN_NOTIFICATION);

	switch (aenq_e->aenq_common_desc.syndrome) {
	case ENA_ADMIN_UPDATE_HINTS:
		hints = (struct ena_admin_ena_hw_hints *)
			(&aenq_e->inline_data_w4);
		ena_update_hints(adapter, hints);
		break;
	default:
		PMD_DRV_LOG(ERR, "Invalid AENQ notification syndrome: %d\n",
			aenq_e->aenq_common_desc.syndrome);
	}
}

static void ena_keep_alive(void *adapter_data,
			   struct ena_admin_aenq_entry *aenq_e)
{
	struct rte_eth_dev *eth_dev = adapter_data;
	struct ena_adapter *adapter = eth_dev->data->dev_private;
	struct ena_admin_aenq_keep_alive_desc *desc;
	uint64_t rx_drops;
	uint64_t tx_drops;

	adapter->timestamp_wd = rte_get_timer_cycles();

	desc = (struct ena_admin_aenq_keep_alive_desc *)aenq_e;
	rx_drops = ((uint64_t)desc->rx_drops_high << 32) | desc->rx_drops_low;
	tx_drops = ((uint64_t)desc->tx_drops_high << 32) | desc->tx_drops_low;

	adapter->drv_stats->rx_drops = rx_drops;
	adapter->dev_stats.tx_drops = tx_drops;
}

/**
 * This handler will be called for an unknown event group or an event with an
 * unimplemented handler.
 **/
static void unimplemented_aenq_handler(__rte_unused void *data,
				       __rte_unused struct ena_admin_aenq_entry *aenq_e)
{
	PMD_DRV_LOG(ERR,
		"Unknown event was received or event with unimplemented handler\n");
}

static struct ena_aenq_handlers aenq_handlers = {
	.handlers = {
		[ENA_ADMIN_LINK_CHANGE] = ena_update_on_link_change,
		[ENA_ADMIN_NOTIFICATION] = ena_notification,
		[ENA_ADMIN_KEEP_ALIVE] = ena_keep_alive
	},
	.unimplemented_handler = unimplemented_aenq_handler
};

/*********************************************************************
 *  Multi-Process communication request handling (in primary)
 *********************************************************************/
static int
ena_mp_primary_handle(const struct rte_mp_msg *mp_msg, const void *peer)
{
	const struct ena_mp_body *req =
		(const struct ena_mp_body *)mp_msg->param;
	struct ena_adapter *adapter;
	struct ena_com_dev *ena_dev;
	struct ena_mp_body *rsp;
	struct rte_mp_msg mp_rsp;
	struct rte_eth_dev *dev;
	int res = 0;

	rsp = (struct ena_mp_body *)&mp_rsp.param;
	mp_msg_init(&mp_rsp, req->type, req->port_id);

	if (!rte_eth_dev_is_valid_port(req->port_id)) {
		rte_errno = ENODEV;
		res = -rte_errno;
		PMD_DRV_LOG(ERR, "Unknown port %d in request %d\n",
			req->port_id, req->type);
		goto end;
	}
	dev = &rte_eth_devices[req->port_id];
	adapter = dev->data->dev_private;
	ena_dev = &adapter->ena_dev;

	switch (req->type) {
	case ENA_MP_DEV_STATS_GET:
		res = ena_com_get_dev_basic_stats(ena_dev,
			&adapter->basic_stats);
		break;
	case ENA_MP_ENI_STATS_GET:
		res = ena_com_get_eni_stats(ena_dev,
			(struct ena_admin_eni_stats *)&adapter->eni_stats);
		break;
	case ENA_MP_MTU_SET:
		res = ena_com_set_dev_mtu(ena_dev, req->args.mtu);
		break;
	case ENA_MP_IND_TBL_GET:
		res = ena_com_indirect_table_get(ena_dev,
			adapter->indirect_table);
		break;
	case ENA_MP_IND_TBL_SET:
		res = ena_com_indirect_table_set(ena_dev);
		break;
	default:
		PMD_DRV_LOG(ERR, "Unknown request type %d\n", req->type);
		res = -EINVAL;
		break;
	}

end:
	/* Save processing result in the reply */
	rsp->result = res;
	/* Return just IPC processing status */
	return rte_mp_reply(&mp_rsp, peer);
}
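
/*
 * Editor's illustrative sketch, not part of the PMD: ena_update_on_link_change()
 * above forwards AENQ link events to applications through the standard ethdev
 * RTE_ETH_EVENT_INTR_LSC callback, which the driver advertises by setting
 * RTE_PCI_DRV_INTR_LSC in rte_ena_pmd.drv_flags. The snippet below shows one
 * possible way an application could register and consume such a callback using
 * only public ethdev APIs. The function names example_lsc_event_cb() and
 * example_setup_lsc(), as well as the single Rx/Tx queue configuration, are
 * hypothetical assumptions made for the example.
 */
#include <stdio.h>

#include <rte_ethdev.h>

int
example_lsc_event_cb(uint16_t port_id, enum rte_eth_event_type type,
		     void *cb_arg __rte_unused, void *ret_param __rte_unused)
{
	struct rte_eth_link link;

	if (type != RTE_ETH_EVENT_INTR_LSC)
		return 0;

	/* The PMD has already refreshed the link state via ena_link_update(),
	 * so the non-blocking query is sufficient here.
	 */
	if (rte_eth_link_get_nowait(port_id, &link) != 0)
		return 0;

	printf("Port %u link is %s\n", port_id,
	       link.link_status == RTE_ETH_LINK_UP ? "up" : "down");

	return 0;
}

int
example_setup_lsc(uint16_t port_id)
{
	/* LSC interrupts must be requested at configure time. */
	struct rte_eth_conf conf = {
		.intr_conf = { .lsc = 1 },
	};
	int ret;

	ret = rte_eth_dev_configure(port_id, 1, 1, &conf);
	if (ret != 0)
		return ret;

	return rte_eth_dev_callback_register(port_id, RTE_ETH_EVENT_INTR_LSC,
					     example_lsc_event_cb, NULL);
}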