/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright (c) 2015-2020 Amazon.com, Inc. or its affiliates.
 * All rights reserved.
 */

#include <rte_string_fns.h>
#include <rte_errno.h>
#include <rte_version.h>
#include <rte_net.h>
#include <rte_kvargs.h>

#include "ena_ethdev.h"
#include "ena_logs.h"
#include "ena_platform.h"
#include "ena_com.h"
#include "ena_eth_com.h"

#include <ena_common_defs.h>
#include <ena_regs_defs.h>
#include <ena_admin_defs.h>
#include <ena_eth_io_defs.h>

#define DRV_MODULE_VER_MAJOR	2
#define DRV_MODULE_VER_MINOR	6
#define DRV_MODULE_VER_SUBMINOR	0

#define __MERGE_64B_H_L(h, l) (((uint64_t)(h) << 32) | (l))

#define GET_L4_HDR_LEN(mbuf)					\
	((rte_pktmbuf_mtod_offset(mbuf, struct rte_tcp_hdr *,	\
		mbuf->l3_len + mbuf->l2_len)->data_off) >> 4)

#define ETH_GSTRING_LEN	32

#define ARRAY_SIZE(x) RTE_DIM(x)

#define ENA_MIN_RING_DESC	128

/*
 * We should try to keep ENA_CLEANUP_BUF_SIZE lower than
 * RTE_MEMPOOL_CACHE_MAX_SIZE, so the cleanup buffer fits in the mempool
 * per-lcore cache.
 */
#define ENA_CLEANUP_BUF_SIZE	256

#define ENA_PTYPE_HAS_HASH	(RTE_PTYPE_L4_TCP | RTE_PTYPE_L4_UDP)

struct ena_stats {
	char name[ETH_GSTRING_LEN];
	int stat_offset;
};

#define ENA_STAT_ENTRY(stat, stat_type) { \
	.name = #stat, \
	.stat_offset = offsetof(struct ena_stats_##stat_type, stat) \
}

#define ENA_STAT_RX_ENTRY(stat) \
	ENA_STAT_ENTRY(stat, rx)

#define ENA_STAT_TX_ENTRY(stat) \
	ENA_STAT_ENTRY(stat, tx)

#define ENA_STAT_ENI_ENTRY(stat) \
	ENA_STAT_ENTRY(stat, eni)

#define ENA_STAT_GLOBAL_ENTRY(stat) \
	ENA_STAT_ENTRY(stat, dev)

/* Device arguments */
#define ENA_DEVARG_LARGE_LLQ_HDR "large_llq_hdr"
/* Timeout in seconds after which a single uncompleted Tx packet should be
 * considered as missing.
 */
#define ENA_DEVARG_MISS_TXC_TO "miss_txc_to"

/*
 * Each rte_memzone should have a unique name.
 * To satisfy this, count the number of allocations and append it to the name.
79 */ 80 rte_atomic64_t ena_alloc_cnt; 81 82 static const struct ena_stats ena_stats_global_strings[] = { 83 ENA_STAT_GLOBAL_ENTRY(wd_expired), 84 ENA_STAT_GLOBAL_ENTRY(dev_start), 85 ENA_STAT_GLOBAL_ENTRY(dev_stop), 86 ENA_STAT_GLOBAL_ENTRY(tx_drops), 87 }; 88 89 static const struct ena_stats ena_stats_eni_strings[] = { 90 ENA_STAT_ENI_ENTRY(bw_in_allowance_exceeded), 91 ENA_STAT_ENI_ENTRY(bw_out_allowance_exceeded), 92 ENA_STAT_ENI_ENTRY(pps_allowance_exceeded), 93 ENA_STAT_ENI_ENTRY(conntrack_allowance_exceeded), 94 ENA_STAT_ENI_ENTRY(linklocal_allowance_exceeded), 95 }; 96 97 static const struct ena_stats ena_stats_tx_strings[] = { 98 ENA_STAT_TX_ENTRY(cnt), 99 ENA_STAT_TX_ENTRY(bytes), 100 ENA_STAT_TX_ENTRY(prepare_ctx_err), 101 ENA_STAT_TX_ENTRY(tx_poll), 102 ENA_STAT_TX_ENTRY(doorbells), 103 ENA_STAT_TX_ENTRY(bad_req_id), 104 ENA_STAT_TX_ENTRY(available_desc), 105 ENA_STAT_TX_ENTRY(missed_tx), 106 }; 107 108 static const struct ena_stats ena_stats_rx_strings[] = { 109 ENA_STAT_RX_ENTRY(cnt), 110 ENA_STAT_RX_ENTRY(bytes), 111 ENA_STAT_RX_ENTRY(refill_partial), 112 ENA_STAT_RX_ENTRY(l3_csum_bad), 113 ENA_STAT_RX_ENTRY(l4_csum_bad), 114 ENA_STAT_RX_ENTRY(l4_csum_good), 115 ENA_STAT_RX_ENTRY(mbuf_alloc_fail), 116 ENA_STAT_RX_ENTRY(bad_desc_num), 117 ENA_STAT_RX_ENTRY(bad_req_id), 118 }; 119 120 #define ENA_STATS_ARRAY_GLOBAL ARRAY_SIZE(ena_stats_global_strings) 121 #define ENA_STATS_ARRAY_ENI ARRAY_SIZE(ena_stats_eni_strings) 122 #define ENA_STATS_ARRAY_TX ARRAY_SIZE(ena_stats_tx_strings) 123 #define ENA_STATS_ARRAY_RX ARRAY_SIZE(ena_stats_rx_strings) 124 125 #define QUEUE_OFFLOADS (RTE_ETH_TX_OFFLOAD_TCP_CKSUM |\ 126 RTE_ETH_TX_OFFLOAD_UDP_CKSUM |\ 127 RTE_ETH_TX_OFFLOAD_IPV4_CKSUM |\ 128 RTE_ETH_TX_OFFLOAD_TCP_TSO) 129 #define MBUF_OFFLOADS (RTE_MBUF_F_TX_L4_MASK |\ 130 RTE_MBUF_F_TX_IP_CKSUM |\ 131 RTE_MBUF_F_TX_TCP_SEG) 132 133 /** Vendor ID used by Amazon devices */ 134 #define PCI_VENDOR_ID_AMAZON 0x1D0F 135 /** Amazon devices */ 136 #define PCI_DEVICE_ID_ENA_VF 0xEC20 137 #define PCI_DEVICE_ID_ENA_VF_RSERV0 0xEC21 138 139 #define ENA_TX_OFFLOAD_MASK (RTE_MBUF_F_TX_L4_MASK | \ 140 RTE_MBUF_F_TX_IPV6 | \ 141 RTE_MBUF_F_TX_IPV4 | \ 142 RTE_MBUF_F_TX_IP_CKSUM | \ 143 RTE_MBUF_F_TX_TCP_SEG) 144 145 #define ENA_TX_OFFLOAD_NOTSUP_MASK \ 146 (RTE_MBUF_F_TX_OFFLOAD_MASK ^ ENA_TX_OFFLOAD_MASK) 147 148 /** HW specific offloads capabilities. */ 149 /* IPv4 checksum offload. */ 150 #define ENA_L3_IPV4_CSUM 0x0001 151 /* TCP/UDP checksum offload for IPv4 packets. */ 152 #define ENA_L4_IPV4_CSUM 0x0002 153 /* TCP/UDP checksum offload for IPv4 packets with pseudo header checksum. */ 154 #define ENA_L4_IPV4_CSUM_PARTIAL 0x0004 155 /* TCP/UDP checksum offload for IPv6 packets. */ 156 #define ENA_L4_IPV6_CSUM 0x0008 157 /* TCP/UDP checksum offload for IPv6 packets with pseudo header checksum. */ 158 #define ENA_L4_IPV6_CSUM_PARTIAL 0x0010 159 /* TSO support for IPv4 packets. */ 160 #define ENA_IPV4_TSO 0x0020 161 162 /* Device supports setting RSS hash. 
*/ 163 #define ENA_RX_RSS_HASH 0x0040 164 165 static const struct rte_pci_id pci_id_ena_map[] = { 166 { RTE_PCI_DEVICE(PCI_VENDOR_ID_AMAZON, PCI_DEVICE_ID_ENA_VF) }, 167 { RTE_PCI_DEVICE(PCI_VENDOR_ID_AMAZON, PCI_DEVICE_ID_ENA_VF_RSERV0) }, 168 { .device_id = 0 }, 169 }; 170 171 static struct ena_aenq_handlers aenq_handlers; 172 173 static int ena_device_init(struct ena_adapter *adapter, 174 struct rte_pci_device *pdev, 175 struct ena_com_dev_get_features_ctx *get_feat_ctx); 176 static int ena_dev_configure(struct rte_eth_dev *dev); 177 static void ena_tx_map_mbuf(struct ena_ring *tx_ring, 178 struct ena_tx_buffer *tx_info, 179 struct rte_mbuf *mbuf, 180 void **push_header, 181 uint16_t *header_len); 182 static int ena_xmit_mbuf(struct ena_ring *tx_ring, struct rte_mbuf *mbuf); 183 static int ena_tx_cleanup(void *txp, uint32_t free_pkt_cnt); 184 static uint16_t eth_ena_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, 185 uint16_t nb_pkts); 186 static uint16_t eth_ena_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, 187 uint16_t nb_pkts); 188 static int ena_tx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx, 189 uint16_t nb_desc, unsigned int socket_id, 190 const struct rte_eth_txconf *tx_conf); 191 static int ena_rx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx, 192 uint16_t nb_desc, unsigned int socket_id, 193 const struct rte_eth_rxconf *rx_conf, 194 struct rte_mempool *mp); 195 static inline void ena_init_rx_mbuf(struct rte_mbuf *mbuf, uint16_t len); 196 static struct rte_mbuf *ena_rx_mbuf(struct ena_ring *rx_ring, 197 struct ena_com_rx_buf_info *ena_bufs, 198 uint32_t descs, 199 uint16_t *next_to_clean, 200 uint8_t offset); 201 static uint16_t eth_ena_recv_pkts(void *rx_queue, 202 struct rte_mbuf **rx_pkts, uint16_t nb_pkts); 203 static int ena_add_single_rx_desc(struct ena_com_io_sq *io_sq, 204 struct rte_mbuf *mbuf, uint16_t id); 205 static int ena_populate_rx_queue(struct ena_ring *rxq, unsigned int count); 206 static void ena_init_rings(struct ena_adapter *adapter, 207 bool disable_meta_caching); 208 static int ena_mtu_set(struct rte_eth_dev *dev, uint16_t mtu); 209 static int ena_start(struct rte_eth_dev *dev); 210 static int ena_stop(struct rte_eth_dev *dev); 211 static int ena_close(struct rte_eth_dev *dev); 212 static int ena_dev_reset(struct rte_eth_dev *dev); 213 static int ena_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats); 214 static void ena_rx_queue_release_all(struct rte_eth_dev *dev); 215 static void ena_tx_queue_release_all(struct rte_eth_dev *dev); 216 static void ena_rx_queue_release(struct rte_eth_dev *dev, uint16_t qid); 217 static void ena_tx_queue_release(struct rte_eth_dev *dev, uint16_t qid); 218 static void ena_rx_queue_release_bufs(struct ena_ring *ring); 219 static void ena_tx_queue_release_bufs(struct ena_ring *ring); 220 static int ena_link_update(struct rte_eth_dev *dev, 221 int wait_to_complete); 222 static int ena_create_io_queue(struct rte_eth_dev *dev, struct ena_ring *ring); 223 static void ena_queue_stop(struct ena_ring *ring); 224 static void ena_queue_stop_all(struct rte_eth_dev *dev, 225 enum ena_ring_type ring_type); 226 static int ena_queue_start(struct rte_eth_dev *dev, struct ena_ring *ring); 227 static int ena_queue_start_all(struct rte_eth_dev *dev, 228 enum ena_ring_type ring_type); 229 static void ena_stats_restart(struct rte_eth_dev *dev); 230 static uint64_t ena_get_rx_port_offloads(struct ena_adapter *adapter); 231 static uint64_t ena_get_tx_port_offloads(struct ena_adapter *adapter); 232 
static uint64_t ena_get_rx_queue_offloads(struct ena_adapter *adapter); 233 static uint64_t ena_get_tx_queue_offloads(struct ena_adapter *adapter); 234 static int ena_infos_get(struct rte_eth_dev *dev, 235 struct rte_eth_dev_info *dev_info); 236 static void ena_interrupt_handler_rte(void *cb_arg); 237 static void ena_timer_wd_callback(struct rte_timer *timer, void *arg); 238 static void ena_destroy_device(struct rte_eth_dev *eth_dev); 239 static int eth_ena_dev_init(struct rte_eth_dev *eth_dev); 240 static int ena_xstats_get_names(struct rte_eth_dev *dev, 241 struct rte_eth_xstat_name *xstats_names, 242 unsigned int n); 243 static int ena_xstats_get_names_by_id(struct rte_eth_dev *dev, 244 const uint64_t *ids, 245 struct rte_eth_xstat_name *xstats_names, 246 unsigned int size); 247 static int ena_xstats_get(struct rte_eth_dev *dev, 248 struct rte_eth_xstat *stats, 249 unsigned int n); 250 static int ena_xstats_get_by_id(struct rte_eth_dev *dev, 251 const uint64_t *ids, 252 uint64_t *values, 253 unsigned int n); 254 static int ena_process_bool_devarg(const char *key, 255 const char *value, 256 void *opaque); 257 static int ena_parse_devargs(struct ena_adapter *adapter, 258 struct rte_devargs *devargs); 259 static int ena_copy_eni_stats(struct ena_adapter *adapter, 260 struct ena_stats_eni *stats); 261 static int ena_setup_rx_intr(struct rte_eth_dev *dev); 262 static int ena_rx_queue_intr_enable(struct rte_eth_dev *dev, 263 uint16_t queue_id); 264 static int ena_rx_queue_intr_disable(struct rte_eth_dev *dev, 265 uint16_t queue_id); 266 static int ena_configure_aenq(struct ena_adapter *adapter); 267 static int ena_mp_primary_handle(const struct rte_mp_msg *mp_msg, 268 const void *peer); 269 270 static const struct eth_dev_ops ena_dev_ops = { 271 .dev_configure = ena_dev_configure, 272 .dev_infos_get = ena_infos_get, 273 .rx_queue_setup = ena_rx_queue_setup, 274 .tx_queue_setup = ena_tx_queue_setup, 275 .dev_start = ena_start, 276 .dev_stop = ena_stop, 277 .link_update = ena_link_update, 278 .stats_get = ena_stats_get, 279 .xstats_get_names = ena_xstats_get_names, 280 .xstats_get_names_by_id = ena_xstats_get_names_by_id, 281 .xstats_get = ena_xstats_get, 282 .xstats_get_by_id = ena_xstats_get_by_id, 283 .mtu_set = ena_mtu_set, 284 .rx_queue_release = ena_rx_queue_release, 285 .tx_queue_release = ena_tx_queue_release, 286 .dev_close = ena_close, 287 .dev_reset = ena_dev_reset, 288 .reta_update = ena_rss_reta_update, 289 .reta_query = ena_rss_reta_query, 290 .rx_queue_intr_enable = ena_rx_queue_intr_enable, 291 .rx_queue_intr_disable = ena_rx_queue_intr_disable, 292 .rss_hash_update = ena_rss_hash_update, 293 .rss_hash_conf_get = ena_rss_hash_conf_get, 294 .tx_done_cleanup = ena_tx_cleanup, 295 }; 296 297 /********************************************************************* 298 * Multi-Process communication bits 299 *********************************************************************/ 300 /* rte_mp IPC message name */ 301 #define ENA_MP_NAME "net_ena_mp" 302 /* Request timeout in seconds */ 303 #define ENA_MP_REQ_TMO 5 304 305 /** Proxy request type */ 306 enum ena_mp_req { 307 ENA_MP_DEV_STATS_GET, 308 ENA_MP_ENI_STATS_GET, 309 ENA_MP_MTU_SET, 310 ENA_MP_IND_TBL_GET, 311 ENA_MP_IND_TBL_SET 312 }; 313 314 /** Proxy message body. Shared between requests and responses. */ 315 struct ena_mp_body { 316 /* Message type */ 317 enum ena_mp_req type; 318 int port_id; 319 /* Processing result. Set in replies. 0 if message succeeded, negative 320 * error code otherwise. 
321 */ 322 int result; 323 union { 324 int mtu; /* For ENA_MP_MTU_SET */ 325 } args; 326 }; 327 328 /** 329 * Initialize IPC message. 330 * 331 * @param[out] msg 332 * Pointer to the message to initialize. 333 * @param[in] type 334 * Message type. 335 * @param[in] port_id 336 * Port ID of target device. 337 * 338 */ 339 static void 340 mp_msg_init(struct rte_mp_msg *msg, enum ena_mp_req type, int port_id) 341 { 342 struct ena_mp_body *body = (struct ena_mp_body *)&msg->param; 343 344 memset(msg, 0, sizeof(*msg)); 345 strlcpy(msg->name, ENA_MP_NAME, sizeof(msg->name)); 346 msg->len_param = sizeof(*body); 347 body->type = type; 348 body->port_id = port_id; 349 } 350 351 /********************************************************************* 352 * Multi-Process communication PMD API 353 *********************************************************************/ 354 /** 355 * Define proxy request descriptor 356 * 357 * Used to define all structures and functions required for proxying a given 358 * function to the primary process including the code to perform to prepare the 359 * request and process the response. 360 * 361 * @param[in] f 362 * Name of the function to proxy 363 * @param[in] t 364 * Message type to use 365 * @param[in] prep 366 * Body of a function to prepare the request in form of a statement 367 * expression. It is passed all the original function arguments along with two 368 * extra ones: 369 * - struct ena_adapter *adapter - PMD data of the device calling the proxy. 370 * - struct ena_mp_body *req - body of a request to prepare. 371 * @param[in] proc 372 * Body of a function to process the response in form of a statement 373 * expression. It is passed all the original function arguments along with two 374 * extra ones: 375 * - struct ena_adapter *adapter - PMD data of the device calling the proxy. 376 * - struct ena_mp_body *rsp - body of a response to process. 377 * @param ... 378 * Proxied function's arguments 379 * 380 * @note Inside prep and proc any parameters which aren't used should be marked 381 * as such (with ENA_TOUCH or __rte_unused). 382 */ 383 #define ENA_PROXY_DESC(f, t, prep, proc, ...) \ 384 static const enum ena_mp_req mp_type_ ## f = t; \ 385 static const char *mp_name_ ## f = #t; \ 386 static void mp_prep_ ## f(struct ena_adapter *adapter, \ 387 struct ena_mp_body *req, \ 388 __VA_ARGS__) \ 389 { \ 390 prep; \ 391 } \ 392 static void mp_proc_ ## f(struct ena_adapter *adapter, \ 393 struct ena_mp_body *rsp, \ 394 __VA_ARGS__) \ 395 { \ 396 proc; \ 397 } 398 399 /** 400 * Proxy wrapper for calling primary functions in a secondary process. 401 * 402 * Depending on whether called in primary or secondary process, calls the 403 * @p func directly or proxies the call to the primary process via rte_mp IPC. 404 * This macro requires a proxy request descriptor to be defined for @p func 405 * using ENA_PROXY_DESC() macro. 406 * 407 * @param[in/out] a 408 * Device PMD data. Used for sending the message and sharing message results 409 * between primary and secondary. 410 * @param[in] f 411 * Function to proxy. 412 * @param ... 413 * Arguments of @p func. 414 * 415 * @return 416 * - 0: Processing succeeded and response handler was called. 417 * - -EPERM: IPC is unavailable on this platform. This means only primary 418 * process may call the proxied function. 419 * - -EIO: IPC returned error on request send. Inspect rte_errno detailed 420 * error code. 421 * - Negative error code from the proxied function. 422 * 423 * @note This mechanism is geared towards control-path tasks. 
Avoid calling it 424 * in fast-path unless unbound delays are allowed. This is due to the IPC 425 * mechanism itself (socket based). 426 * @note Due to IPC parameter size limitations the proxy logic shares call 427 * results through the struct ena_adapter shared memory. This makes the 428 * proxy mechanism strictly single-threaded. Therefore be sure to make all 429 * calls to the same proxied function under the same lock. 430 */ 431 #define ENA_PROXY(a, f, ...) \ 432 ({ \ 433 struct ena_adapter *_a = (a); \ 434 struct timespec ts = { .tv_sec = ENA_MP_REQ_TMO }; \ 435 struct ena_mp_body *req, *rsp; \ 436 struct rte_mp_reply mp_rep; \ 437 struct rte_mp_msg mp_req; \ 438 int ret; \ 439 \ 440 if (rte_eal_process_type() == RTE_PROC_PRIMARY) { \ 441 ret = f(__VA_ARGS__); \ 442 } else { \ 443 /* Prepare and send request */ \ 444 req = (struct ena_mp_body *)&mp_req.param; \ 445 mp_msg_init(&mp_req, mp_type_ ## f, _a->edev_data->port_id); \ 446 mp_prep_ ## f(_a, req, ## __VA_ARGS__); \ 447 \ 448 ret = rte_mp_request_sync(&mp_req, &mp_rep, &ts); \ 449 if (likely(!ret)) { \ 450 RTE_ASSERT(mp_rep.nb_received == 1); \ 451 rsp = (struct ena_mp_body *)&mp_rep.msgs[0].param; \ 452 ret = rsp->result; \ 453 if (ret == 0) { \ 454 mp_proc_##f(_a, rsp, ## __VA_ARGS__); \ 455 } else { \ 456 PMD_DRV_LOG(ERR, \ 457 "%s returned error: %d\n", \ 458 mp_name_ ## f, rsp->result);\ 459 } \ 460 free(mp_rep.msgs); \ 461 } else if (rte_errno == ENOTSUP) { \ 462 PMD_DRV_LOG(ERR, \ 463 "No IPC, can't proxy to primary\n");\ 464 ret = -rte_errno; \ 465 } else { \ 466 PMD_DRV_LOG(ERR, "Request %s failed: %s\n", \ 467 mp_name_ ## f, \ 468 rte_strerror(rte_errno)); \ 469 ret = -EIO; \ 470 } \ 471 } \ 472 ret; \ 473 }) 474 475 /********************************************************************* 476 * Multi-Process communication request descriptors 477 *********************************************************************/ 478 479 ENA_PROXY_DESC(ena_com_get_dev_basic_stats, ENA_MP_DEV_STATS_GET, 480 ({ 481 ENA_TOUCH(adapter); 482 ENA_TOUCH(req); 483 ENA_TOUCH(ena_dev); 484 ENA_TOUCH(stats); 485 }), 486 ({ 487 ENA_TOUCH(rsp); 488 ENA_TOUCH(ena_dev); 489 if (stats != &adapter->basic_stats) 490 rte_memcpy(stats, &adapter->basic_stats, sizeof(*stats)); 491 }), 492 struct ena_com_dev *ena_dev, struct ena_admin_basic_stats *stats); 493 494 ENA_PROXY_DESC(ena_com_get_eni_stats, ENA_MP_ENI_STATS_GET, 495 ({ 496 ENA_TOUCH(adapter); 497 ENA_TOUCH(req); 498 ENA_TOUCH(ena_dev); 499 ENA_TOUCH(stats); 500 }), 501 ({ 502 ENA_TOUCH(rsp); 503 ENA_TOUCH(ena_dev); 504 if (stats != (struct ena_admin_eni_stats *)&adapter->eni_stats) 505 rte_memcpy(stats, &adapter->eni_stats, sizeof(*stats)); 506 }), 507 struct ena_com_dev *ena_dev, struct ena_admin_eni_stats *stats); 508 509 ENA_PROXY_DESC(ena_com_set_dev_mtu, ENA_MP_MTU_SET, 510 ({ 511 ENA_TOUCH(adapter); 512 ENA_TOUCH(ena_dev); 513 req->args.mtu = mtu; 514 }), 515 ({ 516 ENA_TOUCH(adapter); 517 ENA_TOUCH(rsp); 518 ENA_TOUCH(ena_dev); 519 ENA_TOUCH(mtu); 520 }), 521 struct ena_com_dev *ena_dev, int mtu); 522 523 ENA_PROXY_DESC(ena_com_indirect_table_set, ENA_MP_IND_TBL_SET, 524 ({ 525 ENA_TOUCH(adapter); 526 ENA_TOUCH(req); 527 ENA_TOUCH(ena_dev); 528 }), 529 ({ 530 ENA_TOUCH(adapter); 531 ENA_TOUCH(rsp); 532 ENA_TOUCH(ena_dev); 533 }), 534 struct ena_com_dev *ena_dev); 535 536 ENA_PROXY_DESC(ena_com_indirect_table_get, ENA_MP_IND_TBL_GET, 537 ({ 538 ENA_TOUCH(adapter); 539 ENA_TOUCH(req); 540 ENA_TOUCH(ena_dev); 541 ENA_TOUCH(ind_tbl); 542 }), 543 ({ 544 ENA_TOUCH(rsp); 545 ENA_TOUCH(ena_dev); 546 
if (ind_tbl != adapter->indirect_table) 547 rte_memcpy(ind_tbl, adapter->indirect_table, 548 sizeof(adapter->indirect_table)); 549 }), 550 struct ena_com_dev *ena_dev, u32 *ind_tbl); 551 552 static inline void ena_trigger_reset(struct ena_adapter *adapter, 553 enum ena_regs_reset_reason_types reason) 554 { 555 if (likely(!adapter->trigger_reset)) { 556 adapter->reset_reason = reason; 557 adapter->trigger_reset = true; 558 } 559 } 560 561 static inline void ena_rx_mbuf_prepare(struct ena_ring *rx_ring, 562 struct rte_mbuf *mbuf, 563 struct ena_com_rx_ctx *ena_rx_ctx, 564 bool fill_hash) 565 { 566 struct ena_stats_rx *rx_stats = &rx_ring->rx_stats; 567 uint64_t ol_flags = 0; 568 uint32_t packet_type = 0; 569 570 if (ena_rx_ctx->l4_proto == ENA_ETH_IO_L4_PROTO_TCP) 571 packet_type |= RTE_PTYPE_L4_TCP; 572 else if (ena_rx_ctx->l4_proto == ENA_ETH_IO_L4_PROTO_UDP) 573 packet_type |= RTE_PTYPE_L4_UDP; 574 575 if (ena_rx_ctx->l3_proto == ENA_ETH_IO_L3_PROTO_IPV4) { 576 packet_type |= RTE_PTYPE_L3_IPV4; 577 if (unlikely(ena_rx_ctx->l3_csum_err)) { 578 ++rx_stats->l3_csum_bad; 579 ol_flags |= RTE_MBUF_F_RX_IP_CKSUM_BAD; 580 } else { 581 ol_flags |= RTE_MBUF_F_RX_IP_CKSUM_GOOD; 582 } 583 } else if (ena_rx_ctx->l3_proto == ENA_ETH_IO_L3_PROTO_IPV6) { 584 packet_type |= RTE_PTYPE_L3_IPV6; 585 } 586 587 if (!ena_rx_ctx->l4_csum_checked || ena_rx_ctx->frag) { 588 ol_flags |= RTE_MBUF_F_RX_L4_CKSUM_UNKNOWN; 589 } else { 590 if (unlikely(ena_rx_ctx->l4_csum_err)) { 591 ++rx_stats->l4_csum_bad; 592 /* 593 * For the L4 Rx checksum offload the HW may indicate 594 * bad checksum although it's valid. Because of that, 595 * we're setting the UNKNOWN flag to let the app 596 * re-verify the checksum. 597 */ 598 ol_flags |= RTE_MBUF_F_RX_L4_CKSUM_UNKNOWN; 599 } else { 600 ++rx_stats->l4_csum_good; 601 ol_flags |= RTE_MBUF_F_RX_L4_CKSUM_GOOD; 602 } 603 } 604 605 if (fill_hash && 606 likely((packet_type & ENA_PTYPE_HAS_HASH) && !ena_rx_ctx->frag)) { 607 ol_flags |= RTE_MBUF_F_RX_RSS_HASH; 608 mbuf->hash.rss = ena_rx_ctx->hash; 609 } 610 611 mbuf->ol_flags = ol_flags; 612 mbuf->packet_type = packet_type; 613 } 614 615 static inline void ena_tx_mbuf_prepare(struct rte_mbuf *mbuf, 616 struct ena_com_tx_ctx *ena_tx_ctx, 617 uint64_t queue_offloads, 618 bool disable_meta_caching) 619 { 620 struct ena_com_tx_meta *ena_meta = &ena_tx_ctx->ena_meta; 621 622 if ((mbuf->ol_flags & MBUF_OFFLOADS) && 623 (queue_offloads & QUEUE_OFFLOADS)) { 624 /* check if TSO is required */ 625 if ((mbuf->ol_flags & RTE_MBUF_F_TX_TCP_SEG) && 626 (queue_offloads & RTE_ETH_TX_OFFLOAD_TCP_TSO)) { 627 ena_tx_ctx->tso_enable = true; 628 629 ena_meta->l4_hdr_len = GET_L4_HDR_LEN(mbuf); 630 } 631 632 /* check if L3 checksum is needed */ 633 if ((mbuf->ol_flags & RTE_MBUF_F_TX_IP_CKSUM) && 634 (queue_offloads & RTE_ETH_TX_OFFLOAD_IPV4_CKSUM)) 635 ena_tx_ctx->l3_csum_enable = true; 636 637 if (mbuf->ol_flags & RTE_MBUF_F_TX_IPV6) { 638 ena_tx_ctx->l3_proto = ENA_ETH_IO_L3_PROTO_IPV6; 639 /* For the IPv6 packets, DF always needs to be true. 
*/ 640 ena_tx_ctx->df = 1; 641 } else { 642 ena_tx_ctx->l3_proto = ENA_ETH_IO_L3_PROTO_IPV4; 643 644 /* set don't fragment (DF) flag */ 645 if (mbuf->packet_type & 646 (RTE_PTYPE_L4_NONFRAG 647 | RTE_PTYPE_INNER_L4_NONFRAG)) 648 ena_tx_ctx->df = 1; 649 } 650 651 /* check if L4 checksum is needed */ 652 if (((mbuf->ol_flags & RTE_MBUF_F_TX_L4_MASK) == RTE_MBUF_F_TX_TCP_CKSUM) && 653 (queue_offloads & RTE_ETH_TX_OFFLOAD_TCP_CKSUM)) { 654 ena_tx_ctx->l4_proto = ENA_ETH_IO_L4_PROTO_TCP; 655 ena_tx_ctx->l4_csum_enable = true; 656 } else if (((mbuf->ol_flags & RTE_MBUF_F_TX_L4_MASK) == 657 RTE_MBUF_F_TX_UDP_CKSUM) && 658 (queue_offloads & RTE_ETH_TX_OFFLOAD_UDP_CKSUM)) { 659 ena_tx_ctx->l4_proto = ENA_ETH_IO_L4_PROTO_UDP; 660 ena_tx_ctx->l4_csum_enable = true; 661 } else { 662 ena_tx_ctx->l4_proto = ENA_ETH_IO_L4_PROTO_UNKNOWN; 663 ena_tx_ctx->l4_csum_enable = false; 664 } 665 666 ena_meta->mss = mbuf->tso_segsz; 667 ena_meta->l3_hdr_len = mbuf->l3_len; 668 ena_meta->l3_hdr_offset = mbuf->l2_len; 669 670 ena_tx_ctx->meta_valid = true; 671 } else if (disable_meta_caching) { 672 memset(ena_meta, 0, sizeof(*ena_meta)); 673 ena_tx_ctx->meta_valid = true; 674 } else { 675 ena_tx_ctx->meta_valid = false; 676 } 677 } 678 679 static int validate_tx_req_id(struct ena_ring *tx_ring, u16 req_id) 680 { 681 struct ena_tx_buffer *tx_info = NULL; 682 683 if (likely(req_id < tx_ring->ring_size)) { 684 tx_info = &tx_ring->tx_buffer_info[req_id]; 685 if (likely(tx_info->mbuf)) 686 return 0; 687 } 688 689 if (tx_info) 690 PMD_TX_LOG(ERR, "tx_info doesn't have valid mbuf. queue %d:%d req_id %u\n", 691 tx_ring->port_id, tx_ring->id, req_id); 692 else 693 PMD_TX_LOG(ERR, "Invalid req_id: %hu in queue %d:%d\n", 694 req_id, tx_ring->port_id, tx_ring->id); 695 696 /* Trigger device reset */ 697 ++tx_ring->tx_stats.bad_req_id; 698 ena_trigger_reset(tx_ring->adapter, ENA_REGS_RESET_INV_TX_REQ_ID); 699 return -EFAULT; 700 } 701 702 static void ena_config_host_info(struct ena_com_dev *ena_dev) 703 { 704 struct ena_admin_host_info *host_info; 705 int rc; 706 707 /* Allocate only the host info */ 708 rc = ena_com_allocate_host_info(ena_dev); 709 if (rc) { 710 PMD_DRV_LOG(ERR, "Cannot allocate host info\n"); 711 return; 712 } 713 714 host_info = ena_dev->host_attr.host_info; 715 716 host_info->os_type = ENA_ADMIN_OS_DPDK; 717 host_info->kernel_ver = RTE_VERSION; 718 strlcpy((char *)host_info->kernel_ver_str, rte_version(), 719 sizeof(host_info->kernel_ver_str)); 720 host_info->os_dist = RTE_VERSION; 721 strlcpy((char *)host_info->os_dist_str, rte_version(), 722 sizeof(host_info->os_dist_str)); 723 host_info->driver_version = 724 (DRV_MODULE_VER_MAJOR) | 725 (DRV_MODULE_VER_MINOR << ENA_ADMIN_HOST_INFO_MINOR_SHIFT) | 726 (DRV_MODULE_VER_SUBMINOR << 727 ENA_ADMIN_HOST_INFO_SUB_MINOR_SHIFT); 728 host_info->num_cpus = rte_lcore_count(); 729 730 host_info->driver_supported_features = 731 ENA_ADMIN_HOST_INFO_RX_OFFSET_MASK | 732 ENA_ADMIN_HOST_INFO_RSS_CONFIGURABLE_FUNCTION_KEY_MASK; 733 734 rc = ena_com_set_host_attributes(ena_dev); 735 if (rc) { 736 if (rc == -ENA_COM_UNSUPPORTED) 737 PMD_DRV_LOG(WARNING, "Cannot set host attributes\n"); 738 else 739 PMD_DRV_LOG(ERR, "Cannot set host attributes\n"); 740 741 goto err; 742 } 743 744 return; 745 746 err: 747 ena_com_delete_host_info(ena_dev); 748 } 749 750 /* This function calculates the number of xstats based on the current config */ 751 static unsigned int ena_xstats_calc_num(struct rte_eth_dev_data *data) 752 { 753 return ENA_STATS_ARRAY_GLOBAL + ENA_STATS_ARRAY_ENI + 754 
	       (data->nb_tx_queues * ENA_STATS_ARRAY_TX) +
	       (data->nb_rx_queues * ENA_STATS_ARRAY_RX);
}

static void ena_config_debug_area(struct ena_adapter *adapter)
{
	u32 debug_area_size;
	int rc, ss_count;

	ss_count = ena_xstats_calc_num(adapter->edev_data);

	/* Allocate 32 bytes for each string and 64 bits for the value */
	debug_area_size = ss_count * ETH_GSTRING_LEN + sizeof(u64) * ss_count;

	rc = ena_com_allocate_debug_area(&adapter->ena_dev, debug_area_size);
	if (rc) {
		PMD_DRV_LOG(ERR, "Cannot allocate debug area\n");
		return;
	}

	rc = ena_com_set_host_attributes(&adapter->ena_dev);
	if (rc) {
		if (rc == -ENA_COM_UNSUPPORTED)
			PMD_DRV_LOG(WARNING, "Cannot set host attributes\n");
		else
			PMD_DRV_LOG(ERR, "Cannot set host attributes\n");

		goto err;
	}

	return;
err:
	ena_com_delete_debug_area(&adapter->ena_dev);
}

static int ena_close(struct rte_eth_dev *dev)
{
	struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev);
	struct rte_intr_handle *intr_handle = pci_dev->intr_handle;
	struct ena_adapter *adapter = dev->data->dev_private;
	int ret = 0;

	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
		return 0;

	if (adapter->state == ENA_ADAPTER_STATE_RUNNING)
		ret = ena_stop(dev);
	adapter->state = ENA_ADAPTER_STATE_CLOSED;

	ena_rx_queue_release_all(dev);
	ena_tx_queue_release_all(dev);

	rte_free(adapter->drv_stats);
	adapter->drv_stats = NULL;

	rte_intr_disable(intr_handle);
	rte_intr_callback_unregister(intr_handle,
				     ena_interrupt_handler_rte,
				     dev);

	/*
	 * MAC is not allocated dynamically. Setting it to NULL prevents the
	 * resource from being released in rte_eth_dev_release_port().
817 */ 818 dev->data->mac_addrs = NULL; 819 820 return ret; 821 } 822 823 static int 824 ena_dev_reset(struct rte_eth_dev *dev) 825 { 826 int rc = 0; 827 828 /* Cannot release memory in secondary process */ 829 if (rte_eal_process_type() != RTE_PROC_PRIMARY) { 830 PMD_DRV_LOG(WARNING, "dev_reset not supported in secondary.\n"); 831 return -EPERM; 832 } 833 834 ena_destroy_device(dev); 835 rc = eth_ena_dev_init(dev); 836 if (rc) 837 PMD_INIT_LOG(CRIT, "Cannot initialize device\n"); 838 839 return rc; 840 } 841 842 static void ena_rx_queue_release_all(struct rte_eth_dev *dev) 843 { 844 int nb_queues = dev->data->nb_rx_queues; 845 int i; 846 847 for (i = 0; i < nb_queues; i++) 848 ena_rx_queue_release(dev, i); 849 } 850 851 static void ena_tx_queue_release_all(struct rte_eth_dev *dev) 852 { 853 int nb_queues = dev->data->nb_tx_queues; 854 int i; 855 856 for (i = 0; i < nb_queues; i++) 857 ena_tx_queue_release(dev, i); 858 } 859 860 static void ena_rx_queue_release(struct rte_eth_dev *dev, uint16_t qid) 861 { 862 struct ena_ring *ring = dev->data->rx_queues[qid]; 863 864 /* Free ring resources */ 865 rte_free(ring->rx_buffer_info); 866 ring->rx_buffer_info = NULL; 867 868 rte_free(ring->rx_refill_buffer); 869 ring->rx_refill_buffer = NULL; 870 871 rte_free(ring->empty_rx_reqs); 872 ring->empty_rx_reqs = NULL; 873 874 ring->configured = 0; 875 876 PMD_DRV_LOG(NOTICE, "Rx queue %d:%d released\n", 877 ring->port_id, ring->id); 878 } 879 880 static void ena_tx_queue_release(struct rte_eth_dev *dev, uint16_t qid) 881 { 882 struct ena_ring *ring = dev->data->tx_queues[qid]; 883 884 /* Free ring resources */ 885 rte_free(ring->push_buf_intermediate_buf); 886 887 rte_free(ring->tx_buffer_info); 888 889 rte_free(ring->empty_tx_reqs); 890 891 ring->empty_tx_reqs = NULL; 892 ring->tx_buffer_info = NULL; 893 ring->push_buf_intermediate_buf = NULL; 894 895 ring->configured = 0; 896 897 PMD_DRV_LOG(NOTICE, "Tx queue %d:%d released\n", 898 ring->port_id, ring->id); 899 } 900 901 static void ena_rx_queue_release_bufs(struct ena_ring *ring) 902 { 903 unsigned int i; 904 905 for (i = 0; i < ring->ring_size; ++i) { 906 struct ena_rx_buffer *rx_info = &ring->rx_buffer_info[i]; 907 if (rx_info->mbuf) { 908 rte_mbuf_raw_free(rx_info->mbuf); 909 rx_info->mbuf = NULL; 910 } 911 } 912 } 913 914 static void ena_tx_queue_release_bufs(struct ena_ring *ring) 915 { 916 unsigned int i; 917 918 for (i = 0; i < ring->ring_size; ++i) { 919 struct ena_tx_buffer *tx_buf = &ring->tx_buffer_info[i]; 920 921 if (tx_buf->mbuf) { 922 rte_pktmbuf_free(tx_buf->mbuf); 923 tx_buf->mbuf = NULL; 924 } 925 } 926 } 927 928 static int ena_link_update(struct rte_eth_dev *dev, 929 __rte_unused int wait_to_complete) 930 { 931 struct rte_eth_link *link = &dev->data->dev_link; 932 struct ena_adapter *adapter = dev->data->dev_private; 933 934 link->link_status = adapter->link_status ? 
RTE_ETH_LINK_UP : RTE_ETH_LINK_DOWN; 935 link->link_speed = RTE_ETH_SPEED_NUM_NONE; 936 link->link_duplex = RTE_ETH_LINK_FULL_DUPLEX; 937 938 return 0; 939 } 940 941 static int ena_queue_start_all(struct rte_eth_dev *dev, 942 enum ena_ring_type ring_type) 943 { 944 struct ena_adapter *adapter = dev->data->dev_private; 945 struct ena_ring *queues = NULL; 946 int nb_queues; 947 int i = 0; 948 int rc = 0; 949 950 if (ring_type == ENA_RING_TYPE_RX) { 951 queues = adapter->rx_ring; 952 nb_queues = dev->data->nb_rx_queues; 953 } else { 954 queues = adapter->tx_ring; 955 nb_queues = dev->data->nb_tx_queues; 956 } 957 for (i = 0; i < nb_queues; i++) { 958 if (queues[i].configured) { 959 if (ring_type == ENA_RING_TYPE_RX) { 960 ena_assert_msg( 961 dev->data->rx_queues[i] == &queues[i], 962 "Inconsistent state of Rx queues\n"); 963 } else { 964 ena_assert_msg( 965 dev->data->tx_queues[i] == &queues[i], 966 "Inconsistent state of Tx queues\n"); 967 } 968 969 rc = ena_queue_start(dev, &queues[i]); 970 971 if (rc) { 972 PMD_INIT_LOG(ERR, 973 "Failed to start queue[%d] of type(%d)\n", 974 i, ring_type); 975 goto err; 976 } 977 } 978 } 979 980 return 0; 981 982 err: 983 while (i--) 984 if (queues[i].configured) 985 ena_queue_stop(&queues[i]); 986 987 return rc; 988 } 989 990 static int ena_check_valid_conf(struct ena_adapter *adapter) 991 { 992 uint32_t mtu = adapter->edev_data->mtu; 993 994 if (mtu > adapter->max_mtu || mtu < ENA_MIN_MTU) { 995 PMD_INIT_LOG(ERR, 996 "Unsupported MTU of %d. Max MTU: %d, min MTU: %d\n", 997 mtu, adapter->max_mtu, ENA_MIN_MTU); 998 return ENA_COM_UNSUPPORTED; 999 } 1000 1001 return 0; 1002 } 1003 1004 static int 1005 ena_calc_io_queue_size(struct ena_calc_queue_size_ctx *ctx, 1006 bool use_large_llq_hdr) 1007 { 1008 struct ena_admin_feature_llq_desc *llq = &ctx->get_feat_ctx->llq; 1009 struct ena_com_dev *ena_dev = ctx->ena_dev; 1010 uint32_t max_tx_queue_size; 1011 uint32_t max_rx_queue_size; 1012 1013 if (ena_dev->supported_features & BIT(ENA_ADMIN_MAX_QUEUES_EXT)) { 1014 struct ena_admin_queue_ext_feature_fields *max_queue_ext = 1015 &ctx->get_feat_ctx->max_queue_ext.max_queue_ext; 1016 max_rx_queue_size = RTE_MIN(max_queue_ext->max_rx_cq_depth, 1017 max_queue_ext->max_rx_sq_depth); 1018 max_tx_queue_size = max_queue_ext->max_tx_cq_depth; 1019 1020 if (ena_dev->tx_mem_queue_type == 1021 ENA_ADMIN_PLACEMENT_POLICY_DEV) { 1022 max_tx_queue_size = RTE_MIN(max_tx_queue_size, 1023 llq->max_llq_depth); 1024 } else { 1025 max_tx_queue_size = RTE_MIN(max_tx_queue_size, 1026 max_queue_ext->max_tx_sq_depth); 1027 } 1028 1029 ctx->max_rx_sgl_size = RTE_MIN(ENA_PKT_MAX_BUFS, 1030 max_queue_ext->max_per_packet_rx_descs); 1031 ctx->max_tx_sgl_size = RTE_MIN(ENA_PKT_MAX_BUFS, 1032 max_queue_ext->max_per_packet_tx_descs); 1033 } else { 1034 struct ena_admin_queue_feature_desc *max_queues = 1035 &ctx->get_feat_ctx->max_queues; 1036 max_rx_queue_size = RTE_MIN(max_queues->max_cq_depth, 1037 max_queues->max_sq_depth); 1038 max_tx_queue_size = max_queues->max_cq_depth; 1039 1040 if (ena_dev->tx_mem_queue_type == 1041 ENA_ADMIN_PLACEMENT_POLICY_DEV) { 1042 max_tx_queue_size = RTE_MIN(max_tx_queue_size, 1043 llq->max_llq_depth); 1044 } else { 1045 max_tx_queue_size = RTE_MIN(max_tx_queue_size, 1046 max_queues->max_sq_depth); 1047 } 1048 1049 ctx->max_rx_sgl_size = RTE_MIN(ENA_PKT_MAX_BUFS, 1050 max_queues->max_packet_rx_descs); 1051 ctx->max_tx_sgl_size = RTE_MIN(ENA_PKT_MAX_BUFS, 1052 max_queues->max_packet_tx_descs); 1053 } 1054 1055 /* Round down to the nearest power of 2 */ 1056 
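	/* For example, a device-reported depth of 1000 becomes 512 after this
	 * step; values that are already a power of two are left unchanged by
	 * rte_align32prevpow2().
	 */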
max_rx_queue_size = rte_align32prevpow2(max_rx_queue_size); 1057 max_tx_queue_size = rte_align32prevpow2(max_tx_queue_size); 1058 1059 if (use_large_llq_hdr) { 1060 if ((llq->entry_size_ctrl_supported & 1061 ENA_ADMIN_LIST_ENTRY_SIZE_256B) && 1062 (ena_dev->tx_mem_queue_type == 1063 ENA_ADMIN_PLACEMENT_POLICY_DEV)) { 1064 max_tx_queue_size /= 2; 1065 PMD_INIT_LOG(INFO, 1066 "Forcing large headers and decreasing maximum Tx queue size to %d\n", 1067 max_tx_queue_size); 1068 } else { 1069 PMD_INIT_LOG(ERR, 1070 "Forcing large headers failed: LLQ is disabled or device does not support large headers\n"); 1071 } 1072 } 1073 1074 if (unlikely(max_rx_queue_size == 0 || max_tx_queue_size == 0)) { 1075 PMD_INIT_LOG(ERR, "Invalid queue size\n"); 1076 return -EFAULT; 1077 } 1078 1079 ctx->max_tx_queue_size = max_tx_queue_size; 1080 ctx->max_rx_queue_size = max_rx_queue_size; 1081 1082 return 0; 1083 } 1084 1085 static void ena_stats_restart(struct rte_eth_dev *dev) 1086 { 1087 struct ena_adapter *adapter = dev->data->dev_private; 1088 1089 rte_atomic64_init(&adapter->drv_stats->ierrors); 1090 rte_atomic64_init(&adapter->drv_stats->oerrors); 1091 rte_atomic64_init(&adapter->drv_stats->rx_nombuf); 1092 adapter->drv_stats->rx_drops = 0; 1093 } 1094 1095 static int ena_stats_get(struct rte_eth_dev *dev, 1096 struct rte_eth_stats *stats) 1097 { 1098 struct ena_admin_basic_stats ena_stats; 1099 struct ena_adapter *adapter = dev->data->dev_private; 1100 struct ena_com_dev *ena_dev = &adapter->ena_dev; 1101 int rc; 1102 int i; 1103 int max_rings_stats; 1104 1105 memset(&ena_stats, 0, sizeof(ena_stats)); 1106 1107 rte_spinlock_lock(&adapter->admin_lock); 1108 rc = ENA_PROXY(adapter, ena_com_get_dev_basic_stats, ena_dev, 1109 &ena_stats); 1110 rte_spinlock_unlock(&adapter->admin_lock); 1111 if (unlikely(rc)) { 1112 PMD_DRV_LOG(ERR, "Could not retrieve statistics from ENA\n"); 1113 return rc; 1114 } 1115 1116 /* Set of basic statistics from ENA */ 1117 stats->ipackets = __MERGE_64B_H_L(ena_stats.rx_pkts_high, 1118 ena_stats.rx_pkts_low); 1119 stats->opackets = __MERGE_64B_H_L(ena_stats.tx_pkts_high, 1120 ena_stats.tx_pkts_low); 1121 stats->ibytes = __MERGE_64B_H_L(ena_stats.rx_bytes_high, 1122 ena_stats.rx_bytes_low); 1123 stats->obytes = __MERGE_64B_H_L(ena_stats.tx_bytes_high, 1124 ena_stats.tx_bytes_low); 1125 1126 /* Driver related stats */ 1127 stats->imissed = adapter->drv_stats->rx_drops; 1128 stats->ierrors = rte_atomic64_read(&adapter->drv_stats->ierrors); 1129 stats->oerrors = rte_atomic64_read(&adapter->drv_stats->oerrors); 1130 stats->rx_nombuf = rte_atomic64_read(&adapter->drv_stats->rx_nombuf); 1131 1132 max_rings_stats = RTE_MIN(dev->data->nb_rx_queues, 1133 RTE_ETHDEV_QUEUE_STAT_CNTRS); 1134 for (i = 0; i < max_rings_stats; ++i) { 1135 struct ena_stats_rx *rx_stats = &adapter->rx_ring[i].rx_stats; 1136 1137 stats->q_ibytes[i] = rx_stats->bytes; 1138 stats->q_ipackets[i] = rx_stats->cnt; 1139 stats->q_errors[i] = rx_stats->bad_desc_num + 1140 rx_stats->bad_req_id; 1141 } 1142 1143 max_rings_stats = RTE_MIN(dev->data->nb_tx_queues, 1144 RTE_ETHDEV_QUEUE_STAT_CNTRS); 1145 for (i = 0; i < max_rings_stats; ++i) { 1146 struct ena_stats_tx *tx_stats = &adapter->tx_ring[i].tx_stats; 1147 1148 stats->q_obytes[i] = tx_stats->bytes; 1149 stats->q_opackets[i] = tx_stats->cnt; 1150 } 1151 1152 return 0; 1153 } 1154 1155 static int ena_mtu_set(struct rte_eth_dev *dev, uint16_t mtu) 1156 { 1157 struct ena_adapter *adapter; 1158 struct ena_com_dev *ena_dev; 1159 int rc = 0; 1160 1161 ena_assert_msg(dev->data != 
NULL, "Uninitialized device\n"); 1162 ena_assert_msg(dev->data->dev_private != NULL, "Uninitialized device\n"); 1163 adapter = dev->data->dev_private; 1164 1165 ena_dev = &adapter->ena_dev; 1166 ena_assert_msg(ena_dev != NULL, "Uninitialized device\n"); 1167 1168 if (mtu > adapter->max_mtu || mtu < ENA_MIN_MTU) { 1169 PMD_DRV_LOG(ERR, 1170 "Invalid MTU setting. New MTU: %d, max MTU: %d, min MTU: %d\n", 1171 mtu, adapter->max_mtu, ENA_MIN_MTU); 1172 return -EINVAL; 1173 } 1174 1175 rc = ENA_PROXY(adapter, ena_com_set_dev_mtu, ena_dev, mtu); 1176 if (rc) 1177 PMD_DRV_LOG(ERR, "Could not set MTU: %d\n", mtu); 1178 else 1179 PMD_DRV_LOG(NOTICE, "MTU set to: %d\n", mtu); 1180 1181 return rc; 1182 } 1183 1184 static int ena_start(struct rte_eth_dev *dev) 1185 { 1186 struct ena_adapter *adapter = dev->data->dev_private; 1187 uint64_t ticks; 1188 int rc = 0; 1189 1190 /* Cannot allocate memory in secondary process */ 1191 if (rte_eal_process_type() != RTE_PROC_PRIMARY) { 1192 PMD_DRV_LOG(WARNING, "dev_start not supported in secondary.\n"); 1193 return -EPERM; 1194 } 1195 1196 rc = ena_check_valid_conf(adapter); 1197 if (rc) 1198 return rc; 1199 1200 rc = ena_setup_rx_intr(dev); 1201 if (rc) 1202 return rc; 1203 1204 rc = ena_queue_start_all(dev, ENA_RING_TYPE_RX); 1205 if (rc) 1206 return rc; 1207 1208 rc = ena_queue_start_all(dev, ENA_RING_TYPE_TX); 1209 if (rc) 1210 goto err_start_tx; 1211 1212 if (adapter->edev_data->dev_conf.rxmode.mq_mode & RTE_ETH_MQ_RX_RSS_FLAG) { 1213 rc = ena_rss_configure(adapter); 1214 if (rc) 1215 goto err_rss_init; 1216 } 1217 1218 ena_stats_restart(dev); 1219 1220 adapter->timestamp_wd = rte_get_timer_cycles(); 1221 adapter->keep_alive_timeout = ENA_DEVICE_KALIVE_TIMEOUT; 1222 1223 ticks = rte_get_timer_hz(); 1224 rte_timer_reset(&adapter->timer_wd, ticks, PERIODICAL, rte_lcore_id(), 1225 ena_timer_wd_callback, dev); 1226 1227 ++adapter->dev_stats.dev_start; 1228 adapter->state = ENA_ADAPTER_STATE_RUNNING; 1229 1230 return 0; 1231 1232 err_rss_init: 1233 ena_queue_stop_all(dev, ENA_RING_TYPE_TX); 1234 err_start_tx: 1235 ena_queue_stop_all(dev, ENA_RING_TYPE_RX); 1236 return rc; 1237 } 1238 1239 static int ena_stop(struct rte_eth_dev *dev) 1240 { 1241 struct ena_adapter *adapter = dev->data->dev_private; 1242 struct ena_com_dev *ena_dev = &adapter->ena_dev; 1243 struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev); 1244 struct rte_intr_handle *intr_handle = pci_dev->intr_handle; 1245 int rc; 1246 1247 /* Cannot free memory in secondary process */ 1248 if (rte_eal_process_type() != RTE_PROC_PRIMARY) { 1249 PMD_DRV_LOG(WARNING, "dev_stop not supported in secondary.\n"); 1250 return -EPERM; 1251 } 1252 1253 rte_timer_stop_sync(&adapter->timer_wd); 1254 ena_queue_stop_all(dev, ENA_RING_TYPE_TX); 1255 ena_queue_stop_all(dev, ENA_RING_TYPE_RX); 1256 1257 if (adapter->trigger_reset) { 1258 rc = ena_com_dev_reset(ena_dev, adapter->reset_reason); 1259 if (rc) 1260 PMD_DRV_LOG(ERR, "Device reset failed, rc: %d\n", rc); 1261 } 1262 1263 rte_intr_disable(intr_handle); 1264 1265 rte_intr_efd_disable(intr_handle); 1266 1267 /* Cleanup vector list */ 1268 rte_intr_vec_list_free(intr_handle); 1269 1270 rte_intr_enable(intr_handle); 1271 1272 ++adapter->dev_stats.dev_stop; 1273 adapter->state = ENA_ADAPTER_STATE_STOPPED; 1274 dev->data->dev_started = 0; 1275 1276 return 0; 1277 } 1278 1279 static int ena_create_io_queue(struct rte_eth_dev *dev, struct ena_ring *ring) 1280 { 1281 struct ena_adapter *adapter = ring->adapter; 1282 struct ena_com_dev *ena_dev = &adapter->ena_dev; 1283 
struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev); 1284 struct rte_intr_handle *intr_handle = pci_dev->intr_handle; 1285 struct ena_com_create_io_ctx ctx = 1286 /* policy set to _HOST just to satisfy icc compiler */ 1287 { ENA_ADMIN_PLACEMENT_POLICY_HOST, 1288 0, 0, 0, 0, 0 }; 1289 uint16_t ena_qid; 1290 unsigned int i; 1291 int rc; 1292 1293 ctx.msix_vector = -1; 1294 if (ring->type == ENA_RING_TYPE_TX) { 1295 ena_qid = ENA_IO_TXQ_IDX(ring->id); 1296 ctx.direction = ENA_COM_IO_QUEUE_DIRECTION_TX; 1297 ctx.mem_queue_type = ena_dev->tx_mem_queue_type; 1298 for (i = 0; i < ring->ring_size; i++) 1299 ring->empty_tx_reqs[i] = i; 1300 } else { 1301 ena_qid = ENA_IO_RXQ_IDX(ring->id); 1302 ctx.direction = ENA_COM_IO_QUEUE_DIRECTION_RX; 1303 if (rte_intr_dp_is_en(intr_handle)) 1304 ctx.msix_vector = 1305 rte_intr_vec_list_index_get(intr_handle, 1306 ring->id); 1307 1308 for (i = 0; i < ring->ring_size; i++) 1309 ring->empty_rx_reqs[i] = i; 1310 } 1311 ctx.queue_size = ring->ring_size; 1312 ctx.qid = ena_qid; 1313 ctx.numa_node = ring->numa_socket_id; 1314 1315 rc = ena_com_create_io_queue(ena_dev, &ctx); 1316 if (rc) { 1317 PMD_DRV_LOG(ERR, 1318 "Failed to create IO queue[%d] (qid:%d), rc: %d\n", 1319 ring->id, ena_qid, rc); 1320 return rc; 1321 } 1322 1323 rc = ena_com_get_io_handlers(ena_dev, ena_qid, 1324 &ring->ena_com_io_sq, 1325 &ring->ena_com_io_cq); 1326 if (rc) { 1327 PMD_DRV_LOG(ERR, 1328 "Failed to get IO queue[%d] handlers, rc: %d\n", 1329 ring->id, rc); 1330 ena_com_destroy_io_queue(ena_dev, ena_qid); 1331 return rc; 1332 } 1333 1334 if (ring->type == ENA_RING_TYPE_TX) 1335 ena_com_update_numa_node(ring->ena_com_io_cq, ctx.numa_node); 1336 1337 /* Start with Rx interrupts being masked. */ 1338 if (ring->type == ENA_RING_TYPE_RX && rte_intr_dp_is_en(intr_handle)) 1339 ena_rx_queue_intr_disable(dev, ring->id); 1340 1341 return 0; 1342 } 1343 1344 static void ena_queue_stop(struct ena_ring *ring) 1345 { 1346 struct ena_com_dev *ena_dev = &ring->adapter->ena_dev; 1347 1348 if (ring->type == ENA_RING_TYPE_RX) { 1349 ena_com_destroy_io_queue(ena_dev, ENA_IO_RXQ_IDX(ring->id)); 1350 ena_rx_queue_release_bufs(ring); 1351 } else { 1352 ena_com_destroy_io_queue(ena_dev, ENA_IO_TXQ_IDX(ring->id)); 1353 ena_tx_queue_release_bufs(ring); 1354 } 1355 } 1356 1357 static void ena_queue_stop_all(struct rte_eth_dev *dev, 1358 enum ena_ring_type ring_type) 1359 { 1360 struct ena_adapter *adapter = dev->data->dev_private; 1361 struct ena_ring *queues = NULL; 1362 uint16_t nb_queues, i; 1363 1364 if (ring_type == ENA_RING_TYPE_RX) { 1365 queues = adapter->rx_ring; 1366 nb_queues = dev->data->nb_rx_queues; 1367 } else { 1368 queues = adapter->tx_ring; 1369 nb_queues = dev->data->nb_tx_queues; 1370 } 1371 1372 for (i = 0; i < nb_queues; ++i) 1373 if (queues[i].configured) 1374 ena_queue_stop(&queues[i]); 1375 } 1376 1377 static int ena_queue_start(struct rte_eth_dev *dev, struct ena_ring *ring) 1378 { 1379 int rc, bufs_num; 1380 1381 ena_assert_msg(ring->configured == 1, 1382 "Trying to start unconfigured queue\n"); 1383 1384 rc = ena_create_io_queue(dev, ring); 1385 if (rc) { 1386 PMD_INIT_LOG(ERR, "Failed to create IO queue\n"); 1387 return rc; 1388 } 1389 1390 ring->next_to_clean = 0; 1391 ring->next_to_use = 0; 1392 1393 if (ring->type == ENA_RING_TYPE_TX) { 1394 ring->tx_stats.available_desc = 1395 ena_com_free_q_entries(ring->ena_com_io_sq); 1396 return 0; 1397 } 1398 1399 bufs_num = ring->ring_size - 1; 1400 rc = ena_populate_rx_queue(ring, bufs_num); 1401 if (rc != bufs_num) { 1402 
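		/* Posting fewer buffers than requested leaves the Rx ring
		 * underfilled; destroy the just-created IO queue and report
		 * the failure so that ena_queue_start_all() can unwind the
		 * queues it has already started.
		 */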
ena_com_destroy_io_queue(&ring->adapter->ena_dev, 1403 ENA_IO_RXQ_IDX(ring->id)); 1404 PMD_INIT_LOG(ERR, "Failed to populate Rx ring\n"); 1405 return ENA_COM_FAULT; 1406 } 1407 /* Flush per-core RX buffers pools cache as they can be used on other 1408 * cores as well. 1409 */ 1410 rte_mempool_cache_flush(NULL, ring->mb_pool); 1411 1412 return 0; 1413 } 1414 1415 static int ena_tx_queue_setup(struct rte_eth_dev *dev, 1416 uint16_t queue_idx, 1417 uint16_t nb_desc, 1418 unsigned int socket_id, 1419 const struct rte_eth_txconf *tx_conf) 1420 { 1421 struct ena_ring *txq = NULL; 1422 struct ena_adapter *adapter = dev->data->dev_private; 1423 unsigned int i; 1424 uint16_t dyn_thresh; 1425 1426 txq = &adapter->tx_ring[queue_idx]; 1427 1428 if (txq->configured) { 1429 PMD_DRV_LOG(CRIT, 1430 "API violation. Queue[%d] is already configured\n", 1431 queue_idx); 1432 return ENA_COM_FAULT; 1433 } 1434 1435 if (!rte_is_power_of_2(nb_desc)) { 1436 PMD_DRV_LOG(ERR, 1437 "Unsupported size of Tx queue: %d is not a power of 2.\n", 1438 nb_desc); 1439 return -EINVAL; 1440 } 1441 1442 if (nb_desc > adapter->max_tx_ring_size) { 1443 PMD_DRV_LOG(ERR, 1444 "Unsupported size of Tx queue (max size: %d)\n", 1445 adapter->max_tx_ring_size); 1446 return -EINVAL; 1447 } 1448 1449 txq->port_id = dev->data->port_id; 1450 txq->next_to_clean = 0; 1451 txq->next_to_use = 0; 1452 txq->ring_size = nb_desc; 1453 txq->size_mask = nb_desc - 1; 1454 txq->numa_socket_id = socket_id; 1455 txq->pkts_without_db = false; 1456 txq->last_cleanup_ticks = 0; 1457 1458 txq->tx_buffer_info = rte_zmalloc_socket("txq->tx_buffer_info", 1459 sizeof(struct ena_tx_buffer) * txq->ring_size, 1460 RTE_CACHE_LINE_SIZE, 1461 socket_id); 1462 if (!txq->tx_buffer_info) { 1463 PMD_DRV_LOG(ERR, 1464 "Failed to allocate memory for Tx buffer info\n"); 1465 return -ENOMEM; 1466 } 1467 1468 txq->empty_tx_reqs = rte_zmalloc_socket("txq->empty_tx_reqs", 1469 sizeof(uint16_t) * txq->ring_size, 1470 RTE_CACHE_LINE_SIZE, 1471 socket_id); 1472 if (!txq->empty_tx_reqs) { 1473 PMD_DRV_LOG(ERR, 1474 "Failed to allocate memory for empty Tx requests\n"); 1475 rte_free(txq->tx_buffer_info); 1476 return -ENOMEM; 1477 } 1478 1479 txq->push_buf_intermediate_buf = 1480 rte_zmalloc_socket("txq->push_buf_intermediate_buf", 1481 txq->tx_max_header_size, 1482 RTE_CACHE_LINE_SIZE, 1483 socket_id); 1484 if (!txq->push_buf_intermediate_buf) { 1485 PMD_DRV_LOG(ERR, "Failed to alloc push buffer for LLQ\n"); 1486 rte_free(txq->tx_buffer_info); 1487 rte_free(txq->empty_tx_reqs); 1488 return -ENOMEM; 1489 } 1490 1491 for (i = 0; i < txq->ring_size; i++) 1492 txq->empty_tx_reqs[i] = i; 1493 1494 txq->offloads = tx_conf->offloads | dev->data->dev_conf.txmode.offloads; 1495 1496 /* Check if caller provided the Tx cleanup threshold value. 
*/ 1497 if (tx_conf->tx_free_thresh != 0) { 1498 txq->tx_free_thresh = tx_conf->tx_free_thresh; 1499 } else { 1500 dyn_thresh = txq->ring_size - 1501 txq->ring_size / ENA_REFILL_THRESH_DIVIDER; 1502 txq->tx_free_thresh = RTE_MAX(dyn_thresh, 1503 txq->ring_size - ENA_REFILL_THRESH_PACKET); 1504 } 1505 1506 txq->missing_tx_completion_threshold = 1507 RTE_MIN(txq->ring_size / 2, ENA_DEFAULT_MISSING_COMP); 1508 1509 /* Store pointer to this queue in upper layer */ 1510 txq->configured = 1; 1511 dev->data->tx_queues[queue_idx] = txq; 1512 1513 return 0; 1514 } 1515 1516 static int ena_rx_queue_setup(struct rte_eth_dev *dev, 1517 uint16_t queue_idx, 1518 uint16_t nb_desc, 1519 unsigned int socket_id, 1520 const struct rte_eth_rxconf *rx_conf, 1521 struct rte_mempool *mp) 1522 { 1523 struct ena_adapter *adapter = dev->data->dev_private; 1524 struct ena_ring *rxq = NULL; 1525 size_t buffer_size; 1526 int i; 1527 uint16_t dyn_thresh; 1528 1529 rxq = &adapter->rx_ring[queue_idx]; 1530 if (rxq->configured) { 1531 PMD_DRV_LOG(CRIT, 1532 "API violation. Queue[%d] is already configured\n", 1533 queue_idx); 1534 return ENA_COM_FAULT; 1535 } 1536 1537 if (!rte_is_power_of_2(nb_desc)) { 1538 PMD_DRV_LOG(ERR, 1539 "Unsupported size of Rx queue: %d is not a power of 2.\n", 1540 nb_desc); 1541 return -EINVAL; 1542 } 1543 1544 if (nb_desc > adapter->max_rx_ring_size) { 1545 PMD_DRV_LOG(ERR, 1546 "Unsupported size of Rx queue (max size: %d)\n", 1547 adapter->max_rx_ring_size); 1548 return -EINVAL; 1549 } 1550 1551 /* ENA isn't supporting buffers smaller than 1400 bytes */ 1552 buffer_size = rte_pktmbuf_data_room_size(mp) - RTE_PKTMBUF_HEADROOM; 1553 if (buffer_size < ENA_RX_BUF_MIN_SIZE) { 1554 PMD_DRV_LOG(ERR, 1555 "Unsupported size of Rx buffer: %zu (min size: %d)\n", 1556 buffer_size, ENA_RX_BUF_MIN_SIZE); 1557 return -EINVAL; 1558 } 1559 1560 rxq->port_id = dev->data->port_id; 1561 rxq->next_to_clean = 0; 1562 rxq->next_to_use = 0; 1563 rxq->ring_size = nb_desc; 1564 rxq->size_mask = nb_desc - 1; 1565 rxq->numa_socket_id = socket_id; 1566 rxq->mb_pool = mp; 1567 1568 rxq->rx_buffer_info = rte_zmalloc_socket("rxq->buffer_info", 1569 sizeof(struct ena_rx_buffer) * nb_desc, 1570 RTE_CACHE_LINE_SIZE, 1571 socket_id); 1572 if (!rxq->rx_buffer_info) { 1573 PMD_DRV_LOG(ERR, 1574 "Failed to allocate memory for Rx buffer info\n"); 1575 return -ENOMEM; 1576 } 1577 1578 rxq->rx_refill_buffer = rte_zmalloc_socket("rxq->rx_refill_buffer", 1579 sizeof(struct rte_mbuf *) * nb_desc, 1580 RTE_CACHE_LINE_SIZE, 1581 socket_id); 1582 if (!rxq->rx_refill_buffer) { 1583 PMD_DRV_LOG(ERR, 1584 "Failed to allocate memory for Rx refill buffer\n"); 1585 rte_free(rxq->rx_buffer_info); 1586 rxq->rx_buffer_info = NULL; 1587 return -ENOMEM; 1588 } 1589 1590 rxq->empty_rx_reqs = rte_zmalloc_socket("rxq->empty_rx_reqs", 1591 sizeof(uint16_t) * nb_desc, 1592 RTE_CACHE_LINE_SIZE, 1593 socket_id); 1594 if (!rxq->empty_rx_reqs) { 1595 PMD_DRV_LOG(ERR, 1596 "Failed to allocate memory for empty Rx requests\n"); 1597 rte_free(rxq->rx_buffer_info); 1598 rxq->rx_buffer_info = NULL; 1599 rte_free(rxq->rx_refill_buffer); 1600 rxq->rx_refill_buffer = NULL; 1601 return -ENOMEM; 1602 } 1603 1604 for (i = 0; i < nb_desc; i++) 1605 rxq->empty_rx_reqs[i] = i; 1606 1607 rxq->offloads = rx_conf->offloads | dev->data->dev_conf.rxmode.offloads; 1608 1609 if (rx_conf->rx_free_thresh != 0) { 1610 rxq->rx_free_thresh = rx_conf->rx_free_thresh; 1611 } else { 1612 dyn_thresh = rxq->ring_size / ENA_REFILL_THRESH_DIVIDER; 1613 rxq->rx_free_thresh = 
RTE_MIN(dyn_thresh, 1614 (uint16_t)(ENA_REFILL_THRESH_PACKET)); 1615 } 1616 1617 /* Store pointer to this queue in upper layer */ 1618 rxq->configured = 1; 1619 dev->data->rx_queues[queue_idx] = rxq; 1620 1621 return 0; 1622 } 1623 1624 static int ena_add_single_rx_desc(struct ena_com_io_sq *io_sq, 1625 struct rte_mbuf *mbuf, uint16_t id) 1626 { 1627 struct ena_com_buf ebuf; 1628 int rc; 1629 1630 /* prepare physical address for DMA transaction */ 1631 ebuf.paddr = mbuf->buf_iova + RTE_PKTMBUF_HEADROOM; 1632 ebuf.len = mbuf->buf_len - RTE_PKTMBUF_HEADROOM; 1633 1634 /* pass resource to device */ 1635 rc = ena_com_add_single_rx_desc(io_sq, &ebuf, id); 1636 if (unlikely(rc != 0)) 1637 PMD_RX_LOG(WARNING, "Failed adding Rx desc\n"); 1638 1639 return rc; 1640 } 1641 1642 static int ena_populate_rx_queue(struct ena_ring *rxq, unsigned int count) 1643 { 1644 unsigned int i; 1645 int rc; 1646 uint16_t next_to_use = rxq->next_to_use; 1647 uint16_t req_id; 1648 #ifdef RTE_ETHDEV_DEBUG_RX 1649 uint16_t in_use; 1650 #endif 1651 struct rte_mbuf **mbufs = rxq->rx_refill_buffer; 1652 1653 if (unlikely(!count)) 1654 return 0; 1655 1656 #ifdef RTE_ETHDEV_DEBUG_RX 1657 in_use = rxq->ring_size - 1 - 1658 ena_com_free_q_entries(rxq->ena_com_io_sq); 1659 if (unlikely((in_use + count) >= rxq->ring_size)) 1660 PMD_RX_LOG(ERR, "Bad Rx ring state\n"); 1661 #endif 1662 1663 /* get resources for incoming packets */ 1664 rc = rte_pktmbuf_alloc_bulk(rxq->mb_pool, mbufs, count); 1665 if (unlikely(rc < 0)) { 1666 rte_atomic64_inc(&rxq->adapter->drv_stats->rx_nombuf); 1667 ++rxq->rx_stats.mbuf_alloc_fail; 1668 PMD_RX_LOG(DEBUG, "There are not enough free buffers\n"); 1669 return 0; 1670 } 1671 1672 for (i = 0; i < count; i++) { 1673 struct rte_mbuf *mbuf = mbufs[i]; 1674 struct ena_rx_buffer *rx_info; 1675 1676 if (likely((i + 4) < count)) 1677 rte_prefetch0(mbufs[i + 4]); 1678 1679 req_id = rxq->empty_rx_reqs[next_to_use]; 1680 rx_info = &rxq->rx_buffer_info[req_id]; 1681 1682 rc = ena_add_single_rx_desc(rxq->ena_com_io_sq, mbuf, req_id); 1683 if (unlikely(rc != 0)) 1684 break; 1685 1686 rx_info->mbuf = mbuf; 1687 next_to_use = ENA_IDX_NEXT_MASKED(next_to_use, rxq->size_mask); 1688 } 1689 1690 if (unlikely(i < count)) { 1691 PMD_RX_LOG(WARNING, 1692 "Refilled Rx queue[%d] with only %d/%d buffers\n", 1693 rxq->id, i, count); 1694 rte_pktmbuf_free_bulk(&mbufs[i], count - i); 1695 ++rxq->rx_stats.refill_partial; 1696 } 1697 1698 /* When we submitted free resources to device... */ 1699 if (likely(i > 0)) { 1700 /* ...let HW know that it can fill buffers with data. */ 1701 ena_com_write_sq_doorbell(rxq->ena_com_io_sq); 1702 1703 rxq->next_to_use = next_to_use; 1704 } 1705 1706 return i; 1707 } 1708 1709 static int ena_device_init(struct ena_adapter *adapter, 1710 struct rte_pci_device *pdev, 1711 struct ena_com_dev_get_features_ctx *get_feat_ctx) 1712 { 1713 struct ena_com_dev *ena_dev = &adapter->ena_dev; 1714 uint32_t aenq_groups; 1715 int rc; 1716 bool readless_supported; 1717 1718 /* Initialize mmio registers */ 1719 rc = ena_com_mmio_reg_read_request_init(ena_dev); 1720 if (rc) { 1721 PMD_DRV_LOG(ERR, "Failed to init MMIO read less\n"); 1722 return rc; 1723 } 1724 1725 /* The PCIe configuration space revision id indicate if mmio reg 1726 * read is disabled. 
1727 */ 1728 readless_supported = !(pdev->id.class_id & ENA_MMIO_DISABLE_REG_READ); 1729 ena_com_set_mmio_read_mode(ena_dev, readless_supported); 1730 1731 /* reset device */ 1732 rc = ena_com_dev_reset(ena_dev, ENA_REGS_RESET_NORMAL); 1733 if (rc) { 1734 PMD_DRV_LOG(ERR, "Cannot reset device\n"); 1735 goto err_mmio_read_less; 1736 } 1737 1738 /* check FW version */ 1739 rc = ena_com_validate_version(ena_dev); 1740 if (rc) { 1741 PMD_DRV_LOG(ERR, "Device version is too low\n"); 1742 goto err_mmio_read_less; 1743 } 1744 1745 ena_dev->dma_addr_bits = ena_com_get_dma_width(ena_dev); 1746 1747 /* ENA device administration layer init */ 1748 rc = ena_com_admin_init(ena_dev, &aenq_handlers); 1749 if (rc) { 1750 PMD_DRV_LOG(ERR, 1751 "Cannot initialize ENA admin queue\n"); 1752 goto err_mmio_read_less; 1753 } 1754 1755 /* To enable the msix interrupts the driver needs to know the number 1756 * of queues. So the driver uses polling mode to retrieve this 1757 * information. 1758 */ 1759 ena_com_set_admin_polling_mode(ena_dev, true); 1760 1761 ena_config_host_info(ena_dev); 1762 1763 /* Get Device Attributes and features */ 1764 rc = ena_com_get_dev_attr_feat(ena_dev, get_feat_ctx); 1765 if (rc) { 1766 PMD_DRV_LOG(ERR, 1767 "Cannot get attribute for ENA device, rc: %d\n", rc); 1768 goto err_admin_init; 1769 } 1770 1771 aenq_groups = BIT(ENA_ADMIN_LINK_CHANGE) | 1772 BIT(ENA_ADMIN_NOTIFICATION) | 1773 BIT(ENA_ADMIN_KEEP_ALIVE) | 1774 BIT(ENA_ADMIN_FATAL_ERROR) | 1775 BIT(ENA_ADMIN_WARNING); 1776 1777 aenq_groups &= get_feat_ctx->aenq.supported_groups; 1778 1779 adapter->all_aenq_groups = aenq_groups; 1780 1781 return 0; 1782 1783 err_admin_init: 1784 ena_com_admin_destroy(ena_dev); 1785 1786 err_mmio_read_less: 1787 ena_com_mmio_reg_read_request_destroy(ena_dev); 1788 1789 return rc; 1790 } 1791 1792 static void ena_interrupt_handler_rte(void *cb_arg) 1793 { 1794 struct rte_eth_dev *dev = cb_arg; 1795 struct ena_adapter *adapter = dev->data->dev_private; 1796 struct ena_com_dev *ena_dev = &adapter->ena_dev; 1797 1798 ena_com_admin_q_comp_intr_handler(ena_dev); 1799 if (likely(adapter->state != ENA_ADAPTER_STATE_CLOSED)) 1800 ena_com_aenq_intr_handler(ena_dev, dev); 1801 } 1802 1803 static void check_for_missing_keep_alive(struct ena_adapter *adapter) 1804 { 1805 if (!(adapter->active_aenq_groups & BIT(ENA_ADMIN_KEEP_ALIVE))) 1806 return; 1807 1808 if (adapter->keep_alive_timeout == ENA_HW_HINTS_NO_TIMEOUT) 1809 return; 1810 1811 if (unlikely((rte_get_timer_cycles() - adapter->timestamp_wd) >= 1812 adapter->keep_alive_timeout)) { 1813 PMD_DRV_LOG(ERR, "Keep alive timeout\n"); 1814 ena_trigger_reset(adapter, ENA_REGS_RESET_KEEP_ALIVE_TO); 1815 ++adapter->dev_stats.wd_expired; 1816 } 1817 } 1818 1819 /* Check if admin queue is enabled */ 1820 static void check_for_admin_com_state(struct ena_adapter *adapter) 1821 { 1822 if (unlikely(!ena_com_get_admin_running_state(&adapter->ena_dev))) { 1823 PMD_DRV_LOG(ERR, "ENA admin queue is not in running state\n"); 1824 ena_trigger_reset(adapter, ENA_REGS_RESET_ADMIN_TO); 1825 } 1826 } 1827 1828 static int check_for_tx_completion_in_queue(struct ena_adapter *adapter, 1829 struct ena_ring *tx_ring) 1830 { 1831 struct ena_tx_buffer *tx_buf; 1832 uint64_t timestamp; 1833 uint64_t completion_delay; 1834 uint32_t missed_tx = 0; 1835 unsigned int i; 1836 int rc = 0; 1837 1838 for (i = 0; i < tx_ring->ring_size; ++i) { 1839 tx_buf = &tx_ring->tx_buffer_info[i]; 1840 timestamp = tx_buf->timestamp; 1841 1842 if (timestamp == 0) 1843 continue; 1844 1845 completion_delay = 
rte_get_timer_cycles() - timestamp; 1846 if (completion_delay > adapter->missing_tx_completion_to) { 1847 if (unlikely(!tx_buf->print_once)) { 1848 PMD_TX_LOG(WARNING, 1849 "Found a Tx that wasn't completed on time, qid %d, index %d. " 1850 "Missing Tx outstanding for %" PRIu64 " msecs.\n", 1851 tx_ring->id, i, completion_delay / 1852 rte_get_timer_hz() * 1000); 1853 tx_buf->print_once = true; 1854 } 1855 ++missed_tx; 1856 } 1857 } 1858 1859 if (unlikely(missed_tx > tx_ring->missing_tx_completion_threshold)) { 1860 PMD_DRV_LOG(ERR, 1861 "The number of lost Tx completions is above the threshold (%d > %d). " 1862 "Trigger the device reset.\n", 1863 missed_tx, 1864 tx_ring->missing_tx_completion_threshold); 1865 adapter->reset_reason = ENA_REGS_RESET_MISS_TX_CMPL; 1866 adapter->trigger_reset = true; 1867 rc = -EIO; 1868 } 1869 1870 tx_ring->tx_stats.missed_tx += missed_tx; 1871 1872 return rc; 1873 } 1874 1875 static void check_for_tx_completions(struct ena_adapter *adapter) 1876 { 1877 struct ena_ring *tx_ring; 1878 uint64_t tx_cleanup_delay; 1879 size_t qid; 1880 int budget; 1881 uint16_t nb_tx_queues = adapter->edev_data->nb_tx_queues; 1882 1883 if (adapter->missing_tx_completion_to == ENA_HW_HINTS_NO_TIMEOUT) 1884 return; 1885 1886 nb_tx_queues = adapter->edev_data->nb_tx_queues; 1887 budget = adapter->missing_tx_completion_budget; 1888 1889 qid = adapter->last_tx_comp_qid; 1890 while (budget-- > 0) { 1891 tx_ring = &adapter->tx_ring[qid]; 1892 1893 /* Tx cleanup is called only by the burst function and can be 1894 * called dynamically by the application. Also cleanup is 1895 * limited by the threshold. To avoid false detection of the 1896 * missing HW Tx completion, get the delay since last cleanup 1897 * function was called. 1898 */ 1899 tx_cleanup_delay = rte_get_timer_cycles() - 1900 tx_ring->last_cleanup_ticks; 1901 if (tx_cleanup_delay < adapter->tx_cleanup_stall_delay) 1902 check_for_tx_completion_in_queue(adapter, tx_ring); 1903 qid = (qid + 1) % nb_tx_queues; 1904 } 1905 1906 adapter->last_tx_comp_qid = qid; 1907 } 1908 1909 static void ena_timer_wd_callback(__rte_unused struct rte_timer *timer, 1910 void *arg) 1911 { 1912 struct rte_eth_dev *dev = arg; 1913 struct ena_adapter *adapter = dev->data->dev_private; 1914 1915 if (unlikely(adapter->trigger_reset)) 1916 return; 1917 1918 check_for_missing_keep_alive(adapter); 1919 check_for_admin_com_state(adapter); 1920 check_for_tx_completions(adapter); 1921 1922 if (unlikely(adapter->trigger_reset)) { 1923 PMD_DRV_LOG(ERR, "Trigger reset is on\n"); 1924 rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_RESET, 1925 NULL); 1926 } 1927 } 1928 1929 static inline void 1930 set_default_llq_configurations(struct ena_llq_configurations *llq_config, 1931 struct ena_admin_feature_llq_desc *llq, 1932 bool use_large_llq_hdr) 1933 { 1934 llq_config->llq_header_location = ENA_ADMIN_INLINE_HEADER; 1935 llq_config->llq_stride_ctrl = ENA_ADMIN_MULTIPLE_DESCS_PER_ENTRY; 1936 llq_config->llq_num_decs_before_header = 1937 ENA_ADMIN_LLQ_NUM_DESCS_BEFORE_HEADER_2; 1938 1939 if (use_large_llq_hdr && 1940 (llq->entry_size_ctrl_supported & ENA_ADMIN_LIST_ENTRY_SIZE_256B)) { 1941 llq_config->llq_ring_entry_size = 1942 ENA_ADMIN_LIST_ENTRY_SIZE_256B; 1943 llq_config->llq_ring_entry_size_value = 256; 1944 } else { 1945 llq_config->llq_ring_entry_size = 1946 ENA_ADMIN_LIST_ENTRY_SIZE_128B; 1947 llq_config->llq_ring_entry_size_value = 128; 1948 } 1949 } 1950 1951 static int 1952 ena_set_queues_placement_policy(struct ena_adapter *adapter, 1953 struct 
ena_com_dev *ena_dev,
1954 struct ena_admin_feature_llq_desc *llq,
1955 struct ena_llq_configurations *llq_default_configurations)
1956 {
1957 int rc;
1958 u32 llq_feature_mask;
1959 
1960 llq_feature_mask = 1 << ENA_ADMIN_LLQ;
1961 if (!(ena_dev->supported_features & llq_feature_mask)) {
1962 PMD_DRV_LOG(INFO,
1963 "LLQ is not supported. Fallback to host mode policy.\n");
1964 ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST;
1965 return 0;
1966 }
1967 
1968 if (adapter->dev_mem_base == NULL) {
1969 PMD_DRV_LOG(ERR,
1970 "LLQ is advertised as supported, but device doesn't expose mem bar\n");
1971 ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST;
1972 return 0;
1973 }
1974 
1975 rc = ena_com_config_dev_mode(ena_dev, llq, llq_default_configurations);
1976 if (unlikely(rc)) {
1977 PMD_INIT_LOG(WARNING,
1978 "Failed to config dev mode. Fallback to host mode policy.\n");
1979 ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST;
1980 return 0;
1981 }
1982 
1983 /* Nothing to config, exit */
1984 if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_HOST)
1985 return 0;
1986 
1987 ena_dev->mem_bar = adapter->dev_mem_base;
1988 
1989 return 0;
1990 }
1991 
1992 static uint32_t ena_calc_max_io_queue_num(struct ena_com_dev *ena_dev,
1993 struct ena_com_dev_get_features_ctx *get_feat_ctx)
1994 {
1995 uint32_t io_tx_sq_num, io_tx_cq_num, io_rx_num, max_num_io_queues;
1996 
1997 /* Regular queues capabilities */
1998 if (ena_dev->supported_features & BIT(ENA_ADMIN_MAX_QUEUES_EXT)) {
1999 struct ena_admin_queue_ext_feature_fields *max_queue_ext =
2000 &get_feat_ctx->max_queue_ext.max_queue_ext;
2001 io_rx_num = RTE_MIN(max_queue_ext->max_rx_sq_num,
2002 max_queue_ext->max_rx_cq_num);
2003 io_tx_sq_num = max_queue_ext->max_tx_sq_num;
2004 io_tx_cq_num = max_queue_ext->max_tx_cq_num;
2005 } else {
2006 struct ena_admin_queue_feature_desc *max_queues =
2007 &get_feat_ctx->max_queues;
2008 io_tx_sq_num = max_queues->max_sq_num;
2009 io_tx_cq_num = max_queues->max_cq_num;
2010 io_rx_num = RTE_MIN(io_tx_sq_num, io_tx_cq_num);
2011 }
2012 
2013 /* In case of LLQ use the llq number in the get feature cmd */
2014 if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV)
2015 io_tx_sq_num = get_feat_ctx->llq.max_llq_num;
2016 
2017 max_num_io_queues = RTE_MIN(ENA_MAX_NUM_IO_QUEUES, io_rx_num);
2018 max_num_io_queues = RTE_MIN(max_num_io_queues, io_tx_sq_num);
2019 max_num_io_queues = RTE_MIN(max_num_io_queues, io_tx_cq_num);
2020 
2021 if (unlikely(max_num_io_queues == 0)) {
2022 PMD_DRV_LOG(ERR, "Number of IO queues cannot be 0\n");
2023 return -EFAULT;
2024 }
2025 
2026 return max_num_io_queues;
2027 }
2028 
2029 static void
2030 ena_set_offloads(struct ena_offloads *offloads,
2031 struct ena_admin_feature_offload_desc *offload_desc)
2032 {
2033 if (offload_desc->tx & ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_IPV4_MASK)
2034 offloads->tx_offloads |= ENA_IPV4_TSO;
2035 
2036 /* Tx IPv4 checksum offloads */
2037 if (offload_desc->tx &
2038 ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L3_CSUM_IPV4_MASK)
2039 offloads->tx_offloads |= ENA_L3_IPV4_CSUM;
2040 if (offload_desc->tx &
2041 ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV4_CSUM_FULL_MASK)
2042 offloads->tx_offloads |= ENA_L4_IPV4_CSUM;
2043 if (offload_desc->tx &
2044 ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV4_CSUM_PART_MASK)
2045 offloads->tx_offloads |= ENA_L4_IPV4_CSUM_PARTIAL;
2046 
2047 /* Tx IPv6 checksum offloads */
2048 if (offload_desc->tx &
2049 ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV6_CSUM_FULL_MASK)
2050 offloads->tx_offloads |=
ENA_L4_IPV6_CSUM;
2051 if (offload_desc->tx &
2052 ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV6_CSUM_PART_MASK)
2053 offloads->tx_offloads |= ENA_L4_IPV6_CSUM_PARTIAL;
2054 
2055 /* Rx IPv4 checksum offloads */
2056 if (offload_desc->rx_supported &
2057 ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L3_CSUM_IPV4_MASK)
2058 offloads->rx_offloads |= ENA_L3_IPV4_CSUM;
2059 if (offload_desc->rx_supported &
2060 ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L4_IPV4_CSUM_MASK)
2061 offloads->rx_offloads |= ENA_L4_IPV4_CSUM;
2062 
2063 /* Rx IPv6 checksum offloads */
2064 if (offload_desc->rx_supported &
2065 ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L4_IPV6_CSUM_MASK)
2066 offloads->rx_offloads |= ENA_L4_IPV6_CSUM;
2067 
2068 if (offload_desc->rx_supported &
2069 ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_HASH_MASK)
2070 offloads->rx_offloads |= ENA_RX_RSS_HASH;
2071 }
2072 
2073 static int ena_init_once(void)
2074 {
2075 static bool init_done;
2076 
2077 if (init_done)
2078 return 0;
2079 
2080 if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
2081 /* Init timer subsystem for the ENA timer service. */
2082 rte_timer_subsystem_init();
2083 /* Register handler for requests from secondary processes. */
2084 rte_mp_action_register(ENA_MP_NAME, ena_mp_primary_handle);
2085 }
2086 
2087 init_done = true;
2088 return 0;
2089 }
2090 
2091 static int eth_ena_dev_init(struct rte_eth_dev *eth_dev)
2092 {
2093 struct ena_calc_queue_size_ctx calc_queue_ctx = { 0 };
2094 struct rte_pci_device *pci_dev;
2095 struct rte_intr_handle *intr_handle;
2096 struct ena_adapter *adapter = eth_dev->data->dev_private;
2097 struct ena_com_dev *ena_dev = &adapter->ena_dev;
2098 struct ena_com_dev_get_features_ctx get_feat_ctx;
2099 struct ena_llq_configurations llq_config;
2100 const char *queue_type_str;
2101 uint32_t max_num_io_queues;
2102 int rc;
2103 static int adapters_found;
2104 bool disable_meta_caching;
2105 
2106 eth_dev->dev_ops = &ena_dev_ops;
2107 eth_dev->rx_pkt_burst = &eth_ena_recv_pkts;
2108 eth_dev->tx_pkt_burst = &eth_ena_xmit_pkts;
2109 eth_dev->tx_pkt_prepare = &eth_ena_prep_pkts;
2110 
2111 rc = ena_init_once();
2112 if (rc != 0)
2113 return rc;
2114 
2115 if (rte_eal_process_type() != RTE_PROC_PRIMARY)
2116 return 0;
2117 
2118 eth_dev->data->dev_flags |= RTE_ETH_DEV_AUTOFILL_QUEUE_XSTATS;
2119 
2120 memset(adapter, 0, sizeof(struct ena_adapter));
2121 ena_dev = &adapter->ena_dev;
2122 
2123 adapter->edev_data = eth_dev->data;
2124 
2125 pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev);
2126 
2127 PMD_INIT_LOG(INFO, "Initializing %x:%x:%x.%d\n",
2128 pci_dev->addr.domain,
2129 pci_dev->addr.bus,
2130 pci_dev->addr.devid,
2131 pci_dev->addr.function);
2132 
2133 intr_handle = pci_dev->intr_handle;
2134 
2135 adapter->regs = pci_dev->mem_resource[ENA_REGS_BAR].addr;
2136 adapter->dev_mem_base = pci_dev->mem_resource[ENA_MEM_BAR].addr;
2137 
2138 if (!adapter->regs) {
2139 PMD_INIT_LOG(CRIT, "Failed to access registers BAR(%d)\n",
2140 ENA_REGS_BAR);
2141 return -ENXIO;
2142 }
2143 
2144 ena_dev->reg_bar = adapter->regs;
2145 /* Pass device data as a pointer which can be passed to the IO functions
2146 * by the ena_com (for example - the memory allocation).
2147 */ 2148 ena_dev->dmadev = eth_dev->data; 2149 2150 adapter->id_number = adapters_found; 2151 2152 snprintf(adapter->name, ENA_NAME_MAX_LEN, "ena_%d", 2153 adapter->id_number); 2154 2155 adapter->missing_tx_completion_to = ENA_TX_TIMEOUT; 2156 2157 rc = ena_parse_devargs(adapter, pci_dev->device.devargs); 2158 if (rc != 0) { 2159 PMD_INIT_LOG(CRIT, "Failed to parse devargs\n"); 2160 goto err; 2161 } 2162 2163 /* device specific initialization routine */ 2164 rc = ena_device_init(adapter, pci_dev, &get_feat_ctx); 2165 if (rc) { 2166 PMD_INIT_LOG(CRIT, "Failed to init ENA device\n"); 2167 goto err; 2168 } 2169 2170 /* Check if device supports LSC */ 2171 if (!(adapter->all_aenq_groups & BIT(ENA_ADMIN_LINK_CHANGE))) 2172 adapter->edev_data->dev_flags &= ~RTE_ETH_DEV_INTR_LSC; 2173 2174 set_default_llq_configurations(&llq_config, &get_feat_ctx.llq, 2175 adapter->use_large_llq_hdr); 2176 rc = ena_set_queues_placement_policy(adapter, ena_dev, 2177 &get_feat_ctx.llq, &llq_config); 2178 if (unlikely(rc)) { 2179 PMD_INIT_LOG(CRIT, "Failed to set placement policy\n"); 2180 return rc; 2181 } 2182 2183 if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_HOST) 2184 queue_type_str = "Regular"; 2185 else 2186 queue_type_str = "Low latency"; 2187 PMD_DRV_LOG(INFO, "Placement policy: %s\n", queue_type_str); 2188 2189 calc_queue_ctx.ena_dev = ena_dev; 2190 calc_queue_ctx.get_feat_ctx = &get_feat_ctx; 2191 2192 max_num_io_queues = ena_calc_max_io_queue_num(ena_dev, &get_feat_ctx); 2193 rc = ena_calc_io_queue_size(&calc_queue_ctx, 2194 adapter->use_large_llq_hdr); 2195 if (unlikely((rc != 0) || (max_num_io_queues == 0))) { 2196 rc = -EFAULT; 2197 goto err_device_destroy; 2198 } 2199 2200 adapter->max_tx_ring_size = calc_queue_ctx.max_tx_queue_size; 2201 adapter->max_rx_ring_size = calc_queue_ctx.max_rx_queue_size; 2202 adapter->max_tx_sgl_size = calc_queue_ctx.max_tx_sgl_size; 2203 adapter->max_rx_sgl_size = calc_queue_ctx.max_rx_sgl_size; 2204 adapter->max_num_io_queues = max_num_io_queues; 2205 2206 if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) { 2207 disable_meta_caching = 2208 !!(get_feat_ctx.llq.accel_mode.u.get.supported_flags & 2209 BIT(ENA_ADMIN_DISABLE_META_CACHING)); 2210 } else { 2211 disable_meta_caching = false; 2212 } 2213 2214 /* prepare ring structures */ 2215 ena_init_rings(adapter, disable_meta_caching); 2216 2217 ena_config_debug_area(adapter); 2218 2219 /* Set max MTU for this device */ 2220 adapter->max_mtu = get_feat_ctx.dev_attr.max_mtu; 2221 2222 ena_set_offloads(&adapter->offloads, &get_feat_ctx.offload); 2223 2224 /* Copy MAC address and point DPDK to it */ 2225 eth_dev->data->mac_addrs = (struct rte_ether_addr *)adapter->mac_addr; 2226 rte_ether_addr_copy((struct rte_ether_addr *) 2227 get_feat_ctx.dev_attr.mac_addr, 2228 (struct rte_ether_addr *)adapter->mac_addr); 2229 2230 rc = ena_com_rss_init(ena_dev, ENA_RX_RSS_TABLE_LOG_SIZE); 2231 if (unlikely(rc != 0)) { 2232 PMD_DRV_LOG(ERR, "Failed to initialize RSS in ENA device\n"); 2233 goto err_delete_debug_area; 2234 } 2235 2236 adapter->drv_stats = rte_zmalloc("adapter stats", 2237 sizeof(*adapter->drv_stats), 2238 RTE_CACHE_LINE_SIZE); 2239 if (!adapter->drv_stats) { 2240 PMD_DRV_LOG(ERR, 2241 "Failed to allocate memory for adapter statistics\n"); 2242 rc = -ENOMEM; 2243 goto err_rss_destroy; 2244 } 2245 2246 rte_spinlock_init(&adapter->admin_lock); 2247 2248 rte_intr_callback_register(intr_handle, 2249 ena_interrupt_handler_rte, 2250 eth_dev); 2251 rte_intr_enable(intr_handle); 2252 
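	/* The interrupt handler is registered and interrupts are enabled at
	 * this point, so the admin queue can leave polling mode and AENQ
	 * notifications can be turned on below.
	 */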
ena_com_set_admin_polling_mode(ena_dev, false); 2253 ena_com_admin_aenq_enable(ena_dev); 2254 2255 rte_timer_init(&adapter->timer_wd); 2256 2257 adapters_found++; 2258 adapter->state = ENA_ADAPTER_STATE_INIT; 2259 2260 return 0; 2261 2262 err_rss_destroy: 2263 ena_com_rss_destroy(ena_dev); 2264 err_delete_debug_area: 2265 ena_com_delete_debug_area(ena_dev); 2266 2267 err_device_destroy: 2268 ena_com_delete_host_info(ena_dev); 2269 ena_com_admin_destroy(ena_dev); 2270 2271 err: 2272 return rc; 2273 } 2274 2275 static void ena_destroy_device(struct rte_eth_dev *eth_dev) 2276 { 2277 struct ena_adapter *adapter = eth_dev->data->dev_private; 2278 struct ena_com_dev *ena_dev = &adapter->ena_dev; 2279 2280 if (adapter->state == ENA_ADAPTER_STATE_FREE) 2281 return; 2282 2283 ena_com_set_admin_running_state(ena_dev, false); 2284 2285 if (adapter->state != ENA_ADAPTER_STATE_CLOSED) 2286 ena_close(eth_dev); 2287 2288 ena_com_rss_destroy(ena_dev); 2289 2290 ena_com_delete_debug_area(ena_dev); 2291 ena_com_delete_host_info(ena_dev); 2292 2293 ena_com_abort_admin_commands(ena_dev); 2294 ena_com_wait_for_abort_completion(ena_dev); 2295 ena_com_admin_destroy(ena_dev); 2296 ena_com_mmio_reg_read_request_destroy(ena_dev); 2297 2298 adapter->state = ENA_ADAPTER_STATE_FREE; 2299 } 2300 2301 static int eth_ena_dev_uninit(struct rte_eth_dev *eth_dev) 2302 { 2303 if (rte_eal_process_type() != RTE_PROC_PRIMARY) 2304 return 0; 2305 2306 ena_destroy_device(eth_dev); 2307 2308 return 0; 2309 } 2310 2311 static int ena_dev_configure(struct rte_eth_dev *dev) 2312 { 2313 struct ena_adapter *adapter = dev->data->dev_private; 2314 int rc; 2315 2316 adapter->state = ENA_ADAPTER_STATE_CONFIG; 2317 2318 if (dev->data->dev_conf.rxmode.mq_mode & RTE_ETH_MQ_RX_RSS_FLAG) 2319 dev->data->dev_conf.rxmode.offloads |= RTE_ETH_RX_OFFLOAD_RSS_HASH; 2320 dev->data->dev_conf.txmode.offloads |= RTE_ETH_TX_OFFLOAD_MULTI_SEGS; 2321 2322 /* Scattered Rx cannot be turned off in the HW, so this capability must 2323 * be forced. 2324 */ 2325 dev->data->scattered_rx = 1; 2326 2327 adapter->last_tx_comp_qid = 0; 2328 2329 adapter->missing_tx_completion_budget = 2330 RTE_MIN(ENA_MONITORED_TX_QUEUES, dev->data->nb_tx_queues); 2331 2332 /* To avoid detection of the spurious Tx completion timeout due to 2333 * application not calling the Tx cleanup function, set timeout for the 2334 * Tx queue which should be half of the missing completion timeout for a 2335 * safety. If there will be a lot of missing Tx completions in the 2336 * queue, they will be detected sooner or later. 
2337 */ 2338 adapter->tx_cleanup_stall_delay = adapter->missing_tx_completion_to / 2; 2339 2340 rc = ena_configure_aenq(adapter); 2341 2342 return rc; 2343 } 2344 2345 static void ena_init_rings(struct ena_adapter *adapter, 2346 bool disable_meta_caching) 2347 { 2348 size_t i; 2349 2350 for (i = 0; i < adapter->max_num_io_queues; i++) { 2351 struct ena_ring *ring = &adapter->tx_ring[i]; 2352 2353 ring->configured = 0; 2354 ring->type = ENA_RING_TYPE_TX; 2355 ring->adapter = adapter; 2356 ring->id = i; 2357 ring->tx_mem_queue_type = adapter->ena_dev.tx_mem_queue_type; 2358 ring->tx_max_header_size = adapter->ena_dev.tx_max_header_size; 2359 ring->sgl_size = adapter->max_tx_sgl_size; 2360 ring->disable_meta_caching = disable_meta_caching; 2361 } 2362 2363 for (i = 0; i < adapter->max_num_io_queues; i++) { 2364 struct ena_ring *ring = &adapter->rx_ring[i]; 2365 2366 ring->configured = 0; 2367 ring->type = ENA_RING_TYPE_RX; 2368 ring->adapter = adapter; 2369 ring->id = i; 2370 ring->sgl_size = adapter->max_rx_sgl_size; 2371 } 2372 } 2373 2374 static uint64_t ena_get_rx_port_offloads(struct ena_adapter *adapter) 2375 { 2376 uint64_t port_offloads = 0; 2377 2378 if (adapter->offloads.rx_offloads & ENA_L3_IPV4_CSUM) 2379 port_offloads |= RTE_ETH_RX_OFFLOAD_IPV4_CKSUM; 2380 2381 if (adapter->offloads.rx_offloads & 2382 (ENA_L4_IPV4_CSUM | ENA_L4_IPV6_CSUM)) 2383 port_offloads |= 2384 RTE_ETH_RX_OFFLOAD_UDP_CKSUM | RTE_ETH_RX_OFFLOAD_TCP_CKSUM; 2385 2386 if (adapter->offloads.rx_offloads & ENA_RX_RSS_HASH) 2387 port_offloads |= RTE_ETH_RX_OFFLOAD_RSS_HASH; 2388 2389 port_offloads |= RTE_ETH_RX_OFFLOAD_SCATTER; 2390 2391 return port_offloads; 2392 } 2393 2394 static uint64_t ena_get_tx_port_offloads(struct ena_adapter *adapter) 2395 { 2396 uint64_t port_offloads = 0; 2397 2398 if (adapter->offloads.tx_offloads & ENA_IPV4_TSO) 2399 port_offloads |= RTE_ETH_TX_OFFLOAD_TCP_TSO; 2400 2401 if (adapter->offloads.tx_offloads & ENA_L3_IPV4_CSUM) 2402 port_offloads |= RTE_ETH_TX_OFFLOAD_IPV4_CKSUM; 2403 if (adapter->offloads.tx_offloads & 2404 (ENA_L4_IPV4_CSUM_PARTIAL | ENA_L4_IPV4_CSUM | 2405 ENA_L4_IPV6_CSUM | ENA_L4_IPV6_CSUM_PARTIAL)) 2406 port_offloads |= 2407 RTE_ETH_TX_OFFLOAD_UDP_CKSUM | RTE_ETH_TX_OFFLOAD_TCP_CKSUM; 2408 2409 port_offloads |= RTE_ETH_TX_OFFLOAD_MULTI_SEGS; 2410 2411 port_offloads |= RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE; 2412 2413 return port_offloads; 2414 } 2415 2416 static uint64_t ena_get_rx_queue_offloads(struct ena_adapter *adapter) 2417 { 2418 RTE_SET_USED(adapter); 2419 2420 return 0; 2421 } 2422 2423 static uint64_t ena_get_tx_queue_offloads(struct ena_adapter *adapter) 2424 { 2425 uint64_t queue_offloads = 0; 2426 RTE_SET_USED(adapter); 2427 2428 queue_offloads |= RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE; 2429 2430 return queue_offloads; 2431 } 2432 2433 static int ena_infos_get(struct rte_eth_dev *dev, 2434 struct rte_eth_dev_info *dev_info) 2435 { 2436 struct ena_adapter *adapter; 2437 struct ena_com_dev *ena_dev; 2438 2439 ena_assert_msg(dev->data != NULL, "Uninitialized device\n"); 2440 ena_assert_msg(dev->data->dev_private != NULL, "Uninitialized device\n"); 2441 adapter = dev->data->dev_private; 2442 2443 ena_dev = &adapter->ena_dev; 2444 ena_assert_msg(ena_dev != NULL, "Uninitialized device\n"); 2445 2446 dev_info->speed_capa = 2447 RTE_ETH_LINK_SPEED_1G | 2448 RTE_ETH_LINK_SPEED_2_5G | 2449 RTE_ETH_LINK_SPEED_5G | 2450 RTE_ETH_LINK_SPEED_10G | 2451 RTE_ETH_LINK_SPEED_25G | 2452 RTE_ETH_LINK_SPEED_40G | 2453 RTE_ETH_LINK_SPEED_50G | 2454 RTE_ETH_LINK_SPEED_100G; 2455 2456 /* 
Inform framework about available features */ 2457 dev_info->rx_offload_capa = ena_get_rx_port_offloads(adapter); 2458 dev_info->tx_offload_capa = ena_get_tx_port_offloads(adapter); 2459 dev_info->rx_queue_offload_capa = ena_get_rx_queue_offloads(adapter); 2460 dev_info->tx_queue_offload_capa = ena_get_tx_queue_offloads(adapter); 2461 2462 dev_info->flow_type_rss_offloads = ENA_ALL_RSS_HF; 2463 dev_info->hash_key_size = ENA_HASH_KEY_SIZE; 2464 2465 dev_info->min_rx_bufsize = ENA_MIN_FRAME_LEN; 2466 dev_info->max_rx_pktlen = adapter->max_mtu + RTE_ETHER_HDR_LEN + 2467 RTE_ETHER_CRC_LEN; 2468 dev_info->min_mtu = ENA_MIN_MTU; 2469 dev_info->max_mtu = adapter->max_mtu; 2470 dev_info->max_mac_addrs = 1; 2471 2472 dev_info->max_rx_queues = adapter->max_num_io_queues; 2473 dev_info->max_tx_queues = adapter->max_num_io_queues; 2474 dev_info->reta_size = ENA_RX_RSS_TABLE_SIZE; 2475 2476 dev_info->rx_desc_lim.nb_max = adapter->max_rx_ring_size; 2477 dev_info->rx_desc_lim.nb_min = ENA_MIN_RING_DESC; 2478 dev_info->rx_desc_lim.nb_seg_max = RTE_MIN(ENA_PKT_MAX_BUFS, 2479 adapter->max_rx_sgl_size); 2480 dev_info->rx_desc_lim.nb_mtu_seg_max = RTE_MIN(ENA_PKT_MAX_BUFS, 2481 adapter->max_rx_sgl_size); 2482 2483 dev_info->tx_desc_lim.nb_max = adapter->max_tx_ring_size; 2484 dev_info->tx_desc_lim.nb_min = ENA_MIN_RING_DESC; 2485 dev_info->tx_desc_lim.nb_seg_max = RTE_MIN(ENA_PKT_MAX_BUFS, 2486 adapter->max_tx_sgl_size); 2487 dev_info->tx_desc_lim.nb_mtu_seg_max = RTE_MIN(ENA_PKT_MAX_BUFS, 2488 adapter->max_tx_sgl_size); 2489 2490 dev_info->default_rxportconf.ring_size = ENA_DEFAULT_RING_SIZE; 2491 dev_info->default_txportconf.ring_size = ENA_DEFAULT_RING_SIZE; 2492 2493 return 0; 2494 } 2495 2496 static inline void ena_init_rx_mbuf(struct rte_mbuf *mbuf, uint16_t len) 2497 { 2498 mbuf->data_len = len; 2499 mbuf->data_off = RTE_PKTMBUF_HEADROOM; 2500 mbuf->refcnt = 1; 2501 mbuf->next = NULL; 2502 } 2503 2504 static struct rte_mbuf *ena_rx_mbuf(struct ena_ring *rx_ring, 2505 struct ena_com_rx_buf_info *ena_bufs, 2506 uint32_t descs, 2507 uint16_t *next_to_clean, 2508 uint8_t offset) 2509 { 2510 struct rte_mbuf *mbuf; 2511 struct rte_mbuf *mbuf_head; 2512 struct ena_rx_buffer *rx_info; 2513 int rc; 2514 uint16_t ntc, len, req_id, buf = 0; 2515 2516 if (unlikely(descs == 0)) 2517 return NULL; 2518 2519 ntc = *next_to_clean; 2520 2521 len = ena_bufs[buf].len; 2522 req_id = ena_bufs[buf].req_id; 2523 2524 rx_info = &rx_ring->rx_buffer_info[req_id]; 2525 2526 mbuf = rx_info->mbuf; 2527 RTE_ASSERT(mbuf != NULL); 2528 2529 ena_init_rx_mbuf(mbuf, len); 2530 2531 /* Fill the mbuf head with the data specific for 1st segment. */ 2532 mbuf_head = mbuf; 2533 mbuf_head->nb_segs = descs; 2534 mbuf_head->port = rx_ring->port_id; 2535 mbuf_head->pkt_len = len; 2536 mbuf_head->data_off += offset; 2537 2538 rx_info->mbuf = NULL; 2539 rx_ring->empty_rx_reqs[ntc] = req_id; 2540 ntc = ENA_IDX_NEXT_MASKED(ntc, rx_ring->size_mask); 2541 2542 while (--descs) { 2543 ++buf; 2544 len = ena_bufs[buf].len; 2545 req_id = ena_bufs[buf].req_id; 2546 2547 rx_info = &rx_ring->rx_buffer_info[req_id]; 2548 RTE_ASSERT(rx_info->mbuf != NULL); 2549 2550 if (unlikely(len == 0)) { 2551 /* 2552 * Some devices can pass descriptor with the length 0. 2553 * To avoid confusion, the PMD is simply putting the 2554 * descriptor back, as it was never used. We'll avoid 2555 * mbuf allocation that way. 
2556 */ 2557 rc = ena_add_single_rx_desc(rx_ring->ena_com_io_sq, 2558 rx_info->mbuf, req_id); 2559 if (unlikely(rc != 0)) { 2560 /* Free the mbuf in case of an error. */ 2561 rte_mbuf_raw_free(rx_info->mbuf); 2562 } else { 2563 /* 2564 * If there was no error, just exit the loop as 2565 * 0 length descriptor is always the last one. 2566 */ 2567 break; 2568 } 2569 } else { 2570 /* Create an mbuf chain. */ 2571 mbuf->next = rx_info->mbuf; 2572 mbuf = mbuf->next; 2573 2574 ena_init_rx_mbuf(mbuf, len); 2575 mbuf_head->pkt_len += len; 2576 } 2577 2578 /* 2579 * Mark the descriptor as depleted and perform necessary 2580 * cleanup. 2581 * This code will execute in two cases: 2582 * 1. Descriptor len was greater than 0 - normal situation. 2583 * 2. Descriptor len was 0 and we failed to add the descriptor 2584 * to the device. In that situation, we should try to add 2585 * the mbuf again in the populate routine and mark the 2586 * descriptor as used up by the device. 2587 */ 2588 rx_info->mbuf = NULL; 2589 rx_ring->empty_rx_reqs[ntc] = req_id; 2590 ntc = ENA_IDX_NEXT_MASKED(ntc, rx_ring->size_mask); 2591 } 2592 2593 *next_to_clean = ntc; 2594 2595 return mbuf_head; 2596 } 2597 2598 static uint16_t eth_ena_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, 2599 uint16_t nb_pkts) 2600 { 2601 struct ena_ring *rx_ring = (struct ena_ring *)(rx_queue); 2602 unsigned int free_queue_entries; 2603 uint16_t next_to_clean = rx_ring->next_to_clean; 2604 uint16_t descs_in_use; 2605 struct rte_mbuf *mbuf; 2606 uint16_t completed; 2607 struct ena_com_rx_ctx ena_rx_ctx; 2608 int i, rc = 0; 2609 bool fill_hash; 2610 2611 #ifdef RTE_ETHDEV_DEBUG_RX 2612 /* Check adapter state */ 2613 if (unlikely(rx_ring->adapter->state != ENA_ADAPTER_STATE_RUNNING)) { 2614 PMD_RX_LOG(ALERT, 2615 "Trying to receive pkts while device is NOT running\n"); 2616 return 0; 2617 } 2618 #endif 2619 2620 fill_hash = rx_ring->offloads & RTE_ETH_RX_OFFLOAD_RSS_HASH; 2621 2622 descs_in_use = rx_ring->ring_size - 2623 ena_com_free_q_entries(rx_ring->ena_com_io_sq) - 1; 2624 nb_pkts = RTE_MIN(descs_in_use, nb_pkts); 2625 2626 for (completed = 0; completed < nb_pkts; completed++) { 2627 ena_rx_ctx.max_bufs = rx_ring->sgl_size; 2628 ena_rx_ctx.ena_bufs = rx_ring->ena_bufs; 2629 ena_rx_ctx.descs = 0; 2630 ena_rx_ctx.pkt_offset = 0; 2631 /* receive packet context */ 2632 rc = ena_com_rx_pkt(rx_ring->ena_com_io_cq, 2633 rx_ring->ena_com_io_sq, 2634 &ena_rx_ctx); 2635 if (unlikely(rc)) { 2636 PMD_RX_LOG(ERR, 2637 "Failed to get the packet from the device, rc: %d\n", 2638 rc); 2639 if (rc == ENA_COM_NO_SPACE) { 2640 ++rx_ring->rx_stats.bad_desc_num; 2641 ena_trigger_reset(rx_ring->adapter, 2642 ENA_REGS_RESET_TOO_MANY_RX_DESCS); 2643 } else { 2644 ++rx_ring->rx_stats.bad_req_id; 2645 ena_trigger_reset(rx_ring->adapter, 2646 ENA_REGS_RESET_INV_RX_REQ_ID); 2647 } 2648 return 0; 2649 } 2650 2651 mbuf = ena_rx_mbuf(rx_ring, 2652 ena_rx_ctx.ena_bufs, 2653 ena_rx_ctx.descs, 2654 &next_to_clean, 2655 ena_rx_ctx.pkt_offset); 2656 if (unlikely(mbuf == NULL)) { 2657 for (i = 0; i < ena_rx_ctx.descs; ++i) { 2658 rx_ring->empty_rx_reqs[next_to_clean] = 2659 rx_ring->ena_bufs[i].req_id; 2660 next_to_clean = ENA_IDX_NEXT_MASKED( 2661 next_to_clean, rx_ring->size_mask); 2662 } 2663 break; 2664 } 2665 2666 /* fill mbuf attributes if any */ 2667 ena_rx_mbuf_prepare(rx_ring, mbuf, &ena_rx_ctx, fill_hash); 2668 2669 if (unlikely(mbuf->ol_flags & 2670 (RTE_MBUF_F_RX_IP_CKSUM_BAD | RTE_MBUF_F_RX_L4_CKSUM_BAD))) 2671 rte_atomic64_inc(&rx_ring->adapter->drv_stats->ierrors); 
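	/* Packets with a bad checksum are only counted in ierrors above; they
	 * are still handed to the application with the corresponding
	 * RTE_MBUF_F_RX_*_CKSUM_BAD flags set in ol_flags.
	 */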
2672 2673 rx_pkts[completed] = mbuf; 2674 rx_ring->rx_stats.bytes += mbuf->pkt_len; 2675 } 2676 2677 rx_ring->rx_stats.cnt += completed; 2678 rx_ring->next_to_clean = next_to_clean; 2679 2680 free_queue_entries = ena_com_free_q_entries(rx_ring->ena_com_io_sq); 2681 2682 /* Burst refill to save doorbells, memory barriers, const interval */ 2683 if (free_queue_entries >= rx_ring->rx_free_thresh) { 2684 ena_com_update_dev_comp_head(rx_ring->ena_com_io_cq); 2685 ena_populate_rx_queue(rx_ring, free_queue_entries); 2686 } 2687 2688 return completed; 2689 } 2690 2691 static uint16_t 2692 eth_ena_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, 2693 uint16_t nb_pkts) 2694 { 2695 int32_t ret; 2696 uint32_t i; 2697 struct rte_mbuf *m; 2698 struct ena_ring *tx_ring = (struct ena_ring *)(tx_queue); 2699 struct ena_adapter *adapter = tx_ring->adapter; 2700 struct rte_ipv4_hdr *ip_hdr; 2701 uint64_t ol_flags; 2702 uint64_t l4_csum_flag; 2703 uint64_t dev_offload_capa; 2704 uint16_t frag_field; 2705 bool need_pseudo_csum; 2706 2707 dev_offload_capa = adapter->offloads.tx_offloads; 2708 for (i = 0; i != nb_pkts; i++) { 2709 m = tx_pkts[i]; 2710 ol_flags = m->ol_flags; 2711 2712 /* Check if any offload flag was set */ 2713 if (ol_flags == 0) 2714 continue; 2715 2716 l4_csum_flag = ol_flags & RTE_MBUF_F_TX_L4_MASK; 2717 /* SCTP checksum offload is not supported by the ENA. */ 2718 if ((ol_flags & ENA_TX_OFFLOAD_NOTSUP_MASK) || 2719 l4_csum_flag == RTE_MBUF_F_TX_SCTP_CKSUM) { 2720 PMD_TX_LOG(DEBUG, 2721 "mbuf[%" PRIu32 "] has unsupported offloads flags set: 0x%" PRIu64 "\n", 2722 i, ol_flags); 2723 rte_errno = ENOTSUP; 2724 return i; 2725 } 2726 2727 if (unlikely(m->nb_segs >= tx_ring->sgl_size && 2728 !(tx_ring->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV && 2729 m->nb_segs == tx_ring->sgl_size && 2730 m->data_len < tx_ring->tx_max_header_size))) { 2731 PMD_TX_LOG(DEBUG, 2732 "mbuf[%" PRIu32 "] has too many segments: %" PRIu16 "\n", 2733 i, m->nb_segs); 2734 rte_errno = EINVAL; 2735 return i; 2736 } 2737 2738 #ifdef RTE_LIBRTE_ETHDEV_DEBUG 2739 /* Check if requested offload is also enabled for the queue */ 2740 if ((ol_flags & RTE_MBUF_F_TX_IP_CKSUM && 2741 !(tx_ring->offloads & RTE_ETH_TX_OFFLOAD_IPV4_CKSUM)) || 2742 (l4_csum_flag == RTE_MBUF_F_TX_TCP_CKSUM && 2743 !(tx_ring->offloads & RTE_ETH_TX_OFFLOAD_TCP_CKSUM)) || 2744 (l4_csum_flag == RTE_MBUF_F_TX_UDP_CKSUM && 2745 !(tx_ring->offloads & RTE_ETH_TX_OFFLOAD_UDP_CKSUM))) { 2746 PMD_TX_LOG(DEBUG, 2747 "mbuf[%" PRIu32 "]: requested offloads: %" PRIu16 " are not enabled for the queue[%u]\n", 2748 i, m->nb_segs, tx_ring->id); 2749 rte_errno = EINVAL; 2750 return i; 2751 } 2752 2753 /* The caller is obligated to set l2 and l3 len if any cksum 2754 * offload is enabled. 2755 */ 2756 if (unlikely(ol_flags & (RTE_MBUF_F_TX_IP_CKSUM | RTE_MBUF_F_TX_L4_MASK) && 2757 (m->l2_len == 0 || m->l3_len == 0))) { 2758 PMD_TX_LOG(DEBUG, 2759 "mbuf[%" PRIu32 "]: l2_len or l3_len values are 0 while the offload was requested\n", 2760 i); 2761 rte_errno = EINVAL; 2762 return i; 2763 } 2764 ret = rte_validate_tx_offload(m); 2765 if (ret != 0) { 2766 rte_errno = -ret; 2767 return i; 2768 } 2769 #endif 2770 2771 /* Verify HW support for requested offloads and determine if 2772 * pseudo header checksum is needed. 
2773 */ 2774 need_pseudo_csum = false; 2775 if (ol_flags & RTE_MBUF_F_TX_IPV4) { 2776 if (ol_flags & RTE_MBUF_F_TX_IP_CKSUM && 2777 !(dev_offload_capa & ENA_L3_IPV4_CSUM)) { 2778 rte_errno = ENOTSUP; 2779 return i; 2780 } 2781 2782 if (ol_flags & RTE_MBUF_F_TX_TCP_SEG && 2783 !(dev_offload_capa & ENA_IPV4_TSO)) { 2784 rte_errno = ENOTSUP; 2785 return i; 2786 } 2787 2788 /* Check HW capabilities and if pseudo csum is needed 2789 * for L4 offloads. 2790 */ 2791 if (l4_csum_flag != RTE_MBUF_F_TX_L4_NO_CKSUM && 2792 !(dev_offload_capa & ENA_L4_IPV4_CSUM)) { 2793 if (dev_offload_capa & 2794 ENA_L4_IPV4_CSUM_PARTIAL) { 2795 need_pseudo_csum = true; 2796 } else { 2797 rte_errno = ENOTSUP; 2798 return i; 2799 } 2800 } 2801 2802 /* Parse the DF flag */ 2803 ip_hdr = rte_pktmbuf_mtod_offset(m, 2804 struct rte_ipv4_hdr *, m->l2_len); 2805 frag_field = rte_be_to_cpu_16(ip_hdr->fragment_offset); 2806 if (frag_field & RTE_IPV4_HDR_DF_FLAG) { 2807 m->packet_type |= RTE_PTYPE_L4_NONFRAG; 2808 } else if (ol_flags & RTE_MBUF_F_TX_TCP_SEG) { 2809 /* In case we are supposed to TSO and have DF 2810 * not set (DF=0) hardware must be provided with 2811 * partial checksum. 2812 */ 2813 need_pseudo_csum = true; 2814 } 2815 } else if (ol_flags & RTE_MBUF_F_TX_IPV6) { 2816 /* There is no support for IPv6 TSO as for now. */ 2817 if (ol_flags & RTE_MBUF_F_TX_TCP_SEG) { 2818 rte_errno = ENOTSUP; 2819 return i; 2820 } 2821 2822 /* Check HW capabilities and if pseudo csum is needed */ 2823 if (l4_csum_flag != RTE_MBUF_F_TX_L4_NO_CKSUM && 2824 !(dev_offload_capa & ENA_L4_IPV6_CSUM)) { 2825 if (dev_offload_capa & 2826 ENA_L4_IPV6_CSUM_PARTIAL) { 2827 need_pseudo_csum = true; 2828 } else { 2829 rte_errno = ENOTSUP; 2830 return i; 2831 } 2832 } 2833 } 2834 2835 if (need_pseudo_csum) { 2836 ret = rte_net_intel_cksum_flags_prepare(m, ol_flags); 2837 if (ret != 0) { 2838 rte_errno = -ret; 2839 return i; 2840 } 2841 } 2842 } 2843 2844 return i; 2845 } 2846 2847 static void ena_update_hints(struct ena_adapter *adapter, 2848 struct ena_admin_ena_hw_hints *hints) 2849 { 2850 if (hints->admin_completion_tx_timeout) 2851 adapter->ena_dev.admin_queue.completion_timeout = 2852 hints->admin_completion_tx_timeout * 1000; 2853 2854 if (hints->mmio_read_timeout) 2855 /* convert to usec */ 2856 adapter->ena_dev.mmio_read.reg_read_to = 2857 hints->mmio_read_timeout * 1000; 2858 2859 if (hints->driver_watchdog_timeout) { 2860 if (hints->driver_watchdog_timeout == ENA_HW_HINTS_NO_TIMEOUT) 2861 adapter->keep_alive_timeout = ENA_HW_HINTS_NO_TIMEOUT; 2862 else 2863 // Convert msecs to ticks 2864 adapter->keep_alive_timeout = 2865 (hints->driver_watchdog_timeout * 2866 rte_get_timer_hz()) / 1000; 2867 } 2868 } 2869 2870 static void ena_tx_map_mbuf(struct ena_ring *tx_ring, 2871 struct ena_tx_buffer *tx_info, 2872 struct rte_mbuf *mbuf, 2873 void **push_header, 2874 uint16_t *header_len) 2875 { 2876 struct ena_com_buf *ena_buf; 2877 uint16_t delta, seg_len, push_len; 2878 2879 delta = 0; 2880 seg_len = mbuf->data_len; 2881 2882 tx_info->mbuf = mbuf; 2883 ena_buf = tx_info->bufs; 2884 2885 if (tx_ring->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) { 2886 /* 2887 * Tx header might be (and will be in most cases) smaller than 2888 * tx_max_header_size. But it's not an issue to send more data 2889 * to the device, than actually needed if the mbuf size is 2890 * greater than tx_max_header_size. 
2891 */ 2892 push_len = RTE_MIN(mbuf->pkt_len, tx_ring->tx_max_header_size); 2893 *header_len = push_len; 2894 2895 if (likely(push_len <= seg_len)) { 2896 /* If the push header is in the single segment, then 2897 * just point it to the 1st mbuf data. 2898 */ 2899 *push_header = rte_pktmbuf_mtod(mbuf, uint8_t *); 2900 } else { 2901 /* If the push header lays in the several segments, copy 2902 * it to the intermediate buffer. 2903 */ 2904 rte_pktmbuf_read(mbuf, 0, push_len, 2905 tx_ring->push_buf_intermediate_buf); 2906 *push_header = tx_ring->push_buf_intermediate_buf; 2907 delta = push_len - seg_len; 2908 } 2909 } else { 2910 *push_header = NULL; 2911 *header_len = 0; 2912 push_len = 0; 2913 } 2914 2915 /* Process first segment taking into consideration pushed header */ 2916 if (seg_len > push_len) { 2917 ena_buf->paddr = mbuf->buf_iova + 2918 mbuf->data_off + 2919 push_len; 2920 ena_buf->len = seg_len - push_len; 2921 ena_buf++; 2922 tx_info->num_of_bufs++; 2923 } 2924 2925 while ((mbuf = mbuf->next) != NULL) { 2926 seg_len = mbuf->data_len; 2927 2928 /* Skip mbufs if whole data is pushed as a header */ 2929 if (unlikely(delta > seg_len)) { 2930 delta -= seg_len; 2931 continue; 2932 } 2933 2934 ena_buf->paddr = mbuf->buf_iova + mbuf->data_off + delta; 2935 ena_buf->len = seg_len - delta; 2936 ena_buf++; 2937 tx_info->num_of_bufs++; 2938 2939 delta = 0; 2940 } 2941 } 2942 2943 static int ena_xmit_mbuf(struct ena_ring *tx_ring, struct rte_mbuf *mbuf) 2944 { 2945 struct ena_tx_buffer *tx_info; 2946 struct ena_com_tx_ctx ena_tx_ctx = { { 0 } }; 2947 uint16_t next_to_use; 2948 uint16_t header_len; 2949 uint16_t req_id; 2950 void *push_header; 2951 int nb_hw_desc; 2952 int rc; 2953 2954 /* Checking for space for 2 additional metadata descriptors due to 2955 * possible header split and metadata descriptor 2956 */ 2957 if (!ena_com_sq_have_enough_space(tx_ring->ena_com_io_sq, 2958 mbuf->nb_segs + 2)) { 2959 PMD_DRV_LOG(DEBUG, "Not enough space in the tx queue\n"); 2960 return ENA_COM_NO_MEM; 2961 } 2962 2963 next_to_use = tx_ring->next_to_use; 2964 2965 req_id = tx_ring->empty_tx_reqs[next_to_use]; 2966 tx_info = &tx_ring->tx_buffer_info[req_id]; 2967 tx_info->num_of_bufs = 0; 2968 RTE_ASSERT(tx_info->mbuf == NULL); 2969 2970 ena_tx_map_mbuf(tx_ring, tx_info, mbuf, &push_header, &header_len); 2971 2972 ena_tx_ctx.ena_bufs = tx_info->bufs; 2973 ena_tx_ctx.push_header = push_header; 2974 ena_tx_ctx.num_bufs = tx_info->num_of_bufs; 2975 ena_tx_ctx.req_id = req_id; 2976 ena_tx_ctx.header_len = header_len; 2977 2978 /* Set Tx offloads flags, if applicable */ 2979 ena_tx_mbuf_prepare(mbuf, &ena_tx_ctx, tx_ring->offloads, 2980 tx_ring->disable_meta_caching); 2981 2982 if (unlikely(ena_com_is_doorbell_needed(tx_ring->ena_com_io_sq, 2983 &ena_tx_ctx))) { 2984 PMD_TX_LOG(DEBUG, 2985 "LLQ Tx max burst size of queue %d achieved, writing doorbell to send burst\n", 2986 tx_ring->id); 2987 ena_com_write_sq_doorbell(tx_ring->ena_com_io_sq); 2988 tx_ring->tx_stats.doorbells++; 2989 tx_ring->pkts_without_db = false; 2990 } 2991 2992 /* prepare the packet's descriptors to dma engine */ 2993 rc = ena_com_prepare_tx(tx_ring->ena_com_io_sq, &ena_tx_ctx, 2994 &nb_hw_desc); 2995 if (unlikely(rc)) { 2996 PMD_DRV_LOG(ERR, "Failed to prepare Tx buffers, rc: %d\n", rc); 2997 ++tx_ring->tx_stats.prepare_ctx_err; 2998 ena_trigger_reset(tx_ring->adapter, 2999 ENA_REGS_RESET_DRIVER_INVALID_STATE); 3000 return rc; 3001 } 3002 3003 tx_info->tx_descs = nb_hw_desc; 3004 tx_info->timestamp = rte_get_timer_cycles(); 3005 3006 
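	/* The timestamp stored above is compared against the missing Tx
	 * completion timeout by check_for_tx_completion_in_queue() to detect
	 * packets stuck in the queue.
	 */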
tx_ring->tx_stats.cnt++; 3007 tx_ring->tx_stats.bytes += mbuf->pkt_len; 3008 3009 tx_ring->next_to_use = ENA_IDX_NEXT_MASKED(next_to_use, 3010 tx_ring->size_mask); 3011 3012 return 0; 3013 } 3014 3015 static __rte_always_inline size_t 3016 ena_tx_cleanup_mbuf_fast(struct rte_mbuf **mbufs_to_clean, 3017 struct rte_mbuf *mbuf, 3018 size_t mbuf_cnt, 3019 size_t buf_size) 3020 { 3021 struct rte_mbuf *m_next; 3022 3023 while (mbuf != NULL) { 3024 m_next = mbuf->next; 3025 mbufs_to_clean[mbuf_cnt++] = mbuf; 3026 if (mbuf_cnt == buf_size) { 3027 rte_mempool_put_bulk(mbufs_to_clean[0]->pool, (void **)mbufs_to_clean, 3028 (unsigned int)mbuf_cnt); 3029 mbuf_cnt = 0; 3030 } 3031 mbuf = m_next; 3032 } 3033 3034 return mbuf_cnt; 3035 } 3036 3037 static int ena_tx_cleanup(void *txp, uint32_t free_pkt_cnt) 3038 { 3039 struct rte_mbuf *mbufs_to_clean[ENA_CLEANUP_BUF_SIZE]; 3040 struct ena_ring *tx_ring = (struct ena_ring *)txp; 3041 size_t mbuf_cnt = 0; 3042 unsigned int total_tx_descs = 0; 3043 unsigned int total_tx_pkts = 0; 3044 uint16_t cleanup_budget; 3045 uint16_t next_to_clean = tx_ring->next_to_clean; 3046 bool fast_free = tx_ring->offloads & RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE; 3047 3048 /* 3049 * If free_pkt_cnt is equal to 0, it means that the user requested 3050 * full cleanup, so attempt to release all Tx descriptors 3051 * (ring_size - 1 -> size_mask) 3052 */ 3053 cleanup_budget = (free_pkt_cnt == 0) ? tx_ring->size_mask : free_pkt_cnt; 3054 3055 while (likely(total_tx_pkts < cleanup_budget)) { 3056 struct rte_mbuf *mbuf; 3057 struct ena_tx_buffer *tx_info; 3058 uint16_t req_id; 3059 3060 if (ena_com_tx_comp_req_id_get(tx_ring->ena_com_io_cq, &req_id) != 0) 3061 break; 3062 3063 if (unlikely(validate_tx_req_id(tx_ring, req_id) != 0)) 3064 break; 3065 3066 /* Get Tx info & store how many descs were processed */ 3067 tx_info = &tx_ring->tx_buffer_info[req_id]; 3068 tx_info->timestamp = 0; 3069 3070 mbuf = tx_info->mbuf; 3071 if (fast_free) { 3072 mbuf_cnt = ena_tx_cleanup_mbuf_fast(mbufs_to_clean, mbuf, mbuf_cnt, 3073 ENA_CLEANUP_BUF_SIZE); 3074 } else { 3075 rte_pktmbuf_free(mbuf); 3076 } 3077 3078 tx_info->mbuf = NULL; 3079 tx_ring->empty_tx_reqs[next_to_clean] = req_id; 3080 3081 total_tx_descs += tx_info->tx_descs; 3082 total_tx_pkts++; 3083 3084 /* Put back descriptor to the ring for reuse */ 3085 next_to_clean = ENA_IDX_NEXT_MASKED(next_to_clean, 3086 tx_ring->size_mask); 3087 } 3088 3089 if (likely(total_tx_descs > 0)) { 3090 /* acknowledge completion of sent packets */ 3091 tx_ring->next_to_clean = next_to_clean; 3092 ena_com_comp_ack(tx_ring->ena_com_io_sq, total_tx_descs); 3093 ena_com_update_dev_comp_head(tx_ring->ena_com_io_cq); 3094 } 3095 3096 if (mbuf_cnt != 0) 3097 rte_mempool_put_bulk(mbufs_to_clean[0]->pool, 3098 (void **)mbufs_to_clean, mbuf_cnt); 3099 3100 /* Notify completion handler that full cleanup was performed */ 3101 if (free_pkt_cnt == 0 || total_tx_pkts < cleanup_budget) 3102 tx_ring->last_cleanup_ticks = rte_get_timer_cycles(); 3103 3104 return total_tx_pkts; 3105 } 3106 3107 static uint16_t eth_ena_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, 3108 uint16_t nb_pkts) 3109 { 3110 struct ena_ring *tx_ring = (struct ena_ring *)(tx_queue); 3111 int available_desc; 3112 uint16_t sent_idx = 0; 3113 3114 #ifdef RTE_ETHDEV_DEBUG_TX 3115 /* Check adapter state */ 3116 if (unlikely(tx_ring->adapter->state != ENA_ADAPTER_STATE_RUNNING)) { 3117 PMD_TX_LOG(ALERT, 3118 "Trying to xmit pkts while device is NOT running\n"); 3119 return 0; 3120 } 3121 #endif 3122 3123 
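	/* Reclaim completed Tx descriptors before admitting new packets if the
	 * number of free SQ entries dropped below the queue's free threshold.
	 */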
available_desc = ena_com_free_q_entries(tx_ring->ena_com_io_sq); 3124 if (available_desc < tx_ring->tx_free_thresh) 3125 ena_tx_cleanup((void *)tx_ring, 0); 3126 3127 for (sent_idx = 0; sent_idx < nb_pkts; sent_idx++) { 3128 if (ena_xmit_mbuf(tx_ring, tx_pkts[sent_idx])) 3129 break; 3130 tx_ring->pkts_without_db = true; 3131 rte_prefetch0(tx_pkts[ENA_IDX_ADD_MASKED(sent_idx, 4, 3132 tx_ring->size_mask)]); 3133 } 3134 3135 /* If there are ready packets to be xmitted... */ 3136 if (likely(tx_ring->pkts_without_db)) { 3137 /* ...let HW do its best :-) */ 3138 ena_com_write_sq_doorbell(tx_ring->ena_com_io_sq); 3139 tx_ring->tx_stats.doorbells++; 3140 tx_ring->pkts_without_db = false; 3141 } 3142 3143 tx_ring->tx_stats.available_desc = 3144 ena_com_free_q_entries(tx_ring->ena_com_io_sq); 3145 tx_ring->tx_stats.tx_poll++; 3146 3147 return sent_idx; 3148 } 3149 3150 int ena_copy_eni_stats(struct ena_adapter *adapter, struct ena_stats_eni *stats) 3151 { 3152 int rc; 3153 3154 rte_spinlock_lock(&adapter->admin_lock); 3155 /* Retrieve and store the latest statistics from the AQ. This ensures 3156 * that previous value is returned in case of a com error. 3157 */ 3158 rc = ENA_PROXY(adapter, ena_com_get_eni_stats, &adapter->ena_dev, 3159 (struct ena_admin_eni_stats *)stats); 3160 rte_spinlock_unlock(&adapter->admin_lock); 3161 if (rc != 0) { 3162 if (rc == ENA_COM_UNSUPPORTED) { 3163 PMD_DRV_LOG(DEBUG, 3164 "Retrieving ENI metrics is not supported\n"); 3165 } else { 3166 PMD_DRV_LOG(WARNING, 3167 "Failed to get ENI metrics, rc: %d\n", rc); 3168 } 3169 return rc; 3170 } 3171 3172 return 0; 3173 } 3174 3175 /** 3176 * DPDK callback to retrieve names of extended device statistics 3177 * 3178 * @param dev 3179 * Pointer to Ethernet device structure. 3180 * @param[out] xstats_names 3181 * Buffer to insert names into. 3182 * @param n 3183 * Number of names. 3184 * 3185 * @return 3186 * Number of xstats names. 3187 */ 3188 static int ena_xstats_get_names(struct rte_eth_dev *dev, 3189 struct rte_eth_xstat_name *xstats_names, 3190 unsigned int n) 3191 { 3192 unsigned int xstats_count = ena_xstats_calc_num(dev->data); 3193 unsigned int stat, i, count = 0; 3194 3195 if (n < xstats_count || !xstats_names) 3196 return xstats_count; 3197 3198 for (stat = 0; stat < ENA_STATS_ARRAY_GLOBAL; stat++, count++) 3199 strcpy(xstats_names[count].name, 3200 ena_stats_global_strings[stat].name); 3201 3202 for (stat = 0; stat < ENA_STATS_ARRAY_ENI; stat++, count++) 3203 strcpy(xstats_names[count].name, 3204 ena_stats_eni_strings[stat].name); 3205 3206 for (stat = 0; stat < ENA_STATS_ARRAY_RX; stat++) 3207 for (i = 0; i < dev->data->nb_rx_queues; i++, count++) 3208 snprintf(xstats_names[count].name, 3209 sizeof(xstats_names[count].name), 3210 "rx_q%d_%s", i, 3211 ena_stats_rx_strings[stat].name); 3212 3213 for (stat = 0; stat < ENA_STATS_ARRAY_TX; stat++) 3214 for (i = 0; i < dev->data->nb_tx_queues; i++, count++) 3215 snprintf(xstats_names[count].name, 3216 sizeof(xstats_names[count].name), 3217 "tx_q%d_%s", i, 3218 ena_stats_tx_strings[stat].name); 3219 3220 return xstats_count; 3221 } 3222 3223 /** 3224 * DPDK callback to retrieve names of extended device statistics for the given 3225 * ids. 3226 * 3227 * @param dev 3228 * Pointer to Ethernet device structure. 3229 * @param[out] xstats_names 3230 * Buffer to insert names into. 3231 * @param ids 3232 * IDs array for which the names should be retrieved. 3233 * @param size 3234 * Number of ids. 3235 * 3236 * @return 3237 * Positive value: number of xstats names. 
Negative value: error code.
3238 */
3239 static int ena_xstats_get_names_by_id(struct rte_eth_dev *dev,
3240 const uint64_t *ids,
3241 struct rte_eth_xstat_name *xstats_names,
3242 unsigned int size)
3243 {
3244 uint64_t xstats_count = ena_xstats_calc_num(dev->data);
3245 uint64_t id, qid;
3246 unsigned int i;
3247 
3248 if (xstats_names == NULL)
3249 return xstats_count;
3250 
3251 for (i = 0; i < size; ++i) {
3252 id = ids[i];
3253 if (id >= xstats_count) {
3254 PMD_DRV_LOG(ERR,
3255 "ID value out of range: id=%" PRIu64 ", xstats_num=%" PRIu64 "\n",
3256 id, xstats_count);
3257 return -EINVAL;
3258 }
3259 
3260 if (id < ENA_STATS_ARRAY_GLOBAL) {
3261 strcpy(xstats_names[i].name,
3262 ena_stats_global_strings[id].name);
3263 continue;
3264 }
3265 
3266 id -= ENA_STATS_ARRAY_GLOBAL;
3267 if (id < ENA_STATS_ARRAY_ENI) {
3268 strcpy(xstats_names[i].name,
3269 ena_stats_eni_strings[id].name);
3270 continue;
3271 }
3272 
3273 id -= ENA_STATS_ARRAY_ENI;
3274 if (id < ENA_STATS_ARRAY_RX * dev->data->nb_rx_queues) {
3275 qid = id % dev->data->nb_rx_queues;
3276 id /= dev->data->nb_rx_queues;
3277 snprintf(xstats_names[i].name,
3278 sizeof(xstats_names[i].name),
3279 "rx_q%" PRIu64 "_%s",
3280 qid, ena_stats_rx_strings[id].name);
3281 continue;
3282 }
3283 
3284 id -= ENA_STATS_ARRAY_RX * dev->data->nb_rx_queues;
3285 /* Although this condition is not needed, it was added for
3286 * compatibility in case a new xstat structure is ever added.
3287 */
3288 if (id < ENA_STATS_ARRAY_TX * dev->data->nb_tx_queues) {
3289 qid = id % dev->data->nb_tx_queues;
3290 id /= dev->data->nb_tx_queues;
3291 snprintf(xstats_names[i].name,
3292 sizeof(xstats_names[i].name),
3293 "tx_q%" PRIu64 "_%s",
3294 qid, ena_stats_tx_strings[id].name);
3295 continue;
3296 }
3297 }
3298 
3299 return i;
3300 }
3301 
3302 /**
3303 * DPDK callback to get extended device statistics.
3304 *
3305 * @param dev
3306 * Pointer to Ethernet device structure.
3307 * @param[out] stats
3308 * Stats table output buffer.
3309 * @param n
3310 * The size of the stats table.
3311 *
3312 * @return
3313 * Number of xstats on success, negative on failure.
3314 */
3315 static int ena_xstats_get(struct rte_eth_dev *dev,
3316 struct rte_eth_xstat *xstats,
3317 unsigned int n)
3318 {
3319 struct ena_adapter *adapter = dev->data->dev_private;
3320 unsigned int xstats_count = ena_xstats_calc_num(dev->data);
3321 struct ena_stats_eni eni_stats;
3322 unsigned int stat, i, count = 0;
3323 int stat_offset;
3324 void *stats_begin;
3325 
3326 if (n < xstats_count)
3327 return xstats_count;
3328 
3329 if (!xstats)
3330 return 0;
3331 
3332 for (stat = 0; stat < ENA_STATS_ARRAY_GLOBAL; stat++, count++) {
3333 stat_offset = ena_stats_global_strings[stat].stat_offset;
3334 stats_begin = &adapter->dev_stats;
3335 
3336 xstats[count].id = count;
3337 xstats[count].value = *((uint64_t *)
3338 ((char *)stats_begin + stat_offset));
3339 }
3340 
3341 /* Even if the function below fails, we should copy previous (or initial
3342 * values) to keep structure of rte_eth_xstat consistent.
3343 */ 3344 ena_copy_eni_stats(adapter, &eni_stats); 3345 for (stat = 0; stat < ENA_STATS_ARRAY_ENI; stat++, count++) { 3346 stat_offset = ena_stats_eni_strings[stat].stat_offset; 3347 stats_begin = &eni_stats; 3348 3349 xstats[count].id = count; 3350 xstats[count].value = *((uint64_t *) 3351 ((char *)stats_begin + stat_offset)); 3352 } 3353 3354 for (stat = 0; stat < ENA_STATS_ARRAY_RX; stat++) { 3355 for (i = 0; i < dev->data->nb_rx_queues; i++, count++) { 3356 stat_offset = ena_stats_rx_strings[stat].stat_offset; 3357 stats_begin = &adapter->rx_ring[i].rx_stats; 3358 3359 xstats[count].id = count; 3360 xstats[count].value = *((uint64_t *) 3361 ((char *)stats_begin + stat_offset)); 3362 } 3363 } 3364 3365 for (stat = 0; stat < ENA_STATS_ARRAY_TX; stat++) { 3366 for (i = 0; i < dev->data->nb_tx_queues; i++, count++) { 3367 stat_offset = ena_stats_tx_strings[stat].stat_offset; 3368 stats_begin = &adapter->tx_ring[i].rx_stats; 3369 3370 xstats[count].id = count; 3371 xstats[count].value = *((uint64_t *) 3372 ((char *)stats_begin + stat_offset)); 3373 } 3374 } 3375 3376 return count; 3377 } 3378 3379 static int ena_xstats_get_by_id(struct rte_eth_dev *dev, 3380 const uint64_t *ids, 3381 uint64_t *values, 3382 unsigned int n) 3383 { 3384 struct ena_adapter *adapter = dev->data->dev_private; 3385 struct ena_stats_eni eni_stats; 3386 uint64_t id; 3387 uint64_t rx_entries, tx_entries; 3388 unsigned int i; 3389 int qid; 3390 int valid = 0; 3391 bool was_eni_copied = false; 3392 3393 for (i = 0; i < n; ++i) { 3394 id = ids[i]; 3395 /* Check if id belongs to global statistics */ 3396 if (id < ENA_STATS_ARRAY_GLOBAL) { 3397 values[i] = *((uint64_t *)&adapter->dev_stats + id); 3398 ++valid; 3399 continue; 3400 } 3401 3402 /* Check if id belongs to ENI statistics */ 3403 id -= ENA_STATS_ARRAY_GLOBAL; 3404 if (id < ENA_STATS_ARRAY_ENI) { 3405 /* Avoid reading ENI stats multiple times in a single 3406 * function call, as it requires communication with the 3407 * admin queue. 3408 */ 3409 if (!was_eni_copied) { 3410 was_eni_copied = true; 3411 ena_copy_eni_stats(adapter, &eni_stats); 3412 } 3413 values[i] = *((uint64_t *)&eni_stats + id); 3414 ++valid; 3415 continue; 3416 } 3417 3418 /* Check if id belongs to rx queue statistics */ 3419 id -= ENA_STATS_ARRAY_ENI; 3420 rx_entries = ENA_STATS_ARRAY_RX * dev->data->nb_rx_queues; 3421 if (id < rx_entries) { 3422 qid = id % dev->data->nb_rx_queues; 3423 id /= dev->data->nb_rx_queues; 3424 values[i] = *((uint64_t *) 3425 &adapter->rx_ring[qid].rx_stats + id); 3426 ++valid; 3427 continue; 3428 } 3429 /* Check if id belongs to rx queue statistics */ 3430 id -= rx_entries; 3431 tx_entries = ENA_STATS_ARRAY_TX * dev->data->nb_tx_queues; 3432 if (id < tx_entries) { 3433 qid = id % dev->data->nb_tx_queues; 3434 id /= dev->data->nb_tx_queues; 3435 values[i] = *((uint64_t *) 3436 &adapter->tx_ring[qid].tx_stats + id); 3437 ++valid; 3438 continue; 3439 } 3440 } 3441 3442 return valid; 3443 } 3444 3445 static int ena_process_uint_devarg(const char *key, 3446 const char *value, 3447 void *opaque) 3448 { 3449 struct ena_adapter *adapter = opaque; 3450 char *str_end; 3451 uint64_t uint_value; 3452 3453 uint_value = strtoull(value, &str_end, 10); 3454 if (value == str_end) { 3455 PMD_INIT_LOG(ERR, 3456 "Invalid value for key '%s'. 
Only uint values are accepted.\n", 3457 key); 3458 return -EINVAL; 3459 } 3460 3461 if (strcmp(key, ENA_DEVARG_MISS_TXC_TO) == 0) { 3462 if (uint_value > ENA_MAX_TX_TIMEOUT_SECONDS) { 3463 PMD_INIT_LOG(ERR, 3464 "Tx timeout too high: %" PRIu64 " sec. Maximum allowed: %d sec.\n", 3465 uint_value, ENA_MAX_TX_TIMEOUT_SECONDS); 3466 return -EINVAL; 3467 } else if (uint_value == 0) { 3468 PMD_INIT_LOG(INFO, 3469 "Check for missing Tx completions has been disabled.\n"); 3470 adapter->missing_tx_completion_to = 3471 ENA_HW_HINTS_NO_TIMEOUT; 3472 } else { 3473 PMD_INIT_LOG(INFO, 3474 "Tx packet completion timeout set to %" PRIu64 " seconds.\n", 3475 uint_value); 3476 adapter->missing_tx_completion_to = 3477 uint_value * rte_get_timer_hz(); 3478 } 3479 } 3480 3481 return 0; 3482 } 3483 3484 static int ena_process_bool_devarg(const char *key, 3485 const char *value, 3486 void *opaque) 3487 { 3488 struct ena_adapter *adapter = opaque; 3489 bool bool_value; 3490 3491 /* Parse the value. */ 3492 if (strcmp(value, "1") == 0) { 3493 bool_value = true; 3494 } else if (strcmp(value, "0") == 0) { 3495 bool_value = false; 3496 } else { 3497 PMD_INIT_LOG(ERR, 3498 "Invalid value: '%s' for key '%s'. Accepted: '0' or '1'\n", 3499 value, key); 3500 return -EINVAL; 3501 } 3502 3503 /* Now, assign it to the proper adapter field. */ 3504 if (strcmp(key, ENA_DEVARG_LARGE_LLQ_HDR) == 0) 3505 adapter->use_large_llq_hdr = bool_value; 3506 3507 return 0; 3508 } 3509 3510 static int ena_parse_devargs(struct ena_adapter *adapter, 3511 struct rte_devargs *devargs) 3512 { 3513 static const char * const allowed_args[] = { 3514 ENA_DEVARG_LARGE_LLQ_HDR, 3515 ENA_DEVARG_MISS_TXC_TO, 3516 NULL, 3517 }; 3518 struct rte_kvargs *kvlist; 3519 int rc; 3520 3521 if (devargs == NULL) 3522 return 0; 3523 3524 kvlist = rte_kvargs_parse(devargs->args, allowed_args); 3525 if (kvlist == NULL) { 3526 PMD_INIT_LOG(ERR, "Invalid device arguments: %s\n", 3527 devargs->args); 3528 return -EINVAL; 3529 } 3530 3531 rc = rte_kvargs_process(kvlist, ENA_DEVARG_LARGE_LLQ_HDR, 3532 ena_process_bool_devarg, adapter); 3533 if (rc != 0) 3534 goto exit; 3535 rc = rte_kvargs_process(kvlist, ENA_DEVARG_MISS_TXC_TO, 3536 ena_process_uint_devarg, adapter); 3537 3538 exit: 3539 rte_kvargs_free(kvlist); 3540 3541 return rc; 3542 } 3543 3544 static int ena_setup_rx_intr(struct rte_eth_dev *dev) 3545 { 3546 struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev); 3547 struct rte_intr_handle *intr_handle = pci_dev->intr_handle; 3548 int rc; 3549 uint16_t vectors_nb, i; 3550 bool rx_intr_requested = dev->data->dev_conf.intr_conf.rxq; 3551 3552 if (!rx_intr_requested) 3553 return 0; 3554 3555 if (!rte_intr_cap_multiple(intr_handle)) { 3556 PMD_DRV_LOG(ERR, 3557 "Rx interrupt requested, but it isn't supported by the PCI driver\n"); 3558 return -ENOTSUP; 3559 } 3560 3561 /* Disable interrupt mapping before the configuration starts. */ 3562 rte_intr_disable(intr_handle); 3563 3564 /* Verify if there are enough vectors available. 
*/ 3565 vectors_nb = dev->data->nb_rx_queues; 3566 if (vectors_nb > RTE_MAX_RXTX_INTR_VEC_ID) { 3567 PMD_DRV_LOG(ERR, 3568 "Too many Rx interrupts requested, maximum number: %d\n", 3569 RTE_MAX_RXTX_INTR_VEC_ID); 3570 rc = -ENOTSUP; 3571 goto enable_intr; 3572 } 3573 3574 /* Allocate the vector list */ 3575 if (rte_intr_vec_list_alloc(intr_handle, "intr_vec", 3576 dev->data->nb_rx_queues)) { 3577 PMD_DRV_LOG(ERR, 3578 "Failed to allocate interrupt vector for %d queues\n", 3579 dev->data->nb_rx_queues); 3580 rc = -ENOMEM; 3581 goto enable_intr; 3582 } 3583 3584 rc = rte_intr_efd_enable(intr_handle, vectors_nb); 3585 if (rc != 0) 3586 goto free_intr_vec; 3587 3588 if (!rte_intr_allow_others(intr_handle)) { 3589 PMD_DRV_LOG(ERR, 3590 "Not enough interrupts available to use both ENA Admin and Rx interrupts\n"); 3591 goto disable_intr_efd; 3592 } 3593 3594 for (i = 0; i < vectors_nb; ++i) 3595 if (rte_intr_vec_list_index_set(intr_handle, i, 3596 RTE_INTR_VEC_RXTX_OFFSET + i)) 3597 goto disable_intr_efd; 3598 3599 rte_intr_enable(intr_handle); 3600 return 0; 3601 3602 disable_intr_efd: 3603 rte_intr_efd_disable(intr_handle); 3604 free_intr_vec: 3605 rte_intr_vec_list_free(intr_handle); 3606 enable_intr: 3607 rte_intr_enable(intr_handle); 3608 return rc; 3609 } 3610 3611 static void ena_rx_queue_intr_set(struct rte_eth_dev *dev, 3612 uint16_t queue_id, 3613 bool unmask) 3614 { 3615 struct ena_adapter *adapter = dev->data->dev_private; 3616 struct ena_ring *rxq = &adapter->rx_ring[queue_id]; 3617 struct ena_eth_io_intr_reg intr_reg; 3618 3619 ena_com_update_intr_reg(&intr_reg, 0, 0, unmask); 3620 ena_com_unmask_intr(rxq->ena_com_io_cq, &intr_reg); 3621 } 3622 3623 static int ena_rx_queue_intr_enable(struct rte_eth_dev *dev, 3624 uint16_t queue_id) 3625 { 3626 ena_rx_queue_intr_set(dev, queue_id, true); 3627 3628 return 0; 3629 } 3630 3631 static int ena_rx_queue_intr_disable(struct rte_eth_dev *dev, 3632 uint16_t queue_id) 3633 { 3634 ena_rx_queue_intr_set(dev, queue_id, false); 3635 3636 return 0; 3637 } 3638 3639 static int ena_configure_aenq(struct ena_adapter *adapter) 3640 { 3641 uint32_t aenq_groups = adapter->all_aenq_groups; 3642 int rc; 3643 3644 /* All_aenq_groups holds all AENQ functions supported by the device and 3645 * the HW, so at first we need to be sure the LSC request is valid. 3646 */ 3647 if (adapter->edev_data->dev_conf.intr_conf.lsc != 0) { 3648 if (!(aenq_groups & BIT(ENA_ADMIN_LINK_CHANGE))) { 3649 PMD_DRV_LOG(ERR, 3650 "LSC requested, but it's not supported by the AENQ\n"); 3651 return -EINVAL; 3652 } 3653 } else { 3654 /* If LSC wasn't enabled by the app, let's enable all supported 3655 * AENQ procedures except the LSC. 
3656 */ 3657 aenq_groups &= ~BIT(ENA_ADMIN_LINK_CHANGE); 3658 } 3659 3660 rc = ena_com_set_aenq_config(&adapter->ena_dev, aenq_groups); 3661 if (rc != 0) { 3662 PMD_DRV_LOG(ERR, "Cannot configure AENQ groups, rc=%d\n", rc); 3663 return rc; 3664 } 3665 3666 adapter->active_aenq_groups = aenq_groups; 3667 3668 return 0; 3669 } 3670 3671 int ena_mp_indirect_table_set(struct ena_adapter *adapter) 3672 { 3673 return ENA_PROXY(adapter, ena_com_indirect_table_set, &adapter->ena_dev); 3674 } 3675 3676 int ena_mp_indirect_table_get(struct ena_adapter *adapter, 3677 uint32_t *indirect_table) 3678 { 3679 return ENA_PROXY(adapter, ena_com_indirect_table_get, &adapter->ena_dev, 3680 indirect_table); 3681 } 3682 3683 /********************************************************************* 3684 * ena_plat_dpdk.h functions implementations 3685 *********************************************************************/ 3686 3687 const struct rte_memzone * 3688 ena_mem_alloc_coherent(struct rte_eth_dev_data *data, size_t size, 3689 int socket_id, unsigned int alignment, void **virt_addr, 3690 dma_addr_t *phys_addr) 3691 { 3692 char z_name[RTE_MEMZONE_NAMESIZE]; 3693 struct ena_adapter *adapter = data->dev_private; 3694 const struct rte_memzone *memzone; 3695 int rc; 3696 3697 rc = snprintf(z_name, RTE_MEMZONE_NAMESIZE, "ena_p%d_mz%" PRIu64 "", 3698 data->port_id, adapter->memzone_cnt); 3699 if (rc >= RTE_MEMZONE_NAMESIZE) { 3700 PMD_DRV_LOG(ERR, 3701 "Name for the ena_com memzone is too long. Port: %d, mz_num: %" PRIu64 "\n", 3702 data->port_id, adapter->memzone_cnt); 3703 goto error; 3704 } 3705 adapter->memzone_cnt++; 3706 3707 memzone = rte_memzone_reserve_aligned(z_name, size, socket_id, 3708 RTE_MEMZONE_IOVA_CONTIG, alignment); 3709 if (memzone == NULL) { 3710 PMD_DRV_LOG(ERR, "Failed to allocate ena_com memzone: %s\n", 3711 z_name); 3712 goto error; 3713 } 3714 3715 memset(memzone->addr, 0, size); 3716 *virt_addr = memzone->addr; 3717 *phys_addr = memzone->iova; 3718 3719 return memzone; 3720 3721 error: 3722 *virt_addr = NULL; 3723 *phys_addr = 0; 3724 3725 return NULL; 3726 } 3727 3728 3729 /********************************************************************* 3730 * PMD configuration 3731 *********************************************************************/ 3732 static int eth_ena_pci_probe(struct rte_pci_driver *pci_drv __rte_unused, 3733 struct rte_pci_device *pci_dev) 3734 { 3735 return rte_eth_dev_pci_generic_probe(pci_dev, 3736 sizeof(struct ena_adapter), eth_ena_dev_init); 3737 } 3738 3739 static int eth_ena_pci_remove(struct rte_pci_device *pci_dev) 3740 { 3741 return rte_eth_dev_pci_generic_remove(pci_dev, eth_ena_dev_uninit); 3742 } 3743 3744 static struct rte_pci_driver rte_ena_pmd = { 3745 .id_table = pci_id_ena_map, 3746 .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC | 3747 RTE_PCI_DRV_WC_ACTIVATE, 3748 .probe = eth_ena_pci_probe, 3749 .remove = eth_ena_pci_remove, 3750 }; 3751 3752 RTE_PMD_REGISTER_PCI(net_ena, rte_ena_pmd); 3753 RTE_PMD_REGISTER_PCI_TABLE(net_ena, pci_id_ena_map); 3754 RTE_PMD_REGISTER_KMOD_DEP(net_ena, "* igb_uio | uio_pci_generic | vfio-pci"); 3755 RTE_PMD_REGISTER_PARAM_STRING(net_ena, ENA_DEVARG_LARGE_LLQ_HDR "=<0|1>"); 3756 RTE_LOG_REGISTER_SUFFIX(ena_logtype_init, init, NOTICE); 3757 RTE_LOG_REGISTER_SUFFIX(ena_logtype_driver, driver, NOTICE); 3758 #ifdef RTE_ETHDEV_DEBUG_RX 3759 RTE_LOG_REGISTER_SUFFIX(ena_logtype_rx, rx, DEBUG); 3760 #endif 3761 #ifdef RTE_ETHDEV_DEBUG_TX 3762 RTE_LOG_REGISTER_SUFFIX(ena_logtype_tx, tx, DEBUG); 3763 #endif 3764 
/******************************************************************************
 ******************************** AENQ Handlers *******************************
 *****************************************************************************/
static void ena_update_on_link_change(void *adapter_data,
				      struct ena_admin_aenq_entry *aenq_e)
{
	struct rte_eth_dev *eth_dev = adapter_data;
	struct ena_adapter *adapter = eth_dev->data->dev_private;
	struct ena_admin_aenq_link_change_desc *aenq_link_desc;
	uint32_t status;

	aenq_link_desc = (struct ena_admin_aenq_link_change_desc *)aenq_e;

	status = get_ena_admin_aenq_link_change_desc_link_status(aenq_link_desc);
	adapter->link_status = status;

	ena_link_update(eth_dev, 0);
	rte_eth_dev_callback_process(eth_dev, RTE_ETH_EVENT_INTR_LSC, NULL);
}

static void ena_notification(void *adapter_data,
			     struct ena_admin_aenq_entry *aenq_e)
{
	struct rte_eth_dev *eth_dev = adapter_data;
	struct ena_adapter *adapter = eth_dev->data->dev_private;
	struct ena_admin_ena_hw_hints *hints;

	if (aenq_e->aenq_common_desc.group != ENA_ADMIN_NOTIFICATION)
		PMD_DRV_LOG(WARNING, "Invalid AENQ group: %x. Expected: %x\n",
			aenq_e->aenq_common_desc.group,
			ENA_ADMIN_NOTIFICATION);

	switch (aenq_e->aenq_common_desc.syndrome) {
	case ENA_ADMIN_UPDATE_HINTS:
		hints = (struct ena_admin_ena_hw_hints *)
			(&aenq_e->inline_data_w4);
		ena_update_hints(adapter, hints);
		break;
	default:
		PMD_DRV_LOG(ERR, "Invalid AENQ notification syndrome: %d\n",
			aenq_e->aenq_common_desc.syndrome);
	}
}

static void ena_keep_alive(void *adapter_data,
			   struct ena_admin_aenq_entry *aenq_e)
{
	struct rte_eth_dev *eth_dev = adapter_data;
	struct ena_adapter *adapter = eth_dev->data->dev_private;
	struct ena_admin_aenq_keep_alive_desc *desc;
	uint64_t rx_drops;
	uint64_t tx_drops;

	adapter->timestamp_wd = rte_get_timer_cycles();

	desc = (struct ena_admin_aenq_keep_alive_desc *)aenq_e;
	rx_drops = ((uint64_t)desc->rx_drops_high << 32) | desc->rx_drops_low;
	tx_drops = ((uint64_t)desc->tx_drops_high << 32) | desc->tx_drops_low;

	adapter->drv_stats->rx_drops = rx_drops;
	adapter->dev_stats.tx_drops = tx_drops;
}

/**
 * This handler will be called for an unknown event group or an event with an
 * unimplemented handler.
 **/
static void unimplemented_aenq_handler(__rte_unused void *data,
				       __rte_unused struct ena_admin_aenq_entry *aenq_e)
{
	PMD_DRV_LOG(ERR,
		"Unknown event was received or event with unimplemented handler\n");
}

static struct ena_aenq_handlers aenq_handlers = {
	.handlers = {
		[ENA_ADMIN_LINK_CHANGE] = ena_update_on_link_change,
		[ENA_ADMIN_NOTIFICATION] = ena_notification,
		[ENA_ADMIN_KEEP_ALIVE] = ena_keep_alive
	},
	.unimplemented_handler = unimplemented_aenq_handler
};
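/*
 * Illustrative sketch (not part of the driver): the RTE_ETH_EVENT_INTR_LSC
 * event raised by ena_update_on_link_change() is consumed by applications
 * through the generic ethdev callback API. "lsc_event_cb" is a hypothetical
 * application callback used only for this example.
 *
 *	static int
 *	lsc_event_cb(uint16_t port_id, enum rte_eth_event_type type,
 *		     void *cb_arg, void *ret_param)
 *	{
 *		struct rte_eth_link link;
 *
 *		rte_eth_link_get_nowait(port_id, &link);
 *		// react to link.link_status here
 *		return 0;
 *	}
 *
 *	rte_eth_dev_callback_register(port_id, RTE_ETH_EVENT_INTR_LSC,
 *				      lsc_event_cb, NULL);
 */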
/*********************************************************************
 * Multi-Process communication request handling (in primary)
 *********************************************************************/
static int
ena_mp_primary_handle(const struct rte_mp_msg *mp_msg, const void *peer)
{
	const struct ena_mp_body *req =
		(const struct ena_mp_body *)mp_msg->param;
	struct ena_adapter *adapter;
	struct ena_com_dev *ena_dev;
	struct ena_mp_body *rsp;
	struct rte_mp_msg mp_rsp;
	struct rte_eth_dev *dev;
	int res = 0;

	rsp = (struct ena_mp_body *)&mp_rsp.param;
	mp_msg_init(&mp_rsp, req->type, req->port_id);

	if (!rte_eth_dev_is_valid_port(req->port_id)) {
		rte_errno = ENODEV;
		res = -rte_errno;
		PMD_DRV_LOG(ERR, "Unknown port %d in request %d\n",
			req->port_id, req->type);
		goto end;
	}
	dev = &rte_eth_devices[req->port_id];
	adapter = dev->data->dev_private;
	ena_dev = &adapter->ena_dev;

	switch (req->type) {
	case ENA_MP_DEV_STATS_GET:
		res = ena_com_get_dev_basic_stats(ena_dev,
						  &adapter->basic_stats);
		break;
	case ENA_MP_ENI_STATS_GET:
		res = ena_com_get_eni_stats(ena_dev,
			(struct ena_admin_eni_stats *)&adapter->eni_stats);
		break;
	case ENA_MP_MTU_SET:
		res = ena_com_set_dev_mtu(ena_dev, req->args.mtu);
		break;
	case ENA_MP_IND_TBL_GET:
		res = ena_com_indirect_table_get(ena_dev,
						 adapter->indirect_table);
		break;
	case ENA_MP_IND_TBL_SET:
		res = ena_com_indirect_table_set(ena_dev);
		break;
	default:
		PMD_DRV_LOG(ERR, "Unknown request type %d\n", req->type);
		res = -EINVAL;
		break;
	}

end:
	/* Save processing result in the reply */
	rsp->result = res;
	/* Return just IPC processing status */
	return rte_mp_reply(&mp_rsp, peer);
}
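/*
 * Illustrative sketch (assumption, not part of the driver): the requests
 * dispatched above are sent by secondary processes over the generic DPDK
 * multi-process channel. The message name "ENA_MP_NAME" and the way the
 * request body is filled are placeholders for this example; in the driver
 * itself the secondary side is wrapped by the ENA_PROXY() helper.
 *
 *	struct rte_mp_msg mp_req;
 *	struct rte_mp_reply mp_rep;
 *	struct timespec ts = { .tv_sec = 5, .tv_nsec = 0 };
 *
 *	memset(&mp_req, 0, sizeof(mp_req));
 *	strlcpy(mp_req.name, "ENA_MP_NAME", sizeof(mp_req.name));
 *	mp_req.len_param = sizeof(struct ena_mp_body);
 *	// ... fill the ena_mp_body request (type, port_id, args) ...
 *
 *	if (rte_mp_request_sync(&mp_req, &mp_rep, &ts) == 0 &&
 *	    mp_rep.nb_received == 1) {
 *		// The handler's result is carried back in rsp->result.
 *	}
 */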