/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright (c) 2015-2020 Amazon.com, Inc. or its affiliates.
 * All rights reserved.
 */

#include <rte_string_fns.h>
#include <rte_errno.h>
#include <rte_version.h>
#include <rte_net.h>
#include <rte_kvargs.h>

#include "ena_ethdev.h"
#include "ena_logs.h"
#include "ena_platform.h"
#include "ena_com.h"
#include "ena_eth_com.h"

#include <ena_common_defs.h>
#include <ena_regs_defs.h>
#include <ena_admin_defs.h>
#include <ena_eth_io_defs.h>

#define DRV_MODULE_VER_MAJOR	2
#define DRV_MODULE_VER_MINOR	5
#define DRV_MODULE_VER_SUBMINOR	0

#define __MERGE_64B_H_L(h, l) (((uint64_t)h << 32) | l)

#define GET_L4_HDR_LEN(mbuf)					\
	((rte_pktmbuf_mtod_offset(mbuf, struct rte_tcp_hdr *,	\
		mbuf->l3_len + mbuf->l2_len)->data_off) >> 4)

#define ETH_GSTRING_LEN	32

#define ARRAY_SIZE(x) RTE_DIM(x)

#define ENA_MIN_RING_DESC	128

#define ENA_PTYPE_HAS_HASH	(RTE_PTYPE_L4_TCP | RTE_PTYPE_L4_UDP)

struct ena_stats {
	char name[ETH_GSTRING_LEN];
	int stat_offset;
};

#define ENA_STAT_ENTRY(stat, stat_type) { \
	.name = #stat, \
	.stat_offset = offsetof(struct ena_stats_##stat_type, stat) \
}

#define ENA_STAT_RX_ENTRY(stat) \
	ENA_STAT_ENTRY(stat, rx)

#define ENA_STAT_TX_ENTRY(stat) \
	ENA_STAT_ENTRY(stat, tx)

#define ENA_STAT_ENI_ENTRY(stat) \
	ENA_STAT_ENTRY(stat, eni)

#define ENA_STAT_GLOBAL_ENTRY(stat) \
	ENA_STAT_ENTRY(stat, dev)

/* Device arguments */
#define ENA_DEVARG_LARGE_LLQ_HDR "large_llq_hdr"
/* Timeout in seconds after which a single uncompleted Tx packet should be
 * considered as missing.
 */
#define ENA_DEVARG_MISS_TXC_TO "miss_txc_to"

/*
 * Each rte_memzone should have a unique name.
 * To satisfy this, the number of allocations is counted and appended to the
 * name.
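 * (Illustration only: an allocation counter value of 7 could produce a
 * memzone name such as "ena_mem_7"; the exact format is defined by the
 * platform allocation helpers.)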
73 */ 74 rte_atomic64_t ena_alloc_cnt; 75 76 static const struct ena_stats ena_stats_global_strings[] = { 77 ENA_STAT_GLOBAL_ENTRY(wd_expired), 78 ENA_STAT_GLOBAL_ENTRY(dev_start), 79 ENA_STAT_GLOBAL_ENTRY(dev_stop), 80 ENA_STAT_GLOBAL_ENTRY(tx_drops), 81 }; 82 83 static const struct ena_stats ena_stats_eni_strings[] = { 84 ENA_STAT_ENI_ENTRY(bw_in_allowance_exceeded), 85 ENA_STAT_ENI_ENTRY(bw_out_allowance_exceeded), 86 ENA_STAT_ENI_ENTRY(pps_allowance_exceeded), 87 ENA_STAT_ENI_ENTRY(conntrack_allowance_exceeded), 88 ENA_STAT_ENI_ENTRY(linklocal_allowance_exceeded), 89 }; 90 91 static const struct ena_stats ena_stats_tx_strings[] = { 92 ENA_STAT_TX_ENTRY(cnt), 93 ENA_STAT_TX_ENTRY(bytes), 94 ENA_STAT_TX_ENTRY(prepare_ctx_err), 95 ENA_STAT_TX_ENTRY(tx_poll), 96 ENA_STAT_TX_ENTRY(doorbells), 97 ENA_STAT_TX_ENTRY(bad_req_id), 98 ENA_STAT_TX_ENTRY(available_desc), 99 ENA_STAT_TX_ENTRY(missed_tx), 100 }; 101 102 static const struct ena_stats ena_stats_rx_strings[] = { 103 ENA_STAT_RX_ENTRY(cnt), 104 ENA_STAT_RX_ENTRY(bytes), 105 ENA_STAT_RX_ENTRY(refill_partial), 106 ENA_STAT_RX_ENTRY(l3_csum_bad), 107 ENA_STAT_RX_ENTRY(l4_csum_bad), 108 ENA_STAT_RX_ENTRY(l4_csum_good), 109 ENA_STAT_RX_ENTRY(mbuf_alloc_fail), 110 ENA_STAT_RX_ENTRY(bad_desc_num), 111 ENA_STAT_RX_ENTRY(bad_req_id), 112 }; 113 114 #define ENA_STATS_ARRAY_GLOBAL ARRAY_SIZE(ena_stats_global_strings) 115 #define ENA_STATS_ARRAY_ENI ARRAY_SIZE(ena_stats_eni_strings) 116 #define ENA_STATS_ARRAY_TX ARRAY_SIZE(ena_stats_tx_strings) 117 #define ENA_STATS_ARRAY_RX ARRAY_SIZE(ena_stats_rx_strings) 118 119 #define QUEUE_OFFLOADS (RTE_ETH_TX_OFFLOAD_TCP_CKSUM |\ 120 RTE_ETH_TX_OFFLOAD_UDP_CKSUM |\ 121 RTE_ETH_TX_OFFLOAD_IPV4_CKSUM |\ 122 RTE_ETH_TX_OFFLOAD_TCP_TSO) 123 #define MBUF_OFFLOADS (RTE_MBUF_F_TX_L4_MASK |\ 124 RTE_MBUF_F_TX_IP_CKSUM |\ 125 RTE_MBUF_F_TX_TCP_SEG) 126 127 /** Vendor ID used by Amazon devices */ 128 #define PCI_VENDOR_ID_AMAZON 0x1D0F 129 /** Amazon devices */ 130 #define PCI_DEVICE_ID_ENA_VF 0xEC20 131 #define PCI_DEVICE_ID_ENA_VF_RSERV0 0xEC21 132 133 #define ENA_TX_OFFLOAD_MASK (RTE_MBUF_F_TX_L4_MASK | \ 134 RTE_MBUF_F_TX_IPV6 | \ 135 RTE_MBUF_F_TX_IPV4 | \ 136 RTE_MBUF_F_TX_IP_CKSUM | \ 137 RTE_MBUF_F_TX_TCP_SEG) 138 139 #define ENA_TX_OFFLOAD_NOTSUP_MASK \ 140 (RTE_MBUF_F_TX_OFFLOAD_MASK ^ ENA_TX_OFFLOAD_MASK) 141 142 /** HW specific offloads capabilities. */ 143 /* IPv4 checksum offload. */ 144 #define ENA_L3_IPV4_CSUM 0x0001 145 /* TCP/UDP checksum offload for IPv4 packets. */ 146 #define ENA_L4_IPV4_CSUM 0x0002 147 /* TCP/UDP checksum offload for IPv4 packets with pseudo header checksum. */ 148 #define ENA_L4_IPV4_CSUM_PARTIAL 0x0004 149 /* TCP/UDP checksum offload for IPv6 packets. */ 150 #define ENA_L4_IPV6_CSUM 0x0008 151 /* TCP/UDP checksum offload for IPv6 packets with pseudo header checksum. */ 152 #define ENA_L4_IPV6_CSUM_PARTIAL 0x0010 153 /* TSO support for IPv4 packets. */ 154 #define ENA_IPV4_TSO 0x0020 155 156 /* Device supports setting RSS hash. 
*/ 157 #define ENA_RX_RSS_HASH 0x0040 158 159 static const struct rte_pci_id pci_id_ena_map[] = { 160 { RTE_PCI_DEVICE(PCI_VENDOR_ID_AMAZON, PCI_DEVICE_ID_ENA_VF) }, 161 { RTE_PCI_DEVICE(PCI_VENDOR_ID_AMAZON, PCI_DEVICE_ID_ENA_VF_RSERV0) }, 162 { .device_id = 0 }, 163 }; 164 165 static struct ena_aenq_handlers aenq_handlers; 166 167 static int ena_device_init(struct ena_adapter *adapter, 168 struct rte_pci_device *pdev, 169 struct ena_com_dev_get_features_ctx *get_feat_ctx); 170 static int ena_dev_configure(struct rte_eth_dev *dev); 171 static void ena_tx_map_mbuf(struct ena_ring *tx_ring, 172 struct ena_tx_buffer *tx_info, 173 struct rte_mbuf *mbuf, 174 void **push_header, 175 uint16_t *header_len); 176 static int ena_xmit_mbuf(struct ena_ring *tx_ring, struct rte_mbuf *mbuf); 177 static int ena_tx_cleanup(void *txp, uint32_t free_pkt_cnt); 178 static uint16_t eth_ena_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, 179 uint16_t nb_pkts); 180 static uint16_t eth_ena_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, 181 uint16_t nb_pkts); 182 static int ena_tx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx, 183 uint16_t nb_desc, unsigned int socket_id, 184 const struct rte_eth_txconf *tx_conf); 185 static int ena_rx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx, 186 uint16_t nb_desc, unsigned int socket_id, 187 const struct rte_eth_rxconf *rx_conf, 188 struct rte_mempool *mp); 189 static inline void ena_init_rx_mbuf(struct rte_mbuf *mbuf, uint16_t len); 190 static struct rte_mbuf *ena_rx_mbuf(struct ena_ring *rx_ring, 191 struct ena_com_rx_buf_info *ena_bufs, 192 uint32_t descs, 193 uint16_t *next_to_clean, 194 uint8_t offset); 195 static uint16_t eth_ena_recv_pkts(void *rx_queue, 196 struct rte_mbuf **rx_pkts, uint16_t nb_pkts); 197 static int ena_add_single_rx_desc(struct ena_com_io_sq *io_sq, 198 struct rte_mbuf *mbuf, uint16_t id); 199 static int ena_populate_rx_queue(struct ena_ring *rxq, unsigned int count); 200 static void ena_init_rings(struct ena_adapter *adapter, 201 bool disable_meta_caching); 202 static int ena_mtu_set(struct rte_eth_dev *dev, uint16_t mtu); 203 static int ena_start(struct rte_eth_dev *dev); 204 static int ena_stop(struct rte_eth_dev *dev); 205 static int ena_close(struct rte_eth_dev *dev); 206 static int ena_dev_reset(struct rte_eth_dev *dev); 207 static int ena_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats); 208 static void ena_rx_queue_release_all(struct rte_eth_dev *dev); 209 static void ena_tx_queue_release_all(struct rte_eth_dev *dev); 210 static void ena_rx_queue_release(struct rte_eth_dev *dev, uint16_t qid); 211 static void ena_tx_queue_release(struct rte_eth_dev *dev, uint16_t qid); 212 static void ena_rx_queue_release_bufs(struct ena_ring *ring); 213 static void ena_tx_queue_release_bufs(struct ena_ring *ring); 214 static int ena_link_update(struct rte_eth_dev *dev, 215 int wait_to_complete); 216 static int ena_create_io_queue(struct rte_eth_dev *dev, struct ena_ring *ring); 217 static void ena_queue_stop(struct ena_ring *ring); 218 static void ena_queue_stop_all(struct rte_eth_dev *dev, 219 enum ena_ring_type ring_type); 220 static int ena_queue_start(struct rte_eth_dev *dev, struct ena_ring *ring); 221 static int ena_queue_start_all(struct rte_eth_dev *dev, 222 enum ena_ring_type ring_type); 223 static void ena_stats_restart(struct rte_eth_dev *dev); 224 static uint64_t ena_get_rx_port_offloads(struct ena_adapter *adapter); 225 static uint64_t ena_get_tx_port_offloads(struct ena_adapter *adapter); 226 
static uint64_t ena_get_rx_queue_offloads(struct ena_adapter *adapter); 227 static uint64_t ena_get_tx_queue_offloads(struct ena_adapter *adapter); 228 static int ena_infos_get(struct rte_eth_dev *dev, 229 struct rte_eth_dev_info *dev_info); 230 static void ena_interrupt_handler_rte(void *cb_arg); 231 static void ena_timer_wd_callback(struct rte_timer *timer, void *arg); 232 static void ena_destroy_device(struct rte_eth_dev *eth_dev); 233 static int eth_ena_dev_init(struct rte_eth_dev *eth_dev); 234 static int ena_xstats_get_names(struct rte_eth_dev *dev, 235 struct rte_eth_xstat_name *xstats_names, 236 unsigned int n); 237 static int ena_xstats_get_names_by_id(struct rte_eth_dev *dev, 238 const uint64_t *ids, 239 struct rte_eth_xstat_name *xstats_names, 240 unsigned int size); 241 static int ena_xstats_get(struct rte_eth_dev *dev, 242 struct rte_eth_xstat *stats, 243 unsigned int n); 244 static int ena_xstats_get_by_id(struct rte_eth_dev *dev, 245 const uint64_t *ids, 246 uint64_t *values, 247 unsigned int n); 248 static int ena_process_bool_devarg(const char *key, 249 const char *value, 250 void *opaque); 251 static int ena_parse_devargs(struct ena_adapter *adapter, 252 struct rte_devargs *devargs); 253 static int ena_copy_eni_stats(struct ena_adapter *adapter, 254 struct ena_stats_eni *stats); 255 static int ena_setup_rx_intr(struct rte_eth_dev *dev); 256 static int ena_rx_queue_intr_enable(struct rte_eth_dev *dev, 257 uint16_t queue_id); 258 static int ena_rx_queue_intr_disable(struct rte_eth_dev *dev, 259 uint16_t queue_id); 260 static int ena_configure_aenq(struct ena_adapter *adapter); 261 static int ena_mp_primary_handle(const struct rte_mp_msg *mp_msg, 262 const void *peer); 263 264 static const struct eth_dev_ops ena_dev_ops = { 265 .dev_configure = ena_dev_configure, 266 .dev_infos_get = ena_infos_get, 267 .rx_queue_setup = ena_rx_queue_setup, 268 .tx_queue_setup = ena_tx_queue_setup, 269 .dev_start = ena_start, 270 .dev_stop = ena_stop, 271 .link_update = ena_link_update, 272 .stats_get = ena_stats_get, 273 .xstats_get_names = ena_xstats_get_names, 274 .xstats_get_names_by_id = ena_xstats_get_names_by_id, 275 .xstats_get = ena_xstats_get, 276 .xstats_get_by_id = ena_xstats_get_by_id, 277 .mtu_set = ena_mtu_set, 278 .rx_queue_release = ena_rx_queue_release, 279 .tx_queue_release = ena_tx_queue_release, 280 .dev_close = ena_close, 281 .dev_reset = ena_dev_reset, 282 .reta_update = ena_rss_reta_update, 283 .reta_query = ena_rss_reta_query, 284 .rx_queue_intr_enable = ena_rx_queue_intr_enable, 285 .rx_queue_intr_disable = ena_rx_queue_intr_disable, 286 .rss_hash_update = ena_rss_hash_update, 287 .rss_hash_conf_get = ena_rss_hash_conf_get, 288 .tx_done_cleanup = ena_tx_cleanup, 289 }; 290 291 /********************************************************************* 292 * Multi-Process communication bits 293 *********************************************************************/ 294 /* rte_mp IPC message name */ 295 #define ENA_MP_NAME "net_ena_mp" 296 /* Request timeout in seconds */ 297 #define ENA_MP_REQ_TMO 5 298 299 /** Proxy request type */ 300 enum ena_mp_req { 301 ENA_MP_DEV_STATS_GET, 302 ENA_MP_ENI_STATS_GET, 303 ENA_MP_MTU_SET, 304 ENA_MP_IND_TBL_GET, 305 ENA_MP_IND_TBL_SET 306 }; 307 308 /** Proxy message body. Shared between requests and responses. */ 309 struct ena_mp_body { 310 /* Message type */ 311 enum ena_mp_req type; 312 int port_id; 313 /* Processing result. Set in replies. 0 if message succeeded, negative 314 * error code otherwise. 
315 */ 316 int result; 317 union { 318 int mtu; /* For ENA_MP_MTU_SET */ 319 } args; 320 }; 321 322 /** 323 * Initialize IPC message. 324 * 325 * @param[out] msg 326 * Pointer to the message to initialize. 327 * @param[in] type 328 * Message type. 329 * @param[in] port_id 330 * Port ID of target device. 331 * 332 */ 333 static void 334 mp_msg_init(struct rte_mp_msg *msg, enum ena_mp_req type, int port_id) 335 { 336 struct ena_mp_body *body = (struct ena_mp_body *)&msg->param; 337 338 memset(msg, 0, sizeof(*msg)); 339 strlcpy(msg->name, ENA_MP_NAME, sizeof(msg->name)); 340 msg->len_param = sizeof(*body); 341 body->type = type; 342 body->port_id = port_id; 343 } 344 345 /********************************************************************* 346 * Multi-Process communication PMD API 347 *********************************************************************/ 348 /** 349 * Define proxy request descriptor 350 * 351 * Used to define all structures and functions required for proxying a given 352 * function to the primary process including the code to perform to prepare the 353 * request and process the response. 354 * 355 * @param[in] f 356 * Name of the function to proxy 357 * @param[in] t 358 * Message type to use 359 * @param[in] prep 360 * Body of a function to prepare the request in form of a statement 361 * expression. It is passed all the original function arguments along with two 362 * extra ones: 363 * - struct ena_adapter *adapter - PMD data of the device calling the proxy. 364 * - struct ena_mp_body *req - body of a request to prepare. 365 * @param[in] proc 366 * Body of a function to process the response in form of a statement 367 * expression. It is passed all the original function arguments along with two 368 * extra ones: 369 * - struct ena_adapter *adapter - PMD data of the device calling the proxy. 370 * - struct ena_mp_body *rsp - body of a response to process. 371 * @param ... 372 * Proxied function's arguments 373 * 374 * @note Inside prep and proc any parameters which aren't used should be marked 375 * as such (with ENA_TOUCH or __rte_unused). 376 */ 377 #define ENA_PROXY_DESC(f, t, prep, proc, ...) \ 378 static const enum ena_mp_req mp_type_ ## f = t; \ 379 static const char *mp_name_ ## f = #t; \ 380 static void mp_prep_ ## f(struct ena_adapter *adapter, \ 381 struct ena_mp_body *req, \ 382 __VA_ARGS__) \ 383 { \ 384 prep; \ 385 } \ 386 static void mp_proc_ ## f(struct ena_adapter *adapter, \ 387 struct ena_mp_body *rsp, \ 388 __VA_ARGS__) \ 389 { \ 390 proc; \ 391 } 392 393 /** 394 * Proxy wrapper for calling primary functions in a secondary process. 395 * 396 * Depending on whether called in primary or secondary process, calls the 397 * @p func directly or proxies the call to the primary process via rte_mp IPC. 398 * This macro requires a proxy request descriptor to be defined for @p func 399 * using ENA_PROXY_DESC() macro. 400 * 401 * @param[in/out] a 402 * Device PMD data. Used for sending the message and sharing message results 403 * between primary and secondary. 404 * @param[in] f 405 * Function to proxy. 406 * @param ... 407 * Arguments of @p func. 408 * 409 * @return 410 * - 0: Processing succeeded and response handler was called. 411 * - -EPERM: IPC is unavailable on this platform. This means only primary 412 * process may call the proxied function. 413 * - -EIO: IPC returned error on request send. Inspect rte_errno detailed 414 * error code. 415 * - Negative error code from the proxied function. 416 * 417 * @note This mechanism is geared towards control-path tasks. 
Avoid calling it 418 * in fast-path unless unbound delays are allowed. This is due to the IPC 419 * mechanism itself (socket based). 420 * @note Due to IPC parameter size limitations the proxy logic shares call 421 * results through the struct ena_adapter shared memory. This makes the 422 * proxy mechanism strictly single-threaded. Therefore be sure to make all 423 * calls to the same proxied function under the same lock. 424 */ 425 #define ENA_PROXY(a, f, ...) \ 426 ({ \ 427 struct ena_adapter *_a = (a); \ 428 struct timespec ts = { .tv_sec = ENA_MP_REQ_TMO }; \ 429 struct ena_mp_body *req, *rsp; \ 430 struct rte_mp_reply mp_rep; \ 431 struct rte_mp_msg mp_req; \ 432 int ret; \ 433 \ 434 if (rte_eal_process_type() == RTE_PROC_PRIMARY) { \ 435 ret = f(__VA_ARGS__); \ 436 } else { \ 437 /* Prepare and send request */ \ 438 req = (struct ena_mp_body *)&mp_req.param; \ 439 mp_msg_init(&mp_req, mp_type_ ## f, _a->edev_data->port_id); \ 440 mp_prep_ ## f(_a, req, ## __VA_ARGS__); \ 441 \ 442 ret = rte_mp_request_sync(&mp_req, &mp_rep, &ts); \ 443 if (likely(!ret)) { \ 444 RTE_ASSERT(mp_rep.nb_received == 1); \ 445 rsp = (struct ena_mp_body *)&mp_rep.msgs[0].param; \ 446 ret = rsp->result; \ 447 if (ret == 0) { \ 448 mp_proc_##f(_a, rsp, ## __VA_ARGS__); \ 449 } else { \ 450 PMD_DRV_LOG(ERR, \ 451 "%s returned error: %d\n", \ 452 mp_name_ ## f, rsp->result);\ 453 } \ 454 free(mp_rep.msgs); \ 455 } else if (rte_errno == ENOTSUP) { \ 456 PMD_DRV_LOG(ERR, \ 457 "No IPC, can't proxy to primary\n");\ 458 ret = -rte_errno; \ 459 } else { \ 460 PMD_DRV_LOG(ERR, "Request %s failed: %s\n", \ 461 mp_name_ ## f, \ 462 rte_strerror(rte_errno)); \ 463 ret = -EIO; \ 464 } \ 465 } \ 466 ret; \ 467 }) 468 469 /********************************************************************* 470 * Multi-Process communication request descriptors 471 *********************************************************************/ 472 473 ENA_PROXY_DESC(ena_com_get_dev_basic_stats, ENA_MP_DEV_STATS_GET, 474 ({ 475 ENA_TOUCH(adapter); 476 ENA_TOUCH(req); 477 ENA_TOUCH(ena_dev); 478 ENA_TOUCH(stats); 479 }), 480 ({ 481 ENA_TOUCH(rsp); 482 ENA_TOUCH(ena_dev); 483 if (stats != &adapter->basic_stats) 484 rte_memcpy(stats, &adapter->basic_stats, sizeof(*stats)); 485 }), 486 struct ena_com_dev *ena_dev, struct ena_admin_basic_stats *stats); 487 488 ENA_PROXY_DESC(ena_com_get_eni_stats, ENA_MP_ENI_STATS_GET, 489 ({ 490 ENA_TOUCH(adapter); 491 ENA_TOUCH(req); 492 ENA_TOUCH(ena_dev); 493 ENA_TOUCH(stats); 494 }), 495 ({ 496 ENA_TOUCH(rsp); 497 ENA_TOUCH(ena_dev); 498 if (stats != (struct ena_admin_eni_stats *)&adapter->eni_stats) 499 rte_memcpy(stats, &adapter->eni_stats, sizeof(*stats)); 500 }), 501 struct ena_com_dev *ena_dev, struct ena_admin_eni_stats *stats); 502 503 ENA_PROXY_DESC(ena_com_set_dev_mtu, ENA_MP_MTU_SET, 504 ({ 505 ENA_TOUCH(adapter); 506 ENA_TOUCH(ena_dev); 507 req->args.mtu = mtu; 508 }), 509 ({ 510 ENA_TOUCH(adapter); 511 ENA_TOUCH(rsp); 512 ENA_TOUCH(ena_dev); 513 ENA_TOUCH(mtu); 514 }), 515 struct ena_com_dev *ena_dev, int mtu); 516 517 ENA_PROXY_DESC(ena_com_indirect_table_set, ENA_MP_IND_TBL_SET, 518 ({ 519 ENA_TOUCH(adapter); 520 ENA_TOUCH(req); 521 ENA_TOUCH(ena_dev); 522 }), 523 ({ 524 ENA_TOUCH(adapter); 525 ENA_TOUCH(rsp); 526 ENA_TOUCH(ena_dev); 527 }), 528 struct ena_com_dev *ena_dev); 529 530 ENA_PROXY_DESC(ena_com_indirect_table_get, ENA_MP_IND_TBL_GET, 531 ({ 532 ENA_TOUCH(adapter); 533 ENA_TOUCH(req); 534 ENA_TOUCH(ena_dev); 535 ENA_TOUCH(ind_tbl); 536 }), 537 ({ 538 ENA_TOUCH(rsp); 539 ENA_TOUCH(ena_dev); 540 
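		/* Response processing runs in the secondary process: the primary
		 * has already stored the fetched indirection table in the shared
		 * adapter->indirect_table, so copy it into the caller's buffer
		 * unless the caller passed that shared buffer itself.
		 */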
if (ind_tbl != adapter->indirect_table) 541 rte_memcpy(ind_tbl, adapter->indirect_table, 542 sizeof(adapter->indirect_table)); 543 }), 544 struct ena_com_dev *ena_dev, u32 *ind_tbl); 545 546 static inline void ena_trigger_reset(struct ena_adapter *adapter, 547 enum ena_regs_reset_reason_types reason) 548 { 549 if (likely(!adapter->trigger_reset)) { 550 adapter->reset_reason = reason; 551 adapter->trigger_reset = true; 552 } 553 } 554 555 static inline void ena_rx_mbuf_prepare(struct ena_ring *rx_ring, 556 struct rte_mbuf *mbuf, 557 struct ena_com_rx_ctx *ena_rx_ctx, 558 bool fill_hash) 559 { 560 struct ena_stats_rx *rx_stats = &rx_ring->rx_stats; 561 uint64_t ol_flags = 0; 562 uint32_t packet_type = 0; 563 564 if (ena_rx_ctx->l4_proto == ENA_ETH_IO_L4_PROTO_TCP) 565 packet_type |= RTE_PTYPE_L4_TCP; 566 else if (ena_rx_ctx->l4_proto == ENA_ETH_IO_L4_PROTO_UDP) 567 packet_type |= RTE_PTYPE_L4_UDP; 568 569 if (ena_rx_ctx->l3_proto == ENA_ETH_IO_L3_PROTO_IPV4) { 570 packet_type |= RTE_PTYPE_L3_IPV4; 571 if (unlikely(ena_rx_ctx->l3_csum_err)) { 572 ++rx_stats->l3_csum_bad; 573 ol_flags |= RTE_MBUF_F_RX_IP_CKSUM_BAD; 574 } else { 575 ol_flags |= RTE_MBUF_F_RX_IP_CKSUM_GOOD; 576 } 577 } else if (ena_rx_ctx->l3_proto == ENA_ETH_IO_L3_PROTO_IPV6) { 578 packet_type |= RTE_PTYPE_L3_IPV6; 579 } 580 581 if (!ena_rx_ctx->l4_csum_checked || ena_rx_ctx->frag) { 582 ol_flags |= RTE_MBUF_F_RX_L4_CKSUM_UNKNOWN; 583 } else { 584 if (unlikely(ena_rx_ctx->l4_csum_err)) { 585 ++rx_stats->l4_csum_bad; 586 ol_flags |= RTE_MBUF_F_RX_L4_CKSUM_BAD; 587 } else { 588 ++rx_stats->l4_csum_good; 589 ol_flags |= RTE_MBUF_F_RX_L4_CKSUM_GOOD; 590 } 591 } 592 593 if (fill_hash && 594 likely((packet_type & ENA_PTYPE_HAS_HASH) && !ena_rx_ctx->frag)) { 595 ol_flags |= RTE_MBUF_F_RX_RSS_HASH; 596 mbuf->hash.rss = ena_rx_ctx->hash; 597 } 598 599 mbuf->ol_flags = ol_flags; 600 mbuf->packet_type = packet_type; 601 } 602 603 static inline void ena_tx_mbuf_prepare(struct rte_mbuf *mbuf, 604 struct ena_com_tx_ctx *ena_tx_ctx, 605 uint64_t queue_offloads, 606 bool disable_meta_caching) 607 { 608 struct ena_com_tx_meta *ena_meta = &ena_tx_ctx->ena_meta; 609 610 if ((mbuf->ol_flags & MBUF_OFFLOADS) && 611 (queue_offloads & QUEUE_OFFLOADS)) { 612 /* check if TSO is required */ 613 if ((mbuf->ol_flags & RTE_MBUF_F_TX_TCP_SEG) && 614 (queue_offloads & RTE_ETH_TX_OFFLOAD_TCP_TSO)) { 615 ena_tx_ctx->tso_enable = true; 616 617 ena_meta->l4_hdr_len = GET_L4_HDR_LEN(mbuf); 618 } 619 620 /* check if L3 checksum is needed */ 621 if ((mbuf->ol_flags & RTE_MBUF_F_TX_IP_CKSUM) && 622 (queue_offloads & RTE_ETH_TX_OFFLOAD_IPV4_CKSUM)) 623 ena_tx_ctx->l3_csum_enable = true; 624 625 if (mbuf->ol_flags & RTE_MBUF_F_TX_IPV6) { 626 ena_tx_ctx->l3_proto = ENA_ETH_IO_L3_PROTO_IPV6; 627 /* For the IPv6 packets, DF always needs to be true. 
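			 * IPv6 has no DF bit and packets are never fragmented in
			 * transit, so the flag is set unconditionally for the device.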
*/ 628 ena_tx_ctx->df = 1; 629 } else { 630 ena_tx_ctx->l3_proto = ENA_ETH_IO_L3_PROTO_IPV4; 631 632 /* set don't fragment (DF) flag */ 633 if (mbuf->packet_type & 634 (RTE_PTYPE_L4_NONFRAG 635 | RTE_PTYPE_INNER_L4_NONFRAG)) 636 ena_tx_ctx->df = 1; 637 } 638 639 /* check if L4 checksum is needed */ 640 if (((mbuf->ol_flags & RTE_MBUF_F_TX_L4_MASK) == RTE_MBUF_F_TX_TCP_CKSUM) && 641 (queue_offloads & RTE_ETH_TX_OFFLOAD_TCP_CKSUM)) { 642 ena_tx_ctx->l4_proto = ENA_ETH_IO_L4_PROTO_TCP; 643 ena_tx_ctx->l4_csum_enable = true; 644 } else if (((mbuf->ol_flags & RTE_MBUF_F_TX_L4_MASK) == 645 RTE_MBUF_F_TX_UDP_CKSUM) && 646 (queue_offloads & RTE_ETH_TX_OFFLOAD_UDP_CKSUM)) { 647 ena_tx_ctx->l4_proto = ENA_ETH_IO_L4_PROTO_UDP; 648 ena_tx_ctx->l4_csum_enable = true; 649 } else { 650 ena_tx_ctx->l4_proto = ENA_ETH_IO_L4_PROTO_UNKNOWN; 651 ena_tx_ctx->l4_csum_enable = false; 652 } 653 654 ena_meta->mss = mbuf->tso_segsz; 655 ena_meta->l3_hdr_len = mbuf->l3_len; 656 ena_meta->l3_hdr_offset = mbuf->l2_len; 657 658 ena_tx_ctx->meta_valid = true; 659 } else if (disable_meta_caching) { 660 memset(ena_meta, 0, sizeof(*ena_meta)); 661 ena_tx_ctx->meta_valid = true; 662 } else { 663 ena_tx_ctx->meta_valid = false; 664 } 665 } 666 667 static int validate_tx_req_id(struct ena_ring *tx_ring, u16 req_id) 668 { 669 struct ena_tx_buffer *tx_info = NULL; 670 671 if (likely(req_id < tx_ring->ring_size)) { 672 tx_info = &tx_ring->tx_buffer_info[req_id]; 673 if (likely(tx_info->mbuf)) 674 return 0; 675 } 676 677 if (tx_info) 678 PMD_TX_LOG(ERR, "tx_info doesn't have valid mbuf. queue %d:%d req_id %u\n", 679 tx_ring->port_id, tx_ring->id, req_id); 680 else 681 PMD_TX_LOG(ERR, "Invalid req_id: %hu in queue %d:%d\n", 682 req_id, tx_ring->port_id, tx_ring->id); 683 684 /* Trigger device reset */ 685 ++tx_ring->tx_stats.bad_req_id; 686 ena_trigger_reset(tx_ring->adapter, ENA_REGS_RESET_INV_TX_REQ_ID); 687 return -EFAULT; 688 } 689 690 static void ena_config_host_info(struct ena_com_dev *ena_dev) 691 { 692 struct ena_admin_host_info *host_info; 693 int rc; 694 695 /* Allocate only the host info */ 696 rc = ena_com_allocate_host_info(ena_dev); 697 if (rc) { 698 PMD_DRV_LOG(ERR, "Cannot allocate host info\n"); 699 return; 700 } 701 702 host_info = ena_dev->host_attr.host_info; 703 704 host_info->os_type = ENA_ADMIN_OS_DPDK; 705 host_info->kernel_ver = RTE_VERSION; 706 strlcpy((char *)host_info->kernel_ver_str, rte_version(), 707 sizeof(host_info->kernel_ver_str)); 708 host_info->os_dist = RTE_VERSION; 709 strlcpy((char *)host_info->os_dist_str, rte_version(), 710 sizeof(host_info->os_dist_str)); 711 host_info->driver_version = 712 (DRV_MODULE_VER_MAJOR) | 713 (DRV_MODULE_VER_MINOR << ENA_ADMIN_HOST_INFO_MINOR_SHIFT) | 714 (DRV_MODULE_VER_SUBMINOR << 715 ENA_ADMIN_HOST_INFO_SUB_MINOR_SHIFT); 716 host_info->num_cpus = rte_lcore_count(); 717 718 host_info->driver_supported_features = 719 ENA_ADMIN_HOST_INFO_RX_OFFSET_MASK | 720 ENA_ADMIN_HOST_INFO_RSS_CONFIGURABLE_FUNCTION_KEY_MASK; 721 722 rc = ena_com_set_host_attributes(ena_dev); 723 if (rc) { 724 if (rc == -ENA_COM_UNSUPPORTED) 725 PMD_DRV_LOG(WARNING, "Cannot set host attributes\n"); 726 else 727 PMD_DRV_LOG(ERR, "Cannot set host attributes\n"); 728 729 goto err; 730 } 731 732 return; 733 734 err: 735 ena_com_delete_host_info(ena_dev); 736 } 737 738 /* This function calculates the number of xstats based on the current config */ 739 static unsigned int ena_xstats_calc_num(struct rte_eth_dev_data *data) 740 { 741 return ENA_STATS_ARRAY_GLOBAL + ENA_STATS_ARRAY_ENI + 742 
	       (data->nb_tx_queues * ENA_STATS_ARRAY_TX) +
	       (data->nb_rx_queues * ENA_STATS_ARRAY_RX);
}

static void ena_config_debug_area(struct ena_adapter *adapter)
{
	u32 debug_area_size;
	int rc, ss_count;

	ss_count = ena_xstats_calc_num(adapter->edev_data);

	/* Allocate 32 bytes for each string and 64 bits for the value */
	debug_area_size = ss_count * ETH_GSTRING_LEN + sizeof(u64) * ss_count;

	rc = ena_com_allocate_debug_area(&adapter->ena_dev, debug_area_size);
	if (rc) {
		PMD_DRV_LOG(ERR, "Cannot allocate debug area\n");
		return;
	}

	rc = ena_com_set_host_attributes(&adapter->ena_dev);
	if (rc) {
		if (rc == -ENA_COM_UNSUPPORTED)
			PMD_DRV_LOG(WARNING, "Cannot set host attributes\n");
		else
			PMD_DRV_LOG(ERR, "Cannot set host attributes\n");

		goto err;
	}

	return;
err:
	ena_com_delete_debug_area(&adapter->ena_dev);
}

static int ena_close(struct rte_eth_dev *dev)
{
	struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev);
	struct rte_intr_handle *intr_handle = pci_dev->intr_handle;
	struct ena_adapter *adapter = dev->data->dev_private;
	int ret = 0;

	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
		return 0;

	if (adapter->state == ENA_ADAPTER_STATE_RUNNING)
		ret = ena_stop(dev);
	adapter->state = ENA_ADAPTER_STATE_CLOSED;

	ena_rx_queue_release_all(dev);
	ena_tx_queue_release_all(dev);

	rte_free(adapter->drv_stats);
	adapter->drv_stats = NULL;

	rte_intr_disable(intr_handle);
	rte_intr_callback_unregister(intr_handle,
				     ena_interrupt_handler_rte,
				     dev);

	/*
	 * The MAC address is not allocated dynamically. Setting it to NULL
	 * should prevent the resource from being released in
	 * rte_eth_dev_release_port().
805 */ 806 dev->data->mac_addrs = NULL; 807 808 return ret; 809 } 810 811 static int 812 ena_dev_reset(struct rte_eth_dev *dev) 813 { 814 int rc = 0; 815 816 /* Cannot release memory in secondary process */ 817 if (rte_eal_process_type() != RTE_PROC_PRIMARY) { 818 PMD_DRV_LOG(WARNING, "dev_reset not supported in secondary.\n"); 819 return -EPERM; 820 } 821 822 ena_destroy_device(dev); 823 rc = eth_ena_dev_init(dev); 824 if (rc) 825 PMD_INIT_LOG(CRIT, "Cannot initialize device\n"); 826 827 return rc; 828 } 829 830 static void ena_rx_queue_release_all(struct rte_eth_dev *dev) 831 { 832 int nb_queues = dev->data->nb_rx_queues; 833 int i; 834 835 for (i = 0; i < nb_queues; i++) 836 ena_rx_queue_release(dev, i); 837 } 838 839 static void ena_tx_queue_release_all(struct rte_eth_dev *dev) 840 { 841 int nb_queues = dev->data->nb_tx_queues; 842 int i; 843 844 for (i = 0; i < nb_queues; i++) 845 ena_tx_queue_release(dev, i); 846 } 847 848 static void ena_rx_queue_release(struct rte_eth_dev *dev, uint16_t qid) 849 { 850 struct ena_ring *ring = dev->data->rx_queues[qid]; 851 852 /* Free ring resources */ 853 rte_free(ring->rx_buffer_info); 854 ring->rx_buffer_info = NULL; 855 856 rte_free(ring->rx_refill_buffer); 857 ring->rx_refill_buffer = NULL; 858 859 rte_free(ring->empty_rx_reqs); 860 ring->empty_rx_reqs = NULL; 861 862 ring->configured = 0; 863 864 PMD_DRV_LOG(NOTICE, "Rx queue %d:%d released\n", 865 ring->port_id, ring->id); 866 } 867 868 static void ena_tx_queue_release(struct rte_eth_dev *dev, uint16_t qid) 869 { 870 struct ena_ring *ring = dev->data->tx_queues[qid]; 871 872 /* Free ring resources */ 873 rte_free(ring->push_buf_intermediate_buf); 874 875 rte_free(ring->tx_buffer_info); 876 877 rte_free(ring->empty_tx_reqs); 878 879 ring->empty_tx_reqs = NULL; 880 ring->tx_buffer_info = NULL; 881 ring->push_buf_intermediate_buf = NULL; 882 883 ring->configured = 0; 884 885 PMD_DRV_LOG(NOTICE, "Tx queue %d:%d released\n", 886 ring->port_id, ring->id); 887 } 888 889 static void ena_rx_queue_release_bufs(struct ena_ring *ring) 890 { 891 unsigned int i; 892 893 for (i = 0; i < ring->ring_size; ++i) { 894 struct ena_rx_buffer *rx_info = &ring->rx_buffer_info[i]; 895 if (rx_info->mbuf) { 896 rte_mbuf_raw_free(rx_info->mbuf); 897 rx_info->mbuf = NULL; 898 } 899 } 900 } 901 902 static void ena_tx_queue_release_bufs(struct ena_ring *ring) 903 { 904 unsigned int i; 905 906 for (i = 0; i < ring->ring_size; ++i) { 907 struct ena_tx_buffer *tx_buf = &ring->tx_buffer_info[i]; 908 909 if (tx_buf->mbuf) { 910 rte_pktmbuf_free(tx_buf->mbuf); 911 tx_buf->mbuf = NULL; 912 } 913 } 914 } 915 916 static int ena_link_update(struct rte_eth_dev *dev, 917 __rte_unused int wait_to_complete) 918 { 919 struct rte_eth_link *link = &dev->data->dev_link; 920 struct ena_adapter *adapter = dev->data->dev_private; 921 922 link->link_status = adapter->link_status ? 
RTE_ETH_LINK_UP : RTE_ETH_LINK_DOWN; 923 link->link_speed = RTE_ETH_SPEED_NUM_NONE; 924 link->link_duplex = RTE_ETH_LINK_FULL_DUPLEX; 925 926 return 0; 927 } 928 929 static int ena_queue_start_all(struct rte_eth_dev *dev, 930 enum ena_ring_type ring_type) 931 { 932 struct ena_adapter *adapter = dev->data->dev_private; 933 struct ena_ring *queues = NULL; 934 int nb_queues; 935 int i = 0; 936 int rc = 0; 937 938 if (ring_type == ENA_RING_TYPE_RX) { 939 queues = adapter->rx_ring; 940 nb_queues = dev->data->nb_rx_queues; 941 } else { 942 queues = adapter->tx_ring; 943 nb_queues = dev->data->nb_tx_queues; 944 } 945 for (i = 0; i < nb_queues; i++) { 946 if (queues[i].configured) { 947 if (ring_type == ENA_RING_TYPE_RX) { 948 ena_assert_msg( 949 dev->data->rx_queues[i] == &queues[i], 950 "Inconsistent state of Rx queues\n"); 951 } else { 952 ena_assert_msg( 953 dev->data->tx_queues[i] == &queues[i], 954 "Inconsistent state of Tx queues\n"); 955 } 956 957 rc = ena_queue_start(dev, &queues[i]); 958 959 if (rc) { 960 PMD_INIT_LOG(ERR, 961 "Failed to start queue[%d] of type(%d)\n", 962 i, ring_type); 963 goto err; 964 } 965 } 966 } 967 968 return 0; 969 970 err: 971 while (i--) 972 if (queues[i].configured) 973 ena_queue_stop(&queues[i]); 974 975 return rc; 976 } 977 978 static int ena_check_valid_conf(struct ena_adapter *adapter) 979 { 980 uint32_t mtu = adapter->edev_data->mtu; 981 982 if (mtu > adapter->max_mtu || mtu < ENA_MIN_MTU) { 983 PMD_INIT_LOG(ERR, 984 "Unsupported MTU of %d. Max MTU: %d, min MTU: %d\n", 985 mtu, adapter->max_mtu, ENA_MIN_MTU); 986 return ENA_COM_UNSUPPORTED; 987 } 988 989 return 0; 990 } 991 992 static int 993 ena_calc_io_queue_size(struct ena_calc_queue_size_ctx *ctx, 994 bool use_large_llq_hdr) 995 { 996 struct ena_admin_feature_llq_desc *llq = &ctx->get_feat_ctx->llq; 997 struct ena_com_dev *ena_dev = ctx->ena_dev; 998 uint32_t max_tx_queue_size; 999 uint32_t max_rx_queue_size; 1000 1001 if (ena_dev->supported_features & BIT(ENA_ADMIN_MAX_QUEUES_EXT)) { 1002 struct ena_admin_queue_ext_feature_fields *max_queue_ext = 1003 &ctx->get_feat_ctx->max_queue_ext.max_queue_ext; 1004 max_rx_queue_size = RTE_MIN(max_queue_ext->max_rx_cq_depth, 1005 max_queue_ext->max_rx_sq_depth); 1006 max_tx_queue_size = max_queue_ext->max_tx_cq_depth; 1007 1008 if (ena_dev->tx_mem_queue_type == 1009 ENA_ADMIN_PLACEMENT_POLICY_DEV) { 1010 max_tx_queue_size = RTE_MIN(max_tx_queue_size, 1011 llq->max_llq_depth); 1012 } else { 1013 max_tx_queue_size = RTE_MIN(max_tx_queue_size, 1014 max_queue_ext->max_tx_sq_depth); 1015 } 1016 1017 ctx->max_rx_sgl_size = RTE_MIN(ENA_PKT_MAX_BUFS, 1018 max_queue_ext->max_per_packet_rx_descs); 1019 ctx->max_tx_sgl_size = RTE_MIN(ENA_PKT_MAX_BUFS, 1020 max_queue_ext->max_per_packet_tx_descs); 1021 } else { 1022 struct ena_admin_queue_feature_desc *max_queues = 1023 &ctx->get_feat_ctx->max_queues; 1024 max_rx_queue_size = RTE_MIN(max_queues->max_cq_depth, 1025 max_queues->max_sq_depth); 1026 max_tx_queue_size = max_queues->max_cq_depth; 1027 1028 if (ena_dev->tx_mem_queue_type == 1029 ENA_ADMIN_PLACEMENT_POLICY_DEV) { 1030 max_tx_queue_size = RTE_MIN(max_tx_queue_size, 1031 llq->max_llq_depth); 1032 } else { 1033 max_tx_queue_size = RTE_MIN(max_tx_queue_size, 1034 max_queues->max_sq_depth); 1035 } 1036 1037 ctx->max_rx_sgl_size = RTE_MIN(ENA_PKT_MAX_BUFS, 1038 max_queues->max_packet_rx_descs); 1039 ctx->max_tx_sgl_size = RTE_MIN(ENA_PKT_MAX_BUFS, 1040 max_queues->max_packet_tx_descs); 1041 } 1042 1043 /* Round down to the nearest power of 2 */ 1044 max_rx_queue_size = 
rte_align32prevpow2(max_rx_queue_size); 1045 max_tx_queue_size = rte_align32prevpow2(max_tx_queue_size); 1046 1047 if (use_large_llq_hdr) { 1048 if ((llq->entry_size_ctrl_supported & 1049 ENA_ADMIN_LIST_ENTRY_SIZE_256B) && 1050 (ena_dev->tx_mem_queue_type == 1051 ENA_ADMIN_PLACEMENT_POLICY_DEV)) { 1052 max_tx_queue_size /= 2; 1053 PMD_INIT_LOG(INFO, 1054 "Forcing large headers and decreasing maximum Tx queue size to %d\n", 1055 max_tx_queue_size); 1056 } else { 1057 PMD_INIT_LOG(ERR, 1058 "Forcing large headers failed: LLQ is disabled or device does not support large headers\n"); 1059 } 1060 } 1061 1062 if (unlikely(max_rx_queue_size == 0 || max_tx_queue_size == 0)) { 1063 PMD_INIT_LOG(ERR, "Invalid queue size\n"); 1064 return -EFAULT; 1065 } 1066 1067 ctx->max_tx_queue_size = max_tx_queue_size; 1068 ctx->max_rx_queue_size = max_rx_queue_size; 1069 1070 return 0; 1071 } 1072 1073 static void ena_stats_restart(struct rte_eth_dev *dev) 1074 { 1075 struct ena_adapter *adapter = dev->data->dev_private; 1076 1077 rte_atomic64_init(&adapter->drv_stats->ierrors); 1078 rte_atomic64_init(&adapter->drv_stats->oerrors); 1079 rte_atomic64_init(&adapter->drv_stats->rx_nombuf); 1080 adapter->drv_stats->rx_drops = 0; 1081 } 1082 1083 static int ena_stats_get(struct rte_eth_dev *dev, 1084 struct rte_eth_stats *stats) 1085 { 1086 struct ena_admin_basic_stats ena_stats; 1087 struct ena_adapter *adapter = dev->data->dev_private; 1088 struct ena_com_dev *ena_dev = &adapter->ena_dev; 1089 int rc; 1090 int i; 1091 int max_rings_stats; 1092 1093 memset(&ena_stats, 0, sizeof(ena_stats)); 1094 1095 rte_spinlock_lock(&adapter->admin_lock); 1096 rc = ENA_PROXY(adapter, ena_com_get_dev_basic_stats, ena_dev, 1097 &ena_stats); 1098 rte_spinlock_unlock(&adapter->admin_lock); 1099 if (unlikely(rc)) { 1100 PMD_DRV_LOG(ERR, "Could not retrieve statistics from ENA\n"); 1101 return rc; 1102 } 1103 1104 /* Set of basic statistics from ENA */ 1105 stats->ipackets = __MERGE_64B_H_L(ena_stats.rx_pkts_high, 1106 ena_stats.rx_pkts_low); 1107 stats->opackets = __MERGE_64B_H_L(ena_stats.tx_pkts_high, 1108 ena_stats.tx_pkts_low); 1109 stats->ibytes = __MERGE_64B_H_L(ena_stats.rx_bytes_high, 1110 ena_stats.rx_bytes_low); 1111 stats->obytes = __MERGE_64B_H_L(ena_stats.tx_bytes_high, 1112 ena_stats.tx_bytes_low); 1113 1114 /* Driver related stats */ 1115 stats->imissed = adapter->drv_stats->rx_drops; 1116 stats->ierrors = rte_atomic64_read(&adapter->drv_stats->ierrors); 1117 stats->oerrors = rte_atomic64_read(&adapter->drv_stats->oerrors); 1118 stats->rx_nombuf = rte_atomic64_read(&adapter->drv_stats->rx_nombuf); 1119 1120 max_rings_stats = RTE_MIN(dev->data->nb_rx_queues, 1121 RTE_ETHDEV_QUEUE_STAT_CNTRS); 1122 for (i = 0; i < max_rings_stats; ++i) { 1123 struct ena_stats_rx *rx_stats = &adapter->rx_ring[i].rx_stats; 1124 1125 stats->q_ibytes[i] = rx_stats->bytes; 1126 stats->q_ipackets[i] = rx_stats->cnt; 1127 stats->q_errors[i] = rx_stats->bad_desc_num + 1128 rx_stats->bad_req_id; 1129 } 1130 1131 max_rings_stats = RTE_MIN(dev->data->nb_tx_queues, 1132 RTE_ETHDEV_QUEUE_STAT_CNTRS); 1133 for (i = 0; i < max_rings_stats; ++i) { 1134 struct ena_stats_tx *tx_stats = &adapter->tx_ring[i].tx_stats; 1135 1136 stats->q_obytes[i] = tx_stats->bytes; 1137 stats->q_opackets[i] = tx_stats->cnt; 1138 } 1139 1140 return 0; 1141 } 1142 1143 static int ena_mtu_set(struct rte_eth_dev *dev, uint16_t mtu) 1144 { 1145 struct ena_adapter *adapter; 1146 struct ena_com_dev *ena_dev; 1147 int rc = 0; 1148 1149 ena_assert_msg(dev->data != NULL, "Uninitialized 
device\n"); 1150 ena_assert_msg(dev->data->dev_private != NULL, "Uninitialized device\n"); 1151 adapter = dev->data->dev_private; 1152 1153 ena_dev = &adapter->ena_dev; 1154 ena_assert_msg(ena_dev != NULL, "Uninitialized device\n"); 1155 1156 if (mtu > adapter->max_mtu || mtu < ENA_MIN_MTU) { 1157 PMD_DRV_LOG(ERR, 1158 "Invalid MTU setting. New MTU: %d, max MTU: %d, min MTU: %d\n", 1159 mtu, adapter->max_mtu, ENA_MIN_MTU); 1160 return -EINVAL; 1161 } 1162 1163 rc = ENA_PROXY(adapter, ena_com_set_dev_mtu, ena_dev, mtu); 1164 if (rc) 1165 PMD_DRV_LOG(ERR, "Could not set MTU: %d\n", mtu); 1166 else 1167 PMD_DRV_LOG(NOTICE, "MTU set to: %d\n", mtu); 1168 1169 return rc; 1170 } 1171 1172 static int ena_start(struct rte_eth_dev *dev) 1173 { 1174 struct ena_adapter *adapter = dev->data->dev_private; 1175 uint64_t ticks; 1176 int rc = 0; 1177 1178 /* Cannot allocate memory in secondary process */ 1179 if (rte_eal_process_type() != RTE_PROC_PRIMARY) { 1180 PMD_DRV_LOG(WARNING, "dev_start not supported in secondary.\n"); 1181 return -EPERM; 1182 } 1183 1184 rc = ena_check_valid_conf(adapter); 1185 if (rc) 1186 return rc; 1187 1188 rc = ena_setup_rx_intr(dev); 1189 if (rc) 1190 return rc; 1191 1192 rc = ena_queue_start_all(dev, ENA_RING_TYPE_RX); 1193 if (rc) 1194 return rc; 1195 1196 rc = ena_queue_start_all(dev, ENA_RING_TYPE_TX); 1197 if (rc) 1198 goto err_start_tx; 1199 1200 if (adapter->edev_data->dev_conf.rxmode.mq_mode & RTE_ETH_MQ_RX_RSS_FLAG) { 1201 rc = ena_rss_configure(adapter); 1202 if (rc) 1203 goto err_rss_init; 1204 } 1205 1206 ena_stats_restart(dev); 1207 1208 adapter->timestamp_wd = rte_get_timer_cycles(); 1209 adapter->keep_alive_timeout = ENA_DEVICE_KALIVE_TIMEOUT; 1210 1211 ticks = rte_get_timer_hz(); 1212 rte_timer_reset(&adapter->timer_wd, ticks, PERIODICAL, rte_lcore_id(), 1213 ena_timer_wd_callback, dev); 1214 1215 ++adapter->dev_stats.dev_start; 1216 adapter->state = ENA_ADAPTER_STATE_RUNNING; 1217 1218 return 0; 1219 1220 err_rss_init: 1221 ena_queue_stop_all(dev, ENA_RING_TYPE_TX); 1222 err_start_tx: 1223 ena_queue_stop_all(dev, ENA_RING_TYPE_RX); 1224 return rc; 1225 } 1226 1227 static int ena_stop(struct rte_eth_dev *dev) 1228 { 1229 struct ena_adapter *adapter = dev->data->dev_private; 1230 struct ena_com_dev *ena_dev = &adapter->ena_dev; 1231 struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev); 1232 struct rte_intr_handle *intr_handle = pci_dev->intr_handle; 1233 int rc; 1234 1235 /* Cannot free memory in secondary process */ 1236 if (rte_eal_process_type() != RTE_PROC_PRIMARY) { 1237 PMD_DRV_LOG(WARNING, "dev_stop not supported in secondary.\n"); 1238 return -EPERM; 1239 } 1240 1241 rte_timer_stop_sync(&adapter->timer_wd); 1242 ena_queue_stop_all(dev, ENA_RING_TYPE_TX); 1243 ena_queue_stop_all(dev, ENA_RING_TYPE_RX); 1244 1245 if (adapter->trigger_reset) { 1246 rc = ena_com_dev_reset(ena_dev, adapter->reset_reason); 1247 if (rc) 1248 PMD_DRV_LOG(ERR, "Device reset failed, rc: %d\n", rc); 1249 } 1250 1251 rte_intr_disable(intr_handle); 1252 1253 rte_intr_efd_disable(intr_handle); 1254 1255 /* Cleanup vector list */ 1256 rte_intr_vec_list_free(intr_handle); 1257 1258 rte_intr_enable(intr_handle); 1259 1260 ++adapter->dev_stats.dev_stop; 1261 adapter->state = ENA_ADAPTER_STATE_STOPPED; 1262 dev->data->dev_started = 0; 1263 1264 return 0; 1265 } 1266 1267 static int ena_create_io_queue(struct rte_eth_dev *dev, struct ena_ring *ring) 1268 { 1269 struct ena_adapter *adapter = ring->adapter; 1270 struct ena_com_dev *ena_dev = &adapter->ena_dev; 1271 struct 
rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev); 1272 struct rte_intr_handle *intr_handle = pci_dev->intr_handle; 1273 struct ena_com_create_io_ctx ctx = 1274 /* policy set to _HOST just to satisfy icc compiler */ 1275 { ENA_ADMIN_PLACEMENT_POLICY_HOST, 1276 0, 0, 0, 0, 0 }; 1277 uint16_t ena_qid; 1278 unsigned int i; 1279 int rc; 1280 1281 ctx.msix_vector = -1; 1282 if (ring->type == ENA_RING_TYPE_TX) { 1283 ena_qid = ENA_IO_TXQ_IDX(ring->id); 1284 ctx.direction = ENA_COM_IO_QUEUE_DIRECTION_TX; 1285 ctx.mem_queue_type = ena_dev->tx_mem_queue_type; 1286 for (i = 0; i < ring->ring_size; i++) 1287 ring->empty_tx_reqs[i] = i; 1288 } else { 1289 ena_qid = ENA_IO_RXQ_IDX(ring->id); 1290 ctx.direction = ENA_COM_IO_QUEUE_DIRECTION_RX; 1291 if (rte_intr_dp_is_en(intr_handle)) 1292 ctx.msix_vector = 1293 rte_intr_vec_list_index_get(intr_handle, 1294 ring->id); 1295 1296 for (i = 0; i < ring->ring_size; i++) 1297 ring->empty_rx_reqs[i] = i; 1298 } 1299 ctx.queue_size = ring->ring_size; 1300 ctx.qid = ena_qid; 1301 ctx.numa_node = ring->numa_socket_id; 1302 1303 rc = ena_com_create_io_queue(ena_dev, &ctx); 1304 if (rc) { 1305 PMD_DRV_LOG(ERR, 1306 "Failed to create IO queue[%d] (qid:%d), rc: %d\n", 1307 ring->id, ena_qid, rc); 1308 return rc; 1309 } 1310 1311 rc = ena_com_get_io_handlers(ena_dev, ena_qid, 1312 &ring->ena_com_io_sq, 1313 &ring->ena_com_io_cq); 1314 if (rc) { 1315 PMD_DRV_LOG(ERR, 1316 "Failed to get IO queue[%d] handlers, rc: %d\n", 1317 ring->id, rc); 1318 ena_com_destroy_io_queue(ena_dev, ena_qid); 1319 return rc; 1320 } 1321 1322 if (ring->type == ENA_RING_TYPE_TX) 1323 ena_com_update_numa_node(ring->ena_com_io_cq, ctx.numa_node); 1324 1325 /* Start with Rx interrupts being masked. */ 1326 if (ring->type == ENA_RING_TYPE_RX && rte_intr_dp_is_en(intr_handle)) 1327 ena_rx_queue_intr_disable(dev, ring->id); 1328 1329 return 0; 1330 } 1331 1332 static void ena_queue_stop(struct ena_ring *ring) 1333 { 1334 struct ena_com_dev *ena_dev = &ring->adapter->ena_dev; 1335 1336 if (ring->type == ENA_RING_TYPE_RX) { 1337 ena_com_destroy_io_queue(ena_dev, ENA_IO_RXQ_IDX(ring->id)); 1338 ena_rx_queue_release_bufs(ring); 1339 } else { 1340 ena_com_destroy_io_queue(ena_dev, ENA_IO_TXQ_IDX(ring->id)); 1341 ena_tx_queue_release_bufs(ring); 1342 } 1343 } 1344 1345 static void ena_queue_stop_all(struct rte_eth_dev *dev, 1346 enum ena_ring_type ring_type) 1347 { 1348 struct ena_adapter *adapter = dev->data->dev_private; 1349 struct ena_ring *queues = NULL; 1350 uint16_t nb_queues, i; 1351 1352 if (ring_type == ENA_RING_TYPE_RX) { 1353 queues = adapter->rx_ring; 1354 nb_queues = dev->data->nb_rx_queues; 1355 } else { 1356 queues = adapter->tx_ring; 1357 nb_queues = dev->data->nb_tx_queues; 1358 } 1359 1360 for (i = 0; i < nb_queues; ++i) 1361 if (queues[i].configured) 1362 ena_queue_stop(&queues[i]); 1363 } 1364 1365 static int ena_queue_start(struct rte_eth_dev *dev, struct ena_ring *ring) 1366 { 1367 int rc, bufs_num; 1368 1369 ena_assert_msg(ring->configured == 1, 1370 "Trying to start unconfigured queue\n"); 1371 1372 rc = ena_create_io_queue(dev, ring); 1373 if (rc) { 1374 PMD_INIT_LOG(ERR, "Failed to create IO queue\n"); 1375 return rc; 1376 } 1377 1378 ring->next_to_clean = 0; 1379 ring->next_to_use = 0; 1380 1381 if (ring->type == ENA_RING_TYPE_TX) { 1382 ring->tx_stats.available_desc = 1383 ena_com_free_q_entries(ring->ena_com_io_sq); 1384 return 0; 1385 } 1386 1387 bufs_num = ring->ring_size - 1; 1388 rc = ena_populate_rx_queue(ring, bufs_num); 1389 if (rc != bufs_num) { 1390 
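		/* A partial refill is treated as a fatal setup error: destroy the
		 * just-created IO queue before reporting the failure.
		 */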
ena_com_destroy_io_queue(&ring->adapter->ena_dev, 1391 ENA_IO_RXQ_IDX(ring->id)); 1392 PMD_INIT_LOG(ERR, "Failed to populate Rx ring\n"); 1393 return ENA_COM_FAULT; 1394 } 1395 /* Flush per-core RX buffers pools cache as they can be used on other 1396 * cores as well. 1397 */ 1398 rte_mempool_cache_flush(NULL, ring->mb_pool); 1399 1400 return 0; 1401 } 1402 1403 static int ena_tx_queue_setup(struct rte_eth_dev *dev, 1404 uint16_t queue_idx, 1405 uint16_t nb_desc, 1406 unsigned int socket_id, 1407 const struct rte_eth_txconf *tx_conf) 1408 { 1409 struct ena_ring *txq = NULL; 1410 struct ena_adapter *adapter = dev->data->dev_private; 1411 unsigned int i; 1412 uint16_t dyn_thresh; 1413 1414 txq = &adapter->tx_ring[queue_idx]; 1415 1416 if (txq->configured) { 1417 PMD_DRV_LOG(CRIT, 1418 "API violation. Queue[%d] is already configured\n", 1419 queue_idx); 1420 return ENA_COM_FAULT; 1421 } 1422 1423 if (!rte_is_power_of_2(nb_desc)) { 1424 PMD_DRV_LOG(ERR, 1425 "Unsupported size of Tx queue: %d is not a power of 2.\n", 1426 nb_desc); 1427 return -EINVAL; 1428 } 1429 1430 if (nb_desc > adapter->max_tx_ring_size) { 1431 PMD_DRV_LOG(ERR, 1432 "Unsupported size of Tx queue (max size: %d)\n", 1433 adapter->max_tx_ring_size); 1434 return -EINVAL; 1435 } 1436 1437 txq->port_id = dev->data->port_id; 1438 txq->next_to_clean = 0; 1439 txq->next_to_use = 0; 1440 txq->ring_size = nb_desc; 1441 txq->size_mask = nb_desc - 1; 1442 txq->numa_socket_id = socket_id; 1443 txq->pkts_without_db = false; 1444 txq->last_cleanup_ticks = 0; 1445 1446 txq->tx_buffer_info = rte_zmalloc_socket("txq->tx_buffer_info", 1447 sizeof(struct ena_tx_buffer) * txq->ring_size, 1448 RTE_CACHE_LINE_SIZE, 1449 socket_id); 1450 if (!txq->tx_buffer_info) { 1451 PMD_DRV_LOG(ERR, 1452 "Failed to allocate memory for Tx buffer info\n"); 1453 return -ENOMEM; 1454 } 1455 1456 txq->empty_tx_reqs = rte_zmalloc_socket("txq->empty_tx_reqs", 1457 sizeof(uint16_t) * txq->ring_size, 1458 RTE_CACHE_LINE_SIZE, 1459 socket_id); 1460 if (!txq->empty_tx_reqs) { 1461 PMD_DRV_LOG(ERR, 1462 "Failed to allocate memory for empty Tx requests\n"); 1463 rte_free(txq->tx_buffer_info); 1464 return -ENOMEM; 1465 } 1466 1467 txq->push_buf_intermediate_buf = 1468 rte_zmalloc_socket("txq->push_buf_intermediate_buf", 1469 txq->tx_max_header_size, 1470 RTE_CACHE_LINE_SIZE, 1471 socket_id); 1472 if (!txq->push_buf_intermediate_buf) { 1473 PMD_DRV_LOG(ERR, "Failed to alloc push buffer for LLQ\n"); 1474 rte_free(txq->tx_buffer_info); 1475 rte_free(txq->empty_tx_reqs); 1476 return -ENOMEM; 1477 } 1478 1479 for (i = 0; i < txq->ring_size; i++) 1480 txq->empty_tx_reqs[i] = i; 1481 1482 txq->offloads = tx_conf->offloads | dev->data->dev_conf.txmode.offloads; 1483 1484 /* Check if caller provided the Tx cleanup threshold value. 
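	 * If it was not provided, fall back to a default derived from the ring
	 * size using the ENA_REFILL_THRESH_* constants below.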
*/ 1485 if (tx_conf->tx_free_thresh != 0) { 1486 txq->tx_free_thresh = tx_conf->tx_free_thresh; 1487 } else { 1488 dyn_thresh = txq->ring_size - 1489 txq->ring_size / ENA_REFILL_THRESH_DIVIDER; 1490 txq->tx_free_thresh = RTE_MAX(dyn_thresh, 1491 txq->ring_size - ENA_REFILL_THRESH_PACKET); 1492 } 1493 1494 txq->missing_tx_completion_threshold = 1495 RTE_MIN(txq->ring_size / 2, ENA_DEFAULT_MISSING_COMP); 1496 1497 /* Store pointer to this queue in upper layer */ 1498 txq->configured = 1; 1499 dev->data->tx_queues[queue_idx] = txq; 1500 1501 return 0; 1502 } 1503 1504 static int ena_rx_queue_setup(struct rte_eth_dev *dev, 1505 uint16_t queue_idx, 1506 uint16_t nb_desc, 1507 unsigned int socket_id, 1508 const struct rte_eth_rxconf *rx_conf, 1509 struct rte_mempool *mp) 1510 { 1511 struct ena_adapter *adapter = dev->data->dev_private; 1512 struct ena_ring *rxq = NULL; 1513 size_t buffer_size; 1514 int i; 1515 uint16_t dyn_thresh; 1516 1517 rxq = &adapter->rx_ring[queue_idx]; 1518 if (rxq->configured) { 1519 PMD_DRV_LOG(CRIT, 1520 "API violation. Queue[%d] is already configured\n", 1521 queue_idx); 1522 return ENA_COM_FAULT; 1523 } 1524 1525 if (!rte_is_power_of_2(nb_desc)) { 1526 PMD_DRV_LOG(ERR, 1527 "Unsupported size of Rx queue: %d is not a power of 2.\n", 1528 nb_desc); 1529 return -EINVAL; 1530 } 1531 1532 if (nb_desc > adapter->max_rx_ring_size) { 1533 PMD_DRV_LOG(ERR, 1534 "Unsupported size of Rx queue (max size: %d)\n", 1535 adapter->max_rx_ring_size); 1536 return -EINVAL; 1537 } 1538 1539 /* ENA isn't supporting buffers smaller than 1400 bytes */ 1540 buffer_size = rte_pktmbuf_data_room_size(mp) - RTE_PKTMBUF_HEADROOM; 1541 if (buffer_size < ENA_RX_BUF_MIN_SIZE) { 1542 PMD_DRV_LOG(ERR, 1543 "Unsupported size of Rx buffer: %zu (min size: %d)\n", 1544 buffer_size, ENA_RX_BUF_MIN_SIZE); 1545 return -EINVAL; 1546 } 1547 1548 rxq->port_id = dev->data->port_id; 1549 rxq->next_to_clean = 0; 1550 rxq->next_to_use = 0; 1551 rxq->ring_size = nb_desc; 1552 rxq->size_mask = nb_desc - 1; 1553 rxq->numa_socket_id = socket_id; 1554 rxq->mb_pool = mp; 1555 1556 rxq->rx_buffer_info = rte_zmalloc_socket("rxq->buffer_info", 1557 sizeof(struct ena_rx_buffer) * nb_desc, 1558 RTE_CACHE_LINE_SIZE, 1559 socket_id); 1560 if (!rxq->rx_buffer_info) { 1561 PMD_DRV_LOG(ERR, 1562 "Failed to allocate memory for Rx buffer info\n"); 1563 return -ENOMEM; 1564 } 1565 1566 rxq->rx_refill_buffer = rte_zmalloc_socket("rxq->rx_refill_buffer", 1567 sizeof(struct rte_mbuf *) * nb_desc, 1568 RTE_CACHE_LINE_SIZE, 1569 socket_id); 1570 if (!rxq->rx_refill_buffer) { 1571 PMD_DRV_LOG(ERR, 1572 "Failed to allocate memory for Rx refill buffer\n"); 1573 rte_free(rxq->rx_buffer_info); 1574 rxq->rx_buffer_info = NULL; 1575 return -ENOMEM; 1576 } 1577 1578 rxq->empty_rx_reqs = rte_zmalloc_socket("rxq->empty_rx_reqs", 1579 sizeof(uint16_t) * nb_desc, 1580 RTE_CACHE_LINE_SIZE, 1581 socket_id); 1582 if (!rxq->empty_rx_reqs) { 1583 PMD_DRV_LOG(ERR, 1584 "Failed to allocate memory for empty Rx requests\n"); 1585 rte_free(rxq->rx_buffer_info); 1586 rxq->rx_buffer_info = NULL; 1587 rte_free(rxq->rx_refill_buffer); 1588 rxq->rx_refill_buffer = NULL; 1589 return -ENOMEM; 1590 } 1591 1592 for (i = 0; i < nb_desc; i++) 1593 rxq->empty_rx_reqs[i] = i; 1594 1595 rxq->offloads = rx_conf->offloads | dev->data->dev_conf.rxmode.offloads; 1596 1597 if (rx_conf->rx_free_thresh != 0) { 1598 rxq->rx_free_thresh = rx_conf->rx_free_thresh; 1599 } else { 1600 dyn_thresh = rxq->ring_size / ENA_REFILL_THRESH_DIVIDER; 1601 rxq->rx_free_thresh = 
RTE_MIN(dyn_thresh, 1602 (uint16_t)(ENA_REFILL_THRESH_PACKET)); 1603 } 1604 1605 /* Store pointer to this queue in upper layer */ 1606 rxq->configured = 1; 1607 dev->data->rx_queues[queue_idx] = rxq; 1608 1609 return 0; 1610 } 1611 1612 static int ena_add_single_rx_desc(struct ena_com_io_sq *io_sq, 1613 struct rte_mbuf *mbuf, uint16_t id) 1614 { 1615 struct ena_com_buf ebuf; 1616 int rc; 1617 1618 /* prepare physical address for DMA transaction */ 1619 ebuf.paddr = mbuf->buf_iova + RTE_PKTMBUF_HEADROOM; 1620 ebuf.len = mbuf->buf_len - RTE_PKTMBUF_HEADROOM; 1621 1622 /* pass resource to device */ 1623 rc = ena_com_add_single_rx_desc(io_sq, &ebuf, id); 1624 if (unlikely(rc != 0)) 1625 PMD_RX_LOG(WARNING, "Failed adding Rx desc\n"); 1626 1627 return rc; 1628 } 1629 1630 static int ena_populate_rx_queue(struct ena_ring *rxq, unsigned int count) 1631 { 1632 unsigned int i; 1633 int rc; 1634 uint16_t next_to_use = rxq->next_to_use; 1635 uint16_t req_id; 1636 #ifdef RTE_ETHDEV_DEBUG_RX 1637 uint16_t in_use; 1638 #endif 1639 struct rte_mbuf **mbufs = rxq->rx_refill_buffer; 1640 1641 if (unlikely(!count)) 1642 return 0; 1643 1644 #ifdef RTE_ETHDEV_DEBUG_RX 1645 in_use = rxq->ring_size - 1 - 1646 ena_com_free_q_entries(rxq->ena_com_io_sq); 1647 if (unlikely((in_use + count) >= rxq->ring_size)) 1648 PMD_RX_LOG(ERR, "Bad Rx ring state\n"); 1649 #endif 1650 1651 /* get resources for incoming packets */ 1652 rc = rte_pktmbuf_alloc_bulk(rxq->mb_pool, mbufs, count); 1653 if (unlikely(rc < 0)) { 1654 rte_atomic64_inc(&rxq->adapter->drv_stats->rx_nombuf); 1655 ++rxq->rx_stats.mbuf_alloc_fail; 1656 PMD_RX_LOG(DEBUG, "There are not enough free buffers\n"); 1657 return 0; 1658 } 1659 1660 for (i = 0; i < count; i++) { 1661 struct rte_mbuf *mbuf = mbufs[i]; 1662 struct ena_rx_buffer *rx_info; 1663 1664 if (likely((i + 4) < count)) 1665 rte_prefetch0(mbufs[i + 4]); 1666 1667 req_id = rxq->empty_rx_reqs[next_to_use]; 1668 rx_info = &rxq->rx_buffer_info[req_id]; 1669 1670 rc = ena_add_single_rx_desc(rxq->ena_com_io_sq, mbuf, req_id); 1671 if (unlikely(rc != 0)) 1672 break; 1673 1674 rx_info->mbuf = mbuf; 1675 next_to_use = ENA_IDX_NEXT_MASKED(next_to_use, rxq->size_mask); 1676 } 1677 1678 if (unlikely(i < count)) { 1679 PMD_RX_LOG(WARNING, 1680 "Refilled Rx queue[%d] with only %d/%d buffers\n", 1681 rxq->id, i, count); 1682 rte_pktmbuf_free_bulk(&mbufs[i], count - i); 1683 ++rxq->rx_stats.refill_partial; 1684 } 1685 1686 /* When we submitted free resources to device... */ 1687 if (likely(i > 0)) { 1688 /* ...let HW know that it can fill buffers with data. */ 1689 ena_com_write_sq_doorbell(rxq->ena_com_io_sq); 1690 1691 rxq->next_to_use = next_to_use; 1692 } 1693 1694 return i; 1695 } 1696 1697 static int ena_device_init(struct ena_adapter *adapter, 1698 struct rte_pci_device *pdev, 1699 struct ena_com_dev_get_features_ctx *get_feat_ctx) 1700 { 1701 struct ena_com_dev *ena_dev = &adapter->ena_dev; 1702 uint32_t aenq_groups; 1703 int rc; 1704 bool readless_supported; 1705 1706 /* Initialize mmio registers */ 1707 rc = ena_com_mmio_reg_read_request_init(ena_dev); 1708 if (rc) { 1709 PMD_DRV_LOG(ERR, "Failed to init MMIO read less\n"); 1710 return rc; 1711 } 1712 1713 /* The PCIe configuration space revision id indicate if mmio reg 1714 * read is disabled. 
1715 */ 1716 readless_supported = !(pdev->id.class_id & ENA_MMIO_DISABLE_REG_READ); 1717 ena_com_set_mmio_read_mode(ena_dev, readless_supported); 1718 1719 /* reset device */ 1720 rc = ena_com_dev_reset(ena_dev, ENA_REGS_RESET_NORMAL); 1721 if (rc) { 1722 PMD_DRV_LOG(ERR, "Cannot reset device\n"); 1723 goto err_mmio_read_less; 1724 } 1725 1726 /* check FW version */ 1727 rc = ena_com_validate_version(ena_dev); 1728 if (rc) { 1729 PMD_DRV_LOG(ERR, "Device version is too low\n"); 1730 goto err_mmio_read_less; 1731 } 1732 1733 ena_dev->dma_addr_bits = ena_com_get_dma_width(ena_dev); 1734 1735 /* ENA device administration layer init */ 1736 rc = ena_com_admin_init(ena_dev, &aenq_handlers); 1737 if (rc) { 1738 PMD_DRV_LOG(ERR, 1739 "Cannot initialize ENA admin queue\n"); 1740 goto err_mmio_read_less; 1741 } 1742 1743 /* To enable the msix interrupts the driver needs to know the number 1744 * of queues. So the driver uses polling mode to retrieve this 1745 * information. 1746 */ 1747 ena_com_set_admin_polling_mode(ena_dev, true); 1748 1749 ena_config_host_info(ena_dev); 1750 1751 /* Get Device Attributes and features */ 1752 rc = ena_com_get_dev_attr_feat(ena_dev, get_feat_ctx); 1753 if (rc) { 1754 PMD_DRV_LOG(ERR, 1755 "Cannot get attribute for ENA device, rc: %d\n", rc); 1756 goto err_admin_init; 1757 } 1758 1759 aenq_groups = BIT(ENA_ADMIN_LINK_CHANGE) | 1760 BIT(ENA_ADMIN_NOTIFICATION) | 1761 BIT(ENA_ADMIN_KEEP_ALIVE) | 1762 BIT(ENA_ADMIN_FATAL_ERROR) | 1763 BIT(ENA_ADMIN_WARNING); 1764 1765 aenq_groups &= get_feat_ctx->aenq.supported_groups; 1766 1767 adapter->all_aenq_groups = aenq_groups; 1768 1769 return 0; 1770 1771 err_admin_init: 1772 ena_com_admin_destroy(ena_dev); 1773 1774 err_mmio_read_less: 1775 ena_com_mmio_reg_read_request_destroy(ena_dev); 1776 1777 return rc; 1778 } 1779 1780 static void ena_interrupt_handler_rte(void *cb_arg) 1781 { 1782 struct rte_eth_dev *dev = cb_arg; 1783 struct ena_adapter *adapter = dev->data->dev_private; 1784 struct ena_com_dev *ena_dev = &adapter->ena_dev; 1785 1786 ena_com_admin_q_comp_intr_handler(ena_dev); 1787 if (likely(adapter->state != ENA_ADAPTER_STATE_CLOSED)) 1788 ena_com_aenq_intr_handler(ena_dev, dev); 1789 } 1790 1791 static void check_for_missing_keep_alive(struct ena_adapter *adapter) 1792 { 1793 if (!(adapter->active_aenq_groups & BIT(ENA_ADMIN_KEEP_ALIVE))) 1794 return; 1795 1796 if (adapter->keep_alive_timeout == ENA_HW_HINTS_NO_TIMEOUT) 1797 return; 1798 1799 if (unlikely((rte_get_timer_cycles() - adapter->timestamp_wd) >= 1800 adapter->keep_alive_timeout)) { 1801 PMD_DRV_LOG(ERR, "Keep alive timeout\n"); 1802 ena_trigger_reset(adapter, ENA_REGS_RESET_KEEP_ALIVE_TO); 1803 ++adapter->dev_stats.wd_expired; 1804 } 1805 } 1806 1807 /* Check if admin queue is enabled */ 1808 static void check_for_admin_com_state(struct ena_adapter *adapter) 1809 { 1810 if (unlikely(!ena_com_get_admin_running_state(&adapter->ena_dev))) { 1811 PMD_DRV_LOG(ERR, "ENA admin queue is not in running state\n"); 1812 ena_trigger_reset(adapter, ENA_REGS_RESET_ADMIN_TO); 1813 } 1814 } 1815 1816 static int check_for_tx_completion_in_queue(struct ena_adapter *adapter, 1817 struct ena_ring *tx_ring) 1818 { 1819 struct ena_tx_buffer *tx_buf; 1820 uint64_t timestamp; 1821 uint64_t completion_delay; 1822 uint32_t missed_tx = 0; 1823 unsigned int i; 1824 int rc = 0; 1825 1826 for (i = 0; i < tx_ring->ring_size; ++i) { 1827 tx_buf = &tx_ring->tx_buffer_info[i]; 1828 timestamp = tx_buf->timestamp; 1829 1830 if (timestamp == 0) 1831 continue; 1832 1833 completion_delay = 
rte_get_timer_cycles() - timestamp; 1834 if (completion_delay > adapter->missing_tx_completion_to) { 1835 if (unlikely(!tx_buf->print_once)) { 1836 PMD_TX_LOG(WARNING, 1837 "Found a Tx that wasn't completed on time, qid %d, index %d. " 1838 "Missing Tx outstanding for %" PRIu64 " msecs.\n", 1839 tx_ring->id, i, completion_delay / 1840 rte_get_timer_hz() * 1000); 1841 tx_buf->print_once = true; 1842 } 1843 ++missed_tx; 1844 } 1845 } 1846 1847 if (unlikely(missed_tx > tx_ring->missing_tx_completion_threshold)) { 1848 PMD_DRV_LOG(ERR, 1849 "The number of lost Tx completions is above the threshold (%d > %d). " 1850 "Trigger the device reset.\n", 1851 missed_tx, 1852 tx_ring->missing_tx_completion_threshold); 1853 adapter->reset_reason = ENA_REGS_RESET_MISS_TX_CMPL; 1854 adapter->trigger_reset = true; 1855 rc = -EIO; 1856 } 1857 1858 tx_ring->tx_stats.missed_tx += missed_tx; 1859 1860 return rc; 1861 } 1862 1863 static void check_for_tx_completions(struct ena_adapter *adapter) 1864 { 1865 struct ena_ring *tx_ring; 1866 uint64_t tx_cleanup_delay; 1867 size_t qid; 1868 int budget; 1869 uint16_t nb_tx_queues = adapter->edev_data->nb_tx_queues; 1870 1871 if (adapter->missing_tx_completion_to == ENA_HW_HINTS_NO_TIMEOUT) 1872 return; 1873 1874 nb_tx_queues = adapter->edev_data->nb_tx_queues; 1875 budget = adapter->missing_tx_completion_budget; 1876 1877 qid = adapter->last_tx_comp_qid; 1878 while (budget-- > 0) { 1879 tx_ring = &adapter->tx_ring[qid]; 1880 1881 /* Tx cleanup is called only by the burst function and can be 1882 * called dynamically by the application. Also cleanup is 1883 * limited by the threshold. To avoid false detection of the 1884 * missing HW Tx completion, get the delay since last cleanup 1885 * function was called. 1886 */ 1887 tx_cleanup_delay = rte_get_timer_cycles() - 1888 tx_ring->last_cleanup_ticks; 1889 if (tx_cleanup_delay < adapter->tx_cleanup_stall_delay) 1890 check_for_tx_completion_in_queue(adapter, tx_ring); 1891 qid = (qid + 1) % nb_tx_queues; 1892 } 1893 1894 adapter->last_tx_comp_qid = qid; 1895 } 1896 1897 static void ena_timer_wd_callback(__rte_unused struct rte_timer *timer, 1898 void *arg) 1899 { 1900 struct rte_eth_dev *dev = arg; 1901 struct ena_adapter *adapter = dev->data->dev_private; 1902 1903 if (unlikely(adapter->trigger_reset)) 1904 return; 1905 1906 check_for_missing_keep_alive(adapter); 1907 check_for_admin_com_state(adapter); 1908 check_for_tx_completions(adapter); 1909 1910 if (unlikely(adapter->trigger_reset)) { 1911 PMD_DRV_LOG(ERR, "Trigger reset is on\n"); 1912 rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_RESET, 1913 NULL); 1914 } 1915 } 1916 1917 static inline void 1918 set_default_llq_configurations(struct ena_llq_configurations *llq_config, 1919 struct ena_admin_feature_llq_desc *llq, 1920 bool use_large_llq_hdr) 1921 { 1922 llq_config->llq_header_location = ENA_ADMIN_INLINE_HEADER; 1923 llq_config->llq_stride_ctrl = ENA_ADMIN_MULTIPLE_DESCS_PER_ENTRY; 1924 llq_config->llq_num_decs_before_header = 1925 ENA_ADMIN_LLQ_NUM_DESCS_BEFORE_HEADER_2; 1926 1927 if (use_large_llq_hdr && 1928 (llq->entry_size_ctrl_supported & ENA_ADMIN_LIST_ENTRY_SIZE_256B)) { 1929 llq_config->llq_ring_entry_size = 1930 ENA_ADMIN_LIST_ENTRY_SIZE_256B; 1931 llq_config->llq_ring_entry_size_value = 256; 1932 } else { 1933 llq_config->llq_ring_entry_size = 1934 ENA_ADMIN_LIST_ENTRY_SIZE_128B; 1935 llq_config->llq_ring_entry_size_value = 128; 1936 } 1937 } 1938 1939 static int 1940 ena_set_queues_placement_policy(struct ena_adapter *adapter, 1941 struct 
ena_com_dev *ena_dev, 1942 struct ena_admin_feature_llq_desc *llq, 1943 struct ena_llq_configurations *llq_default_configurations) 1944 { 1945 int rc; 1946 u32 llq_feature_mask; 1947 1948 llq_feature_mask = 1 << ENA_ADMIN_LLQ; 1949 if (!(ena_dev->supported_features & llq_feature_mask)) { 1950 PMD_DRV_LOG(INFO, 1951 "LLQ is not supported. Fallback to host mode policy.\n"); 1952 ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST; 1953 return 0; 1954 } 1955 1956 rc = ena_com_config_dev_mode(ena_dev, llq, llq_default_configurations); 1957 if (unlikely(rc)) { 1958 PMD_INIT_LOG(WARNING, 1959 "Failed to config dev mode. Fallback to host mode policy.\n"); 1960 ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST; 1961 return 0; 1962 } 1963 1964 /* Nothing to config, exit */ 1965 if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_HOST) 1966 return 0; 1967 1968 if (!adapter->dev_mem_base) { 1969 PMD_DRV_LOG(ERR, 1970 "Unable to access LLQ BAR resource. Fallback to host mode policy.\n"); 1971 ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST; 1972 return 0; 1973 } 1974 1975 ena_dev->mem_bar = adapter->dev_mem_base; 1976 1977 return 0; 1978 } 1979 1980 static uint32_t ena_calc_max_io_queue_num(struct ena_com_dev *ena_dev, 1981 struct ena_com_dev_get_features_ctx *get_feat_ctx) 1982 { 1983 uint32_t io_tx_sq_num, io_tx_cq_num, io_rx_num, max_num_io_queues; 1984 1985 /* Regular queues capabilities */ 1986 if (ena_dev->supported_features & BIT(ENA_ADMIN_MAX_QUEUES_EXT)) { 1987 struct ena_admin_queue_ext_feature_fields *max_queue_ext = 1988 &get_feat_ctx->max_queue_ext.max_queue_ext; 1989 io_rx_num = RTE_MIN(max_queue_ext->max_rx_sq_num, 1990 max_queue_ext->max_rx_cq_num); 1991 io_tx_sq_num = max_queue_ext->max_tx_sq_num; 1992 io_tx_cq_num = max_queue_ext->max_tx_cq_num; 1993 } else { 1994 struct ena_admin_queue_feature_desc *max_queues = 1995 &get_feat_ctx->max_queues; 1996 io_tx_sq_num = max_queues->max_sq_num; 1997 io_tx_cq_num = max_queues->max_cq_num; 1998 io_rx_num = RTE_MIN(io_tx_sq_num, io_tx_cq_num); 1999 } 2000 2001 /* In case of LLQ use the llq number in the get feature cmd */ 2002 if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) 2003 io_tx_sq_num = get_feat_ctx->llq.max_llq_num; 2004 2005 max_num_io_queues = RTE_MIN(ENA_MAX_NUM_IO_QUEUES, io_rx_num); 2006 max_num_io_queues = RTE_MIN(max_num_io_queues, io_tx_sq_num); 2007 max_num_io_queues = RTE_MIN(max_num_io_queues, io_tx_cq_num); 2008 2009 if (unlikely(max_num_io_queues == 0)) { 2010 PMD_DRV_LOG(ERR, "Number of IO queues cannot not be 0\n"); 2011 return -EFAULT; 2012 } 2013 2014 return max_num_io_queues; 2015 } 2016 2017 static void 2018 ena_set_offloads(struct ena_offloads *offloads, 2019 struct ena_admin_feature_offload_desc *offload_desc) 2020 { 2021 if (offload_desc->tx & ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_IPV4_MASK) 2022 offloads->tx_offloads |= ENA_IPV4_TSO; 2023 2024 /* Tx IPv4 checksum offloads */ 2025 if (offload_desc->tx & 2026 ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L3_CSUM_IPV4_MASK) 2027 offloads->tx_offloads |= ENA_L3_IPV4_CSUM; 2028 if (offload_desc->tx & 2029 ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV4_CSUM_FULL_MASK) 2030 offloads->tx_offloads |= ENA_L4_IPV4_CSUM; 2031 if (offload_desc->tx & 2032 ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV4_CSUM_PART_MASK) 2033 offloads->tx_offloads |= ENA_L4_IPV4_CSUM_PARTIAL; 2034 2035 /* Tx IPv6 checksum offloads */ 2036 if (offload_desc->tx & 2037 ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV6_CSUM_FULL_MASK) 2038 offloads->tx_offloads |= 
ENA_L4_IPV6_CSUM;
	if (offload_desc->tx &
	    ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV6_CSUM_PART_MASK)
		offloads->tx_offloads |= ENA_L4_IPV6_CSUM_PARTIAL;

	/* Rx IPv4 checksum offloads */
	if (offload_desc->rx_supported &
	    ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L3_CSUM_IPV4_MASK)
		offloads->rx_offloads |= ENA_L3_IPV4_CSUM;
	if (offload_desc->rx_supported &
	    ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L4_IPV4_CSUM_MASK)
		offloads->rx_offloads |= ENA_L4_IPV4_CSUM;

	/* Rx IPv6 checksum offloads */
	if (offload_desc->rx_supported &
	    ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L4_IPV6_CSUM_MASK)
		offloads->rx_offloads |= ENA_L4_IPV6_CSUM;

	if (offload_desc->rx_supported &
	    ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_HASH_MASK)
		offloads->rx_offloads |= ENA_RX_RSS_HASH;
}

static int ena_init_once(void)
{
	static bool init_done;

	if (init_done)
		return 0;

	if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
		/* Init timer subsystem for the ENA timer service. */
		rte_timer_subsystem_init();
		/* Register handler for requests from secondary processes. */
		rte_mp_action_register(ENA_MP_NAME, ena_mp_primary_handle);
	}

	init_done = true;
	return 0;
}

static int eth_ena_dev_init(struct rte_eth_dev *eth_dev)
{
	struct ena_calc_queue_size_ctx calc_queue_ctx = { 0 };
	struct rte_pci_device *pci_dev;
	struct rte_intr_handle *intr_handle;
	struct ena_adapter *adapter = eth_dev->data->dev_private;
	struct ena_com_dev *ena_dev = &adapter->ena_dev;
	struct ena_com_dev_get_features_ctx get_feat_ctx;
	struct ena_llq_configurations llq_config;
	const char *queue_type_str;
	uint32_t max_num_io_queues;
	int rc;
	static int adapters_found;
	bool disable_meta_caching;

	eth_dev->dev_ops = &ena_dev_ops;
	eth_dev->rx_pkt_burst = &eth_ena_recv_pkts;
	eth_dev->tx_pkt_burst = &eth_ena_xmit_pkts;
	eth_dev->tx_pkt_prepare = &eth_ena_prep_pkts;

	rc = ena_init_once();
	if (rc != 0)
		return rc;

	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
		return 0;

	eth_dev->data->dev_flags |= RTE_ETH_DEV_AUTOFILL_QUEUE_XSTATS;

	memset(adapter, 0, sizeof(struct ena_adapter));
	ena_dev = &adapter->ena_dev;

	adapter->edev_data = eth_dev->data;

	pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev);

	PMD_INIT_LOG(INFO, "Initializing %x:%x:%x.%d\n",
		     pci_dev->addr.domain,
		     pci_dev->addr.bus,
		     pci_dev->addr.devid,
		     pci_dev->addr.function);

	intr_handle = pci_dev->intr_handle;

	adapter->regs = pci_dev->mem_resource[ENA_REGS_BAR].addr;
	adapter->dev_mem_base = pci_dev->mem_resource[ENA_MEM_BAR].addr;

	if (!adapter->regs) {
		PMD_INIT_LOG(CRIT, "Failed to access registers BAR(%d)\n",
			ENA_REGS_BAR);
		return -ENXIO;
	}

	ena_dev->reg_bar = adapter->regs;
	/* Pass device data as a pointer which can be passed to the IO functions
	 * by the ena_com (for example - the memory allocation.
2135 */ 2136 ena_dev->dmadev = eth_dev->data; 2137 2138 adapter->id_number = adapters_found; 2139 2140 snprintf(adapter->name, ENA_NAME_MAX_LEN, "ena_%d", 2141 adapter->id_number); 2142 2143 adapter->missing_tx_completion_to = ENA_TX_TIMEOUT; 2144 2145 rc = ena_parse_devargs(adapter, pci_dev->device.devargs); 2146 if (rc != 0) { 2147 PMD_INIT_LOG(CRIT, "Failed to parse devargs\n"); 2148 goto err; 2149 } 2150 2151 /* device specific initialization routine */ 2152 rc = ena_device_init(adapter, pci_dev, &get_feat_ctx); 2153 if (rc) { 2154 PMD_INIT_LOG(CRIT, "Failed to init ENA device\n"); 2155 goto err; 2156 } 2157 2158 /* Check if device supports LSC */ 2159 if (!(adapter->all_aenq_groups & BIT(ENA_ADMIN_LINK_CHANGE))) 2160 adapter->edev_data->dev_flags &= ~RTE_ETH_DEV_INTR_LSC; 2161 2162 set_default_llq_configurations(&llq_config, &get_feat_ctx.llq, 2163 adapter->use_large_llq_hdr); 2164 rc = ena_set_queues_placement_policy(adapter, ena_dev, 2165 &get_feat_ctx.llq, &llq_config); 2166 if (unlikely(rc)) { 2167 PMD_INIT_LOG(CRIT, "Failed to set placement policy\n"); 2168 return rc; 2169 } 2170 2171 if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_HOST) 2172 queue_type_str = "Regular"; 2173 else 2174 queue_type_str = "Low latency"; 2175 PMD_DRV_LOG(INFO, "Placement policy: %s\n", queue_type_str); 2176 2177 calc_queue_ctx.ena_dev = ena_dev; 2178 calc_queue_ctx.get_feat_ctx = &get_feat_ctx; 2179 2180 max_num_io_queues = ena_calc_max_io_queue_num(ena_dev, &get_feat_ctx); 2181 rc = ena_calc_io_queue_size(&calc_queue_ctx, 2182 adapter->use_large_llq_hdr); 2183 if (unlikely((rc != 0) || (max_num_io_queues == 0))) { 2184 rc = -EFAULT; 2185 goto err_device_destroy; 2186 } 2187 2188 adapter->max_tx_ring_size = calc_queue_ctx.max_tx_queue_size; 2189 adapter->max_rx_ring_size = calc_queue_ctx.max_rx_queue_size; 2190 adapter->max_tx_sgl_size = calc_queue_ctx.max_tx_sgl_size; 2191 adapter->max_rx_sgl_size = calc_queue_ctx.max_rx_sgl_size; 2192 adapter->max_num_io_queues = max_num_io_queues; 2193 2194 if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) { 2195 disable_meta_caching = 2196 !!(get_feat_ctx.llq.accel_mode.u.get.supported_flags & 2197 BIT(ENA_ADMIN_DISABLE_META_CACHING)); 2198 } else { 2199 disable_meta_caching = false; 2200 } 2201 2202 /* prepare ring structures */ 2203 ena_init_rings(adapter, disable_meta_caching); 2204 2205 ena_config_debug_area(adapter); 2206 2207 /* Set max MTU for this device */ 2208 adapter->max_mtu = get_feat_ctx.dev_attr.max_mtu; 2209 2210 ena_set_offloads(&adapter->offloads, &get_feat_ctx.offload); 2211 2212 /* Copy MAC address and point DPDK to it */ 2213 eth_dev->data->mac_addrs = (struct rte_ether_addr *)adapter->mac_addr; 2214 rte_ether_addr_copy((struct rte_ether_addr *) 2215 get_feat_ctx.dev_attr.mac_addr, 2216 (struct rte_ether_addr *)adapter->mac_addr); 2217 2218 rc = ena_com_rss_init(ena_dev, ENA_RX_RSS_TABLE_LOG_SIZE); 2219 if (unlikely(rc != 0)) { 2220 PMD_DRV_LOG(ERR, "Failed to initialize RSS in ENA device\n"); 2221 goto err_delete_debug_area; 2222 } 2223 2224 adapter->drv_stats = rte_zmalloc("adapter stats", 2225 sizeof(*adapter->drv_stats), 2226 RTE_CACHE_LINE_SIZE); 2227 if (!adapter->drv_stats) { 2228 PMD_DRV_LOG(ERR, 2229 "Failed to allocate memory for adapter statistics\n"); 2230 rc = -ENOMEM; 2231 goto err_rss_destroy; 2232 } 2233 2234 rte_spinlock_init(&adapter->admin_lock); 2235 2236 rte_intr_callback_register(intr_handle, 2237 ena_interrupt_handler_rte, 2238 eth_dev); 2239 rte_intr_enable(intr_handle); 2240 
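	/* With the interrupt handler registered and interrupts enabled, the
	 * admin queue can leave polling mode and AENQ events can be accepted
	 * from the device.
	 */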
ena_com_set_admin_polling_mode(ena_dev, false); 2241 ena_com_admin_aenq_enable(ena_dev); 2242 2243 rte_timer_init(&adapter->timer_wd); 2244 2245 adapters_found++; 2246 adapter->state = ENA_ADAPTER_STATE_INIT; 2247 2248 return 0; 2249 2250 err_rss_destroy: 2251 ena_com_rss_destroy(ena_dev); 2252 err_delete_debug_area: 2253 ena_com_delete_debug_area(ena_dev); 2254 2255 err_device_destroy: 2256 ena_com_delete_host_info(ena_dev); 2257 ena_com_admin_destroy(ena_dev); 2258 2259 err: 2260 return rc; 2261 } 2262 2263 static void ena_destroy_device(struct rte_eth_dev *eth_dev) 2264 { 2265 struct ena_adapter *adapter = eth_dev->data->dev_private; 2266 struct ena_com_dev *ena_dev = &adapter->ena_dev; 2267 2268 if (adapter->state == ENA_ADAPTER_STATE_FREE) 2269 return; 2270 2271 ena_com_set_admin_running_state(ena_dev, false); 2272 2273 if (adapter->state != ENA_ADAPTER_STATE_CLOSED) 2274 ena_close(eth_dev); 2275 2276 ena_com_rss_destroy(ena_dev); 2277 2278 ena_com_delete_debug_area(ena_dev); 2279 ena_com_delete_host_info(ena_dev); 2280 2281 ena_com_abort_admin_commands(ena_dev); 2282 ena_com_wait_for_abort_completion(ena_dev); 2283 ena_com_admin_destroy(ena_dev); 2284 ena_com_mmio_reg_read_request_destroy(ena_dev); 2285 2286 adapter->state = ENA_ADAPTER_STATE_FREE; 2287 } 2288 2289 static int eth_ena_dev_uninit(struct rte_eth_dev *eth_dev) 2290 { 2291 if (rte_eal_process_type() != RTE_PROC_PRIMARY) 2292 return 0; 2293 2294 ena_destroy_device(eth_dev); 2295 2296 return 0; 2297 } 2298 2299 static int ena_dev_configure(struct rte_eth_dev *dev) 2300 { 2301 struct ena_adapter *adapter = dev->data->dev_private; 2302 int rc; 2303 2304 adapter->state = ENA_ADAPTER_STATE_CONFIG; 2305 2306 if (dev->data->dev_conf.rxmode.mq_mode & RTE_ETH_MQ_RX_RSS_FLAG) 2307 dev->data->dev_conf.rxmode.offloads |= RTE_ETH_RX_OFFLOAD_RSS_HASH; 2308 dev->data->dev_conf.txmode.offloads |= RTE_ETH_TX_OFFLOAD_MULTI_SEGS; 2309 2310 /* Scattered Rx cannot be turned off in the HW, so this capability must 2311 * be forced. 2312 */ 2313 dev->data->scattered_rx = 1; 2314 2315 adapter->last_tx_comp_qid = 0; 2316 2317 adapter->missing_tx_completion_budget = 2318 RTE_MIN(ENA_MONITORED_TX_QUEUES, dev->data->nb_tx_queues); 2319 2320 /* To avoid detection of the spurious Tx completion timeout due to 2321 * application not calling the Tx cleanup function, set timeout for the 2322 * Tx queue which should be half of the missing completion timeout for a 2323 * safety. If there will be a lot of missing Tx completions in the 2324 * queue, they will be detected sooner or later. 
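	 * check_for_tx_completions() only examines a queue whose last cleanup
	 * happened within this stall delay; otherwise the check is skipped for
	 * that queue until the application calls the Tx cleanup again.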
2325 */ 2326 adapter->tx_cleanup_stall_delay = adapter->missing_tx_completion_to / 2; 2327 2328 rc = ena_configure_aenq(adapter); 2329 2330 return rc; 2331 } 2332 2333 static void ena_init_rings(struct ena_adapter *adapter, 2334 bool disable_meta_caching) 2335 { 2336 size_t i; 2337 2338 for (i = 0; i < adapter->max_num_io_queues; i++) { 2339 struct ena_ring *ring = &adapter->tx_ring[i]; 2340 2341 ring->configured = 0; 2342 ring->type = ENA_RING_TYPE_TX; 2343 ring->adapter = adapter; 2344 ring->id = i; 2345 ring->tx_mem_queue_type = adapter->ena_dev.tx_mem_queue_type; 2346 ring->tx_max_header_size = adapter->ena_dev.tx_max_header_size; 2347 ring->sgl_size = adapter->max_tx_sgl_size; 2348 ring->disable_meta_caching = disable_meta_caching; 2349 } 2350 2351 for (i = 0; i < adapter->max_num_io_queues; i++) { 2352 struct ena_ring *ring = &adapter->rx_ring[i]; 2353 2354 ring->configured = 0; 2355 ring->type = ENA_RING_TYPE_RX; 2356 ring->adapter = adapter; 2357 ring->id = i; 2358 ring->sgl_size = adapter->max_rx_sgl_size; 2359 } 2360 } 2361 2362 static uint64_t ena_get_rx_port_offloads(struct ena_adapter *adapter) 2363 { 2364 uint64_t port_offloads = 0; 2365 2366 if (adapter->offloads.rx_offloads & ENA_L3_IPV4_CSUM) 2367 port_offloads |= RTE_ETH_RX_OFFLOAD_IPV4_CKSUM; 2368 2369 if (adapter->offloads.rx_offloads & 2370 (ENA_L4_IPV4_CSUM | ENA_L4_IPV6_CSUM)) 2371 port_offloads |= 2372 RTE_ETH_RX_OFFLOAD_UDP_CKSUM | RTE_ETH_RX_OFFLOAD_TCP_CKSUM; 2373 2374 if (adapter->offloads.rx_offloads & ENA_RX_RSS_HASH) 2375 port_offloads |= RTE_ETH_RX_OFFLOAD_RSS_HASH; 2376 2377 port_offloads |= RTE_ETH_RX_OFFLOAD_SCATTER; 2378 2379 return port_offloads; 2380 } 2381 2382 static uint64_t ena_get_tx_port_offloads(struct ena_adapter *adapter) 2383 { 2384 uint64_t port_offloads = 0; 2385 2386 if (adapter->offloads.tx_offloads & ENA_IPV4_TSO) 2387 port_offloads |= RTE_ETH_TX_OFFLOAD_TCP_TSO; 2388 2389 if (adapter->offloads.tx_offloads & ENA_L3_IPV4_CSUM) 2390 port_offloads |= RTE_ETH_TX_OFFLOAD_IPV4_CKSUM; 2391 if (adapter->offloads.tx_offloads & 2392 (ENA_L4_IPV4_CSUM_PARTIAL | ENA_L4_IPV4_CSUM | 2393 ENA_L4_IPV6_CSUM | ENA_L4_IPV6_CSUM_PARTIAL)) 2394 port_offloads |= 2395 RTE_ETH_TX_OFFLOAD_UDP_CKSUM | RTE_ETH_TX_OFFLOAD_TCP_CKSUM; 2396 2397 port_offloads |= RTE_ETH_TX_OFFLOAD_MULTI_SEGS; 2398 2399 return port_offloads; 2400 } 2401 2402 static uint64_t ena_get_rx_queue_offloads(struct ena_adapter *adapter) 2403 { 2404 RTE_SET_USED(adapter); 2405 2406 return 0; 2407 } 2408 2409 static uint64_t ena_get_tx_queue_offloads(struct ena_adapter *adapter) 2410 { 2411 RTE_SET_USED(adapter); 2412 2413 return 0; 2414 } 2415 2416 static int ena_infos_get(struct rte_eth_dev *dev, 2417 struct rte_eth_dev_info *dev_info) 2418 { 2419 struct ena_adapter *adapter; 2420 struct ena_com_dev *ena_dev; 2421 2422 ena_assert_msg(dev->data != NULL, "Uninitialized device\n"); 2423 ena_assert_msg(dev->data->dev_private != NULL, "Uninitialized device\n"); 2424 adapter = dev->data->dev_private; 2425 2426 ena_dev = &adapter->ena_dev; 2427 ena_assert_msg(ena_dev != NULL, "Uninitialized device\n"); 2428 2429 dev_info->speed_capa = 2430 RTE_ETH_LINK_SPEED_1G | 2431 RTE_ETH_LINK_SPEED_2_5G | 2432 RTE_ETH_LINK_SPEED_5G | 2433 RTE_ETH_LINK_SPEED_10G | 2434 RTE_ETH_LINK_SPEED_25G | 2435 RTE_ETH_LINK_SPEED_40G | 2436 RTE_ETH_LINK_SPEED_50G | 2437 RTE_ETH_LINK_SPEED_100G; 2438 2439 /* Inform framework about available features */ 2440 dev_info->rx_offload_capa = ena_get_rx_port_offloads(adapter); 2441 dev_info->tx_offload_capa = 
ena_get_tx_port_offloads(adapter); 2442 dev_info->rx_queue_offload_capa = ena_get_rx_queue_offloads(adapter); 2443 dev_info->tx_queue_offload_capa = ena_get_tx_queue_offloads(adapter); 2444 2445 dev_info->flow_type_rss_offloads = ENA_ALL_RSS_HF; 2446 dev_info->hash_key_size = ENA_HASH_KEY_SIZE; 2447 2448 dev_info->min_rx_bufsize = ENA_MIN_FRAME_LEN; 2449 dev_info->max_rx_pktlen = adapter->max_mtu + RTE_ETHER_HDR_LEN + 2450 RTE_ETHER_CRC_LEN; 2451 dev_info->min_mtu = ENA_MIN_MTU; 2452 dev_info->max_mtu = adapter->max_mtu; 2453 dev_info->max_mac_addrs = 1; 2454 2455 dev_info->max_rx_queues = adapter->max_num_io_queues; 2456 dev_info->max_tx_queues = adapter->max_num_io_queues; 2457 dev_info->reta_size = ENA_RX_RSS_TABLE_SIZE; 2458 2459 dev_info->rx_desc_lim.nb_max = adapter->max_rx_ring_size; 2460 dev_info->rx_desc_lim.nb_min = ENA_MIN_RING_DESC; 2461 dev_info->rx_desc_lim.nb_seg_max = RTE_MIN(ENA_PKT_MAX_BUFS, 2462 adapter->max_rx_sgl_size); 2463 dev_info->rx_desc_lim.nb_mtu_seg_max = RTE_MIN(ENA_PKT_MAX_BUFS, 2464 adapter->max_rx_sgl_size); 2465 2466 dev_info->tx_desc_lim.nb_max = adapter->max_tx_ring_size; 2467 dev_info->tx_desc_lim.nb_min = ENA_MIN_RING_DESC; 2468 dev_info->tx_desc_lim.nb_seg_max = RTE_MIN(ENA_PKT_MAX_BUFS, 2469 adapter->max_tx_sgl_size); 2470 dev_info->tx_desc_lim.nb_mtu_seg_max = RTE_MIN(ENA_PKT_MAX_BUFS, 2471 adapter->max_tx_sgl_size); 2472 2473 dev_info->default_rxportconf.ring_size = ENA_DEFAULT_RING_SIZE; 2474 dev_info->default_txportconf.ring_size = ENA_DEFAULT_RING_SIZE; 2475 2476 return 0; 2477 } 2478 2479 static inline void ena_init_rx_mbuf(struct rte_mbuf *mbuf, uint16_t len) 2480 { 2481 mbuf->data_len = len; 2482 mbuf->data_off = RTE_PKTMBUF_HEADROOM; 2483 mbuf->refcnt = 1; 2484 mbuf->next = NULL; 2485 } 2486 2487 static struct rte_mbuf *ena_rx_mbuf(struct ena_ring *rx_ring, 2488 struct ena_com_rx_buf_info *ena_bufs, 2489 uint32_t descs, 2490 uint16_t *next_to_clean, 2491 uint8_t offset) 2492 { 2493 struct rte_mbuf *mbuf; 2494 struct rte_mbuf *mbuf_head; 2495 struct ena_rx_buffer *rx_info; 2496 int rc; 2497 uint16_t ntc, len, req_id, buf = 0; 2498 2499 if (unlikely(descs == 0)) 2500 return NULL; 2501 2502 ntc = *next_to_clean; 2503 2504 len = ena_bufs[buf].len; 2505 req_id = ena_bufs[buf].req_id; 2506 2507 rx_info = &rx_ring->rx_buffer_info[req_id]; 2508 2509 mbuf = rx_info->mbuf; 2510 RTE_ASSERT(mbuf != NULL); 2511 2512 ena_init_rx_mbuf(mbuf, len); 2513 2514 /* Fill the mbuf head with the data specific for 1st segment. */ 2515 mbuf_head = mbuf; 2516 mbuf_head->nb_segs = descs; 2517 mbuf_head->port = rx_ring->port_id; 2518 mbuf_head->pkt_len = len; 2519 mbuf_head->data_off += offset; 2520 2521 rx_info->mbuf = NULL; 2522 rx_ring->empty_rx_reqs[ntc] = req_id; 2523 ntc = ENA_IDX_NEXT_MASKED(ntc, rx_ring->size_mask); 2524 2525 while (--descs) { 2526 ++buf; 2527 len = ena_bufs[buf].len; 2528 req_id = ena_bufs[buf].req_id; 2529 2530 rx_info = &rx_ring->rx_buffer_info[req_id]; 2531 RTE_ASSERT(rx_info->mbuf != NULL); 2532 2533 if (unlikely(len == 0)) { 2534 /* 2535 * Some devices can pass descriptor with the length 0. 2536 * To avoid confusion, the PMD is simply putting the 2537 * descriptor back, as it was never used. We'll avoid 2538 * mbuf allocation that way. 2539 */ 2540 rc = ena_add_single_rx_desc(rx_ring->ena_com_io_sq, 2541 rx_info->mbuf, req_id); 2542 if (unlikely(rc != 0)) { 2543 /* Free the mbuf in case of an error. 
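				 * The request id is still returned to
				 * empty_rx_reqs below, so the slot can be
				 * refilled later by ena_populate_rx_queue().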
*/ 2544 rte_mbuf_raw_free(rx_info->mbuf); 2545 } else { 2546 /* 2547 * If there was no error, just exit the loop as 2548 * 0 length descriptor is always the last one. 2549 */ 2550 break; 2551 } 2552 } else { 2553 /* Create an mbuf chain. */ 2554 mbuf->next = rx_info->mbuf; 2555 mbuf = mbuf->next; 2556 2557 ena_init_rx_mbuf(mbuf, len); 2558 mbuf_head->pkt_len += len; 2559 } 2560 2561 /* 2562 * Mark the descriptor as depleted and perform necessary 2563 * cleanup. 2564 * This code will execute in two cases: 2565 * 1. Descriptor len was greater than 0 - normal situation. 2566 * 2. Descriptor len was 0 and we failed to add the descriptor 2567 * to the device. In that situation, we should try to add 2568 * the mbuf again in the populate routine and mark the 2569 * descriptor as used up by the device. 2570 */ 2571 rx_info->mbuf = NULL; 2572 rx_ring->empty_rx_reqs[ntc] = req_id; 2573 ntc = ENA_IDX_NEXT_MASKED(ntc, rx_ring->size_mask); 2574 } 2575 2576 *next_to_clean = ntc; 2577 2578 return mbuf_head; 2579 } 2580 2581 static uint16_t eth_ena_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, 2582 uint16_t nb_pkts) 2583 { 2584 struct ena_ring *rx_ring = (struct ena_ring *)(rx_queue); 2585 unsigned int free_queue_entries; 2586 uint16_t next_to_clean = rx_ring->next_to_clean; 2587 uint16_t descs_in_use; 2588 struct rte_mbuf *mbuf; 2589 uint16_t completed; 2590 struct ena_com_rx_ctx ena_rx_ctx; 2591 int i, rc = 0; 2592 bool fill_hash; 2593 2594 #ifdef RTE_ETHDEV_DEBUG_RX 2595 /* Check adapter state */ 2596 if (unlikely(rx_ring->adapter->state != ENA_ADAPTER_STATE_RUNNING)) { 2597 PMD_RX_LOG(ALERT, 2598 "Trying to receive pkts while device is NOT running\n"); 2599 return 0; 2600 } 2601 #endif 2602 2603 fill_hash = rx_ring->offloads & RTE_ETH_RX_OFFLOAD_RSS_HASH; 2604 2605 descs_in_use = rx_ring->ring_size - 2606 ena_com_free_q_entries(rx_ring->ena_com_io_sq) - 1; 2607 nb_pkts = RTE_MIN(descs_in_use, nb_pkts); 2608 2609 for (completed = 0; completed < nb_pkts; completed++) { 2610 ena_rx_ctx.max_bufs = rx_ring->sgl_size; 2611 ena_rx_ctx.ena_bufs = rx_ring->ena_bufs; 2612 ena_rx_ctx.descs = 0; 2613 ena_rx_ctx.pkt_offset = 0; 2614 /* receive packet context */ 2615 rc = ena_com_rx_pkt(rx_ring->ena_com_io_cq, 2616 rx_ring->ena_com_io_sq, 2617 &ena_rx_ctx); 2618 if (unlikely(rc)) { 2619 PMD_RX_LOG(ERR, 2620 "Failed to get the packet from the device, rc: %d\n", 2621 rc); 2622 if (rc == ENA_COM_NO_SPACE) { 2623 ++rx_ring->rx_stats.bad_desc_num; 2624 ena_trigger_reset(rx_ring->adapter, 2625 ENA_REGS_RESET_TOO_MANY_RX_DESCS); 2626 } else { 2627 ++rx_ring->rx_stats.bad_req_id; 2628 ena_trigger_reset(rx_ring->adapter, 2629 ENA_REGS_RESET_INV_RX_REQ_ID); 2630 } 2631 return 0; 2632 } 2633 2634 mbuf = ena_rx_mbuf(rx_ring, 2635 ena_rx_ctx.ena_bufs, 2636 ena_rx_ctx.descs, 2637 &next_to_clean, 2638 ena_rx_ctx.pkt_offset); 2639 if (unlikely(mbuf == NULL)) { 2640 for (i = 0; i < ena_rx_ctx.descs; ++i) { 2641 rx_ring->empty_rx_reqs[next_to_clean] = 2642 rx_ring->ena_bufs[i].req_id; 2643 next_to_clean = ENA_IDX_NEXT_MASKED( 2644 next_to_clean, rx_ring->size_mask); 2645 } 2646 break; 2647 } 2648 2649 /* fill mbuf attributes if any */ 2650 ena_rx_mbuf_prepare(rx_ring, mbuf, &ena_rx_ctx, fill_hash); 2651 2652 if (unlikely(mbuf->ol_flags & 2653 (RTE_MBUF_F_RX_IP_CKSUM_BAD | RTE_MBUF_F_RX_L4_CKSUM_BAD))) 2654 rte_atomic64_inc(&rx_ring->adapter->drv_stats->ierrors); 2655 2656 rx_pkts[completed] = mbuf; 2657 rx_ring->rx_stats.bytes += mbuf->pkt_len; 2658 } 2659 2660 rx_ring->rx_stats.cnt += completed; 2661 rx_ring->next_to_clean = 
next_to_clean; 2662 2663 free_queue_entries = ena_com_free_q_entries(rx_ring->ena_com_io_sq); 2664 2665 /* Burst refill to save doorbells, memory barriers, const interval */ 2666 if (free_queue_entries >= rx_ring->rx_free_thresh) { 2667 ena_com_update_dev_comp_head(rx_ring->ena_com_io_cq); 2668 ena_populate_rx_queue(rx_ring, free_queue_entries); 2669 } 2670 2671 return completed; 2672 } 2673 2674 static uint16_t 2675 eth_ena_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, 2676 uint16_t nb_pkts) 2677 { 2678 int32_t ret; 2679 uint32_t i; 2680 struct rte_mbuf *m; 2681 struct ena_ring *tx_ring = (struct ena_ring *)(tx_queue); 2682 struct ena_adapter *adapter = tx_ring->adapter; 2683 struct rte_ipv4_hdr *ip_hdr; 2684 uint64_t ol_flags; 2685 uint64_t l4_csum_flag; 2686 uint64_t dev_offload_capa; 2687 uint16_t frag_field; 2688 bool need_pseudo_csum; 2689 2690 dev_offload_capa = adapter->offloads.tx_offloads; 2691 for (i = 0; i != nb_pkts; i++) { 2692 m = tx_pkts[i]; 2693 ol_flags = m->ol_flags; 2694 2695 /* Check if any offload flag was set */ 2696 if (ol_flags == 0) 2697 continue; 2698 2699 l4_csum_flag = ol_flags & RTE_MBUF_F_TX_L4_MASK; 2700 /* SCTP checksum offload is not supported by the ENA. */ 2701 if ((ol_flags & ENA_TX_OFFLOAD_NOTSUP_MASK) || 2702 l4_csum_flag == RTE_MBUF_F_TX_SCTP_CKSUM) { 2703 PMD_TX_LOG(DEBUG, 2704 "mbuf[%" PRIu32 "] has unsupported offloads flags set: 0x%" PRIu64 "\n", 2705 i, ol_flags); 2706 rte_errno = ENOTSUP; 2707 return i; 2708 } 2709 2710 if (unlikely(m->nb_segs >= tx_ring->sgl_size && 2711 !(tx_ring->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV && 2712 m->nb_segs == tx_ring->sgl_size && 2713 m->data_len < tx_ring->tx_max_header_size))) { 2714 PMD_TX_LOG(DEBUG, 2715 "mbuf[%" PRIu32 "] has too many segments: %" PRIu16 "\n", 2716 i, m->nb_segs); 2717 rte_errno = EINVAL; 2718 return i; 2719 } 2720 2721 #ifdef RTE_LIBRTE_ETHDEV_DEBUG 2722 /* Check if requested offload is also enabled for the queue */ 2723 if ((ol_flags & RTE_MBUF_F_TX_IP_CKSUM && 2724 !(tx_ring->offloads & RTE_ETH_TX_OFFLOAD_IPV4_CKSUM)) || 2725 (l4_csum_flag == RTE_MBUF_F_TX_TCP_CKSUM && 2726 !(tx_ring->offloads & RTE_ETH_TX_OFFLOAD_TCP_CKSUM)) || 2727 (l4_csum_flag == RTE_MBUF_F_TX_UDP_CKSUM && 2728 !(tx_ring->offloads & RTE_ETH_TX_OFFLOAD_UDP_CKSUM))) { 2729 PMD_TX_LOG(DEBUG, 2730 "mbuf[%" PRIu32 "]: requested offloads: %" PRIu16 " are not enabled for the queue[%u]\n", 2731 i, m->nb_segs, tx_ring->id); 2732 rte_errno = EINVAL; 2733 return i; 2734 } 2735 2736 /* The caller is obligated to set l2 and l3 len if any cksum 2737 * offload is enabled. 2738 */ 2739 if (unlikely(ol_flags & (RTE_MBUF_F_TX_IP_CKSUM | RTE_MBUF_F_TX_L4_MASK) && 2740 (m->l2_len == 0 || m->l3_len == 0))) { 2741 PMD_TX_LOG(DEBUG, 2742 "mbuf[%" PRIu32 "]: l2_len or l3_len values are 0 while the offload was requested\n", 2743 i); 2744 rte_errno = EINVAL; 2745 return i; 2746 } 2747 ret = rte_validate_tx_offload(m); 2748 if (ret != 0) { 2749 rte_errno = -ret; 2750 return i; 2751 } 2752 #endif 2753 2754 /* Verify HW support for requested offloads and determine if 2755 * pseudo header checksum is needed. 
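		 * A pseudo header checksum is required when the device only
		 * advertises the partial L4 checksum capability for the
		 * requested protocol, and for IPv4 TSO when the DF bit is not
		 * set.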
2756 */ 2757 need_pseudo_csum = false; 2758 if (ol_flags & RTE_MBUF_F_TX_IPV4) { 2759 if (ol_flags & RTE_MBUF_F_TX_IP_CKSUM && 2760 !(dev_offload_capa & ENA_L3_IPV4_CSUM)) { 2761 rte_errno = ENOTSUP; 2762 return i; 2763 } 2764 2765 if (ol_flags & RTE_MBUF_F_TX_TCP_SEG && 2766 !(dev_offload_capa & ENA_IPV4_TSO)) { 2767 rte_errno = ENOTSUP; 2768 return i; 2769 } 2770 2771 /* Check HW capabilities and if pseudo csum is needed 2772 * for L4 offloads. 2773 */ 2774 if (l4_csum_flag != RTE_MBUF_F_TX_L4_NO_CKSUM && 2775 !(dev_offload_capa & ENA_L4_IPV4_CSUM)) { 2776 if (dev_offload_capa & 2777 ENA_L4_IPV4_CSUM_PARTIAL) { 2778 need_pseudo_csum = true; 2779 } else { 2780 rte_errno = ENOTSUP; 2781 return i; 2782 } 2783 } 2784 2785 /* Parse the DF flag */ 2786 ip_hdr = rte_pktmbuf_mtod_offset(m, 2787 struct rte_ipv4_hdr *, m->l2_len); 2788 frag_field = rte_be_to_cpu_16(ip_hdr->fragment_offset); 2789 if (frag_field & RTE_IPV4_HDR_DF_FLAG) { 2790 m->packet_type |= RTE_PTYPE_L4_NONFRAG; 2791 } else if (ol_flags & RTE_MBUF_F_TX_TCP_SEG) { 2792 /* In case we are supposed to TSO and have DF 2793 * not set (DF=0) hardware must be provided with 2794 * partial checksum. 2795 */ 2796 need_pseudo_csum = true; 2797 } 2798 } else if (ol_flags & RTE_MBUF_F_TX_IPV6) { 2799 /* There is no support for IPv6 TSO as for now. */ 2800 if (ol_flags & RTE_MBUF_F_TX_TCP_SEG) { 2801 rte_errno = ENOTSUP; 2802 return i; 2803 } 2804 2805 /* Check HW capabilities and if pseudo csum is needed */ 2806 if (l4_csum_flag != RTE_MBUF_F_TX_L4_NO_CKSUM && 2807 !(dev_offload_capa & ENA_L4_IPV6_CSUM)) { 2808 if (dev_offload_capa & 2809 ENA_L4_IPV6_CSUM_PARTIAL) { 2810 need_pseudo_csum = true; 2811 } else { 2812 rte_errno = ENOTSUP; 2813 return i; 2814 } 2815 } 2816 } 2817 2818 if (need_pseudo_csum) { 2819 ret = rte_net_intel_cksum_flags_prepare(m, ol_flags); 2820 if (ret != 0) { 2821 rte_errno = -ret; 2822 return i; 2823 } 2824 } 2825 } 2826 2827 return i; 2828 } 2829 2830 static void ena_update_hints(struct ena_adapter *adapter, 2831 struct ena_admin_ena_hw_hints *hints) 2832 { 2833 if (hints->admin_completion_tx_timeout) 2834 adapter->ena_dev.admin_queue.completion_timeout = 2835 hints->admin_completion_tx_timeout * 1000; 2836 2837 if (hints->mmio_read_timeout) 2838 /* convert to usec */ 2839 adapter->ena_dev.mmio_read.reg_read_to = 2840 hints->mmio_read_timeout * 1000; 2841 2842 if (hints->driver_watchdog_timeout) { 2843 if (hints->driver_watchdog_timeout == ENA_HW_HINTS_NO_TIMEOUT) 2844 adapter->keep_alive_timeout = ENA_HW_HINTS_NO_TIMEOUT; 2845 else 2846 // Convert msecs to ticks 2847 adapter->keep_alive_timeout = 2848 (hints->driver_watchdog_timeout * 2849 rte_get_timer_hz()) / 1000; 2850 } 2851 } 2852 2853 static void ena_tx_map_mbuf(struct ena_ring *tx_ring, 2854 struct ena_tx_buffer *tx_info, 2855 struct rte_mbuf *mbuf, 2856 void **push_header, 2857 uint16_t *header_len) 2858 { 2859 struct ena_com_buf *ena_buf; 2860 uint16_t delta, seg_len, push_len; 2861 2862 delta = 0; 2863 seg_len = mbuf->data_len; 2864 2865 tx_info->mbuf = mbuf; 2866 ena_buf = tx_info->bufs; 2867 2868 if (tx_ring->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) { 2869 /* 2870 * Tx header might be (and will be in most cases) smaller than 2871 * tx_max_header_size. But it's not an issue to send more data 2872 * to the device, than actually needed if the mbuf size is 2873 * greater than tx_max_header_size. 
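		 * The 'delta' computed below counts header bytes that were
		 * pushed out of segments following the first one, so those
		 * bytes are skipped when the remaining segments are mapped.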
2874 */ 2875 push_len = RTE_MIN(mbuf->pkt_len, tx_ring->tx_max_header_size); 2876 *header_len = push_len; 2877 2878 if (likely(push_len <= seg_len)) { 2879 /* If the push header is in the single segment, then 2880 * just point it to the 1st mbuf data. 2881 */ 2882 *push_header = rte_pktmbuf_mtod(mbuf, uint8_t *); 2883 } else { 2884 /* If the push header lays in the several segments, copy 2885 * it to the intermediate buffer. 2886 */ 2887 rte_pktmbuf_read(mbuf, 0, push_len, 2888 tx_ring->push_buf_intermediate_buf); 2889 *push_header = tx_ring->push_buf_intermediate_buf; 2890 delta = push_len - seg_len; 2891 } 2892 } else { 2893 *push_header = NULL; 2894 *header_len = 0; 2895 push_len = 0; 2896 } 2897 2898 /* Process first segment taking into consideration pushed header */ 2899 if (seg_len > push_len) { 2900 ena_buf->paddr = mbuf->buf_iova + 2901 mbuf->data_off + 2902 push_len; 2903 ena_buf->len = seg_len - push_len; 2904 ena_buf++; 2905 tx_info->num_of_bufs++; 2906 } 2907 2908 while ((mbuf = mbuf->next) != NULL) { 2909 seg_len = mbuf->data_len; 2910 2911 /* Skip mbufs if whole data is pushed as a header */ 2912 if (unlikely(delta > seg_len)) { 2913 delta -= seg_len; 2914 continue; 2915 } 2916 2917 ena_buf->paddr = mbuf->buf_iova + mbuf->data_off + delta; 2918 ena_buf->len = seg_len - delta; 2919 ena_buf++; 2920 tx_info->num_of_bufs++; 2921 2922 delta = 0; 2923 } 2924 } 2925 2926 static int ena_xmit_mbuf(struct ena_ring *tx_ring, struct rte_mbuf *mbuf) 2927 { 2928 struct ena_tx_buffer *tx_info; 2929 struct ena_com_tx_ctx ena_tx_ctx = { { 0 } }; 2930 uint16_t next_to_use; 2931 uint16_t header_len; 2932 uint16_t req_id; 2933 void *push_header; 2934 int nb_hw_desc; 2935 int rc; 2936 2937 /* Checking for space for 2 additional metadata descriptors due to 2938 * possible header split and metadata descriptor 2939 */ 2940 if (!ena_com_sq_have_enough_space(tx_ring->ena_com_io_sq, 2941 mbuf->nb_segs + 2)) { 2942 PMD_DRV_LOG(DEBUG, "Not enough space in the tx queue\n"); 2943 return ENA_COM_NO_MEM; 2944 } 2945 2946 next_to_use = tx_ring->next_to_use; 2947 2948 req_id = tx_ring->empty_tx_reqs[next_to_use]; 2949 tx_info = &tx_ring->tx_buffer_info[req_id]; 2950 tx_info->num_of_bufs = 0; 2951 RTE_ASSERT(tx_info->mbuf == NULL); 2952 2953 ena_tx_map_mbuf(tx_ring, tx_info, mbuf, &push_header, &header_len); 2954 2955 ena_tx_ctx.ena_bufs = tx_info->bufs; 2956 ena_tx_ctx.push_header = push_header; 2957 ena_tx_ctx.num_bufs = tx_info->num_of_bufs; 2958 ena_tx_ctx.req_id = req_id; 2959 ena_tx_ctx.header_len = header_len; 2960 2961 /* Set Tx offloads flags, if applicable */ 2962 ena_tx_mbuf_prepare(mbuf, &ena_tx_ctx, tx_ring->offloads, 2963 tx_ring->disable_meta_caching); 2964 2965 if (unlikely(ena_com_is_doorbell_needed(tx_ring->ena_com_io_sq, 2966 &ena_tx_ctx))) { 2967 PMD_TX_LOG(DEBUG, 2968 "LLQ Tx max burst size of queue %d achieved, writing doorbell to send burst\n", 2969 tx_ring->id); 2970 ena_com_write_sq_doorbell(tx_ring->ena_com_io_sq); 2971 tx_ring->tx_stats.doorbells++; 2972 tx_ring->pkts_without_db = false; 2973 } 2974 2975 /* prepare the packet's descriptors to dma engine */ 2976 rc = ena_com_prepare_tx(tx_ring->ena_com_io_sq, &ena_tx_ctx, 2977 &nb_hw_desc); 2978 if (unlikely(rc)) { 2979 PMD_DRV_LOG(ERR, "Failed to prepare Tx buffers, rc: %d\n", rc); 2980 ++tx_ring->tx_stats.prepare_ctx_err; 2981 ena_trigger_reset(tx_ring->adapter, 2982 ENA_REGS_RESET_DRIVER_INVALID_STATE); 2983 return rc; 2984 } 2985 2986 tx_info->tx_descs = nb_hw_desc; 2987 tx_info->timestamp = rte_get_timer_cycles(); 2988 2989 
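	/* The timestamp is read by check_for_tx_completion_in_queue() to
	 * detect requests the device never completed; ena_tx_cleanup() clears
	 * it again once the completion arrives.
	 */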
tx_ring->tx_stats.cnt++; 2990 tx_ring->tx_stats.bytes += mbuf->pkt_len; 2991 2992 tx_ring->next_to_use = ENA_IDX_NEXT_MASKED(next_to_use, 2993 tx_ring->size_mask); 2994 2995 return 0; 2996 } 2997 2998 static int ena_tx_cleanup(void *txp, uint32_t free_pkt_cnt) 2999 { 3000 struct ena_ring *tx_ring = (struct ena_ring *)txp; 3001 unsigned int total_tx_descs = 0; 3002 unsigned int total_tx_pkts = 0; 3003 uint16_t cleanup_budget; 3004 uint16_t next_to_clean = tx_ring->next_to_clean; 3005 3006 /* 3007 * If free_pkt_cnt is equal to 0, it means that the user requested 3008 * full cleanup, so attempt to release all Tx descriptors 3009 * (ring_size - 1 -> size_mask) 3010 */ 3011 cleanup_budget = (free_pkt_cnt == 0) ? tx_ring->size_mask : free_pkt_cnt; 3012 3013 while (likely(total_tx_pkts < cleanup_budget)) { 3014 struct rte_mbuf *mbuf; 3015 struct ena_tx_buffer *tx_info; 3016 uint16_t req_id; 3017 3018 if (ena_com_tx_comp_req_id_get(tx_ring->ena_com_io_cq, &req_id) != 0) 3019 break; 3020 3021 if (unlikely(validate_tx_req_id(tx_ring, req_id) != 0)) 3022 break; 3023 3024 /* Get Tx info & store how many descs were processed */ 3025 tx_info = &tx_ring->tx_buffer_info[req_id]; 3026 tx_info->timestamp = 0; 3027 3028 mbuf = tx_info->mbuf; 3029 rte_pktmbuf_free(mbuf); 3030 3031 tx_info->mbuf = NULL; 3032 tx_ring->empty_tx_reqs[next_to_clean] = req_id; 3033 3034 total_tx_descs += tx_info->tx_descs; 3035 total_tx_pkts++; 3036 3037 /* Put back descriptor to the ring for reuse */ 3038 next_to_clean = ENA_IDX_NEXT_MASKED(next_to_clean, 3039 tx_ring->size_mask); 3040 } 3041 3042 if (likely(total_tx_descs > 0)) { 3043 /* acknowledge completion of sent packets */ 3044 tx_ring->next_to_clean = next_to_clean; 3045 ena_com_comp_ack(tx_ring->ena_com_io_sq, total_tx_descs); 3046 ena_com_update_dev_comp_head(tx_ring->ena_com_io_cq); 3047 } 3048 3049 /* Notify completion handler that full cleanup was performed */ 3050 if (free_pkt_cnt == 0 || total_tx_pkts < cleanup_budget) 3051 tx_ring->last_cleanup_ticks = rte_get_timer_cycles(); 3052 3053 return total_tx_pkts; 3054 } 3055 3056 static uint16_t eth_ena_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, 3057 uint16_t nb_pkts) 3058 { 3059 struct ena_ring *tx_ring = (struct ena_ring *)(tx_queue); 3060 int available_desc; 3061 uint16_t sent_idx = 0; 3062 3063 #ifdef RTE_ETHDEV_DEBUG_TX 3064 /* Check adapter state */ 3065 if (unlikely(tx_ring->adapter->state != ENA_ADAPTER_STATE_RUNNING)) { 3066 PMD_TX_LOG(ALERT, 3067 "Trying to xmit pkts while device is NOT running\n"); 3068 return 0; 3069 } 3070 #endif 3071 3072 available_desc = ena_com_free_q_entries(tx_ring->ena_com_io_sq); 3073 if (available_desc < tx_ring->tx_free_thresh) 3074 ena_tx_cleanup((void *)tx_ring, 0); 3075 3076 for (sent_idx = 0; sent_idx < nb_pkts; sent_idx++) { 3077 if (ena_xmit_mbuf(tx_ring, tx_pkts[sent_idx])) 3078 break; 3079 tx_ring->pkts_without_db = true; 3080 rte_prefetch0(tx_pkts[ENA_IDX_ADD_MASKED(sent_idx, 4, 3081 tx_ring->size_mask)]); 3082 } 3083 3084 /* If there are ready packets to be xmitted... 
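	 * (pkts_without_db is set for every mbuf queued in the loop above and
	 * cleared each time a doorbell is written)...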
*/ 3085 if (likely(tx_ring->pkts_without_db)) { 3086 /* ...let HW do its best :-) */ 3087 ena_com_write_sq_doorbell(tx_ring->ena_com_io_sq); 3088 tx_ring->tx_stats.doorbells++; 3089 tx_ring->pkts_without_db = false; 3090 } 3091 3092 tx_ring->tx_stats.available_desc = 3093 ena_com_free_q_entries(tx_ring->ena_com_io_sq); 3094 tx_ring->tx_stats.tx_poll++; 3095 3096 return sent_idx; 3097 } 3098 3099 int ena_copy_eni_stats(struct ena_adapter *adapter, struct ena_stats_eni *stats) 3100 { 3101 int rc; 3102 3103 rte_spinlock_lock(&adapter->admin_lock); 3104 /* Retrieve and store the latest statistics from the AQ. This ensures 3105 * that previous value is returned in case of a com error. 3106 */ 3107 rc = ENA_PROXY(adapter, ena_com_get_eni_stats, &adapter->ena_dev, 3108 (struct ena_admin_eni_stats *)stats); 3109 rte_spinlock_unlock(&adapter->admin_lock); 3110 if (rc != 0) { 3111 if (rc == ENA_COM_UNSUPPORTED) { 3112 PMD_DRV_LOG(DEBUG, 3113 "Retrieving ENI metrics is not supported\n"); 3114 } else { 3115 PMD_DRV_LOG(WARNING, 3116 "Failed to get ENI metrics, rc: %d\n", rc); 3117 } 3118 return rc; 3119 } 3120 3121 return 0; 3122 } 3123 3124 /** 3125 * DPDK callback to retrieve names of extended device statistics 3126 * 3127 * @param dev 3128 * Pointer to Ethernet device structure. 3129 * @param[out] xstats_names 3130 * Buffer to insert names into. 3131 * @param n 3132 * Number of names. 3133 * 3134 * @return 3135 * Number of xstats names. 3136 */ 3137 static int ena_xstats_get_names(struct rte_eth_dev *dev, 3138 struct rte_eth_xstat_name *xstats_names, 3139 unsigned int n) 3140 { 3141 unsigned int xstats_count = ena_xstats_calc_num(dev->data); 3142 unsigned int stat, i, count = 0; 3143 3144 if (n < xstats_count || !xstats_names) 3145 return xstats_count; 3146 3147 for (stat = 0; stat < ENA_STATS_ARRAY_GLOBAL; stat++, count++) 3148 strcpy(xstats_names[count].name, 3149 ena_stats_global_strings[stat].name); 3150 3151 for (stat = 0; stat < ENA_STATS_ARRAY_ENI; stat++, count++) 3152 strcpy(xstats_names[count].name, 3153 ena_stats_eni_strings[stat].name); 3154 3155 for (stat = 0; stat < ENA_STATS_ARRAY_RX; stat++) 3156 for (i = 0; i < dev->data->nb_rx_queues; i++, count++) 3157 snprintf(xstats_names[count].name, 3158 sizeof(xstats_names[count].name), 3159 "rx_q%d_%s", i, 3160 ena_stats_rx_strings[stat].name); 3161 3162 for (stat = 0; stat < ENA_STATS_ARRAY_TX; stat++) 3163 for (i = 0; i < dev->data->nb_tx_queues; i++, count++) 3164 snprintf(xstats_names[count].name, 3165 sizeof(xstats_names[count].name), 3166 "tx_q%d_%s", i, 3167 ena_stats_tx_strings[stat].name); 3168 3169 return xstats_count; 3170 } 3171 3172 /** 3173 * DPDK callback to retrieve names of extended device statistics for the given 3174 * ids. 3175 * 3176 * @param dev 3177 * Pointer to Ethernet device structure. 3178 * @param[out] xstats_names 3179 * Buffer to insert names into. 3180 * @param ids 3181 * IDs array for which the names should be retrieved. 3182 * @param size 3183 * Number of ids. 3184 * 3185 * @return 3186 * Positive value: number of xstats names. Negative value: error code. 
3187 */ 3188 static int ena_xstats_get_names_by_id(struct rte_eth_dev *dev, 3189 const uint64_t *ids, 3190 struct rte_eth_xstat_name *xstats_names, 3191 unsigned int size) 3192 { 3193 uint64_t xstats_count = ena_xstats_calc_num(dev->data); 3194 uint64_t id, qid; 3195 unsigned int i; 3196 3197 if (xstats_names == NULL) 3198 return xstats_count; 3199 3200 for (i = 0; i < size; ++i) { 3201 id = ids[i]; 3202 if (id > xstats_count) { 3203 PMD_DRV_LOG(ERR, 3204 "ID value out of range: id=%" PRIu64 ", xstats_num=%" PRIu64 "\n", 3205 id, xstats_count); 3206 return -EINVAL; 3207 } 3208 3209 if (id < ENA_STATS_ARRAY_GLOBAL) { 3210 strcpy(xstats_names[i].name, 3211 ena_stats_global_strings[id].name); 3212 continue; 3213 } 3214 3215 id -= ENA_STATS_ARRAY_GLOBAL; 3216 if (id < ENA_STATS_ARRAY_ENI) { 3217 strcpy(xstats_names[i].name, 3218 ena_stats_eni_strings[id].name); 3219 continue; 3220 } 3221 3222 id -= ENA_STATS_ARRAY_ENI; 3223 if (id < ENA_STATS_ARRAY_RX) { 3224 qid = id / dev->data->nb_rx_queues; 3225 id %= dev->data->nb_rx_queues; 3226 snprintf(xstats_names[i].name, 3227 sizeof(xstats_names[i].name), 3228 "rx_q%" PRIu64 "d_%s", 3229 qid, ena_stats_rx_strings[id].name); 3230 continue; 3231 } 3232 3233 id -= ENA_STATS_ARRAY_RX; 3234 /* Although this condition is not needed, it was added for 3235 * compatibility if new xstat structure would be ever added. 3236 */ 3237 if (id < ENA_STATS_ARRAY_TX) { 3238 qid = id / dev->data->nb_tx_queues; 3239 id %= dev->data->nb_tx_queues; 3240 snprintf(xstats_names[i].name, 3241 sizeof(xstats_names[i].name), 3242 "tx_q%" PRIu64 "_%s", 3243 qid, ena_stats_tx_strings[id].name); 3244 continue; 3245 } 3246 } 3247 3248 return i; 3249 } 3250 3251 /** 3252 * DPDK callback to get extended device statistics. 3253 * 3254 * @param dev 3255 * Pointer to Ethernet device structure. 3256 * @param[out] stats 3257 * Stats table output buffer. 3258 * @param n 3259 * The size of the stats table. 3260 * 3261 * @return 3262 * Number of xstats on success, negative on failure. 3263 */ 3264 static int ena_xstats_get(struct rte_eth_dev *dev, 3265 struct rte_eth_xstat *xstats, 3266 unsigned int n) 3267 { 3268 struct ena_adapter *adapter = dev->data->dev_private; 3269 unsigned int xstats_count = ena_xstats_calc_num(dev->data); 3270 struct ena_stats_eni eni_stats; 3271 unsigned int stat, i, count = 0; 3272 int stat_offset; 3273 void *stats_begin; 3274 3275 if (n < xstats_count) 3276 return xstats_count; 3277 3278 if (!xstats) 3279 return 0; 3280 3281 for (stat = 0; stat < ENA_STATS_ARRAY_GLOBAL; stat++, count++) { 3282 stat_offset = ena_stats_global_strings[stat].stat_offset; 3283 stats_begin = &adapter->dev_stats; 3284 3285 xstats[count].id = count; 3286 xstats[count].value = *((uint64_t *) 3287 ((char *)stats_begin + stat_offset)); 3288 } 3289 3290 /* Even if the function below fails, we should copy previous (or initial 3291 * values) to keep structure of rte_eth_xstat consistent. 
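	 * Any failure has already been logged inside ena_copy_eni_stats(),
	 * which is why its return code is deliberately ignored here.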
3292 */ 3293 ena_copy_eni_stats(adapter, &eni_stats); 3294 for (stat = 0; stat < ENA_STATS_ARRAY_ENI; stat++, count++) { 3295 stat_offset = ena_stats_eni_strings[stat].stat_offset; 3296 stats_begin = &eni_stats; 3297 3298 xstats[count].id = count; 3299 xstats[count].value = *((uint64_t *) 3300 ((char *)stats_begin + stat_offset)); 3301 } 3302 3303 for (stat = 0; stat < ENA_STATS_ARRAY_RX; stat++) { 3304 for (i = 0; i < dev->data->nb_rx_queues; i++, count++) { 3305 stat_offset = ena_stats_rx_strings[stat].stat_offset; 3306 stats_begin = &adapter->rx_ring[i].rx_stats; 3307 3308 xstats[count].id = count; 3309 xstats[count].value = *((uint64_t *) 3310 ((char *)stats_begin + stat_offset)); 3311 } 3312 } 3313 3314 for (stat = 0; stat < ENA_STATS_ARRAY_TX; stat++) { 3315 for (i = 0; i < dev->data->nb_tx_queues; i++, count++) { 3316 stat_offset = ena_stats_tx_strings[stat].stat_offset; 3317 stats_begin = &adapter->tx_ring[i].rx_stats; 3318 3319 xstats[count].id = count; 3320 xstats[count].value = *((uint64_t *) 3321 ((char *)stats_begin + stat_offset)); 3322 } 3323 } 3324 3325 return count; 3326 } 3327 3328 static int ena_xstats_get_by_id(struct rte_eth_dev *dev, 3329 const uint64_t *ids, 3330 uint64_t *values, 3331 unsigned int n) 3332 { 3333 struct ena_adapter *adapter = dev->data->dev_private; 3334 struct ena_stats_eni eni_stats; 3335 uint64_t id; 3336 uint64_t rx_entries, tx_entries; 3337 unsigned int i; 3338 int qid; 3339 int valid = 0; 3340 bool was_eni_copied = false; 3341 3342 for (i = 0; i < n; ++i) { 3343 id = ids[i]; 3344 /* Check if id belongs to global statistics */ 3345 if (id < ENA_STATS_ARRAY_GLOBAL) { 3346 values[i] = *((uint64_t *)&adapter->dev_stats + id); 3347 ++valid; 3348 continue; 3349 } 3350 3351 /* Check if id belongs to ENI statistics */ 3352 id -= ENA_STATS_ARRAY_GLOBAL; 3353 if (id < ENA_STATS_ARRAY_ENI) { 3354 /* Avoid reading ENI stats multiple times in a single 3355 * function call, as it requires communication with the 3356 * admin queue. 3357 */ 3358 if (!was_eni_copied) { 3359 was_eni_copied = true; 3360 ena_copy_eni_stats(adapter, &eni_stats); 3361 } 3362 values[i] = *((uint64_t *)&eni_stats + id); 3363 ++valid; 3364 continue; 3365 } 3366 3367 /* Check if id belongs to rx queue statistics */ 3368 id -= ENA_STATS_ARRAY_ENI; 3369 rx_entries = ENA_STATS_ARRAY_RX * dev->data->nb_rx_queues; 3370 if (id < rx_entries) { 3371 qid = id % dev->data->nb_rx_queues; 3372 id /= dev->data->nb_rx_queues; 3373 values[i] = *((uint64_t *) 3374 &adapter->rx_ring[qid].rx_stats + id); 3375 ++valid; 3376 continue; 3377 } 3378 /* Check if id belongs to rx queue statistics */ 3379 id -= rx_entries; 3380 tx_entries = ENA_STATS_ARRAY_TX * dev->data->nb_tx_queues; 3381 if (id < tx_entries) { 3382 qid = id % dev->data->nb_tx_queues; 3383 id /= dev->data->nb_tx_queues; 3384 values[i] = *((uint64_t *) 3385 &adapter->tx_ring[qid].tx_stats + id); 3386 ++valid; 3387 continue; 3388 } 3389 } 3390 3391 return valid; 3392 } 3393 3394 static int ena_process_uint_devarg(const char *key, 3395 const char *value, 3396 void *opaque) 3397 { 3398 struct ena_adapter *adapter = opaque; 3399 char *str_end; 3400 uint64_t uint_value; 3401 3402 uint_value = strtoull(value, &str_end, 10); 3403 if (value == str_end) { 3404 PMD_INIT_LOG(ERR, 3405 "Invalid value for key '%s'. 
Only uint values are accepted.\n", 3406 key); 3407 return -EINVAL; 3408 } 3409 3410 if (strcmp(key, ENA_DEVARG_MISS_TXC_TO) == 0) { 3411 if (uint_value > ENA_MAX_TX_TIMEOUT_SECONDS) { 3412 PMD_INIT_LOG(ERR, 3413 "Tx timeout too high: %" PRIu64 " sec. Maximum allowed: %d sec.\n", 3414 uint_value, ENA_MAX_TX_TIMEOUT_SECONDS); 3415 return -EINVAL; 3416 } else if (uint_value == 0) { 3417 PMD_INIT_LOG(INFO, 3418 "Check for missing Tx completions has been disabled.\n"); 3419 adapter->missing_tx_completion_to = 3420 ENA_HW_HINTS_NO_TIMEOUT; 3421 } else { 3422 PMD_INIT_LOG(INFO, 3423 "Tx packet completion timeout set to %" PRIu64 " seconds.\n", 3424 uint_value); 3425 adapter->missing_tx_completion_to = 3426 uint_value * rte_get_timer_hz(); 3427 } 3428 } 3429 3430 return 0; 3431 } 3432 3433 static int ena_process_bool_devarg(const char *key, 3434 const char *value, 3435 void *opaque) 3436 { 3437 struct ena_adapter *adapter = opaque; 3438 bool bool_value; 3439 3440 /* Parse the value. */ 3441 if (strcmp(value, "1") == 0) { 3442 bool_value = true; 3443 } else if (strcmp(value, "0") == 0) { 3444 bool_value = false; 3445 } else { 3446 PMD_INIT_LOG(ERR, 3447 "Invalid value: '%s' for key '%s'. Accepted: '0' or '1'\n", 3448 value, key); 3449 return -EINVAL; 3450 } 3451 3452 /* Now, assign it to the proper adapter field. */ 3453 if (strcmp(key, ENA_DEVARG_LARGE_LLQ_HDR) == 0) 3454 adapter->use_large_llq_hdr = bool_value; 3455 3456 return 0; 3457 } 3458 3459 static int ena_parse_devargs(struct ena_adapter *adapter, 3460 struct rte_devargs *devargs) 3461 { 3462 static const char * const allowed_args[] = { 3463 ENA_DEVARG_LARGE_LLQ_HDR, 3464 ENA_DEVARG_MISS_TXC_TO, 3465 NULL, 3466 }; 3467 struct rte_kvargs *kvlist; 3468 int rc; 3469 3470 if (devargs == NULL) 3471 return 0; 3472 3473 kvlist = rte_kvargs_parse(devargs->args, allowed_args); 3474 if (kvlist == NULL) { 3475 PMD_INIT_LOG(ERR, "Invalid device arguments: %s\n", 3476 devargs->args); 3477 return -EINVAL; 3478 } 3479 3480 rc = rte_kvargs_process(kvlist, ENA_DEVARG_LARGE_LLQ_HDR, 3481 ena_process_bool_devarg, adapter); 3482 if (rc != 0) 3483 goto exit; 3484 rc = rte_kvargs_process(kvlist, ENA_DEVARG_MISS_TXC_TO, 3485 ena_process_uint_devarg, adapter); 3486 3487 exit: 3488 rte_kvargs_free(kvlist); 3489 3490 return rc; 3491 } 3492 3493 static int ena_setup_rx_intr(struct rte_eth_dev *dev) 3494 { 3495 struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev); 3496 struct rte_intr_handle *intr_handle = pci_dev->intr_handle; 3497 int rc; 3498 uint16_t vectors_nb, i; 3499 bool rx_intr_requested = dev->data->dev_conf.intr_conf.rxq; 3500 3501 if (!rx_intr_requested) 3502 return 0; 3503 3504 if (!rte_intr_cap_multiple(intr_handle)) { 3505 PMD_DRV_LOG(ERR, 3506 "Rx interrupt requested, but it isn't supported by the PCI driver\n"); 3507 return -ENOTSUP; 3508 } 3509 3510 /* Disable interrupt mapping before the configuration starts. */ 3511 rte_intr_disable(intr_handle); 3512 3513 /* Verify if there are enough vectors available. 
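	 * One vector is requested per Rx queue; rte_intr_allow_others() below
	 * additionally verifies that a vector is left for the ENA admin/AENQ
	 * interrupt.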
*/ 3514 vectors_nb = dev->data->nb_rx_queues; 3515 if (vectors_nb > RTE_MAX_RXTX_INTR_VEC_ID) { 3516 PMD_DRV_LOG(ERR, 3517 "Too many Rx interrupts requested, maximum number: %d\n", 3518 RTE_MAX_RXTX_INTR_VEC_ID); 3519 rc = -ENOTSUP; 3520 goto enable_intr; 3521 } 3522 3523 /* Allocate the vector list */ 3524 if (rte_intr_vec_list_alloc(intr_handle, "intr_vec", 3525 dev->data->nb_rx_queues)) { 3526 PMD_DRV_LOG(ERR, 3527 "Failed to allocate interrupt vector for %d queues\n", 3528 dev->data->nb_rx_queues); 3529 rc = -ENOMEM; 3530 goto enable_intr; 3531 } 3532 3533 rc = rte_intr_efd_enable(intr_handle, vectors_nb); 3534 if (rc != 0) 3535 goto free_intr_vec; 3536 3537 if (!rte_intr_allow_others(intr_handle)) { 3538 PMD_DRV_LOG(ERR, 3539 "Not enough interrupts available to use both ENA Admin and Rx interrupts\n"); 3540 goto disable_intr_efd; 3541 } 3542 3543 for (i = 0; i < vectors_nb; ++i) 3544 if (rte_intr_vec_list_index_set(intr_handle, i, 3545 RTE_INTR_VEC_RXTX_OFFSET + i)) 3546 goto disable_intr_efd; 3547 3548 rte_intr_enable(intr_handle); 3549 return 0; 3550 3551 disable_intr_efd: 3552 rte_intr_efd_disable(intr_handle); 3553 free_intr_vec: 3554 rte_intr_vec_list_free(intr_handle); 3555 enable_intr: 3556 rte_intr_enable(intr_handle); 3557 return rc; 3558 } 3559 3560 static void ena_rx_queue_intr_set(struct rte_eth_dev *dev, 3561 uint16_t queue_id, 3562 bool unmask) 3563 { 3564 struct ena_adapter *adapter = dev->data->dev_private; 3565 struct ena_ring *rxq = &adapter->rx_ring[queue_id]; 3566 struct ena_eth_io_intr_reg intr_reg; 3567 3568 ena_com_update_intr_reg(&intr_reg, 0, 0, unmask); 3569 ena_com_unmask_intr(rxq->ena_com_io_cq, &intr_reg); 3570 } 3571 3572 static int ena_rx_queue_intr_enable(struct rte_eth_dev *dev, 3573 uint16_t queue_id) 3574 { 3575 ena_rx_queue_intr_set(dev, queue_id, true); 3576 3577 return 0; 3578 } 3579 3580 static int ena_rx_queue_intr_disable(struct rte_eth_dev *dev, 3581 uint16_t queue_id) 3582 { 3583 ena_rx_queue_intr_set(dev, queue_id, false); 3584 3585 return 0; 3586 } 3587 3588 static int ena_configure_aenq(struct ena_adapter *adapter) 3589 { 3590 uint32_t aenq_groups = adapter->all_aenq_groups; 3591 int rc; 3592 3593 /* All_aenq_groups holds all AENQ functions supported by the device and 3594 * the HW, so at first we need to be sure the LSC request is valid. 3595 */ 3596 if (adapter->edev_data->dev_conf.intr_conf.lsc != 0) { 3597 if (!(aenq_groups & BIT(ENA_ADMIN_LINK_CHANGE))) { 3598 PMD_DRV_LOG(ERR, 3599 "LSC requested, but it's not supported by the AENQ\n"); 3600 return -EINVAL; 3601 } 3602 } else { 3603 /* If LSC wasn't enabled by the app, let's enable all supported 3604 * AENQ procedures except the LSC. 
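		 * Keep-alive, notification, fatal error and warning events
		 * stay enabled as long as the device reported them in
		 * all_aenq_groups.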
3605 */ 3606 aenq_groups &= ~BIT(ENA_ADMIN_LINK_CHANGE); 3607 } 3608 3609 rc = ena_com_set_aenq_config(&adapter->ena_dev, aenq_groups); 3610 if (rc != 0) { 3611 PMD_DRV_LOG(ERR, "Cannot configure AENQ groups, rc=%d\n", rc); 3612 return rc; 3613 } 3614 3615 adapter->active_aenq_groups = aenq_groups; 3616 3617 return 0; 3618 } 3619 3620 int ena_mp_indirect_table_set(struct ena_adapter *adapter) 3621 { 3622 return ENA_PROXY(adapter, ena_com_indirect_table_set, &adapter->ena_dev); 3623 } 3624 3625 int ena_mp_indirect_table_get(struct ena_adapter *adapter, 3626 uint32_t *indirect_table) 3627 { 3628 return ENA_PROXY(adapter, ena_com_indirect_table_get, &adapter->ena_dev, 3629 indirect_table); 3630 } 3631 3632 /********************************************************************* 3633 * ena_plat_dpdk.h functions implementations 3634 *********************************************************************/ 3635 3636 const struct rte_memzone * 3637 ena_mem_alloc_coherent(struct rte_eth_dev_data *data, size_t size, 3638 int socket_id, unsigned int alignment, void **virt_addr, 3639 dma_addr_t *phys_addr) 3640 { 3641 char z_name[RTE_MEMZONE_NAMESIZE]; 3642 struct ena_adapter *adapter = data->dev_private; 3643 const struct rte_memzone *memzone; 3644 int rc; 3645 3646 rc = snprintf(z_name, RTE_MEMZONE_NAMESIZE, "ena_p%d_mz%" PRIu64 "", 3647 data->port_id, adapter->memzone_cnt); 3648 if (rc >= RTE_MEMZONE_NAMESIZE) { 3649 PMD_DRV_LOG(ERR, 3650 "Name for the ena_com memzone is too long. Port: %d, mz_num: %" PRIu64 "\n", 3651 data->port_id, adapter->memzone_cnt); 3652 goto error; 3653 } 3654 adapter->memzone_cnt++; 3655 3656 memzone = rte_memzone_reserve_aligned(z_name, size, socket_id, 3657 RTE_MEMZONE_IOVA_CONTIG, alignment); 3658 if (memzone == NULL) { 3659 PMD_DRV_LOG(ERR, "Failed to allocate ena_com memzone: %s\n", 3660 z_name); 3661 goto error; 3662 } 3663 3664 memset(memzone->addr, 0, size); 3665 *virt_addr = memzone->addr; 3666 *phys_addr = memzone->iova; 3667 3668 return memzone; 3669 3670 error: 3671 *virt_addr = NULL; 3672 *phys_addr = 0; 3673 3674 return NULL; 3675 } 3676 3677 3678 /********************************************************************* 3679 * PMD configuration 3680 *********************************************************************/ 3681 static int eth_ena_pci_probe(struct rte_pci_driver *pci_drv __rte_unused, 3682 struct rte_pci_device *pci_dev) 3683 { 3684 return rte_eth_dev_pci_generic_probe(pci_dev, 3685 sizeof(struct ena_adapter), eth_ena_dev_init); 3686 } 3687 3688 static int eth_ena_pci_remove(struct rte_pci_device *pci_dev) 3689 { 3690 return rte_eth_dev_pci_generic_remove(pci_dev, eth_ena_dev_uninit); 3691 } 3692 3693 static struct rte_pci_driver rte_ena_pmd = { 3694 .id_table = pci_id_ena_map, 3695 .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC | 3696 RTE_PCI_DRV_WC_ACTIVATE, 3697 .probe = eth_ena_pci_probe, 3698 .remove = eth_ena_pci_remove, 3699 }; 3700 3701 RTE_PMD_REGISTER_PCI(net_ena, rte_ena_pmd); 3702 RTE_PMD_REGISTER_PCI_TABLE(net_ena, pci_id_ena_map); 3703 RTE_PMD_REGISTER_KMOD_DEP(net_ena, "* igb_uio | uio_pci_generic | vfio-pci"); 3704 RTE_PMD_REGISTER_PARAM_STRING(net_ena, ENA_DEVARG_LARGE_LLQ_HDR "=<0|1>"); 3705 RTE_LOG_REGISTER_SUFFIX(ena_logtype_init, init, NOTICE); 3706 RTE_LOG_REGISTER_SUFFIX(ena_logtype_driver, driver, NOTICE); 3707 #ifdef RTE_ETHDEV_DEBUG_RX 3708 RTE_LOG_REGISTER_SUFFIX(ena_logtype_rx, rx, DEBUG); 3709 #endif 3710 #ifdef RTE_ETHDEV_DEBUG_TX 3711 RTE_LOG_REGISTER_SUFFIX(ena_logtype_tx, tx, DEBUG); 3712 #endif 3713 
/******************************************************************************
 ******************************** AENQ Handlers *******************************
 *****************************************************************************/
static void ena_update_on_link_change(void *adapter_data,
				      struct ena_admin_aenq_entry *aenq_e)
{
	struct rte_eth_dev *eth_dev = adapter_data;
	struct ena_adapter *adapter = eth_dev->data->dev_private;
	struct ena_admin_aenq_link_change_desc *aenq_link_desc;
	uint32_t status;

	aenq_link_desc = (struct ena_admin_aenq_link_change_desc *)aenq_e;

	status = get_ena_admin_aenq_link_change_desc_link_status(aenq_link_desc);
	adapter->link_status = status;

	ena_link_update(eth_dev, 0);
	rte_eth_dev_callback_process(eth_dev, RTE_ETH_EVENT_INTR_LSC, NULL);
}

static void ena_notification(void *adapter_data,
			     struct ena_admin_aenq_entry *aenq_e)
{
	struct rte_eth_dev *eth_dev = adapter_data;
	struct ena_adapter *adapter = eth_dev->data->dev_private;
	struct ena_admin_ena_hw_hints *hints;

	if (aenq_e->aenq_common_desc.group != ENA_ADMIN_NOTIFICATION)
		PMD_DRV_LOG(WARNING, "Invalid AENQ group: %x. Expected: %x\n",
			aenq_e->aenq_common_desc.group,
			ENA_ADMIN_NOTIFICATION);

	switch (aenq_e->aenq_common_desc.syndrome) {
	case ENA_ADMIN_UPDATE_HINTS:
		hints = (struct ena_admin_ena_hw_hints *)
			(&aenq_e->inline_data_w4);
		ena_update_hints(adapter, hints);
		break;
	default:
		PMD_DRV_LOG(ERR, "Invalid AENQ notification syndrome: %d\n",
			aenq_e->aenq_common_desc.syndrome);
	}
}

static void ena_keep_alive(void *adapter_data,
			   struct ena_admin_aenq_entry *aenq_e)
{
	struct rte_eth_dev *eth_dev = adapter_data;
	struct ena_adapter *adapter = eth_dev->data->dev_private;
	struct ena_admin_aenq_keep_alive_desc *desc;
	uint64_t rx_drops;
	uint64_t tx_drops;

	adapter->timestamp_wd = rte_get_timer_cycles();

	desc = (struct ena_admin_aenq_keep_alive_desc *)aenq_e;
	rx_drops = ((uint64_t)desc->rx_drops_high << 32) | desc->rx_drops_low;
	tx_drops = ((uint64_t)desc->tx_drops_high << 32) | desc->tx_drops_low;

	adapter->drv_stats->rx_drops = rx_drops;
	adapter->dev_stats.tx_drops = tx_drops;
}

/**
 * This handler will be called for any unknown event group or for events
 * without an implemented handler.
 **/
static void unimplemented_aenq_handler(__rte_unused void *data,
				       __rte_unused struct ena_admin_aenq_entry *aenq_e)
{
	PMD_DRV_LOG(ERR,
		"Unknown event was received or event with unimplemented handler\n");
}

static struct ena_aenq_handlers aenq_handlers = {
	.handlers = {
		[ENA_ADMIN_LINK_CHANGE] = ena_update_on_link_change,
		[ENA_ADMIN_NOTIFICATION] = ena_notification,
		[ENA_ADMIN_KEEP_ALIVE] = ena_keep_alive
	},
	.unimplemented_handler = unimplemented_aenq_handler
};
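/* Dispatch sketch (descriptive only): the aenq_handlers table above is
 * expected to be registered with the ena_com layer during device
 * initialization elsewhere in this file. When an AENQ event arrives, ena_com
 * invokes the handler indexed by the event group and falls back to
 * unimplemented_aenq_handler for groups not listed here. The keep-alive
 * handler also refreshes adapter->timestamp_wd, which the driver's watchdog
 * uses to detect a device that stopped responding.
 */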
/*********************************************************************
 *  Multi-Process communication request handling (in primary)
 *********************************************************************/
static int
ena_mp_primary_handle(const struct rte_mp_msg *mp_msg, const void *peer)
{
	const struct ena_mp_body *req =
		(const struct ena_mp_body *)mp_msg->param;
	struct ena_adapter *adapter;
	struct ena_com_dev *ena_dev;
	struct ena_mp_body *rsp;
	struct rte_mp_msg mp_rsp;
	struct rte_eth_dev *dev;
	int res = 0;

	rsp = (struct ena_mp_body *)&mp_rsp.param;
	mp_msg_init(&mp_rsp, req->type, req->port_id);

	if (!rte_eth_dev_is_valid_port(req->port_id)) {
		rte_errno = ENODEV;
		res = -rte_errno;
		PMD_DRV_LOG(ERR, "Unknown port %d in request %d\n",
			req->port_id, req->type);
		goto end;
	}
	dev = &rte_eth_devices[req->port_id];
	adapter = dev->data->dev_private;
	ena_dev = &adapter->ena_dev;

	switch (req->type) {
	case ENA_MP_DEV_STATS_GET:
		res = ena_com_get_dev_basic_stats(ena_dev,
			&adapter->basic_stats);
		break;
	case ENA_MP_ENI_STATS_GET:
		res = ena_com_get_eni_stats(ena_dev,
			(struct ena_admin_eni_stats *)&adapter->eni_stats);
		break;
	case ENA_MP_MTU_SET:
		res = ena_com_set_dev_mtu(ena_dev, req->args.mtu);
		break;
	case ENA_MP_IND_TBL_GET:
		res = ena_com_indirect_table_get(ena_dev,
			adapter->indirect_table);
		break;
	case ENA_MP_IND_TBL_SET:
		res = ena_com_indirect_table_set(ena_dev);
		break;
	default:
		PMD_DRV_LOG(ERR, "Unknown request type %d\n", req->type);
		res = -EINVAL;
		break;
	}

end:
	/* Save processing result in the reply */
	rsp->result = res;
	/* Return just IPC processing status */
	return rte_mp_reply(&mp_rsp, peer);
}
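/* Multi-process flow sketch (descriptive, secondary-side details simplified):
 * admin-queue operations are issued only by the primary process, so a
 * secondary wraps calls such as ena_com_indirect_table_set() in the
 * ENA_PROXY() macro, which is expected to serialize the request into a
 * struct ena_mp_body and send it over the rte_mp IPC channel (presumably via
 * rte_mp_request_sync()). The handler above runs in the primary, performs the
 * real ena_com call, and stores its status in rsp->result; the value returned
 * by rte_mp_reply() only reflects whether the IPC response was delivered.
 */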