/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright (c) 2015-2020 Amazon.com, Inc. or its affiliates.
 * All rights reserved.
 */

#include <rte_string_fns.h>
#include <rte_errno.h>
#include <rte_version.h>
#include <rte_net.h>
#include <rte_kvargs.h>

#include "ena_ethdev.h"
#include "ena_logs.h"
#include "ena_platform.h"
#include "ena_com.h"
#include "ena_eth_com.h"

#include <ena_common_defs.h>
#include <ena_regs_defs.h>
#include <ena_admin_defs.h>
#include <ena_eth_io_defs.h>

#define DRV_MODULE_VER_MAJOR	2
#define DRV_MODULE_VER_MINOR	6
#define DRV_MODULE_VER_SUBMINOR	0

#define __MERGE_64B_H_L(h, l) (((uint64_t)h << 32) | l)

#define GET_L4_HDR_LEN(mbuf)					\
	((rte_pktmbuf_mtod_offset(mbuf, struct rte_tcp_hdr *,	\
		mbuf->l3_len + mbuf->l2_len)->data_off) >> 4)

#define ETH_GSTRING_LEN	32

#define ARRAY_SIZE(x) RTE_DIM(x)

#define ENA_MIN_RING_DESC	128

#define ENA_PTYPE_HAS_HASH	(RTE_PTYPE_L4_TCP | RTE_PTYPE_L4_UDP)

struct ena_stats {
	char name[ETH_GSTRING_LEN];
	int stat_offset;
};

#define ENA_STAT_ENTRY(stat, stat_type) { \
	.name = #stat, \
	.stat_offset = offsetof(struct ena_stats_##stat_type, stat) \
}

#define ENA_STAT_RX_ENTRY(stat) \
	ENA_STAT_ENTRY(stat, rx)

#define ENA_STAT_TX_ENTRY(stat) \
	ENA_STAT_ENTRY(stat, tx)

#define ENA_STAT_ENI_ENTRY(stat) \
	ENA_STAT_ENTRY(stat, eni)

#define ENA_STAT_GLOBAL_ENTRY(stat) \
	ENA_STAT_ENTRY(stat, dev)

/* Device arguments */
#define ENA_DEVARG_LARGE_LLQ_HDR "large_llq_hdr"
/* Timeout in seconds after which a single uncompleted Tx packet should be
 * considered as missing.
 */
#define ENA_DEVARG_MISS_TXC_TO "miss_txc_to"

/*
 * Each rte_memzone should have a unique name.
 * To satisfy that, count the number of allocations and add it to the name.
 */
rte_atomic64_t ena_alloc_cnt;

static const struct ena_stats ena_stats_global_strings[] = {
	ENA_STAT_GLOBAL_ENTRY(wd_expired),
	ENA_STAT_GLOBAL_ENTRY(dev_start),
	ENA_STAT_GLOBAL_ENTRY(dev_stop),
	ENA_STAT_GLOBAL_ENTRY(tx_drops),
};

static const struct ena_stats ena_stats_eni_strings[] = {
	ENA_STAT_ENI_ENTRY(bw_in_allowance_exceeded),
	ENA_STAT_ENI_ENTRY(bw_out_allowance_exceeded),
	ENA_STAT_ENI_ENTRY(pps_allowance_exceeded),
	ENA_STAT_ENI_ENTRY(conntrack_allowance_exceeded),
	ENA_STAT_ENI_ENTRY(linklocal_allowance_exceeded),
};

static const struct ena_stats ena_stats_tx_strings[] = {
	ENA_STAT_TX_ENTRY(cnt),
	ENA_STAT_TX_ENTRY(bytes),
	ENA_STAT_TX_ENTRY(prepare_ctx_err),
	ENA_STAT_TX_ENTRY(tx_poll),
	ENA_STAT_TX_ENTRY(doorbells),
	ENA_STAT_TX_ENTRY(bad_req_id),
	ENA_STAT_TX_ENTRY(available_desc),
	ENA_STAT_TX_ENTRY(missed_tx),
};

static const struct ena_stats ena_stats_rx_strings[] = {
	ENA_STAT_RX_ENTRY(cnt),
	ENA_STAT_RX_ENTRY(bytes),
	ENA_STAT_RX_ENTRY(refill_partial),
	ENA_STAT_RX_ENTRY(l3_csum_bad),
	ENA_STAT_RX_ENTRY(l4_csum_bad),
	ENA_STAT_RX_ENTRY(l4_csum_good),
	ENA_STAT_RX_ENTRY(mbuf_alloc_fail),
	ENA_STAT_RX_ENTRY(bad_desc_num),
	ENA_STAT_RX_ENTRY(bad_req_id),
};

#define ENA_STATS_ARRAY_GLOBAL	ARRAY_SIZE(ena_stats_global_strings)
#define ENA_STATS_ARRAY_ENI	ARRAY_SIZE(ena_stats_eni_strings)
#define ENA_STATS_ARRAY_TX	ARRAY_SIZE(ena_stats_tx_strings)
#define ENA_STATS_ARRAY_RX	ARRAY_SIZE(ena_stats_rx_strings)

#define QUEUE_OFFLOADS (RTE_ETH_TX_OFFLOAD_TCP_CKSUM |\
			RTE_ETH_TX_OFFLOAD_UDP_CKSUM |\
			RTE_ETH_TX_OFFLOAD_IPV4_CKSUM |\
			RTE_ETH_TX_OFFLOAD_TCP_TSO)
#define MBUF_OFFLOADS (RTE_MBUF_F_TX_L4_MASK |\
		       RTE_MBUF_F_TX_IP_CKSUM |\
		       RTE_MBUF_F_TX_TCP_SEG)

/** Vendor ID used by Amazon devices */
#define PCI_VENDOR_ID_AMAZON	0x1D0F
/** Amazon devices */
#define PCI_DEVICE_ID_ENA_VF		0xEC20
#define PCI_DEVICE_ID_ENA_VF_RSERV0	0xEC21

#define ENA_TX_OFFLOAD_MASK	(RTE_MBUF_F_TX_L4_MASK |	\
				 RTE_MBUF_F_TX_IPV6 |		\
				 RTE_MBUF_F_TX_IPV4 |		\
				 RTE_MBUF_F_TX_IP_CKSUM |	\
				 RTE_MBUF_F_TX_TCP_SEG)

#define ENA_TX_OFFLOAD_NOTSUP_MASK	\
	(RTE_MBUF_F_TX_OFFLOAD_MASK ^ ENA_TX_OFFLOAD_MASK)

/** HW specific offloads capabilities. */
/* IPv4 checksum offload. */
#define ENA_L3_IPV4_CSUM		0x0001
/* TCP/UDP checksum offload for IPv4 packets. */
#define ENA_L4_IPV4_CSUM		0x0002
/* TCP/UDP checksum offload for IPv4 packets with pseudo header checksum. */
#define ENA_L4_IPV4_CSUM_PARTIAL	0x0004
/* TCP/UDP checksum offload for IPv6 packets. */
#define ENA_L4_IPV6_CSUM		0x0008
/* TCP/UDP checksum offload for IPv6 packets with pseudo header checksum. */
#define ENA_L4_IPV6_CSUM_PARTIAL	0x0010
/* TSO support for IPv4 packets. */
#define ENA_IPV4_TSO			0x0020

/* Device supports setting RSS hash.
*/ 157 #define ENA_RX_RSS_HASH 0x0040 158 159 static const struct rte_pci_id pci_id_ena_map[] = { 160 { RTE_PCI_DEVICE(PCI_VENDOR_ID_AMAZON, PCI_DEVICE_ID_ENA_VF) }, 161 { RTE_PCI_DEVICE(PCI_VENDOR_ID_AMAZON, PCI_DEVICE_ID_ENA_VF_RSERV0) }, 162 { .device_id = 0 }, 163 }; 164 165 static struct ena_aenq_handlers aenq_handlers; 166 167 static int ena_device_init(struct ena_adapter *adapter, 168 struct rte_pci_device *pdev, 169 struct ena_com_dev_get_features_ctx *get_feat_ctx); 170 static int ena_dev_configure(struct rte_eth_dev *dev); 171 static void ena_tx_map_mbuf(struct ena_ring *tx_ring, 172 struct ena_tx_buffer *tx_info, 173 struct rte_mbuf *mbuf, 174 void **push_header, 175 uint16_t *header_len); 176 static int ena_xmit_mbuf(struct ena_ring *tx_ring, struct rte_mbuf *mbuf); 177 static int ena_tx_cleanup(void *txp, uint32_t free_pkt_cnt); 178 static uint16_t eth_ena_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, 179 uint16_t nb_pkts); 180 static uint16_t eth_ena_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, 181 uint16_t nb_pkts); 182 static int ena_tx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx, 183 uint16_t nb_desc, unsigned int socket_id, 184 const struct rte_eth_txconf *tx_conf); 185 static int ena_rx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx, 186 uint16_t nb_desc, unsigned int socket_id, 187 const struct rte_eth_rxconf *rx_conf, 188 struct rte_mempool *mp); 189 static inline void ena_init_rx_mbuf(struct rte_mbuf *mbuf, uint16_t len); 190 static struct rte_mbuf *ena_rx_mbuf(struct ena_ring *rx_ring, 191 struct ena_com_rx_buf_info *ena_bufs, 192 uint32_t descs, 193 uint16_t *next_to_clean, 194 uint8_t offset); 195 static uint16_t eth_ena_recv_pkts(void *rx_queue, 196 struct rte_mbuf **rx_pkts, uint16_t nb_pkts); 197 static int ena_add_single_rx_desc(struct ena_com_io_sq *io_sq, 198 struct rte_mbuf *mbuf, uint16_t id); 199 static int ena_populate_rx_queue(struct ena_ring *rxq, unsigned int count); 200 static void ena_init_rings(struct ena_adapter *adapter, 201 bool disable_meta_caching); 202 static int ena_mtu_set(struct rte_eth_dev *dev, uint16_t mtu); 203 static int ena_start(struct rte_eth_dev *dev); 204 static int ena_stop(struct rte_eth_dev *dev); 205 static int ena_close(struct rte_eth_dev *dev); 206 static int ena_dev_reset(struct rte_eth_dev *dev); 207 static int ena_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats); 208 static void ena_rx_queue_release_all(struct rte_eth_dev *dev); 209 static void ena_tx_queue_release_all(struct rte_eth_dev *dev); 210 static void ena_rx_queue_release(struct rte_eth_dev *dev, uint16_t qid); 211 static void ena_tx_queue_release(struct rte_eth_dev *dev, uint16_t qid); 212 static void ena_rx_queue_release_bufs(struct ena_ring *ring); 213 static void ena_tx_queue_release_bufs(struct ena_ring *ring); 214 static int ena_link_update(struct rte_eth_dev *dev, 215 int wait_to_complete); 216 static int ena_create_io_queue(struct rte_eth_dev *dev, struct ena_ring *ring); 217 static void ena_queue_stop(struct ena_ring *ring); 218 static void ena_queue_stop_all(struct rte_eth_dev *dev, 219 enum ena_ring_type ring_type); 220 static int ena_queue_start(struct rte_eth_dev *dev, struct ena_ring *ring); 221 static int ena_queue_start_all(struct rte_eth_dev *dev, 222 enum ena_ring_type ring_type); 223 static void ena_stats_restart(struct rte_eth_dev *dev); 224 static uint64_t ena_get_rx_port_offloads(struct ena_adapter *adapter); 225 static uint64_t ena_get_tx_port_offloads(struct ena_adapter *adapter); 226 
static uint64_t ena_get_rx_queue_offloads(struct ena_adapter *adapter); 227 static uint64_t ena_get_tx_queue_offloads(struct ena_adapter *adapter); 228 static int ena_infos_get(struct rte_eth_dev *dev, 229 struct rte_eth_dev_info *dev_info); 230 static void ena_interrupt_handler_rte(void *cb_arg); 231 static void ena_timer_wd_callback(struct rte_timer *timer, void *arg); 232 static void ena_destroy_device(struct rte_eth_dev *eth_dev); 233 static int eth_ena_dev_init(struct rte_eth_dev *eth_dev); 234 static int ena_xstats_get_names(struct rte_eth_dev *dev, 235 struct rte_eth_xstat_name *xstats_names, 236 unsigned int n); 237 static int ena_xstats_get_names_by_id(struct rte_eth_dev *dev, 238 const uint64_t *ids, 239 struct rte_eth_xstat_name *xstats_names, 240 unsigned int size); 241 static int ena_xstats_get(struct rte_eth_dev *dev, 242 struct rte_eth_xstat *stats, 243 unsigned int n); 244 static int ena_xstats_get_by_id(struct rte_eth_dev *dev, 245 const uint64_t *ids, 246 uint64_t *values, 247 unsigned int n); 248 static int ena_process_bool_devarg(const char *key, 249 const char *value, 250 void *opaque); 251 static int ena_parse_devargs(struct ena_adapter *adapter, 252 struct rte_devargs *devargs); 253 static int ena_copy_eni_stats(struct ena_adapter *adapter, 254 struct ena_stats_eni *stats); 255 static int ena_setup_rx_intr(struct rte_eth_dev *dev); 256 static int ena_rx_queue_intr_enable(struct rte_eth_dev *dev, 257 uint16_t queue_id); 258 static int ena_rx_queue_intr_disable(struct rte_eth_dev *dev, 259 uint16_t queue_id); 260 static int ena_configure_aenq(struct ena_adapter *adapter); 261 static int ena_mp_primary_handle(const struct rte_mp_msg *mp_msg, 262 const void *peer); 263 264 static const struct eth_dev_ops ena_dev_ops = { 265 .dev_configure = ena_dev_configure, 266 .dev_infos_get = ena_infos_get, 267 .rx_queue_setup = ena_rx_queue_setup, 268 .tx_queue_setup = ena_tx_queue_setup, 269 .dev_start = ena_start, 270 .dev_stop = ena_stop, 271 .link_update = ena_link_update, 272 .stats_get = ena_stats_get, 273 .xstats_get_names = ena_xstats_get_names, 274 .xstats_get_names_by_id = ena_xstats_get_names_by_id, 275 .xstats_get = ena_xstats_get, 276 .xstats_get_by_id = ena_xstats_get_by_id, 277 .mtu_set = ena_mtu_set, 278 .rx_queue_release = ena_rx_queue_release, 279 .tx_queue_release = ena_tx_queue_release, 280 .dev_close = ena_close, 281 .dev_reset = ena_dev_reset, 282 .reta_update = ena_rss_reta_update, 283 .reta_query = ena_rss_reta_query, 284 .rx_queue_intr_enable = ena_rx_queue_intr_enable, 285 .rx_queue_intr_disable = ena_rx_queue_intr_disable, 286 .rss_hash_update = ena_rss_hash_update, 287 .rss_hash_conf_get = ena_rss_hash_conf_get, 288 .tx_done_cleanup = ena_tx_cleanup, 289 }; 290 291 /********************************************************************* 292 * Multi-Process communication bits 293 *********************************************************************/ 294 /* rte_mp IPC message name */ 295 #define ENA_MP_NAME "net_ena_mp" 296 /* Request timeout in seconds */ 297 #define ENA_MP_REQ_TMO 5 298 299 /** Proxy request type */ 300 enum ena_mp_req { 301 ENA_MP_DEV_STATS_GET, 302 ENA_MP_ENI_STATS_GET, 303 ENA_MP_MTU_SET, 304 ENA_MP_IND_TBL_GET, 305 ENA_MP_IND_TBL_SET 306 }; 307 308 /** Proxy message body. Shared between requests and responses. */ 309 struct ena_mp_body { 310 /* Message type */ 311 enum ena_mp_req type; 312 int port_id; 313 /* Processing result. Set in replies. 0 if message succeeded, negative 314 * error code otherwise. 
315 */ 316 int result; 317 union { 318 int mtu; /* For ENA_MP_MTU_SET */ 319 } args; 320 }; 321 322 /** 323 * Initialize IPC message. 324 * 325 * @param[out] msg 326 * Pointer to the message to initialize. 327 * @param[in] type 328 * Message type. 329 * @param[in] port_id 330 * Port ID of target device. 331 * 332 */ 333 static void 334 mp_msg_init(struct rte_mp_msg *msg, enum ena_mp_req type, int port_id) 335 { 336 struct ena_mp_body *body = (struct ena_mp_body *)&msg->param; 337 338 memset(msg, 0, sizeof(*msg)); 339 strlcpy(msg->name, ENA_MP_NAME, sizeof(msg->name)); 340 msg->len_param = sizeof(*body); 341 body->type = type; 342 body->port_id = port_id; 343 } 344 345 /********************************************************************* 346 * Multi-Process communication PMD API 347 *********************************************************************/ 348 /** 349 * Define proxy request descriptor 350 * 351 * Used to define all structures and functions required for proxying a given 352 * function to the primary process including the code to perform to prepare the 353 * request and process the response. 354 * 355 * @param[in] f 356 * Name of the function to proxy 357 * @param[in] t 358 * Message type to use 359 * @param[in] prep 360 * Body of a function to prepare the request in form of a statement 361 * expression. It is passed all the original function arguments along with two 362 * extra ones: 363 * - struct ena_adapter *adapter - PMD data of the device calling the proxy. 364 * - struct ena_mp_body *req - body of a request to prepare. 365 * @param[in] proc 366 * Body of a function to process the response in form of a statement 367 * expression. It is passed all the original function arguments along with two 368 * extra ones: 369 * - struct ena_adapter *adapter - PMD data of the device calling the proxy. 370 * - struct ena_mp_body *rsp - body of a response to process. 371 * @param ... 372 * Proxied function's arguments 373 * 374 * @note Inside prep and proc any parameters which aren't used should be marked 375 * as such (with ENA_TOUCH or __rte_unused). 376 */ 377 #define ENA_PROXY_DESC(f, t, prep, proc, ...) \ 378 static const enum ena_mp_req mp_type_ ## f = t; \ 379 static const char *mp_name_ ## f = #t; \ 380 static void mp_prep_ ## f(struct ena_adapter *adapter, \ 381 struct ena_mp_body *req, \ 382 __VA_ARGS__) \ 383 { \ 384 prep; \ 385 } \ 386 static void mp_proc_ ## f(struct ena_adapter *adapter, \ 387 struct ena_mp_body *rsp, \ 388 __VA_ARGS__) \ 389 { \ 390 proc; \ 391 } 392 393 /** 394 * Proxy wrapper for calling primary functions in a secondary process. 395 * 396 * Depending on whether called in primary or secondary process, calls the 397 * @p func directly or proxies the call to the primary process via rte_mp IPC. 398 * This macro requires a proxy request descriptor to be defined for @p func 399 * using ENA_PROXY_DESC() macro. 400 * 401 * @param[in/out] a 402 * Device PMD data. Used for sending the message and sharing message results 403 * between primary and secondary. 404 * @param[in] f 405 * Function to proxy. 406 * @param ... 407 * Arguments of @p func. 408 * 409 * @return 410 * - 0: Processing succeeded and response handler was called. 411 * - -EPERM: IPC is unavailable on this platform. This means only primary 412 * process may call the proxied function. 413 * - -EIO: IPC returned error on request send. Inspect rte_errno detailed 414 * error code. 415 * - Negative error code from the proxied function. 416 * 417 * @note This mechanism is geared towards control-path tasks. 
Avoid calling it 418 * in fast-path unless unbound delays are allowed. This is due to the IPC 419 * mechanism itself (socket based). 420 * @note Due to IPC parameter size limitations the proxy logic shares call 421 * results through the struct ena_adapter shared memory. This makes the 422 * proxy mechanism strictly single-threaded. Therefore be sure to make all 423 * calls to the same proxied function under the same lock. 424 */ 425 #define ENA_PROXY(a, f, ...) \ 426 ({ \ 427 struct ena_adapter *_a = (a); \ 428 struct timespec ts = { .tv_sec = ENA_MP_REQ_TMO }; \ 429 struct ena_mp_body *req, *rsp; \ 430 struct rte_mp_reply mp_rep; \ 431 struct rte_mp_msg mp_req; \ 432 int ret; \ 433 \ 434 if (rte_eal_process_type() == RTE_PROC_PRIMARY) { \ 435 ret = f(__VA_ARGS__); \ 436 } else { \ 437 /* Prepare and send request */ \ 438 req = (struct ena_mp_body *)&mp_req.param; \ 439 mp_msg_init(&mp_req, mp_type_ ## f, _a->edev_data->port_id); \ 440 mp_prep_ ## f(_a, req, ## __VA_ARGS__); \ 441 \ 442 ret = rte_mp_request_sync(&mp_req, &mp_rep, &ts); \ 443 if (likely(!ret)) { \ 444 RTE_ASSERT(mp_rep.nb_received == 1); \ 445 rsp = (struct ena_mp_body *)&mp_rep.msgs[0].param; \ 446 ret = rsp->result; \ 447 if (ret == 0) { \ 448 mp_proc_##f(_a, rsp, ## __VA_ARGS__); \ 449 } else { \ 450 PMD_DRV_LOG(ERR, \ 451 "%s returned error: %d\n", \ 452 mp_name_ ## f, rsp->result);\ 453 } \ 454 free(mp_rep.msgs); \ 455 } else if (rte_errno == ENOTSUP) { \ 456 PMD_DRV_LOG(ERR, \ 457 "No IPC, can't proxy to primary\n");\ 458 ret = -rte_errno; \ 459 } else { \ 460 PMD_DRV_LOG(ERR, "Request %s failed: %s\n", \ 461 mp_name_ ## f, \ 462 rte_strerror(rte_errno)); \ 463 ret = -EIO; \ 464 } \ 465 } \ 466 ret; \ 467 }) 468 469 /********************************************************************* 470 * Multi-Process communication request descriptors 471 *********************************************************************/ 472 473 ENA_PROXY_DESC(ena_com_get_dev_basic_stats, ENA_MP_DEV_STATS_GET, 474 ({ 475 ENA_TOUCH(adapter); 476 ENA_TOUCH(req); 477 ENA_TOUCH(ena_dev); 478 ENA_TOUCH(stats); 479 }), 480 ({ 481 ENA_TOUCH(rsp); 482 ENA_TOUCH(ena_dev); 483 if (stats != &adapter->basic_stats) 484 rte_memcpy(stats, &adapter->basic_stats, sizeof(*stats)); 485 }), 486 struct ena_com_dev *ena_dev, struct ena_admin_basic_stats *stats); 487 488 ENA_PROXY_DESC(ena_com_get_eni_stats, ENA_MP_ENI_STATS_GET, 489 ({ 490 ENA_TOUCH(adapter); 491 ENA_TOUCH(req); 492 ENA_TOUCH(ena_dev); 493 ENA_TOUCH(stats); 494 }), 495 ({ 496 ENA_TOUCH(rsp); 497 ENA_TOUCH(ena_dev); 498 if (stats != (struct ena_admin_eni_stats *)&adapter->eni_stats) 499 rte_memcpy(stats, &adapter->eni_stats, sizeof(*stats)); 500 }), 501 struct ena_com_dev *ena_dev, struct ena_admin_eni_stats *stats); 502 503 ENA_PROXY_DESC(ena_com_set_dev_mtu, ENA_MP_MTU_SET, 504 ({ 505 ENA_TOUCH(adapter); 506 ENA_TOUCH(ena_dev); 507 req->args.mtu = mtu; 508 }), 509 ({ 510 ENA_TOUCH(adapter); 511 ENA_TOUCH(rsp); 512 ENA_TOUCH(ena_dev); 513 ENA_TOUCH(mtu); 514 }), 515 struct ena_com_dev *ena_dev, int mtu); 516 517 ENA_PROXY_DESC(ena_com_indirect_table_set, ENA_MP_IND_TBL_SET, 518 ({ 519 ENA_TOUCH(adapter); 520 ENA_TOUCH(req); 521 ENA_TOUCH(ena_dev); 522 }), 523 ({ 524 ENA_TOUCH(adapter); 525 ENA_TOUCH(rsp); 526 ENA_TOUCH(ena_dev); 527 }), 528 struct ena_com_dev *ena_dev); 529 530 ENA_PROXY_DESC(ena_com_indirect_table_get, ENA_MP_IND_TBL_GET, 531 ({ 532 ENA_TOUCH(adapter); 533 ENA_TOUCH(req); 534 ENA_TOUCH(ena_dev); 535 ENA_TOUCH(ind_tbl); 536 }), 537 ({ 538 ENA_TOUCH(rsp); 539 ENA_TOUCH(ena_dev); 540 
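		/*
		 * Descriptive comment only (no functional change), restating the
		 * ENA_PROXY() note above: the primary process services the
		 * ENA_MP_IND_TBL_GET request and stores the RSS indirection table
		 * in adapter->indirect_table, which lives in the shared
		 * struct ena_adapter. The copy below hands the result back to the
		 * caller's buffer and is skipped when the caller already passed
		 * that shared buffer.
		 */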
if (ind_tbl != adapter->indirect_table) 541 rte_memcpy(ind_tbl, adapter->indirect_table, 542 sizeof(adapter->indirect_table)); 543 }), 544 struct ena_com_dev *ena_dev, u32 *ind_tbl); 545 546 static inline void ena_trigger_reset(struct ena_adapter *adapter, 547 enum ena_regs_reset_reason_types reason) 548 { 549 if (likely(!adapter->trigger_reset)) { 550 adapter->reset_reason = reason; 551 adapter->trigger_reset = true; 552 } 553 } 554 555 static inline void ena_rx_mbuf_prepare(struct ena_ring *rx_ring, 556 struct rte_mbuf *mbuf, 557 struct ena_com_rx_ctx *ena_rx_ctx, 558 bool fill_hash) 559 { 560 struct ena_stats_rx *rx_stats = &rx_ring->rx_stats; 561 uint64_t ol_flags = 0; 562 uint32_t packet_type = 0; 563 564 if (ena_rx_ctx->l4_proto == ENA_ETH_IO_L4_PROTO_TCP) 565 packet_type |= RTE_PTYPE_L4_TCP; 566 else if (ena_rx_ctx->l4_proto == ENA_ETH_IO_L4_PROTO_UDP) 567 packet_type |= RTE_PTYPE_L4_UDP; 568 569 if (ena_rx_ctx->l3_proto == ENA_ETH_IO_L3_PROTO_IPV4) { 570 packet_type |= RTE_PTYPE_L3_IPV4; 571 if (unlikely(ena_rx_ctx->l3_csum_err)) { 572 ++rx_stats->l3_csum_bad; 573 ol_flags |= RTE_MBUF_F_RX_IP_CKSUM_BAD; 574 } else { 575 ol_flags |= RTE_MBUF_F_RX_IP_CKSUM_GOOD; 576 } 577 } else if (ena_rx_ctx->l3_proto == ENA_ETH_IO_L3_PROTO_IPV6) { 578 packet_type |= RTE_PTYPE_L3_IPV6; 579 } 580 581 if (!ena_rx_ctx->l4_csum_checked || ena_rx_ctx->frag) { 582 ol_flags |= RTE_MBUF_F_RX_L4_CKSUM_UNKNOWN; 583 } else { 584 if (unlikely(ena_rx_ctx->l4_csum_err)) { 585 ++rx_stats->l4_csum_bad; 586 /* 587 * For the L4 Rx checksum offload the HW may indicate 588 * bad checksum although it's valid. Because of that, 589 * we're setting the UNKNOWN flag to let the app 590 * re-verify the checksum. 591 */ 592 ol_flags |= RTE_MBUF_F_RX_L4_CKSUM_UNKNOWN; 593 } else { 594 ++rx_stats->l4_csum_good; 595 ol_flags |= RTE_MBUF_F_RX_L4_CKSUM_GOOD; 596 } 597 } 598 599 if (fill_hash && 600 likely((packet_type & ENA_PTYPE_HAS_HASH) && !ena_rx_ctx->frag)) { 601 ol_flags |= RTE_MBUF_F_RX_RSS_HASH; 602 mbuf->hash.rss = ena_rx_ctx->hash; 603 } 604 605 mbuf->ol_flags = ol_flags; 606 mbuf->packet_type = packet_type; 607 } 608 609 static inline void ena_tx_mbuf_prepare(struct rte_mbuf *mbuf, 610 struct ena_com_tx_ctx *ena_tx_ctx, 611 uint64_t queue_offloads, 612 bool disable_meta_caching) 613 { 614 struct ena_com_tx_meta *ena_meta = &ena_tx_ctx->ena_meta; 615 616 if ((mbuf->ol_flags & MBUF_OFFLOADS) && 617 (queue_offloads & QUEUE_OFFLOADS)) { 618 /* check if TSO is required */ 619 if ((mbuf->ol_flags & RTE_MBUF_F_TX_TCP_SEG) && 620 (queue_offloads & RTE_ETH_TX_OFFLOAD_TCP_TSO)) { 621 ena_tx_ctx->tso_enable = true; 622 623 ena_meta->l4_hdr_len = GET_L4_HDR_LEN(mbuf); 624 } 625 626 /* check if L3 checksum is needed */ 627 if ((mbuf->ol_flags & RTE_MBUF_F_TX_IP_CKSUM) && 628 (queue_offloads & RTE_ETH_TX_OFFLOAD_IPV4_CKSUM)) 629 ena_tx_ctx->l3_csum_enable = true; 630 631 if (mbuf->ol_flags & RTE_MBUF_F_TX_IPV6) { 632 ena_tx_ctx->l3_proto = ENA_ETH_IO_L3_PROTO_IPV6; 633 /* For the IPv6 packets, DF always needs to be true. 
*/ 634 ena_tx_ctx->df = 1; 635 } else { 636 ena_tx_ctx->l3_proto = ENA_ETH_IO_L3_PROTO_IPV4; 637 638 /* set don't fragment (DF) flag */ 639 if (mbuf->packet_type & 640 (RTE_PTYPE_L4_NONFRAG 641 | RTE_PTYPE_INNER_L4_NONFRAG)) 642 ena_tx_ctx->df = 1; 643 } 644 645 /* check if L4 checksum is needed */ 646 if (((mbuf->ol_flags & RTE_MBUF_F_TX_L4_MASK) == RTE_MBUF_F_TX_TCP_CKSUM) && 647 (queue_offloads & RTE_ETH_TX_OFFLOAD_TCP_CKSUM)) { 648 ena_tx_ctx->l4_proto = ENA_ETH_IO_L4_PROTO_TCP; 649 ena_tx_ctx->l4_csum_enable = true; 650 } else if (((mbuf->ol_flags & RTE_MBUF_F_TX_L4_MASK) == 651 RTE_MBUF_F_TX_UDP_CKSUM) && 652 (queue_offloads & RTE_ETH_TX_OFFLOAD_UDP_CKSUM)) { 653 ena_tx_ctx->l4_proto = ENA_ETH_IO_L4_PROTO_UDP; 654 ena_tx_ctx->l4_csum_enable = true; 655 } else { 656 ena_tx_ctx->l4_proto = ENA_ETH_IO_L4_PROTO_UNKNOWN; 657 ena_tx_ctx->l4_csum_enable = false; 658 } 659 660 ena_meta->mss = mbuf->tso_segsz; 661 ena_meta->l3_hdr_len = mbuf->l3_len; 662 ena_meta->l3_hdr_offset = mbuf->l2_len; 663 664 ena_tx_ctx->meta_valid = true; 665 } else if (disable_meta_caching) { 666 memset(ena_meta, 0, sizeof(*ena_meta)); 667 ena_tx_ctx->meta_valid = true; 668 } else { 669 ena_tx_ctx->meta_valid = false; 670 } 671 } 672 673 static int validate_tx_req_id(struct ena_ring *tx_ring, u16 req_id) 674 { 675 struct ena_tx_buffer *tx_info = NULL; 676 677 if (likely(req_id < tx_ring->ring_size)) { 678 tx_info = &tx_ring->tx_buffer_info[req_id]; 679 if (likely(tx_info->mbuf)) 680 return 0; 681 } 682 683 if (tx_info) 684 PMD_TX_LOG(ERR, "tx_info doesn't have valid mbuf. queue %d:%d req_id %u\n", 685 tx_ring->port_id, tx_ring->id, req_id); 686 else 687 PMD_TX_LOG(ERR, "Invalid req_id: %hu in queue %d:%d\n", 688 req_id, tx_ring->port_id, tx_ring->id); 689 690 /* Trigger device reset */ 691 ++tx_ring->tx_stats.bad_req_id; 692 ena_trigger_reset(tx_ring->adapter, ENA_REGS_RESET_INV_TX_REQ_ID); 693 return -EFAULT; 694 } 695 696 static void ena_config_host_info(struct ena_com_dev *ena_dev) 697 { 698 struct ena_admin_host_info *host_info; 699 int rc; 700 701 /* Allocate only the host info */ 702 rc = ena_com_allocate_host_info(ena_dev); 703 if (rc) { 704 PMD_DRV_LOG(ERR, "Cannot allocate host info\n"); 705 return; 706 } 707 708 host_info = ena_dev->host_attr.host_info; 709 710 host_info->os_type = ENA_ADMIN_OS_DPDK; 711 host_info->kernel_ver = RTE_VERSION; 712 strlcpy((char *)host_info->kernel_ver_str, rte_version(), 713 sizeof(host_info->kernel_ver_str)); 714 host_info->os_dist = RTE_VERSION; 715 strlcpy((char *)host_info->os_dist_str, rte_version(), 716 sizeof(host_info->os_dist_str)); 717 host_info->driver_version = 718 (DRV_MODULE_VER_MAJOR) | 719 (DRV_MODULE_VER_MINOR << ENA_ADMIN_HOST_INFO_MINOR_SHIFT) | 720 (DRV_MODULE_VER_SUBMINOR << 721 ENA_ADMIN_HOST_INFO_SUB_MINOR_SHIFT); 722 host_info->num_cpus = rte_lcore_count(); 723 724 host_info->driver_supported_features = 725 ENA_ADMIN_HOST_INFO_RX_OFFSET_MASK | 726 ENA_ADMIN_HOST_INFO_RSS_CONFIGURABLE_FUNCTION_KEY_MASK; 727 728 rc = ena_com_set_host_attributes(ena_dev); 729 if (rc) { 730 if (rc == -ENA_COM_UNSUPPORTED) 731 PMD_DRV_LOG(WARNING, "Cannot set host attributes\n"); 732 else 733 PMD_DRV_LOG(ERR, "Cannot set host attributes\n"); 734 735 goto err; 736 } 737 738 return; 739 740 err: 741 ena_com_delete_host_info(ena_dev); 742 } 743 744 /* This function calculates the number of xstats based on the current config */ 745 static unsigned int ena_xstats_calc_num(struct rte_eth_dev_data *data) 746 { 747 return ENA_STATS_ARRAY_GLOBAL + ENA_STATS_ARRAY_ENI + 748 
		(data->nb_tx_queues * ENA_STATS_ARRAY_TX) +
		(data->nb_rx_queues * ENA_STATS_ARRAY_RX);
}

static void ena_config_debug_area(struct ena_adapter *adapter)
{
	u32 debug_area_size;
	int rc, ss_count;

	ss_count = ena_xstats_calc_num(adapter->edev_data);

	/* Allocate 32 bytes for each string and 64 bits for the value */
	debug_area_size = ss_count * ETH_GSTRING_LEN + sizeof(u64) * ss_count;

	rc = ena_com_allocate_debug_area(&adapter->ena_dev, debug_area_size);
	if (rc) {
		PMD_DRV_LOG(ERR, "Cannot allocate debug area\n");
		return;
	}

	rc = ena_com_set_host_attributes(&adapter->ena_dev);
	if (rc) {
		if (rc == -ENA_COM_UNSUPPORTED)
			PMD_DRV_LOG(WARNING, "Cannot set host attributes\n");
		else
			PMD_DRV_LOG(ERR, "Cannot set host attributes\n");

		goto err;
	}

	return;
err:
	ena_com_delete_debug_area(&adapter->ena_dev);
}

static int ena_close(struct rte_eth_dev *dev)
{
	struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev);
	struct rte_intr_handle *intr_handle = pci_dev->intr_handle;
	struct ena_adapter *adapter = dev->data->dev_private;
	int ret = 0;

	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
		return 0;

	if (adapter->state == ENA_ADAPTER_STATE_RUNNING)
		ret = ena_stop(dev);
	adapter->state = ENA_ADAPTER_STATE_CLOSED;

	ena_rx_queue_release_all(dev);
	ena_tx_queue_release_all(dev);

	rte_free(adapter->drv_stats);
	adapter->drv_stats = NULL;

	rte_intr_disable(intr_handle);
	rte_intr_callback_unregister(intr_handle,
				     ena_interrupt_handler_rte,
				     dev);

	/*
	 * MAC is not allocated dynamically. Setting NULL should prevent the
	 * release of the resource in rte_eth_dev_release_port().
811 */ 812 dev->data->mac_addrs = NULL; 813 814 return ret; 815 } 816 817 static int 818 ena_dev_reset(struct rte_eth_dev *dev) 819 { 820 int rc = 0; 821 822 /* Cannot release memory in secondary process */ 823 if (rte_eal_process_type() != RTE_PROC_PRIMARY) { 824 PMD_DRV_LOG(WARNING, "dev_reset not supported in secondary.\n"); 825 return -EPERM; 826 } 827 828 ena_destroy_device(dev); 829 rc = eth_ena_dev_init(dev); 830 if (rc) 831 PMD_INIT_LOG(CRIT, "Cannot initialize device\n"); 832 833 return rc; 834 } 835 836 static void ena_rx_queue_release_all(struct rte_eth_dev *dev) 837 { 838 int nb_queues = dev->data->nb_rx_queues; 839 int i; 840 841 for (i = 0; i < nb_queues; i++) 842 ena_rx_queue_release(dev, i); 843 } 844 845 static void ena_tx_queue_release_all(struct rte_eth_dev *dev) 846 { 847 int nb_queues = dev->data->nb_tx_queues; 848 int i; 849 850 for (i = 0; i < nb_queues; i++) 851 ena_tx_queue_release(dev, i); 852 } 853 854 static void ena_rx_queue_release(struct rte_eth_dev *dev, uint16_t qid) 855 { 856 struct ena_ring *ring = dev->data->rx_queues[qid]; 857 858 /* Free ring resources */ 859 rte_free(ring->rx_buffer_info); 860 ring->rx_buffer_info = NULL; 861 862 rte_free(ring->rx_refill_buffer); 863 ring->rx_refill_buffer = NULL; 864 865 rte_free(ring->empty_rx_reqs); 866 ring->empty_rx_reqs = NULL; 867 868 ring->configured = 0; 869 870 PMD_DRV_LOG(NOTICE, "Rx queue %d:%d released\n", 871 ring->port_id, ring->id); 872 } 873 874 static void ena_tx_queue_release(struct rte_eth_dev *dev, uint16_t qid) 875 { 876 struct ena_ring *ring = dev->data->tx_queues[qid]; 877 878 /* Free ring resources */ 879 rte_free(ring->push_buf_intermediate_buf); 880 881 rte_free(ring->tx_buffer_info); 882 883 rte_free(ring->empty_tx_reqs); 884 885 ring->empty_tx_reqs = NULL; 886 ring->tx_buffer_info = NULL; 887 ring->push_buf_intermediate_buf = NULL; 888 889 ring->configured = 0; 890 891 PMD_DRV_LOG(NOTICE, "Tx queue %d:%d released\n", 892 ring->port_id, ring->id); 893 } 894 895 static void ena_rx_queue_release_bufs(struct ena_ring *ring) 896 { 897 unsigned int i; 898 899 for (i = 0; i < ring->ring_size; ++i) { 900 struct ena_rx_buffer *rx_info = &ring->rx_buffer_info[i]; 901 if (rx_info->mbuf) { 902 rte_mbuf_raw_free(rx_info->mbuf); 903 rx_info->mbuf = NULL; 904 } 905 } 906 } 907 908 static void ena_tx_queue_release_bufs(struct ena_ring *ring) 909 { 910 unsigned int i; 911 912 for (i = 0; i < ring->ring_size; ++i) { 913 struct ena_tx_buffer *tx_buf = &ring->tx_buffer_info[i]; 914 915 if (tx_buf->mbuf) { 916 rte_pktmbuf_free(tx_buf->mbuf); 917 tx_buf->mbuf = NULL; 918 } 919 } 920 } 921 922 static int ena_link_update(struct rte_eth_dev *dev, 923 __rte_unused int wait_to_complete) 924 { 925 struct rte_eth_link *link = &dev->data->dev_link; 926 struct ena_adapter *adapter = dev->data->dev_private; 927 928 link->link_status = adapter->link_status ? 
RTE_ETH_LINK_UP : RTE_ETH_LINK_DOWN; 929 link->link_speed = RTE_ETH_SPEED_NUM_NONE; 930 link->link_duplex = RTE_ETH_LINK_FULL_DUPLEX; 931 932 return 0; 933 } 934 935 static int ena_queue_start_all(struct rte_eth_dev *dev, 936 enum ena_ring_type ring_type) 937 { 938 struct ena_adapter *adapter = dev->data->dev_private; 939 struct ena_ring *queues = NULL; 940 int nb_queues; 941 int i = 0; 942 int rc = 0; 943 944 if (ring_type == ENA_RING_TYPE_RX) { 945 queues = adapter->rx_ring; 946 nb_queues = dev->data->nb_rx_queues; 947 } else { 948 queues = adapter->tx_ring; 949 nb_queues = dev->data->nb_tx_queues; 950 } 951 for (i = 0; i < nb_queues; i++) { 952 if (queues[i].configured) { 953 if (ring_type == ENA_RING_TYPE_RX) { 954 ena_assert_msg( 955 dev->data->rx_queues[i] == &queues[i], 956 "Inconsistent state of Rx queues\n"); 957 } else { 958 ena_assert_msg( 959 dev->data->tx_queues[i] == &queues[i], 960 "Inconsistent state of Tx queues\n"); 961 } 962 963 rc = ena_queue_start(dev, &queues[i]); 964 965 if (rc) { 966 PMD_INIT_LOG(ERR, 967 "Failed to start queue[%d] of type(%d)\n", 968 i, ring_type); 969 goto err; 970 } 971 } 972 } 973 974 return 0; 975 976 err: 977 while (i--) 978 if (queues[i].configured) 979 ena_queue_stop(&queues[i]); 980 981 return rc; 982 } 983 984 static int ena_check_valid_conf(struct ena_adapter *adapter) 985 { 986 uint32_t mtu = adapter->edev_data->mtu; 987 988 if (mtu > adapter->max_mtu || mtu < ENA_MIN_MTU) { 989 PMD_INIT_LOG(ERR, 990 "Unsupported MTU of %d. Max MTU: %d, min MTU: %d\n", 991 mtu, adapter->max_mtu, ENA_MIN_MTU); 992 return ENA_COM_UNSUPPORTED; 993 } 994 995 return 0; 996 } 997 998 static int 999 ena_calc_io_queue_size(struct ena_calc_queue_size_ctx *ctx, 1000 bool use_large_llq_hdr) 1001 { 1002 struct ena_admin_feature_llq_desc *llq = &ctx->get_feat_ctx->llq; 1003 struct ena_com_dev *ena_dev = ctx->ena_dev; 1004 uint32_t max_tx_queue_size; 1005 uint32_t max_rx_queue_size; 1006 1007 if (ena_dev->supported_features & BIT(ENA_ADMIN_MAX_QUEUES_EXT)) { 1008 struct ena_admin_queue_ext_feature_fields *max_queue_ext = 1009 &ctx->get_feat_ctx->max_queue_ext.max_queue_ext; 1010 max_rx_queue_size = RTE_MIN(max_queue_ext->max_rx_cq_depth, 1011 max_queue_ext->max_rx_sq_depth); 1012 max_tx_queue_size = max_queue_ext->max_tx_cq_depth; 1013 1014 if (ena_dev->tx_mem_queue_type == 1015 ENA_ADMIN_PLACEMENT_POLICY_DEV) { 1016 max_tx_queue_size = RTE_MIN(max_tx_queue_size, 1017 llq->max_llq_depth); 1018 } else { 1019 max_tx_queue_size = RTE_MIN(max_tx_queue_size, 1020 max_queue_ext->max_tx_sq_depth); 1021 } 1022 1023 ctx->max_rx_sgl_size = RTE_MIN(ENA_PKT_MAX_BUFS, 1024 max_queue_ext->max_per_packet_rx_descs); 1025 ctx->max_tx_sgl_size = RTE_MIN(ENA_PKT_MAX_BUFS, 1026 max_queue_ext->max_per_packet_tx_descs); 1027 } else { 1028 struct ena_admin_queue_feature_desc *max_queues = 1029 &ctx->get_feat_ctx->max_queues; 1030 max_rx_queue_size = RTE_MIN(max_queues->max_cq_depth, 1031 max_queues->max_sq_depth); 1032 max_tx_queue_size = max_queues->max_cq_depth; 1033 1034 if (ena_dev->tx_mem_queue_type == 1035 ENA_ADMIN_PLACEMENT_POLICY_DEV) { 1036 max_tx_queue_size = RTE_MIN(max_tx_queue_size, 1037 llq->max_llq_depth); 1038 } else { 1039 max_tx_queue_size = RTE_MIN(max_tx_queue_size, 1040 max_queues->max_sq_depth); 1041 } 1042 1043 ctx->max_rx_sgl_size = RTE_MIN(ENA_PKT_MAX_BUFS, 1044 max_queues->max_packet_rx_descs); 1045 ctx->max_tx_sgl_size = RTE_MIN(ENA_PKT_MAX_BUFS, 1046 max_queues->max_packet_tx_descs); 1047 } 1048 1049 /* Round down to the nearest power of 2 */ 1050 
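	/*
	 * Worked example with hypothetical numbers (not taken from any
	 * particular device): a reported maximum Tx depth of 1500 is rounded
	 * down to the previous power of two, 1024, by rte_align32prevpow2();
	 * if large LLQ headers are then forced below, that value is halved
	 * once more to 512.
	 */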
max_rx_queue_size = rte_align32prevpow2(max_rx_queue_size); 1051 max_tx_queue_size = rte_align32prevpow2(max_tx_queue_size); 1052 1053 if (use_large_llq_hdr) { 1054 if ((llq->entry_size_ctrl_supported & 1055 ENA_ADMIN_LIST_ENTRY_SIZE_256B) && 1056 (ena_dev->tx_mem_queue_type == 1057 ENA_ADMIN_PLACEMENT_POLICY_DEV)) { 1058 max_tx_queue_size /= 2; 1059 PMD_INIT_LOG(INFO, 1060 "Forcing large headers and decreasing maximum Tx queue size to %d\n", 1061 max_tx_queue_size); 1062 } else { 1063 PMD_INIT_LOG(ERR, 1064 "Forcing large headers failed: LLQ is disabled or device does not support large headers\n"); 1065 } 1066 } 1067 1068 if (unlikely(max_rx_queue_size == 0 || max_tx_queue_size == 0)) { 1069 PMD_INIT_LOG(ERR, "Invalid queue size\n"); 1070 return -EFAULT; 1071 } 1072 1073 ctx->max_tx_queue_size = max_tx_queue_size; 1074 ctx->max_rx_queue_size = max_rx_queue_size; 1075 1076 return 0; 1077 } 1078 1079 static void ena_stats_restart(struct rte_eth_dev *dev) 1080 { 1081 struct ena_adapter *adapter = dev->data->dev_private; 1082 1083 rte_atomic64_init(&adapter->drv_stats->ierrors); 1084 rte_atomic64_init(&adapter->drv_stats->oerrors); 1085 rte_atomic64_init(&adapter->drv_stats->rx_nombuf); 1086 adapter->drv_stats->rx_drops = 0; 1087 } 1088 1089 static int ena_stats_get(struct rte_eth_dev *dev, 1090 struct rte_eth_stats *stats) 1091 { 1092 struct ena_admin_basic_stats ena_stats; 1093 struct ena_adapter *adapter = dev->data->dev_private; 1094 struct ena_com_dev *ena_dev = &adapter->ena_dev; 1095 int rc; 1096 int i; 1097 int max_rings_stats; 1098 1099 memset(&ena_stats, 0, sizeof(ena_stats)); 1100 1101 rte_spinlock_lock(&adapter->admin_lock); 1102 rc = ENA_PROXY(adapter, ena_com_get_dev_basic_stats, ena_dev, 1103 &ena_stats); 1104 rte_spinlock_unlock(&adapter->admin_lock); 1105 if (unlikely(rc)) { 1106 PMD_DRV_LOG(ERR, "Could not retrieve statistics from ENA\n"); 1107 return rc; 1108 } 1109 1110 /* Set of basic statistics from ENA */ 1111 stats->ipackets = __MERGE_64B_H_L(ena_stats.rx_pkts_high, 1112 ena_stats.rx_pkts_low); 1113 stats->opackets = __MERGE_64B_H_L(ena_stats.tx_pkts_high, 1114 ena_stats.tx_pkts_low); 1115 stats->ibytes = __MERGE_64B_H_L(ena_stats.rx_bytes_high, 1116 ena_stats.rx_bytes_low); 1117 stats->obytes = __MERGE_64B_H_L(ena_stats.tx_bytes_high, 1118 ena_stats.tx_bytes_low); 1119 1120 /* Driver related stats */ 1121 stats->imissed = adapter->drv_stats->rx_drops; 1122 stats->ierrors = rte_atomic64_read(&adapter->drv_stats->ierrors); 1123 stats->oerrors = rte_atomic64_read(&adapter->drv_stats->oerrors); 1124 stats->rx_nombuf = rte_atomic64_read(&adapter->drv_stats->rx_nombuf); 1125 1126 max_rings_stats = RTE_MIN(dev->data->nb_rx_queues, 1127 RTE_ETHDEV_QUEUE_STAT_CNTRS); 1128 for (i = 0; i < max_rings_stats; ++i) { 1129 struct ena_stats_rx *rx_stats = &adapter->rx_ring[i].rx_stats; 1130 1131 stats->q_ibytes[i] = rx_stats->bytes; 1132 stats->q_ipackets[i] = rx_stats->cnt; 1133 stats->q_errors[i] = rx_stats->bad_desc_num + 1134 rx_stats->bad_req_id; 1135 } 1136 1137 max_rings_stats = RTE_MIN(dev->data->nb_tx_queues, 1138 RTE_ETHDEV_QUEUE_STAT_CNTRS); 1139 for (i = 0; i < max_rings_stats; ++i) { 1140 struct ena_stats_tx *tx_stats = &adapter->tx_ring[i].tx_stats; 1141 1142 stats->q_obytes[i] = tx_stats->bytes; 1143 stats->q_opackets[i] = tx_stats->cnt; 1144 } 1145 1146 return 0; 1147 } 1148 1149 static int ena_mtu_set(struct rte_eth_dev *dev, uint16_t mtu) 1150 { 1151 struct ena_adapter *adapter; 1152 struct ena_com_dev *ena_dev; 1153 int rc = 0; 1154 1155 ena_assert_msg(dev->data != 
NULL, "Uninitialized device\n"); 1156 ena_assert_msg(dev->data->dev_private != NULL, "Uninitialized device\n"); 1157 adapter = dev->data->dev_private; 1158 1159 ena_dev = &adapter->ena_dev; 1160 ena_assert_msg(ena_dev != NULL, "Uninitialized device\n"); 1161 1162 if (mtu > adapter->max_mtu || mtu < ENA_MIN_MTU) { 1163 PMD_DRV_LOG(ERR, 1164 "Invalid MTU setting. New MTU: %d, max MTU: %d, min MTU: %d\n", 1165 mtu, adapter->max_mtu, ENA_MIN_MTU); 1166 return -EINVAL; 1167 } 1168 1169 rc = ENA_PROXY(adapter, ena_com_set_dev_mtu, ena_dev, mtu); 1170 if (rc) 1171 PMD_DRV_LOG(ERR, "Could not set MTU: %d\n", mtu); 1172 else 1173 PMD_DRV_LOG(NOTICE, "MTU set to: %d\n", mtu); 1174 1175 return rc; 1176 } 1177 1178 static int ena_start(struct rte_eth_dev *dev) 1179 { 1180 struct ena_adapter *adapter = dev->data->dev_private; 1181 uint64_t ticks; 1182 int rc = 0; 1183 1184 /* Cannot allocate memory in secondary process */ 1185 if (rte_eal_process_type() != RTE_PROC_PRIMARY) { 1186 PMD_DRV_LOG(WARNING, "dev_start not supported in secondary.\n"); 1187 return -EPERM; 1188 } 1189 1190 rc = ena_check_valid_conf(adapter); 1191 if (rc) 1192 return rc; 1193 1194 rc = ena_setup_rx_intr(dev); 1195 if (rc) 1196 return rc; 1197 1198 rc = ena_queue_start_all(dev, ENA_RING_TYPE_RX); 1199 if (rc) 1200 return rc; 1201 1202 rc = ena_queue_start_all(dev, ENA_RING_TYPE_TX); 1203 if (rc) 1204 goto err_start_tx; 1205 1206 if (adapter->edev_data->dev_conf.rxmode.mq_mode & RTE_ETH_MQ_RX_RSS_FLAG) { 1207 rc = ena_rss_configure(adapter); 1208 if (rc) 1209 goto err_rss_init; 1210 } 1211 1212 ena_stats_restart(dev); 1213 1214 adapter->timestamp_wd = rte_get_timer_cycles(); 1215 adapter->keep_alive_timeout = ENA_DEVICE_KALIVE_TIMEOUT; 1216 1217 ticks = rte_get_timer_hz(); 1218 rte_timer_reset(&adapter->timer_wd, ticks, PERIODICAL, rte_lcore_id(), 1219 ena_timer_wd_callback, dev); 1220 1221 ++adapter->dev_stats.dev_start; 1222 adapter->state = ENA_ADAPTER_STATE_RUNNING; 1223 1224 return 0; 1225 1226 err_rss_init: 1227 ena_queue_stop_all(dev, ENA_RING_TYPE_TX); 1228 err_start_tx: 1229 ena_queue_stop_all(dev, ENA_RING_TYPE_RX); 1230 return rc; 1231 } 1232 1233 static int ena_stop(struct rte_eth_dev *dev) 1234 { 1235 struct ena_adapter *adapter = dev->data->dev_private; 1236 struct ena_com_dev *ena_dev = &adapter->ena_dev; 1237 struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev); 1238 struct rte_intr_handle *intr_handle = pci_dev->intr_handle; 1239 int rc; 1240 1241 /* Cannot free memory in secondary process */ 1242 if (rte_eal_process_type() != RTE_PROC_PRIMARY) { 1243 PMD_DRV_LOG(WARNING, "dev_stop not supported in secondary.\n"); 1244 return -EPERM; 1245 } 1246 1247 rte_timer_stop_sync(&adapter->timer_wd); 1248 ena_queue_stop_all(dev, ENA_RING_TYPE_TX); 1249 ena_queue_stop_all(dev, ENA_RING_TYPE_RX); 1250 1251 if (adapter->trigger_reset) { 1252 rc = ena_com_dev_reset(ena_dev, adapter->reset_reason); 1253 if (rc) 1254 PMD_DRV_LOG(ERR, "Device reset failed, rc: %d\n", rc); 1255 } 1256 1257 rte_intr_disable(intr_handle); 1258 1259 rte_intr_efd_disable(intr_handle); 1260 1261 /* Cleanup vector list */ 1262 rte_intr_vec_list_free(intr_handle); 1263 1264 rte_intr_enable(intr_handle); 1265 1266 ++adapter->dev_stats.dev_stop; 1267 adapter->state = ENA_ADAPTER_STATE_STOPPED; 1268 dev->data->dev_started = 0; 1269 1270 return 0; 1271 } 1272 1273 static int ena_create_io_queue(struct rte_eth_dev *dev, struct ena_ring *ring) 1274 { 1275 struct ena_adapter *adapter = ring->adapter; 1276 struct ena_com_dev *ena_dev = &adapter->ena_dev; 1277 
struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev); 1278 struct rte_intr_handle *intr_handle = pci_dev->intr_handle; 1279 struct ena_com_create_io_ctx ctx = 1280 /* policy set to _HOST just to satisfy icc compiler */ 1281 { ENA_ADMIN_PLACEMENT_POLICY_HOST, 1282 0, 0, 0, 0, 0 }; 1283 uint16_t ena_qid; 1284 unsigned int i; 1285 int rc; 1286 1287 ctx.msix_vector = -1; 1288 if (ring->type == ENA_RING_TYPE_TX) { 1289 ena_qid = ENA_IO_TXQ_IDX(ring->id); 1290 ctx.direction = ENA_COM_IO_QUEUE_DIRECTION_TX; 1291 ctx.mem_queue_type = ena_dev->tx_mem_queue_type; 1292 for (i = 0; i < ring->ring_size; i++) 1293 ring->empty_tx_reqs[i] = i; 1294 } else { 1295 ena_qid = ENA_IO_RXQ_IDX(ring->id); 1296 ctx.direction = ENA_COM_IO_QUEUE_DIRECTION_RX; 1297 if (rte_intr_dp_is_en(intr_handle)) 1298 ctx.msix_vector = 1299 rte_intr_vec_list_index_get(intr_handle, 1300 ring->id); 1301 1302 for (i = 0; i < ring->ring_size; i++) 1303 ring->empty_rx_reqs[i] = i; 1304 } 1305 ctx.queue_size = ring->ring_size; 1306 ctx.qid = ena_qid; 1307 ctx.numa_node = ring->numa_socket_id; 1308 1309 rc = ena_com_create_io_queue(ena_dev, &ctx); 1310 if (rc) { 1311 PMD_DRV_LOG(ERR, 1312 "Failed to create IO queue[%d] (qid:%d), rc: %d\n", 1313 ring->id, ena_qid, rc); 1314 return rc; 1315 } 1316 1317 rc = ena_com_get_io_handlers(ena_dev, ena_qid, 1318 &ring->ena_com_io_sq, 1319 &ring->ena_com_io_cq); 1320 if (rc) { 1321 PMD_DRV_LOG(ERR, 1322 "Failed to get IO queue[%d] handlers, rc: %d\n", 1323 ring->id, rc); 1324 ena_com_destroy_io_queue(ena_dev, ena_qid); 1325 return rc; 1326 } 1327 1328 if (ring->type == ENA_RING_TYPE_TX) 1329 ena_com_update_numa_node(ring->ena_com_io_cq, ctx.numa_node); 1330 1331 /* Start with Rx interrupts being masked. */ 1332 if (ring->type == ENA_RING_TYPE_RX && rte_intr_dp_is_en(intr_handle)) 1333 ena_rx_queue_intr_disable(dev, ring->id); 1334 1335 return 0; 1336 } 1337 1338 static void ena_queue_stop(struct ena_ring *ring) 1339 { 1340 struct ena_com_dev *ena_dev = &ring->adapter->ena_dev; 1341 1342 if (ring->type == ENA_RING_TYPE_RX) { 1343 ena_com_destroy_io_queue(ena_dev, ENA_IO_RXQ_IDX(ring->id)); 1344 ena_rx_queue_release_bufs(ring); 1345 } else { 1346 ena_com_destroy_io_queue(ena_dev, ENA_IO_TXQ_IDX(ring->id)); 1347 ena_tx_queue_release_bufs(ring); 1348 } 1349 } 1350 1351 static void ena_queue_stop_all(struct rte_eth_dev *dev, 1352 enum ena_ring_type ring_type) 1353 { 1354 struct ena_adapter *adapter = dev->data->dev_private; 1355 struct ena_ring *queues = NULL; 1356 uint16_t nb_queues, i; 1357 1358 if (ring_type == ENA_RING_TYPE_RX) { 1359 queues = adapter->rx_ring; 1360 nb_queues = dev->data->nb_rx_queues; 1361 } else { 1362 queues = adapter->tx_ring; 1363 nb_queues = dev->data->nb_tx_queues; 1364 } 1365 1366 for (i = 0; i < nb_queues; ++i) 1367 if (queues[i].configured) 1368 ena_queue_stop(&queues[i]); 1369 } 1370 1371 static int ena_queue_start(struct rte_eth_dev *dev, struct ena_ring *ring) 1372 { 1373 int rc, bufs_num; 1374 1375 ena_assert_msg(ring->configured == 1, 1376 "Trying to start unconfigured queue\n"); 1377 1378 rc = ena_create_io_queue(dev, ring); 1379 if (rc) { 1380 PMD_INIT_LOG(ERR, "Failed to create IO queue\n"); 1381 return rc; 1382 } 1383 1384 ring->next_to_clean = 0; 1385 ring->next_to_use = 0; 1386 1387 if (ring->type == ENA_RING_TYPE_TX) { 1388 ring->tx_stats.available_desc = 1389 ena_com_free_q_entries(ring->ena_com_io_sq); 1390 return 0; 1391 } 1392 1393 bufs_num = ring->ring_size - 1; 1394 rc = ena_populate_rx_queue(ring, bufs_num); 1395 if (rc != bufs_num) { 1396 
ena_com_destroy_io_queue(&ring->adapter->ena_dev, 1397 ENA_IO_RXQ_IDX(ring->id)); 1398 PMD_INIT_LOG(ERR, "Failed to populate Rx ring\n"); 1399 return ENA_COM_FAULT; 1400 } 1401 /* Flush per-core RX buffers pools cache as they can be used on other 1402 * cores as well. 1403 */ 1404 rte_mempool_cache_flush(NULL, ring->mb_pool); 1405 1406 return 0; 1407 } 1408 1409 static int ena_tx_queue_setup(struct rte_eth_dev *dev, 1410 uint16_t queue_idx, 1411 uint16_t nb_desc, 1412 unsigned int socket_id, 1413 const struct rte_eth_txconf *tx_conf) 1414 { 1415 struct ena_ring *txq = NULL; 1416 struct ena_adapter *adapter = dev->data->dev_private; 1417 unsigned int i; 1418 uint16_t dyn_thresh; 1419 1420 txq = &adapter->tx_ring[queue_idx]; 1421 1422 if (txq->configured) { 1423 PMD_DRV_LOG(CRIT, 1424 "API violation. Queue[%d] is already configured\n", 1425 queue_idx); 1426 return ENA_COM_FAULT; 1427 } 1428 1429 if (!rte_is_power_of_2(nb_desc)) { 1430 PMD_DRV_LOG(ERR, 1431 "Unsupported size of Tx queue: %d is not a power of 2.\n", 1432 nb_desc); 1433 return -EINVAL; 1434 } 1435 1436 if (nb_desc > adapter->max_tx_ring_size) { 1437 PMD_DRV_LOG(ERR, 1438 "Unsupported size of Tx queue (max size: %d)\n", 1439 adapter->max_tx_ring_size); 1440 return -EINVAL; 1441 } 1442 1443 txq->port_id = dev->data->port_id; 1444 txq->next_to_clean = 0; 1445 txq->next_to_use = 0; 1446 txq->ring_size = nb_desc; 1447 txq->size_mask = nb_desc - 1; 1448 txq->numa_socket_id = socket_id; 1449 txq->pkts_without_db = false; 1450 txq->last_cleanup_ticks = 0; 1451 1452 txq->tx_buffer_info = rte_zmalloc_socket("txq->tx_buffer_info", 1453 sizeof(struct ena_tx_buffer) * txq->ring_size, 1454 RTE_CACHE_LINE_SIZE, 1455 socket_id); 1456 if (!txq->tx_buffer_info) { 1457 PMD_DRV_LOG(ERR, 1458 "Failed to allocate memory for Tx buffer info\n"); 1459 return -ENOMEM; 1460 } 1461 1462 txq->empty_tx_reqs = rte_zmalloc_socket("txq->empty_tx_reqs", 1463 sizeof(uint16_t) * txq->ring_size, 1464 RTE_CACHE_LINE_SIZE, 1465 socket_id); 1466 if (!txq->empty_tx_reqs) { 1467 PMD_DRV_LOG(ERR, 1468 "Failed to allocate memory for empty Tx requests\n"); 1469 rte_free(txq->tx_buffer_info); 1470 return -ENOMEM; 1471 } 1472 1473 txq->push_buf_intermediate_buf = 1474 rte_zmalloc_socket("txq->push_buf_intermediate_buf", 1475 txq->tx_max_header_size, 1476 RTE_CACHE_LINE_SIZE, 1477 socket_id); 1478 if (!txq->push_buf_intermediate_buf) { 1479 PMD_DRV_LOG(ERR, "Failed to alloc push buffer for LLQ\n"); 1480 rte_free(txq->tx_buffer_info); 1481 rte_free(txq->empty_tx_reqs); 1482 return -ENOMEM; 1483 } 1484 1485 for (i = 0; i < txq->ring_size; i++) 1486 txq->empty_tx_reqs[i] = i; 1487 1488 txq->offloads = tx_conf->offloads | dev->data->dev_conf.txmode.offloads; 1489 1490 /* Check if caller provided the Tx cleanup threshold value. 
 */
	if (tx_conf->tx_free_thresh != 0) {
		txq->tx_free_thresh = tx_conf->tx_free_thresh;
	} else {
		dyn_thresh = txq->ring_size -
			txq->ring_size / ENA_REFILL_THRESH_DIVIDER;
		txq->tx_free_thresh = RTE_MAX(dyn_thresh,
			txq->ring_size - ENA_REFILL_THRESH_PACKET);
	}

	txq->missing_tx_completion_threshold =
		RTE_MIN(txq->ring_size / 2, ENA_DEFAULT_MISSING_COMP);

	/* Store pointer to this queue in upper layer */
	txq->configured = 1;
	dev->data->tx_queues[queue_idx] = txq;

	return 0;
}

static int ena_rx_queue_setup(struct rte_eth_dev *dev,
			      uint16_t queue_idx,
			      uint16_t nb_desc,
			      unsigned int socket_id,
			      const struct rte_eth_rxconf *rx_conf,
			      struct rte_mempool *mp)
{
	struct ena_adapter *adapter = dev->data->dev_private;
	struct ena_ring *rxq = NULL;
	size_t buffer_size;
	int i;
	uint16_t dyn_thresh;

	rxq = &adapter->rx_ring[queue_idx];
	if (rxq->configured) {
		PMD_DRV_LOG(CRIT,
			"API violation. Queue[%d] is already configured\n",
			queue_idx);
		return ENA_COM_FAULT;
	}

	if (!rte_is_power_of_2(nb_desc)) {
		PMD_DRV_LOG(ERR,
			"Unsupported size of Rx queue: %d is not a power of 2.\n",
			nb_desc);
		return -EINVAL;
	}

	if (nb_desc > adapter->max_rx_ring_size) {
		PMD_DRV_LOG(ERR,
			"Unsupported size of Rx queue (max size: %d)\n",
			adapter->max_rx_ring_size);
		return -EINVAL;
	}

	/* ENA doesn't support buffers smaller than 1400 bytes */
	buffer_size = rte_pktmbuf_data_room_size(mp) - RTE_PKTMBUF_HEADROOM;
	if (buffer_size < ENA_RX_BUF_MIN_SIZE) {
		PMD_DRV_LOG(ERR,
			"Unsupported size of Rx buffer: %zu (min size: %d)\n",
			buffer_size, ENA_RX_BUF_MIN_SIZE);
		return -EINVAL;
	}

	rxq->port_id = dev->data->port_id;
	rxq->next_to_clean = 0;
	rxq->next_to_use = 0;
	rxq->ring_size = nb_desc;
	rxq->size_mask = nb_desc - 1;
	rxq->numa_socket_id = socket_id;
	rxq->mb_pool = mp;

	rxq->rx_buffer_info = rte_zmalloc_socket("rxq->buffer_info",
		sizeof(struct ena_rx_buffer) * nb_desc,
		RTE_CACHE_LINE_SIZE,
		socket_id);
	if (!rxq->rx_buffer_info) {
		PMD_DRV_LOG(ERR,
			"Failed to allocate memory for Rx buffer info\n");
		return -ENOMEM;
	}

	rxq->rx_refill_buffer = rte_zmalloc_socket("rxq->rx_refill_buffer",
		sizeof(struct rte_mbuf *) * nb_desc,
		RTE_CACHE_LINE_SIZE,
		socket_id);
	if (!rxq->rx_refill_buffer) {
		PMD_DRV_LOG(ERR,
			"Failed to allocate memory for Rx refill buffer\n");
		rte_free(rxq->rx_buffer_info);
		rxq->rx_buffer_info = NULL;
		return -ENOMEM;
	}

	rxq->empty_rx_reqs = rte_zmalloc_socket("rxq->empty_rx_reqs",
		sizeof(uint16_t) * nb_desc,
		RTE_CACHE_LINE_SIZE,
		socket_id);
	if (!rxq->empty_rx_reqs) {
		PMD_DRV_LOG(ERR,
			"Failed to allocate memory for empty Rx requests\n");
		rte_free(rxq->rx_buffer_info);
		rxq->rx_buffer_info = NULL;
		rte_free(rxq->rx_refill_buffer);
		rxq->rx_refill_buffer = NULL;
		return -ENOMEM;
	}

	for (i = 0; i < nb_desc; i++)
		rxq->empty_rx_reqs[i] = i;

	rxq->offloads = rx_conf->offloads | dev->data->dev_conf.rxmode.offloads;

	if (rx_conf->rx_free_thresh != 0) {
		rxq->rx_free_thresh = rx_conf->rx_free_thresh;
	} else {
		dyn_thresh = rxq->ring_size / ENA_REFILL_THRESH_DIVIDER;
		rxq->rx_free_thresh =
RTE_MIN(dyn_thresh, 1608 (uint16_t)(ENA_REFILL_THRESH_PACKET)); 1609 } 1610 1611 /* Store pointer to this queue in upper layer */ 1612 rxq->configured = 1; 1613 dev->data->rx_queues[queue_idx] = rxq; 1614 1615 return 0; 1616 } 1617 1618 static int ena_add_single_rx_desc(struct ena_com_io_sq *io_sq, 1619 struct rte_mbuf *mbuf, uint16_t id) 1620 { 1621 struct ena_com_buf ebuf; 1622 int rc; 1623 1624 /* prepare physical address for DMA transaction */ 1625 ebuf.paddr = mbuf->buf_iova + RTE_PKTMBUF_HEADROOM; 1626 ebuf.len = mbuf->buf_len - RTE_PKTMBUF_HEADROOM; 1627 1628 /* pass resource to device */ 1629 rc = ena_com_add_single_rx_desc(io_sq, &ebuf, id); 1630 if (unlikely(rc != 0)) 1631 PMD_RX_LOG(WARNING, "Failed adding Rx desc\n"); 1632 1633 return rc; 1634 } 1635 1636 static int ena_populate_rx_queue(struct ena_ring *rxq, unsigned int count) 1637 { 1638 unsigned int i; 1639 int rc; 1640 uint16_t next_to_use = rxq->next_to_use; 1641 uint16_t req_id; 1642 #ifdef RTE_ETHDEV_DEBUG_RX 1643 uint16_t in_use; 1644 #endif 1645 struct rte_mbuf **mbufs = rxq->rx_refill_buffer; 1646 1647 if (unlikely(!count)) 1648 return 0; 1649 1650 #ifdef RTE_ETHDEV_DEBUG_RX 1651 in_use = rxq->ring_size - 1 - 1652 ena_com_free_q_entries(rxq->ena_com_io_sq); 1653 if (unlikely((in_use + count) >= rxq->ring_size)) 1654 PMD_RX_LOG(ERR, "Bad Rx ring state\n"); 1655 #endif 1656 1657 /* get resources for incoming packets */ 1658 rc = rte_pktmbuf_alloc_bulk(rxq->mb_pool, mbufs, count); 1659 if (unlikely(rc < 0)) { 1660 rte_atomic64_inc(&rxq->adapter->drv_stats->rx_nombuf); 1661 ++rxq->rx_stats.mbuf_alloc_fail; 1662 PMD_RX_LOG(DEBUG, "There are not enough free buffers\n"); 1663 return 0; 1664 } 1665 1666 for (i = 0; i < count; i++) { 1667 struct rte_mbuf *mbuf = mbufs[i]; 1668 struct ena_rx_buffer *rx_info; 1669 1670 if (likely((i + 4) < count)) 1671 rte_prefetch0(mbufs[i + 4]); 1672 1673 req_id = rxq->empty_rx_reqs[next_to_use]; 1674 rx_info = &rxq->rx_buffer_info[req_id]; 1675 1676 rc = ena_add_single_rx_desc(rxq->ena_com_io_sq, mbuf, req_id); 1677 if (unlikely(rc != 0)) 1678 break; 1679 1680 rx_info->mbuf = mbuf; 1681 next_to_use = ENA_IDX_NEXT_MASKED(next_to_use, rxq->size_mask); 1682 } 1683 1684 if (unlikely(i < count)) { 1685 PMD_RX_LOG(WARNING, 1686 "Refilled Rx queue[%d] with only %d/%d buffers\n", 1687 rxq->id, i, count); 1688 rte_pktmbuf_free_bulk(&mbufs[i], count - i); 1689 ++rxq->rx_stats.refill_partial; 1690 } 1691 1692 /* When we submitted free resources to device... */ 1693 if (likely(i > 0)) { 1694 /* ...let HW know that it can fill buffers with data. */ 1695 ena_com_write_sq_doorbell(rxq->ena_com_io_sq); 1696 1697 rxq->next_to_use = next_to_use; 1698 } 1699 1700 return i; 1701 } 1702 1703 static int ena_device_init(struct ena_adapter *adapter, 1704 struct rte_pci_device *pdev, 1705 struct ena_com_dev_get_features_ctx *get_feat_ctx) 1706 { 1707 struct ena_com_dev *ena_dev = &adapter->ena_dev; 1708 uint32_t aenq_groups; 1709 int rc; 1710 bool readless_supported; 1711 1712 /* Initialize mmio registers */ 1713 rc = ena_com_mmio_reg_read_request_init(ena_dev); 1714 if (rc) { 1715 PMD_DRV_LOG(ERR, "Failed to init MMIO read less\n"); 1716 return rc; 1717 } 1718 1719 /* The PCIe configuration space revision id indicate if mmio reg 1720 * read is disabled. 
1721 */ 1722 readless_supported = !(pdev->id.class_id & ENA_MMIO_DISABLE_REG_READ); 1723 ena_com_set_mmio_read_mode(ena_dev, readless_supported); 1724 1725 /* reset device */ 1726 rc = ena_com_dev_reset(ena_dev, ENA_REGS_RESET_NORMAL); 1727 if (rc) { 1728 PMD_DRV_LOG(ERR, "Cannot reset device\n"); 1729 goto err_mmio_read_less; 1730 } 1731 1732 /* check FW version */ 1733 rc = ena_com_validate_version(ena_dev); 1734 if (rc) { 1735 PMD_DRV_LOG(ERR, "Device version is too low\n"); 1736 goto err_mmio_read_less; 1737 } 1738 1739 ena_dev->dma_addr_bits = ena_com_get_dma_width(ena_dev); 1740 1741 /* ENA device administration layer init */ 1742 rc = ena_com_admin_init(ena_dev, &aenq_handlers); 1743 if (rc) { 1744 PMD_DRV_LOG(ERR, 1745 "Cannot initialize ENA admin queue\n"); 1746 goto err_mmio_read_less; 1747 } 1748 1749 /* To enable the msix interrupts the driver needs to know the number 1750 * of queues. So the driver uses polling mode to retrieve this 1751 * information. 1752 */ 1753 ena_com_set_admin_polling_mode(ena_dev, true); 1754 1755 ena_config_host_info(ena_dev); 1756 1757 /* Get Device Attributes and features */ 1758 rc = ena_com_get_dev_attr_feat(ena_dev, get_feat_ctx); 1759 if (rc) { 1760 PMD_DRV_LOG(ERR, 1761 "Cannot get attribute for ENA device, rc: %d\n", rc); 1762 goto err_admin_init; 1763 } 1764 1765 aenq_groups = BIT(ENA_ADMIN_LINK_CHANGE) | 1766 BIT(ENA_ADMIN_NOTIFICATION) | 1767 BIT(ENA_ADMIN_KEEP_ALIVE) | 1768 BIT(ENA_ADMIN_FATAL_ERROR) | 1769 BIT(ENA_ADMIN_WARNING); 1770 1771 aenq_groups &= get_feat_ctx->aenq.supported_groups; 1772 1773 adapter->all_aenq_groups = aenq_groups; 1774 1775 return 0; 1776 1777 err_admin_init: 1778 ena_com_admin_destroy(ena_dev); 1779 1780 err_mmio_read_less: 1781 ena_com_mmio_reg_read_request_destroy(ena_dev); 1782 1783 return rc; 1784 } 1785 1786 static void ena_interrupt_handler_rte(void *cb_arg) 1787 { 1788 struct rte_eth_dev *dev = cb_arg; 1789 struct ena_adapter *adapter = dev->data->dev_private; 1790 struct ena_com_dev *ena_dev = &adapter->ena_dev; 1791 1792 ena_com_admin_q_comp_intr_handler(ena_dev); 1793 if (likely(adapter->state != ENA_ADAPTER_STATE_CLOSED)) 1794 ena_com_aenq_intr_handler(ena_dev, dev); 1795 } 1796 1797 static void check_for_missing_keep_alive(struct ena_adapter *adapter) 1798 { 1799 if (!(adapter->active_aenq_groups & BIT(ENA_ADMIN_KEEP_ALIVE))) 1800 return; 1801 1802 if (adapter->keep_alive_timeout == ENA_HW_HINTS_NO_TIMEOUT) 1803 return; 1804 1805 if (unlikely((rte_get_timer_cycles() - adapter->timestamp_wd) >= 1806 adapter->keep_alive_timeout)) { 1807 PMD_DRV_LOG(ERR, "Keep alive timeout\n"); 1808 ena_trigger_reset(adapter, ENA_REGS_RESET_KEEP_ALIVE_TO); 1809 ++adapter->dev_stats.wd_expired; 1810 } 1811 } 1812 1813 /* Check if admin queue is enabled */ 1814 static void check_for_admin_com_state(struct ena_adapter *adapter) 1815 { 1816 if (unlikely(!ena_com_get_admin_running_state(&adapter->ena_dev))) { 1817 PMD_DRV_LOG(ERR, "ENA admin queue is not in running state\n"); 1818 ena_trigger_reset(adapter, ENA_REGS_RESET_ADMIN_TO); 1819 } 1820 } 1821 1822 static int check_for_tx_completion_in_queue(struct ena_adapter *adapter, 1823 struct ena_ring *tx_ring) 1824 { 1825 struct ena_tx_buffer *tx_buf; 1826 uint64_t timestamp; 1827 uint64_t completion_delay; 1828 uint32_t missed_tx = 0; 1829 unsigned int i; 1830 int rc = 0; 1831 1832 for (i = 0; i < tx_ring->ring_size; ++i) { 1833 tx_buf = &tx_ring->tx_buffer_info[i]; 1834 timestamp = tx_buf->timestamp; 1835 1836 if (timestamp == 0) 1837 continue; 1838 1839 completion_delay = 
rte_get_timer_cycles() - timestamp; 1840 if (completion_delay > adapter->missing_tx_completion_to) { 1841 if (unlikely(!tx_buf->print_once)) { 1842 PMD_TX_LOG(WARNING, 1843 "Found a Tx that wasn't completed on time, qid %d, index %d. " 1844 "Missing Tx outstanding for %" PRIu64 " msecs.\n", 1845 tx_ring->id, i, completion_delay / 1846 rte_get_timer_hz() * 1000); 1847 tx_buf->print_once = true; 1848 } 1849 ++missed_tx; 1850 } 1851 } 1852 1853 if (unlikely(missed_tx > tx_ring->missing_tx_completion_threshold)) { 1854 PMD_DRV_LOG(ERR, 1855 "The number of lost Tx completions is above the threshold (%d > %d). " 1856 "Trigger the device reset.\n", 1857 missed_tx, 1858 tx_ring->missing_tx_completion_threshold); 1859 adapter->reset_reason = ENA_REGS_RESET_MISS_TX_CMPL; 1860 adapter->trigger_reset = true; 1861 rc = -EIO; 1862 } 1863 1864 tx_ring->tx_stats.missed_tx += missed_tx; 1865 1866 return rc; 1867 } 1868 1869 static void check_for_tx_completions(struct ena_adapter *adapter) 1870 { 1871 struct ena_ring *tx_ring; 1872 uint64_t tx_cleanup_delay; 1873 size_t qid; 1874 int budget; 1875 uint16_t nb_tx_queues = adapter->edev_data->nb_tx_queues; 1876 1877 if (adapter->missing_tx_completion_to == ENA_HW_HINTS_NO_TIMEOUT) 1878 return; 1879 1880 nb_tx_queues = adapter->edev_data->nb_tx_queues; 1881 budget = adapter->missing_tx_completion_budget; 1882 1883 qid = adapter->last_tx_comp_qid; 1884 while (budget-- > 0) { 1885 tx_ring = &adapter->tx_ring[qid]; 1886 1887 /* Tx cleanup is called only by the burst function and can be 1888 * called dynamically by the application. Also cleanup is 1889 * limited by the threshold. To avoid false detection of the 1890 * missing HW Tx completion, get the delay since last cleanup 1891 * function was called. 1892 */ 1893 tx_cleanup_delay = rte_get_timer_cycles() - 1894 tx_ring->last_cleanup_ticks; 1895 if (tx_cleanup_delay < adapter->tx_cleanup_stall_delay) 1896 check_for_tx_completion_in_queue(adapter, tx_ring); 1897 qid = (qid + 1) % nb_tx_queues; 1898 } 1899 1900 adapter->last_tx_comp_qid = qid; 1901 } 1902 1903 static void ena_timer_wd_callback(__rte_unused struct rte_timer *timer, 1904 void *arg) 1905 { 1906 struct rte_eth_dev *dev = arg; 1907 struct ena_adapter *adapter = dev->data->dev_private; 1908 1909 if (unlikely(adapter->trigger_reset)) 1910 return; 1911 1912 check_for_missing_keep_alive(adapter); 1913 check_for_admin_com_state(adapter); 1914 check_for_tx_completions(adapter); 1915 1916 if (unlikely(adapter->trigger_reset)) { 1917 PMD_DRV_LOG(ERR, "Trigger reset is on\n"); 1918 rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_RESET, 1919 NULL); 1920 } 1921 } 1922 1923 static inline void 1924 set_default_llq_configurations(struct ena_llq_configurations *llq_config, 1925 struct ena_admin_feature_llq_desc *llq, 1926 bool use_large_llq_hdr) 1927 { 1928 llq_config->llq_header_location = ENA_ADMIN_INLINE_HEADER; 1929 llq_config->llq_stride_ctrl = ENA_ADMIN_MULTIPLE_DESCS_PER_ENTRY; 1930 llq_config->llq_num_decs_before_header = 1931 ENA_ADMIN_LLQ_NUM_DESCS_BEFORE_HEADER_2; 1932 1933 if (use_large_llq_hdr && 1934 (llq->entry_size_ctrl_supported & ENA_ADMIN_LIST_ENTRY_SIZE_256B)) { 1935 llq_config->llq_ring_entry_size = 1936 ENA_ADMIN_LIST_ENTRY_SIZE_256B; 1937 llq_config->llq_ring_entry_size_value = 256; 1938 } else { 1939 llq_config->llq_ring_entry_size = 1940 ENA_ADMIN_LIST_ENTRY_SIZE_128B; 1941 llq_config->llq_ring_entry_size_value = 128; 1942 } 1943 } 1944 1945 static int 1946 ena_set_queues_placement_policy(struct ena_adapter *adapter, 1947 struct 
ena_com_dev *ena_dev, 1948 struct ena_admin_feature_llq_desc *llq, 1949 struct ena_llq_configurations *llq_default_configurations) 1950 { 1951 int rc; 1952 u32 llq_feature_mask; 1953 1954 llq_feature_mask = 1 << ENA_ADMIN_LLQ; 1955 if (!(ena_dev->supported_features & llq_feature_mask)) { 1956 PMD_DRV_LOG(INFO, 1957 "LLQ is not supported. Fallback to host mode policy.\n"); 1958 ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST; 1959 return 0; 1960 } 1961 1962 if (adapter->dev_mem_base == NULL) { 1963 PMD_DRV_LOG(ERR, 1964 "LLQ is advertised as supported, but device doesn't expose mem bar\n"); 1965 ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST; 1966 return 0; 1967 } 1968 1969 rc = ena_com_config_dev_mode(ena_dev, llq, llq_default_configurations); 1970 if (unlikely(rc)) { 1971 PMD_INIT_LOG(WARNING, 1972 "Failed to config dev mode. Fallback to host mode policy.\n"); 1973 ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST; 1974 return 0; 1975 } 1976 1977 /* Nothing to config, exit */ 1978 if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_HOST) 1979 return 0; 1980 1981 ena_dev->mem_bar = adapter->dev_mem_base; 1982 1983 return 0; 1984 } 1985 1986 static uint32_t ena_calc_max_io_queue_num(struct ena_com_dev *ena_dev, 1987 struct ena_com_dev_get_features_ctx *get_feat_ctx) 1988 { 1989 uint32_t io_tx_sq_num, io_tx_cq_num, io_rx_num, max_num_io_queues; 1990 1991 /* Regular queues capabilities */ 1992 if (ena_dev->supported_features & BIT(ENA_ADMIN_MAX_QUEUES_EXT)) { 1993 struct ena_admin_queue_ext_feature_fields *max_queue_ext = 1994 &get_feat_ctx->max_queue_ext.max_queue_ext; 1995 io_rx_num = RTE_MIN(max_queue_ext->max_rx_sq_num, 1996 max_queue_ext->max_rx_cq_num); 1997 io_tx_sq_num = max_queue_ext->max_tx_sq_num; 1998 io_tx_cq_num = max_queue_ext->max_tx_cq_num; 1999 } else { 2000 struct ena_admin_queue_feature_desc *max_queues = 2001 &get_feat_ctx->max_queues; 2002 io_tx_sq_num = max_queues->max_sq_num; 2003 io_tx_cq_num = max_queues->max_cq_num; 2004 io_rx_num = RTE_MIN(io_tx_sq_num, io_tx_cq_num); 2005 } 2006 2007 /* In case of LLQ use the llq number in the get feature cmd */ 2008 if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) 2009 io_tx_sq_num = get_feat_ctx->llq.max_llq_num; 2010 2011 max_num_io_queues = RTE_MIN(ENA_MAX_NUM_IO_QUEUES, io_rx_num); 2012 max_num_io_queues = RTE_MIN(max_num_io_queues, io_tx_sq_num); 2013 max_num_io_queues = RTE_MIN(max_num_io_queues, io_tx_cq_num); 2014 2015 if (unlikely(max_num_io_queues == 0)) { 2016 PMD_DRV_LOG(ERR, "Number of IO queues cannot not be 0\n"); 2017 return -EFAULT; 2018 } 2019 2020 return max_num_io_queues; 2021 } 2022 2023 static void 2024 ena_set_offloads(struct ena_offloads *offloads, 2025 struct ena_admin_feature_offload_desc *offload_desc) 2026 { 2027 if (offload_desc->tx & ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_IPV4_MASK) 2028 offloads->tx_offloads |= ENA_IPV4_TSO; 2029 2030 /* Tx IPv4 checksum offloads */ 2031 if (offload_desc->tx & 2032 ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L3_CSUM_IPV4_MASK) 2033 offloads->tx_offloads |= ENA_L3_IPV4_CSUM; 2034 if (offload_desc->tx & 2035 ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV4_CSUM_FULL_MASK) 2036 offloads->tx_offloads |= ENA_L4_IPV4_CSUM; 2037 if (offload_desc->tx & 2038 ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV4_CSUM_PART_MASK) 2039 offloads->tx_offloads |= ENA_L4_IPV4_CSUM_PARTIAL; 2040 2041 /* Tx IPv6 checksum offloads */ 2042 if (offload_desc->tx & 2043 ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV6_CSUM_FULL_MASK) 2044 offloads->tx_offloads |= 
ENA_L4_IPV6_CSUM; 2045 if (offload_desc->tx & 2046 ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV6_CSUM_PART_MASK) 2047 offloads->tx_offloads |= ENA_L4_IPV6_CSUM_PARTIAL; 2048 2049 /* Rx IPv4 checksum offloads */ 2050 if (offload_desc->rx_supported & 2051 ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L3_CSUM_IPV4_MASK) 2052 offloads->rx_offloads |= ENA_L3_IPV4_CSUM; 2053 if (offload_desc->rx_supported & 2054 ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L4_IPV4_CSUM_MASK) 2055 offloads->rx_offloads |= ENA_L4_IPV4_CSUM; 2056 2057 /* Rx IPv6 checksum offloads */ 2058 if (offload_desc->rx_supported & 2059 ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L4_IPV6_CSUM_MASK) 2060 offloads->rx_offloads |= ENA_L4_IPV6_CSUM; 2061 2062 if (offload_desc->rx_supported & 2063 ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_HASH_MASK) 2064 offloads->rx_offloads |= ENA_RX_RSS_HASH; 2065 } 2066 2067 static int ena_init_once(void) 2068 { 2069 static bool init_done; 2070 2071 if (init_done) 2072 return 0; 2073 2074 if (rte_eal_process_type() == RTE_PROC_PRIMARY) { 2075 /* Init timer subsystem for the ENA timer service. */ 2076 rte_timer_subsystem_init(); 2077 /* Register handler for requests from secondary processes. */ 2078 rte_mp_action_register(ENA_MP_NAME, ena_mp_primary_handle); 2079 } 2080 2081 init_done = true; 2082 return 0; 2083 } 2084 2085 static int eth_ena_dev_init(struct rte_eth_dev *eth_dev) 2086 { 2087 struct ena_calc_queue_size_ctx calc_queue_ctx = { 0 }; 2088 struct rte_pci_device *pci_dev; 2089 struct rte_intr_handle *intr_handle; 2090 struct ena_adapter *adapter = eth_dev->data->dev_private; 2091 struct ena_com_dev *ena_dev = &adapter->ena_dev; 2092 struct ena_com_dev_get_features_ctx get_feat_ctx; 2093 struct ena_llq_configurations llq_config; 2094 const char *queue_type_str; 2095 uint32_t max_num_io_queues; 2096 int rc; 2097 static int adapters_found; 2098 bool disable_meta_caching; 2099 2100 eth_dev->dev_ops = &ena_dev_ops; 2101 eth_dev->rx_pkt_burst = ð_ena_recv_pkts; 2102 eth_dev->tx_pkt_burst = ð_ena_xmit_pkts; 2103 eth_dev->tx_pkt_prepare = ð_ena_prep_pkts; 2104 2105 rc = ena_init_once(); 2106 if (rc != 0) 2107 return rc; 2108 2109 if (rte_eal_process_type() != RTE_PROC_PRIMARY) 2110 return 0; 2111 2112 eth_dev->data->dev_flags |= RTE_ETH_DEV_AUTOFILL_QUEUE_XSTATS; 2113 2114 memset(adapter, 0, sizeof(struct ena_adapter)); 2115 ena_dev = &adapter->ena_dev; 2116 2117 adapter->edev_data = eth_dev->data; 2118 2119 pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev); 2120 2121 PMD_INIT_LOG(INFO, "Initializing %x:%x:%x.%d\n", 2122 pci_dev->addr.domain, 2123 pci_dev->addr.bus, 2124 pci_dev->addr.devid, 2125 pci_dev->addr.function); 2126 2127 intr_handle = pci_dev->intr_handle; 2128 2129 adapter->regs = pci_dev->mem_resource[ENA_REGS_BAR].addr; 2130 adapter->dev_mem_base = pci_dev->mem_resource[ENA_MEM_BAR].addr; 2131 2132 if (!adapter->regs) { 2133 PMD_INIT_LOG(CRIT, "Failed to access registers BAR(%d)\n", 2134 ENA_REGS_BAR); 2135 return -ENXIO; 2136 } 2137 2138 ena_dev->reg_bar = adapter->regs; 2139 /* Pass device data as a pointer which can be passed to the IO functions 2140 * by the ena_com (for example - the memory allocation). 
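 * In this PMD the handle is simply struct rte_eth_dev_data; for example,
 * ena_mem_alloc_coherent() later in this file uses it to recover the port
 * id and the adapter when building unique memzone names.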
2141 */ 2142 ena_dev->dmadev = eth_dev->data; 2143 2144 adapter->id_number = adapters_found; 2145 2146 snprintf(adapter->name, ENA_NAME_MAX_LEN, "ena_%d", 2147 adapter->id_number); 2148 2149 adapter->missing_tx_completion_to = ENA_TX_TIMEOUT; 2150 2151 rc = ena_parse_devargs(adapter, pci_dev->device.devargs); 2152 if (rc != 0) { 2153 PMD_INIT_LOG(CRIT, "Failed to parse devargs\n"); 2154 goto err; 2155 } 2156 2157 /* device specific initialization routine */ 2158 rc = ena_device_init(adapter, pci_dev, &get_feat_ctx); 2159 if (rc) { 2160 PMD_INIT_LOG(CRIT, "Failed to init ENA device\n"); 2161 goto err; 2162 } 2163 2164 /* Check if device supports LSC */ 2165 if (!(adapter->all_aenq_groups & BIT(ENA_ADMIN_LINK_CHANGE))) 2166 adapter->edev_data->dev_flags &= ~RTE_ETH_DEV_INTR_LSC; 2167 2168 set_default_llq_configurations(&llq_config, &get_feat_ctx.llq, 2169 adapter->use_large_llq_hdr); 2170 rc = ena_set_queues_placement_policy(adapter, ena_dev, 2171 &get_feat_ctx.llq, &llq_config); 2172 if (unlikely(rc)) { 2173 PMD_INIT_LOG(CRIT, "Failed to set placement policy\n"); 2174 return rc; 2175 } 2176 2177 if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_HOST) 2178 queue_type_str = "Regular"; 2179 else 2180 queue_type_str = "Low latency"; 2181 PMD_DRV_LOG(INFO, "Placement policy: %s\n", queue_type_str); 2182 2183 calc_queue_ctx.ena_dev = ena_dev; 2184 calc_queue_ctx.get_feat_ctx = &get_feat_ctx; 2185 2186 max_num_io_queues = ena_calc_max_io_queue_num(ena_dev, &get_feat_ctx); 2187 rc = ena_calc_io_queue_size(&calc_queue_ctx, 2188 adapter->use_large_llq_hdr); 2189 if (unlikely((rc != 0) || (max_num_io_queues == 0))) { 2190 rc = -EFAULT; 2191 goto err_device_destroy; 2192 } 2193 2194 adapter->max_tx_ring_size = calc_queue_ctx.max_tx_queue_size; 2195 adapter->max_rx_ring_size = calc_queue_ctx.max_rx_queue_size; 2196 adapter->max_tx_sgl_size = calc_queue_ctx.max_tx_sgl_size; 2197 adapter->max_rx_sgl_size = calc_queue_ctx.max_rx_sgl_size; 2198 adapter->max_num_io_queues = max_num_io_queues; 2199 2200 if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) { 2201 disable_meta_caching = 2202 !!(get_feat_ctx.llq.accel_mode.u.get.supported_flags & 2203 BIT(ENA_ADMIN_DISABLE_META_CACHING)); 2204 } else { 2205 disable_meta_caching = false; 2206 } 2207 2208 /* prepare ring structures */ 2209 ena_init_rings(adapter, disable_meta_caching); 2210 2211 ena_config_debug_area(adapter); 2212 2213 /* Set max MTU for this device */ 2214 adapter->max_mtu = get_feat_ctx.dev_attr.max_mtu; 2215 2216 ena_set_offloads(&adapter->offloads, &get_feat_ctx.offload); 2217 2218 /* Copy MAC address and point DPDK to it */ 2219 eth_dev->data->mac_addrs = (struct rte_ether_addr *)adapter->mac_addr; 2220 rte_ether_addr_copy((struct rte_ether_addr *) 2221 get_feat_ctx.dev_attr.mac_addr, 2222 (struct rte_ether_addr *)adapter->mac_addr); 2223 2224 rc = ena_com_rss_init(ena_dev, ENA_RX_RSS_TABLE_LOG_SIZE); 2225 if (unlikely(rc != 0)) { 2226 PMD_DRV_LOG(ERR, "Failed to initialize RSS in ENA device\n"); 2227 goto err_delete_debug_area; 2228 } 2229 2230 adapter->drv_stats = rte_zmalloc("adapter stats", 2231 sizeof(*adapter->drv_stats), 2232 RTE_CACHE_LINE_SIZE); 2233 if (!adapter->drv_stats) { 2234 PMD_DRV_LOG(ERR, 2235 "Failed to allocate memory for adapter statistics\n"); 2236 rc = -ENOMEM; 2237 goto err_rss_destroy; 2238 } 2239 2240 rte_spinlock_init(&adapter->admin_lock); 2241 2242 rte_intr_callback_register(intr_handle, 2243 ena_interrupt_handler_rte, 2244 eth_dev); 2245 rte_intr_enable(intr_handle); 2246 
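	/* The admin completion interrupt handler is registered and enabled at
	 * this point, so the admin queue can leave polling mode and AENQ
	 * notifications can be turned on.
	 */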
ena_com_set_admin_polling_mode(ena_dev, false); 2247 ena_com_admin_aenq_enable(ena_dev); 2248 2249 rte_timer_init(&adapter->timer_wd); 2250 2251 adapters_found++; 2252 adapter->state = ENA_ADAPTER_STATE_INIT; 2253 2254 return 0; 2255 2256 err_rss_destroy: 2257 ena_com_rss_destroy(ena_dev); 2258 err_delete_debug_area: 2259 ena_com_delete_debug_area(ena_dev); 2260 2261 err_device_destroy: 2262 ena_com_delete_host_info(ena_dev); 2263 ena_com_admin_destroy(ena_dev); 2264 2265 err: 2266 return rc; 2267 } 2268 2269 static void ena_destroy_device(struct rte_eth_dev *eth_dev) 2270 { 2271 struct ena_adapter *adapter = eth_dev->data->dev_private; 2272 struct ena_com_dev *ena_dev = &adapter->ena_dev; 2273 2274 if (adapter->state == ENA_ADAPTER_STATE_FREE) 2275 return; 2276 2277 ena_com_set_admin_running_state(ena_dev, false); 2278 2279 if (adapter->state != ENA_ADAPTER_STATE_CLOSED) 2280 ena_close(eth_dev); 2281 2282 ena_com_rss_destroy(ena_dev); 2283 2284 ena_com_delete_debug_area(ena_dev); 2285 ena_com_delete_host_info(ena_dev); 2286 2287 ena_com_abort_admin_commands(ena_dev); 2288 ena_com_wait_for_abort_completion(ena_dev); 2289 ena_com_admin_destroy(ena_dev); 2290 ena_com_mmio_reg_read_request_destroy(ena_dev); 2291 2292 adapter->state = ENA_ADAPTER_STATE_FREE; 2293 } 2294 2295 static int eth_ena_dev_uninit(struct rte_eth_dev *eth_dev) 2296 { 2297 if (rte_eal_process_type() != RTE_PROC_PRIMARY) 2298 return 0; 2299 2300 ena_destroy_device(eth_dev); 2301 2302 return 0; 2303 } 2304 2305 static int ena_dev_configure(struct rte_eth_dev *dev) 2306 { 2307 struct ena_adapter *adapter = dev->data->dev_private; 2308 int rc; 2309 2310 adapter->state = ENA_ADAPTER_STATE_CONFIG; 2311 2312 if (dev->data->dev_conf.rxmode.mq_mode & RTE_ETH_MQ_RX_RSS_FLAG) 2313 dev->data->dev_conf.rxmode.offloads |= RTE_ETH_RX_OFFLOAD_RSS_HASH; 2314 dev->data->dev_conf.txmode.offloads |= RTE_ETH_TX_OFFLOAD_MULTI_SEGS; 2315 2316 /* Scattered Rx cannot be turned off in the HW, so this capability must 2317 * be forced. 2318 */ 2319 dev->data->scattered_rx = 1; 2320 2321 adapter->last_tx_comp_qid = 0; 2322 2323 adapter->missing_tx_completion_budget = 2324 RTE_MIN(ENA_MONITORED_TX_QUEUES, dev->data->nb_tx_queues); 2325 2326 /* To avoid detection of the spurious Tx completion timeout due to 2327 * application not calling the Tx cleanup function, set timeout for the 2328 * Tx queue which should be half of the missing completion timeout for a 2329 * safety. If there will be a lot of missing Tx completions in the 2330 * queue, they will be detected sooner or later. 
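	 * As an illustration: if the missing completion timeout corresponds to
	 * 5 seconds, the cleanup stall delay below evaluates to about 2.5
	 * seconds' worth of timer cycles.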
2331 */ 2332 adapter->tx_cleanup_stall_delay = adapter->missing_tx_completion_to / 2; 2333 2334 rc = ena_configure_aenq(adapter); 2335 2336 return rc; 2337 } 2338 2339 static void ena_init_rings(struct ena_adapter *adapter, 2340 bool disable_meta_caching) 2341 { 2342 size_t i; 2343 2344 for (i = 0; i < adapter->max_num_io_queues; i++) { 2345 struct ena_ring *ring = &adapter->tx_ring[i]; 2346 2347 ring->configured = 0; 2348 ring->type = ENA_RING_TYPE_TX; 2349 ring->adapter = adapter; 2350 ring->id = i; 2351 ring->tx_mem_queue_type = adapter->ena_dev.tx_mem_queue_type; 2352 ring->tx_max_header_size = adapter->ena_dev.tx_max_header_size; 2353 ring->sgl_size = adapter->max_tx_sgl_size; 2354 ring->disable_meta_caching = disable_meta_caching; 2355 } 2356 2357 for (i = 0; i < adapter->max_num_io_queues; i++) { 2358 struct ena_ring *ring = &adapter->rx_ring[i]; 2359 2360 ring->configured = 0; 2361 ring->type = ENA_RING_TYPE_RX; 2362 ring->adapter = adapter; 2363 ring->id = i; 2364 ring->sgl_size = adapter->max_rx_sgl_size; 2365 } 2366 } 2367 2368 static uint64_t ena_get_rx_port_offloads(struct ena_adapter *adapter) 2369 { 2370 uint64_t port_offloads = 0; 2371 2372 if (adapter->offloads.rx_offloads & ENA_L3_IPV4_CSUM) 2373 port_offloads |= RTE_ETH_RX_OFFLOAD_IPV4_CKSUM; 2374 2375 if (adapter->offloads.rx_offloads & 2376 (ENA_L4_IPV4_CSUM | ENA_L4_IPV6_CSUM)) 2377 port_offloads |= 2378 RTE_ETH_RX_OFFLOAD_UDP_CKSUM | RTE_ETH_RX_OFFLOAD_TCP_CKSUM; 2379 2380 if (adapter->offloads.rx_offloads & ENA_RX_RSS_HASH) 2381 port_offloads |= RTE_ETH_RX_OFFLOAD_RSS_HASH; 2382 2383 port_offloads |= RTE_ETH_RX_OFFLOAD_SCATTER; 2384 2385 return port_offloads; 2386 } 2387 2388 static uint64_t ena_get_tx_port_offloads(struct ena_adapter *adapter) 2389 { 2390 uint64_t port_offloads = 0; 2391 2392 if (adapter->offloads.tx_offloads & ENA_IPV4_TSO) 2393 port_offloads |= RTE_ETH_TX_OFFLOAD_TCP_TSO; 2394 2395 if (adapter->offloads.tx_offloads & ENA_L3_IPV4_CSUM) 2396 port_offloads |= RTE_ETH_TX_OFFLOAD_IPV4_CKSUM; 2397 if (adapter->offloads.tx_offloads & 2398 (ENA_L4_IPV4_CSUM_PARTIAL | ENA_L4_IPV4_CSUM | 2399 ENA_L4_IPV6_CSUM | ENA_L4_IPV6_CSUM_PARTIAL)) 2400 port_offloads |= 2401 RTE_ETH_TX_OFFLOAD_UDP_CKSUM | RTE_ETH_TX_OFFLOAD_TCP_CKSUM; 2402 2403 port_offloads |= RTE_ETH_TX_OFFLOAD_MULTI_SEGS; 2404 2405 return port_offloads; 2406 } 2407 2408 static uint64_t ena_get_rx_queue_offloads(struct ena_adapter *adapter) 2409 { 2410 RTE_SET_USED(adapter); 2411 2412 return 0; 2413 } 2414 2415 static uint64_t ena_get_tx_queue_offloads(struct ena_adapter *adapter) 2416 { 2417 RTE_SET_USED(adapter); 2418 2419 return 0; 2420 } 2421 2422 static int ena_infos_get(struct rte_eth_dev *dev, 2423 struct rte_eth_dev_info *dev_info) 2424 { 2425 struct ena_adapter *adapter; 2426 struct ena_com_dev *ena_dev; 2427 2428 ena_assert_msg(dev->data != NULL, "Uninitialized device\n"); 2429 ena_assert_msg(dev->data->dev_private != NULL, "Uninitialized device\n"); 2430 adapter = dev->data->dev_private; 2431 2432 ena_dev = &adapter->ena_dev; 2433 ena_assert_msg(ena_dev != NULL, "Uninitialized device\n"); 2434 2435 dev_info->speed_capa = 2436 RTE_ETH_LINK_SPEED_1G | 2437 RTE_ETH_LINK_SPEED_2_5G | 2438 RTE_ETH_LINK_SPEED_5G | 2439 RTE_ETH_LINK_SPEED_10G | 2440 RTE_ETH_LINK_SPEED_25G | 2441 RTE_ETH_LINK_SPEED_40G | 2442 RTE_ETH_LINK_SPEED_50G | 2443 RTE_ETH_LINK_SPEED_100G; 2444 2445 /* Inform framework about available features */ 2446 dev_info->rx_offload_capa = ena_get_rx_port_offloads(adapter); 2447 dev_info->tx_offload_capa = 
ena_get_tx_port_offloads(adapter); 2448 dev_info->rx_queue_offload_capa = ena_get_rx_queue_offloads(adapter); 2449 dev_info->tx_queue_offload_capa = ena_get_tx_queue_offloads(adapter); 2450 2451 dev_info->flow_type_rss_offloads = ENA_ALL_RSS_HF; 2452 dev_info->hash_key_size = ENA_HASH_KEY_SIZE; 2453 2454 dev_info->min_rx_bufsize = ENA_MIN_FRAME_LEN; 2455 dev_info->max_rx_pktlen = adapter->max_mtu + RTE_ETHER_HDR_LEN + 2456 RTE_ETHER_CRC_LEN; 2457 dev_info->min_mtu = ENA_MIN_MTU; 2458 dev_info->max_mtu = adapter->max_mtu; 2459 dev_info->max_mac_addrs = 1; 2460 2461 dev_info->max_rx_queues = adapter->max_num_io_queues; 2462 dev_info->max_tx_queues = adapter->max_num_io_queues; 2463 dev_info->reta_size = ENA_RX_RSS_TABLE_SIZE; 2464 2465 dev_info->rx_desc_lim.nb_max = adapter->max_rx_ring_size; 2466 dev_info->rx_desc_lim.nb_min = ENA_MIN_RING_DESC; 2467 dev_info->rx_desc_lim.nb_seg_max = RTE_MIN(ENA_PKT_MAX_BUFS, 2468 adapter->max_rx_sgl_size); 2469 dev_info->rx_desc_lim.nb_mtu_seg_max = RTE_MIN(ENA_PKT_MAX_BUFS, 2470 adapter->max_rx_sgl_size); 2471 2472 dev_info->tx_desc_lim.nb_max = adapter->max_tx_ring_size; 2473 dev_info->tx_desc_lim.nb_min = ENA_MIN_RING_DESC; 2474 dev_info->tx_desc_lim.nb_seg_max = RTE_MIN(ENA_PKT_MAX_BUFS, 2475 adapter->max_tx_sgl_size); 2476 dev_info->tx_desc_lim.nb_mtu_seg_max = RTE_MIN(ENA_PKT_MAX_BUFS, 2477 adapter->max_tx_sgl_size); 2478 2479 dev_info->default_rxportconf.ring_size = ENA_DEFAULT_RING_SIZE; 2480 dev_info->default_txportconf.ring_size = ENA_DEFAULT_RING_SIZE; 2481 2482 return 0; 2483 } 2484 2485 static inline void ena_init_rx_mbuf(struct rte_mbuf *mbuf, uint16_t len) 2486 { 2487 mbuf->data_len = len; 2488 mbuf->data_off = RTE_PKTMBUF_HEADROOM; 2489 mbuf->refcnt = 1; 2490 mbuf->next = NULL; 2491 } 2492 2493 static struct rte_mbuf *ena_rx_mbuf(struct ena_ring *rx_ring, 2494 struct ena_com_rx_buf_info *ena_bufs, 2495 uint32_t descs, 2496 uint16_t *next_to_clean, 2497 uint8_t offset) 2498 { 2499 struct rte_mbuf *mbuf; 2500 struct rte_mbuf *mbuf_head; 2501 struct ena_rx_buffer *rx_info; 2502 int rc; 2503 uint16_t ntc, len, req_id, buf = 0; 2504 2505 if (unlikely(descs == 0)) 2506 return NULL; 2507 2508 ntc = *next_to_clean; 2509 2510 len = ena_bufs[buf].len; 2511 req_id = ena_bufs[buf].req_id; 2512 2513 rx_info = &rx_ring->rx_buffer_info[req_id]; 2514 2515 mbuf = rx_info->mbuf; 2516 RTE_ASSERT(mbuf != NULL); 2517 2518 ena_init_rx_mbuf(mbuf, len); 2519 2520 /* Fill the mbuf head with the data specific for 1st segment. */ 2521 mbuf_head = mbuf; 2522 mbuf_head->nb_segs = descs; 2523 mbuf_head->port = rx_ring->port_id; 2524 mbuf_head->pkt_len = len; 2525 mbuf_head->data_off += offset; 2526 2527 rx_info->mbuf = NULL; 2528 rx_ring->empty_rx_reqs[ntc] = req_id; 2529 ntc = ENA_IDX_NEXT_MASKED(ntc, rx_ring->size_mask); 2530 2531 while (--descs) { 2532 ++buf; 2533 len = ena_bufs[buf].len; 2534 req_id = ena_bufs[buf].req_id; 2535 2536 rx_info = &rx_ring->rx_buffer_info[req_id]; 2537 RTE_ASSERT(rx_info->mbuf != NULL); 2538 2539 if (unlikely(len == 0)) { 2540 /* 2541 * Some devices can pass descriptor with the length 0. 2542 * To avoid confusion, the PMD is simply putting the 2543 * descriptor back, as it was never used. We'll avoid 2544 * mbuf allocation that way. 2545 */ 2546 rc = ena_add_single_rx_desc(rx_ring->ena_com_io_sq, 2547 rx_info->mbuf, req_id); 2548 if (unlikely(rc != 0)) { 2549 /* Free the mbuf in case of an error. 
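			 * rte_mbuf_raw_free() is sufficient here: the mbuf came
			 * straight from the mempool during refill, is not
			 * chained and was never handed to the application.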
*/ 2550 rte_mbuf_raw_free(rx_info->mbuf); 2551 } else { 2552 /* 2553 * If there was no error, just exit the loop as 2554 * 0 length descriptor is always the last one. 2555 */ 2556 break; 2557 } 2558 } else { 2559 /* Create an mbuf chain. */ 2560 mbuf->next = rx_info->mbuf; 2561 mbuf = mbuf->next; 2562 2563 ena_init_rx_mbuf(mbuf, len); 2564 mbuf_head->pkt_len += len; 2565 } 2566 2567 /* 2568 * Mark the descriptor as depleted and perform necessary 2569 * cleanup. 2570 * This code will execute in two cases: 2571 * 1. Descriptor len was greater than 0 - normal situation. 2572 * 2. Descriptor len was 0 and we failed to add the descriptor 2573 * to the device. In that situation, we should try to add 2574 * the mbuf again in the populate routine and mark the 2575 * descriptor as used up by the device. 2576 */ 2577 rx_info->mbuf = NULL; 2578 rx_ring->empty_rx_reqs[ntc] = req_id; 2579 ntc = ENA_IDX_NEXT_MASKED(ntc, rx_ring->size_mask); 2580 } 2581 2582 *next_to_clean = ntc; 2583 2584 return mbuf_head; 2585 } 2586 2587 static uint16_t eth_ena_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, 2588 uint16_t nb_pkts) 2589 { 2590 struct ena_ring *rx_ring = (struct ena_ring *)(rx_queue); 2591 unsigned int free_queue_entries; 2592 uint16_t next_to_clean = rx_ring->next_to_clean; 2593 uint16_t descs_in_use; 2594 struct rte_mbuf *mbuf; 2595 uint16_t completed; 2596 struct ena_com_rx_ctx ena_rx_ctx; 2597 int i, rc = 0; 2598 bool fill_hash; 2599 2600 #ifdef RTE_ETHDEV_DEBUG_RX 2601 /* Check adapter state */ 2602 if (unlikely(rx_ring->adapter->state != ENA_ADAPTER_STATE_RUNNING)) { 2603 PMD_RX_LOG(ALERT, 2604 "Trying to receive pkts while device is NOT running\n"); 2605 return 0; 2606 } 2607 #endif 2608 2609 fill_hash = rx_ring->offloads & RTE_ETH_RX_OFFLOAD_RSS_HASH; 2610 2611 descs_in_use = rx_ring->ring_size - 2612 ena_com_free_q_entries(rx_ring->ena_com_io_sq) - 1; 2613 nb_pkts = RTE_MIN(descs_in_use, nb_pkts); 2614 2615 for (completed = 0; completed < nb_pkts; completed++) { 2616 ena_rx_ctx.max_bufs = rx_ring->sgl_size; 2617 ena_rx_ctx.ena_bufs = rx_ring->ena_bufs; 2618 ena_rx_ctx.descs = 0; 2619 ena_rx_ctx.pkt_offset = 0; 2620 /* receive packet context */ 2621 rc = ena_com_rx_pkt(rx_ring->ena_com_io_cq, 2622 rx_ring->ena_com_io_sq, 2623 &ena_rx_ctx); 2624 if (unlikely(rc)) { 2625 PMD_RX_LOG(ERR, 2626 "Failed to get the packet from the device, rc: %d\n", 2627 rc); 2628 if (rc == ENA_COM_NO_SPACE) { 2629 ++rx_ring->rx_stats.bad_desc_num; 2630 ena_trigger_reset(rx_ring->adapter, 2631 ENA_REGS_RESET_TOO_MANY_RX_DESCS); 2632 } else { 2633 ++rx_ring->rx_stats.bad_req_id; 2634 ena_trigger_reset(rx_ring->adapter, 2635 ENA_REGS_RESET_INV_RX_REQ_ID); 2636 } 2637 return 0; 2638 } 2639 2640 mbuf = ena_rx_mbuf(rx_ring, 2641 ena_rx_ctx.ena_bufs, 2642 ena_rx_ctx.descs, 2643 &next_to_clean, 2644 ena_rx_ctx.pkt_offset); 2645 if (unlikely(mbuf == NULL)) { 2646 for (i = 0; i < ena_rx_ctx.descs; ++i) { 2647 rx_ring->empty_rx_reqs[next_to_clean] = 2648 rx_ring->ena_bufs[i].req_id; 2649 next_to_clean = ENA_IDX_NEXT_MASKED( 2650 next_to_clean, rx_ring->size_mask); 2651 } 2652 break; 2653 } 2654 2655 /* fill mbuf attributes if any */ 2656 ena_rx_mbuf_prepare(rx_ring, mbuf, &ena_rx_ctx, fill_hash); 2657 2658 if (unlikely(mbuf->ol_flags & 2659 (RTE_MBUF_F_RX_IP_CKSUM_BAD | RTE_MBUF_F_RX_L4_CKSUM_BAD))) 2660 rte_atomic64_inc(&rx_ring->adapter->drv_stats->ierrors); 2661 2662 rx_pkts[completed] = mbuf; 2663 rx_ring->rx_stats.bytes += mbuf->pkt_len; 2664 } 2665 2666 rx_ring->rx_stats.cnt += completed; 2667 rx_ring->next_to_clean = 
next_to_clean; 2668 2669 free_queue_entries = ena_com_free_q_entries(rx_ring->ena_com_io_sq); 2670 2671 /* Burst refill to save doorbells, memory barriers, const interval */ 2672 if (free_queue_entries >= rx_ring->rx_free_thresh) { 2673 ena_com_update_dev_comp_head(rx_ring->ena_com_io_cq); 2674 ena_populate_rx_queue(rx_ring, free_queue_entries); 2675 } 2676 2677 return completed; 2678 } 2679 2680 static uint16_t 2681 eth_ena_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, 2682 uint16_t nb_pkts) 2683 { 2684 int32_t ret; 2685 uint32_t i; 2686 struct rte_mbuf *m; 2687 struct ena_ring *tx_ring = (struct ena_ring *)(tx_queue); 2688 struct ena_adapter *adapter = tx_ring->adapter; 2689 struct rte_ipv4_hdr *ip_hdr; 2690 uint64_t ol_flags; 2691 uint64_t l4_csum_flag; 2692 uint64_t dev_offload_capa; 2693 uint16_t frag_field; 2694 bool need_pseudo_csum; 2695 2696 dev_offload_capa = adapter->offloads.tx_offloads; 2697 for (i = 0; i != nb_pkts; i++) { 2698 m = tx_pkts[i]; 2699 ol_flags = m->ol_flags; 2700 2701 /* Check if any offload flag was set */ 2702 if (ol_flags == 0) 2703 continue; 2704 2705 l4_csum_flag = ol_flags & RTE_MBUF_F_TX_L4_MASK; 2706 /* SCTP checksum offload is not supported by the ENA. */ 2707 if ((ol_flags & ENA_TX_OFFLOAD_NOTSUP_MASK) || 2708 l4_csum_flag == RTE_MBUF_F_TX_SCTP_CKSUM) { 2709 PMD_TX_LOG(DEBUG, 2710 "mbuf[%" PRIu32 "] has unsupported offloads flags set: 0x%" PRIu64 "\n", 2711 i, ol_flags); 2712 rte_errno = ENOTSUP; 2713 return i; 2714 } 2715 2716 if (unlikely(m->nb_segs >= tx_ring->sgl_size && 2717 !(tx_ring->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV && 2718 m->nb_segs == tx_ring->sgl_size && 2719 m->data_len < tx_ring->tx_max_header_size))) { 2720 PMD_TX_LOG(DEBUG, 2721 "mbuf[%" PRIu32 "] has too many segments: %" PRIu16 "\n", 2722 i, m->nb_segs); 2723 rte_errno = EINVAL; 2724 return i; 2725 } 2726 2727 #ifdef RTE_LIBRTE_ETHDEV_DEBUG 2728 /* Check if requested offload is also enabled for the queue */ 2729 if ((ol_flags & RTE_MBUF_F_TX_IP_CKSUM && 2730 !(tx_ring->offloads & RTE_ETH_TX_OFFLOAD_IPV4_CKSUM)) || 2731 (l4_csum_flag == RTE_MBUF_F_TX_TCP_CKSUM && 2732 !(tx_ring->offloads & RTE_ETH_TX_OFFLOAD_TCP_CKSUM)) || 2733 (l4_csum_flag == RTE_MBUF_F_TX_UDP_CKSUM && 2734 !(tx_ring->offloads & RTE_ETH_TX_OFFLOAD_UDP_CKSUM))) { 2735 PMD_TX_LOG(DEBUG, 2736 "mbuf[%" PRIu32 "]: requested offloads: %" PRIu16 " are not enabled for the queue[%u]\n", 2737 i, m->nb_segs, tx_ring->id); 2738 rte_errno = EINVAL; 2739 return i; 2740 } 2741 2742 /* The caller is obligated to set l2 and l3 len if any cksum 2743 * offload is enabled. 2744 */ 2745 if (unlikely(ol_flags & (RTE_MBUF_F_TX_IP_CKSUM | RTE_MBUF_F_TX_L4_MASK) && 2746 (m->l2_len == 0 || m->l3_len == 0))) { 2747 PMD_TX_LOG(DEBUG, 2748 "mbuf[%" PRIu32 "]: l2_len or l3_len values are 0 while the offload was requested\n", 2749 i); 2750 rte_errno = EINVAL; 2751 return i; 2752 } 2753 ret = rte_validate_tx_offload(m); 2754 if (ret != 0) { 2755 rte_errno = -ret; 2756 return i; 2757 } 2758 #endif 2759 2760 /* Verify HW support for requested offloads and determine if 2761 * pseudo header checksum is needed. 
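		 * Devices that only advertise a *_CSUM_PARTIAL capability
		 * expect the pseudo-header checksum to be filled in by the
		 * driver, which is what rte_net_intel_cksum_flags_prepare()
		 * does at the end of this loop.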
2762 */ 2763 need_pseudo_csum = false; 2764 if (ol_flags & RTE_MBUF_F_TX_IPV4) { 2765 if (ol_flags & RTE_MBUF_F_TX_IP_CKSUM && 2766 !(dev_offload_capa & ENA_L3_IPV4_CSUM)) { 2767 rte_errno = ENOTSUP; 2768 return i; 2769 } 2770 2771 if (ol_flags & RTE_MBUF_F_TX_TCP_SEG && 2772 !(dev_offload_capa & ENA_IPV4_TSO)) { 2773 rte_errno = ENOTSUP; 2774 return i; 2775 } 2776 2777 /* Check HW capabilities and if pseudo csum is needed 2778 * for L4 offloads. 2779 */ 2780 if (l4_csum_flag != RTE_MBUF_F_TX_L4_NO_CKSUM && 2781 !(dev_offload_capa & ENA_L4_IPV4_CSUM)) { 2782 if (dev_offload_capa & 2783 ENA_L4_IPV4_CSUM_PARTIAL) { 2784 need_pseudo_csum = true; 2785 } else { 2786 rte_errno = ENOTSUP; 2787 return i; 2788 } 2789 } 2790 2791 /* Parse the DF flag */ 2792 ip_hdr = rte_pktmbuf_mtod_offset(m, 2793 struct rte_ipv4_hdr *, m->l2_len); 2794 frag_field = rte_be_to_cpu_16(ip_hdr->fragment_offset); 2795 if (frag_field & RTE_IPV4_HDR_DF_FLAG) { 2796 m->packet_type |= RTE_PTYPE_L4_NONFRAG; 2797 } else if (ol_flags & RTE_MBUF_F_TX_TCP_SEG) { 2798 /* In case we are supposed to TSO and have DF 2799 * not set (DF=0) hardware must be provided with 2800 * partial checksum. 2801 */ 2802 need_pseudo_csum = true; 2803 } 2804 } else if (ol_flags & RTE_MBUF_F_TX_IPV6) { 2805 /* There is no support for IPv6 TSO as for now. */ 2806 if (ol_flags & RTE_MBUF_F_TX_TCP_SEG) { 2807 rte_errno = ENOTSUP; 2808 return i; 2809 } 2810 2811 /* Check HW capabilities and if pseudo csum is needed */ 2812 if (l4_csum_flag != RTE_MBUF_F_TX_L4_NO_CKSUM && 2813 !(dev_offload_capa & ENA_L4_IPV6_CSUM)) { 2814 if (dev_offload_capa & 2815 ENA_L4_IPV6_CSUM_PARTIAL) { 2816 need_pseudo_csum = true; 2817 } else { 2818 rte_errno = ENOTSUP; 2819 return i; 2820 } 2821 } 2822 } 2823 2824 if (need_pseudo_csum) { 2825 ret = rte_net_intel_cksum_flags_prepare(m, ol_flags); 2826 if (ret != 0) { 2827 rte_errno = -ret; 2828 return i; 2829 } 2830 } 2831 } 2832 2833 return i; 2834 } 2835 2836 static void ena_update_hints(struct ena_adapter *adapter, 2837 struct ena_admin_ena_hw_hints *hints) 2838 { 2839 if (hints->admin_completion_tx_timeout) 2840 adapter->ena_dev.admin_queue.completion_timeout = 2841 hints->admin_completion_tx_timeout * 1000; 2842 2843 if (hints->mmio_read_timeout) 2844 /* convert to usec */ 2845 adapter->ena_dev.mmio_read.reg_read_to = 2846 hints->mmio_read_timeout * 1000; 2847 2848 if (hints->driver_watchdog_timeout) { 2849 if (hints->driver_watchdog_timeout == ENA_HW_HINTS_NO_TIMEOUT) 2850 adapter->keep_alive_timeout = ENA_HW_HINTS_NO_TIMEOUT; 2851 else 2852 // Convert msecs to ticks 2853 adapter->keep_alive_timeout = 2854 (hints->driver_watchdog_timeout * 2855 rte_get_timer_hz()) / 1000; 2856 } 2857 } 2858 2859 static void ena_tx_map_mbuf(struct ena_ring *tx_ring, 2860 struct ena_tx_buffer *tx_info, 2861 struct rte_mbuf *mbuf, 2862 void **push_header, 2863 uint16_t *header_len) 2864 { 2865 struct ena_com_buf *ena_buf; 2866 uint16_t delta, seg_len, push_len; 2867 2868 delta = 0; 2869 seg_len = mbuf->data_len; 2870 2871 tx_info->mbuf = mbuf; 2872 ena_buf = tx_info->bufs; 2873 2874 if (tx_ring->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) { 2875 /* 2876 * Tx header might be (and will be in most cases) smaller than 2877 * tx_max_header_size. But it's not an issue to send more data 2878 * to the device, than actually needed if the mbuf size is 2879 * greater than tx_max_header_size. 
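		 * The 'delta' computed below is the part of the pushed header
		 * that spills into the following segment(s); it is skipped when
		 * mapping those segments so the same bytes are not sent twice.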
2880 */ 2881 push_len = RTE_MIN(mbuf->pkt_len, tx_ring->tx_max_header_size); 2882 *header_len = push_len; 2883 2884 if (likely(push_len <= seg_len)) { 2885 /* If the push header is in the single segment, then 2886 * just point it to the 1st mbuf data. 2887 */ 2888 *push_header = rte_pktmbuf_mtod(mbuf, uint8_t *); 2889 } else { 2890 /* If the push header lays in the several segments, copy 2891 * it to the intermediate buffer. 2892 */ 2893 rte_pktmbuf_read(mbuf, 0, push_len, 2894 tx_ring->push_buf_intermediate_buf); 2895 *push_header = tx_ring->push_buf_intermediate_buf; 2896 delta = push_len - seg_len; 2897 } 2898 } else { 2899 *push_header = NULL; 2900 *header_len = 0; 2901 push_len = 0; 2902 } 2903 2904 /* Process first segment taking into consideration pushed header */ 2905 if (seg_len > push_len) { 2906 ena_buf->paddr = mbuf->buf_iova + 2907 mbuf->data_off + 2908 push_len; 2909 ena_buf->len = seg_len - push_len; 2910 ena_buf++; 2911 tx_info->num_of_bufs++; 2912 } 2913 2914 while ((mbuf = mbuf->next) != NULL) { 2915 seg_len = mbuf->data_len; 2916 2917 /* Skip mbufs if whole data is pushed as a header */ 2918 if (unlikely(delta > seg_len)) { 2919 delta -= seg_len; 2920 continue; 2921 } 2922 2923 ena_buf->paddr = mbuf->buf_iova + mbuf->data_off + delta; 2924 ena_buf->len = seg_len - delta; 2925 ena_buf++; 2926 tx_info->num_of_bufs++; 2927 2928 delta = 0; 2929 } 2930 } 2931 2932 static int ena_xmit_mbuf(struct ena_ring *tx_ring, struct rte_mbuf *mbuf) 2933 { 2934 struct ena_tx_buffer *tx_info; 2935 struct ena_com_tx_ctx ena_tx_ctx = { { 0 } }; 2936 uint16_t next_to_use; 2937 uint16_t header_len; 2938 uint16_t req_id; 2939 void *push_header; 2940 int nb_hw_desc; 2941 int rc; 2942 2943 /* Checking for space for 2 additional metadata descriptors due to 2944 * possible header split and metadata descriptor 2945 */ 2946 if (!ena_com_sq_have_enough_space(tx_ring->ena_com_io_sq, 2947 mbuf->nb_segs + 2)) { 2948 PMD_DRV_LOG(DEBUG, "Not enough space in the tx queue\n"); 2949 return ENA_COM_NO_MEM; 2950 } 2951 2952 next_to_use = tx_ring->next_to_use; 2953 2954 req_id = tx_ring->empty_tx_reqs[next_to_use]; 2955 tx_info = &tx_ring->tx_buffer_info[req_id]; 2956 tx_info->num_of_bufs = 0; 2957 RTE_ASSERT(tx_info->mbuf == NULL); 2958 2959 ena_tx_map_mbuf(tx_ring, tx_info, mbuf, &push_header, &header_len); 2960 2961 ena_tx_ctx.ena_bufs = tx_info->bufs; 2962 ena_tx_ctx.push_header = push_header; 2963 ena_tx_ctx.num_bufs = tx_info->num_of_bufs; 2964 ena_tx_ctx.req_id = req_id; 2965 ena_tx_ctx.header_len = header_len; 2966 2967 /* Set Tx offloads flags, if applicable */ 2968 ena_tx_mbuf_prepare(mbuf, &ena_tx_ctx, tx_ring->offloads, 2969 tx_ring->disable_meta_caching); 2970 2971 if (unlikely(ena_com_is_doorbell_needed(tx_ring->ena_com_io_sq, 2972 &ena_tx_ctx))) { 2973 PMD_TX_LOG(DEBUG, 2974 "LLQ Tx max burst size of queue %d achieved, writing doorbell to send burst\n", 2975 tx_ring->id); 2976 ena_com_write_sq_doorbell(tx_ring->ena_com_io_sq); 2977 tx_ring->tx_stats.doorbells++; 2978 tx_ring->pkts_without_db = false; 2979 } 2980 2981 /* prepare the packet's descriptors to dma engine */ 2982 rc = ena_com_prepare_tx(tx_ring->ena_com_io_sq, &ena_tx_ctx, 2983 &nb_hw_desc); 2984 if (unlikely(rc)) { 2985 PMD_DRV_LOG(ERR, "Failed to prepare Tx buffers, rc: %d\n", rc); 2986 ++tx_ring->tx_stats.prepare_ctx_err; 2987 ena_trigger_reset(tx_ring->adapter, 2988 ENA_REGS_RESET_DRIVER_INVALID_STATE); 2989 return rc; 2990 } 2991 2992 tx_info->tx_descs = nb_hw_desc; 2993 tx_info->timestamp = rte_get_timer_cycles(); 2994 2995 
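	/* The timestamp stored above is compared against
	 * missing_tx_completion_to by check_for_tx_completion_in_queue() to
	 * detect packets that were never completed by the device.
	 */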
tx_ring->tx_stats.cnt++; 2996 tx_ring->tx_stats.bytes += mbuf->pkt_len; 2997 2998 tx_ring->next_to_use = ENA_IDX_NEXT_MASKED(next_to_use, 2999 tx_ring->size_mask); 3000 3001 return 0; 3002 } 3003 3004 static int ena_tx_cleanup(void *txp, uint32_t free_pkt_cnt) 3005 { 3006 struct ena_ring *tx_ring = (struct ena_ring *)txp; 3007 unsigned int total_tx_descs = 0; 3008 unsigned int total_tx_pkts = 0; 3009 uint16_t cleanup_budget; 3010 uint16_t next_to_clean = tx_ring->next_to_clean; 3011 3012 /* 3013 * If free_pkt_cnt is equal to 0, it means that the user requested 3014 * full cleanup, so attempt to release all Tx descriptors 3015 * (ring_size - 1 -> size_mask) 3016 */ 3017 cleanup_budget = (free_pkt_cnt == 0) ? tx_ring->size_mask : free_pkt_cnt; 3018 3019 while (likely(total_tx_pkts < cleanup_budget)) { 3020 struct rte_mbuf *mbuf; 3021 struct ena_tx_buffer *tx_info; 3022 uint16_t req_id; 3023 3024 if (ena_com_tx_comp_req_id_get(tx_ring->ena_com_io_cq, &req_id) != 0) 3025 break; 3026 3027 if (unlikely(validate_tx_req_id(tx_ring, req_id) != 0)) 3028 break; 3029 3030 /* Get Tx info & store how many descs were processed */ 3031 tx_info = &tx_ring->tx_buffer_info[req_id]; 3032 tx_info->timestamp = 0; 3033 3034 mbuf = tx_info->mbuf; 3035 rte_pktmbuf_free(mbuf); 3036 3037 tx_info->mbuf = NULL; 3038 tx_ring->empty_tx_reqs[next_to_clean] = req_id; 3039 3040 total_tx_descs += tx_info->tx_descs; 3041 total_tx_pkts++; 3042 3043 /* Put back descriptor to the ring for reuse */ 3044 next_to_clean = ENA_IDX_NEXT_MASKED(next_to_clean, 3045 tx_ring->size_mask); 3046 } 3047 3048 if (likely(total_tx_descs > 0)) { 3049 /* acknowledge completion of sent packets */ 3050 tx_ring->next_to_clean = next_to_clean; 3051 ena_com_comp_ack(tx_ring->ena_com_io_sq, total_tx_descs); 3052 ena_com_update_dev_comp_head(tx_ring->ena_com_io_cq); 3053 } 3054 3055 /* Notify completion handler that full cleanup was performed */ 3056 if (free_pkt_cnt == 0 || total_tx_pkts < cleanup_budget) 3057 tx_ring->last_cleanup_ticks = rte_get_timer_cycles(); 3058 3059 return total_tx_pkts; 3060 } 3061 3062 static uint16_t eth_ena_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, 3063 uint16_t nb_pkts) 3064 { 3065 struct ena_ring *tx_ring = (struct ena_ring *)(tx_queue); 3066 int available_desc; 3067 uint16_t sent_idx = 0; 3068 3069 #ifdef RTE_ETHDEV_DEBUG_TX 3070 /* Check adapter state */ 3071 if (unlikely(tx_ring->adapter->state != ENA_ADAPTER_STATE_RUNNING)) { 3072 PMD_TX_LOG(ALERT, 3073 "Trying to xmit pkts while device is NOT running\n"); 3074 return 0; 3075 } 3076 #endif 3077 3078 available_desc = ena_com_free_q_entries(tx_ring->ena_com_io_sq); 3079 if (available_desc < tx_ring->tx_free_thresh) 3080 ena_tx_cleanup((void *)tx_ring, 0); 3081 3082 for (sent_idx = 0; sent_idx < nb_pkts; sent_idx++) { 3083 if (ena_xmit_mbuf(tx_ring, tx_pkts[sent_idx])) 3084 break; 3085 tx_ring->pkts_without_db = true; 3086 rte_prefetch0(tx_pkts[ENA_IDX_ADD_MASKED(sent_idx, 4, 3087 tx_ring->size_mask)]); 3088 } 3089 3090 /* If there are ready packets to be xmitted... 
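	 * (pkts_without_db is only set when at least one mbuf was queued above,
	 * so a single doorbell covers the whole burst.)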
*/ 3091 if (likely(tx_ring->pkts_without_db)) { 3092 /* ...let HW do its best :-) */ 3093 ena_com_write_sq_doorbell(tx_ring->ena_com_io_sq); 3094 tx_ring->tx_stats.doorbells++; 3095 tx_ring->pkts_without_db = false; 3096 } 3097 3098 tx_ring->tx_stats.available_desc = 3099 ena_com_free_q_entries(tx_ring->ena_com_io_sq); 3100 tx_ring->tx_stats.tx_poll++; 3101 3102 return sent_idx; 3103 } 3104 3105 int ena_copy_eni_stats(struct ena_adapter *adapter, struct ena_stats_eni *stats) 3106 { 3107 int rc; 3108 3109 rte_spinlock_lock(&adapter->admin_lock); 3110 /* Retrieve and store the latest statistics from the AQ. This ensures 3111 * that previous value is returned in case of a com error. 3112 */ 3113 rc = ENA_PROXY(adapter, ena_com_get_eni_stats, &adapter->ena_dev, 3114 (struct ena_admin_eni_stats *)stats); 3115 rte_spinlock_unlock(&adapter->admin_lock); 3116 if (rc != 0) { 3117 if (rc == ENA_COM_UNSUPPORTED) { 3118 PMD_DRV_LOG(DEBUG, 3119 "Retrieving ENI metrics is not supported\n"); 3120 } else { 3121 PMD_DRV_LOG(WARNING, 3122 "Failed to get ENI metrics, rc: %d\n", rc); 3123 } 3124 return rc; 3125 } 3126 3127 return 0; 3128 } 3129 3130 /** 3131 * DPDK callback to retrieve names of extended device statistics 3132 * 3133 * @param dev 3134 * Pointer to Ethernet device structure. 3135 * @param[out] xstats_names 3136 * Buffer to insert names into. 3137 * @param n 3138 * Number of names. 3139 * 3140 * @return 3141 * Number of xstats names. 3142 */ 3143 static int ena_xstats_get_names(struct rte_eth_dev *dev, 3144 struct rte_eth_xstat_name *xstats_names, 3145 unsigned int n) 3146 { 3147 unsigned int xstats_count = ena_xstats_calc_num(dev->data); 3148 unsigned int stat, i, count = 0; 3149 3150 if (n < xstats_count || !xstats_names) 3151 return xstats_count; 3152 3153 for (stat = 0; stat < ENA_STATS_ARRAY_GLOBAL; stat++, count++) 3154 strcpy(xstats_names[count].name, 3155 ena_stats_global_strings[stat].name); 3156 3157 for (stat = 0; stat < ENA_STATS_ARRAY_ENI; stat++, count++) 3158 strcpy(xstats_names[count].name, 3159 ena_stats_eni_strings[stat].name); 3160 3161 for (stat = 0; stat < ENA_STATS_ARRAY_RX; stat++) 3162 for (i = 0; i < dev->data->nb_rx_queues; i++, count++) 3163 snprintf(xstats_names[count].name, 3164 sizeof(xstats_names[count].name), 3165 "rx_q%d_%s", i, 3166 ena_stats_rx_strings[stat].name); 3167 3168 for (stat = 0; stat < ENA_STATS_ARRAY_TX; stat++) 3169 for (i = 0; i < dev->data->nb_tx_queues; i++, count++) 3170 snprintf(xstats_names[count].name, 3171 sizeof(xstats_names[count].name), 3172 "tx_q%d_%s", i, 3173 ena_stats_tx_strings[stat].name); 3174 3175 return xstats_count; 3176 } 3177 3178 /** 3179 * DPDK callback to retrieve names of extended device statistics for the given 3180 * ids. 3181 * 3182 * @param dev 3183 * Pointer to Ethernet device structure. 3184 * @param[out] xstats_names 3185 * Buffer to insert names into. 3186 * @param ids 3187 * IDs array for which the names should be retrieved. 3188 * @param size 3189 * Number of ids. 3190 * 3191 * @return 3192 * Positive value: number of xstats names. Negative value: error code. 
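 *
 * The flat xstats ID space follows the order used by ena_xstats_get_names():
 * global stats first, then ENI stats, then per-queue Rx stats and finally
 * per-queue Tx stats.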
3193 */ 3194 static int ena_xstats_get_names_by_id(struct rte_eth_dev *dev, 3195 const uint64_t *ids, 3196 struct rte_eth_xstat_name *xstats_names, 3197 unsigned int size) 3198 { 3199 uint64_t xstats_count = ena_xstats_calc_num(dev->data); 3200 uint64_t id, qid; 3201 unsigned int i; 3202 3203 if (xstats_names == NULL) 3204 return xstats_count; 3205 3206 for (i = 0; i < size; ++i) { 3207 id = ids[i]; 3208 if (id > xstats_count) { 3209 PMD_DRV_LOG(ERR, 3210 "ID value out of range: id=%" PRIu64 ", xstats_num=%" PRIu64 "\n", 3211 id, xstats_count); 3212 return -EINVAL; 3213 } 3214 3215 if (id < ENA_STATS_ARRAY_GLOBAL) { 3216 strcpy(xstats_names[i].name, 3217 ena_stats_global_strings[id].name); 3218 continue; 3219 } 3220 3221 id -= ENA_STATS_ARRAY_GLOBAL; 3222 if (id < ENA_STATS_ARRAY_ENI) { 3223 strcpy(xstats_names[i].name, 3224 ena_stats_eni_strings[id].name); 3225 continue; 3226 } 3227 3228 id -= ENA_STATS_ARRAY_ENI; 3229 if (id < ENA_STATS_ARRAY_RX) { 3230 qid = id / dev->data->nb_rx_queues; 3231 id %= dev->data->nb_rx_queues; 3232 snprintf(xstats_names[i].name, 3233 sizeof(xstats_names[i].name), 3234 "rx_q%" PRIu64 "d_%s", 3235 qid, ena_stats_rx_strings[id].name); 3236 continue; 3237 } 3238 3239 id -= ENA_STATS_ARRAY_RX; 3240 /* Although this condition is not needed, it was added for 3241 * compatibility if new xstat structure would be ever added. 3242 */ 3243 if (id < ENA_STATS_ARRAY_TX) { 3244 qid = id / dev->data->nb_tx_queues; 3245 id %= dev->data->nb_tx_queues; 3246 snprintf(xstats_names[i].name, 3247 sizeof(xstats_names[i].name), 3248 "tx_q%" PRIu64 "_%s", 3249 qid, ena_stats_tx_strings[id].name); 3250 continue; 3251 } 3252 } 3253 3254 return i; 3255 } 3256 3257 /** 3258 * DPDK callback to get extended device statistics. 3259 * 3260 * @param dev 3261 * Pointer to Ethernet device structure. 3262 * @param[out] stats 3263 * Stats table output buffer. 3264 * @param n 3265 * The size of the stats table. 3266 * 3267 * @return 3268 * Number of xstats on success, negative on failure. 3269 */ 3270 static int ena_xstats_get(struct rte_eth_dev *dev, 3271 struct rte_eth_xstat *xstats, 3272 unsigned int n) 3273 { 3274 struct ena_adapter *adapter = dev->data->dev_private; 3275 unsigned int xstats_count = ena_xstats_calc_num(dev->data); 3276 struct ena_stats_eni eni_stats; 3277 unsigned int stat, i, count = 0; 3278 int stat_offset; 3279 void *stats_begin; 3280 3281 if (n < xstats_count) 3282 return xstats_count; 3283 3284 if (!xstats) 3285 return 0; 3286 3287 for (stat = 0; stat < ENA_STATS_ARRAY_GLOBAL; stat++, count++) { 3288 stat_offset = ena_stats_global_strings[stat].stat_offset; 3289 stats_begin = &adapter->dev_stats; 3290 3291 xstats[count].id = count; 3292 xstats[count].value = *((uint64_t *) 3293 ((char *)stats_begin + stat_offset)); 3294 } 3295 3296 /* Even if the function below fails, we should copy previous (or initial 3297 * values) to keep structure of rte_eth_xstat consistent. 
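	 * The return code of ena_copy_eni_stats() is therefore deliberately
	 * ignored: the xstats table layout must stay the same whether or not
	 * the admin command succeeded.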
3298 */ 3299 ena_copy_eni_stats(adapter, &eni_stats); 3300 for (stat = 0; stat < ENA_STATS_ARRAY_ENI; stat++, count++) { 3301 stat_offset = ena_stats_eni_strings[stat].stat_offset; 3302 stats_begin = &eni_stats; 3303 3304 xstats[count].id = count; 3305 xstats[count].value = *((uint64_t *) 3306 ((char *)stats_begin + stat_offset)); 3307 } 3308 3309 for (stat = 0; stat < ENA_STATS_ARRAY_RX; stat++) { 3310 for (i = 0; i < dev->data->nb_rx_queues; i++, count++) { 3311 stat_offset = ena_stats_rx_strings[stat].stat_offset; 3312 stats_begin = &adapter->rx_ring[i].rx_stats; 3313 3314 xstats[count].id = count; 3315 xstats[count].value = *((uint64_t *) 3316 ((char *)stats_begin + stat_offset)); 3317 } 3318 } 3319 3320 for (stat = 0; stat < ENA_STATS_ARRAY_TX; stat++) { 3321 for (i = 0; i < dev->data->nb_tx_queues; i++, count++) { 3322 stat_offset = ena_stats_tx_strings[stat].stat_offset; 3323 stats_begin = &adapter->tx_ring[i].rx_stats; 3324 3325 xstats[count].id = count; 3326 xstats[count].value = *((uint64_t *) 3327 ((char *)stats_begin + stat_offset)); 3328 } 3329 } 3330 3331 return count; 3332 } 3333 3334 static int ena_xstats_get_by_id(struct rte_eth_dev *dev, 3335 const uint64_t *ids, 3336 uint64_t *values, 3337 unsigned int n) 3338 { 3339 struct ena_adapter *adapter = dev->data->dev_private; 3340 struct ena_stats_eni eni_stats; 3341 uint64_t id; 3342 uint64_t rx_entries, tx_entries; 3343 unsigned int i; 3344 int qid; 3345 int valid = 0; 3346 bool was_eni_copied = false; 3347 3348 for (i = 0; i < n; ++i) { 3349 id = ids[i]; 3350 /* Check if id belongs to global statistics */ 3351 if (id < ENA_STATS_ARRAY_GLOBAL) { 3352 values[i] = *((uint64_t *)&adapter->dev_stats + id); 3353 ++valid; 3354 continue; 3355 } 3356 3357 /* Check if id belongs to ENI statistics */ 3358 id -= ENA_STATS_ARRAY_GLOBAL; 3359 if (id < ENA_STATS_ARRAY_ENI) { 3360 /* Avoid reading ENI stats multiple times in a single 3361 * function call, as it requires communication with the 3362 * admin queue. 3363 */ 3364 if (!was_eni_copied) { 3365 was_eni_copied = true; 3366 ena_copy_eni_stats(adapter, &eni_stats); 3367 } 3368 values[i] = *((uint64_t *)&eni_stats + id); 3369 ++valid; 3370 continue; 3371 } 3372 3373 /* Check if id belongs to rx queue statistics */ 3374 id -= ENA_STATS_ARRAY_ENI; 3375 rx_entries = ENA_STATS_ARRAY_RX * dev->data->nb_rx_queues; 3376 if (id < rx_entries) { 3377 qid = id % dev->data->nb_rx_queues; 3378 id /= dev->data->nb_rx_queues; 3379 values[i] = *((uint64_t *) 3380 &adapter->rx_ring[qid].rx_stats + id); 3381 ++valid; 3382 continue; 3383 } 3384 /* Check if id belongs to rx queue statistics */ 3385 id -= rx_entries; 3386 tx_entries = ENA_STATS_ARRAY_TX * dev->data->nb_tx_queues; 3387 if (id < tx_entries) { 3388 qid = id % dev->data->nb_tx_queues; 3389 id /= dev->data->nb_tx_queues; 3390 values[i] = *((uint64_t *) 3391 &adapter->tx_ring[qid].tx_stats + id); 3392 ++valid; 3393 continue; 3394 } 3395 } 3396 3397 return valid; 3398 } 3399 3400 static int ena_process_uint_devarg(const char *key, 3401 const char *value, 3402 void *opaque) 3403 { 3404 struct ena_adapter *adapter = opaque; 3405 char *str_end; 3406 uint64_t uint_value; 3407 3408 uint_value = strtoull(value, &str_end, 10); 3409 if (value == str_end) { 3410 PMD_INIT_LOG(ERR, 3411 "Invalid value for key '%s'. 
Only uint values are accepted.\n", 3412 key); 3413 return -EINVAL; 3414 } 3415 3416 if (strcmp(key, ENA_DEVARG_MISS_TXC_TO) == 0) { 3417 if (uint_value > ENA_MAX_TX_TIMEOUT_SECONDS) { 3418 PMD_INIT_LOG(ERR, 3419 "Tx timeout too high: %" PRIu64 " sec. Maximum allowed: %d sec.\n", 3420 uint_value, ENA_MAX_TX_TIMEOUT_SECONDS); 3421 return -EINVAL; 3422 } else if (uint_value == 0) { 3423 PMD_INIT_LOG(INFO, 3424 "Check for missing Tx completions has been disabled.\n"); 3425 adapter->missing_tx_completion_to = 3426 ENA_HW_HINTS_NO_TIMEOUT; 3427 } else { 3428 PMD_INIT_LOG(INFO, 3429 "Tx packet completion timeout set to %" PRIu64 " seconds.\n", 3430 uint_value); 3431 adapter->missing_tx_completion_to = 3432 uint_value * rte_get_timer_hz(); 3433 } 3434 } 3435 3436 return 0; 3437 } 3438 3439 static int ena_process_bool_devarg(const char *key, 3440 const char *value, 3441 void *opaque) 3442 { 3443 struct ena_adapter *adapter = opaque; 3444 bool bool_value; 3445 3446 /* Parse the value. */ 3447 if (strcmp(value, "1") == 0) { 3448 bool_value = true; 3449 } else if (strcmp(value, "0") == 0) { 3450 bool_value = false; 3451 } else { 3452 PMD_INIT_LOG(ERR, 3453 "Invalid value: '%s' for key '%s'. Accepted: '0' or '1'\n", 3454 value, key); 3455 return -EINVAL; 3456 } 3457 3458 /* Now, assign it to the proper adapter field. */ 3459 if (strcmp(key, ENA_DEVARG_LARGE_LLQ_HDR) == 0) 3460 adapter->use_large_llq_hdr = bool_value; 3461 3462 return 0; 3463 } 3464 3465 static int ena_parse_devargs(struct ena_adapter *adapter, 3466 struct rte_devargs *devargs) 3467 { 3468 static const char * const allowed_args[] = { 3469 ENA_DEVARG_LARGE_LLQ_HDR, 3470 ENA_DEVARG_MISS_TXC_TO, 3471 NULL, 3472 }; 3473 struct rte_kvargs *kvlist; 3474 int rc; 3475 3476 if (devargs == NULL) 3477 return 0; 3478 3479 kvlist = rte_kvargs_parse(devargs->args, allowed_args); 3480 if (kvlist == NULL) { 3481 PMD_INIT_LOG(ERR, "Invalid device arguments: %s\n", 3482 devargs->args); 3483 return -EINVAL; 3484 } 3485 3486 rc = rte_kvargs_process(kvlist, ENA_DEVARG_LARGE_LLQ_HDR, 3487 ena_process_bool_devarg, adapter); 3488 if (rc != 0) 3489 goto exit; 3490 rc = rte_kvargs_process(kvlist, ENA_DEVARG_MISS_TXC_TO, 3491 ena_process_uint_devarg, adapter); 3492 3493 exit: 3494 rte_kvargs_free(kvlist); 3495 3496 return rc; 3497 } 3498 3499 static int ena_setup_rx_intr(struct rte_eth_dev *dev) 3500 { 3501 struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev); 3502 struct rte_intr_handle *intr_handle = pci_dev->intr_handle; 3503 int rc; 3504 uint16_t vectors_nb, i; 3505 bool rx_intr_requested = dev->data->dev_conf.intr_conf.rxq; 3506 3507 if (!rx_intr_requested) 3508 return 0; 3509 3510 if (!rte_intr_cap_multiple(intr_handle)) { 3511 PMD_DRV_LOG(ERR, 3512 "Rx interrupt requested, but it isn't supported by the PCI driver\n"); 3513 return -ENOTSUP; 3514 } 3515 3516 /* Disable interrupt mapping before the configuration starts. */ 3517 rte_intr_disable(intr_handle); 3518 3519 /* Verify if there are enough vectors available. 
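	 * One vector is used per Rx queue, while vector 0 stays reserved for
	 * the admin/AENQ interrupt; hence the RTE_INTR_VEC_RXTX_OFFSET mapping
	 * and the rte_intr_allow_others() check below.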
*/ 3520 vectors_nb = dev->data->nb_rx_queues; 3521 if (vectors_nb > RTE_MAX_RXTX_INTR_VEC_ID) { 3522 PMD_DRV_LOG(ERR, 3523 "Too many Rx interrupts requested, maximum number: %d\n", 3524 RTE_MAX_RXTX_INTR_VEC_ID); 3525 rc = -ENOTSUP; 3526 goto enable_intr; 3527 } 3528 3529 /* Allocate the vector list */ 3530 if (rte_intr_vec_list_alloc(intr_handle, "intr_vec", 3531 dev->data->nb_rx_queues)) { 3532 PMD_DRV_LOG(ERR, 3533 "Failed to allocate interrupt vector for %d queues\n", 3534 dev->data->nb_rx_queues); 3535 rc = -ENOMEM; 3536 goto enable_intr; 3537 } 3538 3539 rc = rte_intr_efd_enable(intr_handle, vectors_nb); 3540 if (rc != 0) 3541 goto free_intr_vec; 3542 3543 if (!rte_intr_allow_others(intr_handle)) { 3544 PMD_DRV_LOG(ERR, 3545 "Not enough interrupts available to use both ENA Admin and Rx interrupts\n"); 3546 goto disable_intr_efd; 3547 } 3548 3549 for (i = 0; i < vectors_nb; ++i) 3550 if (rte_intr_vec_list_index_set(intr_handle, i, 3551 RTE_INTR_VEC_RXTX_OFFSET + i)) 3552 goto disable_intr_efd; 3553 3554 rte_intr_enable(intr_handle); 3555 return 0; 3556 3557 disable_intr_efd: 3558 rte_intr_efd_disable(intr_handle); 3559 free_intr_vec: 3560 rte_intr_vec_list_free(intr_handle); 3561 enable_intr: 3562 rte_intr_enable(intr_handle); 3563 return rc; 3564 } 3565 3566 static void ena_rx_queue_intr_set(struct rte_eth_dev *dev, 3567 uint16_t queue_id, 3568 bool unmask) 3569 { 3570 struct ena_adapter *adapter = dev->data->dev_private; 3571 struct ena_ring *rxq = &adapter->rx_ring[queue_id]; 3572 struct ena_eth_io_intr_reg intr_reg; 3573 3574 ena_com_update_intr_reg(&intr_reg, 0, 0, unmask); 3575 ena_com_unmask_intr(rxq->ena_com_io_cq, &intr_reg); 3576 } 3577 3578 static int ena_rx_queue_intr_enable(struct rte_eth_dev *dev, 3579 uint16_t queue_id) 3580 { 3581 ena_rx_queue_intr_set(dev, queue_id, true); 3582 3583 return 0; 3584 } 3585 3586 static int ena_rx_queue_intr_disable(struct rte_eth_dev *dev, 3587 uint16_t queue_id) 3588 { 3589 ena_rx_queue_intr_set(dev, queue_id, false); 3590 3591 return 0; 3592 } 3593 3594 static int ena_configure_aenq(struct ena_adapter *adapter) 3595 { 3596 uint32_t aenq_groups = adapter->all_aenq_groups; 3597 int rc; 3598 3599 /* All_aenq_groups holds all AENQ functions supported by the device and 3600 * the HW, so at first we need to be sure the LSC request is valid. 3601 */ 3602 if (adapter->edev_data->dev_conf.intr_conf.lsc != 0) { 3603 if (!(aenq_groups & BIT(ENA_ADMIN_LINK_CHANGE))) { 3604 PMD_DRV_LOG(ERR, 3605 "LSC requested, but it's not supported by the AENQ\n"); 3606 return -EINVAL; 3607 } 3608 } else { 3609 /* If LSC wasn't enabled by the app, let's enable all supported 3610 * AENQ procedures except the LSC. 
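		 * Keep-alive, notification, fatal error and warning events stay
		 * enabled in that case, provided the device advertised them in
		 * all_aenq_groups.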
3611 */ 3612 aenq_groups &= ~BIT(ENA_ADMIN_LINK_CHANGE); 3613 } 3614 3615 rc = ena_com_set_aenq_config(&adapter->ena_dev, aenq_groups); 3616 if (rc != 0) { 3617 PMD_DRV_LOG(ERR, "Cannot configure AENQ groups, rc=%d\n", rc); 3618 return rc; 3619 } 3620 3621 adapter->active_aenq_groups = aenq_groups; 3622 3623 return 0; 3624 } 3625 3626 int ena_mp_indirect_table_set(struct ena_adapter *adapter) 3627 { 3628 return ENA_PROXY(adapter, ena_com_indirect_table_set, &adapter->ena_dev); 3629 } 3630 3631 int ena_mp_indirect_table_get(struct ena_adapter *adapter, 3632 uint32_t *indirect_table) 3633 { 3634 return ENA_PROXY(adapter, ena_com_indirect_table_get, &adapter->ena_dev, 3635 indirect_table); 3636 } 3637 3638 /********************************************************************* 3639 * ena_plat_dpdk.h functions implementations 3640 *********************************************************************/ 3641 3642 const struct rte_memzone * 3643 ena_mem_alloc_coherent(struct rte_eth_dev_data *data, size_t size, 3644 int socket_id, unsigned int alignment, void **virt_addr, 3645 dma_addr_t *phys_addr) 3646 { 3647 char z_name[RTE_MEMZONE_NAMESIZE]; 3648 struct ena_adapter *adapter = data->dev_private; 3649 const struct rte_memzone *memzone; 3650 int rc; 3651 3652 rc = snprintf(z_name, RTE_MEMZONE_NAMESIZE, "ena_p%d_mz%" PRIu64 "", 3653 data->port_id, adapter->memzone_cnt); 3654 if (rc >= RTE_MEMZONE_NAMESIZE) { 3655 PMD_DRV_LOG(ERR, 3656 "Name for the ena_com memzone is too long. Port: %d, mz_num: %" PRIu64 "\n", 3657 data->port_id, adapter->memzone_cnt); 3658 goto error; 3659 } 3660 adapter->memzone_cnt++; 3661 3662 memzone = rte_memzone_reserve_aligned(z_name, size, socket_id, 3663 RTE_MEMZONE_IOVA_CONTIG, alignment); 3664 if (memzone == NULL) { 3665 PMD_DRV_LOG(ERR, "Failed to allocate ena_com memzone: %s\n", 3666 z_name); 3667 goto error; 3668 } 3669 3670 memset(memzone->addr, 0, size); 3671 *virt_addr = memzone->addr; 3672 *phys_addr = memzone->iova; 3673 3674 return memzone; 3675 3676 error: 3677 *virt_addr = NULL; 3678 *phys_addr = 0; 3679 3680 return NULL; 3681 } 3682 3683 3684 /********************************************************************* 3685 * PMD configuration 3686 *********************************************************************/ 3687 static int eth_ena_pci_probe(struct rte_pci_driver *pci_drv __rte_unused, 3688 struct rte_pci_device *pci_dev) 3689 { 3690 return rte_eth_dev_pci_generic_probe(pci_dev, 3691 sizeof(struct ena_adapter), eth_ena_dev_init); 3692 } 3693 3694 static int eth_ena_pci_remove(struct rte_pci_device *pci_dev) 3695 { 3696 return rte_eth_dev_pci_generic_remove(pci_dev, eth_ena_dev_uninit); 3697 } 3698 3699 static struct rte_pci_driver rte_ena_pmd = { 3700 .id_table = pci_id_ena_map, 3701 .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC | 3702 RTE_PCI_DRV_WC_ACTIVATE, 3703 .probe = eth_ena_pci_probe, 3704 .remove = eth_ena_pci_remove, 3705 }; 3706 3707 RTE_PMD_REGISTER_PCI(net_ena, rte_ena_pmd); 3708 RTE_PMD_REGISTER_PCI_TABLE(net_ena, pci_id_ena_map); 3709 RTE_PMD_REGISTER_KMOD_DEP(net_ena, "* igb_uio | uio_pci_generic | vfio-pci"); 3710 RTE_PMD_REGISTER_PARAM_STRING(net_ena, ENA_DEVARG_LARGE_LLQ_HDR "=<0|1>"); 3711 RTE_LOG_REGISTER_SUFFIX(ena_logtype_init, init, NOTICE); 3712 RTE_LOG_REGISTER_SUFFIX(ena_logtype_driver, driver, NOTICE); 3713 #ifdef RTE_ETHDEV_DEBUG_RX 3714 RTE_LOG_REGISTER_SUFFIX(ena_logtype_rx, rx, DEBUG); 3715 #endif 3716 #ifdef RTE_ETHDEV_DEBUG_TX 3717 RTE_LOG_REGISTER_SUFFIX(ena_logtype_tx, tx, DEBUG); 3718 #endif 3719 
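/* Hypothetical usage example for the devargs registered above (the PCI
 * address is chosen arbitrarily):
 *
 *   dpdk-testpmd -a 00:05.0,large_llq_hdr=1,miss_txc_to=4 -- -i
 *
 * Both keys are parsed by ena_parse_devargs(), even though only
 * large_llq_hdr is advertised via RTE_PMD_REGISTER_PARAM_STRING().
 */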
/******************************************************************************
 ******************************** AENQ Handlers *******************************
 *****************************************************************************/
static void ena_update_on_link_change(void *adapter_data,
				      struct ena_admin_aenq_entry *aenq_e)
{
	struct rte_eth_dev *eth_dev = adapter_data;
	struct ena_adapter *adapter = eth_dev->data->dev_private;
	struct ena_admin_aenq_link_change_desc *aenq_link_desc;
	uint32_t status;

	aenq_link_desc = (struct ena_admin_aenq_link_change_desc *)aenq_e;

	status = get_ena_admin_aenq_link_change_desc_link_status(aenq_link_desc);
	adapter->link_status = status;

	ena_link_update(eth_dev, 0);
	rte_eth_dev_callback_process(eth_dev, RTE_ETH_EVENT_INTR_LSC, NULL);
}

static void ena_notification(void *adapter_data,
			     struct ena_admin_aenq_entry *aenq_e)
{
	struct rte_eth_dev *eth_dev = adapter_data;
	struct ena_adapter *adapter = eth_dev->data->dev_private;
	struct ena_admin_ena_hw_hints *hints;

	if (aenq_e->aenq_common_desc.group != ENA_ADMIN_NOTIFICATION)
		PMD_DRV_LOG(WARNING, "Invalid AENQ group: %x. Expected: %x\n",
			aenq_e->aenq_common_desc.group,
			ENA_ADMIN_NOTIFICATION);

	switch (aenq_e->aenq_common_desc.syndrome) {
	case ENA_ADMIN_UPDATE_HINTS:
		hints = (struct ena_admin_ena_hw_hints *)
			(&aenq_e->inline_data_w4);
		ena_update_hints(adapter, hints);
		break;
	default:
		PMD_DRV_LOG(ERR, "Invalid AENQ notification syndrome: %d\n",
			aenq_e->aenq_common_desc.syndrome);
	}
}

static void ena_keep_alive(void *adapter_data,
			   struct ena_admin_aenq_entry *aenq_e)
{
	struct rte_eth_dev *eth_dev = adapter_data;
	struct ena_adapter *adapter = eth_dev->data->dev_private;
	struct ena_admin_aenq_keep_alive_desc *desc;
	uint64_t rx_drops;
	uint64_t tx_drops;

	adapter->timestamp_wd = rte_get_timer_cycles();

	desc = (struct ena_admin_aenq_keep_alive_desc *)aenq_e;
	rx_drops = ((uint64_t)desc->rx_drops_high << 32) | desc->rx_drops_low;
	tx_drops = ((uint64_t)desc->tx_drops_high << 32) | desc->tx_drops_low;

	adapter->drv_stats->rx_drops = rx_drops;
	adapter->dev_stats.tx_drops = tx_drops;
}

/**
 * This handler will be called for any unknown event group or unimplemented
 * handlers.
 **/
static void unimplemented_aenq_handler(__rte_unused void *data,
				       __rte_unused struct ena_admin_aenq_entry *aenq_e)
{
	PMD_DRV_LOG(ERR,
		"Unknown event was received or event with unimplemented handler\n");
}

static struct ena_aenq_handlers aenq_handlers = {
	.handlers = {
		[ENA_ADMIN_LINK_CHANGE] = ena_update_on_link_change,
		[ENA_ADMIN_NOTIFICATION] = ena_notification,
		[ENA_ADMIN_KEEP_ALIVE] = ena_keep_alive
	},
	.unimplemented_handler = unimplemented_aenq_handler
};
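
/*
 * The table above maps AENQ group IDs to their callbacks; any group without
 * a dedicated entry falls back to unimplemented_aenq_handler(), which only
 * logs an error. Keep in mind that a group must also be enabled in
 * ena_configure_aenq() for the device to report its events in the first
 * place.
 */
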
/*********************************************************************
 *  Multi-Process communication request handling (in primary)
 *********************************************************************/
static int
ena_mp_primary_handle(const struct rte_mp_msg *mp_msg, const void *peer)
{
	const struct ena_mp_body *req =
		(const struct ena_mp_body *)mp_msg->param;
	struct ena_adapter *adapter;
	struct ena_com_dev *ena_dev;
	struct ena_mp_body *rsp;
	struct rte_mp_msg mp_rsp;
	struct rte_eth_dev *dev;
	int res = 0;

	rsp = (struct ena_mp_body *)&mp_rsp.param;
	mp_msg_init(&mp_rsp, req->type, req->port_id);

	if (!rte_eth_dev_is_valid_port(req->port_id)) {
		rte_errno = ENODEV;
		res = -rte_errno;
		PMD_DRV_LOG(ERR, "Unknown port %d in request %d\n",
			req->port_id, req->type);
		goto end;
	}
	dev = &rte_eth_devices[req->port_id];
	adapter = dev->data->dev_private;
	ena_dev = &adapter->ena_dev;

	switch (req->type) {
	case ENA_MP_DEV_STATS_GET:
		res = ena_com_get_dev_basic_stats(ena_dev,
			&adapter->basic_stats);
		break;
	case ENA_MP_ENI_STATS_GET:
		res = ena_com_get_eni_stats(ena_dev,
			(struct ena_admin_eni_stats *)&adapter->eni_stats);
		break;
	case ENA_MP_MTU_SET:
		res = ena_com_set_dev_mtu(ena_dev, req->args.mtu);
		break;
	case ENA_MP_IND_TBL_GET:
		res = ena_com_indirect_table_get(ena_dev,
			adapter->indirect_table);
		break;
	case ENA_MP_IND_TBL_SET:
		res = ena_com_indirect_table_set(ena_dev);
		break;
	default:
		PMD_DRV_LOG(ERR, "Unknown request type %d\n", req->type);
		res = -EINVAL;
		break;
	}

end:
	/* Save processing result in the reply */
	rsp->result = res;
	/* Return just IPC processing status */
	return rte_mp_reply(&mp_rsp, peer);
}
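
/*
 * Illustrative sketch, deliberately kept under "#if 0" so it is never built:
 * roughly how the requester side of the protocol served by
 * ena_mp_primary_handle() could be written directly on top of the rte_mp
 * API. The real driver wraps this in its ENA_PROXY()/mp_msg_init() helpers;
 * the request name string below is a placeholder and the timeout value is
 * arbitrary.
 */
#if 0
static int
ena_mp_mtu_set_sketch(uint16_t port_id, uint32_t mtu)
{
	struct rte_mp_msg mp_req;
	struct rte_mp_reply mp_rep;
	struct ena_mp_body *req = (struct ena_mp_body *)mp_req.param;
	const struct timespec ts = { .tv_sec = 5, .tv_nsec = 0 };
	int rc;

	/* Fill the shared request body exactly as the primary expects it. */
	memset(&mp_req, 0, sizeof(mp_req));
	strlcpy(mp_req.name, "<ena_mp_action_name>", sizeof(mp_req.name));
	mp_req.len_param = sizeof(*req);
	req->type = ENA_MP_MTU_SET;
	req->port_id = port_id;
	req->args.mtu = mtu;

	/* Block until the primary replies or the timeout expires. */
	rc = rte_mp_request_sync(&mp_req, &mp_rep, &ts);
	if (rc != 0)
		return -EIO;
	if (mp_rep.nb_received != 1) {
		free(mp_rep.msgs);
		return -EIO;
	}

	/* The admin command status travels back in the reply body. */
	rc = ((struct ena_mp_body *)mp_rep.msgs[0].param)->result;
	free(mp_rep.msgs);

	return rc;
}
#endif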