1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright (c) 2015-2020 Amazon.com, Inc. or its affiliates. 3 * All rights reserved. 4 */ 5 6 #include <rte_string_fns.h> 7 #include <rte_errno.h> 8 #include <rte_version.h> 9 #include <rte_net.h> 10 #include <rte_kvargs.h> 11 12 #include "ena_ethdev.h" 13 #include "ena_logs.h" 14 #include "ena_platform.h" 15 #include "ena_com.h" 16 #include "ena_eth_com.h" 17 18 #include <ena_common_defs.h> 19 #include <ena_regs_defs.h> 20 #include <ena_admin_defs.h> 21 #include <ena_eth_io_defs.h> 22 23 #define DRV_MODULE_VER_MAJOR 2 24 #define DRV_MODULE_VER_MINOR 5 25 #define DRV_MODULE_VER_SUBMINOR 0 26 27 #define __MERGE_64B_H_L(h, l) (((uint64_t)h << 32) | l) 28 29 #define GET_L4_HDR_LEN(mbuf) \ 30 ((rte_pktmbuf_mtod_offset(mbuf, struct rte_tcp_hdr *, \ 31 mbuf->l3_len + mbuf->l2_len)->data_off) >> 4) 32 33 #define ETH_GSTRING_LEN 32 34 35 #define ARRAY_SIZE(x) RTE_DIM(x) 36 37 #define ENA_MIN_RING_DESC 128 38 39 #define ENA_PTYPE_HAS_HASH (RTE_PTYPE_L4_TCP | RTE_PTYPE_L4_UDP) 40 41 enum ethtool_stringset { 42 ETH_SS_TEST = 0, 43 ETH_SS_STATS, 44 }; 45 46 struct ena_stats { 47 char name[ETH_GSTRING_LEN]; 48 int stat_offset; 49 }; 50 51 #define ENA_STAT_ENTRY(stat, stat_type) { \ 52 .name = #stat, \ 53 .stat_offset = offsetof(struct ena_stats_##stat_type, stat) \ 54 } 55 56 #define ENA_STAT_RX_ENTRY(stat) \ 57 ENA_STAT_ENTRY(stat, rx) 58 59 #define ENA_STAT_TX_ENTRY(stat) \ 60 ENA_STAT_ENTRY(stat, tx) 61 62 #define ENA_STAT_ENI_ENTRY(stat) \ 63 ENA_STAT_ENTRY(stat, eni) 64 65 #define ENA_STAT_GLOBAL_ENTRY(stat) \ 66 ENA_STAT_ENTRY(stat, dev) 67 68 /* Device arguments */ 69 #define ENA_DEVARG_LARGE_LLQ_HDR "large_llq_hdr" 70 71 /* 72 * Each rte_memzone should have unique name. 73 * To satisfy it, count number of allocation and add it to name. 
74 */ 75 rte_atomic64_t ena_alloc_cnt; 76 77 static const struct ena_stats ena_stats_global_strings[] = { 78 ENA_STAT_GLOBAL_ENTRY(wd_expired), 79 ENA_STAT_GLOBAL_ENTRY(dev_start), 80 ENA_STAT_GLOBAL_ENTRY(dev_stop), 81 ENA_STAT_GLOBAL_ENTRY(tx_drops), 82 }; 83 84 static const struct ena_stats ena_stats_eni_strings[] = { 85 ENA_STAT_ENI_ENTRY(bw_in_allowance_exceeded), 86 ENA_STAT_ENI_ENTRY(bw_out_allowance_exceeded), 87 ENA_STAT_ENI_ENTRY(pps_allowance_exceeded), 88 ENA_STAT_ENI_ENTRY(conntrack_allowance_exceeded), 89 ENA_STAT_ENI_ENTRY(linklocal_allowance_exceeded), 90 }; 91 92 static const struct ena_stats ena_stats_tx_strings[] = { 93 ENA_STAT_TX_ENTRY(cnt), 94 ENA_STAT_TX_ENTRY(bytes), 95 ENA_STAT_TX_ENTRY(prepare_ctx_err), 96 ENA_STAT_TX_ENTRY(linearize), 97 ENA_STAT_TX_ENTRY(linearize_failed), 98 ENA_STAT_TX_ENTRY(tx_poll), 99 ENA_STAT_TX_ENTRY(doorbells), 100 ENA_STAT_TX_ENTRY(bad_req_id), 101 ENA_STAT_TX_ENTRY(available_desc), 102 ENA_STAT_TX_ENTRY(missed_tx), 103 }; 104 105 static const struct ena_stats ena_stats_rx_strings[] = { 106 ENA_STAT_RX_ENTRY(cnt), 107 ENA_STAT_RX_ENTRY(bytes), 108 ENA_STAT_RX_ENTRY(refill_partial), 109 ENA_STAT_RX_ENTRY(bad_csum), 110 ENA_STAT_RX_ENTRY(mbuf_alloc_fail), 111 ENA_STAT_RX_ENTRY(bad_desc_num), 112 ENA_STAT_RX_ENTRY(bad_req_id), 113 }; 114 115 #define ENA_STATS_ARRAY_GLOBAL ARRAY_SIZE(ena_stats_global_strings) 116 #define ENA_STATS_ARRAY_ENI ARRAY_SIZE(ena_stats_eni_strings) 117 #define ENA_STATS_ARRAY_TX ARRAY_SIZE(ena_stats_tx_strings) 118 #define ENA_STATS_ARRAY_RX ARRAY_SIZE(ena_stats_rx_strings) 119 120 #define QUEUE_OFFLOADS (RTE_ETH_TX_OFFLOAD_TCP_CKSUM |\ 121 RTE_ETH_TX_OFFLOAD_UDP_CKSUM |\ 122 RTE_ETH_TX_OFFLOAD_IPV4_CKSUM |\ 123 RTE_ETH_TX_OFFLOAD_TCP_TSO) 124 #define MBUF_OFFLOADS (RTE_MBUF_F_TX_L4_MASK |\ 125 RTE_MBUF_F_TX_IP_CKSUM |\ 126 RTE_MBUF_F_TX_TCP_SEG) 127 128 /** Vendor ID used by Amazon devices */ 129 #define PCI_VENDOR_ID_AMAZON 0x1D0F 130 /** Amazon devices */ 131 #define PCI_DEVICE_ID_ENA_VF 
0xEC20 132 #define PCI_DEVICE_ID_ENA_VF_RSERV0 0xEC21 133 134 #define ENA_TX_OFFLOAD_MASK (RTE_MBUF_F_TX_L4_MASK | \ 135 RTE_MBUF_F_TX_IPV6 | \ 136 RTE_MBUF_F_TX_IPV4 | \ 137 RTE_MBUF_F_TX_IP_CKSUM | \ 138 RTE_MBUF_F_TX_TCP_SEG) 139 140 #define ENA_TX_OFFLOAD_NOTSUP_MASK \ 141 (RTE_MBUF_F_TX_OFFLOAD_MASK ^ ENA_TX_OFFLOAD_MASK) 142 143 /** HW specific offloads capabilities. */ 144 /* IPv4 checksum offload. */ 145 #define ENA_L3_IPV4_CSUM 0x0001 146 /* TCP/UDP checksum offload for IPv4 packets. */ 147 #define ENA_L4_IPV4_CSUM 0x0002 148 /* TCP/UDP checksum offload for IPv4 packets with pseudo header checksum. */ 149 #define ENA_L4_IPV4_CSUM_PARTIAL 0x0004 150 /* TCP/UDP checksum offload for IPv6 packets. */ 151 #define ENA_L4_IPV6_CSUM 0x0008 152 /* TCP/UDP checksum offload for IPv6 packets with pseudo header checksum. */ 153 #define ENA_L4_IPV6_CSUM_PARTIAL 0x0010 154 /* TSO support for IPv4 packets. */ 155 #define ENA_IPV4_TSO 0x0020 156 157 /* Device supports setting RSS hash. */ 158 #define ENA_RX_RSS_HASH 0x0040 159 160 static const struct rte_pci_id pci_id_ena_map[] = { 161 { RTE_PCI_DEVICE(PCI_VENDOR_ID_AMAZON, PCI_DEVICE_ID_ENA_VF) }, 162 { RTE_PCI_DEVICE(PCI_VENDOR_ID_AMAZON, PCI_DEVICE_ID_ENA_VF_RSERV0) }, 163 { .device_id = 0 }, 164 }; 165 166 static struct ena_aenq_handlers aenq_handlers; 167 168 static int ena_device_init(struct ena_com_dev *ena_dev, 169 struct rte_pci_device *pdev, 170 struct ena_com_dev_get_features_ctx *get_feat_ctx, 171 bool *wd_state); 172 static int ena_dev_configure(struct rte_eth_dev *dev); 173 static void ena_tx_map_mbuf(struct ena_ring *tx_ring, 174 struct ena_tx_buffer *tx_info, 175 struct rte_mbuf *mbuf, 176 void **push_header, 177 uint16_t *header_len); 178 static int ena_xmit_mbuf(struct ena_ring *tx_ring, struct rte_mbuf *mbuf); 179 static void ena_tx_cleanup(struct ena_ring *tx_ring); 180 static uint16_t eth_ena_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, 181 uint16_t nb_pkts); 182 static uint16_t 
eth_ena_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, 183 uint16_t nb_pkts); 184 static int ena_tx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx, 185 uint16_t nb_desc, unsigned int socket_id, 186 const struct rte_eth_txconf *tx_conf); 187 static int ena_rx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx, 188 uint16_t nb_desc, unsigned int socket_id, 189 const struct rte_eth_rxconf *rx_conf, 190 struct rte_mempool *mp); 191 static inline void ena_init_rx_mbuf(struct rte_mbuf *mbuf, uint16_t len); 192 static struct rte_mbuf *ena_rx_mbuf(struct ena_ring *rx_ring, 193 struct ena_com_rx_buf_info *ena_bufs, 194 uint32_t descs, 195 uint16_t *next_to_clean, 196 uint8_t offset); 197 static uint16_t eth_ena_recv_pkts(void *rx_queue, 198 struct rte_mbuf **rx_pkts, uint16_t nb_pkts); 199 static int ena_add_single_rx_desc(struct ena_com_io_sq *io_sq, 200 struct rte_mbuf *mbuf, uint16_t id); 201 static int ena_populate_rx_queue(struct ena_ring *rxq, unsigned int count); 202 static void ena_init_rings(struct ena_adapter *adapter, 203 bool disable_meta_caching); 204 static int ena_mtu_set(struct rte_eth_dev *dev, uint16_t mtu); 205 static int ena_start(struct rte_eth_dev *dev); 206 static int ena_stop(struct rte_eth_dev *dev); 207 static int ena_close(struct rte_eth_dev *dev); 208 static int ena_dev_reset(struct rte_eth_dev *dev); 209 static int ena_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats); 210 static void ena_rx_queue_release_all(struct rte_eth_dev *dev); 211 static void ena_tx_queue_release_all(struct rte_eth_dev *dev); 212 static void ena_rx_queue_release(struct rte_eth_dev *dev, uint16_t qid); 213 static void ena_tx_queue_release(struct rte_eth_dev *dev, uint16_t qid); 214 static void ena_rx_queue_release_bufs(struct ena_ring *ring); 215 static void ena_tx_queue_release_bufs(struct ena_ring *ring); 216 static int ena_link_update(struct rte_eth_dev *dev, 217 int wait_to_complete); 218 static int ena_create_io_queue(struct 
rte_eth_dev *dev, struct ena_ring *ring); 219 static void ena_queue_stop(struct ena_ring *ring); 220 static void ena_queue_stop_all(struct rte_eth_dev *dev, 221 enum ena_ring_type ring_type); 222 static int ena_queue_start(struct rte_eth_dev *dev, struct ena_ring *ring); 223 static int ena_queue_start_all(struct rte_eth_dev *dev, 224 enum ena_ring_type ring_type); 225 static void ena_stats_restart(struct rte_eth_dev *dev); 226 static uint64_t ena_get_rx_port_offloads(struct ena_adapter *adapter); 227 static uint64_t ena_get_tx_port_offloads(struct ena_adapter *adapter); 228 static uint64_t ena_get_rx_queue_offloads(struct ena_adapter *adapter); 229 static uint64_t ena_get_tx_queue_offloads(struct ena_adapter *adapter); 230 static int ena_infos_get(struct rte_eth_dev *dev, 231 struct rte_eth_dev_info *dev_info); 232 static void ena_interrupt_handler_rte(void *cb_arg); 233 static void ena_timer_wd_callback(struct rte_timer *timer, void *arg); 234 static void ena_destroy_device(struct rte_eth_dev *eth_dev); 235 static int eth_ena_dev_init(struct rte_eth_dev *eth_dev); 236 static int ena_xstats_get_names(struct rte_eth_dev *dev, 237 struct rte_eth_xstat_name *xstats_names, 238 unsigned int n); 239 static int ena_xstats_get(struct rte_eth_dev *dev, 240 struct rte_eth_xstat *stats, 241 unsigned int n); 242 static int ena_xstats_get_by_id(struct rte_eth_dev *dev, 243 const uint64_t *ids, 244 uint64_t *values, 245 unsigned int n); 246 static int ena_process_bool_devarg(const char *key, 247 const char *value, 248 void *opaque); 249 static int ena_parse_devargs(struct ena_adapter *adapter, 250 struct rte_devargs *devargs); 251 static int ena_copy_eni_stats(struct ena_adapter *adapter); 252 static int ena_setup_rx_intr(struct rte_eth_dev *dev); 253 static int ena_rx_queue_intr_enable(struct rte_eth_dev *dev, 254 uint16_t queue_id); 255 static int ena_rx_queue_intr_disable(struct rte_eth_dev *dev, 256 uint16_t queue_id); 257 258 static const struct eth_dev_ops ena_dev_ops = { 
259 .dev_configure = ena_dev_configure, 260 .dev_infos_get = ena_infos_get, 261 .rx_queue_setup = ena_rx_queue_setup, 262 .tx_queue_setup = ena_tx_queue_setup, 263 .dev_start = ena_start, 264 .dev_stop = ena_stop, 265 .link_update = ena_link_update, 266 .stats_get = ena_stats_get, 267 .xstats_get_names = ena_xstats_get_names, 268 .xstats_get = ena_xstats_get, 269 .xstats_get_by_id = ena_xstats_get_by_id, 270 .mtu_set = ena_mtu_set, 271 .rx_queue_release = ena_rx_queue_release, 272 .tx_queue_release = ena_tx_queue_release, 273 .dev_close = ena_close, 274 .dev_reset = ena_dev_reset, 275 .reta_update = ena_rss_reta_update, 276 .reta_query = ena_rss_reta_query, 277 .rx_queue_intr_enable = ena_rx_queue_intr_enable, 278 .rx_queue_intr_disable = ena_rx_queue_intr_disable, 279 .rss_hash_update = ena_rss_hash_update, 280 .rss_hash_conf_get = ena_rss_hash_conf_get, 281 }; 282 283 static inline void ena_rx_mbuf_prepare(struct rte_mbuf *mbuf, 284 struct ena_com_rx_ctx *ena_rx_ctx, 285 bool fill_hash) 286 { 287 uint64_t ol_flags = 0; 288 uint32_t packet_type = 0; 289 290 if (ena_rx_ctx->l4_proto == ENA_ETH_IO_L4_PROTO_TCP) 291 packet_type |= RTE_PTYPE_L4_TCP; 292 else if (ena_rx_ctx->l4_proto == ENA_ETH_IO_L4_PROTO_UDP) 293 packet_type |= RTE_PTYPE_L4_UDP; 294 295 if (ena_rx_ctx->l3_proto == ENA_ETH_IO_L3_PROTO_IPV4) { 296 packet_type |= RTE_PTYPE_L3_IPV4; 297 if (unlikely(ena_rx_ctx->l3_csum_err)) 298 ol_flags |= RTE_MBUF_F_RX_IP_CKSUM_BAD; 299 else 300 ol_flags |= RTE_MBUF_F_RX_IP_CKSUM_GOOD; 301 } else if (ena_rx_ctx->l3_proto == ENA_ETH_IO_L3_PROTO_IPV6) { 302 packet_type |= RTE_PTYPE_L3_IPV6; 303 } 304 305 if (!ena_rx_ctx->l4_csum_checked || ena_rx_ctx->frag) 306 ol_flags |= RTE_MBUF_F_RX_L4_CKSUM_UNKNOWN; 307 else 308 if (unlikely(ena_rx_ctx->l4_csum_err)) 309 ol_flags |= RTE_MBUF_F_RX_L4_CKSUM_BAD; 310 else 311 ol_flags |= RTE_MBUF_F_RX_L4_CKSUM_GOOD; 312 313 if (fill_hash && 314 likely((packet_type & ENA_PTYPE_HAS_HASH) && !ena_rx_ctx->frag)) { 315 ol_flags |= 
RTE_MBUF_F_RX_RSS_HASH; 316 mbuf->hash.rss = ena_rx_ctx->hash; 317 } 318 319 mbuf->ol_flags = ol_flags; 320 mbuf->packet_type = packet_type; 321 } 322 323 static inline void ena_tx_mbuf_prepare(struct rte_mbuf *mbuf, 324 struct ena_com_tx_ctx *ena_tx_ctx, 325 uint64_t queue_offloads, 326 bool disable_meta_caching) 327 { 328 struct ena_com_tx_meta *ena_meta = &ena_tx_ctx->ena_meta; 329 330 if ((mbuf->ol_flags & MBUF_OFFLOADS) && 331 (queue_offloads & QUEUE_OFFLOADS)) { 332 /* check if TSO is required */ 333 if ((mbuf->ol_flags & RTE_MBUF_F_TX_TCP_SEG) && 334 (queue_offloads & RTE_ETH_TX_OFFLOAD_TCP_TSO)) { 335 ena_tx_ctx->tso_enable = true; 336 337 ena_meta->l4_hdr_len = GET_L4_HDR_LEN(mbuf); 338 } 339 340 /* check if L3 checksum is needed */ 341 if ((mbuf->ol_flags & RTE_MBUF_F_TX_IP_CKSUM) && 342 (queue_offloads & RTE_ETH_TX_OFFLOAD_IPV4_CKSUM)) 343 ena_tx_ctx->l3_csum_enable = true; 344 345 if (mbuf->ol_flags & RTE_MBUF_F_TX_IPV6) { 346 ena_tx_ctx->l3_proto = ENA_ETH_IO_L3_PROTO_IPV6; 347 } else { 348 ena_tx_ctx->l3_proto = ENA_ETH_IO_L3_PROTO_IPV4; 349 350 /* set don't fragment (DF) flag */ 351 if (mbuf->packet_type & 352 (RTE_PTYPE_L4_NONFRAG 353 | RTE_PTYPE_INNER_L4_NONFRAG)) 354 ena_tx_ctx->df = true; 355 } 356 357 /* check if L4 checksum is needed */ 358 if (((mbuf->ol_flags & RTE_MBUF_F_TX_L4_MASK) == RTE_MBUF_F_TX_TCP_CKSUM) && 359 (queue_offloads & RTE_ETH_TX_OFFLOAD_TCP_CKSUM)) { 360 ena_tx_ctx->l4_proto = ENA_ETH_IO_L4_PROTO_TCP; 361 ena_tx_ctx->l4_csum_enable = true; 362 } else if (((mbuf->ol_flags & RTE_MBUF_F_TX_L4_MASK) == 363 RTE_MBUF_F_TX_UDP_CKSUM) && 364 (queue_offloads & RTE_ETH_TX_OFFLOAD_UDP_CKSUM)) { 365 ena_tx_ctx->l4_proto = ENA_ETH_IO_L4_PROTO_UDP; 366 ena_tx_ctx->l4_csum_enable = true; 367 } else { 368 ena_tx_ctx->l4_proto = ENA_ETH_IO_L4_PROTO_UNKNOWN; 369 ena_tx_ctx->l4_csum_enable = false; 370 } 371 372 ena_meta->mss = mbuf->tso_segsz; 373 ena_meta->l3_hdr_len = mbuf->l3_len; 374 ena_meta->l3_hdr_offset = mbuf->l2_len; 375 376 
ena_tx_ctx->meta_valid = true; 377 } else if (disable_meta_caching) { 378 memset(ena_meta, 0, sizeof(*ena_meta)); 379 ena_tx_ctx->meta_valid = true; 380 } else { 381 ena_tx_ctx->meta_valid = false; 382 } 383 } 384 385 static int validate_tx_req_id(struct ena_ring *tx_ring, u16 req_id) 386 { 387 struct ena_tx_buffer *tx_info = NULL; 388 389 if (likely(req_id < tx_ring->ring_size)) { 390 tx_info = &tx_ring->tx_buffer_info[req_id]; 391 if (likely(tx_info->mbuf)) 392 return 0; 393 } 394 395 if (tx_info) 396 PMD_TX_LOG(ERR, "tx_info doesn't have valid mbuf\n"); 397 else 398 PMD_TX_LOG(ERR, "Invalid req_id: %hu\n", req_id); 399 400 /* Trigger device reset */ 401 ++tx_ring->tx_stats.bad_req_id; 402 tx_ring->adapter->reset_reason = ENA_REGS_RESET_INV_TX_REQ_ID; 403 tx_ring->adapter->trigger_reset = true; 404 return -EFAULT; 405 } 406 407 static void ena_config_host_info(struct ena_com_dev *ena_dev) 408 { 409 struct ena_admin_host_info *host_info; 410 int rc; 411 412 /* Allocate only the host info */ 413 rc = ena_com_allocate_host_info(ena_dev); 414 if (rc) { 415 PMD_DRV_LOG(ERR, "Cannot allocate host info\n"); 416 return; 417 } 418 419 host_info = ena_dev->host_attr.host_info; 420 421 host_info->os_type = ENA_ADMIN_OS_DPDK; 422 host_info->kernel_ver = RTE_VERSION; 423 strlcpy((char *)host_info->kernel_ver_str, rte_version(), 424 sizeof(host_info->kernel_ver_str)); 425 host_info->os_dist = RTE_VERSION; 426 strlcpy((char *)host_info->os_dist_str, rte_version(), 427 sizeof(host_info->os_dist_str)); 428 host_info->driver_version = 429 (DRV_MODULE_VER_MAJOR) | 430 (DRV_MODULE_VER_MINOR << ENA_ADMIN_HOST_INFO_MINOR_SHIFT) | 431 (DRV_MODULE_VER_SUBMINOR << 432 ENA_ADMIN_HOST_INFO_SUB_MINOR_SHIFT); 433 host_info->num_cpus = rte_lcore_count(); 434 435 host_info->driver_supported_features = 436 ENA_ADMIN_HOST_INFO_RX_OFFSET_MASK | 437 ENA_ADMIN_HOST_INFO_RSS_CONFIGURABLE_FUNCTION_KEY_MASK; 438 439 rc = ena_com_set_host_attributes(ena_dev); 440 if (rc) { 441 if (rc == 
-ENA_COM_UNSUPPORTED) 442 PMD_DRV_LOG(WARNING, "Cannot set host attributes\n"); 443 else 444 PMD_DRV_LOG(ERR, "Cannot set host attributes\n"); 445 446 goto err; 447 } 448 449 return; 450 451 err: 452 ena_com_delete_host_info(ena_dev); 453 } 454 455 /* This function calculates the number of xstats based on the current config */ 456 static unsigned int ena_xstats_calc_num(struct rte_eth_dev_data *data) 457 { 458 return ENA_STATS_ARRAY_GLOBAL + ENA_STATS_ARRAY_ENI + 459 (data->nb_tx_queues * ENA_STATS_ARRAY_TX) + 460 (data->nb_rx_queues * ENA_STATS_ARRAY_RX); 461 } 462 463 static void ena_config_debug_area(struct ena_adapter *adapter) 464 { 465 u32 debug_area_size; 466 int rc, ss_count; 467 468 ss_count = ena_xstats_calc_num(adapter->edev_data); 469 470 /* allocate 32 bytes for each string and 64bit for the value */ 471 debug_area_size = ss_count * ETH_GSTRING_LEN + sizeof(u64) * ss_count; 472 473 rc = ena_com_allocate_debug_area(&adapter->ena_dev, debug_area_size); 474 if (rc) { 475 PMD_DRV_LOG(ERR, "Cannot allocate debug area\n"); 476 return; 477 } 478 479 rc = ena_com_set_host_attributes(&adapter->ena_dev); 480 if (rc) { 481 if (rc == -ENA_COM_UNSUPPORTED) 482 PMD_DRV_LOG(WARNING, "Cannot set host attributes\n"); 483 else 484 PMD_DRV_LOG(ERR, "Cannot set host attributes\n"); 485 486 goto err; 487 } 488 489 return; 490 err: 491 ena_com_delete_debug_area(&adapter->ena_dev); 492 } 493 494 static int ena_close(struct rte_eth_dev *dev) 495 { 496 struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev); 497 struct rte_intr_handle *intr_handle = &pci_dev->intr_handle; 498 struct ena_adapter *adapter = dev->data->dev_private; 499 int ret = 0; 500 501 if (rte_eal_process_type() != RTE_PROC_PRIMARY) 502 return 0; 503 504 if (adapter->state == ENA_ADAPTER_STATE_RUNNING) 505 ret = ena_stop(dev); 506 adapter->state = ENA_ADAPTER_STATE_CLOSED; 507 508 ena_rx_queue_release_all(dev); 509 ena_tx_queue_release_all(dev); 510 511 rte_free(adapter->drv_stats); 512 adapter->drv_stats = 
NULL; 513 514 rte_intr_disable(intr_handle); 515 rte_intr_callback_unregister(intr_handle, 516 ena_interrupt_handler_rte, 517 dev); 518 519 /* 520 * MAC is not allocated dynamically. Setting NULL should prevent from 521 * release of the resource in the rte_eth_dev_release_port(). 522 */ 523 dev->data->mac_addrs = NULL; 524 525 return ret; 526 } 527 528 static int 529 ena_dev_reset(struct rte_eth_dev *dev) 530 { 531 int rc = 0; 532 533 /* Cannot release memory in secondary process */ 534 if (rte_eal_process_type() != RTE_PROC_PRIMARY) { 535 PMD_DRV_LOG(WARNING, "dev_reset not supported in secondary.\n"); 536 return -EPERM; 537 } 538 539 ena_destroy_device(dev); 540 rc = eth_ena_dev_init(dev); 541 if (rc) 542 PMD_INIT_LOG(CRIT, "Cannot initialize device\n"); 543 544 return rc; 545 } 546 547 static void ena_rx_queue_release_all(struct rte_eth_dev *dev) 548 { 549 int nb_queues = dev->data->nb_rx_queues; 550 int i; 551 552 for (i = 0; i < nb_queues; i++) 553 ena_rx_queue_release(dev, i); 554 } 555 556 static void ena_tx_queue_release_all(struct rte_eth_dev *dev) 557 { 558 int nb_queues = dev->data->nb_tx_queues; 559 int i; 560 561 for (i = 0; i < nb_queues; i++) 562 ena_tx_queue_release(dev, i); 563 } 564 565 static void ena_rx_queue_release(struct rte_eth_dev *dev, uint16_t qid) 566 { 567 struct ena_ring *ring = dev->data->rx_queues[qid]; 568 569 /* Free ring resources */ 570 if (ring->rx_buffer_info) 571 rte_free(ring->rx_buffer_info); 572 ring->rx_buffer_info = NULL; 573 574 if (ring->rx_refill_buffer) 575 rte_free(ring->rx_refill_buffer); 576 ring->rx_refill_buffer = NULL; 577 578 if (ring->empty_rx_reqs) 579 rte_free(ring->empty_rx_reqs); 580 ring->empty_rx_reqs = NULL; 581 582 ring->configured = 0; 583 584 PMD_DRV_LOG(NOTICE, "Rx queue %d:%d released\n", 585 ring->port_id, ring->id); 586 } 587 588 static void ena_tx_queue_release(struct rte_eth_dev *dev, uint16_t qid) 589 { 590 struct ena_ring *ring = dev->data->tx_queues[qid]; 591 592 /* Free ring resources */ 
593 if (ring->push_buf_intermediate_buf) 594 rte_free(ring->push_buf_intermediate_buf); 595 596 if (ring->tx_buffer_info) 597 rte_free(ring->tx_buffer_info); 598 599 if (ring->empty_tx_reqs) 600 rte_free(ring->empty_tx_reqs); 601 602 ring->empty_tx_reqs = NULL; 603 ring->tx_buffer_info = NULL; 604 ring->push_buf_intermediate_buf = NULL; 605 606 ring->configured = 0; 607 608 PMD_DRV_LOG(NOTICE, "Tx queue %d:%d released\n", 609 ring->port_id, ring->id); 610 } 611 612 static void ena_rx_queue_release_bufs(struct ena_ring *ring) 613 { 614 unsigned int i; 615 616 for (i = 0; i < ring->ring_size; ++i) { 617 struct ena_rx_buffer *rx_info = &ring->rx_buffer_info[i]; 618 if (rx_info->mbuf) { 619 rte_mbuf_raw_free(rx_info->mbuf); 620 rx_info->mbuf = NULL; 621 } 622 } 623 } 624 625 static void ena_tx_queue_release_bufs(struct ena_ring *ring) 626 { 627 unsigned int i; 628 629 for (i = 0; i < ring->ring_size; ++i) { 630 struct ena_tx_buffer *tx_buf = &ring->tx_buffer_info[i]; 631 632 if (tx_buf->mbuf) { 633 rte_pktmbuf_free(tx_buf->mbuf); 634 tx_buf->mbuf = NULL; 635 } 636 } 637 } 638 639 static int ena_link_update(struct rte_eth_dev *dev, 640 __rte_unused int wait_to_complete) 641 { 642 struct rte_eth_link *link = &dev->data->dev_link; 643 struct ena_adapter *adapter = dev->data->dev_private; 644 645 link->link_status = adapter->link_status ? 
RTE_ETH_LINK_UP : RTE_ETH_LINK_DOWN; 646 link->link_speed = RTE_ETH_SPEED_NUM_NONE; 647 link->link_duplex = RTE_ETH_LINK_FULL_DUPLEX; 648 649 return 0; 650 } 651 652 static int ena_queue_start_all(struct rte_eth_dev *dev, 653 enum ena_ring_type ring_type) 654 { 655 struct ena_adapter *adapter = dev->data->dev_private; 656 struct ena_ring *queues = NULL; 657 int nb_queues; 658 int i = 0; 659 int rc = 0; 660 661 if (ring_type == ENA_RING_TYPE_RX) { 662 queues = adapter->rx_ring; 663 nb_queues = dev->data->nb_rx_queues; 664 } else { 665 queues = adapter->tx_ring; 666 nb_queues = dev->data->nb_tx_queues; 667 } 668 for (i = 0; i < nb_queues; i++) { 669 if (queues[i].configured) { 670 if (ring_type == ENA_RING_TYPE_RX) { 671 ena_assert_msg( 672 dev->data->rx_queues[i] == &queues[i], 673 "Inconsistent state of Rx queues\n"); 674 } else { 675 ena_assert_msg( 676 dev->data->tx_queues[i] == &queues[i], 677 "Inconsistent state of Tx queues\n"); 678 } 679 680 rc = ena_queue_start(dev, &queues[i]); 681 682 if (rc) { 683 PMD_INIT_LOG(ERR, 684 "Failed to start queue[%d] of type(%d)\n", 685 i, ring_type); 686 goto err; 687 } 688 } 689 } 690 691 return 0; 692 693 err: 694 while (i--) 695 if (queues[i].configured) 696 ena_queue_stop(&queues[i]); 697 698 return rc; 699 } 700 701 static int ena_check_valid_conf(struct ena_adapter *adapter) 702 { 703 uint32_t mtu = adapter->edev_data->mtu; 704 705 if (mtu > adapter->max_mtu || mtu < ENA_MIN_MTU) { 706 PMD_INIT_LOG(ERR, 707 "Unsupported MTU of %d. 
Max MTU: %d, min MTU: %d\n", 708 mtu, adapter->max_mtu, ENA_MIN_MTU); 709 return ENA_COM_UNSUPPORTED; 710 } 711 712 return 0; 713 } 714 715 static int 716 ena_calc_io_queue_size(struct ena_calc_queue_size_ctx *ctx, 717 bool use_large_llq_hdr) 718 { 719 struct ena_admin_feature_llq_desc *llq = &ctx->get_feat_ctx->llq; 720 struct ena_com_dev *ena_dev = ctx->ena_dev; 721 uint32_t max_tx_queue_size; 722 uint32_t max_rx_queue_size; 723 724 if (ena_dev->supported_features & BIT(ENA_ADMIN_MAX_QUEUES_EXT)) { 725 struct ena_admin_queue_ext_feature_fields *max_queue_ext = 726 &ctx->get_feat_ctx->max_queue_ext.max_queue_ext; 727 max_rx_queue_size = RTE_MIN(max_queue_ext->max_rx_cq_depth, 728 max_queue_ext->max_rx_sq_depth); 729 max_tx_queue_size = max_queue_ext->max_tx_cq_depth; 730 731 if (ena_dev->tx_mem_queue_type == 732 ENA_ADMIN_PLACEMENT_POLICY_DEV) { 733 max_tx_queue_size = RTE_MIN(max_tx_queue_size, 734 llq->max_llq_depth); 735 } else { 736 max_tx_queue_size = RTE_MIN(max_tx_queue_size, 737 max_queue_ext->max_tx_sq_depth); 738 } 739 740 ctx->max_rx_sgl_size = RTE_MIN(ENA_PKT_MAX_BUFS, 741 max_queue_ext->max_per_packet_rx_descs); 742 ctx->max_tx_sgl_size = RTE_MIN(ENA_PKT_MAX_BUFS, 743 max_queue_ext->max_per_packet_tx_descs); 744 } else { 745 struct ena_admin_queue_feature_desc *max_queues = 746 &ctx->get_feat_ctx->max_queues; 747 max_rx_queue_size = RTE_MIN(max_queues->max_cq_depth, 748 max_queues->max_sq_depth); 749 max_tx_queue_size = max_queues->max_cq_depth; 750 751 if (ena_dev->tx_mem_queue_type == 752 ENA_ADMIN_PLACEMENT_POLICY_DEV) { 753 max_tx_queue_size = RTE_MIN(max_tx_queue_size, 754 llq->max_llq_depth); 755 } else { 756 max_tx_queue_size = RTE_MIN(max_tx_queue_size, 757 max_queues->max_sq_depth); 758 } 759 760 ctx->max_rx_sgl_size = RTE_MIN(ENA_PKT_MAX_BUFS, 761 max_queues->max_packet_rx_descs); 762 ctx->max_tx_sgl_size = RTE_MIN(ENA_PKT_MAX_BUFS, 763 max_queues->max_packet_tx_descs); 764 } 765 766 /* Round down to the nearest power of 2 */ 767 
max_rx_queue_size = rte_align32prevpow2(max_rx_queue_size); 768 max_tx_queue_size = rte_align32prevpow2(max_tx_queue_size); 769 770 if (use_large_llq_hdr) { 771 if ((llq->entry_size_ctrl_supported & 772 ENA_ADMIN_LIST_ENTRY_SIZE_256B) && 773 (ena_dev->tx_mem_queue_type == 774 ENA_ADMIN_PLACEMENT_POLICY_DEV)) { 775 max_tx_queue_size /= 2; 776 PMD_INIT_LOG(INFO, 777 "Forcing large headers and decreasing maximum Tx queue size to %d\n", 778 max_tx_queue_size); 779 } else { 780 PMD_INIT_LOG(ERR, 781 "Forcing large headers failed: LLQ is disabled or device does not support large headers\n"); 782 } 783 } 784 785 if (unlikely(max_rx_queue_size == 0 || max_tx_queue_size == 0)) { 786 PMD_INIT_LOG(ERR, "Invalid queue size\n"); 787 return -EFAULT; 788 } 789 790 ctx->max_tx_queue_size = max_tx_queue_size; 791 ctx->max_rx_queue_size = max_rx_queue_size; 792 793 return 0; 794 } 795 796 static void ena_stats_restart(struct rte_eth_dev *dev) 797 { 798 struct ena_adapter *adapter = dev->data->dev_private; 799 800 rte_atomic64_init(&adapter->drv_stats->ierrors); 801 rte_atomic64_init(&adapter->drv_stats->oerrors); 802 rte_atomic64_init(&adapter->drv_stats->rx_nombuf); 803 adapter->drv_stats->rx_drops = 0; 804 } 805 806 static int ena_stats_get(struct rte_eth_dev *dev, 807 struct rte_eth_stats *stats) 808 { 809 struct ena_admin_basic_stats ena_stats; 810 struct ena_adapter *adapter = dev->data->dev_private; 811 struct ena_com_dev *ena_dev = &adapter->ena_dev; 812 int rc; 813 int i; 814 int max_rings_stats; 815 816 if (rte_eal_process_type() != RTE_PROC_PRIMARY) 817 return -ENOTSUP; 818 819 memset(&ena_stats, 0, sizeof(ena_stats)); 820 821 rte_spinlock_lock(&adapter->admin_lock); 822 rc = ena_com_get_dev_basic_stats(ena_dev, &ena_stats); 823 rte_spinlock_unlock(&adapter->admin_lock); 824 if (unlikely(rc)) { 825 PMD_DRV_LOG(ERR, "Could not retrieve statistics from ENA\n"); 826 return rc; 827 } 828 829 /* Set of basic statistics from ENA */ 830 stats->ipackets = 
__MERGE_64B_H_L(ena_stats.rx_pkts_high, 831 ena_stats.rx_pkts_low); 832 stats->opackets = __MERGE_64B_H_L(ena_stats.tx_pkts_high, 833 ena_stats.tx_pkts_low); 834 stats->ibytes = __MERGE_64B_H_L(ena_stats.rx_bytes_high, 835 ena_stats.rx_bytes_low); 836 stats->obytes = __MERGE_64B_H_L(ena_stats.tx_bytes_high, 837 ena_stats.tx_bytes_low); 838 839 /* Driver related stats */ 840 stats->imissed = adapter->drv_stats->rx_drops; 841 stats->ierrors = rte_atomic64_read(&adapter->drv_stats->ierrors); 842 stats->oerrors = rte_atomic64_read(&adapter->drv_stats->oerrors); 843 stats->rx_nombuf = rte_atomic64_read(&adapter->drv_stats->rx_nombuf); 844 845 max_rings_stats = RTE_MIN(dev->data->nb_rx_queues, 846 RTE_ETHDEV_QUEUE_STAT_CNTRS); 847 for (i = 0; i < max_rings_stats; ++i) { 848 struct ena_stats_rx *rx_stats = &adapter->rx_ring[i].rx_stats; 849 850 stats->q_ibytes[i] = rx_stats->bytes; 851 stats->q_ipackets[i] = rx_stats->cnt; 852 stats->q_errors[i] = rx_stats->bad_desc_num + 853 rx_stats->bad_req_id; 854 } 855 856 max_rings_stats = RTE_MIN(dev->data->nb_tx_queues, 857 RTE_ETHDEV_QUEUE_STAT_CNTRS); 858 for (i = 0; i < max_rings_stats; ++i) { 859 struct ena_stats_tx *tx_stats = &adapter->tx_ring[i].tx_stats; 860 861 stats->q_obytes[i] = tx_stats->bytes; 862 stats->q_opackets[i] = tx_stats->cnt; 863 } 864 865 return 0; 866 } 867 868 static int ena_mtu_set(struct rte_eth_dev *dev, uint16_t mtu) 869 { 870 struct ena_adapter *adapter; 871 struct ena_com_dev *ena_dev; 872 int rc = 0; 873 874 ena_assert_msg(dev->data != NULL, "Uninitialized device\n"); 875 ena_assert_msg(dev->data->dev_private != NULL, "Uninitialized device\n"); 876 adapter = dev->data->dev_private; 877 878 ena_dev = &adapter->ena_dev; 879 ena_assert_msg(ena_dev != NULL, "Uninitialized device\n"); 880 881 if (mtu > adapter->max_mtu || mtu < ENA_MIN_MTU) { 882 PMD_DRV_LOG(ERR, 883 "Invalid MTU setting. 
New MTU: %d, max MTU: %d, min MTU: %d\n", 884 mtu, adapter->max_mtu, ENA_MIN_MTU); 885 return -EINVAL; 886 } 887 888 rc = ena_com_set_dev_mtu(ena_dev, mtu); 889 if (rc) 890 PMD_DRV_LOG(ERR, "Could not set MTU: %d\n", mtu); 891 else 892 PMD_DRV_LOG(NOTICE, "MTU set to: %d\n", mtu); 893 894 return rc; 895 } 896 897 static int ena_start(struct rte_eth_dev *dev) 898 { 899 struct ena_adapter *adapter = dev->data->dev_private; 900 uint64_t ticks; 901 int rc = 0; 902 903 /* Cannot allocate memory in secondary process */ 904 if (rte_eal_process_type() != RTE_PROC_PRIMARY) { 905 PMD_DRV_LOG(WARNING, "dev_start not supported in secondary.\n"); 906 return -EPERM; 907 } 908 909 rc = ena_check_valid_conf(adapter); 910 if (rc) 911 return rc; 912 913 rc = ena_setup_rx_intr(dev); 914 if (rc) 915 return rc; 916 917 rc = ena_queue_start_all(dev, ENA_RING_TYPE_RX); 918 if (rc) 919 return rc; 920 921 rc = ena_queue_start_all(dev, ENA_RING_TYPE_TX); 922 if (rc) 923 goto err_start_tx; 924 925 if (adapter->edev_data->dev_conf.rxmode.mq_mode & RTE_ETH_MQ_RX_RSS_FLAG) { 926 rc = ena_rss_configure(adapter); 927 if (rc) 928 goto err_rss_init; 929 } 930 931 ena_stats_restart(dev); 932 933 adapter->timestamp_wd = rte_get_timer_cycles(); 934 adapter->keep_alive_timeout = ENA_DEVICE_KALIVE_TIMEOUT; 935 936 ticks = rte_get_timer_hz(); 937 rte_timer_reset(&adapter->timer_wd, ticks, PERIODICAL, rte_lcore_id(), 938 ena_timer_wd_callback, dev); 939 940 ++adapter->dev_stats.dev_start; 941 adapter->state = ENA_ADAPTER_STATE_RUNNING; 942 943 return 0; 944 945 err_rss_init: 946 ena_queue_stop_all(dev, ENA_RING_TYPE_TX); 947 err_start_tx: 948 ena_queue_stop_all(dev, ENA_RING_TYPE_RX); 949 return rc; 950 } 951 952 static int ena_stop(struct rte_eth_dev *dev) 953 { 954 struct ena_adapter *adapter = dev->data->dev_private; 955 struct ena_com_dev *ena_dev = &adapter->ena_dev; 956 struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev); 957 struct rte_intr_handle *intr_handle = &pci_dev->intr_handle; 958 int 
rc; 959 960 /* Cannot free memory in secondary process */ 961 if (rte_eal_process_type() != RTE_PROC_PRIMARY) { 962 PMD_DRV_LOG(WARNING, "dev_stop not supported in secondary.\n"); 963 return -EPERM; 964 } 965 966 rte_timer_stop_sync(&adapter->timer_wd); 967 ena_queue_stop_all(dev, ENA_RING_TYPE_TX); 968 ena_queue_stop_all(dev, ENA_RING_TYPE_RX); 969 970 if (adapter->trigger_reset) { 971 rc = ena_com_dev_reset(ena_dev, adapter->reset_reason); 972 if (rc) 973 PMD_DRV_LOG(ERR, "Device reset failed, rc: %d\n", rc); 974 } 975 976 rte_intr_disable(intr_handle); 977 978 rte_intr_efd_disable(intr_handle); 979 if (intr_handle->intr_vec != NULL) { 980 rte_free(intr_handle->intr_vec); 981 intr_handle->intr_vec = NULL; 982 } 983 984 rte_intr_enable(intr_handle); 985 986 ++adapter->dev_stats.dev_stop; 987 adapter->state = ENA_ADAPTER_STATE_STOPPED; 988 dev->data->dev_started = 0; 989 990 return 0; 991 } 992 993 static int ena_create_io_queue(struct rte_eth_dev *dev, struct ena_ring *ring) 994 { 995 struct ena_adapter *adapter = ring->adapter; 996 struct ena_com_dev *ena_dev = &adapter->ena_dev; 997 struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev); 998 struct rte_intr_handle *intr_handle = &pci_dev->intr_handle; 999 struct ena_com_create_io_ctx ctx = 1000 /* policy set to _HOST just to satisfy icc compiler */ 1001 { ENA_ADMIN_PLACEMENT_POLICY_HOST, 1002 0, 0, 0, 0, 0 }; 1003 uint16_t ena_qid; 1004 unsigned int i; 1005 int rc; 1006 1007 ctx.msix_vector = -1; 1008 if (ring->type == ENA_RING_TYPE_TX) { 1009 ena_qid = ENA_IO_TXQ_IDX(ring->id); 1010 ctx.direction = ENA_COM_IO_QUEUE_DIRECTION_TX; 1011 ctx.mem_queue_type = ena_dev->tx_mem_queue_type; 1012 for (i = 0; i < ring->ring_size; i++) 1013 ring->empty_tx_reqs[i] = i; 1014 } else { 1015 ena_qid = ENA_IO_RXQ_IDX(ring->id); 1016 ctx.direction = ENA_COM_IO_QUEUE_DIRECTION_RX; 1017 if (rte_intr_dp_is_en(intr_handle)) 1018 ctx.msix_vector = intr_handle->intr_vec[ring->id]; 1019 for (i = 0; i < ring->ring_size; i++) 1020 
ring->empty_rx_reqs[i] = i; 1021 } 1022 ctx.queue_size = ring->ring_size; 1023 ctx.qid = ena_qid; 1024 ctx.numa_node = ring->numa_socket_id; 1025 1026 rc = ena_com_create_io_queue(ena_dev, &ctx); 1027 if (rc) { 1028 PMD_DRV_LOG(ERR, 1029 "Failed to create IO queue[%d] (qid:%d), rc: %d\n", 1030 ring->id, ena_qid, rc); 1031 return rc; 1032 } 1033 1034 rc = ena_com_get_io_handlers(ena_dev, ena_qid, 1035 &ring->ena_com_io_sq, 1036 &ring->ena_com_io_cq); 1037 if (rc) { 1038 PMD_DRV_LOG(ERR, 1039 "Failed to get IO queue[%d] handlers, rc: %d\n", 1040 ring->id, rc); 1041 ena_com_destroy_io_queue(ena_dev, ena_qid); 1042 return rc; 1043 } 1044 1045 if (ring->type == ENA_RING_TYPE_TX) 1046 ena_com_update_numa_node(ring->ena_com_io_cq, ctx.numa_node); 1047 1048 /* Start with Rx interrupts being masked. */ 1049 if (ring->type == ENA_RING_TYPE_RX && rte_intr_dp_is_en(intr_handle)) 1050 ena_rx_queue_intr_disable(dev, ring->id); 1051 1052 return 0; 1053 } 1054 1055 static void ena_queue_stop(struct ena_ring *ring) 1056 { 1057 struct ena_com_dev *ena_dev = &ring->adapter->ena_dev; 1058 1059 if (ring->type == ENA_RING_TYPE_RX) { 1060 ena_com_destroy_io_queue(ena_dev, ENA_IO_RXQ_IDX(ring->id)); 1061 ena_rx_queue_release_bufs(ring); 1062 } else { 1063 ena_com_destroy_io_queue(ena_dev, ENA_IO_TXQ_IDX(ring->id)); 1064 ena_tx_queue_release_bufs(ring); 1065 } 1066 } 1067 1068 static void ena_queue_stop_all(struct rte_eth_dev *dev, 1069 enum ena_ring_type ring_type) 1070 { 1071 struct ena_adapter *adapter = dev->data->dev_private; 1072 struct ena_ring *queues = NULL; 1073 uint16_t nb_queues, i; 1074 1075 if (ring_type == ENA_RING_TYPE_RX) { 1076 queues = adapter->rx_ring; 1077 nb_queues = dev->data->nb_rx_queues; 1078 } else { 1079 queues = adapter->tx_ring; 1080 nb_queues = dev->data->nb_tx_queues; 1081 } 1082 1083 for (i = 0; i < nb_queues; ++i) 1084 if (queues[i].configured) 1085 ena_queue_stop(&queues[i]); 1086 } 1087 1088 static int ena_queue_start(struct rte_eth_dev *dev, struct 
ena_ring *ring) 1089 { 1090 int rc, bufs_num; 1091 1092 ena_assert_msg(ring->configured == 1, 1093 "Trying to start unconfigured queue\n"); 1094 1095 rc = ena_create_io_queue(dev, ring); 1096 if (rc) { 1097 PMD_INIT_LOG(ERR, "Failed to create IO queue\n"); 1098 return rc; 1099 } 1100 1101 ring->next_to_clean = 0; 1102 ring->next_to_use = 0; 1103 1104 if (ring->type == ENA_RING_TYPE_TX) { 1105 ring->tx_stats.available_desc = 1106 ena_com_free_q_entries(ring->ena_com_io_sq); 1107 return 0; 1108 } 1109 1110 bufs_num = ring->ring_size - 1; 1111 rc = ena_populate_rx_queue(ring, bufs_num); 1112 if (rc != bufs_num) { 1113 ena_com_destroy_io_queue(&ring->adapter->ena_dev, 1114 ENA_IO_RXQ_IDX(ring->id)); 1115 PMD_INIT_LOG(ERR, "Failed to populate Rx ring\n"); 1116 return ENA_COM_FAULT; 1117 } 1118 /* Flush per-core RX buffers pools cache as they can be used on other 1119 * cores as well. 1120 */ 1121 rte_mempool_cache_flush(NULL, ring->mb_pool); 1122 1123 return 0; 1124 } 1125 1126 static int ena_tx_queue_setup(struct rte_eth_dev *dev, 1127 uint16_t queue_idx, 1128 uint16_t nb_desc, 1129 unsigned int socket_id, 1130 const struct rte_eth_txconf *tx_conf) 1131 { 1132 struct ena_ring *txq = NULL; 1133 struct ena_adapter *adapter = dev->data->dev_private; 1134 unsigned int i; 1135 uint16_t dyn_thresh; 1136 1137 txq = &adapter->tx_ring[queue_idx]; 1138 1139 if (txq->configured) { 1140 PMD_DRV_LOG(CRIT, 1141 "API violation. 
Queue[%d] is already configured\n", 1142 queue_idx); 1143 return ENA_COM_FAULT; 1144 } 1145 1146 if (!rte_is_power_of_2(nb_desc)) { 1147 PMD_DRV_LOG(ERR, 1148 "Unsupported size of Tx queue: %d is not a power of 2.\n", 1149 nb_desc); 1150 return -EINVAL; 1151 } 1152 1153 if (nb_desc > adapter->max_tx_ring_size) { 1154 PMD_DRV_LOG(ERR, 1155 "Unsupported size of Tx queue (max size: %d)\n", 1156 adapter->max_tx_ring_size); 1157 return -EINVAL; 1158 } 1159 1160 txq->port_id = dev->data->port_id; 1161 txq->next_to_clean = 0; 1162 txq->next_to_use = 0; 1163 txq->ring_size = nb_desc; 1164 txq->size_mask = nb_desc - 1; 1165 txq->numa_socket_id = socket_id; 1166 txq->pkts_without_db = false; 1167 txq->last_cleanup_ticks = 0; 1168 1169 txq->tx_buffer_info = rte_zmalloc_socket("txq->tx_buffer_info", 1170 sizeof(struct ena_tx_buffer) * txq->ring_size, 1171 RTE_CACHE_LINE_SIZE, 1172 socket_id); 1173 if (!txq->tx_buffer_info) { 1174 PMD_DRV_LOG(ERR, 1175 "Failed to allocate memory for Tx buffer info\n"); 1176 return -ENOMEM; 1177 } 1178 1179 txq->empty_tx_reqs = rte_zmalloc_socket("txq->empty_tx_reqs", 1180 sizeof(uint16_t) * txq->ring_size, 1181 RTE_CACHE_LINE_SIZE, 1182 socket_id); 1183 if (!txq->empty_tx_reqs) { 1184 PMD_DRV_LOG(ERR, 1185 "Failed to allocate memory for empty Tx requests\n"); 1186 rte_free(txq->tx_buffer_info); 1187 return -ENOMEM; 1188 } 1189 1190 txq->push_buf_intermediate_buf = 1191 rte_zmalloc_socket("txq->push_buf_intermediate_buf", 1192 txq->tx_max_header_size, 1193 RTE_CACHE_LINE_SIZE, 1194 socket_id); 1195 if (!txq->push_buf_intermediate_buf) { 1196 PMD_DRV_LOG(ERR, "Failed to alloc push buffer for LLQ\n"); 1197 rte_free(txq->tx_buffer_info); 1198 rte_free(txq->empty_tx_reqs); 1199 return -ENOMEM; 1200 } 1201 1202 for (i = 0; i < txq->ring_size; i++) 1203 txq->empty_tx_reqs[i] = i; 1204 1205 txq->offloads = tx_conf->offloads | dev->data->dev_conf.txmode.offloads; 1206 1207 /* Check if caller provided the Tx cleanup threshold value. 
*/ 1208 if (tx_conf->tx_free_thresh != 0) { 1209 txq->tx_free_thresh = tx_conf->tx_free_thresh; 1210 } else { 1211 dyn_thresh = txq->ring_size - 1212 txq->ring_size / ENA_REFILL_THRESH_DIVIDER; 1213 txq->tx_free_thresh = RTE_MAX(dyn_thresh, 1214 txq->ring_size - ENA_REFILL_THRESH_PACKET); 1215 } 1216 1217 txq->missing_tx_completion_threshold = 1218 RTE_MIN(txq->ring_size / 2, ENA_DEFAULT_MISSING_COMP); 1219 1220 /* Store pointer to this queue in upper layer */ 1221 txq->configured = 1; 1222 dev->data->tx_queues[queue_idx] = txq; 1223 1224 return 0; 1225 } 1226 1227 static int ena_rx_queue_setup(struct rte_eth_dev *dev, 1228 uint16_t queue_idx, 1229 uint16_t nb_desc, 1230 unsigned int socket_id, 1231 const struct rte_eth_rxconf *rx_conf, 1232 struct rte_mempool *mp) 1233 { 1234 struct ena_adapter *adapter = dev->data->dev_private; 1235 struct ena_ring *rxq = NULL; 1236 size_t buffer_size; 1237 int i; 1238 uint16_t dyn_thresh; 1239 1240 rxq = &adapter->rx_ring[queue_idx]; 1241 if (rxq->configured) { 1242 PMD_DRV_LOG(CRIT, 1243 "API violation. 
Queue[%d] is already configured\n", 1244 queue_idx); 1245 return ENA_COM_FAULT; 1246 } 1247 1248 if (!rte_is_power_of_2(nb_desc)) { 1249 PMD_DRV_LOG(ERR, 1250 "Unsupported size of Rx queue: %d is not a power of 2.\n", 1251 nb_desc); 1252 return -EINVAL; 1253 } 1254 1255 if (nb_desc > adapter->max_rx_ring_size) { 1256 PMD_DRV_LOG(ERR, 1257 "Unsupported size of Rx queue (max size: %d)\n", 1258 adapter->max_rx_ring_size); 1259 return -EINVAL; 1260 } 1261 1262 /* ENA isn't supporting buffers smaller than 1400 bytes */ 1263 buffer_size = rte_pktmbuf_data_room_size(mp) - RTE_PKTMBUF_HEADROOM; 1264 if (buffer_size < ENA_RX_BUF_MIN_SIZE) { 1265 PMD_DRV_LOG(ERR, 1266 "Unsupported size of Rx buffer: %zu (min size: %d)\n", 1267 buffer_size, ENA_RX_BUF_MIN_SIZE); 1268 return -EINVAL; 1269 } 1270 1271 rxq->port_id = dev->data->port_id; 1272 rxq->next_to_clean = 0; 1273 rxq->next_to_use = 0; 1274 rxq->ring_size = nb_desc; 1275 rxq->size_mask = nb_desc - 1; 1276 rxq->numa_socket_id = socket_id; 1277 rxq->mb_pool = mp; 1278 1279 rxq->rx_buffer_info = rte_zmalloc_socket("rxq->buffer_info", 1280 sizeof(struct ena_rx_buffer) * nb_desc, 1281 RTE_CACHE_LINE_SIZE, 1282 socket_id); 1283 if (!rxq->rx_buffer_info) { 1284 PMD_DRV_LOG(ERR, 1285 "Failed to allocate memory for Rx buffer info\n"); 1286 return -ENOMEM; 1287 } 1288 1289 rxq->rx_refill_buffer = rte_zmalloc_socket("rxq->rx_refill_buffer", 1290 sizeof(struct rte_mbuf *) * nb_desc, 1291 RTE_CACHE_LINE_SIZE, 1292 socket_id); 1293 if (!rxq->rx_refill_buffer) { 1294 PMD_DRV_LOG(ERR, 1295 "Failed to allocate memory for Rx refill buffer\n"); 1296 rte_free(rxq->rx_buffer_info); 1297 rxq->rx_buffer_info = NULL; 1298 return -ENOMEM; 1299 } 1300 1301 rxq->empty_rx_reqs = rte_zmalloc_socket("rxq->empty_rx_reqs", 1302 sizeof(uint16_t) * nb_desc, 1303 RTE_CACHE_LINE_SIZE, 1304 socket_id); 1305 if (!rxq->empty_rx_reqs) { 1306 PMD_DRV_LOG(ERR, 1307 "Failed to allocate memory for empty Rx requests\n"); 1308 rte_free(rxq->rx_buffer_info); 1309 
rxq->rx_buffer_info = NULL; 1310 rte_free(rxq->rx_refill_buffer); 1311 rxq->rx_refill_buffer = NULL; 1312 return -ENOMEM; 1313 } 1314 1315 for (i = 0; i < nb_desc; i++) 1316 rxq->empty_rx_reqs[i] = i; 1317 1318 rxq->offloads = rx_conf->offloads | dev->data->dev_conf.rxmode.offloads; 1319 1320 if (rx_conf->rx_free_thresh != 0) { 1321 rxq->rx_free_thresh = rx_conf->rx_free_thresh; 1322 } else { 1323 dyn_thresh = rxq->ring_size / ENA_REFILL_THRESH_DIVIDER; 1324 rxq->rx_free_thresh = RTE_MIN(dyn_thresh, 1325 (uint16_t)(ENA_REFILL_THRESH_PACKET)); 1326 } 1327 1328 /* Store pointer to this queue in upper layer */ 1329 rxq->configured = 1; 1330 dev->data->rx_queues[queue_idx] = rxq; 1331 1332 return 0; 1333 } 1334 1335 static int ena_add_single_rx_desc(struct ena_com_io_sq *io_sq, 1336 struct rte_mbuf *mbuf, uint16_t id) 1337 { 1338 struct ena_com_buf ebuf; 1339 int rc; 1340 1341 /* prepare physical address for DMA transaction */ 1342 ebuf.paddr = mbuf->buf_iova + RTE_PKTMBUF_HEADROOM; 1343 ebuf.len = mbuf->buf_len - RTE_PKTMBUF_HEADROOM; 1344 1345 /* pass resource to device */ 1346 rc = ena_com_add_single_rx_desc(io_sq, &ebuf, id); 1347 if (unlikely(rc != 0)) 1348 PMD_RX_LOG(WARNING, "Failed adding Rx desc\n"); 1349 1350 return rc; 1351 } 1352 1353 static int ena_populate_rx_queue(struct ena_ring *rxq, unsigned int count) 1354 { 1355 unsigned int i; 1356 int rc; 1357 uint16_t next_to_use = rxq->next_to_use; 1358 uint16_t req_id; 1359 #ifdef RTE_ETHDEV_DEBUG_RX 1360 uint16_t in_use; 1361 #endif 1362 struct rte_mbuf **mbufs = rxq->rx_refill_buffer; 1363 1364 if (unlikely(!count)) 1365 return 0; 1366 1367 #ifdef RTE_ETHDEV_DEBUG_RX 1368 in_use = rxq->ring_size - 1 - 1369 ena_com_free_q_entries(rxq->ena_com_io_sq); 1370 if (unlikely((in_use + count) >= rxq->ring_size)) 1371 PMD_RX_LOG(ERR, "Bad Rx ring state\n"); 1372 #endif 1373 1374 /* get resources for incoming packets */ 1375 rc = rte_pktmbuf_alloc_bulk(rxq->mb_pool, mbufs, count); 1376 if (unlikely(rc < 0)) { 1377 
rte_atomic64_inc(&rxq->adapter->drv_stats->rx_nombuf); 1378 ++rxq->rx_stats.mbuf_alloc_fail; 1379 PMD_RX_LOG(DEBUG, "There are not enough free buffers\n"); 1380 return 0; 1381 } 1382 1383 for (i = 0; i < count; i++) { 1384 struct rte_mbuf *mbuf = mbufs[i]; 1385 struct ena_rx_buffer *rx_info; 1386 1387 if (likely((i + 4) < count)) 1388 rte_prefetch0(mbufs[i + 4]); 1389 1390 req_id = rxq->empty_rx_reqs[next_to_use]; 1391 rx_info = &rxq->rx_buffer_info[req_id]; 1392 1393 rc = ena_add_single_rx_desc(rxq->ena_com_io_sq, mbuf, req_id); 1394 if (unlikely(rc != 0)) 1395 break; 1396 1397 rx_info->mbuf = mbuf; 1398 next_to_use = ENA_IDX_NEXT_MASKED(next_to_use, rxq->size_mask); 1399 } 1400 1401 if (unlikely(i < count)) { 1402 PMD_RX_LOG(WARNING, 1403 "Refilled Rx queue[%d] with only %d/%d buffers\n", 1404 rxq->id, i, count); 1405 rte_pktmbuf_free_bulk(&mbufs[i], count - i); 1406 ++rxq->rx_stats.refill_partial; 1407 } 1408 1409 /* When we submitted free recources to device... */ 1410 if (likely(i > 0)) { 1411 /* ...let HW know that it can fill buffers with data. */ 1412 ena_com_write_sq_doorbell(rxq->ena_com_io_sq); 1413 1414 rxq->next_to_use = next_to_use; 1415 } 1416 1417 return i; 1418 } 1419 1420 static int ena_device_init(struct ena_com_dev *ena_dev, 1421 struct rte_pci_device *pdev, 1422 struct ena_com_dev_get_features_ctx *get_feat_ctx, 1423 bool *wd_state) 1424 { 1425 uint32_t aenq_groups; 1426 int rc; 1427 bool readless_supported; 1428 1429 /* Initialize mmio registers */ 1430 rc = ena_com_mmio_reg_read_request_init(ena_dev); 1431 if (rc) { 1432 PMD_DRV_LOG(ERR, "Failed to init MMIO read less\n"); 1433 return rc; 1434 } 1435 1436 /* The PCIe configuration space revision id indicate if mmio reg 1437 * read is disabled. 
1438 */ 1439 readless_supported = !(pdev->id.class_id & ENA_MMIO_DISABLE_REG_READ); 1440 ena_com_set_mmio_read_mode(ena_dev, readless_supported); 1441 1442 /* reset device */ 1443 rc = ena_com_dev_reset(ena_dev, ENA_REGS_RESET_NORMAL); 1444 if (rc) { 1445 PMD_DRV_LOG(ERR, "Cannot reset device\n"); 1446 goto err_mmio_read_less; 1447 } 1448 1449 /* check FW version */ 1450 rc = ena_com_validate_version(ena_dev); 1451 if (rc) { 1452 PMD_DRV_LOG(ERR, "Device version is too low\n"); 1453 goto err_mmio_read_less; 1454 } 1455 1456 ena_dev->dma_addr_bits = ena_com_get_dma_width(ena_dev); 1457 1458 /* ENA device administration layer init */ 1459 rc = ena_com_admin_init(ena_dev, &aenq_handlers); 1460 if (rc) { 1461 PMD_DRV_LOG(ERR, 1462 "Cannot initialize ENA admin queue\n"); 1463 goto err_mmio_read_less; 1464 } 1465 1466 /* To enable the msix interrupts the driver needs to know the number 1467 * of queues. So the driver uses polling mode to retrieve this 1468 * information. 1469 */ 1470 ena_com_set_admin_polling_mode(ena_dev, true); 1471 1472 ena_config_host_info(ena_dev); 1473 1474 /* Get Device Attributes and features */ 1475 rc = ena_com_get_dev_attr_feat(ena_dev, get_feat_ctx); 1476 if (rc) { 1477 PMD_DRV_LOG(ERR, 1478 "Cannot get attribute for ENA device, rc: %d\n", rc); 1479 goto err_admin_init; 1480 } 1481 1482 aenq_groups = BIT(ENA_ADMIN_LINK_CHANGE) | 1483 BIT(ENA_ADMIN_NOTIFICATION) | 1484 BIT(ENA_ADMIN_KEEP_ALIVE) | 1485 BIT(ENA_ADMIN_FATAL_ERROR) | 1486 BIT(ENA_ADMIN_WARNING); 1487 1488 aenq_groups &= get_feat_ctx->aenq.supported_groups; 1489 rc = ena_com_set_aenq_config(ena_dev, aenq_groups); 1490 if (rc) { 1491 PMD_DRV_LOG(ERR, "Cannot configure AENQ groups, rc: %d\n", rc); 1492 goto err_admin_init; 1493 } 1494 1495 *wd_state = !!(aenq_groups & BIT(ENA_ADMIN_KEEP_ALIVE)); 1496 1497 return 0; 1498 1499 err_admin_init: 1500 ena_com_admin_destroy(ena_dev); 1501 1502 err_mmio_read_less: 1503 ena_com_mmio_reg_read_request_destroy(ena_dev); 1504 1505 return rc; 1506 
} 1507 1508 static void ena_interrupt_handler_rte(void *cb_arg) 1509 { 1510 struct rte_eth_dev *dev = cb_arg; 1511 struct ena_adapter *adapter = dev->data->dev_private; 1512 struct ena_com_dev *ena_dev = &adapter->ena_dev; 1513 1514 ena_com_admin_q_comp_intr_handler(ena_dev); 1515 if (likely(adapter->state != ENA_ADAPTER_STATE_CLOSED)) 1516 ena_com_aenq_intr_handler(ena_dev, dev); 1517 } 1518 1519 static void check_for_missing_keep_alive(struct ena_adapter *adapter) 1520 { 1521 if (!adapter->wd_state) 1522 return; 1523 1524 if (adapter->keep_alive_timeout == ENA_HW_HINTS_NO_TIMEOUT) 1525 return; 1526 1527 if (unlikely((rte_get_timer_cycles() - adapter->timestamp_wd) >= 1528 adapter->keep_alive_timeout)) { 1529 PMD_DRV_LOG(ERR, "Keep alive timeout\n"); 1530 adapter->reset_reason = ENA_REGS_RESET_KEEP_ALIVE_TO; 1531 adapter->trigger_reset = true; 1532 ++adapter->dev_stats.wd_expired; 1533 } 1534 } 1535 1536 /* Check if admin queue is enabled */ 1537 static void check_for_admin_com_state(struct ena_adapter *adapter) 1538 { 1539 if (unlikely(!ena_com_get_admin_running_state(&adapter->ena_dev))) { 1540 PMD_DRV_LOG(ERR, "ENA admin queue is not in running state\n"); 1541 adapter->reset_reason = ENA_REGS_RESET_ADMIN_TO; 1542 adapter->trigger_reset = true; 1543 } 1544 } 1545 1546 static int check_for_tx_completion_in_queue(struct ena_adapter *adapter, 1547 struct ena_ring *tx_ring) 1548 { 1549 struct ena_tx_buffer *tx_buf; 1550 uint64_t timestamp; 1551 uint64_t completion_delay; 1552 uint32_t missed_tx = 0; 1553 unsigned int i; 1554 int rc = 0; 1555 1556 for (i = 0; i < tx_ring->ring_size; ++i) { 1557 tx_buf = &tx_ring->tx_buffer_info[i]; 1558 timestamp = tx_buf->timestamp; 1559 1560 if (timestamp == 0) 1561 continue; 1562 1563 completion_delay = rte_get_timer_cycles() - timestamp; 1564 if (completion_delay > adapter->missing_tx_completion_to) { 1565 if (unlikely(!tx_buf->print_once)) { 1566 PMD_TX_LOG(WARNING, 1567 "Found a Tx that wasn't completed on time, qid %d, index 
%d. " 1568 "Missing Tx outstanding for %" PRIu64 " msecs.\n", 1569 tx_ring->id, i, completion_delay / 1570 rte_get_timer_hz() * 1000); 1571 tx_buf->print_once = true; 1572 } 1573 ++missed_tx; 1574 } 1575 } 1576 1577 if (unlikely(missed_tx > tx_ring->missing_tx_completion_threshold)) { 1578 PMD_DRV_LOG(ERR, 1579 "The number of lost Tx completions is above the threshold (%d > %d). " 1580 "Trigger the device reset.\n", 1581 missed_tx, 1582 tx_ring->missing_tx_completion_threshold); 1583 adapter->reset_reason = ENA_REGS_RESET_MISS_TX_CMPL; 1584 adapter->trigger_reset = true; 1585 rc = -EIO; 1586 } 1587 1588 tx_ring->tx_stats.missed_tx += missed_tx; 1589 1590 return rc; 1591 } 1592 1593 static void check_for_tx_completions(struct ena_adapter *adapter) 1594 { 1595 struct ena_ring *tx_ring; 1596 uint64_t tx_cleanup_delay; 1597 size_t qid; 1598 int budget; 1599 uint16_t nb_tx_queues = adapter->edev_data->nb_tx_queues; 1600 1601 if (adapter->missing_tx_completion_to == ENA_HW_HINTS_NO_TIMEOUT) 1602 return; 1603 1604 nb_tx_queues = adapter->edev_data->nb_tx_queues; 1605 budget = adapter->missing_tx_completion_budget; 1606 1607 qid = adapter->last_tx_comp_qid; 1608 while (budget-- > 0) { 1609 tx_ring = &adapter->tx_ring[qid]; 1610 1611 /* Tx cleanup is called only by the burst function and can be 1612 * called dynamically by the application. Also cleanup is 1613 * limited by the threshold. To avoid false detection of the 1614 * missing HW Tx completion, get the delay since last cleanup 1615 * function was called. 
1616 */ 1617 tx_cleanup_delay = rte_get_timer_cycles() - 1618 tx_ring->last_cleanup_ticks; 1619 if (tx_cleanup_delay < adapter->tx_cleanup_stall_delay) 1620 check_for_tx_completion_in_queue(adapter, tx_ring); 1621 qid = (qid + 1) % nb_tx_queues; 1622 } 1623 1624 adapter->last_tx_comp_qid = qid; 1625 } 1626 1627 static void ena_timer_wd_callback(__rte_unused struct rte_timer *timer, 1628 void *arg) 1629 { 1630 struct rte_eth_dev *dev = arg; 1631 struct ena_adapter *adapter = dev->data->dev_private; 1632 1633 check_for_missing_keep_alive(adapter); 1634 check_for_admin_com_state(adapter); 1635 check_for_tx_completions(adapter); 1636 1637 if (unlikely(adapter->trigger_reset)) { 1638 PMD_DRV_LOG(ERR, "Trigger reset is on\n"); 1639 rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_RESET, 1640 NULL); 1641 } 1642 } 1643 1644 static inline void 1645 set_default_llq_configurations(struct ena_llq_configurations *llq_config, 1646 struct ena_admin_feature_llq_desc *llq, 1647 bool use_large_llq_hdr) 1648 { 1649 llq_config->llq_header_location = ENA_ADMIN_INLINE_HEADER; 1650 llq_config->llq_stride_ctrl = ENA_ADMIN_MULTIPLE_DESCS_PER_ENTRY; 1651 llq_config->llq_num_decs_before_header = 1652 ENA_ADMIN_LLQ_NUM_DESCS_BEFORE_HEADER_2; 1653 1654 if (use_large_llq_hdr && 1655 (llq->entry_size_ctrl_supported & ENA_ADMIN_LIST_ENTRY_SIZE_256B)) { 1656 llq_config->llq_ring_entry_size = 1657 ENA_ADMIN_LIST_ENTRY_SIZE_256B; 1658 llq_config->llq_ring_entry_size_value = 256; 1659 } else { 1660 llq_config->llq_ring_entry_size = 1661 ENA_ADMIN_LIST_ENTRY_SIZE_128B; 1662 llq_config->llq_ring_entry_size_value = 128; 1663 } 1664 } 1665 1666 static int 1667 ena_set_queues_placement_policy(struct ena_adapter *adapter, 1668 struct ena_com_dev *ena_dev, 1669 struct ena_admin_feature_llq_desc *llq, 1670 struct ena_llq_configurations *llq_default_configurations) 1671 { 1672 int rc; 1673 u32 llq_feature_mask; 1674 1675 llq_feature_mask = 1 << ENA_ADMIN_LLQ; 1676 if (!(ena_dev->supported_features & 
llq_feature_mask)) { 1677 PMD_DRV_LOG(INFO, 1678 "LLQ is not supported. Fallback to host mode policy.\n"); 1679 ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST; 1680 return 0; 1681 } 1682 1683 rc = ena_com_config_dev_mode(ena_dev, llq, llq_default_configurations); 1684 if (unlikely(rc)) { 1685 PMD_INIT_LOG(WARNING, 1686 "Failed to config dev mode. Fallback to host mode policy.\n"); 1687 ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST; 1688 return 0; 1689 } 1690 1691 /* Nothing to config, exit */ 1692 if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_HOST) 1693 return 0; 1694 1695 if (!adapter->dev_mem_base) { 1696 PMD_DRV_LOG(ERR, 1697 "Unable to access LLQ BAR resource. Fallback to host mode policy.\n"); 1698 ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST; 1699 return 0; 1700 } 1701 1702 ena_dev->mem_bar = adapter->dev_mem_base; 1703 1704 return 0; 1705 } 1706 1707 static uint32_t ena_calc_max_io_queue_num(struct ena_com_dev *ena_dev, 1708 struct ena_com_dev_get_features_ctx *get_feat_ctx) 1709 { 1710 uint32_t io_tx_sq_num, io_tx_cq_num, io_rx_num, max_num_io_queues; 1711 1712 /* Regular queues capabilities */ 1713 if (ena_dev->supported_features & BIT(ENA_ADMIN_MAX_QUEUES_EXT)) { 1714 struct ena_admin_queue_ext_feature_fields *max_queue_ext = 1715 &get_feat_ctx->max_queue_ext.max_queue_ext; 1716 io_rx_num = RTE_MIN(max_queue_ext->max_rx_sq_num, 1717 max_queue_ext->max_rx_cq_num); 1718 io_tx_sq_num = max_queue_ext->max_tx_sq_num; 1719 io_tx_cq_num = max_queue_ext->max_tx_cq_num; 1720 } else { 1721 struct ena_admin_queue_feature_desc *max_queues = 1722 &get_feat_ctx->max_queues; 1723 io_tx_sq_num = max_queues->max_sq_num; 1724 io_tx_cq_num = max_queues->max_cq_num; 1725 io_rx_num = RTE_MIN(io_tx_sq_num, io_tx_cq_num); 1726 } 1727 1728 /* In case of LLQ use the llq number in the get feature cmd */ 1729 if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) 1730 io_tx_sq_num = 
get_feat_ctx->llq.max_llq_num; 1731 1732 max_num_io_queues = RTE_MIN(ENA_MAX_NUM_IO_QUEUES, io_rx_num); 1733 max_num_io_queues = RTE_MIN(max_num_io_queues, io_tx_sq_num); 1734 max_num_io_queues = RTE_MIN(max_num_io_queues, io_tx_cq_num); 1735 1736 if (unlikely(max_num_io_queues == 0)) { 1737 PMD_DRV_LOG(ERR, "Number of IO queues cannot not be 0\n"); 1738 return -EFAULT; 1739 } 1740 1741 return max_num_io_queues; 1742 } 1743 1744 static void 1745 ena_set_offloads(struct ena_offloads *offloads, 1746 struct ena_admin_feature_offload_desc *offload_desc) 1747 { 1748 if (offload_desc->tx & ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_IPV4_MASK) 1749 offloads->tx_offloads |= ENA_IPV4_TSO; 1750 1751 /* Tx IPv4 checksum offloads */ 1752 if (offload_desc->tx & 1753 ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L3_CSUM_IPV4_MASK) 1754 offloads->tx_offloads |= ENA_L3_IPV4_CSUM; 1755 if (offload_desc->tx & 1756 ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV4_CSUM_FULL_MASK) 1757 offloads->tx_offloads |= ENA_L4_IPV4_CSUM; 1758 if (offload_desc->tx & 1759 ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV4_CSUM_PART_MASK) 1760 offloads->tx_offloads |= ENA_L4_IPV4_CSUM_PARTIAL; 1761 1762 /* Tx IPv6 checksum offloads */ 1763 if (offload_desc->tx & 1764 ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV6_CSUM_FULL_MASK) 1765 offloads->tx_offloads |= ENA_L4_IPV6_CSUM; 1766 if (offload_desc->tx & 1767 ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV6_CSUM_PART_MASK) 1768 offloads->tx_offloads |= ENA_L4_IPV6_CSUM_PARTIAL; 1769 1770 /* Rx IPv4 checksum offloads */ 1771 if (offload_desc->rx_supported & 1772 ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L3_CSUM_IPV4_MASK) 1773 offloads->rx_offloads |= ENA_L3_IPV4_CSUM; 1774 if (offload_desc->rx_supported & 1775 ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L4_IPV4_CSUM_MASK) 1776 offloads->rx_offloads |= ENA_L4_IPV4_CSUM; 1777 1778 /* Rx IPv6 checksum offloads */ 1779 if (offload_desc->rx_supported & 1780 ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L4_IPV6_CSUM_MASK) 1781 offloads->rx_offloads |= ENA_L4_IPV6_CSUM; 1782 1783 if 
(offload_desc->rx_supported & 1784 ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_HASH_MASK) 1785 offloads->rx_offloads |= ENA_RX_RSS_HASH; 1786 } 1787 1788 static int eth_ena_dev_init(struct rte_eth_dev *eth_dev) 1789 { 1790 struct ena_calc_queue_size_ctx calc_queue_ctx = { 0 }; 1791 struct rte_pci_device *pci_dev; 1792 struct rte_intr_handle *intr_handle; 1793 struct ena_adapter *adapter = eth_dev->data->dev_private; 1794 struct ena_com_dev *ena_dev = &adapter->ena_dev; 1795 struct ena_com_dev_get_features_ctx get_feat_ctx; 1796 struct ena_llq_configurations llq_config; 1797 const char *queue_type_str; 1798 uint32_t max_num_io_queues; 1799 int rc; 1800 static int adapters_found; 1801 bool disable_meta_caching; 1802 bool wd_state = false; 1803 1804 eth_dev->dev_ops = &ena_dev_ops; 1805 eth_dev->rx_pkt_burst = ð_ena_recv_pkts; 1806 eth_dev->tx_pkt_burst = ð_ena_xmit_pkts; 1807 eth_dev->tx_pkt_prepare = ð_ena_prep_pkts; 1808 1809 if (rte_eal_process_type() != RTE_PROC_PRIMARY) 1810 return 0; 1811 1812 eth_dev->data->dev_flags |= RTE_ETH_DEV_AUTOFILL_QUEUE_XSTATS; 1813 1814 memset(adapter, 0, sizeof(struct ena_adapter)); 1815 ena_dev = &adapter->ena_dev; 1816 1817 adapter->edev_data = eth_dev->data; 1818 1819 pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev); 1820 1821 PMD_INIT_LOG(INFO, "Initializing %x:%x:%x.%d\n", 1822 pci_dev->addr.domain, 1823 pci_dev->addr.bus, 1824 pci_dev->addr.devid, 1825 pci_dev->addr.function); 1826 1827 intr_handle = &pci_dev->intr_handle; 1828 1829 adapter->regs = pci_dev->mem_resource[ENA_REGS_BAR].addr; 1830 adapter->dev_mem_base = pci_dev->mem_resource[ENA_MEM_BAR].addr; 1831 1832 if (!adapter->regs) { 1833 PMD_INIT_LOG(CRIT, "Failed to access registers BAR(%d)\n", 1834 ENA_REGS_BAR); 1835 return -ENXIO; 1836 } 1837 1838 ena_dev->reg_bar = adapter->regs; 1839 /* This is a dummy pointer for ena_com functions. 
*/ 1840 ena_dev->dmadev = adapter; 1841 1842 adapter->id_number = adapters_found; 1843 1844 snprintf(adapter->name, ENA_NAME_MAX_LEN, "ena_%d", 1845 adapter->id_number); 1846 1847 rc = ena_parse_devargs(adapter, pci_dev->device.devargs); 1848 if (rc != 0) { 1849 PMD_INIT_LOG(CRIT, "Failed to parse devargs\n"); 1850 goto err; 1851 } 1852 1853 /* device specific initialization routine */ 1854 rc = ena_device_init(ena_dev, pci_dev, &get_feat_ctx, &wd_state); 1855 if (rc) { 1856 PMD_INIT_LOG(CRIT, "Failed to init ENA device\n"); 1857 goto err; 1858 } 1859 adapter->wd_state = wd_state; 1860 1861 set_default_llq_configurations(&llq_config, &get_feat_ctx.llq, 1862 adapter->use_large_llq_hdr); 1863 rc = ena_set_queues_placement_policy(adapter, ena_dev, 1864 &get_feat_ctx.llq, &llq_config); 1865 if (unlikely(rc)) { 1866 PMD_INIT_LOG(CRIT, "Failed to set placement policy\n"); 1867 return rc; 1868 } 1869 1870 if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_HOST) 1871 queue_type_str = "Regular"; 1872 else 1873 queue_type_str = "Low latency"; 1874 PMD_DRV_LOG(INFO, "Placement policy: %s\n", queue_type_str); 1875 1876 calc_queue_ctx.ena_dev = ena_dev; 1877 calc_queue_ctx.get_feat_ctx = &get_feat_ctx; 1878 1879 max_num_io_queues = ena_calc_max_io_queue_num(ena_dev, &get_feat_ctx); 1880 rc = ena_calc_io_queue_size(&calc_queue_ctx, 1881 adapter->use_large_llq_hdr); 1882 if (unlikely((rc != 0) || (max_num_io_queues == 0))) { 1883 rc = -EFAULT; 1884 goto err_device_destroy; 1885 } 1886 1887 adapter->max_tx_ring_size = calc_queue_ctx.max_tx_queue_size; 1888 adapter->max_rx_ring_size = calc_queue_ctx.max_rx_queue_size; 1889 adapter->max_tx_sgl_size = calc_queue_ctx.max_tx_sgl_size; 1890 adapter->max_rx_sgl_size = calc_queue_ctx.max_rx_sgl_size; 1891 adapter->max_num_io_queues = max_num_io_queues; 1892 1893 if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) { 1894 disable_meta_caching = 1895 !!(get_feat_ctx.llq.accel_mode.u.get.supported_flags & 1896 
/*
 * Tear down the ENA device state owned by this port.
 *
 * The call is a no-op when the adapter is already in ENA_ADAPTER_STATE_FREE,
 * so invoking it more than once is harmless. Otherwise the admin queue is
 * flagged as not running, the port is closed if it has not been closed yet,
 * and the ena_com destroy/delete routines for RSS, the debug area, host info,
 * the admin queue and the MMIO read request are invoked in that order before
 * the adapter is marked as free.
 */
static void ena_destroy_device(struct rte_eth_dev *eth_dev)
{
	struct ena_adapter *adapter = eth_dev->data->dev_private;
	struct ena_com_dev *ena_dev = &adapter->ena_dev;

	/* Nothing left to release - the device was already destroyed. */
	if (adapter->state == ENA_ADAPTER_STATE_FREE)
		return;

	ena_com_set_admin_running_state(ena_dev, false);

	/* Close the port first unless ena_close() has already run. */
	if (adapter->state != ENA_ADAPTER_STATE_CLOSED)
		ena_close(eth_dev);

	ena_com_rss_destroy(ena_dev);

	ena_com_delete_debug_area(ena_dev);
	ena_com_delete_host_info(ena_dev);

	/*
	 * Abort outstanding admin commands and wait for the abort to complete
	 * before the admin queue itself is destroyed.
	 */
	ena_com_abort_admin_commands(ena_dev);
	ena_com_wait_for_abort_completion(ena_dev);
	ena_com_admin_destroy(ena_dev);
	ena_com_mmio_reg_read_request_destroy(ena_dev);

	adapter->state = ENA_ADAPTER_STATE_FREE;
}
If there will be a lot of missing Tx completions in the 2025 * queue, they will be detected sooner or later. 2026 */ 2027 adapter->tx_cleanup_stall_delay = adapter->missing_tx_completion_to / 2; 2028 2029 adapter->tx_selected_offloads = dev->data->dev_conf.txmode.offloads; 2030 adapter->rx_selected_offloads = dev->data->dev_conf.rxmode.offloads; 2031 2032 return 0; 2033 } 2034 2035 static void ena_init_rings(struct ena_adapter *adapter, 2036 bool disable_meta_caching) 2037 { 2038 size_t i; 2039 2040 for (i = 0; i < adapter->max_num_io_queues; i++) { 2041 struct ena_ring *ring = &adapter->tx_ring[i]; 2042 2043 ring->configured = 0; 2044 ring->type = ENA_RING_TYPE_TX; 2045 ring->adapter = adapter; 2046 ring->id = i; 2047 ring->tx_mem_queue_type = adapter->ena_dev.tx_mem_queue_type; 2048 ring->tx_max_header_size = adapter->ena_dev.tx_max_header_size; 2049 ring->sgl_size = adapter->max_tx_sgl_size; 2050 ring->disable_meta_caching = disable_meta_caching; 2051 } 2052 2053 for (i = 0; i < adapter->max_num_io_queues; i++) { 2054 struct ena_ring *ring = &adapter->rx_ring[i]; 2055 2056 ring->configured = 0; 2057 ring->type = ENA_RING_TYPE_RX; 2058 ring->adapter = adapter; 2059 ring->id = i; 2060 ring->sgl_size = adapter->max_rx_sgl_size; 2061 } 2062 } 2063 2064 static uint64_t ena_get_rx_port_offloads(struct ena_adapter *adapter) 2065 { 2066 uint64_t port_offloads = 0; 2067 2068 if (adapter->offloads.rx_offloads & ENA_L3_IPV4_CSUM) 2069 port_offloads |= RTE_ETH_RX_OFFLOAD_IPV4_CKSUM; 2070 2071 if (adapter->offloads.rx_offloads & 2072 (ENA_L4_IPV4_CSUM | ENA_L4_IPV6_CSUM)) 2073 port_offloads |= 2074 RTE_ETH_RX_OFFLOAD_UDP_CKSUM | RTE_ETH_RX_OFFLOAD_TCP_CKSUM; 2075 2076 if (adapter->offloads.rx_offloads & ENA_RX_RSS_HASH) 2077 port_offloads |= RTE_ETH_RX_OFFLOAD_RSS_HASH; 2078 2079 port_offloads |= RTE_ETH_RX_OFFLOAD_SCATTER; 2080 2081 return port_offloads; 2082 } 2083 2084 static uint64_t ena_get_tx_port_offloads(struct ena_adapter *adapter) 2085 { 2086 uint64_t port_offloads 
= 0; 2087 2088 if (adapter->offloads.tx_offloads & ENA_IPV4_TSO) 2089 port_offloads |= RTE_ETH_TX_OFFLOAD_TCP_TSO; 2090 2091 if (adapter->offloads.tx_offloads & ENA_L3_IPV4_CSUM) 2092 port_offloads |= RTE_ETH_TX_OFFLOAD_IPV4_CKSUM; 2093 if (adapter->offloads.tx_offloads & 2094 (ENA_L4_IPV4_CSUM_PARTIAL | ENA_L4_IPV4_CSUM | 2095 ENA_L4_IPV6_CSUM | ENA_L4_IPV6_CSUM_PARTIAL)) 2096 port_offloads |= 2097 RTE_ETH_TX_OFFLOAD_UDP_CKSUM | RTE_ETH_TX_OFFLOAD_TCP_CKSUM; 2098 2099 port_offloads |= RTE_ETH_TX_OFFLOAD_MULTI_SEGS; 2100 2101 return port_offloads; 2102 } 2103 2104 static uint64_t ena_get_rx_queue_offloads(struct ena_adapter *adapter) 2105 { 2106 RTE_SET_USED(adapter); 2107 2108 return 0; 2109 } 2110 2111 static uint64_t ena_get_tx_queue_offloads(struct ena_adapter *adapter) 2112 { 2113 RTE_SET_USED(adapter); 2114 2115 return 0; 2116 } 2117 2118 static int ena_infos_get(struct rte_eth_dev *dev, 2119 struct rte_eth_dev_info *dev_info) 2120 { 2121 struct ena_adapter *adapter; 2122 struct ena_com_dev *ena_dev; 2123 2124 ena_assert_msg(dev->data != NULL, "Uninitialized device\n"); 2125 ena_assert_msg(dev->data->dev_private != NULL, "Uninitialized device\n"); 2126 adapter = dev->data->dev_private; 2127 2128 ena_dev = &adapter->ena_dev; 2129 ena_assert_msg(ena_dev != NULL, "Uninitialized device\n"); 2130 2131 dev_info->speed_capa = 2132 RTE_ETH_LINK_SPEED_1G | 2133 RTE_ETH_LINK_SPEED_2_5G | 2134 RTE_ETH_LINK_SPEED_5G | 2135 RTE_ETH_LINK_SPEED_10G | 2136 RTE_ETH_LINK_SPEED_25G | 2137 RTE_ETH_LINK_SPEED_40G | 2138 RTE_ETH_LINK_SPEED_50G | 2139 RTE_ETH_LINK_SPEED_100G; 2140 2141 /* Inform framework about available features */ 2142 dev_info->rx_offload_capa = ena_get_rx_port_offloads(adapter); 2143 dev_info->tx_offload_capa = ena_get_tx_port_offloads(adapter); 2144 dev_info->rx_queue_offload_capa = ena_get_rx_queue_offloads(adapter); 2145 dev_info->tx_queue_offload_capa = ena_get_tx_queue_offloads(adapter); 2146 2147 dev_info->flow_type_rss_offloads = ENA_ALL_RSS_HF; 2148 
/*
 * Build an mbuf chain for one received packet out of the Rx buffers reported
 * by the device.
 *
 * rx_ring       - Rx ring the descriptors belong to.
 * ena_bufs      - per-descriptor length and req_id, one entry per descriptor.
 * descs         - number of entries in ena_bufs that form the packet.
 * next_to_clean - in/out ring index; advanced for every req_id that is
 *                 recorded in empty_rx_reqs.
 * offset        - extra offset added to data_off of the head mbuf.
 *
 * Returns the head of the mbuf chain, or NULL when descs is 0.
 */
static struct rte_mbuf *ena_rx_mbuf(struct ena_ring *rx_ring,
				    struct ena_com_rx_buf_info *ena_bufs,
				    uint32_t descs,
				    uint16_t *next_to_clean,
				    uint8_t offset)
{
	struct rte_mbuf *mbuf;
	struct rte_mbuf *mbuf_head;
	struct ena_rx_buffer *rx_info;
	int rc;
	uint16_t ntc, len, req_id, buf = 0;

	if (unlikely(descs == 0))
		return NULL;

	ntc = *next_to_clean;

	len = ena_bufs[buf].len;
	req_id = ena_bufs[buf].req_id;

	rx_info = &rx_ring->rx_buffer_info[req_id];

	mbuf = rx_info->mbuf;
	RTE_ASSERT(mbuf != NULL);

	ena_init_rx_mbuf(mbuf, len);

	/* Fill the mbuf head with the data specific for 1st segment. */
	mbuf_head = mbuf;
	/*
	 * NOTE(review): nb_segs is set to the full descriptor count up front.
	 * If a zero-length descriptor is encountered below, no segment is
	 * chained for it, so nb_segs looks one larger than the real chain
	 * length - confirm whether that is intended.
	 */
	mbuf_head->nb_segs = descs;
	mbuf_head->port = rx_ring->port_id;
	mbuf_head->pkt_len = len;
	mbuf_head->data_off += offset;

	/* The first buffer now belongs to the packet; recycle its req_id. */
	rx_info->mbuf = NULL;
	rx_ring->empty_rx_reqs[ntc] = req_id;
	ntc = ENA_IDX_NEXT_MASKED(ntc, rx_ring->size_mask);

	/* Walk the remaining descriptors (the first one is handled above). */
	while (--descs) {
		++buf;
		len = ena_bufs[buf].len;
		req_id = ena_bufs[buf].req_id;

		rx_info = &rx_ring->rx_buffer_info[req_id];
		RTE_ASSERT(rx_info->mbuf != NULL);

		if (unlikely(len == 0)) {
			/*
			 * Some devices can pass descriptor with the length 0.
			 * To avoid confusion, the PMD is simply putting the
			 * descriptor back, as it was never used. We'll avoid
			 * mbuf allocation that way.
			 */
			rc = ena_add_single_rx_desc(rx_ring->ena_com_io_sq,
				rx_info->mbuf, req_id);
			if (unlikely(rc != 0)) {
				/* Free the mbuf in case of an error. */
				rte_mbuf_raw_free(rx_info->mbuf);
			} else {
				/*
				 * If there was no error, just exit the loop as
				 * 0 length descriptor is always the last one.
				 * The req_id stays with the device, so it is
				 * not recorded in empty_rx_reqs.
				 */
				break;
			}
		} else {
			/* Create an mbuf chain. */
			mbuf->next = rx_info->mbuf;
			mbuf = mbuf->next;

			ena_init_rx_mbuf(mbuf, len);
			mbuf_head->pkt_len += len;
		}

		/*
		 * Mark the descriptor as depleted and perform necessary
		 * cleanup.
		 * This code will execute in two cases:
		 *  1. Descriptor len was greater than 0 - normal situation.
		 *  2. Descriptor len was 0 and we failed to add the descriptor
		 *     to the device. In that situation, we should try to add
		 *     the mbuf again in the populate routine and mark the
		 *     descriptor as used up by the device.
		 */
		rx_info->mbuf = NULL;
		rx_ring->empty_rx_reqs[ntc] = req_id;
		ntc = ENA_IDX_NEXT_MASKED(ntc, rx_ring->size_mask);
	}

	/* Publish the advanced clean index back to the caller. */
	*next_to_clean = ntc;

	return mbuf_head;
}
ena_rx_ctx.ena_bufs, 2339 ena_rx_ctx.descs, 2340 &next_to_clean, 2341 ena_rx_ctx.pkt_offset); 2342 if (unlikely(mbuf == NULL)) { 2343 for (i = 0; i < ena_rx_ctx.descs; ++i) { 2344 rx_ring->empty_rx_reqs[next_to_clean] = 2345 rx_ring->ena_bufs[i].req_id; 2346 next_to_clean = ENA_IDX_NEXT_MASKED( 2347 next_to_clean, rx_ring->size_mask); 2348 } 2349 break; 2350 } 2351 2352 /* fill mbuf attributes if any */ 2353 ena_rx_mbuf_prepare(mbuf, &ena_rx_ctx, fill_hash); 2354 2355 if (unlikely(mbuf->ol_flags & 2356 (RTE_MBUF_F_RX_IP_CKSUM_BAD | RTE_MBUF_F_RX_L4_CKSUM_BAD))) { 2357 rte_atomic64_inc(&rx_ring->adapter->drv_stats->ierrors); 2358 ++rx_ring->rx_stats.bad_csum; 2359 } 2360 2361 rx_pkts[completed] = mbuf; 2362 rx_ring->rx_stats.bytes += mbuf->pkt_len; 2363 } 2364 2365 rx_ring->rx_stats.cnt += completed; 2366 rx_ring->next_to_clean = next_to_clean; 2367 2368 free_queue_entries = ena_com_free_q_entries(rx_ring->ena_com_io_sq); 2369 2370 /* Burst refill to save doorbells, memory barriers, const interval */ 2371 if (free_queue_entries >= rx_ring->rx_free_thresh) { 2372 ena_com_update_dev_comp_head(rx_ring->ena_com_io_cq); 2373 ena_populate_rx_queue(rx_ring, free_queue_entries); 2374 } 2375 2376 return completed; 2377 } 2378 2379 static uint16_t 2380 eth_ena_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, 2381 uint16_t nb_pkts) 2382 { 2383 int32_t ret; 2384 uint32_t i; 2385 struct rte_mbuf *m; 2386 struct ena_ring *tx_ring = (struct ena_ring *)(tx_queue); 2387 struct ena_adapter *adapter = tx_ring->adapter; 2388 struct rte_ipv4_hdr *ip_hdr; 2389 uint64_t ol_flags; 2390 uint64_t l4_csum_flag; 2391 uint64_t dev_offload_capa; 2392 uint16_t frag_field; 2393 bool need_pseudo_csum; 2394 2395 dev_offload_capa = adapter->offloads.tx_offloads; 2396 for (i = 0; i != nb_pkts; i++) { 2397 m = tx_pkts[i]; 2398 ol_flags = m->ol_flags; 2399 2400 /* Check if any offload flag was set */ 2401 if (ol_flags == 0) 2402 continue; 2403 2404 l4_csum_flag = ol_flags & RTE_MBUF_F_TX_L4_MASK; 
2405 /* SCTP checksum offload is not supported by the ENA. */ 2406 if ((ol_flags & ENA_TX_OFFLOAD_NOTSUP_MASK) || 2407 l4_csum_flag == RTE_MBUF_F_TX_SCTP_CKSUM) { 2408 PMD_TX_LOG(DEBUG, 2409 "mbuf[%" PRIu32 "] has unsupported offloads flags set: 0x%" PRIu64 "\n", 2410 i, ol_flags); 2411 rte_errno = ENOTSUP; 2412 return i; 2413 } 2414 2415 #ifdef RTE_LIBRTE_ETHDEV_DEBUG 2416 /* Check if requested offload is also enabled for the queue */ 2417 if ((ol_flags & RTE_MBUF_F_TX_IP_CKSUM && 2418 !(tx_ring->offloads & RTE_ETH_TX_OFFLOAD_IPV4_CKSUM)) || 2419 (l4_csum_flag == RTE_MBUF_F_TX_TCP_CKSUM && 2420 !(tx_ring->offloads & RTE_ETH_TX_OFFLOAD_TCP_CKSUM)) || 2421 (l4_csum_flag == RTE_MBUF_F_TX_UDP_CKSUM && 2422 !(tx_ring->offloads & RTE_ETH_TX_OFFLOAD_UDP_CKSUM))) { 2423 PMD_TX_LOG(DEBUG, 2424 "mbuf[%" PRIu32 "]: requested offloads: %" PRIu16 " are not enabled for the queue[%u]\n", 2425 i, m->nb_segs, tx_ring->id); 2426 rte_errno = EINVAL; 2427 return i; 2428 } 2429 2430 /* The caller is obligated to set l2 and l3 len if any cksum 2431 * offload is enabled. 2432 */ 2433 if (unlikely(ol_flags & (RTE_MBUF_F_TX_IP_CKSUM | RTE_MBUF_F_TX_L4_MASK) && 2434 (m->l2_len == 0 || m->l3_len == 0))) { 2435 PMD_TX_LOG(DEBUG, 2436 "mbuf[%" PRIu32 "]: l2_len or l3_len values are 0 while the offload was requested\n", 2437 i); 2438 rte_errno = EINVAL; 2439 return i; 2440 } 2441 ret = rte_validate_tx_offload(m); 2442 if (ret != 0) { 2443 rte_errno = -ret; 2444 return i; 2445 } 2446 #endif 2447 2448 /* Verify HW support for requested offloads and determine if 2449 * pseudo header checksum is needed. 
2450 */ 2451 need_pseudo_csum = false; 2452 if (ol_flags & RTE_MBUF_F_TX_IPV4) { 2453 if (ol_flags & RTE_MBUF_F_TX_IP_CKSUM && 2454 !(dev_offload_capa & ENA_L3_IPV4_CSUM)) { 2455 rte_errno = ENOTSUP; 2456 return i; 2457 } 2458 2459 if (ol_flags & RTE_MBUF_F_TX_TCP_SEG && 2460 !(dev_offload_capa & ENA_IPV4_TSO)) { 2461 rte_errno = ENOTSUP; 2462 return i; 2463 } 2464 2465 /* Check HW capabilities and if pseudo csum is needed 2466 * for L4 offloads. 2467 */ 2468 if (l4_csum_flag != RTE_MBUF_F_TX_L4_NO_CKSUM && 2469 !(dev_offload_capa & ENA_L4_IPV4_CSUM)) { 2470 if (dev_offload_capa & 2471 ENA_L4_IPV4_CSUM_PARTIAL) { 2472 need_pseudo_csum = true; 2473 } else { 2474 rte_errno = ENOTSUP; 2475 return i; 2476 } 2477 } 2478 2479 /* Parse the DF flag */ 2480 ip_hdr = rte_pktmbuf_mtod_offset(m, 2481 struct rte_ipv4_hdr *, m->l2_len); 2482 frag_field = rte_be_to_cpu_16(ip_hdr->fragment_offset); 2483 if (frag_field & RTE_IPV4_HDR_DF_FLAG) { 2484 m->packet_type |= RTE_PTYPE_L4_NONFRAG; 2485 } else if (ol_flags & RTE_MBUF_F_TX_TCP_SEG) { 2486 /* In case we are supposed to TSO and have DF 2487 * not set (DF=0) hardware must be provided with 2488 * partial checksum. 2489 */ 2490 need_pseudo_csum = true; 2491 } 2492 } else if (ol_flags & RTE_MBUF_F_TX_IPV6) { 2493 /* There is no support for IPv6 TSO as for now. 
*/ 2494 if (ol_flags & RTE_MBUF_F_TX_TCP_SEG) { 2495 rte_errno = ENOTSUP; 2496 return i; 2497 } 2498 2499 /* Check HW capabilities and if pseudo csum is needed */ 2500 if (l4_csum_flag != RTE_MBUF_F_TX_L4_NO_CKSUM && 2501 !(dev_offload_capa & ENA_L4_IPV6_CSUM)) { 2502 if (dev_offload_capa & 2503 ENA_L4_IPV6_CSUM_PARTIAL) { 2504 need_pseudo_csum = true; 2505 } else { 2506 rte_errno = ENOTSUP; 2507 return i; 2508 } 2509 } 2510 } 2511 2512 if (need_pseudo_csum) { 2513 ret = rte_net_intel_cksum_flags_prepare(m, ol_flags); 2514 if (ret != 0) { 2515 rte_errno = -ret; 2516 return i; 2517 } 2518 } 2519 } 2520 2521 return i; 2522 } 2523 2524 static void ena_update_hints(struct ena_adapter *adapter, 2525 struct ena_admin_ena_hw_hints *hints) 2526 { 2527 if (hints->admin_completion_tx_timeout) 2528 adapter->ena_dev.admin_queue.completion_timeout = 2529 hints->admin_completion_tx_timeout * 1000; 2530 2531 if (hints->mmio_read_timeout) 2532 /* convert to usec */ 2533 adapter->ena_dev.mmio_read.reg_read_to = 2534 hints->mmio_read_timeout * 1000; 2535 2536 if (hints->missing_tx_completion_timeout) { 2537 if (hints->missing_tx_completion_timeout == 2538 ENA_HW_HINTS_NO_TIMEOUT) { 2539 adapter->missing_tx_completion_to = 2540 ENA_HW_HINTS_NO_TIMEOUT; 2541 } else { 2542 /* Convert from msecs to ticks */ 2543 adapter->missing_tx_completion_to = rte_get_timer_hz() * 2544 hints->missing_tx_completion_timeout / 1000; 2545 adapter->tx_cleanup_stall_delay = 2546 adapter->missing_tx_completion_to / 2; 2547 } 2548 } 2549 2550 if (hints->driver_watchdog_timeout) { 2551 if (hints->driver_watchdog_timeout == ENA_HW_HINTS_NO_TIMEOUT) 2552 adapter->keep_alive_timeout = ENA_HW_HINTS_NO_TIMEOUT; 2553 else 2554 // Convert msecs to ticks 2555 adapter->keep_alive_timeout = 2556 (hints->driver_watchdog_timeout * 2557 rte_get_timer_hz()) / 1000; 2558 } 2559 } 2560 2561 static int ena_check_space_and_linearize_mbuf(struct ena_ring *tx_ring, 2562 struct rte_mbuf *mbuf) 2563 { 2564 struct ena_com_dev 
/*
 * Describe an mbuf chain to the device: pick the header that is pushed
 * directly (device placement policy only) and fill tx_info->bufs with the
 * address/length of every remaining piece of packet data.
 *
 * tx_ring     - Tx ring the packet is sent on.
 * tx_info     - per-request bookkeeping; mbuf and bufs[] are written and
 *               num_of_bufs is incremented for every buffer added (it is not
 *               reset here, so the caller must zero it beforehand).
 * mbuf        - head of the chain to map.
 * push_header - out: pointer to the header bytes to push, or NULL when the
 *               ring does not use the device placement policy.
 * header_len  - out: number of header bytes to push, 0 when not pushing.
 */
static void ena_tx_map_mbuf(struct ena_ring *tx_ring,
	struct ena_tx_buffer *tx_info,
	struct rte_mbuf *mbuf,
	void **push_header,
	uint16_t *header_len)
{
	struct ena_com_buf *ena_buf;
	uint16_t delta, seg_len, push_len;

	/* delta: header bytes that spill over into the following segments. */
	delta = 0;
	seg_len = mbuf->data_len;

	tx_info->mbuf = mbuf;
	ena_buf = tx_info->bufs;

	if (tx_ring->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) {
		/*
		 * Tx header might be (and will be in most cases) smaller than
		 * tx_max_header_size. But it's not an issue to send more data
		 * to the device, than actually needed if the mbuf size is
		 * greater than tx_max_header_size.
		 */
		push_len = RTE_MIN(mbuf->pkt_len, tx_ring->tx_max_header_size);
		*header_len = push_len;

		if (likely(push_len <= seg_len)) {
			/* If the push header is in the single segment, then
			 * just point it to the 1st mbuf data.
			 */
			*push_header = rte_pktmbuf_mtod(mbuf, uint8_t *);
		} else {
			/* If the push header lays in the several segments, copy
			 * it to the intermediate buffer.
			 */
			rte_pktmbuf_read(mbuf, 0, push_len,
				tx_ring->push_buf_intermediate_buf);
			*push_header = tx_ring->push_buf_intermediate_buf;
			delta = push_len - seg_len;
		}
	} else {
		*push_header = NULL;
		*header_len = 0;
		push_len = 0;
	}

	/* Process first segment taking into consideration pushed header */
	if (seg_len > push_len) {
		ena_buf->paddr = mbuf->buf_iova +
				mbuf->data_off +
				push_len;
		ena_buf->len = seg_len - push_len;
		ena_buf++;
		tx_info->num_of_bufs++;
	}

	while ((mbuf = mbuf->next) != NULL) {
		seg_len = mbuf->data_len;

		/* Skip mbufs if whole data is pushed as a header */
		/*
		 * NOTE(review): when delta == seg_len the segment is not
		 * skipped and a buffer with len 0 is emitted below - confirm
		 * whether the comparison should be '>='.
		 */
		if (unlikely(delta > seg_len)) {
			delta -= seg_len;
			continue;
		}

		/* Map what is left of this segment after the pushed bytes. */
		ena_buf->paddr = mbuf->buf_iova + mbuf->data_off + delta;
		ena_buf->len = seg_len - delta;
		ena_buf++;
		tx_info->num_of_bufs++;

		/* The pushed header is fully consumed from here on. */
		delta = 0;
	}
}
return rc; 2698 2699 next_to_use = tx_ring->next_to_use; 2700 2701 req_id = tx_ring->empty_tx_reqs[next_to_use]; 2702 tx_info = &tx_ring->tx_buffer_info[req_id]; 2703 tx_info->num_of_bufs = 0; 2704 2705 ena_tx_map_mbuf(tx_ring, tx_info, mbuf, &push_header, &header_len); 2706 2707 ena_tx_ctx.ena_bufs = tx_info->bufs; 2708 ena_tx_ctx.push_header = push_header; 2709 ena_tx_ctx.num_bufs = tx_info->num_of_bufs; 2710 ena_tx_ctx.req_id = req_id; 2711 ena_tx_ctx.header_len = header_len; 2712 2713 /* Set Tx offloads flags, if applicable */ 2714 ena_tx_mbuf_prepare(mbuf, &ena_tx_ctx, tx_ring->offloads, 2715 tx_ring->disable_meta_caching); 2716 2717 if (unlikely(ena_com_is_doorbell_needed(tx_ring->ena_com_io_sq, 2718 &ena_tx_ctx))) { 2719 PMD_TX_LOG(DEBUG, 2720 "LLQ Tx max burst size of queue %d achieved, writing doorbell to send burst\n", 2721 tx_ring->id); 2722 ena_com_write_sq_doorbell(tx_ring->ena_com_io_sq); 2723 tx_ring->tx_stats.doorbells++; 2724 tx_ring->pkts_without_db = false; 2725 } 2726 2727 /* prepare the packet's descriptors to dma engine */ 2728 rc = ena_com_prepare_tx(tx_ring->ena_com_io_sq, &ena_tx_ctx, 2729 &nb_hw_desc); 2730 if (unlikely(rc)) { 2731 PMD_DRV_LOG(ERR, "Failed to prepare Tx buffers, rc: %d\n", rc); 2732 ++tx_ring->tx_stats.prepare_ctx_err; 2733 tx_ring->adapter->reset_reason = 2734 ENA_REGS_RESET_DRIVER_INVALID_STATE; 2735 tx_ring->adapter->trigger_reset = true; 2736 return rc; 2737 } 2738 2739 tx_info->tx_descs = nb_hw_desc; 2740 tx_info->timestamp = rte_get_timer_cycles(); 2741 2742 tx_ring->tx_stats.cnt++; 2743 tx_ring->tx_stats.bytes += mbuf->pkt_len; 2744 2745 tx_ring->next_to_use = ENA_IDX_NEXT_MASKED(next_to_use, 2746 tx_ring->size_mask); 2747 2748 return 0; 2749 } 2750 2751 static void ena_tx_cleanup(struct ena_ring *tx_ring) 2752 { 2753 unsigned int total_tx_descs = 0; 2754 uint16_t cleanup_budget; 2755 uint16_t next_to_clean = tx_ring->next_to_clean; 2756 2757 /* Attempt to release all Tx descriptors (ring_size - 1 -> size_mask) */ 
2758 cleanup_budget = tx_ring->size_mask; 2759 2760 while (likely(total_tx_descs < cleanup_budget)) { 2761 struct rte_mbuf *mbuf; 2762 struct ena_tx_buffer *tx_info; 2763 uint16_t req_id; 2764 2765 if (ena_com_tx_comp_req_id_get(tx_ring->ena_com_io_cq, &req_id) != 0) 2766 break; 2767 2768 if (unlikely(validate_tx_req_id(tx_ring, req_id) != 0)) 2769 break; 2770 2771 /* Get Tx info & store how many descs were processed */ 2772 tx_info = &tx_ring->tx_buffer_info[req_id]; 2773 tx_info->timestamp = 0; 2774 2775 mbuf = tx_info->mbuf; 2776 rte_pktmbuf_free(mbuf); 2777 2778 tx_info->mbuf = NULL; 2779 tx_ring->empty_tx_reqs[next_to_clean] = req_id; 2780 2781 total_tx_descs += tx_info->tx_descs; 2782 2783 /* Put back descriptor to the ring for reuse */ 2784 next_to_clean = ENA_IDX_NEXT_MASKED(next_to_clean, 2785 tx_ring->size_mask); 2786 } 2787 2788 if (likely(total_tx_descs > 0)) { 2789 /* acknowledge completion of sent packets */ 2790 tx_ring->next_to_clean = next_to_clean; 2791 ena_com_comp_ack(tx_ring->ena_com_io_sq, total_tx_descs); 2792 ena_com_update_dev_comp_head(tx_ring->ena_com_io_cq); 2793 } 2794 2795 /* Notify completion handler that the cleanup was just called */ 2796 tx_ring->last_cleanup_ticks = rte_get_timer_cycles(); 2797 } 2798 2799 static uint16_t eth_ena_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, 2800 uint16_t nb_pkts) 2801 { 2802 struct ena_ring *tx_ring = (struct ena_ring *)(tx_queue); 2803 int available_desc; 2804 uint16_t sent_idx = 0; 2805 2806 #ifdef RTE_ETHDEV_DEBUG_TX 2807 /* Check adapter state */ 2808 if (unlikely(tx_ring->adapter->state != ENA_ADAPTER_STATE_RUNNING)) { 2809 PMD_TX_LOG(ALERT, 2810 "Trying to xmit pkts while device is NOT running\n"); 2811 return 0; 2812 } 2813 #endif 2814 2815 for (sent_idx = 0; sent_idx < nb_pkts; sent_idx++) { 2816 if (ena_xmit_mbuf(tx_ring, tx_pkts[sent_idx])) 2817 break; 2818 tx_ring->pkts_without_db = true; 2819 rte_prefetch0(tx_pkts[ENA_IDX_ADD_MASKED(sent_idx, 4, 2820 tx_ring->size_mask)]); 2821 
} 2822 2823 available_desc = ena_com_free_q_entries(tx_ring->ena_com_io_sq); 2824 tx_ring->tx_stats.available_desc = available_desc; 2825 2826 /* If there are ready packets to be xmitted... */ 2827 if (likely(tx_ring->pkts_without_db)) { 2828 /* ...let HW do its best :-) */ 2829 ena_com_write_sq_doorbell(tx_ring->ena_com_io_sq); 2830 tx_ring->tx_stats.doorbells++; 2831 tx_ring->pkts_without_db = false; 2832 } 2833 2834 if (available_desc < tx_ring->tx_free_thresh) 2835 ena_tx_cleanup(tx_ring); 2836 2837 tx_ring->tx_stats.available_desc = 2838 ena_com_free_q_entries(tx_ring->ena_com_io_sq); 2839 tx_ring->tx_stats.tx_poll++; 2840 2841 return sent_idx; 2842 } 2843 2844 int ena_copy_eni_stats(struct ena_adapter *adapter) 2845 { 2846 struct ena_admin_eni_stats admin_eni_stats; 2847 int rc; 2848 2849 rte_spinlock_lock(&adapter->admin_lock); 2850 rc = ena_com_get_eni_stats(&adapter->ena_dev, &admin_eni_stats); 2851 rte_spinlock_unlock(&adapter->admin_lock); 2852 if (rc != 0) { 2853 if (rc == ENA_COM_UNSUPPORTED) { 2854 PMD_DRV_LOG(DEBUG, 2855 "Retrieving ENI metrics is not supported\n"); 2856 } else { 2857 PMD_DRV_LOG(WARNING, 2858 "Failed to get ENI metrics, rc: %d\n", rc); 2859 } 2860 return rc; 2861 } 2862 2863 rte_memcpy(&adapter->eni_stats, &admin_eni_stats, 2864 sizeof(struct ena_stats_eni)); 2865 2866 return 0; 2867 } 2868 2869 /** 2870 * DPDK callback to retrieve names of extended device statistics 2871 * 2872 * @param dev 2873 * Pointer to Ethernet device structure. 2874 * @param[out] xstats_names 2875 * Buffer to insert names into. 2876 * @param n 2877 * Number of names. 2878 * 2879 * @return 2880 * Number of xstats names. 
2881 */ 2882 static int ena_xstats_get_names(struct rte_eth_dev *dev, 2883 struct rte_eth_xstat_name *xstats_names, 2884 unsigned int n) 2885 { 2886 unsigned int xstats_count = ena_xstats_calc_num(dev->data); 2887 unsigned int stat, i, count = 0; 2888 2889 if (n < xstats_count || !xstats_names) 2890 return xstats_count; 2891 2892 for (stat = 0; stat < ENA_STATS_ARRAY_GLOBAL; stat++, count++) 2893 strcpy(xstats_names[count].name, 2894 ena_stats_global_strings[stat].name); 2895 2896 for (stat = 0; stat < ENA_STATS_ARRAY_ENI; stat++, count++) 2897 strcpy(xstats_names[count].name, 2898 ena_stats_eni_strings[stat].name); 2899 2900 for (stat = 0; stat < ENA_STATS_ARRAY_RX; stat++) 2901 for (i = 0; i < dev->data->nb_rx_queues; i++, count++) 2902 snprintf(xstats_names[count].name, 2903 sizeof(xstats_names[count].name), 2904 "rx_q%d_%s", i, 2905 ena_stats_rx_strings[stat].name); 2906 2907 for (stat = 0; stat < ENA_STATS_ARRAY_TX; stat++) 2908 for (i = 0; i < dev->data->nb_tx_queues; i++, count++) 2909 snprintf(xstats_names[count].name, 2910 sizeof(xstats_names[count].name), 2911 "tx_q%d_%s", i, 2912 ena_stats_tx_strings[stat].name); 2913 2914 return xstats_count; 2915 } 2916 2917 /** 2918 * DPDK callback to get extended device statistics. 2919 * 2920 * @param dev 2921 * Pointer to Ethernet device structure. 2922 * @param[out] stats 2923 * Stats table output buffer. 2924 * @param n 2925 * The size of the stats table. 2926 * 2927 * @return 2928 * Number of xstats on success, negative on failure. 
2929 */ 2930 static int ena_xstats_get(struct rte_eth_dev *dev, 2931 struct rte_eth_xstat *xstats, 2932 unsigned int n) 2933 { 2934 struct ena_adapter *adapter = dev->data->dev_private; 2935 unsigned int xstats_count = ena_xstats_calc_num(dev->data); 2936 unsigned int stat, i, count = 0; 2937 int stat_offset; 2938 void *stats_begin; 2939 2940 if (n < xstats_count) 2941 return xstats_count; 2942 2943 if (!xstats) 2944 return 0; 2945 2946 for (stat = 0; stat < ENA_STATS_ARRAY_GLOBAL; stat++, count++) { 2947 stat_offset = ena_stats_global_strings[stat].stat_offset; 2948 stats_begin = &adapter->dev_stats; 2949 2950 xstats[count].id = count; 2951 xstats[count].value = *((uint64_t *) 2952 ((char *)stats_begin + stat_offset)); 2953 } 2954 2955 /* Even if the function below fails, we should copy previous (or initial 2956 * values) to keep structure of rte_eth_xstat consistent. 2957 */ 2958 ena_copy_eni_stats(adapter); 2959 for (stat = 0; stat < ENA_STATS_ARRAY_ENI; stat++, count++) { 2960 stat_offset = ena_stats_eni_strings[stat].stat_offset; 2961 stats_begin = &adapter->eni_stats; 2962 2963 xstats[count].id = count; 2964 xstats[count].value = *((uint64_t *) 2965 ((char *)stats_begin + stat_offset)); 2966 } 2967 2968 for (stat = 0; stat < ENA_STATS_ARRAY_RX; stat++) { 2969 for (i = 0; i < dev->data->nb_rx_queues; i++, count++) { 2970 stat_offset = ena_stats_rx_strings[stat].stat_offset; 2971 stats_begin = &adapter->rx_ring[i].rx_stats; 2972 2973 xstats[count].id = count; 2974 xstats[count].value = *((uint64_t *) 2975 ((char *)stats_begin + stat_offset)); 2976 } 2977 } 2978 2979 for (stat = 0; stat < ENA_STATS_ARRAY_TX; stat++) { 2980 for (i = 0; i < dev->data->nb_tx_queues; i++, count++) { 2981 stat_offset = ena_stats_tx_strings[stat].stat_offset; 2982 stats_begin = &adapter->tx_ring[i].rx_stats; 2983 2984 xstats[count].id = count; 2985 xstats[count].value = *((uint64_t *) 2986 ((char *)stats_begin + stat_offset)); 2987 } 2988 } 2989 2990 return count; 2991 } 2992 2993 
static int ena_xstats_get_by_id(struct rte_eth_dev *dev, 2994 const uint64_t *ids, 2995 uint64_t *values, 2996 unsigned int n) 2997 { 2998 struct ena_adapter *adapter = dev->data->dev_private; 2999 uint64_t id; 3000 uint64_t rx_entries, tx_entries; 3001 unsigned int i; 3002 int qid; 3003 int valid = 0; 3004 bool was_eni_copied = false; 3005 3006 for (i = 0; i < n; ++i) { 3007 id = ids[i]; 3008 /* Check if id belongs to global statistics */ 3009 if (id < ENA_STATS_ARRAY_GLOBAL) { 3010 values[i] = *((uint64_t *)&adapter->dev_stats + id); 3011 ++valid; 3012 continue; 3013 } 3014 3015 /* Check if id belongs to ENI statistics */ 3016 id -= ENA_STATS_ARRAY_GLOBAL; 3017 if (id < ENA_STATS_ARRAY_ENI) { 3018 /* Avoid reading ENI stats multiple times in a single 3019 * function call, as it requires communication with the 3020 * admin queue. 3021 */ 3022 if (!was_eni_copied) { 3023 was_eni_copied = true; 3024 ena_copy_eni_stats(adapter); 3025 } 3026 values[i] = *((uint64_t *)&adapter->eni_stats + id); 3027 ++valid; 3028 continue; 3029 } 3030 3031 /* Check if id belongs to rx queue statistics */ 3032 id -= ENA_STATS_ARRAY_ENI; 3033 rx_entries = ENA_STATS_ARRAY_RX * dev->data->nb_rx_queues; 3034 if (id < rx_entries) { 3035 qid = id % dev->data->nb_rx_queues; 3036 id /= dev->data->nb_rx_queues; 3037 values[i] = *((uint64_t *) 3038 &adapter->rx_ring[qid].rx_stats + id); 3039 ++valid; 3040 continue; 3041 } 3042 /* Check if id belongs to rx queue statistics */ 3043 id -= rx_entries; 3044 tx_entries = ENA_STATS_ARRAY_TX * dev->data->nb_tx_queues; 3045 if (id < tx_entries) { 3046 qid = id % dev->data->nb_tx_queues; 3047 id /= dev->data->nb_tx_queues; 3048 values[i] = *((uint64_t *) 3049 &adapter->tx_ring[qid].tx_stats + id); 3050 ++valid; 3051 continue; 3052 } 3053 } 3054 3055 return valid; 3056 } 3057 3058 static int ena_process_bool_devarg(const char *key, 3059 const char *value, 3060 void *opaque) 3061 { 3062 struct ena_adapter *adapter = opaque; 3063 bool bool_value; 3064 3065 /* 
Parse the value. */ 3066 if (strcmp(value, "1") == 0) { 3067 bool_value = true; 3068 } else if (strcmp(value, "0") == 0) { 3069 bool_value = false; 3070 } else { 3071 PMD_INIT_LOG(ERR, 3072 "Invalid value: '%s' for key '%s'. Accepted: '0' or '1'\n", 3073 value, key); 3074 return -EINVAL; 3075 } 3076 3077 /* Now, assign it to the proper adapter field. */ 3078 if (strcmp(key, ENA_DEVARG_LARGE_LLQ_HDR) == 0) 3079 adapter->use_large_llq_hdr = bool_value; 3080 3081 return 0; 3082 } 3083 3084 static int ena_parse_devargs(struct ena_adapter *adapter, 3085 struct rte_devargs *devargs) 3086 { 3087 static const char * const allowed_args[] = { 3088 ENA_DEVARG_LARGE_LLQ_HDR, 3089 NULL, 3090 }; 3091 struct rte_kvargs *kvlist; 3092 int rc; 3093 3094 if (devargs == NULL) 3095 return 0; 3096 3097 kvlist = rte_kvargs_parse(devargs->args, allowed_args); 3098 if (kvlist == NULL) { 3099 PMD_INIT_LOG(ERR, "Invalid device arguments: %s\n", 3100 devargs->args); 3101 return -EINVAL; 3102 } 3103 3104 rc = rte_kvargs_process(kvlist, ENA_DEVARG_LARGE_LLQ_HDR, 3105 ena_process_bool_devarg, adapter); 3106 3107 rte_kvargs_free(kvlist); 3108 3109 return rc; 3110 } 3111 3112 static int ena_setup_rx_intr(struct rte_eth_dev *dev) 3113 { 3114 struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev); 3115 struct rte_intr_handle *intr_handle = &pci_dev->intr_handle; 3116 int rc; 3117 uint16_t vectors_nb, i; 3118 bool rx_intr_requested = dev->data->dev_conf.intr_conf.rxq; 3119 3120 if (!rx_intr_requested) 3121 return 0; 3122 3123 if (!rte_intr_cap_multiple(intr_handle)) { 3124 PMD_DRV_LOG(ERR, 3125 "Rx interrupt requested, but it isn't supported by the PCI driver\n"); 3126 return -ENOTSUP; 3127 } 3128 3129 /* Disable interrupt mapping before the configuration starts. */ 3130 rte_intr_disable(intr_handle); 3131 3132 /* Verify if there are enough vectors available. 
*/ 3133 vectors_nb = dev->data->nb_rx_queues; 3134 if (vectors_nb > RTE_MAX_RXTX_INTR_VEC_ID) { 3135 PMD_DRV_LOG(ERR, 3136 "Too many Rx interrupts requested, maximum number: %d\n", 3137 RTE_MAX_RXTX_INTR_VEC_ID); 3138 rc = -ENOTSUP; 3139 goto enable_intr; 3140 } 3141 3142 intr_handle->intr_vec = rte_zmalloc("intr_vec", 3143 dev->data->nb_rx_queues * sizeof(*intr_handle->intr_vec), 0); 3144 if (intr_handle->intr_vec == NULL) { 3145 PMD_DRV_LOG(ERR, 3146 "Failed to allocate interrupt vector for %d queues\n", 3147 dev->data->nb_rx_queues); 3148 rc = -ENOMEM; 3149 goto enable_intr; 3150 } 3151 3152 rc = rte_intr_efd_enable(intr_handle, vectors_nb); 3153 if (rc != 0) 3154 goto free_intr_vec; 3155 3156 if (!rte_intr_allow_others(intr_handle)) { 3157 PMD_DRV_LOG(ERR, 3158 "Not enough interrupts available to use both ENA Admin and Rx interrupts\n"); 3159 goto disable_intr_efd; 3160 } 3161 3162 for (i = 0; i < vectors_nb; ++i) 3163 intr_handle->intr_vec[i] = RTE_INTR_VEC_RXTX_OFFSET + i; 3164 3165 rte_intr_enable(intr_handle); 3166 return 0; 3167 3168 disable_intr_efd: 3169 rte_intr_efd_disable(intr_handle); 3170 free_intr_vec: 3171 rte_free(intr_handle->intr_vec); 3172 intr_handle->intr_vec = NULL; 3173 enable_intr: 3174 rte_intr_enable(intr_handle); 3175 return rc; 3176 } 3177 3178 static void ena_rx_queue_intr_set(struct rte_eth_dev *dev, 3179 uint16_t queue_id, 3180 bool unmask) 3181 { 3182 struct ena_adapter *adapter = dev->data->dev_private; 3183 struct ena_ring *rxq = &adapter->rx_ring[queue_id]; 3184 struct ena_eth_io_intr_reg intr_reg; 3185 3186 ena_com_update_intr_reg(&intr_reg, 0, 0, unmask); 3187 ena_com_unmask_intr(rxq->ena_com_io_cq, &intr_reg); 3188 } 3189 3190 static int ena_rx_queue_intr_enable(struct rte_eth_dev *dev, 3191 uint16_t queue_id) 3192 { 3193 ena_rx_queue_intr_set(dev, queue_id, true); 3194 3195 return 0; 3196 } 3197 3198 static int ena_rx_queue_intr_disable(struct rte_eth_dev *dev, 3199 uint16_t queue_id) 3200 { 3201 ena_rx_queue_intr_set(dev, 
queue_id, false); 3202 3203 return 0; 3204 } 3205 3206 /********************************************************************* 3207 * PMD configuration 3208 *********************************************************************/ 3209 static int eth_ena_pci_probe(struct rte_pci_driver *pci_drv __rte_unused, 3210 struct rte_pci_device *pci_dev) 3211 { 3212 return rte_eth_dev_pci_generic_probe(pci_dev, 3213 sizeof(struct ena_adapter), eth_ena_dev_init); 3214 } 3215 3216 static int eth_ena_pci_remove(struct rte_pci_device *pci_dev) 3217 { 3218 return rte_eth_dev_pci_generic_remove(pci_dev, eth_ena_dev_uninit); 3219 } 3220 3221 static struct rte_pci_driver rte_ena_pmd = { 3222 .id_table = pci_id_ena_map, 3223 .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC | 3224 RTE_PCI_DRV_WC_ACTIVATE, 3225 .probe = eth_ena_pci_probe, 3226 .remove = eth_ena_pci_remove, 3227 }; 3228 3229 RTE_PMD_REGISTER_PCI(net_ena, rte_ena_pmd); 3230 RTE_PMD_REGISTER_PCI_TABLE(net_ena, pci_id_ena_map); 3231 RTE_PMD_REGISTER_KMOD_DEP(net_ena, "* igb_uio | uio_pci_generic | vfio-pci"); 3232 RTE_PMD_REGISTER_PARAM_STRING(net_ena, ENA_DEVARG_LARGE_LLQ_HDR "=<0|1>"); 3233 RTE_LOG_REGISTER_SUFFIX(ena_logtype_init, init, NOTICE); 3234 RTE_LOG_REGISTER_SUFFIX(ena_logtype_driver, driver, NOTICE); 3235 #ifdef RTE_ETHDEV_DEBUG_RX 3236 RTE_LOG_REGISTER_SUFFIX(ena_logtype_rx, rx, DEBUG); 3237 #endif 3238 #ifdef RTE_ETHDEV_DEBUG_TX 3239 RTE_LOG_REGISTER_SUFFIX(ena_logtype_tx, tx, DEBUG); 3240 #endif 3241 RTE_LOG_REGISTER_SUFFIX(ena_logtype_com, com, WARNING); 3242 3243 /****************************************************************************** 3244 ******************************** AENQ Handlers ******************************* 3245 *****************************************************************************/ 3246 static void ena_update_on_link_change(void *adapter_data, 3247 struct ena_admin_aenq_entry *aenq_e) 3248 { 3249 struct rte_eth_dev *eth_dev = adapter_data; 3250 struct ena_adapter *adapter 
= eth_dev->data->dev_private; 3251 struct ena_admin_aenq_link_change_desc *aenq_link_desc; 3252 uint32_t status; 3253 3254 aenq_link_desc = (struct ena_admin_aenq_link_change_desc *)aenq_e; 3255 3256 status = get_ena_admin_aenq_link_change_desc_link_status(aenq_link_desc); 3257 adapter->link_status = status; 3258 3259 ena_link_update(eth_dev, 0); 3260 rte_eth_dev_callback_process(eth_dev, RTE_ETH_EVENT_INTR_LSC, NULL); 3261 } 3262 3263 static void ena_notification(void *adapter_data, 3264 struct ena_admin_aenq_entry *aenq_e) 3265 { 3266 struct rte_eth_dev *eth_dev = adapter_data; 3267 struct ena_adapter *adapter = eth_dev->data->dev_private; 3268 struct ena_admin_ena_hw_hints *hints; 3269 3270 if (aenq_e->aenq_common_desc.group != ENA_ADMIN_NOTIFICATION) 3271 PMD_DRV_LOG(WARNING, "Invalid AENQ group: %x. Expected: %x\n", 3272 aenq_e->aenq_common_desc.group, 3273 ENA_ADMIN_NOTIFICATION); 3274 3275 switch (aenq_e->aenq_common_desc.syndrome) { 3276 case ENA_ADMIN_UPDATE_HINTS: 3277 hints = (struct ena_admin_ena_hw_hints *) 3278 (&aenq_e->inline_data_w4); 3279 ena_update_hints(adapter, hints); 3280 break; 3281 default: 3282 PMD_DRV_LOG(ERR, "Invalid AENQ notification link state: %d\n", 3283 aenq_e->aenq_common_desc.syndrome); 3284 } 3285 } 3286 3287 static void ena_keep_alive(void *adapter_data, 3288 __rte_unused struct ena_admin_aenq_entry *aenq_e) 3289 { 3290 struct rte_eth_dev *eth_dev = adapter_data; 3291 struct ena_adapter *adapter = eth_dev->data->dev_private; 3292 struct ena_admin_aenq_keep_alive_desc *desc; 3293 uint64_t rx_drops; 3294 uint64_t tx_drops; 3295 3296 adapter->timestamp_wd = rte_get_timer_cycles(); 3297 3298 desc = (struct ena_admin_aenq_keep_alive_desc *)aenq_e; 3299 rx_drops = ((uint64_t)desc->rx_drops_high << 32) | desc->rx_drops_low; 3300 tx_drops = ((uint64_t)desc->tx_drops_high << 32) | desc->tx_drops_low; 3301 3302 adapter->drv_stats->rx_drops = rx_drops; 3303 adapter->dev_stats.tx_drops = tx_drops; 3304 } 3305 3306 /** 3307 * This handler 
will called for unknown event group or unimplemented handlers 3308 **/ 3309 static void unimplemented_aenq_handler(__rte_unused void *data, 3310 __rte_unused struct ena_admin_aenq_entry *aenq_e) 3311 { 3312 PMD_DRV_LOG(ERR, 3313 "Unknown event was received or event with unimplemented handler\n"); 3314 } 3315 3316 static struct ena_aenq_handlers aenq_handlers = { 3317 .handlers = { 3318 [ENA_ADMIN_LINK_CHANGE] = ena_update_on_link_change, 3319 [ENA_ADMIN_NOTIFICATION] = ena_notification, 3320 [ENA_ADMIN_KEEP_ALIVE] = ena_keep_alive 3321 }, 3322 .unimplemented_handler = unimplemented_aenq_handler 3323 }; 3324