1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright (c) 2015-2020 Amazon.com, Inc. or its affiliates. 3 * All rights reserved. 4 */ 5 6 #include <rte_string_fns.h> 7 #include <rte_errno.h> 8 #include <rte_version.h> 9 #include <rte_net.h> 10 #include <rte_kvargs.h> 11 12 #include "ena_ethdev.h" 13 #include "ena_logs.h" 14 #include "ena_platform.h" 15 #include "ena_com.h" 16 #include "ena_eth_com.h" 17 18 #include <ena_common_defs.h> 19 #include <ena_regs_defs.h> 20 #include <ena_admin_defs.h> 21 #include <ena_eth_io_defs.h> 22 23 #define DRV_MODULE_VER_MAJOR 2 24 #define DRV_MODULE_VER_MINOR 5 25 #define DRV_MODULE_VER_SUBMINOR 0 26 27 #define __MERGE_64B_H_L(h, l) (((uint64_t)h << 32) | l) 28 29 #define GET_L4_HDR_LEN(mbuf) \ 30 ((rte_pktmbuf_mtod_offset(mbuf, struct rte_tcp_hdr *, \ 31 mbuf->l3_len + mbuf->l2_len)->data_off) >> 4) 32 33 #define ETH_GSTRING_LEN 32 34 35 #define ARRAY_SIZE(x) RTE_DIM(x) 36 37 #define ENA_MIN_RING_DESC 128 38 39 #define ENA_PTYPE_HAS_HASH (RTE_PTYPE_L4_TCP | RTE_PTYPE_L4_UDP) 40 41 enum ethtool_stringset { 42 ETH_SS_TEST = 0, 43 ETH_SS_STATS, 44 }; 45 46 struct ena_stats { 47 char name[ETH_GSTRING_LEN]; 48 int stat_offset; 49 }; 50 51 #define ENA_STAT_ENTRY(stat, stat_type) { \ 52 .name = #stat, \ 53 .stat_offset = offsetof(struct ena_stats_##stat_type, stat) \ 54 } 55 56 #define ENA_STAT_RX_ENTRY(stat) \ 57 ENA_STAT_ENTRY(stat, rx) 58 59 #define ENA_STAT_TX_ENTRY(stat) \ 60 ENA_STAT_ENTRY(stat, tx) 61 62 #define ENA_STAT_ENI_ENTRY(stat) \ 63 ENA_STAT_ENTRY(stat, eni) 64 65 #define ENA_STAT_GLOBAL_ENTRY(stat) \ 66 ENA_STAT_ENTRY(stat, dev) 67 68 /* Device arguments */ 69 #define ENA_DEVARG_LARGE_LLQ_HDR "large_llq_hdr" 70 71 /* 72 * Each rte_memzone should have unique name. 73 * To satisfy it, count number of allocation and add it to name. 74 */ 75 rte_atomic64_t ena_alloc_cnt; 76 77 static const struct ena_stats ena_stats_global_strings[] = { 78 ENA_STAT_GLOBAL_ENTRY(wd_expired), 79 ENA_STAT_GLOBAL_ENTRY(dev_start), 80 ENA_STAT_GLOBAL_ENTRY(dev_stop), 81 ENA_STAT_GLOBAL_ENTRY(tx_drops), 82 }; 83 84 static const struct ena_stats ena_stats_eni_strings[] = { 85 ENA_STAT_ENI_ENTRY(bw_in_allowance_exceeded), 86 ENA_STAT_ENI_ENTRY(bw_out_allowance_exceeded), 87 ENA_STAT_ENI_ENTRY(pps_allowance_exceeded), 88 ENA_STAT_ENI_ENTRY(conntrack_allowance_exceeded), 89 ENA_STAT_ENI_ENTRY(linklocal_allowance_exceeded), 90 }; 91 92 static const struct ena_stats ena_stats_tx_strings[] = { 93 ENA_STAT_TX_ENTRY(cnt), 94 ENA_STAT_TX_ENTRY(bytes), 95 ENA_STAT_TX_ENTRY(prepare_ctx_err), 96 ENA_STAT_TX_ENTRY(linearize), 97 ENA_STAT_TX_ENTRY(linearize_failed), 98 ENA_STAT_TX_ENTRY(tx_poll), 99 ENA_STAT_TX_ENTRY(doorbells), 100 ENA_STAT_TX_ENTRY(bad_req_id), 101 ENA_STAT_TX_ENTRY(available_desc), 102 ENA_STAT_TX_ENTRY(missed_tx), 103 }; 104 105 static const struct ena_stats ena_stats_rx_strings[] = { 106 ENA_STAT_RX_ENTRY(cnt), 107 ENA_STAT_RX_ENTRY(bytes), 108 ENA_STAT_RX_ENTRY(refill_partial), 109 ENA_STAT_RX_ENTRY(bad_csum), 110 ENA_STAT_RX_ENTRY(mbuf_alloc_fail), 111 ENA_STAT_RX_ENTRY(bad_desc_num), 112 ENA_STAT_RX_ENTRY(bad_req_id), 113 }; 114 115 #define ENA_STATS_ARRAY_GLOBAL ARRAY_SIZE(ena_stats_global_strings) 116 #define ENA_STATS_ARRAY_ENI ARRAY_SIZE(ena_stats_eni_strings) 117 #define ENA_STATS_ARRAY_TX ARRAY_SIZE(ena_stats_tx_strings) 118 #define ENA_STATS_ARRAY_RX ARRAY_SIZE(ena_stats_rx_strings) 119 120 #define QUEUE_OFFLOADS (RTE_ETH_TX_OFFLOAD_TCP_CKSUM |\ 121 RTE_ETH_TX_OFFLOAD_UDP_CKSUM |\ 122 RTE_ETH_TX_OFFLOAD_IPV4_CKSUM |\ 123 RTE_ETH_TX_OFFLOAD_TCP_TSO) 124 #define MBUF_OFFLOADS (RTE_MBUF_F_TX_L4_MASK |\ 125 RTE_MBUF_F_TX_IP_CKSUM |\ 126 RTE_MBUF_F_TX_TCP_SEG) 127 128 /** Vendor ID used by Amazon devices */ 129 #define PCI_VENDOR_ID_AMAZON 0x1D0F 130 /** Amazon devices */ 131 #define PCI_DEVICE_ID_ENA_VF 0xEC20 132 #define PCI_DEVICE_ID_ENA_VF_RSERV0 0xEC21 133 134 #define ENA_TX_OFFLOAD_MASK (RTE_MBUF_F_TX_L4_MASK | \ 135 RTE_MBUF_F_TX_IPV6 | \ 136 RTE_MBUF_F_TX_IPV4 | \ 137 RTE_MBUF_F_TX_IP_CKSUM | \ 138 RTE_MBUF_F_TX_TCP_SEG) 139 140 #define ENA_TX_OFFLOAD_NOTSUP_MASK \ 141 (RTE_MBUF_F_TX_OFFLOAD_MASK ^ ENA_TX_OFFLOAD_MASK) 142 143 /** HW specific offloads capabilities. */ 144 /* IPv4 checksum offload. */ 145 #define ENA_L3_IPV4_CSUM 0x0001 146 /* TCP/UDP checksum offload for IPv4 packets. */ 147 #define ENA_L4_IPV4_CSUM 0x0002 148 /* TCP/UDP checksum offload for IPv4 packets with pseudo header checksum. */ 149 #define ENA_L4_IPV4_CSUM_PARTIAL 0x0004 150 /* TCP/UDP checksum offload for IPv6 packets. */ 151 #define ENA_L4_IPV6_CSUM 0x0008 152 /* TCP/UDP checksum offload for IPv6 packets with pseudo header checksum. */ 153 #define ENA_L4_IPV6_CSUM_PARTIAL 0x0010 154 /* TSO support for IPv4 packets. */ 155 #define ENA_IPV4_TSO 0x0020 156 157 /* Device supports setting RSS hash. */ 158 #define ENA_RX_RSS_HASH 0x0040 159 160 static const struct rte_pci_id pci_id_ena_map[] = { 161 { RTE_PCI_DEVICE(PCI_VENDOR_ID_AMAZON, PCI_DEVICE_ID_ENA_VF) }, 162 { RTE_PCI_DEVICE(PCI_VENDOR_ID_AMAZON, PCI_DEVICE_ID_ENA_VF_RSERV0) }, 163 { .device_id = 0 }, 164 }; 165 166 static struct ena_aenq_handlers aenq_handlers; 167 168 static int ena_device_init(struct ena_com_dev *ena_dev, 169 struct rte_pci_device *pdev, 170 struct ena_com_dev_get_features_ctx *get_feat_ctx, 171 bool *wd_state); 172 static int ena_dev_configure(struct rte_eth_dev *dev); 173 static void ena_tx_map_mbuf(struct ena_ring *tx_ring, 174 struct ena_tx_buffer *tx_info, 175 struct rte_mbuf *mbuf, 176 void **push_header, 177 uint16_t *header_len); 178 static int ena_xmit_mbuf(struct ena_ring *tx_ring, struct rte_mbuf *mbuf); 179 static void ena_tx_cleanup(struct ena_ring *tx_ring); 180 static uint16_t eth_ena_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, 181 uint16_t nb_pkts); 182 static uint16_t eth_ena_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, 183 uint16_t nb_pkts); 184 static int ena_tx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx, 185 uint16_t nb_desc, unsigned int socket_id, 186 const struct rte_eth_txconf *tx_conf); 187 static int ena_rx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx, 188 uint16_t nb_desc, unsigned int socket_id, 189 const struct rte_eth_rxconf *rx_conf, 190 struct rte_mempool *mp); 191 static inline void ena_init_rx_mbuf(struct rte_mbuf *mbuf, uint16_t len); 192 static struct rte_mbuf *ena_rx_mbuf(struct ena_ring *rx_ring, 193 struct ena_com_rx_buf_info *ena_bufs, 194 uint32_t descs, 195 uint16_t *next_to_clean, 196 uint8_t offset); 197 static uint16_t eth_ena_recv_pkts(void *rx_queue, 198 struct rte_mbuf **rx_pkts, uint16_t nb_pkts); 199 static int ena_add_single_rx_desc(struct ena_com_io_sq *io_sq, 200 struct rte_mbuf *mbuf, uint16_t id); 201 static int ena_populate_rx_queue(struct ena_ring *rxq, unsigned int count); 202 static void ena_init_rings(struct ena_adapter *adapter, 203 bool disable_meta_caching); 204 static int ena_mtu_set(struct rte_eth_dev *dev, uint16_t mtu); 205 static int ena_start(struct rte_eth_dev *dev); 206 static int ena_stop(struct rte_eth_dev *dev); 207 static int ena_close(struct rte_eth_dev *dev); 208 static int ena_dev_reset(struct rte_eth_dev *dev); 209 static int ena_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats); 210 static void ena_rx_queue_release_all(struct rte_eth_dev *dev); 211 static void ena_tx_queue_release_all(struct rte_eth_dev *dev); 212 static void ena_rx_queue_release(struct rte_eth_dev *dev, uint16_t qid); 213 static void ena_tx_queue_release(struct rte_eth_dev *dev, uint16_t qid); 214 static void ena_rx_queue_release_bufs(struct ena_ring *ring); 215 static void ena_tx_queue_release_bufs(struct ena_ring *ring); 216 static int ena_link_update(struct rte_eth_dev *dev, 217 int wait_to_complete); 218 static int ena_create_io_queue(struct rte_eth_dev *dev, struct ena_ring *ring); 219 static void ena_queue_stop(struct ena_ring *ring); 220 static void ena_queue_stop_all(struct rte_eth_dev *dev, 221 enum ena_ring_type ring_type); 222 static int ena_queue_start(struct rte_eth_dev *dev, struct ena_ring *ring); 223 static int ena_queue_start_all(struct rte_eth_dev *dev, 224 enum ena_ring_type ring_type); 225 static void ena_stats_restart(struct rte_eth_dev *dev); 226 static uint64_t ena_get_rx_port_offloads(struct ena_adapter *adapter); 227 static uint64_t ena_get_tx_port_offloads(struct ena_adapter *adapter); 228 static uint64_t ena_get_rx_queue_offloads(struct ena_adapter *adapter); 229 static uint64_t ena_get_tx_queue_offloads(struct ena_adapter *adapter); 230 static int ena_infos_get(struct rte_eth_dev *dev, 231 struct rte_eth_dev_info *dev_info); 232 static void ena_interrupt_handler_rte(void *cb_arg); 233 static void ena_timer_wd_callback(struct rte_timer *timer, void *arg); 234 static void ena_destroy_device(struct rte_eth_dev *eth_dev); 235 static int eth_ena_dev_init(struct rte_eth_dev *eth_dev); 236 static int ena_xstats_get_names(struct rte_eth_dev *dev, 237 struct rte_eth_xstat_name *xstats_names, 238 unsigned int n); 239 static int ena_xstats_get(struct rte_eth_dev *dev, 240 struct rte_eth_xstat *stats, 241 unsigned int n); 242 static int ena_xstats_get_by_id(struct rte_eth_dev *dev, 243 const uint64_t *ids, 244 uint64_t *values, 245 unsigned int n); 246 static int ena_process_bool_devarg(const char *key, 247 const char *value, 248 void *opaque); 249 static int ena_parse_devargs(struct ena_adapter *adapter, 250 struct rte_devargs *devargs); 251 static int ena_copy_eni_stats(struct ena_adapter *adapter); 252 static int ena_setup_rx_intr(struct rte_eth_dev *dev); 253 static int ena_rx_queue_intr_enable(struct rte_eth_dev *dev, 254 uint16_t queue_id); 255 static int ena_rx_queue_intr_disable(struct rte_eth_dev *dev, 256 uint16_t queue_id); 257 258 static const struct eth_dev_ops ena_dev_ops = { 259 .dev_configure = ena_dev_configure, 260 .dev_infos_get = ena_infos_get, 261 .rx_queue_setup = ena_rx_queue_setup, 262 .tx_queue_setup = ena_tx_queue_setup, 263 .dev_start = ena_start, 264 .dev_stop = ena_stop, 265 .link_update = ena_link_update, 266 .stats_get = ena_stats_get, 267 .xstats_get_names = ena_xstats_get_names, 268 .xstats_get = ena_xstats_get, 269 .xstats_get_by_id = ena_xstats_get_by_id, 270 .mtu_set = ena_mtu_set, 271 .rx_queue_release = ena_rx_queue_release, 272 .tx_queue_release = ena_tx_queue_release, 273 .dev_close = ena_close, 274 .dev_reset = ena_dev_reset, 275 .reta_update = ena_rss_reta_update, 276 .reta_query = ena_rss_reta_query, 277 .rx_queue_intr_enable = ena_rx_queue_intr_enable, 278 .rx_queue_intr_disable = ena_rx_queue_intr_disable, 279 .rss_hash_update = ena_rss_hash_update, 280 .rss_hash_conf_get = ena_rss_hash_conf_get, 281 }; 282 283 static inline void ena_rx_mbuf_prepare(struct rte_mbuf *mbuf, 284 struct ena_com_rx_ctx *ena_rx_ctx, 285 bool fill_hash) 286 { 287 uint64_t ol_flags = 0; 288 uint32_t packet_type = 0; 289 290 if (ena_rx_ctx->l4_proto == ENA_ETH_IO_L4_PROTO_TCP) 291 packet_type |= RTE_PTYPE_L4_TCP; 292 else if (ena_rx_ctx->l4_proto == ENA_ETH_IO_L4_PROTO_UDP) 293 packet_type |= RTE_PTYPE_L4_UDP; 294 295 if (ena_rx_ctx->l3_proto == ENA_ETH_IO_L3_PROTO_IPV4) { 296 packet_type |= RTE_PTYPE_L3_IPV4; 297 if (unlikely(ena_rx_ctx->l3_csum_err)) 298 ol_flags |= RTE_MBUF_F_RX_IP_CKSUM_BAD; 299 else 300 ol_flags |= RTE_MBUF_F_RX_IP_CKSUM_GOOD; 301 } else if (ena_rx_ctx->l3_proto == ENA_ETH_IO_L3_PROTO_IPV6) { 302 packet_type |= RTE_PTYPE_L3_IPV6; 303 } 304 305 if (!ena_rx_ctx->l4_csum_checked || ena_rx_ctx->frag) 306 ol_flags |= RTE_MBUF_F_RX_L4_CKSUM_UNKNOWN; 307 else 308 if (unlikely(ena_rx_ctx->l4_csum_err)) 309 ol_flags |= RTE_MBUF_F_RX_L4_CKSUM_BAD; 310 else 311 ol_flags |= RTE_MBUF_F_RX_L4_CKSUM_GOOD; 312 313 if (fill_hash && 314 likely((packet_type & ENA_PTYPE_HAS_HASH) && !ena_rx_ctx->frag)) { 315 ol_flags |= RTE_MBUF_F_RX_RSS_HASH; 316 mbuf->hash.rss = ena_rx_ctx->hash; 317 } 318 319 mbuf->ol_flags = ol_flags; 320 mbuf->packet_type = packet_type; 321 } 322 323 static inline void ena_tx_mbuf_prepare(struct rte_mbuf *mbuf, 324 struct ena_com_tx_ctx *ena_tx_ctx, 325 uint64_t queue_offloads, 326 bool disable_meta_caching) 327 { 328 struct ena_com_tx_meta *ena_meta = &ena_tx_ctx->ena_meta; 329 330 if ((mbuf->ol_flags & MBUF_OFFLOADS) && 331 (queue_offloads & QUEUE_OFFLOADS)) { 332 /* check if TSO is required */ 333 if ((mbuf->ol_flags & RTE_MBUF_F_TX_TCP_SEG) && 334 (queue_offloads & RTE_ETH_TX_OFFLOAD_TCP_TSO)) { 335 ena_tx_ctx->tso_enable = true; 336 337 ena_meta->l4_hdr_len = GET_L4_HDR_LEN(mbuf); 338 } 339 340 /* check if L3 checksum is needed */ 341 if ((mbuf->ol_flags & RTE_MBUF_F_TX_IP_CKSUM) && 342 (queue_offloads & RTE_ETH_TX_OFFLOAD_IPV4_CKSUM)) 343 ena_tx_ctx->l3_csum_enable = true; 344 345 if (mbuf->ol_flags & RTE_MBUF_F_TX_IPV6) { 346 ena_tx_ctx->l3_proto = ENA_ETH_IO_L3_PROTO_IPV6; 347 } else { 348 ena_tx_ctx->l3_proto = ENA_ETH_IO_L3_PROTO_IPV4; 349 350 /* set don't fragment (DF) flag */ 351 if (mbuf->packet_type & 352 (RTE_PTYPE_L4_NONFRAG 353 | RTE_PTYPE_INNER_L4_NONFRAG)) 354 ena_tx_ctx->df = true; 355 } 356 357 /* check if L4 checksum is needed */ 358 if (((mbuf->ol_flags & RTE_MBUF_F_TX_L4_MASK) == RTE_MBUF_F_TX_TCP_CKSUM) && 359 (queue_offloads & RTE_ETH_TX_OFFLOAD_TCP_CKSUM)) { 360 ena_tx_ctx->l4_proto = ENA_ETH_IO_L4_PROTO_TCP; 361 ena_tx_ctx->l4_csum_enable = true; 362 } else if (((mbuf->ol_flags & RTE_MBUF_F_TX_L4_MASK) == 363 RTE_MBUF_F_TX_UDP_CKSUM) && 364 (queue_offloads & RTE_ETH_TX_OFFLOAD_UDP_CKSUM)) { 365 ena_tx_ctx->l4_proto = ENA_ETH_IO_L4_PROTO_UDP; 366 ena_tx_ctx->l4_csum_enable = true; 367 } else { 368 ena_tx_ctx->l4_proto = ENA_ETH_IO_L4_PROTO_UNKNOWN; 369 ena_tx_ctx->l4_csum_enable = false; 370 } 371 372 ena_meta->mss = mbuf->tso_segsz; 373 ena_meta->l3_hdr_len = mbuf->l3_len; 374 ena_meta->l3_hdr_offset = mbuf->l2_len; 375 376 ena_tx_ctx->meta_valid = true; 377 } else if (disable_meta_caching) { 378 memset(ena_meta, 0, sizeof(*ena_meta)); 379 ena_tx_ctx->meta_valid = true; 380 } else { 381 ena_tx_ctx->meta_valid = false; 382 } 383 } 384 385 static int validate_tx_req_id(struct ena_ring *tx_ring, u16 req_id) 386 { 387 struct ena_tx_buffer *tx_info = NULL; 388 389 if (likely(req_id < tx_ring->ring_size)) { 390 tx_info = &tx_ring->tx_buffer_info[req_id]; 391 if (likely(tx_info->mbuf)) 392 return 0; 393 } 394 395 if (tx_info) 396 PMD_TX_LOG(ERR, "tx_info doesn't have valid mbuf\n"); 397 else 398 PMD_TX_LOG(ERR, "Invalid req_id: %hu\n", req_id); 399 400 /* Trigger device reset */ 401 ++tx_ring->tx_stats.bad_req_id; 402 tx_ring->adapter->reset_reason = ENA_REGS_RESET_INV_TX_REQ_ID; 403 tx_ring->adapter->trigger_reset = true; 404 return -EFAULT; 405 } 406 407 static void ena_config_host_info(struct ena_com_dev *ena_dev) 408 { 409 struct ena_admin_host_info *host_info; 410 int rc; 411 412 /* Allocate only the host info */ 413 rc = ena_com_allocate_host_info(ena_dev); 414 if (rc) { 415 PMD_DRV_LOG(ERR, "Cannot allocate host info\n"); 416 return; 417 } 418 419 host_info = ena_dev->host_attr.host_info; 420 421 host_info->os_type = ENA_ADMIN_OS_DPDK; 422 host_info->kernel_ver = RTE_VERSION; 423 strlcpy((char *)host_info->kernel_ver_str, rte_version(), 424 sizeof(host_info->kernel_ver_str)); 425 host_info->os_dist = RTE_VERSION; 426 strlcpy((char *)host_info->os_dist_str, rte_version(), 427 sizeof(host_info->os_dist_str)); 428 host_info->driver_version = 429 (DRV_MODULE_VER_MAJOR) | 430 (DRV_MODULE_VER_MINOR << ENA_ADMIN_HOST_INFO_MINOR_SHIFT) | 431 (DRV_MODULE_VER_SUBMINOR << 432 ENA_ADMIN_HOST_INFO_SUB_MINOR_SHIFT); 433 host_info->num_cpus = rte_lcore_count(); 434 435 host_info->driver_supported_features = 436 ENA_ADMIN_HOST_INFO_RX_OFFSET_MASK | 437 ENA_ADMIN_HOST_INFO_RSS_CONFIGURABLE_FUNCTION_KEY_MASK; 438 439 rc = ena_com_set_host_attributes(ena_dev); 440 if (rc) { 441 if (rc == -ENA_COM_UNSUPPORTED) 442 PMD_DRV_LOG(WARNING, "Cannot set host attributes\n"); 443 else 444 PMD_DRV_LOG(ERR, "Cannot set host attributes\n"); 445 446 goto err; 447 } 448 449 return; 450 451 err: 452 ena_com_delete_host_info(ena_dev); 453 } 454 455 /* This function calculates the number of xstats based on the current config */ 456 static unsigned int ena_xstats_calc_num(struct rte_eth_dev_data *data) 457 { 458 return ENA_STATS_ARRAY_GLOBAL + ENA_STATS_ARRAY_ENI + 459 (data->nb_tx_queues * ENA_STATS_ARRAY_TX) + 460 (data->nb_rx_queues * ENA_STATS_ARRAY_RX); 461 } 462 463 static void ena_config_debug_area(struct ena_adapter *adapter) 464 { 465 u32 debug_area_size; 466 int rc, ss_count; 467 468 ss_count = ena_xstats_calc_num(adapter->edev_data); 469 470 /* allocate 32 bytes for each string and 64bit for the value */ 471 debug_area_size = ss_count * ETH_GSTRING_LEN + sizeof(u64) * ss_count; 472 473 rc = ena_com_allocate_debug_area(&adapter->ena_dev, debug_area_size); 474 if (rc) { 475 PMD_DRV_LOG(ERR, "Cannot allocate debug area\n"); 476 return; 477 } 478 479 rc = ena_com_set_host_attributes(&adapter->ena_dev); 480 if (rc) { 481 if (rc == -ENA_COM_UNSUPPORTED) 482 PMD_DRV_LOG(WARNING, "Cannot set host attributes\n"); 483 else 484 PMD_DRV_LOG(ERR, "Cannot set host attributes\n"); 485 486 goto err; 487 } 488 489 return; 490 err: 491 ena_com_delete_debug_area(&adapter->ena_dev); 492 } 493 494 static int ena_close(struct rte_eth_dev *dev) 495 { 496 struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev); 497 struct rte_intr_handle *intr_handle = pci_dev->intr_handle; 498 struct ena_adapter *adapter = dev->data->dev_private; 499 int ret = 0; 500 501 if (rte_eal_process_type() != RTE_PROC_PRIMARY) 502 return 0; 503 504 if (adapter->state == ENA_ADAPTER_STATE_RUNNING) 505 ret = ena_stop(dev); 506 adapter->state = ENA_ADAPTER_STATE_CLOSED; 507 508 ena_rx_queue_release_all(dev); 509 ena_tx_queue_release_all(dev); 510 511 rte_free(adapter->drv_stats); 512 adapter->drv_stats = NULL; 513 514 rte_intr_disable(intr_handle); 515 rte_intr_callback_unregister(intr_handle, 516 ena_interrupt_handler_rte, 517 dev); 518 519 /* 520 * MAC is not allocated dynamically. Setting NULL should prevent from 521 * release of the resource in the rte_eth_dev_release_port(). 522 */ 523 dev->data->mac_addrs = NULL; 524 525 return ret; 526 } 527 528 static int 529 ena_dev_reset(struct rte_eth_dev *dev) 530 { 531 int rc = 0; 532 533 /* Cannot release memory in secondary process */ 534 if (rte_eal_process_type() != RTE_PROC_PRIMARY) { 535 PMD_DRV_LOG(WARNING, "dev_reset not supported in secondary.\n"); 536 return -EPERM; 537 } 538 539 ena_destroy_device(dev); 540 rc = eth_ena_dev_init(dev); 541 if (rc) 542 PMD_INIT_LOG(CRIT, "Cannot initialize device\n"); 543 544 return rc; 545 } 546 547 static void ena_rx_queue_release_all(struct rte_eth_dev *dev) 548 { 549 int nb_queues = dev->data->nb_rx_queues; 550 int i; 551 552 for (i = 0; i < nb_queues; i++) 553 ena_rx_queue_release(dev, i); 554 } 555 556 static void ena_tx_queue_release_all(struct rte_eth_dev *dev) 557 { 558 int nb_queues = dev->data->nb_tx_queues; 559 int i; 560 561 for (i = 0; i < nb_queues; i++) 562 ena_tx_queue_release(dev, i); 563 } 564 565 static void ena_rx_queue_release(struct rte_eth_dev *dev, uint16_t qid) 566 { 567 struct ena_ring *ring = dev->data->rx_queues[qid]; 568 569 /* Free ring resources */ 570 if (ring->rx_buffer_info) 571 rte_free(ring->rx_buffer_info); 572 ring->rx_buffer_info = NULL; 573 574 if (ring->rx_refill_buffer) 575 rte_free(ring->rx_refill_buffer); 576 ring->rx_refill_buffer = NULL; 577 578 if (ring->empty_rx_reqs) 579 rte_free(ring->empty_rx_reqs); 580 ring->empty_rx_reqs = NULL; 581 582 ring->configured = 0; 583 584 PMD_DRV_LOG(NOTICE, "Rx queue %d:%d released\n", 585 ring->port_id, ring->id); 586 } 587 588 static void ena_tx_queue_release(struct rte_eth_dev *dev, uint16_t qid) 589 { 590 struct ena_ring *ring = dev->data->tx_queues[qid]; 591 592 /* Free ring resources */ 593 if (ring->push_buf_intermediate_buf) 594 rte_free(ring->push_buf_intermediate_buf); 595 596 if (ring->tx_buffer_info) 597 rte_free(ring->tx_buffer_info); 598 599 if (ring->empty_tx_reqs) 600 rte_free(ring->empty_tx_reqs); 601 602 ring->empty_tx_reqs = NULL; 603 ring->tx_buffer_info = NULL; 604 ring->push_buf_intermediate_buf = NULL; 605 606 ring->configured = 0; 607 608 PMD_DRV_LOG(NOTICE, "Tx queue %d:%d released\n", 609 ring->port_id, ring->id); 610 } 611 612 static void ena_rx_queue_release_bufs(struct ena_ring *ring) 613 { 614 unsigned int i; 615 616 for (i = 0; i < ring->ring_size; ++i) { 617 struct ena_rx_buffer *rx_info = &ring->rx_buffer_info[i]; 618 if (rx_info->mbuf) { 619 rte_mbuf_raw_free(rx_info->mbuf); 620 rx_info->mbuf = NULL; 621 } 622 } 623 } 624 625 static void ena_tx_queue_release_bufs(struct ena_ring *ring) 626 { 627 unsigned int i; 628 629 for (i = 0; i < ring->ring_size; ++i) { 630 struct ena_tx_buffer *tx_buf = &ring->tx_buffer_info[i]; 631 632 if (tx_buf->mbuf) { 633 rte_pktmbuf_free(tx_buf->mbuf); 634 tx_buf->mbuf = NULL; 635 } 636 } 637 } 638 639 static int ena_link_update(struct rte_eth_dev *dev, 640 __rte_unused int wait_to_complete) 641 { 642 struct rte_eth_link *link = &dev->data->dev_link; 643 struct ena_adapter *adapter = dev->data->dev_private; 644 645 link->link_status = adapter->link_status ? RTE_ETH_LINK_UP : RTE_ETH_LINK_DOWN; 646 link->link_speed = RTE_ETH_SPEED_NUM_NONE; 647 link->link_duplex = RTE_ETH_LINK_FULL_DUPLEX; 648 649 return 0; 650 } 651 652 static int ena_queue_start_all(struct rte_eth_dev *dev, 653 enum ena_ring_type ring_type) 654 { 655 struct ena_adapter *adapter = dev->data->dev_private; 656 struct ena_ring *queues = NULL; 657 int nb_queues; 658 int i = 0; 659 int rc = 0; 660 661 if (ring_type == ENA_RING_TYPE_RX) { 662 queues = adapter->rx_ring; 663 nb_queues = dev->data->nb_rx_queues; 664 } else { 665 queues = adapter->tx_ring; 666 nb_queues = dev->data->nb_tx_queues; 667 } 668 for (i = 0; i < nb_queues; i++) { 669 if (queues[i].configured) { 670 if (ring_type == ENA_RING_TYPE_RX) { 671 ena_assert_msg( 672 dev->data->rx_queues[i] == &queues[i], 673 "Inconsistent state of Rx queues\n"); 674 } else { 675 ena_assert_msg( 676 dev->data->tx_queues[i] == &queues[i], 677 "Inconsistent state of Tx queues\n"); 678 } 679 680 rc = ena_queue_start(dev, &queues[i]); 681 682 if (rc) { 683 PMD_INIT_LOG(ERR, 684 "Failed to start queue[%d] of type(%d)\n", 685 i, ring_type); 686 goto err; 687 } 688 } 689 } 690 691 return 0; 692 693 err: 694 while (i--) 695 if (queues[i].configured) 696 ena_queue_stop(&queues[i]); 697 698 return rc; 699 } 700 701 static int ena_check_valid_conf(struct ena_adapter *adapter) 702 { 703 uint32_t mtu = adapter->edev_data->mtu; 704 705 if (mtu > adapter->max_mtu || mtu < ENA_MIN_MTU) { 706 PMD_INIT_LOG(ERR, 707 "Unsupported MTU of %d. Max MTU: %d, min MTU: %d\n", 708 mtu, adapter->max_mtu, ENA_MIN_MTU); 709 return ENA_COM_UNSUPPORTED; 710 } 711 712 return 0; 713 } 714 715 static int 716 ena_calc_io_queue_size(struct ena_calc_queue_size_ctx *ctx, 717 bool use_large_llq_hdr) 718 { 719 struct ena_admin_feature_llq_desc *llq = &ctx->get_feat_ctx->llq; 720 struct ena_com_dev *ena_dev = ctx->ena_dev; 721 uint32_t max_tx_queue_size; 722 uint32_t max_rx_queue_size; 723 724 if (ena_dev->supported_features & BIT(ENA_ADMIN_MAX_QUEUES_EXT)) { 725 struct ena_admin_queue_ext_feature_fields *max_queue_ext = 726 &ctx->get_feat_ctx->max_queue_ext.max_queue_ext; 727 max_rx_queue_size = RTE_MIN(max_queue_ext->max_rx_cq_depth, 728 max_queue_ext->max_rx_sq_depth); 729 max_tx_queue_size = max_queue_ext->max_tx_cq_depth; 730 731 if (ena_dev->tx_mem_queue_type == 732 ENA_ADMIN_PLACEMENT_POLICY_DEV) { 733 max_tx_queue_size = RTE_MIN(max_tx_queue_size, 734 llq->max_llq_depth); 735 } else { 736 max_tx_queue_size = RTE_MIN(max_tx_queue_size, 737 max_queue_ext->max_tx_sq_depth); 738 } 739 740 ctx->max_rx_sgl_size = RTE_MIN(ENA_PKT_MAX_BUFS, 741 max_queue_ext->max_per_packet_rx_descs); 742 ctx->max_tx_sgl_size = RTE_MIN(ENA_PKT_MAX_BUFS, 743 max_queue_ext->max_per_packet_tx_descs); 744 } else { 745 struct ena_admin_queue_feature_desc *max_queues = 746 &ctx->get_feat_ctx->max_queues; 747 max_rx_queue_size = RTE_MIN(max_queues->max_cq_depth, 748 max_queues->max_sq_depth); 749 max_tx_queue_size = max_queues->max_cq_depth; 750 751 if (ena_dev->tx_mem_queue_type == 752 ENA_ADMIN_PLACEMENT_POLICY_DEV) { 753 max_tx_queue_size = RTE_MIN(max_tx_queue_size, 754 llq->max_llq_depth); 755 } else { 756 max_tx_queue_size = RTE_MIN(max_tx_queue_size, 757 max_queues->max_sq_depth); 758 } 759 760 ctx->max_rx_sgl_size = RTE_MIN(ENA_PKT_MAX_BUFS, 761 max_queues->max_packet_rx_descs); 762 ctx->max_tx_sgl_size = RTE_MIN(ENA_PKT_MAX_BUFS, 763 max_queues->max_packet_tx_descs); 764 } 765 766 /* Round down to the nearest power of 2 */ 767 max_rx_queue_size = rte_align32prevpow2(max_rx_queue_size); 768 max_tx_queue_size = rte_align32prevpow2(max_tx_queue_size); 769 770 if (use_large_llq_hdr) { 771 if ((llq->entry_size_ctrl_supported & 772 ENA_ADMIN_LIST_ENTRY_SIZE_256B) && 773 (ena_dev->tx_mem_queue_type == 774 ENA_ADMIN_PLACEMENT_POLICY_DEV)) { 775 max_tx_queue_size /= 2; 776 PMD_INIT_LOG(INFO, 777 "Forcing large headers and decreasing maximum Tx queue size to %d\n", 778 max_tx_queue_size); 779 } else { 780 PMD_INIT_LOG(ERR, 781 "Forcing large headers failed: LLQ is disabled or device does not support large headers\n"); 782 } 783 } 784 785 if (unlikely(max_rx_queue_size == 0 || max_tx_queue_size == 0)) { 786 PMD_INIT_LOG(ERR, "Invalid queue size\n"); 787 return -EFAULT; 788 } 789 790 ctx->max_tx_queue_size = max_tx_queue_size; 791 ctx->max_rx_queue_size = max_rx_queue_size; 792 793 return 0; 794 } 795 796 static void ena_stats_restart(struct rte_eth_dev *dev) 797 { 798 struct ena_adapter *adapter = dev->data->dev_private; 799 800 rte_atomic64_init(&adapter->drv_stats->ierrors); 801 rte_atomic64_init(&adapter->drv_stats->oerrors); 802 rte_atomic64_init(&adapter->drv_stats->rx_nombuf); 803 adapter->drv_stats->rx_drops = 0; 804 } 805 806 static int ena_stats_get(struct rte_eth_dev *dev, 807 struct rte_eth_stats *stats) 808 { 809 struct ena_admin_basic_stats ena_stats; 810 struct ena_adapter *adapter = dev->data->dev_private; 811 struct ena_com_dev *ena_dev = &adapter->ena_dev; 812 int rc; 813 int i; 814 int max_rings_stats; 815 816 if (rte_eal_process_type() != RTE_PROC_PRIMARY) 817 return -ENOTSUP; 818 819 memset(&ena_stats, 0, sizeof(ena_stats)); 820 821 rte_spinlock_lock(&adapter->admin_lock); 822 rc = ena_com_get_dev_basic_stats(ena_dev, &ena_stats); 823 rte_spinlock_unlock(&adapter->admin_lock); 824 if (unlikely(rc)) { 825 PMD_DRV_LOG(ERR, "Could not retrieve statistics from ENA\n"); 826 return rc; 827 } 828 829 /* Set of basic statistics from ENA */ 830 stats->ipackets = __MERGE_64B_H_L(ena_stats.rx_pkts_high, 831 ena_stats.rx_pkts_low); 832 stats->opackets = __MERGE_64B_H_L(ena_stats.tx_pkts_high, 833 ena_stats.tx_pkts_low); 834 stats->ibytes = __MERGE_64B_H_L(ena_stats.rx_bytes_high, 835 ena_stats.rx_bytes_low); 836 stats->obytes = __MERGE_64B_H_L(ena_stats.tx_bytes_high, 837 ena_stats.tx_bytes_low); 838 839 /* Driver related stats */ 840 stats->imissed = adapter->drv_stats->rx_drops; 841 stats->ierrors = rte_atomic64_read(&adapter->drv_stats->ierrors); 842 stats->oerrors = rte_atomic64_read(&adapter->drv_stats->oerrors); 843 stats->rx_nombuf = rte_atomic64_read(&adapter->drv_stats->rx_nombuf); 844 845 max_rings_stats = RTE_MIN(dev->data->nb_rx_queues, 846 RTE_ETHDEV_QUEUE_STAT_CNTRS); 847 for (i = 0; i < max_rings_stats; ++i) { 848 struct ena_stats_rx *rx_stats = &adapter->rx_ring[i].rx_stats; 849 850 stats->q_ibytes[i] = rx_stats->bytes; 851 stats->q_ipackets[i] = rx_stats->cnt; 852 stats->q_errors[i] = rx_stats->bad_desc_num + 853 rx_stats->bad_req_id; 854 } 855 856 max_rings_stats = RTE_MIN(dev->data->nb_tx_queues, 857 RTE_ETHDEV_QUEUE_STAT_CNTRS); 858 for (i = 0; i < max_rings_stats; ++i) { 859 struct ena_stats_tx *tx_stats = &adapter->tx_ring[i].tx_stats; 860 861 stats->q_obytes[i] = tx_stats->bytes; 862 stats->q_opackets[i] = tx_stats->cnt; 863 } 864 865 return 0; 866 } 867 868 static int ena_mtu_set(struct rte_eth_dev *dev, uint16_t mtu) 869 { 870 struct ena_adapter *adapter; 871 struct ena_com_dev *ena_dev; 872 int rc = 0; 873 874 ena_assert_msg(dev->data != NULL, "Uninitialized device\n"); 875 ena_assert_msg(dev->data->dev_private != NULL, "Uninitialized device\n"); 876 adapter = dev->data->dev_private; 877 878 ena_dev = &adapter->ena_dev; 879 ena_assert_msg(ena_dev != NULL, "Uninitialized device\n"); 880 881 if (mtu > adapter->max_mtu || mtu < ENA_MIN_MTU) { 882 PMD_DRV_LOG(ERR, 883 "Invalid MTU setting. New MTU: %d, max MTU: %d, min MTU: %d\n", 884 mtu, adapter->max_mtu, ENA_MIN_MTU); 885 return -EINVAL; 886 } 887 888 rc = ena_com_set_dev_mtu(ena_dev, mtu); 889 if (rc) 890 PMD_DRV_LOG(ERR, "Could not set MTU: %d\n", mtu); 891 else 892 PMD_DRV_LOG(NOTICE, "MTU set to: %d\n", mtu); 893 894 return rc; 895 } 896 897 static int ena_start(struct rte_eth_dev *dev) 898 { 899 struct ena_adapter *adapter = dev->data->dev_private; 900 uint64_t ticks; 901 int rc = 0; 902 903 /* Cannot allocate memory in secondary process */ 904 if (rte_eal_process_type() != RTE_PROC_PRIMARY) { 905 PMD_DRV_LOG(WARNING, "dev_start not supported in secondary.\n"); 906 return -EPERM; 907 } 908 909 rc = ena_check_valid_conf(adapter); 910 if (rc) 911 return rc; 912 913 rc = ena_setup_rx_intr(dev); 914 if (rc) 915 return rc; 916 917 rc = ena_queue_start_all(dev, ENA_RING_TYPE_RX); 918 if (rc) 919 return rc; 920 921 rc = ena_queue_start_all(dev, ENA_RING_TYPE_TX); 922 if (rc) 923 goto err_start_tx; 924 925 if (adapter->edev_data->dev_conf.rxmode.mq_mode & RTE_ETH_MQ_RX_RSS_FLAG) { 926 rc = ena_rss_configure(adapter); 927 if (rc) 928 goto err_rss_init; 929 } 930 931 ena_stats_restart(dev); 932 933 adapter->timestamp_wd = rte_get_timer_cycles(); 934 adapter->keep_alive_timeout = ENA_DEVICE_KALIVE_TIMEOUT; 935 936 ticks = rte_get_timer_hz(); 937 rte_timer_reset(&adapter->timer_wd, ticks, PERIODICAL, rte_lcore_id(), 938 ena_timer_wd_callback, dev); 939 940 ++adapter->dev_stats.dev_start; 941 adapter->state = ENA_ADAPTER_STATE_RUNNING; 942 943 return 0; 944 945 err_rss_init: 946 ena_queue_stop_all(dev, ENA_RING_TYPE_TX); 947 err_start_tx: 948 ena_queue_stop_all(dev, ENA_RING_TYPE_RX); 949 return rc; 950 } 951 952 static int ena_stop(struct rte_eth_dev *dev) 953 { 954 struct ena_adapter *adapter = dev->data->dev_private; 955 struct ena_com_dev *ena_dev = &adapter->ena_dev; 956 struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev); 957 struct rte_intr_handle *intr_handle = pci_dev->intr_handle; 958 int rc; 959 960 /* Cannot free memory in secondary process */ 961 if (rte_eal_process_type() != RTE_PROC_PRIMARY) { 962 PMD_DRV_LOG(WARNING, "dev_stop not supported in secondary.\n"); 963 return -EPERM; 964 } 965 966 rte_timer_stop_sync(&adapter->timer_wd); 967 ena_queue_stop_all(dev, ENA_RING_TYPE_TX); 968 ena_queue_stop_all(dev, ENA_RING_TYPE_RX); 969 970 if (adapter->trigger_reset) { 971 rc = ena_com_dev_reset(ena_dev, adapter->reset_reason); 972 if (rc) 973 PMD_DRV_LOG(ERR, "Device reset failed, rc: %d\n", rc); 974 } 975 976 rte_intr_disable(intr_handle); 977 978 rte_intr_efd_disable(intr_handle); 979 980 /* Cleanup vector list */ 981 rte_intr_vec_list_free(intr_handle); 982 983 rte_intr_enable(intr_handle); 984 985 ++adapter->dev_stats.dev_stop; 986 adapter->state = ENA_ADAPTER_STATE_STOPPED; 987 dev->data->dev_started = 0; 988 989 return 0; 990 } 991 992 static int ena_create_io_queue(struct rte_eth_dev *dev, struct ena_ring *ring) 993 { 994 struct ena_adapter *adapter = ring->adapter; 995 struct ena_com_dev *ena_dev = &adapter->ena_dev; 996 struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev); 997 struct rte_intr_handle *intr_handle = pci_dev->intr_handle; 998 struct ena_com_create_io_ctx ctx = 999 /* policy set to _HOST just to satisfy icc compiler */ 1000 { ENA_ADMIN_PLACEMENT_POLICY_HOST, 1001 0, 0, 0, 0, 0 }; 1002 uint16_t ena_qid; 1003 unsigned int i; 1004 int rc; 1005 1006 ctx.msix_vector = -1; 1007 if (ring->type == ENA_RING_TYPE_TX) { 1008 ena_qid = ENA_IO_TXQ_IDX(ring->id); 1009 ctx.direction = ENA_COM_IO_QUEUE_DIRECTION_TX; 1010 ctx.mem_queue_type = ena_dev->tx_mem_queue_type; 1011 for (i = 0; i < ring->ring_size; i++) 1012 ring->empty_tx_reqs[i] = i; 1013 } else { 1014 ena_qid = ENA_IO_RXQ_IDX(ring->id); 1015 ctx.direction = ENA_COM_IO_QUEUE_DIRECTION_RX; 1016 if (rte_intr_dp_is_en(intr_handle)) 1017 ctx.msix_vector = 1018 rte_intr_vec_list_index_get(intr_handle, 1019 ring->id); 1020 1021 for (i = 0; i < ring->ring_size; i++) 1022 ring->empty_rx_reqs[i] = i; 1023 } 1024 ctx.queue_size = ring->ring_size; 1025 ctx.qid = ena_qid; 1026 ctx.numa_node = ring->numa_socket_id; 1027 1028 rc = ena_com_create_io_queue(ena_dev, &ctx); 1029 if (rc) { 1030 PMD_DRV_LOG(ERR, 1031 "Failed to create IO queue[%d] (qid:%d), rc: %d\n", 1032 ring->id, ena_qid, rc); 1033 return rc; 1034 } 1035 1036 rc = ena_com_get_io_handlers(ena_dev, ena_qid, 1037 &ring->ena_com_io_sq, 1038 &ring->ena_com_io_cq); 1039 if (rc) { 1040 PMD_DRV_LOG(ERR, 1041 "Failed to get IO queue[%d] handlers, rc: %d\n", 1042 ring->id, rc); 1043 ena_com_destroy_io_queue(ena_dev, ena_qid); 1044 return rc; 1045 } 1046 1047 if (ring->type == ENA_RING_TYPE_TX) 1048 ena_com_update_numa_node(ring->ena_com_io_cq, ctx.numa_node); 1049 1050 /* Start with Rx interrupts being masked. */ 1051 if (ring->type == ENA_RING_TYPE_RX && rte_intr_dp_is_en(intr_handle)) 1052 ena_rx_queue_intr_disable(dev, ring->id); 1053 1054 return 0; 1055 } 1056 1057 static void ena_queue_stop(struct ena_ring *ring) 1058 { 1059 struct ena_com_dev *ena_dev = &ring->adapter->ena_dev; 1060 1061 if (ring->type == ENA_RING_TYPE_RX) { 1062 ena_com_destroy_io_queue(ena_dev, ENA_IO_RXQ_IDX(ring->id)); 1063 ena_rx_queue_release_bufs(ring); 1064 } else { 1065 ena_com_destroy_io_queue(ena_dev, ENA_IO_TXQ_IDX(ring->id)); 1066 ena_tx_queue_release_bufs(ring); 1067 } 1068 } 1069 1070 static void ena_queue_stop_all(struct rte_eth_dev *dev, 1071 enum ena_ring_type ring_type) 1072 { 1073 struct ena_adapter *adapter = dev->data->dev_private; 1074 struct ena_ring *queues = NULL; 1075 uint16_t nb_queues, i; 1076 1077 if (ring_type == ENA_RING_TYPE_RX) { 1078 queues = adapter->rx_ring; 1079 nb_queues = dev->data->nb_rx_queues; 1080 } else { 1081 queues = adapter->tx_ring; 1082 nb_queues = dev->data->nb_tx_queues; 1083 } 1084 1085 for (i = 0; i < nb_queues; ++i) 1086 if (queues[i].configured) 1087 ena_queue_stop(&queues[i]); 1088 } 1089 1090 static int ena_queue_start(struct rte_eth_dev *dev, struct ena_ring *ring) 1091 { 1092 int rc, bufs_num; 1093 1094 ena_assert_msg(ring->configured == 1, 1095 "Trying to start unconfigured queue\n"); 1096 1097 rc = ena_create_io_queue(dev, ring); 1098 if (rc) { 1099 PMD_INIT_LOG(ERR, "Failed to create IO queue\n"); 1100 return rc; 1101 } 1102 1103 ring->next_to_clean = 0; 1104 ring->next_to_use = 0; 1105 1106 if (ring->type == ENA_RING_TYPE_TX) { 1107 ring->tx_stats.available_desc = 1108 ena_com_free_q_entries(ring->ena_com_io_sq); 1109 return 0; 1110 } 1111 1112 bufs_num = ring->ring_size - 1; 1113 rc = ena_populate_rx_queue(ring, bufs_num); 1114 if (rc != bufs_num) { 1115 ena_com_destroy_io_queue(&ring->adapter->ena_dev, 1116 ENA_IO_RXQ_IDX(ring->id)); 1117 PMD_INIT_LOG(ERR, "Failed to populate Rx ring\n"); 1118 return ENA_COM_FAULT; 1119 } 1120 /* Flush per-core RX buffers pools cache as they can be used on other 1121 * cores as well. 1122 */ 1123 rte_mempool_cache_flush(NULL, ring->mb_pool); 1124 1125 return 0; 1126 } 1127 1128 static int ena_tx_queue_setup(struct rte_eth_dev *dev, 1129 uint16_t queue_idx, 1130 uint16_t nb_desc, 1131 unsigned int socket_id, 1132 const struct rte_eth_txconf *tx_conf) 1133 { 1134 struct ena_ring *txq = NULL; 1135 struct ena_adapter *adapter = dev->data->dev_private; 1136 unsigned int i; 1137 uint16_t dyn_thresh; 1138 1139 txq = &adapter->tx_ring[queue_idx]; 1140 1141 if (txq->configured) { 1142 PMD_DRV_LOG(CRIT, 1143 "API violation. Queue[%d] is already configured\n", 1144 queue_idx); 1145 return ENA_COM_FAULT; 1146 } 1147 1148 if (!rte_is_power_of_2(nb_desc)) { 1149 PMD_DRV_LOG(ERR, 1150 "Unsupported size of Tx queue: %d is not a power of 2.\n", 1151 nb_desc); 1152 return -EINVAL; 1153 } 1154 1155 if (nb_desc > adapter->max_tx_ring_size) { 1156 PMD_DRV_LOG(ERR, 1157 "Unsupported size of Tx queue (max size: %d)\n", 1158 adapter->max_tx_ring_size); 1159 return -EINVAL; 1160 } 1161 1162 txq->port_id = dev->data->port_id; 1163 txq->next_to_clean = 0; 1164 txq->next_to_use = 0; 1165 txq->ring_size = nb_desc; 1166 txq->size_mask = nb_desc - 1; 1167 txq->numa_socket_id = socket_id; 1168 txq->pkts_without_db = false; 1169 txq->last_cleanup_ticks = 0; 1170 1171 txq->tx_buffer_info = rte_zmalloc_socket("txq->tx_buffer_info", 1172 sizeof(struct ena_tx_buffer) * txq->ring_size, 1173 RTE_CACHE_LINE_SIZE, 1174 socket_id); 1175 if (!txq->tx_buffer_info) { 1176 PMD_DRV_LOG(ERR, 1177 "Failed to allocate memory for Tx buffer info\n"); 1178 return -ENOMEM; 1179 } 1180 1181 txq->empty_tx_reqs = rte_zmalloc_socket("txq->empty_tx_reqs", 1182 sizeof(uint16_t) * txq->ring_size, 1183 RTE_CACHE_LINE_SIZE, 1184 socket_id); 1185 if (!txq->empty_tx_reqs) { 1186 PMD_DRV_LOG(ERR, 1187 "Failed to allocate memory for empty Tx requests\n"); 1188 rte_free(txq->tx_buffer_info); 1189 return -ENOMEM; 1190 } 1191 1192 txq->push_buf_intermediate_buf = 1193 rte_zmalloc_socket("txq->push_buf_intermediate_buf", 1194 txq->tx_max_header_size, 1195 RTE_CACHE_LINE_SIZE, 1196 socket_id); 1197 if (!txq->push_buf_intermediate_buf) { 1198 PMD_DRV_LOG(ERR, "Failed to alloc push buffer for LLQ\n"); 1199 rte_free(txq->tx_buffer_info); 1200 rte_free(txq->empty_tx_reqs); 1201 return -ENOMEM; 1202 } 1203 1204 for (i = 0; i < txq->ring_size; i++) 1205 txq->empty_tx_reqs[i] = i; 1206 1207 txq->offloads = tx_conf->offloads | dev->data->dev_conf.txmode.offloads; 1208 1209 /* Check if caller provided the Tx cleanup threshold value. */ 1210 if (tx_conf->tx_free_thresh != 0) { 1211 txq->tx_free_thresh = tx_conf->tx_free_thresh; 1212 } else { 1213 dyn_thresh = txq->ring_size - 1214 txq->ring_size / ENA_REFILL_THRESH_DIVIDER; 1215 txq->tx_free_thresh = RTE_MAX(dyn_thresh, 1216 txq->ring_size - ENA_REFILL_THRESH_PACKET); 1217 } 1218 1219 txq->missing_tx_completion_threshold = 1220 RTE_MIN(txq->ring_size / 2, ENA_DEFAULT_MISSING_COMP); 1221 1222 /* Store pointer to this queue in upper layer */ 1223 txq->configured = 1; 1224 dev->data->tx_queues[queue_idx] = txq; 1225 1226 return 0; 1227 } 1228 1229 static int ena_rx_queue_setup(struct rte_eth_dev *dev, 1230 uint16_t queue_idx, 1231 uint16_t nb_desc, 1232 unsigned int socket_id, 1233 const struct rte_eth_rxconf *rx_conf, 1234 struct rte_mempool *mp) 1235 { 1236 struct ena_adapter *adapter = dev->data->dev_private; 1237 struct ena_ring *rxq = NULL; 1238 size_t buffer_size; 1239 int i; 1240 uint16_t dyn_thresh; 1241 1242 rxq = &adapter->rx_ring[queue_idx]; 1243 if (rxq->configured) { 1244 PMD_DRV_LOG(CRIT, 1245 "API violation. Queue[%d] is already configured\n", 1246 queue_idx); 1247 return ENA_COM_FAULT; 1248 } 1249 1250 if (!rte_is_power_of_2(nb_desc)) { 1251 PMD_DRV_LOG(ERR, 1252 "Unsupported size of Rx queue: %d is not a power of 2.\n", 1253 nb_desc); 1254 return -EINVAL; 1255 } 1256 1257 if (nb_desc > adapter->max_rx_ring_size) { 1258 PMD_DRV_LOG(ERR, 1259 "Unsupported size of Rx queue (max size: %d)\n", 1260 adapter->max_rx_ring_size); 1261 return -EINVAL; 1262 } 1263 1264 /* ENA isn't supporting buffers smaller than 1400 bytes */ 1265 buffer_size = rte_pktmbuf_data_room_size(mp) - RTE_PKTMBUF_HEADROOM; 1266 if (buffer_size < ENA_RX_BUF_MIN_SIZE) { 1267 PMD_DRV_LOG(ERR, 1268 "Unsupported size of Rx buffer: %zu (min size: %d)\n", 1269 buffer_size, ENA_RX_BUF_MIN_SIZE); 1270 return -EINVAL; 1271 } 1272 1273 rxq->port_id = dev->data->port_id; 1274 rxq->next_to_clean = 0; 1275 rxq->next_to_use = 0; 1276 rxq->ring_size = nb_desc; 1277 rxq->size_mask = nb_desc - 1; 1278 rxq->numa_socket_id = socket_id; 1279 rxq->mb_pool = mp; 1280 1281 rxq->rx_buffer_info = rte_zmalloc_socket("rxq->buffer_info", 1282 sizeof(struct ena_rx_buffer) * nb_desc, 1283 RTE_CACHE_LINE_SIZE, 1284 socket_id); 1285 if (!rxq->rx_buffer_info) { 1286 PMD_DRV_LOG(ERR, 1287 "Failed to allocate memory for Rx buffer info\n"); 1288 return -ENOMEM; 1289 } 1290 1291 rxq->rx_refill_buffer = rte_zmalloc_socket("rxq->rx_refill_buffer", 1292 sizeof(struct rte_mbuf *) * nb_desc, 1293 RTE_CACHE_LINE_SIZE, 1294 socket_id); 1295 if (!rxq->rx_refill_buffer) { 1296 PMD_DRV_LOG(ERR, 1297 "Failed to allocate memory for Rx refill buffer\n"); 1298 rte_free(rxq->rx_buffer_info); 1299 rxq->rx_buffer_info = NULL; 1300 return -ENOMEM; 1301 } 1302 1303 rxq->empty_rx_reqs = rte_zmalloc_socket("rxq->empty_rx_reqs", 1304 sizeof(uint16_t) * nb_desc, 1305 RTE_CACHE_LINE_SIZE, 1306 socket_id); 1307 if (!rxq->empty_rx_reqs) { 1308 PMD_DRV_LOG(ERR, 1309 "Failed to allocate memory for empty Rx requests\n"); 1310 rte_free(rxq->rx_buffer_info); 1311 rxq->rx_buffer_info = NULL; 1312 rte_free(rxq->rx_refill_buffer); 1313 rxq->rx_refill_buffer = NULL; 1314 return -ENOMEM; 1315 } 1316 1317 for (i = 0; i < nb_desc; i++) 1318 rxq->empty_rx_reqs[i] = i; 1319 1320 rxq->offloads = rx_conf->offloads | dev->data->dev_conf.rxmode.offloads; 1321 1322 if (rx_conf->rx_free_thresh != 0) { 1323 rxq->rx_free_thresh = rx_conf->rx_free_thresh; 1324 } else { 1325 dyn_thresh = rxq->ring_size / ENA_REFILL_THRESH_DIVIDER; 1326 rxq->rx_free_thresh = RTE_MIN(dyn_thresh, 1327 (uint16_t)(ENA_REFILL_THRESH_PACKET)); 1328 } 1329 1330 /* Store pointer to this queue in upper layer */ 1331 rxq->configured = 1; 1332 dev->data->rx_queues[queue_idx] = rxq; 1333 1334 return 0; 1335 } 1336 1337 static int ena_add_single_rx_desc(struct ena_com_io_sq *io_sq, 1338 struct rte_mbuf *mbuf, uint16_t id) 1339 { 1340 struct ena_com_buf ebuf; 1341 int rc; 1342 1343 /* prepare physical address for DMA transaction */ 1344 ebuf.paddr = mbuf->buf_iova + RTE_PKTMBUF_HEADROOM; 1345 ebuf.len = mbuf->buf_len - RTE_PKTMBUF_HEADROOM; 1346 1347 /* pass resource to device */ 1348 rc = ena_com_add_single_rx_desc(io_sq, &ebuf, id); 1349 if (unlikely(rc != 0)) 1350 PMD_RX_LOG(WARNING, "Failed adding Rx desc\n"); 1351 1352 return rc; 1353 } 1354 1355 static int ena_populate_rx_queue(struct ena_ring *rxq, unsigned int count) 1356 { 1357 unsigned int i; 1358 int rc; 1359 uint16_t next_to_use = rxq->next_to_use; 1360 uint16_t req_id; 1361 #ifdef RTE_ETHDEV_DEBUG_RX 1362 uint16_t in_use; 1363 #endif 1364 struct rte_mbuf **mbufs = rxq->rx_refill_buffer; 1365 1366 if (unlikely(!count)) 1367 return 0; 1368 1369 #ifdef RTE_ETHDEV_DEBUG_RX 1370 in_use = rxq->ring_size - 1 - 1371 ena_com_free_q_entries(rxq->ena_com_io_sq); 1372 if (unlikely((in_use + count) >= rxq->ring_size)) 1373 PMD_RX_LOG(ERR, "Bad Rx ring state\n"); 1374 #endif 1375 1376 /* get resources for incoming packets */ 1377 rc = rte_pktmbuf_alloc_bulk(rxq->mb_pool, mbufs, count); 1378 if (unlikely(rc < 0)) { 1379 rte_atomic64_inc(&rxq->adapter->drv_stats->rx_nombuf); 1380 ++rxq->rx_stats.mbuf_alloc_fail; 1381 PMD_RX_LOG(DEBUG, "There are not enough free buffers\n"); 1382 return 0; 1383 } 1384 1385 for (i = 0; i < count; i++) { 1386 struct rte_mbuf *mbuf = mbufs[i]; 1387 struct ena_rx_buffer *rx_info; 1388 1389 if (likely((i + 4) < count)) 1390 rte_prefetch0(mbufs[i + 4]); 1391 1392 req_id = rxq->empty_rx_reqs[next_to_use]; 1393 rx_info = &rxq->rx_buffer_info[req_id]; 1394 1395 rc = ena_add_single_rx_desc(rxq->ena_com_io_sq, mbuf, req_id); 1396 if (unlikely(rc != 0)) 1397 break; 1398 1399 rx_info->mbuf = mbuf; 1400 next_to_use = ENA_IDX_NEXT_MASKED(next_to_use, rxq->size_mask); 1401 } 1402 1403 if (unlikely(i < count)) { 1404 PMD_RX_LOG(WARNING, 1405 "Refilled Rx queue[%d] with only %d/%d buffers\n", 1406 rxq->id, i, count); 1407 rte_pktmbuf_free_bulk(&mbufs[i], count - i); 1408 ++rxq->rx_stats.refill_partial; 1409 } 1410 1411 /* When we submitted free recources to device... */ 1412 if (likely(i > 0)) { 1413 /* ...let HW know that it can fill buffers with data. */ 1414 ena_com_write_sq_doorbell(rxq->ena_com_io_sq); 1415 1416 rxq->next_to_use = next_to_use; 1417 } 1418 1419 return i; 1420 } 1421 1422 static int ena_device_init(struct ena_com_dev *ena_dev, 1423 struct rte_pci_device *pdev, 1424 struct ena_com_dev_get_features_ctx *get_feat_ctx, 1425 bool *wd_state) 1426 { 1427 uint32_t aenq_groups; 1428 int rc; 1429 bool readless_supported; 1430 1431 /* Initialize mmio registers */ 1432 rc = ena_com_mmio_reg_read_request_init(ena_dev); 1433 if (rc) { 1434 PMD_DRV_LOG(ERR, "Failed to init MMIO read less\n"); 1435 return rc; 1436 } 1437 1438 /* The PCIe configuration space revision id indicate if mmio reg 1439 * read is disabled. 1440 */ 1441 readless_supported = !(pdev->id.class_id & ENA_MMIO_DISABLE_REG_READ); 1442 ena_com_set_mmio_read_mode(ena_dev, readless_supported); 1443 1444 /* reset device */ 1445 rc = ena_com_dev_reset(ena_dev, ENA_REGS_RESET_NORMAL); 1446 if (rc) { 1447 PMD_DRV_LOG(ERR, "Cannot reset device\n"); 1448 goto err_mmio_read_less; 1449 } 1450 1451 /* check FW version */ 1452 rc = ena_com_validate_version(ena_dev); 1453 if (rc) { 1454 PMD_DRV_LOG(ERR, "Device version is too low\n"); 1455 goto err_mmio_read_less; 1456 } 1457 1458 ena_dev->dma_addr_bits = ena_com_get_dma_width(ena_dev); 1459 1460 /* ENA device administration layer init */ 1461 rc = ena_com_admin_init(ena_dev, &aenq_handlers); 1462 if (rc) { 1463 PMD_DRV_LOG(ERR, 1464 "Cannot initialize ENA admin queue\n"); 1465 goto err_mmio_read_less; 1466 } 1467 1468 /* To enable the msix interrupts the driver needs to know the number 1469 * of queues. So the driver uses polling mode to retrieve this 1470 * information. 1471 */ 1472 ena_com_set_admin_polling_mode(ena_dev, true); 1473 1474 ena_config_host_info(ena_dev); 1475 1476 /* Get Device Attributes and features */ 1477 rc = ena_com_get_dev_attr_feat(ena_dev, get_feat_ctx); 1478 if (rc) { 1479 PMD_DRV_LOG(ERR, 1480 "Cannot get attribute for ENA device, rc: %d\n", rc); 1481 goto err_admin_init; 1482 } 1483 1484 aenq_groups = BIT(ENA_ADMIN_LINK_CHANGE) | 1485 BIT(ENA_ADMIN_NOTIFICATION) | 1486 BIT(ENA_ADMIN_KEEP_ALIVE) | 1487 BIT(ENA_ADMIN_FATAL_ERROR) | 1488 BIT(ENA_ADMIN_WARNING); 1489 1490 aenq_groups &= get_feat_ctx->aenq.supported_groups; 1491 rc = ena_com_set_aenq_config(ena_dev, aenq_groups); 1492 if (rc) { 1493 PMD_DRV_LOG(ERR, "Cannot configure AENQ groups, rc: %d\n", rc); 1494 goto err_admin_init; 1495 } 1496 1497 *wd_state = !!(aenq_groups & BIT(ENA_ADMIN_KEEP_ALIVE)); 1498 1499 return 0; 1500 1501 err_admin_init: 1502 ena_com_admin_destroy(ena_dev); 1503 1504 err_mmio_read_less: 1505 ena_com_mmio_reg_read_request_destroy(ena_dev); 1506 1507 return rc; 1508 } 1509 1510 static void ena_interrupt_handler_rte(void *cb_arg) 1511 { 1512 struct rte_eth_dev *dev = cb_arg; 1513 struct ena_adapter *adapter = dev->data->dev_private; 1514 struct ena_com_dev *ena_dev = &adapter->ena_dev; 1515 1516 ena_com_admin_q_comp_intr_handler(ena_dev); 1517 if (likely(adapter->state != ENA_ADAPTER_STATE_CLOSED)) 1518 ena_com_aenq_intr_handler(ena_dev, dev); 1519 } 1520 1521 static void check_for_missing_keep_alive(struct ena_adapter *adapter) 1522 { 1523 if (!adapter->wd_state) 1524 return; 1525 1526 if (adapter->keep_alive_timeout == ENA_HW_HINTS_NO_TIMEOUT) 1527 return; 1528 1529 if (unlikely((rte_get_timer_cycles() - adapter->timestamp_wd) >= 1530 adapter->keep_alive_timeout)) { 1531 PMD_DRV_LOG(ERR, "Keep alive timeout\n"); 1532 adapter->reset_reason = ENA_REGS_RESET_KEEP_ALIVE_TO; 1533 adapter->trigger_reset = true; 1534 ++adapter->dev_stats.wd_expired; 1535 } 1536 } 1537 1538 /* Check if admin queue is enabled */ 1539 static void check_for_admin_com_state(struct ena_adapter *adapter) 1540 { 1541 if (unlikely(!ena_com_get_admin_running_state(&adapter->ena_dev))) { 1542 PMD_DRV_LOG(ERR, "ENA admin queue is not in running state\n"); 1543 adapter->reset_reason = ENA_REGS_RESET_ADMIN_TO; 1544 adapter->trigger_reset = true; 1545 } 1546 } 1547 1548 static int check_for_tx_completion_in_queue(struct ena_adapter *adapter, 1549 struct ena_ring *tx_ring) 1550 { 1551 struct ena_tx_buffer *tx_buf; 1552 uint64_t timestamp; 1553 uint64_t completion_delay; 1554 uint32_t missed_tx = 0; 1555 unsigned int i; 1556 int rc = 0; 1557 1558 for (i = 0; i < tx_ring->ring_size; ++i) { 1559 tx_buf = &tx_ring->tx_buffer_info[i]; 1560 timestamp = tx_buf->timestamp; 1561 1562 if (timestamp == 0) 1563 continue; 1564 1565 completion_delay = rte_get_timer_cycles() - timestamp; 1566 if (completion_delay > adapter->missing_tx_completion_to) { 1567 if (unlikely(!tx_buf->print_once)) { 1568 PMD_TX_LOG(WARNING, 1569 "Found a Tx that wasn't completed on time, qid %d, index %d. " 1570 "Missing Tx outstanding for %" PRIu64 " msecs.\n", 1571 tx_ring->id, i, completion_delay / 1572 rte_get_timer_hz() * 1000); 1573 tx_buf->print_once = true; 1574 } 1575 ++missed_tx; 1576 } 1577 } 1578 1579 if (unlikely(missed_tx > tx_ring->missing_tx_completion_threshold)) { 1580 PMD_DRV_LOG(ERR, 1581 "The number of lost Tx completions is above the threshold (%d > %d). " 1582 "Trigger the device reset.\n", 1583 missed_tx, 1584 tx_ring->missing_tx_completion_threshold); 1585 adapter->reset_reason = ENA_REGS_RESET_MISS_TX_CMPL; 1586 adapter->trigger_reset = true; 1587 rc = -EIO; 1588 } 1589 1590 tx_ring->tx_stats.missed_tx += missed_tx; 1591 1592 return rc; 1593 } 1594 1595 static void check_for_tx_completions(struct ena_adapter *adapter) 1596 { 1597 struct ena_ring *tx_ring; 1598 uint64_t tx_cleanup_delay; 1599 size_t qid; 1600 int budget; 1601 uint16_t nb_tx_queues = adapter->edev_data->nb_tx_queues; 1602 1603 if (adapter->missing_tx_completion_to == ENA_HW_HINTS_NO_TIMEOUT) 1604 return; 1605 1606 nb_tx_queues = adapter->edev_data->nb_tx_queues; 1607 budget = adapter->missing_tx_completion_budget; 1608 1609 qid = adapter->last_tx_comp_qid; 1610 while (budget-- > 0) { 1611 tx_ring = &adapter->tx_ring[qid]; 1612 1613 /* Tx cleanup is called only by the burst function and can be 1614 * called dynamically by the application. Also cleanup is 1615 * limited by the threshold. To avoid false detection of the 1616 * missing HW Tx completion, get the delay since last cleanup 1617 * function was called. 1618 */ 1619 tx_cleanup_delay = rte_get_timer_cycles() - 1620 tx_ring->last_cleanup_ticks; 1621 if (tx_cleanup_delay < adapter->tx_cleanup_stall_delay) 1622 check_for_tx_completion_in_queue(adapter, tx_ring); 1623 qid = (qid + 1) % nb_tx_queues; 1624 } 1625 1626 adapter->last_tx_comp_qid = qid; 1627 } 1628 1629 static void ena_timer_wd_callback(__rte_unused struct rte_timer *timer, 1630 void *arg) 1631 { 1632 struct rte_eth_dev *dev = arg; 1633 struct ena_adapter *adapter = dev->data->dev_private; 1634 1635 check_for_missing_keep_alive(adapter); 1636 check_for_admin_com_state(adapter); 1637 check_for_tx_completions(adapter); 1638 1639 if (unlikely(adapter->trigger_reset)) { 1640 PMD_DRV_LOG(ERR, "Trigger reset is on\n"); 1641 rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_RESET, 1642 NULL); 1643 } 1644 } 1645 1646 static inline void 1647 set_default_llq_configurations(struct ena_llq_configurations *llq_config, 1648 struct ena_admin_feature_llq_desc *llq, 1649 bool use_large_llq_hdr) 1650 { 1651 llq_config->llq_header_location = ENA_ADMIN_INLINE_HEADER; 1652 llq_config->llq_stride_ctrl = ENA_ADMIN_MULTIPLE_DESCS_PER_ENTRY; 1653 llq_config->llq_num_decs_before_header = 1654 ENA_ADMIN_LLQ_NUM_DESCS_BEFORE_HEADER_2; 1655 1656 if (use_large_llq_hdr && 1657 (llq->entry_size_ctrl_supported & ENA_ADMIN_LIST_ENTRY_SIZE_256B)) { 1658 llq_config->llq_ring_entry_size = 1659 ENA_ADMIN_LIST_ENTRY_SIZE_256B; 1660 llq_config->llq_ring_entry_size_value = 256; 1661 } else { 1662 llq_config->llq_ring_entry_size = 1663 ENA_ADMIN_LIST_ENTRY_SIZE_128B; 1664 llq_config->llq_ring_entry_size_value = 128; 1665 } 1666 } 1667 1668 static int 1669 ena_set_queues_placement_policy(struct ena_adapter *adapter, 1670 struct ena_com_dev *ena_dev, 1671 struct ena_admin_feature_llq_desc *llq, 1672 struct ena_llq_configurations *llq_default_configurations) 1673 { 1674 int rc; 1675 u32 llq_feature_mask; 1676 1677 llq_feature_mask = 1 << ENA_ADMIN_LLQ; 1678 if (!(ena_dev->supported_features & llq_feature_mask)) { 1679 PMD_DRV_LOG(INFO, 1680 "LLQ is not supported. Fallback to host mode policy.\n"); 1681 ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST; 1682 return 0; 1683 } 1684 1685 rc = ena_com_config_dev_mode(ena_dev, llq, llq_default_configurations); 1686 if (unlikely(rc)) { 1687 PMD_INIT_LOG(WARNING, 1688 "Failed to config dev mode. Fallback to host mode policy.\n"); 1689 ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST; 1690 return 0; 1691 } 1692 1693 /* Nothing to config, exit */ 1694 if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_HOST) 1695 return 0; 1696 1697 if (!adapter->dev_mem_base) { 1698 PMD_DRV_LOG(ERR, 1699 "Unable to access LLQ BAR resource. Fallback to host mode policy.\n"); 1700 ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST; 1701 return 0; 1702 } 1703 1704 ena_dev->mem_bar = adapter->dev_mem_base; 1705 1706 return 0; 1707 } 1708 1709 static uint32_t ena_calc_max_io_queue_num(struct ena_com_dev *ena_dev, 1710 struct ena_com_dev_get_features_ctx *get_feat_ctx) 1711 { 1712 uint32_t io_tx_sq_num, io_tx_cq_num, io_rx_num, max_num_io_queues; 1713 1714 /* Regular queues capabilities */ 1715 if (ena_dev->supported_features & BIT(ENA_ADMIN_MAX_QUEUES_EXT)) { 1716 struct ena_admin_queue_ext_feature_fields *max_queue_ext = 1717 &get_feat_ctx->max_queue_ext.max_queue_ext; 1718 io_rx_num = RTE_MIN(max_queue_ext->max_rx_sq_num, 1719 max_queue_ext->max_rx_cq_num); 1720 io_tx_sq_num = max_queue_ext->max_tx_sq_num; 1721 io_tx_cq_num = max_queue_ext->max_tx_cq_num; 1722 } else { 1723 struct ena_admin_queue_feature_desc *max_queues = 1724 &get_feat_ctx->max_queues; 1725 io_tx_sq_num = max_queues->max_sq_num; 1726 io_tx_cq_num = max_queues->max_cq_num; 1727 io_rx_num = RTE_MIN(io_tx_sq_num, io_tx_cq_num); 1728 } 1729 1730 /* In case of LLQ use the llq number in the get feature cmd */ 1731 if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) 1732 io_tx_sq_num = get_feat_ctx->llq.max_llq_num; 1733 1734 max_num_io_queues = RTE_MIN(ENA_MAX_NUM_IO_QUEUES, io_rx_num); 1735 max_num_io_queues = RTE_MIN(max_num_io_queues, io_tx_sq_num); 1736 max_num_io_queues = RTE_MIN(max_num_io_queues, io_tx_cq_num); 1737 1738 if (unlikely(max_num_io_queues == 0)) { 1739 PMD_DRV_LOG(ERR, "Number of IO queues cannot not be 0\n"); 1740 return -EFAULT; 1741 } 1742 1743 return max_num_io_queues; 1744 } 1745 1746 static void 1747 ena_set_offloads(struct ena_offloads *offloads, 1748 struct ena_admin_feature_offload_desc *offload_desc) 1749 { 1750 if (offload_desc->tx & ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_IPV4_MASK) 1751 offloads->tx_offloads |= ENA_IPV4_TSO; 1752 1753 /* Tx IPv4 checksum offloads */ 1754 if (offload_desc->tx & 1755 ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L3_CSUM_IPV4_MASK) 1756 offloads->tx_offloads |= ENA_L3_IPV4_CSUM; 1757 if (offload_desc->tx & 1758 ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV4_CSUM_FULL_MASK) 1759 offloads->tx_offloads |= ENA_L4_IPV4_CSUM; 1760 if (offload_desc->tx & 1761 ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV4_CSUM_PART_MASK) 1762 offloads->tx_offloads |= ENA_L4_IPV4_CSUM_PARTIAL; 1763 1764 /* Tx IPv6 checksum offloads */ 1765 if (offload_desc->tx & 1766 ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV6_CSUM_FULL_MASK) 1767 offloads->tx_offloads |= ENA_L4_IPV6_CSUM; 1768 if (offload_desc->tx & 1769 ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV6_CSUM_PART_MASK) 1770 offloads->tx_offloads |= ENA_L4_IPV6_CSUM_PARTIAL; 1771 1772 /* Rx IPv4 checksum offloads */ 1773 if (offload_desc->rx_supported & 1774 ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L3_CSUM_IPV4_MASK) 1775 offloads->rx_offloads |= ENA_L3_IPV4_CSUM; 1776 if (offload_desc->rx_supported & 1777 ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L4_IPV4_CSUM_MASK) 1778 offloads->rx_offloads |= ENA_L4_IPV4_CSUM; 1779 1780 /* Rx IPv6 checksum offloads */ 1781 if (offload_desc->rx_supported & 1782 ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L4_IPV6_CSUM_MASK) 1783 offloads->rx_offloads |= ENA_L4_IPV6_CSUM; 1784 1785 if (offload_desc->rx_supported & 1786 ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_HASH_MASK) 1787 offloads->rx_offloads |= ENA_RX_RSS_HASH; 1788 } 1789 1790 static int eth_ena_dev_init(struct rte_eth_dev *eth_dev) 1791 { 1792 struct ena_calc_queue_size_ctx calc_queue_ctx = { 0 }; 1793 struct rte_pci_device *pci_dev; 1794 struct rte_intr_handle *intr_handle; 1795 struct ena_adapter *adapter = eth_dev->data->dev_private; 1796 struct ena_com_dev *ena_dev = &adapter->ena_dev; 1797 struct ena_com_dev_get_features_ctx get_feat_ctx; 1798 struct ena_llq_configurations llq_config; 1799 const char *queue_type_str; 1800 uint32_t max_num_io_queues; 1801 int rc; 1802 static int adapters_found; 1803 bool disable_meta_caching; 1804 bool wd_state = false; 1805 1806 eth_dev->dev_ops = &ena_dev_ops; 1807 eth_dev->rx_pkt_burst = ð_ena_recv_pkts; 1808 eth_dev->tx_pkt_burst = ð_ena_xmit_pkts; 1809 eth_dev->tx_pkt_prepare = ð_ena_prep_pkts; 1810 1811 if (rte_eal_process_type() != RTE_PROC_PRIMARY) 1812 return 0; 1813 1814 eth_dev->data->dev_flags |= RTE_ETH_DEV_AUTOFILL_QUEUE_XSTATS; 1815 1816 memset(adapter, 0, sizeof(struct ena_adapter)); 1817 ena_dev = &adapter->ena_dev; 1818 1819 adapter->edev_data = eth_dev->data; 1820 1821 pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev); 1822 1823 PMD_INIT_LOG(INFO, "Initializing %x:%x:%x.%d\n", 1824 pci_dev->addr.domain, 1825 pci_dev->addr.bus, 1826 pci_dev->addr.devid, 1827 pci_dev->addr.function); 1828 1829 intr_handle = pci_dev->intr_handle; 1830 1831 adapter->regs = pci_dev->mem_resource[ENA_REGS_BAR].addr; 1832 adapter->dev_mem_base = pci_dev->mem_resource[ENA_MEM_BAR].addr; 1833 1834 if (!adapter->regs) { 1835 PMD_INIT_LOG(CRIT, "Failed to access registers BAR(%d)\n", 1836 ENA_REGS_BAR); 1837 return -ENXIO; 1838 } 1839 1840 ena_dev->reg_bar = adapter->regs; 1841 /* This is a dummy pointer for ena_com functions. */ 1842 ena_dev->dmadev = adapter; 1843 1844 adapter->id_number = adapters_found; 1845 1846 snprintf(adapter->name, ENA_NAME_MAX_LEN, "ena_%d", 1847 adapter->id_number); 1848 1849 rc = ena_parse_devargs(adapter, pci_dev->device.devargs); 1850 if (rc != 0) { 1851 PMD_INIT_LOG(CRIT, "Failed to parse devargs\n"); 1852 goto err; 1853 } 1854 1855 /* device specific initialization routine */ 1856 rc = ena_device_init(ena_dev, pci_dev, &get_feat_ctx, &wd_state); 1857 if (rc) { 1858 PMD_INIT_LOG(CRIT, "Failed to init ENA device\n"); 1859 goto err; 1860 } 1861 adapter->wd_state = wd_state; 1862 1863 set_default_llq_configurations(&llq_config, &get_feat_ctx.llq, 1864 adapter->use_large_llq_hdr); 1865 rc = ena_set_queues_placement_policy(adapter, ena_dev, 1866 &get_feat_ctx.llq, &llq_config); 1867 if (unlikely(rc)) { 1868 PMD_INIT_LOG(CRIT, "Failed to set placement policy\n"); 1869 return rc; 1870 } 1871 1872 if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_HOST) 1873 queue_type_str = "Regular"; 1874 else 1875 queue_type_str = "Low latency"; 1876 PMD_DRV_LOG(INFO, "Placement policy: %s\n", queue_type_str); 1877 1878 calc_queue_ctx.ena_dev = ena_dev; 1879 calc_queue_ctx.get_feat_ctx = &get_feat_ctx; 1880 1881 max_num_io_queues = ena_calc_max_io_queue_num(ena_dev, &get_feat_ctx); 1882 rc = ena_calc_io_queue_size(&calc_queue_ctx, 1883 adapter->use_large_llq_hdr); 1884 if (unlikely((rc != 0) || (max_num_io_queues == 0))) { 1885 rc = -EFAULT; 1886 goto err_device_destroy; 1887 } 1888 1889 adapter->max_tx_ring_size = calc_queue_ctx.max_tx_queue_size; 1890 adapter->max_rx_ring_size = calc_queue_ctx.max_rx_queue_size; 1891 adapter->max_tx_sgl_size = calc_queue_ctx.max_tx_sgl_size; 1892 adapter->max_rx_sgl_size = calc_queue_ctx.max_rx_sgl_size; 1893 adapter->max_num_io_queues = max_num_io_queues; 1894 1895 if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) { 1896 disable_meta_caching = 1897 !!(get_feat_ctx.llq.accel_mode.u.get.supported_flags & 1898 BIT(ENA_ADMIN_DISABLE_META_CACHING)); 1899 } else { 1900 disable_meta_caching = false; 1901 } 1902 1903 /* prepare ring structures */ 1904 ena_init_rings(adapter, disable_meta_caching); 1905 1906 ena_config_debug_area(adapter); 1907 1908 /* Set max MTU for this device */ 1909 adapter->max_mtu = get_feat_ctx.dev_attr.max_mtu; 1910 1911 ena_set_offloads(&adapter->offloads, &get_feat_ctx.offload); 1912 1913 /* Copy MAC address and point DPDK to it */ 1914 eth_dev->data->mac_addrs = (struct rte_ether_addr *)adapter->mac_addr; 1915 rte_ether_addr_copy((struct rte_ether_addr *) 1916 get_feat_ctx.dev_attr.mac_addr, 1917 (struct rte_ether_addr *)adapter->mac_addr); 1918 1919 rc = ena_com_rss_init(ena_dev, ENA_RX_RSS_TABLE_LOG_SIZE); 1920 if (unlikely(rc != 0)) { 1921 PMD_DRV_LOG(ERR, "Failed to initialize RSS in ENA device\n"); 1922 goto err_delete_debug_area; 1923 } 1924 1925 adapter->drv_stats = rte_zmalloc("adapter stats", 1926 sizeof(*adapter->drv_stats), 1927 RTE_CACHE_LINE_SIZE); 1928 if (!adapter->drv_stats) { 1929 PMD_DRV_LOG(ERR, 1930 "Failed to allocate memory for adapter statistics\n"); 1931 rc = -ENOMEM; 1932 goto err_rss_destroy; 1933 } 1934 1935 rte_spinlock_init(&adapter->admin_lock); 1936 1937 rte_intr_callback_register(intr_handle, 1938 ena_interrupt_handler_rte, 1939 eth_dev); 1940 rte_intr_enable(intr_handle); 1941 ena_com_set_admin_polling_mode(ena_dev, false); 1942 ena_com_admin_aenq_enable(ena_dev); 1943 1944 if (adapters_found == 0) 1945 rte_timer_subsystem_init(); 1946 rte_timer_init(&adapter->timer_wd); 1947 1948 adapters_found++; 1949 adapter->state = ENA_ADAPTER_STATE_INIT; 1950 1951 return 0; 1952 1953 err_rss_destroy: 1954 ena_com_rss_destroy(ena_dev); 1955 err_delete_debug_area: 1956 ena_com_delete_debug_area(ena_dev); 1957 1958 err_device_destroy: 1959 ena_com_delete_host_info(ena_dev); 1960 ena_com_admin_destroy(ena_dev); 1961 1962 err: 1963 return rc; 1964 } 1965 1966 static void ena_destroy_device(struct rte_eth_dev *eth_dev) 1967 { 1968 struct ena_adapter *adapter = eth_dev->data->dev_private; 1969 struct ena_com_dev *ena_dev = &adapter->ena_dev; 1970 1971 if (adapter->state == ENA_ADAPTER_STATE_FREE) 1972 return; 1973 1974 ena_com_set_admin_running_state(ena_dev, false); 1975 1976 if (adapter->state != ENA_ADAPTER_STATE_CLOSED) 1977 ena_close(eth_dev); 1978 1979 ena_com_rss_destroy(ena_dev); 1980 1981 ena_com_delete_debug_area(ena_dev); 1982 ena_com_delete_host_info(ena_dev); 1983 1984 ena_com_abort_admin_commands(ena_dev); 1985 ena_com_wait_for_abort_completion(ena_dev); 1986 ena_com_admin_destroy(ena_dev); 1987 ena_com_mmio_reg_read_request_destroy(ena_dev); 1988 1989 adapter->state = ENA_ADAPTER_STATE_FREE; 1990 } 1991 1992 static int eth_ena_dev_uninit(struct rte_eth_dev *eth_dev) 1993 { 1994 if (rte_eal_process_type() != RTE_PROC_PRIMARY) 1995 return 0; 1996 1997 ena_destroy_device(eth_dev); 1998 1999 return 0; 2000 } 2001 2002 static int ena_dev_configure(struct rte_eth_dev *dev) 2003 { 2004 struct ena_adapter *adapter = dev->data->dev_private; 2005 2006 adapter->state = ENA_ADAPTER_STATE_CONFIG; 2007 2008 if (dev->data->dev_conf.rxmode.mq_mode & RTE_ETH_MQ_RX_RSS_FLAG) 2009 dev->data->dev_conf.rxmode.offloads |= RTE_ETH_RX_OFFLOAD_RSS_HASH; 2010 dev->data->dev_conf.txmode.offloads |= RTE_ETH_TX_OFFLOAD_MULTI_SEGS; 2011 2012 /* Scattered Rx cannot be turned off in the HW, so this capability must 2013 * be forced. 2014 */ 2015 dev->data->scattered_rx = 1; 2016 2017 adapter->last_tx_comp_qid = 0; 2018 2019 adapter->missing_tx_completion_budget = 2020 RTE_MIN(ENA_MONITORED_TX_QUEUES, dev->data->nb_tx_queues); 2021 2022 adapter->missing_tx_completion_to = ENA_TX_TIMEOUT; 2023 /* To avoid detection of the spurious Tx completion timeout due to 2024 * application not calling the Tx cleanup function, set timeout for the 2025 * Tx queue which should be half of the missing completion timeout for a 2026 * safety. If there will be a lot of missing Tx completions in the 2027 * queue, they will be detected sooner or later. 2028 */ 2029 adapter->tx_cleanup_stall_delay = adapter->missing_tx_completion_to / 2; 2030 2031 adapter->tx_selected_offloads = dev->data->dev_conf.txmode.offloads; 2032 adapter->rx_selected_offloads = dev->data->dev_conf.rxmode.offloads; 2033 2034 return 0; 2035 } 2036 2037 static void ena_init_rings(struct ena_adapter *adapter, 2038 bool disable_meta_caching) 2039 { 2040 size_t i; 2041 2042 for (i = 0; i < adapter->max_num_io_queues; i++) { 2043 struct ena_ring *ring = &adapter->tx_ring[i]; 2044 2045 ring->configured = 0; 2046 ring->type = ENA_RING_TYPE_TX; 2047 ring->adapter = adapter; 2048 ring->id = i; 2049 ring->tx_mem_queue_type = adapter->ena_dev.tx_mem_queue_type; 2050 ring->tx_max_header_size = adapter->ena_dev.tx_max_header_size; 2051 ring->sgl_size = adapter->max_tx_sgl_size; 2052 ring->disable_meta_caching = disable_meta_caching; 2053 } 2054 2055 for (i = 0; i < adapter->max_num_io_queues; i++) { 2056 struct ena_ring *ring = &adapter->rx_ring[i]; 2057 2058 ring->configured = 0; 2059 ring->type = ENA_RING_TYPE_RX; 2060 ring->adapter = adapter; 2061 ring->id = i; 2062 ring->sgl_size = adapter->max_rx_sgl_size; 2063 } 2064 } 2065 2066 static uint64_t ena_get_rx_port_offloads(struct ena_adapter *adapter) 2067 { 2068 uint64_t port_offloads = 0; 2069 2070 if (adapter->offloads.rx_offloads & ENA_L3_IPV4_CSUM) 2071 port_offloads |= RTE_ETH_RX_OFFLOAD_IPV4_CKSUM; 2072 2073 if (adapter->offloads.rx_offloads & 2074 (ENA_L4_IPV4_CSUM | ENA_L4_IPV6_CSUM)) 2075 port_offloads |= 2076 RTE_ETH_RX_OFFLOAD_UDP_CKSUM | RTE_ETH_RX_OFFLOAD_TCP_CKSUM; 2077 2078 if (adapter->offloads.rx_offloads & ENA_RX_RSS_HASH) 2079 port_offloads |= RTE_ETH_RX_OFFLOAD_RSS_HASH; 2080 2081 port_offloads |= RTE_ETH_RX_OFFLOAD_SCATTER; 2082 2083 return port_offloads; 2084 } 2085 2086 static uint64_t ena_get_tx_port_offloads(struct ena_adapter *adapter) 2087 { 2088 uint64_t port_offloads = 0; 2089 2090 if (adapter->offloads.tx_offloads & ENA_IPV4_TSO) 2091 port_offloads |= RTE_ETH_TX_OFFLOAD_TCP_TSO; 2092 2093 if (adapter->offloads.tx_offloads & ENA_L3_IPV4_CSUM) 2094 port_offloads |= RTE_ETH_TX_OFFLOAD_IPV4_CKSUM; 2095 if (adapter->offloads.tx_offloads & 2096 (ENA_L4_IPV4_CSUM_PARTIAL | ENA_L4_IPV4_CSUM | 2097 ENA_L4_IPV6_CSUM | ENA_L4_IPV6_CSUM_PARTIAL)) 2098 port_offloads |= 2099 RTE_ETH_TX_OFFLOAD_UDP_CKSUM | RTE_ETH_TX_OFFLOAD_TCP_CKSUM; 2100 2101 port_offloads |= RTE_ETH_TX_OFFLOAD_MULTI_SEGS; 2102 2103 return port_offloads; 2104 } 2105 2106 static uint64_t ena_get_rx_queue_offloads(struct ena_adapter *adapter) 2107 { 2108 RTE_SET_USED(adapter); 2109 2110 return 0; 2111 } 2112 2113 static uint64_t ena_get_tx_queue_offloads(struct ena_adapter *adapter) 2114 { 2115 RTE_SET_USED(adapter); 2116 2117 return 0; 2118 } 2119 2120 static int ena_infos_get(struct rte_eth_dev *dev, 2121 struct rte_eth_dev_info *dev_info) 2122 { 2123 struct ena_adapter *adapter; 2124 struct ena_com_dev *ena_dev; 2125 2126 ena_assert_msg(dev->data != NULL, "Uninitialized device\n"); 2127 ena_assert_msg(dev->data->dev_private != NULL, "Uninitialized device\n"); 2128 adapter = dev->data->dev_private; 2129 2130 ena_dev = &adapter->ena_dev; 2131 ena_assert_msg(ena_dev != NULL, "Uninitialized device\n"); 2132 2133 dev_info->speed_capa = 2134 RTE_ETH_LINK_SPEED_1G | 2135 RTE_ETH_LINK_SPEED_2_5G | 2136 RTE_ETH_LINK_SPEED_5G | 2137 RTE_ETH_LINK_SPEED_10G | 2138 RTE_ETH_LINK_SPEED_25G | 2139 RTE_ETH_LINK_SPEED_40G | 2140 RTE_ETH_LINK_SPEED_50G | 2141 RTE_ETH_LINK_SPEED_100G; 2142 2143 /* Inform framework about available features */ 2144 dev_info->rx_offload_capa = ena_get_rx_port_offloads(adapter); 2145 dev_info->tx_offload_capa = ena_get_tx_port_offloads(adapter); 2146 dev_info->rx_queue_offload_capa = ena_get_rx_queue_offloads(adapter); 2147 dev_info->tx_queue_offload_capa = ena_get_tx_queue_offloads(adapter); 2148 2149 dev_info->flow_type_rss_offloads = ENA_ALL_RSS_HF; 2150 dev_info->hash_key_size = ENA_HASH_KEY_SIZE; 2151 2152 dev_info->min_rx_bufsize = ENA_MIN_FRAME_LEN; 2153 dev_info->max_rx_pktlen = adapter->max_mtu + RTE_ETHER_HDR_LEN + 2154 RTE_ETHER_CRC_LEN; 2155 dev_info->min_mtu = ENA_MIN_MTU; 2156 dev_info->max_mtu = adapter->max_mtu; 2157 dev_info->max_mac_addrs = 1; 2158 2159 dev_info->max_rx_queues = adapter->max_num_io_queues; 2160 dev_info->max_tx_queues = adapter->max_num_io_queues; 2161 dev_info->reta_size = ENA_RX_RSS_TABLE_SIZE; 2162 2163 dev_info->rx_desc_lim.nb_max = adapter->max_rx_ring_size; 2164 dev_info->rx_desc_lim.nb_min = ENA_MIN_RING_DESC; 2165 dev_info->rx_desc_lim.nb_seg_max = RTE_MIN(ENA_PKT_MAX_BUFS, 2166 adapter->max_rx_sgl_size); 2167 dev_info->rx_desc_lim.nb_mtu_seg_max = RTE_MIN(ENA_PKT_MAX_BUFS, 2168 adapter->max_rx_sgl_size); 2169 2170 dev_info->tx_desc_lim.nb_max = adapter->max_tx_ring_size; 2171 dev_info->tx_desc_lim.nb_min = ENA_MIN_RING_DESC; 2172 dev_info->tx_desc_lim.nb_seg_max = RTE_MIN(ENA_PKT_MAX_BUFS, 2173 adapter->max_tx_sgl_size); 2174 dev_info->tx_desc_lim.nb_mtu_seg_max = RTE_MIN(ENA_PKT_MAX_BUFS, 2175 adapter->max_tx_sgl_size); 2176 2177 dev_info->default_rxportconf.ring_size = ENA_DEFAULT_RING_SIZE; 2178 dev_info->default_txportconf.ring_size = ENA_DEFAULT_RING_SIZE; 2179 2180 return 0; 2181 } 2182 2183 static inline void ena_init_rx_mbuf(struct rte_mbuf *mbuf, uint16_t len) 2184 { 2185 mbuf->data_len = len; 2186 mbuf->data_off = RTE_PKTMBUF_HEADROOM; 2187 mbuf->refcnt = 1; 2188 mbuf->next = NULL; 2189 } 2190 2191 static struct rte_mbuf *ena_rx_mbuf(struct ena_ring *rx_ring, 2192 struct ena_com_rx_buf_info *ena_bufs, 2193 uint32_t descs, 2194 uint16_t *next_to_clean, 2195 uint8_t offset) 2196 { 2197 struct rte_mbuf *mbuf; 2198 struct rte_mbuf *mbuf_head; 2199 struct ena_rx_buffer *rx_info; 2200 int rc; 2201 uint16_t ntc, len, req_id, buf = 0; 2202 2203 if (unlikely(descs == 0)) 2204 return NULL; 2205 2206 ntc = *next_to_clean; 2207 2208 len = ena_bufs[buf].len; 2209 req_id = ena_bufs[buf].req_id; 2210 2211 rx_info = &rx_ring->rx_buffer_info[req_id]; 2212 2213 mbuf = rx_info->mbuf; 2214 RTE_ASSERT(mbuf != NULL); 2215 2216 ena_init_rx_mbuf(mbuf, len); 2217 2218 /* Fill the mbuf head with the data specific for 1st segment. */ 2219 mbuf_head = mbuf; 2220 mbuf_head->nb_segs = descs; 2221 mbuf_head->port = rx_ring->port_id; 2222 mbuf_head->pkt_len = len; 2223 mbuf_head->data_off += offset; 2224 2225 rx_info->mbuf = NULL; 2226 rx_ring->empty_rx_reqs[ntc] = req_id; 2227 ntc = ENA_IDX_NEXT_MASKED(ntc, rx_ring->size_mask); 2228 2229 while (--descs) { 2230 ++buf; 2231 len = ena_bufs[buf].len; 2232 req_id = ena_bufs[buf].req_id; 2233 2234 rx_info = &rx_ring->rx_buffer_info[req_id]; 2235 RTE_ASSERT(rx_info->mbuf != NULL); 2236 2237 if (unlikely(len == 0)) { 2238 /* 2239 * Some devices can pass descriptor with the length 0. 2240 * To avoid confusion, the PMD is simply putting the 2241 * descriptor back, as it was never used. We'll avoid 2242 * mbuf allocation that way. 2243 */ 2244 rc = ena_add_single_rx_desc(rx_ring->ena_com_io_sq, 2245 rx_info->mbuf, req_id); 2246 if (unlikely(rc != 0)) { 2247 /* Free the mbuf in case of an error. */ 2248 rte_mbuf_raw_free(rx_info->mbuf); 2249 } else { 2250 /* 2251 * If there was no error, just exit the loop as 2252 * 0 length descriptor is always the last one. 2253 */ 2254 break; 2255 } 2256 } else { 2257 /* Create an mbuf chain. */ 2258 mbuf->next = rx_info->mbuf; 2259 mbuf = mbuf->next; 2260 2261 ena_init_rx_mbuf(mbuf, len); 2262 mbuf_head->pkt_len += len; 2263 } 2264 2265 /* 2266 * Mark the descriptor as depleted and perform necessary 2267 * cleanup. 2268 * This code will execute in two cases: 2269 * 1. Descriptor len was greater than 0 - normal situation. 2270 * 2. Descriptor len was 0 and we failed to add the descriptor 2271 * to the device. In that situation, we should try to add 2272 * the mbuf again in the populate routine and mark the 2273 * descriptor as used up by the device. 2274 */ 2275 rx_info->mbuf = NULL; 2276 rx_ring->empty_rx_reqs[ntc] = req_id; 2277 ntc = ENA_IDX_NEXT_MASKED(ntc, rx_ring->size_mask); 2278 } 2279 2280 *next_to_clean = ntc; 2281 2282 return mbuf_head; 2283 } 2284 2285 static uint16_t eth_ena_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, 2286 uint16_t nb_pkts) 2287 { 2288 struct ena_ring *rx_ring = (struct ena_ring *)(rx_queue); 2289 unsigned int free_queue_entries; 2290 uint16_t next_to_clean = rx_ring->next_to_clean; 2291 uint16_t descs_in_use; 2292 struct rte_mbuf *mbuf; 2293 uint16_t completed; 2294 struct ena_com_rx_ctx ena_rx_ctx; 2295 int i, rc = 0; 2296 bool fill_hash; 2297 2298 #ifdef RTE_ETHDEV_DEBUG_RX 2299 /* Check adapter state */ 2300 if (unlikely(rx_ring->adapter->state != ENA_ADAPTER_STATE_RUNNING)) { 2301 PMD_RX_LOG(ALERT, 2302 "Trying to receive pkts while device is NOT running\n"); 2303 return 0; 2304 } 2305 #endif 2306 2307 fill_hash = rx_ring->offloads & RTE_ETH_RX_OFFLOAD_RSS_HASH; 2308 2309 descs_in_use = rx_ring->ring_size - 2310 ena_com_free_q_entries(rx_ring->ena_com_io_sq) - 1; 2311 nb_pkts = RTE_MIN(descs_in_use, nb_pkts); 2312 2313 for (completed = 0; completed < nb_pkts; completed++) { 2314 ena_rx_ctx.max_bufs = rx_ring->sgl_size; 2315 ena_rx_ctx.ena_bufs = rx_ring->ena_bufs; 2316 ena_rx_ctx.descs = 0; 2317 ena_rx_ctx.pkt_offset = 0; 2318 /* receive packet context */ 2319 rc = ena_com_rx_pkt(rx_ring->ena_com_io_cq, 2320 rx_ring->ena_com_io_sq, 2321 &ena_rx_ctx); 2322 if (unlikely(rc)) { 2323 PMD_RX_LOG(ERR, 2324 "Failed to get the packet from the device, rc: %d\n", 2325 rc); 2326 if (rc == ENA_COM_NO_SPACE) { 2327 ++rx_ring->rx_stats.bad_desc_num; 2328 rx_ring->adapter->reset_reason = 2329 ENA_REGS_RESET_TOO_MANY_RX_DESCS; 2330 } else { 2331 ++rx_ring->rx_stats.bad_req_id; 2332 rx_ring->adapter->reset_reason = 2333 ENA_REGS_RESET_INV_RX_REQ_ID; 2334 } 2335 rx_ring->adapter->trigger_reset = true; 2336 return 0; 2337 } 2338 2339 mbuf = ena_rx_mbuf(rx_ring, 2340 ena_rx_ctx.ena_bufs, 2341 ena_rx_ctx.descs, 2342 &next_to_clean, 2343 ena_rx_ctx.pkt_offset); 2344 if (unlikely(mbuf == NULL)) { 2345 for (i = 0; i < ena_rx_ctx.descs; ++i) { 2346 rx_ring->empty_rx_reqs[next_to_clean] = 2347 rx_ring->ena_bufs[i].req_id; 2348 next_to_clean = ENA_IDX_NEXT_MASKED( 2349 next_to_clean, rx_ring->size_mask); 2350 } 2351 break; 2352 } 2353 2354 /* fill mbuf attributes if any */ 2355 ena_rx_mbuf_prepare(mbuf, &ena_rx_ctx, fill_hash); 2356 2357 if (unlikely(mbuf->ol_flags & 2358 (RTE_MBUF_F_RX_IP_CKSUM_BAD | RTE_MBUF_F_RX_L4_CKSUM_BAD))) { 2359 rte_atomic64_inc(&rx_ring->adapter->drv_stats->ierrors); 2360 ++rx_ring->rx_stats.bad_csum; 2361 } 2362 2363 rx_pkts[completed] = mbuf; 2364 rx_ring->rx_stats.bytes += mbuf->pkt_len; 2365 } 2366 2367 rx_ring->rx_stats.cnt += completed; 2368 rx_ring->next_to_clean = next_to_clean; 2369 2370 free_queue_entries = ena_com_free_q_entries(rx_ring->ena_com_io_sq); 2371 2372 /* Burst refill to save doorbells, memory barriers, const interval */ 2373 if (free_queue_entries >= rx_ring->rx_free_thresh) { 2374 ena_com_update_dev_comp_head(rx_ring->ena_com_io_cq); 2375 ena_populate_rx_queue(rx_ring, free_queue_entries); 2376 } 2377 2378 return completed; 2379 } 2380 2381 static uint16_t 2382 eth_ena_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, 2383 uint16_t nb_pkts) 2384 { 2385 int32_t ret; 2386 uint32_t i; 2387 struct rte_mbuf *m; 2388 struct ena_ring *tx_ring = (struct ena_ring *)(tx_queue); 2389 struct ena_adapter *adapter = tx_ring->adapter; 2390 struct rte_ipv4_hdr *ip_hdr; 2391 uint64_t ol_flags; 2392 uint64_t l4_csum_flag; 2393 uint64_t dev_offload_capa; 2394 uint16_t frag_field; 2395 bool need_pseudo_csum; 2396 2397 dev_offload_capa = adapter->offloads.tx_offloads; 2398 for (i = 0; i != nb_pkts; i++) { 2399 m = tx_pkts[i]; 2400 ol_flags = m->ol_flags; 2401 2402 /* Check if any offload flag was set */ 2403 if (ol_flags == 0) 2404 continue; 2405 2406 l4_csum_flag = ol_flags & RTE_MBUF_F_TX_L4_MASK; 2407 /* SCTP checksum offload is not supported by the ENA. */ 2408 if ((ol_flags & ENA_TX_OFFLOAD_NOTSUP_MASK) || 2409 l4_csum_flag == RTE_MBUF_F_TX_SCTP_CKSUM) { 2410 PMD_TX_LOG(DEBUG, 2411 "mbuf[%" PRIu32 "] has unsupported offloads flags set: 0x%" PRIu64 "\n", 2412 i, ol_flags); 2413 rte_errno = ENOTSUP; 2414 return i; 2415 } 2416 2417 #ifdef RTE_LIBRTE_ETHDEV_DEBUG 2418 /* Check if requested offload is also enabled for the queue */ 2419 if ((ol_flags & RTE_MBUF_F_TX_IP_CKSUM && 2420 !(tx_ring->offloads & RTE_ETH_TX_OFFLOAD_IPV4_CKSUM)) || 2421 (l4_csum_flag == RTE_MBUF_F_TX_TCP_CKSUM && 2422 !(tx_ring->offloads & RTE_ETH_TX_OFFLOAD_TCP_CKSUM)) || 2423 (l4_csum_flag == RTE_MBUF_F_TX_UDP_CKSUM && 2424 !(tx_ring->offloads & RTE_ETH_TX_OFFLOAD_UDP_CKSUM))) { 2425 PMD_TX_LOG(DEBUG, 2426 "mbuf[%" PRIu32 "]: requested offloads: %" PRIu16 " are not enabled for the queue[%u]\n", 2427 i, m->nb_segs, tx_ring->id); 2428 rte_errno = EINVAL; 2429 return i; 2430 } 2431 2432 /* The caller is obligated to set l2 and l3 len if any cksum 2433 * offload is enabled. 2434 */ 2435 if (unlikely(ol_flags & (RTE_MBUF_F_TX_IP_CKSUM | RTE_MBUF_F_TX_L4_MASK) && 2436 (m->l2_len == 0 || m->l3_len == 0))) { 2437 PMD_TX_LOG(DEBUG, 2438 "mbuf[%" PRIu32 "]: l2_len or l3_len values are 0 while the offload was requested\n", 2439 i); 2440 rte_errno = EINVAL; 2441 return i; 2442 } 2443 ret = rte_validate_tx_offload(m); 2444 if (ret != 0) { 2445 rte_errno = -ret; 2446 return i; 2447 } 2448 #endif 2449 2450 /* Verify HW support for requested offloads and determine if 2451 * pseudo header checksum is needed. 2452 */ 2453 need_pseudo_csum = false; 2454 if (ol_flags & RTE_MBUF_F_TX_IPV4) { 2455 if (ol_flags & RTE_MBUF_F_TX_IP_CKSUM && 2456 !(dev_offload_capa & ENA_L3_IPV4_CSUM)) { 2457 rte_errno = ENOTSUP; 2458 return i; 2459 } 2460 2461 if (ol_flags & RTE_MBUF_F_TX_TCP_SEG && 2462 !(dev_offload_capa & ENA_IPV4_TSO)) { 2463 rte_errno = ENOTSUP; 2464 return i; 2465 } 2466 2467 /* Check HW capabilities and if pseudo csum is needed 2468 * for L4 offloads. 2469 */ 2470 if (l4_csum_flag != RTE_MBUF_F_TX_L4_NO_CKSUM && 2471 !(dev_offload_capa & ENA_L4_IPV4_CSUM)) { 2472 if (dev_offload_capa & 2473 ENA_L4_IPV4_CSUM_PARTIAL) { 2474 need_pseudo_csum = true; 2475 } else { 2476 rte_errno = ENOTSUP; 2477 return i; 2478 } 2479 } 2480 2481 /* Parse the DF flag */ 2482 ip_hdr = rte_pktmbuf_mtod_offset(m, 2483 struct rte_ipv4_hdr *, m->l2_len); 2484 frag_field = rte_be_to_cpu_16(ip_hdr->fragment_offset); 2485 if (frag_field & RTE_IPV4_HDR_DF_FLAG) { 2486 m->packet_type |= RTE_PTYPE_L4_NONFRAG; 2487 } else if (ol_flags & RTE_MBUF_F_TX_TCP_SEG) { 2488 /* In case we are supposed to TSO and have DF 2489 * not set (DF=0) hardware must be provided with 2490 * partial checksum. 2491 */ 2492 need_pseudo_csum = true; 2493 } 2494 } else if (ol_flags & RTE_MBUF_F_TX_IPV6) { 2495 /* There is no support for IPv6 TSO as for now. */ 2496 if (ol_flags & RTE_MBUF_F_TX_TCP_SEG) { 2497 rte_errno = ENOTSUP; 2498 return i; 2499 } 2500 2501 /* Check HW capabilities and if pseudo csum is needed */ 2502 if (l4_csum_flag != RTE_MBUF_F_TX_L4_NO_CKSUM && 2503 !(dev_offload_capa & ENA_L4_IPV6_CSUM)) { 2504 if (dev_offload_capa & 2505 ENA_L4_IPV6_CSUM_PARTIAL) { 2506 need_pseudo_csum = true; 2507 } else { 2508 rte_errno = ENOTSUP; 2509 return i; 2510 } 2511 } 2512 } 2513 2514 if (need_pseudo_csum) { 2515 ret = rte_net_intel_cksum_flags_prepare(m, ol_flags); 2516 if (ret != 0) { 2517 rte_errno = -ret; 2518 return i; 2519 } 2520 } 2521 } 2522 2523 return i; 2524 } 2525 2526 static void ena_update_hints(struct ena_adapter *adapter, 2527 struct ena_admin_ena_hw_hints *hints) 2528 { 2529 if (hints->admin_completion_tx_timeout) 2530 adapter->ena_dev.admin_queue.completion_timeout = 2531 hints->admin_completion_tx_timeout * 1000; 2532 2533 if (hints->mmio_read_timeout) 2534 /* convert to usec */ 2535 adapter->ena_dev.mmio_read.reg_read_to = 2536 hints->mmio_read_timeout * 1000; 2537 2538 if (hints->missing_tx_completion_timeout) { 2539 if (hints->missing_tx_completion_timeout == 2540 ENA_HW_HINTS_NO_TIMEOUT) { 2541 adapter->missing_tx_completion_to = 2542 ENA_HW_HINTS_NO_TIMEOUT; 2543 } else { 2544 /* Convert from msecs to ticks */ 2545 adapter->missing_tx_completion_to = rte_get_timer_hz() * 2546 hints->missing_tx_completion_timeout / 1000; 2547 adapter->tx_cleanup_stall_delay = 2548 adapter->missing_tx_completion_to / 2; 2549 } 2550 } 2551 2552 if (hints->driver_watchdog_timeout) { 2553 if (hints->driver_watchdog_timeout == ENA_HW_HINTS_NO_TIMEOUT) 2554 adapter->keep_alive_timeout = ENA_HW_HINTS_NO_TIMEOUT; 2555 else 2556 // Convert msecs to ticks 2557 adapter->keep_alive_timeout = 2558 (hints->driver_watchdog_timeout * 2559 rte_get_timer_hz()) / 1000; 2560 } 2561 } 2562 2563 static int ena_check_space_and_linearize_mbuf(struct ena_ring *tx_ring, 2564 struct rte_mbuf *mbuf) 2565 { 2566 struct ena_com_dev *ena_dev; 2567 int num_segments, header_len, rc; 2568 2569 ena_dev = &tx_ring->adapter->ena_dev; 2570 num_segments = mbuf->nb_segs; 2571 header_len = mbuf->data_len; 2572 2573 if (likely(num_segments < tx_ring->sgl_size)) 2574 goto checkspace; 2575 2576 if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV && 2577 (num_segments == tx_ring->sgl_size) && 2578 (header_len < tx_ring->tx_max_header_size)) 2579 goto checkspace; 2580 2581 /* Checking for space for 2 additional metadata descriptors due to 2582 * possible header split and metadata descriptor. Linearization will 2583 * be needed so we reduce the segments number from num_segments to 1 2584 */ 2585 if (!ena_com_sq_have_enough_space(tx_ring->ena_com_io_sq, 3)) { 2586 PMD_TX_LOG(DEBUG, "Not enough space in the Tx queue\n"); 2587 return ENA_COM_NO_MEM; 2588 } 2589 ++tx_ring->tx_stats.linearize; 2590 rc = rte_pktmbuf_linearize(mbuf); 2591 if (unlikely(rc)) { 2592 PMD_TX_LOG(WARNING, "Mbuf linearize failed\n"); 2593 rte_atomic64_inc(&tx_ring->adapter->drv_stats->ierrors); 2594 ++tx_ring->tx_stats.linearize_failed; 2595 return rc; 2596 } 2597 2598 return 0; 2599 2600 checkspace: 2601 /* Checking for space for 2 additional metadata descriptors due to 2602 * possible header split and metadata descriptor 2603 */ 2604 if (!ena_com_sq_have_enough_space(tx_ring->ena_com_io_sq, 2605 num_segments + 2)) { 2606 PMD_TX_LOG(DEBUG, "Not enough space in the Tx queue\n"); 2607 return ENA_COM_NO_MEM; 2608 } 2609 2610 return 0; 2611 } 2612 2613 static void ena_tx_map_mbuf(struct ena_ring *tx_ring, 2614 struct ena_tx_buffer *tx_info, 2615 struct rte_mbuf *mbuf, 2616 void **push_header, 2617 uint16_t *header_len) 2618 { 2619 struct ena_com_buf *ena_buf; 2620 uint16_t delta, seg_len, push_len; 2621 2622 delta = 0; 2623 seg_len = mbuf->data_len; 2624 2625 tx_info->mbuf = mbuf; 2626 ena_buf = tx_info->bufs; 2627 2628 if (tx_ring->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) { 2629 /* 2630 * Tx header might be (and will be in most cases) smaller than 2631 * tx_max_header_size. But it's not an issue to send more data 2632 * to the device, than actually needed if the mbuf size is 2633 * greater than tx_max_header_size. 2634 */ 2635 push_len = RTE_MIN(mbuf->pkt_len, tx_ring->tx_max_header_size); 2636 *header_len = push_len; 2637 2638 if (likely(push_len <= seg_len)) { 2639 /* If the push header is in the single segment, then 2640 * just point it to the 1st mbuf data. 2641 */ 2642 *push_header = rte_pktmbuf_mtod(mbuf, uint8_t *); 2643 } else { 2644 /* If the push header lays in the several segments, copy 2645 * it to the intermediate buffer. 2646 */ 2647 rte_pktmbuf_read(mbuf, 0, push_len, 2648 tx_ring->push_buf_intermediate_buf); 2649 *push_header = tx_ring->push_buf_intermediate_buf; 2650 delta = push_len - seg_len; 2651 } 2652 } else { 2653 *push_header = NULL; 2654 *header_len = 0; 2655 push_len = 0; 2656 } 2657 2658 /* Process first segment taking into consideration pushed header */ 2659 if (seg_len > push_len) { 2660 ena_buf->paddr = mbuf->buf_iova + 2661 mbuf->data_off + 2662 push_len; 2663 ena_buf->len = seg_len - push_len; 2664 ena_buf++; 2665 tx_info->num_of_bufs++; 2666 } 2667 2668 while ((mbuf = mbuf->next) != NULL) { 2669 seg_len = mbuf->data_len; 2670 2671 /* Skip mbufs if whole data is pushed as a header */ 2672 if (unlikely(delta > seg_len)) { 2673 delta -= seg_len; 2674 continue; 2675 } 2676 2677 ena_buf->paddr = mbuf->buf_iova + mbuf->data_off + delta; 2678 ena_buf->len = seg_len - delta; 2679 ena_buf++; 2680 tx_info->num_of_bufs++; 2681 2682 delta = 0; 2683 } 2684 } 2685 2686 static int ena_xmit_mbuf(struct ena_ring *tx_ring, struct rte_mbuf *mbuf) 2687 { 2688 struct ena_tx_buffer *tx_info; 2689 struct ena_com_tx_ctx ena_tx_ctx = { { 0 } }; 2690 uint16_t next_to_use; 2691 uint16_t header_len; 2692 uint16_t req_id; 2693 void *push_header; 2694 int nb_hw_desc; 2695 int rc; 2696 2697 rc = ena_check_space_and_linearize_mbuf(tx_ring, mbuf); 2698 if (unlikely(rc)) 2699 return rc; 2700 2701 next_to_use = tx_ring->next_to_use; 2702 2703 req_id = tx_ring->empty_tx_reqs[next_to_use]; 2704 tx_info = &tx_ring->tx_buffer_info[req_id]; 2705 tx_info->num_of_bufs = 0; 2706 2707 ena_tx_map_mbuf(tx_ring, tx_info, mbuf, &push_header, &header_len); 2708 2709 ena_tx_ctx.ena_bufs = tx_info->bufs; 2710 ena_tx_ctx.push_header = push_header; 2711 ena_tx_ctx.num_bufs = tx_info->num_of_bufs; 2712 ena_tx_ctx.req_id = req_id; 2713 ena_tx_ctx.header_len = header_len; 2714 2715 /* Set Tx offloads flags, if applicable */ 2716 ena_tx_mbuf_prepare(mbuf, &ena_tx_ctx, tx_ring->offloads, 2717 tx_ring->disable_meta_caching); 2718 2719 if (unlikely(ena_com_is_doorbell_needed(tx_ring->ena_com_io_sq, 2720 &ena_tx_ctx))) { 2721 PMD_TX_LOG(DEBUG, 2722 "LLQ Tx max burst size of queue %d achieved, writing doorbell to send burst\n", 2723 tx_ring->id); 2724 ena_com_write_sq_doorbell(tx_ring->ena_com_io_sq); 2725 tx_ring->tx_stats.doorbells++; 2726 tx_ring->pkts_without_db = false; 2727 } 2728 2729 /* prepare the packet's descriptors to dma engine */ 2730 rc = ena_com_prepare_tx(tx_ring->ena_com_io_sq, &ena_tx_ctx, 2731 &nb_hw_desc); 2732 if (unlikely(rc)) { 2733 PMD_DRV_LOG(ERR, "Failed to prepare Tx buffers, rc: %d\n", rc); 2734 ++tx_ring->tx_stats.prepare_ctx_err; 2735 tx_ring->adapter->reset_reason = 2736 ENA_REGS_RESET_DRIVER_INVALID_STATE; 2737 tx_ring->adapter->trigger_reset = true; 2738 return rc; 2739 } 2740 2741 tx_info->tx_descs = nb_hw_desc; 2742 tx_info->timestamp = rte_get_timer_cycles(); 2743 2744 tx_ring->tx_stats.cnt++; 2745 tx_ring->tx_stats.bytes += mbuf->pkt_len; 2746 2747 tx_ring->next_to_use = ENA_IDX_NEXT_MASKED(next_to_use, 2748 tx_ring->size_mask); 2749 2750 return 0; 2751 } 2752 2753 static void ena_tx_cleanup(struct ena_ring *tx_ring) 2754 { 2755 unsigned int total_tx_descs = 0; 2756 uint16_t cleanup_budget; 2757 uint16_t next_to_clean = tx_ring->next_to_clean; 2758 2759 /* Attempt to release all Tx descriptors (ring_size - 1 -> size_mask) */ 2760 cleanup_budget = tx_ring->size_mask; 2761 2762 while (likely(total_tx_descs < cleanup_budget)) { 2763 struct rte_mbuf *mbuf; 2764 struct ena_tx_buffer *tx_info; 2765 uint16_t req_id; 2766 2767 if (ena_com_tx_comp_req_id_get(tx_ring->ena_com_io_cq, &req_id) != 0) 2768 break; 2769 2770 if (unlikely(validate_tx_req_id(tx_ring, req_id) != 0)) 2771 break; 2772 2773 /* Get Tx info & store how many descs were processed */ 2774 tx_info = &tx_ring->tx_buffer_info[req_id]; 2775 tx_info->timestamp = 0; 2776 2777 mbuf = tx_info->mbuf; 2778 rte_pktmbuf_free(mbuf); 2779 2780 tx_info->mbuf = NULL; 2781 tx_ring->empty_tx_reqs[next_to_clean] = req_id; 2782 2783 total_tx_descs += tx_info->tx_descs; 2784 2785 /* Put back descriptor to the ring for reuse */ 2786 next_to_clean = ENA_IDX_NEXT_MASKED(next_to_clean, 2787 tx_ring->size_mask); 2788 } 2789 2790 if (likely(total_tx_descs > 0)) { 2791 /* acknowledge completion of sent packets */ 2792 tx_ring->next_to_clean = next_to_clean; 2793 ena_com_comp_ack(tx_ring->ena_com_io_sq, total_tx_descs); 2794 ena_com_update_dev_comp_head(tx_ring->ena_com_io_cq); 2795 } 2796 2797 /* Notify completion handler that the cleanup was just called */ 2798 tx_ring->last_cleanup_ticks = rte_get_timer_cycles(); 2799 } 2800 2801 static uint16_t eth_ena_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, 2802 uint16_t nb_pkts) 2803 { 2804 struct ena_ring *tx_ring = (struct ena_ring *)(tx_queue); 2805 int available_desc; 2806 uint16_t sent_idx = 0; 2807 2808 #ifdef RTE_ETHDEV_DEBUG_TX 2809 /* Check adapter state */ 2810 if (unlikely(tx_ring->adapter->state != ENA_ADAPTER_STATE_RUNNING)) { 2811 PMD_TX_LOG(ALERT, 2812 "Trying to xmit pkts while device is NOT running\n"); 2813 return 0; 2814 } 2815 #endif 2816 2817 for (sent_idx = 0; sent_idx < nb_pkts; sent_idx++) { 2818 if (ena_xmit_mbuf(tx_ring, tx_pkts[sent_idx])) 2819 break; 2820 tx_ring->pkts_without_db = true; 2821 rte_prefetch0(tx_pkts[ENA_IDX_ADD_MASKED(sent_idx, 4, 2822 tx_ring->size_mask)]); 2823 } 2824 2825 available_desc = ena_com_free_q_entries(tx_ring->ena_com_io_sq); 2826 tx_ring->tx_stats.available_desc = available_desc; 2827 2828 /* If there are ready packets to be xmitted... */ 2829 if (likely(tx_ring->pkts_without_db)) { 2830 /* ...let HW do its best :-) */ 2831 ena_com_write_sq_doorbell(tx_ring->ena_com_io_sq); 2832 tx_ring->tx_stats.doorbells++; 2833 tx_ring->pkts_without_db = false; 2834 } 2835 2836 if (available_desc < tx_ring->tx_free_thresh) 2837 ena_tx_cleanup(tx_ring); 2838 2839 tx_ring->tx_stats.available_desc = 2840 ena_com_free_q_entries(tx_ring->ena_com_io_sq); 2841 tx_ring->tx_stats.tx_poll++; 2842 2843 return sent_idx; 2844 } 2845 2846 int ena_copy_eni_stats(struct ena_adapter *adapter) 2847 { 2848 struct ena_admin_eni_stats admin_eni_stats; 2849 int rc; 2850 2851 rte_spinlock_lock(&adapter->admin_lock); 2852 rc = ena_com_get_eni_stats(&adapter->ena_dev, &admin_eni_stats); 2853 rte_spinlock_unlock(&adapter->admin_lock); 2854 if (rc != 0) { 2855 if (rc == ENA_COM_UNSUPPORTED) { 2856 PMD_DRV_LOG(DEBUG, 2857 "Retrieving ENI metrics is not supported\n"); 2858 } else { 2859 PMD_DRV_LOG(WARNING, 2860 "Failed to get ENI metrics, rc: %d\n", rc); 2861 } 2862 return rc; 2863 } 2864 2865 rte_memcpy(&adapter->eni_stats, &admin_eni_stats, 2866 sizeof(struct ena_stats_eni)); 2867 2868 return 0; 2869 } 2870 2871 /** 2872 * DPDK callback to retrieve names of extended device statistics 2873 * 2874 * @param dev 2875 * Pointer to Ethernet device structure. 2876 * @param[out] xstats_names 2877 * Buffer to insert names into. 2878 * @param n 2879 * Number of names. 2880 * 2881 * @return 2882 * Number of xstats names. 2883 */ 2884 static int ena_xstats_get_names(struct rte_eth_dev *dev, 2885 struct rte_eth_xstat_name *xstats_names, 2886 unsigned int n) 2887 { 2888 unsigned int xstats_count = ena_xstats_calc_num(dev->data); 2889 unsigned int stat, i, count = 0; 2890 2891 if (n < xstats_count || !xstats_names) 2892 return xstats_count; 2893 2894 for (stat = 0; stat < ENA_STATS_ARRAY_GLOBAL; stat++, count++) 2895 strcpy(xstats_names[count].name, 2896 ena_stats_global_strings[stat].name); 2897 2898 for (stat = 0; stat < ENA_STATS_ARRAY_ENI; stat++, count++) 2899 strcpy(xstats_names[count].name, 2900 ena_stats_eni_strings[stat].name); 2901 2902 for (stat = 0; stat < ENA_STATS_ARRAY_RX; stat++) 2903 for (i = 0; i < dev->data->nb_rx_queues; i++, count++) 2904 snprintf(xstats_names[count].name, 2905 sizeof(xstats_names[count].name), 2906 "rx_q%d_%s", i, 2907 ena_stats_rx_strings[stat].name); 2908 2909 for (stat = 0; stat < ENA_STATS_ARRAY_TX; stat++) 2910 for (i = 0; i < dev->data->nb_tx_queues; i++, count++) 2911 snprintf(xstats_names[count].name, 2912 sizeof(xstats_names[count].name), 2913 "tx_q%d_%s", i, 2914 ena_stats_tx_strings[stat].name); 2915 2916 return xstats_count; 2917 } 2918 2919 /** 2920 * DPDK callback to get extended device statistics. 2921 * 2922 * @param dev 2923 * Pointer to Ethernet device structure. 2924 * @param[out] stats 2925 * Stats table output buffer. 2926 * @param n 2927 * The size of the stats table. 2928 * 2929 * @return 2930 * Number of xstats on success, negative on failure. 2931 */ 2932 static int ena_xstats_get(struct rte_eth_dev *dev, 2933 struct rte_eth_xstat *xstats, 2934 unsigned int n) 2935 { 2936 struct ena_adapter *adapter = dev->data->dev_private; 2937 unsigned int xstats_count = ena_xstats_calc_num(dev->data); 2938 unsigned int stat, i, count = 0; 2939 int stat_offset; 2940 void *stats_begin; 2941 2942 if (n < xstats_count) 2943 return xstats_count; 2944 2945 if (!xstats) 2946 return 0; 2947 2948 for (stat = 0; stat < ENA_STATS_ARRAY_GLOBAL; stat++, count++) { 2949 stat_offset = ena_stats_global_strings[stat].stat_offset; 2950 stats_begin = &adapter->dev_stats; 2951 2952 xstats[count].id = count; 2953 xstats[count].value = *((uint64_t *) 2954 ((char *)stats_begin + stat_offset)); 2955 } 2956 2957 /* Even if the function below fails, we should copy previous (or initial 2958 * values) to keep structure of rte_eth_xstat consistent. 2959 */ 2960 ena_copy_eni_stats(adapter); 2961 for (stat = 0; stat < ENA_STATS_ARRAY_ENI; stat++, count++) { 2962 stat_offset = ena_stats_eni_strings[stat].stat_offset; 2963 stats_begin = &adapter->eni_stats; 2964 2965 xstats[count].id = count; 2966 xstats[count].value = *((uint64_t *) 2967 ((char *)stats_begin + stat_offset)); 2968 } 2969 2970 for (stat = 0; stat < ENA_STATS_ARRAY_RX; stat++) { 2971 for (i = 0; i < dev->data->nb_rx_queues; i++, count++) { 2972 stat_offset = ena_stats_rx_strings[stat].stat_offset; 2973 stats_begin = &adapter->rx_ring[i].rx_stats; 2974 2975 xstats[count].id = count; 2976 xstats[count].value = *((uint64_t *) 2977 ((char *)stats_begin + stat_offset)); 2978 } 2979 } 2980 2981 for (stat = 0; stat < ENA_STATS_ARRAY_TX; stat++) { 2982 for (i = 0; i < dev->data->nb_tx_queues; i++, count++) { 2983 stat_offset = ena_stats_tx_strings[stat].stat_offset; 2984 stats_begin = &adapter->tx_ring[i].rx_stats; 2985 2986 xstats[count].id = count; 2987 xstats[count].value = *((uint64_t *) 2988 ((char *)stats_begin + stat_offset)); 2989 } 2990 } 2991 2992 return count; 2993 } 2994 2995 static int ena_xstats_get_by_id(struct rte_eth_dev *dev, 2996 const uint64_t *ids, 2997 uint64_t *values, 2998 unsigned int n) 2999 { 3000 struct ena_adapter *adapter = dev->data->dev_private; 3001 uint64_t id; 3002 uint64_t rx_entries, tx_entries; 3003 unsigned int i; 3004 int qid; 3005 int valid = 0; 3006 bool was_eni_copied = false; 3007 3008 for (i = 0; i < n; ++i) { 3009 id = ids[i]; 3010 /* Check if id belongs to global statistics */ 3011 if (id < ENA_STATS_ARRAY_GLOBAL) { 3012 values[i] = *((uint64_t *)&adapter->dev_stats + id); 3013 ++valid; 3014 continue; 3015 } 3016 3017 /* Check if id belongs to ENI statistics */ 3018 id -= ENA_STATS_ARRAY_GLOBAL; 3019 if (id < ENA_STATS_ARRAY_ENI) { 3020 /* Avoid reading ENI stats multiple times in a single 3021 * function call, as it requires communication with the 3022 * admin queue. 3023 */ 3024 if (!was_eni_copied) { 3025 was_eni_copied = true; 3026 ena_copy_eni_stats(adapter); 3027 } 3028 values[i] = *((uint64_t *)&adapter->eni_stats + id); 3029 ++valid; 3030 continue; 3031 } 3032 3033 /* Check if id belongs to rx queue statistics */ 3034 id -= ENA_STATS_ARRAY_ENI; 3035 rx_entries = ENA_STATS_ARRAY_RX * dev->data->nb_rx_queues; 3036 if (id < rx_entries) { 3037 qid = id % dev->data->nb_rx_queues; 3038 id /= dev->data->nb_rx_queues; 3039 values[i] = *((uint64_t *) 3040 &adapter->rx_ring[qid].rx_stats + id); 3041 ++valid; 3042 continue; 3043 } 3044 /* Check if id belongs to rx queue statistics */ 3045 id -= rx_entries; 3046 tx_entries = ENA_STATS_ARRAY_TX * dev->data->nb_tx_queues; 3047 if (id < tx_entries) { 3048 qid = id % dev->data->nb_tx_queues; 3049 id /= dev->data->nb_tx_queues; 3050 values[i] = *((uint64_t *) 3051 &adapter->tx_ring[qid].tx_stats + id); 3052 ++valid; 3053 continue; 3054 } 3055 } 3056 3057 return valid; 3058 } 3059 3060 static int ena_process_bool_devarg(const char *key, 3061 const char *value, 3062 void *opaque) 3063 { 3064 struct ena_adapter *adapter = opaque; 3065 bool bool_value; 3066 3067 /* Parse the value. */ 3068 if (strcmp(value, "1") == 0) { 3069 bool_value = true; 3070 } else if (strcmp(value, "0") == 0) { 3071 bool_value = false; 3072 } else { 3073 PMD_INIT_LOG(ERR, 3074 "Invalid value: '%s' for key '%s'. Accepted: '0' or '1'\n", 3075 value, key); 3076 return -EINVAL; 3077 } 3078 3079 /* Now, assign it to the proper adapter field. */ 3080 if (strcmp(key, ENA_DEVARG_LARGE_LLQ_HDR) == 0) 3081 adapter->use_large_llq_hdr = bool_value; 3082 3083 return 0; 3084 } 3085 3086 static int ena_parse_devargs(struct ena_adapter *adapter, 3087 struct rte_devargs *devargs) 3088 { 3089 static const char * const allowed_args[] = { 3090 ENA_DEVARG_LARGE_LLQ_HDR, 3091 NULL, 3092 }; 3093 struct rte_kvargs *kvlist; 3094 int rc; 3095 3096 if (devargs == NULL) 3097 return 0; 3098 3099 kvlist = rte_kvargs_parse(devargs->args, allowed_args); 3100 if (kvlist == NULL) { 3101 PMD_INIT_LOG(ERR, "Invalid device arguments: %s\n", 3102 devargs->args); 3103 return -EINVAL; 3104 } 3105 3106 rc = rte_kvargs_process(kvlist, ENA_DEVARG_LARGE_LLQ_HDR, 3107 ena_process_bool_devarg, adapter); 3108 3109 rte_kvargs_free(kvlist); 3110 3111 return rc; 3112 } 3113 3114 static int ena_setup_rx_intr(struct rte_eth_dev *dev) 3115 { 3116 struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev); 3117 struct rte_intr_handle *intr_handle = pci_dev->intr_handle; 3118 int rc; 3119 uint16_t vectors_nb, i; 3120 bool rx_intr_requested = dev->data->dev_conf.intr_conf.rxq; 3121 3122 if (!rx_intr_requested) 3123 return 0; 3124 3125 if (!rte_intr_cap_multiple(intr_handle)) { 3126 PMD_DRV_LOG(ERR, 3127 "Rx interrupt requested, but it isn't supported by the PCI driver\n"); 3128 return -ENOTSUP; 3129 } 3130 3131 /* Disable interrupt mapping before the configuration starts. */ 3132 rte_intr_disable(intr_handle); 3133 3134 /* Verify if there are enough vectors available. */ 3135 vectors_nb = dev->data->nb_rx_queues; 3136 if (vectors_nb > RTE_MAX_RXTX_INTR_VEC_ID) { 3137 PMD_DRV_LOG(ERR, 3138 "Too many Rx interrupts requested, maximum number: %d\n", 3139 RTE_MAX_RXTX_INTR_VEC_ID); 3140 rc = -ENOTSUP; 3141 goto enable_intr; 3142 } 3143 3144 /* Allocate the vector list */ 3145 if (rte_intr_vec_list_alloc(intr_handle, "intr_vec", 3146 dev->data->nb_rx_queues)) { 3147 PMD_DRV_LOG(ERR, 3148 "Failed to allocate interrupt vector for %d queues\n", 3149 dev->data->nb_rx_queues); 3150 rc = -ENOMEM; 3151 goto enable_intr; 3152 } 3153 3154 rc = rte_intr_efd_enable(intr_handle, vectors_nb); 3155 if (rc != 0) 3156 goto free_intr_vec; 3157 3158 if (!rte_intr_allow_others(intr_handle)) { 3159 PMD_DRV_LOG(ERR, 3160 "Not enough interrupts available to use both ENA Admin and Rx interrupts\n"); 3161 goto disable_intr_efd; 3162 } 3163 3164 for (i = 0; i < vectors_nb; ++i) 3165 if (rte_intr_vec_list_index_set(intr_handle, i, 3166 RTE_INTR_VEC_RXTX_OFFSET + i)) 3167 goto disable_intr_efd; 3168 3169 rte_intr_enable(intr_handle); 3170 return 0; 3171 3172 disable_intr_efd: 3173 rte_intr_efd_disable(intr_handle); 3174 free_intr_vec: 3175 rte_intr_vec_list_free(intr_handle); 3176 enable_intr: 3177 rte_intr_enable(intr_handle); 3178 return rc; 3179 } 3180 3181 static void ena_rx_queue_intr_set(struct rte_eth_dev *dev, 3182 uint16_t queue_id, 3183 bool unmask) 3184 { 3185 struct ena_adapter *adapter = dev->data->dev_private; 3186 struct ena_ring *rxq = &adapter->rx_ring[queue_id]; 3187 struct ena_eth_io_intr_reg intr_reg; 3188 3189 ena_com_update_intr_reg(&intr_reg, 0, 0, unmask); 3190 ena_com_unmask_intr(rxq->ena_com_io_cq, &intr_reg); 3191 } 3192 3193 static int ena_rx_queue_intr_enable(struct rte_eth_dev *dev, 3194 uint16_t queue_id) 3195 { 3196 ena_rx_queue_intr_set(dev, queue_id, true); 3197 3198 return 0; 3199 } 3200 3201 static int ena_rx_queue_intr_disable(struct rte_eth_dev *dev, 3202 uint16_t queue_id) 3203 { 3204 ena_rx_queue_intr_set(dev, queue_id, false); 3205 3206 return 0; 3207 } 3208 3209 /********************************************************************* 3210 * PMD configuration 3211 *********************************************************************/ 3212 static int eth_ena_pci_probe(struct rte_pci_driver *pci_drv __rte_unused, 3213 struct rte_pci_device *pci_dev) 3214 { 3215 return rte_eth_dev_pci_generic_probe(pci_dev, 3216 sizeof(struct ena_adapter), eth_ena_dev_init); 3217 } 3218 3219 static int eth_ena_pci_remove(struct rte_pci_device *pci_dev) 3220 { 3221 return rte_eth_dev_pci_generic_remove(pci_dev, eth_ena_dev_uninit); 3222 } 3223 3224 static struct rte_pci_driver rte_ena_pmd = { 3225 .id_table = pci_id_ena_map, 3226 .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC | 3227 RTE_PCI_DRV_WC_ACTIVATE, 3228 .probe = eth_ena_pci_probe, 3229 .remove = eth_ena_pci_remove, 3230 }; 3231 3232 RTE_PMD_REGISTER_PCI(net_ena, rte_ena_pmd); 3233 RTE_PMD_REGISTER_PCI_TABLE(net_ena, pci_id_ena_map); 3234 RTE_PMD_REGISTER_KMOD_DEP(net_ena, "* igb_uio | uio_pci_generic | vfio-pci"); 3235 RTE_PMD_REGISTER_PARAM_STRING(net_ena, ENA_DEVARG_LARGE_LLQ_HDR "=<0|1>"); 3236 RTE_LOG_REGISTER_SUFFIX(ena_logtype_init, init, NOTICE); 3237 RTE_LOG_REGISTER_SUFFIX(ena_logtype_driver, driver, NOTICE); 3238 #ifdef RTE_ETHDEV_DEBUG_RX 3239 RTE_LOG_REGISTER_SUFFIX(ena_logtype_rx, rx, DEBUG); 3240 #endif 3241 #ifdef RTE_ETHDEV_DEBUG_TX 3242 RTE_LOG_REGISTER_SUFFIX(ena_logtype_tx, tx, DEBUG); 3243 #endif 3244 RTE_LOG_REGISTER_SUFFIX(ena_logtype_com, com, WARNING); 3245 3246 /****************************************************************************** 3247 ******************************** AENQ Handlers ******************************* 3248 *****************************************************************************/ 3249 static void ena_update_on_link_change(void *adapter_data, 3250 struct ena_admin_aenq_entry *aenq_e) 3251 { 3252 struct rte_eth_dev *eth_dev = adapter_data; 3253 struct ena_adapter *adapter = eth_dev->data->dev_private; 3254 struct ena_admin_aenq_link_change_desc *aenq_link_desc; 3255 uint32_t status; 3256 3257 aenq_link_desc = (struct ena_admin_aenq_link_change_desc *)aenq_e; 3258 3259 status = get_ena_admin_aenq_link_change_desc_link_status(aenq_link_desc); 3260 adapter->link_status = status; 3261 3262 ena_link_update(eth_dev, 0); 3263 rte_eth_dev_callback_process(eth_dev, RTE_ETH_EVENT_INTR_LSC, NULL); 3264 } 3265 3266 static void ena_notification(void *adapter_data, 3267 struct ena_admin_aenq_entry *aenq_e) 3268 { 3269 struct rte_eth_dev *eth_dev = adapter_data; 3270 struct ena_adapter *adapter = eth_dev->data->dev_private; 3271 struct ena_admin_ena_hw_hints *hints; 3272 3273 if (aenq_e->aenq_common_desc.group != ENA_ADMIN_NOTIFICATION) 3274 PMD_DRV_LOG(WARNING, "Invalid AENQ group: %x. Expected: %x\n", 3275 aenq_e->aenq_common_desc.group, 3276 ENA_ADMIN_NOTIFICATION); 3277 3278 switch (aenq_e->aenq_common_desc.syndrome) { 3279 case ENA_ADMIN_UPDATE_HINTS: 3280 hints = (struct ena_admin_ena_hw_hints *) 3281 (&aenq_e->inline_data_w4); 3282 ena_update_hints(adapter, hints); 3283 break; 3284 default: 3285 PMD_DRV_LOG(ERR, "Invalid AENQ notification link state: %d\n", 3286 aenq_e->aenq_common_desc.syndrome); 3287 } 3288 } 3289 3290 static void ena_keep_alive(void *adapter_data, 3291 __rte_unused struct ena_admin_aenq_entry *aenq_e) 3292 { 3293 struct rte_eth_dev *eth_dev = adapter_data; 3294 struct ena_adapter *adapter = eth_dev->data->dev_private; 3295 struct ena_admin_aenq_keep_alive_desc *desc; 3296 uint64_t rx_drops; 3297 uint64_t tx_drops; 3298 3299 adapter->timestamp_wd = rte_get_timer_cycles(); 3300 3301 desc = (struct ena_admin_aenq_keep_alive_desc *)aenq_e; 3302 rx_drops = ((uint64_t)desc->rx_drops_high << 32) | desc->rx_drops_low; 3303 tx_drops = ((uint64_t)desc->tx_drops_high << 32) | desc->tx_drops_low; 3304 3305 adapter->drv_stats->rx_drops = rx_drops; 3306 adapter->dev_stats.tx_drops = tx_drops; 3307 } 3308 3309 /** 3310 * This handler will called for unknown event group or unimplemented handlers 3311 **/ 3312 static void unimplemented_aenq_handler(__rte_unused void *data, 3313 __rte_unused struct ena_admin_aenq_entry *aenq_e) 3314 { 3315 PMD_DRV_LOG(ERR, 3316 "Unknown event was received or event with unimplemented handler\n"); 3317 } 3318 3319 static struct ena_aenq_handlers aenq_handlers = { 3320 .handlers = { 3321 [ENA_ADMIN_LINK_CHANGE] = ena_update_on_link_change, 3322 [ENA_ADMIN_NOTIFICATION] = ena_notification, 3323 [ENA_ADMIN_KEEP_ALIVE] = ena_keep_alive 3324 }, 3325 .unimplemented_handler = unimplemented_aenq_handler 3326 }; 3327