/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2015 6WIND S.A.
 * Copyright 2015 Mellanox Technologies, Ltd
 */

#include <stddef.h>
#include <unistd.h>
#include <string.h>
#include <stdint.h>
#include <stdlib.h>
#include <errno.h>
#include <fcntl.h>

#include <rte_malloc.h>
#include <ethdev_driver.h>
#include <rte_pci.h>
#include <bus_pci_driver.h>
#include <rte_common.h>
#include <rte_kvargs.h>
#include <rte_rwlock.h>
#include <rte_spinlock.h>
#include <rte_string_fns.h>
#include <rte_eal_paging.h>
#include <rte_alarm.h>
#include <rte_cycles.h>
#include <rte_interrupts.h>

#include <mlx5_glue.h>
#include <mlx5_devx_cmds.h>
#include <mlx5_common.h>
#include <mlx5_common_os.h>
#include <mlx5_common_mp.h>
#include <mlx5_malloc.h>

#include "mlx5_defs.h"
#include "mlx5.h"
#include "mlx5_utils.h"
#include "mlx5_rxtx.h"
#include "mlx5_rx.h"
#include "mlx5_tx.h"
#include "mlx5_autoconf.h"
#include "mlx5_flow.h"
#include "mlx5_flow_os.h"
#include "rte_pmd_mlx5.h"

#define MLX5_ETH_DRIVER_NAME mlx5_eth

/* Device parameter to enable RX completion queue compression. */
#define MLX5_RXQ_CQE_COMP_EN "rxq_cqe_comp_en"

/* Device parameter to enable padding Rx packet to cacheline size. */
#define MLX5_RXQ_PKT_PAD_EN "rxq_pkt_pad_en"

/* Device parameter to enable Multi-Packet Rx queue. */
#define MLX5_RX_MPRQ_EN "mprq_en"

/* Device parameter to configure log 2 of the number of strides for MPRQ. */
#define MLX5_RX_MPRQ_LOG_STRIDE_NUM "mprq_log_stride_num"

/* Device parameter to configure log 2 of the stride size for MPRQ. */
#define MLX5_RX_MPRQ_LOG_STRIDE_SIZE "mprq_log_stride_size"

/* Device parameter to limit the size of memcpy'd packet for MPRQ. */
#define MLX5_RX_MPRQ_MAX_MEMCPY_LEN "mprq_max_memcpy_len"

/* Device parameter to set the minimum number of Rx queues to enable MPRQ. */
#define MLX5_RXQS_MIN_MPRQ "rxqs_min_mprq"

/* Device parameter to configure inline send. Deprecated, ignored. */
#define MLX5_TXQ_INLINE "txq_inline"

/* Device parameter to limit packet size to inline with ordinary SEND. */
#define MLX5_TXQ_INLINE_MAX "txq_inline_max"

/* Device parameter to configure minimal data size to inline. */
#define MLX5_TXQ_INLINE_MIN "txq_inline_min"

/* Device parameter to limit packet size to inline with Enhanced MPW. */
#define MLX5_TXQ_INLINE_MPW "txq_inline_mpw"

/*
 * Device parameter to configure the number of TX queues threshold for
 * enabling inline send.
 */
#define MLX5_TXQS_MIN_INLINE "txqs_min_inline"

/*
 * Device parameter to configure the number of TX queues threshold for
 * enabling vectorized Tx, deprecated, ignored (no vectorized Tx routines).
 */
#define MLX5_TXQS_MAX_VEC "txqs_max_vec"

/* Device parameter to enable multi-packet send WQEs. */
#define MLX5_TXQ_MPW_EN "txq_mpw_en"

/*
 * Device parameter to include 2 dsegs in the title WQEBB.
 * Deprecated, ignored.
 */
#define MLX5_TXQ_MPW_HDR_DSEG_EN "txq_mpw_hdr_dseg_en"

/*
 * Device parameter to limit the size of inlining packet.
 * Deprecated, ignored.
 */
#define MLX5_TXQ_MAX_INLINE_LEN "txq_max_inline_len"

/*
 * Device parameter to enable Tx scheduling on timestamps
 * and specify the packet pacing granularity in nanoseconds.
 */
#define MLX5_TX_PP "tx_pp"

/*
 * Device parameter to specify skew in nanoseconds on Tx datapath,
 * it represents the time between SQ start WQE processing and
 * appearing actual packet data on the wire.
 */
#define MLX5_TX_SKEW "tx_skew"

/*
 * Device parameter to enable hardware Tx vector.
 * Deprecated, ignored (no vectorized Tx routines anymore).
 */
#define MLX5_TX_VEC_EN "tx_vec_en"

/* Device parameter to enable hardware Rx vector. */
#define MLX5_RX_VEC_EN "rx_vec_en"

/* Allow L3 VXLAN flow creation. */
#define MLX5_L3_VXLAN_EN "l3_vxlan_en"

/* Activate DV E-Switch flow steering. */
#define MLX5_DV_ESW_EN "dv_esw_en"

/* Activate DV flow steering. */
#define MLX5_DV_FLOW_EN "dv_flow_en"

/* Enable extensive flow metadata support. */
#define MLX5_DV_XMETA_EN "dv_xmeta_en"

/* Device parameter to let the user manage the LACP traffic of the bonded device. */
#define MLX5_LACP_BY_USER "lacp_by_user"

/* Activate Netlink support in VF mode. */
#define MLX5_VF_NL_EN "vf_nl_en"

/* Select port representors to instantiate. */
#define MLX5_REPRESENTOR "representor"

/* Device parameter to configure the maximum number of dump files per queue. */
#define MLX5_MAX_DUMP_FILES_NUM "max_dump_files_num"

/* Configure timeout of LRO session (in microseconds). */
#define MLX5_LRO_TIMEOUT_USEC "lro_timeout_usec"

/*
 * Device parameter to configure the total data buffer size for a single
 * hairpin queue (logarithm value).
 */
#define MLX5_HP_BUF_SIZE "hp_buf_log_sz"

/* Flow memory reclaim mode. */
#define MLX5_RECLAIM_MEM "reclaim_mem_mode"

/* Decap will be used or not. */
#define MLX5_DECAP_EN "decap_en"

/* Device parameter to configure allow or prevent duplicate rules pattern. */
#define MLX5_ALLOW_DUPLICATE_PATTERN "allow_duplicate_pattern"

/* Device parameter to configure the delay drop when creating Rxqs. */
#define MLX5_DELAY_DROP "delay_drop"

/* Device parameter to create the fdb default rule in PMD. */
#define MLX5_FDB_DEFAULT_RULE_EN "fdb_def_rule_en"

/* HW steering counter configuration. */
#define MLX5_HWS_CNT_SERVICE_CORE "service_core"

/* HW steering counter's query interval. */
#define MLX5_HWS_CNT_CYCLE_TIME "svc_cycle_time"

/* Device parameter to control representor matching in ingress/egress flows with HWS. */
#define MLX5_REPR_MATCHING_EN "repr_matching_en"
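
/*
 * The keys above are consumed from the device argument string supplied at
 * probe time. A hypothetical example (PCI address chosen for illustration
 * only):
 *
 *	dpdk-testpmd -a 0000:03:00.0,dv_flow_en=2,mprq_en=1,tx_pp=500 -- -i
 *
 * Each "key=value" pair is matched against these names and applied either by
 * the shared-device handler mlx5_dev_args_check_handler() below or by the
 * per-port argument handlers.
 */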

/* Shared memory between primary and secondary processes. */
struct mlx5_shared_data *mlx5_shared_data;

/** Driver-specific log messages type. */
int mlx5_logtype;

static LIST_HEAD(, mlx5_dev_ctx_shared) mlx5_dev_ctx_list =
					LIST_HEAD_INITIALIZER();
static pthread_mutex_t mlx5_dev_ctx_list_mutex;

static const struct mlx5_indexed_pool_config mlx5_ipool_cfg[] = {
#if defined(HAVE_IBV_FLOW_DV_SUPPORT) || !defined(HAVE_INFINIBAND_VERBS_H)
	[MLX5_IPOOL_DECAP_ENCAP] = {
		.size = sizeof(struct mlx5_flow_dv_encap_decap_resource),
		.trunk_size = 64,
		.grow_trunk = 3,
		.grow_shift = 2,
		.need_lock = 1,
		.release_mem_en = 1,
		.malloc = mlx5_malloc,
		.free = mlx5_free,
		.type = "mlx5_encap_decap_ipool",
	},
	[MLX5_IPOOL_PUSH_VLAN] = {
		.size = sizeof(struct mlx5_flow_dv_push_vlan_action_resource),
		.trunk_size = 64,
		.grow_trunk = 3,
		.grow_shift = 2,
		.need_lock = 1,
		.release_mem_en = 1,
		.malloc = mlx5_malloc,
		.free = mlx5_free,
		.type = "mlx5_push_vlan_ipool",
	},
	[MLX5_IPOOL_TAG] = {
		.size = sizeof(struct mlx5_flow_dv_tag_resource),
		.trunk_size = 64,
		.grow_trunk = 3,
		.grow_shift = 2,
		.need_lock = 1,
		.release_mem_en = 0,
		.per_core_cache = (1 << 16),
		.malloc = mlx5_malloc,
		.free = mlx5_free,
		.type = "mlx5_tag_ipool",
	},
	[MLX5_IPOOL_PORT_ID] = {
		.size = sizeof(struct mlx5_flow_dv_port_id_action_resource),
		.trunk_size = 64,
		.grow_trunk = 3,
		.grow_shift = 2,
		.need_lock = 1,
		.release_mem_en = 1,
		.malloc = mlx5_malloc,
		.free = mlx5_free,
		.type = "mlx5_port_id_ipool",
	},
	[MLX5_IPOOL_JUMP] = {
		.size = sizeof(struct mlx5_flow_tbl_data_entry),
		.trunk_size = 64,
		.grow_trunk = 3,
		.grow_shift = 2,
		.need_lock = 1,
		.release_mem_en = 1,
		.malloc = mlx5_malloc,
		.free = mlx5_free,
		.type = "mlx5_jump_ipool",
	},
	[MLX5_IPOOL_SAMPLE] = {
		.size = sizeof(struct mlx5_flow_dv_sample_resource),
		.trunk_size = 64,
		.grow_trunk = 3,
		.grow_shift = 2,
		.need_lock = 1,
		.release_mem_en = 1,
		.malloc = mlx5_malloc,
		.free = mlx5_free,
		.type = "mlx5_sample_ipool",
	},
	[MLX5_IPOOL_DEST_ARRAY] = {
		.size = sizeof(struct mlx5_flow_dv_dest_array_resource),
		.trunk_size = 64,
		.grow_trunk = 3,
		.grow_shift = 2,
		.need_lock = 1,
		.release_mem_en = 1,
		.malloc = mlx5_malloc,
		.free = mlx5_free,
		.type = "mlx5_dest_array_ipool",
	},
	[MLX5_IPOOL_TUNNEL_ID] = {
		.size = sizeof(struct mlx5_flow_tunnel),
		.trunk_size = MLX5_MAX_TUNNELS,
		.need_lock = 1,
		.release_mem_en = 1,
		.type = "mlx5_tunnel_offload",
	},
	[MLX5_IPOOL_TNL_TBL_ID] = {
		.size = 0,
		.need_lock = 1,
		.type = "mlx5_flow_tnl_tbl_ipool",
	},
#endif
	[MLX5_IPOOL_MTR] = {
		/**
		 * The ipool index should grow continuously from small to big
		 * for the meter index, so do not set grow_trunk in order to
		 * keep the meter index continuous.
		 */
		.size = sizeof(struct mlx5_legacy_flow_meter),
		.trunk_size = 64,
		.need_lock = 1,
		.release_mem_en = 1,
		.malloc = mlx5_malloc,
		.free = mlx5_free,
		.type = "mlx5_meter_ipool",
	},
	[MLX5_IPOOL_MCP] = {
		.size = sizeof(struct mlx5_flow_mreg_copy_resource),
		.trunk_size = 64,
		.grow_trunk = 3,
		.grow_shift = 2,
		.need_lock = 1,
		.release_mem_en = 1,
		.malloc = mlx5_malloc,
		.free = mlx5_free,
		.type = "mlx5_mcp_ipool",
	},
	[MLX5_IPOOL_HRXQ] = {
		.size = (sizeof(struct mlx5_hrxq) + MLX5_RSS_HASH_KEY_LEN),
		.trunk_size = 64,
		.grow_trunk = 3,
		.grow_shift = 2,
		.need_lock = 1,
		.release_mem_en = 1,
		.malloc = mlx5_malloc,
		.free = mlx5_free,
		.type = "mlx5_hrxq_ipool",
	},
	[MLX5_IPOOL_MLX5_FLOW] = {
		/*
		 * MLX5_IPOOL_MLX5_FLOW size varies for DV and VERBS flows.
		 * It is set at run time according to the PCI function
		 * configuration.
		 */
		.size = 0,
		.trunk_size = 64,
		.grow_trunk = 3,
		.grow_shift = 2,
		.need_lock = 1,
		.release_mem_en = 0,
		.per_core_cache = 1 << 19,
		.malloc = mlx5_malloc,
		.free = mlx5_free,
		.type = "mlx5_flow_handle_ipool",
	},
	[MLX5_IPOOL_RTE_FLOW] = {
		.size = sizeof(struct rte_flow),
		.trunk_size = 4096,
		.need_lock = 1,
		.release_mem_en = 1,
		.malloc = mlx5_malloc,
		.free = mlx5_free,
		.type = "rte_flow_ipool",
	},
	[MLX5_IPOOL_RSS_EXPANTION_FLOW_ID] = {
		.size = 0,
		.need_lock = 1,
		.type = "mlx5_flow_rss_id_ipool",
	},
	[MLX5_IPOOL_RSS_SHARED_ACTIONS] = {
		.size = sizeof(struct mlx5_shared_action_rss),
		.trunk_size = 64,
		.grow_trunk = 3,
		.grow_shift = 2,
		.need_lock = 1,
		.release_mem_en = 1,
		.malloc = mlx5_malloc,
		.free = mlx5_free,
		.type = "mlx5_shared_action_rss",
	},
	[MLX5_IPOOL_MTR_POLICY] = {
		/**
		 * The ipool index should grow continuously from small to big
		 * for the policy index, so do not set grow_trunk in order to
		 * keep the policy index continuous.
		 */
		.size = sizeof(struct mlx5_flow_meter_sub_policy),
		.trunk_size = 64,
		.need_lock = 1,
		.release_mem_en = 1,
		.malloc = mlx5_malloc,
		.free = mlx5_free,
		.type = "mlx5_meter_policy_ipool",
	},
};

#define MLX5_FLOW_MIN_ID_POOL_SIZE 512
#define MLX5_ID_GENERATION_ARRAY_FACTOR 16

#define MLX5_FLOW_TABLE_HLIST_ARRAY_SIZE 1024

/**
 * Decide whether representor ID is an HPF (host PF) port on BF2.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 *
 * @return
 *   Non-zero if HPF, otherwise 0.
 */
bool
mlx5_is_hpf(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	uint16_t repr = MLX5_REPRESENTOR_REPR(priv->representor_id);
	int type = MLX5_REPRESENTOR_TYPE(priv->representor_id);

	return priv->representor != 0 && type == RTE_ETH_REPRESENTOR_VF &&
	       MLX5_REPRESENTOR_REPR(-1) == repr;
}

/**
 * Decide whether representor ID is an SF port representor.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 *
 * @return
 *   Non-zero if SF port representor, otherwise 0.
 */
bool
mlx5_is_sf_repr(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	int type = MLX5_REPRESENTOR_TYPE(priv->representor_id);

	return priv->representor != 0 && type == RTE_ETH_REPRESENTOR_SF;
}
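
/*
 * Note on the two helpers above: priv->representor_id packs the representor
 * type and index, decoded with the MLX5_REPRESENTOR_TYPE()/
 * MLX5_REPRESENTOR_REPR() macros. The host PF is presented as a VF-type
 * representor whose index field is all ones (MLX5_REPRESENTOR_REPR(-1)),
 * which is exactly what mlx5_is_hpf() tests for.
 */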

/**
 * Initialize the ASO aging management structure.
 *
 * @param[in] sh
 *   Pointer to mlx5_dev_ctx_shared object.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_flow_aso_age_mng_init(struct mlx5_dev_ctx_shared *sh)
{
	int err;

	if (sh->aso_age_mng)
		return 0;
	sh->aso_age_mng = mlx5_malloc(MLX5_MEM_ZERO, sizeof(*sh->aso_age_mng),
				      RTE_CACHE_LINE_SIZE, SOCKET_ID_ANY);
	if (!sh->aso_age_mng) {
		DRV_LOG(ERR, "aso_age_mng allocation failed.");
		rte_errno = ENOMEM;
		return -ENOMEM;
	}
	err = mlx5_aso_queue_init(sh, ASO_OPC_MOD_FLOW_HIT, 1);
	if (err) {
		mlx5_free(sh->aso_age_mng);
		return -1;
	}
	rte_rwlock_init(&sh->aso_age_mng->resize_rwl);
	rte_spinlock_init(&sh->aso_age_mng->free_sl);
	LIST_INIT(&sh->aso_age_mng->free);
	return 0;
}

/**
 * Close and release all the resources of the ASO aging management structure.
 *
 * @param[in] sh
 *   Pointer to mlx5_dev_ctx_shared object to free.
 */
static void
mlx5_flow_aso_age_mng_close(struct mlx5_dev_ctx_shared *sh)
{
	int i, j;

	mlx5_aso_flow_hit_queue_poll_stop(sh);
	mlx5_aso_queue_uninit(sh, ASO_OPC_MOD_FLOW_HIT);
	if (sh->aso_age_mng->pools) {
		struct mlx5_aso_age_pool *pool;

		for (i = 0; i < sh->aso_age_mng->next; ++i) {
			pool = sh->aso_age_mng->pools[i];
			claim_zero(mlx5_devx_cmd_destroy
						(pool->flow_hit_aso_obj));
			for (j = 0; j < MLX5_COUNTERS_PER_POOL; ++j)
				if (pool->actions[j].dr_action)
					claim_zero
					    (mlx5_flow_os_destroy_flow_action
					      (pool->actions[j].dr_action));
			mlx5_free(pool);
		}
		mlx5_free(sh->aso_age_mng->pools);
	}
	mlx5_free(sh->aso_age_mng);
}

/**
 * Initialize the shared aging list information per port.
 *
 * @param[in] sh
 *   Pointer to mlx5_dev_ctx_shared object.
 */
static void
mlx5_flow_aging_init(struct mlx5_dev_ctx_shared *sh)
{
	uint32_t i;
	struct mlx5_age_info *age_info;

	/*
	 * In HW steering, the aging information structure is initialized
	 * later, during the configure function.
	 */
	if (sh->config.dv_flow_en == 2)
		return;
	for (i = 0; i < sh->max_port; i++) {
		age_info = &sh->port[i].age_info;
		age_info->flags = 0;
		TAILQ_INIT(&age_info->aged_counters);
		LIST_INIT(&age_info->aged_aso);
		rte_spinlock_init(&age_info->aged_sl);
		MLX5_AGE_SET(age_info, MLX5_AGE_TRIGGER);
	}
}

/**
 * Detect and configure the DV flow counter mode.
 *
 * @param dev
 *   Pointer to rte_eth_dev structure.
 */
void
mlx5_flow_counter_mode_config(struct rte_eth_dev *dev __rte_unused)
{
#ifdef HAVE_IBV_FLOW_DV_SUPPORT
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_dev_ctx_shared *sh = priv->sh;
	struct mlx5_hca_attr *hca_attr = &sh->cdev->config.hca_attr;
	bool fallback;

#ifndef HAVE_IBV_DEVX_ASYNC
	fallback = true;
#else
	fallback = false;
	if (!sh->cdev->config.devx || !sh->config.dv_flow_en ||
	    !hca_attr->flow_counters_dump ||
	    !(hca_attr->flow_counter_bulk_alloc_bitmap & 0x4) ||
	    (mlx5_flow_dv_discover_counter_offset_support(dev) == -ENOTSUP))
		fallback = true;
#endif
	if (fallback)
		DRV_LOG(INFO, "Use fall-back DV counter management. Flow "
			"counter dump:%d, bulk_alloc_bitmap:0x%hhx.",
			hca_attr->flow_counters_dump,
			hca_attr->flow_counter_bulk_alloc_bitmap);
	/* Initialize fallback mode only on the port that initializes sh. */
	if (sh->refcnt == 1)
		sh->sws_cmng.counter_fallback = fallback;
	else if (fallback != sh->sws_cmng.counter_fallback)
		DRV_LOG(WARNING, "Port %d in sh has different fallback mode "
			"with others:%d.", PORT_ID(priv), fallback);
#endif
}

/**
 * Initialize the counters management structure.
 *
 * @param[in] sh
 *   Pointer to mlx5_dev_ctx_shared object.
 *
 * @return
 *   0 on success, otherwise negative errno value and rte_errno is set.
 */
static int
mlx5_flow_counters_mng_init(struct mlx5_dev_ctx_shared *sh)
{
	int i, j;

	if (sh->config.dv_flow_en < 2) {
		void *pools;

		pools = mlx5_malloc(MLX5_MEM_ZERO,
				    sizeof(struct mlx5_flow_counter_pool *) *
				    MLX5_COUNTER_POOLS_MAX_NUM,
				    0, SOCKET_ID_ANY);
		if (!pools) {
			DRV_LOG(ERR,
				"Counter management allocation failed.");
			rte_errno = ENOMEM;
			return -rte_errno;
		}
		memset(&sh->sws_cmng, 0, sizeof(sh->sws_cmng));
		TAILQ_INIT(&sh->sws_cmng.flow_counters);
		sh->sws_cmng.min_id = MLX5_CNT_BATCH_OFFSET;
		sh->sws_cmng.max_id = -1;
		sh->sws_cmng.last_pool_idx = POOL_IDX_INVALID;
		sh->sws_cmng.pools = pools;
		rte_spinlock_init(&sh->sws_cmng.pool_update_sl);
		for (i = 0; i < MLX5_COUNTER_TYPE_MAX; i++) {
			TAILQ_INIT(&sh->sws_cmng.counters[i]);
			rte_spinlock_init(&sh->sws_cmng.csl[i]);
		}
	} else {
		struct mlx5_hca_attr *attr = &sh->cdev->config.hca_attr;
		uint32_t fw_max_nb_cnts = attr->max_flow_counter;
		uint8_t log_dcs = log2above(fw_max_nb_cnts) - 1;
		uint32_t max_nb_cnts = 0;

		for (i = 0, j = 0; j < MLX5_HWS_CNT_DCS_NUM; ++i) {
			int log_dcs_i = log_dcs - i;

			if (log_dcs_i < 0)
				break;
			if ((max_nb_cnts | RTE_BIT32(log_dcs_i)) >
			    fw_max_nb_cnts)
				continue;
			max_nb_cnts |= RTE_BIT32(log_dcs_i);
			j++;
		}
		sh->hws_max_log_bulk_sz = log_dcs;
		sh->hws_max_nb_counters = max_nb_cnts;
	}
	return 0;
}

/**
 * Destroy all the resources allocated for a counter memory management.
 *
 * @param[in] mng
 *   Pointer to the memory management structure.
 */
static void
mlx5_flow_destroy_counter_stat_mem_mng(struct mlx5_counter_stats_mem_mng *mng)
{
	uint8_t *mem = (uint8_t *)(uintptr_t)mng->raws[0].data;

	LIST_REMOVE(mng, next);
	mlx5_os_wrapped_mkey_destroy(&mng->wm);
	mlx5_free(mem);
}

/**
 * Close and release all the resources of the counters management.
 *
 * @param[in] sh
 *   Pointer to mlx5_dev_ctx_shared object to free.
 */
static void
mlx5_flow_counters_mng_close(struct mlx5_dev_ctx_shared *sh)
{
	struct mlx5_counter_stats_mem_mng *mng;
	int i, j;
	int retries = 1024;

	rte_errno = 0;
	while (--retries) {
		rte_eal_alarm_cancel(mlx5_flow_query_alarm, sh);
		if (rte_errno != EINPROGRESS)
			break;
		rte_pause();
	}

	if (sh->sws_cmng.pools) {
		struct mlx5_flow_counter_pool *pool;
		uint16_t n_valid = sh->sws_cmng.n_valid;
		bool fallback = sh->sws_cmng.counter_fallback;

		for (i = 0; i < n_valid; ++i) {
			pool = sh->sws_cmng.pools[i];
			if (!fallback && pool->min_dcs)
				claim_zero(mlx5_devx_cmd_destroy
							(pool->min_dcs));
			for (j = 0; j < MLX5_COUNTERS_PER_POOL; ++j) {
				struct mlx5_flow_counter *cnt =
						MLX5_POOL_GET_CNT(pool, j);

				if (cnt->action)
					claim_zero
					 (mlx5_flow_os_destroy_flow_action
					  (cnt->action));
				if (fallback && cnt->dcs_when_free)
					claim_zero(mlx5_devx_cmd_destroy
						   (cnt->dcs_when_free));
			}
			mlx5_free(pool);
		}
		mlx5_free(sh->sws_cmng.pools);
	}
	mng = LIST_FIRST(&sh->sws_cmng.mem_mngs);
	while (mng) {
		mlx5_flow_destroy_counter_stat_mem_mng(mng);
		mng = LIST_FIRST(&sh->sws_cmng.mem_mngs);
	}
	memset(&sh->sws_cmng, 0, sizeof(sh->sws_cmng));
}

/**
 * Initialize the ASO flow meters management structure.
 *
 * @param[in] sh
 *   Pointer to mlx5_dev_ctx_shared object.
 */
int
mlx5_aso_flow_mtrs_mng_init(struct mlx5_dev_ctx_shared *sh)
{
	if (!sh->mtrmng) {
		sh->mtrmng = mlx5_malloc(MLX5_MEM_ZERO,
			sizeof(*sh->mtrmng),
			RTE_CACHE_LINE_SIZE, SOCKET_ID_ANY);
		if (!sh->mtrmng) {
			DRV_LOG(ERR,
				"Meter management allocation failed.");
			rte_errno = ENOMEM;
			return -ENOMEM;
		}
		if (sh->meter_aso_en) {
			rte_spinlock_init(&sh->mtrmng->pools_mng.mtrsl);
			rte_rwlock_init(&sh->mtrmng->pools_mng.resize_mtrwl);
			LIST_INIT(&sh->mtrmng->pools_mng.meters);
		}
		sh->mtrmng->def_policy_id = MLX5_INVALID_POLICY_ID;
	}
	return 0;
}

/**
 * Close and release all the resources of
 * the ASO flow meter management structure.
 *
 * @param[in] sh
 *   Pointer to mlx5_dev_ctx_shared object to free.
 */
static void
mlx5_aso_flow_mtrs_mng_close(struct mlx5_dev_ctx_shared *sh)
{
	struct mlx5_aso_mtr_pool *mtr_pool;
	struct mlx5_flow_mtr_mng *mtrmng = sh->mtrmng;
	uint32_t idx;
#ifdef HAVE_MLX5_DR_CREATE_ACTION_ASO
	struct mlx5_aso_mtr *aso_mtr;
	int i;
#endif /* HAVE_MLX5_DR_CREATE_ACTION_ASO */

	if (sh->meter_aso_en) {
		mlx5_aso_queue_uninit(sh, ASO_OPC_MOD_POLICER);
		idx = mtrmng->pools_mng.n_valid;
		while (idx--) {
			mtr_pool = mtrmng->pools_mng.pools[idx];
#ifdef HAVE_MLX5_DR_CREATE_ACTION_ASO
			for (i = 0; i < MLX5_ASO_MTRS_PER_POOL; i++) {
				aso_mtr = &mtr_pool->mtrs[i];
				if (aso_mtr->fm.meter_action_g)
					claim_zero
					(mlx5_glue->destroy_flow_action
					(aso_mtr->fm.meter_action_g));
				if (aso_mtr->fm.meter_action_y)
					claim_zero
					(mlx5_glue->destroy_flow_action
					(aso_mtr->fm.meter_action_y));
			}
#endif /* HAVE_MLX5_DR_CREATE_ACTION_ASO */
			claim_zero(mlx5_devx_cmd_destroy
						(mtr_pool->devx_obj));
			mtrmng->pools_mng.n_valid--;
			mlx5_free(mtr_pool);
		}
		mlx5_free(sh->mtrmng->pools_mng.pools);
	}
	mlx5_free(sh->mtrmng);
	sh->mtrmng = NULL;
}
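
/*
 * The helper below fires the RTE_ETH_EVENT_FLOW_AGED callback for a port only
 * while MLX5_AGE_TRIGGER is set; the trigger flag is consumed here and is
 * expected to be re-armed when the application retrieves the aged flows, so
 * the event is not raised repeatedly for the same batch of expired flows.
 */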
/* Send FLOW_AGED event if needed. */
void
mlx5_age_event_prepare(struct mlx5_dev_ctx_shared *sh)
{
	struct mlx5_age_info *age_info;
	uint32_t i;

	for (i = 0; i < sh->max_port; i++) {
		age_info = &sh->port[i].age_info;
		if (!MLX5_AGE_GET(age_info, MLX5_AGE_EVENT_NEW))
			continue;
		MLX5_AGE_UNSET(age_info, MLX5_AGE_EVENT_NEW);
		if (MLX5_AGE_GET(age_info, MLX5_AGE_TRIGGER)) {
			MLX5_AGE_UNSET(age_info, MLX5_AGE_TRIGGER);
			rte_eth_dev_callback_process
				(&rte_eth_devices[sh->port[i].devx_ih_port_id],
				 RTE_ETH_EVENT_FLOW_AGED, NULL);
		}
	}
}

/*
 * Initialize the ASO connection tracking structure.
 *
 * @param[in] sh
 *   Pointer to mlx5_dev_ctx_shared object.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_flow_aso_ct_mng_init(struct mlx5_dev_ctx_shared *sh)
{
	int err;

	if (sh->ct_mng)
		return 0;
	sh->ct_mng = mlx5_malloc(MLX5_MEM_ZERO, sizeof(*sh->ct_mng) +
				 sizeof(struct mlx5_aso_sq) * MLX5_ASO_CT_SQ_NUM,
				 RTE_CACHE_LINE_SIZE, SOCKET_ID_ANY);
	if (!sh->ct_mng) {
		DRV_LOG(ERR, "ASO CT management allocation failed.");
		rte_errno = ENOMEM;
		return -rte_errno;
	}
	err = mlx5_aso_queue_init(sh, ASO_OPC_MOD_CONNECTION_TRACKING, MLX5_ASO_CT_SQ_NUM);
	if (err) {
		mlx5_free(sh->ct_mng);
		/* rte_errno should be extracted from the failure. */
		rte_errno = EINVAL;
		return -rte_errno;
	}
	rte_spinlock_init(&sh->ct_mng->ct_sl);
	rte_rwlock_init(&sh->ct_mng->resize_rwl);
	LIST_INIT(&sh->ct_mng->free_cts);
	return 0;
}

/*
 * Close and release all the resources of the
 * ASO connection tracking management structure.
 *
 * @param[in] sh
 *   Pointer to mlx5_dev_ctx_shared object to free.
 */
static void
mlx5_flow_aso_ct_mng_close(struct mlx5_dev_ctx_shared *sh)
{
	struct mlx5_aso_ct_pools_mng *mng = sh->ct_mng;
	struct mlx5_aso_ct_pool *ct_pool;
	struct mlx5_aso_ct_action *ct;
	uint32_t idx;
	uint32_t val;
	uint32_t cnt;
	int i;

	mlx5_aso_queue_uninit(sh, ASO_OPC_MOD_CONNECTION_TRACKING);
	idx = mng->next;
	while (idx--) {
		cnt = 0;
		ct_pool = mng->pools[idx];
		for (i = 0; i < MLX5_ASO_CT_ACTIONS_PER_POOL; i++) {
			ct = &ct_pool->actions[i];
			val = __atomic_fetch_sub(&ct->refcnt, 1,
						 __ATOMIC_RELAXED);
			MLX5_ASSERT(val == 1);
			if (val > 1)
				cnt++;
#ifdef HAVE_MLX5_DR_ACTION_ASO_CT
			if (ct->dr_action_orig)
				claim_zero(mlx5_glue->destroy_flow_action
					(ct->dr_action_orig));
			if (ct->dr_action_rply)
				claim_zero(mlx5_glue->destroy_flow_action
					(ct->dr_action_rply));
#endif
		}
		claim_zero(mlx5_devx_cmd_destroy(ct_pool->devx_obj));
		if (cnt) {
			DRV_LOG(DEBUG, "%u ASO CT objects are being used in the pool %u",
				cnt, i);
		}
		mlx5_free(ct_pool);
		/* in case of failure. */
		mng->next--;
	}
	mlx5_free(mng->pools);
	mlx5_free(mng);
	/* Management structure must be cleared to 0s during allocation. */
	sh->ct_mng = NULL;
}
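
/*
 * Usage sketch (for orientation only): entries of the indexed pools created
 * below are addressed by uint32_t indices rather than pointers, e.g.
 *
 *	uint32_t idx;
 *	struct mlx5_flow_handle *h =
 *		mlx5_ipool_zmalloc(sh->ipool[MLX5_IPOOL_MLX5_FLOW], &idx);
 *	...
 *	mlx5_ipool_free(sh->ipool[MLX5_IPOOL_MLX5_FLOW], idx);
 *
 * which keeps the per-flow objects compact and allows trunk-based growth and
 * reclaim according to the configuration table above.
 */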

/**
 * Initialize the flow resources' indexed mempool.
 *
 * @param[in] sh
 *   Pointer to mlx5_dev_ctx_shared object.
 */
static void
mlx5_flow_ipool_create(struct mlx5_dev_ctx_shared *sh)
{
	uint8_t i;
	struct mlx5_indexed_pool_config cfg;

	for (i = 0; i < MLX5_IPOOL_MAX; ++i) {
		cfg = mlx5_ipool_cfg[i];
		switch (i) {
		default:
			break;
		/*
		 * Set MLX5_IPOOL_MLX5_FLOW ipool size
		 * according to PCI function flow configuration.
		 */
		case MLX5_IPOOL_MLX5_FLOW:
			cfg.size = sh->config.dv_flow_en ?
				   sizeof(struct mlx5_flow_handle) :
				   MLX5_FLOW_HANDLE_VERBS_SIZE;
			break;
		}
		if (sh->config.reclaim_mode) {
			cfg.release_mem_en = 1;
			cfg.per_core_cache = 0;
		} else {
			cfg.release_mem_en = 0;
		}
		sh->ipool[i] = mlx5_ipool_create(&cfg);
	}
}

/**
 * Release the flow resources' indexed mempool.
 *
 * @param[in] sh
 *   Pointer to mlx5_dev_ctx_shared object.
 */
static void
mlx5_flow_ipool_destroy(struct mlx5_dev_ctx_shared *sh)
{
	uint8_t i;

	for (i = 0; i < MLX5_IPOOL_MAX; ++i)
		mlx5_ipool_destroy(sh->ipool[i]);
	for (i = 0; i < MLX5_MAX_MODIFY_NUM; ++i)
		if (sh->mdh_ipools[i])
			mlx5_ipool_destroy(sh->mdh_ipools[i]);
}

/*
 * Check if the dynamic flex parser for eCPRI already exists.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 *
 * @return
 *   True if it exists, false otherwise.
 */
bool
mlx5_flex_parser_ecpri_exist(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_ecpri_parser_profile *prf = &priv->sh->ecpri_parser;

	return !!prf->obj;
}

/*
 * Allocate a flex parser for eCPRI. Once created, the related parser
 * resources will be held until the device is closed.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_flex_parser_ecpri_alloc(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_ecpri_parser_profile *prf = &priv->sh->ecpri_parser;
	struct mlx5_devx_graph_node_attr node = {
		.modify_field_select = 0,
	};
	uint32_t ids[8];
	int ret;

	if (!priv->sh->cdev->config.hca_attr.parse_graph_flex_node) {
		DRV_LOG(ERR, "Dynamic flex parser is not supported "
			"for device %s.", priv->dev_data->name);
		return -ENOTSUP;
	}
	node.header_length_mode = MLX5_GRAPH_NODE_LEN_FIXED;
	/* 8 bytes now: 4B common header + 4B message body header. */
	node.header_length_base_value = 0x8;
	/* After MAC layer: Ether / VLAN. */
	node.in[0].arc_parse_graph_node = MLX5_GRAPH_ARC_NODE_MAC;
	/* Type of compared condition should be 0xAEFE in the L2 layer. */
	node.in[0].compare_condition_value = RTE_ETHER_TYPE_ECPRI;
	/* Sample #0: type in common header. */
	node.sample[0].flow_match_sample_en = 1;
	/* Fixed offset. */
	node.sample[0].flow_match_sample_offset_mode = 0x0;
	/* Only the 2nd byte will be used. */
	node.sample[0].flow_match_sample_field_base_offset = 0x0;
	/* Sample #1: message payload. */
	node.sample[1].flow_match_sample_en = 1;
	/* Fixed offset. */
	node.sample[1].flow_match_sample_offset_mode = 0x0;
	/*
	 * Only the first two bytes will be used right now, and their offset
	 * starts after the common header, which has the length of a DW (u32).
	 */
	node.sample[1].flow_match_sample_field_base_offset = sizeof(uint32_t);
	prf->obj = mlx5_devx_cmd_create_flex_parser(priv->sh->cdev->ctx, &node);
	if (!prf->obj) {
		DRV_LOG(ERR, "Failed to create flex parser node object.");
		return (rte_errno == 0) ? -ENODEV : -rte_errno;
	}
	prf->num = 2;
	ret = mlx5_devx_cmd_query_parse_samples(prf->obj, ids, prf->num);
	if (ret) {
		DRV_LOG(ERR, "Failed to query sample IDs.");
		return (rte_errno == 0) ? -ENODEV : -rte_errno;
	}
	prf->offset[0] = 0x0;
	prf->offset[1] = sizeof(uint32_t);
	prf->ids[0] = ids[0];
	prf->ids[1] = ids[1];
	return 0;
}

/*
 * Destroy the flex parser node, including the parser itself, input / output
 * arcs and DW samples. Resources could be reused then.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 */
static void
mlx5_flex_parser_ecpri_release(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_ecpri_parser_profile *prf = &priv->sh->ecpri_parser;

	if (prf->obj)
		mlx5_devx_cmd_destroy(prf->obj);
	prf->obj = NULL;
}

uint32_t
mlx5_get_supported_sw_parsing_offloads(const struct mlx5_hca_attr *attr)
{
	uint32_t sw_parsing_offloads = 0;

	if (attr->swp) {
		sw_parsing_offloads |= MLX5_SW_PARSING_CAP;
		if (attr->swp_csum)
			sw_parsing_offloads |= MLX5_SW_PARSING_CSUM_CAP;

		if (attr->swp_lso)
			sw_parsing_offloads |= MLX5_SW_PARSING_TSO_CAP;
	}
	return sw_parsing_offloads;
}

uint32_t
mlx5_get_supported_tunneling_offloads(const struct mlx5_hca_attr *attr)
{
	uint32_t tn_offloads = 0;

	if (attr->tunnel_stateless_vxlan)
		tn_offloads |= MLX5_TUNNELED_OFFLOADS_VXLAN_CAP;
	if (attr->tunnel_stateless_gre)
		tn_offloads |= MLX5_TUNNELED_OFFLOADS_GRE_CAP;
	if (attr->tunnel_stateless_geneve_rx)
		tn_offloads |= MLX5_TUNNELED_OFFLOADS_GENEVE_CAP;
	return tn_offloads;
}

/* Fill all fields of UAR structure. */
static int
mlx5_rxtx_uars_prepare(struct mlx5_dev_ctx_shared *sh)
{
	int ret;

	ret = mlx5_devx_uar_prepare(sh->cdev, &sh->tx_uar);
	if (ret) {
		DRV_LOG(ERR, "Failed to prepare Tx DevX UAR.");
		return -rte_errno;
	}
	MLX5_ASSERT(sh->tx_uar.obj);
	MLX5_ASSERT(mlx5_os_get_devx_uar_base_addr(sh->tx_uar.obj));
	ret = mlx5_devx_uar_prepare(sh->cdev, &sh->rx_uar);
	if (ret) {
		DRV_LOG(ERR, "Failed to prepare Rx DevX UAR.");
		mlx5_devx_uar_release(&sh->tx_uar);
		return -rte_errno;
	}
	MLX5_ASSERT(sh->rx_uar.obj);
	MLX5_ASSERT(mlx5_os_get_devx_uar_base_addr(sh->rx_uar.obj));
	return 0;
}

static void
mlx5_rxtx_uars_release(struct mlx5_dev_ctx_shared *sh)
{
	mlx5_devx_uar_release(&sh->rx_uar);
	mlx5_devx_uar_release(&sh->tx_uar);
}
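
/*
 * Background note (assumption, for orientation only): the Tx UAR prepared
 * above backs the doorbell/BlueFlame registers that are later mapped per
 * process for the Tx datapath, while the Rx UAR is presumably used for CQ
 * doorbells of Rx objects created through DevX; both are shared by all ports
 * on the device context.
 */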

/**
 * rte_mempool_walk() callback to unregister Rx mempools.
 * It is used when implicit mempool registration is disabled.
 *
 * @param mp
 *   The mempool being walked.
 * @param arg
 *   Pointer to the device shared context.
 */
static void
mlx5_dev_ctx_shared_rx_mempool_unregister_cb(struct rte_mempool *mp, void *arg)
{
	struct mlx5_dev_ctx_shared *sh = arg;

	mlx5_dev_mempool_unregister(sh->cdev, mp);
}

/**
 * Callback used when implicit mempool registration is disabled
 * in order to track Rx mempool destruction.
 *
 * @param event
 *   Mempool life cycle event.
 * @param mp
 *   An Rx mempool registered explicitly when the port is started.
 * @param arg
 *   Pointer to a device shared context.
 */
static void
mlx5_dev_ctx_shared_rx_mempool_event_cb(enum rte_mempool_event event,
					struct rte_mempool *mp, void *arg)
{
	struct mlx5_dev_ctx_shared *sh = arg;

	if (event == RTE_MEMPOOL_EVENT_DESTROY)
		mlx5_dev_mempool_unregister(sh->cdev, mp);
}

int
mlx5_dev_ctx_shared_mempool_subscribe(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_dev_ctx_shared *sh = priv->sh;
	int ret;

	/* Check if we only need to track Rx mempool destruction. */
	if (!sh->cdev->config.mr_mempool_reg_en) {
		ret = rte_mempool_event_callback_register
				(mlx5_dev_ctx_shared_rx_mempool_event_cb, sh);
		return ret == 0 || rte_errno == EEXIST ? 0 : ret;
	}
	return mlx5_dev_mempool_subscribe(sh->cdev);
}

/**
 * Set up multiple TISs with different affinities according to
 * the number of bonding ports.
 *
 * @param sh
 *   Pointer to the shared device context.
 *
 * @return
 *   Zero on success, -1 otherwise.
 */
static int
mlx5_setup_tis(struct mlx5_dev_ctx_shared *sh)
{
	int i;
	struct mlx5_devx_lag_context lag_ctx = { 0 };
	struct mlx5_devx_tis_attr tis_attr = { 0 };

	tis_attr.transport_domain = sh->td->id;
	if (sh->bond.n_port) {
		if (!mlx5_devx_cmd_query_lag(sh->cdev->ctx, &lag_ctx)) {
			sh->lag.tx_remap_affinity[0] =
				lag_ctx.tx_remap_affinity_1;
			sh->lag.tx_remap_affinity[1] =
				lag_ctx.tx_remap_affinity_2;
			sh->lag.affinity_mode = lag_ctx.port_select_mode;
		} else {
			DRV_LOG(ERR, "Failed to query lag affinity.");
			return -1;
		}
		if (sh->lag.affinity_mode == MLX5_LAG_MODE_TIS) {
			for (i = 0; i < sh->bond.n_port; i++) {
				tis_attr.lag_tx_port_affinity =
					MLX5_IFC_LAG_MAP_TIS_AFFINITY(i,
							sh->bond.n_port);
				sh->tis[i] = mlx5_devx_cmd_create_tis(sh->cdev->ctx,
						&tis_attr);
				if (!sh->tis[i]) {
					DRV_LOG(ERR, "Failed to create TIS %d/%d for bonding device"
						" %s.", i, sh->bond.n_port,
						sh->ibdev_name);
					return -1;
				}
			}
			DRV_LOG(DEBUG, "LAG number of ports : %d, affinity_1 & 2 : pf%d & %d.\n",
				sh->bond.n_port, lag_ctx.tx_remap_affinity_1,
				lag_ctx.tx_remap_affinity_2);
			return 0;
		}
		if (sh->lag.affinity_mode == MLX5_LAG_MODE_HASH)
			DRV_LOG(INFO, "Device %s enabled HW hash based LAG.",
					sh->ibdev_name);
	}
	tis_attr.lag_tx_port_affinity = 0;
	sh->tis[0] = mlx5_devx_cmd_create_tis(sh->cdev->ctx, &tis_attr);
	if (!sh->tis[0]) {
		DRV_LOG(ERR, "Failed to create TIS 0 for bonding device"
			" %s.", sh->ibdev_name);
		return -1;
	}
	return 0;
}
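
/*
 * Illustration of the TIS layout created above, assuming
 * MLX5_IFC_LAG_MAP_TIS_AFFINITY(i, n) maps index i to affinity (i % n) + 1:
 * with two bonded ports, sh->tis[0] gets lag_tx_port_affinity 1 and
 * sh->tis[1] gets affinity 2, so a Tx queue can later be steered to a
 * physical port by selecting the matching TIS.
 */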

/**
 * Verify and store value for shared device argument.
 *
 * @param[in] key
 *   Key argument to verify.
 * @param[in] val
 *   Value associated with key.
 * @param opaque
 *   User data.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_dev_args_check_handler(const char *key, const char *val, void *opaque)
{
	struct mlx5_sh_config *config = opaque;
	signed long tmp;

	errno = 0;
	tmp = strtol(val, NULL, 0);
	if (errno) {
		rte_errno = errno;
		DRV_LOG(WARNING, "%s: \"%s\" is not a valid integer", key, val);
		return -rte_errno;
	}
	if (tmp < 0 && strcmp(MLX5_TX_PP, key) && strcmp(MLX5_TX_SKEW, key)) {
		/* Negative values are acceptable for some keys only. */
		rte_errno = EINVAL;
		DRV_LOG(WARNING, "%s: invalid negative value \"%s\"", key, val);
		return -rte_errno;
	}
	if (strcmp(MLX5_TX_PP, key) == 0) {
		unsigned long mod = tmp >= 0 ? tmp : -tmp;

		if (!mod) {
			DRV_LOG(ERR, "Zero Tx packet pacing parameter.");
			rte_errno = EINVAL;
			return -rte_errno;
		}
		config->tx_pp = tmp;
	} else if (strcmp(MLX5_TX_SKEW, key) == 0) {
		config->tx_skew = tmp;
	} else if (strcmp(MLX5_L3_VXLAN_EN, key) == 0) {
		config->l3_vxlan_en = !!tmp;
	} else if (strcmp(MLX5_VF_NL_EN, key) == 0) {
		config->vf_nl_en = !!tmp;
	} else if (strcmp(MLX5_DV_ESW_EN, key) == 0) {
		config->dv_esw_en = !!tmp;
	} else if (strcmp(MLX5_DV_FLOW_EN, key) == 0) {
		if (tmp > 2) {
			DRV_LOG(ERR, "Invalid %s parameter.", key);
			rte_errno = EINVAL;
			return -rte_errno;
		}
		config->dv_flow_en = tmp;
	} else if (strcmp(MLX5_DV_XMETA_EN, key) == 0) {
		if (tmp != MLX5_XMETA_MODE_LEGACY &&
		    tmp != MLX5_XMETA_MODE_META16 &&
		    tmp != MLX5_XMETA_MODE_META32 &&
		    tmp != MLX5_XMETA_MODE_MISS_INFO &&
		    tmp != MLX5_XMETA_MODE_META32_HWS) {
			DRV_LOG(ERR, "Invalid extensive metadata parameter.");
			rte_errno = EINVAL;
			return -rte_errno;
		}
		if (tmp != MLX5_XMETA_MODE_MISS_INFO)
			config->dv_xmeta_en = tmp;
		else
			config->dv_miss_info = 1;
	} else if (strcmp(MLX5_LACP_BY_USER, key) == 0) {
		config->lacp_by_user = !!tmp;
	} else if (strcmp(MLX5_RECLAIM_MEM, key) == 0) {
		if (tmp != MLX5_RCM_NONE &&
		    tmp != MLX5_RCM_LIGHT &&
		    tmp != MLX5_RCM_AGGR) {
			DRV_LOG(ERR, "Unrecognized %s: \"%s\"", key, val);
			rte_errno = EINVAL;
			return -rte_errno;
		}
		config->reclaim_mode = tmp;
	} else if (strcmp(MLX5_DECAP_EN, key) == 0) {
		config->decap_en = !!tmp;
	} else if (strcmp(MLX5_ALLOW_DUPLICATE_PATTERN, key) == 0) {
		config->allow_duplicate_pattern = !!tmp;
	} else if (strcmp(MLX5_FDB_DEFAULT_RULE_EN, key) == 0) {
		config->fdb_def_rule = !!tmp;
	} else if (strcmp(MLX5_HWS_CNT_SERVICE_CORE, key) == 0) {
		config->cnt_svc.service_core = tmp;
	} else if (strcmp(MLX5_HWS_CNT_CYCLE_TIME, key) == 0) {
		config->cnt_svc.cycle_time = tmp;
	} else if (strcmp(MLX5_REPR_MATCHING_EN, key) == 0) {
		config->repr_matching = !!tmp;
	}
	return 0;
}
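
/*
 * Illustration of the handler above (example values only): a device argument
 * string such as "tx_pp=500,tx_skew=-20,dv_flow_en=2,decap_en=0" yields
 * config->tx_pp = 500, config->tx_skew = -20 (negative values are accepted
 * for tx_pp/tx_skew only), config->dv_flow_en = 2 and config->decap_en = 0,
 * before the capability adjustments applied below.
 */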

/**
 * Parse user device parameters and adjust them according to device
 * capabilities.
 *
 * @param sh
 *   Pointer to shared device context.
 * @param mkvlist
 *   Pointer to mlx5 kvargs control, can be NULL if there is no devargs.
 * @param config
 *   Pointer to shared device configuration structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_shared_dev_ctx_args_config(struct mlx5_dev_ctx_shared *sh,
				struct mlx5_kvargs_ctrl *mkvlist,
				struct mlx5_sh_config *config)
{
	const char **params = (const char *[]){
		MLX5_TX_PP,
		MLX5_TX_SKEW,
		MLX5_L3_VXLAN_EN,
		MLX5_VF_NL_EN,
		MLX5_DV_ESW_EN,
		MLX5_DV_FLOW_EN,
		MLX5_DV_XMETA_EN,
		MLX5_LACP_BY_USER,
		MLX5_RECLAIM_MEM,
		MLX5_DECAP_EN,
		MLX5_ALLOW_DUPLICATE_PATTERN,
		MLX5_FDB_DEFAULT_RULE_EN,
		MLX5_HWS_CNT_SERVICE_CORE,
		MLX5_HWS_CNT_CYCLE_TIME,
		MLX5_REPR_MATCHING_EN,
		NULL,
	};
	int ret = 0;

	/* Default configuration. */
	memset(config, 0, sizeof(*config));
	config->vf_nl_en = 1;
	config->dv_esw_en = 1;
	config->dv_flow_en = 1;
	config->decap_en = 1;
	config->allow_duplicate_pattern = 1;
	config->fdb_def_rule = 1;
	config->cnt_svc.cycle_time = MLX5_CNT_SVC_CYCLE_TIME_DEFAULT;
	config->cnt_svc.service_core = rte_get_main_lcore();
	config->repr_matching = 1;
	if (mkvlist != NULL) {
		/* Process parameters. */
		ret = mlx5_kvargs_process(mkvlist, params,
					  mlx5_dev_args_check_handler, config);
		if (ret) {
			DRV_LOG(ERR, "Failed to process device arguments: %s",
				strerror(rte_errno));
			return -rte_errno;
		}
	}
	/* Adjust parameters according to device capabilities. */
	if (config->dv_flow_en && !sh->dev_cap.dv_flow_en) {
		DRV_LOG(WARNING, "DV flow is not supported.");
		config->dv_flow_en = 0;
	}
	if (config->dv_esw_en && !sh->dev_cap.dv_esw_en) {
		DRV_LOG(DEBUG, "E-Switch DV flow is not supported.");
		config->dv_esw_en = 0;
	}
	if (config->dv_esw_en && !config->dv_flow_en) {
		DRV_LOG(DEBUG,
			"E-Switch DV flow is supported only when DV flow is enabled.");
		config->dv_esw_en = 0;
	}
	if (config->dv_miss_info && config->dv_esw_en)
		config->dv_xmeta_en = MLX5_XMETA_MODE_META16;
	if (!config->dv_esw_en &&
	    config->dv_xmeta_en != MLX5_XMETA_MODE_LEGACY) {
		DRV_LOG(WARNING,
			"Metadata mode %u is not supported (no E-Switch).",
			config->dv_xmeta_en);
		config->dv_xmeta_en = MLX5_XMETA_MODE_LEGACY;
	}
	if (config->dv_flow_en != 2 && !config->repr_matching) {
		DRV_LOG(DEBUG, "Disabling representor matching is valid only "
			       "when HW Steering is enabled.");
		config->repr_matching = 1;
	}
	if (config->tx_pp && !sh->dev_cap.txpp_en) {
		DRV_LOG(ERR, "Packet pacing is not supported.");
		rte_errno = ENODEV;
		return -rte_errno;
	}
	if (!config->tx_pp && config->tx_skew) {
		DRV_LOG(WARNING,
			"\"tx_skew\" has no effect without \"tx_pp\".");
	}
	/* Check for LRO support. */
	if (mlx5_devx_obj_ops_en(sh) && sh->cdev->config.hca_attr.lro_cap) {
		/* TBD check tunnel lro caps. */
		config->lro_allowed = 1;
		DRV_LOG(DEBUG, "LRO is allowed.");
		DRV_LOG(DEBUG,
			"LRO minimal size of TCP segment required for coalescing is %d bytes.",
			sh->cdev->config.hca_attr.lro_min_mss_size);
	}
	/*
	 * If HW has a bug working with tunnel packet decapsulation and
	 * scatter FCS, and decapsulation is needed, clear the hw_fcs_strip
	 * bit. Then the RTE_ETH_RX_OFFLOAD_KEEP_CRC bit will not be set
	 * anymore.
	 */
	if (sh->dev_cap.scatter_fcs_w_decap_disable && sh->config.decap_en)
		config->hw_fcs_strip = 0;
	else
		config->hw_fcs_strip = sh->dev_cap.hw_fcs_strip;
	DRV_LOG(DEBUG, "FCS stripping configuration is %ssupported",
		(config->hw_fcs_strip ? "" : "not "));
	DRV_LOG(DEBUG, "\"tx_pp\" is %d.", config->tx_pp);
	DRV_LOG(DEBUG, "\"tx_skew\" is %d.", config->tx_skew);
	DRV_LOG(DEBUG, "\"reclaim_mode\" is %u.", config->reclaim_mode);
	DRV_LOG(DEBUG, "\"dv_esw_en\" is %u.", config->dv_esw_en);
	DRV_LOG(DEBUG, "\"dv_flow_en\" is %u.", config->dv_flow_en);
	DRV_LOG(DEBUG, "\"dv_xmeta_en\" is %u.", config->dv_xmeta_en);
	DRV_LOG(DEBUG, "\"dv_miss_info\" is %u.", config->dv_miss_info);
	DRV_LOG(DEBUG, "\"l3_vxlan_en\" is %u.", config->l3_vxlan_en);
	DRV_LOG(DEBUG, "\"vf_nl_en\" is %u.", config->vf_nl_en);
	DRV_LOG(DEBUG, "\"lacp_by_user\" is %u.", config->lacp_by_user);
	DRV_LOG(DEBUG, "\"decap_en\" is %u.", config->decap_en);
	DRV_LOG(DEBUG, "\"allow_duplicate_pattern\" is %u.",
		config->allow_duplicate_pattern);
	DRV_LOG(DEBUG, "\"fdb_def_rule_en\" is %u.", config->fdb_def_rule);
	DRV_LOG(DEBUG, "\"repr_matching_en\" is %u.", config->repr_matching);
	return 0;
}

/**
 * Configure realtime timestamp format.
 *
 * @param sh
 *   Pointer to mlx5_dev_ctx_shared object.
 * @param hca_attr
 *   Pointer to DevX HCA capabilities structure.
 */
void
mlx5_rt_timestamp_config(struct mlx5_dev_ctx_shared *sh,
			 struct mlx5_hca_attr *hca_attr)
{
	uint32_t dw_cnt = MLX5_ST_SZ_DW(register_mtutc);
	uint32_t reg[dw_cnt];
	int ret = ENOTSUP;

	if (hca_attr->access_register_user)
		ret = mlx5_devx_cmd_register_read(sh->cdev->ctx,
						  MLX5_REGISTER_ID_MTUTC, 0,
						  reg, dw_cnt);
	if (!ret) {
		uint32_t ts_mode;

		/* MTUTC register is read successfully. */
		ts_mode = MLX5_GET(register_mtutc, reg, time_stamp_mode);
		if (ts_mode == MLX5_MTUTC_TIMESTAMP_MODE_REAL_TIME)
			sh->dev_cap.rt_timestamp = 1;
	} else {
		/* Kernel does not support register reading. */
		if (hca_attr->dev_freq_khz == (NS_PER_S / MS_PER_S))
			sh->dev_cap.rt_timestamp = 1;
	}
}

/**
 * Allocate the shared device context. If there is a multiport device, the
 * master and representors will share this context; if there is a single-port
 * dedicated device, the context will be used only by the given port due to
 * unification.
 *
 * The routine first searches the context list for the specified device name;
 * if found, the shared context is assumed and the reference counter is
 * incremented. If no context is found, a new one is created and initialized
 * with the specified device context and parameters.
 *
 * @param[in] spawn
 *   Pointer to the device attributes (name, port, etc).
 * @param mkvlist
 *   Pointer to mlx5 kvargs control, can be NULL if there is no devargs.
 *
 * @return
 *   Pointer to mlx5_dev_ctx_shared object on success,
 *   otherwise NULL and rte_errno is set.
 */
struct mlx5_dev_ctx_shared *
mlx5_alloc_shared_dev_ctx(const struct mlx5_dev_spawn_data *spawn,
			  struct mlx5_kvargs_ctrl *mkvlist)
{
	struct mlx5_dev_ctx_shared *sh;
	int err = 0;
	uint32_t i;

	MLX5_ASSERT(spawn);
	/* Secondary process should not create the shared context. */
	MLX5_ASSERT(rte_eal_process_type() == RTE_PROC_PRIMARY);
	pthread_mutex_lock(&mlx5_dev_ctx_list_mutex);
	/* Search for IB context by device name. */
	LIST_FOREACH(sh, &mlx5_dev_ctx_list, next) {
		if (!strcmp(sh->ibdev_name, spawn->phys_dev_name)) {
			sh->refcnt++;
			goto exit;
		}
	}
	/* No device found, we have to create new shared context. */
	MLX5_ASSERT(spawn->max_port);
	sh = mlx5_malloc(MLX5_MEM_ZERO | MLX5_MEM_RTE,
			 sizeof(struct mlx5_dev_ctx_shared) +
			 spawn->max_port * sizeof(struct mlx5_dev_shared_port),
			 RTE_CACHE_LINE_SIZE, SOCKET_ID_ANY);
	if (!sh) {
		DRV_LOG(ERR, "Shared context allocation failure.");
		rte_errno = ENOMEM;
		goto exit;
	}
	pthread_mutex_init(&sh->txpp.mutex, NULL);
	sh->numa_node = spawn->cdev->dev->numa_node;
	sh->cdev = spawn->cdev;
	sh->esw_mode = !!(spawn->info.master || spawn->info.representor);
	if (spawn->bond_info)
		sh->bond = *spawn->bond_info;
	err = mlx5_os_capabilities_prepare(sh);
	if (err) {
		DRV_LOG(ERR, "Failed to configure device capabilities.");
		goto error;
	}
	err = mlx5_shared_dev_ctx_args_config(sh, mkvlist, &sh->config);
	if (err) {
		DRV_LOG(ERR, "Failed to process device configure: %s",
			strerror(rte_errno));
		goto error;
	}
	sh->refcnt = 1;
	sh->max_port = spawn->max_port;
	strncpy(sh->ibdev_name, mlx5_os_get_ctx_device_name(sh->cdev->ctx),
		sizeof(sh->ibdev_name) - 1);
	strncpy(sh->ibdev_path, mlx5_os_get_ctx_device_path(sh->cdev->ctx),
		sizeof(sh->ibdev_path) - 1);
	/*
	 * Setting port_id to the maximum disallowed value means there is no
	 * interrupt subhandler installed for the given port index i.
	 */
	for (i = 0; i < sh->max_port; i++) {
		sh->port[i].ih_port_id = RTE_MAX_ETHPORTS;
		sh->port[i].devx_ih_port_id = RTE_MAX_ETHPORTS;
		sh->port[i].nl_ih_port_id = RTE_MAX_ETHPORTS;
	}
	if (sh->cdev->config.devx) {
		sh->td = mlx5_devx_cmd_create_td(sh->cdev->ctx);
		if (!sh->td) {
			DRV_LOG(ERR, "TD allocation failure");
			rte_errno = ENOMEM;
			goto error;
		}
		if (mlx5_setup_tis(sh)) {
			DRV_LOG(ERR, "TIS allocation failure");
			rte_errno = ENOMEM;
			goto error;
		}
		err = mlx5_rxtx_uars_prepare(sh);
		if (err)
			goto error;
#ifndef RTE_ARCH_64
	} else {
		/* Initialize UAR access locks for 32bit implementations. */
		rte_spinlock_init(&sh->uar_lock_cq);
		for (i = 0; i < MLX5_UAR_PAGE_NUM_MAX; i++)
			rte_spinlock_init(&sh->uar_lock[i]);
#endif
	}
	mlx5_os_dev_shared_handler_install(sh);
	if (LIST_EMPTY(&mlx5_dev_ctx_list)) {
		err = mlx5_flow_os_init_workspace_once();
		if (err)
			goto error;
	}
	err = mlx5_flow_counters_mng_init(sh);
	if (err) {
		DRV_LOG(ERR, "Failed to initialize counters management.");
		goto error;
	}
	mlx5_flow_aging_init(sh);
	mlx5_flow_ipool_create(sh);
	/* Add context to the global device list. */
	LIST_INSERT_HEAD(&mlx5_dev_ctx_list, sh, next);
	rte_spinlock_init(&sh->geneve_tlv_opt_sl);
exit:
	pthread_mutex_unlock(&mlx5_dev_ctx_list_mutex);
	return sh;
error:
	err = rte_errno;
	pthread_mutex_destroy(&sh->txpp.mutex);
	pthread_mutex_unlock(&mlx5_dev_ctx_list_mutex);
	MLX5_ASSERT(sh);
	mlx5_rxtx_uars_release(sh);
	i = 0;
	do {
		if (sh->tis[i])
			claim_zero(mlx5_devx_cmd_destroy(sh->tis[i]));
	} while (++i < (uint32_t)sh->bond.n_port);
	if (sh->td)
		claim_zero(mlx5_devx_cmd_destroy(sh->td));
	mlx5_free(sh);
	rte_errno = err;
	return NULL;
}

/**
 * Create LWM event_channel and interrupt handle for shared device
 * context. All rxqs sharing the device context share the event_channel.
 * A callback is registered in interrupt thread to receive the LWM event.
 *
 * @param[in] priv
 *   Pointer to mlx5_priv instance.
 *
 * @return
 *   0 on success, negative with rte_errno set.
 */
int
mlx5_lwm_setup(struct mlx5_priv *priv)
{
	int fd_lwm;

	pthread_mutex_init(&priv->sh->lwm_config_lock, NULL);
	priv->sh->devx_channel_lwm = mlx5_os_devx_create_event_channel
			(priv->sh->cdev->ctx,
			 MLX5DV_DEVX_CREATE_EVENT_CHANNEL_FLAGS_OMIT_EV_DATA);
	if (!priv->sh->devx_channel_lwm)
		goto err;
	fd_lwm = mlx5_os_get_devx_channel_fd(priv->sh->devx_channel_lwm);
	priv->sh->intr_handle_lwm = mlx5_os_interrupt_handler_create
		(RTE_INTR_INSTANCE_F_SHARED, true,
		 fd_lwm, mlx5_dev_interrupt_handler_lwm, priv);
	if (!priv->sh->intr_handle_lwm)
		goto err;
	return 0;
err:
	if (priv->sh->devx_channel_lwm) {
		mlx5_os_devx_destroy_event_channel
			(priv->sh->devx_channel_lwm);
		priv->sh->devx_channel_lwm = NULL;
	}
	pthread_mutex_destroy(&priv->sh->lwm_config_lock);
	return -rte_errno;
}

/**
 * Destroy LWM event_channel and interrupt handle for shared device
 * context before freeing this context. The interrupt handler is also
 * unregistered.
 *
 * @param[in] sh
 *   Pointer to shared device context.
 */
void
mlx5_lwm_unset(struct mlx5_dev_ctx_shared *sh)
{
	if (sh->intr_handle_lwm) {
		mlx5_os_interrupt_handler_destroy(sh->intr_handle_lwm,
			mlx5_dev_interrupt_handler_lwm, (void *)-1);
		sh->intr_handle_lwm = NULL;
	}
	if (sh->devx_channel_lwm) {
		mlx5_os_devx_destroy_event_channel
			(sh->devx_channel_lwm);
		sh->devx_channel_lwm = NULL;
	}
	pthread_mutex_destroy(&sh->lwm_config_lock);
}

/**
 * Free the shared IB device context. Decrement the reference counter and,
 * if it reaches zero, free all allocated resources and close handles.
 *
 * @param[in] sh
 *   Pointer to mlx5_dev_ctx_shared object to free.
 */
void
mlx5_free_shared_dev_ctx(struct mlx5_dev_ctx_shared *sh)
{
	int ret;
	int i = 0;

	pthread_mutex_lock(&mlx5_dev_ctx_list_mutex);
#ifdef RTE_LIBRTE_MLX5_DEBUG
	/* Check the object presence in the list. */
	struct mlx5_dev_ctx_shared *lctx;

	LIST_FOREACH(lctx, &mlx5_dev_ctx_list, next)
		if (lctx == sh)
			break;
	MLX5_ASSERT(lctx);
	if (lctx != sh) {
		DRV_LOG(ERR, "Freeing non-existing shared IB context");
		goto exit;
	}
#endif
	MLX5_ASSERT(sh);
	MLX5_ASSERT(sh->refcnt);
	/* Secondary process should not free the shared context. */
	MLX5_ASSERT(rte_eal_process_type() == RTE_PROC_PRIMARY);
	if (--sh->refcnt)
		goto exit;
	/* Stop watching for mempool events and unregister all mempools. */
	if (!sh->cdev->config.mr_mempool_reg_en) {
		ret = rte_mempool_event_callback_unregister
				(mlx5_dev_ctx_shared_rx_mempool_event_cb, sh);
		if (ret == 0)
			rte_mempool_walk
			     (mlx5_dev_ctx_shared_rx_mempool_unregister_cb, sh);
	}
	/* Remove context from the global device list. */
	LIST_REMOVE(sh, next);
	/* Release resources on the last device removal. */
	if (LIST_EMPTY(&mlx5_dev_ctx_list)) {
		mlx5_os_net_cleanup();
		mlx5_flow_os_release_workspace();
	}
	pthread_mutex_unlock(&mlx5_dev_ctx_list_mutex);
	if (sh->flex_parsers_dv) {
		mlx5_list_destroy(sh->flex_parsers_dv);
		sh->flex_parsers_dv = NULL;
	}
	/*
	 * Ensure there is no async event handler installed.
	 * Only primary process handles async device events.
	 **/
	mlx5_flow_counters_mng_close(sh);
	if (sh->ct_mng)
		mlx5_flow_aso_ct_mng_close(sh);
	if (sh->aso_age_mng) {
		mlx5_flow_aso_age_mng_close(sh);
		sh->aso_age_mng = NULL;
	}
	if (sh->mtrmng)
		mlx5_aso_flow_mtrs_mng_close(sh);
	mlx5_flow_ipool_destroy(sh);
	mlx5_os_dev_shared_handler_uninstall(sh);
	mlx5_rxtx_uars_release(sh);
	do {
		if (sh->tis[i])
			claim_zero(mlx5_devx_cmd_destroy(sh->tis[i]));
	} while (++i < sh->bond.n_port);
	if (sh->td)
		claim_zero(mlx5_devx_cmd_destroy(sh->td));
#ifdef HAVE_MLX5_HWS_SUPPORT
	/* HWS manages geneve_tlv_option resource as global. */
	if (sh->config.dv_flow_en == 2)
		flow_dev_geneve_tlv_option_resource_release(sh);
	else
#endif
		MLX5_ASSERT(sh->geneve_tlv_option_resource == NULL);
	pthread_mutex_destroy(&sh->txpp.mutex);
	mlx5_lwm_unset(sh);
	mlx5_free(sh);
	return;
exit:
	pthread_mutex_unlock(&mlx5_dev_ctx_list_mutex);
}

/**
 * Destroy table hash list.
 *
 * @param[in] priv
 *   Pointer to the private device data structure.
 */
void
mlx5_free_table_hash_list(struct mlx5_priv *priv)
{
	struct mlx5_dev_ctx_shared *sh = priv->sh;
	struct mlx5_hlist **tbls = (priv->sh->config.dv_flow_en == 2) ?
				   &sh->groups : &sh->flow_tbls;

	if (*tbls == NULL)
		return;
	mlx5_hlist_destroy(*tbls);
	*tbls = NULL;
}

#ifdef HAVE_MLX5_HWS_SUPPORT
/**
 * Allocate HW steering group hash list.
 *
 * @param[in] priv
 *   Pointer to the private device data structure.
 */
static int
mlx5_alloc_hw_group_hash_list(struct mlx5_priv *priv)
{
	int err = 0;
	struct mlx5_dev_ctx_shared *sh = priv->sh;
	char s[MLX5_NAME_SIZE];

	MLX5_ASSERT(sh);
	snprintf(s, sizeof(s), "%s_flow_groups", priv->sh->ibdev_name);
	sh->groups = mlx5_hlist_create
			(s, MLX5_FLOW_TABLE_HLIST_ARRAY_SIZE,
			 false, true, sh,
			 flow_hw_grp_create_cb,
			 flow_hw_grp_match_cb,
			 flow_hw_grp_remove_cb,
			 flow_hw_grp_clone_cb,
			 flow_hw_grp_clone_free_cb);
	if (!sh->groups) {
		DRV_LOG(ERR, "flow groups with hash creation failed.");
		err = ENOMEM;
	}
	return err;
}
#endif

/**
 * Initialize flow table hash list and create the root tables entry
 * for each domain.
 *
 * @param[in] priv
 *   Pointer to the private device data structure.
 *
 * @return
 *   Zero on success, positive error code otherwise.
 */
int
mlx5_alloc_table_hash_list(struct mlx5_priv *priv __rte_unused)
{
	int err = 0;

	/* Tables are only used in DV and DR modes. */
#if defined(HAVE_IBV_FLOW_DV_SUPPORT) || !defined(HAVE_INFINIBAND_VERBS_H)
	struct mlx5_dev_ctx_shared *sh = priv->sh;
	char s[MLX5_NAME_SIZE];

#ifdef HAVE_MLX5_HWS_SUPPORT
	if (priv->sh->config.dv_flow_en == 2)
		return mlx5_alloc_hw_group_hash_list(priv);
#endif
	MLX5_ASSERT(sh);
	snprintf(s, sizeof(s), "%s_flow_table", priv->sh->ibdev_name);
	sh->flow_tbls = mlx5_hlist_create(s, MLX5_FLOW_TABLE_HLIST_ARRAY_SIZE,
					  false, true, sh,
					  flow_dv_tbl_create_cb,
					  flow_dv_tbl_match_cb,
					  flow_dv_tbl_remove_cb,
					  flow_dv_tbl_clone_cb,
					  flow_dv_tbl_clone_free_cb);
	if (!sh->flow_tbls) {
		DRV_LOG(ERR, "flow tables with hash creation failed.");
		err = ENOMEM;
		return err;
	}
#ifndef HAVE_MLX5DV_DR
	struct rte_flow_error error;
	struct rte_eth_dev *dev = &rte_eth_devices[priv->dev_data->port_id];

	/*
	 * In case we do not have DR support, the zero tables should be
	 * created because DV expects to see them even if they cannot be
	 * created by RDMA-CORE.
	 */
	if (!flow_dv_tbl_resource_get(dev, 0, 0, 0, 0,
				      NULL, 0, 1, 0, &error) ||
	    !flow_dv_tbl_resource_get(dev, 0, 1, 0, 0,
				      NULL, 0, 1, 0, &error) ||
	    !flow_dv_tbl_resource_get(dev, 0, 0, 1, 0,
				      NULL, 0, 1, 0, &error)) {
		err = ENOMEM;
		goto error;
	}
	return err;
error:
	mlx5_free_table_hash_list(priv);
#endif /* HAVE_MLX5DV_DR */
#endif
	return err;
}

/**
 * Retrieve an integer value from an environment variable.
 *
 * @param[in] name
 *   Environment variable name.
 *
 * @return
 *   Integer value, 0 if the variable is not set.
 */
int
mlx5_getenv_int(const char *name)
{
	const char *val = getenv(name);

	if (val == NULL)
		return 0;
	return atoi(val);
}

/**
 * DPDK callback to add a UDP tunnel port.
 *
 * @param[in] dev
 *   A pointer to eth_dev.
 * @param[in] udp_tunnel
 *   A pointer to udp tunnel.
 *
 * @return
 *   0 on valid udp ports and tunnels, -ENOTSUP otherwise.
 */
int
mlx5_udp_tunnel_port_add(struct rte_eth_dev *dev __rte_unused,
			 struct rte_eth_udp_tunnel *udp_tunnel)
{
	MLX5_ASSERT(udp_tunnel != NULL);
	if (udp_tunnel->prot_type == RTE_ETH_TUNNEL_TYPE_VXLAN &&
	    udp_tunnel->udp_port == 4789)
		return 0;
	if (udp_tunnel->prot_type == RTE_ETH_TUNNEL_TYPE_VXLAN_GPE &&
	    udp_tunnel->udp_port == 4790)
		return 0;
	return -ENOTSUP;
}

/**
 * Initialize process private data structure.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_proc_priv_init(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_proc_priv *ppriv;
	size_t ppriv_size;

	mlx5_proc_priv_uninit(dev);
	/*
	 * UAR register table follows the process private structure. BlueFlame
	 * registers for Tx queues are stored in the table.
	 */
1954 */ 1955 ppriv_size = sizeof(struct mlx5_proc_priv) + 1956 priv->txqs_n * sizeof(struct mlx5_uar_data); 1957 ppriv = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO, ppriv_size, 1958 RTE_CACHE_LINE_SIZE, dev->device->numa_node); 1959 if (!ppriv) { 1960 rte_errno = ENOMEM; 1961 return -rte_errno; 1962 } 1963 ppriv->uar_table_sz = priv->txqs_n; 1964 dev->process_private = ppriv; 1965 if (rte_eal_process_type() == RTE_PROC_PRIMARY) 1966 priv->sh->pppriv = ppriv; 1967 return 0; 1968 } 1969 1970 /** 1971 * Un-initialize process private data structure. 1972 * 1973 * @param dev 1974 * Pointer to Ethernet device structure. 1975 */ 1976 void 1977 mlx5_proc_priv_uninit(struct rte_eth_dev *dev) 1978 { 1979 if (!dev->process_private) 1980 return; 1981 mlx5_free(dev->process_private); 1982 dev->process_private = NULL; 1983 } 1984 1985 /** 1986 * DPDK callback to close the device. 1987 * 1988 * Destroy all queues and objects, free memory. 1989 * 1990 * @param dev 1991 * Pointer to Ethernet device structure. 1992 */ 1993 int 1994 mlx5_dev_close(struct rte_eth_dev *dev) 1995 { 1996 struct mlx5_priv *priv = dev->data->dev_private; 1997 unsigned int i; 1998 int ret; 1999 2000 if (rte_eal_process_type() == RTE_PROC_SECONDARY) { 2001 /* Check if process_private released. */ 2002 if (!dev->process_private) 2003 return 0; 2004 mlx5_tx_uar_uninit_secondary(dev); 2005 mlx5_proc_priv_uninit(dev); 2006 rte_eth_dev_release_port(dev); 2007 return 0; 2008 } 2009 if (!priv->sh) 2010 return 0; 2011 DRV_LOG(DEBUG, "port %u closing device \"%s\"", 2012 dev->data->port_id, 2013 ((priv->sh->cdev->ctx != NULL) ? 2014 mlx5_os_get_ctx_device_name(priv->sh->cdev->ctx) : "")); 2015 /* 2016 * If default mreg copy action is removed at the stop stage, 2017 * the search will return none and nothing will be done anymore. 2018 */ 2019 if (priv->sh->config.dv_flow_en != 2) 2020 mlx5_flow_stop_default(dev); 2021 mlx5_traffic_disable(dev); 2022 /* 2023 * If all the flows are already flushed in the device stop stage, 2024 * then this will return directly without any action. 2025 */ 2026 mlx5_flow_list_flush(dev, MLX5_FLOW_TYPE_GEN, true); 2027 mlx5_action_handle_flush(dev); 2028 mlx5_flow_meter_flush(dev, NULL); 2029 /* Prevent crashes when queues are still in use. */ 2030 dev->rx_pkt_burst = rte_eth_pkt_burst_dummy; 2031 dev->tx_pkt_burst = rte_eth_pkt_burst_dummy; 2032 rte_wmb(); 2033 /* Disable datapath on secondary process. */ 2034 mlx5_mp_os_req_stop_rxtx(dev); 2035 /* Free the eCPRI flex parser resource. */ 2036 mlx5_flex_parser_ecpri_release(dev); 2037 mlx5_flex_item_port_cleanup(dev); 2038 #ifdef HAVE_MLX5_HWS_SUPPORT 2039 flow_hw_destroy_vport_action(dev); 2040 flow_hw_resource_release(dev); 2041 flow_hw_clear_port_info(dev); 2042 if (priv->sh->config.dv_flow_en == 2) { 2043 flow_hw_clear_flow_metadata_config(); 2044 flow_hw_clear_tags_set(dev); 2045 } 2046 #endif 2047 if (priv->rxq_privs != NULL) { 2048 /* XXX race condition if mlx5_rx_burst() is still running. */ 2049 rte_delay_us_sleep(1000); 2050 for (i = 0; (i != priv->rxqs_n); ++i) 2051 mlx5_rxq_release(dev, i); 2052 priv->rxqs_n = 0; 2053 mlx5_free(priv->rxq_privs); 2054 priv->rxq_privs = NULL; 2055 } 2056 if (priv->txqs != NULL) { 2057 /* XXX race condition if mlx5_tx_burst() is still running. 
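 * The 1 ms sleep below only narrows this window; it does not guarantee
 * that the Tx data path has quiesced.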
*/ 2058 rte_delay_us_sleep(1000); 2059 for (i = 0; (i != priv->txqs_n); ++i) 2060 mlx5_txq_release(dev, i); 2061 priv->txqs_n = 0; 2062 priv->txqs = NULL; 2063 } 2064 mlx5_proc_priv_uninit(dev); 2065 if (priv->q_counters) { 2066 mlx5_devx_cmd_destroy(priv->q_counters); 2067 priv->q_counters = NULL; 2068 } 2069 if (priv->drop_queue.hrxq) 2070 mlx5_drop_action_destroy(dev); 2071 if (priv->mreg_cp_tbl) 2072 mlx5_hlist_destroy(priv->mreg_cp_tbl); 2073 mlx5_mprq_free_mp(dev); 2074 mlx5_os_free_shared_dr(priv); 2075 if (priv->rss_conf.rss_key != NULL) 2076 mlx5_free(priv->rss_conf.rss_key); 2077 if (priv->reta_idx != NULL) 2078 mlx5_free(priv->reta_idx); 2079 if (priv->sh->dev_cap.vf) 2080 mlx5_os_mac_addr_flush(dev); 2081 if (priv->nl_socket_route >= 0) 2082 close(priv->nl_socket_route); 2083 if (priv->nl_socket_rdma >= 0) 2084 close(priv->nl_socket_rdma); 2085 if (priv->vmwa_context) 2086 mlx5_vlan_vmwa_exit(priv->vmwa_context); 2087 ret = mlx5_hrxq_verify(dev); 2088 if (ret) 2089 DRV_LOG(WARNING, "port %u some hash Rx queue still remain", 2090 dev->data->port_id); 2091 ret = mlx5_ind_table_obj_verify(dev); 2092 if (ret) 2093 DRV_LOG(WARNING, "port %u some indirection table still remain", 2094 dev->data->port_id); 2095 ret = mlx5_rxq_obj_verify(dev); 2096 if (ret) 2097 DRV_LOG(WARNING, "port %u some Rx queue objects still remain", 2098 dev->data->port_id); 2099 ret = mlx5_ext_rxq_verify(dev); 2100 if (ret) 2101 DRV_LOG(WARNING, "Port %u some external RxQ still remain.", 2102 dev->data->port_id); 2103 ret = mlx5_rxq_verify(dev); 2104 if (ret) 2105 DRV_LOG(WARNING, "port %u some Rx queues still remain", 2106 dev->data->port_id); 2107 ret = mlx5_txq_obj_verify(dev); 2108 if (ret) 2109 DRV_LOG(WARNING, "port %u some Verbs Tx queue still remain", 2110 dev->data->port_id); 2111 ret = mlx5_txq_verify(dev); 2112 if (ret) 2113 DRV_LOG(WARNING, "port %u some Tx queues still remain", 2114 dev->data->port_id); 2115 ret = mlx5_flow_verify(dev); 2116 if (ret) 2117 DRV_LOG(WARNING, "port %u some flows still remain", 2118 dev->data->port_id); 2119 if (priv->hrxqs) 2120 mlx5_list_destroy(priv->hrxqs); 2121 mlx5_free(priv->ext_rxqs); 2122 priv->sh->port[priv->dev_port - 1].nl_ih_port_id = RTE_MAX_ETHPORTS; 2123 /* 2124 * The interrupt handler port id must be reset before priv is reset 2125 * since 'mlx5_dev_interrupt_nl_cb' uses priv. 2126 */ 2127 rte_io_wmb(); 2128 /* 2129 * Free the shared context in last turn, because the cleanup 2130 * routines above may use some shared fields, like 2131 * mlx5_os_mac_addr_flush() uses ibdev_path for retrieving 2132 * ifindex if Netlink fails. 2133 */ 2134 mlx5_free_shared_dev_ctx(priv->sh); 2135 if (priv->domain_id != RTE_ETH_DEV_SWITCH_DOMAIN_ID_INVALID) { 2136 unsigned int c = 0; 2137 uint16_t port_id; 2138 2139 MLX5_ETH_FOREACH_DEV(port_id, dev->device) { 2140 struct mlx5_priv *opriv = 2141 rte_eth_devices[port_id].data->dev_private; 2142 2143 if (!opriv || 2144 opriv->domain_id != priv->domain_id || 2145 &rte_eth_devices[port_id] == dev) 2146 continue; 2147 ++c; 2148 break; 2149 } 2150 if (!c) 2151 claim_zero(rte_eth_switch_domain_free(priv->domain_id)); 2152 } 2153 memset(priv, 0, sizeof(*priv)); 2154 priv->domain_id = RTE_ETH_DEV_SWITCH_DOMAIN_ID_INVALID; 2155 /* 2156 * Reset mac_addrs to NULL such that it is not freed as part of 2157 * rte_eth_dev_release_port(). mac_addrs is part of dev_private so 2158 * it is freed when dev_private is freed. 
2159 */ 2160 dev->data->mac_addrs = NULL; 2161 return 0; 2162 } 2163 2164 const struct eth_dev_ops mlx5_dev_ops = { 2165 .dev_configure = mlx5_dev_configure, 2166 .dev_start = mlx5_dev_start, 2167 .dev_stop = mlx5_dev_stop, 2168 .dev_set_link_down = mlx5_set_link_down, 2169 .dev_set_link_up = mlx5_set_link_up, 2170 .dev_close = mlx5_dev_close, 2171 .promiscuous_enable = mlx5_promiscuous_enable, 2172 .promiscuous_disable = mlx5_promiscuous_disable, 2173 .allmulticast_enable = mlx5_allmulticast_enable, 2174 .allmulticast_disable = mlx5_allmulticast_disable, 2175 .link_update = mlx5_link_update, 2176 .stats_get = mlx5_stats_get, 2177 .stats_reset = mlx5_stats_reset, 2178 .xstats_get = mlx5_xstats_get, 2179 .xstats_reset = mlx5_xstats_reset, 2180 .xstats_get_names = mlx5_xstats_get_names, 2181 .fw_version_get = mlx5_fw_version_get, 2182 .dev_infos_get = mlx5_dev_infos_get, 2183 .representor_info_get = mlx5_representor_info_get, 2184 .read_clock = mlx5_txpp_read_clock, 2185 .dev_supported_ptypes_get = mlx5_dev_supported_ptypes_get, 2186 .vlan_filter_set = mlx5_vlan_filter_set, 2187 .rx_queue_setup = mlx5_rx_queue_setup, 2188 .rx_queue_avail_thresh_set = mlx5_rx_queue_lwm_set, 2189 .rx_queue_avail_thresh_query = mlx5_rx_queue_lwm_query, 2190 .rx_hairpin_queue_setup = mlx5_rx_hairpin_queue_setup, 2191 .tx_queue_setup = mlx5_tx_queue_setup, 2192 .tx_hairpin_queue_setup = mlx5_tx_hairpin_queue_setup, 2193 .rx_queue_release = mlx5_rx_queue_release, 2194 .tx_queue_release = mlx5_tx_queue_release, 2195 .rx_queue_start = mlx5_rx_queue_start, 2196 .rx_queue_stop = mlx5_rx_queue_stop, 2197 .tx_queue_start = mlx5_tx_queue_start, 2198 .tx_queue_stop = mlx5_tx_queue_stop, 2199 .flow_ctrl_get = mlx5_dev_get_flow_ctrl, 2200 .flow_ctrl_set = mlx5_dev_set_flow_ctrl, 2201 .mac_addr_remove = mlx5_mac_addr_remove, 2202 .mac_addr_add = mlx5_mac_addr_add, 2203 .mac_addr_set = mlx5_mac_addr_set, 2204 .set_mc_addr_list = mlx5_set_mc_addr_list, 2205 .mtu_set = mlx5_dev_set_mtu, 2206 .vlan_strip_queue_set = mlx5_vlan_strip_queue_set, 2207 .vlan_offload_set = mlx5_vlan_offload_set, 2208 .reta_update = mlx5_dev_rss_reta_update, 2209 .reta_query = mlx5_dev_rss_reta_query, 2210 .rss_hash_update = mlx5_rss_hash_update, 2211 .rss_hash_conf_get = mlx5_rss_hash_conf_get, 2212 .flow_ops_get = mlx5_flow_ops_get, 2213 .rxq_info_get = mlx5_rxq_info_get, 2214 .txq_info_get = mlx5_txq_info_get, 2215 .rx_burst_mode_get = mlx5_rx_burst_mode_get, 2216 .tx_burst_mode_get = mlx5_tx_burst_mode_get, 2217 .rx_queue_intr_enable = mlx5_rx_intr_enable, 2218 .rx_queue_intr_disable = mlx5_rx_intr_disable, 2219 .is_removed = mlx5_is_removed, 2220 .udp_tunnel_port_add = mlx5_udp_tunnel_port_add, 2221 .get_module_info = mlx5_get_module_info, 2222 .get_module_eeprom = mlx5_get_module_eeprom, 2223 .hairpin_cap_get = mlx5_hairpin_cap_get, 2224 .mtr_ops_get = mlx5_flow_meter_ops_get, 2225 .hairpin_bind = mlx5_hairpin_bind, 2226 .hairpin_unbind = mlx5_hairpin_unbind, 2227 .hairpin_get_peer_ports = mlx5_hairpin_get_peer_ports, 2228 .hairpin_queue_peer_update = mlx5_hairpin_queue_peer_update, 2229 .hairpin_queue_peer_bind = mlx5_hairpin_queue_peer_bind, 2230 .hairpin_queue_peer_unbind = mlx5_hairpin_queue_peer_unbind, 2231 .get_monitor_addr = mlx5_get_monitor_addr, 2232 }; 2233 2234 /* Available operations from secondary process. 
*/ 2235 const struct eth_dev_ops mlx5_dev_sec_ops = { 2236 .stats_get = mlx5_stats_get, 2237 .stats_reset = mlx5_stats_reset, 2238 .xstats_get = mlx5_xstats_get, 2239 .xstats_reset = mlx5_xstats_reset, 2240 .xstats_get_names = mlx5_xstats_get_names, 2241 .fw_version_get = mlx5_fw_version_get, 2242 .dev_infos_get = mlx5_dev_infos_get, 2243 .representor_info_get = mlx5_representor_info_get, 2244 .read_clock = mlx5_txpp_read_clock, 2245 .rx_queue_start = mlx5_rx_queue_start, 2246 .rx_queue_stop = mlx5_rx_queue_stop, 2247 .tx_queue_start = mlx5_tx_queue_start, 2248 .tx_queue_stop = mlx5_tx_queue_stop, 2249 .rxq_info_get = mlx5_rxq_info_get, 2250 .txq_info_get = mlx5_txq_info_get, 2251 .rx_burst_mode_get = mlx5_rx_burst_mode_get, 2252 .tx_burst_mode_get = mlx5_tx_burst_mode_get, 2253 .get_module_info = mlx5_get_module_info, 2254 .get_module_eeprom = mlx5_get_module_eeprom, 2255 }; 2256 2257 /* Available operations in flow isolated mode. */ 2258 const struct eth_dev_ops mlx5_dev_ops_isolate = { 2259 .dev_configure = mlx5_dev_configure, 2260 .dev_start = mlx5_dev_start, 2261 .dev_stop = mlx5_dev_stop, 2262 .dev_set_link_down = mlx5_set_link_down, 2263 .dev_set_link_up = mlx5_set_link_up, 2264 .dev_close = mlx5_dev_close, 2265 .promiscuous_enable = mlx5_promiscuous_enable, 2266 .promiscuous_disable = mlx5_promiscuous_disable, 2267 .allmulticast_enable = mlx5_allmulticast_enable, 2268 .allmulticast_disable = mlx5_allmulticast_disable, 2269 .link_update = mlx5_link_update, 2270 .stats_get = mlx5_stats_get, 2271 .stats_reset = mlx5_stats_reset, 2272 .xstats_get = mlx5_xstats_get, 2273 .xstats_reset = mlx5_xstats_reset, 2274 .xstats_get_names = mlx5_xstats_get_names, 2275 .fw_version_get = mlx5_fw_version_get, 2276 .dev_infos_get = mlx5_dev_infos_get, 2277 .representor_info_get = mlx5_representor_info_get, 2278 .read_clock = mlx5_txpp_read_clock, 2279 .dev_supported_ptypes_get = mlx5_dev_supported_ptypes_get, 2280 .vlan_filter_set = mlx5_vlan_filter_set, 2281 .rx_queue_setup = mlx5_rx_queue_setup, 2282 .rx_hairpin_queue_setup = mlx5_rx_hairpin_queue_setup, 2283 .tx_queue_setup = mlx5_tx_queue_setup, 2284 .tx_hairpin_queue_setup = mlx5_tx_hairpin_queue_setup, 2285 .rx_queue_release = mlx5_rx_queue_release, 2286 .tx_queue_release = mlx5_tx_queue_release, 2287 .rx_queue_start = mlx5_rx_queue_start, 2288 .rx_queue_stop = mlx5_rx_queue_stop, 2289 .tx_queue_start = mlx5_tx_queue_start, 2290 .tx_queue_stop = mlx5_tx_queue_stop, 2291 .flow_ctrl_get = mlx5_dev_get_flow_ctrl, 2292 .flow_ctrl_set = mlx5_dev_set_flow_ctrl, 2293 .mac_addr_remove = mlx5_mac_addr_remove, 2294 .mac_addr_add = mlx5_mac_addr_add, 2295 .mac_addr_set = mlx5_mac_addr_set, 2296 .set_mc_addr_list = mlx5_set_mc_addr_list, 2297 .mtu_set = mlx5_dev_set_mtu, 2298 .vlan_strip_queue_set = mlx5_vlan_strip_queue_set, 2299 .vlan_offload_set = mlx5_vlan_offload_set, 2300 .flow_ops_get = mlx5_flow_ops_get, 2301 .rxq_info_get = mlx5_rxq_info_get, 2302 .txq_info_get = mlx5_txq_info_get, 2303 .rx_burst_mode_get = mlx5_rx_burst_mode_get, 2304 .tx_burst_mode_get = mlx5_tx_burst_mode_get, 2305 .rx_queue_intr_enable = mlx5_rx_intr_enable, 2306 .rx_queue_intr_disable = mlx5_rx_intr_disable, 2307 .is_removed = mlx5_is_removed, 2308 .get_module_info = mlx5_get_module_info, 2309 .get_module_eeprom = mlx5_get_module_eeprom, 2310 .hairpin_cap_get = mlx5_hairpin_cap_get, 2311 .mtr_ops_get = mlx5_flow_meter_ops_get, 2312 .hairpin_bind = mlx5_hairpin_bind, 2313 .hairpin_unbind = mlx5_hairpin_unbind, 2314 .hairpin_get_peer_ports = mlx5_hairpin_get_peer_ports, 2315 
.hairpin_queue_peer_update = mlx5_hairpin_queue_peer_update, 2316 .hairpin_queue_peer_bind = mlx5_hairpin_queue_peer_bind, 2317 .hairpin_queue_peer_unbind = mlx5_hairpin_queue_peer_unbind, 2318 .get_monitor_addr = mlx5_get_monitor_addr, 2319 }; 2320 2321 /** 2322 * Verify and store value for device argument. 2323 * 2324 * @param[in] key 2325 * Key argument to verify. 2326 * @param[in] val 2327 * Value associated with key. 2328 * @param opaque 2329 * User data. 2330 * 2331 * @return 2332 * 0 on success, a negative errno value otherwise and rte_errno is set. 2333 */ 2334 static int 2335 mlx5_port_args_check_handler(const char *key, const char *val, void *opaque) 2336 { 2337 struct mlx5_port_config *config = opaque; 2338 signed long tmp; 2339 2340 /* No-op, port representors are processed in mlx5_dev_spawn(). */ 2341 if (!strcmp(MLX5_REPRESENTOR, key)) 2342 return 0; 2343 errno = 0; 2344 tmp = strtol(val, NULL, 0); 2345 if (errno) { 2346 rte_errno = errno; 2347 DRV_LOG(WARNING, "%s: \"%s\" is not a valid integer", key, val); 2348 return -rte_errno; 2349 } 2350 if (tmp < 0) { 2351 /* Negative values are acceptable for some keys only. */ 2352 rte_errno = EINVAL; 2353 DRV_LOG(WARNING, "%s: invalid negative value \"%s\"", key, val); 2354 return -rte_errno; 2355 } 2356 if (strcmp(MLX5_RXQ_CQE_COMP_EN, key) == 0) { 2357 if (tmp > MLX5_CQE_RESP_FORMAT_L34H_STRIDX) { 2358 DRV_LOG(ERR, "invalid CQE compression " 2359 "format parameter"); 2360 rte_errno = EINVAL; 2361 return -rte_errno; 2362 } 2363 config->cqe_comp = !!tmp; 2364 config->cqe_comp_fmt = tmp; 2365 } else if (strcmp(MLX5_RXQ_PKT_PAD_EN, key) == 0) { 2366 config->hw_padding = !!tmp; 2367 } else if (strcmp(MLX5_RX_MPRQ_EN, key) == 0) { 2368 config->mprq.enabled = !!tmp; 2369 } else if (strcmp(MLX5_RX_MPRQ_LOG_STRIDE_NUM, key) == 0) { 2370 config->mprq.log_stride_num = tmp; 2371 } else if (strcmp(MLX5_RX_MPRQ_LOG_STRIDE_SIZE, key) == 0) { 2372 config->mprq.log_stride_size = tmp; 2373 } else if (strcmp(MLX5_RX_MPRQ_MAX_MEMCPY_LEN, key) == 0) { 2374 config->mprq.max_memcpy_len = tmp; 2375 } else if (strcmp(MLX5_RXQS_MIN_MPRQ, key) == 0) { 2376 config->mprq.min_rxqs_num = tmp; 2377 } else if (strcmp(MLX5_TXQ_INLINE, key) == 0) { 2378 DRV_LOG(WARNING, "%s: deprecated parameter," 2379 " converted to txq_inline_max", key); 2380 config->txq_inline_max = tmp; 2381 } else if (strcmp(MLX5_TXQ_INLINE_MAX, key) == 0) { 2382 config->txq_inline_max = tmp; 2383 } else if (strcmp(MLX5_TXQ_INLINE_MIN, key) == 0) { 2384 config->txq_inline_min = tmp; 2385 } else if (strcmp(MLX5_TXQ_INLINE_MPW, key) == 0) { 2386 config->txq_inline_mpw = tmp; 2387 } else if (strcmp(MLX5_TXQS_MIN_INLINE, key) == 0) { 2388 config->txqs_inline = tmp; 2389 } else if (strcmp(MLX5_TXQS_MAX_VEC, key) == 0) { 2390 DRV_LOG(WARNING, "%s: deprecated parameter, ignored", key); 2391 } else if (strcmp(MLX5_TXQ_MPW_EN, key) == 0) { 2392 config->mps = !!tmp; 2393 } else if (strcmp(MLX5_TXQ_MPW_HDR_DSEG_EN, key) == 0) { 2394 DRV_LOG(WARNING, "%s: deprecated parameter, ignored", key); 2395 } else if (strcmp(MLX5_TXQ_MAX_INLINE_LEN, key) == 0) { 2396 DRV_LOG(WARNING, "%s: deprecated parameter," 2397 " converted to txq_inline_mpw", key); 2398 config->txq_inline_mpw = tmp; 2399 } else if (strcmp(MLX5_TX_VEC_EN, key) == 0) { 2400 DRV_LOG(WARNING, "%s: deprecated parameter, ignored", key); 2401 } else if (strcmp(MLX5_RX_VEC_EN, key) == 0) { 2402 config->rx_vec_en = !!tmp; 2403 } else if (strcmp(MLX5_MAX_DUMP_FILES_NUM, key) == 0) { 2404 config->max_dump_files_num = tmp; 2405 } else if 
(strcmp(MLX5_LRO_TIMEOUT_USEC, key) == 0) { 2406 config->lro_timeout = tmp; 2407 } else if (strcmp(MLX5_HP_BUF_SIZE, key) == 0) { 2408 config->log_hp_size = tmp; 2409 } else if (strcmp(MLX5_DELAY_DROP, key) == 0) { 2410 config->std_delay_drop = !!(tmp & MLX5_DELAY_DROP_STANDARD); 2411 config->hp_delay_drop = !!(tmp & MLX5_DELAY_DROP_HAIRPIN); 2412 } 2413 return 0; 2414 } 2415 2416 /** 2417 * Parse user port parameters and adjust them according to device capabilities. 2418 * 2419 * @param priv 2420 * Pointer to shared device context. 2421 * @param mkvlist 2422 * Pointer to mlx5 kvargs control, can be NULL if there is no devargs. 2423 * @param config 2424 * Pointer to port configuration structure. 2425 * 2426 * @return 2427 * 0 on success, a negative errno value otherwise and rte_errno is set. 2428 */ 2429 int 2430 mlx5_port_args_config(struct mlx5_priv *priv, struct mlx5_kvargs_ctrl *mkvlist, 2431 struct mlx5_port_config *config) 2432 { 2433 struct mlx5_hca_attr *hca_attr = &priv->sh->cdev->config.hca_attr; 2434 struct mlx5_dev_cap *dev_cap = &priv->sh->dev_cap; 2435 bool devx = priv->sh->cdev->config.devx; 2436 const char **params = (const char *[]){ 2437 MLX5_RXQ_CQE_COMP_EN, 2438 MLX5_RXQ_PKT_PAD_EN, 2439 MLX5_RX_MPRQ_EN, 2440 MLX5_RX_MPRQ_LOG_STRIDE_NUM, 2441 MLX5_RX_MPRQ_LOG_STRIDE_SIZE, 2442 MLX5_RX_MPRQ_MAX_MEMCPY_LEN, 2443 MLX5_RXQS_MIN_MPRQ, 2444 MLX5_TXQ_INLINE, 2445 MLX5_TXQ_INLINE_MIN, 2446 MLX5_TXQ_INLINE_MAX, 2447 MLX5_TXQ_INLINE_MPW, 2448 MLX5_TXQS_MIN_INLINE, 2449 MLX5_TXQS_MAX_VEC, 2450 MLX5_TXQ_MPW_EN, 2451 MLX5_TXQ_MPW_HDR_DSEG_EN, 2452 MLX5_TXQ_MAX_INLINE_LEN, 2453 MLX5_TX_VEC_EN, 2454 MLX5_RX_VEC_EN, 2455 MLX5_REPRESENTOR, 2456 MLX5_MAX_DUMP_FILES_NUM, 2457 MLX5_LRO_TIMEOUT_USEC, 2458 MLX5_HP_BUF_SIZE, 2459 MLX5_DELAY_DROP, 2460 NULL, 2461 }; 2462 int ret = 0; 2463 2464 /* Default configuration. */ 2465 memset(config, 0, sizeof(*config)); 2466 config->mps = MLX5_ARG_UNSET; 2467 config->cqe_comp = 1; 2468 config->rx_vec_en = 1; 2469 config->txq_inline_max = MLX5_ARG_UNSET; 2470 config->txq_inline_min = MLX5_ARG_UNSET; 2471 config->txq_inline_mpw = MLX5_ARG_UNSET; 2472 config->txqs_inline = MLX5_ARG_UNSET; 2473 config->mprq.max_memcpy_len = MLX5_MPRQ_MEMCPY_DEFAULT_LEN; 2474 config->mprq.min_rxqs_num = MLX5_MPRQ_MIN_RXQS; 2475 config->mprq.log_stride_num = MLX5_MPRQ_DEFAULT_LOG_STRIDE_NUM; 2476 config->log_hp_size = MLX5_ARG_UNSET; 2477 config->std_delay_drop = 0; 2478 config->hp_delay_drop = 0; 2479 if (mkvlist != NULL) { 2480 /* Process parameters. */ 2481 ret = mlx5_kvargs_process(mkvlist, params, 2482 mlx5_port_args_check_handler, config); 2483 if (ret) { 2484 DRV_LOG(ERR, "Failed to process port arguments: %s", 2485 strerror(rte_errno)); 2486 return -rte_errno; 2487 } 2488 } 2489 /* Adjust parameters according to device capabilities. */ 2490 if (config->hw_padding && !dev_cap->hw_padding) { 2491 DRV_LOG(DEBUG, "Rx end alignment padding isn't supported."); 2492 config->hw_padding = 0; 2493 } else if (config->hw_padding) { 2494 DRV_LOG(DEBUG, "Rx end alignment padding is enabled."); 2495 } 2496 /* 2497 * MPW is disabled by default, while the Enhanced MPW is enabled 2498 * by default. 2499 */ 2500 if (config->mps == MLX5_ARG_UNSET) 2501 config->mps = (dev_cap->mps == MLX5_MPW_ENHANCED) ? 2502 MLX5_MPW_ENHANCED : MLX5_MPW_DISABLED; 2503 else 2504 config->mps = config->mps ? dev_cap->mps : MLX5_MPW_DISABLED; 2505 DRV_LOG(INFO, "%sMPS is %s", 2506 config->mps == MLX5_MPW_ENHANCED ? "enhanced " : 2507 config->mps == MLX5_MPW ? 
"legacy " : "", 2508 config->mps != MLX5_MPW_DISABLED ? "enabled" : "disabled"); 2509 if (priv->sh->config.lro_allowed) { 2510 /* 2511 * If LRO timeout is not configured by application, 2512 * use the minimal supported value. 2513 */ 2514 if (!config->lro_timeout) 2515 config->lro_timeout = 2516 hca_attr->lro_timer_supported_periods[0]; 2517 DRV_LOG(DEBUG, "LRO session timeout set to %d usec.", 2518 config->lro_timeout); 2519 } 2520 if (config->cqe_comp && !dev_cap->cqe_comp) { 2521 DRV_LOG(WARNING, "Rx CQE 128B compression is not supported."); 2522 config->cqe_comp = 0; 2523 } 2524 if (config->cqe_comp_fmt == MLX5_CQE_RESP_FORMAT_FTAG_STRIDX && 2525 (!devx || !hca_attr->mini_cqe_resp_flow_tag)) { 2526 DRV_LOG(WARNING, 2527 "Flow Tag CQE compression format isn't supported."); 2528 config->cqe_comp = 0; 2529 } 2530 if (config->cqe_comp_fmt == MLX5_CQE_RESP_FORMAT_L34H_STRIDX && 2531 (!devx || !hca_attr->mini_cqe_resp_l3_l4_tag)) { 2532 DRV_LOG(WARNING, 2533 "L3/L4 Header CQE compression format isn't supported."); 2534 config->cqe_comp = 0; 2535 } 2536 DRV_LOG(DEBUG, "Rx CQE compression is %ssupported.", 2537 config->cqe_comp ? "" : "not "); 2538 if ((config->std_delay_drop || config->hp_delay_drop) && 2539 !dev_cap->rq_delay_drop_en) { 2540 config->std_delay_drop = 0; 2541 config->hp_delay_drop = 0; 2542 DRV_LOG(WARNING, "dev_port-%u: Rxq delay drop isn't supported.", 2543 priv->dev_port); 2544 } 2545 if (config->mprq.enabled && !priv->sh->dev_cap.mprq.enabled) { 2546 DRV_LOG(WARNING, "Multi-Packet RQ isn't supported."); 2547 config->mprq.enabled = 0; 2548 } 2549 if (config->max_dump_files_num == 0) 2550 config->max_dump_files_num = 128; 2551 /* Detect minimal data bytes to inline. */ 2552 mlx5_set_min_inline(priv); 2553 DRV_LOG(DEBUG, "VLAN insertion in WQE is %ssupported.", 2554 config->hw_vlan_insert ? "" : "not "); 2555 DRV_LOG(DEBUG, "\"rxq_pkt_pad_en\" is %u.", config->hw_padding); 2556 DRV_LOG(DEBUG, "\"rxq_cqe_comp_en\" is %u.", config->cqe_comp); 2557 DRV_LOG(DEBUG, "\"cqe_comp_fmt\" is %u.", config->cqe_comp_fmt); 2558 DRV_LOG(DEBUG, "\"rx_vec_en\" is %u.", config->rx_vec_en); 2559 DRV_LOG(DEBUG, "Standard \"delay_drop\" is %u.", 2560 config->std_delay_drop); 2561 DRV_LOG(DEBUG, "Hairpin \"delay_drop\" is %u.", config->hp_delay_drop); 2562 DRV_LOG(DEBUG, "\"max_dump_files_num\" is %u.", 2563 config->max_dump_files_num); 2564 DRV_LOG(DEBUG, "\"log_hp_size\" is %u.", config->log_hp_size); 2565 DRV_LOG(DEBUG, "\"mprq_en\" is %u.", config->mprq.enabled); 2566 DRV_LOG(DEBUG, "\"mprq_log_stride_num\" is %u.", 2567 config->mprq.log_stride_num); 2568 DRV_LOG(DEBUG, "\"mprq_log_stride_size\" is %u.", 2569 config->mprq.log_stride_size); 2570 DRV_LOG(DEBUG, "\"mprq_max_memcpy_len\" is %u.", 2571 config->mprq.max_memcpy_len); 2572 DRV_LOG(DEBUG, "\"rxqs_min_mprq\" is %u.", config->mprq.min_rxqs_num); 2573 DRV_LOG(DEBUG, "\"lro_timeout_usec\" is %u.", config->lro_timeout); 2574 DRV_LOG(DEBUG, "\"txq_mpw_en\" is %d.", config->mps); 2575 DRV_LOG(DEBUG, "\"txqs_min_inline\" is %d.", config->txqs_inline); 2576 DRV_LOG(DEBUG, "\"txq_inline_min\" is %d.", config->txq_inline_min); 2577 DRV_LOG(DEBUG, "\"txq_inline_max\" is %d.", config->txq_inline_max); 2578 DRV_LOG(DEBUG, "\"txq_inline_mpw\" is %d.", config->txq_inline_mpw); 2579 return 0; 2580 } 2581 2582 /** 2583 * Print the key for device argument. 2584 * 2585 * It is "dummy" handler whose whole purpose is to enable using 2586 * mlx5_kvargs_process() function which set devargs as used. 2587 * 2588 * @param key 2589 * Key argument. 
2590 * @param val 2591 * Value associated with key, unused. 2592 * @param opaque 2593 * Unused, can be NULL. 2594 * 2595 * @return 2596 * 0 on success, function cannot fail. 2597 */ 2598 static int 2599 mlx5_dummy_handler(const char *key, const char *val, void *opaque) 2600 { 2601 DRV_LOG(DEBUG, "\tKey: \"%s\" is set as used.", key); 2602 RTE_SET_USED(opaque); 2603 RTE_SET_USED(val); 2604 return 0; 2605 } 2606 2607 /** 2608 * Set requested devargs as used when device is already spawned. 2609 * 2610 * It is necessary since it is valid to ask probe again for existing device, 2611 * if its devargs don't assign as used, mlx5_kvargs_validate() will fail. 2612 * 2613 * @param name 2614 * Name of the existing device. 2615 * @param port_id 2616 * Port identifier of the device. 2617 * @param mkvlist 2618 * Pointer to mlx5 kvargs control to sign as used. 2619 */ 2620 void 2621 mlx5_port_args_set_used(const char *name, uint16_t port_id, 2622 struct mlx5_kvargs_ctrl *mkvlist) 2623 { 2624 const char **params = (const char *[]){ 2625 MLX5_RXQ_CQE_COMP_EN, 2626 MLX5_RXQ_PKT_PAD_EN, 2627 MLX5_RX_MPRQ_EN, 2628 MLX5_RX_MPRQ_LOG_STRIDE_NUM, 2629 MLX5_RX_MPRQ_LOG_STRIDE_SIZE, 2630 MLX5_RX_MPRQ_MAX_MEMCPY_LEN, 2631 MLX5_RXQS_MIN_MPRQ, 2632 MLX5_TXQ_INLINE, 2633 MLX5_TXQ_INLINE_MIN, 2634 MLX5_TXQ_INLINE_MAX, 2635 MLX5_TXQ_INLINE_MPW, 2636 MLX5_TXQS_MIN_INLINE, 2637 MLX5_TXQS_MAX_VEC, 2638 MLX5_TXQ_MPW_EN, 2639 MLX5_TXQ_MPW_HDR_DSEG_EN, 2640 MLX5_TXQ_MAX_INLINE_LEN, 2641 MLX5_TX_VEC_EN, 2642 MLX5_RX_VEC_EN, 2643 MLX5_REPRESENTOR, 2644 MLX5_MAX_DUMP_FILES_NUM, 2645 MLX5_LRO_TIMEOUT_USEC, 2646 MLX5_HP_BUF_SIZE, 2647 MLX5_DELAY_DROP, 2648 NULL, 2649 }; 2650 2651 /* Secondary process should not handle devargs. */ 2652 if (rte_eal_process_type() != RTE_PROC_PRIMARY) 2653 return; 2654 MLX5_ASSERT(mkvlist != NULL); 2655 DRV_LOG(DEBUG, "Ethernet device \"%s\" for port %u " 2656 "already exists, set devargs as used:", name, port_id); 2657 /* This function cannot fail with this handler. */ 2658 mlx5_kvargs_process(mkvlist, params, mlx5_dummy_handler, NULL); 2659 } 2660 2661 /** 2662 * Check sibling device configurations when probing again. 2663 * 2664 * Sibling devices sharing infiniband device context should have compatible 2665 * configurations. This regards representors and bonding device. 2666 * 2667 * @param cdev 2668 * Pointer to mlx5 device structure. 2669 * @param mkvlist 2670 * Pointer to mlx5 kvargs control, can be NULL if there is no devargs. 2671 * 2672 * @return 2673 * 0 on success, a negative errno value otherwise and rte_errno is set. 2674 */ 2675 int 2676 mlx5_probe_again_args_validate(struct mlx5_common_device *cdev, 2677 struct mlx5_kvargs_ctrl *mkvlist) 2678 { 2679 struct mlx5_dev_ctx_shared *sh = NULL; 2680 struct mlx5_sh_config *config; 2681 int ret; 2682 2683 /* Secondary process should not handle devargs. */ 2684 if (rte_eal_process_type() != RTE_PROC_PRIMARY) 2685 return 0; 2686 pthread_mutex_lock(&mlx5_dev_ctx_list_mutex); 2687 /* Search for IB context by common device pointer. */ 2688 LIST_FOREACH(sh, &mlx5_dev_ctx_list, next) 2689 if (sh->cdev == cdev) 2690 break; 2691 pthread_mutex_unlock(&mlx5_dev_ctx_list_mutex); 2692 /* There is sh for this device -> it isn't probe again. 
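 * (more precisely: if no shared context is found below, this is the first
 * probe of the device and there are no existing devargs to validate against).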
*/
2693 if (sh == NULL)
2694 return 0;
2695 config = mlx5_malloc(MLX5_MEM_ZERO | MLX5_MEM_RTE,
2696 sizeof(struct mlx5_sh_config),
2697 RTE_CACHE_LINE_SIZE, SOCKET_ID_ANY);
2698 if (config == NULL) {
2699 rte_errno = ENOMEM;
2700 return -rte_errno;
2701 }
2702 /*
2703 * Create a temporary IB context configuration structure according to
2704 * the new devargs attached in this probe again.
2705 */
2706 ret = mlx5_shared_dev_ctx_args_config(sh, mkvlist, config);
2707 if (ret) {
2708 DRV_LOG(ERR, "Failed to process device configure: %s",
2709 strerror(rte_errno));
2710 mlx5_free(config);
2711 return ret;
2712 }
2713 /*
2714 * Check that the temporary structure matches the existing
2715 * IB context structure.
2716 */
2717 if (sh->config.dv_flow_en ^ config->dv_flow_en) {
2718 DRV_LOG(ERR, "\"dv_flow_en\" "
2719 "configuration mismatch for shared %s context.",
2720 sh->ibdev_name);
2721 goto error;
2722 }
2723 if ((sh->config.dv_xmeta_en ^ config->dv_xmeta_en) ||
2724 (sh->config.dv_miss_info ^ config->dv_miss_info)) {
2725 DRV_LOG(ERR, "\"dv_xmeta_en\" "
2726 "configuration mismatch for shared %s context.",
2727 sh->ibdev_name);
2728 goto error;
2729 }
2730 if (sh->config.dv_esw_en ^ config->dv_esw_en) {
2731 DRV_LOG(ERR, "\"dv_esw_en\" "
2732 "configuration mismatch for shared %s context.",
2733 sh->ibdev_name);
2734 goto error;
2735 }
2736 if (sh->config.reclaim_mode ^ config->reclaim_mode) {
2737 DRV_LOG(ERR, "\"reclaim_mode\" "
2738 "configuration mismatch for shared %s context.",
2739 sh->ibdev_name);
2740 goto error;
2741 }
2742 if (sh->config.allow_duplicate_pattern ^
2743 config->allow_duplicate_pattern) {
2744 DRV_LOG(ERR, "\"allow_duplicate_pattern\" "
2745 "configuration mismatch for shared %s context.",
2746 sh->ibdev_name);
2747 goto error;
2748 }
2749 if (sh->config.fdb_def_rule ^ config->fdb_def_rule) {
2750 DRV_LOG(ERR, "\"fdb_def_rule_en\" configuration mismatch for shared %s context.",
2751 sh->ibdev_name);
2752 goto error;
2753 }
2754 if (sh->config.l3_vxlan_en ^ config->l3_vxlan_en) {
2755 DRV_LOG(ERR, "\"l3_vxlan_en\" "
2756 "configuration mismatch for shared %s context.",
2757 sh->ibdev_name);
2758 goto error;
2759 }
2760 if (sh->config.decap_en ^ config->decap_en) {
2761 DRV_LOG(ERR, "\"decap_en\" "
2762 "configuration mismatch for shared %s context.",
2763 sh->ibdev_name);
2764 goto error;
2765 }
2766 if (sh->config.lacp_by_user ^ config->lacp_by_user) {
2767 DRV_LOG(ERR, "\"lacp_by_user\" "
2768 "configuration mismatch for shared %s context.",
2769 sh->ibdev_name);
2770 goto error;
2771 }
2772 if (sh->config.tx_pp ^ config->tx_pp) {
2773 DRV_LOG(ERR, "\"tx_pp\" "
2774 "configuration mismatch for shared %s context.",
2775 sh->ibdev_name);
2776 goto error;
2777 }
2778 if (sh->config.tx_skew ^ config->tx_skew) {
2779 DRV_LOG(ERR, "\"tx_skew\" "
2780 "configuration mismatch for shared %s context.",
2781 sh->ibdev_name);
2782 goto error;
2783 }
2784 mlx5_free(config);
2785 return 0;
2786 error:
2787 mlx5_free(config);
2788 rte_errno = EINVAL;
2789 return -rte_errno;
2790 }
2791
2792 /**
2793 * Configures the minimal amount of data to inline into a WQE
2794 * while sending packets.
2795 *
2796 * - txq_inline_min has the highest priority, if this
2797 * key is specified in devargs
2798 * - if DevX is enabled the inline mode is queried from the
2799 * device (HCA attributes and NIC vport context if needed).
2800 * - otherwise L2 mode (18 bytes) is assumed for ConnectX-4/4 Lx
2801 * and none (0 bytes) for other NICs
2802 *
2803 * @param priv
2804 * Pointer to the private device data structure.
2805 */
2806 void
2807 mlx5_set_min_inline(struct mlx5_priv *priv)
2808 {
2809 struct mlx5_hca_attr *hca_attr = &priv->sh->cdev->config.hca_attr;
2810 struct mlx5_port_config *config = &priv->config;
2811
2812 if (config->txq_inline_min != MLX5_ARG_UNSET) {
2813 /* Application defines size of inlined data explicitly. */
2814 if (priv->pci_dev != NULL) {
2815 switch (priv->pci_dev->id.device_id) {
2816 case PCI_DEVICE_ID_MELLANOX_CONNECTX4:
2817 case PCI_DEVICE_ID_MELLANOX_CONNECTX4VF:
2818 if (config->txq_inline_min <
2819 (int)MLX5_INLINE_HSIZE_L2) {
2820 DRV_LOG(DEBUG,
2821 "txq_inline_min aligned to minimal ConnectX-4 required value %d",
2822 (int)MLX5_INLINE_HSIZE_L2);
2823 config->txq_inline_min =
2824 MLX5_INLINE_HSIZE_L2;
2825 }
2826 break;
2827 }
2828 }
2829 goto exit;
2830 }
2831 if (hca_attr->eth_net_offloads) {
2832 /* We have DevX enabled, inline mode queried successfully. */
2833 switch (hca_attr->wqe_inline_mode) {
2834 case MLX5_CAP_INLINE_MODE_L2:
2835 /* Outer L2 header must be inlined. */
2836 config->txq_inline_min = MLX5_INLINE_HSIZE_L2;
2837 goto exit;
2838 case MLX5_CAP_INLINE_MODE_NOT_REQUIRED:
2839 /* No inline data is required by the NIC. */
2840 config->txq_inline_min = MLX5_INLINE_HSIZE_NONE;
2841 config->hw_vlan_insert =
2842 hca_attr->wqe_vlan_insert;
2843 DRV_LOG(DEBUG, "Tx VLAN insertion is supported");
2844 goto exit;
2845 case MLX5_CAP_INLINE_MODE_VPORT_CONTEXT:
2846 /* Inline mode is defined by the NIC vport context. */
2847 if (!hca_attr->eth_virt)
2848 break;
2849 switch (hca_attr->vport_inline_mode) {
2850 case MLX5_INLINE_MODE_NONE:
2851 config->txq_inline_min =
2852 MLX5_INLINE_HSIZE_NONE;
2853 goto exit;
2854 case MLX5_INLINE_MODE_L2:
2855 config->txq_inline_min =
2856 MLX5_INLINE_HSIZE_L2;
2857 goto exit;
2858 case MLX5_INLINE_MODE_IP:
2859 config->txq_inline_min =
2860 MLX5_INLINE_HSIZE_L3;
2861 goto exit;
2862 case MLX5_INLINE_MODE_TCP_UDP:
2863 config->txq_inline_min =
2864 MLX5_INLINE_HSIZE_L4;
2865 goto exit;
2866 case MLX5_INLINE_MODE_INNER_L2:
2867 config->txq_inline_min =
2868 MLX5_INLINE_HSIZE_INNER_L2;
2869 goto exit;
2870 case MLX5_INLINE_MODE_INNER_IP:
2871 config->txq_inline_min =
2872 MLX5_INLINE_HSIZE_INNER_L3;
2873 goto exit;
2874 case MLX5_INLINE_MODE_INNER_TCP_UDP:
2875 config->txq_inline_min =
2876 MLX5_INLINE_HSIZE_INNER_L4;
2877 goto exit;
2878 }
2879 }
2880 }
2881 if (priv->pci_dev == NULL) {
2882 config->txq_inline_min = MLX5_INLINE_HSIZE_NONE;
2883 goto exit;
2884 }
2885 /*
2886 * We get here if we are unable to deduce
2887 * inline data size with DevX. Try PCI ID
2888 * to determine old NICs.
2889 */
2890 switch (priv->pci_dev->id.device_id) {
2891 case PCI_DEVICE_ID_MELLANOX_CONNECTX4:
2892 case PCI_DEVICE_ID_MELLANOX_CONNECTX4VF:
2893 case PCI_DEVICE_ID_MELLANOX_CONNECTX4LX:
2894 case PCI_DEVICE_ID_MELLANOX_CONNECTX4LXVF:
2895 config->txq_inline_min = MLX5_INLINE_HSIZE_L2;
2896 config->hw_vlan_insert = 0;
2897 break;
2898 case PCI_DEVICE_ID_MELLANOX_CONNECTX5:
2899 case PCI_DEVICE_ID_MELLANOX_CONNECTX5VF:
2900 case PCI_DEVICE_ID_MELLANOX_CONNECTX5EX:
2901 case PCI_DEVICE_ID_MELLANOX_CONNECTX5EXVF:
2902 /*
2903 * These NICs support VLAN insertion from WQE and
2904 * report the wqe_vlan_insert flag. However, there is a bug
2905 * that may break PFC control, so the feature is disabled.
2906 */
2907 config->hw_vlan_insert = 0;
2908 config->txq_inline_min = MLX5_INLINE_HSIZE_NONE;
2909 break;
2910 default:
2911 config->txq_inline_min = MLX5_INLINE_HSIZE_NONE;
2912 break;
2913 }
2914 exit:
2915 DRV_LOG(DEBUG, "min tx inline configured: %d", config->txq_inline_min);
2916 }
2917
2918 /**
2919 * Configures the metadata mask fields in the shared context.
2920 *
2921 * @param[in] dev
2922 * Pointer to Ethernet device.
2923 */
2924 void
2925 mlx5_set_metadata_mask(struct rte_eth_dev *dev)
2926 {
2927 struct mlx5_priv *priv = dev->data->dev_private;
2928 struct mlx5_dev_ctx_shared *sh = priv->sh;
2929 uint32_t meta, mark, reg_c0;
2930
2931 reg_c0 = ~priv->vport_meta_mask;
2932 switch (sh->config.dv_xmeta_en) {
2933 case MLX5_XMETA_MODE_LEGACY:
2934 meta = UINT32_MAX;
2935 mark = MLX5_FLOW_MARK_MASK;
2936 break;
2937 case MLX5_XMETA_MODE_META16:
2938 meta = reg_c0 >> rte_bsf32(reg_c0);
2939 mark = MLX5_FLOW_MARK_MASK;
2940 break;
2941 case MLX5_XMETA_MODE_META32:
2942 meta = UINT32_MAX;
2943 mark = (reg_c0 >> rte_bsf32(reg_c0)) & MLX5_FLOW_MARK_MASK;
2944 break;
2945 case MLX5_XMETA_MODE_META32_HWS:
2946 meta = UINT32_MAX;
2947 mark = MLX5_FLOW_MARK_MASK;
2948 break;
2949 default:
2950 meta = 0;
2951 mark = 0;
2952 MLX5_ASSERT(false);
2953 break;
2954 }
2955 if (sh->dv_mark_mask && sh->dv_mark_mask != mark)
2956 DRV_LOG(WARNING, "metadata MARK mask mismatch %08X:%08X",
2957 sh->dv_mark_mask, mark);
2958 else
2959 sh->dv_mark_mask = mark;
2960 if (sh->dv_meta_mask && sh->dv_meta_mask != meta)
2961 DRV_LOG(WARNING, "metadata META mask mismatch %08X:%08X",
2962 sh->dv_meta_mask, meta);
2963 else
2964 sh->dv_meta_mask = meta;
2965 if (sh->dv_regc0_mask && sh->dv_regc0_mask != reg_c0)
2966 DRV_LOG(WARNING, "metadata reg_c0 mask mismatch %08X:%08X",
2967 sh->dv_regc0_mask, reg_c0);
2968 else
2969 sh->dv_regc0_mask = reg_c0;
2970 DRV_LOG(DEBUG, "metadata mode %u", sh->config.dv_xmeta_en);
2971 DRV_LOG(DEBUG, "metadata MARK mask %08X", sh->dv_mark_mask);
2972 DRV_LOG(DEBUG, "metadata META mask %08X", sh->dv_meta_mask);
2973 DRV_LOG(DEBUG, "metadata reg_c0 mask %08X", sh->dv_regc0_mask);
2974 }
2975
2976 int
2977 rte_pmd_mlx5_get_dyn_flag_names(char *names[], unsigned int n)
2978 {
2979 static const char *const dynf_names[] = {
2980 RTE_PMD_MLX5_FINE_GRANULARITY_INLINE,
2981 RTE_MBUF_DYNFLAG_METADATA_NAME,
2982 RTE_MBUF_DYNFLAG_TX_TIMESTAMP_NAME
2983 };
2984 unsigned int i;
2985
2986 if (n < RTE_DIM(dynf_names))
2987 return -ENOMEM;
2988 for (i = 0; i < RTE_DIM(dynf_names); i++) {
2989 if (names[i] == NULL)
2990 return -EINVAL;
2991 strcpy(names[i], dynf_names[i]);
2992 }
2993 return RTE_DIM(dynf_names);
2994 }
2995
2996 /**
2997 * Look for the Ethernet device belonging to the mlx5 driver.
2998 *
2999 * @param[in] port_id
3000 * port_id to start looking for device.
3001 * @param[in] odev
3002 * Pointer to the hint device. When a device is being probed, its
3003 * siblings (the master and preceding representors) might not have a
3004 * driver assigned yet because mlx5_os_pci_probe() has not completed;
3005 * in this case matching on the hint device may be used to detect a
3006 * sibling device.
3007 *
3008 * @return
3009 * port_id of found device, RTE_MAX_ETHPORTS if not found.
3010 */ 3011 uint16_t 3012 mlx5_eth_find_next(uint16_t port_id, struct rte_device *odev) 3013 { 3014 while (port_id < RTE_MAX_ETHPORTS) { 3015 struct rte_eth_dev *dev = &rte_eth_devices[port_id]; 3016 3017 if (dev->state != RTE_ETH_DEV_UNUSED && 3018 dev->device && 3019 (dev->device == odev || 3020 (dev->device->driver && 3021 dev->device->driver->name && 3022 ((strcmp(dev->device->driver->name, 3023 MLX5_PCI_DRIVER_NAME) == 0) || 3024 (strcmp(dev->device->driver->name, 3025 MLX5_AUXILIARY_DRIVER_NAME) == 0))))) 3026 break; 3027 port_id++; 3028 } 3029 if (port_id >= RTE_MAX_ETHPORTS) 3030 return RTE_MAX_ETHPORTS; 3031 return port_id; 3032 } 3033 3034 /** 3035 * Callback to remove a device. 3036 * 3037 * This function removes all Ethernet devices belong to a given device. 3038 * 3039 * @param[in] cdev 3040 * Pointer to the generic device. 3041 * 3042 * @return 3043 * 0 on success, the function cannot fail. 3044 */ 3045 int 3046 mlx5_net_remove(struct mlx5_common_device *cdev) 3047 { 3048 uint16_t port_id; 3049 int ret = 0; 3050 3051 RTE_ETH_FOREACH_DEV_OF(port_id, cdev->dev) { 3052 /* 3053 * mlx5_dev_close() is not registered to secondary process, 3054 * call the close function explicitly for secondary process. 3055 */ 3056 if (rte_eal_process_type() == RTE_PROC_SECONDARY) 3057 ret |= mlx5_dev_close(&rte_eth_devices[port_id]); 3058 else 3059 ret |= rte_eth_dev_close(port_id); 3060 } 3061 return ret == 0 ? 0 : -EIO; 3062 } 3063 3064 static const struct rte_pci_id mlx5_pci_id_map[] = { 3065 { 3066 RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX, 3067 PCI_DEVICE_ID_MELLANOX_CONNECTX4) 3068 }, 3069 { 3070 RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX, 3071 PCI_DEVICE_ID_MELLANOX_CONNECTX4VF) 3072 }, 3073 { 3074 RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX, 3075 PCI_DEVICE_ID_MELLANOX_CONNECTX4LX) 3076 }, 3077 { 3078 RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX, 3079 PCI_DEVICE_ID_MELLANOX_CONNECTX4LXVF) 3080 }, 3081 { 3082 RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX, 3083 PCI_DEVICE_ID_MELLANOX_CONNECTX5) 3084 }, 3085 { 3086 RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX, 3087 PCI_DEVICE_ID_MELLANOX_CONNECTX5VF) 3088 }, 3089 { 3090 RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX, 3091 PCI_DEVICE_ID_MELLANOX_CONNECTX5EX) 3092 }, 3093 { 3094 RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX, 3095 PCI_DEVICE_ID_MELLANOX_CONNECTX5EXVF) 3096 }, 3097 { 3098 RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX, 3099 PCI_DEVICE_ID_MELLANOX_CONNECTX5BF) 3100 }, 3101 { 3102 RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX, 3103 PCI_DEVICE_ID_MELLANOX_CONNECTX5BFVF) 3104 }, 3105 { 3106 RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX, 3107 PCI_DEVICE_ID_MELLANOX_CONNECTX6) 3108 }, 3109 { 3110 RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX, 3111 PCI_DEVICE_ID_MELLANOX_CONNECTX6VF) 3112 }, 3113 { 3114 RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX, 3115 PCI_DEVICE_ID_MELLANOX_CONNECTX6DX) 3116 }, 3117 { 3118 RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX, 3119 PCI_DEVICE_ID_MELLANOX_CONNECTXVF) 3120 }, 3121 { 3122 RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX, 3123 PCI_DEVICE_ID_MELLANOX_CONNECTX6DXBF) 3124 }, 3125 { 3126 RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX, 3127 PCI_DEVICE_ID_MELLANOX_CONNECTX6LX) 3128 }, 3129 { 3130 RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX, 3131 PCI_DEVICE_ID_MELLANOX_CONNECTX7) 3132 }, 3133 { 3134 RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX, 3135 PCI_DEVICE_ID_MELLANOX_CONNECTX7BF) 3136 }, 3137 { 3138 .vendor_id = 0 3139 } 3140 }; 3141 3142 static struct mlx5_class_driver mlx5_net_driver = { 3143 .drv_class = MLX5_CLASS_ETH, 3144 .name = RTE_STR(MLX5_ETH_DRIVER_NAME), 3145 .id_table = mlx5_pci_id_map, 3146 .probe = 
mlx5_os_net_probe, 3147 .remove = mlx5_net_remove, 3148 .probe_again = 1, 3149 .intr_lsc = 1, 3150 .intr_rmv = 1, 3151 }; 3152 3153 /* Initialize driver log type. */ 3154 RTE_LOG_REGISTER_DEFAULT(mlx5_logtype, NOTICE) 3155 3156 /** 3157 * Driver initialization routine. 3158 */ 3159 RTE_INIT(rte_mlx5_pmd_init) 3160 { 3161 pthread_mutex_init(&mlx5_dev_ctx_list_mutex, NULL); 3162 mlx5_common_init(); 3163 /* Build the static tables for Verbs conversion. */ 3164 mlx5_set_ptype_table(); 3165 mlx5_set_cksum_table(); 3166 mlx5_set_swp_types_table(); 3167 if (mlx5_glue) 3168 mlx5_class_driver_register(&mlx5_net_driver); 3169 } 3170 3171 RTE_PMD_EXPORT_NAME(MLX5_ETH_DRIVER_NAME, __COUNTER__); 3172 RTE_PMD_REGISTER_PCI_TABLE(MLX5_ETH_DRIVER_NAME, mlx5_pci_id_map); 3173 RTE_PMD_REGISTER_KMOD_DEP(MLX5_ETH_DRIVER_NAME, "* ib_uverbs & mlx5_core & mlx5_ib"); 3174
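
/*
 * Illustrative usage note (editorial sketch, not part of the driver): the
 * per-port devargs handled by mlx5_port_args_check_handler() above are
 * passed as "key=value" pairs on the EAL allow-list option, e.g. assuming a
 * hypothetical device at PCI address 0000:08:00.0:
 *
 *   dpdk-testpmd -a 0000:08:00.0,rxq_cqe_comp_en=1,mprq_en=1,rxqs_min_mprq=2 -- -i
 *
 * Non-integer or negative values are rejected by the handler with rte_errno
 * set; deprecated keys such as txq_inline are accepted but converted or
 * ignored with a warning.
 */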