/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2015 6WIND S.A.
 * Copyright 2015 Mellanox Technologies, Ltd
 */

#include <stddef.h>
#include <unistd.h>
#include <string.h>
#include <stdint.h>
#include <stdlib.h>
#include <errno.h>

#include <rte_malloc.h>
#include <ethdev_driver.h>
#include <rte_pci.h>
#include <rte_bus_pci.h>
#include <rte_common.h>
#include <rte_kvargs.h>
#include <rte_rwlock.h>
#include <rte_spinlock.h>
#include <rte_string_fns.h>
#include <rte_alarm.h>
#include <rte_cycles.h>

#include <mlx5_glue.h>
#include <mlx5_devx_cmds.h>
#include <mlx5_common.h>
#include <mlx5_common_os.h>
#include <mlx5_common_mp.h>
#include <mlx5_malloc.h>

#include "mlx5_defs.h"
#include "mlx5.h"
#include "mlx5_utils.h"
#include "mlx5_rxtx.h"
#include "mlx5_rx.h"
#include "mlx5_tx.h"
#include "mlx5_autoconf.h"
#include "mlx5_mr.h"
#include "mlx5_flow.h"
#include "mlx5_flow_os.h"
#include "rte_pmd_mlx5.h"

#define MLX5_ETH_DRIVER_NAME mlx5_eth

/* Driver type key for new device global syntax. */
#define MLX5_DRIVER_KEY "driver"

/* Device parameter to enable RX completion queue compression. */
#define MLX5_RXQ_CQE_COMP_EN "rxq_cqe_comp_en"

/* Device parameter to enable padding Rx packet to cacheline size. */
#define MLX5_RXQ_PKT_PAD_EN "rxq_pkt_pad_en"

/* Device parameter to enable Multi-Packet Rx queue. */
#define MLX5_RX_MPRQ_EN "mprq_en"

/* Device parameter to configure log 2 of the number of strides for MPRQ. */
#define MLX5_RX_MPRQ_LOG_STRIDE_NUM "mprq_log_stride_num"

/* Device parameter to configure log 2 of the stride size for MPRQ. */
#define MLX5_RX_MPRQ_LOG_STRIDE_SIZE "mprq_log_stride_size"

/* Device parameter to limit the size of memcpy'd packet for MPRQ. */
#define MLX5_RX_MPRQ_MAX_MEMCPY_LEN "mprq_max_memcpy_len"

/* Device parameter to set the minimum number of Rx queues to enable MPRQ. */
#define MLX5_RXQS_MIN_MPRQ "rxqs_min_mprq"

/* Device parameter to configure inline send. Deprecated, ignored. */
#define MLX5_TXQ_INLINE "txq_inline"

/* Device parameter to limit packet size to inline with ordinary SEND. */
#define MLX5_TXQ_INLINE_MAX "txq_inline_max"

/* Device parameter to configure minimal data size to inline. */
#define MLX5_TXQ_INLINE_MIN "txq_inline_min"

/* Device parameter to limit packet size to inline with Enhanced MPW. */
#define MLX5_TXQ_INLINE_MPW "txq_inline_mpw"

/*
 * Device parameter to configure the number of TX queues threshold for
 * enabling inline send.
 */
#define MLX5_TXQS_MIN_INLINE "txqs_min_inline"

/*
 * Device parameter to configure the number of TX queues threshold for
 * enabling vectorized Tx, deprecated, ignored (no vectorized Tx routines).
 */
#define MLX5_TXQS_MAX_VEC "txqs_max_vec"

/* Device parameter to enable multi-packet send WQEs. */
#define MLX5_TXQ_MPW_EN "txq_mpw_en"

/*
 * Device parameter to force doorbell register mapping
 * to the non-cached region, eliminating the extra write memory barrier.
 */
#define MLX5_TX_DB_NC "tx_db_nc"

/*
 * Device parameter to include 2 dsegs in the title WQEBB.
 * Deprecated, ignored.
 */
#define MLX5_TXQ_MPW_HDR_DSEG_EN "txq_mpw_hdr_dseg_en"

/*
 * Device parameter to limit the size of inlining packet.
 * Deprecated, ignored.
 */
#define MLX5_TXQ_MAX_INLINE_LEN "txq_max_inline_len"

/*
 * Device parameter to enable Tx scheduling on timestamps
 * and specify the packet pacing granularity in nanoseconds.
 */
#define MLX5_TX_PP "tx_pp"

/*
 * Device parameter to specify skew in nanoseconds on Tx datapath,
 * it represents the time between SQ start WQE processing and
 * appearing actual packet data on the wire.
 */
#define MLX5_TX_SKEW "tx_skew"

/*
 * Device parameter to enable hardware Tx vector.
 * Deprecated, ignored (no vectorized Tx routines anymore).
 */
#define MLX5_TX_VEC_EN "tx_vec_en"

/* Device parameter to enable hardware Rx vector. */
#define MLX5_RX_VEC_EN "rx_vec_en"

/* Allow L3 VXLAN flow creation. */
#define MLX5_L3_VXLAN_EN "l3_vxlan_en"

/* Activate DV E-Switch flow steering. */
#define MLX5_DV_ESW_EN "dv_esw_en"

/* Activate DV flow steering. */
#define MLX5_DV_FLOW_EN "dv_flow_en"

/* Enable extensive flow metadata support. */
#define MLX5_DV_XMETA_EN "dv_xmeta_en"

/* Device parameter to let the user manage the LACP traffic of a bonded device. */
#define MLX5_LACP_BY_USER "lacp_by_user"

/* Activate Netlink support in VF mode. */
#define MLX5_VF_NL_EN "vf_nl_en"

/* Enable extending memsegs when creating a MR. */
#define MLX5_MR_EXT_MEMSEG_EN "mr_ext_memseg_en"

/* Select port representors to instantiate. */
#define MLX5_REPRESENTOR "representor"

/* Device parameter to configure the maximum number of dump files per queue. */
#define MLX5_MAX_DUMP_FILES_NUM "max_dump_files_num"

/* Configure timeout of LRO session (in microseconds). */
#define MLX5_LRO_TIMEOUT_USEC "lro_timeout_usec"

/*
 * Device parameter to configure the total data buffer size for a single
 * hairpin queue (logarithm value).
 */
#define MLX5_HP_BUF_SIZE "hp_buf_log_sz"

/* Flow memory reclaim mode. */
#define MLX5_RECLAIM_MEM "reclaim_mem_mode"

/* The default memory allocator used in PMD. */
#define MLX5_SYS_MEM_EN "sys_mem_en"

/* Decap will be used or not. */
#define MLX5_DECAP_EN "decap_en"

/* Device parameter to configure allowing or preventing duplicate rule patterns. */
#define MLX5_ALLOW_DUPLICATE_PATTERN "allow_duplicate_pattern"

/* Device parameter to configure implicit registration of mempool memory. */
#define MLX5_MR_MEMPOOL_REG_EN "mr_mempool_reg_en"
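
/*
 * Illustrative note, not part of the original sources: the keys defined above
 * are parsed from the device argument string passed to EAL, for example:
 *
 *     dpdk-testpmd -a 0000:03:00.0,rxq_cqe_comp_en=1,mprq_en=1,dv_flow_en=1
 *
 * The key/value pairs shown are only an example combination, not recommended
 * defaults; unknown keys are reported by the PMD's kvargs parsing.
 */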

/* Shared memory between primary and secondary processes. */
struct mlx5_shared_data *mlx5_shared_data;

/** Driver-specific log messages type. */
int mlx5_logtype;

static LIST_HEAD(, mlx5_dev_ctx_shared) mlx5_dev_ctx_list =
						LIST_HEAD_INITIALIZER();
static pthread_mutex_t mlx5_dev_ctx_list_mutex;

static const struct mlx5_indexed_pool_config mlx5_ipool_cfg[] = {
#if defined(HAVE_IBV_FLOW_DV_SUPPORT) || !defined(HAVE_INFINIBAND_VERBS_H)
	[MLX5_IPOOL_DECAP_ENCAP] = {
		.size = sizeof(struct mlx5_flow_dv_encap_decap_resource),
		.trunk_size = 64,
		.grow_trunk = 3,
		.grow_shift = 2,
		.need_lock = 1,
		.release_mem_en = 1,
		.malloc = mlx5_malloc,
		.free = mlx5_free,
		.type = "mlx5_encap_decap_ipool",
	},
	[MLX5_IPOOL_PUSH_VLAN] = {
		.size = sizeof(struct mlx5_flow_dv_push_vlan_action_resource),
		.trunk_size = 64,
		.grow_trunk = 3,
		.grow_shift = 2,
		.need_lock = 1,
		.release_mem_en = 1,
		.malloc = mlx5_malloc,
		.free = mlx5_free,
		.type = "mlx5_push_vlan_ipool",
	},
	[MLX5_IPOOL_TAG] = {
		.size = sizeof(struct mlx5_flow_dv_tag_resource),
		.trunk_size = 64,
		.grow_trunk = 3,
		.grow_shift = 2,
		.need_lock = 1,
		.release_mem_en = 0,
		.per_core_cache = (1 << 16),
		.malloc = mlx5_malloc,
		.free = mlx5_free,
		.type = "mlx5_tag_ipool",
	},
	[MLX5_IPOOL_PORT_ID] = {
		.size = sizeof(struct mlx5_flow_dv_port_id_action_resource),
		.trunk_size = 64,
		.grow_trunk = 3,
		.grow_shift = 2,
		.need_lock = 1,
		.release_mem_en = 1,
		.malloc = mlx5_malloc,
		.free = mlx5_free,
		.type = "mlx5_port_id_ipool",
	},
	[MLX5_IPOOL_JUMP] = {
		.size = sizeof(struct mlx5_flow_tbl_data_entry),
		.trunk_size = 64,
		.grow_trunk = 3,
		.grow_shift = 2,
		.need_lock = 1,
		.release_mem_en = 1,
		.malloc = mlx5_malloc,
		.free = mlx5_free,
		.type = "mlx5_jump_ipool",
	},
	[MLX5_IPOOL_SAMPLE] = {
		.size = sizeof(struct mlx5_flow_dv_sample_resource),
		.trunk_size = 64,
		.grow_trunk = 3,
		.grow_shift = 2,
		.need_lock = 1,
		.release_mem_en = 1,
		.malloc = mlx5_malloc,
		.free = mlx5_free,
		.type = "mlx5_sample_ipool",
	},
	[MLX5_IPOOL_DEST_ARRAY] = {
		.size = sizeof(struct mlx5_flow_dv_dest_array_resource),
		.trunk_size = 64,
		.grow_trunk = 3,
		.grow_shift = 2,
		.need_lock = 1,
		.release_mem_en = 1,
		.malloc = mlx5_malloc,
		.free = mlx5_free,
		.type = "mlx5_dest_array_ipool",
	},
	[MLX5_IPOOL_TUNNEL_ID] = {
		.size = sizeof(struct mlx5_flow_tunnel),
		.trunk_size = MLX5_MAX_TUNNELS,
		.need_lock = 1,
		.release_mem_en = 1,
		.type = "mlx5_tunnel_offload",
	},
	[MLX5_IPOOL_TNL_TBL_ID] = {
		.size = 0,
		.need_lock = 1,
		.type = "mlx5_flow_tnl_tbl_ipool",
	},
#endif
	[MLX5_IPOOL_MTR] = {
		/**
		 * The ipool index should grow continually from small to big;
		 * for the meter index, grow_trunk is not set so that indexes
		 * stay contiguous.
		 */
		.size = sizeof(struct mlx5_legacy_flow_meter),
		.trunk_size = 64,
		.need_lock = 1,
		.release_mem_en = 1,
		.malloc = mlx5_malloc,
		.free = mlx5_free,
		.type = "mlx5_meter_ipool",
	},
	[MLX5_IPOOL_MCP] = {
		.size = sizeof(struct mlx5_flow_mreg_copy_resource),
		.trunk_size = 64,
		.grow_trunk = 3,
		.grow_shift = 2,
		.need_lock = 1,
		.release_mem_en = 1,
		.malloc = mlx5_malloc,
		.free = mlx5_free,
		.type = "mlx5_mcp_ipool",
	},
	[MLX5_IPOOL_HRXQ] = {
		.size = (sizeof(struct mlx5_hrxq) + MLX5_RSS_HASH_KEY_LEN),
		.trunk_size = 64,
		.grow_trunk = 3,
		.grow_shift = 2,
		.need_lock = 1,
		.release_mem_en = 1,
		.malloc = mlx5_malloc,
		.free = mlx5_free,
		.type = "mlx5_hrxq_ipool",
	},
	[MLX5_IPOOL_MLX5_FLOW] = {
		/*
		 * MLX5_IPOOL_MLX5_FLOW size varies for DV and VERBS flows.
		 * It is set at run time according to the PCI function
		 * configuration.
		 */
		.size = 0,
		.trunk_size = 64,
		.grow_trunk = 3,
		.grow_shift = 2,
		.need_lock = 1,
		.release_mem_en = 0,
		.per_core_cache = 1 << 19,
		.malloc = mlx5_malloc,
		.free = mlx5_free,
		.type = "mlx5_flow_handle_ipool",
	},
	[MLX5_IPOOL_RTE_FLOW] = {
		.size = sizeof(struct rte_flow),
		.trunk_size = 4096,
		.need_lock = 1,
		.release_mem_en = 1,
		.malloc = mlx5_malloc,
		.free = mlx5_free,
		.type = "rte_flow_ipool",
	},
	[MLX5_IPOOL_RSS_EXPANTION_FLOW_ID] = {
		.size = 0,
		.need_lock = 1,
		.type = "mlx5_flow_rss_id_ipool",
	},
	[MLX5_IPOOL_RSS_SHARED_ACTIONS] = {
		.size = sizeof(struct mlx5_shared_action_rss),
		.trunk_size = 64,
		.grow_trunk = 3,
		.grow_shift = 2,
		.need_lock = 1,
		.release_mem_en = 1,
		.malloc = mlx5_malloc,
		.free = mlx5_free,
		.type = "mlx5_shared_action_rss",
	},
	[MLX5_IPOOL_MTR_POLICY] = {
		/**
		 * The ipool index should grow continually from small to big;
		 * for the policy index, grow_trunk is not set so that indexes
		 * stay contiguous.
		 */
		.size = sizeof(struct mlx5_flow_meter_sub_policy),
		.trunk_size = 64,
		.need_lock = 1,
		.release_mem_en = 1,
		.malloc = mlx5_malloc,
		.free = mlx5_free,
		.type = "mlx5_meter_policy_ipool",
	},
};

#define MLX5_FLOW_MIN_ID_POOL_SIZE 512
#define MLX5_ID_GENERATION_ARRAY_FACTOR 16

#define MLX5_FLOW_TABLE_HLIST_ARRAY_SIZE 1024

/**
 * Decide whether representor ID is an HPF (host PF) port on BF2.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 *
 * @return
 *   Non-zero if HPF, otherwise 0.
 */
bool
mlx5_is_hpf(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	uint16_t repr = MLX5_REPRESENTOR_REPR(priv->representor_id);
	int type = MLX5_REPRESENTOR_TYPE(priv->representor_id);

	return priv->representor != 0 && type == RTE_ETH_REPRESENTOR_VF &&
	       MLX5_REPRESENTOR_REPR(-1) == repr;
}

/**
 * Decide whether representor ID is a SF port representor.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 *
 * @return
 *   Non-zero if SF port representor, otherwise 0.
 */
bool
mlx5_is_sf_repr(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	int type = MLX5_REPRESENTOR_TYPE(priv->representor_id);

	return priv->representor != 0 && type == RTE_ETH_REPRESENTOR_SF;
}

/**
 * Initialize the ASO aging management structure.
 *
 * @param[in] sh
 *   Pointer to the mlx5_dev_ctx_shared object.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_flow_aso_age_mng_init(struct mlx5_dev_ctx_shared *sh)
{
	int err;

	if (sh->aso_age_mng)
		return 0;
	sh->aso_age_mng = mlx5_malloc(MLX5_MEM_ZERO, sizeof(*sh->aso_age_mng),
				      RTE_CACHE_LINE_SIZE, SOCKET_ID_ANY);
	if (!sh->aso_age_mng) {
		DRV_LOG(ERR, "aso_age_mng allocation failed.");
		rte_errno = ENOMEM;
		return -ENOMEM;
	}
	err = mlx5_aso_queue_init(sh, ASO_OPC_MOD_FLOW_HIT);
	if (err) {
		mlx5_free(sh->aso_age_mng);
		return -1;
	}
	rte_spinlock_init(&sh->aso_age_mng->resize_sl);
	rte_spinlock_init(&sh->aso_age_mng->free_sl);
	LIST_INIT(&sh->aso_age_mng->free);
	return 0;
}

/**
 * Close and release all the resources of the ASO aging management structure.
 *
 * @param[in] sh
 *   Pointer to mlx5_dev_ctx_shared object to free.
 */
static void
mlx5_flow_aso_age_mng_close(struct mlx5_dev_ctx_shared *sh)
{
	int i, j;

	mlx5_aso_flow_hit_queue_poll_stop(sh);
	mlx5_aso_queue_uninit(sh, ASO_OPC_MOD_FLOW_HIT);
	if (sh->aso_age_mng->pools) {
		struct mlx5_aso_age_pool *pool;

		for (i = 0; i < sh->aso_age_mng->next; ++i) {
			pool = sh->aso_age_mng->pools[i];
			claim_zero(mlx5_devx_cmd_destroy
						(pool->flow_hit_aso_obj));
			for (j = 0; j < MLX5_COUNTERS_PER_POOL; ++j)
				if (pool->actions[j].dr_action)
					claim_zero
					    (mlx5_flow_os_destroy_flow_action
					     (pool->actions[j].dr_action));
			mlx5_free(pool);
		}
		mlx5_free(sh->aso_age_mng->pools);
	}
	mlx5_free(sh->aso_age_mng);
}

/**
 * Initialize the shared aging list information per port.
 *
 * @param[in] sh
 *   Pointer to mlx5_dev_ctx_shared object.
 */
static void
mlx5_flow_aging_init(struct mlx5_dev_ctx_shared *sh)
{
	uint32_t i;
	struct mlx5_age_info *age_info;

	for (i = 0; i < sh->max_port; i++) {
		age_info = &sh->port[i].age_info;
		age_info->flags = 0;
		TAILQ_INIT(&age_info->aged_counters);
		LIST_INIT(&age_info->aged_aso);
		rte_spinlock_init(&age_info->aged_sl);
		MLX5_AGE_SET(age_info, MLX5_AGE_TRIGGER);
	}
}

/**
 * Initialize the counters management structure.
 *
 * @param[in] sh
 *   Pointer to the mlx5_dev_ctx_shared object.
 */
static void
mlx5_flow_counters_mng_init(struct mlx5_dev_ctx_shared *sh)
{
	int i;

	memset(&sh->cmng, 0, sizeof(sh->cmng));
	TAILQ_INIT(&sh->cmng.flow_counters);
	sh->cmng.min_id = MLX5_CNT_BATCH_OFFSET;
	sh->cmng.max_id = -1;
	sh->cmng.last_pool_idx = POOL_IDX_INVALID;
	rte_spinlock_init(&sh->cmng.pool_update_sl);
	for (i = 0; i < MLX5_COUNTER_TYPE_MAX; i++) {
		TAILQ_INIT(&sh->cmng.counters[i]);
		rte_spinlock_init(&sh->cmng.csl[i]);
	}
}

/**
 * Destroy all the resources allocated for a counter memory management.
 *
 * @param[in] mng
 *   Pointer to the memory management structure.
 */
static void
mlx5_flow_destroy_counter_stat_mem_mng(struct mlx5_counter_stats_mem_mng *mng)
{
	uint8_t *mem = (uint8_t *)(uintptr_t)mng->raws[0].data;

	LIST_REMOVE(mng, next);
	claim_zero(mlx5_devx_cmd_destroy(mng->dm));
	claim_zero(mlx5_os_umem_dereg(mng->umem));
	mlx5_free(mem);
}

/**
 * Close and release all the resources of the counters management.
 *
 * @param[in] sh
 *   Pointer to mlx5_dev_ctx_shared object to free.
 */
static void
mlx5_flow_counters_mng_close(struct mlx5_dev_ctx_shared *sh)
{
	struct mlx5_counter_stats_mem_mng *mng;
	int i, j;
	int retries = 1024;

	rte_errno = 0;
	while (--retries) {
		rte_eal_alarm_cancel(mlx5_flow_query_alarm, sh);
		if (rte_errno != EINPROGRESS)
			break;
		rte_pause();
	}

	if (sh->cmng.pools) {
		struct mlx5_flow_counter_pool *pool;
		uint16_t n_valid = sh->cmng.n_valid;
		bool fallback = sh->cmng.counter_fallback;

		for (i = 0; i < n_valid; ++i) {
			pool = sh->cmng.pools[i];
			if (!fallback && pool->min_dcs)
				claim_zero(mlx5_devx_cmd_destroy
							     (pool->min_dcs));
			for (j = 0; j < MLX5_COUNTERS_PER_POOL; ++j) {
				struct mlx5_flow_counter *cnt =
						MLX5_POOL_GET_CNT(pool, j);

				if (cnt->action)
					claim_zero
					 (mlx5_flow_os_destroy_flow_action
					  (cnt->action));
				if (fallback && MLX5_POOL_GET_CNT
				    (pool, j)->dcs_when_free)
					claim_zero(mlx5_devx_cmd_destroy
						   (cnt->dcs_when_free));
			}
			mlx5_free(pool);
		}
		mlx5_free(sh->cmng.pools);
	}
	mng = LIST_FIRST(&sh->cmng.mem_mngs);
	while (mng) {
		mlx5_flow_destroy_counter_stat_mem_mng(mng);
		mng = LIST_FIRST(&sh->cmng.mem_mngs);
	}
	memset(&sh->cmng, 0, sizeof(sh->cmng));
}

/**
 * Initialize the ASO flow meters management structure.
 *
 * @param[in] sh
 *   Pointer to the mlx5_dev_ctx_shared object.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_aso_flow_mtrs_mng_init(struct mlx5_dev_ctx_shared *sh)
{
	if (!sh->mtrmng) {
		sh->mtrmng = mlx5_malloc(MLX5_MEM_ZERO,
			sizeof(*sh->mtrmng),
			RTE_CACHE_LINE_SIZE, SOCKET_ID_ANY);
		if (!sh->mtrmng) {
			DRV_LOG(ERR,
				"meter management allocation failed.");
			rte_errno = ENOMEM;
			return -ENOMEM;
		}
		if (sh->meter_aso_en) {
			rte_spinlock_init(&sh->mtrmng->pools_mng.mtrsl);
			LIST_INIT(&sh->mtrmng->pools_mng.meters);
		}
		sh->mtrmng->def_policy_id = MLX5_INVALID_POLICY_ID;
	}
	return 0;
}

/**
 * Close and release all the resources of
 * the ASO flow meter management structure.
 *
 * @param[in] sh
 *   Pointer to mlx5_dev_ctx_shared object to free.
 */
static void
mlx5_aso_flow_mtrs_mng_close(struct mlx5_dev_ctx_shared *sh)
{
	struct mlx5_aso_mtr_pool *mtr_pool;
	struct mlx5_flow_mtr_mng *mtrmng = sh->mtrmng;
	uint32_t idx;
#ifdef HAVE_MLX5_DR_CREATE_ACTION_ASO
	struct mlx5_aso_mtr *aso_mtr;
	int i;
#endif /* HAVE_MLX5_DR_CREATE_ACTION_ASO */

	if (sh->meter_aso_en) {
		mlx5_aso_queue_uninit(sh, ASO_OPC_MOD_POLICER);
		idx = mtrmng->pools_mng.n_valid;
		while (idx--) {
			mtr_pool = mtrmng->pools_mng.pools[idx];
#ifdef HAVE_MLX5_DR_CREATE_ACTION_ASO
			for (i = 0; i < MLX5_ASO_MTRS_PER_POOL; i++) {
				aso_mtr = &mtr_pool->mtrs[i];
				if (aso_mtr->fm.meter_action)
					claim_zero
					    (mlx5_glue->destroy_flow_action
					     (aso_mtr->fm.meter_action));
			}
#endif /* HAVE_MLX5_DR_CREATE_ACTION_ASO */
			claim_zero(mlx5_devx_cmd_destroy
						(mtr_pool->devx_obj));
			mtrmng->pools_mng.n_valid--;
			mlx5_free(mtr_pool);
		}
		mlx5_free(sh->mtrmng->pools_mng.pools);
	}
	mlx5_free(sh->mtrmng);
	sh->mtrmng = NULL;
}

/* Send FLOW_AGED event if needed. */
void
mlx5_age_event_prepare(struct mlx5_dev_ctx_shared *sh)
{
	struct mlx5_age_info *age_info;
	uint32_t i;

	for (i = 0; i < sh->max_port; i++) {
		age_info = &sh->port[i].age_info;
		if (!MLX5_AGE_GET(age_info, MLX5_AGE_EVENT_NEW))
			continue;
		MLX5_AGE_UNSET(age_info, MLX5_AGE_EVENT_NEW);
		if (MLX5_AGE_GET(age_info, MLX5_AGE_TRIGGER)) {
			MLX5_AGE_UNSET(age_info, MLX5_AGE_TRIGGER);
			rte_eth_dev_callback_process
				(&rte_eth_devices[sh->port[i].devx_ih_port_id],
				 RTE_ETH_EVENT_FLOW_AGED, NULL);
		}
	}
}

/*
 * Initialize the ASO connection tracking structure.
 *
 * @param[in] sh
 *   Pointer to mlx5_dev_ctx_shared object.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_flow_aso_ct_mng_init(struct mlx5_dev_ctx_shared *sh)
{
	int err;

	if (sh->ct_mng)
		return 0;
	sh->ct_mng = mlx5_malloc(MLX5_MEM_ZERO, sizeof(*sh->ct_mng),
				 RTE_CACHE_LINE_SIZE, SOCKET_ID_ANY);
	if (!sh->ct_mng) {
		DRV_LOG(ERR, "ASO CT management allocation failed.");
		rte_errno = ENOMEM;
		return -rte_errno;
	}
	err = mlx5_aso_queue_init(sh, ASO_OPC_MOD_CONNECTION_TRACKING);
	if (err) {
		mlx5_free(sh->ct_mng);
		/* rte_errno should be extracted from the failure. */
		rte_errno = EINVAL;
		return -rte_errno;
	}
	rte_spinlock_init(&sh->ct_mng->ct_sl);
	rte_rwlock_init(&sh->ct_mng->resize_rwl);
	LIST_INIT(&sh->ct_mng->free_cts);
	return 0;
}

/*
 * Close and release all the resources of the
 * ASO connection tracking management structure.
 *
 * @param[in] sh
 *   Pointer to mlx5_dev_ctx_shared object to free.
 */
static void
mlx5_flow_aso_ct_mng_close(struct mlx5_dev_ctx_shared *sh)
{
	struct mlx5_aso_ct_pools_mng *mng = sh->ct_mng;
	struct mlx5_aso_ct_pool *ct_pool;
	struct mlx5_aso_ct_action *ct;
	uint32_t idx;
	uint32_t val;
	uint32_t cnt;
	int i;

	mlx5_aso_queue_uninit(sh, ASO_OPC_MOD_CONNECTION_TRACKING);
	idx = mng->next;
	while (idx--) {
		cnt = 0;
		ct_pool = mng->pools[idx];
		for (i = 0; i < MLX5_ASO_CT_ACTIONS_PER_POOL; i++) {
			ct = &ct_pool->actions[i];
			val = __atomic_fetch_sub(&ct->refcnt, 1,
						 __ATOMIC_RELAXED);
			MLX5_ASSERT(val == 1);
			if (val > 1)
				cnt++;
#ifdef HAVE_MLX5_DR_ACTION_ASO_CT
			if (ct->dr_action_orig)
				claim_zero(mlx5_glue->destroy_flow_action
							(ct->dr_action_orig));
			if (ct->dr_action_rply)
				claim_zero(mlx5_glue->destroy_flow_action
							(ct->dr_action_rply));
#endif
		}
		claim_zero(mlx5_devx_cmd_destroy(ct_pool->devx_obj));
		if (cnt) {
			DRV_LOG(DEBUG, "%u ASO CT objects are being used in the pool %u",
				cnt, idx);
		}
		mlx5_free(ct_pool);
		/* in case of failure. */
		mng->next--;
	}
	mlx5_free(mng->pools);
	mlx5_free(mng);
	/* Management structure must be cleared to 0s during allocation. */
	sh->ct_mng = NULL;
}
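
/*
 * Illustrative sketch, not part of the original driver: it shows how an entry
 * is typically taken from one of the pools configured in mlx5_ipool_cfg[] and
 * created by mlx5_flow_ipool_create() below. It assumes the mlx5_ipool_*
 * helpers declared in mlx5_utils.h; the guard macro is hypothetical and never
 * defined, so the example is not compiled in.
 */
#ifdef MLX5_IPOOL_USAGE_EXAMPLE
static void
mlx5_ipool_usage_example(struct mlx5_dev_ctx_shared *sh)
{
	uint32_t idx = 0;
	struct rte_flow *flow;

	/* Allocate a zeroed entry and receive its pool index. */
	flow = mlx5_ipool_zmalloc(sh->ipool[MLX5_IPOOL_RTE_FLOW], &idx);
	if (flow == NULL)
		return;
	/* Flow lists store the index; the pointer is re-fetched on demand. */
	flow = mlx5_ipool_get(sh->ipool[MLX5_IPOOL_RTE_FLOW], idx);
	RTE_SET_USED(flow);
	/* Return the entry to the pool by index. */
	mlx5_ipool_free(sh->ipool[MLX5_IPOOL_RTE_FLOW], idx);
}
#endif /* MLX5_IPOOL_USAGE_EXAMPLE */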

/**
 * Initialize the flow resources' indexed mempool.
 *
 * @param[in] sh
 *   Pointer to mlx5_dev_ctx_shared object.
 * @param[in] config
 *   Pointer to user dev config.
 */
static void
mlx5_flow_ipool_create(struct mlx5_dev_ctx_shared *sh,
		       const struct mlx5_dev_config *config)
{
	uint8_t i;
	struct mlx5_indexed_pool_config cfg;

	for (i = 0; i < MLX5_IPOOL_MAX; ++i) {
		cfg = mlx5_ipool_cfg[i];
		switch (i) {
		default:
			break;
		/*
		 * Set MLX5_IPOOL_MLX5_FLOW ipool size
		 * according to PCI function flow configuration.
		 */
		case MLX5_IPOOL_MLX5_FLOW:
			cfg.size = config->dv_flow_en ?
				sizeof(struct mlx5_flow_handle) :
				MLX5_FLOW_HANDLE_VERBS_SIZE;
			break;
		}
		if (config->reclaim_mode) {
			cfg.release_mem_en = 1;
			cfg.per_core_cache = 0;
		} else {
			cfg.release_mem_en = 0;
		}
		sh->ipool[i] = mlx5_ipool_create(&cfg);
	}
}

/**
 * Release the flow resources' indexed mempool.
 *
 * @param[in] sh
 *   Pointer to mlx5_dev_ctx_shared object.
 */
static void
mlx5_flow_ipool_destroy(struct mlx5_dev_ctx_shared *sh)
{
	uint8_t i;

	for (i = 0; i < MLX5_IPOOL_MAX; ++i)
		mlx5_ipool_destroy(sh->ipool[i]);
	for (i = 0; i < MLX5_MAX_MODIFY_NUM; ++i)
		if (sh->mdh_ipools[i])
			mlx5_ipool_destroy(sh->mdh_ipools[i]);
}

/*
 * Check if dynamic flex parser for eCPRI already exists.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 *
 * @return
 *   true if it exists, false otherwise.
 */
bool
mlx5_flex_parser_ecpri_exist(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_flex_parser_profiles *prf =
		&priv->sh->fp[MLX5_FLEX_PARSER_ECPRI_0];

	return !!prf->obj;
}

/*
 * Allocation of a flex parser for eCPRI. Once created, the parser-related
 * resources will be held until the device is closed.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_flex_parser_ecpri_alloc(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_flex_parser_profiles *prf =
		&priv->sh->fp[MLX5_FLEX_PARSER_ECPRI_0];
	struct mlx5_devx_graph_node_attr node = {
		.modify_field_select = 0,
	};
	uint32_t ids[8];
	int ret;

	if (!priv->config.hca_attr.parse_graph_flex_node) {
		DRV_LOG(ERR, "Dynamic flex parser is not supported "
			"for device %s.", priv->dev_data->name);
		return -ENOTSUP;
	}
	node.header_length_mode = MLX5_GRAPH_NODE_LEN_FIXED;
	/* 8 bytes now: 4B common header + 4B message body header. */
	node.header_length_base_value = 0x8;
	/* After MAC layer: Ether / VLAN. */
	node.in[0].arc_parse_graph_node = MLX5_GRAPH_ARC_NODE_MAC;
	/* Type of compared condition should be 0xAEFE in the L2 layer. */
	node.in[0].compare_condition_value = RTE_ETHER_TYPE_ECPRI;
	/* Sample #0: type in common header. */
	node.sample[0].flow_match_sample_en = 1;
	/* Fixed offset. */
	node.sample[0].flow_match_sample_offset_mode = 0x0;
	/* Only the 2nd byte will be used. */
	node.sample[0].flow_match_sample_field_base_offset = 0x0;
	/* Sample #1: message payload. */
	node.sample[1].flow_match_sample_en = 1;
	/* Fixed offset. */
	node.sample[1].flow_match_sample_offset_mode = 0x0;
	/*
	 * Only the first two bytes will be used right now, and its offset will
	 * start after the common header, which has the length of a DW (u32).
	 */
	node.sample[1].flow_match_sample_field_base_offset = sizeof(uint32_t);
	prf->obj = mlx5_devx_cmd_create_flex_parser(priv->sh->ctx, &node);
	if (!prf->obj) {
		DRV_LOG(ERR, "Failed to create flex parser node object.");
		return (rte_errno == 0) ? -ENODEV : -rte_errno;
	}
	prf->num = 2;
	ret = mlx5_devx_cmd_query_parse_samples(prf->obj, ids, prf->num);
	if (ret) {
		DRV_LOG(ERR, "Failed to query sample IDs.");
		return (rte_errno == 0) ? -ENODEV : -rte_errno;
	}
	prf->offset[0] = 0x0;
	prf->offset[1] = sizeof(uint32_t);
	prf->ids[0] = ids[0];
	prf->ids[1] = ids[1];
	return 0;
}

/*
 * Destroy the flex parser node, including the parser itself, input / output
 * arcs and DW samples. Resources could be reused then.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 */
static void
mlx5_flex_parser_ecpri_release(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_flex_parser_profiles *prf =
		&priv->sh->fp[MLX5_FLEX_PARSER_ECPRI_0];

	if (prf->obj)
		mlx5_devx_cmd_destroy(prf->obj);
	prf->obj = NULL;
}

/*
 * Allocate Rx and Tx UARs in robust fashion.
 * This routine handles the following UAR allocation issues:
 *
 *  - tries to allocate the UAR with the most appropriate memory
 *    mapping type from the ones supported by the host
 *
 *  - tries to allocate the UAR with non-NULL base address.
 *    OFED 5.0.x and upstream rdma_core before v29 returned NULL as the
 *    UAR base address if the UAR was not the first object in the UAR page.
 *    It caused the PMD failure and we should try to get another UAR
 *    till we get the first one with non-NULL base address returned.
 */
static int
mlx5_alloc_rxtx_uars(struct mlx5_dev_ctx_shared *sh,
		     const struct mlx5_dev_config *config)
{
	uint32_t uar_mapping, retry;
	int err = 0;
	void *base_addr;

	for (retry = 0; retry < MLX5_ALLOC_UAR_RETRY; ++retry) {
#ifdef MLX5DV_UAR_ALLOC_TYPE_NC
		/* Control the mapping type according to the settings. */
		uar_mapping = (config->dbnc == MLX5_TXDB_NCACHED) ?
			      MLX5DV_UAR_ALLOC_TYPE_NC :
			      MLX5DV_UAR_ALLOC_TYPE_BF;
#else
		RTE_SET_USED(config);
		/*
		 * It seems we have no way to control the memory mapping type
		 * for the UAR, the default "Write-Combining" type is assumed.
		 * The UAR initialization on queue creation queries the
		 * actual mapping type done by Verbs/kernel and sets up the
		 * PMD datapath accordingly.
		 */
		uar_mapping = 0;
#endif
		sh->tx_uar = mlx5_glue->devx_alloc_uar(sh->ctx, uar_mapping);
#ifdef MLX5DV_UAR_ALLOC_TYPE_NC
		if (!sh->tx_uar &&
		    uar_mapping == MLX5DV_UAR_ALLOC_TYPE_BF) {
			if (config->dbnc == MLX5_TXDB_CACHED ||
			    config->dbnc == MLX5_TXDB_HEURISTIC)
				DRV_LOG(WARNING, "Devarg tx_db_nc setting "
						 "is not supported by DevX");
			/*
			 * In some environments like virtual machine
			 * the Write Combining mapping might not be supported
			 * and UAR allocation fails. We try "Non-Cached"
			 * mapping for the case. The tx_burst routines take
			 * the UAR mapping type into account on UAR setup
			 * on queue creation.
			 */
			DRV_LOG(DEBUG, "Failed to allocate Tx DevX UAR (BF)");
			uar_mapping = MLX5DV_UAR_ALLOC_TYPE_NC;
			sh->tx_uar = mlx5_glue->devx_alloc_uar
						      (sh->ctx, uar_mapping);
		} else if (!sh->tx_uar &&
			   uar_mapping == MLX5DV_UAR_ALLOC_TYPE_NC) {
			if (config->dbnc == MLX5_TXDB_NCACHED)
				DRV_LOG(WARNING, "Devarg tx_db_nc setting "
						 "is not supported by DevX");
			/*
			 * If Verbs/kernel does not support "Non-Cached"
			 * try the "Write-Combining".
			 */
			DRV_LOG(DEBUG, "Failed to allocate Tx DevX UAR (NC)");
			uar_mapping = MLX5DV_UAR_ALLOC_TYPE_BF;
			sh->tx_uar = mlx5_glue->devx_alloc_uar
						      (sh->ctx, uar_mapping);
		}
#endif
		if (!sh->tx_uar) {
			DRV_LOG(ERR, "Failed to allocate Tx DevX UAR (BF/NC)");
			err = ENOMEM;
			goto exit;
		}
		base_addr = mlx5_os_get_devx_uar_base_addr(sh->tx_uar);
		if (base_addr)
			break;
		/*
		 * The UARs are allocated by rdma_core within the
		 * IB device context, on context closure all UARs
		 * will be freed, should be no memory/object leakage.
		 */
		DRV_LOG(DEBUG, "Retrying to allocate Tx DevX UAR");
		sh->tx_uar = NULL;
	}
	/* Check whether we finally succeeded with valid UAR allocation. */
	if (!sh->tx_uar) {
		DRV_LOG(ERR, "Failed to allocate Tx DevX UAR (NULL base)");
		err = ENOMEM;
		goto exit;
	}
	for (retry = 0; retry < MLX5_ALLOC_UAR_RETRY; ++retry) {
		uar_mapping = 0;
		sh->devx_rx_uar = mlx5_glue->devx_alloc_uar
						      (sh->ctx, uar_mapping);
#ifdef MLX5DV_UAR_ALLOC_TYPE_NC
		if (!sh->devx_rx_uar &&
		    uar_mapping == MLX5DV_UAR_ALLOC_TYPE_BF) {
			/*
			 * Rx UAR is used to control interrupts only,
			 * should be no datapath noticeable impact,
			 * can try "Non-Cached" mapping safely.
			 */
			DRV_LOG(DEBUG, "Failed to allocate Rx DevX UAR (BF)");
			uar_mapping = MLX5DV_UAR_ALLOC_TYPE_NC;
			sh->devx_rx_uar = mlx5_glue->devx_alloc_uar
						      (sh->ctx, uar_mapping);
		}
#endif
		if (!sh->devx_rx_uar) {
			DRV_LOG(ERR, "Failed to allocate Rx DevX UAR (BF/NC)");
			err = ENOMEM;
			goto exit;
		}
		base_addr = mlx5_os_get_devx_uar_base_addr(sh->devx_rx_uar);
		if (base_addr)
			break;
		/*
		 * The UARs are allocated by rdma_core within the
		 * IB device context, on context closure all UARs
		 * will be freed, should be no memory/object leakage.
		 */
		DRV_LOG(DEBUG, "Retrying to allocate Rx DevX UAR");
		sh->devx_rx_uar = NULL;
	}
	/* Check whether we finally succeeded with valid UAR allocation. */
	if (!sh->devx_rx_uar) {
		DRV_LOG(ERR, "Failed to allocate Rx DevX UAR (NULL base)");
		err = ENOMEM;
	}
exit:
	return err;
}

/**
 * Unregister the mempool from the protection domain.
 *
 * @param sh
 *   Pointer to the device shared context.
 * @param mp
 *   Mempool being unregistered.
 */
static void
mlx5_dev_ctx_shared_mempool_unregister(struct mlx5_dev_ctx_shared *sh,
				       struct rte_mempool *mp)
{
	struct mlx5_mp_id mp_id;

	mlx5_mp_id_init(&mp_id, 0);
	if (mlx5_mr_mempool_unregister(&sh->share_cache, mp, &mp_id) < 0)
		DRV_LOG(WARNING, "Failed to unregister mempool %s for PD %p: %s",
			mp->name, sh->pd, rte_strerror(rte_errno));
}

/**
 * rte_mempool_walk() callback to register mempools
 * for the protection domain.
 *
 * @param mp
 *   The mempool being walked.
 * @param arg
 *   Pointer to the device shared context.
 */
static void
mlx5_dev_ctx_shared_mempool_register_cb(struct rte_mempool *mp, void *arg)
{
	struct mlx5_dev_ctx_shared *sh = arg;
	struct mlx5_mp_id mp_id;
	int ret;

	mlx5_mp_id_init(&mp_id, 0);
	ret = mlx5_mr_mempool_register(&sh->share_cache, sh->pd, mp, &mp_id);
	if (ret < 0 && rte_errno != EEXIST)
		DRV_LOG(ERR, "Failed to register existing mempool %s for PD %p: %s",
			mp->name, sh->pd, rte_strerror(rte_errno));
}

/**
 * rte_mempool_walk() callback to unregister mempools
 * from the protection domain.
 *
 * @param mp
 *   The mempool being walked.
 * @param arg
 *   Pointer to the device shared context.
 */
static void
mlx5_dev_ctx_shared_mempool_unregister_cb(struct rte_mempool *mp, void *arg)
{
	mlx5_dev_ctx_shared_mempool_unregister
				((struct mlx5_dev_ctx_shared *)arg, mp);
}

/**
 * Mempool life cycle callback for Ethernet devices.
 *
 * @param event
 *   Mempool life cycle event.
 * @param mp
 *   Associated mempool.
 * @param arg
 *   Pointer to a device shared context.
 */
static void
mlx5_dev_ctx_shared_mempool_event_cb(enum rte_mempool_event event,
				     struct rte_mempool *mp, void *arg)
{
	struct mlx5_dev_ctx_shared *sh = arg;
	struct mlx5_mp_id mp_id;

	switch (event) {
	case RTE_MEMPOOL_EVENT_READY:
		mlx5_mp_id_init(&mp_id, 0);
		if (mlx5_mr_mempool_register(&sh->share_cache, sh->pd, mp,
					     &mp_id) < 0)
			DRV_LOG(ERR, "Failed to register new mempool %s for PD %p: %s",
				mp->name, sh->pd, rte_strerror(rte_errno));
		break;
	case RTE_MEMPOOL_EVENT_DESTROY:
		mlx5_dev_ctx_shared_mempool_unregister(sh, mp);
		break;
	}
}

/**
 * Callback used when implicit mempool registration is disabled
 * in order to track Rx mempool destruction.
 *
 * @param event
 *   Mempool life cycle event.
 * @param mp
 *   An Rx mempool registered explicitly when the port is started.
 * @param arg
 *   Pointer to a device shared context.
 */
static void
mlx5_dev_ctx_shared_rx_mempool_event_cb(enum rte_mempool_event event,
					struct rte_mempool *mp, void *arg)
{
	struct mlx5_dev_ctx_shared *sh = arg;

	if (event == RTE_MEMPOOL_EVENT_DESTROY)
		mlx5_dev_ctx_shared_mempool_unregister(sh, mp);
}

int
mlx5_dev_ctx_shared_mempool_subscribe(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_dev_ctx_shared *sh = priv->sh;
	int ret;

	/* Check if we only need to track Rx mempool destruction. */
	if (!priv->config.mr_mempool_reg_en) {
		ret = rte_mempool_event_callback_register
				(mlx5_dev_ctx_shared_rx_mempool_event_cb, sh);
		return ret == 0 || rte_errno == EEXIST ? 0 : ret;
	}
	/* Callback for this shared context may be already registered. */
	ret = rte_mempool_event_callback_register
				(mlx5_dev_ctx_shared_mempool_event_cb, sh);
	if (ret != 0 && rte_errno != EEXIST)
		return ret;
	/* Register mempools only once for this shared context. */
	if (ret == 0)
		rte_mempool_walk(mlx5_dev_ctx_shared_mempool_register_cb, sh);
	return 0;
}

/**
 * Allocate shared device context. If there is a multi-port device, the
 * master and representors will share this context; if there is a single-port
 * dedicated device, the context will be used by only the given port due to
 * unification.
 *
 * Routine first searches the context list for the specified device name;
 * if found, the shared context is assumed and its reference counter is
 * incremented. If no context is found, a new one is created and initialized
 * with the specified device context and parameters.
 *
 * @param[in] spawn
 *   Pointer to the device attributes (name, port, etc).
 * @param[in] config
 *   Pointer to device configuration structure.
 *
 * @return
 *   Pointer to mlx5_dev_ctx_shared object on success,
 *   otherwise NULL and rte_errno is set.
 */
struct mlx5_dev_ctx_shared *
mlx5_alloc_shared_dev_ctx(const struct mlx5_dev_spawn_data *spawn,
			  const struct mlx5_dev_config *config)
{
	struct mlx5_dev_ctx_shared *sh;
	int err = 0;
	uint32_t i;
	struct mlx5_devx_tis_attr tis_attr = { 0 };

	MLX5_ASSERT(spawn);
	/* Secondary process should not create the shared context. */
	MLX5_ASSERT(rte_eal_process_type() == RTE_PROC_PRIMARY);
	pthread_mutex_lock(&mlx5_dev_ctx_list_mutex);
	/* Search for IB context by device name. */
	LIST_FOREACH(sh, &mlx5_dev_ctx_list, next) {
		if (!strcmp(sh->ibdev_name,
			    mlx5_os_get_dev_device_name(spawn->phys_dev))) {
			sh->refcnt++;
			goto exit;
		}
	}
	/* No device found, we have to create a new shared context. */
	MLX5_ASSERT(spawn->max_port);
	sh = mlx5_malloc(MLX5_MEM_ZERO | MLX5_MEM_RTE,
			 sizeof(struct mlx5_dev_ctx_shared) +
			 spawn->max_port *
			 sizeof(struct mlx5_dev_shared_port),
			 RTE_CACHE_LINE_SIZE, SOCKET_ID_ANY);
	if (!sh) {
		DRV_LOG(ERR, "shared context allocation failure");
		rte_errno = ENOMEM;
		goto exit;
	}
	sh->numa_node = spawn->numa_node;
	if (spawn->bond_info)
		sh->bond = *spawn->bond_info;
	err = mlx5_os_open_device(spawn, config, sh);
	if (!sh->ctx)
		goto error;
	err = mlx5_os_get_dev_attr(sh->ctx, &sh->device_attr);
	if (err) {
		DRV_LOG(DEBUG, "mlx5_os_get_dev_attr() failed");
		goto error;
	}
	sh->refcnt = 1;
	sh->max_port = spawn->max_port;
	sh->reclaim_mode = config->reclaim_mode;
	strncpy(sh->ibdev_name, mlx5_os_get_ctx_device_name(sh->ctx),
		sizeof(sh->ibdev_name) - 1);
	strncpy(sh->ibdev_path, mlx5_os_get_ctx_device_path(sh->ctx),
		sizeof(sh->ibdev_path) - 1);
	/*
	 * Setting port_id to the maximum unallowed value means
	 * there is no interrupt subhandler installed for
	 * the given port index i.
	 */
	for (i = 0; i < sh->max_port; i++) {
		sh->port[i].ih_port_id = RTE_MAX_ETHPORTS;
		sh->port[i].devx_ih_port_id = RTE_MAX_ETHPORTS;
	}
	sh->pd = mlx5_os_alloc_pd(sh->ctx);
	if (sh->pd == NULL) {
		DRV_LOG(ERR, "PD allocation failure");
		err = ENOMEM;
		goto error;
	}
	if (sh->devx) {
		err = mlx5_os_get_pdn(sh->pd, &sh->pdn);
		if (err) {
			DRV_LOG(ERR, "Fail to extract pdn from PD");
			goto error;
		}
		sh->td = mlx5_devx_cmd_create_td(sh->ctx);
		if (!sh->td) {
			DRV_LOG(ERR, "TD allocation failure");
			err = ENOMEM;
			goto error;
		}
		tis_attr.transport_domain = sh->td->id;
		sh->tis = mlx5_devx_cmd_create_tis(sh->ctx, &tis_attr);
		if (!sh->tis) {
			DRV_LOG(ERR, "TIS allocation failure");
			err = ENOMEM;
			goto error;
		}
		err = mlx5_alloc_rxtx_uars(sh, config);
		if (err)
			goto error;
		MLX5_ASSERT(sh->tx_uar);
		MLX5_ASSERT(mlx5_os_get_devx_uar_base_addr(sh->tx_uar));

		MLX5_ASSERT(sh->devx_rx_uar);
		MLX5_ASSERT(mlx5_os_get_devx_uar_base_addr(sh->devx_rx_uar));
	}
#ifndef RTE_ARCH_64
	/* Initialize UAR access locks for 32bit implementations. */
	rte_spinlock_init(&sh->uar_lock_cq);
	for (i = 0; i < MLX5_UAR_PAGE_NUM_MAX; i++)
		rte_spinlock_init(&sh->uar_lock[i]);
#endif
	/*
	 * Once the device is added to the list of memory event
	 * callback, its global MR cache table cannot be expanded
	 * on the fly because of deadlock. If it overflows, lookup
	 * should be done by searching MR list linearly, which is slow.
	 *
	 * At this point the device is not added to the memory
	 * event list yet, context is just being created.
	 */
	err = mlx5_mr_btree_init(&sh->share_cache.cache,
				 MLX5_MR_BTREE_CACHE_N * 2,
				 sh->numa_node);
	if (err) {
		err = rte_errno;
		goto error;
	}
	mlx5_os_set_reg_mr_cb(&sh->share_cache.reg_mr_cb,
			      &sh->share_cache.dereg_mr_cb);
	mlx5_os_dev_shared_handler_install(sh);
	sh->cnt_id_tbl = mlx5_l3t_create(MLX5_L3T_TYPE_DWORD);
	if (!sh->cnt_id_tbl) {
		err = rte_errno;
		goto error;
	}
	if (LIST_EMPTY(&mlx5_dev_ctx_list)) {
		err = mlx5_flow_os_init_workspace_once();
		if (err)
			goto error;
	}
	mlx5_flow_aging_init(sh);
	mlx5_flow_counters_mng_init(sh);
	mlx5_flow_ipool_create(sh, config);
	/* Add device to memory callback list. */
	rte_rwlock_write_lock(&mlx5_shared_data->mem_event_rwlock);
	LIST_INSERT_HEAD(&mlx5_shared_data->mem_event_cb_list,
			 sh, mem_event_cb);
	rte_rwlock_write_unlock(&mlx5_shared_data->mem_event_rwlock);
	/* Add context to the global device list. */
	LIST_INSERT_HEAD(&mlx5_dev_ctx_list, sh, next);
	rte_spinlock_init(&sh->geneve_tlv_opt_sl);
exit:
	pthread_mutex_unlock(&mlx5_dev_ctx_list_mutex);
	return sh;
error:
	pthread_mutex_destroy(&sh->txpp.mutex);
	pthread_mutex_unlock(&mlx5_dev_ctx_list_mutex);
	MLX5_ASSERT(sh);
	if (sh->cnt_id_tbl)
		mlx5_l3t_destroy(sh->cnt_id_tbl);
	if (sh->share_cache.cache.table)
		mlx5_mr_btree_free(&sh->share_cache.cache);
	if (sh->tis)
		claim_zero(mlx5_devx_cmd_destroy(sh->tis));
	if (sh->td)
		claim_zero(mlx5_devx_cmd_destroy(sh->td));
	if (sh->devx_rx_uar)
		mlx5_glue->devx_free_uar(sh->devx_rx_uar);
	if (sh->tx_uar)
		mlx5_glue->devx_free_uar(sh->tx_uar);
	if (sh->pd)
		claim_zero(mlx5_os_dealloc_pd(sh->pd));
	if (sh->ctx)
		claim_zero(mlx5_glue->close_device(sh->ctx));
	mlx5_free(sh);
	MLX5_ASSERT(err > 0);
	rte_errno = err;
	return NULL;
}

/**
 * Free shared IB device context. Decrement counter and if zero free
 * all allocated resources and close handles.
 *
 * @param[in] sh
 *   Pointer to mlx5_dev_ctx_shared object to free.
 */
void
mlx5_free_shared_dev_ctx(struct mlx5_dev_ctx_shared *sh)
{
	int ret;

	pthread_mutex_lock(&mlx5_dev_ctx_list_mutex);
#ifdef RTE_LIBRTE_MLX5_DEBUG
	/* Check the object presence in the list. */
	struct mlx5_dev_ctx_shared *lctx;

	LIST_FOREACH(lctx, &mlx5_dev_ctx_list, next)
		if (lctx == sh)
			break;
	MLX5_ASSERT(lctx);
	if (lctx != sh) {
		DRV_LOG(ERR, "Freeing non-existing shared IB context");
		goto exit;
	}
#endif
	MLX5_ASSERT(sh);
	MLX5_ASSERT(sh->refcnt);
	/* Secondary process should not free the shared context. */
	MLX5_ASSERT(rte_eal_process_type() == RTE_PROC_PRIMARY);
	if (--sh->refcnt)
		goto exit;
	/* Stop watching for mempool events and unregister all mempools. */
	ret = rte_mempool_event_callback_unregister
				(mlx5_dev_ctx_shared_mempool_event_cb, sh);
	if (ret < 0 && rte_errno == ENOENT)
		ret = rte_mempool_event_callback_unregister
				(mlx5_dev_ctx_shared_rx_mempool_event_cb, sh);
	if (ret == 0)
		rte_mempool_walk(mlx5_dev_ctx_shared_mempool_unregister_cb,
				 sh);
	/* Remove from memory callback device list. */
	rte_rwlock_write_lock(&mlx5_shared_data->mem_event_rwlock);
	LIST_REMOVE(sh, mem_event_cb);
	rte_rwlock_write_unlock(&mlx5_shared_data->mem_event_rwlock);
	/* Release created Memory Regions. */
	mlx5_mr_release_cache(&sh->share_cache);
	/* Remove context from the global device list. */
	LIST_REMOVE(sh, next);
	/* Release flow workspaces objects on the last device. */
	if (LIST_EMPTY(&mlx5_dev_ctx_list))
		mlx5_flow_os_release_workspace();
	pthread_mutex_unlock(&mlx5_dev_ctx_list_mutex);
	/*
	 * Ensure there is no async event handler installed.
	 * Only primary process handles async device events.
	 */
	mlx5_flow_counters_mng_close(sh);
	if (sh->aso_age_mng) {
		mlx5_flow_aso_age_mng_close(sh);
		sh->aso_age_mng = NULL;
	}
	if (sh->mtrmng)
		mlx5_aso_flow_mtrs_mng_close(sh);
	mlx5_flow_ipool_destroy(sh);
	mlx5_os_dev_shared_handler_uninstall(sh);
	if (sh->cnt_id_tbl) {
		mlx5_l3t_destroy(sh->cnt_id_tbl);
		sh->cnt_id_tbl = NULL;
	}
	if (sh->tx_uar) {
		mlx5_glue->devx_free_uar(sh->tx_uar);
		sh->tx_uar = NULL;
	}
	if (sh->pd)
		claim_zero(mlx5_os_dealloc_pd(sh->pd));
	if (sh->tis)
		claim_zero(mlx5_devx_cmd_destroy(sh->tis));
	if (sh->td)
		claim_zero(mlx5_devx_cmd_destroy(sh->td));
	if (sh->devx_rx_uar)
		mlx5_glue->devx_free_uar(sh->devx_rx_uar);
	if (sh->ctx)
		claim_zero(mlx5_glue->close_device(sh->ctx));
	MLX5_ASSERT(sh->geneve_tlv_option_resource == NULL);
	pthread_mutex_destroy(&sh->txpp.mutex);
	mlx5_free(sh);
	return;
exit:
	pthread_mutex_unlock(&mlx5_dev_ctx_list_mutex);
}

/**
 * Destroy table hash list.
 *
 * @param[in] priv
 *   Pointer to the private device data structure.
 */
void
mlx5_free_table_hash_list(struct mlx5_priv *priv)
{
	struct mlx5_dev_ctx_shared *sh = priv->sh;

	if (!sh->flow_tbls)
		return;
	mlx5_hlist_destroy(sh->flow_tbls);
	sh->flow_tbls = NULL;
}

/**
 * Initialize flow table hash list and create the root tables entry
 * for each domain.
 *
 * @param[in] priv
 *   Pointer to the private device data structure.
 *
 * @return
 *   Zero on success, positive error code otherwise.
 */
int
mlx5_alloc_table_hash_list(struct mlx5_priv *priv __rte_unused)
{
	int err = 0;
	/* Tables are only used in DV and DR modes. */
#if defined(HAVE_IBV_FLOW_DV_SUPPORT) || !defined(HAVE_INFINIBAND_VERBS_H)
	struct mlx5_dev_ctx_shared *sh = priv->sh;
	char s[MLX5_NAME_SIZE];

	MLX5_ASSERT(sh);
	snprintf(s, sizeof(s), "%s_flow_table", priv->sh->ibdev_name);
	sh->flow_tbls = mlx5_hlist_create(s, MLX5_FLOW_TABLE_HLIST_ARRAY_SIZE,
					  false, true, sh,
					  flow_dv_tbl_create_cb,
					  flow_dv_tbl_match_cb,
					  flow_dv_tbl_remove_cb,
					  flow_dv_tbl_clone_cb,
					  flow_dv_tbl_clone_free_cb);
	if (!sh->flow_tbls) {
		DRV_LOG(ERR, "flow tables hash list creation failed.");
		err = ENOMEM;
		return err;
	}
#ifndef HAVE_MLX5DV_DR
	struct rte_flow_error error;
	struct rte_eth_dev *dev = &rte_eth_devices[priv->dev_data->port_id];

	/*
	 * In case we do not have DR support, the zero tables should be
	 * created because DV expects to see them even if they cannot be
	 * created by RDMA-CORE.
	 */
	if (!flow_dv_tbl_resource_get(dev, 0, 0, 0, 0,
				      NULL, 0, 1, 0, &error) ||
	    !flow_dv_tbl_resource_get(dev, 0, 1, 0, 0,
				      NULL, 0, 1, 0, &error) ||
	    !flow_dv_tbl_resource_get(dev, 0, 0, 1, 0,
				      NULL, 0, 1, 0, &error)) {
		err = ENOMEM;
		goto error;
	}
	return err;
error:
	mlx5_free_table_hash_list(priv);
#endif /* HAVE_MLX5DV_DR */
#endif
	return err;
}

/**
 * Retrieve integer value from environment variable.
 *
 * @param[in] name
 *   Environment variable name.
 *
 * @return
 *   Integer value, 0 if the variable is not set.
 */
int
mlx5_getenv_int(const char *name)
{
	const char *val = getenv(name);

	if (val == NULL)
		return 0;
	return atoi(val);
}

/**
 * DPDK callback to add UDP tunnel port.
 *
 * @param[in] dev
 *   A pointer to eth_dev.
 * @param[in] udp_tunnel
 *   A pointer to udp tunnel.
 *
 * @return
 *   0 on valid udp ports and tunnels, -ENOTSUP otherwise.
 */
int
mlx5_udp_tunnel_port_add(struct rte_eth_dev *dev __rte_unused,
			 struct rte_eth_udp_tunnel *udp_tunnel)
{
	MLX5_ASSERT(udp_tunnel != NULL);
	if (udp_tunnel->prot_type == RTE_TUNNEL_TYPE_VXLAN &&
	    udp_tunnel->udp_port == 4789)
		return 0;
	if (udp_tunnel->prot_type == RTE_TUNNEL_TYPE_VXLAN_GPE &&
	    udp_tunnel->udp_port == 4790)
		return 0;
	return -ENOTSUP;
}

/**
 * Initialize process private data structure.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_proc_priv_init(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_proc_priv *ppriv;
	size_t ppriv_size;

	mlx5_proc_priv_uninit(dev);
	/*
	 * UAR register table follows the process private structure. BlueFlame
	 * registers for Tx queues are stored in the table.
	 */
	ppriv_size =
		sizeof(struct mlx5_proc_priv) + priv->txqs_n * sizeof(void *);
	ppriv = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO, ppriv_size,
			    RTE_CACHE_LINE_SIZE, dev->device->numa_node);
	if (!ppriv) {
		rte_errno = ENOMEM;
		return -rte_errno;
	}
	ppriv->uar_table_sz = priv->txqs_n;
	dev->process_private = ppriv;
	return 0;
}

/**
 * Un-initialize process private data structure.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 */
void
mlx5_proc_priv_uninit(struct rte_eth_dev *dev)
{
	if (!dev->process_private)
		return;
	mlx5_free(dev->process_private);
	dev->process_private = NULL;
}

/**
 * DPDK callback to close the device.
 *
 * Destroy all queues and objects, free memory.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 */
int
mlx5_dev_close(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	unsigned int i;
	int ret;

	if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
		/* Check if process_private released. */
		if (!dev->process_private)
			return 0;
		mlx5_tx_uar_uninit_secondary(dev);
		mlx5_proc_priv_uninit(dev);
		rte_eth_dev_release_port(dev);
		return 0;
	}
	if (!priv->sh)
		return 0;
	DRV_LOG(DEBUG, "port %u closing device \"%s\"",
		dev->data->port_id,
		((priv->sh->ctx != NULL) ?
		mlx5_os_get_ctx_device_name(priv->sh->ctx) : ""));
	/*
	 * If default mreg copy action is removed at the stop stage,
	 * the search will return none and nothing will be done anymore.
	 */
	mlx5_flow_stop_default(dev);
	mlx5_traffic_disable(dev);
	/*
	 * If all the flows are already flushed in the device stop stage,
	 * then this will return directly without any action.
	 */
	mlx5_flow_list_flush(dev, MLX5_FLOW_TYPE_GEN, true);
	mlx5_action_handle_flush(dev);
	mlx5_flow_meter_flush(dev, NULL);
	/* Prevent crashes when queues are still in use. */
	dev->rx_pkt_burst = removed_rx_burst;
	dev->tx_pkt_burst = removed_tx_burst;
	rte_wmb();
	/* Disable datapath on secondary process. */
	mlx5_mp_os_req_stop_rxtx(dev);
	/* Free the eCPRI flex parser resource. */
	mlx5_flex_parser_ecpri_release(dev);
	if (priv->rxqs != NULL) {
		/* XXX race condition if mlx5_rx_burst() is still running. */
		rte_delay_us_sleep(1000);
		for (i = 0; (i != priv->rxqs_n); ++i)
			mlx5_rxq_release(dev, i);
		priv->rxqs_n = 0;
		priv->rxqs = NULL;
	}
	if (priv->representor) {
		/* Each representor has a dedicated interrupt handler. */
		mlx5_free(dev->intr_handle);
		dev->intr_handle = NULL;
	}
	if (priv->txqs != NULL) {
		/* XXX race condition if mlx5_tx_burst() is still running. */
		rte_delay_us_sleep(1000);
		for (i = 0; (i != priv->txqs_n); ++i)
			mlx5_txq_release(dev, i);
		priv->txqs_n = 0;
		priv->txqs = NULL;
	}
	mlx5_proc_priv_uninit(dev);
	if (priv->q_counters) {
		mlx5_devx_cmd_destroy(priv->q_counters);
		priv->q_counters = NULL;
	}
	if (priv->drop_queue.hrxq)
		mlx5_drop_action_destroy(dev);
	if (priv->mreg_cp_tbl)
		mlx5_hlist_destroy(priv->mreg_cp_tbl);
	mlx5_mprq_free_mp(dev);
	if (priv->sh->ct_mng)
		mlx5_flow_aso_ct_mng_close(priv->sh);
	mlx5_os_free_shared_dr(priv);
	if (priv->rss_conf.rss_key != NULL)
		mlx5_free(priv->rss_conf.rss_key);
	if (priv->reta_idx != NULL)
		mlx5_free(priv->reta_idx);
	if (priv->config.vf)
		mlx5_os_mac_addr_flush(dev);
	if (priv->nl_socket_route >= 0)
		close(priv->nl_socket_route);
	if (priv->nl_socket_rdma >= 0)
		close(priv->nl_socket_rdma);
	if (priv->vmwa_context)
		mlx5_vlan_vmwa_exit(priv->vmwa_context);
	ret = mlx5_hrxq_verify(dev);
	if (ret)
		DRV_LOG(WARNING, "port %u some hash Rx queues still remain",
			dev->data->port_id);
	ret = mlx5_ind_table_obj_verify(dev);
	if (ret)
		DRV_LOG(WARNING, "port %u some indirection tables still remain",
			dev->data->port_id);
	ret = mlx5_rxq_obj_verify(dev);
	if (ret)
		DRV_LOG(WARNING, "port %u some Rx queue objects still remain",
			dev->data->port_id);
	ret = mlx5_rxq_verify(dev);
	if (ret)
		DRV_LOG(WARNING, "port %u some Rx queues still remain",
			dev->data->port_id);
	ret = mlx5_txq_obj_verify(dev);
	if (ret)
		DRV_LOG(WARNING, "port %u some Verbs Tx queues still remain",
			dev->data->port_id);
	ret = mlx5_txq_verify(dev);
	if (ret)
		DRV_LOG(WARNING, "port %u some Tx queues still remain",
			dev->data->port_id);
	ret = mlx5_flow_verify(dev);
	if (ret)
		DRV_LOG(WARNING, "port %u some flows still remain",
			dev->data->port_id);
	if (priv->hrxqs)
		mlx5_list_destroy(priv->hrxqs);
	/*
	 * Free the shared context in last turn, because the cleanup
	 * routines above may use some shared fields, like
	 * mlx5_os_mac_addr_flush() uses ibdev_path for retrieving
	 * the ifindex if Netlink fails.
	 */
	mlx5_free_shared_dev_ctx(priv->sh);
	if (priv->domain_id != RTE_ETH_DEV_SWITCH_DOMAIN_ID_INVALID) {
		unsigned int c = 0;
		uint16_t port_id;

		MLX5_ETH_FOREACH_DEV(port_id, dev->device) {
			struct mlx5_priv *opriv =
				rte_eth_devices[port_id].data->dev_private;

			if (!opriv ||
			    opriv->domain_id != priv->domain_id ||
			    &rte_eth_devices[port_id] == dev)
				continue;
			++c;
			break;
		}
		if (!c)
			claim_zero(rte_eth_switch_domain_free(priv->domain_id));
	}
	memset(priv, 0, sizeof(*priv));
	priv->domain_id = RTE_ETH_DEV_SWITCH_DOMAIN_ID_INVALID;
	/*
	 * Reset mac_addrs to NULL such that it is not freed as part of
	 * rte_eth_dev_release_port(). mac_addrs is part of dev_private so
	 * it is freed when dev_private is freed.
	 */
	dev->data->mac_addrs = NULL;
	return 0;
}

const struct eth_dev_ops mlx5_dev_ops = {
	.dev_configure = mlx5_dev_configure,
	.dev_start = mlx5_dev_start,
	.dev_stop = mlx5_dev_stop,
	.dev_set_link_down = mlx5_set_link_down,
	.dev_set_link_up = mlx5_set_link_up,
	.dev_close = mlx5_dev_close,
	.promiscuous_enable = mlx5_promiscuous_enable,
	.promiscuous_disable = mlx5_promiscuous_disable,
	.allmulticast_enable = mlx5_allmulticast_enable,
	.allmulticast_disable = mlx5_allmulticast_disable,
	.link_update = mlx5_link_update,
	.stats_get = mlx5_stats_get,
	.stats_reset = mlx5_stats_reset,
	.xstats_get = mlx5_xstats_get,
	.xstats_reset = mlx5_xstats_reset,
	.xstats_get_names = mlx5_xstats_get_names,
	.fw_version_get = mlx5_fw_version_get,
	.dev_infos_get = mlx5_dev_infos_get,
	.representor_info_get = mlx5_representor_info_get,
	.read_clock = mlx5_txpp_read_clock,
	.dev_supported_ptypes_get = mlx5_dev_supported_ptypes_get,
	.vlan_filter_set = mlx5_vlan_filter_set,
	.rx_queue_setup = mlx5_rx_queue_setup,
	.rx_hairpin_queue_setup = mlx5_rx_hairpin_queue_setup,
	.tx_queue_setup = mlx5_tx_queue_setup,
	.tx_hairpin_queue_setup = mlx5_tx_hairpin_queue_setup,
	.rx_queue_release = mlx5_rx_queue_release,
	.tx_queue_release = mlx5_tx_queue_release,
	.rx_queue_start = mlx5_rx_queue_start,
	.rx_queue_stop = mlx5_rx_queue_stop,
	.tx_queue_start = mlx5_tx_queue_start,
	.tx_queue_stop = mlx5_tx_queue_stop,
	.flow_ctrl_get = mlx5_dev_get_flow_ctrl,
	.flow_ctrl_set = mlx5_dev_set_flow_ctrl,
	.mac_addr_remove = mlx5_mac_addr_remove,
	.mac_addr_add = mlx5_mac_addr_add,
	.mac_addr_set = mlx5_mac_addr_set,
	.set_mc_addr_list = mlx5_set_mc_addr_list,
	.mtu_set = mlx5_dev_set_mtu,
	.vlan_strip_queue_set = mlx5_vlan_strip_queue_set,
	.vlan_offload_set = mlx5_vlan_offload_set,
	.reta_update = mlx5_dev_rss_reta_update,
	.reta_query = mlx5_dev_rss_reta_query,
	.rss_hash_update = mlx5_rss_hash_update,
	.rss_hash_conf_get = mlx5_rss_hash_conf_get,
	.flow_ops_get = mlx5_flow_ops_get,
	.rxq_info_get = mlx5_rxq_info_get,
	.txq_info_get = mlx5_txq_info_get,
	.rx_burst_mode_get = mlx5_rx_burst_mode_get,
	.tx_burst_mode_get = mlx5_tx_burst_mode_get,
	.rx_queue_intr_enable = mlx5_rx_intr_enable,
	.rx_queue_intr_disable = mlx5_rx_intr_disable,
	.is_removed = mlx5_is_removed,
	.udp_tunnel_port_add = mlx5_udp_tunnel_port_add,
	.get_module_info = mlx5_get_module_info,
	.get_module_eeprom = mlx5_get_module_eeprom,
	.hairpin_cap_get = mlx5_hairpin_cap_get,
	.mtr_ops_get = mlx5_flow_meter_ops_get,
mlx5_flow_meter_ops_get, 1901 .hairpin_bind = mlx5_hairpin_bind, 1902 .hairpin_unbind = mlx5_hairpin_unbind, 1903 .hairpin_get_peer_ports = mlx5_hairpin_get_peer_ports, 1904 .hairpin_queue_peer_update = mlx5_hairpin_queue_peer_update, 1905 .hairpin_queue_peer_bind = mlx5_hairpin_queue_peer_bind, 1906 .hairpin_queue_peer_unbind = mlx5_hairpin_queue_peer_unbind, 1907 .get_monitor_addr = mlx5_get_monitor_addr, 1908 }; 1909 1910 /* Available operations from secondary process. */ 1911 const struct eth_dev_ops mlx5_dev_sec_ops = { 1912 .stats_get = mlx5_stats_get, 1913 .stats_reset = mlx5_stats_reset, 1914 .xstats_get = mlx5_xstats_get, 1915 .xstats_reset = mlx5_xstats_reset, 1916 .xstats_get_names = mlx5_xstats_get_names, 1917 .fw_version_get = mlx5_fw_version_get, 1918 .dev_infos_get = mlx5_dev_infos_get, 1919 .representor_info_get = mlx5_representor_info_get, 1920 .read_clock = mlx5_txpp_read_clock, 1921 .rx_queue_start = mlx5_rx_queue_start, 1922 .rx_queue_stop = mlx5_rx_queue_stop, 1923 .tx_queue_start = mlx5_tx_queue_start, 1924 .tx_queue_stop = mlx5_tx_queue_stop, 1925 .rxq_info_get = mlx5_rxq_info_get, 1926 .txq_info_get = mlx5_txq_info_get, 1927 .rx_burst_mode_get = mlx5_rx_burst_mode_get, 1928 .tx_burst_mode_get = mlx5_tx_burst_mode_get, 1929 .get_module_info = mlx5_get_module_info, 1930 .get_module_eeprom = mlx5_get_module_eeprom, 1931 }; 1932 1933 /* Available operations in flow isolated mode. */ 1934 const struct eth_dev_ops mlx5_dev_ops_isolate = { 1935 .dev_configure = mlx5_dev_configure, 1936 .dev_start = mlx5_dev_start, 1937 .dev_stop = mlx5_dev_stop, 1938 .dev_set_link_down = mlx5_set_link_down, 1939 .dev_set_link_up = mlx5_set_link_up, 1940 .dev_close = mlx5_dev_close, 1941 .promiscuous_enable = mlx5_promiscuous_enable, 1942 .promiscuous_disable = mlx5_promiscuous_disable, 1943 .allmulticast_enable = mlx5_allmulticast_enable, 1944 .allmulticast_disable = mlx5_allmulticast_disable, 1945 .link_update = mlx5_link_update, 1946 .stats_get = mlx5_stats_get, 1947 .stats_reset = mlx5_stats_reset, 1948 .xstats_get = mlx5_xstats_get, 1949 .xstats_reset = mlx5_xstats_reset, 1950 .xstats_get_names = mlx5_xstats_get_names, 1951 .fw_version_get = mlx5_fw_version_get, 1952 .dev_infos_get = mlx5_dev_infos_get, 1953 .representor_info_get = mlx5_representor_info_get, 1954 .read_clock = mlx5_txpp_read_clock, 1955 .dev_supported_ptypes_get = mlx5_dev_supported_ptypes_get, 1956 .vlan_filter_set = mlx5_vlan_filter_set, 1957 .rx_queue_setup = mlx5_rx_queue_setup, 1958 .rx_hairpin_queue_setup = mlx5_rx_hairpin_queue_setup, 1959 .tx_queue_setup = mlx5_tx_queue_setup, 1960 .tx_hairpin_queue_setup = mlx5_tx_hairpin_queue_setup, 1961 .rx_queue_release = mlx5_rx_queue_release, 1962 .tx_queue_release = mlx5_tx_queue_release, 1963 .rx_queue_start = mlx5_rx_queue_start, 1964 .rx_queue_stop = mlx5_rx_queue_stop, 1965 .tx_queue_start = mlx5_tx_queue_start, 1966 .tx_queue_stop = mlx5_tx_queue_stop, 1967 .flow_ctrl_get = mlx5_dev_get_flow_ctrl, 1968 .flow_ctrl_set = mlx5_dev_set_flow_ctrl, 1969 .mac_addr_remove = mlx5_mac_addr_remove, 1970 .mac_addr_add = mlx5_mac_addr_add, 1971 .mac_addr_set = mlx5_mac_addr_set, 1972 .set_mc_addr_list = mlx5_set_mc_addr_list, 1973 .mtu_set = mlx5_dev_set_mtu, 1974 .vlan_strip_queue_set = mlx5_vlan_strip_queue_set, 1975 .vlan_offload_set = mlx5_vlan_offload_set, 1976 .flow_ops_get = mlx5_flow_ops_get, 1977 .rxq_info_get = mlx5_rxq_info_get, 1978 .txq_info_get = mlx5_txq_info_get, 1979 .rx_burst_mode_get = mlx5_rx_burst_mode_get, 1980 .tx_burst_mode_get = mlx5_tx_burst_mode_get, 
1981 .rx_queue_intr_enable = mlx5_rx_intr_enable, 1982 .rx_queue_intr_disable = mlx5_rx_intr_disable, 1983 .is_removed = mlx5_is_removed, 1984 .get_module_info = mlx5_get_module_info, 1985 .get_module_eeprom = mlx5_get_module_eeprom, 1986 .hairpin_cap_get = mlx5_hairpin_cap_get, 1987 .mtr_ops_get = mlx5_flow_meter_ops_get, 1988 .hairpin_bind = mlx5_hairpin_bind, 1989 .hairpin_unbind = mlx5_hairpin_unbind, 1990 .hairpin_get_peer_ports = mlx5_hairpin_get_peer_ports, 1991 .hairpin_queue_peer_update = mlx5_hairpin_queue_peer_update, 1992 .hairpin_queue_peer_bind = mlx5_hairpin_queue_peer_bind, 1993 .hairpin_queue_peer_unbind = mlx5_hairpin_queue_peer_unbind, 1994 .get_monitor_addr = mlx5_get_monitor_addr, 1995 }; 1996 1997 /** 1998 * Verify and store value for device argument. 1999 * 2000 * @param[in] key 2001 * Key argument to verify. 2002 * @param[in] val 2003 * Value associated with key. 2004 * @param opaque 2005 * User data. 2006 * 2007 * @return 2008 * 0 on success, a negative errno value otherwise and rte_errno is set. 2009 */ 2010 static int 2011 mlx5_args_check(const char *key, const char *val, void *opaque) 2012 { 2013 struct mlx5_dev_config *config = opaque; 2014 unsigned long mod; 2015 signed long tmp; 2016 2017 /* No-op, port representors are processed in mlx5_dev_spawn(). */ 2018 if (!strcmp(MLX5_DRIVER_KEY, key) || !strcmp(MLX5_REPRESENTOR, key)) 2019 return 0; 2020 errno = 0; 2021 tmp = strtol(val, NULL, 0); 2022 if (errno) { 2023 rte_errno = errno; 2024 DRV_LOG(WARNING, "%s: \"%s\" is not a valid integer", key, val); 2025 return -rte_errno; 2026 } 2027 if (tmp < 0 && strcmp(MLX5_TX_PP, key) && strcmp(MLX5_TX_SKEW, key)) { 2028 /* Negative values are acceptable for some keys only. */ 2029 rte_errno = EINVAL; 2030 DRV_LOG(WARNING, "%s: invalid negative value \"%s\"", key, val); 2031 return -rte_errno; 2032 } 2033 mod = tmp >= 0 ? 
tmp : -tmp; 2034 if (strcmp(MLX5_RXQ_CQE_COMP_EN, key) == 0) { 2035 if (tmp > MLX5_CQE_RESP_FORMAT_L34H_STRIDX) { 2036 DRV_LOG(ERR, "invalid CQE compression " 2037 "format parameter"); 2038 rte_errno = EINVAL; 2039 return -rte_errno; 2040 } 2041 config->cqe_comp = !!tmp; 2042 config->cqe_comp_fmt = tmp; 2043 } else if (strcmp(MLX5_RXQ_PKT_PAD_EN, key) == 0) { 2044 config->hw_padding = !!tmp; 2045 } else if (strcmp(MLX5_RX_MPRQ_EN, key) == 0) { 2046 config->mprq.enabled = !!tmp; 2047 } else if (strcmp(MLX5_RX_MPRQ_LOG_STRIDE_NUM, key) == 0) { 2048 config->mprq.stride_num_n = tmp; 2049 } else if (strcmp(MLX5_RX_MPRQ_LOG_STRIDE_SIZE, key) == 0) { 2050 config->mprq.stride_size_n = tmp; 2051 } else if (strcmp(MLX5_RX_MPRQ_MAX_MEMCPY_LEN, key) == 0) { 2052 config->mprq.max_memcpy_len = tmp; 2053 } else if (strcmp(MLX5_RXQS_MIN_MPRQ, key) == 0) { 2054 config->mprq.min_rxqs_num = tmp; 2055 } else if (strcmp(MLX5_TXQ_INLINE, key) == 0) { 2056 DRV_LOG(WARNING, "%s: deprecated parameter," 2057 " converted to txq_inline_max", key); 2058 config->txq_inline_max = tmp; 2059 } else if (strcmp(MLX5_TXQ_INLINE_MAX, key) == 0) { 2060 config->txq_inline_max = tmp; 2061 } else if (strcmp(MLX5_TXQ_INLINE_MIN, key) == 0) { 2062 config->txq_inline_min = tmp; 2063 } else if (strcmp(MLX5_TXQ_INLINE_MPW, key) == 0) { 2064 config->txq_inline_mpw = tmp; 2065 } else if (strcmp(MLX5_TXQS_MIN_INLINE, key) == 0) { 2066 config->txqs_inline = tmp; 2067 } else if (strcmp(MLX5_TXQS_MAX_VEC, key) == 0) { 2068 DRV_LOG(WARNING, "%s: deprecated parameter, ignored", key); 2069 } else if (strcmp(MLX5_TXQ_MPW_EN, key) == 0) { 2070 config->mps = !!tmp; 2071 } else if (strcmp(MLX5_TX_DB_NC, key) == 0) { 2072 if (tmp != MLX5_TXDB_CACHED && 2073 tmp != MLX5_TXDB_NCACHED && 2074 tmp != MLX5_TXDB_HEURISTIC) { 2075 DRV_LOG(ERR, "invalid Tx doorbell " 2076 "mapping parameter"); 2077 rte_errno = EINVAL; 2078 return -rte_errno; 2079 } 2080 config->dbnc = tmp; 2081 } else if (strcmp(MLX5_TXQ_MPW_HDR_DSEG_EN, key) == 0) { 2082 DRV_LOG(WARNING, "%s: deprecated parameter, ignored", key); 2083 } else if (strcmp(MLX5_TXQ_MAX_INLINE_LEN, key) == 0) { 2084 DRV_LOG(WARNING, "%s: deprecated parameter," 2085 " converted to txq_inline_mpw", key); 2086 config->txq_inline_mpw = tmp; 2087 } else if (strcmp(MLX5_TX_VEC_EN, key) == 0) { 2088 DRV_LOG(WARNING, "%s: deprecated parameter, ignored", key); 2089 } else if (strcmp(MLX5_TX_PP, key) == 0) { 2090 if (!mod) { 2091 DRV_LOG(ERR, "Zero Tx packet pacing parameter"); 2092 rte_errno = EINVAL; 2093 return -rte_errno; 2094 } 2095 config->tx_pp = tmp; 2096 } else if (strcmp(MLX5_TX_SKEW, key) == 0) { 2097 config->tx_skew = tmp; 2098 } else if (strcmp(MLX5_RX_VEC_EN, key) == 0) { 2099 config->rx_vec_en = !!tmp; 2100 } else if (strcmp(MLX5_L3_VXLAN_EN, key) == 0) { 2101 config->l3_vxlan_en = !!tmp; 2102 } else if (strcmp(MLX5_VF_NL_EN, key) == 0) { 2103 config->vf_nl_en = !!tmp; 2104 } else if (strcmp(MLX5_DV_ESW_EN, key) == 0) { 2105 config->dv_esw_en = !!tmp; 2106 } else if (strcmp(MLX5_DV_FLOW_EN, key) == 0) { 2107 config->dv_flow_en = !!tmp; 2108 } else if (strcmp(MLX5_DV_XMETA_EN, key) == 0) { 2109 if (tmp != MLX5_XMETA_MODE_LEGACY && 2110 tmp != MLX5_XMETA_MODE_META16 && 2111 tmp != MLX5_XMETA_MODE_META32 && 2112 tmp != MLX5_XMETA_MODE_MISS_INFO) { 2113 DRV_LOG(ERR, "invalid extensive " 2114 "metadata parameter"); 2115 rte_errno = EINVAL; 2116 return -rte_errno; 2117 } 2118 if (tmp != MLX5_XMETA_MODE_MISS_INFO) 2119 config->dv_xmeta_en = tmp; 2120 else 2121 config->dv_miss_info = 1; 2122 } else if 
(strcmp(MLX5_LACP_BY_USER, key) == 0) {
		config->lacp_by_user = !!tmp;
	} else if (strcmp(MLX5_MR_EXT_MEMSEG_EN, key) == 0) {
		config->mr_ext_memseg_en = !!tmp;
	} else if (strcmp(MLX5_MAX_DUMP_FILES_NUM, key) == 0) {
		config->max_dump_files_num = tmp;
	} else if (strcmp(MLX5_LRO_TIMEOUT_USEC, key) == 0) {
		config->lro.timeout = tmp;
	} else if (strcmp(RTE_DEVARGS_KEY_CLASS, key) == 0) {
		DRV_LOG(DEBUG, "class argument is %s.", val);
	} else if (strcmp(MLX5_HP_BUF_SIZE, key) == 0) {
		config->log_hp_size = tmp;
	} else if (strcmp(MLX5_RECLAIM_MEM, key) == 0) {
		if (tmp != MLX5_RCM_NONE &&
		    tmp != MLX5_RCM_LIGHT &&
		    tmp != MLX5_RCM_AGGR) {
			DRV_LOG(ERR, "Unrecognized %s: \"%s\"", key, val);
			rte_errno = EINVAL;
			return -rte_errno;
		}
		config->reclaim_mode = tmp;
	} else if (strcmp(MLX5_SYS_MEM_EN, key) == 0) {
		config->sys_mem_en = !!tmp;
	} else if (strcmp(MLX5_DECAP_EN, key) == 0) {
		config->decap_en = !!tmp;
	} else if (strcmp(MLX5_ALLOW_DUPLICATE_PATTERN, key) == 0) {
		config->allow_duplicate_pattern = !!tmp;
	} else if (strcmp(MLX5_MR_MEMPOOL_REG_EN, key) == 0) {
		config->mr_mempool_reg_en = !!tmp;
	} else {
		DRV_LOG(WARNING, "%s: unknown parameter", key);
		rte_errno = EINVAL;
		return -rte_errno;
	}
	return 0;
}

/**
 * Parse device parameters.
 *
 * @param config
 *   Pointer to device configuration structure.
 * @param devargs
 *   Device arguments structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_args(struct mlx5_dev_config *config, struct rte_devargs *devargs)
{
	const char **params = (const char *[]){
		MLX5_DRIVER_KEY,
		MLX5_RXQ_CQE_COMP_EN,
		MLX5_RXQ_PKT_PAD_EN,
		MLX5_RX_MPRQ_EN,
		MLX5_RX_MPRQ_LOG_STRIDE_NUM,
		MLX5_RX_MPRQ_LOG_STRIDE_SIZE,
		MLX5_RX_MPRQ_MAX_MEMCPY_LEN,
		MLX5_RXQS_MIN_MPRQ,
		MLX5_TXQ_INLINE,
		MLX5_TXQ_INLINE_MIN,
		MLX5_TXQ_INLINE_MAX,
		MLX5_TXQ_INLINE_MPW,
		MLX5_TXQS_MIN_INLINE,
		MLX5_TXQS_MAX_VEC,
		MLX5_TXQ_MPW_EN,
		MLX5_TXQ_MPW_HDR_DSEG_EN,
		MLX5_TXQ_MAX_INLINE_LEN,
		MLX5_TX_DB_NC,
		MLX5_TX_PP,
		MLX5_TX_SKEW,
		MLX5_TX_VEC_EN,
		MLX5_RX_VEC_EN,
		MLX5_L3_VXLAN_EN,
		MLX5_VF_NL_EN,
		MLX5_DV_ESW_EN,
		MLX5_DV_FLOW_EN,
		MLX5_DV_XMETA_EN,
		MLX5_LACP_BY_USER,
		MLX5_MR_EXT_MEMSEG_EN,
		MLX5_REPRESENTOR,
		MLX5_MAX_DUMP_FILES_NUM,
		MLX5_LRO_TIMEOUT_USEC,
		RTE_DEVARGS_KEY_CLASS,
		MLX5_HP_BUF_SIZE,
		MLX5_RECLAIM_MEM,
		MLX5_SYS_MEM_EN,
		MLX5_DECAP_EN,
		MLX5_ALLOW_DUPLICATE_PATTERN,
		MLX5_MR_MEMPOOL_REG_EN,
		NULL,
	};
	struct rte_kvargs *kvlist;
	int ret = 0;
	int i;

	if (devargs == NULL)
		return 0;
	/* Following UGLY cast is done to pass checkpatch. */
	kvlist = rte_kvargs_parse(devargs->args, params);
	if (kvlist == NULL) {
		rte_errno = EINVAL;
		return -rte_errno;
	}
	/* Process parameters. */
	for (i = 0; (params[i] != NULL); ++i) {
		if (rte_kvargs_count(kvlist, params[i])) {
			ret = rte_kvargs_process(kvlist, params[i],
						 mlx5_args_check, config);
			if (ret) {
				rte_errno = EINVAL;
				rte_kvargs_free(kvlist);
				return -rte_errno;
			}
		}
	}
	rte_kvargs_free(kvlist);
	return 0;
}

/**
 * Configures the minimal amount of data to inline into WQE
 * while sending packets.
 *
 * - txq_inline_min has the highest priority, if this
 *   key is specified in devargs;
 * - if DevX is enabled the inline mode is queried from the
 *   device (HCA attributes and NIC vport context if needed);
 * - otherwise L2 mode (18 bytes) is assumed for ConnectX-4/4 Lx
 *   and none (0 bytes) for other NICs.
 *
 * @param spawn
 *   Verbs device parameters (name, port, switch_info) to spawn.
 * @param config
 *   Device configuration parameters.
 */
void
mlx5_set_min_inline(struct mlx5_dev_spawn_data *spawn,
		    struct mlx5_dev_config *config)
{
	if (config->txq_inline_min != MLX5_ARG_UNSET) {
		/* Application defines size of inlined data explicitly. */
		if (spawn->pci_dev != NULL) {
			switch (spawn->pci_dev->id.device_id) {
			case PCI_DEVICE_ID_MELLANOX_CONNECTX4:
			case PCI_DEVICE_ID_MELLANOX_CONNECTX4VF:
				if (config->txq_inline_min <
					    (int)MLX5_INLINE_HSIZE_L2) {
					DRV_LOG(DEBUG,
						"txq_inline_min aligned to minimal ConnectX-4 required value %d",
						(int)MLX5_INLINE_HSIZE_L2);
					config->txq_inline_min =
							MLX5_INLINE_HSIZE_L2;
				}
				break;
			}
		}
		goto exit;
	}
	if (config->hca_attr.eth_net_offloads) {
		/* We have DevX enabled, inline mode queried successfully. */
		switch (config->hca_attr.wqe_inline_mode) {
		case MLX5_CAP_INLINE_MODE_L2:
			/* Outer L2 header must be inlined. */
			config->txq_inline_min = MLX5_INLINE_HSIZE_L2;
			goto exit;
		case MLX5_CAP_INLINE_MODE_NOT_REQUIRED:
			/* No inline data is required by the NIC. */
			config->txq_inline_min = MLX5_INLINE_HSIZE_NONE;
			config->hw_vlan_insert =
				config->hca_attr.wqe_vlan_insert;
			DRV_LOG(DEBUG, "Tx VLAN insertion is supported");
			goto exit;
		case MLX5_CAP_INLINE_MODE_VPORT_CONTEXT:
			/* Inline mode is defined by the NIC vport context. */
			if (!config->hca_attr.eth_virt)
				break;
			switch (config->hca_attr.vport_inline_mode) {
			case MLX5_INLINE_MODE_NONE:
				config->txq_inline_min =
					MLX5_INLINE_HSIZE_NONE;
				goto exit;
			case MLX5_INLINE_MODE_L2:
				config->txq_inline_min =
					MLX5_INLINE_HSIZE_L2;
				goto exit;
			case MLX5_INLINE_MODE_IP:
				config->txq_inline_min =
					MLX5_INLINE_HSIZE_L3;
				goto exit;
			case MLX5_INLINE_MODE_TCP_UDP:
				config->txq_inline_min =
					MLX5_INLINE_HSIZE_L4;
				goto exit;
			case MLX5_INLINE_MODE_INNER_L2:
				config->txq_inline_min =
					MLX5_INLINE_HSIZE_INNER_L2;
				goto exit;
			case MLX5_INLINE_MODE_INNER_IP:
				config->txq_inline_min =
					MLX5_INLINE_HSIZE_INNER_L3;
				goto exit;
			case MLX5_INLINE_MODE_INNER_TCP_UDP:
				config->txq_inline_min =
					MLX5_INLINE_HSIZE_INNER_L4;
				goto exit;
			}
		}
	}
	if (spawn->pci_dev == NULL) {
		config->txq_inline_min = MLX5_INLINE_HSIZE_NONE;
		goto exit;
	}
	/*
	 * We get here if we are unable to deduce
	 * inline data size with DevX. Try PCI ID
	 * to determine old NICs.
	 */
	switch (spawn->pci_dev->id.device_id) {
	case PCI_DEVICE_ID_MELLANOX_CONNECTX4:
	case PCI_DEVICE_ID_MELLANOX_CONNECTX4VF:
	case PCI_DEVICE_ID_MELLANOX_CONNECTX4LX:
	case PCI_DEVICE_ID_MELLANOX_CONNECTX4LXVF:
		config->txq_inline_min = MLX5_INLINE_HSIZE_L2;
		config->hw_vlan_insert = 0;
		break;
	case PCI_DEVICE_ID_MELLANOX_CONNECTX5:
	case PCI_DEVICE_ID_MELLANOX_CONNECTX5VF:
	case PCI_DEVICE_ID_MELLANOX_CONNECTX5EX:
	case PCI_DEVICE_ID_MELLANOX_CONNECTX5EXVF:
		/*
		 * These NICs support VLAN insertion from WQE and
		 * report the wqe_vlan_insert flag. But there is a bug
		 * that may break PFC control, so disable the feature.
		 */
		config->hw_vlan_insert = 0;
		config->txq_inline_min = MLX5_INLINE_HSIZE_NONE;
		break;
	default:
		config->txq_inline_min = MLX5_INLINE_HSIZE_NONE;
		break;
	}
exit:
	DRV_LOG(DEBUG, "min tx inline configured: %d", config->txq_inline_min);
}

/**
 * Configures the metadata mask fields in the shared context.
 *
 * @param [in] dev
 *   Pointer to Ethernet device.
 */
void
mlx5_set_metadata_mask(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_dev_ctx_shared *sh = priv->sh;
	uint32_t meta, mark, reg_c0;

	reg_c0 = ~priv->vport_meta_mask;
	switch (priv->config.dv_xmeta_en) {
	case MLX5_XMETA_MODE_LEGACY:
		meta = UINT32_MAX;
		mark = MLX5_FLOW_MARK_MASK;
		break;
	case MLX5_XMETA_MODE_META16:
		meta = reg_c0 >> rte_bsf32(reg_c0);
		mark = MLX5_FLOW_MARK_MASK;
		break;
	case MLX5_XMETA_MODE_META32:
		meta = UINT32_MAX;
		mark = (reg_c0 >> rte_bsf32(reg_c0)) & MLX5_FLOW_MARK_MASK;
		break;
	default:
		meta = 0;
		mark = 0;
		MLX5_ASSERT(false);
		break;
	}
	if (sh->dv_mark_mask && sh->dv_mark_mask != mark)
		DRV_LOG(WARNING, "metadata MARK mask mismatch %08X:%08X",
			sh->dv_mark_mask, mark);
	else
		sh->dv_mark_mask = mark;
	if (sh->dv_meta_mask && sh->dv_meta_mask != meta)
		DRV_LOG(WARNING, "metadata META mask mismatch %08X:%08X",
			sh->dv_meta_mask, meta);
	else
		sh->dv_meta_mask = meta;
	if (sh->dv_regc0_mask && sh->dv_regc0_mask != reg_c0)
		DRV_LOG(WARNING, "metadata reg_c0 mask mismatch %08X:%08X",
			sh->dv_regc0_mask, reg_c0);
	else
		sh->dv_regc0_mask = reg_c0;
	DRV_LOG(DEBUG, "metadata mode %u", priv->config.dv_xmeta_en);
	DRV_LOG(DEBUG, "metadata MARK mask %08X", sh->dv_mark_mask);
	DRV_LOG(DEBUG, "metadata META mask %08X", sh->dv_meta_mask);
	DRV_LOG(DEBUG, "metadata reg_c0 mask %08X", sh->dv_regc0_mask);
}

int
rte_pmd_mlx5_get_dyn_flag_names(char *names[], unsigned int n)
{
	static const char *const dynf_names[] = {
		RTE_PMD_MLX5_FINE_GRANULARITY_INLINE,
		RTE_MBUF_DYNFLAG_METADATA_NAME,
		RTE_MBUF_DYNFLAG_TX_TIMESTAMP_NAME
	};
	unsigned int i;

	if (n < RTE_DIM(dynf_names))
		return -ENOMEM;
	for (i = 0; i < RTE_DIM(dynf_names); i++) {
		if (names[i] == NULL)
			return -EINVAL;
		strcpy(names[i], dynf_names[i]);
	}
	return RTE_DIM(dynf_names);
}

/**
 * Check sibling device configurations.
 *
 * Sibling devices sharing the same Infiniband device context
 * should have compatible configurations.
 *
 * @param priv
 *   Private device descriptor.
 * @param config
 *   Configuration of the device to be created.
 * @param dpdk_dev
 *   Backing DPDK device.
 *
 * @return
 *   0 if the configurations match, a positive errno value otherwise
 *   (rte_errno is also set).
 */
int
mlx5_dev_check_sibling_config(struct mlx5_priv *priv,
			      struct mlx5_dev_config *config,
			      struct rte_device *dpdk_dev)
{
	struct mlx5_dev_ctx_shared *sh = priv->sh;
	struct mlx5_dev_config *sh_conf = NULL;
	uint16_t port_id;

	MLX5_ASSERT(sh);
	/* Nothing to compare for the single/first device. */
	if (sh->refcnt == 1)
		return 0;
	/* Find the device with shared context. */
	MLX5_ETH_FOREACH_DEV(port_id, dpdk_dev) {
		struct mlx5_priv *opriv =
			rte_eth_devices[port_id].data->dev_private;

		if (opriv && opriv != priv && opriv->sh == sh) {
			sh_conf = &opriv->config;
			break;
		}
	}
	if (!sh_conf)
		return 0;
	if (sh_conf->dv_flow_en ^ config->dv_flow_en) {
		DRV_LOG(ERR, "\"dv_flow_en\" configuration mismatch"
			" for shared %s context", sh->ibdev_name);
		rte_errno = EINVAL;
		return rte_errno;
	}
	if (sh_conf->dv_xmeta_en ^ config->dv_xmeta_en) {
		DRV_LOG(ERR, "\"dv_xmeta_en\" configuration mismatch"
			" for shared %s context", sh->ibdev_name);
		rte_errno = EINVAL;
		return rte_errno;
	}
	return 0;
}

/**
 * Look for the Ethernet device belonging to the mlx5 driver.
 *
 * @param[in] port_id
 *   port_id to start looking for device.
 * @param[in] odev
 *   Pointer to the hint device. When a device is being probed,
 *   its siblings (the master and preceding representors) might not
 *   have a driver assigned yet, because mlx5_os_pci_probe() has not
 *   completed; in this case matching on the hint device can be used
 *   to detect sibling devices.
 *
 * @return
 *   port_id of found device, RTE_MAX_ETHPORTS if not found.
 */
uint16_t
mlx5_eth_find_next(uint16_t port_id, struct rte_device *odev)
{
	while (port_id < RTE_MAX_ETHPORTS) {
		struct rte_eth_dev *dev = &rte_eth_devices[port_id];

		if (dev->state != RTE_ETH_DEV_UNUSED &&
		    dev->device &&
		    (dev->device == odev ||
		     (dev->device->driver &&
		      dev->device->driver->name &&
		      ((strcmp(dev->device->driver->name,
			       MLX5_PCI_DRIVER_NAME) == 0) ||
		       (strcmp(dev->device->driver->name,
			       MLX5_AUXILIARY_DRIVER_NAME) == 0)))))
			break;
		port_id++;
	}
	if (port_id >= RTE_MAX_ETHPORTS)
		return RTE_MAX_ETHPORTS;
	return port_id;
}

/**
 * Callback to remove a device.
 *
 * This function removes all Ethernet devices belonging to a given device.
 *
 * @param[in] dev
 *   Pointer to the generic device.
 *
 * @return
 *   0 on success, a negative errno value (-EIO) if closing any port failed.
 */
int
mlx5_net_remove(struct rte_device *dev)
{
	uint16_t port_id;
	int ret = 0;

	RTE_ETH_FOREACH_DEV_OF(port_id, dev) {
		/*
		 * mlx5_dev_close() is not registered to secondary process,
		 * call the close function explicitly for secondary process.
		 */
		if (rte_eal_process_type() == RTE_PROC_SECONDARY)
			ret |= mlx5_dev_close(&rte_eth_devices[port_id]);
		else
			ret |= rte_eth_dev_close(port_id);
	}
	return ret == 0 ?
0 : -EIO; 2563 } 2564 2565 static const struct rte_pci_id mlx5_pci_id_map[] = { 2566 { 2567 RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX, 2568 PCI_DEVICE_ID_MELLANOX_CONNECTX4) 2569 }, 2570 { 2571 RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX, 2572 PCI_DEVICE_ID_MELLANOX_CONNECTX4VF) 2573 }, 2574 { 2575 RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX, 2576 PCI_DEVICE_ID_MELLANOX_CONNECTX4LX) 2577 }, 2578 { 2579 RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX, 2580 PCI_DEVICE_ID_MELLANOX_CONNECTX4LXVF) 2581 }, 2582 { 2583 RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX, 2584 PCI_DEVICE_ID_MELLANOX_CONNECTX5) 2585 }, 2586 { 2587 RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX, 2588 PCI_DEVICE_ID_MELLANOX_CONNECTX5VF) 2589 }, 2590 { 2591 RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX, 2592 PCI_DEVICE_ID_MELLANOX_CONNECTX5EX) 2593 }, 2594 { 2595 RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX, 2596 PCI_DEVICE_ID_MELLANOX_CONNECTX5EXVF) 2597 }, 2598 { 2599 RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX, 2600 PCI_DEVICE_ID_MELLANOX_CONNECTX5BF) 2601 }, 2602 { 2603 RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX, 2604 PCI_DEVICE_ID_MELLANOX_CONNECTX5BFVF) 2605 }, 2606 { 2607 RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX, 2608 PCI_DEVICE_ID_MELLANOX_CONNECTX6) 2609 }, 2610 { 2611 RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX, 2612 PCI_DEVICE_ID_MELLANOX_CONNECTX6VF) 2613 }, 2614 { 2615 RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX, 2616 PCI_DEVICE_ID_MELLANOX_CONNECTX6DX) 2617 }, 2618 { 2619 RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX, 2620 PCI_DEVICE_ID_MELLANOX_CONNECTXVF) 2621 }, 2622 { 2623 RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX, 2624 PCI_DEVICE_ID_MELLANOX_CONNECTX6DXBF) 2625 }, 2626 { 2627 RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX, 2628 PCI_DEVICE_ID_MELLANOX_CONNECTX6LX) 2629 }, 2630 { 2631 RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX, 2632 PCI_DEVICE_ID_MELLANOX_CONNECTX7) 2633 }, 2634 { 2635 RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX, 2636 PCI_DEVICE_ID_MELLANOX_CONNECTX7BF) 2637 }, 2638 { 2639 .vendor_id = 0 2640 } 2641 }; 2642 2643 static struct mlx5_class_driver mlx5_net_driver = { 2644 .drv_class = MLX5_CLASS_ETH, 2645 .name = RTE_STR(MLX5_ETH_DRIVER_NAME), 2646 .id_table = mlx5_pci_id_map, 2647 .probe = mlx5_os_net_probe, 2648 .remove = mlx5_net_remove, 2649 .dma_map = mlx5_net_dma_map, 2650 .dma_unmap = mlx5_net_dma_unmap, 2651 .probe_again = 1, 2652 .intr_lsc = 1, 2653 .intr_rmv = 1, 2654 }; 2655 2656 /* Initialize driver log type. */ 2657 RTE_LOG_REGISTER_DEFAULT(mlx5_logtype, NOTICE) 2658 2659 /** 2660 * Driver initialization routine. 2661 */ 2662 RTE_INIT(rte_mlx5_pmd_init) 2663 { 2664 pthread_mutex_init(&mlx5_dev_ctx_list_mutex, NULL); 2665 mlx5_common_init(); 2666 /* Build the static tables for Verbs conversion. */ 2667 mlx5_set_ptype_table(); 2668 mlx5_set_cksum_table(); 2669 mlx5_set_swp_types_table(); 2670 if (mlx5_glue) 2671 mlx5_class_driver_register(&mlx5_net_driver); 2672 } 2673 2674 RTE_PMD_EXPORT_NAME(MLX5_ETH_DRIVER_NAME, __COUNTER__); 2675 RTE_PMD_REGISTER_PCI_TABLE(MLX5_ETH_DRIVER_NAME, mlx5_pci_id_map); 2676 RTE_PMD_REGISTER_KMOD_DEP(MLX5_ETH_DRIVER_NAME, "* ib_uverbs & mlx5_core & mlx5_ib"); 2677
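
/*
 * Usage sketch (illustrative only, not compiled as part of the PMD):
 * the devargs keys handled by mlx5_args_check() above are normally
 * supplied on the EAL command line together with the device, e.g.:
 *
 *   dpdk-testpmd -l 0-3 -n 4 \
 *       -a 0000:03:00.0,rxq_cqe_comp_en=1,mprq_en=1,txq_inline_max=128
 *
 * The PCI address above is a placeholder. The same key/value string is
 * delivered to the PMD as rte_devargs::args and parsed by mlx5_args()
 * via rte_kvargs_parse()/rte_kvargs_process(). A hotplug-style
 * programmatic equivalent from application code would be:
 *
 *   ret = rte_dev_probe("0000:03:00.0,mprq_en=1,rxqs_min_mprq=4");
 */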
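
/*
 * Usage sketch for rte_pmd_mlx5_get_dyn_flag_names() above (illustrative
 * application code, not compiled as part of the PMD; the buffer size
 * assumes RTE_MBUF_DYN_NAMESIZE from rte_mbuf_dyn.h):
 *
 *   char buf[3][RTE_MBUF_DYN_NAMESIZE];
 *   char *names[3] = { buf[0], buf[1], buf[2] };
 *   int i, nb = rte_pmd_mlx5_get_dyn_flag_names(names, 3);
 *
 *   for (i = 0; i < nb; i++) {
 *           int bit = rte_mbuf_dynflag_lookup(names[i], NULL);
 *
 *           if (bit >= 0)
 *                   printf("%s -> mbuf ol_flags bit %d\n", names[i], bit);
 *   }
 *
 * The lookup may fail for flags the PMD has not registered yet (e.g. the
 * Tx timestamp flag before Tx scheduling is enabled).
 */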