1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright 2015 6WIND S.A. 3 * Copyright 2015 Mellanox Technologies, Ltd 4 */ 5 6 #include <stddef.h> 7 #include <unistd.h> 8 #include <string.h> 9 #include <stdint.h> 10 #include <stdlib.h> 11 #include <errno.h> 12 13 #include <rte_malloc.h> 14 #include <ethdev_driver.h> 15 #include <rte_pci.h> 16 #include <rte_bus_pci.h> 17 #include <rte_common.h> 18 #include <rte_kvargs.h> 19 #include <rte_rwlock.h> 20 #include <rte_spinlock.h> 21 #include <rte_string_fns.h> 22 #include <rte_alarm.h> 23 #include <rte_cycles.h> 24 25 #include <mlx5_glue.h> 26 #include <mlx5_devx_cmds.h> 27 #include <mlx5_common.h> 28 #include <mlx5_common_os.h> 29 #include <mlx5_common_mp.h> 30 #include <mlx5_malloc.h> 31 32 #include "mlx5_defs.h" 33 #include "mlx5.h" 34 #include "mlx5_utils.h" 35 #include "mlx5_rxtx.h" 36 #include "mlx5_rx.h" 37 #include "mlx5_tx.h" 38 #include "mlx5_autoconf.h" 39 #include "mlx5_mr.h" 40 #include "mlx5_flow.h" 41 #include "mlx5_flow_os.h" 42 #include "rte_pmd_mlx5.h" 43 44 #define MLX5_ETH_DRIVER_NAME mlx5_eth 45 46 /* Driver type key for new device global syntax. */ 47 #define MLX5_DRIVER_KEY "driver" 48 49 /* Device parameter to enable RX completion queue compression. */ 50 #define MLX5_RXQ_CQE_COMP_EN "rxq_cqe_comp_en" 51 52 /* Device parameter to enable padding Rx packet to cacheline size. */ 53 #define MLX5_RXQ_PKT_PAD_EN "rxq_pkt_pad_en" 54 55 /* Device parameter to enable Multi-Packet Rx queue. */ 56 #define MLX5_RX_MPRQ_EN "mprq_en" 57 58 /* Device parameter to configure log 2 of the number of strides for MPRQ. */ 59 #define MLX5_RX_MPRQ_LOG_STRIDE_NUM "mprq_log_stride_num" 60 61 /* Device parameter to configure log 2 of the stride size for MPRQ. */ 62 #define MLX5_RX_MPRQ_LOG_STRIDE_SIZE "mprq_log_stride_size" 63 64 /* Device parameter to limit the size of memcpy'd packet for MPRQ. */ 65 #define MLX5_RX_MPRQ_MAX_MEMCPY_LEN "mprq_max_memcpy_len" 66 67 /* Device parameter to set the minimum number of Rx queues to enable MPRQ. */ 68 #define MLX5_RXQS_MIN_MPRQ "rxqs_min_mprq" 69 70 /* Device parameter to configure inline send. Deprecated, ignored.*/ 71 #define MLX5_TXQ_INLINE "txq_inline" 72 73 /* Device parameter to limit packet size to inline with ordinary SEND. */ 74 #define MLX5_TXQ_INLINE_MAX "txq_inline_max" 75 76 /* Device parameter to configure minimal data size to inline. */ 77 #define MLX5_TXQ_INLINE_MIN "txq_inline_min" 78 79 /* Device parameter to limit packet size to inline with Enhanced MPW. */ 80 #define MLX5_TXQ_INLINE_MPW "txq_inline_mpw" 81 82 /* 83 * Device parameter to configure the number of TX queues threshold for 84 * enabling inline send. 85 */ 86 #define MLX5_TXQS_MIN_INLINE "txqs_min_inline" 87 88 /* 89 * Device parameter to configure the number of TX queues threshold for 90 * enabling vectorized Tx, deprecated, ignored (no vectorized Tx routines). 91 */ 92 #define MLX5_TXQS_MAX_VEC "txqs_max_vec" 93 94 /* Device parameter to enable multi-packet send WQEs. */ 95 #define MLX5_TXQ_MPW_EN "txq_mpw_en" 96 97 /* 98 * Device parameter to force doorbell register mapping 99 * to non-cahed region eliminating the extra write memory barrier. 100 */ 101 #define MLX5_TX_DB_NC "tx_db_nc" 102 103 /* 104 * Device parameter to include 2 dsegs in the title WQEBB. 105 * Deprecated, ignored. 106 */ 107 #define MLX5_TXQ_MPW_HDR_DSEG_EN "txq_mpw_hdr_dseg_en" 108 109 /* 110 * Device parameter to limit the size of inlining packet. 111 * Deprecated, ignored. 
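 *
 * Note: although marked deprecated above, the key is still accepted on the
 * device argument string; mlx5_args_check() below maps its value onto
 * txq_inline_mpw. Illustrative example only (the PCI address is a placeholder,
 * not a recommendation):
 *
 *   dpdk-testpmd -a 0000:03:00.0,txq_max_inline_len=256 -- -i
 *
 * behaves the same as passing txq_inline_mpw=256.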
112 */ 113 #define MLX5_TXQ_MAX_INLINE_LEN "txq_max_inline_len" 114 115 /* 116 * Device parameter to enable Tx scheduling on timestamps 117 * and specify the packet pacing granularity in nanoseconds. 118 */ 119 #define MLX5_TX_PP "tx_pp" 120 121 /* 122 * Device parameter to specify skew in nanoseconds on Tx datapath, 123 * it represents the time between SQ start WQE processing and 124 * appearing actual packet data on the wire. 125 */ 126 #define MLX5_TX_SKEW "tx_skew" 127 128 /* 129 * Device parameter to enable hardware Tx vector. 130 * Deprecated, ignored (no vectorized Tx routines anymore). 131 */ 132 #define MLX5_TX_VEC_EN "tx_vec_en" 133 134 /* Device parameter to enable hardware Rx vector. */ 135 #define MLX5_RX_VEC_EN "rx_vec_en" 136 137 /* Allow L3 VXLAN flow creation. */ 138 #define MLX5_L3_VXLAN_EN "l3_vxlan_en" 139 140 /* Activate DV E-Switch flow steering. */ 141 #define MLX5_DV_ESW_EN "dv_esw_en" 142 143 /* Activate DV flow steering. */ 144 #define MLX5_DV_FLOW_EN "dv_flow_en" 145 146 /* Enable extensive flow metadata support. */ 147 #define MLX5_DV_XMETA_EN "dv_xmeta_en" 148 149 /* Device parameter to let the user manage the lacp traffic of bonded device */ 150 #define MLX5_LACP_BY_USER "lacp_by_user" 151 152 /* Activate Netlink support in VF mode. */ 153 #define MLX5_VF_NL_EN "vf_nl_en" 154 155 /* Enable extending memsegs when creating a MR. */ 156 #define MLX5_MR_EXT_MEMSEG_EN "mr_ext_memseg_en" 157 158 /* Select port representors to instantiate. */ 159 #define MLX5_REPRESENTOR "representor" 160 161 /* Device parameter to configure the maximum number of dump files per queue. */ 162 #define MLX5_MAX_DUMP_FILES_NUM "max_dump_files_num" 163 164 /* Configure timeout of LRO session (in microseconds). */ 165 #define MLX5_LRO_TIMEOUT_USEC "lro_timeout_usec" 166 167 /* 168 * Device parameter to configure the total data buffer size for a single 169 * hairpin queue (logarithm value). 170 */ 171 #define MLX5_HP_BUF_SIZE "hp_buf_log_sz" 172 173 /* Flow memory reclaim mode. */ 174 #define MLX5_RECLAIM_MEM "reclaim_mem_mode" 175 176 /* The default memory allocator used in PMD. */ 177 #define MLX5_SYS_MEM_EN "sys_mem_en" 178 /* Decap will be used or not. */ 179 #define MLX5_DECAP_EN "decap_en" 180 181 /* Device parameter to configure allow or prevent duplicate rules pattern. */ 182 #define MLX5_ALLOW_DUPLICATE_PATTERN "allow_duplicate_pattern" 183 184 /* Shared memory between primary and secondary processes. */ 185 struct mlx5_shared_data *mlx5_shared_data; 186 187 /** Driver-specific log messages type. 
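 *
 * The verbosity of these messages is controlled through the standard EAL log
 * facility at run time, typically with an option such as
 * --log-level=pmd.net.mlx5:debug (the log name is assumed here; the exact
 * string is the one passed to the RTE_LOG_REGISTER macro for mlx5_logtype).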
*/ 188 int mlx5_logtype; 189 190 static LIST_HEAD(, mlx5_dev_ctx_shared) mlx5_dev_ctx_list = 191 LIST_HEAD_INITIALIZER(); 192 static pthread_mutex_t mlx5_dev_ctx_list_mutex; 193 static const struct mlx5_indexed_pool_config mlx5_ipool_cfg[] = { 194 #if defined(HAVE_IBV_FLOW_DV_SUPPORT) || !defined(HAVE_INFINIBAND_VERBS_H) 195 [MLX5_IPOOL_DECAP_ENCAP] = { 196 .size = sizeof(struct mlx5_flow_dv_encap_decap_resource), 197 .trunk_size = 64, 198 .grow_trunk = 3, 199 .grow_shift = 2, 200 .need_lock = 1, 201 .release_mem_en = 1, 202 .malloc = mlx5_malloc, 203 .free = mlx5_free, 204 .type = "mlx5_encap_decap_ipool", 205 }, 206 [MLX5_IPOOL_PUSH_VLAN] = { 207 .size = sizeof(struct mlx5_flow_dv_push_vlan_action_resource), 208 .trunk_size = 64, 209 .grow_trunk = 3, 210 .grow_shift = 2, 211 .need_lock = 1, 212 .release_mem_en = 1, 213 .malloc = mlx5_malloc, 214 .free = mlx5_free, 215 .type = "mlx5_push_vlan_ipool", 216 }, 217 [MLX5_IPOOL_TAG] = { 218 .size = sizeof(struct mlx5_flow_dv_tag_resource), 219 .trunk_size = 64, 220 .grow_trunk = 3, 221 .grow_shift = 2, 222 .need_lock = 1, 223 .release_mem_en = 0, 224 .per_core_cache = (1 << 16), 225 .malloc = mlx5_malloc, 226 .free = mlx5_free, 227 .type = "mlx5_tag_ipool", 228 }, 229 [MLX5_IPOOL_PORT_ID] = { 230 .size = sizeof(struct mlx5_flow_dv_port_id_action_resource), 231 .trunk_size = 64, 232 .grow_trunk = 3, 233 .grow_shift = 2, 234 .need_lock = 1, 235 .release_mem_en = 1, 236 .malloc = mlx5_malloc, 237 .free = mlx5_free, 238 .type = "mlx5_port_id_ipool", 239 }, 240 [MLX5_IPOOL_JUMP] = { 241 .size = sizeof(struct mlx5_flow_tbl_data_entry), 242 .trunk_size = 64, 243 .grow_trunk = 3, 244 .grow_shift = 2, 245 .need_lock = 1, 246 .release_mem_en = 1, 247 .malloc = mlx5_malloc, 248 .free = mlx5_free, 249 .type = "mlx5_jump_ipool", 250 }, 251 [MLX5_IPOOL_SAMPLE] = { 252 .size = sizeof(struct mlx5_flow_dv_sample_resource), 253 .trunk_size = 64, 254 .grow_trunk = 3, 255 .grow_shift = 2, 256 .need_lock = 1, 257 .release_mem_en = 1, 258 .malloc = mlx5_malloc, 259 .free = mlx5_free, 260 .type = "mlx5_sample_ipool", 261 }, 262 [MLX5_IPOOL_DEST_ARRAY] = { 263 .size = sizeof(struct mlx5_flow_dv_dest_array_resource), 264 .trunk_size = 64, 265 .grow_trunk = 3, 266 .grow_shift = 2, 267 .need_lock = 1, 268 .release_mem_en = 1, 269 .malloc = mlx5_malloc, 270 .free = mlx5_free, 271 .type = "mlx5_dest_array_ipool", 272 }, 273 [MLX5_IPOOL_TUNNEL_ID] = { 274 .size = sizeof(struct mlx5_flow_tunnel), 275 .trunk_size = MLX5_MAX_TUNNELS, 276 .need_lock = 1, 277 .release_mem_en = 1, 278 .type = "mlx5_tunnel_offload", 279 }, 280 [MLX5_IPOOL_TNL_TBL_ID] = { 281 .size = 0, 282 .need_lock = 1, 283 .type = "mlx5_flow_tnl_tbl_ipool", 284 }, 285 #endif 286 [MLX5_IPOOL_MTR] = { 287 /** 288 * The ipool index should grow continually from small to big, 289 * for meter idx, so not set grow_trunk to avoid meter index 290 * not jump continually. 
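 * In other words: meter indexes must stay dense and be handed out in strictly
 * increasing order, so this entry keeps a fixed trunk size (no grow_trunk /
 * grow_shift) to avoid holes in the index space.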
291 */ 292 .size = sizeof(struct mlx5_legacy_flow_meter), 293 .trunk_size = 64, 294 .need_lock = 1, 295 .release_mem_en = 1, 296 .malloc = mlx5_malloc, 297 .free = mlx5_free, 298 .type = "mlx5_meter_ipool", 299 }, 300 [MLX5_IPOOL_MCP] = { 301 .size = sizeof(struct mlx5_flow_mreg_copy_resource), 302 .trunk_size = 64, 303 .grow_trunk = 3, 304 .grow_shift = 2, 305 .need_lock = 1, 306 .release_mem_en = 1, 307 .malloc = mlx5_malloc, 308 .free = mlx5_free, 309 .type = "mlx5_mcp_ipool", 310 }, 311 [MLX5_IPOOL_HRXQ] = { 312 .size = (sizeof(struct mlx5_hrxq) + MLX5_RSS_HASH_KEY_LEN), 313 .trunk_size = 64, 314 .grow_trunk = 3, 315 .grow_shift = 2, 316 .need_lock = 1, 317 .release_mem_en = 1, 318 .malloc = mlx5_malloc, 319 .free = mlx5_free, 320 .type = "mlx5_hrxq_ipool", 321 }, 322 [MLX5_IPOOL_MLX5_FLOW] = { 323 /* 324 * MLX5_IPOOL_MLX5_FLOW size varies for DV and VERBS flows. 325 * It set in run time according to PCI function configuration. 326 */ 327 .size = 0, 328 .trunk_size = 64, 329 .grow_trunk = 3, 330 .grow_shift = 2, 331 .need_lock = 1, 332 .release_mem_en = 0, 333 .per_core_cache = 1 << 19, 334 .malloc = mlx5_malloc, 335 .free = mlx5_free, 336 .type = "mlx5_flow_handle_ipool", 337 }, 338 [MLX5_IPOOL_RTE_FLOW] = { 339 .size = sizeof(struct rte_flow), 340 .trunk_size = 4096, 341 .need_lock = 1, 342 .release_mem_en = 1, 343 .malloc = mlx5_malloc, 344 .free = mlx5_free, 345 .type = "rte_flow_ipool", 346 }, 347 [MLX5_IPOOL_RSS_EXPANTION_FLOW_ID] = { 348 .size = 0, 349 .need_lock = 1, 350 .type = "mlx5_flow_rss_id_ipool", 351 }, 352 [MLX5_IPOOL_RSS_SHARED_ACTIONS] = { 353 .size = sizeof(struct mlx5_shared_action_rss), 354 .trunk_size = 64, 355 .grow_trunk = 3, 356 .grow_shift = 2, 357 .need_lock = 1, 358 .release_mem_en = 1, 359 .malloc = mlx5_malloc, 360 .free = mlx5_free, 361 .type = "mlx5_shared_action_rss", 362 }, 363 [MLX5_IPOOL_MTR_POLICY] = { 364 /** 365 * The ipool index should grow continually from small to big, 366 * for policy idx, so not set grow_trunk to avoid policy index 367 * not jump continually. 368 */ 369 .size = sizeof(struct mlx5_flow_meter_sub_policy), 370 .trunk_size = 64, 371 .need_lock = 1, 372 .release_mem_en = 1, 373 .malloc = mlx5_malloc, 374 .free = mlx5_free, 375 .type = "mlx5_meter_policy_ipool", 376 }, 377 }; 378 379 380 #define MLX5_FLOW_MIN_ID_POOL_SIZE 512 381 #define MLX5_ID_GENERATION_ARRAY_FACTOR 16 382 383 #define MLX5_FLOW_TABLE_HLIST_ARRAY_SIZE 1024 384 385 /** 386 * Decide whether representor ID is a HPF(host PF) port on BF2. 387 * 388 * @param dev 389 * Pointer to Ethernet device structure. 390 * 391 * @return 392 * Non-zero if HPF, otherwise 0. 393 */ 394 bool 395 mlx5_is_hpf(struct rte_eth_dev *dev) 396 { 397 struct mlx5_priv *priv = dev->data->dev_private; 398 uint16_t repr = MLX5_REPRESENTOR_REPR(priv->representor_id); 399 int type = MLX5_REPRESENTOR_TYPE(priv->representor_id); 400 401 return priv->representor != 0 && type == RTE_ETH_REPRESENTOR_VF && 402 MLX5_REPRESENTOR_REPR(-1) == repr; 403 } 404 405 /** 406 * Decide whether representor ID is a SF port representor. 407 * 408 * @param dev 409 * Pointer to Ethernet device structure. 410 * 411 * @return 412 * Non-zero if HPF, otherwise 0. 413 */ 414 bool 415 mlx5_is_sf_repr(struct rte_eth_dev *dev) 416 { 417 struct mlx5_priv *priv = dev->data->dev_private; 418 int type = MLX5_REPRESENTOR_TYPE(priv->representor_id); 419 420 return priv->representor != 0 && type == RTE_ETH_REPRESENTOR_SF; 421 } 422 423 /** 424 * Initialize the ASO aging management structure. 
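 *
 * The routine is idempotent: when sh->aso_age_mng is already allocated it
 * returns 0 immediately. Otherwise it allocates the management structure,
 * starts the ASO flow-hit queue (ASO_OPC_MOD_FLOW_HIT) and initializes the
 * resize/free spinlocks and the free-object list.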
425 * 426 * @param[in] sh 427 * Pointer to mlx5_dev_ctx_shared object to free 428 * 429 * @return 430 * 0 on success, a negative errno value otherwise and rte_errno is set. 431 */ 432 int 433 mlx5_flow_aso_age_mng_init(struct mlx5_dev_ctx_shared *sh) 434 { 435 int err; 436 437 if (sh->aso_age_mng) 438 return 0; 439 sh->aso_age_mng = mlx5_malloc(MLX5_MEM_ZERO, sizeof(*sh->aso_age_mng), 440 RTE_CACHE_LINE_SIZE, SOCKET_ID_ANY); 441 if (!sh->aso_age_mng) { 442 DRV_LOG(ERR, "aso_age_mng allocation was failed."); 443 rte_errno = ENOMEM; 444 return -ENOMEM; 445 } 446 err = mlx5_aso_queue_init(sh, ASO_OPC_MOD_FLOW_HIT); 447 if (err) { 448 mlx5_free(sh->aso_age_mng); 449 return -1; 450 } 451 rte_spinlock_init(&sh->aso_age_mng->resize_sl); 452 rte_spinlock_init(&sh->aso_age_mng->free_sl); 453 LIST_INIT(&sh->aso_age_mng->free); 454 return 0; 455 } 456 457 /** 458 * Close and release all the resources of the ASO aging management structure. 459 * 460 * @param[in] sh 461 * Pointer to mlx5_dev_ctx_shared object to free. 462 */ 463 static void 464 mlx5_flow_aso_age_mng_close(struct mlx5_dev_ctx_shared *sh) 465 { 466 int i, j; 467 468 mlx5_aso_flow_hit_queue_poll_stop(sh); 469 mlx5_aso_queue_uninit(sh, ASO_OPC_MOD_FLOW_HIT); 470 if (sh->aso_age_mng->pools) { 471 struct mlx5_aso_age_pool *pool; 472 473 for (i = 0; i < sh->aso_age_mng->next; ++i) { 474 pool = sh->aso_age_mng->pools[i]; 475 claim_zero(mlx5_devx_cmd_destroy 476 (pool->flow_hit_aso_obj)); 477 for (j = 0; j < MLX5_COUNTERS_PER_POOL; ++j) 478 if (pool->actions[j].dr_action) 479 claim_zero 480 (mlx5_flow_os_destroy_flow_action 481 (pool->actions[j].dr_action)); 482 mlx5_free(pool); 483 } 484 mlx5_free(sh->aso_age_mng->pools); 485 } 486 mlx5_free(sh->aso_age_mng); 487 } 488 489 /** 490 * Initialize the shared aging list information per port. 491 * 492 * @param[in] sh 493 * Pointer to mlx5_dev_ctx_shared object. 494 */ 495 static void 496 mlx5_flow_aging_init(struct mlx5_dev_ctx_shared *sh) 497 { 498 uint32_t i; 499 struct mlx5_age_info *age_info; 500 501 for (i = 0; i < sh->max_port; i++) { 502 age_info = &sh->port[i].age_info; 503 age_info->flags = 0; 504 TAILQ_INIT(&age_info->aged_counters); 505 LIST_INIT(&age_info->aged_aso); 506 rte_spinlock_init(&age_info->aged_sl); 507 MLX5_AGE_SET(age_info, MLX5_AGE_TRIGGER); 508 } 509 } 510 511 /** 512 * Initialize the counters management structure. 513 * 514 * @param[in] sh 515 * Pointer to mlx5_dev_ctx_shared object to free 516 */ 517 static void 518 mlx5_flow_counters_mng_init(struct mlx5_dev_ctx_shared *sh) 519 { 520 int i; 521 522 memset(&sh->cmng, 0, sizeof(sh->cmng)); 523 TAILQ_INIT(&sh->cmng.flow_counters); 524 sh->cmng.min_id = MLX5_CNT_BATCH_OFFSET; 525 sh->cmng.max_id = -1; 526 sh->cmng.last_pool_idx = POOL_IDX_INVALID; 527 rte_spinlock_init(&sh->cmng.pool_update_sl); 528 for (i = 0; i < MLX5_COUNTER_TYPE_MAX; i++) { 529 TAILQ_INIT(&sh->cmng.counters[i]); 530 rte_spinlock_init(&sh->cmng.csl[i]); 531 } 532 } 533 534 /** 535 * Destroy all the resources allocated for a counter memory management. 536 * 537 * @param[in] mng 538 * Pointer to the memory management structure. 539 */ 540 static void 541 mlx5_flow_destroy_counter_stat_mem_mng(struct mlx5_counter_stats_mem_mng *mng) 542 { 543 uint8_t *mem = (uint8_t *)(uintptr_t)mng->raws[0].data; 544 545 LIST_REMOVE(mng, next); 546 claim_zero(mlx5_devx_cmd_destroy(mng->dm)); 547 claim_zero(mlx5_os_umem_dereg(mng->umem)); 548 mlx5_free(mem); 549 } 550 551 /** 552 * Close and release all the resources of the counters management. 
553 * 554 * @param[in] sh 555 * Pointer to mlx5_dev_ctx_shared object to free. 556 */ 557 static void 558 mlx5_flow_counters_mng_close(struct mlx5_dev_ctx_shared *sh) 559 { 560 struct mlx5_counter_stats_mem_mng *mng; 561 int i, j; 562 int retries = 1024; 563 564 rte_errno = 0; 565 while (--retries) { 566 rte_eal_alarm_cancel(mlx5_flow_query_alarm, sh); 567 if (rte_errno != EINPROGRESS) 568 break; 569 rte_pause(); 570 } 571 572 if (sh->cmng.pools) { 573 struct mlx5_flow_counter_pool *pool; 574 uint16_t n_valid = sh->cmng.n_valid; 575 bool fallback = sh->cmng.counter_fallback; 576 577 for (i = 0; i < n_valid; ++i) { 578 pool = sh->cmng.pools[i]; 579 if (!fallback && pool->min_dcs) 580 claim_zero(mlx5_devx_cmd_destroy 581 (pool->min_dcs)); 582 for (j = 0; j < MLX5_COUNTERS_PER_POOL; ++j) { 583 struct mlx5_flow_counter *cnt = 584 MLX5_POOL_GET_CNT(pool, j); 585 586 if (cnt->action) 587 claim_zero 588 (mlx5_flow_os_destroy_flow_action 589 (cnt->action)); 590 if (fallback && MLX5_POOL_GET_CNT 591 (pool, j)->dcs_when_free) 592 claim_zero(mlx5_devx_cmd_destroy 593 (cnt->dcs_when_free)); 594 } 595 mlx5_free(pool); 596 } 597 mlx5_free(sh->cmng.pools); 598 } 599 mng = LIST_FIRST(&sh->cmng.mem_mngs); 600 while (mng) { 601 mlx5_flow_destroy_counter_stat_mem_mng(mng); 602 mng = LIST_FIRST(&sh->cmng.mem_mngs); 603 } 604 memset(&sh->cmng, 0, sizeof(sh->cmng)); 605 } 606 607 /** 608 * Initialize the aso flow meters management structure. 609 * 610 * @param[in] sh 611 * Pointer to mlx5_dev_ctx_shared object to free 612 */ 613 int 614 mlx5_aso_flow_mtrs_mng_init(struct mlx5_dev_ctx_shared *sh) 615 { 616 if (!sh->mtrmng) { 617 sh->mtrmng = mlx5_malloc(MLX5_MEM_ZERO, 618 sizeof(*sh->mtrmng), 619 RTE_CACHE_LINE_SIZE, SOCKET_ID_ANY); 620 if (!sh->mtrmng) { 621 DRV_LOG(ERR, 622 "meter management allocation was failed."); 623 rte_errno = ENOMEM; 624 return -ENOMEM; 625 } 626 if (sh->meter_aso_en) { 627 rte_spinlock_init(&sh->mtrmng->pools_mng.mtrsl); 628 LIST_INIT(&sh->mtrmng->pools_mng.meters); 629 } 630 sh->mtrmng->def_policy_id = MLX5_INVALID_POLICY_ID; 631 } 632 return 0; 633 } 634 635 /** 636 * Close and release all the resources of 637 * the ASO flow meter management structure. 638 * 639 * @param[in] sh 640 * Pointer to mlx5_dev_ctx_shared object to free. 641 */ 642 static void 643 mlx5_aso_flow_mtrs_mng_close(struct mlx5_dev_ctx_shared *sh) 644 { 645 struct mlx5_aso_mtr_pool *mtr_pool; 646 struct mlx5_flow_mtr_mng *mtrmng = sh->mtrmng; 647 uint32_t idx; 648 #ifdef HAVE_MLX5_DR_CREATE_ACTION_ASO 649 struct mlx5_aso_mtr *aso_mtr; 650 int i; 651 #endif /* HAVE_MLX5_DR_CREATE_ACTION_ASO */ 652 653 if (sh->meter_aso_en) { 654 mlx5_aso_queue_uninit(sh, ASO_OPC_MOD_POLICER); 655 idx = mtrmng->pools_mng.n_valid; 656 while (idx--) { 657 mtr_pool = mtrmng->pools_mng.pools[idx]; 658 #ifdef HAVE_MLX5_DR_CREATE_ACTION_ASO 659 for (i = 0; i < MLX5_ASO_MTRS_PER_POOL; i++) { 660 aso_mtr = &mtr_pool->mtrs[i]; 661 if (aso_mtr->fm.meter_action) 662 claim_zero 663 (mlx5_glue->destroy_flow_action 664 (aso_mtr->fm.meter_action)); 665 } 666 #endif /* HAVE_MLX5_DR_CREATE_ACTION_ASO */ 667 claim_zero(mlx5_devx_cmd_destroy 668 (mtr_pool->devx_obj)); 669 mtrmng->pools_mng.n_valid--; 670 mlx5_free(mtr_pool); 671 } 672 mlx5_free(sh->mtrmng->pools_mng.pools); 673 } 674 mlx5_free(sh->mtrmng); 675 sh->mtrmng = NULL; 676 } 677 678 /* Send FLOW_AGED event if needed. 
*/ 679 void 680 mlx5_age_event_prepare(struct mlx5_dev_ctx_shared *sh) 681 { 682 struct mlx5_age_info *age_info; 683 uint32_t i; 684 685 for (i = 0; i < sh->max_port; i++) { 686 age_info = &sh->port[i].age_info; 687 if (!MLX5_AGE_GET(age_info, MLX5_AGE_EVENT_NEW)) 688 continue; 689 MLX5_AGE_UNSET(age_info, MLX5_AGE_EVENT_NEW); 690 if (MLX5_AGE_GET(age_info, MLX5_AGE_TRIGGER)) { 691 MLX5_AGE_UNSET(age_info, MLX5_AGE_TRIGGER); 692 rte_eth_dev_callback_process 693 (&rte_eth_devices[sh->port[i].devx_ih_port_id], 694 RTE_ETH_EVENT_FLOW_AGED, NULL); 695 } 696 } 697 } 698 699 /* 700 * Initialize the ASO connection tracking structure. 701 * 702 * @param[in] sh 703 * Pointer to mlx5_dev_ctx_shared object. 704 * 705 * @return 706 * 0 on success, a negative errno value otherwise and rte_errno is set. 707 */ 708 int 709 mlx5_flow_aso_ct_mng_init(struct mlx5_dev_ctx_shared *sh) 710 { 711 int err; 712 713 if (sh->ct_mng) 714 return 0; 715 sh->ct_mng = mlx5_malloc(MLX5_MEM_ZERO, sizeof(*sh->ct_mng), 716 RTE_CACHE_LINE_SIZE, SOCKET_ID_ANY); 717 if (!sh->ct_mng) { 718 DRV_LOG(ERR, "ASO CT management allocation failed."); 719 rte_errno = ENOMEM; 720 return -rte_errno; 721 } 722 err = mlx5_aso_queue_init(sh, ASO_OPC_MOD_CONNECTION_TRACKING); 723 if (err) { 724 mlx5_free(sh->ct_mng); 725 /* rte_errno should be extracted from the failure. */ 726 rte_errno = EINVAL; 727 return -rte_errno; 728 } 729 rte_spinlock_init(&sh->ct_mng->ct_sl); 730 rte_rwlock_init(&sh->ct_mng->resize_rwl); 731 LIST_INIT(&sh->ct_mng->free_cts); 732 return 0; 733 } 734 735 /* 736 * Close and release all the resources of the 737 * ASO connection tracking management structure. 738 * 739 * @param[in] sh 740 * Pointer to mlx5_dev_ctx_shared object to free. 741 */ 742 static void 743 mlx5_flow_aso_ct_mng_close(struct mlx5_dev_ctx_shared *sh) 744 { 745 struct mlx5_aso_ct_pools_mng *mng = sh->ct_mng; 746 struct mlx5_aso_ct_pool *ct_pool; 747 struct mlx5_aso_ct_action *ct; 748 uint32_t idx; 749 uint32_t val; 750 uint32_t cnt; 751 int i; 752 753 mlx5_aso_queue_uninit(sh, ASO_OPC_MOD_CONNECTION_TRACKING); 754 idx = mng->next; 755 while (idx--) { 756 cnt = 0; 757 ct_pool = mng->pools[idx]; 758 for (i = 0; i < MLX5_ASO_CT_ACTIONS_PER_POOL; i++) { 759 ct = &ct_pool->actions[i]; 760 val = __atomic_fetch_sub(&ct->refcnt, 1, 761 __ATOMIC_RELAXED); 762 MLX5_ASSERT(val == 1); 763 if (val > 1) 764 cnt++; 765 #ifdef HAVE_MLX5_DR_ACTION_ASO_CT 766 if (ct->dr_action_orig) 767 claim_zero(mlx5_glue->destroy_flow_action 768 (ct->dr_action_orig)); 769 if (ct->dr_action_rply) 770 claim_zero(mlx5_glue->destroy_flow_action 771 (ct->dr_action_rply)); 772 #endif 773 } 774 claim_zero(mlx5_devx_cmd_destroy(ct_pool->devx_obj)); 775 if (cnt) { 776 DRV_LOG(DEBUG, "%u ASO CT objects are being used in the pool %u", 777 cnt, i); 778 } 779 mlx5_free(ct_pool); 780 /* in case of failure. */ 781 mng->next--; 782 } 783 mlx5_free(mng->pools); 784 mlx5_free(mng); 785 /* Management structure must be cleared to 0s during allocation. */ 786 sh->ct_mng = NULL; 787 } 788 789 /** 790 * Initialize the flow resources' indexed mempool. 791 * 792 * @param[in] sh 793 * Pointer to mlx5_dev_ctx_shared object. 794 * @param[in] config 795 * Pointer to user dev config. 
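 *
 * The per-pool parameters are taken from mlx5_ipool_cfg[]; only two points
 * are adjusted at run time: MLX5_IPOOL_MLX5_FLOW is sized for DV or Verbs
 * flow handles depending on dv_flow_en, and reclaim_mode trades the per-core
 * cache for immediate trunk release (release_mem_en).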
796 */ 797 static void 798 mlx5_flow_ipool_create(struct mlx5_dev_ctx_shared *sh, 799 const struct mlx5_dev_config *config) 800 { 801 uint8_t i; 802 struct mlx5_indexed_pool_config cfg; 803 804 for (i = 0; i < MLX5_IPOOL_MAX; ++i) { 805 cfg = mlx5_ipool_cfg[i]; 806 switch (i) { 807 default: 808 break; 809 /* 810 * Set MLX5_IPOOL_MLX5_FLOW ipool size 811 * according to PCI function flow configuration. 812 */ 813 case MLX5_IPOOL_MLX5_FLOW: 814 cfg.size = config->dv_flow_en ? 815 sizeof(struct mlx5_flow_handle) : 816 MLX5_FLOW_HANDLE_VERBS_SIZE; 817 break; 818 } 819 if (config->reclaim_mode) { 820 cfg.release_mem_en = 1; 821 cfg.per_core_cache = 0; 822 } else { 823 cfg.release_mem_en = 0; 824 } 825 sh->ipool[i] = mlx5_ipool_create(&cfg); 826 } 827 } 828 829 830 /** 831 * Release the flow resources' indexed mempool. 832 * 833 * @param[in] sh 834 * Pointer to mlx5_dev_ctx_shared object. 835 */ 836 static void 837 mlx5_flow_ipool_destroy(struct mlx5_dev_ctx_shared *sh) 838 { 839 uint8_t i; 840 841 for (i = 0; i < MLX5_IPOOL_MAX; ++i) 842 mlx5_ipool_destroy(sh->ipool[i]); 843 for (i = 0; i < MLX5_MAX_MODIFY_NUM; ++i) 844 if (sh->mdh_ipools[i]) 845 mlx5_ipool_destroy(sh->mdh_ipools[i]); 846 } 847 848 /* 849 * Check if dynamic flex parser for eCPRI already exists. 850 * 851 * @param dev 852 * Pointer to Ethernet device structure. 853 * 854 * @return 855 * true on exists, false on not. 856 */ 857 bool 858 mlx5_flex_parser_ecpri_exist(struct rte_eth_dev *dev) 859 { 860 struct mlx5_priv *priv = dev->data->dev_private; 861 struct mlx5_flex_parser_profiles *prf = 862 &priv->sh->fp[MLX5_FLEX_PARSER_ECPRI_0]; 863 864 return !!prf->obj; 865 } 866 867 /* 868 * Allocation of a flex parser for eCPRI. Once created, this parser related 869 * resources will be held until the device is closed. 870 * 871 * @param dev 872 * Pointer to Ethernet device structure. 873 * 874 * @return 875 * 0 on success, a negative errno value otherwise and rte_errno is set. 876 */ 877 int 878 mlx5_flex_parser_ecpri_alloc(struct rte_eth_dev *dev) 879 { 880 struct mlx5_priv *priv = dev->data->dev_private; 881 struct mlx5_flex_parser_profiles *prf = 882 &priv->sh->fp[MLX5_FLEX_PARSER_ECPRI_0]; 883 struct mlx5_devx_graph_node_attr node = { 884 .modify_field_select = 0, 885 }; 886 uint32_t ids[8]; 887 int ret; 888 889 if (!priv->config.hca_attr.parse_graph_flex_node) { 890 DRV_LOG(ERR, "Dynamic flex parser is not supported " 891 "for device %s.", priv->dev_data->name); 892 return -ENOTSUP; 893 } 894 node.header_length_mode = MLX5_GRAPH_NODE_LEN_FIXED; 895 /* 8 bytes now: 4B common header + 4B message body header. */ 896 node.header_length_base_value = 0x8; 897 /* After MAC layer: Ether / VLAN. */ 898 node.in[0].arc_parse_graph_node = MLX5_GRAPH_ARC_NODE_MAC; 899 /* Type of compared condition should be 0xAEFE in the L2 layer. */ 900 node.in[0].compare_condition_value = RTE_ETHER_TYPE_ECPRI; 901 /* Sample #0: type in common header. */ 902 node.sample[0].flow_match_sample_en = 1; 903 /* Fixed offset. */ 904 node.sample[0].flow_match_sample_offset_mode = 0x0; 905 /* Only the 2nd byte will be used. */ 906 node.sample[0].flow_match_sample_field_base_offset = 0x0; 907 /* Sample #1: message payload. */ 908 node.sample[1].flow_match_sample_en = 1; 909 /* Fixed offset. */ 910 node.sample[1].flow_match_sample_offset_mode = 0x0; 911 /* 912 * Only the first two bytes will be used right now, and its offset will 913 * start after the common header that with the length of a DW(u32). 
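 * That is, the 4-byte eCPRI common header occupies the first DW, so sample #1
 * starts at byte offset 4 and matches the first 16 bits of the
 * message-specific payload (for instance the PC_ID field of an IQ data
 * message).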
914 */ 915 node.sample[1].flow_match_sample_field_base_offset = sizeof(uint32_t); 916 prf->obj = mlx5_devx_cmd_create_flex_parser(priv->sh->ctx, &node); 917 if (!prf->obj) { 918 DRV_LOG(ERR, "Failed to create flex parser node object."); 919 return (rte_errno == 0) ? -ENODEV : -rte_errno; 920 } 921 prf->num = 2; 922 ret = mlx5_devx_cmd_query_parse_samples(prf->obj, ids, prf->num); 923 if (ret) { 924 DRV_LOG(ERR, "Failed to query sample IDs."); 925 return (rte_errno == 0) ? -ENODEV : -rte_errno; 926 } 927 prf->offset[0] = 0x0; 928 prf->offset[1] = sizeof(uint32_t); 929 prf->ids[0] = ids[0]; 930 prf->ids[1] = ids[1]; 931 return 0; 932 } 933 934 /* 935 * Destroy the flex parser node, including the parser itself, input / output 936 * arcs and DW samples. Resources could be reused then. 937 * 938 * @param dev 939 * Pointer to Ethernet device structure. 940 */ 941 static void 942 mlx5_flex_parser_ecpri_release(struct rte_eth_dev *dev) 943 { 944 struct mlx5_priv *priv = dev->data->dev_private; 945 struct mlx5_flex_parser_profiles *prf = 946 &priv->sh->fp[MLX5_FLEX_PARSER_ECPRI_0]; 947 948 if (prf->obj) 949 mlx5_devx_cmd_destroy(prf->obj); 950 prf->obj = NULL; 951 } 952 953 /* 954 * Allocate Rx and Tx UARs in robust fashion. 955 * This routine handles the following UAR allocation issues: 956 * 957 * - tries to allocate the UAR with the most appropriate memory 958 * mapping type from the ones supported by the host 959 * 960 * - tries to allocate the UAR with non-NULL base address 961 * OFED 5.0.x and Upstream rdma_core before v29 returned the NULL as 962 * UAR base address if UAR was not the first object in the UAR page. 963 * It caused the PMD failure and we should try to get another UAR 964 * till we get the first one with non-NULL base address returned. 965 */ 966 static int 967 mlx5_alloc_rxtx_uars(struct mlx5_dev_ctx_shared *sh, 968 const struct mlx5_dev_config *config) 969 { 970 uint32_t uar_mapping, retry; 971 int err = 0; 972 void *base_addr; 973 974 for (retry = 0; retry < MLX5_ALLOC_UAR_RETRY; ++retry) { 975 #ifdef MLX5DV_UAR_ALLOC_TYPE_NC 976 /* Control the mapping type according to the settings. */ 977 uar_mapping = (config->dbnc == MLX5_TXDB_NCACHED) ? 978 MLX5DV_UAR_ALLOC_TYPE_NC : 979 MLX5DV_UAR_ALLOC_TYPE_BF; 980 #else 981 RTE_SET_USED(config); 982 /* 983 * It seems we have no way to control the memory mapping type 984 * for the UAR, the default "Write-Combining" type is supposed. 985 * The UAR initialization on queue creation queries the 986 * actual mapping type done by Verbs/kernel and setups the 987 * PMD datapath accordingly. 988 */ 989 uar_mapping = 0; 990 #endif 991 sh->tx_uar = mlx5_glue->devx_alloc_uar(sh->ctx, uar_mapping); 992 #ifdef MLX5DV_UAR_ALLOC_TYPE_NC 993 if (!sh->tx_uar && 994 uar_mapping == MLX5DV_UAR_ALLOC_TYPE_BF) { 995 if (config->dbnc == MLX5_TXDB_CACHED || 996 config->dbnc == MLX5_TXDB_HEURISTIC) 997 DRV_LOG(WARNING, "Devarg tx_db_nc setting " 998 "is not supported by DevX"); 999 /* 1000 * In some environments like virtual machine 1001 * the Write Combining mapped might be not supported 1002 * and UAR allocation fails. We try "Non-Cached" 1003 * mapping for the case. The tx_burst routines take 1004 * the UAR mapping type into account on UAR setup 1005 * on queue creation. 
1006 */ 1007 DRV_LOG(DEBUG, "Failed to allocate Tx DevX UAR (BF)"); 1008 uar_mapping = MLX5DV_UAR_ALLOC_TYPE_NC; 1009 sh->tx_uar = mlx5_glue->devx_alloc_uar 1010 (sh->ctx, uar_mapping); 1011 } else if (!sh->tx_uar && 1012 uar_mapping == MLX5DV_UAR_ALLOC_TYPE_NC) { 1013 if (config->dbnc == MLX5_TXDB_NCACHED) 1014 DRV_LOG(WARNING, "Devarg tx_db_nc settings " 1015 "is not supported by DevX"); 1016 /* 1017 * If Verbs/kernel does not support "Non-Cached" 1018 * try the "Write-Combining". 1019 */ 1020 DRV_LOG(DEBUG, "Failed to allocate Tx DevX UAR (NC)"); 1021 uar_mapping = MLX5DV_UAR_ALLOC_TYPE_BF; 1022 sh->tx_uar = mlx5_glue->devx_alloc_uar 1023 (sh->ctx, uar_mapping); 1024 } 1025 #endif 1026 if (!sh->tx_uar) { 1027 DRV_LOG(ERR, "Failed to allocate Tx DevX UAR (BF/NC)"); 1028 err = ENOMEM; 1029 goto exit; 1030 } 1031 base_addr = mlx5_os_get_devx_uar_base_addr(sh->tx_uar); 1032 if (base_addr) 1033 break; 1034 /* 1035 * The UARs are allocated by rdma_core within the 1036 * IB device context, on context closure all UARs 1037 * will be freed, should be no memory/object leakage. 1038 */ 1039 DRV_LOG(DEBUG, "Retrying to allocate Tx DevX UAR"); 1040 sh->tx_uar = NULL; 1041 } 1042 /* Check whether we finally succeeded with valid UAR allocation. */ 1043 if (!sh->tx_uar) { 1044 DRV_LOG(ERR, "Failed to allocate Tx DevX UAR (NULL base)"); 1045 err = ENOMEM; 1046 goto exit; 1047 } 1048 for (retry = 0; retry < MLX5_ALLOC_UAR_RETRY; ++retry) { 1049 uar_mapping = 0; 1050 sh->devx_rx_uar = mlx5_glue->devx_alloc_uar 1051 (sh->ctx, uar_mapping); 1052 #ifdef MLX5DV_UAR_ALLOC_TYPE_NC 1053 if (!sh->devx_rx_uar && 1054 uar_mapping == MLX5DV_UAR_ALLOC_TYPE_BF) { 1055 /* 1056 * Rx UAR is used to control interrupts only, 1057 * should be no datapath noticeable impact, 1058 * can try "Non-Cached" mapping safely. 1059 */ 1060 DRV_LOG(DEBUG, "Failed to allocate Rx DevX UAR (BF)"); 1061 uar_mapping = MLX5DV_UAR_ALLOC_TYPE_NC; 1062 sh->devx_rx_uar = mlx5_glue->devx_alloc_uar 1063 (sh->ctx, uar_mapping); 1064 } 1065 #endif 1066 if (!sh->devx_rx_uar) { 1067 DRV_LOG(ERR, "Failed to allocate Rx DevX UAR (BF/NC)"); 1068 err = ENOMEM; 1069 goto exit; 1070 } 1071 base_addr = mlx5_os_get_devx_uar_base_addr(sh->devx_rx_uar); 1072 if (base_addr) 1073 break; 1074 /* 1075 * The UARs are allocated by rdma_core within the 1076 * IB device context, on context closure all UARs 1077 * will be freed, should be no memory/object leakage. 1078 */ 1079 DRV_LOG(DEBUG, "Retrying to allocate Rx DevX UAR"); 1080 sh->devx_rx_uar = NULL; 1081 } 1082 /* Check whether we finally succeeded with valid UAR allocation. */ 1083 if (!sh->devx_rx_uar) { 1084 DRV_LOG(ERR, "Failed to allocate Rx DevX UAR (NULL base)"); 1085 err = ENOMEM; 1086 } 1087 exit: 1088 return err; 1089 } 1090 1091 /** 1092 * Allocate shared device context. If there is multiport device the 1093 * master and representors will share this context, if there is single 1094 * port dedicated device, the context will be used by only given 1095 * port due to unification. 1096 * 1097 * Routine first searches the context for the specified device name, 1098 * if found the shared context assumed and reference counter is incremented. 1099 * If no context found the new one is created and initialized with specified 1100 * device context and parameters. 1101 * 1102 * @param[in] spawn 1103 * Pointer to the device attributes (name, port, etc). 1104 * @param[in] config 1105 * Pointer to device configuration structure. 
1106 * 1107 * @return 1108 * Pointer to mlx5_dev_ctx_shared object on success, 1109 * otherwise NULL and rte_errno is set. 1110 */ 1111 struct mlx5_dev_ctx_shared * 1112 mlx5_alloc_shared_dev_ctx(const struct mlx5_dev_spawn_data *spawn, 1113 const struct mlx5_dev_config *config) 1114 { 1115 struct mlx5_dev_ctx_shared *sh; 1116 int err = 0; 1117 uint32_t i; 1118 struct mlx5_devx_tis_attr tis_attr = { 0 }; 1119 1120 MLX5_ASSERT(spawn); 1121 /* Secondary process should not create the shared context. */ 1122 MLX5_ASSERT(rte_eal_process_type() == RTE_PROC_PRIMARY); 1123 pthread_mutex_lock(&mlx5_dev_ctx_list_mutex); 1124 /* Search for IB context by device name. */ 1125 LIST_FOREACH(sh, &mlx5_dev_ctx_list, next) { 1126 if (!strcmp(sh->ibdev_name, 1127 mlx5_os_get_dev_device_name(spawn->phys_dev))) { 1128 sh->refcnt++; 1129 goto exit; 1130 } 1131 } 1132 /* No device found, we have to create new shared context. */ 1133 MLX5_ASSERT(spawn->max_port); 1134 sh = mlx5_malloc(MLX5_MEM_ZERO | MLX5_MEM_RTE, 1135 sizeof(struct mlx5_dev_ctx_shared) + 1136 spawn->max_port * 1137 sizeof(struct mlx5_dev_shared_port), 1138 RTE_CACHE_LINE_SIZE, SOCKET_ID_ANY); 1139 if (!sh) { 1140 DRV_LOG(ERR, "shared context allocation failure"); 1141 rte_errno = ENOMEM; 1142 goto exit; 1143 } 1144 sh->numa_node = spawn->numa_node; 1145 if (spawn->bond_info) 1146 sh->bond = *spawn->bond_info; 1147 err = mlx5_os_open_device(spawn, config, sh); 1148 if (!sh->ctx) 1149 goto error; 1150 err = mlx5_os_get_dev_attr(sh->ctx, &sh->device_attr); 1151 if (err) { 1152 DRV_LOG(DEBUG, "mlx5_os_get_dev_attr() failed"); 1153 goto error; 1154 } 1155 sh->refcnt = 1; 1156 sh->max_port = spawn->max_port; 1157 sh->reclaim_mode = config->reclaim_mode; 1158 strncpy(sh->ibdev_name, mlx5_os_get_ctx_device_name(sh->ctx), 1159 sizeof(sh->ibdev_name) - 1); 1160 strncpy(sh->ibdev_path, mlx5_os_get_ctx_device_path(sh->ctx), 1161 sizeof(sh->ibdev_path) - 1); 1162 /* 1163 * Setting port_id to max unallowed value means 1164 * there is no interrupt subhandler installed for 1165 * the given port index i. 1166 */ 1167 for (i = 0; i < sh->max_port; i++) { 1168 sh->port[i].ih_port_id = RTE_MAX_ETHPORTS; 1169 sh->port[i].devx_ih_port_id = RTE_MAX_ETHPORTS; 1170 } 1171 sh->pd = mlx5_os_alloc_pd(sh->ctx); 1172 if (sh->pd == NULL) { 1173 DRV_LOG(ERR, "PD allocation failure"); 1174 err = ENOMEM; 1175 goto error; 1176 } 1177 if (sh->devx) { 1178 err = mlx5_os_get_pdn(sh->pd, &sh->pdn); 1179 if (err) { 1180 DRV_LOG(ERR, "Fail to extract pdn from PD"); 1181 goto error; 1182 } 1183 sh->td = mlx5_devx_cmd_create_td(sh->ctx); 1184 if (!sh->td) { 1185 DRV_LOG(ERR, "TD allocation failure"); 1186 err = ENOMEM; 1187 goto error; 1188 } 1189 tis_attr.transport_domain = sh->td->id; 1190 sh->tis = mlx5_devx_cmd_create_tis(sh->ctx, &tis_attr); 1191 if (!sh->tis) { 1192 DRV_LOG(ERR, "TIS allocation failure"); 1193 err = ENOMEM; 1194 goto error; 1195 } 1196 err = mlx5_alloc_rxtx_uars(sh, config); 1197 if (err) 1198 goto error; 1199 MLX5_ASSERT(sh->tx_uar); 1200 MLX5_ASSERT(mlx5_os_get_devx_uar_base_addr(sh->tx_uar)); 1201 1202 MLX5_ASSERT(sh->devx_rx_uar); 1203 MLX5_ASSERT(mlx5_os_get_devx_uar_base_addr(sh->devx_rx_uar)); 1204 } 1205 #ifndef RTE_ARCH_64 1206 /* Initialize UAR access locks for 32bit implementations. 
*/ 1207 rte_spinlock_init(&sh->uar_lock_cq); 1208 for (i = 0; i < MLX5_UAR_PAGE_NUM_MAX; i++) 1209 rte_spinlock_init(&sh->uar_lock[i]); 1210 #endif 1211 /* 1212 * Once the device is added to the list of memory event 1213 * callback, its global MR cache table cannot be expanded 1214 * on the fly because of deadlock. If it overflows, lookup 1215 * should be done by searching MR list linearly, which is slow. 1216 * 1217 * At this point the device is not added to the memory 1218 * event list yet, context is just being created. 1219 */ 1220 err = mlx5_mr_btree_init(&sh->share_cache.cache, 1221 MLX5_MR_BTREE_CACHE_N * 2, 1222 sh->numa_node); 1223 if (err) { 1224 err = rte_errno; 1225 goto error; 1226 } 1227 mlx5_os_set_reg_mr_cb(&sh->share_cache.reg_mr_cb, 1228 &sh->share_cache.dereg_mr_cb); 1229 mlx5_os_dev_shared_handler_install(sh); 1230 sh->cnt_id_tbl = mlx5_l3t_create(MLX5_L3T_TYPE_DWORD); 1231 if (!sh->cnt_id_tbl) { 1232 err = rte_errno; 1233 goto error; 1234 } 1235 if (LIST_EMPTY(&mlx5_dev_ctx_list)) { 1236 err = mlx5_flow_os_init_workspace_once(); 1237 if (err) 1238 goto error; 1239 } 1240 mlx5_flow_aging_init(sh); 1241 mlx5_flow_counters_mng_init(sh); 1242 mlx5_flow_ipool_create(sh, config); 1243 /* Add device to memory callback list. */ 1244 rte_rwlock_write_lock(&mlx5_shared_data->mem_event_rwlock); 1245 LIST_INSERT_HEAD(&mlx5_shared_data->mem_event_cb_list, 1246 sh, mem_event_cb); 1247 rte_rwlock_write_unlock(&mlx5_shared_data->mem_event_rwlock); 1248 /* Add context to the global device list. */ 1249 LIST_INSERT_HEAD(&mlx5_dev_ctx_list, sh, next); 1250 rte_spinlock_init(&sh->geneve_tlv_opt_sl); 1251 exit: 1252 pthread_mutex_unlock(&mlx5_dev_ctx_list_mutex); 1253 return sh; 1254 error: 1255 pthread_mutex_destroy(&sh->txpp.mutex); 1256 pthread_mutex_unlock(&mlx5_dev_ctx_list_mutex); 1257 MLX5_ASSERT(sh); 1258 if (sh->cnt_id_tbl) 1259 mlx5_l3t_destroy(sh->cnt_id_tbl); 1260 if (sh->share_cache.cache.table) 1261 mlx5_mr_btree_free(&sh->share_cache.cache); 1262 if (sh->tis) 1263 claim_zero(mlx5_devx_cmd_destroy(sh->tis)); 1264 if (sh->td) 1265 claim_zero(mlx5_devx_cmd_destroy(sh->td)); 1266 if (sh->devx_rx_uar) 1267 mlx5_glue->devx_free_uar(sh->devx_rx_uar); 1268 if (sh->tx_uar) 1269 mlx5_glue->devx_free_uar(sh->tx_uar); 1270 if (sh->pd) 1271 claim_zero(mlx5_os_dealloc_pd(sh->pd)); 1272 if (sh->ctx) 1273 claim_zero(mlx5_glue->close_device(sh->ctx)); 1274 mlx5_free(sh); 1275 MLX5_ASSERT(err > 0); 1276 rte_errno = err; 1277 return NULL; 1278 } 1279 1280 /** 1281 * Free shared IB device context. Decrement counter and if zero free 1282 * all allocated resources and close handles. 1283 * 1284 * @param[in] sh 1285 * Pointer to mlx5_dev_ctx_shared object to free 1286 */ 1287 void 1288 mlx5_free_shared_dev_ctx(struct mlx5_dev_ctx_shared *sh) 1289 { 1290 pthread_mutex_lock(&mlx5_dev_ctx_list_mutex); 1291 #ifdef RTE_LIBRTE_MLX5_DEBUG 1292 /* Check the object presence in the list. */ 1293 struct mlx5_dev_ctx_shared *lctx; 1294 1295 LIST_FOREACH(lctx, &mlx5_dev_ctx_list, next) 1296 if (lctx == sh) 1297 break; 1298 MLX5_ASSERT(lctx); 1299 if (lctx != sh) { 1300 DRV_LOG(ERR, "Freeing non-existing shared IB context"); 1301 goto exit; 1302 } 1303 #endif 1304 MLX5_ASSERT(sh); 1305 MLX5_ASSERT(sh->refcnt); 1306 /* Secondary process should not free the shared context. */ 1307 MLX5_ASSERT(rte_eal_process_type() == RTE_PROC_PRIMARY); 1308 if (--sh->refcnt) 1309 goto exit; 1310 /* Remove from memory callback device list. 
*/ 1311 rte_rwlock_write_lock(&mlx5_shared_data->mem_event_rwlock); 1312 LIST_REMOVE(sh, mem_event_cb); 1313 rte_rwlock_write_unlock(&mlx5_shared_data->mem_event_rwlock); 1314 /* Release created Memory Regions. */ 1315 mlx5_mr_release_cache(&sh->share_cache); 1316 /* Remove context from the global device list. */ 1317 LIST_REMOVE(sh, next); 1318 /* Release flow workspaces objects on the last device. */ 1319 if (LIST_EMPTY(&mlx5_dev_ctx_list)) 1320 mlx5_flow_os_release_workspace(); 1321 pthread_mutex_unlock(&mlx5_dev_ctx_list_mutex); 1322 /* 1323 * Ensure there is no async event handler installed. 1324 * Only primary process handles async device events. 1325 **/ 1326 mlx5_flow_counters_mng_close(sh); 1327 if (sh->aso_age_mng) { 1328 mlx5_flow_aso_age_mng_close(sh); 1329 sh->aso_age_mng = NULL; 1330 } 1331 if (sh->mtrmng) 1332 mlx5_aso_flow_mtrs_mng_close(sh); 1333 mlx5_flow_ipool_destroy(sh); 1334 mlx5_os_dev_shared_handler_uninstall(sh); 1335 if (sh->cnt_id_tbl) { 1336 mlx5_l3t_destroy(sh->cnt_id_tbl); 1337 sh->cnt_id_tbl = NULL; 1338 } 1339 if (sh->tx_uar) { 1340 mlx5_glue->devx_free_uar(sh->tx_uar); 1341 sh->tx_uar = NULL; 1342 } 1343 if (sh->pd) 1344 claim_zero(mlx5_os_dealloc_pd(sh->pd)); 1345 if (sh->tis) 1346 claim_zero(mlx5_devx_cmd_destroy(sh->tis)); 1347 if (sh->td) 1348 claim_zero(mlx5_devx_cmd_destroy(sh->td)); 1349 if (sh->devx_rx_uar) 1350 mlx5_glue->devx_free_uar(sh->devx_rx_uar); 1351 if (sh->ctx) 1352 claim_zero(mlx5_glue->close_device(sh->ctx)); 1353 MLX5_ASSERT(sh->geneve_tlv_option_resource == NULL); 1354 pthread_mutex_destroy(&sh->txpp.mutex); 1355 mlx5_free(sh); 1356 return; 1357 exit: 1358 pthread_mutex_unlock(&mlx5_dev_ctx_list_mutex); 1359 } 1360 1361 /** 1362 * Destroy table hash list. 1363 * 1364 * @param[in] priv 1365 * Pointer to the private device data structure. 1366 */ 1367 void 1368 mlx5_free_table_hash_list(struct mlx5_priv *priv) 1369 { 1370 struct mlx5_dev_ctx_shared *sh = priv->sh; 1371 1372 if (!sh->flow_tbls) 1373 return; 1374 mlx5_hlist_destroy(sh->flow_tbls); 1375 sh->flow_tbls = NULL; 1376 } 1377 1378 /** 1379 * Initialize flow table hash list and create the root tables entry 1380 * for each domain. 1381 * 1382 * @param[in] priv 1383 * Pointer to the private device data structure. 1384 * 1385 * @return 1386 * Zero on success, positive error code otherwise. 1387 */ 1388 int 1389 mlx5_alloc_table_hash_list(struct mlx5_priv *priv __rte_unused) 1390 { 1391 int err = 0; 1392 /* Tables are only used in DV and DR modes. */ 1393 #if defined(HAVE_IBV_FLOW_DV_SUPPORT) || !defined(HAVE_INFINIBAND_VERBS_H) 1394 struct mlx5_dev_ctx_shared *sh = priv->sh; 1395 char s[MLX5_NAME_SIZE]; 1396 1397 MLX5_ASSERT(sh); 1398 snprintf(s, sizeof(s), "%s_flow_table", priv->sh->ibdev_name); 1399 sh->flow_tbls = mlx5_hlist_create(s, MLX5_FLOW_TABLE_HLIST_ARRAY_SIZE, 1400 false, true, sh, 1401 flow_dv_tbl_create_cb, 1402 flow_dv_tbl_match_cb, 1403 flow_dv_tbl_remove_cb, 1404 flow_dv_tbl_clone_cb, 1405 flow_dv_tbl_clone_free_cb); 1406 if (!sh->flow_tbls) { 1407 DRV_LOG(ERR, "flow tables with hash creation failed."); 1408 err = ENOMEM; 1409 return err; 1410 } 1411 #ifndef HAVE_MLX5DV_DR 1412 struct rte_flow_error error; 1413 struct rte_eth_dev *dev = &rte_eth_devices[priv->dev_data->port_id]; 1414 1415 /* 1416 * In case we have not DR support, the zero tables should be created 1417 * because DV expect to see them even if they cannot be created by 1418 * RDMA-CORE. 
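 * That is, the three calls below pre-create the group 0 (root) flow table for
 * what appear to be the ingress, egress and transfer domains, so that later
 * table lookups succeed even though rdma-core cannot really create them.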
1419 */ 1420 if (!flow_dv_tbl_resource_get(dev, 0, 0, 0, 0, 1421 NULL, 0, 1, 0, &error) || 1422 !flow_dv_tbl_resource_get(dev, 0, 1, 0, 0, 1423 NULL, 0, 1, 0, &error) || 1424 !flow_dv_tbl_resource_get(dev, 0, 0, 1, 0, 1425 NULL, 0, 1, 0, &error)) { 1426 err = ENOMEM; 1427 goto error; 1428 } 1429 return err; 1430 error: 1431 mlx5_free_table_hash_list(priv); 1432 #endif /* HAVE_MLX5DV_DR */ 1433 #endif 1434 return err; 1435 } 1436 1437 /** 1438 * Retrieve integer value from environment variable. 1439 * 1440 * @param[in] name 1441 * Environment variable name. 1442 * 1443 * @return 1444 * Integer value, 0 if the variable is not set. 1445 */ 1446 int 1447 mlx5_getenv_int(const char *name) 1448 { 1449 const char *val = getenv(name); 1450 1451 if (val == NULL) 1452 return 0; 1453 return atoi(val); 1454 } 1455 1456 /** 1457 * DPDK callback to add udp tunnel port 1458 * 1459 * @param[in] dev 1460 * A pointer to eth_dev 1461 * @param[in] udp_tunnel 1462 * A pointer to udp tunnel 1463 * 1464 * @return 1465 * 0 on valid udp ports and tunnels, -ENOTSUP otherwise. 1466 */ 1467 int 1468 mlx5_udp_tunnel_port_add(struct rte_eth_dev *dev __rte_unused, 1469 struct rte_eth_udp_tunnel *udp_tunnel) 1470 { 1471 MLX5_ASSERT(udp_tunnel != NULL); 1472 if (udp_tunnel->prot_type == RTE_TUNNEL_TYPE_VXLAN && 1473 udp_tunnel->udp_port == 4789) 1474 return 0; 1475 if (udp_tunnel->prot_type == RTE_TUNNEL_TYPE_VXLAN_GPE && 1476 udp_tunnel->udp_port == 4790) 1477 return 0; 1478 return -ENOTSUP; 1479 } 1480 1481 /** 1482 * Initialize process private data structure. 1483 * 1484 * @param dev 1485 * Pointer to Ethernet device structure. 1486 * 1487 * @return 1488 * 0 on success, a negative errno value otherwise and rte_errno is set. 1489 */ 1490 int 1491 mlx5_proc_priv_init(struct rte_eth_dev *dev) 1492 { 1493 struct mlx5_priv *priv = dev->data->dev_private; 1494 struct mlx5_proc_priv *ppriv; 1495 size_t ppriv_size; 1496 1497 mlx5_proc_priv_uninit(dev); 1498 /* 1499 * UAR register table follows the process private structure. BlueFlame 1500 * registers for Tx queues are stored in the table. 1501 */ 1502 ppriv_size = 1503 sizeof(struct mlx5_proc_priv) + priv->txqs_n * sizeof(void *); 1504 ppriv = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO, ppriv_size, 1505 RTE_CACHE_LINE_SIZE, dev->device->numa_node); 1506 if (!ppriv) { 1507 rte_errno = ENOMEM; 1508 return -rte_errno; 1509 } 1510 ppriv->uar_table_sz = priv->txqs_n; 1511 dev->process_private = ppriv; 1512 return 0; 1513 } 1514 1515 /** 1516 * Un-initialize process private data structure. 1517 * 1518 * @param dev 1519 * Pointer to Ethernet device structure. 1520 */ 1521 void 1522 mlx5_proc_priv_uninit(struct rte_eth_dev *dev) 1523 { 1524 if (!dev->process_private) 1525 return; 1526 mlx5_free(dev->process_private); 1527 dev->process_private = NULL; 1528 } 1529 1530 /** 1531 * DPDK callback to close the device. 1532 * 1533 * Destroy all queues and objects, free memory. 1534 * 1535 * @param dev 1536 * Pointer to Ethernet device structure. 1537 */ 1538 int 1539 mlx5_dev_close(struct rte_eth_dev *dev) 1540 { 1541 struct mlx5_priv *priv = dev->data->dev_private; 1542 unsigned int i; 1543 int ret; 1544 1545 if (rte_eal_process_type() == RTE_PROC_SECONDARY) { 1546 /* Check if process_private released. 
*/ 1547 if (!dev->process_private) 1548 return 0; 1549 mlx5_tx_uar_uninit_secondary(dev); 1550 mlx5_proc_priv_uninit(dev); 1551 rte_eth_dev_release_port(dev); 1552 return 0; 1553 } 1554 if (!priv->sh) 1555 return 0; 1556 DRV_LOG(DEBUG, "port %u closing device \"%s\"", 1557 dev->data->port_id, 1558 ((priv->sh->ctx != NULL) ? 1559 mlx5_os_get_ctx_device_name(priv->sh->ctx) : "")); 1560 /* 1561 * If default mreg copy action is removed at the stop stage, 1562 * the search will return none and nothing will be done anymore. 1563 */ 1564 mlx5_flow_stop_default(dev); 1565 mlx5_traffic_disable(dev); 1566 /* 1567 * If all the flows are already flushed in the device stop stage, 1568 * then this will return directly without any action. 1569 */ 1570 mlx5_flow_list_flush(dev, MLX5_FLOW_TYPE_GEN, true); 1571 mlx5_action_handle_flush(dev); 1572 mlx5_flow_meter_flush(dev, NULL); 1573 /* Prevent crashes when queues are still in use. */ 1574 dev->rx_pkt_burst = removed_rx_burst; 1575 dev->tx_pkt_burst = removed_tx_burst; 1576 rte_wmb(); 1577 /* Disable datapath on secondary process. */ 1578 mlx5_mp_os_req_stop_rxtx(dev); 1579 /* Free the eCPRI flex parser resource. */ 1580 mlx5_flex_parser_ecpri_release(dev); 1581 if (priv->rxqs != NULL) { 1582 /* XXX race condition if mlx5_rx_burst() is still running. */ 1583 rte_delay_us_sleep(1000); 1584 for (i = 0; (i != priv->rxqs_n); ++i) 1585 mlx5_rxq_release(dev, i); 1586 priv->rxqs_n = 0; 1587 priv->rxqs = NULL; 1588 } 1589 if (priv->representor) { 1590 /* Each representor has a dedicated interrupts handler */ 1591 mlx5_free(dev->intr_handle); 1592 dev->intr_handle = NULL; 1593 } 1594 if (priv->txqs != NULL) { 1595 /* XXX race condition if mlx5_tx_burst() is still running. */ 1596 rte_delay_us_sleep(1000); 1597 for (i = 0; (i != priv->txqs_n); ++i) 1598 mlx5_txq_release(dev, i); 1599 priv->txqs_n = 0; 1600 priv->txqs = NULL; 1601 } 1602 mlx5_proc_priv_uninit(dev); 1603 if (priv->q_counters) { 1604 mlx5_devx_cmd_destroy(priv->q_counters); 1605 priv->q_counters = NULL; 1606 } 1607 if (priv->drop_queue.hrxq) 1608 mlx5_drop_action_destroy(dev); 1609 if (priv->mreg_cp_tbl) 1610 mlx5_hlist_destroy(priv->mreg_cp_tbl); 1611 mlx5_mprq_free_mp(dev); 1612 if (priv->sh->ct_mng) 1613 mlx5_flow_aso_ct_mng_close(priv->sh); 1614 mlx5_os_free_shared_dr(priv); 1615 if (priv->rss_conf.rss_key != NULL) 1616 mlx5_free(priv->rss_conf.rss_key); 1617 if (priv->reta_idx != NULL) 1618 mlx5_free(priv->reta_idx); 1619 if (priv->config.vf) 1620 mlx5_os_mac_addr_flush(dev); 1621 if (priv->nl_socket_route >= 0) 1622 close(priv->nl_socket_route); 1623 if (priv->nl_socket_rdma >= 0) 1624 close(priv->nl_socket_rdma); 1625 if (priv->vmwa_context) 1626 mlx5_vlan_vmwa_exit(priv->vmwa_context); 1627 ret = mlx5_hrxq_verify(dev); 1628 if (ret) 1629 DRV_LOG(WARNING, "port %u some hash Rx queue still remain", 1630 dev->data->port_id); 1631 ret = mlx5_ind_table_obj_verify(dev); 1632 if (ret) 1633 DRV_LOG(WARNING, "port %u some indirection table still remain", 1634 dev->data->port_id); 1635 ret = mlx5_rxq_obj_verify(dev); 1636 if (ret) 1637 DRV_LOG(WARNING, "port %u some Rx queue objects still remain", 1638 dev->data->port_id); 1639 ret = mlx5_rxq_verify(dev); 1640 if (ret) 1641 DRV_LOG(WARNING, "port %u some Rx queues still remain", 1642 dev->data->port_id); 1643 ret = mlx5_txq_obj_verify(dev); 1644 if (ret) 1645 DRV_LOG(WARNING, "port %u some Verbs Tx queue still remain", 1646 dev->data->port_id); 1647 ret = mlx5_txq_verify(dev); 1648 if (ret) 1649 DRV_LOG(WARNING, "port %u some Tx queues still remain", 
1650 dev->data->port_id); 1651 ret = mlx5_flow_verify(dev); 1652 if (ret) 1653 DRV_LOG(WARNING, "port %u some flows still remain", 1654 dev->data->port_id); 1655 if (priv->hrxqs) 1656 mlx5_list_destroy(priv->hrxqs); 1657 /* 1658 * Free the shared context in last turn, because the cleanup 1659 * routines above may use some shared fields, like 1660 * mlx5_os_mac_addr_flush() uses ibdev_path for retrieveing 1661 * ifindex if Netlink fails. 1662 */ 1663 mlx5_free_shared_dev_ctx(priv->sh); 1664 if (priv->domain_id != RTE_ETH_DEV_SWITCH_DOMAIN_ID_INVALID) { 1665 unsigned int c = 0; 1666 uint16_t port_id; 1667 1668 MLX5_ETH_FOREACH_DEV(port_id, dev->device) { 1669 struct mlx5_priv *opriv = 1670 rte_eth_devices[port_id].data->dev_private; 1671 1672 if (!opriv || 1673 opriv->domain_id != priv->domain_id || 1674 &rte_eth_devices[port_id] == dev) 1675 continue; 1676 ++c; 1677 break; 1678 } 1679 if (!c) 1680 claim_zero(rte_eth_switch_domain_free(priv->domain_id)); 1681 } 1682 memset(priv, 0, sizeof(*priv)); 1683 priv->domain_id = RTE_ETH_DEV_SWITCH_DOMAIN_ID_INVALID; 1684 /* 1685 * Reset mac_addrs to NULL such that it is not freed as part of 1686 * rte_eth_dev_release_port(). mac_addrs is part of dev_private so 1687 * it is freed when dev_private is freed. 1688 */ 1689 dev->data->mac_addrs = NULL; 1690 return 0; 1691 } 1692 1693 const struct eth_dev_ops mlx5_dev_ops = { 1694 .dev_configure = mlx5_dev_configure, 1695 .dev_start = mlx5_dev_start, 1696 .dev_stop = mlx5_dev_stop, 1697 .dev_set_link_down = mlx5_set_link_down, 1698 .dev_set_link_up = mlx5_set_link_up, 1699 .dev_close = mlx5_dev_close, 1700 .promiscuous_enable = mlx5_promiscuous_enable, 1701 .promiscuous_disable = mlx5_promiscuous_disable, 1702 .allmulticast_enable = mlx5_allmulticast_enable, 1703 .allmulticast_disable = mlx5_allmulticast_disable, 1704 .link_update = mlx5_link_update, 1705 .stats_get = mlx5_stats_get, 1706 .stats_reset = mlx5_stats_reset, 1707 .xstats_get = mlx5_xstats_get, 1708 .xstats_reset = mlx5_xstats_reset, 1709 .xstats_get_names = mlx5_xstats_get_names, 1710 .fw_version_get = mlx5_fw_version_get, 1711 .dev_infos_get = mlx5_dev_infos_get, 1712 .representor_info_get = mlx5_representor_info_get, 1713 .read_clock = mlx5_txpp_read_clock, 1714 .dev_supported_ptypes_get = mlx5_dev_supported_ptypes_get, 1715 .vlan_filter_set = mlx5_vlan_filter_set, 1716 .rx_queue_setup = mlx5_rx_queue_setup, 1717 .rx_hairpin_queue_setup = mlx5_rx_hairpin_queue_setup, 1718 .tx_queue_setup = mlx5_tx_queue_setup, 1719 .tx_hairpin_queue_setup = mlx5_tx_hairpin_queue_setup, 1720 .rx_queue_release = mlx5_rx_queue_release, 1721 .tx_queue_release = mlx5_tx_queue_release, 1722 .rx_queue_start = mlx5_rx_queue_start, 1723 .rx_queue_stop = mlx5_rx_queue_stop, 1724 .tx_queue_start = mlx5_tx_queue_start, 1725 .tx_queue_stop = mlx5_tx_queue_stop, 1726 .flow_ctrl_get = mlx5_dev_get_flow_ctrl, 1727 .flow_ctrl_set = mlx5_dev_set_flow_ctrl, 1728 .mac_addr_remove = mlx5_mac_addr_remove, 1729 .mac_addr_add = mlx5_mac_addr_add, 1730 .mac_addr_set = mlx5_mac_addr_set, 1731 .set_mc_addr_list = mlx5_set_mc_addr_list, 1732 .mtu_set = mlx5_dev_set_mtu, 1733 .vlan_strip_queue_set = mlx5_vlan_strip_queue_set, 1734 .vlan_offload_set = mlx5_vlan_offload_set, 1735 .reta_update = mlx5_dev_rss_reta_update, 1736 .reta_query = mlx5_dev_rss_reta_query, 1737 .rss_hash_update = mlx5_rss_hash_update, 1738 .rss_hash_conf_get = mlx5_rss_hash_conf_get, 1739 .flow_ops_get = mlx5_flow_ops_get, 1740 .rxq_info_get = mlx5_rxq_info_get, 1741 .txq_info_get = mlx5_txq_info_get, 1742 
.rx_burst_mode_get = mlx5_rx_burst_mode_get, 1743 .tx_burst_mode_get = mlx5_tx_burst_mode_get, 1744 .rx_queue_intr_enable = mlx5_rx_intr_enable, 1745 .rx_queue_intr_disable = mlx5_rx_intr_disable, 1746 .is_removed = mlx5_is_removed, 1747 .udp_tunnel_port_add = mlx5_udp_tunnel_port_add, 1748 .get_module_info = mlx5_get_module_info, 1749 .get_module_eeprom = mlx5_get_module_eeprom, 1750 .hairpin_cap_get = mlx5_hairpin_cap_get, 1751 .mtr_ops_get = mlx5_flow_meter_ops_get, 1752 .hairpin_bind = mlx5_hairpin_bind, 1753 .hairpin_unbind = mlx5_hairpin_unbind, 1754 .hairpin_get_peer_ports = mlx5_hairpin_get_peer_ports, 1755 .hairpin_queue_peer_update = mlx5_hairpin_queue_peer_update, 1756 .hairpin_queue_peer_bind = mlx5_hairpin_queue_peer_bind, 1757 .hairpin_queue_peer_unbind = mlx5_hairpin_queue_peer_unbind, 1758 .get_monitor_addr = mlx5_get_monitor_addr, 1759 }; 1760 1761 /* Available operations from secondary process. */ 1762 const struct eth_dev_ops mlx5_dev_sec_ops = { 1763 .stats_get = mlx5_stats_get, 1764 .stats_reset = mlx5_stats_reset, 1765 .xstats_get = mlx5_xstats_get, 1766 .xstats_reset = mlx5_xstats_reset, 1767 .xstats_get_names = mlx5_xstats_get_names, 1768 .fw_version_get = mlx5_fw_version_get, 1769 .dev_infos_get = mlx5_dev_infos_get, 1770 .representor_info_get = mlx5_representor_info_get, 1771 .read_clock = mlx5_txpp_read_clock, 1772 .rx_queue_start = mlx5_rx_queue_start, 1773 .rx_queue_stop = mlx5_rx_queue_stop, 1774 .tx_queue_start = mlx5_tx_queue_start, 1775 .tx_queue_stop = mlx5_tx_queue_stop, 1776 .rxq_info_get = mlx5_rxq_info_get, 1777 .txq_info_get = mlx5_txq_info_get, 1778 .rx_burst_mode_get = mlx5_rx_burst_mode_get, 1779 .tx_burst_mode_get = mlx5_tx_burst_mode_get, 1780 .get_module_info = mlx5_get_module_info, 1781 .get_module_eeprom = mlx5_get_module_eeprom, 1782 }; 1783 1784 /* Available operations in flow isolated mode. 
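 * Flow isolated mode is entered through rte_flow_isolate(); from then on the
 * PMD only handles traffic targeted by explicit flow rules, so this table
 * intentionally omits the RSS RETA/hash and UDP tunnel port callbacks that
 * are present in mlx5_dev_ops above.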
*/ 1785 const struct eth_dev_ops mlx5_dev_ops_isolate = { 1786 .dev_configure = mlx5_dev_configure, 1787 .dev_start = mlx5_dev_start, 1788 .dev_stop = mlx5_dev_stop, 1789 .dev_set_link_down = mlx5_set_link_down, 1790 .dev_set_link_up = mlx5_set_link_up, 1791 .dev_close = mlx5_dev_close, 1792 .promiscuous_enable = mlx5_promiscuous_enable, 1793 .promiscuous_disable = mlx5_promiscuous_disable, 1794 .allmulticast_enable = mlx5_allmulticast_enable, 1795 .allmulticast_disable = mlx5_allmulticast_disable, 1796 .link_update = mlx5_link_update, 1797 .stats_get = mlx5_stats_get, 1798 .stats_reset = mlx5_stats_reset, 1799 .xstats_get = mlx5_xstats_get, 1800 .xstats_reset = mlx5_xstats_reset, 1801 .xstats_get_names = mlx5_xstats_get_names, 1802 .fw_version_get = mlx5_fw_version_get, 1803 .dev_infos_get = mlx5_dev_infos_get, 1804 .representor_info_get = mlx5_representor_info_get, 1805 .read_clock = mlx5_txpp_read_clock, 1806 .dev_supported_ptypes_get = mlx5_dev_supported_ptypes_get, 1807 .vlan_filter_set = mlx5_vlan_filter_set, 1808 .rx_queue_setup = mlx5_rx_queue_setup, 1809 .rx_hairpin_queue_setup = mlx5_rx_hairpin_queue_setup, 1810 .tx_queue_setup = mlx5_tx_queue_setup, 1811 .tx_hairpin_queue_setup = mlx5_tx_hairpin_queue_setup, 1812 .rx_queue_release = mlx5_rx_queue_release, 1813 .tx_queue_release = mlx5_tx_queue_release, 1814 .rx_queue_start = mlx5_rx_queue_start, 1815 .rx_queue_stop = mlx5_rx_queue_stop, 1816 .tx_queue_start = mlx5_tx_queue_start, 1817 .tx_queue_stop = mlx5_tx_queue_stop, 1818 .flow_ctrl_get = mlx5_dev_get_flow_ctrl, 1819 .flow_ctrl_set = mlx5_dev_set_flow_ctrl, 1820 .mac_addr_remove = mlx5_mac_addr_remove, 1821 .mac_addr_add = mlx5_mac_addr_add, 1822 .mac_addr_set = mlx5_mac_addr_set, 1823 .set_mc_addr_list = mlx5_set_mc_addr_list, 1824 .mtu_set = mlx5_dev_set_mtu, 1825 .vlan_strip_queue_set = mlx5_vlan_strip_queue_set, 1826 .vlan_offload_set = mlx5_vlan_offload_set, 1827 .flow_ops_get = mlx5_flow_ops_get, 1828 .rxq_info_get = mlx5_rxq_info_get, 1829 .txq_info_get = mlx5_txq_info_get, 1830 .rx_burst_mode_get = mlx5_rx_burst_mode_get, 1831 .tx_burst_mode_get = mlx5_tx_burst_mode_get, 1832 .rx_queue_intr_enable = mlx5_rx_intr_enable, 1833 .rx_queue_intr_disable = mlx5_rx_intr_disable, 1834 .is_removed = mlx5_is_removed, 1835 .get_module_info = mlx5_get_module_info, 1836 .get_module_eeprom = mlx5_get_module_eeprom, 1837 .hairpin_cap_get = mlx5_hairpin_cap_get, 1838 .mtr_ops_get = mlx5_flow_meter_ops_get, 1839 .hairpin_bind = mlx5_hairpin_bind, 1840 .hairpin_unbind = mlx5_hairpin_unbind, 1841 .hairpin_get_peer_ports = mlx5_hairpin_get_peer_ports, 1842 .hairpin_queue_peer_update = mlx5_hairpin_queue_peer_update, 1843 .hairpin_queue_peer_bind = mlx5_hairpin_queue_peer_bind, 1844 .hairpin_queue_peer_unbind = mlx5_hairpin_queue_peer_unbind, 1845 .get_monitor_addr = mlx5_get_monitor_addr, 1846 }; 1847 1848 /** 1849 * Verify and store value for device argument. 1850 * 1851 * @param[in] key 1852 * Key argument to verify. 1853 * @param[in] val 1854 * Value associated with key. 1855 * @param opaque 1856 * User data. 1857 * 1858 * @return 1859 * 0 on success, a negative errno value otherwise and rte_errno is set. 1860 */ 1861 static int 1862 mlx5_args_check(const char *key, const char *val, void *opaque) 1863 { 1864 struct mlx5_dev_config *config = opaque; 1865 unsigned long mod; 1866 signed long tmp; 1867 1868 /* No-op, port representors are processed in mlx5_dev_spawn(). 
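 *
 * The key/value pairs handled below come from the per-device argument string,
 * for example (illustrative only, the PCI address and values are placeholders):
 *
 *   dpdk-testpmd -a 0000:03:00.0,dv_flow_en=1,mprq_en=1,representor=vf[0-3]
 *
 * "driver" and "representor" are accepted and skipped here; the representor
 * list itself is interpreted while spawning the ports.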
*/ 1869 if (!strcmp(MLX5_DRIVER_KEY, key) || !strcmp(MLX5_REPRESENTOR, key)) 1870 return 0; 1871 errno = 0; 1872 tmp = strtol(val, NULL, 0); 1873 if (errno) { 1874 rte_errno = errno; 1875 DRV_LOG(WARNING, "%s: \"%s\" is not a valid integer", key, val); 1876 return -rte_errno; 1877 } 1878 if (tmp < 0 && strcmp(MLX5_TX_PP, key) && strcmp(MLX5_TX_SKEW, key)) { 1879 /* Negative values are acceptable for some keys only. */ 1880 rte_errno = EINVAL; 1881 DRV_LOG(WARNING, "%s: invalid negative value \"%s\"", key, val); 1882 return -rte_errno; 1883 } 1884 mod = tmp >= 0 ? tmp : -tmp; 1885 if (strcmp(MLX5_RXQ_CQE_COMP_EN, key) == 0) { 1886 if (tmp > MLX5_CQE_RESP_FORMAT_L34H_STRIDX) { 1887 DRV_LOG(ERR, "invalid CQE compression " 1888 "format parameter"); 1889 rte_errno = EINVAL; 1890 return -rte_errno; 1891 } 1892 config->cqe_comp = !!tmp; 1893 config->cqe_comp_fmt = tmp; 1894 } else if (strcmp(MLX5_RXQ_PKT_PAD_EN, key) == 0) { 1895 config->hw_padding = !!tmp; 1896 } else if (strcmp(MLX5_RX_MPRQ_EN, key) == 0) { 1897 config->mprq.enabled = !!tmp; 1898 } else if (strcmp(MLX5_RX_MPRQ_LOG_STRIDE_NUM, key) == 0) { 1899 config->mprq.stride_num_n = tmp; 1900 } else if (strcmp(MLX5_RX_MPRQ_LOG_STRIDE_SIZE, key) == 0) { 1901 config->mprq.stride_size_n = tmp; 1902 } else if (strcmp(MLX5_RX_MPRQ_MAX_MEMCPY_LEN, key) == 0) { 1903 config->mprq.max_memcpy_len = tmp; 1904 } else if (strcmp(MLX5_RXQS_MIN_MPRQ, key) == 0) { 1905 config->mprq.min_rxqs_num = tmp; 1906 } else if (strcmp(MLX5_TXQ_INLINE, key) == 0) { 1907 DRV_LOG(WARNING, "%s: deprecated parameter," 1908 " converted to txq_inline_max", key); 1909 config->txq_inline_max = tmp; 1910 } else if (strcmp(MLX5_TXQ_INLINE_MAX, key) == 0) { 1911 config->txq_inline_max = tmp; 1912 } else if (strcmp(MLX5_TXQ_INLINE_MIN, key) == 0) { 1913 config->txq_inline_min = tmp; 1914 } else if (strcmp(MLX5_TXQ_INLINE_MPW, key) == 0) { 1915 config->txq_inline_mpw = tmp; 1916 } else if (strcmp(MLX5_TXQS_MIN_INLINE, key) == 0) { 1917 config->txqs_inline = tmp; 1918 } else if (strcmp(MLX5_TXQS_MAX_VEC, key) == 0) { 1919 DRV_LOG(WARNING, "%s: deprecated parameter, ignored", key); 1920 } else if (strcmp(MLX5_TXQ_MPW_EN, key) == 0) { 1921 config->mps = !!tmp; 1922 } else if (strcmp(MLX5_TX_DB_NC, key) == 0) { 1923 if (tmp != MLX5_TXDB_CACHED && 1924 tmp != MLX5_TXDB_NCACHED && 1925 tmp != MLX5_TXDB_HEURISTIC) { 1926 DRV_LOG(ERR, "invalid Tx doorbell " 1927 "mapping parameter"); 1928 rte_errno = EINVAL; 1929 return -rte_errno; 1930 } 1931 config->dbnc = tmp; 1932 } else if (strcmp(MLX5_TXQ_MPW_HDR_DSEG_EN, key) == 0) { 1933 DRV_LOG(WARNING, "%s: deprecated parameter, ignored", key); 1934 } else if (strcmp(MLX5_TXQ_MAX_INLINE_LEN, key) == 0) { 1935 DRV_LOG(WARNING, "%s: deprecated parameter," 1936 " converted to txq_inline_mpw", key); 1937 config->txq_inline_mpw = tmp; 1938 } else if (strcmp(MLX5_TX_VEC_EN, key) == 0) { 1939 DRV_LOG(WARNING, "%s: deprecated parameter, ignored", key); 1940 } else if (strcmp(MLX5_TX_PP, key) == 0) { 1941 if (!mod) { 1942 DRV_LOG(ERR, "Zero Tx packet pacing parameter"); 1943 rte_errno = EINVAL; 1944 return -rte_errno; 1945 } 1946 config->tx_pp = tmp; 1947 } else if (strcmp(MLX5_TX_SKEW, key) == 0) { 1948 config->tx_skew = tmp; 1949 } else if (strcmp(MLX5_RX_VEC_EN, key) == 0) { 1950 config->rx_vec_en = !!tmp; 1951 } else if (strcmp(MLX5_L3_VXLAN_EN, key) == 0) { 1952 config->l3_vxlan_en = !!tmp; 1953 } else if (strcmp(MLX5_VF_NL_EN, key) == 0) { 1954 config->vf_nl_en = !!tmp; 1955 } else if (strcmp(MLX5_DV_ESW_EN, key) == 0) { 1956 config->dv_esw_en = 
!!tmp; 1957 } else if (strcmp(MLX5_DV_FLOW_EN, key) == 0) { 1958 config->dv_flow_en = !!tmp; 1959 } else if (strcmp(MLX5_DV_XMETA_EN, key) == 0) { 1960 if (tmp != MLX5_XMETA_MODE_LEGACY && 1961 tmp != MLX5_XMETA_MODE_META16 && 1962 tmp != MLX5_XMETA_MODE_META32 && 1963 tmp != MLX5_XMETA_MODE_MISS_INFO) { 1964 DRV_LOG(ERR, "invalid extensive " 1965 "metadata parameter"); 1966 rte_errno = EINVAL; 1967 return -rte_errno; 1968 } 1969 if (tmp != MLX5_XMETA_MODE_MISS_INFO) 1970 config->dv_xmeta_en = tmp; 1971 else 1972 config->dv_miss_info = 1; 1973 } else if (strcmp(MLX5_LACP_BY_USER, key) == 0) { 1974 config->lacp_by_user = !!tmp; 1975 } else if (strcmp(MLX5_MR_EXT_MEMSEG_EN, key) == 0) { 1976 config->mr_ext_memseg_en = !!tmp; 1977 } else if (strcmp(MLX5_MAX_DUMP_FILES_NUM, key) == 0) { 1978 config->max_dump_files_num = tmp; 1979 } else if (strcmp(MLX5_LRO_TIMEOUT_USEC, key) == 0) { 1980 config->lro.timeout = tmp; 1981 } else if (strcmp(RTE_DEVARGS_KEY_CLASS, key) == 0) { 1982 DRV_LOG(DEBUG, "class argument is %s.", val); 1983 } else if (strcmp(MLX5_HP_BUF_SIZE, key) == 0) { 1984 config->log_hp_size = tmp; 1985 } else if (strcmp(MLX5_RECLAIM_MEM, key) == 0) { 1986 if (tmp != MLX5_RCM_NONE && 1987 tmp != MLX5_RCM_LIGHT && 1988 tmp != MLX5_RCM_AGGR) { 1989 DRV_LOG(ERR, "Unrecognize %s: \"%s\"", key, val); 1990 rte_errno = EINVAL; 1991 return -rte_errno; 1992 } 1993 config->reclaim_mode = tmp; 1994 } else if (strcmp(MLX5_SYS_MEM_EN, key) == 0) { 1995 config->sys_mem_en = !!tmp; 1996 } else if (strcmp(MLX5_DECAP_EN, key) == 0) { 1997 config->decap_en = !!tmp; 1998 } else if (strcmp(MLX5_ALLOW_DUPLICATE_PATTERN, key) == 0) { 1999 config->allow_duplicate_pattern = !!tmp; 2000 } else { 2001 DRV_LOG(WARNING, "%s: unknown parameter", key); 2002 rte_errno = EINVAL; 2003 return -rte_errno; 2004 } 2005 return 0; 2006 } 2007 2008 /** 2009 * Parse device parameters. 2010 * 2011 * @param config 2012 * Pointer to device configuration structure. 2013 * @param devargs 2014 * Device arguments structure. 2015 * 2016 * @return 2017 * 0 on success, a negative errno value otherwise and rte_errno is set. 2018 */ 2019 int 2020 mlx5_args(struct mlx5_dev_config *config, struct rte_devargs *devargs) 2021 { 2022 const char **params = (const char *[]){ 2023 MLX5_DRIVER_KEY, 2024 MLX5_RXQ_CQE_COMP_EN, 2025 MLX5_RXQ_PKT_PAD_EN, 2026 MLX5_RX_MPRQ_EN, 2027 MLX5_RX_MPRQ_LOG_STRIDE_NUM, 2028 MLX5_RX_MPRQ_LOG_STRIDE_SIZE, 2029 MLX5_RX_MPRQ_MAX_MEMCPY_LEN, 2030 MLX5_RXQS_MIN_MPRQ, 2031 MLX5_TXQ_INLINE, 2032 MLX5_TXQ_INLINE_MIN, 2033 MLX5_TXQ_INLINE_MAX, 2034 MLX5_TXQ_INLINE_MPW, 2035 MLX5_TXQS_MIN_INLINE, 2036 MLX5_TXQS_MAX_VEC, 2037 MLX5_TXQ_MPW_EN, 2038 MLX5_TXQ_MPW_HDR_DSEG_EN, 2039 MLX5_TXQ_MAX_INLINE_LEN, 2040 MLX5_TX_DB_NC, 2041 MLX5_TX_PP, 2042 MLX5_TX_SKEW, 2043 MLX5_TX_VEC_EN, 2044 MLX5_RX_VEC_EN, 2045 MLX5_L3_VXLAN_EN, 2046 MLX5_VF_NL_EN, 2047 MLX5_DV_ESW_EN, 2048 MLX5_DV_FLOW_EN, 2049 MLX5_DV_XMETA_EN, 2050 MLX5_LACP_BY_USER, 2051 MLX5_MR_EXT_MEMSEG_EN, 2052 MLX5_REPRESENTOR, 2053 MLX5_MAX_DUMP_FILES_NUM, 2054 MLX5_LRO_TIMEOUT_USEC, 2055 RTE_DEVARGS_KEY_CLASS, 2056 MLX5_HP_BUF_SIZE, 2057 MLX5_RECLAIM_MEM, 2058 MLX5_SYS_MEM_EN, 2059 MLX5_DECAP_EN, 2060 MLX5_ALLOW_DUPLICATE_PATTERN, 2061 NULL, 2062 }; 2063 struct rte_kvargs *kvlist; 2064 int ret = 0; 2065 int i; 2066 2067 if (devargs == NULL) 2068 return 0; 2069 /* Following UGLY cast is done to pass checkpatch. 
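/*
 * Editor's note (added comment, not from the original source): in
 * mlx5_args_check() above only "tx_pp" and "tx_skew" may carry negative
 * values; every other key rejects them. The absolute value "mod" is used
 * solely to reject a zero "tx_pp" regardless of the sign of the input.
 */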
/**
 * Parse device parameters.
 *
 * @param config
 *   Pointer to device configuration structure.
 * @param devargs
 *   Device arguments structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_args(struct mlx5_dev_config *config, struct rte_devargs *devargs)
{
	const char **params = (const char *[]){
		MLX5_DRIVER_KEY,
		MLX5_RXQ_CQE_COMP_EN,
		MLX5_RXQ_PKT_PAD_EN,
		MLX5_RX_MPRQ_EN,
		MLX5_RX_MPRQ_LOG_STRIDE_NUM,
		MLX5_RX_MPRQ_LOG_STRIDE_SIZE,
		MLX5_RX_MPRQ_MAX_MEMCPY_LEN,
		MLX5_RXQS_MIN_MPRQ,
		MLX5_TXQ_INLINE,
		MLX5_TXQ_INLINE_MIN,
		MLX5_TXQ_INLINE_MAX,
		MLX5_TXQ_INLINE_MPW,
		MLX5_TXQS_MIN_INLINE,
		MLX5_TXQS_MAX_VEC,
		MLX5_TXQ_MPW_EN,
		MLX5_TXQ_MPW_HDR_DSEG_EN,
		MLX5_TXQ_MAX_INLINE_LEN,
		MLX5_TX_DB_NC,
		MLX5_TX_PP,
		MLX5_TX_SKEW,
		MLX5_TX_VEC_EN,
		MLX5_RX_VEC_EN,
		MLX5_L3_VXLAN_EN,
		MLX5_VF_NL_EN,
		MLX5_DV_ESW_EN,
		MLX5_DV_FLOW_EN,
		MLX5_DV_XMETA_EN,
		MLX5_LACP_BY_USER,
		MLX5_MR_EXT_MEMSEG_EN,
		MLX5_REPRESENTOR,
		MLX5_MAX_DUMP_FILES_NUM,
		MLX5_LRO_TIMEOUT_USEC,
		RTE_DEVARGS_KEY_CLASS,
		MLX5_HP_BUF_SIZE,
		MLX5_RECLAIM_MEM,
		MLX5_SYS_MEM_EN,
		MLX5_DECAP_EN,
		MLX5_ALLOW_DUPLICATE_PATTERN,
		NULL,
	};
	struct rte_kvargs *kvlist;
	int ret = 0;
	int i;

	if (devargs == NULL)
		return 0;
	/* The ugly cast in the params declaration above is done to pass checkpatch. */
	kvlist = rte_kvargs_parse(devargs->args, params);
	if (kvlist == NULL) {
		rte_errno = EINVAL;
		return -rte_errno;
	}
	/* Process parameters. */
	for (i = 0; (params[i] != NULL); ++i) {
		if (rte_kvargs_count(kvlist, params[i])) {
			ret = rte_kvargs_process(kvlist, params[i],
						 mlx5_args_check, config);
			if (ret) {
				rte_errno = EINVAL;
				rte_kvargs_free(kvlist);
				return -rte_errno;
			}
		}
	}
	rte_kvargs_free(kvlist);
	return 0;
}
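/*
 * Editor's note (added comment, illustrative only): device parameters reach
 * this parser as a comma-separated key=value list in the devargs string, for
 * example (the PCI address and values below are hypothetical):
 *
 *     dpdk-testpmd -a 0000:03:00.0,rxq_cqe_comp_en=1,mprq_en=1,tx_pp=500 ...
 *
 * rte_kvargs_parse() validates each key against the params[] list above and
 * fails on unknown keys; rte_kvargs_process() then invokes mlx5_args_check()
 * once per matching key/value pair to validate and store the value.
 */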
/**
 * Configures the minimal amount of data to inline into WQE
 * while sending packets.
 *
 * - the txq_inline_min has the maximal priority, if this
 *   key is specified in devargs
 * - if DevX is enabled the inline mode is queried from the
 *   device (HCA attributes and NIC vport context if needed).
 * - otherwise L2 mode (18 bytes) is assumed for ConnectX-4/4 Lx
 *   and none (0 bytes) for other NICs
 *
 * @param spawn
 *   Verbs device parameters (name, port, switch_info) to spawn.
 * @param config
 *   Device configuration parameters.
 */
void
mlx5_set_min_inline(struct mlx5_dev_spawn_data *spawn,
		    struct mlx5_dev_config *config)
{
	if (config->txq_inline_min != MLX5_ARG_UNSET) {
		/* Application defines size of inlined data explicitly. */
		if (spawn->pci_dev != NULL) {
			switch (spawn->pci_dev->id.device_id) {
			case PCI_DEVICE_ID_MELLANOX_CONNECTX4:
			case PCI_DEVICE_ID_MELLANOX_CONNECTX4VF:
				if (config->txq_inline_min <
					    (int)MLX5_INLINE_HSIZE_L2) {
					DRV_LOG(DEBUG,
						"txq_inline_min aligned to minimal ConnectX-4 required value %d",
						(int)MLX5_INLINE_HSIZE_L2);
					config->txq_inline_min =
							MLX5_INLINE_HSIZE_L2;
				}
				break;
			}
		}
		goto exit;
	}
	if (config->hca_attr.eth_net_offloads) {
		/* We have DevX enabled, inline mode queried successfully. */
		switch (config->hca_attr.wqe_inline_mode) {
		case MLX5_CAP_INLINE_MODE_L2:
			/* outer L2 header must be inlined. */
			config->txq_inline_min = MLX5_INLINE_HSIZE_L2;
			goto exit;
		case MLX5_CAP_INLINE_MODE_NOT_REQUIRED:
			/* No inline data are required by NIC. */
			config->txq_inline_min = MLX5_INLINE_HSIZE_NONE;
			config->hw_vlan_insert =
				config->hca_attr.wqe_vlan_insert;
			DRV_LOG(DEBUG, "Tx VLAN insertion is supported");
			goto exit;
		case MLX5_CAP_INLINE_MODE_VPORT_CONTEXT:
			/* inline mode is defined by NIC vport context. */
			if (!config->hca_attr.eth_virt)
				break;
			switch (config->hca_attr.vport_inline_mode) {
			case MLX5_INLINE_MODE_NONE:
				config->txq_inline_min =
					MLX5_INLINE_HSIZE_NONE;
				goto exit;
			case MLX5_INLINE_MODE_L2:
				config->txq_inline_min =
					MLX5_INLINE_HSIZE_L2;
				goto exit;
			case MLX5_INLINE_MODE_IP:
				config->txq_inline_min =
					MLX5_INLINE_HSIZE_L3;
				goto exit;
			case MLX5_INLINE_MODE_TCP_UDP:
				config->txq_inline_min =
					MLX5_INLINE_HSIZE_L4;
				goto exit;
			case MLX5_INLINE_MODE_INNER_L2:
				config->txq_inline_min =
					MLX5_INLINE_HSIZE_INNER_L2;
				goto exit;
			case MLX5_INLINE_MODE_INNER_IP:
				config->txq_inline_min =
					MLX5_INLINE_HSIZE_INNER_L3;
				goto exit;
			case MLX5_INLINE_MODE_INNER_TCP_UDP:
				config->txq_inline_min =
					MLX5_INLINE_HSIZE_INNER_L4;
				goto exit;
			}
		}
	}
	if (spawn->pci_dev == NULL) {
		config->txq_inline_min = MLX5_INLINE_HSIZE_NONE;
		goto exit;
	}
	/*
	 * We get here if we are unable to deduce
	 * inline data size with DevX. Try PCI ID
	 * to determine old NICs.
	 */
	switch (spawn->pci_dev->id.device_id) {
	case PCI_DEVICE_ID_MELLANOX_CONNECTX4:
	case PCI_DEVICE_ID_MELLANOX_CONNECTX4VF:
	case PCI_DEVICE_ID_MELLANOX_CONNECTX4LX:
	case PCI_DEVICE_ID_MELLANOX_CONNECTX4LXVF:
		config->txq_inline_min = MLX5_INLINE_HSIZE_L2;
		config->hw_vlan_insert = 0;
		break;
	case PCI_DEVICE_ID_MELLANOX_CONNECTX5:
	case PCI_DEVICE_ID_MELLANOX_CONNECTX5VF:
	case PCI_DEVICE_ID_MELLANOX_CONNECTX5EX:
	case PCI_DEVICE_ID_MELLANOX_CONNECTX5EXVF:
		/*
		 * These NICs support VLAN insertion from WQE and
		 * report the wqe_vlan_insert flag, but there is a bug
		 * that may break PFC flow control, so the feature is disabled.
		 */
		config->hw_vlan_insert = 0;
		config->txq_inline_min = MLX5_INLINE_HSIZE_NONE;
		break;
	default:
		config->txq_inline_min = MLX5_INLINE_HSIZE_NONE;
		break;
	}
exit:
	DRV_LOG(DEBUG, "min tx inline configured: %d", config->txq_inline_min);
}
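/*
 * Editor's note (added comment, illustrative only): the precedence
 * implemented above can be summarized as
 *
 *     devargs txq_inline_min  >  DevX HCA/vport inline mode  >  PCI ID table
 *
 * For example (hypothetical values), a ConnectX-4 probed with
 * txq_inline_min=0 in the devargs keeps the explicit setting but has it
 * raised to MLX5_INLINE_HSIZE_L2 (the 18-byte L2 mode mentioned in the
 * comment above), because that hardware requires at least the outer L2
 * header to be inlined.
 */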
/**
 * Configures the metadata mask fields in the shared context.
 *
 * @param [in] dev
 *   Pointer to Ethernet device.
 */
void
mlx5_set_metadata_mask(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_dev_ctx_shared *sh = priv->sh;
	uint32_t meta, mark, reg_c0;

	reg_c0 = ~priv->vport_meta_mask;
	switch (priv->config.dv_xmeta_en) {
	case MLX5_XMETA_MODE_LEGACY:
		meta = UINT32_MAX;
		mark = MLX5_FLOW_MARK_MASK;
		break;
	case MLX5_XMETA_MODE_META16:
		meta = reg_c0 >> rte_bsf32(reg_c0);
		mark = MLX5_FLOW_MARK_MASK;
		break;
	case MLX5_XMETA_MODE_META32:
		meta = UINT32_MAX;
		mark = (reg_c0 >> rte_bsf32(reg_c0)) & MLX5_FLOW_MARK_MASK;
		break;
	default:
		meta = 0;
		mark = 0;
		MLX5_ASSERT(false);
		break;
	}
	if (sh->dv_mark_mask && sh->dv_mark_mask != mark)
		DRV_LOG(WARNING, "metadata MARK mask mismatch %08X:%08X",
			sh->dv_mark_mask, mark);
	else
		sh->dv_mark_mask = mark;
	if (sh->dv_meta_mask && sh->dv_meta_mask != meta)
		DRV_LOG(WARNING, "metadata META mask mismatch %08X:%08X",
			sh->dv_meta_mask, meta);
	else
		sh->dv_meta_mask = meta;
	if (sh->dv_regc0_mask && sh->dv_regc0_mask != reg_c0)
		DRV_LOG(WARNING, "metadata reg_c0 mask mismatch %08X:%08X",
			sh->dv_regc0_mask, reg_c0);
	else
		sh->dv_regc0_mask = reg_c0;
	DRV_LOG(DEBUG, "metadata mode %u", priv->config.dv_xmeta_en);
	DRV_LOG(DEBUG, "metadata MARK mask %08X", sh->dv_mark_mask);
	DRV_LOG(DEBUG, "metadata META mask %08X", sh->dv_meta_mask);
	DRV_LOG(DEBUG, "metadata reg_c0 mask %08X", sh->dv_regc0_mask);
}
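/*
 * Editor's note (added comment, mask value is hypothetical): with
 * priv->vport_meta_mask == 0x0000ffff, the computation above yields
 * reg_c0 = ~0x0000ffff = 0xffff0000 and rte_bsf32(reg_c0) = 16, so in
 * MLX5_XMETA_MODE_META16 the usable META mask becomes
 * meta = 0xffff0000 >> 16 = 0x0000ffff, i.e. the bits of register C0 left
 * over after those reserved for the vport metadata.
 */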
int
rte_pmd_mlx5_get_dyn_flag_names(char *names[], unsigned int n)
{
	static const char *const dynf_names[] = {
		RTE_PMD_MLX5_FINE_GRANULARITY_INLINE,
		RTE_MBUF_DYNFLAG_METADATA_NAME,
		RTE_MBUF_DYNFLAG_TX_TIMESTAMP_NAME
	};
	unsigned int i;

	if (n < RTE_DIM(dynf_names))
		return -ENOMEM;
	for (i = 0; i < RTE_DIM(dynf_names); i++) {
		if (names[i] == NULL)
			return -EINVAL;
		strcpy(names[i], dynf_names[i]);
	}
	return RTE_DIM(dynf_names);
}

/**
 * Check sibling device configurations.
 *
 * Sibling devices sharing the Infiniband device context
 * should have compatible configurations.
 *
 * @param priv
 *   Private device descriptor.
 * @param config
 *   Configuration of the device to be created.
 * @param dpdk_dev
 *   Backing DPDK device.
 *
 * @return
 *   0 on success, EINVAL otherwise and rte_errno is set.
 */
int
mlx5_dev_check_sibling_config(struct mlx5_priv *priv,
			      struct mlx5_dev_config *config,
			      struct rte_device *dpdk_dev)
{
	struct mlx5_dev_ctx_shared *sh = priv->sh;
	struct mlx5_dev_config *sh_conf = NULL;
	uint16_t port_id;

	MLX5_ASSERT(sh);
	/* Nothing to compare for the single/first device. */
	if (sh->refcnt == 1)
		return 0;
	/* Find the device with shared context. */
	MLX5_ETH_FOREACH_DEV(port_id, dpdk_dev) {
		struct mlx5_priv *opriv =
			rte_eth_devices[port_id].data->dev_private;

		if (opriv && opriv != priv && opriv->sh == sh) {
			sh_conf = &opriv->config;
			break;
		}
	}
	if (!sh_conf)
		return 0;
	if (sh_conf->dv_flow_en ^ config->dv_flow_en) {
		DRV_LOG(ERR, "\"dv_flow_en\" configuration mismatch"
			     " for shared %s context", sh->ibdev_name);
		rte_errno = EINVAL;
		return rte_errno;
	}
	if (sh_conf->dv_xmeta_en ^ config->dv_xmeta_en) {
		DRV_LOG(ERR, "\"dv_xmeta_en\" configuration mismatch"
			     " for shared %s context", sh->ibdev_name);
		rte_errno = EINVAL;
		return rte_errno;
	}
	return 0;
}

/**
 * Look for the Ethernet device belonging to the mlx5 driver.
 *
 * @param[in] port_id
 *   port_id to start looking for device.
 * @param[in] odev
 *   Pointer to the hint device. When a device is being probed, its siblings
 *   (master and preceding representors) might not have been assigned a
 *   driver yet (because mlx5_os_pci_probe() has not completed); in that case
 *   matching on the hint device can be used to detect a sibling device.
 *
 * @return
 *   port_id of found device, RTE_MAX_ETHPORTS if not found.
 */
uint16_t
mlx5_eth_find_next(uint16_t port_id, struct rte_device *odev)
{
	while (port_id < RTE_MAX_ETHPORTS) {
		struct rte_eth_dev *dev = &rte_eth_devices[port_id];

		if (dev->state != RTE_ETH_DEV_UNUSED &&
		    dev->device &&
		    (dev->device == odev ||
		     (dev->device->driver &&
		      dev->device->driver->name &&
		      ((strcmp(dev->device->driver->name,
			       MLX5_PCI_DRIVER_NAME) == 0) ||
		       (strcmp(dev->device->driver->name,
			       MLX5_AUXILIARY_DRIVER_NAME) == 0)))))
			break;
		port_id++;
	}
	if (port_id >= RTE_MAX_ETHPORTS)
		return RTE_MAX_ETHPORTS;
	return port_id;
}
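/*
 * Editor's note (assumption, not from the original source): the
 * MLX5_ETH_FOREACH_DEV() macro used in mlx5_dev_check_sibling_config() above
 * is expected to iterate ports through mlx5_eth_find_next(), passing the
 * device being probed as the "odev" hint so that sibling ports whose driver
 * pointer is not assigned yet are still matched.
 */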
/**
 * Callback to remove a device.
 *
 * This function removes all Ethernet devices belonging to a given device.
 *
 * @param[in] dev
 *   Pointer to the generic device.
 *
 * @return
 *   0 on success, a negative errno value otherwise.
 */
int
mlx5_net_remove(struct rte_device *dev)
{
	uint16_t port_id;
	int ret = 0;

	RTE_ETH_FOREACH_DEV_OF(port_id, dev) {
		/*
		 * mlx5_dev_close() is not registered to secondary process,
		 * call the close function explicitly for secondary process.
		 */
		if (rte_eal_process_type() == RTE_PROC_SECONDARY)
			ret |= mlx5_dev_close(&rte_eth_devices[port_id]);
		else
			ret |= rte_eth_dev_close(port_id);
	}
	return ret == 0 ? 0 : -EIO;
}

static const struct rte_pci_id mlx5_pci_id_map[] = {
	{
		RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
			       PCI_DEVICE_ID_MELLANOX_CONNECTX4)
	},
	{
		RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
			       PCI_DEVICE_ID_MELLANOX_CONNECTX4VF)
	},
	{
		RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
			       PCI_DEVICE_ID_MELLANOX_CONNECTX4LX)
	},
	{
		RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
			       PCI_DEVICE_ID_MELLANOX_CONNECTX4LXVF)
	},
	{
		RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
			       PCI_DEVICE_ID_MELLANOX_CONNECTX5)
	},
	{
		RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
			       PCI_DEVICE_ID_MELLANOX_CONNECTX5VF)
	},
	{
		RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
			       PCI_DEVICE_ID_MELLANOX_CONNECTX5EX)
	},
	{
		RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
			       PCI_DEVICE_ID_MELLANOX_CONNECTX5EXVF)
	},
	{
		RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
			       PCI_DEVICE_ID_MELLANOX_CONNECTX5BF)
	},
	{
		RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
			       PCI_DEVICE_ID_MELLANOX_CONNECTX5BFVF)
	},
	{
		RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
			       PCI_DEVICE_ID_MELLANOX_CONNECTX6)
	},
	{
		RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
			       PCI_DEVICE_ID_MELLANOX_CONNECTX6VF)
	},
	{
		RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
			       PCI_DEVICE_ID_MELLANOX_CONNECTX6DX)
	},
	{
		RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
			       PCI_DEVICE_ID_MELLANOX_CONNECTXVF)
	},
	{
		RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
			       PCI_DEVICE_ID_MELLANOX_CONNECTX6DXBF)
	},
	{
		RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
			       PCI_DEVICE_ID_MELLANOX_CONNECTX6LX)
	},
	{
		RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
			       PCI_DEVICE_ID_MELLANOX_CONNECTX7)
	},
	{
		RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
			       PCI_DEVICE_ID_MELLANOX_CONNECTX7BF)
	},
	{
		.vendor_id = 0
	}
};

static struct mlx5_class_driver mlx5_net_driver = {
	.drv_class = MLX5_CLASS_ETH,
	.name = RTE_STR(MLX5_ETH_DRIVER_NAME),
	.id_table = mlx5_pci_id_map,
	.probe = mlx5_os_net_probe,
	.remove = mlx5_net_remove,
	.dma_map = mlx5_net_dma_map,
	.dma_unmap = mlx5_net_dma_unmap,
	.probe_again = 1,
	.intr_lsc = 1,
	.intr_rmv = 1,
};

/* Initialize driver log type. */
RTE_LOG_REGISTER_DEFAULT(mlx5_logtype, NOTICE)

/**
 * Driver initialization routine.
 */
RTE_INIT(rte_mlx5_pmd_init)
{
	pthread_mutex_init(&mlx5_dev_ctx_list_mutex, NULL);
	mlx5_common_init();
	/* Build the static tables for Verbs conversion. */
	mlx5_set_ptype_table();
	mlx5_set_cksum_table();
	mlx5_set_swp_types_table();
	if (mlx5_glue)
		mlx5_class_driver_register(&mlx5_net_driver);
}

RTE_PMD_EXPORT_NAME(MLX5_ETH_DRIVER_NAME, __COUNTER__);
RTE_PMD_REGISTER_PCI_TABLE(MLX5_ETH_DRIVER_NAME, mlx5_pci_id_map);
RTE_PMD_REGISTER_KMOD_DEP(MLX5_ETH_DRIVER_NAME, "* ib_uverbs & mlx5_core & mlx5_ib");