1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright 2015 6WIND S.A. 3 * Copyright 2015 Mellanox Technologies, Ltd 4 */ 5 6 #include <stddef.h> 7 #include <unistd.h> 8 #include <string.h> 9 #include <stdint.h> 10 #include <stdlib.h> 11 #include <errno.h> 12 13 #include <rte_malloc.h> 14 #include <rte_ethdev_driver.h> 15 #include <rte_ethdev_pci.h> 16 #include <rte_pci.h> 17 #include <rte_bus_pci.h> 18 #include <rte_common.h> 19 #include <rte_kvargs.h> 20 #include <rte_rwlock.h> 21 #include <rte_spinlock.h> 22 #include <rte_string_fns.h> 23 #include <rte_alarm.h> 24 25 #include <mlx5_glue.h> 26 #include <mlx5_devx_cmds.h> 27 #include <mlx5_common.h> 28 #include <mlx5_common_os.h> 29 #include <mlx5_common_mp.h> 30 #include <mlx5_common_pci.h> 31 #include <mlx5_malloc.h> 32 33 #include "mlx5_defs.h" 34 #include "mlx5.h" 35 #include "mlx5_utils.h" 36 #include "mlx5_rxtx.h" 37 #include "mlx5_autoconf.h" 38 #include "mlx5_mr.h" 39 #include "mlx5_flow.h" 40 #include "rte_pmd_mlx5.h" 41 42 /* Device parameter to enable RX completion queue compression. */ 43 #define MLX5_RXQ_CQE_COMP_EN "rxq_cqe_comp_en" 44 45 /* Device parameter to enable RX completion entry padding to 128B. */ 46 #define MLX5_RXQ_CQE_PAD_EN "rxq_cqe_pad_en" 47 48 /* Device parameter to enable padding Rx packet to cacheline size. */ 49 #define MLX5_RXQ_PKT_PAD_EN "rxq_pkt_pad_en" 50 51 /* Device parameter to enable Multi-Packet Rx queue. */ 52 #define MLX5_RX_MPRQ_EN "mprq_en" 53 54 /* Device parameter to configure log 2 of the number of strides for MPRQ. */ 55 #define MLX5_RX_MPRQ_LOG_STRIDE_NUM "mprq_log_stride_num" 56 57 /* Device parameter to configure log 2 of the stride size for MPRQ. */ 58 #define MLX5_RX_MPRQ_LOG_STRIDE_SIZE "mprq_log_stride_size" 59 60 /* Device parameter to limit the size of memcpy'd packet for MPRQ. */ 61 #define MLX5_RX_MPRQ_MAX_MEMCPY_LEN "mprq_max_memcpy_len" 62 63 /* Device parameter to set the minimum number of Rx queues to enable MPRQ. */ 64 #define MLX5_RXQS_MIN_MPRQ "rxqs_min_mprq" 65 66 /* Device parameter to configure inline send. Deprecated, ignored.*/ 67 #define MLX5_TXQ_INLINE "txq_inline" 68 69 /* Device parameter to limit packet size to inline with ordinary SEND. */ 70 #define MLX5_TXQ_INLINE_MAX "txq_inline_max" 71 72 /* Device parameter to configure minimal data size to inline. */ 73 #define MLX5_TXQ_INLINE_MIN "txq_inline_min" 74 75 /* Device parameter to limit packet size to inline with Enhanced MPW. */ 76 #define MLX5_TXQ_INLINE_MPW "txq_inline_mpw" 77 78 /* 79 * Device parameter to configure the number of TX queues threshold for 80 * enabling inline send. 81 */ 82 #define MLX5_TXQS_MIN_INLINE "txqs_min_inline" 83 84 /* 85 * Device parameter to configure the number of TX queues threshold for 86 * enabling vectorized Tx, deprecated, ignored (no vectorized Tx routines). 87 */ 88 #define MLX5_TXQS_MAX_VEC "txqs_max_vec" 89 90 /* Device parameter to enable multi-packet send WQEs. */ 91 #define MLX5_TXQ_MPW_EN "txq_mpw_en" 92 93 /* 94 * Device parameter to force doorbell register mapping 95 * to non-cahed region eliminating the extra write memory barrier. 96 */ 97 #define MLX5_TX_DB_NC "tx_db_nc" 98 99 /* 100 * Device parameter to include 2 dsegs in the title WQEBB. 101 * Deprecated, ignored. 102 */ 103 #define MLX5_TXQ_MPW_HDR_DSEG_EN "txq_mpw_hdr_dseg_en" 104 105 /* 106 * Device parameter to limit the size of inlining packet. 107 * Deprecated, ignored. 108 */ 109 #define MLX5_TXQ_MAX_INLINE_LEN "txq_max_inline_len" 110 111 /* 112 * Device parameter to enable Tx scheduling on timestamps 113 * and specify the packet pacing granularity in nanoseconds. 114 */ 115 #define MLX5_TX_PP "tx_pp" 116 117 /* 118 * Device parameter to specify skew in nanoseconds on Tx datapath, 119 * it represents the time between SQ start WQE processing and 120 * appearing actual packet data on the wire. 121 */ 122 #define MLX5_TX_SKEW "tx_skew" 123 124 /* 125 * Device parameter to enable hardware Tx vector. 126 * Deprecated, ignored (no vectorized Tx routines anymore). 127 */ 128 #define MLX5_TX_VEC_EN "tx_vec_en" 129 130 /* Device parameter to enable hardware Rx vector. */ 131 #define MLX5_RX_VEC_EN "rx_vec_en" 132 133 /* Allow L3 VXLAN flow creation. */ 134 #define MLX5_L3_VXLAN_EN "l3_vxlan_en" 135 136 /* Activate DV E-Switch flow steering. */ 137 #define MLX5_DV_ESW_EN "dv_esw_en" 138 139 /* Activate DV flow steering. */ 140 #define MLX5_DV_FLOW_EN "dv_flow_en" 141 142 /* Enable extensive flow metadata support. */ 143 #define MLX5_DV_XMETA_EN "dv_xmeta_en" 144 145 /* Device parameter to let the user manage the lacp traffic of bonded device */ 146 #define MLX5_LACP_BY_USER "lacp_by_user" 147 148 /* Activate Netlink support in VF mode. */ 149 #define MLX5_VF_NL_EN "vf_nl_en" 150 151 /* Enable extending memsegs when creating a MR. */ 152 #define MLX5_MR_EXT_MEMSEG_EN "mr_ext_memseg_en" 153 154 /* Select port representors to instantiate. */ 155 #define MLX5_REPRESENTOR "representor" 156 157 /* Device parameter to configure the maximum number of dump files per queue. */ 158 #define MLX5_MAX_DUMP_FILES_NUM "max_dump_files_num" 159 160 /* Configure timeout of LRO session (in microseconds). */ 161 #define MLX5_LRO_TIMEOUT_USEC "lro_timeout_usec" 162 163 /* 164 * Device parameter to configure the total data buffer size for a single 165 * hairpin queue (logarithm value). 166 */ 167 #define MLX5_HP_BUF_SIZE "hp_buf_log_sz" 168 169 /* Flow memory reclaim mode. */ 170 #define MLX5_RECLAIM_MEM "reclaim_mem_mode" 171 172 /* The default memory allocator used in PMD. */ 173 #define MLX5_SYS_MEM_EN "sys_mem_en" 174 /* Decap will be used or not. */ 175 #define MLX5_DECAP_EN "decap_en" 176 177 /* Shared memory between primary and secondary processes. */ 178 struct mlx5_shared_data *mlx5_shared_data; 179 180 /** Driver-specific log messages type. */ 181 int mlx5_logtype; 182 183 static LIST_HEAD(, mlx5_dev_ctx_shared) mlx5_dev_ctx_list = 184 LIST_HEAD_INITIALIZER(); 185 static pthread_mutex_t mlx5_dev_ctx_list_mutex = PTHREAD_MUTEX_INITIALIZER; 186 187 static const struct mlx5_indexed_pool_config mlx5_ipool_cfg[] = { 188 #ifdef HAVE_IBV_FLOW_DV_SUPPORT 189 { 190 .size = sizeof(struct mlx5_flow_dv_encap_decap_resource), 191 .trunk_size = 64, 192 .grow_trunk = 3, 193 .grow_shift = 2, 194 .need_lock = 0, 195 .release_mem_en = 1, 196 .malloc = mlx5_malloc, 197 .free = mlx5_free, 198 .type = "mlx5_encap_decap_ipool", 199 }, 200 { 201 .size = sizeof(struct mlx5_flow_dv_push_vlan_action_resource), 202 .trunk_size = 64, 203 .grow_trunk = 3, 204 .grow_shift = 2, 205 .need_lock = 0, 206 .release_mem_en = 1, 207 .malloc = mlx5_malloc, 208 .free = mlx5_free, 209 .type = "mlx5_push_vlan_ipool", 210 }, 211 { 212 .size = sizeof(struct mlx5_flow_dv_tag_resource), 213 .trunk_size = 64, 214 .grow_trunk = 3, 215 .grow_shift = 2, 216 .need_lock = 0, 217 .release_mem_en = 1, 218 .malloc = mlx5_malloc, 219 .free = mlx5_free, 220 .type = "mlx5_tag_ipool", 221 }, 222 { 223 .size = sizeof(struct mlx5_flow_dv_port_id_action_resource), 224 .trunk_size = 64, 225 .grow_trunk = 3, 226 .grow_shift = 2, 227 .need_lock = 0, 228 .release_mem_en = 1, 229 .malloc = mlx5_malloc, 230 .free = mlx5_free, 231 .type = "mlx5_port_id_ipool", 232 }, 233 { 234 .size = sizeof(struct mlx5_flow_tbl_data_entry), 235 .trunk_size = 64, 236 .grow_trunk = 3, 237 .grow_shift = 2, 238 .need_lock = 0, 239 .release_mem_en = 1, 240 .malloc = mlx5_malloc, 241 .free = mlx5_free, 242 .type = "mlx5_jump_ipool", 243 }, 244 { 245 .size = sizeof(struct mlx5_flow_dv_sample_resource), 246 .trunk_size = 64, 247 .grow_trunk = 3, 248 .grow_shift = 2, 249 .need_lock = 0, 250 .release_mem_en = 1, 251 .malloc = mlx5_malloc, 252 .free = mlx5_free, 253 .type = "mlx5_sample_ipool", 254 }, 255 { 256 .size = sizeof(struct mlx5_flow_dv_dest_array_resource), 257 .trunk_size = 64, 258 .grow_trunk = 3, 259 .grow_shift = 2, 260 .need_lock = 0, 261 .release_mem_en = 1, 262 .malloc = mlx5_malloc, 263 .free = mlx5_free, 264 .type = "mlx5_dest_array_ipool", 265 }, 266 #endif 267 { 268 .size = sizeof(struct mlx5_flow_meter), 269 .trunk_size = 64, 270 .grow_trunk = 3, 271 .grow_shift = 2, 272 .need_lock = 0, 273 .release_mem_en = 1, 274 .malloc = mlx5_malloc, 275 .free = mlx5_free, 276 .type = "mlx5_meter_ipool", 277 }, 278 { 279 .size = sizeof(struct mlx5_flow_mreg_copy_resource), 280 .trunk_size = 64, 281 .grow_trunk = 3, 282 .grow_shift = 2, 283 .need_lock = 0, 284 .release_mem_en = 1, 285 .malloc = mlx5_malloc, 286 .free = mlx5_free, 287 .type = "mlx5_mcp_ipool", 288 }, 289 { 290 .size = (sizeof(struct mlx5_hrxq) + MLX5_RSS_HASH_KEY_LEN), 291 .trunk_size = 64, 292 .grow_trunk = 3, 293 .grow_shift = 2, 294 .need_lock = 0, 295 .release_mem_en = 1, 296 .malloc = mlx5_malloc, 297 .free = mlx5_free, 298 .type = "mlx5_hrxq_ipool", 299 }, 300 { 301 /* 302 * MLX5_IPOOL_MLX5_FLOW size varies for DV and VERBS flows. 303 * It set in run time according to PCI function configuration. 304 */ 305 .size = 0, 306 .trunk_size = 64, 307 .grow_trunk = 3, 308 .grow_shift = 2, 309 .need_lock = 0, 310 .release_mem_en = 1, 311 .malloc = mlx5_malloc, 312 .free = mlx5_free, 313 .type = "mlx5_flow_handle_ipool", 314 }, 315 { 316 .size = sizeof(struct rte_flow), 317 .trunk_size = 4096, 318 .need_lock = 1, 319 .release_mem_en = 1, 320 .malloc = mlx5_malloc, 321 .free = mlx5_free, 322 .type = "rte_flow_ipool", 323 }, 324 }; 325 326 327 #define MLX5_FLOW_MIN_ID_POOL_SIZE 512 328 #define MLX5_ID_GENERATION_ARRAY_FACTOR 16 329 330 #define MLX5_FLOW_TABLE_HLIST_ARRAY_SIZE 4096 331 332 /** 333 * Allocate ID pool structure. 334 * 335 * @param[in] max_id 336 * The maximum id can be allocated from the pool. 337 * 338 * @return 339 * Pointer to pool object, NULL value otherwise. 340 */ 341 struct mlx5_flow_id_pool * 342 mlx5_flow_id_pool_alloc(uint32_t max_id) 343 { 344 struct mlx5_flow_id_pool *pool; 345 void *mem; 346 347 pool = mlx5_malloc(MLX5_MEM_ZERO, sizeof(*pool), 348 RTE_CACHE_LINE_SIZE, SOCKET_ID_ANY); 349 if (!pool) { 350 DRV_LOG(ERR, "can't allocate id pool"); 351 rte_errno = ENOMEM; 352 return NULL; 353 } 354 mem = mlx5_malloc(MLX5_MEM_ZERO, 355 MLX5_FLOW_MIN_ID_POOL_SIZE * sizeof(uint32_t), 356 RTE_CACHE_LINE_SIZE, SOCKET_ID_ANY); 357 if (!mem) { 358 DRV_LOG(ERR, "can't allocate mem for id pool"); 359 rte_errno = ENOMEM; 360 goto error; 361 } 362 pool->free_arr = mem; 363 pool->curr = pool->free_arr; 364 pool->last = pool->free_arr + MLX5_FLOW_MIN_ID_POOL_SIZE; 365 pool->base_index = 0; 366 pool->max_id = max_id; 367 return pool; 368 error: 369 mlx5_free(pool); 370 return NULL; 371 } 372 373 /** 374 * Release ID pool structure. 375 * 376 * @param[in] pool 377 * Pointer to flow id pool object to free. 378 */ 379 void 380 mlx5_flow_id_pool_release(struct mlx5_flow_id_pool *pool) 381 { 382 mlx5_free(pool->free_arr); 383 mlx5_free(pool); 384 } 385 386 /** 387 * Generate ID. 388 * 389 * @param[in] pool 390 * Pointer to flow id pool. 391 * @param[out] id 392 * The generated ID. 393 * 394 * @return 395 * 0 on success, error value otherwise. 396 */ 397 uint32_t 398 mlx5_flow_id_get(struct mlx5_flow_id_pool *pool, uint32_t *id) 399 { 400 if (pool->curr == pool->free_arr) { 401 if (pool->base_index == pool->max_id) { 402 rte_errno = ENOMEM; 403 DRV_LOG(ERR, "no free id"); 404 return -rte_errno; 405 } 406 *id = ++pool->base_index; 407 return 0; 408 } 409 *id = *(--pool->curr); 410 return 0; 411 } 412 413 /** 414 * Release ID. 415 * 416 * @param[in] pool 417 * Pointer to flow id pool. 418 * @param[out] id 419 * The generated ID. 420 * 421 * @return 422 * 0 on success, error value otherwise. 423 */ 424 uint32_t 425 mlx5_flow_id_release(struct mlx5_flow_id_pool *pool, uint32_t id) 426 { 427 uint32_t size; 428 uint32_t size2; 429 void *mem; 430 431 if (pool->curr == pool->last) { 432 size = pool->curr - pool->free_arr; 433 size2 = size * MLX5_ID_GENERATION_ARRAY_FACTOR; 434 MLX5_ASSERT(size2 > size); 435 mem = mlx5_malloc(0, size2 * sizeof(uint32_t), 0, 436 SOCKET_ID_ANY); 437 if (!mem) { 438 DRV_LOG(ERR, "can't allocate mem for id pool"); 439 rte_errno = ENOMEM; 440 return -rte_errno; 441 } 442 memcpy(mem, pool->free_arr, size * sizeof(uint32_t)); 443 mlx5_free(pool->free_arr); 444 pool->free_arr = mem; 445 pool->curr = pool->free_arr + size; 446 pool->last = pool->free_arr + size2; 447 } 448 *pool->curr = id; 449 pool->curr++; 450 return 0; 451 } 452 453 /** 454 * Initialize the shared aging list information per port. 455 * 456 * @param[in] sh 457 * Pointer to mlx5_dev_ctx_shared object. 458 */ 459 static void 460 mlx5_flow_aging_init(struct mlx5_dev_ctx_shared *sh) 461 { 462 uint32_t i; 463 struct mlx5_age_info *age_info; 464 465 for (i = 0; i < sh->max_port; i++) { 466 age_info = &sh->port[i].age_info; 467 age_info->flags = 0; 468 TAILQ_INIT(&age_info->aged_counters); 469 rte_spinlock_init(&age_info->aged_sl); 470 MLX5_AGE_SET(age_info, MLX5_AGE_TRIGGER); 471 } 472 } 473 474 /** 475 * Initialize the counters management structure. 476 * 477 * @param[in] sh 478 * Pointer to mlx5_dev_ctx_shared object to free 479 */ 480 static void 481 mlx5_flow_counters_mng_init(struct mlx5_dev_ctx_shared *sh) 482 { 483 int i; 484 485 memset(&sh->cmng, 0, sizeof(sh->cmng)); 486 TAILQ_INIT(&sh->cmng.flow_counters); 487 for (i = 0; i < MLX5_CCONT_TYPE_MAX; ++i) { 488 sh->cmng.ccont[i].min_id = MLX5_CNT_BATCH_OFFSET; 489 sh->cmng.ccont[i].max_id = -1; 490 sh->cmng.ccont[i].last_pool_idx = POOL_IDX_INVALID; 491 TAILQ_INIT(&sh->cmng.ccont[i].pool_list); 492 rte_spinlock_init(&sh->cmng.ccont[i].resize_sl); 493 TAILQ_INIT(&sh->cmng.ccont[i].counters); 494 rte_spinlock_init(&sh->cmng.ccont[i].csl); 495 } 496 } 497 498 /** 499 * Destroy all the resources allocated for a counter memory management. 500 * 501 * @param[in] mng 502 * Pointer to the memory management structure. 503 */ 504 static void 505 mlx5_flow_destroy_counter_stat_mem_mng(struct mlx5_counter_stats_mem_mng *mng) 506 { 507 uint8_t *mem = (uint8_t *)(uintptr_t)mng->raws[0].data; 508 509 LIST_REMOVE(mng, next); 510 claim_zero(mlx5_devx_cmd_destroy(mng->dm)); 511 claim_zero(mlx5_glue->devx_umem_dereg(mng->umem)); 512 mlx5_free(mem); 513 } 514 515 /** 516 * Close and release all the resources of the counters management. 517 * 518 * @param[in] sh 519 * Pointer to mlx5_dev_ctx_shared object to free. 520 */ 521 static void 522 mlx5_flow_counters_mng_close(struct mlx5_dev_ctx_shared *sh) 523 { 524 struct mlx5_counter_stats_mem_mng *mng; 525 int i; 526 int j; 527 int retries = 1024; 528 529 rte_errno = 0; 530 while (--retries) { 531 rte_eal_alarm_cancel(mlx5_flow_query_alarm, sh); 532 if (rte_errno != EINPROGRESS) 533 break; 534 rte_pause(); 535 } 536 for (i = 0; i < MLX5_CCONT_TYPE_MAX; ++i) { 537 struct mlx5_flow_counter_pool *pool; 538 uint32_t batch = !!(i > 1); 539 540 if (!sh->cmng.ccont[i].pools) 541 continue; 542 pool = TAILQ_FIRST(&sh->cmng.ccont[i].pool_list); 543 while (pool) { 544 if (batch && pool->min_dcs) 545 claim_zero(mlx5_devx_cmd_destroy 546 (pool->min_dcs)); 547 for (j = 0; j < MLX5_COUNTERS_PER_POOL; ++j) { 548 if (MLX5_POOL_GET_CNT(pool, j)->action) 549 claim_zero 550 (mlx5_glue->destroy_flow_action 551 (MLX5_POOL_GET_CNT 552 (pool, j)->action)); 553 if (!batch && MLX5_GET_POOL_CNT_EXT 554 (pool, j)->dcs) 555 claim_zero(mlx5_devx_cmd_destroy 556 (MLX5_GET_POOL_CNT_EXT 557 (pool, j)->dcs)); 558 } 559 TAILQ_REMOVE(&sh->cmng.ccont[i].pool_list, pool, next); 560 mlx5_free(pool); 561 pool = TAILQ_FIRST(&sh->cmng.ccont[i].pool_list); 562 } 563 mlx5_free(sh->cmng.ccont[i].pools); 564 } 565 mng = LIST_FIRST(&sh->cmng.mem_mngs); 566 while (mng) { 567 mlx5_flow_destroy_counter_stat_mem_mng(mng); 568 mng = LIST_FIRST(&sh->cmng.mem_mngs); 569 } 570 memset(&sh->cmng, 0, sizeof(sh->cmng)); 571 } 572 573 /** 574 * Initialize the flow resources' indexed mempool. 575 * 576 * @param[in] sh 577 * Pointer to mlx5_dev_ctx_shared object. 578 * @param[in] sh 579 * Pointer to user dev config. 580 */ 581 static void 582 mlx5_flow_ipool_create(struct mlx5_dev_ctx_shared *sh, 583 const struct mlx5_dev_config *config) 584 { 585 uint8_t i; 586 struct mlx5_indexed_pool_config cfg; 587 588 for (i = 0; i < MLX5_IPOOL_MAX; ++i) { 589 cfg = mlx5_ipool_cfg[i]; 590 switch (i) { 591 default: 592 break; 593 /* 594 * Set MLX5_IPOOL_MLX5_FLOW ipool size 595 * according to PCI function flow configuration. 596 */ 597 case MLX5_IPOOL_MLX5_FLOW: 598 cfg.size = config->dv_flow_en ? 599 sizeof(struct mlx5_flow_handle) : 600 MLX5_FLOW_HANDLE_VERBS_SIZE; 601 break; 602 } 603 if (config->reclaim_mode) 604 cfg.release_mem_en = 1; 605 sh->ipool[i] = mlx5_ipool_create(&cfg); 606 } 607 } 608 609 /** 610 * Release the flow resources' indexed mempool. 611 * 612 * @param[in] sh 613 * Pointer to mlx5_dev_ctx_shared object. 614 */ 615 static void 616 mlx5_flow_ipool_destroy(struct mlx5_dev_ctx_shared *sh) 617 { 618 uint8_t i; 619 620 for (i = 0; i < MLX5_IPOOL_MAX; ++i) 621 mlx5_ipool_destroy(sh->ipool[i]); 622 } 623 624 /* 625 * Check if dynamic flex parser for eCPRI already exists. 626 * 627 * @param dev 628 * Pointer to Ethernet device structure. 629 * 630 * @return 631 * true on exists, false on not. 632 */ 633 bool 634 mlx5_flex_parser_ecpri_exist(struct rte_eth_dev *dev) 635 { 636 struct mlx5_priv *priv = dev->data->dev_private; 637 struct mlx5_flex_parser_profiles *prf = 638 &priv->sh->fp[MLX5_FLEX_PARSER_ECPRI_0]; 639 640 return !!prf->obj; 641 } 642 643 /* 644 * Allocation of a flex parser for eCPRI. Once created, this parser related 645 * resources will be held until the device is closed. 646 * 647 * @param dev 648 * Pointer to Ethernet device structure. 649 * 650 * @return 651 * 0 on success, a negative errno value otherwise and rte_errno is set. 652 */ 653 int 654 mlx5_flex_parser_ecpri_alloc(struct rte_eth_dev *dev) 655 { 656 struct mlx5_priv *priv = dev->data->dev_private; 657 struct mlx5_flex_parser_profiles *prf = 658 &priv->sh->fp[MLX5_FLEX_PARSER_ECPRI_0]; 659 struct mlx5_devx_graph_node_attr node = { 660 .modify_field_select = 0, 661 }; 662 uint32_t ids[8]; 663 int ret; 664 665 if (!priv->config.hca_attr.parse_graph_flex_node) { 666 DRV_LOG(ERR, "Dynamic flex parser is not supported " 667 "for device %s.", priv->dev_data->name); 668 return -ENOTSUP; 669 } 670 node.header_length_mode = MLX5_GRAPH_NODE_LEN_FIXED; 671 /* 8 bytes now: 4B common header + 4B message body header. */ 672 node.header_length_base_value = 0x8; 673 /* After MAC layer: Ether / VLAN. */ 674 node.in[0].arc_parse_graph_node = MLX5_GRAPH_ARC_NODE_MAC; 675 /* Type of compared condition should be 0xAEFE in the L2 layer. */ 676 node.in[0].compare_condition_value = RTE_ETHER_TYPE_ECPRI; 677 /* Sample #0: type in common header. */ 678 node.sample[0].flow_match_sample_en = 1; 679 /* Fixed offset. */ 680 node.sample[0].flow_match_sample_offset_mode = 0x0; 681 /* Only the 2nd byte will be used. */ 682 node.sample[0].flow_match_sample_field_base_offset = 0x0; 683 /* Sample #1: message payload. */ 684 node.sample[1].flow_match_sample_en = 1; 685 /* Fixed offset. */ 686 node.sample[1].flow_match_sample_offset_mode = 0x0; 687 /* 688 * Only the first two bytes will be used right now, and its offset will 689 * start after the common header that with the length of a DW(u32). 690 */ 691 node.sample[1].flow_match_sample_field_base_offset = sizeof(uint32_t); 692 prf->obj = mlx5_devx_cmd_create_flex_parser(priv->sh->ctx, &node); 693 if (!prf->obj) { 694 DRV_LOG(ERR, "Failed to create flex parser node object."); 695 return (rte_errno == 0) ? -ENODEV : -rte_errno; 696 } 697 prf->num = 2; 698 ret = mlx5_devx_cmd_query_parse_samples(prf->obj, ids, prf->num); 699 if (ret) { 700 DRV_LOG(ERR, "Failed to query sample IDs."); 701 return (rte_errno == 0) ? -ENODEV : -rte_errno; 702 } 703 prf->offset[0] = 0x0; 704 prf->offset[1] = sizeof(uint32_t); 705 prf->ids[0] = ids[0]; 706 prf->ids[1] = ids[1]; 707 return 0; 708 } 709 710 /* 711 * Destroy the flex parser node, including the parser itself, input / output 712 * arcs and DW samples. Resources could be reused then. 713 * 714 * @param dev 715 * Pointer to Ethernet device structure. 716 */ 717 static void 718 mlx5_flex_parser_ecpri_release(struct rte_eth_dev *dev) 719 { 720 struct mlx5_priv *priv = dev->data->dev_private; 721 struct mlx5_flex_parser_profiles *prf = 722 &priv->sh->fp[MLX5_FLEX_PARSER_ECPRI_0]; 723 724 if (prf->obj) 725 mlx5_devx_cmd_destroy(prf->obj); 726 prf->obj = NULL; 727 } 728 729 /* 730 * Allocate Rx and Tx UARs in robust fashion. 731 * This routine handles the following UAR allocation issues: 732 * 733 * - tries to allocate the UAR with the most appropriate memory 734 * mapping type from the ones supported by the host 735 * 736 * - tries to allocate the UAR with non-NULL base address 737 * OFED 5.0.x and Upstream rdma_core before v29 returned the NULL as 738 * UAR base address if UAR was not the first object in the UAR page. 739 * It caused the PMD failure and we should try to get another UAR 740 * till we get the first one with non-NULL base address returned. 741 */ 742 static int 743 mlx5_alloc_rxtx_uars(struct mlx5_dev_ctx_shared *sh, 744 const struct mlx5_dev_config *config) 745 { 746 uint32_t uar_mapping, retry; 747 int err = 0; 748 void *base_addr; 749 750 for (retry = 0; retry < MLX5_ALLOC_UAR_RETRY; ++retry) { 751 #ifdef MLX5DV_UAR_ALLOC_TYPE_NC 752 /* Control the mapping type according to the settings. */ 753 uar_mapping = (config->dbnc == MLX5_TXDB_NCACHED) ? 754 MLX5DV_UAR_ALLOC_TYPE_NC : 755 MLX5DV_UAR_ALLOC_TYPE_BF; 756 #else 757 RTE_SET_USED(config); 758 /* 759 * It seems we have no way to control the memory mapping type 760 * for the UAR, the default "Write-Combining" type is supposed. 761 * The UAR initialization on queue creation queries the 762 * actual mapping type done by Verbs/kernel and setups the 763 * PMD datapath accordingly. 764 */ 765 uar_mapping = 0; 766 #endif 767 sh->tx_uar = mlx5_glue->devx_alloc_uar(sh->ctx, uar_mapping); 768 #ifdef MLX5DV_UAR_ALLOC_TYPE_NC 769 if (!sh->tx_uar && 770 uar_mapping == MLX5DV_UAR_ALLOC_TYPE_BF) { 771 if (config->dbnc == MLX5_TXDB_CACHED || 772 config->dbnc == MLX5_TXDB_HEURISTIC) 773 DRV_LOG(WARNING, "Devarg tx_db_nc setting " 774 "is not supported by DevX"); 775 /* 776 * In some environments like virtual machine 777 * the Write Combining mapped might be not supported 778 * and UAR allocation fails. We try "Non-Cached" 779 * mapping for the case. The tx_burst routines take 780 * the UAR mapping type into account on UAR setup 781 * on queue creation. 782 */ 783 DRV_LOG(WARNING, "Failed to allocate Tx DevX UAR (BF)"); 784 uar_mapping = MLX5DV_UAR_ALLOC_TYPE_NC; 785 sh->tx_uar = mlx5_glue->devx_alloc_uar 786 (sh->ctx, uar_mapping); 787 } else if (!sh->tx_uar && 788 uar_mapping == MLX5DV_UAR_ALLOC_TYPE_NC) { 789 if (config->dbnc == MLX5_TXDB_NCACHED) 790 DRV_LOG(WARNING, "Devarg tx_db_nc settings " 791 "is not supported by DevX"); 792 /* 793 * If Verbs/kernel does not support "Non-Cached" 794 * try the "Write-Combining". 795 */ 796 DRV_LOG(WARNING, "Failed to allocate Tx DevX UAR (NC)"); 797 uar_mapping = MLX5DV_UAR_ALLOC_TYPE_BF; 798 sh->tx_uar = mlx5_glue->devx_alloc_uar 799 (sh->ctx, uar_mapping); 800 } 801 #endif 802 if (!sh->tx_uar) { 803 DRV_LOG(ERR, "Failed to allocate Tx DevX UAR (BF/NC)"); 804 err = ENOMEM; 805 goto exit; 806 } 807 base_addr = mlx5_os_get_devx_uar_base_addr(sh->tx_uar); 808 if (base_addr) 809 break; 810 /* 811 * The UARs are allocated by rdma_core within the 812 * IB device context, on context closure all UARs 813 * will be freed, should be no memory/object leakage. 814 */ 815 DRV_LOG(WARNING, "Retrying to allocate Tx DevX UAR"); 816 sh->tx_uar = NULL; 817 } 818 /* Check whether we finally succeeded with valid UAR allocation. */ 819 if (!sh->tx_uar) { 820 DRV_LOG(ERR, "Failed to allocate Tx DevX UAR (NULL base)"); 821 err = ENOMEM; 822 goto exit; 823 } 824 for (retry = 0; retry < MLX5_ALLOC_UAR_RETRY; ++retry) { 825 uar_mapping = 0; 826 sh->devx_rx_uar = mlx5_glue->devx_alloc_uar 827 (sh->ctx, uar_mapping); 828 #ifdef MLX5DV_UAR_ALLOC_TYPE_NC 829 if (!sh->devx_rx_uar && 830 uar_mapping == MLX5DV_UAR_ALLOC_TYPE_BF) { 831 /* 832 * Rx UAR is used to control interrupts only, 833 * should be no datapath noticeable impact, 834 * can try "Non-Cached" mapping safely. 835 */ 836 DRV_LOG(WARNING, "Failed to allocate Rx DevX UAR (BF)"); 837 uar_mapping = MLX5DV_UAR_ALLOC_TYPE_NC; 838 sh->devx_rx_uar = mlx5_glue->devx_alloc_uar 839 (sh->ctx, uar_mapping); 840 } 841 #endif 842 if (!sh->devx_rx_uar) { 843 DRV_LOG(ERR, "Failed to allocate Rx DevX UAR (BF/NC)"); 844 err = ENOMEM; 845 goto exit; 846 } 847 base_addr = mlx5_os_get_devx_uar_base_addr(sh->devx_rx_uar); 848 if (base_addr) 849 break; 850 /* 851 * The UARs are allocated by rdma_core within the 852 * IB device context, on context closure all UARs 853 * will be freed, should be no memory/object leakage. 854 */ 855 DRV_LOG(WARNING, "Retrying to allocate Rx DevX UAR"); 856 sh->devx_rx_uar = NULL; 857 } 858 /* Check whether we finally succeeded with valid UAR allocation. */ 859 if (!sh->devx_rx_uar) { 860 DRV_LOG(ERR, "Failed to allocate Rx DevX UAR (NULL base)"); 861 err = ENOMEM; 862 } 863 exit: 864 return err; 865 } 866 867 /** 868 * Allocate shared device context. If there is multiport device the 869 * master and representors will share this context, if there is single 870 * port dedicated device, the context will be used by only given 871 * port due to unification. 872 * 873 * Routine first searches the context for the specified device name, 874 * if found the shared context assumed and reference counter is incremented. 875 * If no context found the new one is created and initialized with specified 876 * device context and parameters. 877 * 878 * @param[in] spawn 879 * Pointer to the device attributes (name, port, etc). 880 * @param[in] config 881 * Pointer to device configuration structure. 882 * 883 * @return 884 * Pointer to mlx5_dev_ctx_shared object on success, 885 * otherwise NULL and rte_errno is set. 886 */ 887 struct mlx5_dev_ctx_shared * 888 mlx5_alloc_shared_dev_ctx(const struct mlx5_dev_spawn_data *spawn, 889 const struct mlx5_dev_config *config) 890 { 891 struct mlx5_dev_ctx_shared *sh; 892 int err = 0; 893 uint32_t i; 894 struct mlx5_devx_tis_attr tis_attr = { 0 }; 895 896 MLX5_ASSERT(spawn); 897 /* Secondary process should not create the shared context. */ 898 MLX5_ASSERT(rte_eal_process_type() == RTE_PROC_PRIMARY); 899 pthread_mutex_lock(&mlx5_dev_ctx_list_mutex); 900 /* Search for IB context by device name. */ 901 LIST_FOREACH(sh, &mlx5_dev_ctx_list, next) { 902 if (!strcmp(sh->ibdev_name, 903 mlx5_os_get_dev_device_name(spawn->phys_dev))) { 904 sh->refcnt++; 905 goto exit; 906 } 907 } 908 /* No device found, we have to create new shared context. */ 909 MLX5_ASSERT(spawn->max_port); 910 sh = mlx5_malloc(MLX5_MEM_ZERO | MLX5_MEM_RTE, 911 sizeof(struct mlx5_dev_ctx_shared) + 912 spawn->max_port * 913 sizeof(struct mlx5_dev_shared_port), 914 RTE_CACHE_LINE_SIZE, SOCKET_ID_ANY); 915 if (!sh) { 916 DRV_LOG(ERR, "shared context allocation failure"); 917 rte_errno = ENOMEM; 918 goto exit; 919 } 920 err = mlx5_os_open_device(spawn, config, sh); 921 if (!sh->ctx) 922 goto error; 923 err = mlx5_os_get_dev_attr(sh->ctx, &sh->device_attr); 924 if (err) { 925 DRV_LOG(DEBUG, "mlx5_os_get_dev_attr() failed"); 926 goto error; 927 } 928 sh->refcnt = 1; 929 sh->max_port = spawn->max_port; 930 strncpy(sh->ibdev_name, mlx5_os_get_ctx_device_name(sh->ctx), 931 sizeof(sh->ibdev_name) - 1); 932 strncpy(sh->ibdev_path, mlx5_os_get_ctx_device_path(sh->ctx), 933 sizeof(sh->ibdev_path) - 1); 934 /* 935 * Setting port_id to max unallowed value means 936 * there is no interrupt subhandler installed for 937 * the given port index i. 938 */ 939 for (i = 0; i < sh->max_port; i++) { 940 sh->port[i].ih_port_id = RTE_MAX_ETHPORTS; 941 sh->port[i].devx_ih_port_id = RTE_MAX_ETHPORTS; 942 } 943 sh->pd = mlx5_glue->alloc_pd(sh->ctx); 944 if (sh->pd == NULL) { 945 DRV_LOG(ERR, "PD allocation failure"); 946 err = ENOMEM; 947 goto error; 948 } 949 if (sh->devx) { 950 uint32_t lcore = (uint32_t)rte_lcore_to_cpu_id(-1); 951 952 /* Query the EQN for this core. */ 953 err = mlx5_glue->devx_query_eqn(sh->ctx, lcore, &sh->eqn); 954 if (err) { 955 rte_errno = errno; 956 DRV_LOG(ERR, "Failed to query event queue number %d.", 957 rte_errno); 958 goto error; 959 } 960 err = mlx5_os_get_pdn(sh->pd, &sh->pdn); 961 if (err) { 962 DRV_LOG(ERR, "Fail to extract pdn from PD"); 963 goto error; 964 } 965 sh->td = mlx5_devx_cmd_create_td(sh->ctx); 966 if (!sh->td) { 967 DRV_LOG(ERR, "TD allocation failure"); 968 err = ENOMEM; 969 goto error; 970 } 971 tis_attr.transport_domain = sh->td->id; 972 sh->tis = mlx5_devx_cmd_create_tis(sh->ctx, &tis_attr); 973 if (!sh->tis) { 974 DRV_LOG(ERR, "TIS allocation failure"); 975 err = ENOMEM; 976 goto error; 977 } 978 err = mlx5_alloc_rxtx_uars(sh, config); 979 if (err) 980 goto error; 981 MLX5_ASSERT(sh->tx_uar); 982 MLX5_ASSERT(mlx5_os_get_devx_uar_base_addr(sh->tx_uar)); 983 984 MLX5_ASSERT(sh->devx_rx_uar); 985 MLX5_ASSERT(mlx5_os_get_devx_uar_base_addr(sh->devx_rx_uar)); 986 } 987 sh->flow_id_pool = mlx5_flow_id_pool_alloc 988 ((1 << HAIRPIN_FLOW_ID_BITS) - 1); 989 if (!sh->flow_id_pool) { 990 DRV_LOG(ERR, "can't create flow id pool"); 991 err = ENOMEM; 992 goto error; 993 } 994 #ifndef RTE_ARCH_64 995 /* Initialize UAR access locks for 32bit implementations. */ 996 rte_spinlock_init(&sh->uar_lock_cq); 997 for (i = 0; i < MLX5_UAR_PAGE_NUM_MAX; i++) 998 rte_spinlock_init(&sh->uar_lock[i]); 999 #endif 1000 /* 1001 * Once the device is added to the list of memory event 1002 * callback, its global MR cache table cannot be expanded 1003 * on the fly because of deadlock. If it overflows, lookup 1004 * should be done by searching MR list linearly, which is slow. 1005 * 1006 * At this point the device is not added to the memory 1007 * event list yet, context is just being created. 1008 */ 1009 err = mlx5_mr_btree_init(&sh->share_cache.cache, 1010 MLX5_MR_BTREE_CACHE_N * 2, 1011 spawn->pci_dev->device.numa_node); 1012 if (err) { 1013 err = rte_errno; 1014 goto error; 1015 } 1016 mlx5_os_set_reg_mr_cb(&sh->share_cache.reg_mr_cb, 1017 &sh->share_cache.dereg_mr_cb); 1018 mlx5_os_dev_shared_handler_install(sh); 1019 sh->cnt_id_tbl = mlx5_l3t_create(MLX5_L3T_TYPE_DWORD); 1020 if (!sh->cnt_id_tbl) { 1021 err = rte_errno; 1022 goto error; 1023 } 1024 mlx5_flow_aging_init(sh); 1025 mlx5_flow_counters_mng_init(sh); 1026 mlx5_flow_ipool_create(sh, config); 1027 /* Add device to memory callback list. */ 1028 rte_rwlock_write_lock(&mlx5_shared_data->mem_event_rwlock); 1029 LIST_INSERT_HEAD(&mlx5_shared_data->mem_event_cb_list, 1030 sh, mem_event_cb); 1031 rte_rwlock_write_unlock(&mlx5_shared_data->mem_event_rwlock); 1032 /* Add context to the global device list. */ 1033 LIST_INSERT_HEAD(&mlx5_dev_ctx_list, sh, next); 1034 exit: 1035 pthread_mutex_unlock(&mlx5_dev_ctx_list_mutex); 1036 return sh; 1037 error: 1038 pthread_mutex_destroy(&sh->txpp.mutex); 1039 pthread_mutex_unlock(&mlx5_dev_ctx_list_mutex); 1040 MLX5_ASSERT(sh); 1041 if (sh->cnt_id_tbl) 1042 mlx5_l3t_destroy(sh->cnt_id_tbl); 1043 if (sh->tis) 1044 claim_zero(mlx5_devx_cmd_destroy(sh->tis)); 1045 if (sh->td) 1046 claim_zero(mlx5_devx_cmd_destroy(sh->td)); 1047 if (sh->devx_rx_uar) 1048 mlx5_glue->devx_free_uar(sh->devx_rx_uar); 1049 if (sh->tx_uar) 1050 mlx5_glue->devx_free_uar(sh->tx_uar); 1051 if (sh->pd) 1052 claim_zero(mlx5_glue->dealloc_pd(sh->pd)); 1053 if (sh->ctx) 1054 claim_zero(mlx5_glue->close_device(sh->ctx)); 1055 if (sh->flow_id_pool) 1056 mlx5_flow_id_pool_release(sh->flow_id_pool); 1057 mlx5_free(sh); 1058 MLX5_ASSERT(err > 0); 1059 rte_errno = err; 1060 return NULL; 1061 } 1062 1063 /** 1064 * Free shared IB device context. Decrement counter and if zero free 1065 * all allocated resources and close handles. 1066 * 1067 * @param[in] sh 1068 * Pointer to mlx5_dev_ctx_shared object to free 1069 */ 1070 void 1071 mlx5_free_shared_dev_ctx(struct mlx5_dev_ctx_shared *sh) 1072 { 1073 pthread_mutex_lock(&mlx5_dev_ctx_list_mutex); 1074 #ifdef RTE_LIBRTE_MLX5_DEBUG 1075 /* Check the object presence in the list. */ 1076 struct mlx5_dev_ctx_shared *lctx; 1077 1078 LIST_FOREACH(lctx, &mlx5_dev_ctx_list, next) 1079 if (lctx == sh) 1080 break; 1081 MLX5_ASSERT(lctx); 1082 if (lctx != sh) { 1083 DRV_LOG(ERR, "Freeing non-existing shared IB context"); 1084 goto exit; 1085 } 1086 #endif 1087 MLX5_ASSERT(sh); 1088 MLX5_ASSERT(sh->refcnt); 1089 /* Secondary process should not free the shared context. */ 1090 MLX5_ASSERT(rte_eal_process_type() == RTE_PROC_PRIMARY); 1091 if (--sh->refcnt) 1092 goto exit; 1093 /* Remove from memory callback device list. */ 1094 rte_rwlock_write_lock(&mlx5_shared_data->mem_event_rwlock); 1095 LIST_REMOVE(sh, mem_event_cb); 1096 rte_rwlock_write_unlock(&mlx5_shared_data->mem_event_rwlock); 1097 /* Release created Memory Regions. */ 1098 mlx5_mr_release_cache(&sh->share_cache); 1099 /* Remove context from the global device list. */ 1100 LIST_REMOVE(sh, next); 1101 pthread_mutex_unlock(&mlx5_dev_ctx_list_mutex); 1102 /* 1103 * Ensure there is no async event handler installed. 1104 * Only primary process handles async device events. 1105 **/ 1106 mlx5_flow_counters_mng_close(sh); 1107 mlx5_flow_ipool_destroy(sh); 1108 mlx5_os_dev_shared_handler_uninstall(sh); 1109 if (sh->cnt_id_tbl) { 1110 mlx5_l3t_destroy(sh->cnt_id_tbl); 1111 sh->cnt_id_tbl = NULL; 1112 } 1113 if (sh->tx_uar) { 1114 mlx5_glue->devx_free_uar(sh->tx_uar); 1115 sh->tx_uar = NULL; 1116 } 1117 if (sh->pd) 1118 claim_zero(mlx5_glue->dealloc_pd(sh->pd)); 1119 if (sh->tis) 1120 claim_zero(mlx5_devx_cmd_destroy(sh->tis)); 1121 if (sh->td) 1122 claim_zero(mlx5_devx_cmd_destroy(sh->td)); 1123 if (sh->devx_rx_uar) 1124 mlx5_glue->devx_free_uar(sh->devx_rx_uar); 1125 if (sh->ctx) 1126 claim_zero(mlx5_glue->close_device(sh->ctx)); 1127 if (sh->flow_id_pool) 1128 mlx5_flow_id_pool_release(sh->flow_id_pool); 1129 pthread_mutex_destroy(&sh->txpp.mutex); 1130 mlx5_free(sh); 1131 return; 1132 exit: 1133 pthread_mutex_unlock(&mlx5_dev_ctx_list_mutex); 1134 } 1135 1136 /** 1137 * Destroy table hash list and all the root entries per domain. 1138 * 1139 * @param[in] priv 1140 * Pointer to the private device data structure. 1141 */ 1142 void 1143 mlx5_free_table_hash_list(struct mlx5_priv *priv) 1144 { 1145 struct mlx5_dev_ctx_shared *sh = priv->sh; 1146 struct mlx5_flow_tbl_data_entry *tbl_data; 1147 union mlx5_flow_tbl_key table_key = { 1148 { 1149 .table_id = 0, 1150 .reserved = 0, 1151 .domain = 0, 1152 .direction = 0, 1153 } 1154 }; 1155 struct mlx5_hlist_entry *pos; 1156 1157 if (!sh->flow_tbls) 1158 return; 1159 pos = mlx5_hlist_lookup(sh->flow_tbls, table_key.v64); 1160 if (pos) { 1161 tbl_data = container_of(pos, struct mlx5_flow_tbl_data_entry, 1162 entry); 1163 MLX5_ASSERT(tbl_data); 1164 mlx5_hlist_remove(sh->flow_tbls, pos); 1165 mlx5_free(tbl_data); 1166 } 1167 table_key.direction = 1; 1168 pos = mlx5_hlist_lookup(sh->flow_tbls, table_key.v64); 1169 if (pos) { 1170 tbl_data = container_of(pos, struct mlx5_flow_tbl_data_entry, 1171 entry); 1172 MLX5_ASSERT(tbl_data); 1173 mlx5_hlist_remove(sh->flow_tbls, pos); 1174 mlx5_free(tbl_data); 1175 } 1176 table_key.direction = 0; 1177 table_key.domain = 1; 1178 pos = mlx5_hlist_lookup(sh->flow_tbls, table_key.v64); 1179 if (pos) { 1180 tbl_data = container_of(pos, struct mlx5_flow_tbl_data_entry, 1181 entry); 1182 MLX5_ASSERT(tbl_data); 1183 mlx5_hlist_remove(sh->flow_tbls, pos); 1184 mlx5_free(tbl_data); 1185 } 1186 mlx5_hlist_destroy(sh->flow_tbls, NULL, NULL); 1187 } 1188 1189 /** 1190 * Initialize flow table hash list and create the root tables entry 1191 * for each domain. 1192 * 1193 * @param[in] priv 1194 * Pointer to the private device data structure. 1195 * 1196 * @return 1197 * Zero on success, positive error code otherwise. 1198 */ 1199 int 1200 mlx5_alloc_table_hash_list(struct mlx5_priv *priv) 1201 { 1202 struct mlx5_dev_ctx_shared *sh = priv->sh; 1203 char s[MLX5_HLIST_NAMESIZE]; 1204 int err = 0; 1205 1206 MLX5_ASSERT(sh); 1207 snprintf(s, sizeof(s), "%s_flow_table", priv->sh->ibdev_name); 1208 sh->flow_tbls = mlx5_hlist_create(s, MLX5_FLOW_TABLE_HLIST_ARRAY_SIZE); 1209 if (!sh->flow_tbls) { 1210 DRV_LOG(ERR, "flow tables with hash creation failed."); 1211 err = ENOMEM; 1212 return err; 1213 } 1214 #ifndef HAVE_MLX5DV_DR 1215 /* 1216 * In case we have not DR support, the zero tables should be created 1217 * because DV expect to see them even if they cannot be created by 1218 * RDMA-CORE. 1219 */ 1220 union mlx5_flow_tbl_key table_key = { 1221 { 1222 .table_id = 0, 1223 .reserved = 0, 1224 .domain = 0, 1225 .direction = 0, 1226 } 1227 }; 1228 struct mlx5_flow_tbl_data_entry *tbl_data = mlx5_malloc(MLX5_MEM_ZERO, 1229 sizeof(*tbl_data), 0, 1230 SOCKET_ID_ANY); 1231 1232 if (!tbl_data) { 1233 err = ENOMEM; 1234 goto error; 1235 } 1236 tbl_data->entry.key = table_key.v64; 1237 err = mlx5_hlist_insert(sh->flow_tbls, &tbl_data->entry); 1238 if (err) 1239 goto error; 1240 rte_atomic32_init(&tbl_data->tbl.refcnt); 1241 rte_atomic32_inc(&tbl_data->tbl.refcnt); 1242 table_key.direction = 1; 1243 tbl_data = mlx5_malloc(MLX5_MEM_ZERO, sizeof(*tbl_data), 0, 1244 SOCKET_ID_ANY); 1245 if (!tbl_data) { 1246 err = ENOMEM; 1247 goto error; 1248 } 1249 tbl_data->entry.key = table_key.v64; 1250 err = mlx5_hlist_insert(sh->flow_tbls, &tbl_data->entry); 1251 if (err) 1252 goto error; 1253 rte_atomic32_init(&tbl_data->tbl.refcnt); 1254 rte_atomic32_inc(&tbl_data->tbl.refcnt); 1255 table_key.direction = 0; 1256 table_key.domain = 1; 1257 tbl_data = mlx5_malloc(MLX5_MEM_ZERO, sizeof(*tbl_data), 0, 1258 SOCKET_ID_ANY); 1259 if (!tbl_data) { 1260 err = ENOMEM; 1261 goto error; 1262 } 1263 tbl_data->entry.key = table_key.v64; 1264 err = mlx5_hlist_insert(sh->flow_tbls, &tbl_data->entry); 1265 if (err) 1266 goto error; 1267 rte_atomic32_init(&tbl_data->tbl.refcnt); 1268 rte_atomic32_inc(&tbl_data->tbl.refcnt); 1269 return err; 1270 error: 1271 mlx5_free_table_hash_list(priv); 1272 #endif /* HAVE_MLX5DV_DR */ 1273 return err; 1274 } 1275 1276 /** 1277 * Retrieve integer value from environment variable. 1278 * 1279 * @param[in] name 1280 * Environment variable name. 1281 * 1282 * @return 1283 * Integer value, 0 if the variable is not set. 1284 */ 1285 int 1286 mlx5_getenv_int(const char *name) 1287 { 1288 const char *val = getenv(name); 1289 1290 if (val == NULL) 1291 return 0; 1292 return atoi(val); 1293 } 1294 1295 /** 1296 * DPDK callback to add udp tunnel port 1297 * 1298 * @param[in] dev 1299 * A pointer to eth_dev 1300 * @param[in] udp_tunnel 1301 * A pointer to udp tunnel 1302 * 1303 * @return 1304 * 0 on valid udp ports and tunnels, -ENOTSUP otherwise. 1305 */ 1306 int 1307 mlx5_udp_tunnel_port_add(struct rte_eth_dev *dev __rte_unused, 1308 struct rte_eth_udp_tunnel *udp_tunnel) 1309 { 1310 MLX5_ASSERT(udp_tunnel != NULL); 1311 if (udp_tunnel->prot_type == RTE_TUNNEL_TYPE_VXLAN && 1312 udp_tunnel->udp_port == 4789) 1313 return 0; 1314 if (udp_tunnel->prot_type == RTE_TUNNEL_TYPE_VXLAN_GPE && 1315 udp_tunnel->udp_port == 4790) 1316 return 0; 1317 return -ENOTSUP; 1318 } 1319 1320 /** 1321 * Initialize process private data structure. 1322 * 1323 * @param dev 1324 * Pointer to Ethernet device structure. 1325 * 1326 * @return 1327 * 0 on success, a negative errno value otherwise and rte_errno is set. 1328 */ 1329 int 1330 mlx5_proc_priv_init(struct rte_eth_dev *dev) 1331 { 1332 struct mlx5_priv *priv = dev->data->dev_private; 1333 struct mlx5_proc_priv *ppriv; 1334 size_t ppriv_size; 1335 1336 /* 1337 * UAR register table follows the process private structure. BlueFlame 1338 * registers for Tx queues are stored in the table. 1339 */ 1340 ppriv_size = 1341 sizeof(struct mlx5_proc_priv) + priv->txqs_n * sizeof(void *); 1342 ppriv = mlx5_malloc(MLX5_MEM_RTE, ppriv_size, RTE_CACHE_LINE_SIZE, 1343 dev->device->numa_node); 1344 if (!ppriv) { 1345 rte_errno = ENOMEM; 1346 return -rte_errno; 1347 } 1348 ppriv->uar_table_sz = ppriv_size; 1349 dev->process_private = ppriv; 1350 return 0; 1351 } 1352 1353 /** 1354 * Un-initialize process private data structure. 1355 * 1356 * @param dev 1357 * Pointer to Ethernet device structure. 1358 */ 1359 static void 1360 mlx5_proc_priv_uninit(struct rte_eth_dev *dev) 1361 { 1362 if (!dev->process_private) 1363 return; 1364 mlx5_free(dev->process_private); 1365 dev->process_private = NULL; 1366 } 1367 1368 /** 1369 * DPDK callback to close the device. 1370 * 1371 * Destroy all queues and objects, free memory. 1372 * 1373 * @param dev 1374 * Pointer to Ethernet device structure. 1375 */ 1376 int 1377 mlx5_dev_close(struct rte_eth_dev *dev) 1378 { 1379 struct mlx5_priv *priv = dev->data->dev_private; 1380 unsigned int i; 1381 int ret; 1382 1383 if (rte_eal_process_type() == RTE_PROC_SECONDARY) { 1384 /* Check if process_private released. */ 1385 if (!dev->process_private) 1386 return 0; 1387 mlx5_tx_uar_uninit_secondary(dev); 1388 mlx5_proc_priv_uninit(dev); 1389 rte_eth_dev_release_port(dev); 1390 return 0; 1391 } 1392 if (!priv->sh) 1393 return 0; 1394 DRV_LOG(DEBUG, "port %u closing device \"%s\"", 1395 dev->data->port_id, 1396 ((priv->sh->ctx != NULL) ? 1397 mlx5_os_get_ctx_device_name(priv->sh->ctx) : "")); 1398 /* 1399 * If default mreg copy action is removed at the stop stage, 1400 * the search will return none and nothing will be done anymore. 1401 */ 1402 mlx5_flow_stop_default(dev); 1403 mlx5_traffic_disable(dev); 1404 /* 1405 * If all the flows are already flushed in the device stop stage, 1406 * then this will return directly without any action. 1407 */ 1408 mlx5_flow_list_flush(dev, &priv->flows, true); 1409 mlx5_flow_meter_flush(dev, NULL); 1410 /* Free the intermediate buffers for flow creation. */ 1411 mlx5_flow_free_intermediate(dev); 1412 /* Prevent crashes when queues are still in use. */ 1413 dev->rx_pkt_burst = removed_rx_burst; 1414 dev->tx_pkt_burst = removed_tx_burst; 1415 rte_wmb(); 1416 /* Disable datapath on secondary process. */ 1417 mlx5_mp_os_req_stop_rxtx(dev); 1418 /* Free the eCPRI flex parser resource. */ 1419 mlx5_flex_parser_ecpri_release(dev); 1420 if (priv->rxqs != NULL) { 1421 /* XXX race condition if mlx5_rx_burst() is still running. */ 1422 usleep(1000); 1423 for (i = 0; (i != priv->rxqs_n); ++i) 1424 mlx5_rxq_release(dev, i); 1425 priv->rxqs_n = 0; 1426 priv->rxqs = NULL; 1427 } 1428 if (priv->txqs != NULL) { 1429 /* XXX race condition if mlx5_tx_burst() is still running. */ 1430 usleep(1000); 1431 for (i = 0; (i != priv->txqs_n); ++i) 1432 mlx5_txq_release(dev, i); 1433 priv->txqs_n = 0; 1434 priv->txqs = NULL; 1435 } 1436 mlx5_proc_priv_uninit(dev); 1437 if (priv->mreg_cp_tbl) 1438 mlx5_hlist_destroy(priv->mreg_cp_tbl, NULL, NULL); 1439 mlx5_mprq_free_mp(dev); 1440 mlx5_os_free_shared_dr(priv); 1441 if (priv->rss_conf.rss_key != NULL) 1442 mlx5_free(priv->rss_conf.rss_key); 1443 if (priv->reta_idx != NULL) 1444 mlx5_free(priv->reta_idx); 1445 if (priv->config.vf) 1446 mlx5_os_mac_addr_flush(dev); 1447 if (priv->nl_socket_route >= 0) 1448 close(priv->nl_socket_route); 1449 if (priv->nl_socket_rdma >= 0) 1450 close(priv->nl_socket_rdma); 1451 if (priv->vmwa_context) 1452 mlx5_vlan_vmwa_exit(priv->vmwa_context); 1453 ret = mlx5_hrxq_verify(dev); 1454 if (ret) 1455 DRV_LOG(WARNING, "port %u some hash Rx queue still remain", 1456 dev->data->port_id); 1457 ret = mlx5_ind_table_obj_verify(dev); 1458 if (ret) 1459 DRV_LOG(WARNING, "port %u some indirection table still remain", 1460 dev->data->port_id); 1461 ret = mlx5_rxq_obj_verify(dev); 1462 if (ret) 1463 DRV_LOG(WARNING, "port %u some Rx queue objects still remain", 1464 dev->data->port_id); 1465 ret = mlx5_rxq_verify(dev); 1466 if (ret) 1467 DRV_LOG(WARNING, "port %u some Rx queues still remain", 1468 dev->data->port_id); 1469 ret = mlx5_txq_obj_verify(dev); 1470 if (ret) 1471 DRV_LOG(WARNING, "port %u some Verbs Tx queue still remain", 1472 dev->data->port_id); 1473 ret = mlx5_txq_verify(dev); 1474 if (ret) 1475 DRV_LOG(WARNING, "port %u some Tx queues still remain", 1476 dev->data->port_id); 1477 ret = mlx5_flow_verify(dev); 1478 if (ret) 1479 DRV_LOG(WARNING, "port %u some flows still remain", 1480 dev->data->port_id); 1481 /* 1482 * Free the shared context in last turn, because the cleanup 1483 * routines above may use some shared fields, like 1484 * mlx5_os_mac_addr_flush() uses ibdev_path for retrieveing 1485 * ifindex if Netlink fails. 1486 */ 1487 mlx5_free_shared_dev_ctx(priv->sh); 1488 if (priv->domain_id != RTE_ETH_DEV_SWITCH_DOMAIN_ID_INVALID) { 1489 unsigned int c = 0; 1490 uint16_t port_id; 1491 1492 MLX5_ETH_FOREACH_DEV(port_id, priv->pci_dev) { 1493 struct mlx5_priv *opriv = 1494 rte_eth_devices[port_id].data->dev_private; 1495 1496 if (!opriv || 1497 opriv->domain_id != priv->domain_id || 1498 &rte_eth_devices[port_id] == dev) 1499 continue; 1500 ++c; 1501 break; 1502 } 1503 if (!c) 1504 claim_zero(rte_eth_switch_domain_free(priv->domain_id)); 1505 } 1506 memset(priv, 0, sizeof(*priv)); 1507 priv->domain_id = RTE_ETH_DEV_SWITCH_DOMAIN_ID_INVALID; 1508 /* 1509 * Reset mac_addrs to NULL such that it is not freed as part of 1510 * rte_eth_dev_release_port(). mac_addrs is part of dev_private so 1511 * it is freed when dev_private is freed. 1512 */ 1513 dev->data->mac_addrs = NULL; 1514 return 0; 1515 } 1516 1517 /** 1518 * Verify and store value for device argument. 1519 * 1520 * @param[in] key 1521 * Key argument to verify. 1522 * @param[in] val 1523 * Value associated with key. 1524 * @param opaque 1525 * User data. 1526 * 1527 * @return 1528 * 0 on success, a negative errno value otherwise and rte_errno is set. 1529 */ 1530 static int 1531 mlx5_args_check(const char *key, const char *val, void *opaque) 1532 { 1533 struct mlx5_dev_config *config = opaque; 1534 unsigned long mod; 1535 signed long tmp; 1536 1537 /* No-op, port representors are processed in mlx5_dev_spawn(). */ 1538 if (!strcmp(MLX5_REPRESENTOR, key)) 1539 return 0; 1540 errno = 0; 1541 tmp = strtol(val, NULL, 0); 1542 if (errno) { 1543 rte_errno = errno; 1544 DRV_LOG(WARNING, "%s: \"%s\" is not a valid integer", key, val); 1545 return -rte_errno; 1546 } 1547 if (tmp < 0 && strcmp(MLX5_TX_PP, key) && strcmp(MLX5_TX_SKEW, key)) { 1548 /* Negative values are acceptable for some keys only. */ 1549 rte_errno = EINVAL; 1550 DRV_LOG(WARNING, "%s: invalid negative value \"%s\"", key, val); 1551 return -rte_errno; 1552 } 1553 mod = tmp >= 0 ? tmp : -tmp; 1554 if (strcmp(MLX5_RXQ_CQE_COMP_EN, key) == 0) { 1555 config->cqe_comp = !!tmp; 1556 } else if (strcmp(MLX5_RXQ_CQE_PAD_EN, key) == 0) { 1557 config->cqe_pad = !!tmp; 1558 } else if (strcmp(MLX5_RXQ_PKT_PAD_EN, key) == 0) { 1559 config->hw_padding = !!tmp; 1560 } else if (strcmp(MLX5_RX_MPRQ_EN, key) == 0) { 1561 config->mprq.enabled = !!tmp; 1562 } else if (strcmp(MLX5_RX_MPRQ_LOG_STRIDE_NUM, key) == 0) { 1563 config->mprq.stride_num_n = tmp; 1564 } else if (strcmp(MLX5_RX_MPRQ_LOG_STRIDE_SIZE, key) == 0) { 1565 config->mprq.stride_size_n = tmp; 1566 } else if (strcmp(MLX5_RX_MPRQ_MAX_MEMCPY_LEN, key) == 0) { 1567 config->mprq.max_memcpy_len = tmp; 1568 } else if (strcmp(MLX5_RXQS_MIN_MPRQ, key) == 0) { 1569 config->mprq.min_rxqs_num = tmp; 1570 } else if (strcmp(MLX5_TXQ_INLINE, key) == 0) { 1571 DRV_LOG(WARNING, "%s: deprecated parameter," 1572 " converted to txq_inline_max", key); 1573 config->txq_inline_max = tmp; 1574 } else if (strcmp(MLX5_TXQ_INLINE_MAX, key) == 0) { 1575 config->txq_inline_max = tmp; 1576 } else if (strcmp(MLX5_TXQ_INLINE_MIN, key) == 0) { 1577 config->txq_inline_min = tmp; 1578 } else if (strcmp(MLX5_TXQ_INLINE_MPW, key) == 0) { 1579 config->txq_inline_mpw = tmp; 1580 } else if (strcmp(MLX5_TXQS_MIN_INLINE, key) == 0) { 1581 config->txqs_inline = tmp; 1582 } else if (strcmp(MLX5_TXQS_MAX_VEC, key) == 0) { 1583 DRV_LOG(WARNING, "%s: deprecated parameter, ignored", key); 1584 } else if (strcmp(MLX5_TXQ_MPW_EN, key) == 0) { 1585 config->mps = !!tmp; 1586 } else if (strcmp(MLX5_TX_DB_NC, key) == 0) { 1587 if (tmp != MLX5_TXDB_CACHED && 1588 tmp != MLX5_TXDB_NCACHED && 1589 tmp != MLX5_TXDB_HEURISTIC) { 1590 DRV_LOG(ERR, "invalid Tx doorbell " 1591 "mapping parameter"); 1592 rte_errno = EINVAL; 1593 return -rte_errno; 1594 } 1595 config->dbnc = tmp; 1596 } else if (strcmp(MLX5_TXQ_MPW_HDR_DSEG_EN, key) == 0) { 1597 DRV_LOG(WARNING, "%s: deprecated parameter, ignored", key); 1598 } else if (strcmp(MLX5_TXQ_MAX_INLINE_LEN, key) == 0) { 1599 DRV_LOG(WARNING, "%s: deprecated parameter," 1600 " converted to txq_inline_mpw", key); 1601 config->txq_inline_mpw = tmp; 1602 } else if (strcmp(MLX5_TX_VEC_EN, key) == 0) { 1603 DRV_LOG(WARNING, "%s: deprecated parameter, ignored", key); 1604 } else if (strcmp(MLX5_TX_PP, key) == 0) { 1605 if (!mod) { 1606 DRV_LOG(ERR, "Zero Tx packet pacing parameter"); 1607 rte_errno = EINVAL; 1608 return -rte_errno; 1609 } 1610 config->tx_pp = tmp; 1611 } else if (strcmp(MLX5_TX_SKEW, key) == 0) { 1612 config->tx_skew = tmp; 1613 } else if (strcmp(MLX5_RX_VEC_EN, key) == 0) { 1614 config->rx_vec_en = !!tmp; 1615 } else if (strcmp(MLX5_L3_VXLAN_EN, key) == 0) { 1616 config->l3_vxlan_en = !!tmp; 1617 } else if (strcmp(MLX5_VF_NL_EN, key) == 0) { 1618 config->vf_nl_en = !!tmp; 1619 } else if (strcmp(MLX5_DV_ESW_EN, key) == 0) { 1620 config->dv_esw_en = !!tmp; 1621 } else if (strcmp(MLX5_DV_FLOW_EN, key) == 0) { 1622 config->dv_flow_en = !!tmp; 1623 } else if (strcmp(MLX5_DV_XMETA_EN, key) == 0) { 1624 if (tmp != MLX5_XMETA_MODE_LEGACY && 1625 tmp != MLX5_XMETA_MODE_META16 && 1626 tmp != MLX5_XMETA_MODE_META32) { 1627 DRV_LOG(ERR, "invalid extensive " 1628 "metadata parameter"); 1629 rte_errno = EINVAL; 1630 return -rte_errno; 1631 } 1632 config->dv_xmeta_en = tmp; 1633 } else if (strcmp(MLX5_LACP_BY_USER, key) == 0) { 1634 config->lacp_by_user = !!tmp; 1635 } else if (strcmp(MLX5_MR_EXT_MEMSEG_EN, key) == 0) { 1636 config->mr_ext_memseg_en = !!tmp; 1637 } else if (strcmp(MLX5_MAX_DUMP_FILES_NUM, key) == 0) { 1638 config->max_dump_files_num = tmp; 1639 } else if (strcmp(MLX5_LRO_TIMEOUT_USEC, key) == 0) { 1640 config->lro.timeout = tmp; 1641 } else if (strcmp(MLX5_CLASS_ARG_NAME, key) == 0) { 1642 DRV_LOG(DEBUG, "class argument is %s.", val); 1643 } else if (strcmp(MLX5_HP_BUF_SIZE, key) == 0) { 1644 config->log_hp_size = tmp; 1645 } else if (strcmp(MLX5_RECLAIM_MEM, key) == 0) { 1646 if (tmp != MLX5_RCM_NONE && 1647 tmp != MLX5_RCM_LIGHT && 1648 tmp != MLX5_RCM_AGGR) { 1649 DRV_LOG(ERR, "Unrecognize %s: \"%s\"", key, val); 1650 rte_errno = EINVAL; 1651 return -rte_errno; 1652 } 1653 config->reclaim_mode = tmp; 1654 } else if (strcmp(MLX5_SYS_MEM_EN, key) == 0) { 1655 config->sys_mem_en = !!tmp; 1656 } else if (strcmp(MLX5_DECAP_EN, key) == 0) { 1657 config->decap_en = !!tmp; 1658 } else { 1659 DRV_LOG(WARNING, "%s: unknown parameter", key); 1660 rte_errno = EINVAL; 1661 return -rte_errno; 1662 } 1663 return 0; 1664 } 1665 1666 /** 1667 * Parse device parameters. 1668 * 1669 * @param config 1670 * Pointer to device configuration structure. 1671 * @param devargs 1672 * Device arguments structure. 1673 * 1674 * @return 1675 * 0 on success, a negative errno value otherwise and rte_errno is set. 1676 */ 1677 int 1678 mlx5_args(struct mlx5_dev_config *config, struct rte_devargs *devargs) 1679 { 1680 const char **params = (const char *[]){ 1681 MLX5_RXQ_CQE_COMP_EN, 1682 MLX5_RXQ_CQE_PAD_EN, 1683 MLX5_RXQ_PKT_PAD_EN, 1684 MLX5_RX_MPRQ_EN, 1685 MLX5_RX_MPRQ_LOG_STRIDE_NUM, 1686 MLX5_RX_MPRQ_LOG_STRIDE_SIZE, 1687 MLX5_RX_MPRQ_MAX_MEMCPY_LEN, 1688 MLX5_RXQS_MIN_MPRQ, 1689 MLX5_TXQ_INLINE, 1690 MLX5_TXQ_INLINE_MIN, 1691 MLX5_TXQ_INLINE_MAX, 1692 MLX5_TXQ_INLINE_MPW, 1693 MLX5_TXQS_MIN_INLINE, 1694 MLX5_TXQS_MAX_VEC, 1695 MLX5_TXQ_MPW_EN, 1696 MLX5_TXQ_MPW_HDR_DSEG_EN, 1697 MLX5_TXQ_MAX_INLINE_LEN, 1698 MLX5_TX_DB_NC, 1699 MLX5_TX_PP, 1700 MLX5_TX_SKEW, 1701 MLX5_TX_VEC_EN, 1702 MLX5_RX_VEC_EN, 1703 MLX5_L3_VXLAN_EN, 1704 MLX5_VF_NL_EN, 1705 MLX5_DV_ESW_EN, 1706 MLX5_DV_FLOW_EN, 1707 MLX5_DV_XMETA_EN, 1708 MLX5_LACP_BY_USER, 1709 MLX5_MR_EXT_MEMSEG_EN, 1710 MLX5_REPRESENTOR, 1711 MLX5_MAX_DUMP_FILES_NUM, 1712 MLX5_LRO_TIMEOUT_USEC, 1713 MLX5_CLASS_ARG_NAME, 1714 MLX5_HP_BUF_SIZE, 1715 MLX5_RECLAIM_MEM, 1716 MLX5_SYS_MEM_EN, 1717 MLX5_DECAP_EN, 1718 NULL, 1719 }; 1720 struct rte_kvargs *kvlist; 1721 int ret = 0; 1722 int i; 1723 1724 if (devargs == NULL) 1725 return 0; 1726 /* Following UGLY cast is done to pass checkpatch. */ 1727 kvlist = rte_kvargs_parse(devargs->args, params); 1728 if (kvlist == NULL) { 1729 rte_errno = EINVAL; 1730 return -rte_errno; 1731 } 1732 /* Process parameters. */ 1733 for (i = 0; (params[i] != NULL); ++i) { 1734 if (rte_kvargs_count(kvlist, params[i])) { 1735 ret = rte_kvargs_process(kvlist, params[i], 1736 mlx5_args_check, config); 1737 if (ret) { 1738 rte_errno = EINVAL; 1739 rte_kvargs_free(kvlist); 1740 return -rte_errno; 1741 } 1742 } 1743 } 1744 rte_kvargs_free(kvlist); 1745 return 0; 1746 } 1747 1748 /** 1749 * Configures the minimal amount of data to inline into WQE 1750 * while sending packets. 1751 * 1752 * - the txq_inline_min has the maximal priority, if this 1753 * key is specified in devargs 1754 * - if DevX is enabled the inline mode is queried from the 1755 * device (HCA attributes and NIC vport context if needed). 1756 * - otherwise L2 mode (18 bytes) is assumed for ConnectX-4/4 Lx 1757 * and none (0 bytes) for other NICs 1758 * 1759 * @param spawn 1760 * Verbs device parameters (name, port, switch_info) to spawn. 1761 * @param config 1762 * Device configuration parameters. 1763 */ 1764 void 1765 mlx5_set_min_inline(struct mlx5_dev_spawn_data *spawn, 1766 struct mlx5_dev_config *config) 1767 { 1768 if (config->txq_inline_min != MLX5_ARG_UNSET) { 1769 /* Application defines size of inlined data explicitly. */ 1770 switch (spawn->pci_dev->id.device_id) { 1771 case PCI_DEVICE_ID_MELLANOX_CONNECTX4: 1772 case PCI_DEVICE_ID_MELLANOX_CONNECTX4VF: 1773 if (config->txq_inline_min < 1774 (int)MLX5_INLINE_HSIZE_L2) { 1775 DRV_LOG(DEBUG, 1776 "txq_inline_mix aligned to minimal" 1777 " ConnectX-4 required value %d", 1778 (int)MLX5_INLINE_HSIZE_L2); 1779 config->txq_inline_min = MLX5_INLINE_HSIZE_L2; 1780 } 1781 break; 1782 } 1783 goto exit; 1784 } 1785 if (config->hca_attr.eth_net_offloads) { 1786 /* We have DevX enabled, inline mode queried successfully. */ 1787 switch (config->hca_attr.wqe_inline_mode) { 1788 case MLX5_CAP_INLINE_MODE_L2: 1789 /* outer L2 header must be inlined. */ 1790 config->txq_inline_min = MLX5_INLINE_HSIZE_L2; 1791 goto exit; 1792 case MLX5_CAP_INLINE_MODE_NOT_REQUIRED: 1793 /* No inline data are required by NIC. */ 1794 config->txq_inline_min = MLX5_INLINE_HSIZE_NONE; 1795 config->hw_vlan_insert = 1796 config->hca_attr.wqe_vlan_insert; 1797 DRV_LOG(DEBUG, "Tx VLAN insertion is supported"); 1798 goto exit; 1799 case MLX5_CAP_INLINE_MODE_VPORT_CONTEXT: 1800 /* inline mode is defined by NIC vport context. */ 1801 if (!config->hca_attr.eth_virt) 1802 break; 1803 switch (config->hca_attr.vport_inline_mode) { 1804 case MLX5_INLINE_MODE_NONE: 1805 config->txq_inline_min = 1806 MLX5_INLINE_HSIZE_NONE; 1807 goto exit; 1808 case MLX5_INLINE_MODE_L2: 1809 config->txq_inline_min = 1810 MLX5_INLINE_HSIZE_L2; 1811 goto exit; 1812 case MLX5_INLINE_MODE_IP: 1813 config->txq_inline_min = 1814 MLX5_INLINE_HSIZE_L3; 1815 goto exit; 1816 case MLX5_INLINE_MODE_TCP_UDP: 1817 config->txq_inline_min = 1818 MLX5_INLINE_HSIZE_L4; 1819 goto exit; 1820 case MLX5_INLINE_MODE_INNER_L2: 1821 config->txq_inline_min = 1822 MLX5_INLINE_HSIZE_INNER_L2; 1823 goto exit; 1824 case MLX5_INLINE_MODE_INNER_IP: 1825 config->txq_inline_min = 1826 MLX5_INLINE_HSIZE_INNER_L3; 1827 goto exit; 1828 case MLX5_INLINE_MODE_INNER_TCP_UDP: 1829 config->txq_inline_min = 1830 MLX5_INLINE_HSIZE_INNER_L4; 1831 goto exit; 1832 } 1833 } 1834 } 1835 /* 1836 * We get here if we are unable to deduce 1837 * inline data size with DevX. Try PCI ID 1838 * to determine old NICs. 1839 */ 1840 switch (spawn->pci_dev->id.device_id) { 1841 case PCI_DEVICE_ID_MELLANOX_CONNECTX4: 1842 case PCI_DEVICE_ID_MELLANOX_CONNECTX4VF: 1843 case PCI_DEVICE_ID_MELLANOX_CONNECTX4LX: 1844 case PCI_DEVICE_ID_MELLANOX_CONNECTX4LXVF: 1845 config->txq_inline_min = MLX5_INLINE_HSIZE_L2; 1846 config->hw_vlan_insert = 0; 1847 break; 1848 case PCI_DEVICE_ID_MELLANOX_CONNECTX5: 1849 case PCI_DEVICE_ID_MELLANOX_CONNECTX5VF: 1850 case PCI_DEVICE_ID_MELLANOX_CONNECTX5EX: 1851 case PCI_DEVICE_ID_MELLANOX_CONNECTX5EXVF: 1852 /* 1853 * These NICs support VLAN insertion from WQE and 1854 * report the wqe_vlan_insert flag. But there is the bug 1855 * and PFC control may be broken, so disable feature. 1856 */ 1857 config->hw_vlan_insert = 0; 1858 config->txq_inline_min = MLX5_INLINE_HSIZE_NONE; 1859 break; 1860 default: 1861 config->txq_inline_min = MLX5_INLINE_HSIZE_NONE; 1862 break; 1863 } 1864 exit: 1865 DRV_LOG(DEBUG, "min tx inline configured: %d", config->txq_inline_min); 1866 } 1867 1868 /** 1869 * Configures the metadata mask fields in the shared context. 1870 * 1871 * @param [in] dev 1872 * Pointer to Ethernet device. 1873 */ 1874 void 1875 mlx5_set_metadata_mask(struct rte_eth_dev *dev) 1876 { 1877 struct mlx5_priv *priv = dev->data->dev_private; 1878 struct mlx5_dev_ctx_shared *sh = priv->sh; 1879 uint32_t meta, mark, reg_c0; 1880 1881 reg_c0 = ~priv->vport_meta_mask; 1882 switch (priv->config.dv_xmeta_en) { 1883 case MLX5_XMETA_MODE_LEGACY: 1884 meta = UINT32_MAX; 1885 mark = MLX5_FLOW_MARK_MASK; 1886 break; 1887 case MLX5_XMETA_MODE_META16: 1888 meta = reg_c0 >> rte_bsf32(reg_c0); 1889 mark = MLX5_FLOW_MARK_MASK; 1890 break; 1891 case MLX5_XMETA_MODE_META32: 1892 meta = UINT32_MAX; 1893 mark = (reg_c0 >> rte_bsf32(reg_c0)) & MLX5_FLOW_MARK_MASK; 1894 break; 1895 default: 1896 meta = 0; 1897 mark = 0; 1898 MLX5_ASSERT(false); 1899 break; 1900 } 1901 if (sh->dv_mark_mask && sh->dv_mark_mask != mark) 1902 DRV_LOG(WARNING, "metadata MARK mask mismatche %08X:%08X", 1903 sh->dv_mark_mask, mark); 1904 else 1905 sh->dv_mark_mask = mark; 1906 if (sh->dv_meta_mask && sh->dv_meta_mask != meta) 1907 DRV_LOG(WARNING, "metadata META mask mismatche %08X:%08X", 1908 sh->dv_meta_mask, meta); 1909 else 1910 sh->dv_meta_mask = meta; 1911 if (sh->dv_regc0_mask && sh->dv_regc0_mask != reg_c0) 1912 DRV_LOG(WARNING, "metadata reg_c0 mask mismatche %08X:%08X", 1913 sh->dv_meta_mask, reg_c0); 1914 else 1915 sh->dv_regc0_mask = reg_c0; 1916 DRV_LOG(DEBUG, "metadata mode %u", priv->config.dv_xmeta_en); 1917 DRV_LOG(DEBUG, "metadata MARK mask %08X", sh->dv_mark_mask); 1918 DRV_LOG(DEBUG, "metadata META mask %08X", sh->dv_meta_mask); 1919 DRV_LOG(DEBUG, "metadata reg_c0 mask %08X", sh->dv_regc0_mask); 1920 } 1921 1922 int 1923 rte_pmd_mlx5_get_dyn_flag_names(char *names[], unsigned int n) 1924 { 1925 static const char *const dynf_names[] = { 1926 RTE_PMD_MLX5_FINE_GRANULARITY_INLINE, 1927 RTE_MBUF_DYNFLAG_METADATA_NAME, 1928 RTE_MBUF_DYNFLAG_TX_TIMESTAMP_NAME 1929 }; 1930 unsigned int i; 1931 1932 if (n < RTE_DIM(dynf_names)) 1933 return -ENOMEM; 1934 for (i = 0; i < RTE_DIM(dynf_names); i++) { 1935 if (names[i] == NULL) 1936 return -EINVAL; 1937 strcpy(names[i], dynf_names[i]); 1938 } 1939 return RTE_DIM(dynf_names); 1940 } 1941 1942 /** 1943 * Comparison callback to sort device data. 1944 * 1945 * This is meant to be used with qsort(). 1946 * 1947 * @param a[in] 1948 * Pointer to pointer to first data object. 1949 * @param b[in] 1950 * Pointer to pointer to second data object. 1951 * 1952 * @return 1953 * 0 if both objects are equal, less than 0 if the first argument is less 1954 * than the second, greater than 0 otherwise. 1955 */ 1956 int 1957 mlx5_dev_check_sibling_config(struct mlx5_priv *priv, 1958 struct mlx5_dev_config *config) 1959 { 1960 struct mlx5_dev_ctx_shared *sh = priv->sh; 1961 struct mlx5_dev_config *sh_conf = NULL; 1962 uint16_t port_id; 1963 1964 MLX5_ASSERT(sh); 1965 /* Nothing to compare for the single/first device. */ 1966 if (sh->refcnt == 1) 1967 return 0; 1968 /* Find the device with shared context. */ 1969 MLX5_ETH_FOREACH_DEV(port_id, priv->pci_dev) { 1970 struct mlx5_priv *opriv = 1971 rte_eth_devices[port_id].data->dev_private; 1972 1973 if (opriv && opriv != priv && opriv->sh == sh) { 1974 sh_conf = &opriv->config; 1975 break; 1976 } 1977 } 1978 if (!sh_conf) 1979 return 0; 1980 if (sh_conf->dv_flow_en ^ config->dv_flow_en) { 1981 DRV_LOG(ERR, "\"dv_flow_en\" configuration mismatch" 1982 " for shared %s context", sh->ibdev_name); 1983 rte_errno = EINVAL; 1984 return rte_errno; 1985 } 1986 if (sh_conf->dv_xmeta_en ^ config->dv_xmeta_en) { 1987 DRV_LOG(ERR, "\"dv_xmeta_en\" configuration mismatch" 1988 " for shared %s context", sh->ibdev_name); 1989 rte_errno = EINVAL; 1990 return rte_errno; 1991 } 1992 return 0; 1993 } 1994 1995 /** 1996 * Look for the ethernet device belonging to mlx5 driver. 1997 * 1998 * @param[in] port_id 1999 * port_id to start looking for device. 2000 * @param[in] pci_dev 2001 * Pointer to the hint PCI device. When device is being probed 2002 * the its siblings (master and preceding representors might 2003 * not have assigned driver yet (because the mlx5_os_pci_probe() 2004 * is not completed yet, for this case match on hint PCI 2005 * device may be used to detect sibling device. 2006 * 2007 * @return 2008 * port_id of found device, RTE_MAX_ETHPORT if not found. 2009 */ 2010 uint16_t 2011 mlx5_eth_find_next(uint16_t port_id, struct rte_pci_device *pci_dev) 2012 { 2013 while (port_id < RTE_MAX_ETHPORTS) { 2014 struct rte_eth_dev *dev = &rte_eth_devices[port_id]; 2015 2016 if (dev->state != RTE_ETH_DEV_UNUSED && 2017 dev->device && 2018 (dev->device == &pci_dev->device || 2019 (dev->device->driver && 2020 dev->device->driver->name && 2021 !strcmp(dev->device->driver->name, MLX5_DRIVER_NAME)))) 2022 break; 2023 port_id++; 2024 } 2025 if (port_id >= RTE_MAX_ETHPORTS) 2026 return RTE_MAX_ETHPORTS; 2027 return port_id; 2028 } 2029 2030 /** 2031 * DPDK callback to remove a PCI device. 2032 * 2033 * This function removes all Ethernet devices belong to a given PCI device. 2034 * 2035 * @param[in] pci_dev 2036 * Pointer to the PCI device. 2037 * 2038 * @return 2039 * 0 on success, the function cannot fail. 2040 */ 2041 static int 2042 mlx5_pci_remove(struct rte_pci_device *pci_dev) 2043 { 2044 uint16_t port_id; 2045 int ret = 0; 2046 2047 RTE_ETH_FOREACH_DEV_OF(port_id, &pci_dev->device) { 2048 /* 2049 * mlx5_dev_close() is not registered to secondary process, 2050 * call the close function explicitly for secondary process. 2051 */ 2052 if (rte_eal_process_type() == RTE_PROC_SECONDARY) 2053 ret |= mlx5_dev_close(&rte_eth_devices[port_id]); 2054 else 2055 ret |= rte_eth_dev_close(port_id); 2056 } 2057 return ret == 0 ? 0 : -EIO; 2058 } 2059 2060 static const struct rte_pci_id mlx5_pci_id_map[] = { 2061 { 2062 RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX, 2063 PCI_DEVICE_ID_MELLANOX_CONNECTX4) 2064 }, 2065 { 2066 RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX, 2067 PCI_DEVICE_ID_MELLANOX_CONNECTX4VF) 2068 }, 2069 { 2070 RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX, 2071 PCI_DEVICE_ID_MELLANOX_CONNECTX4LX) 2072 }, 2073 { 2074 RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX, 2075 PCI_DEVICE_ID_MELLANOX_CONNECTX4LXVF) 2076 }, 2077 { 2078 RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX, 2079 PCI_DEVICE_ID_MELLANOX_CONNECTX5) 2080 }, 2081 { 2082 RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX, 2083 PCI_DEVICE_ID_MELLANOX_CONNECTX5VF) 2084 }, 2085 { 2086 RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX, 2087 PCI_DEVICE_ID_MELLANOX_CONNECTX5EX) 2088 }, 2089 { 2090 RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX, 2091 PCI_DEVICE_ID_MELLANOX_CONNECTX5EXVF) 2092 }, 2093 { 2094 RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX, 2095 PCI_DEVICE_ID_MELLANOX_CONNECTX5BF) 2096 }, 2097 { 2098 RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX, 2099 PCI_DEVICE_ID_MELLANOX_CONNECTX5BFVF) 2100 }, 2101 { 2102 RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX, 2103 PCI_DEVICE_ID_MELLANOX_CONNECTX6) 2104 }, 2105 { 2106 RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX, 2107 PCI_DEVICE_ID_MELLANOX_CONNECTX6VF) 2108 }, 2109 { 2110 RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX, 2111 PCI_DEVICE_ID_MELLANOX_CONNECTX6DX) 2112 }, 2113 { 2114 RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX, 2115 PCI_DEVICE_ID_MELLANOX_CONNECTX6DXVF) 2116 }, 2117 { 2118 RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX, 2119 PCI_DEVICE_ID_MELLANOX_CONNECTX6DXBF) 2120 }, 2121 { 2122 RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX, 2123 PCI_DEVICE_ID_MELLANOX_CONNECTX6LX) 2124 }, 2125 { 2126 .vendor_id = 0 2127 } 2128 }; 2129 2130 static struct mlx5_pci_driver mlx5_driver = { 2131 .driver_class = MLX5_CLASS_NET, 2132 .pci_driver = { 2133 .driver = { 2134 .name = MLX5_DRIVER_NAME, 2135 }, 2136 .id_table = mlx5_pci_id_map, 2137 .probe = mlx5_os_pci_probe, 2138 .remove = mlx5_pci_remove, 2139 .dma_map = mlx5_dma_map, 2140 .dma_unmap = mlx5_dma_unmap, 2141 .drv_flags = PCI_DRV_FLAGS, 2142 }, 2143 }; 2144 2145 /* Initialize driver log type. */ 2146 RTE_LOG_REGISTER(mlx5_logtype, pmd.net.mlx5, NOTICE) 2147 2148 /** 2149 * Driver initialization routine. 2150 */ 2151 RTE_INIT(rte_mlx5_pmd_init) 2152 { 2153 mlx5_common_init(); 2154 /* Build the static tables for Verbs conversion. */ 2155 mlx5_set_ptype_table(); 2156 mlx5_set_cksum_table(); 2157 mlx5_set_swp_types_table(); 2158 if (mlx5_glue) 2159 mlx5_pci_driver_register(&mlx5_driver); 2160 } 2161 2162 RTE_PMD_EXPORT_NAME(net_mlx5, __COUNTER__); 2163 RTE_PMD_REGISTER_PCI_TABLE(net_mlx5, mlx5_pci_id_map); 2164 RTE_PMD_REGISTER_KMOD_DEP(net_mlx5, "* ib_uverbs & mlx5_core & mlx5_ib"); 2165