/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2015 6WIND S.A.
 * Copyright 2015 Mellanox Technologies, Ltd
 */

#include <stddef.h>
#include <unistd.h>
#include <string.h>
#include <stdint.h>
#include <stdlib.h>
#include <errno.h>

#include <rte_malloc.h>
#include <rte_ethdev_driver.h>
#include <rte_ethdev_pci.h>
#include <rte_pci.h>
#include <rte_bus_pci.h>
#include <rte_common.h>
#include <rte_kvargs.h>
#include <rte_rwlock.h>
#include <rte_spinlock.h>
#include <rte_string_fns.h>
#include <rte_alarm.h>

#include <mlx5_glue.h>
#include <mlx5_devx_cmds.h>
#include <mlx5_common.h>
#include <mlx5_common_os.h>
#include <mlx5_common_mp.h>
#include <mlx5_common_pci.h>
#include <mlx5_malloc.h>

#include "mlx5_defs.h"
#include "mlx5.h"
#include "mlx5_utils.h"
#include "mlx5_rxtx.h"
#include "mlx5_autoconf.h"
#include "mlx5_mr.h"
#include "mlx5_flow.h"
#include "rte_pmd_mlx5.h"

/* Device parameter to enable RX completion queue compression. */
#define MLX5_RXQ_CQE_COMP_EN "rxq_cqe_comp_en"

/* Device parameter to enable RX completion entry padding to 128B. */
#define MLX5_RXQ_CQE_PAD_EN "rxq_cqe_pad_en"

/* Device parameter to enable padding Rx packet to cacheline size. */
#define MLX5_RXQ_PKT_PAD_EN "rxq_pkt_pad_en"

/* Device parameter to enable Multi-Packet Rx queue. */
#define MLX5_RX_MPRQ_EN "mprq_en"

/* Device parameter to configure log 2 of the number of strides for MPRQ. */
#define MLX5_RX_MPRQ_LOG_STRIDE_NUM "mprq_log_stride_num"

/* Device parameter to configure log 2 of the stride size for MPRQ. */
#define MLX5_RX_MPRQ_LOG_STRIDE_SIZE "mprq_log_stride_size"

/* Device parameter to limit the size of memcpy'd packet for MPRQ. */
#define MLX5_RX_MPRQ_MAX_MEMCPY_LEN "mprq_max_memcpy_len"

/* Device parameter to set the minimum number of Rx queues to enable MPRQ. */
#define MLX5_RXQS_MIN_MPRQ "rxqs_min_mprq"

/* Device parameter to configure inline send. Deprecated, ignored. */
#define MLX5_TXQ_INLINE "txq_inline"

/* Device parameter to limit packet size to inline with ordinary SEND. */
#define MLX5_TXQ_INLINE_MAX "txq_inline_max"

/* Device parameter to configure minimal data size to inline. */
#define MLX5_TXQ_INLINE_MIN "txq_inline_min"

/* Device parameter to limit packet size to inline with Enhanced MPW. */
#define MLX5_TXQ_INLINE_MPW "txq_inline_mpw"

/*
 * Device parameter to configure the number of TX queues threshold for
 * enabling inline send.
 */
#define MLX5_TXQS_MIN_INLINE "txqs_min_inline"

/*
 * Device parameter to configure the number of TX queues threshold for
 * enabling vectorized Tx, deprecated, ignored (no vectorized Tx routines).
 */
#define MLX5_TXQS_MAX_VEC "txqs_max_vec"

/* Device parameter to enable multi-packet send WQEs. */
#define MLX5_TXQ_MPW_EN "txq_mpw_en"

/*
 * Device parameter to force doorbell register mapping
 * to the non-cached region, eliminating the extra write memory barrier.
 */
#define MLX5_TX_DB_NC "tx_db_nc"

/*
 * Device parameter to include 2 dsegs in the title WQEBB.
 * Deprecated, ignored.
 */
#define MLX5_TXQ_MPW_HDR_DSEG_EN "txq_mpw_hdr_dseg_en"
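
/*
 * Note (added for clarity): the MLX5_* keys in this file are consumed as
 * PCI device arguments on the EAL command line, e.g. (illustrative values
 * only, not taken from this file):
 *   dpdk-testpmd -a <PCI BDF>,mprq_en=1,txq_inline_max=204,txqs_min_inline=8
 */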

/*
 * Device parameter to limit the size of inlining packet.
 * Deprecated, ignored.
 */
#define MLX5_TXQ_MAX_INLINE_LEN "txq_max_inline_len"

/*
 * Device parameter to enable Tx scheduling on timestamps
 * and specify the packet pacing granularity in nanoseconds.
 */
#define MLX5_TX_PP "tx_pp"

/*
 * Device parameter to specify skew in nanoseconds on Tx datapath,
 * it represents the time between SQ start WQE processing and
 * the appearance of the actual packet data on the wire.
 */
#define MLX5_TX_SKEW "tx_skew"

/*
 * Device parameter to enable hardware Tx vector.
 * Deprecated, ignored (no vectorized Tx routines anymore).
 */
#define MLX5_TX_VEC_EN "tx_vec_en"

/* Device parameter to enable hardware Rx vector. */
#define MLX5_RX_VEC_EN "rx_vec_en"

/* Allow L3 VXLAN flow creation. */
#define MLX5_L3_VXLAN_EN "l3_vxlan_en"

/* Activate DV E-Switch flow steering. */
#define MLX5_DV_ESW_EN "dv_esw_en"

/* Activate DV flow steering. */
#define MLX5_DV_FLOW_EN "dv_flow_en"

/* Enable extensive flow metadata support. */
#define MLX5_DV_XMETA_EN "dv_xmeta_en"

/* Device parameter to let the user manage the LACP traffic of bonded device. */
#define MLX5_LACP_BY_USER "lacp_by_user"

/* Activate Netlink support in VF mode. */
#define MLX5_VF_NL_EN "vf_nl_en"

/* Enable extending memsegs when creating a MR. */
#define MLX5_MR_EXT_MEMSEG_EN "mr_ext_memseg_en"

/* Select port representors to instantiate. */
#define MLX5_REPRESENTOR "representor"

/* Device parameter to configure the maximum number of dump files per queue. */
#define MLX5_MAX_DUMP_FILES_NUM "max_dump_files_num"

/* Configure timeout of LRO session (in microseconds). */
#define MLX5_LRO_TIMEOUT_USEC "lro_timeout_usec"

/*
 * Device parameter to configure the total data buffer size for a single
 * hairpin queue (logarithm value).
 */
#define MLX5_HP_BUF_SIZE "hp_buf_log_sz"

/* Flow memory reclaim mode. */
#define MLX5_RECLAIM_MEM "reclaim_mem_mode"

/* The default memory allocator used in PMD. */
#define MLX5_SYS_MEM_EN "sys_mem_en"

/* Decap will be used or not. */
#define MLX5_DECAP_EN "decap_en"

/* Shared memory between primary and secondary processes. */
struct mlx5_shared_data *mlx5_shared_data;
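
/*
 * Note (added for clarity): this pointer is expected to reference a memory
 * zone shared between the primary and secondary processes; it is set up
 * during probing, not in this file.
 */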

/** Driver-specific log messages type. */
int mlx5_logtype;

static LIST_HEAD(, mlx5_dev_ctx_shared) mlx5_dev_ctx_list =
						LIST_HEAD_INITIALIZER();
static pthread_mutex_t mlx5_dev_ctx_list_mutex = PTHREAD_MUTEX_INITIALIZER;

static const struct mlx5_indexed_pool_config mlx5_ipool_cfg[] = {
#ifdef HAVE_IBV_FLOW_DV_SUPPORT
	{
		.size = sizeof(struct mlx5_flow_dv_encap_decap_resource),
		.trunk_size = 64,
		.grow_trunk = 3,
		.grow_shift = 2,
		.need_lock = 1,
		.release_mem_en = 1,
		.malloc = mlx5_malloc,
		.free = mlx5_free,
		.type = "mlx5_encap_decap_ipool",
	},
	{
		.size = sizeof(struct mlx5_flow_dv_push_vlan_action_resource),
		.trunk_size = 64,
		.grow_trunk = 3,
		.grow_shift = 2,
		.need_lock = 1,
		.release_mem_en = 1,
		.malloc = mlx5_malloc,
		.free = mlx5_free,
		.type = "mlx5_push_vlan_ipool",
	},
	{
		.size = sizeof(struct mlx5_flow_dv_tag_resource),
		.trunk_size = 64,
		.grow_trunk = 3,
		.grow_shift = 2,
		.need_lock = 1,
		.release_mem_en = 1,
		.malloc = mlx5_malloc,
		.free = mlx5_free,
		.type = "mlx5_tag_ipool",
	},
	{
		.size = sizeof(struct mlx5_flow_dv_port_id_action_resource),
		.trunk_size = 64,
		.grow_trunk = 3,
		.grow_shift = 2,
		.need_lock = 1,
		.release_mem_en = 1,
		.malloc = mlx5_malloc,
		.free = mlx5_free,
		.type = "mlx5_port_id_ipool",
	},
	{
		.size = sizeof(struct mlx5_flow_tbl_data_entry),
		.trunk_size = 64,
		.grow_trunk = 3,
		.grow_shift = 2,
		.need_lock = 1,
		.release_mem_en = 1,
		.malloc = mlx5_malloc,
		.free = mlx5_free,
		.type = "mlx5_jump_ipool",
	},
	{
		.size = sizeof(struct mlx5_flow_dv_sample_resource),
		.trunk_size = 64,
		.grow_trunk = 3,
		.grow_shift = 2,
		.need_lock = 1,
		.release_mem_en = 1,
		.malloc = mlx5_malloc,
		.free = mlx5_free,
		.type = "mlx5_sample_ipool",
	},
	{
		.size = sizeof(struct mlx5_flow_dv_dest_array_resource),
		.trunk_size = 64,
		.grow_trunk = 3,
		.grow_shift = 2,
		.need_lock = 1,
		.release_mem_en = 1,
		.malloc = mlx5_malloc,
		.free = mlx5_free,
		.type = "mlx5_dest_array_ipool",
	},
#endif
	{
		.size = sizeof(struct mlx5_flow_meter),
		.trunk_size = 64,
		.grow_trunk = 3,
		.grow_shift = 2,
		.need_lock = 1,
		.release_mem_en = 1,
		.malloc = mlx5_malloc,
		.free = mlx5_free,
		.type = "mlx5_meter_ipool",
	},
	{
		.size = sizeof(struct mlx5_flow_mreg_copy_resource),
		.trunk_size = 64,
		.grow_trunk = 3,
		.grow_shift = 2,
		.need_lock = 1,
		.release_mem_en = 1,
		.malloc = mlx5_malloc,
		.free = mlx5_free,
		.type = "mlx5_mcp_ipool",
	},
	{
		.size = (sizeof(struct mlx5_hrxq) + MLX5_RSS_HASH_KEY_LEN),
		.trunk_size = 64,
		.grow_trunk = 3,
		.grow_shift = 2,
		.need_lock = 1,
		.release_mem_en = 1,
		.malloc = mlx5_malloc,
		.free = mlx5_free,
		.type = "mlx5_hrxq_ipool",
	},
	{
		/*
		 * MLX5_IPOOL_MLX5_FLOW size varies for DV and VERBS flows.
		 * It is set at runtime according to the PCI function
		 * configuration.
		 */
		.size = 0,
		.trunk_size = 64,
		.grow_trunk = 3,
		.grow_shift = 2,
		.need_lock = 1,
		.release_mem_en = 1,
		.malloc = mlx5_malloc,
		.free = mlx5_free,
		.type = "mlx5_flow_handle_ipool",
	},
	{
		.size = sizeof(struct rte_flow),
		.trunk_size = 4096,
		.need_lock = 1,
		.release_mem_en = 1,
		.malloc = mlx5_malloc,
		.free = mlx5_free,
		.type = "rte_flow_ipool",
	},
	{
		.size = 0,
		.need_lock = 1,
		.type = "mlx5_flow_rss_id_ipool",
	},
	{
		.size = 0,
		.need_lock = 1,
		.type = "mlx5_flow_tnl_flow_ipool",
	},
	{
		.size = 0,
		.need_lock = 1,
		.type = "mlx5_flow_tnl_tbl_ipool",
	},
};

#define MLX5_FLOW_MIN_ID_POOL_SIZE 512
#define MLX5_ID_GENERATION_ARRAY_FACTOR 16

#define MLX5_FLOW_TABLE_HLIST_ARRAY_SIZE 4096

/**
 * Initialize the shared aging list information per port.
 *
 * @param[in] sh
 *   Pointer to mlx5_dev_ctx_shared object.
 */
static void
mlx5_flow_aging_init(struct mlx5_dev_ctx_shared *sh)
{
	uint32_t i;
	struct mlx5_age_info *age_info;

	for (i = 0; i < sh->max_port; i++) {
		age_info = &sh->port[i].age_info;
		age_info->flags = 0;
		TAILQ_INIT(&age_info->aged_counters);
		rte_spinlock_init(&age_info->aged_sl);
		MLX5_AGE_SET(age_info, MLX5_AGE_TRIGGER);
	}
}

/**
 * Initialize the counters management structure.
 *
 * @param[in] sh
 *   Pointer to mlx5_dev_ctx_shared object.
 */
static void
mlx5_flow_counters_mng_init(struct mlx5_dev_ctx_shared *sh)
{
	int i;

	memset(&sh->cmng, 0, sizeof(sh->cmng));
	TAILQ_INIT(&sh->cmng.flow_counters);
	sh->cmng.min_id = MLX5_CNT_BATCH_OFFSET;
	sh->cmng.max_id = -1;
	sh->cmng.last_pool_idx = POOL_IDX_INVALID;
	rte_spinlock_init(&sh->cmng.pool_update_sl);
	for (i = 0; i < MLX5_COUNTER_TYPE_MAX; i++) {
		TAILQ_INIT(&sh->cmng.counters[i]);
		rte_spinlock_init(&sh->cmng.csl[i]);
	}
}

/**
 * Destroy all the resources allocated for a counter memory management.
 *
 * @param[in] mng
 *   Pointer to the memory management structure.
 */
static void
mlx5_flow_destroy_counter_stat_mem_mng(struct mlx5_counter_stats_mem_mng *mng)
{
	uint8_t *mem = (uint8_t *)(uintptr_t)mng->raws[0].data;

	LIST_REMOVE(mng, next);
	claim_zero(mlx5_devx_cmd_destroy(mng->dm));
	claim_zero(mlx5_glue->devx_umem_dereg(mng->umem));
	mlx5_free(mem);
}

/**
 * Close and release all the resources of the counters management.
 *
 * @param[in] sh
 *   Pointer to mlx5_dev_ctx_shared object to free.
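 *
 * Note (added for clarity): the cancellation loop below keeps retrying while
 * rte_eal_alarm_cancel() reports EINPROGRESS, i.e. while a counter query
 * alarm callback may still be executing, before the counter pools are freed.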
 */
static void
mlx5_flow_counters_mng_close(struct mlx5_dev_ctx_shared *sh)
{
	struct mlx5_counter_stats_mem_mng *mng;
	int i, j;
	int retries = 1024;

	rte_errno = 0;
	while (--retries) {
		rte_eal_alarm_cancel(mlx5_flow_query_alarm, sh);
		if (rte_errno != EINPROGRESS)
			break;
		rte_pause();
	}

	if (sh->cmng.pools) {
		struct mlx5_flow_counter_pool *pool;
		uint16_t n_valid = sh->cmng.n_valid;
		bool fallback = sh->cmng.counter_fallback;

		for (i = 0; i < n_valid; ++i) {
			pool = sh->cmng.pools[i];
			if (!fallback && pool->min_dcs)
				claim_zero(mlx5_devx_cmd_destroy
							     (pool->min_dcs));
			for (j = 0; j < MLX5_COUNTERS_PER_POOL; ++j) {
				struct mlx5_flow_counter *cnt =
						MLX5_POOL_GET_CNT(pool, j);

				if (cnt->action)
					claim_zero
					 (mlx5_glue->destroy_flow_action
					  (cnt->action));
				if (fallback && MLX5_POOL_GET_CNT
				    (pool, j)->dcs_when_free)
					claim_zero(mlx5_devx_cmd_destroy
						   (cnt->dcs_when_free));
			}
			mlx5_free(pool);
		}
		mlx5_free(sh->cmng.pools);
	}
	mng = LIST_FIRST(&sh->cmng.mem_mngs);
	while (mng) {
		mlx5_flow_destroy_counter_stat_mem_mng(mng);
		mng = LIST_FIRST(&sh->cmng.mem_mngs);
	}
	memset(&sh->cmng, 0, sizeof(sh->cmng));
}

/**
 * Initialize the flow resources' indexed mempool.
 *
 * @param[in] sh
 *   Pointer to mlx5_dev_ctx_shared object.
 * @param[in] config
 *   Pointer to user dev config.
 */
static void
mlx5_flow_ipool_create(struct mlx5_dev_ctx_shared *sh,
		       const struct mlx5_dev_config *config)
{
	uint8_t i;
	struct mlx5_indexed_pool_config cfg;

	for (i = 0; i < MLX5_IPOOL_MAX; ++i) {
		cfg = mlx5_ipool_cfg[i];
		switch (i) {
		default:
			break;
		/*
		 * Set MLX5_IPOOL_MLX5_FLOW ipool size
		 * according to PCI function flow configuration.
		 */
		case MLX5_IPOOL_MLX5_FLOW:
			cfg.size = config->dv_flow_en ?
				sizeof(struct mlx5_flow_handle) :
				MLX5_FLOW_HANDLE_VERBS_SIZE;
			break;
		}
		if (config->reclaim_mode)
			cfg.release_mem_en = 1;
		sh->ipool[i] = mlx5_ipool_create(&cfg);
	}
}

/**
 * Release the flow resources' indexed mempool.
 *
 * @param[in] sh
 *   Pointer to mlx5_dev_ctx_shared object.
 */
static void
mlx5_flow_ipool_destroy(struct mlx5_dev_ctx_shared *sh)
{
	uint8_t i;

	for (i = 0; i < MLX5_IPOOL_MAX; ++i)
		mlx5_ipool_destroy(sh->ipool[i]);
}

/*
 * Check if dynamic flex parser for eCPRI already exists.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 *
 * @return
 *   true if it exists, false otherwise.
 */
bool
mlx5_flex_parser_ecpri_exist(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_flex_parser_profiles *prf =
		&priv->sh->fp[MLX5_FLEX_PARSER_ECPRI_0];

	return !!prf->obj;
}

/*
 * Allocation of a flex parser for eCPRI. Once created, the related parser
 * resources will be held until the device is closed.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
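 *
 * Note (added for clarity): the flow layer is expected to check
 * mlx5_flex_parser_ecpri_exist() first and call this routine only on the
 * first eCPRI flow request; subsequent flows reuse the same parser object.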
 */
int
mlx5_flex_parser_ecpri_alloc(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_flex_parser_profiles *prf =
		&priv->sh->fp[MLX5_FLEX_PARSER_ECPRI_0];
	struct mlx5_devx_graph_node_attr node = {
		.modify_field_select = 0,
	};
	uint32_t ids[8];
	int ret;

	if (!priv->config.hca_attr.parse_graph_flex_node) {
		DRV_LOG(ERR, "Dynamic flex parser is not supported "
			"for device %s.", priv->dev_data->name);
		return -ENOTSUP;
	}
	node.header_length_mode = MLX5_GRAPH_NODE_LEN_FIXED;
	/* 8 bytes now: 4B common header + 4B message body header. */
	node.header_length_base_value = 0x8;
	/* After MAC layer: Ether / VLAN. */
	node.in[0].arc_parse_graph_node = MLX5_GRAPH_ARC_NODE_MAC;
	/* Type of compared condition should be 0xAEFE in the L2 layer. */
	node.in[0].compare_condition_value = RTE_ETHER_TYPE_ECPRI;
	/* Sample #0: type in common header. */
	node.sample[0].flow_match_sample_en = 1;
	/* Fixed offset. */
	node.sample[0].flow_match_sample_offset_mode = 0x0;
	/* Only the 2nd byte will be used. */
	node.sample[0].flow_match_sample_field_base_offset = 0x0;
	/* Sample #1: message payload. */
	node.sample[1].flow_match_sample_en = 1;
	/* Fixed offset. */
	node.sample[1].flow_match_sample_offset_mode = 0x0;
	/*
	 * Only the first two bytes will be used right now, and their offset
	 * starts after the common header, which has the length of a DW (u32).
	 */
	node.sample[1].flow_match_sample_field_base_offset = sizeof(uint32_t);
	prf->obj = mlx5_devx_cmd_create_flex_parser(priv->sh->ctx, &node);
	if (!prf->obj) {
		DRV_LOG(ERR, "Failed to create flex parser node object.");
		return (rte_errno == 0) ? -ENODEV : -rte_errno;
	}
	prf->num = 2;
	ret = mlx5_devx_cmd_query_parse_samples(prf->obj, ids, prf->num);
	if (ret) {
		DRV_LOG(ERR, "Failed to query sample IDs.");
		return (rte_errno == 0) ? -ENODEV : -rte_errno;
	}
	prf->offset[0] = 0x0;
	prf->offset[1] = sizeof(uint32_t);
	prf->ids[0] = ids[0];
	prf->ids[1] = ids[1];
	return 0;
}

/*
 * Destroy the flex parser node, including the parser itself, input / output
 * arcs and DW samples. Resources could be reused then.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 */
static void
mlx5_flex_parser_ecpri_release(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_flex_parser_profiles *prf =
		&priv->sh->fp[MLX5_FLEX_PARSER_ECPRI_0];

	if (prf->obj)
		mlx5_devx_cmd_destroy(prf->obj);
	prf->obj = NULL;
}

/*
 * Allocate Rx and Tx UARs in robust fashion.
 * This routine handles the following UAR allocation issues:
 *
 *  - tries to allocate the UAR with the most appropriate memory
 *    mapping type from the ones supported by the host
 *
 *  - tries to allocate the UAR with non-NULL base address
 *    OFED 5.0.x and Upstream rdma_core before v29 returned NULL as the
 *    UAR base address if the UAR was not the first object in the UAR page.
 *    It caused the PMD failure and we should try to get another UAR
 *    till we get the first one with non-NULL base address returned.
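 *
 * Note (added for clarity): the UAR is the device doorbell page mapped into
 * the process address space; "BF" below refers to the BlueFlame /
 * Write-Combining mapping and "NC" to the Non-Cached mapping.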
632 */ 633 static int 634 mlx5_alloc_rxtx_uars(struct mlx5_dev_ctx_shared *sh, 635 const struct mlx5_dev_config *config) 636 { 637 uint32_t uar_mapping, retry; 638 int err = 0; 639 void *base_addr; 640 641 for (retry = 0; retry < MLX5_ALLOC_UAR_RETRY; ++retry) { 642 #ifdef MLX5DV_UAR_ALLOC_TYPE_NC 643 /* Control the mapping type according to the settings. */ 644 uar_mapping = (config->dbnc == MLX5_TXDB_NCACHED) ? 645 MLX5DV_UAR_ALLOC_TYPE_NC : 646 MLX5DV_UAR_ALLOC_TYPE_BF; 647 #else 648 RTE_SET_USED(config); 649 /* 650 * It seems we have no way to control the memory mapping type 651 * for the UAR, the default "Write-Combining" type is supposed. 652 * The UAR initialization on queue creation queries the 653 * actual mapping type done by Verbs/kernel and setups the 654 * PMD datapath accordingly. 655 */ 656 uar_mapping = 0; 657 #endif 658 sh->tx_uar = mlx5_glue->devx_alloc_uar(sh->ctx, uar_mapping); 659 #ifdef MLX5DV_UAR_ALLOC_TYPE_NC 660 if (!sh->tx_uar && 661 uar_mapping == MLX5DV_UAR_ALLOC_TYPE_BF) { 662 if (config->dbnc == MLX5_TXDB_CACHED || 663 config->dbnc == MLX5_TXDB_HEURISTIC) 664 DRV_LOG(WARNING, "Devarg tx_db_nc setting " 665 "is not supported by DevX"); 666 /* 667 * In some environments like virtual machine 668 * the Write Combining mapped might be not supported 669 * and UAR allocation fails. We try "Non-Cached" 670 * mapping for the case. The tx_burst routines take 671 * the UAR mapping type into account on UAR setup 672 * on queue creation. 673 */ 674 DRV_LOG(WARNING, "Failed to allocate Tx DevX UAR (BF)"); 675 uar_mapping = MLX5DV_UAR_ALLOC_TYPE_NC; 676 sh->tx_uar = mlx5_glue->devx_alloc_uar 677 (sh->ctx, uar_mapping); 678 } else if (!sh->tx_uar && 679 uar_mapping == MLX5DV_UAR_ALLOC_TYPE_NC) { 680 if (config->dbnc == MLX5_TXDB_NCACHED) 681 DRV_LOG(WARNING, "Devarg tx_db_nc settings " 682 "is not supported by DevX"); 683 /* 684 * If Verbs/kernel does not support "Non-Cached" 685 * try the "Write-Combining". 686 */ 687 DRV_LOG(WARNING, "Failed to allocate Tx DevX UAR (NC)"); 688 uar_mapping = MLX5DV_UAR_ALLOC_TYPE_BF; 689 sh->tx_uar = mlx5_glue->devx_alloc_uar 690 (sh->ctx, uar_mapping); 691 } 692 #endif 693 if (!sh->tx_uar) { 694 DRV_LOG(ERR, "Failed to allocate Tx DevX UAR (BF/NC)"); 695 err = ENOMEM; 696 goto exit; 697 } 698 base_addr = mlx5_os_get_devx_uar_base_addr(sh->tx_uar); 699 if (base_addr) 700 break; 701 /* 702 * The UARs are allocated by rdma_core within the 703 * IB device context, on context closure all UARs 704 * will be freed, should be no memory/object leakage. 705 */ 706 DRV_LOG(WARNING, "Retrying to allocate Tx DevX UAR"); 707 sh->tx_uar = NULL; 708 } 709 /* Check whether we finally succeeded with valid UAR allocation. */ 710 if (!sh->tx_uar) { 711 DRV_LOG(ERR, "Failed to allocate Tx DevX UAR (NULL base)"); 712 err = ENOMEM; 713 goto exit; 714 } 715 for (retry = 0; retry < MLX5_ALLOC_UAR_RETRY; ++retry) { 716 uar_mapping = 0; 717 sh->devx_rx_uar = mlx5_glue->devx_alloc_uar 718 (sh->ctx, uar_mapping); 719 #ifdef MLX5DV_UAR_ALLOC_TYPE_NC 720 if (!sh->devx_rx_uar && 721 uar_mapping == MLX5DV_UAR_ALLOC_TYPE_BF) { 722 /* 723 * Rx UAR is used to control interrupts only, 724 * should be no datapath noticeable impact, 725 * can try "Non-Cached" mapping safely. 
			 */
			DRV_LOG(WARNING, "Failed to allocate Rx DevX UAR (BF)");
			uar_mapping = MLX5DV_UAR_ALLOC_TYPE_NC;
			sh->devx_rx_uar = mlx5_glue->devx_alloc_uar
						       (sh->ctx, uar_mapping);
		}
#endif
		if (!sh->devx_rx_uar) {
			DRV_LOG(ERR, "Failed to allocate Rx DevX UAR (BF/NC)");
			err = ENOMEM;
			goto exit;
		}
		base_addr = mlx5_os_get_devx_uar_base_addr(sh->devx_rx_uar);
		if (base_addr)
			break;
		/*
		 * The UARs are allocated by rdma_core within the
		 * IB device context, on context closure all UARs
		 * will be freed, should be no memory/object leakage.
		 */
		DRV_LOG(WARNING, "Retrying to allocate Rx DevX UAR");
		sh->devx_rx_uar = NULL;
	}
	/* Check whether we finally succeeded with valid UAR allocation. */
	if (!sh->devx_rx_uar) {
		DRV_LOG(ERR, "Failed to allocate Rx DevX UAR (NULL base)");
		err = ENOMEM;
	}
exit:
	return err;
}

/**
 * Allocate the shared device context. If there is a multiport device, the
 * master and representors will share this context; if there is a single-port
 * dedicated device, the context will be used by the given port only.
 *
 * The routine first searches for the context by the specified device name;
 * if found, the shared context is assumed and the reference counter is
 * incremented. If no context is found, a new one is created and initialized
 * with the specified device context and parameters.
 *
 * @param[in] spawn
 *   Pointer to the device attributes (name, port, etc).
 * @param[in] config
 *   Pointer to device configuration structure.
 *
 * @return
 *   Pointer to mlx5_dev_ctx_shared object on success,
 *   otherwise NULL and rte_errno is set.
 */
struct mlx5_dev_ctx_shared *
mlx5_alloc_shared_dev_ctx(const struct mlx5_dev_spawn_data *spawn,
			  const struct mlx5_dev_config *config)
{
	struct mlx5_dev_ctx_shared *sh;
	int err = 0;
	uint32_t i;
	struct mlx5_devx_tis_attr tis_attr = { 0 };

	MLX5_ASSERT(spawn);
	/* Secondary process should not create the shared context. */
	MLX5_ASSERT(rte_eal_process_type() == RTE_PROC_PRIMARY);
	pthread_mutex_lock(&mlx5_dev_ctx_list_mutex);
	/* Search for IB context by device name. */
	LIST_FOREACH(sh, &mlx5_dev_ctx_list, next) {
		if (!strcmp(sh->ibdev_name,
			    mlx5_os_get_dev_device_name(spawn->phys_dev))) {
			sh->refcnt++;
			goto exit;
		}
	}
	/* No device found, we have to create new shared context. */
	MLX5_ASSERT(spawn->max_port);
	sh = mlx5_malloc(MLX5_MEM_ZERO | MLX5_MEM_RTE,
			 sizeof(struct mlx5_dev_ctx_shared) +
			 spawn->max_port *
			 sizeof(struct mlx5_dev_shared_port),
			 RTE_CACHE_LINE_SIZE, SOCKET_ID_ANY);
	if (!sh) {
		DRV_LOG(ERR, "shared context allocation failure");
		rte_errno = ENOMEM;
		goto exit;
	}
	err = mlx5_os_open_device(spawn, config, sh);
	if (!sh->ctx)
		goto error;
	err = mlx5_os_get_dev_attr(sh->ctx, &sh->device_attr);
	if (err) {
		DRV_LOG(DEBUG, "mlx5_os_get_dev_attr() failed");
		goto error;
	}
	sh->refcnt = 1;
	sh->max_port = spawn->max_port;
	strncpy(sh->ibdev_name, mlx5_os_get_ctx_device_name(sh->ctx),
		sizeof(sh->ibdev_name) - 1);
	strncpy(sh->ibdev_path, mlx5_os_get_ctx_device_path(sh->ctx),
		sizeof(sh->ibdev_path) - 1);
	/*
	 * Setting port_id to max unallowed value means
	 * there is no interrupt subhandler installed for
	 * the given port index i.
	 */
	for (i = 0; i < sh->max_port; i++) {
		sh->port[i].ih_port_id = RTE_MAX_ETHPORTS;
		sh->port[i].devx_ih_port_id = RTE_MAX_ETHPORTS;
	}
	sh->pd = mlx5_glue->alloc_pd(sh->ctx);
	if (sh->pd == NULL) {
		DRV_LOG(ERR, "PD allocation failure");
		err = ENOMEM;
		goto error;
	}
	if (sh->devx) {
		/* Query the EQN for this core. */
		err = mlx5_glue->devx_query_eqn(sh->ctx, 0, &sh->eqn);
		if (err) {
			rte_errno = errno;
			DRV_LOG(ERR, "Failed to query event queue number %d.",
				rte_errno);
			goto error;
		}
		err = mlx5_os_get_pdn(sh->pd, &sh->pdn);
		if (err) {
			DRV_LOG(ERR, "Fail to extract pdn from PD");
			goto error;
		}
		sh->td = mlx5_devx_cmd_create_td(sh->ctx);
		if (!sh->td) {
			DRV_LOG(ERR, "TD allocation failure");
			err = ENOMEM;
			goto error;
		}
		tis_attr.transport_domain = sh->td->id;
		sh->tis = mlx5_devx_cmd_create_tis(sh->ctx, &tis_attr);
		if (!sh->tis) {
			DRV_LOG(ERR, "TIS allocation failure");
			err = ENOMEM;
			goto error;
		}
		err = mlx5_alloc_rxtx_uars(sh, config);
		if (err)
			goto error;
		MLX5_ASSERT(sh->tx_uar);
		MLX5_ASSERT(mlx5_os_get_devx_uar_base_addr(sh->tx_uar));

		MLX5_ASSERT(sh->devx_rx_uar);
		MLX5_ASSERT(mlx5_os_get_devx_uar_base_addr(sh->devx_rx_uar));
	}
#ifndef RTE_ARCH_64
	/* Initialize UAR access locks for 32bit implementations. */
	rte_spinlock_init(&sh->uar_lock_cq);
	for (i = 0; i < MLX5_UAR_PAGE_NUM_MAX; i++)
		rte_spinlock_init(&sh->uar_lock[i]);
#endif
	/*
	 * Once the device is added to the list of memory event
	 * callback, its global MR cache table cannot be expanded
	 * on the fly because of deadlock. If it overflows, lookup
	 * should be done by searching MR list linearly, which is slow.
	 *
	 * At this point the device is not added to the memory
	 * event list yet, context is just being created.
	 */
	err = mlx5_mr_btree_init(&sh->share_cache.cache,
				 MLX5_MR_BTREE_CACHE_N * 2,
				 spawn->pci_dev->device.numa_node);
	if (err) {
		err = rte_errno;
		goto error;
	}
	mlx5_os_set_reg_mr_cb(&sh->share_cache.reg_mr_cb,
			      &sh->share_cache.dereg_mr_cb);
	mlx5_os_dev_shared_handler_install(sh);
	sh->cnt_id_tbl = mlx5_l3t_create(MLX5_L3T_TYPE_DWORD);
	if (!sh->cnt_id_tbl) {
		err = rte_errno;
		goto error;
	}
	mlx5_flow_aging_init(sh);
	mlx5_flow_counters_mng_init(sh);
	mlx5_flow_ipool_create(sh, config);
	/* Add device to memory callback list. */
	rte_rwlock_write_lock(&mlx5_shared_data->mem_event_rwlock);
	LIST_INSERT_HEAD(&mlx5_shared_data->mem_event_cb_list,
			 sh, mem_event_cb);
	rte_rwlock_write_unlock(&mlx5_shared_data->mem_event_rwlock);
	/* Add context to the global device list.
	 */
	LIST_INSERT_HEAD(&mlx5_dev_ctx_list, sh, next);
exit:
	pthread_mutex_unlock(&mlx5_dev_ctx_list_mutex);
	return sh;
error:
	pthread_mutex_destroy(&sh->txpp.mutex);
	pthread_mutex_unlock(&mlx5_dev_ctx_list_mutex);
	MLX5_ASSERT(sh);
	if (sh->cnt_id_tbl)
		mlx5_l3t_destroy(sh->cnt_id_tbl);
	if (sh->tis)
		claim_zero(mlx5_devx_cmd_destroy(sh->tis));
	if (sh->td)
		claim_zero(mlx5_devx_cmd_destroy(sh->td));
	if (sh->devx_rx_uar)
		mlx5_glue->devx_free_uar(sh->devx_rx_uar);
	if (sh->tx_uar)
		mlx5_glue->devx_free_uar(sh->tx_uar);
	if (sh->pd)
		claim_zero(mlx5_glue->dealloc_pd(sh->pd));
	if (sh->ctx)
		claim_zero(mlx5_glue->close_device(sh->ctx));
	mlx5_free(sh);
	MLX5_ASSERT(err > 0);
	rte_errno = err;
	return NULL;
}

/**
 * Free shared IB device context. Decrement counter and if zero free
 * all allocated resources and close handles.
 *
 * @param[in] sh
 *   Pointer to mlx5_dev_ctx_shared object to free.
 */
void
mlx5_free_shared_dev_ctx(struct mlx5_dev_ctx_shared *sh)
{
	pthread_mutex_lock(&mlx5_dev_ctx_list_mutex);
#ifdef RTE_LIBRTE_MLX5_DEBUG
	/* Check the object presence in the list. */
	struct mlx5_dev_ctx_shared *lctx;

	LIST_FOREACH(lctx, &mlx5_dev_ctx_list, next)
		if (lctx == sh)
			break;
	MLX5_ASSERT(lctx);
	if (lctx != sh) {
		DRV_LOG(ERR, "Freeing non-existing shared IB context");
		goto exit;
	}
#endif
	MLX5_ASSERT(sh);
	MLX5_ASSERT(sh->refcnt);
	/* Secondary process should not free the shared context. */
	MLX5_ASSERT(rte_eal_process_type() == RTE_PROC_PRIMARY);
	if (--sh->refcnt)
		goto exit;
	/* Remove from memory callback device list. */
	rte_rwlock_write_lock(&mlx5_shared_data->mem_event_rwlock);
	LIST_REMOVE(sh, mem_event_cb);
	rte_rwlock_write_unlock(&mlx5_shared_data->mem_event_rwlock);
	/* Release created Memory Regions. */
	mlx5_mr_release_cache(&sh->share_cache);
	/* Remove context from the global device list. */
	LIST_REMOVE(sh, next);
	pthread_mutex_unlock(&mlx5_dev_ctx_list_mutex);
	/*
	 * Ensure there is no async event handler installed.
	 * Only primary process handles async device events.
	 */
	mlx5_flow_counters_mng_close(sh);
	mlx5_flow_ipool_destroy(sh);
	mlx5_os_dev_shared_handler_uninstall(sh);
	if (sh->cnt_id_tbl) {
		mlx5_l3t_destroy(sh->cnt_id_tbl);
		sh->cnt_id_tbl = NULL;
	}
	if (sh->tx_uar) {
		mlx5_glue->devx_free_uar(sh->tx_uar);
		sh->tx_uar = NULL;
	}
	if (sh->pd)
		claim_zero(mlx5_glue->dealloc_pd(sh->pd));
	if (sh->tis)
		claim_zero(mlx5_devx_cmd_destroy(sh->tis));
	if (sh->td)
		claim_zero(mlx5_devx_cmd_destroy(sh->td));
	if (sh->devx_rx_uar)
		mlx5_glue->devx_free_uar(sh->devx_rx_uar);
	if (sh->ctx)
		claim_zero(mlx5_glue->close_device(sh->ctx));
	pthread_mutex_destroy(&sh->txpp.mutex);
	mlx5_free(sh);
	return;
exit:
	pthread_mutex_unlock(&mlx5_dev_ctx_list_mutex);
}

/**
 * Destroy table hash list.
 *
 * @param[in] priv
 *   Pointer to the private device data structure.
 */
void
mlx5_free_table_hash_list(struct mlx5_priv *priv)
{
	struct mlx5_dev_ctx_shared *sh = priv->sh;

	if (!sh->flow_tbls)
		return;
	mlx5_hlist_destroy(sh->flow_tbls);
}

/**
 * Initialize flow table hash list and create the root tables entry
 * for each domain.
 *
 * @param[in] priv
 *   Pointer to the private device data structure.
 *
 * @return
 *   Zero on success, positive error code otherwise.
 */
int
mlx5_alloc_table_hash_list(struct mlx5_priv *priv __rte_unused)
{
	int err = 0;
	/* Tables are only used in DV and DR modes. */
#ifdef HAVE_IBV_FLOW_DV_SUPPORT
	struct mlx5_dev_ctx_shared *sh = priv->sh;
	char s[MLX5_HLIST_NAMESIZE];

	MLX5_ASSERT(sh);
	snprintf(s, sizeof(s), "%s_flow_table", priv->sh->ibdev_name);
	sh->flow_tbls = mlx5_hlist_create(s, MLX5_FLOW_TABLE_HLIST_ARRAY_SIZE,
					  0, 0, flow_dv_tbl_create_cb, NULL,
					  flow_dv_tbl_remove_cb);
	if (!sh->flow_tbls) {
		DRV_LOG(ERR, "flow tables with hash creation failed.");
		err = ENOMEM;
		return err;
	}
	sh->flow_tbls->ctx = sh;
#ifndef HAVE_MLX5DV_DR
	struct rte_flow_error error;
	struct rte_eth_dev *dev = &rte_eth_devices[priv->dev_data->port_id];

	/*
	 * In case we have no DR support, the zero tables should be created
	 * because DV expects to see them even if they cannot be created by
	 * RDMA-CORE.
	 */
	if (!flow_dv_tbl_resource_get(dev, 0, 0, 0, 0, NULL, 0, 1, &error) ||
	    !flow_dv_tbl_resource_get(dev, 0, 1, 0, 0, NULL, 0, 1, &error) ||
	    !flow_dv_tbl_resource_get(dev, 0, 0, 1, 0, NULL, 0, 1, &error)) {
		err = ENOMEM;
		goto error;
	}
	return err;
error:
	mlx5_free_table_hash_list(priv);
#endif /* HAVE_MLX5DV_DR */
#endif
	return err;
}

/**
 * Retrieve integer value from environment variable.
 *
 * @param[in] name
 *   Environment variable name.
 *
 * @return
 *   Integer value, 0 if the variable is not set.
 */
int
mlx5_getenv_int(const char *name)
{
	const char *val = getenv(name);

	if (val == NULL)
		return 0;
	return atoi(val);
}

/**
 * DPDK callback to add a UDP tunnel port.
 *
 * @param[in] dev
 *   A pointer to eth_dev.
 * @param[in] udp_tunnel
 *   A pointer to udp tunnel.
 *
 * @return
 *   0 on valid udp ports and tunnels, -ENOTSUP otherwise.
 */
int
mlx5_udp_tunnel_port_add(struct rte_eth_dev *dev __rte_unused,
			 struct rte_eth_udp_tunnel *udp_tunnel)
{
	MLX5_ASSERT(udp_tunnel != NULL);
	if (udp_tunnel->prot_type == RTE_TUNNEL_TYPE_VXLAN &&
	    udp_tunnel->udp_port == 4789)
		return 0;
	if (udp_tunnel->prot_type == RTE_TUNNEL_TYPE_VXLAN_GPE &&
	    udp_tunnel->udp_port == 4790)
		return 0;
	return -ENOTSUP;
}

/**
 * Initialize process private data structure.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_proc_priv_init(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_proc_priv *ppriv;
	size_t ppriv_size;

	/*
	 * UAR register table follows the process private structure. BlueFlame
	 * registers for Tx queues are stored in the table.
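	 *
	 * Note (added for clarity): the table is sized just below as one
	 * BlueFlame register pointer per configured Tx queue (priv->txqs_n).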
	 */
	ppriv_size =
		sizeof(struct mlx5_proc_priv) + priv->txqs_n * sizeof(void *);
	ppriv = mlx5_malloc(MLX5_MEM_RTE, ppriv_size, RTE_CACHE_LINE_SIZE,
			    dev->device->numa_node);
	if (!ppriv) {
		rte_errno = ENOMEM;
		return -rte_errno;
	}
	ppriv->uar_table_sz = ppriv_size;
	dev->process_private = ppriv;
	return 0;
}

/**
 * Un-initialize process private data structure.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 */
static void
mlx5_proc_priv_uninit(struct rte_eth_dev *dev)
{
	if (!dev->process_private)
		return;
	mlx5_free(dev->process_private);
	dev->process_private = NULL;
}

/**
 * DPDK callback to close the device.
 *
 * Destroy all queues and objects, free memory.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 */
int
mlx5_dev_close(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	unsigned int i;
	int ret;

	if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
		/* Check if process_private released. */
		if (!dev->process_private)
			return 0;
		mlx5_tx_uar_uninit_secondary(dev);
		mlx5_proc_priv_uninit(dev);
		rte_eth_dev_release_port(dev);
		return 0;
	}
	if (!priv->sh)
		return 0;
	DRV_LOG(DEBUG, "port %u closing device \"%s\"",
		dev->data->port_id,
		((priv->sh->ctx != NULL) ?
		mlx5_os_get_ctx_device_name(priv->sh->ctx) : ""));
	/*
	 * If default mreg copy action is removed at the stop stage,
	 * the search will return none and nothing will be done anymore.
	 */
	mlx5_flow_stop_default(dev);
	mlx5_traffic_disable(dev);
	/*
	 * If all the flows are already flushed in the device stop stage,
	 * then this will return directly without any action.
	 */
	mlx5_flow_list_flush(dev, &priv->flows, true);
	mlx5_shared_action_flush(dev);
	mlx5_flow_meter_flush(dev, NULL);
	/* Prevent crashes when queues are still in use. */
	dev->rx_pkt_burst = removed_rx_burst;
	dev->tx_pkt_burst = removed_tx_burst;
	rte_wmb();
	/* Disable datapath on secondary process. */
	mlx5_mp_os_req_stop_rxtx(dev);
	/* Free the eCPRI flex parser resource. */
	mlx5_flex_parser_ecpri_release(dev);
	if (priv->rxqs != NULL) {
		/* XXX race condition if mlx5_rx_burst() is still running. */
		usleep(1000);
		for (i = 0; (i != priv->rxqs_n); ++i)
			mlx5_rxq_release(dev, i);
		priv->rxqs_n = 0;
		priv->rxqs = NULL;
	}
	if (priv->txqs != NULL) {
		/* XXX race condition if mlx5_tx_burst() is still running.
		 */
		usleep(1000);
		for (i = 0; (i != priv->txqs_n); ++i)
			mlx5_txq_release(dev, i);
		priv->txqs_n = 0;
		priv->txqs = NULL;
	}
	mlx5_proc_priv_uninit(dev);
	if (priv->drop_queue.hrxq)
		mlx5_drop_action_destroy(dev);
	if (priv->mreg_cp_tbl)
		mlx5_hlist_destroy(priv->mreg_cp_tbl);
	mlx5_mprq_free_mp(dev);
	mlx5_os_free_shared_dr(priv);
	if (priv->rss_conf.rss_key != NULL)
		mlx5_free(priv->rss_conf.rss_key);
	if (priv->reta_idx != NULL)
		mlx5_free(priv->reta_idx);
	if (priv->config.vf)
		mlx5_os_mac_addr_flush(dev);
	if (priv->nl_socket_route >= 0)
		close(priv->nl_socket_route);
	if (priv->nl_socket_rdma >= 0)
		close(priv->nl_socket_rdma);
	if (priv->vmwa_context)
		mlx5_vlan_vmwa_exit(priv->vmwa_context);
	ret = mlx5_hrxq_verify(dev);
	if (ret)
		DRV_LOG(WARNING, "port %u some hash Rx queue still remain",
			dev->data->port_id);
	ret = mlx5_ind_table_obj_verify(dev);
	if (ret)
		DRV_LOG(WARNING, "port %u some indirection table still remain",
			dev->data->port_id);
	ret = mlx5_rxq_obj_verify(dev);
	if (ret)
		DRV_LOG(WARNING, "port %u some Rx queue objects still remain",
			dev->data->port_id);
	ret = mlx5_rxq_verify(dev);
	if (ret)
		DRV_LOG(WARNING, "port %u some Rx queues still remain",
			dev->data->port_id);
	ret = mlx5_txq_obj_verify(dev);
	if (ret)
		DRV_LOG(WARNING, "port %u some Verbs Tx queue still remain",
			dev->data->port_id);
	ret = mlx5_txq_verify(dev);
	if (ret)
		DRV_LOG(WARNING, "port %u some Tx queues still remain",
			dev->data->port_id);
	ret = mlx5_flow_verify(dev);
	if (ret)
		DRV_LOG(WARNING, "port %u some flows still remain",
			dev->data->port_id);
	mlx5_cache_list_destroy(&priv->hrxqs);
	/*
	 * Free the shared context in last turn, because the cleanup
	 * routines above may use some shared fields, like
	 * mlx5_os_mac_addr_flush() uses ibdev_path for retrieving
	 * the ifindex if Netlink fails.
	 */
	mlx5_free_shared_dev_ctx(priv->sh);
	if (priv->domain_id != RTE_ETH_DEV_SWITCH_DOMAIN_ID_INVALID) {
		unsigned int c = 0;
		uint16_t port_id;

		MLX5_ETH_FOREACH_DEV(port_id, priv->pci_dev) {
			struct mlx5_priv *opriv =
				rte_eth_devices[port_id].data->dev_private;

			if (!opriv ||
			    opriv->domain_id != priv->domain_id ||
			    &rte_eth_devices[port_id] == dev)
				continue;
			++c;
			break;
		}
		if (!c)
			claim_zero(rte_eth_switch_domain_free(priv->domain_id));
	}
	memset(priv, 0, sizeof(*priv));
	priv->domain_id = RTE_ETH_DEV_SWITCH_DOMAIN_ID_INVALID;
	/*
	 * Reset mac_addrs to NULL such that it is not freed as part of
	 * rte_eth_dev_release_port(). mac_addrs is part of dev_private so
	 * it is freed when dev_private is freed.
	 */
	dev->data->mac_addrs = NULL;
	return 0;
}

/**
 * Verify and store value for device argument.
 *
 * @param[in] key
 *   Key argument to verify.
 * @param[in] val
 *   Value associated with key.
 * @param opaque
 *   User data.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_args_check(const char *key, const char *val, void *opaque)
{
	struct mlx5_dev_config *config = opaque;
	unsigned long mod;
	signed long tmp;

	/* No-op, port representors are processed in mlx5_dev_spawn().
	 */
	if (!strcmp(MLX5_REPRESENTOR, key))
		return 0;
	errno = 0;
	tmp = strtol(val, NULL, 0);
	if (errno) {
		rte_errno = errno;
		DRV_LOG(WARNING, "%s: \"%s\" is not a valid integer", key, val);
		return -rte_errno;
	}
	if (tmp < 0 && strcmp(MLX5_TX_PP, key) && strcmp(MLX5_TX_SKEW, key)) {
		/* Negative values are acceptable for some keys only. */
		rte_errno = EINVAL;
		DRV_LOG(WARNING, "%s: invalid negative value \"%s\"", key, val);
		return -rte_errno;
	}
	mod = tmp >= 0 ? tmp : -tmp;
	if (strcmp(MLX5_RXQ_CQE_COMP_EN, key) == 0) {
		config->cqe_comp = !!tmp;
	} else if (strcmp(MLX5_RXQ_CQE_PAD_EN, key) == 0) {
		config->cqe_pad = !!tmp;
	} else if (strcmp(MLX5_RXQ_PKT_PAD_EN, key) == 0) {
		config->hw_padding = !!tmp;
	} else if (strcmp(MLX5_RX_MPRQ_EN, key) == 0) {
		config->mprq.enabled = !!tmp;
	} else if (strcmp(MLX5_RX_MPRQ_LOG_STRIDE_NUM, key) == 0) {
		config->mprq.stride_num_n = tmp;
	} else if (strcmp(MLX5_RX_MPRQ_LOG_STRIDE_SIZE, key) == 0) {
		config->mprq.stride_size_n = tmp;
	} else if (strcmp(MLX5_RX_MPRQ_MAX_MEMCPY_LEN, key) == 0) {
		config->mprq.max_memcpy_len = tmp;
	} else if (strcmp(MLX5_RXQS_MIN_MPRQ, key) == 0) {
		config->mprq.min_rxqs_num = tmp;
	} else if (strcmp(MLX5_TXQ_INLINE, key) == 0) {
		DRV_LOG(WARNING, "%s: deprecated parameter,"
				 " converted to txq_inline_max", key);
		config->txq_inline_max = tmp;
	} else if (strcmp(MLX5_TXQ_INLINE_MAX, key) == 0) {
		config->txq_inline_max = tmp;
	} else if (strcmp(MLX5_TXQ_INLINE_MIN, key) == 0) {
		config->txq_inline_min = tmp;
	} else if (strcmp(MLX5_TXQ_INLINE_MPW, key) == 0) {
		config->txq_inline_mpw = tmp;
	} else if (strcmp(MLX5_TXQS_MIN_INLINE, key) == 0) {
		config->txqs_inline = tmp;
	} else if (strcmp(MLX5_TXQS_MAX_VEC, key) == 0) {
		DRV_LOG(WARNING, "%s: deprecated parameter, ignored", key);
	} else if (strcmp(MLX5_TXQ_MPW_EN, key) == 0) {
		config->mps = !!tmp;
	} else if (strcmp(MLX5_TX_DB_NC, key) == 0) {
		if (tmp != MLX5_TXDB_CACHED &&
		    tmp != MLX5_TXDB_NCACHED &&
		    tmp != MLX5_TXDB_HEURISTIC) {
			DRV_LOG(ERR, "invalid Tx doorbell "
				     "mapping parameter");
			rte_errno = EINVAL;
			return -rte_errno;
		}
		config->dbnc = tmp;
	} else if (strcmp(MLX5_TXQ_MPW_HDR_DSEG_EN, key) == 0) {
		DRV_LOG(WARNING, "%s: deprecated parameter, ignored", key);
	} else if (strcmp(MLX5_TXQ_MAX_INLINE_LEN, key) == 0) {
		DRV_LOG(WARNING, "%s: deprecated parameter,"
				 " converted to txq_inline_mpw", key);
		config->txq_inline_mpw = tmp;
	} else if (strcmp(MLX5_TX_VEC_EN, key) == 0) {
		DRV_LOG(WARNING, "%s: deprecated parameter, ignored", key);
	} else if (strcmp(MLX5_TX_PP, key) == 0) {
		if (!mod) {
			DRV_LOG(ERR, "Zero Tx packet pacing parameter");
			rte_errno = EINVAL;
			return -rte_errno;
		}
		config->tx_pp = tmp;
	} else if (strcmp(MLX5_TX_SKEW, key) == 0) {
		config->tx_skew = tmp;
	} else if (strcmp(MLX5_RX_VEC_EN, key) == 0) {
		config->rx_vec_en = !!tmp;
	} else if (strcmp(MLX5_L3_VXLAN_EN, key) == 0) {
		config->l3_vxlan_en = !!tmp;
	} else if (strcmp(MLX5_VF_NL_EN, key) == 0) {
		config->vf_nl_en = !!tmp;
	} else if (strcmp(MLX5_DV_ESW_EN, key) == 0) {
		config->dv_esw_en = !!tmp;
	} else if (strcmp(MLX5_DV_FLOW_EN, key) == 0) {
		config->dv_flow_en = !!tmp;
	} else if (strcmp(MLX5_DV_XMETA_EN, key) == 0) {
		if (tmp != MLX5_XMETA_MODE_LEGACY &&
		    tmp != MLX5_XMETA_MODE_META16 &&
		    tmp != MLX5_XMETA_MODE_META32 &&
		    tmp != MLX5_XMETA_MODE_MISS_INFO) {
			DRV_LOG(ERR, "invalid extensive "
				     "metadata parameter");
			rte_errno = EINVAL;
			return -rte_errno;
		}
		if (tmp != MLX5_XMETA_MODE_MISS_INFO)
			config->dv_xmeta_en = tmp;
		else
			config->dv_miss_info = 1;
	} else if (strcmp(MLX5_LACP_BY_USER, key) == 0) {
		config->lacp_by_user = !!tmp;
	} else if (strcmp(MLX5_MR_EXT_MEMSEG_EN, key) == 0) {
		config->mr_ext_memseg_en = !!tmp;
	} else if (strcmp(MLX5_MAX_DUMP_FILES_NUM, key) == 0) {
		config->max_dump_files_num = tmp;
	} else if (strcmp(MLX5_LRO_TIMEOUT_USEC, key) == 0) {
		config->lro.timeout = tmp;
	} else if (strcmp(MLX5_CLASS_ARG_NAME, key) == 0) {
		DRV_LOG(DEBUG, "class argument is %s.", val);
	} else if (strcmp(MLX5_HP_BUF_SIZE, key) == 0) {
		config->log_hp_size = tmp;
	} else if (strcmp(MLX5_RECLAIM_MEM, key) == 0) {
		if (tmp != MLX5_RCM_NONE &&
		    tmp != MLX5_RCM_LIGHT &&
		    tmp != MLX5_RCM_AGGR) {
			DRV_LOG(ERR, "Unrecognize %s: \"%s\"", key, val);
			rte_errno = EINVAL;
			return -rte_errno;
		}
		config->reclaim_mode = tmp;
	} else if (strcmp(MLX5_SYS_MEM_EN, key) == 0) {
		config->sys_mem_en = !!tmp;
	} else if (strcmp(MLX5_DECAP_EN, key) == 0) {
		config->decap_en = !!tmp;
	} else {
		DRV_LOG(WARNING, "%s: unknown parameter", key);
		rte_errno = EINVAL;
		return -rte_errno;
	}
	return 0;
}

/**
 * Parse device parameters.
 *
 * @param config
 *   Pointer to device configuration structure.
 * @param devargs
 *   Device arguments structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_args(struct mlx5_dev_config *config, struct rte_devargs *devargs)
{
	const char **params = (const char *[]){
		MLX5_RXQ_CQE_COMP_EN,
		MLX5_RXQ_CQE_PAD_EN,
		MLX5_RXQ_PKT_PAD_EN,
		MLX5_RX_MPRQ_EN,
		MLX5_RX_MPRQ_LOG_STRIDE_NUM,
		MLX5_RX_MPRQ_LOG_STRIDE_SIZE,
		MLX5_RX_MPRQ_MAX_MEMCPY_LEN,
		MLX5_RXQS_MIN_MPRQ,
		MLX5_TXQ_INLINE,
		MLX5_TXQ_INLINE_MIN,
		MLX5_TXQ_INLINE_MAX,
		MLX5_TXQ_INLINE_MPW,
		MLX5_TXQS_MIN_INLINE,
		MLX5_TXQS_MAX_VEC,
		MLX5_TXQ_MPW_EN,
		MLX5_TXQ_MPW_HDR_DSEG_EN,
		MLX5_TXQ_MAX_INLINE_LEN,
		MLX5_TX_DB_NC,
		MLX5_TX_PP,
		MLX5_TX_SKEW,
		MLX5_TX_VEC_EN,
		MLX5_RX_VEC_EN,
		MLX5_L3_VXLAN_EN,
		MLX5_VF_NL_EN,
		MLX5_DV_ESW_EN,
		MLX5_DV_FLOW_EN,
		MLX5_DV_XMETA_EN,
		MLX5_LACP_BY_USER,
		MLX5_MR_EXT_MEMSEG_EN,
		MLX5_REPRESENTOR,
		MLX5_MAX_DUMP_FILES_NUM,
		MLX5_LRO_TIMEOUT_USEC,
		MLX5_CLASS_ARG_NAME,
		MLX5_HP_BUF_SIZE,
		MLX5_RECLAIM_MEM,
		MLX5_SYS_MEM_EN,
		MLX5_DECAP_EN,
		NULL,
	};
	struct rte_kvargs *kvlist;
	int ret = 0;
	int i;

	if (devargs == NULL)
		return 0;
	/* Following UGLY cast is done to pass checkpatch. */
	kvlist = rte_kvargs_parse(devargs->args, params);
	if (kvlist == NULL) {
		rte_errno = EINVAL;
		return -rte_errno;
	}
	/* Process parameters.
	 */
	for (i = 0; (params[i] != NULL); ++i) {
		if (rte_kvargs_count(kvlist, params[i])) {
			ret = rte_kvargs_process(kvlist, params[i],
						 mlx5_args_check, config);
			if (ret) {
				rte_errno = EINVAL;
				rte_kvargs_free(kvlist);
				return -rte_errno;
			}
		}
	}
	rte_kvargs_free(kvlist);
	return 0;
}

/**
 * Configures the minimal amount of data to inline into WQE
 * while sending packets.
 *
 * - the txq_inline_min has the maximal priority, if this
 *   key is specified in devargs
 * - if DevX is enabled the inline mode is queried from the
 *   device (HCA attributes and NIC vport context if needed).
 * - otherwise L2 mode (18 bytes) is assumed for ConnectX-4/4 Lx
 *   and none (0 bytes) for other NICs
 *
 * @param spawn
 *   Verbs device parameters (name, port, switch_info) to spawn.
 * @param config
 *   Device configuration parameters.
 */
void
mlx5_set_min_inline(struct mlx5_dev_spawn_data *spawn,
		    struct mlx5_dev_config *config)
{
	if (config->txq_inline_min != MLX5_ARG_UNSET) {
		/* Application defines size of inlined data explicitly. */
		switch (spawn->pci_dev->id.device_id) {
		case PCI_DEVICE_ID_MELLANOX_CONNECTX4:
		case PCI_DEVICE_ID_MELLANOX_CONNECTX4VF:
			if (config->txq_inline_min <
				    (int)MLX5_INLINE_HSIZE_L2) {
				DRV_LOG(DEBUG,
					"txq_inline_min aligned to minimal"
					" ConnectX-4 required value %d",
					(int)MLX5_INLINE_HSIZE_L2);
				config->txq_inline_min = MLX5_INLINE_HSIZE_L2;
			}
			break;
		}
		goto exit;
	}
	if (config->hca_attr.eth_net_offloads) {
		/* We have DevX enabled, inline mode queried successfully. */
		switch (config->hca_attr.wqe_inline_mode) {
		case MLX5_CAP_INLINE_MODE_L2:
			/* outer L2 header must be inlined. */
			config->txq_inline_min = MLX5_INLINE_HSIZE_L2;
			goto exit;
		case MLX5_CAP_INLINE_MODE_NOT_REQUIRED:
			/* No inline data are required by NIC. */
			config->txq_inline_min = MLX5_INLINE_HSIZE_NONE;
			config->hw_vlan_insert =
				config->hca_attr.wqe_vlan_insert;
			DRV_LOG(DEBUG, "Tx VLAN insertion is supported");
			goto exit;
		case MLX5_CAP_INLINE_MODE_VPORT_CONTEXT:
			/* inline mode is defined by NIC vport context. */
			if (!config->hca_attr.eth_virt)
				break;
			switch (config->hca_attr.vport_inline_mode) {
			case MLX5_INLINE_MODE_NONE:
				config->txq_inline_min =
					MLX5_INLINE_HSIZE_NONE;
				goto exit;
			case MLX5_INLINE_MODE_L2:
				config->txq_inline_min =
					MLX5_INLINE_HSIZE_L2;
				goto exit;
			case MLX5_INLINE_MODE_IP:
				config->txq_inline_min =
					MLX5_INLINE_HSIZE_L3;
				goto exit;
			case MLX5_INLINE_MODE_TCP_UDP:
				config->txq_inline_min =
					MLX5_INLINE_HSIZE_L4;
				goto exit;
			case MLX5_INLINE_MODE_INNER_L2:
				config->txq_inline_min =
					MLX5_INLINE_HSIZE_INNER_L2;
				goto exit;
			case MLX5_INLINE_MODE_INNER_IP:
				config->txq_inline_min =
					MLX5_INLINE_HSIZE_INNER_L3;
				goto exit;
			case MLX5_INLINE_MODE_INNER_TCP_UDP:
				config->txq_inline_min =
					MLX5_INLINE_HSIZE_INNER_L4;
				goto exit;
			}
		}
	}
	/*
	 * We get here if we are unable to deduce
	 * inline data size with DevX. Try PCI ID
	 * to determine old NICs.
	 */
	switch (spawn->pci_dev->id.device_id) {
	case PCI_DEVICE_ID_MELLANOX_CONNECTX4:
	case PCI_DEVICE_ID_MELLANOX_CONNECTX4VF:
	case PCI_DEVICE_ID_MELLANOX_CONNECTX4LX:
	case PCI_DEVICE_ID_MELLANOX_CONNECTX4LXVF:
		config->txq_inline_min = MLX5_INLINE_HSIZE_L2;
		config->hw_vlan_insert = 0;
		break;
	case PCI_DEVICE_ID_MELLANOX_CONNECTX5:
	case PCI_DEVICE_ID_MELLANOX_CONNECTX5VF:
	case PCI_DEVICE_ID_MELLANOX_CONNECTX5EX:
	case PCI_DEVICE_ID_MELLANOX_CONNECTX5EXVF:
		/*
		 * These NICs support VLAN insertion from WQE and
		 * report the wqe_vlan_insert flag. But there is the bug
		 * and PFC control may be broken, so disable feature.
		 */
		config->hw_vlan_insert = 0;
		config->txq_inline_min = MLX5_INLINE_HSIZE_NONE;
		break;
	default:
		config->txq_inline_min = MLX5_INLINE_HSIZE_NONE;
		break;
	}
exit:
	DRV_LOG(DEBUG, "min tx inline configured: %d", config->txq_inline_min);
}

/**
 * Configures the metadata mask fields in the shared context.
 *
 * @param [in] dev
 *   Pointer to Ethernet device.
 */
void
mlx5_set_metadata_mask(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_dev_ctx_shared *sh = priv->sh;
	uint32_t meta, mark, reg_c0;

	reg_c0 = ~priv->vport_meta_mask;
	switch (priv->config.dv_xmeta_en) {
	case MLX5_XMETA_MODE_LEGACY:
		meta = UINT32_MAX;
		mark = MLX5_FLOW_MARK_MASK;
		break;
	case MLX5_XMETA_MODE_META16:
		meta = reg_c0 >> rte_bsf32(reg_c0);
		mark = MLX5_FLOW_MARK_MASK;
		break;
	case MLX5_XMETA_MODE_META32:
		meta = UINT32_MAX;
		mark = (reg_c0 >> rte_bsf32(reg_c0)) & MLX5_FLOW_MARK_MASK;
		break;
	default:
		meta = 0;
		mark = 0;
		MLX5_ASSERT(false);
		break;
	}
	if (sh->dv_mark_mask && sh->dv_mark_mask != mark)
		DRV_LOG(WARNING, "metadata MARK mask mismatch %08X:%08X",
			sh->dv_mark_mask, mark);
	else
		sh->dv_mark_mask = mark;
	if (sh->dv_meta_mask && sh->dv_meta_mask != meta)
		DRV_LOG(WARNING, "metadata META mask mismatch %08X:%08X",
			sh->dv_meta_mask, meta);
	else
		sh->dv_meta_mask = meta;
	if (sh->dv_regc0_mask && sh->dv_regc0_mask != reg_c0)
		DRV_LOG(WARNING, "metadata reg_c0 mask mismatch %08X:%08X",
			sh->dv_regc0_mask, reg_c0);
	else
		sh->dv_regc0_mask = reg_c0;
	DRV_LOG(DEBUG, "metadata mode %u", priv->config.dv_xmeta_en);
	DRV_LOG(DEBUG, "metadata MARK mask %08X", sh->dv_mark_mask);
	DRV_LOG(DEBUG, "metadata META mask %08X", sh->dv_meta_mask);
	DRV_LOG(DEBUG, "metadata reg_c0 mask %08X", sh->dv_regc0_mask);
}

int
rte_pmd_mlx5_get_dyn_flag_names(char *names[], unsigned int n)
{
	static const char *const dynf_names[] = {
		RTE_PMD_MLX5_FINE_GRANULARITY_INLINE,
		RTE_MBUF_DYNFLAG_METADATA_NAME,
		RTE_MBUF_DYNFLAG_TX_TIMESTAMP_NAME
	};
	unsigned int i;

	if (n < RTE_DIM(dynf_names))
		return -ENOMEM;
	for (i = 0; i < RTE_DIM(dynf_names); i++) {
		if (names[i] == NULL)
			return -EINVAL;
		strcpy(names[i], dynf_names[i]);
	}
	return RTE_DIM(dynf_names);
}
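
/*
 * Note (added for clarity): callers of rte_pmd_mlx5_get_dyn_flag_names()
 * are expected to pass at least RTE_DIM(dynf_names) writable buffers large
 * enough for dynamic flag names; the function returns the number of names
 * copied, or a negative errno value on error.
 */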

/**
 * Check sibling device configurations.
 *
 * Sibling devices sharing the Infiniband device context
 * should have compatible configurations.
 *
 * @param priv
 *   Private device descriptor.
 * @param config
 *   Configuration of the device to be created.
 *
 * @return
 *   0 on success, a positive error value otherwise and rte_errno is set.
 */
int
mlx5_dev_check_sibling_config(struct mlx5_priv *priv,
			      struct mlx5_dev_config *config)
{
	struct mlx5_dev_ctx_shared *sh = priv->sh;
	struct mlx5_dev_config *sh_conf = NULL;
	uint16_t port_id;

	MLX5_ASSERT(sh);
	/* Nothing to compare for the single/first device. */
	if (sh->refcnt == 1)
		return 0;
	/* Find the device with shared context. */
	MLX5_ETH_FOREACH_DEV(port_id, priv->pci_dev) {
		struct mlx5_priv *opriv =
			rte_eth_devices[port_id].data->dev_private;

		if (opriv && opriv != priv && opriv->sh == sh) {
			sh_conf = &opriv->config;
			break;
		}
	}
	if (!sh_conf)
		return 0;
	if (sh_conf->dv_flow_en ^ config->dv_flow_en) {
		DRV_LOG(ERR, "\"dv_flow_en\" configuration mismatch"
			     " for shared %s context", sh->ibdev_name);
		rte_errno = EINVAL;
		return rte_errno;
	}
	if (sh_conf->dv_xmeta_en ^ config->dv_xmeta_en) {
		DRV_LOG(ERR, "\"dv_xmeta_en\" configuration mismatch"
			     " for shared %s context", sh->ibdev_name);
		rte_errno = EINVAL;
		return rte_errno;
	}
	return 0;
}

/**
 * Look for the ethernet device belonging to mlx5 driver.
 *
 * @param[in] port_id
 *   port_id to start looking for device.
 * @param[in] pci_dev
 *   Pointer to the hint PCI device. When the device is being probed,
 *   its siblings (master and preceding representors) might not have
 *   an assigned driver yet (because mlx5_os_pci_probe() is not completed
 *   yet); in this case the match on the hint PCI device may be used to
 *   detect the sibling device.
 *
 * @return
 *   port_id of found device, RTE_MAX_ETHPORTS if not found.
 */
uint16_t
mlx5_eth_find_next(uint16_t port_id, struct rte_pci_device *pci_dev)
{
	while (port_id < RTE_MAX_ETHPORTS) {
		struct rte_eth_dev *dev = &rte_eth_devices[port_id];

		if (dev->state != RTE_ETH_DEV_UNUSED &&
		    dev->device &&
		    (dev->device == &pci_dev->device ||
		     (dev->device->driver &&
		     dev->device->driver->name &&
		     !strcmp(dev->device->driver->name, MLX5_DRIVER_NAME))))
			break;
		port_id++;
	}
	if (port_id >= RTE_MAX_ETHPORTS)
		return RTE_MAX_ETHPORTS;
	return port_id;
}

/**
 * DPDK callback to remove a PCI device.
 *
 * This function removes all Ethernet devices belonging to a given PCI device.
 *
 * @param[in] pci_dev
 *   Pointer to the PCI device.
 *
 * @return
 *   0 on success, -EIO if any port failed to close.
 */
static int
mlx5_pci_remove(struct rte_pci_device *pci_dev)
{
	uint16_t port_id;
	int ret = 0;

	RTE_ETH_FOREACH_DEV_OF(port_id, &pci_dev->device) {
		/*
		 * mlx5_dev_close() is not registered to secondary process,
		 * call the close function explicitly for secondary process.
		 */
		if (rte_eal_process_type() == RTE_PROC_SECONDARY)
			ret |= mlx5_dev_close(&rte_eth_devices[port_id]);
		else
			ret |= rte_eth_dev_close(port_id);
	}
	return ret == 0 ? 0 : -EIO;
}

static const struct rte_pci_id mlx5_pci_id_map[] = {
	{
		RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
			       PCI_DEVICE_ID_MELLANOX_CONNECTX4)
	},
	{
		RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
			       PCI_DEVICE_ID_MELLANOX_CONNECTX4VF)
	},
	{
		RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
			       PCI_DEVICE_ID_MELLANOX_CONNECTX4LX)
	},
	{
		RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
			       PCI_DEVICE_ID_MELLANOX_CONNECTX4LXVF)
	},
	{
		RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
			       PCI_DEVICE_ID_MELLANOX_CONNECTX5)
	},
	{
		RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
			       PCI_DEVICE_ID_MELLANOX_CONNECTX5VF)
	},
	{
		RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
			       PCI_DEVICE_ID_MELLANOX_CONNECTX5EX)
	},
	{
		RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
			       PCI_DEVICE_ID_MELLANOX_CONNECTX5EXVF)
	},
	{
		RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
			       PCI_DEVICE_ID_MELLANOX_CONNECTX5BF)
	},
	{
		RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
			       PCI_DEVICE_ID_MELLANOX_CONNECTX5BFVF)
	},
	{
		RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
			       PCI_DEVICE_ID_MELLANOX_CONNECTX6)
	},
	{
		RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
			       PCI_DEVICE_ID_MELLANOX_CONNECTX6VF)
	},
	{
		RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
			       PCI_DEVICE_ID_MELLANOX_CONNECTX6DX)
	},
	{
		RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
			       PCI_DEVICE_ID_MELLANOX_CONNECTX6DXVF)
	},
	{
		RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
			       PCI_DEVICE_ID_MELLANOX_CONNECTX6DXBF)
	},
	{
		RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
			       PCI_DEVICE_ID_MELLANOX_CONNECTX6LX)
	},
	{
		RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
			       PCI_DEVICE_ID_MELLANOX_CONNECTX7)
	},
	{
		RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
			       PCI_DEVICE_ID_MELLANOX_CONNECTX7BF)
	},
	{
		.vendor_id = 0
	}
};

static struct mlx5_pci_driver mlx5_driver = {
	.driver_class = MLX5_CLASS_NET,
	.pci_driver = {
		.driver = {
			.name = MLX5_DRIVER_NAME,
		},
		.id_table = mlx5_pci_id_map,
		.probe = mlx5_os_pci_probe,
		.remove = mlx5_pci_remove,
		.dma_map = mlx5_dma_map,
		.dma_unmap = mlx5_dma_unmap,
		.drv_flags = PCI_DRV_FLAGS,
	},
};

/* Initialize driver log type. */
RTE_LOG_REGISTER(mlx5_logtype, pmd.net.mlx5, NOTICE)

/**
 * Driver initialization routine.
 */
RTE_INIT(rte_mlx5_pmd_init)
{
	mlx5_common_init();
	/* Build the static tables for Verbs conversion. */
	mlx5_set_ptype_table();
	mlx5_set_cksum_table();
	mlx5_set_swp_types_table();
	if (mlx5_glue)
		mlx5_pci_driver_register(&mlx5_driver);
}

RTE_PMD_EXPORT_NAME(net_mlx5, __COUNTER__);
RTE_PMD_REGISTER_PCI_TABLE(net_mlx5, mlx5_pci_id_map);
RTE_PMD_REGISTER_KMOD_DEP(net_mlx5, "* ib_uverbs & mlx5_core & mlx5_ib");