/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2015 6WIND S.A.
 * Copyright 2015 Mellanox Technologies, Ltd
 */

#include <stddef.h>
#include <unistd.h>
#include <string.h>
#include <stdint.h>
#include <stdlib.h>
#include <errno.h>
#include <fcntl.h>

#include <rte_malloc.h>
#include <ethdev_driver.h>
#include <rte_pci.h>
#include <bus_pci_driver.h>
#include <rte_common.h>
#include <rte_kvargs.h>
#include <rte_rwlock.h>
#include <rte_spinlock.h>
#include <rte_string_fns.h>
#include <rte_eal_paging.h>
#include <rte_alarm.h>
#include <rte_cycles.h>
#include <rte_interrupts.h>

#include <mlx5_glue.h>
#include <mlx5_devx_cmds.h>
#include <mlx5_common.h>
#include <mlx5_common_os.h>
#include <mlx5_common_mp.h>
#include <mlx5_malloc.h>

#include "mlx5_defs.h"
#include "mlx5.h"
#include "mlx5_utils.h"
#include "mlx5_rxtx.h"
#include "mlx5_rx.h"
#include "mlx5_tx.h"
#include "mlx5_autoconf.h"
#include "mlx5_flow.h"
#include "mlx5_flow_os.h"
#include "rte_pmd_mlx5.h"

#define MLX5_ETH_DRIVER_NAME mlx5_eth

/* Device parameter to enable RX completion queue compression. */
#define MLX5_RXQ_CQE_COMP_EN "rxq_cqe_comp_en"

/* Device parameter to enable padding Rx packet to cacheline size. */
#define MLX5_RXQ_PKT_PAD_EN "rxq_pkt_pad_en"

/* Device parameter to enable Multi-Packet Rx queue. */
#define MLX5_RX_MPRQ_EN "mprq_en"

/* Device parameter to configure log 2 of the number of strides for MPRQ. */
#define MLX5_RX_MPRQ_LOG_STRIDE_NUM "mprq_log_stride_num"

/* Device parameter to configure log 2 of the stride size for MPRQ. */
#define MLX5_RX_MPRQ_LOG_STRIDE_SIZE "mprq_log_stride_size"

/* Device parameter to limit the size of memcpy'd packet for MPRQ. */
#define MLX5_RX_MPRQ_MAX_MEMCPY_LEN "mprq_max_memcpy_len"

/* Device parameter to set the minimum number of Rx queues to enable MPRQ. */
#define MLX5_RXQS_MIN_MPRQ "rxqs_min_mprq"

/* Device parameter to configure inline send. Deprecated, ignored. */
#define MLX5_TXQ_INLINE "txq_inline"

/* Device parameter to limit packet size to inline with ordinary SEND. */
#define MLX5_TXQ_INLINE_MAX "txq_inline_max"

/* Device parameter to configure minimal data size to inline. */
#define MLX5_TXQ_INLINE_MIN "txq_inline_min"

/* Device parameter to limit packet size to inline with Enhanced MPW. */
#define MLX5_TXQ_INLINE_MPW "txq_inline_mpw"

/*
 * Device parameter to configure the number of TX queues threshold for
 * enabling inline send.
 */
#define MLX5_TXQS_MIN_INLINE "txqs_min_inline"

/*
 * Device parameter to configure the number of TX queues threshold for
 * enabling vectorized Tx. Deprecated, ignored (no vectorized Tx routines).
 */
#define MLX5_TXQS_MAX_VEC "txqs_max_vec"

/* Device parameter to enable multi-packet send WQEs. */
#define MLX5_TXQ_MPW_EN "txq_mpw_en"

/*
 * Device parameter to include 2 dsegs in the title WQEBB.
 * Deprecated, ignored.
 */
#define MLX5_TXQ_MPW_HDR_DSEG_EN "txq_mpw_hdr_dseg_en"

/*
 * Device parameter to limit the size of inlining packet.
 * Deprecated, ignored.
 */
#define MLX5_TXQ_MAX_INLINE_LEN "txq_max_inline_len"

/*
 * Device parameter to enable Tx scheduling on timestamps
 * and specify the packet pacing granularity in nanoseconds.
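 * For example, devargs "tx_pp=500" requests a scheduling granularity of
 * roughly 500 nanoseconds (illustrative value).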
 */
#define MLX5_TX_PP "tx_pp"

/*
 * Device parameter to specify the skew in nanoseconds on the Tx datapath;
 * it represents the time between the start of SQ WQE processing and the
 * moment the actual packet data appears on the wire.
 */
#define MLX5_TX_SKEW "tx_skew"

/*
 * Device parameter to enable hardware Tx vector.
 * Deprecated, ignored (no vectorized Tx routines anymore).
 */
#define MLX5_TX_VEC_EN "tx_vec_en"

/* Device parameter to enable hardware Rx vector. */
#define MLX5_RX_VEC_EN "rx_vec_en"

/* Allow L3 VXLAN flow creation. */
#define MLX5_L3_VXLAN_EN "l3_vxlan_en"

/* Activate DV E-Switch flow steering. */
#define MLX5_DV_ESW_EN "dv_esw_en"

/* Activate DV flow steering. */
#define MLX5_DV_FLOW_EN "dv_flow_en"

/* Enable extensive flow metadata support. */
#define MLX5_DV_XMETA_EN "dv_xmeta_en"

/* Device parameter to let the user manage the LACP traffic of a bonded device. */
#define MLX5_LACP_BY_USER "lacp_by_user"

/* Activate Netlink support in VF mode. */
#define MLX5_VF_NL_EN "vf_nl_en"

/* Select port representors to instantiate. */
#define MLX5_REPRESENTOR "representor"

/* Device parameter to configure the maximum number of dump files per queue. */
#define MLX5_MAX_DUMP_FILES_NUM "max_dump_files_num"

/* Configure timeout of LRO session (in microseconds). */
#define MLX5_LRO_TIMEOUT_USEC "lro_timeout_usec"

/*
 * Device parameter to configure the total data buffer size for a single
 * hairpin queue (logarithm value).
 */
#define MLX5_HP_BUF_SIZE "hp_buf_log_sz"

/* Flow memory reclaim mode. */
#define MLX5_RECLAIM_MEM "reclaim_mem_mode"

/* Whether decapsulation will be used or not. */
#define MLX5_DECAP_EN "decap_en"

/* Device parameter to allow or prevent inserting rules with duplicate patterns. */
#define MLX5_ALLOW_DUPLICATE_PATTERN "allow_duplicate_pattern"

/* Device parameter to configure the delay drop when creating Rxqs. */
#define MLX5_DELAY_DROP "delay_drop"

/* Device parameter to create the FDB default rule in the PMD. */
#define MLX5_FDB_DEFAULT_RULE_EN "fdb_def_rule_en"

/* HW steering counter configuration. */
#define MLX5_HWS_CNT_SERVICE_CORE "service_core"

/* HW steering counter's query interval. */
#define MLX5_HWS_CNT_CYCLE_TIME "svc_cycle_time"

/* Device parameter to control representor matching in ingress/egress flows with HWS. */
#define MLX5_REPR_MATCHING_EN "repr_matching_en"

/* Shared memory between primary and secondary processes. */
struct mlx5_shared_data *mlx5_shared_data;

/** Driver-specific log messages type.
 */
int mlx5_logtype;

static LIST_HEAD(, mlx5_dev_ctx_shared) mlx5_dev_ctx_list =
						LIST_HEAD_INITIALIZER();
static pthread_mutex_t mlx5_dev_ctx_list_mutex;

static const struct mlx5_indexed_pool_config mlx5_ipool_cfg[] = {
#if defined(HAVE_IBV_FLOW_DV_SUPPORT) || !defined(HAVE_INFINIBAND_VERBS_H)
	[MLX5_IPOOL_DECAP_ENCAP] = {
		.size = sizeof(struct mlx5_flow_dv_encap_decap_resource),
		.trunk_size = 64,
		.grow_trunk = 3,
		.grow_shift = 2,
		.need_lock = 1,
		.release_mem_en = 1,
		.malloc = mlx5_malloc,
		.free = mlx5_free,
		.type = "mlx5_encap_decap_ipool",
	},
	[MLX5_IPOOL_PUSH_VLAN] = {
		.size = sizeof(struct mlx5_flow_dv_push_vlan_action_resource),
		.trunk_size = 64,
		.grow_trunk = 3,
		.grow_shift = 2,
		.need_lock = 1,
		.release_mem_en = 1,
		.malloc = mlx5_malloc,
		.free = mlx5_free,
		.type = "mlx5_push_vlan_ipool",
	},
	[MLX5_IPOOL_TAG] = {
		.size = sizeof(struct mlx5_flow_dv_tag_resource),
		.trunk_size = 64,
		.grow_trunk = 3,
		.grow_shift = 2,
		.need_lock = 1,
		.release_mem_en = 0,
		.per_core_cache = (1 << 16),
		.malloc = mlx5_malloc,
		.free = mlx5_free,
		.type = "mlx5_tag_ipool",
	},
	[MLX5_IPOOL_PORT_ID] = {
		.size = sizeof(struct mlx5_flow_dv_port_id_action_resource),
		.trunk_size = 64,
		.grow_trunk = 3,
		.grow_shift = 2,
		.need_lock = 1,
		.release_mem_en = 1,
		.malloc = mlx5_malloc,
		.free = mlx5_free,
		.type = "mlx5_port_id_ipool",
	},
	[MLX5_IPOOL_JUMP] = {
		.size = sizeof(struct mlx5_flow_tbl_data_entry),
		.trunk_size = 64,
		.grow_trunk = 3,
		.grow_shift = 2,
		.need_lock = 1,
		.release_mem_en = 1,
		.malloc = mlx5_malloc,
		.free = mlx5_free,
		.type = "mlx5_jump_ipool",
	},
	[MLX5_IPOOL_SAMPLE] = {
		.size = sizeof(struct mlx5_flow_dv_sample_resource),
		.trunk_size = 64,
		.grow_trunk = 3,
		.grow_shift = 2,
		.need_lock = 1,
		.release_mem_en = 1,
		.malloc = mlx5_malloc,
		.free = mlx5_free,
		.type = "mlx5_sample_ipool",
	},
	[MLX5_IPOOL_DEST_ARRAY] = {
		.size = sizeof(struct mlx5_flow_dv_dest_array_resource),
		.trunk_size = 64,
		.grow_trunk = 3,
		.grow_shift = 2,
		.need_lock = 1,
		.release_mem_en = 1,
		.malloc = mlx5_malloc,
		.free = mlx5_free,
		.type = "mlx5_dest_array_ipool",
	},
	[MLX5_IPOOL_TUNNEL_ID] = {
		.size = sizeof(struct mlx5_flow_tunnel),
		.trunk_size = MLX5_MAX_TUNNELS,
		.need_lock = 1,
		.release_mem_en = 1,
		.type = "mlx5_tunnel_offload",
	},
	[MLX5_IPOOL_TNL_TBL_ID] = {
		.size = 0,
		.need_lock = 1,
		.type = "mlx5_flow_tnl_tbl_ipool",
	},
#endif
	[MLX5_IPOOL_MTR] = {
		/*
		 * Meter indices must grow continuously, one after another,
		 * so grow_trunk is not set to avoid jumps in the index space.
		 */
		.size = sizeof(struct mlx5_legacy_flow_meter),
		.trunk_size = 64,
		.need_lock = 1,
		.release_mem_en = 1,
		.malloc = mlx5_malloc,
		.free = mlx5_free,
		.type = "mlx5_meter_ipool",
	},
	[MLX5_IPOOL_MCP] = {
		.size = sizeof(struct mlx5_flow_mreg_copy_resource),
		.trunk_size = 64,
		.grow_trunk = 3,
		.grow_shift = 2,
		.need_lock = 1,
		.release_mem_en = 1,
		.malloc = mlx5_malloc,
		.free = mlx5_free,
		.type = "mlx5_mcp_ipool",
	},
	[MLX5_IPOOL_HRXQ] = {
		.size = (sizeof(struct mlx5_hrxq) + MLX5_RSS_HASH_KEY_LEN),
		.trunk_size = 64,
		.grow_trunk = 3,
		.grow_shift = 2,
		.need_lock = 1,
		.release_mem_en = 1,
		.malloc = mlx5_malloc,
		.free = mlx5_free,
		.type = "mlx5_hrxq_ipool",
	},
	[MLX5_IPOOL_MLX5_FLOW] = {
		/*
		 * The MLX5_IPOOL_MLX5_FLOW size varies for DV and VERBS flows.
		 * It is set at run time according to the PCI function configuration.
		 */
		.size = 0,
		.trunk_size = 64,
		.grow_trunk = 3,
		.grow_shift = 2,
		.need_lock = 1,
		.release_mem_en = 0,
		.per_core_cache = 1 << 19,
		.malloc = mlx5_malloc,
		.free = mlx5_free,
		.type = "mlx5_flow_handle_ipool",
	},
	[MLX5_IPOOL_RTE_FLOW] = {
		.size = sizeof(struct rte_flow),
		.trunk_size = 4096,
		.need_lock = 1,
		.release_mem_en = 1,
		.malloc = mlx5_malloc,
		.free = mlx5_free,
		.type = "rte_flow_ipool",
	},
	[MLX5_IPOOL_RSS_EXPANTION_FLOW_ID] = {
		.size = 0,
		.need_lock = 1,
		.type = "mlx5_flow_rss_id_ipool",
	},
	[MLX5_IPOOL_RSS_SHARED_ACTIONS] = {
		.size = sizeof(struct mlx5_shared_action_rss),
		.trunk_size = 64,
		.grow_trunk = 3,
		.grow_shift = 2,
		.need_lock = 1,
		.release_mem_en = 1,
		.malloc = mlx5_malloc,
		.free = mlx5_free,
		.type = "mlx5_shared_action_rss",
	},
	[MLX5_IPOOL_MTR_POLICY] = {
		/*
		 * Policy indices must grow continuously, one after another,
		 * so grow_trunk is not set to avoid jumps in the index space.
		 */
		.size = sizeof(struct mlx5_flow_meter_sub_policy),
		.trunk_size = 64,
		.need_lock = 1,
		.release_mem_en = 1,
		.malloc = mlx5_malloc,
		.free = mlx5_free,
		.type = "mlx5_meter_policy_ipool",
	},
};

#define MLX5_FLOW_MIN_ID_POOL_SIZE 512
#define MLX5_ID_GENERATION_ARRAY_FACTOR 16

#define MLX5_FLOW_TABLE_HLIST_ARRAY_SIZE 1024

/**
 * Decide whether the representor ID is an HPF (host PF) port on BF2.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 *
 * @return
 *   Non-zero if HPF, otherwise 0.
 */
bool
mlx5_is_hpf(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	uint16_t repr = MLX5_REPRESENTOR_REPR(priv->representor_id);
	int type = MLX5_REPRESENTOR_TYPE(priv->representor_id);

	return priv->representor != 0 && type == RTE_ETH_REPRESENTOR_VF &&
	       MLX5_REPRESENTOR_REPR(-1) == repr;
}

/**
 * Decide whether the representor ID is an SF port representor.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 *
 * @return
 *   Non-zero if SF port representor, otherwise 0.
 */
bool
mlx5_is_sf_repr(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	int type = MLX5_REPRESENTOR_TYPE(priv->representor_id);

	return priv->representor != 0 && type == RTE_ETH_REPRESENTOR_SF;
}

/**
 * Initialize the ASO aging management structure.
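 * The structure is shared by all ports using this device context.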
 *
 * @param[in] sh
 *   Pointer to mlx5_dev_ctx_shared object.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_flow_aso_age_mng_init(struct mlx5_dev_ctx_shared *sh)
{
	int err;

	if (sh->aso_age_mng)
		return 0;
	sh->aso_age_mng = mlx5_malloc(MLX5_MEM_ZERO, sizeof(*sh->aso_age_mng),
				      RTE_CACHE_LINE_SIZE, SOCKET_ID_ANY);
	if (!sh->aso_age_mng) {
		DRV_LOG(ERR, "aso_age_mng allocation failed.");
		rte_errno = ENOMEM;
		return -ENOMEM;
	}
	err = mlx5_aso_queue_init(sh, ASO_OPC_MOD_FLOW_HIT, 1);
	if (err) {
		mlx5_free(sh->aso_age_mng);
		return -1;
	}
	rte_rwlock_init(&sh->aso_age_mng->resize_rwl);
	rte_spinlock_init(&sh->aso_age_mng->free_sl);
	LIST_INIT(&sh->aso_age_mng->free);
	return 0;
}

/**
 * Close and release all the resources of the ASO aging management structure.
 *
 * @param[in] sh
 *   Pointer to mlx5_dev_ctx_shared object to free.
 */
static void
mlx5_flow_aso_age_mng_close(struct mlx5_dev_ctx_shared *sh)
{
	int i, j;

	mlx5_aso_flow_hit_queue_poll_stop(sh);
	mlx5_aso_queue_uninit(sh, ASO_OPC_MOD_FLOW_HIT);
	if (sh->aso_age_mng->pools) {
		struct mlx5_aso_age_pool *pool;

		for (i = 0; i < sh->aso_age_mng->next; ++i) {
			pool = sh->aso_age_mng->pools[i];
			claim_zero(mlx5_devx_cmd_destroy
				   (pool->flow_hit_aso_obj));
			for (j = 0; j < MLX5_COUNTERS_PER_POOL; ++j)
				if (pool->actions[j].dr_action)
					claim_zero
					(mlx5_flow_os_destroy_flow_action
					 (pool->actions[j].dr_action));
			mlx5_free(pool);
		}
		mlx5_free(sh->aso_age_mng->pools);
	}
	mlx5_free(sh->aso_age_mng);
}

/**
 * Initialize the shared aging list information per port.
 *
 * @param[in] sh
 *   Pointer to mlx5_dev_ctx_shared object.
 */
static void
mlx5_flow_aging_init(struct mlx5_dev_ctx_shared *sh)
{
	uint32_t i;
	struct mlx5_age_info *age_info;

	/*
	 * In HW steering, the aging information structure is initialized
	 * later, during the configure function.
	 */
	if (sh->config.dv_flow_en == 2)
		return;
	for (i = 0; i < sh->max_port; i++) {
		age_info = &sh->port[i].age_info;
		age_info->flags = 0;
		TAILQ_INIT(&age_info->aged_counters);
		LIST_INIT(&age_info->aged_aso);
		rte_spinlock_init(&age_info->aged_sl);
		MLX5_AGE_SET(age_info, MLX5_AGE_TRIGGER);
	}
}

/**
 * DV flow counter mode detect and config.
 *
 * @param dev
 *   Pointer to rte_eth_dev structure.
 */
void
mlx5_flow_counter_mode_config(struct rte_eth_dev *dev __rte_unused)
{
#ifdef HAVE_IBV_FLOW_DV_SUPPORT
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_dev_ctx_shared *sh = priv->sh;
	struct mlx5_hca_attr *hca_attr = &sh->cdev->config.hca_attr;
	bool fallback;

#ifndef HAVE_IBV_DEVX_ASYNC
	fallback = true;
#else
	fallback = false;
	if (!sh->cdev->config.devx || !sh->config.dv_flow_en ||
	    !hca_attr->flow_counters_dump ||
	    !(hca_attr->flow_counter_bulk_alloc_bitmap & 0x4) ||
	    (mlx5_flow_dv_discover_counter_offset_support(dev) == -ENOTSUP))
		fallback = true;
#endif
	if (fallback)
		DRV_LOG(INFO, "Use fall-back DV counter management. Flow "
			"counter dump:%d, bulk_alloc_bitmap:0x%hhx.",
			hca_attr->flow_counters_dump,
			hca_attr->flow_counter_bulk_alloc_bitmap);
	/* Initialize fallback mode only on the port that initializes sh.
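	 * All ports sharing this context are expected to derive the same
	 * mode; a mismatch only triggers the warning below.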
	 */
	if (sh->refcnt == 1)
		sh->sws_cmng.counter_fallback = fallback;
	else if (fallback != sh->sws_cmng.counter_fallback)
		DRV_LOG(WARNING, "Port %d in sh has a different fallback mode "
			"than others: %d.", PORT_ID(priv), fallback);
#endif
}

/**
 * Initialize the counters management structure.
 *
 * @param[in] sh
 *   Pointer to mlx5_dev_ctx_shared object.
 */
static void
mlx5_flow_counters_mng_init(struct mlx5_dev_ctx_shared *sh)
{
	int i, j;

	if (sh->config.dv_flow_en < 2) {
		memset(&sh->sws_cmng, 0, sizeof(sh->sws_cmng));
		TAILQ_INIT(&sh->sws_cmng.flow_counters);
		sh->sws_cmng.min_id = MLX5_CNT_BATCH_OFFSET;
		sh->sws_cmng.max_id = -1;
		sh->sws_cmng.last_pool_idx = POOL_IDX_INVALID;
		rte_spinlock_init(&sh->sws_cmng.pool_update_sl);
		for (i = 0; i < MLX5_COUNTER_TYPE_MAX; i++) {
			TAILQ_INIT(&sh->sws_cmng.counters[i]);
			rte_spinlock_init(&sh->sws_cmng.csl[i]);
		}
	} else {
		struct mlx5_hca_attr *attr = &sh->cdev->config.hca_attr;
		uint32_t fw_max_nb_cnts = attr->max_flow_counter;
		uint8_t log_dcs = log2above(fw_max_nb_cnts) - 1;
		uint32_t max_nb_cnts = 0;

		for (i = 0, j = 0; j < MLX5_HWS_CNT_DCS_NUM; ++i) {
			int log_dcs_i = log_dcs - i;

			if (log_dcs_i < 0)
				break;
			if ((max_nb_cnts | RTE_BIT32(log_dcs_i)) >
			    fw_max_nb_cnts)
				continue;
			max_nb_cnts |= RTE_BIT32(log_dcs_i);
			j++;
		}
		sh->hws_max_log_bulk_sz = log_dcs;
		sh->hws_max_nb_counters = max_nb_cnts;
	}
}

/**
 * Destroy all the resources allocated for a counter memory management.
 *
 * @param[in] mng
 *   Pointer to the memory management structure.
 */
static void
mlx5_flow_destroy_counter_stat_mem_mng(struct mlx5_counter_stats_mem_mng *mng)
{
	uint8_t *mem = (uint8_t *)(uintptr_t)mng->raws[0].data;

	LIST_REMOVE(mng, next);
	mlx5_os_wrapped_mkey_destroy(&mng->wm);
	mlx5_free(mem);
}

/**
 * Close and release all the resources of the counters management.
 *
 * @param[in] sh
 *   Pointer to mlx5_dev_ctx_shared object to free.
 */
static void
mlx5_flow_counters_mng_close(struct mlx5_dev_ctx_shared *sh)
{
	struct mlx5_counter_stats_mem_mng *mng;
	int i, j;
	int retries = 1024;

	rte_errno = 0;
	while (--retries) {
		rte_eal_alarm_cancel(mlx5_flow_query_alarm, sh);
		if (rte_errno != EINPROGRESS)
			break;
		rte_pause();
	}

	if (sh->sws_cmng.pools) {
		struct mlx5_flow_counter_pool *pool;
		uint16_t n_valid = sh->sws_cmng.n_valid;
		bool fallback = sh->sws_cmng.counter_fallback;

		for (i = 0; i < n_valid; ++i) {
			pool = sh->sws_cmng.pools[i];
			if (!fallback && pool->min_dcs)
				claim_zero(mlx5_devx_cmd_destroy
					   (pool->min_dcs));
			for (j = 0; j < MLX5_COUNTERS_PER_POOL; ++j) {
				struct mlx5_flow_counter *cnt =
						MLX5_POOL_GET_CNT(pool, j);

				if (cnt->action)
					claim_zero
					 (mlx5_flow_os_destroy_flow_action
					  (cnt->action));
				if (fallback && MLX5_POOL_GET_CNT
				    (pool, j)->dcs_when_free)
					claim_zero(mlx5_devx_cmd_destroy
						   (cnt->dcs_when_free));
			}
			mlx5_free(pool);
		}
		mlx5_free(sh->sws_cmng.pools);
	}
	mng = LIST_FIRST(&sh->sws_cmng.mem_mngs);
	while (mng) {
		mlx5_flow_destroy_counter_stat_mem_mng(mng);
		mng = LIST_FIRST(&sh->sws_cmng.mem_mngs);
	}
	memset(&sh->sws_cmng, 0, sizeof(sh->sws_cmng));
}

/**
 * Initialize the ASO flow meters management structure.
 *
 * @param[in] sh
 *   Pointer to mlx5_dev_ctx_shared object.
 */
int
mlx5_aso_flow_mtrs_mng_init(struct mlx5_dev_ctx_shared *sh)
{
	if (!sh->mtrmng) {
		sh->mtrmng = mlx5_malloc(MLX5_MEM_ZERO,
			sizeof(*sh->mtrmng),
			RTE_CACHE_LINE_SIZE, SOCKET_ID_ANY);
		if (!sh->mtrmng) {
			DRV_LOG(ERR,
				"meter management allocation failed.");
			rte_errno = ENOMEM;
			return -ENOMEM;
		}
		if (sh->meter_aso_en) {
			rte_spinlock_init(&sh->mtrmng->pools_mng.mtrsl);
			rte_rwlock_init(&sh->mtrmng->pools_mng.resize_mtrwl);
			LIST_INIT(&sh->mtrmng->pools_mng.meters);
		}
		sh->mtrmng->def_policy_id = MLX5_INVALID_POLICY_ID;
	}
	return 0;
}

/**
 * Close and release all the resources of
 * the ASO flow meter management structure.
 *
 * @param[in] sh
 *   Pointer to mlx5_dev_ctx_shared object to free.
 */
static void
mlx5_aso_flow_mtrs_mng_close(struct mlx5_dev_ctx_shared *sh)
{
	struct mlx5_aso_mtr_pool *mtr_pool;
	struct mlx5_flow_mtr_mng *mtrmng = sh->mtrmng;
	uint32_t idx;
#ifdef HAVE_MLX5_DR_CREATE_ACTION_ASO
	struct mlx5_aso_mtr *aso_mtr;
	int i;
#endif /* HAVE_MLX5_DR_CREATE_ACTION_ASO */

	if (sh->meter_aso_en) {
		mlx5_aso_queue_uninit(sh, ASO_OPC_MOD_POLICER);
		idx = mtrmng->pools_mng.n_valid;
		while (idx--) {
			mtr_pool = mtrmng->pools_mng.pools[idx];
#ifdef HAVE_MLX5_DR_CREATE_ACTION_ASO
			for (i = 0; i < MLX5_ASO_MTRS_PER_POOL; i++) {
				aso_mtr = &mtr_pool->mtrs[i];
				if (aso_mtr->fm.meter_action_g)
					claim_zero
					(mlx5_glue->destroy_flow_action
					(aso_mtr->fm.meter_action_g));
				if (aso_mtr->fm.meter_action_y)
					claim_zero
					(mlx5_glue->destroy_flow_action
					(aso_mtr->fm.meter_action_y));
			}
#endif /* HAVE_MLX5_DR_CREATE_ACTION_ASO */
			claim_zero(mlx5_devx_cmd_destroy
				   (mtr_pool->devx_obj));
			mtrmng->pools_mng.n_valid--;
			mlx5_free(mtr_pool);
		}
		mlx5_free(sh->mtrmng->pools_mng.pools);
	}
	mlx5_free(sh->mtrmng);
	sh->mtrmng = NULL;
}

/* Send FLOW_AGED event if needed. */
void
mlx5_age_event_prepare(struct mlx5_dev_ctx_shared *sh)
{
	struct mlx5_age_info *age_info;
	uint32_t i;

	for (i = 0; i < sh->max_port; i++) {
		age_info = &sh->port[i].age_info;
		if (!MLX5_AGE_GET(age_info, MLX5_AGE_EVENT_NEW))
			continue;
		MLX5_AGE_UNSET(age_info, MLX5_AGE_EVENT_NEW);
		if (MLX5_AGE_GET(age_info, MLX5_AGE_TRIGGER)) {
			MLX5_AGE_UNSET(age_info, MLX5_AGE_TRIGGER);
			rte_eth_dev_callback_process
				(&rte_eth_devices[sh->port[i].devx_ih_port_id],
				 RTE_ETH_EVENT_FLOW_AGED, NULL);
		}
	}
}

/*
 * Initialize the ASO connection tracking structure.
 *
 * @param[in] sh
 *   Pointer to mlx5_dev_ctx_shared object.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_flow_aso_ct_mng_init(struct mlx5_dev_ctx_shared *sh)
{
	int err;

	if (sh->ct_mng)
		return 0;
	sh->ct_mng = mlx5_malloc(MLX5_MEM_ZERO, sizeof(*sh->ct_mng) +
				 sizeof(struct mlx5_aso_sq) * MLX5_ASO_CT_SQ_NUM,
				 RTE_CACHE_LINE_SIZE, SOCKET_ID_ANY);
	if (!sh->ct_mng) {
		DRV_LOG(ERR, "ASO CT management allocation failed.");
		rte_errno = ENOMEM;
		return -rte_errno;
	}
	err = mlx5_aso_queue_init(sh, ASO_OPC_MOD_CONNECTION_TRACKING, MLX5_ASO_CT_SQ_NUM);
	if (err) {
		mlx5_free(sh->ct_mng);
		/* rte_errno should be extracted from the failure.
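		 * Until then, report a generic EINVAL.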
		 */
		rte_errno = EINVAL;
		return -rte_errno;
	}
	rte_spinlock_init(&sh->ct_mng->ct_sl);
	rte_rwlock_init(&sh->ct_mng->resize_rwl);
	LIST_INIT(&sh->ct_mng->free_cts);
	return 0;
}

/*
 * Close and release all the resources of the
 * ASO connection tracking management structure.
 *
 * @param[in] sh
 *   Pointer to mlx5_dev_ctx_shared object to free.
 */
static void
mlx5_flow_aso_ct_mng_close(struct mlx5_dev_ctx_shared *sh)
{
	struct mlx5_aso_ct_pools_mng *mng = sh->ct_mng;
	struct mlx5_aso_ct_pool *ct_pool;
	struct mlx5_aso_ct_action *ct;
	uint32_t idx;
	uint32_t val;
	uint32_t cnt;
	int i;

	mlx5_aso_queue_uninit(sh, ASO_OPC_MOD_CONNECTION_TRACKING);
	idx = mng->next;
	while (idx--) {
		cnt = 0;
		ct_pool = mng->pools[idx];
		for (i = 0; i < MLX5_ASO_CT_ACTIONS_PER_POOL; i++) {
			ct = &ct_pool->actions[i];
			val = __atomic_fetch_sub(&ct->refcnt, 1,
						 __ATOMIC_RELAXED);
			MLX5_ASSERT(val == 1);
			if (val > 1)
				cnt++;
#ifdef HAVE_MLX5_DR_ACTION_ASO_CT
			if (ct->dr_action_orig)
				claim_zero(mlx5_glue->destroy_flow_action
					(ct->dr_action_orig));
			if (ct->dr_action_rply)
				claim_zero(mlx5_glue->destroy_flow_action
					(ct->dr_action_rply));
#endif
		}
		claim_zero(mlx5_devx_cmd_destroy(ct_pool->devx_obj));
		if (cnt) {
			DRV_LOG(DEBUG, "%u ASO CT objects are being used in the pool %u",
				cnt, idx);
		}
		mlx5_free(ct_pool);
		/* In case of failure. */
		mng->next--;
	}
	mlx5_free(mng->pools);
	mlx5_free(mng);
	/* Management structure must be cleared to 0s during allocation. */
	sh->ct_mng = NULL;
}

/**
 * Initialize the flow resources' indexed mempool.
 *
 * @param[in] sh
 *   Pointer to mlx5_dev_ctx_shared object.
 */
static void
mlx5_flow_ipool_create(struct mlx5_dev_ctx_shared *sh)
{
	uint8_t i;
	struct mlx5_indexed_pool_config cfg;

	for (i = 0; i < MLX5_IPOOL_MAX; ++i) {
		cfg = mlx5_ipool_cfg[i];
		switch (i) {
		default:
			break;
		/*
		 * Set the MLX5_IPOOL_MLX5_FLOW ipool size
		 * according to the PCI function flow configuration.
		 */
		case MLX5_IPOOL_MLX5_FLOW:
			cfg.size = sh->config.dv_flow_en ?
				   sizeof(struct mlx5_flow_handle) :
				   MLX5_FLOW_HANDLE_VERBS_SIZE;
			break;
		}
		if (sh->config.reclaim_mode) {
			cfg.release_mem_en = 1;
			cfg.per_core_cache = 0;
		} else {
			cfg.release_mem_en = 0;
		}
		sh->ipool[i] = mlx5_ipool_create(&cfg);
	}
}


/**
 * Release the flow resources' indexed mempool.
 *
 * @param[in] sh
 *   Pointer to mlx5_dev_ctx_shared object.
 */
static void
mlx5_flow_ipool_destroy(struct mlx5_dev_ctx_shared *sh)
{
	uint8_t i;

	for (i = 0; i < MLX5_IPOOL_MAX; ++i)
		mlx5_ipool_destroy(sh->ipool[i]);
	for (i = 0; i < MLX5_MAX_MODIFY_NUM; ++i)
		if (sh->mdh_ipools[i])
			mlx5_ipool_destroy(sh->mdh_ipools[i]);
}

/*
 * Check if the dynamic flex parser for eCPRI already exists.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 *
 * @return
 *   true if it exists, false otherwise.
 */
bool
mlx5_flex_parser_ecpri_exist(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_ecpri_parser_profile *prf = &priv->sh->ecpri_parser;

	return !!prf->obj;
}

/*
 * Allocation of a flex parser for eCPRI.
 * Once created, the resources related to this parser are held until the
 * device is closed.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_flex_parser_ecpri_alloc(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_ecpri_parser_profile *prf = &priv->sh->ecpri_parser;
	struct mlx5_devx_graph_node_attr node = {
		.modify_field_select = 0,
	};
	uint32_t ids[8];
	int ret;

	if (!priv->sh->cdev->config.hca_attr.parse_graph_flex_node) {
		DRV_LOG(ERR, "Dynamic flex parser is not supported "
			"for device %s.", priv->dev_data->name);
		return -ENOTSUP;
	}
	node.header_length_mode = MLX5_GRAPH_NODE_LEN_FIXED;
	/* 8 bytes now: 4B common header + 4B message body header. */
	node.header_length_base_value = 0x8;
	/* After MAC layer: Ether / VLAN. */
	node.in[0].arc_parse_graph_node = MLX5_GRAPH_ARC_NODE_MAC;
	/* Type of compared condition should be 0xAEFE in the L2 layer. */
	node.in[0].compare_condition_value = RTE_ETHER_TYPE_ECPRI;
	/* Sample #0: type in common header. */
	node.sample[0].flow_match_sample_en = 1;
	/* Fixed offset. */
	node.sample[0].flow_match_sample_offset_mode = 0x0;
	/* Only the 2nd byte will be used. */
	node.sample[0].flow_match_sample_field_base_offset = 0x0;
	/* Sample #1: message payload. */
	node.sample[1].flow_match_sample_en = 1;
	/* Fixed offset. */
	node.sample[1].flow_match_sample_offset_mode = 0x0;
	/*
	 * Only the first two bytes will be used right now, and their offset
	 * starts after the common header, which has the length of a DW (u32).
	 */
	node.sample[1].flow_match_sample_field_base_offset = sizeof(uint32_t);
	prf->obj = mlx5_devx_cmd_create_flex_parser(priv->sh->cdev->ctx, &node);
	if (!prf->obj) {
		DRV_LOG(ERR, "Failed to create flex parser node object.");
		return (rte_errno == 0) ? -ENODEV : -rte_errno;
	}
	prf->num = 2;
	ret = mlx5_devx_cmd_query_parse_samples(prf->obj, ids, prf->num);
	if (ret) {
		DRV_LOG(ERR, "Failed to query sample IDs.");
		return (rte_errno == 0) ? -ENODEV : -rte_errno;
	}
	prf->offset[0] = 0x0;
	prf->offset[1] = sizeof(uint32_t);
	prf->ids[0] = ids[0];
	prf->ids[1] = ids[1];
	return 0;
}

/*
 * Destroy the flex parser node, including the parser itself, input / output
 * arcs and DW samples. Resources could be reused then.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 */
static void
mlx5_flex_parser_ecpri_release(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_ecpri_parser_profile *prf = &priv->sh->ecpri_parser;

	if (prf->obj)
		mlx5_devx_cmd_destroy(prf->obj);
	prf->obj = NULL;
}

uint32_t
mlx5_get_supported_sw_parsing_offloads(const struct mlx5_hca_attr *attr)
{
	uint32_t sw_parsing_offloads = 0;

	if (attr->swp) {
		sw_parsing_offloads |= MLX5_SW_PARSING_CAP;
		if (attr->swp_csum)
			sw_parsing_offloads |= MLX5_SW_PARSING_CSUM_CAP;
		if (attr->swp_lso)
			sw_parsing_offloads |= MLX5_SW_PARSING_TSO_CAP;
	}
	return sw_parsing_offloads;
}

uint32_t
mlx5_get_supported_tunneling_offloads(const struct mlx5_hca_attr *attr)
{
	uint32_t tn_offloads = 0;

	if (attr->tunnel_stateless_vxlan)
		tn_offloads |= MLX5_TUNNELED_OFFLOADS_VXLAN_CAP;
	if (attr->tunnel_stateless_gre)
		tn_offloads |= MLX5_TUNNELED_OFFLOADS_GRE_CAP;
	if (attr->tunnel_stateless_geneve_rx)
		tn_offloads |= MLX5_TUNNELED_OFFLOADS_GENEVE_CAP;
	return tn_offloads;
}

/* Fill all fields of the UAR structure. */
static int
mlx5_rxtx_uars_prepare(struct mlx5_dev_ctx_shared *sh)
{
	int ret;

	ret = mlx5_devx_uar_prepare(sh->cdev, &sh->tx_uar);
	if (ret) {
		DRV_LOG(ERR, "Failed to prepare Tx DevX UAR.");
		return -rte_errno;
	}
	MLX5_ASSERT(sh->tx_uar.obj);
	MLX5_ASSERT(mlx5_os_get_devx_uar_base_addr(sh->tx_uar.obj));
	ret = mlx5_devx_uar_prepare(sh->cdev, &sh->rx_uar);
	if (ret) {
		DRV_LOG(ERR, "Failed to prepare Rx DevX UAR.");
		mlx5_devx_uar_release(&sh->tx_uar);
		return -rte_errno;
	}
	MLX5_ASSERT(sh->rx_uar.obj);
	MLX5_ASSERT(mlx5_os_get_devx_uar_base_addr(sh->rx_uar.obj));
	return 0;
}

static void
mlx5_rxtx_uars_release(struct mlx5_dev_ctx_shared *sh)
{
	mlx5_devx_uar_release(&sh->rx_uar);
	mlx5_devx_uar_release(&sh->tx_uar);
}

/**
 * rte_mempool_walk() callback to unregister Rx mempools.
 * It is used when implicit mempool registration is disabled.
 *
 * @param mp
 *   The mempool being walked.
 * @param arg
 *   Pointer to the device shared context.
 */
static void
mlx5_dev_ctx_shared_rx_mempool_unregister_cb(struct rte_mempool *mp, void *arg)
{
	struct mlx5_dev_ctx_shared *sh = arg;

	mlx5_dev_mempool_unregister(sh->cdev, mp);
}

/**
 * Callback used when implicit mempool registration is disabled
 * in order to track Rx mempool destruction.
 *
 * @param event
 *   Mempool life cycle event.
 * @param mp
 *   An Rx mempool registered explicitly when the port is started.
 * @param arg
 *   Pointer to a device shared context.
 */
static void
mlx5_dev_ctx_shared_rx_mempool_event_cb(enum rte_mempool_event event,
					struct rte_mempool *mp, void *arg)
{
	struct mlx5_dev_ctx_shared *sh = arg;

	if (event == RTE_MEMPOOL_EVENT_DESTROY)
		mlx5_dev_mempool_unregister(sh->cdev, mp);
}

int
mlx5_dev_ctx_shared_mempool_subscribe(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_dev_ctx_shared *sh = priv->sh;
	int ret;

	/* Check if we only need to track Rx mempool destruction.
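	 * When implicit MR mempool registration is enabled, the full
	 * mempool subscription below is used instead.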
	 */
	if (!sh->cdev->config.mr_mempool_reg_en) {
		ret = rte_mempool_event_callback_register
				(mlx5_dev_ctx_shared_rx_mempool_event_cb, sh);
		return ret == 0 || rte_errno == EEXIST ? 0 : ret;
	}
	return mlx5_dev_mempool_subscribe(sh->cdev);
}

/**
 * Set up multiple TISs with different affinities according to
 * the number of bonding ports.
 *
 * @param sh
 *   Pointer to shared device context.
 *
 * @return
 *   Zero on success, -1 otherwise.
 */
static int
mlx5_setup_tis(struct mlx5_dev_ctx_shared *sh)
{
	int i;
	struct mlx5_devx_lag_context lag_ctx = { 0 };
	struct mlx5_devx_tis_attr tis_attr = { 0 };

	tis_attr.transport_domain = sh->td->id;
	if (sh->bond.n_port) {
		if (!mlx5_devx_cmd_query_lag(sh->cdev->ctx, &lag_ctx)) {
			sh->lag.tx_remap_affinity[0] =
				lag_ctx.tx_remap_affinity_1;
			sh->lag.tx_remap_affinity[1] =
				lag_ctx.tx_remap_affinity_2;
			sh->lag.affinity_mode = lag_ctx.port_select_mode;
		} else {
			DRV_LOG(ERR, "Failed to query lag affinity.");
			return -1;
		}
		if (sh->lag.affinity_mode == MLX5_LAG_MODE_TIS) {
			for (i = 0; i < sh->bond.n_port; i++) {
				tis_attr.lag_tx_port_affinity =
					MLX5_IFC_LAG_MAP_TIS_AFFINITY(i,
							sh->bond.n_port);
				sh->tis[i] = mlx5_devx_cmd_create_tis(sh->cdev->ctx,
						&tis_attr);
				if (!sh->tis[i]) {
					DRV_LOG(ERR, "Failed to create TIS %d/%d for bonding device"
						" %s.", i, sh->bond.n_port,
						sh->ibdev_name);
					return -1;
				}
			}
			DRV_LOG(DEBUG, "LAG number of ports : %d, affinity_1 & 2 : pf%d & %d.\n",
				sh->bond.n_port, lag_ctx.tx_remap_affinity_1,
				lag_ctx.tx_remap_affinity_2);
			return 0;
		}
		if (sh->lag.affinity_mode == MLX5_LAG_MODE_HASH)
			DRV_LOG(INFO, "Device %s enabled HW hash based LAG.",
				sh->ibdev_name);
	}
	tis_attr.lag_tx_port_affinity = 0;
	sh->tis[0] = mlx5_devx_cmd_create_tis(sh->cdev->ctx, &tis_attr);
	if (!sh->tis[0]) {
		DRV_LOG(ERR, "Failed to create TIS 0 for bonding device"
			" %s.", sh->ibdev_name);
		return -1;
	}
	return 0;
}

/**
 * Verify and store the value of a shared device argument.
 *
 * @param[in] key
 *   Key argument to verify.
 * @param[in] val
 *   Value associated with key.
 * @param opaque
 *   User data.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_dev_args_check_handler(const char *key, const char *val, void *opaque)
{
	struct mlx5_sh_config *config = opaque;
	signed long tmp;

	errno = 0;
	tmp = strtol(val, NULL, 0);
	if (errno) {
		rte_errno = errno;
		DRV_LOG(WARNING, "%s: \"%s\" is not a valid integer", key, val);
		return -rte_errno;
	}
	if (tmp < 0 && strcmp(MLX5_TX_PP, key) && strcmp(MLX5_TX_SKEW, key)) {
		/* Negative values are acceptable for some keys only. */
		rte_errno = EINVAL;
		DRV_LOG(WARNING, "%s: invalid negative value \"%s\"", key, val);
		return -rte_errno;
	}
	if (strcmp(MLX5_TX_PP, key) == 0) {
		unsigned long mod = tmp >= 0 ?
				    tmp : -tmp;

		if (!mod) {
			DRV_LOG(ERR, "Zero Tx packet pacing parameter.");
			rte_errno = EINVAL;
			return -rte_errno;
		}
		config->tx_pp = tmp;
	} else if (strcmp(MLX5_TX_SKEW, key) == 0) {
		config->tx_skew = tmp;
	} else if (strcmp(MLX5_L3_VXLAN_EN, key) == 0) {
		config->l3_vxlan_en = !!tmp;
	} else if (strcmp(MLX5_VF_NL_EN, key) == 0) {
		config->vf_nl_en = !!tmp;
	} else if (strcmp(MLX5_DV_ESW_EN, key) == 0) {
		config->dv_esw_en = !!tmp;
	} else if (strcmp(MLX5_DV_FLOW_EN, key) == 0) {
		if (tmp > 2) {
			DRV_LOG(ERR, "Invalid %s parameter.", key);
			rte_errno = EINVAL;
			return -rte_errno;
		}
		config->dv_flow_en = tmp;
	} else if (strcmp(MLX5_DV_XMETA_EN, key) == 0) {
		if (tmp != MLX5_XMETA_MODE_LEGACY &&
		    tmp != MLX5_XMETA_MODE_META16 &&
		    tmp != MLX5_XMETA_MODE_META32 &&
		    tmp != MLX5_XMETA_MODE_MISS_INFO &&
		    tmp != MLX5_XMETA_MODE_META32_HWS) {
			DRV_LOG(ERR, "Invalid extensive metadata parameter.");
			rte_errno = EINVAL;
			return -rte_errno;
		}
		if (tmp != MLX5_XMETA_MODE_MISS_INFO)
			config->dv_xmeta_en = tmp;
		else
			config->dv_miss_info = 1;
	} else if (strcmp(MLX5_LACP_BY_USER, key) == 0) {
		config->lacp_by_user = !!tmp;
	} else if (strcmp(MLX5_RECLAIM_MEM, key) == 0) {
		if (tmp != MLX5_RCM_NONE &&
		    tmp != MLX5_RCM_LIGHT &&
		    tmp != MLX5_RCM_AGGR) {
			DRV_LOG(ERR, "Unrecognized %s: \"%s\"", key, val);
			rte_errno = EINVAL;
			return -rte_errno;
		}
		config->reclaim_mode = tmp;
	} else if (strcmp(MLX5_DECAP_EN, key) == 0) {
		config->decap_en = !!tmp;
	} else if (strcmp(MLX5_ALLOW_DUPLICATE_PATTERN, key) == 0) {
		config->allow_duplicate_pattern = !!tmp;
	} else if (strcmp(MLX5_FDB_DEFAULT_RULE_EN, key) == 0) {
		config->fdb_def_rule = !!tmp;
	} else if (strcmp(MLX5_HWS_CNT_SERVICE_CORE, key) == 0) {
		config->cnt_svc.service_core = tmp;
	} else if (strcmp(MLX5_HWS_CNT_CYCLE_TIME, key) == 0) {
		config->cnt_svc.cycle_time = tmp;
	} else if (strcmp(MLX5_REPR_MATCHING_EN, key) == 0) {
		config->repr_matching = !!tmp;
	}
	return 0;
}

/**
 * Parse user device parameters and adjust them according to device
 * capabilities.
 *
 * @param sh
 *   Pointer to shared device context.
 * @param mkvlist
 *   Pointer to mlx5 kvargs control, can be NULL if there is no devargs.
 * @param config
 *   Pointer to shared device configuration structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_shared_dev_ctx_args_config(struct mlx5_dev_ctx_shared *sh,
				struct mlx5_kvargs_ctrl *mkvlist,
				struct mlx5_sh_config *config)
{
	const char **params = (const char *[]){
		MLX5_TX_PP,
		MLX5_TX_SKEW,
		MLX5_L3_VXLAN_EN,
		MLX5_VF_NL_EN,
		MLX5_DV_ESW_EN,
		MLX5_DV_FLOW_EN,
		MLX5_DV_XMETA_EN,
		MLX5_LACP_BY_USER,
		MLX5_RECLAIM_MEM,
		MLX5_DECAP_EN,
		MLX5_ALLOW_DUPLICATE_PATTERN,
		MLX5_FDB_DEFAULT_RULE_EN,
		MLX5_HWS_CNT_SERVICE_CORE,
		MLX5_HWS_CNT_CYCLE_TIME,
		MLX5_REPR_MATCHING_EN,
		NULL,
	};
	int ret = 0;

	/* Default configuration.
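	 * Any devargs processed below override these values.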
	 */
	memset(config, 0, sizeof(*config));
	config->vf_nl_en = 1;
	config->dv_esw_en = 1;
	config->dv_flow_en = 1;
	config->decap_en = 1;
	config->allow_duplicate_pattern = 1;
	config->fdb_def_rule = 1;
	config->cnt_svc.cycle_time = MLX5_CNT_SVC_CYCLE_TIME_DEFAULT;
	config->cnt_svc.service_core = rte_get_main_lcore();
	config->repr_matching = 1;
	if (mkvlist != NULL) {
		/* Process parameters. */
		ret = mlx5_kvargs_process(mkvlist, params,
					  mlx5_dev_args_check_handler, config);
		if (ret) {
			DRV_LOG(ERR, "Failed to process device arguments: %s",
				strerror(rte_errno));
			return -rte_errno;
		}
	}
	/* Adjust parameters according to device capabilities. */
	if (config->dv_flow_en && !sh->dev_cap.dv_flow_en) {
		DRV_LOG(WARNING, "DV flow is not supported.");
		config->dv_flow_en = 0;
	}
	if (config->dv_esw_en && !sh->dev_cap.dv_esw_en) {
		DRV_LOG(DEBUG, "E-Switch DV flow is not supported.");
		config->dv_esw_en = 0;
	}
	if (config->dv_esw_en && !config->dv_flow_en) {
		DRV_LOG(DEBUG,
			"E-Switch DV flow is supported only when DV flow is enabled.");
		config->dv_esw_en = 0;
	}
	if (config->dv_miss_info && config->dv_esw_en)
		config->dv_xmeta_en = MLX5_XMETA_MODE_META16;
	if (!config->dv_esw_en &&
	    config->dv_xmeta_en != MLX5_XMETA_MODE_LEGACY) {
		DRV_LOG(WARNING,
			"Metadata mode %u is not supported (no E-Switch).",
			config->dv_xmeta_en);
		config->dv_xmeta_en = MLX5_XMETA_MODE_LEGACY;
	}
	if (config->dv_flow_en != 2 && !config->repr_matching) {
		DRV_LOG(DEBUG, "Disabling representor matching is valid only "
			"when HW Steering is enabled.");
		config->repr_matching = 1;
	}
	if (config->tx_pp && !sh->dev_cap.txpp_en) {
		DRV_LOG(ERR, "Packet pacing is not supported.");
		rte_errno = ENODEV;
		return -rte_errno;
	}
	if (!config->tx_pp && config->tx_skew) {
		DRV_LOG(WARNING,
			"\"tx_skew\" has no effect without \"tx_pp\".");
	}
	/* Check for LRO support. */
	if (mlx5_devx_obj_ops_en(sh) && sh->cdev->config.hca_attr.lro_cap) {
		/* TBD check tunnel lro caps. */
		config->lro_allowed = 1;
		DRV_LOG(DEBUG, "LRO is allowed.");
		DRV_LOG(DEBUG,
			"LRO minimal size of TCP segment required for coalescing is %d bytes.",
			sh->cdev->config.hca_attr.lro_min_mss_size);
	}
	/*
	 * If the HW has a bug working with tunnel packet decapsulation and
	 * scatter FCS, and decapsulation is needed, clear the hw_fcs_strip
	 * bit. Then the RTE_ETH_RX_OFFLOAD_KEEP_CRC bit will not be set anymore.
	 */
	if (sh->dev_cap.scatter_fcs_w_decap_disable && sh->config.decap_en)
		config->hw_fcs_strip = 0;
	else
		config->hw_fcs_strip = sh->dev_cap.hw_fcs_strip;
	DRV_LOG(DEBUG, "FCS stripping configuration is %ssupported",
		(config->hw_fcs_strip ?
"" : "not ")); 1412 DRV_LOG(DEBUG, "\"tx_pp\" is %d.", config->tx_pp); 1413 DRV_LOG(DEBUG, "\"tx_skew\" is %d.", config->tx_skew); 1414 DRV_LOG(DEBUG, "\"reclaim_mode\" is %u.", config->reclaim_mode); 1415 DRV_LOG(DEBUG, "\"dv_esw_en\" is %u.", config->dv_esw_en); 1416 DRV_LOG(DEBUG, "\"dv_flow_en\" is %u.", config->dv_flow_en); 1417 DRV_LOG(DEBUG, "\"dv_xmeta_en\" is %u.", config->dv_xmeta_en); 1418 DRV_LOG(DEBUG, "\"dv_miss_info\" is %u.", config->dv_miss_info); 1419 DRV_LOG(DEBUG, "\"l3_vxlan_en\" is %u.", config->l3_vxlan_en); 1420 DRV_LOG(DEBUG, "\"vf_nl_en\" is %u.", config->vf_nl_en); 1421 DRV_LOG(DEBUG, "\"lacp_by_user\" is %u.", config->lacp_by_user); 1422 DRV_LOG(DEBUG, "\"decap_en\" is %u.", config->decap_en); 1423 DRV_LOG(DEBUG, "\"allow_duplicate_pattern\" is %u.", 1424 config->allow_duplicate_pattern); 1425 DRV_LOG(DEBUG, "\"fdb_def_rule_en\" is %u.", config->fdb_def_rule); 1426 DRV_LOG(DEBUG, "\"repr_matching_en\" is %u.", config->repr_matching); 1427 return 0; 1428 } 1429 1430 /** 1431 * Configure realtime timestamp format. 1432 * 1433 * @param sh 1434 * Pointer to mlx5_dev_ctx_shared object. 1435 * @param hca_attr 1436 * Pointer to DevX HCA capabilities structure. 1437 */ 1438 void 1439 mlx5_rt_timestamp_config(struct mlx5_dev_ctx_shared *sh, 1440 struct mlx5_hca_attr *hca_attr) 1441 { 1442 uint32_t dw_cnt = MLX5_ST_SZ_DW(register_mtutc); 1443 uint32_t reg[dw_cnt]; 1444 int ret = ENOTSUP; 1445 1446 if (hca_attr->access_register_user) 1447 ret = mlx5_devx_cmd_register_read(sh->cdev->ctx, 1448 MLX5_REGISTER_ID_MTUTC, 0, 1449 reg, dw_cnt); 1450 if (!ret) { 1451 uint32_t ts_mode; 1452 1453 /* MTUTC register is read successfully. */ 1454 ts_mode = MLX5_GET(register_mtutc, reg, time_stamp_mode); 1455 if (ts_mode == MLX5_MTUTC_TIMESTAMP_MODE_REAL_TIME) 1456 sh->dev_cap.rt_timestamp = 1; 1457 } else { 1458 /* Kernel does not support register reading. */ 1459 if (hca_attr->dev_freq_khz == (NS_PER_S / MS_PER_S)) 1460 sh->dev_cap.rt_timestamp = 1; 1461 } 1462 } 1463 1464 /** 1465 * Allocate shared device context. If there is multiport device the 1466 * master and representors will share this context, if there is single 1467 * port dedicated device, the context will be used by only given 1468 * port due to unification. 1469 * 1470 * Routine first searches the context for the specified device name, 1471 * if found the shared context assumed and reference counter is incremented. 1472 * If no context found the new one is created and initialized with specified 1473 * device context and parameters. 1474 * 1475 * @param[in] spawn 1476 * Pointer to the device attributes (name, port, etc). 1477 * @param mkvlist 1478 * Pointer to mlx5 kvargs control, can be NULL if there is no devargs. 1479 * 1480 * @return 1481 * Pointer to mlx5_dev_ctx_shared object on success, 1482 * otherwise NULL and rte_errno is set. 1483 */ 1484 struct mlx5_dev_ctx_shared * 1485 mlx5_alloc_shared_dev_ctx(const struct mlx5_dev_spawn_data *spawn, 1486 struct mlx5_kvargs_ctrl *mkvlist) 1487 { 1488 struct mlx5_dev_ctx_shared *sh; 1489 int err = 0; 1490 uint32_t i; 1491 1492 MLX5_ASSERT(spawn); 1493 /* Secondary process should not create the shared context. */ 1494 MLX5_ASSERT(rte_eal_process_type() == RTE_PROC_PRIMARY); 1495 pthread_mutex_lock(&mlx5_dev_ctx_list_mutex); 1496 /* Search for IB context by device name. 
	 */
	LIST_FOREACH(sh, &mlx5_dev_ctx_list, next) {
		if (!strcmp(sh->ibdev_name, spawn->phys_dev_name)) {
			sh->refcnt++;
			goto exit;
		}
	}
	/* No device found, we have to create a new shared context. */
	MLX5_ASSERT(spawn->max_port);
	sh = mlx5_malloc(MLX5_MEM_ZERO | MLX5_MEM_RTE,
			 sizeof(struct mlx5_dev_ctx_shared) +
			 spawn->max_port * sizeof(struct mlx5_dev_shared_port),
			 RTE_CACHE_LINE_SIZE, SOCKET_ID_ANY);
	if (!sh) {
		DRV_LOG(ERR, "Shared context allocation failure.");
		rte_errno = ENOMEM;
		goto exit;
	}
	pthread_mutex_init(&sh->txpp.mutex, NULL);
	sh->numa_node = spawn->cdev->dev->numa_node;
	sh->cdev = spawn->cdev;
	sh->esw_mode = !!(spawn->info.master || spawn->info.representor);
	if (spawn->bond_info)
		sh->bond = *spawn->bond_info;
	err = mlx5_os_capabilities_prepare(sh);
	if (err) {
		DRV_LOG(ERR, "Failed to configure device capabilities.");
		goto error;
	}
	err = mlx5_shared_dev_ctx_args_config(sh, mkvlist, &sh->config);
	if (err) {
		DRV_LOG(ERR, "Failed to process device configure: %s",
			strerror(rte_errno));
		goto error;
	}
	sh->refcnt = 1;
	sh->max_port = spawn->max_port;
	strncpy(sh->ibdev_name, mlx5_os_get_ctx_device_name(sh->cdev->ctx),
		sizeof(sh->ibdev_name) - 1);
	strncpy(sh->ibdev_path, mlx5_os_get_ctx_device_path(sh->cdev->ctx),
		sizeof(sh->ibdev_path) - 1);
	/*
	 * Setting port_id to the max unallowed value means there is no
	 * interrupt subhandler installed for the given port index i.
	 */
	for (i = 0; i < sh->max_port; i++) {
		sh->port[i].ih_port_id = RTE_MAX_ETHPORTS;
		sh->port[i].devx_ih_port_id = RTE_MAX_ETHPORTS;
		sh->port[i].nl_ih_port_id = RTE_MAX_ETHPORTS;
	}
	if (sh->cdev->config.devx) {
		sh->td = mlx5_devx_cmd_create_td(sh->cdev->ctx);
		if (!sh->td) {
			DRV_LOG(ERR, "TD allocation failure");
			rte_errno = ENOMEM;
			goto error;
		}
		if (mlx5_setup_tis(sh)) {
			DRV_LOG(ERR, "TIS allocation failure");
			rte_errno = ENOMEM;
			goto error;
		}
		err = mlx5_rxtx_uars_prepare(sh);
		if (err)
			goto error;
#ifndef RTE_ARCH_64
	} else {
		/* Initialize UAR access locks for 32-bit implementations. */
		rte_spinlock_init(&sh->uar_lock_cq);
		for (i = 0; i < MLX5_UAR_PAGE_NUM_MAX; i++)
			rte_spinlock_init(&sh->uar_lock[i]);
#endif
	}
	mlx5_os_dev_shared_handler_install(sh);
	if (LIST_EMPTY(&mlx5_dev_ctx_list)) {
		err = mlx5_flow_os_init_workspace_once();
		if (err)
			goto error;
	}
	mlx5_flow_aging_init(sh);
	mlx5_flow_counters_mng_init(sh);
	mlx5_flow_ipool_create(sh);
	/* Add context to the global device list. */
	LIST_INSERT_HEAD(&mlx5_dev_ctx_list, sh, next);
	rte_spinlock_init(&sh->geneve_tlv_opt_sl);
exit:
	pthread_mutex_unlock(&mlx5_dev_ctx_list_mutex);
	return sh;
error:
	err = rte_errno;
	pthread_mutex_destroy(&sh->txpp.mutex);
	pthread_mutex_unlock(&mlx5_dev_ctx_list_mutex);
	MLX5_ASSERT(sh);
	mlx5_rxtx_uars_release(sh);
	i = 0;
	do {
		if (sh->tis[i])
			claim_zero(mlx5_devx_cmd_destroy(sh->tis[i]));
	} while (++i < (uint32_t)sh->bond.n_port);
	if (sh->td)
		claim_zero(mlx5_devx_cmd_destroy(sh->td));
	mlx5_free(sh);
	rte_errno = err;
	return NULL;
}

/**
 * Create the LWM event channel and interrupt handle for the shared device
 * context.
 * All Rx queues sharing the device context share the event channel.
 * A callback is registered in the interrupt thread to receive the LWM event.
 *
 * @param[in] priv
 *   Pointer to mlx5_priv instance.
 *
 * @return
 *   0 on success, negative with rte_errno set.
 */
int
mlx5_lwm_setup(struct mlx5_priv *priv)
{
	int fd_lwm;

	pthread_mutex_init(&priv->sh->lwm_config_lock, NULL);
	priv->sh->devx_channel_lwm = mlx5_os_devx_create_event_channel
		(priv->sh->cdev->ctx,
		 MLX5DV_DEVX_CREATE_EVENT_CHANNEL_FLAGS_OMIT_EV_DATA);
	if (!priv->sh->devx_channel_lwm)
		goto err;
	fd_lwm = mlx5_os_get_devx_channel_fd(priv->sh->devx_channel_lwm);
	priv->sh->intr_handle_lwm = mlx5_os_interrupt_handler_create
		(RTE_INTR_INSTANCE_F_SHARED, true,
		 fd_lwm, mlx5_dev_interrupt_handler_lwm, priv);
	if (!priv->sh->intr_handle_lwm)
		goto err;
	return 0;
err:
	if (priv->sh->devx_channel_lwm) {
		mlx5_os_devx_destroy_event_channel
			(priv->sh->devx_channel_lwm);
		priv->sh->devx_channel_lwm = NULL;
	}
	pthread_mutex_destroy(&priv->sh->lwm_config_lock);
	return -rte_errno;
}

/**
 * Destroy the LWM event channel and interrupt handle for the shared device
 * context before freeing this context. The interrupt handler is also
 * unregistered.
 *
 * @param[in] sh
 *   Pointer to shared device context.
 */
void
mlx5_lwm_unset(struct mlx5_dev_ctx_shared *sh)
{
	if (sh->intr_handle_lwm) {
		mlx5_os_interrupt_handler_destroy(sh->intr_handle_lwm,
			mlx5_dev_interrupt_handler_lwm, (void *)-1);
		sh->intr_handle_lwm = NULL;
	}
	if (sh->devx_channel_lwm) {
		mlx5_os_devx_destroy_event_channel
			(sh->devx_channel_lwm);
		sh->devx_channel_lwm = NULL;
	}
	pthread_mutex_destroy(&sh->lwm_config_lock);
}

/**
 * Free the shared IB device context. Decrement the reference counter and,
 * if it reaches zero, free all allocated resources and close handles.
 *
 * @param[in] sh
 *   Pointer to mlx5_dev_ctx_shared object to free.
 */
void
mlx5_free_shared_dev_ctx(struct mlx5_dev_ctx_shared *sh)
{
	int ret;
	int i = 0;

	pthread_mutex_lock(&mlx5_dev_ctx_list_mutex);
#ifdef RTE_LIBRTE_MLX5_DEBUG
	/* Check the object presence in the list. */
	struct mlx5_dev_ctx_shared *lctx;

	LIST_FOREACH(lctx, &mlx5_dev_ctx_list, next)
		if (lctx == sh)
			break;
	MLX5_ASSERT(lctx);
	if (lctx != sh) {
		DRV_LOG(ERR, "Freeing non-existing shared IB context");
		goto exit;
	}
#endif
	MLX5_ASSERT(sh);
	MLX5_ASSERT(sh->refcnt);
	/* Secondary process should not free the shared context. */
	MLX5_ASSERT(rte_eal_process_type() == RTE_PROC_PRIMARY);
	if (--sh->refcnt)
		goto exit;
	/* Stop watching for mempool events and unregister all mempools. */
	if (!sh->cdev->config.mr_mempool_reg_en) {
		ret = rte_mempool_event_callback_unregister
			(mlx5_dev_ctx_shared_rx_mempool_event_cb, sh);
		if (ret == 0)
			rte_mempool_walk
			(mlx5_dev_ctx_shared_rx_mempool_unregister_cb, sh);
	}
	/* Remove context from the global device list. */
	LIST_REMOVE(sh, next);
	/* Release resources on the last device removal.
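	 * i.e. when the global device context list becomes empty.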
	 */
	if (LIST_EMPTY(&mlx5_dev_ctx_list)) {
		mlx5_os_net_cleanup();
		mlx5_flow_os_release_workspace();
	}
	pthread_mutex_unlock(&mlx5_dev_ctx_list_mutex);
	if (sh->flex_parsers_dv) {
		mlx5_list_destroy(sh->flex_parsers_dv);
		sh->flex_parsers_dv = NULL;
	}
	/*
	 * Ensure there is no async event handler installed.
	 * Only primary process handles async device events.
	 */
	mlx5_flow_counters_mng_close(sh);
	if (sh->ct_mng)
		mlx5_flow_aso_ct_mng_close(sh);
	if (sh->aso_age_mng) {
		mlx5_flow_aso_age_mng_close(sh);
		sh->aso_age_mng = NULL;
	}
	if (sh->mtrmng)
		mlx5_aso_flow_mtrs_mng_close(sh);
	mlx5_flow_ipool_destroy(sh);
	mlx5_os_dev_shared_handler_uninstall(sh);
	mlx5_rxtx_uars_release(sh);
	do {
		if (sh->tis[i])
			claim_zero(mlx5_devx_cmd_destroy(sh->tis[i]));
	} while (++i < sh->bond.n_port);
	if (sh->td)
		claim_zero(mlx5_devx_cmd_destroy(sh->td));
	MLX5_ASSERT(sh->geneve_tlv_option_resource == NULL);
	pthread_mutex_destroy(&sh->txpp.mutex);
	mlx5_lwm_unset(sh);
	mlx5_free(sh);
	return;
exit:
	pthread_mutex_unlock(&mlx5_dev_ctx_list_mutex);
}

/**
 * Destroy table hash list.
 *
 * @param[in] priv
 *   Pointer to the private device data structure.
 */
void
mlx5_free_table_hash_list(struct mlx5_priv *priv)
{
	struct mlx5_dev_ctx_shared *sh = priv->sh;
	struct mlx5_hlist **tbls = (priv->sh->config.dv_flow_en == 2) ?
				   &sh->groups : &sh->flow_tbls;

	if (*tbls == NULL)
		return;
	mlx5_hlist_destroy(*tbls);
	*tbls = NULL;
}

#ifdef HAVE_MLX5_HWS_SUPPORT
/**
 * Allocate the HW steering group hash list.
 *
 * @param[in] priv
 *   Pointer to the private device data structure.
 */
static int
mlx5_alloc_hw_group_hash_list(struct mlx5_priv *priv)
{
	int err = 0;
	struct mlx5_dev_ctx_shared *sh = priv->sh;
	char s[MLX5_NAME_SIZE];

	MLX5_ASSERT(sh);
	snprintf(s, sizeof(s), "%s_flow_groups", priv->sh->ibdev_name);
	sh->groups = mlx5_hlist_create
			(s, MLX5_FLOW_TABLE_HLIST_ARRAY_SIZE,
			 false, true, sh,
			 flow_hw_grp_create_cb,
			 flow_hw_grp_match_cb,
			 flow_hw_grp_remove_cb,
			 flow_hw_grp_clone_cb,
			 flow_hw_grp_clone_free_cb);
	if (!sh->groups) {
		DRV_LOG(ERR, "flow groups with hash creation failed.");
		err = ENOMEM;
	}
	return err;
}
#endif


/**
 * Initialize flow table hash list and create the root tables entry
 * for each domain.
 *
 * @param[in] priv
 *   Pointer to the private device data structure.
 *
 * @return
 *   Zero on success, positive error code otherwise.
 */
int
mlx5_alloc_table_hash_list(struct mlx5_priv *priv __rte_unused)
{
	int err = 0;

	/* Tables are only used in DV and DR modes.
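	 * With HW steering (dv_flow_en == 2) a group hash list is used instead.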
	 */
#ifdef HAVE_MLX5_HWS_SUPPORT
	struct mlx5_dev_ctx_shared *sh = priv->sh;
	char s[MLX5_NAME_SIZE];

	if (priv->sh->config.dv_flow_en == 2)
		return mlx5_alloc_hw_group_hash_list(priv);
	MLX5_ASSERT(sh);
	snprintf(s, sizeof(s), "%s_flow_table", priv->sh->ibdev_name);
	sh->flow_tbls = mlx5_hlist_create(s, MLX5_FLOW_TABLE_HLIST_ARRAY_SIZE,
					  false, true, sh,
					  flow_dv_tbl_create_cb,
					  flow_dv_tbl_match_cb,
					  flow_dv_tbl_remove_cb,
					  flow_dv_tbl_clone_cb,
					  flow_dv_tbl_clone_free_cb);
	if (!sh->flow_tbls) {
		DRV_LOG(ERR, "flow tables with hash creation failed.");
		err = ENOMEM;
		return err;
	}
#ifndef HAVE_MLX5DV_DR
	struct rte_flow_error error;
	struct rte_eth_dev *dev = &rte_eth_devices[priv->dev_data->port_id];

	/*
	 * In case we do not have DR support, the zero tables should be
	 * created because DV expects to see them even if they cannot be
	 * created by RDMA-CORE.
	 */
	if (!flow_dv_tbl_resource_get(dev, 0, 0, 0, 0,
				      NULL, 0, 1, 0, &error) ||
	    !flow_dv_tbl_resource_get(dev, 0, 1, 0, 0,
				      NULL, 0, 1, 0, &error) ||
	    !flow_dv_tbl_resource_get(dev, 0, 0, 1, 0,
				      NULL, 0, 1, 0, &error)) {
		err = ENOMEM;
		goto error;
	}
	return err;
error:
	mlx5_free_table_hash_list(priv);
#endif /* HAVE_MLX5DV_DR */
#endif
	return err;
}

/**
 * Retrieve an integer value from an environment variable.
 *
 * @param[in] name
 *   Environment variable name.
 *
 * @return
 *   Integer value, 0 if the variable is not set.
 */
int
mlx5_getenv_int(const char *name)
{
	const char *val = getenv(name);

	if (val == NULL)
		return 0;
	return atoi(val);
}

/**
 * DPDK callback to add a UDP tunnel port.
 *
 * @param[in] dev
 *   A pointer to eth_dev.
 * @param[in] udp_tunnel
 *   A pointer to udp tunnel.
 *
 * @return
 *   0 on valid udp ports and tunnels, -ENOTSUP otherwise.
 */
int
mlx5_udp_tunnel_port_add(struct rte_eth_dev *dev __rte_unused,
			 struct rte_eth_udp_tunnel *udp_tunnel)
{
	MLX5_ASSERT(udp_tunnel != NULL);
	if (udp_tunnel->prot_type == RTE_ETH_TUNNEL_TYPE_VXLAN &&
	    udp_tunnel->udp_port == 4789)
		return 0;
	if (udp_tunnel->prot_type == RTE_ETH_TUNNEL_TYPE_VXLAN_GPE &&
	    udp_tunnel->udp_port == 4790)
		return 0;
	return -ENOTSUP;
}

/**
 * Initialize process private data structure.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_proc_priv_init(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_proc_priv *ppriv;
	size_t ppriv_size;

	mlx5_proc_priv_uninit(dev);
	/*
	 * UAR register table follows the process private structure. BlueFlame
	 * registers for Tx queues are stored in the table.
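	 * It is sized by the number of Tx queues; see ppriv_size below.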
1926 */ 1927 ppriv_size = sizeof(struct mlx5_proc_priv) + 1928 priv->txqs_n * sizeof(struct mlx5_uar_data); 1929 ppriv = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO, ppriv_size, 1930 RTE_CACHE_LINE_SIZE, dev->device->numa_node); 1931 if (!ppriv) { 1932 rte_errno = ENOMEM; 1933 return -rte_errno; 1934 } 1935 ppriv->uar_table_sz = priv->txqs_n; 1936 dev->process_private = ppriv; 1937 if (rte_eal_process_type() == RTE_PROC_PRIMARY) 1938 priv->sh->pppriv = ppriv; 1939 return 0; 1940 } 1941 1942 /** 1943 * Un-initialize process private data structure. 1944 * 1945 * @param dev 1946 * Pointer to Ethernet device structure. 1947 */ 1948 void 1949 mlx5_proc_priv_uninit(struct rte_eth_dev *dev) 1950 { 1951 if (!dev->process_private) 1952 return; 1953 mlx5_free(dev->process_private); 1954 dev->process_private = NULL; 1955 } 1956 1957 /** 1958 * DPDK callback to close the device. 1959 * 1960 * Destroy all queues and objects, free memory. 1961 * 1962 * @param dev 1963 * Pointer to Ethernet device structure. 1964 */ 1965 int 1966 mlx5_dev_close(struct rte_eth_dev *dev) 1967 { 1968 struct mlx5_priv *priv = dev->data->dev_private; 1969 unsigned int i; 1970 int ret; 1971 1972 if (rte_eal_process_type() == RTE_PROC_SECONDARY) { 1973 /* Check if process_private released. */ 1974 if (!dev->process_private) 1975 return 0; 1976 mlx5_tx_uar_uninit_secondary(dev); 1977 mlx5_proc_priv_uninit(dev); 1978 rte_eth_dev_release_port(dev); 1979 return 0; 1980 } 1981 if (!priv->sh) 1982 return 0; 1983 DRV_LOG(DEBUG, "port %u closing device \"%s\"", 1984 dev->data->port_id, 1985 ((priv->sh->cdev->ctx != NULL) ? 1986 mlx5_os_get_ctx_device_name(priv->sh->cdev->ctx) : "")); 1987 /* 1988 * If default mreg copy action is removed at the stop stage, 1989 * the search will return none and nothing will be done anymore. 1990 */ 1991 mlx5_flow_stop_default(dev); 1992 mlx5_traffic_disable(dev); 1993 /* 1994 * If all the flows are already flushed in the device stop stage, 1995 * then this will return directly without any action. 1996 */ 1997 mlx5_flow_list_flush(dev, MLX5_FLOW_TYPE_GEN, true); 1998 mlx5_action_handle_flush(dev); 1999 mlx5_flow_meter_flush(dev, NULL); 2000 /* Prevent crashes when queues are still in use. */ 2001 dev->rx_pkt_burst = rte_eth_pkt_burst_dummy; 2002 dev->tx_pkt_burst = rte_eth_pkt_burst_dummy; 2003 rte_wmb(); 2004 /* Disable datapath on secondary process. */ 2005 mlx5_mp_os_req_stop_rxtx(dev); 2006 /* Free the eCPRI flex parser resource. */ 2007 mlx5_flex_parser_ecpri_release(dev); 2008 mlx5_flex_item_port_cleanup(dev); 2009 #ifdef HAVE_MLX5_HWS_SUPPORT 2010 flow_hw_destroy_vport_action(dev); 2011 flow_hw_resource_release(dev); 2012 flow_hw_clear_port_info(dev); 2013 if (priv->sh->config.dv_flow_en == 2) { 2014 flow_hw_clear_flow_metadata_config(); 2015 flow_hw_clear_tags_set(dev); 2016 } 2017 #endif 2018 if (priv->rxq_privs != NULL) { 2019 /* XXX race condition if mlx5_rx_burst() is still running. */ 2020 rte_delay_us_sleep(1000); 2021 for (i = 0; (i != priv->rxqs_n); ++i) 2022 mlx5_rxq_release(dev, i); 2023 priv->rxqs_n = 0; 2024 mlx5_free(priv->rxq_privs); 2025 priv->rxq_privs = NULL; 2026 } 2027 if (priv->txqs != NULL) { 2028 /* XXX race condition if mlx5_tx_burst() is still running. 
*/ 2029 rte_delay_us_sleep(1000); 2030 for (i = 0; (i != priv->txqs_n); ++i) 2031 mlx5_txq_release(dev, i); 2032 priv->txqs_n = 0; 2033 priv->txqs = NULL; 2034 } 2035 mlx5_proc_priv_uninit(dev); 2036 if (priv->q_counters) { 2037 mlx5_devx_cmd_destroy(priv->q_counters); 2038 priv->q_counters = NULL; 2039 } 2040 if (priv->drop_queue.hrxq) 2041 mlx5_drop_action_destroy(dev); 2042 if (priv->mreg_cp_tbl) 2043 mlx5_hlist_destroy(priv->mreg_cp_tbl); 2044 mlx5_mprq_free_mp(dev); 2045 mlx5_os_free_shared_dr(priv); 2046 if (priv->rss_conf.rss_key != NULL) 2047 mlx5_free(priv->rss_conf.rss_key); 2048 if (priv->reta_idx != NULL) 2049 mlx5_free(priv->reta_idx); 2050 if (priv->sh->dev_cap.vf) 2051 mlx5_os_mac_addr_flush(dev); 2052 if (priv->nl_socket_route >= 0) 2053 close(priv->nl_socket_route); 2054 if (priv->nl_socket_rdma >= 0) 2055 close(priv->nl_socket_rdma); 2056 if (priv->vmwa_context) 2057 mlx5_vlan_vmwa_exit(priv->vmwa_context); 2058 ret = mlx5_hrxq_verify(dev); 2059 if (ret) 2060 DRV_LOG(WARNING, "port %u some hash Rx queue still remain", 2061 dev->data->port_id); 2062 ret = mlx5_ind_table_obj_verify(dev); 2063 if (ret) 2064 DRV_LOG(WARNING, "port %u some indirection table still remain", 2065 dev->data->port_id); 2066 ret = mlx5_rxq_obj_verify(dev); 2067 if (ret) 2068 DRV_LOG(WARNING, "port %u some Rx queue objects still remain", 2069 dev->data->port_id); 2070 ret = mlx5_ext_rxq_verify(dev); 2071 if (ret) 2072 DRV_LOG(WARNING, "Port %u some external RxQ still remain.", 2073 dev->data->port_id); 2074 ret = mlx5_rxq_verify(dev); 2075 if (ret) 2076 DRV_LOG(WARNING, "port %u some Rx queues still remain", 2077 dev->data->port_id); 2078 ret = mlx5_txq_obj_verify(dev); 2079 if (ret) 2080 DRV_LOG(WARNING, "port %u some Verbs Tx queue still remain", 2081 dev->data->port_id); 2082 ret = mlx5_txq_verify(dev); 2083 if (ret) 2084 DRV_LOG(WARNING, "port %u some Tx queues still remain", 2085 dev->data->port_id); 2086 ret = mlx5_flow_verify(dev); 2087 if (ret) 2088 DRV_LOG(WARNING, "port %u some flows still remain", 2089 dev->data->port_id); 2090 if (priv->hrxqs) 2091 mlx5_list_destroy(priv->hrxqs); 2092 mlx5_free(priv->ext_rxqs); 2093 /* 2094 * Free the shared context in last turn, because the cleanup 2095 * routines above may use some shared fields, like 2096 * mlx5_os_mac_addr_flush() uses ibdev_path for retrieving 2097 * ifindex if Netlink fails. 2098 */ 2099 mlx5_free_shared_dev_ctx(priv->sh); 2100 if (priv->domain_id != RTE_ETH_DEV_SWITCH_DOMAIN_ID_INVALID) { 2101 unsigned int c = 0; 2102 uint16_t port_id; 2103 2104 MLX5_ETH_FOREACH_DEV(port_id, dev->device) { 2105 struct mlx5_priv *opriv = 2106 rte_eth_devices[port_id].data->dev_private; 2107 2108 if (!opriv || 2109 opriv->domain_id != priv->domain_id || 2110 &rte_eth_devices[port_id] == dev) 2111 continue; 2112 ++c; 2113 break; 2114 } 2115 if (!c) 2116 claim_zero(rte_eth_switch_domain_free(priv->domain_id)); 2117 } 2118 memset(priv, 0, sizeof(*priv)); 2119 priv->domain_id = RTE_ETH_DEV_SWITCH_DOMAIN_ID_INVALID; 2120 /* 2121 * Reset mac_addrs to NULL such that it is not freed as part of 2122 * rte_eth_dev_release_port(). mac_addrs is part of dev_private so 2123 * it is freed when dev_private is freed. 
2124 */ 2125 dev->data->mac_addrs = NULL; 2126 return 0; 2127 } 2128 2129 const struct eth_dev_ops mlx5_dev_ops = { 2130 .dev_configure = mlx5_dev_configure, 2131 .dev_start = mlx5_dev_start, 2132 .dev_stop = mlx5_dev_stop, 2133 .dev_set_link_down = mlx5_set_link_down, 2134 .dev_set_link_up = mlx5_set_link_up, 2135 .dev_close = mlx5_dev_close, 2136 .promiscuous_enable = mlx5_promiscuous_enable, 2137 .promiscuous_disable = mlx5_promiscuous_disable, 2138 .allmulticast_enable = mlx5_allmulticast_enable, 2139 .allmulticast_disable = mlx5_allmulticast_disable, 2140 .link_update = mlx5_link_update, 2141 .stats_get = mlx5_stats_get, 2142 .stats_reset = mlx5_stats_reset, 2143 .xstats_get = mlx5_xstats_get, 2144 .xstats_reset = mlx5_xstats_reset, 2145 .xstats_get_names = mlx5_xstats_get_names, 2146 .fw_version_get = mlx5_fw_version_get, 2147 .dev_infos_get = mlx5_dev_infos_get, 2148 .representor_info_get = mlx5_representor_info_get, 2149 .read_clock = mlx5_txpp_read_clock, 2150 .dev_supported_ptypes_get = mlx5_dev_supported_ptypes_get, 2151 .vlan_filter_set = mlx5_vlan_filter_set, 2152 .rx_queue_setup = mlx5_rx_queue_setup, 2153 .rx_queue_avail_thresh_set = mlx5_rx_queue_lwm_set, 2154 .rx_queue_avail_thresh_query = mlx5_rx_queue_lwm_query, 2155 .rx_hairpin_queue_setup = mlx5_rx_hairpin_queue_setup, 2156 .tx_queue_setup = mlx5_tx_queue_setup, 2157 .tx_hairpin_queue_setup = mlx5_tx_hairpin_queue_setup, 2158 .rx_queue_release = mlx5_rx_queue_release, 2159 .tx_queue_release = mlx5_tx_queue_release, 2160 .rx_queue_start = mlx5_rx_queue_start, 2161 .rx_queue_stop = mlx5_rx_queue_stop, 2162 .tx_queue_start = mlx5_tx_queue_start, 2163 .tx_queue_stop = mlx5_tx_queue_stop, 2164 .flow_ctrl_get = mlx5_dev_get_flow_ctrl, 2165 .flow_ctrl_set = mlx5_dev_set_flow_ctrl, 2166 .mac_addr_remove = mlx5_mac_addr_remove, 2167 .mac_addr_add = mlx5_mac_addr_add, 2168 .mac_addr_set = mlx5_mac_addr_set, 2169 .set_mc_addr_list = mlx5_set_mc_addr_list, 2170 .mtu_set = mlx5_dev_set_mtu, 2171 .vlan_strip_queue_set = mlx5_vlan_strip_queue_set, 2172 .vlan_offload_set = mlx5_vlan_offload_set, 2173 .reta_update = mlx5_dev_rss_reta_update, 2174 .reta_query = mlx5_dev_rss_reta_query, 2175 .rss_hash_update = mlx5_rss_hash_update, 2176 .rss_hash_conf_get = mlx5_rss_hash_conf_get, 2177 .flow_ops_get = mlx5_flow_ops_get, 2178 .rxq_info_get = mlx5_rxq_info_get, 2179 .txq_info_get = mlx5_txq_info_get, 2180 .rx_burst_mode_get = mlx5_rx_burst_mode_get, 2181 .tx_burst_mode_get = mlx5_tx_burst_mode_get, 2182 .rx_queue_intr_enable = mlx5_rx_intr_enable, 2183 .rx_queue_intr_disable = mlx5_rx_intr_disable, 2184 .is_removed = mlx5_is_removed, 2185 .udp_tunnel_port_add = mlx5_udp_tunnel_port_add, 2186 .get_module_info = mlx5_get_module_info, 2187 .get_module_eeprom = mlx5_get_module_eeprom, 2188 .hairpin_cap_get = mlx5_hairpin_cap_get, 2189 .mtr_ops_get = mlx5_flow_meter_ops_get, 2190 .hairpin_bind = mlx5_hairpin_bind, 2191 .hairpin_unbind = mlx5_hairpin_unbind, 2192 .hairpin_get_peer_ports = mlx5_hairpin_get_peer_ports, 2193 .hairpin_queue_peer_update = mlx5_hairpin_queue_peer_update, 2194 .hairpin_queue_peer_bind = mlx5_hairpin_queue_peer_bind, 2195 .hairpin_queue_peer_unbind = mlx5_hairpin_queue_peer_unbind, 2196 .get_monitor_addr = mlx5_get_monitor_addr, 2197 }; 2198 2199 /* Available operations from secondary process. 
*/ 2200 const struct eth_dev_ops mlx5_dev_sec_ops = { 2201 .stats_get = mlx5_stats_get, 2202 .stats_reset = mlx5_stats_reset, 2203 .xstats_get = mlx5_xstats_get, 2204 .xstats_reset = mlx5_xstats_reset, 2205 .xstats_get_names = mlx5_xstats_get_names, 2206 .fw_version_get = mlx5_fw_version_get, 2207 .dev_infos_get = mlx5_dev_infos_get, 2208 .representor_info_get = mlx5_representor_info_get, 2209 .read_clock = mlx5_txpp_read_clock, 2210 .rx_queue_start = mlx5_rx_queue_start, 2211 .rx_queue_stop = mlx5_rx_queue_stop, 2212 .tx_queue_start = mlx5_tx_queue_start, 2213 .tx_queue_stop = mlx5_tx_queue_stop, 2214 .rxq_info_get = mlx5_rxq_info_get, 2215 .txq_info_get = mlx5_txq_info_get, 2216 .rx_burst_mode_get = mlx5_rx_burst_mode_get, 2217 .tx_burst_mode_get = mlx5_tx_burst_mode_get, 2218 .get_module_info = mlx5_get_module_info, 2219 .get_module_eeprom = mlx5_get_module_eeprom, 2220 }; 2221 2222 /* Available operations in flow isolated mode. */ 2223 const struct eth_dev_ops mlx5_dev_ops_isolate = { 2224 .dev_configure = mlx5_dev_configure, 2225 .dev_start = mlx5_dev_start, 2226 .dev_stop = mlx5_dev_stop, 2227 .dev_set_link_down = mlx5_set_link_down, 2228 .dev_set_link_up = mlx5_set_link_up, 2229 .dev_close = mlx5_dev_close, 2230 .promiscuous_enable = mlx5_promiscuous_enable, 2231 .promiscuous_disable = mlx5_promiscuous_disable, 2232 .allmulticast_enable = mlx5_allmulticast_enable, 2233 .allmulticast_disable = mlx5_allmulticast_disable, 2234 .link_update = mlx5_link_update, 2235 .stats_get = mlx5_stats_get, 2236 .stats_reset = mlx5_stats_reset, 2237 .xstats_get = mlx5_xstats_get, 2238 .xstats_reset = mlx5_xstats_reset, 2239 .xstats_get_names = mlx5_xstats_get_names, 2240 .fw_version_get = mlx5_fw_version_get, 2241 .dev_infos_get = mlx5_dev_infos_get, 2242 .representor_info_get = mlx5_representor_info_get, 2243 .read_clock = mlx5_txpp_read_clock, 2244 .dev_supported_ptypes_get = mlx5_dev_supported_ptypes_get, 2245 .vlan_filter_set = mlx5_vlan_filter_set, 2246 .rx_queue_setup = mlx5_rx_queue_setup, 2247 .rx_hairpin_queue_setup = mlx5_rx_hairpin_queue_setup, 2248 .tx_queue_setup = mlx5_tx_queue_setup, 2249 .tx_hairpin_queue_setup = mlx5_tx_hairpin_queue_setup, 2250 .rx_queue_release = mlx5_rx_queue_release, 2251 .tx_queue_release = mlx5_tx_queue_release, 2252 .rx_queue_start = mlx5_rx_queue_start, 2253 .rx_queue_stop = mlx5_rx_queue_stop, 2254 .tx_queue_start = mlx5_tx_queue_start, 2255 .tx_queue_stop = mlx5_tx_queue_stop, 2256 .flow_ctrl_get = mlx5_dev_get_flow_ctrl, 2257 .flow_ctrl_set = mlx5_dev_set_flow_ctrl, 2258 .mac_addr_remove = mlx5_mac_addr_remove, 2259 .mac_addr_add = mlx5_mac_addr_add, 2260 .mac_addr_set = mlx5_mac_addr_set, 2261 .set_mc_addr_list = mlx5_set_mc_addr_list, 2262 .mtu_set = mlx5_dev_set_mtu, 2263 .vlan_strip_queue_set = mlx5_vlan_strip_queue_set, 2264 .vlan_offload_set = mlx5_vlan_offload_set, 2265 .flow_ops_get = mlx5_flow_ops_get, 2266 .rxq_info_get = mlx5_rxq_info_get, 2267 .txq_info_get = mlx5_txq_info_get, 2268 .rx_burst_mode_get = mlx5_rx_burst_mode_get, 2269 .tx_burst_mode_get = mlx5_tx_burst_mode_get, 2270 .rx_queue_intr_enable = mlx5_rx_intr_enable, 2271 .rx_queue_intr_disable = mlx5_rx_intr_disable, 2272 .is_removed = mlx5_is_removed, 2273 .get_module_info = mlx5_get_module_info, 2274 .get_module_eeprom = mlx5_get_module_eeprom, 2275 .hairpin_cap_get = mlx5_hairpin_cap_get, 2276 .mtr_ops_get = mlx5_flow_meter_ops_get, 2277 .hairpin_bind = mlx5_hairpin_bind, 2278 .hairpin_unbind = mlx5_hairpin_unbind, 2279 .hairpin_get_peer_ports = mlx5_hairpin_get_peer_ports, 2280 
.hairpin_queue_peer_update = mlx5_hairpin_queue_peer_update, 2281 .hairpin_queue_peer_bind = mlx5_hairpin_queue_peer_bind, 2282 .hairpin_queue_peer_unbind = mlx5_hairpin_queue_peer_unbind, 2283 .get_monitor_addr = mlx5_get_monitor_addr, 2284 }; 2285 2286 /** 2287 * Verify and store value for device argument. 2288 * 2289 * @param[in] key 2290 * Key argument to verify. 2291 * @param[in] val 2292 * Value associated with key. 2293 * @param opaque 2294 * User data. 2295 * 2296 * @return 2297 * 0 on success, a negative errno value otherwise and rte_errno is set. 2298 */ 2299 static int 2300 mlx5_port_args_check_handler(const char *key, const char *val, void *opaque) 2301 { 2302 struct mlx5_port_config *config = opaque; 2303 signed long tmp; 2304 2305 /* No-op, port representors are processed in mlx5_dev_spawn(). */ 2306 if (!strcmp(MLX5_REPRESENTOR, key)) 2307 return 0; 2308 errno = 0; 2309 tmp = strtol(val, NULL, 0); 2310 if (errno) { 2311 rte_errno = errno; 2312 DRV_LOG(WARNING, "%s: \"%s\" is not a valid integer", key, val); 2313 return -rte_errno; 2314 } 2315 if (tmp < 0) { 2316 /* Negative values are acceptable for some keys only. */ 2317 rte_errno = EINVAL; 2318 DRV_LOG(WARNING, "%s: invalid negative value \"%s\"", key, val); 2319 return -rte_errno; 2320 } 2321 if (strcmp(MLX5_RXQ_CQE_COMP_EN, key) == 0) { 2322 if (tmp > MLX5_CQE_RESP_FORMAT_L34H_STRIDX) { 2323 DRV_LOG(ERR, "invalid CQE compression " 2324 "format parameter"); 2325 rte_errno = EINVAL; 2326 return -rte_errno; 2327 } 2328 config->cqe_comp = !!tmp; 2329 config->cqe_comp_fmt = tmp; 2330 } else if (strcmp(MLX5_RXQ_PKT_PAD_EN, key) == 0) { 2331 config->hw_padding = !!tmp; 2332 } else if (strcmp(MLX5_RX_MPRQ_EN, key) == 0) { 2333 config->mprq.enabled = !!tmp; 2334 } else if (strcmp(MLX5_RX_MPRQ_LOG_STRIDE_NUM, key) == 0) { 2335 config->mprq.log_stride_num = tmp; 2336 } else if (strcmp(MLX5_RX_MPRQ_LOG_STRIDE_SIZE, key) == 0) { 2337 config->mprq.log_stride_size = tmp; 2338 } else if (strcmp(MLX5_RX_MPRQ_MAX_MEMCPY_LEN, key) == 0) { 2339 config->mprq.max_memcpy_len = tmp; 2340 } else if (strcmp(MLX5_RXQS_MIN_MPRQ, key) == 0) { 2341 config->mprq.min_rxqs_num = tmp; 2342 } else if (strcmp(MLX5_TXQ_INLINE, key) == 0) { 2343 DRV_LOG(WARNING, "%s: deprecated parameter," 2344 " converted to txq_inline_max", key); 2345 config->txq_inline_max = tmp; 2346 } else if (strcmp(MLX5_TXQ_INLINE_MAX, key) == 0) { 2347 config->txq_inline_max = tmp; 2348 } else if (strcmp(MLX5_TXQ_INLINE_MIN, key) == 0) { 2349 config->txq_inline_min = tmp; 2350 } else if (strcmp(MLX5_TXQ_INLINE_MPW, key) == 0) { 2351 config->txq_inline_mpw = tmp; 2352 } else if (strcmp(MLX5_TXQS_MIN_INLINE, key) == 0) { 2353 config->txqs_inline = tmp; 2354 } else if (strcmp(MLX5_TXQS_MAX_VEC, key) == 0) { 2355 DRV_LOG(WARNING, "%s: deprecated parameter, ignored", key); 2356 } else if (strcmp(MLX5_TXQ_MPW_EN, key) == 0) { 2357 config->mps = !!tmp; 2358 } else if (strcmp(MLX5_TXQ_MPW_HDR_DSEG_EN, key) == 0) { 2359 DRV_LOG(WARNING, "%s: deprecated parameter, ignored", key); 2360 } else if (strcmp(MLX5_TXQ_MAX_INLINE_LEN, key) == 0) { 2361 DRV_LOG(WARNING, "%s: deprecated parameter," 2362 " converted to txq_inline_mpw", key); 2363 config->txq_inline_mpw = tmp; 2364 } else if (strcmp(MLX5_TX_VEC_EN, key) == 0) { 2365 DRV_LOG(WARNING, "%s: deprecated parameter, ignored", key); 2366 } else if (strcmp(MLX5_RX_VEC_EN, key) == 0) { 2367 config->rx_vec_en = !!tmp; 2368 } else if (strcmp(MLX5_MAX_DUMP_FILES_NUM, key) == 0) { 2369 config->max_dump_files_num = tmp; 2370 } else if 
(strcmp(MLX5_LRO_TIMEOUT_USEC, key) == 0) { 2371 config->lro_timeout = tmp; 2372 } else if (strcmp(MLX5_HP_BUF_SIZE, key) == 0) { 2373 config->log_hp_size = tmp; 2374 } else if (strcmp(MLX5_DELAY_DROP, key) == 0) { 2375 config->std_delay_drop = !!(tmp & MLX5_DELAY_DROP_STANDARD); 2376 config->hp_delay_drop = !!(tmp & MLX5_DELAY_DROP_HAIRPIN); 2377 } 2378 return 0; 2379 } 2380 2381 /** 2382 * Parse user port parameters and adjust them according to device capabilities. 2383 * 2384 * @param priv 2385 * Pointer to shared device context. 2386 * @param mkvlist 2387 * Pointer to mlx5 kvargs control, can be NULL if there is no devargs. 2388 * @param config 2389 * Pointer to port configuration structure. 2390 * 2391 * @return 2392 * 0 on success, a negative errno value otherwise and rte_errno is set. 2393 */ 2394 int 2395 mlx5_port_args_config(struct mlx5_priv *priv, struct mlx5_kvargs_ctrl *mkvlist, 2396 struct mlx5_port_config *config) 2397 { 2398 struct mlx5_hca_attr *hca_attr = &priv->sh->cdev->config.hca_attr; 2399 struct mlx5_dev_cap *dev_cap = &priv->sh->dev_cap; 2400 bool devx = priv->sh->cdev->config.devx; 2401 const char **params = (const char *[]){ 2402 MLX5_RXQ_CQE_COMP_EN, 2403 MLX5_RXQ_PKT_PAD_EN, 2404 MLX5_RX_MPRQ_EN, 2405 MLX5_RX_MPRQ_LOG_STRIDE_NUM, 2406 MLX5_RX_MPRQ_LOG_STRIDE_SIZE, 2407 MLX5_RX_MPRQ_MAX_MEMCPY_LEN, 2408 MLX5_RXQS_MIN_MPRQ, 2409 MLX5_TXQ_INLINE, 2410 MLX5_TXQ_INLINE_MIN, 2411 MLX5_TXQ_INLINE_MAX, 2412 MLX5_TXQ_INLINE_MPW, 2413 MLX5_TXQS_MIN_INLINE, 2414 MLX5_TXQS_MAX_VEC, 2415 MLX5_TXQ_MPW_EN, 2416 MLX5_TXQ_MPW_HDR_DSEG_EN, 2417 MLX5_TXQ_MAX_INLINE_LEN, 2418 MLX5_TX_VEC_EN, 2419 MLX5_RX_VEC_EN, 2420 MLX5_REPRESENTOR, 2421 MLX5_MAX_DUMP_FILES_NUM, 2422 MLX5_LRO_TIMEOUT_USEC, 2423 MLX5_HP_BUF_SIZE, 2424 MLX5_DELAY_DROP, 2425 NULL, 2426 }; 2427 int ret = 0; 2428 2429 /* Default configuration. */ 2430 memset(config, 0, sizeof(*config)); 2431 config->mps = MLX5_ARG_UNSET; 2432 config->cqe_comp = 1; 2433 config->rx_vec_en = 1; 2434 config->txq_inline_max = MLX5_ARG_UNSET; 2435 config->txq_inline_min = MLX5_ARG_UNSET; 2436 config->txq_inline_mpw = MLX5_ARG_UNSET; 2437 config->txqs_inline = MLX5_ARG_UNSET; 2438 config->mprq.max_memcpy_len = MLX5_MPRQ_MEMCPY_DEFAULT_LEN; 2439 config->mprq.min_rxqs_num = MLX5_MPRQ_MIN_RXQS; 2440 config->mprq.log_stride_num = MLX5_MPRQ_DEFAULT_LOG_STRIDE_NUM; 2441 config->log_hp_size = MLX5_ARG_UNSET; 2442 config->std_delay_drop = 0; 2443 config->hp_delay_drop = 0; 2444 if (mkvlist != NULL) { 2445 /* Process parameters. */ 2446 ret = mlx5_kvargs_process(mkvlist, params, 2447 mlx5_port_args_check_handler, config); 2448 if (ret) { 2449 DRV_LOG(ERR, "Failed to process port arguments: %s", 2450 strerror(rte_errno)); 2451 return -rte_errno; 2452 } 2453 } 2454 /* Adjust parameters according to device capabilities. */ 2455 if (config->hw_padding && !dev_cap->hw_padding) { 2456 DRV_LOG(DEBUG, "Rx end alignment padding isn't supported."); 2457 config->hw_padding = 0; 2458 } else if (config->hw_padding) { 2459 DRV_LOG(DEBUG, "Rx end alignment padding is enabled."); 2460 } 2461 /* 2462 * MPW is disabled by default, while the Enhanced MPW is enabled 2463 * by default. 2464 */ 2465 if (config->mps == MLX5_ARG_UNSET) 2466 config->mps = (dev_cap->mps == MLX5_MPW_ENHANCED) ? 2467 MLX5_MPW_ENHANCED : MLX5_MPW_DISABLED; 2468 else 2469 config->mps = config->mps ? dev_cap->mps : MLX5_MPW_DISABLED; 2470 DRV_LOG(INFO, "%sMPS is %s", 2471 config->mps == MLX5_MPW_ENHANCED ? "enhanced " : 2472 config->mps == MLX5_MPW ? 
"legacy " : "", 2473 config->mps != MLX5_MPW_DISABLED ? "enabled" : "disabled"); 2474 if (priv->sh->config.lro_allowed) { 2475 /* 2476 * If LRO timeout is not configured by application, 2477 * use the minimal supported value. 2478 */ 2479 if (!config->lro_timeout) 2480 config->lro_timeout = 2481 hca_attr->lro_timer_supported_periods[0]; 2482 DRV_LOG(DEBUG, "LRO session timeout set to %d usec.", 2483 config->lro_timeout); 2484 } 2485 if (config->cqe_comp && !dev_cap->cqe_comp) { 2486 DRV_LOG(WARNING, "Rx CQE 128B compression is not supported."); 2487 config->cqe_comp = 0; 2488 } 2489 if (config->cqe_comp_fmt == MLX5_CQE_RESP_FORMAT_FTAG_STRIDX && 2490 (!devx || !hca_attr->mini_cqe_resp_flow_tag)) { 2491 DRV_LOG(WARNING, 2492 "Flow Tag CQE compression format isn't supported."); 2493 config->cqe_comp = 0; 2494 } 2495 if (config->cqe_comp_fmt == MLX5_CQE_RESP_FORMAT_L34H_STRIDX && 2496 (!devx || !hca_attr->mini_cqe_resp_l3_l4_tag)) { 2497 DRV_LOG(WARNING, 2498 "L3/L4 Header CQE compression format isn't supported."); 2499 config->cqe_comp = 0; 2500 } 2501 DRV_LOG(DEBUG, "Rx CQE compression is %ssupported.", 2502 config->cqe_comp ? "" : "not "); 2503 if ((config->std_delay_drop || config->hp_delay_drop) && 2504 !dev_cap->rq_delay_drop_en) { 2505 config->std_delay_drop = 0; 2506 config->hp_delay_drop = 0; 2507 DRV_LOG(WARNING, "dev_port-%u: Rxq delay drop isn't supported.", 2508 priv->dev_port); 2509 } 2510 if (config->mprq.enabled && !priv->sh->dev_cap.mprq.enabled) { 2511 DRV_LOG(WARNING, "Multi-Packet RQ isn't supported."); 2512 config->mprq.enabled = 0; 2513 } 2514 if (config->max_dump_files_num == 0) 2515 config->max_dump_files_num = 128; 2516 /* Detect minimal data bytes to inline. */ 2517 mlx5_set_min_inline(priv); 2518 DRV_LOG(DEBUG, "VLAN insertion in WQE is %ssupported.", 2519 config->hw_vlan_insert ? "" : "not "); 2520 DRV_LOG(DEBUG, "\"rxq_pkt_pad_en\" is %u.", config->hw_padding); 2521 DRV_LOG(DEBUG, "\"rxq_cqe_comp_en\" is %u.", config->cqe_comp); 2522 DRV_LOG(DEBUG, "\"cqe_comp_fmt\" is %u.", config->cqe_comp_fmt); 2523 DRV_LOG(DEBUG, "\"rx_vec_en\" is %u.", config->rx_vec_en); 2524 DRV_LOG(DEBUG, "Standard \"delay_drop\" is %u.", 2525 config->std_delay_drop); 2526 DRV_LOG(DEBUG, "Hairpin \"delay_drop\" is %u.", config->hp_delay_drop); 2527 DRV_LOG(DEBUG, "\"max_dump_files_num\" is %u.", 2528 config->max_dump_files_num); 2529 DRV_LOG(DEBUG, "\"log_hp_size\" is %u.", config->log_hp_size); 2530 DRV_LOG(DEBUG, "\"mprq_en\" is %u.", config->mprq.enabled); 2531 DRV_LOG(DEBUG, "\"mprq_log_stride_num\" is %u.", 2532 config->mprq.log_stride_num); 2533 DRV_LOG(DEBUG, "\"mprq_log_stride_size\" is %u.", 2534 config->mprq.log_stride_size); 2535 DRV_LOG(DEBUG, "\"mprq_max_memcpy_len\" is %u.", 2536 config->mprq.max_memcpy_len); 2537 DRV_LOG(DEBUG, "\"rxqs_min_mprq\" is %u.", config->mprq.min_rxqs_num); 2538 DRV_LOG(DEBUG, "\"lro_timeout_usec\" is %u.", config->lro_timeout); 2539 DRV_LOG(DEBUG, "\"txq_mpw_en\" is %d.", config->mps); 2540 DRV_LOG(DEBUG, "\"txqs_min_inline\" is %d.", config->txqs_inline); 2541 DRV_LOG(DEBUG, "\"txq_inline_min\" is %d.", config->txq_inline_min); 2542 DRV_LOG(DEBUG, "\"txq_inline_max\" is %d.", config->txq_inline_max); 2543 DRV_LOG(DEBUG, "\"txq_inline_mpw\" is %d.", config->txq_inline_mpw); 2544 return 0; 2545 } 2546 2547 /** 2548 * Print the key for device argument. 2549 * 2550 * It is "dummy" handler whose whole purpose is to enable using 2551 * mlx5_kvargs_process() function which set devargs as used. 2552 * 2553 * @param key 2554 * Key argument. 
2555 * @param val 2556 * Value associated with key, unused. 2557 * @param opaque 2558 * Unused, can be NULL. 2559 * 2560 * @return 2561 * 0 on success, function cannot fail. 2562 */ 2563 static int 2564 mlx5_dummy_handler(const char *key, const char *val, void *opaque) 2565 { 2566 DRV_LOG(DEBUG, "\tKey: \"%s\" is set as used.", key); 2567 RTE_SET_USED(opaque); 2568 RTE_SET_USED(val); 2569 return 0; 2570 } 2571 2572 /** 2573 * Set requested devargs as used when device is already spawned. 2574 * 2575 * It is necessary since it is valid to ask probe again for existing device, 2576 * if its devargs don't assign as used, mlx5_kvargs_validate() will fail. 2577 * 2578 * @param name 2579 * Name of the existing device. 2580 * @param port_id 2581 * Port identifier of the device. 2582 * @param mkvlist 2583 * Pointer to mlx5 kvargs control to sign as used. 2584 */ 2585 void 2586 mlx5_port_args_set_used(const char *name, uint16_t port_id, 2587 struct mlx5_kvargs_ctrl *mkvlist) 2588 { 2589 const char **params = (const char *[]){ 2590 MLX5_RXQ_CQE_COMP_EN, 2591 MLX5_RXQ_PKT_PAD_EN, 2592 MLX5_RX_MPRQ_EN, 2593 MLX5_RX_MPRQ_LOG_STRIDE_NUM, 2594 MLX5_RX_MPRQ_LOG_STRIDE_SIZE, 2595 MLX5_RX_MPRQ_MAX_MEMCPY_LEN, 2596 MLX5_RXQS_MIN_MPRQ, 2597 MLX5_TXQ_INLINE, 2598 MLX5_TXQ_INLINE_MIN, 2599 MLX5_TXQ_INLINE_MAX, 2600 MLX5_TXQ_INLINE_MPW, 2601 MLX5_TXQS_MIN_INLINE, 2602 MLX5_TXQS_MAX_VEC, 2603 MLX5_TXQ_MPW_EN, 2604 MLX5_TXQ_MPW_HDR_DSEG_EN, 2605 MLX5_TXQ_MAX_INLINE_LEN, 2606 MLX5_TX_VEC_EN, 2607 MLX5_RX_VEC_EN, 2608 MLX5_REPRESENTOR, 2609 MLX5_MAX_DUMP_FILES_NUM, 2610 MLX5_LRO_TIMEOUT_USEC, 2611 MLX5_HP_BUF_SIZE, 2612 MLX5_DELAY_DROP, 2613 NULL, 2614 }; 2615 2616 /* Secondary process should not handle devargs. */ 2617 if (rte_eal_process_type() != RTE_PROC_PRIMARY) 2618 return; 2619 MLX5_ASSERT(mkvlist != NULL); 2620 DRV_LOG(DEBUG, "Ethernet device \"%s\" for port %u " 2621 "already exists, set devargs as used:", name, port_id); 2622 /* This function cannot fail with this handler. */ 2623 mlx5_kvargs_process(mkvlist, params, mlx5_dummy_handler, NULL); 2624 } 2625 2626 /** 2627 * Check sibling device configurations when probing again. 2628 * 2629 * Sibling devices sharing infiniband device context should have compatible 2630 * configurations. This regards representors and bonding device. 2631 * 2632 * @param cdev 2633 * Pointer to mlx5 device structure. 2634 * @param mkvlist 2635 * Pointer to mlx5 kvargs control, can be NULL if there is no devargs. 2636 * 2637 * @return 2638 * 0 on success, a negative errno value otherwise and rte_errno is set. 2639 */ 2640 int 2641 mlx5_probe_again_args_validate(struct mlx5_common_device *cdev, 2642 struct mlx5_kvargs_ctrl *mkvlist) 2643 { 2644 struct mlx5_dev_ctx_shared *sh = NULL; 2645 struct mlx5_sh_config *config; 2646 int ret; 2647 2648 /* Secondary process should not handle devargs. */ 2649 if (rte_eal_process_type() != RTE_PROC_PRIMARY) 2650 return 0; 2651 pthread_mutex_lock(&mlx5_dev_ctx_list_mutex); 2652 /* Search for IB context by common device pointer. */ 2653 LIST_FOREACH(sh, &mlx5_dev_ctx_list, next) 2654 if (sh->cdev == cdev) 2655 break; 2656 pthread_mutex_unlock(&mlx5_dev_ctx_list_mutex); 2657 /* There is sh for this device -> it isn't probe again. 
 */
	if (sh == NULL)
		return 0;
	config = mlx5_malloc(MLX5_MEM_ZERO | MLX5_MEM_RTE,
			     sizeof(struct mlx5_sh_config),
			     RTE_CACHE_LINE_SIZE, SOCKET_ID_ANY);
	if (config == NULL) {
		rte_errno = ENOMEM;
		return -rte_errno;
	}
	/*
	 * Create a temporary IB context configuration structure according to
	 * the new devargs attached in this probe-again.
	 */
	ret = mlx5_shared_dev_ctx_args_config(sh, mkvlist, config);
	if (ret) {
		DRV_LOG(ERR, "Failed to process device configure: %s",
			strerror(rte_errno));
		mlx5_free(config);
		return ret;
	}
	/*
	 * Check the match between the temporary structure and the existing
	 * IB context structure.
	 */
	if (sh->config.dv_flow_en ^ config->dv_flow_en) {
		DRV_LOG(ERR, "\"dv_flow_en\" "
			"configuration mismatch for shared %s context.",
			sh->ibdev_name);
		goto error;
	}
	if ((sh->config.dv_xmeta_en ^ config->dv_xmeta_en) ||
	    (sh->config.dv_miss_info ^ config->dv_miss_info)) {
		DRV_LOG(ERR, "\"dv_xmeta_en\" "
			"configuration mismatch for shared %s context.",
			sh->ibdev_name);
		goto error;
	}
	if (sh->config.dv_esw_en ^ config->dv_esw_en) {
		DRV_LOG(ERR, "\"dv_esw_en\" "
			"configuration mismatch for shared %s context.",
			sh->ibdev_name);
		goto error;
	}
	if (sh->config.reclaim_mode ^ config->reclaim_mode) {
		DRV_LOG(ERR, "\"reclaim_mode\" "
			"configuration mismatch for shared %s context.",
			sh->ibdev_name);
		goto error;
	}
	if (sh->config.allow_duplicate_pattern ^
	    config->allow_duplicate_pattern) {
		DRV_LOG(ERR, "\"allow_duplicate_pattern\" "
			"configuration mismatch for shared %s context.",
			sh->ibdev_name);
		goto error;
	}
	if (sh->config.fdb_def_rule ^ config->fdb_def_rule) {
		DRV_LOG(ERR, "\"fdb_def_rule_en\" configuration mismatch for shared %s context.",
			sh->ibdev_name);
		goto error;
	}
	if (sh->config.l3_vxlan_en ^ config->l3_vxlan_en) {
		DRV_LOG(ERR, "\"l3_vxlan_en\" "
			"configuration mismatch for shared %s context.",
			sh->ibdev_name);
		goto error;
	}
	if (sh->config.decap_en ^ config->decap_en) {
		DRV_LOG(ERR, "\"decap_en\" "
			"configuration mismatch for shared %s context.",
			sh->ibdev_name);
		goto error;
	}
	if (sh->config.lacp_by_user ^ config->lacp_by_user) {
		DRV_LOG(ERR, "\"lacp_by_user\" "
			"configuration mismatch for shared %s context.",
			sh->ibdev_name);
		goto error;
	}
	if (sh->config.tx_pp ^ config->tx_pp) {
		DRV_LOG(ERR, "\"tx_pp\" "
			"configuration mismatch for shared %s context.",
			sh->ibdev_name);
		goto error;
	}
	if (sh->config.tx_skew ^ config->tx_skew) {
		DRV_LOG(ERR, "\"tx_skew\" "
			"configuration mismatch for shared %s context.",
			sh->ibdev_name);
		goto error;
	}
	mlx5_free(config);
	return 0;
error:
	mlx5_free(config);
	rte_errno = EINVAL;
	return -rte_errno;
}

/**
 * Configures the minimal amount of data to inline into WQE
 * while sending packets.
 *
 * - the txq_inline_min has the maximal priority, if this
 *   key is specified in devargs
 * - if DevX is enabled the inline mode is queried from the
 *   device (HCA attributes and NIC vport context if needed).
2765 * - otherwise L2 mode (18 bytes) is assumed for ConnectX-4/4 Lx 2766 * and none (0 bytes) for other NICs 2767 * 2768 * @param priv 2769 * Pointer to the private device data structure. 2770 */ 2771 void 2772 mlx5_set_min_inline(struct mlx5_priv *priv) 2773 { 2774 struct mlx5_hca_attr *hca_attr = &priv->sh->cdev->config.hca_attr; 2775 struct mlx5_port_config *config = &priv->config; 2776 2777 if (config->txq_inline_min != MLX5_ARG_UNSET) { 2778 /* Application defines size of inlined data explicitly. */ 2779 if (priv->pci_dev != NULL) { 2780 switch (priv->pci_dev->id.device_id) { 2781 case PCI_DEVICE_ID_MELLANOX_CONNECTX4: 2782 case PCI_DEVICE_ID_MELLANOX_CONNECTX4VF: 2783 if (config->txq_inline_min < 2784 (int)MLX5_INLINE_HSIZE_L2) { 2785 DRV_LOG(DEBUG, 2786 "txq_inline_mix aligned to minimal ConnectX-4 required value %d", 2787 (int)MLX5_INLINE_HSIZE_L2); 2788 config->txq_inline_min = 2789 MLX5_INLINE_HSIZE_L2; 2790 } 2791 break; 2792 } 2793 } 2794 goto exit; 2795 } 2796 if (hca_attr->eth_net_offloads) { 2797 /* We have DevX enabled, inline mode queried successfully. */ 2798 switch (hca_attr->wqe_inline_mode) { 2799 case MLX5_CAP_INLINE_MODE_L2: 2800 /* outer L2 header must be inlined. */ 2801 config->txq_inline_min = MLX5_INLINE_HSIZE_L2; 2802 goto exit; 2803 case MLX5_CAP_INLINE_MODE_NOT_REQUIRED: 2804 /* No inline data are required by NIC. */ 2805 config->txq_inline_min = MLX5_INLINE_HSIZE_NONE; 2806 config->hw_vlan_insert = 2807 hca_attr->wqe_vlan_insert; 2808 DRV_LOG(DEBUG, "Tx VLAN insertion is supported"); 2809 goto exit; 2810 case MLX5_CAP_INLINE_MODE_VPORT_CONTEXT: 2811 /* inline mode is defined by NIC vport context. */ 2812 if (!hca_attr->eth_virt) 2813 break; 2814 switch (hca_attr->vport_inline_mode) { 2815 case MLX5_INLINE_MODE_NONE: 2816 config->txq_inline_min = 2817 MLX5_INLINE_HSIZE_NONE; 2818 goto exit; 2819 case MLX5_INLINE_MODE_L2: 2820 config->txq_inline_min = 2821 MLX5_INLINE_HSIZE_L2; 2822 goto exit; 2823 case MLX5_INLINE_MODE_IP: 2824 config->txq_inline_min = 2825 MLX5_INLINE_HSIZE_L3; 2826 goto exit; 2827 case MLX5_INLINE_MODE_TCP_UDP: 2828 config->txq_inline_min = 2829 MLX5_INLINE_HSIZE_L4; 2830 goto exit; 2831 case MLX5_INLINE_MODE_INNER_L2: 2832 config->txq_inline_min = 2833 MLX5_INLINE_HSIZE_INNER_L2; 2834 goto exit; 2835 case MLX5_INLINE_MODE_INNER_IP: 2836 config->txq_inline_min = 2837 MLX5_INLINE_HSIZE_INNER_L3; 2838 goto exit; 2839 case MLX5_INLINE_MODE_INNER_TCP_UDP: 2840 config->txq_inline_min = 2841 MLX5_INLINE_HSIZE_INNER_L4; 2842 goto exit; 2843 } 2844 } 2845 } 2846 if (priv->pci_dev == NULL) { 2847 config->txq_inline_min = MLX5_INLINE_HSIZE_NONE; 2848 goto exit; 2849 } 2850 /* 2851 * We get here if we are unable to deduce 2852 * inline data size with DevX. Try PCI ID 2853 * to determine old NICs. 2854 */ 2855 switch (priv->pci_dev->id.device_id) { 2856 case PCI_DEVICE_ID_MELLANOX_CONNECTX4: 2857 case PCI_DEVICE_ID_MELLANOX_CONNECTX4VF: 2858 case PCI_DEVICE_ID_MELLANOX_CONNECTX4LX: 2859 case PCI_DEVICE_ID_MELLANOX_CONNECTX4LXVF: 2860 config->txq_inline_min = MLX5_INLINE_HSIZE_L2; 2861 config->hw_vlan_insert = 0; 2862 break; 2863 case PCI_DEVICE_ID_MELLANOX_CONNECTX5: 2864 case PCI_DEVICE_ID_MELLANOX_CONNECTX5VF: 2865 case PCI_DEVICE_ID_MELLANOX_CONNECTX5EX: 2866 case PCI_DEVICE_ID_MELLANOX_CONNECTX5EXVF: 2867 /* 2868 * These NICs support VLAN insertion from WQE and 2869 * report the wqe_vlan_insert flag. But there is the bug 2870 * and PFC control may be broken, so disable feature. 
		 */
		config->hw_vlan_insert = 0;
		config->txq_inline_min = MLX5_INLINE_HSIZE_NONE;
		break;
	default:
		config->txq_inline_min = MLX5_INLINE_HSIZE_NONE;
		break;
	}
exit:
	DRV_LOG(DEBUG, "min tx inline configured: %d", config->txq_inline_min);
}

/**
 * Configures the metadata mask fields in the shared context.
 *
 * @param [in] dev
 *   Pointer to Ethernet device.
 */
void
mlx5_set_metadata_mask(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_dev_ctx_shared *sh = priv->sh;
	uint32_t meta, mark, reg_c0;

	reg_c0 = ~priv->vport_meta_mask;
	switch (sh->config.dv_xmeta_en) {
	case MLX5_XMETA_MODE_LEGACY:
		meta = UINT32_MAX;
		mark = MLX5_FLOW_MARK_MASK;
		break;
	case MLX5_XMETA_MODE_META16:
		meta = reg_c0 >> rte_bsf32(reg_c0);
		mark = MLX5_FLOW_MARK_MASK;
		break;
	case MLX5_XMETA_MODE_META32:
		meta = UINT32_MAX;
		mark = (reg_c0 >> rte_bsf32(reg_c0)) & MLX5_FLOW_MARK_MASK;
		break;
	case MLX5_XMETA_MODE_META32_HWS:
		meta = UINT32_MAX;
		mark = MLX5_FLOW_MARK_MASK;
		break;
	default:
		meta = 0;
		mark = 0;
		MLX5_ASSERT(false);
		break;
	}
	if (sh->dv_mark_mask && sh->dv_mark_mask != mark)
		DRV_LOG(WARNING, "metadata MARK mask mismatch %08X:%08X",
			sh->dv_mark_mask, mark);
	else
		sh->dv_mark_mask = mark;
	if (sh->dv_meta_mask && sh->dv_meta_mask != meta)
		DRV_LOG(WARNING, "metadata META mask mismatch %08X:%08X",
			sh->dv_meta_mask, meta);
	else
		sh->dv_meta_mask = meta;
	if (sh->dv_regc0_mask && sh->dv_regc0_mask != reg_c0)
		DRV_LOG(WARNING, "metadata reg_c0 mask mismatch %08X:%08X",
			sh->dv_regc0_mask, reg_c0);
	else
		sh->dv_regc0_mask = reg_c0;
	DRV_LOG(DEBUG, "metadata mode %u", sh->config.dv_xmeta_en);
	DRV_LOG(DEBUG, "metadata MARK mask %08X", sh->dv_mark_mask);
	DRV_LOG(DEBUG, "metadata META mask %08X", sh->dv_meta_mask);
	DRV_LOG(DEBUG, "metadata reg_c0 mask %08X", sh->dv_regc0_mask);
}

int
rte_pmd_mlx5_get_dyn_flag_names(char *names[], unsigned int n)
{
	static const char *const dynf_names[] = {
		RTE_PMD_MLX5_FINE_GRANULARITY_INLINE,
		RTE_MBUF_DYNFLAG_METADATA_NAME,
		RTE_MBUF_DYNFLAG_TX_TIMESTAMP_NAME
	};
	unsigned int i;

	if (n < RTE_DIM(dynf_names))
		return -ENOMEM;
	for (i = 0; i < RTE_DIM(dynf_names); i++) {
		if (names[i] == NULL)
			return -EINVAL;
		strcpy(names[i], dynf_names[i]);
	}
	return RTE_DIM(dynf_names);
}

/**
 * Look for the Ethernet device belonging to the mlx5 driver.
 *
 * @param[in] port_id
 *   port_id to start looking for the device.
 * @param[in] odev
 *   Pointer to the hint device. While a device is being probed, its siblings
 *   (the master and preceding representors) might not have a driver assigned
 *   yet because mlx5_os_pci_probe() has not completed; in that case matching
 *   on the hint device is used to detect a sibling device.
 *
 * @return
 *   port_id of the found device, RTE_MAX_ETHPORTS if not found.
2975 */ 2976 uint16_t 2977 mlx5_eth_find_next(uint16_t port_id, struct rte_device *odev) 2978 { 2979 while (port_id < RTE_MAX_ETHPORTS) { 2980 struct rte_eth_dev *dev = &rte_eth_devices[port_id]; 2981 2982 if (dev->state != RTE_ETH_DEV_UNUSED && 2983 dev->device && 2984 (dev->device == odev || 2985 (dev->device->driver && 2986 dev->device->driver->name && 2987 ((strcmp(dev->device->driver->name, 2988 MLX5_PCI_DRIVER_NAME) == 0) || 2989 (strcmp(dev->device->driver->name, 2990 MLX5_AUXILIARY_DRIVER_NAME) == 0))))) 2991 break; 2992 port_id++; 2993 } 2994 if (port_id >= RTE_MAX_ETHPORTS) 2995 return RTE_MAX_ETHPORTS; 2996 return port_id; 2997 } 2998 2999 /** 3000 * Callback to remove a device. 3001 * 3002 * This function removes all Ethernet devices belong to a given device. 3003 * 3004 * @param[in] cdev 3005 * Pointer to the generic device. 3006 * 3007 * @return 3008 * 0 on success, the function cannot fail. 3009 */ 3010 int 3011 mlx5_net_remove(struct mlx5_common_device *cdev) 3012 { 3013 uint16_t port_id; 3014 int ret = 0; 3015 3016 RTE_ETH_FOREACH_DEV_OF(port_id, cdev->dev) { 3017 /* 3018 * mlx5_dev_close() is not registered to secondary process, 3019 * call the close function explicitly for secondary process. 3020 */ 3021 if (rte_eal_process_type() == RTE_PROC_SECONDARY) 3022 ret |= mlx5_dev_close(&rte_eth_devices[port_id]); 3023 else 3024 ret |= rte_eth_dev_close(port_id); 3025 } 3026 return ret == 0 ? 0 : -EIO; 3027 } 3028 3029 static const struct rte_pci_id mlx5_pci_id_map[] = { 3030 { 3031 RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX, 3032 PCI_DEVICE_ID_MELLANOX_CONNECTX4) 3033 }, 3034 { 3035 RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX, 3036 PCI_DEVICE_ID_MELLANOX_CONNECTX4VF) 3037 }, 3038 { 3039 RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX, 3040 PCI_DEVICE_ID_MELLANOX_CONNECTX4LX) 3041 }, 3042 { 3043 RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX, 3044 PCI_DEVICE_ID_MELLANOX_CONNECTX4LXVF) 3045 }, 3046 { 3047 RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX, 3048 PCI_DEVICE_ID_MELLANOX_CONNECTX5) 3049 }, 3050 { 3051 RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX, 3052 PCI_DEVICE_ID_MELLANOX_CONNECTX5VF) 3053 }, 3054 { 3055 RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX, 3056 PCI_DEVICE_ID_MELLANOX_CONNECTX5EX) 3057 }, 3058 { 3059 RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX, 3060 PCI_DEVICE_ID_MELLANOX_CONNECTX5EXVF) 3061 }, 3062 { 3063 RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX, 3064 PCI_DEVICE_ID_MELLANOX_CONNECTX5BF) 3065 }, 3066 { 3067 RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX, 3068 PCI_DEVICE_ID_MELLANOX_CONNECTX5BFVF) 3069 }, 3070 { 3071 RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX, 3072 PCI_DEVICE_ID_MELLANOX_CONNECTX6) 3073 }, 3074 { 3075 RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX, 3076 PCI_DEVICE_ID_MELLANOX_CONNECTX6VF) 3077 }, 3078 { 3079 RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX, 3080 PCI_DEVICE_ID_MELLANOX_CONNECTX6DX) 3081 }, 3082 { 3083 RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX, 3084 PCI_DEVICE_ID_MELLANOX_CONNECTXVF) 3085 }, 3086 { 3087 RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX, 3088 PCI_DEVICE_ID_MELLANOX_CONNECTX6DXBF) 3089 }, 3090 { 3091 RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX, 3092 PCI_DEVICE_ID_MELLANOX_CONNECTX6LX) 3093 }, 3094 { 3095 RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX, 3096 PCI_DEVICE_ID_MELLANOX_CONNECTX7) 3097 }, 3098 { 3099 RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX, 3100 PCI_DEVICE_ID_MELLANOX_CONNECTX7BF) 3101 }, 3102 { 3103 .vendor_id = 0 3104 } 3105 }; 3106 3107 static struct mlx5_class_driver mlx5_net_driver = { 3108 .drv_class = MLX5_CLASS_ETH, 3109 .name = RTE_STR(MLX5_ETH_DRIVER_NAME), 3110 .id_table = mlx5_pci_id_map, 3111 .probe = 
mlx5_os_net_probe, 3112 .remove = mlx5_net_remove, 3113 .probe_again = 1, 3114 .intr_lsc = 1, 3115 .intr_rmv = 1, 3116 }; 3117 3118 /* Initialize driver log type. */ 3119 RTE_LOG_REGISTER_DEFAULT(mlx5_logtype, NOTICE) 3120 3121 /** 3122 * Driver initialization routine. 3123 */ 3124 RTE_INIT(rte_mlx5_pmd_init) 3125 { 3126 pthread_mutex_init(&mlx5_dev_ctx_list_mutex, NULL); 3127 mlx5_common_init(); 3128 /* Build the static tables for Verbs conversion. */ 3129 mlx5_set_ptype_table(); 3130 mlx5_set_cksum_table(); 3131 mlx5_set_swp_types_table(); 3132 if (mlx5_glue) 3133 mlx5_class_driver_register(&mlx5_net_driver); 3134 } 3135 3136 RTE_PMD_EXPORT_NAME(MLX5_ETH_DRIVER_NAME, __COUNTER__); 3137 RTE_PMD_REGISTER_PCI_TABLE(MLX5_ETH_DRIVER_NAME, mlx5_pci_id_map); 3138 RTE_PMD_REGISTER_KMOD_DEP(MLX5_ETH_DRIVER_NAME, "* ib_uverbs & mlx5_core & mlx5_ib"); 3139
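/*
 * Illustrative sketch (application side, not part of the driver): requesting
 * the VXLAN UDP port through the public ethdev API, which is what eventually
 * reaches mlx5_udp_tunnel_port_add() above. The helper name is hypothetical;
 * mlx5 only accepts 4789 (VXLAN) and 4790 (VXLAN-GPE).
 *
 *	#include <rte_ethdev.h>
 *
 *	static int
 *	example_add_vxlan_port(uint16_t port_id)
 *	{
 *		struct rte_eth_udp_tunnel tunnel = {
 *			.udp_port = 4789,
 *			.prot_type = RTE_ETH_TUNNEL_TYPE_VXLAN,
 *		};
 *
 *		// Any other port/tunnel combination yields -ENOTSUP.
 *		return rte_eth_dev_udp_tunnel_port_add(port_id, &tunnel);
 *	}
 */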
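/*
 * Illustrative sketch (application side, not part of the driver): the usual
 * shutdown sequence that ends up in mlx5_dev_close() above. Stopping the port
 * first lets the stop stage flush default flows and traffic rules, so the
 * flush calls in mlx5_dev_close() find little left to do. The helper name is
 * hypothetical.
 *
 *	#include <rte_ethdev.h>
 *
 *	static void
 *	example_shutdown_ports(void)
 *	{
 *		uint16_t port_id;
 *
 *		RTE_ETH_FOREACH_DEV(port_id) {
 *			rte_eth_dev_stop(port_id);
 *			rte_eth_dev_close(port_id);
 *		}
 *	}
 */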
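/*
 * Illustrative sketch (not part of the driver): a secondary process only gets
 * the reduced callback set in mlx5_dev_sec_ops above, so read-only operations
 * such as statistics work there, while control-path operations like
 * dev_configure or dev_start are wired up for the primary process only. The
 * helper name is hypothetical.
 *
 *	#include <rte_eal.h>
 *	#include <rte_ethdev.h>
 *
 *	static int
 *	example_read_stats(uint16_t port_id, struct rte_eth_stats *stats)
 *	{
 *		// stats_get is part of mlx5_dev_sec_ops, so this also works
 *		// when rte_eal_process_type() == RTE_PROC_SECONDARY.
 *		return rte_eth_stats_get(port_id, stats);
 *	}
 */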
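/*
 * Illustrative sketch of the devargs parsing pattern used by
 * mlx5_port_args_check_handler()/mlx5_port_args_config() above, written with
 * the public rte_kvargs API rather than the internal mlx5_kvargs_* wrappers.
 * The "example_cfg" structure, the handler and the reduced key list are
 * assumptions for the example only.
 *
 *	#include <errno.h>
 *	#include <stdlib.h>
 *	#include <string.h>
 *	#include <rte_kvargs.h>
 *
 *	struct example_cfg {
 *		int cqe_comp;
 *		int mprq_en;
 *	};
 *
 *	static int
 *	example_handler(const char *key, const char *val, void *opaque)
 *	{
 *		struct example_cfg *cfg = opaque;
 *		long tmp;
 *
 *		errno = 0;
 *		tmp = strtol(val, NULL, 0);
 *		if (errno || tmp < 0)
 *			return -EINVAL;
 *		if (strcmp(key, MLX5_RXQ_CQE_COMP_EN) == 0)
 *			cfg->cqe_comp = !!tmp;
 *		else if (strcmp(key, MLX5_RX_MPRQ_EN) == 0)
 *			cfg->mprq_en = !!tmp;
 *		return 0;
 *	}
 *
 *	static int
 *	example_parse(const char *args, struct example_cfg *cfg)
 *	{
 *		const char *const keys[] = {
 *			MLX5_RXQ_CQE_COMP_EN, MLX5_RX_MPRQ_EN, NULL,
 *		};
 *		struct rte_kvargs *kvlist = rte_kvargs_parse(args, keys);
 *		int ret;
 *
 *		if (kvlist == NULL)
 *			return -EINVAL;
 *		ret = rte_kvargs_process(kvlist, MLX5_RXQ_CQE_COMP_EN,
 *					 example_handler, cfg);
 *		if (ret == 0)
 *			ret = rte_kvargs_process(kvlist, MLX5_RX_MPRQ_EN,
 *						 example_handler, cfg);
 *		rte_kvargs_free(kvlist);
 *		return ret;
 *	}
 *
 * Feeding "rxq_cqe_comp_en=1,mprq_en=1" to example_parse() mirrors what
 * mlx5_port_args_config() does for the full parameter list.
 */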
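/*
 * Illustrative sketch (not part of the driver): probing an already probed
 * device again with extra devargs, e.g. to instantiate representors. This is
 * the path that exercises mlx5_port_args_set_used() and
 * mlx5_probe_again_args_validate() above: per-port keys are simply marked as
 * used, while shared-context keys such as dv_flow_en must keep the values
 * given on the first probe or validation fails with EINVAL. The PCI address
 * below is a placeholder.
 *
 *	#include <rte_dev.h>
 *
 *	static int
 *	example_probe_representors(void)
 *	{
 *		// The device may already be probed as "0000:08:00.0,dv_flow_en=1".
 *		return rte_dev_probe("0000:08:00.0,dv_flow_en=1,representor=[0-1]");
 *	}
 */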
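/*
 * Illustrative sketch (not part of the driver) of what the masks computed by
 * mlx5_set_metadata_mask() above are used for: an application tagging ingress
 * packets with 32-bit metadata via rte_flow. It assumes the port was probed
 * with a dv_xmeta_en mode that enables metadata in the ingress domain and
 * that Rx queue 0 exists; the metadata value 0x1234 is arbitrary.
 *
 *	#include <stdint.h>
 *	#include <rte_errno.h>
 *	#include <rte_flow.h>
 *
 *	static int
 *	example_tag_with_meta(uint16_t port_id)
 *	{
 *		struct rte_flow_attr attr = { .ingress = 1 };
 *		struct rte_flow_item pattern[] = {
 *			{ .type = RTE_FLOW_ITEM_TYPE_ETH },
 *			{ .type = RTE_FLOW_ITEM_TYPE_END },
 *		};
 *		struct rte_flow_action_set_meta set_meta = {
 *			.data = 0x1234,
 *			.mask = UINT32_MAX,
 *		};
 *		struct rte_flow_action_queue queue = { .index = 0 };
 *		struct rte_flow_action actions[] = {
 *			{ .type = RTE_FLOW_ACTION_TYPE_SET_META, .conf = &set_meta },
 *			{ .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue },
 *			{ .type = RTE_FLOW_ACTION_TYPE_END },
 *		};
 *		struct rte_flow_error error;
 *
 *		// Register the mbuf metadata dynamic field before using it.
 *		if (rte_flow_dynf_metadata_register() < 0)
 *			return -rte_errno;
 *		if (rte_flow_create(port_id, &attr, pattern, actions, &error) == NULL)
 *			return -rte_errno;
 *		return 0;
 *	}
 */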
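/*
 * Illustrative sketch (application side, not part of the driver): consuming
 * rte_pmd_mlx5_get_dyn_flag_names() above and resolving each name to its
 * dynamic flag bit. The 64-byte name buffers and the array bound of 8 are
 * assumptions for the example; rte_mbuf_dynflag_lookup() returns -1 for a
 * flag that has not been registered yet.
 *
 *	#include <stdio.h>
 *	#include <rte_mbuf_dyn.h>
 *	#include <rte_pmd_mlx5.h>
 *
 *	static void
 *	example_dump_mlx5_dynflags(void)
 *	{
 *		char buf[8][64];
 *		char *names[8];
 *		int i, n;
 *
 *		for (i = 0; i < 8; i++)
 *			names[i] = buf[i];
 *		n = rte_pmd_mlx5_get_dyn_flag_names(names, 8);
 *		for (i = 0; i < n; i++)
 *			printf("%s -> bit %d\n", names[i],
 *			       rte_mbuf_dynflag_lookup(names[i], NULL));
 *	}
 */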