/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2015 6WIND S.A.
 * Copyright 2020 Mellanox Technologies, Ltd
 */

#include <stddef.h>
#include <unistd.h>
#include <string.h>
#include <stdint.h>
#include <stdlib.h>
#include <errno.h>
#include <net/if.h>
#include <sys/mman.h>
#include <linux/rtnetlink.h>
#include <linux/sockios.h>
#include <linux/ethtool.h>
#include <fcntl.h>

/* Verbs header. */
/* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
#ifdef PEDANTIC
#pragma GCC diagnostic ignored "-Wpedantic"
#endif
#include <infiniband/verbs.h>
#ifdef PEDANTIC
#pragma GCC diagnostic error "-Wpedantic"
#endif

#include <rte_malloc.h>
#include <rte_ethdev_driver.h>
#include <rte_ethdev_pci.h>
#include <rte_pci.h>
#include <rte_bus_pci.h>
#include <rte_common.h>
#include <rte_kvargs.h>
#include <rte_rwlock.h>
#include <rte_spinlock.h>
#include <rte_string_fns.h>
#include <rte_alarm.h>

#include <mlx5_glue.h>
#include <mlx5_devx_cmds.h>
#include <mlx5_common.h>
#include <mlx5_common_mp.h>
#include <mlx5_common_mr.h>
#include <mlx5_malloc.h>

#include "mlx5_defs.h"
#include "mlx5.h"
#include "mlx5_common_os.h"
#include "mlx5_utils.h"
#include "mlx5_rxtx.h"
#include "mlx5_autoconf.h"
#include "mlx5_mr.h"
#include "mlx5_flow.h"
#include "rte_pmd_mlx5.h"
#include "mlx5_verbs.h"

#define MLX5_TAGS_HLIST_ARRAY_SIZE 8192

#ifndef HAVE_IBV_MLX5_MOD_MPW
#define MLX5DV_CONTEXT_FLAGS_MPW_ALLOWED (1 << 2)
#define MLX5DV_CONTEXT_FLAGS_ENHANCED_MPW (1 << 3)
#endif

#ifndef HAVE_IBV_MLX5_MOD_CQE_128B_COMP
#define MLX5DV_CONTEXT_FLAGS_CQE_128B_COMP (1 << 4)
#endif
/**
 * Get mlx5 device attributes. The glue function query_device_ex() is called
 * with an out parameter of type 'struct ibv_device_attr_ex *'. Then the mlx5
 * device attributes are filled in from the glue out parameter.
 *
 * @param ctx
 *   Pointer to ibv context.
 *
 * @param device_attr
 *   Pointer to mlx5 device attributes.
 *
 * @return
 *   0 on success, non-zero error number otherwise.
 */
int
mlx5_os_get_dev_attr(void *ctx, struct mlx5_dev_attr *device_attr)
{
	int err;
	struct ibv_device_attr_ex attr_ex;

	memset(device_attr, 0, sizeof(*device_attr));
	err = mlx5_glue->query_device_ex(ctx, NULL, &attr_ex);
	if (err)
		return err;
	device_attr->device_cap_flags_ex = attr_ex.device_cap_flags_ex;
	device_attr->max_qp_wr = attr_ex.orig_attr.max_qp_wr;
	device_attr->max_sge = attr_ex.orig_attr.max_sge;
	device_attr->max_cq = attr_ex.orig_attr.max_cq;
	device_attr->max_qp = attr_ex.orig_attr.max_qp;
	device_attr->raw_packet_caps = attr_ex.raw_packet_caps;
	device_attr->max_rwq_indirection_table_size =
		attr_ex.rss_caps.max_rwq_indirection_table_size;
	device_attr->max_tso = attr_ex.tso_caps.max_tso;
	device_attr->tso_supported_qpts = attr_ex.tso_caps.supported_qpts;

	struct mlx5dv_context dv_attr = { .comp_mask = 0 };
	err = mlx5_glue->dv_query_device(ctx, &dv_attr);
	if (err)
		return err;

	device_attr->flags = dv_attr.flags;
	device_attr->comp_mask = dv_attr.comp_mask;
#ifdef HAVE_IBV_MLX5_MOD_SWP
	device_attr->sw_parsing_offloads =
		dv_attr.sw_parsing_caps.sw_parsing_offloads;
#endif
	device_attr->min_single_stride_log_num_of_bytes =
		dv_attr.striding_rq_caps.min_single_stride_log_num_of_bytes;
	device_attr->max_single_stride_log_num_of_bytes =
		dv_attr.striding_rq_caps.max_single_stride_log_num_of_bytes;
	device_attr->min_single_wqe_log_num_of_strides =
		dv_attr.striding_rq_caps.min_single_wqe_log_num_of_strides;
	device_attr->max_single_wqe_log_num_of_strides =
		dv_attr.striding_rq_caps.max_single_wqe_log_num_of_strides;
	device_attr->stride_supported_qpts =
		dv_attr.striding_rq_caps.supported_qpts;
#ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT
	device_attr->tunnel_offloads_caps = dv_attr.tunnel_offloads_caps;
#endif

	return err;
}
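/*
 * Hedged usage sketch (illustrative only, not part of the driver): a caller
 * holding an ibv context could query the combined Verbs/DV attributes like
 * this. The 'ibv_ctx' variable name below is hypothetical.
 *
 *	struct mlx5_dev_attr dev_attr;
 *	int ret = mlx5_os_get_dev_attr(ibv_ctx, &dev_attr);
 *
 *	if (ret)
 *		DRV_LOG(ERR, "device query failed: %s", strerror(ret));
 *	else
 *		DRV_LOG(DEBUG, "max_qp_wr: %d", dev_attr.max_qp_wr);
 */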
/**
 * Verbs callback to allocate memory. This function should allocate the space
 * according to the size provided, residing inside a huge page.
 * Please note that all allocation must respect the alignment from libmlx5
 * (i.e. currently sysconf(_SC_PAGESIZE)).
 *
 * @param[in] size
 *   The size in bytes of the memory to allocate.
 * @param[in] data
 *   A pointer to the callback data.
 *
 * @return
 *   Allocated buffer, NULL otherwise and rte_errno is set.
 */
static void *
mlx5_alloc_verbs_buf(size_t size, void *data)
{
	struct mlx5_priv *priv = data;
	void *ret;
	size_t alignment = sysconf(_SC_PAGESIZE);
	unsigned int socket = SOCKET_ID_ANY;

	if (priv->verbs_alloc_ctx.type == MLX5_VERBS_ALLOC_TYPE_TX_QUEUE) {
		const struct mlx5_txq_ctrl *ctrl = priv->verbs_alloc_ctx.obj;

		socket = ctrl->socket;
	} else if (priv->verbs_alloc_ctx.type ==
		   MLX5_VERBS_ALLOC_TYPE_RX_QUEUE) {
		const struct mlx5_rxq_ctrl *ctrl = priv->verbs_alloc_ctx.obj;

		socket = ctrl->socket;
	}
	MLX5_ASSERT(data != NULL);
	ret = mlx5_malloc(0, size, alignment, socket);
	if (!ret && size)
		rte_errno = ENOMEM;
	return ret;
}

/**
 * Verbs callback to free memory.
 *
 * @param[in] ptr
 *   A pointer to the memory to free.
 * @param[in] data
 *   A pointer to the callback data.
 */
static void
mlx5_free_verbs_buf(void *ptr, void *data __rte_unused)
{
	MLX5_ASSERT(data != NULL);
	mlx5_free(ptr);
}
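/*
 * Sketch of how this callback pair is wired up: mlx5_dev_spawn() below
 * registers the allocators on the Verbs context so that libmlx5 routes
 * data-plane allocations through the PMD. Reproduced here for orientation;
 * the authoritative call site is in mlx5_dev_spawn().
 *
 *	mlx5_glue->dv_set_context_attr(sh->ctx,
 *			MLX5DV_CTX_ATTR_BUF_ALLOCATORS,
 *			(void *)((uintptr_t)&(struct mlx5dv_ctx_allocators){
 *				.alloc = &mlx5_alloc_verbs_buf,
 *				.free = &mlx5_free_verbs_buf,
 *				.data = priv,
 *			}));
 */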
/**
 * Initialize DR related data within private structure.
 * Routine checks the reference counter and does actual
 * resources creation/initialization only if counter is zero.
 *
 * @param[in] priv
 *   Pointer to the private device data structure.
 *
 * @return
 *   Zero on success, positive error code otherwise.
 */
static int
mlx5_alloc_shared_dr(struct mlx5_priv *priv)
{
	struct mlx5_dev_ctx_shared *sh = priv->sh;
	char s[MLX5_HLIST_NAMESIZE];
	int err = 0;

	if (!sh->flow_tbls)
		err = mlx5_alloc_table_hash_list(priv);
	else
		DRV_LOG(DEBUG, "sh->flow_tbls[%p] already created, reuse\n",
			(void *)sh->flow_tbls);
	if (err)
		return err;
	/* Create tags hash list table. */
	snprintf(s, sizeof(s), "%s_tags", sh->ibdev_name);
	sh->tag_table = mlx5_hlist_create(s, MLX5_TAGS_HLIST_ARRAY_SIZE);
	if (!sh->tag_table) {
		DRV_LOG(ERR, "tags hash list creation failed.");
		err = ENOMEM;
		goto error;
	}
#ifdef HAVE_MLX5DV_DR
	void *domain;

	if (sh->dv_refcnt) {
		/* Shared DV/DR structures are already initialized. */
		sh->dv_refcnt++;
		priv->dr_shared = 1;
		return 0;
	}
	/* Reference counter is zero, we should initialize structures. */
	domain = mlx5_glue->dr_create_domain(sh->ctx,
					     MLX5DV_DR_DOMAIN_TYPE_NIC_RX);
	if (!domain) {
		DRV_LOG(ERR, "ingress mlx5dv_dr_create_domain failed");
		err = errno;
		goto error;
	}
	sh->rx_domain = domain;
	domain = mlx5_glue->dr_create_domain(sh->ctx,
					     MLX5DV_DR_DOMAIN_TYPE_NIC_TX);
	if (!domain) {
		DRV_LOG(ERR, "egress mlx5dv_dr_create_domain failed");
		err = errno;
		goto error;
	}
	pthread_mutex_init(&sh->dv_mutex, NULL);
	sh->tx_domain = domain;
#ifdef HAVE_MLX5DV_DR_ESWITCH
	if (priv->config.dv_esw_en) {
		domain = mlx5_glue->dr_create_domain
			(sh->ctx, MLX5DV_DR_DOMAIN_TYPE_FDB);
		if (!domain) {
			DRV_LOG(ERR, "FDB mlx5dv_dr_create_domain failed");
			err = errno;
			goto error;
		}
		sh->fdb_domain = domain;
		sh->esw_drop_action = mlx5_glue->dr_create_flow_action_drop();
	}
#endif
	if (priv->config.reclaim_mode == MLX5_RCM_AGGR) {
		mlx5_glue->dr_reclaim_domain_memory(sh->rx_domain, 1);
		mlx5_glue->dr_reclaim_domain_memory(sh->tx_domain, 1);
		if (sh->fdb_domain)
			mlx5_glue->dr_reclaim_domain_memory(sh->fdb_domain, 1);
	}
	sh->pop_vlan_action = mlx5_glue->dr_create_flow_action_pop_vlan();
#endif /* HAVE_MLX5DV_DR */
	sh->dv_refcnt++;
	priv->dr_shared = 1;
	return 0;
error:
	/* Rollback the created objects. */
	if (sh->rx_domain) {
		mlx5_glue->dr_destroy_domain(sh->rx_domain);
		sh->rx_domain = NULL;
	}
	if (sh->tx_domain) {
		mlx5_glue->dr_destroy_domain(sh->tx_domain);
		sh->tx_domain = NULL;
	}
	if (sh->fdb_domain) {
		mlx5_glue->dr_destroy_domain(sh->fdb_domain);
		sh->fdb_domain = NULL;
	}
	if (sh->esw_drop_action) {
		mlx5_glue->destroy_flow_action(sh->esw_drop_action);
		sh->esw_drop_action = NULL;
	}
	if (sh->pop_vlan_action) {
		mlx5_glue->destroy_flow_action(sh->pop_vlan_action);
		sh->pop_vlan_action = NULL;
	}
	if (sh->tag_table) {
		/* Tags must be destroyed with the flows before this point. */
		mlx5_hlist_destroy(sh->tag_table, NULL, NULL);
		sh->tag_table = NULL;
	}
	mlx5_free_table_hash_list(priv);
	return err;
}

/**
 * Destroy DR related data within private structure.
 *
 * @param[in] priv
 *   Pointer to the private device data structure.
 */
void
mlx5_os_free_shared_dr(struct mlx5_priv *priv)
{
	struct mlx5_dev_ctx_shared *sh;

	if (!priv->dr_shared)
		return;
	priv->dr_shared = 0;
	sh = priv->sh;
	MLX5_ASSERT(sh);
#ifdef HAVE_MLX5DV_DR
	MLX5_ASSERT(sh->dv_refcnt);
	if (sh->dv_refcnt && --sh->dv_refcnt)
		return;
	if (sh->rx_domain) {
		mlx5_glue->dr_destroy_domain(sh->rx_domain);
		sh->rx_domain = NULL;
	}
	if (sh->tx_domain) {
		mlx5_glue->dr_destroy_domain(sh->tx_domain);
		sh->tx_domain = NULL;
	}
#ifdef HAVE_MLX5DV_DR_ESWITCH
	if (sh->fdb_domain) {
		mlx5_glue->dr_destroy_domain(sh->fdb_domain);
		sh->fdb_domain = NULL;
	}
	if (sh->esw_drop_action) {
		mlx5_glue->destroy_flow_action(sh->esw_drop_action);
		sh->esw_drop_action = NULL;
	}
#endif
	if (sh->pop_vlan_action) {
		mlx5_glue->destroy_flow_action(sh->pop_vlan_action);
		sh->pop_vlan_action = NULL;
	}
	pthread_mutex_destroy(&sh->dv_mutex);
#endif /* HAVE_MLX5DV_DR */
	if (sh->tag_table) {
		/* Tags must be destroyed with the flows before this point. */
		mlx5_hlist_destroy(sh->tag_table, NULL, NULL);
		sh->tag_table = NULL;
	}
	mlx5_free_table_hash_list(priv);
}
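/*
 * Hedged pairing sketch (illustrative only): mlx5_alloc_shared_dr() is
 * reference counted per shared context, so each successful call must be
 * balanced by mlx5_os_free_shared_dr() on the same port. mlx5_dev_spawn()
 * below follows this pattern, releasing in its error path.
 *
 *	if (config.dv_flow_en && mlx5_alloc_shared_dr(priv) == 0) {
 *		... use sh->rx_domain / sh->tx_domain ...
 *		mlx5_os_free_shared_dr(priv);
 *	}
 */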
/**
 * Spawn an Ethernet device from Verbs information.
 *
 * @param dpdk_dev
 *   Backing DPDK device.
 * @param spawn
 *   Verbs device parameters (name, port, switch_info) to spawn.
 * @param config
 *   Device configuration parameters.
 *
 * @return
 *   A valid Ethernet device object on success, NULL otherwise and rte_errno
 *   is set. The following errors are defined:
 *
 *   EBUSY: device is not supposed to be spawned.
 *   EEXIST: device is already spawned.
 */
static struct rte_eth_dev *
mlx5_dev_spawn(struct rte_device *dpdk_dev,
	       struct mlx5_dev_spawn_data *spawn,
	       struct mlx5_dev_config config)
{
	const struct mlx5_switch_info *switch_info = &spawn->info;
	struct mlx5_dev_ctx_shared *sh = NULL;
	struct ibv_port_attr port_attr;
	struct mlx5dv_context dv_attr = { .comp_mask = 0 };
	struct rte_eth_dev *eth_dev = NULL;
	struct mlx5_priv *priv = NULL;
	int err = 0;
	unsigned int hw_padding = 0;
	unsigned int mps;
	unsigned int cqe_comp;
	unsigned int cqe_pad = 0;
	unsigned int tunnel_en = 0;
	unsigned int mpls_en = 0;
	unsigned int swp = 0;
	unsigned int mprq = 0;
	unsigned int mprq_min_stride_size_n = 0;
	unsigned int mprq_max_stride_size_n = 0;
	unsigned int mprq_min_stride_num_n = 0;
	unsigned int mprq_max_stride_num_n = 0;
	struct rte_ether_addr mac;
	char name[RTE_ETH_NAME_MAX_LEN];
	int own_domain_id = 0;
	uint16_t port_id;
	unsigned int i;
#ifdef HAVE_MLX5DV_DR_DEVX_PORT
	struct mlx5dv_devx_port devx_port = { .comp_mask = 0 };
#endif

	/* Determine if this port representor is supposed to be spawned. */
	if (switch_info->representor && dpdk_dev->devargs) {
		struct rte_eth_devargs eth_da;

		err = rte_eth_devargs_parse(dpdk_dev->devargs->args, &eth_da);
		if (err) {
			rte_errno = -err;
			DRV_LOG(ERR, "failed to process device arguments: %s",
				strerror(rte_errno));
			return NULL;
		}
		for (i = 0; i < eth_da.nb_representor_ports; ++i)
			if (eth_da.representor_ports[i] ==
			    (uint16_t)switch_info->port_name)
				break;
		if (i == eth_da.nb_representor_ports) {
			rte_errno = EBUSY;
			return NULL;
		}
	}
	/* Build device name. */
	if (spawn->pf_bond < 0) {
		/* Single device. */
		if (!switch_info->representor)
			strlcpy(name, dpdk_dev->name, sizeof(name));
		else
			snprintf(name, sizeof(name), "%s_representor_%u",
				 dpdk_dev->name, switch_info->port_name);
	} else {
		/* Bonding device. */
		if (!switch_info->representor)
			snprintf(name, sizeof(name), "%s_%s",
				 dpdk_dev->name,
				 mlx5_os_get_dev_device_name(spawn->phys_dev));
		else
			snprintf(name, sizeof(name), "%s_%s_representor_%u",
				 dpdk_dev->name,
				 mlx5_os_get_dev_device_name(spawn->phys_dev),
				 switch_info->port_name);
	}
	/* Check if the device is already spawned. */
	if (rte_eth_dev_get_port_by_name(name, &port_id) == 0) {
		rte_errno = EEXIST;
		return NULL;
	}
	DRV_LOG(DEBUG, "naming Ethernet device \"%s\"", name);
	if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
		struct mlx5_mp_id mp_id;

		eth_dev = rte_eth_dev_attach_secondary(name);
		if (eth_dev == NULL) {
			DRV_LOG(ERR, "can not attach rte ethdev");
			rte_errno = ENOMEM;
			return NULL;
		}
		eth_dev->device = dpdk_dev;
		eth_dev->dev_ops = &mlx5_os_dev_sec_ops;
		err = mlx5_proc_priv_init(eth_dev);
		if (err)
			return NULL;
		mp_id.port_id = eth_dev->data->port_id;
		strlcpy(mp_id.name, MLX5_MP_NAME, RTE_MP_MAX_NAME_LEN);
		/* Receive command fd from primary process. */
		err = mlx5_mp_req_verbs_cmd_fd(&mp_id);
		if (err < 0)
			goto err_secondary;
		/* Remap UAR for Tx queues. */
		err = mlx5_tx_uar_init_secondary(eth_dev, err);
		if (err)
			goto err_secondary;
		/*
		 * Ethdev pointer is still required as input since
		 * the primary device is not accessible from the
		 * secondary process.
		 */
		eth_dev->rx_pkt_burst = mlx5_select_rx_function(eth_dev);
		eth_dev->tx_pkt_burst = mlx5_select_tx_function(eth_dev);
		return eth_dev;
err_secondary:
		mlx5_dev_close(eth_dev);
		return NULL;
	}
	/*
	 * Some parameters ("tx_db_nc" in particular) are needed in
	 * advance to create the dv/verbs device context. We process the
	 * devargs here to get those, and process the devargs again later
	 * to override some hardware settings.
	 */
	err = mlx5_args(&config, dpdk_dev->devargs);
	if (err) {
		err = rte_errno;
		DRV_LOG(ERR, "failed to process device arguments: %s",
			strerror(rte_errno));
		goto error;
	}
	mlx5_malloc_mem_select(config.sys_mem_en);
	sh = mlx5_alloc_shared_dev_ctx(spawn, &config);
	if (!sh)
		return NULL;
	config.devx = sh->devx;
#ifdef HAVE_MLX5DV_DR_ACTION_DEST_DEVX_TIR
	config.dest_tir = 1;
#endif
#ifdef HAVE_IBV_MLX5_MOD_SWP
	dv_attr.comp_mask |= MLX5DV_CONTEXT_MASK_SWP;
#endif
	/*
	 * Multi-packet send is supported by ConnectX-4 Lx PF as well
	 * as all ConnectX-5 devices.
	 */
#ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT
	dv_attr.comp_mask |= MLX5DV_CONTEXT_MASK_TUNNEL_OFFLOADS;
#endif
#ifdef HAVE_IBV_DEVICE_STRIDING_RQ_SUPPORT
	dv_attr.comp_mask |= MLX5DV_CONTEXT_MASK_STRIDING_RQ;
#endif
	mlx5_glue->dv_query_device(sh->ctx, &dv_attr);
	if (dv_attr.flags & MLX5DV_CONTEXT_FLAGS_MPW_ALLOWED) {
		if (dv_attr.flags & MLX5DV_CONTEXT_FLAGS_ENHANCED_MPW) {
			DRV_LOG(DEBUG, "enhanced MPW is supported");
			mps = MLX5_MPW_ENHANCED;
		} else {
			DRV_LOG(DEBUG, "MPW is supported");
			mps = MLX5_MPW;
		}
	} else {
		DRV_LOG(DEBUG, "MPW isn't supported");
		mps = MLX5_MPW_DISABLED;
	}
#ifdef HAVE_IBV_MLX5_MOD_SWP
	if (dv_attr.comp_mask & MLX5DV_CONTEXT_MASK_SWP)
		swp = dv_attr.sw_parsing_caps.sw_parsing_offloads;
	DRV_LOG(DEBUG, "SWP support: %u", swp);
#endif
	config.swp = !!swp;
#ifdef HAVE_IBV_DEVICE_STRIDING_RQ_SUPPORT
	if (dv_attr.comp_mask & MLX5DV_CONTEXT_MASK_STRIDING_RQ) {
		struct mlx5dv_striding_rq_caps mprq_caps =
			dv_attr.striding_rq_caps;

		DRV_LOG(DEBUG, "\tmin_single_stride_log_num_of_bytes: %d",
			mprq_caps.min_single_stride_log_num_of_bytes);
		DRV_LOG(DEBUG, "\tmax_single_stride_log_num_of_bytes: %d",
			mprq_caps.max_single_stride_log_num_of_bytes);
		DRV_LOG(DEBUG, "\tmin_single_wqe_log_num_of_strides: %d",
			mprq_caps.min_single_wqe_log_num_of_strides);
		DRV_LOG(DEBUG, "\tmax_single_wqe_log_num_of_strides: %d",
			mprq_caps.max_single_wqe_log_num_of_strides);
		DRV_LOG(DEBUG, "\tsupported_qpts: %d",
			mprq_caps.supported_qpts);
		DRV_LOG(DEBUG, "device supports Multi-Packet RQ");
		mprq = 1;
		mprq_min_stride_size_n =
			mprq_caps.min_single_stride_log_num_of_bytes;
		mprq_max_stride_size_n =
			mprq_caps.max_single_stride_log_num_of_bytes;
		mprq_min_stride_num_n =
			mprq_caps.min_single_wqe_log_num_of_strides;
		mprq_max_stride_num_n =
			mprq_caps.max_single_wqe_log_num_of_strides;
	}
#endif
	if (RTE_CACHE_LINE_SIZE == 128 &&
	    !(dv_attr.flags & MLX5DV_CONTEXT_FLAGS_CQE_128B_COMP))
		cqe_comp = 0;
	else
		cqe_comp = 1;
	config.cqe_comp = cqe_comp;
#ifdef HAVE_IBV_MLX5_MOD_CQE_128B_PAD
	/* Whether device supports 128B Rx CQE padding. */
	cqe_pad = RTE_CACHE_LINE_SIZE == 128 &&
		  (dv_attr.flags & MLX5DV_CONTEXT_FLAGS_CQE_128B_PAD);
#endif
#ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT
	if (dv_attr.comp_mask & MLX5DV_CONTEXT_MASK_TUNNEL_OFFLOADS) {
		tunnel_en = ((dv_attr.tunnel_offloads_caps &
			      MLX5DV_RAW_PACKET_CAP_TUNNELED_OFFLOAD_VXLAN) &&
			     (dv_attr.tunnel_offloads_caps &
			      MLX5DV_RAW_PACKET_CAP_TUNNELED_OFFLOAD_GRE) &&
			     (dv_attr.tunnel_offloads_caps &
			      MLX5DV_RAW_PACKET_CAP_TUNNELED_OFFLOAD_GENEVE));
	}
	DRV_LOG(DEBUG, "tunnel offloading is %ssupported",
		tunnel_en ? "" : "not ");
#else
	DRV_LOG(WARNING,
		"tunnel offloading disabled due to old OFED/rdma-core version");
#endif
	config.tunnel_en = tunnel_en;
#ifdef HAVE_IBV_DEVICE_MPLS_SUPPORT
	mpls_en = ((dv_attr.tunnel_offloads_caps &
		    MLX5DV_RAW_PACKET_CAP_TUNNELED_OFFLOAD_CW_MPLS_OVER_GRE) &&
		   (dv_attr.tunnel_offloads_caps &
		    MLX5DV_RAW_PACKET_CAP_TUNNELED_OFFLOAD_CW_MPLS_OVER_UDP));
	DRV_LOG(DEBUG, "MPLS over GRE/UDP tunnel offloading is %ssupported",
		mpls_en ? "" : "not ");
"" : "not "); 600 #else 601 DRV_LOG(WARNING, "MPLS over GRE/UDP tunnel offloading disabled due to" 602 " old OFED/rdma-core version or firmware configuration"); 603 #endif 604 config.mpls_en = mpls_en; 605 /* Check port status. */ 606 err = mlx5_glue->query_port(sh->ctx, spawn->phys_port, &port_attr); 607 if (err) { 608 DRV_LOG(ERR, "port query failed: %s", strerror(err)); 609 goto error; 610 } 611 if (port_attr.link_layer != IBV_LINK_LAYER_ETHERNET) { 612 DRV_LOG(ERR, "port is not configured in Ethernet mode"); 613 err = EINVAL; 614 goto error; 615 } 616 if (port_attr.state != IBV_PORT_ACTIVE) 617 DRV_LOG(DEBUG, "port is not active: \"%s\" (%d)", 618 mlx5_glue->port_state_str(port_attr.state), 619 port_attr.state); 620 /* Allocate private eth device data. */ 621 priv = mlx5_malloc(MLX5_MEM_ZERO | MLX5_MEM_RTE, 622 sizeof(*priv), 623 RTE_CACHE_LINE_SIZE, SOCKET_ID_ANY); 624 if (priv == NULL) { 625 DRV_LOG(ERR, "priv allocation failure"); 626 err = ENOMEM; 627 goto error; 628 } 629 priv->sh = sh; 630 priv->dev_port = spawn->phys_port; 631 priv->pci_dev = spawn->pci_dev; 632 priv->mtu = RTE_ETHER_MTU; 633 priv->mp_id.port_id = port_id; 634 strlcpy(priv->mp_id.name, MLX5_MP_NAME, RTE_MP_MAX_NAME_LEN); 635 /* Some internal functions rely on Netlink sockets, open them now. */ 636 priv->nl_socket_rdma = mlx5_nl_init(NETLINK_RDMA); 637 priv->nl_socket_route = mlx5_nl_init(NETLINK_ROUTE); 638 priv->representor = !!switch_info->representor; 639 priv->master = !!switch_info->master; 640 priv->domain_id = RTE_ETH_DEV_SWITCH_DOMAIN_ID_INVALID; 641 priv->vport_meta_tag = 0; 642 priv->vport_meta_mask = 0; 643 priv->pf_bond = spawn->pf_bond; 644 #ifdef HAVE_MLX5DV_DR_DEVX_PORT 645 /* 646 * The DevX port query API is implemented. E-Switch may use 647 * either vport or reg_c[0] metadata register to match on 648 * vport index. The engaged part of metadata register is 649 * defined by mask. 650 */ 651 if (switch_info->representor || switch_info->master) { 652 devx_port.comp_mask = MLX5DV_DEVX_PORT_VPORT | 653 MLX5DV_DEVX_PORT_MATCH_REG_C_0; 654 err = mlx5_glue->devx_port_query(sh->ctx, spawn->phys_port, 655 &devx_port); 656 if (err) { 657 DRV_LOG(WARNING, 658 "can't query devx port %d on device %s", 659 spawn->phys_port, 660 mlx5_os_get_dev_device_name(spawn->phys_dev)); 661 devx_port.comp_mask = 0; 662 } 663 } 664 if (devx_port.comp_mask & MLX5DV_DEVX_PORT_MATCH_REG_C_0) { 665 priv->vport_meta_tag = devx_port.reg_c_0.value; 666 priv->vport_meta_mask = devx_port.reg_c_0.mask; 667 if (!priv->vport_meta_mask) { 668 DRV_LOG(ERR, "vport zero mask for port %d" 669 " on bonding device %s", 670 spawn->phys_port, 671 mlx5_os_get_dev_device_name 672 (spawn->phys_dev)); 673 err = ENOTSUP; 674 goto error; 675 } 676 if (priv->vport_meta_tag & ~priv->vport_meta_mask) { 677 DRV_LOG(ERR, "invalid vport tag for port %d" 678 " on bonding device %s", 679 spawn->phys_port, 680 mlx5_os_get_dev_device_name 681 (spawn->phys_dev)); 682 err = ENOTSUP; 683 goto error; 684 } 685 } 686 if (devx_port.comp_mask & MLX5DV_DEVX_PORT_VPORT) { 687 priv->vport_id = devx_port.vport_num; 688 } else if (spawn->pf_bond >= 0) { 689 DRV_LOG(ERR, "can't deduce vport index for port %d" 690 " on bonding device %s", 691 spawn->phys_port, 692 mlx5_os_get_dev_device_name(spawn->phys_dev)); 693 err = ENOTSUP; 694 goto error; 695 } else { 696 /* Suppose vport index in compatible way. */ 697 priv->vport_id = switch_info->representor ? 
#else
	/*
	 * Kernel/rdma_core supports single E-Switch per PF configurations
	 * only and the vport_id field contains the vport index for the
	 * associated VF, which is deduced from the representor port name.
	 * For example, let's have the IB device port 10, it has
	 * attached network device eth0, which has port name attribute
	 * pf0vf2, we can deduce the VF number as 2, and set vport index
	 * as 3 (2+1). This assignment schema should be changed if the
	 * multiple E-Switch instances per PF configurations or/and PCI
	 * subfunctions are added.
	 */
	priv->vport_id = switch_info->representor ?
			 switch_info->port_name + 1 : -1;
#endif
	/* representor_id field keeps the unmodified VF index. */
	priv->representor_id = switch_info->representor ?
			       switch_info->port_name : -1;
	/*
	 * Look for sibling devices in order to reuse their switch domain
	 * if any, otherwise allocate one.
	 */
	MLX5_ETH_FOREACH_DEV(port_id, priv->pci_dev) {
		const struct mlx5_priv *opriv =
			rte_eth_devices[port_id].data->dev_private;

		if (!opriv ||
		    opriv->sh != priv->sh ||
		    opriv->domain_id ==
		    RTE_ETH_DEV_SWITCH_DOMAIN_ID_INVALID)
			continue;
		priv->domain_id = opriv->domain_id;
		break;
	}
	if (priv->domain_id == RTE_ETH_DEV_SWITCH_DOMAIN_ID_INVALID) {
		err = rte_eth_switch_domain_alloc(&priv->domain_id);
		if (err) {
			err = rte_errno;
			DRV_LOG(ERR, "unable to allocate switch domain: %s",
				strerror(rte_errno));
			goto error;
		}
		own_domain_id = 1;
	}
	/* Override some values set by hardware configuration. */
	mlx5_args(&config, dpdk_dev->devargs);
	err = mlx5_dev_check_sibling_config(priv, &config);
	if (err)
		goto error;
	config.hw_csum = !!(sh->device_attr.device_cap_flags_ex &
			    IBV_DEVICE_RAW_IP_CSUM);
	DRV_LOG(DEBUG, "checksum offloading is %ssupported",
		(config.hw_csum ? "" : "not "));
#if !defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42) && \
	!defined(HAVE_IBV_DEVICE_COUNTERS_SET_V45)
	DRV_LOG(DEBUG, "counters are not supported");
#endif
#if !defined(HAVE_IBV_FLOW_DV_SUPPORT) || !defined(HAVE_MLX5DV_DR)
	if (config.dv_flow_en) {
		DRV_LOG(WARNING, "DV flow is not supported");
		config.dv_flow_en = 0;
	}
#endif
	config.ind_table_max_size =
		sh->device_attr.max_rwq_indirection_table_size;
	/*
	 * Remove this check once DPDK supports larger/variable
	 * indirection tables.
	 */
	if (config.ind_table_max_size > (unsigned int)ETH_RSS_RETA_SIZE_512)
		config.ind_table_max_size = ETH_RSS_RETA_SIZE_512;
	DRV_LOG(DEBUG, "maximum Rx indirection table size is %u",
		config.ind_table_max_size);
	config.hw_vlan_strip = !!(sh->device_attr.raw_packet_caps &
				  IBV_RAW_PACKET_CAP_CVLAN_STRIPPING);
	DRV_LOG(DEBUG, "VLAN stripping is %ssupported",
		(config.hw_vlan_strip ? "" : "not "));
"" : "not ")); 777 config.hw_fcs_strip = !!(sh->device_attr.raw_packet_caps & 778 IBV_RAW_PACKET_CAP_SCATTER_FCS); 779 #if defined(HAVE_IBV_WQ_FLAG_RX_END_PADDING) 780 hw_padding = !!sh->device_attr.rx_pad_end_addr_align; 781 #elif defined(HAVE_IBV_WQ_FLAGS_PCI_WRITE_END_PADDING) 782 hw_padding = !!(sh->device_attr.device_cap_flags_ex & 783 IBV_DEVICE_PCI_WRITE_END_PADDING); 784 #endif 785 if (config.hw_padding && !hw_padding) { 786 DRV_LOG(DEBUG, "Rx end alignment padding isn't supported"); 787 config.hw_padding = 0; 788 } else if (config.hw_padding) { 789 DRV_LOG(DEBUG, "Rx end alignment padding is enabled"); 790 } 791 config.tso = (sh->device_attr.max_tso > 0 && 792 (sh->device_attr.tso_supported_qpts & 793 (1 << IBV_QPT_RAW_PACKET))); 794 if (config.tso) 795 config.tso_max_payload_sz = sh->device_attr.max_tso; 796 /* 797 * MPW is disabled by default, while the Enhanced MPW is enabled 798 * by default. 799 */ 800 if (config.mps == MLX5_ARG_UNSET) 801 config.mps = (mps == MLX5_MPW_ENHANCED) ? MLX5_MPW_ENHANCED : 802 MLX5_MPW_DISABLED; 803 else 804 config.mps = config.mps ? mps : MLX5_MPW_DISABLED; 805 DRV_LOG(INFO, "%sMPS is %s", 806 config.mps == MLX5_MPW_ENHANCED ? "enhanced " : 807 config.mps == MLX5_MPW ? "legacy " : "", 808 config.mps != MLX5_MPW_DISABLED ? "enabled" : "disabled"); 809 if (config.cqe_comp && !cqe_comp) { 810 DRV_LOG(WARNING, "Rx CQE compression isn't supported"); 811 config.cqe_comp = 0; 812 } 813 if (config.cqe_pad && !cqe_pad) { 814 DRV_LOG(WARNING, "Rx CQE padding isn't supported"); 815 config.cqe_pad = 0; 816 } else if (config.cqe_pad) { 817 DRV_LOG(INFO, "Rx CQE padding is enabled"); 818 } 819 if (config.devx) { 820 priv->counter_fallback = 0; 821 err = mlx5_devx_cmd_query_hca_attr(sh->ctx, &config.hca_attr); 822 if (err) { 823 err = -err; 824 goto error; 825 } 826 if (!config.hca_attr.flow_counters_dump) 827 priv->counter_fallback = 1; 828 #ifndef HAVE_IBV_DEVX_ASYNC 829 priv->counter_fallback = 1; 830 #endif 831 if (priv->counter_fallback) 832 DRV_LOG(INFO, "Use fall-back DV counter management"); 833 /* Check for LRO support. */ 834 if (config.dest_tir && config.hca_attr.lro_cap && 835 config.dv_flow_en) { 836 /* TBD check tunnel lro caps. */ 837 config.lro.supported = config.hca_attr.lro_cap; 838 DRV_LOG(DEBUG, "Device supports LRO"); 839 /* 840 * If LRO timeout is not configured by application, 841 * use the minimal supported value. 842 */ 843 if (!config.lro.timeout) 844 config.lro.timeout = 845 config.hca_attr.lro_timer_supported_periods[0]; 846 DRV_LOG(DEBUG, "LRO session timeout set to %d usec", 847 config.lro.timeout); 848 } 849 #if defined(HAVE_MLX5DV_DR) && defined(HAVE_MLX5_DR_CREATE_ACTION_FLOW_METER) 850 if (config.hca_attr.qos.sup && config.hca_attr.qos.srtcm_sup && 851 config.dv_flow_en) { 852 uint8_t reg_c_mask = 853 config.hca_attr.qos.flow_meter_reg_c_ids; 854 /* 855 * Meter needs two REG_C's for color match and pre-sfx 856 * flow match. Here get the REG_C for color match. 857 * REG_C_0 and REG_C_1 is reserved for metadata feature. 
			reg_c_mask &= 0xfc;
			if (__builtin_popcount(reg_c_mask) < 1) {
				priv->mtr_en = 0;
				DRV_LOG(WARNING, "No available register for"
					" meter.");
			} else {
				priv->mtr_color_reg = ffs(reg_c_mask) - 1 +
						      REG_C_0;
				priv->mtr_en = 1;
				priv->mtr_reg_share =
					config.hca_attr.qos.flow_meter_reg_share;
				DRV_LOG(DEBUG,
					"The REG_C used by the meter is %d",
					priv->mtr_color_reg);
			}
		}
#endif
	}
	if (config.tx_pp) {
		DRV_LOG(DEBUG, "Timestamp counter frequency %u kHz",
			config.hca_attr.dev_freq_khz);
		DRV_LOG(DEBUG, "Packet pacing is %ssupported",
			config.hca_attr.qos.packet_pacing ? "" : "not ");
		DRV_LOG(DEBUG, "Cross channel ops are %ssupported",
			config.hca_attr.cross_channel ? "" : "not ");
		DRV_LOG(DEBUG, "WQE index ignore is %ssupported",
			config.hca_attr.wqe_index_ignore ? "" : "not ");
		DRV_LOG(DEBUG, "Non-wire SQ feature is %ssupported",
			config.hca_attr.non_wire_sq ? "" : "not ");
		DRV_LOG(DEBUG, "Static WQE SQ feature is %ssupported (%d)",
			config.hca_attr.log_max_static_sq_wq ? "" : "not ",
			config.hca_attr.log_max_static_sq_wq);
		DRV_LOG(DEBUG, "WQE rate PP mode is %ssupported",
			config.hca_attr.qos.wqe_rate_pp ? "" : "not ");
		if (!config.devx) {
			DRV_LOG(ERR, "DevX is required for packet pacing");
			err = ENODEV;
			goto error;
		}
		if (!config.hca_attr.qos.packet_pacing) {
			DRV_LOG(ERR, "Packet pacing is not supported");
			err = ENODEV;
			goto error;
		}
		if (!config.hca_attr.cross_channel) {
			DRV_LOG(ERR, "Cross channel operations are"
				" required for packet pacing");
			err = ENODEV;
			goto error;
		}
		if (!config.hca_attr.wqe_index_ignore) {
			DRV_LOG(ERR, "WQE index ignore feature is"
				" required for packet pacing");
			err = ENODEV;
			goto error;
		}
		if (!config.hca_attr.non_wire_sq) {
			DRV_LOG(ERR, "Non-wire SQ feature is"
				" required for packet pacing");
			err = ENODEV;
			goto error;
		}
		if (!config.hca_attr.log_max_static_sq_wq) {
			DRV_LOG(ERR, "Static WQE SQ feature is"
				" required for packet pacing");
			err = ENODEV;
			goto error;
		}
		if (!config.hca_attr.qos.wqe_rate_pp) {
			DRV_LOG(ERR, "WQE rate mode is required"
				" for packet pacing");
			err = ENODEV;
			goto error;
		}
#ifndef HAVE_MLX5DV_DEVX_UAR_OFFSET
		DRV_LOG(ERR, "DevX does not provide UAR offset,"
			" can't create queues for packet pacing");
		err = ENODEV;
		goto error;
#endif
	}
	if (config.devx) {
		uint32_t reg[MLX5_ST_SZ_DW(register_mtutc)];

		err = mlx5_devx_cmd_register_read
			(sh->ctx, MLX5_REGISTER_ID_MTUTC, 0,
			 reg, MLX5_ST_SZ_DW(register_mtutc));
		if (!err) {
			uint32_t ts_mode;

			/* MTUTC register is read successfully. */
			ts_mode = MLX5_GET(register_mtutc, reg,
					   time_stamp_mode);
			if (ts_mode == MLX5_MTUTC_TIMESTAMP_MODE_REAL_TIME)
				config.rt_timestamp = 1;
		} else {
			/* Kernel does not support register reading. */
			if (config.hca_attr.dev_freq_khz ==
			    (NS_PER_S / MS_PER_S))
				config.rt_timestamp = 1;
		}
	}
	/*
	 * If HW has a bug with tunnel packet decapsulation and
	 * scatter FCS, and decapsulation is needed, clear the hw_fcs_strip
	 * bit. Then DEV_RX_OFFLOAD_KEEP_CRC bit will not be set anymore.
	 */
	if (config.hca_attr.scatter_fcs_w_decap_disable && config.decap_en)
		config.hw_fcs_strip = 0;
	DRV_LOG(DEBUG, "FCS stripping configuration is %ssupported",
		(config.hw_fcs_strip ? "" : "not "));
"" : "not ")); 969 if (config.mprq.enabled && mprq) { 970 if (config.mprq.stride_num_n && 971 (config.mprq.stride_num_n > mprq_max_stride_num_n || 972 config.mprq.stride_num_n < mprq_min_stride_num_n)) { 973 config.mprq.stride_num_n = 974 RTE_MIN(RTE_MAX(MLX5_MPRQ_STRIDE_NUM_N, 975 mprq_min_stride_num_n), 976 mprq_max_stride_num_n); 977 DRV_LOG(WARNING, 978 "the number of strides" 979 " for Multi-Packet RQ is out of range," 980 " setting default value (%u)", 981 1 << config.mprq.stride_num_n); 982 } 983 if (config.mprq.stride_size_n && 984 (config.mprq.stride_size_n > mprq_max_stride_size_n || 985 config.mprq.stride_size_n < mprq_min_stride_size_n)) { 986 config.mprq.stride_size_n = 987 RTE_MIN(RTE_MAX(MLX5_MPRQ_STRIDE_SIZE_N, 988 mprq_min_stride_size_n), 989 mprq_max_stride_size_n); 990 DRV_LOG(WARNING, 991 "the size of a stride" 992 " for Multi-Packet RQ is out of range," 993 " setting default value (%u)", 994 1 << config.mprq.stride_size_n); 995 } 996 config.mprq.min_stride_size_n = mprq_min_stride_size_n; 997 config.mprq.max_stride_size_n = mprq_max_stride_size_n; 998 } else if (config.mprq.enabled && !mprq) { 999 DRV_LOG(WARNING, "Multi-Packet RQ isn't supported"); 1000 config.mprq.enabled = 0; 1001 } 1002 if (config.max_dump_files_num == 0) 1003 config.max_dump_files_num = 128; 1004 eth_dev = rte_eth_dev_allocate(name); 1005 if (eth_dev == NULL) { 1006 DRV_LOG(ERR, "can not allocate rte ethdev"); 1007 err = ENOMEM; 1008 goto error; 1009 } 1010 /* Flag to call rte_eth_dev_release_port() in rte_eth_dev_close(). */ 1011 eth_dev->data->dev_flags |= RTE_ETH_DEV_CLOSE_REMOVE; 1012 if (priv->representor) { 1013 eth_dev->data->dev_flags |= RTE_ETH_DEV_REPRESENTOR; 1014 eth_dev->data->representor_id = priv->representor_id; 1015 } 1016 /* 1017 * Store associated network device interface index. This index 1018 * is permanent throughout the lifetime of device. So, we may store 1019 * the ifindex here and use the cached value further. 1020 */ 1021 MLX5_ASSERT(spawn->ifindex); 1022 priv->if_index = spawn->ifindex; 1023 eth_dev->data->dev_private = priv; 1024 priv->dev_data = eth_dev->data; 1025 eth_dev->data->mac_addrs = priv->mac; 1026 eth_dev->device = dpdk_dev; 1027 /* Configure the first MAC address by default. */ 1028 if (mlx5_get_mac(eth_dev, &mac.addr_bytes)) { 1029 DRV_LOG(ERR, 1030 "port %u cannot get MAC address, is mlx5_en" 1031 " loaded? (errno: %s)", 1032 eth_dev->data->port_id, strerror(rte_errno)); 1033 err = ENODEV; 1034 goto error; 1035 } 1036 DRV_LOG(INFO, 1037 "port %u MAC address is %02x:%02x:%02x:%02x:%02x:%02x", 1038 eth_dev->data->port_id, 1039 mac.addr_bytes[0], mac.addr_bytes[1], 1040 mac.addr_bytes[2], mac.addr_bytes[3], 1041 mac.addr_bytes[4], mac.addr_bytes[5]); 1042 #ifdef RTE_LIBRTE_MLX5_DEBUG 1043 { 1044 char ifname[IF_NAMESIZE]; 1045 1046 if (mlx5_get_ifname(eth_dev, &ifname) == 0) 1047 DRV_LOG(DEBUG, "port %u ifname is \"%s\"", 1048 eth_dev->data->port_id, ifname); 1049 else 1050 DRV_LOG(DEBUG, "port %u ifname is unknown", 1051 eth_dev->data->port_id); 1052 } 1053 #endif 1054 /* Get actual MTU if possible. */ 1055 err = mlx5_get_mtu(eth_dev, &priv->mtu); 1056 if (err) { 1057 err = rte_errno; 1058 goto error; 1059 } 1060 DRV_LOG(DEBUG, "port %u MTU is %u", eth_dev->data->port_id, 1061 priv->mtu); 1062 /* Initialize burst functions to prevent crashes before link-up. */ 1063 eth_dev->rx_pkt_burst = removed_rx_burst; 1064 eth_dev->tx_pkt_burst = removed_tx_burst; 1065 eth_dev->dev_ops = &mlx5_os_dev_ops; 1066 /* Register MAC address. 
	claim_zero(mlx5_mac_addr_add(eth_dev, &mac, 0, 0));
	if (config.vf && config.vf_nl_en)
		mlx5_nl_mac_addr_sync(priv->nl_socket_route,
				      mlx5_ifindex(eth_dev),
				      eth_dev->data->mac_addrs,
				      MLX5_MAX_MAC_ADDRESSES);
	priv->flows = 0;
	priv->ctrl_flows = 0;
	TAILQ_INIT(&priv->flow_meters);
	TAILQ_INIT(&priv->flow_meter_profiles);
	/* Hint libmlx5 to use PMD allocator for data plane resources. */
	mlx5_glue->dv_set_context_attr(sh->ctx,
			MLX5DV_CTX_ATTR_BUF_ALLOCATORS,
			(void *)((uintptr_t)&(struct mlx5dv_ctx_allocators){
				.alloc = &mlx5_alloc_verbs_buf,
				.free = &mlx5_free_verbs_buf,
				.data = priv,
			}));
	/* Bring Ethernet device up. */
	DRV_LOG(DEBUG, "port %u forcing Ethernet interface up",
		eth_dev->data->port_id);
	mlx5_set_link_up(eth_dev);
	/*
	 * Even though the interrupt handler is not installed yet,
	 * interrupts will still trigger on the async_fd from
	 * Verbs context returned by ibv_open_device().
	 */
	mlx5_link_update(eth_dev, 0);
#ifdef HAVE_MLX5DV_DR_ESWITCH
	if (!(config.hca_attr.eswitch_manager && config.dv_flow_en &&
	      (switch_info->representor || switch_info->master)))
		config.dv_esw_en = 0;
#else
	config.dv_esw_en = 0;
#endif
	/* Detect minimal data bytes to inline. */
	mlx5_set_min_inline(spawn, &config);
	/* Store device configuration on private structure. */
	priv->config = config;
	/* Create context for virtual machine VLAN workaround. */
	priv->vmwa_context = mlx5_vlan_vmwa_init(eth_dev, spawn->ifindex);
	if (config.dv_flow_en) {
		err = mlx5_alloc_shared_dr(priv);
		if (err)
			goto error;
		/*
		 * RSS id is shared with meter flow id. Meter flow id can only
		 * use the 24 MSB of the register.
		 */
		priv->qrss_id_pool = mlx5_flow_id_pool_alloc(UINT32_MAX >>
				     MLX5_MTR_COLOR_BITS);
		if (!priv->qrss_id_pool) {
			DRV_LOG(ERR, "can't create flow id pool");
			err = ENOMEM;
			goto error;
		}
	}
	/* Supported Verbs flow priority number detection. */
	err = mlx5_flow_discover_priorities(eth_dev);
	if (err < 0) {
		err = -err;
		goto error;
	}
	priv->config.flow_prio = err;
	if (!priv->config.dv_esw_en &&
	    priv->config.dv_xmeta_en != MLX5_XMETA_MODE_LEGACY) {
		DRV_LOG(WARNING, "metadata mode %u is not supported "
			"(no E-Switch)", priv->config.dv_xmeta_en);
		priv->config.dv_xmeta_en = MLX5_XMETA_MODE_LEGACY;
	}
	mlx5_set_metadata_mask(eth_dev);
	if (priv->config.dv_xmeta_en != MLX5_XMETA_MODE_LEGACY &&
	    !priv->sh->dv_regc0_mask) {
		DRV_LOG(ERR, "metadata mode %u is not supported "
			"(no metadata reg_c[0] is available)",
			priv->config.dv_xmeta_en);
		err = ENOTSUP;
		goto error;
	}
	/*
	 * Allocate the buffer for flow creating, just once.
	 * The allocation must be done before any flow creating.
	 */
	mlx5_flow_alloc_intermediate(eth_dev);
	/* Query availability of metadata reg_c's. */
	err = mlx5_flow_discover_mreg_c(eth_dev);
	if (err < 0) {
		err = -err;
		goto error;
	}
	if (!mlx5_flow_ext_mreg_supported(eth_dev)) {
		DRV_LOG(DEBUG,
			"port %u extensive metadata register is not supported",
			eth_dev->data->port_id);
		if (priv->config.dv_xmeta_en != MLX5_XMETA_MODE_LEGACY) {
			DRV_LOG(ERR, "metadata mode %u is not supported "
				"(no metadata registers available)",
				priv->config.dv_xmeta_en);
			err = ENOTSUP;
			goto error;
		}
	}
	if (priv->config.dv_flow_en &&
	    priv->config.dv_xmeta_en != MLX5_XMETA_MODE_LEGACY &&
	    mlx5_flow_ext_mreg_supported(eth_dev) &&
	    priv->sh->dv_regc0_mask) {
		priv->mreg_cp_tbl = mlx5_hlist_create(MLX5_FLOW_MREG_HNAME,
						      MLX5_FLOW_MREG_HTABLE_SZ);
		if (!priv->mreg_cp_tbl) {
			err = ENOMEM;
			goto error;
		}
	}
	return eth_dev;
error:
	if (priv) {
		if (priv->mreg_cp_tbl)
			mlx5_hlist_destroy(priv->mreg_cp_tbl, NULL, NULL);
		if (priv->sh)
			mlx5_os_free_shared_dr(priv);
		if (priv->nl_socket_route >= 0)
			close(priv->nl_socket_route);
		if (priv->nl_socket_rdma >= 0)
			close(priv->nl_socket_rdma);
		if (priv->vmwa_context)
			mlx5_vlan_vmwa_exit(priv->vmwa_context);
		if (priv->qrss_id_pool)
			mlx5_flow_id_pool_release(priv->qrss_id_pool);
		if (own_domain_id)
			claim_zero(rte_eth_switch_domain_free(priv->domain_id));
		mlx5_free(priv);
		if (eth_dev != NULL)
			eth_dev->data->dev_private = NULL;
	}
	if (eth_dev != NULL) {
		/* mac_addrs must not be freed alone because it is part of
		 * dev_private.
		 */
		eth_dev->data->mac_addrs = NULL;
		rte_eth_dev_release_port(eth_dev);
	}
	if (sh)
		mlx5_free_shared_dev_ctx(sh);
	MLX5_ASSERT(err > 0);
	rte_errno = err;
	return NULL;
}

/**
 * Comparison callback to sort device data.
 *
 * This is meant to be used with qsort().
 *
 * @param[in] a
 *   Pointer to pointer to first data object.
 * @param[in] b
 *   Pointer to pointer to second data object.
 *
 * @return
 *   0 if both objects are equal, less than 0 if the first argument is less
 *   than the second, greater than 0 otherwise.
 */
static int
mlx5_dev_spawn_data_cmp(const void *a, const void *b)
{
	const struct mlx5_switch_info *si_a =
		&((const struct mlx5_dev_spawn_data *)a)->info;
	const struct mlx5_switch_info *si_b =
		&((const struct mlx5_dev_spawn_data *)b)->info;
	int ret;

	/* Master device first. */
	ret = si_b->master - si_a->master;
	if (ret)
		return ret;
	/* Then representor devices. */
	ret = si_b->representor - si_a->representor;
	if (ret)
		return ret;
	/* Unidentified devices come last in no specific order. */
	if (!si_a->representor)
		return 0;
	/* Order representors by name. */
	return si_a->port_name - si_b->port_name;
}
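/*
 * Usage sketch: this comparator is applied in mlx5_os_pci_probe() to order
 * the spawn entries (master first, then representors from lowest to highest
 * port name) before spawning, exactly as the call reproduced below from
 * later in this file.
 *
 *	qsort(list, ns, sizeof(*list), mlx5_dev_spawn_data_cmp);
 */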
/**
 * Match PCI information for possible slaves of bonding device.
 *
 * @param[in] ibv_dev
 *   Pointer to Infiniband device structure.
 * @param[in] pci_dev
 *   Pointer to PCI device structure to match PCI address.
 * @param[in] nl_rdma
 *   Netlink RDMA group socket handle.
 *
 * @return
 *   Negative value if no bonding device found, otherwise
 *   positive index of slave PF in bonding.
 */
static int
mlx5_device_bond_pci_match(const struct ibv_device *ibv_dev,
			   const struct rte_pci_device *pci_dev,
			   int nl_rdma)
{
	char ifname[IF_NAMESIZE + 1];
	unsigned int ifindex;
	unsigned int np, i;
	FILE *file = NULL;
	int pf = -1;

	/*
	 * Try to get master device name. If something goes
	 * wrong suppose the lack of kernel support and no
	 * bonding devices.
	 */
	if (nl_rdma < 0)
		return -1;
	if (!strstr(ibv_dev->name, "bond"))
		return -1;
	np = mlx5_nl_portnum(nl_rdma, ibv_dev->name);
	if (!np)
		return -1;
	/*
	 * The master device might not be on the predefined
	 * port (not on port index 1, it is not guaranteed),
	 * we have to scan all Infiniband device ports and
	 * find master.
	 */
	for (i = 1; i <= np; ++i) {
		/* Check whether Infiniband port is populated. */
		ifindex = mlx5_nl_ifindex(nl_rdma, ibv_dev->name, i);
		if (!ifindex)
			continue;
		if (!if_indextoname(ifindex, ifname))
			continue;
		/* Try to read bonding slave names from sysfs. */
		MKSTR(slaves,
		      "/sys/class/net/%s/master/bonding/slaves", ifname);
		file = fopen(slaves, "r");
		if (file)
			break;
	}
	if (!file)
		return -1;
	/* Use safe format to check maximal buffer length. */
	MLX5_ASSERT(atol(RTE_STR(IF_NAMESIZE)) == IF_NAMESIZE);
	while (fscanf(file, "%" RTE_STR(IF_NAMESIZE) "s", ifname) == 1) {
		char tmp_str[IF_NAMESIZE + 32];
		struct rte_pci_addr pci_addr;
		struct mlx5_switch_info info;

		/* Process slave interface names in the loop. */
		snprintf(tmp_str, sizeof(tmp_str),
			 "/sys/class/net/%s", ifname);
		if (mlx5_dev_to_pci_addr(tmp_str, &pci_addr)) {
			DRV_LOG(WARNING, "can not get PCI address"
				" for netdev \"%s\"", ifname);
			continue;
		}
		if (pci_dev->addr.domain != pci_addr.domain ||
		    pci_dev->addr.bus != pci_addr.bus ||
		    pci_dev->addr.devid != pci_addr.devid ||
		    pci_dev->addr.function != pci_addr.function)
			continue;
		/* Slave interface PCI address match found. */
		fclose(file);
		snprintf(tmp_str, sizeof(tmp_str),
			 "/sys/class/net/%s/phys_port_name", ifname);
		file = fopen(tmp_str, "rb");
		if (!file)
			break;
		info.name_type = MLX5_PHYS_PORT_NAME_TYPE_NOTSET;
		if (fscanf(file, "%32s", tmp_str) == 1)
			mlx5_translate_port_name(tmp_str, &info);
		if (info.name_type == MLX5_PHYS_PORT_NAME_TYPE_LEGACY ||
		    info.name_type == MLX5_PHYS_PORT_NAME_TYPE_UPLINK)
			pf = info.port_name;
		break;
	}
	if (file)
		fclose(file);
	return pf;
}
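/*
 * Hedged illustration (assumed kernel naming, following the "pf0vf2"
 * example in the mlx5_dev_spawn() comment above): a slave netdev's
 * phys_port_name sysfs attribute typically reads like "p0" for an uplink
 * PF or "pf0vf2" for the representor of VF 2 on PF 0.
 * mlx5_translate_port_name() decodes such strings into the
 * name_type/pf_num/port_name fields that the loop above matches on.
 */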
/**
 * DPDK callback to register a PCI device.
 *
 * This function spawns Ethernet devices out of a given PCI device.
 *
 * @param[in] pci_drv
 *   PCI driver structure (mlx5_driver).
 * @param[in] pci_dev
 *   PCI device information.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_os_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
		  struct rte_pci_device *pci_dev)
{
	struct ibv_device **ibv_list;
	/*
	 * Number of found IB devices matching the requested PCI BDF.
	 * nd != 1 means there are multiple IB devices over the same
	 * PCI device and we have representors and master.
	 */
	unsigned int nd = 0;
	/*
	 * Number of found IB device ports. nd = 1 and np = 1..n means
	 * we have the single multiport IB device, and there may be
	 * representors attached to some of found ports.
	 */
	unsigned int np = 0;
	/*
	 * Number of DPDK ethernet devices to spawn - either over
	 * multiple IB devices or multiple ports of single IB device.
	 * Actually this is the number of iterations to spawn.
	 */
	unsigned int ns = 0;
	/*
	 * Bonding device
	 *   < 0 - no bonding device (single one)
	 *  >= 0 - bonding device (value is slave PF index)
	 */
	int bd = -1;
	struct mlx5_dev_spawn_data *list = NULL;
	struct mlx5_dev_config dev_config;
	int ret;

	if (mlx5_class_get(pci_dev->device.devargs) != MLX5_CLASS_NET) {
		DRV_LOG(DEBUG, "Skip probing - should be probed by other mlx5"
			" driver.");
		return 1;
	}
	if (rte_eal_process_type() == RTE_PROC_PRIMARY)
		mlx5_pmd_socket_init();
	ret = mlx5_init_once();
	if (ret) {
		DRV_LOG(ERR, "unable to init PMD global data: %s",
			strerror(rte_errno));
		return -rte_errno;
	}
	MLX5_ASSERT(pci_drv == &mlx5_driver);
	errno = 0;
	ibv_list = mlx5_glue->get_device_list(&ret);
	if (!ibv_list) {
		rte_errno = errno ? errno : ENOSYS;
		DRV_LOG(ERR, "cannot list devices, is ib_uverbs loaded?");
		return -rte_errno;
	}
	/*
	 * First scan the list of all Infiniband devices to find
	 * matching ones, gathering into the list.
	 */
	struct ibv_device *ibv_match[ret + 1];
	int nl_route = mlx5_nl_init(NETLINK_ROUTE);
	int nl_rdma = mlx5_nl_init(NETLINK_RDMA);
	unsigned int i;

	while (ret-- > 0) {
		struct rte_pci_addr pci_addr;

		DRV_LOG(DEBUG, "checking device \"%s\"", ibv_list[ret]->name);
		bd = mlx5_device_bond_pci_match
				(ibv_list[ret], pci_dev, nl_rdma);
		if (bd >= 0) {
			/*
			 * Bonding device detected. Only one match is allowed,
			 * the bonding is supported over multi-port IB device,
			 * there should be no matches on representor PCI
			 * functions or non VF LAG bonding devices with
			 * specified address.
			 */
			if (nd) {
				DRV_LOG(ERR,
					"multiple PCI match on bonding device"
					" \"%s\" found", ibv_list[ret]->name);
				rte_errno = ENOENT;
				ret = -rte_errno;
				goto exit;
			}
			DRV_LOG(INFO, "PCI information matches for"
				" slave %d bonding device \"%s\"",
				bd, ibv_list[ret]->name);
			ibv_match[nd++] = ibv_list[ret];
			break;
		}
		if (mlx5_dev_to_pci_addr
			(ibv_list[ret]->ibdev_path, &pci_addr))
			continue;
		if (pci_dev->addr.domain != pci_addr.domain ||
		    pci_dev->addr.bus != pci_addr.bus ||
		    pci_dev->addr.devid != pci_addr.devid ||
		    pci_dev->addr.function != pci_addr.function)
			continue;
		DRV_LOG(INFO, "PCI information matches for device \"%s\"",
			ibv_list[ret]->name);
		ibv_match[nd++] = ibv_list[ret];
	}
	ibv_match[nd] = NULL;
	if (!nd) {
		/* No device matches, just complain and bail out. */
		DRV_LOG(WARNING,
			"no Verbs device matches PCI device " PCI_PRI_FMT ","
			" are kernel drivers loaded?",
			pci_dev->addr.domain, pci_dev->addr.bus,
			pci_dev->addr.devid, pci_dev->addr.function);
		rte_errno = ENOENT;
		ret = -rte_errno;
		goto exit;
	}
	if (nd == 1) {
		/*
		 * The single matching device may have multiple ports.
		 * Each port may be a representor, we have to check the port
		 * number and check the representors' existence.
		 */
		if (nl_rdma >= 0)
			np = mlx5_nl_portnum(nl_rdma, ibv_match[0]->name);
		if (!np)
			DRV_LOG(WARNING, "can not get IB device \"%s\""
				" ports number", ibv_match[0]->name);
		if (bd >= 0 && !np) {
			DRV_LOG(ERR, "can not get ports"
				" for bonding device");
			rte_errno = ENOENT;
			ret = -rte_errno;
			goto exit;
		}
	}
#ifndef HAVE_MLX5DV_DR_DEVX_PORT
	if (bd >= 0) {
		/*
		 * This may happen if there is VF LAG kernel support and
		 * application is compiled with older rdma_core library.
		 */
		DRV_LOG(ERR,
			"No kernel/verbs support for VF LAG bonding found.");
		rte_errno = ENOTSUP;
		ret = -rte_errno;
		goto exit;
	}
#endif
	/*
	 * Now we can determine the maximal
	 * amount of devices to be spawned.
	 */
	list = mlx5_malloc(MLX5_MEM_ZERO,
			   sizeof(struct mlx5_dev_spawn_data) *
			   (np ? np : nd),
			   RTE_CACHE_LINE_SIZE, SOCKET_ID_ANY);
	if (!list) {
		DRV_LOG(ERR, "spawn data array allocation failure");
		rte_errno = ENOMEM;
		ret = -rte_errno;
		goto exit;
	}
	if (bd >= 0 || np > 1) {
		/*
		 * Single IB device with multiple ports found,
		 * it may be E-Switch master device and representors.
		 * We have to perform identification through the ports.
		 */
		MLX5_ASSERT(nl_rdma >= 0);
		MLX5_ASSERT(ns == 0);
		MLX5_ASSERT(nd == 1);
		MLX5_ASSERT(np);
		for (i = 1; i <= np; ++i) {
			list[ns].max_port = np;
			list[ns].phys_port = i;
			list[ns].phys_dev = ibv_match[0];
			list[ns].eth_dev = NULL;
			list[ns].pci_dev = pci_dev;
			list[ns].pf_bond = bd;
			list[ns].ifindex = mlx5_nl_ifindex
					(nl_rdma,
					 mlx5_os_get_dev_device_name
						(list[ns].phys_dev), i);
			if (!list[ns].ifindex) {
				/*
				 * No network interface index found for the
				 * specified port, it means there is no
				 * representor on this port. It's OK,
				 * there can be disabled ports, for example
				 * if sriov_numvfs < sriov_totalvfs.
				 */
				continue;
			}
			ret = -1;
			if (nl_route >= 0)
				ret = mlx5_nl_switch_info
						(nl_route,
						 list[ns].ifindex,
						 &list[ns].info);
			if (ret || (!list[ns].info.representor &&
				    !list[ns].info.master)) {
				/*
				 * We failed to recognize representors with
				 * Netlink, let's try to perform the task
				 * with sysfs.
				 */
				ret = mlx5_sysfs_switch_info
						(list[ns].ifindex,
						 &list[ns].info);
			}
			if (!ret && bd >= 0) {
				switch (list[ns].info.name_type) {
				case MLX5_PHYS_PORT_NAME_TYPE_UPLINK:
					if (list[ns].info.port_name == bd)
						ns++;
					break;
				case MLX5_PHYS_PORT_NAME_TYPE_PFHPF:
					/* Fallthrough */
				case MLX5_PHYS_PORT_NAME_TYPE_PFVF:
					if (list[ns].info.pf_num == bd)
						ns++;
					break;
				default:
					break;
				}
				continue;
			}
			if (!ret && (list[ns].info.representor ^
				     list[ns].info.master))
				ns++;
		}
		if (!ns) {
			DRV_LOG(ERR,
				"unable to recognize master/representors"
				" on the IB device with multiple ports");
			rte_errno = ENOENT;
			ret = -rte_errno;
			goto exit;
		}
	} else {
		/*
		 * The existence of several matching entries (nd > 1) means
		 * port representors have been instantiated. No existing Verbs
		 * call nor sysfs entries can tell them apart, this can only
		 * be done through Netlink calls assuming kernel drivers are
		 * recent enough to support them.
		 *
		 * In the event of identification failure through Netlink,
		 * try again through sysfs, then:
		 *
		 * 1. A single IB device matches (nd == 1) with single
		 *    port (np=0/1) and is not a representor, assume
		 *    no switch support.
		 *
		 * 2. Otherwise no safe assumptions can be made;
		 *    complain louder and bail out.
		 */
		for (i = 0; i != nd; ++i) {
			memset(&list[ns].info, 0, sizeof(list[ns].info));
			list[ns].max_port = 1;
			list[ns].phys_port = 1;
			list[ns].phys_dev = ibv_match[i];
			list[ns].eth_dev = NULL;
			list[ns].pci_dev = pci_dev;
			list[ns].pf_bond = -1;
			list[ns].ifindex = 0;
			if (nl_rdma >= 0)
				list[ns].ifindex = mlx5_nl_ifindex
						(nl_rdma,
						 mlx5_os_get_dev_device_name
							(list[ns].phys_dev), 1);
			if (!list[ns].ifindex) {
				char ifname[IF_NAMESIZE];

				/*
				 * Netlink failed, it may happen with old
				 * ib_core kernel driver (before 4.16).
				 * We can assume there is old driver because
				 * here we are processing single ports IB
				 * devices. Let's try sysfs to retrieve
				 * the ifindex. The method works for
				 * master device only.
				 */
				if (nd > 1) {
					/*
					 * Multiple devices found, assume
					 * representors, can not distinguish
					 * master/representor and retrieve
					 * ifindex via sysfs.
					 */
					continue;
				}
				ret = mlx5_get_ifname_sysfs
					(ibv_match[i]->ibdev_path, ifname);
				if (!ret)
					list[ns].ifindex =
						if_nametoindex(ifname);
				if (!list[ns].ifindex) {
					/*
					 * No network interface index found
					 * for the specified device, it means
					 * it is neither a representor
					 * nor a master.
					 */
					continue;
				}
			}
			ret = -1;
			if (nl_route >= 0)
				ret = mlx5_nl_switch_info
						(nl_route,
						 list[ns].ifindex,
						 &list[ns].info);
			if (ret || (!list[ns].info.representor &&
				    !list[ns].info.master)) {
				/*
				 * We failed to recognize representors with
				 * Netlink, let's try to perform the task
				 * with sysfs.
				 */
				ret = mlx5_sysfs_switch_info
						(list[ns].ifindex,
						 &list[ns].info);
			}
			if (!ret && (list[ns].info.representor ^
				     list[ns].info.master)) {
				ns++;
			} else if ((nd == 1) &&
				   !list[ns].info.representor &&
				   !list[ns].info.master) {
				/*
				 * Single IB device with
				 * one physical port and
				 * attached network device.
				 * Maybe SR-IOV is not enabled
				 * or there are no representors.
				 */
				DRV_LOG(INFO, "no E-Switch support detected");
				ns++;
				break;
			}
		}
		if (!ns) {
			DRV_LOG(ERR,
				"unable to recognize master/representors"
				" on the multiple IB devices");
			rte_errno = ENOENT;
			ret = -rte_errno;
			goto exit;
		}
	}
	MLX5_ASSERT(ns);
	/*
	 * Sort list to probe devices in natural order for users' convenience
	 * (i.e. master first, then representors from lowest to highest ID).
	 */
	qsort(list, ns, sizeof(*list), mlx5_dev_spawn_data_cmp);
	/* Default configuration. */
	dev_config = (struct mlx5_dev_config){
		.hw_padding = 0,
		.mps = MLX5_ARG_UNSET,
		.dbnc = MLX5_ARG_UNSET,
		.rx_vec_en = 1,
		.txq_inline_max = MLX5_ARG_UNSET,
		.txq_inline_min = MLX5_ARG_UNSET,
		.txq_inline_mpw = MLX5_ARG_UNSET,
		.txqs_inline = MLX5_ARG_UNSET,
		.vf_nl_en = 1,
		.mr_ext_memseg_en = 1,
		.mprq = {
			.enabled = 0, /* Disabled by default. */
			.stride_num_n = 0,
			.stride_size_n = 0,
			.max_memcpy_len = MLX5_MPRQ_MEMCPY_DEFAULT_LEN,
			.min_rxqs_num = MLX5_MPRQ_MIN_RXQS,
		},
		.dv_esw_en = 1,
		.dv_flow_en = 1,
		.decap_en = 1,
		.log_hp_size = MLX5_ARG_UNSET,
	};
	/* Device specific configuration. */
	switch (pci_dev->id.device_id) {
	case PCI_DEVICE_ID_MELLANOX_CONNECTX4VF:
	case PCI_DEVICE_ID_MELLANOX_CONNECTX4LXVF:
	case PCI_DEVICE_ID_MELLANOX_CONNECTX5VF:
	case PCI_DEVICE_ID_MELLANOX_CONNECTX5EXVF:
	case PCI_DEVICE_ID_MELLANOX_CONNECTX5BFVF:
	case PCI_DEVICE_ID_MELLANOX_CONNECTX6VF:
	case PCI_DEVICE_ID_MELLANOX_CONNECTX6DXVF:
		dev_config.vf = 1;
		break;
	default:
		break;
	}
	for (i = 0; i != ns; ++i) {
		uint32_t restore;

		list[i].eth_dev = mlx5_dev_spawn(&pci_dev->device,
						 &list[i],
						 dev_config);
		if (!list[i].eth_dev) {
			if (rte_errno != EBUSY && rte_errno != EEXIST)
				break;
			/* Device is disabled or already spawned. Ignore it. */
			continue;
		}
		restore = list[i].eth_dev->data->dev_flags;
		rte_eth_copy_pci_info(list[i].eth_dev, pci_dev);
		/* Restore non-PCI flags cleared by the above call. */
		list[i].eth_dev->data->dev_flags |= restore;
		rte_eth_dev_probing_finish(list[i].eth_dev);
	}
	if (i != ns) {
		DRV_LOG(ERR,
			"probe of PCI device " PCI_PRI_FMT " aborted after"
			" encountering an error: %s",
			pci_dev->addr.domain, pci_dev->addr.bus,
			pci_dev->addr.devid, pci_dev->addr.function,
			strerror(rte_errno));
		ret = -rte_errno;
		/* Roll back. */
		while (i--) {
			if (!list[i].eth_dev)
				continue;
			mlx5_dev_close(list[i].eth_dev);
			/* mac_addrs must not be freed because it is part of
			 * dev_private.
			 */
			list[i].eth_dev->data->mac_addrs = NULL;
			claim_zero(rte_eth_dev_release_port(list[i].eth_dev));
		}
		/* Restore original error. */
		rte_errno = -ret;
	} else {
		ret = 0;
	}
exit:
	/*
	 * Do the routine cleanup:
	 * - close opened Netlink sockets
	 * - free allocated spawn data array
	 * - free the Infiniband device list
	 */
	if (nl_rdma >= 0)
		close(nl_rdma);
	if (nl_route >= 0)
		close(nl_route);
	if (list)
		mlx5_free(list);
	MLX5_ASSERT(ibv_list);
	mlx5_glue->free_device_list(ibv_list);
	return ret;
}

static int
mlx5_config_doorbell_mapping_env(const struct mlx5_dev_config *config)
{
	char *env;
	int value;

	MLX5_ASSERT(rte_eal_process_type() == RTE_PROC_PRIMARY);
	/* Get environment variable to store. */
	env = getenv(MLX5_SHUT_UP_BF);
	value = env ? !!strcmp(env, "0") : MLX5_ARG_UNSET;
	if (config->dbnc == MLX5_ARG_UNSET)
		setenv(MLX5_SHUT_UP_BF, MLX5_SHUT_UP_BF_DEFAULT, 1);
	else
		setenv(MLX5_SHUT_UP_BF,
		       config->dbnc == MLX5_TXDB_NCACHED ? "1" : "0", 1);
	return value;
}

static void
mlx5_restore_doorbell_mapping_env(int value)
{
	MLX5_ASSERT(rte_eal_process_type() == RTE_PROC_PRIMARY);
	/* Restore the original environment variable state. */
	if (value == MLX5_ARG_UNSET)
		unsetenv(MLX5_SHUT_UP_BF);
	else
		setenv(MLX5_SHUT_UP_BF, value ? "1" : "0", 1);
}
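/*
 * Hedged pairing sketch: the two helpers above must bracket device
 * creation, because rdma-core samples the environment variable at open
 * time and stores the result internally. mlx5_os_open_device() below
 * follows exactly this pattern.
 *
 *	int dbmap_env = mlx5_config_doorbell_mapping_env(config);
 *
 *	sh->ctx = mlx5_glue->dv_open_device(spawn->phys_dev);
 *	mlx5_restore_doorbell_mapping_env(dbmap_env);
 */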
/**
 * Extract pdn of PD object using DV API.
 *
 * @param[in] pd
 *   Pointer to the verbs PD object.
 * @param[out] pdn
 *   Pointer to the PD object number variable.
 *
 * @return
 *   0 on success, error value otherwise.
 */
int
mlx5_os_get_pdn(void *pd, uint32_t *pdn)
{
#ifdef HAVE_IBV_FLOW_DV_SUPPORT
	struct mlx5dv_obj obj;
	struct mlx5dv_pd pd_info;
	int ret = 0;

	obj.pd.in = pd;
	obj.pd.out = &pd_info;
	ret = mlx5_glue->dv_init_obj(&obj, MLX5DV_OBJ_PD);
	if (ret) {
		DRV_LOG(DEBUG, "Fail to get PD object info");
		return ret;
	}
	*pdn = pd_info.pdn;
	return 0;
#else
	(void)pd;
	(void)pdn;
	return -ENOTSUP;
#endif /* HAVE_IBV_FLOW_DV_SUPPORT */
}

/**
 * Function API to open IB device.
 *
 * This function calls the Linux glue APIs to open a device.
 *
 * @param[in] spawn
 *   Pointer to the IB device attributes (name, port, etc).
 * @param[in] config
 *   Pointer to device configuration structure.
 * @param[out] sh
 *   Pointer to shared context structure.
 *
 * @return
 *   0 on success, a positive error value otherwise.
 */
int
mlx5_os_open_device(const struct mlx5_dev_spawn_data *spawn,
		    const struct mlx5_dev_config *config,
		    struct mlx5_dev_ctx_shared *sh)
{
	int dbmap_env;
	int err = 0;

	sh->numa_node = spawn->pci_dev->device.numa_node;
	pthread_mutex_init(&sh->txpp.mutex, NULL);
	/*
	 * Configure the environment variable "MLX5_SHUT_UP_BF" before
	 * the device creation. The rdma_core library checks the variable
	 * at device creation and stores the result internally.
	 */
	dbmap_env = mlx5_config_doorbell_mapping_env(config);
	/* Try to open IB device with DV first, then usual Verbs. */
	errno = 0;
	sh->ctx = mlx5_glue->dv_open_device(spawn->phys_dev);
	if (sh->ctx) {
		sh->devx = 1;
		DRV_LOG(DEBUG, "DevX is supported");
		/* The device is created, no need for environment. */
		mlx5_restore_doorbell_mapping_env(dbmap_env);
	} else {
		/* The environment variable is still configured. */
		sh->ctx = mlx5_glue->open_device(spawn->phys_dev);
		err = errno ? errno : ENODEV;
		/*
		 * The environment variable is not needed anymore,
		 * all device creation attempts are completed.
		 */
		mlx5_restore_doorbell_mapping_env(dbmap_env);
		if (!sh->ctx)
			return err;
		DRV_LOG(DEBUG, "DevX is NOT supported");
		err = 0;
	}
	return err;
}
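/*
 * Caller-side sketch (hypothetical, for illustration): on return 0,
 * sh->ctx holds a valid device context and sh->devx indicates whether
 * DevX commands may be issued on it:
 *
 *	err = mlx5_os_open_device(spawn, config, sh);
 *	if (err)
 *		return err;	// positive errno-style value
 *	if (sh->devx)
 *		...		// DevX command path is available
 */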
1945 */ 1946 void 1947 mlx5_os_dev_shared_handler_install(struct mlx5_dev_ctx_shared *sh) 1948 { 1949 int ret; 1950 int flags; 1951 1952 sh->intr_handle.fd = -1; 1953 flags = fcntl(((struct ibv_context *)sh->ctx)->async_fd, F_GETFL); 1954 ret = fcntl(((struct ibv_context *)sh->ctx)->async_fd, 1955 F_SETFL, flags | O_NONBLOCK); 1956 if (ret) { 1957 DRV_LOG(INFO, "failed to change file descriptor async event" 1958 " queue"); 1959 } else { 1960 sh->intr_handle.fd = ((struct ibv_context *)sh->ctx)->async_fd; 1961 sh->intr_handle.type = RTE_INTR_HANDLE_EXT; 1962 if (rte_intr_callback_register(&sh->intr_handle, 1963 mlx5_dev_interrupt_handler, sh)) { 1964 DRV_LOG(INFO, "Fail to install the shared interrupt."); 1965 sh->intr_handle.fd = -1; 1966 } 1967 } 1968 if (sh->devx) { 1969 #ifdef HAVE_IBV_DEVX_ASYNC 1970 sh->intr_handle_devx.fd = -1; 1971 sh->devx_comp = 1972 (void *)mlx5_glue->devx_create_cmd_comp(sh->ctx); 1973 struct mlx5dv_devx_cmd_comp *devx_comp = sh->devx_comp; 1974 if (!devx_comp) { 1975 DRV_LOG(INFO, "failed to allocate devx_comp."); 1976 return; 1977 } 1978 flags = fcntl(devx_comp->fd, F_GETFL); 1979 ret = fcntl(devx_comp->fd, F_SETFL, flags | O_NONBLOCK); 1980 if (ret) { 1981 DRV_LOG(INFO, "failed to change file descriptor" 1982 " devx comp"); 1983 return; 1984 } 1985 sh->intr_handle_devx.fd = devx_comp->fd; 1986 sh->intr_handle_devx.type = RTE_INTR_HANDLE_EXT; 1987 if (rte_intr_callback_register(&sh->intr_handle_devx, 1988 mlx5_dev_interrupt_handler_devx, sh)) { 1989 DRV_LOG(INFO, "Fail to install the devx shared" 1990 " interrupt."); 1991 sh->intr_handle_devx.fd = -1; 1992 } 1993 #endif /* HAVE_IBV_DEVX_ASYNC */ 1994 } 1995 } 1996 1997 /** 1998 * Uninstall shared asynchronous device events handler. 1999 * This function is implemented to support event sharing 2000 * between multiple ports of single IB device. 2001 * 2002 * @param dev 2003 * Pointer to mlx5_dev_ctx_shared object. 2004 */ 2005 void 2006 mlx5_os_dev_shared_handler_uninstall(struct mlx5_dev_ctx_shared *sh) 2007 { 2008 if (sh->intr_handle.fd >= 0) 2009 mlx5_intr_callback_unregister(&sh->intr_handle, 2010 mlx5_dev_interrupt_handler, sh); 2011 #ifdef HAVE_IBV_DEVX_ASYNC 2012 if (sh->intr_handle_devx.fd >= 0) 2013 rte_intr_callback_unregister(&sh->intr_handle_devx, 2014 mlx5_dev_interrupt_handler_devx, sh); 2015 if (sh->devx_comp) 2016 mlx5_glue->devx_destroy_cmd_comp(sh->devx_comp); 2017 #endif 2018 } 2019 2020 /** 2021 * Read statistics by a named counter. 2022 * 2023 * @param[in] priv 2024 * Pointer to the private device data structure. 2025 * @param[in] ctr_name 2026 * Pointer to the name of the statistic counter to read 2027 * @param[out] stat 2028 * Pointer to read statistic value. 2029 * @return 2030 * 0 on success and stat is valud, 1 if failed to read the value 2031 * rte_errno is set. 2032 * 2033 */ 2034 int 2035 mlx5_os_read_dev_stat(struct mlx5_priv *priv, const char *ctr_name, 2036 uint64_t *stat) 2037 { 2038 int fd; 2039 2040 if (priv->sh) { 2041 MKSTR(path, "%s/ports/%d/hw_counters/%s", 2042 priv->sh->ibdev_path, 2043 priv->dev_port, 2044 ctr_name); 2045 fd = open(path, O_RDONLY); 2046 if (fd != -1) { 2047 char buf[21] = {'\0'}; 2048 ssize_t n = read(fd, buf, sizeof(buf)); 2049 2050 close(fd); 2051 if (n != -1) { 2052 *stat = strtoull(buf, NULL, 10); 2053 return 0; 2054 } 2055 } 2056 } 2057 *stat = 0; 2058 return 1; 2059 } 2060 2061 /** 2062 * Read device counters table. 2063 * 2064 * @param dev 2065 * Pointer to Ethernet device. 2066 * @param[out] stats 2067 * Counters table output buffer. 
/**
 * Read device counters table.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param[out] stats
 *   Counters table output buffer.
 *
 * @return
 *   0 on success and stats is filled, negative errno value otherwise and
 *   rte_errno is set.
 */
int
mlx5_os_read_dev_counters(struct rte_eth_dev *dev, uint64_t *stats)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_xstats_ctrl *xstats_ctrl = &priv->xstats_ctrl;
	unsigned int i;
	struct ifreq ifr;
	unsigned int stats_sz = xstats_ctrl->stats_n * sizeof(uint64_t);
	unsigned char et_stat_buf[sizeof(struct ethtool_stats) + stats_sz];
	struct ethtool_stats *et_stats = (struct ethtool_stats *)et_stat_buf;
	int ret;

	et_stats->cmd = ETHTOOL_GSTATS;
	et_stats->n_stats = xstats_ctrl->stats_n;
	ifr.ifr_data = (caddr_t)et_stats;
	ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr);
	if (ret) {
		DRV_LOG(WARNING,
			"port %u unable to read statistic values from device",
			dev->data->port_id);
		return ret;
	}
	for (i = 0; i != xstats_ctrl->mlx5_stats_n; ++i) {
		if (xstats_ctrl->info[i].dev) {
			ret = mlx5_os_read_dev_stat(priv,
					    xstats_ctrl->info[i].ctr_name,
					    &stats[i]);
			/* Return the last cached value if the read fails. */
			if (ret == 0)
				xstats_ctrl->xstats[i] = stats[i];
			else
				stats[i] = xstats_ctrl->xstats[i];
		} else {
			stats[i] = (uint64_t)
				et_stats->data[xstats_ctrl->dev_table_idx[i]];
		}
	}
	return 0;
}

/**
 * Query the number of statistics provided by ETHTOOL.
 *
 * @param dev
 *   Pointer to Ethernet device.
 *
 * @return
 *   Number of statistics on success, negative errno value otherwise and
 *   rte_errno is set.
 */
int
mlx5_os_get_stats_n(struct rte_eth_dev *dev)
{
	struct ethtool_drvinfo drvinfo;
	struct ifreq ifr;
	int ret;

	drvinfo.cmd = ETHTOOL_GDRVINFO;
	ifr.ifr_data = (caddr_t)&drvinfo;
	ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr);
	if (ret) {
		DRV_LOG(WARNING, "port %u unable to query number of statistics",
			dev->data->port_id);
		return ret;
	}
	return drvinfo.n_stats;
}
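/*
 * Note (descriptive): struct ethtool_stats is a variable-length object;
 * the kernel fills n_stats 64-bit values into its trailing data[] array.
 * This is why mlx5_os_read_dev_counters() above sizes its buffer as
 * sizeof(struct ethtool_stats) + stats_n * sizeof(uint64_t), and why
 * mlx5_os_get_stats_n() must be called first to learn stats_n.
 */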
.dpdk_name = "rx_crc_errors_phy", 2196 .ctr_name = "rx_crc_errors_phy", 2197 }, 2198 { 2199 .dpdk_name = "rx_in_range_len_errors_phy", 2200 .ctr_name = "rx_in_range_len_errors_phy", 2201 }, 2202 { 2203 .dpdk_name = "rx_symbol_err_phy", 2204 .ctr_name = "rx_symbol_err_phy", 2205 }, 2206 { 2207 .dpdk_name = "tx_errors_phy", 2208 .ctr_name = "tx_errors_phy", 2209 }, 2210 { 2211 .dpdk_name = "rx_out_of_buffer", 2212 .ctr_name = "out_of_buffer", 2213 .dev = 1, 2214 }, 2215 { 2216 .dpdk_name = "tx_packets_phy", 2217 .ctr_name = "tx_packets_phy", 2218 }, 2219 { 2220 .dpdk_name = "rx_packets_phy", 2221 .ctr_name = "rx_packets_phy", 2222 }, 2223 { 2224 .dpdk_name = "tx_discards_phy", 2225 .ctr_name = "tx_discards_phy", 2226 }, 2227 { 2228 .dpdk_name = "rx_discards_phy", 2229 .ctr_name = "rx_discards_phy", 2230 }, 2231 { 2232 .dpdk_name = "tx_bytes_phy", 2233 .ctr_name = "tx_bytes_phy", 2234 }, 2235 { 2236 .dpdk_name = "rx_bytes_phy", 2237 .ctr_name = "rx_bytes_phy", 2238 }, 2239 /* Representor only */ 2240 { 2241 .dpdk_name = "rx_packets", 2242 .ctr_name = "vport_rx_packets", 2243 }, 2244 { 2245 .dpdk_name = "rx_bytes", 2246 .ctr_name = "vport_rx_bytes", 2247 }, 2248 { 2249 .dpdk_name = "tx_packets", 2250 .ctr_name = "vport_tx_packets", 2251 }, 2252 { 2253 .dpdk_name = "tx_bytes", 2254 .ctr_name = "vport_tx_bytes", 2255 }, 2256 }; 2257 2258 static const unsigned int xstats_n = RTE_DIM(mlx5_counters_init); 2259 2260 /** 2261 * Init the structures to read device counters. 2262 * 2263 * @param dev 2264 * Pointer to Ethernet device. 2265 */ 2266 void 2267 mlx5_os_stats_init(struct rte_eth_dev *dev) 2268 { 2269 struct mlx5_priv *priv = dev->data->dev_private; 2270 struct mlx5_xstats_ctrl *xstats_ctrl = &priv->xstats_ctrl; 2271 struct mlx5_stats_ctrl *stats_ctrl = &priv->stats_ctrl; 2272 unsigned int i; 2273 unsigned int j; 2274 struct ifreq ifr; 2275 struct ethtool_gstrings *strings = NULL; 2276 unsigned int dev_stats_n; 2277 unsigned int str_sz; 2278 int ret; 2279 2280 /* So that it won't aggregate for each init. */ 2281 xstats_ctrl->mlx5_stats_n = 0; 2282 ret = mlx5_os_get_stats_n(dev); 2283 if (ret < 0) { 2284 DRV_LOG(WARNING, "port %u no extended statistics available", 2285 dev->data->port_id); 2286 return; 2287 } 2288 dev_stats_n = ret; 2289 /* Allocate memory to grab stat names and values. */ 2290 str_sz = dev_stats_n * ETH_GSTRING_LEN; 2291 strings = (struct ethtool_gstrings *) 2292 mlx5_malloc(0, str_sz + sizeof(struct ethtool_gstrings), 0, 2293 SOCKET_ID_ANY); 2294 if (!strings) { 2295 DRV_LOG(WARNING, "port %u unable to allocate memory for xstats", 2296 dev->data->port_id); 2297 return; 2298 } 2299 strings->cmd = ETHTOOL_GSTRINGS; 2300 strings->string_set = ETH_SS_STATS; 2301 strings->len = dev_stats_n; 2302 ifr.ifr_data = (caddr_t)strings; 2303 ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr); 2304 if (ret) { 2305 DRV_LOG(WARNING, "port %u unable to get statistic names", 2306 dev->data->port_id); 2307 goto free; 2308 } 2309 for (i = 0; i != dev_stats_n; ++i) { 2310 const char *curr_string = (const char *) 2311 &strings->data[i * ETH_GSTRING_LEN]; 2312 2313 for (j = 0; j != xstats_n; ++j) { 2314 if (!strcmp(mlx5_counters_init[j].ctr_name, 2315 curr_string)) { 2316 unsigned int idx = xstats_ctrl->mlx5_stats_n++; 2317 2318 xstats_ctrl->dev_table_idx[idx] = i; 2319 xstats_ctrl->info[idx] = mlx5_counters_init[j]; 2320 break; 2321 } 2322 } 2323 } 2324 /* Add dev counters. 
/**
 * Init the structures to read device counters.
 *
 * @param dev
 *   Pointer to Ethernet device.
 */
void
mlx5_os_stats_init(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_xstats_ctrl *xstats_ctrl = &priv->xstats_ctrl;
	struct mlx5_stats_ctrl *stats_ctrl = &priv->stats_ctrl;
	unsigned int i;
	unsigned int j;
	struct ifreq ifr;
	struct ethtool_gstrings *strings = NULL;
	unsigned int dev_stats_n;
	unsigned int str_sz;
	int ret;

	/* So that it won't aggregate for each init. */
	xstats_ctrl->mlx5_stats_n = 0;
	ret = mlx5_os_get_stats_n(dev);
	if (ret < 0) {
		DRV_LOG(WARNING, "port %u no extended statistics available",
			dev->data->port_id);
		return;
	}
	dev_stats_n = ret;
	/* Allocate memory to grab stat names and values. */
	str_sz = dev_stats_n * ETH_GSTRING_LEN;
	strings = (struct ethtool_gstrings *)
		  mlx5_malloc(0, str_sz + sizeof(struct ethtool_gstrings), 0,
			      SOCKET_ID_ANY);
	if (!strings) {
		DRV_LOG(WARNING, "port %u unable to allocate memory for xstats",
			dev->data->port_id);
		return;
	}
	strings->cmd = ETHTOOL_GSTRINGS;
	strings->string_set = ETH_SS_STATS;
	strings->len = dev_stats_n;
	ifr.ifr_data = (caddr_t)strings;
	ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr);
	if (ret) {
		DRV_LOG(WARNING, "port %u unable to get statistic names",
			dev->data->port_id);
		goto free;
	}
	for (i = 0; i != dev_stats_n; ++i) {
		const char *curr_string = (const char *)
			&strings->data[i * ETH_GSTRING_LEN];

		for (j = 0; j != xstats_n; ++j) {
			if (!strcmp(mlx5_counters_init[j].ctr_name,
				    curr_string)) {
				unsigned int idx = xstats_ctrl->mlx5_stats_n++;

				xstats_ctrl->dev_table_idx[idx] = i;
				xstats_ctrl->info[idx] = mlx5_counters_init[j];
				break;
			}
		}
	}
	/* Add dev counters. */
	for (i = 0; i != xstats_n; ++i) {
		if (mlx5_counters_init[i].dev) {
			unsigned int idx = xstats_ctrl->mlx5_stats_n++;

			xstats_ctrl->info[idx] = mlx5_counters_init[i];
			xstats_ctrl->hw_stats[idx] = 0;
		}
	}
	MLX5_ASSERT(xstats_ctrl->mlx5_stats_n <= MLX5_MAX_XSTATS);
	xstats_ctrl->stats_n = dev_stats_n;
	/* Copy to base at first time. */
	ret = mlx5_os_read_dev_counters(dev, xstats_ctrl->base);
	if (ret)
		DRV_LOG(ERR, "port %u cannot read device counters: %s",
			dev->data->port_id, strerror(rte_errno));
	mlx5_os_read_dev_stat(priv, "out_of_buffer", &stats_ctrl->imissed_base);
	stats_ctrl->imissed = 0;
free:
	mlx5_free(strings);
}

/**
 * Set the reg_mr and dereg_mr callbacks.
 *
 * @param[out] reg_mr_cb
 *   Pointer to reg_mr function.
 * @param[out] dereg_mr_cb
 *   Pointer to dereg_mr function.
 */
void
mlx5_os_set_reg_mr_cb(mlx5_reg_mr_t *reg_mr_cb,
		      mlx5_dereg_mr_t *dereg_mr_cb)
{
	*reg_mr_cb = mlx5_verbs_ops.reg_mr;
	*dereg_mr_cb = mlx5_verbs_ops.dereg_mr;
}
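/*
 * The three eth_dev_ops tables below expose different API surfaces:
 * the full set for the primary process, a reduced read-mostly set for
 * secondary processes, and a set for flow isolated mode that omits
 * RSS RETA/hash reconfiguration and UDP tunnel port callbacks.
 */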
const struct eth_dev_ops mlx5_os_dev_ops = {
	.dev_configure = mlx5_dev_configure,
	.dev_start = mlx5_dev_start,
	.dev_stop = mlx5_dev_stop,
	.dev_set_link_down = mlx5_set_link_down,
	.dev_set_link_up = mlx5_set_link_up,
	.dev_close = mlx5_dev_close,
	.promiscuous_enable = mlx5_promiscuous_enable,
	.promiscuous_disable = mlx5_promiscuous_disable,
	.allmulticast_enable = mlx5_allmulticast_enable,
	.allmulticast_disable = mlx5_allmulticast_disable,
	.link_update = mlx5_link_update,
	.stats_get = mlx5_stats_get,
	.stats_reset = mlx5_stats_reset,
	.xstats_get = mlx5_xstats_get,
	.xstats_reset = mlx5_xstats_reset,
	.xstats_get_names = mlx5_xstats_get_names,
	.fw_version_get = mlx5_fw_version_get,
	.dev_infos_get = mlx5_dev_infos_get,
	.read_clock = mlx5_txpp_read_clock,
	.dev_supported_ptypes_get = mlx5_dev_supported_ptypes_get,
	.vlan_filter_set = mlx5_vlan_filter_set,
	.rx_queue_setup = mlx5_rx_queue_setup,
	.rx_hairpin_queue_setup = mlx5_rx_hairpin_queue_setup,
	.tx_queue_setup = mlx5_tx_queue_setup,
	.tx_hairpin_queue_setup = mlx5_tx_hairpin_queue_setup,
	.rx_queue_release = mlx5_rx_queue_release,
	.tx_queue_release = mlx5_tx_queue_release,
	.flow_ctrl_get = mlx5_dev_get_flow_ctrl,
	.flow_ctrl_set = mlx5_dev_set_flow_ctrl,
	.mac_addr_remove = mlx5_mac_addr_remove,
	.mac_addr_add = mlx5_mac_addr_add,
	.mac_addr_set = mlx5_mac_addr_set,
	.set_mc_addr_list = mlx5_set_mc_addr_list,
	.mtu_set = mlx5_dev_set_mtu,
	.vlan_strip_queue_set = mlx5_vlan_strip_queue_set,
	.vlan_offload_set = mlx5_vlan_offload_set,
	.reta_update = mlx5_dev_rss_reta_update,
	.reta_query = mlx5_dev_rss_reta_query,
	.rss_hash_update = mlx5_rss_hash_update,
	.rss_hash_conf_get = mlx5_rss_hash_conf_get,
	.filter_ctrl = mlx5_dev_filter_ctrl,
	.rx_descriptor_status = mlx5_rx_descriptor_status,
	.tx_descriptor_status = mlx5_tx_descriptor_status,
	.rxq_info_get = mlx5_rxq_info_get,
	.txq_info_get = mlx5_txq_info_get,
	.rx_burst_mode_get = mlx5_rx_burst_mode_get,
	.tx_burst_mode_get = mlx5_tx_burst_mode_get,
	.rx_queue_count = mlx5_rx_queue_count,
	.rx_queue_intr_enable = mlx5_rx_intr_enable,
	.rx_queue_intr_disable = mlx5_rx_intr_disable,
	.is_removed = mlx5_is_removed,
	.udp_tunnel_port_add = mlx5_udp_tunnel_port_add,
	.get_module_info = mlx5_get_module_info,
	.get_module_eeprom = mlx5_get_module_eeprom,
	.hairpin_cap_get = mlx5_hairpin_cap_get,
	.mtr_ops_get = mlx5_flow_meter_ops_get,
};

/* Available operations from secondary process. */
const struct eth_dev_ops mlx5_os_dev_sec_ops = {
	.stats_get = mlx5_stats_get,
	.stats_reset = mlx5_stats_reset,
	.xstats_get = mlx5_xstats_get,
	.xstats_reset = mlx5_xstats_reset,
	.xstats_get_names = mlx5_xstats_get_names,
	.fw_version_get = mlx5_fw_version_get,
	.dev_infos_get = mlx5_dev_infos_get,
	.read_clock = mlx5_txpp_read_clock,
	.rx_descriptor_status = mlx5_rx_descriptor_status,
	.tx_descriptor_status = mlx5_tx_descriptor_status,
	.rxq_info_get = mlx5_rxq_info_get,
	.txq_info_get = mlx5_txq_info_get,
	.rx_burst_mode_get = mlx5_rx_burst_mode_get,
	.tx_burst_mode_get = mlx5_tx_burst_mode_get,
	.get_module_info = mlx5_get_module_info,
	.get_module_eeprom = mlx5_get_module_eeprom,
};

/* Available operations in flow isolated mode. */
const struct eth_dev_ops mlx5_os_dev_ops_isolate = {
	.dev_configure = mlx5_dev_configure,
	.dev_start = mlx5_dev_start,
	.dev_stop = mlx5_dev_stop,
	.dev_set_link_down = mlx5_set_link_down,
	.dev_set_link_up = mlx5_set_link_up,
	.dev_close = mlx5_dev_close,
	.promiscuous_enable = mlx5_promiscuous_enable,
	.promiscuous_disable = mlx5_promiscuous_disable,
	.allmulticast_enable = mlx5_allmulticast_enable,
	.allmulticast_disable = mlx5_allmulticast_disable,
	.link_update = mlx5_link_update,
	.stats_get = mlx5_stats_get,
	.stats_reset = mlx5_stats_reset,
	.xstats_get = mlx5_xstats_get,
	.xstats_reset = mlx5_xstats_reset,
	.xstats_get_names = mlx5_xstats_get_names,
	.fw_version_get = mlx5_fw_version_get,
	.dev_infos_get = mlx5_dev_infos_get,
	.read_clock = mlx5_txpp_read_clock,
	.dev_supported_ptypes_get = mlx5_dev_supported_ptypes_get,
	.vlan_filter_set = mlx5_vlan_filter_set,
	.rx_queue_setup = mlx5_rx_queue_setup,
	.rx_hairpin_queue_setup = mlx5_rx_hairpin_queue_setup,
	.tx_queue_setup = mlx5_tx_queue_setup,
	.tx_hairpin_queue_setup = mlx5_tx_hairpin_queue_setup,
	.rx_queue_release = mlx5_rx_queue_release,
	.tx_queue_release = mlx5_tx_queue_release,
	.flow_ctrl_get = mlx5_dev_get_flow_ctrl,
	.flow_ctrl_set = mlx5_dev_set_flow_ctrl,
	.mac_addr_remove = mlx5_mac_addr_remove,
	.mac_addr_add = mlx5_mac_addr_add,
	.mac_addr_set = mlx5_mac_addr_set,
	.set_mc_addr_list = mlx5_set_mc_addr_list,
	.mtu_set = mlx5_dev_set_mtu,
	.vlan_strip_queue_set = mlx5_vlan_strip_queue_set,
	.vlan_offload_set = mlx5_vlan_offload_set,
	.filter_ctrl = mlx5_dev_filter_ctrl,
	.rx_descriptor_status = mlx5_rx_descriptor_status,
	.tx_descriptor_status = mlx5_tx_descriptor_status,
	.rxq_info_get = mlx5_rxq_info_get,
	.txq_info_get = mlx5_txq_info_get,
	.rx_burst_mode_get = mlx5_rx_burst_mode_get,
	.tx_burst_mode_get = mlx5_tx_burst_mode_get,
	.rx_queue_intr_enable = mlx5_rx_intr_enable,
	.rx_queue_intr_disable = mlx5_rx_intr_disable,
	.is_removed = mlx5_is_removed,
	.get_module_info = mlx5_get_module_info,
	.get_module_eeprom = mlx5_get_module_eeprom,
	.hairpin_cap_get = mlx5_hairpin_cap_get,
	.mtr_ops_get = mlx5_flow_meter_ops_get,
};