1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright 2015 6WIND S.A. 3 * Copyright 2020 Mellanox Technologies, Ltd 4 */ 5 6 #include <stddef.h> 7 #include <unistd.h> 8 #include <string.h> 9 #include <stdint.h> 10 #include <stdlib.h> 11 #include <errno.h> 12 #include <net/if.h> 13 #include <sys/mman.h> 14 #include <linux/rtnetlink.h> 15 #include <linux/sockios.h> 16 #include <linux/ethtool.h> 17 #include <fcntl.h> 18 19 /* Verbs header. */ 20 /* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */ 21 #ifdef PEDANTIC 22 #pragma GCC diagnostic ignored "-Wpedantic" 23 #endif 24 #include <infiniband/verbs.h> 25 #ifdef PEDANTIC 26 #pragma GCC diagnostic error "-Wpedantic" 27 #endif 28 29 #include <rte_malloc.h> 30 #include <rte_ethdev_driver.h> 31 #include <rte_ethdev_pci.h> 32 #include <rte_pci.h> 33 #include <rte_bus_pci.h> 34 #include <rte_common.h> 35 #include <rte_kvargs.h> 36 #include <rte_rwlock.h> 37 #include <rte_spinlock.h> 38 #include <rte_string_fns.h> 39 #include <rte_alarm.h> 40 41 #include <mlx5_glue.h> 42 #include <mlx5_devx_cmds.h> 43 #include <mlx5_common.h> 44 #include <mlx5_common_mp.h> 45 #include <mlx5_common_mr.h> 46 47 #include "mlx5_defs.h" 48 #include "mlx5.h" 49 #include "mlx5_utils.h" 50 #include "mlx5_rxtx.h" 51 #include "mlx5_autoconf.h" 52 #include "mlx5_mr.h" 53 #include "mlx5_flow.h" 54 #include "rte_pmd_mlx5.h" 55 56 #define MLX5_TAGS_HLIST_ARRAY_SIZE 8192 57 58 #ifndef HAVE_IBV_MLX5_MOD_MPW 59 #define MLX5DV_CONTEXT_FLAGS_MPW_ALLOWED (1 << 2) 60 #define MLX5DV_CONTEXT_FLAGS_ENHANCED_MPW (1 << 3) 61 #endif 62 63 #ifndef HAVE_IBV_MLX5_MOD_CQE_128B_COMP 64 #define MLX5DV_CONTEXT_FLAGS_CQE_128B_COMP (1 << 4) 65 #endif 66 67 /** 68 * Get device name. Given an ibv_device pointer - return a 69 * pointer to the corresponding device name. 70 * 71 * @param[in] dev 72 * Pointer to ibv device. 73 * 74 * @return 75 * Pointer to device name if dev is valid, NULL otherwise. 76 */ 77 const char * 78 mlx5_os_get_dev_device_name(void *dev) 79 { 80 if (!dev) 81 return NULL; 82 return ((struct ibv_device *)dev)->name; 83 } 84 85 /** 86 * Get ibv device name. Given an ibv_context pointer - return a 87 * pointer to the corresponding device name. 88 * 89 * @param[in] ctx 90 * Pointer to ibv context. 91 * 92 * @return 93 * Pointer to device name if ctx is valid, NULL otherwise. 94 */ 95 const char * 96 mlx5_os_get_ctx_device_name(void *ctx) 97 { 98 if (!ctx) 99 return NULL; 100 return ((struct ibv_context *)ctx)->device->name; 101 } 102 103 /** 104 * Get ibv device path name. Given an ibv_context pointer - return a 105 * pointer to the corresponding device path name. 106 * 107 * @param[in] ctx 108 * Pointer to ibv context. 109 * 110 * @return 111 * Pointer to device path name if ctx is valid, NULL otherwise. 112 */ 113 const char * 114 mlx5_os_get_ctx_device_path(void *ctx) 115 { 116 if (!ctx) 117 return NULL; 118 119 return ((struct ibv_context *)ctx)->device->ibdev_path; 120 } 121 122 /** 123 * Get umem id. Given a pointer to umem object of type 124 * 'struct mlx5dv_devx_umem *' - return its id. 125 * 126 * @param[in] umem 127 * Pointer to umem object. 128 * 129 * @return 130 * The umem id if umem is valid, 0 otherwise. 131 */ 132 uint32_t 133 mlx5_os_get_umem_id(void *umem) 134 { 135 if (!umem) 136 return 0; 137 return ((struct mlx5dv_devx_umem *)umem)->umem_id; 138 } 139 140 /** 141 * Get mlx5 device attributes. The glue function query_device_ex() is called 142 * with out parameter of type 'struct ibv_device_attr_ex *'. Then fill in mlx5 143 * device attributes from the glue out parameter. 144 * 145 * @param dev 146 * Pointer to ibv context. 147 * 148 * @param device_attr 149 * Pointer to mlx5 device attributes. 150 * 151 * @return 152 * 0 on success, non zero error number otherwise 153 */ 154 int 155 mlx5_os_get_dev_attr(void *ctx, struct mlx5_dev_attr *device_attr) 156 { 157 int err; 158 struct ibv_device_attr_ex attr_ex; 159 memset(device_attr, 0, sizeof(*device_attr)); 160 err = mlx5_glue->query_device_ex(ctx, NULL, &attr_ex); 161 if (err) 162 return err; 163 164 device_attr->device_cap_flags_ex = attr_ex.device_cap_flags_ex; 165 device_attr->max_qp_wr = attr_ex.orig_attr.max_qp_wr; 166 device_attr->max_sge = attr_ex.orig_attr.max_sge; 167 device_attr->max_cq = attr_ex.orig_attr.max_cq; 168 device_attr->max_qp = attr_ex.orig_attr.max_qp; 169 device_attr->raw_packet_caps = attr_ex.raw_packet_caps; 170 device_attr->max_rwq_indirection_table_size = 171 attr_ex.rss_caps.max_rwq_indirection_table_size; 172 device_attr->max_tso = attr_ex.tso_caps.max_tso; 173 device_attr->tso_supported_qpts = attr_ex.tso_caps.supported_qpts; 174 175 struct mlx5dv_context dv_attr = { .comp_mask = 0 }; 176 err = mlx5_glue->dv_query_device(ctx, &dv_attr); 177 if (err) 178 return err; 179 180 device_attr->flags = dv_attr.flags; 181 device_attr->comp_mask = dv_attr.comp_mask; 182 #ifdef HAVE_IBV_MLX5_MOD_SWP 183 device_attr->sw_parsing_offloads = 184 dv_attr.sw_parsing_caps.sw_parsing_offloads; 185 #endif 186 device_attr->min_single_stride_log_num_of_bytes = 187 dv_attr.striding_rq_caps.min_single_stride_log_num_of_bytes; 188 device_attr->max_single_stride_log_num_of_bytes = 189 dv_attr.striding_rq_caps.max_single_stride_log_num_of_bytes; 190 device_attr->min_single_wqe_log_num_of_strides = 191 dv_attr.striding_rq_caps.min_single_wqe_log_num_of_strides; 192 device_attr->max_single_wqe_log_num_of_strides = 193 dv_attr.striding_rq_caps.max_single_wqe_log_num_of_strides; 194 device_attr->stride_supported_qpts = 195 dv_attr.striding_rq_caps.supported_qpts; 196 #ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT 197 device_attr->tunnel_offloads_caps = dv_attr.tunnel_offloads_caps; 198 #endif 199 200 return err; 201 } 202 203 /** 204 * Verbs callback to allocate a memory. This function should allocate the space 205 * according to the size provided residing inside a huge page. 206 * Please note that all allocation must respect the alignment from libmlx5 207 * (i.e. currently sysconf(_SC_PAGESIZE)). 208 * 209 * @param[in] size 210 * The size in bytes of the memory to allocate. 211 * @param[in] data 212 * A pointer to the callback data. 213 * 214 * @return 215 * Allocated buffer, NULL otherwise and rte_errno is set. 216 */ 217 static void * 218 mlx5_alloc_verbs_buf(size_t size, void *data) 219 { 220 struct mlx5_priv *priv = data; 221 void *ret; 222 size_t alignment = sysconf(_SC_PAGESIZE); 223 unsigned int socket = SOCKET_ID_ANY; 224 225 if (priv->verbs_alloc_ctx.type == MLX5_VERBS_ALLOC_TYPE_TX_QUEUE) { 226 const struct mlx5_txq_ctrl *ctrl = priv->verbs_alloc_ctx.obj; 227 228 socket = ctrl->socket; 229 } else if (priv->verbs_alloc_ctx.type == 230 MLX5_VERBS_ALLOC_TYPE_RX_QUEUE) { 231 const struct mlx5_rxq_ctrl *ctrl = priv->verbs_alloc_ctx.obj; 232 233 socket = ctrl->socket; 234 } 235 MLX5_ASSERT(data != NULL); 236 ret = rte_malloc_socket(__func__, size, alignment, socket); 237 if (!ret && size) 238 rte_errno = ENOMEM; 239 return ret; 240 } 241 242 /** 243 * Verbs callback to free a memory. 244 * 245 * @param[in] ptr 246 * A pointer to the memory to free. 247 * @param[in] data 248 * A pointer to the callback data. 249 */ 250 static void 251 mlx5_free_verbs_buf(void *ptr, void *data __rte_unused) 252 { 253 MLX5_ASSERT(data != NULL); 254 rte_free(ptr); 255 } 256 257 /** 258 * Initialize DR related data within private structure. 259 * Routine checks the reference counter and does actual 260 * resources creation/initialization only if counter is zero. 261 * 262 * @param[in] priv 263 * Pointer to the private device data structure. 264 * 265 * @return 266 * Zero on success, positive error code otherwise. 267 */ 268 static int 269 mlx5_alloc_shared_dr(struct mlx5_priv *priv) 270 { 271 struct mlx5_dev_ctx_shared *sh = priv->sh; 272 char s[MLX5_HLIST_NAMESIZE]; 273 int err = 0; 274 275 if (!sh->flow_tbls) 276 err = mlx5_alloc_table_hash_list(priv); 277 else 278 DRV_LOG(DEBUG, "sh->flow_tbls[%p] already created, reuse\n", 279 (void *)sh->flow_tbls); 280 if (err) 281 return err; 282 /* Create tags hash list table. */ 283 snprintf(s, sizeof(s), "%s_tags", sh->ibdev_name); 284 sh->tag_table = mlx5_hlist_create(s, MLX5_TAGS_HLIST_ARRAY_SIZE); 285 if (!sh->tag_table) { 286 DRV_LOG(ERR, "tags with hash creation failed.\n"); 287 err = ENOMEM; 288 goto error; 289 } 290 #ifdef HAVE_MLX5DV_DR 291 void *domain; 292 293 if (sh->dv_refcnt) { 294 /* Shared DV/DR structures is already initialized. */ 295 sh->dv_refcnt++; 296 priv->dr_shared = 1; 297 return 0; 298 } 299 /* Reference counter is zero, we should initialize structures. */ 300 domain = mlx5_glue->dr_create_domain(sh->ctx, 301 MLX5DV_DR_DOMAIN_TYPE_NIC_RX); 302 if (!domain) { 303 DRV_LOG(ERR, "ingress mlx5dv_dr_create_domain failed"); 304 err = errno; 305 goto error; 306 } 307 sh->rx_domain = domain; 308 domain = mlx5_glue->dr_create_domain(sh->ctx, 309 MLX5DV_DR_DOMAIN_TYPE_NIC_TX); 310 if (!domain) { 311 DRV_LOG(ERR, "egress mlx5dv_dr_create_domain failed"); 312 err = errno; 313 goto error; 314 } 315 pthread_mutex_init(&sh->dv_mutex, NULL); 316 sh->tx_domain = domain; 317 #ifdef HAVE_MLX5DV_DR_ESWITCH 318 if (priv->config.dv_esw_en) { 319 domain = mlx5_glue->dr_create_domain 320 (sh->ctx, MLX5DV_DR_DOMAIN_TYPE_FDB); 321 if (!domain) { 322 DRV_LOG(ERR, "FDB mlx5dv_dr_create_domain failed"); 323 err = errno; 324 goto error; 325 } 326 sh->fdb_domain = domain; 327 sh->esw_drop_action = mlx5_glue->dr_create_flow_action_drop(); 328 } 329 #endif 330 if (priv->config.reclaim_mode == MLX5_RCM_AGGR) { 331 mlx5_glue->dr_reclaim_domain_memory(sh->rx_domain, 1); 332 mlx5_glue->dr_reclaim_domain_memory(sh->tx_domain, 1); 333 if (sh->fdb_domain) 334 mlx5_glue->dr_reclaim_domain_memory(sh->fdb_domain, 1); 335 } 336 sh->pop_vlan_action = mlx5_glue->dr_create_flow_action_pop_vlan(); 337 #endif /* HAVE_MLX5DV_DR */ 338 sh->dv_refcnt++; 339 priv->dr_shared = 1; 340 return 0; 341 error: 342 /* Rollback the created objects. */ 343 if (sh->rx_domain) { 344 mlx5_glue->dr_destroy_domain(sh->rx_domain); 345 sh->rx_domain = NULL; 346 } 347 if (sh->tx_domain) { 348 mlx5_glue->dr_destroy_domain(sh->tx_domain); 349 sh->tx_domain = NULL; 350 } 351 if (sh->fdb_domain) { 352 mlx5_glue->dr_destroy_domain(sh->fdb_domain); 353 sh->fdb_domain = NULL; 354 } 355 if (sh->esw_drop_action) { 356 mlx5_glue->destroy_flow_action(sh->esw_drop_action); 357 sh->esw_drop_action = NULL; 358 } 359 if (sh->pop_vlan_action) { 360 mlx5_glue->destroy_flow_action(sh->pop_vlan_action); 361 sh->pop_vlan_action = NULL; 362 } 363 if (sh->tag_table) { 364 /* tags should be destroyed with flow before. */ 365 mlx5_hlist_destroy(sh->tag_table, NULL, NULL); 366 sh->tag_table = NULL; 367 } 368 mlx5_free_table_hash_list(priv); 369 return err; 370 } 371 372 /** 373 * Destroy DR related data within private structure. 374 * 375 * @param[in] priv 376 * Pointer to the private device data structure. 377 */ 378 void 379 mlx5_os_free_shared_dr(struct mlx5_priv *priv) 380 { 381 struct mlx5_dev_ctx_shared *sh; 382 383 if (!priv->dr_shared) 384 return; 385 priv->dr_shared = 0; 386 sh = priv->sh; 387 MLX5_ASSERT(sh); 388 #ifdef HAVE_MLX5DV_DR 389 MLX5_ASSERT(sh->dv_refcnt); 390 if (sh->dv_refcnt && --sh->dv_refcnt) 391 return; 392 if (sh->rx_domain) { 393 mlx5_glue->dr_destroy_domain(sh->rx_domain); 394 sh->rx_domain = NULL; 395 } 396 if (sh->tx_domain) { 397 mlx5_glue->dr_destroy_domain(sh->tx_domain); 398 sh->tx_domain = NULL; 399 } 400 #ifdef HAVE_MLX5DV_DR_ESWITCH 401 if (sh->fdb_domain) { 402 mlx5_glue->dr_destroy_domain(sh->fdb_domain); 403 sh->fdb_domain = NULL; 404 } 405 if (sh->esw_drop_action) { 406 mlx5_glue->destroy_flow_action(sh->esw_drop_action); 407 sh->esw_drop_action = NULL; 408 } 409 #endif 410 if (sh->pop_vlan_action) { 411 mlx5_glue->destroy_flow_action(sh->pop_vlan_action); 412 sh->pop_vlan_action = NULL; 413 } 414 pthread_mutex_destroy(&sh->dv_mutex); 415 #endif /* HAVE_MLX5DV_DR */ 416 if (sh->tag_table) { 417 /* tags should be destroyed with flow before. */ 418 mlx5_hlist_destroy(sh->tag_table, NULL, NULL); 419 sh->tag_table = NULL; 420 } 421 mlx5_free_table_hash_list(priv); 422 } 423 424 /** 425 * Spawn an Ethernet device from Verbs information. 426 * 427 * @param dpdk_dev 428 * Backing DPDK device. 429 * @param spawn 430 * Verbs device parameters (name, port, switch_info) to spawn. 431 * @param config 432 * Device configuration parameters. 433 * 434 * @return 435 * A valid Ethernet device object on success, NULL otherwise and rte_errno 436 * is set. The following errors are defined: 437 * 438 * EBUSY: device is not supposed to be spawned. 439 * EEXIST: device is already spawned 440 */ 441 static struct rte_eth_dev * 442 mlx5_dev_spawn(struct rte_device *dpdk_dev, 443 struct mlx5_dev_spawn_data *spawn, 444 struct mlx5_dev_config config) 445 { 446 const struct mlx5_switch_info *switch_info = &spawn->info; 447 struct mlx5_dev_ctx_shared *sh = NULL; 448 struct ibv_port_attr port_attr; 449 struct mlx5dv_context dv_attr = { .comp_mask = 0 }; 450 struct rte_eth_dev *eth_dev = NULL; 451 struct mlx5_priv *priv = NULL; 452 int err = 0; 453 unsigned int hw_padding = 0; 454 unsigned int mps; 455 unsigned int cqe_comp; 456 unsigned int cqe_pad = 0; 457 unsigned int tunnel_en = 0; 458 unsigned int mpls_en = 0; 459 unsigned int swp = 0; 460 unsigned int mprq = 0; 461 unsigned int mprq_min_stride_size_n = 0; 462 unsigned int mprq_max_stride_size_n = 0; 463 unsigned int mprq_min_stride_num_n = 0; 464 unsigned int mprq_max_stride_num_n = 0; 465 struct rte_ether_addr mac; 466 char name[RTE_ETH_NAME_MAX_LEN]; 467 int own_domain_id = 0; 468 uint16_t port_id; 469 unsigned int i; 470 #ifdef HAVE_MLX5DV_DR_DEVX_PORT 471 struct mlx5dv_devx_port devx_port = { .comp_mask = 0 }; 472 #endif 473 474 /* Determine if this port representor is supposed to be spawned. */ 475 if (switch_info->representor && dpdk_dev->devargs) { 476 struct rte_eth_devargs eth_da; 477 478 err = rte_eth_devargs_parse(dpdk_dev->devargs->args, ð_da); 479 if (err) { 480 rte_errno = -err; 481 DRV_LOG(ERR, "failed to process device arguments: %s", 482 strerror(rte_errno)); 483 return NULL; 484 } 485 for (i = 0; i < eth_da.nb_representor_ports; ++i) 486 if (eth_da.representor_ports[i] == 487 (uint16_t)switch_info->port_name) 488 break; 489 if (i == eth_da.nb_representor_ports) { 490 rte_errno = EBUSY; 491 return NULL; 492 } 493 } 494 /* Build device name. */ 495 if (spawn->pf_bond < 0) { 496 /* Single device. */ 497 if (!switch_info->representor) 498 strlcpy(name, dpdk_dev->name, sizeof(name)); 499 else 500 snprintf(name, sizeof(name), "%s_representor_%u", 501 dpdk_dev->name, switch_info->port_name); 502 } else { 503 /* Bonding device. */ 504 if (!switch_info->representor) 505 snprintf(name, sizeof(name), "%s_%s", 506 dpdk_dev->name, 507 mlx5_os_get_dev_device_name(spawn->phys_dev)); 508 else 509 snprintf(name, sizeof(name), "%s_%s_representor_%u", 510 dpdk_dev->name, 511 mlx5_os_get_dev_device_name(spawn->phys_dev), 512 switch_info->port_name); 513 } 514 /* check if the device is already spawned */ 515 if (rte_eth_dev_get_port_by_name(name, &port_id) == 0) { 516 rte_errno = EEXIST; 517 return NULL; 518 } 519 DRV_LOG(DEBUG, "naming Ethernet device \"%s\"", name); 520 if (rte_eal_process_type() == RTE_PROC_SECONDARY) { 521 struct mlx5_mp_id mp_id; 522 523 eth_dev = rte_eth_dev_attach_secondary(name); 524 if (eth_dev == NULL) { 525 DRV_LOG(ERR, "can not attach rte ethdev"); 526 rte_errno = ENOMEM; 527 return NULL; 528 } 529 eth_dev->device = dpdk_dev; 530 eth_dev->dev_ops = &mlx5_os_dev_sec_ops; 531 err = mlx5_proc_priv_init(eth_dev); 532 if (err) 533 return NULL; 534 mp_id.port_id = eth_dev->data->port_id; 535 strlcpy(mp_id.name, MLX5_MP_NAME, RTE_MP_MAX_NAME_LEN); 536 /* Receive command fd from primary process */ 537 err = mlx5_mp_req_verbs_cmd_fd(&mp_id); 538 if (err < 0) 539 goto err_secondary; 540 /* Remap UAR for Tx queues. */ 541 err = mlx5_tx_uar_init_secondary(eth_dev, err); 542 if (err) 543 goto err_secondary; 544 /* 545 * Ethdev pointer is still required as input since 546 * the primary device is not accessible from the 547 * secondary process. 548 */ 549 eth_dev->rx_pkt_burst = mlx5_select_rx_function(eth_dev); 550 eth_dev->tx_pkt_burst = mlx5_select_tx_function(eth_dev); 551 return eth_dev; 552 err_secondary: 553 mlx5_dev_close(eth_dev); 554 return NULL; 555 } 556 /* 557 * Some parameters ("tx_db_nc" in particularly) are needed in 558 * advance to create dv/verbs device context. We proceed the 559 * devargs here to get ones, and later proceed devargs again 560 * to override some hardware settings. 561 */ 562 err = mlx5_args(&config, dpdk_dev->devargs); 563 if (err) { 564 err = rte_errno; 565 DRV_LOG(ERR, "failed to process device arguments: %s", 566 strerror(rte_errno)); 567 goto error; 568 } 569 sh = mlx5_alloc_shared_dev_ctx(spawn, &config); 570 if (!sh) 571 return NULL; 572 config.devx = sh->devx; 573 #ifdef HAVE_MLX5DV_DR_ACTION_DEST_DEVX_TIR 574 config.dest_tir = 1; 575 #endif 576 #ifdef HAVE_IBV_MLX5_MOD_SWP 577 dv_attr.comp_mask |= MLX5DV_CONTEXT_MASK_SWP; 578 #endif 579 /* 580 * Multi-packet send is supported by ConnectX-4 Lx PF as well 581 * as all ConnectX-5 devices. 582 */ 583 #ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT 584 dv_attr.comp_mask |= MLX5DV_CONTEXT_MASK_TUNNEL_OFFLOADS; 585 #endif 586 #ifdef HAVE_IBV_DEVICE_STRIDING_RQ_SUPPORT 587 dv_attr.comp_mask |= MLX5DV_CONTEXT_MASK_STRIDING_RQ; 588 #endif 589 mlx5_glue->dv_query_device(sh->ctx, &dv_attr); 590 if (dv_attr.flags & MLX5DV_CONTEXT_FLAGS_MPW_ALLOWED) { 591 if (dv_attr.flags & MLX5DV_CONTEXT_FLAGS_ENHANCED_MPW) { 592 DRV_LOG(DEBUG, "enhanced MPW is supported"); 593 mps = MLX5_MPW_ENHANCED; 594 } else { 595 DRV_LOG(DEBUG, "MPW is supported"); 596 mps = MLX5_MPW; 597 } 598 } else { 599 DRV_LOG(DEBUG, "MPW isn't supported"); 600 mps = MLX5_MPW_DISABLED; 601 } 602 #ifdef HAVE_IBV_MLX5_MOD_SWP 603 if (dv_attr.comp_mask & MLX5DV_CONTEXT_MASK_SWP) 604 swp = dv_attr.sw_parsing_caps.sw_parsing_offloads; 605 DRV_LOG(DEBUG, "SWP support: %u", swp); 606 #endif 607 config.swp = !!swp; 608 #ifdef HAVE_IBV_DEVICE_STRIDING_RQ_SUPPORT 609 if (dv_attr.comp_mask & MLX5DV_CONTEXT_MASK_STRIDING_RQ) { 610 struct mlx5dv_striding_rq_caps mprq_caps = 611 dv_attr.striding_rq_caps; 612 613 DRV_LOG(DEBUG, "\tmin_single_stride_log_num_of_bytes: %d", 614 mprq_caps.min_single_stride_log_num_of_bytes); 615 DRV_LOG(DEBUG, "\tmax_single_stride_log_num_of_bytes: %d", 616 mprq_caps.max_single_stride_log_num_of_bytes); 617 DRV_LOG(DEBUG, "\tmin_single_wqe_log_num_of_strides: %d", 618 mprq_caps.min_single_wqe_log_num_of_strides); 619 DRV_LOG(DEBUG, "\tmax_single_wqe_log_num_of_strides: %d", 620 mprq_caps.max_single_wqe_log_num_of_strides); 621 DRV_LOG(DEBUG, "\tsupported_qpts: %d", 622 mprq_caps.supported_qpts); 623 DRV_LOG(DEBUG, "device supports Multi-Packet RQ"); 624 mprq = 1; 625 mprq_min_stride_size_n = 626 mprq_caps.min_single_stride_log_num_of_bytes; 627 mprq_max_stride_size_n = 628 mprq_caps.max_single_stride_log_num_of_bytes; 629 mprq_min_stride_num_n = 630 mprq_caps.min_single_wqe_log_num_of_strides; 631 mprq_max_stride_num_n = 632 mprq_caps.max_single_wqe_log_num_of_strides; 633 } 634 #endif 635 if (RTE_CACHE_LINE_SIZE == 128 && 636 !(dv_attr.flags & MLX5DV_CONTEXT_FLAGS_CQE_128B_COMP)) 637 cqe_comp = 0; 638 else 639 cqe_comp = 1; 640 config.cqe_comp = cqe_comp; 641 #ifdef HAVE_IBV_MLX5_MOD_CQE_128B_PAD 642 /* Whether device supports 128B Rx CQE padding. */ 643 cqe_pad = RTE_CACHE_LINE_SIZE == 128 && 644 (dv_attr.flags & MLX5DV_CONTEXT_FLAGS_CQE_128B_PAD); 645 #endif 646 #ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT 647 if (dv_attr.comp_mask & MLX5DV_CONTEXT_MASK_TUNNEL_OFFLOADS) { 648 tunnel_en = ((dv_attr.tunnel_offloads_caps & 649 MLX5DV_RAW_PACKET_CAP_TUNNELED_OFFLOAD_VXLAN) && 650 (dv_attr.tunnel_offloads_caps & 651 MLX5DV_RAW_PACKET_CAP_TUNNELED_OFFLOAD_GRE) && 652 (dv_attr.tunnel_offloads_caps & 653 MLX5DV_RAW_PACKET_CAP_TUNNELED_OFFLOAD_GENEVE)); 654 } 655 DRV_LOG(DEBUG, "tunnel offloading is %ssupported", 656 tunnel_en ? "" : "not "); 657 #else 658 DRV_LOG(WARNING, 659 "tunnel offloading disabled due to old OFED/rdma-core version"); 660 #endif 661 config.tunnel_en = tunnel_en; 662 #ifdef HAVE_IBV_DEVICE_MPLS_SUPPORT 663 mpls_en = ((dv_attr.tunnel_offloads_caps & 664 MLX5DV_RAW_PACKET_CAP_TUNNELED_OFFLOAD_CW_MPLS_OVER_GRE) && 665 (dv_attr.tunnel_offloads_caps & 666 MLX5DV_RAW_PACKET_CAP_TUNNELED_OFFLOAD_CW_MPLS_OVER_UDP)); 667 DRV_LOG(DEBUG, "MPLS over GRE/UDP tunnel offloading is %ssupported", 668 mpls_en ? "" : "not "); 669 #else 670 DRV_LOG(WARNING, "MPLS over GRE/UDP tunnel offloading disabled due to" 671 " old OFED/rdma-core version or firmware configuration"); 672 #endif 673 config.mpls_en = mpls_en; 674 /* Check port status. */ 675 err = mlx5_glue->query_port(sh->ctx, spawn->phys_port, &port_attr); 676 if (err) { 677 DRV_LOG(ERR, "port query failed: %s", strerror(err)); 678 goto error; 679 } 680 if (port_attr.link_layer != IBV_LINK_LAYER_ETHERNET) { 681 DRV_LOG(ERR, "port is not configured in Ethernet mode"); 682 err = EINVAL; 683 goto error; 684 } 685 if (port_attr.state != IBV_PORT_ACTIVE) 686 DRV_LOG(DEBUG, "port is not active: \"%s\" (%d)", 687 mlx5_glue->port_state_str(port_attr.state), 688 port_attr.state); 689 /* Allocate private eth device data. */ 690 priv = rte_zmalloc("ethdev private structure", 691 sizeof(*priv), 692 RTE_CACHE_LINE_SIZE); 693 if (priv == NULL) { 694 DRV_LOG(ERR, "priv allocation failure"); 695 err = ENOMEM; 696 goto error; 697 } 698 priv->sh = sh; 699 priv->dev_port = spawn->phys_port; 700 priv->pci_dev = spawn->pci_dev; 701 priv->mtu = RTE_ETHER_MTU; 702 priv->mp_id.port_id = port_id; 703 strlcpy(priv->mp_id.name, MLX5_MP_NAME, RTE_MP_MAX_NAME_LEN); 704 #ifndef RTE_ARCH_64 705 /* Initialize UAR access locks for 32bit implementations. */ 706 rte_spinlock_init(&priv->uar_lock_cq); 707 for (i = 0; i < MLX5_UAR_PAGE_NUM_MAX; i++) 708 rte_spinlock_init(&priv->uar_lock[i]); 709 #endif 710 /* Some internal functions rely on Netlink sockets, open them now. */ 711 priv->nl_socket_rdma = mlx5_nl_init(NETLINK_RDMA); 712 priv->nl_socket_route = mlx5_nl_init(NETLINK_ROUTE); 713 priv->representor = !!switch_info->representor; 714 priv->master = !!switch_info->master; 715 priv->domain_id = RTE_ETH_DEV_SWITCH_DOMAIN_ID_INVALID; 716 priv->vport_meta_tag = 0; 717 priv->vport_meta_mask = 0; 718 priv->pf_bond = spawn->pf_bond; 719 #ifdef HAVE_MLX5DV_DR_DEVX_PORT 720 /* 721 * The DevX port query API is implemented. E-Switch may use 722 * either vport or reg_c[0] metadata register to match on 723 * vport index. The engaged part of metadata register is 724 * defined by mask. 725 */ 726 if (switch_info->representor || switch_info->master) { 727 devx_port.comp_mask = MLX5DV_DEVX_PORT_VPORT | 728 MLX5DV_DEVX_PORT_MATCH_REG_C_0; 729 err = mlx5_glue->devx_port_query(sh->ctx, spawn->phys_port, 730 &devx_port); 731 if (err) { 732 DRV_LOG(WARNING, 733 "can't query devx port %d on device %s", 734 spawn->phys_port, 735 mlx5_os_get_dev_device_name(spawn->phys_dev)); 736 devx_port.comp_mask = 0; 737 } 738 } 739 if (devx_port.comp_mask & MLX5DV_DEVX_PORT_MATCH_REG_C_0) { 740 priv->vport_meta_tag = devx_port.reg_c_0.value; 741 priv->vport_meta_mask = devx_port.reg_c_0.mask; 742 if (!priv->vport_meta_mask) { 743 DRV_LOG(ERR, "vport zero mask for port %d" 744 " on bonding device %s", 745 spawn->phys_port, 746 mlx5_os_get_dev_device_name 747 (spawn->phys_dev)); 748 err = ENOTSUP; 749 goto error; 750 } 751 if (priv->vport_meta_tag & ~priv->vport_meta_mask) { 752 DRV_LOG(ERR, "invalid vport tag for port %d" 753 " on bonding device %s", 754 spawn->phys_port, 755 mlx5_os_get_dev_device_name 756 (spawn->phys_dev)); 757 err = ENOTSUP; 758 goto error; 759 } 760 } 761 if (devx_port.comp_mask & MLX5DV_DEVX_PORT_VPORT) { 762 priv->vport_id = devx_port.vport_num; 763 } else if (spawn->pf_bond >= 0) { 764 DRV_LOG(ERR, "can't deduce vport index for port %d" 765 " on bonding device %s", 766 spawn->phys_port, 767 mlx5_os_get_dev_device_name(spawn->phys_dev)); 768 err = ENOTSUP; 769 goto error; 770 } else { 771 /* Suppose vport index in compatible way. */ 772 priv->vport_id = switch_info->representor ? 773 switch_info->port_name + 1 : -1; 774 } 775 #else 776 /* 777 * Kernel/rdma_core support single E-Switch per PF configurations 778 * only and vport_id field contains the vport index for 779 * associated VF, which is deduced from representor port name. 780 * For example, let's have the IB device port 10, it has 781 * attached network device eth0, which has port name attribute 782 * pf0vf2, we can deduce the VF number as 2, and set vport index 783 * as 3 (2+1). This assigning schema should be changed if the 784 * multiple E-Switch instances per PF configurations or/and PCI 785 * subfunctions are added. 786 */ 787 priv->vport_id = switch_info->representor ? 788 switch_info->port_name + 1 : -1; 789 #endif 790 /* representor_id field keeps the unmodified VF index. */ 791 priv->representor_id = switch_info->representor ? 792 switch_info->port_name : -1; 793 /* 794 * Look for sibling devices in order to reuse their switch domain 795 * if any, otherwise allocate one. 796 */ 797 MLX5_ETH_FOREACH_DEV(port_id, priv->pci_dev) { 798 const struct mlx5_priv *opriv = 799 rte_eth_devices[port_id].data->dev_private; 800 801 if (!opriv || 802 opriv->sh != priv->sh || 803 opriv->domain_id == 804 RTE_ETH_DEV_SWITCH_DOMAIN_ID_INVALID) 805 continue; 806 priv->domain_id = opriv->domain_id; 807 break; 808 } 809 if (priv->domain_id == RTE_ETH_DEV_SWITCH_DOMAIN_ID_INVALID) { 810 err = rte_eth_switch_domain_alloc(&priv->domain_id); 811 if (err) { 812 err = rte_errno; 813 DRV_LOG(ERR, "unable to allocate switch domain: %s", 814 strerror(rte_errno)); 815 goto error; 816 } 817 own_domain_id = 1; 818 } 819 /* Override some values set by hardware configuration. */ 820 mlx5_args(&config, dpdk_dev->devargs); 821 err = mlx5_dev_check_sibling_config(priv, &config); 822 if (err) 823 goto error; 824 config.hw_csum = !!(sh->device_attr.device_cap_flags_ex & 825 IBV_DEVICE_RAW_IP_CSUM); 826 DRV_LOG(DEBUG, "checksum offloading is %ssupported", 827 (config.hw_csum ? "" : "not ")); 828 #if !defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42) && \ 829 !defined(HAVE_IBV_DEVICE_COUNTERS_SET_V45) 830 DRV_LOG(DEBUG, "counters are not supported"); 831 #endif 832 #if !defined(HAVE_IBV_FLOW_DV_SUPPORT) || !defined(HAVE_MLX5DV_DR) 833 if (config.dv_flow_en) { 834 DRV_LOG(WARNING, "DV flow is not supported"); 835 config.dv_flow_en = 0; 836 } 837 #endif 838 config.ind_table_max_size = 839 sh->device_attr.max_rwq_indirection_table_size; 840 /* 841 * Remove this check once DPDK supports larger/variable 842 * indirection tables. 843 */ 844 if (config.ind_table_max_size > (unsigned int)ETH_RSS_RETA_SIZE_512) 845 config.ind_table_max_size = ETH_RSS_RETA_SIZE_512; 846 DRV_LOG(DEBUG, "maximum Rx indirection table size is %u", 847 config.ind_table_max_size); 848 config.hw_vlan_strip = !!(sh->device_attr.raw_packet_caps & 849 IBV_RAW_PACKET_CAP_CVLAN_STRIPPING); 850 DRV_LOG(DEBUG, "VLAN stripping is %ssupported", 851 (config.hw_vlan_strip ? "" : "not ")); 852 config.hw_fcs_strip = !!(sh->device_attr.raw_packet_caps & 853 IBV_RAW_PACKET_CAP_SCATTER_FCS); 854 DRV_LOG(DEBUG, "FCS stripping configuration is %ssupported", 855 (config.hw_fcs_strip ? "" : "not ")); 856 #if defined(HAVE_IBV_WQ_FLAG_RX_END_PADDING) 857 hw_padding = !!sh->device_attr.rx_pad_end_addr_align; 858 #elif defined(HAVE_IBV_WQ_FLAGS_PCI_WRITE_END_PADDING) 859 hw_padding = !!(sh->device_attr.device_cap_flags_ex & 860 IBV_DEVICE_PCI_WRITE_END_PADDING); 861 #endif 862 if (config.hw_padding && !hw_padding) { 863 DRV_LOG(DEBUG, "Rx end alignment padding isn't supported"); 864 config.hw_padding = 0; 865 } else if (config.hw_padding) { 866 DRV_LOG(DEBUG, "Rx end alignment padding is enabled"); 867 } 868 config.tso = (sh->device_attr.max_tso > 0 && 869 (sh->device_attr.tso_supported_qpts & 870 (1 << IBV_QPT_RAW_PACKET))); 871 if (config.tso) 872 config.tso_max_payload_sz = sh->device_attr.max_tso; 873 /* 874 * MPW is disabled by default, while the Enhanced MPW is enabled 875 * by default. 876 */ 877 if (config.mps == MLX5_ARG_UNSET) 878 config.mps = (mps == MLX5_MPW_ENHANCED) ? MLX5_MPW_ENHANCED : 879 MLX5_MPW_DISABLED; 880 else 881 config.mps = config.mps ? mps : MLX5_MPW_DISABLED; 882 DRV_LOG(INFO, "%sMPS is %s", 883 config.mps == MLX5_MPW_ENHANCED ? "enhanced " : 884 config.mps == MLX5_MPW ? "legacy " : "", 885 config.mps != MLX5_MPW_DISABLED ? "enabled" : "disabled"); 886 if (config.cqe_comp && !cqe_comp) { 887 DRV_LOG(WARNING, "Rx CQE compression isn't supported"); 888 config.cqe_comp = 0; 889 } 890 if (config.cqe_pad && !cqe_pad) { 891 DRV_LOG(WARNING, "Rx CQE padding isn't supported"); 892 config.cqe_pad = 0; 893 } else if (config.cqe_pad) { 894 DRV_LOG(INFO, "Rx CQE padding is enabled"); 895 } 896 if (config.devx) { 897 priv->counter_fallback = 0; 898 err = mlx5_devx_cmd_query_hca_attr(sh->ctx, &config.hca_attr); 899 if (err) { 900 err = -err; 901 goto error; 902 } 903 if (!config.hca_attr.flow_counters_dump) 904 priv->counter_fallback = 1; 905 #ifndef HAVE_IBV_DEVX_ASYNC 906 priv->counter_fallback = 1; 907 #endif 908 if (priv->counter_fallback) 909 DRV_LOG(INFO, "Use fall-back DV counter management"); 910 /* Check for LRO support. */ 911 if (config.dest_tir && config.hca_attr.lro_cap && 912 config.dv_flow_en) { 913 /* TBD check tunnel lro caps. */ 914 config.lro.supported = config.hca_attr.lro_cap; 915 DRV_LOG(DEBUG, "Device supports LRO"); 916 /* 917 * If LRO timeout is not configured by application, 918 * use the minimal supported value. 919 */ 920 if (!config.lro.timeout) 921 config.lro.timeout = 922 config.hca_attr.lro_timer_supported_periods[0]; 923 DRV_LOG(DEBUG, "LRO session timeout set to %d usec", 924 config.lro.timeout); 925 } 926 #if defined(HAVE_MLX5DV_DR) && defined(HAVE_MLX5_DR_CREATE_ACTION_FLOW_METER) 927 if (config.hca_attr.qos.sup && config.hca_attr.qos.srtcm_sup && 928 config.dv_flow_en) { 929 uint8_t reg_c_mask = 930 config.hca_attr.qos.flow_meter_reg_c_ids; 931 /* 932 * Meter needs two REG_C's for color match and pre-sfx 933 * flow match. Here get the REG_C for color match. 934 * REG_C_0 and REG_C_1 is reserved for metadata feature. 935 */ 936 reg_c_mask &= 0xfc; 937 if (__builtin_popcount(reg_c_mask) < 1) { 938 priv->mtr_en = 0; 939 DRV_LOG(WARNING, "No available register for" 940 " meter."); 941 } else { 942 priv->mtr_color_reg = ffs(reg_c_mask) - 1 + 943 REG_C_0; 944 priv->mtr_en = 1; 945 priv->mtr_reg_share = 946 config.hca_attr.qos.flow_meter_reg_share; 947 DRV_LOG(DEBUG, "The REG_C meter uses is %d", 948 priv->mtr_color_reg); 949 } 950 } 951 #endif 952 } 953 if (config.mprq.enabled && mprq) { 954 if (config.mprq.stride_num_n && 955 (config.mprq.stride_num_n > mprq_max_stride_num_n || 956 config.mprq.stride_num_n < mprq_min_stride_num_n)) { 957 config.mprq.stride_num_n = 958 RTE_MIN(RTE_MAX(MLX5_MPRQ_STRIDE_NUM_N, 959 mprq_min_stride_num_n), 960 mprq_max_stride_num_n); 961 DRV_LOG(WARNING, 962 "the number of strides" 963 " for Multi-Packet RQ is out of range," 964 " setting default value (%u)", 965 1 << config.mprq.stride_num_n); 966 } 967 if (config.mprq.stride_size_n && 968 (config.mprq.stride_size_n > mprq_max_stride_size_n || 969 config.mprq.stride_size_n < mprq_min_stride_size_n)) { 970 config.mprq.stride_size_n = 971 RTE_MIN(RTE_MAX(MLX5_MPRQ_STRIDE_SIZE_N, 972 mprq_min_stride_size_n), 973 mprq_max_stride_size_n); 974 DRV_LOG(WARNING, 975 "the size of a stride" 976 " for Multi-Packet RQ is out of range," 977 " setting default value (%u)", 978 1 << config.mprq.stride_size_n); 979 } 980 config.mprq.min_stride_size_n = mprq_min_stride_size_n; 981 config.mprq.max_stride_size_n = mprq_max_stride_size_n; 982 } else if (config.mprq.enabled && !mprq) { 983 DRV_LOG(WARNING, "Multi-Packet RQ isn't supported"); 984 config.mprq.enabled = 0; 985 } 986 if (config.max_dump_files_num == 0) 987 config.max_dump_files_num = 128; 988 eth_dev = rte_eth_dev_allocate(name); 989 if (eth_dev == NULL) { 990 DRV_LOG(ERR, "can not allocate rte ethdev"); 991 err = ENOMEM; 992 goto error; 993 } 994 /* Flag to call rte_eth_dev_release_port() in rte_eth_dev_close(). */ 995 eth_dev->data->dev_flags |= RTE_ETH_DEV_CLOSE_REMOVE; 996 if (priv->representor) { 997 eth_dev->data->dev_flags |= RTE_ETH_DEV_REPRESENTOR; 998 eth_dev->data->representor_id = priv->representor_id; 999 } 1000 /* 1001 * Store associated network device interface index. This index 1002 * is permanent throughout the lifetime of device. So, we may store 1003 * the ifindex here and use the cached value further. 1004 */ 1005 MLX5_ASSERT(spawn->ifindex); 1006 priv->if_index = spawn->ifindex; 1007 eth_dev->data->dev_private = priv; 1008 priv->dev_data = eth_dev->data; 1009 eth_dev->data->mac_addrs = priv->mac; 1010 eth_dev->device = dpdk_dev; 1011 /* Configure the first MAC address by default. */ 1012 if (mlx5_get_mac(eth_dev, &mac.addr_bytes)) { 1013 DRV_LOG(ERR, 1014 "port %u cannot get MAC address, is mlx5_en" 1015 " loaded? (errno: %s)", 1016 eth_dev->data->port_id, strerror(rte_errno)); 1017 err = ENODEV; 1018 goto error; 1019 } 1020 DRV_LOG(INFO, 1021 "port %u MAC address is %02x:%02x:%02x:%02x:%02x:%02x", 1022 eth_dev->data->port_id, 1023 mac.addr_bytes[0], mac.addr_bytes[1], 1024 mac.addr_bytes[2], mac.addr_bytes[3], 1025 mac.addr_bytes[4], mac.addr_bytes[5]); 1026 #ifdef RTE_LIBRTE_MLX5_DEBUG 1027 { 1028 char ifname[IF_NAMESIZE]; 1029 1030 if (mlx5_get_ifname(eth_dev, &ifname) == 0) 1031 DRV_LOG(DEBUG, "port %u ifname is \"%s\"", 1032 eth_dev->data->port_id, ifname); 1033 else 1034 DRV_LOG(DEBUG, "port %u ifname is unknown", 1035 eth_dev->data->port_id); 1036 } 1037 #endif 1038 /* Get actual MTU if possible. */ 1039 err = mlx5_get_mtu(eth_dev, &priv->mtu); 1040 if (err) { 1041 err = rte_errno; 1042 goto error; 1043 } 1044 DRV_LOG(DEBUG, "port %u MTU is %u", eth_dev->data->port_id, 1045 priv->mtu); 1046 /* Initialize burst functions to prevent crashes before link-up. */ 1047 eth_dev->rx_pkt_burst = removed_rx_burst; 1048 eth_dev->tx_pkt_burst = removed_tx_burst; 1049 eth_dev->dev_ops = &mlx5_os_dev_ops; 1050 /* Register MAC address. */ 1051 claim_zero(mlx5_mac_addr_add(eth_dev, &mac, 0, 0)); 1052 if (config.vf && config.vf_nl_en) 1053 mlx5_nl_mac_addr_sync(priv->nl_socket_route, 1054 mlx5_ifindex(eth_dev), 1055 eth_dev->data->mac_addrs, 1056 MLX5_MAX_MAC_ADDRESSES); 1057 priv->flows = 0; 1058 priv->ctrl_flows = 0; 1059 TAILQ_INIT(&priv->flow_meters); 1060 TAILQ_INIT(&priv->flow_meter_profiles); 1061 /* Hint libmlx5 to use PMD allocator for data plane resources */ 1062 struct mlx5dv_ctx_allocators alctr = { 1063 .alloc = &mlx5_alloc_verbs_buf, 1064 .free = &mlx5_free_verbs_buf, 1065 .data = priv, 1066 }; 1067 mlx5_glue->dv_set_context_attr(sh->ctx, 1068 MLX5DV_CTX_ATTR_BUF_ALLOCATORS, 1069 (void *)((uintptr_t)&alctr)); 1070 /* Bring Ethernet device up. */ 1071 DRV_LOG(DEBUG, "port %u forcing Ethernet interface up", 1072 eth_dev->data->port_id); 1073 mlx5_set_link_up(eth_dev); 1074 /* 1075 * Even though the interrupt handler is not installed yet, 1076 * interrupts will still trigger on the async_fd from 1077 * Verbs context returned by ibv_open_device(). 1078 */ 1079 mlx5_link_update(eth_dev, 0); 1080 #ifdef HAVE_MLX5DV_DR_ESWITCH 1081 if (!(config.hca_attr.eswitch_manager && config.dv_flow_en && 1082 (switch_info->representor || switch_info->master))) 1083 config.dv_esw_en = 0; 1084 #else 1085 config.dv_esw_en = 0; 1086 #endif 1087 /* Detect minimal data bytes to inline. */ 1088 mlx5_set_min_inline(spawn, &config); 1089 /* Store device configuration on private structure. */ 1090 priv->config = config; 1091 /* Create context for virtual machine VLAN workaround. */ 1092 priv->vmwa_context = mlx5_vlan_vmwa_init(eth_dev, spawn->ifindex); 1093 if (config.dv_flow_en) { 1094 err = mlx5_alloc_shared_dr(priv); 1095 if (err) 1096 goto error; 1097 /* 1098 * RSS id is shared with meter flow id. Meter flow id can only 1099 * use the 24 MSB of the register. 1100 */ 1101 priv->qrss_id_pool = mlx5_flow_id_pool_alloc(UINT32_MAX >> 1102 MLX5_MTR_COLOR_BITS); 1103 if (!priv->qrss_id_pool) { 1104 DRV_LOG(ERR, "can't create flow id pool"); 1105 err = ENOMEM; 1106 goto error; 1107 } 1108 } 1109 /* Supported Verbs flow priority number detection. */ 1110 err = mlx5_flow_discover_priorities(eth_dev); 1111 if (err < 0) { 1112 err = -err; 1113 goto error; 1114 } 1115 priv->config.flow_prio = err; 1116 if (!priv->config.dv_esw_en && 1117 priv->config.dv_xmeta_en != MLX5_XMETA_MODE_LEGACY) { 1118 DRV_LOG(WARNING, "metadata mode %u is not supported " 1119 "(no E-Switch)", priv->config.dv_xmeta_en); 1120 priv->config.dv_xmeta_en = MLX5_XMETA_MODE_LEGACY; 1121 } 1122 mlx5_set_metadata_mask(eth_dev); 1123 if (priv->config.dv_xmeta_en != MLX5_XMETA_MODE_LEGACY && 1124 !priv->sh->dv_regc0_mask) { 1125 DRV_LOG(ERR, "metadata mode %u is not supported " 1126 "(no metadata reg_c[0] is available)", 1127 priv->config.dv_xmeta_en); 1128 err = ENOTSUP; 1129 goto error; 1130 } 1131 /* 1132 * Allocate the buffer for flow creating, just once. 1133 * The allocation must be done before any flow creating. 1134 */ 1135 mlx5_flow_alloc_intermediate(eth_dev); 1136 /* Query availability of metadata reg_c's. */ 1137 err = mlx5_flow_discover_mreg_c(eth_dev); 1138 if (err < 0) { 1139 err = -err; 1140 goto error; 1141 } 1142 if (!mlx5_flow_ext_mreg_supported(eth_dev)) { 1143 DRV_LOG(DEBUG, 1144 "port %u extensive metadata register is not supported", 1145 eth_dev->data->port_id); 1146 if (priv->config.dv_xmeta_en != MLX5_XMETA_MODE_LEGACY) { 1147 DRV_LOG(ERR, "metadata mode %u is not supported " 1148 "(no metadata registers available)", 1149 priv->config.dv_xmeta_en); 1150 err = ENOTSUP; 1151 goto error; 1152 } 1153 } 1154 if (priv->config.dv_flow_en && 1155 priv->config.dv_xmeta_en != MLX5_XMETA_MODE_LEGACY && 1156 mlx5_flow_ext_mreg_supported(eth_dev) && 1157 priv->sh->dv_regc0_mask) { 1158 priv->mreg_cp_tbl = mlx5_hlist_create(MLX5_FLOW_MREG_HNAME, 1159 MLX5_FLOW_MREG_HTABLE_SZ); 1160 if (!priv->mreg_cp_tbl) { 1161 err = ENOMEM; 1162 goto error; 1163 } 1164 } 1165 return eth_dev; 1166 error: 1167 if (priv) { 1168 if (priv->mreg_cp_tbl) 1169 mlx5_hlist_destroy(priv->mreg_cp_tbl, NULL, NULL); 1170 if (priv->sh) 1171 mlx5_os_free_shared_dr(priv); 1172 if (priv->nl_socket_route >= 0) 1173 close(priv->nl_socket_route); 1174 if (priv->nl_socket_rdma >= 0) 1175 close(priv->nl_socket_rdma); 1176 if (priv->vmwa_context) 1177 mlx5_vlan_vmwa_exit(priv->vmwa_context); 1178 if (priv->qrss_id_pool) 1179 mlx5_flow_id_pool_release(priv->qrss_id_pool); 1180 if (own_domain_id) 1181 claim_zero(rte_eth_switch_domain_free(priv->domain_id)); 1182 rte_free(priv); 1183 if (eth_dev != NULL) 1184 eth_dev->data->dev_private = NULL; 1185 } 1186 if (eth_dev != NULL) { 1187 /* mac_addrs must not be freed alone because part of 1188 * dev_private 1189 **/ 1190 eth_dev->data->mac_addrs = NULL; 1191 rte_eth_dev_release_port(eth_dev); 1192 } 1193 if (sh) 1194 mlx5_free_shared_dev_ctx(sh); 1195 MLX5_ASSERT(err > 0); 1196 rte_errno = err; 1197 return NULL; 1198 } 1199 1200 /** 1201 * Comparison callback to sort device data. 1202 * 1203 * This is meant to be used with qsort(). 1204 * 1205 * @param a[in] 1206 * Pointer to pointer to first data object. 1207 * @param b[in] 1208 * Pointer to pointer to second data object. 1209 * 1210 * @return 1211 * 0 if both objects are equal, less than 0 if the first argument is less 1212 * than the second, greater than 0 otherwise. 1213 */ 1214 static int 1215 mlx5_dev_spawn_data_cmp(const void *a, const void *b) 1216 { 1217 const struct mlx5_switch_info *si_a = 1218 &((const struct mlx5_dev_spawn_data *)a)->info; 1219 const struct mlx5_switch_info *si_b = 1220 &((const struct mlx5_dev_spawn_data *)b)->info; 1221 int ret; 1222 1223 /* Master device first. */ 1224 ret = si_b->master - si_a->master; 1225 if (ret) 1226 return ret; 1227 /* Then representor devices. */ 1228 ret = si_b->representor - si_a->representor; 1229 if (ret) 1230 return ret; 1231 /* Unidentified devices come last in no specific order. */ 1232 if (!si_a->representor) 1233 return 0; 1234 /* Order representors by name. */ 1235 return si_a->port_name - si_b->port_name; 1236 } 1237 1238 /** 1239 * Match PCI information for possible slaves of bonding device. 1240 * 1241 * @param[in] ibv_dev 1242 * Pointer to Infiniband device structure. 1243 * @param[in] pci_dev 1244 * Pointer to PCI device structure to match PCI address. 1245 * @param[in] nl_rdma 1246 * Netlink RDMA group socket handle. 1247 * 1248 * @return 1249 * negative value if no bonding device found, otherwise 1250 * positive index of slave PF in bonding. 1251 */ 1252 static int 1253 mlx5_device_bond_pci_match(const struct ibv_device *ibv_dev, 1254 const struct rte_pci_device *pci_dev, 1255 int nl_rdma) 1256 { 1257 char ifname[IF_NAMESIZE + 1]; 1258 unsigned int ifindex; 1259 unsigned int np, i; 1260 FILE *file = NULL; 1261 int pf = -1; 1262 1263 /* 1264 * Try to get master device name. If something goes 1265 * wrong suppose the lack of kernel support and no 1266 * bonding devices. 1267 */ 1268 if (nl_rdma < 0) 1269 return -1; 1270 if (!strstr(ibv_dev->name, "bond")) 1271 return -1; 1272 np = mlx5_nl_portnum(nl_rdma, ibv_dev->name); 1273 if (!np) 1274 return -1; 1275 /* 1276 * The Master device might not be on the predefined 1277 * port (not on port index 1, it is not garanted), 1278 * we have to scan all Infiniband device port and 1279 * find master. 1280 */ 1281 for (i = 1; i <= np; ++i) { 1282 /* Check whether Infiniband port is populated. */ 1283 ifindex = mlx5_nl_ifindex(nl_rdma, ibv_dev->name, i); 1284 if (!ifindex) 1285 continue; 1286 if (!if_indextoname(ifindex, ifname)) 1287 continue; 1288 /* Try to read bonding slave names from sysfs. */ 1289 MKSTR(slaves, 1290 "/sys/class/net/%s/master/bonding/slaves", ifname); 1291 file = fopen(slaves, "r"); 1292 if (file) 1293 break; 1294 } 1295 if (!file) 1296 return -1; 1297 /* Use safe format to check maximal buffer length. */ 1298 MLX5_ASSERT(atol(RTE_STR(IF_NAMESIZE)) == IF_NAMESIZE); 1299 while (fscanf(file, "%" RTE_STR(IF_NAMESIZE) "s", ifname) == 1) { 1300 char tmp_str[IF_NAMESIZE + 32]; 1301 struct rte_pci_addr pci_addr; 1302 struct mlx5_switch_info info; 1303 1304 /* Process slave interface names in the loop. */ 1305 snprintf(tmp_str, sizeof(tmp_str), 1306 "/sys/class/net/%s", ifname); 1307 if (mlx5_dev_to_pci_addr(tmp_str, &pci_addr)) { 1308 DRV_LOG(WARNING, "can not get PCI address" 1309 " for netdev \"%s\"", ifname); 1310 continue; 1311 } 1312 if (pci_dev->addr.domain != pci_addr.domain || 1313 pci_dev->addr.bus != pci_addr.bus || 1314 pci_dev->addr.devid != pci_addr.devid || 1315 pci_dev->addr.function != pci_addr.function) 1316 continue; 1317 /* Slave interface PCI address match found. */ 1318 fclose(file); 1319 snprintf(tmp_str, sizeof(tmp_str), 1320 "/sys/class/net/%s/phys_port_name", ifname); 1321 file = fopen(tmp_str, "rb"); 1322 if (!file) 1323 break; 1324 info.name_type = MLX5_PHYS_PORT_NAME_TYPE_NOTSET; 1325 if (fscanf(file, "%32s", tmp_str) == 1) 1326 mlx5_translate_port_name(tmp_str, &info); 1327 if (info.name_type == MLX5_PHYS_PORT_NAME_TYPE_LEGACY || 1328 info.name_type == MLX5_PHYS_PORT_NAME_TYPE_UPLINK) 1329 pf = info.port_name; 1330 break; 1331 } 1332 if (file) 1333 fclose(file); 1334 return pf; 1335 } 1336 1337 /** 1338 * DPDK callback to register a PCI device. 1339 * 1340 * This function spawns Ethernet devices out of a given PCI device. 1341 * 1342 * @param[in] pci_drv 1343 * PCI driver structure (mlx5_driver). 1344 * @param[in] pci_dev 1345 * PCI device information. 1346 * 1347 * @return 1348 * 0 on success, a negative errno value otherwise and rte_errno is set. 1349 */ 1350 int 1351 mlx5_os_pci_probe(struct rte_pci_driver *pci_drv __rte_unused, 1352 struct rte_pci_device *pci_dev) 1353 { 1354 struct ibv_device **ibv_list; 1355 /* 1356 * Number of found IB Devices matching with requested PCI BDF. 1357 * nd != 1 means there are multiple IB devices over the same 1358 * PCI device and we have representors and master. 1359 */ 1360 unsigned int nd = 0; 1361 /* 1362 * Number of found IB device Ports. nd = 1 and np = 1..n means 1363 * we have the single multiport IB device, and there may be 1364 * representors attached to some of found ports. 1365 */ 1366 unsigned int np = 0; 1367 /* 1368 * Number of DPDK ethernet devices to Spawn - either over 1369 * multiple IB devices or multiple ports of single IB device. 1370 * Actually this is the number of iterations to spawn. 1371 */ 1372 unsigned int ns = 0; 1373 /* 1374 * Bonding device 1375 * < 0 - no bonding device (single one) 1376 * >= 0 - bonding device (value is slave PF index) 1377 */ 1378 int bd = -1; 1379 struct mlx5_dev_spawn_data *list = NULL; 1380 struct mlx5_dev_config dev_config; 1381 int ret; 1382 1383 if (mlx5_class_get(pci_dev->device.devargs) != MLX5_CLASS_NET) { 1384 DRV_LOG(DEBUG, "Skip probing - should be probed by other mlx5" 1385 " driver."); 1386 return 1; 1387 } 1388 if (rte_eal_process_type() == RTE_PROC_PRIMARY) 1389 mlx5_pmd_socket_init(); 1390 ret = mlx5_init_once(); 1391 if (ret) { 1392 DRV_LOG(ERR, "unable to init PMD global data: %s", 1393 strerror(rte_errno)); 1394 return -rte_errno; 1395 } 1396 MLX5_ASSERT(pci_drv == &mlx5_driver); 1397 errno = 0; 1398 ibv_list = mlx5_glue->get_device_list(&ret); 1399 if (!ibv_list) { 1400 rte_errno = errno ? errno : ENOSYS; 1401 DRV_LOG(ERR, "cannot list devices, is ib_uverbs loaded?"); 1402 return -rte_errno; 1403 } 1404 /* 1405 * First scan the list of all Infiniband devices to find 1406 * matching ones, gathering into the list. 1407 */ 1408 struct ibv_device *ibv_match[ret + 1]; 1409 int nl_route = mlx5_nl_init(NETLINK_ROUTE); 1410 int nl_rdma = mlx5_nl_init(NETLINK_RDMA); 1411 unsigned int i; 1412 1413 while (ret-- > 0) { 1414 struct rte_pci_addr pci_addr; 1415 1416 DRV_LOG(DEBUG, "checking device \"%s\"", ibv_list[ret]->name); 1417 bd = mlx5_device_bond_pci_match 1418 (ibv_list[ret], pci_dev, nl_rdma); 1419 if (bd >= 0) { 1420 /* 1421 * Bonding device detected. Only one match is allowed, 1422 * the bonding is supported over multi-port IB device, 1423 * there should be no matches on representor PCI 1424 * functions or non VF LAG bonding devices with 1425 * specified address. 1426 */ 1427 if (nd) { 1428 DRV_LOG(ERR, 1429 "multiple PCI match on bonding device" 1430 "\"%s\" found", ibv_list[ret]->name); 1431 rte_errno = ENOENT; 1432 ret = -rte_errno; 1433 goto exit; 1434 } 1435 DRV_LOG(INFO, "PCI information matches for" 1436 " slave %d bonding device \"%s\"", 1437 bd, ibv_list[ret]->name); 1438 ibv_match[nd++] = ibv_list[ret]; 1439 break; 1440 } 1441 if (mlx5_dev_to_pci_addr 1442 (ibv_list[ret]->ibdev_path, &pci_addr)) 1443 continue; 1444 if (pci_dev->addr.domain != pci_addr.domain || 1445 pci_dev->addr.bus != pci_addr.bus || 1446 pci_dev->addr.devid != pci_addr.devid || 1447 pci_dev->addr.function != pci_addr.function) 1448 continue; 1449 DRV_LOG(INFO, "PCI information matches for device \"%s\"", 1450 ibv_list[ret]->name); 1451 ibv_match[nd++] = ibv_list[ret]; 1452 } 1453 ibv_match[nd] = NULL; 1454 if (!nd) { 1455 /* No device matches, just complain and bail out. */ 1456 DRV_LOG(WARNING, 1457 "no Verbs device matches PCI device " PCI_PRI_FMT "," 1458 " are kernel drivers loaded?", 1459 pci_dev->addr.domain, pci_dev->addr.bus, 1460 pci_dev->addr.devid, pci_dev->addr.function); 1461 rte_errno = ENOENT; 1462 ret = -rte_errno; 1463 goto exit; 1464 } 1465 if (nd == 1) { 1466 /* 1467 * Found single matching device may have multiple ports. 1468 * Each port may be representor, we have to check the port 1469 * number and check the representors existence. 1470 */ 1471 if (nl_rdma >= 0) 1472 np = mlx5_nl_portnum(nl_rdma, ibv_match[0]->name); 1473 if (!np) 1474 DRV_LOG(WARNING, "can not get IB device \"%s\"" 1475 " ports number", ibv_match[0]->name); 1476 if (bd >= 0 && !np) { 1477 DRV_LOG(ERR, "can not get ports" 1478 " for bonding device"); 1479 rte_errno = ENOENT; 1480 ret = -rte_errno; 1481 goto exit; 1482 } 1483 } 1484 #ifndef HAVE_MLX5DV_DR_DEVX_PORT 1485 if (bd >= 0) { 1486 /* 1487 * This may happen if there is VF LAG kernel support and 1488 * application is compiled with older rdma_core library. 1489 */ 1490 DRV_LOG(ERR, 1491 "No kernel/verbs support for VF LAG bonding found."); 1492 rte_errno = ENOTSUP; 1493 ret = -rte_errno; 1494 goto exit; 1495 } 1496 #endif 1497 /* 1498 * Now we can determine the maximal 1499 * amount of devices to be spawned. 1500 */ 1501 list = rte_zmalloc("device spawn data", 1502 sizeof(struct mlx5_dev_spawn_data) * 1503 (np ? np : nd), 1504 RTE_CACHE_LINE_SIZE); 1505 if (!list) { 1506 DRV_LOG(ERR, "spawn data array allocation failure"); 1507 rte_errno = ENOMEM; 1508 ret = -rte_errno; 1509 goto exit; 1510 } 1511 if (bd >= 0 || np > 1) { 1512 /* 1513 * Single IB device with multiple ports found, 1514 * it may be E-Switch master device and representors. 1515 * We have to perform identification through the ports. 1516 */ 1517 MLX5_ASSERT(nl_rdma >= 0); 1518 MLX5_ASSERT(ns == 0); 1519 MLX5_ASSERT(nd == 1); 1520 MLX5_ASSERT(np); 1521 for (i = 1; i <= np; ++i) { 1522 list[ns].max_port = np; 1523 list[ns].phys_port = i; 1524 list[ns].phys_dev = ibv_match[0]; 1525 list[ns].eth_dev = NULL; 1526 list[ns].pci_dev = pci_dev; 1527 list[ns].pf_bond = bd; 1528 list[ns].ifindex = mlx5_nl_ifindex 1529 (nl_rdma, 1530 mlx5_os_get_dev_device_name 1531 (list[ns].phys_dev), i); 1532 if (!list[ns].ifindex) { 1533 /* 1534 * No network interface index found for the 1535 * specified port, it means there is no 1536 * representor on this port. It's OK, 1537 * there can be disabled ports, for example 1538 * if sriov_numvfs < sriov_totalvfs. 1539 */ 1540 continue; 1541 } 1542 ret = -1; 1543 if (nl_route >= 0) 1544 ret = mlx5_nl_switch_info 1545 (nl_route, 1546 list[ns].ifindex, 1547 &list[ns].info); 1548 if (ret || (!list[ns].info.representor && 1549 !list[ns].info.master)) { 1550 /* 1551 * We failed to recognize representors with 1552 * Netlink, let's try to perform the task 1553 * with sysfs. 1554 */ 1555 ret = mlx5_sysfs_switch_info 1556 (list[ns].ifindex, 1557 &list[ns].info); 1558 } 1559 if (!ret && bd >= 0) { 1560 switch (list[ns].info.name_type) { 1561 case MLX5_PHYS_PORT_NAME_TYPE_UPLINK: 1562 if (list[ns].info.port_name == bd) 1563 ns++; 1564 break; 1565 case MLX5_PHYS_PORT_NAME_TYPE_PFVF: 1566 if (list[ns].info.pf_num == bd) 1567 ns++; 1568 break; 1569 default: 1570 break; 1571 } 1572 continue; 1573 } 1574 if (!ret && (list[ns].info.representor ^ 1575 list[ns].info.master)) 1576 ns++; 1577 } 1578 if (!ns) { 1579 DRV_LOG(ERR, 1580 "unable to recognize master/representors" 1581 " on the IB device with multiple ports"); 1582 rte_errno = ENOENT; 1583 ret = -rte_errno; 1584 goto exit; 1585 } 1586 } else { 1587 /* 1588 * The existence of several matching entries (nd > 1) means 1589 * port representors have been instantiated. No existing Verbs 1590 * call nor sysfs entries can tell them apart, this can only 1591 * be done through Netlink calls assuming kernel drivers are 1592 * recent enough to support them. 1593 * 1594 * In the event of identification failure through Netlink, 1595 * try again through sysfs, then: 1596 * 1597 * 1. A single IB device matches (nd == 1) with single 1598 * port (np=0/1) and is not a representor, assume 1599 * no switch support. 1600 * 1601 * 2. Otherwise no safe assumptions can be made; 1602 * complain louder and bail out. 1603 */ 1604 for (i = 0; i != nd; ++i) { 1605 memset(&list[ns].info, 0, sizeof(list[ns].info)); 1606 list[ns].max_port = 1; 1607 list[ns].phys_port = 1; 1608 list[ns].phys_dev = ibv_match[i]; 1609 list[ns].eth_dev = NULL; 1610 list[ns].pci_dev = pci_dev; 1611 list[ns].pf_bond = -1; 1612 list[ns].ifindex = 0; 1613 if (nl_rdma >= 0) 1614 list[ns].ifindex = mlx5_nl_ifindex 1615 (nl_rdma, 1616 mlx5_os_get_dev_device_name 1617 (list[ns].phys_dev), 1); 1618 if (!list[ns].ifindex) { 1619 char ifname[IF_NAMESIZE]; 1620 1621 /* 1622 * Netlink failed, it may happen with old 1623 * ib_core kernel driver (before 4.16). 1624 * We can assume there is old driver because 1625 * here we are processing single ports IB 1626 * devices. Let's try sysfs to retrieve 1627 * the ifindex. The method works for 1628 * master device only. 1629 */ 1630 if (nd > 1) { 1631 /* 1632 * Multiple devices found, assume 1633 * representors, can not distinguish 1634 * master/representor and retrieve 1635 * ifindex via sysfs. 1636 */ 1637 continue; 1638 } 1639 ret = mlx5_get_master_ifname 1640 (ibv_match[i]->ibdev_path, &ifname); 1641 if (!ret) 1642 list[ns].ifindex = 1643 if_nametoindex(ifname); 1644 if (!list[ns].ifindex) { 1645 /* 1646 * No network interface index found 1647 * for the specified device, it means 1648 * there it is neither representor 1649 * nor master. 1650 */ 1651 continue; 1652 } 1653 } 1654 ret = -1; 1655 if (nl_route >= 0) 1656 ret = mlx5_nl_switch_info 1657 (nl_route, 1658 list[ns].ifindex, 1659 &list[ns].info); 1660 if (ret || (!list[ns].info.representor && 1661 !list[ns].info.master)) { 1662 /* 1663 * We failed to recognize representors with 1664 * Netlink, let's try to perform the task 1665 * with sysfs. 1666 */ 1667 ret = mlx5_sysfs_switch_info 1668 (list[ns].ifindex, 1669 &list[ns].info); 1670 } 1671 if (!ret && (list[ns].info.representor ^ 1672 list[ns].info.master)) { 1673 ns++; 1674 } else if ((nd == 1) && 1675 !list[ns].info.representor && 1676 !list[ns].info.master) { 1677 /* 1678 * Single IB device with 1679 * one physical port and 1680 * attached network device. 1681 * May be SRIOV is not enabled 1682 * or there is no representors. 1683 */ 1684 DRV_LOG(INFO, "no E-Switch support detected"); 1685 ns++; 1686 break; 1687 } 1688 } 1689 if (!ns) { 1690 DRV_LOG(ERR, 1691 "unable to recognize master/representors" 1692 " on the multiple IB devices"); 1693 rte_errno = ENOENT; 1694 ret = -rte_errno; 1695 goto exit; 1696 } 1697 } 1698 MLX5_ASSERT(ns); 1699 /* 1700 * Sort list to probe devices in natural order for users convenience 1701 * (i.e. master first, then representors from lowest to highest ID). 1702 */ 1703 qsort(list, ns, sizeof(*list), mlx5_dev_spawn_data_cmp); 1704 /* Default configuration. */ 1705 dev_config = (struct mlx5_dev_config){ 1706 .hw_padding = 0, 1707 .mps = MLX5_ARG_UNSET, 1708 .dbnc = MLX5_ARG_UNSET, 1709 .rx_vec_en = 1, 1710 .txq_inline_max = MLX5_ARG_UNSET, 1711 .txq_inline_min = MLX5_ARG_UNSET, 1712 .txq_inline_mpw = MLX5_ARG_UNSET, 1713 .txqs_inline = MLX5_ARG_UNSET, 1714 .vf_nl_en = 1, 1715 .mr_ext_memseg_en = 1, 1716 .mprq = { 1717 .enabled = 0, /* Disabled by default. */ 1718 .stride_num_n = 0, 1719 .stride_size_n = 0, 1720 .max_memcpy_len = MLX5_MPRQ_MEMCPY_DEFAULT_LEN, 1721 .min_rxqs_num = MLX5_MPRQ_MIN_RXQS, 1722 }, 1723 .dv_esw_en = 1, 1724 .dv_flow_en = 1, 1725 .log_hp_size = MLX5_ARG_UNSET, 1726 }; 1727 /* Device specific configuration. */ 1728 switch (pci_dev->id.device_id) { 1729 case PCI_DEVICE_ID_MELLANOX_CONNECTX4VF: 1730 case PCI_DEVICE_ID_MELLANOX_CONNECTX4LXVF: 1731 case PCI_DEVICE_ID_MELLANOX_CONNECTX5VF: 1732 case PCI_DEVICE_ID_MELLANOX_CONNECTX5EXVF: 1733 case PCI_DEVICE_ID_MELLANOX_CONNECTX5BFVF: 1734 case PCI_DEVICE_ID_MELLANOX_CONNECTX6VF: 1735 case PCI_DEVICE_ID_MELLANOX_CONNECTX6DXVF: 1736 dev_config.vf = 1; 1737 break; 1738 default: 1739 break; 1740 } 1741 for (i = 0; i != ns; ++i) { 1742 uint32_t restore; 1743 1744 list[i].eth_dev = mlx5_dev_spawn(&pci_dev->device, 1745 &list[i], 1746 dev_config); 1747 if (!list[i].eth_dev) { 1748 if (rte_errno != EBUSY && rte_errno != EEXIST) 1749 break; 1750 /* Device is disabled or already spawned. Ignore it. */ 1751 continue; 1752 } 1753 restore = list[i].eth_dev->data->dev_flags; 1754 rte_eth_copy_pci_info(list[i].eth_dev, pci_dev); 1755 /* Restore non-PCI flags cleared by the above call. */ 1756 list[i].eth_dev->data->dev_flags |= restore; 1757 rte_eth_dev_probing_finish(list[i].eth_dev); 1758 } 1759 if (i != ns) { 1760 DRV_LOG(ERR, 1761 "probe of PCI device " PCI_PRI_FMT " aborted after" 1762 " encountering an error: %s", 1763 pci_dev->addr.domain, pci_dev->addr.bus, 1764 pci_dev->addr.devid, pci_dev->addr.function, 1765 strerror(rte_errno)); 1766 ret = -rte_errno; 1767 /* Roll back. */ 1768 while (i--) { 1769 if (!list[i].eth_dev) 1770 continue; 1771 mlx5_dev_close(list[i].eth_dev); 1772 /* mac_addrs must not be freed because in dev_private */ 1773 list[i].eth_dev->data->mac_addrs = NULL; 1774 claim_zero(rte_eth_dev_release_port(list[i].eth_dev)); 1775 } 1776 /* Restore original error. */ 1777 rte_errno = -ret; 1778 } else { 1779 ret = 0; 1780 } 1781 exit: 1782 /* 1783 * Do the routine cleanup: 1784 * - close opened Netlink sockets 1785 * - free allocated spawn data array 1786 * - free the Infiniband device list 1787 */ 1788 if (nl_rdma >= 0) 1789 close(nl_rdma); 1790 if (nl_route >= 0) 1791 close(nl_route); 1792 if (list) 1793 rte_free(list); 1794 MLX5_ASSERT(ibv_list); 1795 mlx5_glue->free_device_list(ibv_list); 1796 return ret; 1797 } 1798 1799 static int 1800 mlx5_config_doorbell_mapping_env(const struct mlx5_dev_config *config) 1801 { 1802 char *env; 1803 int value; 1804 1805 MLX5_ASSERT(rte_eal_process_type() == RTE_PROC_PRIMARY); 1806 /* Get environment variable to store. */ 1807 env = getenv(MLX5_SHUT_UP_BF); 1808 value = env ? !!strcmp(env, "0") : MLX5_ARG_UNSET; 1809 if (config->dbnc == MLX5_ARG_UNSET) 1810 setenv(MLX5_SHUT_UP_BF, MLX5_SHUT_UP_BF_DEFAULT, 1); 1811 else 1812 setenv(MLX5_SHUT_UP_BF, 1813 config->dbnc == MLX5_TXDB_NCACHED ? "1" : "0", 1); 1814 return value; 1815 } 1816 1817 static void 1818 mlx5_restore_doorbell_mapping_env(int value) 1819 { 1820 MLX5_ASSERT(rte_eal_process_type() == RTE_PROC_PRIMARY); 1821 /* Restore the original environment variable state. */ 1822 if (value == MLX5_ARG_UNSET) 1823 unsetenv(MLX5_SHUT_UP_BF); 1824 else 1825 setenv(MLX5_SHUT_UP_BF, value ? "1" : "0", 1); 1826 } 1827 1828 /** 1829 * Extract pdn of PD object using DV API. 1830 * 1831 * @param[in] pd 1832 * Pointer to the verbs PD object. 1833 * @param[out] pdn 1834 * Pointer to the PD object number variable. 1835 * 1836 * @return 1837 * 0 on success, error value otherwise. 1838 */ 1839 int 1840 mlx5_os_get_pdn(void *pd, uint32_t *pdn) 1841 { 1842 #ifdef HAVE_IBV_FLOW_DV_SUPPORT 1843 struct mlx5dv_obj obj; 1844 struct mlx5dv_pd pd_info; 1845 int ret = 0; 1846 1847 obj.pd.in = pd; 1848 obj.pd.out = &pd_info; 1849 ret = mlx5_glue->dv_init_obj(&obj, MLX5DV_OBJ_PD); 1850 if (ret) { 1851 DRV_LOG(DEBUG, "Fail to get PD object info"); 1852 return ret; 1853 } 1854 *pdn = pd_info.pdn; 1855 return 0; 1856 #else 1857 (void)pd; 1858 (void)pdn; 1859 return -ENOTSUP; 1860 #endif /* HAVE_IBV_FLOW_DV_SUPPORT */ 1861 } 1862 1863 /** 1864 * Function API to open IB device. 1865 * 1866 * This function calls the Linux glue APIs to open a device. 1867 * 1868 * @param[in] spawn 1869 * Pointer to the IB device attributes (name, port, etc). 1870 * @param[out] config 1871 * Pointer to device configuration structure. 1872 * @param[out] sh 1873 * Pointer to shared context structure. 1874 * 1875 * @return 1876 * 0 on success, a positive error value otherwise. 1877 */ 1878 int 1879 mlx5_os_open_device(const struct mlx5_dev_spawn_data *spawn, 1880 const struct mlx5_dev_config *config, 1881 struct mlx5_dev_ctx_shared *sh) 1882 { 1883 int dbmap_env; 1884 int err = 0; 1885 /* 1886 * Configure environment variable "MLX5_BF_SHUT_UP" 1887 * before the device creation. The rdma_core library 1888 * checks the variable at device creation and 1889 * stores the result internally. 1890 */ 1891 dbmap_env = mlx5_config_doorbell_mapping_env(config); 1892 /* Try to open IB device with DV first, then usual Verbs. */ 1893 errno = 0; 1894 sh->ctx = mlx5_glue->dv_open_device(spawn->phys_dev); 1895 if (sh->ctx) { 1896 sh->devx = 1; 1897 DRV_LOG(DEBUG, "DevX is supported"); 1898 /* The device is created, no need for environment. */ 1899 mlx5_restore_doorbell_mapping_env(dbmap_env); 1900 } else { 1901 /* The environment variable is still configured. */ 1902 sh->ctx = mlx5_glue->open_device(spawn->phys_dev); 1903 err = errno ? errno : ENODEV; 1904 /* 1905 * The environment variable is not needed anymore, 1906 * all device creation attempts are completed. 1907 */ 1908 mlx5_restore_doorbell_mapping_env(dbmap_env); 1909 if (!sh->ctx) 1910 return err; 1911 DRV_LOG(DEBUG, "DevX is NOT supported"); 1912 err = 0; 1913 } 1914 return err; 1915 } 1916 1917 /** 1918 * Install shared asynchronous device events handler. 1919 * This function is implemented to support event sharing 1920 * between multiple ports of single IB device. 1921 * 1922 * @param sh 1923 * Pointer to mlx5_dev_ctx_shared object. 1924 */ 1925 void 1926 mlx5_os_dev_shared_handler_install(struct mlx5_dev_ctx_shared *sh) 1927 { 1928 int ret; 1929 int flags; 1930 1931 sh->intr_handle.fd = -1; 1932 flags = fcntl(((struct ibv_context *)sh->ctx)->async_fd, F_GETFL); 1933 ret = fcntl(((struct ibv_context *)sh->ctx)->async_fd, 1934 F_SETFL, flags | O_NONBLOCK); 1935 if (ret) { 1936 DRV_LOG(INFO, "failed to change file descriptor async event" 1937 " queue"); 1938 } else { 1939 sh->intr_handle.fd = ((struct ibv_context *)sh->ctx)->async_fd; 1940 sh->intr_handle.type = RTE_INTR_HANDLE_EXT; 1941 if (rte_intr_callback_register(&sh->intr_handle, 1942 mlx5_dev_interrupt_handler, sh)) { 1943 DRV_LOG(INFO, "Fail to install the shared interrupt."); 1944 sh->intr_handle.fd = -1; 1945 } 1946 } 1947 if (sh->devx) { 1948 #ifdef HAVE_IBV_DEVX_ASYNC 1949 sh->intr_handle_devx.fd = -1; 1950 sh->devx_comp = 1951 (void *)mlx5_glue->devx_create_cmd_comp(sh->ctx); 1952 struct mlx5dv_devx_cmd_comp *devx_comp = sh->devx_comp; 1953 if (!devx_comp) { 1954 DRV_LOG(INFO, "failed to allocate devx_comp."); 1955 return; 1956 } 1957 flags = fcntl(devx_comp->fd, F_GETFL); 1958 ret = fcntl(devx_comp->fd, F_SETFL, flags | O_NONBLOCK); 1959 if (ret) { 1960 DRV_LOG(INFO, "failed to change file descriptor" 1961 " devx comp"); 1962 return; 1963 } 1964 sh->intr_handle_devx.fd = devx_comp->fd; 1965 sh->intr_handle_devx.type = RTE_INTR_HANDLE_EXT; 1966 if (rte_intr_callback_register(&sh->intr_handle_devx, 1967 mlx5_dev_interrupt_handler_devx, sh)) { 1968 DRV_LOG(INFO, "Fail to install the devx shared" 1969 " interrupt."); 1970 sh->intr_handle_devx.fd = -1; 1971 } 1972 #endif /* HAVE_IBV_DEVX_ASYNC */ 1973 } 1974 } 1975 1976 /** 1977 * Uninstall shared asynchronous device events handler. 1978 * This function is implemented to support event sharing 1979 * between multiple ports of single IB device. 1980 * 1981 * @param dev 1982 * Pointer to mlx5_dev_ctx_shared object. 1983 */ 1984 void 1985 mlx5_os_dev_shared_handler_uninstall(struct mlx5_dev_ctx_shared *sh) 1986 { 1987 if (sh->intr_handle.fd >= 0) 1988 mlx5_intr_callback_unregister(&sh->intr_handle, 1989 mlx5_dev_interrupt_handler, sh); 1990 #ifdef HAVE_IBV_DEVX_ASYNC 1991 if (sh->intr_handle_devx.fd >= 0) 1992 rte_intr_callback_unregister(&sh->intr_handle_devx, 1993 mlx5_dev_interrupt_handler_devx, sh); 1994 if (sh->devx_comp) 1995 mlx5_glue->devx_destroy_cmd_comp(sh->devx_comp); 1996 #endif 1997 } 1998 1999 /** 2000 * Read statistics by a named counter. 2001 * 2002 * @param[in] priv 2003 * Pointer to the private device data structure. 2004 * @param[in] ctr_name 2005 * Pointer to the name of the statistic counter to read 2006 * @param[out] stat 2007 * Pointer to read statistic value. 2008 * @return 2009 * 0 on success and stat is valud, 1 if failed to read the value 2010 * rte_errno is set. 2011 * 2012 */ 2013 int 2014 mlx5_os_read_dev_stat(struct mlx5_priv *priv, const char *ctr_name, 2015 uint64_t *stat) 2016 { 2017 int fd; 2018 2019 if (priv->sh) { 2020 MKSTR(path, "%s/ports/%d/hw_counters/%s", 2021 priv->sh->ibdev_path, 2022 priv->dev_port, 2023 ctr_name); 2024 fd = open(path, O_RDONLY); 2025 if (fd != -1) { 2026 char buf[21] = {'\0'}; 2027 ssize_t n = read(fd, buf, sizeof(buf)); 2028 2029 close(fd); 2030 if (n != -1) { 2031 *stat = strtoull(buf, NULL, 10); 2032 return 0; 2033 } 2034 } 2035 } 2036 *stat = 0; 2037 return 1; 2038 } 2039 2040 /** 2041 * Read device counters table. 2042 * 2043 * @param dev 2044 * Pointer to Ethernet device. 2045 * @param[out] stats 2046 * Counters table output buffer. 2047 * 2048 * @return 2049 * 0 on success and stats is filled, negative errno value otherwise and 2050 * rte_errno is set. 2051 */ 2052 int 2053 mlx5_os_read_dev_counters(struct rte_eth_dev *dev, uint64_t *stats) 2054 { 2055 struct mlx5_priv *priv = dev->data->dev_private; 2056 struct mlx5_xstats_ctrl *xstats_ctrl = &priv->xstats_ctrl; 2057 unsigned int i; 2058 struct ifreq ifr; 2059 unsigned int stats_sz = xstats_ctrl->stats_n * sizeof(uint64_t); 2060 unsigned char et_stat_buf[sizeof(struct ethtool_stats) + stats_sz]; 2061 struct ethtool_stats *et_stats = (struct ethtool_stats *)et_stat_buf; 2062 int ret; 2063 2064 et_stats->cmd = ETHTOOL_GSTATS; 2065 et_stats->n_stats = xstats_ctrl->stats_n; 2066 ifr.ifr_data = (caddr_t)et_stats; 2067 ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr); 2068 if (ret) { 2069 DRV_LOG(WARNING, 2070 "port %u unable to read statistic values from device", 2071 dev->data->port_id); 2072 return ret; 2073 } 2074 for (i = 0; i != xstats_ctrl->mlx5_stats_n; ++i) { 2075 if (xstats_ctrl->info[i].dev) { 2076 ret = mlx5_os_read_dev_stat(priv, 2077 xstats_ctrl->info[i].ctr_name, 2078 &stats[i]); 2079 /* return last xstats counter if fail to read. */ 2080 if (ret == 0) 2081 xstats_ctrl->xstats[i] = stats[i]; 2082 else 2083 stats[i] = xstats_ctrl->xstats[i]; 2084 } else { 2085 stats[i] = (uint64_t) 2086 et_stats->data[xstats_ctrl->dev_table_idx[i]]; 2087 } 2088 } 2089 return 0; 2090 } 2091 2092 /** 2093 * Query the number of statistics provided by ETHTOOL. 2094 * 2095 * @param dev 2096 * Pointer to Ethernet device. 2097 * 2098 * @return 2099 * Number of statistics on success, negative errno value otherwise and 2100 * rte_errno is set. 2101 */ 2102 int 2103 mlx5_os_get_stats_n(struct rte_eth_dev *dev) 2104 { 2105 struct ethtool_drvinfo drvinfo; 2106 struct ifreq ifr; 2107 int ret; 2108 2109 drvinfo.cmd = ETHTOOL_GDRVINFO; 2110 ifr.ifr_data = (caddr_t)&drvinfo; 2111 ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr); 2112 if (ret) { 2113 DRV_LOG(WARNING, "port %u unable to query number of statistics", 2114 dev->data->port_id); 2115 return ret; 2116 } 2117 return drvinfo.n_stats; 2118 } 2119 2120 static const struct mlx5_counter_ctrl mlx5_counters_init[] = { 2121 { 2122 .dpdk_name = "rx_port_unicast_bytes", 2123 .ctr_name = "rx_vport_unicast_bytes", 2124 }, 2125 { 2126 .dpdk_name = "rx_port_multicast_bytes", 2127 .ctr_name = "rx_vport_multicast_bytes", 2128 }, 2129 { 2130 .dpdk_name = "rx_port_broadcast_bytes", 2131 .ctr_name = "rx_vport_broadcast_bytes", 2132 }, 2133 { 2134 .dpdk_name = "rx_port_unicast_packets", 2135 .ctr_name = "rx_vport_unicast_packets", 2136 }, 2137 { 2138 .dpdk_name = "rx_port_multicast_packets", 2139 .ctr_name = "rx_vport_multicast_packets", 2140 }, 2141 { 2142 .dpdk_name = "rx_port_broadcast_packets", 2143 .ctr_name = "rx_vport_broadcast_packets", 2144 }, 2145 { 2146 .dpdk_name = "tx_port_unicast_bytes", 2147 .ctr_name = "tx_vport_unicast_bytes", 2148 }, 2149 { 2150 .dpdk_name = "tx_port_multicast_bytes", 2151 .ctr_name = "tx_vport_multicast_bytes", 2152 }, 2153 { 2154 .dpdk_name = "tx_port_broadcast_bytes", 2155 .ctr_name = "tx_vport_broadcast_bytes", 2156 }, 2157 { 2158 .dpdk_name = "tx_port_unicast_packets", 2159 .ctr_name = "tx_vport_unicast_packets", 2160 }, 2161 { 2162 .dpdk_name = "tx_port_multicast_packets", 2163 .ctr_name = "tx_vport_multicast_packets", 2164 }, 2165 { 2166 .dpdk_name = "tx_port_broadcast_packets", 2167 .ctr_name = "tx_vport_broadcast_packets", 2168 }, 2169 { 2170 .dpdk_name = "rx_wqe_err", 2171 .ctr_name = "rx_wqe_err", 2172 }, 2173 { 2174 .dpdk_name = "rx_crc_errors_phy", 2175 .ctr_name = "rx_crc_errors_phy", 2176 }, 2177 { 2178 .dpdk_name = "rx_in_range_len_errors_phy", 2179 .ctr_name = "rx_in_range_len_errors_phy", 2180 }, 2181 { 2182 .dpdk_name = "rx_symbol_err_phy", 2183 .ctr_name = "rx_symbol_err_phy", 2184 }, 2185 { 2186 .dpdk_name = "tx_errors_phy", 2187 .ctr_name = "tx_errors_phy", 2188 }, 2189 { 2190 .dpdk_name = "rx_out_of_buffer", 2191 .ctr_name = "out_of_buffer", 2192 .dev = 1, 2193 }, 2194 { 2195 .dpdk_name = "tx_packets_phy", 2196 .ctr_name = "tx_packets_phy", 2197 }, 2198 { 2199 .dpdk_name = "rx_packets_phy", 2200 .ctr_name = "rx_packets_phy", 2201 }, 2202 { 2203 .dpdk_name = "tx_discards_phy", 2204 .ctr_name = "tx_discards_phy", 2205 }, 2206 { 2207 .dpdk_name = "rx_discards_phy", 2208 .ctr_name = "rx_discards_phy", 2209 }, 2210 { 2211 .dpdk_name = "tx_bytes_phy", 2212 .ctr_name = "tx_bytes_phy", 2213 }, 2214 { 2215 .dpdk_name = "rx_bytes_phy", 2216 .ctr_name = "rx_bytes_phy", 2217 }, 2218 /* Representor only */ 2219 { 2220 .dpdk_name = "rx_packets", 2221 .ctr_name = "vport_rx_packets", 2222 }, 2223 { 2224 .dpdk_name = "rx_bytes", 2225 .ctr_name = "vport_rx_bytes", 2226 }, 2227 { 2228 .dpdk_name = "tx_packets", 2229 .ctr_name = "vport_tx_packets", 2230 }, 2231 { 2232 .dpdk_name = "tx_bytes", 2233 .ctr_name = "vport_tx_bytes", 2234 }, 2235 }; 2236 2237 static const unsigned int xstats_n = RTE_DIM(mlx5_counters_init); 2238 2239 /** 2240 * Init the structures to read device counters. 2241 * 2242 * @param dev 2243 * Pointer to Ethernet device. 2244 */ 2245 void 2246 mlx5_os_stats_init(struct rte_eth_dev *dev) 2247 { 2248 struct mlx5_priv *priv = dev->data->dev_private; 2249 struct mlx5_xstats_ctrl *xstats_ctrl = &priv->xstats_ctrl; 2250 struct mlx5_stats_ctrl *stats_ctrl = &priv->stats_ctrl; 2251 unsigned int i; 2252 unsigned int j; 2253 struct ifreq ifr; 2254 struct ethtool_gstrings *strings = NULL; 2255 unsigned int dev_stats_n; 2256 unsigned int str_sz; 2257 int ret; 2258 2259 /* So that it won't aggregate for each init. */ 2260 xstats_ctrl->mlx5_stats_n = 0; 2261 ret = mlx5_os_get_stats_n(dev); 2262 if (ret < 0) { 2263 DRV_LOG(WARNING, "port %u no extended statistics available", 2264 dev->data->port_id); 2265 return; 2266 } 2267 dev_stats_n = ret; 2268 /* Allocate memory to grab stat names and values. */ 2269 str_sz = dev_stats_n * ETH_GSTRING_LEN; 2270 strings = (struct ethtool_gstrings *) 2271 rte_malloc("xstats_strings", 2272 str_sz + sizeof(struct ethtool_gstrings), 0); 2273 if (!strings) { 2274 DRV_LOG(WARNING, "port %u unable to allocate memory for xstats", 2275 dev->data->port_id); 2276 return; 2277 } 2278 strings->cmd = ETHTOOL_GSTRINGS; 2279 strings->string_set = ETH_SS_STATS; 2280 strings->len = dev_stats_n; 2281 ifr.ifr_data = (caddr_t)strings; 2282 ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr); 2283 if (ret) { 2284 DRV_LOG(WARNING, "port %u unable to get statistic names", 2285 dev->data->port_id); 2286 goto free; 2287 } 2288 for (i = 0; i != dev_stats_n; ++i) { 2289 const char *curr_string = (const char *) 2290 &strings->data[i * ETH_GSTRING_LEN]; 2291 2292 for (j = 0; j != xstats_n; ++j) { 2293 if (!strcmp(mlx5_counters_init[j].ctr_name, 2294 curr_string)) { 2295 unsigned int idx = xstats_ctrl->mlx5_stats_n++; 2296 2297 xstats_ctrl->dev_table_idx[idx] = i; 2298 xstats_ctrl->info[idx] = mlx5_counters_init[j]; 2299 break; 2300 } 2301 } 2302 } 2303 /* Add dev counters. */ 2304 for (i = 0; i != xstats_n; ++i) { 2305 if (mlx5_counters_init[i].dev) { 2306 unsigned int idx = xstats_ctrl->mlx5_stats_n++; 2307 2308 xstats_ctrl->info[idx] = mlx5_counters_init[i]; 2309 xstats_ctrl->hw_stats[idx] = 0; 2310 } 2311 } 2312 MLX5_ASSERT(xstats_ctrl->mlx5_stats_n <= MLX5_MAX_XSTATS); 2313 xstats_ctrl->stats_n = dev_stats_n; 2314 /* Copy to base at first time. */ 2315 ret = mlx5_os_read_dev_counters(dev, xstats_ctrl->base); 2316 if (ret) 2317 DRV_LOG(ERR, "port %u cannot read device counters: %s", 2318 dev->data->port_id, strerror(rte_errno)); 2319 mlx5_os_read_dev_stat(priv, "out_of_buffer", &stats_ctrl->imissed_base); 2320 stats_ctrl->imissed = 0; 2321 free: 2322 rte_free(strings); 2323 } 2324 2325 /** 2326 * Set the reg_mr and dereg_mr call backs 2327 * 2328 * @param reg_mr_cb[out] 2329 * Pointer to reg_mr func 2330 * @param dereg_mr_cb[out] 2331 * Pointer to dereg_mr func 2332 * 2333 */ 2334 void 2335 mlx5_os_set_reg_mr_cb(mlx5_reg_mr_t *reg_mr_cb, 2336 mlx5_dereg_mr_t *dereg_mr_cb) 2337 { 2338 *reg_mr_cb = mlx5_common_verbs_reg_mr; 2339 *dereg_mr_cb = mlx5_common_verbs_dereg_mr; 2340 } 2341 2342 const struct eth_dev_ops mlx5_os_dev_ops = { 2343 .dev_configure = mlx5_dev_configure, 2344 .dev_start = mlx5_dev_start, 2345 .dev_stop = mlx5_dev_stop, 2346 .dev_set_link_down = mlx5_set_link_down, 2347 .dev_set_link_up = mlx5_set_link_up, 2348 .dev_close = mlx5_dev_close, 2349 .promiscuous_enable = mlx5_promiscuous_enable, 2350 .promiscuous_disable = mlx5_promiscuous_disable, 2351 .allmulticast_enable = mlx5_allmulticast_enable, 2352 .allmulticast_disable = mlx5_allmulticast_disable, 2353 .link_update = mlx5_link_update, 2354 .stats_get = mlx5_stats_get, 2355 .stats_reset = mlx5_stats_reset, 2356 .xstats_get = mlx5_xstats_get, 2357 .xstats_reset = mlx5_xstats_reset, 2358 .xstats_get_names = mlx5_xstats_get_names, 2359 .fw_version_get = mlx5_fw_version_get, 2360 .dev_infos_get = mlx5_dev_infos_get, 2361 .read_clock = mlx5_read_clock, 2362 .dev_supported_ptypes_get = mlx5_dev_supported_ptypes_get, 2363 .vlan_filter_set = mlx5_vlan_filter_set, 2364 .rx_queue_setup = mlx5_rx_queue_setup, 2365 .rx_hairpin_queue_setup = mlx5_rx_hairpin_queue_setup, 2366 .tx_queue_setup = mlx5_tx_queue_setup, 2367 .tx_hairpin_queue_setup = mlx5_tx_hairpin_queue_setup, 2368 .rx_queue_release = mlx5_rx_queue_release, 2369 .tx_queue_release = mlx5_tx_queue_release, 2370 .flow_ctrl_get = mlx5_dev_get_flow_ctrl, 2371 .flow_ctrl_set = mlx5_dev_set_flow_ctrl, 2372 .mac_addr_remove = mlx5_mac_addr_remove, 2373 .mac_addr_add = mlx5_mac_addr_add, 2374 .mac_addr_set = mlx5_mac_addr_set, 2375 .set_mc_addr_list = mlx5_set_mc_addr_list, 2376 .mtu_set = mlx5_dev_set_mtu, 2377 .vlan_strip_queue_set = mlx5_vlan_strip_queue_set, 2378 .vlan_offload_set = mlx5_vlan_offload_set, 2379 .reta_update = mlx5_dev_rss_reta_update, 2380 .reta_query = mlx5_dev_rss_reta_query, 2381 .rss_hash_update = mlx5_rss_hash_update, 2382 .rss_hash_conf_get = mlx5_rss_hash_conf_get, 2383 .filter_ctrl = mlx5_dev_filter_ctrl, 2384 .rx_descriptor_status = mlx5_rx_descriptor_status, 2385 .tx_descriptor_status = mlx5_tx_descriptor_status, 2386 .rxq_info_get = mlx5_rxq_info_get, 2387 .txq_info_get = mlx5_txq_info_get, 2388 .rx_burst_mode_get = mlx5_rx_burst_mode_get, 2389 .tx_burst_mode_get = mlx5_tx_burst_mode_get, 2390 .rx_queue_count = mlx5_rx_queue_count, 2391 .rx_queue_intr_enable = mlx5_rx_intr_enable, 2392 .rx_queue_intr_disable = mlx5_rx_intr_disable, 2393 .is_removed = mlx5_is_removed, 2394 .udp_tunnel_port_add = mlx5_udp_tunnel_port_add, 2395 .get_module_info = mlx5_get_module_info, 2396 .get_module_eeprom = mlx5_get_module_eeprom, 2397 .hairpin_cap_get = mlx5_hairpin_cap_get, 2398 .mtr_ops_get = mlx5_flow_meter_ops_get, 2399 }; 2400 2401 /* Available operations from secondary process. */ 2402 const struct eth_dev_ops mlx5_os_dev_sec_ops = { 2403 .stats_get = mlx5_stats_get, 2404 .stats_reset = mlx5_stats_reset, 2405 .xstats_get = mlx5_xstats_get, 2406 .xstats_reset = mlx5_xstats_reset, 2407 .xstats_get_names = mlx5_xstats_get_names, 2408 .fw_version_get = mlx5_fw_version_get, 2409 .dev_infos_get = mlx5_dev_infos_get, 2410 .rx_descriptor_status = mlx5_rx_descriptor_status, 2411 .tx_descriptor_status = mlx5_tx_descriptor_status, 2412 .rxq_info_get = mlx5_rxq_info_get, 2413 .txq_info_get = mlx5_txq_info_get, 2414 .rx_burst_mode_get = mlx5_rx_burst_mode_get, 2415 .tx_burst_mode_get = mlx5_tx_burst_mode_get, 2416 .get_module_info = mlx5_get_module_info, 2417 .get_module_eeprom = mlx5_get_module_eeprom, 2418 }; 2419 2420 /* Available operations in flow isolated mode. */ 2421 const struct eth_dev_ops mlx5_os_dev_ops_isolate = { 2422 .dev_configure = mlx5_dev_configure, 2423 .dev_start = mlx5_dev_start, 2424 .dev_stop = mlx5_dev_stop, 2425 .dev_set_link_down = mlx5_set_link_down, 2426 .dev_set_link_up = mlx5_set_link_up, 2427 .dev_close = mlx5_dev_close, 2428 .promiscuous_enable = mlx5_promiscuous_enable, 2429 .promiscuous_disable = mlx5_promiscuous_disable, 2430 .allmulticast_enable = mlx5_allmulticast_enable, 2431 .allmulticast_disable = mlx5_allmulticast_disable, 2432 .link_update = mlx5_link_update, 2433 .stats_get = mlx5_stats_get, 2434 .stats_reset = mlx5_stats_reset, 2435 .xstats_get = mlx5_xstats_get, 2436 .xstats_reset = mlx5_xstats_reset, 2437 .xstats_get_names = mlx5_xstats_get_names, 2438 .fw_version_get = mlx5_fw_version_get, 2439 .dev_infos_get = mlx5_dev_infos_get, 2440 .dev_supported_ptypes_get = mlx5_dev_supported_ptypes_get, 2441 .vlan_filter_set = mlx5_vlan_filter_set, 2442 .rx_queue_setup = mlx5_rx_queue_setup, 2443 .rx_hairpin_queue_setup = mlx5_rx_hairpin_queue_setup, 2444 .tx_queue_setup = mlx5_tx_queue_setup, 2445 .tx_hairpin_queue_setup = mlx5_tx_hairpin_queue_setup, 2446 .rx_queue_release = mlx5_rx_queue_release, 2447 .tx_queue_release = mlx5_tx_queue_release, 2448 .flow_ctrl_get = mlx5_dev_get_flow_ctrl, 2449 .flow_ctrl_set = mlx5_dev_set_flow_ctrl, 2450 .mac_addr_remove = mlx5_mac_addr_remove, 2451 .mac_addr_add = mlx5_mac_addr_add, 2452 .mac_addr_set = mlx5_mac_addr_set, 2453 .set_mc_addr_list = mlx5_set_mc_addr_list, 2454 .mtu_set = mlx5_dev_set_mtu, 2455 .vlan_strip_queue_set = mlx5_vlan_strip_queue_set, 2456 .vlan_offload_set = mlx5_vlan_offload_set, 2457 .filter_ctrl = mlx5_dev_filter_ctrl, 2458 .rx_descriptor_status = mlx5_rx_descriptor_status, 2459 .tx_descriptor_status = mlx5_tx_descriptor_status, 2460 .rxq_info_get = mlx5_rxq_info_get, 2461 .txq_info_get = mlx5_txq_info_get, 2462 .rx_burst_mode_get = mlx5_rx_burst_mode_get, 2463 .tx_burst_mode_get = mlx5_tx_burst_mode_get, 2464 .rx_queue_intr_enable = mlx5_rx_intr_enable, 2465 .rx_queue_intr_disable = mlx5_rx_intr_disable, 2466 .is_removed = mlx5_is_removed, 2467 .get_module_info = mlx5_get_module_info, 2468 .get_module_eeprom = mlx5_get_module_eeprom, 2469 .hairpin_cap_get = mlx5_hairpin_cap_get, 2470 .mtr_ops_get = mlx5_flow_meter_ops_get, 2471 }; 2472