/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2020 Mellanox Technologies, Ltd
 */

#include <errno.h>
#include <stdalign.h>
#include <stddef.h>
#include <stdint.h>
#include <stdlib.h>

#include <rte_windows.h>
#include <ethdev_pci.h>

#include <mlx5_glue.h>
#include <mlx5_devx_cmds.h>
#include <mlx5_common.h>
#include <mlx5_common_mp.h>
#include <mlx5_common_mr.h>
#include <mlx5_malloc.h>

#include "mlx5_defs.h"
#include "mlx5.h"
#include "mlx5_common_os.h"
#include "mlx5_utils.h"
#include "mlx5_rxtx.h"
#include "mlx5_rx.h"
#include "mlx5_tx.h"
#include "mlx5_autoconf.h"
#include "mlx5_mr.h"
#include "mlx5_flow.h"
#include "mlx5_devx.h"

static const char *MZ_MLX5_PMD_SHARED_DATA = "mlx5_pmd_shared_data";

/* Spinlock for mlx5_shared_data allocation. */
static rte_spinlock_t mlx5_shared_data_lock = RTE_SPINLOCK_INITIALIZER;

/**
 * Initialize shared data between primary and secondary process.
 *
 * A memzone is reserved by the primary process and secondary processes
 * attach to the memzone.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_init_shared_data(void)
{
	const struct rte_memzone *mz;
	int ret = 0;

	rte_spinlock_lock(&mlx5_shared_data_lock);
	if (mlx5_shared_data == NULL) {
		/* Allocate shared memory. */
		mz = rte_memzone_reserve(MZ_MLX5_PMD_SHARED_DATA,
					 sizeof(*mlx5_shared_data),
					 SOCKET_ID_ANY, 0);
		if (mz == NULL) {
			DRV_LOG(ERR,
				"Cannot allocate mlx5 shared data");
			ret = -rte_errno;
			goto error;
		}
		mlx5_shared_data = mz->addr;
		memset(mlx5_shared_data, 0, sizeof(*mlx5_shared_data));
		rte_spinlock_init(&mlx5_shared_data->lock);
	}
error:
	rte_spinlock_unlock(&mlx5_shared_data_lock);
	return ret;
}

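/*
 * Only the primary process path (memzone reservation) is implemented above;
 * the Windows PMD rejects secondary processes in mlx5_os_net_probe().
 * For illustration only, a secondary process would attach to the shared
 * data roughly as follows:
 *
 *	const struct rte_memzone *mz =
 *		rte_memzone_lookup(MZ_MLX5_PMD_SHARED_DATA);
 *
 *	if (mz != NULL)
 *		mlx5_shared_data = mz->addr;
 */
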
/**
 * PMD global initialization.
 *
 * Independent from individual device, this function initializes global
 * per-PMD data structures distinguishing primary and secondary processes.
 * Hence, each initialization is called once per process.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_init_once(void)
{
	if (mlx5_init_shared_data())
		return -rte_errno;
	return 0;
}

/**
 * Get mlx5 device attributes.
 *
 * @param ctx
 *   Pointer to device context.
 *
 * @param device_attr
 *   Pointer to mlx5 device attributes.
 *
 * @return
 *   0 on success, non-zero error number otherwise.
 */
int
mlx5_os_get_dev_attr(void *ctx, struct mlx5_dev_attr *device_attr)
{
	struct mlx5_context *mlx5_ctx;
	struct mlx5_hca_attr hca_attr;
	void *pv_iseg = NULL;
	u32 cb_iseg = 0;
	int err = 0;

	if (!ctx)
		return -EINVAL;
	mlx5_ctx = (struct mlx5_context *)ctx;
	memset(device_attr, 0, sizeof(*device_attr));
	err = mlx5_devx_cmd_query_hca_attr(mlx5_ctx, &hca_attr);
	if (err) {
		DRV_LOG(ERR, "Failed to get device hca_cap");
		return err;
	}
	device_attr->max_cq = 1 << hca_attr.log_max_cq;
	device_attr->max_qp = 1 << hca_attr.log_max_qp;
	device_attr->max_qp_wr = 1 << hca_attr.log_max_qp_sz;
	device_attr->max_cqe = 1 << hca_attr.log_max_cq_sz;
	device_attr->max_mr = 1 << hca_attr.log_max_mrw_sz;
	device_attr->max_pd = 1 << hca_attr.log_max_pd;
	device_attr->max_srq = 1 << hca_attr.log_max_srq;
	device_attr->max_srq_wr = 1 << hca_attr.log_max_srq_sz;
	if (hca_attr.rss_ind_tbl_cap) {
		device_attr->max_rwq_indirection_table_size =
			1 << hca_attr.rss_ind_tbl_cap;
	}
	pv_iseg = mlx5_glue->query_hca_iseg(mlx5_ctx, &cb_iseg);
	if (pv_iseg == NULL) {
		DRV_LOG(ERR, "Failed to get device hca_iseg");
		return errno;
	}
	if (!err) {
		snprintf(device_attr->fw_ver, 64, "%x.%x.%04x",
			 MLX5_GET(initial_seg, pv_iseg, fw_rev_major),
			 MLX5_GET(initial_seg, pv_iseg, fw_rev_minor),
			 MLX5_GET(initial_seg, pv_iseg, fw_rev_subminor));
	}
	return err;
}

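/*
 * Usage sketch (hypothetical caller, for illustration only):
 *
 *	struct mlx5_dev_attr attr;
 *
 *	if (mlx5_os_get_dev_attr(sh->ctx, &attr) == 0)
 *		DRV_LOG(DEBUG, "FW version %s, max CQ number %d",
 *			attr.fw_ver, attr.max_cq);
 */
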
/**
 * Initialize DR related data within private structure.
 * Routine checks the reference counter and does actual
 * resources creation/initialization only if counter is zero.
 *
 * @param[in] priv
 *   Pointer to the private device data structure.
 *
 * @return
 *   Zero on success, positive error code otherwise.
 */
static int
mlx5_alloc_shared_dr(struct mlx5_priv *priv)
{
	struct mlx5_dev_ctx_shared *sh = priv->sh;
	int err = 0;

	if (!sh->flow_tbls)
		err = mlx5_alloc_table_hash_list(priv);
	else
		DRV_LOG(DEBUG, "sh->flow_tbls[%p] already created, reuse",
			(void *)sh->flow_tbls);
	return err;
}

/**
 * Destroy DR related data within private structure.
 *
 * @param[in] priv
 *   Pointer to the private device data structure.
 */
void
mlx5_os_free_shared_dr(struct mlx5_priv *priv)
{
	mlx5_free_table_hash_list(priv);
}

/**
 * Set the completion channel file descriptor interrupt as non-blocking.
 * Currently it has no support under Windows.
 *
 * @param[in] fd
 *   The file descriptor (representing the interrupt) used in this channel.
 *
 * @return
 *   0 on successfully setting the fd to non-blocking, non-zero otherwise.
 */
int
mlx5_os_set_nonblock_channel_fd(int fd)
{
	(void)fd;
	DRV_LOG(WARNING, "%s: is not supported", __func__);
	return -ENOTSUP;
}

/**
 * Open a device under Windows.
 *
 * This function calls the Windows glue APIs to open a device.
 *
 * @param[in] spawn
 *   Pointer to the device attributes (name, port, etc).
 * @param[out] config
 *   Pointer to device configuration structure.
 * @param[out] sh
 *   Pointer to shared context structure.
 *
 * @return
 *   0 on success, a positive error value otherwise.
 */
int
mlx5_os_open_device(const struct mlx5_dev_spawn_data *spawn,
		    const struct mlx5_dev_config *config,
		    struct mlx5_dev_ctx_shared *sh)
{
	RTE_SET_USED(config);
	int err = 0;
	struct mlx5_context *mlx5_ctx;

	pthread_mutex_init(&sh->txpp.mutex, NULL);
	/* Set numa node from pci probe. */
	sh->numa_node = spawn->pci_dev->device.numa_node;

	/* Try to open device with DevX. */
	rte_errno = 0;
	sh->ctx = mlx5_glue->open_device(spawn->phys_dev);
	if (!sh->ctx) {
		DRV_LOG(ERR, "open_device failed");
		err = errno;
		return err;
	}
	sh->devx = 1;
	mlx5_ctx = (struct mlx5_context *)sh->ctx;
	err = mlx5_glue->query_device(spawn->phys_dev, &mlx5_ctx->mlx5_dev);
	if (err)
		DRV_LOG(ERR, "Failed to query device context fields.");
	return err;
}

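/*
 * Note: there is no Verbs path on Windows - the device is always opened
 * through DevX, which is why sh->devx is set unconditionally once
 * open_device() succeeds.
 */
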
/**
 * DV flow counter mode detect and config.
 *
 * @param dev
 *   Pointer to rte_eth_dev structure.
 *
 */
static void
mlx5_flow_counter_mode_config(struct rte_eth_dev *dev __rte_unused)
{
#ifdef HAVE_IBV_FLOW_DV_SUPPORT
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_dev_ctx_shared *sh = priv->sh;
	bool fallback;

#ifndef HAVE_IBV_DEVX_ASYNC
	fallback = true;
#else
	fallback = false;
	if (!priv->config.devx || !priv->config.dv_flow_en ||
	    !priv->config.hca_attr.flow_counters_dump ||
	    !(priv->config.hca_attr.flow_counter_bulk_alloc_bitmap & 0x4) ||
	    (mlx5_flow_dv_discover_counter_offset_support(dev) == -ENOTSUP))
		fallback = true;
#endif
	if (fallback)
		DRV_LOG(INFO, "Use fall-back DV counter management. Flow "
			"counter dump:%d, bulk_alloc_bitmap:0x%hhx.",
			priv->config.hca_attr.flow_counters_dump,
			priv->config.hca_attr.flow_counter_bulk_alloc_bitmap);
	/* Initialize fallback mode only on the port that initializes sh. */
	if (sh->refcnt == 1)
		sh->cmng.counter_fallback = fallback;
	else if (fallback != sh->cmng.counter_fallback)
		DRV_LOG(WARNING, "Port %d in sh has different fallback mode "
			"with others:%d.", PORT_ID(priv), fallback);
#endif
}

/**
 * Spawn an Ethernet device from DevX information.
 *
 * @param dpdk_dev
 *   Backing DPDK device.
 * @param spawn
 *   Device parameters (name, port, switch_info) to spawn.
 * @param config
 *   Device configuration parameters.
 *
 * @return
 *   A valid Ethernet device object on success, NULL otherwise and rte_errno
 *   is set. The following errors are defined:
 *
 *   EEXIST: device is already spawned
 */
static struct rte_eth_dev *
mlx5_dev_spawn(struct rte_device *dpdk_dev,
	       struct mlx5_dev_spawn_data *spawn,
	       struct mlx5_dev_config *config)
{
	const struct mlx5_switch_info *switch_info = &spawn->info;
	struct mlx5_dev_ctx_shared *sh = NULL;
	struct mlx5_dev_attr device_attr;
	struct rte_eth_dev *eth_dev = NULL;
	struct mlx5_priv *priv = NULL;
	int err = 0;
	unsigned int cqe_comp;
	struct rte_ether_addr mac;
	char name[RTE_ETH_NAME_MAX_LEN];
	int own_domain_id = 0;
	uint16_t port_id;

	/* Build device name. */
	strlcpy(name, dpdk_dev->name, sizeof(name));
	/* Check if the device is already spawned. */
	if (rte_eth_dev_get_port_by_name(name, &port_id) == 0) {
		rte_errno = EEXIST;
		return NULL;
	}
	DRV_LOG(DEBUG, "naming Ethernet device \"%s\"", name);
	/*
	 * Some parameters are needed in advance to create device context. We
	 * process the devargs here to get them, and later process devargs
	 * again to override some hardware settings.
	 */
	err = mlx5_args(config, dpdk_dev->devargs);
	if (err) {
		err = rte_errno;
		DRV_LOG(ERR, "failed to process device arguments: %s",
			strerror(rte_errno));
		goto error;
	}
	mlx5_malloc_mem_select(config->sys_mem_en);
	sh = mlx5_alloc_shared_dev_ctx(spawn, config);
	if (!sh)
		return NULL;
	config->devx = sh->devx;
	/* Initialize the shutdown event in mlx5_dev_spawn to
	 * support mlx5_is_removed for Windows.
	 */
	err = mlx5_glue->devx_init_showdown_event(sh->ctx);
	if (err) {
		DRV_LOG(ERR, "failed to init shutdown event: %s",
			strerror(errno));
		goto error;
	}
	DRV_LOG(DEBUG, "MPW isn't supported");
	mlx5_os_get_dev_attr(sh->ctx, &device_attr);
	config->swp = 0;
	config->ind_table_max_size =
		sh->device_attr.max_rwq_indirection_table_size;
	cqe_comp = 0;
	config->cqe_comp = cqe_comp;
	DRV_LOG(DEBUG, "tunnel offloading is not supported");
	config->tunnel_en = 0;
	DRV_LOG(DEBUG, "MPLS over GRE/UDP tunnel offloading is not supported");
	config->mpls_en = 0;
	/* Allocate private eth device data. */
	priv = mlx5_malloc(MLX5_MEM_ZERO | MLX5_MEM_RTE,
			   sizeof(*priv),
			   RTE_CACHE_LINE_SIZE, SOCKET_ID_ANY);
	if (priv == NULL) {
		DRV_LOG(ERR, "priv allocation failure");
		err = ENOMEM;
		goto error;
	}
	priv->sh = sh;
	priv->dev_port = spawn->phys_port;
	priv->pci_dev = spawn->pci_dev;
	priv->mtu = RTE_ETHER_MTU;
	priv->mp_id.port_id = port_id;
	strlcpy(priv->mp_id.name, MLX5_MP_NAME, RTE_MP_MAX_NAME_LEN);
	priv->representor = !!switch_info->representor;
	priv->master = !!switch_info->master;
	priv->domain_id = RTE_ETH_DEV_SWITCH_DOMAIN_ID_INVALID;
	priv->vport_meta_tag = 0;
	priv->vport_meta_mask = 0;
	priv->pf_bond = spawn->pf_bond;
	priv->vport_id = -1;
	/* representor_id field keeps the unmodified VF index. */
	priv->representor_id = -1;
	/*
	 * Look for sibling devices in order to reuse their switch domain
	 * if any, otherwise allocate one.
	 */
	MLX5_ETH_FOREACH_DEV(port_id, NULL) {
		const struct mlx5_priv *opriv =
			rte_eth_devices[port_id].data->dev_private;

		if (!opriv ||
		    opriv->sh != priv->sh ||
		    opriv->domain_id ==
		    RTE_ETH_DEV_SWITCH_DOMAIN_ID_INVALID)
			continue;
		priv->domain_id = opriv->domain_id;
		break;
	}
	if (priv->domain_id == RTE_ETH_DEV_SWITCH_DOMAIN_ID_INVALID) {
		err = rte_eth_switch_domain_alloc(&priv->domain_id);
		if (err) {
			err = rte_errno;
			DRV_LOG(ERR, "unable to allocate switch domain: %s",
				strerror(rte_errno));
			goto error;
		}
		own_domain_id = 1;
	}
	/* Override some values set by hardware configuration. */
	mlx5_args(config, dpdk_dev->devargs);
	err = mlx5_dev_check_sibling_config(priv, config);
	if (err)
		goto error;
	DRV_LOG(DEBUG, "counters are not supported");
	config->ind_table_max_size =
		sh->device_attr.max_rwq_indirection_table_size;
	/*
	 * Remove this check once DPDK supports larger/variable
	 * indirection tables.
	 */
	if (config->ind_table_max_size > (unsigned int)ETH_RSS_RETA_SIZE_512)
		config->ind_table_max_size = ETH_RSS_RETA_SIZE_512;
	DRV_LOG(DEBUG, "maximum Rx indirection table size is %u",
		config->ind_table_max_size);
	DRV_LOG(DEBUG, "VLAN stripping is %ssupported",
		(config->hw_vlan_strip ? "" : "not "));
	if (config->hw_padding) {
		DRV_LOG(DEBUG, "Rx end alignment padding isn't supported");
		config->hw_padding = 0;
	}
	if (config->tso)
		config->tso_max_payload_sz = sh->device_attr.max_tso;
	DRV_LOG(DEBUG, "%sMPS is %s.",
		config->mps == MLX5_MPW_ENHANCED ? "enhanced " :
		config->mps == MLX5_MPW ? "legacy " : "",
		config->mps != MLX5_MPW_DISABLED ? "enabled" : "disabled");
	if (config->cqe_comp && !cqe_comp) {
		DRV_LOG(WARNING, "Rx CQE compression isn't supported.");
		config->cqe_comp = 0;
	}
	if (config->devx) {
		err = mlx5_devx_cmd_query_hca_attr(sh->ctx, &config->hca_attr);
		if (err) {
			err = -err;
			goto error;
		}
		/* Check relaxed ordering support. */
		sh->cmng.relaxed_ordering_read = 0;
		sh->cmng.relaxed_ordering_write = 0;
		if (!haswell_broadwell_cpu) {
			sh->cmng.relaxed_ordering_write =
				config->hca_attr.relaxed_ordering_write;
			sh->cmng.relaxed_ordering_read =
				config->hca_attr.relaxed_ordering_read;
		}
		config->hw_csum = config->hca_attr.csum_cap;
		DRV_LOG(DEBUG, "checksum offloading is %ssupported",
			(config->hw_csum ? "" : "not "));
	}
	if (config->devx) {
		uint32_t reg[MLX5_ST_SZ_DW(register_mtutc)];

		err = config->hca_attr.access_register_user ?
			mlx5_devx_cmd_register_read
				(sh->ctx, MLX5_REGISTER_ID_MTUTC, 0,
				reg, MLX5_ST_SZ_DW(register_mtutc)) : ENOTSUP;
		if (!err) {
			uint32_t ts_mode;

			/* MTUTC register is read successfully. */
			ts_mode = MLX5_GET(register_mtutc, reg,
					   time_stamp_mode);
			if (ts_mode == MLX5_MTUTC_TIMESTAMP_MODE_REAL_TIME)
				config->rt_timestamp = 1;
		} else {
			/* Kernel does not support register reading. */
			if (config->hca_attr.dev_freq_khz ==
			    (NS_PER_S / MS_PER_S))
				config->rt_timestamp = 1;
		}
		sh->rq_ts_format = config->hca_attr.rq_ts_format;
		sh->sq_ts_format = config->hca_attr.sq_ts_format;
		sh->qp_ts_format = config->hca_attr.qp_ts_format;
	}
	if (config->mprq.enabled) {
		DRV_LOG(WARNING, "Multi-Packet RQ isn't supported");
		config->mprq.enabled = 0;
	}
	if (config->max_dump_files_num == 0)
		config->max_dump_files_num = 128;
	eth_dev = rte_eth_dev_allocate(name);
	if (eth_dev == NULL) {
		DRV_LOG(ERR, "can not allocate rte ethdev");
		err = ENOMEM;
		goto error;
	}
	if (priv->representor) {
		eth_dev->data->dev_flags |= RTE_ETH_DEV_REPRESENTOR;
		eth_dev->data->representor_id = priv->representor_id;
	}
	/*
	 * Store associated network device interface index. This index
	 * is permanent throughout the lifetime of device. So, we may store
	 * the ifindex here and use the cached value further.
	 */
	MLX5_ASSERT(spawn->ifindex);
	priv->if_index = spawn->ifindex;
	eth_dev->data->dev_private = priv;
	priv->dev_data = eth_dev->data;
	eth_dev->data->mac_addrs = priv->mac;
	eth_dev->device = dpdk_dev;
	eth_dev->data->dev_flags |= RTE_ETH_DEV_AUTOFILL_QUEUE_XSTATS;
	/* Configure the first MAC address by default. */
	if (mlx5_get_mac(eth_dev, &mac.addr_bytes)) {
		DRV_LOG(ERR,
			"port %u cannot get MAC address, is mlx5_en"
			" loaded? (errno: %s).",
			eth_dev->data->port_id, strerror(rte_errno));
		err = ENODEV;
		goto error;
	}
	DRV_LOG(INFO,
		"port %u MAC address is %02x:%02x:%02x:%02x:%02x:%02x",
		eth_dev->data->port_id,
		mac.addr_bytes[0], mac.addr_bytes[1],
		mac.addr_bytes[2], mac.addr_bytes[3],
		mac.addr_bytes[4], mac.addr_bytes[5]);
#ifdef RTE_LIBRTE_MLX5_DEBUG
	{
		char ifname[MLX5_NAMESIZE];

		if (mlx5_get_ifname(eth_dev, &ifname) == 0)
			DRV_LOG(DEBUG, "port %u ifname is \"%s\"",
				eth_dev->data->port_id, ifname);
		else
			DRV_LOG(DEBUG, "port %u ifname is unknown.",
				eth_dev->data->port_id);
	}
#endif
	/* Get actual MTU if possible. */
	err = mlx5_get_mtu(eth_dev, &priv->mtu);
	if (err) {
		err = rte_errno;
		goto error;
	}
	DRV_LOG(DEBUG, "port %u MTU is %u.", eth_dev->data->port_id,
		priv->mtu);
	/* Initialize burst functions to prevent crashes before link-up. */
	eth_dev->rx_pkt_burst = removed_rx_burst;
	eth_dev->tx_pkt_burst = removed_tx_burst;
	eth_dev->dev_ops = &mlx5_dev_ops;
	eth_dev->rx_descriptor_status = mlx5_rx_descriptor_status;
	eth_dev->tx_descriptor_status = mlx5_tx_descriptor_status;
	eth_dev->rx_queue_count = mlx5_rx_queue_count;
	/* Register MAC address. */
	claim_zero(mlx5_mac_addr_add(eth_dev, &mac, 0, 0));
	priv->ctrl_flows = 0;
	TAILQ_INIT(&priv->flow_meters);
	priv->mtr_profile_tbl = mlx5_l3t_create(MLX5_L3T_TYPE_PTR);
	if (!priv->mtr_profile_tbl)
		goto error;
	/* Bring Ethernet device up. */
	DRV_LOG(DEBUG, "port %u forcing Ethernet interface up.",
		eth_dev->data->port_id);
	/* nl calls are unsupported - set to -1 not to fail on release. */
	priv->nl_socket_rdma = -1;
	priv->nl_socket_route = -1;
	mlx5_set_link_up(eth_dev);
	/*
	 * Even though the interrupt handler is not installed yet,
	 * interrupts will still trigger on the async_fd from
	 * Verbs context returned by ibv_open_device().
	 */
	mlx5_link_update(eth_dev, 0);
	config->dv_esw_en = 0;
	/* Detect minimal data bytes to inline. */
	mlx5_set_min_inline(spawn, config);
	/* Store device configuration on private structure. */
	priv->config = *config;
	/* Create context for virtual machine VLAN workaround. */
	priv->vmwa_context = NULL;
	if (config->dv_flow_en) {
		err = mlx5_alloc_shared_dr(priv);
		if (err)
			goto error;
	}
	/* No supported flow priority number detection. */
	priv->config.flow_prio = -1;
	if (!priv->config.dv_esw_en &&
	    priv->config.dv_xmeta_en != MLX5_XMETA_MODE_LEGACY) {
		DRV_LOG(WARNING, "metadata mode %u is not supported "
			"(no E-Switch)", priv->config.dv_xmeta_en);
		priv->config.dv_xmeta_en = MLX5_XMETA_MODE_LEGACY;
	}
	mlx5_set_metadata_mask(eth_dev);
	if (priv->config.dv_xmeta_en != MLX5_XMETA_MODE_LEGACY &&
	    !priv->sh->dv_regc0_mask) {
		DRV_LOG(ERR, "metadata mode %u is not supported "
			"(no metadata reg_c[0] is available).",
			priv->config.dv_xmeta_en);
		err = ENOTSUP;
		goto error;
	}
	priv->hrxqs = mlx5_list_create("hrxq", eth_dev, true,
		mlx5_hrxq_create_cb, mlx5_hrxq_match_cb,
		mlx5_hrxq_remove_cb, mlx5_hrxq_clone_cb,
		mlx5_hrxq_clone_free_cb);
	/* Query availability of metadata reg_c's. */
	err = mlx5_flow_discover_mreg_c(eth_dev);
	if (err < 0) {
		err = -err;
		goto error;
	}
	if (!mlx5_flow_ext_mreg_supported(eth_dev)) {
		DRV_LOG(DEBUG,
			"port %u extensive metadata register is not supported.",
			eth_dev->data->port_id);
		if (priv->config.dv_xmeta_en != MLX5_XMETA_MODE_LEGACY) {
			DRV_LOG(ERR, "metadata mode %u is not supported "
				"(no metadata registers available).",
				priv->config.dv_xmeta_en);
			err = ENOTSUP;
			goto error;
		}
	}
	if (config->devx && config->dv_flow_en) {
		priv->obj_ops = devx_obj_ops;
	} else {
		DRV_LOG(ERR, "Flow mode %u is not supported "
			"(Windows flow must be DevX with DV flow enabled).",
			priv->config.dv_flow_en);
		err = ENOTSUP;
		goto error;
	}
	mlx5_flow_counter_mode_config(eth_dev);
	return eth_dev;
error:
	if (priv) {
		if (priv->mtr_profile_tbl)
			mlx5_l3t_destroy(priv->mtr_profile_tbl);
		if (own_domain_id)
			claim_zero(rte_eth_switch_domain_free(priv->domain_id));
		mlx5_free(priv);
		if (eth_dev != NULL)
			eth_dev->data->dev_private = NULL;
	}
	if (eth_dev != NULL) {
		/* mac_addrs must not be freed alone because it is part of
		 * dev_private.
		 */
		eth_dev->data->mac_addrs = NULL;
		rte_eth_dev_release_port(eth_dev);
	}
	if (sh)
		mlx5_free_shared_dev_ctx(sh);
	MLX5_ASSERT(err > 0);
	rte_errno = err;
	return NULL;
}

/**
 * This function should share events between multiple ports of single IB
 * device. Currently it has no support under Windows.
 *
 * @param sh
 *   Pointer to mlx5_dev_ctx_shared object.
 */
void
mlx5_os_dev_shared_handler_install(struct mlx5_dev_ctx_shared *sh)
{
	(void)sh;
	DRV_LOG(WARNING, "%s: is not supported", __func__);
}

/**
 * This function should share events between multiple ports of single IB
 * device. Currently it has no support under Windows.
 *
 * @param sh
 *   Pointer to mlx5_dev_ctx_shared object.
 */
void
mlx5_os_dev_shared_handler_uninstall(struct mlx5_dev_ctx_shared *sh)
{
	(void)sh;
	DRV_LOG(WARNING, "%s: is not supported", __func__);
}

/**
 * Read statistics by a named counter.
 *
 * @param[in] priv
 *   Pointer to the private device data structure.
 * @param[in] ctr_name
 *   Pointer to the name of the statistic counter to read.
 * @param[out] stat
 *   Pointer to read statistic value.
 * @return
 *   0 on success and stat is valid, 1 if failed to read the value
 *   and rte_errno is set.
 *
 */
int
mlx5_os_read_dev_stat(struct mlx5_priv *priv, const char *ctr_name,
		      uint64_t *stat)
{
	RTE_SET_USED(priv);
	RTE_SET_USED(ctr_name);
	RTE_SET_USED(stat);
	DRV_LOG(WARNING, "%s: is not supported", __func__);
	return -ENOTSUP;
}

/**
 * Flush device MAC addresses.
 * Currently it has no support under Windows.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 *
 */
void
mlx5_os_mac_addr_flush(struct rte_eth_dev *dev)
{
	(void)dev;
	DRV_LOG(WARNING, "%s: is not supported", __func__);
}

/**
 * Remove a MAC address from device.
 * Currently it has no support under Windows.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param index
 *   MAC address index.
 */
void
mlx5_os_mac_addr_remove(struct rte_eth_dev *dev, uint32_t index)
{
	(void)dev;
	(void)(index);
	DRV_LOG(WARNING, "%s: is not supported", __func__);
}

/**
 * Add a MAC address to the device.
 * Currently it has no support under Windows.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param mac_addr
 *   MAC address to register.
 * @param index
 *   MAC address index.
 *
 * @return
 *   0 on success, a negative errno value otherwise.
 */
int
mlx5_os_mac_addr_add(struct rte_eth_dev *dev, struct rte_ether_addr *mac,
		     uint32_t index)
{
	(void)index;
	struct rte_ether_addr lmac;

	if (mlx5_get_mac(dev, &lmac.addr_bytes)) {
		DRV_LOG(ERR,
			"port %u cannot get MAC address, is mlx5_en"
			" loaded? (errno: %s)",
			dev->data->port_id, strerror(rte_errno));
		return rte_errno;
	}
	if (!rte_is_same_ether_addr(&lmac, mac)) {
		DRV_LOG(ERR,
			"adding new MAC address to device is unsupported");
		return -ENOTSUP;
	}
	return 0;
}

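/*
 * Note: in effect only the primary MAC address queried from the kernel can
 * be "added" on Windows - any other address is rejected above, so the
 * ethdev MAC address table always holds a single entry.
 */
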
/**
 * Modify a VF MAC address.
 * Currently it has no support under Windows.
 *
 * @param priv
 *   Pointer to device private data.
 * @param mac_addr
 *   MAC address to modify into.
 * @param iface_idx
 *   Net device interface index.
 * @param vf_index
 *   VF index.
 *
 * @return
 *   0 on success, a negative errno value otherwise.
 */
int
mlx5_os_vf_mac_addr_modify(struct mlx5_priv *priv,
			   unsigned int iface_idx,
			   struct rte_ether_addr *mac_addr,
			   int vf_index)
{
	(void)priv;
	(void)iface_idx;
	(void)mac_addr;
	(void)vf_index;
	DRV_LOG(WARNING, "%s: is not supported", __func__);
	return -ENOTSUP;
}

/**
 * Set device promiscuous mode.
 * Currently it has no support under Windows.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param enable
 *   0 - promiscuous is disabled, otherwise - enabled.
 *
 * @return
 *   0 on success, a negative error value otherwise.
 */
int
mlx5_os_set_promisc(struct rte_eth_dev *dev, int enable)
{
	(void)dev;
	(void)enable;
	DRV_LOG(WARNING, "%s: is not supported", __func__);
	return -ENOTSUP;
}

/**
 * Set device allmulti mode.
 * Currently it has no support under Windows.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param enable
 *   0 - all multicast is disabled, otherwise - enabled.
 *
 * @return
 *   0 on success, a negative error value otherwise.
 */
int
mlx5_os_set_allmulti(struct rte_eth_dev *dev, int enable)
{
	(void)dev;
	(void)enable;
	DRV_LOG(WARNING, "%s: is not supported", __func__);
	return -ENOTSUP;
}

/**
 * Detect if a devx_device_bdf object has identical DBDF values to the
 * rte_pci_addr found in bus/pci probing.
 *
 * @param[in] devx_bdf
 *   Pointer to the devx_device_bdf structure.
 * @param[in] addr
 *   Pointer to the rte_pci_addr structure.
 *
 * @return
 *   1 on Device match, 0 on mismatch.
 */
static int
mlx5_match_devx_bdf_to_addr(struct devx_device_bdf *devx_bdf,
			    struct rte_pci_addr *addr)
{
	if (addr->domain != (devx_bdf->bus_id >> 8) ||
	    addr->bus != (devx_bdf->bus_id & 0xff) ||
	    addr->devid != devx_bdf->dev_id ||
	    addr->function != devx_bdf->fnc_id) {
		return 0;
	}
	return 1;
}

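/*
 * Example with illustrative values: a devx_device_bdf with bus_id 0x0103,
 * dev_id 0 and fnc_id 1 matches PCI address 0001:03:00.1 - the PCI domain
 * is carried in the upper bits of bus_id and the bus number in its low byte.
 */
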
/**
 * Detect if a devx_device_bdf object matches the rte_pci_addr
 * found in bus/pci probing.
 * Compare both the Native/PF BDF and the raw_bdf representing a VF BDF.
 *
 * @param[in] devx_bdf
 *   Pointer to the devx_device_bdf structure.
 * @param[in] addr
 *   Pointer to the rte_pci_addr structure.
 *
 * @return
 *   1 on Device match, 0 on mismatch, rte_errno code on failure.
 */
static int
mlx5_match_devx_devices_to_addr(struct devx_device_bdf *devx_bdf,
				struct rte_pci_addr *addr)
{
	int err;
	struct devx_device mlx5_dev;

	if (mlx5_match_devx_bdf_to_addr(devx_bdf, addr))
		return 1;
	/*
	 * Didn't match on Native/PF BDF, could still
	 * match a VF BDF, check it next.
	 */
	err = mlx5_glue->query_device(devx_bdf, &mlx5_dev);
	if (err) {
		DRV_LOG(ERR, "query_device failed");
		rte_errno = err;
		return rte_errno;
	}
	if (mlx5_match_devx_bdf_to_addr(&mlx5_dev.raw_bdf, addr))
		return 1;
	return 0;
}

/**
 * DPDK callback to register a PCI device.
 *
 * This function spawns Ethernet devices out of a given device.
 *
 * @param[in] dev
 *   Pointer to the generic device.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_os_net_probe(struct rte_device *dev)
{
	struct rte_pci_device *pci_dev = RTE_DEV_TO_PCI(dev);
	struct devx_device_bdf *devx_bdf_devs, *orig_devx_bdf_devs;
	/*
	 * Number of found IB Devices matching with requested PCI BDF.
	 * nd != 1 means there are multiple IB devices over the same
	 * PCI device and we have representors and master.
	 */
	unsigned int nd = 0;
	/*
	 * Number of found IB device Ports. nd = 1 and np = 1..n means
	 * we have the single multiport IB device, and there may be
	 * representors attached to some of found ports.
	 * Currently not supported.
	 * unsigned int np = 0;
	 */

	/*
	 * Number of DPDK ethernet devices to Spawn - either over
	 * multiple IB devices or multiple ports of single IB device.
	 * Actually this is the number of iterations to spawn.
	 */
	unsigned int ns = 0;
	/*
	 * Bonding device
	 *   < 0 - no bonding device (single one)
	 *   >= 0 - bonding device (value is slave PF index)
	 */
	int bd = -1;
	struct mlx5_dev_spawn_data *list = NULL;
	struct mlx5_dev_config dev_config;
	unsigned int dev_config_vf;
	int ret, err;
	uint32_t restore;

	if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
		DRV_LOG(ERR, "Secondary process is not supported on Windows.");
		return -ENOTSUP;
	}
	ret = mlx5_init_once();
	if (ret) {
		DRV_LOG(ERR, "unable to init PMD global data: %s",
			strerror(rte_errno));
		return -rte_errno;
	}
	errno = 0;
	devx_bdf_devs = mlx5_glue->get_device_list(&ret);
	orig_devx_bdf_devs = devx_bdf_devs;
	if (!devx_bdf_devs) {
		rte_errno = errno ? errno : ENOSYS;
		DRV_LOG(ERR, "cannot list devices, is ib_uverbs loaded?");
		return -rte_errno;
	}
	/*
	 * First scan the list of all Infiniband devices to find
	 * matching ones, gathering into the list.
	 */
	struct devx_device_bdf *devx_bdf_match[ret + 1];

	while (ret-- > 0) {
		err = mlx5_match_devx_devices_to_addr(devx_bdf_devs,
						      &pci_dev->addr);
		if (!err) {
			devx_bdf_devs++;
			continue;
		}
		if (err != 1) {
			ret = -err;
			goto exit;
		}
		devx_bdf_match[nd++] = devx_bdf_devs;
	}
	devx_bdf_match[nd] = NULL;
	if (!nd) {
		/* No device matches, just complain and bail out. */
		DRV_LOG(WARNING,
			"no DevX device matches PCI device " PCI_PRI_FMT ","
			" is DevX Configured?",
			pci_dev->addr.domain, pci_dev->addr.bus,
			pci_dev->addr.devid, pci_dev->addr.function);
		rte_errno = ENOENT;
		ret = -rte_errno;
		goto exit;
	}
	/*
	 * Now we can determine the maximal
	 * amount of devices to be spawned.
	 */
	list = mlx5_malloc(MLX5_MEM_ZERO,
			   sizeof(struct mlx5_dev_spawn_data),
			   RTE_CACHE_LINE_SIZE, SOCKET_ID_ANY);
	if (!list) {
		DRV_LOG(ERR, "spawn data array allocation failure");
		rte_errno = ENOMEM;
		ret = -rte_errno;
		goto exit;
	}
	memset(&list[ns].info, 0, sizeof(list[ns].info));
	list[ns].max_port = 1;
	list[ns].phys_port = 1;
	list[ns].phys_dev = devx_bdf_match[ns];
	list[ns].eth_dev = NULL;
	list[ns].pci_dev = pci_dev;
	list[ns].pf_bond = bd;
	list[ns].ifindex = -1; /* Spawn will assign */
	list[ns].info =
		(struct mlx5_switch_info){
			.master = 0,
			.representor = 0,
			.name_type = MLX5_PHYS_PORT_NAME_TYPE_UPLINK,
			.port_name = 0,
			.switch_id = 0,
		};
	/* Device specific configuration. */
	switch (pci_dev->id.device_id) {
	case PCI_DEVICE_ID_MELLANOX_CONNECTX4VF:
	case PCI_DEVICE_ID_MELLANOX_CONNECTX4LXVF:
	case PCI_DEVICE_ID_MELLANOX_CONNECTX5VF:
	case PCI_DEVICE_ID_MELLANOX_CONNECTX5EXVF:
	case PCI_DEVICE_ID_MELLANOX_CONNECTX5BFVF:
	case PCI_DEVICE_ID_MELLANOX_CONNECTX6VF:
	case PCI_DEVICE_ID_MELLANOX_CONNECTXVF:
		dev_config_vf = 1;
		break;
	default:
		dev_config_vf = 0;
		break;
	}
	/* Default configuration. */
	memset(&dev_config, 0, sizeof(struct mlx5_dev_config));
	dev_config.vf = dev_config_vf;
	dev_config.mps = 0;
	dev_config.dbnc = MLX5_ARG_UNSET;
	dev_config.rx_vec_en = 1;
	dev_config.txq_inline_max = MLX5_ARG_UNSET;
	dev_config.txq_inline_min = MLX5_ARG_UNSET;
	dev_config.txq_inline_mpw = MLX5_ARG_UNSET;
	dev_config.txqs_inline = MLX5_ARG_UNSET;
	dev_config.vf_nl_en = 0;
	dev_config.mr_ext_memseg_en = 1;
	dev_config.mprq.max_memcpy_len = MLX5_MPRQ_MEMCPY_DEFAULT_LEN;
	dev_config.mprq.min_rxqs_num = MLX5_MPRQ_MIN_RXQS;
	dev_config.dv_esw_en = 0;
	dev_config.dv_flow_en = 1;
	dev_config.decap_en = 0;
	dev_config.log_hp_size = MLX5_ARG_UNSET;
	list[ns].numa_node = pci_dev->device.numa_node;
	list[ns].eth_dev = mlx5_dev_spawn(&pci_dev->device,
					  &list[ns],
					  &dev_config);
	if (!list[ns].eth_dev)
		goto exit;
	restore = list[ns].eth_dev->data->dev_flags;
	rte_eth_copy_pci_info(list[ns].eth_dev, pci_dev);
	/* Restore non-PCI flags cleared by the above call. */
	list[ns].eth_dev->data->dev_flags |= restore;
	rte_eth_dev_probing_finish(list[ns].eth_dev);
	ret = 0;
exit:
	/*
	 * Do the routine cleanup:
	 * - free allocated spawn data array
	 * - free the device list
	 */
	if (list)
		mlx5_free(list);
	MLX5_ASSERT(orig_devx_bdf_devs);
	mlx5_glue->free_device_list(orig_devx_bdf_devs);
	return ret;
}

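/*
 * Note: only a single spawn entry is allocated above and only
 * devx_bdf_match[0] is used, i.e. representors and bonding (nd > 1) are
 * not handled by the Windows probe path.
 */
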
/**
 * Set the reg_mr and dereg_mr callbacks.
 *
 * @param[out] reg_mr_cb
 *   Pointer to reg_mr func.
 * @param[out] dereg_mr_cb
 *   Pointer to dereg_mr func.
 *
 */
void
mlx5_os_set_reg_mr_cb(mlx5_reg_mr_t *reg_mr_cb,
		      mlx5_dereg_mr_t *dereg_mr_cb)
{
	*reg_mr_cb = mlx5_os_reg_mr;
	*dereg_mr_cb = mlx5_os_dereg_mr;
}

/**
 * Extract pdn of PD object using DevX.
 *
 * @param[in] pd
 *   Pointer to the DevX PD object.
 * @param[out] pdn
 *   Pointer to the PD object number variable.
 *
 * @return
 *   0 on success, error value otherwise.
 */
int
mlx5_os_get_pdn(void *pd, uint32_t *pdn)
{
	if (!pd)
		return -EINVAL;

	*pdn = ((struct mlx5_pd *)pd)->pdn;
	return 0;
}

const struct mlx5_flow_driver_ops mlx5_flow_verbs_drv_ops = {0};