/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2020 Mellanox Technologies, Ltd
 */

#include <errno.h>
#include <stdalign.h>
#include <stddef.h>
#include <stdint.h>
#include <stdlib.h>

#include <rte_windows.h>
#include <rte_ethdev_pci.h>

#include <mlx5_glue.h>
#include <mlx5_devx_cmds.h>
#include <mlx5_common.h>
#include <mlx5_common_mp.h>
#include <mlx5_common_mr.h>
#include <mlx5_malloc.h>

#include "mlx5_defs.h"
#include "mlx5.h"
#include "mlx5_common_os.h"
#include "mlx5_utils.h"
#include "mlx5_rxtx.h"
#include "mlx5_autoconf.h"
#include "mlx5_mr.h"
#include "mlx5_flow.h"
#include "mlx5_devx.h"

#define MLX5_TAGS_HLIST_ARRAY_SIZE 8192

static const char *MZ_MLX5_PMD_SHARED_DATA = "mlx5_pmd_shared_data";

/* Spinlock for mlx5_shared_data allocation. */
static rte_spinlock_t mlx5_shared_data_lock = RTE_SPINLOCK_INITIALIZER;

/**
 * Initialize shared data between primary and secondary process.
 *
 * A memzone is reserved by primary process and secondary processes attach to
 * the memzone.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_init_shared_data(void)
{
        const struct rte_memzone *mz;
        int ret = 0;

        rte_spinlock_lock(&mlx5_shared_data_lock);
        if (mlx5_shared_data == NULL) {
                /* Allocate shared memory. */
                mz = rte_memzone_reserve(MZ_MLX5_PMD_SHARED_DATA,
                                         sizeof(*mlx5_shared_data),
                                         SOCKET_ID_ANY, 0);
                if (mz == NULL) {
                        DRV_LOG(ERR,
                                "Cannot allocate mlx5 shared data");
                        ret = -rte_errno;
                        goto error;
                }
                mlx5_shared_data = mz->addr;
                memset(mlx5_shared_data, 0, sizeof(*mlx5_shared_data));
                rte_spinlock_init(&mlx5_shared_data->lock);
        }
error:
        rte_spinlock_unlock(&mlx5_shared_data_lock);
        return ret;
}

/**
 * PMD global initialization.
 *
 * Independent from individual device, this function initializes global
 * per-PMD data structures distinguishing primary and secondary processes.
 * Hence, each initialization is called once per process.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_init_once(void)
{
        if (mlx5_init_shared_data())
                return -rte_errno;
        return 0;
}
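
/*
 * Note: mlx5_os_pci_probe() below rejects secondary processes on Windows,
 * so in practice only the primary process reaches mlx5_init_shared_data()
 * and reserves the shared data memzone.
 */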

/**
 * Get mlx5 device attributes.
 *
 * @param ctx
 *   Pointer to device context.
 *
 * @param device_attr
 *   Pointer to mlx5 device attributes.
 *
 * @return
 *   0 on success, non-zero error number otherwise.
 */
int
mlx5_os_get_dev_attr(void *ctx, struct mlx5_dev_attr *device_attr)
{
        struct mlx5_context *mlx5_ctx;
        struct mlx5_hca_attr hca_attr;
        void *pv_iseg = NULL;
        u32 cb_iseg = 0;
        int err = 0;

        if (!ctx)
                return -EINVAL;
        mlx5_ctx = (struct mlx5_context *)ctx;
        memset(device_attr, 0, sizeof(*device_attr));
        err = mlx5_devx_cmd_query_hca_attr(mlx5_ctx, &hca_attr);
        if (err) {
                DRV_LOG(ERR, "Failed to get device hca_cap");
                return err;
        }
        /* HCA capabilities are reported as log2 values, convert to limits. */
        device_attr->max_cq = 1 << hca_attr.log_max_cq;
        device_attr->max_qp = 1 << hca_attr.log_max_qp;
        device_attr->max_qp_wr = 1 << hca_attr.log_max_qp_sz;
        device_attr->max_cqe = 1 << hca_attr.log_max_cq_sz;
        device_attr->max_mr = 1 << hca_attr.log_max_mrw_sz;
        device_attr->max_pd = 1 << hca_attr.log_max_pd;
        device_attr->max_srq = 1 << hca_attr.log_max_srq;
        device_attr->max_srq_wr = 1 << hca_attr.log_max_srq_sz;
        if (hca_attr.rss_ind_tbl_cap) {
                device_attr->max_rwq_indirection_table_size =
                        1 << hca_attr.rss_ind_tbl_cap;
        }
        pv_iseg = mlx5_glue->query_hca_iseg(mlx5_ctx, &cb_iseg);
        if (pv_iseg == NULL) {
                DRV_LOG(ERR, "Failed to get device hca_iseg");
                return errno;
        }
        if (!err) {
                snprintf(device_attr->fw_ver, 64, "%x.%x.%04x",
                         MLX5_GET(initial_seg, pv_iseg, fw_rev_major),
                         MLX5_GET(initial_seg, pv_iseg, fw_rev_minor),
                         MLX5_GET(initial_seg, pv_iseg, fw_rev_subminor));
        }
        return err;
}
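
/*
 * Illustrative caller sketch (not part of the driver flow): the attribute
 * query is normally driven from the shared device context setup, roughly:
 *
 *      struct mlx5_dev_attr attr;
 *
 *      if (mlx5_os_get_dev_attr(sh->ctx, &attr) == 0)
 *              DRV_LOG(DEBUG, "firmware version: %s", attr.fw_ver);
 */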

/**
 * Initialize DR related data within private structure.
 * Routine checks the reference counter and does actual
 * resources creation/initialization only if counter is zero.
 *
 * @param[in] priv
 *   Pointer to the private device data structure.
 *
 * @return
 *   Zero on success, positive error code otherwise.
 */
static int
mlx5_alloc_shared_dr(struct mlx5_priv *priv)
{
        struct mlx5_dev_ctx_shared *sh = priv->sh;
        int err = 0;

        if (!sh->flow_tbls)
                err = mlx5_alloc_table_hash_list(priv);
        else
                DRV_LOG(DEBUG, "sh->flow_tbls[%p] already created, reuse\n",
                        (void *)sh->flow_tbls);
        return err;
}

/**
 * Destroy DR related data within private structure.
 *
 * @param[in] priv
 *   Pointer to the private device data structure.
 */
void
mlx5_os_free_shared_dr(struct mlx5_priv *priv)
{
        mlx5_free_table_hash_list(priv);
}

/**
 * Set the completion channel file descriptor interrupt as non-blocking.
 * Currently it has no support under Windows.
 *
 * @param[in] fd
 *   The file descriptor (representing the interrupt) of the RQ completion
 *   channel to set as non-blocking.
 *
 * @return
 *   0 on successfully setting the fd to non-blocking, non-zero otherwise.
 */
int
mlx5_os_set_nonblock_channel_fd(int fd)
{
        (void)fd;
        DRV_LOG(WARNING, "%s: is not supported", __func__);
        return -ENOTSUP;
}

/**
 * Function API to open a device under Windows.
 *
 * This function calls the Windows glue APIs to open a device.
 *
 * @param[in] spawn
 *   Pointer to the device attributes (name, port, etc).
 * @param[out] config
 *   Pointer to device configuration structure.
 * @param[out] sh
 *   Pointer to shared context structure.
 *
 * @return
 *   0 on success, a positive error value otherwise.
 */
int
mlx5_os_open_device(const struct mlx5_dev_spawn_data *spawn,
                    const struct mlx5_dev_config *config,
                    struct mlx5_dev_ctx_shared *sh)
{
        RTE_SET_USED(config);
        int err = 0;
        struct mlx5_context *mlx5_ctx;

        pthread_mutex_init(&sh->txpp.mutex, NULL);
        /* Set numa node from pci probe */
        sh->numa_node = spawn->pci_dev->device.numa_node;

        /* Try to open device with DevX */
        rte_errno = 0;
        sh->ctx = mlx5_glue->open_device(spawn->phys_dev);
        if (!sh->ctx) {
                DRV_LOG(ERR, "open_device failed");
                err = errno;
                return err;
        }
        sh->devx = 1;
        mlx5_ctx = (struct mlx5_context *)sh->ctx;
        err = mlx5_glue->query_device(spawn->phys_dev, &mlx5_ctx->mlx5_dev);
        if (err)
                DRV_LOG(ERR, "Failed to query device context fields.");
        return err;
}
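
/*
 * Note: sh->devx is set unconditionally above because the Windows port
 * operates through DevX only; mlx5_dev_spawn() below rejects any
 * configuration that is not DevX with DV flow enabled.
 */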

/**
 * DV flow counter mode detect and config.
 *
 * @param dev
 *   Pointer to rte_eth_dev structure.
 *
 */
static void
mlx5_flow_counter_mode_config(struct rte_eth_dev *dev __rte_unused)
{
#ifdef HAVE_IBV_FLOW_DV_SUPPORT
        struct mlx5_priv *priv = dev->data->dev_private;
        struct mlx5_dev_ctx_shared *sh = priv->sh;
        bool fallback;

#ifndef HAVE_IBV_DEVX_ASYNC
        fallback = true;
#else
        fallback = false;
        if (!priv->config.devx || !priv->config.dv_flow_en ||
            !priv->config.hca_attr.flow_counters_dump ||
            !(priv->config.hca_attr.flow_counter_bulk_alloc_bitmap & 0x4) ||
            (mlx5_flow_dv_discover_counter_offset_support(dev) == -ENOTSUP))
                fallback = true;
#endif
        if (fallback)
                DRV_LOG(INFO, "Use fall-back DV counter management. Flow "
                        "counter dump:%d, bulk_alloc_bitmap:0x%hhx.",
                        priv->config.hca_attr.flow_counters_dump,
                        priv->config.hca_attr.flow_counter_bulk_alloc_bitmap);
        /* Initialize fallback mode only on the port that initializes sh. */
        if (sh->refcnt == 1)
                sh->cmng.counter_fallback = fallback;
        else if (fallback != sh->cmng.counter_fallback)
                DRV_LOG(WARNING, "Port %d in sh has different fallback mode "
                        "with others:%d.", PORT_ID(priv), fallback);
#endif
}

/**
 * Spawn an Ethernet device from Verbs information.
 *
 * @param dpdk_dev
 *   Backing DPDK device.
 * @param spawn
 *   Verbs device parameters (name, port, switch_info) to spawn.
 * @param config
 *   Device configuration parameters.
 *
 * @return
 *   A valid Ethernet device object on success, NULL otherwise and rte_errno
 *   is set. The following errors are defined:
 *
 *   EEXIST: device is already spawned
 */
static struct rte_eth_dev *
mlx5_dev_spawn(struct rte_device *dpdk_dev,
               struct mlx5_dev_spawn_data *spawn,
               struct mlx5_dev_config *config)
{
        const struct mlx5_switch_info *switch_info = &spawn->info;
        struct mlx5_dev_ctx_shared *sh = NULL;
        struct mlx5_dev_attr device_attr;
        struct rte_eth_dev *eth_dev = NULL;
        struct mlx5_priv *priv = NULL;
        int err = 0;
        unsigned int cqe_comp;
        unsigned int cqe_pad = 0;
        struct rte_ether_addr mac;
        char name[RTE_ETH_NAME_MAX_LEN];
        int own_domain_id = 0;
        uint16_t port_id;

        /* Build device name. */
        strlcpy(name, dpdk_dev->name, sizeof(name));
        /* check if the device is already spawned */
        if (rte_eth_dev_get_port_by_name(name, &port_id) == 0) {
                rte_errno = EEXIST;
                return NULL;
        }
        DRV_LOG(DEBUG, "naming Ethernet device \"%s\"", name);
        /*
         * Some parameters are needed in advance to create device context. We
         * process the devargs here to get them, and later process devargs
         * again to override some hardware settings.
         */
        err = mlx5_args(config, dpdk_dev->devargs);
        if (err) {
                err = rte_errno;
                DRV_LOG(ERR, "failed to process device arguments: %s",
                        strerror(rte_errno));
                goto error;
        }
        mlx5_malloc_mem_select(config->sys_mem_en);
        sh = mlx5_alloc_shared_dev_ctx(spawn, config);
        if (!sh)
                return NULL;
        config->devx = sh->devx;
        /* Initialize the shutdown event in mlx5_dev_spawn to
         * support mlx5_is_removed for Windows.
         */
        err = mlx5_glue->devx_init_showdown_event(sh->ctx);
        if (err) {
                DRV_LOG(ERR, "failed to init showdown event: %s",
                        strerror(errno));
                goto error;
        }
        DRV_LOG(DEBUG, "MPW isn't supported");
        mlx5_os_get_dev_attr(sh->ctx, &device_attr);
        config->swp = 0;
        config->ind_table_max_size =
                sh->device_attr.max_rwq_indirection_table_size;
        if (RTE_CACHE_LINE_SIZE == 128 &&
            !(device_attr.flags & MLX5DV_CONTEXT_FLAGS_CQE_128B_COMP))
                cqe_comp = 0;
        else
                cqe_comp = 1;
        config->cqe_comp = cqe_comp;
        DRV_LOG(DEBUG, "tunnel offloading is not supported");
        config->tunnel_en = 0;
        DRV_LOG(DEBUG, "MPLS over GRE/UDP tunnel offloading is not supported");
        config->mpls_en = 0;
        /* Allocate private eth device data. */
        priv = mlx5_malloc(MLX5_MEM_ZERO | MLX5_MEM_RTE,
                           sizeof(*priv),
                           RTE_CACHE_LINE_SIZE, SOCKET_ID_ANY);
        if (priv == NULL) {
                DRV_LOG(ERR, "priv allocation failure");
                err = ENOMEM;
                goto error;
        }
        priv->sh = sh;
        priv->dev_port = spawn->phys_port;
        priv->pci_dev = spawn->pci_dev;
        priv->mtu = RTE_ETHER_MTU;
        priv->mp_id.port_id = port_id;
        strlcpy(priv->mp_id.name, MLX5_MP_NAME, RTE_MP_MAX_NAME_LEN);
        priv->representor = !!switch_info->representor;
        priv->master = !!switch_info->master;
        priv->domain_id = RTE_ETH_DEV_SWITCH_DOMAIN_ID_INVALID;
        priv->vport_meta_tag = 0;
        priv->vport_meta_mask = 0;
        priv->pf_bond = spawn->pf_bond;
        priv->vport_id = -1;
        /* representor_id field keeps the unmodified VF index. */
        priv->representor_id = -1;
        /*
         * Look for sibling devices in order to reuse their switch domain
         * if any, otherwise allocate one.
         */
        MLX5_ETH_FOREACH_DEV(port_id, priv->pci_dev) {
                const struct mlx5_priv *opriv =
                        rte_eth_devices[port_id].data->dev_private;

                if (!opriv ||
                    opriv->sh != priv->sh ||
                    opriv->domain_id ==
                    RTE_ETH_DEV_SWITCH_DOMAIN_ID_INVALID)
                        continue;
                priv->domain_id = opriv->domain_id;
                break;
        }
        if (priv->domain_id == RTE_ETH_DEV_SWITCH_DOMAIN_ID_INVALID) {
                err = rte_eth_switch_domain_alloc(&priv->domain_id);
                if (err) {
                        err = rte_errno;
                        DRV_LOG(ERR, "unable to allocate switch domain: %s",
                                strerror(rte_errno));
                        goto error;
                }
                own_domain_id = 1;
        }
        /* Override some values set by hardware configuration. */
        mlx5_args(config, dpdk_dev->devargs);
        err = mlx5_dev_check_sibling_config(priv, config);
        if (err)
                goto error;
        config->hw_csum = !!(sh->device_attr.device_cap_flags_ex &
                             IBV_DEVICE_RAW_IP_CSUM);
        DRV_LOG(DEBUG, "checksum offloading is %ssupported",
                (config->hw_csum ? "" : "not "));
        DRV_LOG(DEBUG, "counters are not supported");
        config->ind_table_max_size =
                sh->device_attr.max_rwq_indirection_table_size;
        /*
         * Remove this check once DPDK supports larger/variable
         * indirection tables.
         */
        if (config->ind_table_max_size > (unsigned int)ETH_RSS_RETA_SIZE_512)
                config->ind_table_max_size = ETH_RSS_RETA_SIZE_512;
        DRV_LOG(DEBUG, "maximum Rx indirection table size is %u",
                config->ind_table_max_size);
        config->hw_vlan_strip = !!(sh->device_attr.raw_packet_caps &
                                   IBV_RAW_PACKET_CAP_CVLAN_STRIPPING);
        DRV_LOG(DEBUG, "VLAN stripping is %ssupported",
                (config->hw_vlan_strip ? "" : "not "));
        config->hw_fcs_strip = !!(sh->device_attr.raw_packet_caps &
                                  IBV_RAW_PACKET_CAP_SCATTER_FCS);
        if (config->hw_padding) {
                DRV_LOG(DEBUG, "Rx end alignment padding isn't supported");
                config->hw_padding = 0;
        }
        config->tso = (sh->device_attr.max_tso > 0 &&
                       (sh->device_attr.tso_supported_qpts &
                        (1 << IBV_QPT_RAW_PACKET)));
        if (config->tso)
                config->tso_max_payload_sz = sh->device_attr.max_tso;
        DRV_LOG(DEBUG, "%sMPS is %s.",
                config->mps == MLX5_MPW_ENHANCED ? "enhanced " :
                config->mps == MLX5_MPW ? "legacy " : "",
                config->mps != MLX5_MPW_DISABLED ? "enabled" : "disabled");
        if (config->cqe_comp && !cqe_comp) {
                DRV_LOG(WARNING, "Rx CQE compression isn't supported.");
                config->cqe_comp = 0;
        }
        if (config->cqe_pad && !cqe_pad) {
                DRV_LOG(WARNING, "Rx CQE padding isn't supported.");
                config->cqe_pad = 0;
        } else if (config->cqe_pad) {
                DRV_LOG(INFO, "Rx CQE padding is enabled.");
        }
        if (config->devx) {
                err = mlx5_devx_cmd_query_hca_attr(sh->ctx, &config->hca_attr);
                if (err) {
                        err = -err;
                        goto error;
                }
                /* Check relaxed ordering support. */
                sh->cmng.relaxed_ordering_read = 0;
                sh->cmng.relaxed_ordering_write = 0;
                if (!haswell_broadwell_cpu) {
                        sh->cmng.relaxed_ordering_write =
                                config->hca_attr.relaxed_ordering_write;
                        sh->cmng.relaxed_ordering_read =
                                config->hca_attr.relaxed_ordering_read;
                }
        }
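        /*
         * Sketch of the real-time timestamp detection below: if the MTUTC
         * register is readable and reports REAL_TIME mode, or if the device
         * free-running clock frequency equals NS_PER_S / MS_PER_S (in kHz),
         * config->rt_timestamp is enabled.
         */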
        if (config->devx) {
                uint32_t reg[MLX5_ST_SZ_DW(register_mtutc)];

                err = config->hca_attr.access_register_user ?
                        mlx5_devx_cmd_register_read
                                (sh->ctx, MLX5_REGISTER_ID_MTUTC, 0,
                                reg, MLX5_ST_SZ_DW(register_mtutc)) : ENOTSUP;
                if (!err) {
                        uint32_t ts_mode;

                        /* MTUTC register is read successfully. */
                        ts_mode = MLX5_GET(register_mtutc, reg,
                                           time_stamp_mode);
                        if (ts_mode == MLX5_MTUTC_TIMESTAMP_MODE_REAL_TIME)
                                config->rt_timestamp = 1;
                } else {
                        /* Kernel does not support register reading. */
                        if (config->hca_attr.dev_freq_khz ==
                            (NS_PER_S / MS_PER_S))
                                config->rt_timestamp = 1;
                }
        }
        if (config->mprq.enabled) {
                DRV_LOG(WARNING, "Multi-Packet RQ isn't supported");
                config->mprq.enabled = 0;
        }
        if (config->max_dump_files_num == 0)
                config->max_dump_files_num = 128;
        eth_dev = rte_eth_dev_allocate(name);
        if (eth_dev == NULL) {
                DRV_LOG(ERR, "can not allocate rte ethdev");
                err = ENOMEM;
                goto error;
        }
        if (priv->representor) {
                eth_dev->data->dev_flags |= RTE_ETH_DEV_REPRESENTOR;
                eth_dev->data->representor_id = priv->representor_id;
        }
        /*
         * Store associated network device interface index. This index
         * is permanent throughout the lifetime of device. So, we may store
         * the ifindex here and use the cached value further.
         */
        MLX5_ASSERT(spawn->ifindex);
        priv->if_index = spawn->ifindex;
        eth_dev->data->dev_private = priv;
        priv->dev_data = eth_dev->data;
        eth_dev->data->mac_addrs = priv->mac;
        eth_dev->device = dpdk_dev;
        eth_dev->data->dev_flags |= RTE_ETH_DEV_AUTOFILL_QUEUE_XSTATS;
        /* Configure the first MAC address by default. */
        if (mlx5_get_mac(eth_dev, &mac.addr_bytes)) {
                DRV_LOG(ERR,
                        "port %u cannot get MAC address, is mlx5_en"
                        " loaded? (errno: %s).",
                        eth_dev->data->port_id, strerror(rte_errno));
                err = ENODEV;
                goto error;
        }
        DRV_LOG(INFO,
                "port %u MAC address is %02x:%02x:%02x:%02x:%02x:%02x",
                eth_dev->data->port_id,
                mac.addr_bytes[0], mac.addr_bytes[1],
                mac.addr_bytes[2], mac.addr_bytes[3],
                mac.addr_bytes[4], mac.addr_bytes[5]);
#ifdef RTE_LIBRTE_MLX5_DEBUG
        {
                char ifname[MLX5_NAMESIZE];

                if (mlx5_get_ifname(eth_dev, &ifname) == 0)
                        DRV_LOG(DEBUG, "port %u ifname is \"%s\"",
                                eth_dev->data->port_id, ifname);
                else
                        DRV_LOG(DEBUG, "port %u ifname is unknown.",
                                eth_dev->data->port_id);
        }
#endif
        /* Get actual MTU if possible. */
        err = mlx5_get_mtu(eth_dev, &priv->mtu);
        if (err) {
                err = rte_errno;
                goto error;
        }
        DRV_LOG(DEBUG, "port %u MTU is %u.", eth_dev->data->port_id,
                priv->mtu);
        /* Initialize burst functions to prevent crashes before link-up. */
        eth_dev->rx_pkt_burst = removed_rx_burst;
        eth_dev->tx_pkt_burst = removed_tx_burst;
        eth_dev->dev_ops = &mlx5_dev_ops;
        eth_dev->rx_descriptor_status = mlx5_rx_descriptor_status;
        eth_dev->tx_descriptor_status = mlx5_tx_descriptor_status;
        eth_dev->rx_queue_count = mlx5_rx_queue_count;
        /* Register MAC address. */
        claim_zero(mlx5_mac_addr_add(eth_dev, &mac, 0, 0));
        priv->flows = 0;
        priv->ctrl_flows = 0;
        TAILQ_INIT(&priv->flow_meters);
        TAILQ_INIT(&priv->flow_meter_profiles);
        /* Bring Ethernet device up. */
        DRV_LOG(DEBUG, "port %u forcing Ethernet interface up.",
                eth_dev->data->port_id);
        /* nl calls are unsupported - set to -1 not to fail on release */
        priv->nl_socket_rdma = -1;
        priv->nl_socket_route = -1;
        mlx5_set_link_up(eth_dev);
        /*
         * Even though the interrupt handler is not installed yet,
         * interrupts will still trigger on the async_fd from
         * Verbs context returned by ibv_open_device().
         */
        mlx5_link_update(eth_dev, 0);
        config->dv_esw_en = 0;
        /* Detect minimal data bytes to inline. */
        mlx5_set_min_inline(spawn, config);
        /* Store device configuration on private structure. */
        priv->config = *config;
        /* Create context for virtual machine VLAN workaround. */
        priv->vmwa_context = NULL;
        if (config->dv_flow_en) {
                err = mlx5_alloc_shared_dr(priv);
                if (err)
                        goto error;
        }
        /* No supported flow priority number detection. */
        priv->config.flow_prio = -1;
        if (!priv->config.dv_esw_en &&
            priv->config.dv_xmeta_en != MLX5_XMETA_MODE_LEGACY) {
                DRV_LOG(WARNING, "metadata mode %u is not supported "
                        "(no E-Switch)", priv->config.dv_xmeta_en);
                priv->config.dv_xmeta_en = MLX5_XMETA_MODE_LEGACY;
        }
        mlx5_set_metadata_mask(eth_dev);
        if (priv->config.dv_xmeta_en != MLX5_XMETA_MODE_LEGACY &&
            !priv->sh->dv_regc0_mask) {
                DRV_LOG(ERR, "metadata mode %u is not supported "
                        "(no metadata reg_c[0] is available).",
                        priv->config.dv_xmeta_en);
                err = ENOTSUP;
                goto error;
        }
        mlx5_cache_list_init(&priv->hrxqs, "hrxq", 0, eth_dev,
                             mlx5_hrxq_create_cb,
                             mlx5_hrxq_match_cb,
                             mlx5_hrxq_remove_cb);
        /* Query availability of metadata reg_c's. */
        err = mlx5_flow_discover_mreg_c(eth_dev);
        if (err < 0) {
                err = -err;
                goto error;
        }
        if (!mlx5_flow_ext_mreg_supported(eth_dev)) {
                DRV_LOG(DEBUG,
                        "port %u extensive metadata register is not supported.",
                        eth_dev->data->port_id);
                if (priv->config.dv_xmeta_en != MLX5_XMETA_MODE_LEGACY) {
                        DRV_LOG(ERR, "metadata mode %u is not supported "
                                "(no metadata registers available).",
                                priv->config.dv_xmeta_en);
                        err = ENOTSUP;
                        goto error;
                }
        }
        if (config->devx && config->dv_flow_en) {
                priv->obj_ops = devx_obj_ops;
        } else {
                DRV_LOG(ERR, "Flow mode %u is not supported "
                        "(Windows flow must be DevX with DV flow enabled).",
                        priv->config.dv_flow_en);
                err = ENOTSUP;
                goto error;
        }
        mlx5_flow_counter_mode_config(eth_dev);
        return eth_dev;
error:
        if (priv) {
                if (own_domain_id)
                        claim_zero(rte_eth_switch_domain_free(priv->domain_id));
                mlx5_free(priv);
                if (eth_dev != NULL)
                        eth_dev->data->dev_private = NULL;
        }
        if (eth_dev != NULL) {
                /* mac_addrs must not be freed alone because part of
                 * dev_private
                 */
                eth_dev->data->mac_addrs = NULL;
                rte_eth_dev_release_port(eth_dev);
        }
        if (sh)
                mlx5_free_shared_dev_ctx(sh);
        MLX5_ASSERT(err > 0);
        rte_errno = err;
        return NULL;
}
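
/*
 * mlx5_dev_spawn() is the only spawn path used by mlx5_os_pci_probe()
 * below; the Windows port currently builds a one-entry spawn list, so at
 * most one Ethernet device is created per probed PCI device.
 */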

/**
 * This function should share events between multiple ports of single IB
 * device. Currently it has no support under Windows.
 *
 * @param sh
 *   Pointer to mlx5_dev_ctx_shared object.
 */
void
mlx5_os_dev_shared_handler_install(struct mlx5_dev_ctx_shared *sh)
{
        (void)sh;
        DRV_LOG(WARNING, "%s: is not supported", __func__);
}

/**
 * This function should share events between multiple ports of single IB
 * device. Currently it has no support under Windows.
 *
 * @param sh
 *   Pointer to mlx5_dev_ctx_shared object.
 */
void
mlx5_os_dev_shared_handler_uninstall(struct mlx5_dev_ctx_shared *sh)
{
        (void)sh;
        DRV_LOG(WARNING, "%s: is not supported", __func__);
}

/**
 * Read statistics by a named counter.
 *
 * @param[in] priv
 *   Pointer to the private device data structure.
 * @param[in] ctr_name
 *   Pointer to the name of the statistic counter to read.
 * @param[out] stat
 *   Pointer to read statistic value.
 * @return
 *   0 on success and stat is valid, 1 if failed to read the value
 *   and rte_errno is set.
 *
 */
int
mlx5_os_read_dev_stat(struct mlx5_priv *priv, const char *ctr_name,
                      uint64_t *stat)
{
        RTE_SET_USED(priv);
        RTE_SET_USED(ctr_name);
        RTE_SET_USED(stat);
        DRV_LOG(WARNING, "%s: is not supported", __func__);
        return -ENOTSUP;
}

/**
 * Flush device MAC addresses.
 * Currently it has no support under Windows.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 *
 */
void
mlx5_os_mac_addr_flush(struct rte_eth_dev *dev)
{
        (void)dev;
        DRV_LOG(WARNING, "%s: is not supported", __func__);
}

/**
 * Remove a MAC address from device.
 * Currently it has no support under Windows.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param index
 *   MAC address index.
 */
void
mlx5_os_mac_addr_remove(struct rte_eth_dev *dev, uint32_t index)
{
        (void)dev;
        (void)(index);
        DRV_LOG(WARNING, "%s: is not supported", __func__);
}

/**
 * Adds a MAC address to the device.
 * Currently it has no support under Windows.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param mac_addr
 *   MAC address to register.
 * @param index
 *   MAC address index.
 *
 * @return
 *   0 on success, a negative errno value otherwise.
 */
int
mlx5_os_mac_addr_add(struct rte_eth_dev *dev, struct rte_ether_addr *mac,
                     uint32_t index)
{
        (void)index;
        struct rte_ether_addr lmac;

        if (mlx5_get_mac(dev, &lmac.addr_bytes)) {
                DRV_LOG(ERR,
                        "port %u cannot get MAC address, is mlx5_en"
                        " loaded? (errno: %s)",
                        dev->data->port_id, strerror(rte_errno));
                return rte_errno;
        }
        if (!rte_is_same_ether_addr(&lmac, mac)) {
                DRV_LOG(ERR,
                        "adding new mac address to device is unsupported");
                return -ENOTSUP;
        }
        return 0;
}

/**
 * Modify a VF MAC address.
 * Currently it has no support under Windows.
 *
 * @param priv
 *   Pointer to device private data.
 * @param iface_idx
 *   Net device interface index.
 * @param mac_addr
 *   MAC address to modify into.
 * @param vf_index
 *   VF index.
 *
 * @return
 *   0 on success, a negative errno value otherwise.
 */
int
mlx5_os_vf_mac_addr_modify(struct mlx5_priv *priv,
                           unsigned int iface_idx,
                           struct rte_ether_addr *mac_addr,
                           int vf_index)
{
        (void)priv;
        (void)iface_idx;
        (void)mac_addr;
        (void)vf_index;
        DRV_LOG(WARNING, "%s: is not supported", __func__);
        return -ENOTSUP;
}

/**
 * Set device promiscuous mode.
 * Currently it has no support under Windows.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param enable
 *   0 - promiscuous is disabled, otherwise - enabled.
 *
 * @return
 *   0 on success, a negative error value otherwise.
 */
int
mlx5_os_set_promisc(struct rte_eth_dev *dev, int enable)
{
        (void)dev;
        (void)enable;
        DRV_LOG(WARNING, "%s: is not supported", __func__);
        return -ENOTSUP;
}

/**
 * Set device allmulti mode.
 * Currently it has no support under Windows.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param enable
 *   0 - all multicast is disabled, otherwise - enabled.
 *
 * @return
 *   0 on success, a negative error value otherwise.
 */
int
mlx5_os_set_allmulti(struct rte_eth_dev *dev, int enable)
{
        (void)dev;
        (void)enable;
        DRV_LOG(WARNING, "%s: is not supported", __func__);
        return -ENOTSUP;
}

/**
 * Detect if a devx_device_bdf object has identical DBDF values to the
 * rte_pci_addr found in bus/pci probing.
 *
 * @param[in] devx_bdf
 *   Pointer to the devx_device_bdf structure.
 * @param[in] addr
 *   Pointer to the rte_pci_addr structure.
 *
 * @return
 *   1 on Device match, 0 on mismatch.
 */
static int
mlx5_match_devx_bdf_to_addr(struct devx_device_bdf *devx_bdf,
                            struct rte_pci_addr *addr)
{
        if (addr->domain != (devx_bdf->bus_id >> 8) ||
            addr->bus != (devx_bdf->bus_id & 0xff) ||
            addr->devid != devx_bdf->dev_id ||
            addr->function != devx_bdf->fnc_id) {
                return 0;
        }
        return 1;
}
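
/*
 * The devx bus_id above packs the PCI domain in its upper bits and the bus
 * number in its low byte. For example, a device at domain 0, bus 0x3b,
 * device 0x00, function 0x1 is expected to report bus_id == 0x003b,
 * dev_id == 0x00 and fnc_id == 0x1.
 */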

/**
 * Detect if a devx_device_bdf object matches the rte_pci_addr
 * found in bus/pci probing.
 * Compare both the Native/PF BDF and the raw_bdf representing a VF BDF.
 *
 * @param[in] devx_bdf
 *   Pointer to the devx_device_bdf structure.
 * @param[in] addr
 *   Pointer to the rte_pci_addr structure.
 *
 * @return
 *   1 on Device match, 0 on mismatch, rte_errno code on failure.
 */
static int
mlx5_match_devx_devices_to_addr(struct devx_device_bdf *devx_bdf,
                                struct rte_pci_addr *addr)
{
        int err;
        struct devx_device mlx5_dev;

        if (mlx5_match_devx_bdf_to_addr(devx_bdf, addr))
                return 1;
        /*
         * Didn't match on Native/PF BDF, could still
         * match a VF BDF, check it next.
         */
        err = mlx5_glue->query_device(devx_bdf, &mlx5_dev);
        if (err) {
                DRV_LOG(ERR, "query_device failed");
                rte_errno = err;
                return rte_errno;
        }
        if (mlx5_match_devx_bdf_to_addr(&mlx5_dev.raw_bdf, addr))
                return 1;
        return 0;
}

/**
 * DPDK callback to register a PCI device.
 *
 * This function spawns Ethernet devices out of a given PCI device.
 *
 * @param[in] pci_drv
 *   PCI driver structure (mlx5_driver).
 * @param[in] pci_dev
 *   PCI device information.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_os_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
                  struct rte_pci_device *pci_dev)
{
        struct devx_device_bdf *devx_bdf_devs, *orig_devx_bdf_devs;
        /*
         * Number of found IB Devices matching with requested PCI BDF.
         * nd != 1 means there are multiple IB devices over the same
         * PCI device and we have representors and master.
         */
        unsigned int nd = 0;
        /*
         * Number of found IB device Ports. nd = 1 and np = 1..n means
         * we have the single multiport IB device, and there may be
         * representors attached to some of found ports.
         * Currently not supported.
         * unsigned int np = 0;
         */

        /*
         * Number of DPDK ethernet devices to Spawn - either over
         * multiple IB devices or multiple ports of single IB device.
         * Actually this is the number of iterations to spawn.
         */
        unsigned int ns = 0;
        /*
         * Bonding device
         *   < 0 - no bonding device (single one)
         *  >= 0 - bonding device (value is slave PF index)
         */
        int bd = -1;
        struct mlx5_dev_spawn_data *list = NULL;
        struct mlx5_dev_config dev_config;
        unsigned int dev_config_vf;
        int ret, err;
        uint32_t restore;

        if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
                DRV_LOG(ERR, "Secondary process is not supported on Windows.");
                return -ENOTSUP;
        }
        ret = mlx5_init_once();
        if (ret) {
                DRV_LOG(ERR, "unable to init PMD global data: %s",
                        strerror(rte_errno));
                return -rte_errno;
        }
        errno = 0;
        devx_bdf_devs = mlx5_glue->get_device_list(&ret);
        orig_devx_bdf_devs = devx_bdf_devs;
        if (!devx_bdf_devs) {
                rte_errno = errno ? errno : ENOSYS;
                DRV_LOG(ERR, "cannot list devices, is ib_uverbs loaded?");
                return -rte_errno;
        }
        /*
         * First scan the list of all Infiniband devices to find
         * matching ones, gathering into the list.
         */
        struct devx_device_bdf *devx_bdf_match[ret + 1];

        while (ret-- > 0) {
                err = mlx5_match_devx_devices_to_addr(devx_bdf_devs,
                                                      &pci_dev->addr);
                if (!err) {
                        devx_bdf_devs++;
                        continue;
                }
                if (err != 1) {
                        ret = -err;
                        goto exit;
                }
                devx_bdf_match[nd++] = devx_bdf_devs;
        }
        devx_bdf_match[nd] = NULL;
        if (!nd) {
                /* No device matches, just complain and bail out. */
                DRV_LOG(WARNING,
                        "no DevX device matches PCI device " PCI_PRI_FMT ","
                        " is DevX Configured?",
                        pci_dev->addr.domain, pci_dev->addr.bus,
                        pci_dev->addr.devid, pci_dev->addr.function);
                rte_errno = ENOENT;
                ret = -rte_errno;
                goto exit;
        }
        /*
         * Now we can determine the maximal
         * amount of devices to be spawned.
         */
        list = mlx5_malloc(MLX5_MEM_ZERO,
                           sizeof(struct mlx5_dev_spawn_data),
                           RTE_CACHE_LINE_SIZE, SOCKET_ID_ANY);
        if (!list) {
                DRV_LOG(ERR, "spawn data array allocation failure");
                rte_errno = ENOMEM;
                ret = -rte_errno;
                goto exit;
        }
        memset(&list[ns].info, 0, sizeof(list[ns].info));
        list[ns].max_port = 1;
        list[ns].phys_port = 1;
        list[ns].phys_dev = devx_bdf_match[ns];
        list[ns].eth_dev = NULL;
        list[ns].pci_dev = pci_dev;
        list[ns].pf_bond = bd;
        list[ns].ifindex = -1; /* Spawn will assign */
        list[ns].info =
                (struct mlx5_switch_info){
                        .master = 0,
                        .representor = 0,
                        .name_type = MLX5_PHYS_PORT_NAME_TYPE_UPLINK,
                        .port_name = 0,
                        .switch_id = 0,
                };
        /* Device specific configuration. */
        switch (pci_dev->id.device_id) {
        case PCI_DEVICE_ID_MELLANOX_CONNECTX4VF:
        case PCI_DEVICE_ID_MELLANOX_CONNECTX4LXVF:
        case PCI_DEVICE_ID_MELLANOX_CONNECTX5VF:
        case PCI_DEVICE_ID_MELLANOX_CONNECTX5EXVF:
        case PCI_DEVICE_ID_MELLANOX_CONNECTX5BFVF:
        case PCI_DEVICE_ID_MELLANOX_CONNECTX6VF:
        case PCI_DEVICE_ID_MELLANOX_CONNECTXVF:
                dev_config_vf = 1;
                break;
        default:
                dev_config_vf = 0;
                break;
        }
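        /*
         * Note on the defaults below: E-Switch (dv_esw_en), tunnel
         * decapsulation (decap_en) and VF netlink control (vf_nl_en) are
         * left disabled because those paths are not available in the
         * Windows port, while DV flow (dv_flow_en) is enabled as the only
         * supported flow engine here.
         */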
        /* Default configuration. */
        memset(&dev_config, 0, sizeof(struct mlx5_dev_config));
        dev_config.vf = dev_config_vf;
        dev_config.mps = 0;
        dev_config.dbnc = MLX5_ARG_UNSET;
        dev_config.rx_vec_en = 1;
        dev_config.txq_inline_max = MLX5_ARG_UNSET;
        dev_config.txq_inline_min = MLX5_ARG_UNSET;
        dev_config.txq_inline_mpw = MLX5_ARG_UNSET;
        dev_config.txqs_inline = MLX5_ARG_UNSET;
        dev_config.vf_nl_en = 0;
        dev_config.mr_ext_memseg_en = 1;
        dev_config.mprq.max_memcpy_len = MLX5_MPRQ_MEMCPY_DEFAULT_LEN;
        dev_config.mprq.min_rxqs_num = MLX5_MPRQ_MIN_RXQS;
        dev_config.dv_esw_en = 0;
        dev_config.dv_flow_en = 1;
        dev_config.decap_en = 0;
        dev_config.log_hp_size = MLX5_ARG_UNSET;
        list[ns].eth_dev = mlx5_dev_spawn(&pci_dev->device,
                                          &list[ns],
                                          &dev_config);
        if (!list[ns].eth_dev)
                goto exit;
        restore = list[ns].eth_dev->data->dev_flags;
        rte_eth_copy_pci_info(list[ns].eth_dev, pci_dev);
        /* Restore non-PCI flags cleared by the above call. */
        list[ns].eth_dev->data->dev_flags |= restore;
        rte_eth_dev_probing_finish(list[ns].eth_dev);
        ret = 0;
exit:
        /*
         * Do the routine cleanup:
         * - free allocated spawn data array
         * - free the device list
         */
        if (list)
                mlx5_free(list);
        MLX5_ASSERT(orig_devx_bdf_devs);
        mlx5_glue->free_device_list(orig_devx_bdf_devs);
        return ret;
}

/**
 * Set the reg_mr and dereg_mr callbacks.
 *
 * @param[out] reg_mr_cb
 *   Pointer to reg_mr func
 * @param[out] dereg_mr_cb
 *   Pointer to dereg_mr func
 *
 */
void
mlx5_os_set_reg_mr_cb(mlx5_reg_mr_t *reg_mr_cb,
                      mlx5_dereg_mr_t *dereg_mr_cb)
{
        *reg_mr_cb = mlx5_os_reg_mr;
        *dereg_mr_cb = mlx5_os_dereg_mr;
}

/**
 * Extract pdn of PD object using DevX.
 *
 * @param[in] pd
 *   Pointer to the DevX PD object.
 * @param[out] pdn
 *   Pointer to the PD object number variable.
 *
 * @return
 *   0 on success, error value otherwise.
 */
int
mlx5_os_get_pdn(void *pd, uint32_t *pdn)
{
        if (!pd)
                return -EINVAL;

        *pdn = ((struct mlx5_pd *)pd)->pdn;
        return 0;
}

const struct mlx5_flow_driver_ops mlx5_flow_verbs_drv_ops = {0};
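
/*
 * Verbs-based flow operations are not implemented in the Windows port, so
 * the structure above is intentionally left zeroed; flows go through the
 * DevX/DV path selected in mlx5_dev_spawn().
 */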