/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2019 Mellanox Technologies, Ltd
 */

#include <unistd.h>
#include <string.h>
#include <stdio.h>

#include <rte_errno.h>
#include <rte_mempool.h>
#include <rte_malloc.h>

#include "mlx5_common.h"
#include "mlx5_common_os.h"
#include "mlx5_common_utils.h"
#include "mlx5_malloc.h"
#include "mlx5_common_pci.h"

int mlx5_common_logtype;

uint8_t haswell_broadwell_cpu;

/* On x86_64 Intel processors, check whether relaxed
 * ordering should be used.
 */
#ifdef RTE_ARCH_X86_64
/**
 * This function returns processor identification and feature information
 * into the registers.
 *
 * @param level
 *   The main category of information returned.
 * @param eax, ebx, ecx, edx
 *   Pointers to the registers that will hold the CPU information.
 */
static inline void mlx5_cpu_id(unsigned int level,
			       unsigned int *eax, unsigned int *ebx,
			       unsigned int *ecx, unsigned int *edx)
{
	__asm__("cpuid\n\t"
		: "=a" (*eax), "=b" (*ebx), "=c" (*ecx), "=d" (*edx)
		: "0" (level));
}
#endif

RTE_INIT_PRIO(mlx5_log_init, LOG)
{
	mlx5_common_logtype = rte_log_register("pmd.common.mlx5");
	if (mlx5_common_logtype >= 0)
		rte_log_set_level(mlx5_common_logtype, RTE_LOG_NOTICE);
}

static bool mlx5_common_initialized;

/**
 * One-time initialization routine for the run-time dependency on the glue
 * library for multiple PMDs. Each mlx5 PMD that depends on the mlx5_common
 * module must invoke it in its constructor.
 */
void
mlx5_common_init(void)
{
	if (mlx5_common_initialized)
		return;

	mlx5_glue_constructor();
	mlx5_common_pci_init();
	mlx5_common_initialized = true;
}
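/*
 * Illustrative sketch only, not part of the driver: a hypothetical dependent
 * PMD is expected to call mlx5_common_init() from its own constructor, e.g.:
 *
 *	RTE_INIT(hypothetical_mlx5_pmd_init)
 *	{
 *		mlx5_common_init();
 *		// ... register the PMD with the corresponding bus ...
 *	}
 *
 * The routine is guarded by mlx5_common_initialized, so several dependent
 * PMDs may invoke it safely.
 */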
/**
 * This function is responsible for initializing the variable
 * haswell_broadwell_cpu by checking if the CPU is Intel
 * and reading the data returned from mlx5_cpu_id().
 * Since Haswell and Broadwell CPUs do not show improved performance
 * when using relaxed ordering, we want to check the CPU type before
 * deciding whether to enable RO or not.
 * If the CPU is Haswell or Broadwell the variable will be set to 1,
 * otherwise it will be 0.
 */
RTE_INIT_PRIO(mlx5_is_haswell_broadwell_cpu, LOG)
{
#ifdef RTE_ARCH_X86_64
	unsigned int broadwell_models[4] = {0x3d, 0x47, 0x4F, 0x56};
	unsigned int haswell_models[4] = {0x3c, 0x3f, 0x45, 0x46};
	unsigned int i, model, family, brand_id, vendor;
	unsigned int signature_intel_ebx = 0x756e6547;
	unsigned int extended_model;
	unsigned int eax = 0;
	unsigned int ebx = 0;
	unsigned int ecx = 0;
	unsigned int edx = 0;
	int max_level;

	mlx5_cpu_id(0, &eax, &ebx, &ecx, &edx);
	vendor = ebx;
	max_level = eax;
	if (max_level < 1) {
		haswell_broadwell_cpu = 0;
		return;
	}
	mlx5_cpu_id(1, &eax, &ebx, &ecx, &edx);
	model = (eax >> 4) & 0x0f;
	family = (eax >> 8) & 0x0f;
	brand_id = ebx & 0xff;
	extended_model = (eax >> 12) & 0xf0;
	/* Check if the processor is Haswell or Broadwell. */
	if (vendor == signature_intel_ebx) {
		if (family == 0x06)
			model += extended_model;
		if (brand_id == 0 && family == 0x6) {
			for (i = 0; i < RTE_DIM(broadwell_models); i++)
				if (model == broadwell_models[i]) {
					haswell_broadwell_cpu = 1;
					return;
				}
			for (i = 0; i < RTE_DIM(haswell_models); i++)
				if (model == haswell_models[i]) {
					haswell_broadwell_cpu = 1;
					return;
				}
		}
	}
#endif
	haswell_broadwell_cpu = 0;
}

/**
 * Allocate a page of door-bells and register it using the DevX API.
 *
 * @param [in] ctx
 *   Pointer to the device context.
 *
 * @return
 *   Pointer to the new page on success, NULL otherwise.
 */
static struct mlx5_devx_dbr_page *
mlx5_alloc_dbr_page(void *ctx)
{
	struct mlx5_devx_dbr_page *page;

	/* Allocate space for the door-bell page and management data. */
	page = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO,
			   sizeof(struct mlx5_devx_dbr_page),
			   RTE_CACHE_LINE_SIZE, SOCKET_ID_ANY);
	if (!page) {
		DRV_LOG(ERR, "cannot allocate dbr page");
		return NULL;
	}
	/* Register the allocated memory. */
	page->umem = mlx5_glue->devx_umem_reg(ctx, page->dbrs,
					      MLX5_DBR_PAGE_SIZE, 0);
	if (!page->umem) {
		DRV_LOG(ERR, "cannot umem reg dbr page");
		mlx5_free(page);
		return NULL;
	}
	return page;
}

/**
 * Find the next available door-bell, allocate a new page if needed.
 *
 * @param [in] ctx
 *   Pointer to the device context.
 * @param [in] head
 *   Pointer to the head of the dbr pages list.
 * @param [out] dbr_page
 *   Door-bell page containing the allocated record.
 *
 * @return
 *   Door-bell address offset on success, a negative error value otherwise.
 */
int64_t
mlx5_get_dbr(void *ctx, struct mlx5_dbr_page_list *head,
	     struct mlx5_devx_dbr_page **dbr_page)
{
	struct mlx5_devx_dbr_page *page = NULL;
	uint32_t i, j;

	LIST_FOREACH(page, head, next)
		if (page->dbr_count < MLX5_DBR_PER_PAGE)
			break;
	if (!page) { /* No page with a free door-bell exists. */
		page = mlx5_alloc_dbr_page(ctx);
		if (!page) /* Failed to allocate a new page. */
			return (-1);
		LIST_INSERT_HEAD(head, page, next);
	}
	/* Loop to find a bitmap part with a clear bit. */
	for (i = 0;
	     i < MLX5_DBR_BITMAP_SIZE && page->dbr_bitmap[i] == UINT64_MAX;
	     i++)
		; /* Empty. */
	/* Find the first clear bit. */
	MLX5_ASSERT(i < MLX5_DBR_BITMAP_SIZE);
	j = rte_bsf64(~page->dbr_bitmap[i]);
	page->dbr_bitmap[i] |= (UINT64_C(1) << j);
	page->dbr_count++;
	*dbr_page = page;
	return (i * CHAR_BIT * sizeof(uint64_t) + j) * MLX5_DBR_SIZE;
}
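/*
 * Illustrative sketch only, not part of the driver: a hypothetical caller
 * obtaining a door-bell record for a queue object. The devx_ctx and dbr_list
 * variables are assumed to be the caller's device context and page list head:
 *
 *	struct mlx5_devx_dbr_page *dbr_page;
 *	int64_t dbr_offset;
 *	uint32_t *db_rec;
 *
 *	dbr_offset = mlx5_get_dbr(devx_ctx, &dbr_list, &dbr_page);
 *	if (dbr_offset < 0)
 *		return -ENOMEM;
 *	db_rec = (uint32_t *)((uintptr_t)dbr_page->dbrs + dbr_offset);
 *
 * The record address is derived from the page base (dbr_page->dbrs) plus the
 * returned byte offset; the owning page is later identified by its UMEM ID.
 */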
/**
 * Release a door-bell record.
 *
 * @param [in] head
 *   Pointer to the head of the dbr pages list.
 * @param [in] umem_id
 *   UMEM ID of the page containing the door-bell record to release.
 * @param [in] offset
 *   Offset of the door-bell record in the page.
 *
 * @return
 *   0 on success, a negative error value otherwise.
 */
int32_t
mlx5_release_dbr(struct mlx5_dbr_page_list *head, uint32_t umem_id,
		 uint64_t offset)
{
	struct mlx5_devx_dbr_page *page = NULL;
	int ret = 0;

	LIST_FOREACH(page, head, next)
		/* Find the page this address belongs to. */
		if (mlx5_os_get_umem_id(page->umem) == umem_id)
			break;
	if (!page)
		return -EINVAL;
	page->dbr_count--;
	if (!page->dbr_count) {
		/* Page not used, free it and remove from the list. */
		LIST_REMOVE(page, next);
		if (page->umem)
			ret = -mlx5_glue->devx_umem_dereg(page->umem);
		mlx5_free(page);
	} else {
		/* Mark in the bitmap that this door-bell is not in use. */
		offset /= MLX5_DBR_SIZE;
		int i = offset / 64;
		int j = offset % 64;

		page->dbr_bitmap[i] &= ~(UINT64_C(1) << j);
	}
	return ret;
}
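/*
 * Illustrative sketch only, not part of the driver: releasing a record
 * obtained with mlx5_get_dbr() in the sketch above. The owning page is
 * identified by its UMEM ID and the byte offset returned on allocation:
 *
 *	ret = mlx5_release_dbr(&dbr_list,
 *			       mlx5_os_get_umem_id(dbr_page->umem),
 *			       (uint64_t)dbr_offset);
 *
 * When the last record of a page is released, the page is deregistered and
 * freed, so dbr_page must not be dereferenced afterwards.
 */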
/**
 * Allocate the User Access Region with DevX on the specified device.
 *
 * @param [in] ctx
 *   Infiniband device context to perform allocation on.
 * @param [in] mapping
 *   MLX5DV_UAR_ALLOC_TYPE_BF - allocate as cached memory with write-combining
 *				attributes (if supported by the host), the
 *				writes to the UAR registers must be followed
 *				by a write memory barrier.
 *   MLX5DV_UAR_ALLOC_TYPE_NC - allocate as non-cached memory, all writes are
 *				promoted to the registers immediately, no
 *				memory barriers needed.
 *   mapping < 0 - the first attempt is performed with
 *		   MLX5DV_UAR_ALLOC_TYPE_NC, if this fails the next attempt
 *		   with MLX5DV_UAR_ALLOC_TYPE_BF is performed. Drivers
 *		   specifying negative values should always provide the write
 *		   memory barrier operation after UAR register writes.
 *   If there are no definitions for MLX5DV_UAR_ALLOC_TYPE_xx (older rdma-core
 *   library headers), the caller can specify 0.
 *
 * @return
 *   UAR object pointer on success, NULL otherwise and rte_errno is set.
 */
void *
mlx5_devx_alloc_uar(void *ctx, int mapping)
{
	void *uar;
	uint32_t retry, uar_mapping;
	void *base_addr;

	for (retry = 0; retry < MLX5_ALLOC_UAR_RETRY; ++retry) {
#ifdef MLX5DV_UAR_ALLOC_TYPE_NC
		/* Control the mapping type according to the settings. */
		uar_mapping = (mapping < 0) ?
			      MLX5DV_UAR_ALLOC_TYPE_NC : mapping;
#else
		/*
		 * It seems we have no way to control the memory mapping type
		 * for the UAR, the default "Write-Combining" type is assumed.
		 */
		uar_mapping = 0;
		RTE_SET_USED(mapping);
#endif
		uar = mlx5_glue->devx_alloc_uar(ctx, uar_mapping);
#ifdef MLX5DV_UAR_ALLOC_TYPE_NC
		if (!uar &&
		    mapping < 0 &&
		    uar_mapping == MLX5DV_UAR_ALLOC_TYPE_BF) {
			/*
			 * In some environments, like a virtual machine, the
			 * Write-Combining mapping might not be supported and
			 * UAR allocation fails. Try the "Non-Cached" mapping
			 * in this case.
			 */
			DRV_LOG(WARNING, "Failed to allocate DevX UAR (BF)");
			uar_mapping = MLX5DV_UAR_ALLOC_TYPE_NC;
			uar = mlx5_glue->devx_alloc_uar(ctx, uar_mapping);
		} else if (!uar &&
			   mapping < 0 &&
			   uar_mapping == MLX5DV_UAR_ALLOC_TYPE_NC) {
			/*
			 * If Verbs/kernel does not support "Non-Cached",
			 * try the "Write-Combining" mapping.
			 */
			DRV_LOG(WARNING, "Failed to allocate DevX UAR (NC)");
			uar_mapping = MLX5DV_UAR_ALLOC_TYPE_BF;
			uar = mlx5_glue->devx_alloc_uar(ctx, uar_mapping);
		}
#endif
		if (!uar) {
			DRV_LOG(ERR, "Failed to allocate DevX UAR (BF/NC)");
			rte_errno = ENOMEM;
			goto exit;
		}
		base_addr = mlx5_os_get_devx_uar_base_addr(uar);
		if (base_addr)
			break;
		/*
		 * The UARs are allocated by rdma_core within the
		 * IB device context; on context closure all UARs
		 * will be freed, so there should be no memory/object leakage.
		 */
		DRV_LOG(WARNING, "Retrying to allocate DevX UAR");
		uar = NULL;
	}
	/* Check whether we finally succeeded with a valid UAR allocation. */
	if (!uar) {
		DRV_LOG(ERR, "Failed to allocate DevX UAR (NULL base)");
		rte_errno = ENOMEM;
	}
	/*
	 * Returning void * instead of struct mlx5dv_devx_uar *
	 * keeps compatibility with older rdma-core library headers.
	 */
exit:
	return uar;
}
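/*
 * Illustrative sketch only, not part of the driver: a hypothetical caller
 * letting the function choose the mapping (negative "mapping" argument) and
 * therefore pairing UAR register writes with an explicit write barrier, since
 * the resulting mapping may end up as Write-Combining:
 *
 *	void *uar = mlx5_devx_alloc_uar(devx_ctx, -1);
 *
 *	if (!uar)
 *		return -rte_errno;
 *	// ... write the door-bell to mlx5_os_get_devx_uar_base_addr(uar) ...
 *	rte_wmb();
 */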