/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2019 Mellanox Technologies, Ltd
 */
#include <stdlib.h>

#include <rte_malloc.h>
#include <rte_errno.h>
#include <rte_common.h>
#include <rte_sched_common.h>

#include <mlx5_prm.h>
#include <mlx5_common.h>

#include "mlx5_vdpa_utils.h"
#include "mlx5_vdpa.h"

void
mlx5_vdpa_mem_dereg(struct mlx5_vdpa_priv *priv)
{
	struct mlx5_vdpa_query_mr *entry;
	struct mlx5_vdpa_query_mr *next;

	entry = SLIST_FIRST(&priv->mr_list);
	while (entry) {
		next = SLIST_NEXT(entry, next);
		claim_zero(mlx5_devx_cmd_destroy(entry->mkey));
		if (!entry->is_indirect)
			claim_zero(mlx5_glue->devx_umem_dereg(entry->umem));
		SLIST_REMOVE(&priv->mr_list, entry, mlx5_vdpa_query_mr, next);
		rte_free(entry);
		entry = next;
	}
	SLIST_INIT(&priv->mr_list);
	if (priv->null_mr) {
		claim_zero(mlx5_glue->dereg_mr(priv->null_mr));
		priv->null_mr = NULL;
	}
	if (priv->vmem) {
		free(priv->vmem);
		priv->vmem = NULL;
	}
}

static int
mlx5_vdpa_regions_addr_cmp(const void *a, const void *b)
{
	const struct rte_vhost_mem_region *region_a = a;
	const struct rte_vhost_mem_region *region_b = b;

	if (region_a->guest_phys_addr < region_b->guest_phys_addr)
		return -1;
	if (region_a->guest_phys_addr > region_b->guest_phys_addr)
		return 1;
	return 0;
}

#define KLM_NUM_MAX_ALIGN(sz) (RTE_ALIGN_CEIL(sz, MLX5_MAX_KLM_BYTE_COUNT) / \
			       MLX5_MAX_KLM_BYTE_COUNT)

/*
 * Allocate and sort the region list and choose the indirect mkey mode:
 * 1. Calculate the GCD, the guest memory size and the number of indirect
 *    mkey entries per mode.
 * 2. Align the GCD to the maximum allowed size (2G) and to a power of 2.
 * 3. Decide the indirect mkey mode according to the next rules:
 *    a. If both the KLM_FBS entries number and the KLM entries number are
 *       bigger than the maximum allowed (MLX5_DEVX_MAX_KLM_ENTRIES) - error.
 *    b. KLM mode if the KLM_FBS entries number is bigger than the maximum
 *       allowed (MLX5_DEVX_MAX_KLM_ENTRIES).
 *    c. KLM mode if the GCD is smaller than the minimum allowed (4K).
 *    d. KLM mode if the total size of the KLM entries fits in one cache
 *       line while the total size of the KLM_FBS entries does not.
 *    e. Otherwise, KLM_FBS mode.
 */
static struct rte_vhost_memory *
mlx5_vdpa_vhost_mem_regions_prepare(int vid, uint8_t *mode, uint64_t *mem_size,
				    uint64_t *gcd, uint32_t *entries_num)
{
	struct rte_vhost_memory *mem;
	uint64_t size;
	uint64_t klm_entries_num = 0;
	uint64_t klm_fbs_entries_num;
	uint32_t i;
	int ret = rte_vhost_get_mem_table(vid, &mem);

	if (ret < 0) {
		DRV_LOG(ERR, "Failed to get VM memory layout vid = %d.", vid);
		rte_errno = EINVAL;
		return NULL;
	}
	qsort(mem->regions, mem->nregions, sizeof(mem->regions[0]),
	      mlx5_vdpa_regions_addr_cmp);
	*mem_size = (mem->regions[(mem->nregions - 1)].guest_phys_addr) +
		    (mem->regions[(mem->nregions - 1)].size) -
		    (mem->regions[0].guest_phys_addr);
	*gcd = 0;
	for (i = 0; i < mem->nregions; ++i) {
		DRV_LOG(INFO, "Region %u: HVA 0x%" PRIx64 ", GPA 0x%" PRIx64
			", size 0x%" PRIx64 ".", i,
			mem->regions[i].host_user_addr,
			mem->regions[i].guest_phys_addr, mem->regions[i].size);
		if (i > 0) {
			/* Handle the hole between the previous region and this one. */
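			/*
			 * Illustrative note, not from the original code (the
			 * sizes below are assumptions): for regions [0, 1G)
			 * and [3G, 4G), the 2G hole computed below feeds the
			 * GCD (gcd(1G, 2G) = 1G) and adds
			 * KLM_NUM_MAX_ALIGN(2G) = 1 entry, exactly like a
			 * real region, so the indirect mkey can later cover
			 * the gap with the null MR.
			 */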
			size = mem->regions[i].guest_phys_addr -
			       (mem->regions[i - 1].guest_phys_addr +
				mem->regions[i - 1].size);
			*gcd = rte_get_gcd(*gcd, size);
			klm_entries_num += KLM_NUM_MAX_ALIGN(size);
		}
		size = mem->regions[i].size;
		*gcd = rte_get_gcd(*gcd, size);
		klm_entries_num += KLM_NUM_MAX_ALIGN(size);
	}
	if (*gcd > MLX5_MAX_KLM_BYTE_COUNT)
		*gcd = rte_get_gcd(*gcd, MLX5_MAX_KLM_BYTE_COUNT);
	if (!RTE_IS_POWER_OF_2(*gcd)) {
		uint64_t candidate_gcd = rte_align64prevpow2(*gcd);

		while (candidate_gcd > 1 && (*gcd % candidate_gcd))
			candidate_gcd /= 2;
		DRV_LOG(DEBUG, "GCD 0x%" PRIx64 " is not power of 2. Adjusted "
			"GCD is 0x%" PRIx64 ".", *gcd, candidate_gcd);
		*gcd = candidate_gcd;
	}
	klm_fbs_entries_num = *mem_size / *gcd;
	if (*gcd < MLX5_MIN_KLM_FIXED_BUFFER_SIZE ||
	    klm_fbs_entries_num > MLX5_DEVX_MAX_KLM_ENTRIES ||
	    ((klm_entries_num * sizeof(struct mlx5_klm)) <=
	     RTE_CACHE_LINE_SIZE &&
	     (klm_fbs_entries_num * sizeof(struct mlx5_klm)) >
	     RTE_CACHE_LINE_SIZE)) {
		*mode = MLX5_MKC_ACCESS_MODE_KLM;
		*entries_num = klm_entries_num;
		DRV_LOG(INFO, "Indirect mkey mode is KLM.");
	} else {
		*mode = MLX5_MKC_ACCESS_MODE_KLM_FBS;
		*entries_num = klm_fbs_entries_num;
		DRV_LOG(INFO, "Indirect mkey mode is KLM Fixed Buffer Size.");
	}
	DRV_LOG(DEBUG, "Memory registration information: nregions = %u, "
		"mem_size = 0x%" PRIx64 ", GCD = 0x%" PRIx64
		", klm_fbs_entries_num = 0x%" PRIx64 ", klm_entries_num = 0x%"
		PRIx64 ".", mem->nregions, *mem_size, *gcd, klm_fbs_entries_num,
		klm_entries_num);
	if (*entries_num > MLX5_DEVX_MAX_KLM_ENTRIES) {
		DRV_LOG(ERR, "Failed to prepare memory of vid %d - memory is "
			"too fragmented.", vid);
		free(mem);
		return NULL;
	}
	return mem;
}

#define KLM_SIZE_MAX_ALIGN(sz) ((sz) > MLX5_MAX_KLM_BYTE_COUNT ? \
				MLX5_MAX_KLM_BYTE_COUNT : (sz))

/*
 * The target here is to group all the physical memory regions of the
 * virtio device in one indirect mkey.
 * For KLM Fixed Buffer Size mode (the HW finds the translation entry in
 * one read according to the guest physical address):
 *   All its sub-direct mkeys must have the same size, hence each of them
 *   is set to the GCD of all the virtio memory regions and the holes
 *   between them.
 * For KLM mode (each entry may have a different size, so the HW must
 * iterate over the entries):
 *   Each virtio memory region and each hole between them gets one entry;
 *   entries whose associated memory ranges are bigger than the maximum
 *   allowed size (2G) are split to cover it.
 * This means that, in both modes, a virtio memory region may be mapped
 * to more than one direct mkey.
 * All the holes of invalid memory between the virtio memory regions
 * are mapped to the null memory region for security.
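 *
 * Illustrative example (the sizes are assumptions, not taken from the
 * code): two 1G regions at GPA 0 and GPA 3G leave a 2G hole between them.
 * KLM mode costs 3 entries: the first 1G region, the 2G hole (pointing to
 * the null memory region) and the second 1G region. KLM_FBS mode uses the
 * GCD of {1G, 2G, 1G} = 1G as the fixed entry size, so the same 4G span
 * costs 4 entries which the HW can index directly.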
 */
int
mlx5_vdpa_mem_register(struct mlx5_vdpa_priv *priv)
{
	struct mlx5_devx_mkey_attr mkey_attr;
	struct mlx5_vdpa_query_mr *entry = NULL;
	struct rte_vhost_mem_region *reg = NULL;
	uint8_t mode;
	uint32_t entries_num = 0;
	uint32_t i;
	uint64_t gcd;
	uint64_t klm_size;
	uint64_t mem_size;
	uint64_t k;
	int klm_index = 0;
	int ret;
	struct rte_vhost_memory *mem = mlx5_vdpa_vhost_mem_regions_prepare
			      (priv->vid, &mode, &mem_size, &gcd, &entries_num);
	struct mlx5_klm klm_array[entries_num];

	if (!mem)
		return -rte_errno;
	priv->vmem = mem;
	priv->null_mr = mlx5_glue->alloc_null_mr(priv->pd);
	if (!priv->null_mr) {
		DRV_LOG(ERR, "Failed to allocate null MR.");
		ret = -errno;
		goto error;
	}
	DRV_LOG(DEBUG, "Dump fill Mkey = %u.", priv->null_mr->lkey);
	for (i = 0; i < mem->nregions; i++) {
		reg = &mem->regions[i];
		entry = rte_zmalloc(__func__, sizeof(*entry), 0);
		if (!entry) {
			ret = -ENOMEM;
			DRV_LOG(ERR, "Failed to allocate mem entry memory.");
			goto error;
		}
		entry->umem = mlx5_glue->devx_umem_reg(priv->ctx,
					 (void *)(uintptr_t)reg->host_user_addr,
					     reg->size, IBV_ACCESS_LOCAL_WRITE);
		if (!entry->umem) {
			DRV_LOG(ERR, "Failed to register Umem by Devx.");
			ret = -errno;
			goto error;
		}
		mkey_attr.addr = (uintptr_t)(reg->guest_phys_addr);
		mkey_attr.size = reg->size;
		mkey_attr.umem_id = entry->umem->umem_id;
		mkey_attr.pd = priv->pdn;
		mkey_attr.pg_access = 1;
		mkey_attr.klm_array = NULL;
		mkey_attr.klm_num = 0;
		mkey_attr.relaxed_ordering_read = 0;
		mkey_attr.relaxed_ordering_write = 0;
		entry->mkey = mlx5_devx_cmd_mkey_create(priv->ctx, &mkey_attr);
		if (!entry->mkey) {
			DRV_LOG(ERR, "Failed to create direct Mkey.");
			ret = -rte_errno;
			goto error;
		}
		entry->addr = (void *)(uintptr_t)(reg->host_user_addr);
		entry->length = reg->size;
		entry->is_indirect = 0;
		if (i > 0) {
			uint64_t sadd;
			uint64_t empty_region_sz = reg->guest_phys_addr -
					  (mem->regions[i - 1].guest_phys_addr +
					   mem->regions[i - 1].size);

			if (empty_region_sz > 0) {
				sadd = mem->regions[i - 1].guest_phys_addr +
				       mem->regions[i - 1].size;
				klm_size = mode == MLX5_MKC_ACCESS_MODE_KLM ?
				      KLM_SIZE_MAX_ALIGN(empty_region_sz) : gcd;
				for (k = 0; k < empty_region_sz;
				     k += klm_size) {
					klm_array[klm_index].byte_count =
						k + klm_size > empty_region_sz ?
						 empty_region_sz - k : klm_size;
					klm_array[klm_index].mkey =
							    priv->null_mr->lkey;
					klm_array[klm_index].address = sadd + k;
					klm_index++;
				}
			}
		}
		klm_size = mode == MLX5_MKC_ACCESS_MODE_KLM ?
					   KLM_SIZE_MAX_ALIGN(reg->size) : gcd;
		for (k = 0; k < reg->size; k += klm_size) {
			klm_array[klm_index].byte_count = k + klm_size >
					   reg->size ? reg->size - k : klm_size;
			klm_array[klm_index].mkey = entry->mkey->id;
			klm_array[klm_index].address = reg->guest_phys_addr + k;
			klm_index++;
		}
		SLIST_INSERT_HEAD(&priv->mr_list, entry, next);
	}
	mkey_attr.addr = (uintptr_t)(mem->regions[0].guest_phys_addr);
	mkey_attr.size = mem_size;
	mkey_attr.pd = priv->pdn;
	mkey_attr.umem_id = 0;
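	/*
	 * Clarifying note (an assumption based on the FBS description
	 * above): in KLM_FBS mode each entry covers exactly
	 * 2^log_entity_size bytes, letting the HW locate the entry of a
	 * guest physical address with a single shift, e.g. gcd = 0x100000
	 * gives log_entity_size = 20. The GCD was already adjusted to a
	 * power of 2, so rte_log2_u64() is exact here.
	 */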
	/* Must be zero for KLM mode. */
	mkey_attr.log_entity_size = mode == MLX5_MKC_ACCESS_MODE_KLM_FBS ?
							  rte_log2_u64(gcd) : 0;
	mkey_attr.pg_access = 0;
	mkey_attr.klm_array = klm_array;
	mkey_attr.klm_num = klm_index;
	entry = rte_zmalloc(__func__, sizeof(*entry), 0);
	if (!entry) {
		DRV_LOG(ERR, "Failed to allocate memory for indirect entry.");
		ret = -ENOMEM;
		goto error;
	}
	entry->mkey = mlx5_devx_cmd_mkey_create(priv->ctx, &mkey_attr);
	if (!entry->mkey) {
		DRV_LOG(ERR, "Failed to create indirect Mkey.");
		ret = -rte_errno;
		goto error;
	}
	entry->is_indirect = 1;
	SLIST_INSERT_HEAD(&priv->mr_list, entry, next);
	priv->gpa_mkey_index = entry->mkey->id;
	return 0;
error:
	if (entry) {
		if (entry->mkey)
			mlx5_devx_cmd_destroy(entry->mkey);
		if (entry->umem)
			mlx5_glue->devx_umem_dereg(entry->umem);
		rte_free(entry);
	}
	mlx5_vdpa_mem_dereg(priv);
	rte_errno = -ret;
	return ret;
}
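
/*
 * Usage note (an assumption, not part of the original file): the mkey
 * index saved in priv->gpa_mkey_index above is meant to be referenced by
 * the virtqueue objects, so the device can DMA using guest physical
 * addresses directly while accesses to the holes resolve to the null MR.
 */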