/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2019 Mellanox Technologies, Ltd
 */
#include <stdlib.h>

#include <rte_malloc.h>
#include <rte_errno.h>
#include <rte_common.h>
#include <rte_sched_common.h>

#include <mlx5_prm.h>
#include <mlx5_common.h>

#include "mlx5_vdpa_utils.h"
#include "mlx5_vdpa.h"

void
mlx5_vdpa_mem_dereg(struct mlx5_vdpa_priv *priv)
{
	struct mlx5_vdpa_query_mr *mrs =
		(struct mlx5_vdpa_query_mr *)priv->mrs;
	struct mlx5_vdpa_query_mr *entry;
	int i;

	if (priv->mrs) {
		for (i = priv->num_mrs - 1; i >= 0; i--) {
			entry = &mrs[i];
			if (entry->is_indirect) {
				if (entry->mkey)
					claim_zero(
					mlx5_devx_cmd_destroy(entry->mkey));
			} else {
				if (entry->mr)
					claim_zero(
					mlx5_glue->dereg_mr(entry->mr));
			}
		}
		rte_free(priv->mrs);
		priv->mrs = NULL;
		priv->num_mrs = 0;
	}
	if (priv->vmem_info.vmem) {
		free(priv->vmem_info.vmem);
		priv->vmem_info.vmem = NULL;
	}
	priv->gpa_mkey_index = 0;
}

static int
mlx5_vdpa_regions_addr_cmp(const void *a, const void *b)
{
	const struct rte_vhost_mem_region *region_a = a;
	const struct rte_vhost_mem_region *region_b = b;

	if (region_a->guest_phys_addr < region_b->guest_phys_addr)
		return -1;
	if (region_a->guest_phys_addr > region_b->guest_phys_addr)
		return 1;
	return 0;
}

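/*
 * Number of KLM entries needed to cover sz bytes when each entry maps at
 * most MLX5_MAX_KLM_BYTE_COUNT bytes.
 */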
#define KLM_NUM_MAX_ALIGN(sz)	(RTE_ALIGN_CEIL(sz, MLX5_MAX_KLM_BYTE_COUNT) / \
				 MLX5_MAX_KLM_BYTE_COUNT)

/*
 * Allocate and sort the region list and choose the indirect mkey mode:
 * 1. Calculate the GCD, the guest memory size and the number of indirect
 *    mkey entries per mode.
 * 2. Align the GCD to the maximum allowed size (2G) and to a power of 2.
 * 3. Decide the indirect mkey mode according to the following rules:
 *    a. If both the KLM_FBS entries number and the KLM entries number are
 *       bigger than the maximum allowed (MLX5_DEVX_MAX_KLM_ENTRIES) - error.
 *    b. KLM mode if the KLM_FBS entries number is bigger than the maximum
 *       allowed (MLX5_DEVX_MAX_KLM_ENTRIES).
 *    c. KLM mode if the GCD is smaller than the minimum allowed (4K).
 *    d. KLM mode if the total size of the KLM entries fits in one cache
 *       line while the total size of the KLM_FBS entries does not.
 *    e. Otherwise, KLM_FBS mode.
 */
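/*
 * Illustrative example (numbers chosen for exposition only, assuming a
 * 16-byte struct mlx5_klm and a 64-byte cache line): two 1G regions
 * separated by a 2M hole give GCD = 2M, which is a power of 2 and above
 * the 4K minimum. KLM mode needs only 3 entries (one per region and one
 * for the hole), while KLM_FBS needs mem_size / GCD = 1025 equal-size
 * entries, so KLM mode is selected by rule (d) above.
 */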
static struct rte_vhost_memory *
mlx5_vdpa_vhost_mem_regions_prepare(int vid, uint8_t *mode, uint64_t *mem_size,
				    uint64_t *gcd, uint32_t *entries_num)
{
	struct rte_vhost_memory *mem;
	uint64_t size;
	uint64_t klm_entries_num = 0;
	uint64_t klm_fbs_entries_num;
	uint32_t i;
	int ret = rte_vhost_get_mem_table(vid, &mem);

	if (ret < 0) {
		DRV_LOG(ERR, "Failed to get VM memory layout, vid = %d.", vid);
		rte_errno = EINVAL;
		return NULL;
	}
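	/*
	 * Sort the regions by guest physical address so that the covered GPA
	 * span and the holes between consecutive regions can be computed.
	 */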
	qsort(mem->regions, mem->nregions, sizeof(mem->regions[0]),
	      mlx5_vdpa_regions_addr_cmp);
	*mem_size = (mem->regions[(mem->nregions - 1)].guest_phys_addr) +
		    (mem->regions[(mem->nregions - 1)].size) -
		    (mem->regions[0].guest_phys_addr);
	*gcd = 0;
	for (i = 0; i < mem->nregions; ++i) {
		DRV_LOG(INFO, "Region %u: HVA 0x%" PRIx64 ", GPA 0x%" PRIx64
			", size 0x%" PRIx64 ".", i,
			mem->regions[i].host_user_addr,
			mem->regions[i].guest_phys_addr, mem->regions[i].size);
		if (i > 0) {
			/* Handle the hole between this region and the previous one. */
			size = mem->regions[i].guest_phys_addr -
			       (mem->regions[i - 1].guest_phys_addr +
				mem->regions[i - 1].size);
			*gcd = rte_get_gcd64(*gcd, size);
			klm_entries_num += KLM_NUM_MAX_ALIGN(size);
		}
		size = mem->regions[i].size;
		*gcd = rte_get_gcd64(*gcd, size);
		klm_entries_num += KLM_NUM_MAX_ALIGN(size);
	}
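	/* Keep the GCD within the maximum byte count one KLM entry can map (2G). */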
	if (*gcd > MLX5_MAX_KLM_BYTE_COUNT)
		*gcd = rte_get_gcd64(*gcd, MLX5_MAX_KLM_BYTE_COUNT);
	if (!RTE_IS_POWER_OF_2(*gcd)) {
		uint64_t candidate_gcd = rte_align64prevpow2(*gcd);

		while (candidate_gcd > 1 && (*gcd % candidate_gcd))
			candidate_gcd /= 2;
		DRV_LOG(DEBUG, "GCD 0x%" PRIx64 " is not power of 2. Adjusted "
			"GCD is 0x%" PRIx64 ".", *gcd, candidate_gcd);
		*gcd = candidate_gcd;
	}
	klm_fbs_entries_num = *mem_size / *gcd;
	if (*gcd < MLX5_MIN_KLM_FIXED_BUFFER_SIZE || klm_fbs_entries_num >
	    MLX5_DEVX_MAX_KLM_ENTRIES ||
	    ((klm_entries_num * sizeof(struct mlx5_klm)) <=
	    RTE_CACHE_LINE_SIZE && (klm_fbs_entries_num *
	    sizeof(struct mlx5_klm)) >
	    RTE_CACHE_LINE_SIZE)) {
		*mode = MLX5_MKC_ACCESS_MODE_KLM;
		*entries_num = klm_entries_num;
		DRV_LOG(INFO, "Indirect mkey mode is KLM.");
	} else {
		*mode = MLX5_MKC_ACCESS_MODE_KLM_FBS;
		*entries_num = klm_fbs_entries_num;
		DRV_LOG(INFO, "Indirect mkey mode is KLM Fixed Buffer Size.");
	}
	DRV_LOG(DEBUG, "Memory registration information: nregions = %u, "
		"mem_size = 0x%" PRIx64 ", GCD = 0x%" PRIx64
		", klm_fbs_entries_num = 0x%" PRIx64 ", klm_entries_num = 0x%"
		PRIx64 ".", mem->nregions, *mem_size, *gcd, klm_fbs_entries_num,
		klm_entries_num);
	if (*entries_num > MLX5_DEVX_MAX_KLM_ENTRIES) {
		DRV_LOG(ERR, "Failed to prepare memory of vid %d - memory is "
			"too fragmented.", vid);
		free(mem);
		return NULL;
	}
	return mem;
}

static int
mlx5_vdpa_mem_cmp(struct rte_vhost_memory *mem1, struct rte_vhost_memory *mem2)
{
	uint32_t i;

	if (mem1->nregions != mem2->nregions)
		return -1;
	for (i = 0; i < mem1->nregions; i++) {
		if (mem1->regions[i].guest_phys_addr !=
		    mem2->regions[i].guest_phys_addr)
			return -1;
		if (mem1->regions[i].size != mem2->regions[i].size)
			return -1;
	}
	return 0;
}

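/* Clamp a single KLM entry size to the 2G maximum byte count. */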
#define KLM_SIZE_MAX_ALIGN(sz)	((sz) > MLX5_MAX_KLM_BYTE_COUNT ? \
				 MLX5_MAX_KLM_BYTE_COUNT : (sz))

static int
mlx5_vdpa_create_indirect_mkey(struct mlx5_vdpa_priv *priv)
{
	struct mlx5_devx_mkey_attr mkey_attr;
	struct mlx5_vdpa_query_mr *mrs =
		(struct mlx5_vdpa_query_mr *)priv->mrs;
	struct mlx5_vdpa_query_mr *entry;
	struct rte_vhost_mem_region *reg;
	uint8_t mode = priv->vmem_info.mode;
	uint32_t entries_num = priv->vmem_info.entries_num;
	struct rte_vhost_memory *mem = priv->vmem_info.vmem;
	struct mlx5_klm klm_array[entries_num];
	uint64_t gcd = priv->vmem_info.gcd;
	int ret = -rte_errno;
	uint64_t klm_size;
	int klm_index = 0;
	uint64_t k;
	uint32_t i;

	/* Fill the KLM array with entries for all regions and the holes between them. */
	for (i = 0; i < mem->nregions; i++) {
		entry = &mrs[i];
		reg = &mem->regions[i];
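		/* Map the hole before this region to the null memory region. */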
		if (i > 0) {
			uint64_t sadd;
			uint64_t empty_region_sz = reg->guest_phys_addr -
				(mem->regions[i - 1].guest_phys_addr +
				 mem->regions[i - 1].size);

			if (empty_region_sz > 0) {
				sadd = mem->regions[i - 1].guest_phys_addr +
				       mem->regions[i - 1].size;
				klm_size = mode == MLX5_MKC_ACCESS_MODE_KLM ?
					KLM_SIZE_MAX_ALIGN(empty_region_sz) : gcd;
				for (k = 0; k < empty_region_sz;
				     k += klm_size) {
					klm_array[klm_index].byte_count =
						k + klm_size > empty_region_sz ?
						empty_region_sz - k : klm_size;
					klm_array[klm_index].mkey =
						priv->null_mr->lkey;
					klm_array[klm_index].address = sadd + k;
					klm_index++;
				}
			}
		}
		klm_size = mode == MLX5_MKC_ACCESS_MODE_KLM ?
			KLM_SIZE_MAX_ALIGN(reg->size) : gcd;
		for (k = 0; k < reg->size; k += klm_size) {
			klm_array[klm_index].byte_count = k + klm_size >
				reg->size ? reg->size - k : klm_size;
			klm_array[klm_index].mkey = entry->mr->lkey;
			klm_array[klm_index].address = reg->guest_phys_addr + k;
			klm_index++;
		}
	}
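	/*
	 * Create one indirect mkey that references all the direct mkeys built
	 * above and covers the whole guest physical address span.
	 */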
	memset(&mkey_attr, 0, sizeof(mkey_attr));
	mkey_attr.addr = (uintptr_t)(mem->regions[0].guest_phys_addr);
	mkey_attr.size = priv->vmem_info.size;
	mkey_attr.pd = priv->cdev->pdn;
	mkey_attr.umem_id = 0;
	/* Must be zero for KLM mode. */
	mkey_attr.log_entity_size = mode == MLX5_MKC_ACCESS_MODE_KLM_FBS ?
					rte_log2_u64(gcd) : 0;
	mkey_attr.pg_access = 0;
	mkey_attr.klm_array = klm_array;
	mkey_attr.klm_num = klm_index;
	entry = &mrs[mem->nregions];
	entry->mkey = mlx5_devx_cmd_mkey_create(priv->cdev->ctx, &mkey_attr);
	if (!entry->mkey) {
		DRV_LOG(ERR, "Failed to create indirect Mkey.");
		rte_errno = -ret;
		return ret;
	}
	entry->is_indirect = 1;
	priv->gpa_mkey_index = entry->mkey->id;
	return 0;
}

/*
 * The target here is to group all the physical memory regions of the
 * virtio device in one indirect mkey.
 * For KLM Fixed Buffer Size mode (the HW finds the translation entry in
 * one read according to the guest physical address):
 *  All the sub-direct mkeys must have the same size, hence each of them
 *  covers the GCD size of all the virtio memory regions and of the holes
 *  between them.
 * For KLM mode (each entry may have a different size, so the HW must
 * iterate over the entries):
 *  Each virtio memory region and each hole between them gets one entry;
 *  entries whose associated memory regions are bigger than the maximum
 *  allowed size (2G) are split accordingly.
 * This means that in both modes each virtio memory region may be mapped
 * to more than one direct mkey.
 * All the holes of invalid memory between the virtio memory regions are
 * mapped to the null memory region for security.
 */
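/*
 * Illustrative example (sizes chosen for exposition only): in KLM mode a
 * 5G region is split into KLM_NUM_MAX_ALIGN(5G) = 3 entries of 2G, 2G and
 * 1G, while in KLM_FBS mode the same region contributes 5G / GCD
 * equal-size entries.
 */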
int
mlx5_vdpa_mem_register(struct mlx5_vdpa_priv *priv)
{
	void *mrs;
	uint8_t mode = 0;
	int ret = -rte_errno;
	uint32_t i, thrd_idx, data[1];
	RTE_ATOMIC(uint32_t) remaining_cnt = 0;
	RTE_ATOMIC(uint32_t) err_cnt = 0;
	uint32_t task_num = 0;
	struct rte_vhost_memory *mem = mlx5_vdpa_vhost_mem_regions_prepare
		(priv->vid, &mode, &priv->vmem_info.size,
		 &priv->vmem_info.gcd, &priv->vmem_info.entries_num);

	if (!mem)
		return -rte_errno;
	if (priv->vmem_info.vmem != NULL) {
		if (mlx5_vdpa_mem_cmp(mem, priv->vmem_info.vmem) == 0) {
			/* VM memory not changed, reuse resources. */
			free(mem);
			return 0;
		}
		mlx5_vdpa_mem_dereg(priv);
	}
	priv->vmem_info.vmem = mem;
	priv->vmem_info.mode = mode;
	priv->num_mrs = mem->nregions;
	if (!priv->num_mrs || priv->num_mrs >= MLX5_VDPA_MAX_MRS) {
		DRV_LOG(ERR,
			"Invalid number of memory regions.");
		goto error;
	}
	/* The last one is the indirect mkey entry. */
	priv->num_mrs++;
	mrs = rte_zmalloc("mlx5 vDPA memory regions",
			  sizeof(struct mlx5_vdpa_query_mr) * priv->num_mrs, 0);
	priv->mrs = mrs;
	if (!priv->mrs) {
		DRV_LOG(ERR, "Failed to allocate private memory regions.");
		goto error;
	}
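	/*
	 * When a configuration thread pool is available, spread the MR
	 * registrations across the pool threads and register the remaining
	 * regions from this thread; otherwise register them all here.
	 */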
	if (priv->use_c_thread) {
		uint32_t main_task_idx[mem->nregions];

		for (i = 0; i < mem->nregions; i++) {
			thrd_idx = i % (conf_thread_mng.max_thrds + 1);
			if (!thrd_idx) {
				main_task_idx[task_num] = i;
				task_num++;
				continue;
			}
			thrd_idx = priv->last_c_thrd_idx + 1;
			if (thrd_idx >= conf_thread_mng.max_thrds)
				thrd_idx = 0;
			priv->last_c_thrd_idx = thrd_idx;
			data[0] = i;
			if (mlx5_vdpa_task_add(priv, thrd_idx,
					       MLX5_VDPA_TASK_REG_MR,
					       &remaining_cnt, &err_cnt,
					       (void **)&data, 1)) {
				DRV_LOG(ERR,
					"Failed to add task for mem region %d.", i);
				main_task_idx[task_num] = i;
				task_num++;
			}
		}
		for (i = 0; i < task_num; i++) {
			ret = mlx5_vdpa_register_mr(priv,
						    main_task_idx[i]);
			if (ret) {
				DRV_LOG(ERR,
					"Failed to register mem region %d.",
					main_task_idx[i]);
				goto error;
			}
		}
		if (mlx5_vdpa_c_thread_wait_bulk_tasks_done(&remaining_cnt,
							    &err_cnt, 100)) {
			DRV_LOG(ERR,
				"Failed to wait for mem region registration tasks to complete.");
			goto error;
		}
	} else {
		for (i = 0; i < mem->nregions; i++) {
			ret = mlx5_vdpa_register_mr(priv, i);
			if (ret) {
				DRV_LOG(ERR,
					"Failed to register mem region %d.", i);
				goto error;
			}
		}
	}
	ret = mlx5_vdpa_create_indirect_mkey(priv);
	if (ret) {
		DRV_LOG(ERR, "Failed to create indirect mkey.");
		goto error;
	}
	return 0;
error:
	mlx5_vdpa_mem_dereg(priv);
	rte_errno = -ret;
	return ret;
}

int
mlx5_vdpa_register_mr(struct mlx5_vdpa_priv *priv, uint32_t idx)
{
	struct rte_vhost_memory *mem = priv->vmem_info.vmem;
	struct mlx5_vdpa_query_mr *mrs =
		(struct mlx5_vdpa_query_mr *)priv->mrs;
	struct mlx5_vdpa_query_mr *entry;
	struct rte_vhost_mem_region *reg;
	int ret;

	reg = &mem->regions[idx];
	entry = &mrs[idx];
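	/*
	 * Register the region's host virtual address range with its guest
	 * physical address as the IOVA, so the device can access the memory
	 * directly by GPA.
	 */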
	entry->mr = mlx5_glue->reg_mr_iova
		(priv->cdev->pd,
		 (void *)(uintptr_t)(reg->host_user_addr),
		 reg->size, reg->guest_phys_addr,
		 IBV_ACCESS_LOCAL_WRITE);
	if (!entry->mr) {
		DRV_LOG(ERR, "Failed to create direct Mkey.");
		ret = -rte_errno;
		return ret;
	}
	entry->is_indirect = 0;
	return 0;
}