xref: /dpdk/drivers/vdpa/mlx5/mlx5_vdpa_mem.c (revision 68a03efeed657e6e05f281479b33b51102797e15)
/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2019 Mellanox Technologies, Ltd
 */
#include <stdlib.h>

#include <rte_malloc.h>
#include <rte_errno.h>
#include <rte_common.h>
#include <rte_sched_common.h>

#include <mlx5_prm.h>
#include <mlx5_common.h>

#include "mlx5_vdpa_utils.h"
#include "mlx5_vdpa.h"

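/* Release every registered MR: destroy its mkey, deregister its umem for
 * direct MRs, then drop the null MR and the cached vhost memory table.
 */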
void
mlx5_vdpa_mem_dereg(struct mlx5_vdpa_priv *priv)
{
	struct mlx5_vdpa_query_mr *entry;
	struct mlx5_vdpa_query_mr *next;

	entry = SLIST_FIRST(&priv->mr_list);
	while (entry) {
		next = SLIST_NEXT(entry, next);
		claim_zero(mlx5_devx_cmd_destroy(entry->mkey));
		if (!entry->is_indirect)
			claim_zero(mlx5_glue->devx_umem_dereg(entry->umem));
		SLIST_REMOVE(&priv->mr_list, entry, mlx5_vdpa_query_mr, next);
		rte_free(entry);
		entry = next;
	}
	SLIST_INIT(&priv->mr_list);
	if (priv->null_mr) {
		claim_zero(mlx5_glue->dereg_mr(priv->null_mr));
		priv->null_mr = NULL;
	}
	if (priv->vmem) {
		free(priv->vmem);
		priv->vmem = NULL;
	}
}

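/* qsort() comparator: order vhost memory regions by ascending GPA. */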
static int
mlx5_vdpa_regions_addr_cmp(const void *a, const void *b)
{
	const struct rte_vhost_mem_region *region_a = a;
	const struct rte_vhost_mem_region *region_b = b;

	if (region_a->guest_phys_addr < region_b->guest_phys_addr)
		return -1;
	if (region_a->guest_phys_addr > region_b->guest_phys_addr)
		return 1;
	return 0;
}

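/* Number of KLM entries, each covering at most MLX5_MAX_KLM_BYTE_COUNT
 * bytes, needed to map sz bytes.
 */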
#define KLM_NUM_MAX_ALIGN(sz) (RTE_ALIGN_CEIL(sz, MLX5_MAX_KLM_BYTE_COUNT) / \
			       MLX5_MAX_KLM_BYTE_COUNT)

/*
 * Allocate and sort the region list and choose the indirect mkey mode:
 *   1. Calculate the GCD, the guest memory size and the number of indirect
 *      mkey entries per mode.
 *   2. Cap the GCD at the maximum allowed size (2G) and align it to a
 *      power of 2.
 *   3. Decide the indirect mkey mode according to the next rules:
 *         a. If both the KLM_FBS entries number and the KLM entries number
 *            are bigger than the maximum allowed (MLX5_DEVX_MAX_KLM_ENTRIES)
 *            - error.
 *         b. KLM mode if the KLM_FBS entries number is bigger than the
 *            maximum allowed (MLX5_DEVX_MAX_KLM_ENTRIES).
 *         c. KLM mode if the GCD is smaller than the minimum allowed (4K).
 *         d. KLM mode if the total size of the KLM entries fits in one
 *            cache line while the total size of the KLM_FBS entries does
 *            not.
 *         e. Otherwise, KLM_FBS mode.
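 *
 * Illustrative example (assuming sizeof(struct mlx5_klm) == 16 and a 64B
 * cache line): two 1G regions at GPA 0 and GPA 3G leave a 2G hole, so
 * mem_size = 4G and GCD = 1G; then klm_fbs_entries_num = 4G / 1G = 4 and
 * klm_entries_num = 1 + 1 + 1 = 3. None of rules a-d applies, so KLM_FBS
 * mode is selected.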
 */
static struct rte_vhost_memory *
mlx5_vdpa_vhost_mem_regions_prepare(int vid, uint8_t *mode, uint64_t *mem_size,
				    uint64_t *gcd, uint32_t *entries_num)
{
	struct rte_vhost_memory *mem;
	uint64_t size;
	uint64_t klm_entries_num = 0;
	uint64_t klm_fbs_entries_num;
	uint32_t i;
	int ret = rte_vhost_get_mem_table(vid, &mem);

	if (ret < 0) {
		DRV_LOG(ERR, "Failed to get VM memory layout for vid %d.", vid);
		rte_errno = EINVAL;
		return NULL;
	}
	qsort(mem->regions, mem->nregions, sizeof(mem->regions[0]),
	      mlx5_vdpa_regions_addr_cmp);
	*mem_size = (mem->regions[(mem->nregions - 1)].guest_phys_addr) +
				      (mem->regions[(mem->nregions - 1)].size) -
					      (mem->regions[0].guest_phys_addr);
	*gcd = 0;
	for (i = 0; i < mem->nregions; ++i) {
		DRV_LOG(INFO, "Region %u: HVA 0x%" PRIx64 ", GPA 0x%" PRIx64
			", size 0x%" PRIx64 ".", i,
			mem->regions[i].host_user_addr,
			mem->regions[i].guest_phys_addr, mem->regions[i].size);
		if (i > 0) {
			/* Account for the hole before this region. */
			size = mem->regions[i].guest_phys_addr -
				(mem->regions[i - 1].guest_phys_addr +
				 mem->regions[i - 1].size);
			*gcd = rte_get_gcd(*gcd, size);
			klm_entries_num += KLM_NUM_MAX_ALIGN(size);
		}
		size = mem->regions[i].size;
		*gcd = rte_get_gcd(*gcd, size);
		klm_entries_num += KLM_NUM_MAX_ALIGN(size);
	}
	if (*gcd > MLX5_MAX_KLM_BYTE_COUNT)
		*gcd = rte_get_gcd(*gcd, MLX5_MAX_KLM_BYTE_COUNT);
	if (!RTE_IS_POWER_OF_2(*gcd)) {
		uint64_t candidate_gcd = rte_align64prevpow2(*gcd);

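		/* Reduce to the largest power-of-two divisor of the GCD. */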
		while (candidate_gcd > 1 && (*gcd % candidate_gcd))
			candidate_gcd /= 2;
		DRV_LOG(DEBUG, "GCD 0x%" PRIx64 " is not power of 2. Adjusted "
			"GCD is 0x%" PRIx64 ".", *gcd, candidate_gcd);
		*gcd = candidate_gcd;
	}
	klm_fbs_entries_num = *mem_size / *gcd;
	if (*gcd < MLX5_MIN_KLM_FIXED_BUFFER_SIZE || klm_fbs_entries_num >
	    MLX5_DEVX_MAX_KLM_ENTRIES ||
	    ((klm_entries_num * sizeof(struct mlx5_klm)) <=
	    RTE_CACHE_LINE_SIZE && (klm_fbs_entries_num *
				    sizeof(struct mlx5_klm)) >
							RTE_CACHE_LINE_SIZE)) {
		*mode = MLX5_MKC_ACCESS_MODE_KLM;
		*entries_num = klm_entries_num;
		DRV_LOG(INFO, "Indirect mkey mode is KLM.");
	} else {
		*mode = MLX5_MKC_ACCESS_MODE_KLM_FBS;
		*entries_num = klm_fbs_entries_num;
		DRV_LOG(INFO, "Indirect mkey mode is KLM Fixed Buffer Size.");
	}
	DRV_LOG(DEBUG, "Memory registration information: nregions = %u, "
		"mem_size = 0x%" PRIx64 ", GCD = 0x%" PRIx64
		", klm_fbs_entries_num = 0x%" PRIx64 ", klm_entries_num = 0x%"
		PRIx64 ".", mem->nregions, *mem_size, *gcd, klm_fbs_entries_num,
		klm_entries_num);
	if (*entries_num > MLX5_DEVX_MAX_KLM_ENTRIES) {
		DRV_LOG(ERR, "Failed to prepare memory of vid %d - memory is "
			"too fragmented.", vid);
		free(mem);
		return NULL;
	}
	return mem;
}

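/* Clamp a single KLM entry byte count to the maximum allowed size (2G). */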
#define KLM_SIZE_MAX_ALIGN(sz) ((sz) > MLX5_MAX_KLM_BYTE_COUNT ? \
				MLX5_MAX_KLM_BYTE_COUNT : (sz))

/*
 * The target here is to group all the physical memory regions of the
 * virtio device in one indirect mkey.
 * For KLM Fixed Buffer Size mode (the HW finds the translation entry in
 * one read according to the guest physical address):
 * All its sub-direct mkeys must be of the same size, hence, each one of
 * them should be of the GCD size of all the virtio memory regions and of
 * the holes between them.
 * For KLM mode (each entry may have a different size so the HW must
 * iterate over the entries):
 * Each virtio memory region and each hole between them gets one entry;
 * the only requirement is to respect the maximum allowed entry size (2G)
 * by splitting the entries whose associated memory regions are bigger
 * than 2G.
 * It means that in both modes each virtio memory region may be mapped to
 * more than one direct mkey.
 * All the holes of invalid memory between the virtio memory regions are
 * mapped to the null memory region for security.
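 *
 * Illustrative layout: for regions R0 and R1 separated by a hole H, the
 * resulting klm_array is [R0 chunks..., H chunks pointing at the null MR
 * lkey, R1 chunks...], all referenced by the single indirect mkey.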
 */
int
mlx5_vdpa_mem_register(struct mlx5_vdpa_priv *priv)
{
	struct mlx5_devx_mkey_attr mkey_attr;
	struct mlx5_vdpa_query_mr *entry = NULL;
	struct rte_vhost_mem_region *reg = NULL;
	uint8_t mode;
	uint32_t entries_num = 0;
	uint32_t i;
	uint64_t gcd;
	uint64_t klm_size;
	uint64_t mem_size;
	uint64_t k;
	int klm_index = 0;
	int ret;
	struct rte_vhost_memory *mem = mlx5_vdpa_vhost_mem_regions_prepare
			      (priv->vid, &mode, &mem_size, &gcd, &entries_num);
	struct mlx5_klm klm_array[entries_num];

	if (!mem)
		return -rte_errno;
	priv->vmem = mem;
	priv->null_mr = mlx5_glue->alloc_null_mr(priv->pd);
	if (!priv->null_mr) {
		DRV_LOG(ERR, "Failed to allocate null MR.");
		ret = -errno;
		goto error;
	}
	DRV_LOG(DEBUG, "Dump fill Mkey = %u.", priv->null_mr->lkey);
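	/* Register each region as a direct mkey and collect the KLM entries
	 * covering both the regions and the holes between them.
	 */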
	for (i = 0; i < mem->nregions; i++) {
		reg = &mem->regions[i];
		entry = rte_zmalloc(__func__, sizeof(*entry), 0);
		if (!entry) {
			ret = -ENOMEM;
			DRV_LOG(ERR, "Failed to allocate mem entry memory.");
			goto error;
		}
		entry->umem = mlx5_glue->devx_umem_reg(priv->ctx,
					 (void *)(uintptr_t)reg->host_user_addr,
					     reg->size, IBV_ACCESS_LOCAL_WRITE);
		if (!entry->umem) {
			DRV_LOG(ERR, "Failed to register Umem by Devx.");
			ret = -errno;
			goto error;
		}
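		/* The direct mkey exposes the region's umem at its GPA. */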
		mkey_attr.addr = (uintptr_t)(reg->guest_phys_addr);
		mkey_attr.size = reg->size;
		mkey_attr.umem_id = entry->umem->umem_id;
		mkey_attr.pd = priv->pdn;
		mkey_attr.pg_access = 1;
		mkey_attr.klm_array = NULL;
		mkey_attr.klm_num = 0;
		mkey_attr.relaxed_ordering_read = 0;
		mkey_attr.relaxed_ordering_write = 0;
		entry->mkey = mlx5_devx_cmd_mkey_create(priv->ctx, &mkey_attr);
		if (!entry->mkey) {
			DRV_LOG(ERR, "Failed to create direct Mkey.");
			ret = -rte_errno;
			goto error;
		}
		entry->addr = (void *)(uintptr_t)(reg->host_user_addr);
		entry->length = reg->size;
		entry->is_indirect = 0;
		if (i > 0) {
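			/* Map the hole before this region to the null MR. */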
			uint64_t sadd;
			uint64_t empty_region_sz = reg->guest_phys_addr -
					  (mem->regions[i - 1].guest_phys_addr +
					   mem->regions[i - 1].size);

			if (empty_region_sz > 0) {
				sadd = mem->regions[i - 1].guest_phys_addr +
				       mem->regions[i - 1].size;
				klm_size = mode == MLX5_MKC_ACCESS_MODE_KLM ?
				      KLM_SIZE_MAX_ALIGN(empty_region_sz) : gcd;
				for (k = 0; k < empty_region_sz;
				     k += klm_size) {
					klm_array[klm_index].byte_count =
						k + klm_size > empty_region_sz ?
						 empty_region_sz - k : klm_size;
					klm_array[klm_index].mkey =
							    priv->null_mr->lkey;
					klm_array[klm_index].address = sadd + k;
					klm_index++;
				}
			}
		}
		klm_size = mode == MLX5_MKC_ACCESS_MODE_KLM ?
					    KLM_SIZE_MAX_ALIGN(reg->size) : gcd;
		for (k = 0; k < reg->size; k += klm_size) {
			klm_array[klm_index].byte_count = k + klm_size >
					   reg->size ? reg->size - k : klm_size;
			klm_array[klm_index].mkey = entry->mkey->id;
			klm_array[klm_index].address = reg->guest_phys_addr + k;
			klm_index++;
		}
		SLIST_INSERT_HEAD(&priv->mr_list, entry, next);
	}
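	/* Create the single indirect mkey referencing all the KLM entries. */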
	mkey_attr.addr = (uintptr_t)(mem->regions[0].guest_phys_addr);
	mkey_attr.size = mem_size;
	mkey_attr.pd = priv->pdn;
	mkey_attr.umem_id = 0;
	/* Must be zero for KLM mode. */
	mkey_attr.log_entity_size = mode == MLX5_MKC_ACCESS_MODE_KLM_FBS ?
							  rte_log2_u64(gcd) : 0;
	mkey_attr.pg_access = 0;
	mkey_attr.klm_array = klm_array;
	mkey_attr.klm_num = klm_index;
	entry = rte_zmalloc(__func__, sizeof(*entry), 0);
	if (!entry) {
		DRV_LOG(ERR, "Failed to allocate memory for indirect entry.");
		ret = -ENOMEM;
		goto error;
	}
	entry->mkey = mlx5_devx_cmd_mkey_create(priv->ctx, &mkey_attr);
	if (!entry->mkey) {
		DRV_LOG(ERR, "Failed to create indirect Mkey.");
		ret = -rte_errno;
		goto error;
	}
	entry->is_indirect = 1;
	SLIST_INSERT_HEAD(&priv->mr_list, entry, next);
	priv->gpa_mkey_index = entry->mkey->id;
	return 0;
error:
	if (entry) {
		if (entry->mkey)
			mlx5_devx_cmd_destroy(entry->mkey);
		if (entry->umem)
			mlx5_glue->devx_umem_dereg(entry->umem);
		rte_free(entry);
	}
	mlx5_vdpa_mem_dereg(priv);
	rte_errno = -ret;
	return ret;
}
309