xref: /dpdk/drivers/net/mana/mr.c (revision 0c7bc26bb0b39bfe8999f422329bd52861b43a72)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright 2022 Microsoft Corporation
3  */
4 
5 #include <rte_malloc.h>
6 #include <ethdev_driver.h>
7 #include <rte_eal_paging.h>
8 
9 #include <infiniband/verbs.h>
10 
11 #include "mana.h"
12 
/* Page-aligned extent of one mempool memory chunk, filled in by
 * mana_mempool_chunk_cb() and consumed by mana_new_pmd_mr().
 */
struct mana_range {
	uintptr_t	start;	/* chunk address aligned down to page size */
	uintptr_t	end;	/* chunk end aligned up to page size */
	uint32_t	len;	/* end - start; NOTE(review): uint32_t would
				 * truncate a chunk spanning >4GB — confirm
				 * chunks are always smaller
				 */
};
18 
19 void
mana_mempool_chunk_cb(struct rte_mempool * mp __rte_unused,void * opaque,struct rte_mempool_memhdr * memhdr,unsigned int idx)20 mana_mempool_chunk_cb(struct rte_mempool *mp __rte_unused, void *opaque,
21 		      struct rte_mempool_memhdr *memhdr, unsigned int idx)
22 {
23 	struct mana_range *ranges = opaque;
24 	struct mana_range *range = &ranges[idx];
25 	uint64_t page_size = rte_mem_page_size();
26 
27 	range->start = RTE_ALIGN_FLOOR((uintptr_t)memhdr->addr, page_size);
28 	range->end = RTE_ALIGN_CEIL((uintptr_t)memhdr->addr + memhdr->len,
29 				    page_size);
30 	range->len = range->end - range->start;
31 }
32 
33 /*
34  * Register all memory regions from pool.
35  */
36 int
mana_new_pmd_mr(struct mana_mr_btree * local_tree,struct mana_priv * priv,struct rte_mempool * pool)37 mana_new_pmd_mr(struct mana_mr_btree *local_tree, struct mana_priv *priv,
38 		struct rte_mempool *pool)
39 {
40 	struct ibv_mr *ibv_mr;
41 	struct mana_range ranges[pool->nb_mem_chunks];
42 	uint32_t i;
43 	struct mana_mr_cache mr;
44 	int ret;
45 
46 	rte_mempool_mem_iter(pool, mana_mempool_chunk_cb, ranges);
47 
48 	for (i = 0; i < pool->nb_mem_chunks; i++) {
49 		if (ranges[i].len > priv->max_mr_size) {
50 			DP_LOG(ERR, "memory chunk size %u exceeding max MR",
51 			       ranges[i].len);
52 			return -ENOMEM;
53 		}
54 
55 		DP_LOG(DEBUG,
56 		       "registering memory chunk start 0x%" PRIxPTR " len %u",
57 		       ranges[i].start, ranges[i].len);
58 
59 		if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
60 			/* Send a message to the primary to do MR */
61 			ret = mana_mp_req_mr_create(priv, ranges[i].start,
62 						    ranges[i].len);
63 			if (ret) {
64 				DP_LOG(ERR,
65 				       "MR failed start 0x%" PRIxPTR " len %u",
66 				       ranges[i].start, ranges[i].len);
67 				return ret;
68 			}
69 			continue;
70 		}
71 
72 		ibv_mr = ibv_reg_mr(priv->ib_pd, (void *)ranges[i].start,
73 				    ranges[i].len, IBV_ACCESS_LOCAL_WRITE);
74 		if (ibv_mr) {
75 			DP_LOG(DEBUG, "MR lkey %u addr %p len %zu",
76 			       ibv_mr->lkey, ibv_mr->addr, ibv_mr->length);
77 
78 			mr.lkey = ibv_mr->lkey;
79 			mr.addr = (uintptr_t)ibv_mr->addr;
80 			mr.len = ibv_mr->length;
81 			mr.verb_obj = ibv_mr;
82 
83 			rte_spinlock_lock(&priv->mr_btree_lock);
84 			ret = mana_mr_btree_insert(&priv->mr_btree, &mr);
85 			rte_spinlock_unlock(&priv->mr_btree_lock);
86 			if (ret) {
87 				ibv_dereg_mr(ibv_mr);
88 				DP_LOG(ERR, "Failed to add to global MR btree");
89 				return ret;
90 			}
91 
92 			ret = mana_mr_btree_insert(local_tree, &mr);
93 			if (ret) {
94 				/* Don't need to clean up MR as it's already
95 				 * in the global tree
96 				 */
97 				DP_LOG(ERR, "Failed to add to local MR btree");
98 				return ret;
99 			}
100 		} else {
101 			DP_LOG(ERR, "MR failed at 0x%" PRIxPTR " len %u",
102 			       ranges[i].start, ranges[i].len);
103 			return -errno;
104 		}
105 	}
106 	return 0;
107 }
108 
109 /*
110  * Deregister a MR.
111  */
112 void
mana_del_pmd_mr(struct mana_mr_cache * mr)113 mana_del_pmd_mr(struct mana_mr_cache *mr)
114 {
115 	int ret;
116 	struct ibv_mr *ibv_mr = (struct ibv_mr *)mr->verb_obj;
117 
118 	ret = ibv_dereg_mr(ibv_mr);
119 	if (ret)
120 		DP_LOG(ERR, "dereg MR failed ret %d", ret);
121 }
122 
123 /*
124  * Alloc a MR.
125  * Try to find a MR in the cache. If not found, register a new MR.
126  */
127 struct mana_mr_cache *
mana_alloc_pmd_mr(struct mana_mr_btree * local_mr_btree,struct mana_priv * priv,struct rte_mbuf * mbuf)128 mana_alloc_pmd_mr(struct mana_mr_btree *local_mr_btree, struct mana_priv *priv,
129 		  struct rte_mbuf *mbuf)
130 {
131 	struct rte_mempool *pool = mbuf->pool;
132 	int ret, second_try = 0;
133 	struct mana_mr_cache *mr;
134 	uint16_t idx;
135 
136 	DP_LOG(DEBUG, "finding mr for mbuf addr %p len %d",
137 	       mbuf->buf_addr, mbuf->buf_len);
138 
139 try_again:
140 	/* First try to find the MR in local queue tree */
141 	ret = mana_mr_btree_lookup(local_mr_btree, &idx,
142 				   (uintptr_t)mbuf->buf_addr, mbuf->buf_len,
143 				   &mr);
144 	if (ret)
145 		return NULL;
146 
147 	if (mr) {
148 		DP_LOG(DEBUG, "Local mr lkey %u addr 0x%" PRIxPTR " len %zu",
149 		       mr->lkey, mr->addr, mr->len);
150 		return mr;
151 	}
152 
153 	/* If not found, try to find the MR in global tree */
154 	rte_spinlock_lock(&priv->mr_btree_lock);
155 	ret = mana_mr_btree_lookup(&priv->mr_btree, &idx,
156 				   (uintptr_t)mbuf->buf_addr,
157 				   mbuf->buf_len, &mr);
158 	rte_spinlock_unlock(&priv->mr_btree_lock);
159 
160 	if (ret)
161 		return NULL;
162 
163 	/* If found in the global tree, add it to the local tree */
164 	if (mr) {
165 		ret = mana_mr_btree_insert(local_mr_btree, mr);
166 		if (ret) {
167 			DP_LOG(ERR, "Failed to add MR to local tree.");
168 			return NULL;
169 		}
170 
171 		DP_LOG(DEBUG,
172 		       "Added local MR key %u addr 0x%" PRIxPTR " len %zu",
173 		       mr->lkey, mr->addr, mr->len);
174 		return mr;
175 	}
176 
177 	if (second_try) {
178 		DP_LOG(ERR, "Internal error second try failed");
179 		return NULL;
180 	}
181 
182 	ret = mana_new_pmd_mr(local_mr_btree, priv, pool);
183 	if (ret) {
184 		DP_LOG(ERR, "Failed to allocate MR ret %d addr %p len %d",
185 		       ret, mbuf->buf_addr, mbuf->buf_len);
186 		return NULL;
187 	}
188 
189 	second_try = 1;
190 	goto try_again;
191 }
192 
193 void
mana_remove_all_mr(struct mana_priv * priv)194 mana_remove_all_mr(struct mana_priv *priv)
195 {
196 	struct mana_mr_btree *bt = &priv->mr_btree;
197 	struct mana_mr_cache *mr;
198 	struct ibv_mr *ibv_mr;
199 	uint16_t i;
200 
201 	rte_spinlock_lock(&priv->mr_btree_lock);
202 	/* Start with index 1 as the 1st entry is always NULL */
203 	for (i = 1; i < bt->len; i++) {
204 		mr = &bt->table[i];
205 		ibv_mr = mr->verb_obj;
206 		ibv_dereg_mr(ibv_mr);
207 	}
208 	bt->len = 1;
209 	rte_spinlock_unlock(&priv->mr_btree_lock);
210 }
211 
212 /*
213  * Expand the MR cache.
214  * MR cache is maintained as a btree and expand on demand.
215  */
216 static int
mana_mr_btree_expand(struct mana_mr_btree * bt,int n)217 mana_mr_btree_expand(struct mana_mr_btree *bt, int n)
218 {
219 	void *mem;
220 
221 	mem = rte_realloc_socket(bt->table, n * sizeof(struct mana_mr_cache),
222 				 0, bt->socket);
223 	if (!mem) {
224 		DP_LOG(ERR, "Failed to expand btree size %d", n);
225 		return -1;
226 	}
227 
228 	DP_LOG(ERR, "Expanded btree to size %d", n);
229 	bt->table = mem;
230 	bt->size = n;
231 
232 	return 0;
233 }
234 
/*
 * Look for a region of memory in MR cache.
 *
 * On return *cache points to the entry fully covering [addr, addr+len),
 * or NULL when none does; *idx receives the btree slot of the last
 * entry whose start address is <= addr (the insertion point used by
 * mana_mr_btree_insert()). Returns 0 whether or not a match was found;
 * returns non-zero only if an on-demand expansion of a full tree fails.
 *
 * NOTE(review): this can reallocate bt->table as a side effect, so for
 * shared trees the caller must hold the tree's lock — confirm all call
 * sites do.
 */
int mana_mr_btree_lookup(struct mana_mr_btree *bt, uint16_t *idx,
			 uintptr_t addr, size_t len,
			 struct mana_mr_cache **cache)
{
	struct mana_mr_cache *table;
	uint16_t n;
	uint16_t base = 0;
	int ret;

	*cache = NULL;

	n = bt->len;
	/* Try to double the cache if it's full */
	if (n == bt->size) {
		ret = mana_mr_btree_expand(bt, bt->size << 1);
		if (ret)
			return ret;
	}

	table = bt->table;

	/* Do binary search on addr; entry 0 is a zero-address sentinel
	 * (set up by mana_mr_btree_init()) so base never underflows
	 */
	do {
		uint16_t delta = n >> 1;

		if (addr < table[base + delta].addr) {
			n = delta;
		} else {
			base += delta;
			n -= delta;
		}
	} while (n > 1);

	*idx = base;

	/* A hit requires the cached MR to cover the whole range */
	if (addr + len <= table[base].addr + table[base].len) {
		*cache = &table[base];
		return 0;
	}

	DP_LOG(DEBUG,
	       "addr 0x%" PRIxPTR " len %zu idx %u sum 0x%" PRIxPTR " not found",
	       addr, len, *idx, addr + len);

	return 0;
}
284 
285 int
mana_mr_btree_init(struct mana_mr_btree * bt,int n,int socket)286 mana_mr_btree_init(struct mana_mr_btree *bt, int n, int socket)
287 {
288 	memset(bt, 0, sizeof(*bt));
289 	bt->table = rte_calloc_socket("MANA B-tree table",
290 				      n,
291 				      sizeof(struct mana_mr_cache),
292 				      0, socket);
293 	if (!bt->table) {
294 		DRV_LOG(ERR, "Failed to allocate B-tree n %d socket %d",
295 			n, socket);
296 		return -ENOMEM;
297 	}
298 
299 	bt->socket = socket;
300 	bt->size = n;
301 
302 	/* First entry must be NULL for binary search to work */
303 	bt->table[0] = (struct mana_mr_cache) {
304 		.lkey = UINT32_MAX,
305 	};
306 	bt->len = 1;
307 
308 	DRV_LOG(ERR, "B-tree initialized table %p size %d len %d",
309 		bt->table, n, bt->len);
310 
311 	return 0;
312 }
313 
314 void
mana_mr_btree_free(struct mana_mr_btree * bt)315 mana_mr_btree_free(struct mana_mr_btree *bt)
316 {
317 	rte_free(bt->table);
318 	memset(bt, 0, sizeof(*bt));
319 }
320 
/*
 * Insert a MR cache entry into the btree, keeping the table sorted by
 * start address. Returns 0 on success (including when the range is
 * already covered by an existing entry), non-zero on lookup/expansion
 * failure, -1 and sets bt->overflow when the tree is full.
 */
int
mana_mr_btree_insert(struct mana_mr_btree *bt, struct mana_mr_cache *entry)
{
	struct mana_mr_cache *table;
	uint16_t idx = 0;
	uint16_t shift;
	int ret;

	/* Lookup yields the slot to insert after; it may also expand a
	 * full tree as a side effect
	 */
	ret = mana_mr_btree_lookup(bt, &idx, entry->addr, entry->len, &table);
	if (ret)
		return ret;

	if (table) {
		/* Range already covered by a cached MR; nothing to do */
		DP_LOG(DEBUG, "Addr 0x%" PRIxPTR " len %zu exists in btree",
		       entry->addr, entry->len);
		return 0;
	}

	if (bt->len >= bt->size) {
		DP_LOG(ERR, "Btree overflow detected len %u size %u",
		       bt->len, bt->size);
		bt->overflow = 1;
		return -1;
	}

	table = bt->table;

	/* New entry goes right after the lookup position */
	idx++;
	/* Shift the tail up one slot to make room */
	shift = (bt->len - idx) * sizeof(struct mana_mr_cache);
	if (shift) {
		DP_LOG(DEBUG, "Moving %u bytes from idx %u to %u",
		       shift, idx, idx + 1);
		memmove(&table[idx + 1], &table[idx], shift);
	}

	table[idx] = *entry;
	bt->len++;

	DP_LOG(DEBUG,
	       "Inserted MR b-tree table %p idx %d addr 0x%" PRIxPTR " len %zu",
	       table, idx, entry->addr, entry->len);

	return 0;
}
365