xref: /dpdk/drivers/net/mana/mr.c (revision eb704df7e27df838ba7ec9bcd034bf0aaee405cd)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright 2022 Microsoft Corporation
3  */
4 
5 #include <rte_malloc.h>
6 #include <ethdev_driver.h>
7 #include <rte_eal_paging.h>
8 
9 #include <infiniband/verbs.h>
10 
11 #include "mana.h"
12 
/*
 * A mempool memory chunk expanded to page boundaries, produced by
 * mana_mempool_chunk_cb() and consumed by mana_new_pmd_mr().
 */
struct mana_range {
	uintptr_t	start;	/* page-aligned (floor) start address */
	uintptr_t	end;	/* page-aligned (ceil) end address */
	/* Byte length of [start, end). NOTE(review): 32-bit while start/end
	 * are pointer-sized — a chunk >= 4GB would truncate; presumably
	 * chunks are bounded well below that, verify against mempool sizing.
	 */
	uint32_t	len;
};
18 
19 void
20 mana_mempool_chunk_cb(struct rte_mempool *mp __rte_unused, void *opaque,
21 		      struct rte_mempool_memhdr *memhdr, unsigned int idx)
22 {
23 	struct mana_range *ranges = opaque;
24 	struct mana_range *range = &ranges[idx];
25 	uint64_t page_size = rte_mem_page_size();
26 
27 	range->start = RTE_ALIGN_FLOOR((uintptr_t)memhdr->addr, page_size);
28 	range->end = RTE_ALIGN_CEIL((uintptr_t)memhdr->addr + memhdr->len,
29 				    page_size);
30 	range->len = range->end - range->start;
31 }
32 
33 /*
34  * Register all memory regions from pool.
35  */
36 int
37 mana_new_pmd_mr(struct mana_mr_btree *local_tree, struct mana_priv *priv,
38 		struct rte_mempool *pool)
39 {
40 	struct ibv_mr *ibv_mr;
41 	struct mana_range ranges[pool->nb_mem_chunks];
42 	uint32_t i;
43 	struct mana_mr_cache *mr;
44 	int ret;
45 
46 	rte_mempool_mem_iter(pool, mana_mempool_chunk_cb, ranges);
47 
48 	for (i = 0; i < pool->nb_mem_chunks; i++) {
49 		if (ranges[i].len > priv->max_mr_size) {
50 			DP_LOG(ERR, "memory chunk size %u exceeding max MR",
51 			       ranges[i].len);
52 			return -ENOMEM;
53 		}
54 
55 		DP_LOG(DEBUG,
56 		       "registering memory chunk start 0x%" PRIxPTR " len %u",
57 		       ranges[i].start, ranges[i].len);
58 
59 		if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
60 			/* Send a message to the primary to do MR */
61 			ret = mana_mp_req_mr_create(priv, ranges[i].start,
62 						    ranges[i].len);
63 			if (ret) {
64 				DP_LOG(ERR,
65 				       "MR failed start 0x%" PRIxPTR " len %u",
66 				       ranges[i].start, ranges[i].len);
67 				return ret;
68 			}
69 			continue;
70 		}
71 
72 		ibv_mr = ibv_reg_mr(priv->ib_pd, (void *)ranges[i].start,
73 				    ranges[i].len, IBV_ACCESS_LOCAL_WRITE);
74 		if (ibv_mr) {
75 			DP_LOG(DEBUG, "MR lkey %u addr %p len %zu",
76 			       ibv_mr->lkey, ibv_mr->addr, ibv_mr->length);
77 
78 			mr = rte_calloc("MANA MR", 1, sizeof(*mr), 0);
79 			mr->lkey = ibv_mr->lkey;
80 			mr->addr = (uintptr_t)ibv_mr->addr;
81 			mr->len = ibv_mr->length;
82 			mr->verb_obj = ibv_mr;
83 
84 			rte_spinlock_lock(&priv->mr_btree_lock);
85 			ret = mana_mr_btree_insert(&priv->mr_btree, mr);
86 			rte_spinlock_unlock(&priv->mr_btree_lock);
87 			if (ret) {
88 				ibv_dereg_mr(ibv_mr);
89 				DP_LOG(ERR, "Failed to add to global MR btree");
90 				return ret;
91 			}
92 
93 			ret = mana_mr_btree_insert(local_tree, mr);
94 			if (ret) {
95 				/* Don't need to clean up MR as it's already
96 				 * in the global tree
97 				 */
98 				DP_LOG(ERR, "Failed to add to local MR btree");
99 				return ret;
100 			}
101 		} else {
102 			DP_LOG(ERR, "MR failed at 0x%" PRIxPTR " len %u",
103 			       ranges[i].start, ranges[i].len);
104 			return -errno;
105 		}
106 	}
107 	return 0;
108 }
109 
110 /*
111  * Deregister a MR.
112  */
113 void
114 mana_del_pmd_mr(struct mana_mr_cache *mr)
115 {
116 	int ret;
117 	struct ibv_mr *ibv_mr = (struct ibv_mr *)mr->verb_obj;
118 
119 	ret = ibv_dereg_mr(ibv_mr);
120 	if (ret)
121 		DP_LOG(ERR, "dereg MR failed ret %d", ret);
122 }
123 
124 /*
125  * Alloc a MR.
126  * Try to find a MR in the cache. If not found, register a new MR.
127  */
128 struct mana_mr_cache *
129 mana_alloc_pmd_mr(struct mana_mr_btree *local_mr_btree, struct mana_priv *priv,
130 		  struct rte_mbuf *mbuf)
131 {
132 	struct rte_mempool *pool = mbuf->pool;
133 	int ret, second_try = 0;
134 	struct mana_mr_cache *mr;
135 	uint16_t idx;
136 
137 	DP_LOG(DEBUG, "finding mr for mbuf addr %p len %d",
138 	       mbuf->buf_addr, mbuf->buf_len);
139 
140 try_again:
141 	/* First try to find the MR in local queue tree */
142 	mr = mana_mr_btree_lookup(local_mr_btree, &idx,
143 				  (uintptr_t)mbuf->buf_addr, mbuf->buf_len);
144 	if (mr) {
145 		DP_LOG(DEBUG, "Local mr lkey %u addr 0x%" PRIxPTR " len %zu",
146 		       mr->lkey, mr->addr, mr->len);
147 		return mr;
148 	}
149 
150 	/* If not found, try to find the MR in global tree */
151 	rte_spinlock_lock(&priv->mr_btree_lock);
152 	mr = mana_mr_btree_lookup(&priv->mr_btree, &idx,
153 				  (uintptr_t)mbuf->buf_addr,
154 				  mbuf->buf_len);
155 	rte_spinlock_unlock(&priv->mr_btree_lock);
156 
157 	/* If found in the global tree, add it to the local tree */
158 	if (mr) {
159 		ret = mana_mr_btree_insert(local_mr_btree, mr);
160 		if (ret) {
161 			DP_LOG(ERR, "Failed to add MR to local tree.");
162 			return NULL;
163 		}
164 
165 		DP_LOG(DEBUG,
166 		       "Added local MR key %u addr 0x%" PRIxPTR " len %zu",
167 		       mr->lkey, mr->addr, mr->len);
168 		return mr;
169 	}
170 
171 	if (second_try) {
172 		DP_LOG(ERR, "Internal error second try failed");
173 		return NULL;
174 	}
175 
176 	ret = mana_new_pmd_mr(local_mr_btree, priv, pool);
177 	if (ret) {
178 		DP_LOG(ERR, "Failed to allocate MR ret %d addr %p len %d",
179 		       ret, mbuf->buf_addr, mbuf->buf_len);
180 		return NULL;
181 	}
182 
183 	second_try = 1;
184 	goto try_again;
185 }
186 
187 void
188 mana_remove_all_mr(struct mana_priv *priv)
189 {
190 	struct mana_mr_btree *bt = &priv->mr_btree;
191 	struct mana_mr_cache *mr;
192 	struct ibv_mr *ibv_mr;
193 	uint16_t i;
194 
195 	rte_spinlock_lock(&priv->mr_btree_lock);
196 	/* Start with index 1 as the 1st entry is always NULL */
197 	for (i = 1; i < bt->len; i++) {
198 		mr = &bt->table[i];
199 		ibv_mr = mr->verb_obj;
200 		ibv_dereg_mr(ibv_mr);
201 	}
202 	bt->len = 1;
203 	rte_spinlock_unlock(&priv->mr_btree_lock);
204 }
205 
206 /*
207  * Expand the MR cache.
208  * MR cache is maintained as a btree and expand on demand.
209  */
210 static int
211 mana_mr_btree_expand(struct mana_mr_btree *bt, int n)
212 {
213 	void *mem;
214 
215 	mem = rte_realloc_socket(bt->table, n * sizeof(struct mana_mr_cache),
216 				 0, bt->socket);
217 	if (!mem) {
218 		DP_LOG(ERR, "Failed to expand btree size %d", n);
219 		return -1;
220 	}
221 
222 	DP_LOG(ERR, "Expanded btree to size %d", n);
223 	bt->table = mem;
224 	bt->size = n;
225 
226 	return 0;
227 }
228 
229 /*
230  * Look for a region of memory in MR cache.
231  */
232 struct mana_mr_cache *
233 mana_mr_btree_lookup(struct mana_mr_btree *bt, uint16_t *idx,
234 		     uintptr_t addr, size_t len)
235 {
236 	struct mana_mr_cache *table;
237 	uint16_t n;
238 	uint16_t base = 0;
239 	int ret;
240 
241 	n = bt->len;
242 
243 	/* Try to double the cache if it's full */
244 	if (n == bt->size) {
245 		ret = mana_mr_btree_expand(bt, bt->size << 1);
246 		if (ret)
247 			return NULL;
248 	}
249 
250 	table = bt->table;
251 
252 	/* Do binary search on addr */
253 	do {
254 		uint16_t delta = n >> 1;
255 
256 		if (addr < table[base + delta].addr) {
257 			n = delta;
258 		} else {
259 			base += delta;
260 			n -= delta;
261 		}
262 	} while (n > 1);
263 
264 	*idx = base;
265 
266 	if (addr + len <= table[base].addr + table[base].len)
267 		return &table[base];
268 
269 	DP_LOG(DEBUG,
270 	       "addr 0x%" PRIxPTR " len %zu idx %u sum 0x%" PRIxPTR " not found",
271 	       addr, len, *idx, addr + len);
272 
273 	return NULL;
274 }
275 
276 int
277 mana_mr_btree_init(struct mana_mr_btree *bt, int n, int socket)
278 {
279 	memset(bt, 0, sizeof(*bt));
280 	bt->table = rte_calloc_socket("MANA B-tree table",
281 				      n,
282 				      sizeof(struct mana_mr_cache),
283 				      0, socket);
284 	if (!bt->table) {
285 		DRV_LOG(ERR, "Failed to allocate B-tree n %d socket %d",
286 			n, socket);
287 		return -ENOMEM;
288 	}
289 
290 	bt->socket = socket;
291 	bt->size = n;
292 
293 	/* First entry must be NULL for binary search to work */
294 	bt->table[0] = (struct mana_mr_cache) {
295 		.lkey = UINT32_MAX,
296 	};
297 	bt->len = 1;
298 
299 	DRV_LOG(ERR, "B-tree initialized table %p size %d len %d",
300 		bt->table, n, bt->len);
301 
302 	return 0;
303 }
304 
305 void
306 mana_mr_btree_free(struct mana_mr_btree *bt)
307 {
308 	rte_free(bt->table);
309 	memset(bt, 0, sizeof(*bt));
310 }
311 
/*
 * Insert an MR entry into the btree, keeping the table sorted by addr.
 * The entry is copied by value; the caller retains ownership of *entry.
 * Returns 0 on success (including when an entry already covers the
 * range), -1 and sets bt->overflow if the table is full.
 */
int
mana_mr_btree_insert(struct mana_mr_btree *bt, struct mana_mr_cache *entry)
{
	struct mana_mr_cache *table;
	uint16_t idx = 0;
	uint16_t shift;

	/* Lookup doubles as duplicate check and yields the predecessor idx.
	 * Note it may also expand the table when full (see lookup).
	 */
	if (mana_mr_btree_lookup(bt, &idx, entry->addr, entry->len)) {
		DP_LOG(DEBUG, "Addr 0x%" PRIxPTR " len %zu exists in btree",
		       entry->addr, entry->len);
		return 0;
	}

	if (bt->len >= bt->size) {
		bt->overflow = 1;
		return -1;
	}

	table = bt->table;

	/* Insert after the predecessor found by lookup */
	idx++;
	/* Bytes occupied by entries at or after the insertion point */
	shift = (bt->len - idx) * sizeof(struct mana_mr_cache);
	if (shift) {
		DP_LOG(DEBUG, "Moving %u bytes from idx %u to %u",
		       shift, idx, idx + 1);
		memmove(&table[idx + 1], &table[idx], shift);
	}

	table[idx] = *entry;
	bt->len++;

	DP_LOG(DEBUG,
	       "Inserted MR b-tree table %p idx %d addr 0x%" PRIxPTR " len %zu",
	       table, idx, entry->addr, entry->len);

	return 0;
}
349