xref: /dpdk/drivers/net/mana/mr.c (revision 3c4898ef762eeb2578b9ae3d7f6e3a0e5cbca8c8)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright 2022 Microsoft Corporation
3  */
4 
5 #include <rte_malloc.h>
6 #include <ethdev_driver.h>
7 #include <rte_eal_paging.h>
8 
9 #include <infiniband/verbs.h>
10 
11 #include "mana.h"
12 
/* One page-aligned memory range derived from a mempool memory chunk. */
struct mana_range {
	uintptr_t	start;	/* chunk start, rounded down to page size */
	uintptr_t	end;	/* chunk end, rounded up to page size */
	uint32_t	len;	/* end - start, in bytes */
};
18 
19 void
20 mana_mempool_chunk_cb(struct rte_mempool *mp __rte_unused, void *opaque,
21 		      struct rte_mempool_memhdr *memhdr, unsigned int idx)
22 {
23 	struct mana_range *ranges = opaque;
24 	struct mana_range *range = &ranges[idx];
25 	uint64_t page_size = rte_mem_page_size();
26 
27 	range->start = RTE_ALIGN_FLOOR((uintptr_t)memhdr->addr, page_size);
28 	range->end = RTE_ALIGN_CEIL((uintptr_t)memhdr->addr + memhdr->len,
29 				    page_size);
30 	range->len = range->end - range->start;
31 }
32 
33 /*
34  * Register all memory regions from pool.
35  */
36 int
37 mana_new_pmd_mr(struct mana_mr_btree *local_tree, struct mana_priv *priv,
38 		struct rte_mempool *pool)
39 {
40 	struct ibv_mr *ibv_mr;
41 	struct mana_range ranges[pool->nb_mem_chunks];
42 	uint32_t i;
43 	struct mana_mr_cache *mr;
44 	int ret;
45 
46 	rte_mempool_mem_iter(pool, mana_mempool_chunk_cb, ranges);
47 
48 	for (i = 0; i < pool->nb_mem_chunks; i++) {
49 		if (ranges[i].len > priv->max_mr_size) {
50 			DP_LOG(ERR, "memory chunk size %u exceeding max MR",
51 			       ranges[i].len);
52 			return -ENOMEM;
53 		}
54 
55 		DP_LOG(DEBUG,
56 		       "registering memory chunk start 0x%" PRIxPTR " len %u",
57 		       ranges[i].start, ranges[i].len);
58 
59 		if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
60 			/* Send a message to the primary to do MR */
61 			ret = mana_mp_req_mr_create(priv, ranges[i].start,
62 						    ranges[i].len);
63 			if (ret) {
64 				DP_LOG(ERR,
65 				       "MR failed start 0x%" PRIxPTR " len %u",
66 				       ranges[i].start, ranges[i].len);
67 				return ret;
68 			}
69 			continue;
70 		}
71 
72 		ibv_mr = ibv_reg_mr(priv->ib_pd, (void *)ranges[i].start,
73 				    ranges[i].len, IBV_ACCESS_LOCAL_WRITE);
74 		if (ibv_mr) {
75 			DP_LOG(DEBUG, "MR lkey %u addr %p len %zu",
76 			       ibv_mr->lkey, ibv_mr->addr, ibv_mr->length);
77 
78 			mr = rte_calloc("MANA MR", 1, sizeof(*mr), 0);
79 			mr->lkey = ibv_mr->lkey;
80 			mr->addr = (uintptr_t)ibv_mr->addr;
81 			mr->len = ibv_mr->length;
82 			mr->verb_obj = ibv_mr;
83 
84 			rte_spinlock_lock(&priv->mr_btree_lock);
85 			ret = mana_mr_btree_insert(&priv->mr_btree, mr);
86 			rte_spinlock_unlock(&priv->mr_btree_lock);
87 			if (ret) {
88 				ibv_dereg_mr(ibv_mr);
89 				DP_LOG(ERR, "Failed to add to global MR btree");
90 				return ret;
91 			}
92 
93 			ret = mana_mr_btree_insert(local_tree, mr);
94 			if (ret) {
95 				/* Don't need to clean up MR as it's already
96 				 * in the global tree
97 				 */
98 				DP_LOG(ERR, "Failed to add to local MR btree");
99 				return ret;
100 			}
101 		} else {
102 			DP_LOG(ERR, "MR failed at 0x%" PRIxPTR " len %u",
103 			       ranges[i].start, ranges[i].len);
104 			return -errno;
105 		}
106 	}
107 	return 0;
108 }
109 
110 /*
111  * Deregister a MR.
112  */
113 void
114 mana_del_pmd_mr(struct mana_mr_cache *mr)
115 {
116 	int ret;
117 	struct ibv_mr *ibv_mr = (struct ibv_mr *)mr->verb_obj;
118 
119 	ret = ibv_dereg_mr(ibv_mr);
120 	if (ret)
121 		DP_LOG(ERR, "dereg MR failed ret %d", ret);
122 }
123 
/*
 * Find a MR from cache. If not found, register a new MR.
 *
 * Lookup order: the per-queue local btree first (no lock), then the
 * global btree under mr_btree_lock (a hit is copied into the local
 * tree), and finally the whole mempool backing the mbuf is registered
 * via mana_new_pmd_mr() and the lookup is retried exactly once.
 * Returns NULL on failure.
 */
struct mana_mr_cache *
mana_find_pmd_mr(struct mana_mr_btree *local_mr_btree, struct mana_priv *priv,
		 struct rte_mbuf *mbuf)
{
	struct rte_mempool *pool = mbuf->pool;
	int ret, second_try = 0;
	struct mana_mr_cache *mr;
	uint16_t idx;

	DP_LOG(DEBUG, "finding mr for mbuf addr %p len %d",
	       mbuf->buf_addr, mbuf->buf_len);

try_again:
	/* First try to find the MR in local queue tree */
	mr = mana_mr_btree_lookup(local_mr_btree, &idx,
				  (uintptr_t)mbuf->buf_addr, mbuf->buf_len);
	if (mr) {
		DP_LOG(DEBUG, "Local mr lkey %u addr 0x%" PRIxPTR " len %zu",
		       mr->lkey, mr->addr, mr->len);
		return mr;
	}

	/* If not found, try to find the MR in global tree */
	rte_spinlock_lock(&priv->mr_btree_lock);
	mr = mana_mr_btree_lookup(&priv->mr_btree, &idx,
				  (uintptr_t)mbuf->buf_addr,
				  mbuf->buf_len);
	rte_spinlock_unlock(&priv->mr_btree_lock);

	/* If found in the global tree, add it to the local tree */
	if (mr) {
		/* NOTE(review): mr points into the global table, which the
		 * lookup may reallocate after the lock is dropped — confirm
		 * callers guarantee this entry stays valid here.
		 */
		ret = mana_mr_btree_insert(local_mr_btree, mr);
		if (ret) {
			DP_LOG(ERR, "Failed to add MR to local tree.");
			return NULL;
		}

		DP_LOG(DEBUG,
		       "Added local MR key %u addr 0x%" PRIxPTR " len %zu",
		       mr->lkey, mr->addr, mr->len);
		return mr;
	}

	/* second_try guards against looping forever if the MR registered
	 * below still cannot be found on the retry.
	 */
	if (second_try) {
		DP_LOG(ERR, "Internal error second try failed");
		return NULL;
	}

	/* Not cached anywhere: register all chunks of the mbuf's pool */
	ret = mana_new_pmd_mr(local_mr_btree, priv, pool);
	if (ret) {
		DP_LOG(ERR, "Failed to allocate MR ret %d addr %p len %d",
		       ret, mbuf->buf_addr, mbuf->buf_len);
		return NULL;
	}

	second_try = 1;
	goto try_again;
}
185 
186 void
187 mana_remove_all_mr(struct mana_priv *priv)
188 {
189 	struct mana_mr_btree *bt = &priv->mr_btree;
190 	struct mana_mr_cache *mr;
191 	struct ibv_mr *ibv_mr;
192 	uint16_t i;
193 
194 	rte_spinlock_lock(&priv->mr_btree_lock);
195 	/* Start with index 1 as the 1st entry is always NULL */
196 	for (i = 1; i < bt->len; i++) {
197 		mr = &bt->table[i];
198 		ibv_mr = mr->verb_obj;
199 		ibv_dereg_mr(ibv_mr);
200 	}
201 	bt->len = 1;
202 	rte_spinlock_unlock(&priv->mr_btree_lock);
203 }
204 
205 /*
206  * Expand the MR cache.
207  * MR cache is maintained as a btree and expand on demand.
208  */
209 static int
210 mana_mr_btree_expand(struct mana_mr_btree *bt, int n)
211 {
212 	void *mem;
213 
214 	mem = rte_realloc_socket(bt->table, n * sizeof(struct mana_mr_cache),
215 				 0, bt->socket);
216 	if (!mem) {
217 		DP_LOG(ERR, "Failed to expand btree size %d", n);
218 		return -1;
219 	}
220 
221 	DP_LOG(ERR, "Expanded btree to size %d", n);
222 	bt->table = mem;
223 	bt->size = n;
224 
225 	return 0;
226 }
227 
/*
 * Look for a region of memory in MR cache.
 *
 * On a hit, returns the cached entry whose range fully covers
 * [addr, addr + len) and stores its table index in *idx; returns NULL
 * otherwise. Side effect: when the table is full it is doubled first,
 * so a subsequent insert at *idx + 1 cannot overflow.
 */
struct mana_mr_cache *
mana_mr_btree_lookup(struct mana_mr_btree *bt, uint16_t *idx,
		     uintptr_t addr, size_t len)
{
	struct mana_mr_cache *table;
	uint16_t n;
	uint16_t base = 0;
	int ret;

	n = bt->len;

	/* Try to double the cache if it's full */
	if (n == bt->size) {
		ret = mana_mr_btree_expand(bt, bt->size << 1);
		if (ret)
			return NULL;
	}

	table = bt->table;

	/* Do binary search on addr */
	do {
		uint16_t delta = n >> 1;

		/* table[0] is the sentinel set up by mana_mr_btree_init(),
		 * so base converges on the last entry with
		 * table[base].addr <= addr (a lower bound).
		 */
		if (addr < table[base + delta].addr) {
			n = delta;
		} else {
			base += delta;
			n -= delta;
		}
	} while (n > 1);

	*idx = base;

	/* Hit only if the candidate entry covers the whole requested range */
	if (addr + len <= table[base].addr + table[base].len)
		return &table[base];

	DP_LOG(DEBUG,
	       "addr 0x%" PRIxPTR " len %zu idx %u sum 0x%" PRIxPTR " not found",
	       addr, len, *idx, addr + len);

	return NULL;
}
274 
275 int
276 mana_mr_btree_init(struct mana_mr_btree *bt, int n, int socket)
277 {
278 	memset(bt, 0, sizeof(*bt));
279 	bt->table = rte_calloc_socket("MANA B-tree table",
280 				      n,
281 				      sizeof(struct mana_mr_cache),
282 				      0, socket);
283 	if (!bt->table) {
284 		DRV_LOG(ERR, "Failed to allocate B-tree n %d socket %d",
285 			n, socket);
286 		return -ENOMEM;
287 	}
288 
289 	bt->socket = socket;
290 	bt->size = n;
291 
292 	/* First entry must be NULL for binary search to work */
293 	bt->table[0] = (struct mana_mr_cache) {
294 		.lkey = UINT32_MAX,
295 	};
296 	bt->len = 1;
297 
298 	DRV_LOG(ERR, "B-tree initialized table %p size %d len %d",
299 		bt->table, n, bt->len);
300 
301 	return 0;
302 }
303 
304 void
305 mana_mr_btree_free(struct mana_mr_btree *bt)
306 {
307 	rte_free(bt->table);
308 	memset(bt, 0, sizeof(*bt));
309 }
310 
/*
 * Insert a copy of *entry into the btree, keeping the table sorted by
 * address. A range already covered by an existing entry counts as
 * success. Returns 0 on success, -1 when the table is full.
 *
 * NOTE(review): no locking here — callers serialize access to the
 * shared global tree with priv->mr_btree_lock.
 */
int
mana_mr_btree_insert(struct mana_mr_btree *bt, struct mana_mr_cache *entry)
{
	struct mana_mr_cache *table;
	uint16_t idx = 0;
	uint16_t shift;

	/* On a miss the lookup still sets idx to the predecessor slot,
	 * and expands the table if it was full.
	 */
	if (mana_mr_btree_lookup(bt, &idx, entry->addr, entry->len)) {
		DP_LOG(DEBUG, "Addr 0x%" PRIxPTR " len %zu exists in btree",
		       entry->addr, entry->len);
		return 0;
	}

	if (bt->len >= bt->size) {
		/* Lookup could not expand the table; record the overflow */
		bt->overflow = 1;
		return -1;
	}

	table = bt->table;

	/* Insert right after the predecessor found by the lookup */
	idx++;
	shift = (bt->len - idx) * sizeof(struct mana_mr_cache);
	if (shift) {
		DP_LOG(DEBUG, "Moving %u bytes from idx %u to %u",
		       shift, idx, idx + 1);
		/* memmove: source and destination overlap */
		memmove(&table[idx + 1], &table[idx], shift);
	}

	table[idx] = *entry;
	bt->len++;

	DP_LOG(DEBUG,
	       "Inserted MR b-tree table %p idx %d addr 0x%" PRIxPTR " len %zu",
	       table, idx, entry->addr, entry->len);

	return 0;
}
348