1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright 2022 Microsoft Corporation 3 */ 4 5 #include <rte_malloc.h> 6 #include <ethdev_driver.h> 7 #include <rte_eal_paging.h> 8 9 #include <infiniband/verbs.h> 10 11 #include "mana.h" 12 13 struct mana_range { 14 uintptr_t start; 15 uintptr_t end; 16 uint32_t len; 17 }; 18 19 void 20 mana_mempool_chunk_cb(struct rte_mempool *mp __rte_unused, void *opaque, 21 struct rte_mempool_memhdr *memhdr, unsigned int idx) 22 { 23 struct mana_range *ranges = opaque; 24 struct mana_range *range = &ranges[idx]; 25 uint64_t page_size = rte_mem_page_size(); 26 27 range->start = RTE_ALIGN_FLOOR((uintptr_t)memhdr->addr, page_size); 28 range->end = RTE_ALIGN_CEIL((uintptr_t)memhdr->addr + memhdr->len, 29 page_size); 30 range->len = range->end - range->start; 31 } 32 33 /* 34 * Register all memory regions from pool. 35 */ 36 int 37 mana_new_pmd_mr(struct mana_mr_btree *local_tree, struct mana_priv *priv, 38 struct rte_mempool *pool) 39 { 40 struct ibv_mr *ibv_mr; 41 struct mana_range ranges[pool->nb_mem_chunks]; 42 uint32_t i; 43 struct mana_mr_cache *mr; 44 int ret; 45 46 rte_mempool_mem_iter(pool, mana_mempool_chunk_cb, ranges); 47 48 for (i = 0; i < pool->nb_mem_chunks; i++) { 49 if (ranges[i].len > priv->max_mr_size) { 50 DP_LOG(ERR, "memory chunk size %u exceeding max MR", 51 ranges[i].len); 52 return -ENOMEM; 53 } 54 55 DP_LOG(DEBUG, 56 "registering memory chunk start 0x%" PRIxPTR " len %u", 57 ranges[i].start, ranges[i].len); 58 59 if (rte_eal_process_type() == RTE_PROC_SECONDARY) { 60 /* Send a message to the primary to do MR */ 61 ret = mana_mp_req_mr_create(priv, ranges[i].start, 62 ranges[i].len); 63 if (ret) { 64 DP_LOG(ERR, 65 "MR failed start 0x%" PRIxPTR " len %u", 66 ranges[i].start, ranges[i].len); 67 return ret; 68 } 69 continue; 70 } 71 72 ibv_mr = ibv_reg_mr(priv->ib_pd, (void *)ranges[i].start, 73 ranges[i].len, IBV_ACCESS_LOCAL_WRITE); 74 if (ibv_mr) { 75 DP_LOG(DEBUG, "MR lkey %u addr %p len %zu", 76 ibv_mr->lkey, ibv_mr->addr, ibv_mr->length); 77 78 mr = rte_calloc("MANA MR", 1, sizeof(*mr), 0); 79 mr->lkey = ibv_mr->lkey; 80 mr->addr = (uintptr_t)ibv_mr->addr; 81 mr->len = ibv_mr->length; 82 mr->verb_obj = ibv_mr; 83 84 rte_spinlock_lock(&priv->mr_btree_lock); 85 ret = mana_mr_btree_insert(&priv->mr_btree, mr); 86 rte_spinlock_unlock(&priv->mr_btree_lock); 87 if (ret) { 88 ibv_dereg_mr(ibv_mr); 89 DP_LOG(ERR, "Failed to add to global MR btree"); 90 return ret; 91 } 92 93 ret = mana_mr_btree_insert(local_tree, mr); 94 if (ret) { 95 /* Don't need to clean up MR as it's already 96 * in the global tree 97 */ 98 DP_LOG(ERR, "Failed to add to local MR btree"); 99 return ret; 100 } 101 } else { 102 DP_LOG(ERR, "MR failed at 0x%" PRIxPTR " len %u", 103 ranges[i].start, ranges[i].len); 104 return -errno; 105 } 106 } 107 return 0; 108 } 109 110 /* 111 * Deregister a MR. 112 */ 113 void 114 mana_del_pmd_mr(struct mana_mr_cache *mr) 115 { 116 int ret; 117 struct ibv_mr *ibv_mr = (struct ibv_mr *)mr->verb_obj; 118 119 ret = ibv_dereg_mr(ibv_mr); 120 if (ret) 121 DP_LOG(ERR, "dereg MR failed ret %d", ret); 122 } 123 124 /* 125 * Find a MR from cache. If not found, register a new MR. 126 */ 127 struct mana_mr_cache * 128 mana_find_pmd_mr(struct mana_mr_btree *local_mr_btree, struct mana_priv *priv, 129 struct rte_mbuf *mbuf) 130 { 131 struct rte_mempool *pool = mbuf->pool; 132 int ret, second_try = 0; 133 struct mana_mr_cache *mr; 134 uint16_t idx; 135 136 DP_LOG(DEBUG, "finding mr for mbuf addr %p len %d", 137 mbuf->buf_addr, mbuf->buf_len); 138 139 try_again: 140 /* First try to find the MR in local queue tree */ 141 mr = mana_mr_btree_lookup(local_mr_btree, &idx, 142 (uintptr_t)mbuf->buf_addr, mbuf->buf_len); 143 if (mr) { 144 DP_LOG(DEBUG, "Local mr lkey %u addr 0x%" PRIxPTR " len %zu", 145 mr->lkey, mr->addr, mr->len); 146 return mr; 147 } 148 149 /* If not found, try to find the MR in global tree */ 150 rte_spinlock_lock(&priv->mr_btree_lock); 151 mr = mana_mr_btree_lookup(&priv->mr_btree, &idx, 152 (uintptr_t)mbuf->buf_addr, 153 mbuf->buf_len); 154 rte_spinlock_unlock(&priv->mr_btree_lock); 155 156 /* If found in the global tree, add it to the local tree */ 157 if (mr) { 158 ret = mana_mr_btree_insert(local_mr_btree, mr); 159 if (ret) { 160 DP_LOG(ERR, "Failed to add MR to local tree."); 161 return NULL; 162 } 163 164 DP_LOG(DEBUG, 165 "Added local MR key %u addr 0x%" PRIxPTR " len %zu", 166 mr->lkey, mr->addr, mr->len); 167 return mr; 168 } 169 170 if (second_try) { 171 DP_LOG(ERR, "Internal error second try failed"); 172 return NULL; 173 } 174 175 ret = mana_new_pmd_mr(local_mr_btree, priv, pool); 176 if (ret) { 177 DP_LOG(ERR, "Failed to allocate MR ret %d addr %p len %d", 178 ret, mbuf->buf_addr, mbuf->buf_len); 179 return NULL; 180 } 181 182 second_try = 1; 183 goto try_again; 184 } 185 186 void 187 mana_remove_all_mr(struct mana_priv *priv) 188 { 189 struct mana_mr_btree *bt = &priv->mr_btree; 190 struct mana_mr_cache *mr; 191 struct ibv_mr *ibv_mr; 192 uint16_t i; 193 194 rte_spinlock_lock(&priv->mr_btree_lock); 195 /* Start with index 1 as the 1st entry is always NULL */ 196 for (i = 1; i < bt->len; i++) { 197 mr = &bt->table[i]; 198 ibv_mr = mr->verb_obj; 199 ibv_dereg_mr(ibv_mr); 200 } 201 bt->len = 1; 202 rte_spinlock_unlock(&priv->mr_btree_lock); 203 } 204 205 /* 206 * Expand the MR cache. 207 * MR cache is maintained as a btree and expand on demand. 208 */ 209 static int 210 mana_mr_btree_expand(struct mana_mr_btree *bt, int n) 211 { 212 void *mem; 213 214 mem = rte_realloc_socket(bt->table, n * sizeof(struct mana_mr_cache), 215 0, bt->socket); 216 if (!mem) { 217 DP_LOG(ERR, "Failed to expand btree size %d", n); 218 return -1; 219 } 220 221 DP_LOG(ERR, "Expanded btree to size %d", n); 222 bt->table = mem; 223 bt->size = n; 224 225 return 0; 226 } 227 228 /* 229 * Look for a region of memory in MR cache. 230 */ 231 struct mana_mr_cache * 232 mana_mr_btree_lookup(struct mana_mr_btree *bt, uint16_t *idx, 233 uintptr_t addr, size_t len) 234 { 235 struct mana_mr_cache *table; 236 uint16_t n; 237 uint16_t base = 0; 238 int ret; 239 240 n = bt->len; 241 242 /* Try to double the cache if it's full */ 243 if (n == bt->size) { 244 ret = mana_mr_btree_expand(bt, bt->size << 1); 245 if (ret) 246 return NULL; 247 } 248 249 table = bt->table; 250 251 /* Do binary search on addr */ 252 do { 253 uint16_t delta = n >> 1; 254 255 if (addr < table[base + delta].addr) { 256 n = delta; 257 } else { 258 base += delta; 259 n -= delta; 260 } 261 } while (n > 1); 262 263 *idx = base; 264 265 if (addr + len <= table[base].addr + table[base].len) 266 return &table[base]; 267 268 DP_LOG(DEBUG, 269 "addr 0x%" PRIxPTR " len %zu idx %u sum 0x%" PRIxPTR " not found", 270 addr, len, *idx, addr + len); 271 272 return NULL; 273 } 274 275 int 276 mana_mr_btree_init(struct mana_mr_btree *bt, int n, int socket) 277 { 278 memset(bt, 0, sizeof(*bt)); 279 bt->table = rte_calloc_socket("MANA B-tree table", 280 n, 281 sizeof(struct mana_mr_cache), 282 0, socket); 283 if (!bt->table) { 284 DRV_LOG(ERR, "Failed to allocate B-tree n %d socket %d", 285 n, socket); 286 return -ENOMEM; 287 } 288 289 bt->socket = socket; 290 bt->size = n; 291 292 /* First entry must be NULL for binary search to work */ 293 bt->table[0] = (struct mana_mr_cache) { 294 .lkey = UINT32_MAX, 295 }; 296 bt->len = 1; 297 298 DRV_LOG(ERR, "B-tree initialized table %p size %d len %d", 299 bt->table, n, bt->len); 300 301 return 0; 302 } 303 304 void 305 mana_mr_btree_free(struct mana_mr_btree *bt) 306 { 307 rte_free(bt->table); 308 memset(bt, 0, sizeof(*bt)); 309 } 310 311 int 312 mana_mr_btree_insert(struct mana_mr_btree *bt, struct mana_mr_cache *entry) 313 { 314 struct mana_mr_cache *table; 315 uint16_t idx = 0; 316 uint16_t shift; 317 318 if (mana_mr_btree_lookup(bt, &idx, entry->addr, entry->len)) { 319 DP_LOG(DEBUG, "Addr 0x%" PRIxPTR " len %zu exists in btree", 320 entry->addr, entry->len); 321 return 0; 322 } 323 324 if (bt->len >= bt->size) { 325 bt->overflow = 1; 326 return -1; 327 } 328 329 table = bt->table; 330 331 idx++; 332 shift = (bt->len - idx) * sizeof(struct mana_mr_cache); 333 if (shift) { 334 DP_LOG(DEBUG, "Moving %u bytes from idx %u to %u", 335 shift, idx, idx + 1); 336 memmove(&table[idx + 1], &table[idx], shift); 337 } 338 339 table[idx] = *entry; 340 bt->len++; 341 342 DP_LOG(DEBUG, 343 "Inserted MR b-tree table %p idx %d addr 0x%" PRIxPTR " len %zu", 344 table, idx, entry->addr, entry->len); 345 346 return 0; 347 } 348