xref: /dpdk/drivers/net/mlx5/mlx5_utils.c (revision 23d6f76d31474eeaafed752e377c109f803c287b)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright 2019 Mellanox Technologies, Ltd
3  */
4 
5 #include <rte_malloc.h>
6 
7 #include <mlx5_malloc.h>
8 
9 #include "mlx5_utils.h"
10 
11 /********************* Indexed pool **********************/
12 
13 static inline void
14 mlx5_ipool_lock(struct mlx5_indexed_pool *pool)
15 {
16 	if (pool->cfg.need_lock)
17 		rte_spinlock_lock(&pool->rsz_lock);
18 }
19 
20 static inline void
21 mlx5_ipool_unlock(struct mlx5_indexed_pool *pool)
22 {
23 	if (pool->cfg.need_lock)
24 		rte_spinlock_unlock(&pool->rsz_lock);
25 }
26 
27 static inline uint32_t
28 mlx5_trunk_idx_get(struct mlx5_indexed_pool *pool, uint32_t entry_idx)
29 {
30 	struct mlx5_indexed_pool_config *cfg = &pool->cfg;
31 	uint32_t trunk_idx = 0;
32 	uint32_t i;
33 
34 	if (!cfg->grow_trunk)
35 		return entry_idx / cfg->trunk_size;
36 	if (entry_idx >= pool->grow_tbl[cfg->grow_trunk - 1]) {
37 		trunk_idx = (entry_idx - pool->grow_tbl[cfg->grow_trunk - 1]) /
38 			    (cfg->trunk_size << (cfg->grow_shift *
39 			    cfg->grow_trunk)) + cfg->grow_trunk;
40 	} else {
41 		for (i = 0; i < cfg->grow_trunk; i++) {
42 			if (entry_idx < pool->grow_tbl[i])
43 				break;
44 		}
45 		trunk_idx = i;
46 	}
47 	return trunk_idx;
48 }
49 
50 static inline uint32_t
51 mlx5_trunk_size_get(struct mlx5_indexed_pool *pool, uint32_t trunk_idx)
52 {
53 	struct mlx5_indexed_pool_config *cfg = &pool->cfg;
54 
55 	return cfg->trunk_size << (cfg->grow_shift *
56 	       (trunk_idx > cfg->grow_trunk ? cfg->grow_trunk : trunk_idx));
57 }
58 
59 static inline uint32_t
60 mlx5_trunk_idx_offset_get(struct mlx5_indexed_pool *pool, uint32_t trunk_idx)
61 {
62 	struct mlx5_indexed_pool_config *cfg = &pool->cfg;
63 	uint32_t offset = 0;
64 
65 	if (!trunk_idx)
66 		return 0;
67 	if (!cfg->grow_trunk)
68 		return cfg->trunk_size * trunk_idx;
69 	if (trunk_idx < cfg->grow_trunk)
70 		offset = pool->grow_tbl[trunk_idx - 1];
71 	else
72 		offset = pool->grow_tbl[cfg->grow_trunk - 1] +
73 			 (cfg->trunk_size << (cfg->grow_shift *
74 			 cfg->grow_trunk)) * (trunk_idx - cfg->grow_trunk);
75 	return offset;
76 }
77 
78 struct mlx5_indexed_pool *
79 mlx5_ipool_create(struct mlx5_indexed_pool_config *cfg)
80 {
81 	struct mlx5_indexed_pool *pool;
82 	uint32_t i;
83 
84 	if (!cfg || (!cfg->malloc ^ !cfg->free) ||
85 	    (cfg->per_core_cache && cfg->release_mem_en) ||
86 	    (cfg->trunk_size && ((cfg->trunk_size & (cfg->trunk_size - 1)) ||
87 	    ((__builtin_ffs(cfg->trunk_size) + TRUNK_IDX_BITS) > 32))))
88 		return NULL;
89 	pool = mlx5_malloc(MLX5_MEM_ZERO, sizeof(*pool) + cfg->grow_trunk *
90 			   sizeof(pool->grow_tbl[0]), RTE_CACHE_LINE_SIZE,
91 			   SOCKET_ID_ANY);
92 	if (!pool)
93 		return NULL;
94 	pool->cfg = *cfg;
95 	if (!pool->cfg.trunk_size)
96 		pool->cfg.trunk_size = MLX5_IPOOL_DEFAULT_TRUNK_SIZE;
97 	if (!cfg->malloc && !cfg->free) {
98 		pool->cfg.malloc = mlx5_malloc;
99 		pool->cfg.free = mlx5_free;
100 	}
101 	if (pool->cfg.need_lock)
102 		rte_spinlock_init(&pool->rsz_lock);
103 	/*
104 	 * Initialize the dynamic grow trunk size lookup table to have a quick
105 	 * lookup for the trunk entry index offset.
106 	 */
107 	for (i = 0; i < cfg->grow_trunk; i++) {
108 		pool->grow_tbl[i] = cfg->trunk_size << (cfg->grow_shift * i);
109 		if (i > 0)
110 			pool->grow_tbl[i] += pool->grow_tbl[i - 1];
111 	}
112 	if (!pool->cfg.max_idx)
113 		pool->cfg.max_idx =
114 			mlx5_trunk_idx_offset_get(pool, TRUNK_MAX_IDX + 1);
115 	if (!cfg->per_core_cache)
116 		pool->free_list = TRUNK_INVALID;
117 	rte_spinlock_init(&pool->lcore_lock);
118 	return pool;
119 }
120 
121 static int
122 mlx5_ipool_grow(struct mlx5_indexed_pool *pool)
123 {
124 	struct mlx5_indexed_trunk *trunk;
125 	struct mlx5_indexed_trunk **trunk_tmp;
126 	struct mlx5_indexed_trunk **p;
127 	size_t trunk_size = 0;
128 	size_t data_size;
129 	size_t bmp_size;
130 	uint32_t idx, cur_max_idx, i;
131 
132 	cur_max_idx = mlx5_trunk_idx_offset_get(pool, pool->n_trunk_valid);
133 	if (pool->n_trunk_valid == TRUNK_MAX_IDX ||
134 	    cur_max_idx >= pool->cfg.max_idx)
135 		return -ENOMEM;
136 	if (pool->n_trunk_valid == pool->n_trunk) {
137 		/* No free trunk flags, expand trunk list. */
138 		int n_grow = pool->n_trunk_valid ? pool->n_trunk :
139 			     RTE_CACHE_LINE_SIZE / sizeof(void *);
140 
141 		p = pool->cfg.malloc(0, (pool->n_trunk_valid + n_grow) *
142 				     sizeof(struct mlx5_indexed_trunk *),
143 				     RTE_CACHE_LINE_SIZE, rte_socket_id());
144 		if (!p)
145 			return -ENOMEM;
146 		if (pool->trunks)
147 			memcpy(p, pool->trunks, pool->n_trunk_valid *
148 			       sizeof(struct mlx5_indexed_trunk *));
149 		memset(RTE_PTR_ADD(p, pool->n_trunk_valid * sizeof(void *)), 0,
150 		       n_grow * sizeof(void *));
151 		trunk_tmp = pool->trunks;
152 		pool->trunks = p;
153 		if (trunk_tmp)
154 			pool->cfg.free(trunk_tmp);
155 		pool->n_trunk += n_grow;
156 	}
157 	if (!pool->cfg.release_mem_en) {
158 		idx = pool->n_trunk_valid;
159 	} else {
160 		/* Find the first available slot in trunk list */
161 		for (idx = 0; idx < pool->n_trunk; idx++)
162 			if (pool->trunks[idx] == NULL)
163 				break;
164 	}
165 	trunk_size += sizeof(*trunk);
166 	data_size = mlx5_trunk_size_get(pool, idx);
167 	bmp_size = rte_bitmap_get_memory_footprint(data_size);
168 	/* rte_bitmap requires memory cacheline aligned. */
169 	trunk_size += RTE_CACHE_LINE_ROUNDUP(data_size * pool->cfg.size);
170 	trunk_size += bmp_size;
171 	trunk = pool->cfg.malloc(0, trunk_size,
172 				 RTE_CACHE_LINE_SIZE, rte_socket_id());
173 	if (!trunk)
174 		return -ENOMEM;
175 	pool->trunks[idx] = trunk;
176 	trunk->idx = idx;
177 	trunk->free = data_size;
178 	trunk->prev = TRUNK_INVALID;
179 	trunk->next = TRUNK_INVALID;
180 	MLX5_ASSERT(pool->free_list == TRUNK_INVALID);
181 	pool->free_list = idx;
182 	/* Mark all entries as available. */
183 	trunk->bmp = rte_bitmap_init_with_all_set(data_size, &trunk->data
184 		     [RTE_CACHE_LINE_ROUNDUP(data_size * pool->cfg.size)],
185 		     bmp_size);
186 	/* Clear the overhead bits in the trunk if it happens. */
187 	if (cur_max_idx + data_size > pool->cfg.max_idx) {
188 		for (i = pool->cfg.max_idx - cur_max_idx; i < data_size; i++)
189 			rte_bitmap_clear(trunk->bmp, i);
190 	}
191 	MLX5_ASSERT(trunk->bmp);
192 	pool->n_trunk_valid++;
193 #ifdef POOL_DEBUG
194 	pool->trunk_new++;
195 	pool->trunk_avail++;
196 #endif
197 	return 0;
198 }
199 
200 static inline struct mlx5_indexed_cache *
201 mlx5_ipool_update_global_cache(struct mlx5_indexed_pool *pool, int cidx)
202 {
203 	struct mlx5_indexed_cache *gc, *lc, *olc = NULL;
204 
205 	lc = pool->cache[cidx]->lc;
206 	gc = rte_atomic_load_explicit(&pool->gc, rte_memory_order_relaxed);
207 	if (gc && lc != gc) {
208 		mlx5_ipool_lock(pool);
209 		if (lc && !(--lc->ref_cnt))
210 			olc = lc;
211 		lc = pool->gc;
212 		lc->ref_cnt++;
213 		pool->cache[cidx]->lc = lc;
214 		mlx5_ipool_unlock(pool);
215 		if (olc)
216 			pool->cfg.free(olc);
217 	}
218 	return lc;
219 }
220 
221 static uint32_t
222 mlx5_ipool_allocate_from_global(struct mlx5_indexed_pool *pool, int cidx)
223 {
224 	struct mlx5_indexed_trunk *trunk;
225 	struct mlx5_indexed_cache *p, *lc, *olc = NULL;
226 	size_t trunk_size = 0;
227 	size_t data_size;
228 	uint32_t cur_max_idx, trunk_idx, trunk_n;
229 	uint32_t fetch_size, ts_idx, i;
230 	int n_grow;
231 
232 check_again:
233 	p = NULL;
234 	fetch_size = 0;
235 	/*
236 	 * Fetch new index from global if possible. First round local
237 	 * cache will be NULL.
238 	 */
239 	lc = pool->cache[cidx]->lc;
240 	mlx5_ipool_lock(pool);
241 	/* Try to update local cache first. */
242 	if (likely(pool->gc)) {
243 		if (lc != pool->gc) {
244 			if (lc && !(--lc->ref_cnt))
245 				olc = lc;
246 			lc = pool->gc;
247 			lc->ref_cnt++;
248 			pool->cache[cidx]->lc = lc;
249 		}
250 		if (lc->len) {
251 			/* Use the updated local cache to fetch index. */
252 			fetch_size = pool->cfg.per_core_cache >> 2;
253 			if (lc->len < fetch_size)
254 				fetch_size = lc->len;
255 			lc->len -= fetch_size;
256 			memcpy(pool->cache[cidx]->idx, &lc->idx[lc->len],
257 			       sizeof(uint32_t) * fetch_size);
258 		}
259 	}
260 	mlx5_ipool_unlock(pool);
261 	if (unlikely(olc)) {
262 		pool->cfg.free(olc);
263 		olc = NULL;
264 	}
265 	if (fetch_size) {
266 		pool->cache[cidx]->len = fetch_size - 1;
267 		return pool->cache[cidx]->idx[pool->cache[cidx]->len];
268 	}
269 	trunk_idx = lc ? rte_atomic_load_explicit(&lc->n_trunk_valid,
270 			 rte_memory_order_acquire) : 0;
271 	trunk_n = lc ? lc->n_trunk : 0;
272 	cur_max_idx = mlx5_trunk_idx_offset_get(pool, trunk_idx);
273 	/* Check if index reach maximum. */
274 	if (trunk_idx == TRUNK_MAX_IDX ||
275 	    cur_max_idx >= pool->cfg.max_idx)
276 		return 0;
277 	/* No enough space in trunk array, resize the trunks array. */
278 	if (trunk_idx == trunk_n) {
279 		n_grow = trunk_idx ? trunk_idx :
280 			     RTE_CACHE_LINE_SIZE / sizeof(void *);
281 		cur_max_idx = mlx5_trunk_idx_offset_get(pool, trunk_n + n_grow);
282 		/* Resize the trunk array. */
283 		p = pool->cfg.malloc(0, ((trunk_idx + n_grow) *
284 			sizeof(struct mlx5_indexed_trunk *)) +
285 			(cur_max_idx * sizeof(uint32_t)) + sizeof(*p),
286 			RTE_CACHE_LINE_SIZE, rte_socket_id());
287 		if (!p)
288 			return 0;
289 		p->trunks = (struct mlx5_indexed_trunk **)&p->idx[cur_max_idx];
290 		if (lc)
291 			memcpy(p->trunks, lc->trunks, trunk_idx *
292 		       sizeof(struct mlx5_indexed_trunk *));
293 #ifdef RTE_LIBRTE_MLX5_DEBUG
294 		memset(RTE_PTR_ADD(p->trunks, trunk_idx * sizeof(void *)), 0,
295 			n_grow * sizeof(void *));
296 #endif
297 		p->n_trunk_valid = trunk_idx;
298 		p->n_trunk = trunk_n + n_grow;
299 		p->len = 0;
300 	}
301 	/* Prepare the new trunk. */
302 	trunk_size = sizeof(*trunk);
303 	data_size = mlx5_trunk_size_get(pool, trunk_idx);
304 	trunk_size += RTE_CACHE_LINE_ROUNDUP(data_size * pool->cfg.size);
305 	trunk = pool->cfg.malloc(0, trunk_size,
306 				 RTE_CACHE_LINE_SIZE, rte_socket_id());
307 	if (unlikely(!trunk)) {
308 		pool->cfg.free(p);
309 		return 0;
310 	}
311 	trunk->idx = trunk_idx;
312 	trunk->free = data_size;
313 	mlx5_ipool_lock(pool);
314 	/*
315 	 * Double check if trunks has been updated or have available index.
316 	 * During the new trunk allocate, index may still be flushed to the
317 	 * global cache. So also need to check the pool->gc->len.
318 	 */
319 	if (pool->gc && (lc != pool->gc ||
320 	    lc->n_trunk_valid != trunk_idx ||
321 	    pool->gc->len)) {
322 		mlx5_ipool_unlock(pool);
323 		if (p)
324 			pool->cfg.free(p);
325 		pool->cfg.free(trunk);
326 		goto check_again;
327 	}
328 	/* Resize the trunk array and update local cache first.  */
329 	if (p) {
330 		if (lc && !(--lc->ref_cnt))
331 			olc = lc;
332 		lc = p;
333 		lc->ref_cnt = 1;
334 		pool->cache[cidx]->lc = lc;
335 		rte_atomic_store_explicit(&pool->gc, p, rte_memory_order_relaxed);
336 	}
337 	/* Add trunk to trunks array. */
338 	lc->trunks[trunk_idx] = trunk;
339 	rte_atomic_fetch_add_explicit(&lc->n_trunk_valid, 1, rte_memory_order_relaxed);
340 	/* Enqueue half of the index to global. */
341 	ts_idx = mlx5_trunk_idx_offset_get(pool, trunk_idx) + 1;
342 	fetch_size = trunk->free >> 1;
343 	if (fetch_size > pool->cfg.per_core_cache)
344 		fetch_size = trunk->free - pool->cfg.per_core_cache;
345 	for (i = 0; i < fetch_size; i++)
346 		lc->idx[i] = ts_idx + i;
347 	lc->len = fetch_size;
348 	mlx5_ipool_unlock(pool);
349 	/* Copy left half - 1 to local cache index array. */
350 	pool->cache[cidx]->len = trunk->free - fetch_size - 1;
351 	ts_idx += fetch_size;
352 	for (i = 0; i < pool->cache[cidx]->len; i++)
353 		pool->cache[cidx]->idx[i] = ts_idx + i;
354 	if (olc)
355 		pool->cfg.free(olc);
356 	return ts_idx + i;
357 }
358 
359 static void *
360 _mlx5_ipool_get_cache(struct mlx5_indexed_pool *pool, int cidx, uint32_t idx)
361 {
362 	struct mlx5_indexed_trunk *trunk;
363 	struct mlx5_indexed_cache *lc;
364 	uint32_t trunk_idx;
365 	uint32_t entry_idx;
366 
367 	MLX5_ASSERT(idx);
368 	if (unlikely(!pool->cache[cidx])) {
369 		pool->cache[cidx] = pool->cfg.malloc(MLX5_MEM_ZERO,
370 			sizeof(struct mlx5_ipool_per_lcore) +
371 			(pool->cfg.per_core_cache * sizeof(uint32_t)),
372 			RTE_CACHE_LINE_SIZE, SOCKET_ID_ANY);
373 		if (!pool->cache[cidx]) {
374 			DRV_LOG(ERR, "Ipool cache%d allocate failed\n", cidx);
375 			return NULL;
376 		}
377 	}
378 	lc = mlx5_ipool_update_global_cache(pool, cidx);
379 	idx -= 1;
380 	trunk_idx = mlx5_trunk_idx_get(pool, idx);
381 	trunk = lc->trunks[trunk_idx];
382 	if (!trunk)
383 		return NULL;
384 	entry_idx = idx - mlx5_trunk_idx_offset_get(pool, trunk_idx);
385 	return &trunk->data[entry_idx * pool->cfg.size];
386 }
387 
388 static void *
389 mlx5_ipool_get_cache(struct mlx5_indexed_pool *pool, uint32_t idx)
390 {
391 	void *entry;
392 	int cidx;
393 
394 	cidx = rte_lcore_index(rte_lcore_id());
395 	if (unlikely(cidx == -1)) {
396 		cidx = RTE_MAX_LCORE;
397 		rte_spinlock_lock(&pool->lcore_lock);
398 	}
399 	entry = _mlx5_ipool_get_cache(pool, cidx, idx);
400 	if (unlikely(cidx == RTE_MAX_LCORE))
401 		rte_spinlock_unlock(&pool->lcore_lock);
402 	return entry;
403 }
404 
405 
406 static void *
407 _mlx5_ipool_malloc_cache(struct mlx5_indexed_pool *pool, int cidx,
408 			 uint32_t *idx)
409 {
410 	if (unlikely(!pool->cache[cidx])) {
411 		pool->cache[cidx] = pool->cfg.malloc(MLX5_MEM_ZERO,
412 			sizeof(struct mlx5_ipool_per_lcore) +
413 			(pool->cfg.per_core_cache * sizeof(uint32_t)),
414 			RTE_CACHE_LINE_SIZE, SOCKET_ID_ANY);
415 		if (!pool->cache[cidx]) {
416 			DRV_LOG(ERR, "Ipool cache%d allocate failed\n", cidx);
417 			return NULL;
418 		}
419 	} else if (pool->cache[cidx]->len) {
420 		pool->cache[cidx]->len--;
421 		*idx = pool->cache[cidx]->idx[pool->cache[cidx]->len];
422 		return _mlx5_ipool_get_cache(pool, cidx, *idx);
423 	}
424 	/* Not enough idx in global cache. Keep fetching from global. */
425 	*idx = mlx5_ipool_allocate_from_global(pool, cidx);
426 	if (unlikely(!(*idx)))
427 		return NULL;
428 	return _mlx5_ipool_get_cache(pool, cidx, *idx);
429 }
430 
431 static void *
432 mlx5_ipool_malloc_cache(struct mlx5_indexed_pool *pool, uint32_t *idx)
433 {
434 	void *entry;
435 	int cidx;
436 
437 	cidx = rte_lcore_index(rte_lcore_id());
438 	if (unlikely(cidx == -1)) {
439 		cidx = RTE_MAX_LCORE;
440 		rte_spinlock_lock(&pool->lcore_lock);
441 	}
442 	entry = _mlx5_ipool_malloc_cache(pool, cidx, idx);
443 	if (unlikely(cidx == RTE_MAX_LCORE))
444 		rte_spinlock_unlock(&pool->lcore_lock);
445 	return entry;
446 }
447 
448 static void
449 _mlx5_ipool_free_cache(struct mlx5_indexed_pool *pool, int cidx, uint32_t idx)
450 {
451 	struct mlx5_ipool_per_lcore *ilc;
452 	struct mlx5_indexed_cache *gc, *olc = NULL;
453 	uint32_t reclaim_num = 0;
454 
455 	MLX5_ASSERT(idx);
456 	/*
457 	 * When index was allocated on core A but freed on core B. In this
458 	 * case check if local cache on core B was allocated before.
459 	 */
460 	if (unlikely(!pool->cache[cidx])) {
461 		pool->cache[cidx] = pool->cfg.malloc(MLX5_MEM_ZERO,
462 			sizeof(struct mlx5_ipool_per_lcore) +
463 			(pool->cfg.per_core_cache * sizeof(uint32_t)),
464 			RTE_CACHE_LINE_SIZE, SOCKET_ID_ANY);
465 		if (!pool->cache[cidx]) {
466 			DRV_LOG(ERR, "Ipool cache%d allocate failed\n", cidx);
467 			return;
468 		}
469 	}
470 	/* Try to enqueue to local index cache. */
471 	if (pool->cache[cidx]->len < pool->cfg.per_core_cache) {
472 		pool->cache[cidx]->idx[pool->cache[cidx]->len] = idx;
473 		pool->cache[cidx]->len++;
474 		return;
475 	}
476 	ilc = pool->cache[cidx];
477 	reclaim_num = pool->cfg.per_core_cache >> 2;
478 	ilc->len -= reclaim_num;
479 	/* Local index cache full, try with global index cache. */
480 	mlx5_ipool_lock(pool);
481 	gc = pool->gc;
482 	if (ilc->lc != gc) {
483 		if (ilc->lc && !(--ilc->lc->ref_cnt))
484 			olc = ilc->lc;
485 		gc->ref_cnt++;
486 		ilc->lc = gc;
487 	}
488 	memcpy(&gc->idx[gc->len], &ilc->idx[ilc->len],
489 	       reclaim_num * sizeof(uint32_t));
490 	gc->len += reclaim_num;
491 	mlx5_ipool_unlock(pool);
492 	if (olc)
493 		pool->cfg.free(olc);
494 	pool->cache[cidx]->idx[pool->cache[cidx]->len] = idx;
495 	pool->cache[cidx]->len++;
496 }
497 
498 static void
499 mlx5_ipool_free_cache(struct mlx5_indexed_pool *pool, uint32_t idx)
500 {
501 	int cidx;
502 
503 	cidx = rte_lcore_index(rte_lcore_id());
504 	if (unlikely(cidx == -1)) {
505 		cidx = RTE_MAX_LCORE;
506 		rte_spinlock_lock(&pool->lcore_lock);
507 	}
508 	_mlx5_ipool_free_cache(pool, cidx, idx);
509 	if (unlikely(cidx == RTE_MAX_LCORE))
510 		rte_spinlock_unlock(&pool->lcore_lock);
511 }
512 
513 void *
514 mlx5_ipool_malloc(struct mlx5_indexed_pool *pool, uint32_t *idx)
515 {
516 	struct mlx5_indexed_trunk *trunk;
517 	uint64_t slab = 0;
518 	uint32_t iidx = 0;
519 	void *p;
520 
521 	if (pool->cfg.per_core_cache)
522 		return mlx5_ipool_malloc_cache(pool, idx);
523 	mlx5_ipool_lock(pool);
524 	if (pool->free_list == TRUNK_INVALID) {
525 		/* If no available trunks, grow new. */
526 		if (mlx5_ipool_grow(pool)) {
527 			mlx5_ipool_unlock(pool);
528 			return NULL;
529 		}
530 	}
531 	MLX5_ASSERT(pool->free_list != TRUNK_INVALID);
532 	trunk = pool->trunks[pool->free_list];
533 	MLX5_ASSERT(trunk->free);
534 	if (!rte_bitmap_scan(trunk->bmp, &iidx, &slab)) {
535 		mlx5_ipool_unlock(pool);
536 		return NULL;
537 	}
538 	MLX5_ASSERT(slab);
539 	iidx += rte_ctz64(slab);
540 	MLX5_ASSERT(iidx != UINT32_MAX);
541 	MLX5_ASSERT(iidx < mlx5_trunk_size_get(pool, trunk->idx));
542 	rte_bitmap_clear(trunk->bmp, iidx);
543 	p = &trunk->data[iidx * pool->cfg.size];
544 	/*
545 	 * The ipool index should grow continually from small to big,
546 	 * some features as metering only accept limited bits of index.
547 	 * Random index with MSB set may be rejected.
548 	 */
549 	iidx += mlx5_trunk_idx_offset_get(pool, trunk->idx);
550 	iidx += 1; /* non-zero index. */
551 	trunk->free--;
552 #ifdef POOL_DEBUG
553 	pool->n_entry++;
554 #endif
555 	if (!trunk->free) {
556 		/* Full trunk will be removed from free list in imalloc. */
557 		MLX5_ASSERT(pool->free_list == trunk->idx);
558 		pool->free_list = trunk->next;
559 		if (trunk->next != TRUNK_INVALID)
560 			pool->trunks[trunk->next]->prev = TRUNK_INVALID;
561 		trunk->prev = TRUNK_INVALID;
562 		trunk->next = TRUNK_INVALID;
563 #ifdef POOL_DEBUG
564 		pool->trunk_empty++;
565 		pool->trunk_avail--;
566 #endif
567 	}
568 	*idx = iidx;
569 	mlx5_ipool_unlock(pool);
570 	return p;
571 }
572 
573 void *
574 mlx5_ipool_zmalloc(struct mlx5_indexed_pool *pool, uint32_t *idx)
575 {
576 	void *entry = mlx5_ipool_malloc(pool, idx);
577 
578 	if (entry && pool->cfg.size)
579 		memset(entry, 0, pool->cfg.size);
580 	return entry;
581 }
582 
583 void
584 mlx5_ipool_free(struct mlx5_indexed_pool *pool, uint32_t idx)
585 {
586 	struct mlx5_indexed_trunk *trunk;
587 	uint32_t trunk_idx;
588 	uint32_t entry_idx;
589 
590 	if (!idx)
591 		return;
592 	if (pool->cfg.per_core_cache) {
593 		mlx5_ipool_free_cache(pool, idx);
594 		return;
595 	}
596 	idx -= 1;
597 	mlx5_ipool_lock(pool);
598 	trunk_idx = mlx5_trunk_idx_get(pool, idx);
599 	if ((!pool->cfg.release_mem_en && trunk_idx >= pool->n_trunk_valid) ||
600 	    (pool->cfg.release_mem_en && trunk_idx >= pool->n_trunk))
601 		goto out;
602 	trunk = pool->trunks[trunk_idx];
603 	if (!trunk)
604 		goto out;
605 	entry_idx = idx - mlx5_trunk_idx_offset_get(pool, trunk->idx);
606 	if (trunk_idx != trunk->idx ||
607 	    rte_bitmap_get(trunk->bmp, entry_idx))
608 		goto out;
609 	rte_bitmap_set(trunk->bmp, entry_idx);
610 	trunk->free++;
611 	if (pool->cfg.release_mem_en && trunk->free == mlx5_trunk_size_get
612 	   (pool, trunk->idx)) {
613 		if (pool->free_list == trunk->idx)
614 			pool->free_list = trunk->next;
615 		if (trunk->next != TRUNK_INVALID)
616 			pool->trunks[trunk->next]->prev = trunk->prev;
617 		if (trunk->prev != TRUNK_INVALID)
618 			pool->trunks[trunk->prev]->next = trunk->next;
619 		pool->cfg.free(trunk);
620 		pool->trunks[trunk_idx] = NULL;
621 		pool->n_trunk_valid--;
622 #ifdef POOL_DEBUG
623 		pool->trunk_avail--;
624 		pool->trunk_free++;
625 #endif
626 		if (pool->n_trunk_valid == 0) {
627 			pool->cfg.free(pool->trunks);
628 			pool->trunks = NULL;
629 			pool->n_trunk = 0;
630 		}
631 	} else if (trunk->free == 1) {
632 		/* Put into free trunk list head. */
633 		MLX5_ASSERT(pool->free_list != trunk->idx);
634 		trunk->next = pool->free_list;
635 		trunk->prev = TRUNK_INVALID;
636 		if (pool->free_list != TRUNK_INVALID)
637 			pool->trunks[pool->free_list]->prev = trunk->idx;
638 		pool->free_list = trunk->idx;
639 #ifdef POOL_DEBUG
640 		pool->trunk_empty--;
641 		pool->trunk_avail++;
642 #endif
643 	}
644 #ifdef POOL_DEBUG
645 	pool->n_entry--;
646 #endif
647 out:
648 	mlx5_ipool_unlock(pool);
649 }
650 
651 void *
652 mlx5_ipool_get(struct mlx5_indexed_pool *pool, uint32_t idx)
653 {
654 	struct mlx5_indexed_trunk *trunk;
655 	void *p = NULL;
656 	uint32_t trunk_idx;
657 	uint32_t entry_idx;
658 
659 	if (!idx)
660 		return NULL;
661 	if (pool->cfg.per_core_cache)
662 		return mlx5_ipool_get_cache(pool, idx);
663 	idx -= 1;
664 	mlx5_ipool_lock(pool);
665 	trunk_idx = mlx5_trunk_idx_get(pool, idx);
666 	if ((!pool->cfg.release_mem_en && trunk_idx >= pool->n_trunk_valid) ||
667 	    (pool->cfg.release_mem_en && trunk_idx >= pool->n_trunk))
668 		goto out;
669 	trunk = pool->trunks[trunk_idx];
670 	if (!trunk)
671 		goto out;
672 	entry_idx = idx - mlx5_trunk_idx_offset_get(pool, trunk->idx);
673 	if (trunk_idx != trunk->idx ||
674 	    rte_bitmap_get(trunk->bmp, entry_idx))
675 		goto out;
676 	p = &trunk->data[entry_idx * pool->cfg.size];
677 out:
678 	mlx5_ipool_unlock(pool);
679 	return p;
680 }
681 
682 int
683 mlx5_ipool_destroy(struct mlx5_indexed_pool *pool)
684 {
685 	struct mlx5_indexed_trunk **trunks = NULL;
686 	struct mlx5_indexed_cache *gc = pool->gc;
687 	uint32_t i, n_trunk_valid = 0;
688 
689 	MLX5_ASSERT(pool);
690 	mlx5_ipool_lock(pool);
691 	if (pool->cfg.per_core_cache) {
692 		for (i = 0; i <= RTE_MAX_LCORE; i++) {
693 			/*
694 			 * Free only old global cache. Pool gc will be
695 			 * freed at last.
696 			 */
697 			if (pool->cache[i]) {
698 				if (pool->cache[i]->lc &&
699 				    pool->cache[i]->lc != pool->gc &&
700 				    (!(--pool->cache[i]->lc->ref_cnt)))
701 					pool->cfg.free(pool->cache[i]->lc);
702 				pool->cfg.free(pool->cache[i]);
703 			}
704 		}
705 		if (gc) {
706 			trunks = gc->trunks;
707 			n_trunk_valid = gc->n_trunk_valid;
708 		}
709 	} else {
710 		gc = NULL;
711 		trunks = pool->trunks;
712 		n_trunk_valid = pool->n_trunk_valid;
713 	}
714 	for (i = 0; i < n_trunk_valid; i++) {
715 		if (trunks[i])
716 			pool->cfg.free(trunks[i]);
717 	}
718 	if (!gc && trunks)
719 		pool->cfg.free(trunks);
720 	if (gc)
721 		pool->cfg.free(gc);
722 	mlx5_ipool_unlock(pool);
723 	mlx5_free(pool);
724 	return 0;
725 }
726 
727 void
728 mlx5_ipool_flush_cache(struct mlx5_indexed_pool *pool)
729 {
730 	uint32_t i, j;
731 	struct mlx5_indexed_cache *gc;
732 	struct rte_bitmap *ibmp;
733 	uint32_t bmp_num, mem_size;
734 
735 	if (!pool->cfg.per_core_cache)
736 		return;
737 	gc = pool->gc;
738 	if (!gc)
739 		return;
740 	/* Reset bmp. */
741 	bmp_num = mlx5_trunk_idx_offset_get(pool, gc->n_trunk_valid);
742 	mem_size = rte_bitmap_get_memory_footprint(bmp_num);
743 	pool->bmp_mem = pool->cfg.malloc(MLX5_MEM_ZERO, mem_size,
744 					 RTE_CACHE_LINE_SIZE, rte_socket_id());
745 	if (!pool->bmp_mem) {
746 		DRV_LOG(ERR, "Ipool bitmap mem allocate failed.\n");
747 		return;
748 	}
749 	ibmp = rte_bitmap_init_with_all_set(bmp_num, pool->bmp_mem, mem_size);
750 	if (!ibmp) {
751 		pool->cfg.free(pool->bmp_mem);
752 		pool->bmp_mem = NULL;
753 		DRV_LOG(ERR, "Ipool bitmap create failed.\n");
754 		return;
755 	}
756 	pool->ibmp = ibmp;
757 	/* Clear global cache. */
758 	for (i = 0; i < gc->len; i++)
759 		rte_bitmap_clear(ibmp, gc->idx[i] - 1);
760 	/* Clear core cache. */
761 	for (i = 0; i < RTE_MAX_LCORE + 1; i++) {
762 		struct mlx5_ipool_per_lcore *ilc = pool->cache[i];
763 
764 		if (!ilc)
765 			continue;
766 		for (j = 0; j < ilc->len; j++)
767 			rte_bitmap_clear(ibmp, ilc->idx[j] - 1);
768 	}
769 }
770 
771 static void *
772 mlx5_ipool_get_next_cache(struct mlx5_indexed_pool *pool, uint32_t *pos)
773 {
774 	struct rte_bitmap *ibmp;
775 	uint64_t slab = 0;
776 	uint32_t iidx = *pos;
777 
778 	ibmp = pool->ibmp;
779 	if (!ibmp || !rte_bitmap_scan(ibmp, &iidx, &slab)) {
780 		if (pool->bmp_mem) {
781 			pool->cfg.free(pool->bmp_mem);
782 			pool->bmp_mem = NULL;
783 			pool->ibmp = NULL;
784 		}
785 		return NULL;
786 	}
787 	iidx += rte_ctz64(slab);
788 	rte_bitmap_clear(ibmp, iidx);
789 	iidx++;
790 	*pos = iidx;
791 	return mlx5_ipool_get_cache(pool, iidx);
792 }
793 
794 void *
795 mlx5_ipool_get_next(struct mlx5_indexed_pool *pool, uint32_t *pos)
796 {
797 	uint32_t idx = *pos;
798 	void *entry;
799 
800 	if (pool->cfg.per_core_cache)
801 		return mlx5_ipool_get_next_cache(pool, pos);
802 	while (idx <= mlx5_trunk_idx_offset_get(pool, pool->n_trunk)) {
803 		entry = mlx5_ipool_get(pool, idx);
804 		if (entry) {
805 			*pos = idx;
806 			return entry;
807 		}
808 		idx++;
809 	}
810 	return NULL;
811 }
812 
813 int
814 mlx5_ipool_resize(struct mlx5_indexed_pool *pool, uint32_t num_entries)
815 {
816 	uint32_t cur_max_idx;
817 	uint32_t max_index = mlx5_trunk_idx_offset_get(pool, TRUNK_MAX_IDX + 1);
818 
819 	if (num_entries % pool->cfg.trunk_size) {
820 		DRV_LOG(ERR, "num_entries param should be trunk_size(=%u) multiplication\n",
821 			pool->cfg.trunk_size);
822 		return -EINVAL;
823 	}
824 
825 	mlx5_ipool_lock(pool);
826 	cur_max_idx = pool->cfg.max_idx + num_entries;
827 	/* If the ipool max idx is above maximum or uint overflow occurred. */
828 	if (cur_max_idx > max_index || cur_max_idx < num_entries) {
829 		DRV_LOG(ERR, "Ipool resize failed\n");
830 		DRV_LOG(ERR, "Adding %u entries to existing %u entries, will cross max limit(=%u)\n",
831 		num_entries, cur_max_idx, max_index);
832 		mlx5_ipool_unlock(pool);
833 		return -EINVAL;
834 	}
835 
836 	/* Update maximum entries number. */
837 	pool->cfg.max_idx = cur_max_idx;
838 	mlx5_ipool_unlock(pool);
839 	return 0;
840 }
841 
842 void
843 mlx5_ipool_dump(struct mlx5_indexed_pool *pool)
844 {
845 	printf("Pool %s entry size %u, trunks %u, %d entry per trunk, "
846 	       "total: %d\n",
847 	       pool->cfg.type, pool->cfg.size, pool->n_trunk_valid,
848 	       pool->cfg.trunk_size, pool->n_trunk_valid);
849 #ifdef POOL_DEBUG
850 	printf("Pool %s entry %u, trunk alloc %u, empty: %u, "
851 	       "available %u free %u\n",
852 	       pool->cfg.type, pool->n_entry, pool->trunk_new,
853 	       pool->trunk_empty, pool->trunk_avail, pool->trunk_free);
854 #endif
855 }
856 
857 struct mlx5_l3t_tbl *
858 mlx5_l3t_create(enum mlx5_l3t_type type)
859 {
860 	struct mlx5_l3t_tbl *tbl;
861 	struct mlx5_indexed_pool_config l3t_ip_cfg = {
862 		.trunk_size = 16,
863 		.grow_trunk = 6,
864 		.grow_shift = 1,
865 		.need_lock = 0,
866 		.release_mem_en = 1,
867 		.malloc = mlx5_malloc,
868 		.free = mlx5_free,
869 	};
870 
871 	if (type >= MLX5_L3T_TYPE_MAX) {
872 		rte_errno = EINVAL;
873 		return NULL;
874 	}
875 	tbl = mlx5_malloc(MLX5_MEM_ZERO, sizeof(struct mlx5_l3t_tbl), 1,
876 			  SOCKET_ID_ANY);
877 	if (!tbl) {
878 		rte_errno = ENOMEM;
879 		return NULL;
880 	}
881 	tbl->type = type;
882 	switch (type) {
883 	case MLX5_L3T_TYPE_WORD:
884 		l3t_ip_cfg.size = sizeof(struct mlx5_l3t_entry_word);
885 		l3t_ip_cfg.type = "mlx5_l3t_e_tbl_w";
886 		break;
887 	case MLX5_L3T_TYPE_DWORD:
888 		l3t_ip_cfg.size = sizeof(struct mlx5_l3t_entry_dword);
889 		l3t_ip_cfg.type = "mlx5_l3t_e_tbl_dw";
890 		break;
891 	case MLX5_L3T_TYPE_QWORD:
892 		l3t_ip_cfg.size = sizeof(struct mlx5_l3t_entry_qword);
893 		l3t_ip_cfg.type = "mlx5_l3t_e_tbl_qw";
894 		break;
895 	default:
896 		l3t_ip_cfg.size = sizeof(struct mlx5_l3t_entry_ptr);
897 		l3t_ip_cfg.type = "mlx5_l3t_e_tbl_tpr";
898 		break;
899 	}
900 	rte_spinlock_init(&tbl->sl);
901 	tbl->eip = mlx5_ipool_create(&l3t_ip_cfg);
902 	if (!tbl->eip) {
903 		rte_errno = ENOMEM;
904 		mlx5_free(tbl);
905 		tbl = NULL;
906 	}
907 	return tbl;
908 }
909 
910 void
911 mlx5_l3t_destroy(struct mlx5_l3t_tbl *tbl)
912 {
913 	struct mlx5_l3t_level_tbl *g_tbl, *m_tbl;
914 	uint32_t i, j;
915 
916 	if (!tbl)
917 		return;
918 	g_tbl = tbl->tbl;
919 	if (g_tbl) {
920 		for (i = 0; i < MLX5_L3T_GT_SIZE; i++) {
921 			m_tbl = g_tbl->tbl[i];
922 			if (!m_tbl)
923 				continue;
924 			for (j = 0; j < MLX5_L3T_MT_SIZE; j++) {
925 				if (!m_tbl->tbl[j])
926 					continue;
927 				MLX5_ASSERT(!((struct mlx5_l3t_entry_word *)
928 					    m_tbl->tbl[j])->ref_cnt);
929 				mlx5_ipool_free(tbl->eip,
930 						((struct mlx5_l3t_entry_word *)
931 						m_tbl->tbl[j])->idx);
932 				m_tbl->tbl[j] = 0;
933 				if (!(--m_tbl->ref_cnt))
934 					break;
935 			}
936 			MLX5_ASSERT(!m_tbl->ref_cnt);
937 			mlx5_free(g_tbl->tbl[i]);
938 			g_tbl->tbl[i] = 0;
939 			if (!(--g_tbl->ref_cnt))
940 				break;
941 		}
942 		MLX5_ASSERT(!g_tbl->ref_cnt);
943 		mlx5_free(tbl->tbl);
944 		tbl->tbl = 0;
945 	}
946 	mlx5_ipool_destroy(tbl->eip);
947 	mlx5_free(tbl);
948 }
949 
950 static int32_t
951 __l3t_get_entry(struct mlx5_l3t_tbl *tbl, uint32_t idx,
952 		union mlx5_l3t_data *data)
953 {
954 	struct mlx5_l3t_level_tbl *g_tbl, *m_tbl;
955 	struct mlx5_l3t_entry_word *w_e_tbl;
956 	struct mlx5_l3t_entry_dword *dw_e_tbl;
957 	struct mlx5_l3t_entry_qword *qw_e_tbl;
958 	struct mlx5_l3t_entry_ptr *ptr_e_tbl;
959 	void *e_tbl;
960 	uint32_t entry_idx;
961 
962 	g_tbl = tbl->tbl;
963 	if (!g_tbl)
964 		return -1;
965 	m_tbl = g_tbl->tbl[(idx >> MLX5_L3T_GT_OFFSET) & MLX5_L3T_GT_MASK];
966 	if (!m_tbl)
967 		return -1;
968 	e_tbl = m_tbl->tbl[(idx >> MLX5_L3T_MT_OFFSET) & MLX5_L3T_MT_MASK];
969 	if (!e_tbl)
970 		return -1;
971 	entry_idx = idx & MLX5_L3T_ET_MASK;
972 	switch (tbl->type) {
973 	case MLX5_L3T_TYPE_WORD:
974 		w_e_tbl = (struct mlx5_l3t_entry_word *)e_tbl;
975 		data->word = w_e_tbl->entry[entry_idx].data;
976 		if (w_e_tbl->entry[entry_idx].data)
977 			w_e_tbl->entry[entry_idx].ref_cnt++;
978 		break;
979 	case MLX5_L3T_TYPE_DWORD:
980 		dw_e_tbl = (struct mlx5_l3t_entry_dword *)e_tbl;
981 		data->dword = dw_e_tbl->entry[entry_idx].data;
982 		if (dw_e_tbl->entry[entry_idx].data)
983 			dw_e_tbl->entry[entry_idx].ref_cnt++;
984 		break;
985 	case MLX5_L3T_TYPE_QWORD:
986 		qw_e_tbl = (struct mlx5_l3t_entry_qword *)e_tbl;
987 		data->qword = qw_e_tbl->entry[entry_idx].data;
988 		if (qw_e_tbl->entry[entry_idx].data)
989 			qw_e_tbl->entry[entry_idx].ref_cnt++;
990 		break;
991 	default:
992 		ptr_e_tbl = (struct mlx5_l3t_entry_ptr *)e_tbl;
993 		data->ptr = ptr_e_tbl->entry[entry_idx].data;
994 		if (ptr_e_tbl->entry[entry_idx].data)
995 			ptr_e_tbl->entry[entry_idx].ref_cnt++;
996 		break;
997 	}
998 	return 0;
999 }
1000 
1001 int32_t
1002 mlx5_l3t_get_entry(struct mlx5_l3t_tbl *tbl, uint32_t idx,
1003 		   union mlx5_l3t_data *data)
1004 {
1005 	int ret;
1006 
1007 	rte_spinlock_lock(&tbl->sl);
1008 	ret = __l3t_get_entry(tbl, idx, data);
1009 	rte_spinlock_unlock(&tbl->sl);
1010 	return ret;
1011 }
1012 
1013 int32_t
1014 mlx5_l3t_clear_entry(struct mlx5_l3t_tbl *tbl, uint32_t idx)
1015 {
1016 	struct mlx5_l3t_level_tbl *g_tbl, *m_tbl;
1017 	struct mlx5_l3t_entry_word *w_e_tbl;
1018 	struct mlx5_l3t_entry_dword *dw_e_tbl;
1019 	struct mlx5_l3t_entry_qword *qw_e_tbl;
1020 	struct mlx5_l3t_entry_ptr *ptr_e_tbl;
1021 	void *e_tbl;
1022 	uint32_t entry_idx;
1023 	uint64_t ref_cnt;
1024 	int32_t ret = -1;
1025 
1026 	rte_spinlock_lock(&tbl->sl);
1027 	g_tbl = tbl->tbl;
1028 	if (!g_tbl)
1029 		goto out;
1030 	m_tbl = g_tbl->tbl[(idx >> MLX5_L3T_GT_OFFSET) & MLX5_L3T_GT_MASK];
1031 	if (!m_tbl)
1032 		goto out;
1033 	e_tbl = m_tbl->tbl[(idx >> MLX5_L3T_MT_OFFSET) & MLX5_L3T_MT_MASK];
1034 	if (!e_tbl)
1035 		goto out;
1036 	entry_idx = idx & MLX5_L3T_ET_MASK;
1037 	switch (tbl->type) {
1038 	case MLX5_L3T_TYPE_WORD:
1039 		w_e_tbl = (struct mlx5_l3t_entry_word *)e_tbl;
1040 		MLX5_ASSERT(w_e_tbl->entry[entry_idx].ref_cnt);
1041 		ret = --w_e_tbl->entry[entry_idx].ref_cnt;
1042 		if (ret)
1043 			goto out;
1044 		w_e_tbl->entry[entry_idx].data = 0;
1045 		ref_cnt = --w_e_tbl->ref_cnt;
1046 		break;
1047 	case MLX5_L3T_TYPE_DWORD:
1048 		dw_e_tbl = (struct mlx5_l3t_entry_dword *)e_tbl;
1049 		MLX5_ASSERT(dw_e_tbl->entry[entry_idx].ref_cnt);
1050 		ret = --dw_e_tbl->entry[entry_idx].ref_cnt;
1051 		if (ret)
1052 			goto out;
1053 		dw_e_tbl->entry[entry_idx].data = 0;
1054 		ref_cnt = --dw_e_tbl->ref_cnt;
1055 		break;
1056 	case MLX5_L3T_TYPE_QWORD:
1057 		qw_e_tbl = (struct mlx5_l3t_entry_qword *)e_tbl;
1058 		MLX5_ASSERT(qw_e_tbl->entry[entry_idx].ref_cnt);
1059 		ret = --qw_e_tbl->entry[entry_idx].ref_cnt;
1060 		if (ret)
1061 			goto out;
1062 		qw_e_tbl->entry[entry_idx].data = 0;
1063 		ref_cnt = --qw_e_tbl->ref_cnt;
1064 		break;
1065 	default:
1066 		ptr_e_tbl = (struct mlx5_l3t_entry_ptr *)e_tbl;
1067 		MLX5_ASSERT(ptr_e_tbl->entry[entry_idx].ref_cnt);
1068 		ret = --ptr_e_tbl->entry[entry_idx].ref_cnt;
1069 		if (ret)
1070 			goto out;
1071 		ptr_e_tbl->entry[entry_idx].data = NULL;
1072 		ref_cnt = --ptr_e_tbl->ref_cnt;
1073 		break;
1074 	}
1075 	if (!ref_cnt) {
1076 		mlx5_ipool_free(tbl->eip,
1077 				((struct mlx5_l3t_entry_word *)e_tbl)->idx);
1078 		m_tbl->tbl[(idx >> MLX5_L3T_MT_OFFSET) & MLX5_L3T_MT_MASK] =
1079 									NULL;
1080 		if (!(--m_tbl->ref_cnt)) {
1081 			mlx5_free(m_tbl);
1082 			g_tbl->tbl
1083 			[(idx >> MLX5_L3T_GT_OFFSET) & MLX5_L3T_GT_MASK] = NULL;
1084 			if (!(--g_tbl->ref_cnt)) {
1085 				mlx5_free(g_tbl);
1086 				tbl->tbl = 0;
1087 			}
1088 		}
1089 	}
1090 out:
1091 	rte_spinlock_unlock(&tbl->sl);
1092 	return ret;
1093 }
1094 
1095 static int32_t
1096 __l3t_set_entry(struct mlx5_l3t_tbl *tbl, uint32_t idx,
1097 		union mlx5_l3t_data *data)
1098 {
1099 	struct mlx5_l3t_level_tbl *g_tbl, *m_tbl;
1100 	struct mlx5_l3t_entry_word *w_e_tbl;
1101 	struct mlx5_l3t_entry_dword *dw_e_tbl;
1102 	struct mlx5_l3t_entry_qword *qw_e_tbl;
1103 	struct mlx5_l3t_entry_ptr *ptr_e_tbl;
1104 	void *e_tbl;
1105 	uint32_t entry_idx, tbl_idx = 0;
1106 
1107 	/* Check the global table, create it if empty. */
1108 	g_tbl = tbl->tbl;
1109 	if (!g_tbl) {
1110 		g_tbl = mlx5_malloc(MLX5_MEM_ZERO,
1111 				    sizeof(struct mlx5_l3t_level_tbl) +
1112 				    sizeof(void *) * MLX5_L3T_GT_SIZE, 1,
1113 				    SOCKET_ID_ANY);
1114 		if (!g_tbl) {
1115 			rte_errno = ENOMEM;
1116 			return -1;
1117 		}
1118 		tbl->tbl = g_tbl;
1119 	}
1120 	/*
1121 	 * Check the middle table, create it if empty. Ref_cnt will be
1122 	 * increased if new sub table created.
1123 	 */
1124 	m_tbl = g_tbl->tbl[(idx >> MLX5_L3T_GT_OFFSET) & MLX5_L3T_GT_MASK];
1125 	if (!m_tbl) {
1126 		m_tbl = mlx5_malloc(MLX5_MEM_ZERO,
1127 				    sizeof(struct mlx5_l3t_level_tbl) +
1128 				    sizeof(void *) * MLX5_L3T_MT_SIZE, 1,
1129 				    SOCKET_ID_ANY);
1130 		if (!m_tbl) {
1131 			rte_errno = ENOMEM;
1132 			return -1;
1133 		}
1134 		g_tbl->tbl[(idx >> MLX5_L3T_GT_OFFSET) & MLX5_L3T_GT_MASK] =
1135 									m_tbl;
1136 		g_tbl->ref_cnt++;
1137 	}
1138 	/*
1139 	 * Check the entry table, create it if empty. Ref_cnt will be
1140 	 * increased if new sub entry table created.
1141 	 */
1142 	e_tbl = m_tbl->tbl[(idx >> MLX5_L3T_MT_OFFSET) & MLX5_L3T_MT_MASK];
1143 	if (!e_tbl) {
1144 		e_tbl = mlx5_ipool_zmalloc(tbl->eip, &tbl_idx);
1145 		if (!e_tbl) {
1146 			rte_errno = ENOMEM;
1147 			return -1;
1148 		}
1149 		((struct mlx5_l3t_entry_word *)e_tbl)->idx = tbl_idx;
1150 		m_tbl->tbl[(idx >> MLX5_L3T_MT_OFFSET) & MLX5_L3T_MT_MASK] =
1151 									e_tbl;
1152 		m_tbl->ref_cnt++;
1153 	}
1154 	entry_idx = idx & MLX5_L3T_ET_MASK;
1155 	switch (tbl->type) {
1156 	case MLX5_L3T_TYPE_WORD:
1157 		w_e_tbl = (struct mlx5_l3t_entry_word *)e_tbl;
1158 		if (w_e_tbl->entry[entry_idx].data) {
1159 			data->word = w_e_tbl->entry[entry_idx].data;
1160 			w_e_tbl->entry[entry_idx].ref_cnt++;
1161 			rte_errno = EEXIST;
1162 			return -1;
1163 		}
1164 		w_e_tbl->entry[entry_idx].data = data->word;
1165 		w_e_tbl->entry[entry_idx].ref_cnt = 1;
1166 		w_e_tbl->ref_cnt++;
1167 		break;
1168 	case MLX5_L3T_TYPE_DWORD:
1169 		dw_e_tbl = (struct mlx5_l3t_entry_dword *)e_tbl;
1170 		if (dw_e_tbl->entry[entry_idx].data) {
1171 			data->dword = dw_e_tbl->entry[entry_idx].data;
1172 			dw_e_tbl->entry[entry_idx].ref_cnt++;
1173 			rte_errno = EEXIST;
1174 			return -1;
1175 		}
1176 		dw_e_tbl->entry[entry_idx].data = data->dword;
1177 		dw_e_tbl->entry[entry_idx].ref_cnt = 1;
1178 		dw_e_tbl->ref_cnt++;
1179 		break;
1180 	case MLX5_L3T_TYPE_QWORD:
1181 		qw_e_tbl = (struct mlx5_l3t_entry_qword *)e_tbl;
1182 		if (qw_e_tbl->entry[entry_idx].data) {
1183 			data->qword = qw_e_tbl->entry[entry_idx].data;
1184 			qw_e_tbl->entry[entry_idx].ref_cnt++;
1185 			rte_errno = EEXIST;
1186 			return -1;
1187 		}
1188 		qw_e_tbl->entry[entry_idx].data = data->qword;
1189 		qw_e_tbl->entry[entry_idx].ref_cnt = 1;
1190 		qw_e_tbl->ref_cnt++;
1191 		break;
1192 	default:
1193 		ptr_e_tbl = (struct mlx5_l3t_entry_ptr *)e_tbl;
1194 		if (ptr_e_tbl->entry[entry_idx].data) {
1195 			data->ptr = ptr_e_tbl->entry[entry_idx].data;
1196 			ptr_e_tbl->entry[entry_idx].ref_cnt++;
1197 			rte_errno = EEXIST;
1198 			return -1;
1199 		}
1200 		ptr_e_tbl->entry[entry_idx].data = data->ptr;
1201 		ptr_e_tbl->entry[entry_idx].ref_cnt = 1;
1202 		ptr_e_tbl->ref_cnt++;
1203 		break;
1204 	}
1205 	return 0;
1206 }
1207 
1208 int32_t
1209 mlx5_l3t_set_entry(struct mlx5_l3t_tbl *tbl, uint32_t idx,
1210 		   union mlx5_l3t_data *data)
1211 {
1212 	int ret;
1213 
1214 	rte_spinlock_lock(&tbl->sl);
1215 	ret = __l3t_set_entry(tbl, idx, data);
1216 	rte_spinlock_unlock(&tbl->sl);
1217 	return ret;
1218 }
1219