xref: /dpdk/drivers/net/mlx5/mlx5_utils.c (revision eeded2044af5bbe88220120b14933536cbb3edb6)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright 2019 Mellanox Technologies, Ltd
3  */
4 
5 #include <rte_malloc.h>
6 
7 #include <mlx5_malloc.h>
8 
9 #include "mlx5_utils.h"
10 
11 
12 /********************* Cache list ************************/
13 
14 static struct mlx5_cache_entry *
15 mlx5_clist_default_create_cb(struct mlx5_cache_list *list,
16 			     struct mlx5_cache_entry *entry __rte_unused,
17 			     void *ctx __rte_unused)
18 {
19 	return mlx5_malloc(MLX5_MEM_ZERO, list->entry_sz, 0, SOCKET_ID_ANY);
20 }
21 
22 static void
23 mlx5_clist_default_remove_cb(struct mlx5_cache_list *list __rte_unused,
24 			     struct mlx5_cache_entry *entry)
25 {
26 	mlx5_free(entry);
27 }
28 
29 int
30 mlx5_cache_list_init(struct mlx5_cache_list *list, const char *name,
31 		     uint32_t entry_size, void *ctx,
32 		     mlx5_cache_create_cb cb_create,
33 		     mlx5_cache_match_cb cb_match,
34 		     mlx5_cache_remove_cb cb_remove)
35 {
36 	MLX5_ASSERT(list);
37 	if (!cb_match || (!cb_create ^ !cb_remove))
38 		return -1;
39 	if (name)
40 		snprintf(list->name, sizeof(list->name), "%s", name);
41 	list->entry_sz = entry_size;
42 	list->ctx = ctx;
43 	list->cb_create = cb_create ? cb_create : mlx5_clist_default_create_cb;
44 	list->cb_match = cb_match;
45 	list->cb_remove = cb_remove ? cb_remove : mlx5_clist_default_remove_cb;
46 	rte_rwlock_init(&list->lock);
47 	DRV_LOG(DEBUG, "Cache list %s initialized.", list->name);
48 	LIST_INIT(&list->head);
49 	return 0;
50 }
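
/*
 * Usage sketch (illustration only, not part of this file): how a caller
 * might set up a cache list. "struct my_entry" (which would embed struct
 * mlx5_cache_entry as its first member) and "my_match_cb" are hypothetical
 * placeholders; passing NULL for both cb_create and cb_remove selects the
 * default mlx5_malloc/mlx5_free callbacks above.
 *
 *	struct mlx5_cache_list list;
 *
 *	if (mlx5_cache_list_init(&list, "my_cache", sizeof(struct my_entry),
 *				 ctx, NULL, my_match_cb, NULL))
 *		return -1;
 */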
51 
52 static struct mlx5_cache_entry *
53 __cache_lookup(struct mlx5_cache_list *list, void *ctx, bool reuse)
54 {
55 	struct mlx5_cache_entry *entry;
56 
57 	LIST_FOREACH(entry, &list->head, next) {
58 		if (list->cb_match(list, entry, ctx))
59 			continue;
60 		if (reuse) {
61 			__atomic_add_fetch(&entry->ref_cnt, 1,
62 					   __ATOMIC_RELAXED);
63 			DRV_LOG(DEBUG, "Cache list %s entry %p ref++: %u.",
64 				list->name, (void *)entry, entry->ref_cnt);
65 		}
66 		break;
67 	}
68 	return entry;
69 }
70 
71 static struct mlx5_cache_entry *
72 cache_lookup(struct mlx5_cache_list *list, void *ctx, bool reuse)
73 {
74 	struct mlx5_cache_entry *entry;
75 
76 	rte_rwlock_read_lock(&list->lock);
77 	entry = __cache_lookup(list, ctx, reuse);
78 	rte_rwlock_read_unlock(&list->lock);
79 	return entry;
80 }
81 
82 struct mlx5_cache_entry *
83 mlx5_cache_lookup(struct mlx5_cache_list *list, void *ctx)
84 {
85 	return cache_lookup(list, ctx, false);
86 }
87 
88 struct mlx5_cache_entry *
89 mlx5_cache_register(struct mlx5_cache_list *list, void *ctx)
90 {
91 	struct mlx5_cache_entry *entry;
92 	uint32_t prev_gen_cnt = 0;
93 
94 	MLX5_ASSERT(list);
95 	prev_gen_cnt = __atomic_load_n(&list->gen_cnt, __ATOMIC_ACQUIRE);
96 	/* Lookup with read lock, reuse if found. */
97 	entry = cache_lookup(list, ctx, true);
98 	if (entry)
99 		return entry;
100 	/* Not found, append with write lock - block read from other threads. */
101 	rte_rwlock_write_lock(&list->lock);
102 	/* If list changed by other threads before lock, search again. */
103 	if (prev_gen_cnt != __atomic_load_n(&list->gen_cnt, __ATOMIC_ACQUIRE)) {
104 		/* Lookup and reuse w/o read lock. */
105 		entry = __cache_lookup(list, ctx, true);
106 		if (entry)
107 			goto done;
108 	}
109 	entry = list->cb_create(list, entry, ctx);
110 	if (!entry) {
111 		DRV_LOG(ERR, "Failed to init cache list %s entry %p.",
112 			list->name, (void *)entry);
113 		goto done;
114 	}
115 	entry->ref_cnt = 1;
116 	LIST_INSERT_HEAD(&list->head, entry, next);
117 	__atomic_add_fetch(&list->gen_cnt, 1, __ATOMIC_RELEASE);
118 	__atomic_add_fetch(&list->count, 1, __ATOMIC_ACQUIRE);
119 	DRV_LOG(DEBUG, "Cache list %s entry %p new: %u.",
120 		list->name, (void *)entry, entry->ref_cnt);
121 done:
122 	rte_rwlock_write_unlock(&list->lock);
123 	return entry;
124 }
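
/*
 * Usage sketch (illustration only): lookups take the read lock; on a miss
 * the write lock is taken and, if the generation counter shows the list
 * changed in between, the lookup is retried before a new entry is created.
 * A typical caller simply pairs register/unregister:
 *
 *	entry = mlx5_cache_register(&list, &ctx);	// ref_cnt becomes >= 1
 *	if (!entry)
 *		return -1;
 *	... use the entry ...
 *	mlx5_cache_unregister(&list, entry);	// freed when ref_cnt reaches 0
 */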
125 
126 int
127 mlx5_cache_unregister(struct mlx5_cache_list *list,
128 		      struct mlx5_cache_entry *entry)
129 {
130 	rte_rwlock_write_lock(&list->lock);
131 	MLX5_ASSERT(entry && entry->next.le_prev);
132 	DRV_LOG(DEBUG, "Cache list %s entry %p ref--: %u.",
133 		list->name, (void *)entry, entry->ref_cnt);
134 	if (--entry->ref_cnt) {
135 		rte_rwlock_write_unlock(&list->lock);
136 		return 1;
137 	}
138 	__atomic_add_fetch(&list->gen_cnt, 1, __ATOMIC_ACQUIRE);
139 	__atomic_sub_fetch(&list->count, 1, __ATOMIC_ACQUIRE);
140 	LIST_REMOVE(entry, next);
141 	list->cb_remove(list, entry);
142 	rte_rwlock_write_unlock(&list->lock);
143 	DRV_LOG(DEBUG, "Cache list %s entry %p removed.",
144 		list->name, (void *)entry);
145 	return 0;
146 }
147 
148 void
149 mlx5_cache_list_destroy(struct mlx5_cache_list *list)
150 {
151 	struct mlx5_cache_entry *entry;
152 
153 	MLX5_ASSERT(list);
154 	/* No LIST_FOREACH_SAFE available, iterate with a while loop instead. */
155 	while (!LIST_EMPTY(&list->head)) {
156 		entry = LIST_FIRST(&list->head);
157 		LIST_REMOVE(entry, next);
158 		list->cb_remove(list, entry);
159 		DRV_LOG(DEBUG, "Cache list %s entry %p destroyed.",
160 			list->name, (void *)entry);
161 	}
162 	memset(list, 0, sizeof(*list));
163 }
164 
165 uint32_t
166 mlx5_cache_list_get_entry_num(struct mlx5_cache_list *list)
167 {
168 	MLX5_ASSERT(list);
169 	return __atomic_load_n(&list->count, __ATOMIC_RELAXED);
170 }
171 
172 /********************* Indexed pool **********************/
173 
174 static inline void
175 mlx5_ipool_lock(struct mlx5_indexed_pool *pool)
176 {
177 	if (pool->cfg.need_lock)
178 		rte_spinlock_lock(&pool->lock);
179 }
180 
181 static inline void
182 mlx5_ipool_unlock(struct mlx5_indexed_pool *pool)
183 {
184 	if (pool->cfg.need_lock)
185 		rte_spinlock_unlock(&pool->lock);
186 }
187 
188 static inline uint32_t
189 mlx5_trunk_idx_get(struct mlx5_indexed_pool *pool, uint32_t entry_idx)
190 {
191 	struct mlx5_indexed_pool_config *cfg = &pool->cfg;
192 	uint32_t trunk_idx = 0;
193 	uint32_t i;
194 
195 	if (!cfg->grow_trunk)
196 		return entry_idx / cfg->trunk_size;
197 	if (entry_idx >= pool->grow_tbl[cfg->grow_trunk - 1]) {
198 		trunk_idx = (entry_idx - pool->grow_tbl[cfg->grow_trunk - 1]) /
199 			    (cfg->trunk_size << (cfg->grow_shift *
200 			    cfg->grow_trunk)) + cfg->grow_trunk;
201 	} else {
202 		for (i = 0; i < cfg->grow_trunk; i++) {
203 			if (entry_idx < pool->grow_tbl[i])
204 				break;
205 		}
206 		trunk_idx = i;
207 	}
208 	return trunk_idx;
209 }
210 
211 static inline uint32_t
212 mlx5_trunk_size_get(struct mlx5_indexed_pool *pool, uint32_t trunk_idx)
213 {
214 	struct mlx5_indexed_pool_config *cfg = &pool->cfg;
215 
216 	return cfg->trunk_size << (cfg->grow_shift *
217 	       (trunk_idx > cfg->grow_trunk ? cfg->grow_trunk : trunk_idx));
218 }
219 
220 static inline uint32_t
221 mlx5_trunk_idx_offset_get(struct mlx5_indexed_pool *pool, uint32_t trunk_idx)
222 {
223 	struct mlx5_indexed_pool_config *cfg = &pool->cfg;
224 	uint32_t offset = 0;
225 
226 	if (!trunk_idx)
227 		return 0;
228 	if (!cfg->grow_trunk)
229 		return cfg->trunk_size * trunk_idx;
230 	if (trunk_idx < cfg->grow_trunk)
231 		offset = pool->grow_tbl[trunk_idx - 1];
232 	else
233 		offset = pool->grow_tbl[cfg->grow_trunk - 1] +
234 			 (cfg->trunk_size << (cfg->grow_shift *
235 			 cfg->grow_trunk)) * (trunk_idx - cfg->grow_trunk);
236 	return offset;
237 }
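
/*
 * Worked example of the trunk growth arithmetic above, using the L3T ipool
 * configuration found later in this file (trunk_size = 16, grow_trunk = 6,
 * grow_shift = 1):
 *
 *	trunk index :    0    1    2    3    4    5     6     7  ...
 *	trunk size  :   16   32   64  128  256  512  1024  1024  ...
 *	entry offset:    0   16   48  112  240  496  1008  2032  ...
 *
 * grow_tbl[] caches the cumulative sizes {16, 48, 112, 240, 496, 1008}, so
 * mlx5_trunk_idx_get() resolves an entry index below 1008 by walking the
 * table and anything above it as (entry_idx - 1008) / 1024 + 6.
 */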
238 
239 struct mlx5_indexed_pool *
240 mlx5_ipool_create(struct mlx5_indexed_pool_config *cfg)
241 {
242 	struct mlx5_indexed_pool *pool;
243 	uint32_t i;
244 
245 	if (!cfg || (!cfg->malloc ^ !cfg->free) ||
246 	    (cfg->trunk_size && ((cfg->trunk_size & (cfg->trunk_size - 1)) ||
247 	    ((__builtin_ffs(cfg->trunk_size) + TRUNK_IDX_BITS) > 32))))
248 		return NULL;
249 	pool = mlx5_malloc(MLX5_MEM_ZERO, sizeof(*pool) + cfg->grow_trunk *
250 			   sizeof(pool->grow_tbl[0]), RTE_CACHE_LINE_SIZE,
251 			   SOCKET_ID_ANY);
252 	if (!pool)
253 		return NULL;
254 	pool->cfg = *cfg;
255 	if (!pool->cfg.trunk_size)
256 		pool->cfg.trunk_size = MLX5_IPOOL_DEFAULT_TRUNK_SIZE;
257 	if (!cfg->malloc && !cfg->free) {
258 		pool->cfg.malloc = mlx5_malloc;
259 		pool->cfg.free = mlx5_free;
260 	}
261 	pool->free_list = TRUNK_INVALID;
262 	if (pool->cfg.need_lock)
263 		rte_spinlock_init(&pool->lock);
264 	/*
265 	 * Initialize the dynamic grow trunk lookup table with cumulative entry
266 	 * counts for quick translation from entry index to trunk and offset.
267 	 */
268 	for (i = 0; i < cfg->grow_trunk; i++) {
269 		pool->grow_tbl[i] = cfg->trunk_size << (cfg->grow_shift * i);
270 		if (i > 0)
271 			pool->grow_tbl[i] += pool->grow_tbl[i - 1];
272 	}
273 	return pool;
274 }
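
/*
 * Configuration sketch (illustration only): a caller fills an
 * mlx5_indexed_pool_config and creates the pool once, the way
 * mlx5_l3t_create() below does. The values here are arbitrary examples and
 * "struct my_obj" is a hypothetical entry type.
 *
 *	struct mlx5_indexed_pool_config cfg = {
 *		.size = sizeof(struct my_obj),
 *		.trunk_size = 64,	// must be a power of two
 *		.need_lock = 1,
 *		.release_mem_en = 1,
 *		.malloc = mlx5_malloc,
 *		.free = mlx5_free,
 *		.type = "my_obj_ipool",
 *	};
 *	struct mlx5_indexed_pool *pool = mlx5_ipool_create(&cfg);
 */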
275 
276 static int
277 mlx5_ipool_grow(struct mlx5_indexed_pool *pool)
278 {
279 	struct mlx5_indexed_trunk *trunk;
280 	struct mlx5_indexed_trunk **trunk_tmp;
281 	struct mlx5_indexed_trunk **p;
282 	size_t trunk_size = 0;
283 	size_t data_size;
284 	size_t bmp_size;
285 	uint32_t idx;
286 
287 	if (pool->n_trunk_valid == TRUNK_MAX_IDX)
288 		return -ENOMEM;
289 	if (pool->n_trunk_valid == pool->n_trunk) {
290 		/* No free trunk slot left, expand the trunk pointer array. */
291 		int n_grow = pool->n_trunk_valid ? pool->n_trunk :
292 			     RTE_CACHE_LINE_SIZE / sizeof(void *);
293 
294 		p = pool->cfg.malloc(0, (pool->n_trunk_valid + n_grow) *
295 				     sizeof(struct mlx5_indexed_trunk *),
296 				     RTE_CACHE_LINE_SIZE, rte_socket_id());
297 		if (!p)
298 			return -ENOMEM;
299 		if (pool->trunks)
300 			memcpy(p, pool->trunks, pool->n_trunk_valid *
301 			       sizeof(struct mlx5_indexed_trunk *));
302 		memset(RTE_PTR_ADD(p, pool->n_trunk_valid * sizeof(void *)), 0,
303 		       n_grow * sizeof(void *));
304 		trunk_tmp = pool->trunks;
305 		pool->trunks = p;
306 		if (trunk_tmp)
307 			pool->cfg.free(trunk_tmp);
308 		pool->n_trunk += n_grow;
309 	}
310 	if (!pool->cfg.release_mem_en) {
311 		idx = pool->n_trunk_valid;
312 	} else {
313 		/* Find the first available slot in the trunk list. */
314 		for (idx = 0; idx < pool->n_trunk; idx++)
315 			if (pool->trunks[idx] == NULL)
316 				break;
317 	}
318 	trunk_size += sizeof(*trunk);
319 	data_size = mlx5_trunk_size_get(pool, idx);
320 	bmp_size = rte_bitmap_get_memory_footprint(data_size);
321 	/* rte_bitmap requires the memory to be cache-line aligned. */
322 	trunk_size += RTE_CACHE_LINE_ROUNDUP(data_size * pool->cfg.size);
323 	trunk_size += bmp_size;
324 	trunk = pool->cfg.malloc(0, trunk_size,
325 				 RTE_CACHE_LINE_SIZE, rte_socket_id());
326 	if (!trunk)
327 		return -ENOMEM;
328 	pool->trunks[idx] = trunk;
329 	trunk->idx = idx;
330 	trunk->free = data_size;
331 	trunk->prev = TRUNK_INVALID;
332 	trunk->next = TRUNK_INVALID;
333 	MLX5_ASSERT(pool->free_list == TRUNK_INVALID);
334 	pool->free_list = idx;
335 	/* Mark all entries as available. */
336 	trunk->bmp = rte_bitmap_init_with_all_set(data_size, &trunk->data
337 		     [RTE_CACHE_LINE_ROUNDUP(data_size * pool->cfg.size)],
338 		     bmp_size);
339 	MLX5_ASSERT(trunk->bmp);
340 	pool->n_trunk_valid++;
341 #ifdef POOL_DEBUG
342 	pool->trunk_new++;
343 	pool->trunk_avail++;
344 #endif
345 	return 0;
346 }
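
/*
 * Layout of one trunk allocated above, as a single contiguous block:
 *
 *	+---------------------------+ <- trunk
 *	| struct mlx5_indexed_trunk |
 *	+---------------------------+ <- trunk->data
 *	| entry area:               |
 *	| data_size * cfg.size bytes| (rounded up to a cache line)
 *	+---------------------------+
 *	| rte_bitmap with data_size |
 *	| bits, all set = all free  |
 *	+---------------------------+
 */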
347 
348 void *
349 mlx5_ipool_malloc(struct mlx5_indexed_pool *pool, uint32_t *idx)
350 {
351 	struct mlx5_indexed_trunk *trunk;
352 	uint64_t slab = 0;
353 	uint32_t iidx = 0;
354 	void *p;
355 
356 	mlx5_ipool_lock(pool);
357 	if (pool->free_list == TRUNK_INVALID) {
358 		/* If no available trunks, grow new. */
359 		if (mlx5_ipool_grow(pool)) {
360 			mlx5_ipool_unlock(pool);
361 			return NULL;
362 		}
363 	}
364 	MLX5_ASSERT(pool->free_list != TRUNK_INVALID);
365 	trunk = pool->trunks[pool->free_list];
366 	MLX5_ASSERT(trunk->free);
367 	if (!rte_bitmap_scan(trunk->bmp, &iidx, &slab)) {
368 		mlx5_ipool_unlock(pool);
369 		return NULL;
370 	}
371 	MLX5_ASSERT(slab);
372 	iidx += __builtin_ctzll(slab);
373 	MLX5_ASSERT(iidx != UINT32_MAX);
374 	MLX5_ASSERT(iidx < mlx5_trunk_size_get(pool, trunk->idx));
375 	rte_bitmap_clear(trunk->bmp, iidx);
376 	p = &trunk->data[iidx * pool->cfg.size];
377 	/*
378 	 * The ipool index should grow continuously from small to big;
379 	 * some features, such as metering, only accept a limited number of
380 	 * index bits, so a random index with the MSB set may be rejected.
381 	 */
382 	iidx += mlx5_trunk_idx_offset_get(pool, trunk->idx);
383 	iidx += 1; /* non-zero index. */
384 	trunk->free--;
385 #ifdef POOL_DEBUG
386 	pool->n_entry++;
387 #endif
388 	if (!trunk->free) {
389 		/* The trunk is full, remove it from the free trunk list. */
390 		MLX5_ASSERT(pool->free_list == trunk->idx);
391 		pool->free_list = trunk->next;
392 		if (trunk->next != TRUNK_INVALID)
393 			pool->trunks[trunk->next]->prev = TRUNK_INVALID;
394 		trunk->prev = TRUNK_INVALID;
395 		trunk->next = TRUNK_INVALID;
396 #ifdef POOL_DEBUG
397 		pool->trunk_empty++;
398 		pool->trunk_avail--;
399 #endif
400 	}
401 	*idx = iidx;
402 	mlx5_ipool_unlock(pool);
403 	return p;
404 }
405 
406 void *
407 mlx5_ipool_zmalloc(struct mlx5_indexed_pool *pool, uint32_t *idx)
408 {
409 	void *entry = mlx5_ipool_malloc(pool, idx);
410 
411 	if (entry && pool->cfg.size)
412 		memset(entry, 0, pool->cfg.size);
413 	return entry;
414 }
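
/*
 * Usage sketch (illustration only): indices handed out by the pool are
 * 1-based (0 means "no entry") and the same index is later passed to
 * mlx5_ipool_get()/mlx5_ipool_free(). "struct my_obj" is a hypothetical
 * entry type matching cfg.size:
 *
 *	uint32_t idx;
 *	struct my_obj *obj = mlx5_ipool_zmalloc(pool, &idx);
 *
 *	if (!obj)
 *		return -ENOMEM;
 *	... obj == mlx5_ipool_get(pool, idx) while the entry is allocated ...
 *	mlx5_ipool_free(pool, idx);
 */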
415 
416 void
417 mlx5_ipool_free(struct mlx5_indexed_pool *pool, uint32_t idx)
418 {
419 	struct mlx5_indexed_trunk *trunk;
420 	uint32_t trunk_idx;
421 	uint32_t entry_idx;
422 
423 	if (!idx)
424 		return;
425 	idx -= 1;
426 	mlx5_ipool_lock(pool);
427 	trunk_idx = mlx5_trunk_idx_get(pool, idx);
428 	if ((!pool->cfg.release_mem_en && trunk_idx >= pool->n_trunk_valid) ||
429 	    (pool->cfg.release_mem_en && trunk_idx >= pool->n_trunk))
430 		goto out;
431 	trunk = pool->trunks[trunk_idx];
432 	if (!trunk)
433 		goto out;
434 	entry_idx = idx - mlx5_trunk_idx_offset_get(pool, trunk->idx);
435 	if (trunk_idx != trunk->idx ||
436 	    rte_bitmap_get(trunk->bmp, entry_idx))
437 		goto out;
438 	rte_bitmap_set(trunk->bmp, entry_idx);
439 	trunk->free++;
440 	if (pool->cfg.release_mem_en && trunk->free == mlx5_trunk_size_get
441 	   (pool, trunk->idx)) {
442 		if (pool->free_list == trunk->idx)
443 			pool->free_list = trunk->next;
444 		if (trunk->next != TRUNK_INVALID)
445 			pool->trunks[trunk->next]->prev = trunk->prev;
446 		if (trunk->prev != TRUNK_INVALID)
447 			pool->trunks[trunk->prev]->next = trunk->next;
448 		pool->cfg.free(trunk);
449 		pool->trunks[trunk_idx] = NULL;
450 		pool->n_trunk_valid--;
451 #ifdef POOL_DEBUG
452 		pool->trunk_avail--;
453 		pool->trunk_free++;
454 #endif
455 		if (pool->n_trunk_valid == 0) {
456 			pool->cfg.free(pool->trunks);
457 			pool->trunks = NULL;
458 			pool->n_trunk = 0;
459 		}
460 	} else if (trunk->free == 1) {
461 		/* Put into free trunk list head. */
462 		MLX5_ASSERT(pool->free_list != trunk->idx);
463 		trunk->next = pool->free_list;
464 		trunk->prev = TRUNK_INVALID;
465 		if (pool->free_list != TRUNK_INVALID)
466 			pool->trunks[pool->free_list]->prev = trunk->idx;
467 		pool->free_list = trunk->idx;
468 #ifdef POOL_DEBUG
469 		pool->trunk_empty--;
470 		pool->trunk_avail++;
471 #endif
472 	}
473 #ifdef POOL_DEBUG
474 	pool->n_entry--;
475 #endif
476 out:
477 	mlx5_ipool_unlock(pool);
478 }
479 
480 void *
481 mlx5_ipool_get(struct mlx5_indexed_pool *pool, uint32_t idx)
482 {
483 	struct mlx5_indexed_trunk *trunk;
484 	void *p = NULL;
485 	uint32_t trunk_idx;
486 	uint32_t entry_idx;
487 
488 	if (!idx)
489 		return NULL;
490 	idx -= 1;
491 	mlx5_ipool_lock(pool);
492 	trunk_idx = mlx5_trunk_idx_get(pool, idx);
493 	if ((!pool->cfg.release_mem_en && trunk_idx >= pool->n_trunk_valid) ||
494 	    (pool->cfg.release_mem_en && trunk_idx >= pool->n_trunk))
495 		goto out;
496 	trunk = pool->trunks[trunk_idx];
497 	if (!trunk)
498 		goto out;
499 	entry_idx = idx - mlx5_trunk_idx_offset_get(pool, trunk->idx);
500 	if (trunk_idx != trunk->idx ||
501 	    rte_bitmap_get(trunk->bmp, entry_idx))
502 		goto out;
503 	p = &trunk->data[entry_idx * pool->cfg.size];
504 out:
505 	mlx5_ipool_unlock(pool);
506 	return p;
507 }
508 
509 int
510 mlx5_ipool_destroy(struct mlx5_indexed_pool *pool)
511 {
512 	struct mlx5_indexed_trunk **trunks;
513 	uint32_t i;
514 
515 	MLX5_ASSERT(pool);
516 	mlx5_ipool_lock(pool);
517 	trunks = pool->trunks;
518 	for (i = 0; i < pool->n_trunk; i++) {
519 		if (trunks[i])
520 			pool->cfg.free(trunks[i]);
521 	}
522 	if (pool->trunks)
523 		pool->cfg.free(pool->trunks);
524 	mlx5_ipool_unlock(pool);
525 	mlx5_free(pool);
526 	return 0;
527 }
528 
529 void
530 mlx5_ipool_dump(struct mlx5_indexed_pool *pool)
531 {
532 	printf("Pool %s entry size %u, trunks %u, %u entries per trunk, "
533 	       "total: %u\n",
534 	       pool->cfg.type, pool->cfg.size, pool->n_trunk_valid,
535 	       pool->cfg.trunk_size, pool->n_trunk_valid);
536 #ifdef POOL_DEBUG
537 	printf("Pool %s entry %u, trunk alloc %u, empty: %u, "
538 	       "available %u free %u\n",
539 	       pool->cfg.type, pool->n_entry, pool->trunk_new,
540 	       pool->trunk_empty, pool->trunk_avail, pool->trunk_free);
541 #endif
542 }
543 
544 struct mlx5_l3t_tbl *
545 mlx5_l3t_create(enum mlx5_l3t_type type)
546 {
547 	struct mlx5_l3t_tbl *tbl;
548 	struct mlx5_indexed_pool_config l3t_ip_cfg = {
549 		.trunk_size = 16,
550 		.grow_trunk = 6,
551 		.grow_shift = 1,
552 		.need_lock = 0,
553 		.release_mem_en = 1,
554 		.malloc = mlx5_malloc,
555 		.free = mlx5_free,
556 	};
557 
558 	if (type >= MLX5_L3T_TYPE_MAX) {
559 		rte_errno = EINVAL;
560 		return NULL;
561 	}
562 	tbl = mlx5_malloc(MLX5_MEM_ZERO, sizeof(struct mlx5_l3t_tbl), 1,
563 			  SOCKET_ID_ANY);
564 	if (!tbl) {
565 		rte_errno = ENOMEM;
566 		return NULL;
567 	}
568 	tbl->type = type;
569 	switch (type) {
570 	case MLX5_L3T_TYPE_WORD:
571 		l3t_ip_cfg.size = sizeof(struct mlx5_l3t_entry_word);
572 		l3t_ip_cfg.type = "mlx5_l3t_e_tbl_w";
573 		break;
574 	case MLX5_L3T_TYPE_DWORD:
575 		l3t_ip_cfg.size = sizeof(struct mlx5_l3t_entry_dword);
576 		l3t_ip_cfg.type = "mlx5_l3t_e_tbl_dw";
577 		break;
578 	case MLX5_L3T_TYPE_QWORD:
579 		l3t_ip_cfg.size = sizeof(struct mlx5_l3t_entry_qword);
580 		l3t_ip_cfg.type = "mlx5_l3t_e_tbl_qw";
581 		break;
582 	default:
583 		l3t_ip_cfg.size = sizeof(struct mlx5_l3t_entry_ptr);
584 		l3t_ip_cfg.type = "mlx5_l3t_e_tbl_ptr";
585 		break;
586 	}
587 	rte_spinlock_init(&tbl->sl);
588 	tbl->eip = mlx5_ipool_create(&l3t_ip_cfg);
589 	if (!tbl->eip) {
590 		rte_errno = ENOMEM;
591 		mlx5_free(tbl);
592 		tbl = NULL;
593 	}
594 	return tbl;
595 }
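
/*
 * Usage sketch (illustration only): the 32-bit index is split internally
 * into global/middle/entry table levels, so callers only deal with flat
 * set/get/clear operations. "MY_IDX" is a hypothetical index value:
 *
 *	union mlx5_l3t_data data = { .dword = 0xdeadbeef };
 *	struct mlx5_l3t_tbl *tbl = mlx5_l3t_create(MLX5_L3T_TYPE_DWORD);
 *
 *	if (!tbl || mlx5_l3t_set_entry(tbl, MY_IDX, &data))
 *		return -rte_errno;	// set takes the first reference
 *	mlx5_l3t_get_entry(tbl, MY_IDX, &data);	// takes another reference
 *	mlx5_l3t_clear_entry(tbl, MY_IDX);	// drops one reference
 *	mlx5_l3t_clear_entry(tbl, MY_IDX);	// entry freed here
 *	mlx5_l3t_destroy(tbl);
 */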
596 
597 void
598 mlx5_l3t_destroy(struct mlx5_l3t_tbl *tbl)
599 {
600 	struct mlx5_l3t_level_tbl *g_tbl, *m_tbl;
601 	uint32_t i, j;
602 
603 	if (!tbl)
604 		return;
605 	g_tbl = tbl->tbl;
606 	if (g_tbl) {
607 		for (i = 0; i < MLX5_L3T_GT_SIZE; i++) {
608 			m_tbl = g_tbl->tbl[i];
609 			if (!m_tbl)
610 				continue;
611 			for (j = 0; j < MLX5_L3T_MT_SIZE; j++) {
612 				if (!m_tbl->tbl[j])
613 					continue;
614 				MLX5_ASSERT(!((struct mlx5_l3t_entry_word *)
615 					    m_tbl->tbl[j])->ref_cnt);
616 				mlx5_ipool_free(tbl->eip,
617 						((struct mlx5_l3t_entry_word *)
618 						m_tbl->tbl[j])->idx);
619 				m_tbl->tbl[j] = 0;
620 				if (!(--m_tbl->ref_cnt))
621 					break;
622 			}
623 			MLX5_ASSERT(!m_tbl->ref_cnt);
624 			mlx5_free(g_tbl->tbl[i]);
625 			g_tbl->tbl[i] = 0;
626 			if (!(--g_tbl->ref_cnt))
627 				break;
628 		}
629 		MLX5_ASSERT(!g_tbl->ref_cnt);
630 		mlx5_free(tbl->tbl);
631 		tbl->tbl = 0;
632 	}
633 	mlx5_ipool_destroy(tbl->eip);
634 	mlx5_free(tbl);
635 }
636 
637 static int32_t
638 __l3t_get_entry(struct mlx5_l3t_tbl *tbl, uint32_t idx,
639 		union mlx5_l3t_data *data)
640 {
641 	struct mlx5_l3t_level_tbl *g_tbl, *m_tbl;
642 	struct mlx5_l3t_entry_word *w_e_tbl;
643 	struct mlx5_l3t_entry_dword *dw_e_tbl;
644 	struct mlx5_l3t_entry_qword *qw_e_tbl;
645 	struct mlx5_l3t_entry_ptr *ptr_e_tbl;
646 	void *e_tbl;
647 	uint32_t entry_idx;
648 
649 	g_tbl = tbl->tbl;
650 	if (!g_tbl)
651 		return -1;
652 	m_tbl = g_tbl->tbl[(idx >> MLX5_L3T_GT_OFFSET) & MLX5_L3T_GT_MASK];
653 	if (!m_tbl)
654 		return -1;
655 	e_tbl = m_tbl->tbl[(idx >> MLX5_L3T_MT_OFFSET) & MLX5_L3T_MT_MASK];
656 	if (!e_tbl)
657 		return -1;
658 	entry_idx = idx & MLX5_L3T_ET_MASK;
659 	switch (tbl->type) {
660 	case MLX5_L3T_TYPE_WORD:
661 		w_e_tbl = (struct mlx5_l3t_entry_word *)e_tbl;
662 		data->word = w_e_tbl->entry[entry_idx].data;
663 		if (w_e_tbl->entry[entry_idx].data)
664 			w_e_tbl->entry[entry_idx].ref_cnt++;
665 		break;
666 	case MLX5_L3T_TYPE_DWORD:
667 		dw_e_tbl = (struct mlx5_l3t_entry_dword *)e_tbl;
668 		data->dword = dw_e_tbl->entry[entry_idx].data;
669 		if (dw_e_tbl->entry[entry_idx].data)
670 			dw_e_tbl->entry[entry_idx].ref_cnt++;
671 		break;
672 	case MLX5_L3T_TYPE_QWORD:
673 		qw_e_tbl = (struct mlx5_l3t_entry_qword *)e_tbl;
674 		data->qword = qw_e_tbl->entry[entry_idx].data;
675 		if (qw_e_tbl->entry[entry_idx].data)
676 			qw_e_tbl->entry[entry_idx].ref_cnt++;
677 		break;
678 	default:
679 		ptr_e_tbl = (struct mlx5_l3t_entry_ptr *)e_tbl;
680 		data->ptr = ptr_e_tbl->entry[entry_idx].data;
681 		if (ptr_e_tbl->entry[entry_idx].data)
682 			ptr_e_tbl->entry[entry_idx].ref_cnt++;
683 		break;
684 	}
685 	return 0;
686 }
687 
688 int32_t
689 mlx5_l3t_get_entry(struct mlx5_l3t_tbl *tbl, uint32_t idx,
690 		   union mlx5_l3t_data *data)
691 {
692 	int ret;
693 
694 	rte_spinlock_lock(&tbl->sl);
695 	ret = __l3t_get_entry(tbl, idx, data);
696 	rte_spinlock_unlock(&tbl->sl);
697 	return ret;
698 }
699 
700 int32_t
701 mlx5_l3t_clear_entry(struct mlx5_l3t_tbl *tbl, uint32_t idx)
702 {
703 	struct mlx5_l3t_level_tbl *g_tbl, *m_tbl;
704 	struct mlx5_l3t_entry_word *w_e_tbl;
705 	struct mlx5_l3t_entry_dword *dw_e_tbl;
706 	struct mlx5_l3t_entry_qword *qw_e_tbl;
707 	struct mlx5_l3t_entry_ptr *ptr_e_tbl;
708 	void *e_tbl;
709 	uint32_t entry_idx;
710 	uint64_t ref_cnt;
711 	int32_t ret = -1;
712 
713 	rte_spinlock_lock(&tbl->sl);
714 	g_tbl = tbl->tbl;
715 	if (!g_tbl)
716 		goto out;
717 	m_tbl = g_tbl->tbl[(idx >> MLX5_L3T_GT_OFFSET) & MLX5_L3T_GT_MASK];
718 	if (!m_tbl)
719 		goto out;
720 	e_tbl = m_tbl->tbl[(idx >> MLX5_L3T_MT_OFFSET) & MLX5_L3T_MT_MASK];
721 	if (!e_tbl)
722 		goto out;
723 	entry_idx = idx & MLX5_L3T_ET_MASK;
724 	switch (tbl->type) {
725 	case MLX5_L3T_TYPE_WORD:
726 		w_e_tbl = (struct mlx5_l3t_entry_word *)e_tbl;
727 		MLX5_ASSERT(w_e_tbl->entry[entry_idx].ref_cnt);
728 		ret = --w_e_tbl->entry[entry_idx].ref_cnt;
729 		if (ret)
730 			goto out;
731 		w_e_tbl->entry[entry_idx].data = 0;
732 		ref_cnt = --w_e_tbl->ref_cnt;
733 		break;
734 	case MLX5_L3T_TYPE_DWORD:
735 		dw_e_tbl = (struct mlx5_l3t_entry_dword *)e_tbl;
736 		MLX5_ASSERT(dw_e_tbl->entry[entry_idx].ref_cnt);
737 		ret = --dw_e_tbl->entry[entry_idx].ref_cnt;
738 		if (ret)
739 			goto out;
740 		dw_e_tbl->entry[entry_idx].data = 0;
741 		ref_cnt = --dw_e_tbl->ref_cnt;
742 		break;
743 	case MLX5_L3T_TYPE_QWORD:
744 		qw_e_tbl = (struct mlx5_l3t_entry_qword *)e_tbl;
745 		MLX5_ASSERT(qw_e_tbl->entry[entry_idx].ref_cnt);
746 		ret = --qw_e_tbl->entry[entry_idx].ref_cnt;
747 		if (ret)
748 			goto out;
749 		qw_e_tbl->entry[entry_idx].data = 0;
750 		ref_cnt = --qw_e_tbl->ref_cnt;
751 		break;
752 	default:
753 		ptr_e_tbl = (struct mlx5_l3t_entry_ptr *)e_tbl;
754 		MLX5_ASSERT(ptr_e_tbl->entry[entry_idx].ref_cnt);
755 		ret = --ptr_e_tbl->entry[entry_idx].ref_cnt;
756 		if (ret)
757 			goto out;
758 		ptr_e_tbl->entry[entry_idx].data = NULL;
759 		ref_cnt = --ptr_e_tbl->ref_cnt;
760 		break;
761 	}
762 	if (!ref_cnt) {
763 		mlx5_ipool_free(tbl->eip,
764 				((struct mlx5_l3t_entry_word *)e_tbl)->idx);
765 		m_tbl->tbl[(idx >> MLX5_L3T_MT_OFFSET) & MLX5_L3T_MT_MASK] =
766 									NULL;
767 		if (!(--m_tbl->ref_cnt)) {
768 			mlx5_free(m_tbl);
769 			g_tbl->tbl
770 			[(idx >> MLX5_L3T_GT_OFFSET) & MLX5_L3T_GT_MASK] = NULL;
771 			if (!(--g_tbl->ref_cnt)) {
772 				mlx5_free(g_tbl);
773 				tbl->tbl = 0;
774 			}
775 		}
776 	}
777 out:
778 	rte_spinlock_unlock(&tbl->sl);
779 	return ret;
780 }
781 
782 static int32_t
783 __l3t_set_entry(struct mlx5_l3t_tbl *tbl, uint32_t idx,
784 		union mlx5_l3t_data *data)
785 {
786 	struct mlx5_l3t_level_tbl *g_tbl, *m_tbl;
787 	struct mlx5_l3t_entry_word *w_e_tbl;
788 	struct mlx5_l3t_entry_dword *dw_e_tbl;
789 	struct mlx5_l3t_entry_qword *qw_e_tbl;
790 	struct mlx5_l3t_entry_ptr *ptr_e_tbl;
791 	void *e_tbl;
792 	uint32_t entry_idx, tbl_idx = 0;
793 
794 	/* Check the global table, create it if empty. */
795 	g_tbl = tbl->tbl;
796 	if (!g_tbl) {
797 		g_tbl = mlx5_malloc(MLX5_MEM_ZERO,
798 				    sizeof(struct mlx5_l3t_level_tbl) +
799 				    sizeof(void *) * MLX5_L3T_GT_SIZE, 1,
800 				    SOCKET_ID_ANY);
801 		if (!g_tbl) {
802 			rte_errno = ENOMEM;
803 			return -1;
804 		}
805 		tbl->tbl = g_tbl;
806 	}
807 	/*
808 	 * Check the middle table, create it if empty. The global table
809 	 * reference count is increased if a new middle table is created.
810 	 */
811 	m_tbl = g_tbl->tbl[(idx >> MLX5_L3T_GT_OFFSET) & MLX5_L3T_GT_MASK];
812 	if (!m_tbl) {
813 		m_tbl = mlx5_malloc(MLX5_MEM_ZERO,
814 				    sizeof(struct mlx5_l3t_level_tbl) +
815 				    sizeof(void *) * MLX5_L3T_MT_SIZE, 1,
816 				    SOCKET_ID_ANY);
817 		if (!m_tbl) {
818 			rte_errno = ENOMEM;
819 			return -1;
820 		}
821 		g_tbl->tbl[(idx >> MLX5_L3T_GT_OFFSET) & MLX5_L3T_GT_MASK] =
822 									m_tbl;
823 		g_tbl->ref_cnt++;
824 	}
825 	/*
826 	 * Check the entry table, create it if empty. The middle table
827 	 * reference count is increased if a new entry table is created.
828 	 */
829 	e_tbl = m_tbl->tbl[(idx >> MLX5_L3T_MT_OFFSET) & MLX5_L3T_MT_MASK];
830 	if (!e_tbl) {
831 		e_tbl = mlx5_ipool_zmalloc(tbl->eip, &tbl_idx);
832 		if (!e_tbl) {
833 			rte_errno = ENOMEM;
834 			return -1;
835 		}
836 		((struct mlx5_l3t_entry_word *)e_tbl)->idx = tbl_idx;
837 		m_tbl->tbl[(idx >> MLX5_L3T_MT_OFFSET) & MLX5_L3T_MT_MASK] =
838 									e_tbl;
839 		m_tbl->ref_cnt++;
840 	}
841 	entry_idx = idx & MLX5_L3T_ET_MASK;
842 	switch (tbl->type) {
843 	case MLX5_L3T_TYPE_WORD:
844 		w_e_tbl = (struct mlx5_l3t_entry_word *)e_tbl;
845 		if (w_e_tbl->entry[entry_idx].data) {
846 			data->word = w_e_tbl->entry[entry_idx].data;
847 			w_e_tbl->entry[entry_idx].ref_cnt++;
848 			rte_errno = EEXIST;
849 			return -1;
850 		}
851 		w_e_tbl->entry[entry_idx].data = data->word;
852 		w_e_tbl->entry[entry_idx].ref_cnt = 1;
853 		w_e_tbl->ref_cnt++;
854 		break;
855 	case MLX5_L3T_TYPE_DWORD:
856 		dw_e_tbl = (struct mlx5_l3t_entry_dword *)e_tbl;
857 		if (dw_e_tbl->entry[entry_idx].data) {
858 			data->dword = dw_e_tbl->entry[entry_idx].data;
859 			dw_e_tbl->entry[entry_idx].ref_cnt++;
860 			rte_errno = EEXIST;
861 			return -1;
862 		}
863 		dw_e_tbl->entry[entry_idx].data = data->dword;
864 		dw_e_tbl->entry[entry_idx].ref_cnt = 1;
865 		dw_e_tbl->ref_cnt++;
866 		break;
867 	case MLX5_L3T_TYPE_QWORD:
868 		qw_e_tbl = (struct mlx5_l3t_entry_qword *)e_tbl;
869 		if (qw_e_tbl->entry[entry_idx].data) {
870 			data->qword = qw_e_tbl->entry[entry_idx].data;
871 			qw_e_tbl->entry[entry_idx].ref_cnt++;
872 			rte_errno = EEXIST;
873 			return -1;
874 		}
875 		qw_e_tbl->entry[entry_idx].data = data->qword;
876 		qw_e_tbl->entry[entry_idx].ref_cnt = 1;
877 		qw_e_tbl->ref_cnt++;
878 		break;
879 	default:
880 		ptr_e_tbl = (struct mlx5_l3t_entry_ptr *)e_tbl;
881 		if (ptr_e_tbl->entry[entry_idx].data) {
882 			data->ptr = ptr_e_tbl->entry[entry_idx].data;
883 			ptr_e_tbl->entry[entry_idx].ref_cnt++;
884 			rte_errno = EEXIST;
885 			return -1;
886 		}
887 		ptr_e_tbl->entry[entry_idx].data = data->ptr;
888 		ptr_e_tbl->entry[entry_idx].ref_cnt = 1;
889 		ptr_e_tbl->ref_cnt++;
890 		break;
891 	}
892 	return 0;
893 }
894 
895 int32_t
896 mlx5_l3t_set_entry(struct mlx5_l3t_tbl *tbl, uint32_t idx,
897 		   union mlx5_l3t_data *data)
898 {
899 	int ret;
900 
901 	rte_spinlock_lock(&tbl->sl);
902 	ret = __l3t_set_entry(tbl, idx, data);
903 	rte_spinlock_unlock(&tbl->sl);
904 	return ret;
905 }
906 
907 int32_t
908 mlx5_l3t_prepare_entry(struct mlx5_l3t_tbl *tbl, uint32_t idx,
909 		       union mlx5_l3t_data *data,
910 		       mlx5_l3t_alloc_callback_fn cb, void *ctx)
911 {
912 	int32_t ret;
913 
914 	rte_spinlock_lock(&tbl->sl);
915 	/* Check if entry data is ready. */
916 	ret = __l3t_get_entry(tbl, idx, data);
917 	if (!ret) {
918 		switch (tbl->type) {
919 		case MLX5_L3T_TYPE_WORD:
920 			if (data->word)
921 				goto out;
922 			break;
923 		case MLX5_L3T_TYPE_DWORD:
924 			if (data->dword)
925 				goto out;
926 			break;
927 		case MLX5_L3T_TYPE_QWORD:
928 			if (data->qword)
929 				goto out;
930 			break;
931 		default:
932 			if (data->ptr)
933 				goto out;
934 			break;
935 		}
936 	}
937 	/* Entry data is not ready, use user callback to create it. */
938 	ret = cb(ctx, data);
939 	if (ret)
940 		goto out;
941 	/* Save the newly allocated data to the entry. */
942 	ret = __l3t_set_entry(tbl, idx, data);
943 out:
944 	rte_spinlock_unlock(&tbl->sl);
945 	return ret;
946 }
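
/*
 * Usage sketch (illustration only): mlx5_l3t_prepare_entry() combines the
 * lookup and the set under one spinlock and creates the data through a
 * caller-provided callback only on a miss. "my_alloc_cb" and
 * "alloc_hw_resource" are hypothetical:
 *
 *	static int32_t
 *	my_alloc_cb(void *ctx, union mlx5_l3t_data *data)
 *	{
 *		data->dword = alloc_hw_resource(ctx);
 *		return data->dword ? 0 : -1;
 *	}
 *
 *	...
 *	union mlx5_l3t_data data;
 *
 *	if (mlx5_l3t_prepare_entry(tbl, MY_IDX, &data, my_alloc_cb, ctx))
 *		return -1;
 */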
947