/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(C) 2021 Marvell.
 */

#include <rte_mempool.h>

#include "roc_api.h"
#include "cnxk_mempool.h"

#define BATCH_ALLOC_SZ              ROC_CN10K_NPA_BATCH_ALLOC_MAX_PTRS
#define BATCH_OP_DATA_TABLE_MZ_NAME "batch_op_data_table_mz"
#define BATCH_ALLOC_WAIT_US         5
#define BATCH_ALLOC_RETRIES         4

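/* A per-lcore batch allocation moves through a simple lifecycle:
 * NOT_ISSUED -> ISSUED (a batch alloc request has been submitted to the
 * NPA) -> DONE (the results have been extracted into objs[]) and back to
 * NOT_ISSUED once the pointers are consumed or returned to the aura.
 */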
enum batch_op_status {
	BATCH_ALLOC_OP_NOT_ISSUED = 0,
	BATCH_ALLOC_OP_ISSUED = 1,
	BATCH_ALLOC_OP_DONE
};

struct batch_op_mem {
	unsigned int sz;
	enum batch_op_status status;
	alignas(ROC_ALIGN) uint64_t objs[BATCH_ALLOC_SZ];
};

struct batch_op_data {
	uint64_t lmt_addr;
	uint32_t max_async_batch;
	alignas(ROC_ALIGN) struct batch_op_mem mem[RTE_MAX_LCORE];
};

static struct batch_op_data **batch_op_data_tbl;

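/* Create (primary process) or look up (secondary process) the shared
 * memzone that holds one struct batch_op_data pointer per NPA aura, and
 * cache its address in the per-process batch_op_data_tbl pointer.
 */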
static int
batch_op_data_table_create(void)
{
	const struct rte_memzone *mz;

	/* If table is already set, nothing to do */
	if (batch_op_data_tbl)
		return 0;

	mz = rte_memzone_lookup(BATCH_OP_DATA_TABLE_MZ_NAME);
	if (mz == NULL) {
		if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
			unsigned int maxpools, sz;

			maxpools = roc_idev_npa_maxpools_get();
			sz = maxpools * sizeof(struct batch_op_data *);

			mz = rte_memzone_reserve_aligned(
				BATCH_OP_DATA_TABLE_MZ_NAME, sz, SOCKET_ID_ANY,
				0, ROC_ALIGN);
		}
		if (mz == NULL) {
			plt_err("Failed to reserve batch op data table");
			return -ENOMEM;
		}
	}
	batch_op_data_tbl = mz->addr;
	rte_wmb();
	return 0;
}

static inline struct batch_op_data *
batch_op_data_get(uint64_t pool_id)
{
	uint64_t aura = roc_npa_aura_handle_to_aura(pool_id);

	return batch_op_data_tbl[aura];
}

static inline void
batch_op_data_set(uint64_t pool_id, struct batch_op_data *op_data)
{
	uint64_t aura = roc_npa_aura_handle_to_aura(pool_id);

	batch_op_data_tbl[aura] = op_data;
}

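/* Allocate and populate the per-pool batch op data: reset the per-lcore
 * batch buffers, cache the LMT base address used for batch frees, and
 * derive the async batch size from the mempool cache size (rounded up to
 * ROC_ALIGN / 8 pointers and capped at BATCH_ALLOC_SZ).  A cache-less
 * mempool ends up with max_async_batch == 0 and uses the synchronous
 * dequeue path.
 */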
static int
batch_op_init(struct rte_mempool *mp)
{
	struct batch_op_data *op_data;
	int i;

	op_data = batch_op_data_get(mp->pool_id);
	/* The data should not have been allocated previously */
	RTE_ASSERT(op_data == NULL);

	op_data = rte_zmalloc(NULL, sizeof(struct batch_op_data), ROC_ALIGN);
	if (op_data == NULL)
		return -ENOMEM;

	for (i = 0; i < RTE_MAX_LCORE; i++) {
		op_data->mem[i].sz = 0;
		op_data->mem[i].status = BATCH_ALLOC_OP_NOT_ISSUED;
	}

	op_data->lmt_addr = roc_idev_lmt_base_addr_get();
	op_data->max_async_batch =
		RTE_MIN((unsigned int)BATCH_ALLOC_SZ,
			RTE_ALIGN_CEIL(mp->cache_size, ROC_ALIGN / 8));

	batch_op_data_set(mp->pool_id, op_data);
	rte_wmb();

	return 0;
}

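/* Drain any batch allocations that are still outstanding or already
 * extracted on each lcore, return those pointers to the aura, and release
 * the per-pool batch op data.
 */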
static void
batch_op_fini(struct rte_mempool *mp)
{
	struct batch_op_data *op_data;
	int i;

	op_data = batch_op_data_get(mp->pool_id);
	if (!op_data) {
		/* Batch op data can be uninitialized in case of empty
		 * mempools.
		 */
		return;
	}

	/* If max_async_batch == 0, then batch mem will be empty */
	if (op_data->max_async_batch == 0)
		goto free_op_data;

	rte_wmb();
	for (i = 0; i < RTE_MAX_LCORE; i++) {
		struct batch_op_mem *mem = &op_data->mem[i];

		if (mem->status == BATCH_ALLOC_OP_ISSUED) {
			mem->sz = roc_npa_aura_batch_alloc_extract(
				mem->objs, mem->objs, op_data->max_async_batch);
			mem->status = BATCH_ALLOC_OP_DONE;
		}
		if (mem->status == BATCH_ALLOC_OP_DONE) {
			roc_npa_aura_op_bulk_free(mp->pool_id, mem->objs,
						  mem->sz, 1);
			mem->status = BATCH_ALLOC_OP_NOT_ISSUED;
		}
	}

free_op_data:
	rte_free(op_data);
	batch_op_data_set(mp->pool_id, NULL);
	rte_wmb();
}

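/* Enqueue (free) objects back to the NPA aura.  A single object uses the
 * scalar aura free; larger bursts are freed through LMT-line based batch
 * operations using the per-pool LMT address.
 */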
static int __rte_hot
cn10k_mempool_enq(struct rte_mempool *mp, void *const *obj_table,
		  unsigned int n)
{
	const uint64_t *ptr = (const uint64_t *)obj_table;
	uint64_t lmt_addr = 0, lmt_id = 0;
	struct batch_op_data *op_data;

	/* Ensure mbuf init changes are written before the free pointers are
	 * enqueued to the stack.
	 */
	rte_io_wmb();

	/* For non-EAL threads, rte_lcore_id() will not be valid. Hence
	 * fall back to the default enqueue.
	 */
	if (unlikely(rte_lcore_id() == LCORE_ID_ANY))
		return cnxk_mempool_enq(mp, obj_table, n);

	if (n == 1) {
		roc_npa_aura_op_free(mp->pool_id, 1, ptr[0]);
		return 0;
	}

	op_data = batch_op_data_get(mp->pool_id);
	lmt_addr = op_data->lmt_addr;
	ROC_LMT_BASE_ID_GET(lmt_addr, lmt_id);
	roc_npa_aura_op_batch_free(mp->pool_id, ptr, n, 1, lmt_addr, lmt_id);

	return 0;
}

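/* Count the objects available in the pool, including pointers parked in
 * the per-lcore batch buffers: batch allocs that were issued but not yet
 * extracted are counted with a bounded wait, and already extracted ones
 * contribute mem->sz.
 */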
static unsigned int
cn10k_mempool_get_count(const struct rte_mempool *mp)
{
	struct batch_op_data *op_data;
	unsigned int count = 0;
	int i;

	op_data = batch_op_data_get(mp->pool_id);
	/* If max_async_batch == 0, then batch alloc mem will be empty */
	if (op_data->max_async_batch == 0)
		goto npa_pool_count;

	rte_wmb();
	for (i = 0; i < RTE_MAX_LCORE; i++) {
		struct batch_op_mem *mem = &op_data->mem[i];

		if (mem->status == BATCH_ALLOC_OP_ISSUED)
			count += roc_npa_aura_batch_alloc_count(
				mem->objs, op_data->max_async_batch,
				BATCH_ALLOC_WAIT_US);

		if (mem->status == BATCH_ALLOC_OP_DONE)
			count += mem->sz;
	}

npa_pool_count:
	count += cnxk_mempool_get_count(mp);

	return count;
}

static inline unsigned int __rte_hot
mempool_deq(struct rte_mempool *mp, void **obj_table, unsigned int n)
{
	return cnxk_mempool_deq(mp, obj_table, n) ? 0 : n;
}

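/* Asynchronous dequeue: each lcore keeps a private batch buffer.  Results
 * of a previously issued batch alloc are extracted and copied out from the
 * tail of the buffer, and a new batch alloc is issued as soon as the
 * buffer drains so the NPA prepares the next batch while the caller
 * consumes the current one.  The loop gives up after BATCH_ALLOC_RETRIES
 * partial hardware batches.
 */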
static inline unsigned int __rte_hot
mempool_deq_batch_async(struct rte_mempool *mp, void **obj_table, unsigned int n)
{
	struct batch_op_data *op_data;
	struct batch_op_mem *mem;
	unsigned int count = 0;
	int tid, rc, retry;
	bool loop = true;

	op_data = batch_op_data_get(mp->pool_id);
	tid = rte_lcore_id();
	mem = &op_data->mem[tid];

	/* Issue batch alloc */
	if (mem->status == BATCH_ALLOC_OP_NOT_ISSUED) {
		rc = roc_npa_aura_batch_alloc_issue(
			mp->pool_id, mem->objs, op_data->max_async_batch, 0, 1);
		/* If issue fails, try falling back to default alloc */
		if (unlikely(rc))
			return mempool_deq(mp, obj_table, n);
		mem->status = BATCH_ALLOC_OP_ISSUED;
	}

	retry = BATCH_ALLOC_RETRIES;
	while (loop) {
		unsigned int cur_sz;

		if (mem->status == BATCH_ALLOC_OP_ISSUED) {
			mem->sz = roc_npa_aura_batch_alloc_extract(
				mem->objs, mem->objs, op_data->max_async_batch);

			/* If partial alloc, reduce the retry count */
			retry -= (mem->sz != op_data->max_async_batch);
			/* Break the loop if retry count exhausted */
			loop = !!retry;
			mem->status = BATCH_ALLOC_OP_DONE;
		}

		cur_sz = n - count;
		if (cur_sz > mem->sz)
			cur_sz = mem->sz;

		/* Dequeue the pointers */
		memcpy(&obj_table[count], &mem->objs[mem->sz - cur_sz],
		       cur_sz * sizeof(uintptr_t));
		mem->sz -= cur_sz;
		count += cur_sz;

		/* Break loop if the required pointers have been dequeued */
		loop &= (count != n);

		/* Issue next batch alloc if pointers are exhausted */
		if (mem->sz == 0) {
			rc = roc_npa_aura_batch_alloc_issue(
				mp->pool_id, mem->objs,
				op_data->max_async_batch, 0, 1);
			/* Break loop if issue failed and set status */
			loop &= !rc;
			mem->status = !rc;
		}
	}

	return count;
}

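/* Synchronous dequeue, used when max_async_batch == 0 (no mempool cache):
 * issue a batch alloc sized for the remaining request and extract its
 * results within the same call, giving up after BATCH_ALLOC_RETRIES
 * partial batches.
 */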
static inline unsigned int __rte_hot
mempool_deq_batch_sync(struct rte_mempool *mp, void **obj_table, unsigned int n)
{
	struct batch_op_data *op_data;
	struct batch_op_mem *mem;
	unsigned int count = 0;
	int tid, retry, rc;

	op_data = batch_op_data_get(mp->pool_id);
	tid = rte_lcore_id();
	mem = &op_data->mem[tid];

	retry = BATCH_ALLOC_RETRIES;
	while (count != n && retry) {
		unsigned int cur_sz, batch_sz;

		cur_sz = n - count;
		batch_sz = RTE_MIN(BATCH_ALLOC_SZ, (int)cur_sz);

		/* Issue batch alloc */
		rc = roc_npa_aura_batch_alloc_issue(mp->pool_id, mem->objs,
						    batch_sz, 0, 1);

		/* If issue fails, try falling back to default alloc */
		if (unlikely(rc))
			return count +
			       mempool_deq(mp, obj_table + count, n - count);

		cur_sz = roc_npa_aura_batch_alloc_extract(mem->objs, mem->objs,
							  batch_sz);

		/* Dequeue the pointers */
		memcpy(&obj_table[count], mem->objs,
		       cur_sz * sizeof(uintptr_t));
		count += cur_sz;

		/* If partial alloc, reduce the retry count */
		retry -= (batch_sz != cur_sz);
	}

	return count;
}

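/* Dequeue entry point.  Non-EAL threads cannot index the per-lcore batch
 * buffers, so they fall back to the default dequeue.  Mempool dequeues are
 * all-or-nothing: on a partial allocation the pointers already obtained
 * are freed back and -ENOENT is returned.
 */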
static int __rte_hot
cn10k_mempool_deq(struct rte_mempool *mp, void **obj_table, unsigned int n)
{
	struct batch_op_data *op_data;
	unsigned int count = 0;

	/* For non-EAL threads, rte_lcore_id() will not be valid. Hence
	 * fall back to the default bulk alloc.
	 */
	if (unlikely(rte_lcore_id() == LCORE_ID_ANY))
		return cnxk_mempool_deq(mp, obj_table, n);

	op_data = batch_op_data_get(mp->pool_id);
	if (op_data->max_async_batch)
		count = mempool_deq_batch_async(mp, obj_table, n);
	else
		count = mempool_deq_batch_sync(mp, obj_table, n);

	if (unlikely(count != n)) {
		/* No partial alloc allowed. Free up allocated pointers */
		cn10k_mempool_enq(mp, obj_table, count);
		return -ENOENT;
	}

	return 0;
}

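/* Pool alloc: round the header size up to ROC_ALIGN and pad the trailer so
 * the total block size is a multiple of ROC_ALIGN, then perform the common
 * cnxk pool setup and initialize the batch op data.
 */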
static int
cn10k_mempool_alloc(struct rte_mempool *mp)
{
	uint32_t block_size;
	size_t padding;
	int rc;

	block_size = mp->elt_size + mp->header_size + mp->trailer_size;
	/* Align header size to ROC_ALIGN */
	if (mp->header_size % ROC_ALIGN != 0) {
		padding = RTE_ALIGN_CEIL(mp->header_size, ROC_ALIGN) -
			  mp->header_size;
		mp->header_size += padding;
		block_size += padding;
	}

	/* Align block size to ROC_ALIGN */
	if (block_size % ROC_ALIGN != 0) {
		padding = RTE_ALIGN_CEIL(block_size, ROC_ALIGN) - block_size;
		mp->trailer_size += padding;
		block_size += padding;
	}

	rc = cnxk_mempool_alloc(mp);
	if (rc)
		return rc;

	rc = batch_op_init(mp);
	if (rc) {
		plt_err("Failed to init batch alloc mem rc=%d", rc);
		goto error;
	}

	return 0;
error:
	cnxk_mempool_free(mp);
	return rc;
}

static void
cn10k_mempool_free(struct rte_mempool *mp)
{
	batch_op_fini(mp);
	cnxk_mempool_free(mp);
}

int
cn10k_mempool_plt_init(void)
{
	return batch_op_data_table_create();
}

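/* Ops table registered with the mempool library.  As an illustration (not
 * part of this driver; the pool name and sizes below are hypothetical),
 * an application could request these ops explicitly instead of relying on
 * the platform default:
 *
 *	struct rte_mempool *mp;
 *
 *	mp = rte_mempool_create_empty("pktpool", 8192, 2048, 256, 0,
 *				      SOCKET_ID_ANY, 0);
 *	if (mp == NULL)
 *		return -ENOMEM;
 *	if (rte_mempool_set_ops_byname(mp, "cn10k_mempool_ops", NULL) != 0 ||
 *	    rte_mempool_populate_default(mp) < 0) {
 *		rte_mempool_free(mp);
 *		return -ENOMEM;
 *	}
 */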
static struct rte_mempool_ops cn10k_mempool_ops = {
	.name = "cn10k_mempool_ops",
	.alloc = cn10k_mempool_alloc,
	.free = cn10k_mempool_free,
	.enqueue = cn10k_mempool_enq,
	.dequeue = cn10k_mempool_deq,
	.get_count = cn10k_mempool_get_count,
	.calc_mem_size = cnxk_mempool_calc_mem_size,
	.populate = cnxk_mempool_populate,
};

RTE_MEMPOOL_REGISTER_OPS(cn10k_mempool_ops);