/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2014 Intel Corporation
 * Copyright(c) 2022 SmartShare Systems
 */

#include <string.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <inttypes.h>
#include <stdarg.h>
#include <errno.h>
#include <sys/queue.h>

#include <rte_common.h>
#include <rte_log.h>
#include <rte_debug.h>
#include <rte_memory.h>
#include <rte_launch.h>
#include <rte_cycles.h>
#include <rte_eal.h>
#include <rte_per_lcore.h>
#include <rte_lcore.h>
#include <rte_branch_prediction.h>
#include <rte_mempool.h>
#include <rte_spinlock.h>
#include <rte_malloc.h>
#include <rte_mbuf_pool_ops.h>

#include "test.h"

/*
 * Mempool performance
 * ===================
 *
 *    Each core gets *n_keep* objects in bulks of *n_get_bulk*, then the
 *    objects are put back in the pool in bulks of *n_put_bulk*; a worked
 *    example follows this comment block.
 *
 *    This sequence is repeated for TIME_S seconds.
 *
 *    The test is run with the following configurations:
 *
 *    - Core configuration (*cores*)
 *
 *      - One core with cache
 *      - Two cores with cache
 *      - Max. cores with cache
 *      - One core without cache
 *      - Two cores without cache
 *      - Max. cores without cache
 *      - One core with user-owned cache
 *      - Two cores with user-owned cache
 *      - Max. cores with user-owned cache
 *
 *    - Bulk size (*n_get_bulk*, *n_put_bulk*)
 *
 *      - Bulk get of 1, 4, CACHE_LINE_BURST or 32 objects
 *      - Bulk put of 1, 4, CACHE_LINE_BURST or 32 objects
 *      - Bulk get and put of the same size, as a compile-time constant
 *
 *    - Number of kept objects (*n_keep*)
 *
 *      - 32
 *      - 128
 *      - 512
 */
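
/*
 * For illustration, one pass of the inner loop with n_keep = 128,
 * n_get_bulk = 32 and n_put_bulk = 4 performs 4 gets of 32 objects
 * followed by 32 puts of 4 objects; N / n_keep such passes are executed
 * before the elapsed time is checked again.
 */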

#define N 65536
#define TIME_S 5
#define MEMPOOL_ELT_SIZE 2048
#define MAX_KEEP 512
#define MEMPOOL_SIZE ((rte_lcore_count()*(MAX_KEEP+RTE_MEMPOOL_CACHE_MAX_SIZE))-1)
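/*
 * The pool is presumably sized so that every lcore can simultaneously
 * hold MAX_KEEP objects plus a completely filled default cache.
 */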

/* Number of pointers fitting into one cache line. */
#define CACHE_LINE_BURST (RTE_CACHE_LINE_SIZE / sizeof(uintptr_t))
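/* E.g. 64-byte cache lines and 8-byte pointers give a burst of 8 objects. */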

#define LOG_ERR() printf("test failed at %s():%d\n", __func__, __LINE__)
#define RET_ERR() do {							\
		LOG_ERR();						\
		return -1;						\
	} while (0)
#define GOTO_ERR(var, label) do {					\
		LOG_ERR();						\
		var = -1;						\
		goto label;						\
	} while (0)

static int use_external_cache;
static unsigned external_cache_size = RTE_MEMPOOL_CACHE_MAX_SIZE;

static uint32_t synchro;

/* number of objects in one bulk operation (get or put) */
static unsigned n_get_bulk;
static unsigned n_put_bulk;

/* number of objects retrieved from mempool before putting them back */
static unsigned n_keep;

/* true if we want to test with constant n_get_bulk and n_put_bulk */
static int use_constant_values;

/* number of enqueues / dequeues */
struct __rte_cache_aligned mempool_test_stats {
	uint64_t enq_count;
};

static struct mempool_test_stats stats[RTE_MAX_LCORE];

/*
 * save the object number in the first 4 bytes of object data. All
 * other bytes are set to 0.
 */
static void
my_obj_init(struct rte_mempool *mp, __rte_unused void *arg,
	    void *obj, unsigned i)
{
	uint32_t *objnum = obj;
	memset(obj, 0, mp->elt_size);
	*objnum = i;
}

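/*
 * Inner measurement loop; always inlined, presumably so that the constant
 * bulk sizes passed from per_lcore_mempool_test() reach the mempool
 * get/put fast path as compile-time constants.
 */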
static __rte_always_inline int
test_loop(struct rte_mempool *mp, struct rte_mempool_cache *cache,
	  unsigned int x_keep, unsigned int x_get_bulk, unsigned int x_put_bulk)
{
	alignas(RTE_CACHE_LINE_SIZE) void *obj_table[MAX_KEEP];
	unsigned int idx;
	unsigned int i;
	int ret;

	for (i = 0; likely(i < (N / x_keep)); i++) {
		/* get x_keep objects by bulk of x_get_bulk */
		for (idx = 0; idx < x_keep; idx += x_get_bulk) {
			ret = rte_mempool_generic_get(mp,
						      &obj_table[idx],
						      x_get_bulk,
						      cache);
			if (unlikely(ret < 0)) {
				rte_mempool_dump(stdout, mp);
				return ret;
			}
		}

		/* put the objects back by bulk of x_put_bulk */
		for (idx = 0; idx < x_keep; idx += x_put_bulk) {
			rte_mempool_generic_put(mp,
						&obj_table[idx],
						x_put_bulk,
						cache);
		}
	}

	return 0;
}

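/*
 * Per-lcore test body: set up the (optionally user-owned) mempool cache,
 * wait for the start signal, then run test_loop() repeatedly until TIME_S
 * seconds have elapsed, adding N objects to stats[] per completed loop.
 */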
static int
per_lcore_mempool_test(void *arg)
{
	struct rte_mempool *mp = arg;
	unsigned lcore_id = rte_lcore_id();
	int ret = 0;
	uint64_t start_cycles, end_cycles;
	uint64_t time_diff = 0, hz = rte_get_timer_hz();
	struct rte_mempool_cache *cache;

	if (use_external_cache) {
		/* Create a user-owned mempool cache. */
		cache = rte_mempool_cache_create(external_cache_size,
						 SOCKET_ID_ANY);
		if (cache == NULL)
			RET_ERR();
	} else {
		/* May be NULL if cache is disabled. */
		cache = rte_mempool_default_cache(mp, lcore_id);
	}

	/* n_get_bulk and n_put_bulk must be divisors of n_keep */
	if (((n_keep / n_get_bulk) * n_get_bulk) != n_keep)
		GOTO_ERR(ret, out);
	if (((n_keep / n_put_bulk) * n_put_bulk) != n_keep)
		GOTO_ERR(ret, out);
	/* for constant n, n_get_bulk and n_put_bulk must be the same */
	if (use_constant_values && n_put_bulk != n_get_bulk)
		GOTO_ERR(ret, out);

	stats[lcore_id].enq_count = 0;

	/* worker lcores wait here for the start signal from the main lcore */
	if (lcore_id != rte_get_main_lcore())
		rte_wait_until_equal_32(&synchro, 1, __ATOMIC_RELAXED);

	start_cycles = rte_get_timer_cycles();

	while (time_diff/hz < TIME_S) {
		if (!use_constant_values)
			ret = test_loop(mp, cache, n_keep, n_get_bulk, n_put_bulk);
		else if (n_get_bulk == 1)
			ret = test_loop(mp, cache, n_keep, 1, 1);
		else if (n_get_bulk == 4)
			ret = test_loop(mp, cache, n_keep, 4, 4);
		else if (n_get_bulk == CACHE_LINE_BURST)
			ret = test_loop(mp, cache, n_keep,
					CACHE_LINE_BURST, CACHE_LINE_BURST);
		else if (n_get_bulk == 32)
			ret = test_loop(mp, cache, n_keep, 32, 32);
		else
			ret = -1;

		if (ret < 0)
			GOTO_ERR(ret, out);

		end_cycles = rte_get_timer_cycles();
		time_diff = end_cycles - start_cycles;
		stats[lcore_id].enq_count += N;
	}

out:
	if (use_external_cache) {
		rte_mempool_cache_flush(cache, mp);
		rte_mempool_cache_free(cache);
	}

	return ret;
}

/* launch the per-lcore test on all requested cores and display the result */
static int
launch_cores(struct rte_mempool *mp, unsigned int cores)
{
	unsigned lcore_id;
	uint64_t rate;
	int ret;
	unsigned cores_save = cores;

	__atomic_store_n(&synchro, 0, __ATOMIC_RELAXED);

	/* reset stats */
	memset(stats, 0, sizeof(stats));

	printf("mempool_autotest cache=%u cores=%u n_get_bulk=%u "
	       "n_put_bulk=%u n_keep=%u constant_n=%u ",
	       use_external_cache ?
		   external_cache_size : (unsigned) mp->cache_size,
	       cores, n_get_bulk, n_put_bulk, n_keep, use_constant_values);

	if (rte_mempool_avail_count(mp) != MEMPOOL_SIZE) {
		printf("mempool is not full\n");
		return -1;
	}

	RTE_LCORE_FOREACH_WORKER(lcore_id) {
		if (cores == 1)
			break;
		cores--;
		rte_eal_remote_launch(per_lcore_mempool_test,
				      mp, lcore_id);
	}

	/* start synchro and launch test on main */
	__atomic_store_n(&synchro, 1, __ATOMIC_RELAXED);

	ret = per_lcore_mempool_test(mp);

	cores = cores_save;
	RTE_LCORE_FOREACH_WORKER(lcore_id) {
		if (cores == 1)
			break;
		cores--;
		if (rte_eal_wait_lcore(lcore_id) < 0)
			ret = -1;
	}

	if (ret < 0) {
		printf("per-lcore test returned -1\n");
		return -1;
	}

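	/* aggregate rate: objects dequeued per second over all participating lcores */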
	rate = 0;
	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++)
		rate += (stats[lcore_id].enq_count / TIME_S);

	printf("rate_persec=%" PRIu64 "\n", rate);

	return 0;
}

/* for a given number of cores, launch all test cases */
static int
do_one_mempool_test(struct rte_mempool *mp, unsigned int cores)
{
	unsigned int bulk_tab_get[] = { 1, 4, CACHE_LINE_BURST, 32, 0 };
	unsigned int bulk_tab_put[] = { 1, 4, CACHE_LINE_BURST, 32, 0 };
	unsigned int keep_tab[] = { 32, 128, 512, 0 };
	unsigned *get_bulk_ptr;
	unsigned *put_bulk_ptr;
	unsigned *keep_ptr;
	int ret;

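	/*
	 * 4 get sizes x 4 put sizes x 3 keep values = 48 runs with variable
	 * bulk sizes, plus 12 replays with constant bulk sizes whenever the
	 * get and put sizes match.
	 */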
	for (get_bulk_ptr = bulk_tab_get; *get_bulk_ptr; get_bulk_ptr++) {
		for (put_bulk_ptr = bulk_tab_put; *put_bulk_ptr; put_bulk_ptr++) {
			for (keep_ptr = keep_tab; *keep_ptr; keep_ptr++) {

				use_constant_values = 0;
				n_get_bulk = *get_bulk_ptr;
				n_put_bulk = *put_bulk_ptr;
				n_keep = *keep_ptr;
				ret = launch_cores(mp, cores);
				if (ret < 0)
					return -1;

				/* replay test with constant values */
				if (n_get_bulk == n_put_bulk) {
					use_constant_values = 1;
					ret = launch_cores(mp, cores);
					if (ret < 0)
						return -1;
				}
			}
		}
	}
	return 0;
}

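/*
 * Entry point: create one pool without a cache, one with the maximum
 * per-lcore cache and one using the preferred mbuf pool ops, then run the
 * full test matrix on each with 1, 2 and all available lcores. The
 * cache-less pool is reused for the user-owned cache runs.
 */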
static int
test_mempool_perf(void)
{
	struct rte_mempool *mp_cache = NULL;
	struct rte_mempool *mp_nocache = NULL;
	struct rte_mempool *default_pool = NULL;
	const char *default_pool_ops;
	int ret = -1;

	/* create a mempool (without cache) */
	mp_nocache = rte_mempool_create("perf_test_nocache", MEMPOOL_SIZE,
					MEMPOOL_ELT_SIZE, 0, 0,
					NULL, NULL,
					my_obj_init, NULL,
					SOCKET_ID_ANY, 0);
	if (mp_nocache == NULL)
		goto err;

	/* create a mempool (with cache) */
	mp_cache = rte_mempool_create("perf_test_cache", MEMPOOL_SIZE,
				      MEMPOOL_ELT_SIZE,
				      RTE_MEMPOOL_CACHE_MAX_SIZE, 0,
				      NULL, NULL,
				      my_obj_init, NULL,
				      SOCKET_ID_ANY, 0);
	if (mp_cache == NULL)
		goto err;

	default_pool_ops = rte_mbuf_best_mempool_ops();
	/* Create a mempool based on Default handler */
	default_pool = rte_mempool_create_empty("default_pool",
						MEMPOOL_SIZE,
						MEMPOOL_ELT_SIZE,
						0, 0,
						SOCKET_ID_ANY, 0);

	if (default_pool == NULL) {
		printf("cannot allocate %s mempool\n", default_pool_ops);
		goto err;
	}

	if (rte_mempool_set_ops_byname(default_pool, default_pool_ops, NULL)
				       < 0) {
		printf("cannot set %s handler\n", default_pool_ops);
		goto err;
	}

	if (rte_mempool_populate_default(default_pool) < 0) {
		printf("cannot populate %s mempool\n", default_pool_ops);
		goto err;
	}

	rte_mempool_obj_iter(default_pool, my_obj_init, NULL);

	/* performance test with 1, 2 and max cores */
	printf("start performance test (without cache)\n");

	if (do_one_mempool_test(mp_nocache, 1) < 0)
		goto err;

	if (do_one_mempool_test(mp_nocache, 2) < 0)
		goto err;

	if (do_one_mempool_test(mp_nocache, rte_lcore_count()) < 0)
		goto err;

	/* performance test with 1, 2 and max cores */
	printf("start performance test for %s (without cache)\n",
	       default_pool_ops);

	if (do_one_mempool_test(default_pool, 1) < 0)
		goto err;

	if (do_one_mempool_test(default_pool, 2) < 0)
		goto err;

	if (do_one_mempool_test(default_pool, rte_lcore_count()) < 0)
		goto err;

	/* performance test with 1, 2 and max cores */
	printf("start performance test (with cache)\n");

	if (do_one_mempool_test(mp_cache, 1) < 0)
		goto err;

	if (do_one_mempool_test(mp_cache, 2) < 0)
		goto err;

	if (do_one_mempool_test(mp_cache, rte_lcore_count()) < 0)
		goto err;

	/* performance test with 1, 2 and max cores */
	printf("start performance test (with user-owned cache)\n");
	use_external_cache = 1;

	if (do_one_mempool_test(mp_nocache, 1) < 0)
		goto err;

	if (do_one_mempool_test(mp_nocache, 2) < 0)
		goto err;

	if (do_one_mempool_test(mp_nocache, rte_lcore_count()) < 0)
		goto err;

	rte_mempool_list_dump(stdout);

	ret = 0;

err:
	rte_mempool_free(mp_cache);
	rte_mempool_free(mp_nocache);
	rte_mempool_free(default_pool);
	return ret;
}

REGISTER_PERF_TEST(mempool_perf_autotest, test_mempool_perf);
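
/*
 * A minimal usage sketch (assuming the standard dpdk-test application from
 * the same build): run the dpdk-test binary and start this test either
 * interactively at the "RTE>>" prompt or via the DPDK_TEST environment
 * variable, e.g. DPDK_TEST=mempool_perf_autotest.
 */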