/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2014 Intel Corporation
 */

#include <string.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <inttypes.h>
#include <stdarg.h>
#include <errno.h>
#include <sys/queue.h>

#include <rte_common.h>
#include <rte_log.h>
#include <rte_debug.h>
#include <rte_memory.h>
#include <rte_launch.h>
#include <rte_cycles.h>
#include <rte_eal.h>
#include <rte_per_lcore.h>
#include <rte_lcore.h>
#include <rte_atomic.h>
#include <rte_branch_prediction.h>
#include <rte_mempool.h>
#include <rte_spinlock.h>
#include <rte_malloc.h>
#include <rte_mbuf_pool_ops.h>

#include "test.h"

/*
 * Mempool performance
 * ===================
 *
 *    Each core gets *n_keep* objects per bulk of *n_get_bulk*. Then,
 *    the objects are put back in the pool per bulk of *n_put_bulk*.
 *
 *    This sequence is repeated for TIME_S seconds.
 *
 *    This test is done on the following configurations:
 *
 *    - Cores configuration (*cores*)
 *
 *      - One core with cache
 *      - Two cores with cache
 *      - Max. cores with cache
 *      - One core without cache
 *      - Two cores without cache
 *      - Max. cores without cache
 *      - One core with user-owned cache
 *      - Two cores with user-owned cache
 *      - Max. cores with user-owned cache
 *
 *    - Bulk size (*n_get_bulk*, *n_put_bulk*)
 *
 *      - Bulk get from 1 to 32
 *      - Bulk put from 1 to 32
 *
 *    - Number of kept objects (*n_keep*)
 *
 *      - 32
 *      - 128
 */

#define N 65536
#define TIME_S 5
#define MEMPOOL_ELT_SIZE 2048
#define MAX_KEEP 128
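/*
 * Worst-case pool size: each lcore may simultaneously hold up to
 * MAX_KEEP objects plus a full default cache. The trailing -1 appears
 * to follow the general mempool sizing hint that a count just below a
 * round number makes best use of the ring-based backend's memory.
 */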
#define MEMPOOL_SIZE ((rte_lcore_count()*(MAX_KEEP+RTE_MEMPOOL_CACHE_MAX_SIZE))-1)

#define LOG_ERR() printf("test failed at %s():%d\n", __func__, __LINE__)
#define RET_ERR() do {							\
		LOG_ERR();						\
		return -1;						\
	} while (0)
#define GOTO_ERR(var, label) do {					\
		LOG_ERR();						\
		var = -1;						\
		goto label;						\
	} while (0)

static int use_external_cache;
static unsigned external_cache_size = RTE_MEMPOOL_CACHE_MAX_SIZE;

static rte_atomic32_t synchro;

/* number of objects in one bulk operation (get or put) */
static unsigned n_get_bulk;
static unsigned n_put_bulk;

/* number of objects retrieved from the mempool before putting them back */
static unsigned n_keep;

/* number of enqueues / dequeues */
struct mempool_test_stats {
	uint64_t enq_count;
} __rte_cache_aligned;

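/*
 * One slot per lcore; the cache alignment keeps each core's counter on
 * its own cache line, avoiding false sharing between lcores.
 */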
static struct mempool_test_stats stats[RTE_MAX_LCORE];

/*
 * Save the object number in the first 4 bytes of object data. All
 * other bytes are set to 0.
 */
static void
my_obj_init(struct rte_mempool *mp, __rte_unused void *arg,
	    void *obj, unsigned i)
{
	uint32_t *objnum = obj;
	memset(obj, 0, mp->elt_size);
	*objnum = i;
}

static int
per_lcore_mempool_test(void *arg)
{
	void *obj_table[MAX_KEEP];
	unsigned i, idx;
	struct rte_mempool *mp = arg;
	unsigned lcore_id = rte_lcore_id();
	int ret = 0;
	uint64_t start_cycles, end_cycles;
	uint64_t time_diff = 0, hz = rte_get_timer_hz();
	struct rte_mempool_cache *cache;

	if (use_external_cache) {
		/* Create a user-owned mempool cache. */
		cache = rte_mempool_cache_create(external_cache_size,
						 SOCKET_ID_ANY);
		if (cache == NULL)
			RET_ERR();
	} else {
		/* May be NULL if cache is disabled. */
		cache = rte_mempool_default_cache(mp, lcore_id);
	}
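	/*
	 * Both rte_mempool_generic_get() and rte_mempool_generic_put()
	 * accept a NULL cache pointer; in that case they bypass caching
	 * and access the common pool directly.
	 */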

	/* n_get_bulk and n_put_bulk must be divisors of n_keep */
	if (((n_keep / n_get_bulk) * n_get_bulk) != n_keep)
		GOTO_ERR(ret, out);
	if (((n_keep / n_put_bulk) * n_put_bulk) != n_keep)
		GOTO_ERR(ret, out);

	stats[lcore_id].enq_count = 0;

	/* workers wait until the main lcore signals the start */
	if (lcore_id != rte_get_main_lcore())
		while (rte_atomic32_read(&synchro) == 0);

	start_cycles = rte_get_timer_cycles();

	while (time_diff/hz < TIME_S) {
		for (i = 0; likely(i < (N/n_keep)); i++) {
			/* get n_keep objects by bulk of n_get_bulk */
			idx = 0;
			while (idx < n_keep) {
				ret = rte_mempool_generic_get(mp,
							      &obj_table[idx],
							      n_get_bulk,
							      cache);
				if (unlikely(ret < 0)) {
					rte_mempool_dump(stdout, mp);
					/* in this case, objects are lost... */
					GOTO_ERR(ret, out);
				}
				idx += n_get_bulk;
			}

			/* put the objects back, by bulk of n_put_bulk */
			idx = 0;
			while (idx < n_keep) {
				rte_mempool_generic_put(mp, &obj_table[idx],
							n_put_bulk,
							cache);
				idx += n_put_bulk;
			}
		}
		end_cycles = rte_get_timer_cycles();
		time_diff = end_cycles - start_cycles;
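		/* each pass of the outer for loop gets and puts N objects */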
		stats[lcore_id].enq_count += N;
	}

out:
	if (use_external_cache) {
		rte_mempool_cache_flush(cache, mp);
		rte_mempool_cache_free(cache);
	}

	return ret;
}

/* launch the per-lcore test on all requested cores and display the result */
static int
launch_cores(struct rte_mempool *mp, unsigned int cores)
{
	unsigned lcore_id;
	uint64_t rate;
	int ret;
	unsigned cores_save = cores;

	rte_atomic32_set(&synchro, 0);

	/* reset stats */
	memset(stats, 0, sizeof(stats));

	printf("mempool_autotest cache=%u cores=%u n_get_bulk=%u "
	       "n_put_bulk=%u n_keep=%u ",
	       use_external_cache ?
		   external_cache_size : (unsigned) mp->cache_size,
	       cores, n_get_bulk, n_put_bulk, n_keep);

	if (rte_mempool_avail_count(mp) != MEMPOOL_SIZE) {
		printf("mempool is not full\n");
		return -1;
	}

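	/*
	 * Launch the test on cores-1 workers; the main lcore takes part
	 * as the last core by calling the test function directly below.
	 */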
	RTE_LCORE_FOREACH_WORKER(lcore_id) {
		if (cores == 1)
			break;
		cores--;
		rte_eal_remote_launch(per_lcore_mempool_test,
				      mp, lcore_id);
	}

	/* start synchro and launch test on main */
	rte_atomic32_set(&synchro, 1);

	ret = per_lcore_mempool_test(mp);

	cores = cores_save;
	RTE_LCORE_FOREACH_WORKER(lcore_id) {
		if (cores == 1)
			break;
		cores--;
		if (rte_eal_wait_lcore(lcore_id) < 0)
			ret = -1;
	}

	if (ret < 0) {
		printf("per-lcore test returned -1\n");
		return -1;
	}

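	/* sum each lcore's per-second enqueue rate into one figure */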
	rate = 0;
	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++)
		rate += (stats[lcore_id].enq_count / TIME_S);

	printf("rate_persec=%" PRIu64 "\n", rate);

	return 0;
}

/* for a given number of cores, launch all test cases */
static int
do_one_mempool_test(struct rte_mempool *mp, unsigned int cores)
{
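	/* zero-terminated tables of bulk sizes and keep counts to sweep */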
	unsigned bulk_tab_get[] = { 1, 4, 32, 0 };
	unsigned bulk_tab_put[] = { 1, 4, 32, 0 };
	unsigned keep_tab[] = { 32, 128, 0 };
	unsigned *get_bulk_ptr;
	unsigned *put_bulk_ptr;
	unsigned *keep_ptr;
	int ret;

	for (get_bulk_ptr = bulk_tab_get; *get_bulk_ptr; get_bulk_ptr++) {
		for (put_bulk_ptr = bulk_tab_put; *put_bulk_ptr; put_bulk_ptr++) {
			for (keep_ptr = keep_tab; *keep_ptr; keep_ptr++) {

				n_get_bulk = *get_bulk_ptr;
				n_put_bulk = *put_bulk_ptr;
				n_keep = *keep_ptr;
				ret = launch_cores(mp, cores);

				if (ret < 0)
					return -1;
			}
		}
	}
	return 0;
}

static int
test_mempool_perf(void)
{
	struct rte_mempool *mp_cache = NULL;
	struct rte_mempool *mp_nocache = NULL;
	struct rte_mempool *default_pool = NULL;
	const char *default_pool_ops;
	int ret = -1;

	rte_atomic32_init(&synchro);

	/* create a mempool (without cache) */
	mp_nocache = rte_mempool_create("perf_test_nocache", MEMPOOL_SIZE,
					MEMPOOL_ELT_SIZE, 0, 0,
					NULL, NULL,
					my_obj_init, NULL,
					SOCKET_ID_ANY, 0);
	if (mp_nocache == NULL)
		goto err;

	/* create a mempool (with cache) */
	mp_cache = rte_mempool_create("perf_test_cache", MEMPOOL_SIZE,
				      MEMPOOL_ELT_SIZE,
				      RTE_MEMPOOL_CACHE_MAX_SIZE, 0,
				      NULL, NULL,
				      my_obj_init, NULL,
				      SOCKET_ID_ANY, 0);
	if (mp_cache == NULL)
		goto err;

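	/*
	 * The default pool takes the step-by-step construction path:
	 * create an empty pool, bind the preferred mbuf pool ops for
	 * this platform, populate it with memory, then run the object
	 * initializer over every element.
	 */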
	default_pool_ops = rte_mbuf_best_mempool_ops();
	/* create a mempool based on the default handler */
	default_pool = rte_mempool_create_empty("default_pool",
						MEMPOOL_SIZE,
						MEMPOOL_ELT_SIZE,
						0, 0,
						SOCKET_ID_ANY, 0);

	if (default_pool == NULL) {
		printf("cannot allocate %s mempool\n", default_pool_ops);
		goto err;
	}

	if (rte_mempool_set_ops_byname(default_pool, default_pool_ops, NULL)
				       < 0) {
		printf("cannot set %s handler\n", default_pool_ops);
		goto err;
	}

	if (rte_mempool_populate_default(default_pool) < 0) {
		printf("cannot populate %s mempool\n", default_pool_ops);
		goto err;
	}

	rte_mempool_obj_iter(default_pool, my_obj_init, NULL);

	/* performance test with 1, 2 and max cores */
	printf("start performance test (without cache)\n");

	if (do_one_mempool_test(mp_nocache, 1) < 0)
		goto err;

	if (do_one_mempool_test(mp_nocache, 2) < 0)
		goto err;

	if (do_one_mempool_test(mp_nocache, rte_lcore_count()) < 0)
		goto err;

	/* performance test with 1, 2 and max cores */
	printf("start performance test for %s (without cache)\n",
	       default_pool_ops);

	if (do_one_mempool_test(default_pool, 1) < 0)
		goto err;

	if (do_one_mempool_test(default_pool, 2) < 0)
		goto err;

	if (do_one_mempool_test(default_pool, rte_lcore_count()) < 0)
		goto err;

	/* performance test with 1, 2 and max cores */
	printf("start performance test (with cache)\n");

	if (do_one_mempool_test(mp_cache, 1) < 0)
		goto err;

	if (do_one_mempool_test(mp_cache, 2) < 0)
		goto err;

	if (do_one_mempool_test(mp_cache, rte_lcore_count()) < 0)
		goto err;

	/* performance test with 1, 2 and max cores */
	printf("start performance test (with user-owned cache)\n");
	use_external_cache = 1;
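	/*
	 * The cache-less pool is reused here: all caching now comes from
	 * the user-owned caches created inside per_lcore_mempool_test().
	 */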

	if (do_one_mempool_test(mp_nocache, 1) < 0)
		goto err;

	if (do_one_mempool_test(mp_nocache, 2) < 0)
		goto err;

	if (do_one_mempool_test(mp_nocache, rte_lcore_count()) < 0)
		goto err;

	rte_mempool_list_dump(stdout);

	ret = 0;

err:
	rte_mempool_free(mp_cache);
	rte_mempool_free(mp_nocache);
	rte_mempool_free(default_pool);
	return ret;
}

REGISTER_TEST_COMMAND(mempool_perf_autotest, test_mempool_perf);
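
/*
 * Example run (a sketch; the exact binary path depends on the build):
 *   $ ./build/app/test/dpdk-test -l 0-3
 *   RTE>>> mempool_perf_autotest
 */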