/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2014 Intel Corporation
 */

#include <string.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <inttypes.h>
#include <stdarg.h>
#include <errno.h>
#include <sys/queue.h>

#include <rte_common.h>
#include <rte_log.h>
#include <rte_debug.h>
#include <rte_memory.h>
#include <rte_launch.h>
#include <rte_cycles.h>
#include <rte_eal.h>
#include <rte_per_lcore.h>
#include <rte_lcore.h>
#include <rte_branch_prediction.h>
#include <rte_mempool.h>
#include <rte_spinlock.h>
#include <rte_malloc.h>
#include <rte_mbuf_pool_ops.h>

#include "test.h"

/*
 * Mempool performance
 * =======
 *
 * Each core gets *n_keep* objects per bulk of *n_get_bulk*. Then,
 * objects are put back in the pool per bulk of *n_put_bulk*.
 *
 * This sequence is repeated for TIME_S seconds.
 *
 * This test is done on the following configurations:
 *
 * - Cores configuration (*cores*)
 *
 *   - One core with cache
 *   - Two cores with cache
 *   - Max. cores with cache
 *   - One core without cache
 *   - Two cores without cache
 *   - Max. cores without cache
 *   - One core with user-owned cache
 *   - Two cores with user-owned cache
 *   - Max. cores with user-owned cache
 *
 * - Bulk size (*n_get_bulk*, *n_put_bulk*)
 *
 *   - Bulk get from 1 to 32
 *   - Bulk put from 1 to 32
 *
 * - Number of kept objects (*n_keep*)
 *
 *   - 32
 *   - 128
 */
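
/*
 * Note on the reported figure: for each configuration, launch_cores() prints
 * rate_persec, the total number of objects obtained from (and returned to)
 * the pool per second, summed over all lcores taking part in the run.
 */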

#define N 65536
#define TIME_S 5
#define MEMPOOL_ELT_SIZE 2048
#define MAX_KEEP 128
#define MEMPOOL_SIZE ((rte_lcore_count()*(MAX_KEEP+RTE_MEMPOOL_CACHE_MAX_SIZE))-1)

#define LOG_ERR() printf("test failed at %s():%d\n", __func__, __LINE__)
#define RET_ERR() do {							\
		LOG_ERR();						\
		return -1;						\
	} while (0)
#define GOTO_ERR(var, label) do {					\
		LOG_ERR();						\
		var = -1;						\
		goto label;						\
	} while (0)

static int use_external_cache;
static unsigned external_cache_size = RTE_MEMPOOL_CACHE_MAX_SIZE;

static uint32_t synchro;

/* number of objects in one bulk operation (get or put) */
static unsigned n_get_bulk;
static unsigned n_put_bulk;

/* number of objects retrieved from the mempool before putting them back */
static unsigned n_keep;

/* number of enqueues / dequeues */
struct mempool_test_stats {
	uint64_t enq_count;
} __rte_cache_aligned;

static struct mempool_test_stats stats[RTE_MAX_LCORE];

/*
 * save the object number in the first 4 bytes of object data. All
 * other bytes are set to 0.
 */
static void
my_obj_init(struct rte_mempool *mp, __rte_unused void *arg,
	    void *obj, unsigned i)
{
	uint32_t *objnum = obj;

	memset(obj, 0, mp->elt_size);
	*objnum = i;
}

static int
per_lcore_mempool_test(void *arg)
{
	void *obj_table[MAX_KEEP];
	unsigned i, idx;
	struct rte_mempool *mp = arg;
	unsigned lcore_id = rte_lcore_id();
	int ret = 0;
	uint64_t start_cycles, end_cycles;
	uint64_t time_diff = 0, hz = rte_get_timer_hz();
	struct rte_mempool_cache *cache;

	if (use_external_cache) {
		/* Create a user-owned mempool cache. */
		cache = rte_mempool_cache_create(external_cache_size,
						 SOCKET_ID_ANY);
		if (cache == NULL)
			RET_ERR();
	} else {
		/* May be NULL if cache is disabled. */
		cache = rte_mempool_default_cache(mp, lcore_id);
	}

	/* n_get_bulk and n_put_bulk must be divisors of n_keep */
	if (((n_keep / n_get_bulk) * n_get_bulk) != n_keep)
		GOTO_ERR(ret, out);
	if (((n_keep / n_put_bulk) * n_put_bulk) != n_keep)
		GOTO_ERR(ret, out);

	stats[lcore_id].enq_count = 0;

	/* workers wait until the main lcore sets synchro */
	if (lcore_id != rte_get_main_lcore())
		rte_wait_until_equal_32(&synchro, 1, __ATOMIC_RELAXED);

	start_cycles = rte_get_timer_cycles();

	while (time_diff/hz < TIME_S) {
		for (i = 0; likely(i < (N/n_keep)); i++) {
			/* get n_keep objects by bulk of n_get_bulk */
			idx = 0;
			while (idx < n_keep) {
				ret = rte_mempool_generic_get(mp,
							      &obj_table[idx],
							      n_get_bulk,
							      cache);
				if (unlikely(ret < 0)) {
					rte_mempool_dump(stdout, mp);
					/* in this case, objects are lost... */
					GOTO_ERR(ret, out);
				}
				idx += n_get_bulk;
			}

			/* put the objects back */
			idx = 0;
			while (idx < n_keep) {
				rte_mempool_generic_put(mp, &obj_table[idx],
							n_put_bulk,
							cache);
				idx += n_put_bulk;
			}
		}
		end_cycles = rte_get_timer_cycles();
		time_diff = end_cycles - start_cycles;
		stats[lcore_id].enq_count += N;
	}

out:
	if (use_external_cache) {
		rte_mempool_cache_flush(cache, mp);
		rte_mempool_cache_free(cache);
	}

	return ret;
}

/* launch the per-lcore test on all requested cores, and display the result */
static int
launch_cores(struct rte_mempool *mp, unsigned int cores)
{
	unsigned lcore_id;
	uint64_t rate;
	int ret;
	unsigned cores_save = cores;

	__atomic_store_n(&synchro, 0, __ATOMIC_RELAXED);

	/* reset stats */
	memset(stats, 0, sizeof(stats));

	printf("mempool_autotest cache=%u cores=%u n_get_bulk=%u "
	       "n_put_bulk=%u n_keep=%u ",
	       use_external_cache ?
		   external_cache_size : (unsigned) mp->cache_size,
	       cores, n_get_bulk, n_put_bulk, n_keep);

	if (rte_mempool_avail_count(mp) != MEMPOOL_SIZE) {
		printf("mempool is not full\n");
		return -1;
	}

	RTE_LCORE_FOREACH_WORKER(lcore_id) {
		if (cores == 1)
			break;
		cores--;
		rte_eal_remote_launch(per_lcore_mempool_test,
				      mp, lcore_id);
	}

	/* start synchro and launch test on main */
	__atomic_store_n(&synchro, 1, __ATOMIC_RELAXED);

	ret = per_lcore_mempool_test(mp);

	cores = cores_save;
	RTE_LCORE_FOREACH_WORKER(lcore_id) {
		if (cores == 1)
			break;
		cores--;
		if (rte_eal_wait_lcore(lcore_id) < 0)
			ret = -1;
	}

	if (ret < 0) {
		printf("per-lcore test returned -1\n");
		return -1;
	}

	rate = 0;
	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++)
		rate += (stats[lcore_id].enq_count / TIME_S);

	printf("rate_persec=%" PRIu64 "\n", rate);

	return 0;
}

/* for a given number of cores, launch all test cases */
static int
do_one_mempool_test(struct rte_mempool *mp, unsigned int cores)
{
	unsigned bulk_tab_get[] = { 1, 4, 32, 0 };
	unsigned bulk_tab_put[] = { 1, 4, 32, 0 };
	unsigned keep_tab[] = { 32, 128, 0 };
	unsigned *get_bulk_ptr;
	unsigned *put_bulk_ptr;
	unsigned *keep_ptr;
	int ret;

	for (get_bulk_ptr = bulk_tab_get; *get_bulk_ptr; get_bulk_ptr++) {
		for (put_bulk_ptr = bulk_tab_put; *put_bulk_ptr; put_bulk_ptr++) {
			for (keep_ptr = keep_tab; *keep_ptr; keep_ptr++) {

				n_get_bulk = *get_bulk_ptr;
				n_put_bulk = *put_bulk_ptr;
				n_keep = *keep_ptr;
				ret = launch_cores(mp, cores);

				if (ret < 0)
					return -1;
			}
		}
	}
	return 0;
}

static int
test_mempool_perf(void)
{
	struct rte_mempool *mp_cache = NULL;
	struct rte_mempool *mp_nocache = NULL;
	struct rte_mempool *default_pool = NULL;
	const char *default_pool_ops;
	int ret = -1;

	/* create a mempool (without cache) */
	mp_nocache = rte_mempool_create("perf_test_nocache", MEMPOOL_SIZE,
					MEMPOOL_ELT_SIZE, 0, 0,
					NULL, NULL,
					my_obj_init, NULL,
					SOCKET_ID_ANY, 0);
	if (mp_nocache == NULL)
		goto err;

	/* create a mempool (with cache) */
	mp_cache = rte_mempool_create("perf_test_cache", MEMPOOL_SIZE,
				      MEMPOOL_ELT_SIZE,
				      RTE_MEMPOOL_CACHE_MAX_SIZE, 0,
				      NULL, NULL,
				      my_obj_init, NULL,
				      SOCKET_ID_ANY, 0);
	if (mp_cache == NULL)
		goto err;

	default_pool_ops = rte_mbuf_best_mempool_ops();
	/* create an empty mempool and attach the default handler */
	default_pool = rte_mempool_create_empty("default_pool",
						MEMPOOL_SIZE,
						MEMPOOL_ELT_SIZE,
						0, 0,
						SOCKET_ID_ANY, 0);

	if (default_pool == NULL) {
		printf("cannot allocate %s mempool\n", default_pool_ops);
		goto err;
	}

	if (rte_mempool_set_ops_byname(default_pool, default_pool_ops, NULL)
	    < 0) {
		printf("cannot set %s handler\n", default_pool_ops);
		goto err;
	}

	if (rte_mempool_populate_default(default_pool) < 0) {
		printf("cannot populate %s mempool\n", default_pool_ops);
		goto err;
	}

	rte_mempool_obj_iter(default_pool, my_obj_init, NULL);

	/* performance test with 1, 2 and max cores */
	printf("start performance test (without cache)\n");

	if (do_one_mempool_test(mp_nocache, 1) < 0)
		goto err;

	if (do_one_mempool_test(mp_nocache, 2) < 0)
		goto err;

	if (do_one_mempool_test(mp_nocache, rte_lcore_count()) < 0)
		goto err;

	/* performance test with 1, 2 and max cores */
	printf("start performance test for %s (without cache)\n",
	       default_pool_ops);

	if (do_one_mempool_test(default_pool, 1) < 0)
		goto err;

	if (do_one_mempool_test(default_pool, 2) < 0)
		goto err;

	if (do_one_mempool_test(default_pool, rte_lcore_count()) < 0)
		goto err;

	/* performance test with 1, 2 and max cores */
	printf("start performance test (with cache)\n");

	if (do_one_mempool_test(mp_cache, 1) < 0)
		goto err;

	if (do_one_mempool_test(mp_cache, 2) < 0)
		goto err;

	if (do_one_mempool_test(mp_cache, rte_lcore_count()) < 0)
		goto err;

	/* performance test with 1, 2 and max cores */
	printf("start performance test (with user-owned cache)\n");
	use_external_cache = 1;

	if (do_one_mempool_test(mp_nocache, 1) < 0)
		goto err;

	if (do_one_mempool_test(mp_nocache, 2) < 0)
		goto err;

	if (do_one_mempool_test(mp_nocache, rte_lcore_count()) < 0)
		goto err;

	rte_mempool_list_dump(stdout);

	ret = 0;

err:
	rte_mempool_free(mp_cache);
	rte_mempool_free(mp_nocache);
	rte_mempool_free(default_pool);
	return ret;
}

REGISTER_TEST_COMMAND(mempool_perf_autotest, test_mempool_perf);
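
/*
 * Usage sketch: the command registered above is run from the standard
 * dpdk-test application. The lcore list below is illustrative; more lcores
 * simply extend the "max. cores" configurations.
 *
 *   ./dpdk-test -l 0-3
 *   RTE>>mempool_perf_autotest
 */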