/*-
 *   BSD LICENSE
 *
 *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
 *   All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <string.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <inttypes.h>
#include <stdarg.h>
#include <errno.h>
#include <sys/queue.h>

#include <rte_common.h>
#include <rte_log.h>
#include <rte_debug.h>
#include <rte_memory.h>
#include <rte_memzone.h>
#include <rte_launch.h>
#include <rte_cycles.h>
#include <rte_eal.h>
#include <rte_per_lcore.h>
#include <rte_lcore.h>
#include <rte_atomic.h>
#include <rte_branch_prediction.h>
#include <rte_mempool.h>
#include <rte_spinlock.h>
#include <rte_malloc.h>

#include "test.h"

/*
 * Mempool performance
 * ===================
 *
 * Each core gets *n_keep* objects per bulk of *n_get_bulk*. Then,
 * objects are put back in the pool per bulk of *n_put_bulk*.
 *
 * This sequence is done during TIME_S seconds.
 *
 * This test is done on the following configurations:
 *
 * - Cores configuration (*cores*)
 *
 *   - One core with cache
 *   - Two cores with cache
 *   - Max. cores with cache
 *   - One core without cache
 *   - Two cores without cache
 *   - Max. cores without cache
 *   - One core with user-owned cache
 *   - Two cores with user-owned cache
 *   - Max. cores with user-owned cache
 *
 * - Bulk size (*n_get_bulk*, *n_put_bulk*)
 *
 *   - Bulk get from 1 to 32
 *   - Bulk put from 1 to 32
 *
 * - Number of kept objects (*n_keep*)
 *
 *   - 32
 *   - 128
 */

#define N 65536
#define TIME_S 5
#define MEMPOOL_ELT_SIZE 2048
#define MAX_KEEP 128
#define MEMPOOL_SIZE ((rte_lcore_count()*(MAX_KEEP+RTE_MEMPOOL_CACHE_MAX_SIZE))-1)

#define LOG_ERR() printf("test failed at %s():%d\n", __func__, __LINE__)
#define RET_ERR() do {						\
		LOG_ERR();					\
		return -1;					\
	} while (0)
#define GOTO_ERR(var, label) do {				\
		LOG_ERR();					\
		var = -1;					\
		goto label;					\
	} while (0)

static struct rte_mempool *mp;
static struct rte_mempool *mp_cache, *mp_nocache;
static int use_external_cache;
static unsigned external_cache_size = RTE_MEMPOOL_CACHE_MAX_SIZE;

static rte_atomic32_t synchro;

/* number of objects in one bulk operation (get or put) */
static unsigned n_get_bulk;
static unsigned n_put_bulk;

/* number of objects retrieved from mempool before putting them back */
static unsigned n_keep;

/* number of enqueues / dequeues */
struct mempool_test_stats {
	uint64_t enq_count;
} __rte_cache_aligned;

static struct mempool_test_stats stats[RTE_MAX_LCORE];

/*
 * save the object number in the first 4 bytes of object data. All
 * other bytes are set to 0.
 */
static void
my_obj_init(struct rte_mempool *mp, __attribute__((unused)) void *arg,
	    void *obj, unsigned i)
{
	uint32_t *objnum = obj;

	memset(obj, 0, mp->elt_size);
	*objnum = i;
}
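/*
 * Worker body run on every core under test: repeatedly get n_keep
 * objects in bulks of n_get_bulk, put them back in bulks of
 * n_put_bulk, and accumulate the number of dequeued objects in
 * stats[] for TIME_S seconds.
 */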
static int
per_lcore_mempool_test(__attribute__((unused)) void *arg)
{
	void *obj_table[MAX_KEEP];
	unsigned i, idx;
	unsigned lcore_id = rte_lcore_id();
	int ret = 0;
	uint64_t start_cycles, end_cycles;
	uint64_t time_diff = 0, hz = rte_get_timer_hz();
	struct rte_mempool_cache *cache;

	if (use_external_cache) {
		/* Create a user-owned mempool cache. */
		cache = rte_mempool_cache_create(external_cache_size,
						 SOCKET_ID_ANY);
		if (cache == NULL)
			RET_ERR();
	} else {
		/* May be NULL if cache is disabled. */
		cache = rte_mempool_default_cache(mp, lcore_id);
	}

	/* n_get_bulk and n_put_bulk must be divisors of n_keep */
	if (((n_keep / n_get_bulk) * n_get_bulk) != n_keep)
		GOTO_ERR(ret, out);
	if (((n_keep / n_put_bulk) * n_put_bulk) != n_keep)
		GOTO_ERR(ret, out);

	stats[lcore_id].enq_count = 0;

	/* slaves wait for the synchro flag set by the master */
	if (lcore_id != rte_get_master_lcore())
		while (rte_atomic32_read(&synchro) == 0);

	start_cycles = rte_get_timer_cycles();

	while (time_diff/hz < TIME_S) {
		for (i = 0; likely(i < (N/n_keep)); i++) {
			/* get n_keep objects by bulk of n_get_bulk */
			idx = 0;
			while (idx < n_keep) {
				ret = rte_mempool_generic_get(mp,
							      &obj_table[idx],
							      n_get_bulk,
							      cache, 0);
				if (unlikely(ret < 0)) {
					rte_mempool_dump(stdout, mp);
					/* in this case, objects are lost... */
					GOTO_ERR(ret, out);
				}
				idx += n_get_bulk;
			}

			/* put the objects back */
			idx = 0;
			while (idx < n_keep) {
				rte_mempool_generic_put(mp, &obj_table[idx],
							n_put_bulk,
							cache, 0);
				idx += n_put_bulk;
			}
		}
		end_cycles = rte_get_timer_cycles();
		time_diff = end_cycles - start_cycles;
		stats[lcore_id].enq_count += N;
	}

out:
	if (use_external_cache) {
		rte_mempool_cache_flush(cache, mp);
		rte_mempool_cache_free(cache);
	}

	return ret;
}

/* launch all the per-lcore tests, and display the result */
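/*
 * Slaves are launched first and spin on the "synchro" flag inside
 * per_lcore_mempool_test(); the master then sets the flag and runs the
 * same loop itself, so that all cores start measuring at roughly the
 * same time.
 */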
static int
launch_cores(unsigned cores)
{
	unsigned lcore_id;
	uint64_t rate;
	int ret;
	unsigned cores_save = cores;

	rte_atomic32_set(&synchro, 0);

	/* reset stats */
	memset(stats, 0, sizeof(stats));

	printf("mempool_autotest cache=%u cores=%u n_get_bulk=%u "
	       "n_put_bulk=%u n_keep=%u ",
	       use_external_cache ?
		   external_cache_size : (unsigned) mp->cache_size,
	       cores, n_get_bulk, n_put_bulk, n_keep);

	if (rte_mempool_avail_count(mp) != MEMPOOL_SIZE) {
		printf("mempool is not full\n");
		return -1;
	}

	RTE_LCORE_FOREACH_SLAVE(lcore_id) {
		if (cores == 1)
			break;
		cores--;
		rte_eal_remote_launch(per_lcore_mempool_test,
				      NULL, lcore_id);
	}

	/* start synchro and launch test on master */
	rte_atomic32_set(&synchro, 1);

	ret = per_lcore_mempool_test(NULL);

	cores = cores_save;
	RTE_LCORE_FOREACH_SLAVE(lcore_id) {
		if (cores == 1)
			break;
		cores--;
		if (rte_eal_wait_lcore(lcore_id) < 0)
			ret = -1;
	}

	if (ret < 0) {
		printf("per-lcore test returned -1\n");
		return -1;
	}

	rate = 0;
	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++)
		rate += (stats[lcore_id].enq_count / TIME_S);

	printf("rate_persec=%" PRIu64 "\n", rate);

	return 0;
}

/* for a given number of cores, launch all test cases */
static int
do_one_mempool_test(unsigned cores)
{
	unsigned bulk_tab_get[] = { 1, 4, 32, 0 };
	unsigned bulk_tab_put[] = { 1, 4, 32, 0 };
	unsigned keep_tab[] = { 32, 128, 0 };
	unsigned *get_bulk_ptr;
	unsigned *put_bulk_ptr;
	unsigned *keep_ptr;
	int ret;

	for (get_bulk_ptr = bulk_tab_get; *get_bulk_ptr; get_bulk_ptr++) {
		for (put_bulk_ptr = bulk_tab_put; *put_bulk_ptr; put_bulk_ptr++) {
			for (keep_ptr = keep_tab; *keep_ptr; keep_ptr++) {

				n_get_bulk = *get_bulk_ptr;
				n_put_bulk = *put_bulk_ptr;
				n_keep = *keep_ptr;
				ret = launch_cores(cores);

				if (ret < 0)
					return -1;
			}
		}
	}
	return 0;
}
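/*
 * Test entry point: create the two mempools (with and without a
 * per-lcore cache) on first invocation, then run the whole bulk-size /
 * n_keep matrix on 1, 2 and all available cores for each of the three
 * cache configurations.
 */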
static int
test_mempool_perf(void)
{
	rte_atomic32_init(&synchro);

	/* create a mempool (without cache) */
	if (mp_nocache == NULL)
		mp_nocache = rte_mempool_create("perf_test_nocache", MEMPOOL_SIZE,
						MEMPOOL_ELT_SIZE, 0, 0,
						NULL, NULL,
						my_obj_init, NULL,
						SOCKET_ID_ANY, 0);
	if (mp_nocache == NULL)
		return -1;

	/* create a mempool (with cache) */
	if (mp_cache == NULL)
		mp_cache = rte_mempool_create("perf_test_cache", MEMPOOL_SIZE,
					      MEMPOOL_ELT_SIZE,
					      RTE_MEMPOOL_CACHE_MAX_SIZE, 0,
					      NULL, NULL,
					      my_obj_init, NULL,
					      SOCKET_ID_ANY, 0);
	if (mp_cache == NULL)
		return -1;

	/* performance test with 1, 2 and max cores */
	printf("start performance test (without cache)\n");
	mp = mp_nocache;

	if (do_one_mempool_test(1) < 0)
		return -1;

	if (do_one_mempool_test(2) < 0)
		return -1;

	if (do_one_mempool_test(rte_lcore_count()) < 0)
		return -1;

	/* performance test with 1, 2 and max cores */
	printf("start performance test (with cache)\n");
	mp = mp_cache;

	if (do_one_mempool_test(1) < 0)
		return -1;

	if (do_one_mempool_test(2) < 0)
		return -1;

	if (do_one_mempool_test(rte_lcore_count()) < 0)
		return -1;

	/* performance test with 1, 2 and max cores */
	printf("start performance test (with user-owned cache)\n");
	mp = mp_nocache;
	use_external_cache = 1;

	if (do_one_mempool_test(1) < 0)
		return -1;

	if (do_one_mempool_test(2) < 0)
		return -1;

	if (do_one_mempool_test(rte_lcore_count()) < 0)
		return -1;

	rte_mempool_list_dump(stdout);

	return 0;
}

REGISTER_TEST_COMMAND(mempool_perf_autotest, test_mempool_perf);