1 /*- 2 * BSD LICENSE 3 * 4 * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * * Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * * Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * * Neither the name of Intel Corporation nor the names of its 18 * contributors may be used to endorse or promote products derived 19 * from this software without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 */ 33 34 #include <string.h> 35 #include <stdio.h> 36 #include <stdlib.h> 37 #include <stdint.h> 38 #include <inttypes.h> 39 #include <stdarg.h> 40 #include <errno.h> 41 #include <sys/queue.h> 42 43 #include <rte_common.h> 44 #include <rte_log.h> 45 #include <rte_debug.h> 46 #include <rte_memory.h> 47 #include <rte_memzone.h> 48 #include <rte_launch.h> 49 #include <rte_cycles.h> 50 #include <rte_eal.h> 51 #include <rte_per_lcore.h> 52 #include <rte_lcore.h> 53 #include <rte_atomic.h> 54 #include <rte_branch_prediction.h> 55 #include <rte_ring.h> 56 #include <rte_mempool.h> 57 #include <rte_spinlock.h> 58 #include <rte_malloc.h> 59 60 #include "test.h" 61 62 /* 63 * Mempool performance 64 * ======= 65 * 66 * Each core get *n_keep* objects per bulk of *n_get_bulk*. Then, 67 * objects are put back in the pool per bulk of *n_put_bulk*. 68 * 69 * This sequence is done during TIME_S seconds. 70 * 71 * This test is done on the following configurations: 72 * 73 * - Cores configuration (*cores*) 74 * 75 * - One core with cache 76 * - Two cores with cache 77 * - Max. cores with cache 78 * - One core without cache 79 * - Two cores without cache 80 * - Max. cores without cache 81 * 82 * - Bulk size (*n_get_bulk*, *n_put_bulk*) 83 * 84 * - Bulk get from 1 to 32 85 * - Bulk put from 1 to 32 86 * 87 * - Number of kept objects (*n_keep*) 88 * 89 * - 32 90 * - 128 91 */ 92 93 #define N 65536 94 #define TIME_S 5 95 #define MEMPOOL_ELT_SIZE 2048 96 #define MAX_KEEP 128 97 #define MEMPOOL_SIZE ((RTE_MAX_LCORE*(MAX_KEEP+RTE_MEMPOOL_CACHE_MAX_SIZE))-1) 98 99 static struct rte_mempool *mp; 100 static struct rte_mempool *mp_cache, *mp_nocache; 101 102 static rte_atomic32_t synchro; 103 104 /* number of objects in one bulk operation (get or put) */ 105 static unsigned n_get_bulk; 106 static unsigned n_put_bulk; 107 108 /* number of objects retrived from mempool before putting them back */ 109 static unsigned n_keep; 110 111 /* number of enqueues / dequeues */ 112 struct mempool_test_stats { 113 unsigned enq_count; 114 } __rte_cache_aligned; 115 116 static struct mempool_test_stats stats[RTE_MAX_LCORE]; 117 118 /* 119 * save the object number in the first 4 bytes of object data. All 120 * other bytes are set to 0. 121 */ 122 static void 123 my_obj_init(struct rte_mempool *mp, __attribute__((unused)) void *arg, 124 void *obj, unsigned i) 125 { 126 uint32_t *objnum = obj; 127 memset(obj, 0, mp->elt_size); 128 *objnum = i; 129 } 130 131 static int 132 per_lcore_mempool_test(__attribute__((unused)) void *arg) 133 { 134 void *obj_table[MAX_KEEP]; 135 unsigned i, idx; 136 unsigned lcore_id = rte_lcore_id(); 137 int ret; 138 uint64_t start_cycles, end_cycles; 139 uint64_t time_diff = 0, hz = rte_get_timer_hz(); 140 141 /* n_get_bulk and n_put_bulk must be divisors of n_keep */ 142 if (((n_keep / n_get_bulk) * n_get_bulk) != n_keep) 143 return -1; 144 if (((n_keep / n_put_bulk) * n_put_bulk) != n_keep) 145 return -1; 146 147 stats[lcore_id].enq_count = 0; 148 149 /* wait synchro for slaves */ 150 if (lcore_id != rte_get_master_lcore()) 151 while (rte_atomic32_read(&synchro) == 0); 152 153 start_cycles = rte_get_timer_cycles(); 154 155 while (time_diff/hz < TIME_S) { 156 for (i = 0; likely(i < (N/n_keep)); i++) { 157 /* get n_keep objects by bulk of n_bulk */ 158 idx = 0; 159 while (idx < n_keep) { 160 ret = rte_mempool_get_bulk(mp, &obj_table[idx], 161 n_get_bulk); 162 if (unlikely(ret < 0)) { 163 rte_mempool_dump(stdout, mp); 164 rte_ring_dump(stdout, mp->ring); 165 /* in this case, objects are lost... */ 166 return -1; 167 } 168 idx += n_get_bulk; 169 } 170 171 /* put the objects back */ 172 idx = 0; 173 while (idx < n_keep) { 174 rte_mempool_put_bulk(mp, &obj_table[idx], 175 n_put_bulk); 176 idx += n_put_bulk; 177 } 178 } 179 end_cycles = rte_get_timer_cycles(); 180 time_diff = end_cycles - start_cycles; 181 stats[lcore_id].enq_count += N; 182 } 183 184 return 0; 185 } 186 187 /* launch all the per-lcore test, and display the result */ 188 static int 189 launch_cores(unsigned cores) 190 { 191 unsigned lcore_id; 192 unsigned rate; 193 int ret; 194 unsigned cores_save = cores; 195 196 rte_atomic32_set(&synchro, 0); 197 198 /* reset stats */ 199 memset(stats, 0, sizeof(stats)); 200 201 printf("mempool_autotest cache=%u cores=%u n_get_bulk=%u " 202 "n_put_bulk=%u n_keep=%u ", 203 (unsigned) mp->cache_size, cores, n_get_bulk, n_put_bulk, n_keep); 204 205 if (rte_mempool_count(mp) != MEMPOOL_SIZE) { 206 printf("mempool is not full\n"); 207 return -1; 208 } 209 210 RTE_LCORE_FOREACH_SLAVE(lcore_id) { 211 if (cores == 1) 212 break; 213 cores--; 214 rte_eal_remote_launch(per_lcore_mempool_test, 215 NULL, lcore_id); 216 } 217 218 /* start synchro and launch test on master */ 219 rte_atomic32_set(&synchro, 1); 220 221 ret = per_lcore_mempool_test(NULL); 222 223 cores = cores_save; 224 RTE_LCORE_FOREACH_SLAVE(lcore_id) { 225 if (cores == 1) 226 break; 227 cores--; 228 if (rte_eal_wait_lcore(lcore_id) < 0) 229 ret = -1; 230 } 231 232 if (ret < 0) { 233 printf("per-lcore test returned -1\n"); 234 return -1; 235 } 236 237 rate = 0; 238 for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) 239 rate += (stats[lcore_id].enq_count / TIME_S); 240 241 printf("rate_persec=%u\n", rate); 242 243 return 0; 244 } 245 246 /* for a given number of core, launch all test cases */ 247 static int 248 do_one_mempool_test(unsigned cores) 249 { 250 unsigned bulk_tab_get[] = { 1, 4, 32, 0 }; 251 unsigned bulk_tab_put[] = { 1, 4, 32, 0 }; 252 unsigned keep_tab[] = { 32, 128, 0 }; 253 unsigned *get_bulk_ptr; 254 unsigned *put_bulk_ptr; 255 unsigned *keep_ptr; 256 int ret; 257 258 for (get_bulk_ptr = bulk_tab_get; *get_bulk_ptr; get_bulk_ptr++) { 259 for (put_bulk_ptr = bulk_tab_put; *put_bulk_ptr; put_bulk_ptr++) { 260 for (keep_ptr = keep_tab; *keep_ptr; keep_ptr++) { 261 262 n_get_bulk = *get_bulk_ptr; 263 n_put_bulk = *put_bulk_ptr; 264 n_keep = *keep_ptr; 265 ret = launch_cores(cores); 266 267 if (ret < 0) 268 return -1; 269 } 270 } 271 } 272 return 0; 273 } 274 275 static int 276 test_mempool_perf(void) 277 { 278 rte_atomic32_init(&synchro); 279 280 /* create a mempool (without cache) */ 281 if (mp_nocache == NULL) 282 mp_nocache = rte_mempool_create("perf_test_nocache", MEMPOOL_SIZE, 283 MEMPOOL_ELT_SIZE, 0, 0, 284 NULL, NULL, 285 my_obj_init, NULL, 286 SOCKET_ID_ANY, 0); 287 if (mp_nocache == NULL) 288 return -1; 289 290 /* create a mempool (with cache) */ 291 if (mp_cache == NULL) 292 mp_cache = rte_mempool_create("perf_test_cache", MEMPOOL_SIZE, 293 MEMPOOL_ELT_SIZE, 294 RTE_MEMPOOL_CACHE_MAX_SIZE, 0, 295 NULL, NULL, 296 my_obj_init, NULL, 297 SOCKET_ID_ANY, 0); 298 if (mp_cache == NULL) 299 return -1; 300 301 /* performance test with 1, 2 and max cores */ 302 printf("start performance test (without cache)\n"); 303 mp = mp_nocache; 304 305 if (do_one_mempool_test(1) < 0) 306 return -1; 307 308 if (do_one_mempool_test(2) < 0) 309 return -1; 310 311 if (do_one_mempool_test(rte_lcore_count()) < 0) 312 return -1; 313 314 /* performance test with 1, 2 and max cores */ 315 printf("start performance test (with cache)\n"); 316 mp = mp_cache; 317 318 if (do_one_mempool_test(1) < 0) 319 return -1; 320 321 if (do_one_mempool_test(2) < 0) 322 return -1; 323 324 if (do_one_mempool_test(rte_lcore_count()) < 0) 325 return -1; 326 327 rte_mempool_list_dump(stdout); 328 329 return 0; 330 } 331 332 static struct test_command mempool_perf_cmd = { 333 .command = "mempool_perf_autotest", 334 .callback = test_mempool_perf, 335 }; 336 REGISTER_TEST_COMMAND(mempool_perf_cmd); 337