/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2018 Intel Corporation
 */

/*
 * This is a simple functional test for the rte_smp_mb() implementation,
 * i.e. it makes sure that LOAD and STORE operations that precede the
 * rte_smp_mb() call are globally visible across the lcores
 * before the LOAD and STORE operations that follow it.
 * The test uses a simple implementation of Peterson's lock algorithm
 * (https://en.wikipedia.org/wiki/Peterson%27s_algorithm)
 * for two execution units to make sure that rte_smp_mb() prevents
 * store-load reordering from happening.
 * Also, when executed on a single lcore, it can be used as an approximate
 * estimation of the number of cycles a particular implementation of
 * rte_smp_mb() will take.
 */

#include <stdio.h>
#include <string.h>
#include <stdint.h>
#include <inttypes.h>

#include <rte_memory.h>
#include <rte_per_lcore.h>
#include <rte_launch.h>
#include <rte_atomic.h>
#include <rte_eal.h>
#include <rte_lcore.h>
#include <rte_pause.h>
#include <rte_random.h>
#include <rte_cycles.h>
#include <rte_vect.h>
#include <rte_debug.h>

#include "test.h"

/* exclusive upper bound for the per-iteration counter increment */
#define ADD_MAX 8
/* number of lock/update/unlock iterations each lcore performs */
#define ITER_MAX 0x1000000

/* Which barrier flavour plock_lock() issues between its STORE and LOAD. */
enum plock_use_type {
	USE_MB,		/* full memory barrier: rte_mb() */
	USE_SMP_MB,	/* SMP memory barrier: rte_smp_mb() */
	USE_NUM
};

/* Two-party Peterson lock state. */
struct plock {
	volatile uint32_t flag[2];	/* flag[i] != 0: party i wants the lock */
	volatile uint32_t victim;	/* party that yields on contention */
	enum plock_use_type utype;	/* barrier flavour under test */
};

/*
 * Lock plus two counters protected by it.
 */
struct plock_test {
	struct plock lock;
	uint64_t val;	/* sum of all increments applied under the lock */
	uint64_t iter;	/* number of times the lock was taken */
};

/*
 * Each active lcore shares a plock_test struct with its left and right
 * neighbours.
 */
struct lcore_plock_test {
	struct plock_test *pt[2];	/* shared, lock-protected data */
	uint64_t sum[2];		/* local copy of the shared data */
	uint64_t iter;			/* number of iterations to perform */
	uint32_t lc;			/* given lcore id */
};

/* Issue the store-load barrier flavour selected for this lock. */
static inline void
store_load_barrier(uint32_t utype)
{
	if (utype == USE_MB)
		rte_mb();
	else if (utype == USE_SMP_MB)
		rte_smp_mb();
	else
		RTE_VERIFY(0);	/* invalid utype - abort the test */
}

/*
 * Peterson lock implementation.
 */
static void
plock_lock(struct plock *l, uint32_t self)
{
	uint32_t other;

	other = self ^ 1;

	/* announce our intent, then concede priority to the other party;
	 * the wmb keeps the flag store visible before the victim store. */
	l->flag[self] = 1;
	rte_smp_wmb();
	l->victim = self;

	/* The store-load barrier under test: without it the flag[self]
	 * store could be reordered after the flag[other] load below,
	 * letting both parties enter the critical section. */
	store_load_barrier(l->utype);

	/* spin while the other party wants in and we are the victim */
	while (l->flag[other] == 1 && l->victim == self)
		rte_pause();
	/* acquire semantics: reads in the critical section stay after this */
	rte_smp_rmb();
}

static void
plock_unlock(struct plock *l, uint32_t self)
{
	/* release semantics: writes in the critical section stay before
	 * the flag store that publishes the unlock */
	rte_smp_wmb();
	l->flag[self] = 0;
}

/* Re-initialize the lock to the unlocked state with the given utype. */
static void
plock_reset(struct plock *l, enum plock_use_type utype)
{
	memset(l, 0, sizeof(*l));
	l->utype = utype;
}

/*
 * grab the lock, update both counters, release the lock.
121 */ 122 static void 123 plock_add(struct plock_test *pt, uint32_t self, uint32_t n) 124 { 125 plock_lock(&pt->lock, self); 126 pt->iter++; 127 pt->val += n; 128 plock_unlock(&pt->lock, self); 129 } 130 131 static int 132 plock_test1_lcore(void *data) 133 { 134 uint64_t tm; 135 uint32_t lc, ln; 136 uint64_t i, n; 137 struct lcore_plock_test *lpt; 138 139 lpt = data; 140 lc = rte_lcore_id(); 141 142 /* find lcore_plock_test struct for given lcore */ 143 for (ln = rte_lcore_count(); ln != 0 && lpt->lc != lc; lpt++, ln--) 144 ; 145 146 if (ln == 0) { 147 printf("%s(%u) error at init\n", __func__, lc); 148 return -1; 149 } 150 151 n = rte_rand() % ADD_MAX; 152 tm = rte_get_timer_cycles(); 153 154 /* 155 * for each iteration: 156 * - update shared, locked protected data in a safe manner 157 * - update local copy of the shared data 158 */ 159 for (i = 0; i != lpt->iter; i++) { 160 161 plock_add(lpt->pt[0], 0, n); 162 plock_add(lpt->pt[1], 1, n); 163 164 lpt->sum[0] += n; 165 lpt->sum[1] += n; 166 167 n = (n + 1) % ADD_MAX; 168 } 169 170 tm = rte_get_timer_cycles() - tm; 171 172 printf("%s(%u): %" PRIu64 " iterations finished, in %" PRIu64 173 " cycles, %#Lf cycles/iteration, " 174 "local sum={%" PRIu64 ", %" PRIu64 "}\n", 175 __func__, lc, i, tm, (long double)tm / i, 176 lpt->sum[0], lpt->sum[1]); 177 return 0; 178 } 179 180 /* 181 * For N active lcores we allocate N+1 lcore_plock_test structures. 182 * Each active lcore shares one lcore_plock_test structure with its 183 * left lcore neighbor and one lcore_plock_test structure with its 184 * right lcore neighbor. 185 * During the test each lcore updates data in both shared structures and 186 * its local copies. Then at validation phase we check that our shared 187 * and local data are the same. 
 */
static int
plock_test(uint64_t iter, enum plock_use_type utype)
{
	int32_t rc;
	uint32_t i, lc, n;
	uint64_t *sum;
	struct plock_test *pt;
	struct lcore_plock_test *lpt;

	/* init phase, allocate and initialize shared data */

	n = rte_lcore_count();
	/* n + 1 shared slots so lcore i can point at pt[i] and pt[i + 1];
	 * sum[] mirrors that layout for the expected per-slot totals */
	pt = calloc(n + 1, sizeof(*pt));
	lpt = calloc(n, sizeof(*lpt));
	sum = calloc(n + 1, sizeof(*sum));

	printf("%s(iter=%" PRIu64 ", utype=%u) started on %u lcores\n",
		__func__, iter, utype, n);

	if (pt == NULL || lpt == NULL || sum == NULL) {
		printf("%s: failed to allocate memory for %u lcores\n",
			__func__, n);
		/* free(NULL) is a no-op, so partial allocations are safe */
		free(pt);
		free(lpt);
		free(sum);
		return -ENOMEM;
	}

	for (i = 0; i != n + 1; i++)
		plock_reset(&pt[i].lock, utype);

	i = 0;
	RTE_LCORE_FOREACH(lc) {

		lpt[i].lc = lc;
		lpt[i].iter = iter;
		lpt[i].pt[0] = pt + i;
		lpt[i].pt[1] = pt + i + 1;
		i++;
	}

	/* close the ring: the last lcore's right neighbour is pt[0],
	 * so the extra slot pt[n] is never touched by the workers */
	lpt[i - 1].pt[1] = pt;

	for (i = 0; i != n; i++)
		printf("lpt[%u]={lc=%u, pt={%p, %p},};\n",
			i, lpt[i].lc, lpt[i].pt[0], lpt[i].pt[1]);


	/* test phase - start and wait for completion on each active lcore */

	rte_eal_mp_remote_launch(plock_test1_lcore, lpt, CALL_MAIN);
	rte_eal_mp_wait_lcore();

	/* validation phase - make sure that shared and local data match */

	/* accumulate each lcore's local sums into the slot layout used
	 * during setup: lcore i contributed to pt[i] and pt[i + 1] */
	for (i = 0; i != n; i++) {
		sum[i] += lpt[i].sum[0];
		sum[i + 1] += lpt[i].sum[1];
	}

	/* here i == n; the last lcore actually updated pt[0] (see the
	 * ring wrap-around above), so fold sum[n] back into sum[0] */
	sum[0] += sum[i];

	rc = 0;
	for (i = 0; i != n; i++) {
		printf("%s: sum[%u]=%" PRIu64 ", pt[%u].val=%" PRIu64 ", pt[%u].iter=%" PRIu64 ";\n",
			__func__, i, sum[i], i, pt[i].val, i, pt[i].iter);

		/* race condition occurred, lock doesn't work properly;
		 * each pt[i] is shared by two lcores doing iter locked
		 * updates each, hence pt[i].iter must equal 2 * iter */
		if (sum[i] != pt[i].val || 2 * iter != pt[i].iter) {
			printf("error: local and shared sums don't match\n");
			rc = -1;
		}
	}

	free(pt);
	free(lpt);
	free(sum);

	printf("%s(utype=%u) returns %d\n", __func__, utype, rc);
	return rc;
}

/* Run plock_test() once per barrier flavour; fail if any run fails. */
static int
test_barrier(void)
{
	int32_t i, ret, rc[USE_NUM];

	for (i = 0; i != RTE_DIM(rc); i++)
		rc[i] = plock_test(ITER_MAX, i);

	ret = 0;
	for (i = 0; i != RTE_DIM(rc); i++) {
		printf("%s for utype=%d %s\n",
			__func__, i, rc[i] == 0 ? "passed" : "failed");
		ret |= rc[i];
	}

	return ret;
}

REGISTER_TEST_COMMAND(barrier_autotest, test_barrier);