/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2017 Cavium, Inc
 */

#ifndef _TEST_PERF_COMMON_
#define _TEST_PERF_COMMON_

#include <stdio.h>
#include <stdbool.h>
#include <unistd.h>

#include <rte_cryptodev.h>
#include <rte_cycles.h>
#include <rte_ethdev.h>
#include <rte_event_crypto_adapter.h>
#include <rte_event_eth_rx_adapter.h>
#include <rte_event_eth_tx_adapter.h>
#include <rte_event_timer_adapter.h>
#include <rte_eventdev.h>
#include <rte_lcore.h>
#include <rte_malloc.h>
#include <rte_mempool.h>
#include <rte_prefetch.h>

#include "evt_common.h"
#include "evt_options.h"
#include "evt_test.h"

#define TEST_PERF_CA_ID 0

struct test_perf;

struct worker_data {
	uint64_t processed_pkts;
	uint64_t latency;
	uint8_t dev_id;
	uint8_t port_id;
	struct test_perf *t;
} __rte_cache_aligned;

struct crypto_adptr_data {
	uint8_t cdev_id;
	uint16_t cdev_qp_id;
	void **crypto_sess;
};

struct prod_data {
	uint8_t dev_id;
	uint8_t port_id;
	uint8_t queue_id;
	struct crypto_adptr_data ca;
	struct test_perf *t;
} __rte_cache_aligned;

struct test_perf {
	/* Don't change the offset of "done". The signal handler uses this
	 * memory to terminate the work on all lcores.
	 */
	int done;
	uint64_t outstand_pkts;
	uint8_t nb_workers;
	enum evt_test_result result;
	uint32_t nb_flows;
	uint64_t nb_pkts;
	struct rte_mempool *pool;
	struct prod_data prod[EVT_MAX_PORTS];
	struct worker_data worker[EVT_MAX_PORTS];
	struct evt_options *opt;
	uint8_t sched_type_list[EVT_MAX_STAGES] __rte_cache_aligned;
	struct rte_event_timer_adapter *timer_adptr[
			RTE_EVENT_TIMER_ADAPTER_NUM_MAX] __rte_cache_aligned;
	struct rte_mempool *ca_op_pool;
	struct rte_mempool *ca_sess_pool;
	struct rte_mempool *ca_asym_sess_pool;
	struct rte_mempool *ca_vector_pool;
} __rte_cache_aligned;

/* The timestamp overlays the user_meta area of struct rte_event_timer, so an
 * element can be armed as an event timer and still carry the forward-latency
 * timestamp.
 */
struct perf_elt {
	union {
		struct rte_event_timer tim;
		struct {
			char pad[offsetof(struct rte_event_timer, user_meta)];
			uint64_t timestamp;
		};
	};
} __rte_cache_aligned;

#define BURST_SIZE 16
#define MAX_PROD_ENQ_BURST_SIZE 128

#define PERF_WORKER_INIT\
	struct worker_data *w = arg;\
	struct test_perf *t = w->t;\
	struct evt_options *opt = t->opt;\
	const uint8_t dev = w->dev_id;\
	const uint8_t port = w->port_id;\
	const uint8_t prod_timer_type = \
		opt->prod_type == EVT_PROD_TYPE_EVENT_TIMER_ADPTR;\
	const uint8_t prod_crypto_type = \
		opt->prod_type == EVT_PROD_TYPE_EVENT_CRYPTO_ADPTR;\
	uint8_t *const sched_type_list = &t->sched_type_list[0];\
	struct rte_mempool *const pool = t->pool;\
	const uint8_t nb_stages = t->opt->nb_stages;\
	const uint8_t laststage = nb_stages - 1;\
	uint8_t cnt = 0;\
	void *bufs[16] __rte_cache_aligned;\
	int const sz = RTE_DIM(bufs);\
	uint8_t stage;\
	struct perf_elt *pe = NULL;\
	if (opt->verbose_level > 1)\
		printf("%s(): lcore %d dev_id %d port=%d\n", __func__,\
			rte_lcore_id(), dev, port)

static __rte_always_inline void
perf_mark_fwd_latency(struct perf_elt *const pe)
{
	pe->timestamp = rte_get_timer_cycles();
}

static __rte_always_inline int
perf_handle_crypto_ev(struct rte_event *ev, struct perf_elt **pe, int enable_fwd_latency)
{
	struct rte_crypto_op *op = ev->event_ptr;
	struct rte_mbuf *m;

	if (unlikely(op->status != RTE_CRYPTO_OP_STATUS_SUCCESS)) {
		/* Save the status before the op is returned to its mempool. */
		const int op_status = op->status;

		rte_crypto_op_free(op);
		return op_status;
	}

	/* Forward latency not enabled - perf data will not be accessed */
	if (!enable_fwd_latency)
		return 0;

	/* Get pointer to perf data */
	if (op->type == RTE_CRYPTO_OP_TYPE_SYMMETRIC) {
		if (op->sym->m_dst == NULL)
			m = op->sym->m_src;
		else
			m = op->sym->m_dst;
		*pe = rte_pktmbuf_mtod(m, struct perf_elt *);
	} else {
		*pe = RTE_PTR_ADD(op->asym->modex.result.data, op->asym->modex.result.length);
	}

	return 0;
}

static __rte_always_inline struct perf_elt *
perf_elt_from_vec_get(struct rte_event_vector *vec)
{
	/* Timestamp for vector event stored in first element */
	struct rte_crypto_op *cop = vec->ptrs[0];
	struct rte_mbuf *m;

	if (cop->type == RTE_CRYPTO_OP_TYPE_SYMMETRIC) {
		m = cop->sym->m_dst == NULL ? cop->sym->m_src : cop->sym->m_dst;
		return rte_pktmbuf_mtod(m, struct perf_elt *);
	} else {
		return RTE_PTR_ADD(cop->asym->modex.result.data, cop->asym->modex.result.length);
	}
}

static __rte_always_inline int
perf_handle_crypto_vector_ev(struct rte_event *ev, struct perf_elt **pe,
		const int enable_fwd_latency)
{
	struct rte_event_vector *vec = ev->vec;
	struct rte_crypto_op *cop;
	struct rte_mbuf *m;
	int i, n = 0;
	void *data;

	for (i = 0; i < vec->nb_elem; i++) {
		cop = vec->ptrs[i];
		if (unlikely(cop->status != RTE_CRYPTO_OP_STATUS_SUCCESS)) {
			if (cop->type == RTE_CRYPTO_OP_TYPE_SYMMETRIC) {
				m = cop->sym->m_dst == NULL ? cop->sym->m_src : cop->sym->m_dst;
				rte_pktmbuf_free(m);
			} else {
				data = cop->asym->modex.result.data;
				rte_mempool_put(rte_mempool_from_obj(data), data);
			}
			rte_crypto_op_free(cop);
			continue;
		}
		vec->ptrs[n++] = cop;
	}

	/* All crypto ops failed, free the vector */
	if (n == 0) {
		rte_mempool_put(rte_mempool_from_obj(vec), vec);
		return -ENOENT;
	}

	vec->nb_elem = n;

	/* Forward latency not enabled - perf data will not be accessed */
	if (!enable_fwd_latency)
		return 0;

	/* Get pointer to perf data */
	*pe = perf_elt_from_vec_get(vec);

	return 0;
}

static __rte_always_inline int
perf_process_last_stage(struct rte_mempool *const pool, uint8_t prod_crypto_type,
		struct rte_event *const ev, struct worker_data *const w,
		void *bufs[], int const buf_sz, uint8_t count)
{
	void *to_free_in_bulk;

	/* Release fence here ensures event_ptr is
	 * stored before updating the number of
	 * processed packets for worker lcores.
	 */
	rte_atomic_thread_fence(__ATOMIC_RELEASE);
	w->processed_pkts++;

	if (prod_crypto_type) {
		struct rte_crypto_op *op = ev->event_ptr;
		struct rte_mbuf *m;

		if (op->type == RTE_CRYPTO_OP_TYPE_SYMMETRIC) {
			if (op->sym->m_dst == NULL)
				m = op->sym->m_src;
			else
				m = op->sym->m_dst;

			to_free_in_bulk = m;
		} else {
			to_free_in_bulk = op->asym->modex.result.data;
		}
		rte_crypto_op_free(op);
	} else {
		to_free_in_bulk = ev->event_ptr;
	}

	bufs[count++] = to_free_in_bulk;
	if (unlikely(count == buf_sz)) {
		count = 0;
		rte_mempool_put_bulk(pool, bufs, buf_sz);
	}

	return count;
}

static __rte_always_inline uint8_t
perf_process_last_stage_latency(struct rte_mempool *const pool, uint8_t prod_crypto_type,
		struct rte_event *const ev, struct worker_data *const w,
		void *bufs[], int const buf_sz, uint8_t count)
{
	uint64_t latency;
	struct perf_elt *pe;
	void *to_free_in_bulk;

	/* Release fence here ensures event_ptr is stored before updating the number of processed
	 * packets for worker lcores.
	 */
	rte_atomic_thread_fence(__ATOMIC_RELEASE);
	w->processed_pkts++;

	if (prod_crypto_type) {
		struct rte_crypto_op *op = ev->event_ptr;
		struct rte_mbuf *m;

		if (op->type == RTE_CRYPTO_OP_TYPE_SYMMETRIC) {
			if (op->sym->m_dst == NULL)
				m = op->sym->m_src;
			else
				m = op->sym->m_dst;

			to_free_in_bulk = m;
			pe = rte_pktmbuf_mtod(m, struct perf_elt *);
		} else {
			pe = RTE_PTR_ADD(op->asym->modex.result.data,
					op->asym->modex.result.length);
			to_free_in_bulk = op->asym->modex.result.data;
		}
		rte_crypto_op_free(op);
	} else {
		pe = ev->event_ptr;
		to_free_in_bulk = pe;
	}

	latency = rte_get_timer_cycles() - pe->timestamp;
	w->latency += latency;

	bufs[count++] = to_free_in_bulk;
	if (unlikely(count == buf_sz)) {
		count = 0;
		rte_mempool_put_bulk(pool, bufs, buf_sz);
	}

	return count;
}

static __rte_always_inline void
perf_process_vector_last_stage(struct rte_mempool *const pool,
		struct rte_mempool *const ca_pool, struct rte_event *const ev,
		struct worker_data *const w, const bool enable_fwd_latency)
{
	struct rte_event_vector *vec = ev->vec;
	struct rte_crypto_op *cop;
	void *bufs[vec->nb_elem];
	struct perf_elt *pe;
	uint64_t latency;
	int i;

	/* Release fence here ensures event_ptr is stored before updating the number of processed
	 * packets for worker lcores.
	 */
	rte_atomic_thread_fence(__ATOMIC_RELEASE);
	w->processed_pkts += vec->nb_elem;

	if (enable_fwd_latency) {
		pe = perf_elt_from_vec_get(vec);
		latency = rte_get_timer_cycles() - pe->timestamp;
		w->latency += latency;
	}

	for (i = 0; i < vec->nb_elem; i++) {
		cop = vec->ptrs[i];
		if (cop->type == RTE_CRYPTO_OP_TYPE_SYMMETRIC)
			bufs[i] = cop->sym->m_dst == NULL ? cop->sym->m_src : cop->sym->m_dst;
		else
			bufs[i] = cop->asym->modex.result.data;
	}

	rte_mempool_put_bulk(pool, bufs, vec->nb_elem);
	rte_mempool_put_bulk(ca_pool, (void * const *)vec->ptrs, vec->nb_elem);
	rte_mempool_put(rte_mempool_from_obj(vec), vec);
}
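
/*
 * Illustrative usage sketch (not part of the build): a single-event,
 * queue-per-stage worker typically combines PERF_WORKER_INIT with the
 * helpers above roughly as below. fwd_event() stands in for the per-test
 * forwarding helper defined in the individual test_perf_*.c files, and the
 * dequeue depth of 1 is an assumption of this sketch, not a requirement of
 * this header.
 *
 *	static int
 *	worker_sketch(void *arg)
 *	{
 *		uint16_t enq = 0, deq = 0;
 *		struct rte_event ev;
 *		PERF_WORKER_INIT;
 *
 *		while (t->done == false) {
 *			deq = rte_event_dequeue_burst(dev, port, &ev, 1, 0);
 *			if (!deq) {
 *				rte_pause();
 *				continue;
 *			}
 *
 *			if (prod_crypto_type && ev.event_type == RTE_EVENT_TYPE_CRYPTODEV) {
 *				if (perf_handle_crypto_ev(&ev, &pe, opt->fwd_latency))
 *					continue;
 *			} else {
 *				pe = ev.event_ptr;
 *			}
 *
 *			stage = ev.queue_id % nb_stages;
 *			if (opt->fwd_latency && !prod_timer_type && stage == 0)
 *				perf_mark_fwd_latency(pe);
 *
 *			if (unlikely(stage == laststage)) {
 *				if (opt->fwd_latency)
 *					cnt = perf_process_last_stage_latency(pool,
 *						prod_crypto_type, &ev, w, bufs, sz, cnt);
 *				else
 *					cnt = perf_process_last_stage(pool,
 *						prod_crypto_type, &ev, w, bufs, sz, cnt);
 *			} else {
 *				fwd_event(&ev, sched_type_list, nb_stages);
 *				do {
 *					enq = rte_event_enqueue_burst(dev, port, &ev, 1);
 *				} while (!enq && !t->done);
 *			}
 *		}
 *
 *		perf_worker_cleanup(pool, dev, port, &ev, enq, deq);
 *		return 0;
 *	}
 */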

static inline int
perf_nb_event_ports(struct evt_options *opt)
{
	return evt_nr_active_lcores(opt->wlcores) +
			evt_nr_active_lcores(opt->plcores);
}

int perf_test_result(struct evt_test *test, struct evt_options *opt);
int perf_opt_check(struct evt_options *opt, uint64_t nb_queues);
int perf_test_setup(struct evt_test *test, struct evt_options *opt);
int perf_ethdev_setup(struct evt_test *test, struct evt_options *opt);
int perf_cryptodev_setup(struct evt_test *test, struct evt_options *opt);
int perf_mempool_setup(struct evt_test *test, struct evt_options *opt);
int perf_event_dev_port_setup(struct evt_test *test, struct evt_options *opt,
				uint8_t stride, uint8_t nb_queues,
				const struct rte_event_port_conf *port_conf);
int perf_event_dev_service_setup(uint8_t dev_id);
int perf_launch_lcores(struct evt_test *test, struct evt_options *opt,
		int (*worker)(void *));
void perf_opt_dump(struct evt_options *opt, uint8_t nb_queues);
void perf_test_destroy(struct evt_test *test, struct evt_options *opt);
void perf_eventdev_destroy(struct evt_test *test, struct evt_options *opt);
void perf_cryptodev_destroy(struct evt_test *test, struct evt_options *opt);
void perf_ethdev_destroy(struct evt_test *test, struct evt_options *opt);
void perf_ethdev_rx_stop(struct evt_test *test, struct evt_options *opt);
void perf_mempool_destroy(struct evt_test *test, struct evt_options *opt);
void perf_worker_cleanup(struct rte_mempool *const pool, uint8_t dev_id,
			 uint8_t port_id, struct rte_event events[],
			 uint16_t nb_enq, uint16_t nb_deq);

#endif /* _TEST_PERF_COMMON_ */