/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2017 Cavium, Inc
 */

#ifndef _TEST_PERF_COMMON_
#define _TEST_PERF_COMMON_

#include <stdio.h>
#include <stdbool.h>
#include <unistd.h>

#include <rte_cryptodev.h>
#include <rte_cycles.h>
#include <rte_ethdev.h>
#include <rte_event_crypto_adapter.h>
#include <rte_event_dma_adapter.h>
#include <rte_event_eth_rx_adapter.h>
#include <rte_event_eth_tx_adapter.h>
#include <rte_event_timer_adapter.h>
#include <rte_eventdev.h>
#include <rte_lcore.h>
#include <rte_malloc.h>
#include <rte_mempool.h>
#include <rte_prefetch.h>

#include "evt_common.h"
#include "evt_options.h"
#include "evt_test.h"

#define TEST_PERF_CA_ID 0
#define TEST_PERF_DA_ID 0

struct test_perf;

struct __rte_cache_aligned worker_data {
	uint64_t processed_pkts;
	uint64_t latency;
	uint8_t dev_id;
	uint8_t port_id;
	struct test_perf *t;
};

struct crypto_adptr_data {
	uint8_t cdev_id;
	uint16_t cdev_qp_id;
	void **crypto_sess;
};

struct dma_adptr_data {
	uint8_t dma_dev_id;
	uint16_t vchan_id;
};

struct __rte_cache_aligned prod_data {
	uint8_t dev_id;
	uint8_t port_id;
	uint8_t queue_id;
	struct crypto_adptr_data ca;
	struct dma_adptr_data da;
	struct test_perf *t;
};

struct __rte_cache_aligned test_perf {
	/* Don't change the offset of "done". The signal handler uses this
	 * memory to terminate all lcores' work.
	 */
	int done;
	uint64_t outstand_pkts;
	uint8_t nb_workers;
	enum evt_test_result result;
	uint32_t nb_flows;
	uint64_t nb_pkts;
	struct rte_mempool *pool;
	struct prod_data prod[EVT_MAX_PORTS];
	struct worker_data worker[EVT_MAX_PORTS];
	struct evt_options *opt;
	alignas(RTE_CACHE_LINE_SIZE) uint8_t sched_type_list[EVT_MAX_STAGES];
	alignas(RTE_CACHE_LINE_SIZE) struct rte_event_timer_adapter *timer_adptr[
		RTE_EVENT_TIMER_ADAPTER_NUM_MAX];
	struct rte_mempool *ca_op_pool;
	struct rte_mempool *ca_sess_pool;
	struct rte_mempool *ca_asym_sess_pool;
	struct rte_mempool *ca_vector_pool;
};

struct __rte_cache_aligned perf_elt {
	union {
		struct rte_event_timer tim;
		struct {
			char pad[offsetof(struct rte_event_timer, user_meta)];
			uint64_t timestamp;
		};
	};
};

#define BURST_SIZE 16
#define MAX_PROD_ENQ_BURST_SIZE 128

#define PERF_WORKER_INIT\
	struct worker_data *w = arg;\
	struct test_perf *t = w->t;\
	struct evt_options *opt = t->opt;\
	const uint8_t dev = w->dev_id;\
	const uint8_t port = w->port_id;\
	const uint8_t prod_timer_type = \
		opt->prod_type == EVT_PROD_TYPE_EVENT_TIMER_ADPTR;\
	uint8_t *const sched_type_list = &t->sched_type_list[0];\
	const enum evt_prod_type prod_type = opt->prod_type;\
	struct rte_mempool *const pool = t->pool;\
	const uint8_t nb_stages = t->opt->nb_stages;\
	const uint8_t laststage = nb_stages - 1;\
	uint8_t cnt = 0;\
	alignas(RTE_CACHE_LINE_SIZE) void *bufs[16];\
	int const sz = RTE_DIM(bufs);\
	uint8_t stage;\
	struct perf_elt *pe = NULL;\
	if (opt->verbose_level > 1)\
		printf("%s(): lcore %d dev_id %d port=%d\n", __func__,\
			rte_lcore_id(), dev, port)
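
/* Record the producer timestamp on the object carried by the event so the
 * last stage can compute forward latency. Where the timestamp lives depends
 * on the producer type: the start of the mbuf data area for symmetric crypto
 * ops, just past the modex result for asymmetric crypto ops, op->user_meta
 * for DMA adapter ops and the perf_elt itself for all other producers.
 */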
static __rte_always_inline void
perf_mark_fwd_latency(enum evt_prod_type prod_type, struct rte_event *const ev)
{
	struct perf_elt *pe;

	if (prod_type == EVT_PROD_TYPE_EVENT_CRYPTO_ADPTR) {
		struct rte_crypto_op *op = ev->event_ptr;
		struct rte_mbuf *m;

		if (op->type == RTE_CRYPTO_OP_TYPE_SYMMETRIC) {
			if (op->sym->m_dst == NULL)
				m = op->sym->m_src;
			else
				m = op->sym->m_dst;

			pe = rte_pktmbuf_mtod(m, struct perf_elt *);
		} else {
			pe = RTE_PTR_ADD(op->asym->modex.result.data,
					op->asym->modex.result.length);
		}
		pe->timestamp = rte_get_timer_cycles();
	} else if (prod_type == EVT_PROD_TYPE_EVENT_DMA_ADPTR) {
		struct rte_event_dma_adapter_op *op = ev->event_ptr;

		op->user_meta = rte_get_timer_cycles();
	} else {
		pe = ev->event_ptr;
		pe->timestamp = rte_get_timer_cycles();
	}
}

static __rte_always_inline int
perf_handle_crypto_ev(struct rte_event *ev)
{
	struct rte_crypto_op *op = ev->event_ptr;
	const int status = op->status;

	if (unlikely(status != RTE_CRYPTO_OP_STATUS_SUCCESS)) {
		rte_crypto_op_free(op);
		return status;
	}

	return 0;
}

static __rte_always_inline struct perf_elt *
perf_elt_from_vec_get(struct rte_event_vector *vec)
{
	/* Timestamp for vector event is stored in the first element */
	struct rte_crypto_op *cop = vec->ptrs[0];
	struct rte_mbuf *m;

	if (cop->type == RTE_CRYPTO_OP_TYPE_SYMMETRIC) {
		m = cop->sym->m_dst == NULL ? cop->sym->m_src : cop->sym->m_dst;
		return rte_pktmbuf_mtod(m, struct perf_elt *);
	} else {
		return RTE_PTR_ADD(cop->asym->modex.result.data, cop->asym->modex.result.length);
	}
}
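
/* Drop failed crypto ops from the event vector and compact the surviving ops
 * in place. When every op has failed the vector is released and -ENOENT is
 * returned; otherwise 0 is returned and, if forward latency measurement is
 * enabled, *pe is set to the perf data of the first surviving op.
 */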
static __rte_always_inline int
perf_handle_crypto_vector_ev(struct rte_event *ev, struct perf_elt **pe,
		const int enable_fwd_latency)
{
	struct rte_event_vector *vec = ev->vec;
	struct rte_crypto_op *cop;
	struct rte_mbuf *m;
	int i, n = 0;
	void *data;

	for (i = 0; i < vec->nb_elem; i++) {
		cop = vec->ptrs[i];
		if (unlikely(cop->status != RTE_CRYPTO_OP_STATUS_SUCCESS)) {
			if (cop->type == RTE_CRYPTO_OP_TYPE_SYMMETRIC) {
				m = cop->sym->m_dst == NULL ?
					cop->sym->m_src : cop->sym->m_dst;
				rte_pktmbuf_free(m);
			} else {
				data = cop->asym->modex.result.data;
				rte_mempool_put(rte_mempool_from_obj(data), data);
			}
			rte_crypto_op_free(cop);
			continue;
		}
		vec->ptrs[n++] = cop;
	}

	/* All cops failed, free the vector */
	if (n == 0) {
		rte_mempool_put(rte_mempool_from_obj(vec), vec);
		return -ENOENT;
	}

	vec->nb_elem = n;

	/* Forward latency not enabled - perf data will not be accessed */
	if (!enable_fwd_latency)
		return 0;

	/* Get pointer to perf data */
	*pe = perf_elt_from_vec_get(vec);

	return 0;
}

static __rte_always_inline int
perf_process_last_stage(struct rte_mempool *const pool, enum evt_prod_type prod_type,
		struct rte_event *const ev, struct worker_data *const w,
		void *bufs[], int const buf_sz, uint8_t count)
{
	void *to_free_in_bulk;

	/* Release fence here ensures event_ptr is
	 * stored before updating the number of
	 * processed packets for worker lcores.
	 */
	rte_atomic_thread_fence(rte_memory_order_release);
	w->processed_pkts++;

	if (prod_type == EVT_PROD_TYPE_EVENT_CRYPTO_ADPTR) {
		struct rte_crypto_op *op = ev->event_ptr;
		struct rte_mbuf *m;

		if (op->type == RTE_CRYPTO_OP_TYPE_SYMMETRIC) {
			if (op->sym->m_dst == NULL)
				m = op->sym->m_src;
			else
				m = op->sym->m_dst;

			to_free_in_bulk = m;
		} else {
			to_free_in_bulk = op->asym->modex.result.data;
		}
		rte_crypto_op_free(op);
	} else {
		to_free_in_bulk = ev->event_ptr;
	}

	bufs[count++] = to_free_in_bulk;
	if (unlikely(count == buf_sz)) {
		count = 0;
		rte_mempool_put_bulk(pool, bufs, buf_sz);
	}

	return count;
}
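
/* Same as perf_process_last_stage(), but additionally accumulates the forward
 * latency (current timer cycles minus the timestamp recorded by the producer)
 * into w->latency before the completed object is queued for bulk release.
 */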
static __rte_always_inline uint8_t
perf_process_last_stage_latency(struct rte_mempool *const pool, enum evt_prod_type prod_type,
		struct rte_event *const ev, struct worker_data *const w,
		void *bufs[], int const buf_sz, uint8_t count)
{
	uint64_t latency, tstamp;
	struct perf_elt *pe;
	void *to_free_in_bulk;

	/* Release fence here ensures event_ptr is stored before updating the
	 * number of processed packets for worker lcores.
	 */
	rte_atomic_thread_fence(rte_memory_order_release);
	w->processed_pkts++;

	if (prod_type == EVT_PROD_TYPE_EVENT_CRYPTO_ADPTR) {
		struct rte_crypto_op *op = ev->event_ptr;
		struct rte_mbuf *m;

		if (op->type == RTE_CRYPTO_OP_TYPE_SYMMETRIC) {
			if (op->sym->m_dst == NULL)
				m = op->sym->m_src;
			else
				m = op->sym->m_dst;

			to_free_in_bulk = m;
			pe = rte_pktmbuf_mtod(m, struct perf_elt *);
		} else {
			pe = RTE_PTR_ADD(op->asym->modex.result.data,
					op->asym->modex.result.length);
			to_free_in_bulk = op->asym->modex.result.data;
		}
		tstamp = pe->timestamp;
		rte_crypto_op_free(op);
	} else if (prod_type == EVT_PROD_TYPE_EVENT_DMA_ADPTR) {
		struct rte_event_dma_adapter_op *op = ev->event_ptr;

		to_free_in_bulk = op;
		tstamp = op->user_meta;
	} else {
		pe = ev->event_ptr;
		tstamp = pe->timestamp;
		to_free_in_bulk = pe;
	}

	latency = rte_get_timer_cycles() - tstamp;
	w->latency += latency;

	bufs[count++] = to_free_in_bulk;
	if (unlikely(count == buf_sz)) {
		count = 0;
		rte_mempool_put_bulk(pool, bufs, buf_sz);
	}

	return count;
}
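
/* Last stage handler for crypto vector events: optionally accounts the
 * forward latency from the vector's first element, then bulk-frees the
 * payload buffers to pool, the crypto ops to ca_pool and the vector back to
 * its own mempool.
 */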
static __rte_always_inline void
perf_process_vector_last_stage(struct rte_mempool *const pool,
		struct rte_mempool *const ca_pool, struct rte_event *const ev,
		struct worker_data *const w, const bool enable_fwd_latency)
{
	struct rte_event_vector *vec = ev->vec;
	struct rte_crypto_op *cop;
	void *bufs[vec->nb_elem];
	struct perf_elt *pe;
	uint64_t latency;
	int i;

	/* Release fence here ensures event_ptr is stored before updating the
	 * number of processed packets for worker lcores.
	 */
	rte_atomic_thread_fence(rte_memory_order_release);
	w->processed_pkts += vec->nb_elem;

	if (enable_fwd_latency) {
		pe = perf_elt_from_vec_get(vec);
		latency = rte_get_timer_cycles() - pe->timestamp;
		w->latency += latency;
	}

	for (i = 0; i < vec->nb_elem; i++) {
		cop = vec->ptrs[i];
		if (cop->type == RTE_CRYPTO_OP_TYPE_SYMMETRIC)
			bufs[i] = cop->sym->m_dst == NULL ?
				cop->sym->m_src : cop->sym->m_dst;
		else
			bufs[i] = cop->asym->modex.result.data;
	}

	rte_mempool_put_bulk(pool, bufs, vec->nb_elem);
	rte_mempool_put_bulk(ca_pool, (void * const *)vec->ptrs, vec->nb_elem);
	rte_mempool_put(rte_mempool_from_obj(vec), vec);
}

static inline int
perf_nb_event_ports(struct evt_options *opt)
{
	return evt_nr_active_lcores(opt->wlcores) +
			evt_nr_active_lcores(opt->plcores);
}

int perf_test_result(struct evt_test *test, struct evt_options *opt);
int perf_opt_check(struct evt_options *opt, uint64_t nb_queues);
int perf_test_setup(struct evt_test *test, struct evt_options *opt);
int perf_ethdev_setup(struct evt_test *test, struct evt_options *opt);
int perf_cryptodev_setup(struct evt_test *test, struct evt_options *opt);
int perf_dmadev_setup(struct evt_test *test, struct evt_options *opt);
int perf_mempool_setup(struct evt_test *test, struct evt_options *opt);
int perf_event_dev_port_setup(struct evt_test *test, struct evt_options *opt,
				uint8_t stride, uint8_t nb_queues,
				const struct rte_event_port_conf *port_conf);
int perf_event_dev_service_setup(uint8_t dev_id);
int perf_launch_lcores(struct evt_test *test, struct evt_options *opt,
		int (*worker)(void *));
void perf_opt_dump(struct evt_options *opt, uint8_t nb_queues);
void perf_test_destroy(struct evt_test *test, struct evt_options *opt);
void perf_eventdev_destroy(struct evt_test *test, struct evt_options *opt);
void perf_cryptodev_destroy(struct evt_test *test, struct evt_options *opt);
void perf_dmadev_destroy(struct evt_test *test, struct evt_options *opt);
void perf_ethdev_destroy(struct evt_test *test, struct evt_options *opt);
void perf_ethdev_rx_stop(struct evt_test *test, struct evt_options *opt);
void perf_mempool_destroy(struct evt_test *test, struct evt_options *opt);
void perf_worker_cleanup(struct rte_mempool *const pool, uint8_t dev_id,
			 uint8_t port_id, struct rte_event events[],
			 uint16_t nb_enq, uint16_t nb_deq);

#endif /* _TEST_PERF_COMMON_ */