/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2017 Cavium, Inc
 */

#ifndef _TEST_PERF_COMMON_
#define _TEST_PERF_COMMON_

#include <stdio.h>
#include <stdbool.h>
#include <unistd.h>

#include <rte_cryptodev.h>
#include <rte_cycles.h>
#include <rte_ethdev.h>
#include <rte_event_crypto_adapter.h>
#include <rte_event_eth_rx_adapter.h>
#include <rte_event_eth_tx_adapter.h>
#include <rte_event_timer_adapter.h>
#include <rte_eventdev.h>
#include <rte_lcore.h>
#include <rte_malloc.h>
#include <rte_mempool.h>
#include <rte_prefetch.h>

#include "evt_common.h"
#include "evt_options.h"
#include "evt_test.h"

#define TEST_PERF_CA_ID 0
#define TEST_PERF_DA_ID 0

struct test_perf;

struct __rte_cache_aligned worker_data {
	uint64_t processed_pkts;
	uint64_t latency;
	uint8_t dev_id;
	uint8_t port_id;
	struct test_perf *t;
};

struct crypto_adptr_data {
	uint8_t cdev_id;
	uint16_t cdev_qp_id;
	void **crypto_sess;
};

struct dma_adptr_data {
	uint8_t dma_dev_id;
	uint16_t vchan_id;
	void **dma_op;
};

struct __rte_cache_aligned prod_data {
	uint8_t dev_id;
	uint8_t port_id;
	uint8_t queue_id;
	struct crypto_adptr_data ca;
	struct dma_adptr_data da;
	struct test_perf *t;
};

struct __rte_cache_aligned test_perf {
	/* Don't change the offset of "done". The signal handler uses this
	 * memory to terminate all lcore workers.
	 */
	int done;
	uint64_t outstand_pkts;
	uint8_t nb_workers;
	enum evt_test_result result;
	uint32_t nb_flows;
	uint64_t nb_pkts;
	struct rte_mempool *pool;
	struct prod_data prod[EVT_MAX_PORTS];
	struct worker_data worker[EVT_MAX_PORTS];
	struct evt_options *opt;
	alignas(RTE_CACHE_LINE_SIZE) uint8_t sched_type_list[EVT_MAX_STAGES];
	alignas(RTE_CACHE_LINE_SIZE) struct rte_event_timer_adapter *timer_adptr[
		RTE_EVENT_TIMER_ADAPTER_NUM_MAX];
	struct rte_mempool *ca_op_pool;
	struct rte_mempool *ca_sess_pool;
	struct rte_mempool *ca_asym_sess_pool;
	struct rte_mempool *ca_vector_pool;
	struct rte_mempool *da_op_pool;
};

struct __rte_cache_aligned perf_elt {
	union {
		struct rte_event_timer tim;
		struct {
			/* Pad so that "timestamp" overlays the event timer's
			 * user_meta area and survives when this element is
			 * used as an rte_event_timer.
			 */
			char pad[offsetof(struct rte_event_timer, user_meta)];
			uint64_t timestamp;
		};
	};
};

#define BURST_SIZE 16
#define MAX_PROD_ENQ_BURST_SIZE 128

#define PERF_WORKER_INIT\
	struct worker_data *w = arg;\
	struct test_perf *t = w->t;\
	struct evt_options *opt = t->opt;\
	const uint8_t dev = w->dev_id;\
	const uint8_t port = w->port_id;\
	const uint8_t prod_timer_type = \
		opt->prod_type == EVT_PROD_TYPE_EVENT_TIMER_ADPTR;\
	uint8_t *const sched_type_list = &t->sched_type_list[0];\
	const enum evt_prod_type prod_type = opt->prod_type;\
	struct rte_mempool *const pool = t->pool;\
	const uint8_t nb_stages = t->opt->nb_stages;\
	const uint8_t laststage = nb_stages - 1;\
	uint8_t cnt = 0;\
	alignas(RTE_CACHE_LINE_SIZE) void *bufs[BURST_SIZE];\
	int const sz = RTE_DIM(bufs);\
	uint8_t stage;\
	struct perf_elt *pe = NULL;\
	if (opt->verbose_level > 1)\
		printf("%s(): lcore %d dev_id %d port=%d\n", __func__,\
			rte_lcore_id(), dev, port)

static __rte_always_inline void
perf_mark_fwd_latency(struct perf_elt *const pe)
{
	pe->timestamp = rte_get_timer_cycles();
}
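
/*
 * Illustrative sketch (not used by the harness) of how a worker function
 * composes PERF_WORKER_INIT with the helpers in this header; the real
 * worker loops live in test_perf_queues.c and test_perf_atq.c. The
 * single-event dequeue and the first-stage check are simplified here:
 *
 *	static int
 *	worker(void *arg)
 *	{
 *		PERF_WORKER_INIT;
 *		struct rte_event ev;
 *
 *		while (t->done == 0) {
 *			if (!rte_event_dequeue_burst(dev, port, &ev, 1, 0)) {
 *				rte_pause();
 *				continue;
 *			}
 *			// first stage: stamp the fwd-latency timestamp
 *			if (ev.queue_id == 0 && !prod_timer_type)
 *				perf_mark_fwd_latency(ev.event_ptr);
 *			// ... run the remaining stages, free at laststage ...
 *		}
 *		return 0;
 *	}
 */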
static __rte_always_inline int
perf_handle_crypto_ev(struct rte_event *ev, struct perf_elt **pe, int enable_fwd_latency)
{
	struct rte_crypto_op *op = ev->event_ptr;
	struct rte_mbuf *m;

	if (unlikely(op->status != RTE_CRYPTO_OP_STATUS_SUCCESS)) {
		/* Save the status before the op is freed. */
		int status = op->status;

		rte_crypto_op_free(op);
		return status;
	}

	/* Forward latency not enabled - perf data will not be accessed */
	if (!enable_fwd_latency)
		return 0;

	/* Get pointer to perf data */
	if (op->type == RTE_CRYPTO_OP_TYPE_SYMMETRIC) {
		if (op->sym->m_dst == NULL)
			m = op->sym->m_src;
		else
			m = op->sym->m_dst;
		*pe = rte_pktmbuf_mtod(m, struct perf_elt *);
	} else {
		*pe = RTE_PTR_ADD(op->asym->modex.result.data, op->asym->modex.result.length);
	}

	return 0;
}

static __rte_always_inline struct perf_elt *
perf_elt_from_vec_get(struct rte_event_vector *vec)
{
	/* Timestamp for vector event stored in first element */
	struct rte_crypto_op *cop = vec->ptrs[0];
	struct rte_mbuf *m;

	if (cop->type == RTE_CRYPTO_OP_TYPE_SYMMETRIC) {
		m = cop->sym->m_dst == NULL ? cop->sym->m_src : cop->sym->m_dst;
		return rte_pktmbuf_mtod(m, struct perf_elt *);
	} else {
		return RTE_PTR_ADD(cop->asym->modex.result.data, cop->asym->modex.result.length);
	}
}

static __rte_always_inline int
perf_handle_crypto_vector_ev(struct rte_event *ev, struct perf_elt **pe,
		const int enable_fwd_latency)
{
	struct rte_event_vector *vec = ev->vec;
	struct rte_crypto_op *cop;
	struct rte_mbuf *m;
	int i, n = 0;
	void *data;

	for (i = 0; i < vec->nb_elem; i++) {
		cop = vec->ptrs[i];
		if (unlikely(cop->status != RTE_CRYPTO_OP_STATUS_SUCCESS)) {
			if (cop->type == RTE_CRYPTO_OP_TYPE_SYMMETRIC) {
				m = cop->sym->m_dst == NULL ? cop->sym->m_src : cop->sym->m_dst;
				rte_pktmbuf_free(m);
			} else {
				data = cop->asym->modex.result.data;
				rte_mempool_put(rte_mempool_from_obj(data), data);
			}
			rte_crypto_op_free(cop);
			continue;
		}
		vec->ptrs[n++] = cop;
	}

	/* All crypto ops failed; free the vector. */
	if (n == 0) {
		rte_mempool_put(rte_mempool_from_obj(vec), vec);
		return -ENOENT;
	}

	vec->nb_elem = n;

	/* Forward latency not enabled - perf data will not be accessed */
	if (!enable_fwd_latency)
		return 0;

	/* Get pointer to perf data */
	*pe = perf_elt_from_vec_get(vec);

	return 0;
}
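
/*
 * Illustrative call pattern for the crypto helpers above (simplified
 * sketch; see the crypto worker loops in test_perf_queues.c and
 * test_perf_atq.c for the real usage):
 *
 *	struct perf_elt *pe = NULL;
 *
 *	if (ev.event_type == RTE_EVENT_TYPE_CRYPTODEV_VECTOR) {
 *		if (perf_handle_crypto_vector_ev(&ev, &pe, enable_fwd_latency))
 *			continue;	// every op in the vector failed
 *	} else if (perf_handle_crypto_ev(&ev, &pe, enable_fwd_latency)) {
 *		continue;		// op failed and was already freed
 *	}
 *	// on success with fwd latency enabled, pe points at the perf_elt
 *	// embedded in the crypto op's output buffer
 */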
static __rte_always_inline int
perf_process_last_stage(struct rte_mempool *const pool, enum evt_prod_type prod_type,
		struct rte_event *const ev, struct worker_data *const w,
		void *bufs[], int const buf_sz, uint8_t count)
{
	void *to_free_in_bulk;

	/* The release fence here ensures event_ptr is stored before
	 * updating the number of processed packets for worker lcores.
	 */
	rte_atomic_thread_fence(rte_memory_order_release);
	w->processed_pkts++;

	if (prod_type == EVT_PROD_TYPE_EVENT_CRYPTO_ADPTR) {
		struct rte_crypto_op *op = ev->event_ptr;
		struct rte_mbuf *m;

		if (op->type == RTE_CRYPTO_OP_TYPE_SYMMETRIC) {
			if (op->sym->m_dst == NULL)
				m = op->sym->m_src;
			else
				m = op->sym->m_dst;

			to_free_in_bulk = m;
		} else {
			to_free_in_bulk = op->asym->modex.result.data;
		}
		rte_crypto_op_free(op);
	} else if (prod_type == EVT_PROD_TYPE_EVENT_DMA_ADPTR) {
		return count;
	} else {
		to_free_in_bulk = ev->event_ptr;
	}

	bufs[count++] = to_free_in_bulk;
	if (unlikely(count == buf_sz)) {
		count = 0;
		rte_mempool_put_bulk(pool, bufs, buf_sz);
	}

	return count;
}

static __rte_always_inline uint8_t
perf_process_last_stage_latency(struct rte_mempool *const pool, enum evt_prod_type prod_type,
		struct rte_event *const ev, struct worker_data *const w,
		void *bufs[], int const buf_sz, uint8_t count)
{
	uint64_t latency;
	struct perf_elt *pe;
	void *to_free_in_bulk;

	/* The release fence here ensures event_ptr is stored before
	 * updating the number of processed packets for worker lcores.
	 */
	rte_atomic_thread_fence(rte_memory_order_release);
	w->processed_pkts++;

	if (prod_type == EVT_PROD_TYPE_EVENT_CRYPTO_ADPTR) {
		struct rte_crypto_op *op = ev->event_ptr;
		struct rte_mbuf *m;

		if (op->type == RTE_CRYPTO_OP_TYPE_SYMMETRIC) {
			if (op->sym->m_dst == NULL)
				m = op->sym->m_src;
			else
				m = op->sym->m_dst;

			to_free_in_bulk = m;
			pe = rte_pktmbuf_mtod(m, struct perf_elt *);
		} else {
			pe = RTE_PTR_ADD(op->asym->modex.result.data,
					op->asym->modex.result.length);
			to_free_in_bulk = op->asym->modex.result.data;
		}
		rte_crypto_op_free(op);
	} else if (prod_type == EVT_PROD_TYPE_EVENT_DMA_ADPTR) {
		return count;
	} else {
		pe = ev->event_ptr;
		to_free_in_bulk = pe;
	}

	latency = rte_get_timer_cycles() - pe->timestamp;
	w->latency += latency;

	bufs[count++] = to_free_in_bulk;
	if (unlikely(count == buf_sz)) {
		count = 0;
		rte_mempool_put_bulk(pool, bufs, buf_sz);
	}

	return count;
}

static __rte_always_inline void
perf_process_vector_last_stage(struct rte_mempool *const pool,
		struct rte_mempool *const ca_pool, struct rte_event *const ev,
		struct worker_data *const w, const bool enable_fwd_latency)
{
	struct rte_event_vector *vec = ev->vec;
	struct rte_crypto_op *cop;
	void *bufs[vec->nb_elem];
	struct perf_elt *pe;
	uint64_t latency;
	int i;

	/* The release fence here ensures event_ptr is stored before
	 * updating the number of processed packets for worker lcores.
	 */
	rte_atomic_thread_fence(rte_memory_order_release);
	w->processed_pkts += vec->nb_elem;

	if (enable_fwd_latency) {
		pe = perf_elt_from_vec_get(vec);
		latency = rte_get_timer_cycles() - pe->timestamp;
		w->latency += latency;
	}

	for (i = 0; i < vec->nb_elem; i++) {
		cop = vec->ptrs[i];
		if (cop->type == RTE_CRYPTO_OP_TYPE_SYMMETRIC)
			bufs[i] = cop->sym->m_dst == NULL ?
				cop->sym->m_src : cop->sym->m_dst;
		else
			bufs[i] = cop->asym->modex.result.data;
	}

	rte_mempool_put_bulk(pool, bufs, vec->nb_elem);
	rte_mempool_put_bulk(ca_pool, (void * const *)vec->ptrs, vec->nb_elem);
	rte_mempool_put(rte_mempool_from_obj(vec), vec);
}
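
/*
 * Typical last-stage usage (hedged sketch): "bufs", "sz" and "cnt" come
 * from PERF_WORKER_INIT, so completed events are returned to the mempool
 * in bulk (BURST_SIZE at a time) rather than one by one:
 *
 *	if (enable_fwd_latency)
 *		cnt = perf_process_last_stage_latency(pool, prod_type,
 *				&ev, w, bufs, sz, cnt);
 *	else
 *		cnt = perf_process_last_stage(pool, prod_type,
 *				&ev, w, bufs, sz, cnt);
 */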
static inline int
perf_nb_event_ports(struct evt_options *opt)
{
	return evt_nr_active_lcores(opt->wlcores) +
			evt_nr_active_lcores(opt->plcores);
}

int perf_test_result(struct evt_test *test, struct evt_options *opt);
int perf_opt_check(struct evt_options *opt, uint64_t nb_queues);
int perf_test_setup(struct evt_test *test, struct evt_options *opt);
int perf_ethdev_setup(struct evt_test *test, struct evt_options *opt);
int perf_cryptodev_setup(struct evt_test *test, struct evt_options *opt);
int perf_dmadev_setup(struct evt_test *test, struct evt_options *opt);
int perf_mempool_setup(struct evt_test *test, struct evt_options *opt);
int perf_event_dev_port_setup(struct evt_test *test, struct evt_options *opt,
		uint8_t stride, uint8_t nb_queues,
		const struct rte_event_port_conf *port_conf);
int perf_event_dev_service_setup(uint8_t dev_id);
int perf_launch_lcores(struct evt_test *test, struct evt_options *opt,
		int (*worker)(void *));
void perf_opt_dump(struct evt_options *opt, uint8_t nb_queues);
void perf_test_destroy(struct evt_test *test, struct evt_options *opt);
void perf_eventdev_destroy(struct evt_test *test, struct evt_options *opt);
void perf_cryptodev_destroy(struct evt_test *test, struct evt_options *opt);
void perf_dmadev_destroy(struct evt_test *test, struct evt_options *opt);
void perf_ethdev_destroy(struct evt_test *test, struct evt_options *opt);
void perf_ethdev_rx_stop(struct evt_test *test, struct evt_options *opt);
void perf_mempool_destroy(struct evt_test *test, struct evt_options *opt);
void perf_worker_cleanup(struct rte_mempool *const pool, uint8_t dev_id,
		uint8_t port_id, struct rte_event events[],
		uint16_t nb_enq, uint16_t nb_deq);

#endif /* _TEST_PERF_COMMON_ */