1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright(c) 2017 Cavium, Inc 3 */ 4 5 #ifndef _TEST_PERF_COMMON_ 6 #define _TEST_PERF_COMMON_ 7 8 #include <stdio.h> 9 #include <stdbool.h> 10 #include <unistd.h> 11 12 #include <rte_cryptodev.h> 13 #include <rte_cycles.h> 14 #include <rte_ethdev.h> 15 #include <rte_event_crypto_adapter.h> 16 #include <rte_event_eth_rx_adapter.h> 17 #include <rte_event_eth_tx_adapter.h> 18 #include <rte_event_timer_adapter.h> 19 #include <rte_eventdev.h> 20 #include <rte_lcore.h> 21 #include <rte_malloc.h> 22 #include <rte_mempool.h> 23 #include <rte_prefetch.h> 24 25 #include "evt_common.h" 26 #include "evt_options.h" 27 #include "evt_test.h" 28 29 #define TEST_PERF_CA_ID 0 30 31 struct test_perf; 32 33 struct worker_data { 34 uint64_t processed_pkts; 35 uint64_t latency; 36 uint8_t dev_id; 37 uint8_t port_id; 38 struct test_perf *t; 39 } __rte_cache_aligned; 40 41 struct crypto_adptr_data { 42 uint8_t cdev_id; 43 uint16_t cdev_qp_id; 44 void **crypto_sess; 45 }; 46 struct prod_data { 47 uint8_t dev_id; 48 uint8_t port_id; 49 uint8_t queue_id; 50 struct crypto_adptr_data ca; 51 struct test_perf *t; 52 } __rte_cache_aligned; 53 54 struct test_perf { 55 /* Don't change the offset of "done". Signal handler use this memory 56 * to terminate all lcores work. 57 */ 58 int done; 59 uint64_t outstand_pkts; 60 uint8_t nb_workers; 61 enum evt_test_result result; 62 uint32_t nb_flows; 63 uint64_t nb_pkts; 64 struct rte_mempool *pool; 65 struct prod_data prod[EVT_MAX_PORTS]; 66 struct worker_data worker[EVT_MAX_PORTS]; 67 struct evt_options *opt; 68 uint8_t sched_type_list[EVT_MAX_STAGES] __rte_cache_aligned; 69 struct rte_event_timer_adapter *timer_adptr[ 70 RTE_EVENT_TIMER_ADAPTER_NUM_MAX] __rte_cache_aligned; 71 struct rte_mempool *ca_op_pool; 72 struct rte_mempool *ca_sess_pool; 73 struct rte_mempool *ca_asym_sess_pool; 74 } __rte_cache_aligned; 75 76 struct perf_elt { 77 union { 78 struct rte_event_timer tim; 79 struct { 80 char pad[offsetof(struct rte_event_timer, user_meta)]; 81 uint64_t timestamp; 82 }; 83 }; 84 } __rte_cache_aligned; 85 86 #define BURST_SIZE 16 87 #define MAX_PROD_ENQ_BURST_SIZE 128 88 89 #define PERF_WORKER_INIT\ 90 struct worker_data *w = arg;\ 91 struct test_perf *t = w->t;\ 92 struct evt_options *opt = t->opt;\ 93 const uint8_t dev = w->dev_id;\ 94 const uint8_t port = w->port_id;\ 95 const uint8_t prod_timer_type = \ 96 opt->prod_type == EVT_PROD_TYPE_EVENT_TIMER_ADPTR;\ 97 const uint8_t prod_crypto_type = \ 98 opt->prod_type == EVT_PROD_TYPE_EVENT_CRYPTO_ADPTR;\ 99 uint8_t *const sched_type_list = &t->sched_type_list[0];\ 100 struct rte_mempool *const pool = t->pool;\ 101 const uint8_t nb_stages = t->opt->nb_stages;\ 102 const uint8_t laststage = nb_stages - 1;\ 103 uint8_t cnt = 0;\ 104 void *bufs[16] __rte_cache_aligned;\ 105 int const sz = RTE_DIM(bufs);\ 106 if (opt->verbose_level > 1)\ 107 printf("%s(): lcore %d dev_id %d port=%d\n", __func__,\ 108 rte_lcore_id(), dev, port) 109 110 static __rte_always_inline void 111 perf_mark_fwd_latency(struct perf_elt *const pe) 112 { 113 pe->timestamp = rte_get_timer_cycles(); 114 } 115 116 static __rte_always_inline int 117 perf_handle_crypto_ev(struct rte_event *ev, struct perf_elt **pe, int enable_fwd_latency) 118 { 119 struct rte_crypto_op *op = ev->event_ptr; 120 struct rte_mbuf *m; 121 122 123 if (unlikely(op->status != RTE_CRYPTO_OP_STATUS_SUCCESS)) { 124 rte_crypto_op_free(op); 125 return op->status; 126 } 127 128 /* Forward latency not enabled - perf data will not be accessed */ 129 if (!enable_fwd_latency) 130 return 0; 131 132 /* Get pointer to perf data */ 133 if (op->type == RTE_CRYPTO_OP_TYPE_SYMMETRIC) { 134 if (op->sym->m_dst == NULL) 135 m = op->sym->m_src; 136 else 137 m = op->sym->m_dst; 138 *pe = rte_pktmbuf_mtod(m, struct perf_elt *); 139 } else { 140 *pe = RTE_PTR_ADD(op->asym->modex.result.data, op->asym->modex.result.length); 141 } 142 143 return 0; 144 } 145 146 147 static __rte_always_inline int 148 perf_process_last_stage(struct rte_mempool *const pool, uint8_t prod_crypto_type, 149 struct rte_event *const ev, struct worker_data *const w, 150 void *bufs[], int const buf_sz, uint8_t count) 151 { 152 void *to_free_in_bulk; 153 154 /* release fence here ensures event_prt is 155 * stored before updating the number of 156 * processed packets for worker lcores 157 */ 158 rte_atomic_thread_fence(__ATOMIC_RELEASE); 159 w->processed_pkts++; 160 161 if (prod_crypto_type) { 162 struct rte_crypto_op *op = ev->event_ptr; 163 struct rte_mbuf *m; 164 165 if (op->type == RTE_CRYPTO_OP_TYPE_SYMMETRIC) { 166 if (op->sym->m_dst == NULL) 167 m = op->sym->m_src; 168 else 169 m = op->sym->m_dst; 170 171 to_free_in_bulk = m; 172 } else { 173 to_free_in_bulk = op->asym->modex.result.data; 174 } 175 rte_crypto_op_free(op); 176 } else { 177 to_free_in_bulk = ev->event_ptr; 178 } 179 180 bufs[count++] = to_free_in_bulk; 181 if (unlikely(count == buf_sz)) { 182 count = 0; 183 rte_mempool_put_bulk(pool, bufs, buf_sz); 184 } 185 186 return count; 187 } 188 189 static __rte_always_inline uint8_t 190 perf_process_last_stage_latency(struct rte_mempool *const pool, uint8_t prod_crypto_type, 191 struct rte_event *const ev, struct worker_data *const w, 192 void *bufs[], int const buf_sz, uint8_t count) 193 { 194 uint64_t latency; 195 struct perf_elt *pe; 196 void *to_free_in_bulk; 197 198 /* release fence here ensures event_prt is 199 * stored before updating the number of 200 * processed packets for worker lcores 201 */ 202 rte_atomic_thread_fence(__ATOMIC_RELEASE); 203 w->processed_pkts++; 204 205 if (prod_crypto_type) { 206 struct rte_crypto_op *op = ev->event_ptr; 207 struct rte_mbuf *m; 208 209 if (op->type == RTE_CRYPTO_OP_TYPE_SYMMETRIC) { 210 if (op->sym->m_dst == NULL) 211 m = op->sym->m_src; 212 else 213 m = op->sym->m_dst; 214 215 to_free_in_bulk = m; 216 pe = rte_pktmbuf_mtod(m, struct perf_elt *); 217 } else { 218 pe = RTE_PTR_ADD(op->asym->modex.result.data, 219 op->asym->modex.result.length); 220 to_free_in_bulk = op->asym->modex.result.data; 221 } 222 rte_crypto_op_free(op); 223 } else { 224 pe = ev->event_ptr; 225 to_free_in_bulk = pe; 226 } 227 228 latency = rte_get_timer_cycles() - pe->timestamp; 229 w->latency += latency; 230 231 bufs[count++] = to_free_in_bulk; 232 if (unlikely(count == buf_sz)) { 233 count = 0; 234 rte_mempool_put_bulk(pool, bufs, buf_sz); 235 } 236 237 return count; 238 } 239 240 241 static inline int 242 perf_nb_event_ports(struct evt_options *opt) 243 { 244 return evt_nr_active_lcores(opt->wlcores) + 245 evt_nr_active_lcores(opt->plcores); 246 } 247 248 int perf_test_result(struct evt_test *test, struct evt_options *opt); 249 int perf_opt_check(struct evt_options *opt, uint64_t nb_queues); 250 int perf_test_setup(struct evt_test *test, struct evt_options *opt); 251 int perf_ethdev_setup(struct evt_test *test, struct evt_options *opt); 252 int perf_cryptodev_setup(struct evt_test *test, struct evt_options *opt); 253 int perf_mempool_setup(struct evt_test *test, struct evt_options *opt); 254 int perf_event_dev_port_setup(struct evt_test *test, struct evt_options *opt, 255 uint8_t stride, uint8_t nb_queues, 256 const struct rte_event_port_conf *port_conf); 257 int perf_event_dev_service_setup(uint8_t dev_id); 258 int perf_launch_lcores(struct evt_test *test, struct evt_options *opt, 259 int (*worker)(void *)); 260 void perf_opt_dump(struct evt_options *opt, uint8_t nb_queues); 261 void perf_test_destroy(struct evt_test *test, struct evt_options *opt); 262 void perf_eventdev_destroy(struct evt_test *test, struct evt_options *opt); 263 void perf_cryptodev_destroy(struct evt_test *test, struct evt_options *opt); 264 void perf_ethdev_destroy(struct evt_test *test, struct evt_options *opt); 265 void perf_ethdev_rx_stop(struct evt_test *test, struct evt_options *opt); 266 void perf_mempool_destroy(struct evt_test *test, struct evt_options *opt); 267 void perf_worker_cleanup(struct rte_mempool *const pool, uint8_t dev_id, 268 uint8_t port_id, struct rte_event events[], 269 uint16_t nb_enq, uint16_t nb_deq); 270 271 #endif /* _TEST_PERF_COMMON_ */ 272