xref: /dpdk/app/test-eventdev/test_perf_common.h (revision b9a87346b05c562dd6005ee025eca67a1a80bea8)
/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2017 Cavium, Inc
 */

#ifndef _TEST_PERF_COMMON_
#define _TEST_PERF_COMMON_

#include <stdio.h>
#include <stdbool.h>
#include <unistd.h>

#include <rte_cryptodev.h>
#include <rte_cycles.h>
#include <rte_ethdev.h>
#include <rte_event_crypto_adapter.h>
#include <rte_event_eth_rx_adapter.h>
#include <rte_event_eth_tx_adapter.h>
#include <rte_event_timer_adapter.h>
#include <rte_eventdev.h>
#include <rte_lcore.h>
#include <rte_malloc.h>
#include <rte_mempool.h>
#include <rte_prefetch.h>

#include "evt_common.h"
#include "evt_options.h"
#include "evt_test.h"

#define TEST_PERF_CA_ID 0
#define TEST_PERF_DA_ID 0

struct test_perf;

struct __rte_cache_aligned worker_data {
	uint64_t processed_pkts;
	uint64_t latency;
	uint8_t dev_id;
	uint8_t port_id;
	struct test_perf *t;
};

struct crypto_adptr_data {
	uint8_t cdev_id;
	uint16_t cdev_qp_id;
	void **crypto_sess;
};

struct dma_adptr_data {
	uint8_t dma_dev_id;
	uint16_t vchan_id;
	void **dma_op;
};

struct __rte_cache_aligned prod_data {
	uint8_t dev_id;
	uint8_t port_id;
	uint8_t queue_id;
	struct crypto_adptr_data ca;
	struct dma_adptr_data da;
	struct test_perf *t;
};

struct __rte_cache_aligned test_perf {
	/* Don't change the offset of "done". The signal handler uses this
	 * memory to terminate the work on all lcores.
	 */
	int done;
	uint64_t outstand_pkts;
	uint8_t nb_workers;
	enum evt_test_result result;
	uint32_t nb_flows;
	uint64_t nb_pkts;
	struct rte_mempool *pool;
	struct prod_data prod[EVT_MAX_PORTS];
	struct worker_data worker[EVT_MAX_PORTS];
	struct evt_options *opt;
	alignas(RTE_CACHE_LINE_SIZE) uint8_t sched_type_list[EVT_MAX_STAGES];
	alignas(RTE_CACHE_LINE_SIZE) struct rte_event_timer_adapter *timer_adptr[
		RTE_EVENT_TIMER_ADAPTER_NUM_MAX];
	struct rte_mempool *ca_op_pool;
	struct rte_mempool *ca_sess_pool;
	struct rte_mempool *ca_asym_sess_pool;
	struct rte_mempool *ca_vector_pool;
	struct rte_mempool *da_op_pool;
};

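/* Per-event payload carrying the forward-latency timestamp. For timer
 * adapter producers the same memory doubles as the rte_event_timer, so the
 * timestamp is overlaid on the timer's user_meta area via the padded union.
 */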
struct __rte_cache_aligned perf_elt {
	union {
		struct rte_event_timer tim;
		struct {
			char pad[offsetof(struct rte_event_timer, user_meta)];
			uint64_t timestamp;
		};
	};
};

#define BURST_SIZE 16
#define MAX_PROD_ENQ_BURST_SIZE 128

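/* Common prologue for worker loops: pulls the per-worker context (device,
 * port, stage list, mempool, etc.) out of the worker_data argument into
 * local variables and optionally prints the lcore/port binding.
 */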
#define PERF_WORKER_INIT\
	struct worker_data *w = arg;\
	struct test_perf *t = w->t;\
	struct evt_options *opt = t->opt;\
	const uint8_t dev = w->dev_id;\
	const uint8_t port = w->port_id;\
	const uint8_t prod_timer_type = \
		opt->prod_type == EVT_PROD_TYPE_EVENT_TIMER_ADPTR;\
	uint8_t *const sched_type_list = &t->sched_type_list[0];\
	const enum evt_prod_type prod_type = opt->prod_type;\
	struct rte_mempool *const pool = t->pool;\
	const uint8_t nb_stages = t->opt->nb_stages;\
	const uint8_t laststage = nb_stages - 1;\
	uint8_t cnt = 0;\
	alignas(RTE_CACHE_LINE_SIZE) void *bufs[16];\
	int const sz = RTE_DIM(bufs);\
	uint8_t stage;\
	struct perf_elt *pe = NULL;\
	if (opt->verbose_level > 1)\
		printf("%s(): lcore %d dev_id %d port=%d\n", __func__,\
				rte_lcore_id(), dev, port)

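/* Record the enqueue timestamp used for forward-latency measurement; the
 * last stage subtracts it from rte_get_timer_cycles() to accumulate latency.
 */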
static __rte_always_inline void
perf_mark_fwd_latency(struct perf_elt *const pe)
{
	pe->timestamp = rte_get_timer_cycles();
}

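/* Validate a completed crypto op delivered via the crypto adapter: free it on
 * failure, otherwise locate the perf_elt (in the mbuf data for symmetric ops,
 * after the modex result for asymmetric ops) when forward latency is enabled.
 */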
static __rte_always_inline int
perf_handle_crypto_ev(struct rte_event *ev, struct perf_elt **pe, int enable_fwd_latency)
{
	struct rte_crypto_op *op = ev->event_ptr;
	struct rte_mbuf *m;

	if (unlikely(op->status != RTE_CRYPTO_OP_STATUS_SUCCESS)) {
		/* Read the status before the op is returned to its mempool. */
		int status = op->status;

		rte_crypto_op_free(op);
		return status;
	}

	/* Forward latency not enabled - perf data will not be accessed */
	if (!enable_fwd_latency)
		return 0;

	/* Get pointer to perf data */
	if (op->type == RTE_CRYPTO_OP_TYPE_SYMMETRIC) {
		if (op->sym->m_dst == NULL)
			m = op->sym->m_src;
		else
			m = op->sym->m_dst;
		*pe = rte_pktmbuf_mtod(m, struct perf_elt *);
	} else {
		*pe = RTE_PTR_ADD(op->asym->modex.result.data, op->asym->modex.result.length);
	}

	return 0;
}

static __rte_always_inline struct perf_elt *
perf_elt_from_vec_get(struct rte_event_vector *vec)
{
	/* Timestamp for vector event stored in first element */
	struct rte_crypto_op *cop = vec->ptrs[0];
	struct rte_mbuf *m;

	if (cop->type == RTE_CRYPTO_OP_TYPE_SYMMETRIC) {
		m = cop->sym->m_dst == NULL ? cop->sym->m_src : cop->sym->m_dst;
		return rte_pktmbuf_mtod(m, struct perf_elt *);
	} else {
		return RTE_PTR_ADD(cop->asym->modex.result.data, cop->asym->modex.result.length);
	}
}

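/* Drop failed crypto ops from an event vector (freeing their buffers), compact
 * the remaining ops in place, and fetch the perf_elt from the first surviving
 * op when forward latency is enabled. Returns -ENOENT if every op failed.
 */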
static __rte_always_inline int
perf_handle_crypto_vector_ev(struct rte_event *ev, struct perf_elt **pe,
		const int enable_fwd_latency)
{
	struct rte_event_vector *vec = ev->vec;
	struct rte_crypto_op *cop;
	struct rte_mbuf *m;
	int i, n = 0;
	void *data;

	for (i = 0; i < vec->nb_elem; i++) {
		cop = vec->ptrs[i];
		if (unlikely(cop->status != RTE_CRYPTO_OP_STATUS_SUCCESS)) {
			if (cop->type == RTE_CRYPTO_OP_TYPE_SYMMETRIC) {
				m = cop->sym->m_dst == NULL ? cop->sym->m_src : cop->sym->m_dst;
				rte_pktmbuf_free(m);
			} else {
				data = cop->asym->modex.result.data;
				rte_mempool_put(rte_mempool_from_obj(data), data);
			}
			rte_crypto_op_free(cop);
			continue;
		}
		vec->ptrs[n++] = cop;
	}

	/* All cops failed, free the vector */
	if (n == 0) {
		rte_mempool_put(rte_mempool_from_obj(vec), vec);
		return -ENOENT;
	}

	vec->nb_elem = n;

	/* Forward latency not enabled - perf data will not be accessed */
	if (!enable_fwd_latency)
		return 0;

	/* Get pointer to perf data */
	*pe = perf_elt_from_vec_get(vec);

	return 0;
}

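/* Last-stage handler without latency accounting: count the packet, release
 * any crypto op, and batch the underlying buffer for a bulk mempool put once
 * buf_sz entries have accumulated. Returns the updated batch count.
 */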
static __rte_always_inline int
perf_process_last_stage(struct rte_mempool *const pool, enum evt_prod_type prod_type,
			struct rte_event *const ev, struct worker_data *const w,
			void *bufs[], int const buf_sz, uint8_t count)
{
	void *to_free_in_bulk;

	/* The release fence here ensures event_ptr is stored before updating
	 * the number of processed packets for worker lcores.
	 */
	rte_atomic_thread_fence(rte_memory_order_release);
	w->processed_pkts++;

	if (prod_type == EVT_PROD_TYPE_EVENT_CRYPTO_ADPTR) {
		struct rte_crypto_op *op = ev->event_ptr;
		struct rte_mbuf *m;

		if (op->type == RTE_CRYPTO_OP_TYPE_SYMMETRIC) {
			if (op->sym->m_dst == NULL)
				m = op->sym->m_src;
			else
				m = op->sym->m_dst;

			to_free_in_bulk = m;
		} else {
			to_free_in_bulk = op->asym->modex.result.data;
		}
		rte_crypto_op_free(op);
	} else if (prod_type == EVT_PROD_TYPE_EVENT_DMA_ADPTR) {
		return count;
	} else {
		to_free_in_bulk = ev->event_ptr;
	}

	bufs[count++] = to_free_in_bulk;
	if (unlikely(count == buf_sz)) {
		count = 0;
		rte_mempool_put_bulk(pool, bufs, buf_sz);
	}

	return count;
}

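/* Last-stage handler with latency accounting: same bookkeeping as above, but
 * it also reads the perf_elt timestamp from the event payload and adds the
 * measured forward latency to the worker's running total.
 */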
static __rte_always_inline uint8_t
perf_process_last_stage_latency(struct rte_mempool *const pool, enum evt_prod_type prod_type,
				struct rte_event *const ev, struct worker_data *const w,
				void *bufs[], int const buf_sz, uint8_t count)
{
	uint64_t latency;
	struct perf_elt *pe;
	void *to_free_in_bulk;

	/* Release fence here ensures event_ptr is stored before updating the number of processed
	 * packets for worker lcores.
	 */
	rte_atomic_thread_fence(rte_memory_order_release);
	w->processed_pkts++;

	if (prod_type == EVT_PROD_TYPE_EVENT_CRYPTO_ADPTR) {
		struct rte_crypto_op *op = ev->event_ptr;
		struct rte_mbuf *m;

		if (op->type == RTE_CRYPTO_OP_TYPE_SYMMETRIC) {
			if (op->sym->m_dst == NULL)
				m = op->sym->m_src;
			else
				m = op->sym->m_dst;

			to_free_in_bulk = m;
			pe = rte_pktmbuf_mtod(m, struct perf_elt *);
		} else {
			pe = RTE_PTR_ADD(op->asym->modex.result.data,
					 op->asym->modex.result.length);
			to_free_in_bulk = op->asym->modex.result.data;
		}
		rte_crypto_op_free(op);
	} else if (prod_type == EVT_PROD_TYPE_EVENT_DMA_ADPTR) {
		return count;
	} else {
		pe = ev->event_ptr;
		to_free_in_bulk = pe;
	}

	latency = rte_get_timer_cycles() - pe->timestamp;
	w->latency += latency;

	bufs[count++] = to_free_in_bulk;
	if (unlikely(count == buf_sz)) {
		count = 0;
		rte_mempool_put_bulk(pool, bufs, buf_sz);
	}

	return count;
}

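/* Last-stage handler for crypto vector events: accounts all elements at once,
 * optionally records latency from the first element, then bulk-frees the data
 * buffers, the crypto ops and finally the vector itself.
 */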
static __rte_always_inline void
perf_process_vector_last_stage(struct rte_mempool *const pool,
		struct rte_mempool *const ca_pool, struct rte_event *const ev,
		struct worker_data *const w, const bool enable_fwd_latency)
{
	struct rte_event_vector *vec = ev->vec;
	struct rte_crypto_op *cop;
	void *bufs[vec->nb_elem];
	struct perf_elt *pe;
	uint64_t latency;
	int i;

	/* Release fence here ensures event_ptr is stored before updating the number of processed
	 * packets for worker lcores.
	 */
	rte_atomic_thread_fence(rte_memory_order_release);
	w->processed_pkts += vec->nb_elem;

	if (enable_fwd_latency) {
		pe = perf_elt_from_vec_get(vec);
		latency = rte_get_timer_cycles() - pe->timestamp;
		w->latency += latency;
	}

	for (i = 0; i < vec->nb_elem; i++) {
		cop = vec->ptrs[i];
		if (cop->type == RTE_CRYPTO_OP_TYPE_SYMMETRIC)
			bufs[i] = cop->sym->m_dst == NULL ? cop->sym->m_src : cop->sym->m_dst;
		else
			bufs[i] = cop->asym->modex.result.data;
	}

	rte_mempool_put_bulk(pool, bufs, vec->nb_elem);
	rte_mempool_put_bulk(ca_pool, (void * const *)vec->ptrs, vec->nb_elem);
	rte_mempool_put(rte_mempool_from_obj(vec), vec);
}

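/* One event port is needed per worker lcore plus one per producer lcore. */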
static inline int
perf_nb_event_ports(struct evt_options *opt)
{
	return evt_nr_active_lcores(opt->wlcores) +
			evt_nr_active_lcores(opt->plcores);
}

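/* Setup, launch and teardown helpers shared by the perf_* tests; the
 * implementations live in test_perf_common.c.
 */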
356 
357 int perf_test_result(struct evt_test *test, struct evt_options *opt);
358 int perf_opt_check(struct evt_options *opt, uint64_t nb_queues);
359 int perf_test_setup(struct evt_test *test, struct evt_options *opt);
360 int perf_ethdev_setup(struct evt_test *test, struct evt_options *opt);
361 int perf_cryptodev_setup(struct evt_test *test, struct evt_options *opt);
362 int perf_dmadev_setup(struct evt_test *test, struct evt_options *opt);
363 int perf_mempool_setup(struct evt_test *test, struct evt_options *opt);
364 int perf_event_dev_port_setup(struct evt_test *test, struct evt_options *opt,
365 				uint8_t stride, uint8_t nb_queues,
366 				const struct rte_event_port_conf *port_conf);
367 int perf_event_dev_service_setup(uint8_t dev_id);
368 int perf_launch_lcores(struct evt_test *test, struct evt_options *opt,
369 		int (*worker)(void *));
370 void perf_opt_dump(struct evt_options *opt, uint8_t nb_queues);
371 void perf_test_destroy(struct evt_test *test, struct evt_options *opt);
372 void perf_eventdev_destroy(struct evt_test *test, struct evt_options *opt);
373 void perf_cryptodev_destroy(struct evt_test *test, struct evt_options *opt);
374 void perf_dmadev_destroy(struct evt_test *test, struct evt_options *opt);
375 void perf_ethdev_destroy(struct evt_test *test, struct evt_options *opt);
376 void perf_ethdev_rx_stop(struct evt_test *test, struct evt_options *opt);
377 void perf_mempool_destroy(struct evt_test *test, struct evt_options *opt);
378 void perf_worker_cleanup(struct rte_mempool *const pool, uint8_t dev_id,
379 			 uint8_t port_id, struct rte_event events[],
380 			 uint16_t nb_enq, uint16_t nb_deq);
381 
382 #endif /* _TEST_PERF_COMMON_ */
383