xref: /dpdk/app/test-eventdev/test_perf_common.h (revision 665b49c51639a10c553433bc2bcd85c7331c631e)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2017 Cavium, Inc
3  */
4 
5 #ifndef _TEST_PERF_COMMON_
6 #define _TEST_PERF_COMMON_
7 
8 #include <stdio.h>
9 #include <stdbool.h>
10 #include <unistd.h>
11 
12 #include <rte_cryptodev.h>
13 #include <rte_cycles.h>
14 #include <rte_ethdev.h>
15 #include <rte_event_crypto_adapter.h>
16 #include <rte_event_eth_rx_adapter.h>
17 #include <rte_event_eth_tx_adapter.h>
18 #include <rte_event_timer_adapter.h>
19 #include <rte_eventdev.h>
20 #include <rte_lcore.h>
21 #include <rte_malloc.h>
22 #include <rte_mempool.h>
23 #include <rte_prefetch.h>
24 
25 #include "evt_common.h"
26 #include "evt_options.h"
27 #include "evt_test.h"
28 
/* NOTE(review): "CA" presumably stands for crypto adapter, making this the
 * adapter id used by the perf tests -- confirm against the users of this macro.
 */
#define TEST_PERF_CA_ID 0

/* Forward declaration: worker_data and prod_data hold a pointer back to it. */
struct test_perf;
32 
/* Per-worker state; stored cache-aligned in test_perf.worker[]. */
struct worker_data {
	/* Number of events this worker has finished at the last stage. */
	uint64_t processed_pkts;
	/* Accumulated forward latency, in timer cycles. */
	uint64_t latency;
	/* Read by PERF_WORKER_INIT as "dev". */
	uint8_t dev_id;
	/* Read by PERF_WORKER_INIT as "port". */
	uint8_t port_id;
	/* Back-pointer to the test context this worker belongs to. */
	struct test_perf *t;
} __rte_cache_aligned;
40 
/* Crypto-adapter parameters carried by each producer (see prod_data.ca). */
struct crypto_adptr_data {
	/* presumably the crypto device id -- TODO confirm */
	uint8_t cdev_id;
	/* presumably the queue pair id on that device -- TODO confirm */
	uint16_t cdev_qp_id;
	/* Array of opaque session pointers; sizing is not visible in this header. */
	void **crypto_sess;
};
46 struct prod_data {
47 	uint8_t dev_id;
48 	uint8_t port_id;
49 	uint8_t queue_id;
50 	struct crypto_adptr_data ca;
51 	struct test_perf *t;
52 } __rte_cache_aligned;
53 
54 struct test_perf {
55 	/* Don't change the offset of "done". Signal handler use this memory
56 	 * to terminate all lcores work.
57 	 */
58 	int done;
59 	uint64_t outstand_pkts;
60 	uint8_t nb_workers;
61 	enum evt_test_result result;
62 	uint32_t nb_flows;
63 	uint64_t nb_pkts;
64 	struct rte_mempool *pool;
65 	struct prod_data prod[EVT_MAX_PORTS];
66 	struct worker_data worker[EVT_MAX_PORTS];
67 	struct evt_options *opt;
68 	uint8_t sched_type_list[EVT_MAX_STAGES] __rte_cache_aligned;
69 	struct rte_event_timer_adapter *timer_adptr[
70 		RTE_EVENT_TIMER_ADAPTER_NUM_MAX] __rte_cache_aligned;
71 	struct rte_mempool *ca_op_pool;
72 	struct rte_mempool *ca_sess_pool;
73 	struct rte_mempool *ca_asym_sess_pool;
74 	struct rte_mempool *ca_vector_pool;
75 } __rte_cache_aligned;
76 
77 struct perf_elt {
78 	union {
79 		struct rte_event_timer tim;
80 		struct {
81 			char pad[offsetof(struct rte_event_timer, user_meta)];
82 			uint64_t timestamp;
83 		};
84 	};
85 } __rte_cache_aligned;
86 
/* NOTE(review): neither constant is used inside this header; presumably the
 * worker dequeue burst and the upper bound on a producer enqueue burst --
 * confirm against the users.
 */
#define BURST_SIZE 16
#define MAX_PROD_ENQ_BURST_SIZE 128
89 
/*
 * Declares and initialises the locals used by a worker function.
 *
 * Requires a variable named "arg", assignable to struct worker_data *, to be
 * in scope at the expansion site.
 *
 * Introduces: w, t, opt, dev, port, prod_timer_type, prod_crypto_type,
 * sched_type_list, pool, nb_stages, laststage, cnt, bufs (16 slots), sz
 * (number of slots in bufs), stage and pe. When opt->verbose_level > 1 it
 * also prints a one-line banner identifying the lcore, device and port.
 *
 * The expansion ends in an unterminated statement; the invocation supplies
 * the trailing semicolon.
 */
#define PERF_WORKER_INIT \
	struct worker_data *w = arg; \
	struct test_perf *t = w->t; \
	struct evt_options *opt = t->opt; \
	const uint8_t dev = w->dev_id; \
	const uint8_t port = w->port_id; \
	const uint8_t prod_timer_type = \
		(opt->prod_type == EVT_PROD_TYPE_EVENT_TIMER_ADPTR); \
	const uint8_t prod_crypto_type = \
		(opt->prod_type == EVT_PROD_TYPE_EVENT_CRYPTO_ADPTR); \
	uint8_t *const sched_type_list = t->sched_type_list; \
	struct rte_mempool *const pool = t->pool; \
	const uint8_t nb_stages = opt->nb_stages; \
	const uint8_t laststage = nb_stages - 1; \
	uint8_t cnt = 0; \
	void *bufs[16] __rte_cache_aligned; \
	const int sz = RTE_DIM(bufs); \
	uint8_t stage; \
	struct perf_elt *pe = NULL; \
	if (opt->verbose_level > 1) \
		printf("%s(): lcore %d dev_id %d port=%d\n", __func__, \
		       rte_lcore_id(), dev, port)
112 
113 static __rte_always_inline void
114 perf_mark_fwd_latency(struct perf_elt *const pe)
115 {
116 	pe->timestamp = rte_get_timer_cycles();
117 }
118 
119 static __rte_always_inline int
120 perf_handle_crypto_ev(struct rte_event *ev, struct perf_elt **pe, int enable_fwd_latency)
121 {
122 	struct rte_crypto_op *op = ev->event_ptr;
123 	struct rte_mbuf *m;
124 
125 
126 	if (unlikely(op->status != RTE_CRYPTO_OP_STATUS_SUCCESS)) {
127 		rte_crypto_op_free(op);
128 		return op->status;
129 	}
130 
131 	/* Forward latency not enabled - perf data will not be accessed */
132 	if (!enable_fwd_latency)
133 		return 0;
134 
135 	/* Get pointer to perf data */
136 	if (op->type == RTE_CRYPTO_OP_TYPE_SYMMETRIC) {
137 		if (op->sym->m_dst == NULL)
138 			m = op->sym->m_src;
139 		else
140 			m = op->sym->m_dst;
141 		*pe = rte_pktmbuf_mtod(m, struct perf_elt *);
142 	} else {
143 		*pe = RTE_PTR_ADD(op->asym->modex.result.data, op->asym->modex.result.length);
144 	}
145 
146 	return 0;
147 }
148 
149 static __rte_always_inline struct perf_elt *
150 perf_elt_from_vec_get(struct rte_event_vector *vec)
151 {
152 	/* Timestamp for vector event stored in first element */
153 	struct rte_crypto_op *cop = vec->ptrs[0];
154 	struct rte_mbuf *m;
155 
156 	if (cop->type == RTE_CRYPTO_OP_TYPE_SYMMETRIC) {
157 		m = cop->sym->m_dst == NULL ? cop->sym->m_src : cop->sym->m_dst;
158 		return rte_pktmbuf_mtod(m, struct perf_elt *);
159 	} else {
160 		return RTE_PTR_ADD(cop->asym->modex.result.data, cop->asym->modex.result.length);
161 	}
162 }
163 
164 static __rte_always_inline int
165 perf_handle_crypto_vector_ev(struct rte_event *ev, struct perf_elt **pe,
166 		const int enable_fwd_latency)
167 {
168 	struct rte_event_vector *vec = ev->vec;
169 	struct rte_crypto_op *cop;
170 	struct rte_mbuf *m;
171 	int i, n = 0;
172 	void *data;
173 
174 	for (i = 0; i < vec->nb_elem; i++) {
175 		cop = vec->ptrs[i];
176 		if (unlikely(cop->status != RTE_CRYPTO_OP_STATUS_SUCCESS)) {
177 			if (cop->type == RTE_CRYPTO_OP_TYPE_SYMMETRIC) {
178 				m = cop->sym->m_dst == NULL ? cop->sym->m_src : cop->sym->m_dst;
179 				rte_pktmbuf_free(m);
180 			} else {
181 				data = cop->asym->modex.result.data;
182 				rte_mempool_put(rte_mempool_from_obj(data), data);
183 			}
184 			rte_crypto_op_free(cop);
185 			continue;
186 		}
187 		vec->ptrs[n++] = cop;
188 	}
189 
190 	/* All cops failed, free the vector */
191 	if (n == 0) {
192 		rte_mempool_put(rte_mempool_from_obj(vec), vec);
193 		return -ENOENT;
194 	}
195 
196 	vec->nb_elem = n;
197 
198 	/* Forward latency not enabled - perf data will be not accessed */
199 	if (!enable_fwd_latency)
200 		return 0;
201 
202 	/* Get pointer to perf data */
203 	*pe = perf_elt_from_vec_get(vec);
204 
205 	return 0;
206 }
207 
208 static __rte_always_inline int
209 perf_process_last_stage(struct rte_mempool *const pool, uint8_t prod_crypto_type,
210 		struct rte_event *const ev, struct worker_data *const w,
211 		void *bufs[], int const buf_sz, uint8_t count)
212 {
213 	void *to_free_in_bulk;
214 
215 	/* release fence here ensures event_prt is
216 	 * stored before updating the number of
217 	 * processed packets for worker lcores
218 	 */
219 	rte_atomic_thread_fence(__ATOMIC_RELEASE);
220 	w->processed_pkts++;
221 
222 	if (prod_crypto_type) {
223 		struct rte_crypto_op *op = ev->event_ptr;
224 		struct rte_mbuf *m;
225 
226 		if (op->type == RTE_CRYPTO_OP_TYPE_SYMMETRIC) {
227 			if (op->sym->m_dst == NULL)
228 				m = op->sym->m_src;
229 			else
230 				m = op->sym->m_dst;
231 
232 			to_free_in_bulk = m;
233 		} else {
234 			to_free_in_bulk = op->asym->modex.result.data;
235 		}
236 		rte_crypto_op_free(op);
237 	} else {
238 		to_free_in_bulk = ev->event_ptr;
239 	}
240 
241 	bufs[count++] = to_free_in_bulk;
242 	if (unlikely(count == buf_sz)) {
243 		count = 0;
244 		rte_mempool_put_bulk(pool, bufs, buf_sz);
245 	}
246 
247 	return count;
248 }
249 
250 static __rte_always_inline uint8_t
251 perf_process_last_stage_latency(struct rte_mempool *const pool, uint8_t prod_crypto_type,
252 		struct rte_event *const ev, struct worker_data *const w,
253 		void *bufs[], int const buf_sz, uint8_t count)
254 {
255 	uint64_t latency;
256 	struct perf_elt *pe;
257 	void *to_free_in_bulk;
258 
259 	/* Release fence here ensures event_prt is stored before updating the number of processed
260 	 * packets for worker lcores.
261 	 */
262 	rte_atomic_thread_fence(__ATOMIC_RELEASE);
263 	w->processed_pkts++;
264 
265 	if (prod_crypto_type) {
266 		struct rte_crypto_op *op = ev->event_ptr;
267 		struct rte_mbuf *m;
268 
269 		if (op->type == RTE_CRYPTO_OP_TYPE_SYMMETRIC) {
270 			if (op->sym->m_dst == NULL)
271 				m = op->sym->m_src;
272 			else
273 				m = op->sym->m_dst;
274 
275 			to_free_in_bulk = m;
276 			pe = rte_pktmbuf_mtod(m, struct perf_elt *);
277 		} else {
278 			pe = RTE_PTR_ADD(op->asym->modex.result.data,
279 					 op->asym->modex.result.length);
280 			to_free_in_bulk = op->asym->modex.result.data;
281 		}
282 		rte_crypto_op_free(op);
283 	} else {
284 		pe = ev->event_ptr;
285 		to_free_in_bulk = pe;
286 	}
287 
288 	latency = rte_get_timer_cycles() - pe->timestamp;
289 	w->latency += latency;
290 
291 	bufs[count++] = to_free_in_bulk;
292 	if (unlikely(count == buf_sz)) {
293 		count = 0;
294 		rte_mempool_put_bulk(pool, bufs, buf_sz);
295 	}
296 
297 	return count;
298 }
299 
300 static __rte_always_inline void
301 perf_process_vector_last_stage(struct rte_mempool *const pool,
302 		struct rte_mempool *const ca_pool, struct rte_event *const ev,
303 		struct worker_data *const w, const bool enable_fwd_latency)
304 {
305 	struct rte_event_vector *vec = ev->vec;
306 	struct rte_crypto_op *cop;
307 	void *bufs[vec->nb_elem];
308 	struct perf_elt *pe;
309 	uint64_t latency;
310 	int i;
311 
312 	/* Release fence here ensures event_prt is stored before updating the number of processed
313 	 * packets for worker lcores.
314 	 */
315 	rte_atomic_thread_fence(__ATOMIC_RELEASE);
316 	w->processed_pkts += vec->nb_elem;
317 
318 	if (enable_fwd_latency) {
319 		pe = perf_elt_from_vec_get(vec);
320 		latency = rte_get_timer_cycles() - pe->timestamp;
321 		w->latency += latency;
322 	}
323 
324 	for (i = 0; i < vec->nb_elem; i++) {
325 		cop = vec->ptrs[i];
326 		if (cop->type == RTE_CRYPTO_OP_TYPE_SYMMETRIC)
327 			bufs[i] = cop->sym->m_dst == NULL ? cop->sym->m_src : cop->sym->m_dst;
328 		else
329 			bufs[i] = cop->asym->modex.result.data;
330 	}
331 
332 	rte_mempool_put_bulk(pool, bufs, vec->nb_elem);
333 	rte_mempool_put_bulk(ca_pool, (void * const *)vec->ptrs, vec->nb_elem);
334 	rte_mempool_put(rte_mempool_from_obj(vec), vec);
335 }
336 
337 static inline int
338 perf_nb_event_ports(struct evt_options *opt)
339 {
340 	return evt_nr_active_lcores(opt->wlcores) +
341 			evt_nr_active_lcores(opt->plcores);
342 }
343 
344 int perf_test_result(struct evt_test *test, struct evt_options *opt);
345 int perf_opt_check(struct evt_options *opt, uint64_t nb_queues);
346 int perf_test_setup(struct evt_test *test, struct evt_options *opt);
347 int perf_ethdev_setup(struct evt_test *test, struct evt_options *opt);
348 int perf_cryptodev_setup(struct evt_test *test, struct evt_options *opt);
349 int perf_mempool_setup(struct evt_test *test, struct evt_options *opt);
350 int perf_event_dev_port_setup(struct evt_test *test, struct evt_options *opt,
351 				uint8_t stride, uint8_t nb_queues,
352 				const struct rte_event_port_conf *port_conf);
353 int perf_event_dev_service_setup(uint8_t dev_id);
354 int perf_launch_lcores(struct evt_test *test, struct evt_options *opt,
355 		int (*worker)(void *));
356 void perf_opt_dump(struct evt_options *opt, uint8_t nb_queues);
357 void perf_test_destroy(struct evt_test *test, struct evt_options *opt);
358 void perf_eventdev_destroy(struct evt_test *test, struct evt_options *opt);
359 void perf_cryptodev_destroy(struct evt_test *test, struct evt_options *opt);
360 void perf_ethdev_destroy(struct evt_test *test, struct evt_options *opt);
361 void perf_ethdev_rx_stop(struct evt_test *test, struct evt_options *opt);
362 void perf_mempool_destroy(struct evt_test *test, struct evt_options *opt);
363 void perf_worker_cleanup(struct rte_mempool *const pool, uint8_t dev_id,
364 			 uint8_t port_id, struct rte_event events[],
365 			 uint16_t nb_enq, uint16_t nb_deq);
366 
367 #endif /* _TEST_PERF_COMMON_ */
368