/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2017 Cavium, Inc
 */

#ifndef _TEST_PERF_COMMON_
#define _TEST_PERF_COMMON_

#include <stdio.h>
#include <stdbool.h>
#include <unistd.h>

#include <rte_cryptodev.h>
#include <rte_cycles.h>
#include <rte_ethdev.h>
#include <rte_event_crypto_adapter.h>
#include <rte_event_dma_adapter.h>
#include <rte_event_eth_rx_adapter.h>
#include <rte_event_eth_tx_adapter.h>
#include <rte_event_timer_adapter.h>
#include <rte_eventdev.h>
#include <rte_lcore.h>
#include <rte_malloc.h>
#include <rte_mempool.h>
#include <rte_prefetch.h>

#include "evt_common.h"
#include "evt_options.h"
#include "evt_test.h"

#define TEST_PERF_CA_ID 0
#define TEST_PERF_DA_ID 0

struct test_perf;

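/* Per-worker lcore state: packet and latency accumulators plus the event
 * device and port this worker polls.
 */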
struct __rte_cache_aligned worker_data {
	uint64_t processed_pkts;
	uint64_t latency;
	uint8_t dev_id;
	uint8_t port_id;
	struct test_perf *t;
};

struct crypto_adptr_data {
	uint8_t cdev_id;
	uint16_t cdev_qp_id;
	void **crypto_sess;
};

struct dma_adptr_data {
	uint8_t dma_dev_id;
	uint16_t vchan_id;
};

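/* Per-producer port state, including the crypto/DMA adapter resources bound
 * to that producer.
 */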
struct __rte_cache_aligned prod_data {
	uint8_t dev_id;
	uint8_t port_id;
	uint8_t queue_id;
	struct crypto_adptr_data ca;
	struct dma_adptr_data da;
	struct test_perf *t;
};

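/* Global test context shared by all producer and worker lcores. */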
struct __rte_cache_aligned test_perf {
	/* Don't change the offset of "done". The signal handler uses this
	 * memory to terminate the work of all lcores.
	 */
	int done;
	uint64_t outstand_pkts;
	uint8_t nb_workers;
	enum evt_test_result result;
	uint32_t nb_flows;
	uint64_t nb_pkts;
	struct rte_mempool *pool;
	struct prod_data prod[EVT_MAX_PORTS];
	struct worker_data worker[EVT_MAX_PORTS];
	struct evt_options *opt;
	alignas(RTE_CACHE_LINE_SIZE) uint8_t sched_type_list[EVT_MAX_STAGES];
	alignas(RTE_CACHE_LINE_SIZE) struct rte_event_timer_adapter *timer_adptr[
		RTE_EVENT_TIMER_ADAPTER_NUM_MAX];
	struct rte_mempool *ca_op_pool;
	struct rte_mempool *ca_sess_pool;
	struct rte_mempool *ca_asym_sess_pool;
	struct rte_mempool *ca_vector_pool;
};

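/* Element carried through the pipeline to measure forward latency. The
 * timestamp overlays the user_meta field of the embedded event timer, so the
 * same layout serves both timer-armed and software-produced events.
 */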
struct __rte_cache_aligned perf_elt {
	union {
		struct rte_event_timer tim;
		struct {
			char pad[offsetof(struct rte_event_timer, user_meta)];
			uint64_t timestamp;
		};
	};
};

#define BURST_SIZE 16
#define MAX_PROD_ENQ_BURST_SIZE 128

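/* Declare and initialize the per-worker locals (device, port, stage list,
 * mempool, free-buffer batch, ...) common to all worker loop implementations.
 */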
#define PERF_WORKER_INIT\
	struct worker_data *w  = arg;\
	struct test_perf *t = w->t;\
	struct evt_options *opt = t->opt;\
	const uint8_t dev = w->dev_id;\
	const uint8_t port = w->port_id;\
	const uint8_t prod_timer_type = \
		opt->prod_type == EVT_PROD_TYPE_EVENT_TIMER_ADPTR;\
	uint8_t *const sched_type_list = &t->sched_type_list[0];\
	const enum evt_prod_type prod_type = opt->prod_type;\
	struct rte_mempool *const pool = t->pool;\
	const uint8_t nb_stages = t->opt->nb_stages;\
	const uint8_t laststage = nb_stages - 1;\
	uint8_t cnt = 0;\
	alignas(RTE_CACHE_LINE_SIZE) void *bufs[16];\
	int const sz = RTE_DIM(bufs);\
	uint8_t stage;\
	struct perf_elt *pe = NULL;\
	if (opt->verbose_level > 1)\
		printf("%s(): lcore %d dev_id %d port=%d\n", __func__,\
				rte_lcore_id(), dev, port)

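/* Stamp the forward-latency start time in the location appropriate for the
 * producer type: the mbuf data area for symmetric crypto ops, the modex
 * result buffer for asymmetric ops, the op user_meta for DMA adapter ops and
 * the perf_elt itself otherwise.
 */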
static __rte_always_inline void
perf_mark_fwd_latency(enum evt_prod_type prod_type, struct rte_event *const ev)
{
	struct perf_elt *pe;

	if (prod_type == EVT_PROD_TYPE_EVENT_CRYPTO_ADPTR) {
		struct rte_crypto_op *op = ev->event_ptr;
		struct rte_mbuf *m;

		if (op->type == RTE_CRYPTO_OP_TYPE_SYMMETRIC) {
			if (op->sym->m_dst == NULL)
				m = op->sym->m_src;
			else
				m = op->sym->m_dst;

			pe = rte_pktmbuf_mtod(m, struct perf_elt *);
		} else {
			pe = RTE_PTR_ADD(op->asym->modex.result.data,
					 op->asym->modex.result.length);
		}
		pe->timestamp = rte_get_timer_cycles();
	} else if (prod_type == EVT_PROD_TYPE_EVENT_DMA_ADPTR) {
		struct rte_event_dma_adapter_op *op = ev->event_ptr;

		op->user_meta = rte_get_timer_cycles();
	} else {
		pe = ev->event_ptr;
		pe->timestamp = rte_get_timer_cycles();
	}
}

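/* Check a dequeued crypto event; on failure free the op and report its
 * status so the caller can drop the event.
 */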
static __rte_always_inline int
perf_handle_crypto_ev(struct rte_event *ev)
{
	struct rte_crypto_op *op = ev->event_ptr;

	if (unlikely(op->status != RTE_CRYPTO_OP_STATUS_SUCCESS)) {
		/* Read the status before the op is returned to its mempool. */
		const int status = op->status;

		rte_crypto_op_free(op);
		return status;
	}

	return 0;
}

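/* Locate the perf_elt holding the timestamp for a crypto vector event. */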
static __rte_always_inline struct perf_elt *
perf_elt_from_vec_get(struct rte_event_vector *vec)
{
	/* The timestamp for a vector event is stored in its first element */
	struct rte_crypto_op *cop = vec->ptrs[0];
	struct rte_mbuf *m;

	if (cop->type == RTE_CRYPTO_OP_TYPE_SYMMETRIC) {
		m = cop->sym->m_dst == NULL ? cop->sym->m_src : cop->sym->m_dst;
		return rte_pktmbuf_mtod(m, struct perf_elt *);
	} else {
		return RTE_PTR_ADD(cop->asym->modex.result.data, cop->asym->modex.result.length);
	}
}

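/* Drop failed crypto ops from a vector event, compacting the survivors in
 * place. Returns -ENOENT when every op failed and the vector was released;
 * otherwise returns 0 and, when forward latency is enabled, points *pe at
 * the vector's perf data.
 */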
static __rte_always_inline int
perf_handle_crypto_vector_ev(struct rte_event *ev, struct perf_elt **pe,
		const int enable_fwd_latency)
{
	struct rte_event_vector *vec = ev->vec;
	struct rte_crypto_op *cop;
	struct rte_mbuf *m;
	int i, n = 0;
	void *data;

	for (i = 0; i < vec->nb_elem; i++) {
		cop = vec->ptrs[i];
		if (unlikely(cop->status != RTE_CRYPTO_OP_STATUS_SUCCESS)) {
			if (cop->type == RTE_CRYPTO_OP_TYPE_SYMMETRIC) {
				m = cop->sym->m_dst == NULL ? cop->sym->m_src : cop->sym->m_dst;
				rte_pktmbuf_free(m);
			} else {
				data = cop->asym->modex.result.data;
				rte_mempool_put(rte_mempool_from_obj(data), data);
			}
			rte_crypto_op_free(cop);
			continue;
		}
		vec->ptrs[n++] = cop;
	}

	/* All cops failed, free the vector */
	if (n == 0) {
		rte_mempool_put(rte_mempool_from_obj(vec), vec);
		return -ENOENT;
	}

	vec->nb_elem = n;

	/* Forward latency not enabled - perf data will not be accessed */
	if (!enable_fwd_latency)
		return 0;

	/* Get pointer to perf data */
	*pe = perf_elt_from_vec_get(vec);

	return 0;
}

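/* Account a completed event at the last pipeline stage and batch its backing
 * buffer for bulk release to the mempool. Returns the updated batch count.
 */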
static __rte_always_inline int
perf_process_last_stage(struct rte_mempool *const pool, enum evt_prod_type prod_type,
			struct rte_event *const ev, struct worker_data *const w,
			void *bufs[], int const buf_sz, uint8_t count)
{
	void *to_free_in_bulk;

	/* Release fence here ensures event_ptr is stored before updating the
	 * number of processed packets for worker lcores.
	 */
	rte_atomic_thread_fence(rte_memory_order_release);
	w->processed_pkts++;

	if (prod_type == EVT_PROD_TYPE_EVENT_CRYPTO_ADPTR) {
		struct rte_crypto_op *op = ev->event_ptr;
		struct rte_mbuf *m;

		if (op->type == RTE_CRYPTO_OP_TYPE_SYMMETRIC) {
			if (op->sym->m_dst == NULL)
				m = op->sym->m_src;
			else
				m = op->sym->m_dst;

			to_free_in_bulk = m;
		} else {
			to_free_in_bulk = op->asym->modex.result.data;
		}
		rte_crypto_op_free(op);
	} else {
		to_free_in_bulk = ev->event_ptr;
	}

	bufs[count++] = to_free_in_bulk;
	if (unlikely(count == buf_sz)) {
		count = 0;
		rte_mempool_put_bulk(pool, bufs, buf_sz);
	}

	return count;
}

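/* Same as perf_process_last_stage(), but also accumulate the forward latency
 * measured from the producer-side timestamp.
 */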
static __rte_always_inline uint8_t
perf_process_last_stage_latency(struct rte_mempool *const pool, enum evt_prod_type prod_type,
				struct rte_event *const ev, struct worker_data *const w,
				void *bufs[], int const buf_sz, uint8_t count)
{
	uint64_t latency, tstamp;
	struct perf_elt *pe;
	void *to_free_in_bulk;

	/* Release fence here ensures event_ptr is stored before updating the number of processed
	 * packets for worker lcores.
	 */
	rte_atomic_thread_fence(rte_memory_order_release);
	w->processed_pkts++;

	if (prod_type == EVT_PROD_TYPE_EVENT_CRYPTO_ADPTR) {
		struct rte_crypto_op *op = ev->event_ptr;
		struct rte_mbuf *m;

		if (op->type == RTE_CRYPTO_OP_TYPE_SYMMETRIC) {
			if (op->sym->m_dst == NULL)
				m = op->sym->m_src;
			else
				m = op->sym->m_dst;

			to_free_in_bulk = m;
			pe = rte_pktmbuf_mtod(m, struct perf_elt *);
		} else {
			pe = RTE_PTR_ADD(op->asym->modex.result.data,
					 op->asym->modex.result.length);
			to_free_in_bulk = op->asym->modex.result.data;
		}
		tstamp = pe->timestamp;
		rte_crypto_op_free(op);
	} else if (prod_type == EVT_PROD_TYPE_EVENT_DMA_ADPTR) {
		struct rte_event_dma_adapter_op *op = ev->event_ptr;

		to_free_in_bulk = op;
		tstamp = op->user_meta;
	} else {
		pe = ev->event_ptr;
		tstamp = pe->timestamp;
		to_free_in_bulk = pe;
	}

	latency = rte_get_timer_cycles() - tstamp;
	w->latency += latency;

	bufs[count++] = to_free_in_bulk;
	if (unlikely(count == buf_sz)) {
		count = 0;
		rte_mempool_put_bulk(pool, bufs, buf_sz);
	}

	return count;
}

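/* Last-stage handling for crypto vector events: account all elements,
 * optionally record forward latency, and return the payload buffers, crypto
 * ops and the vector itself to their mempools.
 */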
static __rte_always_inline void
perf_process_vector_last_stage(struct rte_mempool *const pool,
		struct rte_mempool *const ca_pool, struct rte_event *const ev,
		struct worker_data *const w, const bool enable_fwd_latency)
{
	struct rte_event_vector *vec = ev->vec;
	struct rte_crypto_op *cop;
	void *bufs[vec->nb_elem];
	struct perf_elt *pe;
	uint64_t latency;
	int i;

	/* Release fence here ensures event_ptr is stored before updating the number of processed
	 * packets for worker lcores.
	 */
	rte_atomic_thread_fence(rte_memory_order_release);
	w->processed_pkts += vec->nb_elem;

	if (enable_fwd_latency) {
		pe = perf_elt_from_vec_get(vec);
		latency = rte_get_timer_cycles() - pe->timestamp;
		w->latency += latency;
	}

	for (i = 0; i < vec->nb_elem; i++) {
		cop = vec->ptrs[i];
		if (cop->type == RTE_CRYPTO_OP_TYPE_SYMMETRIC)
			bufs[i] = cop->sym->m_dst == NULL ? cop->sym->m_src : cop->sym->m_dst;
		else
			bufs[i] = cop->asym->modex.result.data;
	}

	rte_mempool_put_bulk(pool, bufs, vec->nb_elem);
	rte_mempool_put_bulk(ca_pool, (void * const *)vec->ptrs, vec->nb_elem);
	rte_mempool_put(rte_mempool_from_obj(vec), vec);
}

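/* One event port per worker lcore plus one per producer lcore. */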
static inline int
perf_nb_event_ports(struct evt_options *opt)
{
	return evt_nr_active_lcores(opt->wlcores) +
			evt_nr_active_lcores(opt->plcores);
}

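/*
 * A minimal sketch of how a worker loop is expected to combine the helpers
 * above, assuming a single-stage pipeline without forward-latency
 * measurement. The function name and the single-event dequeue are
 * illustrative only; the real worker loops live in the individual perf test
 * implementations and also handle enqueue to further stages.
 *
 *	static int
 *	perf_worker_sketch(void *arg)
 *	{
 *		uint16_t enq = 0, deq = 0;
 *		struct rte_event ev;
 *		PERF_WORKER_INIT;
 *
 *		while (t->done == 0) {
 *			deq = rte_event_dequeue_burst(dev, port, &ev, 1, 0);
 *			if (!deq) {
 *				rte_pause();
 *				continue;
 *			}
 *			cnt = perf_process_last_stage(pool, prod_type, &ev, w,
 *						      bufs, sz, cnt);
 *		}
 *		perf_worker_cleanup(pool, dev, port, &ev, enq, deq);
 *		return 0;
 *	}
 */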
int perf_test_result(struct evt_test *test, struct evt_options *opt);
int perf_opt_check(struct evt_options *opt, uint64_t nb_queues);
int perf_test_setup(struct evt_test *test, struct evt_options *opt);
int perf_ethdev_setup(struct evt_test *test, struct evt_options *opt);
int perf_cryptodev_setup(struct evt_test *test, struct evt_options *opt);
int perf_dmadev_setup(struct evt_test *test, struct evt_options *opt);
int perf_mempool_setup(struct evt_test *test, struct evt_options *opt);
int perf_event_dev_port_setup(struct evt_test *test, struct evt_options *opt,
				uint8_t stride, uint8_t nb_queues,
				const struct rte_event_port_conf *port_conf);
int perf_event_dev_service_setup(uint8_t dev_id);
int perf_launch_lcores(struct evt_test *test, struct evt_options *opt,
		int (*worker)(void *));
void perf_opt_dump(struct evt_options *opt, uint8_t nb_queues);
void perf_test_destroy(struct evt_test *test, struct evt_options *opt);
void perf_eventdev_destroy(struct evt_test *test, struct evt_options *opt);
void perf_cryptodev_destroy(struct evt_test *test, struct evt_options *opt);
void perf_dmadev_destroy(struct evt_test *test, struct evt_options *opt);
void perf_ethdev_destroy(struct evt_test *test, struct evt_options *opt);
void perf_ethdev_rx_stop(struct evt_test *test, struct evt_options *opt);
void perf_mempool_destroy(struct evt_test *test, struct evt_options *opt);
void perf_worker_cleanup(struct rte_mempool *const pool, uint8_t dev_id,
			 uint8_t port_id, struct rte_event events[],
			 uint16_t nb_enq, uint16_t nb_deq);

#endif /* _TEST_PERF_COMMON_ */