xref: /dpdk/app/test-eventdev/test_perf_common.h (revision 9ad3a41ab2a10db0059e1decdbf3ec038f348e08)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2017 Cavium, Inc
3  */
4 
5 #ifndef _TEST_PERF_COMMON_
6 #define _TEST_PERF_COMMON_
7 
8 #include <stdio.h>
9 #include <stdbool.h>
10 #include <unistd.h>
11 
12 #include <rte_cryptodev.h>
13 #include <rte_cycles.h>
14 #include <rte_ethdev.h>
15 #include <rte_eventdev.h>
16 #include <rte_event_crypto_adapter.h>
17 #include <rte_event_eth_rx_adapter.h>
18 #include <rte_event_timer_adapter.h>
19 #include <rte_lcore.h>
20 #include <rte_malloc.h>
21 #include <rte_mempool.h>
22 #include <rte_prefetch.h>
23 
24 #include "evt_common.h"
25 #include "evt_options.h"
26 #include "evt_test.h"
27 
/* Adapter id constant for the perf tests (presumably the event crypto
 * adapter id, going by the CA prefix — confirm against its users).
 */
#define TEST_PERF_CA_ID 0

/* Forward declaration: worker_data and prod_data only hold a pointer to it. */
struct test_perf;
31 
/* Per-worker state; PERF_WORKER_INIT receives a pointer to one as `arg`. */
struct worker_data {
	/* Incremented once per event by the last-stage helpers. */
	uint64_t processed_pkts;
	/* Running sum of timer-cycle deltas added by the latency helper. */
	uint64_t latency;
	/* Exposed to the worker as `dev` by PERF_WORKER_INIT. */
	uint8_t dev_id;
	/* Exposed to the worker as `port` by PERF_WORKER_INIT. */
	uint8_t port_id;
	/* Owning test context. */
	struct test_perf *t;
} __rte_cache_aligned;
39 
/* Crypto-adapter settings embedded in each prod_data entry.
 * NOTE(review): field meanings below are read from the names only — confirm.
 */
struct crypto_adptr_data {
	uint8_t cdev_id;	/* presumably the cryptodev id */
	uint16_t cdev_qp_id;	/* presumably the cryptodev queue pair id */
	struct rte_cryptodev_sym_session **crypto_sess;
};
45 struct prod_data {
46 	uint8_t dev_id;
47 	uint8_t port_id;
48 	uint8_t queue_id;
49 	struct crypto_adptr_data ca;
50 	struct test_perf *t;
51 } __rte_cache_aligned;
52 
53 struct test_perf {
54 	/* Don't change the offset of "done". Signal handler use this memory
55 	 * to terminate all lcores work.
56 	 */
57 	int done;
58 	uint64_t outstand_pkts;
59 	uint8_t nb_workers;
60 	enum evt_test_result result;
61 	uint32_t nb_flows;
62 	uint64_t nb_pkts;
63 	struct rte_mempool *pool;
64 	struct prod_data prod[EVT_MAX_PORTS];
65 	struct worker_data worker[EVT_MAX_PORTS];
66 	struct evt_options *opt;
67 	uint8_t sched_type_list[EVT_MAX_STAGES] __rte_cache_aligned;
68 	struct rte_event_timer_adapter *timer_adptr[
69 		RTE_EVENT_TIMER_ADAPTER_NUM_MAX] __rte_cache_aligned;
70 	struct rte_mempool *ca_op_pool;
71 	struct rte_mempool *ca_sess_pool;
72 	struct rte_mempool *ca_sess_priv_pool;
73 } __rte_cache_aligned;
74 
75 struct perf_elt {
76 	union {
77 		struct rte_event_timer tim;
78 		struct {
79 			char pad[offsetof(struct rte_event_timer, user_meta)];
80 			uint64_t timestamp;
81 		};
82 	};
83 } __rte_cache_aligned;
84 
/* Burst-size constants; their users are outside this header.
 * NOTE(review): BURST_SIZE is presumably the event burst size used by the
 * workers, MAX_PROD_ENQ_BURST_SIZE an upper bound on a producer enqueue
 * burst — confirm against callers.
 */
#define BURST_SIZE 16
#define MAX_PROD_ENQ_BURST_SIZE 128
87 
/*
 * Common prologue for perf worker functions.
 *
 * Requires `arg` in the enclosing scope, convertible to struct worker_data *.
 * Declares these locals in the enclosing scope: w, t, opt, dev, port,
 * prod_timer_type, prod_crypto_type, sched_type_list, pool, nb_stages,
 * laststage, cnt, bufs and sz (sz is the element count of bufs).
 * When opt->verbose_level > 1 it prints the function name, lcore id,
 * device id and port.
 *
 * rte_lcore_id() returns an unsigned value, hence the %u conversion.
 * The expansion ends without a semicolon: invoke as `PERF_WORKER_INIT;`.
 */
#define PERF_WORKER_INIT\
	struct worker_data *w  = arg;\
	struct test_perf *t = w->t;\
	struct evt_options *opt = t->opt;\
	const uint8_t dev = w->dev_id;\
	const uint8_t port = w->port_id;\
	const uint8_t prod_timer_type = \
		opt->prod_type == EVT_PROD_TYPE_EVENT_TIMER_ADPTR;\
	const uint8_t prod_crypto_type = \
		opt->prod_type == EVT_PROD_TYPE_EVENT_CRYPTO_ADPTR;\
	uint8_t *const sched_type_list = &t->sched_type_list[0];\
	struct rte_mempool *const pool = t->pool;\
	const uint8_t nb_stages = t->opt->nb_stages;\
	const uint8_t laststage = nb_stages - 1;\
	uint8_t cnt = 0;\
	void *bufs[16] __rte_cache_aligned;\
	int const sz = RTE_DIM(bufs);\
	if (opt->verbose_level > 1)\
		printf("%s(): lcore %u dev_id %d port=%d\n", __func__,\
				rte_lcore_id(), dev, port)
108 
109 static __rte_always_inline int
110 perf_process_last_stage(struct rte_mempool *const pool,
111 		struct rte_event *const ev, struct worker_data *const w,
112 		void *bufs[], int const buf_sz, uint8_t count)
113 {
114 	bufs[count++] = ev->event_ptr;
115 
116 	/* release fence here ensures event_prt is
117 	 * stored before updating the number of
118 	 * processed packets for worker lcores
119 	 */
120 	rte_atomic_thread_fence(__ATOMIC_RELEASE);
121 	w->processed_pkts++;
122 
123 	if (unlikely(count == buf_sz)) {
124 		count = 0;
125 		rte_mempool_put_bulk(pool, bufs, buf_sz);
126 	}
127 	return count;
128 }
129 
130 static __rte_always_inline uint8_t
131 perf_process_last_stage_latency(struct rte_mempool *const pool,
132 		struct rte_event *const ev, struct worker_data *const w,
133 		void *bufs[], int const buf_sz, uint8_t count)
134 {
135 	uint64_t latency;
136 	struct perf_elt *const m = ev->event_ptr;
137 
138 	bufs[count++] = ev->event_ptr;
139 
140 	/* release fence here ensures event_prt is
141 	 * stored before updating the number of
142 	 * processed packets for worker lcores
143 	 */
144 	rte_atomic_thread_fence(__ATOMIC_RELEASE);
145 	w->processed_pkts++;
146 
147 	if (unlikely(count == buf_sz)) {
148 		count = 0;
149 		latency = rte_get_timer_cycles() - m->timestamp;
150 		rte_mempool_put_bulk(pool, bufs, buf_sz);
151 	} else {
152 		latency = rte_get_timer_cycles() - m->timestamp;
153 	}
154 
155 	w->latency += latency;
156 	return count;
157 }
158 
159 
160 static inline int
161 perf_nb_event_ports(struct evt_options *opt)
162 {
163 	return evt_nr_active_lcores(opt->wlcores) +
164 			evt_nr_active_lcores(opt->plcores);
165 }
166 
/*
 * Helpers shared by the perf tests. Only the prototypes live in this header;
 * the definitions are elsewhere (presumably test_perf_common.c — confirm).
 *
 * NOTE(review): nb_queues is uint64_t in perf_opt_check() but uint8_t in
 * perf_event_dev_port_setup() and perf_opt_dump() — confirm this is intended.
 */
int perf_test_result(struct evt_test *test, struct evt_options *opt);
int perf_opt_check(struct evt_options *opt, uint64_t nb_queues);
int perf_test_setup(struct evt_test *test, struct evt_options *opt);
int perf_ethdev_setup(struct evt_test *test, struct evt_options *opt);
int perf_cryptodev_setup(struct evt_test *test, struct evt_options *opt);
int perf_mempool_setup(struct evt_test *test, struct evt_options *opt);
int perf_event_dev_port_setup(struct evt_test *test, struct evt_options *opt,
		uint8_t stride, uint8_t nb_queues,
		const struct rte_event_port_conf *port_conf);
int perf_event_dev_service_setup(uint8_t dev_id);
int perf_launch_lcores(struct evt_test *test, struct evt_options *opt,
		int (*worker)(void *));

void perf_opt_dump(struct evt_options *opt, uint8_t nb_queues);
void perf_test_destroy(struct evt_test *test, struct evt_options *opt);
void perf_eventdev_destroy(struct evt_test *test, struct evt_options *opt);
void perf_cryptodev_destroy(struct evt_test *test, struct evt_options *opt);
void perf_ethdev_destroy(struct evt_test *test, struct evt_options *opt);
void perf_mempool_destroy(struct evt_test *test, struct evt_options *opt);
185 
186 #endif /* _TEST_PERF_COMMON_ */
187