xref: /dpdk/drivers/net/mlx5/mlx5_txpp.c (revision 27918f0d53f482fa97f2a8dcd5792c23094abcec)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright 2020 Mellanox Technologies, Ltd
3  */
4 #include <fcntl.h>
5 #include <stdint.h>
6 
7 #include <rte_ether.h>
8 #include <ethdev_driver.h>
9 #include <rte_interrupts.h>
10 #include <rte_alarm.h>
11 #include <rte_malloc.h>
12 #include <rte_cycles.h>
13 #include <rte_eal_paging.h>
14 
15 #include <mlx5_malloc.h>
16 #include <mlx5_common_devx.h>
17 
18 #include "mlx5.h"
19 #include "mlx5_rx.h"
20 #include "mlx5_tx.h"
21 #include "mlx5_common_os.h"
22 
23 static_assert(sizeof(struct mlx5_cqe_ts) == sizeof(rte_int128_t),
24 		"Wrong timestamp CQE part size");
25 
26 static const char * const mlx5_txpp_stat_names[] = {
27 	"tx_pp_missed_interrupt_errors", /* Missed service interrupt. */
28 	"tx_pp_rearm_queue_errors", /* Rearm Queue errors. */
29 	"tx_pp_clock_queue_errors", /* Clock Queue errors. */
30 	"tx_pp_timestamp_past_errors", /* Timestamp in the past. */
31 	"tx_pp_timestamp_future_errors", /* Timestamp in the distant future. */
32 	"tx_pp_timestamp_order_errors", /* Timestamp not in ascending order. */
33 	"tx_pp_jitter", /* Timestamp jitter (one Clock Queue completion). */
34 	"tx_pp_wander", /* Timestamp wander (half of Clock Queue CQEs). */
35 	"tx_pp_sync_lost", /* Scheduling synchronization lost. */
36 };
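/*
 * Note: the order of these names must match the order in which
 * mlx5_txpp_xstats_get() below fills in the corresponding stat values.
 */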
37 
38 /* Destroy Event Queue Notification Channel. */
39 static void
40 mlx5_txpp_destroy_event_channel(struct mlx5_dev_ctx_shared *sh)
41 {
42 	if (sh->txpp.echan) {
43 		mlx5_os_devx_destroy_event_channel(sh->txpp.echan);
44 		sh->txpp.echan = NULL;
45 	}
46 }
47 
48 /* Create Event Queue Notification Channel. */
49 static int
50 mlx5_txpp_create_event_channel(struct mlx5_dev_ctx_shared *sh)
51 {
52 	MLX5_ASSERT(!sh->txpp.echan);
53 	sh->txpp.echan = mlx5_os_devx_create_event_channel(sh->cdev->ctx,
54 			MLX5DV_DEVX_CREATE_EVENT_CHANNEL_FLAGS_OMIT_EV_DATA);
55 	if (!sh->txpp.echan) {
56 		rte_errno = errno;
57 		DRV_LOG(ERR, "Failed to create event channel %d.", rte_errno);
58 		return -rte_errno;
59 	}
60 	return 0;
61 }
62 
63 static void
64 mlx5_txpp_free_pp_index(struct mlx5_dev_ctx_shared *sh)
65 {
66 #ifdef HAVE_MLX5DV_PP_ALLOC
67 	if (sh->txpp.pp) {
68 		mlx5_glue->dv_free_pp(sh->txpp.pp);
69 		sh->txpp.pp = NULL;
70 		sh->txpp.pp_id = 0;
71 	}
72 #else
73 	RTE_SET_USED(sh);
74 	DRV_LOG(ERR, "Freeing pacing index is not supported.");
75 #endif
76 }
77 
78 /* Allocate Packet Pacing index from kernel via mlx5dv call. */
79 static int
80 mlx5_txpp_alloc_pp_index(struct mlx5_dev_ctx_shared *sh)
81 {
82 #ifdef HAVE_MLX5DV_PP_ALLOC
83 	uint32_t pp[MLX5_ST_SZ_DW(set_pp_rate_limit_context)];
84 	uint64_t rate;
85 
86 	MLX5_ASSERT(!sh->txpp.pp);
87 	memset(&pp, 0, sizeof(pp));
88 	rate = NS_PER_S / sh->txpp.tick;
89 	if (rate * sh->txpp.tick != NS_PER_S)
90 		DRV_LOG(WARNING, "Packet pacing frequency is not precise.");
91 	if (sh->txpp.test) {
92 		uint32_t len;
93 
94 		len = RTE_MAX(MLX5_TXPP_TEST_PKT_SIZE,
95 			      (size_t)RTE_ETHER_MIN_LEN);
96 		MLX5_SET(set_pp_rate_limit_context, &pp,
97 			 burst_upper_bound, len);
98 		MLX5_SET(set_pp_rate_limit_context, &pp,
99 			 typical_packet_size, len);
100 		/* Convert packets per second into kilobits per second. */
101 		rate = (rate * len) / (1000ul / CHAR_BIT);
102 		DRV_LOG(INFO, "Packet pacing rate set to %" PRIu64, rate);
103 	}
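	/*
	 * Illustration of the conversion above (example values only): with
	 * tick = 1000 ns the WQE rate is 10^9 / 1000 = 1,000,000 per second;
	 * with a 1500-byte test packet the data rate becomes
	 * 1,000,000 * 1500 / 125 = 12,000,000 kbits per second.
	 */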
104 	MLX5_SET(set_pp_rate_limit_context, &pp, rate_limit, rate);
105 	MLX5_SET(set_pp_rate_limit_context, &pp, rate_mode,
106 		 sh->txpp.test ? MLX5_DATA_RATE : MLX5_WQE_RATE);
107 	sh->txpp.pp = mlx5_glue->dv_alloc_pp
108 				(sh->cdev->ctx, sizeof(pp), &pp,
109 				 MLX5DV_PP_ALLOC_FLAGS_DEDICATED_INDEX);
110 	if (sh->txpp.pp == NULL) {
111 		DRV_LOG(ERR, "Failed to allocate packet pacing index.");
112 		rte_errno = errno;
113 		return -errno;
114 	}
115 	if (!((struct mlx5dv_pp *)sh->txpp.pp)->index) {
116 		DRV_LOG(ERR, "Zero packet pacing index allocated.");
117 		mlx5_txpp_free_pp_index(sh);
118 		rte_errno = ENOTSUP;
119 		return -ENOTSUP;
120 	}
121 	sh->txpp.pp_id = ((struct mlx5dv_pp *)(sh->txpp.pp))->index;
122 	return 0;
123 #else
124 	RTE_SET_USED(sh);
125 	DRV_LOG(ERR, "Allocating pacing index is not supported.");
126 	rte_errno = ENOTSUP;
127 	return -ENOTSUP;
128 #endif
129 }
130 
131 static void
132 mlx5_txpp_destroy_send_queue(struct mlx5_txpp_wq *wq)
133 {
134 	mlx5_devx_sq_destroy(&wq->sq_obj);
135 	mlx5_devx_cq_destroy(&wq->cq_obj);
136 	memset(wq, 0, sizeof(*wq));
137 }
138 
139 static void
140 mlx5_txpp_destroy_rearm_queue(struct mlx5_dev_ctx_shared *sh)
141 {
142 	struct mlx5_txpp_wq *wq = &sh->txpp.rearm_queue;
143 
144 	mlx5_txpp_destroy_send_queue(wq);
145 }
146 
147 static void
148 mlx5_txpp_destroy_clock_queue(struct mlx5_dev_ctx_shared *sh)
149 {
150 	struct mlx5_txpp_wq *wq = &sh->txpp.clock_queue;
151 
152 	mlx5_txpp_destroy_send_queue(wq);
153 	if (sh->txpp.tsa) {
154 		mlx5_free(sh->txpp.tsa);
155 		sh->txpp.tsa = NULL;
156 	}
157 }
158 
159 static void
160 mlx5_txpp_doorbell_rearm_queue(struct mlx5_dev_ctx_shared *sh, uint16_t ci)
161 {
162 	struct mlx5_txpp_wq *wq = &sh->txpp.rearm_queue;
163 	struct mlx5_wqe *wqe = (struct mlx5_wqe *)(uintptr_t)wq->sq_obj.wqes;
164 	union {
165 		uint32_t w32[2];
166 		uint64_t w64;
167 	} cs;
168 
169 	wq->sq_ci = ci + 1;
170 	cs.w32[0] = rte_cpu_to_be_32(rte_be_to_cpu_32
171 			(wqe[ci & (wq->sq_size - 1)].ctrl[0]) | (ci - 1) << 8);
172 	cs.w32[1] = wqe[ci & (wq->sq_size - 1)].ctrl[1];
173 	/* Update SQ doorbell record with new SQ ci. */
174 	mlx5_doorbell_ring(&sh->tx_uar.bf_db, cs.w64, wq->sq_ci,
175 			   wq->sq_obj.db_rec, !sh->tx_uar.dbnc);
176 }
177 
178 static void
179 mlx5_txpp_fill_wqe_rearm_queue(struct mlx5_dev_ctx_shared *sh)
180 {
181 	struct mlx5_txpp_wq *wq = &sh->txpp.rearm_queue;
182 	struct mlx5_wqe *wqe = (struct mlx5_wqe *)(uintptr_t)wq->sq_obj.wqes;
183 	uint32_t i;
184 
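	/*
	 * Each iteration posts a SEND_EN/WAIT pair: SEND_EN releases the next
	 * portion of Clock Queue WQEs for transmission and WAIT blocks the
	 * Rearm Queue until the matching Clock Queue CQE arrives, keeping the
	 * queues in lockstep with a fixed MLX5_TXPP_REARM stride.
	 */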
185 	for (i = 0; i < wq->sq_size; i += 2) {
186 		struct mlx5_wqe_cseg *cs;
187 		struct mlx5_wqe_qseg *qs;
188 		uint32_t index;
189 
190 		/* Build SEND_EN request with slave WQE index. */
191 		cs = &wqe[i + 0].cseg;
192 		cs->opcode = RTE_BE32(MLX5_OPCODE_SEND_EN | 0);
193 		cs->sq_ds = rte_cpu_to_be_32((wq->sq_obj.sq->id << 8) | 2);
194 		cs->flags = RTE_BE32(MLX5_COMP_ALWAYS <<
195 				     MLX5_COMP_MODE_OFFSET);
196 		cs->misc = RTE_BE32(0);
197 		qs = RTE_PTR_ADD(cs, sizeof(struct mlx5_wqe_cseg));
198 		index = (i * MLX5_TXPP_REARM / 2 + MLX5_TXPP_REARM) &
199 			((1 << MLX5_WQ_INDEX_WIDTH) - 1);
200 		qs->max_index = rte_cpu_to_be_32(index);
201 		qs->qpn_cqn =
202 			   rte_cpu_to_be_32(sh->txpp.clock_queue.sq_obj.sq->id);
203 		/* Build WAIT request with slave CQE index. */
204 		cs = &wqe[i + 1].cseg;
205 		cs->opcode = RTE_BE32(MLX5_OPCODE_WAIT | 0);
206 		cs->sq_ds = rte_cpu_to_be_32((wq->sq_obj.sq->id << 8) | 2);
207 		cs->flags = RTE_BE32(MLX5_COMP_ONLY_ERR <<
208 				     MLX5_COMP_MODE_OFFSET);
209 		cs->misc = RTE_BE32(0);
210 		qs = RTE_PTR_ADD(cs, sizeof(struct mlx5_wqe_cseg));
211 		index = (i * MLX5_TXPP_REARM / 2 + MLX5_TXPP_REARM / 2) &
212 			((1 << MLX5_CQ_INDEX_WIDTH) - 1);
213 		qs->max_index = rte_cpu_to_be_32(index);
214 		qs->qpn_cqn =
215 			   rte_cpu_to_be_32(sh->txpp.clock_queue.cq_obj.cq->id);
216 	}
217 }
218 
219 /* Creates the Rearm Queue to fire the requests to Clock Queue in realtime. */
220 static int
221 mlx5_txpp_create_rearm_queue(struct mlx5_dev_ctx_shared *sh)
222 {
223 	struct mlx5_devx_create_sq_attr sq_attr = {
224 		.cd_master = 1,
225 		.state = MLX5_SQC_STATE_RST,
226 		.tis_lst_sz = 1,
227 		.tis_num = sh->tis[0]->id,
228 		.wq_attr = (struct mlx5_devx_wq_attr){
229 			.pd = sh->cdev->pdn,
230 			.uar_page =
231 				mlx5_os_get_devx_uar_page_id(sh->tx_uar.obj),
232 		},
233 		.ts_format = mlx5_ts_format_conv
234 				       (sh->cdev->config.hca_attr.sq_ts_format),
235 	};
236 	struct mlx5_devx_modify_sq_attr msq_attr = { 0 };
237 	struct mlx5_devx_cq_attr cq_attr = {
238 		.uar_page_id = mlx5_os_get_devx_uar_page_id(sh->tx_uar.obj),
239 	};
240 	struct mlx5_txpp_wq *wq = &sh->txpp.rearm_queue;
241 	int ret;
242 
243 	/* Create completion queue object for Rearm Queue. */
244 	ret = mlx5_devx_cq_create(sh->cdev->ctx, &wq->cq_obj,
245 				  log2above(MLX5_TXPP_REARM_CQ_SIZE), &cq_attr,
246 				  sh->numa_node);
247 	if (ret) {
248 		DRV_LOG(ERR, "Failed to create CQ for Rearm Queue.");
249 		return ret;
250 	}
251 	wq->cq_ci = 0;
252 	wq->arm_sn = 0;
253 	wq->sq_size = MLX5_TXPP_REARM_SQ_SIZE;
254 	MLX5_ASSERT(wq->sq_size == (1 << log2above(wq->sq_size)));
255 	/* Create send queue object for Rearm Queue. */
256 	sq_attr.cqn = wq->cq_obj.cq->id;
257 	/* There should be no WQE leftovers in the cyclic queue. */
258 	ret = mlx5_devx_sq_create(sh->cdev->ctx, &wq->sq_obj,
259 				  log2above(MLX5_TXPP_REARM_SQ_SIZE), &sq_attr,
260 				  sh->numa_node);
261 	if (ret) {
262 		rte_errno = errno;
263 		DRV_LOG(ERR, "Failed to create SQ for Rearm Queue.");
264 		goto error;
265 	}
266 	/* Build the WQEs in the Send Queue before going to the Ready state. */
267 	mlx5_txpp_fill_wqe_rearm_queue(sh);
268 	/* Change queue state to ready. */
269 	msq_attr.sq_state = MLX5_SQC_STATE_RST;
270 	msq_attr.state = MLX5_SQC_STATE_RDY;
271 	ret = mlx5_devx_cmd_modify_sq(wq->sq_obj.sq, &msq_attr);
272 	if (ret) {
273 		DRV_LOG(ERR, "Failed to set Rearm Queue SQ to ready state.");
274 		goto error;
275 	}
276 	return 0;
277 error:
278 	ret = -rte_errno;
279 	mlx5_txpp_destroy_rearm_queue(sh);
280 	rte_errno = -ret;
281 	return ret;
282 }
283 
284 static void
285 mlx5_txpp_fill_wqe_clock_queue(struct mlx5_dev_ctx_shared *sh)
286 {
287 	struct mlx5_txpp_wq *wq = &sh->txpp.clock_queue;
288 	struct mlx5_wqe *wqe = (struct mlx5_wqe *)(uintptr_t)wq->sq_obj.wqes;
289 	struct mlx5_wqe_cseg *cs = &wqe->cseg;
290 	uint32_t wqe_size, opcode, i;
291 	uint8_t *dst;
292 
293 	/* For test purposes fill the WQ with a SEND inline packet. */
294 	if (sh->txpp.test) {
295 		wqe_size = RTE_ALIGN(MLX5_TXPP_TEST_PKT_SIZE +
296 				     MLX5_WQE_CSEG_SIZE +
297 				     2 * MLX5_WQE_ESEG_SIZE -
298 				     MLX5_ESEG_MIN_INLINE_SIZE,
299 				     MLX5_WSEG_SIZE);
300 		opcode = MLX5_OPCODE_SEND;
301 	} else {
302 		wqe_size = MLX5_WSEG_SIZE;
303 		opcode = MLX5_OPCODE_NOP;
304 	}
305 	cs->opcode = rte_cpu_to_be_32(opcode | 0); /* Index is ignored. */
306 	cs->sq_ds = rte_cpu_to_be_32((wq->sq_obj.sq->id << 8) |
307 				     (wqe_size / MLX5_WSEG_SIZE));
308 	cs->flags = RTE_BE32(MLX5_COMP_ALWAYS << MLX5_COMP_MODE_OFFSET);
309 	cs->misc = RTE_BE32(0);
310 	wqe_size = RTE_ALIGN(wqe_size, MLX5_WQE_SIZE);
311 	if (sh->txpp.test) {
312 		struct mlx5_wqe_eseg *es = &wqe->eseg;
313 		struct rte_ether_hdr *eth_hdr;
314 		struct rte_ipv4_hdr *ip_hdr;
315 		struct rte_udp_hdr *udp_hdr;
316 
317 		/* Build the inline test packet pattern. */
318 		MLX5_ASSERT(wqe_size <= MLX5_WQE_SIZE_MAX);
319 		MLX5_ASSERT(MLX5_TXPP_TEST_PKT_SIZE >=
320 				(sizeof(struct rte_ether_hdr) +
321 				 sizeof(struct rte_ipv4_hdr)));
322 		es->flags = 0;
323 		es->cs_flags = MLX5_ETH_WQE_L3_CSUM | MLX5_ETH_WQE_L4_CSUM;
324 		es->swp_offs = 0;
325 		es->metadata = 0;
326 		es->swp_flags = 0;
327 		es->mss = 0;
328 		es->inline_hdr_sz = RTE_BE16(MLX5_TXPP_TEST_PKT_SIZE);
329 		/* Build test packet L2 header (Ethernet). */
330 		dst = (uint8_t *)&es->inline_data;
331 		eth_hdr = (struct rte_ether_hdr *)dst;
332 		rte_eth_random_addr(&eth_hdr->dst_addr.addr_bytes[0]);
333 		rte_eth_random_addr(&eth_hdr->src_addr.addr_bytes[0]);
334 		eth_hdr->ether_type = rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4);
335 		/* Build test packet L3 header (IP v4). */
336 		dst += sizeof(struct rte_ether_hdr);
337 		ip_hdr = (struct rte_ipv4_hdr *)dst;
338 		ip_hdr->version_ihl = RTE_IPV4_VHL_DEF;
339 		ip_hdr->type_of_service = 0;
340 		ip_hdr->fragment_offset = 0;
341 		ip_hdr->time_to_live = 64;
342 		ip_hdr->next_proto_id = IPPROTO_UDP;
343 		ip_hdr->packet_id = 0;
344 		ip_hdr->total_length = RTE_BE16(MLX5_TXPP_TEST_PKT_SIZE -
345 						sizeof(struct rte_ether_hdr));
346 		/* Use RFC 5735 / RFC 2544 reserved network test addresses. */
347 		ip_hdr->src_addr = RTE_BE32((198U << 24) | (18 << 16) |
348 					    (0 << 8) | 1);
349 		ip_hdr->dst_addr = RTE_BE32((198U << 24) | (18 << 16) |
350 					    (0 << 8) | 2);
351 		if (MLX5_TXPP_TEST_PKT_SIZE <
352 					(sizeof(struct rte_ether_hdr) +
353 					 sizeof(struct rte_ipv4_hdr) +
354 					 sizeof(struct rte_udp_hdr)))
355 			goto wcopy;
356 		/* Build test packet L4 header (UDP). */
357 		dst += sizeof(struct rte_ipv4_hdr);
358 		udp_hdr = (struct rte_udp_hdr *)dst;
359 		udp_hdr->src_port = RTE_BE16(9); /* RFC863 Discard. */
360 		udp_hdr->dst_port = RTE_BE16(9);
361 		udp_hdr->dgram_len = RTE_BE16(MLX5_TXPP_TEST_PKT_SIZE -
362 					      sizeof(struct rte_ether_hdr) -
363 					      sizeof(struct rte_ipv4_hdr));
364 		udp_hdr->dgram_cksum = 0;
365 		/* Fill the test packet data. */
366 		dst += sizeof(struct rte_udp_hdr);
367 		for (i = sizeof(struct rte_ether_hdr) +
368 			sizeof(struct rte_ipv4_hdr) +
369 			sizeof(struct rte_udp_hdr);
370 				i < MLX5_TXPP_TEST_PKT_SIZE; i++)
371 			*dst++ = (uint8_t)(i & 0xFF);
372 	}
373 wcopy:
374 	/* Duplicate the pattern to the next WQEs. */
375 	dst = (uint8_t *)(uintptr_t)wq->sq_obj.umem_buf;
376 	for (i = 1; i < MLX5_TXPP_CLKQ_SIZE; i++) {
377 		dst += wqe_size;
378 		rte_memcpy(dst, (void *)(uintptr_t)wq->sq_obj.umem_buf,
379 			   wqe_size);
380 	}
381 }
382 
383 /* Creates the Clock Queue for packet pacing, returns zero on success. */
384 static int
385 mlx5_txpp_create_clock_queue(struct mlx5_dev_ctx_shared *sh)
386 {
387 	struct mlx5_devx_create_sq_attr sq_attr = { 0 };
388 	struct mlx5_devx_modify_sq_attr msq_attr = { 0 };
389 	struct mlx5_devx_cq_attr cq_attr = {
390 		.use_first_only = 1,
391 		.overrun_ignore = 1,
392 		.uar_page_id = mlx5_os_get_devx_uar_page_id(sh->tx_uar.obj),
393 	};
394 	struct mlx5_txpp_wq *wq = &sh->txpp.clock_queue;
395 	int ret;
396 
397 	sh->txpp.tsa = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO,
398 				   MLX5_TXPP_REARM_SQ_SIZE *
399 				   sizeof(struct mlx5_txpp_ts),
400 				   0, sh->numa_node);
401 	if (!sh->txpp.tsa) {
402 		DRV_LOG(ERR, "Failed to allocate memory for CQ stats.");
403 		return -ENOMEM;
404 	}
405 	sh->txpp.ts_p = 0;
406 	sh->txpp.ts_n = 0;
407 	/* Create completion queue object for Clock Queue. */
408 	ret = mlx5_devx_cq_create(sh->cdev->ctx, &wq->cq_obj,
409 				  log2above(MLX5_TXPP_CLKQ_SIZE), &cq_attr,
410 				  sh->numa_node);
411 	if (ret) {
412 		DRV_LOG(ERR, "Failed to create CQ for Clock Queue.");
413 		goto error;
414 	}
415 	wq->cq_ci = 0;
416 	/* Allocate memory buffer for Send Queue WQEs. */
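	/*
	 * In test mode every Clock Queue entry carries a full inline test
	 * packet and may span several WQEs, so the SQ size is the per-packet
	 * WQE count multiplied by MLX5_TXPP_CLKQ_SIZE; in normal mode one
	 * NOP WQE per entry is enough.
	 */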
417 	if (sh->txpp.test) {
418 		wq->sq_size = RTE_ALIGN(MLX5_TXPP_TEST_PKT_SIZE +
419 					MLX5_WQE_CSEG_SIZE +
420 					2 * MLX5_WQE_ESEG_SIZE -
421 					MLX5_ESEG_MIN_INLINE_SIZE,
422 					MLX5_WQE_SIZE) / MLX5_WQE_SIZE;
423 		wq->sq_size *= MLX5_TXPP_CLKQ_SIZE;
424 	} else {
425 		wq->sq_size = MLX5_TXPP_CLKQ_SIZE;
426 	}
427 	/* There should not be WQE leftovers in the cyclic queue. */
428 	MLX5_ASSERT(wq->sq_size == (1 << log2above(wq->sq_size)));
429 	/* Create send queue object for Clock Queue. */
430 	if (sh->txpp.test) {
431 		sq_attr.tis_lst_sz = 1;
432 		sq_attr.tis_num = sh->tis[0]->id;
433 		sq_attr.non_wire = 0;
434 		sq_attr.static_sq_wq = 1;
435 	} else {
436 		sq_attr.non_wire = 1;
437 		sq_attr.static_sq_wq = 1;
438 	}
439 	sq_attr.cqn = wq->cq_obj.cq->id;
440 	sq_attr.packet_pacing_rate_limit_index = sh->txpp.pp_id;
441 	sq_attr.wq_attr.cd_slave = 1;
442 	sq_attr.wq_attr.uar_page = mlx5_os_get_devx_uar_page_id(sh->tx_uar.obj);
443 	sq_attr.wq_attr.pd = sh->cdev->pdn;
444 	sq_attr.ts_format =
445 		mlx5_ts_format_conv(sh->cdev->config.hca_attr.sq_ts_format);
446 	ret = mlx5_devx_sq_create(sh->cdev->ctx, &wq->sq_obj,
447 				  log2above(wq->sq_size),
448 				  &sq_attr, sh->numa_node);
449 	if (ret) {
450 		rte_errno = errno;
451 		DRV_LOG(ERR, "Failed to create SQ for Clock Queue.");
452 		goto error;
453 	}
454 	/* Build the WQEs in the Send Queue before going to the Ready state. */
455 	mlx5_txpp_fill_wqe_clock_queue(sh);
456 	/* Change queue state to ready. */
457 	msq_attr.sq_state = MLX5_SQC_STATE_RST;
458 	msq_attr.state = MLX5_SQC_STATE_RDY;
459 	wq->sq_ci = 0;
460 	ret = mlx5_devx_cmd_modify_sq(wq->sq_obj.sq, &msq_attr);
461 	if (ret) {
462 		DRV_LOG(ERR, "Failed to set Clock Queue SQ to ready state.");
463 		goto error;
464 	}
465 	return 0;
466 error:
467 	ret = -rte_errno;
468 	mlx5_txpp_destroy_clock_queue(sh);
469 	rte_errno = -ret;
470 	return ret;
471 }
472 
473 /* Enable notification from the Rearm Queue CQ. */
474 static inline void
475 mlx5_txpp_cq_arm(struct mlx5_dev_ctx_shared *sh)
476 {
477 	struct mlx5_txpp_wq *aq = &sh->txpp.rearm_queue;
478 	uint32_t arm_sn = aq->arm_sn << MLX5_CQ_SQN_OFFSET;
479 	uint32_t db_hi = arm_sn | MLX5_CQ_DBR_CMD_ALL | aq->cq_ci;
480 	uint64_t db_be =
481 		rte_cpu_to_be_64(((uint64_t)db_hi << 32) | aq->cq_obj.cq->id);
482 
483 	mlx5_doorbell_ring(&sh->tx_uar.cq_db, db_be, db_hi,
484 			   &aq->cq_obj.db_rec[MLX5_CQ_ARM_DB], 0);
485 	aq->arm_sn++;
486 }
487 
488 #if defined(RTE_ARCH_X86_64)
489 static inline int
490 mlx5_atomic128_compare_exchange(rte_int128_t *dst,
491 				rte_int128_t *exp,
492 				const rte_int128_t *src)
493 {
494 	uint8_t res;
495 
496 	asm volatile (MPLOCKED
497 		      "cmpxchg16b %[dst];"
498 		      " sete %[res]"
499 		      : [dst] "=m" (dst->val[0]),
500 			"=a" (exp->val[0]),
501 			"=d" (exp->val[1]),
502 			[res] "=r" (res)
503 		      : "b" (src->val[0]),
504 			"c" (src->val[1]),
505 			"a" (exp->val[0]),
506 			"d" (exp->val[1]),
507 			"m" (dst->val[0])
508 		      : "memory");
509 
510 	return res;
511 }
512 #endif
513 
514 static inline void
515 mlx5_atomic_read_cqe(rte_int128_t *from, rte_int128_t *ts)
516 {
517 	/*
518 	 * The only CQE of Clock Queue is being continuously
519 	 * updated by hardware with specified rate. We must
520 	 * read timestamp and WQE completion index atomically.
521 	 */
522 #if defined(RTE_ARCH_X86_64)
523 	rte_int128_t src;
524 
525 	memset(&src, 0, sizeof(src));
526 	*ts = src;
527 	/* if (*from == *ts) *from = *src else *ts = *from; */
528 	mlx5_atomic128_compare_exchange(from, ts, &src);
529 #else
530 	uint64_t *cqe = (uint64_t *)from;
531 
532 	/*
533 	 * Power architecture does not support the 16B compare-and-swap, and
534 	 * ARM implements it in software, so the read loop below is preferable.
535 	 */
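	/*
	 * Seqlock-like snapshot: read both 64-bit halves, re-read them and
	 * retry if either half changed in between, so the pair stays
	 * consistent while hardware keeps updating the CQE.
	 */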
536 	for (;;) {
537 		uint64_t tm, op;
538 		uint64_t *ps;
539 
540 		rte_compiler_barrier();
541 		tm = rte_atomic_load_explicit(cqe + 0, rte_memory_order_relaxed);
542 		op = rte_atomic_load_explicit(cqe + 1, rte_memory_order_relaxed);
543 		rte_compiler_barrier();
544 		if (tm != rte_atomic_load_explicit(cqe + 0, rte_memory_order_relaxed))
545 			continue;
546 		if (op != rte_atomic_load_explicit(cqe + 1, rte_memory_order_relaxed))
547 			continue;
548 		ps = (uint64_t *)ts;
549 		ps[0] = tm;
550 		ps[1] = op;
551 		return;
552 	}
553 #endif
554 }
555 
556 /* Stores timestamp in the cache structure to share data with datapath. */
557 static inline void
558 mlx5_txpp_cache_timestamp(struct mlx5_dev_ctx_shared *sh,
559 			   uint64_t ts, uint64_t ci)
560 {
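	/*
	 * Pack the completion index into the upper MLX5_CQ_INDEX_WIDTH bits
	 * of ci_ts and the truncated timestamp into the lower bits, so the
	 * datapath can fetch both values with a single 64-bit atomic load.
	 */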
561 	ci = ci << (64 - MLX5_CQ_INDEX_WIDTH);
562 	ci |= (ts << MLX5_CQ_INDEX_WIDTH) >> MLX5_CQ_INDEX_WIDTH;
563 	rte_compiler_barrier();
564 	rte_atomic_store_explicit(&sh->txpp.ts.ts, ts, rte_memory_order_relaxed);
565 	rte_atomic_store_explicit(&sh->txpp.ts.ci_ts, ci, rte_memory_order_relaxed);
566 	rte_wmb();
567 }
568 
569 /* Reads timestamp from Clock Queue CQE and stores in the cache. */
570 static inline void
571 mlx5_txpp_update_timestamp(struct mlx5_dev_ctx_shared *sh)
572 {
573 	struct mlx5_txpp_wq *wq = &sh->txpp.clock_queue;
574 	struct mlx5_cqe *cqe = (struct mlx5_cqe *)(uintptr_t)wq->cq_obj.cqes;
575 	union {
576 		rte_int128_t u128;
577 		struct mlx5_cqe_ts cts;
578 	} to;
579 	uint64_t ts;
580 	uint16_t ci;
581 	uint8_t opcode;
582 
583 	mlx5_atomic_read_cqe((rte_int128_t *)&cqe->timestamp, &to.u128);
584 	opcode = MLX5_CQE_OPCODE(to.cts.op_own);
585 	if (opcode) {
586 		if (opcode != MLX5_CQE_INVALID) {
587 			/*
588 			 * Commit the error state if and only if
589 			 * we have got at least one actual completion.
590 			 */
591 			DRV_LOG(DEBUG,
592 				"Clock Queue error sync lost (%X).", opcode);
593 			rte_atomic_fetch_add_explicit(&sh->txpp.err_clock_queue,
594 					   1, rte_memory_order_relaxed);
595 			sh->txpp.sync_lost = 1;
596 		}
597 		return;
598 	}
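	/*
	 * The CQE is valid: advance cq_ci by the 16-bit wrap-safe distance
	 * the WQE counter has moved and publish the new timestamp.
	 */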
599 	ci = rte_be_to_cpu_16(to.cts.wqe_counter);
600 	ts = rte_be_to_cpu_64(to.cts.timestamp);
601 	ts = mlx5_txpp_convert_rx_ts(sh, ts);
602 	wq->cq_ci += (ci - wq->sq_ci) & UINT16_MAX;
603 	wq->sq_ci = ci;
604 	mlx5_txpp_cache_timestamp(sh, ts, wq->cq_ci);
605 }
606 
607 /* Waits for the first completion on Clock Queue to init timestamp. */
608 static inline void
609 mlx5_txpp_init_timestamp(struct mlx5_dev_ctx_shared *sh)
610 {
611 	struct mlx5_txpp_wq *wq = &sh->txpp.clock_queue;
612 	uint32_t wait;
613 
614 	sh->txpp.ts_p = 0;
615 	sh->txpp.ts_n = 0;
616 	for (wait = 0; wait < MLX5_TXPP_WAIT_INIT_TS; wait++) {
617 		mlx5_txpp_update_timestamp(sh);
618 		if (wq->sq_ci)
619 			return;
620 		/* Wait one millisecond and try again. */
621 		rte_delay_us_sleep(US_PER_S / MS_PER_S);
622 	}
623 	DRV_LOG(ERR, "Unable to initialize timestamp.");
624 	sh->txpp.sync_lost = 1;
625 }
626 
627 #ifdef HAVE_IBV_DEVX_EVENT
628 /* Gather statistics for timestamp from Clock Queue CQE. */
629 static inline void
630 mlx5_txpp_gather_timestamp(struct mlx5_dev_ctx_shared *sh)
631 {
632 	/* Check whether we have a valid timestamp. */
633 	if (!sh->txpp.clock_queue.sq_ci && !sh->txpp.ts_n)
634 		return;
635 	MLX5_ASSERT(sh->txpp.ts_p < MLX5_TXPP_REARM_SQ_SIZE);
636 	rte_atomic_store_explicit(&sh->txpp.tsa[sh->txpp.ts_p].ts,
637 			 sh->txpp.ts.ts, rte_memory_order_relaxed);
638 	rte_atomic_store_explicit(&sh->txpp.tsa[sh->txpp.ts_p].ci_ts,
639 			 sh->txpp.ts.ci_ts, rte_memory_order_relaxed);
640 	if (++sh->txpp.ts_p >= MLX5_TXPP_REARM_SQ_SIZE)
641 		sh->txpp.ts_p = 0;
642 	if (sh->txpp.ts_n < MLX5_TXPP_REARM_SQ_SIZE)
643 		++sh->txpp.ts_n;
644 }
645 
646 /* Handles Rearm Queue completions in periodic service. */
647 static __rte_always_inline void
648 mlx5_txpp_handle_rearm_queue(struct mlx5_dev_ctx_shared *sh)
649 {
650 	struct mlx5_txpp_wq *wq = &sh->txpp.rearm_queue;
651 	uint32_t cq_ci = wq->cq_ci;
652 	bool error = false;
653 	int ret;
654 
655 	do {
656 		volatile struct mlx5_cqe *cqe;
657 
658 		cqe = &wq->cq_obj.cqes[cq_ci & (MLX5_TXPP_REARM_CQ_SIZE - 1)];
659 		ret = check_cqe(cqe, MLX5_TXPP_REARM_CQ_SIZE, cq_ci);
660 		switch (ret) {
661 		case MLX5_CQE_STATUS_ERR:
662 			error = true;
663 			++cq_ci;
664 			break;
665 		case MLX5_CQE_STATUS_SW_OWN:
666 			wq->sq_ci += 2;
667 			++cq_ci;
668 			break;
669 		case MLX5_CQE_STATUS_HW_OWN:
670 			break;
671 		default:
672 			MLX5_ASSERT(false);
673 			break;
674 		}
675 	} while (ret != MLX5_CQE_STATUS_HW_OWN);
676 	if (likely(cq_ci != wq->cq_ci)) {
677 		/* Check whether we have missed interrupts. */
678 		if (cq_ci - wq->cq_ci != 1) {
679 			DRV_LOG(DEBUG, "Rearm Queue missed interrupt.");
680 			rte_atomic_fetch_add_explicit(&sh->txpp.err_miss_int,
681 					   1, rte_memory_order_relaxed);
682 			/* Check sync lost on wqe index. */
683 			if (cq_ci - wq->cq_ci >=
684 				(((1UL << MLX5_WQ_INDEX_WIDTH) /
685 				  MLX5_TXPP_REARM) - 1))
686 				error = true;
687 		}
688 		/* Update doorbell record to notify hardware. */
689 		rte_compiler_barrier();
690 		*wq->cq_obj.db_rec = rte_cpu_to_be_32(cq_ci);
691 		rte_wmb();
692 		wq->cq_ci = cq_ci;
693 		/* Fire new requests to Rearm Queue. */
694 		if (error) {
695 			DRV_LOG(DEBUG, "Rearm Queue error sync lost.");
696 			rte_atomic_fetch_add_explicit(&sh->txpp.err_rearm_queue,
697 					   1, rte_memory_order_relaxed);
698 			sh->txpp.sync_lost = 1;
699 		}
700 	}
701 }
702 
703 /* Handles Clock Queue completions in periodic service. */
704 static __rte_always_inline void
705 mlx5_txpp_handle_clock_queue(struct mlx5_dev_ctx_shared *sh)
706 {
707 	mlx5_txpp_update_timestamp(sh);
708 	mlx5_txpp_gather_timestamp(sh);
709 }
710 #endif
711 
712 /* Invoked periodically on Rearm Queue completions. */
713 void
714 mlx5_txpp_interrupt_handler(void *cb_arg)
715 {
716 #ifndef HAVE_IBV_DEVX_EVENT
717 	RTE_SET_USED(cb_arg);
718 	return;
719 #else
720 	struct mlx5_dev_ctx_shared *sh = cb_arg;
721 	union {
722 		struct mlx5dv_devx_async_event_hdr event_resp;
723 		uint8_t buf[sizeof(struct mlx5dv_devx_async_event_hdr) + 128];
724 	} out;
725 
726 	MLX5_ASSERT(rte_eal_process_type() == RTE_PROC_PRIMARY);
727 	/* Process events in the loop. Only rearm completions are expected. */
728 	while (mlx5_glue->devx_get_event
729 				(sh->txpp.echan,
730 				 &out.event_resp,
731 				 sizeof(out.buf)) >=
732 				 (ssize_t)sizeof(out.event_resp.cookie)) {
733 		mlx5_txpp_handle_rearm_queue(sh);
734 		mlx5_txpp_handle_clock_queue(sh);
735 		mlx5_txpp_cq_arm(sh);
736 		mlx5_txpp_doorbell_rearm_queue
737 					(sh, sh->txpp.rearm_queue.sq_ci - 1);
738 	}
739 #endif /* HAVE_IBV_DEVX_EVENT */
740 }
741 
742 static void
743 mlx5_txpp_stop_service(struct mlx5_dev_ctx_shared *sh)
744 {
745 	mlx5_os_interrupt_handler_destroy(sh->txpp.intr_handle,
746 					  mlx5_txpp_interrupt_handler, sh);
747 }
748 
749 /* Attaches the interrupt handler and fires the first request to the Rearm Queue. */
750 static int
751 mlx5_txpp_start_service(struct mlx5_dev_ctx_shared *sh)
752 {
753 	uint16_t event_nums[1] = {0};
754 	int ret;
755 	int fd;
756 
757 	sh->txpp.err_miss_int = 0;
758 	sh->txpp.err_rearm_queue = 0;
759 	sh->txpp.err_clock_queue = 0;
760 	sh->txpp.err_ts_past = 0;
761 	sh->txpp.err_ts_future = 0;
762 	sh->txpp.err_ts_order = 0;
763 	/* Attach interrupt handler to process Rearm Queue completions. */
764 	fd = mlx5_os_get_devx_channel_fd(sh->txpp.echan);
765 	ret = mlx5_os_set_nonblock_channel_fd(fd);
766 	if (ret) {
767 		DRV_LOG(ERR, "Failed to change event channel FD.");
768 		rte_errno = errno;
769 		return -rte_errno;
770 	}
771 	fd = mlx5_os_get_devx_channel_fd(sh->txpp.echan);
772 	sh->txpp.intr_handle = mlx5_os_interrupt_handler_create
773 		(RTE_INTR_INSTANCE_F_SHARED, false,
774 		 fd, mlx5_txpp_interrupt_handler, sh);
775 	if (!sh->txpp.intr_handle) {
776 		DRV_LOG(ERR, "Failed to allocate intr_handle.");
777 		return -rte_errno;
778 	}
779 	/* Subscribe CQ event to the event channel controlled by the driver. */
780 	ret = mlx5_os_devx_subscribe_devx_event(sh->txpp.echan,
781 					    sh->txpp.rearm_queue.cq_obj.cq->obj,
782 					     sizeof(event_nums), event_nums, 0);
783 	if (ret) {
784 		DRV_LOG(ERR, "Failed to subscribe CQE event.");
785 		rte_errno = errno;
786 		return -errno;
787 	}
788 	/* Enable interrupts in the CQ. */
789 	mlx5_txpp_cq_arm(sh);
790 	/* Fire the first request on Rearm Queue. */
791 	mlx5_txpp_doorbell_rearm_queue(sh, sh->txpp.rearm_queue.sq_size - 1);
792 	mlx5_txpp_init_timestamp(sh);
793 	return 0;
794 }
795 
796 /*
797  * The routine initializes the packet pacing infrastructure:
798  * - allocates the PP context
799  * - creates the Clock CQ/SQ
800  * - creates the Rearm CQ/SQ
801  * - attaches the Rearm Queue interrupt handler
802  * - starts the Clock Queue
803  *
804  * Returns 0 on success, negative errno otherwise.
805  */
806 static int
807 mlx5_txpp_create(struct mlx5_dev_ctx_shared *sh)
808 {
809 	int tx_pp = sh->config.tx_pp;
810 	int ret;
811 
812 	/* Store the requested pacing parameters. */
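	/*
	 * A negative tx_pp devarg selects the test mode: the Clock Queue
	 * sends inline test packets to the wire instead of non-wire NOPs,
	 * and the tick is the absolute value of the devarg.
	 */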
813 	sh->txpp.tick = tx_pp >= 0 ? tx_pp : -tx_pp;
814 	sh->txpp.test = !!(tx_pp < 0);
815 	sh->txpp.skew = sh->config.tx_skew;
816 	sh->txpp.freq = sh->cdev->config.hca_attr.dev_freq_khz;
817 	ret = mlx5_txpp_create_event_channel(sh);
818 	if (ret)
819 		goto exit;
820 	ret = mlx5_txpp_alloc_pp_index(sh);
821 	if (ret)
822 		goto exit;
823 	ret = mlx5_txpp_create_clock_queue(sh);
824 	if (ret)
825 		goto exit;
826 	ret = mlx5_txpp_create_rearm_queue(sh);
827 	if (ret)
828 		goto exit;
829 	ret = mlx5_txpp_start_service(sh);
830 	if (ret)
831 		goto exit;
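	/*
	 * Fall through to the common cleanup below: it is a no-op when ret
	 * is zero and tears down everything created so far otherwise.
	 */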
832 exit:
833 	if (ret) {
834 		mlx5_txpp_stop_service(sh);
835 		mlx5_txpp_destroy_rearm_queue(sh);
836 		mlx5_txpp_destroy_clock_queue(sh);
837 		mlx5_txpp_free_pp_index(sh);
838 		mlx5_txpp_destroy_event_channel(sh);
839 		sh->txpp.tick = 0;
840 		sh->txpp.test = 0;
841 		sh->txpp.skew = 0;
842 	}
843 	return ret;
844 }
845 
846 /*
847  * The routine destroys the packet pacing infrastructure:
848  * - detaches the Rearm Queue interrupt handler
849  * - destroys the Rearm CQ/SQ
850  * - destroys the Clock CQ/SQ
851  * - frees the PP context
852  */
853 static void
854 mlx5_txpp_destroy(struct mlx5_dev_ctx_shared *sh)
855 {
856 	mlx5_txpp_stop_service(sh);
857 	mlx5_txpp_destroy_rearm_queue(sh);
858 	mlx5_txpp_destroy_clock_queue(sh);
859 	mlx5_txpp_free_pp_index(sh);
860 	mlx5_txpp_destroy_event_channel(sh);
861 	sh->txpp.tick = 0;
862 	sh->txpp.test = 0;
863 	sh->txpp.skew = 0;
864 }
865 
866 /**
867  * Creates and starts packet pacing infrastructure on specified device.
868  *
869  * @param dev
870  *   Pointer to Ethernet device structure.
871  *
872  * @return
873  *   0 on success, a negative errno value otherwise and rte_errno is set.
874  */
875 int
876 mlx5_txpp_start(struct rte_eth_dev *dev)
877 {
878 	struct mlx5_priv *priv = dev->data->dev_private;
879 	struct mlx5_dev_ctx_shared *sh = priv->sh;
880 	int err = 0;
881 
882 	if (!sh->config.tx_pp) {
883 		/* Packet pacing is not requested for the device. */
884 		MLX5_ASSERT(priv->txpp_en == 0);
885 		return 0;
886 	}
887 	if (priv->txpp_en) {
888 		/* Packet pacing is already enabled for the device. */
889 		MLX5_ASSERT(sh->txpp.refcnt);
890 		return 0;
891 	}
892 	if (sh->config.tx_pp > 0) {
893 		err = rte_mbuf_dynflag_lookup
894 			(RTE_MBUF_DYNFLAG_TX_TIMESTAMP_NAME, NULL);
895 		/* No flag registered means no service needed. */
896 		if (err < 0)
897 			return 0;
898 		err = 0;
899 	}
900 	claim_zero(pthread_mutex_lock(&sh->txpp.mutex));
901 	if (sh->txpp.refcnt) {
902 		priv->txpp_en = 1;
903 		++sh->txpp.refcnt;
904 	} else {
905 		err = mlx5_txpp_create(sh);
906 		if (!err) {
907 			MLX5_ASSERT(sh->txpp.tick);
908 			priv->txpp_en = 1;
909 			sh->txpp.refcnt = 1;
910 		} else {
911 			rte_errno = -err;
912 		}
913 	}
914 	claim_zero(pthread_mutex_unlock(&sh->txpp.mutex));
915 	return err;
916 }
917 
918 /**
919  * Stops and destroys packet pacing infrastructure on specified device.
920  *
921  * @param dev
922  *   Pointer to Ethernet device structure.
926  */
927 void
928 mlx5_txpp_stop(struct rte_eth_dev *dev)
929 {
930 	struct mlx5_priv *priv = dev->data->dev_private;
931 	struct mlx5_dev_ctx_shared *sh = priv->sh;
932 
933 	if (!priv->txpp_en) {
934 		/* Packet pacing is already disabled for the device. */
935 		return;
936 	}
937 	priv->txpp_en = 0;
938 	claim_zero(pthread_mutex_lock(&sh->txpp.mutex));
939 	MLX5_ASSERT(sh->txpp.refcnt);
940 	if (!sh->txpp.refcnt || --sh->txpp.refcnt) {
941 		claim_zero(pthread_mutex_unlock(&sh->txpp.mutex));
942 		return;
943 	}
944 	/* No references any more, do actual destroy. */
945 	mlx5_txpp_destroy(sh);
946 	claim_zero(pthread_mutex_unlock(&sh->txpp.mutex));
947 }
948 
949 /*
950  * Read the current clock counter of an Ethernet device
951  *
952  * This returns the current raw clock value of an Ethernet device. It is
953  * a raw count of ticks, with no given time reference.
954  * The value returned here comes from the same clock as the one
955  * filling the timestamp field of Rx/Tx packets when using hardware timestamp
956  * offload. Therefore it can be used to compute a precise conversion of
957  * the device clock to the real time.
958  *
959  * @param dev
960  *   Pointer to Ethernet device structure.
961  * @param timestamp
962  *   Pointer to the uint64_t that holds the raw clock value.
963  *
964  * @return
965  *   - 0: Success.
966  *   - -ENOTSUP: The function is not supported in this mode. Requires
967  *     packet pacing module configured and started (tx_pp devarg)
968  */
969 int
970 mlx5_txpp_read_clock(struct rte_eth_dev *dev, uint64_t *timestamp)
971 {
972 	struct mlx5_priv *priv = dev->data->dev_private;
973 	struct mlx5_dev_ctx_shared *sh = priv->sh;
974 	uint64_t ts;
975 	int ret;
976 
977 	if (sh->txpp.refcnt) {
978 		struct mlx5_txpp_wq *wq = &sh->txpp.clock_queue;
979 		struct mlx5_cqe *cqe =
980 				(struct mlx5_cqe *)(uintptr_t)wq->cq_obj.cqes;
981 		union {
982 			rte_int128_t u128;
983 			struct mlx5_cqe_ts cts;
984 		} to;
985 
986 		mlx5_atomic_read_cqe((rte_int128_t *)&cqe->timestamp, &to.u128);
987 		if (to.cts.op_own >> 4) {
988 			DRV_LOG(DEBUG, "Clock Queue error sync lost.");
989 			rte_atomic_fetch_add_explicit(&sh->txpp.err_clock_queue,
990 					   1, rte_memory_order_relaxed);
991 			sh->txpp.sync_lost = 1;
992 			return -EIO;
993 		}
994 		ts = rte_be_to_cpu_64(to.cts.timestamp);
995 		ts = mlx5_txpp_convert_rx_ts(sh, ts);
996 		*timestamp = ts;
997 		return 0;
998 	}
999 	/* Check if we can read timestamp directly from hardware. */
1000 	ts = mlx5_read_pcibar_clock(dev);
1001 	if (ts != 0) {
1002 		*timestamp = ts;
1003 		return 0;
1004 	}
1005 	/* Not supported in isolated mode - kernel does not see the CQEs. */
1006 	if (priv->isolated || rte_eal_process_type() != RTE_PROC_PRIMARY)
1007 		return -ENOTSUP;
1008 	ret = mlx5_read_clock(dev, timestamp);
1009 	return ret;
1010 }
1011 
1012 /**
1013  * DPDK callback to clear device extended statistics.
1014  *
1015  * @param dev
1016  *   Pointer to Ethernet device structure.
1017  *
1018  * @return
1019  *   0 on success and stats is reset, negative errno value otherwise and
1020  *   rte_errno is set.
1021  */
1022 int mlx5_txpp_xstats_reset(struct rte_eth_dev *dev)
1023 {
1024 	struct mlx5_priv *priv = dev->data->dev_private;
1025 	struct mlx5_dev_ctx_shared *sh = priv->sh;
1026 
1027 	rte_atomic_store_explicit(&sh->txpp.err_miss_int, 0, rte_memory_order_relaxed);
1028 	rte_atomic_store_explicit(&sh->txpp.err_rearm_queue, 0, rte_memory_order_relaxed);
1029 	rte_atomic_store_explicit(&sh->txpp.err_clock_queue, 0, rte_memory_order_relaxed);
1030 	rte_atomic_store_explicit(&sh->txpp.err_ts_past, 0, rte_memory_order_relaxed);
1031 	rte_atomic_store_explicit(&sh->txpp.err_ts_future, 0, rte_memory_order_relaxed);
1032 	rte_atomic_store_explicit(&sh->txpp.err_ts_order, 0, rte_memory_order_relaxed);
1033 	return 0;
1034 }
1035 
1036 /**
1037  * Routine to retrieve names of extended device statistics
1038  * for packet send scheduling. It appends the specific stats names
1039  * after the parts filled by preceding modules (eth stats, etc.)
1040  *
1041  * @param dev
1042  *   Pointer to Ethernet device structure.
1043  * @param[out] xstats_names
1044  *   Buffer to insert names into.
1045  * @param n
1046  *   Number of names.
1047  * @param n_used
1048  *   Number of names filled by preceding statistics modules.
1049  *
1050  * @return
1051  *   Number of xstats names.
1052  */
1053 int mlx5_txpp_xstats_get_names(struct rte_eth_dev *dev __rte_unused,
1054 			       struct rte_eth_xstat_name *xstats_names,
1055 			       unsigned int n, unsigned int n_used)
1056 {
1057 	unsigned int n_txpp = RTE_DIM(mlx5_txpp_stat_names);
1058 	unsigned int i;
1059 
1060 	if (n >= n_used + n_txpp && xstats_names) {
1061 		for (i = 0; i < n_txpp; ++i) {
1062 			strlcpy(xstats_names[i + n_used].name,
1063 				mlx5_txpp_stat_names[i],
1064 				RTE_ETH_XSTATS_NAME_SIZE);
1065 		}
1066 	}
1067 	return n_used + n_txpp;
1068 }
1069 
1070 static inline void
1071 mlx5_txpp_read_tsa(struct mlx5_dev_txpp *txpp,
1072 		   struct mlx5_txpp_ts *tsa, uint16_t idx)
1073 {
1074 	do {
1075 		uint64_t ts, ci;
1076 
1077 		ts = rte_atomic_load_explicit(&txpp->tsa[idx].ts, rte_memory_order_relaxed);
1078 		ci = rte_atomic_load_explicit(&txpp->tsa[idx].ci_ts, rte_memory_order_relaxed);
1079 		rte_compiler_barrier();
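		/*
		 * The low bits of ci_ts duplicate the low bits of ts (see
		 * mlx5_txpp_cache_timestamp()), so a mismatch means the pair
		 * was torn by a concurrent update and must be re-read.
		 */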
1080 		if ((ci ^ ts) << MLX5_CQ_INDEX_WIDTH != 0)
1081 			continue;
1082 		if (rte_atomic_load_explicit(&txpp->tsa[idx].ts,
1083 				    rte_memory_order_relaxed) != ts)
1084 			continue;
1085 		if (rte_atomic_load_explicit(&txpp->tsa[idx].ci_ts,
1086 				    rte_memory_order_relaxed) != ci)
1087 			continue;
1088 		tsa->ts = ts;
1089 		tsa->ci_ts = ci;
1090 		return;
1091 	} while (true);
1092 }
1093 
1094 /*
1095  * Jitter reflects the clock change between
1096  * neighbouring Clock Queue completions.
1097  */
1098 static uint64_t
1099 mlx5_txpp_xstats_jitter(struct mlx5_dev_txpp *txpp)
1100 {
1101 	struct mlx5_txpp_ts tsa0, tsa1;
1102 	int64_t dts, dci;
1103 	uint16_t ts_p;
1104 
1105 	if (txpp->ts_n < 2) {
1106 		/* Not enough reports gathered yet. */
1107 		return 0;
1108 	}
1109 	do {
1110 		int ts_0, ts_1;
1111 
1112 		ts_p = txpp->ts_p;
1113 		rte_compiler_barrier();
1114 		ts_0 = ts_p - 2;
1115 		if (ts_0 < 0)
1116 			ts_0 += MLX5_TXPP_REARM_SQ_SIZE;
1117 		ts_1 = ts_p - 1;
1118 		if (ts_1 < 0)
1119 			ts_1 += MLX5_TXPP_REARM_SQ_SIZE;
1120 		mlx5_txpp_read_tsa(txpp, &tsa0, ts_0);
1121 		mlx5_txpp_read_tsa(txpp, &tsa1, ts_1);
1122 		rte_compiler_barrier();
1123 	} while (ts_p != txpp->ts_p);
1124 	/* We have two neighbor reports, calculate the jitter. */
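	/*
	 * dci is the expected elapsed time (completion index delta times the
	 * tick) and dts is the measured timestamp delta; the jitter is their
	 * absolute difference.
	 */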
1125 	dts = tsa1.ts - tsa0.ts;
1126 	dci = (tsa1.ci_ts >> (64 - MLX5_CQ_INDEX_WIDTH)) -
1127 	      (tsa0.ci_ts >> (64 - MLX5_CQ_INDEX_WIDTH));
1128 	if (dci < 0)
1129 		dci += 1 << MLX5_CQ_INDEX_WIDTH;
1130 	dci *= txpp->tick;
1131 	return (dts > dci) ? dts - dci : dci - dts;
1132 }
1133 
1134 /*
1135  * Wander reflects the long-term clock change
1136  * over the entire length of all Clock Queue completions.
1137  */
1138 static uint64_t
1139 mlx5_txpp_xstats_wander(struct mlx5_dev_txpp *txpp)
1140 {
1141 	struct mlx5_txpp_ts tsa0, tsa1;
1142 	int64_t dts, dci;
1143 	uint16_t ts_p;
1144 
1145 	if (txpp->ts_n < MLX5_TXPP_REARM_SQ_SIZE) {
1146 		/* Not enough reports gathered yet. */
1147 		return 0;
1148 	}
1149 	do {
1150 		int ts_0, ts_1;
1151 
1152 		ts_p = txpp->ts_p;
1153 		rte_compiler_barrier();
1154 		ts_0 = ts_p - MLX5_TXPP_REARM_SQ_SIZE / 2 - 1;
1155 		if (ts_0 < 0)
1156 			ts_0 += MLX5_TXPP_REARM_SQ_SIZE;
1157 		ts_1 = ts_p - 1;
1158 		if (ts_1 < 0)
1159 			ts_1 += MLX5_TXPP_REARM_SQ_SIZE;
1160 		mlx5_txpp_read_tsa(txpp, &tsa0, ts_0);
1161 		mlx5_txpp_read_tsa(txpp, &tsa1, ts_1);
1162 		rte_compiler_barrier();
1163 	} while (ts_p != txpp->ts_p);
1164 	/* We have two reports half the queue apart, calculate the wander. */
1165 	dts = tsa1.ts - tsa0.ts;
1166 	dci = (tsa1.ci_ts >> (64 - MLX5_CQ_INDEX_WIDTH)) -
1167 	      (tsa0.ci_ts >> (64 - MLX5_CQ_INDEX_WIDTH));
1168 	dci += 1 << MLX5_CQ_INDEX_WIDTH;
1169 	dci *= txpp->tick;
1170 	return (dts > dci) ? dts - dci : dci - dts;
1171 }
1172 
1173 /**
1174  * Routine to retrieve extended device statistics
1175  * for packet send scheduling. It appends the specific statistics
1176  * after the parts filled by preceding modules (eth stats, etc.)
1177  *
1178  * @param dev
1179  *   Pointer to Ethernet device.
1180  * @param[out] stats
1181  *   Pointer to rte extended stats table.
1182  * @param n
1183  *   The size of the stats table.
1184  * @param n_used
1185  *   Number of stats filled by preceding statistics modules.
1186  *
1187  * @return
1188  *   Number of extended stats on success and stats is filled,
1189  *   negative on error and rte_errno is set.
1190  */
1191 int
1192 mlx5_txpp_xstats_get(struct rte_eth_dev *dev,
1193 		     struct rte_eth_xstat *stats,
1194 		     unsigned int n, unsigned int n_used)
1195 {
1196 	unsigned int n_txpp = RTE_DIM(mlx5_txpp_stat_names);
1197 
1198 	if (n >= n_used + n_txpp && stats) {
1199 		struct mlx5_priv *priv = dev->data->dev_private;
1200 		struct mlx5_dev_ctx_shared *sh = priv->sh;
1201 		unsigned int i;
1202 
1203 		for (i = 0; i < n_txpp; ++i)
1204 			stats[n_used + i].id = n_used + i;
1205 		stats[n_used + 0].value =
1206 				rte_atomic_load_explicit(&sh->txpp.err_miss_int,
1207 						rte_memory_order_relaxed);
1208 		stats[n_used + 1].value =
1209 				rte_atomic_load_explicit(&sh->txpp.err_rearm_queue,
1210 						rte_memory_order_relaxed);
1211 		stats[n_used + 2].value =
1212 				rte_atomic_load_explicit(&sh->txpp.err_clock_queue,
1213 						rte_memory_order_relaxed);
1214 		stats[n_used + 3].value =
1215 				rte_atomic_load_explicit(&sh->txpp.err_ts_past,
1216 						rte_memory_order_relaxed);
1217 		stats[n_used + 4].value =
1218 				rte_atomic_load_explicit(&sh->txpp.err_ts_future,
1219 						rte_memory_order_relaxed);
1220 		stats[n_used + 5].value =
1221 				rte_atomic_load_explicit(&sh->txpp.err_ts_order,
1222 						rte_memory_order_relaxed);
1223 		stats[n_used + 6].value = mlx5_txpp_xstats_jitter(&sh->txpp);
1224 		stats[n_used + 7].value = mlx5_txpp_xstats_wander(&sh->txpp);
1225 		stats[n_used + 8].value = sh->txpp.sync_lost;
1226 	}
1227 	return n_used + n_txpp;
1228 }
1229