xref: /dpdk/drivers/raw/ifpga/afu_pmd_n3000.c (revision dc348f2e81a94dd3b8a32c2f882483227796905d)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2022 Intel Corporation
3  */
4 
5 #include <errno.h>
6 #include <stdio.h>
7 #include <stdint.h>
8 #include <stdlib.h>
9 #include <inttypes.h>
10 #include <unistd.h>
11 #include <fcntl.h>
12 #include <poll.h>
13 #include <sys/eventfd.h>
14 #include <sys/ioctl.h>
15 
16 #include <rte_eal.h>
17 #include <rte_malloc.h>
18 #include <rte_memcpy.h>
19 #include <rte_io.h>
20 #include <rte_vfio.h>
21 #include <bus_pci_driver.h>
22 #include <bus_ifpga_driver.h>
23 #include <rte_rawdev.h>
24 
25 #include "afu_pmd_core.h"
26 #include "afu_pmd_n3000.h"
27 
28 static int nlb_afu_config(struct afu_rawdev *dev)
29 {
30 	struct n3000_afu_priv *priv = NULL;
31 	struct rte_pmd_afu_nlb_cfg *cfg = NULL;
32 	struct nlb_csr_cfg v;
33 
34 	if (!dev)
35 		return -EINVAL;
36 
37 	if (!dev->priv)
38 		return -ENOENT;
39 
40 	priv = (struct n3000_afu_priv *)dev->priv;
41 	cfg = &priv->nlb_cfg;
42 
43 	v.csr = 0;
44 
45 	if (cfg->cont)
46 		v.cont = 1;
47 
48 	if (cfg->cache_policy == NLB_WRPUSH_I)
49 		v.wrpush_i = 1;
50 	else
51 		v.wrthru_en = cfg->cache_policy;
52 
53 	if (cfg->cache_hint == NLB_RDLINE_MIXED)
54 		v.rdsel = 3;
55 	else
56 		v.rdsel = cfg->cache_hint;
57 
58 	v.mode = cfg->mode;
59 	v.chsel = cfg->read_vc;
60 	v.wr_chsel = cfg->write_vc;
61 	v.wrfence_chsel = cfg->wrfence_vc;
62 	v.wrthru_en = cfg->cache_policy;
63 	v.multicl_len = cfg->multi_cl - 1;
64 
65 	IFPGA_RAWDEV_PMD_DEBUG("cfg: 0x%08x", v.csr);
66 	rte_write32(v.csr, priv->nlb_ctx.addr + CSR_CFG);
67 
68 	return 0;
69 }
70 
71 static void nlb_afu_report(struct afu_rawdev *dev, uint32_t cl)
72 {
73 	struct n3000_afu_priv *priv = NULL;
74 	struct rte_pmd_afu_nlb_cfg *cfg = NULL;
75 	struct nlb_dsm_status *stat = NULL;
76 	uint64_t ticks = 0;
77 	double num, rd_bw, wr_bw;
78 
79 	if (!dev || !dev->priv)
80 		return;
81 
82 	priv = (struct n3000_afu_priv *)dev->priv;
83 
84 	cfg = &priv->nlb_cfg;
85 	stat = priv->nlb_ctx.status_ptr;
86 
87 	if (cfg->cont)
88 		ticks = stat->num_clocks - stat->start_overhead;
89 	else
90 		ticks = stat->num_clocks -
91 			(stat->start_overhead + stat->end_overhead);
92 
93 	if (cfg->freq_mhz == 0)
94 		cfg->freq_mhz = 200;
95 
96 	num = (double)stat->num_reads;
97 	rd_bw = (num * CLS_TO_SIZE(1) * MHZ(cfg->freq_mhz)) / ticks;
98 	num = (double)stat->num_writes;
99 	wr_bw = (num * CLS_TO_SIZE(1) * MHZ(cfg->freq_mhz)) / ticks;
100 
101 	printf("Cachelines  Read_Count Write_Count Clocks@%uMHz   "
102 		"Rd_Bandwidth   Wr_Bandwidth\n", cfg->freq_mhz);
103 	printf("%10u  %10u %11u  %12"PRIu64"   %7.3f GB/s   %7.3f GB/s\n",
104 		cl, stat->num_reads, stat->num_writes, ticks,
105 		rd_bw / 1e9, wr_bw / 1e9);
106 }
107 
/*
 * Run the NLB test once for every cache-line count from cfg->begin to
 * cfg->end (inclusive) in steps of cfg->multi_cl.
 *
 * For each count the function restarts the AFU through CSR_CTL, programs
 * the line count, starts the test, waits for the completion flag in the
 * DSM status area, prints a report and compares the destination buffer
 * with the expected incrementing pattern.
 *
 * Returns 0 on success, -EINVAL if dev is NULL, -ENOENT if dev->priv is
 * NULL, the error from nlb_afu_config(), or the non-zero result of
 * dsm_poll_timeout() when completion is not seen. A data mismatch is only
 * logged; it does not change the return value.
 */
static int nlb_afu_test(struct afu_rawdev *dev)
{
	struct n3000_afu_priv *priv = NULL;
	struct nlb_afu_ctx *ctx = NULL;
	struct rte_pmd_afu_nlb_cfg *cfg = NULL;
	struct nlb_csr_ctl ctl;
	uint32_t *ptr = NULL;
	uint32_t i, j, cl, val = 0;
	uint64_t sval = 0;
	int ret = 0;

	if (!dev)
		return -EINVAL;

	if (!dev->priv)
		return -ENOENT;

	priv = (struct n3000_afu_priv *)dev->priv;
	ctx = &priv->nlb_ctx;
	cfg = &priv->nlb_cfg;

	/* initialize registers */
	IFPGA_RAWDEV_PMD_DEBUG("dsm_addr: 0x%"PRIx64, ctx->dsm_iova);
	rte_write64(ctx->dsm_iova, ctx->addr + CSR_AFU_DSM_BASEL);

	/* write CSR_CTL as all zeroes, then again with only the reset bit set */
	ctl.csr = 0;
	rte_write32(ctl.csr, ctx->addr + CSR_CTL);
	ctl.reset = 1;
	rte_write32(ctl.csr, ctx->addr + CSR_CTL);

	/* buffer addresses are passed through SIZE_TO_CLS() before being written */
	IFPGA_RAWDEV_PMD_DEBUG("src_addr: 0x%"PRIx64, ctx->src_iova);
	rte_write64(SIZE_TO_CLS(ctx->src_iova), ctx->addr + CSR_SRC_ADDR);
	IFPGA_RAWDEV_PMD_DEBUG("dst_addr: 0x%"PRIx64, ctx->dest_iova);
	rte_write64(SIZE_TO_CLS(ctx->dest_iova), ctx->addr + CSR_DST_ADDR);

	ret = nlb_afu_config(dev);
	if (ret)
		return ret;

	/* initialize src data: 32-bit words holding 0, 1, 2, ... */
	ptr = (uint32_t *)ctx->src_ptr;
	j = CLS_TO_SIZE(cfg->end) >> 2;
	for (i = 0; i < j; i++)
		*ptr++ = i;

	/* start test */
	for (cl = cfg->begin; cl <= cfg->end; cl += cfg->multi_cl) {
		/* clear the part of the destination used by this run and the DSM area */
		memset(ctx->dest_ptr, 0, CLS_TO_SIZE(cl));
		memset(ctx->dsm_ptr, 0, DSM_SIZE);

		/* same two-step CSR_CTL sequence as above, repeated per run */
		ctl.csr = 0;
		rte_write32(ctl.csr, ctx->addr + CSR_CTL);
		ctl.reset = 1;
		rte_write32(ctl.csr, ctx->addr + CSR_CTL);

		rte_write32(cl, ctx->addr + CSR_NUM_LINES);

		rte_delay_us(10);

		ctl.start = 1;
		rte_write32(ctl.csr, ctx->addr + CSR_CTL);

		if (cfg->cont) {
			/*
			 * continuous mode: let the test run for cfg->timeout
			 * seconds, force completion, then wait for bit 0 of
			 * test_complete
			 */
			rte_delay_ms(cfg->timeout * 1000);
			ctl.force_completion = 1;
			rte_write32(ctl.csr, ctx->addr + CSR_CTL);
			ret = dsm_poll_timeout(&ctx->status_ptr->test_complete,
				val, (val & 0x1) == 1, DSM_POLL_INTERVAL,
				DSM_TIMEOUT);
			if (ret) {
				printf("DSM poll timeout\n");
				goto end;
			}
		} else {
			/*
			 * non-continuous mode: wait for bit 0 of test_complete
			 * first, then set force_completion
			 */
			ret = dsm_poll_timeout(&ctx->status_ptr->test_complete,
				val, (val & 0x1) == 1, DSM_POLL_INTERVAL,
				DSM_TIMEOUT);
			if (ret) {
				printf("DSM poll timeout\n");
				goto end;
			}
			ctl.force_completion = 1;
			rte_write32(ctl.csr, ctx->addr + CSR_CTL);
		}

		nlb_afu_report(dev, cl);

		/* poll CSR_STATUS1 up to 100 times, 1000 us apart, until it reads 0 */
		i = 0;
		while (i++ < 100) {
			sval = rte_read64(ctx->addr + CSR_STATUS1);
			if (sval == 0)
				break;
			rte_delay_us(1000);
		}

		/* compare the destination with the pattern written to the source */
		ptr = (uint32_t *)ctx->dest_ptr;
		j = CLS_TO_SIZE(cl) >> 2;
		for (i = 0; i < j; i++) {
			if (*ptr++ != i) {
				/* only the first mismatch is logged; ret is left unchanged */
				IFPGA_RAWDEV_PMD_ERR("Data mismatch @ %u", i);
				break;
			}
		}
	}

end:
	return ret;
}
216 
217 static void dma_afu_buf_free(struct dma_afu_ctx *ctx)
218 {
219 	int i = 0;
220 
221 	if (!ctx)
222 		return;
223 
224 	for (i = 0; i < NUM_DMA_BUF; i++) {
225 		rte_free(ctx->dma_buf[i]);
226 		ctx->dma_buf[i] = NULL;
227 	}
228 
229 	rte_free(ctx->data_buf);
230 	ctx->data_buf = NULL;
231 
232 	rte_free(ctx->ref_buf);
233 	ctx->ref_buf = NULL;
234 }
235 
236 static int dma_afu_buf_alloc(struct dma_afu_ctx *ctx,
237 	struct rte_pmd_afu_dma_cfg *cfg)
238 {
239 	size_t page_sz = sysconf(_SC_PAGE_SIZE);
240 	int i, ret = 0;
241 
242 	if (!ctx || !cfg)
243 		return -EINVAL;
244 
245 	for (i = 0; i < NUM_DMA_BUF; i++) {
246 		ctx->dma_buf[i] = (uint64_t *)rte_zmalloc(NULL, cfg->size,
247 			TEST_MEM_ALIGN);
248 		if (!ctx->dma_buf[i]) {
249 			ret = -ENOMEM;
250 			goto free_dma_buf;
251 		}
252 		ctx->dma_iova[i] = rte_malloc_virt2iova(ctx->dma_buf[i]);
253 		if (ctx->dma_iova[i] == RTE_BAD_IOVA) {
254 			ret = -ENOMEM;
255 			goto free_dma_buf;
256 		}
257 	}
258 
259 	ctx->data_buf = rte_malloc(NULL, cfg->length, page_sz);
260 	if (!ctx->data_buf) {
261 		ret = -ENOMEM;
262 		goto free_dma_buf;
263 	}
264 
265 	ctx->ref_buf = rte_malloc(NULL, cfg->length, page_sz);
266 	if (!ctx->ref_buf) {
267 		ret = -ENOMEM;
268 		goto free_data_buf;
269 	}
270 
271 	return 0;
272 
273 free_data_buf:
274 	rte_free(ctx->data_buf);
275 	ctx->data_buf = NULL;
276 free_dma_buf:
277 	for (i = 0; i < NUM_DMA_BUF; i++) {
278 		rte_free(ctx->dma_buf[i]);
279 		ctx->dma_buf[i] = NULL;
280 	}
281 	return ret;
282 }
283 
284 static void dma_afu_buf_init(struct dma_afu_ctx *ctx, size_t size)
285 {
286 	int *ptr = NULL;
287 	size_t i = 0;
288 	size_t dword_size = 0;
289 
290 	if (!ctx || !size)
291 		return;
292 
293 	ptr = (int *)ctx->ref_buf;
294 
295 	if (ctx->pattern) {
296 		memset(ptr, ctx->pattern, size);
297 	} else {
298 		srand(99);
299 		dword_size = size >> 2;
300 		for (i = 0; i < dword_size; i++)
301 			*ptr++ = rand();
302 	}
303 	rte_memcpy(ctx->data_buf, ctx->ref_buf, size);
304 }
305 
306 static int dma_afu_buf_verify(struct dma_afu_ctx *ctx, size_t size)
307 {
308 	uint8_t *src = NULL;
309 	uint8_t *dst = NULL;
310 	size_t i = 0;
311 	int n = 0;
312 
313 	if (!ctx || !size)
314 		return -EINVAL;
315 
316 	src = (uint8_t *)ctx->ref_buf;
317 	dst = (uint8_t *)ctx->data_buf;
318 
319 	if (memcmp(src, dst, size)) {
320 		printf("Transfer is corrupted\n");
321 		if (ctx->verbose) {
322 			for (i = 0; i < size; i++) {
323 				if (*src != *dst) {
324 					if (++n >= ERR_CHECK_LIMIT)
325 						break;
326 					printf("Mismatch at 0x%zx, "
327 						"Expected %02x  Actual %02x\n",
328 						i, *src, *dst);
329 				}
330 				src++;
331 				dst++;
332 			}
333 			if (n < ERR_CHECK_LIMIT) {
334 				printf("Found %d error bytes\n", n);
335 			} else {
336 				printf("......\n");
337 				printf("Found more than %d error bytes\n", n);
338 			}
339 		}
340 		return -1;
341 	}
342 
343 	printf("Transfer is verified\n");
344 	return 0;
345 }
346 
/*
 * Copy 'bytes' bytes from host memory to a device address using 64-bit
 * register writes. Nothing is written unless both the device address and
 * the byte count are QWORD aligned.
 */
static void blk_write64(uint64_t *dev_addr, uint64_t *host_addr, uint64_t bytes)
{
	uint64_t idx;
	uint64_t total = bytes / sizeof(uint64_t);

	if (!IS_ALIGNED_QWORD((uint64_t)dev_addr))
		return;
	if (!IS_ALIGNED_QWORD((uint64_t)bytes))
		return;

	for (idx = 0; idx < total; idx++)
		rte_write64(host_addr[idx], &dev_addr[idx]);
}
358 
/*
 * Copy 'bytes' bytes from a device address to host memory using 64-bit
 * register reads. Nothing is read unless both the device address and the
 * byte count are QWORD aligned.
 */
static void blk_read64(uint64_t *dev_addr, uint64_t *host_addr, uint64_t bytes)
{
	uint64_t idx;
	uint64_t total = bytes / sizeof(uint64_t);

	if (!IS_ALIGNED_QWORD((uint64_t)dev_addr))
		return;
	if (!IS_ALIGNED_QWORD((uint64_t)bytes))
		return;

	for (idx = 0; idx < total; idx++)
		host_addr[idx] = rte_read64(&dev_addr[idx]);
}
370 
371 static void switch_ase_page(struct dma_afu_ctx *ctx, uint64_t addr)
372 {
373 	uint64_t requested_page = addr & ~DMA_ASE_WINDOW_MASK;
374 
375 	if (!ctx)
376 		return;
377 
378 	if (requested_page != ctx->cur_ase_page) {
379 		rte_write64(requested_page, ctx->ase_ctrl_addr);
380 		ctx->cur_ase_page = requested_page;
381 	}
382 }
383 
384 static int ase_write_unaligned(struct dma_afu_ctx *ctx, uint64_t dev_addr,
385 	uint64_t host_addr, uint32_t count)
386 {
387 	uint64_t dev_aligned_addr = 0;
388 	uint64_t shift = 0;
389 	uint64_t val = 0;
390 	uintptr_t addr = (uintptr_t)host_addr;  /* transfer to pointer size */
391 
392 	IFPGA_RAWDEV_PMD_DEBUG("0x%"PRIx64" --> 0x%"PRIx64" (0x%x)", host_addr,
393 		dev_addr, count);
394 
395 	if (!ctx || (count >= QWORD_BYTES))
396 		return -EINVAL;
397 
398 	if (!count)
399 		return 0;
400 
401 	switch_ase_page(ctx, dev_addr);
402 
403 	shift = dev_addr % QWORD_BYTES;
404 	dev_aligned_addr = (dev_addr - shift) & DMA_ASE_WINDOW_MASK;
405 	val = rte_read64(ctx->ase_data_addr + dev_aligned_addr);
406 	rte_memcpy(((char *)(&val)) + shift, (void *)addr, count);
407 
408 	/* write back to device */
409 	rte_write64(val, ctx->ase_data_addr + dev_aligned_addr);
410 
411 	return 0;
412 }
413 
414 static int ase_write(struct dma_afu_ctx *ctx, uint64_t *dst_ptr,
415 	uint64_t *src_ptr, uint64_t *count)
416 {
417 	uint64_t src = *src_ptr;
418 	uint64_t dst = *dst_ptr;
419 	uint64_t align_bytes = *count;
420 	uint64_t offset = 0;
421 	uint64_t left_in_page = DMA_ASE_WINDOW;
422 	uint64_t size_to_copy = 0;
423 
424 	IFPGA_RAWDEV_PMD_DEBUG("0x%"PRIx64" --> 0x%"PRIx64" (0x%"PRIx64")", src, dst,
425 		align_bytes);
426 
427 	if (!ctx || !IS_ALIGNED_DWORD(dst))
428 		return -EINVAL;
429 
430 	if (align_bytes < DWORD_BYTES)
431 		return 0;
432 
433 	if (!IS_ALIGNED_QWORD(dst)) {
434 		/* Write out a single DWORD to get QWORD aligned */
435 		switch_ase_page(ctx, dst);
436 		offset = dst & DMA_ASE_WINDOW_MASK;
437 
438 		rte_write32(*(uint32_t *)(uintptr_t)src,
439 			ctx->ase_data_addr + offset);
440 		src += DWORD_BYTES;
441 		dst += DWORD_BYTES;
442 		align_bytes -= DWORD_BYTES;
443 	}
444 
445 	if (!align_bytes)
446 		return 0;
447 
448 	/* Write out blocks of 64-bit values */
449 	while (align_bytes >= QWORD_BYTES) {
450 		left_in_page -= dst & DMA_ASE_WINDOW_MASK;
451 		size_to_copy =
452 			MIN(left_in_page, (align_bytes & ~(QWORD_BYTES - 1)));
453 		if (size_to_copy < QWORD_BYTES)
454 			break;
455 		switch_ase_page(ctx, dst);
456 		offset = dst & DMA_ASE_WINDOW_MASK;
457 		blk_write64((uint64_t *)(ctx->ase_data_addr + offset),
458 			(uint64_t *)(uintptr_t)src, size_to_copy);
459 		src += size_to_copy;
460 		dst += size_to_copy;
461 		align_bytes -= size_to_copy;
462 	}
463 
464 	if (align_bytes >= DWORD_BYTES) {
465 		/* Write out remaining DWORD */
466 		switch_ase_page(ctx, dst);
467 		offset = dst & DMA_ASE_WINDOW_MASK;
468 		rte_write32(*(uint32_t *)(uintptr_t)src,
469 			ctx->ase_data_addr + offset);
470 		src += DWORD_BYTES;
471 		dst += DWORD_BYTES;
472 		align_bytes -= DWORD_BYTES;
473 	}
474 
475 	*src_ptr = src;
476 	*dst_ptr = dst;
477 	*count = align_bytes;
478 
479 	return 0;
480 }
481 
482 static int ase_host_to_fpga(struct dma_afu_ctx *ctx, uint64_t *dst_ptr,
483 	uint64_t *src_ptr, uint64_t count)
484 {
485 	uint64_t dst = *dst_ptr;
486 	uint64_t src = *src_ptr;
487 	uint64_t count_left = count;
488 	uint64_t unaligned_size = 0;
489 	int ret = 0;
490 
491 	IFPGA_RAWDEV_PMD_DEBUG("0x%"PRIx64" --> 0x%"PRIx64" (0x%"PRIx64")", src, dst,
492 		count);
493 
494 	/* aligns address to 8 byte using dst masking method */
495 	if (!IS_ALIGNED_DWORD(dst) && !IS_ALIGNED_QWORD(dst)) {
496 		unaligned_size = QWORD_BYTES - (dst % QWORD_BYTES);
497 		if (unaligned_size > count_left)
498 			unaligned_size = count_left;
499 		ret = ase_write_unaligned(ctx, dst, src, unaligned_size);
500 		if (ret)
501 			return ret;
502 		count_left -= unaligned_size;
503 		src += unaligned_size;
504 		dst += unaligned_size;
505 	}
506 
507 	/* Handles 8/4 byte MMIO transfer */
508 	ret = ase_write(ctx, &dst, &src, &count_left);
509 	if (ret)
510 		return ret;
511 
512 	/* Left over unaligned bytes transferred using dst masking method */
513 	unaligned_size = QWORD_BYTES - (dst % QWORD_BYTES);
514 	if (unaligned_size > count_left)
515 		unaligned_size = count_left;
516 
517 	ret = ase_write_unaligned(ctx, dst, src, unaligned_size);
518 	if (ret)
519 		return ret;
520 
521 	count_left -= unaligned_size;
522 	*dst_ptr = dst + unaligned_size;
523 	*src_ptr = src + unaligned_size;
524 
525 	return 0;
526 }
527 
528 static int ase_read_unaligned(struct dma_afu_ctx *ctx, uint64_t dev_addr,
529 	uint64_t host_addr, uint32_t count)
530 {
531 	uint64_t dev_aligned_addr = 0;
532 	uint64_t shift = 0;
533 	uint64_t val = 0;
534 	uintptr_t addr = (uintptr_t)host_addr;  /* transfer to pointer size */
535 
536 	IFPGA_RAWDEV_PMD_DEBUG("0x%"PRIx64" <-- 0x%"PRIx64" (0x%x)", host_addr,
537 		dev_addr, count);
538 
539 	if (!ctx || (count >= QWORD_BYTES))
540 		return -EINVAL;
541 
542 	if (!count)
543 		return 0;
544 
545 	switch_ase_page(ctx, dev_addr);
546 
547 	shift = dev_addr % QWORD_BYTES;
548 	dev_aligned_addr = (dev_addr - shift) & DMA_ASE_WINDOW_MASK;
549 	val = rte_read64(ctx->ase_data_addr + dev_aligned_addr);
550 	rte_memcpy((void *)addr, ((char *)(&val)) + shift, count);
551 
552 	return 0;
553 }
554 
555 static int ase_read(struct dma_afu_ctx *ctx, uint64_t *src_ptr,
556 	uint64_t *dst_ptr, uint64_t *count)
557 {
558 	uint64_t src = *src_ptr;
559 	uint64_t dst = *dst_ptr;
560 	uint64_t align_bytes = *count;
561 	uint64_t offset = 0;
562 	uint64_t left_in_page = DMA_ASE_WINDOW;
563 	uint64_t size_to_copy = 0;
564 
565 	IFPGA_RAWDEV_PMD_DEBUG("0x%"PRIx64" <-- 0x%"PRIx64" (0x%"PRIx64")", dst, src,
566 		align_bytes);
567 
568 	if (!ctx || !IS_ALIGNED_DWORD(src))
569 		return -EINVAL;
570 
571 	if (align_bytes < DWORD_BYTES)
572 		return 0;
573 
574 	if (!IS_ALIGNED_QWORD(src)) {
575 		/* Read a single DWORD to get QWORD aligned */
576 		switch_ase_page(ctx, src);
577 		offset = src & DMA_ASE_WINDOW_MASK;
578 		*(uint32_t *)(uintptr_t)dst =
579 			rte_read32(ctx->ase_data_addr + offset);
580 		src += DWORD_BYTES;
581 		dst += DWORD_BYTES;
582 		align_bytes -= DWORD_BYTES;
583 	}
584 
585 	if (!align_bytes)
586 		return 0;
587 
588 	/* Read blocks of 64-bit values */
589 	while (align_bytes >= QWORD_BYTES) {
590 		left_in_page -= src & DMA_ASE_WINDOW_MASK;
591 		size_to_copy =
592 			MIN(left_in_page, (align_bytes & ~(QWORD_BYTES - 1)));
593 		if (size_to_copy < QWORD_BYTES)
594 			break;
595 		switch_ase_page(ctx, src);
596 		offset = src & DMA_ASE_WINDOW_MASK;
597 		blk_read64((uint64_t *)(ctx->ase_data_addr + offset),
598 			(uint64_t *)(uintptr_t)dst, size_to_copy);
599 		src += size_to_copy;
600 		dst += size_to_copy;
601 		align_bytes -= size_to_copy;
602 	}
603 
604 	if (align_bytes >= DWORD_BYTES) {
605 		/* Read remaining DWORD */
606 		switch_ase_page(ctx, src);
607 		offset = src & DMA_ASE_WINDOW_MASK;
608 		*(uint32_t *)(uintptr_t)dst =
609 			rte_read32(ctx->ase_data_addr + offset);
610 		src += DWORD_BYTES;
611 		dst += DWORD_BYTES;
612 		align_bytes -= DWORD_BYTES;
613 	}
614 
615 	*src_ptr = src;
616 	*dst_ptr = dst;
617 	*count = align_bytes;
618 
619 	return 0;
620 }
621 
622 static int ase_fpga_to_host(struct dma_afu_ctx *ctx, uint64_t *src_ptr,
623 	uint64_t *dst_ptr, uint64_t count)
624 {
625 	uint64_t src = *src_ptr;
626 	uint64_t dst = *dst_ptr;
627 	uint64_t count_left = count;
628 	uint64_t unaligned_size = 0;
629 	int ret = 0;
630 
631 	IFPGA_RAWDEV_PMD_DEBUG("0x%"PRIx64" --> 0x%"PRIx64" (0x%"PRIx64")", src, dst,
632 		count);
633 
634 	/* Aligns address to 8 byte using src masking method */
635 	if (!IS_ALIGNED_DWORD(src) && !IS_ALIGNED_QWORD(src)) {
636 		unaligned_size = QWORD_BYTES - (src % QWORD_BYTES);
637 		if (unaligned_size > count_left)
638 			unaligned_size = count_left;
639 		ret = ase_read_unaligned(ctx, src, dst, unaligned_size);
640 		if (ret)
641 			return ret;
642 		count_left -= unaligned_size;
643 		dst += unaligned_size;
644 		src += unaligned_size;
645 	}
646 
647 	/* Handles 8/4 byte MMIO transfer */
648 	ret = ase_read(ctx, &src, &dst, &count_left);
649 	if (ret)
650 		return ret;
651 
652 	/* Left over unaligned bytes transferred using src masking method */
653 	unaligned_size = QWORD_BYTES - (src % QWORD_BYTES);
654 	if (unaligned_size > count_left)
655 		unaligned_size = count_left;
656 
657 	ret = ase_read_unaligned(ctx, src, dst, unaligned_size);
658 	if (ret)
659 		return ret;
660 
661 	count_left -= unaligned_size;
662 	*dst_ptr = dst + unaligned_size;
663 	*src_ptr = src + unaligned_size;
664 
665 	return 0;
666 }
667 
668 static void clear_interrupt(struct dma_afu_ctx *ctx)
669 {
670 	/* clear interrupt by writing 1 to IRQ bit in status register */
671 	msgdma_status status;
672 
673 	if (!ctx)
674 		return;
675 
676 	status.csr = 0;
677 	status.irq = 1;
678 	rte_write32(status.csr, CSR_STATUS(ctx->csr_addr));
679 }
680 
681 static int poll_interrupt(struct dma_afu_ctx *ctx)
682 {
683 	struct pollfd pfd = {0};
684 	uint64_t count = 0;
685 	ssize_t bytes_read = 0;
686 	int poll_ret = 0;
687 	int ret = 0;
688 
689 	if (!ctx || (ctx->event_fd < 0))
690 		return -EINVAL;
691 
692 	pfd.fd = ctx->event_fd;
693 	pfd.events = POLLIN;
694 	poll_ret = poll(&pfd, 1, DMA_TIMEOUT_MSEC);
695 	if (poll_ret < 0) {
696 		IFPGA_RAWDEV_PMD_ERR("Error %s", strerror(errno));
697 		ret = -EFAULT;
698 		goto out;
699 	} else if (poll_ret == 0) {
700 		IFPGA_RAWDEV_PMD_ERR("Timeout");
701 		ret = -ETIMEDOUT;
702 	} else {
703 		bytes_read = read(pfd.fd, &count, sizeof(count));
704 		if (bytes_read > 0) {
705 			if (ctx->verbose)
706 				IFPGA_RAWDEV_PMD_DEBUG("Successful, ret %d, cnt %"PRIu64,
707 					poll_ret, count);
708 			ret = 0;
709 		} else {
710 			IFPGA_RAWDEV_PMD_ERR("Failed %s", bytes_read > 0 ?
711 				strerror(errno) : "zero bytes read");
712 			ret = -EIO;
713 		}
714 	}
715 out:
716 	clear_interrupt(ctx);
717 	return ret;
718 }
719 
720 static void send_descriptor(struct dma_afu_ctx *ctx, msgdma_ext_desc *desc)
721 {
722 	msgdma_status status;
723 	uint64_t fpga_queue_full = 0;
724 
725 	if (!ctx)
726 		return;
727 
728 	if (ctx->verbose) {
729 		IFPGA_RAWDEV_PMD_DEBUG("descriptor.rd_address = 0x%x%08x",
730 			desc->rd_address_ext, desc->rd_address);
731 		IFPGA_RAWDEV_PMD_DEBUG("descriptor.wr_address = 0x%x%08x",
732 			desc->wr_address_ext, desc->wr_address);
733 		IFPGA_RAWDEV_PMD_DEBUG("descriptor.len = %u", desc->len);
734 		IFPGA_RAWDEV_PMD_DEBUG("descriptor.wr_burst_count = %u",
735 			desc->wr_burst_count);
736 		IFPGA_RAWDEV_PMD_DEBUG("descriptor.rd_burst_count = %u",
737 			desc->rd_burst_count);
738 		IFPGA_RAWDEV_PMD_DEBUG("descriptor.wr_stride %u", desc->wr_stride);
739 		IFPGA_RAWDEV_PMD_DEBUG("descriptor.rd_stride %u", desc->rd_stride);
740 	}
741 
742 	do {
743 		status.csr = rte_read32(CSR_STATUS(ctx->csr_addr));
744 		if (fpga_queue_full++ > 100000000) {
745 			IFPGA_RAWDEV_PMD_DEBUG("DMA queue full retry");
746 			fpga_queue_full = 0;
747 		}
748 	} while (status.desc_buf_full);
749 
750 	blk_write64((uint64_t *)ctx->desc_addr, (uint64_t *)desc,
751 		sizeof(*desc));
752 }
753 
/*
 * Build and submit the descriptor(s) for one DMA transfer.
 *
 * @ctx           DMA AFU context; its desc_buf is reused as scratch space.
 * @dst, @src     destination / source addresses as seen by the DMA engine.
 * @count         transfer length in bytes.
 * @is_last_desc  when zero, early_done_en is set in the descriptor.
 * @type          transfer direction (FPGA_TO_FPGA or one of the host ones).
 * @intr_en       non-zero to request a transfer-complete interrupt.
 *
 * FPGA_TO_FPGA transfers go out as a single descriptor with burst count 4.
 * For the other types the transfer is split into up to three descriptors,
 * based on how the host-side address sits relative to CCIP_ALIGN_BYTES:
 * a short head (burst 1), an aligned body (burst 4) and a short remainder
 * (burst 1).
 *
 * Returns 0 on success, -EINVAL if ctx is NULL or src, dst or count fails
 * IS_DMA_ALIGNED().
 */
static int do_dma(struct dma_afu_ctx *ctx, uint64_t dst, uint64_t src,
	int count, int is_last_desc, fpga_dma_type type, int intr_en)
{
	msgdma_ext_desc *desc = NULL;
	int alignment_offset = 0;
	int segment_size = 0;

	if (!ctx)
		return -EINVAL;

	/* src, dst and count must be 64-byte aligned */
	if (!IS_DMA_ALIGNED(src) || !IS_DMA_ALIGNED(dst) ||
		!IS_DMA_ALIGNED(count))
		return -EINVAL;
	memset(ctx->desc_buf, 0, sizeof(msgdma_ext_desc));

	/* these fields are fixed for all DMA transfers */
	desc = ctx->desc_buf;
	desc->seq_num = 0;
	desc->wr_stride = 1;
	desc->rd_stride = 1;
	desc->control.go = 1;
	if (intr_en)
		desc->control.transfer_irq_en = 1;
	else
		desc->control.transfer_irq_en = 0;

	if (!is_last_desc)
		desc->control.early_done_en = 1;
	else
		desc->control.early_done_en = 0;

	if (type == FPGA_TO_FPGA) {
		/* single descriptor; 64-bit addresses split into low/high words */
		desc->rd_address = src & DMA_MASK_32_BIT;
		desc->wr_address = dst & DMA_MASK_32_BIT;
		desc->len = count;
		desc->wr_burst_count = 4;
		desc->rd_burst_count = 4;
		desc->rd_address_ext = (src >> 32) & DMA_MASK_32_BIT;
		desc->wr_address_ext = (dst >> 32) & DMA_MASK_32_BIT;
		send_descriptor(ctx, desc);
	} else {
		/* check CCIP (host) address is aligned to 4CL (256B) */
		alignment_offset = (type == HOST_TO_FPGA)
			? (src % CCIP_ALIGN_BYTES) : (dst % CCIP_ALIGN_BYTES);
		/* performing a short transfer to get aligned */
		if (alignment_offset != 0) {
			desc->rd_address = src & DMA_MASK_32_BIT;
			desc->wr_address = dst & DMA_MASK_32_BIT;
			desc->wr_burst_count = 1;
			desc->rd_burst_count = 1;
			desc->rd_address_ext = (src >> 32) & DMA_MASK_32_BIT;
			desc->wr_address_ext = (dst >> 32) & DMA_MASK_32_BIT;
			/* count isn't large enough to hit next 4CL boundary */
			if ((CCIP_ALIGN_BYTES - alignment_offset) >= count) {
				segment_size = count;
				count = 0;
			} else {
				/* more descriptors follow: no interrupt for this one */
				segment_size = CCIP_ALIGN_BYTES
					- alignment_offset;
				src += segment_size;
				dst += segment_size;
				count -= segment_size;
				desc->control.transfer_irq_en = 0;
			}
			/* post short transfer to align to a 4CL (256 byte) */
			desc->len = segment_size;
			send_descriptor(ctx, desc);
		}
		/* at this point we are 4CL (256 byte) aligned */
		if (count >= CCIP_ALIGN_BYTES) {
			desc->rd_address = src & DMA_MASK_32_BIT;
			desc->wr_address = dst & DMA_MASK_32_BIT;
			desc->wr_burst_count = 4;
			desc->rd_burst_count = 4;
			desc->rd_address_ext = (src >> 32) & DMA_MASK_32_BIT;
			desc->wr_address_ext = (dst >> 32) & DMA_MASK_32_BIT;
			/*
			 * buffer ends on 4CL boundary
			 * NOTE(review): transfer_irq_en is not set again here, so
			 * if the head transfer above cleared it this final
			 * descriptor goes out without an interrupt even when
			 * intr_en is set - confirm this is intended.
			 */
			if ((count % CCIP_ALIGN_BYTES) == 0) {
				segment_size = count;
				count = 0;
			} else {
				/* a remainder follows: no interrupt for this one */
				segment_size = count
					- (count % CCIP_ALIGN_BYTES);
				src += segment_size;
				dst += segment_size;
				count -= segment_size;
				desc->control.transfer_irq_en = 0;
			}
			desc->len = segment_size;
			send_descriptor(ctx, desc);
		}
		/* post short transfer to handle the remainder */
		if (count > 0) {
			desc->rd_address = src & DMA_MASK_32_BIT;
			desc->wr_address = dst & DMA_MASK_32_BIT;
			desc->len = count;
			desc->wr_burst_count = 1;
			desc->rd_burst_count = 1;
			desc->rd_address_ext = (src >> 32) & DMA_MASK_32_BIT;
			desc->wr_address_ext = (dst >> 32) & DMA_MASK_32_BIT;
			/* last descriptor of the transfer: restore the interrupt request */
			if (intr_en)
				desc->control.transfer_irq_en = 1;
			send_descriptor(ctx, desc);
		}
	}

	return 0;
}
863 
864 static int issue_magic(struct dma_afu_ctx *ctx)
865 {
866 	*(ctx->magic_buf) = 0ULL;
867 	return do_dma(ctx, DMA_WF_HOST_ADDR(ctx->magic_iova),
868 		DMA_WF_MAGIC_ROM, 64, 1, FPGA_TO_HOST, 1);
869 }
870 
871 static void wait_magic(struct dma_afu_ctx *ctx)
872 {
873 	int magic_timeout = 0;
874 
875 	if (!ctx)
876 		return;
877 
878 	poll_interrupt(ctx);
879 	while (*(ctx->magic_buf) != DMA_WF_MAGIC) {
880 		if (magic_timeout++ > 1000) {
881 			IFPGA_RAWDEV_PMD_ERR("DMA magic operation timeout");
882 			magic_timeout = 0;
883 			break;
884 		}
885 	}
886 	*(ctx->magic_buf) = 0ULL;
887 }
888 
889 static int dma_tx_buf(struct dma_afu_ctx *ctx, uint64_t dst, uint64_t src,
890 	uint64_t chunk, int is_last_chunk, int *intr_issued)
891 {
892 	int intr_en = 0;
893 	int ret = 0;
894 
895 	if (!ctx || !intr_issued)
896 		return -EINVAL;
897 
898 	src += chunk * ctx->dma_buf_size;
899 	dst += chunk * ctx->dma_buf_size;
900 
901 	if (((chunk % HALF_DMA_BUF) == (HALF_DMA_BUF - 1)) || is_last_chunk) {
902 		if (*intr_issued) {
903 			ret = poll_interrupt(ctx);
904 			if (ret)
905 				return ret;
906 		}
907 		intr_en = 1;
908 	}
909 
910 	chunk %= NUM_DMA_BUF;
911 	rte_memcpy(ctx->dma_buf[chunk], (void *)(uintptr_t)src,
912 		ctx->dma_buf_size);
913 	ret = do_dma(ctx, dst, DMA_HOST_ADDR(ctx->dma_iova[chunk]),
914 			ctx->dma_buf_size, 0, HOST_TO_FPGA, intr_en);
915 	if (intr_en)
916 		*intr_issued = 1;
917 
918 	return ret;
919 }
920 
/*
 * Copy 'count' bytes from host memory (src) to FPGA memory (dst).
 *
 * The transfer is carried out in stages:
 *  1. if dst fails IS_DMA_ALIGNED(), the leading bytes up to the next
 *     DMA_ALIGN_BYTES boundary (or the whole request when it is shorter
 *     than DMA_ALIGN_BYTES) go through the ASE window;
 *  2. whole dma_buf_size chunks are sent with dma_tx_buf();
 *  3. what is left, rounded down to a multiple of DMA_ALIGN_BYTES, is sent
 *     as one DMA from staging buffer 0;
 *  4. the final bytes go through the ASE window.
 *
 * Returns 0 on success, -EINVAL if ctx is NULL, or the error of the stage
 * that failed.
 */
static int dma_host_to_fpga(struct dma_afu_ctx *ctx, uint64_t dst, uint64_t src,
	size_t count)
{
	uint64_t i = 0;
	uint64_t count_left = count;
	uint64_t aligned_addr = 0;
	uint64_t align_bytes = 0;
	uint64_t dma_chunks = 0;
	uint64_t dma_tx_bytes = 0;
	uint64_t offset = 0;
	int issued_intr = 0;
	int ret = 0;

	IFPGA_RAWDEV_PMD_DEBUG("0x%"PRIx64" ---> 0x%"PRIx64" (%zu)", src, dst,
		count);

	if (!ctx)
		return -EINVAL;

	if (!IS_DMA_ALIGNED(dst)) {
		/* short unaligned request: ASE handles all of it */
		if (count_left < DMA_ALIGN_BYTES)
			return ase_host_to_fpga(ctx, &dst, &src, count_left);

		/* otherwise use ASE only up to the next DMA_ALIGN_BYTES boundary */
		aligned_addr = ((dst / DMA_ALIGN_BYTES) + 1)
			* DMA_ALIGN_BYTES;
		align_bytes = aligned_addr - dst;
		/* dst and src are advanced by the callee */
		ret = ase_host_to_fpga(ctx, &dst, &src, align_bytes);
		if (ret)
			return ret;
		count_left = count_left - align_bytes;
	}

	if (count_left) {
		/* offset = bytes covered by whole chunks; count_left = the rest */
		dma_chunks = count_left / ctx->dma_buf_size;
		offset = dma_chunks * ctx->dma_buf_size;
		count_left -= offset;
		IFPGA_RAWDEV_PMD_DEBUG("0x%"PRIx64" ---> 0x%"PRIx64
			" (%"PRIu64"...0x%"PRIx64")",
			src, dst, dma_chunks, count_left);
		for (i = 0; i < dma_chunks; i++) {
			ret = dma_tx_buf(ctx, dst, src, i,
				i == (dma_chunks - 1), &issued_intr);
			if (ret)
				return ret;
		}

		/* wait for the interrupt requested by the last chunk, if any */
		if (issued_intr) {
			ret = poll_interrupt(ctx);
			if (ret)
				return ret;
		}

		if (count_left) {
			/* i = number of whole DMA_ALIGN_BYTES units still to send */
			i = count_left / DMA_ALIGN_BYTES;
			if (i > 0) {
				dma_tx_bytes = i * DMA_ALIGN_BYTES;
				IFPGA_RAWDEV_PMD_DEBUG("left over 0x%"PRIx64" to DMA",
					dma_tx_bytes);
				rte_memcpy(ctx->dma_buf[0],
					(void *)(uintptr_t)(src + offset),
					dma_tx_bytes);
				ret = do_dma(ctx, dst + offset,
					DMA_HOST_ADDR(ctx->dma_iova[0]),
					dma_tx_bytes, 1, HOST_TO_FPGA, 1);
				if (ret)
					return ret;
				ret = poll_interrupt(ctx);
				if (ret)
					return ret;
			}

			/* dma_tx_bytes is still 0 when no aligned part was sent */
			count_left -= dma_tx_bytes;
			if (count_left) {
				IFPGA_RAWDEV_PMD_DEBUG("left over 0x%"PRIx64" to ASE",
					count_left);
				dst += offset + dma_tx_bytes;
				src += offset + dma_tx_bytes;
				ret = ase_host_to_fpga(ctx, &dst, &src,
					count_left);
			}
		}
	}

	return ret;
}
1006 
1007 static int dma_rx_buf(struct dma_afu_ctx *ctx, uint64_t dst, uint64_t src,
1008 	uint64_t chunk, int is_last_chunk, uint64_t *rx_count, int *wf_issued)
1009 {
1010 	uint64_t i = chunk % NUM_DMA_BUF;
1011 	uint64_t n = *rx_count;
1012 	uint64_t num_pending = 0;
1013 	int ret = 0;
1014 
1015 	if (!ctx || !wf_issued)
1016 		return -EINVAL;
1017 
1018 	ret = do_dma(ctx, DMA_HOST_ADDR(ctx->dma_iova[i]),
1019 		src + chunk * ctx->dma_buf_size,
1020 		ctx->dma_buf_size, 1, FPGA_TO_HOST, 0);
1021 	if (ret)
1022 		return ret;
1023 
1024 	num_pending = chunk - n + 1;
1025 	if (num_pending == HALF_DMA_BUF) {
1026 		ret = issue_magic(ctx);
1027 		if (ret) {
1028 			IFPGA_RAWDEV_PMD_DEBUG("Magic issue failed");
1029 			return ret;
1030 		}
1031 		*wf_issued = 1;
1032 	}
1033 
1034 	if ((num_pending > (NUM_DMA_BUF - 1)) || is_last_chunk) {
1035 		if (*wf_issued) {
1036 			wait_magic(ctx);
1037 			for (i = 0; i < HALF_DMA_BUF; i++) {
1038 				rte_memcpy((void *)(uintptr_t)(dst +
1039 						n * ctx->dma_buf_size),
1040 					ctx->dma_buf[n % NUM_DMA_BUF],
1041 					ctx->dma_buf_size);
1042 				n++;
1043 			}
1044 			*wf_issued = 0;
1045 			*rx_count = n;
1046 		}
1047 		ret = issue_magic(ctx);
1048 		if (ret) {
1049 			IFPGA_RAWDEV_PMD_DEBUG("Magic issue failed");
1050 			return ret;
1051 		}
1052 		*wf_issued = 1;
1053 	}
1054 
1055 	return ret;
1056 }
1057 
/*
 * Copy 'count' bytes from FPGA memory (src) to host memory (dst).
 *
 * The transfer is carried out in stages:
 *  1. if src fails IS_DMA_ALIGNED(), the leading bytes up to the next
 *     DMA_ALIGN_BYTES boundary (or the whole request when it is shorter
 *     than DMA_ALIGN_BYTES) are read through the ASE window;
 *  2. whole dma_buf_size chunks are received with dma_rx_buf(), then the
 *     staging buffers not yet drained are copied to dst;
 *  3. what is left, rounded down to a multiple of DMA_ALIGN_BYTES, is
 *     received as one DMA into staging buffer 0 and copied out;
 *  4. the final bytes are read through the ASE window.
 *
 * Returns 0 on success, -EINVAL if ctx is NULL, or the error of the stage
 * that failed.
 */
static int dma_fpga_to_host(struct dma_afu_ctx *ctx, uint64_t dst, uint64_t src,
	size_t count)
{
	uint64_t i = 0;
	uint64_t count_left = count;
	uint64_t aligned_addr = 0;
	uint64_t align_bytes = 0;
	uint64_t dma_chunks = 0;
	uint64_t pending_buf = 0;
	uint64_t dma_rx_bytes = 0;
	uint64_t offset = 0;
	int wf_issued = 0;
	int ret = 0;

	IFPGA_RAWDEV_PMD_DEBUG("0x%"PRIx64" ---> 0x%"PRIx64" (%zu)", src, dst,
		count);

	if (!ctx)
		return -EINVAL;

	if (!IS_DMA_ALIGNED(src)) {
		/* short unaligned request: ASE handles all of it */
		if (count_left < DMA_ALIGN_BYTES)
			return ase_fpga_to_host(ctx, &src, &dst, count_left);

		/* otherwise use ASE only up to the next DMA_ALIGN_BYTES boundary */
		aligned_addr = ((src / DMA_ALIGN_BYTES) + 1)
			 * DMA_ALIGN_BYTES;
		align_bytes = aligned_addr - src;
		/* src and dst are advanced by the callee */
		ret = ase_fpga_to_host(ctx, &src, &dst, align_bytes);
		if (ret)
			return ret;
		count_left = count_left - align_bytes;
	}

	if (count_left) {
		/* offset = bytes covered by whole chunks; count_left = the rest */
		dma_chunks = count_left / ctx->dma_buf_size;
		offset = dma_chunks * ctx->dma_buf_size;
		count_left -= offset;
		IFPGA_RAWDEV_PMD_DEBUG("0x%"PRIx64" ---> 0x%"PRIx64
			" (%"PRIu64"...0x%"PRIx64")",
			src, dst, dma_chunks, count_left);
		/* pending_buf tracks how many chunks have been copied to dst */
		for (i = 0; i < dma_chunks; i++) {
			ret = dma_rx_buf(ctx, dst, src, i,
				i == (dma_chunks - 1),
				&pending_buf, &wf_issued);
			if (ret)
				return ret;
		}

		/* wait for the magic transfer left outstanding by the loop */
		if (wf_issued)
			wait_magic(ctx);

		/* clear out final dma memcpy operations */
		while (pending_buf < dma_chunks) {
			/* constant size transfer; no length check required */
			rte_memcpy((void *)(uintptr_t)(dst +
					pending_buf * ctx->dma_buf_size),
				ctx->dma_buf[pending_buf % NUM_DMA_BUF],
				ctx->dma_buf_size);
			pending_buf++;
		}

		if (count_left > 0) {
			/* i = number of whole DMA_ALIGN_BYTES units still to receive */
			i = count_left / DMA_ALIGN_BYTES;
			if (i > 0) {
				dma_rx_bytes = i * DMA_ALIGN_BYTES;
				IFPGA_RAWDEV_PMD_DEBUG("left over 0x%"PRIx64" to DMA",
					dma_rx_bytes);
				ret = do_dma(ctx,
					DMA_HOST_ADDR(ctx->dma_iova[0]),
					src + offset,
					dma_rx_bytes, 1, FPGA_TO_HOST, 0);
				if (ret)
					return ret;
				/* the data DMA requests no interrupt; the magic transfer does */
				ret = issue_magic(ctx);
				if (ret)
					return ret;
				wait_magic(ctx);
				rte_memcpy((void *)(uintptr_t)(dst + offset),
					ctx->dma_buf[0], dma_rx_bytes);
			}

			/* dma_rx_bytes is still 0 when no aligned part was received */
			count_left -= dma_rx_bytes;
			if (count_left) {
				IFPGA_RAWDEV_PMD_DEBUG("left over 0x%"PRIx64" to ASE",
					count_left);
				dst += offset + dma_rx_bytes;
				src += offset + dma_rx_bytes;
				ret = ase_fpga_to_host(ctx, &src, &dst,
							count_left);
			}
		}
	}

	return ret;
}
1153 
1154 static int dma_fpga_to_fpga(struct dma_afu_ctx *ctx, uint64_t dst, uint64_t src,
1155 	size_t count)
1156 {
1157 	uint64_t i = 0;
1158 	uint64_t count_left = count;
1159 	uint64_t dma_chunks = 0;
1160 	uint64_t offset = 0;
1161 	uint64_t tx_chunks = 0;
1162 	uint64_t *tmp_buf = NULL;
1163 	int ret = 0;
1164 
1165 	IFPGA_RAWDEV_PMD_DEBUG("0x%"PRIx64" ---> 0x%"PRIx64" (%zu)", src, dst,
1166 		count);
1167 
1168 	if (!ctx)
1169 		return -EINVAL;
1170 
1171 	if (IS_DMA_ALIGNED(dst) && IS_DMA_ALIGNED(src)
1172 	    && IS_DMA_ALIGNED(count_left)) {
1173 		dma_chunks = count_left / ctx->dma_buf_size;
1174 		offset = dma_chunks * ctx->dma_buf_size;
1175 		count_left -= offset;
1176 		IFPGA_RAWDEV_PMD_DEBUG("0x%"PRIx64" ---> 0x%"PRIx64
1177 			" (%"PRIu64"...0x%"PRIx64")",
1178 			src, dst, dma_chunks, count_left);
1179 		for (i = 0; i < dma_chunks; i++) {
1180 			ret = do_dma(ctx, dst + i * ctx->dma_buf_size,
1181 				src + i * ctx->dma_buf_size,
1182 				ctx->dma_buf_size, 0, FPGA_TO_FPGA, 0);
1183 			if (ret)
1184 				return ret;
1185 			if ((((i + 1) % NUM_DMA_BUF) == 0) ||
1186 				(i == (dma_chunks - 1))) {
1187 				ret = issue_magic(ctx);
1188 				if (ret)
1189 					return ret;
1190 				wait_magic(ctx);
1191 			}
1192 		}
1193 
1194 		if (count_left > 0) {
1195 			IFPGA_RAWDEV_PMD_DEBUG("left over 0x%"PRIx64" to DMA", count_left);
1196 			ret = do_dma(ctx, dst + offset, src + offset,
1197 				count_left, 1, FPGA_TO_FPGA, 0);
1198 			if (ret)
1199 				return ret;
1200 			ret = issue_magic(ctx);
1201 			if (ret)
1202 				return ret;
1203 			wait_magic(ctx);
1204 		}
1205 	} else {
1206 		if ((src < dst) && (src + count_left > dst)) {
1207 			IFPGA_RAWDEV_PMD_ERR("Overlapping: 0x%"PRIx64
1208 				" -> 0x%"PRIx64" (0x%"PRIx64")",
1209 				src, dst, count_left);
1210 			return -EINVAL;
1211 		}
1212 		tx_chunks = count_left / ctx->dma_buf_size;
1213 		offset = tx_chunks * ctx->dma_buf_size;
1214 		count_left -= offset;
1215 		IFPGA_RAWDEV_PMD_DEBUG("0x%"PRIx64" --> 0x%"PRIx64
1216 			" (%"PRIu64"...0x%"PRIx64")",
1217 			src, dst, tx_chunks, count_left);
1218 		tmp_buf = (uint64_t *)rte_malloc(NULL, ctx->dma_buf_size,
1219 			DMA_ALIGN_BYTES);
1220 		for (i = 0; i < tx_chunks; i++) {
1221 			ret = dma_fpga_to_host(ctx, (uint64_t)tmp_buf,
1222 				src + i * ctx->dma_buf_size,
1223 				ctx->dma_buf_size);
1224 			if (ret)
1225 				goto free_buf;
1226 			ret = dma_host_to_fpga(ctx,
1227 				dst + i * ctx->dma_buf_size,
1228 				(uint64_t)tmp_buf, ctx->dma_buf_size);
1229 			if (ret)
1230 				goto free_buf;
1231 		}
1232 
1233 		if (count_left > 0) {
1234 			ret = dma_fpga_to_host(ctx, (uint64_t)tmp_buf,
1235 				src + offset, count_left);
1236 			if (ret)
1237 				goto free_buf;
1238 			ret = dma_host_to_fpga(ctx, dst + offset,
1239 				(uint64_t)tmp_buf, count_left);
1240 			if (ret)
1241 				goto free_buf;
1242 		}
1243 free_buf:
1244 		rte_free(tmp_buf);
1245 	}
1246 
1247 	return ret;
1248 }
1249 
1250 static int dma_transfer_sync(struct dma_afu_ctx *ctx, uint64_t dst,
1251 	uint64_t src, size_t count, fpga_dma_type type)
1252 {
1253 	int ret = 0;
1254 
1255 	if (!ctx)
1256 		return -EINVAL;
1257 
1258 	if (type == HOST_TO_FPGA)
1259 		ret = dma_host_to_fpga(ctx, dst, src, count);
1260 	else if (type == FPGA_TO_HOST)
1261 		ret = dma_fpga_to_host(ctx, dst, src, count);
1262 	else if (type == FPGA_TO_FPGA)
1263 		ret = dma_fpga_to_fpga(ctx, dst, src, count);
1264 	else
1265 		return -EINVAL;
1266 
1267 	return ret;
1268 }
1269 
/*
 * Return the time elapsed from start to end, in seconds.
 *
 * The difference is computed in signed 64-bit nanoseconds so that it does
 * not overflow where long is 32 bits wide, and so that an end time earlier
 * than start yields a negative duration instead of a huge wrapped value.
 */
static double get_duration(struct timespec start, struct timespec end)
{
	int64_t sec = (int64_t)end.tv_sec - (int64_t)start.tv_sec;
	int64_t nsec = (int64_t)end.tv_nsec - (int64_t)start.tv_nsec;
	int64_t diff = sec * INT64_C(1000000000) + nsec;

	return (double)diff / (double)1000000000L;
}
1276 
1277 #define SWEEP_ITERS 1
1278 static int sweep_test(struct dma_afu_ctx *ctx, uint32_t length,
1279 	uint64_t ddr_offset, uint64_t buf_offset, uint64_t size_decrement)
1280 {
1281 	struct timespec start, end;
1282 	uint64_t test_size = 0;
1283 	uint64_t *dma_buf_ptr = NULL;
1284 	double throughput, total_time = 0.0;
1285 	int i = 0;
1286 	int ret = 0;
1287 
1288 	if (!ctx || !ctx->data_buf || !ctx->ref_buf) {
1289 		IFPGA_RAWDEV_PMD_ERR("Buffer for DMA test is not allocated");
1290 		return -EINVAL;
1291 	}
1292 
1293 	if (length < (buf_offset + size_decrement)) {
1294 		IFPGA_RAWDEV_PMD_ERR("Test length does not match unaligned parameter");
1295 		return -EINVAL;
1296 	}
1297 	test_size = length - (buf_offset + size_decrement);
1298 	if ((ddr_offset + test_size) > ctx->mem_size) {
1299 		IFPGA_RAWDEV_PMD_ERR("Test is out of DDR memory space");
1300 		return -EINVAL;
1301 	}
1302 
1303 	dma_buf_ptr = (uint64_t *)((uint8_t *)ctx->data_buf + buf_offset);
1304 	printf("Sweep Host %p to FPGA 0x%"PRIx64
1305 		" with 0x%"PRIx64" bytes ...\n",
1306 		(void *)dma_buf_ptr, ddr_offset, test_size);
1307 
1308 	for (i = 0; i < SWEEP_ITERS; i++) {
1309 		clock_gettime(CLOCK_MONOTONIC, &start);
1310 		ret = dma_transfer_sync(ctx, ddr_offset, (uint64_t)dma_buf_ptr,
1311 			test_size, HOST_TO_FPGA);
1312 		clock_gettime(CLOCK_MONOTONIC, &end);
1313 		if (ret) {
1314 			IFPGA_RAWDEV_PMD_ERR("Failed");
1315 			return ret;
1316 		}
1317 		total_time += get_duration(start, end);
1318 	}
1319 	throughput = (test_size * SWEEP_ITERS) / (total_time * 1000000);
1320 	printf("Measured bandwidth = %lf MB/s\n", throughput);
1321 
1322 	printf("Sweep FPGA 0x%"PRIx64" to Host %p with 0x%"PRIx64" bytes ...\n",
1323 		ddr_offset, (void *)dma_buf_ptr, test_size);
1324 
1325 	total_time = 0.0;
1326 	memset((char *)dma_buf_ptr, 0, test_size);
1327 	for (i = 0; i < SWEEP_ITERS; i++) {
1328 		clock_gettime(CLOCK_MONOTONIC, &start);
1329 		ret = dma_transfer_sync(ctx, (uint64_t)dma_buf_ptr, ddr_offset,
1330 			test_size, FPGA_TO_HOST);
1331 		clock_gettime(CLOCK_MONOTONIC, &end);
1332 		if (ret) {
1333 			IFPGA_RAWDEV_PMD_ERR("Failed");
1334 			return ret;
1335 		}
1336 		total_time += get_duration(start, end);
1337 	}
1338 	throughput = (test_size * SWEEP_ITERS) / (total_time * 1000000);
1339 	printf("Measured bandwidth = %lf MB/s\n", throughput);
1340 
1341 	printf("Verifying buffer ...\n");
1342 	return dma_afu_buf_verify(ctx, test_size);
1343 }
1344 
1345 static int dma_afu_test(struct afu_rawdev *dev)
1346 {
1347 	struct n3000_afu_priv *priv = NULL;
1348 	struct dma_afu_ctx *ctx = NULL;
1349 	struct rte_pmd_afu_dma_cfg *cfg = NULL;
1350 	msgdma_ctrl ctrl;
1351 	uint64_t offset = 0;
1352 	uint32_t i = 0;
1353 	int ret = 0;
1354 
1355 	if (!dev)
1356 		return -EINVAL;
1357 
1358 	if (!dev->priv)
1359 		return -ENOENT;
1360 
1361 	priv = (struct n3000_afu_priv *)dev->priv;
1362 	cfg = &priv->dma_cfg;
1363 	if (cfg->index >= NUM_N3000_DMA)
1364 		return -EINVAL;
1365 	ctx = &priv->dma_ctx[cfg->index];
1366 
1367 	ctx->pattern = (int)cfg->pattern;
1368 	ctx->verbose = (int)cfg->verbose;
1369 	ctx->dma_buf_size = cfg->size;
1370 
1371 	ret = dma_afu_buf_alloc(ctx, cfg);
1372 	if (ret)
1373 		goto free;
1374 
1375 	printf("Initialize test buffer\n");
1376 	dma_afu_buf_init(ctx, cfg->length);
1377 
1378 	/* enable interrupt */
1379 	ctrl.csr = 0;
1380 	ctrl.global_intr_en_mask = 1;
1381 	rte_write32(ctrl.csr, CSR_CONTROL(ctx->csr_addr));
1382 
1383 	printf("Host %p to FPGA 0x%x with 0x%x bytes\n", ctx->data_buf,
1384 		cfg->offset, cfg->length);
1385 	ret = dma_transfer_sync(ctx, cfg->offset, (uint64_t)ctx->data_buf,
1386 		cfg->length, HOST_TO_FPGA);
1387 	if (ret) {
1388 		IFPGA_RAWDEV_PMD_ERR("Failed to transfer data from host to FPGA");
1389 		goto end;
1390 	}
1391 	memset(ctx->data_buf, 0, cfg->length);
1392 
1393 	printf("FPGA 0x%x to Host %p with 0x%x bytes\n", cfg->offset,
1394 		ctx->data_buf, cfg->length);
1395 	ret = dma_transfer_sync(ctx, (uint64_t)ctx->data_buf, cfg->offset,
1396 		cfg->length, FPGA_TO_HOST);
1397 	if (ret) {
1398 		IFPGA_RAWDEV_PMD_ERR("Failed to transfer data from FPGA to host");
1399 		goto end;
1400 	}
1401 	ret = dma_afu_buf_verify(ctx, cfg->length);
1402 	if (ret)
1403 		goto end;
1404 
1405 	if ((cfg->offset + cfg->length * 2) <= ctx->mem_size)
1406 		offset = cfg->offset + cfg->length;
1407 	else if (cfg->offset > cfg->length)
1408 		offset = 0;
1409 	else
1410 		goto end;
1411 
1412 	printf("FPGA 0x%x to FPGA 0x%"PRIx64" with 0x%x bytes\n",
1413 		cfg->offset, offset, cfg->length);
1414 	ret = dma_transfer_sync(ctx, offset, cfg->offset, cfg->length,
1415 		FPGA_TO_FPGA);
1416 	if (ret) {
1417 		IFPGA_RAWDEV_PMD_ERR("Failed to transfer data from FPGA to FPGA");
1418 		goto end;
1419 	}
1420 
1421 	printf("FPGA 0x%"PRIx64" to Host %p with 0x%x bytes\n", offset,
1422 		ctx->data_buf, cfg->length);
1423 	ret = dma_transfer_sync(ctx, (uint64_t)ctx->data_buf, offset,
1424 		cfg->length, FPGA_TO_HOST);
1425 	if (ret) {
1426 		IFPGA_RAWDEV_PMD_ERR("Failed to transfer data from FPGA to host");
1427 		goto end;
1428 	}
1429 	ret = dma_afu_buf_verify(ctx, cfg->length);
1430 	if (ret)
1431 		goto end;
1432 
1433 	printf("Sweep with aligned address and size\n");
1434 	ret = sweep_test(ctx, cfg->length, cfg->offset, 0, 0);
1435 	if (ret)
1436 		goto end;
1437 
1438 	if (cfg->unaligned) {
1439 		printf("Sweep with unaligned address and size\n");
1440 		struct unaligned_set {
1441 			uint64_t addr_offset;
1442 			uint64_t size_dec;
1443 		} param[] = {{61, 5}, {3, 0}, {7, 3}, {0, 3}, {0, 61}, {0, 7}};
1444 		for (i = 0; i < ARRAY_SIZE(param); i++) {
1445 			ret = sweep_test(ctx, cfg->length, cfg->offset,
1446 				param[i].addr_offset, param[i].size_dec);
1447 			if (ret)
1448 				break;
1449 		}
1450 	}
1451 
1452 end:
1453 	/* disable interrupt */
1454 	ctrl.global_intr_en_mask = 0;
1455 	rte_write32(ctrl.csr, CSR_CONTROL(ctx->csr_addr));
1456 
1457 free:
1458 	dma_afu_buf_free(ctx);
1459 	return ret;
1460 }
1461 
1462 static struct rte_pci_device *n3000_afu_get_pci_dev(struct afu_rawdev *dev)
1463 {
1464 	struct rte_afu_device *afudev = NULL;
1465 
1466 	if (!dev || !dev->rawdev || !dev->rawdev->device)
1467 		return NULL;
1468 
1469 	afudev = RTE_DEV_TO_AFU(dev->rawdev->device);
1470 	if (!afudev->rawdev || !afudev->rawdev->device)
1471 		return NULL;
1472 
1473 	return RTE_DEV_TO_PCI(afudev->rawdev->device);
1474 }
1475 
#ifdef VFIO_PRESENT
/*
 * Bind count eventfds to consecutive MSI-X vectors, starting at vec_start,
 * through the VFIO_DEVICE_SET_IRQS ioctl on the device fd taken from the
 * PCI device's interrupt handle.
 *
 * Returns 0 on success, -EINVAL on bad arguments, -ENODEV when no PCI
 * device or no valid device fd is available, -ENOMEM on allocation
 * failure, or the non-zero ioctl result on failure.
 */
static int dma_afu_set_irqs(struct afu_rawdev *dev, uint32_t vec_start,
	uint32_t count, int *efds)
{
	struct rte_pci_device *pci_dev = NULL;
	struct vfio_irq_set *irq_set = NULL;
	int vfio_dev_fd = 0;
	size_t sz = 0;
	int ret = 0;

	if (!dev || !efds || (count == 0) || (count > MAX_MSIX_VEC))
		return -EINVAL;

	pci_dev = n3000_afu_get_pci_dev(dev);
	if (!pci_dev)
		return -ENODEV;
	vfio_dev_fd = rte_intr_dev_fd_get(pci_dev->intr_handle);
	/* no usable device fd: fail before allocating anything */
	if (vfio_dev_fd < 0)
		return -ENODEV;

	/* the eventfd array is carried right after the fixed header */
	sz = sizeof(*irq_set) + sizeof(*efds) * count;
	irq_set = rte_zmalloc(NULL, sz, 0);
	if (!irq_set)
		return -ENOMEM;

	irq_set->argsz = (uint32_t)sz;
	irq_set->count = count;
	irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD |
		VFIO_IRQ_SET_ACTION_TRIGGER;
	irq_set->index = VFIO_PCI_MSIX_IRQ_INDEX;
	irq_set->start = vec_start;

	rte_memcpy(&irq_set->data, efds, sizeof(*efds) * count);
	ret = ioctl(vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set);
	if (ret)
		IFPGA_RAWDEV_PMD_ERR("Error enabling MSI-X interrupts");

	rte_free(irq_set);
	return ret;
}
#endif
1515 
1516 static void *n3000_afu_get_port_addr(struct afu_rawdev *dev)
1517 {
1518 	struct rte_pci_device *pci_dev = NULL;
1519 	uint8_t *addr = NULL;
1520 	uint64_t val = 0;
1521 	uint32_t bar = 0;
1522 
1523 	pci_dev = n3000_afu_get_pci_dev(dev);
1524 	if (!pci_dev)
1525 		return NULL;
1526 
1527 	addr = (uint8_t *)pci_dev->mem_resource[0].addr;
1528 	val = rte_read64(addr + PORT_ATTR_REG(dev->port));
1529 	if (!PORT_IMPLEMENTED(val)) {
1530 		IFPGA_RAWDEV_PMD_INFO("FIU port %d is not implemented", dev->port);
1531 		return NULL;
1532 	}
1533 
1534 	bar = PORT_BAR(val);
1535 	if (bar >= PCI_MAX_RESOURCE) {
1536 		IFPGA_RAWDEV_PMD_ERR("BAR index %u is out of limit", bar);
1537 		return NULL;
1538 	}
1539 
1540 	addr = (uint8_t *)pci_dev->mem_resource[bar].addr + PORT_OFFSET(val);
1541 	return addr;
1542 }
1543 
1544 static int n3000_afu_get_irq_capability(struct afu_rawdev *dev,
1545 	uint32_t *vec_start, uint32_t *vec_count)
1546 {
1547 	uint8_t *addr = NULL;
1548 	uint64_t val = 0;
1549 	uint64_t header = 0;
1550 	uint64_t next_offset = 0;
1551 
1552 	addr = (uint8_t *)n3000_afu_get_port_addr(dev);
1553 	if (!addr)
1554 		return -ENOENT;
1555 
1556 	do {
1557 		addr += next_offset;
1558 		header = rte_read64(addr);
1559 		if ((DFH_TYPE(header) == DFH_TYPE_PRIVATE) &&
1560 			(DFH_FEATURE_ID(header) == PORT_FEATURE_UINT_ID)) {
1561 			val = rte_read64(addr + PORT_UINT_CAP_REG);
1562 			if (vec_start)
1563 				*vec_start = PORT_VEC_START(val);
1564 			if (vec_count)
1565 				*vec_count = PORT_VEC_COUNT(val);
1566 			return 0;
1567 		}
1568 		next_offset = DFH_NEXT_OFFSET(header);
1569 		if (((next_offset & 0xffff) == 0xffff) || (next_offset == 0))
1570 			break;
1571 	} while (!DFH_EOL(header));
1572 
1573 	return -ENOENT;
1574 }
1575 
1576 static int nlb_afu_ctx_release(struct afu_rawdev *dev)
1577 {
1578 	struct n3000_afu_priv *priv = NULL;
1579 	struct nlb_afu_ctx *ctx = NULL;
1580 
1581 	if (!dev)
1582 		return -EINVAL;
1583 
1584 	priv = (struct n3000_afu_priv *)dev->priv;
1585 	if (!priv)
1586 		return -ENOENT;
1587 
1588 	ctx = &priv->nlb_ctx;
1589 
1590 	rte_free(ctx->dsm_ptr);
1591 	ctx->dsm_ptr = NULL;
1592 	ctx->status_ptr = NULL;
1593 
1594 	rte_free(ctx->src_ptr);
1595 	ctx->src_ptr = NULL;
1596 
1597 	rte_free(ctx->dest_ptr);
1598 	ctx->dest_ptr = NULL;
1599 
1600 	return 0;
1601 }
1602 
1603 static int nlb_afu_ctx_init(struct afu_rawdev *dev, uint8_t *addr)
1604 {
1605 	struct n3000_afu_priv *priv = NULL;
1606 	struct nlb_afu_ctx *ctx = NULL;
1607 	int ret = 0;
1608 
1609 	if (!dev || !addr)
1610 		return -EINVAL;
1611 
1612 	priv = (struct n3000_afu_priv *)dev->priv;
1613 	if (!priv)
1614 		return -ENOENT;
1615 
1616 	ctx = &priv->nlb_ctx;
1617 	ctx->addr = addr;
1618 
1619 	ctx->dsm_ptr = (uint8_t *)rte_zmalloc(NULL, DSM_SIZE, TEST_MEM_ALIGN);
1620 	if (!ctx->dsm_ptr)
1621 		return -ENOMEM;
1622 
1623 	ctx->dsm_iova = rte_malloc_virt2iova(ctx->dsm_ptr);
1624 	if (ctx->dsm_iova == RTE_BAD_IOVA) {
1625 		ret = -ENOMEM;
1626 		goto release_dsm;
1627 	}
1628 
1629 	ctx->src_ptr = (uint8_t *)rte_zmalloc(NULL, NLB_BUF_SIZE,
1630 		TEST_MEM_ALIGN);
1631 	if (!ctx->src_ptr) {
1632 		ret = -ENOMEM;
1633 		goto release_dsm;
1634 	}
1635 	ctx->src_iova = rte_malloc_virt2iova(ctx->src_ptr);
1636 	if (ctx->src_iova == RTE_BAD_IOVA) {
1637 		ret = -ENOMEM;
1638 		goto release_src;
1639 	}
1640 
1641 	ctx->dest_ptr = (uint8_t *)rte_zmalloc(NULL, NLB_BUF_SIZE,
1642 		TEST_MEM_ALIGN);
1643 	if (!ctx->dest_ptr) {
1644 		ret = -ENOMEM;
1645 		goto release_src;
1646 	}
1647 	ctx->dest_iova = rte_malloc_virt2iova(ctx->dest_ptr);
1648 	if (ctx->dest_iova == RTE_BAD_IOVA) {
1649 		ret = -ENOMEM;
1650 		goto release_dest;
1651 	}
1652 
1653 	ctx->status_ptr = (struct nlb_dsm_status *)(ctx->dsm_ptr + DSM_STATUS);
1654 	return 0;
1655 
1656 release_dest:
1657 	rte_free(ctx->dest_ptr);
1658 	ctx->dest_ptr = NULL;
1659 release_src:
1660 	rte_free(ctx->src_ptr);
1661 	ctx->src_ptr = NULL;
1662 release_dsm:
1663 	rte_free(ctx->dsm_ptr);
1664 	ctx->dsm_ptr = NULL;
1665 	return ret;
1666 }
1667 
1668 static int dma_afu_ctx_release(struct afu_rawdev *dev)
1669 {
1670 	struct n3000_afu_priv *priv = NULL;
1671 	struct dma_afu_ctx *ctx = NULL;
1672 
1673 	if (!dev)
1674 		return -EINVAL;
1675 
1676 	priv = (struct n3000_afu_priv *)dev->priv;
1677 	if (!priv)
1678 		return -ENOENT;
1679 
1680 	ctx = &priv->dma_ctx[0];
1681 
1682 	rte_free(ctx->desc_buf);
1683 	ctx->desc_buf = NULL;
1684 
1685 	rte_free(ctx->magic_buf);
1686 	ctx->magic_buf = NULL;
1687 
1688 	close(ctx->event_fd);
1689 	return 0;
1690 }
1691 
1692 static int dma_afu_ctx_init(struct afu_rawdev *dev, int index, uint8_t *addr)
1693 {
1694 	struct n3000_afu_priv *priv = NULL;
1695 	struct dma_afu_ctx *ctx = NULL;
1696 	uint64_t mem_sz[] = {0x100000000, 0x100000000, 0x40000000, 0x1000000};
1697 	static int efds[1] = {0};
1698 	uint32_t vec_start = 0;
1699 	int ret = 0;
1700 
1701 	if (!dev || (index < 0) || (index >= NUM_N3000_DMA) || !addr)
1702 		return -EINVAL;
1703 
1704 	priv = (struct n3000_afu_priv *)dev->priv;
1705 	if (!priv)
1706 		return -ENOENT;
1707 
1708 	ctx = &priv->dma_ctx[index];
1709 	ctx->index = index;
1710 	ctx->addr = addr;
1711 	ctx->csr_addr = addr + DMA_CSR;
1712 	ctx->desc_addr = addr + DMA_DESC;
1713 	ctx->ase_ctrl_addr = addr + DMA_ASE_CTRL;
1714 	ctx->ase_data_addr = addr + DMA_ASE_DATA;
1715 	ctx->mem_size = mem_sz[ctx->index];
1716 	ctx->cur_ase_page = INVALID_ASE_PAGE;
1717 	if (ctx->index == 0) {
1718 		ret = n3000_afu_get_irq_capability(dev, &vec_start, NULL);
1719 		if (ret)
1720 			return ret;
1721 
1722 		efds[0] = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
1723 		if (efds[0] < 0) {
1724 			IFPGA_RAWDEV_PMD_ERR("eventfd create failed");
1725 			return -EBADF;
1726 		}
1727 #ifdef VFIO_PRESENT
1728 		if (dma_afu_set_irqs(dev, vec_start, 1, efds))
1729 			IFPGA_RAWDEV_PMD_ERR("DMA interrupt setup failed");
1730 #endif
1731 	}
1732 	ctx->event_fd = efds[0];
1733 
1734 	ctx->desc_buf = (msgdma_ext_desc *)rte_zmalloc(NULL,
1735 		sizeof(msgdma_ext_desc), DMA_ALIGN_BYTES);
1736 	if (!ctx->desc_buf) {
1737 		ret = -ENOMEM;
1738 		goto release;
1739 	}
1740 
1741 	ctx->magic_buf = (uint64_t *)rte_zmalloc(NULL, MAGIC_BUF_SIZE,
1742 		TEST_MEM_ALIGN);
1743 	if (!ctx->magic_buf) {
1744 		ret = -ENOMEM;
1745 		goto release;
1746 	}
1747 	ctx->magic_iova = rte_malloc_virt2iova(ctx->magic_buf);
1748 	if (ctx->magic_iova == RTE_BAD_IOVA) {
1749 		ret = -ENOMEM;
1750 		goto release;
1751 	}
1752 
1753 	return 0;
1754 
1755 release:
1756 	dma_afu_ctx_release(dev);
1757 	return ret;
1758 }
1759 
1760 static int n3000_afu_ctx_init(struct afu_rawdev *dev)
1761 {
1762 	struct n3000_afu_priv *priv = NULL;
1763 	uint8_t *addr = NULL;
1764 	uint64_t header = 0;
1765 	uint64_t uuid_hi = 0;
1766 	uint64_t uuid_lo = 0;
1767 	uint64_t next_offset = 0;
1768 	int ret = 0;
1769 
1770 	if (!dev)
1771 		return -EINVAL;
1772 
1773 	priv = (struct n3000_afu_priv *)dev->priv;
1774 	if (!priv)
1775 		return -ENOENT;
1776 
1777 	addr = (uint8_t *)dev->addr;
1778 	do {
1779 		addr += next_offset;
1780 		header = rte_read64(addr);
1781 		uuid_lo = rte_read64(addr + DFH_UUID_L_OFFSET);
1782 		uuid_hi = rte_read64(addr + DFH_UUID_H_OFFSET);
1783 
1784 		if ((DFH_TYPE(header) == DFH_TYPE_AFU) &&
1785 			(uuid_lo == N3000_NLB0_UUID_L) &&
1786 			(uuid_hi == N3000_NLB0_UUID_H)) {
1787 			IFPGA_RAWDEV_PMD_INFO("AFU NLB0 found @ %p", (void *)addr);
1788 			ret = nlb_afu_ctx_init(dev, addr);
1789 			if (ret)
1790 				return ret;
1791 		} else if ((DFH_TYPE(header) == DFH_TYPE_BBB) &&
1792 			(uuid_lo == N3000_DMA_UUID_L) &&
1793 			(uuid_hi == N3000_DMA_UUID_H) &&
1794 			(priv->num_dma < NUM_N3000_DMA)) {
1795 			IFPGA_RAWDEV_PMD_INFO("AFU DMA%d found @ %p",
1796 				priv->num_dma, (void *)addr);
1797 			ret = dma_afu_ctx_init(dev, priv->num_dma, addr);
1798 			if (ret)
1799 				return ret;
1800 			priv->num_dma++;
1801 		} else {
1802 			IFPGA_RAWDEV_PMD_DEBUG("DFH: type %"PRIu64
1803 				", uuid %016"PRIx64"%016"PRIx64,
1804 				DFH_TYPE(header), uuid_hi, uuid_lo);
1805 		}
1806 
1807 		next_offset = DFH_NEXT_OFFSET(header);
1808 		if (((next_offset & 0xffff) == 0xffff) || (next_offset == 0))
1809 			break;
1810 	} while (!DFH_EOL(header));
1811 
1812 	return 0;
1813 }
1814 
1815 static int n3000_afu_init(struct afu_rawdev *dev)
1816 {
1817 	if (!dev)
1818 		return -EINVAL;
1819 
1820 	if (!dev->priv) {
1821 		dev->priv = rte_zmalloc(NULL, sizeof(struct n3000_afu_priv), 0);
1822 		if (!dev->priv)
1823 			return -ENOMEM;
1824 	}
1825 
1826 	return n3000_afu_ctx_init(dev);
1827 }
1828 
1829 static int n3000_afu_config(struct afu_rawdev *dev, void *config,
1830 	size_t config_size)
1831 {
1832 	struct n3000_afu_priv *priv = NULL;
1833 	struct rte_pmd_afu_n3000_cfg *cfg = NULL;
1834 	int i = 0;
1835 	uint64_t top = 0;
1836 
1837 	if (!dev || !config || !config_size)
1838 		return -EINVAL;
1839 
1840 	priv = (struct n3000_afu_priv *)dev->priv;
1841 	if (!priv)
1842 		return -ENOENT;
1843 
1844 	if (config_size != sizeof(struct rte_pmd_afu_n3000_cfg))
1845 		return -EINVAL;
1846 
1847 	cfg = (struct rte_pmd_afu_n3000_cfg *)config;
1848 	if (cfg->type == RTE_PMD_AFU_N3000_NLB) {
1849 		if (cfg->nlb_cfg.mode != NLB_MODE_LPBK)
1850 			return -EINVAL;
1851 		if ((cfg->nlb_cfg.read_vc > NLB_VC_RANDOM) ||
1852 			(cfg->nlb_cfg.write_vc > NLB_VC_RANDOM))
1853 			return -EINVAL;
1854 		if (cfg->nlb_cfg.wrfence_vc > NLB_VC_VH1)
1855 			return -EINVAL;
1856 		if (cfg->nlb_cfg.cache_hint > NLB_RDLINE_MIXED)
1857 			return -EINVAL;
1858 		if (cfg->nlb_cfg.cache_policy > NLB_WRPUSH_I)
1859 			return -EINVAL;
1860 		if ((cfg->nlb_cfg.multi_cl != 1) &&
1861 			(cfg->nlb_cfg.multi_cl != 2) &&
1862 			(cfg->nlb_cfg.multi_cl != 4))
1863 			return -EINVAL;
1864 		if ((cfg->nlb_cfg.begin < MIN_CACHE_LINES) ||
1865 			(cfg->nlb_cfg.begin > MAX_CACHE_LINES))
1866 			return -EINVAL;
1867 		if ((cfg->nlb_cfg.end < cfg->nlb_cfg.begin) ||
1868 			(cfg->nlb_cfg.end > MAX_CACHE_LINES))
1869 			return -EINVAL;
1870 		rte_memcpy(&priv->nlb_cfg, &cfg->nlb_cfg,
1871 			sizeof(struct rte_pmd_afu_nlb_cfg));
1872 	} else if (cfg->type == RTE_PMD_AFU_N3000_DMA) {
1873 		if (cfg->dma_cfg.index >= NUM_N3000_DMA)
1874 			return -EINVAL;
1875 		i = cfg->dma_cfg.index;
1876 		if (cfg->dma_cfg.length > priv->dma_ctx[i].mem_size)
1877 			return -EINVAL;
1878 		if (cfg->dma_cfg.offset >= priv->dma_ctx[i].mem_size)
1879 			return -EINVAL;
1880 		top = cfg->dma_cfg.length + cfg->dma_cfg.offset;
1881 		if ((top == 0) || (top > priv->dma_ctx[i].mem_size))
1882 			return -EINVAL;
1883 		if (i == 3) {  /* QDR connected to DMA3 */
1884 			if (cfg->dma_cfg.length & 0x3f) {
1885 				cfg->dma_cfg.length &= ~0x3f;
1886 				IFPGA_RAWDEV_PMD_INFO("Round size to %x for QDR",
1887 					cfg->dma_cfg.length);
1888 			}
1889 		}
1890 		rte_memcpy(&priv->dma_cfg, &cfg->dma_cfg,
1891 			sizeof(struct rte_pmd_afu_dma_cfg));
1892 	} else {
1893 		IFPGA_RAWDEV_PMD_ERR("Invalid type of N3000 AFU");
1894 		return -EINVAL;
1895 	}
1896 
1897 	priv->cfg_type = cfg->type;
1898 	return 0;
1899 }
1900 
1901 static int n3000_afu_test(struct afu_rawdev *dev)
1902 {
1903 	struct n3000_afu_priv *priv = NULL;
1904 	int ret = 0;
1905 
1906 	if (!dev)
1907 		return -EINVAL;
1908 
1909 	if (!dev->priv)
1910 		return -ENOENT;
1911 
1912 	priv = (struct n3000_afu_priv *)dev->priv;
1913 
1914 	if (priv->cfg_type == RTE_PMD_AFU_N3000_NLB) {
1915 		IFPGA_RAWDEV_PMD_INFO("Test NLB");
1916 		ret = nlb_afu_test(dev);
1917 	} else if (priv->cfg_type == RTE_PMD_AFU_N3000_DMA) {
1918 		IFPGA_RAWDEV_PMD_INFO("Test DMA%u", priv->dma_cfg.index);
1919 		ret = dma_afu_test(dev);
1920 	} else {
1921 		IFPGA_RAWDEV_PMD_ERR("Please configure AFU before test");
1922 		ret = -EINVAL;
1923 	}
1924 
1925 	return ret;
1926 }
1927 
1928 static int n3000_afu_close(struct afu_rawdev *dev)
1929 {
1930 	if (!dev)
1931 		return -EINVAL;
1932 
1933 	nlb_afu_ctx_release(dev);
1934 	dma_afu_ctx_release(dev);
1935 
1936 	rte_free(dev->priv);
1937 	dev->priv = NULL;
1938 
1939 	return 0;
1940 }
1941 
1942 static int n3000_afu_dump(struct afu_rawdev *dev, FILE *f)
1943 {
1944 	struct n3000_afu_priv *priv = NULL;
1945 
1946 	if (!dev)
1947 		return -EINVAL;
1948 
1949 	priv = (struct n3000_afu_priv *)dev->priv;
1950 	if (!priv)
1951 		return -ENOENT;
1952 
1953 	if (!f)
1954 		f = stdout;
1955 
1956 	if (priv->cfg_type == RTE_PMD_AFU_N3000_NLB) {
1957 		struct nlb_afu_ctx *ctx = &priv->nlb_ctx;
1958 		fprintf(f, "addr:\t\t%p\n", (void *)ctx->addr);
1959 		fprintf(f, "dsm_ptr:\t%p\n", (void *)ctx->dsm_ptr);
1960 		fprintf(f, "dsm_iova:\t0x%"PRIx64"\n", ctx->dsm_iova);
1961 		fprintf(f, "src_ptr:\t%p\n", (void *)ctx->src_ptr);
1962 		fprintf(f, "src_iova:\t0x%"PRIx64"\n", ctx->src_iova);
1963 		fprintf(f, "dest_ptr:\t%p\n", (void *)ctx->dest_ptr);
1964 		fprintf(f, "dest_iova:\t0x%"PRIx64"\n", ctx->dest_iova);
1965 		fprintf(f, "status_ptr:\t%p\n", (void *)ctx->status_ptr);
1966 	} else if (priv->cfg_type == RTE_PMD_AFU_N3000_DMA) {
1967 		struct dma_afu_ctx *ctx = &priv->dma_ctx[priv->dma_cfg.index];
1968 		fprintf(f, "index:\t\t%d\n", ctx->index);
1969 		fprintf(f, "addr:\t\t%p\n", (void *)ctx->addr);
1970 		fprintf(f, "csr_addr:\t%p\n", (void *)ctx->csr_addr);
1971 		fprintf(f, "desc_addr:\t%p\n", (void *)ctx->desc_addr);
1972 		fprintf(f, "ase_ctrl_addr:\t%p\n", (void *)ctx->ase_ctrl_addr);
1973 		fprintf(f, "ase_data_addr:\t%p\n", (void *)ctx->ase_data_addr);
1974 		fprintf(f, "desc_buf:\t%p\n", (void *)ctx->desc_buf);
1975 		fprintf(f, "magic_buf:\t%p\n", (void *)ctx->magic_buf);
1976 		fprintf(f, "magic_iova:\t0x%"PRIx64"\n", ctx->magic_iova);
1977 	} else {
1978 		return -EINVAL;
1979 	}
1980 
1981 	return 0;
1982 }
1983 
1984 static int n3000_afu_reset(struct afu_rawdev *dev)
1985 {
1986 	uint8_t *addr = NULL;
1987 	uint64_t val = 0;
1988 
1989 	addr = (uint8_t *)n3000_afu_get_port_addr(dev);
1990 	if (!addr)
1991 		return -ENOENT;
1992 
1993 	val = rte_read64(addr + PORT_CTRL_REG);
1994 	val |= PORT_SOFT_RESET;
1995 	rte_write64(val, addr + PORT_CTRL_REG);
1996 	rte_delay_us(100);
1997 	val &= ~PORT_SOFT_RESET;
1998 	rte_write64(val, addr + PORT_CTRL_REG);
1999 
2000 	return 0;
2001 }
2002 
2003 static struct afu_ops n3000_afu_ops = {
2004 	.init = n3000_afu_init,
2005 	.config = n3000_afu_config,
2006 	.start = NULL,
2007 	.stop = NULL,
2008 	.test = n3000_afu_test,
2009 	.close = n3000_afu_close,
2010 	.dump = n3000_afu_dump,
2011 	.reset = n3000_afu_reset
2012 };
2013 
2014 static struct afu_rawdev_drv n3000_afu_drv = {
2015 	.uuid = { N3000_AFU_UUID_L, N3000_AFU_UUID_H },
2016 	.ops = &n3000_afu_ops
2017 };
2018 
2019 AFU_PMD_REGISTER(n3000_afu_drv);
2020