xref: /dpdk/drivers/dma/ioat/ioat_dmadev.c (revision f665790a5dbad7b645ff46f31d65e977324e7bfc)
/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2021 Intel Corporation
 */

#include <bus_pci_driver.h>
#include <rte_dmadev_pmd.h>
#include <rte_malloc.h>
#include <rte_prefetch.h>
#include <rte_errno.h>

#include "ioat_internal.h"

static struct rte_pci_driver ioat_pmd_drv;

RTE_LOG_REGISTER_DEFAULT(ioat_pmd_logtype, INFO);

#define DESC_SZ sizeof(struct ioat_dma_hw_desc)

#define IOAT_PMD_NAME dmadev_ioat
#define IOAT_PMD_NAME_STR RTE_STR(IOAT_PMD_NAME)

/* IOAT operations. */
enum rte_ioat_ops {
	ioat_op_copy = 0,	/* Standard DMA Operation */
	ioat_op_fill		/* Block Fill */
};

/* Configure a device. */
static int
ioat_dev_configure(struct rte_dma_dev *dev __rte_unused, const struct rte_dma_conf *dev_conf,
		uint32_t conf_sz)
{
	if (sizeof(struct rte_dma_conf) != conf_sz)
		return -EINVAL;

	if (dev_conf->nb_vchans != 1)
		return -EINVAL;

	return 0;
}

/* Set up a virtual channel for IOAT; only one vchan is supported. */
static int
ioat_vchan_setup(struct rte_dma_dev *dev, uint16_t vchan __rte_unused,
		const struct rte_dma_vchan_conf *qconf, uint32_t qconf_sz)
{
	struct ioat_dmadev *ioat = dev->fp_obj->dev_private;
	uint16_t max_desc = qconf->nb_desc;
	int i;

	if (sizeof(struct rte_dma_vchan_conf) != qconf_sz)
		return -EINVAL;

	ioat->qcfg = *qconf;

	if (!rte_is_power_of_2(max_desc)) {
		max_desc = rte_align32pow2(max_desc);
		IOAT_PMD_DEBUG("DMA dev %u using %u descriptors", dev->data->dev_id, max_desc);
		ioat->qcfg.nb_desc = max_desc;
	}

	/* In case we are reconfiguring a device, free any existing memory. */
	rte_free(ioat->desc_ring);

	ioat->desc_ring = rte_zmalloc(NULL, sizeof(*ioat->desc_ring) * max_desc, 0);
	if (ioat->desc_ring == NULL)
		return -ENOMEM;

	ioat->ring_addr = rte_mem_virt2iova(ioat->desc_ring);

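	/* The completion writeback address is the IOVA of the 'status' field
	 * embedded in the private structure, so the hardware updates
	 * ioat->status directly on each completion.
	 */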
	ioat->status_addr = rte_mem_virt2iova(ioat) + offsetof(struct ioat_dmadev, status);

	/* Ensure all counters are reset, if reconfiguring/restarting device. */
	ioat->next_read = 0;
	ioat->next_write = 0;
	ioat->last_write = 0;
	ioat->offset = 0;
	ioat->failure = 0;

	/* Reset Stats. */
	ioat->stats = (struct rte_dma_stats){0};

	/* Configure descriptor ring - each one points to next. */
	for (i = 0; i < ioat->qcfg.nb_desc; i++) {
		ioat->desc_ring[i].next = ioat->ring_addr +
				(((i + 1) % ioat->qcfg.nb_desc) * DESC_SZ);
	}

	return 0;
}

/* Recover IOAT device. */
static inline int
__ioat_recover(struct ioat_dmadev *ioat)
{
	uint32_t chanerr, retry = 0;
	uint16_t mask = ioat->qcfg.nb_desc - 1;

	/* Clear any channel errors. Reading and writing to chanerr does this. */
	chanerr = ioat->regs->chanerr;
	ioat->regs->chanerr = chanerr;

	/* Reset Channel. */
	ioat->regs->chancmd = IOAT_CHANCMD_RESET;

	/* Write new chain address to trigger state change. */
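	/* The chain restarts at the 'next' pointer of the last-read ring entry,
	 * i.e. at the descriptor indexed by next_read.
	 */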
	ioat->regs->chainaddr = ioat->desc_ring[(ioat->next_read - 1) & mask].next;
	/* Ensure channel control and status addr are correct. */
	ioat->regs->chanctrl = IOAT_CHANCTRL_ANY_ERR_ABORT_EN |
			IOAT_CHANCTRL_ERR_COMPLETION_EN;
	ioat->regs->chancmp = ioat->status_addr;

	/* Allow HW time to move to the ARMED state. */
	do {
		rte_pause();
		retry++;
	} while (ioat->regs->chansts != IOAT_CHANSTS_ARMED && retry < 200);

	/* Exit as failure if device is still HALTED. */
	if (ioat->regs->chansts != IOAT_CHANSTS_ARMED)
		return -1;

	/* Store next read as the doorbell offset, as recovery moves the HW and SW rings out of sync. */
	ioat->offset = ioat->next_read;

	/* Prime status register with previous address. */
	ioat->status = ioat->desc_ring[(ioat->next_read - 2) & mask].next;

	return 0;
}

/* Start a configured device. */
static int
ioat_dev_start(struct rte_dma_dev *dev)
{
	struct ioat_dmadev *ioat = dev->fp_obj->dev_private;

	if (ioat->qcfg.nb_desc == 0 || ioat->desc_ring == NULL)
		return -EBUSY;

	/* Inform hardware of where the descriptor ring is. */
	ioat->regs->chainaddr = ioat->ring_addr;
	/* Inform hardware of where to write the status/completions. */
	ioat->regs->chancmp = ioat->status_addr;
	/* Ensure channel control is set to abort on error, so we get status writeback. */
	ioat->regs->chanctrl = IOAT_CHANCTRL_ANY_ERR_ABORT_EN |
			IOAT_CHANCTRL_ERR_COMPLETION_EN;

	/* Prime the status register to be set to the last element. */
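	/* With this priming, __get_last_completed() reports index nb_desc - 1,
	 * so no completions are counted before the first hardware writeback.
	 */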
	ioat->status = ioat->ring_addr + ((ioat->qcfg.nb_desc - 1) * DESC_SZ);

	/* reset all counters */
	ioat->next_read = 0;
	ioat->next_write = 0;
	ioat->last_write = 0;
	ioat->offset = 0;
	ioat->failure = 0;

	IOAT_PMD_DEBUG("channel status - %s [0x%"PRIx64"]",
			chansts_readable[ioat->status & IOAT_CHANSTS_STATUS],
			ioat->status);

	if ((ioat->regs->chansts & IOAT_CHANSTS_STATUS) == IOAT_CHANSTS_HALTED) {
		IOAT_PMD_WARN("Device HALTED on start, attempting to recover");
		if (__ioat_recover(ioat) != 0) {
			IOAT_PMD_ERR("Device couldn't be recovered");
			return -1;
		}
	}

	return 0;
}

/* Stop a configured device. */
static int
ioat_dev_stop(struct rte_dma_dev *dev)
{
	struct ioat_dmadev *ioat = dev->fp_obj->dev_private;
	unsigned int chansts;
	uint32_t retry = 0;

	chansts = (unsigned int)(ioat->regs->chansts & IOAT_CHANSTS_STATUS);
	if (chansts == IOAT_CHANSTS_ACTIVE || chansts == IOAT_CHANSTS_IDLE)
		ioat->regs->chancmd = IOAT_CHANCMD_SUSPEND;
	else
		ioat->regs->chancmd = IOAT_CHANCMD_RESET;

	do {
		rte_pause();
		retry++;
		chansts = (unsigned int)(ioat->regs->chansts & IOAT_CHANSTS_STATUS);
	} while (chansts != IOAT_CHANSTS_SUSPENDED &&
			chansts != IOAT_CHANSTS_HALTED && retry < 200);

	if (chansts == IOAT_CHANSTS_SUSPENDED || chansts == IOAT_CHANSTS_HALTED)
		return 0;

	IOAT_PMD_WARN("Channel could not be suspended on stop. (chansts = %u [%s])",
			chansts, chansts_readable[chansts]);
	return -1;
}

/* Get device information of a device. */
static int
ioat_dev_info_get(const struct rte_dma_dev *dev, struct rte_dma_info *info, uint32_t size)
{
	struct ioat_dmadev *ioat = dev->fp_obj->dev_private;
	if (size < sizeof(*info))
		return -EINVAL;
	info->dev_capa = RTE_DMA_CAPA_MEM_TO_MEM |
			RTE_DMA_CAPA_OPS_COPY |
			RTE_DMA_CAPA_OPS_FILL;
	if (ioat->version >= IOAT_VER_3_4)
		info->dev_capa |= RTE_DMA_CAPA_HANDLES_ERRORS;
	info->max_vchans = 1;
	info->min_desc = 32;
	info->max_desc = 4096;
	return 0;
}

/* Close a configured device. */
static int
ioat_dev_close(struct rte_dma_dev *dev)
{
	struct ioat_dmadev *ioat;

	if (!dev) {
		IOAT_PMD_ERR("Invalid device");
		return -EINVAL;
	}

	ioat = dev->fp_obj->dev_private;
	if (!ioat) {
		IOAT_PMD_ERR("Error getting dev_private");
		return -EINVAL;
	}

	rte_free(ioat->desc_ring);

	return 0;
}

/* Trigger hardware to begin performing enqueued operations. */
static inline void
__submit(struct ioat_dmadev *ioat)
{
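	/* The doorbell (DMACOUNT) takes a cumulative descriptor count; 'offset'
	 * rebases that count after __ioat_recover() resynchronises the rings.
	 */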
	*ioat->doorbell = ioat->next_write - ioat->offset;

	ioat->stats.submitted += (uint16_t)(ioat->next_write - ioat->last_write);

	ioat->last_write = ioat->next_write;
}

/* External submit function wrapper. */
static int
ioat_submit(void *dev_private, uint16_t qid __rte_unused)
{
	struct ioat_dmadev *ioat = dev_private;

	__submit(ioat);

	return 0;
}

/* Write descriptor for enqueue. */
static inline int
__write_desc(void *dev_private, uint32_t op, uint64_t src, phys_addr_t dst,
		unsigned int length, uint64_t flags)
{
	struct ioat_dmadev *ioat = dev_private;
	uint16_t ret;
	const unsigned short mask = ioat->qcfg.nb_desc - 1;
	const unsigned short read = ioat->next_read;
	unsigned short write = ioat->next_write;
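	/* The ring keeps one slot unused so that a full ring can be told apart
	 * from an empty one; the free-running indices wrap correctly in
	 * unsigned arithmetic. E.g. with nb_desc = 64 (mask = 63), read = 100
	 * and write = 160, space = 63 + 100 - 160 = 3 slots left.
	 */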
	const unsigned short space = mask + read - write;
	struct ioat_dma_hw_desc *desc;

	if (space == 0)
		return -ENOSPC;

	ioat->next_write = write + 1;
	write &= mask;

	desc = &ioat->desc_ring[write];
	desc->size = length;
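	/* COMP_UPDATE requests a completion-address writeback for this descriptor. */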
	desc->u.control_raw = (uint32_t)((op << IOAT_CMD_OP_SHIFT) |
			(1 << IOAT_COMP_UPDATE_SHIFT));

	/* In IOAT, a fence ensures that all operations up to and including the
	 * current one are completed before moving on. DMAdev semantics instead
	 * require only that all operations before the current one are completed
	 * before the current one starts, so for IOAT we set the fence on the
	 * previous descriptor.
	 */
	if (flags & RTE_DMA_OP_FLAG_FENCE)
		ioat->desc_ring[(write - 1) & mask].u.control.fence = 1;

	desc->src_addr = src;
	desc->dest_addr = dst;

	rte_prefetch0(&ioat->desc_ring[ioat->next_write & mask]);

	ret = (uint16_t)(ioat->next_write - 1);

	if (flags & RTE_DMA_OP_FLAG_SUBMIT)
		__submit(ioat);

	return ret;
}

/* Enqueue a fill operation onto the ioat device. */
static int
ioat_enqueue_fill(void *dev_private, uint16_t qid __rte_unused, uint64_t pattern,
		rte_iova_t dst, unsigned int length, uint64_t flags)
{
	return __write_desc(dev_private, ioat_op_fill, pattern, dst, length, flags);
}

/* Enqueue a copy operation onto the ioat device. */
static int
ioat_enqueue_copy(void *dev_private, uint16_t qid __rte_unused, rte_iova_t src,
		rte_iova_t dst, unsigned int length, uint64_t flags)
{
	return __write_desc(dev_private, ioat_op_copy, src, dst, length, flags);
}

/* Dump DMA device info. */
static int
__dev_dump(void *dev_private, FILE *f)
{
	struct ioat_dmadev *ioat = dev_private;
	uint64_t chansts_masked = ioat->regs->chansts & IOAT_CHANSTS_STATUS;
	uint32_t chanerr = ioat->regs->chanerr;
	uint64_t mask = (ioat->qcfg.nb_desc - 1);
	char ver = ioat->version;
	fprintf(f, "========= IOAT =========\n");
	fprintf(f, "  IOAT version: %d.%d\n", ver >> 4, ver & 0xF);
	fprintf(f, "  Channel status: %s [0x%"PRIx64"]\n",
			chansts_readable[chansts_masked], chansts_masked);
	fprintf(f, "  ChainADDR: 0x%"PRIx64"\n", ioat->regs->chainaddr);
	if (chanerr == 0) {
		fprintf(f, "  No Channel Errors\n");
	} else {
		fprintf(f, "  ChanERR: 0x%"PRIx32"\n", chanerr);
		if (chanerr & IOAT_CHANERR_INVALID_SRC_ADDR_MASK)
			fprintf(f, "    Invalid Source Address\n");
		if (chanerr & IOAT_CHANERR_INVALID_DST_ADDR_MASK)
			fprintf(f, "    Invalid Destination Address\n");
		if (chanerr & IOAT_CHANERR_INVALID_LENGTH_MASK)
			fprintf(f, "    Invalid Descriptor Length\n");
		if (chanerr & IOAT_CHANERR_DESCRIPTOR_READ_ERROR_MASK)
			fprintf(f, "    Descriptor Read Error\n");
		if ((chanerr & ~(IOAT_CHANERR_INVALID_SRC_ADDR_MASK |
				IOAT_CHANERR_INVALID_DST_ADDR_MASK |
				IOAT_CHANERR_INVALID_LENGTH_MASK |
				IOAT_CHANERR_DESCRIPTOR_READ_ERROR_MASK)) != 0)
			fprintf(f, "    Unknown Error(s)\n");
	}
	fprintf(f, "== Private Data ==\n");
	fprintf(f, "  Config: { ring_size: %u }\n", ioat->qcfg.nb_desc);
	fprintf(f, "  Status: 0x%"PRIx64"\n", ioat->status);
	fprintf(f, "  Status IOVA: 0x%"PRIx64"\n", ioat->status_addr);
	fprintf(f, "  Status ADDR: %p\n", &ioat->status);
	fprintf(f, "  Ring IOVA: 0x%"PRIx64"\n", ioat->ring_addr);
	fprintf(f, "  Ring ADDR: 0x%"PRIx64"\n", ioat->desc_ring[0].next - 64);
	fprintf(f, "  Next write: %"PRIu16"\n", ioat->next_write);
	fprintf(f, "  Next read: %"PRIu16"\n", ioat->next_read);
	struct ioat_dma_hw_desc *desc_ring = &ioat->desc_ring[(ioat->next_write - 1) & mask];
	fprintf(f, "  Last Descriptor Written {\n");
	fprintf(f, "    Size: %"PRIu32"\n", desc_ring->size);
	fprintf(f, "    Control: 0x%"PRIx32"\n", desc_ring->u.control_raw);
	fprintf(f, "    Src: 0x%"PRIx64"\n", desc_ring->src_addr);
	fprintf(f, "    Dest: 0x%"PRIx64"\n", desc_ring->dest_addr);
	fprintf(f, "    Next: 0x%"PRIx64"\n", desc_ring->next);
	fprintf(f, "  }\n");
	fprintf(f, "  Next Descriptor {\n");
	fprintf(f, "    Size: %"PRIu32"\n", ioat->desc_ring[ioat->next_read & mask].size);
	fprintf(f, "    Src: 0x%"PRIx64"\n", ioat->desc_ring[ioat->next_read & mask].src_addr);
	fprintf(f, "    Dest: 0x%"PRIx64"\n", ioat->desc_ring[ioat->next_read & mask].dest_addr);
	fprintf(f, "    Next: 0x%"PRIx64"\n", ioat->desc_ring[ioat->next_read & mask].next);
	fprintf(f, "  }\n");
	fprintf(f, "  Key Stats { submitted: %"PRIu64", comp: %"PRIu64", failed: %"PRIu64" }\n",
			ioat->stats.submitted,
			ioat->stats.completed,
			ioat->stats.errors);

	return 0;
}

/* Public wrapper for dump. */
static int
ioat_dev_dump(const struct rte_dma_dev *dev, FILE *f)
{
	return __dev_dump(dev->fp_obj->dev_private, f);
}

/* Returns the index of the last completed operation. */
static inline uint16_t
__get_last_completed(const struct ioat_dmadev *ioat, int *state)
{
	/* Status register contains the address of the completed operation */
	uint64_t status = ioat->status;

	/* lower 3 bits indicate "transfer status" : active, idle, halted.
	 * We can ignore bit 0.
	 */
	*state = status & IOAT_CHANSTS_STATUS;

	/* If we are just after recovering from an error the address returned by
	 * status will be 0, in this case we return the offset - 1 as the last
	 * completed. If not return the status value minus the chainaddr which
	 * gives us an offset into the ring. Right shifting by 6 (divide by 64)
	 * gives the index of the completion from the HW point of view and adding
	 * the offset translates the ring index from HW to SW point of view.
	 */
	if ((status & ~IOAT_CHANSTS_STATUS) == 0)
		return ioat->offset - 1;

	return (status - ioat->ring_addr) >> 6;
}

/* Translates IOAT ChanERRs to DMA error codes. */
static inline enum rte_dma_status_code
__translate_status_ioat_to_dma(uint32_t chanerr)
{
	if (chanerr & IOAT_CHANERR_INVALID_SRC_ADDR_MASK)
		return RTE_DMA_STATUS_INVALID_SRC_ADDR;
	else if (chanerr & IOAT_CHANERR_INVALID_DST_ADDR_MASK)
		return RTE_DMA_STATUS_INVALID_DST_ADDR;
	else if (chanerr & IOAT_CHANERR_INVALID_LENGTH_MASK)
		return RTE_DMA_STATUS_INVALID_LENGTH;
	else if (chanerr & IOAT_CHANERR_DESCRIPTOR_READ_ERROR_MASK)
		return RTE_DMA_STATUS_DESCRIPTOR_READ_ERROR;
	else
		return RTE_DMA_STATUS_ERROR_UNKNOWN;
}

/* Returns details of operations that have been completed. */
static uint16_t
ioat_completed(void *dev_private, uint16_t qid __rte_unused, const uint16_t max_ops,
		uint16_t *last_idx, bool *has_error)
{
	struct ioat_dmadev *ioat = dev_private;

	const unsigned short mask = (ioat->qcfg.nb_desc - 1);
	const unsigned short read = ioat->next_read;
	unsigned short last_completed, count;
	int state, fails = 0;

	/* Do not do any work if there is an uncleared error. */
	if (ioat->failure != 0) {
		*has_error = true;
		*last_idx = ioat->next_read - 2;
		return 0;
	}

	last_completed = __get_last_completed(ioat, &state);
	count = (last_completed + 1 - read) & mask;

	/* Cap count at max_ops or set as last run in batch. */
	if (count > max_ops)
		count = max_ops;

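	/* A halted channel with an unfilled batch means the descriptor after the
	 * last completed one has faulted: record the channel error, skip over
	 * the bad descriptor (the + 1 below) and try to reset and recover the
	 * channel so later operations can proceed.
	 */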
	if (count == max_ops || state != IOAT_CHANSTS_HALTED) {
		ioat->next_read = read + count;
		*last_idx = ioat->next_read - 1;
	} else {
		*has_error = true;
		rte_errno = EIO;
		ioat->failure = ioat->regs->chanerr;
		ioat->next_read = read + count + 1;
		if (__ioat_recover(ioat) != 0) {
			IOAT_PMD_ERR("Device HALTED and could not be recovered");
			__dev_dump(dev_private, stdout);
			return 0;
		}
		__submit(ioat);
		fails++;
		*last_idx = ioat->next_read - 2;
	}

	ioat->stats.completed += count;
	ioat->stats.errors += fails;

	return count;
}

/* Returns detailed status information about operations that have been completed. */
static uint16_t
ioat_completed_status(void *dev_private, uint16_t qid __rte_unused,
		uint16_t max_ops, uint16_t *last_idx, enum rte_dma_status_code *status)
{
	struct ioat_dmadev *ioat = dev_private;

	const unsigned short mask = (ioat->qcfg.nb_desc - 1);
	const unsigned short read = ioat->next_read;
	unsigned short count, last_completed;
	uint64_t fails = 0;
	int state, i;

	last_completed = __get_last_completed(ioat, &state);
	count = (last_completed + 1 - read) & mask;

	for (i = 0; i < RTE_MIN(count + 1, max_ops); i++)
		status[i] = RTE_DMA_STATUS_SUCCESSFUL;

	/* Cap count at max_ops or set as last run in batch. */
	if (count > max_ops)
		count = max_ops;

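	/* As in ioat_completed(): a halted channel with an unfilled batch means
	 * the next descriptor faulted. Here the fault is reported in-band by
	 * translating chanerr into that operation's status[] entry before
	 * skipping it and recovering the channel.
	 */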
	if (count == max_ops || state != IOAT_CHANSTS_HALTED)
		ioat->next_read = read + count;
	else {
		rte_errno = EIO;
		status[count] = __translate_status_ioat_to_dma(ioat->regs->chanerr);
		count++;
		ioat->next_read = read + count;
		if (__ioat_recover(ioat) != 0) {
			IOAT_PMD_ERR("Device HALTED and could not be recovered");
			__dev_dump(dev_private, stdout);
			return 0;
		}
		__submit(ioat);
		fails++;
	}

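	/* A non-zero 'failure' means ioat_completed() previously detected and
	 * cleared a halt; translate its saved chanerr value here so the caller
	 * still sees an error code for that operation.
	 */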
	if (ioat->failure > 0) {
		status[0] = __translate_status_ioat_to_dma(ioat->failure);
		count = RTE_MIN(count + 1, max_ops);
		ioat->failure = 0;
	}

	*last_idx = ioat->next_read - 1;

	ioat->stats.completed += count;
	ioat->stats.errors += fails;

	return count;
}

/* Get the remaining capacity of the ring. */
static uint16_t
ioat_burst_capacity(const void *dev_private, uint16_t vchan __rte_unused)
{
	const struct ioat_dmadev *ioat = dev_private;
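	/* As in __write_desc(), one descriptor slot is kept unused so a full
	 * ring can be told apart from an empty one.
	 */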
	unsigned short size = ioat->qcfg.nb_desc - 1;
	unsigned short read = ioat->next_read;
	unsigned short write = ioat->next_write;
	unsigned short space = size - (write - read);

	return space;
}

/* Retrieve the generic stats of a DMA device. */
static int
ioat_stats_get(const struct rte_dma_dev *dev, uint16_t vchan __rte_unused,
		struct rte_dma_stats *rte_stats, uint32_t size)
{
	struct rte_dma_stats *stats = (&((struct ioat_dmadev *)dev->fp_obj->dev_private)->stats);

	if (size < sizeof(struct rte_dma_stats))
		return -EINVAL;
	if (rte_stats == NULL)
		return -EINVAL;

	*rte_stats = *stats;
	return 0;
}

/* Reset the generic stat counters for the DMA device. */
static int
ioat_stats_reset(struct rte_dma_dev *dev, uint16_t vchan __rte_unused)
{
	struct ioat_dmadev *ioat = dev->fp_obj->dev_private;

	ioat->stats = (struct rte_dma_stats){0};
	return 0;
}

/* Check if the IOAT device is idle. */
static int
ioat_vchan_status(const struct rte_dma_dev *dev, uint16_t vchan __rte_unused,
		enum rte_dma_vchan_status *status)
{
	int state = 0;
	const struct ioat_dmadev *ioat = dev->fp_obj->dev_private;
	const uint16_t mask = ioat->qcfg.nb_desc - 1;
	const uint16_t last = __get_last_completed(ioat, &state);

	if (state == IOAT_CHANSTS_HALTED || state == IOAT_CHANSTS_SUSPENDED)
		*status = RTE_DMA_VCHAN_HALTED_ERROR;
	else if (last == ((ioat->next_write - 1) & mask))
		*status = RTE_DMA_VCHAN_IDLE;
	else
		*status = RTE_DMA_VCHAN_ACTIVE;

	return 0;
}

/* Create a DMA device. */
static int
ioat_dmadev_create(const char *name, struct rte_pci_device *dev)
{
	static const struct rte_dma_dev_ops ioat_dmadev_ops = {
		.dev_close = ioat_dev_close,
		.dev_configure = ioat_dev_configure,
		.dev_dump = ioat_dev_dump,
		.dev_info_get = ioat_dev_info_get,
		.dev_start = ioat_dev_start,
		.dev_stop = ioat_dev_stop,
		.stats_get = ioat_stats_get,
		.stats_reset = ioat_stats_reset,
		.vchan_status = ioat_vchan_status,
		.vchan_setup = ioat_vchan_setup,
	};

	struct rte_dma_dev *dmadev = NULL;
	struct ioat_dmadev *ioat = NULL;
	int retry = 0;

	if (!name) {
		IOAT_PMD_ERR("Invalid name of the device!");
		return -EINVAL;
	}

	/* Allocate device structure. */
	dmadev = rte_dma_pmd_allocate(name, dev->device.numa_node, sizeof(struct ioat_dmadev));
	if (dmadev == NULL) {
		IOAT_PMD_ERR("Unable to allocate dma device");
		return -ENOMEM;
	}

	dmadev->device = &dev->device;

	dmadev->fp_obj->dev_private = dmadev->data->dev_private;

	dmadev->dev_ops = &ioat_dmadev_ops;

	dmadev->fp_obj->burst_capacity = ioat_burst_capacity;
	dmadev->fp_obj->completed = ioat_completed;
	dmadev->fp_obj->completed_status = ioat_completed_status;
	dmadev->fp_obj->copy = ioat_enqueue_copy;
	dmadev->fp_obj->fill = ioat_enqueue_fill;
	dmadev->fp_obj->submit = ioat_submit;

	ioat = dmadev->data->dev_private;
	ioat->dmadev = dmadev;
	ioat->regs = dev->mem_resource[0].addr;
	ioat->doorbell = &ioat->regs->dmacount;
	ioat->qcfg.nb_desc = 0;
	ioat->desc_ring = NULL;
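	/* CBVER appears to encode the hardware version in BCD, e.g. 0x34 for
	 * v3.4 (see the version printout in __dev_dump()).
	 */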
	ioat->version = ioat->regs->cbver;

	/* Do device initialization - reset and set error behaviour. */
	if (ioat->regs->chancnt != 1)
		IOAT_PMD_WARN("%s: Channel count == %d", __func__,
				ioat->regs->chancnt);

	/* Locked by someone else. */
	if (ioat->regs->chanctrl & IOAT_CHANCTRL_CHANNEL_IN_USE) {
		IOAT_PMD_WARN("%s: Channel appears locked", __func__);
		ioat->regs->chanctrl = 0;
	}

	/* clear any previous errors */
	if (ioat->regs->chanerr != 0) {
		uint32_t val = ioat->regs->chanerr;
		ioat->regs->chanerr = val;
	}

	ioat->regs->chancmd = IOAT_CHANCMD_SUSPEND;
	rte_delay_ms(1);
	ioat->regs->chancmd = IOAT_CHANCMD_RESET;
	rte_delay_ms(1);
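	/* The device clears the RESET bit in CHANCMD once the reset completes;
	 * poll for that, retrying for up to ~200 ms.
	 */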
	while (ioat->regs->chancmd & IOAT_CHANCMD_RESET) {
		ioat->regs->chainaddr = 0;
		rte_delay_ms(1);
		if (++retry >= 200) {
			IOAT_PMD_ERR("%s: cannot reset device. CHANCMD=%#"PRIx8
					", CHANSTS=%#"PRIx64", CHANERR=%#"PRIx32,
					__func__,
					ioat->regs->chancmd,
					ioat->regs->chansts,
					ioat->regs->chanerr);
			rte_dma_pmd_release(name);
			return -EIO;
		}
	}

	dmadev->fp_obj->dev_private = ioat;

	dmadev->state = RTE_DMA_DEV_READY;

	return 0;

}

/* Destroy a DMA device. */
static int
ioat_dmadev_destroy(const char *name)
{
	int ret;

	if (!name) {
		IOAT_PMD_ERR("Invalid device name");
		return -EINVAL;
	}

	ret = rte_dma_pmd_release(name);
	if (ret)
		IOAT_PMD_DEBUG("Device cleanup failed");

	return 0;
}

/* Probe DMA device. */
static int
ioat_dmadev_probe(struct rte_pci_driver *drv, struct rte_pci_device *dev)
{
	char name[32];

	rte_pci_device_name(&dev->addr, name, sizeof(name));
	IOAT_PMD_INFO("Init %s on NUMA node %d", name, dev->device.numa_node);

	dev->device.driver = &drv->driver;
	return ioat_dmadev_create(name, dev);
}

/* Remove DMA device. */
static int
ioat_dmadev_remove(struct rte_pci_device *dev)
{
	char name[32];

	rte_pci_device_name(&dev->addr, name, sizeof(name));

	IOAT_PMD_INFO("Closing %s on NUMA node %d",
			name, dev->device.numa_node);

	return ioat_dmadev_destroy(name);
}

static const struct rte_pci_id pci_id_ioat_map[] = {
	{ RTE_PCI_DEVICE(IOAT_VENDOR_ID, IOAT_DEVICE_ID_SKX) },
	{ RTE_PCI_DEVICE(IOAT_VENDOR_ID, IOAT_DEVICE_ID_BDX0) },
	{ RTE_PCI_DEVICE(IOAT_VENDOR_ID, IOAT_DEVICE_ID_BDX1) },
	{ RTE_PCI_DEVICE(IOAT_VENDOR_ID, IOAT_DEVICE_ID_BDX2) },
	{ RTE_PCI_DEVICE(IOAT_VENDOR_ID, IOAT_DEVICE_ID_BDX3) },
	{ RTE_PCI_DEVICE(IOAT_VENDOR_ID, IOAT_DEVICE_ID_BDX4) },
	{ RTE_PCI_DEVICE(IOAT_VENDOR_ID, IOAT_DEVICE_ID_BDX5) },
	{ RTE_PCI_DEVICE(IOAT_VENDOR_ID, IOAT_DEVICE_ID_BDX6) },
	{ RTE_PCI_DEVICE(IOAT_VENDOR_ID, IOAT_DEVICE_ID_BDX7) },
	{ RTE_PCI_DEVICE(IOAT_VENDOR_ID, IOAT_DEVICE_ID_BDXE) },
	{ RTE_PCI_DEVICE(IOAT_VENDOR_ID, IOAT_DEVICE_ID_BDXF) },
	{ RTE_PCI_DEVICE(IOAT_VENDOR_ID, IOAT_DEVICE_ID_ICX) },
	{ .vendor_id = 0, /* sentinel */ },
};

static struct rte_pci_driver ioat_pmd_drv = {
	.id_table = pci_id_ioat_map,
	.drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC,
	.probe = ioat_dmadev_probe,
	.remove = ioat_dmadev_remove,
};

RTE_PMD_REGISTER_PCI(IOAT_PMD_NAME, ioat_pmd_drv);
RTE_PMD_REGISTER_PCI_TABLE(IOAT_PMD_NAME, pci_id_ioat_map);
RTE_PMD_REGISTER_KMOD_DEP(IOAT_PMD_NAME, "* igb_uio | uio_pci_generic | vfio-pci");