/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2021 Intel Corporation
 */

#include <bus_pci_driver.h>
#include <rte_devargs.h>
#include <rte_dmadev_pmd.h>
#include <rte_malloc.h>

#include "idxd_internal.h"

#define IDXD_VENDOR_ID		0x8086
#define IDXD_DEVICE_ID_SPR	0x0B25

#define DEVICE_VERSION_1	0x100
#define DEVICE_VERSION_2	0x200
/*
 * Set bits for Traffic Class A & B
 * TC-A (Bits 2:0) and TC-B (Bits 5:3)
 */
#define IDXD_SET_TC_A_B		0x9

#define IDXD_PMD_DMADEV_NAME_PCI dmadev_idxd_pci

const struct rte_pci_id pci_id_idxd_map[] = {
	{ RTE_PCI_DEVICE(IDXD_VENDOR_ID, IDXD_DEVICE_ID_SPR) },
	{ .vendor_id = 0, /* sentinel */ },
};

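/*
 * Issue a command to the device: write the command and its operand to the CMD
 * register, then busy-poll CMDSTATUS (bounded at 1000 iterations) until the
 * active bit clears. Commands in the idxd_disable_wq..idxd_reset_wq range take
 * a bitmask of the target WQ as operand rather than the raw queue id.
 */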
static inline int
idxd_pci_dev_command(struct idxd_dmadev *idxd, enum rte_idxd_cmds command)
{
	uint32_t err_code;
	uint16_t qid = idxd->qid;
	int i = 0;

	if (command >= idxd_disable_wq && command <= idxd_reset_wq)
		qid = (1 << qid);
	rte_spinlock_lock(&idxd->u.pci->lk);
	idxd->u.pci->regs->cmd = (command << IDXD_CMD_SHIFT) | qid;

	do {
		rte_pause();
		err_code = idxd->u.pci->regs->cmdstatus;
		if (++i >= 1000) {
			IDXD_PMD_ERR("Timeout waiting for command response from HW");
			rte_spinlock_unlock(&idxd->u.pci->lk);
			err_code &= CMDSTATUS_ERR_MASK;
			return err_code;
		}
	} while (err_code & CMDSTATUS_ACTIVE_MASK);
	rte_spinlock_unlock(&idxd->u.pci->lk);

	err_code &= CMDSTATUS_ERR_MASK;
	return err_code;
}

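/*
 * Return a pointer to the config block for work queue 'wq_idx'. Each WQ's
 * config block is (32 << wq_cfg_sz) bytes, so the block for queue N starts
 * N times that size past wq_regs_base.
 */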
static uint32_t *
idxd_get_wq_cfg(struct idxd_pci_common *pci, uint8_t wq_idx)
{
	return RTE_PTR_ADD(pci->wq_regs_base,
			(uintptr_t)wq_idx << (5 + pci->wq_cfg_sz));
}

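/* Check the state field of the WQ's config block; 0x1 indicates "enabled". */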
static int
idxd_is_wq_enabled(struct idxd_dmadev *idxd)
{
	uint32_t state = idxd_get_wq_cfg(idxd->u.pci, idxd->qid)[wq_state_idx];
	return ((state >> WQ_STATE_SHIFT) & WQ_STATE_MASK) == 0x1;
}

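/*
 * Stop a dmadev by disabling its work queue. An already-disabled WQ is treated
 * as success; after the command we verify the WQ really is disabled.
 */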
static int
idxd_pci_dev_stop(struct rte_dma_dev *dev)
{
	struct idxd_dmadev *idxd = dev->fp_obj->dev_private;
	uint8_t err_code;

	if (!idxd_is_wq_enabled(idxd)) {
		IDXD_PMD_ERR("Work queue %d already disabled", idxd->qid);
		return 0;
	}

	err_code = idxd_pci_dev_command(idxd, idxd_disable_wq);
	if (err_code || idxd_is_wq_enabled(idxd)) {
		IDXD_PMD_ERR("Failed disabling work queue %d, error code: %#x",
				idxd->qid, err_code);
		return err_code == 0 ? -1 : -err_code;
	}
	IDXD_PMD_DEBUG("Work queue %d disabled OK", idxd->qid);

	return 0;
}

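/*
 * Start a dmadev by enabling its work queue. The WQ must have been configured
 * (descriptor ring allocated) before it can be enabled.
 */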
static int
idxd_pci_dev_start(struct rte_dma_dev *dev)
{
	struct idxd_dmadev *idxd = dev->fp_obj->dev_private;
	uint8_t err_code;

	if (idxd_is_wq_enabled(idxd)) {
		IDXD_PMD_WARN("WQ %d already enabled", idxd->qid);
		return 0;
	}

	if (idxd->desc_ring == NULL) {
		IDXD_PMD_ERR("WQ %d has not been fully configured", idxd->qid);
		return -EINVAL;
	}

	err_code = idxd_pci_dev_command(idxd, idxd_enable_wq);
	if (err_code || !idxd_is_wq_enabled(idxd)) {
		IDXD_PMD_ERR("Failed enabling work queue %d, error code: %#x",
				idxd->qid, err_code);
		return err_code == 0 ? -1 : -err_code;
	}
	IDXD_PMD_DEBUG("Work queue %d enabled OK", idxd->qid);

	return 0;
}

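/*
 * Close one dmadev (one WQ). Disable the WQ if needed and free its rings; the
 * shared PCI state is reference-counted, so only the last WQ to close disables
 * the device and frees the common struct.
 */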
static int
idxd_pci_dev_close(struct rte_dma_dev *dev)
{
	struct idxd_dmadev *idxd = dev->fp_obj->dev_private;
	uint8_t err_code;
	int is_last_wq;

	if (idxd_is_wq_enabled(idxd)) {
		/* disable the wq */
		err_code = idxd_pci_dev_command(idxd, idxd_disable_wq);
		if (err_code) {
			IDXD_PMD_ERR("Error disabling wq: code %#x", err_code);
			return err_code;
		}
		IDXD_PMD_DEBUG("IDXD WQ disabled OK");
	}

	/* free device memory */
	IDXD_PMD_DEBUG("Freeing device driver memory");
	rte_free(idxd->batch_comp_ring);
	rte_free(idxd->desc_ring);

	/* if this is the last WQ on the device, disable the device and free
	 * the PCI struct
	 */
	/* NOTE: review for potential ordering optimization */
	is_last_wq = (rte_atomic_fetch_sub_explicit(&idxd->u.pci->ref_count, 1,
			rte_memory_order_seq_cst) == 1);
	if (is_last_wq) {
		/* disable the device */
		err_code = idxd_pci_dev_command(idxd, idxd_disable_dev);
		if (err_code) {
			IDXD_PMD_ERR("Error disabling device: code %#x", err_code);
			return err_code;
		}
		IDXD_PMD_DEBUG("IDXD device disabled OK");
		rte_free(idxd->u.pci);
	}

	return 0;
}

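/* dmadev ops for work queues accessed directly over the PCI BARs */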
static const struct rte_dma_dev_ops idxd_pci_ops = {
	.dev_close = idxd_pci_dev_close,
	.dev_dump = idxd_dump,
	.dev_configure = idxd_configure,
	.vchan_setup = idxd_vchan_setup,
	.dev_info_get = idxd_info_get,
	.stats_get = idxd_stats_get,
	.stats_reset = idxd_stats_reset,
	.dev_start = idxd_pci_dev_start,
	.dev_stop = idxd_pci_dev_stop,
	.vchan_status = idxd_vchan_status,
};

/* each portal uses 4 x 4k pages */
#define IDXD_PORTAL_SIZE (4096 * 4)

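/*
 * One-time setup of the physical device: map the register regions from BAR0
 * and the portals from BAR2, reset the device, read its capabilities, spread
 * engines and work queues across groups, size each WQ, and finally enable the
 * device. Returns the number of usable WQs on success, or a non-zero error
 * code on failure.
 */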
static int
init_pci_device(struct rte_pci_device *dev, struct idxd_dmadev *idxd,
		unsigned int max_queues)
{
	struct idxd_pci_common *pci;
	uint8_t nb_groups, nb_engines, nb_wqs;
	uint16_t grp_offset, wq_offset; /* how far into bar0 the regs are */
	uint16_t wq_size, total_wq_size;
	uint8_t lg2_max_batch, lg2_max_copy_size;
	uint32_t version;
	unsigned int i, err_code;

	pci = rte_malloc(NULL, sizeof(*pci), 0);
	if (pci == NULL) {
		IDXD_PMD_ERR("%s: Can't allocate memory", __func__);
		err_code = -1;
		goto err;
	}
	memset(pci, 0, sizeof(*pci));
	rte_spinlock_init(&pci->lk);

	/* assign the bar registers, and then configure device */
	pci->regs = dev->mem_resource[0].addr;
	version = pci->regs->version;
	grp_offset = (uint16_t)pci->regs->offsets[0];
	pci->grp_regs = RTE_PTR_ADD(pci->regs, grp_offset * 0x100);
	wq_offset = (uint16_t)(pci->regs->offsets[0] >> 16);
	pci->wq_regs_base = RTE_PTR_ADD(pci->regs, wq_offset * 0x100);
	pci->portals = dev->mem_resource[2].addr;
	pci->wq_cfg_sz = (pci->regs->wqcap >> 24) & 0x0F;

	/* reset */
	idxd->u.pci = pci;
	err_code = idxd_pci_dev_command(idxd, idxd_reset_device);
	if (err_code) {
		IDXD_PMD_ERR("Error resetting device: code %#x", err_code);
		goto err;
	}

	/* sanity check device status */
	if (pci->regs->gensts & GENSTS_DEV_STATE_MASK) {
		/* device is enabled or needs a function-level reset (FLR) */
		IDXD_PMD_ERR("Device status is not disabled, cannot init");
		err_code = -1;
		goto err;
	}
	if (pci->regs->cmdstatus & CMDSTATUS_ACTIVE_MASK) {
		/* command in progress */
		IDXD_PMD_ERR("Device has a command in progress, cannot init");
		err_code = -1;
		goto err;
	}

	/* read basic info about the hardware for use when configuring */
	nb_groups = (uint8_t)pci->regs->grpcap;
	nb_engines = (uint8_t)pci->regs->engcap;
	nb_wqs = (uint8_t)(pci->regs->wqcap >> 16);
	total_wq_size = (uint16_t)pci->regs->wqcap;
	lg2_max_copy_size = (uint8_t)(pci->regs->gencap >> 16) & 0x1F;
	lg2_max_batch = (uint8_t)(pci->regs->gencap >> 21) & 0x0F;

	IDXD_PMD_DEBUG("nb_groups = %u, nb_engines = %u, nb_wqs = %u",
			nb_groups, nb_engines, nb_wqs);

	/* zero out any old config */
	for (i = 0; i < nb_groups; i++) {
		pci->grp_regs[i].grpengcfg = 0;
		pci->grp_regs[i].grpwqcfg[0] = 0;
		if (version <= DEVICE_VERSION_2)
			pci->grp_regs[i].grpflags |= IDXD_SET_TC_A_B;
	}
	for (i = 0; i < nb_wqs; i++)
		idxd_get_wq_cfg(pci, i)[0] = 0;

	/* limit queues if necessary */
	if (max_queues != 0 && nb_wqs > max_queues) {
		nb_wqs = max_queues;
		if (nb_engines > max_queues)
			nb_engines = max_queues;
		if (nb_groups > max_queues)
			nb_groups = max_queues;
		IDXD_PMD_DEBUG("Limiting queues to %u", nb_wqs);
	}

	/* put each engine into a separate group to avoid reordering */
	if (nb_groups > nb_engines)
		nb_groups = nb_engines;
	if (nb_groups < nb_engines)
		nb_engines = nb_groups;

	/* assign engines to groups, round-robin style */
	for (i = 0; i < nb_engines; i++) {
		IDXD_PMD_DEBUG("Assigning engine %u to group %u",
				i, i % nb_groups);
		pci->grp_regs[i % nb_groups].grpengcfg |= (1ULL << i);
	}

	/* now do the same for queues and give work slots to each queue */
	wq_size = total_wq_size / nb_wqs;
	IDXD_PMD_DEBUG("Work queue size = %u, max batch = 2^%u, max copy = 2^%u",
			wq_size, lg2_max_batch, lg2_max_copy_size);
	for (i = 0; i < nb_wqs; i++) {
		/* add work queue "i" to a group */
		IDXD_PMD_DEBUG("Assigning work queue %u to group %u",
				i, i % nb_groups);
		pci->grp_regs[i % nb_groups].grpwqcfg[0] |= (1ULL << i);
		/* now configure it, in terms of size, max batch, mode */
		idxd_get_wq_cfg(pci, i)[wq_size_idx] = wq_size;
		idxd_get_wq_cfg(pci, i)[wq_mode_idx] = (1 << WQ_PRIORITY_SHIFT) |
				WQ_MODE_DEDICATED;
		idxd_get_wq_cfg(pci, i)[wq_sizes_idx] = lg2_max_copy_size |
				(lg2_max_batch << WQ_BATCH_SZ_SHIFT);
	}

	IDXD_PMD_DEBUG("    Device Version: %"PRIx32, version);
	/* dump the group configuration to output */
	for (i = 0; i < nb_groups; i++) {
		IDXD_PMD_DEBUG("## Group %d", i);
		IDXD_PMD_DEBUG("    GRPWQCFG: %"PRIx64, pci->grp_regs[i].grpwqcfg[0]);
		IDXD_PMD_DEBUG("    GRPENGCFG: %"PRIx64, pci->grp_regs[i].grpengcfg);
		IDXD_PMD_DEBUG("    GRPFLAGS: %"PRIx32, pci->grp_regs[i].grpflags);
	}

	idxd->u.pci = pci;
	idxd->max_batches = wq_size;
	idxd->max_batch_size = 1 << lg2_max_batch;

	/* enable the device itself */
	err_code = idxd_pci_dev_command(idxd, idxd_enable_dev);
	if (err_code) {
		IDXD_PMD_ERR("Error enabling device: code %#x", err_code);
		goto err;
	}
	IDXD_PMD_DEBUG("IDXD Device enabled OK");

	return nb_wqs;

err:
	rte_free(pci);
	return err_code;
}

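/*
 * PCI probe for the whole device. The primary process initialises the hardware
 * and creates one dmadev per work queue; secondary processes only attach to
 * the dmadevs the primary already created, using the shared ref_count to learn
 * how many queues exist. An optional "max_queues=N" devarg caps the number of
 * queues that get configured.
 */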
static int
idxd_dmadev_probe_pci(struct rte_pci_driver *drv, struct rte_pci_device *dev)
{
	struct idxd_dmadev idxd = {0};
	uint8_t nb_wqs;
	int qid, ret = 0;
	char name[PCI_PRI_STR_SIZE];
	unsigned int max_queues = 0;

	rte_pci_device_name(&dev->addr, name, sizeof(name));
	IDXD_PMD_INFO("Init %s on NUMA node %d", name, dev->device.numa_node);
	dev->device.driver = &drv->driver;

	if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
		char qname[32];
		int max_qid;

		/* look up queue 0 to get the PCI structure */
		snprintf(qname, sizeof(qname), "%s-q0", name);
		IDXD_PMD_INFO("Looking up %s", qname);
		ret = idxd_dmadev_create(qname, &dev->device, NULL, &idxd_pci_ops);
		if (ret != 0) {
			IDXD_PMD_ERR("Failed to create dmadev %s", name);
			return ret;
		}
		qid = rte_dma_get_dev_id_by_name(qname);
		max_qid = rte_atomic_load_explicit(
			&((struct idxd_dmadev *)rte_dma_fp_objs[qid].dev_private)->u.pci->ref_count,
			rte_memory_order_seq_cst);

		/* we have queue 0 done, now configure the rest of the queues */
		for (qid = 1; qid < max_qid; qid++) {
			/* add the queue number to each device name */
			snprintf(qname, sizeof(qname), "%s-q%d", name, qid);
			IDXD_PMD_INFO("Looking up %s", qname);
			ret = idxd_dmadev_create(qname, &dev->device, NULL, &idxd_pci_ops);
			if (ret != 0) {
				IDXD_PMD_ERR("Failed to create dmadev %s", name);
				return ret;
			}
		}
		return 0;
	}

	if (dev->device.devargs && dev->device.devargs->args[0] != '\0') {
		/* if the number of devargs grows beyond just 1, use rte_kvargs */
		if (sscanf(dev->device.devargs->args,
				"max_queues=%u", &max_queues) != 1) {
			IDXD_PMD_ERR("Invalid device parameter: '%s'",
					dev->device.devargs->args);
			return -1;
		}
	}

	ret = init_pci_device(dev, &idxd, max_queues);
	if (ret < 0) {
		IDXD_PMD_ERR("Error initializing PCI hardware");
		return ret;
	}
	if (idxd.u.pci->portals == NULL) {
		IDXD_PMD_ERR("Error, invalid portal assigned during initialization");
		rte_free(idxd.u.pci);
		return -EINVAL;
	}
	nb_wqs = (uint8_t)ret;

	/* set up one device for each queue */
	for (qid = 0; qid < nb_wqs; qid++) {
		char qname[32];

		/* add the queue number to each device name */
		snprintf(qname, sizeof(qname), "%s-q%d", name, qid);
		idxd.qid = qid;
		idxd.portal = RTE_PTR_ADD(idxd.u.pci->portals,
				qid * IDXD_PORTAL_SIZE);
		if (idxd_is_wq_enabled(&idxd))
			IDXD_PMD_ERR("Error, WQ %u seems enabled", qid);
		ret = idxd_dmadev_create(qname, &dev->device,
				&idxd, &idxd_pci_ops);
		if (ret != 0) {
			IDXD_PMD_ERR("Failed to create dmadev %s", name);
			if (qid == 0) /* if no devices using this, free pci */
				rte_free(idxd.u.pci);
			return ret;
		}
		rte_atomic_fetch_add_explicit(&idxd.u.pci->ref_count, 1, rte_memory_order_seq_cst);
	}

	return 0;
}

static int
idxd_dmadev_destroy(const char *name)
{
	int ret = 0;

	/* rte_dma_close is called by pmd_release */
	ret = rte_dma_pmd_release(name);
	if (ret)
		IDXD_PMD_DEBUG("Device cleanup failed");

	return ret;
}

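/*
 * PCI remove: tear down every dmadev whose name starts with this PCI device's
 * address, i.e. all of the "<pci-addr>-qN" devices created at probe time.
 */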
static int
idxd_dmadev_remove_pci(struct rte_pci_device *dev)
{
	int i = 0;
	char name[PCI_PRI_STR_SIZE];

	rte_pci_device_name(&dev->addr, name, sizeof(name));

	IDXD_PMD_INFO("Closing %s on NUMA node %d", name, dev->device.numa_node);

	RTE_DMA_FOREACH_DEV(i) {
		struct rte_dma_info info;
		rte_dma_info_get(i, &info);
		if (strncmp(name, info.dev_name, strlen(name)) == 0)
			idxd_dmadev_destroy(info.dev_name);
	}

	return 0;
}

struct rte_pci_driver idxd_pmd_drv_pci = {
	.id_table = pci_id_idxd_map,
	.drv_flags = RTE_PCI_DRV_NEED_MAPPING,
	.probe = idxd_dmadev_probe_pci,
	.remove = idxd_dmadev_remove_pci,
};

RTE_PMD_REGISTER_PCI(IDXD_PMD_DMADEV_NAME_PCI, idxd_pmd_drv_pci);
RTE_PMD_REGISTER_PCI_TABLE(IDXD_PMD_DMADEV_NAME_PCI, pci_id_idxd_map);
RTE_PMD_REGISTER_KMOD_DEP(IDXD_PMD_DMADEV_NAME_PCI, "vfio-pci");
RTE_PMD_REGISTER_PARAM_STRING(dmadev_idxd_pci, "max_queues=0");