xref: /dpdk/drivers/bus/fslmc/fslmc_vfio.c (revision 68a03efeed657e6e05f281479b33b51102797e15)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  *
3  *   Copyright (c) 2015-2016 Freescale Semiconductor, Inc. All rights reserved.
4  *   Copyright 2016-2019 NXP
5  *
6  */
7 
8 #include <unistd.h>
9 #include <stdio.h>
10 #include <sys/types.h>
11 #include <string.h>
12 #include <stdlib.h>
13 #include <fcntl.h>
14 #include <errno.h>
15 #include <sys/ioctl.h>
16 #include <sys/stat.h>
17 #include <sys/mman.h>
18 #include <sys/vfs.h>
19 #include <libgen.h>
20 #include <dirent.h>
21 #include <sys/eventfd.h>
22 
23 #include <eal_filesystem.h>
24 #include <rte_mbuf.h>
25 #include <ethdev_driver.h>
26 #include <rte_malloc.h>
27 #include <rte_memcpy.h>
28 #include <rte_string_fns.h>
29 #include <rte_cycles.h>
30 #include <rte_kvargs.h>
31 #include <rte_dev.h>
32 #include <rte_bus.h>
33 #include <rte_eal_memconfig.h>
34 
35 #include "rte_fslmc.h"
36 #include "fslmc_vfio.h"
37 #include "fslmc_logs.h"
38 #include <mc/fsl_dpmng.h>
39 
40 #include "portal/dpaa2_hw_pvt.h"
41 #include "portal/dpaa2_hw_dpio.h"
42 
43 #define FSLMC_CONTAINER_MAX_LEN 8 /**< Of the format dprc.XX */
44 
45 /* Number of VFIO containers & groups with in */
46 static struct fslmc_vfio_group vfio_group;
47 static struct fslmc_vfio_container vfio_container;
48 static int container_device_fd;
49 char *fslmc_container;
50 static int fslmc_iommu_type;
51 static uint32_t *msi_intr_vaddr;
52 void *(*rte_mcp_ptr_list);
53 
54 void *
55 dpaa2_get_mcp_ptr(int portal_idx)
56 {
57 	if (rte_mcp_ptr_list)
58 		return rte_mcp_ptr_list[portal_idx];
59 	else
60 		return NULL;
61 }
62 
63 static struct rte_dpaa2_object_list dpaa2_obj_list =
64 	TAILQ_HEAD_INITIALIZER(dpaa2_obj_list);
65 
66 /*register a fslmc bus based dpaa2 driver */
67 void
68 rte_fslmc_object_register(struct rte_dpaa2_object *object)
69 {
70 	RTE_VERIFY(object);
71 
72 	TAILQ_INSERT_TAIL(&dpaa2_obj_list, object, next);
73 }
74 
75 int
76 fslmc_get_container_group(int *groupid)
77 {
78 	int ret;
79 	char *container;
80 
81 	if (!fslmc_container) {
82 		container = getenv("DPRC");
83 		if (container == NULL) {
84 			DPAA2_BUS_DEBUG("DPAA2: DPRC not available");
85 			return -EINVAL;
86 		}
87 
88 		if (strlen(container) >= FSLMC_CONTAINER_MAX_LEN) {
89 			DPAA2_BUS_ERR("Invalid container name: %s", container);
90 			return -1;
91 		}
92 
93 		fslmc_container = strdup(container);
94 		if (!fslmc_container) {
95 			DPAA2_BUS_ERR("Mem alloc failure; Container name");
96 			return -ENOMEM;
97 		}
98 	}
99 
100 	fslmc_iommu_type = (rte_vfio_noiommu_is_enabled() == 1) ?
101 		RTE_VFIO_NOIOMMU : VFIO_TYPE1_IOMMU;
102 
103 	/* get group number */
104 	ret = rte_vfio_get_group_num(SYSFS_FSL_MC_DEVICES,
105 				     fslmc_container, groupid);
106 	if (ret <= 0) {
107 		DPAA2_BUS_ERR("Unable to find %s IOMMU group", fslmc_container);
108 		return -1;
109 	}
110 
111 	DPAA2_BUS_DEBUG("Container: %s has VFIO iommu group id = %d",
112 			fslmc_container, *groupid);
113 
114 	return 0;
115 }
116 
/*
 * Attach the already-opened VFIO group (vfio_group.fd) to a VFIO
 * container and select the IOMMU type.  If the group can be attached to
 * the cached container fd, that container is reused; otherwise a fresh
 * container fd is obtained from EAL and configured.
 *
 * Returns 0 on success, negative errno-style value on failure.
 */
static int
vfio_connect_container(void)
{
	int fd, ret;

	/* NOTE(review): 'used' means the single static container slot is
	 * already taken; the log text reads oddly but the check is the
	 * intended guard against double setup.
	 */
	if (vfio_container.used) {
		DPAA2_BUS_DEBUG("No container available");
		return -1;
	}

	/* Try connecting to vfio container if already created */
	if (!ioctl(vfio_group.fd, VFIO_GROUP_SET_CONTAINER,
		&vfio_container.fd)) {
		DPAA2_BUS_DEBUG(
		    "Container pre-exists with FD[0x%x] for this group",
		    vfio_container.fd);
		vfio_group.container = &vfio_container;
		return 0;
	}

	/* Opens main vfio file descriptor which represents the "container" */
	fd = rte_vfio_get_container_fd();
	if (fd < 0) {
		DPAA2_BUS_ERR("Failed to open VFIO container");
		return -errno;
	}

	/* Check whether support for SMMU type IOMMU present or not */
	if (ioctl(fd, VFIO_CHECK_EXTENSION, fslmc_iommu_type)) {
		/* Connect group to container */
		ret = ioctl(vfio_group.fd, VFIO_GROUP_SET_CONTAINER, &fd);
		if (ret) {
			DPAA2_BUS_ERR("Failed to setup group container");
			close(fd);
			return -errno;
		}

		/* IOMMU type can only be set once a group is attached */
		ret = ioctl(fd, VFIO_SET_IOMMU, fslmc_iommu_type);
		if (ret) {
			DPAA2_BUS_ERR("Failed to setup VFIO iommu");
			close(fd);
			return -errno;
		}
	} else {
		DPAA2_BUS_ERR("No supported IOMMU available");
		close(fd);
		return -EINVAL;
	}

	/* Record the group <-> container association for later DMA maps */
	vfio_container.used = 1;
	vfio_container.fd = fd;
	vfio_container.group = &vfio_group;
	vfio_group.container = &vfio_container;

	return 0;
}
173 
174 static int vfio_map_irq_region(struct fslmc_vfio_group *group)
175 {
176 	int ret;
177 	unsigned long *vaddr = NULL;
178 	struct vfio_iommu_type1_dma_map map = {
179 		.argsz = sizeof(map),
180 		.flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE,
181 		.vaddr = 0x6030000,
182 		.iova = 0x6030000,
183 		.size = 0x1000,
184 	};
185 
186 	vaddr = (unsigned long *)mmap(NULL, 0x1000, PROT_WRITE |
187 		PROT_READ, MAP_SHARED, container_device_fd, 0x6030000);
188 	if (vaddr == MAP_FAILED) {
189 		DPAA2_BUS_INFO("Unable to map region (errno = %d)", errno);
190 		return -errno;
191 	}
192 
193 	msi_intr_vaddr = (uint32_t *)((char *)(vaddr) + 64);
194 	map.vaddr = (unsigned long)vaddr;
195 	ret = ioctl(group->container->fd, VFIO_IOMMU_MAP_DMA, &map);
196 	if (ret == 0)
197 		return 0;
198 
199 	DPAA2_BUS_ERR("Unable to map DMA address (errno = %d)", errno);
200 	return -errno;
201 }
202 
203 static int fslmc_map_dma(uint64_t vaddr, rte_iova_t iovaddr, size_t len);
204 static int fslmc_unmap_dma(uint64_t vaddr, rte_iova_t iovaddr, size_t len);
205 
/*
 * EAL memory-event callback: DMA-map newly allocated memory segments
 * into the VFIO container (and unmap freed ones) so devices can access
 * hot-plugged memory.
 *
 * @param type  RTE_MEM_EVENT_ALLOC or RTE_MEM_EVENT_FREE.
 * @param addr  start of the affected virtual area.
 * @param len   length of the affected area in bytes.
 */
static void
fslmc_memevent_cb(enum rte_mem_event type, const void *addr, size_t len,
		void *arg __rte_unused)
{
	struct rte_memseg_list *msl;
	struct rte_memseg *ms;
	size_t cur_len = 0, map_len = 0;
	uint64_t virt_addr;
	rte_iova_t iova_addr;
	int ret;

	msl = rte_mem_virt2memseg_list(addr);

	/* The area may span several memsegs; walk and (un)map each one */
	while (cur_len < len) {
		const void *va = RTE_PTR_ADD(addr, cur_len);

		ms = rte_mem_virt2memseg(va, msl);
		iova_addr = ms->iova;
		virt_addr = ms->addr_64;
		map_len = ms->len;

		DPAA2_BUS_DEBUG("Request for %s, va=%p, "
				"virt_addr=0x%" PRIx64 ", "
				"iova=0x%" PRIx64 ", map_len=%zu",
				type == RTE_MEM_EVENT_ALLOC ?
					"alloc" : "dealloc",
				va, virt_addr, iova_addr, map_len);

		/* iova_addr may be set to RTE_BAD_IOVA */
		if (iova_addr == RTE_BAD_IOVA) {
			DPAA2_BUS_DEBUG("Segment has invalid iova, skipping\n");
			cur_len += map_len;
			continue;
		}

		if (type == RTE_MEM_EVENT_ALLOC)
			ret = fslmc_map_dma(virt_addr, iova_addr, map_len);
		else
			ret = fslmc_unmap_dma(virt_addr, iova_addr, map_len);

		/* Memory event callbacks cannot propagate errors; log and
		 * stop processing the remaining range.
		 */
		if (ret != 0) {
			DPAA2_BUS_ERR("DMA Mapping/Unmapping failed. "
					"Map=%d, addr=%p, len=%zu, err:(%d)",
					type, va, map_len, ret);
			return;
		}

		cur_len += map_len;
	}

	if (type == RTE_MEM_EVENT_ALLOC)
		DPAA2_BUS_DEBUG("Total Mapped: addr=%p, len=%zu",
				addr, len);
	else
		DPAA2_BUS_DEBUG("Total Unmapped: addr=%p, len=%zu",
				addr, len);
}
263 
/*
 * DMA-map one contiguous range into the fslmc VFIO container.
 *
 * @param vaddr    process virtual address of the range.
 * @param iovaddr  IOVA to program; used only when
 *                 RTE_LIBRTE_DPAA2_USE_PHYS_IOVA is defined, otherwise
 *                 the IOVA equals the virtual address.
 * @param len      length in bytes.
 * @return 0 on success (or in NOIOMMU mode), -1 on failure.
 */
static int
fslmc_map_dma(uint64_t vaddr, rte_iova_t iovaddr __rte_unused, size_t len)
{
	struct fslmc_vfio_group *group;
	struct vfio_iommu_type1_dma_map dma_map = {
		.argsz = sizeof(struct vfio_iommu_type1_dma_map),
		.flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE,
	};
	int ret;

	/* Nothing to program into the SMMU when no IOMMU is in use */
	if (fslmc_iommu_type == RTE_VFIO_NOIOMMU) {
		DPAA2_BUS_DEBUG("Running in NOIOMMU mode");
		return 0;
	}

	dma_map.size = len;
	dma_map.vaddr = vaddr;

#ifdef RTE_LIBRTE_DPAA2_USE_PHYS_IOVA
	dma_map.iova = iovaddr;
#else
	dma_map.iova = dma_map.vaddr;
#endif

	/* SET DMA MAP for IOMMU */
	group = &vfio_group;

	if (!group->container) {
		DPAA2_BUS_ERR("Container is not connected ");
		return -1;
	}

	DPAA2_BUS_DEBUG("--> Map address: 0x%"PRIx64", size: %"PRIu64"",
			(uint64_t)dma_map.vaddr, (uint64_t)dma_map.size);
	ret = ioctl(group->container->fd, VFIO_IOMMU_MAP_DMA, &dma_map);
	if (ret) {
		DPAA2_BUS_ERR("VFIO_IOMMU_MAP_DMA API(errno = %d)",
				errno);
		return -1;
	}

	return 0;
}
307 
/*
 * Remove one contiguous DMA mapping from the fslmc VFIO container.
 *
 * @param vaddr    virtual address of the range (used as the IOVA key —
 *                 NOTE(review): correct only because fslmc_map_dma sets
 *                 iova == vaddr unless RTE_LIBRTE_DPAA2_USE_PHYS_IOVA
 *                 is defined; verify behaviour for PA mode).
 * @param iovaddr  unused.
 * @param len      length in bytes.
 * @return 0 on success (or in NOIOMMU mode), -1 on failure.
 */
static int
fslmc_unmap_dma(uint64_t vaddr, uint64_t iovaddr __rte_unused, size_t len)
{
	struct fslmc_vfio_group *group;
	struct vfio_iommu_type1_dma_unmap dma_unmap = {
		.argsz = sizeof(struct vfio_iommu_type1_dma_unmap),
		.flags = 0,
	};
	int ret;

	/* Nothing was mapped in NOIOMMU mode, so nothing to undo */
	if (fslmc_iommu_type == RTE_VFIO_NOIOMMU) {
		DPAA2_BUS_DEBUG("Running in NOIOMMU mode");
		return 0;
	}

	dma_unmap.size = len;
	dma_unmap.iova = vaddr;

	/* SET DMA MAP for IOMMU */
	group = &vfio_group;

	if (!group->container) {
		DPAA2_BUS_ERR("Container is not connected ");
		return -1;
	}

	DPAA2_BUS_DEBUG("--> Unmap address: 0x%"PRIx64", size: %"PRIu64"",
			(uint64_t)dma_unmap.iova, (uint64_t)dma_unmap.size);
	ret = ioctl(group->container->fd, VFIO_IOMMU_UNMAP_DMA, &dma_unmap);
	if (ret) {
		DPAA2_BUS_ERR("VFIO_IOMMU_UNMAP_DMA API(errno = %d)",
				errno);
		return -1;
	}

	return 0;
}
345 
346 static int
347 fslmc_dmamap_seg(const struct rte_memseg_list *msl __rte_unused,
348 		const struct rte_memseg *ms, void *arg)
349 {
350 	int *n_segs = arg;
351 	int ret;
352 
353 	/* if IOVA address is invalid, skip */
354 	if (ms->iova == RTE_BAD_IOVA)
355 		return 0;
356 
357 	ret = fslmc_map_dma(ms->addr_64, ms->iova, ms->len);
358 	if (ret)
359 		DPAA2_BUS_ERR("Unable to VFIO map (addr=%p, len=%zu)",
360 				ms->addr, ms->len);
361 	else
362 		(*n_segs)++;
363 
364 	return ret;
365 }
366 
367 int
368 rte_fslmc_vfio_mem_dmamap(uint64_t vaddr, uint64_t iova, uint64_t size)
369 {
370 	int ret;
371 	struct fslmc_vfio_group *group;
372 	struct vfio_iommu_type1_dma_map dma_map = {
373 		.argsz = sizeof(struct vfio_iommu_type1_dma_map),
374 		.flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE,
375 	};
376 
377 	if (fslmc_iommu_type == RTE_VFIO_NOIOMMU) {
378 		DPAA2_BUS_DEBUG("Running in NOIOMMU mode");
379 		return 0;
380 	}
381 
382 	/* SET DMA MAP for IOMMU */
383 	group = &vfio_group;
384 	if (!group->container) {
385 		DPAA2_BUS_ERR("Container is not connected");
386 		return -1;
387 	}
388 
389 	dma_map.size = size;
390 	dma_map.vaddr = vaddr;
391 	dma_map.iova = iova;
392 
393 	DPAA2_BUS_DEBUG("VFIOdmamap 0x%"PRIx64":0x%"PRIx64",size 0x%"PRIx64"\n",
394 			(uint64_t)dma_map.vaddr, (uint64_t)dma_map.iova,
395 			(uint64_t)dma_map.size);
396 	ret = ioctl(group->container->fd, VFIO_IOMMU_MAP_DMA,
397 		    &dma_map);
398 	if (ret) {
399 		printf("Unable to map DMA address (errno = %d)\n",
400 			errno);
401 		return ret;
402 	}
403 
404 	return 0;
405 }
406 
/*
 * DMA-map all existing EAL memory segments into the VFIO container and
 * register a memory-event callback so future hotplugged segments are
 * mapped/unmapped automatically.  The memory lock is held across both
 * steps so no alloc/free event can race with the initial walk.
 *
 * @return 0 on success, -1 if the initial segment walk fails.
 */
int rte_fslmc_vfio_dmamap(void)
{
	int i = 0, ret;

	/* Lock before parsing and registering callback to memory subsystem */
	rte_mcfg_mem_read_lock();

	if (rte_memseg_walk(fslmc_dmamap_seg, &i) < 0) {
		rte_mcfg_mem_read_unlock();
		return -1;
	}

	/* Callback registration failure is non-fatal: static mappings done
	 * above still work, only hotplug support is degraded.
	 */
	ret = rte_mem_event_callback_register("fslmc_memevent_clb",
			fslmc_memevent_cb, NULL);
	if (ret && rte_errno == ENOTSUP)
		DPAA2_BUS_DEBUG("Memory event callbacks not supported");
	else if (ret)
		DPAA2_BUS_DEBUG("Unable to install memory handler");
	else
		DPAA2_BUS_DEBUG("Installed memory callback handler");

	DPAA2_BUS_DEBUG("Total %d segments found.", i);

	/* TODO - This is a W.A. as VFIO currently does not add the mapping of
	 * the interrupt region to SMMU. This should be removed once the
	 * support is added in the Kernel.
	 */
	vfio_map_irq_region(&vfio_group);

	/* Existing segments have been mapped and memory callback for hotplug
	 * has been installed.
	 */
	rte_mcfg_mem_read_unlock();

	return 0;
}
443 
444 static int
445 fslmc_vfio_setup_device(const char *sysfs_base, const char *dev_addr,
446 		int *vfio_dev_fd, struct vfio_device_info *device_info)
447 {
448 	struct vfio_group_status group_status = {
449 			.argsz = sizeof(group_status)
450 	};
451 	int vfio_group_fd, vfio_container_fd, iommu_group_no, ret;
452 
453 	/* get group number */
454 	ret = rte_vfio_get_group_num(sysfs_base, dev_addr, &iommu_group_no);
455 	if (ret < 0)
456 		return -1;
457 
458 	/* get the actual group fd */
459 	vfio_group_fd = rte_vfio_get_group_fd(iommu_group_no);
460 	if (vfio_group_fd < 0 && vfio_group_fd != -ENOENT)
461 		return -1;
462 
463 	/*
464 	 * if vfio_group_fd == -ENOENT, that means the device
465 	 * isn't managed by VFIO
466 	 */
467 	if (vfio_group_fd == -ENOENT) {
468 		RTE_LOG(WARNING, EAL, " %s not managed by VFIO driver, skipping\n",
469 				dev_addr);
470 		return 1;
471 	}
472 
473 	/* Opens main vfio file descriptor which represents the "container" */
474 	vfio_container_fd = rte_vfio_get_container_fd();
475 	if (vfio_container_fd < 0) {
476 		DPAA2_BUS_ERR("Failed to open VFIO container");
477 		return -errno;
478 	}
479 
480 	/* check if the group is viable */
481 	ret = ioctl(vfio_group_fd, VFIO_GROUP_GET_STATUS, &group_status);
482 	if (ret) {
483 		DPAA2_BUS_ERR("  %s cannot get group status, "
484 				"error %i (%s)\n", dev_addr,
485 				errno, strerror(errno));
486 		close(vfio_group_fd);
487 		rte_vfio_clear_group(vfio_group_fd);
488 		return -1;
489 	} else if (!(group_status.flags & VFIO_GROUP_FLAGS_VIABLE)) {
490 		DPAA2_BUS_ERR("  %s VFIO group is not viable!\n", dev_addr);
491 		close(vfio_group_fd);
492 		rte_vfio_clear_group(vfio_group_fd);
493 		return -1;
494 	}
495 	/* At this point, we know that this group is viable (meaning,
496 	 * all devices are either bound to VFIO or not bound to anything)
497 	 */
498 
499 	/* check if group does not have a container yet */
500 	if (!(group_status.flags & VFIO_GROUP_FLAGS_CONTAINER_SET)) {
501 
502 		/* add group to a container */
503 		ret = ioctl(vfio_group_fd, VFIO_GROUP_SET_CONTAINER,
504 				&vfio_container_fd);
505 		if (ret) {
506 			DPAA2_BUS_ERR("  %s cannot add VFIO group to container, "
507 					"error %i (%s)\n", dev_addr,
508 					errno, strerror(errno));
509 			close(vfio_group_fd);
510 			close(vfio_container_fd);
511 			rte_vfio_clear_group(vfio_group_fd);
512 			return -1;
513 		}
514 
515 		/*
516 		 * set an IOMMU type for container
517 		 *
518 		 */
519 		if (ioctl(vfio_container_fd, VFIO_CHECK_EXTENSION,
520 			  fslmc_iommu_type)) {
521 			ret = ioctl(vfio_container_fd, VFIO_SET_IOMMU,
522 				    fslmc_iommu_type);
523 			if (ret) {
524 				DPAA2_BUS_ERR("Failed to setup VFIO iommu");
525 				close(vfio_group_fd);
526 				close(vfio_container_fd);
527 				return -errno;
528 			}
529 		} else {
530 			DPAA2_BUS_ERR("No supported IOMMU available");
531 			close(vfio_group_fd);
532 			close(vfio_container_fd);
533 			return -EINVAL;
534 		}
535 	}
536 
537 	/* get a file descriptor for the device */
538 	*vfio_dev_fd = ioctl(vfio_group_fd, VFIO_GROUP_GET_DEVICE_FD, dev_addr);
539 	if (*vfio_dev_fd < 0) {
540 		/* if we cannot get a device fd, this implies a problem with
541 		 * the VFIO group or the container not having IOMMU configured.
542 		 */
543 
544 		DPAA2_BUS_WARN("Getting a vfio_dev_fd for %s failed", dev_addr);
545 		close(vfio_group_fd);
546 		close(vfio_container_fd);
547 		rte_vfio_clear_group(vfio_group_fd);
548 		return -1;
549 	}
550 
551 	/* test and setup the device */
552 	ret = ioctl(*vfio_dev_fd, VFIO_DEVICE_GET_INFO, device_info);
553 	if (ret) {
554 		DPAA2_BUS_ERR("  %s cannot get device info, error %i (%s)",
555 				dev_addr, errno, strerror(errno));
556 		close(*vfio_dev_fd);
557 		close(vfio_group_fd);
558 		close(vfio_container_fd);
559 		rte_vfio_clear_group(vfio_group_fd);
560 		return -1;
561 	}
562 
563 	return 0;
564 }
565 
566 static intptr_t vfio_map_mcp_obj(const char *mcp_obj)
567 {
568 	intptr_t v_addr = (intptr_t)MAP_FAILED;
569 	int32_t ret, mc_fd;
570 	struct vfio_group_status status = { .argsz = sizeof(status) };
571 
572 	struct vfio_device_info d_info = { .argsz = sizeof(d_info) };
573 	struct vfio_region_info reg_info = { .argsz = sizeof(reg_info) };
574 
575 	fslmc_vfio_setup_device(SYSFS_FSL_MC_DEVICES, mcp_obj,
576 			&mc_fd, &d_info);
577 
578 	/* getting device region info*/
579 	ret = ioctl(mc_fd, VFIO_DEVICE_GET_REGION_INFO, &reg_info);
580 	if (ret < 0) {
581 		DPAA2_BUS_ERR("Error in VFIO getting REGION_INFO");
582 		goto MC_FAILURE;
583 	}
584 
585 	v_addr = (size_t)mmap(NULL, reg_info.size,
586 		PROT_WRITE | PROT_READ, MAP_SHARED,
587 		mc_fd, reg_info.offset);
588 
589 MC_FAILURE:
590 	close(mc_fd);
591 
592 	return v_addr;
593 }
594 
595 #define IRQ_SET_BUF_LEN  (sizeof(struct vfio_irq_set) + sizeof(int))
596 
597 int rte_dpaa2_intr_enable(struct rte_intr_handle *intr_handle, int index)
598 {
599 	int len, ret;
600 	char irq_set_buf[IRQ_SET_BUF_LEN];
601 	struct vfio_irq_set *irq_set;
602 	int *fd_ptr;
603 
604 	len = sizeof(irq_set_buf);
605 
606 	irq_set = (struct vfio_irq_set *)irq_set_buf;
607 	irq_set->argsz = len;
608 	irq_set->count = 1;
609 	irq_set->flags =
610 		VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER;
611 	irq_set->index = index;
612 	irq_set->start = 0;
613 	fd_ptr = (int *)&irq_set->data;
614 	*fd_ptr = intr_handle->fd;
615 
616 	ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set);
617 	if (ret) {
618 		DPAA2_BUS_ERR("Error:dpaa2 SET IRQs fd=%d, err = %d(%s)",
619 			      intr_handle->fd, errno, strerror(errno));
620 		return ret;
621 	}
622 
623 	return ret;
624 }
625 
626 int rte_dpaa2_intr_disable(struct rte_intr_handle *intr_handle, int index)
627 {
628 	struct vfio_irq_set *irq_set;
629 	char irq_set_buf[IRQ_SET_BUF_LEN];
630 	int len, ret;
631 
632 	len = sizeof(struct vfio_irq_set);
633 
634 	irq_set = (struct vfio_irq_set *)irq_set_buf;
635 	irq_set->argsz = len;
636 	irq_set->flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_TRIGGER;
637 	irq_set->index = index;
638 	irq_set->start = 0;
639 	irq_set->count = 0;
640 
641 	ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set);
642 	if (ret)
643 		DPAA2_BUS_ERR(
644 			"Error disabling dpaa2 interrupts for fd %d",
645 			intr_handle->fd);
646 
647 	return ret;
648 }
649 
650 /* set up interrupt support (but not enable interrupts) */
/*
 * Set up interrupt support for a VFIO device (but do not enable
 * interrupts): probe the device's IRQ indices, pick the first one that
 * supports eventfd signalling, create an eventfd for it and record it
 * in @intr_handle.
 *
 * @return 0 when a usable vector was found, -1 otherwise.
 */
int
rte_dpaa2_vfio_setup_intr(struct rte_intr_handle *intr_handle,
			  int vfio_dev_fd,
			  int num_irqs)
{
	int i, ret;

	/* start from MSI-X interrupt type */
	for (i = 0; i < num_irqs; i++) {
		struct vfio_irq_info irq_info = { .argsz = sizeof(irq_info) };
		int fd = -1;

		irq_info.index = i;

		ret = ioctl(vfio_dev_fd, VFIO_DEVICE_GET_IRQ_INFO, &irq_info);
		if (ret < 0) {
			DPAA2_BUS_ERR("Cannot get IRQ(%d) info, error %i (%s)",
				      i, errno, strerror(errno));
			return -1;
		}

		/* if this vector cannot be used with eventfd,
		 * fail if we explicitly
		 * specified interrupt type, otherwise continue
		 */
		if ((irq_info.flags & VFIO_IRQ_INFO_EVENTFD) == 0)
			continue;

		/* set up an eventfd for interrupts */
		fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
		if (fd < 0) {
			DPAA2_BUS_ERR("Cannot set up eventfd, error %i (%s)",
				      errno, strerror(errno));
			return -1;
		}

		/* First eventfd-capable vector wins; stop searching */
		intr_handle->fd = fd;
		intr_handle->type = RTE_INTR_HANDLE_VFIO_MSI;
		intr_handle->vfio_dev_fd = vfio_dev_fd;

		return 0;
	}

	/* if we're here, we haven't found a suitable interrupt vector */
	return -1;
}
697 
698 /*
699  * fslmc_process_iodevices for processing only IO (ETH, CRYPTO, and possibly
700  * EVENT) devices.
701  */
702 static int
703 fslmc_process_iodevices(struct rte_dpaa2_device *dev)
704 {
705 	int dev_fd;
706 	struct vfio_device_info device_info = { .argsz = sizeof(device_info) };
707 	struct rte_dpaa2_object *object = NULL;
708 
709 	fslmc_vfio_setup_device(SYSFS_FSL_MC_DEVICES, dev->device.name,
710 			&dev_fd, &device_info);
711 
712 	switch (dev->dev_type) {
713 	case DPAA2_ETH:
714 		rte_dpaa2_vfio_setup_intr(&dev->intr_handle, dev_fd,
715 					  device_info.num_irqs);
716 		break;
717 	case DPAA2_CON:
718 	case DPAA2_IO:
719 	case DPAA2_CI:
720 	case DPAA2_BPOOL:
721 	case DPAA2_DPRTC:
722 	case DPAA2_MUX:
723 		TAILQ_FOREACH(object, &dpaa2_obj_list, next) {
724 			if (dev->dev_type == object->dev_type)
725 				object->create(dev_fd, &device_info,
726 					       dev->object_id);
727 			else
728 				continue;
729 		}
730 		break;
731 	default:
732 		break;
733 	}
734 
735 	DPAA2_BUS_LOG(DEBUG, "Device (%s) abstracted from VFIO",
736 		      dev->device.name);
737 	return 0;
738 }
739 
/*
 * Map the MC portal device @dev and record it in rte_mcp_ptr_list.
 * Primary processes additionally validate the MC firmware version;
 * secondaries skip the check as the primary already performed it.
 *
 * @return 0 on success; -ENOMEM or -1 on failure (portal list freed).
 */
static int
fslmc_process_mcp(struct rte_dpaa2_device *dev)
{
	int ret;
	intptr_t v_addr;
	struct fsl_mc_io dpmng  = {0};
	struct mc_version mc_ver_info = {0};

	rte_mcp_ptr_list = malloc(sizeof(void *) * (MC_PORTAL_INDEX + 1));
	if (!rte_mcp_ptr_list) {
		DPAA2_BUS_ERR("Unable to allocate MC portal memory");
		ret = -ENOMEM;
		goto cleanup;
	}

	v_addr = vfio_map_mcp_obj(dev->device.name);
	if (v_addr == (intptr_t)MAP_FAILED) {
		DPAA2_BUS_ERR("Error mapping region (errno = %d)", errno);
		ret = -1;
		goto cleanup;
	}

	/* check the MC version compatibility */
	dpmng.regs = (void *)v_addr;

	/* In case of secondary processes, MC version check is no longer
	 * required.
	 */
	if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
		rte_mcp_ptr_list[MC_PORTAL_INDEX] = (void *)v_addr;
		return 0;
	}

	/* NOTE(review): failures past this point free the portal list but
	 * do not munmap the already-mapped portal region — confirm whether
	 * that mapping should also be torn down.
	 */
	if (mc_get_version(&dpmng, CMD_PRI_LOW, &mc_ver_info)) {
		DPAA2_BUS_ERR("Unable to obtain MC version");
		ret = -1;
		goto cleanup;
	}

	/* Major must match exactly; minor must be at least the build-time
	 * minimum.
	 */
	if ((mc_ver_info.major != MC_VER_MAJOR) ||
	    (mc_ver_info.minor < MC_VER_MINOR)) {
		DPAA2_BUS_ERR("DPAA2 MC version not compatible!"
			      " Expected %d.%d.x, Detected %d.%d.%d",
			      MC_VER_MAJOR, MC_VER_MINOR,
			      mc_ver_info.major, mc_ver_info.minor,
			      mc_ver_info.revision);
		ret = -1;
		goto cleanup;
	}
	rte_mcp_ptr_list[MC_PORTAL_INDEX] = (void *)v_addr;

	return 0;

cleanup:
	if (rte_mcp_ptr_list) {
		free(rte_mcp_ptr_list);
		rte_mcp_ptr_list = NULL;
	}

	return ret;
}
801 
/*
 * Walk the fslmc bus device list and bring up all devices in the
 * required order: first the MC portal (dpmcp), then I/O and control
 * objects.  Handles blocklisted devices and primary/secondary process
 * partitioning of dpmcp/dpio resources.
 *
 * @return 0 on success, negative value on a fatal setup failure.
 */
int
fslmc_vfio_process_group(void)
{
	int ret;
	int found_mportal = 0;
	struct rte_dpaa2_device *dev, *dev_temp;
	bool is_dpmcp_in_blocklist = false, is_dpio_in_blocklist = false;
	int dpmcp_count = 0, dpio_count = 0, current_device;

	/* First pass: count dpmcp/dpio devices and note blocklisting, as
	 * the later passes need the totals for primary/secondary splits.
	 */
	TAILQ_FOREACH_SAFE(dev, &rte_fslmc_bus.device_list, next, dev_temp) {
		if (dev->dev_type == DPAA2_MPORTAL) {
			dpmcp_count++;
			if (dev->device.devargs &&
			    dev->device.devargs->policy == RTE_DEV_BLOCKED)
				is_dpmcp_in_blocklist = true;
		}
		if (dev->dev_type == DPAA2_IO) {
			dpio_count++;
			if (dev->device.devargs &&
			    dev->device.devargs->policy == RTE_DEV_BLOCKED)
				is_dpio_in_blocklist = true;
		}
	}

	/* Search the MCP as that should be initialized first. */
	current_device = 0;
	TAILQ_FOREACH_SAFE(dev, &rte_fslmc_bus.device_list, next, dev_temp) {
		if (dev->dev_type == DPAA2_MPORTAL) {
			current_device++;
			/* NOTE(review): blocked devices are unlinked but not
			 * freed here, unlike the processed path below —
			 * confirm whether this leak is intentional.
			 */
			if (dev->device.devargs &&
			    dev->device.devargs->policy == RTE_DEV_BLOCKED) {
				DPAA2_BUS_LOG(DEBUG, "%s Blocked, skipping",
					      dev->device.name);
				TAILQ_REMOVE(&rte_fslmc_bus.device_list,
						dev, next);
				continue;
			}

			/* Secondary processes use only the last dpmcp (the
			 * primary consumes the earlier ones).
			 */
			if (rte_eal_process_type() == RTE_PROC_SECONDARY &&
			    !is_dpmcp_in_blocklist) {
				if (dpmcp_count == 1 ||
				    current_device != dpmcp_count) {
					TAILQ_REMOVE(&rte_fslmc_bus.device_list,
						     dev, next);
					continue;
				}
			}

			if (!found_mportal) {
				ret = fslmc_process_mcp(dev);
				if (ret) {
					DPAA2_BUS_ERR("Unable to map MC Portal");
					return -1;
				}
				found_mportal = 1;
			}

			TAILQ_REMOVE(&rte_fslmc_bus.device_list, dev, next);
			free(dev);
			dev = NULL;
			/* Ideally there is only a single dpmcp, but in case
			 * multiple exists, looping on remaining devices.
			 */
		}
	}

	/* Cannot continue if there is not even a single mportal */
	if (!found_mportal) {
		DPAA2_BUS_ERR("No MC Portal device found. Not continuing");
		return -1;
	}

	/* Second pass: initialize the remaining devices */
	current_device = 0;
	TAILQ_FOREACH_SAFE(dev, &rte_fslmc_bus.device_list, next, dev_temp) {
		if (dev->dev_type == DPAA2_IO)
			current_device++;
		if (dev->device.devargs &&
		    dev->device.devargs->policy == RTE_DEV_BLOCKED) {
			DPAA2_BUS_LOG(DEBUG, "%s Blocked, skipping",
				      dev->device.name);
			TAILQ_REMOVE(&rte_fslmc_bus.device_list, dev, next);
			continue;
		}
		/* Secondaries only care about datapath device types */
		if (rte_eal_process_type() == RTE_PROC_SECONDARY &&
		    dev->dev_type != DPAA2_ETH &&
		    dev->dev_type != DPAA2_CRYPTO &&
		    dev->dev_type != DPAA2_QDMA &&
		    dev->dev_type != DPAA2_IO) {
			TAILQ_REMOVE(&rte_fslmc_bus.device_list, dev, next);
			continue;
		}
		switch (dev->dev_type) {
		case DPAA2_ETH:
		case DPAA2_CRYPTO:
		case DPAA2_QDMA:
			ret = fslmc_process_iodevices(dev);
			if (ret) {
				DPAA2_BUS_DEBUG("Dev (%s) init failed",
						dev->device.name);
				return ret;
			}
			break;
		case DPAA2_CON:
		case DPAA2_CI:
		case DPAA2_BPOOL:
		case DPAA2_DPRTC:
		case DPAA2_MUX:
			/* IN case of secondary processes, all control objects
			 * like dpbp, dpcon, dpci are not initialized/required
			 * - all of these are assumed to be initialized and made
			 *   available by primary.
			 */
			if (rte_eal_process_type() == RTE_PROC_SECONDARY)
				continue;

			/* Call the object creation routine and remove the
			 * device entry from device list
			 */
			ret = fslmc_process_iodevices(dev);
			if (ret) {
				DPAA2_BUS_DEBUG("Dev (%s) init failed",
						dev->device.name);
				return -1;
			}

			break;
		case DPAA2_IO:
			/* When several dpio exist, the last one is reserved
			 * for the secondary process; the primary takes the
			 * rest.
			 */
			if (!is_dpio_in_blocklist && dpio_count > 1) {
				if (rte_eal_process_type() == RTE_PROC_SECONDARY
				    && current_device != dpio_count) {
					TAILQ_REMOVE(&rte_fslmc_bus.device_list,
						     dev, next);
					break;
				}
				if (rte_eal_process_type() == RTE_PROC_PRIMARY
				    && current_device == dpio_count) {
					TAILQ_REMOVE(&rte_fslmc_bus.device_list,
						     dev, next);
					break;
				}
			}

			ret = fslmc_process_iodevices(dev);
			if (ret) {
				DPAA2_BUS_DEBUG("Dev (%s) init failed",
						dev->device.name);
				return -1;
			}

			break;
		case DPAA2_UNKNOWN:
		default:
			/* Unknown - ignore */
			DPAA2_BUS_DEBUG("Found unknown device (%s)",
					dev->device.name);
			TAILQ_REMOVE(&rte_fslmc_bus.device_list, dev, next);
			free(dev);
			dev = NULL;
		}
	}

	return 0;
}
965 
/*
 * One-time VFIO bring-up for the fslmc container: resolve the DPRC's
 * IOMMU group, open and validate the group, connect it to a container,
 * and obtain the container device fd (cached in container_device_fd).
 *
 * @return 0 on success (or when already set up), negative on failure.
 */
int
fslmc_vfio_setup_group(void)
{
	int groupid;
	int ret;
	struct vfio_group_status status = { .argsz = sizeof(status) };

	/* if already done once */
	if (container_device_fd)
		return 0;

	ret = fslmc_get_container_group(&groupid);
	if (ret)
		return ret;

	/* In case this group was already opened, continue without any
	 * processing.
	 */
	if (vfio_group.groupid == groupid) {
		DPAA2_BUS_ERR("groupid already exists %d", groupid);
		return 0;
	}

	/* Get the actual group fd */
	ret = rte_vfio_get_group_fd(groupid);
	if (ret < 0)
		return ret;
	vfio_group.fd = ret;

	/* Check group viability */
	ret = ioctl(vfio_group.fd, VFIO_GROUP_GET_STATUS, &status);
	if (ret) {
		DPAA2_BUS_ERR("VFIO error getting group status");
		close(vfio_group.fd);
		rte_vfio_clear_group(vfio_group.fd);
		return ret;
	}

	/* A group is viable only when all its devices are VFIO-bound */
	if (!(status.flags & VFIO_GROUP_FLAGS_VIABLE)) {
		DPAA2_BUS_ERR("VFIO group not viable");
		close(vfio_group.fd);
		rte_vfio_clear_group(vfio_group.fd);
		return -EPERM;
	}
	/* Since Group is VIABLE, Store the groupid */
	vfio_group.groupid = groupid;

	/* check if group does not have a container yet */
	if (!(status.flags & VFIO_GROUP_FLAGS_CONTAINER_SET)) {
		/* Now connect this IOMMU group to given container */
		ret = vfio_connect_container();
		if (ret) {
			DPAA2_BUS_ERR(
				"Error connecting container with groupid %d",
				groupid);
			close(vfio_group.fd);
			rte_vfio_clear_group(vfio_group.fd);
			return ret;
		}
	}

	/* Get Device information */
	ret = ioctl(vfio_group.fd, VFIO_GROUP_GET_DEVICE_FD, fslmc_container);
	if (ret < 0) {
		DPAA2_BUS_ERR("Error getting device %s fd from group %d",
			      fslmc_container, vfio_group.groupid);
		close(vfio_group.fd);
		rte_vfio_clear_group(vfio_group.fd);
		return ret;
	}
	container_device_fd = ret;
	DPAA2_BUS_DEBUG("VFIO Container FD is [0x%X]",
			container_device_fd);

	return 0;
}
1042