xref: /dpdk/drivers/bus/fslmc/fslmc_vfio.c (revision 8e469ecf509b38e0c9a738864236c2d22f262839)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  *
3  *   Copyright (c) 2015-2016 Freescale Semiconductor, Inc. All rights reserved.
4  *   Copyright 2016-2024 NXP
5  *
6  */
7 
8 #include <unistd.h>
9 #include <stdio.h>
10 #include <sys/types.h>
11 #include <string.h>
12 #include <stdlib.h>
13 #include <fcntl.h>
14 #include <errno.h>
15 #include <sys/ioctl.h>
16 #include <sys/stat.h>
17 #include <sys/mman.h>
18 #include <sys/vfs.h>
19 #include <libgen.h>
20 #include <dirent.h>
21 #include <sys/eventfd.h>
22 #include <ctype.h>
23 
24 #include <eal_filesystem.h>
25 #include <rte_mbuf.h>
26 #include <ethdev_driver.h>
27 #include <rte_malloc.h>
28 #include <rte_memcpy.h>
29 #include <rte_string_fns.h>
30 #include <rte_cycles.h>
31 #include <rte_kvargs.h>
32 #include <dev_driver.h>
33 #include <rte_eal_memconfig.h>
34 #include <eal_vfio.h>
35 
36 #include "private.h"
37 #include "fslmc_vfio.h"
38 #include "fslmc_logs.h"
39 #include <mc/fsl_dpmng.h>
40 
41 #include "portal/dpaa2_hw_pvt.h"
42 #include "portal/dpaa2_hw_dpio.h"
43 
44 #define FSLMC_VFIO_MP "fslmc_vfio_mp_sync"
45 
/* A container is composed of multiple groups; however,
 * currently each process supports only a single group per container.
 */
49 static struct fslmc_vfio_container s_vfio_container;
50 /* Currently we only support single group/process. */
51 static const char *fslmc_group; /* dprc.x*/
52 static void *(*rte_mcp_ptr_list);
53 
54 struct fslmc_dmaseg {
55 	uint64_t vaddr;
56 	uint64_t iova;
57 	uint64_t size;
58 
59 	TAILQ_ENTRY(fslmc_dmaseg) next;
60 };
61 
62 TAILQ_HEAD(fslmc_dmaseg_list, fslmc_dmaseg);
63 
64 struct fslmc_dmaseg_list fslmc_memsegs =
65 		TAILQ_HEAD_INITIALIZER(fslmc_memsegs);
66 struct fslmc_dmaseg_list fslmc_iosegs =
67 		TAILQ_HEAD_INITIALIZER(fslmc_iosegs);
68 
69 static uint64_t fslmc_mem_va2iova = RTE_BAD_IOVA;
70 static int fslmc_mem_map_num;
71 
72 struct fslmc_mem_param {
73 	struct vfio_mp_param mp_param;
74 	struct fslmc_dmaseg_list memsegs;
75 	struct fslmc_dmaseg_list iosegs;
76 	uint64_t mem_va2iova;
77 	int mem_map_num;
78 };
79 
80 enum {
81 	FSLMC_VFIO_SOCKET_REQ_CONTAINER = 0x100,
82 	FSLMC_VFIO_SOCKET_REQ_GROUP,
83 	FSLMC_VFIO_SOCKET_REQ_MEM
84 };
85 
86 void *
87 dpaa2_get_mcp_ptr(int portal_idx)
88 {
89 	if (rte_mcp_ptr_list)
90 		return rte_mcp_ptr_list[portal_idx];
91 	else
92 		return NULL;
93 }
94 
95 static struct rte_dpaa2_object_list dpaa2_obj_list =
96 	TAILQ_HEAD_INITIALIZER(dpaa2_obj_list);
97 
98 static uint64_t
99 fslmc_io_virt2phy(const void *virtaddr)
100 {
101 	FILE *fp = fopen("/proc/self/maps", "r");
102 	char *line = NULL;
103 	size_t linesz;
104 	uint64_t start, end, phy;
105 	const uint64_t va = (const uint64_t)virtaddr;
106 	char tmp[1024];
107 	int ret;
108 
109 	if (!fp)
110 		return RTE_BAD_IOVA;
111 	while (getdelim(&line, &linesz, '\n', fp) > 0) {
112 		char *ptr = line;
113 		int n;
114 
115 		/** Parse virtual address range.*/
116 		n = 0;
117 		while (*ptr && !isspace(*ptr)) {
118 			tmp[n] = *ptr;
119 			ptr++;
120 			n++;
121 		}
122 		tmp[n] = 0;
123 		ret = sscanf(tmp, "%" SCNx64 "-%" SCNx64, &start, &end);
124 		if (ret != 2)
125 			continue;
126 		if (va < start || va >= end)
127 			continue;
128 
129 		/** This virtual address is in this segment.*/
130 		while (*ptr == ' ' || *ptr == 'r' ||
131 			*ptr == 'w' || *ptr == 's' ||
132 			*ptr == 'p' || *ptr == 'x' ||
133 			*ptr == '-')
134 			ptr++;
135 
136 		/** Extract phy address*/
137 		n = 0;
138 		while (*ptr && !isspace(*ptr)) {
139 			tmp[n] = *ptr;
140 			ptr++;
141 			n++;
142 		}
143 		tmp[n] = 0;
144 		phy = strtoul(tmp, 0, 16);
145 		if (!phy)
146 			continue;
147 
148 		fclose(fp);
149 		return phy + va - start;
150 	}
151 
152 	fclose(fp);
153 	return RTE_BAD_IOVA;
154 }
155 
/* Register a DPAA2 object with the fslmc bus. */
157 void
158 rte_fslmc_object_register(struct rte_dpaa2_object *object)
159 {
160 	RTE_VERIFY(object);
161 
162 	TAILQ_INSERT_TAIL(&dpaa2_obj_list, object, next);
163 }
164 
165 static const char *
166 fslmc_vfio_get_group_name(void)
167 {
168 	return fslmc_group;
169 }
170 
171 static void
172 fslmc_vfio_set_group_name(const char *group_name)
173 {
174 	fslmc_group = group_name;
175 }
176 
177 static int
178 fslmc_vfio_add_group(int vfio_group_fd,
179 	int iommu_group_num, const char *group_name)
180 {
181 	struct fslmc_vfio_group *group;
182 
183 	group = rte_zmalloc(NULL, sizeof(struct fslmc_vfio_group), 0);
184 	if (!group)
185 		return -ENOMEM;
186 	group->fd = vfio_group_fd;
187 	group->groupid = iommu_group_num;
188 	rte_strscpy(group->group_name, group_name, sizeof(group->group_name));
189 	if (rte_vfio_noiommu_is_enabled() > 0)
190 		group->iommu_type = RTE_VFIO_NOIOMMU;
191 	else
192 		group->iommu_type = VFIO_TYPE1_IOMMU;
193 	LIST_INSERT_HEAD(&s_vfio_container.groups, group, next);
194 
195 	return 0;
196 }
197 
198 static int
199 fslmc_vfio_clear_group(int vfio_group_fd)
200 {
201 	struct fslmc_vfio_group *group;
202 	struct fslmc_vfio_device *dev;
203 	int clear = 0;
204 
205 	LIST_FOREACH(group, &s_vfio_container.groups, next) {
206 		if (group->fd == vfio_group_fd) {
207 			LIST_FOREACH(dev, &group->vfio_devices, next)
208 				LIST_REMOVE(dev, next);
209 
210 			close(vfio_group_fd);
211 			LIST_REMOVE(group, next);
212 			rte_free(group);
213 			clear = 1;
214 
215 			break;
216 		}
217 	}
218 
219 	if (LIST_EMPTY(&s_vfio_container.groups)) {
220 		if (s_vfio_container.fd > 0)
221 			close(s_vfio_container.fd);
222 
223 		s_vfio_container.fd = -1;
224 	}
225 	if (clear)
226 		return 0;
227 
228 	return -ENODEV;
229 }
230 
231 static int
232 fslmc_vfio_connect_container(int vfio_group_fd)
233 {
234 	struct fslmc_vfio_group *group;
235 
236 	LIST_FOREACH(group, &s_vfio_container.groups, next) {
237 		if (group->fd == vfio_group_fd) {
238 			group->connected = 1;
239 
240 			return 0;
241 		}
242 	}
243 
244 	return -ENODEV;
245 }
246 
247 static int
248 fslmc_vfio_container_connected(int vfio_group_fd)
249 {
250 	struct fslmc_vfio_group *group;
251 
252 	LIST_FOREACH(group, &s_vfio_container.groups, next) {
253 		if (group->fd == vfio_group_fd) {
254 			if (group->connected)
255 				return 1;
256 		}
257 	}
258 	return 0;
259 }
260 
261 static int
262 fslmc_vfio_iommu_type(int vfio_group_fd)
263 {
264 	struct fslmc_vfio_group *group;
265 
266 	LIST_FOREACH(group, &s_vfio_container.groups, next) {
267 		if (group->fd == vfio_group_fd)
268 			return group->iommu_type;
269 	}
270 	return -ENODEV;
271 }
272 
273 static int
274 fslmc_vfio_group_fd_by_name(const char *group_name)
275 {
276 	struct fslmc_vfio_group *group;
277 
278 	LIST_FOREACH(group, &s_vfio_container.groups, next) {
279 		if (!strcmp(group->group_name, group_name))
280 			return group->fd;
281 	}
282 	return -ENODEV;
283 }
284 
285 static int
286 fslmc_vfio_group_fd_by_id(int group_id)
287 {
288 	struct fslmc_vfio_group *group;
289 
290 	LIST_FOREACH(group, &s_vfio_container.groups, next) {
291 		if (group->groupid == group_id)
292 			return group->fd;
293 	}
294 	return -ENODEV;
295 }
296 
297 static int
298 fslmc_vfio_group_add_dev(int vfio_group_fd,
299 	int dev_fd, const char *name)
300 {
301 	struct fslmc_vfio_group *group;
302 	struct fslmc_vfio_device *dev;
303 
304 	LIST_FOREACH(group, &s_vfio_container.groups, next) {
305 		if (group->fd == vfio_group_fd) {
306 			dev = rte_zmalloc(NULL,
307 				sizeof(struct fslmc_vfio_device), 0);
308 			dev->fd = dev_fd;
309 			rte_strscpy(dev->dev_name, name, sizeof(dev->dev_name));
310 			LIST_INSERT_HEAD(&group->vfio_devices, dev, next);
311 			return 0;
312 		}
313 	}
314 	return -ENODEV;
315 }
316 
317 static int
318 fslmc_vfio_group_remove_dev(int vfio_group_fd,
319 	const char *name)
320 {
321 	struct fslmc_vfio_group *group = NULL;
322 	struct fslmc_vfio_device *dev;
323 	int removed = 0;
324 
325 	LIST_FOREACH(group, &s_vfio_container.groups, next) {
326 		if (group->fd == vfio_group_fd)
327 			break;
328 	}
329 
330 	if (group) {
331 		LIST_FOREACH(dev, &group->vfio_devices, next) {
332 			if (!strcmp(dev->dev_name, name)) {
333 				LIST_REMOVE(dev, next);
334 				removed = 1;
335 				break;
336 			}
337 		}
338 	}
339 
340 	if (removed)
341 		return 0;
342 
343 	return -ENODEV;
344 }
345 
346 static int
347 fslmc_vfio_container_fd(void)
348 {
349 	return s_vfio_container.fd;
350 }
351 
352 static int
353 fslmc_get_group_id(const char *group_name,
354 	int *groupid)
355 {
356 	int ret;
357 
358 	/* get group number */
359 	ret = rte_vfio_get_group_num(SYSFS_FSL_MC_DEVICES,
360 			group_name, groupid);
361 	if (ret <= 0) {
362 		DPAA2_BUS_ERR("Find %s IOMMU group", group_name);
363 		if (ret < 0)
364 			return ret;
365 
366 		return -EIO;
367 	}
368 
369 	DPAA2_BUS_DEBUG("GROUP(%s) has VFIO iommu group id = %d",
370 		group_name, *groupid);
371 
372 	return 0;
373 }
374 
/* Return an open fd for the VFIO group @group_name, opening it on first
 * use. A primary process opens /dev/vfio/<group_num> directly; a
 * secondary process requests the fd from the primary over the MP
 * channel. Successfully opened fds are registered via
 * fslmc_vfio_add_group() so later calls reuse them.
 *
 * Returns the group fd (> 0) on success, a negative value on failure.
 */
static int
fslmc_vfio_open_group_fd(const char *group_name)
{
	int vfio_group_fd;
	char filename[PATH_MAX];
	struct rte_mp_msg mp_req, *mp_rep;
	struct rte_mp_reply mp_reply = {0};
	struct timespec ts = {.tv_sec = 5, .tv_nsec = 0};
	struct vfio_mp_param *p = (struct vfio_mp_param *)mp_req.param;
	int iommu_group_num, ret;

	/* Already opened and tracked: reuse. */
	vfio_group_fd = fslmc_vfio_group_fd_by_name(group_name);
	if (vfio_group_fd > 0)
		return vfio_group_fd;

	ret = fslmc_get_group_id(group_name, &iommu_group_num);
	if (ret)
		return ret;
	/* if primary, try to open the group */
	if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
		/* try regular group format */
		snprintf(filename, sizeof(filename),
			VFIO_GROUP_FMT, iommu_group_num);
		vfio_group_fd = open(filename, O_RDWR);

		goto add_vfio_group;
	}
	/* if we're in a secondary process, request group fd from the primary
	 * process via mp channel.
	 */
	p->req = FSLMC_VFIO_SOCKET_REQ_GROUP;
	p->group_num = iommu_group_num;
	rte_strscpy(mp_req.name, FSLMC_VFIO_MP, sizeof(mp_req.name));
	mp_req.len_param = sizeof(*p);
	mp_req.num_fds = 0;

	vfio_group_fd = -1;
	if (rte_mp_request_sync(&mp_req, &mp_reply, &ts) == 0 &&
	    mp_reply.nb_received == 1) {
		mp_rep = &mp_reply.msgs[0];
		p = (struct vfio_mp_param *)mp_rep->param;
		/* The fd itself arrives via SCM_RIGHTS in mp_rep->fds. */
		if (p->result == SOCKET_OK && mp_rep->num_fds == 1)
			vfio_group_fd = mp_rep->fds[0];
		else if (p->result == SOCKET_NO_FD)
			DPAA2_BUS_ERR("Bad VFIO group fd");
	}

	free(mp_reply.msgs);

add_vfio_group:
	if (vfio_group_fd < 0) {
		/* Open/request failed; report per process role. */
		if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
			DPAA2_BUS_ERR("Open VFIO group(%s) failed(%d)",
				filename, vfio_group_fd);
		} else {
			DPAA2_BUS_ERR("Cannot request group fd(%d)",
				vfio_group_fd);
		}
	} else {
		/* Track the fd for reuse; close it if tracking fails. */
		ret = fslmc_vfio_add_group(vfio_group_fd, iommu_group_num,
			group_name);
		if (ret) {
			close(vfio_group_fd);
			return ret;
		}
	}

	return vfio_group_fd;
}
444 
445 static int
446 fslmc_vfio_check_extensions(int vfio_container_fd)
447 {
448 	int ret;
449 	uint32_t idx, n_extensions = 0;
450 	static const int type_id[] = {RTE_VFIO_TYPE1, RTE_VFIO_SPAPR,
451 		RTE_VFIO_NOIOMMU};
452 	static const char * const type_id_nm[] = {"Type 1",
453 		"sPAPR", "No-IOMMU"};
454 
455 	for (idx = 0; idx < RTE_DIM(type_id); idx++) {
456 		ret = ioctl(vfio_container_fd, VFIO_CHECK_EXTENSION,
457 			type_id[idx]);
458 		if (ret < 0) {
459 			DPAA2_BUS_ERR("Could not get IOMMU type, error %i (%s)",
460 				errno, strerror(errno));
461 			close(vfio_container_fd);
462 			return -errno;
463 		} else if (ret == 1) {
464 			/* we found a supported extension */
465 			n_extensions++;
466 		}
467 		DPAA2_BUS_DEBUG("IOMMU type %d (%s) is %s",
468 			type_id[idx], type_id_nm[idx],
469 			ret ? "supported" : "not supported");
470 	}
471 
472 	/* if we didn't find any supported IOMMU types, fail */
473 	if (!n_extensions) {
474 		close(vfio_container_fd);
475 		return -EIO;
476 	}
477 
478 	return 0;
479 }
480 
481 static int
482 fslmc_vfio_open_container_fd(void)
483 {
484 	int ret, vfio_container_fd;
485 	struct rte_mp_msg mp_req, *mp_rep;
486 	struct rte_mp_reply mp_reply = {0};
487 	struct timespec ts = {.tv_sec = 5, .tv_nsec = 0};
488 	struct vfio_mp_param *p = (void *)mp_req.param;
489 
490 	if (fslmc_vfio_container_fd() > 0)
491 		return fslmc_vfio_container_fd();
492 
493 	/* if we're in a primary process, try to open the container */
494 	if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
495 		vfio_container_fd = open(VFIO_CONTAINER_PATH, O_RDWR);
496 		if (vfio_container_fd < 0) {
497 			DPAA2_BUS_ERR("Open VFIO container(%s), err(%d)",
498 				VFIO_CONTAINER_PATH, vfio_container_fd);
499 			ret = vfio_container_fd;
500 			goto err_exit;
501 		}
502 
503 		/* check VFIO API version */
504 		ret = ioctl(vfio_container_fd, VFIO_GET_API_VERSION);
505 		if (ret < 0) {
506 			DPAA2_BUS_ERR("Get VFIO API version(%d)",
507 				ret);
508 		} else if (ret != VFIO_API_VERSION) {
509 			DPAA2_BUS_ERR("Unsupported VFIO API version(%d)",
510 				ret);
511 			ret = -ENOTSUP;
512 		}
513 		if (ret < 0) {
514 			close(vfio_container_fd);
515 			goto err_exit;
516 		}
517 
518 		ret = fslmc_vfio_check_extensions(vfio_container_fd);
519 		if (ret) {
520 			DPAA2_BUS_ERR("Unsupported IOMMU extensions found(%d)",
521 				ret);
522 			close(vfio_container_fd);
523 			goto err_exit;
524 		}
525 
526 		goto success_exit;
527 	}
528 	/*
529 	 * if we're in a secondary process, request container fd from the
530 	 * primary process via mp channel
531 	 */
532 	p->req = FSLMC_VFIO_SOCKET_REQ_CONTAINER;
533 	rte_strscpy(mp_req.name, FSLMC_VFIO_MP, sizeof(mp_req.name));
534 	mp_req.len_param = sizeof(*p);
535 	mp_req.num_fds = 0;
536 
537 	vfio_container_fd = -1;
538 	ret = rte_mp_request_sync(&mp_req, &mp_reply, &ts);
539 	if (ret)
540 		goto err_exit;
541 
542 	if (mp_reply.nb_received != 1) {
543 		ret = -EIO;
544 		goto err_exit;
545 	}
546 
547 	mp_rep = &mp_reply.msgs[0];
548 	p = (void *)mp_rep->param;
549 	if (p->result == SOCKET_OK && mp_rep->num_fds == 1) {
550 		vfio_container_fd = mp_rep->fds[0];
551 		free(mp_reply.msgs);
552 	}
553 
554 success_exit:
555 	s_vfio_container.fd = vfio_container_fd;
556 
557 	return vfio_container_fd;
558 
559 err_exit:
560 	free(mp_reply.msgs);
561 	DPAA2_BUS_ERR("Open container fd err(%d)", ret);
562 	return ret;
563 }
564 
/* Resolve the IOMMU group id of @group_name and remember the name as
 * the process-wide fslmc group. Returns 0 on success, negative errno
 * style value otherwise.
 */
int
fslmc_get_container_group(const char *group_name,
	int *groupid)
{
	int ret;

	if (!group_name) {
		DPAA2_BUS_ERR("No group name provided!");

		return -EINVAL;
	}

	ret = fslmc_get_group_id(group_name, groupid);
	if (ret == 0)
		fslmc_vfio_set_group_name(group_name);

	return ret;
}
584 
/* MP-channel handler run in the primary process: serves group fds,
 * the container fd and the DMA mapping records to secondary processes.
 *
 * @msg:  incoming request (one of FSLMC_VFIO_SOCKET_REQ_*).
 * @peer: opaque peer handle, passed through to rte_mp_reply().
 *
 * Returns the rte_mp_reply() result, or a negative value for a
 * malformed/unknown request.
 */
static int
fslmc_vfio_mp_primary(const struct rte_mp_msg *msg,
	const void *peer)
{
	int fd = -1;
	int ret;
	struct rte_mp_msg reply;
	struct vfio_mp_param *r = (void *)reply.param;
	const struct vfio_mp_param *m = (const void *)msg->param;
	struct fslmc_mem_param *map;

	/* Reject requests whose payload is not a vfio_mp_param. */
	if (msg->len_param != sizeof(*m)) {
		DPAA2_BUS_ERR("Invalid msg size(%d) for req(%d)",
			msg->len_param, m->req);
		return -EINVAL;
	}

	memset(&reply, 0, sizeof(reply));

	switch (m->req) {
	case FSLMC_VFIO_SOCKET_REQ_GROUP:
		/* Group fd travels to the peer via SCM_RIGHTS (reply.fds). */
		r->req = FSLMC_VFIO_SOCKET_REQ_GROUP;
		r->group_num = m->group_num;
		fd = fslmc_vfio_group_fd_by_id(m->group_num);
		if (fd < 0) {
			r->result = SOCKET_ERR;
		} else if (!fd) {
			/* if group exists but isn't bound to VFIO driver */
			r->result = SOCKET_NO_FD;
		} else {
			/* if group exists and is bound to VFIO driver */
			r->result = SOCKET_OK;
			reply.num_fds = 1;
			reply.fds[0] = fd;
		}
		reply.len_param = sizeof(*r);
		break;
	case FSLMC_VFIO_SOCKET_REQ_CONTAINER:
		r->req = FSLMC_VFIO_SOCKET_REQ_CONTAINER;
		fd = fslmc_vfio_container_fd();
		if (fd <= 0) {
			r->result = SOCKET_ERR;
		} else {
			r->result = SOCKET_OK;
			reply.num_fds = 1;
			reply.fds[0] = fd;
		}
		reply.len_param = sizeof(*r);
		break;
	case FSLMC_VFIO_SOCKET_REQ_MEM:
		/* Ship the DMA map bookkeeping to the secondary.
		 * NOTE(review): copying the TAILQ heads shares
		 * primary-process node pointers with the peer; this
		 * relies on identical VA mappings across processes —
		 * confirm against the secondary's usage.
		 */
		map = (void *)reply.param;
		r = &map->mp_param;
		r->req = FSLMC_VFIO_SOCKET_REQ_MEM;
		r->result = SOCKET_OK;
		map->memsegs = fslmc_memsegs;
		map->iosegs = fslmc_iosegs;
		map->mem_va2iova = fslmc_mem_va2iova;
		map->mem_map_num = fslmc_mem_map_num;
		reply.len_param = sizeof(struct fslmc_mem_param);
		break;
	default:
		DPAA2_BUS_ERR("VFIO received invalid message(%08x)",
			m->req);
		return -ENOTSUP;
	}

	rte_strscpy(reply.name, FSLMC_VFIO_MP, sizeof(reply.name));
	ret = rte_mp_reply(&reply, peer);

	return ret;
}
656 
/* Secondary-process helper: request the primary's DMA mapping records
 * (memory segment list, VA->IOVA offset, map count) over the MP channel
 * and install them in this process's globals.
 *
 * NOTE(review): the reply also carries @iosegs, but it is not copied
 * into fslmc_iosegs here — confirm whether that is intentional.
 *
 * Returns 0 on success, -EINVAL on request failure or a bad reply.
 */
static int
fslmc_vfio_mp_sync_mem_req(void)
{
	struct rte_mp_msg mp_req, *mp_rep;
	struct rte_mp_reply mp_reply = {0};
	struct timespec ts = {.tv_sec = 5, .tv_nsec = 0};
	int ret = 0;
	struct vfio_mp_param *mp_param;
	struct fslmc_mem_param *mem_rsp;

	mp_param = (void *)mp_req.param;
	memset(&mp_req, 0, sizeof(struct rte_mp_msg));
	mp_param->req = FSLMC_VFIO_SOCKET_REQ_MEM;
	rte_strscpy(mp_req.name, FSLMC_VFIO_MP, sizeof(mp_req.name));
	mp_req.len_param = sizeof(struct vfio_mp_param);
	if (rte_mp_request_sync(&mp_req, &mp_reply, &ts) == 0 &&
		mp_reply.nb_received == 1) {
		mp_rep = &mp_reply.msgs[0];
		mem_rsp = (struct fslmc_mem_param *)mp_rep->param;
		if (mem_rsp->mp_param.result == SOCKET_OK) {
			/* Adopt the primary's mapping bookkeeping. */
			fslmc_memsegs = mem_rsp->memsegs;
			fslmc_mem_va2iova = mem_rsp->mem_va2iova;
			fslmc_mem_map_num = mem_rsp->mem_map_num;
		} else {
			DPAA2_BUS_ERR("Bad MEM SEG");
			ret = -EINVAL;
		}
	} else {
		ret = -EINVAL;
	}
	free(mp_reply.msgs);

	return ret;
}
691 
692 static int
693 fslmc_vfio_mp_sync_setup(void)
694 {
695 	int ret;
696 
697 	if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
698 		ret = rte_mp_action_register(FSLMC_VFIO_MP,
699 			fslmc_vfio_mp_primary);
700 		if (ret && rte_errno != ENOTSUP)
701 			return ret;
702 	} else {
703 		ret = fslmc_vfio_mp_sync_mem_req();
704 		if (ret)
705 			return ret;
706 	}
707 
708 	return 0;
709 }
710 
711 static int
712 vfio_connect_container(int vfio_container_fd,
713 	int vfio_group_fd)
714 {
715 	int ret;
716 	int iommu_type;
717 
718 	if (fslmc_vfio_container_connected(vfio_group_fd)) {
719 		DPAA2_BUS_WARN("VFIO FD(%d) has connected to container",
720 			vfio_group_fd);
721 		return 0;
722 	}
723 
724 	iommu_type = fslmc_vfio_iommu_type(vfio_group_fd);
725 	if (iommu_type < 0) {
726 		DPAA2_BUS_ERR("Get iommu type(%d)", iommu_type);
727 
728 		return iommu_type;
729 	}
730 
731 	/* Check whether support for SMMU type IOMMU present or not */
732 	ret = ioctl(vfio_container_fd, VFIO_CHECK_EXTENSION, iommu_type);
733 	if (ret <= 0) {
734 		DPAA2_BUS_ERR("Unsupported IOMMU type(%d) ret(%d), err(%d)",
735 			iommu_type, ret, -errno);
736 		return -EINVAL;
737 	}
738 
739 	ret = ioctl(vfio_group_fd, VFIO_GROUP_SET_CONTAINER,
740 			&vfio_container_fd);
741 	if (ret) {
742 		DPAA2_BUS_ERR("Set group container ret(%d), err(%d)",
743 			ret, -errno);
744 
745 		return ret;
746 	}
747 
748 	ret = ioctl(vfio_container_fd, VFIO_SET_IOMMU, iommu_type);
749 	if (ret) {
750 		DPAA2_BUS_ERR("Set iommu ret(%d), err(%d)",
751 			ret, -errno);
752 
753 		return ret;
754 	}
755 
756 	return fslmc_vfio_connect_container(vfio_group_fd);
757 }
758 
/* Map @len bytes at VA @vaddr to IOVA @iovaddr in the VFIO container.
 *
 * The range is classified as regular memory when rte_mem_virt2phy()
 * can resolve it, otherwise as I/O space (resolved via
 * /proc/self/maps). Overlap with a previously recorded segment of the
 * same class returns -EEXIST. In no-IOMMU mode no VFIO ioctl is issued
 * (the PA must then equal the IOVA). On success the segment is recorded
 * in fslmc_memsegs or fslmc_iosegs, and for memory segments the global
 * VA->IOVA fast-path offset is maintained (valid only while exactly one
 * memory mapping exists).
 *
 * Returns 0 on success, negative errno-style value on failure.
 */
static int
fslmc_map_dma(uint64_t vaddr, rte_iova_t iovaddr, size_t len)
{
	struct vfio_iommu_type1_dma_map dma_map = {
		.argsz = sizeof(struct vfio_iommu_type1_dma_map),
		.flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE,
	};
	int ret, fd, is_io = 0;
	const char *group_name = fslmc_vfio_get_group_name();
	struct fslmc_dmaseg *dmaseg = NULL;
	uint64_t phy = 0;

	/* VA mode requires a 1:1 VA:IOVA mapping. */
	if (rte_eal_iova_mode() == RTE_IOVA_VA) {
		if (vaddr != iovaddr) {
			DPAA2_BUS_ERR("IOVA:VA(%" PRIx64 " : %" PRIx64 ") %s",
				iovaddr, vaddr,
				"should be 1:1 for VA mode");

			return -EINVAL;
		}
	}

	/* Classify: memory if rte_mem_virt2phy() resolves the VA,
	 * otherwise I/O space via /proc/self/maps.
	 */
	phy = rte_mem_virt2phy((const void *)(uintptr_t)vaddr);
	if (phy == RTE_BAD_IOVA) {
		phy = fslmc_io_virt2phy((const void *)(uintptr_t)vaddr);
		if (phy == RTE_BAD_IOVA)
			return -ENOMEM;
		is_io = 1;
	} else if (fslmc_mem_va2iova != RTE_BAD_IOVA &&
		fslmc_mem_va2iova != (iovaddr - vaddr)) {
		/* The single global VA->IOVA offset no longer holds. */
		DPAA2_BUS_WARN("Multiple MEM PA<->VA conversions.");
	}
	DPAA2_BUS_DEBUG("%s(%zu): VA(%" PRIx64 "):IOVA(%" PRIx64 "):PHY(%" PRIx64 ")",
		is_io ? "DMA IO map size" : "DMA MEM map size",
		len, vaddr, iovaddr, phy);

	if (is_io)
		goto io_mapping_check;

	/* Reject VA or IOVA overlap with any recorded memory segment. */
	TAILQ_FOREACH(dmaseg, &fslmc_memsegs, next) {
		if (!((vaddr + len) <= dmaseg->vaddr ||
			(dmaseg->vaddr + dmaseg->size) <= vaddr)) {
			DPAA2_BUS_ERR("MEM: New VA Range(%" PRIx64 " ~ %" PRIx64 ")",
				vaddr, vaddr + len);
			DPAA2_BUS_ERR("MEM: Overlap with (%" PRIx64 " ~ %" PRIx64 ")",
				dmaseg->vaddr,
				dmaseg->vaddr + dmaseg->size);
			return -EEXIST;
		}
		if (!((iovaddr + len) <= dmaseg->iova ||
			(dmaseg->iova + dmaseg->size) <= iovaddr)) {
			DPAA2_BUS_ERR("MEM: New IOVA Range(%" PRIx64 " ~ %" PRIx64 ")",
				iovaddr, iovaddr + len);
			DPAA2_BUS_ERR("MEM: Overlap with (%" PRIx64 " ~ %" PRIx64 ")",
				dmaseg->iova,
				dmaseg->iova + dmaseg->size);
			return -EEXIST;
		}
	}
	goto start_mapping;

io_mapping_check:
	/* Reject VA or IOVA overlap with any recorded I/O segment. */
	TAILQ_FOREACH(dmaseg, &fslmc_iosegs, next) {
		if (!((vaddr + len) <= dmaseg->vaddr ||
			(dmaseg->vaddr + dmaseg->size) <= vaddr)) {
			DPAA2_BUS_ERR("IO: New VA Range (%" PRIx64 " ~ %" PRIx64 ")",
				vaddr, vaddr + len);
			DPAA2_BUS_ERR("IO: Overlap with (%" PRIx64 " ~ %" PRIx64 ")",
				dmaseg->vaddr,
				dmaseg->vaddr + dmaseg->size);
			return -EEXIST;
		}
		if (!((iovaddr + len) <= dmaseg->iova ||
			(dmaseg->iova + dmaseg->size) <= iovaddr)) {
			DPAA2_BUS_ERR("IO: New IOVA Range(%" PRIx64 " ~ %" PRIx64 ")",
				iovaddr, iovaddr + len);
			DPAA2_BUS_ERR("IO: Overlap with (%" PRIx64 " ~ %" PRIx64 ")",
				dmaseg->iova,
				dmaseg->iova + dmaseg->size);
			return -EEXIST;
		}
	}

start_mapping:
	fd = fslmc_vfio_group_fd_by_name(group_name);
	if (fd <= 0) {
		DPAA2_BUS_ERR("%s: Get fd by name(%s) failed(%d)",
			__func__, group_name, fd);
		if (fd < 0)
			return fd;
		return -EIO;
	}
	/* No-IOMMU: skip the VFIO ioctl, but the hardware then sees
	 * physical addresses, so PA must equal IOVA.
	 */
	if (fslmc_vfio_iommu_type(fd) == RTE_VFIO_NOIOMMU) {
		DPAA2_BUS_DEBUG("Running in NOIOMMU mode");
		if (phy != iovaddr) {
			DPAA2_BUS_ERR("IOVA should support with IOMMU");
			return -EIO;
		}
		goto end_mapping;
	}

	dma_map.size = len;
	dma_map.vaddr = vaddr;
	dma_map.iova = iovaddr;

	/* SET DMA MAP for IOMMU */
	if (!fslmc_vfio_container_connected(fd)) {
		DPAA2_BUS_ERR("Container is not connected");
		return -EIO;
	}

	ret = ioctl(fslmc_vfio_container_fd(), VFIO_IOMMU_MAP_DMA,
		&dma_map);
	if (ret) {
		DPAA2_BUS_ERR("%s(%d) VA(%" PRIx64 "):IOVA(%" PRIx64 "):PHY(%" PRIx64 ")",
			is_io ? "DMA IO map err" : "DMA MEM map err",
			errno, vaddr, iovaddr, phy);
		return ret;
	}

end_mapping:
	/* Record the segment for overlap checks and address translation. */
	dmaseg = malloc(sizeof(struct fslmc_dmaseg));
	if (!dmaseg) {
		DPAA2_BUS_ERR("DMA segment malloc failed!");
		return -ENOMEM;
	}
	dmaseg->vaddr = vaddr;
	dmaseg->iova = iovaddr;
	dmaseg->size = len;
	if (is_io) {
		TAILQ_INSERT_TAIL(&fslmc_iosegs, dmaseg, next);
	} else {
		/* The fast-path offset is only valid while exactly one
		 * memory mapping exists; invalidate it otherwise.
		 */
		fslmc_mem_map_num++;
		if (fslmc_mem_map_num == 1)
			fslmc_mem_va2iova = iovaddr - vaddr;
		else
			fslmc_mem_va2iova = RTE_BAD_IOVA;
		TAILQ_INSERT_TAIL(&fslmc_memsegs, dmaseg, next);
	}
	DPAA2_BUS_LOG(NOTICE,
		"%s(%zx): VA(%" PRIx64 "):IOVA(%" PRIx64 "):PHY(%" PRIx64 ")",
		is_io ? "DMA I/O map size" : "DMA MEM map size",
		len, vaddr, iovaddr, phy);

	return 0;
}
905 
/* Unmap a previously mapped DMA range. The segment is looked up by
 * IOVA and length (VA is matched too when @vaddr is non-zero) in the
 * memory list first, then the I/O list. Unknown ranges are logged and
 * ignored (returns 0). In no-IOMMU mode no VFIO ioctl is issued.
 *
 * Returns 0 on success or when nothing was mapped, negative otherwise.
 */
static int
fslmc_unmap_dma(uint64_t vaddr, uint64_t iovaddr, size_t len)
{
	struct vfio_iommu_type1_dma_unmap dma_unmap = {
		.argsz = sizeof(struct vfio_iommu_type1_dma_unmap),
		.flags = 0,
	};
	int ret, fd, is_io = 0;
	const char *group_name = fslmc_vfio_get_group_name();
	struct fslmc_dmaseg *dmaseg = NULL;

	/* @vaddr == 0 means "match by IOVA/len only". */
	TAILQ_FOREACH(dmaseg, &fslmc_memsegs, next) {
		if (((vaddr && dmaseg->vaddr == vaddr) || !vaddr) &&
			dmaseg->iova == iovaddr &&
			dmaseg->size == len) {
			is_io = 0;
			break;
		}
	}

	if (!dmaseg) {
		TAILQ_FOREACH(dmaseg, &fslmc_iosegs, next) {
			if (((vaddr && dmaseg->vaddr == vaddr) || !vaddr) &&
				dmaseg->iova == iovaddr &&
				dmaseg->size == len) {
				is_io = 1;
				break;
			}
		}
	}

	if (!dmaseg) {
		/* Not tracked: treated as a no-op, not an error. */
		DPAA2_BUS_ERR("IOVA(%" PRIx64 ") with length(%zx) not mapped",
			iovaddr, len);
		return 0;
	}

	fd = fslmc_vfio_group_fd_by_name(group_name);
	if (fd <= 0) {
		DPAA2_BUS_ERR("%s: Get fd by name(%s) failed(%d)",
			__func__, group_name, fd);
		if (fd < 0)
			return fd;
		return -EIO;
	}
	/* No VFIO mapping was created in no-IOMMU mode; nothing to undo.
	 * NOTE(review): the tracked segment is NOT removed from the list
	 * on this path — confirm whether that is intentional.
	 */
	if (fslmc_vfio_iommu_type(fd) == RTE_VFIO_NOIOMMU) {
		DPAA2_BUS_DEBUG("Running in NOIOMMU mode");
		return 0;
	}

	dma_unmap.size = len;
	dma_unmap.iova = iovaddr;

	/* SET DMA MAP for IOMMU */
	if (!fslmc_vfio_container_connected(fd)) {
		DPAA2_BUS_ERR("Container is not connected ");
		return -EIO;
	}

	ret = ioctl(fslmc_vfio_container_fd(), VFIO_IOMMU_UNMAP_DMA,
		&dma_unmap);
	if (ret) {
		DPAA2_BUS_ERR("DMA un-map IOVA(%" PRIx64 " ~ %" PRIx64 ") err(%d)",
			iovaddr, iovaddr + len, errno);
		return ret;
	}

	/* Drop the bookkeeping entry; invalidate the VA->IOVA fast path
	 * once no memory segments remain.
	 */
	if (is_io) {
		TAILQ_REMOVE(&fslmc_iosegs, dmaseg, next);
	} else {
		TAILQ_REMOVE(&fslmc_memsegs, dmaseg, next);
		fslmc_mem_map_num--;
		if (TAILQ_EMPTY(&fslmc_memsegs))
			fslmc_mem_va2iova = RTE_BAD_IOVA;
	}

	free(dmaseg);

	return 0;
}
986 
987 uint64_t
988 rte_fslmc_cold_mem_vaddr_to_iova(void *vaddr,
989 	uint64_t size)
990 {
991 	struct fslmc_dmaseg *dmaseg;
992 	uint64_t va;
993 
994 	va = (uint64_t)vaddr;
995 	TAILQ_FOREACH(dmaseg, &fslmc_memsegs, next) {
996 		if (va >= dmaseg->vaddr &&
997 			(va + size) < (dmaseg->vaddr + dmaseg->size)) {
998 			return dmaseg->iova + va - dmaseg->vaddr;
999 		}
1000 	}
1001 
1002 	return RTE_BAD_IOVA;
1003 }
1004 
1005 void *
1006 rte_fslmc_cold_mem_iova_to_vaddr(uint64_t iova,
1007 	uint64_t size)
1008 {
1009 	struct fslmc_dmaseg *dmaseg;
1010 
1011 	TAILQ_FOREACH(dmaseg, &fslmc_memsegs, next) {
1012 		if (iova >= dmaseg->iova &&
1013 			(iova + size) < (dmaseg->iova + dmaseg->size))
1014 			return (void *)((uintptr_t)dmaseg->vaddr
1015 				+ (uintptr_t)(iova - dmaseg->iova));
1016 	}
1017 
1018 	return NULL;
1019 }
1020 
1021 __rte_hot uint64_t
1022 rte_fslmc_mem_vaddr_to_iova(void *vaddr)
1023 {
1024 	if (likely(fslmc_mem_va2iova != RTE_BAD_IOVA))
1025 		return (uint64_t)vaddr + fslmc_mem_va2iova;
1026 
1027 	return rte_fslmc_cold_mem_vaddr_to_iova(vaddr, 0);
1028 }
1029 
1030 __rte_hot void *
1031 rte_fslmc_mem_iova_to_vaddr(uint64_t iova)
1032 {
1033 	if (likely(fslmc_mem_va2iova != RTE_BAD_IOVA))
1034 		return (void *)((uintptr_t)iova - (uintptr_t)fslmc_mem_va2iova);
1035 
1036 	return rte_fslmc_cold_mem_iova_to_vaddr(iova, 0);
1037 }
1038 
1039 uint64_t
1040 rte_fslmc_io_vaddr_to_iova(void *vaddr)
1041 {
1042 	struct fslmc_dmaseg *dmaseg = NULL;
1043 	uint64_t va = (uint64_t)vaddr;
1044 
1045 	TAILQ_FOREACH(dmaseg, &fslmc_iosegs, next) {
1046 		if ((va >= dmaseg->vaddr) &&
1047 			va < dmaseg->vaddr + dmaseg->size)
1048 			return dmaseg->iova + va - dmaseg->vaddr;
1049 	}
1050 
1051 	return RTE_BAD_IOVA;
1052 }
1053 
1054 void *
1055 rte_fslmc_io_iova_to_vaddr(uint64_t iova)
1056 {
1057 	struct fslmc_dmaseg *dmaseg = NULL;
1058 
1059 	TAILQ_FOREACH(dmaseg, &fslmc_iosegs, next) {
1060 		if ((iova >= dmaseg->iova) &&
1061 			iova < dmaseg->iova + dmaseg->size)
1062 			return (void *)((uintptr_t)dmaseg->vaddr
1063 				+ (uintptr_t)(iova - dmaseg->iova));
1064 	}
1065 
1066 	return NULL;
1067 }
1068 
/* Memory hotplug callback: DMA-map newly allocated memsegs and unmap
 * freed ones. Walks [addr, addr + len) one memseg at a time; segments
 * without a valid IOVA are skipped. Stops at the first map/unmap
 * failure (callbacks cannot return an error).
 */
static void
fslmc_memevent_cb(enum rte_mem_event type, const void *addr,
	size_t len, void *arg __rte_unused)
{
	struct rte_memseg_list *msl;
	struct rte_memseg *ms;
	size_t cur_len = 0, map_len = 0;
	uint64_t virt_addr;
	rte_iova_t iova_addr;
	int ret;

	msl = rte_mem_virt2memseg_list(addr);

	while (cur_len < len) {
		const void *va = RTE_PTR_ADD(addr, cur_len);

		/* Advance memseg by memseg within the event range. */
		ms = rte_mem_virt2memseg(va, msl);
		iova_addr = ms->iova;
		virt_addr = ms->addr_64;
		map_len = ms->len;

		DPAA2_BUS_DEBUG("%s, va=%p, virt=%" PRIx64 ", iova=%" PRIx64 ", len=%zu",
			type == RTE_MEM_EVENT_ALLOC ? "alloc" : "dealloc",
			va, virt_addr, iova_addr, map_len);

		/* iova_addr may be set to RTE_BAD_IOVA */
		if (iova_addr == RTE_BAD_IOVA) {
			DPAA2_BUS_DEBUG("Segment has invalid iova, skipping");
			cur_len += map_len;
			continue;
		}

		if (type == RTE_MEM_EVENT_ALLOC)
			ret = fslmc_map_dma(virt_addr, iova_addr, map_len);
		else
			ret = fslmc_unmap_dma(virt_addr, iova_addr, map_len);

		if (ret != 0) {
			DPAA2_BUS_ERR("%s: Map=%d, addr=%p, len=%zu, err:(%d)",
				type == RTE_MEM_EVENT_ALLOC ?
				"DMA Mapping failed. " :
				"DMA Unmapping failed. ",
				type, va, map_len, ret);
			return;
		}

		cur_len += map_len;
	}

	DPAA2_BUS_DEBUG("Total %s: addr=%p, len=%zu",
		type == RTE_MEM_EVENT_ALLOC ? "Mapped" : "Unmapped",
		addr, len);
}
1122 
1123 static int
1124 fslmc_dmamap_seg(const struct rte_memseg_list *msl __rte_unused,
1125 		const struct rte_memseg *ms, void *arg)
1126 {
1127 	int *n_segs = arg;
1128 	int ret;
1129 
1130 	/* if IOVA address is invalid, skip */
1131 	if (ms->iova == RTE_BAD_IOVA)
1132 		return 0;
1133 
1134 	ret = fslmc_map_dma(ms->addr_64, ms->iova, ms->len);
1135 	if (ret)
1136 		DPAA2_BUS_ERR("Unable to VFIO map (addr=%p, len=%zu)",
1137 				ms->addr, ms->len);
1138 	else
1139 		(*n_segs)++;
1140 
1141 	return ret;
1142 }
1143 
/* Public wrapper: DMA-map an arbitrary VA/IOVA range of @size bytes. */
int
rte_fslmc_vfio_mem_dmamap(uint64_t vaddr, uint64_t iova, uint64_t size)
{
	return fslmc_map_dma(vaddr, iova, size);
}
1149 
/* Public wrapper: DMA-unmap by IOVA/size (VA 0 = match any VA). */
int
rte_fslmc_vfio_mem_dmaunmap(uint64_t iova, uint64_t size)
{
	return fslmc_unmap_dma(0, iova, size);
}
1155 
1156 int
1157 fslmc_vfio_dmamap(void)
1158 {
1159 	int i = 0, ret;
1160 
1161 	/* Lock before parsing and registering callback to memory subsystem */
1162 	rte_mcfg_mem_read_lock();
1163 
1164 	ret = rte_memseg_walk(fslmc_dmamap_seg, &i);
1165 	if (ret) {
1166 		rte_mcfg_mem_read_unlock();
1167 		return ret;
1168 	}
1169 
1170 	ret = rte_mem_event_callback_register("fslmc_memevent_clb",
1171 			fslmc_memevent_cb, NULL);
1172 	if (ret && rte_errno == ENOTSUP)
1173 		DPAA2_BUS_DEBUG("Memory event callbacks not supported");
1174 	else if (ret)
1175 		DPAA2_BUS_DEBUG("Unable to install memory handler");
1176 	else
1177 		DPAA2_BUS_DEBUG("Installed memory callback handler");
1178 
1179 	DPAA2_BUS_DEBUG("Total %d segments found.", i);
1180 
1181 	/* Existing segments have been mapped and memory callback for hotplug
1182 	 * has been installed.
1183 	 */
1184 	rte_mcfg_mem_read_unlock();
1185 
1186 	return 0;
1187 }
1188 
/* Open a VFIO device fd for @dev_addr within the process's VFIO group,
 * fetch its device info, and record the device in the group bookkeeping.
 *
 * @param dev_addr     Device name (e.g. "dpni.1") passed to
 *                     VFIO_GROUP_GET_DEVICE_FD.
 * @param vfio_dev_fd  Output: the opened device fd on success; set to -1
 *                     on the failure paths that close it.
 * @param device_info  Output: filled by VFIO_DEVICE_GET_INFO.
 * @return 0 on success, negative errno-style code on failure.
 */
static int
fslmc_vfio_setup_device(const char *dev_addr,
	int *vfio_dev_fd, struct vfio_device_info *device_info)
{
	struct vfio_group_status group_status = {
			.argsz = sizeof(group_status)
	};
	int vfio_group_fd, ret;
	const char *group_name = fslmc_vfio_get_group_name();

	vfio_group_fd = fslmc_vfio_group_fd_by_name(group_name);
	if (vfio_group_fd <= 0) {
		DPAA2_BUS_ERR("%s: Get fd by name(%s) failed(%d)",
			__func__, group_name, vfio_group_fd);
		if (vfio_group_fd < 0)
			return vfio_group_fd;
		return -EIO;
	}

	if (!fslmc_vfio_container_connected(vfio_group_fd)) {
		DPAA2_BUS_ERR("Container is not connected");
		return -EIO;
	}

	/* get a file descriptor for the device */
	*vfio_dev_fd = ioctl(vfio_group_fd, VFIO_GROUP_GET_DEVICE_FD, dev_addr);
	if (*vfio_dev_fd < 0) {
		/* if we cannot get a device fd, this implies a problem with
		 * the VFIO group or the container not having IOMMU configured.
		 */
		DPAA2_BUS_ERR("Getting a vfio_dev_fd for %s from %s failed",
			dev_addr, group_name);
		return -EIO;
	}

	/* test and setup the device */
	ret = ioctl(*vfio_dev_fd, VFIO_DEVICE_GET_INFO, device_info);
	if (ret) {
		DPAA2_BUS_ERR("%s cannot get device info err(%d)(%s)",
			dev_addr, errno, strerror(errno));
		/* Fix: close the device fd instead of leaking it on the
		 * error path; invalidate the caller's copy.
		 */
		close(*vfio_dev_fd);
		*vfio_dev_fd = -1;
		return ret;
	}

	ret = fslmc_vfio_group_add_dev(vfio_group_fd, *vfio_dev_fd,
			dev_addr);
	if (ret) {
		/* Fix: same leak on bookkeeping failure. */
		close(*vfio_dev_fd);
		*vfio_dev_fd = -1;
	}

	return ret;
}
1236 
/* Map the MC portal object @mcp_obj into this process's address space.
 *
 * @return the mapped virtual address, or (intptr_t)MAP_FAILED on any
 *         failure (setup, region query, or mmap).
 */
static intptr_t vfio_map_mcp_obj(const char *mcp_obj)
{
	intptr_t v_addr = (intptr_t)MAP_FAILED;
	int32_t ret, mc_fd;

	struct vfio_device_info d_info = { .argsz = sizeof(d_info) };
	struct vfio_region_info reg_info = { .argsz = sizeof(reg_info) };

	/* Fix: the return value was previously ignored, so mc_fd could be
	 * used uninitialized when device setup failed.
	 */
	ret = fslmc_vfio_setup_device(mcp_obj, &mc_fd, &d_info);
	if (ret) {
		DPAA2_BUS_ERR("%s: VFIO setup for %s failed(%d)",
			__func__, mcp_obj, ret);
		return v_addr;
	}

	/* getting device region info*/
	ret = ioctl(mc_fd, VFIO_DEVICE_GET_REGION_INFO, &reg_info);
	if (ret < 0) {
		DPAA2_BUS_ERR("Error in VFIO getting REGION_INFO");
		goto MC_FAILURE;
	}

	/* On mmap failure this yields MAP_FAILED, which callers check. */
	v_addr = (size_t)mmap(NULL, reg_info.size,
		PROT_WRITE | PROT_READ, MAP_SHARED,
		mc_fd, reg_info.offset);

MC_FAILURE:
	close(mc_fd);

	return v_addr;
}
1264 
1265 #define IRQ_SET_BUF_LEN  (sizeof(struct vfio_irq_set) + sizeof(int))
1266 
1267 int rte_dpaa2_intr_enable(struct rte_intr_handle *intr_handle, int index)
1268 {
1269 	int len, ret;
1270 	char irq_set_buf[IRQ_SET_BUF_LEN];
1271 	struct vfio_irq_set *irq_set;
1272 	int *fd_ptr, vfio_dev_fd;
1273 
1274 	len = sizeof(irq_set_buf);
1275 
1276 	irq_set = (struct vfio_irq_set *)irq_set_buf;
1277 	irq_set->argsz = len;
1278 	irq_set->count = 1;
1279 	irq_set->flags =
1280 		VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER;
1281 	irq_set->index = index;
1282 	irq_set->start = 0;
1283 	fd_ptr = (int *)&irq_set->data;
1284 	*fd_ptr = rte_intr_fd_get(intr_handle);
1285 
1286 	vfio_dev_fd = rte_intr_dev_fd_get(intr_handle);
1287 	ret = ioctl(vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set);
1288 	if (ret) {
1289 		DPAA2_BUS_ERR("Error:dpaa2 SET IRQs fd=%d, err = %d(%s)",
1290 			      rte_intr_fd_get(intr_handle), errno,
1291 			      strerror(errno));
1292 		return ret;
1293 	}
1294 
1295 	return ret;
1296 }
1297 
1298 int rte_dpaa2_intr_disable(struct rte_intr_handle *intr_handle, int index)
1299 {
1300 	struct vfio_irq_set *irq_set;
1301 	char irq_set_buf[IRQ_SET_BUF_LEN];
1302 	int len, ret, vfio_dev_fd;
1303 
1304 	len = sizeof(struct vfio_irq_set);
1305 
1306 	irq_set = (struct vfio_irq_set *)irq_set_buf;
1307 	irq_set->argsz = len;
1308 	irq_set->flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_TRIGGER;
1309 	irq_set->index = index;
1310 	irq_set->start = 0;
1311 	irq_set->count = 0;
1312 
1313 	vfio_dev_fd = rte_intr_dev_fd_get(intr_handle);
1314 	ret = ioctl(vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set);
1315 	if (ret)
1316 		DPAA2_BUS_ERR("Error disabling dpaa2 interrupts for fd %d",
1317 			rte_intr_fd_get(intr_handle));
1318 
1319 	return ret;
1320 }
1321 
1322 /* set up interrupt support (but not enable interrupts) */
1323 int
1324 rte_dpaa2_vfio_setup_intr(struct rte_intr_handle *intr_handle,
1325 			  int vfio_dev_fd,
1326 			  int num_irqs)
1327 {
1328 	int i, ret;
1329 
1330 	/* start from MSI-X interrupt type */
1331 	for (i = 0; i < num_irqs; i++) {
1332 		struct vfio_irq_info irq_info = { .argsz = sizeof(irq_info) };
1333 		int fd = -1;
1334 
1335 		irq_info.index = i;
1336 
1337 		ret = ioctl(vfio_dev_fd, VFIO_DEVICE_GET_IRQ_INFO, &irq_info);
1338 		if (ret < 0) {
1339 			DPAA2_BUS_ERR("Cannot get IRQ(%d) info, error %i (%s)",
1340 				      i, errno, strerror(errno));
1341 			return ret;
1342 		}
1343 
1344 		/* if this vector cannot be used with eventfd,
1345 		 * fail if we explicitly
1346 		 * specified interrupt type, otherwise continue
1347 		 */
1348 		if ((irq_info.flags & VFIO_IRQ_INFO_EVENTFD) == 0)
1349 			continue;
1350 
1351 		/* set up an eventfd for interrupts */
1352 		fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
1353 		if (fd < 0) {
1354 			DPAA2_BUS_ERR("Cannot set up eventfd, error %i (%s)",
1355 				errno, strerror(errno));
1356 			return fd;
1357 		}
1358 
1359 		if (rte_intr_fd_set(intr_handle, fd))
1360 			return -rte_errno;
1361 
1362 		if (rte_intr_type_set(intr_handle, RTE_INTR_HANDLE_VFIO_MSI))
1363 			return -rte_errno;
1364 
1365 		if (rte_intr_dev_fd_set(intr_handle, vfio_dev_fd))
1366 			return -rte_errno;
1367 
1368 		return 0;
1369 	}
1370 
1371 	/* if we're here, we haven't found a suitable interrupt vector */
1372 	return -EIO;
1373 }
1374 
/* Close a single DPAA2 device during group teardown and drop it from the
 * VFIO group bookkeeping.
 *
 * For portal/control objects (IO/CON/CI/BPOOL/MUX) the registered
 * object-type close() hook is invoked; for driver-bound device classes
 * (ETH/CRYPTO/QDMA) the matching driver's remove() is called subject to
 * the bus allow/block policy.
 */
static void
fslmc_close_iodevices(struct rte_dpaa2_device *dev,
	int vfio_fd)
{
	struct rte_dpaa2_object *object = NULL;
	struct rte_dpaa2_driver *drv;
	int ret, probe_all;

	switch (dev->dev_type) {
	case DPAA2_IO:
	case DPAA2_CON:
	case DPAA2_CI:
	case DPAA2_BPOOL:
	case DPAA2_MUX:
		/* Dispatch to the close() hook of the matching object type. */
		TAILQ_FOREACH(object, &dpaa2_obj_list, next) {
			if (dev->dev_type == object->dev_type)
				object->close(dev->object_id);
			else
				continue;
		}
		break;
	case DPAA2_ETH:
	case DPAA2_CRYPTO:
	case DPAA2_QDMA:
		/* probe_all: true unless the bus was scanned in allow-list
		 * mode, mirroring the probe-time policy.
		 */
		probe_all = rte_fslmc_bus.bus.conf.scan_mode !=
			    RTE_BUS_SCAN_ALLOWLIST;
		TAILQ_FOREACH(drv, &rte_fslmc_bus.driver_list, next) {
			if (drv->drv_type != dev->dev_type)
				continue;
			/* NOTE(review): this skips devices that ARE probed
			 * and calls remove() on unprobed ones, which looks
			 * inverted for a close path — confirm intent against
			 * the probe-side logic before changing.
			 */
			if (rte_dev_is_probed(&dev->device))
				continue;
			if (probe_all ||
			    (dev->device.devargs &&
			     dev->device.devargs->policy ==
			     RTE_DEV_ALLOWED)) {
				ret = drv->remove(dev);
				if (ret)
					DPAA2_BUS_ERR("Unable to remove");
			}
		}
		break;
	default:
		break;
	}

	/* Always detach the device from the VFIO group, whatever its type. */
	ret = fslmc_vfio_group_remove_dev(vfio_fd, dev->device.name);
	if (ret) {
		DPAA2_BUS_ERR("Failed to remove %s from vfio",
			dev->device.name);
	}
	DPAA2_BUS_LOG(DEBUG, "Device (%s) Closed",
		      dev->device.name);
}
1428 
1429 /*
1430  * fslmc_process_iodevices for processing only IO (ETH, CRYPTO, and possibly
1431  * EVENT) devices.
1432  */
1433 static int
1434 fslmc_process_iodevices(struct rte_dpaa2_device *dev)
1435 {
1436 	int dev_fd, ret;
1437 	struct vfio_device_info device_info = { .argsz = sizeof(device_info) };
1438 	struct rte_dpaa2_object *object = NULL;
1439 
1440 	ret = fslmc_vfio_setup_device(dev->device.name, &dev_fd,
1441 			&device_info);
1442 	if (ret)
1443 		return ret;
1444 
1445 	switch (dev->dev_type) {
1446 	case DPAA2_ETH:
1447 		ret = rte_dpaa2_vfio_setup_intr(dev->intr_handle, dev_fd,
1448 				device_info.num_irqs);
1449 		if (ret)
1450 			return ret;
1451 		break;
1452 	case DPAA2_CON:
1453 	case DPAA2_IO:
1454 	case DPAA2_CI:
1455 	case DPAA2_BPOOL:
1456 	case DPAA2_DPRTC:
1457 	case DPAA2_MUX:
1458 	case DPAA2_DPRC:
1459 		TAILQ_FOREACH(object, &dpaa2_obj_list, next) {
1460 			if (dev->dev_type == object->dev_type)
1461 				object->create(dev_fd, &device_info, dev);
1462 			else
1463 				continue;
1464 		}
1465 		break;
1466 	default:
1467 		break;
1468 	}
1469 
1470 	DPAA2_BUS_LOG(DEBUG, "Device (%s) abstracted from VFIO",
1471 		      dev->device.name);
1472 	return 0;
1473 }
1474 
1475 static int
1476 fslmc_process_mcp(struct rte_dpaa2_device *dev)
1477 {
1478 	int ret;
1479 	intptr_t v_addr;
1480 	struct fsl_mc_io dpmng  = {0};
1481 	struct mc_version mc_ver_info = {0};
1482 
1483 	rte_mcp_ptr_list = malloc(sizeof(void *) * (MC_PORTAL_INDEX + 1));
1484 	if (!rte_mcp_ptr_list) {
1485 		DPAA2_BUS_ERR("Unable to allocate MC portal memory");
1486 		ret = -ENOMEM;
1487 		goto cleanup;
1488 	}
1489 
1490 	v_addr = vfio_map_mcp_obj(dev->device.name);
1491 	if (v_addr == (intptr_t)MAP_FAILED) {
1492 		DPAA2_BUS_ERR("Error mapping region (errno = %d)", errno);
1493 		ret = -1;
1494 		goto cleanup;
1495 	}
1496 
1497 	/* check the MC version compatibility */
1498 	dpmng.regs = (void *)v_addr;
1499 
1500 	/* In case of secondary processes, MC version check is no longer
1501 	 * required.
1502 	 */
1503 	if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
1504 		rte_mcp_ptr_list[MC_PORTAL_INDEX] = (void *)v_addr;
1505 		return 0;
1506 	}
1507 
1508 	if (mc_get_version(&dpmng, CMD_PRI_LOW, &mc_ver_info)) {
1509 		DPAA2_BUS_ERR("Unable to obtain MC version");
1510 		ret = -1;
1511 		goto cleanup;
1512 	}
1513 
1514 	if ((mc_ver_info.major != MC_VER_MAJOR) ||
1515 	    (mc_ver_info.minor < MC_VER_MINOR)) {
1516 		DPAA2_BUS_ERR("DPAA2 MC version not compatible!"
1517 			      " Expected %d.%d.x, Detected %d.%d.%d",
1518 			      MC_VER_MAJOR, MC_VER_MINOR,
1519 			      mc_ver_info.major, mc_ver_info.minor,
1520 			      mc_ver_info.revision);
1521 		ret = -1;
1522 		goto cleanup;
1523 	}
1524 	rte_mcp_ptr_list[MC_PORTAL_INDEX] = (void *)v_addr;
1525 
1526 	return 0;
1527 
1528 cleanup:
1529 	if (rte_mcp_ptr_list) {
1530 		free(rte_mcp_ptr_list);
1531 		rte_mcp_ptr_list = NULL;
1532 	}
1533 
1534 	return ret;
1535 }
1536 
/* Tear down the process's VFIO group: close every device on the fslmc bus
 * (honouring the block-list policy and skipping control objects in
 * secondary processes), then clear the group state.
 *
 * @return 0 on success, a negative error if the group fd cannot be
 *         resolved.
 */
int
fslmc_vfio_close_group(void)
{
	struct rte_dpaa2_device *dev, *dev_temp;
	int vfio_group_fd;
	const char *group_name = fslmc_vfio_get_group_name();

	vfio_group_fd = fslmc_vfio_group_fd_by_name(group_name);
	if (vfio_group_fd <= 0) {
		DPAA2_BUS_INFO("%s: Get fd by name(%s) failed(%d)",
			__func__, group_name, vfio_group_fd);
		if (vfio_group_fd < 0)
			return vfio_group_fd;
		return -EIO;
	}

	/* SAFE iteration: fslmc_close_iodevices()/TAILQ_REMOVE may detach
	 * the current entry.
	 */
	RTE_TAILQ_FOREACH_SAFE(dev, &rte_fslmc_bus.device_list, next, dev_temp) {
		/* Blocked devices are dropped from the list without close. */
		if (dev->device.devargs &&
		    dev->device.devargs->policy == RTE_DEV_BLOCKED) {
			DPAA2_BUS_LOG(DEBUG, "%s Blacklisted, skipping",
				      dev->device.name);
			TAILQ_REMOVE(&rte_fslmc_bus.device_list, dev, next);
				continue;
		}
		switch (dev->dev_type) {
		case DPAA2_ETH:
		case DPAA2_CRYPTO:
		case DPAA2_QDMA:
		case DPAA2_IO:
			fslmc_close_iodevices(dev, vfio_group_fd);
			break;
		case DPAA2_CON:
		case DPAA2_CI:
		case DPAA2_BPOOL:
		case DPAA2_MUX:
			/* Control objects are owned by the primary process;
			 * secondaries must not close them.
			 */
			if (rte_eal_process_type() == RTE_PROC_SECONDARY)
				continue;

			fslmc_close_iodevices(dev, vfio_group_fd);
			break;
		case DPAA2_DPRTC:
		default:
			DPAA2_BUS_DEBUG("Device cannot be closed: Not supported (%s)",
					dev->device.name);
		}
	}

	fslmc_vfio_clear_group(vfio_group_fd);

	return 0;
}
1588 
/* Walk the scanned fslmc device list and bring each device up through
 * VFIO, in dependency order:
 *   1) count dpmcp/dpio devices and note block-list status,
 *   2) map exactly one MC portal (dpmcp) — mandatory,
 *   3) process the DPRC container device (updates other devices'
 *      endpoints),
 *   4) process everything else, with special primary/secondary dpio
 *      partitioning.
 *
 * @return 0 on success, a negative error as soon as any device fails.
 */
int
fslmc_vfio_process_group(void)
{
	int ret;
	int found_mportal = 0;
	struct rte_dpaa2_device *dev, *dev_temp;
	bool is_dpmcp_in_blocklist = false, is_dpio_in_blocklist = false;
	int dpmcp_count = 0, dpio_count = 0, current_device;

	/* Pass 1: census only — count dpmcp/dpio devices and remember
	 * whether any of them is explicitly blocked.
	 */
	RTE_TAILQ_FOREACH_SAFE(dev, &rte_fslmc_bus.device_list, next,
		dev_temp) {
		if (dev->dev_type == DPAA2_MPORTAL) {
			dpmcp_count++;
			if (dev->device.devargs &&
			    dev->device.devargs->policy == RTE_DEV_BLOCKED)
				is_dpmcp_in_blocklist = true;
		}
		if (dev->dev_type == DPAA2_IO) {
			dpio_count++;
			if (dev->device.devargs &&
			    dev->device.devargs->policy == RTE_DEV_BLOCKED)
				is_dpio_in_blocklist = true;
		}
	}

	/* Search the MCP as that should be initialized first. */
	current_device = 0;
	RTE_TAILQ_FOREACH_SAFE(dev, &rte_fslmc_bus.device_list, next,
		dev_temp) {
		if (dev->dev_type == DPAA2_MPORTAL) {
			current_device++;
			if (dev->device.devargs &&
			    dev->device.devargs->policy == RTE_DEV_BLOCKED) {
				DPAA2_BUS_LOG(DEBUG, "%s Blocked, skipping",
					      dev->device.name);
				TAILQ_REMOVE(&rte_fslmc_bus.device_list,
						dev, next);
				continue;
			}

			/* Secondary processes claim the LAST dpmcp (primary
			 * uses the earlier ones); with a single dpmcp the
			 * secondary gets none. -- NOTE(review): presumed
			 * intent; confirm against primary/secondary portal
			 * allocation scheme.
			 */
			if (rte_eal_process_type() == RTE_PROC_SECONDARY &&
			    !is_dpmcp_in_blocklist) {
				if (dpmcp_count == 1 ||
				    current_device != dpmcp_count) {
					TAILQ_REMOVE(&rte_fslmc_bus.device_list,
						     dev, next);
					continue;
				}
			}

			/* Only the first eligible dpmcp is actually mapped. */
			if (!found_mportal) {
				ret = fslmc_process_mcp(dev);
				if (ret) {
					DPAA2_BUS_ERR("Unable to map MC Portal");
					return ret;
				}
				found_mportal = 1;
			}

			TAILQ_REMOVE(&rte_fslmc_bus.device_list, dev, next);
			free(dev);
			dev = NULL;
			/* Ideally there is only a single dpmcp, but in case
			 * multiple exists, looping on remaining devices.
			 */
		}
	}

	/* Cannot continue if there is not even a single mportal */
	if (!found_mportal) {
		DPAA2_BUS_ERR("No MC Portal device found. Not continuing");
		return -EIO;
	}

	/* Search for DPRC device next as it updates endpoint of
	 * other devices.
	 */
	current_device = 0;
	RTE_TAILQ_FOREACH_SAFE(dev, &rte_fslmc_bus.device_list, next, dev_temp) {
		if (dev->dev_type == DPAA2_DPRC) {
			ret = fslmc_process_iodevices(dev);
			if (ret) {
				DPAA2_BUS_ERR("Unable to process dprc");
				return ret;
			}
			TAILQ_REMOVE(&rte_fslmc_bus.device_list, dev, next);
		}
	}

	/* Final pass: every remaining device. current_device tracks the
	 * dpio ordinal for the primary/secondary split below.
	 */
	current_device = 0;
	RTE_TAILQ_FOREACH_SAFE(dev, &rte_fslmc_bus.device_list, next,
		dev_temp) {
		if (dev->dev_type == DPAA2_IO)
			current_device++;
		if (dev->device.devargs &&
		    dev->device.devargs->policy == RTE_DEV_BLOCKED) {
			DPAA2_BUS_LOG(DEBUG, "%s Blocked, skipping",
				      dev->device.name);
			TAILQ_REMOVE(&rte_fslmc_bus.device_list, dev, next);
			continue;
		}
		/* Secondaries only handle data-path device classes. */
		if (rte_eal_process_type() == RTE_PROC_SECONDARY &&
		    dev->dev_type != DPAA2_ETH &&
		    dev->dev_type != DPAA2_CRYPTO &&
		    dev->dev_type != DPAA2_QDMA &&
		    dev->dev_type != DPAA2_IO) {
			TAILQ_REMOVE(&rte_fslmc_bus.device_list, dev, next);
			continue;
		}
		switch (dev->dev_type) {
		case DPAA2_ETH:
		case DPAA2_CRYPTO:
		case DPAA2_QDMA:
			ret = fslmc_process_iodevices(dev);
			if (ret) {
				DPAA2_BUS_DEBUG("Dev (%s) init failed",
						dev->device.name);
				return ret;
			}
			break;
		case DPAA2_CON:
		case DPAA2_CI:
		case DPAA2_BPOOL:
		case DPAA2_DPRTC:
		case DPAA2_MUX:
			/* IN case of secondary processes, all control objects
			 * like dpbp, dpcon, dpci are not initialized/required
			 * - all of these are assumed to be initialized and made
			 *   available by primary.
			 */
			if (rte_eal_process_type() == RTE_PROC_SECONDARY)
				continue;

			/* Call the object creation routine and remove the
			 * device entry from device list
			 */
			ret = fslmc_process_iodevices(dev);
			if (ret) {
				DPAA2_BUS_DEBUG("Dev (%s) init failed",
						dev->device.name);
				return ret;
			}

			break;
		case DPAA2_IO:
			/* With multiple dpios: the primary drops the last
			 * one, the secondary keeps only the last one.
			 */
			if (!is_dpio_in_blocklist && dpio_count > 1) {
				if (rte_eal_process_type() == RTE_PROC_SECONDARY
				    && current_device != dpio_count) {
					TAILQ_REMOVE(&rte_fslmc_bus.device_list,
						     dev, next);
					break;
				}
				if (rte_eal_process_type() == RTE_PROC_PRIMARY
				    && current_device == dpio_count) {
					TAILQ_REMOVE(&rte_fslmc_bus.device_list,
						     dev, next);
					break;
				}
			}

			ret = fslmc_process_iodevices(dev);
			if (ret) {
				DPAA2_BUS_DEBUG("Dev (%s) init failed",
						dev->device.name);
				return ret;
			}

			break;
		case DPAA2_UNKNOWN:
		default:
			/* Unknown - ignore */
			DPAA2_BUS_DEBUG("Found unknown device (%s)",
					dev->device.name);
			TAILQ_REMOVE(&rte_fslmc_bus.device_list, dev, next);
			free(dev);
			dev = NULL;
		}
	}

	return 0;
}
1770 
/* One-time VFIO bring-up for the fslmc bus: open (or reuse) the MC VFIO
 * container, open the DPRC group, verify group viability, connect group
 * to container (or, in a secondary process, attach to the primary's
 * connection), and start the multi-process sync service.
 *
 * @return 0 on success, negative error on any failed step; the group
 *         state is cleared on post-open failures.
 */
int
fslmc_vfio_setup_group(void)
{
	int vfio_group_fd, vfio_container_fd, ret;
	struct vfio_group_status status = { .argsz = sizeof(status) };
	const char *group_name = fslmc_vfio_get_group_name();

	/* MC VFIO setup entry */
	vfio_container_fd = fslmc_vfio_container_fd();
	if (vfio_container_fd <= 0) {
		/* No container yet in this process: create one. */
		vfio_container_fd = fslmc_vfio_open_container_fd();
		if (vfio_container_fd < 0) {
			DPAA2_BUS_ERR("Failed to create MC VFIO container");
			return vfio_container_fd;
		}
	}

	if (!group_name) {
		DPAA2_BUS_DEBUG("DPAA2: DPRC not available");
		return -EINVAL;
	}

	vfio_group_fd = fslmc_vfio_group_fd_by_name(group_name);
	if (vfio_group_fd < 0) {
		vfio_group_fd = fslmc_vfio_open_group_fd(group_name);
		if (vfio_group_fd < 0) {
			DPAA2_BUS_ERR("open group name(%s) failed(%d)",
				group_name, vfio_group_fd);
			return -rte_errno;
		}
	}

	/* Check group viability */
	ret = ioctl(vfio_group_fd, VFIO_GROUP_GET_STATUS, &status);
	if (ret) {
		DPAA2_BUS_ERR("VFIO(%s:fd=%d) error getting group status(%d)",
			group_name, vfio_group_fd, ret);
		fslmc_vfio_clear_group(vfio_group_fd);
		return ret;
	}

	if (!(status.flags & VFIO_GROUP_FLAGS_VIABLE)) {
		DPAA2_BUS_ERR("VFIO group not viable");
		fslmc_vfio_clear_group(vfio_group_fd);
		return -EPERM;
	}

	/* check if group does not have a container yet */
	if (!(status.flags & VFIO_GROUP_FLAGS_CONTAINER_SET)) {
		/* Now connect this IOMMU group to given container */
		ret = vfio_connect_container(vfio_container_fd,
			vfio_group_fd);
	} else {
		/* Here is supposed in secondary process,
		 * group has been set to container in primary process.
		 */
		if (rte_eal_process_type() == RTE_PROC_PRIMARY)
			DPAA2_BUS_WARN("This group has been set container?");
		ret = fslmc_vfio_connect_container(vfio_group_fd);
	}
	if (ret) {
		DPAA2_BUS_ERR("vfio group connect failed(%d)", ret);
		fslmc_vfio_clear_group(vfio_group_fd);
		return ret;
	}

	/* Get Device information */
	/* NOTE(review): on success this ioctl returns a device fd that is
	 * neither stored nor closed here — presumably only used to verify
	 * the DPRC device exists; confirm whether the fd should be closed
	 * or kept.
	 */
	ret = ioctl(vfio_group_fd, VFIO_GROUP_GET_DEVICE_FD, group_name);
	if (ret < 0) {
		DPAA2_BUS_ERR("Error getting device %s fd", group_name);
		fslmc_vfio_clear_group(vfio_group_fd);
		return ret;
	}

	ret = fslmc_vfio_mp_sync_setup();
	if (ret) {
		DPAA2_BUS_ERR("VFIO MP sync setup failed!");
		fslmc_vfio_clear_group(vfio_group_fd);
		return ret;
	}

	DPAA2_BUS_DEBUG("VFIO GROUP FD is %d", vfio_group_fd);

	return 0;
}
1856