xref: /dpdk/drivers/bus/pci/linux/pci_vfio.c (revision fd8c20aab4c2fe2c568455be4efab76db126791f)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2014 Intel Corporation
3  */
4 
5 #include <string.h>
6 #include <fcntl.h>
7 #include <linux/pci_regs.h>
8 #include <sys/eventfd.h>
9 #include <sys/socket.h>
10 #include <sys/ioctl.h>
11 #include <sys/mman.h>
12 #include <stdbool.h>
13 
14 #include <rte_log.h>
15 #include <rte_pci.h>
16 #include <rte_bus_pci.h>
17 #include <rte_eal_memconfig.h>
18 #include <rte_malloc.h>
19 #include <rte_vfio.h>
20 
21 #include "eal_filesystem.h"
22 
23 #include "pci_init.h"
24 #include "private.h"
25 
26 /**
27  * @file
28  * PCI probing under linux (VFIO version)
29  *
30  * This code tries to determine if the PCI device is bound to VFIO driver,
31  * and initialize it (map BARs, set up interrupts) if that's the case.
32  *
33  * This file is only compiled if CONFIG_RTE_EAL_VFIO is set to "y".
34  */
35 
36 #ifdef VFIO_PRESENT
37 
/* runtime page size/mask; note sysconf() is re-evaluated on every use */
#define PAGE_SIZE   (sysconf(_SC_PAGESIZE))
#define PAGE_MASK   (~(PAGE_SIZE - 1))

/* tailq of mapped PCI resources, registered with EAL so secondary
 * processes can look up the mappings created by the primary */
static struct rte_tailq_elem rte_vfio_tailq = {
	.name = "VFIO_RESOURCE_LIST",
};
EAL_REGISTER_TAILQ(rte_vfio_tailq)
45 
46 int
47 pci_vfio_read_config(const struct rte_intr_handle *intr_handle,
48 		    void *buf, size_t len, off_t offs)
49 {
50 	return pread64(intr_handle->vfio_dev_fd, buf, len,
51 	       VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) + offs);
52 }
53 
54 int
55 pci_vfio_write_config(const struct rte_intr_handle *intr_handle,
56 		    const void *buf, size_t len, off_t offs)
57 {
58 	return pwrite64(intr_handle->vfio_dev_fd, buf, len,
59 	       VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) + offs);
60 }
61 
62 /* get PCI BAR number where MSI-X interrupts are */
63 static int
64 pci_vfio_get_msix_bar(int fd, struct pci_msix_table *msix_table)
65 {
66 	int ret;
67 	uint32_t reg;
68 	uint16_t flags;
69 	uint8_t cap_id, cap_offset;
70 
71 	/* read PCI capability pointer from config space */
72 	ret = pread64(fd, &reg, sizeof(reg),
73 			VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) +
74 			PCI_CAPABILITY_LIST);
75 	if (ret != sizeof(reg)) {
76 		RTE_LOG(ERR, EAL, "Cannot read capability pointer from PCI "
77 				"config space!\n");
78 		return -1;
79 	}
80 
81 	/* we need first byte */
82 	cap_offset = reg & 0xFF;
83 
84 	while (cap_offset) {
85 
86 		/* read PCI capability ID */
87 		ret = pread64(fd, &reg, sizeof(reg),
88 				VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) +
89 				cap_offset);
90 		if (ret != sizeof(reg)) {
91 			RTE_LOG(ERR, EAL, "Cannot read capability ID from PCI "
92 					"config space!\n");
93 			return -1;
94 		}
95 
96 		/* we need first byte */
97 		cap_id = reg & 0xFF;
98 
99 		/* if we haven't reached MSI-X, check next capability */
100 		if (cap_id != PCI_CAP_ID_MSIX) {
101 			ret = pread64(fd, &reg, sizeof(reg),
102 					VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) +
103 					cap_offset);
104 			if (ret != sizeof(reg)) {
105 				RTE_LOG(ERR, EAL, "Cannot read capability pointer from PCI "
106 						"config space!\n");
107 				return -1;
108 			}
109 
110 			/* we need second byte */
111 			cap_offset = (reg & 0xFF00) >> 8;
112 
113 			continue;
114 		}
115 		/* else, read table offset */
116 		else {
117 			/* table offset resides in the next 4 bytes */
118 			ret = pread64(fd, &reg, sizeof(reg),
119 					VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) +
120 					cap_offset + 4);
121 			if (ret != sizeof(reg)) {
122 				RTE_LOG(ERR, EAL, "Cannot read table offset from PCI config "
123 						"space!\n");
124 				return -1;
125 			}
126 
127 			ret = pread64(fd, &flags, sizeof(flags),
128 					VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) +
129 					cap_offset + 2);
130 			if (ret != sizeof(flags)) {
131 				RTE_LOG(ERR, EAL, "Cannot read table flags from PCI config "
132 						"space!\n");
133 				return -1;
134 			}
135 
136 			msix_table->bar_index = reg & RTE_PCI_MSIX_TABLE_BIR;
137 			msix_table->offset = reg & RTE_PCI_MSIX_TABLE_OFFSET;
138 			msix_table->size =
139 				16 * (1 + (flags & RTE_PCI_MSIX_FLAGS_QSIZE));
140 
141 			return 0;
142 		}
143 	}
144 	return 0;
145 }
146 
147 /* set PCI bus mastering */
148 static int
149 pci_vfio_set_bus_master(int dev_fd, bool op)
150 {
151 	uint16_t reg;
152 	int ret;
153 
154 	ret = pread64(dev_fd, &reg, sizeof(reg),
155 			VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) +
156 			PCI_COMMAND);
157 	if (ret != sizeof(reg)) {
158 		RTE_LOG(ERR, EAL, "Cannot read command from PCI config space!\n");
159 		return -1;
160 	}
161 
162 	if (op)
163 		/* set the master bit */
164 		reg |= PCI_COMMAND_MASTER;
165 	else
166 		reg &= ~(PCI_COMMAND_MASTER);
167 
168 	ret = pwrite64(dev_fd, &reg, sizeof(reg),
169 			VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) +
170 			PCI_COMMAND);
171 
172 	if (ret != sizeof(reg)) {
173 		RTE_LOG(ERR, EAL, "Cannot write command to PCI config space!\n");
174 		return -1;
175 	}
176 
177 	return 0;
178 }
179 
180 /* set up interrupt support (but not enable interrupts) */
181 static int
182 pci_vfio_setup_interrupts(struct rte_pci_device *dev, int vfio_dev_fd)
183 {
184 	int i, ret, intr_idx;
185 	enum rte_intr_mode intr_mode;
186 
187 	/* default to invalid index */
188 	intr_idx = VFIO_PCI_NUM_IRQS;
189 
190 	/* Get default / configured intr_mode */
191 	intr_mode = rte_eal_vfio_intr_mode();
192 
193 	/* get interrupt type from internal config (MSI-X by default, can be
194 	 * overridden from the command line
195 	 */
196 	switch (intr_mode) {
197 	case RTE_INTR_MODE_MSIX:
198 		intr_idx = VFIO_PCI_MSIX_IRQ_INDEX;
199 		break;
200 	case RTE_INTR_MODE_MSI:
201 		intr_idx = VFIO_PCI_MSI_IRQ_INDEX;
202 		break;
203 	case RTE_INTR_MODE_LEGACY:
204 		intr_idx = VFIO_PCI_INTX_IRQ_INDEX;
205 		break;
206 	/* don't do anything if we want to automatically determine interrupt type */
207 	case RTE_INTR_MODE_NONE:
208 		break;
209 	default:
210 		RTE_LOG(ERR, EAL, "  unknown default interrupt type!\n");
211 		return -1;
212 	}
213 
214 	/* start from MSI-X interrupt type */
215 	for (i = VFIO_PCI_MSIX_IRQ_INDEX; i >= 0; i--) {
216 		struct vfio_irq_info irq = { .argsz = sizeof(irq) };
217 		int fd = -1;
218 
219 		/* skip interrupt modes we don't want */
220 		if (intr_mode != RTE_INTR_MODE_NONE &&
221 				i != intr_idx)
222 			continue;
223 
224 		irq.index = i;
225 
226 		ret = ioctl(vfio_dev_fd, VFIO_DEVICE_GET_IRQ_INFO, &irq);
227 		if (ret < 0) {
228 			RTE_LOG(ERR, EAL, "  cannot get IRQ info, "
229 					"error %i (%s)\n", errno, strerror(errno));
230 			return -1;
231 		}
232 
233 		/* if this vector cannot be used with eventfd, fail if we explicitly
234 		 * specified interrupt type, otherwise continue */
235 		if ((irq.flags & VFIO_IRQ_INFO_EVENTFD) == 0) {
236 			if (intr_mode != RTE_INTR_MODE_NONE) {
237 				RTE_LOG(ERR, EAL,
238 						"  interrupt vector does not support eventfd!\n");
239 				return -1;
240 			} else
241 				continue;
242 		}
243 
244 		/* set up an eventfd for interrupts */
245 		fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
246 		if (fd < 0) {
247 			RTE_LOG(ERR, EAL, "  cannot set up eventfd, "
248 					"error %i (%s)\n", errno, strerror(errno));
249 			return -1;
250 		}
251 
252 		dev->intr_handle.fd = fd;
253 		dev->intr_handle.vfio_dev_fd = vfio_dev_fd;
254 
255 		switch (i) {
256 		case VFIO_PCI_MSIX_IRQ_INDEX:
257 			intr_mode = RTE_INTR_MODE_MSIX;
258 			dev->intr_handle.type = RTE_INTR_HANDLE_VFIO_MSIX;
259 			break;
260 		case VFIO_PCI_MSI_IRQ_INDEX:
261 			intr_mode = RTE_INTR_MODE_MSI;
262 			dev->intr_handle.type = RTE_INTR_HANDLE_VFIO_MSI;
263 			break;
264 		case VFIO_PCI_INTX_IRQ_INDEX:
265 			intr_mode = RTE_INTR_MODE_LEGACY;
266 			dev->intr_handle.type = RTE_INTR_HANDLE_VFIO_LEGACY;
267 			break;
268 		default:
269 			RTE_LOG(ERR, EAL, "  unknown interrupt type!\n");
270 			return -1;
271 		}
272 
273 		return 0;
274 	}
275 
276 	/* if we're here, we haven't found a suitable interrupt vector */
277 	return -1;
278 }
279 
280 static int
281 pci_vfio_is_ioport_bar(int vfio_dev_fd, int bar_index)
282 {
283 	uint32_t ioport_bar;
284 	int ret;
285 
286 	ret = pread64(vfio_dev_fd, &ioport_bar, sizeof(ioport_bar),
287 			  VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX)
288 			  + PCI_BASE_ADDRESS_0 + bar_index*4);
289 	if (ret != sizeof(ioport_bar)) {
290 		RTE_LOG(ERR, EAL, "Cannot read command (%x) from config space!\n",
291 			PCI_BASE_ADDRESS_0 + bar_index*4);
292 		return -1;
293 	}
294 
295 	return (ioport_bar & PCI_BASE_ADDRESS_SPACE_IO) != 0;
296 }
297 
298 static int
299 pci_rte_vfio_setup_device(struct rte_pci_device *dev, int vfio_dev_fd)
300 {
301 	if (pci_vfio_setup_interrupts(dev, vfio_dev_fd) != 0) {
302 		RTE_LOG(ERR, EAL, "Error setting up interrupts!\n");
303 		return -1;
304 	}
305 
306 	/* set bus mastering for the device */
307 	if (pci_vfio_set_bus_master(vfio_dev_fd, true)) {
308 		RTE_LOG(ERR, EAL, "Cannot set up bus mastering!\n");
309 		return -1;
310 	}
311 
312 	/*
313 	 * Reset the device. If the device is not capable of resetting,
314 	 * then it updates errno as EINVAL.
315 	 */
316 	if (ioctl(vfio_dev_fd, VFIO_DEVICE_RESET) && errno != EINVAL) {
317 		RTE_LOG(ERR, EAL, "Unable to reset device! Error: %d (%s)\n",
318 				errno, strerror(errno));
319 		return -1;
320 	}
321 
322 	return 0;
323 }
324 
/*
 * mmap one BAR of a VFIO device into the process, at the address stored in
 * bar->addr. If the BAR contains the MSI-X table (which VFIO refuses to
 * mmap), map the BAR in up to two pieces around the page-aligned span that
 * holds the table. On success bar->addr points at the mapping; a BAR that
 * cannot be mapped at all has its size/addr zeroed and still returns 0.
 * Returns -1 only on an actual mapping failure.
 */
static int
pci_vfio_mmap_bar(int vfio_dev_fd, struct mapped_pci_resource *vfio_res,
		int bar_index, int additional_flags)
{
	/* up to two file-backed sub-ranges of the BAR to map */
	struct memreg {
		unsigned long offset, size;
	} memreg[2] = {};
	void *bar_addr;
	struct pci_msix_table *msix_table = &vfio_res->msix_table;
	struct pci_map *bar = &vfio_res->maps[bar_index];

	if (bar->size == 0)
		/* Skip this BAR */
		return 0;

	if (msix_table->bar_index == bar_index) {
		/*
		 * VFIO will not let us map the MSI-X table,
		 * but we can map around it.
		 */
		uint32_t table_start = msix_table->offset;
		uint32_t table_end = table_start + msix_table->size;
		/* round the excluded span out to whole pages */
		table_end = (table_end + ~PAGE_MASK) & PAGE_MASK;
		table_start &= PAGE_MASK;

		if (table_start == 0 && table_end >= bar->size) {
			/* Cannot map this BAR: the table covers it entirely */
			RTE_LOG(DEBUG, EAL, "Skipping BAR%d\n", bar_index);
			bar->size = 0;
			bar->addr = 0;
			return 0;
		}

		/* piece before the table, then piece after the table */
		memreg[0].offset = bar->offset;
		memreg[0].size = table_start;
		memreg[1].offset = bar->offset + table_end;
		memreg[1].size = bar->size - table_end;

		RTE_LOG(DEBUG, EAL,
			"Trying to map BAR%d that contains the MSI-X "
			"table. Trying offsets: "
			"0x%04lx:0x%04lx, 0x%04lx:0x%04lx\n", bar_index,
			memreg[0].offset, memreg[0].size,
			memreg[1].offset, memreg[1].size);
	} else {
		/* no MSI-X table in this BAR: map it in one piece */
		memreg[0].offset = bar->offset;
		memreg[0].size = bar->size;
	}

	/* reserve the address using an inaccessible mapping */
	bar_addr = mmap(bar->addr, bar->size, 0, MAP_PRIVATE |
			MAP_ANONYMOUS | additional_flags, -1, 0);
	if (bar_addr != MAP_FAILED) {
		void *map_addr = NULL;
		if (memreg[0].size) {
			/* actual map of first part */
			map_addr = pci_map_resource(bar_addr, vfio_dev_fd,
							memreg[0].offset,
							memreg[0].size,
							MAP_FIXED);
		}

		/* if there's a second part, try to map it */
		if (map_addr != MAP_FAILED
			&& memreg[1].offset && memreg[1].size) {
			/* place the second piece at its BAR-relative offset
			 * within the reservation made above */
			void *second_addr = RTE_PTR_ADD(bar_addr,
							memreg[1].offset -
							(uintptr_t)bar->offset);
			map_addr = pci_map_resource(second_addr,
							vfio_dev_fd,
							memreg[1].offset,
							memreg[1].size,
							MAP_FIXED);
		}

		/* NULL here means neither piece was mapped */
		if (map_addr == MAP_FAILED || !map_addr) {
			munmap(bar_addr, bar->size);
			bar_addr = MAP_FAILED;
			RTE_LOG(ERR, EAL, "Failed to map pci BAR%d\n",
					bar_index);
			return -1;
		}
	} else {
		RTE_LOG(ERR, EAL,
				"Failed to create inaccessible mapping for BAR%d\n",
				bar_index);
		return -1;
	}

	bar->addr = bar_addr;
	return 0;
}
417 
418 static int
419 pci_vfio_map_resource_primary(struct rte_pci_device *dev)
420 {
421 	struct vfio_device_info device_info = { .argsz = sizeof(device_info) };
422 	char pci_addr[PATH_MAX] = {0};
423 	int vfio_dev_fd;
424 	struct rte_pci_addr *loc = &dev->addr;
425 	int i, ret;
426 	struct mapped_pci_resource *vfio_res = NULL;
427 	struct mapped_pci_res_list *vfio_res_list =
428 		RTE_TAILQ_CAST(rte_vfio_tailq.head, mapped_pci_res_list);
429 
430 	struct pci_map *maps;
431 
432 	dev->intr_handle.fd = -1;
433 
434 	/* store PCI address string */
435 	snprintf(pci_addr, sizeof(pci_addr), PCI_PRI_FMT,
436 			loc->domain, loc->bus, loc->devid, loc->function);
437 
438 	ret = rte_vfio_setup_device(rte_pci_get_sysfs_path(), pci_addr,
439 					&vfio_dev_fd, &device_info);
440 	if (ret)
441 		return ret;
442 
443 	/* allocate vfio_res and get region info */
444 	vfio_res = rte_zmalloc("VFIO_RES", sizeof(*vfio_res), 0);
445 	if (vfio_res == NULL) {
446 		RTE_LOG(ERR, EAL,
447 			"%s(): cannot store uio mmap details\n", __func__);
448 		goto err_vfio_dev_fd;
449 	}
450 	memcpy(&vfio_res->pci_addr, &dev->addr, sizeof(vfio_res->pci_addr));
451 
452 	/* get number of registers (up to BAR5) */
453 	vfio_res->nb_maps = RTE_MIN((int) device_info.num_regions,
454 			VFIO_PCI_BAR5_REGION_INDEX + 1);
455 
456 	/* map BARs */
457 	maps = vfio_res->maps;
458 
459 	vfio_res->msix_table.bar_index = -1;
460 	/* get MSI-X BAR, if any (we have to know where it is because we can't
461 	 * easily mmap it when using VFIO)
462 	 */
463 	ret = pci_vfio_get_msix_bar(vfio_dev_fd, &vfio_res->msix_table);
464 	if (ret < 0) {
465 		RTE_LOG(ERR, EAL, "  %s cannot get MSI-X BAR number!\n",
466 				pci_addr);
467 		goto err_vfio_dev_fd;
468 	}
469 
470 	for (i = 0; i < (int) vfio_res->nb_maps; i++) {
471 		struct vfio_region_info reg = { .argsz = sizeof(reg) };
472 		void *bar_addr;
473 
474 		reg.index = i;
475 
476 		ret = ioctl(vfio_dev_fd, VFIO_DEVICE_GET_REGION_INFO, &reg);
477 		if (ret) {
478 			RTE_LOG(ERR, EAL, "  %s cannot get device region info "
479 					"error %i (%s)\n", pci_addr, errno, strerror(errno));
480 			goto err_vfio_res;
481 		}
482 
483 		/* chk for io port region */
484 		ret = pci_vfio_is_ioport_bar(vfio_dev_fd, i);
485 		if (ret < 0)
486 			goto err_vfio_res;
487 		else if (ret) {
488 			RTE_LOG(INFO, EAL, "Ignore mapping IO port bar(%d)\n",
489 					i);
490 			continue;
491 		}
492 
493 		/* skip non-mmapable BARs */
494 		if ((reg.flags & VFIO_REGION_INFO_FLAG_MMAP) == 0)
495 			continue;
496 
497 		/* try mapping somewhere close to the end of hugepages */
498 		if (pci_map_addr == NULL)
499 			pci_map_addr = pci_find_max_end_va();
500 
501 		bar_addr = pci_map_addr;
502 		pci_map_addr = RTE_PTR_ADD(bar_addr, (size_t) reg.size);
503 
504 		maps[i].addr = bar_addr;
505 		maps[i].offset = reg.offset;
506 		maps[i].size = reg.size;
507 		maps[i].path = NULL; /* vfio doesn't have per-resource paths */
508 
509 		ret = pci_vfio_mmap_bar(vfio_dev_fd, vfio_res, i, 0);
510 		if (ret < 0) {
511 			RTE_LOG(ERR, EAL, "  %s mapping BAR%i failed: %s\n",
512 					pci_addr, i, strerror(errno));
513 			goto err_vfio_res;
514 		}
515 
516 		dev->mem_resource[i].addr = maps[i].addr;
517 	}
518 
519 	if (pci_rte_vfio_setup_device(dev, vfio_dev_fd) < 0) {
520 		RTE_LOG(ERR, EAL, "  %s setup device failed\n", pci_addr);
521 		goto err_vfio_res;
522 	}
523 
524 	TAILQ_INSERT_TAIL(vfio_res_list, vfio_res, next);
525 
526 	return 0;
527 err_vfio_res:
528 	rte_free(vfio_res);
529 err_vfio_dev_fd:
530 	close(vfio_dev_fd);
531 	return -1;
532 }
533 
534 static int
535 pci_vfio_map_resource_secondary(struct rte_pci_device *dev)
536 {
537 	struct vfio_device_info device_info = { .argsz = sizeof(device_info) };
538 	char pci_addr[PATH_MAX] = {0};
539 	int vfio_dev_fd;
540 	struct rte_pci_addr *loc = &dev->addr;
541 	int i, ret;
542 	struct mapped_pci_resource *vfio_res = NULL;
543 	struct mapped_pci_res_list *vfio_res_list =
544 		RTE_TAILQ_CAST(rte_vfio_tailq.head, mapped_pci_res_list);
545 
546 	struct pci_map *maps;
547 
548 	dev->intr_handle.fd = -1;
549 
550 	/* store PCI address string */
551 	snprintf(pci_addr, sizeof(pci_addr), PCI_PRI_FMT,
552 			loc->domain, loc->bus, loc->devid, loc->function);
553 
554 	ret = rte_vfio_setup_device(rte_pci_get_sysfs_path(), pci_addr,
555 					&vfio_dev_fd, &device_info);
556 	if (ret)
557 		return ret;
558 
559 	/* if we're in a secondary process, just find our tailq entry */
560 	TAILQ_FOREACH(vfio_res, vfio_res_list, next) {
561 		if (rte_pci_addr_cmp(&vfio_res->pci_addr,
562 						 &dev->addr))
563 			continue;
564 		break;
565 	}
566 	/* if we haven't found our tailq entry, something's wrong */
567 	if (vfio_res == NULL) {
568 		RTE_LOG(ERR, EAL, "  %s cannot find TAILQ entry for PCI device!\n",
569 				pci_addr);
570 		goto err_vfio_dev_fd;
571 	}
572 
573 	/* map BARs */
574 	maps = vfio_res->maps;
575 
576 	for (i = 0; i < (int) vfio_res->nb_maps; i++) {
577 		ret = pci_vfio_mmap_bar(vfio_dev_fd, vfio_res, i, MAP_FIXED);
578 		if (ret < 0) {
579 			RTE_LOG(ERR, EAL, "  %s mapping BAR%i failed: %s\n",
580 					pci_addr, i, strerror(errno));
581 			goto err_vfio_dev_fd;
582 		}
583 
584 		dev->mem_resource[i].addr = maps[i].addr;
585 	}
586 
587 	return 0;
588 err_vfio_dev_fd:
589 	close(vfio_dev_fd);
590 	return -1;
591 }
592 
593 /*
594  * map the PCI resources of a PCI device in virtual memory (VFIO version).
595  * primary and secondary processes follow almost exactly the same path
596  */
597 int
598 pci_vfio_map_resource(struct rte_pci_device *dev)
599 {
600 	if (rte_eal_process_type() == RTE_PROC_PRIMARY)
601 		return pci_vfio_map_resource_primary(dev);
602 	else
603 		return pci_vfio_map_resource_secondary(dev);
604 }
605 
/*
 * Undo pci_vfio_map_resource(): close the interrupt eventfd, drop bus
 * mastering, release the VFIO device, unmap every mapped BAR and remove
 * the device's entry from the shared resource tailq.
 * Returns 0 on success, negative on failure.
 */
int
pci_vfio_unmap_resource(struct rte_pci_device *dev)
{
	char pci_addr[PATH_MAX] = {0};
	struct rte_pci_addr *loc = &dev->addr;
	int i, ret;
	struct mapped_pci_resource *vfio_res = NULL;
	struct mapped_pci_res_list *vfio_res_list;

	struct pci_map *maps;

	/* store PCI address string */
	snprintf(pci_addr, sizeof(pci_addr), PCI_PRI_FMT,
			loc->domain, loc->bus, loc->devid, loc->function);


	/* NOTE(review): intr_handle.fd is set to -1 during map and only
	 * assigned a real eventfd in pci_vfio_setup_interrupts(); if setup
	 * never ran, close(-1) fails here and aborts the unmap — confirm
	 * callers only reach this after a successful map. */
	if (close(dev->intr_handle.fd) < 0) {
		RTE_LOG(INFO, EAL, "Error when closing eventfd file descriptor for %s\n",
			pci_addr);
		return -1;
	}

	/* stop the device from issuing DMA before tearing mappings down */
	if (pci_vfio_set_bus_master(dev->intr_handle.vfio_dev_fd, false)) {
		RTE_LOG(ERR, EAL, "  %s cannot unset bus mastering for PCI device!\n",
				pci_addr);
		return -1;
	}

	/* hand the device back to VFIO (closes the device fd) */
	ret = rte_vfio_release_device(rte_pci_get_sysfs_path(), pci_addr,
				  dev->intr_handle.vfio_dev_fd);
	if (ret < 0) {
		RTE_LOG(ERR, EAL,
			"%s(): cannot release device\n", __func__);
		return ret;
	}

	vfio_res_list = RTE_TAILQ_CAST(rte_vfio_tailq.head, mapped_pci_res_list);
	/* Get vfio_res */
	TAILQ_FOREACH(vfio_res, vfio_res_list, next) {
		if (memcmp(&vfio_res->pci_addr, &dev->addr, sizeof(dev->addr)))
			continue;
		break;
	}
	/* if we haven't found our tailq entry, something's wrong */
	if (vfio_res == NULL) {
		RTE_LOG(ERR, EAL, "  %s cannot find TAILQ entry for PCI device!\n",
				pci_addr);
		return -1;
	}

	/* unmap BARs */
	maps = vfio_res->maps;

	RTE_LOG(INFO, EAL, "Releasing pci mapped resource for %s\n",
		pci_addr);
	for (i = 0; i < (int) vfio_res->nb_maps; i++) {

		/*
		 * We do not need to be aware of MSI-X table BAR mappings as
		 * when mapping. Just using current maps array is enough
		 */
		if (maps[i].addr) {
			RTE_LOG(INFO, EAL, "Calling pci_unmap_resource for %s at %p\n",
				pci_addr, maps[i].addr);
			pci_unmap_resource(maps[i].addr, maps[i].size);
		}
	}

	TAILQ_REMOVE(vfio_res_list, vfio_res, next);

	return 0;
}
678 
679 int
680 pci_vfio_ioport_map(struct rte_pci_device *dev, int bar,
681 		    struct rte_pci_ioport *p)
682 {
683 	if (bar < VFIO_PCI_BAR0_REGION_INDEX ||
684 	    bar > VFIO_PCI_BAR5_REGION_INDEX) {
685 		RTE_LOG(ERR, EAL, "invalid bar (%d)!\n", bar);
686 		return -1;
687 	}
688 
689 	p->dev = dev;
690 	p->base = VFIO_GET_REGION_ADDR(bar);
691 	return 0;
692 }
693 
694 void
695 pci_vfio_ioport_read(struct rte_pci_ioport *p,
696 		     void *data, size_t len, off_t offset)
697 {
698 	const struct rte_intr_handle *intr_handle = &p->dev->intr_handle;
699 
700 	if (pread64(intr_handle->vfio_dev_fd, data,
701 		    len, p->base + offset) <= 0)
702 		RTE_LOG(ERR, EAL,
703 			"Can't read from PCI bar (%" PRIu64 ") : offset (%x)\n",
704 			VFIO_GET_REGION_IDX(p->base), (int)offset);
705 }
706 
707 void
708 pci_vfio_ioport_write(struct rte_pci_ioport *p,
709 		      const void *data, size_t len, off_t offset)
710 {
711 	const struct rte_intr_handle *intr_handle = &p->dev->intr_handle;
712 
713 	if (pwrite64(intr_handle->vfio_dev_fd, data,
714 		     len, p->base + offset) <= 0)
715 		RTE_LOG(ERR, EAL,
716 			"Can't write to PCI bar (%" PRIu64 ") : offset (%x)\n",
717 			VFIO_GET_REGION_IDX(p->base), (int)offset);
718 }
719 
int
pci_vfio_ioport_unmap(struct rte_pci_ioport *p)
{
	/* pci_vfio_ioport_map() only stores a fixed region offset and the
	 * device pointer -- there is no per-map state to tear down, so
	 * unmapping is not supported and always reports failure */
	RTE_SET_USED(p);
	return -1;
}
726 
/* report whether the vfio_pci kernel module is loaded and usable */
int
pci_vfio_is_enabled(void)
{
	const char *mod_name = "vfio_pci";

	return rte_vfio_is_enabled(mod_name);
}
732 #endif
733