xref: /spdk/lib/env_dpdk/pci_event.c (revision a6dbe3721eb3b5990707fc3e378c95e505dd8ab5)
1488570ebSJim Harris /*   SPDX-License-Identifier: BSD-3-Clause
2*a6dbe372Spaul luse  *   Copyright (C) 2020 Intel Corporation.
359237d22SJin Yu  *   All rights reserved.
459237d22SJin Yu  */
559237d22SJin Yu 
659237d22SJin Yu #include "spdk/stdinc.h"
759237d22SJin Yu #include "spdk/string.h"
859237d22SJin Yu 
959237d22SJin Yu #include "spdk/log.h"
1059237d22SJin Yu #include "spdk/env.h"
1159237d22SJin Yu 
1259237d22SJin Yu #ifdef __linux__
1359237d22SJin Yu 
1459237d22SJin Yu #include <linux/netlink.h>
1559237d22SJin Yu 
/* Size in bytes of the buffers used to receive and parse one uevent message. */
#define SPDK_UEVENT_MSG_LEN 4096
/* Requested netlink socket receive buffer size in bytes (1 MiB).
 * Parenthesized so the macro expands safely inside larger expressions.
 */
#define SPDK_UEVENT_RECVBUF_SIZE (1024 * 1024)
1859237d22SJin Yu 
1959237d22SJin Yu int
spdk_pci_event_listen(void)2059237d22SJin Yu spdk_pci_event_listen(void)
2159237d22SJin Yu {
2259237d22SJin Yu 	struct sockaddr_nl addr;
2359237d22SJin Yu 	int netlink_fd;
2459237d22SJin Yu 	int size = SPDK_UEVENT_RECVBUF_SIZE;
25148a9ab0STom Nabarro 	int buf_size;
26148a9ab0STom Nabarro 	socklen_t opt_size;
2759237d22SJin Yu 	int flag, rc;
2859237d22SJin Yu 
2959237d22SJin Yu 	memset(&addr, 0, sizeof(addr));
3059237d22SJin Yu 	addr.nl_family = AF_NETLINK;
315957f2c4SJin Yu 	addr.nl_pid = 0;
3259237d22SJin Yu 	addr.nl_groups = 0xffffffff;
3359237d22SJin Yu 
3459237d22SJin Yu 	netlink_fd = socket(PF_NETLINK, SOCK_DGRAM, NETLINK_KOBJECT_UEVENT);
3559237d22SJin Yu 	if (netlink_fd < 0) {
3659237d22SJin Yu 		SPDK_ERRLOG("Failed to create netlink socket\n");
3759237d22SJin Yu 		return netlink_fd;
3859237d22SJin Yu 	}
3959237d22SJin Yu 
4059237d22SJin Yu 	if (setsockopt(netlink_fd, SOL_SOCKET, SO_RCVBUFFORCE, &size, sizeof(size)) < 0) {
41148a9ab0STom Nabarro 		if (setsockopt(netlink_fd, SOL_SOCKET, SO_RCVBUF, &size, sizeof(size)) < 0) {
4259237d22SJin Yu 			rc = errno;
43148a9ab0STom Nabarro 			SPDK_ERRLOG("Failed to set socket option SO_RCVBUF\n");
44148a9ab0STom Nabarro 			goto error;
45148a9ab0STom Nabarro 		}
46148a9ab0STom Nabarro 		opt_size = sizeof(buf_size);
47148a9ab0STom Nabarro 		if (getsockopt(netlink_fd, SOL_SOCKET, SO_RCVBUF, &buf_size, &opt_size) < 0) {
48148a9ab0STom Nabarro 			rc = errno;
49148a9ab0STom Nabarro 			SPDK_ERRLOG("Failed to get socket option SO_RCVBUF\n");
50148a9ab0STom Nabarro 			goto error;
51148a9ab0STom Nabarro 		}
52148a9ab0STom Nabarro 		if (buf_size < SPDK_UEVENT_RECVBUF_SIZE) {
53148a9ab0STom Nabarro 			SPDK_ERRLOG("Socket recv buffer is too small (< %d), see SO_RCVBUF "
54148a9ab0STom Nabarro 				    "section in socket(7) man page for specifics on how to "
55148a9ab0STom Nabarro 				    "adjust the system setting.", SPDK_UEVENT_RECVBUF_SIZE);
56148a9ab0STom Nabarro 			rc = ENOSPC;
57148a9ab0STom Nabarro 			goto error;
58148a9ab0STom Nabarro 		}
5959237d22SJin Yu 	}
6059237d22SJin Yu 
6159237d22SJin Yu 	flag = fcntl(netlink_fd, F_GETFL);
6259237d22SJin Yu 	if (flag < 0) {
6359237d22SJin Yu 		rc = errno;
6459237d22SJin Yu 		SPDK_ERRLOG("Failed to get socket flag, fd: %d\n", netlink_fd);
65148a9ab0STom Nabarro 		goto error;
6659237d22SJin Yu 	}
6759237d22SJin Yu 
6859237d22SJin Yu 	if (fcntl(netlink_fd, F_SETFL, flag | O_NONBLOCK) < 0) {
6959237d22SJin Yu 		rc = errno;
7059237d22SJin Yu 		SPDK_ERRLOG("Fcntl can't set nonblocking mode for socket, fd: %d\n", netlink_fd);
71148a9ab0STom Nabarro 		goto error;
7259237d22SJin Yu 	}
7359237d22SJin Yu 
7459237d22SJin Yu 	if (bind(netlink_fd, (struct sockaddr *) &addr, sizeof(addr)) < 0) {
7559237d22SJin Yu 		rc = errno;
7659237d22SJin Yu 		SPDK_ERRLOG("Failed to bind the netlink\n");
77148a9ab0STom Nabarro 		goto error;
7859237d22SJin Yu 	}
7959237d22SJin Yu 
8059237d22SJin Yu 	return netlink_fd;
81148a9ab0STom Nabarro error:
82148a9ab0STom Nabarro 	close(netlink_fd);
83148a9ab0STom Nabarro 	return -rc;
8459237d22SJin Yu }
8559237d22SJin Yu 
8659237d22SJin Yu /* Note: We parse the event from uio and vfio subsystem and will ignore
8759237d22SJin Yu  *       all the event from other subsystem. the event from uio subsystem
8859237d22SJin Yu  *       as below:
8959237d22SJin Yu  *       action: "add" or "remove"
9059237d22SJin Yu  *       subsystem: "uio"
9159237d22SJin Yu  *       dev_path: "/devices/pci0000:80/0000:80:01.0/0000:81:00.0/uio/uio0"
9259237d22SJin Yu  *       VFIO subsystem add event:
9359237d22SJin Yu  *       ACTION=bind
9459237d22SJin Yu  *       DRIVER=vfio-pci
9559237d22SJin Yu  *       PCI_SLOT_NAME=0000:d8:00.0
9659237d22SJin Yu  */
9759237d22SJin Yu static int
parse_subsystem_event(const char * buf,struct spdk_pci_event * event)9859237d22SJin Yu parse_subsystem_event(const char *buf, struct spdk_pci_event *event)
9959237d22SJin Yu {
10059237d22SJin Yu 	char subsystem[SPDK_UEVENT_MSG_LEN];
10159237d22SJin Yu 	char action[SPDK_UEVENT_MSG_LEN];
10259237d22SJin Yu 	char dev_path[SPDK_UEVENT_MSG_LEN];
10359237d22SJin Yu 	char driver[SPDK_UEVENT_MSG_LEN];
10459237d22SJin Yu 	char vfio_pci_addr[SPDK_UEVENT_MSG_LEN];
10559237d22SJin Yu 	char *pci_address, *tmp;
10659237d22SJin Yu 	int rc;
10759237d22SJin Yu 
10859237d22SJin Yu 	memset(subsystem, 0, SPDK_UEVENT_MSG_LEN);
10959237d22SJin Yu 	memset(action, 0, SPDK_UEVENT_MSG_LEN);
11059237d22SJin Yu 	memset(dev_path, 0, SPDK_UEVENT_MSG_LEN);
11159237d22SJin Yu 	memset(driver, 0, SPDK_UEVENT_MSG_LEN);
11259237d22SJin Yu 	memset(vfio_pci_addr, 0, SPDK_UEVENT_MSG_LEN);
11359237d22SJin Yu 
11459237d22SJin Yu 	while (*buf) {
11559237d22SJin Yu 		if (!strncmp(buf, "SUBSYSTEM=", 10)) {
11659237d22SJin Yu 			buf += 10;
11759237d22SJin Yu 			snprintf(subsystem, sizeof(subsystem), "%s", buf);
11859237d22SJin Yu 		} else if (!strncmp(buf, "ACTION=", 7)) {
11959237d22SJin Yu 			buf += 7;
12059237d22SJin Yu 			snprintf(action, sizeof(action), "%s", buf);
12159237d22SJin Yu 		} else if (!strncmp(buf, "DEVPATH=", 8)) {
12259237d22SJin Yu 			buf += 8;
12359237d22SJin Yu 			snprintf(dev_path, sizeof(dev_path), "%s", buf);
12459237d22SJin Yu 		} else if (!strncmp(buf, "DRIVER=", 7)) {
12559237d22SJin Yu 			buf += 7;
12659237d22SJin Yu 			snprintf(driver, sizeof(driver), "%s", buf);
12759237d22SJin Yu 		} else if (!strncmp(buf, "PCI_SLOT_NAME=", 14)) {
12859237d22SJin Yu 			buf += 14;
12959237d22SJin Yu 			snprintf(vfio_pci_addr, sizeof(vfio_pci_addr), "%s", buf);
13059237d22SJin Yu 		}
13159237d22SJin Yu 
13259237d22SJin Yu 		while (*buf++)
13359237d22SJin Yu 			;
13459237d22SJin Yu 	}
13559237d22SJin Yu 
13659237d22SJin Yu 	if (!strncmp(subsystem, "uio", 3)) {
13759237d22SJin Yu 		if (!strncmp(action, "remove", 6)) {
13859237d22SJin Yu 			event->action = SPDK_UEVENT_REMOVE;
13959237d22SJin Yu 		} else if (!strncmp(action, "add", 3)) {
14059237d22SJin Yu 			/* Support the ADD UEVENT for the device allow */
14159237d22SJin Yu 			event->action = SPDK_UEVENT_ADD;
14259237d22SJin Yu 		} else {
14359237d22SJin Yu 			return 0;
14459237d22SJin Yu 		}
14559237d22SJin Yu 
14659237d22SJin Yu 		tmp = strstr(dev_path, "/uio/");
14759237d22SJin Yu 		if (!tmp) {
14859237d22SJin Yu 			SPDK_ERRLOG("Invalid format of uevent: %s\n", dev_path);
14959237d22SJin Yu 			return -EBADMSG;
15059237d22SJin Yu 		}
15159237d22SJin Yu 		memset(tmp, 0, SPDK_UEVENT_MSG_LEN - (tmp - dev_path));
15259237d22SJin Yu 
15359237d22SJin Yu 		pci_address = strrchr(dev_path, '/');
15459237d22SJin Yu 		if (!pci_address) {
15559237d22SJin Yu 			SPDK_ERRLOG("Not found PCI device BDF in uevent: %s\n", dev_path);
15659237d22SJin Yu 			return -EBADMSG;
15759237d22SJin Yu 		}
15859237d22SJin Yu 		pci_address++;
15959237d22SJin Yu 
16059237d22SJin Yu 		rc = spdk_pci_addr_parse(&event->traddr, pci_address);
16159237d22SJin Yu 		if (rc != 0) {
16259237d22SJin Yu 			SPDK_ERRLOG("Invalid format for PCI device BDF: %s\n", pci_address);
16359237d22SJin Yu 			return rc;
16459237d22SJin Yu 		}
16559237d22SJin Yu 
16659237d22SJin Yu 		return 1;
16759237d22SJin Yu 	}
16859237d22SJin Yu 
16959237d22SJin Yu 	if (!strncmp(driver, "vfio-pci", 8)) {
17059237d22SJin Yu 		if (!strncmp(action, "bind", 4)) {
17159237d22SJin Yu 			/* Support the ADD UEVENT for the device allow */
17259237d22SJin Yu 			event->action = SPDK_UEVENT_ADD;
17359237d22SJin Yu 		} else {
17459237d22SJin Yu 			/* Only need to support add event.
17559237d22SJin Yu 			 * VFIO hotplug interface is "pci.c:pci_device_rte_dev_event".
17659237d22SJin Yu 			 * VFIO informs the userspace hotplug through vfio req notifier interrupt.
17759237d22SJin Yu 			 * The app needs to free the device userspace driver resource first then
178cc6920a4SJosh Soref 			 * the OS remove the device VFIO driver and broadcast the VFIO uevent.
17959237d22SJin Yu 			 */
18059237d22SJin Yu 			return 0;
18159237d22SJin Yu 		}
18259237d22SJin Yu 
18359237d22SJin Yu 		rc = spdk_pci_addr_parse(&event->traddr, vfio_pci_addr);
18459237d22SJin Yu 		if (rc != 0) {
18559237d22SJin Yu 			SPDK_ERRLOG("Invalid format for PCI device BDF: %s\n", vfio_pci_addr);
18659237d22SJin Yu 			return rc;
18759237d22SJin Yu 		}
18859237d22SJin Yu 
18959237d22SJin Yu 		return 1;
19059237d22SJin Yu 	}
19159237d22SJin Yu 
19259237d22SJin Yu 	return 0;
19359237d22SJin Yu }
19459237d22SJin Yu 
19559237d22SJin Yu int
spdk_pci_get_event(int fd,struct spdk_pci_event * event)19659237d22SJin Yu spdk_pci_get_event(int fd, struct spdk_pci_event *event)
19759237d22SJin Yu {
19859237d22SJin Yu 	int ret;
19959237d22SJin Yu 	char buf[SPDK_UEVENT_MSG_LEN];
20059237d22SJin Yu 
20159237d22SJin Yu 	memset(buf, 0, SPDK_UEVENT_MSG_LEN);
20259237d22SJin Yu 	memset(event, 0, sizeof(*event));
20359237d22SJin Yu 
20459237d22SJin Yu 	ret = recv(fd, buf, SPDK_UEVENT_MSG_LEN - 1, MSG_DONTWAIT);
20559237d22SJin Yu 	if (ret > 0) {
20659237d22SJin Yu 		return parse_subsystem_event(buf, event);
20759237d22SJin Yu 	} else if (ret < 0) {
20859237d22SJin Yu 		if (errno == EAGAIN || errno == EWOULDBLOCK) {
20959237d22SJin Yu 			return 0;
21059237d22SJin Yu 		} else {
21159237d22SJin Yu 			ret = errno;
21259237d22SJin Yu 			SPDK_ERRLOG("Socket read error %d\n", errno);
21359237d22SJin Yu 			return -ret;
21459237d22SJin Yu 		}
21559237d22SJin Yu 	} else {
21659237d22SJin Yu 		/* connection closed */
21759237d22SJin Yu 		return -ENOTCONN;
21859237d22SJin Yu 	}
21959237d22SJin Yu 
22059237d22SJin Yu 	return 0;
22159237d22SJin Yu }
22259237d22SJin Yu 
22359237d22SJin Yu #else /* Not Linux */
22459237d22SJin Yu 
/* Non-Linux stub: there is no uevent socket to open, so log an error and
 * report -ENOTSUP.
 */
int
spdk_pci_event_listen(void)
{
	const int rc = -ENOTSUP;

	SPDK_ERRLOG("Non-Linux does not support this operation\n");

	return rc;
}
23159237d22SJin Yu 
/* Non-Linux stub: both arguments are ignored and *event is left untouched;
 * log an error and report -ENOTSUP.
 */
int
spdk_pci_get_event(int fd, struct spdk_pci_event *event)
{
	const int rc = -ENOTSUP;

	(void)fd;
	(void)event;

	SPDK_ERRLOG("Non-Linux does not support this operation\n");

	return rc;
}
23859237d22SJin Yu #endif
239