xref: /dpdk/lib/vhost/socket.c (revision 4d2aa150769b170e439b4ae6200463140cb44ff5)
/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2016 Intel Corporation
 */
499a2dd95SBruce Richardson 
599a2dd95SBruce Richardson #include <stdint.h>
699a2dd95SBruce Richardson #include <stdio.h>
799a2dd95SBruce Richardson #include <limits.h>
899a2dd95SBruce Richardson #include <stdlib.h>
999a2dd95SBruce Richardson #include <unistd.h>
1099a2dd95SBruce Richardson #include <string.h>
1199a2dd95SBruce Richardson #include <sys/socket.h>
1299a2dd95SBruce Richardson #include <sys/un.h>
1399a2dd95SBruce Richardson #include <sys/queue.h>
1499a2dd95SBruce Richardson #include <errno.h>
1599a2dd95SBruce Richardson #include <fcntl.h>
1699a2dd95SBruce Richardson 
171c1abf17SThomas Monjalon #include <rte_thread.h>
1899a2dd95SBruce Richardson #include <rte_log.h>
1999a2dd95SBruce Richardson 
2099a2dd95SBruce Richardson #include "fd_man.h"
210adb8eccSMaxime Coquelin #include "vduse.h"
2299a2dd95SBruce Richardson #include "vhost.h"
2399a2dd95SBruce Richardson #include "vhost_user.h"
2499a2dd95SBruce Richardson 
2599a2dd95SBruce Richardson 
/* List head type for the connections that belong to one vhost-user socket. */
TAILQ_HEAD(vhost_user_connection_list, vhost_user_connection);

/*
 * Per-socket state. One vhost_user_socket is created for each call to
 * rte_vhost_driver_register().
 */
struct vhost_user_socket {
	/* Connections of this socket; conn_mutex is held around list updates. */
	struct vhost_user_connection_list conn_list;
	pthread_mutex_t conn_mutex;
	/* Socket path; also used as the prefix of log messages. */
	char *path;
	/* Descriptor created by create_unix_socket(). */
	int socket_fd;
	/* AF_UNIX address built from path by create_unix_socket(). */
	struct sockaddr_un un;
	/* Server sockets bind/listen; client sockets connect (non-blocking). */
	bool is_server;
	bool is_vduse;
	/* Client mode: re-create the socket and connect again after a failure. */
	bool reconnect;
	/* The four flags below are forwarded to vhost_setup_virtio_net(). */
	bool iommu_support;
	bool use_builtin_virtio_net;
	/* When set, vhost_enable_extbuf() is called on each new device. */
	bool extbuf;
	/* When set, vhost_enable_linearbuf() is called on each new device. */
	bool linearbuf;
	/* When set, async_copy is turned on for each new device. */
	bool async_copy;
	bool net_compliant_ol_flags;
	bool stats_enabled;
	/* With reconnect: skip the inline connect and queue for the reconnect thread. */
	bool async_connect;

	/*
	 * "supported_features" holds the feature bits the vhost driver
	 * supports. "features" holds the feature bits left after
	 * rte_vhost_driver_features_disable/enable() and is the final set
	 * used for vhost-user features negotiation.
	 */
	uint64_t supported_features;
	uint64_t features;

	uint64_t protocol_features;

	uint32_t max_queue_pairs;

	/* vDPA device attached to every device created on this socket, if any. */
	struct rte_vdpa_device *vdpa_dev;

	/* Application callbacks (new_connection, destroy_connection, ...). */
	struct rte_vhost_device_ops const *notify_ops;
};
6899a2dd95SBruce Richardson 
/* One established vhost-user connection on a socket. */
struct vhost_user_connection {
	/* Socket this connection belongs to. */
	struct vhost_user_socket *vsocket;
	/* Connected descriptor registered in the fdset. */
	int connfd;
	/* Device handle returned by vhost_user_new_device(). */
	int vid;

	/* Linkage in vsocket->conn_list. */
	TAILQ_ENTRY(vhost_user_connection) next;
};
7699a2dd95SBruce Richardson 
/* Capacity of the vsockets[] table below. */
#define MAX_VHOST_SOCKET 1024

/* File-wide registry of vhost-user sockets. */
struct vhost_user {
	/* Registered sockets; the first vsocket_cnt slots are searched by path. */
	struct vhost_user_socket *vsockets[MAX_VHOST_SOCKET];
	/* fdset that listen and connection descriptors are added to. */
	struct fdset *fdset;
	int vsocket_cnt;
	/* Held while looking up sockets in vsockets[]. */
	pthread_mutex_t mutex;
};
8499a2dd95SBruce Richardson 
8599a2dd95SBruce Richardson #define MAX_VIRTIO_BACKLOG 128
8699a2dd95SBruce Richardson 
8799a2dd95SBruce Richardson static void vhost_user_server_new_connection(int fd, void *data, int *remove);
8899a2dd95SBruce Richardson static void vhost_user_read_cb(int fd, void *dat, int *remove);
8999a2dd95SBruce Richardson static int create_unix_socket(struct vhost_user_socket *vsocket);
9099a2dd95SBruce Richardson static int vhost_user_start_client(struct vhost_user_socket *vsocket);
9199a2dd95SBruce Richardson 
9299a2dd95SBruce Richardson static struct vhost_user vhost_user = {
9399a2dd95SBruce Richardson 	.vsocket_cnt = 0,
9499a2dd95SBruce Richardson 	.mutex = PTHREAD_MUTEX_INITIALIZER,
9599a2dd95SBruce Richardson };
9699a2dd95SBruce Richardson 
9799a2dd95SBruce Richardson /*
9899a2dd95SBruce Richardson  * return bytes# of read on success or negative val on failure. Update fdnum
9999a2dd95SBruce Richardson  * with number of fds read.
10099a2dd95SBruce Richardson  */
10199a2dd95SBruce Richardson int
102c85c35b1SMaxime Coquelin read_fd_message(char *ifname, int sockfd, char *buf, int buflen, int *fds, int max_fds,
10399a2dd95SBruce Richardson 		int *fd_num)
10499a2dd95SBruce Richardson {
10599a2dd95SBruce Richardson 	struct iovec iov;
10699a2dd95SBruce Richardson 	struct msghdr msgh;
10799a2dd95SBruce Richardson 	char control[CMSG_SPACE(max_fds * sizeof(int))];
10899a2dd95SBruce Richardson 	struct cmsghdr *cmsg;
10999a2dd95SBruce Richardson 	int got_fds = 0;
11099a2dd95SBruce Richardson 	int ret;
11199a2dd95SBruce Richardson 
11299a2dd95SBruce Richardson 	*fd_num = 0;
11399a2dd95SBruce Richardson 
11499a2dd95SBruce Richardson 	memset(&msgh, 0, sizeof(msgh));
11599a2dd95SBruce Richardson 	iov.iov_base = buf;
11699a2dd95SBruce Richardson 	iov.iov_len  = buflen;
11799a2dd95SBruce Richardson 
11899a2dd95SBruce Richardson 	msgh.msg_iov = &iov;
11999a2dd95SBruce Richardson 	msgh.msg_iovlen = 1;
12099a2dd95SBruce Richardson 	msgh.msg_control = control;
12199a2dd95SBruce Richardson 	msgh.msg_controllen = sizeof(control);
12299a2dd95SBruce Richardson 
12399a2dd95SBruce Richardson 	ret = recvmsg(sockfd, &msgh, 0);
12499a2dd95SBruce Richardson 	if (ret <= 0) {
12599a2dd95SBruce Richardson 		if (ret)
1260e21c7c0SDavid Marchand 			VHOST_CONFIG_LOG(ifname, ERR, "recvmsg failed on fd %d (%s)",
12736c525a0SDavid Marchand 				sockfd, strerror(errno));
12899a2dd95SBruce Richardson 		return ret;
12999a2dd95SBruce Richardson 	}
13099a2dd95SBruce Richardson 
131218daf16SMaxime Coquelin 	if (msgh.msg_flags & MSG_TRUNC)
1320e21c7c0SDavid Marchand 		VHOST_CONFIG_LOG(ifname, ERR, "truncated msg (fd %d)", sockfd);
133218daf16SMaxime Coquelin 
134218daf16SMaxime Coquelin 	/* MSG_CTRUNC may be caused by LSM misconfiguration */
135218daf16SMaxime Coquelin 	if (msgh.msg_flags & MSG_CTRUNC)
1360e21c7c0SDavid Marchand 		VHOST_CONFIG_LOG(ifname, ERR, "truncated control data (fd %d)", sockfd);
13799a2dd95SBruce Richardson 
13899a2dd95SBruce Richardson 	for (cmsg = CMSG_FIRSTHDR(&msgh); cmsg != NULL;
13999a2dd95SBruce Richardson 		cmsg = CMSG_NXTHDR(&msgh, cmsg)) {
14099a2dd95SBruce Richardson 		if ((cmsg->cmsg_level == SOL_SOCKET) &&
14199a2dd95SBruce Richardson 			(cmsg->cmsg_type == SCM_RIGHTS)) {
14299a2dd95SBruce Richardson 			got_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
14399a2dd95SBruce Richardson 			*fd_num = got_fds;
14499a2dd95SBruce Richardson 			memcpy(fds, CMSG_DATA(cmsg), got_fds * sizeof(int));
14599a2dd95SBruce Richardson 			break;
14699a2dd95SBruce Richardson 		}
14799a2dd95SBruce Richardson 	}
14899a2dd95SBruce Richardson 
14999a2dd95SBruce Richardson 	/* Clear out unused file descriptors */
15099a2dd95SBruce Richardson 	while (got_fds < max_fds)
15199a2dd95SBruce Richardson 		fds[got_fds++] = -1;
15299a2dd95SBruce Richardson 
15399a2dd95SBruce Richardson 	return ret;
15499a2dd95SBruce Richardson }
15599a2dd95SBruce Richardson 
15699a2dd95SBruce Richardson int
157c85c35b1SMaxime Coquelin send_fd_message(char *ifname, int sockfd, char *buf, int buflen, int *fds, int fd_num)
15899a2dd95SBruce Richardson {
15999a2dd95SBruce Richardson 
16099a2dd95SBruce Richardson 	struct iovec iov;
16199a2dd95SBruce Richardson 	struct msghdr msgh;
16299a2dd95SBruce Richardson 	size_t fdsize = fd_num * sizeof(int);
16399a2dd95SBruce Richardson 	char control[CMSG_SPACE(fdsize)];
16499a2dd95SBruce Richardson 	struct cmsghdr *cmsg;
16599a2dd95SBruce Richardson 	int ret;
16699a2dd95SBruce Richardson 
16799a2dd95SBruce Richardson 	memset(&msgh, 0, sizeof(msgh));
16899a2dd95SBruce Richardson 	iov.iov_base = buf;
16999a2dd95SBruce Richardson 	iov.iov_len = buflen;
17099a2dd95SBruce Richardson 
17199a2dd95SBruce Richardson 	msgh.msg_iov = &iov;
17299a2dd95SBruce Richardson 	msgh.msg_iovlen = 1;
17399a2dd95SBruce Richardson 
17499a2dd95SBruce Richardson 	if (fds && fd_num > 0) {
17599a2dd95SBruce Richardson 		msgh.msg_control = control;
17699a2dd95SBruce Richardson 		msgh.msg_controllen = sizeof(control);
17799a2dd95SBruce Richardson 		cmsg = CMSG_FIRSTHDR(&msgh);
17899a2dd95SBruce Richardson 		if (cmsg == NULL) {
1790e21c7c0SDavid Marchand 			VHOST_CONFIG_LOG(ifname, ERR, "cmsg == NULL");
18099a2dd95SBruce Richardson 			errno = EINVAL;
18199a2dd95SBruce Richardson 			return -1;
18299a2dd95SBruce Richardson 		}
18399a2dd95SBruce Richardson 		cmsg->cmsg_len = CMSG_LEN(fdsize);
18499a2dd95SBruce Richardson 		cmsg->cmsg_level = SOL_SOCKET;
18599a2dd95SBruce Richardson 		cmsg->cmsg_type = SCM_RIGHTS;
18699a2dd95SBruce Richardson 		memcpy(CMSG_DATA(cmsg), fds, fdsize);
18799a2dd95SBruce Richardson 	} else {
18899a2dd95SBruce Richardson 		msgh.msg_control = NULL;
18999a2dd95SBruce Richardson 		msgh.msg_controllen = 0;
19099a2dd95SBruce Richardson 	}
19199a2dd95SBruce Richardson 
19299a2dd95SBruce Richardson 	do {
19399a2dd95SBruce Richardson 		ret = sendmsg(sockfd, &msgh, MSG_NOSIGNAL);
19499a2dd95SBruce Richardson 	} while (ret < 0 && errno == EINTR);
19599a2dd95SBruce Richardson 
19699a2dd95SBruce Richardson 	if (ret < 0) {
1970e21c7c0SDavid Marchand 		VHOST_CONFIG_LOG(ifname, ERR, "sendmsg error on fd %d (%s)",
19836c525a0SDavid Marchand 			sockfd, strerror(errno));
19999a2dd95SBruce Richardson 		return ret;
20099a2dd95SBruce Richardson 	}
20199a2dd95SBruce Richardson 
20299a2dd95SBruce Richardson 	return ret;
20399a2dd95SBruce Richardson }
20499a2dd95SBruce Richardson 
20599a2dd95SBruce Richardson static void
20699a2dd95SBruce Richardson vhost_user_add_connection(int fd, struct vhost_user_socket *vsocket)
20799a2dd95SBruce Richardson {
20899a2dd95SBruce Richardson 	int vid;
20999a2dd95SBruce Richardson 	size_t size;
21099a2dd95SBruce Richardson 	struct vhost_user_connection *conn;
21199a2dd95SBruce Richardson 	int ret;
21299a2dd95SBruce Richardson 	struct virtio_net *dev;
21399a2dd95SBruce Richardson 
21499a2dd95SBruce Richardson 	if (vsocket == NULL)
21599a2dd95SBruce Richardson 		return;
21699a2dd95SBruce Richardson 
21799a2dd95SBruce Richardson 	conn = malloc(sizeof(*conn));
21899a2dd95SBruce Richardson 	if (conn == NULL) {
21999a2dd95SBruce Richardson 		close(fd);
22099a2dd95SBruce Richardson 		return;
22199a2dd95SBruce Richardson 	}
22299a2dd95SBruce Richardson 
2234dbf9316SMaxime Coquelin 	vid = vhost_user_new_device();
22499a2dd95SBruce Richardson 	if (vid == -1) {
22599a2dd95SBruce Richardson 		goto err;
22699a2dd95SBruce Richardson 	}
22799a2dd95SBruce Richardson 
22899a2dd95SBruce Richardson 	size = strnlen(vsocket->path, PATH_MAX);
22999a2dd95SBruce Richardson 	vhost_set_ifname(vid, vsocket->path, size);
23099a2dd95SBruce Richardson 
231ca7036b4SDavid Marchand 	vhost_setup_virtio_net(vid, vsocket->use_builtin_virtio_net,
2321a44f67aSDavid Marchand 		vsocket->net_compliant_ol_flags, vsocket->stats_enabled,
2331a44f67aSDavid Marchand 		vsocket->iommu_support);
23499a2dd95SBruce Richardson 
23599a2dd95SBruce Richardson 	vhost_attach_vdpa_device(vid, vsocket->vdpa_dev);
23699a2dd95SBruce Richardson 
23799a2dd95SBruce Richardson 	if (vsocket->extbuf)
23899a2dd95SBruce Richardson 		vhost_enable_extbuf(vid);
23999a2dd95SBruce Richardson 
24099a2dd95SBruce Richardson 	if (vsocket->linearbuf)
24199a2dd95SBruce Richardson 		vhost_enable_linearbuf(vid);
24299a2dd95SBruce Richardson 
24399a2dd95SBruce Richardson 	if (vsocket->async_copy) {
24499a2dd95SBruce Richardson 		dev = get_device(vid);
24599a2dd95SBruce Richardson 
24699a2dd95SBruce Richardson 		if (dev)
24799a2dd95SBruce Richardson 			dev->async_copy = 1;
24899a2dd95SBruce Richardson 	}
24999a2dd95SBruce Richardson 
2500e21c7c0SDavid Marchand 	VHOST_CONFIG_LOG(vsocket->path, INFO, "new device, handle is %d", vid);
25199a2dd95SBruce Richardson 
25299a2dd95SBruce Richardson 	if (vsocket->notify_ops->new_connection) {
25399a2dd95SBruce Richardson 		ret = vsocket->notify_ops->new_connection(vid);
25499a2dd95SBruce Richardson 		if (ret < 0) {
2550e21c7c0SDavid Marchand 			VHOST_CONFIG_LOG(vsocket->path, ERR,
2560e21c7c0SDavid Marchand 				"failed to add vhost user connection with fd %d",
25736c525a0SDavid Marchand 				fd);
25899a2dd95SBruce Richardson 			goto err_cleanup;
25999a2dd95SBruce Richardson 		}
26099a2dd95SBruce Richardson 	}
26199a2dd95SBruce Richardson 
26299a2dd95SBruce Richardson 	conn->connfd = fd;
26399a2dd95SBruce Richardson 	conn->vsocket = vsocket;
26499a2dd95SBruce Richardson 	conn->vid = vid;
265e68a6feaSMaxime Coquelin 	ret = fdset_add(vhost_user.fdset, fd, vhost_user_read_cb,
26699a2dd95SBruce Richardson 			NULL, conn);
26799a2dd95SBruce Richardson 	if (ret < 0) {
2680e21c7c0SDavid Marchand 		VHOST_CONFIG_LOG(vsocket->path, ERR,
2690e21c7c0SDavid Marchand 			"failed to add fd %d into vhost server fdset",
27036c525a0SDavid Marchand 			fd);
27199a2dd95SBruce Richardson 
27299a2dd95SBruce Richardson 		if (vsocket->notify_ops->destroy_connection)
27399a2dd95SBruce Richardson 			vsocket->notify_ops->destroy_connection(conn->vid);
27499a2dd95SBruce Richardson 
27599a2dd95SBruce Richardson 		goto err_cleanup;
27699a2dd95SBruce Richardson 	}
27799a2dd95SBruce Richardson 
27899a2dd95SBruce Richardson 	pthread_mutex_lock(&vsocket->conn_mutex);
27999a2dd95SBruce Richardson 	TAILQ_INSERT_TAIL(&vsocket->conn_list, conn, next);
28099a2dd95SBruce Richardson 	pthread_mutex_unlock(&vsocket->conn_mutex);
28199a2dd95SBruce Richardson 
28299a2dd95SBruce Richardson 	return;
28399a2dd95SBruce Richardson 
28499a2dd95SBruce Richardson err_cleanup:
28599a2dd95SBruce Richardson 	vhost_destroy_device(vid);
28699a2dd95SBruce Richardson err:
28799a2dd95SBruce Richardson 	free(conn);
28899a2dd95SBruce Richardson 	close(fd);
28999a2dd95SBruce Richardson }
29099a2dd95SBruce Richardson 
29199a2dd95SBruce Richardson /* call back when there is new vhost-user connection from client  */
29299a2dd95SBruce Richardson static void
29399a2dd95SBruce Richardson vhost_user_server_new_connection(int fd, void *dat, int *remove __rte_unused)
29499a2dd95SBruce Richardson {
29599a2dd95SBruce Richardson 	struct vhost_user_socket *vsocket = dat;
29699a2dd95SBruce Richardson 
29799a2dd95SBruce Richardson 	fd = accept(fd, NULL, NULL);
29899a2dd95SBruce Richardson 	if (fd < 0)
29999a2dd95SBruce Richardson 		return;
30099a2dd95SBruce Richardson 
3010e21c7c0SDavid Marchand 	VHOST_CONFIG_LOG(vsocket->path, INFO, "new vhost user connection is %d", fd);
30299a2dd95SBruce Richardson 	vhost_user_add_connection(fd, vsocket);
30399a2dd95SBruce Richardson }
30499a2dd95SBruce Richardson 
30599a2dd95SBruce Richardson static void
30699a2dd95SBruce Richardson vhost_user_read_cb(int connfd, void *dat, int *remove)
30799a2dd95SBruce Richardson {
30899a2dd95SBruce Richardson 	struct vhost_user_connection *conn = dat;
30999a2dd95SBruce Richardson 	struct vhost_user_socket *vsocket = conn->vsocket;
31099a2dd95SBruce Richardson 	int ret;
31199a2dd95SBruce Richardson 
31299a2dd95SBruce Richardson 	ret = vhost_user_msg_handler(conn->vid, connfd);
31399a2dd95SBruce Richardson 	if (ret < 0) {
31499a2dd95SBruce Richardson 		struct virtio_net *dev = get_device(conn->vid);
31599a2dd95SBruce Richardson 
31699a2dd95SBruce Richardson 		close(connfd);
31799a2dd95SBruce Richardson 		*remove = 1;
31899a2dd95SBruce Richardson 
31999a2dd95SBruce Richardson 		if (dev)
32099a2dd95SBruce Richardson 			vhost_destroy_device_notify(dev);
32199a2dd95SBruce Richardson 
32299a2dd95SBruce Richardson 		if (vsocket->notify_ops->destroy_connection)
32399a2dd95SBruce Richardson 			vsocket->notify_ops->destroy_connection(conn->vid);
32499a2dd95SBruce Richardson 
32599a2dd95SBruce Richardson 		vhost_destroy_device(conn->vid);
32699a2dd95SBruce Richardson 
32799a2dd95SBruce Richardson 		if (vsocket->reconnect) {
32899a2dd95SBruce Richardson 			create_unix_socket(vsocket);
32999a2dd95SBruce Richardson 			vhost_user_start_client(vsocket);
33099a2dd95SBruce Richardson 		}
33199a2dd95SBruce Richardson 
33299a2dd95SBruce Richardson 		pthread_mutex_lock(&vsocket->conn_mutex);
33399a2dd95SBruce Richardson 		TAILQ_REMOVE(&vsocket->conn_list, conn, next);
33499a2dd95SBruce Richardson 		pthread_mutex_unlock(&vsocket->conn_mutex);
33599a2dd95SBruce Richardson 
33699a2dd95SBruce Richardson 		free(conn);
33799a2dd95SBruce Richardson 	}
33899a2dd95SBruce Richardson }
33999a2dd95SBruce Richardson 
34099a2dd95SBruce Richardson static int
34199a2dd95SBruce Richardson create_unix_socket(struct vhost_user_socket *vsocket)
34299a2dd95SBruce Richardson {
34399a2dd95SBruce Richardson 	int fd;
34499a2dd95SBruce Richardson 	struct sockaddr_un *un = &vsocket->un;
34599a2dd95SBruce Richardson 
34699a2dd95SBruce Richardson 	fd = socket(AF_UNIX, SOCK_STREAM, 0);
34799a2dd95SBruce Richardson 	if (fd < 0)
34899a2dd95SBruce Richardson 		return -1;
3490e21c7c0SDavid Marchand 	VHOST_CONFIG_LOG(vsocket->path, INFO, "vhost-user %s: socket created, fd: %d",
35036c525a0SDavid Marchand 		vsocket->is_server ? "server" : "client", fd);
35199a2dd95SBruce Richardson 
35299a2dd95SBruce Richardson 	if (!vsocket->is_server && fcntl(fd, F_SETFL, O_NONBLOCK)) {
3530e21c7c0SDavid Marchand 		VHOST_CONFIG_LOG(vsocket->path, ERR,
3540e21c7c0SDavid Marchand 			"vhost-user: can't set nonblocking mode for socket, fd: %d (%s)",
35536c525a0SDavid Marchand 			fd, strerror(errno));
35699a2dd95SBruce Richardson 		close(fd);
35799a2dd95SBruce Richardson 		return -1;
35899a2dd95SBruce Richardson 	}
35999a2dd95SBruce Richardson 
36099a2dd95SBruce Richardson 	memset(un, 0, sizeof(*un));
36199a2dd95SBruce Richardson 	un->sun_family = AF_UNIX;
362c171a2d5SStephen Hemminger 	strlcpy(un->sun_path, vsocket->path, sizeof(un->sun_path));
36399a2dd95SBruce Richardson 
36499a2dd95SBruce Richardson 	vsocket->socket_fd = fd;
36599a2dd95SBruce Richardson 	return 0;
36699a2dd95SBruce Richardson }
36799a2dd95SBruce Richardson 
36899a2dd95SBruce Richardson static int
36999a2dd95SBruce Richardson vhost_user_start_server(struct vhost_user_socket *vsocket)
37099a2dd95SBruce Richardson {
37199a2dd95SBruce Richardson 	int ret;
37299a2dd95SBruce Richardson 	int fd = vsocket->socket_fd;
37399a2dd95SBruce Richardson 	const char *path = vsocket->path;
37499a2dd95SBruce Richardson 
37599a2dd95SBruce Richardson 	/*
37699a2dd95SBruce Richardson 	 * bind () may fail if the socket file with the same name already
37799a2dd95SBruce Richardson 	 * exists. But the library obviously should not delete the file
37899a2dd95SBruce Richardson 	 * provided by the user, since we can not be sure that it is not
37999a2dd95SBruce Richardson 	 * being used by other applications. Moreover, many applications form
38099a2dd95SBruce Richardson 	 * socket names based on user input, which is prone to errors.
38199a2dd95SBruce Richardson 	 *
38299a2dd95SBruce Richardson 	 * The user must ensure that the socket does not exist before
38399a2dd95SBruce Richardson 	 * registering the vhost driver in server mode.
38499a2dd95SBruce Richardson 	 */
38599a2dd95SBruce Richardson 	ret = bind(fd, (struct sockaddr *)&vsocket->un, sizeof(vsocket->un));
38699a2dd95SBruce Richardson 	if (ret < 0) {
3870e21c7c0SDavid Marchand 		VHOST_CONFIG_LOG(path, ERR, "failed to bind: %s; remove it and try again",
38836c525a0SDavid Marchand 			strerror(errno));
38999a2dd95SBruce Richardson 		goto err;
39099a2dd95SBruce Richardson 	}
3910e21c7c0SDavid Marchand 	VHOST_CONFIG_LOG(path, INFO, "binding succeeded");
39299a2dd95SBruce Richardson 
39399a2dd95SBruce Richardson 	ret = listen(fd, MAX_VIRTIO_BACKLOG);
39499a2dd95SBruce Richardson 	if (ret < 0)
39599a2dd95SBruce Richardson 		goto err;
39699a2dd95SBruce Richardson 
397e68a6feaSMaxime Coquelin 	ret = fdset_add(vhost_user.fdset, fd, vhost_user_server_new_connection,
39899a2dd95SBruce Richardson 		  NULL, vsocket);
39999a2dd95SBruce Richardson 	if (ret < 0) {
4000e21c7c0SDavid Marchand 		VHOST_CONFIG_LOG(path, ERR, "failed to add listen fd %d to vhost server fdset",
40136c525a0SDavid Marchand 			fd);
40299a2dd95SBruce Richardson 		goto err;
40399a2dd95SBruce Richardson 	}
40499a2dd95SBruce Richardson 
40599a2dd95SBruce Richardson 	return 0;
40699a2dd95SBruce Richardson 
40799a2dd95SBruce Richardson err:
40899a2dd95SBruce Richardson 	close(fd);
40999a2dd95SBruce Richardson 	return -1;
41099a2dd95SBruce Richardson }
41199a2dd95SBruce Richardson 
41299a2dd95SBruce Richardson struct vhost_user_reconnect {
41399a2dd95SBruce Richardson 	struct sockaddr_un un;
41499a2dd95SBruce Richardson 	int fd;
41599a2dd95SBruce Richardson 	struct vhost_user_socket *vsocket;
41699a2dd95SBruce Richardson 
41799a2dd95SBruce Richardson 	TAILQ_ENTRY(vhost_user_reconnect) next;
41899a2dd95SBruce Richardson };
41999a2dd95SBruce Richardson 
42099a2dd95SBruce Richardson TAILQ_HEAD(vhost_user_reconnect_tailq_list, vhost_user_reconnect);
42199a2dd95SBruce Richardson struct vhost_user_reconnect_list {
42299a2dd95SBruce Richardson 	struct vhost_user_reconnect_tailq_list head;
42399a2dd95SBruce Richardson 	pthread_mutex_t mutex;
42499a2dd95SBruce Richardson };
42599a2dd95SBruce Richardson 
42699a2dd95SBruce Richardson static struct vhost_user_reconnect_list reconn_list;
4271c1abf17SThomas Monjalon static rte_thread_t reconn_tid;
42899a2dd95SBruce Richardson 
42999a2dd95SBruce Richardson static int
430c85c35b1SMaxime Coquelin vhost_user_connect_nonblock(char *path, int fd, struct sockaddr *un, size_t sz)
43199a2dd95SBruce Richardson {
43299a2dd95SBruce Richardson 	int ret, flags;
43399a2dd95SBruce Richardson 
43499a2dd95SBruce Richardson 	ret = connect(fd, un, sz);
43599a2dd95SBruce Richardson 	if (ret < 0 && errno != EISCONN)
43699a2dd95SBruce Richardson 		return -1;
43799a2dd95SBruce Richardson 
43899a2dd95SBruce Richardson 	flags = fcntl(fd, F_GETFL, 0);
43999a2dd95SBruce Richardson 	if (flags < 0) {
4400e21c7c0SDavid Marchand 		VHOST_CONFIG_LOG(path, ERR, "can't get flags for connfd %d (%s)",
44136c525a0SDavid Marchand 			fd, strerror(errno));
44299a2dd95SBruce Richardson 		return -2;
44399a2dd95SBruce Richardson 	}
44499a2dd95SBruce Richardson 	if ((flags & O_NONBLOCK) && fcntl(fd, F_SETFL, flags & ~O_NONBLOCK)) {
4450e21c7c0SDavid Marchand 		VHOST_CONFIG_LOG(path, ERR, "can't disable nonblocking on fd %d", fd);
44699a2dd95SBruce Richardson 		return -2;
44799a2dd95SBruce Richardson 	}
44899a2dd95SBruce Richardson 	return 0;
44999a2dd95SBruce Richardson }
45099a2dd95SBruce Richardson 
4511c1abf17SThomas Monjalon static uint32_t
45299a2dd95SBruce Richardson vhost_user_client_reconnect(void *arg __rte_unused)
45399a2dd95SBruce Richardson {
45499a2dd95SBruce Richardson 	int ret;
45599a2dd95SBruce Richardson 	struct vhost_user_reconnect *reconn, *next;
45699a2dd95SBruce Richardson 
45799a2dd95SBruce Richardson 	while (1) {
45899a2dd95SBruce Richardson 		pthread_mutex_lock(&reconn_list.mutex);
45999a2dd95SBruce Richardson 
46099a2dd95SBruce Richardson 		/*
46199a2dd95SBruce Richardson 		 * An equal implementation of TAILQ_FOREACH_SAFE,
46299a2dd95SBruce Richardson 		 * which does not exist on all platforms.
46399a2dd95SBruce Richardson 		 */
46499a2dd95SBruce Richardson 		for (reconn = TAILQ_FIRST(&reconn_list.head);
46599a2dd95SBruce Richardson 		     reconn != NULL; reconn = next) {
46699a2dd95SBruce Richardson 			next = TAILQ_NEXT(reconn, next);
46799a2dd95SBruce Richardson 
468c85c35b1SMaxime Coquelin 			ret = vhost_user_connect_nonblock(reconn->vsocket->path, reconn->fd,
46999a2dd95SBruce Richardson 						(struct sockaddr *)&reconn->un,
47099a2dd95SBruce Richardson 						sizeof(reconn->un));
47199a2dd95SBruce Richardson 			if (ret == -2) {
47299a2dd95SBruce Richardson 				close(reconn->fd);
4730e21c7c0SDavid Marchand 				VHOST_CONFIG_LOG(reconn->vsocket->path, ERR,
4740e21c7c0SDavid Marchand 					"reconnection for fd %d failed",
47536c525a0SDavid Marchand 					reconn->fd);
47699a2dd95SBruce Richardson 				goto remove_fd;
47799a2dd95SBruce Richardson 			}
47899a2dd95SBruce Richardson 			if (ret == -1)
47999a2dd95SBruce Richardson 				continue;
48099a2dd95SBruce Richardson 
4810e21c7c0SDavid Marchand 			VHOST_CONFIG_LOG(reconn->vsocket->path, INFO, "connected");
48299a2dd95SBruce Richardson 			vhost_user_add_connection(reconn->fd, reconn->vsocket);
48399a2dd95SBruce Richardson remove_fd:
48499a2dd95SBruce Richardson 			TAILQ_REMOVE(&reconn_list.head, reconn, next);
48599a2dd95SBruce Richardson 			free(reconn);
48699a2dd95SBruce Richardson 		}
48799a2dd95SBruce Richardson 
48899a2dd95SBruce Richardson 		pthread_mutex_unlock(&reconn_list.mutex);
48999a2dd95SBruce Richardson 		sleep(1);
49099a2dd95SBruce Richardson 	}
49199a2dd95SBruce Richardson 
4921c1abf17SThomas Monjalon 	return 0;
49399a2dd95SBruce Richardson }
49499a2dd95SBruce Richardson 
49599a2dd95SBruce Richardson static int
49699a2dd95SBruce Richardson vhost_user_reconnect_init(void)
49799a2dd95SBruce Richardson {
49899a2dd95SBruce Richardson 	int ret;
49999a2dd95SBruce Richardson 
500*4d2aa150SAriel Otilibili 	pthread_mutex_init(&reconn_list.mutex, NULL);
50199a2dd95SBruce Richardson 	TAILQ_INIT(&reconn_list.head);
50299a2dd95SBruce Richardson 
5031c1abf17SThomas Monjalon 	ret = rte_thread_create_internal_control(&reconn_tid, "vhost-reco",
50499a2dd95SBruce Richardson 			vhost_user_client_reconnect, NULL);
50599a2dd95SBruce Richardson 	if (ret != 0) {
5060e21c7c0SDavid Marchand 		VHOST_CONFIG_LOG("thread", ERR, "failed to create reconnect thread");
507c85c35b1SMaxime Coquelin 		if (pthread_mutex_destroy(&reconn_list.mutex))
5080e21c7c0SDavid Marchand 			VHOST_CONFIG_LOG("thread", ERR,
5090e21c7c0SDavid Marchand 				"%s: failed to destroy reconnect mutex",
51036c525a0SDavid Marchand 				__func__);
51199a2dd95SBruce Richardson 	}
51299a2dd95SBruce Richardson 
51399a2dd95SBruce Richardson 	return ret;
51499a2dd95SBruce Richardson }
51599a2dd95SBruce Richardson 
51699a2dd95SBruce Richardson static int
51799a2dd95SBruce Richardson vhost_user_start_client(struct vhost_user_socket *vsocket)
51899a2dd95SBruce Richardson {
51999a2dd95SBruce Richardson 	int ret;
52099a2dd95SBruce Richardson 	int fd = vsocket->socket_fd;
52199a2dd95SBruce Richardson 	const char *path = vsocket->path;
52299a2dd95SBruce Richardson 	struct vhost_user_reconnect *reconn;
52399a2dd95SBruce Richardson 
524ff8989c4SDaniil Ushkov 	if (!vsocket->async_connect || !vsocket->reconnect) {
525ff8989c4SDaniil Ushkov 		ret = vhost_user_connect_nonblock(vsocket->path, fd,
526ff8989c4SDaniil Ushkov 			(struct sockaddr *)&vsocket->un, sizeof(vsocket->un));
52799a2dd95SBruce Richardson 		if (ret == 0) {
52899a2dd95SBruce Richardson 			vhost_user_add_connection(fd, vsocket);
52999a2dd95SBruce Richardson 			return 0;
53099a2dd95SBruce Richardson 		}
53199a2dd95SBruce Richardson 
5320e21c7c0SDavid Marchand 		VHOST_CONFIG_LOG(path, WARNING, "failed to connect: %s", strerror(errno));
53399a2dd95SBruce Richardson 
53499a2dd95SBruce Richardson 		if (ret == -2 || !vsocket->reconnect) {
53599a2dd95SBruce Richardson 			close(fd);
53699a2dd95SBruce Richardson 			return -1;
53799a2dd95SBruce Richardson 		}
53899a2dd95SBruce Richardson 
5390e21c7c0SDavid Marchand 		VHOST_CONFIG_LOG(path, INFO, "reconnecting...");
540ff8989c4SDaniil Ushkov 	}
54199a2dd95SBruce Richardson 	reconn = malloc(sizeof(*reconn));
54299a2dd95SBruce Richardson 	if (reconn == NULL) {
5430e21c7c0SDavid Marchand 		VHOST_CONFIG_LOG(path, ERR, "failed to allocate memory for reconnect");
54499a2dd95SBruce Richardson 		close(fd);
54599a2dd95SBruce Richardson 		return -1;
54699a2dd95SBruce Richardson 	}
54799a2dd95SBruce Richardson 	reconn->un = vsocket->un;
54899a2dd95SBruce Richardson 	reconn->fd = fd;
54999a2dd95SBruce Richardson 	reconn->vsocket = vsocket;
55099a2dd95SBruce Richardson 	pthread_mutex_lock(&reconn_list.mutex);
55199a2dd95SBruce Richardson 	TAILQ_INSERT_TAIL(&reconn_list.head, reconn, next);
55299a2dd95SBruce Richardson 	pthread_mutex_unlock(&reconn_list.mutex);
55399a2dd95SBruce Richardson 
55499a2dd95SBruce Richardson 	return 0;
55599a2dd95SBruce Richardson }
55699a2dd95SBruce Richardson 
55799a2dd95SBruce Richardson static struct vhost_user_socket *
55899a2dd95SBruce Richardson find_vhost_user_socket(const char *path)
55999a2dd95SBruce Richardson {
56099a2dd95SBruce Richardson 	int i;
56199a2dd95SBruce Richardson 
56299a2dd95SBruce Richardson 	if (path == NULL)
56399a2dd95SBruce Richardson 		return NULL;
56499a2dd95SBruce Richardson 
56599a2dd95SBruce Richardson 	for (i = 0; i < vhost_user.vsocket_cnt; i++) {
56699a2dd95SBruce Richardson 		struct vhost_user_socket *vsocket = vhost_user.vsockets[i];
56799a2dd95SBruce Richardson 
56899a2dd95SBruce Richardson 		if (!strcmp(vsocket->path, path))
56999a2dd95SBruce Richardson 			return vsocket;
57099a2dd95SBruce Richardson 	}
57199a2dd95SBruce Richardson 
57299a2dd95SBruce Richardson 	return NULL;
57399a2dd95SBruce Richardson }
57499a2dd95SBruce Richardson 
57599a2dd95SBruce Richardson int
57699a2dd95SBruce Richardson rte_vhost_driver_attach_vdpa_device(const char *path,
57799a2dd95SBruce Richardson 		struct rte_vdpa_device *dev)
57899a2dd95SBruce Richardson {
57999a2dd95SBruce Richardson 	struct vhost_user_socket *vsocket;
58099a2dd95SBruce Richardson 
58199a2dd95SBruce Richardson 	if (dev == NULL || path == NULL)
58299a2dd95SBruce Richardson 		return -1;
58399a2dd95SBruce Richardson 
58499a2dd95SBruce Richardson 	pthread_mutex_lock(&vhost_user.mutex);
58599a2dd95SBruce Richardson 	vsocket = find_vhost_user_socket(path);
58699a2dd95SBruce Richardson 	if (vsocket)
58799a2dd95SBruce Richardson 		vsocket->vdpa_dev = dev;
58899a2dd95SBruce Richardson 	pthread_mutex_unlock(&vhost_user.mutex);
58999a2dd95SBruce Richardson 
59099a2dd95SBruce Richardson 	return vsocket ? 0 : -1;
59199a2dd95SBruce Richardson }
59299a2dd95SBruce Richardson 
59399a2dd95SBruce Richardson int
59499a2dd95SBruce Richardson rte_vhost_driver_detach_vdpa_device(const char *path)
59599a2dd95SBruce Richardson {
59699a2dd95SBruce Richardson 	struct vhost_user_socket *vsocket;
59799a2dd95SBruce Richardson 
59899a2dd95SBruce Richardson 	pthread_mutex_lock(&vhost_user.mutex);
59999a2dd95SBruce Richardson 	vsocket = find_vhost_user_socket(path);
60099a2dd95SBruce Richardson 	if (vsocket)
60199a2dd95SBruce Richardson 		vsocket->vdpa_dev = NULL;
60299a2dd95SBruce Richardson 	pthread_mutex_unlock(&vhost_user.mutex);
60399a2dd95SBruce Richardson 
60499a2dd95SBruce Richardson 	return vsocket ? 0 : -1;
60599a2dd95SBruce Richardson }
60699a2dd95SBruce Richardson 
60799a2dd95SBruce Richardson struct rte_vdpa_device *
60899a2dd95SBruce Richardson rte_vhost_driver_get_vdpa_device(const char *path)
60999a2dd95SBruce Richardson {
61099a2dd95SBruce Richardson 	struct vhost_user_socket *vsocket;
61199a2dd95SBruce Richardson 	struct rte_vdpa_device *dev = NULL;
61299a2dd95SBruce Richardson 
61399a2dd95SBruce Richardson 	pthread_mutex_lock(&vhost_user.mutex);
61499a2dd95SBruce Richardson 	vsocket = find_vhost_user_socket(path);
61599a2dd95SBruce Richardson 	if (vsocket)
61699a2dd95SBruce Richardson 		dev = vsocket->vdpa_dev;
61799a2dd95SBruce Richardson 	pthread_mutex_unlock(&vhost_user.mutex);
61899a2dd95SBruce Richardson 
61999a2dd95SBruce Richardson 	return dev;
62099a2dd95SBruce Richardson }
62199a2dd95SBruce Richardson 
62299a2dd95SBruce Richardson int
623486f65e6SAndy Pei rte_vhost_driver_get_vdpa_dev_type(const char *path, uint32_t *type)
624486f65e6SAndy Pei {
625486f65e6SAndy Pei 	struct vhost_user_socket *vsocket;
626486f65e6SAndy Pei 	struct rte_vdpa_device *vdpa_dev;
627486f65e6SAndy Pei 	int ret = 0;
628486f65e6SAndy Pei 
629486f65e6SAndy Pei 	pthread_mutex_lock(&vhost_user.mutex);
630486f65e6SAndy Pei 	vsocket = find_vhost_user_socket(path);
631486f65e6SAndy Pei 	if (!vsocket) {
6320e21c7c0SDavid Marchand 		VHOST_CONFIG_LOG(path, ERR, "socket file is not registered yet.");
633486f65e6SAndy Pei 		ret = -1;
634486f65e6SAndy Pei 		goto unlock_exit;
635486f65e6SAndy Pei 	}
636486f65e6SAndy Pei 
637486f65e6SAndy Pei 	vdpa_dev = vsocket->vdpa_dev;
638486f65e6SAndy Pei 	if (!vdpa_dev) {
639486f65e6SAndy Pei 		ret = -1;
640486f65e6SAndy Pei 		goto unlock_exit;
641486f65e6SAndy Pei 	}
642486f65e6SAndy Pei 
643f92ab3f0SAndy Pei 	*type = vdpa_dev->type;
644486f65e6SAndy Pei 
645486f65e6SAndy Pei unlock_exit:
646486f65e6SAndy Pei 	pthread_mutex_unlock(&vhost_user.mutex);
647486f65e6SAndy Pei 	return ret;
648486f65e6SAndy Pei }
649486f65e6SAndy Pei 
650486f65e6SAndy Pei int
65199a2dd95SBruce Richardson rte_vhost_driver_disable_features(const char *path, uint64_t features)
65299a2dd95SBruce Richardson {
65399a2dd95SBruce Richardson 	struct vhost_user_socket *vsocket;
65499a2dd95SBruce Richardson 
65599a2dd95SBruce Richardson 	pthread_mutex_lock(&vhost_user.mutex);
65699a2dd95SBruce Richardson 	vsocket = find_vhost_user_socket(path);
65799a2dd95SBruce Richardson 
65899a2dd95SBruce Richardson 	/* Note that use_builtin_virtio_net is not affected by this function
65999a2dd95SBruce Richardson 	 * since callers may want to selectively disable features of the
66099a2dd95SBruce Richardson 	 * built-in vhost net device backend.
66199a2dd95SBruce Richardson 	 */
66299a2dd95SBruce Richardson 
66399a2dd95SBruce Richardson 	if (vsocket)
66499a2dd95SBruce Richardson 		vsocket->features &= ~features;
66599a2dd95SBruce Richardson 	pthread_mutex_unlock(&vhost_user.mutex);
66699a2dd95SBruce Richardson 
66799a2dd95SBruce Richardson 	return vsocket ? 0 : -1;
66899a2dd95SBruce Richardson }
66999a2dd95SBruce Richardson 
67099a2dd95SBruce Richardson int
67199a2dd95SBruce Richardson rte_vhost_driver_enable_features(const char *path, uint64_t features)
67299a2dd95SBruce Richardson {
67399a2dd95SBruce Richardson 	struct vhost_user_socket *vsocket;
67499a2dd95SBruce Richardson 
67599a2dd95SBruce Richardson 	pthread_mutex_lock(&vhost_user.mutex);
67699a2dd95SBruce Richardson 	vsocket = find_vhost_user_socket(path);
67799a2dd95SBruce Richardson 	if (vsocket) {
67899a2dd95SBruce Richardson 		if ((vsocket->supported_features & features) != features) {
67999a2dd95SBruce Richardson 			/*
68099a2dd95SBruce Richardson 			 * trying to enable features the driver doesn't
68199a2dd95SBruce Richardson 			 * support.
68299a2dd95SBruce Richardson 			 */
68399a2dd95SBruce Richardson 			pthread_mutex_unlock(&vhost_user.mutex);
68499a2dd95SBruce Richardson 			return -1;
68599a2dd95SBruce Richardson 		}
68699a2dd95SBruce Richardson 		vsocket->features |= features;
68799a2dd95SBruce Richardson 	}
68899a2dd95SBruce Richardson 	pthread_mutex_unlock(&vhost_user.mutex);
68999a2dd95SBruce Richardson 
69099a2dd95SBruce Richardson 	return vsocket ? 0 : -1;
69199a2dd95SBruce Richardson }
69299a2dd95SBruce Richardson 
69399a2dd95SBruce Richardson int
69499a2dd95SBruce Richardson rte_vhost_driver_set_features(const char *path, uint64_t features)
69599a2dd95SBruce Richardson {
69699a2dd95SBruce Richardson 	struct vhost_user_socket *vsocket;
69799a2dd95SBruce Richardson 
69899a2dd95SBruce Richardson 	pthread_mutex_lock(&vhost_user.mutex);
69999a2dd95SBruce Richardson 	vsocket = find_vhost_user_socket(path);
70099a2dd95SBruce Richardson 	if (vsocket) {
70199a2dd95SBruce Richardson 		vsocket->supported_features = features;
70299a2dd95SBruce Richardson 		vsocket->features = features;
70399a2dd95SBruce Richardson 
70499a2dd95SBruce Richardson 		/* Anyone setting feature bits is implementing their own vhost
70599a2dd95SBruce Richardson 		 * device backend.
70699a2dd95SBruce Richardson 		 */
70799a2dd95SBruce Richardson 		vsocket->use_builtin_virtio_net = false;
70899a2dd95SBruce Richardson 	}
70999a2dd95SBruce Richardson 	pthread_mutex_unlock(&vhost_user.mutex);
71099a2dd95SBruce Richardson 
71199a2dd95SBruce Richardson 	return vsocket ? 0 : -1;
71299a2dd95SBruce Richardson }
71399a2dd95SBruce Richardson 
71499a2dd95SBruce Richardson int
71599a2dd95SBruce Richardson rte_vhost_driver_get_features(const char *path, uint64_t *features)
71699a2dd95SBruce Richardson {
71799a2dd95SBruce Richardson 	struct vhost_user_socket *vsocket;
71899a2dd95SBruce Richardson 	uint64_t vdpa_features;
71999a2dd95SBruce Richardson 	struct rte_vdpa_device *vdpa_dev;
72099a2dd95SBruce Richardson 	int ret = 0;
72199a2dd95SBruce Richardson 
72299a2dd95SBruce Richardson 	pthread_mutex_lock(&vhost_user.mutex);
72399a2dd95SBruce Richardson 	vsocket = find_vhost_user_socket(path);
72499a2dd95SBruce Richardson 	if (!vsocket) {
7250e21c7c0SDavid Marchand 		VHOST_CONFIG_LOG(path, ERR, "socket file is not registered yet.");
72699a2dd95SBruce Richardson 		ret = -1;
72799a2dd95SBruce Richardson 		goto unlock_exit;
72899a2dd95SBruce Richardson 	}
72999a2dd95SBruce Richardson 
73099a2dd95SBruce Richardson 	vdpa_dev = vsocket->vdpa_dev;
73199a2dd95SBruce Richardson 	if (!vdpa_dev) {
73299a2dd95SBruce Richardson 		*features = vsocket->features;
73399a2dd95SBruce Richardson 		goto unlock_exit;
73499a2dd95SBruce Richardson 	}
73599a2dd95SBruce Richardson 
73699a2dd95SBruce Richardson 	if (vdpa_dev->ops->get_features(vdpa_dev, &vdpa_features) < 0) {
7370e21c7c0SDavid Marchand 		VHOST_CONFIG_LOG(path, ERR, "failed to get vdpa features for socket file.");
73899a2dd95SBruce Richardson 		ret = -1;
73999a2dd95SBruce Richardson 		goto unlock_exit;
74099a2dd95SBruce Richardson 	}
74199a2dd95SBruce Richardson 
74299a2dd95SBruce Richardson 	*features = vsocket->features & vdpa_features;
74399a2dd95SBruce Richardson 
74499a2dd95SBruce Richardson unlock_exit:
74599a2dd95SBruce Richardson 	pthread_mutex_unlock(&vhost_user.mutex);
74699a2dd95SBruce Richardson 	return ret;
74799a2dd95SBruce Richardson }
74899a2dd95SBruce Richardson 
74999a2dd95SBruce Richardson int
75099a2dd95SBruce Richardson rte_vhost_driver_set_protocol_features(const char *path,
75199a2dd95SBruce Richardson 		uint64_t protocol_features)
75299a2dd95SBruce Richardson {
75399a2dd95SBruce Richardson 	struct vhost_user_socket *vsocket;
75499a2dd95SBruce Richardson 
75599a2dd95SBruce Richardson 	pthread_mutex_lock(&vhost_user.mutex);
75699a2dd95SBruce Richardson 	vsocket = find_vhost_user_socket(path);
75799a2dd95SBruce Richardson 	if (vsocket)
75899a2dd95SBruce Richardson 		vsocket->protocol_features = protocol_features;
75999a2dd95SBruce Richardson 	pthread_mutex_unlock(&vhost_user.mutex);
76099a2dd95SBruce Richardson 	return vsocket ? 0 : -1;
76199a2dd95SBruce Richardson }
76299a2dd95SBruce Richardson 
76399a2dd95SBruce Richardson int
76499a2dd95SBruce Richardson rte_vhost_driver_get_protocol_features(const char *path,
76599a2dd95SBruce Richardson 		uint64_t *protocol_features)
76699a2dd95SBruce Richardson {
76799a2dd95SBruce Richardson 	struct vhost_user_socket *vsocket;
76899a2dd95SBruce Richardson 	uint64_t vdpa_protocol_features;
76999a2dd95SBruce Richardson 	struct rte_vdpa_device *vdpa_dev;
77099a2dd95SBruce Richardson 	int ret = 0;
77199a2dd95SBruce Richardson 
77299a2dd95SBruce Richardson 	pthread_mutex_lock(&vhost_user.mutex);
77399a2dd95SBruce Richardson 	vsocket = find_vhost_user_socket(path);
77499a2dd95SBruce Richardson 	if (!vsocket) {
7750e21c7c0SDavid Marchand 		VHOST_CONFIG_LOG(path, ERR, "socket file is not registered yet.");
77699a2dd95SBruce Richardson 		ret = -1;
77799a2dd95SBruce Richardson 		goto unlock_exit;
77899a2dd95SBruce Richardson 	}
77999a2dd95SBruce Richardson 
78099a2dd95SBruce Richardson 	vdpa_dev = vsocket->vdpa_dev;
78199a2dd95SBruce Richardson 	if (!vdpa_dev) {
78299a2dd95SBruce Richardson 		*protocol_features = vsocket->protocol_features;
78399a2dd95SBruce Richardson 		goto unlock_exit;
78499a2dd95SBruce Richardson 	}
78599a2dd95SBruce Richardson 
78699a2dd95SBruce Richardson 	if (vdpa_dev->ops->get_protocol_features(vdpa_dev,
78799a2dd95SBruce Richardson 				&vdpa_protocol_features) < 0) {
7880e21c7c0SDavid Marchand 		VHOST_CONFIG_LOG(path, ERR, "failed to get vdpa protocol features.");
78999a2dd95SBruce Richardson 		ret = -1;
79099a2dd95SBruce Richardson 		goto unlock_exit;
79199a2dd95SBruce Richardson 	}
79299a2dd95SBruce Richardson 
79399a2dd95SBruce Richardson 	*protocol_features = vsocket->protocol_features
79499a2dd95SBruce Richardson 		& vdpa_protocol_features;
79599a2dd95SBruce Richardson 
79699a2dd95SBruce Richardson unlock_exit:
79799a2dd95SBruce Richardson 	pthread_mutex_unlock(&vhost_user.mutex);
79899a2dd95SBruce Richardson 	return ret;
79999a2dd95SBruce Richardson }
80099a2dd95SBruce Richardson 
80199a2dd95SBruce Richardson int
80299a2dd95SBruce Richardson rte_vhost_driver_get_queue_num(const char *path, uint32_t *queue_num)
80399a2dd95SBruce Richardson {
80499a2dd95SBruce Richardson 	struct vhost_user_socket *vsocket;
80599a2dd95SBruce Richardson 	uint32_t vdpa_queue_num;
80699a2dd95SBruce Richardson 	struct rte_vdpa_device *vdpa_dev;
80799a2dd95SBruce Richardson 	int ret = 0;
80899a2dd95SBruce Richardson 
80999a2dd95SBruce Richardson 	pthread_mutex_lock(&vhost_user.mutex);
81099a2dd95SBruce Richardson 	vsocket = find_vhost_user_socket(path);
81199a2dd95SBruce Richardson 	if (!vsocket) {
8120e21c7c0SDavid Marchand 		VHOST_CONFIG_LOG(path, ERR, "socket file is not registered yet.");
81399a2dd95SBruce Richardson 		ret = -1;
81499a2dd95SBruce Richardson 		goto unlock_exit;
81599a2dd95SBruce Richardson 	}
81699a2dd95SBruce Richardson 
81799a2dd95SBruce Richardson 	vdpa_dev = vsocket->vdpa_dev;
81899a2dd95SBruce Richardson 	if (!vdpa_dev) {
8194aa1f88aSMaxime Coquelin 		*queue_num = vsocket->max_queue_pairs;
82099a2dd95SBruce Richardson 		goto unlock_exit;
82199a2dd95SBruce Richardson 	}
82299a2dd95SBruce Richardson 
82399a2dd95SBruce Richardson 	if (vdpa_dev->ops->get_queue_num(vdpa_dev, &vdpa_queue_num) < 0) {
8240e21c7c0SDavid Marchand 		VHOST_CONFIG_LOG(path, ERR, "failed to get vdpa queue number.");
82599a2dd95SBruce Richardson 		ret = -1;
82699a2dd95SBruce Richardson 		goto unlock_exit;
82799a2dd95SBruce Richardson 	}
82899a2dd95SBruce Richardson 
8294aa1f88aSMaxime Coquelin 	*queue_num = RTE_MIN(vsocket->max_queue_pairs, vdpa_queue_num);
8304aa1f88aSMaxime Coquelin 
8314aa1f88aSMaxime Coquelin unlock_exit:
8324aa1f88aSMaxime Coquelin 	pthread_mutex_unlock(&vhost_user.mutex);
8334aa1f88aSMaxime Coquelin 	return ret;
8344aa1f88aSMaxime Coquelin }
8354aa1f88aSMaxime Coquelin 
8364aa1f88aSMaxime Coquelin int
8374aa1f88aSMaxime Coquelin rte_vhost_driver_set_max_queue_num(const char *path, uint32_t max_queue_pairs)
8384aa1f88aSMaxime Coquelin {
8394aa1f88aSMaxime Coquelin 	struct vhost_user_socket *vsocket;
8404aa1f88aSMaxime Coquelin 	int ret = 0;
8414aa1f88aSMaxime Coquelin 
8424aa1f88aSMaxime Coquelin 	pthread_mutex_lock(&vhost_user.mutex);
8434aa1f88aSMaxime Coquelin 	vsocket = find_vhost_user_socket(path);
8444aa1f88aSMaxime Coquelin 	if (!vsocket) {
8450e21c7c0SDavid Marchand 		VHOST_CONFIG_LOG(path, ERR, "socket file is not registered yet.");
8464aa1f88aSMaxime Coquelin 		ret = -1;
8474aa1f88aSMaxime Coquelin 		goto unlock_exit;
8484aa1f88aSMaxime Coquelin 	}
8494aa1f88aSMaxime Coquelin 
850e1808999SMaxime Coquelin 	/*
851e1808999SMaxime Coquelin 	 * This is only useful for VDUSE for which number of virtqueues is set
852e1808999SMaxime Coquelin 	 * by the backend. For Vhost-user, the number of virtqueues is defined
853e1808999SMaxime Coquelin 	 * by the frontend.
854e1808999SMaxime Coquelin 	 */
855e1808999SMaxime Coquelin 	if (!vsocket->is_vduse) {
856e1808999SMaxime Coquelin 		VHOST_CONFIG_LOG(path, DEBUG,
857e1808999SMaxime Coquelin 				"Keeping %u max queue pairs for Vhost-user backend",
858e1808999SMaxime Coquelin 				VHOST_MAX_QUEUE_PAIRS);
859e1808999SMaxime Coquelin 		goto unlock_exit;
860e1808999SMaxime Coquelin 	}
861e1808999SMaxime Coquelin 
86227429219SMaxime Coquelin 	VHOST_CONFIG_LOG(path, INFO, "Setting max queue pairs to %u", max_queue_pairs);
86327429219SMaxime Coquelin 
86427429219SMaxime Coquelin 	if (max_queue_pairs > VHOST_MAX_QUEUE_PAIRS) {
86527429219SMaxime Coquelin 		VHOST_CONFIG_LOG(path, ERR, "Library only supports up to %u queue pairs",
86627429219SMaxime Coquelin 				VHOST_MAX_QUEUE_PAIRS);
86727429219SMaxime Coquelin 		ret = -1;
86827429219SMaxime Coquelin 		goto unlock_exit;
86927429219SMaxime Coquelin 	}
87027429219SMaxime Coquelin 
8714aa1f88aSMaxime Coquelin 	vsocket->max_queue_pairs = max_queue_pairs;
87299a2dd95SBruce Richardson 
87399a2dd95SBruce Richardson unlock_exit:
87499a2dd95SBruce Richardson 	pthread_mutex_unlock(&vhost_user.mutex);
87599a2dd95SBruce Richardson 	return ret;
87699a2dd95SBruce Richardson }
87799a2dd95SBruce Richardson 
87899a2dd95SBruce Richardson static void
87999a2dd95SBruce Richardson vhost_user_socket_mem_free(struct vhost_user_socket *vsocket)
88099a2dd95SBruce Richardson {
881d761d455SEelco Chaudron 	if (vsocket == NULL)
882d761d455SEelco Chaudron 		return;
88399a2dd95SBruce Richardson 
884d761d455SEelco Chaudron 	free(vsocket->path);
88599a2dd95SBruce Richardson 	free(vsocket);
88699a2dd95SBruce Richardson }
88799a2dd95SBruce Richardson 
88899a2dd95SBruce Richardson /*
88999a2dd95SBruce Richardson  * Register a new vhost-user socket; here we could act as server
89099a2dd95SBruce Richardson  * (the default case), or client (when RTE_VHOST_USER_CLIENT) flag
89199a2dd95SBruce Richardson  * is set.
89299a2dd95SBruce Richardson  */
89399a2dd95SBruce Richardson int
89499a2dd95SBruce Richardson rte_vhost_driver_register(const char *path, uint64_t flags)
89599a2dd95SBruce Richardson {
89699a2dd95SBruce Richardson 	int ret = -1;
89799a2dd95SBruce Richardson 	struct vhost_user_socket *vsocket;
89899a2dd95SBruce Richardson 
89999a2dd95SBruce Richardson 	if (!path)
90099a2dd95SBruce Richardson 		return -1;
90199a2dd95SBruce Richardson 
90299a2dd95SBruce Richardson 	pthread_mutex_lock(&vhost_user.mutex);
90399a2dd95SBruce Richardson 
90499a2dd95SBruce Richardson 	if (vhost_user.vsocket_cnt == MAX_VHOST_SOCKET) {
9050e21c7c0SDavid Marchand 		VHOST_CONFIG_LOG(path, ERR, "the number of vhost sockets reaches maximum");
90699a2dd95SBruce Richardson 		goto out;
90799a2dd95SBruce Richardson 	}
90899a2dd95SBruce Richardson 
90999a2dd95SBruce Richardson 	vsocket = malloc(sizeof(struct vhost_user_socket));
91099a2dd95SBruce Richardson 	if (!vsocket)
91199a2dd95SBruce Richardson 		goto out;
91299a2dd95SBruce Richardson 	memset(vsocket, 0, sizeof(struct vhost_user_socket));
91399a2dd95SBruce Richardson 	vsocket->path = strdup(path);
91499a2dd95SBruce Richardson 	if (vsocket->path == NULL) {
9150e21c7c0SDavid Marchand 		VHOST_CONFIG_LOG(path, ERR, "failed to copy socket path string");
91699a2dd95SBruce Richardson 		vhost_user_socket_mem_free(vsocket);
91799a2dd95SBruce Richardson 		goto out;
91899a2dd95SBruce Richardson 	}
91999a2dd95SBruce Richardson 	TAILQ_INIT(&vsocket->conn_list);
920*4d2aa150SAriel Otilibili 	pthread_mutex_init(&vsocket->conn_mutex, NULL);
9214789eb43SMaxime Coquelin 
9224789eb43SMaxime Coquelin 	if (!strncmp("/dev/vduse/", path, strlen("/dev/vduse/")))
9234789eb43SMaxime Coquelin 		vsocket->is_vduse = true;
9244789eb43SMaxime Coquelin 
92599a2dd95SBruce Richardson 	vsocket->vdpa_dev = NULL;
9264aa1f88aSMaxime Coquelin 	vsocket->max_queue_pairs = VHOST_MAX_QUEUE_PAIRS;
92799a2dd95SBruce Richardson 	vsocket->extbuf = flags & RTE_VHOST_USER_EXTBUF_SUPPORT;
92899a2dd95SBruce Richardson 	vsocket->linearbuf = flags & RTE_VHOST_USER_LINEARBUF_SUPPORT;
92999a2dd95SBruce Richardson 	vsocket->async_copy = flags & RTE_VHOST_USER_ASYNC_COPY;
930ca7036b4SDavid Marchand 	vsocket->net_compliant_ol_flags = flags & RTE_VHOST_USER_NET_COMPLIANT_OL_FLAGS;
931be75dc99SMaxime Coquelin 	vsocket->stats_enabled = flags & RTE_VHOST_USER_NET_STATS_ENABLE;
932ff8989c4SDaniil Ushkov 	vsocket->async_connect = flags & RTE_VHOST_USER_ASYNC_CONNECT;
933fcfc1301SMaxime Coquelin 	if (vsocket->is_vduse)
934fcfc1301SMaxime Coquelin 		vsocket->iommu_support = true;
935fcfc1301SMaxime Coquelin 	else
9361a44f67aSDavid Marchand 		vsocket->iommu_support = flags & RTE_VHOST_USER_IOMMU_SUPPORT;
93799a2dd95SBruce Richardson 
938fcfc1301SMaxime Coquelin 	if (vsocket->async_copy && (vsocket->iommu_support ||
939fcfc1301SMaxime Coquelin 				(flags & RTE_VHOST_USER_POSTCOPY_SUPPORT))) {
9400e21c7c0SDavid Marchand 		VHOST_CONFIG_LOG(path, ERR, "async copy with IOMMU or post-copy not supported");
94199a2dd95SBruce Richardson 		goto out_mutex;
94299a2dd95SBruce Richardson 	}
94399a2dd95SBruce Richardson 
94499a2dd95SBruce Richardson 	/*
94599a2dd95SBruce Richardson 	 * Set the supported features correctly for the builtin vhost-user
94699a2dd95SBruce Richardson 	 * net driver.
94799a2dd95SBruce Richardson 	 *
94899a2dd95SBruce Richardson 	 * Applications know nothing about features the builtin virtio net
94999a2dd95SBruce Richardson 	 * driver (virtio_net.c) supports, thus it's not possible for them
95099a2dd95SBruce Richardson 	 * to invoke rte_vhost_driver_set_features(). To workaround it, here
95199a2dd95SBruce Richardson 	 * we set it unconditionally. If the application want to implement
95299a2dd95SBruce Richardson 	 * another vhost-user driver (say SCSI), it should call the
95399a2dd95SBruce Richardson 	 * rte_vhost_driver_set_features(), which will overwrite following
95499a2dd95SBruce Richardson 	 * two values.
95599a2dd95SBruce Richardson 	 */
95699a2dd95SBruce Richardson 	vsocket->use_builtin_virtio_net = true;
9574789eb43SMaxime Coquelin 	if (vsocket->is_vduse) {
9584789eb43SMaxime Coquelin 		vsocket->supported_features = VDUSE_NET_SUPPORTED_FEATURES;
9594789eb43SMaxime Coquelin 		vsocket->features           = VDUSE_NET_SUPPORTED_FEATURES;
9604789eb43SMaxime Coquelin 	} else {
9614789eb43SMaxime Coquelin 		vsocket->supported_features = VHOST_USER_NET_SUPPORTED_FEATURES;
9624789eb43SMaxime Coquelin 		vsocket->features           = VHOST_USER_NET_SUPPORTED_FEATURES;
96399a2dd95SBruce Richardson 		vsocket->protocol_features  = VHOST_USER_PROTOCOL_FEATURES;
9644789eb43SMaxime Coquelin 	}
96599a2dd95SBruce Richardson 
96699a2dd95SBruce Richardson 	if (vsocket->async_copy) {
96799a2dd95SBruce Richardson 		vsocket->supported_features &= ~(1ULL << VHOST_F_LOG_ALL);
96899a2dd95SBruce Richardson 		vsocket->features &= ~(1ULL << VHOST_F_LOG_ALL);
9690e21c7c0SDavid Marchand 		VHOST_CONFIG_LOG(path, INFO, "logging feature is disabled in async copy mode");
97099a2dd95SBruce Richardson 	}
97199a2dd95SBruce Richardson 
97299a2dd95SBruce Richardson 	/*
97399a2dd95SBruce Richardson 	 * We'll not be able to receive a buffer from guest in linear mode
97499a2dd95SBruce Richardson 	 * without external buffer if it will not fit in a single mbuf, which is
97599a2dd95SBruce Richardson 	 * likely if segmentation offloading enabled.
97699a2dd95SBruce Richardson 	 */
97799a2dd95SBruce Richardson 	if (vsocket->linearbuf && !vsocket->extbuf) {
97899a2dd95SBruce Richardson 		uint64_t seg_offload_features =
97999a2dd95SBruce Richardson 				(1ULL << VIRTIO_NET_F_HOST_TSO4) |
98099a2dd95SBruce Richardson 				(1ULL << VIRTIO_NET_F_HOST_TSO6) |
98199a2dd95SBruce Richardson 				(1ULL << VIRTIO_NET_F_HOST_UFO);
98299a2dd95SBruce Richardson 
9830e21c7c0SDavid Marchand 		VHOST_CONFIG_LOG(path, INFO, "Linear buffers requested without external buffers,");
9840e21c7c0SDavid Marchand 		VHOST_CONFIG_LOG(path, INFO, "disabling host segmentation offloading support");
98599a2dd95SBruce Richardson 		vsocket->supported_features &= ~seg_offload_features;
98699a2dd95SBruce Richardson 		vsocket->features &= ~seg_offload_features;
98799a2dd95SBruce Richardson 	}
98899a2dd95SBruce Richardson 
989fcfc1301SMaxime Coquelin 	if (!vsocket->iommu_support) {
99099a2dd95SBruce Richardson 		vsocket->supported_features &= ~(1ULL << VIRTIO_F_IOMMU_PLATFORM);
99199a2dd95SBruce Richardson 		vsocket->features &= ~(1ULL << VIRTIO_F_IOMMU_PLATFORM);
99299a2dd95SBruce Richardson 	}
99399a2dd95SBruce Richardson 
99499a2dd95SBruce Richardson 	if (!(flags & RTE_VHOST_USER_POSTCOPY_SUPPORT)) {
99599a2dd95SBruce Richardson 		vsocket->protocol_features &=
99699a2dd95SBruce Richardson 			~(1ULL << VHOST_USER_PROTOCOL_F_PAGEFAULT);
99799a2dd95SBruce Richardson 	} else {
99899a2dd95SBruce Richardson #ifndef RTE_LIBRTE_VHOST_POSTCOPY
9990e21c7c0SDavid Marchand 		VHOST_CONFIG_LOG(path, ERR, "Postcopy requested but not compiled");
100099a2dd95SBruce Richardson 		ret = -1;
100199a2dd95SBruce Richardson 		goto out_mutex;
100299a2dd95SBruce Richardson #endif
100399a2dd95SBruce Richardson 	}
100499a2dd95SBruce Richardson 
10054789eb43SMaxime Coquelin 	if (!vsocket->is_vduse) {
100699a2dd95SBruce Richardson 		if ((flags & RTE_VHOST_USER_CLIENT) != 0) {
100799a2dd95SBruce Richardson 			vsocket->reconnect = !(flags & RTE_VHOST_USER_NO_RECONNECT);
10081c1abf17SThomas Monjalon 			if (vsocket->reconnect && reconn_tid.opaque_id == 0) {
100999a2dd95SBruce Richardson 				if (vhost_user_reconnect_init() != 0)
101099a2dd95SBruce Richardson 					goto out_mutex;
101199a2dd95SBruce Richardson 			}
101299a2dd95SBruce Richardson 		} else {
101399a2dd95SBruce Richardson 			vsocket->is_server = true;
101499a2dd95SBruce Richardson 		}
101599a2dd95SBruce Richardson 		ret = create_unix_socket(vsocket);
10160adb8eccSMaxime Coquelin 		if (ret < 0)
101799a2dd95SBruce Richardson 			goto out_mutex;
101899a2dd95SBruce Richardson 	}
101999a2dd95SBruce Richardson 
102099a2dd95SBruce Richardson 	vhost_user.vsockets[vhost_user.vsocket_cnt++] = vsocket;
102199a2dd95SBruce Richardson 
102299a2dd95SBruce Richardson 	pthread_mutex_unlock(&vhost_user.mutex);
102399a2dd95SBruce Richardson 	return ret;
102499a2dd95SBruce Richardson 
102599a2dd95SBruce Richardson out_mutex:
102699a2dd95SBruce Richardson 	if (pthread_mutex_destroy(&vsocket->conn_mutex)) {
10270e21c7c0SDavid Marchand 		VHOST_CONFIG_LOG(path, ERR, "failed to destroy connection mutex");
102899a2dd95SBruce Richardson 	}
102999a2dd95SBruce Richardson out:
103099a2dd95SBruce Richardson 	pthread_mutex_unlock(&vhost_user.mutex);
103199a2dd95SBruce Richardson 
103299a2dd95SBruce Richardson 	return ret;
103399a2dd95SBruce Richardson }
103499a2dd95SBruce Richardson 
103599a2dd95SBruce Richardson static bool
103699a2dd95SBruce Richardson vhost_user_remove_reconnect(struct vhost_user_socket *vsocket)
103799a2dd95SBruce Richardson {
103899a2dd95SBruce Richardson 	int found = false;
103999a2dd95SBruce Richardson 	struct vhost_user_reconnect *reconn, *next;
104099a2dd95SBruce Richardson 
104199a2dd95SBruce Richardson 	pthread_mutex_lock(&reconn_list.mutex);
104299a2dd95SBruce Richardson 
104399a2dd95SBruce Richardson 	for (reconn = TAILQ_FIRST(&reconn_list.head);
104499a2dd95SBruce Richardson 	     reconn != NULL; reconn = next) {
104599a2dd95SBruce Richardson 		next = TAILQ_NEXT(reconn, next);
104699a2dd95SBruce Richardson 
104799a2dd95SBruce Richardson 		if (reconn->vsocket == vsocket) {
104899a2dd95SBruce Richardson 			TAILQ_REMOVE(&reconn_list.head, reconn, next);
104999a2dd95SBruce Richardson 			close(reconn->fd);
105099a2dd95SBruce Richardson 			free(reconn);
105199a2dd95SBruce Richardson 			found = true;
105299a2dd95SBruce Richardson 			break;
105399a2dd95SBruce Richardson 		}
105499a2dd95SBruce Richardson 	}
105599a2dd95SBruce Richardson 	pthread_mutex_unlock(&reconn_list.mutex);
105699a2dd95SBruce Richardson 	return found;
105799a2dd95SBruce Richardson }
105899a2dd95SBruce Richardson 
105999a2dd95SBruce Richardson /**
106099a2dd95SBruce Richardson  * Unregister the specified vhost socket
106199a2dd95SBruce Richardson  */
106299a2dd95SBruce Richardson int
106399a2dd95SBruce Richardson rte_vhost_driver_unregister(const char *path)
106499a2dd95SBruce Richardson {
106599a2dd95SBruce Richardson 	int i;
106699a2dd95SBruce Richardson 	int count;
106799a2dd95SBruce Richardson 	struct vhost_user_connection *conn, *next;
106899a2dd95SBruce Richardson 
106999a2dd95SBruce Richardson 	if (path == NULL)
107099a2dd95SBruce Richardson 		return -1;
107199a2dd95SBruce Richardson 
107299a2dd95SBruce Richardson again:
107399a2dd95SBruce Richardson 	pthread_mutex_lock(&vhost_user.mutex);
107499a2dd95SBruce Richardson 
107599a2dd95SBruce Richardson 	for (i = 0; i < vhost_user.vsocket_cnt; i++) {
107699a2dd95SBruce Richardson 		struct vhost_user_socket *vsocket = vhost_user.vsockets[i];
1077451dc0faSGaoxiang Liu 		if (strcmp(vsocket->path, path))
1078451dc0faSGaoxiang Liu 			continue;
107999a2dd95SBruce Richardson 
10800adb8eccSMaxime Coquelin 		if (vsocket->is_vduse) {
10810adb8eccSMaxime Coquelin 			vduse_device_destroy(path);
10820adb8eccSMaxime Coquelin 		} else if (vsocket->is_server) {
1083451dc0faSGaoxiang Liu 			/*
1084451dc0faSGaoxiang Liu 			 * If r/wcb is executing, release vhost_user's
1085451dc0faSGaoxiang Liu 			 * mutex lock, and try again since the r/wcb
1086451dc0faSGaoxiang Liu 			 * may use the mutex lock.
1087451dc0faSGaoxiang Liu 			 */
1088e68a6feaSMaxime Coquelin 			if (fdset_try_del(vhost_user.fdset, vsocket->socket_fd) == -1) {
1089451dc0faSGaoxiang Liu 				pthread_mutex_unlock(&vhost_user.mutex);
1090451dc0faSGaoxiang Liu 				goto again;
1091451dc0faSGaoxiang Liu 			}
1092451dc0faSGaoxiang Liu 		} else if (vsocket->reconnect) {
1093451dc0faSGaoxiang Liu 			vhost_user_remove_reconnect(vsocket);
1094451dc0faSGaoxiang Liu 		}
1095451dc0faSGaoxiang Liu 
109699a2dd95SBruce Richardson 		pthread_mutex_lock(&vsocket->conn_mutex);
109799a2dd95SBruce Richardson 		for (conn = TAILQ_FIRST(&vsocket->conn_list);
109899a2dd95SBruce Richardson 			 conn != NULL;
109999a2dd95SBruce Richardson 			 conn = next) {
110099a2dd95SBruce Richardson 			next = TAILQ_NEXT(conn, next);
110199a2dd95SBruce Richardson 
110299a2dd95SBruce Richardson 			/*
110399a2dd95SBruce Richardson 			 * If r/wcb is executing, release vsocket's
110499a2dd95SBruce Richardson 			 * conn_mutex and vhost_user's mutex locks, and
110599a2dd95SBruce Richardson 			 * try again since the r/wcb may use the
110699a2dd95SBruce Richardson 			 * conn_mutex and mutex locks.
110799a2dd95SBruce Richardson 			 */
1108e68a6feaSMaxime Coquelin 			if (fdset_try_del(vhost_user.fdset,
110999a2dd95SBruce Richardson 					  conn->connfd) == -1) {
1110451dc0faSGaoxiang Liu 				pthread_mutex_unlock(&vsocket->conn_mutex);
111199a2dd95SBruce Richardson 				pthread_mutex_unlock(&vhost_user.mutex);
111299a2dd95SBruce Richardson 				goto again;
111399a2dd95SBruce Richardson 			}
111499a2dd95SBruce Richardson 
11150e21c7c0SDavid Marchand 			VHOST_CONFIG_LOG(path, INFO, "free connfd %d", conn->connfd);
111699a2dd95SBruce Richardson 			close(conn->connfd);
111799a2dd95SBruce Richardson 			vhost_destroy_device(conn->vid);
111899a2dd95SBruce Richardson 			TAILQ_REMOVE(&vsocket->conn_list, conn, next);
111999a2dd95SBruce Richardson 			free(conn);
112099a2dd95SBruce Richardson 		}
112199a2dd95SBruce Richardson 		pthread_mutex_unlock(&vsocket->conn_mutex);
112299a2dd95SBruce Richardson 
112399a2dd95SBruce Richardson 		if (vsocket->is_server) {
112499a2dd95SBruce Richardson 			close(vsocket->socket_fd);
112599a2dd95SBruce Richardson 			unlink(path);
112699a2dd95SBruce Richardson 		}
112799a2dd95SBruce Richardson 
112899a2dd95SBruce Richardson 		pthread_mutex_destroy(&vsocket->conn_mutex);
112999a2dd95SBruce Richardson 		vhost_user_socket_mem_free(vsocket);
113099a2dd95SBruce Richardson 
113199a2dd95SBruce Richardson 		count = --vhost_user.vsocket_cnt;
113299a2dd95SBruce Richardson 		vhost_user.vsockets[i] = vhost_user.vsockets[count];
113399a2dd95SBruce Richardson 		vhost_user.vsockets[count] = NULL;
113499a2dd95SBruce Richardson 		pthread_mutex_unlock(&vhost_user.mutex);
113599a2dd95SBruce Richardson 		return 0;
113699a2dd95SBruce Richardson 	}
113799a2dd95SBruce Richardson 	pthread_mutex_unlock(&vhost_user.mutex);
113899a2dd95SBruce Richardson 
113999a2dd95SBruce Richardson 	return -1;
114099a2dd95SBruce Richardson }
114199a2dd95SBruce Richardson 
114299a2dd95SBruce Richardson /*
114399a2dd95SBruce Richardson  * Register ops so that we can add/remove device to data core.
114499a2dd95SBruce Richardson  */
11450ae35eceSDavid Marchand int
11460ae35eceSDavid Marchand rte_vhost_driver_callback_register(const char *path,
11470ae35eceSDavid Marchand 	struct rte_vhost_device_ops const * const ops)
114899a2dd95SBruce Richardson {
114999a2dd95SBruce Richardson 	struct vhost_user_socket *vsocket;
115099a2dd95SBruce Richardson 
115199a2dd95SBruce Richardson 	pthread_mutex_lock(&vhost_user.mutex);
115299a2dd95SBruce Richardson 	vsocket = find_vhost_user_socket(path);
11530ae35eceSDavid Marchand 	if (vsocket)
115499a2dd95SBruce Richardson 		vsocket->notify_ops = ops;
115599a2dd95SBruce Richardson 	pthread_mutex_unlock(&vhost_user.mutex);
115699a2dd95SBruce Richardson 
115799a2dd95SBruce Richardson 	return vsocket ? 0 : -1;
115899a2dd95SBruce Richardson }
115999a2dd95SBruce Richardson 
1160ab4bb424SMaxime Coquelin struct rte_vhost_device_ops const *
116199a2dd95SBruce Richardson vhost_driver_callback_get(const char *path)
116299a2dd95SBruce Richardson {
116399a2dd95SBruce Richardson 	struct vhost_user_socket *vsocket;
116499a2dd95SBruce Richardson 
116599a2dd95SBruce Richardson 	pthread_mutex_lock(&vhost_user.mutex);
116699a2dd95SBruce Richardson 	vsocket = find_vhost_user_socket(path);
116799a2dd95SBruce Richardson 	pthread_mutex_unlock(&vhost_user.mutex);
116899a2dd95SBruce Richardson 
116999a2dd95SBruce Richardson 	return vsocket ? vsocket->notify_ops : NULL;
117099a2dd95SBruce Richardson }
117199a2dd95SBruce Richardson 
117299a2dd95SBruce Richardson int
117399a2dd95SBruce Richardson rte_vhost_driver_start(const char *path)
117499a2dd95SBruce Richardson {
117599a2dd95SBruce Richardson 	struct vhost_user_socket *vsocket;
117699a2dd95SBruce Richardson 
117799a2dd95SBruce Richardson 	pthread_mutex_lock(&vhost_user.mutex);
117899a2dd95SBruce Richardson 	vsocket = find_vhost_user_socket(path);
117999a2dd95SBruce Richardson 	pthread_mutex_unlock(&vhost_user.mutex);
118099a2dd95SBruce Richardson 
118199a2dd95SBruce Richardson 	if (!vsocket)
118299a2dd95SBruce Richardson 		return -1;
118399a2dd95SBruce Richardson 
11840adb8eccSMaxime Coquelin 	if (vsocket->is_vduse)
1185927d2aefSMaxime Coquelin 		return vduse_device_create(path, vsocket->net_compliant_ol_flags);
11860adb8eccSMaxime Coquelin 
1187e68a6feaSMaxime Coquelin 	if (vhost_user.fdset == NULL) {
1188e68a6feaSMaxime Coquelin 		vhost_user.fdset = fdset_init("vhost-evt");
1189e68a6feaSMaxime Coquelin 		if (vhost_user.fdset == NULL) {
11907945769cSMaxime Coquelin 			VHOST_CONFIG_LOG(path, ERR, "failed to init Vhost-user fdset");
11917945769cSMaxime Coquelin 			return -1;
11927945769cSMaxime Coquelin 		}
119399a2dd95SBruce Richardson 	}
119499a2dd95SBruce Richardson 
119599a2dd95SBruce Richardson 	if (vsocket->is_server)
119699a2dd95SBruce Richardson 		return vhost_user_start_server(vsocket);
119799a2dd95SBruce Richardson 	else
119899a2dd95SBruce Richardson 		return vhost_user_start_client(vsocket);
119999a2dd95SBruce Richardson }
1200