xref: /dpdk/lib/vhost/socket.c (revision 665b49c51639a10c553433bc2bcd85c7331c631e)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2016 Intel Corporation
3  */
4 
5 #include <stdint.h>
6 #include <stdio.h>
7 #include <limits.h>
8 #include <stdlib.h>
9 #include <unistd.h>
10 #include <string.h>
11 #include <sys/socket.h>
12 #include <sys/un.h>
13 #include <sys/queue.h>
14 #include <errno.h>
15 #include <fcntl.h>
16 #include <pthread.h>
17 
18 #include <rte_log.h>
19 
20 #include "fd_man.h"
21 #include "vhost.h"
22 #include "vhost_user.h"
23 
24 
/* List head type for the per-socket list of vhost_user_connection entries. */
TAILQ_HEAD(vhost_user_connection_list, vhost_user_connection);

/*
 * Every time rte_vhost_driver_register() is invoked, an associated
 * vhost_user_socket struct will be created.
 */
struct vhost_user_socket {
	/* Connections currently attached to this socket. */
	struct vhost_user_connection_list conn_list;
	/* Taken around insertions into and removals from conn_list. */
	pthread_mutex_t conn_mutex;
	/* strdup() copy of the path passed to rte_vhost_driver_register(). */
	char *path;
	/* Descriptor stored by create_unix_socket(). */
	int socket_fd;
	/* AF_UNIX address built from path by create_unix_socket(). */
	struct sockaddr_un un;
	/* True unless RTE_VHOST_USER_CLIENT was given at registration. */
	bool is_server;
	/* Client mode only: true unless RTE_VHOST_USER_NO_RECONNECT was given. */
	bool reconnect;
	/* The following flags mirror the RTE_VHOST_USER_* registration flags. */
	bool iommu_support;
	/*
	 * Set at registration; cleared by rte_vhost_driver_set_features().
	 */
	bool use_builtin_virtio_net;
	bool extbuf;
	bool linearbuf;
	bool async_copy;
	bool net_compliant_ol_flags;
	bool stats_enabled;

	/*
	 * The "supported_features" indicates the feature bits the
	 * vhost driver supports. The "features" indicates the feature
	 * bits after the rte_vhost_driver_features_disable/enable().
	 * It is also the final feature bits used for vhost-user
	 * features negotiation.
	 */
	uint64_t supported_features;
	uint64_t features;

	uint64_t protocol_features;

	/* Set and cleared by rte_vhost_driver_{attach,detach}_vdpa_device(). */
	struct rte_vdpa_device *vdpa_dev;

	/* Callbacks installed by rte_vhost_driver_callback_register(). */
	struct rte_vhost_device_ops const *notify_ops;
};
63 
/*
 * One established vhost-user connection. Allocated by
 * vhost_user_add_connection() and linked into its socket's conn_list.
 */
struct vhost_user_connection {
	/* Socket this connection belongs to. */
	struct vhost_user_socket *vsocket;
	/* Connected descriptor registered in the fdset. */
	int connfd;
	/* Device id returned by vhost_new_device() for this connection. */
	int vid;

	TAILQ_ENTRY(vhost_user_connection) next;
};
71 
/* Upper bound on simultaneously registered vhost-user sockets. */
#define MAX_VHOST_SOCKET 1024
/* Process-wide registry of vhost-user sockets plus the shared fdset. */
struct vhost_user {
	/* Registered sockets; the first vsocket_cnt slots are in use. */
	struct vhost_user_socket *vsockets[MAX_VHOST_SOCKET];
	/* Descriptor set holding listen and connection fds. */
	struct fdset fdset;
	int vsocket_cnt;
	/* Held while searching or modifying vsockets[] and vsocket_cnt. */
	pthread_mutex_t mutex;
};
79 
/* Backlog passed to listen() for server-mode sockets. */
#define MAX_VIRTIO_BACKLOG 128

/* Forward declarations for callbacks and helpers defined later in this file. */
static void vhost_user_server_new_connection(int fd, void *data, int *remove);
static void vhost_user_read_cb(int fd, void *dat, int *remove);
static int create_unix_socket(struct vhost_user_socket *vsocket);
static int vhost_user_start_client(struct vhost_user_socket *vsocket);

/*
 * The single registry instance. Every fdset entry starts with -1 as its
 * first member (presumably the "unused fd" marker; the entry layout is
 * declared in fd_man.h - TODO confirm).
 */
static struct vhost_user vhost_user = {
	.fdset = {
		.fd = { [0 ... MAX_FDS - 1] = {-1, NULL, NULL, NULL, 0} },
		.fd_mutex = PTHREAD_MUTEX_INITIALIZER,
		.fd_pooling_mutex = PTHREAD_MUTEX_INITIALIZER,
		.num = 0
	},
	.vsocket_cnt = 0,
	.mutex = PTHREAD_MUTEX_INITIALIZER,
};
97 
98 /*
99  * return bytes# of read on success or negative val on failure. Update fdnum
100  * with number of fds read.
101  */
102 int
103 read_fd_message(char *ifname, int sockfd, char *buf, int buflen, int *fds, int max_fds,
104 		int *fd_num)
105 {
106 	struct iovec iov;
107 	struct msghdr msgh;
108 	char control[CMSG_SPACE(max_fds * sizeof(int))];
109 	struct cmsghdr *cmsg;
110 	int got_fds = 0;
111 	int ret;
112 
113 	*fd_num = 0;
114 
115 	memset(&msgh, 0, sizeof(msgh));
116 	iov.iov_base = buf;
117 	iov.iov_len  = buflen;
118 
119 	msgh.msg_iov = &iov;
120 	msgh.msg_iovlen = 1;
121 	msgh.msg_control = control;
122 	msgh.msg_controllen = sizeof(control);
123 
124 	ret = recvmsg(sockfd, &msgh, 0);
125 	if (ret <= 0) {
126 		if (ret)
127 			VHOST_LOG_CONFIG(ifname, ERR, "recvmsg failed on fd %d (%s)\n",
128 				sockfd, strerror(errno));
129 		return ret;
130 	}
131 
132 	if (msgh.msg_flags & MSG_TRUNC)
133 		VHOST_LOG_CONFIG(ifname, ERR, "truncated msg (fd %d)\n", sockfd);
134 
135 	/* MSG_CTRUNC may be caused by LSM misconfiguration */
136 	if (msgh.msg_flags & MSG_CTRUNC)
137 		VHOST_LOG_CONFIG(ifname, ERR, "truncated control data (fd %d)\n", sockfd);
138 
139 	for (cmsg = CMSG_FIRSTHDR(&msgh); cmsg != NULL;
140 		cmsg = CMSG_NXTHDR(&msgh, cmsg)) {
141 		if ((cmsg->cmsg_level == SOL_SOCKET) &&
142 			(cmsg->cmsg_type == SCM_RIGHTS)) {
143 			got_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
144 			*fd_num = got_fds;
145 			memcpy(fds, CMSG_DATA(cmsg), got_fds * sizeof(int));
146 			break;
147 		}
148 	}
149 
150 	/* Clear out unused file descriptors */
151 	while (got_fds < max_fds)
152 		fds[got_fds++] = -1;
153 
154 	return ret;
155 }
156 
157 int
158 send_fd_message(char *ifname, int sockfd, char *buf, int buflen, int *fds, int fd_num)
159 {
160 
161 	struct iovec iov;
162 	struct msghdr msgh;
163 	size_t fdsize = fd_num * sizeof(int);
164 	char control[CMSG_SPACE(fdsize)];
165 	struct cmsghdr *cmsg;
166 	int ret;
167 
168 	memset(&msgh, 0, sizeof(msgh));
169 	iov.iov_base = buf;
170 	iov.iov_len = buflen;
171 
172 	msgh.msg_iov = &iov;
173 	msgh.msg_iovlen = 1;
174 
175 	if (fds && fd_num > 0) {
176 		msgh.msg_control = control;
177 		msgh.msg_controllen = sizeof(control);
178 		cmsg = CMSG_FIRSTHDR(&msgh);
179 		if (cmsg == NULL) {
180 			VHOST_LOG_CONFIG(ifname, ERR, "cmsg == NULL\n");
181 			errno = EINVAL;
182 			return -1;
183 		}
184 		cmsg->cmsg_len = CMSG_LEN(fdsize);
185 		cmsg->cmsg_level = SOL_SOCKET;
186 		cmsg->cmsg_type = SCM_RIGHTS;
187 		memcpy(CMSG_DATA(cmsg), fds, fdsize);
188 	} else {
189 		msgh.msg_control = NULL;
190 		msgh.msg_controllen = 0;
191 	}
192 
193 	do {
194 		ret = sendmsg(sockfd, &msgh, MSG_NOSIGNAL);
195 	} while (ret < 0 && errno == EINTR);
196 
197 	if (ret < 0) {
198 		VHOST_LOG_CONFIG(ifname, ERR, "sendmsg error on fd %d (%s)\n",
199 			sockfd, strerror(errno));
200 		return ret;
201 	}
202 
203 	return ret;
204 }
205 
206 static void
207 vhost_user_add_connection(int fd, struct vhost_user_socket *vsocket)
208 {
209 	int vid;
210 	size_t size;
211 	struct vhost_user_connection *conn;
212 	int ret;
213 	struct virtio_net *dev;
214 
215 	if (vsocket == NULL)
216 		return;
217 
218 	conn = malloc(sizeof(*conn));
219 	if (conn == NULL) {
220 		close(fd);
221 		return;
222 	}
223 
224 	vid = vhost_new_device();
225 	if (vid == -1) {
226 		goto err;
227 	}
228 
229 	size = strnlen(vsocket->path, PATH_MAX);
230 	vhost_set_ifname(vid, vsocket->path, size);
231 
232 	vhost_setup_virtio_net(vid, vsocket->use_builtin_virtio_net,
233 		vsocket->net_compliant_ol_flags, vsocket->stats_enabled,
234 		vsocket->iommu_support);
235 
236 	vhost_attach_vdpa_device(vid, vsocket->vdpa_dev);
237 
238 	if (vsocket->extbuf)
239 		vhost_enable_extbuf(vid);
240 
241 	if (vsocket->linearbuf)
242 		vhost_enable_linearbuf(vid);
243 
244 	if (vsocket->async_copy) {
245 		dev = get_device(vid);
246 
247 		if (dev)
248 			dev->async_copy = 1;
249 	}
250 
251 	VHOST_LOG_CONFIG(vsocket->path, INFO, "new device, handle is %d\n", vid);
252 
253 	if (vsocket->notify_ops->new_connection) {
254 		ret = vsocket->notify_ops->new_connection(vid);
255 		if (ret < 0) {
256 			VHOST_LOG_CONFIG(vsocket->path, ERR,
257 				"failed to add vhost user connection with fd %d\n",
258 				fd);
259 			goto err_cleanup;
260 		}
261 	}
262 
263 	conn->connfd = fd;
264 	conn->vsocket = vsocket;
265 	conn->vid = vid;
266 	ret = fdset_add(&vhost_user.fdset, fd, vhost_user_read_cb,
267 			NULL, conn);
268 	if (ret < 0) {
269 		VHOST_LOG_CONFIG(vsocket->path, ERR,
270 			"failed to add fd %d into vhost server fdset\n",
271 			fd);
272 
273 		if (vsocket->notify_ops->destroy_connection)
274 			vsocket->notify_ops->destroy_connection(conn->vid);
275 
276 		goto err_cleanup;
277 	}
278 
279 	pthread_mutex_lock(&vsocket->conn_mutex);
280 	TAILQ_INSERT_TAIL(&vsocket->conn_list, conn, next);
281 	pthread_mutex_unlock(&vsocket->conn_mutex);
282 
283 	fdset_pipe_notify(&vhost_user.fdset);
284 	return;
285 
286 err_cleanup:
287 	vhost_destroy_device(vid);
288 err:
289 	free(conn);
290 	close(fd);
291 }
292 
293 /* call back when there is new vhost-user connection from client  */
294 static void
295 vhost_user_server_new_connection(int fd, void *dat, int *remove __rte_unused)
296 {
297 	struct vhost_user_socket *vsocket = dat;
298 
299 	fd = accept(fd, NULL, NULL);
300 	if (fd < 0)
301 		return;
302 
303 	VHOST_LOG_CONFIG(vsocket->path, INFO, "new vhost user connection is %d\n", fd);
304 	vhost_user_add_connection(fd, vsocket);
305 }
306 
307 static void
308 vhost_user_read_cb(int connfd, void *dat, int *remove)
309 {
310 	struct vhost_user_connection *conn = dat;
311 	struct vhost_user_socket *vsocket = conn->vsocket;
312 	int ret;
313 
314 	ret = vhost_user_msg_handler(conn->vid, connfd);
315 	if (ret < 0) {
316 		struct virtio_net *dev = get_device(conn->vid);
317 
318 		close(connfd);
319 		*remove = 1;
320 
321 		if (dev)
322 			vhost_destroy_device_notify(dev);
323 
324 		if (vsocket->notify_ops->destroy_connection)
325 			vsocket->notify_ops->destroy_connection(conn->vid);
326 
327 		vhost_destroy_device(conn->vid);
328 
329 		if (vsocket->reconnect) {
330 			create_unix_socket(vsocket);
331 			vhost_user_start_client(vsocket);
332 		}
333 
334 		pthread_mutex_lock(&vsocket->conn_mutex);
335 		TAILQ_REMOVE(&vsocket->conn_list, conn, next);
336 		pthread_mutex_unlock(&vsocket->conn_mutex);
337 
338 		free(conn);
339 	}
340 }
341 
342 static int
343 create_unix_socket(struct vhost_user_socket *vsocket)
344 {
345 	int fd;
346 	struct sockaddr_un *un = &vsocket->un;
347 
348 	fd = socket(AF_UNIX, SOCK_STREAM, 0);
349 	if (fd < 0)
350 		return -1;
351 	VHOST_LOG_CONFIG(vsocket->path, INFO, "vhost-user %s: socket created, fd: %d\n",
352 		vsocket->is_server ? "server" : "client", fd);
353 
354 	if (!vsocket->is_server && fcntl(fd, F_SETFL, O_NONBLOCK)) {
355 		VHOST_LOG_CONFIG(vsocket->path, ERR,
356 			"vhost-user: can't set nonblocking mode for socket, fd: %d (%s)\n",
357 			fd, strerror(errno));
358 		close(fd);
359 		return -1;
360 	}
361 
362 	memset(un, 0, sizeof(*un));
363 	un->sun_family = AF_UNIX;
364 	strncpy(un->sun_path, vsocket->path, sizeof(un->sun_path));
365 	un->sun_path[sizeof(un->sun_path) - 1] = '\0';
366 
367 	vsocket->socket_fd = fd;
368 	return 0;
369 }
370 
371 static int
372 vhost_user_start_server(struct vhost_user_socket *vsocket)
373 {
374 	int ret;
375 	int fd = vsocket->socket_fd;
376 	const char *path = vsocket->path;
377 
378 	/*
379 	 * bind () may fail if the socket file with the same name already
380 	 * exists. But the library obviously should not delete the file
381 	 * provided by the user, since we can not be sure that it is not
382 	 * being used by other applications. Moreover, many applications form
383 	 * socket names based on user input, which is prone to errors.
384 	 *
385 	 * The user must ensure that the socket does not exist before
386 	 * registering the vhost driver in server mode.
387 	 */
388 	ret = bind(fd, (struct sockaddr *)&vsocket->un, sizeof(vsocket->un));
389 	if (ret < 0) {
390 		VHOST_LOG_CONFIG(path, ERR, "failed to bind: %s; remove it and try again\n",
391 			strerror(errno));
392 		goto err;
393 	}
394 	VHOST_LOG_CONFIG(path, INFO, "binding succeeded\n");
395 
396 	ret = listen(fd, MAX_VIRTIO_BACKLOG);
397 	if (ret < 0)
398 		goto err;
399 
400 	ret = fdset_add(&vhost_user.fdset, fd, vhost_user_server_new_connection,
401 		  NULL, vsocket);
402 	if (ret < 0) {
403 		VHOST_LOG_CONFIG(path, ERR, "failed to add listen fd %d to vhost server fdset\n",
404 			fd);
405 		goto err;
406 	}
407 
408 	return 0;
409 
410 err:
411 	close(fd);
412 	return -1;
413 }
414 
/*
 * A pending client connection attempt. Entries are queued by
 * vhost_user_start_client() and retried by vhost_user_client_reconnect().
 */
struct vhost_user_reconnect {
	/* Copy of the socket address to connect to. */
	struct sockaddr_un un;
	/* Descriptor on which connect() is retried. */
	int fd;
	struct vhost_user_socket *vsocket;

	TAILQ_ENTRY(vhost_user_reconnect) next;
};

TAILQ_HEAD(vhost_user_reconnect_tailq_list, vhost_user_reconnect);
/* Queue of pending reconnects together with the mutex guarding it. */
struct vhost_user_reconnect_list {
	struct vhost_user_reconnect_tailq_list head;
	pthread_mutex_t mutex;
};

/* Initialised by vhost_user_reconnect_init(). */
static struct vhost_user_reconnect_list reconn_list;
/* Reconnect thread id; rte_vhost_driver_register() treats 0 as "not started". */
static pthread_t reconn_tid;
431 
432 static int
433 vhost_user_connect_nonblock(char *path, int fd, struct sockaddr *un, size_t sz)
434 {
435 	int ret, flags;
436 
437 	ret = connect(fd, un, sz);
438 	if (ret < 0 && errno != EISCONN)
439 		return -1;
440 
441 	flags = fcntl(fd, F_GETFL, 0);
442 	if (flags < 0) {
443 		VHOST_LOG_CONFIG(path, ERR, "can't get flags for connfd %d (%s)\n",
444 			fd, strerror(errno));
445 		return -2;
446 	}
447 	if ((flags & O_NONBLOCK) && fcntl(fd, F_SETFL, flags & ~O_NONBLOCK)) {
448 		VHOST_LOG_CONFIG(path, ERR, "can't disable nonblocking on fd %d\n", fd);
449 		return -2;
450 	}
451 	return 0;
452 }
453 
454 static void *
455 vhost_user_client_reconnect(void *arg __rte_unused)
456 {
457 	int ret;
458 	struct vhost_user_reconnect *reconn, *next;
459 
460 	while (1) {
461 		pthread_mutex_lock(&reconn_list.mutex);
462 
463 		/*
464 		 * An equal implementation of TAILQ_FOREACH_SAFE,
465 		 * which does not exist on all platforms.
466 		 */
467 		for (reconn = TAILQ_FIRST(&reconn_list.head);
468 		     reconn != NULL; reconn = next) {
469 			next = TAILQ_NEXT(reconn, next);
470 
471 			ret = vhost_user_connect_nonblock(reconn->vsocket->path, reconn->fd,
472 						(struct sockaddr *)&reconn->un,
473 						sizeof(reconn->un));
474 			if (ret == -2) {
475 				close(reconn->fd);
476 				VHOST_LOG_CONFIG(reconn->vsocket->path, ERR,
477 					"reconnection for fd %d failed\n",
478 					reconn->fd);
479 				goto remove_fd;
480 			}
481 			if (ret == -1)
482 				continue;
483 
484 			VHOST_LOG_CONFIG(reconn->vsocket->path, INFO, "connected\n");
485 			vhost_user_add_connection(reconn->fd, reconn->vsocket);
486 remove_fd:
487 			TAILQ_REMOVE(&reconn_list.head, reconn, next);
488 			free(reconn);
489 		}
490 
491 		pthread_mutex_unlock(&reconn_list.mutex);
492 		sleep(1);
493 	}
494 
495 	return NULL;
496 }
497 
498 static int
499 vhost_user_reconnect_init(void)
500 {
501 	int ret;
502 
503 	ret = pthread_mutex_init(&reconn_list.mutex, NULL);
504 	if (ret < 0) {
505 		VHOST_LOG_CONFIG("thread", ERR, "%s: failed to initialize mutex\n", __func__);
506 		return ret;
507 	}
508 	TAILQ_INIT(&reconn_list.head);
509 
510 	ret = rte_ctrl_thread_create(&reconn_tid, "vhost_reconn", NULL,
511 			     vhost_user_client_reconnect, NULL);
512 	if (ret != 0) {
513 		VHOST_LOG_CONFIG("thread", ERR, "failed to create reconnect thread\n");
514 		if (pthread_mutex_destroy(&reconn_list.mutex))
515 			VHOST_LOG_CONFIG("thread", ERR,
516 				"%s: failed to destroy reconnect mutex\n",
517 				__func__);
518 	}
519 
520 	return ret;
521 }
522 
523 static int
524 vhost_user_start_client(struct vhost_user_socket *vsocket)
525 {
526 	int ret;
527 	int fd = vsocket->socket_fd;
528 	const char *path = vsocket->path;
529 	struct vhost_user_reconnect *reconn;
530 
531 	ret = vhost_user_connect_nonblock(vsocket->path, fd, (struct sockaddr *)&vsocket->un,
532 					  sizeof(vsocket->un));
533 	if (ret == 0) {
534 		vhost_user_add_connection(fd, vsocket);
535 		return 0;
536 	}
537 
538 	VHOST_LOG_CONFIG(path, WARNING, "failed to connect: %s\n", strerror(errno));
539 
540 	if (ret == -2 || !vsocket->reconnect) {
541 		close(fd);
542 		return -1;
543 	}
544 
545 	VHOST_LOG_CONFIG(path, INFO, "reconnecting...\n");
546 	reconn = malloc(sizeof(*reconn));
547 	if (reconn == NULL) {
548 		VHOST_LOG_CONFIG(path, ERR, "failed to allocate memory for reconnect\n");
549 		close(fd);
550 		return -1;
551 	}
552 	reconn->un = vsocket->un;
553 	reconn->fd = fd;
554 	reconn->vsocket = vsocket;
555 	pthread_mutex_lock(&reconn_list.mutex);
556 	TAILQ_INSERT_TAIL(&reconn_list.head, reconn, next);
557 	pthread_mutex_unlock(&reconn_list.mutex);
558 
559 	return 0;
560 }
561 
562 static struct vhost_user_socket *
563 find_vhost_user_socket(const char *path)
564 {
565 	int i;
566 
567 	if (path == NULL)
568 		return NULL;
569 
570 	for (i = 0; i < vhost_user.vsocket_cnt; i++) {
571 		struct vhost_user_socket *vsocket = vhost_user.vsockets[i];
572 
573 		if (!strcmp(vsocket->path, path))
574 			return vsocket;
575 	}
576 
577 	return NULL;
578 }
579 
580 int
581 rte_vhost_driver_attach_vdpa_device(const char *path,
582 		struct rte_vdpa_device *dev)
583 {
584 	struct vhost_user_socket *vsocket;
585 
586 	if (dev == NULL || path == NULL)
587 		return -1;
588 
589 	pthread_mutex_lock(&vhost_user.mutex);
590 	vsocket = find_vhost_user_socket(path);
591 	if (vsocket)
592 		vsocket->vdpa_dev = dev;
593 	pthread_mutex_unlock(&vhost_user.mutex);
594 
595 	return vsocket ? 0 : -1;
596 }
597 
598 int
599 rte_vhost_driver_detach_vdpa_device(const char *path)
600 {
601 	struct vhost_user_socket *vsocket;
602 
603 	pthread_mutex_lock(&vhost_user.mutex);
604 	vsocket = find_vhost_user_socket(path);
605 	if (vsocket)
606 		vsocket->vdpa_dev = NULL;
607 	pthread_mutex_unlock(&vhost_user.mutex);
608 
609 	return vsocket ? 0 : -1;
610 }
611 
612 struct rte_vdpa_device *
613 rte_vhost_driver_get_vdpa_device(const char *path)
614 {
615 	struct vhost_user_socket *vsocket;
616 	struct rte_vdpa_device *dev = NULL;
617 
618 	pthread_mutex_lock(&vhost_user.mutex);
619 	vsocket = find_vhost_user_socket(path);
620 	if (vsocket)
621 		dev = vsocket->vdpa_dev;
622 	pthread_mutex_unlock(&vhost_user.mutex);
623 
624 	return dev;
625 }
626 
627 int
628 rte_vhost_driver_get_vdpa_dev_type(const char *path, uint32_t *type)
629 {
630 	struct vhost_user_socket *vsocket;
631 	struct rte_vdpa_device *vdpa_dev;
632 	int ret = 0;
633 
634 	pthread_mutex_lock(&vhost_user.mutex);
635 	vsocket = find_vhost_user_socket(path);
636 	if (!vsocket) {
637 		VHOST_LOG_CONFIG(path, ERR, "socket file is not registered yet.\n");
638 		ret = -1;
639 		goto unlock_exit;
640 	}
641 
642 	vdpa_dev = vsocket->vdpa_dev;
643 	if (!vdpa_dev) {
644 		ret = -1;
645 		goto unlock_exit;
646 	}
647 
648 	*type = vdpa_dev->type;
649 
650 unlock_exit:
651 	pthread_mutex_unlock(&vhost_user.mutex);
652 	return ret;
653 }
654 
655 int
656 rte_vhost_driver_disable_features(const char *path, uint64_t features)
657 {
658 	struct vhost_user_socket *vsocket;
659 
660 	pthread_mutex_lock(&vhost_user.mutex);
661 	vsocket = find_vhost_user_socket(path);
662 
663 	/* Note that use_builtin_virtio_net is not affected by this function
664 	 * since callers may want to selectively disable features of the
665 	 * built-in vhost net device backend.
666 	 */
667 
668 	if (vsocket)
669 		vsocket->features &= ~features;
670 	pthread_mutex_unlock(&vhost_user.mutex);
671 
672 	return vsocket ? 0 : -1;
673 }
674 
675 int
676 rte_vhost_driver_enable_features(const char *path, uint64_t features)
677 {
678 	struct vhost_user_socket *vsocket;
679 
680 	pthread_mutex_lock(&vhost_user.mutex);
681 	vsocket = find_vhost_user_socket(path);
682 	if (vsocket) {
683 		if ((vsocket->supported_features & features) != features) {
684 			/*
685 			 * trying to enable features the driver doesn't
686 			 * support.
687 			 */
688 			pthread_mutex_unlock(&vhost_user.mutex);
689 			return -1;
690 		}
691 		vsocket->features |= features;
692 	}
693 	pthread_mutex_unlock(&vhost_user.mutex);
694 
695 	return vsocket ? 0 : -1;
696 }
697 
698 int
699 rte_vhost_driver_set_features(const char *path, uint64_t features)
700 {
701 	struct vhost_user_socket *vsocket;
702 
703 	pthread_mutex_lock(&vhost_user.mutex);
704 	vsocket = find_vhost_user_socket(path);
705 	if (vsocket) {
706 		vsocket->supported_features = features;
707 		vsocket->features = features;
708 
709 		/* Anyone setting feature bits is implementing their own vhost
710 		 * device backend.
711 		 */
712 		vsocket->use_builtin_virtio_net = false;
713 	}
714 	pthread_mutex_unlock(&vhost_user.mutex);
715 
716 	return vsocket ? 0 : -1;
717 }
718 
719 int
720 rte_vhost_driver_get_features(const char *path, uint64_t *features)
721 {
722 	struct vhost_user_socket *vsocket;
723 	uint64_t vdpa_features;
724 	struct rte_vdpa_device *vdpa_dev;
725 	int ret = 0;
726 
727 	pthread_mutex_lock(&vhost_user.mutex);
728 	vsocket = find_vhost_user_socket(path);
729 	if (!vsocket) {
730 		VHOST_LOG_CONFIG(path, ERR, "socket file is not registered yet.\n");
731 		ret = -1;
732 		goto unlock_exit;
733 	}
734 
735 	vdpa_dev = vsocket->vdpa_dev;
736 	if (!vdpa_dev) {
737 		*features = vsocket->features;
738 		goto unlock_exit;
739 	}
740 
741 	if (vdpa_dev->ops->get_features(vdpa_dev, &vdpa_features) < 0) {
742 		VHOST_LOG_CONFIG(path, ERR, "failed to get vdpa features for socket file.\n");
743 		ret = -1;
744 		goto unlock_exit;
745 	}
746 
747 	*features = vsocket->features & vdpa_features;
748 
749 unlock_exit:
750 	pthread_mutex_unlock(&vhost_user.mutex);
751 	return ret;
752 }
753 
754 int
755 rte_vhost_driver_set_protocol_features(const char *path,
756 		uint64_t protocol_features)
757 {
758 	struct vhost_user_socket *vsocket;
759 
760 	pthread_mutex_lock(&vhost_user.mutex);
761 	vsocket = find_vhost_user_socket(path);
762 	if (vsocket)
763 		vsocket->protocol_features = protocol_features;
764 	pthread_mutex_unlock(&vhost_user.mutex);
765 	return vsocket ? 0 : -1;
766 }
767 
768 int
769 rte_vhost_driver_get_protocol_features(const char *path,
770 		uint64_t *protocol_features)
771 {
772 	struct vhost_user_socket *vsocket;
773 	uint64_t vdpa_protocol_features;
774 	struct rte_vdpa_device *vdpa_dev;
775 	int ret = 0;
776 
777 	pthread_mutex_lock(&vhost_user.mutex);
778 	vsocket = find_vhost_user_socket(path);
779 	if (!vsocket) {
780 		VHOST_LOG_CONFIG(path, ERR, "socket file is not registered yet.\n");
781 		ret = -1;
782 		goto unlock_exit;
783 	}
784 
785 	vdpa_dev = vsocket->vdpa_dev;
786 	if (!vdpa_dev) {
787 		*protocol_features = vsocket->protocol_features;
788 		goto unlock_exit;
789 	}
790 
791 	if (vdpa_dev->ops->get_protocol_features(vdpa_dev,
792 				&vdpa_protocol_features) < 0) {
793 		VHOST_LOG_CONFIG(path, ERR, "failed to get vdpa protocol features.\n");
794 		ret = -1;
795 		goto unlock_exit;
796 	}
797 
798 	*protocol_features = vsocket->protocol_features
799 		& vdpa_protocol_features;
800 
801 unlock_exit:
802 	pthread_mutex_unlock(&vhost_user.mutex);
803 	return ret;
804 }
805 
806 int
807 rte_vhost_driver_get_queue_num(const char *path, uint32_t *queue_num)
808 {
809 	struct vhost_user_socket *vsocket;
810 	uint32_t vdpa_queue_num;
811 	struct rte_vdpa_device *vdpa_dev;
812 	int ret = 0;
813 
814 	pthread_mutex_lock(&vhost_user.mutex);
815 	vsocket = find_vhost_user_socket(path);
816 	if (!vsocket) {
817 		VHOST_LOG_CONFIG(path, ERR, "socket file is not registered yet.\n");
818 		ret = -1;
819 		goto unlock_exit;
820 	}
821 
822 	vdpa_dev = vsocket->vdpa_dev;
823 	if (!vdpa_dev) {
824 		*queue_num = VHOST_MAX_QUEUE_PAIRS;
825 		goto unlock_exit;
826 	}
827 
828 	if (vdpa_dev->ops->get_queue_num(vdpa_dev, &vdpa_queue_num) < 0) {
829 		VHOST_LOG_CONFIG(path, ERR, "failed to get vdpa queue number.\n");
830 		ret = -1;
831 		goto unlock_exit;
832 	}
833 
834 	*queue_num = RTE_MIN((uint32_t)VHOST_MAX_QUEUE_PAIRS, vdpa_queue_num);
835 
836 unlock_exit:
837 	pthread_mutex_unlock(&vhost_user.mutex);
838 	return ret;
839 }
840 
841 static void
842 vhost_user_socket_mem_free(struct vhost_user_socket *vsocket)
843 {
844 	if (vsocket && vsocket->path) {
845 		free(vsocket->path);
846 		vsocket->path = NULL;
847 	}
848 
849 	if (vsocket) {
850 		free(vsocket);
851 		vsocket = NULL;
852 	}
853 }
854 
855 /*
856  * Register a new vhost-user socket; here we could act as server
857  * (the default case), or client (when RTE_VHOST_USER_CLIENT) flag
858  * is set.
859  */
860 int
861 rte_vhost_driver_register(const char *path, uint64_t flags)
862 {
863 	int ret = -1;
864 	struct vhost_user_socket *vsocket;
865 
866 	if (!path)
867 		return -1;
868 
869 	pthread_mutex_lock(&vhost_user.mutex);
870 
871 	if (vhost_user.vsocket_cnt == MAX_VHOST_SOCKET) {
872 		VHOST_LOG_CONFIG(path, ERR, "the number of vhost sockets reaches maximum\n");
873 		goto out;
874 	}
875 
876 	vsocket = malloc(sizeof(struct vhost_user_socket));
877 	if (!vsocket)
878 		goto out;
879 	memset(vsocket, 0, sizeof(struct vhost_user_socket));
880 	vsocket->path = strdup(path);
881 	if (vsocket->path == NULL) {
882 		VHOST_LOG_CONFIG(path, ERR, "failed to copy socket path string\n");
883 		vhost_user_socket_mem_free(vsocket);
884 		goto out;
885 	}
886 	TAILQ_INIT(&vsocket->conn_list);
887 	ret = pthread_mutex_init(&vsocket->conn_mutex, NULL);
888 	if (ret) {
889 		VHOST_LOG_CONFIG(path, ERR, "failed to init connection mutex\n");
890 		goto out_free;
891 	}
892 	vsocket->vdpa_dev = NULL;
893 	vsocket->extbuf = flags & RTE_VHOST_USER_EXTBUF_SUPPORT;
894 	vsocket->linearbuf = flags & RTE_VHOST_USER_LINEARBUF_SUPPORT;
895 	vsocket->async_copy = flags & RTE_VHOST_USER_ASYNC_COPY;
896 	vsocket->net_compliant_ol_flags = flags & RTE_VHOST_USER_NET_COMPLIANT_OL_FLAGS;
897 	vsocket->stats_enabled = flags & RTE_VHOST_USER_NET_STATS_ENABLE;
898 	vsocket->iommu_support = flags & RTE_VHOST_USER_IOMMU_SUPPORT;
899 
900 	if (vsocket->async_copy &&
901 		(flags & (RTE_VHOST_USER_IOMMU_SUPPORT |
902 		RTE_VHOST_USER_POSTCOPY_SUPPORT))) {
903 		VHOST_LOG_CONFIG(path, ERR, "async copy with IOMMU or post-copy not supported\n");
904 		goto out_mutex;
905 	}
906 
907 	/*
908 	 * Set the supported features correctly for the builtin vhost-user
909 	 * net driver.
910 	 *
911 	 * Applications know nothing about features the builtin virtio net
912 	 * driver (virtio_net.c) supports, thus it's not possible for them
913 	 * to invoke rte_vhost_driver_set_features(). To workaround it, here
914 	 * we set it unconditionally. If the application want to implement
915 	 * another vhost-user driver (say SCSI), it should call the
916 	 * rte_vhost_driver_set_features(), which will overwrite following
917 	 * two values.
918 	 */
919 	vsocket->use_builtin_virtio_net = true;
920 	vsocket->supported_features = VIRTIO_NET_SUPPORTED_FEATURES;
921 	vsocket->features           = VIRTIO_NET_SUPPORTED_FEATURES;
922 	vsocket->protocol_features  = VHOST_USER_PROTOCOL_FEATURES;
923 
924 	if (vsocket->async_copy) {
925 		vsocket->supported_features &= ~(1ULL << VHOST_F_LOG_ALL);
926 		vsocket->features &= ~(1ULL << VHOST_F_LOG_ALL);
927 		VHOST_LOG_CONFIG(path, INFO, "logging feature is disabled in async copy mode\n");
928 	}
929 
930 	/*
931 	 * We'll not be able to receive a buffer from guest in linear mode
932 	 * without external buffer if it will not fit in a single mbuf, which is
933 	 * likely if segmentation offloading enabled.
934 	 */
935 	if (vsocket->linearbuf && !vsocket->extbuf) {
936 		uint64_t seg_offload_features =
937 				(1ULL << VIRTIO_NET_F_HOST_TSO4) |
938 				(1ULL << VIRTIO_NET_F_HOST_TSO6) |
939 				(1ULL << VIRTIO_NET_F_HOST_UFO);
940 
941 		VHOST_LOG_CONFIG(path, INFO, "Linear buffers requested without external buffers,\n");
942 		VHOST_LOG_CONFIG(path, INFO, "disabling host segmentation offloading support\n");
943 		vsocket->supported_features &= ~seg_offload_features;
944 		vsocket->features &= ~seg_offload_features;
945 	}
946 
947 	if (!(flags & RTE_VHOST_USER_IOMMU_SUPPORT)) {
948 		vsocket->supported_features &= ~(1ULL << VIRTIO_F_IOMMU_PLATFORM);
949 		vsocket->features &= ~(1ULL << VIRTIO_F_IOMMU_PLATFORM);
950 	}
951 
952 	if (!(flags & RTE_VHOST_USER_POSTCOPY_SUPPORT)) {
953 		vsocket->protocol_features &=
954 			~(1ULL << VHOST_USER_PROTOCOL_F_PAGEFAULT);
955 	} else {
956 #ifndef RTE_LIBRTE_VHOST_POSTCOPY
957 		VHOST_LOG_CONFIG(path, ERR, "Postcopy requested but not compiled\n");
958 		ret = -1;
959 		goto out_mutex;
960 #endif
961 	}
962 
963 	if ((flags & RTE_VHOST_USER_CLIENT) != 0) {
964 		vsocket->reconnect = !(flags & RTE_VHOST_USER_NO_RECONNECT);
965 		if (vsocket->reconnect && reconn_tid == 0) {
966 			if (vhost_user_reconnect_init() != 0)
967 				goto out_mutex;
968 		}
969 	} else {
970 		vsocket->is_server = true;
971 	}
972 	ret = create_unix_socket(vsocket);
973 	if (ret < 0) {
974 		goto out_mutex;
975 	}
976 
977 	vhost_user.vsockets[vhost_user.vsocket_cnt++] = vsocket;
978 
979 	pthread_mutex_unlock(&vhost_user.mutex);
980 	return ret;
981 
982 out_mutex:
983 	if (pthread_mutex_destroy(&vsocket->conn_mutex)) {
984 		VHOST_LOG_CONFIG(path, ERR, "failed to destroy connection mutex\n");
985 	}
986 out_free:
987 	vhost_user_socket_mem_free(vsocket);
988 out:
989 	pthread_mutex_unlock(&vhost_user.mutex);
990 
991 	return ret;
992 }
993 
994 static bool
995 vhost_user_remove_reconnect(struct vhost_user_socket *vsocket)
996 {
997 	int found = false;
998 	struct vhost_user_reconnect *reconn, *next;
999 
1000 	pthread_mutex_lock(&reconn_list.mutex);
1001 
1002 	for (reconn = TAILQ_FIRST(&reconn_list.head);
1003 	     reconn != NULL; reconn = next) {
1004 		next = TAILQ_NEXT(reconn, next);
1005 
1006 		if (reconn->vsocket == vsocket) {
1007 			TAILQ_REMOVE(&reconn_list.head, reconn, next);
1008 			close(reconn->fd);
1009 			free(reconn);
1010 			found = true;
1011 			break;
1012 		}
1013 	}
1014 	pthread_mutex_unlock(&reconn_list.mutex);
1015 	return found;
1016 }
1017 
1018 /**
1019  * Unregister the specified vhost socket
1020  */
1021 int
1022 rte_vhost_driver_unregister(const char *path)
1023 {
1024 	int i;
1025 	int count;
1026 	struct vhost_user_connection *conn, *next;
1027 
1028 	if (path == NULL)
1029 		return -1;
1030 
1031 again:
1032 	pthread_mutex_lock(&vhost_user.mutex);
1033 
1034 	for (i = 0; i < vhost_user.vsocket_cnt; i++) {
1035 		struct vhost_user_socket *vsocket = vhost_user.vsockets[i];
1036 		if (strcmp(vsocket->path, path))
1037 			continue;
1038 
1039 		if (vsocket->is_server) {
1040 			/*
1041 			 * If r/wcb is executing, release vhost_user's
1042 			 * mutex lock, and try again since the r/wcb
1043 			 * may use the mutex lock.
1044 			 */
1045 			if (fdset_try_del(&vhost_user.fdset, vsocket->socket_fd) == -1) {
1046 				pthread_mutex_unlock(&vhost_user.mutex);
1047 				goto again;
1048 			}
1049 		} else if (vsocket->reconnect) {
1050 			vhost_user_remove_reconnect(vsocket);
1051 		}
1052 
1053 		pthread_mutex_lock(&vsocket->conn_mutex);
1054 		for (conn = TAILQ_FIRST(&vsocket->conn_list);
1055 			 conn != NULL;
1056 			 conn = next) {
1057 			next = TAILQ_NEXT(conn, next);
1058 
1059 			/*
1060 			 * If r/wcb is executing, release vsocket's
1061 			 * conn_mutex and vhost_user's mutex locks, and
1062 			 * try again since the r/wcb may use the
1063 			 * conn_mutex and mutex locks.
1064 			 */
1065 			if (fdset_try_del(&vhost_user.fdset,
1066 					  conn->connfd) == -1) {
1067 				pthread_mutex_unlock(&vsocket->conn_mutex);
1068 				pthread_mutex_unlock(&vhost_user.mutex);
1069 				goto again;
1070 			}
1071 
1072 			VHOST_LOG_CONFIG(path, INFO, "free connfd %d\n", conn->connfd);
1073 			close(conn->connfd);
1074 			vhost_destroy_device(conn->vid);
1075 			TAILQ_REMOVE(&vsocket->conn_list, conn, next);
1076 			free(conn);
1077 		}
1078 		pthread_mutex_unlock(&vsocket->conn_mutex);
1079 
1080 		if (vsocket->is_server) {
1081 			close(vsocket->socket_fd);
1082 			unlink(path);
1083 		}
1084 
1085 		pthread_mutex_destroy(&vsocket->conn_mutex);
1086 		vhost_user_socket_mem_free(vsocket);
1087 
1088 		count = --vhost_user.vsocket_cnt;
1089 		vhost_user.vsockets[i] = vhost_user.vsockets[count];
1090 		vhost_user.vsockets[count] = NULL;
1091 		pthread_mutex_unlock(&vhost_user.mutex);
1092 		return 0;
1093 	}
1094 	pthread_mutex_unlock(&vhost_user.mutex);
1095 
1096 	return -1;
1097 }
1098 
1099 /*
1100  * Register ops so that we can add/remove device to data core.
1101  */
1102 int
1103 rte_vhost_driver_callback_register(const char *path,
1104 	struct rte_vhost_device_ops const * const ops)
1105 {
1106 	struct vhost_user_socket *vsocket;
1107 
1108 	pthread_mutex_lock(&vhost_user.mutex);
1109 	vsocket = find_vhost_user_socket(path);
1110 	if (vsocket)
1111 		vsocket->notify_ops = ops;
1112 	pthread_mutex_unlock(&vhost_user.mutex);
1113 
1114 	return vsocket ? 0 : -1;
1115 }
1116 
1117 struct rte_vhost_device_ops const *
1118 vhost_driver_callback_get(const char *path)
1119 {
1120 	struct vhost_user_socket *vsocket;
1121 
1122 	pthread_mutex_lock(&vhost_user.mutex);
1123 	vsocket = find_vhost_user_socket(path);
1124 	pthread_mutex_unlock(&vhost_user.mutex);
1125 
1126 	return vsocket ? vsocket->notify_ops : NULL;
1127 }
1128 
1129 int
1130 rte_vhost_driver_start(const char *path)
1131 {
1132 	struct vhost_user_socket *vsocket;
1133 	static pthread_t fdset_tid;
1134 
1135 	pthread_mutex_lock(&vhost_user.mutex);
1136 	vsocket = find_vhost_user_socket(path);
1137 	pthread_mutex_unlock(&vhost_user.mutex);
1138 
1139 	if (!vsocket)
1140 		return -1;
1141 
1142 	if (fdset_tid == 0) {
1143 		/**
1144 		 * create a pipe which will be waited by poll and notified to
1145 		 * rebuild the wait list of poll.
1146 		 */
1147 		if (fdset_pipe_init(&vhost_user.fdset) < 0) {
1148 			VHOST_LOG_CONFIG(path, ERR, "failed to create pipe for vhost fdset\n");
1149 			return -1;
1150 		}
1151 
1152 		int ret = rte_ctrl_thread_create(&fdset_tid,
1153 			"vhost-events", NULL, fdset_event_dispatch,
1154 			&vhost_user.fdset);
1155 		if (ret != 0) {
1156 			VHOST_LOG_CONFIG(path, ERR, "failed to create fdset handling thread\n");
1157 			fdset_pipe_uninit(&vhost_user.fdset);
1158 			return -1;
1159 		}
1160 	}
1161 
1162 	if (vsocket->is_server)
1163 		return vhost_user_start_server(vsocket);
1164 	else
1165 		return vhost_user_start_client(vsocket);
1166 }
1167