xref: /dpdk/lib/vhost/socket.c (revision 99a2dd955fba6e4cc23b77d590a033650ced9c45)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2016 Intel Corporation
3  */
4 
5 #include <stdint.h>
6 #include <stdio.h>
7 #include <limits.h>
8 #include <stdlib.h>
9 #include <unistd.h>
10 #include <string.h>
11 #include <sys/types.h>
12 #include <sys/socket.h>
13 #include <sys/un.h>
14 #include <sys/queue.h>
15 #include <errno.h>
16 #include <fcntl.h>
17 #include <pthread.h>
18 
19 #include <rte_log.h>
20 
21 #include "fd_man.h"
22 #include "vhost.h"
23 #include "vhost_user.h"
24 
25 
/* List head type for the connections kept in vhost_user_socket.conn_list. */
TAILQ_HEAD(vhost_user_connection_list, vhost_user_connection);

/*
 * Every time rte_vhost_driver_register() is invoked, an associated
 * vhost_user_socket struct will be created.
 */
struct vhost_user_socket {
	/* Connections of this socket; the list is changed under conn_mutex. */
	struct vhost_user_connection_list conn_list;
	pthread_mutex_t conn_mutex;
	/* Copy of the path given to rte_vhost_driver_register() (strdup). */
	char *path;
	/* fd stored by create_unix_socket(). */
	int socket_fd;
	/* Address filled in from path by create_unix_socket(). */
	struct sockaddr_un un;
	/* True when RTE_VHOST_USER_CLIENT was not set at registration. */
	bool is_server;
	/* Client mode only: true unless RTE_VHOST_USER_NO_RECONNECT was set. */
	bool reconnect;
	bool iommu_support;
	/* Set at registration, cleared by rte_vhost_driver_set_features(). */
	bool use_builtin_virtio_net;
	/* The next three mirror the RTE_VHOST_USER_* registration flags. */
	bool extbuf;
	bool linearbuf;
	bool async_copy;

	/*
	 * The "supported_features" indicates the feature bits the
	 * vhost driver supports. The "features" indicates the feature
	 * bits after the rte_vhost_driver_features_disable/enable().
	 * It is also the final feature bits used for vhost-user
	 * features negotiation.
	 */
	uint64_t supported_features;
	uint64_t features;

	uint64_t protocol_features;

	/* Set/cleared by rte_vhost_driver_{attach,detach}_vdpa_device(). */
	struct rte_vdpa_device *vdpa_dev;

	/* Callbacks installed by rte_vhost_driver_callback_register(). */
	struct vhost_device_ops const *notify_ops;
};
62 
/* One established connection on a vhost-user socket. */
struct vhost_user_connection {
	/* Socket this connection belongs to. */
	struct vhost_user_socket *vsocket;
	/* Connected fd registered in the fdset with vhost_user_read_cb(). */
	int connfd;
	/* Device id returned by vhost_new_device() for this connection. */
	int vid;

	/* Linkage in vsocket->conn_list. */
	TAILQ_ENTRY(vhost_user_connection) next;
};
70 
/* Capacity of the vsockets table; registration fails once it is full. */
#define MAX_VHOST_SOCKET 1024
/* File-wide state: the registered sockets and the fd set they share. */
struct vhost_user {
	/* Registered sockets; entries [0, vsocket_cnt) are in use. */
	struct vhost_user_socket *vsockets[MAX_VHOST_SOCKET];
	struct fdset fdset;
	int vsocket_cnt;
	/* Held while vsockets/vsocket_cnt are searched or modified. */
	pthread_mutex_t mutex;
};
78 
/* Backlog value passed to listen() in vhost_user_start_server(). */
#define MAX_VIRTIO_BACKLOG 128

/* Forward declarations of static functions defined later in this file. */
static void vhost_user_server_new_connection(int fd, void *data, int *remove);
static void vhost_user_read_cb(int fd, void *dat, int *remove);
static int create_unix_socket(struct vhost_user_socket *vsocket);
static int vhost_user_start_client(struct vhost_user_socket *vsocket);
85 
/*
 * The single vhost_user instance used by every function in this file.
 * The fdset member layout comes from fd_man.h; the initializer below marks
 * every fd slot with -1 (presumably "unused" - confirm against fd_man.h).
 */
static struct vhost_user vhost_user = {
	.fdset = {
		.fd = { [0 ... MAX_FDS - 1] = {-1, NULL, NULL, NULL, 0} },
		.fd_mutex = PTHREAD_MUTEX_INITIALIZER,
		.fd_pooling_mutex = PTHREAD_MUTEX_INITIALIZER,
		.num = 0
	},
	.vsocket_cnt = 0,
	.mutex = PTHREAD_MUTEX_INITIALIZER,
};
96 
97 /*
98  * return bytes# of read on success or negative val on failure. Update fdnum
99  * with number of fds read.
100  */
101 int
102 read_fd_message(int sockfd, char *buf, int buflen, int *fds, int max_fds,
103 		int *fd_num)
104 {
105 	struct iovec iov;
106 	struct msghdr msgh;
107 	char control[CMSG_SPACE(max_fds * sizeof(int))];
108 	struct cmsghdr *cmsg;
109 	int got_fds = 0;
110 	int ret;
111 
112 	*fd_num = 0;
113 
114 	memset(&msgh, 0, sizeof(msgh));
115 	iov.iov_base = buf;
116 	iov.iov_len  = buflen;
117 
118 	msgh.msg_iov = &iov;
119 	msgh.msg_iovlen = 1;
120 	msgh.msg_control = control;
121 	msgh.msg_controllen = sizeof(control);
122 
123 	ret = recvmsg(sockfd, &msgh, 0);
124 	if (ret <= 0) {
125 		if (ret)
126 			VHOST_LOG_CONFIG(ERR, "recvmsg failed\n");
127 		return ret;
128 	}
129 
130 	if (msgh.msg_flags & (MSG_TRUNC | MSG_CTRUNC)) {
131 		VHOST_LOG_CONFIG(ERR, "truncated msg\n");
132 		return -1;
133 	}
134 
135 	for (cmsg = CMSG_FIRSTHDR(&msgh); cmsg != NULL;
136 		cmsg = CMSG_NXTHDR(&msgh, cmsg)) {
137 		if ((cmsg->cmsg_level == SOL_SOCKET) &&
138 			(cmsg->cmsg_type == SCM_RIGHTS)) {
139 			got_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
140 			*fd_num = got_fds;
141 			memcpy(fds, CMSG_DATA(cmsg), got_fds * sizeof(int));
142 			break;
143 		}
144 	}
145 
146 	/* Clear out unused file descriptors */
147 	while (got_fds < max_fds)
148 		fds[got_fds++] = -1;
149 
150 	return ret;
151 }
152 
153 int
154 send_fd_message(int sockfd, char *buf, int buflen, int *fds, int fd_num)
155 {
156 
157 	struct iovec iov;
158 	struct msghdr msgh;
159 	size_t fdsize = fd_num * sizeof(int);
160 	char control[CMSG_SPACE(fdsize)];
161 	struct cmsghdr *cmsg;
162 	int ret;
163 
164 	memset(&msgh, 0, sizeof(msgh));
165 	iov.iov_base = buf;
166 	iov.iov_len = buflen;
167 
168 	msgh.msg_iov = &iov;
169 	msgh.msg_iovlen = 1;
170 
171 	if (fds && fd_num > 0) {
172 		msgh.msg_control = control;
173 		msgh.msg_controllen = sizeof(control);
174 		cmsg = CMSG_FIRSTHDR(&msgh);
175 		if (cmsg == NULL) {
176 			VHOST_LOG_CONFIG(ERR, "cmsg == NULL\n");
177 			errno = EINVAL;
178 			return -1;
179 		}
180 		cmsg->cmsg_len = CMSG_LEN(fdsize);
181 		cmsg->cmsg_level = SOL_SOCKET;
182 		cmsg->cmsg_type = SCM_RIGHTS;
183 		memcpy(CMSG_DATA(cmsg), fds, fdsize);
184 	} else {
185 		msgh.msg_control = NULL;
186 		msgh.msg_controllen = 0;
187 	}
188 
189 	do {
190 		ret = sendmsg(sockfd, &msgh, MSG_NOSIGNAL);
191 	} while (ret < 0 && errno == EINTR);
192 
193 	if (ret < 0) {
194 		VHOST_LOG_CONFIG(ERR,  "sendmsg error\n");
195 		return ret;
196 	}
197 
198 	return ret;
199 }
200 
201 static void
202 vhost_user_add_connection(int fd, struct vhost_user_socket *vsocket)
203 {
204 	int vid;
205 	size_t size;
206 	struct vhost_user_connection *conn;
207 	int ret;
208 	struct virtio_net *dev;
209 
210 	if (vsocket == NULL)
211 		return;
212 
213 	conn = malloc(sizeof(*conn));
214 	if (conn == NULL) {
215 		close(fd);
216 		return;
217 	}
218 
219 	vid = vhost_new_device();
220 	if (vid == -1) {
221 		goto err;
222 	}
223 
224 	size = strnlen(vsocket->path, PATH_MAX);
225 	vhost_set_ifname(vid, vsocket->path, size);
226 
227 	vhost_set_builtin_virtio_net(vid, vsocket->use_builtin_virtio_net);
228 
229 	vhost_attach_vdpa_device(vid, vsocket->vdpa_dev);
230 
231 	if (vsocket->extbuf)
232 		vhost_enable_extbuf(vid);
233 
234 	if (vsocket->linearbuf)
235 		vhost_enable_linearbuf(vid);
236 
237 	if (vsocket->async_copy) {
238 		dev = get_device(vid);
239 
240 		if (dev)
241 			dev->async_copy = 1;
242 	}
243 
244 	VHOST_LOG_CONFIG(INFO, "new device, handle is %d\n", vid);
245 
246 	if (vsocket->notify_ops->new_connection) {
247 		ret = vsocket->notify_ops->new_connection(vid);
248 		if (ret < 0) {
249 			VHOST_LOG_CONFIG(ERR,
250 				"failed to add vhost user connection with fd %d\n",
251 				fd);
252 			goto err_cleanup;
253 		}
254 	}
255 
256 	conn->connfd = fd;
257 	conn->vsocket = vsocket;
258 	conn->vid = vid;
259 	ret = fdset_add(&vhost_user.fdset, fd, vhost_user_read_cb,
260 			NULL, conn);
261 	if (ret < 0) {
262 		VHOST_LOG_CONFIG(ERR,
263 			"failed to add fd %d into vhost server fdset\n",
264 			fd);
265 
266 		if (vsocket->notify_ops->destroy_connection)
267 			vsocket->notify_ops->destroy_connection(conn->vid);
268 
269 		goto err_cleanup;
270 	}
271 
272 	pthread_mutex_lock(&vsocket->conn_mutex);
273 	TAILQ_INSERT_TAIL(&vsocket->conn_list, conn, next);
274 	pthread_mutex_unlock(&vsocket->conn_mutex);
275 
276 	fdset_pipe_notify(&vhost_user.fdset);
277 	return;
278 
279 err_cleanup:
280 	vhost_destroy_device(vid);
281 err:
282 	free(conn);
283 	close(fd);
284 }
285 
286 /* call back when there is new vhost-user connection from client  */
287 static void
288 vhost_user_server_new_connection(int fd, void *dat, int *remove __rte_unused)
289 {
290 	struct vhost_user_socket *vsocket = dat;
291 
292 	fd = accept(fd, NULL, NULL);
293 	if (fd < 0)
294 		return;
295 
296 	VHOST_LOG_CONFIG(INFO, "new vhost user connection is %d\n", fd);
297 	vhost_user_add_connection(fd, vsocket);
298 }
299 
300 static void
301 vhost_user_read_cb(int connfd, void *dat, int *remove)
302 {
303 	struct vhost_user_connection *conn = dat;
304 	struct vhost_user_socket *vsocket = conn->vsocket;
305 	int ret;
306 
307 	ret = vhost_user_msg_handler(conn->vid, connfd);
308 	if (ret < 0) {
309 		struct virtio_net *dev = get_device(conn->vid);
310 
311 		close(connfd);
312 		*remove = 1;
313 
314 		if (dev)
315 			vhost_destroy_device_notify(dev);
316 
317 		if (vsocket->notify_ops->destroy_connection)
318 			vsocket->notify_ops->destroy_connection(conn->vid);
319 
320 		vhost_destroy_device(conn->vid);
321 
322 		if (vsocket->reconnect) {
323 			create_unix_socket(vsocket);
324 			vhost_user_start_client(vsocket);
325 		}
326 
327 		pthread_mutex_lock(&vsocket->conn_mutex);
328 		TAILQ_REMOVE(&vsocket->conn_list, conn, next);
329 		pthread_mutex_unlock(&vsocket->conn_mutex);
330 
331 		free(conn);
332 	}
333 }
334 
335 static int
336 create_unix_socket(struct vhost_user_socket *vsocket)
337 {
338 	int fd;
339 	struct sockaddr_un *un = &vsocket->un;
340 
341 	fd = socket(AF_UNIX, SOCK_STREAM, 0);
342 	if (fd < 0)
343 		return -1;
344 	VHOST_LOG_CONFIG(INFO, "vhost-user %s: socket created, fd: %d\n",
345 		vsocket->is_server ? "server" : "client", fd);
346 
347 	if (!vsocket->is_server && fcntl(fd, F_SETFL, O_NONBLOCK)) {
348 		VHOST_LOG_CONFIG(ERR,
349 			"vhost-user: can't set nonblocking mode for socket, fd: "
350 			"%d (%s)\n", fd, strerror(errno));
351 		close(fd);
352 		return -1;
353 	}
354 
355 	memset(un, 0, sizeof(*un));
356 	un->sun_family = AF_UNIX;
357 	strncpy(un->sun_path, vsocket->path, sizeof(un->sun_path));
358 	un->sun_path[sizeof(un->sun_path) - 1] = '\0';
359 
360 	vsocket->socket_fd = fd;
361 	return 0;
362 }
363 
364 static int
365 vhost_user_start_server(struct vhost_user_socket *vsocket)
366 {
367 	int ret;
368 	int fd = vsocket->socket_fd;
369 	const char *path = vsocket->path;
370 
371 	/*
372 	 * bind () may fail if the socket file with the same name already
373 	 * exists. But the library obviously should not delete the file
374 	 * provided by the user, since we can not be sure that it is not
375 	 * being used by other applications. Moreover, many applications form
376 	 * socket names based on user input, which is prone to errors.
377 	 *
378 	 * The user must ensure that the socket does not exist before
379 	 * registering the vhost driver in server mode.
380 	 */
381 	ret = bind(fd, (struct sockaddr *)&vsocket->un, sizeof(vsocket->un));
382 	if (ret < 0) {
383 		VHOST_LOG_CONFIG(ERR,
384 			"failed to bind to %s: %s; remove it and try again\n",
385 			path, strerror(errno));
386 		goto err;
387 	}
388 	VHOST_LOG_CONFIG(INFO, "bind to %s\n", path);
389 
390 	ret = listen(fd, MAX_VIRTIO_BACKLOG);
391 	if (ret < 0)
392 		goto err;
393 
394 	ret = fdset_add(&vhost_user.fdset, fd, vhost_user_server_new_connection,
395 		  NULL, vsocket);
396 	if (ret < 0) {
397 		VHOST_LOG_CONFIG(ERR,
398 			"failed to add listen fd %d to vhost server fdset\n",
399 			fd);
400 		goto err;
401 	}
402 
403 	return 0;
404 
405 err:
406 	close(fd);
407 	return -1;
408 }
409 
/* One client socket that is waiting to be (re)connected. */
struct vhost_user_reconnect {
	/* Copy of the owning socket's address, used for connect(). */
	struct sockaddr_un un;
	/* fd on which the connection attempt is repeated. */
	int fd;
	struct vhost_user_socket *vsocket;

	TAILQ_ENTRY(vhost_user_reconnect) next;
};

TAILQ_HEAD(vhost_user_reconnect_tailq_list, vhost_user_reconnect);
/* Pending reconnections; head is accessed with mutex held. */
struct vhost_user_reconnect_list {
	struct vhost_user_reconnect_tailq_list head;
	pthread_mutex_t mutex;
};

/* Initialized by vhost_user_reconnect_init(). */
static struct vhost_user_reconnect_list reconn_list;
/* Thread running vhost_user_client_reconnect(); 0 until it is created. */
static pthread_t reconn_tid;
426 
427 static int
428 vhost_user_connect_nonblock(int fd, struct sockaddr *un, size_t sz)
429 {
430 	int ret, flags;
431 
432 	ret = connect(fd, un, sz);
433 	if (ret < 0 && errno != EISCONN)
434 		return -1;
435 
436 	flags = fcntl(fd, F_GETFL, 0);
437 	if (flags < 0) {
438 		VHOST_LOG_CONFIG(ERR,
439 			"can't get flags for connfd %d\n", fd);
440 		return -2;
441 	}
442 	if ((flags & O_NONBLOCK) && fcntl(fd, F_SETFL, flags & ~O_NONBLOCK)) {
443 		VHOST_LOG_CONFIG(ERR,
444 				"can't disable nonblocking on fd %d\n", fd);
445 		return -2;
446 	}
447 	return 0;
448 }
449 
450 static void *
451 vhost_user_client_reconnect(void *arg __rte_unused)
452 {
453 	int ret;
454 	struct vhost_user_reconnect *reconn, *next;
455 
456 	while (1) {
457 		pthread_mutex_lock(&reconn_list.mutex);
458 
459 		/*
460 		 * An equal implementation of TAILQ_FOREACH_SAFE,
461 		 * which does not exist on all platforms.
462 		 */
463 		for (reconn = TAILQ_FIRST(&reconn_list.head);
464 		     reconn != NULL; reconn = next) {
465 			next = TAILQ_NEXT(reconn, next);
466 
467 			ret = vhost_user_connect_nonblock(reconn->fd,
468 						(struct sockaddr *)&reconn->un,
469 						sizeof(reconn->un));
470 			if (ret == -2) {
471 				close(reconn->fd);
472 				VHOST_LOG_CONFIG(ERR,
473 					"reconnection for fd %d failed\n",
474 					reconn->fd);
475 				goto remove_fd;
476 			}
477 			if (ret == -1)
478 				continue;
479 
480 			VHOST_LOG_CONFIG(INFO,
481 				"%s: connected\n", reconn->vsocket->path);
482 			vhost_user_add_connection(reconn->fd, reconn->vsocket);
483 remove_fd:
484 			TAILQ_REMOVE(&reconn_list.head, reconn, next);
485 			free(reconn);
486 		}
487 
488 		pthread_mutex_unlock(&reconn_list.mutex);
489 		sleep(1);
490 	}
491 
492 	return NULL;
493 }
494 
495 static int
496 vhost_user_reconnect_init(void)
497 {
498 	int ret;
499 
500 	ret = pthread_mutex_init(&reconn_list.mutex, NULL);
501 	if (ret < 0) {
502 		VHOST_LOG_CONFIG(ERR, "failed to initialize mutex");
503 		return ret;
504 	}
505 	TAILQ_INIT(&reconn_list.head);
506 
507 	ret = rte_ctrl_thread_create(&reconn_tid, "vhost_reconn", NULL,
508 			     vhost_user_client_reconnect, NULL);
509 	if (ret != 0) {
510 		VHOST_LOG_CONFIG(ERR, "failed to create reconnect thread");
511 		if (pthread_mutex_destroy(&reconn_list.mutex)) {
512 			VHOST_LOG_CONFIG(ERR,
513 				"failed to destroy reconnect mutex");
514 		}
515 	}
516 
517 	return ret;
518 }
519 
520 static int
521 vhost_user_start_client(struct vhost_user_socket *vsocket)
522 {
523 	int ret;
524 	int fd = vsocket->socket_fd;
525 	const char *path = vsocket->path;
526 	struct vhost_user_reconnect *reconn;
527 
528 	ret = vhost_user_connect_nonblock(fd, (struct sockaddr *)&vsocket->un,
529 					  sizeof(vsocket->un));
530 	if (ret == 0) {
531 		vhost_user_add_connection(fd, vsocket);
532 		return 0;
533 	}
534 
535 	VHOST_LOG_CONFIG(WARNING,
536 		"failed to connect to %s: %s\n",
537 		path, strerror(errno));
538 
539 	if (ret == -2 || !vsocket->reconnect) {
540 		close(fd);
541 		return -1;
542 	}
543 
544 	VHOST_LOG_CONFIG(INFO, "%s: reconnecting...\n", path);
545 	reconn = malloc(sizeof(*reconn));
546 	if (reconn == NULL) {
547 		VHOST_LOG_CONFIG(ERR,
548 			"failed to allocate memory for reconnect\n");
549 		close(fd);
550 		return -1;
551 	}
552 	reconn->un = vsocket->un;
553 	reconn->fd = fd;
554 	reconn->vsocket = vsocket;
555 	pthread_mutex_lock(&reconn_list.mutex);
556 	TAILQ_INSERT_TAIL(&reconn_list.head, reconn, next);
557 	pthread_mutex_unlock(&reconn_list.mutex);
558 
559 	return 0;
560 }
561 
562 static struct vhost_user_socket *
563 find_vhost_user_socket(const char *path)
564 {
565 	int i;
566 
567 	if (path == NULL)
568 		return NULL;
569 
570 	for (i = 0; i < vhost_user.vsocket_cnt; i++) {
571 		struct vhost_user_socket *vsocket = vhost_user.vsockets[i];
572 
573 		if (!strcmp(vsocket->path, path))
574 			return vsocket;
575 	}
576 
577 	return NULL;
578 }
579 
580 int
581 rte_vhost_driver_attach_vdpa_device(const char *path,
582 		struct rte_vdpa_device *dev)
583 {
584 	struct vhost_user_socket *vsocket;
585 
586 	if (dev == NULL || path == NULL)
587 		return -1;
588 
589 	pthread_mutex_lock(&vhost_user.mutex);
590 	vsocket = find_vhost_user_socket(path);
591 	if (vsocket)
592 		vsocket->vdpa_dev = dev;
593 	pthread_mutex_unlock(&vhost_user.mutex);
594 
595 	return vsocket ? 0 : -1;
596 }
597 
598 int
599 rte_vhost_driver_detach_vdpa_device(const char *path)
600 {
601 	struct vhost_user_socket *vsocket;
602 
603 	pthread_mutex_lock(&vhost_user.mutex);
604 	vsocket = find_vhost_user_socket(path);
605 	if (vsocket)
606 		vsocket->vdpa_dev = NULL;
607 	pthread_mutex_unlock(&vhost_user.mutex);
608 
609 	return vsocket ? 0 : -1;
610 }
611 
612 struct rte_vdpa_device *
613 rte_vhost_driver_get_vdpa_device(const char *path)
614 {
615 	struct vhost_user_socket *vsocket;
616 	struct rte_vdpa_device *dev = NULL;
617 
618 	pthread_mutex_lock(&vhost_user.mutex);
619 	vsocket = find_vhost_user_socket(path);
620 	if (vsocket)
621 		dev = vsocket->vdpa_dev;
622 	pthread_mutex_unlock(&vhost_user.mutex);
623 
624 	return dev;
625 }
626 
627 int
628 rte_vhost_driver_disable_features(const char *path, uint64_t features)
629 {
630 	struct vhost_user_socket *vsocket;
631 
632 	pthread_mutex_lock(&vhost_user.mutex);
633 	vsocket = find_vhost_user_socket(path);
634 
635 	/* Note that use_builtin_virtio_net is not affected by this function
636 	 * since callers may want to selectively disable features of the
637 	 * built-in vhost net device backend.
638 	 */
639 
640 	if (vsocket)
641 		vsocket->features &= ~features;
642 	pthread_mutex_unlock(&vhost_user.mutex);
643 
644 	return vsocket ? 0 : -1;
645 }
646 
647 int
648 rte_vhost_driver_enable_features(const char *path, uint64_t features)
649 {
650 	struct vhost_user_socket *vsocket;
651 
652 	pthread_mutex_lock(&vhost_user.mutex);
653 	vsocket = find_vhost_user_socket(path);
654 	if (vsocket) {
655 		if ((vsocket->supported_features & features) != features) {
656 			/*
657 			 * trying to enable features the driver doesn't
658 			 * support.
659 			 */
660 			pthread_mutex_unlock(&vhost_user.mutex);
661 			return -1;
662 		}
663 		vsocket->features |= features;
664 	}
665 	pthread_mutex_unlock(&vhost_user.mutex);
666 
667 	return vsocket ? 0 : -1;
668 }
669 
670 int
671 rte_vhost_driver_set_features(const char *path, uint64_t features)
672 {
673 	struct vhost_user_socket *vsocket;
674 
675 	pthread_mutex_lock(&vhost_user.mutex);
676 	vsocket = find_vhost_user_socket(path);
677 	if (vsocket) {
678 		vsocket->supported_features = features;
679 		vsocket->features = features;
680 
681 		/* Anyone setting feature bits is implementing their own vhost
682 		 * device backend.
683 		 */
684 		vsocket->use_builtin_virtio_net = false;
685 	}
686 	pthread_mutex_unlock(&vhost_user.mutex);
687 
688 	return vsocket ? 0 : -1;
689 }
690 
691 int
692 rte_vhost_driver_get_features(const char *path, uint64_t *features)
693 {
694 	struct vhost_user_socket *vsocket;
695 	uint64_t vdpa_features;
696 	struct rte_vdpa_device *vdpa_dev;
697 	int ret = 0;
698 
699 	pthread_mutex_lock(&vhost_user.mutex);
700 	vsocket = find_vhost_user_socket(path);
701 	if (!vsocket) {
702 		VHOST_LOG_CONFIG(ERR,
703 			"socket file %s is not registered yet.\n", path);
704 		ret = -1;
705 		goto unlock_exit;
706 	}
707 
708 	vdpa_dev = vsocket->vdpa_dev;
709 	if (!vdpa_dev) {
710 		*features = vsocket->features;
711 		goto unlock_exit;
712 	}
713 
714 	if (vdpa_dev->ops->get_features(vdpa_dev, &vdpa_features) < 0) {
715 		VHOST_LOG_CONFIG(ERR,
716 				"failed to get vdpa features "
717 				"for socket file %s.\n", path);
718 		ret = -1;
719 		goto unlock_exit;
720 	}
721 
722 	*features = vsocket->features & vdpa_features;
723 
724 unlock_exit:
725 	pthread_mutex_unlock(&vhost_user.mutex);
726 	return ret;
727 }
728 
729 int
730 rte_vhost_driver_set_protocol_features(const char *path,
731 		uint64_t protocol_features)
732 {
733 	struct vhost_user_socket *vsocket;
734 
735 	pthread_mutex_lock(&vhost_user.mutex);
736 	vsocket = find_vhost_user_socket(path);
737 	if (vsocket)
738 		vsocket->protocol_features = protocol_features;
739 	pthread_mutex_unlock(&vhost_user.mutex);
740 	return vsocket ? 0 : -1;
741 }
742 
743 int
744 rte_vhost_driver_get_protocol_features(const char *path,
745 		uint64_t *protocol_features)
746 {
747 	struct vhost_user_socket *vsocket;
748 	uint64_t vdpa_protocol_features;
749 	struct rte_vdpa_device *vdpa_dev;
750 	int ret = 0;
751 
752 	pthread_mutex_lock(&vhost_user.mutex);
753 	vsocket = find_vhost_user_socket(path);
754 	if (!vsocket) {
755 		VHOST_LOG_CONFIG(ERR,
756 			"socket file %s is not registered yet.\n", path);
757 		ret = -1;
758 		goto unlock_exit;
759 	}
760 
761 	vdpa_dev = vsocket->vdpa_dev;
762 	if (!vdpa_dev) {
763 		*protocol_features = vsocket->protocol_features;
764 		goto unlock_exit;
765 	}
766 
767 	if (vdpa_dev->ops->get_protocol_features(vdpa_dev,
768 				&vdpa_protocol_features) < 0) {
769 		VHOST_LOG_CONFIG(ERR,
770 				"failed to get vdpa protocol features "
771 				"for socket file %s.\n", path);
772 		ret = -1;
773 		goto unlock_exit;
774 	}
775 
776 	*protocol_features = vsocket->protocol_features
777 		& vdpa_protocol_features;
778 
779 unlock_exit:
780 	pthread_mutex_unlock(&vhost_user.mutex);
781 	return ret;
782 }
783 
784 int
785 rte_vhost_driver_get_queue_num(const char *path, uint32_t *queue_num)
786 {
787 	struct vhost_user_socket *vsocket;
788 	uint32_t vdpa_queue_num;
789 	struct rte_vdpa_device *vdpa_dev;
790 	int ret = 0;
791 
792 	pthread_mutex_lock(&vhost_user.mutex);
793 	vsocket = find_vhost_user_socket(path);
794 	if (!vsocket) {
795 		VHOST_LOG_CONFIG(ERR,
796 			"socket file %s is not registered yet.\n", path);
797 		ret = -1;
798 		goto unlock_exit;
799 	}
800 
801 	vdpa_dev = vsocket->vdpa_dev;
802 	if (!vdpa_dev) {
803 		*queue_num = VHOST_MAX_QUEUE_PAIRS;
804 		goto unlock_exit;
805 	}
806 
807 	if (vdpa_dev->ops->get_queue_num(vdpa_dev, &vdpa_queue_num) < 0) {
808 		VHOST_LOG_CONFIG(ERR,
809 				"failed to get vdpa queue number "
810 				"for socket file %s.\n", path);
811 		ret = -1;
812 		goto unlock_exit;
813 	}
814 
815 	*queue_num = RTE_MIN((uint32_t)VHOST_MAX_QUEUE_PAIRS, vdpa_queue_num);
816 
817 unlock_exit:
818 	pthread_mutex_unlock(&vhost_user.mutex);
819 	return ret;
820 }
821 
822 static void
823 vhost_user_socket_mem_free(struct vhost_user_socket *vsocket)
824 {
825 	if (vsocket && vsocket->path) {
826 		free(vsocket->path);
827 		vsocket->path = NULL;
828 	}
829 
830 	if (vsocket) {
831 		free(vsocket);
832 		vsocket = NULL;
833 	}
834 }
835 
836 /*
837  * Register a new vhost-user socket; here we could act as server
838  * (the default case), or client (when RTE_VHOST_USER_CLIENT) flag
839  * is set.
840  */
841 int
842 rte_vhost_driver_register(const char *path, uint64_t flags)
843 {
844 	int ret = -1;
845 	struct vhost_user_socket *vsocket;
846 
847 	if (!path)
848 		return -1;
849 
850 	pthread_mutex_lock(&vhost_user.mutex);
851 
852 	if (vhost_user.vsocket_cnt == MAX_VHOST_SOCKET) {
853 		VHOST_LOG_CONFIG(ERR,
854 			"error: the number of vhost sockets reaches maximum\n");
855 		goto out;
856 	}
857 
858 	vsocket = malloc(sizeof(struct vhost_user_socket));
859 	if (!vsocket)
860 		goto out;
861 	memset(vsocket, 0, sizeof(struct vhost_user_socket));
862 	vsocket->path = strdup(path);
863 	if (vsocket->path == NULL) {
864 		VHOST_LOG_CONFIG(ERR,
865 			"error: failed to copy socket path string\n");
866 		vhost_user_socket_mem_free(vsocket);
867 		goto out;
868 	}
869 	TAILQ_INIT(&vsocket->conn_list);
870 	ret = pthread_mutex_init(&vsocket->conn_mutex, NULL);
871 	if (ret) {
872 		VHOST_LOG_CONFIG(ERR,
873 			"error: failed to init connection mutex\n");
874 		goto out_free;
875 	}
876 	vsocket->vdpa_dev = NULL;
877 	vsocket->extbuf = flags & RTE_VHOST_USER_EXTBUF_SUPPORT;
878 	vsocket->linearbuf = flags & RTE_VHOST_USER_LINEARBUF_SUPPORT;
879 	vsocket->async_copy = flags & RTE_VHOST_USER_ASYNC_COPY;
880 
881 	if (vsocket->async_copy &&
882 		(flags & (RTE_VHOST_USER_IOMMU_SUPPORT |
883 		RTE_VHOST_USER_POSTCOPY_SUPPORT))) {
884 		VHOST_LOG_CONFIG(ERR, "error: enabling async copy and IOMMU "
885 			"or post-copy feature simultaneously is not "
886 			"supported\n");
887 		goto out_mutex;
888 	}
889 
890 	/*
891 	 * Set the supported features correctly for the builtin vhost-user
892 	 * net driver.
893 	 *
894 	 * Applications know nothing about features the builtin virtio net
895 	 * driver (virtio_net.c) supports, thus it's not possible for them
896 	 * to invoke rte_vhost_driver_set_features(). To workaround it, here
897 	 * we set it unconditionally. If the application want to implement
898 	 * another vhost-user driver (say SCSI), it should call the
899 	 * rte_vhost_driver_set_features(), which will overwrite following
900 	 * two values.
901 	 */
902 	vsocket->use_builtin_virtio_net = true;
903 	vsocket->supported_features = VIRTIO_NET_SUPPORTED_FEATURES;
904 	vsocket->features           = VIRTIO_NET_SUPPORTED_FEATURES;
905 	vsocket->protocol_features  = VHOST_USER_PROTOCOL_FEATURES;
906 
907 	if (vsocket->async_copy) {
908 		vsocket->supported_features &= ~(1ULL << VHOST_F_LOG_ALL);
909 		vsocket->features &= ~(1ULL << VHOST_F_LOG_ALL);
910 		VHOST_LOG_CONFIG(INFO,
911 			"Logging feature is disabled in async copy mode\n");
912 	}
913 
914 	/*
915 	 * We'll not be able to receive a buffer from guest in linear mode
916 	 * without external buffer if it will not fit in a single mbuf, which is
917 	 * likely if segmentation offloading enabled.
918 	 */
919 	if (vsocket->linearbuf && !vsocket->extbuf) {
920 		uint64_t seg_offload_features =
921 				(1ULL << VIRTIO_NET_F_HOST_TSO4) |
922 				(1ULL << VIRTIO_NET_F_HOST_TSO6) |
923 				(1ULL << VIRTIO_NET_F_HOST_UFO);
924 
925 		VHOST_LOG_CONFIG(INFO,
926 			"Linear buffers requested without external buffers, "
927 			"disabling host segmentation offloading support\n");
928 		vsocket->supported_features &= ~seg_offload_features;
929 		vsocket->features &= ~seg_offload_features;
930 	}
931 
932 	if (!(flags & RTE_VHOST_USER_IOMMU_SUPPORT)) {
933 		vsocket->supported_features &= ~(1ULL << VIRTIO_F_IOMMU_PLATFORM);
934 		vsocket->features &= ~(1ULL << VIRTIO_F_IOMMU_PLATFORM);
935 	}
936 
937 	if (!(flags & RTE_VHOST_USER_POSTCOPY_SUPPORT)) {
938 		vsocket->protocol_features &=
939 			~(1ULL << VHOST_USER_PROTOCOL_F_PAGEFAULT);
940 	} else {
941 #ifndef RTE_LIBRTE_VHOST_POSTCOPY
942 		VHOST_LOG_CONFIG(ERR,
943 			"Postcopy requested but not compiled\n");
944 		ret = -1;
945 		goto out_mutex;
946 #endif
947 	}
948 
949 	if ((flags & RTE_VHOST_USER_CLIENT) != 0) {
950 		vsocket->reconnect = !(flags & RTE_VHOST_USER_NO_RECONNECT);
951 		if (vsocket->reconnect && reconn_tid == 0) {
952 			if (vhost_user_reconnect_init() != 0)
953 				goto out_mutex;
954 		}
955 	} else {
956 		vsocket->is_server = true;
957 	}
958 	ret = create_unix_socket(vsocket);
959 	if (ret < 0) {
960 		goto out_mutex;
961 	}
962 
963 	vhost_user.vsockets[vhost_user.vsocket_cnt++] = vsocket;
964 
965 	pthread_mutex_unlock(&vhost_user.mutex);
966 	return ret;
967 
968 out_mutex:
969 	if (pthread_mutex_destroy(&vsocket->conn_mutex)) {
970 		VHOST_LOG_CONFIG(ERR,
971 			"error: failed to destroy connection mutex\n");
972 	}
973 out_free:
974 	vhost_user_socket_mem_free(vsocket);
975 out:
976 	pthread_mutex_unlock(&vhost_user.mutex);
977 
978 	return ret;
979 }
980 
981 static bool
982 vhost_user_remove_reconnect(struct vhost_user_socket *vsocket)
983 {
984 	int found = false;
985 	struct vhost_user_reconnect *reconn, *next;
986 
987 	pthread_mutex_lock(&reconn_list.mutex);
988 
989 	for (reconn = TAILQ_FIRST(&reconn_list.head);
990 	     reconn != NULL; reconn = next) {
991 		next = TAILQ_NEXT(reconn, next);
992 
993 		if (reconn->vsocket == vsocket) {
994 			TAILQ_REMOVE(&reconn_list.head, reconn, next);
995 			close(reconn->fd);
996 			free(reconn);
997 			found = true;
998 			break;
999 		}
1000 	}
1001 	pthread_mutex_unlock(&reconn_list.mutex);
1002 	return found;
1003 }
1004 
1005 /**
1006  * Unregister the specified vhost socket
1007  */
1008 int
1009 rte_vhost_driver_unregister(const char *path)
1010 {
1011 	int i;
1012 	int count;
1013 	struct vhost_user_connection *conn, *next;
1014 
1015 	if (path == NULL)
1016 		return -1;
1017 
1018 again:
1019 	pthread_mutex_lock(&vhost_user.mutex);
1020 
1021 	for (i = 0; i < vhost_user.vsocket_cnt; i++) {
1022 		struct vhost_user_socket *vsocket = vhost_user.vsockets[i];
1023 
1024 		if (!strcmp(vsocket->path, path)) {
1025 			pthread_mutex_lock(&vsocket->conn_mutex);
1026 			for (conn = TAILQ_FIRST(&vsocket->conn_list);
1027 			     conn != NULL;
1028 			     conn = next) {
1029 				next = TAILQ_NEXT(conn, next);
1030 
1031 				/*
1032 				 * If r/wcb is executing, release vsocket's
1033 				 * conn_mutex and vhost_user's mutex locks, and
1034 				 * try again since the r/wcb may use the
1035 				 * conn_mutex and mutex locks.
1036 				 */
1037 				if (fdset_try_del(&vhost_user.fdset,
1038 						  conn->connfd) == -1) {
1039 					pthread_mutex_unlock(
1040 							&vsocket->conn_mutex);
1041 					pthread_mutex_unlock(&vhost_user.mutex);
1042 					goto again;
1043 				}
1044 
1045 				VHOST_LOG_CONFIG(INFO,
1046 					"free connfd = %d for device '%s'\n",
1047 					conn->connfd, path);
1048 				close(conn->connfd);
1049 				vhost_destroy_device(conn->vid);
1050 				TAILQ_REMOVE(&vsocket->conn_list, conn, next);
1051 				free(conn);
1052 			}
1053 			pthread_mutex_unlock(&vsocket->conn_mutex);
1054 
1055 			if (vsocket->is_server) {
1056 				/*
1057 				 * If r/wcb is executing, release vhost_user's
1058 				 * mutex lock, and try again since the r/wcb
1059 				 * may use the mutex lock.
1060 				 */
1061 				if (fdset_try_del(&vhost_user.fdset,
1062 						vsocket->socket_fd) == -1) {
1063 					pthread_mutex_unlock(&vhost_user.mutex);
1064 					goto again;
1065 				}
1066 
1067 				close(vsocket->socket_fd);
1068 				unlink(path);
1069 			} else if (vsocket->reconnect) {
1070 				vhost_user_remove_reconnect(vsocket);
1071 			}
1072 
1073 			pthread_mutex_destroy(&vsocket->conn_mutex);
1074 			vhost_user_socket_mem_free(vsocket);
1075 
1076 			count = --vhost_user.vsocket_cnt;
1077 			vhost_user.vsockets[i] = vhost_user.vsockets[count];
1078 			vhost_user.vsockets[count] = NULL;
1079 			pthread_mutex_unlock(&vhost_user.mutex);
1080 
1081 			return 0;
1082 		}
1083 	}
1084 	pthread_mutex_unlock(&vhost_user.mutex);
1085 
1086 	return -1;
1087 }
1088 
1089 /*
1090  * Register ops so that we can add/remove device to data core.
1091  */
1092 int
1093 rte_vhost_driver_callback_register(const char *path,
1094 	struct vhost_device_ops const * const ops)
1095 {
1096 	struct vhost_user_socket *vsocket;
1097 
1098 	pthread_mutex_lock(&vhost_user.mutex);
1099 	vsocket = find_vhost_user_socket(path);
1100 	if (vsocket)
1101 		vsocket->notify_ops = ops;
1102 	pthread_mutex_unlock(&vhost_user.mutex);
1103 
1104 	return vsocket ? 0 : -1;
1105 }
1106 
1107 struct vhost_device_ops const *
1108 vhost_driver_callback_get(const char *path)
1109 {
1110 	struct vhost_user_socket *vsocket;
1111 
1112 	pthread_mutex_lock(&vhost_user.mutex);
1113 	vsocket = find_vhost_user_socket(path);
1114 	pthread_mutex_unlock(&vhost_user.mutex);
1115 
1116 	return vsocket ? vsocket->notify_ops : NULL;
1117 }
1118 
1119 int
1120 rte_vhost_driver_start(const char *path)
1121 {
1122 	struct vhost_user_socket *vsocket;
1123 	static pthread_t fdset_tid;
1124 
1125 	pthread_mutex_lock(&vhost_user.mutex);
1126 	vsocket = find_vhost_user_socket(path);
1127 	pthread_mutex_unlock(&vhost_user.mutex);
1128 
1129 	if (!vsocket)
1130 		return -1;
1131 
1132 	if (fdset_tid == 0) {
1133 		/**
1134 		 * create a pipe which will be waited by poll and notified to
1135 		 * rebuild the wait list of poll.
1136 		 */
1137 		if (fdset_pipe_init(&vhost_user.fdset) < 0) {
1138 			VHOST_LOG_CONFIG(ERR,
1139 				"failed to create pipe for vhost fdset\n");
1140 			return -1;
1141 		}
1142 
1143 		int ret = rte_ctrl_thread_create(&fdset_tid,
1144 			"vhost-events", NULL, fdset_event_dispatch,
1145 			&vhost_user.fdset);
1146 		if (ret != 0) {
1147 			VHOST_LOG_CONFIG(ERR,
1148 				"failed to create fdset handling thread");
1149 
1150 			fdset_pipe_uninit(&vhost_user.fdset);
1151 			return -1;
1152 		}
1153 	}
1154 
1155 	if (vsocket->is_server)
1156 		return vhost_user_start_server(vsocket);
1157 	else
1158 		return vhost_user_start_client(vsocket);
1159 }
1160