xref: /dpdk/lib/vhost/socket.c (revision decb35d890209f603b01c1d23f35995bd51228fc)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2016 Intel Corporation
3  */
4 
5 #include <stdint.h>
6 #include <stdio.h>
7 #include <limits.h>
8 #include <stdlib.h>
9 #include <unistd.h>
10 #include <string.h>
11 #include <sys/socket.h>
12 #include <sys/un.h>
13 #include <sys/queue.h>
14 #include <errno.h>
15 #include <fcntl.h>
16 #include <pthread.h>
17 
18 #include <rte_log.h>
19 
20 #include "fd_man.h"
21 #include "vhost.h"
22 #include "vhost_user.h"
23 
24 
25 TAILQ_HEAD(vhost_user_connection_list, vhost_user_connection);
26 
/*
 * Every time rte_vhost_driver_register() is invoked, an associated
 * vhost_user_socket struct will be created.
 *
 * Instances are stored in vhost_user.vsockets[] and looked up by path.
 */
struct vhost_user_socket {
	/* Connections currently attached to this socket; accessed under conn_mutex. */
	struct vhost_user_connection_list conn_list;
	pthread_mutex_t conn_mutex;
	/* Heap-allocated copy of the socket path passed at registration. */
	char *path;
	/* Socket fd created by create_unix_socket(). */
	int socket_fd;
	/* AF_UNIX address built from path by create_unix_socket(). */
	struct sockaddr_un un;
	/* True unless RTE_VHOST_USER_CLIENT was given at registration. */
	bool is_server;
	/* Client mode: set unless RTE_VHOST_USER_NO_RECONNECT was given. */
	bool reconnect;
	bool iommu_support;
	/* Set at registration; cleared by rte_vhost_driver_set_features(). */
	bool use_builtin_virtio_net;
	/* The flags below are copied from the RTE_VHOST_USER_* registration flags. */
	bool extbuf;
	bool linearbuf;
	bool async_copy;
	bool net_compliant_ol_flags;
	bool stats_enabled;

	/*
	 * The "supported_features" indicates the feature bits the
	 * vhost driver supports. The "features" indicates the feature
	 * bits after the rte_vhost_driver_features_disable/enable().
	 * It is also the final feature bits used for vhost-user
	 * features negotiation.
	 */
	uint64_t supported_features;
	uint64_t features;

	uint64_t protocol_features;

	/* vDPA device attached via rte_vhost_driver_attach_vdpa_device(), or NULL. */
	struct rte_vdpa_device *vdpa_dev;

	/* Callbacks set by rte_vhost_driver_callback_register(); NULL until then. */
	struct rte_vhost_device_ops const *notify_ops;
};
63 
/*
 * One established vhost-user connection. Created by
 * vhost_user_add_connection() and linked into its socket's conn_list.
 */
struct vhost_user_connection {
	/* Socket this connection belongs to. */
	struct vhost_user_socket *vsocket;
	/* Connected fd registered in the fdset with vhost_user_read_cb(). */
	int connfd;
	/* Device id returned by vhost_new_device() for this connection. */
	int vid;

	TAILQ_ENTRY(vhost_user_connection) next;
};
71 
/* Upper bound on simultaneously registered sockets (see rte_vhost_driver_register()). */
#define MAX_VHOST_SOCKET 1024
/* Process-wide registry of vhost-user sockets plus the fdset that polls them. */
struct vhost_user {
	/* The first vsocket_cnt entries are valid; unregister keeps them packed. */
	struct vhost_user_socket *vsockets[MAX_VHOST_SOCKET];
	struct fdset fdset;
	int vsocket_cnt;
	/* Taken around every lookup in and update of vsockets[] / vsocket_cnt. */
	pthread_mutex_t mutex;
};
79 
/* Backlog value passed to listen() for server-mode sockets. */
#define MAX_VIRTIO_BACKLOG 128

/* Forward declarations of static functions that are used before their definitions. */
static void vhost_user_server_new_connection(int fd, void *data, int *remove);
static void vhost_user_read_cb(int fd, void *dat, int *remove);
static int create_unix_socket(struct vhost_user_socket *vsocket);
static int vhost_user_start_client(struct vhost_user_socket *vsocket);
86 
/*
 * The single registry instance. Every fdset slot is initialized with the
 * same entry through a range designator (a compiler extension), starting
 * with -1 - presumably the "no fd" marker; confirm against fd_man.h.
 */
static struct vhost_user vhost_user = {
	.fdset = {
		.fd = { [0 ... MAX_FDS - 1] = {-1, NULL, NULL, NULL, 0} },
		.fd_mutex = PTHREAD_MUTEX_INITIALIZER,
		.fd_pooling_mutex = PTHREAD_MUTEX_INITIALIZER,
		.num = 0
	},
	.vsocket_cnt = 0,
	.mutex = PTHREAD_MUTEX_INITIALIZER,
};
97 
98 /*
99  * return bytes# of read on success or negative val on failure. Update fdnum
100  * with number of fds read.
101  */
102 int
103 read_fd_message(char *ifname, int sockfd, char *buf, int buflen, int *fds, int max_fds,
104 		int *fd_num)
105 {
106 	struct iovec iov;
107 	struct msghdr msgh;
108 	char control[CMSG_SPACE(max_fds * sizeof(int))];
109 	struct cmsghdr *cmsg;
110 	int got_fds = 0;
111 	int ret;
112 
113 	*fd_num = 0;
114 
115 	memset(&msgh, 0, sizeof(msgh));
116 	iov.iov_base = buf;
117 	iov.iov_len  = buflen;
118 
119 	msgh.msg_iov = &iov;
120 	msgh.msg_iovlen = 1;
121 	msgh.msg_control = control;
122 	msgh.msg_controllen = sizeof(control);
123 
124 	ret = recvmsg(sockfd, &msgh, 0);
125 	if (ret <= 0) {
126 		if (ret)
127 			VHOST_LOG_CONFIG(ifname, ERR, "recvmsg failed on fd %d (%s)\n",
128 				sockfd, strerror(errno));
129 		return ret;
130 	}
131 
132 	if (msgh.msg_flags & (MSG_TRUNC | MSG_CTRUNC)) {
133 		VHOST_LOG_CONFIG(ifname, ERR, "truncated msg (fd %d)\n", sockfd);
134 		return -1;
135 	}
136 
137 	for (cmsg = CMSG_FIRSTHDR(&msgh); cmsg != NULL;
138 		cmsg = CMSG_NXTHDR(&msgh, cmsg)) {
139 		if ((cmsg->cmsg_level == SOL_SOCKET) &&
140 			(cmsg->cmsg_type == SCM_RIGHTS)) {
141 			got_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
142 			*fd_num = got_fds;
143 			memcpy(fds, CMSG_DATA(cmsg), got_fds * sizeof(int));
144 			break;
145 		}
146 	}
147 
148 	/* Clear out unused file descriptors */
149 	while (got_fds < max_fds)
150 		fds[got_fds++] = -1;
151 
152 	return ret;
153 }
154 
155 int
156 send_fd_message(char *ifname, int sockfd, char *buf, int buflen, int *fds, int fd_num)
157 {
158 
159 	struct iovec iov;
160 	struct msghdr msgh;
161 	size_t fdsize = fd_num * sizeof(int);
162 	char control[CMSG_SPACE(fdsize)];
163 	struct cmsghdr *cmsg;
164 	int ret;
165 
166 	memset(&msgh, 0, sizeof(msgh));
167 	iov.iov_base = buf;
168 	iov.iov_len = buflen;
169 
170 	msgh.msg_iov = &iov;
171 	msgh.msg_iovlen = 1;
172 
173 	if (fds && fd_num > 0) {
174 		msgh.msg_control = control;
175 		msgh.msg_controllen = sizeof(control);
176 		cmsg = CMSG_FIRSTHDR(&msgh);
177 		if (cmsg == NULL) {
178 			VHOST_LOG_CONFIG(ifname, ERR, "cmsg == NULL\n");
179 			errno = EINVAL;
180 			return -1;
181 		}
182 		cmsg->cmsg_len = CMSG_LEN(fdsize);
183 		cmsg->cmsg_level = SOL_SOCKET;
184 		cmsg->cmsg_type = SCM_RIGHTS;
185 		memcpy(CMSG_DATA(cmsg), fds, fdsize);
186 	} else {
187 		msgh.msg_control = NULL;
188 		msgh.msg_controllen = 0;
189 	}
190 
191 	do {
192 		ret = sendmsg(sockfd, &msgh, MSG_NOSIGNAL);
193 	} while (ret < 0 && errno == EINTR);
194 
195 	if (ret < 0) {
196 		VHOST_LOG_CONFIG(ifname, ERR, "sendmsg error on fd %d (%s)\n",
197 			sockfd, strerror(errno));
198 		return ret;
199 	}
200 
201 	return ret;
202 }
203 
204 static void
205 vhost_user_add_connection(int fd, struct vhost_user_socket *vsocket)
206 {
207 	int vid;
208 	size_t size;
209 	struct vhost_user_connection *conn;
210 	int ret;
211 	struct virtio_net *dev;
212 
213 	if (vsocket == NULL)
214 		return;
215 
216 	conn = malloc(sizeof(*conn));
217 	if (conn == NULL) {
218 		close(fd);
219 		return;
220 	}
221 
222 	vid = vhost_new_device();
223 	if (vid == -1) {
224 		goto err;
225 	}
226 
227 	size = strnlen(vsocket->path, PATH_MAX);
228 	vhost_set_ifname(vid, vsocket->path, size);
229 
230 	vhost_setup_virtio_net(vid, vsocket->use_builtin_virtio_net,
231 		vsocket->net_compliant_ol_flags, vsocket->stats_enabled);
232 
233 	vhost_attach_vdpa_device(vid, vsocket->vdpa_dev);
234 
235 	if (vsocket->extbuf)
236 		vhost_enable_extbuf(vid);
237 
238 	if (vsocket->linearbuf)
239 		vhost_enable_linearbuf(vid);
240 
241 	if (vsocket->async_copy) {
242 		dev = get_device(vid);
243 
244 		if (dev)
245 			dev->async_copy = 1;
246 	}
247 
248 	VHOST_LOG_CONFIG(vsocket->path, INFO, "new device, handle is %d\n", vid);
249 
250 	if (vsocket->notify_ops->new_connection) {
251 		ret = vsocket->notify_ops->new_connection(vid);
252 		if (ret < 0) {
253 			VHOST_LOG_CONFIG(vsocket->path, ERR,
254 				"failed to add vhost user connection with fd %d\n",
255 				fd);
256 			goto err_cleanup;
257 		}
258 	}
259 
260 	conn->connfd = fd;
261 	conn->vsocket = vsocket;
262 	conn->vid = vid;
263 	ret = fdset_add(&vhost_user.fdset, fd, vhost_user_read_cb,
264 			NULL, conn);
265 	if (ret < 0) {
266 		VHOST_LOG_CONFIG(vsocket->path, ERR,
267 			"failed to add fd %d into vhost server fdset\n",
268 			fd);
269 
270 		if (vsocket->notify_ops->destroy_connection)
271 			vsocket->notify_ops->destroy_connection(conn->vid);
272 
273 		goto err_cleanup;
274 	}
275 
276 	pthread_mutex_lock(&vsocket->conn_mutex);
277 	TAILQ_INSERT_TAIL(&vsocket->conn_list, conn, next);
278 	pthread_mutex_unlock(&vsocket->conn_mutex);
279 
280 	fdset_pipe_notify(&vhost_user.fdset);
281 	return;
282 
283 err_cleanup:
284 	vhost_destroy_device(vid);
285 err:
286 	free(conn);
287 	close(fd);
288 }
289 
290 /* call back when there is new vhost-user connection from client  */
291 static void
292 vhost_user_server_new_connection(int fd, void *dat, int *remove __rte_unused)
293 {
294 	struct vhost_user_socket *vsocket = dat;
295 
296 	fd = accept(fd, NULL, NULL);
297 	if (fd < 0)
298 		return;
299 
300 	VHOST_LOG_CONFIG(vsocket->path, INFO, "new vhost user connection is %d\n", fd);
301 	vhost_user_add_connection(fd, vsocket);
302 }
303 
304 static void
305 vhost_user_read_cb(int connfd, void *dat, int *remove)
306 {
307 	struct vhost_user_connection *conn = dat;
308 	struct vhost_user_socket *vsocket = conn->vsocket;
309 	int ret;
310 
311 	ret = vhost_user_msg_handler(conn->vid, connfd);
312 	if (ret < 0) {
313 		struct virtio_net *dev = get_device(conn->vid);
314 
315 		close(connfd);
316 		*remove = 1;
317 
318 		if (dev)
319 			vhost_destroy_device_notify(dev);
320 
321 		if (vsocket->notify_ops->destroy_connection)
322 			vsocket->notify_ops->destroy_connection(conn->vid);
323 
324 		vhost_destroy_device(conn->vid);
325 
326 		if (vsocket->reconnect) {
327 			create_unix_socket(vsocket);
328 			vhost_user_start_client(vsocket);
329 		}
330 
331 		pthread_mutex_lock(&vsocket->conn_mutex);
332 		TAILQ_REMOVE(&vsocket->conn_list, conn, next);
333 		pthread_mutex_unlock(&vsocket->conn_mutex);
334 
335 		free(conn);
336 	}
337 }
338 
339 static int
340 create_unix_socket(struct vhost_user_socket *vsocket)
341 {
342 	int fd;
343 	struct sockaddr_un *un = &vsocket->un;
344 
345 	fd = socket(AF_UNIX, SOCK_STREAM, 0);
346 	if (fd < 0)
347 		return -1;
348 	VHOST_LOG_CONFIG(vsocket->path, INFO, "vhost-user %s: socket created, fd: %d\n",
349 		vsocket->is_server ? "server" : "client", fd);
350 
351 	if (!vsocket->is_server && fcntl(fd, F_SETFL, O_NONBLOCK)) {
352 		VHOST_LOG_CONFIG(vsocket->path, ERR,
353 			"vhost-user: can't set nonblocking mode for socket, fd: %d (%s)\n",
354 			fd, strerror(errno));
355 		close(fd);
356 		return -1;
357 	}
358 
359 	memset(un, 0, sizeof(*un));
360 	un->sun_family = AF_UNIX;
361 	strncpy(un->sun_path, vsocket->path, sizeof(un->sun_path));
362 	un->sun_path[sizeof(un->sun_path) - 1] = '\0';
363 
364 	vsocket->socket_fd = fd;
365 	return 0;
366 }
367 
368 static int
369 vhost_user_start_server(struct vhost_user_socket *vsocket)
370 {
371 	int ret;
372 	int fd = vsocket->socket_fd;
373 	const char *path = vsocket->path;
374 
375 	/*
376 	 * bind () may fail if the socket file with the same name already
377 	 * exists. But the library obviously should not delete the file
378 	 * provided by the user, since we can not be sure that it is not
379 	 * being used by other applications. Moreover, many applications form
380 	 * socket names based on user input, which is prone to errors.
381 	 *
382 	 * The user must ensure that the socket does not exist before
383 	 * registering the vhost driver in server mode.
384 	 */
385 	ret = bind(fd, (struct sockaddr *)&vsocket->un, sizeof(vsocket->un));
386 	if (ret < 0) {
387 		VHOST_LOG_CONFIG(path, ERR, "failed to bind: %s; remove it and try again\n",
388 			strerror(errno));
389 		goto err;
390 	}
391 	VHOST_LOG_CONFIG(path, INFO, "binding succeeded\n");
392 
393 	ret = listen(fd, MAX_VIRTIO_BACKLOG);
394 	if (ret < 0)
395 		goto err;
396 
397 	ret = fdset_add(&vhost_user.fdset, fd, vhost_user_server_new_connection,
398 		  NULL, vsocket);
399 	if (ret < 0) {
400 		VHOST_LOG_CONFIG(path, ERR, "failed to add listen fd %d to vhost server fdset\n",
401 			fd);
402 		goto err;
403 	}
404 
405 	return 0;
406 
407 err:
408 	close(fd);
409 	return -1;
410 }
411 
/*
 * A client socket whose connection attempt has not succeeded yet; queued
 * by vhost_user_start_client() and retried by the reconnect thread.
 */
struct vhost_user_reconnect {
	/* Copy of the target address taken when the entry was queued. */
	struct sockaddr_un un;
	/* Socket fd on which connect() is retried. */
	int fd;
	struct vhost_user_socket *vsocket;

	TAILQ_ENTRY(vhost_user_reconnect) next;
};

TAILQ_HEAD(vhost_user_reconnect_tailq_list, vhost_user_reconnect);
/* Pending reconnect entries; head is accessed under mutex. */
struct vhost_user_reconnect_list {
	struct vhost_user_reconnect_tailq_list head;
	pthread_mutex_t mutex;
};

/* Initialized by vhost_user_reconnect_init(). */
static struct vhost_user_reconnect_list reconn_list;
/* Thread running vhost_user_client_reconnect(); compared with 0 to detect "not started". */
static pthread_t reconn_tid;
428 
429 static int
430 vhost_user_connect_nonblock(char *path, int fd, struct sockaddr *un, size_t sz)
431 {
432 	int ret, flags;
433 
434 	ret = connect(fd, un, sz);
435 	if (ret < 0 && errno != EISCONN)
436 		return -1;
437 
438 	flags = fcntl(fd, F_GETFL, 0);
439 	if (flags < 0) {
440 		VHOST_LOG_CONFIG(path, ERR, "can't get flags for connfd %d (%s)\n",
441 			fd, strerror(errno));
442 		return -2;
443 	}
444 	if ((flags & O_NONBLOCK) && fcntl(fd, F_SETFL, flags & ~O_NONBLOCK)) {
445 		VHOST_LOG_CONFIG(path, ERR, "can't disable nonblocking on fd %d\n", fd);
446 		return -2;
447 	}
448 	return 0;
449 }
450 
451 static void *
452 vhost_user_client_reconnect(void *arg __rte_unused)
453 {
454 	int ret;
455 	struct vhost_user_reconnect *reconn, *next;
456 
457 	while (1) {
458 		pthread_mutex_lock(&reconn_list.mutex);
459 
460 		/*
461 		 * An equal implementation of TAILQ_FOREACH_SAFE,
462 		 * which does not exist on all platforms.
463 		 */
464 		for (reconn = TAILQ_FIRST(&reconn_list.head);
465 		     reconn != NULL; reconn = next) {
466 			next = TAILQ_NEXT(reconn, next);
467 
468 			ret = vhost_user_connect_nonblock(reconn->vsocket->path, reconn->fd,
469 						(struct sockaddr *)&reconn->un,
470 						sizeof(reconn->un));
471 			if (ret == -2) {
472 				close(reconn->fd);
473 				VHOST_LOG_CONFIG(reconn->vsocket->path, ERR,
474 					"reconnection for fd %d failed\n",
475 					reconn->fd);
476 				goto remove_fd;
477 			}
478 			if (ret == -1)
479 				continue;
480 
481 			VHOST_LOG_CONFIG(reconn->vsocket->path, INFO, "connected\n");
482 			vhost_user_add_connection(reconn->fd, reconn->vsocket);
483 remove_fd:
484 			TAILQ_REMOVE(&reconn_list.head, reconn, next);
485 			free(reconn);
486 		}
487 
488 		pthread_mutex_unlock(&reconn_list.mutex);
489 		sleep(1);
490 	}
491 
492 	return NULL;
493 }
494 
495 static int
496 vhost_user_reconnect_init(void)
497 {
498 	int ret;
499 
500 	ret = pthread_mutex_init(&reconn_list.mutex, NULL);
501 	if (ret < 0) {
502 		VHOST_LOG_CONFIG("thread", ERR, "%s: failed to initialize mutex\n", __func__);
503 		return ret;
504 	}
505 	TAILQ_INIT(&reconn_list.head);
506 
507 	ret = rte_ctrl_thread_create(&reconn_tid, "vhost_reconn", NULL,
508 			     vhost_user_client_reconnect, NULL);
509 	if (ret != 0) {
510 		VHOST_LOG_CONFIG("thread", ERR, "failed to create reconnect thread\n");
511 		if (pthread_mutex_destroy(&reconn_list.mutex))
512 			VHOST_LOG_CONFIG("thread", ERR,
513 				"%s: failed to destroy reconnect mutex\n",
514 				__func__);
515 	}
516 
517 	return ret;
518 }
519 
520 static int
521 vhost_user_start_client(struct vhost_user_socket *vsocket)
522 {
523 	int ret;
524 	int fd = vsocket->socket_fd;
525 	const char *path = vsocket->path;
526 	struct vhost_user_reconnect *reconn;
527 
528 	ret = vhost_user_connect_nonblock(vsocket->path, fd, (struct sockaddr *)&vsocket->un,
529 					  sizeof(vsocket->un));
530 	if (ret == 0) {
531 		vhost_user_add_connection(fd, vsocket);
532 		return 0;
533 	}
534 
535 	VHOST_LOG_CONFIG(path, WARNING, "failed to connect: %s\n", strerror(errno));
536 
537 	if (ret == -2 || !vsocket->reconnect) {
538 		close(fd);
539 		return -1;
540 	}
541 
542 	VHOST_LOG_CONFIG(path, INFO, "reconnecting...\n");
543 	reconn = malloc(sizeof(*reconn));
544 	if (reconn == NULL) {
545 		VHOST_LOG_CONFIG(path, ERR, "failed to allocate memory for reconnect\n");
546 		close(fd);
547 		return -1;
548 	}
549 	reconn->un = vsocket->un;
550 	reconn->fd = fd;
551 	reconn->vsocket = vsocket;
552 	pthread_mutex_lock(&reconn_list.mutex);
553 	TAILQ_INSERT_TAIL(&reconn_list.head, reconn, next);
554 	pthread_mutex_unlock(&reconn_list.mutex);
555 
556 	return 0;
557 }
558 
559 static struct vhost_user_socket *
560 find_vhost_user_socket(const char *path)
561 {
562 	int i;
563 
564 	if (path == NULL)
565 		return NULL;
566 
567 	for (i = 0; i < vhost_user.vsocket_cnt; i++) {
568 		struct vhost_user_socket *vsocket = vhost_user.vsockets[i];
569 
570 		if (!strcmp(vsocket->path, path))
571 			return vsocket;
572 	}
573 
574 	return NULL;
575 }
576 
577 int
578 rte_vhost_driver_attach_vdpa_device(const char *path,
579 		struct rte_vdpa_device *dev)
580 {
581 	struct vhost_user_socket *vsocket;
582 
583 	if (dev == NULL || path == NULL)
584 		return -1;
585 
586 	pthread_mutex_lock(&vhost_user.mutex);
587 	vsocket = find_vhost_user_socket(path);
588 	if (vsocket)
589 		vsocket->vdpa_dev = dev;
590 	pthread_mutex_unlock(&vhost_user.mutex);
591 
592 	return vsocket ? 0 : -1;
593 }
594 
595 int
596 rte_vhost_driver_detach_vdpa_device(const char *path)
597 {
598 	struct vhost_user_socket *vsocket;
599 
600 	pthread_mutex_lock(&vhost_user.mutex);
601 	vsocket = find_vhost_user_socket(path);
602 	if (vsocket)
603 		vsocket->vdpa_dev = NULL;
604 	pthread_mutex_unlock(&vhost_user.mutex);
605 
606 	return vsocket ? 0 : -1;
607 }
608 
609 struct rte_vdpa_device *
610 rte_vhost_driver_get_vdpa_device(const char *path)
611 {
612 	struct vhost_user_socket *vsocket;
613 	struct rte_vdpa_device *dev = NULL;
614 
615 	pthread_mutex_lock(&vhost_user.mutex);
616 	vsocket = find_vhost_user_socket(path);
617 	if (vsocket)
618 		dev = vsocket->vdpa_dev;
619 	pthread_mutex_unlock(&vhost_user.mutex);
620 
621 	return dev;
622 }
623 
624 int
625 rte_vhost_driver_get_vdpa_dev_type(const char *path, uint32_t *type)
626 {
627 	struct vhost_user_socket *vsocket;
628 	struct rte_vdpa_device *vdpa_dev;
629 	uint32_t vdpa_type = 0;
630 	int ret = 0;
631 
632 	pthread_mutex_lock(&vhost_user.mutex);
633 	vsocket = find_vhost_user_socket(path);
634 	if (!vsocket) {
635 		VHOST_LOG_CONFIG(path, ERR, "socket file is not registered yet.\n");
636 		ret = -1;
637 		goto unlock_exit;
638 	}
639 
640 	vdpa_dev = vsocket->vdpa_dev;
641 	if (!vdpa_dev) {
642 		ret = -1;
643 		goto unlock_exit;
644 	}
645 
646 	if (vdpa_dev->ops->get_dev_type) {
647 		ret = vdpa_dev->ops->get_dev_type(vdpa_dev, &vdpa_type);
648 		if (ret) {
649 			VHOST_LOG_CONFIG(path, ERR,
650 				"failed to get vdpa dev type for socket file.\n");
651 			ret = -1;
652 			goto unlock_exit;
653 		}
654 	} else {
655 		vdpa_type = RTE_VHOST_VDPA_DEVICE_TYPE_NET;
656 	}
657 
658 	*type = vdpa_type;
659 
660 unlock_exit:
661 	pthread_mutex_unlock(&vhost_user.mutex);
662 	return ret;
663 }
664 
665 int
666 rte_vhost_driver_disable_features(const char *path, uint64_t features)
667 {
668 	struct vhost_user_socket *vsocket;
669 
670 	pthread_mutex_lock(&vhost_user.mutex);
671 	vsocket = find_vhost_user_socket(path);
672 
673 	/* Note that use_builtin_virtio_net is not affected by this function
674 	 * since callers may want to selectively disable features of the
675 	 * built-in vhost net device backend.
676 	 */
677 
678 	if (vsocket)
679 		vsocket->features &= ~features;
680 	pthread_mutex_unlock(&vhost_user.mutex);
681 
682 	return vsocket ? 0 : -1;
683 }
684 
685 int
686 rte_vhost_driver_enable_features(const char *path, uint64_t features)
687 {
688 	struct vhost_user_socket *vsocket;
689 
690 	pthread_mutex_lock(&vhost_user.mutex);
691 	vsocket = find_vhost_user_socket(path);
692 	if (vsocket) {
693 		if ((vsocket->supported_features & features) != features) {
694 			/*
695 			 * trying to enable features the driver doesn't
696 			 * support.
697 			 */
698 			pthread_mutex_unlock(&vhost_user.mutex);
699 			return -1;
700 		}
701 		vsocket->features |= features;
702 	}
703 	pthread_mutex_unlock(&vhost_user.mutex);
704 
705 	return vsocket ? 0 : -1;
706 }
707 
708 int
709 rte_vhost_driver_set_features(const char *path, uint64_t features)
710 {
711 	struct vhost_user_socket *vsocket;
712 
713 	pthread_mutex_lock(&vhost_user.mutex);
714 	vsocket = find_vhost_user_socket(path);
715 	if (vsocket) {
716 		vsocket->supported_features = features;
717 		vsocket->features = features;
718 
719 		/* Anyone setting feature bits is implementing their own vhost
720 		 * device backend.
721 		 */
722 		vsocket->use_builtin_virtio_net = false;
723 	}
724 	pthread_mutex_unlock(&vhost_user.mutex);
725 
726 	return vsocket ? 0 : -1;
727 }
728 
729 int
730 rte_vhost_driver_get_features(const char *path, uint64_t *features)
731 {
732 	struct vhost_user_socket *vsocket;
733 	uint64_t vdpa_features;
734 	struct rte_vdpa_device *vdpa_dev;
735 	int ret = 0;
736 
737 	pthread_mutex_lock(&vhost_user.mutex);
738 	vsocket = find_vhost_user_socket(path);
739 	if (!vsocket) {
740 		VHOST_LOG_CONFIG(path, ERR, "socket file is not registered yet.\n");
741 		ret = -1;
742 		goto unlock_exit;
743 	}
744 
745 	vdpa_dev = vsocket->vdpa_dev;
746 	if (!vdpa_dev) {
747 		*features = vsocket->features;
748 		goto unlock_exit;
749 	}
750 
751 	if (vdpa_dev->ops->get_features(vdpa_dev, &vdpa_features) < 0) {
752 		VHOST_LOG_CONFIG(path, ERR, "failed to get vdpa features for socket file.\n");
753 		ret = -1;
754 		goto unlock_exit;
755 	}
756 
757 	*features = vsocket->features & vdpa_features;
758 
759 unlock_exit:
760 	pthread_mutex_unlock(&vhost_user.mutex);
761 	return ret;
762 }
763 
764 int
765 rte_vhost_driver_set_protocol_features(const char *path,
766 		uint64_t protocol_features)
767 {
768 	struct vhost_user_socket *vsocket;
769 
770 	pthread_mutex_lock(&vhost_user.mutex);
771 	vsocket = find_vhost_user_socket(path);
772 	if (vsocket)
773 		vsocket->protocol_features = protocol_features;
774 	pthread_mutex_unlock(&vhost_user.mutex);
775 	return vsocket ? 0 : -1;
776 }
777 
778 int
779 rte_vhost_driver_get_protocol_features(const char *path,
780 		uint64_t *protocol_features)
781 {
782 	struct vhost_user_socket *vsocket;
783 	uint64_t vdpa_protocol_features;
784 	struct rte_vdpa_device *vdpa_dev;
785 	int ret = 0;
786 
787 	pthread_mutex_lock(&vhost_user.mutex);
788 	vsocket = find_vhost_user_socket(path);
789 	if (!vsocket) {
790 		VHOST_LOG_CONFIG(path, ERR, "socket file is not registered yet.\n");
791 		ret = -1;
792 		goto unlock_exit;
793 	}
794 
795 	vdpa_dev = vsocket->vdpa_dev;
796 	if (!vdpa_dev) {
797 		*protocol_features = vsocket->protocol_features;
798 		goto unlock_exit;
799 	}
800 
801 	if (vdpa_dev->ops->get_protocol_features(vdpa_dev,
802 				&vdpa_protocol_features) < 0) {
803 		VHOST_LOG_CONFIG(path, ERR, "failed to get vdpa protocol features.\n");
804 		ret = -1;
805 		goto unlock_exit;
806 	}
807 
808 	*protocol_features = vsocket->protocol_features
809 		& vdpa_protocol_features;
810 
811 unlock_exit:
812 	pthread_mutex_unlock(&vhost_user.mutex);
813 	return ret;
814 }
815 
816 int
817 rte_vhost_driver_get_queue_num(const char *path, uint32_t *queue_num)
818 {
819 	struct vhost_user_socket *vsocket;
820 	uint32_t vdpa_queue_num;
821 	struct rte_vdpa_device *vdpa_dev;
822 	int ret = 0;
823 
824 	pthread_mutex_lock(&vhost_user.mutex);
825 	vsocket = find_vhost_user_socket(path);
826 	if (!vsocket) {
827 		VHOST_LOG_CONFIG(path, ERR, "socket file is not registered yet.\n");
828 		ret = -1;
829 		goto unlock_exit;
830 	}
831 
832 	vdpa_dev = vsocket->vdpa_dev;
833 	if (!vdpa_dev) {
834 		*queue_num = VHOST_MAX_QUEUE_PAIRS;
835 		goto unlock_exit;
836 	}
837 
838 	if (vdpa_dev->ops->get_queue_num(vdpa_dev, &vdpa_queue_num) < 0) {
839 		VHOST_LOG_CONFIG(path, ERR, "failed to get vdpa queue number.\n");
840 		ret = -1;
841 		goto unlock_exit;
842 	}
843 
844 	*queue_num = RTE_MIN((uint32_t)VHOST_MAX_QUEUE_PAIRS, vdpa_queue_num);
845 
846 unlock_exit:
847 	pthread_mutex_unlock(&vhost_user.mutex);
848 	return ret;
849 }
850 
851 static void
852 vhost_user_socket_mem_free(struct vhost_user_socket *vsocket)
853 {
854 	if (vsocket && vsocket->path) {
855 		free(vsocket->path);
856 		vsocket->path = NULL;
857 	}
858 
859 	if (vsocket) {
860 		free(vsocket);
861 		vsocket = NULL;
862 	}
863 }
864 
865 /*
866  * Register a new vhost-user socket; here we could act as server
867  * (the default case), or client (when RTE_VHOST_USER_CLIENT) flag
868  * is set.
869  */
870 int
871 rte_vhost_driver_register(const char *path, uint64_t flags)
872 {
873 	int ret = -1;
874 	struct vhost_user_socket *vsocket;
875 
876 	if (!path)
877 		return -1;
878 
879 	pthread_mutex_lock(&vhost_user.mutex);
880 
881 	if (vhost_user.vsocket_cnt == MAX_VHOST_SOCKET) {
882 		VHOST_LOG_CONFIG(path, ERR, "the number of vhost sockets reaches maximum\n");
883 		goto out;
884 	}
885 
886 	vsocket = malloc(sizeof(struct vhost_user_socket));
887 	if (!vsocket)
888 		goto out;
889 	memset(vsocket, 0, sizeof(struct vhost_user_socket));
890 	vsocket->path = strdup(path);
891 	if (vsocket->path == NULL) {
892 		VHOST_LOG_CONFIG(path, ERR, "failed to copy socket path string\n");
893 		vhost_user_socket_mem_free(vsocket);
894 		goto out;
895 	}
896 	TAILQ_INIT(&vsocket->conn_list);
897 	ret = pthread_mutex_init(&vsocket->conn_mutex, NULL);
898 	if (ret) {
899 		VHOST_LOG_CONFIG(path, ERR, "failed to init connection mutex\n");
900 		goto out_free;
901 	}
902 	vsocket->vdpa_dev = NULL;
903 	vsocket->extbuf = flags & RTE_VHOST_USER_EXTBUF_SUPPORT;
904 	vsocket->linearbuf = flags & RTE_VHOST_USER_LINEARBUF_SUPPORT;
905 	vsocket->async_copy = flags & RTE_VHOST_USER_ASYNC_COPY;
906 	vsocket->net_compliant_ol_flags = flags & RTE_VHOST_USER_NET_COMPLIANT_OL_FLAGS;
907 	vsocket->stats_enabled = flags & RTE_VHOST_USER_NET_STATS_ENABLE;
908 
909 	if (vsocket->async_copy &&
910 		(flags & (RTE_VHOST_USER_IOMMU_SUPPORT |
911 		RTE_VHOST_USER_POSTCOPY_SUPPORT))) {
912 		VHOST_LOG_CONFIG(path, ERR, "async copy with IOMMU or post-copy not supported\n");
913 		goto out_mutex;
914 	}
915 
916 	/*
917 	 * Set the supported features correctly for the builtin vhost-user
918 	 * net driver.
919 	 *
920 	 * Applications know nothing about features the builtin virtio net
921 	 * driver (virtio_net.c) supports, thus it's not possible for them
922 	 * to invoke rte_vhost_driver_set_features(). To workaround it, here
923 	 * we set it unconditionally. If the application want to implement
924 	 * another vhost-user driver (say SCSI), it should call the
925 	 * rte_vhost_driver_set_features(), which will overwrite following
926 	 * two values.
927 	 */
928 	vsocket->use_builtin_virtio_net = true;
929 	vsocket->supported_features = VIRTIO_NET_SUPPORTED_FEATURES;
930 	vsocket->features           = VIRTIO_NET_SUPPORTED_FEATURES;
931 	vsocket->protocol_features  = VHOST_USER_PROTOCOL_FEATURES;
932 
933 	if (vsocket->async_copy) {
934 		vsocket->supported_features &= ~(1ULL << VHOST_F_LOG_ALL);
935 		vsocket->features &= ~(1ULL << VHOST_F_LOG_ALL);
936 		VHOST_LOG_CONFIG(path, INFO, "logging feature is disabled in async copy mode\n");
937 	}
938 
939 	/*
940 	 * We'll not be able to receive a buffer from guest in linear mode
941 	 * without external buffer if it will not fit in a single mbuf, which is
942 	 * likely if segmentation offloading enabled.
943 	 */
944 	if (vsocket->linearbuf && !vsocket->extbuf) {
945 		uint64_t seg_offload_features =
946 				(1ULL << VIRTIO_NET_F_HOST_TSO4) |
947 				(1ULL << VIRTIO_NET_F_HOST_TSO6) |
948 				(1ULL << VIRTIO_NET_F_HOST_UFO);
949 
950 		VHOST_LOG_CONFIG(path, INFO, "Linear buffers requested without external buffers,\n");
951 		VHOST_LOG_CONFIG(path, INFO, "disabling host segmentation offloading support\n");
952 		vsocket->supported_features &= ~seg_offload_features;
953 		vsocket->features &= ~seg_offload_features;
954 	}
955 
956 	if (!(flags & RTE_VHOST_USER_IOMMU_SUPPORT)) {
957 		vsocket->supported_features &= ~(1ULL << VIRTIO_F_IOMMU_PLATFORM);
958 		vsocket->features &= ~(1ULL << VIRTIO_F_IOMMU_PLATFORM);
959 	}
960 
961 	if (!(flags & RTE_VHOST_USER_POSTCOPY_SUPPORT)) {
962 		vsocket->protocol_features &=
963 			~(1ULL << VHOST_USER_PROTOCOL_F_PAGEFAULT);
964 	} else {
965 #ifndef RTE_LIBRTE_VHOST_POSTCOPY
966 		VHOST_LOG_CONFIG(path, ERR, "Postcopy requested but not compiled\n");
967 		ret = -1;
968 		goto out_mutex;
969 #endif
970 	}
971 
972 	if ((flags & RTE_VHOST_USER_CLIENT) != 0) {
973 		vsocket->reconnect = !(flags & RTE_VHOST_USER_NO_RECONNECT);
974 		if (vsocket->reconnect && reconn_tid == 0) {
975 			if (vhost_user_reconnect_init() != 0)
976 				goto out_mutex;
977 		}
978 	} else {
979 		vsocket->is_server = true;
980 	}
981 	ret = create_unix_socket(vsocket);
982 	if (ret < 0) {
983 		goto out_mutex;
984 	}
985 
986 	vhost_user.vsockets[vhost_user.vsocket_cnt++] = vsocket;
987 
988 	pthread_mutex_unlock(&vhost_user.mutex);
989 	return ret;
990 
991 out_mutex:
992 	if (pthread_mutex_destroy(&vsocket->conn_mutex)) {
993 		VHOST_LOG_CONFIG(path, ERR, "failed to destroy connection mutex\n");
994 	}
995 out_free:
996 	vhost_user_socket_mem_free(vsocket);
997 out:
998 	pthread_mutex_unlock(&vhost_user.mutex);
999 
1000 	return ret;
1001 }
1002 
1003 static bool
1004 vhost_user_remove_reconnect(struct vhost_user_socket *vsocket)
1005 {
1006 	int found = false;
1007 	struct vhost_user_reconnect *reconn, *next;
1008 
1009 	pthread_mutex_lock(&reconn_list.mutex);
1010 
1011 	for (reconn = TAILQ_FIRST(&reconn_list.head);
1012 	     reconn != NULL; reconn = next) {
1013 		next = TAILQ_NEXT(reconn, next);
1014 
1015 		if (reconn->vsocket == vsocket) {
1016 			TAILQ_REMOVE(&reconn_list.head, reconn, next);
1017 			close(reconn->fd);
1018 			free(reconn);
1019 			found = true;
1020 			break;
1021 		}
1022 	}
1023 	pthread_mutex_unlock(&reconn_list.mutex);
1024 	return found;
1025 }
1026 
/**
 * Unregister the specified vhost socket.
 *
 * Removes the socket's fds from the fdset, destroys every device still
 * connected through it, closes (and, in server mode, unlinks) the socket
 * and frees the vhost_user_socket.
 *
 * Whenever an fd cannot be removed from the fdset because its callback is
 * running, all locks are dropped and the whole operation restarts from
 * the `again` label.
 *
 * Returns 0 on success, -1 if path is NULL or not registered.
 */
int
rte_vhost_driver_unregister(const char *path)
{
	int i;
	int count;
	struct vhost_user_connection *conn, *next;

	if (path == NULL)
		return -1;

again:
	pthread_mutex_lock(&vhost_user.mutex);

	for (i = 0; i < vhost_user.vsocket_cnt; i++) {
		struct vhost_user_socket *vsocket = vhost_user.vsockets[i];
		if (strcmp(vsocket->path, path))
			continue;

		if (vsocket->is_server) {
			/*
			 * If r/wcb is executing, release vhost_user's
			 * mutex lock, and try again since the r/wcb
			 * may use the mutex lock.
			 */
			if (fdset_try_del(&vhost_user.fdset, vsocket->socket_fd) == -1) {
				pthread_mutex_unlock(&vhost_user.mutex);
				goto again;
			}
		} else if (vsocket->reconnect) {
			/* Client mode: drop any connection attempt still queued. */
			vhost_user_remove_reconnect(vsocket);
		}

		pthread_mutex_lock(&vsocket->conn_mutex);
		/* next is fetched first because conn is freed inside the loop. */
		for (conn = TAILQ_FIRST(&vsocket->conn_list);
			 conn != NULL;
			 conn = next) {
			next = TAILQ_NEXT(conn, next);

			/*
			 * If r/wcb is executing, release vsocket's
			 * conn_mutex and vhost_user's mutex locks, and
			 * try again since the r/wcb may use the
			 * conn_mutex and mutex locks.
			 */
			if (fdset_try_del(&vhost_user.fdset,
					  conn->connfd) == -1) {
				pthread_mutex_unlock(&vsocket->conn_mutex);
				pthread_mutex_unlock(&vhost_user.mutex);
				goto again;
			}

			VHOST_LOG_CONFIG(path, INFO, "free connfd %d\n", conn->connfd);
			close(conn->connfd);
			vhost_destroy_device(conn->vid);
			TAILQ_REMOVE(&vsocket->conn_list, conn, next);
			free(conn);
		}
		pthread_mutex_unlock(&vsocket->conn_mutex);

		if (vsocket->is_server) {
			close(vsocket->socket_fd);
			unlink(path);
		}

		pthread_mutex_destroy(&vsocket->conn_mutex);
		vhost_user_socket_mem_free(vsocket);

		/* Keep vsockets[] packed: move the last entry into the freed slot. */
		count = --vhost_user.vsocket_cnt;
		vhost_user.vsockets[i] = vhost_user.vsockets[count];
		vhost_user.vsockets[count] = NULL;
		pthread_mutex_unlock(&vhost_user.mutex);
		return 0;
	}
	pthread_mutex_unlock(&vhost_user.mutex);

	return -1;
}
1107 
1108 /*
1109  * Register ops so that we can add/remove device to data core.
1110  */
1111 int
1112 rte_vhost_driver_callback_register(const char *path,
1113 	struct rte_vhost_device_ops const * const ops)
1114 {
1115 	struct vhost_user_socket *vsocket;
1116 
1117 	pthread_mutex_lock(&vhost_user.mutex);
1118 	vsocket = find_vhost_user_socket(path);
1119 	if (vsocket)
1120 		vsocket->notify_ops = ops;
1121 	pthread_mutex_unlock(&vhost_user.mutex);
1122 
1123 	return vsocket ? 0 : -1;
1124 }
1125 
1126 struct rte_vhost_device_ops const *
1127 vhost_driver_callback_get(const char *path)
1128 {
1129 	struct vhost_user_socket *vsocket;
1130 
1131 	pthread_mutex_lock(&vhost_user.mutex);
1132 	vsocket = find_vhost_user_socket(path);
1133 	pthread_mutex_unlock(&vhost_user.mutex);
1134 
1135 	return vsocket ? vsocket->notify_ops : NULL;
1136 }
1137 
1138 int
1139 rte_vhost_driver_start(const char *path)
1140 {
1141 	struct vhost_user_socket *vsocket;
1142 	static pthread_t fdset_tid;
1143 
1144 	pthread_mutex_lock(&vhost_user.mutex);
1145 	vsocket = find_vhost_user_socket(path);
1146 	pthread_mutex_unlock(&vhost_user.mutex);
1147 
1148 	if (!vsocket)
1149 		return -1;
1150 
1151 	if (fdset_tid == 0) {
1152 		/**
1153 		 * create a pipe which will be waited by poll and notified to
1154 		 * rebuild the wait list of poll.
1155 		 */
1156 		if (fdset_pipe_init(&vhost_user.fdset) < 0) {
1157 			VHOST_LOG_CONFIG(path, ERR, "failed to create pipe for vhost fdset\n");
1158 			return -1;
1159 		}
1160 
1161 		int ret = rte_ctrl_thread_create(&fdset_tid,
1162 			"vhost-events", NULL, fdset_event_dispatch,
1163 			&vhost_user.fdset);
1164 		if (ret != 0) {
1165 			VHOST_LOG_CONFIG(path, ERR, "failed to create fdset handling thread\n");
1166 			fdset_pipe_uninit(&vhost_user.fdset);
1167 			return -1;
1168 		}
1169 	}
1170 
1171 	if (vsocket->is_server)
1172 		return vhost_user_start_server(vsocket);
1173 	else
1174 		return vhost_user_start_client(vsocket);
1175 }
1176