xref: /dpdk/lib/vhost/socket.c (revision 5d52418fa4b9a7f28eaedc1d88ec5cf330381c0e)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2016 Intel Corporation
3  */
4 
5 #include <stdint.h>
6 #include <stdio.h>
7 #include <limits.h>
8 #include <stdlib.h>
9 #include <unistd.h>
10 #include <string.h>
11 #include <sys/socket.h>
12 #include <sys/un.h>
13 #include <sys/queue.h>
14 #include <errno.h>
15 #include <fcntl.h>
16 
17 #include <rte_thread.h>
18 #include <rte_log.h>
19 
20 #include "fd_man.h"
21 #include "vduse.h"
22 #include "vhost.h"
23 #include "vhost_user.h"
24 
25 
26 TAILQ_HEAD(vhost_user_connection_list, vhost_user_connection);
27 
/*
 * Every time rte_vhost_driver_register() is invoked, an associated
 * vhost_user_socket struct will be created.
 */
struct vhost_user_socket {
	struct vhost_user_connection_list conn_list;	/* live connections on this socket */
	pthread_mutex_t conn_mutex;	/* protects conn_list */
	char *path;			/* socket path (strdup'd copy, owned here) */
	int socket_fd;			/* listen fd (server) or connecting fd (client) */
	struct sockaddr_un un;		/* address used for bind()/connect() */
	bool is_server;			/* server mode (default) vs client mode */
	bool is_vduse;			/* backed by VDUSE instead of vhost-user */
	bool reconnect;			/* client mode: keep retrying the connection */
	bool iommu_support;
	bool use_builtin_virtio_net;	/* use the built-in virtio-net backend */
	bool extbuf;			/* allow attaching external buffers to mbufs */
	bool linearbuf;			/* require packets to fit a single mbuf */
	bool async_copy;		/* asynchronous (DMA-assisted) copy mode */
	bool net_compliant_ol_flags;
	bool stats_enabled;

	/*
	 * The "supported_features" indicates the feature bits the
	 * vhost driver supports. The "features" indicates the feature
	 * bits after the rte_vhost_driver_features_disable/enable().
	 * It is also the final feature bits used for vhost-user
	 * features negotiation.
	 */
	uint64_t supported_features;
	uint64_t features;

	uint64_t protocol_features;	/* vhost-user protocol feature bits */

	uint32_t max_queue_pairs;	/* upper bound on queue pairs for this device */

	struct rte_vdpa_device *vdpa_dev;	/* attached vDPA device, or NULL */

	struct rte_vhost_device_ops const *notify_ops;	/* application callbacks */
};
67 
/* One established connection on a vhost-user socket. */
struct vhost_user_connection {
	struct vhost_user_socket *vsocket;	/* owning socket */
	int connfd;				/* connected fd, polled by the fdset */
	int vid;				/* vhost device id of this connection */

	TAILQ_ENTRY(vhost_user_connection) next;
};
75 
#define MAX_VHOST_SOCKET 1024
/* Process-wide registry of every registered vhost-user socket. */
struct vhost_user {
	struct vhost_user_socket *vsockets[MAX_VHOST_SOCKET];
	struct fdset fdset;	/* fds watched by the event dispatch thread */
	int vsocket_cnt;
	pthread_mutex_t mutex;	/* protects vsockets[] and vsocket_cnt */
};
83 
84 #define MAX_VIRTIO_BACKLOG 128
85 
86 static void vhost_user_server_new_connection(int fd, void *data, int *remove);
87 static void vhost_user_read_cb(int fd, void *dat, int *remove);
88 static int create_unix_socket(struct vhost_user_socket *vsocket);
89 static int vhost_user_start_client(struct vhost_user_socket *vsocket);
90 
/* The single global instance of the socket registry. */
static struct vhost_user vhost_user = {
	.fdset = {
		.fd = { [0 ... MAX_FDS - 1] = {-1, NULL, NULL, NULL, 0} },
		.fd_mutex = PTHREAD_MUTEX_INITIALIZER,
		.fd_pooling_mutex = PTHREAD_MUTEX_INITIALIZER,
		.num = 0
	},
	.vsocket_cnt = 0,
	.mutex = PTHREAD_MUTEX_INITIALIZER,
};
101 
102 /*
103  * return bytes# of read on success or negative val on failure. Update fdnum
104  * with number of fds read.
105  */
106 int
107 read_fd_message(char *ifname, int sockfd, char *buf, int buflen, int *fds, int max_fds,
108 		int *fd_num)
109 {
110 	struct iovec iov;
111 	struct msghdr msgh;
112 	char control[CMSG_SPACE(max_fds * sizeof(int))];
113 	struct cmsghdr *cmsg;
114 	int got_fds = 0;
115 	int ret;
116 
117 	*fd_num = 0;
118 
119 	memset(&msgh, 0, sizeof(msgh));
120 	iov.iov_base = buf;
121 	iov.iov_len  = buflen;
122 
123 	msgh.msg_iov = &iov;
124 	msgh.msg_iovlen = 1;
125 	msgh.msg_control = control;
126 	msgh.msg_controllen = sizeof(control);
127 
128 	ret = recvmsg(sockfd, &msgh, 0);
129 	if (ret <= 0) {
130 		if (ret)
131 			VHOST_LOG_CONFIG(ifname, ERR, "recvmsg failed on fd %d (%s)\n",
132 				sockfd, strerror(errno));
133 		return ret;
134 	}
135 
136 	if (msgh.msg_flags & MSG_TRUNC)
137 		VHOST_LOG_CONFIG(ifname, ERR, "truncated msg (fd %d)\n", sockfd);
138 
139 	/* MSG_CTRUNC may be caused by LSM misconfiguration */
140 	if (msgh.msg_flags & MSG_CTRUNC)
141 		VHOST_LOG_CONFIG(ifname, ERR, "truncated control data (fd %d)\n", sockfd);
142 
143 	for (cmsg = CMSG_FIRSTHDR(&msgh); cmsg != NULL;
144 		cmsg = CMSG_NXTHDR(&msgh, cmsg)) {
145 		if ((cmsg->cmsg_level == SOL_SOCKET) &&
146 			(cmsg->cmsg_type == SCM_RIGHTS)) {
147 			got_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
148 			*fd_num = got_fds;
149 			memcpy(fds, CMSG_DATA(cmsg), got_fds * sizeof(int));
150 			break;
151 		}
152 	}
153 
154 	/* Clear out unused file descriptors */
155 	while (got_fds < max_fds)
156 		fds[got_fds++] = -1;
157 
158 	return ret;
159 }
160 
161 int
162 send_fd_message(char *ifname, int sockfd, char *buf, int buflen, int *fds, int fd_num)
163 {
164 
165 	struct iovec iov;
166 	struct msghdr msgh;
167 	size_t fdsize = fd_num * sizeof(int);
168 	char control[CMSG_SPACE(fdsize)];
169 	struct cmsghdr *cmsg;
170 	int ret;
171 
172 	memset(&msgh, 0, sizeof(msgh));
173 	iov.iov_base = buf;
174 	iov.iov_len = buflen;
175 
176 	msgh.msg_iov = &iov;
177 	msgh.msg_iovlen = 1;
178 
179 	if (fds && fd_num > 0) {
180 		msgh.msg_control = control;
181 		msgh.msg_controllen = sizeof(control);
182 		cmsg = CMSG_FIRSTHDR(&msgh);
183 		if (cmsg == NULL) {
184 			VHOST_LOG_CONFIG(ifname, ERR, "cmsg == NULL\n");
185 			errno = EINVAL;
186 			return -1;
187 		}
188 		cmsg->cmsg_len = CMSG_LEN(fdsize);
189 		cmsg->cmsg_level = SOL_SOCKET;
190 		cmsg->cmsg_type = SCM_RIGHTS;
191 		memcpy(CMSG_DATA(cmsg), fds, fdsize);
192 	} else {
193 		msgh.msg_control = NULL;
194 		msgh.msg_controllen = 0;
195 	}
196 
197 	do {
198 		ret = sendmsg(sockfd, &msgh, MSG_NOSIGNAL);
199 	} while (ret < 0 && errno == EINTR);
200 
201 	if (ret < 0) {
202 		VHOST_LOG_CONFIG(ifname, ERR, "sendmsg error on fd %d (%s)\n",
203 			sockfd, strerror(errno));
204 		return ret;
205 	}
206 
207 	return ret;
208 }
209 
210 static void
211 vhost_user_add_connection(int fd, struct vhost_user_socket *vsocket)
212 {
213 	int vid;
214 	size_t size;
215 	struct vhost_user_connection *conn;
216 	int ret;
217 	struct virtio_net *dev;
218 
219 	if (vsocket == NULL)
220 		return;
221 
222 	conn = malloc(sizeof(*conn));
223 	if (conn == NULL) {
224 		close(fd);
225 		return;
226 	}
227 
228 	vid = vhost_user_new_device();
229 	if (vid == -1) {
230 		goto err;
231 	}
232 
233 	size = strnlen(vsocket->path, PATH_MAX);
234 	vhost_set_ifname(vid, vsocket->path, size);
235 
236 	vhost_setup_virtio_net(vid, vsocket->use_builtin_virtio_net,
237 		vsocket->net_compliant_ol_flags, vsocket->stats_enabled,
238 		vsocket->iommu_support);
239 
240 	vhost_attach_vdpa_device(vid, vsocket->vdpa_dev);
241 
242 	if (vsocket->extbuf)
243 		vhost_enable_extbuf(vid);
244 
245 	if (vsocket->linearbuf)
246 		vhost_enable_linearbuf(vid);
247 
248 	if (vsocket->async_copy) {
249 		dev = get_device(vid);
250 
251 		if (dev)
252 			dev->async_copy = 1;
253 	}
254 
255 	VHOST_LOG_CONFIG(vsocket->path, INFO, "new device, handle is %d\n", vid);
256 
257 	if (vsocket->notify_ops->new_connection) {
258 		ret = vsocket->notify_ops->new_connection(vid);
259 		if (ret < 0) {
260 			VHOST_LOG_CONFIG(vsocket->path, ERR,
261 				"failed to add vhost user connection with fd %d\n",
262 				fd);
263 			goto err_cleanup;
264 		}
265 	}
266 
267 	conn->connfd = fd;
268 	conn->vsocket = vsocket;
269 	conn->vid = vid;
270 	ret = fdset_add(&vhost_user.fdset, fd, vhost_user_read_cb,
271 			NULL, conn);
272 	if (ret < 0) {
273 		VHOST_LOG_CONFIG(vsocket->path, ERR,
274 			"failed to add fd %d into vhost server fdset\n",
275 			fd);
276 
277 		if (vsocket->notify_ops->destroy_connection)
278 			vsocket->notify_ops->destroy_connection(conn->vid);
279 
280 		goto err_cleanup;
281 	}
282 
283 	pthread_mutex_lock(&vsocket->conn_mutex);
284 	TAILQ_INSERT_TAIL(&vsocket->conn_list, conn, next);
285 	pthread_mutex_unlock(&vsocket->conn_mutex);
286 
287 	fdset_pipe_notify(&vhost_user.fdset);
288 	return;
289 
290 err_cleanup:
291 	vhost_destroy_device(vid);
292 err:
293 	free(conn);
294 	close(fd);
295 }
296 
297 /* call back when there is new vhost-user connection from client  */
298 static void
299 vhost_user_server_new_connection(int fd, void *dat, int *remove __rte_unused)
300 {
301 	struct vhost_user_socket *vsocket = dat;
302 
303 	fd = accept(fd, NULL, NULL);
304 	if (fd < 0)
305 		return;
306 
307 	VHOST_LOG_CONFIG(vsocket->path, INFO, "new vhost user connection is %d\n", fd);
308 	vhost_user_add_connection(fd, vsocket);
309 }
310 
/*
 * fdset read callback for an established connection: dispatch the next
 * vhost-user message. On a fatal error, tear the connection and its
 * device down, and (in client mode) kick off reconnection.
 */
static void
vhost_user_read_cb(int connfd, void *dat, int *remove)
{
	struct vhost_user_connection *conn = dat;
	struct vhost_user_socket *vsocket = conn->vsocket;
	int ret;

	ret = vhost_user_msg_handler(conn->vid, connfd);
	if (ret < 0) {
		struct virtio_net *dev = get_device(conn->vid);

		close(connfd);
		/* Tell the fdset poller to drop this fd from its set. */
		*remove = 1;

		if (dev)
			vhost_destroy_device_notify(dev);

		if (vsocket->notify_ops->destroy_connection)
			vsocket->notify_ops->destroy_connection(conn->vid);

		vhost_destroy_device(conn->vid);

		/* Client mode: re-create the socket and start reconnecting. */
		if (vsocket->reconnect) {
			create_unix_socket(vsocket);
			vhost_user_start_client(vsocket);
		}

		pthread_mutex_lock(&vsocket->conn_mutex);
		TAILQ_REMOVE(&vsocket->conn_list, conn, next);
		pthread_mutex_unlock(&vsocket->conn_mutex);

		free(conn);
	}
}
345 
346 static int
347 create_unix_socket(struct vhost_user_socket *vsocket)
348 {
349 	int fd;
350 	struct sockaddr_un *un = &vsocket->un;
351 
352 	fd = socket(AF_UNIX, SOCK_STREAM, 0);
353 	if (fd < 0)
354 		return -1;
355 	VHOST_LOG_CONFIG(vsocket->path, INFO, "vhost-user %s: socket created, fd: %d\n",
356 		vsocket->is_server ? "server" : "client", fd);
357 
358 	if (!vsocket->is_server && fcntl(fd, F_SETFL, O_NONBLOCK)) {
359 		VHOST_LOG_CONFIG(vsocket->path, ERR,
360 			"vhost-user: can't set nonblocking mode for socket, fd: %d (%s)\n",
361 			fd, strerror(errno));
362 		close(fd);
363 		return -1;
364 	}
365 
366 	memset(un, 0, sizeof(*un));
367 	un->sun_family = AF_UNIX;
368 	strncpy(un->sun_path, vsocket->path, sizeof(un->sun_path));
369 	un->sun_path[sizeof(un->sun_path) - 1] = '\0';
370 
371 	vsocket->socket_fd = fd;
372 	return 0;
373 }
374 
375 static int
376 vhost_user_start_server(struct vhost_user_socket *vsocket)
377 {
378 	int ret;
379 	int fd = vsocket->socket_fd;
380 	const char *path = vsocket->path;
381 
382 	/*
383 	 * bind () may fail if the socket file with the same name already
384 	 * exists. But the library obviously should not delete the file
385 	 * provided by the user, since we can not be sure that it is not
386 	 * being used by other applications. Moreover, many applications form
387 	 * socket names based on user input, which is prone to errors.
388 	 *
389 	 * The user must ensure that the socket does not exist before
390 	 * registering the vhost driver in server mode.
391 	 */
392 	ret = bind(fd, (struct sockaddr *)&vsocket->un, sizeof(vsocket->un));
393 	if (ret < 0) {
394 		VHOST_LOG_CONFIG(path, ERR, "failed to bind: %s; remove it and try again\n",
395 			strerror(errno));
396 		goto err;
397 	}
398 	VHOST_LOG_CONFIG(path, INFO, "binding succeeded\n");
399 
400 	ret = listen(fd, MAX_VIRTIO_BACKLOG);
401 	if (ret < 0)
402 		goto err;
403 
404 	ret = fdset_add(&vhost_user.fdset, fd, vhost_user_server_new_connection,
405 		  NULL, vsocket);
406 	if (ret < 0) {
407 		VHOST_LOG_CONFIG(path, ERR, "failed to add listen fd %d to vhost server fdset\n",
408 			fd);
409 		goto err;
410 	}
411 
412 	return 0;
413 
414 err:
415 	close(fd);
416 	return -1;
417 }
418 
/* A client connection waiting to be retried by the reconnect thread. */
struct vhost_user_reconnect {
	struct sockaddr_un un;	/* copy of the target address */
	int fd;			/* socket fd being (re)connected */
	struct vhost_user_socket *vsocket;

	TAILQ_ENTRY(vhost_user_reconnect) next;
};

TAILQ_HEAD(vhost_user_reconnect_tailq_list, vhost_user_reconnect);
struct vhost_user_reconnect_list {
	struct vhost_user_reconnect_tailq_list head;
	pthread_mutex_t mutex;	/* protects head */
};

static struct vhost_user_reconnect_list reconn_list;
/* Reconnect thread handle; opaque_id stays 0 until the thread is spawned. */
static rte_thread_t reconn_tid;
435 
436 static int
437 vhost_user_connect_nonblock(char *path, int fd, struct sockaddr *un, size_t sz)
438 {
439 	int ret, flags;
440 
441 	ret = connect(fd, un, sz);
442 	if (ret < 0 && errno != EISCONN)
443 		return -1;
444 
445 	flags = fcntl(fd, F_GETFL, 0);
446 	if (flags < 0) {
447 		VHOST_LOG_CONFIG(path, ERR, "can't get flags for connfd %d (%s)\n",
448 			fd, strerror(errno));
449 		return -2;
450 	}
451 	if ((flags & O_NONBLOCK) && fcntl(fd, F_SETFL, flags & ~O_NONBLOCK)) {
452 		VHOST_LOG_CONFIG(path, ERR, "can't disable nonblocking on fd %d\n", fd);
453 		return -2;
454 	}
455 	return 0;
456 }
457 
/*
 * Body of the reconnect thread: once per second, walk the pending list
 * and retry connect() on each entry. Successful and fatally-failed
 * entries are removed; transient failures stay queued for the next pass.
 */
static uint32_t
vhost_user_client_reconnect(void *arg __rte_unused)
{
	int ret;
	struct vhost_user_reconnect *reconn, *next;

	while (1) {
		pthread_mutex_lock(&reconn_list.mutex);

		/*
		 * An equal implementation of TAILQ_FOREACH_SAFE,
		 * which does not exist on all platforms.
		 */
		for (reconn = TAILQ_FIRST(&reconn_list.head);
		     reconn != NULL; reconn = next) {
			next = TAILQ_NEXT(reconn, next);

			ret = vhost_user_connect_nonblock(reconn->vsocket->path, reconn->fd,
						(struct sockaddr *)&reconn->un,
						sizeof(reconn->un));
			if (ret == -2) {
				/* Unrecoverable error: drop the entry for good. */
				close(reconn->fd);
				VHOST_LOG_CONFIG(reconn->vsocket->path, ERR,
					"reconnection for fd %d failed\n",
					reconn->fd);
				goto remove_fd;
			}
			/* Not connected yet: keep it queued and retry later. */
			if (ret == -1)
				continue;

			VHOST_LOG_CONFIG(reconn->vsocket->path, INFO, "connected\n");
			vhost_user_add_connection(reconn->fd, reconn->vsocket);
remove_fd:
			TAILQ_REMOVE(&reconn_list.head, reconn, next);
			free(reconn);
		}

		pthread_mutex_unlock(&reconn_list.mutex);
		sleep(1);
	}

	return 0;
}
501 
502 static int
503 vhost_user_reconnect_init(void)
504 {
505 	int ret;
506 
507 	ret = pthread_mutex_init(&reconn_list.mutex, NULL);
508 	if (ret < 0) {
509 		VHOST_LOG_CONFIG("thread", ERR, "%s: failed to initialize mutex\n", __func__);
510 		return ret;
511 	}
512 	TAILQ_INIT(&reconn_list.head);
513 
514 	ret = rte_thread_create_internal_control(&reconn_tid, "vhost-reco",
515 			vhost_user_client_reconnect, NULL);
516 	if (ret != 0) {
517 		VHOST_LOG_CONFIG("thread", ERR, "failed to create reconnect thread\n");
518 		if (pthread_mutex_destroy(&reconn_list.mutex))
519 			VHOST_LOG_CONFIG("thread", ERR,
520 				"%s: failed to destroy reconnect mutex\n",
521 				__func__);
522 	}
523 
524 	return ret;
525 }
526 
527 static int
528 vhost_user_start_client(struct vhost_user_socket *vsocket)
529 {
530 	int ret;
531 	int fd = vsocket->socket_fd;
532 	const char *path = vsocket->path;
533 	struct vhost_user_reconnect *reconn;
534 
535 	ret = vhost_user_connect_nonblock(vsocket->path, fd, (struct sockaddr *)&vsocket->un,
536 					  sizeof(vsocket->un));
537 	if (ret == 0) {
538 		vhost_user_add_connection(fd, vsocket);
539 		return 0;
540 	}
541 
542 	VHOST_LOG_CONFIG(path, WARNING, "failed to connect: %s\n", strerror(errno));
543 
544 	if (ret == -2 || !vsocket->reconnect) {
545 		close(fd);
546 		return -1;
547 	}
548 
549 	VHOST_LOG_CONFIG(path, INFO, "reconnecting...\n");
550 	reconn = malloc(sizeof(*reconn));
551 	if (reconn == NULL) {
552 		VHOST_LOG_CONFIG(path, ERR, "failed to allocate memory for reconnect\n");
553 		close(fd);
554 		return -1;
555 	}
556 	reconn->un = vsocket->un;
557 	reconn->fd = fd;
558 	reconn->vsocket = vsocket;
559 	pthread_mutex_lock(&reconn_list.mutex);
560 	TAILQ_INSERT_TAIL(&reconn_list.head, reconn, next);
561 	pthread_mutex_unlock(&reconn_list.mutex);
562 
563 	return 0;
564 }
565 
566 static struct vhost_user_socket *
567 find_vhost_user_socket(const char *path)
568 {
569 	int i;
570 
571 	if (path == NULL)
572 		return NULL;
573 
574 	for (i = 0; i < vhost_user.vsocket_cnt; i++) {
575 		struct vhost_user_socket *vsocket = vhost_user.vsockets[i];
576 
577 		if (!strcmp(vsocket->path, path))
578 			return vsocket;
579 	}
580 
581 	return NULL;
582 }
583 
584 int
585 rte_vhost_driver_attach_vdpa_device(const char *path,
586 		struct rte_vdpa_device *dev)
587 {
588 	struct vhost_user_socket *vsocket;
589 
590 	if (dev == NULL || path == NULL)
591 		return -1;
592 
593 	pthread_mutex_lock(&vhost_user.mutex);
594 	vsocket = find_vhost_user_socket(path);
595 	if (vsocket)
596 		vsocket->vdpa_dev = dev;
597 	pthread_mutex_unlock(&vhost_user.mutex);
598 
599 	return vsocket ? 0 : -1;
600 }
601 
602 int
603 rte_vhost_driver_detach_vdpa_device(const char *path)
604 {
605 	struct vhost_user_socket *vsocket;
606 
607 	pthread_mutex_lock(&vhost_user.mutex);
608 	vsocket = find_vhost_user_socket(path);
609 	if (vsocket)
610 		vsocket->vdpa_dev = NULL;
611 	pthread_mutex_unlock(&vhost_user.mutex);
612 
613 	return vsocket ? 0 : -1;
614 }
615 
616 struct rte_vdpa_device *
617 rte_vhost_driver_get_vdpa_device(const char *path)
618 {
619 	struct vhost_user_socket *vsocket;
620 	struct rte_vdpa_device *dev = NULL;
621 
622 	pthread_mutex_lock(&vhost_user.mutex);
623 	vsocket = find_vhost_user_socket(path);
624 	if (vsocket)
625 		dev = vsocket->vdpa_dev;
626 	pthread_mutex_unlock(&vhost_user.mutex);
627 
628 	return dev;
629 }
630 
631 int
632 rte_vhost_driver_get_vdpa_dev_type(const char *path, uint32_t *type)
633 {
634 	struct vhost_user_socket *vsocket;
635 	struct rte_vdpa_device *vdpa_dev;
636 	int ret = 0;
637 
638 	pthread_mutex_lock(&vhost_user.mutex);
639 	vsocket = find_vhost_user_socket(path);
640 	if (!vsocket) {
641 		VHOST_LOG_CONFIG(path, ERR, "socket file is not registered yet.\n");
642 		ret = -1;
643 		goto unlock_exit;
644 	}
645 
646 	vdpa_dev = vsocket->vdpa_dev;
647 	if (!vdpa_dev) {
648 		ret = -1;
649 		goto unlock_exit;
650 	}
651 
652 	*type = vdpa_dev->type;
653 
654 unlock_exit:
655 	pthread_mutex_unlock(&vhost_user.mutex);
656 	return ret;
657 }
658 
659 int
660 rte_vhost_driver_disable_features(const char *path, uint64_t features)
661 {
662 	struct vhost_user_socket *vsocket;
663 
664 	pthread_mutex_lock(&vhost_user.mutex);
665 	vsocket = find_vhost_user_socket(path);
666 
667 	/* Note that use_builtin_virtio_net is not affected by this function
668 	 * since callers may want to selectively disable features of the
669 	 * built-in vhost net device backend.
670 	 */
671 
672 	if (vsocket)
673 		vsocket->features &= ~features;
674 	pthread_mutex_unlock(&vhost_user.mutex);
675 
676 	return vsocket ? 0 : -1;
677 }
678 
679 int
680 rte_vhost_driver_enable_features(const char *path, uint64_t features)
681 {
682 	struct vhost_user_socket *vsocket;
683 
684 	pthread_mutex_lock(&vhost_user.mutex);
685 	vsocket = find_vhost_user_socket(path);
686 	if (vsocket) {
687 		if ((vsocket->supported_features & features) != features) {
688 			/*
689 			 * trying to enable features the driver doesn't
690 			 * support.
691 			 */
692 			pthread_mutex_unlock(&vhost_user.mutex);
693 			return -1;
694 		}
695 		vsocket->features |= features;
696 	}
697 	pthread_mutex_unlock(&vhost_user.mutex);
698 
699 	return vsocket ? 0 : -1;
700 }
701 
702 int
703 rte_vhost_driver_set_features(const char *path, uint64_t features)
704 {
705 	struct vhost_user_socket *vsocket;
706 
707 	pthread_mutex_lock(&vhost_user.mutex);
708 	vsocket = find_vhost_user_socket(path);
709 	if (vsocket) {
710 		vsocket->supported_features = features;
711 		vsocket->features = features;
712 
713 		/* Anyone setting feature bits is implementing their own vhost
714 		 * device backend.
715 		 */
716 		vsocket->use_builtin_virtio_net = false;
717 	}
718 	pthread_mutex_unlock(&vhost_user.mutex);
719 
720 	return vsocket ? 0 : -1;
721 }
722 
723 int
724 rte_vhost_driver_get_features(const char *path, uint64_t *features)
725 {
726 	struct vhost_user_socket *vsocket;
727 	uint64_t vdpa_features;
728 	struct rte_vdpa_device *vdpa_dev;
729 	int ret = 0;
730 
731 	pthread_mutex_lock(&vhost_user.mutex);
732 	vsocket = find_vhost_user_socket(path);
733 	if (!vsocket) {
734 		VHOST_LOG_CONFIG(path, ERR, "socket file is not registered yet.\n");
735 		ret = -1;
736 		goto unlock_exit;
737 	}
738 
739 	vdpa_dev = vsocket->vdpa_dev;
740 	if (!vdpa_dev) {
741 		*features = vsocket->features;
742 		goto unlock_exit;
743 	}
744 
745 	if (vdpa_dev->ops->get_features(vdpa_dev, &vdpa_features) < 0) {
746 		VHOST_LOG_CONFIG(path, ERR, "failed to get vdpa features for socket file.\n");
747 		ret = -1;
748 		goto unlock_exit;
749 	}
750 
751 	*features = vsocket->features & vdpa_features;
752 
753 unlock_exit:
754 	pthread_mutex_unlock(&vhost_user.mutex);
755 	return ret;
756 }
757 
758 int
759 rte_vhost_driver_set_protocol_features(const char *path,
760 		uint64_t protocol_features)
761 {
762 	struct vhost_user_socket *vsocket;
763 
764 	pthread_mutex_lock(&vhost_user.mutex);
765 	vsocket = find_vhost_user_socket(path);
766 	if (vsocket)
767 		vsocket->protocol_features = protocol_features;
768 	pthread_mutex_unlock(&vhost_user.mutex);
769 	return vsocket ? 0 : -1;
770 }
771 
772 int
773 rte_vhost_driver_get_protocol_features(const char *path,
774 		uint64_t *protocol_features)
775 {
776 	struct vhost_user_socket *vsocket;
777 	uint64_t vdpa_protocol_features;
778 	struct rte_vdpa_device *vdpa_dev;
779 	int ret = 0;
780 
781 	pthread_mutex_lock(&vhost_user.mutex);
782 	vsocket = find_vhost_user_socket(path);
783 	if (!vsocket) {
784 		VHOST_LOG_CONFIG(path, ERR, "socket file is not registered yet.\n");
785 		ret = -1;
786 		goto unlock_exit;
787 	}
788 
789 	vdpa_dev = vsocket->vdpa_dev;
790 	if (!vdpa_dev) {
791 		*protocol_features = vsocket->protocol_features;
792 		goto unlock_exit;
793 	}
794 
795 	if (vdpa_dev->ops->get_protocol_features(vdpa_dev,
796 				&vdpa_protocol_features) < 0) {
797 		VHOST_LOG_CONFIG(path, ERR, "failed to get vdpa protocol features.\n");
798 		ret = -1;
799 		goto unlock_exit;
800 	}
801 
802 	*protocol_features = vsocket->protocol_features
803 		& vdpa_protocol_features;
804 
805 unlock_exit:
806 	pthread_mutex_unlock(&vhost_user.mutex);
807 	return ret;
808 }
809 
810 int
811 rte_vhost_driver_get_queue_num(const char *path, uint32_t *queue_num)
812 {
813 	struct vhost_user_socket *vsocket;
814 	uint32_t vdpa_queue_num;
815 	struct rte_vdpa_device *vdpa_dev;
816 	int ret = 0;
817 
818 	pthread_mutex_lock(&vhost_user.mutex);
819 	vsocket = find_vhost_user_socket(path);
820 	if (!vsocket) {
821 		VHOST_LOG_CONFIG(path, ERR, "socket file is not registered yet.\n");
822 		ret = -1;
823 		goto unlock_exit;
824 	}
825 
826 	vdpa_dev = vsocket->vdpa_dev;
827 	if (!vdpa_dev) {
828 		*queue_num = vsocket->max_queue_pairs;
829 		goto unlock_exit;
830 	}
831 
832 	if (vdpa_dev->ops->get_queue_num(vdpa_dev, &vdpa_queue_num) < 0) {
833 		VHOST_LOG_CONFIG(path, ERR, "failed to get vdpa queue number.\n");
834 		ret = -1;
835 		goto unlock_exit;
836 	}
837 
838 	*queue_num = RTE_MIN(vsocket->max_queue_pairs, vdpa_queue_num);
839 
840 unlock_exit:
841 	pthread_mutex_unlock(&vhost_user.mutex);
842 	return ret;
843 }
844 
845 int
846 rte_vhost_driver_set_max_queue_num(const char *path, uint32_t max_queue_pairs)
847 {
848 	struct vhost_user_socket *vsocket;
849 	int ret = 0;
850 
851 	VHOST_LOG_CONFIG(path, INFO, "Setting max queue pairs to %u\n", max_queue_pairs);
852 
853 	if (max_queue_pairs > VHOST_MAX_QUEUE_PAIRS) {
854 		VHOST_LOG_CONFIG(path, ERR, "Library only supports up to %u queue pairs\n",
855 				VHOST_MAX_QUEUE_PAIRS);
856 		return -1;
857 	}
858 
859 	pthread_mutex_lock(&vhost_user.mutex);
860 	vsocket = find_vhost_user_socket(path);
861 	if (!vsocket) {
862 		VHOST_LOG_CONFIG(path, ERR, "socket file is not registered yet.\n");
863 		ret = -1;
864 		goto unlock_exit;
865 	}
866 
867 	vsocket->max_queue_pairs = max_queue_pairs;
868 
869 unlock_exit:
870 	pthread_mutex_unlock(&vhost_user.mutex);
871 	return ret;
872 }
873 
874 static void
875 vhost_user_socket_mem_free(struct vhost_user_socket *vsocket)
876 {
877 	if (vsocket == NULL)
878 		return;
879 
880 	free(vsocket->path);
881 	free(vsocket);
882 }
883 
884 /*
885  * Register a new vhost-user socket; here we could act as server
886  * (the default case), or client (when RTE_VHOST_USER_CLIENT) flag
887  * is set.
888  */
889 int
890 rte_vhost_driver_register(const char *path, uint64_t flags)
891 {
892 	int ret = -1;
893 	struct vhost_user_socket *vsocket;
894 
895 	if (!path)
896 		return -1;
897 
898 	pthread_mutex_lock(&vhost_user.mutex);
899 
900 	if (vhost_user.vsocket_cnt == MAX_VHOST_SOCKET) {
901 		VHOST_LOG_CONFIG(path, ERR, "the number of vhost sockets reaches maximum\n");
902 		goto out;
903 	}
904 
905 	vsocket = malloc(sizeof(struct vhost_user_socket));
906 	if (!vsocket)
907 		goto out;
908 	memset(vsocket, 0, sizeof(struct vhost_user_socket));
909 	vsocket->path = strdup(path);
910 	if (vsocket->path == NULL) {
911 		VHOST_LOG_CONFIG(path, ERR, "failed to copy socket path string\n");
912 		vhost_user_socket_mem_free(vsocket);
913 		goto out;
914 	}
915 	TAILQ_INIT(&vsocket->conn_list);
916 	ret = pthread_mutex_init(&vsocket->conn_mutex, NULL);
917 	if (ret) {
918 		VHOST_LOG_CONFIG(path, ERR, "failed to init connection mutex\n");
919 		goto out_free;
920 	}
921 
922 	if (!strncmp("/dev/vduse/", path, strlen("/dev/vduse/")))
923 		vsocket->is_vduse = true;
924 
925 	vsocket->vdpa_dev = NULL;
926 	vsocket->max_queue_pairs = VHOST_MAX_QUEUE_PAIRS;
927 	vsocket->extbuf = flags & RTE_VHOST_USER_EXTBUF_SUPPORT;
928 	vsocket->linearbuf = flags & RTE_VHOST_USER_LINEARBUF_SUPPORT;
929 	vsocket->async_copy = flags & RTE_VHOST_USER_ASYNC_COPY;
930 	vsocket->net_compliant_ol_flags = flags & RTE_VHOST_USER_NET_COMPLIANT_OL_FLAGS;
931 	vsocket->stats_enabled = flags & RTE_VHOST_USER_NET_STATS_ENABLE;
932 	if (vsocket->is_vduse)
933 		vsocket->iommu_support = true;
934 	else
935 		vsocket->iommu_support = flags & RTE_VHOST_USER_IOMMU_SUPPORT;
936 
937 	if (vsocket->async_copy && (vsocket->iommu_support ||
938 				(flags & RTE_VHOST_USER_POSTCOPY_SUPPORT))) {
939 		VHOST_LOG_CONFIG(path, ERR, "async copy with IOMMU or post-copy not supported\n");
940 		goto out_mutex;
941 	}
942 
943 	/*
944 	 * Set the supported features correctly for the builtin vhost-user
945 	 * net driver.
946 	 *
947 	 * Applications know nothing about features the builtin virtio net
948 	 * driver (virtio_net.c) supports, thus it's not possible for them
949 	 * to invoke rte_vhost_driver_set_features(). To workaround it, here
950 	 * we set it unconditionally. If the application want to implement
951 	 * another vhost-user driver (say SCSI), it should call the
952 	 * rte_vhost_driver_set_features(), which will overwrite following
953 	 * two values.
954 	 */
955 	vsocket->use_builtin_virtio_net = true;
956 	if (vsocket->is_vduse) {
957 		vsocket->supported_features = VDUSE_NET_SUPPORTED_FEATURES;
958 		vsocket->features           = VDUSE_NET_SUPPORTED_FEATURES;
959 	} else {
960 		vsocket->supported_features = VHOST_USER_NET_SUPPORTED_FEATURES;
961 		vsocket->features           = VHOST_USER_NET_SUPPORTED_FEATURES;
962 		vsocket->protocol_features  = VHOST_USER_PROTOCOL_FEATURES;
963 	}
964 
965 	if (vsocket->async_copy) {
966 		vsocket->supported_features &= ~(1ULL << VHOST_F_LOG_ALL);
967 		vsocket->features &= ~(1ULL << VHOST_F_LOG_ALL);
968 		VHOST_LOG_CONFIG(path, INFO, "logging feature is disabled in async copy mode\n");
969 	}
970 
971 	/*
972 	 * We'll not be able to receive a buffer from guest in linear mode
973 	 * without external buffer if it will not fit in a single mbuf, which is
974 	 * likely if segmentation offloading enabled.
975 	 */
976 	if (vsocket->linearbuf && !vsocket->extbuf) {
977 		uint64_t seg_offload_features =
978 				(1ULL << VIRTIO_NET_F_HOST_TSO4) |
979 				(1ULL << VIRTIO_NET_F_HOST_TSO6) |
980 				(1ULL << VIRTIO_NET_F_HOST_UFO);
981 
982 		VHOST_LOG_CONFIG(path, INFO, "Linear buffers requested without external buffers,\n");
983 		VHOST_LOG_CONFIG(path, INFO, "disabling host segmentation offloading support\n");
984 		vsocket->supported_features &= ~seg_offload_features;
985 		vsocket->features &= ~seg_offload_features;
986 	}
987 
988 	if (!vsocket->iommu_support) {
989 		vsocket->supported_features &= ~(1ULL << VIRTIO_F_IOMMU_PLATFORM);
990 		vsocket->features &= ~(1ULL << VIRTIO_F_IOMMU_PLATFORM);
991 	}
992 
993 	if (!(flags & RTE_VHOST_USER_POSTCOPY_SUPPORT)) {
994 		vsocket->protocol_features &=
995 			~(1ULL << VHOST_USER_PROTOCOL_F_PAGEFAULT);
996 	} else {
997 #ifndef RTE_LIBRTE_VHOST_POSTCOPY
998 		VHOST_LOG_CONFIG(path, ERR, "Postcopy requested but not compiled\n");
999 		ret = -1;
1000 		goto out_mutex;
1001 #endif
1002 	}
1003 
1004 	if (!vsocket->is_vduse) {
1005 		if ((flags & RTE_VHOST_USER_CLIENT) != 0) {
1006 			vsocket->reconnect = !(flags & RTE_VHOST_USER_NO_RECONNECT);
1007 			if (vsocket->reconnect && reconn_tid.opaque_id == 0) {
1008 				if (vhost_user_reconnect_init() != 0)
1009 					goto out_mutex;
1010 			}
1011 		} else {
1012 			vsocket->is_server = true;
1013 		}
1014 		ret = create_unix_socket(vsocket);
1015 		if (ret < 0)
1016 			goto out_mutex;
1017 	}
1018 
1019 	vhost_user.vsockets[vhost_user.vsocket_cnt++] = vsocket;
1020 
1021 	pthread_mutex_unlock(&vhost_user.mutex);
1022 	return ret;
1023 
1024 out_mutex:
1025 	if (pthread_mutex_destroy(&vsocket->conn_mutex)) {
1026 		VHOST_LOG_CONFIG(path, ERR, "failed to destroy connection mutex\n");
1027 	}
1028 out_free:
1029 	vhost_user_socket_mem_free(vsocket);
1030 out:
1031 	pthread_mutex_unlock(&vhost_user.mutex);
1032 
1033 	return ret;
1034 }
1035 
1036 static bool
1037 vhost_user_remove_reconnect(struct vhost_user_socket *vsocket)
1038 {
1039 	int found = false;
1040 	struct vhost_user_reconnect *reconn, *next;
1041 
1042 	pthread_mutex_lock(&reconn_list.mutex);
1043 
1044 	for (reconn = TAILQ_FIRST(&reconn_list.head);
1045 	     reconn != NULL; reconn = next) {
1046 		next = TAILQ_NEXT(reconn, next);
1047 
1048 		if (reconn->vsocket == vsocket) {
1049 			TAILQ_REMOVE(&reconn_list.head, reconn, next);
1050 			close(reconn->fd);
1051 			free(reconn);
1052 			found = true;
1053 			break;
1054 		}
1055 	}
1056 	pthread_mutex_unlock(&reconn_list.mutex);
1057 	return found;
1058 }
1059 
/**
 * Unregister the specified vhost socket: tear down all of its live
 * connections and their devices, then remove it from the global
 * registry. Returns 0 on success, -1 if @path is NULL or unknown.
 */
int
rte_vhost_driver_unregister(const char *path)
{
	int i;
	int count;
	struct vhost_user_connection *conn, *next;

	if (path == NULL)
		return -1;

again:
	pthread_mutex_lock(&vhost_user.mutex);

	for (i = 0; i < vhost_user.vsocket_cnt; i++) {
		struct vhost_user_socket *vsocket = vhost_user.vsockets[i];
		if (strcmp(vsocket->path, path))
			continue;

		if (vsocket->is_vduse) {
			vduse_device_destroy(path);
		} else if (vsocket->is_server) {
			/*
			 * If r/wcb is executing, release vhost_user's
			 * mutex lock, and try again since the r/wcb
			 * may use the mutex lock.
			 */
			if (fdset_try_del(&vhost_user.fdset, vsocket->socket_fd) == -1) {
				pthread_mutex_unlock(&vhost_user.mutex);
				goto again;
			}
		} else if (vsocket->reconnect) {
			vhost_user_remove_reconnect(vsocket);
		}

		/* Tear down every established connection on this socket. */
		pthread_mutex_lock(&vsocket->conn_mutex);
		for (conn = TAILQ_FIRST(&vsocket->conn_list);
			 conn != NULL;
			 conn = next) {
			next = TAILQ_NEXT(conn, next);

			/*
			 * If r/wcb is executing, release vsocket's
			 * conn_mutex and vhost_user's mutex locks, and
			 * try again since the r/wcb may use the
			 * conn_mutex and mutex locks.
			 */
			if (fdset_try_del(&vhost_user.fdset,
					  conn->connfd) == -1) {
				pthread_mutex_unlock(&vsocket->conn_mutex);
				pthread_mutex_unlock(&vhost_user.mutex);
				goto again;
			}

			VHOST_LOG_CONFIG(path, INFO, "free connfd %d\n", conn->connfd);
			close(conn->connfd);
			vhost_destroy_device(conn->vid);
			TAILQ_REMOVE(&vsocket->conn_list, conn, next);
			free(conn);
		}
		pthread_mutex_unlock(&vsocket->conn_mutex);

		/* Server mode owns the socket file: remove it from the fs. */
		if (vsocket->is_server) {
			close(vsocket->socket_fd);
			unlink(path);
		}

		pthread_mutex_destroy(&vsocket->conn_mutex);
		vhost_user_socket_mem_free(vsocket);

		/* Compact the registry: move the last entry into the hole. */
		count = --vhost_user.vsocket_cnt;
		vhost_user.vsockets[i] = vhost_user.vsockets[count];
		vhost_user.vsockets[count] = NULL;
		pthread_mutex_unlock(&vhost_user.mutex);
		return 0;
	}
	pthread_mutex_unlock(&vhost_user.mutex);

	return -1;
}
1142 
1143 /*
1144  * Register ops so that we can add/remove device to data core.
1145  */
1146 int
1147 rte_vhost_driver_callback_register(const char *path,
1148 	struct rte_vhost_device_ops const * const ops)
1149 {
1150 	struct vhost_user_socket *vsocket;
1151 
1152 	pthread_mutex_lock(&vhost_user.mutex);
1153 	vsocket = find_vhost_user_socket(path);
1154 	if (vsocket)
1155 		vsocket->notify_ops = ops;
1156 	pthread_mutex_unlock(&vhost_user.mutex);
1157 
1158 	return vsocket ? 0 : -1;
1159 }
1160 
1161 struct rte_vhost_device_ops const *
1162 vhost_driver_callback_get(const char *path)
1163 {
1164 	struct vhost_user_socket *vsocket;
1165 
1166 	pthread_mutex_lock(&vhost_user.mutex);
1167 	vsocket = find_vhost_user_socket(path);
1168 	pthread_mutex_unlock(&vhost_user.mutex);
1169 
1170 	return vsocket ? vsocket->notify_ops : NULL;
1171 }
1172 
/*
 * Start the driver registered for @path: VDUSE sockets create their
 * VDUSE device; vhost-user sockets lazily spawn the shared fdset
 * dispatch thread, then start serving (server) or connecting (client).
 * Returns 0 on success, -1 on failure.
 */
int
rte_vhost_driver_start(const char *path)
{
	struct vhost_user_socket *vsocket;
	/* Shared by all sockets; the thread is created only once. */
	static rte_thread_t fdset_tid;

	pthread_mutex_lock(&vhost_user.mutex);
	vsocket = find_vhost_user_socket(path);
	pthread_mutex_unlock(&vhost_user.mutex);

	if (!vsocket)
		return -1;

	if (vsocket->is_vduse)
		return vduse_device_create(path, vsocket->net_compliant_ol_flags);

	if (fdset_tid.opaque_id == 0) {
		/**
		 * create a pipe which will be waited by poll and notified to
		 * rebuild the wait list of poll.
		 */
		if (fdset_pipe_init(&vhost_user.fdset) < 0) {
			VHOST_LOG_CONFIG(path, ERR, "failed to create pipe for vhost fdset\n");
			return -1;
		}

		int ret = rte_thread_create_internal_control(&fdset_tid,
				"vhost-evt", fdset_event_dispatch, &vhost_user.fdset);
		if (ret != 0) {
			VHOST_LOG_CONFIG(path, ERR, "failed to create fdset handling thread\n");
			fdset_pipe_uninit(&vhost_user.fdset);
			return -1;
		}
	}

	if (vsocket->is_server)
		return vhost_user_start_server(vsocket);
	else
		return vhost_user_start_client(vsocket);
}
1213