xref: /dpdk/drivers/net/virtio/virtio_user/vhost_user.c (revision 97b914f4e715565d53d38ac6e04815b9be5e58a9)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2016 Intel Corporation
3  */
4 
5 #include <sys/socket.h>
6 #include <sys/types.h>
7 #include <sys/stat.h>
8 #include <unistd.h>
9 #include <fcntl.h>
10 #include <sys/un.h>
11 #include <string.h>
12 #include <errno.h>
13 
14 #include <rte_alarm.h>
15 #include <rte_string_fns.h>
16 #include <rte_fbarray.h>
17 
18 #include "vhost.h"
19 #include "virtio_user_dev.h"
20 
/* Private state of the vhost-user backend, stored in dev->backend_data. */
struct vhost_user_data {
	/* Socket connected to the vhost-user peer; -1 while not connected. */
	int vhostfd;
	/* Listening socket used in server mode; -1 otherwise. */
	int listenfd;
	/* Protocol feature bits kept after masking with the supported set. */
	uint64_t protocol_features;
};
26 
/*
 * Feature bit (in the virtio feature word) signalling that the backend
 * understands vhost-user protocol features. Defined here only if the
 * included headers did not already provide it.
 */
#ifndef VHOST_USER_F_PROTOCOL_FEATURES
#define VHOST_USER_F_PROTOCOL_FEATURES 30
#endif

/** Protocol features. */
#ifndef VHOST_USER_PROTOCOL_F_MQ
#define VHOST_USER_PROTOCOL_F_MQ 0
#endif

#ifndef VHOST_USER_PROTOCOL_F_REPLY_ACK
#define VHOST_USER_PROTOCOL_F_REPLY_ACK 3
#endif

#ifndef VHOST_USER_PROTOCOL_F_STATUS
#define VHOST_USER_PROTOCOL_F_STATUS 16
#endif

/*
 * Protocol feature bits this file negotiates: whatever the backend reports
 * is masked with this set before being sent back.
 */
#define VHOST_USER_SUPPORTED_PROTOCOL_FEATURES		\
	(1ULL << VHOST_USER_PROTOCOL_F_MQ |		\
	 1ULL << VHOST_USER_PROTOCOL_F_REPLY_ACK |	\
	 1ULL << VHOST_USER_PROTOCOL_F_STATUS)
48 
/* The version of the protocol we support */
#define VHOST_USER_VERSION    0x1

/* Capacity of the region array carried by one memory table message. */
#define VHOST_MEMORY_MAX_NREGIONS 8
/*
 * Memory table payload: a region count, explicit padding, then the region
 * descriptors. Only the first nregions entries are sent on the wire.
 */
struct vhost_memory {
	uint32_t nregions;
	uint32_t padding;
	struct vhost_memory_region regions[VHOST_MEMORY_MAX_NREGIONS];
};
58 
/*
 * Request codes carried in vhost_user_msg.request. The numeric values are
 * part of the wire format and must not be renumbered; note the gap between
 * SET_VRING_ENABLE (18) and SET_STATUS (39).
 */
enum vhost_user_request {
	VHOST_USER_NONE = 0,
	VHOST_USER_GET_FEATURES = 1,
	VHOST_USER_SET_FEATURES = 2,
	VHOST_USER_SET_OWNER = 3,
	VHOST_USER_RESET_OWNER = 4,
	VHOST_USER_SET_MEM_TABLE = 5,
	VHOST_USER_SET_LOG_BASE = 6,
	VHOST_USER_SET_LOG_FD = 7,
	VHOST_USER_SET_VRING_NUM = 8,
	VHOST_USER_SET_VRING_ADDR = 9,
	VHOST_USER_SET_VRING_BASE = 10,
	VHOST_USER_GET_VRING_BASE = 11,
	VHOST_USER_SET_VRING_KICK = 12,
	VHOST_USER_SET_VRING_CALL = 13,
	VHOST_USER_SET_VRING_ERR = 14,
	VHOST_USER_GET_PROTOCOL_FEATURES = 15,
	VHOST_USER_SET_PROTOCOL_FEATURES = 16,
	VHOST_USER_GET_QUEUE_NUM = 17,
	VHOST_USER_SET_VRING_ENABLE = 18,
	VHOST_USER_SET_STATUS = 39,
	VHOST_USER_GET_STATUS = 40,
};
82 
/*
 * One vhost-user message: a fixed header (request, flags, size) followed by
 * a request-specific payload. The struct is packed, so its in-memory layout
 * is what gets sent and received byte for byte.
 */
struct vhost_user_msg {
	enum vhost_user_request request;

/* Bit fields of 'flags'. */
#define VHOST_USER_VERSION_MASK     0x3
#define VHOST_USER_REPLY_MASK       (0x1 << 2)
#define VHOST_USER_NEED_REPLY_MASK  (0x1 << 3)
	uint32_t flags;
	uint32_t size; /* the following payload size */
	union {
/* Encoding of payload.u64 for the vring file (kick/call) requests. */
#define VHOST_USER_VRING_IDX_MASK   0xff
#define VHOST_USER_VRING_NOFD_MASK  (0x1 << 8)
		uint64_t u64;
		struct vhost_vring_state state;
		struct vhost_vring_addr addr;
		struct vhost_memory memory;
	} payload;
} __rte_packed;

/* Number of header bytes that precede the payload. */
#define VHOST_USER_HDR_SIZE offsetof(struct vhost_user_msg, payload.u64)
/* Size of the largest payload the message structure can hold. */
#define VHOST_USER_PAYLOAD_SIZE \
	(sizeof(struct vhost_user_msg) - VHOST_USER_HDR_SIZE)
104 
105 static int
106 vhost_user_write(int fd, struct vhost_user_msg *msg, int *fds, int fd_num)
107 {
108 	int r;
109 	struct msghdr msgh;
110 	struct iovec iov;
111 	size_t fd_size = fd_num * sizeof(int);
112 	char control[CMSG_SPACE(fd_size)];
113 	struct cmsghdr *cmsg;
114 
115 	memset(&msgh, 0, sizeof(msgh));
116 	memset(control, 0, sizeof(control));
117 
118 	iov.iov_base = (uint8_t *)msg;
119 	iov.iov_len = VHOST_USER_HDR_SIZE + msg->size;
120 
121 	msgh.msg_iov = &iov;
122 	msgh.msg_iovlen = 1;
123 	msgh.msg_control = control;
124 	msgh.msg_controllen = sizeof(control);
125 
126 	cmsg = CMSG_FIRSTHDR(&msgh);
127 	cmsg->cmsg_len = CMSG_LEN(fd_size);
128 	cmsg->cmsg_level = SOL_SOCKET;
129 	cmsg->cmsg_type = SCM_RIGHTS;
130 	memcpy(CMSG_DATA(cmsg), fds, fd_size);
131 
132 	do {
133 		r = sendmsg(fd, &msgh, 0);
134 	} while (r < 0 && errno == EINTR);
135 
136 	if (r < 0)
137 		PMD_DRV_LOG(ERR, "Failed to send msg: %s", strerror(errno));
138 
139 	return r;
140 }
141 
142 static int
143 vhost_user_read(int fd, struct vhost_user_msg *msg)
144 {
145 	uint32_t valid_flags = VHOST_USER_REPLY_MASK | VHOST_USER_VERSION;
146 	int ret, sz_hdr = VHOST_USER_HDR_SIZE, sz_payload;
147 
148 	ret = recv(fd, (void *)msg, sz_hdr, 0);
149 	if (ret < 0) {
150 		PMD_DRV_LOG(ERR, "Failed to recv msg header: %s", strerror(errno));
151 		return -1;
152 	} else if (ret < sz_hdr) {
153 		PMD_DRV_LOG(ERR, "Failed to recv msg hdr: %d instead of %d.",
154 			    ret, sz_hdr);
155 		return -1;
156 	}
157 
158 	/* validate msg flags */
159 	if (msg->flags != (valid_flags)) {
160 		PMD_DRV_LOG(ERR, "Failed to recv msg: flags 0x%x instead of 0x%x.",
161 			    msg->flags, valid_flags);
162 		return -1;
163 	}
164 
165 	sz_payload = msg->size;
166 
167 	if ((size_t)sz_payload > sizeof(msg->payload)) {
168 		PMD_DRV_LOG(ERR, "Payload size overflow, header says %d but max %zu",
169 				sz_payload, sizeof(msg->payload));
170 		return -1;
171 	}
172 
173 	if (sz_payload) {
174 		ret = recv(fd, (void *)((char *)msg + sz_hdr), sz_payload, 0);
175 		if (ret < 0) {
176 			PMD_DRV_LOG(ERR, "Failed to recv msg payload: %s", strerror(errno));
177 			return -1;
178 		} else if (ret < sz_payload) {
179 			PMD_DRV_LOG(ERR, "Failed to recv msg payload: %d instead of %u.",
180 				ret, msg->size);
181 			return -1;
182 		}
183 	}
184 
185 	return 0;
186 }
187 
188 static int
189 vhost_user_check_reply_ack(struct virtio_user_dev *dev, struct vhost_user_msg *msg)
190 {
191 	struct vhost_user_data *data = dev->backend_data;
192 	enum vhost_user_request req = msg->request;
193 	int ret;
194 
195 	if (!(msg->flags & VHOST_USER_NEED_REPLY_MASK))
196 		return 0;
197 
198 	ret = vhost_user_read(data->vhostfd, msg);
199 	if (ret < 0) {
200 		PMD_DRV_LOG(ERR, "Failed to read reply-ack");
201 		return -1;
202 	}
203 
204 	if (req != msg->request) {
205 		PMD_DRV_LOG(ERR, "Unexpected reply-ack request type (%d)", msg->request);
206 		return -1;
207 	}
208 
209 	if (msg->size != sizeof(msg->payload.u64)) {
210 		PMD_DRV_LOG(ERR, "Unexpected reply-ack payload size (%u)", msg->size);
211 		return -1;
212 	}
213 
214 	if (msg->payload.u64) {
215 		PMD_DRV_LOG(ERR, "Slave replied NACK to request type (%d)", msg->request);
216 		return -1;
217 	}
218 
219 	return 0;
220 }
221 
222 static int
223 vhost_user_set_owner(struct virtio_user_dev *dev)
224 {
225 	int ret;
226 	struct vhost_user_data *data = dev->backend_data;
227 	struct vhost_user_msg msg = {
228 		.request = VHOST_USER_SET_OWNER,
229 		.flags = VHOST_USER_VERSION,
230 	};
231 
232 	ret = vhost_user_write(data->vhostfd, &msg, NULL, 0);
233 	if (ret < 0) {
234 		PMD_DRV_LOG(ERR, "Failed to set owner");
235 		return -1;
236 	}
237 
238 	return 0;
239 }
240 
241 static int
242 vhost_user_get_protocol_features(struct virtio_user_dev *dev, uint64_t *features)
243 {
244 	int ret;
245 	struct vhost_user_data *data = dev->backend_data;
246 	struct vhost_user_msg msg = {
247 		.request = VHOST_USER_GET_PROTOCOL_FEATURES,
248 		.flags = VHOST_USER_VERSION,
249 	};
250 
251 	ret = vhost_user_write(data->vhostfd, &msg, NULL, 0);
252 	if (ret < 0)
253 		goto err;
254 
255 	ret = vhost_user_read(data->vhostfd, &msg);
256 	if (ret < 0)
257 		goto err;
258 
259 	if (msg.request != VHOST_USER_GET_PROTOCOL_FEATURES) {
260 		PMD_DRV_LOG(ERR, "Unexpected request type (%d)", msg.request);
261 		goto err;
262 	}
263 
264 	if (msg.size != sizeof(*features)) {
265 		PMD_DRV_LOG(ERR, "Unexpected payload size (%u)", msg.size);
266 		goto err;
267 	}
268 
269 	*features = msg.payload.u64;
270 
271 	return 0;
272 err:
273 	PMD_DRV_LOG(ERR, "Failed to get backend protocol features");
274 
275 	return -1;
276 }
277 
278 static int
279 vhost_user_set_protocol_features(struct virtio_user_dev *dev, uint64_t features)
280 {
281 	int ret;
282 	struct vhost_user_data *data = dev->backend_data;
283 	struct vhost_user_msg msg = {
284 		.request = VHOST_USER_SET_PROTOCOL_FEATURES,
285 		.flags = VHOST_USER_VERSION,
286 		.size = sizeof(features),
287 		.payload.u64 = features,
288 	};
289 
290 	ret = vhost_user_write(data->vhostfd, &msg, NULL, 0);
291 	if (ret < 0) {
292 		PMD_DRV_LOG(ERR, "Failed to set protocol features");
293 		return -1;
294 	}
295 
296 	return 0;
297 }
298 
299 static int
300 vhost_user_get_features(struct virtio_user_dev *dev, uint64_t *features)
301 {
302 	int ret;
303 	struct vhost_user_data *data = dev->backend_data;
304 	struct vhost_user_msg msg = {
305 		.request = VHOST_USER_GET_FEATURES,
306 		.flags = VHOST_USER_VERSION,
307 	};
308 
309 	ret = vhost_user_write(data->vhostfd, &msg, NULL, 0);
310 	if (ret < 0)
311 		goto err;
312 
313 	ret = vhost_user_read(data->vhostfd, &msg);
314 	if (ret < 0)
315 		goto err;
316 
317 	if (msg.request != VHOST_USER_GET_FEATURES) {
318 		PMD_DRV_LOG(ERR, "Unexpected request type (%d)", msg.request);
319 		goto err;
320 	}
321 
322 	if (msg.size != sizeof(*features)) {
323 		PMD_DRV_LOG(ERR, "Unexpected payload size (%u)", msg.size);
324 		goto err;
325 	}
326 
327 	*features = msg.payload.u64;
328 
329 	if (!(*features & (1ULL << VHOST_USER_F_PROTOCOL_FEATURES)))
330 		return 0;
331 
332 	/* Negotiate protocol features */
333 	ret = vhost_user_get_protocol_features(dev, &data->protocol_features);
334 	if (ret < 0)
335 		goto err;
336 
337 	data->protocol_features &= VHOST_USER_SUPPORTED_PROTOCOL_FEATURES;
338 
339 	ret = vhost_user_set_protocol_features(dev, data->protocol_features);
340 	if (ret < 0)
341 		goto err;
342 
343 	if (!(data->protocol_features & (1ULL << VHOST_USER_PROTOCOL_F_MQ)))
344 		dev->unsupported_features |= (1ull << VIRTIO_NET_F_MQ);
345 
346 	return 0;
347 err:
348 	PMD_DRV_LOG(ERR, "Failed to get backend features");
349 
350 	return -1;
351 }
352 
353 static int
354 vhost_user_set_features(struct virtio_user_dev *dev, uint64_t features)
355 {
356 	int ret;
357 	struct vhost_user_data *data = dev->backend_data;
358 	struct vhost_user_msg msg = {
359 		.request = VHOST_USER_SET_FEATURES,
360 		.flags = VHOST_USER_VERSION,
361 		.size = sizeof(features),
362 		.payload.u64 = features,
363 	};
364 
365 	msg.payload.u64 |= dev->device_features & (1ULL << VHOST_USER_F_PROTOCOL_FEATURES);
366 
367 	ret = vhost_user_write(data->vhostfd, &msg, NULL, 0);
368 	if (ret < 0) {
369 		PMD_DRV_LOG(ERR, "Failed to set features");
370 		return -1;
371 	}
372 
373 	return 0;
374 }
375 
/* Accumulator handed to update_memory_region() during the memseg walk. */
struct walk_arg {
	/* Memory table being filled in. */
	struct vhost_memory *vm;
	/* File descriptor backing each region; fds[i] pairs with vm->regions[i]. */
	int *fds;
	/* Number of regions (and fds) filled in so far. */
	int region_nr;
};
381 
382 static int
383 update_memory_region(const struct rte_memseg_list *msl __rte_unused,
384 		const struct rte_memseg *ms, void *arg)
385 {
386 	struct walk_arg *wa = arg;
387 	struct vhost_memory_region *mr;
388 	uint64_t start_addr, end_addr;
389 	size_t offset;
390 	int i, fd;
391 
392 	fd = rte_memseg_get_fd_thread_unsafe(ms);
393 	if (fd < 0) {
394 		PMD_DRV_LOG(ERR, "Failed to get fd, ms=%p rte_errno=%d",
395 			ms, rte_errno);
396 		return -1;
397 	}
398 
399 	if (rte_memseg_get_fd_offset_thread_unsafe(ms, &offset) < 0) {
400 		PMD_DRV_LOG(ERR, "Failed to get offset, ms=%p rte_errno=%d",
401 			ms, rte_errno);
402 		return -1;
403 	}
404 
405 	start_addr = (uint64_t)(uintptr_t)ms->addr;
406 	end_addr = start_addr + ms->len;
407 
408 	for (i = 0; i < wa->region_nr; i++) {
409 		if (wa->fds[i] != fd)
410 			continue;
411 
412 		mr = &wa->vm->regions[i];
413 
414 		if (mr->userspace_addr + mr->memory_size < end_addr)
415 			mr->memory_size = end_addr - mr->userspace_addr;
416 
417 		if (mr->userspace_addr > start_addr) {
418 			mr->userspace_addr = start_addr;
419 			mr->guest_phys_addr = start_addr;
420 		}
421 
422 		if (mr->mmap_offset > offset)
423 			mr->mmap_offset = offset;
424 
425 		PMD_DRV_LOG(DEBUG, "index=%d fd=%d offset=0x%" PRIx64
426 			" addr=0x%" PRIx64 " len=%" PRIu64, i, fd,
427 			mr->mmap_offset, mr->userspace_addr,
428 			mr->memory_size);
429 
430 		return 0;
431 	}
432 
433 	if (i >= VHOST_MEMORY_MAX_NREGIONS) {
434 		PMD_DRV_LOG(ERR, "Too many memory regions");
435 		return -1;
436 	}
437 
438 	mr = &wa->vm->regions[i];
439 	wa->fds[i] = fd;
440 
441 	mr->guest_phys_addr = start_addr;
442 	mr->userspace_addr = start_addr;
443 	mr->memory_size = ms->len;
444 	mr->mmap_offset = offset;
445 
446 	PMD_DRV_LOG(DEBUG, "index=%d fd=%d offset=0x%" PRIx64
447 		" addr=0x%" PRIx64 " len=%" PRIu64, i, fd,
448 		mr->mmap_offset, mr->userspace_addr,
449 		mr->memory_size);
450 
451 	wa->region_nr++;
452 
453 	return 0;
454 }
455 
456 static int
457 vhost_user_set_memory_table(struct virtio_user_dev *dev)
458 {
459 	struct walk_arg wa;
460 	int fds[VHOST_MEMORY_MAX_NREGIONS];
461 	int ret, fd_num;
462 	struct vhost_user_data *data = dev->backend_data;
463 	struct vhost_user_msg msg = {
464 		.request = VHOST_USER_SET_MEM_TABLE,
465 		.flags = VHOST_USER_VERSION,
466 	};
467 
468 	if (data->protocol_features & (1ULL << VHOST_USER_PROTOCOL_F_REPLY_ACK))
469 		msg.flags |= VHOST_USER_NEED_REPLY_MASK;
470 
471 	wa.region_nr = 0;
472 	wa.vm = &msg.payload.memory;
473 	wa.fds = fds;
474 
475 	/*
476 	 * The memory lock has already been taken by memory subsystem
477 	 * or virtio_user_start_device().
478 	 */
479 	ret = rte_memseg_walk_thread_unsafe(update_memory_region, &wa);
480 	if (ret < 0)
481 		goto err;
482 
483 	fd_num = wa.region_nr;
484 	msg.payload.memory.nregions = wa.region_nr;
485 	msg.payload.memory.padding = 0;
486 
487 	msg.size = sizeof(msg.payload.memory.nregions);
488 	msg.size += sizeof(msg.payload.memory.padding);
489 	msg.size += fd_num * sizeof(struct vhost_memory_region);
490 
491 	ret = vhost_user_write(data->vhostfd, &msg, fds, fd_num);
492 	if (ret < 0)
493 		goto err;
494 
495 	return vhost_user_check_reply_ack(dev, &msg);
496 err:
497 	PMD_DRV_LOG(ERR, "Failed to set memory table");
498 	return -1;
499 }
500 
501 static int
502 vhost_user_set_vring(struct virtio_user_dev *dev, enum vhost_user_request req,
503 		struct vhost_vring_state *state)
504 {
505 	int ret;
506 	struct vhost_user_data *data = dev->backend_data;
507 	struct vhost_user_msg msg = {
508 		.request = req,
509 		.flags = VHOST_USER_VERSION,
510 		.size = sizeof(*state),
511 		.payload.state = *state,
512 	};
513 
514 	ret = vhost_user_write(data->vhostfd, &msg, NULL, 0);
515 	if (ret < 0) {
516 		PMD_DRV_LOG(ERR, "Failed to set vring state (request %d)", req);
517 		return -1;
518 	}
519 
520 	return 0;
521 }
522 
523 static int
524 vhost_user_set_vring_enable(struct virtio_user_dev *dev, struct vhost_vring_state *state)
525 {
526 	return vhost_user_set_vring(dev, VHOST_USER_SET_VRING_ENABLE, state);
527 }
528 
529 static int
530 vhost_user_set_vring_num(struct virtio_user_dev *dev, struct vhost_vring_state *state)
531 {
532 	return vhost_user_set_vring(dev, VHOST_USER_SET_VRING_NUM, state);
533 }
534 
535 static int
536 vhost_user_set_vring_base(struct virtio_user_dev *dev, struct vhost_vring_state *state)
537 {
538 	return vhost_user_set_vring(dev, VHOST_USER_SET_VRING_BASE, state);
539 }
540 
541 static int
542 vhost_user_get_vring_base(struct virtio_user_dev *dev, struct vhost_vring_state *state)
543 {
544 	int ret;
545 	struct vhost_user_msg msg;
546 	struct vhost_user_data *data = dev->backend_data;
547 	unsigned int index = state->index;
548 
549 	ret = vhost_user_set_vring(dev, VHOST_USER_GET_VRING_BASE, state);
550 	if (ret < 0) {
551 		PMD_DRV_LOG(ERR, "Failed to send request");
552 		goto err;
553 	}
554 
555 	ret = vhost_user_read(data->vhostfd, &msg);
556 	if (ret < 0) {
557 		PMD_DRV_LOG(ERR, "Failed to read reply");
558 		goto err;
559 	}
560 
561 	if (msg.request != VHOST_USER_GET_VRING_BASE) {
562 		PMD_DRV_LOG(ERR, "Unexpected request type (%d)", msg.request);
563 		goto err;
564 	}
565 
566 	if (msg.size != sizeof(*state)) {
567 		PMD_DRV_LOG(ERR, "Unexpected payload size (%u)", msg.size);
568 		goto err;
569 	}
570 
571 	if (msg.payload.state.index != index) {
572 		PMD_DRV_LOG(ERR, "Unexpected ring index (%u)", state->index);
573 		goto err;
574 	}
575 
576 	*state = msg.payload.state;
577 
578 	return 0;
579 err:
580 	PMD_DRV_LOG(ERR, "Failed to get vring base");
581 	return -1;
582 }
583 
584 static int
585 vhost_user_set_vring_file(struct virtio_user_dev *dev, enum vhost_user_request req,
586 		struct vhost_vring_file *file)
587 {
588 	int ret;
589 	int fd = file->fd;
590 	int num_fd = 0;
591 	struct vhost_user_data *data = dev->backend_data;
592 	struct vhost_user_msg msg = {
593 		.request = req,
594 		.flags = VHOST_USER_VERSION,
595 		.size = sizeof(msg.payload.u64),
596 		.payload.u64 = file->index & VHOST_USER_VRING_IDX_MASK,
597 	};
598 
599 	if (fd >= 0)
600 		num_fd++;
601 	else
602 		msg.payload.u64 |= VHOST_USER_VRING_NOFD_MASK;
603 
604 	ret = vhost_user_write(data->vhostfd, &msg, &fd, num_fd);
605 	if (ret < 0) {
606 		PMD_DRV_LOG(ERR, "Failed to set vring file (request %d)", req);
607 		return -1;
608 	}
609 
610 	return 0;
611 }
612 
613 static int
614 vhost_user_set_vring_call(struct virtio_user_dev *dev, struct vhost_vring_file *file)
615 {
616 	return vhost_user_set_vring_file(dev, VHOST_USER_SET_VRING_CALL, file);
617 }
618 
619 static int
620 vhost_user_set_vring_kick(struct virtio_user_dev *dev, struct vhost_vring_file *file)
621 {
622 	return vhost_user_set_vring_file(dev, VHOST_USER_SET_VRING_KICK, file);
623 }
624 
625 
626 static int
627 vhost_user_set_vring_addr(struct virtio_user_dev *dev, struct vhost_vring_addr *addr)
628 {
629 	int ret;
630 	struct vhost_user_data *data = dev->backend_data;
631 	struct vhost_user_msg msg = {
632 		.request = VHOST_USER_SET_VRING_ADDR,
633 		.flags = VHOST_USER_VERSION,
634 		.size = sizeof(*addr),
635 		.payload.addr = *addr,
636 	};
637 
638 	ret = vhost_user_write(data->vhostfd, &msg, NULL, 0);
639 	if (ret < 0) {
640 		PMD_DRV_LOG(ERR, "Failed to send vring addresses");
641 		return -1;
642 	}
643 
644 	return 0;
645 }
646 
647 static int
648 vhost_user_get_status(struct virtio_user_dev *dev, uint8_t *status)
649 {
650 	int ret;
651 	struct vhost_user_data *data = dev->backend_data;
652 	struct vhost_user_msg msg = {
653 		.request = VHOST_USER_GET_STATUS,
654 		.flags = VHOST_USER_VERSION,
655 	};
656 
657 	/*
658 	 * If features have not been negotiated, we don't know if the backend
659 	 * supports protocol features
660 	 */
661 	if (!(dev->status & VIRTIO_CONFIG_STATUS_FEATURES_OK))
662 		return -ENOTSUP;
663 
664 	/* Status protocol feature requires protocol features support */
665 	if (!(dev->device_features & (1ULL << VHOST_USER_F_PROTOCOL_FEATURES)))
666 		return -ENOTSUP;
667 
668 	if (!(data->protocol_features & (1ULL << VHOST_USER_PROTOCOL_F_STATUS)))
669 		return -ENOTSUP;
670 
671 	ret = vhost_user_write(data->vhostfd, &msg, NULL, 0);
672 	if (ret < 0) {
673 		PMD_DRV_LOG(ERR, "Failed to send request");
674 		goto err;
675 	}
676 
677 	ret = vhost_user_read(data->vhostfd, &msg);
678 	if (ret < 0) {
679 		PMD_DRV_LOG(ERR, "Failed to recv request");
680 		goto err;
681 	}
682 
683 	if (msg.request != VHOST_USER_GET_STATUS) {
684 		PMD_DRV_LOG(ERR, "Unexpected request type (%d)", msg.request);
685 		goto err;
686 	}
687 
688 	if (msg.size != sizeof(msg.payload.u64)) {
689 		PMD_DRV_LOG(ERR, "Unexpected payload size (%u)", msg.size);
690 		goto err;
691 	}
692 
693 	*status = (uint8_t)msg.payload.u64;
694 
695 	return 0;
696 err:
697 	PMD_DRV_LOG(ERR, "Failed to get device status");
698 	return -1;
699 }
700 
701 static int
702 vhost_user_set_status(struct virtio_user_dev *dev, uint8_t status)
703 {
704 	int ret;
705 	struct vhost_user_data *data = dev->backend_data;
706 	struct vhost_user_msg msg = {
707 		.request = VHOST_USER_SET_STATUS,
708 		.flags = VHOST_USER_VERSION,
709 		.size = sizeof(msg.payload.u64),
710 		.payload.u64 = status,
711 	};
712 
713 	/*
714 	 * If features have not been negotiated, we don't know if the backend
715 	 * supports protocol features
716 	 */
717 	if (!(dev->status & VIRTIO_CONFIG_STATUS_FEATURES_OK))
718 		return -ENOTSUP;
719 
720 	/* Status protocol feature requires protocol features support */
721 	if (!(dev->device_features & (1ULL << VHOST_USER_F_PROTOCOL_FEATURES)))
722 		return -ENOTSUP;
723 
724 	if (!(data->protocol_features & (1ULL << VHOST_USER_PROTOCOL_F_STATUS)))
725 		return -ENOTSUP;
726 
727 	if (data->protocol_features & (1ULL << VHOST_USER_PROTOCOL_F_REPLY_ACK))
728 		msg.flags |= VHOST_USER_NEED_REPLY_MASK;
729 
730 	ret = vhost_user_write(data->vhostfd, &msg, NULL, 0);
731 	if (ret < 0) {
732 		PMD_DRV_LOG(ERR, "Failed to send get status request");
733 		return -1;
734 	}
735 
736 	return vhost_user_check_reply_ack(dev, &msg);
737 }
738 
739 #define MAX_VIRTIO_USER_BACKLOG 1
740 static int
741 vhost_user_start_server(struct virtio_user_dev *dev, struct sockaddr_un *un)
742 {
743 	int ret;
744 	int flag;
745 	struct vhost_user_data *data = dev->backend_data;
746 	int fd = data->listenfd;
747 
748 	ret = bind(fd, (struct sockaddr *)un, sizeof(*un));
749 	if (ret < 0) {
750 		PMD_DRV_LOG(ERR, "failed to bind to %s: %s; remove it and try again",
751 			    dev->path, strerror(errno));
752 		return -1;
753 	}
754 	ret = listen(fd, MAX_VIRTIO_USER_BACKLOG);
755 	if (ret < 0)
756 		return -1;
757 
758 	PMD_DRV_LOG(NOTICE, "(%s) waiting for client connection...", dev->path);
759 	data->vhostfd = accept(fd, NULL, NULL);
760 	if (data->vhostfd < 0) {
761 		PMD_DRV_LOG(ERR, "Failed to accept initial client connection (%s)",
762 				strerror(errno));
763 		return -1;
764 	}
765 
766 	flag = fcntl(fd, F_GETFL);
767 	if (fcntl(fd, F_SETFL, flag | O_NONBLOCK) < 0) {
768 		PMD_DRV_LOG(ERR, "fcntl failed, %s", strerror(errno));
769 		return -1;
770 	}
771 
772 	return 0;
773 }
774 
775 static int
776 vhost_user_server_disconnect(struct virtio_user_dev *dev)
777 {
778 	struct vhost_user_data *data = dev->backend_data;
779 
780 	if (data->vhostfd < 0) {
781 		PMD_DRV_LOG(ERR, "(%s) Expected valid Vhost FD", dev->path);
782 		return -1;
783 	}
784 
785 	close(data->vhostfd);
786 	data->vhostfd = -1;
787 
788 	return 0;
789 }
790 
791 static int
792 vhost_user_server_reconnect(struct virtio_user_dev *dev)
793 {
794 	struct vhost_user_data *data = dev->backend_data;
795 	int fd;
796 
797 	fd = accept(data->listenfd, NULL, NULL);
798 	if (fd < 0)
799 		return -1;
800 
801 	data->vhostfd = fd;
802 
803 	return 0;
804 }
805 
806 /**
807  * Set up environment to talk with a vhost user backend.
808  *
809  * @return
810  *   - (-1) if fail;
811  *   - (0) if succeed.
812  */
813 static int
814 vhost_user_setup(struct virtio_user_dev *dev)
815 {
816 	int fd;
817 	int flag;
818 	struct sockaddr_un un;
819 	struct vhost_user_data *data;
820 
821 	data = malloc(sizeof(*data));
822 	if (!data) {
823 		PMD_DRV_LOG(ERR, "(%s) Failed to allocate Vhost-user data", dev->path);
824 		return -1;
825 	}
826 
827 	memset(data, 0, sizeof(*data));
828 
829 	dev->backend_data = data;
830 
831 	data->vhostfd = -1;
832 	data->listenfd = -1;
833 
834 	fd = socket(AF_UNIX, SOCK_STREAM, 0);
835 	if (fd < 0) {
836 		PMD_DRV_LOG(ERR, "socket() error, %s", strerror(errno));
837 		goto err_data;
838 	}
839 
840 	flag = fcntl(fd, F_GETFD);
841 	if (flag == -1)
842 		PMD_DRV_LOG(WARNING, "fcntl get fd failed, %s", strerror(errno));
843 	else if (fcntl(fd, F_SETFD, flag | FD_CLOEXEC) < 0)
844 		PMD_DRV_LOG(WARNING, "fcntl set fd failed, %s", strerror(errno));
845 
846 	memset(&un, 0, sizeof(un));
847 	un.sun_family = AF_UNIX;
848 	strlcpy(un.sun_path, dev->path, sizeof(un.sun_path));
849 
850 	if (dev->is_server) {
851 		data->listenfd = fd;
852 		if (vhost_user_start_server(dev, &un) < 0) {
853 			PMD_DRV_LOG(ERR, "virtio-user startup fails in server mode");
854 			goto err_socket;
855 		}
856 	} else {
857 		if (connect(fd, (struct sockaddr *)&un, sizeof(un)) < 0) {
858 			PMD_DRV_LOG(ERR, "connect error, %s", strerror(errno));
859 			goto err_socket;
860 		}
861 		data->vhostfd = fd;
862 	}
863 
864 	return 0;
865 
866 err_socket:
867 	close(fd);
868 err_data:
869 	free(data);
870 	dev->backend_data = NULL;
871 
872 	return -1;
873 }
874 
875 static int
876 vhost_user_destroy(struct virtio_user_dev *dev)
877 {
878 	struct vhost_user_data *data = dev->backend_data;
879 
880 	if (!data)
881 		return 0;
882 
883 	if (data->vhostfd >= 0) {
884 		close(data->vhostfd);
885 		data->vhostfd = -1;
886 	}
887 
888 	if (data->listenfd >= 0) {
889 		close(data->listenfd);
890 		data->listenfd = -1;
891 	}
892 
893 	free(data);
894 	dev->backend_data = NULL;
895 
896 	return 0;
897 }
898 
899 static int
900 vhost_user_enable_queue_pair(struct virtio_user_dev *dev,
901 			     uint16_t pair_idx,
902 			     int enable)
903 {
904 	struct vhost_user_data *data = dev->backend_data;
905 	int i;
906 
907 	if (data->vhostfd < 0)
908 		return 0;
909 
910 	if (dev->qp_enabled[pair_idx] == enable)
911 		return 0;
912 
913 	for (i = 0; i < 2; ++i) {
914 		struct vhost_vring_state state = {
915 			.index = pair_idx * 2 + i,
916 			.num = enable,
917 		};
918 
919 		if (vhost_user_set_vring_enable(dev, &state))
920 			return -1;
921 	}
922 
923 	dev->qp_enabled[pair_idx] = enable;
924 	return 0;
925 }
926 
927 static int
928 vhost_user_get_backend_features(uint64_t *features)
929 {
930 	*features = 1ULL << VHOST_USER_F_PROTOCOL_FEATURES;
931 
932 	return 0;
933 }
934 
935 static int
936 vhost_user_update_link_state(struct virtio_user_dev *dev)
937 {
938 	struct vhost_user_data *data = dev->backend_data;
939 	char buf[128];
940 
941 	if (data->vhostfd >= 0) {
942 		int r;
943 		int flags;
944 
945 		flags = fcntl(data->vhostfd, F_GETFL);
946 		if (fcntl(data->vhostfd, F_SETFL, flags | O_NONBLOCK) == -1) {
947 			PMD_DRV_LOG(ERR, "error setting O_NONBLOCK flag");
948 			return -1;
949 		}
950 
951 		r = recv(data->vhostfd, buf, 128, MSG_PEEK);
952 		if (r == 0 || (r < 0 && errno != EAGAIN)) {
953 			dev->net_status &= (~VIRTIO_NET_S_LINK_UP);
954 			PMD_DRV_LOG(ERR, "virtio-user port %u is down", dev->hw.port_id);
955 
956 			/* This function could be called in the process
957 			 * of interrupt handling, callback cannot be
958 			 * unregistered here, set an alarm to do it.
959 			 */
960 			rte_eal_alarm_set(1,
961 				virtio_user_dev_delayed_disconnect_handler,
962 				(void *)dev);
963 		} else {
964 			dev->net_status |= VIRTIO_NET_S_LINK_UP;
965 		}
966 
967 		if (fcntl(data->vhostfd, F_SETFL,
968 					flags & ~O_NONBLOCK) == -1) {
969 			PMD_DRV_LOG(ERR, "error clearing O_NONBLOCK flag");
970 			return -1;
971 		}
972 	} else if (dev->is_server) {
973 		dev->net_status &= (~VIRTIO_NET_S_LINK_UP);
974 		if (virtio_user_dev_server_reconnect(dev) >= 0)
975 			dev->net_status |= VIRTIO_NET_S_LINK_UP;
976 	}
977 
978 	return 0;
979 }
980 
981 static int
982 vhost_user_get_intr_fd(struct virtio_user_dev *dev)
983 {
984 	struct vhost_user_data *data = dev->backend_data;
985 
986 	if (dev->is_server && data->vhostfd == -1)
987 		return data->listenfd;
988 
989 	return data->vhostfd;
990 }
991 
/*
 * Backend operations table for the vhost-user backend: binds each generic
 * virtio-user backend callback to its implementation in this file.
 */
struct virtio_user_backend_ops virtio_ops_user = {
	.setup = vhost_user_setup,
	.destroy = vhost_user_destroy,
	.get_backend_features = vhost_user_get_backend_features,
	.set_owner = vhost_user_set_owner,
	.get_features = vhost_user_get_features,
	.set_features = vhost_user_set_features,
	.set_memory_table = vhost_user_set_memory_table,
	.set_vring_num = vhost_user_set_vring_num,
	.set_vring_base = vhost_user_set_vring_base,
	.get_vring_base = vhost_user_get_vring_base,
	.set_vring_call = vhost_user_set_vring_call,
	.set_vring_kick = vhost_user_set_vring_kick,
	.set_vring_addr = vhost_user_set_vring_addr,
	.get_status = vhost_user_get_status,
	.set_status = vhost_user_set_status,
	.enable_qp = vhost_user_enable_queue_pair,
	.update_link_state = vhost_user_update_link_state,
	.server_disconnect = vhost_user_server_disconnect,
	.server_reconnect = vhost_user_server_reconnect,
	.get_intr_fd = vhost_user_get_intr_fd,
};
1014