xref: /dpdk/examples/vhost_blk/vhost_blk.c (revision ae67f7d0256687fdfb24d27ee94b20d88c65108e)
1c19beb3fSJin Yu /* SPDX-License-Identifier: BSD-3-Clause
2c19beb3fSJin Yu  * Copyright(c) 2010-2019 Intel Corporation
3c19beb3fSJin Yu  */
4c19beb3fSJin Yu 
59c3586a6SDavid Marchand #ifndef _GNU_SOURCE
69c3586a6SDavid Marchand #define _GNU_SOURCE
79c3586a6SDavid Marchand #endif
89c3586a6SDavid Marchand 
9c19beb3fSJin Yu #include <stdint.h>
1072b452c5SDmitry Kozlyuk #include <stdlib.h>
11c19beb3fSJin Yu #include <unistd.h>
12c19beb3fSJin Yu #include <stdbool.h>
13c19beb3fSJin Yu #include <signal.h>
14c19beb3fSJin Yu #include <assert.h>
15c19beb3fSJin Yu #include <semaphore.h>
16c19beb3fSJin Yu #include <linux/virtio_blk.h>
17c19beb3fSJin Yu #include <linux/virtio_ring.h>
18c19beb3fSJin Yu 
19c19beb3fSJin Yu #include <rte_atomic.h>
20c19beb3fSJin Yu #include <rte_cycles.h>
21c19beb3fSJin Yu #include <rte_log.h>
22c19beb3fSJin Yu #include <rte_malloc.h>
23c19beb3fSJin Yu #include <rte_vhost.h>
24c19beb3fSJin Yu 
25c19beb3fSJin Yu #include "vhost_blk.h"
26c19beb3fSJin Yu #include "blk_spec.h"
27c19beb3fSJin Yu 
28c19beb3fSJin Yu #define VIRTQ_DESC_F_NEXT	1
29c19beb3fSJin Yu #define VIRTQ_DESC_F_AVAIL	(1 << 7)
30c19beb3fSJin Yu #define VIRTQ_DESC_F_USED	(1 << 15)
31c19beb3fSJin Yu 
32c19beb3fSJin Yu #define MAX_TASK		12
33c19beb3fSJin Yu 
34c19beb3fSJin Yu #define VHOST_BLK_FEATURES ((1ULL << VIRTIO_F_RING_PACKED) | \
35c19beb3fSJin Yu 			    (1ULL << VIRTIO_F_VERSION_1) |\
36c19beb3fSJin Yu 			    (1ULL << VIRTIO_F_NOTIFY_ON_EMPTY) | \
37c19beb3fSJin Yu 			    (1ULL << VHOST_USER_F_PROTOCOL_FEATURES))
3891d3e2d4SJin Yu #define CTRLR_NAME		"vhost.socket"
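/*
 * CTRLR_NAME is the name of the vhost-user socket; it is created in the
 * current working directory (see vhost_blk_ctrlr_construct()). A guest can
 * attach to it through QEMU's vhost-user-blk device. The exact flags depend
 * on the QEMU version and guest memory setup, but typically look like:
 *
 *   -object memory-backend-file,id=mem,size=1G,mem-path=/dev/hugepages,share=on
 *   -numa node,memdev=mem
 *   -chardev socket,id=char0,path=<cwd>/vhost.socket
 *   -device vhost-user-blk-pci,chardev=char0
 *
 * (size and paths above are only illustrative.)
 */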
3991d3e2d4SJin Yu 
4091d3e2d4SJin Yu enum CTRLR_WORKER_STATUS {
4191d3e2d4SJin Yu 	WORKER_STATE_START = 0,
4291d3e2d4SJin Yu 	WORKER_STATE_STOP,
4391d3e2d4SJin Yu };
44c19beb3fSJin Yu 
45be848992STimothy Redaelli struct vhost_blk_ctrlr *g_vhost_ctrlr;
46be848992STimothy Redaelli 
47c19beb3fSJin Yu /* Full path of the vhost-user socket; built from the current working directory and CTRLR_NAME. */
48c19beb3fSJin Yu static char dev_pathname[PATH_MAX] = "";
49c19beb3fSJin Yu static sem_t exit_sem;
5091d3e2d4SJin Yu static enum CTRLR_WORKER_STATUS worker_thread_status;
51c19beb3fSJin Yu 
52c19beb3fSJin Yu struct vhost_blk_ctrlr *
53c19beb3fSJin Yu vhost_blk_ctrlr_find(const char *ctrlr_name)
54c19beb3fSJin Yu {
55c19beb3fSJin Yu 	if (ctrlr_name == NULL)
56c19beb3fSJin Yu 		return NULL;
57c19beb3fSJin Yu 
58c19beb3fSJin Yu 	/* currently we only support a single vhost-user socket */
59c19beb3fSJin Yu 	return g_vhost_ctrlr;
60c19beb3fSJin Yu }
61c19beb3fSJin Yu 
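/* Translate a guest physical address to a host virtual address using the
 * controller's memory table; *len is updated to the length of the
 * contiguous mapping starting at gpa.
 */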
6291d3e2d4SJin Yu static uint64_t
6391d3e2d4SJin Yu gpa_to_vva(struct vhost_blk_ctrlr *ctrlr, uint64_t gpa, uint64_t *len)
64c19beb3fSJin Yu {
65c19beb3fSJin Yu 	assert(ctrlr->mem != NULL);
66c19beb3fSJin Yu 
67c19beb3fSJin Yu 	return rte_vhost_va_from_guest_pa(ctrlr->mem, gpa, len);
68c19beb3fSJin Yu }
69c19beb3fSJin Yu 
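/* Complete a request on a split ring: log it as inflight, add it to the
 * used ring, clear the inflight entry and interrupt the guest.
 */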
7091d3e2d4SJin Yu static void
7191d3e2d4SJin Yu enqueue_task(struct vhost_blk_task *task)
72c19beb3fSJin Yu {
7391d3e2d4SJin Yu 	struct vhost_blk_queue *vq = task->vq;
7491d3e2d4SJin Yu 	struct vring_used *used = vq->vring.used;
7591d3e2d4SJin Yu 
7691d3e2d4SJin Yu 	rte_vhost_set_last_inflight_io_split(task->ctrlr->vid,
7791d3e2d4SJin Yu 		vq->id, task->req_idx);
7891d3e2d4SJin Yu 
7991d3e2d4SJin Yu 	/* Fill out the next entry in the "used" ring.  id = the
8091d3e2d4SJin Yu 	 * index of the descriptor that contained the blk request.
8191d3e2d4SJin Yu 	 * len = the total amount of data transferred for the blk
8291d3e2d4SJin Yu 	 * request. We must report the correct len, for variable
8391d3e2d4SJin Yu 	 * length blk requests, where we may return less data than
8491d3e2d4SJin Yu 	 * allocated by the guest VM.
8591d3e2d4SJin Yu 	 */
8691d3e2d4SJin Yu 	used->ring[used->idx & (vq->vring.size - 1)].id = task->req_idx;
8791d3e2d4SJin Yu 	used->ring[used->idx & (vq->vring.size - 1)].len = task->data_len;
8892e68d9cSTyler Retzlaff 	rte_atomic_thread_fence(rte_memory_order_seq_cst);
8991d3e2d4SJin Yu 	used->idx++;
9092e68d9cSTyler Retzlaff 	rte_atomic_thread_fence(rte_memory_order_seq_cst);
9191d3e2d4SJin Yu 
9291d3e2d4SJin Yu 	rte_vhost_clr_inflight_desc_split(task->ctrlr->vid,
9391d3e2d4SJin Yu 		vq->id, used->idx, task->req_idx);
9491d3e2d4SJin Yu 
9591d3e2d4SJin Yu 	/* Send an interrupt back to the guest VM so that it knows
9691d3e2d4SJin Yu 	 * a completion is ready to be processed.
9791d3e2d4SJin Yu 	 */
9891d3e2d4SJin Yu 	rte_vhost_vring_call(task->ctrlr->vid, vq->id);
99c19beb3fSJin Yu }
100c19beb3fSJin Yu 
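/* Complete a request on a packed ring: write back the used descriptor,
 * clear the inflight entry, advance last_used_idx (toggling the wrap
 * counter when the ring wraps) and interrupt the guest.
 */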
10191d3e2d4SJin Yu static void
10291d3e2d4SJin Yu enqueue_task_packed(struct vhost_blk_task *task)
10391d3e2d4SJin Yu {
10491d3e2d4SJin Yu 	struct vhost_blk_queue *vq = task->vq;
10591d3e2d4SJin Yu 	struct vring_packed_desc *desc;
10691d3e2d4SJin Yu 
10791d3e2d4SJin Yu 	rte_vhost_set_last_inflight_io_packed(task->ctrlr->vid, vq->id,
10891d3e2d4SJin Yu 					    task->inflight_idx);
10991d3e2d4SJin Yu 
11091d3e2d4SJin Yu 	desc = &vq->vring.desc_packed[vq->last_used_idx];
11191d3e2d4SJin Yu 	desc->id = task->buffer_id;
11291d3e2d4SJin Yu 	desc->addr = 0;
11391d3e2d4SJin Yu 
11492e68d9cSTyler Retzlaff 	rte_atomic_thread_fence(rte_memory_order_seq_cst);
11591d3e2d4SJin Yu 	if (vq->used_wrap_counter)
11691d3e2d4SJin Yu 		desc->flags |= VIRTQ_DESC_F_AVAIL | VIRTQ_DESC_F_USED;
11791d3e2d4SJin Yu 	else
11891d3e2d4SJin Yu 		desc->flags &= ~(VIRTQ_DESC_F_AVAIL | VIRTQ_DESC_F_USED);
11992e68d9cSTyler Retzlaff 	rte_atomic_thread_fence(rte_memory_order_seq_cst);
12091d3e2d4SJin Yu 
12191d3e2d4SJin Yu 	rte_vhost_clr_inflight_desc_packed(task->ctrlr->vid, vq->id,
12291d3e2d4SJin Yu 					   task->inflight_idx);
12391d3e2d4SJin Yu 
12491d3e2d4SJin Yu 	vq->last_used_idx += task->chain_num;
12591d3e2d4SJin Yu 	if (vq->last_used_idx >= vq->vring.size) {
12691d3e2d4SJin Yu 		vq->last_used_idx -= vq->vring.size;
12791d3e2d4SJin Yu 		vq->used_wrap_counter = !vq->used_wrap_counter;
12891d3e2d4SJin Yu 	}
12991d3e2d4SJin Yu 
13091d3e2d4SJin Yu 	/* Send an interrupt back to the guest VM so that it knows
13191d3e2d4SJin Yu 	 * a completion is ready to be processed.
13291d3e2d4SJin Yu 	 */
13391d3e2d4SJin Yu 	rte_vhost_vring_call(task->ctrlr->vid, vq->id);
134c19beb3fSJin Yu }
135c19beb3fSJin Yu 
136c19beb3fSJin Yu static bool
137c19beb3fSJin Yu descriptor_has_next_packed(struct vring_packed_desc *cur_desc)
138c19beb3fSJin Yu {
139c19beb3fSJin Yu 	return !!(cur_desc->flags & VRING_DESC_F_NEXT);
140c19beb3fSJin Yu }
141c19beb3fSJin Yu 
142c19beb3fSJin Yu static bool
143c19beb3fSJin Yu descriptor_has_next_split(struct vring_desc *cur_desc)
144c19beb3fSJin Yu {
145c19beb3fSJin Yu 	return !!(cur_desc->flags & VRING_DESC_F_NEXT);
146c19beb3fSJin Yu }
147c19beb3fSJin Yu 
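/* Map a descriptor payload (guest physical address + length) into host
 * iovecs. A single payload may span several guest memory regions and thus
 * consume more than one iovec entry.
 */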
14891d3e2d4SJin Yu static int
14991d3e2d4SJin Yu desc_payload_to_iovs(struct vhost_blk_ctrlr *ctrlr, struct iovec *iovs,
15091d3e2d4SJin Yu 		     uint32_t *iov_index, uintptr_t payload, uint64_t remaining)
151c19beb3fSJin Yu {
15291d3e2d4SJin Yu 	void *vva;
15391d3e2d4SJin Yu 	uint64_t len;
154c19beb3fSJin Yu 
155c19beb3fSJin Yu 	do {
15691d3e2d4SJin Yu 		if (*iov_index >= VHOST_BLK_MAX_IOVS) {
15791d3e2d4SJin Yu 			fprintf(stderr, "VHOST_BLK_MAX_IOVS reached\n");
15891d3e2d4SJin Yu 			return -1;
15991d3e2d4SJin Yu 		}
16091d3e2d4SJin Yu 		len = remaining;
16191d3e2d4SJin Yu 		vva = (void *)(uintptr_t)gpa_to_vva(ctrlr,
16291d3e2d4SJin Yu 				 payload, &len);
16391d3e2d4SJin Yu 		if (!vva || !len) {
164c19beb3fSJin Yu 			fprintf(stderr, "failed to translate desc address.\n");
16591d3e2d4SJin Yu 			return -1;
16691d3e2d4SJin Yu 		}
16791d3e2d4SJin Yu 
16891d3e2d4SJin Yu 		iovs[*iov_index].iov_base = vva;
16991d3e2d4SJin Yu 		iovs[*iov_index].iov_len = len;
17091d3e2d4SJin Yu 		payload += len;
17191d3e2d4SJin Yu 		remaining -= len;
17291d3e2d4SJin Yu 		(*iov_index)++;
17391d3e2d4SJin Yu 	} while (remaining);
17491d3e2d4SJin Yu 
17591d3e2d4SJin Yu 	return 0;
17691d3e2d4SJin Yu }
17791d3e2d4SJin Yu 
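/* Return the next descriptor of a split-ring chain, or NULL at the end. */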
17891d3e2d4SJin Yu static struct vring_desc *
17991d3e2d4SJin Yu vring_get_next_desc(struct vhost_blk_queue *vq, struct vring_desc *desc)
18091d3e2d4SJin Yu {
18191d3e2d4SJin Yu 	if (descriptor_has_next_split(desc))
18291d3e2d4SJin Yu 		return &vq->vring.desc[desc->next];
18391d3e2d4SJin Yu 
18491d3e2d4SJin Yu 	return NULL;
18591d3e2d4SJin Yu }
18691d3e2d4SJin Yu 
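/* Advance *req_idx to the next descriptor of a packed-ring chain and
 * return it, or return NULL at the end of the chain.
 */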
18791d3e2d4SJin Yu static struct vring_packed_desc *
18891d3e2d4SJin Yu vring_get_next_desc_packed(struct vhost_blk_queue *vq, uint16_t *req_idx)
18991d3e2d4SJin Yu {
19091d3e2d4SJin Yu 	if (descriptor_has_next_packed(&vq->vring.desc_packed[*req_idx])) {
19191d3e2d4SJin Yu 		*req_idx = (*req_idx + 1) % vq->vring.size;
19291d3e2d4SJin Yu 		return &vq->vring.desc_packed[*req_idx];
19391d3e2d4SJin Yu 	}
19491d3e2d4SJin Yu 
19591d3e2d4SJin Yu 	return NULL;
19691d3e2d4SJin Yu }
19791d3e2d4SJin Yu 
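/* Return the next descriptor of an inflight packed chain, or NULL at the
 * end of the chain.
 */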
19891d3e2d4SJin Yu static struct rte_vhost_inflight_desc_packed *
19991d3e2d4SJin Yu vring_get_next_inflight_desc(struct vhost_blk_queue *vq,
20091d3e2d4SJin Yu 			struct rte_vhost_inflight_desc_packed *desc)
20191d3e2d4SJin Yu {
20291d3e2d4SJin Yu 	if (!!(desc->flags & VRING_DESC_F_NEXT))
20391d3e2d4SJin Yu 		return &vq->inflight_ring.inflight_packed->desc[desc->next];
20491d3e2d4SJin Yu 
20591d3e2d4SJin Yu 	return NULL;
20691d3e2d4SJin Yu }
20791d3e2d4SJin Yu 
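/* Build the iovec array for a request from a split-ring descriptor chain
 * starting at req_idx; *payload accumulates the total chain length.
 */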
20891d3e2d4SJin Yu static int
20991d3e2d4SJin Yu setup_iovs_from_descs_split(struct vhost_blk_ctrlr *ctrlr,
21091d3e2d4SJin Yu 			    struct vhost_blk_queue *vq, uint16_t req_idx,
21191d3e2d4SJin Yu 			    struct iovec *iovs, uint32_t *iovs_idx,
21291d3e2d4SJin Yu 			    uint32_t *payload)
21391d3e2d4SJin Yu {
21491d3e2d4SJin Yu 	struct vring_desc *desc = &vq->vring.desc[req_idx];
21591d3e2d4SJin Yu 
21691d3e2d4SJin Yu 	do {
21791d3e2d4SJin Yu 		/* does not support indirect descriptors */
21891d3e2d4SJin Yu 		assert((desc->flags & VRING_DESC_F_INDIRECT) == 0);
21991d3e2d4SJin Yu 
22091d3e2d4SJin Yu 		if (*iovs_idx >= VHOST_BLK_MAX_IOVS) {
22191d3e2d4SJin Yu 			fprintf(stderr, "Reached VHOST_BLK_MAX_IOVS\n");
22291d3e2d4SJin Yu 			return -1;
22391d3e2d4SJin Yu 		}
22491d3e2d4SJin Yu 
22591d3e2d4SJin Yu 		if (desc_payload_to_iovs(ctrlr, iovs, iovs_idx,
22691d3e2d4SJin Yu 			desc->addr, desc->len) != 0) {
22791d3e2d4SJin Yu 			fprintf(stderr, "Failed to convert desc payload to iovs\n");
22891d3e2d4SJin Yu 			return -1;
22991d3e2d4SJin Yu 		}
23091d3e2d4SJin Yu 
23191d3e2d4SJin Yu 		*payload += desc->len;
23291d3e2d4SJin Yu 
23391d3e2d4SJin Yu 		desc = vring_get_next_desc(vq, desc);
23491d3e2d4SJin Yu 	} while (desc != NULL);
23591d3e2d4SJin Yu 
23691d3e2d4SJin Yu 	return 0;
23791d3e2d4SJin Yu }
23891d3e2d4SJin Yu 
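/* Build the iovec array for a request from a packed-ring descriptor chain
 * starting at req_idx; *payload accumulates the total chain length.
 */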
23991d3e2d4SJin Yu static int
24091d3e2d4SJin Yu setup_iovs_from_descs_packed(struct vhost_blk_ctrlr *ctrlr,
24191d3e2d4SJin Yu 			     struct vhost_blk_queue *vq, uint16_t req_idx,
24291d3e2d4SJin Yu 			     struct iovec *iovs, uint32_t *iovs_idx,
24391d3e2d4SJin Yu 			     uint32_t *payload)
24491d3e2d4SJin Yu {
24591d3e2d4SJin Yu 	struct vring_packed_desc *desc = &vq->vring.desc_packed[req_idx];
24691d3e2d4SJin Yu 
24791d3e2d4SJin Yu 	do {
24891d3e2d4SJin Yu 		/* does not support indirect descriptors */
24991d3e2d4SJin Yu 		assert((desc->flags & VRING_DESC_F_INDIRECT) == 0);
25091d3e2d4SJin Yu 
25191d3e2d4SJin Yu 		if (*iovs_idx >= VHOST_BLK_MAX_IOVS) {
25291d3e2d4SJin Yu 			fprintf(stderr, "Reached VHOST_BLK_MAX_IOVS\n");
25391d3e2d4SJin Yu 			return -1;
25491d3e2d4SJin Yu 		}
25591d3e2d4SJin Yu 
25691d3e2d4SJin Yu 		if (desc_payload_to_iovs(ctrlr, iovs, iovs_idx,
25791d3e2d4SJin Yu 			desc->addr, desc->len) != 0) {
25891d3e2d4SJin Yu 			fprintf(stderr, "Failed to convert desc payload to iovs\n");
25991d3e2d4SJin Yu 			return -1;
26091d3e2d4SJin Yu 		}
26191d3e2d4SJin Yu 
26291d3e2d4SJin Yu 		*payload += desc->len;
26391d3e2d4SJin Yu 
26491d3e2d4SJin Yu 		desc = vring_get_next_desc_packed(vq, &req_idx);
26591d3e2d4SJin Yu 	} while (desc != NULL);
26691d3e2d4SJin Yu 
26791d3e2d4SJin Yu 	return 0;
26891d3e2d4SJin Yu }
26991d3e2d4SJin Yu 
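/* Build the iovec array for a request that is resubmitted from the
 * inflight ring after a reconnection.
 */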
27091d3e2d4SJin Yu static int
27191d3e2d4SJin Yu setup_iovs_from_inflight_desc(struct vhost_blk_ctrlr *ctrlr,
27291d3e2d4SJin Yu 			      struct vhost_blk_queue *vq, uint16_t req_idx,
27391d3e2d4SJin Yu 			      struct iovec *iovs, uint32_t *iovs_idx,
27491d3e2d4SJin Yu 			      uint32_t *payload)
27591d3e2d4SJin Yu {
27691d3e2d4SJin Yu 	struct rte_vhost_ring_inflight *inflight_vq;
27791d3e2d4SJin Yu 	struct rte_vhost_inflight_desc_packed *desc;
27891d3e2d4SJin Yu 
27991d3e2d4SJin Yu 	inflight_vq = &vq->inflight_ring;
28091d3e2d4SJin Yu 	desc = &inflight_vq->inflight_packed->desc[req_idx];
28191d3e2d4SJin Yu 
28291d3e2d4SJin Yu 	do {
28391d3e2d4SJin Yu 		/* does not support indirect descriptors */
28491d3e2d4SJin Yu 		assert((desc->flags & VRING_DESC_F_INDIRECT) == 0);
28591d3e2d4SJin Yu 
28691d3e2d4SJin Yu 		if (*iovs_idx >= VHOST_BLK_MAX_IOVS) {
28791d3e2d4SJin Yu 			fprintf(stderr, "Reached VHOST_BLK_MAX_IOVS\n");
28891d3e2d4SJin Yu 			return -1;
28991d3e2d4SJin Yu 		}
29091d3e2d4SJin Yu 
29191d3e2d4SJin Yu 		if (desc_payload_to_iovs(ctrlr, iovs, iovs_idx,
29291d3e2d4SJin Yu 			desc->addr, desc->len) != 0) {
29391d3e2d4SJin Yu 			fprintf(stderr, "Failed to convert desc payload to iovs\n");
29491d3e2d4SJin Yu 			return -1;
29591d3e2d4SJin Yu 		}
29691d3e2d4SJin Yu 
29791d3e2d4SJin Yu 		*payload += desc->len;
29891d3e2d4SJin Yu 
29991d3e2d4SJin Yu 		desc = vring_get_next_inflight_desc(vq, desc);
30091d3e2d4SJin Yu 	} while (desc != NULL);
30191d3e2d4SJin Yu 
30291d3e2d4SJin Yu 	return 0;
30391d3e2d4SJin Yu }
30491d3e2d4SJin Yu 
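/* Handle one virtio-blk request: gather its descriptor chain into iovecs,
 * locate the request header and status byte, execute the command on the
 * backing device and complete the request on the ring.
 */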
30591d3e2d4SJin Yu static void
30691d3e2d4SJin Yu process_blk_task(struct vhost_blk_task *task)
30791d3e2d4SJin Yu {
30891d3e2d4SJin Yu 	uint32_t payload = 0;
30991d3e2d4SJin Yu 
31091d3e2d4SJin Yu 	if (task->vq->packed_ring) {
31191d3e2d4SJin Yu 		struct rte_vhost_ring_inflight *inflight_ring;
31291d3e2d4SJin Yu 		struct rte_vhost_resubmit_info *resubmit_inflight;
31391d3e2d4SJin Yu 
31491d3e2d4SJin Yu 		inflight_ring = &task->vq->inflight_ring;
31591d3e2d4SJin Yu 		resubmit_inflight = inflight_ring->resubmit_inflight;
31691d3e2d4SJin Yu 
31791d3e2d4SJin Yu 		if (resubmit_inflight != NULL &&
31891d3e2d4SJin Yu 		    resubmit_inflight->resubmit_list != NULL) {
31991d3e2d4SJin Yu 			if (setup_iovs_from_inflight_desc(task->ctrlr, task->vq,
32091d3e2d4SJin Yu 				task->req_idx, task->iovs, &task->iovs_cnt,
32191d3e2d4SJin Yu 				&payload)) {
32291d3e2d4SJin Yu 				fprintf(stderr, "Failed to setup iovs\n");
323c19beb3fSJin Yu 				return;
324c19beb3fSJin Yu 			}
32591d3e2d4SJin Yu 		} else {
32691d3e2d4SJin Yu 			if (setup_iovs_from_descs_packed(task->ctrlr, task->vq,
32791d3e2d4SJin Yu 				task->req_idx, task->iovs, &task->iovs_cnt,
32891d3e2d4SJin Yu 				&payload)) {
32991d3e2d4SJin Yu 				fprintf(stderr, "Failed to setup iovs\n");
33091d3e2d4SJin Yu 				return;
33191d3e2d4SJin Yu 			}
33291d3e2d4SJin Yu 		}
33391d3e2d4SJin Yu 	} else {
33491d3e2d4SJin Yu 		if (setup_iovs_from_descs_split(task->ctrlr, task->vq,
33591d3e2d4SJin Yu 			task->req_idx, task->iovs, &task->iovs_cnt, &payload)) {
33691d3e2d4SJin Yu 			fprintf(stderr, "Failed to setup iovs\n");
33791d3e2d4SJin Yu 			return;
33891d3e2d4SJin Yu 		}
33991d3e2d4SJin Yu 	}
340c19beb3fSJin Yu 
34191d3e2d4SJin Yu 	/* First IOV must be the req head. */
34291d3e2d4SJin Yu 	task->req = (struct virtio_blk_outhdr *)task->iovs[0].iov_base;
34391d3e2d4SJin Yu 	assert(sizeof(*task->req) == task->iovs[0].iov_len);
344c19beb3fSJin Yu 
34591d3e2d4SJin Yu 	/* Last IOV must be the status tail. */
34691d3e2d4SJin Yu 	task->status = (uint8_t *)task->iovs[task->iovs_cnt - 1].iov_base;
34791d3e2d4SJin Yu 	assert(sizeof(*task->status) == task->iovs[task->iovs_cnt - 1].iov_len);
34891d3e2d4SJin Yu 
34991d3e2d4SJin Yu 	/* Transport data len */
35091d3e2d4SJin Yu 	task->data_len = payload - task->iovs[0].iov_len -
35191d3e2d4SJin Yu 		task->iovs[task->iovs_cnt - 1].iov_len;
35291d3e2d4SJin Yu 
35391d3e2d4SJin Yu 	if (vhost_bdev_process_blk_commands(task->ctrlr->bdev, task))
35491d3e2d4SJin Yu 		/* request failed */
35591d3e2d4SJin Yu 		*task->status = VIRTIO_BLK_S_IOERR;
35691d3e2d4SJin Yu 	else
35791d3e2d4SJin Yu 		/* success */
35891d3e2d4SJin Yu 		*task->status = VIRTIO_BLK_S_OK;
35991d3e2d4SJin Yu 
36091d3e2d4SJin Yu 	if (task->vq->packed_ring)
36191d3e2d4SJin Yu 		enqueue_task_packed(task);
36291d3e2d4SJin Yu 	else
36391d3e2d4SJin Yu 		enqueue_task(task);
364c19beb3fSJin Yu }
365c19beb3fSJin Yu 
366c19beb3fSJin Yu static void
36791d3e2d4SJin Yu blk_task_init(struct vhost_blk_task *task)
368c19beb3fSJin Yu {
36991d3e2d4SJin Yu 	task->iovs_cnt = 0;
37091d3e2d4SJin Yu 	task->data_len = 0;
37191d3e2d4SJin Yu 	task->req = NULL;
37291d3e2d4SJin Yu 	task->status = NULL;
373c19beb3fSJin Yu }
374c19beb3fSJin Yu 
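/* Resubmit the requests this queue left inflight before the last
 * disconnect, as recorded in the inflight shared memory region.
 */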
375c19beb3fSJin Yu static void
37691d3e2d4SJin Yu submit_inflight_vq(struct vhost_blk_queue *vq)
377c19beb3fSJin Yu {
37891d3e2d4SJin Yu 	struct rte_vhost_ring_inflight *inflight_ring;
379c19beb3fSJin Yu 	struct rte_vhost_resubmit_info *resubmit_inflight;
380c19beb3fSJin Yu 	struct vhost_blk_task *task;
381c19beb3fSJin Yu 
38291d3e2d4SJin Yu 	inflight_ring = &vq->inflight_ring;
38391d3e2d4SJin Yu 	resubmit_inflight = inflight_ring->resubmit_inflight;
384c19beb3fSJin Yu 
38591d3e2d4SJin Yu 	if (resubmit_inflight == NULL ||
38691d3e2d4SJin Yu 	    resubmit_inflight->resubmit_num == 0)
38791d3e2d4SJin Yu 		return;
388c19beb3fSJin Yu 
38991d3e2d4SJin Yu 	fprintf(stdout, "Resubmit inflight num is %d\n",
39091d3e2d4SJin Yu 		resubmit_inflight->resubmit_num);
391c19beb3fSJin Yu 
392c19beb3fSJin Yu 	while (resubmit_inflight->resubmit_num-- > 0) {
39391d3e2d4SJin Yu 		uint16_t desc_idx;
394c19beb3fSJin Yu 
39591d3e2d4SJin Yu 		desc_idx = resubmit_inflight->resubmit_list[
39691d3e2d4SJin Yu 					resubmit_inflight->resubmit_num].index;
397c19beb3fSJin Yu 
39891d3e2d4SJin Yu 		if (vq->packed_ring) {
39991d3e2d4SJin Yu 			uint16_t task_idx;
40091d3e2d4SJin Yu 			struct rte_vhost_inflight_desc_packed *desc;
40191d3e2d4SJin Yu 
40291d3e2d4SJin Yu 			desc = inflight_ring->inflight_packed->desc;
40391d3e2d4SJin Yu 			task_idx = desc[desc[desc_idx].last].id;
40491d3e2d4SJin Yu 			task = &vq->tasks[task_idx];
40591d3e2d4SJin Yu 
40691d3e2d4SJin Yu 			task->req_idx = desc_idx;
40791d3e2d4SJin Yu 			task->chain_num = desc[desc_idx].num;
40891d3e2d4SJin Yu 			task->buffer_id = task_idx;
40991d3e2d4SJin Yu 			task->inflight_idx = desc_idx;
41091d3e2d4SJin Yu 
41191d3e2d4SJin Yu 			vq->last_avail_idx += desc[desc_idx].num;
41291d3e2d4SJin Yu 			if (vq->last_avail_idx >= vq->vring.size) {
41391d3e2d4SJin Yu 				vq->last_avail_idx -= vq->vring.size;
41491d3e2d4SJin Yu 				vq->avail_wrap_counter =
41591d3e2d4SJin Yu 					!vq->avail_wrap_counter;
41691d3e2d4SJin Yu 			}
41791d3e2d4SJin Yu 		} else
41891d3e2d4SJin Yu 			/* In split ring, the desc_idx is the req_id
41991d3e2d4SJin Yu 			 * which was initialized when allocated the task pool.
42091d3e2d4SJin Yu 			 */
42191d3e2d4SJin Yu 			task = &vq->tasks[desc_idx];
42291d3e2d4SJin Yu 
42391d3e2d4SJin Yu 		blk_task_init(task);
42491d3e2d4SJin Yu 		process_blk_task(task);
425c19beb3fSJin Yu 	}
426c19beb3fSJin Yu 
42791d3e2d4SJin Yu 	free(resubmit_inflight->resubmit_list);
42891d3e2d4SJin Yu 	resubmit_inflight->resubmit_list = NULL;
429c19beb3fSJin Yu }
43091d3e2d4SJin Yu 
43191d3e2d4SJin Yu /* Use the buffer_id as the task_idx */
43291d3e2d4SJin Yu static uint16_t
43391d3e2d4SJin Yu vhost_blk_vq_get_desc_chain_buffer_id(struct vhost_blk_queue *vq,
43491d3e2d4SJin Yu 				      uint16_t *req_head, uint16_t *num)
43591d3e2d4SJin Yu {
43691d3e2d4SJin Yu 	struct vring_packed_desc *desc = &vq->vring.desc_packed[
43791d3e2d4SJin Yu 						vq->last_avail_idx];
43891d3e2d4SJin Yu 
43991d3e2d4SJin Yu 	*req_head = vq->last_avail_idx;
44091d3e2d4SJin Yu 	*num = 1;
44191d3e2d4SJin Yu 
44291d3e2d4SJin Yu 	while (descriptor_has_next_packed(desc)) {
44391d3e2d4SJin Yu 		vq->last_avail_idx = (vq->last_avail_idx + 1) % vq->vring.size;
44491d3e2d4SJin Yu 		desc = &vq->vring.desc_packed[vq->last_avail_idx];
44591d3e2d4SJin Yu 		*num += 1;
44691d3e2d4SJin Yu 	}
44791d3e2d4SJin Yu 
44891d3e2d4SJin Yu 	/* Point to next desc */
44991d3e2d4SJin Yu 	vq->last_avail_idx = (vq->last_avail_idx + 1) % vq->vring.size;
45091d3e2d4SJin Yu 	if (vq->last_avail_idx < *req_head)
45191d3e2d4SJin Yu 		vq->avail_wrap_counter = !vq->avail_wrap_counter;
45291d3e2d4SJin Yu 
45391d3e2d4SJin Yu 	return desc->id;
45491d3e2d4SJin Yu }
45591d3e2d4SJin Yu 
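/* Pop the next available descriptor index from a split ring. */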
45691d3e2d4SJin Yu static uint16_t
45791d3e2d4SJin Yu vq_get_desc_idx(struct vhost_blk_queue *vq)
45891d3e2d4SJin Yu {
45991d3e2d4SJin Yu 	uint16_t desc_idx;
46091d3e2d4SJin Yu 	uint16_t last_avail_idx;
46191d3e2d4SJin Yu 
46291d3e2d4SJin Yu 	last_avail_idx = vq->last_avail_idx & (vq->vring.size - 1);
46391d3e2d4SJin Yu 	desc_idx = vq->vring.avail->ring[last_avail_idx];
46491d3e2d4SJin Yu 	vq->last_avail_idx++;
46591d3e2d4SJin Yu 
46691d3e2d4SJin Yu 	return desc_idx;
46791d3e2d4SJin Yu }
46891d3e2d4SJin Yu 
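/* Return non-zero when the ring has at least one request available. */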
46991d3e2d4SJin Yu static int
47091d3e2d4SJin Yu vhost_blk_vq_is_avail(struct vhost_blk_queue *vq)
47191d3e2d4SJin Yu {
47291d3e2d4SJin Yu 	if (vq->packed_ring) {
47391d3e2d4SJin Yu 		uint16_t flags = vq->vring.desc_packed[
47491d3e2d4SJin Yu 					vq->last_avail_idx].flags;
47591d3e2d4SJin Yu 		bool avail_wrap_counter = vq->avail_wrap_counter;
47691d3e2d4SJin Yu 
47791d3e2d4SJin Yu 		return (!!(flags & VIRTQ_DESC_F_AVAIL) == avail_wrap_counter &&
47891d3e2d4SJin Yu 			!!(flags & VIRTQ_DESC_F_USED) != avail_wrap_counter);
479c19beb3fSJin Yu 	} else {
48091d3e2d4SJin Yu 		if (vq->vring.avail->idx != vq->last_avail_idx)
48191d3e2d4SJin Yu 			return 1;
482c19beb3fSJin Yu 
48391d3e2d4SJin Yu 		return 0;
484c19beb3fSJin Yu 	}
485c19beb3fSJin Yu }
486c19beb3fSJin Yu 
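/* Poll one virtqueue and process every available request, logging each
 * request as inflight before it is handled.
 */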
487c19beb3fSJin Yu static void
48891d3e2d4SJin Yu process_vq(struct vhost_blk_queue *vq)
489c19beb3fSJin Yu {
490c19beb3fSJin Yu 	struct vhost_blk_task *task;
491c19beb3fSJin Yu 
49291d3e2d4SJin Yu 	if (vq->packed_ring) {
49391d3e2d4SJin Yu 		while (vhost_blk_vq_is_avail(vq)) {
49491d3e2d4SJin Yu 			uint16_t task_idx, req_idx, last_idx, chain_num;
495c19beb3fSJin Yu 
49691d3e2d4SJin Yu 			task_idx = vhost_blk_vq_get_desc_chain_buffer_id(vq,
49791d3e2d4SJin Yu 					&req_idx, &chain_num);
49891d3e2d4SJin Yu 			task = &vq->tasks[task_idx];
499c19beb3fSJin Yu 
50091d3e2d4SJin Yu 			blk_task_init(task);
501c19beb3fSJin Yu 			task->req_idx = req_idx;
50291d3e2d4SJin Yu 			task->chain_num = chain_num;
50391d3e2d4SJin Yu 			task->buffer_id = task_idx;
50491d3e2d4SJin Yu 			last_idx = (req_idx + chain_num - 1) % vq->vring.size;
505c19beb3fSJin Yu 
50691d3e2d4SJin Yu 			rte_vhost_set_inflight_desc_packed(task->ctrlr->vid,
50791d3e2d4SJin Yu 							   vq->id,
50891d3e2d4SJin Yu 							   task->req_idx,
50991d3e2d4SJin Yu 							   last_idx,
51091d3e2d4SJin Yu 							   &task->inflight_idx);
51191d3e2d4SJin Yu 
51291d3e2d4SJin Yu 			process_blk_task(task);
51391d3e2d4SJin Yu 		}
51491d3e2d4SJin Yu 	} else {
51591d3e2d4SJin Yu 		while (vhost_blk_vq_is_avail(vq)) {
51691d3e2d4SJin Yu 			uint16_t desc_idx;
51791d3e2d4SJin Yu 
51891d3e2d4SJin Yu 			desc_idx = vq_get_desc_idx(vq);
51991d3e2d4SJin Yu 			task = &vq->tasks[desc_idx];
52091d3e2d4SJin Yu 
52191d3e2d4SJin Yu 			blk_task_init(task);
52291d3e2d4SJin Yu 			rte_vhost_set_inflight_desc_split(task->ctrlr->vid,
52391d3e2d4SJin Yu 							  vq->id,
524c19beb3fSJin Yu 							  task->req_idx);
52591d3e2d4SJin Yu 			process_blk_task(task);
526c19beb3fSJin Yu 		}
527c19beb3fSJin Yu 	}
528c19beb3fSJin Yu }
529c19beb3fSJin Yu 
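/* Controller worker thread: resubmit any inflight requests once, then
 * poll all queues until the device is destroyed.
 */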
5306b6678a6SThomas Monjalon static uint32_t
531c19beb3fSJin Yu ctrlr_worker(void *arg)
532c19beb3fSJin Yu {
533c19beb3fSJin Yu 	struct vhost_blk_ctrlr *ctrlr = (struct vhost_blk_ctrlr *)arg;
534c19beb3fSJin Yu 	int i;
535c19beb3fSJin Yu 
536c19beb3fSJin Yu 	fprintf(stdout, "Ctrlr Worker Thread start\n");
537c19beb3fSJin Yu 
538c19beb3fSJin Yu 	if (ctrlr == NULL || ctrlr->bdev == NULL) {
539c19beb3fSJin Yu 		fprintf(stderr,
540c19beb3fSJin Yu 			"%s: Error, invalid argument passed to worker thread\n",
541c19beb3fSJin Yu 			__func__);
542c19beb3fSJin Yu 		exit(0);
543c19beb3fSJin Yu 	}
544c19beb3fSJin Yu 
54591d3e2d4SJin Yu 	for (i = 0; i < NUM_OF_BLK_QUEUES; i++)
54691d3e2d4SJin Yu 		submit_inflight_vq(&ctrlr->queues[i]);
547c19beb3fSJin Yu 
54891d3e2d4SJin Yu 	while (worker_thread_status != WORKER_STATE_STOP)
54991d3e2d4SJin Yu 		for (i = 0; i < NUM_OF_BLK_QUEUES; i++)
55091d3e2d4SJin Yu 			process_vq(&ctrlr->queues[i]);
551c19beb3fSJin Yu 
552c19beb3fSJin Yu 	fprintf(stdout, "Ctrlr Worker Thread Exiting\n");
553c19beb3fSJin Yu 	sem_post(&exit_sem);
5546b6678a6SThomas Monjalon 	return 0;
555c19beb3fSJin Yu }
556c19beb3fSJin Yu 
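/* Allocate one task per descriptor for every queue; for split rings the
 * task index doubles as the request index.
 */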
557c19beb3fSJin Yu static int
55891d3e2d4SJin Yu alloc_task_pool(struct vhost_blk_ctrlr *ctrlr)
55991d3e2d4SJin Yu {
56091d3e2d4SJin Yu 	struct vhost_blk_queue *vq;
56191d3e2d4SJin Yu 	int i, j;
56291d3e2d4SJin Yu 
56391d3e2d4SJin Yu 	for (i = 0; i < NUM_OF_BLK_QUEUES; i++) {
56491d3e2d4SJin Yu 		vq = &ctrlr->queues[i];
56591d3e2d4SJin Yu 
56691d3e2d4SJin Yu 		vq->tasks = rte_zmalloc(NULL,
56791d3e2d4SJin Yu 			sizeof(struct vhost_blk_task) * vq->vring.size, 0);
56891d3e2d4SJin Yu 		if (!vq->tasks) {
56991d3e2d4SJin Yu 			fprintf(stderr, "Failed to allocate task memory\n");
57091d3e2d4SJin Yu 			return -1;
57191d3e2d4SJin Yu 		}
57291d3e2d4SJin Yu 
57391d3e2d4SJin Yu 		for (j = 0; j < vq->vring.size; j++) {
57491d3e2d4SJin Yu 			vq->tasks[j].req_idx = j;
57591d3e2d4SJin Yu 			vq->tasks[j].ctrlr = ctrlr;
57691d3e2d4SJin Yu 			vq->tasks[j].vq = vq;
57791d3e2d4SJin Yu 		}
57891d3e2d4SJin Yu 	}
57991d3e2d4SJin Yu 
58091d3e2d4SJin Yu 	return 0;
58191d3e2d4SJin Yu }
58291d3e2d4SJin Yu 
58391d3e2d4SJin Yu static void
58491d3e2d4SJin Yu free_task_pool(struct vhost_blk_ctrlr *ctrlr)
58591d3e2d4SJin Yu {
58691d3e2d4SJin Yu 	int i;
58791d3e2d4SJin Yu 
58891d3e2d4SJin Yu 	for (i = 0; i < NUM_OF_BLK_QUEUES; i++)
58991d3e2d4SJin Yu 		rte_free(ctrlr->queues[i].tasks);
59091d3e2d4SJin Yu }
59191d3e2d4SJin Yu 
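/* vhost-user new_device callback: read the negotiated features, set up the
 * vrings and inflight state (restoring ring indexes after a reconnection),
 * allocate the task pool and start the polling worker thread.
 */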
59291d3e2d4SJin Yu static int
593c19beb3fSJin Yu new_device(int vid)
594c19beb3fSJin Yu {
595c19beb3fSJin Yu 	struct vhost_blk_ctrlr *ctrlr;
59691d3e2d4SJin Yu 	struct vhost_blk_queue *vq;
59791d3e2d4SJin Yu 	char path[PATH_MAX];
598510f43fcSKeiichi Watanabe 	uint64_t features, protocol_features;
5996b6678a6SThomas Monjalon 	rte_thread_t tid;
600c19beb3fSJin Yu 	int i, ret;
601510f43fcSKeiichi Watanabe 	bool packed_ring, inflight_shmfd;
602c19beb3fSJin Yu 
60391d3e2d4SJin Yu 	ret = rte_vhost_get_ifname(vid, path, PATH_MAX);
60491d3e2d4SJin Yu 	if (ret) {
60591d3e2d4SJin Yu 		fprintf(stderr, "Failed to get the socket path\n");
60691d3e2d4SJin Yu 		return -1;
60791d3e2d4SJin Yu 	}
60891d3e2d4SJin Yu 
60991d3e2d4SJin Yu 	ctrlr = vhost_blk_ctrlr_find(path);
610c19beb3fSJin Yu 	if (!ctrlr) {
61191d3e2d4SJin Yu 		fprintf(stderr, "Failed to find controller\n");
612c19beb3fSJin Yu 		return -1;
613c19beb3fSJin Yu 	}
614c19beb3fSJin Yu 
615c19beb3fSJin Yu 	if (ctrlr->started)
616c19beb3fSJin Yu 		return 0;
617c19beb3fSJin Yu 
61891d3e2d4SJin Yu 	ctrlr->vid = vid;
619c19beb3fSJin Yu 	ret = rte_vhost_get_negotiated_features(vid, &features);
620c19beb3fSJin Yu 	if (ret) {
62191d3e2d4SJin Yu 		fprintf(stderr, "Failed to get the negotiated features\n");
622c19beb3fSJin Yu 		return -1;
623c19beb3fSJin Yu 	}
62491d3e2d4SJin Yu 	packed_ring = !!(features & (1ULL << VIRTIO_F_RING_PACKED));
625c19beb3fSJin Yu 
626510f43fcSKeiichi Watanabe 	ret = rte_vhost_get_negotiated_protocol_features(
627510f43fcSKeiichi Watanabe 		vid, &protocol_features);
628510f43fcSKeiichi Watanabe 	if (ret) {
629510f43fcSKeiichi Watanabe 		fprintf(stderr,
630510f43fcSKeiichi Watanabe 			"Failed to get the negotiated protocol features\n");
631510f43fcSKeiichi Watanabe 		return -1;
632510f43fcSKeiichi Watanabe 	}
633510f43fcSKeiichi Watanabe 	inflight_shmfd = !!(protocol_features &
634510f43fcSKeiichi Watanabe 			    (1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD));
635510f43fcSKeiichi Watanabe 
636c19beb3fSJin Yu 	/* Disable Notifications and init last idx */
637c19beb3fSJin Yu 	for (i = 0; i < NUM_OF_BLK_QUEUES; i++) {
63891d3e2d4SJin Yu 		vq = &ctrlr->queues[i];
63991d3e2d4SJin Yu 		vq->id = i;
640c19beb3fSJin Yu 
64191d3e2d4SJin Yu 		assert(rte_vhost_get_vhost_vring(ctrlr->vid, i,
64291d3e2d4SJin Yu 						 &vq->vring) == 0);
64391d3e2d4SJin Yu 		assert(rte_vhost_get_vring_base(ctrlr->vid, i,
64491d3e2d4SJin Yu 					       &vq->last_avail_idx,
64591d3e2d4SJin Yu 					       &vq->last_used_idx) == 0);
646510f43fcSKeiichi Watanabe 
647510f43fcSKeiichi Watanabe 		if (inflight_shmfd)
648510f43fcSKeiichi Watanabe 			assert(rte_vhost_get_vhost_ring_inflight(
649510f43fcSKeiichi Watanabe 				       ctrlr->vid, i,
65091d3e2d4SJin Yu 				       &vq->inflight_ring) == 0);
651c19beb3fSJin Yu 
652510f43fcSKeiichi Watanabe 		if (packed_ring && inflight_shmfd) {
653c19beb3fSJin Yu 			/* for the reconnection */
65491d3e2d4SJin Yu 			assert(rte_vhost_get_vring_base_from_inflight(
65591d3e2d4SJin Yu 				ctrlr->vid, i,
65691d3e2d4SJin Yu 				&vq->last_avail_idx,
65791d3e2d4SJin Yu 				&vq->last_used_idx) == 0);
658c19beb3fSJin Yu 
65991d3e2d4SJin Yu 			vq->avail_wrap_counter = vq->last_avail_idx &
660c19beb3fSJin Yu 				(1 << 15);
66191d3e2d4SJin Yu 			vq->last_avail_idx = vq->last_avail_idx &
662c19beb3fSJin Yu 				0x7fff;
66391d3e2d4SJin Yu 			vq->used_wrap_counter = vq->last_used_idx &
664c19beb3fSJin Yu 				(1 << 15);
66591d3e2d4SJin Yu 			vq->last_used_idx = vq->last_used_idx &
666c19beb3fSJin Yu 				0x7fff;
667c19beb3fSJin Yu 		}
668c19beb3fSJin Yu 
66991d3e2d4SJin Yu 		vq->packed_ring = packed_ring;
670c19beb3fSJin Yu 		rte_vhost_enable_guest_notification(vid, i, 0);
671c19beb3fSJin Yu 	}
672c19beb3fSJin Yu 
67391d3e2d4SJin Yu 	assert(rte_vhost_get_mem_table(vid, &ctrlr->mem) == 0);
67491d3e2d4SJin Yu 	assert(ctrlr->mem != NULL);
67591d3e2d4SJin Yu 	assert(alloc_task_pool(ctrlr) == 0);
67691d3e2d4SJin Yu 
677c19beb3fSJin Yu 	/* start polling vring */
67891d3e2d4SJin Yu 	worker_thread_status = WORKER_STATE_START;
67991d3e2d4SJin Yu 	fprintf(stdout, "New Device %s, Device ID %d\n", path, vid);
6806b6678a6SThomas Monjalon 	if (rte_thread_create_control(&tid, "dpdk-vhost-blk",
681e5fb1a96SChengwen Feng 			&ctrlr_worker, ctrlr) != 0) {
682c19beb3fSJin Yu 		fprintf(stderr, "Failed to start worker thread\n");
683c19beb3fSJin Yu 		return -1;
684c19beb3fSJin Yu 	}
685c19beb3fSJin Yu 
686c19beb3fSJin Yu 	/* device has been started */
687c19beb3fSJin Yu 	ctrlr->started = 1;
6886b6678a6SThomas Monjalon 	rte_thread_detach(tid);
689c19beb3fSJin Yu 	return 0;
690c19beb3fSJin Yu }
691c19beb3fSJin Yu 
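/* vhost-user destroy_device callback: stop the worker thread, save the
 * ring indexes back to the vhost library and release per-device resources.
 */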
692c19beb3fSJin Yu static void
693c19beb3fSJin Yu destroy_device(int vid)
694c19beb3fSJin Yu {
695c19beb3fSJin Yu 	char path[PATH_MAX];
696c19beb3fSJin Yu 	struct vhost_blk_ctrlr *ctrlr;
69791d3e2d4SJin Yu 	struct vhost_blk_queue *vq;
698c19beb3fSJin Yu 	int i, ret;
699c19beb3fSJin Yu 
700c19beb3fSJin Yu 	ret = rte_vhost_get_ifname(vid, path, PATH_MAX);
701c19beb3fSJin Yu 	if (ret) {
702c19beb3fSJin Yu 		fprintf(stderr, "Failed to get the socket path\n");
703c19beb3fSJin Yu 		return;
704c19beb3fSJin Yu 	}
705c19beb3fSJin Yu 
706c19beb3fSJin Yu 	fprintf(stdout, "Destroy %s Device ID %d\n", path, vid);
707c19beb3fSJin Yu 	ctrlr = vhost_blk_ctrlr_find(path);
708c19beb3fSJin Yu 	if (!ctrlr) {
709c19beb3fSJin Yu 		fprintf(stderr, "Failed to find controller\n");
710c19beb3fSJin Yu 		return;
711c19beb3fSJin Yu 	}
712c19beb3fSJin Yu 
713c19beb3fSJin Yu 	if (!ctrlr->started)
714c19beb3fSJin Yu 		return;
715c19beb3fSJin Yu 
71691d3e2d4SJin Yu 	worker_thread_status = WORKER_STATE_STOP;
71791d3e2d4SJin Yu 	sem_wait(&exit_sem);
718c19beb3fSJin Yu 
719c19beb3fSJin Yu 	for (i = 0; i < NUM_OF_BLK_QUEUES; i++) {
72091d3e2d4SJin Yu 		vq = &ctrlr->queues[i];
72191d3e2d4SJin Yu 		if (vq->packed_ring) {
72291d3e2d4SJin Yu 			vq->last_avail_idx |= (vq->avail_wrap_counter <<
723c19beb3fSJin Yu 				15);
72491d3e2d4SJin Yu 			vq->last_used_idx |= (vq->used_wrap_counter <<
725c19beb3fSJin Yu 				15);
726c19beb3fSJin Yu 		}
727c19beb3fSJin Yu 
72891d3e2d4SJin Yu 		rte_vhost_set_vring_base(ctrlr->vid, i,
72991d3e2d4SJin Yu 					 vq->last_avail_idx,
73091d3e2d4SJin Yu 					 vq->last_used_idx);
73191d3e2d4SJin Yu 	}
73291d3e2d4SJin Yu 
73391d3e2d4SJin Yu 	free_task_pool(ctrlr);
734c19beb3fSJin Yu 	free(ctrlr->mem);
735c19beb3fSJin Yu 
736c19beb3fSJin Yu 	ctrlr->started = 0;
737c19beb3fSJin Yu }
738c19beb3fSJin Yu 
739c19beb3fSJin Yu static int
740c19beb3fSJin Yu new_connection(int vid)
741c19beb3fSJin Yu {
742c19beb3fSJin Yu 	/* extend the proper features for block device */
743c19beb3fSJin Yu 	vhost_session_install_rte_compat_hooks(vid);
744c19beb3fSJin Yu 
745c19beb3fSJin Yu 	return 0;
746c19beb3fSJin Yu }
747c19beb3fSJin Yu 
748ab4bb424SMaxime Coquelin struct rte_vhost_device_ops vhost_blk_device_ops = {
749c19beb3fSJin Yu 	.new_device =  new_device,
750c19beb3fSJin Yu 	.destroy_device = destroy_device,
751c19beb3fSJin Yu 	.new_connection = new_connection,
752c19beb3fSJin Yu };
753c19beb3fSJin Yu 
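/* Create the in-memory block device used as backing storage. */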
754c19beb3fSJin Yu static struct vhost_block_dev *
755c19beb3fSJin Yu vhost_blk_bdev_construct(const char *bdev_name,
756c19beb3fSJin Yu 	const char *bdev_serial, uint32_t blk_size, uint64_t blk_cnt,
757c19beb3fSJin Yu 	bool wce_enable)
758c19beb3fSJin Yu {
759c19beb3fSJin Yu 	struct vhost_block_dev *bdev;
760c19beb3fSJin Yu 
761c19beb3fSJin Yu 	bdev = rte_zmalloc(NULL, sizeof(*bdev), RTE_CACHE_LINE_SIZE);
762c19beb3fSJin Yu 	if (!bdev)
763c19beb3fSJin Yu 		return NULL;
764c19beb3fSJin Yu 
765e2b4cfd6SJin Yu 	snprintf(bdev->name, sizeof(bdev->name), "%s", bdev_name);
766e2b4cfd6SJin Yu 	snprintf(bdev->product_name, sizeof(bdev->product_name), "%s",
767e2b4cfd6SJin Yu 		 bdev_serial);
768c19beb3fSJin Yu 	bdev->blocklen = blk_size;
769c19beb3fSJin Yu 	bdev->blockcnt = blk_cnt;
770c19beb3fSJin Yu 	bdev->write_cache = wce_enable;
771c19beb3fSJin Yu 
77291d3e2d4SJin Yu 	fprintf(stdout, "Blocklen=%d, blockcnt=%"PRIx64"\n", bdev->blocklen,
773c19beb3fSJin Yu 		bdev->blockcnt);
774c19beb3fSJin Yu 
775c19beb3fSJin Yu 	/* use memory as disk storage space */
776c19beb3fSJin Yu 	bdev->data = rte_zmalloc(NULL, blk_cnt * blk_size, 0);
777c19beb3fSJin Yu 	if (!bdev->data) {
77891d3e2d4SJin Yu 		fprintf(stderr, "Not enough reserved hugepage memory for disk\n");
779*ae67f7d0SStephen Hemminger 		rte_free(bdev);
780c19beb3fSJin Yu 		return NULL;
781c19beb3fSJin Yu 	}
782c19beb3fSJin Yu 
783c19beb3fSJin Yu 	return bdev;
784c19beb3fSJin Yu }
785c19beb3fSJin Yu 
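/* Register the vhost-user socket in the current working directory and
 * create the controller with its in-memory block device.
 */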
786c19beb3fSJin Yu static struct vhost_blk_ctrlr *
787c19beb3fSJin Yu vhost_blk_ctrlr_construct(const char *ctrlr_name)
788c19beb3fSJin Yu {
789c19beb3fSJin Yu 	int ret;
790c19beb3fSJin Yu 	struct vhost_blk_ctrlr *ctrlr;
791c19beb3fSJin Yu 	char *path;
792c19beb3fSJin Yu 	char cwd[PATH_MAX];
793c19beb3fSJin Yu 
794c19beb3fSJin Yu 	/* always use current directory */
795c19beb3fSJin Yu 	path = getcwd(cwd, PATH_MAX);
796c19beb3fSJin Yu 	if (!path) {
797c19beb3fSJin Yu 		fprintf(stderr, "Cannot get current working directory\n");
798c19beb3fSJin Yu 		return NULL;
799c19beb3fSJin Yu 	}
800c19beb3fSJin Yu 	snprintf(dev_pathname, sizeof(dev_pathname), "%s/%s", path, ctrlr_name);
801c19beb3fSJin Yu 
80271dd2870SJin Yu 	unlink(dev_pathname);
803c19beb3fSJin Yu 
804c19beb3fSJin Yu 	if (rte_vhost_driver_register(dev_pathname, 0) != 0) {
80591d3e2d4SJin Yu 		fprintf(stderr, "Failed to register vhost driver for socket %s\n", dev_pathname);
806c19beb3fSJin Yu 		return NULL;
807c19beb3fSJin Yu 	}
808c19beb3fSJin Yu 
809c19beb3fSJin Yu 	ret = rte_vhost_driver_set_features(dev_pathname, VHOST_BLK_FEATURES);
810c19beb3fSJin Yu 	if (ret != 0) {
811c19beb3fSJin Yu 		fprintf(stderr, "Set vhost driver features failed\n");
812c19beb3fSJin Yu 		rte_vhost_driver_unregister(dev_pathname);
813c19beb3fSJin Yu 		return NULL;
814c19beb3fSJin Yu 	}
815c19beb3fSJin Yu 
81691d3e2d4SJin Yu 	/* set vhost user protocol features */
817c19beb3fSJin Yu 	vhost_dev_install_rte_compat_hooks(dev_pathname);
818c19beb3fSJin Yu 
819c19beb3fSJin Yu 	ctrlr = rte_zmalloc(NULL, sizeof(*ctrlr), RTE_CACHE_LINE_SIZE);
820c19beb3fSJin Yu 	if (!ctrlr) {
821c19beb3fSJin Yu 		rte_vhost_driver_unregister(dev_pathname);
822c19beb3fSJin Yu 		return NULL;
823c19beb3fSJin Yu 	}
824c19beb3fSJin Yu 
825c19beb3fSJin Yu 	/* hardcoded block device: 32768 blocks of 4096 bytes (128 MiB) */
826c19beb3fSJin Yu 	ctrlr->bdev = vhost_blk_bdev_construct("malloc0", "vhost_blk_malloc0",
827c19beb3fSJin Yu 						4096, 32768, 0);
828c19beb3fSJin Yu 	if (!ctrlr->bdev) {
829c19beb3fSJin Yu 		rte_free(ctrlr);
830c19beb3fSJin Yu 		rte_vhost_driver_unregister(dev_pathname);
831c19beb3fSJin Yu 		return NULL;
832c19beb3fSJin Yu 	}
833c19beb3fSJin Yu 
834c19beb3fSJin Yu 	rte_vhost_driver_callback_register(dev_pathname,
835c19beb3fSJin Yu 					   &vhost_blk_device_ops);
836c19beb3fSJin Yu 
837c19beb3fSJin Yu 	return ctrlr;
838c19beb3fSJin Yu }
839c19beb3fSJin Yu 
840c19beb3fSJin Yu static void
84191d3e2d4SJin Yu vhost_blk_ctrlr_destroy(struct vhost_blk_ctrlr *ctrlr)
84291d3e2d4SJin Yu {
84391d3e2d4SJin Yu 	if (ctrlr->bdev != NULL) {
84491d3e2d4SJin Yu 		rte_free(ctrlr->bdev->data);
84591d3e2d4SJin Yu 
84691d3e2d4SJin Yu 		rte_free(ctrlr->bdev);
84791d3e2d4SJin Yu 	}
84891d3e2d4SJin Yu 	rte_free(ctrlr);
84991d3e2d4SJin Yu 
85091d3e2d4SJin Yu 	rte_vhost_driver_unregister(dev_pathname);
85191d3e2d4SJin Yu }
85291d3e2d4SJin Yu 
85391d3e2d4SJin Yu static void
854c19beb3fSJin Yu signal_handler(__rte_unused int signum)
855c19beb3fSJin Yu {
856c19beb3fSJin Yu 	struct vhost_blk_ctrlr *ctrlr;
857c19beb3fSJin Yu 
858c19beb3fSJin Yu 	ctrlr = vhost_blk_ctrlr_find(dev_pathname);
85991d3e2d4SJin Yu 	if (ctrlr == NULL)
86091d3e2d4SJin Yu 		return;
861c19beb3fSJin Yu 
86291d3e2d4SJin Yu 	if (ctrlr->started)
86391d3e2d4SJin Yu 		destroy_device(ctrlr->vid);
86491d3e2d4SJin Yu 
86591d3e2d4SJin Yu 	vhost_blk_ctrlr_destroy(ctrlr);
866c19beb3fSJin Yu 	exit(0);
867c19beb3fSJin Yu }
868c19beb3fSJin Yu 
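/*
 * The application takes only EAL arguments. A minimal invocation, assuming
 * the meson-built example binary name, is something like:
 *
 *   ./dpdk-vhost_blk -l 0 -m 1024
 *
 * It then waits for a vhost-user front-end to connect to ./vhost.socket.
 */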
869c19beb3fSJin Yu int main(int argc, char *argv[])
870c19beb3fSJin Yu {
871c19beb3fSJin Yu 	int ret;
872c19beb3fSJin Yu 
873c19beb3fSJin Yu 	/* init EAL */
874c19beb3fSJin Yu 	ret = rte_eal_init(argc, argv);
875c19beb3fSJin Yu 	if (ret < 0)
876c19beb3fSJin Yu 		rte_exit(EXIT_FAILURE, "Error with EAL initialization\n");
877c19beb3fSJin Yu 
87891d3e2d4SJin Yu 	g_vhost_ctrlr = vhost_blk_ctrlr_construct(CTRLR_NAME);
879c19beb3fSJin Yu 	if (g_vhost_ctrlr == NULL) {
880c19beb3fSJin Yu 		fprintf(stderr, "Failed to construct vhost blk controller\n");
881c19beb3fSJin Yu 		return 0;
882c19beb3fSJin Yu 	}
883c19beb3fSJin Yu 
884c19beb3fSJin Yu 	if (sem_init(&exit_sem, 0, 0) < 0) {
885c19beb3fSJin Yu 		fprintf(stderr, "Failed to initialize exit_sem\n");
886c19beb3fSJin Yu 		return -1;
887c19beb3fSJin Yu 	}
888c19beb3fSJin Yu 
88991d3e2d4SJin Yu 	signal(SIGINT, signal_handler);
89091d3e2d4SJin Yu 
891dc9e6580SYunjian Wang 	ret = rte_vhost_driver_start(dev_pathname);
892dc9e6580SYunjian Wang 	if (ret < 0) {
893dc9e6580SYunjian Wang 		fprintf(stderr, "Failed to start vhost driver.\n");
894dc9e6580SYunjian Wang 		return -1;
895dc9e6580SYunjian Wang 	}
896c19beb3fSJin Yu 
897c19beb3fSJin Yu 	/* loop for exit the application */
898c19beb3fSJin Yu 	while (1)
899c19beb3fSJin Yu 		sleep(1);
900c19beb3fSJin Yu 
90110aa3757SChengchang Tang 	/* clean up the EAL */
90210aa3757SChengchang Tang 	rte_eal_cleanup();
90310aa3757SChengchang Tang 
904c19beb3fSJin Yu 	return 0;
905c19beb3fSJin Yu }