xref: /dpdk/examples/vhost_blk/vhost_blk.c (revision ae67f7d0256687fdfb24d27ee94b20d88c65108e)
/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2019 Intel Corporation
 */

#ifndef _GNU_SOURCE
#define _GNU_SOURCE
#endif

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <stdbool.h>
#include <signal.h>
#include <assert.h>
#include <semaphore.h>
#include <inttypes.h>
#include <limits.h>
#include <linux/virtio_blk.h>
#include <linux/virtio_ring.h>

#include <rte_atomic.h>
#include <rte_cycles.h>
#include <rte_log.h>
#include <rte_malloc.h>
#include <rte_thread.h>
#include <rte_vhost.h>

#include "vhost_blk.h"
#include "blk_spec.h"

#define VIRTQ_DESC_F_NEXT	1
#define VIRTQ_DESC_F_AVAIL	(1 << 7)
#define VIRTQ_DESC_F_USED	(1 << 15)

#define MAX_TASK		12

#define VHOST_BLK_FEATURES ((1ULL << VIRTIO_F_RING_PACKED) | \
			    (1ULL << VIRTIO_F_VERSION_1) | \
			    (1ULL << VIRTIO_F_NOTIFY_ON_EMPTY) | \
			    (1ULL << VHOST_USER_F_PROTOCOL_FEATURES))
#define CTRLR_NAME		"vhost.socket"

enum CTRLR_WORKER_STATUS {
	WORKER_STATE_START = 0,
	WORKER_STATE_STOP,
};

struct vhost_blk_ctrlr *g_vhost_ctrlr;

/* Path of the vhost-user socket file, built from the current working directory. */
static char dev_pathname[PATH_MAX] = "";
static sem_t exit_sem;
static enum CTRLR_WORKER_STATUS worker_thread_status;

struct vhost_blk_ctrlr *
vhost_blk_ctrlr_find(const char *ctrlr_name)
{
	if (ctrlr_name == NULL)
		return NULL;

	/* currently we only support 1 socket file fd */
	return g_vhost_ctrlr;
}

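/* Translate a guest physical address to a host virtual address using the
 * controller's memory table. *len is updated to the length of the mapping
 * that is contiguous in host memory starting at gpa.
 */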
static uint64_t
gpa_to_vva(struct vhost_blk_ctrlr *ctrlr, uint64_t gpa, uint64_t *len)
{
	assert(ctrlr->mem != NULL);

	return rte_vhost_va_from_guest_pa(ctrlr->mem, gpa, len);
}

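/* Complete a request on a split virtqueue: record the inflight I/O, add the
 * descriptor index and transferred length to the used ring, and interrupt
 * the guest.
 */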
static void
enqueue_task(struct vhost_blk_task *task)
{
	struct vhost_blk_queue *vq = task->vq;
	struct vring_used *used = vq->vring.used;

	rte_vhost_set_last_inflight_io_split(task->ctrlr->vid,
		vq->id, task->req_idx);

	/* Fill out the next entry in the "used" ring.  id = the
	 * index of the descriptor that contained the blk request.
	 * len = the total amount of data transferred for the blk
	 * request. We must report the correct len, for variable
	 * length requests, where we may return less data than
	 * allocated by the guest VM.
	 */
	used->ring[used->idx & (vq->vring.size - 1)].id = task->req_idx;
	used->ring[used->idx & (vq->vring.size - 1)].len = task->data_len;
	rte_atomic_thread_fence(rte_memory_order_seq_cst);
	used->idx++;
	rte_atomic_thread_fence(rte_memory_order_seq_cst);

	rte_vhost_clr_inflight_desc_split(task->ctrlr->vid,
		vq->id, used->idx, task->req_idx);

	/* Send an interrupt back to the guest VM so that it knows
	 * a completion is ready to be processed.
	 */
	rte_vhost_vring_call(task->ctrlr->vid, vq->id);
}

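/* Complete a request on a packed virtqueue: record the inflight I/O, write
 * the used descriptor (flipping the AVAIL/USED flags according to the used
 * wrap counter), advance last_used_idx past the chain, and interrupt the
 * guest.
 */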
static void
enqueue_task_packed(struct vhost_blk_task *task)
{
	struct vhost_blk_queue *vq = task->vq;
	struct vring_packed_desc *desc;

	rte_vhost_set_last_inflight_io_packed(task->ctrlr->vid, vq->id,
					    task->inflight_idx);

	desc = &vq->vring.desc_packed[vq->last_used_idx];
	desc->id = task->buffer_id;
	desc->addr = 0;

	rte_atomic_thread_fence(rte_memory_order_seq_cst);
	if (vq->used_wrap_counter)
		desc->flags |= VIRTQ_DESC_F_AVAIL | VIRTQ_DESC_F_USED;
	else
		desc->flags &= ~(VIRTQ_DESC_F_AVAIL | VIRTQ_DESC_F_USED);
	rte_atomic_thread_fence(rte_memory_order_seq_cst);

	rte_vhost_clr_inflight_desc_packed(task->ctrlr->vid, vq->id,
					   task->inflight_idx);

	vq->last_used_idx += task->chain_num;
	if (vq->last_used_idx >= vq->vring.size) {
		vq->last_used_idx -= vq->vring.size;
		vq->used_wrap_counter = !vq->used_wrap_counter;
	}

	/* Send an interrupt back to the guest VM so that it knows
	 * a completion is ready to be processed.
	 */
	rte_vhost_vring_call(task->ctrlr->vid, vq->id);
}

static bool
descriptor_has_next_packed(struct vring_packed_desc *cur_desc)
{
	return !!(cur_desc->flags & VRING_DESC_F_NEXT);
}

static bool
descriptor_has_next_split(struct vring_desc *cur_desc)
{
	return !!(cur_desc->flags & VRING_DESC_F_NEXT);
}

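/* Convert one descriptor's guest-physical payload into host iovecs. A single
 * guest buffer may span several host mappings, so translate it piecewise
 * until the whole range is covered or VHOST_BLK_MAX_IOVS is reached.
 */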
static int
desc_payload_to_iovs(struct vhost_blk_ctrlr *ctrlr, struct iovec *iovs,
		     uint32_t *iov_index, uintptr_t payload, uint64_t remaining)
{
	void *vva;
	uint64_t len;

	do {
		if (*iov_index >= VHOST_BLK_MAX_IOVS) {
			fprintf(stderr, "VHOST_BLK_MAX_IOVS reached\n");
			return -1;
		}
		len = remaining;
		vva = (void *)(uintptr_t)gpa_to_vva(ctrlr,
				 payload, &len);
		if (!vva || !len) {
			fprintf(stderr, "failed to translate desc address.\n");
			return -1;
		}

		iovs[*iov_index].iov_base = vva;
		iovs[*iov_index].iov_len = len;
		payload += len;
		remaining -= len;
		(*iov_index)++;
	} while (remaining);

	return 0;
}

static struct vring_desc *
vring_get_next_desc(struct vhost_blk_queue *vq, struct vring_desc *desc)
{
	if (descriptor_has_next_split(desc))
		return &vq->vring.desc[desc->next];

	return NULL;
}

static struct vring_packed_desc *
vring_get_next_desc_packed(struct vhost_blk_queue *vq, uint16_t *req_idx)
{
	if (descriptor_has_next_packed(&vq->vring.desc_packed[*req_idx])) {
		*req_idx = (*req_idx + 1) % vq->vring.size;
		return &vq->vring.desc_packed[*req_idx];
	}

	return NULL;
}

static struct rte_vhost_inflight_desc_packed *
vring_get_next_inflight_desc(struct vhost_blk_queue *vq,
			struct rte_vhost_inflight_desc_packed *desc)
{
	if (!!(desc->flags & VRING_DESC_F_NEXT))
		return &vq->inflight_ring.inflight_packed->desc[desc->next];

	return NULL;
}

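/* Walk a split-ring descriptor chain starting at req_idx and gather all of
 * its payloads into the task's iovec array; *payload accumulates the total
 * chain length in bytes. The packed-ring and inflight-ring variants below
 * do the same for their respective descriptor formats.
 */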
static int
setup_iovs_from_descs_split(struct vhost_blk_ctrlr *ctrlr,
			    struct vhost_blk_queue *vq, uint16_t req_idx,
			    struct iovec *iovs, uint32_t *iovs_idx,
			    uint32_t *payload)
{
	struct vring_desc *desc = &vq->vring.desc[req_idx];

	do {
		/* does not support indirect descriptors */
		assert((desc->flags & VRING_DESC_F_INDIRECT) == 0);

		if (*iovs_idx >= VHOST_BLK_MAX_IOVS) {
			fprintf(stderr, "Reached VHOST_BLK_MAX_IOVS\n");
			return -1;
		}

		if (desc_payload_to_iovs(ctrlr, iovs, iovs_idx,
			desc->addr, desc->len) != 0) {
			fprintf(stderr, "Failed to convert desc payload to iovs\n");
			return -1;
		}

		*payload += desc->len;

		desc = vring_get_next_desc(vq, desc);
	} while (desc != NULL);

	return 0;
}

static int
setup_iovs_from_descs_packed(struct vhost_blk_ctrlr *ctrlr,
			     struct vhost_blk_queue *vq, uint16_t req_idx,
			     struct iovec *iovs, uint32_t *iovs_idx,
			     uint32_t *payload)
{
	struct vring_packed_desc *desc = &vq->vring.desc_packed[req_idx];

	do {
		/* does not support indirect descriptors */
		assert((desc->flags & VRING_DESC_F_INDIRECT) == 0);

		if (*iovs_idx >= VHOST_BLK_MAX_IOVS) {
			fprintf(stderr, "Reached VHOST_BLK_MAX_IOVS\n");
			return -1;
		}

		if (desc_payload_to_iovs(ctrlr, iovs, iovs_idx,
			desc->addr, desc->len) != 0) {
			fprintf(stderr, "Failed to convert desc payload to iovs\n");
			return -1;
		}

		*payload += desc->len;

		desc = vring_get_next_desc_packed(vq, &req_idx);
	} while (desc != NULL);

	return 0;
}

static int
setup_iovs_from_inflight_desc(struct vhost_blk_ctrlr *ctrlr,
			      struct vhost_blk_queue *vq, uint16_t req_idx,
			      struct iovec *iovs, uint32_t *iovs_idx,
			      uint32_t *payload)
{
	struct rte_vhost_ring_inflight *inflight_vq;
	struct rte_vhost_inflight_desc_packed *desc;

	inflight_vq = &vq->inflight_ring;
	desc = &inflight_vq->inflight_packed->desc[req_idx];

	do {
		/* does not support indirect descriptors */
		assert((desc->flags & VRING_DESC_F_INDIRECT) == 0);

		if (*iovs_idx >= VHOST_BLK_MAX_IOVS) {
			fprintf(stderr, "Reached VHOST_BLK_MAX_IOVS\n");
			return -1;
		}

		if (desc_payload_to_iovs(ctrlr, iovs, iovs_idx,
			desc->addr, desc->len) != 0) {
			fprintf(stderr, "Failed to convert desc payload to iovs\n");
			return -1;
		}

		*payload += desc->len;

		desc = vring_get_next_inflight_desc(vq, desc);
	} while (desc != NULL);

	return 0;
}

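/* Handle one virtio-blk request. The descriptor chain is laid out as
 * [virtio_blk_outhdr][data buffers...][1-byte status], so after building the
 * iovec list the first and last iovecs are peeled off as header and status
 * and the remainder is the data payload handed to the block backend.
 */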
static void
process_blk_task(struct vhost_blk_task *task)
{
	uint32_t payload = 0;

	if (task->vq->packed_ring) {
		struct rte_vhost_ring_inflight *inflight_ring;
		struct rte_vhost_resubmit_info *resubmit_inflight;

		inflight_ring = &task->vq->inflight_ring;
		resubmit_inflight = inflight_ring->resubmit_inflight;

		if (resubmit_inflight != NULL &&
		    resubmit_inflight->resubmit_list != NULL) {
			if (setup_iovs_from_inflight_desc(task->ctrlr, task->vq,
				task->req_idx, task->iovs, &task->iovs_cnt,
				&payload)) {
				fprintf(stderr, "Failed to setup iovs\n");
				return;
			}
		} else {
			if (setup_iovs_from_descs_packed(task->ctrlr, task->vq,
				task->req_idx, task->iovs, &task->iovs_cnt,
				&payload)) {
				fprintf(stderr, "Failed to setup iovs\n");
				return;
			}
		}
	} else {
		if (setup_iovs_from_descs_split(task->ctrlr, task->vq,
			task->req_idx, task->iovs, &task->iovs_cnt, &payload)) {
			fprintf(stderr, "Failed to setup iovs\n");
			return;
		}
	}

	/* First IOV must be the req head. */
	task->req = (struct virtio_blk_outhdr *)task->iovs[0].iov_base;
	assert(sizeof(*task->req) == task->iovs[0].iov_len);

	/* Last IOV must be the status tail. */
	task->status = (uint8_t *)task->iovs[task->iovs_cnt - 1].iov_base;
	assert(sizeof(*task->status) == task->iovs[task->iovs_cnt - 1].iov_len);

	/* Transferred data length, excluding the request header and status byte. */
	task->data_len = payload - task->iovs[0].iov_len -
		task->iovs[task->iovs_cnt - 1].iov_len;

	if (vhost_bdev_process_blk_commands(task->ctrlr->bdev, task))
		/* invalid request */
		*task->status = VIRTIO_BLK_S_IOERR;
	else
		/* success */
		*task->status = VIRTIO_BLK_S_OK;

	if (task->vq->packed_ring)
		enqueue_task_packed(task);
	else
		enqueue_task(task);
}

static void
blk_task_init(struct vhost_blk_task *task)
{
	task->iovs_cnt = 0;
	task->data_len = 0;
	task->req = NULL;
	task->status = NULL;
}

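/* Resubmit requests that were still in flight when the previous connection
 * was torn down (e.g. after the frontend reconnects), using the inflight
 * region shared via VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD.
 */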
static void
submit_inflight_vq(struct vhost_blk_queue *vq)
{
	struct rte_vhost_ring_inflight *inflight_ring;
	struct rte_vhost_resubmit_info *resubmit_inflight;
	struct vhost_blk_task *task;

	inflight_ring = &vq->inflight_ring;
	resubmit_inflight = inflight_ring->resubmit_inflight;

	if (resubmit_inflight == NULL ||
	    resubmit_inflight->resubmit_num == 0)
		return;

	fprintf(stdout, "Resubmit inflight num is %d\n",
		resubmit_inflight->resubmit_num);

	while (resubmit_inflight->resubmit_num-- > 0) {
		uint16_t desc_idx;

		desc_idx = resubmit_inflight->resubmit_list[
					resubmit_inflight->resubmit_num].index;

		if (vq->packed_ring) {
			uint16_t task_idx;
			struct rte_vhost_inflight_desc_packed *desc;

			desc = inflight_ring->inflight_packed->desc;
			task_idx = desc[desc[desc_idx].last].id;
			task = &vq->tasks[task_idx];

			task->req_idx = desc_idx;
			task->chain_num = desc[desc_idx].num;
			task->buffer_id = task_idx;
			task->inflight_idx = desc_idx;

			vq->last_avail_idx += desc[desc_idx].num;
			if (vq->last_avail_idx >= vq->vring.size) {
				vq->last_avail_idx -= vq->vring.size;
				vq->avail_wrap_counter =
					!vq->avail_wrap_counter;
			}
		} else
			/* In a split ring, the desc_idx is the req_idx,
			 * which was initialized when the task pool was
			 * allocated.
			 */
			task = &vq->tasks[desc_idx];

		blk_task_init(task);
		process_blk_task(task);
	}

	free(resubmit_inflight->resubmit_list);
	resubmit_inflight->resubmit_list = NULL;
}

/* Use the buffer_id as the task_idx */
static uint16_t
vhost_blk_vq_get_desc_chain_buffer_id(struct vhost_blk_queue *vq,
				      uint16_t *req_head, uint16_t *num)
{
	struct vring_packed_desc *desc = &vq->vring.desc_packed[
						vq->last_avail_idx];

	*req_head = vq->last_avail_idx;
	*num = 1;

	while (descriptor_has_next_packed(desc)) {
		vq->last_avail_idx = (vq->last_avail_idx + 1) % vq->vring.size;
		desc = &vq->vring.desc_packed[vq->last_avail_idx];
		*num += 1;
	}

	/* Point to next desc */
	vq->last_avail_idx = (vq->last_avail_idx + 1) % vq->vring.size;
	if (vq->last_avail_idx < *req_head)
		vq->avail_wrap_counter = !vq->avail_wrap_counter;

	return desc->id;
}

static uint16_t
vq_get_desc_idx(struct vhost_blk_queue *vq)
{
	uint16_t desc_idx;
	uint16_t last_avail_idx;

	last_avail_idx = vq->last_avail_idx & (vq->vring.size - 1);
	desc_idx = vq->vring.avail->ring[last_avail_idx];
	vq->last_avail_idx++;

	return desc_idx;
}

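/* Return non-zero when the virtqueue has a new request available: on a
 * packed ring, the next descriptor's AVAIL/USED flags must match the current
 * avail wrap counter; on a split ring, the avail index must differ from
 * last_avail_idx.
 */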
static int
vhost_blk_vq_is_avail(struct vhost_blk_queue *vq)
{
	if (vq->packed_ring) {
		uint16_t flags = vq->vring.desc_packed[
					vq->last_avail_idx].flags;
		bool avail_wrap_counter = vq->avail_wrap_counter;

		return (!!(flags & VIRTQ_DESC_F_AVAIL) == avail_wrap_counter &&
			!!(flags & VIRTQ_DESC_F_USED) != avail_wrap_counter);
	} else {
		if (vq->vring.avail->idx != vq->last_avail_idx)
			return 1;

		return 0;
	}
}

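/* Poll one virtqueue and process every available request: fetch the next
 * descriptor chain, mark it inflight, then run it through process_blk_task().
 */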
static void
process_vq(struct vhost_blk_queue *vq)
{
	struct vhost_blk_task *task;

	if (vq->packed_ring) {
		while (vhost_blk_vq_is_avail(vq)) {
			uint16_t task_idx, req_idx, last_idx, chain_num;

			task_idx = vhost_blk_vq_get_desc_chain_buffer_id(vq,
					&req_idx, &chain_num);
			task = &vq->tasks[task_idx];

			blk_task_init(task);
			task->req_idx = req_idx;
			task->chain_num = chain_num;
			task->buffer_id = task_idx;
			last_idx = (req_idx + chain_num - 1) % vq->vring.size;

			rte_vhost_set_inflight_desc_packed(task->ctrlr->vid,
							   vq->id,
							   task->req_idx,
							   last_idx,
							   &task->inflight_idx);

			process_blk_task(task);
		}
	} else {
		while (vhost_blk_vq_is_avail(vq)) {
			uint16_t desc_idx;

			desc_idx = vq_get_desc_idx(vq);
			task = &vq->tasks[desc_idx];

			blk_task_init(task);
			rte_vhost_set_inflight_desc_split(task->ctrlr->vid,
							  vq->id,
							  task->req_idx);
			process_blk_task(task);
		}
	}
}

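/* Controller worker thread: first resubmit any inflight requests left over
 * from a previous connection, then busy-poll every queue until the device is
 * torn down and worker_thread_status is set to WORKER_STATE_STOP.
 */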
static uint32_t
ctrlr_worker(void *arg)
{
	struct vhost_blk_ctrlr *ctrlr = (struct vhost_blk_ctrlr *)arg;
	int i;

	fprintf(stdout, "Ctrlr Worker Thread start\n");

	if (ctrlr == NULL || ctrlr->bdev == NULL) {
		fprintf(stderr,
			"%s: Error, invalid argument passed to worker thread\n",
			__func__);
		exit(0);
	}

	for (i = 0; i < NUM_OF_BLK_QUEUES; i++)
		submit_inflight_vq(&ctrlr->queues[i]);

	while (worker_thread_status != WORKER_STATE_STOP)
		for (i = 0; i < NUM_OF_BLK_QUEUES; i++)
			process_vq(&ctrlr->queues[i]);

	fprintf(stdout, "Ctrlr Worker Thread Exiting\n");
	sem_post(&exit_sem);
	return 0;
}

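/* Allocate one task per descriptor for every queue. For split rings the
 * task's req_idx doubles as the descriptor index, so it is pre-initialized
 * here.
 */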
static int
alloc_task_pool(struct vhost_blk_ctrlr *ctrlr)
{
	struct vhost_blk_queue *vq;
	int i, j;

	for (i = 0; i < NUM_OF_BLK_QUEUES; i++) {
		vq = &ctrlr->queues[i];

		vq->tasks = rte_zmalloc(NULL,
			sizeof(struct vhost_blk_task) * vq->vring.size, 0);
		if (!vq->tasks) {
			fprintf(stderr, "Failed to allocate task memory\n");
			return -1;
		}

		for (j = 0; j < vq->vring.size; j++) {
			vq->tasks[j].req_idx = j;
			vq->tasks[j].ctrlr = ctrlr;
			vq->tasks[j].vq = vq;
		}
	}

	return 0;
}

static void
free_task_pool(struct vhost_blk_ctrlr *ctrlr)
{
	int i;

	for (i = 0; i < NUM_OF_BLK_QUEUES; i++)
		rte_free(ctrlr->queues[i].tasks);
}

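/* vhost-user .new_device callback, invoked once the guest driver is ready.
 * It captures the negotiated features, sets up every virtqueue (including
 * the inflight state used for reconnection), maps guest memory, allocates
 * the task pool and starts the polling worker thread.
 */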
static int
new_device(int vid)
{
	struct vhost_blk_ctrlr *ctrlr;
	struct vhost_blk_queue *vq;
	char path[PATH_MAX];
	uint64_t features, protocol_features;
	rte_thread_t tid;
	int i, ret;
	bool packed_ring, inflight_shmfd;

	ret = rte_vhost_get_ifname(vid, path, PATH_MAX);
	if (ret) {
		fprintf(stderr, "Failed to get the socket path\n");
		return -1;
	}

	ctrlr = vhost_blk_ctrlr_find(path);
	if (!ctrlr) {
		fprintf(stderr, "Failed to find controller\n");
		return -1;
	}

	if (ctrlr->started)
		return 0;

	ctrlr->vid = vid;
	ret = rte_vhost_get_negotiated_features(vid, &features);
	if (ret) {
		fprintf(stderr, "Failed to get the negotiated features\n");
		return -1;
	}
	packed_ring = !!(features & (1ULL << VIRTIO_F_RING_PACKED));

	ret = rte_vhost_get_negotiated_protocol_features(
		vid, &protocol_features);
	if (ret) {
		fprintf(stderr,
			"Failed to get the negotiated protocol features\n");
		return -1;
	}
	inflight_shmfd = !!(protocol_features &
			    (1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD));

	/* Disable notifications and init last idx */
	for (i = 0; i < NUM_OF_BLK_QUEUES; i++) {
		vq = &ctrlr->queues[i];
		vq->id = i;

		assert(rte_vhost_get_vhost_vring(ctrlr->vid, i,
						 &vq->vring) == 0);
		assert(rte_vhost_get_vring_base(ctrlr->vid, i,
					       &vq->last_avail_idx,
					       &vq->last_used_idx) == 0);

		if (inflight_shmfd)
			assert(rte_vhost_get_vhost_ring_inflight(
				       ctrlr->vid, i,
				       &vq->inflight_ring) == 0);

		if (packed_ring && inflight_shmfd) {
			/* for reconnection */
			assert(rte_vhost_get_vring_base_from_inflight(
				ctrlr->vid, i,
				&vq->last_avail_idx,
				&vq->last_used_idx) == 0);

			vq->avail_wrap_counter = vq->last_avail_idx &
				(1 << 15);
			vq->last_avail_idx = vq->last_avail_idx &
				0x7fff;
			vq->used_wrap_counter = vq->last_used_idx &
				(1 << 15);
			vq->last_used_idx = vq->last_used_idx &
				0x7fff;
		}

		vq->packed_ring = packed_ring;
		rte_vhost_enable_guest_notification(vid, i, 0);
	}

	assert(rte_vhost_get_mem_table(vid, &ctrlr->mem) == 0);
	assert(ctrlr->mem != NULL);
	assert(alloc_task_pool(ctrlr) == 0);

	/* start polling vring */
	worker_thread_status = WORKER_STATE_START;
	fprintf(stdout, "New Device %s, Device ID %d\n", path, vid);
	if (rte_thread_create_control(&tid, "dpdk-vhost-blk",
			&ctrlr_worker, ctrlr) != 0) {
		fprintf(stderr, "Failed to start worker thread\n");
		return -1;
	}

	/* device has been started */
	ctrlr->started = 1;
	rte_thread_detach(tid);
	return 0;
}

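/* vhost-user .destroy_device callback: stop the worker thread, save the
 * vring bases (packing the wrap counters back into bit 15 for packed rings)
 * so the device can be resumed later, and release per-device resources.
 */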
static void
destroy_device(int vid)
{
	char path[PATH_MAX];
	struct vhost_blk_ctrlr *ctrlr;
	struct vhost_blk_queue *vq;
	int i, ret;

	ret = rte_vhost_get_ifname(vid, path, PATH_MAX);
	if (ret) {
		fprintf(stderr, "Destroy Ctrlr Failed\n");
		return;
	}

	fprintf(stdout, "Destroy %s Device ID %d\n", path, vid);
	ctrlr = vhost_blk_ctrlr_find(path);
	if (!ctrlr) {
		fprintf(stderr, "Destroy Ctrlr Failed\n");
		return;
	}

	if (!ctrlr->started)
		return;

	worker_thread_status = WORKER_STATE_STOP;
	sem_wait(&exit_sem);

	for (i = 0; i < NUM_OF_BLK_QUEUES; i++) {
		vq = &ctrlr->queues[i];
		if (vq->packed_ring) {
			vq->last_avail_idx |= (vq->avail_wrap_counter <<
				15);
			vq->last_used_idx |= (vq->used_wrap_counter <<
				15);
		}

		rte_vhost_set_vring_base(ctrlr->vid, i,
					 vq->last_avail_idx,
					 vq->last_used_idx);
	}

	free_task_pool(ctrlr);
	free(ctrlr->mem);

	ctrlr->started = 0;
}

static int
new_connection(int vid)
{
	/* install the per-session vhost-user message hooks for the block device */
	vhost_session_install_rte_compat_hooks(vid);

	return 0;
}

struct rte_vhost_device_ops vhost_blk_device_ops = {
	.new_device = new_device,
	.destroy_device = destroy_device,
	.new_connection = new_connection,
};

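/* Create a malloc-backed block device: the whole disk lives in a single
 * rte_zmalloc'd buffer of blk_cnt * blk_size bytes.
 */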
static struct vhost_block_dev *
vhost_blk_bdev_construct(const char *bdev_name,
	const char *bdev_serial, uint32_t blk_size, uint64_t blk_cnt,
	bool wce_enable)
{
	struct vhost_block_dev *bdev;

	bdev = rte_zmalloc(NULL, sizeof(*bdev), RTE_CACHE_LINE_SIZE);
	if (!bdev)
		return NULL;

	snprintf(bdev->name, sizeof(bdev->name), "%s", bdev_name);
	snprintf(bdev->product_name, sizeof(bdev->product_name), "%s",
		 bdev_serial);
	bdev->blocklen = blk_size;
	bdev->blockcnt = blk_cnt;
	bdev->write_cache = wce_enable;

	fprintf(stdout, "Blocklen=%d, blockcnt=%"PRIx64"\n", bdev->blocklen,
		bdev->blockcnt);

	/* use memory as disk storage space */
	bdev->data = rte_zmalloc(NULL, blk_cnt * blk_size, 0);
	if (!bdev->data) {
		fprintf(stderr, "Not enough reserved hugepage memory for disk\n");
		rte_free(bdev);
		return NULL;
	}

	return bdev;
}

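/* Register a vhost-user socket in the current working directory, advertise
 * VHOST_BLK_FEATURES, install the compat hooks and attach a 128 MiB
 * malloc-backed block device to the new controller.
 */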
static struct vhost_blk_ctrlr *
vhost_blk_ctrlr_construct(const char *ctrlr_name)
{
	int ret;
	struct vhost_blk_ctrlr *ctrlr;
	char *path;
	char cwd[PATH_MAX];

	/* always use current directory */
	path = getcwd(cwd, PATH_MAX);
	if (!path) {
		fprintf(stderr, "Cannot get current working directory\n");
		return NULL;
	}
	snprintf(dev_pathname, sizeof(dev_pathname), "%s/%s", path, ctrlr_name);

	unlink(dev_pathname);

	if (rte_vhost_driver_register(dev_pathname, 0) != 0) {
		fprintf(stderr, "Failed to register vhost socket %s\n", dev_pathname);
		return NULL;
	}

	ret = rte_vhost_driver_set_features(dev_pathname, VHOST_BLK_FEATURES);
	if (ret != 0) {
		fprintf(stderr, "Set vhost driver features failed\n");
		rte_vhost_driver_unregister(dev_pathname);
		return NULL;
	}

	/* set vhost user protocol features */
	vhost_dev_install_rte_compat_hooks(dev_pathname);

	ctrlr = rte_zmalloc(NULL, sizeof(*ctrlr), RTE_CACHE_LINE_SIZE);
	if (!ctrlr) {
		rte_vhost_driver_unregister(dev_pathname);
		return NULL;
	}

	/* hardcoded block device information with 128MiB */
	ctrlr->bdev = vhost_blk_bdev_construct("malloc0", "vhost_blk_malloc0",
						4096, 32768, 0);
	if (!ctrlr->bdev) {
		rte_free(ctrlr);
		rte_vhost_driver_unregister(dev_pathname);
		return NULL;
	}

	rte_vhost_driver_callback_register(dev_pathname,
					   &vhost_blk_device_ops);

	return ctrlr;
}

static void
vhost_blk_ctrlr_destroy(struct vhost_blk_ctrlr *ctrlr)
{
	if (ctrlr->bdev != NULL) {
		rte_free(ctrlr->bdev->data);
		rte_free(ctrlr->bdev);
	}
	rte_free(ctrlr);

	rte_vhost_driver_unregister(dev_pathname);
}

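/* SIGINT handler: tear down the running device (if any), destroy the
 * controller and exit.
 */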
static void
signal_handler(__rte_unused int signum)
{
	struct vhost_blk_ctrlr *ctrlr;

	ctrlr = vhost_blk_ctrlr_find(dev_pathname);
	if (ctrlr == NULL)
		return;

	if (ctrlr->started)
		destroy_device(ctrlr->vid);

	vhost_blk_ctrlr_destroy(ctrlr);
	exit(0);
}

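/* The example takes only EAL arguments. It creates the vhost-user socket
 * "vhost.socket" in the current working directory; a vhost-user-blk frontend
 * (for example QEMU) can then connect to that path to access the in-memory
 * disk.
 */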
int main(int argc, char *argv[])
{
	int ret;

	/* init EAL */
	ret = rte_eal_init(argc, argv);
	if (ret < 0)
		rte_exit(EXIT_FAILURE, "Error with EAL initialization\n");

	g_vhost_ctrlr = vhost_blk_ctrlr_construct(CTRLR_NAME);
	if (g_vhost_ctrlr == NULL) {
		fprintf(stderr, "Failed to construct vhost blk controller\n");
		return -1;
	}

	if (sem_init(&exit_sem, 0, 0) < 0) {
		fprintf(stderr, "Failed to initialize exit_sem\n");
		return -1;
	}

	signal(SIGINT, signal_handler);

	ret = rte_vhost_driver_start(dev_pathname);
	if (ret < 0) {
		fprintf(stderr, "Failed to start vhost driver.\n");
		return -1;
	}

	/* loop until the signal handler exits the application */
	while (1)
		sleep(1);

	/* clean up the EAL */
	rte_eal_cleanup();

	return 0;
}