1c19beb3fSJin Yu /* SPDX-License-Identifier: BSD-3-Clause 2c19beb3fSJin Yu * Copyright(c) 2010-2019 Intel Corporation 3c19beb3fSJin Yu */ 4c19beb3fSJin Yu 59c3586a6SDavid Marchand #ifndef _GNU_SOURCE 69c3586a6SDavid Marchand #define _GNU_SOURCE 79c3586a6SDavid Marchand #endif 89c3586a6SDavid Marchand 9c19beb3fSJin Yu #include <stdint.h> 1072b452c5SDmitry Kozlyuk #include <stdlib.h> 11c19beb3fSJin Yu #include <unistd.h> 12c19beb3fSJin Yu #include <stdbool.h> 13c19beb3fSJin Yu #include <signal.h> 14c19beb3fSJin Yu #include <assert.h> 15c19beb3fSJin Yu #include <semaphore.h> 16c19beb3fSJin Yu #include <linux/virtio_blk.h> 17c19beb3fSJin Yu #include <linux/virtio_ring.h> 18c19beb3fSJin Yu 19c19beb3fSJin Yu #include <rte_atomic.h> 20c19beb3fSJin Yu #include <rte_cycles.h> 21c19beb3fSJin Yu #include <rte_log.h> 22c19beb3fSJin Yu #include <rte_malloc.h> 23c19beb3fSJin Yu #include <rte_vhost.h> 24c19beb3fSJin Yu 25c19beb3fSJin Yu #include "vhost_blk.h" 26c19beb3fSJin Yu #include "blk_spec.h" 27c19beb3fSJin Yu 28c19beb3fSJin Yu #define VIRTQ_DESC_F_NEXT 1 29c19beb3fSJin Yu #define VIRTQ_DESC_F_AVAIL (1 << 7) 30c19beb3fSJin Yu #define VIRTQ_DESC_F_USED (1 << 15) 31c19beb3fSJin Yu 32c19beb3fSJin Yu #define MAX_TASK 12 33c19beb3fSJin Yu 34c19beb3fSJin Yu #define VHOST_BLK_FEATURES ((1ULL << VIRTIO_F_RING_PACKED) | \ 35c19beb3fSJin Yu (1ULL << VIRTIO_F_VERSION_1) |\ 36c19beb3fSJin Yu (1ULL << VIRTIO_F_NOTIFY_ON_EMPTY) | \ 37c19beb3fSJin Yu (1ULL << VHOST_USER_F_PROTOCOL_FEATURES)) 3891d3e2d4SJin Yu #define CTRLR_NAME "vhost.socket" 3991d3e2d4SJin Yu 4091d3e2d4SJin Yu enum CTRLR_WORKER_STATUS { 4191d3e2d4SJin Yu WORKER_STATE_START = 0, 4291d3e2d4SJin Yu WORKER_STATE_STOP, 4391d3e2d4SJin Yu }; 44c19beb3fSJin Yu 45be848992STimothy Redaelli struct vhost_blk_ctrlr *g_vhost_ctrlr; 46be848992STimothy Redaelli 47c19beb3fSJin Yu /* Path to folder where character device will be created. Can be set by user. */ 48c19beb3fSJin Yu static char dev_pathname[PATH_MAX] = ""; 49c19beb3fSJin Yu static sem_t exit_sem; 5091d3e2d4SJin Yu static enum CTRLR_WORKER_STATUS worker_thread_status; 51c19beb3fSJin Yu 52c19beb3fSJin Yu struct vhost_blk_ctrlr * 53c19beb3fSJin Yu vhost_blk_ctrlr_find(const char *ctrlr_name) 54c19beb3fSJin Yu { 55c19beb3fSJin Yu if (ctrlr_name == NULL) 56c19beb3fSJin Yu return NULL; 57c19beb3fSJin Yu 58c19beb3fSJin Yu /* currently we only support 1 socket file fd */ 59c19beb3fSJin Yu return g_vhost_ctrlr; 60c19beb3fSJin Yu } 61c19beb3fSJin Yu 6291d3e2d4SJin Yu static uint64_t 6391d3e2d4SJin Yu gpa_to_vva(struct vhost_blk_ctrlr *ctrlr, uint64_t gpa, uint64_t *len) 64c19beb3fSJin Yu { 65c19beb3fSJin Yu assert(ctrlr->mem != NULL); 66c19beb3fSJin Yu 67c19beb3fSJin Yu return rte_vhost_va_from_guest_pa(ctrlr->mem, gpa, len); 68c19beb3fSJin Yu } 69c19beb3fSJin Yu 7091d3e2d4SJin Yu static void 7191d3e2d4SJin Yu enqueue_task(struct vhost_blk_task *task) 72c19beb3fSJin Yu { 7391d3e2d4SJin Yu struct vhost_blk_queue *vq = task->vq; 7491d3e2d4SJin Yu struct vring_used *used = vq->vring.used; 7591d3e2d4SJin Yu 7691d3e2d4SJin Yu rte_vhost_set_last_inflight_io_split(task->ctrlr->vid, 7791d3e2d4SJin Yu vq->id, task->req_idx); 7891d3e2d4SJin Yu 7991d3e2d4SJin Yu /* Fill out the next entry in the "used" ring. id = the 8091d3e2d4SJin Yu * index of the descriptor that contained the blk request. 8191d3e2d4SJin Yu * len = the total amount of data transferred for the blk 8291d3e2d4SJin Yu * request. We must report the correct len, for variable 8391d3e2d4SJin Yu * length blk CDBs, where we may return less data than 8491d3e2d4SJin Yu * allocated by the guest VM. 8591d3e2d4SJin Yu */ 8691d3e2d4SJin Yu used->ring[used->idx & (vq->vring.size - 1)].id = task->req_idx; 8791d3e2d4SJin Yu used->ring[used->idx & (vq->vring.size - 1)].len = task->data_len; 8892e68d9cSTyler Retzlaff rte_atomic_thread_fence(rte_memory_order_seq_cst); 8991d3e2d4SJin Yu used->idx++; 9092e68d9cSTyler Retzlaff rte_atomic_thread_fence(rte_memory_order_seq_cst); 9191d3e2d4SJin Yu 9291d3e2d4SJin Yu rte_vhost_clr_inflight_desc_split(task->ctrlr->vid, 9391d3e2d4SJin Yu vq->id, used->idx, task->req_idx); 9491d3e2d4SJin Yu 9591d3e2d4SJin Yu /* Send an interrupt back to the guest VM so that it knows 9691d3e2d4SJin Yu * a completion is ready to be processed. 9791d3e2d4SJin Yu */ 9891d3e2d4SJin Yu rte_vhost_vring_call(task->ctrlr->vid, vq->id); 99c19beb3fSJin Yu } 100c19beb3fSJin Yu 10191d3e2d4SJin Yu static void 10291d3e2d4SJin Yu enqueue_task_packed(struct vhost_blk_task *task) 10391d3e2d4SJin Yu { 10491d3e2d4SJin Yu struct vhost_blk_queue *vq = task->vq; 10591d3e2d4SJin Yu struct vring_packed_desc *desc; 10691d3e2d4SJin Yu 10791d3e2d4SJin Yu rte_vhost_set_last_inflight_io_packed(task->ctrlr->vid, vq->id, 10891d3e2d4SJin Yu task->inflight_idx); 10991d3e2d4SJin Yu 11091d3e2d4SJin Yu desc = &vq->vring.desc_packed[vq->last_used_idx]; 11191d3e2d4SJin Yu desc->id = task->buffer_id; 11291d3e2d4SJin Yu desc->addr = 0; 11391d3e2d4SJin Yu 11492e68d9cSTyler Retzlaff rte_atomic_thread_fence(rte_memory_order_seq_cst); 11591d3e2d4SJin Yu if (vq->used_wrap_counter) 11691d3e2d4SJin Yu desc->flags |= VIRTQ_DESC_F_AVAIL | VIRTQ_DESC_F_USED; 11791d3e2d4SJin Yu else 11891d3e2d4SJin Yu desc->flags &= ~(VIRTQ_DESC_F_AVAIL | VIRTQ_DESC_F_USED); 11992e68d9cSTyler Retzlaff rte_atomic_thread_fence(rte_memory_order_seq_cst); 12091d3e2d4SJin Yu 12191d3e2d4SJin Yu rte_vhost_clr_inflight_desc_packed(task->ctrlr->vid, vq->id, 12291d3e2d4SJin Yu task->inflight_idx); 12391d3e2d4SJin Yu 12491d3e2d4SJin Yu vq->last_used_idx += task->chain_num; 12591d3e2d4SJin Yu if (vq->last_used_idx >= vq->vring.size) { 12691d3e2d4SJin Yu vq->last_used_idx -= vq->vring.size; 12791d3e2d4SJin Yu vq->used_wrap_counter = !vq->used_wrap_counter; 12891d3e2d4SJin Yu } 12991d3e2d4SJin Yu 13091d3e2d4SJin Yu /* Send an interrupt back to the guest VM so that it knows 13191d3e2d4SJin Yu * a completion is ready to be processed. 13291d3e2d4SJin Yu */ 13391d3e2d4SJin Yu rte_vhost_vring_call(task->ctrlr->vid, vq->id); 134c19beb3fSJin Yu } 135c19beb3fSJin Yu 136c19beb3fSJin Yu static bool 137c19beb3fSJin Yu descriptor_has_next_packed(struct vring_packed_desc *cur_desc) 138c19beb3fSJin Yu { 139c19beb3fSJin Yu return !!(cur_desc->flags & VRING_DESC_F_NEXT); 140c19beb3fSJin Yu } 141c19beb3fSJin Yu 142c19beb3fSJin Yu static bool 143c19beb3fSJin Yu descriptor_has_next_split(struct vring_desc *cur_desc) 144c19beb3fSJin Yu { 145c19beb3fSJin Yu return !!(cur_desc->flags & VRING_DESC_F_NEXT); 146c19beb3fSJin Yu } 147c19beb3fSJin Yu 14891d3e2d4SJin Yu static int 14991d3e2d4SJin Yu desc_payload_to_iovs(struct vhost_blk_ctrlr *ctrlr, struct iovec *iovs, 15091d3e2d4SJin Yu uint32_t *iov_index, uintptr_t payload, uint64_t remaining) 151c19beb3fSJin Yu { 15291d3e2d4SJin Yu void *vva; 15391d3e2d4SJin Yu uint64_t len; 154c19beb3fSJin Yu 155c19beb3fSJin Yu do { 15691d3e2d4SJin Yu if (*iov_index >= VHOST_BLK_MAX_IOVS) { 15791d3e2d4SJin Yu fprintf(stderr, "VHOST_BLK_MAX_IOVS reached\n"); 15891d3e2d4SJin Yu return -1; 15991d3e2d4SJin Yu } 16091d3e2d4SJin Yu len = remaining; 16191d3e2d4SJin Yu vva = (void *)(uintptr_t)gpa_to_vva(ctrlr, 16291d3e2d4SJin Yu payload, &len); 16391d3e2d4SJin Yu if (!vva || !len) { 164c19beb3fSJin Yu fprintf(stderr, "failed to translate desc address.\n"); 16591d3e2d4SJin Yu return -1; 16691d3e2d4SJin Yu } 16791d3e2d4SJin Yu 16891d3e2d4SJin Yu iovs[*iov_index].iov_base = vva; 16991d3e2d4SJin Yu iovs[*iov_index].iov_len = len; 17091d3e2d4SJin Yu payload += len; 17191d3e2d4SJin Yu remaining -= len; 17291d3e2d4SJin Yu (*iov_index)++; 17391d3e2d4SJin Yu } while (remaining); 17491d3e2d4SJin Yu 17591d3e2d4SJin Yu return 0; 17691d3e2d4SJin Yu } 17791d3e2d4SJin Yu 17891d3e2d4SJin Yu static struct vring_desc * 17991d3e2d4SJin Yu vring_get_next_desc(struct vhost_blk_queue *vq, struct vring_desc *desc) 18091d3e2d4SJin Yu { 18191d3e2d4SJin Yu if (descriptor_has_next_split(desc)) 18291d3e2d4SJin Yu return &vq->vring.desc[desc->next]; 18391d3e2d4SJin Yu 18491d3e2d4SJin Yu return NULL; 18591d3e2d4SJin Yu } 18691d3e2d4SJin Yu 18791d3e2d4SJin Yu static struct vring_packed_desc * 18891d3e2d4SJin Yu vring_get_next_desc_packed(struct vhost_blk_queue *vq, uint16_t *req_idx) 18991d3e2d4SJin Yu { 19091d3e2d4SJin Yu if (descriptor_has_next_packed(&vq->vring.desc_packed[*req_idx])) { 19191d3e2d4SJin Yu *req_idx = (*req_idx + 1) % vq->vring.size; 19291d3e2d4SJin Yu return &vq->vring.desc_packed[*req_idx]; 19391d3e2d4SJin Yu } 19491d3e2d4SJin Yu 19591d3e2d4SJin Yu return NULL; 19691d3e2d4SJin Yu } 19791d3e2d4SJin Yu 19891d3e2d4SJin Yu static struct rte_vhost_inflight_desc_packed * 19991d3e2d4SJin Yu vring_get_next_inflight_desc(struct vhost_blk_queue *vq, 20091d3e2d4SJin Yu struct rte_vhost_inflight_desc_packed *desc) 20191d3e2d4SJin Yu { 20291d3e2d4SJin Yu if (!!(desc->flags & VRING_DESC_F_NEXT)) 20391d3e2d4SJin Yu return &vq->inflight_ring.inflight_packed->desc[desc->next]; 20491d3e2d4SJin Yu 20591d3e2d4SJin Yu return NULL; 20691d3e2d4SJin Yu } 20791d3e2d4SJin Yu 20891d3e2d4SJin Yu static int 20991d3e2d4SJin Yu setup_iovs_from_descs_split(struct vhost_blk_ctrlr *ctrlr, 21091d3e2d4SJin Yu struct vhost_blk_queue *vq, uint16_t req_idx, 21191d3e2d4SJin Yu struct iovec *iovs, uint32_t *iovs_idx, 21291d3e2d4SJin Yu uint32_t *payload) 21391d3e2d4SJin Yu { 21491d3e2d4SJin Yu struct vring_desc *desc = &vq->vring.desc[req_idx]; 21591d3e2d4SJin Yu 21691d3e2d4SJin Yu do { 21791d3e2d4SJin Yu /* does not support indirect descriptors */ 21891d3e2d4SJin Yu assert((desc->flags & VRING_DESC_F_INDIRECT) == 0); 21991d3e2d4SJin Yu 22091d3e2d4SJin Yu if (*iovs_idx >= VHOST_BLK_MAX_IOVS) { 22191d3e2d4SJin Yu fprintf(stderr, "Reach VHOST_BLK_MAX_IOVS\n"); 22291d3e2d4SJin Yu return -1; 22391d3e2d4SJin Yu } 22491d3e2d4SJin Yu 22591d3e2d4SJin Yu if (desc_payload_to_iovs(ctrlr, iovs, iovs_idx, 22691d3e2d4SJin Yu desc->addr, desc->len) != 0) { 22791d3e2d4SJin Yu fprintf(stderr, "Failed to convert desc payload to iovs\n"); 22891d3e2d4SJin Yu return -1; 22991d3e2d4SJin Yu } 23091d3e2d4SJin Yu 23191d3e2d4SJin Yu *payload += desc->len; 23291d3e2d4SJin Yu 23391d3e2d4SJin Yu desc = vring_get_next_desc(vq, desc); 23491d3e2d4SJin Yu } while (desc != NULL); 23591d3e2d4SJin Yu 23691d3e2d4SJin Yu return 0; 23791d3e2d4SJin Yu } 23891d3e2d4SJin Yu 23991d3e2d4SJin Yu static int 24091d3e2d4SJin Yu setup_iovs_from_descs_packed(struct vhost_blk_ctrlr *ctrlr, 24191d3e2d4SJin Yu struct vhost_blk_queue *vq, uint16_t req_idx, 24291d3e2d4SJin Yu struct iovec *iovs, uint32_t *iovs_idx, 24391d3e2d4SJin Yu uint32_t *payload) 24491d3e2d4SJin Yu { 24591d3e2d4SJin Yu struct vring_packed_desc *desc = &vq->vring.desc_packed[req_idx]; 24691d3e2d4SJin Yu 24791d3e2d4SJin Yu do { 24891d3e2d4SJin Yu /* does not support indirect descriptors */ 24991d3e2d4SJin Yu assert((desc->flags & VRING_DESC_F_INDIRECT) == 0); 25091d3e2d4SJin Yu 25191d3e2d4SJin Yu if (*iovs_idx >= VHOST_BLK_MAX_IOVS) { 25291d3e2d4SJin Yu fprintf(stderr, "Reach VHOST_BLK_MAX_IOVS\n"); 25391d3e2d4SJin Yu return -1; 25491d3e2d4SJin Yu } 25591d3e2d4SJin Yu 25691d3e2d4SJin Yu if (desc_payload_to_iovs(ctrlr, iovs, iovs_idx, 25791d3e2d4SJin Yu desc->addr, desc->len) != 0) { 25891d3e2d4SJin Yu fprintf(stderr, "Failed to convert desc payload to iovs\n"); 25991d3e2d4SJin Yu return -1; 26091d3e2d4SJin Yu } 26191d3e2d4SJin Yu 26291d3e2d4SJin Yu *payload += desc->len; 26391d3e2d4SJin Yu 26491d3e2d4SJin Yu desc = vring_get_next_desc_packed(vq, &req_idx); 26591d3e2d4SJin Yu } while (desc != NULL); 26691d3e2d4SJin Yu 26791d3e2d4SJin Yu return 0; 26891d3e2d4SJin Yu } 26991d3e2d4SJin Yu 27091d3e2d4SJin Yu static int 27191d3e2d4SJin Yu setup_iovs_from_inflight_desc(struct vhost_blk_ctrlr *ctrlr, 27291d3e2d4SJin Yu struct vhost_blk_queue *vq, uint16_t req_idx, 27391d3e2d4SJin Yu struct iovec *iovs, uint32_t *iovs_idx, 27491d3e2d4SJin Yu uint32_t *payload) 27591d3e2d4SJin Yu { 27691d3e2d4SJin Yu struct rte_vhost_ring_inflight *inflight_vq; 27791d3e2d4SJin Yu struct rte_vhost_inflight_desc_packed *desc; 27891d3e2d4SJin Yu 27991d3e2d4SJin Yu inflight_vq = &vq->inflight_ring; 28091d3e2d4SJin Yu desc = &inflight_vq->inflight_packed->desc[req_idx]; 28191d3e2d4SJin Yu 28291d3e2d4SJin Yu do { 28391d3e2d4SJin Yu /* does not support indirect descriptors */ 28491d3e2d4SJin Yu assert((desc->flags & VRING_DESC_F_INDIRECT) == 0); 28591d3e2d4SJin Yu 28691d3e2d4SJin Yu if (*iovs_idx >= VHOST_BLK_MAX_IOVS) { 28791d3e2d4SJin Yu fprintf(stderr, "Reach VHOST_BLK_MAX_IOVS\n"); 28891d3e2d4SJin Yu return -1; 28991d3e2d4SJin Yu } 29091d3e2d4SJin Yu 29191d3e2d4SJin Yu if (desc_payload_to_iovs(ctrlr, iovs, iovs_idx, 29291d3e2d4SJin Yu desc->addr, desc->len) != 0) { 29391d3e2d4SJin Yu fprintf(stderr, "Failed to convert desc payload to iovs\n"); 29491d3e2d4SJin Yu return -1; 29591d3e2d4SJin Yu } 29691d3e2d4SJin Yu 29791d3e2d4SJin Yu *payload += desc->len; 29891d3e2d4SJin Yu 29991d3e2d4SJin Yu desc = vring_get_next_inflight_desc(vq, desc); 30091d3e2d4SJin Yu } while (desc != NULL); 30191d3e2d4SJin Yu 30291d3e2d4SJin Yu return 0; 30391d3e2d4SJin Yu } 30491d3e2d4SJin Yu 30591d3e2d4SJin Yu static void 30691d3e2d4SJin Yu process_blk_task(struct vhost_blk_task *task) 30791d3e2d4SJin Yu { 30891d3e2d4SJin Yu uint32_t payload = 0; 30991d3e2d4SJin Yu 31091d3e2d4SJin Yu if (task->vq->packed_ring) { 31191d3e2d4SJin Yu struct rte_vhost_ring_inflight *inflight_ring; 31291d3e2d4SJin Yu struct rte_vhost_resubmit_info *resubmit_inflight; 31391d3e2d4SJin Yu 31491d3e2d4SJin Yu inflight_ring = &task->vq->inflight_ring; 31591d3e2d4SJin Yu resubmit_inflight = inflight_ring->resubmit_inflight; 31691d3e2d4SJin Yu 31791d3e2d4SJin Yu if (resubmit_inflight != NULL && 31891d3e2d4SJin Yu resubmit_inflight->resubmit_list != NULL) { 31991d3e2d4SJin Yu if (setup_iovs_from_inflight_desc(task->ctrlr, task->vq, 32091d3e2d4SJin Yu task->req_idx, task->iovs, &task->iovs_cnt, 32191d3e2d4SJin Yu &payload)) { 32291d3e2d4SJin Yu fprintf(stderr, "Failed to setup iovs\n"); 323c19beb3fSJin Yu return; 324c19beb3fSJin Yu } 32591d3e2d4SJin Yu } else { 32691d3e2d4SJin Yu if (setup_iovs_from_descs_packed(task->ctrlr, task->vq, 32791d3e2d4SJin Yu task->req_idx, task->iovs, &task->iovs_cnt, 32891d3e2d4SJin Yu &payload)) { 32991d3e2d4SJin Yu fprintf(stderr, "Failed to setup iovs\n"); 33091d3e2d4SJin Yu return; 33191d3e2d4SJin Yu } 33291d3e2d4SJin Yu } 33391d3e2d4SJin Yu } else { 33491d3e2d4SJin Yu if (setup_iovs_from_descs_split(task->ctrlr, task->vq, 33591d3e2d4SJin Yu task->req_idx, task->iovs, &task->iovs_cnt, &payload)) { 33691d3e2d4SJin Yu fprintf(stderr, "Failed to setup iovs\n"); 33791d3e2d4SJin Yu return; 33891d3e2d4SJin Yu } 33991d3e2d4SJin Yu } 340c19beb3fSJin Yu 34191d3e2d4SJin Yu /* First IOV must be the req head. */ 34291d3e2d4SJin Yu task->req = (struct virtio_blk_outhdr *)task->iovs[0].iov_base; 34391d3e2d4SJin Yu assert(sizeof(*task->req) == task->iovs[0].iov_len); 344c19beb3fSJin Yu 34591d3e2d4SJin Yu /* Last IOV must be the status tail. */ 34691d3e2d4SJin Yu task->status = (uint8_t *)task->iovs[task->iovs_cnt - 1].iov_base; 34791d3e2d4SJin Yu assert(sizeof(*task->status) == task->iovs[task->iovs_cnt - 1].iov_len); 34891d3e2d4SJin Yu 34991d3e2d4SJin Yu /* Transport data len */ 35091d3e2d4SJin Yu task->data_len = payload - task->iovs[0].iov_len - 35191d3e2d4SJin Yu task->iovs[task->iovs_cnt - 1].iov_len; 35291d3e2d4SJin Yu 35391d3e2d4SJin Yu if (vhost_bdev_process_blk_commands(task->ctrlr->bdev, task)) 35491d3e2d4SJin Yu /* invalid response */ 35591d3e2d4SJin Yu *task->status = VIRTIO_BLK_S_IOERR; 35691d3e2d4SJin Yu else 35791d3e2d4SJin Yu /* successfully */ 35891d3e2d4SJin Yu *task->status = VIRTIO_BLK_S_OK; 35991d3e2d4SJin Yu 36091d3e2d4SJin Yu if (task->vq->packed_ring) 36191d3e2d4SJin Yu enqueue_task_packed(task); 36291d3e2d4SJin Yu else 36391d3e2d4SJin Yu enqueue_task(task); 364c19beb3fSJin Yu } 365c19beb3fSJin Yu 366c19beb3fSJin Yu static void 36791d3e2d4SJin Yu blk_task_init(struct vhost_blk_task *task) 368c19beb3fSJin Yu { 36991d3e2d4SJin Yu task->iovs_cnt = 0; 37091d3e2d4SJin Yu task->data_len = 0; 37191d3e2d4SJin Yu task->req = NULL; 37291d3e2d4SJin Yu task->status = NULL; 373c19beb3fSJin Yu } 374c19beb3fSJin Yu 375c19beb3fSJin Yu static void 37691d3e2d4SJin Yu submit_inflight_vq(struct vhost_blk_queue *vq) 377c19beb3fSJin Yu { 37891d3e2d4SJin Yu struct rte_vhost_ring_inflight *inflight_ring; 379c19beb3fSJin Yu struct rte_vhost_resubmit_info *resubmit_inflight; 380c19beb3fSJin Yu struct vhost_blk_task *task; 381c19beb3fSJin Yu 38291d3e2d4SJin Yu inflight_ring = &vq->inflight_ring; 38391d3e2d4SJin Yu resubmit_inflight = inflight_ring->resubmit_inflight; 384c19beb3fSJin Yu 38591d3e2d4SJin Yu if (resubmit_inflight == NULL || 38691d3e2d4SJin Yu resubmit_inflight->resubmit_num == 0) 38791d3e2d4SJin Yu return; 388c19beb3fSJin Yu 38991d3e2d4SJin Yu fprintf(stdout, "Resubmit inflight num is %d\n", 39091d3e2d4SJin Yu resubmit_inflight->resubmit_num); 391c19beb3fSJin Yu 392c19beb3fSJin Yu while (resubmit_inflight->resubmit_num-- > 0) { 39391d3e2d4SJin Yu uint16_t desc_idx; 394c19beb3fSJin Yu 39591d3e2d4SJin Yu desc_idx = resubmit_inflight->resubmit_list[ 39691d3e2d4SJin Yu resubmit_inflight->resubmit_num].index; 397c19beb3fSJin Yu 39891d3e2d4SJin Yu if (vq->packed_ring) { 39991d3e2d4SJin Yu uint16_t task_idx; 40091d3e2d4SJin Yu struct rte_vhost_inflight_desc_packed *desc; 40191d3e2d4SJin Yu 40291d3e2d4SJin Yu desc = inflight_ring->inflight_packed->desc; 40391d3e2d4SJin Yu task_idx = desc[desc[desc_idx].last].id; 40491d3e2d4SJin Yu task = &vq->tasks[task_idx]; 40591d3e2d4SJin Yu 40691d3e2d4SJin Yu task->req_idx = desc_idx; 40791d3e2d4SJin Yu task->chain_num = desc[desc_idx].num; 40891d3e2d4SJin Yu task->buffer_id = task_idx; 40991d3e2d4SJin Yu task->inflight_idx = desc_idx; 41091d3e2d4SJin Yu 41191d3e2d4SJin Yu vq->last_avail_idx += desc[desc_idx].num; 41291d3e2d4SJin Yu if (vq->last_avail_idx >= vq->vring.size) { 41391d3e2d4SJin Yu vq->last_avail_idx -= vq->vring.size; 41491d3e2d4SJin Yu vq->avail_wrap_counter = 41591d3e2d4SJin Yu !vq->avail_wrap_counter; 41691d3e2d4SJin Yu } 41791d3e2d4SJin Yu } else 41891d3e2d4SJin Yu /* In split ring, the desc_idx is the req_id 41991d3e2d4SJin Yu * which was initialized when allocated the task pool. 42091d3e2d4SJin Yu */ 42191d3e2d4SJin Yu task = &vq->tasks[desc_idx]; 42291d3e2d4SJin Yu 42391d3e2d4SJin Yu blk_task_init(task); 42491d3e2d4SJin Yu process_blk_task(task); 425c19beb3fSJin Yu } 426c19beb3fSJin Yu 42791d3e2d4SJin Yu free(resubmit_inflight->resubmit_list); 42891d3e2d4SJin Yu resubmit_inflight->resubmit_list = NULL; 429c19beb3fSJin Yu } 43091d3e2d4SJin Yu 43191d3e2d4SJin Yu /* Use the buffer_id as the task_idx */ 43291d3e2d4SJin Yu static uint16_t 43391d3e2d4SJin Yu vhost_blk_vq_get_desc_chain_buffer_id(struct vhost_blk_queue *vq, 43491d3e2d4SJin Yu uint16_t *req_head, uint16_t *num) 43591d3e2d4SJin Yu { 43691d3e2d4SJin Yu struct vring_packed_desc *desc = &vq->vring.desc_packed[ 43791d3e2d4SJin Yu vq->last_avail_idx]; 43891d3e2d4SJin Yu 43991d3e2d4SJin Yu *req_head = vq->last_avail_idx; 44091d3e2d4SJin Yu *num = 1; 44191d3e2d4SJin Yu 44291d3e2d4SJin Yu while (descriptor_has_next_packed(desc)) { 44391d3e2d4SJin Yu vq->last_avail_idx = (vq->last_avail_idx + 1) % vq->vring.size; 44491d3e2d4SJin Yu desc = &vq->vring.desc_packed[vq->last_avail_idx]; 44591d3e2d4SJin Yu *num += 1; 44691d3e2d4SJin Yu } 44791d3e2d4SJin Yu 44891d3e2d4SJin Yu /* Point to next desc */ 44991d3e2d4SJin Yu vq->last_avail_idx = (vq->last_avail_idx + 1) % vq->vring.size; 45091d3e2d4SJin Yu if (vq->last_avail_idx < *req_head) 45191d3e2d4SJin Yu vq->avail_wrap_counter = !vq->avail_wrap_counter; 45291d3e2d4SJin Yu 45391d3e2d4SJin Yu return desc->id; 45491d3e2d4SJin Yu } 45591d3e2d4SJin Yu 45691d3e2d4SJin Yu static uint16_t 45791d3e2d4SJin Yu vq_get_desc_idx(struct vhost_blk_queue *vq) 45891d3e2d4SJin Yu { 45991d3e2d4SJin Yu uint16_t desc_idx; 46091d3e2d4SJin Yu uint16_t last_avail_idx; 46191d3e2d4SJin Yu 46291d3e2d4SJin Yu last_avail_idx = vq->last_avail_idx & (vq->vring.size - 1); 46391d3e2d4SJin Yu desc_idx = vq->vring.avail->ring[last_avail_idx]; 46491d3e2d4SJin Yu vq->last_avail_idx++; 46591d3e2d4SJin Yu 46691d3e2d4SJin Yu return desc_idx; 46791d3e2d4SJin Yu } 46891d3e2d4SJin Yu 46991d3e2d4SJin Yu static int 47091d3e2d4SJin Yu vhost_blk_vq_is_avail(struct vhost_blk_queue *vq) 47191d3e2d4SJin Yu { 47291d3e2d4SJin Yu if (vq->packed_ring) { 47391d3e2d4SJin Yu uint16_t flags = vq->vring.desc_packed[ 47491d3e2d4SJin Yu vq->last_avail_idx].flags; 47591d3e2d4SJin Yu bool avail_wrap_counter = vq->avail_wrap_counter; 47691d3e2d4SJin Yu 47791d3e2d4SJin Yu return (!!(flags & VIRTQ_DESC_F_AVAIL) == avail_wrap_counter && 47891d3e2d4SJin Yu !!(flags & VIRTQ_DESC_F_USED) != avail_wrap_counter); 479c19beb3fSJin Yu } else { 48091d3e2d4SJin Yu if (vq->vring.avail->idx != vq->last_avail_idx) 48191d3e2d4SJin Yu return 1; 482c19beb3fSJin Yu 48391d3e2d4SJin Yu return 0; 484c19beb3fSJin Yu } 485c19beb3fSJin Yu } 486c19beb3fSJin Yu 487c19beb3fSJin Yu static void 48891d3e2d4SJin Yu process_vq(struct vhost_blk_queue *vq) 489c19beb3fSJin Yu { 490c19beb3fSJin Yu struct vhost_blk_task *task; 491c19beb3fSJin Yu 49291d3e2d4SJin Yu if (vq->packed_ring) { 49391d3e2d4SJin Yu while (vhost_blk_vq_is_avail(vq)) { 49491d3e2d4SJin Yu uint16_t task_idx, req_idx, last_idx, chain_num; 495c19beb3fSJin Yu 49691d3e2d4SJin Yu task_idx = vhost_blk_vq_get_desc_chain_buffer_id(vq, 49791d3e2d4SJin Yu &req_idx, &chain_num); 49891d3e2d4SJin Yu task = &vq->tasks[task_idx]; 499c19beb3fSJin Yu 50091d3e2d4SJin Yu blk_task_init(task); 501c19beb3fSJin Yu task->req_idx = req_idx; 50291d3e2d4SJin Yu task->chain_num = chain_num; 50391d3e2d4SJin Yu task->buffer_id = task_idx; 50491d3e2d4SJin Yu last_idx = (req_idx + chain_num - 1) % vq->vring.size; 505c19beb3fSJin Yu 50691d3e2d4SJin Yu rte_vhost_set_inflight_desc_packed(task->ctrlr->vid, 50791d3e2d4SJin Yu vq->id, 50891d3e2d4SJin Yu task->req_idx, 50991d3e2d4SJin Yu last_idx, 51091d3e2d4SJin Yu &task->inflight_idx); 51191d3e2d4SJin Yu 51291d3e2d4SJin Yu process_blk_task(task); 51391d3e2d4SJin Yu } 51491d3e2d4SJin Yu } else { 51591d3e2d4SJin Yu while (vhost_blk_vq_is_avail(vq)) { 51691d3e2d4SJin Yu uint16_t desc_idx; 51791d3e2d4SJin Yu 51891d3e2d4SJin Yu desc_idx = vq_get_desc_idx(vq); 51991d3e2d4SJin Yu task = &vq->tasks[desc_idx]; 52091d3e2d4SJin Yu 52191d3e2d4SJin Yu blk_task_init(task); 52291d3e2d4SJin Yu rte_vhost_set_inflight_desc_split(task->ctrlr->vid, 52391d3e2d4SJin Yu vq->id, 524c19beb3fSJin Yu task->req_idx); 52591d3e2d4SJin Yu process_blk_task(task); 526c19beb3fSJin Yu } 527c19beb3fSJin Yu } 528c19beb3fSJin Yu } 529c19beb3fSJin Yu 5306b6678a6SThomas Monjalon static uint32_t 531c19beb3fSJin Yu ctrlr_worker(void *arg) 532c19beb3fSJin Yu { 533c19beb3fSJin Yu struct vhost_blk_ctrlr *ctrlr = (struct vhost_blk_ctrlr *)arg; 534c19beb3fSJin Yu int i; 535c19beb3fSJin Yu 536c19beb3fSJin Yu fprintf(stdout, "Ctrlr Worker Thread start\n"); 537c19beb3fSJin Yu 538c19beb3fSJin Yu if (ctrlr == NULL || ctrlr->bdev == NULL) { 539c19beb3fSJin Yu fprintf(stderr, 540c19beb3fSJin Yu "%s: Error, invalid argument passed to worker thread\n", 541c19beb3fSJin Yu __func__); 542c19beb3fSJin Yu exit(0); 543c19beb3fSJin Yu } 544c19beb3fSJin Yu 54591d3e2d4SJin Yu for (i = 0; i < NUM_OF_BLK_QUEUES; i++) 54691d3e2d4SJin Yu submit_inflight_vq(&ctrlr->queues[i]); 547c19beb3fSJin Yu 54891d3e2d4SJin Yu while (worker_thread_status != WORKER_STATE_STOP) 54991d3e2d4SJin Yu for (i = 0; i < NUM_OF_BLK_QUEUES; i++) 55091d3e2d4SJin Yu process_vq(&ctrlr->queues[i]); 551c19beb3fSJin Yu 552c19beb3fSJin Yu fprintf(stdout, "Ctrlr Worker Thread Exiting\n"); 553c19beb3fSJin Yu sem_post(&exit_sem); 5546b6678a6SThomas Monjalon return 0; 555c19beb3fSJin Yu } 556c19beb3fSJin Yu 557c19beb3fSJin Yu static int 55891d3e2d4SJin Yu alloc_task_pool(struct vhost_blk_ctrlr *ctrlr) 55991d3e2d4SJin Yu { 56091d3e2d4SJin Yu struct vhost_blk_queue *vq; 56191d3e2d4SJin Yu int i, j; 56291d3e2d4SJin Yu 56391d3e2d4SJin Yu for (i = 0; i < NUM_OF_BLK_QUEUES; i++) { 56491d3e2d4SJin Yu vq = &ctrlr->queues[i]; 56591d3e2d4SJin Yu 56691d3e2d4SJin Yu vq->tasks = rte_zmalloc(NULL, 56791d3e2d4SJin Yu sizeof(struct vhost_blk_task) * vq->vring.size, 0); 56891d3e2d4SJin Yu if (!vq->tasks) { 56991d3e2d4SJin Yu fprintf(stderr, "Failed to allocate task memory\n"); 57091d3e2d4SJin Yu return -1; 57191d3e2d4SJin Yu } 57291d3e2d4SJin Yu 57391d3e2d4SJin Yu for (j = 0; j < vq->vring.size; j++) { 57491d3e2d4SJin Yu vq->tasks[j].req_idx = j; 57591d3e2d4SJin Yu vq->tasks[j].ctrlr = ctrlr; 57691d3e2d4SJin Yu vq->tasks[j].vq = vq; 57791d3e2d4SJin Yu } 57891d3e2d4SJin Yu } 57991d3e2d4SJin Yu 58091d3e2d4SJin Yu return 0; 58191d3e2d4SJin Yu } 58291d3e2d4SJin Yu 58391d3e2d4SJin Yu static void 58491d3e2d4SJin Yu free_task_pool(struct vhost_blk_ctrlr *ctrlr) 58591d3e2d4SJin Yu { 58691d3e2d4SJin Yu int i; 58791d3e2d4SJin Yu 58891d3e2d4SJin Yu for (i = 0; i < NUM_OF_BLK_QUEUES; i++) 58991d3e2d4SJin Yu rte_free(ctrlr->queues[i].tasks); 59091d3e2d4SJin Yu } 59191d3e2d4SJin Yu 59291d3e2d4SJin Yu static int 593c19beb3fSJin Yu new_device(int vid) 594c19beb3fSJin Yu { 595c19beb3fSJin Yu struct vhost_blk_ctrlr *ctrlr; 59691d3e2d4SJin Yu struct vhost_blk_queue *vq; 59791d3e2d4SJin Yu char path[PATH_MAX]; 598510f43fcSKeiichi Watanabe uint64_t features, protocol_features; 5996b6678a6SThomas Monjalon rte_thread_t tid; 600c19beb3fSJin Yu int i, ret; 601510f43fcSKeiichi Watanabe bool packed_ring, inflight_shmfd; 602c19beb3fSJin Yu 60391d3e2d4SJin Yu ret = rte_vhost_get_ifname(vid, path, PATH_MAX); 60491d3e2d4SJin Yu if (ret) { 60591d3e2d4SJin Yu fprintf(stderr, "Failed to get the socket path\n"); 60691d3e2d4SJin Yu return -1; 60791d3e2d4SJin Yu } 60891d3e2d4SJin Yu 60991d3e2d4SJin Yu ctrlr = vhost_blk_ctrlr_find(path); 610c19beb3fSJin Yu if (!ctrlr) { 61191d3e2d4SJin Yu fprintf(stderr, "Failed to find controller\n"); 612c19beb3fSJin Yu return -1; 613c19beb3fSJin Yu } 614c19beb3fSJin Yu 615c19beb3fSJin Yu if (ctrlr->started) 616c19beb3fSJin Yu return 0; 617c19beb3fSJin Yu 61891d3e2d4SJin Yu ctrlr->vid = vid; 619c19beb3fSJin Yu ret = rte_vhost_get_negotiated_features(vid, &features); 620c19beb3fSJin Yu if (ret) { 62191d3e2d4SJin Yu fprintf(stderr, "Failed to get the negotiated features\n"); 622c19beb3fSJin Yu return -1; 623c19beb3fSJin Yu } 62491d3e2d4SJin Yu packed_ring = !!(features & (1ULL << VIRTIO_F_RING_PACKED)); 625c19beb3fSJin Yu 626510f43fcSKeiichi Watanabe ret = rte_vhost_get_negotiated_protocol_features( 627510f43fcSKeiichi Watanabe vid, &protocol_features); 628510f43fcSKeiichi Watanabe if (ret) { 629510f43fcSKeiichi Watanabe fprintf(stderr, 630510f43fcSKeiichi Watanabe "Failed to get the negotiated protocol features\n"); 631510f43fcSKeiichi Watanabe return -1; 632510f43fcSKeiichi Watanabe } 633510f43fcSKeiichi Watanabe inflight_shmfd = !!(features & 634510f43fcSKeiichi Watanabe (1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)); 635510f43fcSKeiichi Watanabe 636c19beb3fSJin Yu /* Disable Notifications and init last idx */ 637c19beb3fSJin Yu for (i = 0; i < NUM_OF_BLK_QUEUES; i++) { 63891d3e2d4SJin Yu vq = &ctrlr->queues[i]; 63991d3e2d4SJin Yu vq->id = i; 640c19beb3fSJin Yu 64191d3e2d4SJin Yu assert(rte_vhost_get_vhost_vring(ctrlr->vid, i, 64291d3e2d4SJin Yu &vq->vring) == 0); 64391d3e2d4SJin Yu assert(rte_vhost_get_vring_base(ctrlr->vid, i, 64491d3e2d4SJin Yu &vq->last_avail_idx, 64591d3e2d4SJin Yu &vq->last_used_idx) == 0); 646510f43fcSKeiichi Watanabe 647510f43fcSKeiichi Watanabe if (inflight_shmfd) 648510f43fcSKeiichi Watanabe assert(rte_vhost_get_vhost_ring_inflight( 649510f43fcSKeiichi Watanabe ctrlr->vid, i, 65091d3e2d4SJin Yu &vq->inflight_ring) == 0); 651c19beb3fSJin Yu 652510f43fcSKeiichi Watanabe if (packed_ring && inflight_shmfd) { 653c19beb3fSJin Yu /* for the reconnection */ 65491d3e2d4SJin Yu assert(rte_vhost_get_vring_base_from_inflight( 65591d3e2d4SJin Yu ctrlr->vid, i, 65691d3e2d4SJin Yu &vq->last_avail_idx, 65791d3e2d4SJin Yu &vq->last_used_idx) == 0); 658c19beb3fSJin Yu 65991d3e2d4SJin Yu vq->avail_wrap_counter = vq->last_avail_idx & 660c19beb3fSJin Yu (1 << 15); 66191d3e2d4SJin Yu vq->last_avail_idx = vq->last_avail_idx & 662c19beb3fSJin Yu 0x7fff; 66391d3e2d4SJin Yu vq->used_wrap_counter = vq->last_used_idx & 664c19beb3fSJin Yu (1 << 15); 66591d3e2d4SJin Yu vq->last_used_idx = vq->last_used_idx & 666c19beb3fSJin Yu 0x7fff; 667c19beb3fSJin Yu } 668c19beb3fSJin Yu 66991d3e2d4SJin Yu vq->packed_ring = packed_ring; 670c19beb3fSJin Yu rte_vhost_enable_guest_notification(vid, i, 0); 671c19beb3fSJin Yu } 672c19beb3fSJin Yu 67391d3e2d4SJin Yu assert(rte_vhost_get_mem_table(vid, &ctrlr->mem) == 0); 67491d3e2d4SJin Yu assert(ctrlr->mem != NULL); 67591d3e2d4SJin Yu assert(alloc_task_pool(ctrlr) == 0); 67691d3e2d4SJin Yu 677c19beb3fSJin Yu /* start polling vring */ 67891d3e2d4SJin Yu worker_thread_status = WORKER_STATE_START; 67991d3e2d4SJin Yu fprintf(stdout, "New Device %s, Device ID %d\n", path, vid); 6806b6678a6SThomas Monjalon if (rte_thread_create_control(&tid, "dpdk-vhost-blk", 681e5fb1a96SChengwen Feng &ctrlr_worker, ctrlr) != 0) { 682c19beb3fSJin Yu fprintf(stderr, "Worker Thread Started Failed\n"); 683c19beb3fSJin Yu return -1; 684c19beb3fSJin Yu } 685c19beb3fSJin Yu 686c19beb3fSJin Yu /* device has been started */ 687c19beb3fSJin Yu ctrlr->started = 1; 6886b6678a6SThomas Monjalon rte_thread_detach(tid); 689c19beb3fSJin Yu return 0; 690c19beb3fSJin Yu } 691c19beb3fSJin Yu 692c19beb3fSJin Yu static void 693c19beb3fSJin Yu destroy_device(int vid) 694c19beb3fSJin Yu { 695c19beb3fSJin Yu char path[PATH_MAX]; 696c19beb3fSJin Yu struct vhost_blk_ctrlr *ctrlr; 69791d3e2d4SJin Yu struct vhost_blk_queue *vq; 698c19beb3fSJin Yu int i, ret; 699c19beb3fSJin Yu 700c19beb3fSJin Yu ret = rte_vhost_get_ifname(vid, path, PATH_MAX); 701c19beb3fSJin Yu if (ret) { 702c19beb3fSJin Yu fprintf(stderr, "Destroy Ctrlr Failed\n"); 703c19beb3fSJin Yu return; 704c19beb3fSJin Yu } 705c19beb3fSJin Yu 706c19beb3fSJin Yu fprintf(stdout, "Destroy %s Device ID %d\n", path, vid); 707c19beb3fSJin Yu ctrlr = vhost_blk_ctrlr_find(path); 708c19beb3fSJin Yu if (!ctrlr) { 709c19beb3fSJin Yu fprintf(stderr, "Destroy Ctrlr Failed\n"); 710c19beb3fSJin Yu return; 711c19beb3fSJin Yu } 712c19beb3fSJin Yu 713c19beb3fSJin Yu if (!ctrlr->started) 714c19beb3fSJin Yu return; 715c19beb3fSJin Yu 71691d3e2d4SJin Yu worker_thread_status = WORKER_STATE_STOP; 71791d3e2d4SJin Yu sem_wait(&exit_sem); 718c19beb3fSJin Yu 719c19beb3fSJin Yu for (i = 0; i < NUM_OF_BLK_QUEUES; i++) { 72091d3e2d4SJin Yu vq = &ctrlr->queues[i]; 72191d3e2d4SJin Yu if (vq->packed_ring) { 72291d3e2d4SJin Yu vq->last_avail_idx |= (vq->avail_wrap_counter << 723c19beb3fSJin Yu 15); 72491d3e2d4SJin Yu vq->last_used_idx |= (vq->used_wrap_counter << 725c19beb3fSJin Yu 15); 726c19beb3fSJin Yu } 727c19beb3fSJin Yu 72891d3e2d4SJin Yu rte_vhost_set_vring_base(ctrlr->vid, i, 72991d3e2d4SJin Yu vq->last_avail_idx, 73091d3e2d4SJin Yu vq->last_used_idx); 73191d3e2d4SJin Yu } 73291d3e2d4SJin Yu 73391d3e2d4SJin Yu free_task_pool(ctrlr); 734c19beb3fSJin Yu free(ctrlr->mem); 735c19beb3fSJin Yu 736c19beb3fSJin Yu ctrlr->started = 0; 737c19beb3fSJin Yu } 738c19beb3fSJin Yu 739c19beb3fSJin Yu static int 740c19beb3fSJin Yu new_connection(int vid) 741c19beb3fSJin Yu { 742c19beb3fSJin Yu /* extend the proper features for block device */ 743c19beb3fSJin Yu vhost_session_install_rte_compat_hooks(vid); 744c19beb3fSJin Yu 745c19beb3fSJin Yu return 0; 746c19beb3fSJin Yu } 747c19beb3fSJin Yu 748ab4bb424SMaxime Coquelin struct rte_vhost_device_ops vhost_blk_device_ops = { 749c19beb3fSJin Yu .new_device = new_device, 750c19beb3fSJin Yu .destroy_device = destroy_device, 751c19beb3fSJin Yu .new_connection = new_connection, 752c19beb3fSJin Yu }; 753c19beb3fSJin Yu 754c19beb3fSJin Yu static struct vhost_block_dev * 755c19beb3fSJin Yu vhost_blk_bdev_construct(const char *bdev_name, 756c19beb3fSJin Yu const char *bdev_serial, uint32_t blk_size, uint64_t blk_cnt, 757c19beb3fSJin Yu bool wce_enable) 758c19beb3fSJin Yu { 759c19beb3fSJin Yu struct vhost_block_dev *bdev; 760c19beb3fSJin Yu 761c19beb3fSJin Yu bdev = rte_zmalloc(NULL, sizeof(*bdev), RTE_CACHE_LINE_SIZE); 762c19beb3fSJin Yu if (!bdev) 763c19beb3fSJin Yu return NULL; 764c19beb3fSJin Yu 765e2b4cfd6SJin Yu snprintf(bdev->name, sizeof(bdev->name), "%s", bdev_name); 766e2b4cfd6SJin Yu snprintf(bdev->product_name, sizeof(bdev->product_name), "%s", 767e2b4cfd6SJin Yu bdev_serial); 768c19beb3fSJin Yu bdev->blocklen = blk_size; 769c19beb3fSJin Yu bdev->blockcnt = blk_cnt; 770c19beb3fSJin Yu bdev->write_cache = wce_enable; 771c19beb3fSJin Yu 77291d3e2d4SJin Yu fprintf(stdout, "Blocklen=%d, blockcnt=%"PRIx64"\n", bdev->blocklen, 773c19beb3fSJin Yu bdev->blockcnt); 774c19beb3fSJin Yu 775c19beb3fSJin Yu /* use memory as disk storage space */ 776c19beb3fSJin Yu bdev->data = rte_zmalloc(NULL, blk_cnt * blk_size, 0); 777c19beb3fSJin Yu if (!bdev->data) { 77891d3e2d4SJin Yu fprintf(stderr, "No enough reserved huge memory for disk\n"); 779*ae67f7d0SStephen Hemminger rte_free(bdev); 780c19beb3fSJin Yu return NULL; 781c19beb3fSJin Yu } 782c19beb3fSJin Yu 783c19beb3fSJin Yu return bdev; 784c19beb3fSJin Yu } 785c19beb3fSJin Yu 786c19beb3fSJin Yu static struct vhost_blk_ctrlr * 787c19beb3fSJin Yu vhost_blk_ctrlr_construct(const char *ctrlr_name) 788c19beb3fSJin Yu { 789c19beb3fSJin Yu int ret; 790c19beb3fSJin Yu struct vhost_blk_ctrlr *ctrlr; 791c19beb3fSJin Yu char *path; 792c19beb3fSJin Yu char cwd[PATH_MAX]; 793c19beb3fSJin Yu 794c19beb3fSJin Yu /* always use current directory */ 795c19beb3fSJin Yu path = getcwd(cwd, PATH_MAX); 796c19beb3fSJin Yu if (!path) { 797c19beb3fSJin Yu fprintf(stderr, "Cannot get current working directory\n"); 798c19beb3fSJin Yu return NULL; 799c19beb3fSJin Yu } 800c19beb3fSJin Yu snprintf(dev_pathname, sizeof(dev_pathname), "%s/%s", path, ctrlr_name); 801c19beb3fSJin Yu 80271dd2870SJin Yu unlink(dev_pathname); 803c19beb3fSJin Yu 804c19beb3fSJin Yu if (rte_vhost_driver_register(dev_pathname, 0) != 0) { 80591d3e2d4SJin Yu fprintf(stderr, "Socket %s already exists\n", dev_pathname); 806c19beb3fSJin Yu return NULL; 807c19beb3fSJin Yu } 808c19beb3fSJin Yu 809c19beb3fSJin Yu ret = rte_vhost_driver_set_features(dev_pathname, VHOST_BLK_FEATURES); 810c19beb3fSJin Yu if (ret != 0) { 811c19beb3fSJin Yu fprintf(stderr, "Set vhost driver features failed\n"); 812c19beb3fSJin Yu rte_vhost_driver_unregister(dev_pathname); 813c19beb3fSJin Yu return NULL; 814c19beb3fSJin Yu } 815c19beb3fSJin Yu 81691d3e2d4SJin Yu /* set vhost user protocol features */ 817c19beb3fSJin Yu vhost_dev_install_rte_compat_hooks(dev_pathname); 818c19beb3fSJin Yu 819c19beb3fSJin Yu ctrlr = rte_zmalloc(NULL, sizeof(*ctrlr), RTE_CACHE_LINE_SIZE); 820c19beb3fSJin Yu if (!ctrlr) { 821c19beb3fSJin Yu rte_vhost_driver_unregister(dev_pathname); 822c19beb3fSJin Yu return NULL; 823c19beb3fSJin Yu } 824c19beb3fSJin Yu 825c19beb3fSJin Yu /* hardcoded block device information with 128MiB */ 826c19beb3fSJin Yu ctrlr->bdev = vhost_blk_bdev_construct("malloc0", "vhost_blk_malloc0", 827c19beb3fSJin Yu 4096, 32768, 0); 828c19beb3fSJin Yu if (!ctrlr->bdev) { 829c19beb3fSJin Yu rte_free(ctrlr); 830c19beb3fSJin Yu rte_vhost_driver_unregister(dev_pathname); 831c19beb3fSJin Yu return NULL; 832c19beb3fSJin Yu } 833c19beb3fSJin Yu 834c19beb3fSJin Yu rte_vhost_driver_callback_register(dev_pathname, 835c19beb3fSJin Yu &vhost_blk_device_ops); 836c19beb3fSJin Yu 837c19beb3fSJin Yu return ctrlr; 838c19beb3fSJin Yu } 839c19beb3fSJin Yu 840c19beb3fSJin Yu static void 84191d3e2d4SJin Yu vhost_blk_ctrlr_destroy(struct vhost_blk_ctrlr *ctrlr) 84291d3e2d4SJin Yu { 84391d3e2d4SJin Yu if (ctrlr->bdev != NULL) { 84491d3e2d4SJin Yu rte_free(ctrlr->bdev->data); 84591d3e2d4SJin Yu 84691d3e2d4SJin Yu rte_free(ctrlr->bdev); 84791d3e2d4SJin Yu } 84891d3e2d4SJin Yu rte_free(ctrlr); 84991d3e2d4SJin Yu 85091d3e2d4SJin Yu rte_vhost_driver_unregister(dev_pathname); 85191d3e2d4SJin Yu } 85291d3e2d4SJin Yu 85391d3e2d4SJin Yu static void 854c19beb3fSJin Yu signal_handler(__rte_unused int signum) 855c19beb3fSJin Yu { 856c19beb3fSJin Yu struct vhost_blk_ctrlr *ctrlr; 857c19beb3fSJin Yu 858c19beb3fSJin Yu ctrlr = vhost_blk_ctrlr_find(dev_pathname); 85991d3e2d4SJin Yu if (ctrlr == NULL) 86091d3e2d4SJin Yu return; 861c19beb3fSJin Yu 86291d3e2d4SJin Yu if (ctrlr->started) 86391d3e2d4SJin Yu destroy_device(ctrlr->vid); 86491d3e2d4SJin Yu 86591d3e2d4SJin Yu vhost_blk_ctrlr_destroy(ctrlr); 866c19beb3fSJin Yu exit(0); 867c19beb3fSJin Yu } 868c19beb3fSJin Yu 869c19beb3fSJin Yu int main(int argc, char *argv[]) 870c19beb3fSJin Yu { 871c19beb3fSJin Yu int ret; 872c19beb3fSJin Yu 873c19beb3fSJin Yu /* init EAL */ 874c19beb3fSJin Yu ret = rte_eal_init(argc, argv); 875c19beb3fSJin Yu if (ret < 0) 876c19beb3fSJin Yu rte_exit(EXIT_FAILURE, "Error with EAL initialization\n"); 877c19beb3fSJin Yu 87891d3e2d4SJin Yu g_vhost_ctrlr = vhost_blk_ctrlr_construct(CTRLR_NAME); 879c19beb3fSJin Yu if (g_vhost_ctrlr == NULL) { 880c19beb3fSJin Yu fprintf(stderr, "Construct vhost blk controller failed\n"); 881c19beb3fSJin Yu return 0; 882c19beb3fSJin Yu } 883c19beb3fSJin Yu 884c19beb3fSJin Yu if (sem_init(&exit_sem, 0, 0) < 0) { 885c19beb3fSJin Yu fprintf(stderr, "Error init exit_sem\n"); 886c19beb3fSJin Yu return -1; 887c19beb3fSJin Yu } 888c19beb3fSJin Yu 88991d3e2d4SJin Yu signal(SIGINT, signal_handler); 89091d3e2d4SJin Yu 891dc9e6580SYunjian Wang ret = rte_vhost_driver_start(dev_pathname); 892dc9e6580SYunjian Wang if (ret < 0) { 893dc9e6580SYunjian Wang fprintf(stderr, "Failed to start vhost driver.\n"); 894dc9e6580SYunjian Wang return -1; 895dc9e6580SYunjian Wang } 896c19beb3fSJin Yu 897c19beb3fSJin Yu /* loop for exit the application */ 898c19beb3fSJin Yu while (1) 899c19beb3fSJin Yu sleep(1); 900c19beb3fSJin Yu 90110aa3757SChengchang Tang /* clean up the EAL */ 90210aa3757SChengchang Tang rte_eal_cleanup(); 90310aa3757SChengchang Tang 904c19beb3fSJin Yu return 0; 905c19beb3fSJin Yu } 906