1 /*- 2 * BSD LICENSE 3 * 4 * Copyright(c) Intel Corporation. All rights reserved. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * * Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * * Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * * Neither the name of Intel Corporation nor the names of its 18 * contributors may be used to endorse or promote products derived 19 * from this software without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 */ 33 34 #include <linux/virtio_ring.h> 35 #include <linux/virtio_scsi.h> 36 #include <stdint.h> 37 #include <sys/eventfd.h> 38 #include <sys/param.h> 39 #include <sys/types.h> 40 #include <sys/stat.h> 41 #include <unistd.h> 42 #include <semaphore.h> 43 44 #include <rte_config.h> 45 #include <rte_malloc.h> 46 #include <rte_virtio_net.h> 47 #include <vhost.h> 48 #include <vhost_user.h> 49 50 #include "spdk_internal/log.h" 51 #include "spdk/env.h" 52 #include "spdk/scsi.h" 53 #include "spdk/conf.h" 54 #include "spdk/event.h" 55 #include "spdk/scsi_spec.h" 56 57 #include "spdk/vhost.h" 58 #include "task.h" 59 60 static uint32_t g_num_ctrlrs[RTE_MAX_LCORE]; 61 62 #define CONTROLQ_POLL_PERIOD_US (1000 * 5) 63 64 #define VIRTIO_SCSI_CONTROLQ 0 65 #define VIRTIO_SCSI_EVENTQ 1 66 #define VIRTIO_SCSI_REQUESTQ 2 67 68 /* Path to folder where character device will be created. Can be set by user. */ 69 static char dev_dirname[PATH_MAX] = ""; 70 71 struct spdk_vaddr_region { 72 void *vaddr; 73 uint64_t len; 74 }; 75 76 /* 77 * Device linked list structure for data path. 78 */ 79 struct spdk_vhost_scsi_ctrlr { 80 char *name; 81 /**< Pointer to device created by vhost lib. */ 82 struct virtio_net *dev; 83 84 struct spdk_vaddr_region region[VHOST_MEMORY_MAX_NREGIONS]; 85 uint32_t nregions; 86 87 /**< TODO make this an array of spdk_scsi_devs. The vhost scsi 88 * request will tell us which scsi_dev to use. 89 */ 90 struct spdk_scsi_dev *scsi_dev[SPDK_VHOST_SCSI_CTRLR_MAX_DEVS]; 91 92 int task_cnt; 93 94 struct spdk_poller *requestq_poller; 95 struct spdk_poller *controlq_poller; 96 97 int32_t lcore; 98 99 uint64_t cpumask; 100 } __rte_cache_aligned; 101 102 /* This maps from the integer index passed by DPDK to the our controller representation. */ 103 /* MAX_VHOST_DEVICE from DPDK. */ 104 static struct spdk_vhost_scsi_ctrlr *dpdk_vid_mapping[MAX_VHOST_DEVICE]; 105 106 /* 107 * Get available requests from avail ring. 108 */ 109 static uint16_t 110 vq_avail_ring_get(struct vhost_virtqueue *vq, uint16_t *reqs, uint16_t reqs_len) 111 { 112 struct vring_avail *avail = vq->avail; 113 uint16_t size_mask = vq->size - 1; 114 uint16_t last_idx = vq->last_avail_idx, avail_idx = avail->idx; 115 uint16_t count = RTE_MIN((avail_idx - last_idx) & size_mask, reqs_len); 116 uint16_t i; 117 118 vq->last_avail_idx += count; 119 for (i = 0; i < count; i++) { 120 reqs[i] = vq->avail->ring[(last_idx + i) & size_mask]; 121 } 122 123 SPDK_TRACELOG(SPDK_TRACE_VHOST_RING, 124 "AVAIL: last_idx=%"PRIu16" avail_idx=%"PRIu16" count=%"PRIu16"\n", 125 last_idx, avail_idx, count); 126 127 return count; 128 } 129 130 /* 131 * Enqueue id and len to used ring. 132 */ 133 static void 134 vq_used_ring_enqueue(struct vhost_virtqueue *vq, uint16_t id, uint32_t len) 135 { 136 struct vring_used *used = vq->used; 137 uint16_t size_mask = vq->size - 1; 138 uint16_t last_idx = vq->last_used_idx; 139 140 SPDK_TRACELOG(SPDK_TRACE_VHOST_RING, "USED: last_idx=%"PRIu16" req id=%"PRIu16" len=%"PRIu32"\n", 141 last_idx, id, len); 142 143 vq->last_used_idx++; 144 last_idx &= size_mask; 145 146 used->ring[last_idx].id = id; 147 used->ring[last_idx].len = len; 148 149 rte_compiler_barrier(); 150 151 vq->used->idx = vq->last_used_idx; 152 eventfd_write(vq->callfd, (eventfd_t)1); 153 } 154 155 static bool 156 vring_desc_has_next(struct vring_desc *cur_desc) 157 { 158 return !!(cur_desc->flags & VRING_DESC_F_NEXT); 159 } 160 161 static struct vring_desc * 162 vring_desc_get_next(struct vring_desc *vq_desc, struct vring_desc *cur_desc) 163 { 164 assert(vring_desc_has_next(cur_desc)); 165 return &vq_desc[cur_desc->next]; 166 } 167 168 static bool 169 vring_desc_is_wr(struct vring_desc *cur_desc) 170 { 171 return !!(cur_desc->flags & VRING_DESC_F_WRITE); 172 } 173 174 static void task_submit(struct spdk_vhost_task *task); 175 static int process_request(struct spdk_vhost_task *task); 176 static void invalid_request(struct spdk_vhost_task *task); 177 178 static void 179 submit_completion(struct spdk_vhost_task *task) 180 { 181 struct iovec *iovs = NULL; 182 int result; 183 184 vq_used_ring_enqueue(task->vq, task->req_idx, task->scsi.data_transferred); 185 SPDK_TRACELOG(SPDK_TRACE_VHOST, "Finished task (%p) req_idx=%d\n", task, task->req_idx); 186 187 if (task->scsi.iovs != &task->scsi.iov) { 188 iovs = task->scsi.iovs; 189 task->scsi.iovs = &task->scsi.iov; 190 task->scsi.iovcnt = 1; 191 } 192 193 spdk_vhost_task_put(task); 194 195 if (!iovs) { 196 return; 197 } 198 199 while (1) { 200 task = spdk_vhost_dequeue_task(); 201 if (!task) { 202 spdk_vhost_iovec_free(iovs); 203 break; 204 } 205 206 /* Set iovs so underlying functions will not try to alloc IOV */ 207 task->scsi.iovs = iovs; 208 task->scsi.iovcnt = VHOST_SCSI_IOVS_LEN; 209 210 result = process_request(task); 211 if (result == 0) { 212 task_submit(task); 213 break; 214 } else { 215 task->scsi.iovs = &task->scsi.iov; 216 task->scsi.iovcnt = 1; 217 invalid_request(task); 218 } 219 } 220 } 221 222 static void 223 process_mgmt_task_completion(void *arg1, void *arg2) 224 { 225 struct spdk_vhost_task *task = arg1; 226 227 submit_completion(task); 228 } 229 230 static void 231 process_task_completion(void *arg1, void *arg2) 232 { 233 struct spdk_vhost_task *task = arg1; 234 235 /* The SCSI task has completed. Do final processing and then post 236 notification to the virtqueue's "used" ring. 237 */ 238 task->resp->status = task->scsi.status; 239 240 if (task->scsi.status != SPDK_SCSI_STATUS_GOOD) { 241 memcpy(task->resp->sense, task->scsi.sense_data, task->scsi.sense_data_len); 242 task->resp->sense_len = task->scsi.sense_data_len; 243 } 244 task->resp->resid = task->scsi.transfer_len - task->scsi.data_transferred; 245 246 submit_completion(task); 247 } 248 249 static void 250 task_submit(struct spdk_vhost_task *task) 251 { 252 /* The task is ready to be submitted. First create the callback event that 253 will be invoked when the SCSI command is completed. See process_task_completion() 254 for what SPDK vhost-scsi does when the task is completed. 255 */ 256 257 task->resp->response = VIRTIO_SCSI_S_OK; 258 task->scsi.cb_event = spdk_event_allocate(rte_lcore_id(), 259 process_task_completion, 260 task, NULL); 261 spdk_scsi_dev_queue_task(task->scsi_dev, &task->scsi); 262 } 263 264 static void 265 mgmt_task_submit(struct spdk_vhost_task *task) 266 { 267 task->tmf_resp->response = VIRTIO_SCSI_S_OK; 268 task->scsi.cb_event = spdk_event_allocate(rte_lcore_id(), 269 process_mgmt_task_completion, 270 task, NULL); 271 spdk_scsi_dev_queue_mgmt_task(task->scsi_dev, &task->scsi); 272 } 273 274 static void 275 invalid_request(struct spdk_vhost_task *task) 276 { 277 vq_used_ring_enqueue(task->vq, task->req_idx, 0); 278 spdk_vhost_task_put(task); 279 280 SPDK_TRACELOG(SPDK_TRACE_VHOST, "Invalid request (status=%" PRIu8")\n", 281 task->resp ? task->resp->response : -1); 282 } 283 284 static struct spdk_scsi_dev * 285 get_scsi_dev(struct spdk_vhost_scsi_ctrlr *vdev, const __u8 *lun) 286 { 287 SPDK_TRACEDUMP(SPDK_TRACE_VHOST_QUEUE, "LUN", lun, 8); 288 /* First byte must be 1 and second is target */ 289 if (lun[0] != 1 || lun[1] >= SPDK_VHOST_SCSI_CTRLR_MAX_DEVS) 290 return NULL; 291 292 return vdev->scsi_dev[lun[1]]; 293 } 294 295 static struct spdk_scsi_lun * 296 get_scsi_lun(struct spdk_scsi_dev *scsi_dev, const __u8 *lun) 297 { 298 uint16_t lun_id = (((uint16_t)lun[2] << 8) | lun[3]) & 0x3FFF; 299 300 /* For now only one LUN per controller is allowed so no need to search LUN IDs*/ 301 return likely(scsi_dev != NULL && lun_id < scsi_dev->maxlun) ? scsi_dev->lun[lun_id] : NULL; 302 } 303 304 static void 305 process_ctrl_request(struct spdk_vhost_scsi_ctrlr *vdev, struct vhost_virtqueue *controlq, 306 uint16_t req_idx) 307 { 308 struct spdk_vhost_task *task; 309 310 struct vring_desc *desc; 311 struct virtio_scsi_ctrl_tmf_req *ctrl_req; 312 struct virtio_scsi_ctrl_an_resp *an_resp; 313 314 desc = &controlq->desc[req_idx]; 315 ctrl_req = (void *)gpa_to_vva(vdev->dev, desc->addr); 316 317 SPDK_TRACELOG(SPDK_TRACE_VHOST_QUEUE, 318 "Processing controlq descriptor: desc %d/%p, desc_addr %p, len %d, flags %d, last_used_idx %d; enabled %d; kickfd %d; size %d\n", 319 req_idx, desc, (void *)desc->addr, desc->len, desc->flags, controlq->last_used_idx, 320 controlq->enabled, controlq->kickfd, controlq->size); 321 SPDK_TRACEDUMP(SPDK_TRACE_VHOST_QUEUE, "Request desriptor", (uint8_t *)ctrl_req, 322 desc->len); 323 324 task = spdk_vhost_task_get(&vdev->task_cnt); 325 task->vq = controlq; 326 task->vdev = vdev; 327 task->req_idx = req_idx; 328 task->scsi_dev = get_scsi_dev(task->vdev, ctrl_req->lun); 329 330 /* Process the TMF request */ 331 switch (ctrl_req->type) { 332 case VIRTIO_SCSI_T_TMF: 333 /* Get the response buffer */ 334 assert(vring_desc_has_next(desc)); 335 desc = vring_desc_get_next(controlq->desc, desc); 336 task->tmf_resp = (void *)gpa_to_vva(vdev->dev, desc->addr); 337 338 /* Check if we are processing a valid request */ 339 if (task->scsi_dev == NULL) { 340 task->tmf_resp->response = VIRTIO_SCSI_S_BAD_TARGET; 341 break; 342 } 343 344 switch (ctrl_req->subtype) { 345 case VIRTIO_SCSI_T_TMF_LOGICAL_UNIT_RESET: 346 /* Handle LUN reset */ 347 SPDK_TRACELOG(SPDK_TRACE_VHOST_QUEUE, "LUN reset\n"); 348 task->scsi.type = SPDK_SCSI_TASK_TYPE_MANAGE; 349 task->scsi.function = SPDK_SCSI_TASK_FUNC_LUN_RESET; 350 task->scsi.lun = get_scsi_lun(task->scsi_dev, ctrl_req->lun); 351 352 mgmt_task_submit(task); 353 return; 354 default: 355 task->tmf_resp->response = VIRTIO_SCSI_S_ABORTED; 356 /* Unsupported command */ 357 SPDK_TRACELOG(SPDK_TRACE_VHOST_QUEUE, "Unsupported TMF command %x\n", ctrl_req->subtype); 358 break; 359 } 360 break; 361 case VIRTIO_SCSI_T_AN_QUERY: 362 case VIRTIO_SCSI_T_AN_SUBSCRIBE: { 363 desc = vring_desc_get_next(controlq->desc, desc); 364 an_resp = (void *)gpa_to_vva(vdev->dev, desc->addr); 365 an_resp->response = VIRTIO_SCSI_S_ABORTED; 366 break; 367 } 368 default: 369 SPDK_TRACELOG(SPDK_TRACE_VHOST_QUEUE, "Unsupported control command %x\n", ctrl_req->type); 370 break; 371 } 372 373 vq_used_ring_enqueue(controlq, req_idx, 0); 374 spdk_vhost_task_put(task); 375 } 376 377 /* 378 * Process task's descriptor chain and setup data related fields. 379 * Return 380 * -1 if request is invalid and must be aborted, 381 * 0 if all data are set, 382 * 1 if it was not possible to allocate IO vector for this task. 383 */ 384 static int 385 task_data_setup(struct spdk_vhost_task *task, 386 struct virtio_scsi_cmd_req **req) 387 { 388 struct vhost_virtqueue *vq = task->vq; 389 struct virtio_net *dev = task->vdev->dev; 390 struct vring_desc *desc = &task->vq->desc[task->req_idx]; 391 struct iovec *iovs = task->scsi.iovs; 392 uint16_t iovcnt = 0, iovcnt_max = task->scsi.iovcnt; 393 uint32_t len = 0; 394 395 assert(iovcnt_max == 1 || iovcnt_max == VHOST_SCSI_IOVS_LEN); 396 397 /* Sanity check. First descriptor must be readable and must have next one. */ 398 if (unlikely(vring_desc_is_wr(desc) || !vring_desc_has_next(desc))) { 399 SPDK_WARNLOG("Invalid first (request) descriptor.\n"); 400 task->resp = NULL; 401 goto abort_task; 402 } 403 404 *req = (void *)gpa_to_vva(dev, desc->addr); 405 406 desc = vring_desc_get_next(vq->desc, desc); 407 task->scsi.dxfer_dir = vring_desc_is_wr(desc) ? SPDK_SCSI_DIR_FROM_DEV : SPDK_SCSI_DIR_TO_DEV; 408 409 if (task->scsi.dxfer_dir == SPDK_SCSI_DIR_FROM_DEV) { 410 /* 411 * FROM_DEV (READ): [RD_req][WR_resp][WR_buf0]...[WR_bufN] 412 */ 413 task->resp = (void *)gpa_to_vva(dev, desc->addr); 414 if (!vring_desc_has_next(desc)) { 415 /* 416 * TEST UNIT READY command and some others might not contain any payload and this is not an error. 417 */ 418 SPDK_TRACELOG(SPDK_TRACE_VHOST_DATA, 419 "No payload descriptors for FROM DEV command req_idx=%"PRIu16".\n", task->req_idx); 420 SPDK_TRACEDUMP(SPDK_TRACE_VHOST_DATA, "CDB=", (*req)->cdb, VIRTIO_SCSI_CDB_SIZE); 421 task->scsi.iovcnt = 1; 422 task->scsi.iovs[0].iov_len = 0; 423 task->scsi.length = 0; 424 task->scsi.transfer_len = 0; 425 return 0; 426 } 427 428 desc = vring_desc_get_next(vq->desc, desc); 429 if (iovcnt_max != VHOST_SCSI_IOVS_LEN && vring_desc_has_next(desc)) { 430 iovs = spdk_vhost_iovec_alloc(); 431 if (iovs == NULL) { 432 return 1; 433 } 434 435 iovcnt_max = VHOST_SCSI_IOVS_LEN; 436 } 437 438 /* All remaining descriptors are data. */ 439 while (iovcnt < iovcnt_max) { 440 iovs[iovcnt].iov_base = (void *)gpa_to_vva(dev, desc->addr); 441 iovs[iovcnt].iov_len = desc->len; 442 len += desc->len; 443 iovcnt++; 444 445 if (!vring_desc_has_next(desc)) 446 break; 447 448 desc = vring_desc_get_next(vq->desc, desc); 449 if (unlikely(!vring_desc_is_wr(desc))) { 450 SPDK_WARNLOG("FROM DEV cmd: descriptor nr %" PRIu16" in payload chain is read only.\n", iovcnt); 451 task->resp = NULL; 452 goto abort_task; 453 } 454 } 455 } else { 456 SPDK_TRACELOG(SPDK_TRACE_VHOST_DATA, "TO DEV"); 457 /* 458 * TO_DEV (WRITE):[RD_req][RD_buf0]...[RD_bufN][WR_resp] 459 * No need to check descriptor WR flag as this is done while setting scsi.dxfer_dir. 460 */ 461 462 if (iovcnt_max != VHOST_SCSI_IOVS_LEN && vring_desc_has_next(desc)) { 463 /* If next descriptor is not for response, allocate iovs. */ 464 if (!vring_desc_is_wr(vring_desc_get_next(vq->desc, desc))) { 465 iovs = spdk_vhost_iovec_alloc(); 466 467 if (iovs == NULL) { 468 return 1; 469 } 470 471 iovcnt_max = VHOST_SCSI_IOVS_LEN; 472 } 473 } 474 475 /* Process descriptors up to response. */ 476 while (!vring_desc_is_wr(desc) && iovcnt < iovcnt_max) { 477 iovs[iovcnt].iov_base = (void *)gpa_to_vva(dev, desc->addr); 478 iovs[iovcnt].iov_len = desc->len; 479 len += desc->len; 480 iovcnt++; 481 482 if (!vring_desc_has_next(desc)) { 483 SPDK_WARNLOG("TO_DEV cmd: no response descriptor.\n"); 484 task->resp = NULL; 485 goto abort_task; 486 } 487 488 desc = vring_desc_get_next(vq->desc, desc); 489 } 490 491 task->resp = (void *)gpa_to_vva(dev, desc->addr); 492 if (vring_desc_has_next(desc)) { 493 SPDK_WARNLOG("TO_DEV cmd: ignoring unexpected descriptors after response descriptor.\n"); 494 } 495 } 496 497 if (iovcnt_max > 1 && iovcnt == iovcnt_max) { 498 SPDK_WARNLOG("Too many IO vectors in chain!\n"); 499 goto abort_task; 500 } 501 502 task->scsi.iovs = iovs; 503 task->scsi.iovcnt = iovcnt; 504 task->scsi.length = len; 505 task->scsi.transfer_len = len; 506 return 0; 507 508 abort_task: 509 if (iovs != task->scsi.iovs) { 510 spdk_vhost_iovec_free(iovs); 511 } 512 513 if (task->resp) { 514 task->resp->response = VIRTIO_SCSI_S_ABORTED; 515 } 516 517 return -1; 518 } 519 520 static int 521 process_request(struct spdk_vhost_task *task) 522 { 523 struct virtio_scsi_cmd_req *req; 524 int result; 525 526 result = task_data_setup(task, &req); 527 if (result) { 528 return result; 529 } 530 531 task->scsi_dev = get_scsi_dev(task->vdev, req->lun); 532 if (unlikely(task->scsi_dev == NULL)) { 533 task->resp->response = VIRTIO_SCSI_S_BAD_TARGET; 534 return -1; 535 } 536 537 task->scsi.lun = get_scsi_lun(task->scsi_dev, req->lun); 538 task->scsi.cdb = req->cdb; 539 task->scsi.target_port = spdk_scsi_dev_find_port_by_id(task->scsi_dev, 0); 540 SPDK_TRACEDUMP(SPDK_TRACE_VHOST_DATA, "request CDB", req->cdb, VIRTIO_SCSI_CDB_SIZE); 541 return 0; 542 } 543 544 static void 545 process_controlq(struct spdk_vhost_scsi_ctrlr *vdev, struct vhost_virtqueue *vq) 546 { 547 uint16_t reqs[32]; 548 uint16_t reqs_cnt, i; 549 550 reqs_cnt = vq_avail_ring_get(vq, reqs, RTE_DIM(reqs)); 551 for (i = 0; i < reqs_cnt; i++) { 552 process_ctrl_request(vdev, vq, reqs[i]); 553 } 554 } 555 556 static void 557 process_requestq(struct spdk_vhost_scsi_ctrlr *vdev, struct vhost_virtqueue *vq) 558 { 559 uint16_t reqs[32]; 560 uint16_t reqs_cnt, i; 561 struct spdk_vhost_task *task; 562 int result; 563 564 reqs_cnt = vq_avail_ring_get(vq, reqs, RTE_DIM(reqs)); 565 assert(reqs_cnt <= 32); 566 567 for (i = 0; i < reqs_cnt; i++) { 568 task = spdk_vhost_task_get(&vdev->task_cnt); 569 570 SPDK_TRACELOG(SPDK_TRACE_VHOST, "====== Starting processing request idx %"PRIu16"======\n", 571 reqs[i]); 572 task->vq = vq; 573 task->vdev = vdev; 574 task->req_idx = reqs[i]; 575 result = process_request(task); 576 if (likely(result == 0)) { 577 task_submit(task); 578 SPDK_TRACELOG(SPDK_TRACE_VHOST, "====== Task %p req_idx %d submitted ======\n", task, 579 task->req_idx); 580 } else if (result > 0) { 581 spdk_vhost_enqueue_task(task); 582 SPDK_TRACELOG(SPDK_TRACE_VHOST, "====== Task %p req_idx %d deferred ======\n", task, task->req_idx); 583 } else { 584 invalid_request(task); 585 SPDK_TRACELOG(SPDK_TRACE_VHOST, "====== Task %p req_idx %d failed ======\n", task, task->req_idx); 586 } 587 } 588 } 589 590 static void 591 vdev_controlq_worker(void *arg) 592 { 593 struct spdk_vhost_scsi_ctrlr *vdev = arg; 594 595 process_controlq(vdev, vdev->dev->virtqueue[VIRTIO_SCSI_CONTROLQ]); 596 } 597 598 static void 599 vdev_worker(void *arg) 600 { 601 struct spdk_vhost_scsi_ctrlr *vdev = arg; 602 uint32_t q_idx; 603 604 for (q_idx = VIRTIO_SCSI_REQUESTQ; q_idx < vdev->dev->num_queues; q_idx++) { 605 process_requestq(vdev, vdev->dev->virtqueue[q_idx]); 606 } 607 } 608 609 #define SHIFT_2MB 21 610 #define SIZE_2MB (1ULL << SHIFT_2MB) 611 #define FLOOR_2MB(x) (((uintptr_t)x) / SIZE_2MB) << SHIFT_2MB 612 #define CEIL_2MB(x) ((((uintptr_t)x) + SIZE_2MB - 1) / SIZE_2MB) << SHIFT_2MB 613 614 static void 615 vdev_event_done_cb(void *arg1, void *arg2) 616 { 617 sem_post((sem_t *)arg2); 618 } 619 620 static struct spdk_event * 621 vhost_sem_event_alloc(uint32_t core, spdk_event_fn fn, void *arg1, sem_t *sem) 622 { 623 if (sem_init(sem, 0, 0) < 0) 624 rte_panic("Failed to initialize semaphore."); 625 626 return spdk_event_allocate(core, fn, arg1, sem); 627 } 628 629 static int 630 vhost_sem_timedwait(sem_t *sem, unsigned sec) 631 { 632 struct timespec timeout; 633 int rc; 634 635 clock_gettime(CLOCK_REALTIME, &timeout); 636 timeout.tv_sec += sec; 637 638 rc = sem_timedwait(sem, &timeout); 639 sem_destroy(sem); 640 641 return rc; 642 } 643 644 static void 645 add_vdev_cb(void *arg1, void *arg2) 646 { 647 struct spdk_vhost_scsi_ctrlr *vdev = arg1; 648 struct virtio_memory_region *region; 649 uint32_t i; 650 651 for (i = 0; i < SPDK_VHOST_SCSI_CTRLR_MAX_DEVS; i++) { 652 if (vdev->scsi_dev[i] == NULL) { 653 continue; 654 } 655 spdk_scsi_dev_allocate_io_channels(vdev->scsi_dev[i]); 656 } 657 SPDK_NOTICELOG("Started poller for vhost controller %s on lcore %d\n", vdev->name, vdev->lcore); 658 vdev->nregions = vdev->dev->mem->nregions; 659 for (i = 0; i < vdev->nregions; i++) { 660 uint64_t start, end, len; 661 region = &vdev->dev->mem->regions[i]; 662 start = FLOOR_2MB(region->mmap_addr); 663 end = CEIL_2MB(region->mmap_addr + region->mmap_size); 664 len = end - start; 665 vdev->region[i].vaddr = (void *)start; 666 vdev->region[i].len = len; 667 SPDK_NOTICELOG("Registering VM memory for vtophys translation - 0x%jx len:0x%jx\n", 668 start, len); 669 spdk_mem_register(vdev->region[i].vaddr, vdev->region[i].len); 670 } 671 672 spdk_poller_register(&vdev->requestq_poller, vdev_worker, vdev, vdev->lcore, 0); 673 spdk_poller_register(&vdev->controlq_poller, vdev_controlq_worker, vdev, vdev->lcore, 674 CONTROLQ_POLL_PERIOD_US); 675 sem_post((sem_t *)arg2); 676 } 677 678 static void 679 remove_vdev_cb(void *arg1, void *arg2) 680 { 681 struct spdk_vhost_scsi_ctrlr *vdev = arg1; 682 uint32_t i; 683 684 for (i = 0; i < SPDK_VHOST_SCSI_CTRLR_MAX_DEVS; i++) { 685 if (vdev->scsi_dev[i] == NULL) { 686 continue; 687 } 688 spdk_scsi_dev_free_io_channels(vdev->scsi_dev[i]); 689 } 690 691 SPDK_NOTICELOG("Stopping poller for vhost controller %s\n", vdev->name); 692 for (i = 0; i < vdev->nregions; i++) { 693 spdk_mem_unregister(vdev->region[i].vaddr, vdev->region[i].len); 694 } 695 696 vdev->nregions = 0; 697 698 sem_post((sem_t *)arg2); 699 } 700 701 static void 702 destroy_device(int vid) 703 { 704 struct spdk_vhost_scsi_ctrlr *vdev = dpdk_vid_mapping[vid]; 705 struct spdk_event *event; 706 sem_t done_sem; 707 uint32_t i; 708 709 event = vhost_sem_event_alloc(vdev->lcore, vdev_event_done_cb, NULL, &done_sem); 710 spdk_poller_unregister(&vdev->requestq_poller, event); 711 if (vhost_sem_timedwait(&done_sem, 1)) 712 rte_panic("%s: failed to unregister request queue poller.\n", vdev->name); 713 714 event = vhost_sem_event_alloc(vdev->lcore, vdev_event_done_cb, NULL, &done_sem); 715 spdk_poller_unregister(&vdev->controlq_poller, event); 716 if (vhost_sem_timedwait(&done_sem, 1)) 717 rte_panic("%s: failed to unregister control queue poller.\n", vdev->name); 718 719 /* Wait for all tasks to finish */ 720 for (i = 1000; i && vdev->task_cnt > 0; i--) { 721 usleep(1000); 722 } 723 724 if (vdev->task_cnt > 0) { 725 rte_panic("%s: pending tasks did not finish in 1s.\n", vdev->name); 726 } 727 728 event = vhost_sem_event_alloc(vdev->lcore, remove_vdev_cb, vdev, &done_sem); 729 spdk_event_call(event); 730 if (vhost_sem_timedwait(&done_sem, 1)) 731 rte_panic("%s: failed to unregister poller.\n", vdev->name); 732 733 g_num_ctrlrs[vdev->lcore]--; 734 vdev->lcore = -1; 735 vdev->dev = NULL; 736 dpdk_vid_mapping[vid] = NULL; 737 } 738 739 #define LUN_DEV_NAME_SIZE 8 740 #define MAX_SCSI_CTRLRS 15 741 742 static struct spdk_vhost_scsi_ctrlr *spdk_vhost_ctrlrs[MAX_SCSI_CTRLRS]; 743 744 static struct spdk_vhost_scsi_ctrlr * 745 spdk_vhost_scsi_ctrlr_find(const char *ctrlr_name) 746 { 747 unsigned i; 748 size_t dev_dirname_len = strlen(dev_dirname); 749 750 if (strncmp(ctrlr_name, dev_dirname, dev_dirname_len) == 0) { 751 ctrlr_name += dev_dirname_len; 752 } 753 754 for (i = 0; i < MAX_SCSI_CTRLRS; i++) { 755 if (spdk_vhost_ctrlrs[i] == NULL) { 756 continue; 757 } 758 759 if (strcmp(spdk_vhost_ctrlrs[i]->name, ctrlr_name) == 0) { 760 return spdk_vhost_ctrlrs[i]; 761 } 762 } 763 764 return NULL; 765 } 766 767 int 768 spdk_vhost_scsi_ctrlr_construct(const char *name, uint64_t cpumask) 769 { 770 struct spdk_vhost_scsi_ctrlr *vdev; 771 unsigned ctrlr_num; 772 char path[PATH_MAX]; 773 struct stat file_stat; 774 775 if (name == NULL) { 776 SPDK_ERRLOG("Can't add controller with no name\n"); 777 return -EINVAL; 778 } 779 780 if ((cpumask & spdk_app_get_core_mask()) != cpumask) { 781 SPDK_ERRLOG("cpumask 0x%jx not a subset of app mask 0x%jx\n", 782 cpumask, spdk_app_get_core_mask()); 783 return -EINVAL; 784 } 785 786 if (spdk_vhost_scsi_ctrlr_find(name)) { 787 SPDK_ERRLOG("vhost scsi controller %s already exists.\n", name); 788 return -EEXIST; 789 } 790 791 for (ctrlr_num = 0; ctrlr_num < MAX_SCSI_CTRLRS; ctrlr_num++) { 792 if (spdk_vhost_ctrlrs[ctrlr_num] == NULL) { 793 break; 794 } 795 } 796 797 if (ctrlr_num == MAX_SCSI_CTRLRS) { 798 SPDK_ERRLOG("Max scsi controllers reached (%d).\n", MAX_SCSI_CTRLRS); 799 return -ENOSPC; 800 } 801 802 if (snprintf(path, sizeof(path), "%s%s", dev_dirname, name) >= (int)sizeof(path)) { 803 SPDK_ERRLOG("Resulting socket path for controller %s is too long: %s%s\n", name, dev_dirname, name); 804 return -EINVAL; 805 } 806 807 /* Register vhost driver to handle vhost messages. */ 808 if (stat(path, &file_stat) != -1) { 809 if (!S_ISSOCK(file_stat.st_mode)) { 810 SPDK_ERRLOG("Cannot remove %s: not a socket.\n", path); 811 return -EINVAL; 812 } else if (unlink(path) != 0) { 813 rte_exit(EXIT_FAILURE, "Cannot remove %s.\n", path); 814 } 815 } 816 817 if (rte_vhost_driver_register(path, 0) != 0) { 818 SPDK_ERRLOG("Could not register controller %s with vhost library\n", name); 819 SPDK_ERRLOG("Check if domain socket %s already exists\n", path); 820 return -EIO; 821 } 822 823 vdev = rte_zmalloc(NULL, sizeof(*vdev), RTE_CACHE_LINE_SIZE); 824 if (vdev == NULL) { 825 SPDK_ERRLOG("Couldn't allocate memory for vhost dev\n"); 826 return -ENOMEM; 827 } 828 829 spdk_vhost_ctrlrs[ctrlr_num] = vdev; 830 vdev->name = strdup(name); 831 vdev->cpumask = cpumask; 832 vdev->lcore = -1; 833 SPDK_NOTICELOG("Controller %s: new controller added\n", name); 834 return 0; 835 } 836 837 int 838 spdk_vhost_parse_core_mask(const char *mask, uint64_t *cpumask) 839 { 840 char *end; 841 842 if (mask == NULL || cpumask == NULL) { 843 return -1; 844 } 845 846 errno = 0; 847 *cpumask = strtoull(mask, &end, 16); 848 849 if (*end != '\0' || errno || !*cpumask || 850 ((*cpumask & spdk_app_get_core_mask()) != *cpumask)) { 851 852 SPDK_ERRLOG("cpumask %s not a subset of app mask 0x%jx\n", 853 mask, spdk_app_get_core_mask()); 854 return -1; 855 } 856 857 return 0; 858 } 859 860 struct spdk_scsi_dev * 861 spdk_vhost_scsi_ctrlr_get_dev(struct spdk_vhost_scsi_ctrlr *ctrlr, uint8_t num) 862 { 863 assert(ctrlr != NULL); 864 assert(num < SPDK_VHOST_SCSI_CTRLR_MAX_DEVS); 865 return ctrlr->scsi_dev[num]; 866 } 867 868 int 869 spdk_vhost_scsi_ctrlr_add_dev(const char *ctrlr_name, unsigned scsi_dev_num, const char *lun_name) 870 { 871 struct spdk_vhost_scsi_ctrlr *vdev; 872 char dev_name[SPDK_SCSI_DEV_MAX_NAME]; 873 int lun_id_list[1]; 874 char *lun_names_list[1]; 875 876 if (ctrlr_name == NULL) { 877 SPDK_ERRLOG("No controller name\n"); 878 return -EINVAL; 879 } 880 881 if (scsi_dev_num >= SPDK_VHOST_SCSI_CTRLR_MAX_DEVS) { 882 SPDK_ERRLOG("Controller %d device number too big (max %d)\n", scsi_dev_num, 883 SPDK_VHOST_SCSI_CTRLR_MAX_DEVS); 884 return -EINVAL; 885 } 886 887 if (lun_name == NULL) { 888 SPDK_ERRLOG("No lun name specified \n"); 889 return -EINVAL; 890 } else if (strlen(lun_name) >= SPDK_SCSI_DEV_MAX_NAME) { 891 SPDK_ERRLOG("LUN name '%s' too long (max %d).\n", lun_name, SPDK_SCSI_DEV_MAX_NAME - 1); 892 return -1; 893 } 894 895 vdev = spdk_vhost_scsi_ctrlr_find(ctrlr_name); 896 if (vdev == NULL) { 897 SPDK_ERRLOG("Controller %s is not defined\n", ctrlr_name); 898 return -ENODEV; 899 } 900 901 if (vdev->lcore != -1) { 902 SPDK_ERRLOG("Controller %s is in use and hotplug is not supported\n", ctrlr_name); 903 return -ENODEV; 904 } 905 906 if (vdev->scsi_dev[scsi_dev_num] != NULL) { 907 SPDK_ERRLOG("Controller %s dev %u already occupied\n", ctrlr_name, scsi_dev_num); 908 return -EEXIST; 909 } 910 911 /* 912 * At this stage only one LUN per device 913 */ 914 snprintf(dev_name, sizeof(dev_name), "Dev %u", scsi_dev_num); 915 lun_id_list[0] = 0; 916 lun_names_list[0] = (char *)lun_name; 917 918 vdev->scsi_dev[scsi_dev_num] = spdk_scsi_dev_construct(dev_name, lun_names_list, lun_id_list, 1); 919 if (vdev->scsi_dev[scsi_dev_num] == NULL) { 920 SPDK_ERRLOG("Couldn't create spdk SCSI device '%s' using lun device '%s' in controller: %s\n", 921 dev_name, lun_name, vdev->name); 922 return -EINVAL; 923 } 924 925 spdk_scsi_dev_add_port(vdev->scsi_dev[scsi_dev_num], 0, "vhost"); 926 SPDK_NOTICELOG("Controller %s: defined device '%s' using lun '%s'\n", 927 vdev->name, dev_name, lun_name); 928 return 0; 929 } 930 931 struct spdk_vhost_scsi_ctrlr * 932 spdk_vhost_scsi_ctrlr_next(struct spdk_vhost_scsi_ctrlr *prev) 933 { 934 int i = 0; 935 936 if (prev != NULL) { 937 for (; i < MAX_SCSI_CTRLRS; i++) { 938 if (spdk_vhost_ctrlrs[i] == prev) { 939 break; 940 } 941 } 942 943 i++; 944 } 945 946 for (; i < MAX_SCSI_CTRLRS; i++) { 947 if (spdk_vhost_ctrlrs[i] == NULL) { 948 continue; 949 } 950 951 return spdk_vhost_ctrlrs[i]; 952 } 953 954 return NULL; 955 } 956 957 const char * 958 spdk_vhost_scsi_ctrlr_get_name(struct spdk_vhost_scsi_ctrlr *ctrlr) 959 { 960 assert(ctrlr != NULL); 961 return ctrlr->name; 962 } 963 964 uint64_t 965 spdk_vhost_scsi_ctrlr_get_cpumask(struct spdk_vhost_scsi_ctrlr *ctrlr) 966 { 967 assert(ctrlr != NULL); 968 return ctrlr->cpumask; 969 } 970 971 static int spdk_vhost_scsi_controller_construct(void) 972 { 973 struct spdk_conf_section *sp = spdk_conf_first_section(NULL); 974 int i, dev_num; 975 unsigned ctrlr_num = 0; 976 char *lun_name, *dev_num_str; 977 char *cpumask_str; 978 char *name; 979 uint64_t cpumask; 980 981 while (sp != NULL) { 982 if (!spdk_conf_section_match_prefix(sp, "VhostScsi")) { 983 sp = spdk_conf_next_section(sp); 984 continue; 985 } 986 987 if (sscanf(spdk_conf_section_get_name(sp), "VhostScsi%u", &ctrlr_num) != 1) { 988 SPDK_ERRLOG("Section '%s' has non-numeric suffix.\n", 989 spdk_conf_section_get_name(sp)); 990 return -1; 991 } 992 993 name = spdk_conf_section_get_val(sp, "Name"); 994 cpumask_str = spdk_conf_section_get_val(sp, "Cpumask"); 995 if (cpumask_str == NULL) { 996 cpumask = spdk_app_get_core_mask(); 997 } else if (spdk_vhost_parse_core_mask(cpumask_str, &cpumask)) { 998 SPDK_ERRLOG("%s: Error parsing cpumask '%s' while creating controller\n", name, cpumask_str); 999 return -1; 1000 } 1001 1002 if (spdk_vhost_scsi_ctrlr_construct(name, cpumask) < 0) { 1003 return -1; 1004 } 1005 1006 for (i = 0; spdk_conf_section_get_nval(sp, "Dev", i) != NULL; i++) { 1007 dev_num_str = spdk_conf_section_get_nmval(sp, "Dev", i, 0); 1008 if (dev_num_str == NULL) { 1009 SPDK_ERRLOG("%s: Invalid or missing Dev number\n", name); 1010 return -1; 1011 } 1012 1013 dev_num = (int)strtol(dev_num_str, NULL, 10); 1014 lun_name = spdk_conf_section_get_nmval(sp, "Dev", i, 1); 1015 if (lun_name == NULL) { 1016 SPDK_ERRLOG("%s: Invalid or missing LUN name for dev %d\n", name, dev_num); 1017 return -1; 1018 } else if (spdk_conf_section_get_nmval(sp, "Dev", i, 2)) { 1019 SPDK_ERRLOG("%s: Only one LUN per vhost SCSI device supported\n", name); 1020 return -1; 1021 } 1022 1023 if (spdk_vhost_scsi_ctrlr_add_dev(name, dev_num, lun_name) < 0) { 1024 return -1; 1025 } 1026 } 1027 1028 sp = spdk_conf_next_section(sp); 1029 1030 } 1031 1032 return 0; 1033 } 1034 1035 static uint32_t 1036 spdk_vhost_scsi_allocate_reactor(uint64_t cpumask) 1037 { 1038 uint32_t i, selected_core; 1039 uint32_t min_ctrlrs; 1040 1041 cpumask &= spdk_app_get_core_mask(); 1042 1043 if (cpumask == 0) { 1044 return 0; 1045 } 1046 1047 min_ctrlrs = INT_MAX; 1048 selected_core = 0; 1049 1050 for (i = 0; i < RTE_MAX_LCORE && i < 64; i++) { 1051 if (!((1ULL << i) & cpumask)) { 1052 continue; 1053 } 1054 1055 if (g_num_ctrlrs[i] < min_ctrlrs) { 1056 selected_core = i; 1057 min_ctrlrs = g_num_ctrlrs[i]; 1058 } 1059 } 1060 1061 g_num_ctrlrs[selected_core]++; 1062 return selected_core; 1063 } 1064 1065 /* 1066 * A new device is added to a data core. First the device is added to the main linked list 1067 * and then allocated to a specific data core. 1068 */ 1069 static int 1070 new_device(int vid) 1071 { 1072 struct virtio_net *dev = vhost_devices[vid]; 1073 struct spdk_vhost_scsi_ctrlr *vdev = NULL; 1074 struct spdk_event *event; 1075 sem_t added; 1076 uint32_t i; 1077 1078 vdev = spdk_vhost_scsi_ctrlr_find(dev->ifname); 1079 if (vdev == NULL) { 1080 SPDK_ERRLOG("Controller %s not found.\n", dev->ifname); 1081 return -1; 1082 } 1083 1084 if (vdev->lcore != -1) { 1085 SPDK_ERRLOG("Controller %s already connected.\n", dev->ifname); 1086 return -1; 1087 } 1088 1089 dpdk_vid_mapping[vid] = vdev; 1090 vdev->dev = dev; 1091 1092 /* Disable notifications. */ 1093 for (i = 0; i < dev->num_queues; i++) { 1094 rte_vhost_enable_guest_notification(vid, i, 0); 1095 } 1096 1097 dev->flags |= VIRTIO_DEV_RUNNING; 1098 vdev->dev = dev; 1099 1100 vdev->lcore = spdk_vhost_scsi_allocate_reactor(vdev->cpumask); 1101 1102 event = vhost_sem_event_alloc(vdev->lcore, add_vdev_cb, vdev, &added); 1103 spdk_event_call(event); 1104 if (vhost_sem_timedwait(&added, 1)) 1105 rte_panic("Failed to register new device '%s'\n", vdev->name); 1106 return 0; 1107 } 1108 1109 /* 1110 * These callback allow devices to be added to the data core when configuration 1111 * has been fully complete. 1112 */ 1113 static const struct virtio_net_device_ops virtio_net_device_ops = { 1114 .new_device = new_device, 1115 .destroy_device = destroy_device, 1116 }; 1117 1118 static void * 1119 session_start(void *arg) 1120 { 1121 rte_vhost_driver_session_start(); 1122 return NULL; 1123 } 1124 1125 void 1126 spdk_vhost_startup(void *arg1, void *arg2) 1127 { 1128 int ret; 1129 pthread_t tid; 1130 const char *basename = arg1; 1131 1132 if (basename && strlen(basename) > 0) { 1133 ret = snprintf(dev_dirname, sizeof(dev_dirname) - 2, "%s", basename); 1134 if ((size_t)ret >= sizeof(dev_dirname) - 2) { 1135 rte_exit(EXIT_FAILURE, "Char dev dir path length %d is too long\n", ret); 1136 } 1137 1138 if (dev_dirname[ret - 1] != '/') { 1139 dev_dirname[ret] = '/'; 1140 dev_dirname[ret + 1] = '\0'; 1141 } 1142 } 1143 1144 ret = spdk_vhost_scsi_controller_construct(); 1145 if (ret != 0) 1146 rte_exit(EXIT_FAILURE, "Cannot construct vhost controllers\n"); 1147 1148 rte_vhost_driver_callback_register(&virtio_net_device_ops); 1149 1150 if (pthread_create(&tid, NULL, &session_start, NULL) < 0) 1151 rte_panic("Failed to start session poller thread (%d): %s", errno, strerror(errno)); 1152 pthread_detach(tid); 1153 } 1154 1155 static void * 1156 session_shutdown(void *arg) 1157 { 1158 struct spdk_vhost_scsi_ctrlr *vdev = NULL; 1159 int i; 1160 1161 for (i = 0; i < MAX_SCSI_CTRLRS; i++) { 1162 vdev = spdk_vhost_ctrlrs[i]; 1163 if (vdev == NULL) { 1164 continue; 1165 } 1166 rte_vhost_driver_unregister(vdev->name); 1167 } 1168 1169 SPDK_NOTICELOG("Exiting\n"); 1170 spdk_app_stop(0); 1171 return NULL; 1172 } 1173 1174 /* 1175 * When we receive a INT signal. Execute shutdown in separate thread to avoid deadlock. 1176 */ 1177 void 1178 spdk_vhost_shutdown_cb(void) 1179 { 1180 pthread_t tid; 1181 if (pthread_create(&tid, NULL, &session_shutdown, NULL) < 0) 1182 rte_panic("Failed to start session shutdown thread (%d): %s", errno, strerror(errno)); 1183 pthread_detach(tid); 1184 } 1185 1186 SPDK_LOG_REGISTER_TRACE_FLAG("vhost", SPDK_TRACE_VHOST) 1187 SPDK_LOG_REGISTER_TRACE_FLAG("vhost_ring", SPDK_TRACE_VHOST_RING) 1188 SPDK_LOG_REGISTER_TRACE_FLAG("vhost_queue", SPDK_TRACE_VHOST_QUEUE) 1189 SPDK_LOG_REGISTER_TRACE_FLAG("vhost_data", SPDK_TRACE_VHOST_DATA) 1190