/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2016 Intel Corporation
 */

#include <sys/socket.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/un.h>
#include <string.h>
#include <errno.h>

#include <rte_string_fns.h>
#include <rte_fbarray.h>
#include <rte_eal_memconfig.h>

#include "vhost.h"
#include "virtio_user_dev.h"

/* The version of the protocol we support */
#define VHOST_USER_VERSION 0x1

#define VHOST_MEMORY_MAX_NREGIONS 8
struct vhost_memory {
	uint32_t nregions;
	uint32_t padding;
	struct vhost_memory_region regions[VHOST_MEMORY_MAX_NREGIONS];
};

struct vhost_user_msg {
	enum vhost_user_request request;

#define VHOST_USER_VERSION_MASK	0x3
#define VHOST_USER_REPLY_MASK	(0x1 << 2)
	uint32_t flags;
	uint32_t size; /* the following payload size */
	union {
#define VHOST_USER_VRING_IDX_MASK	0xff
#define VHOST_USER_VRING_NOFD_MASK	(0x1 << 8)
		uint64_t u64;
		struct vhost_vring_state state;
		struct vhost_vring_addr addr;
		struct vhost_memory memory;
	} payload;
	int fds[VHOST_MEMORY_MAX_NREGIONS];
} __attribute((packed));

#define VHOST_USER_HDR_SIZE offsetof(struct vhost_user_msg, payload.u64)
#define VHOST_USER_PAYLOAD_SIZE \
	(sizeof(struct vhost_user_msg) - VHOST_USER_HDR_SIZE)

static int
vhost_user_write(int fd, void *buf, int len, int *fds, int fd_num)
{
	int r;
	struct msghdr msgh;
	struct iovec iov;
	size_t fd_size = fd_num * sizeof(int);
	char control[CMSG_SPACE(fd_size)];
	struct cmsghdr *cmsg;

	memset(&msgh, 0, sizeof(msgh));
	memset(control, 0, sizeof(control));

	iov.iov_base = (uint8_t *)buf;
	iov.iov_len = len;

	msgh.msg_iov = &iov;
	msgh.msg_iovlen = 1;
	msgh.msg_control = control;
	msgh.msg_controllen = sizeof(control);

	cmsg = CMSG_FIRSTHDR(&msgh);
	cmsg->cmsg_len = CMSG_LEN(fd_size);
	cmsg->cmsg_level = SOL_SOCKET;
	cmsg->cmsg_type = SCM_RIGHTS;
	memcpy(CMSG_DATA(cmsg), fds, fd_size);

	do {
		r = sendmsg(fd, &msgh, 0);
	} while (r < 0 && errno == EINTR);

	return r;
}

static int
vhost_user_read(int fd, struct vhost_user_msg *msg)
{
	uint32_t valid_flags = VHOST_USER_REPLY_MASK | VHOST_USER_VERSION;
	int ret, sz_hdr = VHOST_USER_HDR_SIZE, sz_payload;

	ret = recv(fd, (void *)msg, sz_hdr, 0);
	if (ret < sz_hdr) {
		PMD_DRV_LOG(ERR, "Failed to recv msg hdr: %d instead of %d.",
			    ret, sz_hdr);
		goto fail;
	}

	/* validate msg flags */
	if (msg->flags != (valid_flags)) {
		PMD_DRV_LOG(ERR, "Failed to recv msg: flags %x instead of %x.",
			    msg->flags, valid_flags);
		goto fail;
	}

	sz_payload = msg->size;

	if ((size_t)sz_payload > sizeof(msg->payload))
		goto fail;

	if (sz_payload) {
		ret = recv(fd, (void *)((char *)msg + sz_hdr), sz_payload, 0);
		if (ret < sz_payload) {
			PMD_DRV_LOG(ERR,
				"Failed to recv msg payload: %d instead of %d.",
				ret, msg->size);
			goto fail;
		}
	}

	return 0;

fail:
	return -1;
}
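
/*
 * State carried across update_memory_region() callbacks while walking
 * the memsegs: the memory table being filled in, the backing fd of each
 * region, and the number of regions used so far.
 */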
struct walk_arg {
	struct vhost_memory *vm;
	int *fds;
	int region_nr;
};

static int
update_memory_region(const struct rte_memseg_list *msl __rte_unused,
		const struct rte_memseg *ms, void *arg)
{
	struct walk_arg *wa = arg;
	struct vhost_memory_region *mr;
	uint64_t start_addr, end_addr;
	size_t offset;
	int i, fd;

	fd = rte_memseg_get_fd_thread_unsafe(ms);
	if (fd < 0) {
		PMD_DRV_LOG(ERR, "Failed to get fd, ms=%p rte_errno=%d",
			ms, rte_errno);
		return -1;
	}

	if (rte_memseg_get_fd_offset_thread_unsafe(ms, &offset) < 0) {
		PMD_DRV_LOG(ERR, "Failed to get offset, ms=%p rte_errno=%d",
			ms, rte_errno);
		return -1;
	}

	start_addr = (uint64_t)(uintptr_t)ms->addr;
	end_addr = start_addr + ms->len;

	for (i = 0; i < wa->region_nr; i++) {
		if (wa->fds[i] != fd)
			continue;

		mr = &wa->vm->regions[i];

		if (mr->userspace_addr + mr->memory_size < end_addr)
			mr->memory_size = end_addr - mr->userspace_addr;

		if (mr->userspace_addr > start_addr) {
			mr->userspace_addr = start_addr;
			mr->guest_phys_addr = start_addr;
		}

		if (mr->mmap_offset > offset)
			mr->mmap_offset = offset;

		PMD_DRV_LOG(DEBUG, "index=%d fd=%d offset=0x%" PRIx64
			" addr=0x%" PRIx64 " len=%" PRIu64, i, fd,
			mr->mmap_offset, mr->userspace_addr,
			mr->memory_size);

		return 0;
	}

	if (i >= VHOST_MEMORY_MAX_NREGIONS) {
		PMD_DRV_LOG(ERR, "Too many memory regions");
		return -1;
	}

	mr = &wa->vm->regions[i];
	wa->fds[i] = fd;

	mr->guest_phys_addr = start_addr;
	mr->userspace_addr = start_addr;
	mr->memory_size = ms->len;
	mr->mmap_offset = offset;

	PMD_DRV_LOG(DEBUG, "index=%d fd=%d offset=0x%" PRIx64
		" addr=0x%" PRIx64 " len=%" PRIu64, i, fd,
		mr->mmap_offset, mr->userspace_addr,
		mr->memory_size);

	wa->region_nr++;

	return 0;
}

static int
prepare_vhost_memory_user(struct vhost_user_msg *msg, int fds[])
{
	struct walk_arg wa;

	wa.region_nr = 0;
	wa.vm = &msg->payload.memory;
	wa.fds = fds;

	/*
	 * The memory lock has already been taken by memory subsystem
	 * or virtio_user_start_device().
	 */
	if (rte_memseg_walk_thread_unsafe(update_memory_region, &wa) < 0)
		return -1;

	msg->payload.memory.nregions = wa.region_nr;
	msg->payload.memory.padding = 0;

	return 0;
}
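
/*
 * Dummy message used only as a sizeof() reference for the payload
 * members below; it is never read or written (see RTE_SET_USED(m)).
 */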
static struct vhost_user_msg m;

const char * const vhost_msg_strings[] = {
	[VHOST_USER_SET_OWNER] = "VHOST_SET_OWNER",
	[VHOST_USER_RESET_OWNER] = "VHOST_RESET_OWNER",
	[VHOST_USER_SET_FEATURES] = "VHOST_SET_FEATURES",
	[VHOST_USER_GET_FEATURES] = "VHOST_GET_FEATURES",
	[VHOST_USER_SET_VRING_CALL] = "VHOST_SET_VRING_CALL",
	[VHOST_USER_SET_VRING_NUM] = "VHOST_SET_VRING_NUM",
	[VHOST_USER_SET_VRING_BASE] = "VHOST_SET_VRING_BASE",
	[VHOST_USER_GET_VRING_BASE] = "VHOST_GET_VRING_BASE",
	[VHOST_USER_SET_VRING_ADDR] = "VHOST_SET_VRING_ADDR",
	[VHOST_USER_SET_VRING_KICK] = "VHOST_SET_VRING_KICK",
	[VHOST_USER_SET_MEM_TABLE] = "VHOST_SET_MEM_TABLE",
	[VHOST_USER_SET_VRING_ENABLE] = "VHOST_SET_VRING_ENABLE",
};

static int
vhost_user_sock(struct virtio_user_dev *dev,
		enum vhost_user_request req,
		void *arg)
{
	struct vhost_user_msg msg;
	struct vhost_vring_file *file = 0;
	int need_reply = 0;
	int fds[VHOST_MEMORY_MAX_NREGIONS];
	int fd_num = 0;
	int len;
	int vhostfd = dev->vhostfd;

	RTE_SET_USED(m);

	PMD_DRV_LOG(INFO, "%s", vhost_msg_strings[req]);

	if (dev->is_server && vhostfd < 0)
		return -1;

	msg.request = req;
	msg.flags = VHOST_USER_VERSION;
	msg.size = 0;

	switch (req) {
	case VHOST_USER_GET_FEATURES:
		need_reply = 1;
		break;

	case VHOST_USER_SET_FEATURES:
	case VHOST_USER_SET_LOG_BASE:
		msg.payload.u64 = *((__u64 *)arg);
		msg.size = sizeof(m.payload.u64);
		break;

	case VHOST_USER_SET_OWNER:
	case VHOST_USER_RESET_OWNER:
		break;

	case VHOST_USER_SET_MEM_TABLE:
		if (prepare_vhost_memory_user(&msg, fds) < 0)
			return -1;
		fd_num = msg.payload.memory.nregions;
		msg.size = sizeof(m.payload.memory.nregions);
		msg.size += sizeof(m.payload.memory.padding);
		msg.size += fd_num * sizeof(struct vhost_memory_region);
		break;

	case VHOST_USER_SET_LOG_FD:
		fds[fd_num++] = *((int *)arg);
		break;

	case VHOST_USER_SET_VRING_NUM:
	case VHOST_USER_SET_VRING_BASE:
	case VHOST_USER_SET_VRING_ENABLE:
		memcpy(&msg.payload.state, arg, sizeof(msg.payload.state));
		msg.size = sizeof(m.payload.state);
		break;

	case VHOST_USER_GET_VRING_BASE:
		memcpy(&msg.payload.state, arg, sizeof(msg.payload.state));
		msg.size = sizeof(m.payload.state);
		need_reply = 1;
		break;

	case VHOST_USER_SET_VRING_ADDR:
		memcpy(&msg.payload.addr, arg, sizeof(msg.payload.addr));
		msg.size = sizeof(m.payload.addr);
		break;

	case VHOST_USER_SET_VRING_KICK:
	case VHOST_USER_SET_VRING_CALL:
	case VHOST_USER_SET_VRING_ERR:
		file = arg;
		msg.payload.u64 = file->index & VHOST_USER_VRING_IDX_MASK;
		msg.size = sizeof(m.payload.u64);
		if (file->fd > 0)
			fds[fd_num++] = file->fd;
		else
			msg.payload.u64 |= VHOST_USER_VRING_NOFD_MASK;
		break;

	default:
		PMD_DRV_LOG(ERR, "trying to send unhandled msg type");
		return -1;
	}

	len = VHOST_USER_HDR_SIZE + msg.size;
	if (vhost_user_write(vhostfd, &msg, len, fds, fd_num) < 0) {
		PMD_DRV_LOG(ERR, "%s failed: %s",
			    vhost_msg_strings[req], strerror(errno));
		return -1;
	}

	if (need_reply) {
		if (vhost_user_read(vhostfd, &msg) < 0) {
			PMD_DRV_LOG(ERR, "Received msg failed: %s",
				    strerror(errno));
			return -1;
		}

		if (req != msg.request) {
			PMD_DRV_LOG(ERR, "Received unexpected msg type");
			return -1;
		}

		switch (req) {
		case VHOST_USER_GET_FEATURES:
			if (msg.size != sizeof(m.payload.u64)) {
				PMD_DRV_LOG(ERR, "Received bad msg size");
				return -1;
			}
			*((__u64 *)arg) = msg.payload.u64;
			break;
		case VHOST_USER_GET_VRING_BASE:
			if (msg.size != sizeof(m.payload.state)) {
				PMD_DRV_LOG(ERR, "Received bad msg size");
				return -1;
			}
			memcpy(arg, &msg.payload.state,
			       sizeof(struct vhost_vring_state));
			break;
		default:
			PMD_DRV_LOG(ERR, "Received unexpected msg type");
			return -1;
		}
	}

	return 0;
}
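
/*
 * Server mode: virtio-user owns the listening end of the unix socket.
 * bind()/listen() on dev->path and mark the listen fd non-blocking;
 * the connection from the vhost backend is accepted later, so
 * dev->vhostfd stays -1 until then.
 */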
PMD_DRV_LOG(ERR, "Received msg failed: %s", 344 strerror(errno)); 345 return -1; 346 } 347 348 if (req != msg.request) { 349 PMD_DRV_LOG(ERR, "Received unexpected msg type"); 350 return -1; 351 } 352 353 switch (req) { 354 case VHOST_USER_GET_FEATURES: 355 if (msg.size != sizeof(m.payload.u64)) { 356 PMD_DRV_LOG(ERR, "Received bad msg size"); 357 return -1; 358 } 359 *((__u64 *)arg) = msg.payload.u64; 360 break; 361 case VHOST_USER_GET_VRING_BASE: 362 if (msg.size != sizeof(m.payload.state)) { 363 PMD_DRV_LOG(ERR, "Received bad msg size"); 364 return -1; 365 } 366 memcpy(arg, &msg.payload.state, 367 sizeof(struct vhost_vring_state)); 368 break; 369 default: 370 PMD_DRV_LOG(ERR, "Received unexpected msg type"); 371 return -1; 372 } 373 } 374 375 return 0; 376 } 377 378 #define MAX_VIRTIO_USER_BACKLOG 1 379 static int 380 virtio_user_start_server(struct virtio_user_dev *dev, struct sockaddr_un *un) 381 { 382 int ret; 383 int flag; 384 int fd = dev->listenfd; 385 386 ret = bind(fd, (struct sockaddr *)un, sizeof(*un)); 387 if (ret < 0) { 388 PMD_DRV_LOG(ERR, "failed to bind to %s: %s; remove it and try again\n", 389 dev->path, strerror(errno)); 390 return -1; 391 } 392 ret = listen(fd, MAX_VIRTIO_USER_BACKLOG); 393 if (ret < 0) 394 return -1; 395 396 flag = fcntl(fd, F_GETFL); 397 if (fcntl(fd, F_SETFL, flag | O_NONBLOCK) < 0) { 398 PMD_DRV_LOG(ERR, "fcntl failed, %s", strerror(errno)); 399 return -1; 400 } 401 402 return 0; 403 } 404 405 /** 406 * Set up environment to talk with a vhost user backend. 407 * 408 * @return 409 * - (-1) if fail; 410 * - (0) if succeed. 411 */ 412 static int 413 vhost_user_setup(struct virtio_user_dev *dev) 414 { 415 int fd; 416 int flag; 417 struct sockaddr_un un; 418 419 fd = socket(AF_UNIX, SOCK_STREAM, 0); 420 if (fd < 0) { 421 PMD_DRV_LOG(ERR, "socket() error, %s", strerror(errno)); 422 return -1; 423 } 424 425 flag = fcntl(fd, F_GETFD); 426 if (fcntl(fd, F_SETFD, flag | FD_CLOEXEC) < 0) 427 PMD_DRV_LOG(WARNING, "fcntl failed, %s", strerror(errno)); 428 429 memset(&un, 0, sizeof(un)); 430 un.sun_family = AF_UNIX; 431 strlcpy(un.sun_path, dev->path, sizeof(un.sun_path)); 432 433 if (dev->is_server) { 434 dev->listenfd = fd; 435 if (virtio_user_start_server(dev, &un) < 0) { 436 PMD_DRV_LOG(ERR, "virtio-user startup fails in server mode"); 437 close(fd); 438 return -1; 439 } 440 dev->vhostfd = -1; 441 } else { 442 if (connect(fd, (struct sockaddr *)&un, sizeof(un)) < 0) { 443 PMD_DRV_LOG(ERR, "connect error, %s", strerror(errno)); 444 close(fd); 445 return -1; 446 } 447 dev->vhostfd = fd; 448 } 449 450 return 0; 451 } 452 453 static int 454 vhost_user_enable_queue_pair(struct virtio_user_dev *dev, 455 uint16_t pair_idx, 456 int enable) 457 { 458 int i; 459 460 for (i = 0; i < 2; ++i) { 461 struct vhost_vring_state state = { 462 .index = pair_idx * 2 + i, 463 .num = enable, 464 }; 465 466 if (vhost_user_sock(dev, VHOST_USER_SET_VRING_ENABLE, &state)) 467 return -1; 468 } 469 470 return 0; 471 } 472 473 struct virtio_user_backend_ops virtio_ops_user = { 474 .setup = vhost_user_setup, 475 .send_request = vhost_user_sock, 476 .enable_qp = vhost_user_enable_queue_pair 477 }; 478