1 /*- 2 * BSD LICENSE 3 * 4 * Copyright(c) 2010-2016 Intel Corporation. All rights reserved. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * * Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * * Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * * Neither the name of Intel Corporation nor the names of its 18 * contributors may be used to endorse or promote products derived 19 * from this software without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 */ 33 34 #include <sys/socket.h> 35 #include <sys/types.h> 36 #include <sys/stat.h> 37 #include <unistd.h> 38 #include <fcntl.h> 39 #include <sys/un.h> 40 #include <string.h> 41 #include <errno.h> 42 43 #include "vhost.h" 44 45 static int 46 vhost_user_write(int fd, void *buf, int len, int *fds, int fd_num) 47 { 48 int r; 49 struct msghdr msgh; 50 struct iovec iov; 51 size_t fd_size = fd_num * sizeof(int); 52 char control[CMSG_SPACE(fd_size)]; 53 struct cmsghdr *cmsg; 54 55 memset(&msgh, 0, sizeof(msgh)); 56 memset(control, 0, sizeof(control)); 57 58 iov.iov_base = (uint8_t *)buf; 59 iov.iov_len = len; 60 61 msgh.msg_iov = &iov; 62 msgh.msg_iovlen = 1; 63 msgh.msg_control = control; 64 msgh.msg_controllen = sizeof(control); 65 66 cmsg = CMSG_FIRSTHDR(&msgh); 67 cmsg->cmsg_len = CMSG_LEN(fd_size); 68 cmsg->cmsg_level = SOL_SOCKET; 69 cmsg->cmsg_type = SCM_RIGHTS; 70 memcpy(CMSG_DATA(cmsg), fds, fd_size); 71 72 do { 73 r = sendmsg(fd, &msgh, 0); 74 } while (r < 0 && errno == EINTR); 75 76 return r; 77 } 78 79 static int 80 vhost_user_read(int fd, struct vhost_user_msg *msg) 81 { 82 uint32_t valid_flags = VHOST_USER_REPLY_MASK | VHOST_USER_VERSION; 83 int ret, sz_hdr = VHOST_USER_HDR_SIZE, sz_payload; 84 85 ret = recv(fd, (void *)msg, sz_hdr, 0); 86 if (ret < sz_hdr) { 87 PMD_DRV_LOG(ERR, "Failed to recv msg hdr: %d instead of %d.", 88 ret, sz_hdr); 89 goto fail; 90 } 91 92 /* validate msg flags */ 93 if (msg->flags != (valid_flags)) { 94 PMD_DRV_LOG(ERR, "Failed to recv msg: flags %x instead of %x.", 95 msg->flags, valid_flags); 96 goto fail; 97 } 98 99 sz_payload = msg->size; 100 if (sz_payload) { 101 ret = recv(fd, (void *)((char *)msg + sz_hdr), sz_payload, 0); 102 if (ret < sz_payload) { 103 PMD_DRV_LOG(ERR, 104 "Failed to recv msg payload: %d instead of %d.", 105 ret, msg->size); 106 goto fail; 107 } 108 } 109 110 return 0; 111 112 fail: 113 return -1; 114 } 115 116 struct hugepage_file_info { 117 uint64_t addr; /**< virtual addr */ 118 size_t size; /**< the file size */ 119 char path[PATH_MAX]; /**< path to backing file */ 120 }; 121 122 /* Two possible options: 123 * 1. Match HUGEPAGE_INFO_FMT to find the file storing struct hugepage_file 124 * array. This is simple but cannot be used in secondary process because 125 * secondary process will close and munmap that file. 126 * 2. Match HUGEFILE_FMT to find hugepage files directly. 127 * 128 * We choose option 2. 129 */ 130 static int 131 get_hugepage_file_info(struct hugepage_file_info huges[], int max) 132 { 133 int idx; 134 FILE *f; 135 char buf[BUFSIZ], *tmp, *tail; 136 char *str_underline, *str_start; 137 int huge_index; 138 uint64_t v_start, v_end; 139 140 f = fopen("/proc/self/maps", "r"); 141 if (!f) { 142 PMD_DRV_LOG(ERR, "cannot open /proc/self/maps"); 143 return -1; 144 } 145 146 idx = 0; 147 while (fgets(buf, sizeof(buf), f) != NULL) { 148 if (sscanf(buf, "%" PRIx64 "-%" PRIx64, &v_start, &v_end) < 2) { 149 PMD_DRV_LOG(ERR, "Failed to parse address"); 150 goto error; 151 } 152 153 tmp = strchr(buf, ' ') + 1; /** skip address */ 154 tmp = strchr(tmp, ' ') + 1; /** skip perm */ 155 tmp = strchr(tmp, ' ') + 1; /** skip offset */ 156 tmp = strchr(tmp, ' ') + 1; /** skip dev */ 157 tmp = strchr(tmp, ' ') + 1; /** skip inode */ 158 while (*tmp == ' ') /** skip spaces */ 159 tmp++; 160 tail = strrchr(tmp, '\n'); /** remove newline if exists */ 161 if (tail) 162 *tail = '\0'; 163 164 /* Match HUGEFILE_FMT, aka "%s/%smap_%d", 165 * which is defined in eal_filesystem.h 166 */ 167 str_underline = strrchr(tmp, '_'); 168 if (!str_underline) 169 continue; 170 171 str_start = str_underline - strlen("map"); 172 if (str_start < tmp) 173 continue; 174 175 if (sscanf(str_start, "map_%d", &huge_index) != 1) 176 continue; 177 178 if (idx >= max) { 179 PMD_DRV_LOG(ERR, "Exceed maximum of %d", max); 180 goto error; 181 } 182 huges[idx].addr = v_start; 183 huges[idx].size = v_end - v_start; 184 strcpy(huges[idx].path, tmp); 185 idx++; 186 } 187 188 fclose(f); 189 return idx; 190 191 error: 192 fclose(f); 193 return -1; 194 } 195 196 static int 197 prepare_vhost_memory_user(struct vhost_user_msg *msg, int fds[]) 198 { 199 int i, num; 200 struct hugepage_file_info huges[VHOST_MEMORY_MAX_NREGIONS]; 201 struct vhost_memory_region *mr; 202 203 num = get_hugepage_file_info(huges, VHOST_MEMORY_MAX_NREGIONS); 204 if (num < 0) { 205 PMD_INIT_LOG(ERR, "Failed to prepare memory for vhost-user"); 206 return -1; 207 } 208 209 for (i = 0; i < num; ++i) { 210 mr = &msg->payload.memory.regions[i]; 211 mr->guest_phys_addr = huges[i].addr; /* use vaddr! */ 212 mr->userspace_addr = huges[i].addr; 213 mr->memory_size = huges[i].size; 214 mr->mmap_offset = 0; 215 fds[i] = open(huges[i].path, O_RDWR); 216 } 217 218 msg->payload.memory.nregions = num; 219 msg->payload.memory.padding = 0; 220 221 return 0; 222 } 223 224 static struct vhost_user_msg m; 225 226 static const char * const vhost_msg_strings[] = { 227 [VHOST_USER_SET_OWNER] = "VHOST_USER_SET_OWNER", 228 [VHOST_USER_RESET_OWNER] = "VHOST_USER_RESET_OWNER", 229 [VHOST_USER_SET_FEATURES] = "VHOST_USER_SET_FEATURES", 230 [VHOST_USER_GET_FEATURES] = "VHOST_USER_GET_FEATURES", 231 [VHOST_USER_SET_VRING_CALL] = "VHOST_USER_SET_VRING_CALL", 232 [VHOST_USER_SET_VRING_NUM] = "VHOST_USER_SET_VRING_NUM", 233 [VHOST_USER_SET_VRING_BASE] = "VHOST_USER_SET_VRING_BASE", 234 [VHOST_USER_GET_VRING_BASE] = "VHOST_USER_GET_VRING_BASE", 235 [VHOST_USER_SET_VRING_ADDR] = "VHOST_USER_SET_VRING_ADDR", 236 [VHOST_USER_SET_VRING_KICK] = "VHOST_USER_SET_VRING_KICK", 237 [VHOST_USER_SET_MEM_TABLE] = "VHOST_USER_SET_MEM_TABLE", 238 [VHOST_USER_SET_VRING_ENABLE] = "VHOST_USER_SET_VRING_ENABLE", 239 NULL, 240 }; 241 242 int 243 vhost_user_sock(int vhostfd, enum vhost_user_request req, void *arg) 244 { 245 struct vhost_user_msg msg; 246 struct vhost_vring_file *file = 0; 247 int need_reply = 0; 248 int fds[VHOST_MEMORY_MAX_NREGIONS]; 249 int fd_num = 0; 250 int i, len; 251 252 RTE_SET_USED(m); 253 RTE_SET_USED(vhost_msg_strings); 254 255 PMD_DRV_LOG(INFO, "%s", vhost_msg_strings[req]); 256 257 msg.request = req; 258 msg.flags = VHOST_USER_VERSION; 259 msg.size = 0; 260 261 switch (req) { 262 case VHOST_USER_GET_FEATURES: 263 need_reply = 1; 264 break; 265 266 case VHOST_USER_SET_FEATURES: 267 case VHOST_USER_SET_LOG_BASE: 268 msg.payload.u64 = *((__u64 *)arg); 269 msg.size = sizeof(m.payload.u64); 270 break; 271 272 case VHOST_USER_SET_OWNER: 273 case VHOST_USER_RESET_OWNER: 274 break; 275 276 case VHOST_USER_SET_MEM_TABLE: 277 if (prepare_vhost_memory_user(&msg, fds) < 0) 278 return -1; 279 fd_num = msg.payload.memory.nregions; 280 msg.size = sizeof(m.payload.memory.nregions); 281 msg.size += sizeof(m.payload.memory.padding); 282 msg.size += fd_num * sizeof(struct vhost_memory_region); 283 break; 284 285 case VHOST_USER_SET_LOG_FD: 286 fds[fd_num++] = *((int *)arg); 287 break; 288 289 case VHOST_USER_SET_VRING_NUM: 290 case VHOST_USER_SET_VRING_BASE: 291 case VHOST_USER_SET_VRING_ENABLE: 292 memcpy(&msg.payload.state, arg, sizeof(msg.payload.state)); 293 msg.size = sizeof(m.payload.state); 294 break; 295 296 case VHOST_USER_GET_VRING_BASE: 297 memcpy(&msg.payload.state, arg, sizeof(msg.payload.state)); 298 msg.size = sizeof(m.payload.state); 299 need_reply = 1; 300 break; 301 302 case VHOST_USER_SET_VRING_ADDR: 303 memcpy(&msg.payload.addr, arg, sizeof(msg.payload.addr)); 304 msg.size = sizeof(m.payload.addr); 305 break; 306 307 case VHOST_USER_SET_VRING_KICK: 308 case VHOST_USER_SET_VRING_CALL: 309 case VHOST_USER_SET_VRING_ERR: 310 file = arg; 311 msg.payload.u64 = file->index & VHOST_USER_VRING_IDX_MASK; 312 msg.size = sizeof(m.payload.u64); 313 if (file->fd > 0) 314 fds[fd_num++] = file->fd; 315 else 316 msg.payload.u64 |= VHOST_USER_VRING_NOFD_MASK; 317 break; 318 319 default: 320 PMD_DRV_LOG(ERR, "trying to send unhandled msg type"); 321 return -1; 322 } 323 324 len = VHOST_USER_HDR_SIZE + msg.size; 325 if (vhost_user_write(vhostfd, &msg, len, fds, fd_num) < 0) { 326 PMD_DRV_LOG(ERR, "%s failed: %s", 327 vhost_msg_strings[req], strerror(errno)); 328 return -1; 329 } 330 331 if (req == VHOST_USER_SET_MEM_TABLE) 332 for (i = 0; i < fd_num; ++i) 333 close(fds[i]); 334 335 if (need_reply) { 336 if (vhost_user_read(vhostfd, &msg) < 0) { 337 PMD_DRV_LOG(ERR, "Received msg failed: %s", 338 strerror(errno)); 339 return -1; 340 } 341 342 if (req != msg.request) { 343 PMD_DRV_LOG(ERR, "Received unexpected msg type"); 344 return -1; 345 } 346 347 switch (req) { 348 case VHOST_USER_GET_FEATURES: 349 if (msg.size != sizeof(m.payload.u64)) { 350 PMD_DRV_LOG(ERR, "Received bad msg size"); 351 return -1; 352 } 353 *((__u64 *)arg) = msg.payload.u64; 354 break; 355 case VHOST_USER_GET_VRING_BASE: 356 if (msg.size != sizeof(m.payload.state)) { 357 PMD_DRV_LOG(ERR, "Received bad msg size"); 358 return -1; 359 } 360 memcpy(arg, &msg.payload.state, 361 sizeof(struct vhost_vring_state)); 362 break; 363 default: 364 PMD_DRV_LOG(ERR, "Received unexpected msg type"); 365 return -1; 366 } 367 } 368 369 return 0; 370 } 371 372 /** 373 * Set up environment to talk with a vhost user backend. 374 * @param path 375 * - The path to vhost user unix socket file. 376 * 377 * @return 378 * - (-1) if fail to set up; 379 * - (>=0) if successful, and it is the fd to vhostfd. 380 */ 381 int 382 vhost_user_setup(const char *path) 383 { 384 int fd; 385 int flag; 386 struct sockaddr_un un; 387 388 fd = socket(AF_UNIX, SOCK_STREAM, 0); 389 if (fd < 0) { 390 PMD_DRV_LOG(ERR, "socket() error, %s", strerror(errno)); 391 return -1; 392 } 393 394 flag = fcntl(fd, F_GETFD); 395 fcntl(fd, F_SETFD, flag | FD_CLOEXEC); 396 397 memset(&un, 0, sizeof(un)); 398 un.sun_family = AF_UNIX; 399 snprintf(un.sun_path, sizeof(un.sun_path), "%s", path); 400 if (connect(fd, (struct sockaddr *)&un, sizeof(un)) < 0) { 401 PMD_DRV_LOG(ERR, "connect error, %s", strerror(errno)); 402 close(fd); 403 return -1; 404 } 405 406 return fd; 407 } 408 409 int 410 vhost_user_enable_queue_pair(int vhostfd, uint16_t pair_idx, int enable) 411 { 412 int i; 413 414 for (i = 0; i < 2; ++i) { 415 struct vhost_vring_state state = { 416 .index = pair_idx * 2 + i, 417 .num = enable, 418 }; 419 420 if (vhost_user_sock(vhostfd, 421 VHOST_USER_SET_VRING_ENABLE, &state)) 422 return -1; 423 } 424 425 return 0; 426 } 427