1 /* $NetBSD: perfuse.c,v 1.20 2011/09/09 22:51:44 christos Exp $ */ 2 3 /*- 4 * Copyright (c) 2010-2011 Emmanuel Dreyfus. All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 16 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 17 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 18 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 19 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 20 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 21 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 22 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 23 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 24 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 25 * POSSIBILITY OF SUCH DAMAGE. 26 */ 27 28 #include <stdio.h> 29 #include <unistd.h> 30 #include <stdlib.h> 31 #include <fcntl.h> 32 #include <string.h> 33 #include <errno.h> 34 #include <puffs.h> 35 #include <sys/types.h> 36 #include <sys/mman.h> 37 #include <sys/socket.h> 38 #include <sys/extattr.h> 39 #include <sys/un.h> 40 #include <machine/vmparam.h> 41 42 #define LIBPERFUSE 43 #include "perfuse.h" 44 #include "perfuse_if.h" 45 #include "perfuse_priv.h" 46 47 int perfuse_diagflags = 0; /* global used only in DPRINTF/DERR/DWARN */ 48 extern char **environ; 49 50 static struct perfuse_state *init_state(void); 51 static int get_fd(const char *); 52 53 54 static struct perfuse_state * 55 init_state(void) 56 { 57 struct perfuse_state *ps; 58 char opts[1024]; 59 60 if ((ps = malloc(sizeof(*ps))) == NULL) 61 DERR(EX_OSERR, "%s: malloc failed", __func__); 62 63 (void)memset(ps, 0, sizeof(*ps)); 64 ps->ps_max_write = UINT_MAX; 65 ps->ps_max_readahead = UINT_MAX; 66 67 /* 68 * Most of the time, access() is broken because the filesystem 69 * performs the check with root privileges. glusterfs will do that 70 * if the Linux-specific setfsuid() is missing, for instance. 71 */ 72 ps->ps_flags |= PS_NO_ACCESS; 73 74 /* 75 * This is a temporary way to toggle access and creat usage. 76 * It would be nice if that could be provided as mount options, 77 * but that will not be obvious to do. 78 */ 79 if (getenv_r("PERFUSE_OPTIONS", opts, sizeof(opts)) != -1) { 80 char *optname; 81 char *last; 82 83 for ((optname = strtok_r(opts, ",", &last)); 84 optname != NULL; 85 (optname = strtok_r(NULL, ",", &last))) { 86 if (strcmp(optname, "enable_access") == 0) 87 ps->ps_flags &= ~PS_NO_ACCESS; 88 89 if (strcmp(optname, "disable_access") == 0) 90 ps->ps_flags |= PS_NO_ACCESS; 91 92 if (strcmp(optname, "enable_creat") == 0) 93 ps->ps_flags &= ~PS_NO_CREAT; 94 95 if (strcmp(optname, "disable_creat") == 0) 96 ps->ps_flags |= PS_NO_CREAT; 97 } 98 } 99 100 101 return ps; 102 } 103 104 105 static int 106 get_fd(data) 107 const char *data; 108 { 109 char *string; 110 const char fdopt[] = "fd="; 111 char *lastp; 112 char *opt; 113 int fd = -1; 114 115 if ((string = strdup(data)) == NULL) 116 return -1; 117 118 for (opt = strtok_r(string, ",", &lastp); 119 opt != NULL; 120 opt = strtok_r(NULL, ",", &lastp)) { 121 if (strncmp(opt, fdopt, strlen(fdopt)) == 0) { 122 fd = atoi(opt + strlen(fdopt)); 123 break; 124 } 125 } 126 127 /* 128 * No file descriptor found 129 */ 130 if (fd == -1) 131 errno = EINVAL; 132 133 free(string); 134 return fd; 135 136 } 137 138 int 139 perfuse_open(path, flags, mode) 140 const char *path; 141 int flags; 142 mode_t mode; 143 { 144 int sv[2]; 145 struct sockaddr_un sun; 146 struct sockaddr *sa; 147 char progname[] = _PATH_PERFUSED; 148 char minus_i[] = "-i"; 149 char fdstr[16]; 150 char *const argv[] = { progname, minus_i, fdstr, NULL}; 151 uint32_t opt; 152 uint32_t optlen; 153 int sock_type = SOCK_SEQPACKET; 154 155 if (strcmp(path, _PATH_FUSE) != 0) 156 return open(path, flags, mode); 157 158 /* 159 * Try SOCK_SEQPACKET then SOCK_DGRAM if unavailable 160 */ 161 if ((sv[0] = socket(PF_LOCAL, SOCK_SEQPACKET, 0)) == -1) { 162 sock_type = SOCK_DGRAM; 163 DWARNX("SEQPACKET local sockets unavailable, using less " 164 "reliable DGRAM sockets. Expect file operation hangs."); 165 166 if ((sv[0] = socket(PF_LOCAL, SOCK_DGRAM, 0)) == -1) { 167 #ifdef PERFUSE_DEBUG 168 DWARN("%s: %d socket failed", __func__, __LINE__); 169 #endif 170 return -1; 171 } 172 } 173 174 /* 175 * Set a buffer lentgh large enough so that any FUSE packet 176 * will fit. 177 */ 178 opt = (uint32_t)FUSE_BUFSIZE; 179 optlen = sizeof(opt); 180 if (setsockopt(sv[0], SOL_SOCKET, SO_SNDBUF, &opt, optlen) != 0) 181 DWARN("%s: setsockopt SO_SNDBUF to %d failed", __func__, opt); 182 183 if (setsockopt(sv[0], SOL_SOCKET, SO_RCVBUF, &opt, optlen) != 0) 184 DWARN("%s: setsockopt SO_RCVBUF to %d failed", __func__, opt); 185 186 sa = (struct sockaddr *)(void *)&sun; 187 sun.sun_len = sizeof(sun); 188 sun.sun_family = AF_LOCAL; 189 (void)strcpy(sun.sun_path, path); 190 191 if (connect(sv[0], sa, (socklen_t)sun.sun_len) == 0) 192 return sv[0]; 193 194 /* 195 * Attempt to run perfused on our own 196 * if it does not run yet; In that case 197 * we will talk using a socketpair 198 * instead of /dev/fuse. 199 */ 200 if (socketpair(PF_LOCAL, sock_type, 0, sv) != 0) { 201 DWARN("%s:%d: socketpair failed", __func__, __LINE__); 202 return -1; 203 } 204 205 /* 206 * Set a buffer lentgh large enough so that any FUSE packet 207 * will fit. 208 */ 209 opt = (uint32_t)(4 * FUSE_BUFSIZE); 210 optlen = sizeof(opt); 211 if (setsockopt(sv[0], SOL_SOCKET, SO_SNDBUF, &opt, optlen) != 0) 212 DWARN("%s: setsockopt SO_SNDBUF to %d failed", __func__, opt); 213 214 if (setsockopt(sv[0], SOL_SOCKET, SO_RCVBUF, &opt, optlen) != 0) 215 DWARN("%s: setsockopt SO_RCVBUF to %d failed", __func__, opt); 216 217 if (setsockopt(sv[1], SOL_SOCKET, SO_SNDBUF, &opt, optlen) != 0) 218 DWARN("%s: setsockopt SO_SNDBUF to %d failed", __func__, opt); 219 220 if (setsockopt(sv[1], SOL_SOCKET, SO_RCVBUF, &opt, optlen) != 0) 221 DWARN("%s: setsockopt SO_RCVBUF to %d failed", __func__, opt); 222 223 /* 224 * Request peer credentials. This musr be done before first 225 * frame is sent. 226 */ 227 opt = 1; 228 optlen = sizeof(opt); 229 if (setsockopt(sv[1], 0, LOCAL_CREDS, &opt, optlen) != 0) 230 DWARN("%s: setsockopt LOCAL_CREDS failed", __func__); 231 232 (void)sprintf(fdstr, "%d", sv[1]); 233 234 switch(fork()) { 235 case -1: 236 #ifdef PERFUSE_DEBUG 237 DWARN("%s:%d: fork failed", __func__, __LINE__); 238 #endif 239 return -1; 240 /* NOTREACHED */ 241 break; 242 case 0: 243 (void)execve(argv[0], argv, environ); 244 #ifdef PERFUSE_DEBUG 245 DWARN("%s:%d: execve failed", __func__, __LINE__); 246 #endif 247 return -1; 248 /* NOTREACHED */ 249 break; 250 default: 251 break; 252 } 253 254 return sv[0]; 255 } 256 257 int 258 perfuse_mount(source, target, filesystemtype, mountflags, data) 259 const char *source; 260 const char *target; 261 const char *filesystemtype; 262 long mountflags; 263 const void *data; 264 { 265 int s; 266 size_t len; 267 struct perfuse_mount_out *pmo; 268 struct sockaddr_storage ss; 269 struct sockaddr_un *sun; 270 struct sockaddr *sa; 271 socklen_t sa_len; 272 size_t sock_len; 273 char *frame; 274 char *cp; 275 276 #ifdef PERFUSE_DEBUG 277 if (perfuse_diagflags & PDF_MISC) 278 DPRINTF("%s(\"%s\", \"%s\", \"%s\", 0x%lx, \"%s\")\n", 279 __func__, source, target, filesystemtype, 280 mountflags, (const char *)data); 281 #endif 282 283 if ((s = get_fd(data)) == -1) 284 return -1; 285 286 /* 287 * If we are connected to /dev/fuse, we need a second 288 * socket to get replies from perfused. 289 * XXX This socket is not removed at exit time yet 290 */ 291 sock_len = 0; 292 sa = (struct sockaddr *)(void *)&ss; 293 sun = (struct sockaddr_un *)(void *)&ss; 294 sa_len = sizeof(ss); 295 if ((getpeername(s, sa, &sa_len) == 0) && 296 (sa->sa_family = AF_LOCAL) && 297 (strcmp(sun->sun_path, _PATH_FUSE) == 0)) { 298 299 sun->sun_len = sizeof(*sun); 300 sun->sun_family = AF_LOCAL; 301 (void)sprintf(sun->sun_path, "%s/%s-%d", 302 _PATH_TMP, getprogname(), getpid()); 303 304 if (bind(s, sa, (socklen_t)sa->sa_len) != 0) 305 DERR(EX_OSERR, "%s:%d bind to \"%s\" failed", 306 __func__, __LINE__, sun->sun_path); 307 308 sock_len = strlen(sun->sun_path) + 1; 309 } 310 311 len = sizeof(*pmo); 312 len += source ? (uint32_t)strlen(source) + 1 : 0; 313 len += target ? (uint32_t)strlen(target) + 1 : 0; 314 len += filesystemtype ? (uint32_t)strlen(filesystemtype) + 1 : 0; 315 len += data ? (uint32_t)strlen(data) + 1 : 0; 316 len += sock_len; 317 318 if ((frame = malloc(len)) == NULL) { 319 #ifdef PERFUSE_DEBUG 320 if (perfuse_diagflags & PDF_MISC) 321 DWARN("%s:%d malloc failed", __func__, __LINE__); 322 #endif 323 return -1; 324 } 325 326 pmo = (struct perfuse_mount_out *)(void *)frame; 327 pmo->pmo_len = (uint32_t)len; 328 pmo->pmo_error = 0; 329 pmo->pmo_unique = (uint64_t)-1; 330 (void)strcpy(pmo->pmo_magic, PERFUSE_MOUNT_MAGIC); 331 332 pmo->pmo_source_len = source ? (uint32_t)strlen(source) + 1 : 0; 333 pmo->pmo_target_len = target ? (uint32_t)strlen(target) + 1: 0; 334 pmo->pmo_filesystemtype_len = 335 filesystemtype ? (uint32_t)strlen(filesystemtype) + 1 : 0; 336 pmo->pmo_mountflags = (uint32_t)mountflags; 337 pmo->pmo_data_len = data ? (uint32_t)strlen(data) + 1 : 0; 338 pmo->pmo_sock_len = (uint32_t)sock_len; 339 340 cp = (char *)(void *)(pmo + 1); 341 342 if (source) { 343 (void)strcpy(cp, source); 344 cp += pmo->pmo_source_len; 345 } 346 347 if (target) { 348 (void)strcpy(cp, target); 349 cp += pmo->pmo_target_len; 350 } 351 352 if (filesystemtype) { 353 (void)strcpy(cp, filesystemtype); 354 cp += pmo->pmo_filesystemtype_len; 355 } 356 357 if (data) { 358 (void)strcpy(cp, data); 359 cp += pmo->pmo_data_len; 360 } 361 362 if (sock_len != 0) { 363 (void)strcpy(cp, sun->sun_path); 364 cp += pmo->pmo_sock_len; 365 } 366 367 if (send(s, frame, len, MSG_NOSIGNAL) != (ssize_t)len) { 368 #ifdef PERFUSE_DEBUG 369 DWARN("%s:%d sendto failed", __func__, __LINE__); 370 #endif 371 return -1; 372 } 373 374 return 0; 375 } 376 377 378 uint64_t 379 perfuse_next_unique(pu) 380 struct puffs_usermount *pu; 381 { 382 struct perfuse_state *ps; 383 384 ps = puffs_getspecific(pu); 385 386 return ps->ps_unique++; 387 } 388 389 struct puffs_usermount * 390 perfuse_init(pc, pmi) 391 struct perfuse_callbacks *pc; 392 struct perfuse_mount_info *pmi; 393 { 394 struct perfuse_state *ps; 395 struct puffs_usermount *pu; 396 struct puffs_ops *pops; 397 const char *source = _PATH_PUFFS; 398 char *fstype; 399 unsigned int puffs_flags; 400 struct puffs_node *pn_root; 401 struct puffs_pathobj *po_root; 402 403 /* 404 * perfused needs to remain in memory. If it gets 405 * swapped out, the kernel will deadlock when trying 406 * to free memory backed by the PUFFS filesystem 407 */ 408 mlockall(MCL_CURRENT|MCL_FUTURE); 409 410 ps = init_state(); 411 ps->ps_owner_uid = pmi->pmi_uid; 412 413 if (pmi->pmi_source) { 414 if ((ps->ps_source = strdup(pmi->pmi_source)) == NULL) 415 DERR(EX_OSERR, "%s: strdup failed", __func__); 416 417 source = ps->ps_source; 418 } 419 420 if (pmi->pmi_filesystemtype) { 421 size_t len; 422 423 ps->ps_filesystemtype = strdup(pmi->pmi_filesystemtype); 424 if (ps->ps_filesystemtype == NULL) 425 DERR(EX_OSERR, "%s: strdup failed", __func__); 426 427 len = sizeof("perfuse|") + strlen(ps->ps_filesystemtype) + 1; 428 if ((fstype = malloc(len)) == NULL) 429 DERR(EX_OSERR, "%s: malloc failed", __func__); 430 431 (void)sprintf(fstype, "perfuse|%s", ps->ps_filesystemtype); 432 } else { 433 if ((fstype = strdup("perfuse")) == NULL) 434 DERR(EX_OSERR, "%s: strdup failed", __func__); 435 } 436 437 if ((ps->ps_target = strdup(pmi->pmi_target)) == NULL) 438 DERR(EX_OSERR, "%s: strdup failed", __func__); 439 440 ps->ps_mountflags = pmi->pmi_mountflags; 441 442 /* 443 * Some options are forbidden for non root users 444 */ 445 if (ps->ps_owner_uid != 0) 446 ps->ps_mountflags |= MNT_NOSUID|MNT_NODEV; 447 448 PUFFSOP_INIT(pops); 449 PUFFSOP_SET(pops, perfuse, fs, unmount); 450 PUFFSOP_SET(pops, perfuse, fs, statvfs); 451 PUFFSOP_SET(pops, perfuse, fs, sync); 452 PUFFSOP_SET(pops, perfuse, node, lookup); 453 PUFFSOP_SET(pops, perfuse, node, create); 454 PUFFSOP_SET(pops, perfuse, node, mknod); 455 PUFFSOP_SET(pops, perfuse, node, open); 456 PUFFSOP_SET(pops, perfuse, node, close); 457 PUFFSOP_SET(pops, perfuse, node, access); 458 PUFFSOP_SET(pops, perfuse, node, getattr); 459 PUFFSOP_SET(pops, perfuse, node, setattr); 460 PUFFSOP_SET(pops, perfuse, node, poll); 461 #if 0 462 PUFFSOP_SET(pops, perfuse, node, mmap); 463 #endif 464 PUFFSOP_SET(pops, perfuse, node, fsync); 465 PUFFSOP_SET(pops, perfuse, node, seek); 466 PUFFSOP_SET(pops, perfuse, node, remove); 467 PUFFSOP_SET(pops, perfuse, node, link); 468 PUFFSOP_SET(pops, perfuse, node, rename); 469 PUFFSOP_SET(pops, perfuse, node, mkdir); 470 PUFFSOP_SET(pops, perfuse, node, rmdir); 471 PUFFSOP_SET(pops, perfuse, node, symlink); 472 PUFFSOP_SET(pops, perfuse, node, readdir); 473 PUFFSOP_SET(pops, perfuse, node, readlink); 474 PUFFSOP_SET(pops, perfuse, node, reclaim); 475 PUFFSOP_SET(pops, perfuse, node, inactive); 476 PUFFSOP_SET(pops, perfuse, node, print); 477 PUFFSOP_SET(pops, perfuse, node, advlock); 478 PUFFSOP_SET(pops, perfuse, node, read); 479 PUFFSOP_SET(pops, perfuse, node, write); 480 #ifdef PUFFS_EXTNAMELEN 481 PUFFSOP_SET(pops, perfuse, node, getextattr); 482 PUFFSOP_SET(pops, perfuse, node, setextattr); 483 PUFFSOP_SET(pops, perfuse, node, listextattr); 484 PUFFSOP_SET(pops, perfuse, node, deleteextattr); 485 #endif /* PUFFS_EXTNAMELEN */ 486 487 /* 488 * We used to have PUFFS_KFLAG_WTCACHE here, which uses the 489 * page cache (highly desirable to get mmap(2)), but still sends 490 * all writes to the filesystem. In fact it does not send the 491 * data written, but the pages that contain it. 492 * 493 * There is a nasty bug hidden somewhere, possibly in libpuffs' 494 * VOP_FSYNC, which sends an asynchronous PUFFS_SETATTR that 495 * update file size. When writes are in progress, it will cause 496 * the file to be truncated and we get a zero-filled chunk at the 497 * beginning of a page. Removing PUFFS_KFLAG_WTCACHE fixes that 498 * problem. 499 * 500 * The other consequences are that changes will not be propagated 501 * immediatly to the filesystem, and we get a huge performance gain 502 * because much less requests are sent. A test case for the above 503 * mentioned bug got its execution time slashed by factor 50. 504 */ 505 puffs_flags = 0; 506 507 if (perfuse_diagflags & PDF_PUFFS) 508 puffs_flags |= PUFFS_FLAG_OPDUMP; 509 510 if ((pu = puffs_init(pops, source, fstype, ps, puffs_flags)) == NULL) 511 DERR(EX_OSERR, "%s: puffs_init failed", __func__); 512 513 ps->ps_pu = pu; 514 515 /* 516 * Setup filesystem root 517 */ 518 pn_root = perfuse_new_pn(pu, "", NULL); 519 PERFUSE_NODE_DATA(pn_root)->pnd_ino = FUSE_ROOT_ID; 520 PERFUSE_NODE_DATA(pn_root)->pnd_parent = pn_root; 521 puffs_setroot(pu, pn_root); 522 ps->ps_fsid = pn_root->pn_va.va_fsid; 523 524 po_root = puffs_getrootpathobj(pu); 525 if ((po_root->po_path = strdup("/")) == NULL) 526 DERRX(EX_OSERR, "perfuse_mount_start() failed"); 527 528 po_root->po_len = 1; 529 puffs_path_buildhash(pu, po_root); 530 531 puffs_vattr_null(&pn_root->pn_va); 532 pn_root->pn_va.va_type = VDIR; 533 pn_root->pn_va.va_mode = 0755; 534 535 ps->ps_root = pn_root; 536 537 /* 538 * Callbacks 539 */ 540 ps->ps_new_msg = pc->pc_new_msg; 541 ps->ps_xchg_msg = pc->pc_xchg_msg; 542 ps->ps_destroy_msg = pc->pc_destroy_msg; 543 ps->ps_get_inhdr = pc->pc_get_inhdr; 544 ps->ps_get_inpayload = pc->pc_get_inpayload; 545 ps->ps_get_outhdr = pc->pc_get_outhdr; 546 ps->ps_get_outpayload = pc->pc_get_outpayload; 547 ps->ps_umount = pc->pc_umount; 548 549 return pu; 550 } 551 552 void 553 perfuse_setspecific(pu, priv) 554 struct puffs_usermount *pu; 555 void *priv; 556 { 557 struct perfuse_state *ps; 558 559 ps = puffs_getspecific(pu); 560 ps->ps_private = priv; 561 562 return; 563 } 564 565 void * 566 perfuse_getspecific(pu) 567 struct puffs_usermount *pu; 568 { 569 struct perfuse_state *ps; 570 571 ps = puffs_getspecific(pu); 572 573 return ps->ps_private; 574 } 575 576 int 577 perfuse_inloop(pu) 578 struct puffs_usermount *pu; 579 { 580 struct perfuse_state *ps; 581 582 ps = puffs_getspecific(pu); 583 584 return ps->ps_flags & PS_INLOOP; 585 } 586 587 int 588 perfuse_mainloop(pu) 589 struct puffs_usermount *pu; 590 { 591 struct perfuse_state *ps; 592 593 ps = puffs_getspecific(pu); 594 595 ps->ps_flags |= PS_INLOOP; 596 if (puffs_mainloop(ps->ps_pu) != 0) { 597 DERR(EX_OSERR, "%s: failed", __func__); 598 return -1; 599 } 600 601 /* 602 * Normal exit after unmount 603 */ 604 return 0; 605 } 606 607 /* ARGSUSED0 */ 608 uint64_t 609 perfuse_get_ino(pu, opc) 610 struct puffs_usermount *pu; 611 puffs_cookie_t opc; 612 { 613 return PERFUSE_NODE_DATA(opc)->pnd_ino; 614 } 615 616 int 617 perfuse_unmount(pu) 618 struct puffs_usermount *pu; 619 { 620 struct perfuse_state *ps; 621 622 ps = puffs_getspecific(pu); 623 624 return unmount(ps->ps_target, MNT_FORCE); 625 } 626