1 /* $NetBSD: perfuse.c,v 1.25 2012/02/03 15:54:15 manu Exp $ */ 2 3 /*- 4 * Copyright (c) 2010-2011 Emmanuel Dreyfus. All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 16 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 17 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 18 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 19 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 20 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 21 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 22 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 23 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 24 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 25 * POSSIBILITY OF SUCH DAMAGE. 26 */ 27 28 #include <stdio.h> 29 #include <unistd.h> 30 #include <stdlib.h> 31 #include <fcntl.h> 32 #include <string.h> 33 #include <errno.h> 34 #include <puffs.h> 35 #include <sys/types.h> 36 #include <sys/mman.h> 37 #include <sys/resource.h> 38 #include <sys/socket.h> 39 #include <sys/extattr.h> 40 #include <sys/un.h> 41 #include <machine/vmparam.h> 42 43 #define LIBPERFUSE 44 #include "perfuse.h" 45 #include "perfuse_if.h" 46 #include "perfuse_priv.h" 47 48 int perfuse_diagflags = 0; /* global used only in DPRINTF/DERR/DWARN */ 49 extern char **environ; 50 51 static struct perfuse_state *init_state(void); 52 static int get_fd(const char *); 53 54 55 static struct perfuse_state * 56 init_state(void) 57 { 58 struct perfuse_state *ps; 59 char opts[1024]; 60 61 if ((ps = malloc(sizeof(*ps))) == NULL) 62 DERR(EX_OSERR, "%s: malloc failed", __func__); 63 64 (void)memset(ps, 0, sizeof(*ps)); 65 ps->ps_max_write = UINT_MAX; 66 ps->ps_max_readahead = UINT_MAX; 67 TAILQ_INIT(&ps->ps_trace); 68 69 /* 70 * Most of the time, access() is broken because the filesystem 71 * performs the check with root privileges. glusterfs will do that 72 * if the Linux-specific setfsuid() is missing, for instance. 73 */ 74 ps->ps_flags |= PS_NO_ACCESS; 75 76 /* 77 * This is a temporary way to toggle access and creat usage. 78 * It would be nice if that could be provided as mount options, 79 * but that will not be obvious to do. 80 */ 81 if (getenv_r("PERFUSE_OPTIONS", opts, sizeof(opts)) != -1) { 82 char *optname; 83 char *last; 84 85 for ((optname = strtok_r(opts, ",", &last)); 86 optname != NULL; 87 (optname = strtok_r(NULL, ",", &last))) { 88 if (strcmp(optname, "enable_access") == 0) 89 ps->ps_flags &= ~PS_NO_ACCESS; 90 91 if (strcmp(optname, "disable_access") == 0) 92 ps->ps_flags |= PS_NO_ACCESS; 93 94 if (strcmp(optname, "enable_creat") == 0) 95 ps->ps_flags &= ~PS_NO_CREAT; 96 97 if (strcmp(optname, "disable_creat") == 0) 98 ps->ps_flags |= PS_NO_CREAT; 99 } 100 } 101 102 103 return ps; 104 } 105 106 107 static int 108 get_fd(data) 109 const char *data; 110 { 111 char *string; 112 const char fdopt[] = "fd="; 113 char *lastp; 114 char *opt; 115 int fd = -1; 116 117 if ((string = strdup(data)) == NULL) 118 return -1; 119 120 for (opt = strtok_r(string, ",", &lastp); 121 opt != NULL; 122 opt = strtok_r(NULL, ",", &lastp)) { 123 if (strncmp(opt, fdopt, strlen(fdopt)) == 0) { 124 fd = atoi(opt + strlen(fdopt)); 125 break; 126 } 127 } 128 129 /* 130 * No file descriptor found 131 */ 132 if (fd == -1) 133 errno = EINVAL; 134 135 free(string); 136 return fd; 137 138 } 139 140 int 141 perfuse_open(path, flags, mode) 142 const char *path; 143 int flags; 144 mode_t mode; 145 { 146 int sv[2]; 147 struct sockaddr_un sun; 148 struct sockaddr *sa; 149 char progname[] = _PATH_PERFUSED; 150 char minus_i[] = "-i"; 151 char fdstr[16]; 152 char *const argv[] = { progname, minus_i, fdstr, NULL}; 153 uint32_t opt; 154 uint32_t optlen; 155 int sock_type = SOCK_SEQPACKET; 156 157 if (strcmp(path, _PATH_FUSE) != 0) 158 return open(path, flags, mode); 159 160 /* 161 * Try SOCK_SEQPACKET then SOCK_DGRAM if unavailable 162 */ 163 if ((sv[0] = socket(PF_LOCAL, SOCK_SEQPACKET, 0)) == -1) { 164 sock_type = SOCK_DGRAM; 165 DWARNX("SEQPACKET local sockets unavailable, using less " 166 "reliable DGRAM sockets. Expect file operation hangs."); 167 168 if ((sv[0] = socket(PF_LOCAL, SOCK_DGRAM, 0)) == -1) { 169 #ifdef PERFUSE_DEBUG 170 DWARN("%s: %d socket failed", __func__, __LINE__); 171 #endif 172 return -1; 173 } 174 } 175 176 /* 177 * Set a buffer lentgh large enough so that any FUSE packet 178 * will fit. 179 */ 180 opt = (uint32_t)FUSE_BUFSIZE; 181 optlen = sizeof(opt); 182 if (setsockopt(sv[0], SOL_SOCKET, SO_SNDBUF, &opt, optlen) != 0) 183 DWARN("%s: setsockopt SO_SNDBUF to %d failed", __func__, opt); 184 185 if (setsockopt(sv[0], SOL_SOCKET, SO_RCVBUF, &opt, optlen) != 0) 186 DWARN("%s: setsockopt SO_RCVBUF to %d failed", __func__, opt); 187 188 sa = (struct sockaddr *)(void *)&sun; 189 sun.sun_len = sizeof(sun); 190 sun.sun_family = AF_LOCAL; 191 (void)strcpy(sun.sun_path, path); 192 193 if (connect(sv[0], sa, (socklen_t)sun.sun_len) == 0) 194 return sv[0]; 195 196 /* 197 * Attempt to run perfused on our own 198 * if it does not run yet; In that case 199 * we will talk using a socketpair 200 * instead of /dev/fuse. 201 */ 202 if (socketpair(PF_LOCAL, sock_type, 0, sv) != 0) { 203 DWARN("%s:%d: socketpair failed", __func__, __LINE__); 204 return -1; 205 } 206 207 /* 208 * Set a buffer lentgh large enough so that any FUSE packet 209 * will fit. 210 */ 211 opt = (uint32_t)(4 * FUSE_BUFSIZE); 212 optlen = sizeof(opt); 213 if (setsockopt(sv[0], SOL_SOCKET, SO_SNDBUF, &opt, optlen) != 0) 214 DWARN("%s: setsockopt SO_SNDBUF to %d failed", __func__, opt); 215 216 if (setsockopt(sv[0], SOL_SOCKET, SO_RCVBUF, &opt, optlen) != 0) 217 DWARN("%s: setsockopt SO_RCVBUF to %d failed", __func__, opt); 218 219 if (setsockopt(sv[1], SOL_SOCKET, SO_SNDBUF, &opt, optlen) != 0) 220 DWARN("%s: setsockopt SO_SNDBUF to %d failed", __func__, opt); 221 222 if (setsockopt(sv[1], SOL_SOCKET, SO_RCVBUF, &opt, optlen) != 0) 223 DWARN("%s: setsockopt SO_RCVBUF to %d failed", __func__, opt); 224 225 /* 226 * Request peer credentials. This musr be done before first 227 * frame is sent. 228 */ 229 opt = 1; 230 optlen = sizeof(opt); 231 if (setsockopt(sv[1], 0, LOCAL_CREDS, &opt, optlen) != 0) 232 DWARN("%s: setsockopt LOCAL_CREDS failed", __func__); 233 234 (void)sprintf(fdstr, "%d", sv[1]); 235 236 switch(fork()) { 237 case -1: 238 #ifdef PERFUSE_DEBUG 239 DWARN("%s:%d: fork failed", __func__, __LINE__); 240 #endif 241 return -1; 242 /* NOTREACHED */ 243 break; 244 case 0: 245 (void)close(sv[0]); 246 (void)execve(argv[0], argv, environ); 247 #ifdef PERFUSE_DEBUG 248 DWARN("%s:%d: execve failed", __func__, __LINE__); 249 #endif 250 return -1; 251 /* NOTREACHED */ 252 break; 253 default: 254 break; 255 } 256 257 (void)close(sv[1]); 258 return sv[0]; 259 } 260 261 int 262 perfuse_mount(source, target, filesystemtype, mountflags, data) 263 const char *source; 264 const char *target; 265 const char *filesystemtype; 266 long mountflags; 267 const void *data; 268 { 269 int s; 270 size_t len; 271 struct perfuse_mount_out *pmo; 272 struct sockaddr_storage ss; 273 struct sockaddr_un *sun; 274 struct sockaddr *sa; 275 socklen_t sa_len; 276 size_t sock_len; 277 char *frame; 278 char *cp; 279 280 #ifdef PERFUSE_DEBUG 281 if (perfuse_diagflags & PDF_MISC) 282 DPRINTF("%s(\"%s\", \"%s\", \"%s\", 0x%lx, \"%s\")\n", 283 __func__, source, target, filesystemtype, 284 mountflags, (const char *)data); 285 #endif 286 287 if ((s = get_fd(data)) == -1) 288 return -1; 289 290 /* 291 * If we are connected to /dev/fuse, we need a second 292 * socket to get replies from perfused. 293 * XXX This socket is not removed at exit time yet 294 */ 295 sock_len = 0; 296 sa = (struct sockaddr *)(void *)&ss; 297 sun = (struct sockaddr_un *)(void *)&ss; 298 sa_len = sizeof(ss); 299 if ((getpeername(s, sa, &sa_len) == 0) && 300 (sa->sa_family = AF_LOCAL) && 301 (strcmp(sun->sun_path, _PATH_FUSE) == 0)) { 302 303 sun->sun_len = sizeof(*sun); 304 sun->sun_family = AF_LOCAL; 305 (void)sprintf(sun->sun_path, "%s/%s-%d", 306 _PATH_TMP, getprogname(), getpid()); 307 308 if (bind(s, sa, (socklen_t)sa->sa_len) != 0) 309 DERR(EX_OSERR, "%s:%d bind to \"%s\" failed", 310 __func__, __LINE__, sun->sun_path); 311 312 sock_len = strlen(sun->sun_path) + 1; 313 } 314 315 len = sizeof(*pmo); 316 len += source ? (uint32_t)strlen(source) + 1 : 0; 317 len += target ? (uint32_t)strlen(target) + 1 : 0; 318 len += filesystemtype ? (uint32_t)strlen(filesystemtype) + 1 : 0; 319 len += data ? (uint32_t)strlen(data) + 1 : 0; 320 len += sock_len; 321 322 if ((frame = malloc(len)) == NULL) { 323 #ifdef PERFUSE_DEBUG 324 if (perfuse_diagflags & PDF_MISC) 325 DWARN("%s:%d malloc failed", __func__, __LINE__); 326 #endif 327 return -1; 328 } 329 330 pmo = (struct perfuse_mount_out *)(void *)frame; 331 pmo->pmo_len = (uint32_t)len; 332 pmo->pmo_error = 0; 333 pmo->pmo_unique = (uint64_t)-1; 334 (void)strcpy(pmo->pmo_magic, PERFUSE_MOUNT_MAGIC); 335 336 pmo->pmo_source_len = source ? (uint32_t)strlen(source) + 1 : 0; 337 pmo->pmo_target_len = target ? (uint32_t)strlen(target) + 1: 0; 338 pmo->pmo_filesystemtype_len = 339 filesystemtype ? (uint32_t)strlen(filesystemtype) + 1 : 0; 340 pmo->pmo_mountflags = (uint32_t)mountflags; 341 pmo->pmo_data_len = data ? (uint32_t)strlen(data) + 1 : 0; 342 pmo->pmo_sock_len = (uint32_t)sock_len; 343 344 cp = (char *)(void *)(pmo + 1); 345 346 if (source) { 347 (void)strcpy(cp, source); 348 cp += pmo->pmo_source_len; 349 } 350 351 if (target) { 352 (void)strcpy(cp, target); 353 cp += pmo->pmo_target_len; 354 } 355 356 if (filesystemtype) { 357 (void)strcpy(cp, filesystemtype); 358 cp += pmo->pmo_filesystemtype_len; 359 } 360 361 if (data) { 362 (void)strcpy(cp, data); 363 cp += pmo->pmo_data_len; 364 } 365 366 if (sock_len != 0) { 367 (void)strcpy(cp, sun->sun_path); 368 cp += pmo->pmo_sock_len; 369 } 370 371 if (send(s, frame, len, MSG_NOSIGNAL) != (ssize_t)len) { 372 #ifdef PERFUSE_DEBUG 373 DWARN("%s:%d sendto failed", __func__, __LINE__); 374 #endif 375 return -1; 376 } 377 378 return 0; 379 } 380 381 382 uint64_t 383 perfuse_next_unique(pu) 384 struct puffs_usermount *pu; 385 { 386 struct perfuse_state *ps; 387 388 ps = puffs_getspecific(pu); 389 390 return ps->ps_unique++; 391 } 392 393 struct puffs_usermount * 394 perfuse_init(pc, pmi) 395 struct perfuse_callbacks *pc; 396 struct perfuse_mount_info *pmi; 397 { 398 struct perfuse_state *ps; 399 struct puffs_usermount *pu; 400 struct puffs_ops *pops; 401 const char *source = _PATH_PUFFS; 402 char *fstype; 403 unsigned int puffs_flags; 404 struct puffs_node *pn_root; 405 struct puffs_pathobj *po_root; 406 struct rlimit rl; 407 408 /* 409 * perfused can grow quite large, let assume there's enough ram ... 410 */ 411 if (getrlimit(RLIMIT_DATA, &rl) < 0) { 412 DERR(EX_OSERR, "%s: getrlimit failed: %s", __func__, 413 strerror(errno)); 414 } else { 415 rl.rlim_cur = rl.rlim_max; 416 if (setrlimit(RLIMIT_DATA, &rl) < 0) { 417 DERR(EX_OSERR, "%s: setrlimit failed: %s", __func__, 418 strerror(errno)); 419 } 420 } 421 422 423 ps = init_state(); 424 ps->ps_owner_uid = pmi->pmi_uid; 425 426 if (pmi->pmi_source) { 427 if ((ps->ps_source = strdup(pmi->pmi_source)) == NULL) 428 DERR(EX_OSERR, "%s: strdup failed", __func__); 429 430 source = ps->ps_source; 431 } 432 433 if (pmi->pmi_filesystemtype) { 434 size_t len; 435 436 ps->ps_filesystemtype = strdup(pmi->pmi_filesystemtype); 437 if (ps->ps_filesystemtype == NULL) 438 DERR(EX_OSERR, "%s: strdup failed", __func__); 439 440 len = sizeof("perfuse|") + strlen(ps->ps_filesystemtype) + 1; 441 if ((fstype = malloc(len)) == NULL) 442 DERR(EX_OSERR, "%s: malloc failed", __func__); 443 444 (void)sprintf(fstype, "perfuse|%s", ps->ps_filesystemtype); 445 } else { 446 if ((fstype = strdup("perfuse")) == NULL) 447 DERR(EX_OSERR, "%s: strdup failed", __func__); 448 } 449 450 if ((ps->ps_target = strdup(pmi->pmi_target)) == NULL) 451 DERR(EX_OSERR, "%s: strdup failed", __func__); 452 453 ps->ps_mountflags = pmi->pmi_mountflags; 454 455 /* 456 * Some options are forbidden for non root users 457 */ 458 if (ps->ps_owner_uid != 0) 459 ps->ps_mountflags |= MNT_NOSUID|MNT_NODEV; 460 461 PUFFSOP_INIT(pops); 462 PUFFSOP_SET(pops, perfuse, fs, unmount); 463 PUFFSOP_SET(pops, perfuse, fs, statvfs); 464 PUFFSOP_SET(pops, perfuse, fs, sync); 465 PUFFSOP_SET(pops, perfuse, node, lookup); 466 PUFFSOP_SET(pops, perfuse, node, create); 467 PUFFSOP_SET(pops, perfuse, node, mknod); 468 PUFFSOP_SET(pops, perfuse, node, open); 469 PUFFSOP_SET(pops, perfuse, node, close); 470 PUFFSOP_SET(pops, perfuse, node, access); 471 PUFFSOP_SET(pops, perfuse, node, getattr); 472 PUFFSOP_SET(pops, perfuse, node, setattr); 473 PUFFSOP_SET(pops, perfuse, node, poll); 474 #if 0 475 PUFFSOP_SET(pops, perfuse, node, mmap); 476 #endif 477 PUFFSOP_SET(pops, perfuse, node, fsync); 478 PUFFSOP_SET(pops, perfuse, node, seek); 479 PUFFSOP_SET(pops, perfuse, node, remove); 480 PUFFSOP_SET(pops, perfuse, node, link); 481 PUFFSOP_SET(pops, perfuse, node, rename); 482 PUFFSOP_SET(pops, perfuse, node, mkdir); 483 PUFFSOP_SET(pops, perfuse, node, rmdir); 484 PUFFSOP_SET(pops, perfuse, node, symlink); 485 PUFFSOP_SET(pops, perfuse, node, readdir); 486 PUFFSOP_SET(pops, perfuse, node, readlink); 487 PUFFSOP_SET(pops, perfuse, node, reclaim); 488 PUFFSOP_SET(pops, perfuse, node, inactive); 489 PUFFSOP_SET(pops, perfuse, node, print); 490 PUFFSOP_SET(pops, perfuse, node, advlock); 491 PUFFSOP_SET(pops, perfuse, node, read); 492 PUFFSOP_SET(pops, perfuse, node, write); 493 #ifdef PUFFS_EXTNAMELEN 494 PUFFSOP_SET(pops, perfuse, node, getextattr); 495 PUFFSOP_SET(pops, perfuse, node, setextattr); 496 PUFFSOP_SET(pops, perfuse, node, listextattr); 497 PUFFSOP_SET(pops, perfuse, node, deleteextattr); 498 #endif /* PUFFS_EXTNAMELEN */ 499 500 /* 501 * We used to have PUFFS_KFLAG_WTCACHE here, which uses the 502 * page cache (highly desirable to get mmap(2)), but still sends 503 * all writes to the filesystem. In fact it does not send the 504 * data written, but the pages that contain it. 505 * 506 * There is a nasty bug hidden somewhere, possibly in libpuffs' 507 * VOP_FSYNC, which sends an asynchronous PUFFS_SETATTR that 508 * update file size. When writes are in progress, it will cause 509 * the file to be truncated and we get a zero-filled chunk at the 510 * beginning of a page. Removing PUFFS_KFLAG_WTCACHE fixes that 511 * problem. 512 * 513 * The other consequences are that changes will not be propagated 514 * immediatly to the filesystem, and we get a huge performance gain 515 * because much less requests are sent. A test case for the above 516 * mentioned bug got its execution time slashed by factor 50. 517 * 518 * PUFFS_KFLAG_NOCACHE_NAME is required so that we can see changes 519 * done by other machines in networked filesystems. 520 */ 521 puffs_flags = PUFFS_KFLAG_NOCACHE_NAME; 522 523 if (perfuse_diagflags & PDF_PUFFS) 524 puffs_flags |= PUFFS_FLAG_OPDUMP; 525 526 if ((pu = puffs_init(pops, source, fstype, ps, puffs_flags)) == NULL) 527 DERR(EX_OSERR, "%s: puffs_init failed", __func__); 528 529 ps->ps_pu = pu; 530 531 /* 532 * Setup filesystem root 533 */ 534 pn_root = perfuse_new_pn(pu, "", NULL); 535 PERFUSE_NODE_DATA(pn_root)->pnd_nodeid = FUSE_ROOT_ID; 536 PERFUSE_NODE_DATA(pn_root)->pnd_parent = pn_root; 537 puffs_setroot(pu, pn_root); 538 ps->ps_fsid = pn_root->pn_va.va_fsid; 539 540 po_root = puffs_getrootpathobj(pu); 541 if ((po_root->po_path = strdup("/")) == NULL) 542 DERRX(EX_OSERR, "perfuse_mount_start() failed"); 543 544 po_root->po_len = 1; 545 puffs_path_buildhash(pu, po_root); 546 547 puffs_vattr_null(&pn_root->pn_va); 548 pn_root->pn_va.va_type = VDIR; 549 pn_root->pn_va.va_mode = 0755; 550 pn_root->pn_va.va_fileid = FUSE_ROOT_ID; 551 552 ps->ps_root = pn_root; 553 554 /* 555 * Callbacks 556 */ 557 ps->ps_new_msg = pc->pc_new_msg; 558 ps->ps_xchg_msg = pc->pc_xchg_msg; 559 ps->ps_destroy_msg = pc->pc_destroy_msg; 560 ps->ps_get_inhdr = pc->pc_get_inhdr; 561 ps->ps_get_inpayload = pc->pc_get_inpayload; 562 ps->ps_get_outhdr = pc->pc_get_outhdr; 563 ps->ps_get_outpayload = pc->pc_get_outpayload; 564 ps->ps_umount = pc->pc_umount; 565 566 return pu; 567 } 568 569 void 570 perfuse_setspecific(pu, priv) 571 struct puffs_usermount *pu; 572 void *priv; 573 { 574 struct perfuse_state *ps; 575 576 ps = puffs_getspecific(pu); 577 ps->ps_private = priv; 578 579 return; 580 } 581 582 void * 583 perfuse_getspecific(pu) 584 struct puffs_usermount *pu; 585 { 586 struct perfuse_state *ps; 587 588 ps = puffs_getspecific(pu); 589 590 return ps->ps_private; 591 } 592 593 int 594 perfuse_inloop(pu) 595 struct puffs_usermount *pu; 596 { 597 struct perfuse_state *ps; 598 599 ps = puffs_getspecific(pu); 600 601 return ps->ps_flags & PS_INLOOP; 602 } 603 604 int 605 perfuse_mainloop(pu) 606 struct puffs_usermount *pu; 607 { 608 struct perfuse_state *ps; 609 610 ps = puffs_getspecific(pu); 611 612 ps->ps_flags |= PS_INLOOP; 613 if (puffs_mainloop(ps->ps_pu) != 0) { 614 DERR(EX_OSERR, "%s: failed", __func__); 615 return -1; 616 } 617 618 /* 619 * Normal exit after unmount 620 */ 621 return 0; 622 } 623 624 /* ARGSUSED0 */ 625 uint64_t 626 perfuse_get_nodeid(pu, opc) 627 struct puffs_usermount *pu; 628 puffs_cookie_t opc; 629 { 630 return PERFUSE_NODE_DATA(opc)->pnd_nodeid; 631 } 632 633 int 634 perfuse_unmount(pu) 635 struct puffs_usermount *pu; 636 { 637 struct perfuse_state *ps; 638 639 ps = puffs_getspecific(pu); 640 641 return unmount(ps->ps_target, MNT_FORCE); 642 } 643