/*	$OpenBSD: sys_pipe.c,v 1.133 2021/12/13 14:56:55 visa Exp $	*/

/*
 * Copyright (c) 1996 John S. Dyson
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice immediately at the beginning of the file, without modification,
 *    this list of conditions, and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Absolutely no warranty of function or purpose is made by the author
 *    John S. Dyson.
 * 4. Modifications may be freely made to this file if the above conditions
 *    are met.
 */

/*
 * This file contains a high-performance replacement for the socket-based
 * pipes scheme originally used in FreeBSD/4.4Lite.  It does not support
 * all features of sockets, but does do everything that pipes normally
 * do.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/fcntl.h>
#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/pool.h>
#include <sys/ioctl.h>
#include <sys/stat.h>
#include <sys/signalvar.h>
#include <sys/mount.h>
#include <sys/syscallargs.h>
#include <sys/event.h>
#include <sys/lock.h>
#include <sys/poll.h>
#ifdef KTRACE
#include <sys/ktrace.h>
#endif

#include <uvm/uvm_extern.h>

#include <sys/pipe.h>

struct pipe_pair {
	struct pipe pp_wpipe;
	struct pipe pp_rpipe;
	struct rwlock pp_lock;
};

/*
 * interfaces to the outside world
 */
int	pipe_read(struct file *, struct uio *, int);
int	pipe_write(struct file *, struct uio *, int);
int	pipe_close(struct file *, struct proc *);
int	pipe_poll(struct file *, int events, struct proc *);
int	pipe_kqfilter(struct file *fp, struct knote *kn);
int	pipe_ioctl(struct file *, u_long, caddr_t, struct proc *);
int	pipe_stat(struct file *fp, struct stat *ub, struct proc *p);

static const struct fileops pipeops = {
	.fo_read	= pipe_read,
	.fo_write	= pipe_write,
	.fo_ioctl	= pipe_ioctl,
	.fo_poll	= pipe_poll,
	.fo_kqfilter	= pipe_kqfilter,
	.fo_stat	= pipe_stat,
	.fo_close	= pipe_close
};

void	filt_pipedetach(struct knote *kn);
int	filt_piperead(struct knote *kn, long hint);
int	filt_pipereadmodify(struct kevent *kev, struct knote *kn);
int	filt_pipereadprocess(struct knote *kn, struct kevent *kev);
int	filt_piperead_common(struct knote *kn, struct pipe *rpipe);
int	filt_pipewrite(struct knote *kn, long hint);
int	filt_pipewritemodify(struct kevent *kev, struct knote *kn);
int	filt_pipewriteprocess(struct knote *kn, struct kevent *kev);
int	filt_pipewrite_common(struct knote *kn, struct pipe *rpipe);
int	filt_pipeexcept(struct knote *kn, long hint);
int	filt_pipeexceptmodify(struct kevent *kev, struct knote *kn);
int	filt_pipeexceptprocess(struct knote *kn, struct kevent *kev);
int	filt_pipeexcept_common(struct knote *kn, struct pipe *rpipe);

const struct filterops pipe_rfiltops = {
	.f_flags	= FILTEROP_ISFD | FILTEROP_MPSAFE,
	.f_attach	= NULL,
	.f_detach	= filt_pipedetach,
	.f_event	= filt_piperead,
	.f_modify	= filt_pipereadmodify,
	.f_process	= filt_pipereadprocess,
};
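
/*
 * Illustrative sketch (user-space code, kept out of the build with #if 0):
 * these filterops are what service a kevent(2) registration on a pipe file
 * descriptor, e.g. EVFILT_READ on the read end.  A hypothetical caller:
 */
#if 0
#include <sys/types.h>
#include <sys/event.h>
#include <err.h>

void
watch_pipe_read_end(int rfd)
{
	struct kevent kev;
	int kq;

	if ((kq = kqueue()) == -1)
		err(1, "kqueue");
	/* Serviced in the kernel by pipe_rfiltops above. */
	EV_SET(&kev, rfd, EVFILT_READ, EV_ADD, 0, 0, NULL);
	if (kevent(kq, &kev, 1, NULL, 0, NULL) == -1)
		err(1, "kevent");
}
#endif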

const struct filterops pipe_wfiltops = {
	.f_flags	= FILTEROP_ISFD | FILTEROP_MPSAFE,
	.f_attach	= NULL,
	.f_detach	= filt_pipedetach,
	.f_event	= filt_pipewrite,
	.f_modify	= filt_pipewritemodify,
	.f_process	= filt_pipewriteprocess,
};

const struct filterops pipe_efiltops = {
	.f_flags	= FILTEROP_ISFD | FILTEROP_MPSAFE,
	.f_attach	= NULL,
	.f_detach	= filt_pipedetach,
	.f_event	= filt_pipeexcept,
	.f_modify	= filt_pipeexceptmodify,
	.f_process	= filt_pipeexceptprocess,
};

/*
 * Default pipe buffer size(s).  This can be kind-of large now because pipe
 * space is pageable.  The pipe code will try to maintain locality of
 * reference for performance reasons, so small amounts of outstanding I/O
 * will not wipe the cache.
 */
#define MINPIPESIZE (PIPE_SIZE/3)

/*
 * Limit the number of "big" pipes
 */
#define LIMITBIGPIPES	32
unsigned int nbigpipe;
static unsigned int amountpipekva;

struct pool pipe_pair_pool;

int	dopipe(struct proc *, int *, int);
void	pipeselwakeup(struct pipe *);

int	pipe_create(struct pipe *);
void	pipe_destroy(struct pipe *);
int	pipe_rundown(struct pipe *);
struct pipe *pipe_peer(struct pipe *);
int	pipe_buffer_realloc(struct pipe *, u_int);
void	pipe_buffer_free(struct pipe *);

int	pipe_iolock(struct pipe *);
void	pipe_iounlock(struct pipe *);
int	pipe_iosleep(struct pipe *, const char *);

struct pipe_pair *pipe_pair_create(void);
void	pipe_pair_destroy(struct pipe_pair *);

/*
 * The pipe system call for the DTYPE_PIPE type of pipes
 */

int
sys_pipe(struct proc *p, void *v, register_t *retval)
{
	struct sys_pipe_args /* {
		syscallarg(int *) fdp;
	} */ *uap = v;

	return (dopipe(p, SCARG(uap, fdp), 0));
}

int
sys_pipe2(struct proc *p, void *v, register_t *retval)
{
	struct sys_pipe2_args /* {
		syscallarg(int *) fdp;
		syscallarg(int) flags;
	} */ *uap = v;

	if (SCARG(uap, flags) & ~(O_CLOEXEC | FNONBLOCK))
		return (EINVAL);

	return (dopipe(p, SCARG(uap, fdp), SCARG(uap, flags)));
}

int
dopipe(struct proc *p, int *ufds, int flags)
{
	struct filedesc *fdp = p->p_fd;
	struct file *rf, *wf;
	struct pipe_pair *pp;
	struct pipe *rpipe, *wpipe = NULL;
	int fds[2], cloexec, error;

	cloexec = (flags & O_CLOEXEC) ? UF_EXCLOSE : 0;

	pp = pipe_pair_create();
	if (pp == NULL)
		return (ENOMEM);
	wpipe = &pp->pp_wpipe;
	rpipe = &pp->pp_rpipe;

	fdplock(fdp);

	error = falloc(p, &rf, &fds[0]);
	if (error != 0)
		goto free2;
	rf->f_flag = FREAD | FWRITE | (flags & FNONBLOCK);
	rf->f_type = DTYPE_PIPE;
	rf->f_data = rpipe;
	rf->f_ops = &pipeops;

	error = falloc(p, &wf, &fds[1]);
	if (error != 0)
		goto free3;
	wf->f_flag = FREAD | FWRITE | (flags & FNONBLOCK);
	wf->f_type = DTYPE_PIPE;
	wf->f_data = wpipe;
	wf->f_ops = &pipeops;

	fdinsert(fdp, fds[0], cloexec, rf);
	fdinsert(fdp, fds[1], cloexec, wf);

	error = copyout(fds, ufds, sizeof(fds));
	if (error == 0) {
		fdpunlock(fdp);
#ifdef KTRACE
		if (KTRPOINT(p, KTR_STRUCT))
			ktrfds(p, fds, 2);
#endif
	} else {
		/* fdrelease() unlocks fdp. */
		fdrelease(p, fds[0]);
		fdplock(fdp);
		fdrelease(p, fds[1]);
	}

	FRELE(rf, p);
	FRELE(wf, p);
	return (error);

free3:
	fdremove(fdp, fds[0]);
	closef(rf, p);
	rpipe = NULL;
free2:
	fdpunlock(fdp);
	pipe_destroy(wpipe);
	pipe_destroy(rpipe);
	return (error);
}
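
/*
 * Illustrative sketch (user-space, not compiled into the kernel): the flags
 * check in sys_pipe2() above means pipe2(2) accepts only O_CLOEXEC and
 * O_NONBLOCK (kernel FNONBLOCK), e.g.:
 */
#if 0
#include <err.h>
#include <fcntl.h>
#include <unistd.h>

void
make_pipe(int fds[2])
{
	/* Both file entries get FNONBLOCK; both fds get UF_EXCLOSE. */
	if (pipe2(fds, O_CLOEXEC | O_NONBLOCK) == -1)
		err(1, "pipe2");
}
#endif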

/*
 * Allocate kva for pipe circular buffer, the space is pageable.
 * This routine will 'realloc' the size of a pipe safely; if it fails
 * it will retain the old buffer and return ENOMEM.
 */
int
pipe_buffer_realloc(struct pipe *cpipe, u_int size)
{
	caddr_t buffer;

	/* buffer uninitialized or pipe locked */
	KASSERT((cpipe->pipe_buffer.buffer == NULL) ||
	    (cpipe->pipe_state & PIPE_LOCK));

	/* buffer should be empty */
	KASSERT(cpipe->pipe_buffer.cnt == 0);

	KERNEL_LOCK();
	buffer = km_alloc(size, &kv_any, &kp_pageable, &kd_waitok);
	KERNEL_UNLOCK();
	if (buffer == NULL)
		return (ENOMEM);

	/* free old resources if we are resizing */
	pipe_buffer_free(cpipe);

	cpipe->pipe_buffer.buffer = buffer;
	cpipe->pipe_buffer.size = size;
	cpipe->pipe_buffer.in = 0;
	cpipe->pipe_buffer.out = 0;

	atomic_add_int(&amountpipekva, cpipe->pipe_buffer.size);

	return (0);
}

/*
 * initialize and allocate VM and memory for pipe
 */
int
pipe_create(struct pipe *cpipe)
{
	int error;

	error = pipe_buffer_realloc(cpipe, PIPE_SIZE);
	if (error != 0)
		return (error);

	sigio_init(&cpipe->pipe_sigio);

	getnanotime(&cpipe->pipe_ctime);
	cpipe->pipe_atime = cpipe->pipe_ctime;
	cpipe->pipe_mtime = cpipe->pipe_ctime;

	return (0);
}

struct pipe *
pipe_peer(struct pipe *cpipe)
{
	struct pipe *peer;

	rw_assert_anylock(cpipe->pipe_lock);

	peer = cpipe->pipe_peer;
	if (peer == NULL || (peer->pipe_state & PIPE_EOF))
		return (NULL);
	return (peer);
}

/*
 * Lock a pipe for exclusive I/O access.
 */
int
pipe_iolock(struct pipe *cpipe)
{
	int error;

	rw_assert_wrlock(cpipe->pipe_lock);

	while (cpipe->pipe_state & PIPE_LOCK) {
		cpipe->pipe_state |= PIPE_LWANT;
		error = rwsleep_nsec(cpipe, cpipe->pipe_lock, PRIBIO | PCATCH,
		    "pipeiolk", INFSLP);
		if (error)
			return (error);
	}
	cpipe->pipe_state |= PIPE_LOCK;
	return (0);
}

/*
 * Unlock a pipe I/O lock.
 */
void
pipe_iounlock(struct pipe *cpipe)
{
	rw_assert_wrlock(cpipe->pipe_lock);
	KASSERT(cpipe->pipe_state & PIPE_LOCK);

	cpipe->pipe_state &= ~PIPE_LOCK;
	if (cpipe->pipe_state & PIPE_LWANT) {
		cpipe->pipe_state &= ~PIPE_LWANT;
		wakeup(cpipe);
	}
}
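
/*
 * Sketch of the I/O locking discipline (illustrative only; `need_to_wait'
 * is a hypothetical placeholder condition).  This mirrors what pipe_read()
 * and pipe_write() below actually do: hold the pair's rwlock, take a
 * `pipe_busy' reference, then the PIPE_LOCK I/O lock; sleep via
 * pipe_iosleep(); balance with pipe_iounlock() and pipe_rundown().
 */
#if 0
	rw_enter_write(cpipe->pipe_lock);
	++cpipe->pipe_busy;			/* keep pipe_destroy() at bay */
	if ((error = pipe_iolock(cpipe)) == 0) {
		while (error == 0 && need_to_wait)
			error = pipe_iosleep(cpipe, "pipeex");
		if (error == 0)
			pipe_iounlock(cpipe);	/* sleep failure drops it */
	}
	--cpipe->pipe_busy;
	pipe_rundown(cpipe);			/* wake a pending close */
	rw_exit_write(cpipe->pipe_lock);
#endif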

/*
 * Unlock the pipe I/O lock and go to sleep.  Returns 0 on success and the I/O
 * lock is relocked.  Otherwise if a signal was caught, non-zero is returned
 * and the I/O lock is not locked.
 *
 * Any caller must obtain a reference to the pipe by incrementing `pipe_busy'
 * before calling this function in order to ensure that the same pipe is not
 * destroyed while sleeping.
 */
int
pipe_iosleep(struct pipe *cpipe, const char *wmesg)
{
	int error;

	pipe_iounlock(cpipe);
	error = rwsleep_nsec(cpipe, cpipe->pipe_lock, PRIBIO | PCATCH, wmesg,
	    INFSLP);
	if (error)
		return (error);
	return (pipe_iolock(cpipe));
}

void
pipeselwakeup(struct pipe *cpipe)
{
	rw_assert_wrlock(cpipe->pipe_lock);

	if (cpipe->pipe_state & PIPE_SEL) {
		cpipe->pipe_state &= ~PIPE_SEL;
		selwakeup(&cpipe->pipe_sel);
	} else {
		KNOTE(&cpipe->pipe_sel.si_note, 0);
	}

	if (cpipe->pipe_state & PIPE_ASYNC)
		pgsigio(&cpipe->pipe_sigio, SIGIO, 0);
}

int
pipe_read(struct file *fp, struct uio *uio, int fflags)
{
	struct pipe *rpipe = fp->f_data;
	size_t nread = 0, size;
	int error;

	rw_enter_write(rpipe->pipe_lock);
	++rpipe->pipe_busy;
	error = pipe_iolock(rpipe);
	if (error) {
		--rpipe->pipe_busy;
		pipe_rundown(rpipe);
		rw_exit_write(rpipe->pipe_lock);
		return (error);
	}

	while (uio->uio_resid) {
		/* Normal pipe buffer receive. */
		if (rpipe->pipe_buffer.cnt > 0) {
			size = rpipe->pipe_buffer.size - rpipe->pipe_buffer.out;
			if (size > rpipe->pipe_buffer.cnt)
				size = rpipe->pipe_buffer.cnt;
			if (size > uio->uio_resid)
				size = uio->uio_resid;
			rw_exit_write(rpipe->pipe_lock);
			error = uiomove(&rpipe->pipe_buffer.buffer[rpipe->pipe_buffer.out],
			    size, uio);
			rw_enter_write(rpipe->pipe_lock);
			if (error) {
				break;
			}
			rpipe->pipe_buffer.out += size;
			if (rpipe->pipe_buffer.out >= rpipe->pipe_buffer.size)
				rpipe->pipe_buffer.out = 0;

			rpipe->pipe_buffer.cnt -= size;
			/*
			 * If there is no more to read in the pipe, reset
			 * its pointers to the beginning.  This improves
			 * cache hit stats.
			 */
			if (rpipe->pipe_buffer.cnt == 0) {
				rpipe->pipe_buffer.in = 0;
				rpipe->pipe_buffer.out = 0;
			}
			nread += size;
		} else {
			/*
			 * detect EOF condition
			 * read returns 0 on EOF, no need to set error
			 */
			if (rpipe->pipe_state & PIPE_EOF)
				break;

			/* If the "write-side" has been blocked, wake it up. */
			if (rpipe->pipe_state & PIPE_WANTW) {
				rpipe->pipe_state &= ~PIPE_WANTW;
				wakeup(rpipe);
			}

			/* Break if some data was read. */
			if (nread > 0)
				break;

			/* Handle non-blocking mode operation. */
			if (fp->f_flag & FNONBLOCK) {
				error = EAGAIN;
				break;
			}

			/* Wait for more data. */
			rpipe->pipe_state |= PIPE_WANTR;
			error = pipe_iosleep(rpipe, "piperd");
			if (error)
				goto unlocked_error;
		}
	}
	pipe_iounlock(rpipe);

	if (error == 0)
		getnanotime(&rpipe->pipe_atime);
unlocked_error:
	--rpipe->pipe_busy;

	if (pipe_rundown(rpipe) == 0 && rpipe->pipe_buffer.cnt < MINPIPESIZE) {
		/* Handle write blocking hysteresis. */
		if (rpipe->pipe_state & PIPE_WANTW) {
			rpipe->pipe_state &= ~PIPE_WANTW;
			wakeup(rpipe);
		}
	}

	if (rpipe->pipe_buffer.size - rpipe->pipe_buffer.cnt >= PIPE_BUF)
		pipeselwakeup(rpipe);

	rw_exit_write(rpipe->pipe_lock);
	return (error);
}
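
/*
 * Illustrative sketch (user-space, not compiled): the read-side circular
 * buffer arithmetic of pipe_read() above, as a standalone miniature.
 * `struct ring' is a hypothetical stand-in for struct pipe_buffer; partial
 * reads require looping, just as pipe_read() loops on uio_resid.
 */
#if 0
#include <string.h>

struct ring {
	char	buf[16];
	size_t	size, in, out, cnt;
};

/* Copy up to `want' bytes, honoring the wrap at `size' like pipe_read(). */
static size_t
ring_read(struct ring *r, char *dst, size_t want)
{
	size_t n = r->size - r->out;	/* contiguous run to end of buffer */

	if (n > r->cnt)
		n = r->cnt;
	if (n > want)
		n = want;
	memcpy(dst, &r->buf[r->out], n);
	r->out += n;
	if (r->out >= r->size)
		r->out = 0;
	r->cnt -= n;
	if (r->cnt == 0)		/* reset for locality, as above */
		r->in = r->out = 0;
	return (n);
}
#endif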

int
pipe_write(struct file *fp, struct uio *uio, int fflags)
{
	struct pipe *rpipe = fp->f_data, *wpipe;
	struct rwlock *lock = rpipe->pipe_lock;
	size_t orig_resid;
	int error;

	rw_enter_write(lock);
	wpipe = pipe_peer(rpipe);

	/* Detect loss of pipe read side, issue SIGPIPE if lost. */
	if (wpipe == NULL) {
		rw_exit_write(lock);
		return (EPIPE);
	}

	++wpipe->pipe_busy;
	error = pipe_iolock(wpipe);
	if (error) {
		--wpipe->pipe_busy;
		pipe_rundown(wpipe);
		rw_exit_write(lock);
		return (error);
	}

	/* If it is advantageous to resize the pipe buffer, do so. */
	if (uio->uio_resid > PIPE_SIZE &&
	    wpipe->pipe_buffer.size <= PIPE_SIZE &&
	    wpipe->pipe_buffer.cnt == 0) {
		unsigned int npipe;

		npipe = atomic_inc_int_nv(&nbigpipe);
		if (npipe > LIMITBIGPIPES ||
		    pipe_buffer_realloc(wpipe, BIG_PIPE_SIZE) != 0)
			atomic_dec_int(&nbigpipe);
	}

	orig_resid = uio->uio_resid;

	while (uio->uio_resid) {
		size_t space;

		if (wpipe->pipe_state & PIPE_EOF) {
			error = EPIPE;
			break;
		}

		space = wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt;

		/* Writes of size <= PIPE_BUF must be atomic. */
		if (space < uio->uio_resid && orig_resid <= PIPE_BUF)
			space = 0;

		if (space > 0) {
			size_t size;	/* Transfer size */
			size_t segsize;	/* first segment to transfer */

			/*
			 * Transfer size is minimum of uio transfer
			 * and free space in pipe buffer.
			 */
			if (space > uio->uio_resid)
				size = uio->uio_resid;
			else
				size = space;
			/*
			 * First segment to transfer is minimum of
			 * transfer size and contiguous space in
			 * pipe buffer.  If first segment to transfer
			 * is less than the transfer size, we've got
			 * a wraparound in the buffer.
			 */
			segsize = wpipe->pipe_buffer.size -
			    wpipe->pipe_buffer.in;
			if (segsize > size)
				segsize = size;

			/* Transfer first segment */

			rw_exit_write(lock);
			error = uiomove(&wpipe->pipe_buffer.buffer[wpipe->pipe_buffer.in],
			    segsize, uio);
			rw_enter_write(lock);

			if (error == 0 && segsize < size) {
				/*
				 * Transfer remaining part now, to
				 * support atomic writes.  Wraparound
				 * happened.
				 */
#ifdef DIAGNOSTIC
				if (wpipe->pipe_buffer.in + segsize !=
				    wpipe->pipe_buffer.size)
					panic("Expected pipe buffer wraparound disappeared");
#endif

				rw_exit_write(lock);
				error = uiomove(&wpipe->pipe_buffer.buffer[0],
				    size - segsize, uio);
				rw_enter_write(lock);
			}
			if (error == 0) {
				wpipe->pipe_buffer.in += size;
				if (wpipe->pipe_buffer.in >=
				    wpipe->pipe_buffer.size) {
#ifdef DIAGNOSTIC
					if (wpipe->pipe_buffer.in != size - segsize + wpipe->pipe_buffer.size)
						panic("Expected wraparound bad");
#endif
					wpipe->pipe_buffer.in = size - segsize;
				}

				wpipe->pipe_buffer.cnt += size;
#ifdef DIAGNOSTIC
				if (wpipe->pipe_buffer.cnt > wpipe->pipe_buffer.size)
					panic("Pipe buffer overflow");
#endif
			}
			if (error)
				break;
		} else {
			/* If the "read-side" has been blocked, wake it up. */
			if (wpipe->pipe_state & PIPE_WANTR) {
				wpipe->pipe_state &= ~PIPE_WANTR;
				wakeup(wpipe);
			}

			/* Don't block on non-blocking I/O. */
			if (fp->f_flag & FNONBLOCK) {
				error = EAGAIN;
				break;
			}

			/*
			 * We have no more space and have something to offer,
			 * wake up select/poll.
			 */
			pipeselwakeup(wpipe);

			wpipe->pipe_state |= PIPE_WANTW;
			error = pipe_iosleep(wpipe, "pipewr");
			if (error)
				goto unlocked_error;

			/*
			 * If read side wants to go away, we just issue a
			 * signal to ourselves.
			 */
			if (wpipe->pipe_state & PIPE_EOF) {
				error = EPIPE;
				break;
			}
		}
	}
	pipe_iounlock(wpipe);

unlocked_error:
	--wpipe->pipe_busy;

	if (pipe_rundown(wpipe) == 0 && wpipe->pipe_buffer.cnt > 0) {
		/*
		 * If we have put any characters in the buffer, we wake up
		 * the reader.
		 */
		if (wpipe->pipe_state & PIPE_WANTR) {
			wpipe->pipe_state &= ~PIPE_WANTR;
			wakeup(wpipe);
		}
	}

	/* Don't return EPIPE if I/O was successful. */
	if (wpipe->pipe_buffer.cnt == 0 &&
	    uio->uio_resid == 0 &&
	    error == EPIPE) {
		error = 0;
	}

	if (error == 0)
		getnanotime(&wpipe->pipe_mtime);
	/* We have something to offer, wake up select/poll. */
	if (wpipe->pipe_buffer.cnt)
		pipeselwakeup(wpipe);

	rw_exit_write(lock);
	return (error);
}
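
/*
 * Illustrative sketch (user-space, not compiled): the two-segment copy that
 * pipe_write() above performs when the free space wraps around the end of
 * the circular buffer.  `struct ring' is the same hypothetical miniature of
 * struct pipe_buffer as in the read-side sketch; the caller must guarantee
 * size <= r->size - r->cnt, as pipe_write() does via its `space' check.
 */
#if 0
#include <string.h>

struct ring {
	char	buf[16];
	size_t	size, in, out, cnt;
};

static void
ring_write(struct ring *r, const char *src, size_t size)
{
	/* First segment: contiguous space from `in' to end of buffer. */
	size_t segsize = r->size - r->in;

	if (segsize > size)
		segsize = size;
	memcpy(&r->buf[r->in], src, segsize);
	if (segsize < size)	/* wraparound: rest goes to the front */
		memcpy(&r->buf[0], src + segsize, size - segsize);
	r->in += size;
	if (r->in >= r->size)
		r->in = size - segsize;
	r->cnt += size;
}
#endif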

/*
 * we implement a very minimal set of ioctls for compatibility with sockets.
 */
int
pipe_ioctl(struct file *fp, u_long cmd, caddr_t data, struct proc *p)
{
	struct pipe *mpipe = fp->f_data;
	int error = 0;

	switch (cmd) {

	case FIONBIO:
		break;

	case FIOASYNC:
		rw_enter_write(mpipe->pipe_lock);
		if (*(int *)data) {
			mpipe->pipe_state |= PIPE_ASYNC;
		} else {
			mpipe->pipe_state &= ~PIPE_ASYNC;
		}
		rw_exit_write(mpipe->pipe_lock);
		break;

	case FIONREAD:
		rw_enter_read(mpipe->pipe_lock);
		*(int *)data = mpipe->pipe_buffer.cnt;
		rw_exit_read(mpipe->pipe_lock);
		break;

	case FIOSETOWN:
	case SIOCSPGRP:
	case TIOCSPGRP:
		error = sigio_setown(&mpipe->pipe_sigio, cmd, data);
		break;

	case FIOGETOWN:
	case SIOCGPGRP:
	case TIOCGPGRP:
		sigio_getown(&mpipe->pipe_sigio, cmd, data);
		break;

	default:
		error = ENOTTY;
	}

	return (error);
}

int
pipe_poll(struct file *fp, int events, struct proc *p)
{
	struct pipe *rpipe = fp->f_data, *wpipe;
	struct rwlock *lock = rpipe->pipe_lock;
	int revents = 0;

	rw_enter_write(lock);
	wpipe = pipe_peer(rpipe);

	if (events & (POLLIN | POLLRDNORM)) {
		if (rpipe->pipe_buffer.cnt > 0 ||
		    (rpipe->pipe_state & PIPE_EOF))
			revents |= events & (POLLIN | POLLRDNORM);
	}

	/* NOTE: POLLHUP and POLLOUT/POLLWRNORM are mutually exclusive */
	if ((rpipe->pipe_state & PIPE_EOF) || wpipe == NULL)
		revents |= POLLHUP;
	else if (events & (POLLOUT | POLLWRNORM)) {
		if (wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt >= PIPE_BUF)
			revents |= events & (POLLOUT | POLLWRNORM);
	}

	if (revents == 0) {
		if (events & (POLLIN | POLLRDNORM)) {
			selrecord(p, &rpipe->pipe_sel);
			rpipe->pipe_state |= PIPE_SEL;
		}
		if (events & (POLLOUT | POLLWRNORM)) {
			selrecord(p, &wpipe->pipe_sel);
			wpipe->pipe_state |= PIPE_SEL;
		}
	}

	rw_exit_write(lock);

	return (revents);
}
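
/*
 * Illustrative sketch (user-space, not compiled): how the cases in
 * pipe_poll() above surface through poll(2).  POLLOUT is reported only
 * while at least PIPE_BUF bytes of buffer space remain, and POLLHUP
 * replaces it once the read side is gone.
 */
#if 0
#include <err.h>
#include <poll.h>

int
wait_writable(int wfd)
{
	struct pollfd pfd = { .fd = wfd, .events = POLLOUT };

	if (poll(&pfd, 1, INFTIM) == -1)
		err(1, "poll");
	if (pfd.revents & POLLHUP)
		return (-1);	/* reader closed; a write would give EPIPE */
	return (0);		/* >= PIPE_BUF bytes may be written */
}
#endif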

int
pipe_stat(struct file *fp, struct stat *ub, struct proc *p)
{
	struct pipe *pipe = fp->f_data;

	memset(ub, 0, sizeof(*ub));

	rw_enter_read(pipe->pipe_lock);
	ub->st_mode = S_IFIFO;
	ub->st_blksize = pipe->pipe_buffer.size;
	ub->st_size = pipe->pipe_buffer.cnt;
	ub->st_blocks = (ub->st_size + ub->st_blksize - 1) / ub->st_blksize;
	ub->st_atim.tv_sec = pipe->pipe_atime.tv_sec;
	ub->st_atim.tv_nsec = pipe->pipe_atime.tv_nsec;
	ub->st_mtim.tv_sec = pipe->pipe_mtime.tv_sec;
	ub->st_mtim.tv_nsec = pipe->pipe_mtime.tv_nsec;
	ub->st_ctim.tv_sec = pipe->pipe_ctime.tv_sec;
	ub->st_ctim.tv_nsec = pipe->pipe_ctime.tv_nsec;
	ub->st_uid = fp->f_cred->cr_uid;
	ub->st_gid = fp->f_cred->cr_gid;
	rw_exit_read(pipe->pipe_lock);
	/*
	 * Left as 0: st_dev, st_ino, st_nlink, st_rdev, st_flags, st_gen.
	 * XXX (st_dev, st_ino) should be unique.
	 */
	return (0);
}

int
pipe_close(struct file *fp, struct proc *p)
{
	struct pipe *cpipe = fp->f_data;

	fp->f_ops = NULL;
	fp->f_data = NULL;
	pipe_destroy(cpipe);
	return (0);
}

/*
 * Free kva for pipe circular buffer.
 * No pipe lock check as only called from pipe_buffer_realloc() and pipeclose()
 */
void
pipe_buffer_free(struct pipe *cpipe)
{
	u_int size;

	if (cpipe->pipe_buffer.buffer == NULL)
		return;

	size = cpipe->pipe_buffer.size;

	KERNEL_LOCK();
	km_free(cpipe->pipe_buffer.buffer, size, &kv_any, &kp_pageable);
	KERNEL_UNLOCK();

	cpipe->pipe_buffer.buffer = NULL;

	atomic_sub_int(&amountpipekva, size);
	if (size > PIPE_SIZE)
		atomic_dec_int(&nbigpipe);
}

/*
 * shutdown the pipe, and free resources.
 */
void
pipe_destroy(struct pipe *cpipe)
{
	struct pipe *ppipe;

	if (cpipe == NULL)
		return;

	rw_enter_write(cpipe->pipe_lock);

	pipeselwakeup(cpipe);
	sigio_free(&cpipe->pipe_sigio);

	/*
	 * If the other side is blocked, wake it up saying that
	 * we want to close it down.
	 */
	cpipe->pipe_state |= PIPE_EOF;
	while (cpipe->pipe_busy) {
		wakeup(cpipe);
		cpipe->pipe_state |= PIPE_WANTD;
		rwsleep_nsec(cpipe, cpipe->pipe_lock, PRIBIO, "pipecl", INFSLP);
	}

	/* Disconnect from peer. */
	if ((ppipe = cpipe->pipe_peer) != NULL) {
		pipeselwakeup(ppipe);

		ppipe->pipe_state |= PIPE_EOF;
		wakeup(ppipe);
		ppipe->pipe_peer = NULL;
	}

	pipe_buffer_free(cpipe);

	rw_exit_write(cpipe->pipe_lock);

	if (ppipe == NULL)
		pipe_pair_destroy(cpipe->pipe_pair);
}

/*
 * Returns non-zero if a rundown is currently ongoing.
 */
int
pipe_rundown(struct pipe *cpipe)
{
	rw_assert_wrlock(cpipe->pipe_lock);

	if (cpipe->pipe_busy > 0 || (cpipe->pipe_state & PIPE_WANTD) == 0)
		return (0);

	/* Only wakeup pipe_destroy() once the pipe is no longer busy. */
	cpipe->pipe_state &= ~(PIPE_WANTD | PIPE_WANTR | PIPE_WANTW);
	wakeup(cpipe);
	return (1);
}
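
/*
 * Illustrative sketch (user-space, not compiled): given pipe_stat() above,
 * fstat(2) on a pipe reports the buffer size in st_blksize and the number
 * of unread bytes in st_size.
 */
#if 0
#include <sys/stat.h>
#include <err.h>

off_t
pipe_bytes_buffered(int fd)
{
	struct stat sb;

	if (fstat(fd, &sb) == -1)
		err(1, "fstat");
	return (sb.st_size);	/* pipe_buffer.cnt at the time of the call */
}
#endif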

int
pipe_kqfilter(struct file *fp, struct knote *kn)
{
	struct pipe *rpipe = kn->kn_fp->f_data, *wpipe;
	struct rwlock *lock = rpipe->pipe_lock;
	int error = 0;

	rw_enter_write(lock);
	wpipe = pipe_peer(rpipe);

	switch (kn->kn_filter) {
	case EVFILT_READ:
		kn->kn_fop = &pipe_rfiltops;
		kn->kn_hook = rpipe;
		klist_insert_locked(&rpipe->pipe_sel.si_note, kn);
		break;
	case EVFILT_WRITE:
		if (wpipe == NULL) {
			/* other end of pipe has been closed */
			error = EPIPE;
			break;
		}
		kn->kn_fop = &pipe_wfiltops;
		kn->kn_hook = wpipe;
		klist_insert_locked(&wpipe->pipe_sel.si_note, kn);
		break;
	case EVFILT_EXCEPT:
		if (kn->kn_flags & __EV_SELECT) {
			/* Prevent triggering exceptfds. */
			error = EPERM;
			break;
		}
		if ((kn->kn_flags & __EV_POLL) == 0) {
			/* Disallow usage through kevent(2). */
			error = EINVAL;
			break;
		}
		kn->kn_fop = &pipe_efiltops;
		kn->kn_hook = rpipe;
		klist_insert_locked(&rpipe->pipe_sel.si_note, kn);
		break;
	default:
		error = EINVAL;
	}

	rw_exit_write(lock);

	return (error);
}

void
filt_pipedetach(struct knote *kn)
{
	struct pipe *cpipe = kn->kn_hook;

	klist_remove(&cpipe->pipe_sel.si_note, kn);
}

int
filt_piperead_common(struct knote *kn, struct pipe *rpipe)
{
	struct pipe *wpipe;

	rw_assert_wrlock(rpipe->pipe_lock);

	wpipe = pipe_peer(rpipe);

	kn->kn_data = rpipe->pipe_buffer.cnt;

	if ((rpipe->pipe_state & PIPE_EOF) || wpipe == NULL) {
		kn->kn_flags |= EV_EOF;
		if (kn->kn_flags & __EV_POLL)
			kn->kn_flags |= __EV_HUP;
		return (1);
	}

	return (kn->kn_data > 0);
}

int
filt_piperead(struct knote *kn, long hint)
{
	struct pipe *rpipe = kn->kn_fp->f_data;

	return (filt_piperead_common(kn, rpipe));
}

int
filt_pipereadmodify(struct kevent *kev, struct knote *kn)
{
	struct pipe *rpipe = kn->kn_fp->f_data;
	int active;

	rw_enter_write(rpipe->pipe_lock);
	knote_modify(kev, kn);
	active = filt_piperead_common(kn, rpipe);
	rw_exit_write(rpipe->pipe_lock);

	return (active);
}

int
filt_pipereadprocess(struct knote *kn, struct kevent *kev)
{
	struct pipe *rpipe = kn->kn_fp->f_data;
	int active;

	rw_enter_write(rpipe->pipe_lock);
	if (kev != NULL && (kn->kn_flags & EV_ONESHOT))
		active = 1;
	else
		active = filt_piperead_common(kn, rpipe);
	if (active)
		knote_submit(kn, kev);
	rw_exit_write(rpipe->pipe_lock);

	return (active);
}

int
filt_pipewrite_common(struct knote *kn, struct pipe *rpipe)
{
	struct pipe *wpipe;

	rw_assert_wrlock(rpipe->pipe_lock);

	wpipe = pipe_peer(rpipe);

	if (wpipe == NULL) {
		kn->kn_data = 0;
		kn->kn_flags |= EV_EOF;
		if (kn->kn_flags & __EV_POLL)
			kn->kn_flags |= __EV_HUP;
		return (1);
	}
	kn->kn_data = wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt;

	return (kn->kn_data >= PIPE_BUF);
}

int
filt_pipewrite(struct knote *kn, long hint)
{
	struct pipe *rpipe = kn->kn_fp->f_data;

	return (filt_pipewrite_common(kn, rpipe));
}

int
filt_pipewritemodify(struct kevent *kev, struct knote *kn)
{
	struct pipe *rpipe = kn->kn_fp->f_data;
	int active;

	rw_enter_write(rpipe->pipe_lock);
	knote_modify(kev, kn);
	active = filt_pipewrite_common(kn, rpipe);
	rw_exit_write(rpipe->pipe_lock);

	return (active);
}

int
filt_pipewriteprocess(struct knote *kn, struct kevent *kev)
{
	struct pipe *rpipe = kn->kn_fp->f_data;
	int active;

	rw_enter_write(rpipe->pipe_lock);
	if (kev != NULL && (kn->kn_flags & EV_ONESHOT))
		active = 1;
	else
		active = filt_pipewrite_common(kn, rpipe);
	if (active)
		knote_submit(kn, kev);
	rw_exit_write(rpipe->pipe_lock);

	return (active);
}
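
/*
 * Illustrative sketch (user-space, not compiled): EVFILT_WRITE as served by
 * filt_pipewrite_common() above; kev.data comes back as the free buffer
 * space, and the event fires only while at least PIPE_BUF bytes are
 * writable.  (EVFILT_EXCEPT, below, exists for the kernel's own poll(2)
 * emulation and is rejected when requested directly through kevent(2).)
 */
#if 0
#include <sys/types.h>
#include <sys/event.h>
#include <err.h>

size_t
wait_write_space(int kq, int wfd)
{
	struct kevent kev;

	EV_SET(&kev, wfd, EVFILT_WRITE, EV_ADD | EV_ONESHOT, 0, 0, NULL);
	if (kevent(kq, &kev, 1, &kev, 1, NULL) == -1)
		err(1, "kevent");
	if (kev.flags & EV_EOF)
		return (0);		/* read side is gone */
	return ((size_t)kev.data);	/* bytes of buffer space free */
}
#endif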

int
filt_pipeexcept_common(struct knote *kn, struct pipe *rpipe)
{
	struct pipe *wpipe;
	int active = 0;

	rw_assert_wrlock(rpipe->pipe_lock);

	wpipe = pipe_peer(rpipe);

	if (kn->kn_flags & __EV_POLL) {
		if ((rpipe->pipe_state & PIPE_EOF) || wpipe == NULL) {
			kn->kn_flags |= __EV_HUP;
			active = 1;
		}
	}

	return (active);
}

int
filt_pipeexcept(struct knote *kn, long hint)
{
	struct pipe *rpipe = kn->kn_fp->f_data;

	return (filt_pipeexcept_common(kn, rpipe));
}

int
filt_pipeexceptmodify(struct kevent *kev, struct knote *kn)
{
	struct pipe *rpipe = kn->kn_fp->f_data;
	int active;

	rw_enter_write(rpipe->pipe_lock);
	knote_modify(kev, kn);
	active = filt_pipeexcept_common(kn, rpipe);
	rw_exit_write(rpipe->pipe_lock);

	return (active);
}

int
filt_pipeexceptprocess(struct knote *kn, struct kevent *kev)
{
	struct pipe *rpipe = kn->kn_fp->f_data;
	int active;

	rw_enter_write(rpipe->pipe_lock);
	if (kev != NULL && (kn->kn_flags & EV_ONESHOT))
		active = 1;
	else
		active = filt_pipeexcept_common(kn, rpipe);
	if (active)
		knote_submit(kn, kev);
	rw_exit_write(rpipe->pipe_lock);

	return (active);
}

void
pipe_init(void)
{
	pool_init(&pipe_pair_pool, sizeof(struct pipe_pair), 0, IPL_MPFLOOR,
	    PR_WAITOK, "pipepl", NULL);
}

struct pipe_pair *
pipe_pair_create(void)
{
	struct pipe_pair *pp;

	pp = pool_get(&pipe_pair_pool, PR_WAITOK | PR_ZERO);
	pp->pp_wpipe.pipe_pair = pp;
	pp->pp_rpipe.pipe_pair = pp;
	pp->pp_wpipe.pipe_peer = &pp->pp_rpipe;
	pp->pp_rpipe.pipe_peer = &pp->pp_wpipe;
	/*
	 * One lock is used per pipe pair in order to obtain exclusive access
	 * to the pipe pair.
	 */
	rw_init(&pp->pp_lock, "pipelk");
	pp->pp_wpipe.pipe_lock = &pp->pp_lock;
	pp->pp_rpipe.pipe_lock = &pp->pp_lock;

	klist_init_rwlock(&pp->pp_wpipe.pipe_sel.si_note, &pp->pp_lock);
	klist_init_rwlock(&pp->pp_rpipe.pipe_sel.si_note, &pp->pp_lock);

	if (pipe_create(&pp->pp_wpipe) || pipe_create(&pp->pp_rpipe))
		goto err;
	return (pp);
err:
	pipe_destroy(&pp->pp_wpipe);
	pipe_destroy(&pp->pp_rpipe);
	return (NULL);
}

void
pipe_pair_destroy(struct pipe_pair *pp)
{
	klist_free(&pp->pp_wpipe.pipe_sel.si_note);
	klist_free(&pp->pp_rpipe.pipe_sel.si_note);
	pool_put(&pipe_pair_pool, pp);
}
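
/*
 * Illustrative sketch (not compiled): the ownership picture set up by
 * pipe_pair_create() above.  Both pipe ends, their shared rwlock and the
 * klists live in one pool allocation; the pair returns to the pool only
 * when the second end is destroyed (its pipe_peer already NULL).
 */
#if 0
	struct pipe_pair *pp = pipe_pair_create();
	struct pipe *w = &pp->pp_wpipe, *r = &pp->pp_rpipe;

	KASSERT(w->pipe_peer == r && r->pipe_peer == w);
	KASSERT(w->pipe_lock == &pp->pp_lock);
	KASSERT(w->pipe_lock == r->pipe_lock);	/* one lock per pair */
	pipe_destroy(w);	/* marks EOF, unlinks the peer */
	pipe_destroy(r);	/* last end frees the whole pair */
#endif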