/*	$OpenBSD: sys_pipe.c,v 1.118 2020/02/20 16:56:52 visa Exp $	*/

/*
 * Copyright (c) 1996 John S. Dyson
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice immediately at the beginning of the file, without modification,
 *    this list of conditions, and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Absolutely no warranty of function or purpose is made by the author
 *    John S. Dyson.
 * 4. Modifications may be freely made to this file if the above conditions
 *    are met.
 */

/*
 * This file contains a high-performance replacement for the socket-based
 * pipes scheme originally used in FreeBSD/4.4Lite.  It does not support
 * all features of sockets, but does do everything that pipes normally
 * do.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/fcntl.h>
#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/pool.h>
#include <sys/ioctl.h>
#include <sys/stat.h>
#include <sys/signalvar.h>
#include <sys/mount.h>
#include <sys/syscallargs.h>
#include <sys/event.h>
#include <sys/lock.h>
#include <sys/poll.h>
#ifdef KTRACE
#include <sys/ktrace.h>
#endif

#include <uvm/uvm_extern.h>

#include <sys/pipe.h>

/*
 * interfaces to the outside world
 */
int	pipe_read(struct file *, struct uio *, int);
int	pipe_write(struct file *, struct uio *, int);
int	pipe_close(struct file *, struct proc *);
int	pipe_poll(struct file *, int events, struct proc *);
int	pipe_kqfilter(struct file *fp, struct knote *kn);
int	pipe_ioctl(struct file *, u_long, caddr_t, struct proc *);
int	pipe_stat(struct file *fp, struct stat *ub, struct proc *p);

static const struct fileops pipeops = {
	.fo_read	= pipe_read,
	.fo_write	= pipe_write,
	.fo_ioctl	= pipe_ioctl,
	.fo_poll	= pipe_poll,
	.fo_kqfilter	= pipe_kqfilter,
	.fo_stat	= pipe_stat,
	.fo_close	= pipe_close
};

void	filt_pipedetach(struct knote *kn);
int	filt_piperead(struct knote *kn, long hint);
int	filt_pipewrite(struct knote *kn, long hint);

const struct filterops pipe_rfiltops = {
	.f_flags	= FILTEROP_ISFD,
	.f_attach	= NULL,
	.f_detach	= filt_pipedetach,
	.f_event	= filt_piperead,
};

const struct filterops pipe_wfiltops = {
	.f_flags	= FILTEROP_ISFD,
	.f_attach	= NULL,
	.f_detach	= filt_pipedetach,
	.f_event	= filt_pipewrite,
};
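
/*
 * Hedged illustration (not part of this file): the filterops above are
 * what a userland kevent(2) registration on a pipe descriptor ends up
 * selecting, via pipe_kqfilter() below.  Roughly, assuming fds[] came
 * from pipe(2) and error handling is abbreviated:
 *
 *	struct kevent ev;
 *	int kq = kqueue();
 *
 *	EV_SET(&ev, fds[0], EVFILT_READ, EV_ADD, 0, 0, NULL);
 *	if (kevent(kq, &ev, 1, NULL, 0, NULL) == -1)
 *		err(1, "kevent");
 */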

/*
 * Default pipe buffer size(s); this can be kind-of large now because pipe
 * space is pageable.  The pipe code will try to maintain locality of
 * reference for performance reasons, so small amounts of outstanding I/O
 * will not wipe the cache.
 */
#define MINPIPESIZE (PIPE_SIZE/3)

/*
 * Limit the number of "big" pipes
 */
#define LIMITBIGPIPES	32
unsigned int nbigpipe;
static unsigned int amountpipekva;

struct pool pipe_pool;
struct pool pipe_lock_pool;

int	dopipe(struct proc *, int *, int);
void	pipeselwakeup(struct pipe *);

struct pipe *pipe_create(void);
void	pipe_destroy(struct pipe *);
int	pipe_rundown(struct pipe *);
struct pipe *pipe_peer(struct pipe *);
int	pipe_buffer_realloc(struct pipe *, u_int);
void	pipe_buffer_free(struct pipe *);

int	pipe_iolock(struct pipe *);
void	pipe_iounlock(struct pipe *);
int	pipe_iosleep(struct pipe *, const char *);

/*
 * The pipe system call for the DTYPE_PIPE type of pipes
 */

int
sys_pipe(struct proc *p, void *v, register_t *retval)
{
	struct sys_pipe_args /* {
		syscallarg(int *) fdp;
	} */ *uap = v;

	return (dopipe(p, SCARG(uap, fdp), 0));
}

int
sys_pipe2(struct proc *p, void *v, register_t *retval)
{
	struct sys_pipe2_args /* {
		syscallarg(int *) fdp;
		syscallarg(int) flags;
	} */ *uap = v;

	if (SCARG(uap, flags) & ~(O_CLOEXEC | FNONBLOCK))
		return (EINVAL);

	return (dopipe(p, SCARG(uap, fdp), SCARG(uap, flags)));
}

int
dopipe(struct proc *p, int *ufds, int flags)
{
	struct filedesc *fdp = p->p_fd;
	struct file *rf, *wf;
	struct pipe *rpipe, *wpipe = NULL;
	struct rwlock *lock;
	int fds[2], cloexec, error;

	cloexec = (flags & O_CLOEXEC) ? UF_EXCLOSE : 0;

	if ((rpipe = pipe_create()) == NULL) {
		error = ENOMEM;
		goto free1;
	}

	/*
	 * One lock is used per pipe pair in order to obtain exclusive
	 * access to the pair.
	 */
	lock = pool_get(&pipe_lock_pool, PR_WAITOK);
	rw_init(lock, "pipelk");
	rpipe->pipe_lock = lock;

	if ((wpipe = pipe_create()) == NULL) {
		error = ENOMEM;
		goto free1;
	}
	wpipe->pipe_lock = lock;

	rpipe->pipe_peer = wpipe;
	wpipe->pipe_peer = rpipe;

	fdplock(fdp);

	error = falloc(p, &rf, &fds[0]);
	if (error != 0)
		goto free2;
	rf->f_flag = FREAD | FWRITE | (flags & FNONBLOCK);
	rf->f_type = DTYPE_PIPE;
	rf->f_data = rpipe;
	rf->f_ops = &pipeops;

	error = falloc(p, &wf, &fds[1]);
	if (error != 0)
		goto free3;
	wf->f_flag = FREAD | FWRITE | (flags & FNONBLOCK);
	wf->f_type = DTYPE_PIPE;
	wf->f_data = wpipe;
	wf->f_ops = &pipeops;

	fdinsert(fdp, fds[0], cloexec, rf);
	fdinsert(fdp, fds[1], cloexec, wf);

	error = copyout(fds, ufds, sizeof(fds));
	if (error == 0) {
		fdpunlock(fdp);
#ifdef KTRACE
		if (KTRPOINT(p, KTR_STRUCT))
			ktrfds(p, fds, 2);
#endif
	} else {
		/* fdrelease() unlocks fdp. */
		fdrelease(p, fds[0]);
		fdplock(fdp);
		fdrelease(p, fds[1]);
	}

	FRELE(rf, p);
	FRELE(wf, p);
	return (error);

free3:
	fdremove(fdp, fds[0]);
	closef(rf, p);
	rpipe = NULL;
free2:
	fdpunlock(fdp);
free1:
	pipe_destroy(wpipe);
	pipe_destroy(rpipe);
	return (error);
}
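
/*
 * Hedged userland sketch of what dopipe() sets up (not part of this
 * file; error handling abbreviated).  By convention fds[0] is the read
 * end and fds[1] the write end; writing fds[1] fills the peer buffer
 * that a read of fds[0] drains:
 *
 *	int fds[2];
 *	char buf[64];
 *
 *	if (pipe2(fds, O_CLOEXEC) == -1)
 *		err(1, "pipe2");
 *	write(fds[1], "hello", 5);
 *	read(fds[0], buf, sizeof(buf));
 */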

/*
 * Allocate kva for the pipe circular buffer; the space is pageable.
 * This routine will 'realloc' the size of a pipe safely: if allocation
 * fails, the old buffer is retained and ENOMEM is returned.
 */
int
pipe_buffer_realloc(struct pipe *cpipe, u_int size)
{
	caddr_t buffer;

	/* buffer uninitialized or pipe locked */
	KASSERT((cpipe->pipe_buffer.buffer == NULL) ||
	    (cpipe->pipe_state & PIPE_LOCK));

	/* buffer should be empty */
	KASSERT(cpipe->pipe_buffer.cnt == 0);

	KERNEL_LOCK();
	buffer = km_alloc(size, &kv_any, &kp_pageable, &kd_waitok);
	KERNEL_UNLOCK();
	if (buffer == NULL)
		return (ENOMEM);

	/* free old resources if we are resizing */
	pipe_buffer_free(cpipe);

	cpipe->pipe_buffer.buffer = buffer;
	cpipe->pipe_buffer.size = size;
	cpipe->pipe_buffer.in = 0;
	cpipe->pipe_buffer.out = 0;

	atomic_add_int(&amountpipekva, cpipe->pipe_buffer.size);

	return (0);
}

/*
 * initialize and allocate VM and memory for pipe
 */
struct pipe *
pipe_create(void)
{
	struct pipe *cpipe;
	int error;

	cpipe = pool_get(&pipe_pool, PR_WAITOK | PR_ZERO);

	error = pipe_buffer_realloc(cpipe, PIPE_SIZE);
	if (error != 0) {
		pool_put(&pipe_pool, cpipe);
		return (NULL);
	}

	sigio_init(&cpipe->pipe_sigio);

	getnanotime(&cpipe->pipe_ctime);
	cpipe->pipe_atime = cpipe->pipe_ctime;
	cpipe->pipe_mtime = cpipe->pipe_ctime;

	return (cpipe);
}

struct pipe *
pipe_peer(struct pipe *cpipe)
{
	struct pipe *peer;

	rw_assert_anylock(cpipe->pipe_lock);

	peer = cpipe->pipe_peer;
	if (peer == NULL || (peer->pipe_state & PIPE_EOF))
		return (NULL);
	return (peer);
}

/*
 * Lock a pipe for exclusive I/O access.
 */
int
pipe_iolock(struct pipe *cpipe)
{
	int error;

	rw_assert_wrlock(cpipe->pipe_lock);

	while (cpipe->pipe_state & PIPE_LOCK) {
		cpipe->pipe_state |= PIPE_LWANT;
		error = rwsleep_nsec(cpipe, cpipe->pipe_lock, PRIBIO | PCATCH,
		    "pipeiolk", INFSLP);
		if (error)
			return (error);
	}
	cpipe->pipe_state |= PIPE_LOCK;
	return (0);
}

/*
 * Unlock a pipe I/O lock.
 */
void
pipe_iounlock(struct pipe *cpipe)
{
	rw_assert_wrlock(cpipe->pipe_lock);
	KASSERT(cpipe->pipe_state & PIPE_LOCK);

	cpipe->pipe_state &= ~PIPE_LOCK;
	if (cpipe->pipe_state & PIPE_LWANT) {
		cpipe->pipe_state &= ~PIPE_LWANT;
		wakeup(cpipe);
	}
}
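
/*
 * A sketch of the expected calling pattern for the I/O lock, matching
 * what pipe_read() and pipe_write() below actually do:
 *
 *	rw_enter_write(cpipe->pipe_lock);
 *	++cpipe->pipe_busy;
 *	error = pipe_iolock(cpipe);
 *	if (error == 0) {
 *		... move data, possibly sleeping via pipe_iosleep() ...
 *		pipe_iounlock(cpipe);
 *	}
 *	--cpipe->pipe_busy;
 *	pipe_rundown(cpipe);
 *	rw_exit_write(cpipe->pipe_lock);
 */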

/*
 * Unlock the pipe I/O lock and go to sleep.  Returns 0 on success, with
 * the I/O lock relocked.  Otherwise, if a signal was caught, non-zero is
 * returned and the I/O lock is not held.
 *
 * Any caller must obtain a reference to the pipe by incrementing `pipe_busy'
 * before calling this function in order to ensure that the same pipe is not
 * destroyed while sleeping.
 */
int
pipe_iosleep(struct pipe *cpipe, const char *wmesg)
{
	int error;

	pipe_iounlock(cpipe);
	error = rwsleep_nsec(cpipe, cpipe->pipe_lock, PRIBIO | PCATCH, wmesg,
	    INFSLP);
	if (error)
		return (error);
	return (pipe_iolock(cpipe));
}

void
pipeselwakeup(struct pipe *cpipe)
{
	rw_assert_wrlock(cpipe->pipe_lock);

	if (cpipe->pipe_state & PIPE_SEL) {
		cpipe->pipe_state &= ~PIPE_SEL;
		selwakeup(&cpipe->pipe_sel);
	} else {
		KERNEL_LOCK();
		KNOTE(&cpipe->pipe_sel.si_note, NOTE_SUBMIT);
		KERNEL_UNLOCK();
	}

	if (cpipe->pipe_state & PIPE_ASYNC)
		pgsigio(&cpipe->pipe_sigio, SIGIO, 0);
}
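
/*
 * The pipe buffer is a plain ring: `in' is the write index, `out' is the
 * read index and `cnt' is the number of valid bytes.  Worked example
 * (illustrative numbers only): with size = 16384, out = 16380 and
 * cnt = 104, a reader first copies the 4 contiguous bytes up to the end
 * of the buffer, wraps `out' to 0, then picks up the remaining 100 bytes
 * from the front.  pipe_read() below performs exactly this
 * contiguous-segment clamping.
 */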
int
pipe_read(struct file *fp, struct uio *uio, int fflags)
{
	struct pipe *rpipe = fp->f_data;
	size_t nread = 0, size;
	int error;

	rw_enter_write(rpipe->pipe_lock);
	++rpipe->pipe_busy;
	error = pipe_iolock(rpipe);
	if (error) {
		--rpipe->pipe_busy;
		pipe_rundown(rpipe);
		rw_exit_write(rpipe->pipe_lock);
		return (error);
	}

	while (uio->uio_resid) {
		/* Normal pipe buffer receive. */
		if (rpipe->pipe_buffer.cnt > 0) {
			size = rpipe->pipe_buffer.size - rpipe->pipe_buffer.out;
			if (size > rpipe->pipe_buffer.cnt)
				size = rpipe->pipe_buffer.cnt;
			if (size > uio->uio_resid)
				size = uio->uio_resid;
			rw_exit_write(rpipe->pipe_lock);
			error = uiomove(&rpipe->pipe_buffer.buffer[rpipe->pipe_buffer.out],
			    size, uio);
			rw_enter_write(rpipe->pipe_lock);
			if (error) {
				break;
			}
			rpipe->pipe_buffer.out += size;
			if (rpipe->pipe_buffer.out >= rpipe->pipe_buffer.size)
				rpipe->pipe_buffer.out = 0;

			rpipe->pipe_buffer.cnt -= size;
			/*
			 * If there is no more to read in the pipe, reset
			 * its pointers to the beginning.  This improves
			 * cache hit stats.
			 */
			if (rpipe->pipe_buffer.cnt == 0) {
				rpipe->pipe_buffer.in = 0;
				rpipe->pipe_buffer.out = 0;
			}
			nread += size;
		} else {
			/*
			 * detect EOF condition
			 * read returns 0 on EOF, no need to set error
			 */
			if (rpipe->pipe_state & PIPE_EOF)
				break;

			/* If the "write-side" has been blocked, wake it up. */
			if (rpipe->pipe_state & PIPE_WANTW) {
				rpipe->pipe_state &= ~PIPE_WANTW;
				wakeup(rpipe);
			}

			/* Break if some data was read. */
			if (nread > 0)
				break;

			/* Handle non-blocking mode operation. */
			if (fp->f_flag & FNONBLOCK) {
				error = EAGAIN;
				break;
			}

			/* Wait for more data. */
			rpipe->pipe_state |= PIPE_WANTR;
			error = pipe_iosleep(rpipe, "piperd");
			if (error)
				goto unlocked_error;
		}
	}
	pipe_iounlock(rpipe);

	if (error == 0)
		getnanotime(&rpipe->pipe_atime);
unlocked_error:
	--rpipe->pipe_busy;

	if (pipe_rundown(rpipe) == 0 && rpipe->pipe_buffer.cnt < MINPIPESIZE) {
		/* Handle write blocking hysteresis. */
		if (rpipe->pipe_state & PIPE_WANTW) {
			rpipe->pipe_state &= ~PIPE_WANTW;
			wakeup(rpipe);
		}
	}

	if (rpipe->pipe_buffer.size - rpipe->pipe_buffer.cnt >= PIPE_BUF)
		pipeselwakeup(rpipe);

	rw_exit_write(rpipe->pipe_lock);
	return (error);
}
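
/*
 * Note on atomicity (a hedged reading of the logic below): POSIX requires
 * that writes of at most PIPE_BUF bytes be atomic.  pipe_write() enforces
 * this by treating the buffer as full (space = 0) whenever a small write
 * cannot fit in its entirety, so a userland record such as
 *
 *	char record[PIPE_BUF];
 *	write(fds[1], record, sizeof(record));
 *
 * is never interleaved with data from other writers, while larger writes
 * may be split.
 */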
int
pipe_write(struct file *fp, struct uio *uio, int fflags)
{
	struct pipe *rpipe = fp->f_data, *wpipe;
	struct rwlock *lock = rpipe->pipe_lock;
	size_t orig_resid;
	int error;

	rw_enter_write(lock);
	wpipe = pipe_peer(rpipe);

	/* Detect loss of pipe read side, issue SIGPIPE if lost. */
	if (wpipe == NULL) {
		rw_exit_write(lock);
		return (EPIPE);
	}

	++wpipe->pipe_busy;
	error = pipe_iolock(wpipe);
	if (error) {
		--wpipe->pipe_busy;
		pipe_rundown(wpipe);
		rw_exit_write(lock);
		return (error);
	}

	/* If it is advantageous to resize the pipe buffer, do so. */
	if (uio->uio_resid > PIPE_SIZE &&
	    wpipe->pipe_buffer.size <= PIPE_SIZE &&
	    wpipe->pipe_buffer.cnt == 0) {
		unsigned int npipe;

		npipe = atomic_inc_int_nv(&nbigpipe);
		if (npipe > LIMITBIGPIPES ||
		    pipe_buffer_realloc(wpipe, BIG_PIPE_SIZE) != 0)
			atomic_dec_int(&nbigpipe);
	}

	orig_resid = uio->uio_resid;

	while (uio->uio_resid) {
		size_t space;

		if (wpipe->pipe_state & PIPE_EOF) {
			error = EPIPE;
			break;
		}

		space = wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt;

		/* Writes of size <= PIPE_BUF must be atomic. */
		if (space < uio->uio_resid && orig_resid <= PIPE_BUF)
			space = 0;

		if (space > 0) {
			size_t size;	/* Transfer size */
			size_t segsize;	/* first segment to transfer */

			/*
			 * Transfer size is minimum of uio transfer
			 * and free space in pipe buffer.
			 */
			if (space > uio->uio_resid)
				size = uio->uio_resid;
			else
				size = space;
			/*
			 * First segment to transfer is minimum of
			 * transfer size and contiguous space in
			 * pipe buffer.  If first segment to transfer
			 * is less than the transfer size, we've got
			 * a wraparound in the buffer.
			 */
			segsize = wpipe->pipe_buffer.size -
			    wpipe->pipe_buffer.in;
			if (segsize > size)
				segsize = size;

			/* Transfer first segment */

			rw_exit_write(lock);
			error = uiomove(&wpipe->pipe_buffer.buffer[wpipe->pipe_buffer.in],
			    segsize, uio);
			rw_enter_write(lock);

			if (error == 0 && segsize < size) {
				/*
				 * Transfer remaining part now, to
				 * support atomic writes.  Wraparound
				 * happened.
				 */
#ifdef DIAGNOSTIC
				if (wpipe->pipe_buffer.in + segsize !=
				    wpipe->pipe_buffer.size)
					panic("Expected pipe buffer wraparound disappeared");
#endif

				rw_exit_write(lock);
				error = uiomove(&wpipe->pipe_buffer.buffer[0],
				    size - segsize, uio);
				rw_enter_write(lock);
			}
			if (error == 0) {
				wpipe->pipe_buffer.in += size;
				if (wpipe->pipe_buffer.in >=
				    wpipe->pipe_buffer.size) {
#ifdef DIAGNOSTIC
					if (wpipe->pipe_buffer.in != size - segsize + wpipe->pipe_buffer.size)
						panic("Expected wraparound bad");
#endif
					wpipe->pipe_buffer.in = size - segsize;
				}

				wpipe->pipe_buffer.cnt += size;
#ifdef DIAGNOSTIC
				if (wpipe->pipe_buffer.cnt > wpipe->pipe_buffer.size)
					panic("Pipe buffer overflow");
#endif
			}
			if (error)
				break;
		} else {
			/* If the "read-side" has been blocked, wake it up. */
			if (wpipe->pipe_state & PIPE_WANTR) {
				wpipe->pipe_state &= ~PIPE_WANTR;
				wakeup(wpipe);
			}

			/* Don't block on non-blocking I/O. */
			if (fp->f_flag & FNONBLOCK) {
				error = EAGAIN;
				break;
			}

			/*
			 * We have no more space and have something to offer,
			 * so wake up select/poll.
			 */
			pipeselwakeup(wpipe);

			wpipe->pipe_state |= PIPE_WANTW;
			error = pipe_iosleep(wpipe, "pipewr");
			if (error)
				goto unlocked_error;

			/*
			 * If read side wants to go away, we just issue a
			 * signal to ourselves.
			 */
			if (wpipe->pipe_state & PIPE_EOF) {
				error = EPIPE;
				break;
			}
		}
	}
	pipe_iounlock(wpipe);

unlocked_error:
	--wpipe->pipe_busy;

	if (pipe_rundown(wpipe) == 0 && wpipe->pipe_buffer.cnt > 0) {
		/*
		 * If we have put any characters in the buffer, we wake up
		 * the reader.
		 */
		if (wpipe->pipe_state & PIPE_WANTR) {
			wpipe->pipe_state &= ~PIPE_WANTR;
			wakeup(wpipe);
		}
	}

	/* Don't return EPIPE if I/O was successful. */
	if (wpipe->pipe_buffer.cnt == 0 &&
	    uio->uio_resid == 0 &&
	    error == EPIPE) {
		error = 0;
	}

	if (error == 0)
		getnanotime(&wpipe->pipe_mtime);
	/* We have something to offer, wake up select/poll. */
	if (wpipe->pipe_buffer.cnt)
		pipeselwakeup(wpipe);

	rw_exit_write(lock);
	return (error);
}

/*
 * we implement a very minimal set of ioctls for compatibility with sockets.
 */
int
pipe_ioctl(struct file *fp, u_long cmd, caddr_t data, struct proc *p)
{
	struct pipe *mpipe = fp->f_data;
	int error = 0;

	switch (cmd) {

	case FIONBIO:
		break;

	case FIOASYNC:
		rw_enter_write(mpipe->pipe_lock);
		if (*(int *)data) {
			mpipe->pipe_state |= PIPE_ASYNC;
		} else {
			mpipe->pipe_state &= ~PIPE_ASYNC;
		}
		rw_exit_write(mpipe->pipe_lock);
		break;

	case FIONREAD:
		rw_enter_read(mpipe->pipe_lock);
		*(int *)data = mpipe->pipe_buffer.cnt;
		rw_exit_read(mpipe->pipe_lock);
		break;

	case FIOSETOWN:
	case SIOCSPGRP:
	case TIOCSPGRP:
		error = sigio_setown(&mpipe->pipe_sigio, cmd, data);
		break;

	case FIOGETOWN:
	case SIOCGPGRP:
	case TIOCGPGRP:
		sigio_getown(&mpipe->pipe_sigio, cmd, data);
		break;

	default:
		error = ENOTTY;
	}

	return (error);
}

int
pipe_poll(struct file *fp, int events, struct proc *p)
{
	struct pipe *rpipe = fp->f_data, *wpipe;
	struct rwlock *lock = rpipe->pipe_lock;
	int revents = 0;

	rw_enter_write(lock);
	wpipe = pipe_peer(rpipe);

	if (events & (POLLIN | POLLRDNORM)) {
		if (rpipe->pipe_buffer.cnt > 0 ||
		    (rpipe->pipe_state & PIPE_EOF))
			revents |= events & (POLLIN | POLLRDNORM);
	}

	/* NOTE: POLLHUP and POLLOUT/POLLWRNORM are mutually exclusive */
	if ((rpipe->pipe_state & PIPE_EOF) || wpipe == NULL)
		revents |= POLLHUP;
	else if (events & (POLLOUT | POLLWRNORM)) {
		if (wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt >= PIPE_BUF)
			revents |= events & (POLLOUT | POLLWRNORM);
	}

	if (revents == 0) {
		if (events & (POLLIN | POLLRDNORM)) {
			selrecord(p, &rpipe->pipe_sel);
			rpipe->pipe_state |= PIPE_SEL;
		}
		if (events & (POLLOUT | POLLWRNORM)) {
			selrecord(p, &wpipe->pipe_sel);
			wpipe->pipe_state |= PIPE_SEL;
		}
	}

	rw_exit_write(lock);

	return (revents);
}
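
/*
 * Hedged userland sketch (not part of this file): the buffer state
 * consulted by pipe_poll() above and exported by pipe_stat() below is
 * also visible through ioctl(2) and fstat(2):
 *
 *	int pending;
 *	struct stat sb;
 *
 *	ioctl(fds[0], FIONREAD, &pending);	(bytes currently buffered)
 *	fstat(fds[0], &sb);			(sb.st_size is the same count)
 */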
int
pipe_stat(struct file *fp, struct stat *ub, struct proc *p)
{
	struct pipe *pipe = fp->f_data;

	memset(ub, 0, sizeof(*ub));

	rw_enter_read(pipe->pipe_lock);
	ub->st_mode = S_IFIFO;
	ub->st_blksize = pipe->pipe_buffer.size;
	ub->st_size = pipe->pipe_buffer.cnt;
	ub->st_blocks = (ub->st_size + ub->st_blksize - 1) / ub->st_blksize;
	ub->st_atim.tv_sec = pipe->pipe_atime.tv_sec;
	ub->st_atim.tv_nsec = pipe->pipe_atime.tv_nsec;
	ub->st_mtim.tv_sec = pipe->pipe_mtime.tv_sec;
	ub->st_mtim.tv_nsec = pipe->pipe_mtime.tv_nsec;
	ub->st_ctim.tv_sec = pipe->pipe_ctime.tv_sec;
	ub->st_ctim.tv_nsec = pipe->pipe_ctime.tv_nsec;
	ub->st_uid = fp->f_cred->cr_uid;
	ub->st_gid = fp->f_cred->cr_gid;
	rw_exit_read(pipe->pipe_lock);
	/*
	 * Left as 0: st_dev, st_ino, st_nlink, st_rdev, st_flags, st_gen.
	 * XXX (st_dev, st_ino) should be unique.
	 */
	return (0);
}

int
pipe_close(struct file *fp, struct proc *p)
{
	struct pipe *cpipe = fp->f_data;

	fp->f_ops = NULL;
	fp->f_data = NULL;
	pipe_destroy(cpipe);
	return (0);
}

/*
 * Free kva for pipe circular buffer.
 * No pipe lock check as only called from pipe_buffer_realloc() and
 * pipe_destroy().
 */
void
pipe_buffer_free(struct pipe *cpipe)
{
	u_int size;

	if (cpipe->pipe_buffer.buffer == NULL)
		return;

	size = cpipe->pipe_buffer.size;

	KERNEL_LOCK();
	km_free(cpipe->pipe_buffer.buffer, size, &kv_any, &kp_pageable);
	KERNEL_UNLOCK();

	cpipe->pipe_buffer.buffer = NULL;

	atomic_sub_int(&amountpipekva, size);
	if (size > PIPE_SIZE)
		atomic_dec_int(&nbigpipe);
}

/*
 * Shut down the pipe, and free resources.
 */
void
pipe_destroy(struct pipe *cpipe)
{
	struct pipe *ppipe;
	struct rwlock *lock = NULL;

	if (cpipe == NULL)
		return;

	rw_enter_write(cpipe->pipe_lock);

	pipeselwakeup(cpipe);
	sigio_free(&cpipe->pipe_sigio);

	/*
	 * If the other side is blocked, wake it up saying that
	 * we want to close it down.
	 */
	cpipe->pipe_state |= PIPE_EOF;
	while (cpipe->pipe_busy) {
		wakeup(cpipe);
		cpipe->pipe_state |= PIPE_WANTD;
		rwsleep_nsec(cpipe, cpipe->pipe_lock, PRIBIO, "pipecl", INFSLP);
	}

	/* Disconnect from peer. */
	if ((ppipe = cpipe->pipe_peer) != NULL) {
		pipeselwakeup(ppipe);

		ppipe->pipe_state |= PIPE_EOF;
		wakeup(ppipe);
		ppipe->pipe_peer = NULL;
	} else {
		/*
		 * Peer already gone.  This is the last reference to the
		 * pipe lock and it must therefore be freed below.
		 */
		lock = cpipe->pipe_lock;
	}

	rw_exit_write(cpipe->pipe_lock);

	pipe_buffer_free(cpipe);
	if (lock != NULL)
		pool_put(&pipe_lock_pool, lock);
	pool_put(&pipe_pool, cpipe);
}
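
/*
 * Teardown handshake, summarizing the interplay of pipe_destroy() above
 * and pipe_rundown() below: pipe_destroy() sets PIPE_WANTD and sleeps
 * while `pipe_busy' is non-zero; the last reader or writer to drop its
 * reference calls pipe_rundown(), which issues the matching wakeup.  The
 * rwlock shared by the pair is returned to pipe_lock_pool only by the
 * side that is destroyed last.
 */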

/*
 * Returns non-zero if a rundown is currently ongoing.
 */
int
pipe_rundown(struct pipe *cpipe)
{
	rw_assert_wrlock(cpipe->pipe_lock);

	if (cpipe->pipe_busy > 0 || (cpipe->pipe_state & PIPE_WANTD) == 0)
		return (0);

	/* Only wakeup pipe_destroy() once the pipe is no longer busy. */
	cpipe->pipe_state &= ~(PIPE_WANTD | PIPE_WANTR | PIPE_WANTW);
	wakeup(cpipe);
	return (1);
}

int
pipe_kqfilter(struct file *fp, struct knote *kn)
{
	struct pipe *rpipe = kn->kn_fp->f_data, *wpipe;
	struct rwlock *lock = rpipe->pipe_lock;
	int error = 0;

	rw_enter_write(lock);
	wpipe = pipe_peer(rpipe);

	switch (kn->kn_filter) {
	case EVFILT_READ:
		kn->kn_fop = &pipe_rfiltops;
		SLIST_INSERT_HEAD(&rpipe->pipe_sel.si_note, kn, kn_selnext);
		break;
	case EVFILT_WRITE:
		if (wpipe == NULL) {
			/* other end of pipe has been closed */
			error = EPIPE;
			break;
		}
		kn->kn_fop = &pipe_wfiltops;
		SLIST_INSERT_HEAD(&wpipe->pipe_sel.si_note, kn, kn_selnext);
		break;
	default:
		error = EINVAL;
	}

	rw_exit_write(lock);

	return (error);
}

void
filt_pipedetach(struct knote *kn)
{
	struct pipe *rpipe = kn->kn_fp->f_data, *wpipe;
	struct rwlock *lock = rpipe->pipe_lock;

	rw_enter_write(lock);
	wpipe = pipe_peer(rpipe);

	switch (kn->kn_filter) {
	case EVFILT_READ:
		SLIST_REMOVE(&rpipe->pipe_sel.si_note, kn, knote, kn_selnext);
		break;
	case EVFILT_WRITE:
		if (wpipe == NULL)
			break;
		SLIST_REMOVE(&wpipe->pipe_sel.si_note, kn, knote, kn_selnext);
		break;
	}

	rw_exit_write(lock);
}

int
filt_piperead(struct knote *kn, long hint)
{
	struct pipe *rpipe = kn->kn_fp->f_data, *wpipe;
	struct rwlock *lock = rpipe->pipe_lock;

	if ((hint & NOTE_SUBMIT) == 0)
		rw_enter_read(lock);
	wpipe = pipe_peer(rpipe);

	kn->kn_data = rpipe->pipe_buffer.cnt;

	if ((rpipe->pipe_state & PIPE_EOF) || wpipe == NULL) {
		if ((hint & NOTE_SUBMIT) == 0)
			rw_exit_read(lock);
		kn->kn_flags |= EV_EOF;
		return (1);
	}

	if ((hint & NOTE_SUBMIT) == 0)
		rw_exit_read(lock);

	return (kn->kn_data > 0);
}

int
filt_pipewrite(struct knote *kn, long hint)
{
	struct pipe *rpipe = kn->kn_fp->f_data, *wpipe;
	struct rwlock *lock = rpipe->pipe_lock;

	if ((hint & NOTE_SUBMIT) == 0)
		rw_enter_read(lock);
	wpipe = pipe_peer(rpipe);

	if (wpipe == NULL) {
		if ((hint & NOTE_SUBMIT) == 0)
			rw_exit_read(lock);
		kn->kn_data = 0;
		kn->kn_flags |= EV_EOF;
		return (1);
	}
	kn->kn_data = wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt;

	if ((hint & NOTE_SUBMIT) == 0)
		rw_exit_read(lock);

	return (kn->kn_data >= PIPE_BUF);
}

void
pipe_init(void)
{
	pool_init(&pipe_pool, sizeof(struct pipe), 0, IPL_MPFLOOR, PR_WAITOK,
	    "pipepl", NULL);
	pool_init(&pipe_lock_pool, sizeof(struct rwlock), 0, IPL_MPFLOOR,
	    PR_WAITOK, "pipelkpl", NULL);
}