/*	$OpenBSD: sys_pipe.c,v 1.126 2020/12/30 17:02:32 visa Exp $	*/

/*
 * Copyright (c) 1996 John S. Dyson
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice immediately at the beginning of the file, without modification,
 *    this list of conditions, and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Absolutely no warranty of function or purpose is made by the author
 *    John S. Dyson.
 * 4. Modifications may be freely made to this file if the above conditions
 *    are met.
 */

/*
 * This file contains a high-performance replacement for the socket-based
 * pipes scheme originally used in FreeBSD/4.4Lite.  It does not support
 * all features of sockets, but does do everything that pipes normally
 * do.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/fcntl.h>
#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/pool.h>
#include <sys/ioctl.h>
#include <sys/stat.h>
#include <sys/signalvar.h>
#include <sys/mount.h>
#include <sys/syscallargs.h>
#include <sys/event.h>
#include <sys/lock.h>
#include <sys/poll.h>
#ifdef KTRACE
#include <sys/ktrace.h>
#endif

#include <uvm/uvm_extern.h>

#include <sys/pipe.h>

struct pipe_pair {
	struct pipe pp_wpipe;
	struct pipe pp_rpipe;
	struct rwlock pp_lock;
};

/*
 * interfaces to the outside world
 */
int	pipe_read(struct file *, struct uio *, int);
int	pipe_write(struct file *, struct uio *, int);
int	pipe_close(struct file *, struct proc *);
int	pipe_poll(struct file *, int events, struct proc *);
int	pipe_kqfilter(struct file *fp, struct knote *kn);
int	pipe_ioctl(struct file *, u_long, caddr_t, struct proc *);
int	pipe_stat(struct file *fp, struct stat *ub, struct proc *p);

static const struct fileops pipeops = {
	.fo_read	= pipe_read,
	.fo_write	= pipe_write,
	.fo_ioctl	= pipe_ioctl,
	.fo_poll	= pipe_poll,
	.fo_kqfilter	= pipe_kqfilter,
	.fo_stat	= pipe_stat,
	.fo_close	= pipe_close
};

void	filt_pipedetach(struct knote *kn);
int	filt_piperead(struct knote *kn, long hint);
int	filt_pipewrite(struct knote *kn, long hint);

const struct filterops pipe_rfiltops = {
	.f_flags	= FILTEROP_ISFD,
	.f_attach	= NULL,
	.f_detach	= filt_pipedetach,
	.f_event	= filt_piperead,
};

const struct filterops pipe_wfiltops = {
	.f_flags	= FILTEROP_ISFD,
	.f_attach	= NULL,
	.f_detach	= filt_pipedetach,
	.f_event	= filt_pipewrite,
};

/*
 * Default pipe buffer size(s), this can be kind-of large now because pipe
 * space is pageable.  The pipe code will try to maintain locality of
 * reference for performance reasons, so small amounts of outstanding I/O
 * will not wipe the cache.
 */
#define MINPIPESIZE (PIPE_SIZE/3)

/*
 * Limit the number of "big" pipes
 */
#define LIMITBIGPIPES	32
unsigned int nbigpipe;
static unsigned int amountpipekva;

struct pool pipe_pair_pool;

int	dopipe(struct proc *, int *, int);
void	pipeselwakeup(struct pipe *);

int	pipe_create(struct pipe *);
void	pipe_destroy(struct pipe *);
int	pipe_rundown(struct pipe *);
struct pipe *pipe_peer(struct pipe *);
int	pipe_buffer_realloc(struct pipe *, u_int);
void	pipe_buffer_free(struct pipe *);

int	pipe_iolock(struct pipe *);
void	pipe_iounlock(struct pipe *);
int	pipe_iosleep(struct pipe *, const char *);

struct pipe_pair *pipe_pair_create(void);
void	pipe_pair_destroy(struct pipe_pair *);

/*
 * The pipe system call for the DTYPE_PIPE type of pipes
 */

int
sys_pipe(struct proc *p, void *v, register_t *retval)
{
	struct sys_pipe_args /* {
		syscallarg(int *) fdp;
	} */ *uap = v;

	return (dopipe(p, SCARG(uap, fdp), 0));
}

int
sys_pipe2(struct proc *p, void *v, register_t *retval)
{
	struct sys_pipe2_args /* {
		syscallarg(int *) fdp;
		syscallarg(int) flags;
	} */ *uap = v;

	if (SCARG(uap, flags) & ~(O_CLOEXEC | FNONBLOCK))
		return (EINVAL);

	return (dopipe(p, SCARG(uap, fdp), SCARG(uap, flags)));
}

int
dopipe(struct proc *p, int *ufds, int flags)
{
	struct filedesc *fdp = p->p_fd;
	struct file *rf, *wf;
	struct pipe_pair *pp;
	struct pipe *rpipe, *wpipe = NULL;
	int fds[2], cloexec, error;

	cloexec = (flags & O_CLOEXEC) ? UF_EXCLOSE : 0;

	pp = pipe_pair_create();
	if (pp == NULL)
		return (ENOMEM);
	wpipe = &pp->pp_wpipe;
	rpipe = &pp->pp_rpipe;

	fdplock(fdp);

	error = falloc(p, &rf, &fds[0]);
	if (error != 0)
		goto free2;
	rf->f_flag = FREAD | FWRITE | (flags & FNONBLOCK);
	rf->f_type = DTYPE_PIPE;
	rf->f_data = rpipe;
	rf->f_ops = &pipeops;

	error = falloc(p, &wf, &fds[1]);
	if (error != 0)
		goto free3;
	wf->f_flag = FREAD | FWRITE | (flags & FNONBLOCK);
	wf->f_type = DTYPE_PIPE;
	wf->f_data = wpipe;
	wf->f_ops = &pipeops;

	fdinsert(fdp, fds[0], cloexec, rf);
	fdinsert(fdp, fds[1], cloexec, wf);

	error = copyout(fds, ufds, sizeof(fds));
	if (error == 0) {
		fdpunlock(fdp);
#ifdef KTRACE
		if (KTRPOINT(p, KTR_STRUCT))
			ktrfds(p, fds, 2);
#endif
	} else {
		/* fdrelease() unlocks fdp. */
		fdrelease(p, fds[0]);
		fdplock(fdp);
		fdrelease(p, fds[1]);
	}

	FRELE(rf, p);
	FRELE(wf, p);
	return (error);

free3:
	fdremove(fdp, fds[0]);
	closef(rf, p);
	rpipe = NULL;
free2:
	fdpunlock(fdp);
	pipe_destroy(wpipe);
	pipe_destroy(rpipe);
	return (error);
}

/*
 * Allocate kva for pipe circular buffer, the space is pageable.
 * This routine will 'realloc' the size of a pipe safely, if it fails
 * it will retain the old buffer.
 * If it fails it will return ENOMEM.
 */
int
pipe_buffer_realloc(struct pipe *cpipe, u_int size)
{
	caddr_t buffer;

	/* buffer uninitialized or pipe locked */
	KASSERT((cpipe->pipe_buffer.buffer == NULL) ||
	    (cpipe->pipe_state & PIPE_LOCK));

	/* buffer should be empty */
	KASSERT(cpipe->pipe_buffer.cnt == 0);

	KERNEL_LOCK();
	buffer = km_alloc(size, &kv_any, &kp_pageable, &kd_waitok);
	KERNEL_UNLOCK();
	if (buffer == NULL)
		return (ENOMEM);

	/* free old resources if we are resizing */
	pipe_buffer_free(cpipe);

	cpipe->pipe_buffer.buffer = buffer;
	cpipe->pipe_buffer.size = size;
	cpipe->pipe_buffer.in = 0;
	cpipe->pipe_buffer.out = 0;

	atomic_add_int(&amountpipekva, cpipe->pipe_buffer.size);

	return (0);
}

/*
 * initialize and allocate VM and memory for pipe
 */
int
pipe_create(struct pipe *cpipe)
{
	int error;

	error = pipe_buffer_realloc(cpipe, PIPE_SIZE);
	if (error != 0)
		return (error);

	sigio_init(&cpipe->pipe_sigio);

	getnanotime(&cpipe->pipe_ctime);
	cpipe->pipe_atime = cpipe->pipe_ctime;
	cpipe->pipe_mtime = cpipe->pipe_ctime;

	return (0);
}

struct pipe *
pipe_peer(struct pipe *cpipe)
{
	struct pipe *peer;

	rw_assert_anylock(cpipe->pipe_lock);

	peer = cpipe->pipe_peer;
	if (peer == NULL || (peer->pipe_state & PIPE_EOF))
		return (NULL);
	return (peer);
}

/*
 * Lock a pipe for exclusive I/O access.
 */
int
pipe_iolock(struct pipe *cpipe)
{
	int error;

	rw_assert_wrlock(cpipe->pipe_lock);

	while (cpipe->pipe_state & PIPE_LOCK) {
		cpipe->pipe_state |= PIPE_LWANT;
		error = rwsleep_nsec(cpipe, cpipe->pipe_lock, PRIBIO | PCATCH,
		    "pipeiolk", INFSLP);
		if (error)
			return (error);
	}
	cpipe->pipe_state |= PIPE_LOCK;
	return (0);
}

/*
 * Unlock a pipe I/O lock.
 */
void
pipe_iounlock(struct pipe *cpipe)
{
	rw_assert_wrlock(cpipe->pipe_lock);
	KASSERT(cpipe->pipe_state & PIPE_LOCK);

	cpipe->pipe_state &= ~PIPE_LOCK;
	if (cpipe->pipe_state & PIPE_LWANT) {
		cpipe->pipe_state &= ~PIPE_LWANT;
		wakeup(cpipe);
	}
}

/*
 * Unlock the pipe I/O lock and go to sleep.  Returns 0 on success and the I/O
 * lock is relocked.  Otherwise if a signal was caught, non-zero is returned and
 * the I/O lock is not locked.
 *
 * Any caller must obtain a reference to the pipe by incrementing `pipe_busy'
 * before calling this function in order to ensure that the same pipe is not
 * destroyed while sleeping.
 */
int
pipe_iosleep(struct pipe *cpipe, const char *wmesg)
{
	int error;

	pipe_iounlock(cpipe);
	error = rwsleep_nsec(cpipe, cpipe->pipe_lock, PRIBIO | PCATCH, wmesg,
	    INFSLP);
	if (error)
		return (error);
	return (pipe_iolock(cpipe));
}

void
pipeselwakeup(struct pipe *cpipe)
{
	rw_assert_wrlock(cpipe->pipe_lock);

	if (cpipe->pipe_state & PIPE_SEL) {
		cpipe->pipe_state &= ~PIPE_SEL;
		selwakeup(&cpipe->pipe_sel);
	} else {
		KERNEL_LOCK();
		KNOTE(&cpipe->pipe_sel.si_note, NOTE_SUBMIT);
		KERNEL_UNLOCK();
	}

	if (cpipe->pipe_state & PIPE_ASYNC)
		pgsigio(&cpipe->pipe_sigio, SIGIO, 0);
}

int
pipe_read(struct file *fp, struct uio *uio, int fflags)
{
	struct pipe *rpipe = fp->f_data;
	size_t nread = 0, size;
	int error;

	rw_enter_write(rpipe->pipe_lock);
	++rpipe->pipe_busy;
	error = pipe_iolock(rpipe);
	if (error) {
		--rpipe->pipe_busy;
		pipe_rundown(rpipe);
		rw_exit_write(rpipe->pipe_lock);
		return (error);
	}

	while (uio->uio_resid) {
		/* Normal pipe buffer receive. */
		if (rpipe->pipe_buffer.cnt > 0) {
			size = rpipe->pipe_buffer.size - rpipe->pipe_buffer.out;
			if (size > rpipe->pipe_buffer.cnt)
				size = rpipe->pipe_buffer.cnt;
			if (size > uio->uio_resid)
				size = uio->uio_resid;
			rw_exit_write(rpipe->pipe_lock);
			error = uiomove(&rpipe->pipe_buffer.buffer[rpipe->pipe_buffer.out],
			    size, uio);
			rw_enter_write(rpipe->pipe_lock);
			if (error) {
				break;
			}
			rpipe->pipe_buffer.out += size;
			if (rpipe->pipe_buffer.out >= rpipe->pipe_buffer.size)
				rpipe->pipe_buffer.out = 0;

			rpipe->pipe_buffer.cnt -= size;
			/*
			 * If there is no more to read in the pipe, reset
			 * its pointers to the beginning.  This improves
			 * cache hit stats.
			 */
			if (rpipe->pipe_buffer.cnt == 0) {
				rpipe->pipe_buffer.in = 0;
				rpipe->pipe_buffer.out = 0;
			}
			nread += size;
		} else {
			/*
			 * detect EOF condition
			 * read returns 0 on EOF, no need to set error
			 */
			if (rpipe->pipe_state & PIPE_EOF)
				break;

			/* If the "write-side" has been blocked, wake it up. */
			if (rpipe->pipe_state & PIPE_WANTW) {
				rpipe->pipe_state &= ~PIPE_WANTW;
				wakeup(rpipe);
			}

			/* Break if some data was read. */
			if (nread > 0)
				break;

			/* Handle non-blocking mode operation. */
			if (fp->f_flag & FNONBLOCK) {
				error = EAGAIN;
				break;
			}

			/* Wait for more data. */
			rpipe->pipe_state |= PIPE_WANTR;
			error = pipe_iosleep(rpipe, "piperd");
			if (error)
				goto unlocked_error;
		}
	}
	pipe_iounlock(rpipe);

	if (error == 0)
		getnanotime(&rpipe->pipe_atime);
unlocked_error:
	--rpipe->pipe_busy;

	if (pipe_rundown(rpipe) == 0 && rpipe->pipe_buffer.cnt < MINPIPESIZE) {
		/* Handle write blocking hysteresis. */
		if (rpipe->pipe_state & PIPE_WANTW) {
			rpipe->pipe_state &= ~PIPE_WANTW;
			wakeup(rpipe);
		}
	}

	if (rpipe->pipe_buffer.size - rpipe->pipe_buffer.cnt >= PIPE_BUF)
		pipeselwakeup(rpipe);

	rw_exit_write(rpipe->pipe_lock);
	return (error);
}

int
pipe_write(struct file *fp, struct uio *uio, int fflags)
{
	struct pipe *rpipe = fp->f_data, *wpipe;
	struct rwlock *lock = rpipe->pipe_lock;
	size_t orig_resid;
	int error;

	rw_enter_write(lock);
	wpipe = pipe_peer(rpipe);

	/* Detect loss of pipe read side, issue SIGPIPE if lost. */
	if (wpipe == NULL) {
		rw_exit_write(lock);
		return (EPIPE);
	}

	++wpipe->pipe_busy;
	error = pipe_iolock(wpipe);
	if (error) {
		--wpipe->pipe_busy;
		pipe_rundown(wpipe);
		rw_exit_write(lock);
		return (error);
	}

	/* If it is advantageous to resize the pipe buffer, do so. */
	if (uio->uio_resid > PIPE_SIZE &&
	    wpipe->pipe_buffer.size <= PIPE_SIZE &&
	    wpipe->pipe_buffer.cnt == 0) {
		unsigned int npipe;

		npipe = atomic_inc_int_nv(&nbigpipe);
		if (npipe > LIMITBIGPIPES ||
		    pipe_buffer_realloc(wpipe, BIG_PIPE_SIZE) != 0)
			atomic_dec_int(&nbigpipe);
	}

	orig_resid = uio->uio_resid;

	while (uio->uio_resid) {
		size_t space;

		if (wpipe->pipe_state & PIPE_EOF) {
			error = EPIPE;
			break;
		}

		space = wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt;

		/* Writes of size <= PIPE_BUF must be atomic. */
		if (space < uio->uio_resid && orig_resid <= PIPE_BUF)
			space = 0;

		if (space > 0) {
			size_t size;	/* Transfer size */
			size_t segsize;	/* first segment to transfer */

			/*
			 * Transfer size is minimum of uio transfer
			 * and free space in pipe buffer.
			 */
			if (space > uio->uio_resid)
				size = uio->uio_resid;
			else
				size = space;
			/*
			 * First segment to transfer is minimum of
			 * transfer size and contiguous space in
			 * pipe buffer.  If first segment to transfer
			 * is less than the transfer size, we've got
			 * a wraparound in the buffer.
			 */
			segsize = wpipe->pipe_buffer.size -
			    wpipe->pipe_buffer.in;
			if (segsize > size)
				segsize = size;

			/* Transfer first segment */

			rw_exit_write(lock);
			error = uiomove(&wpipe->pipe_buffer.buffer[wpipe->pipe_buffer.in],
			    segsize, uio);
			rw_enter_write(lock);

			if (error == 0 && segsize < size) {
				/*
				 * Transfer remaining part now, to
				 * support atomic writes.  Wraparound
				 * happened.
				 */
#ifdef DIAGNOSTIC
				if (wpipe->pipe_buffer.in + segsize !=
				    wpipe->pipe_buffer.size)
					panic("Expected pipe buffer wraparound disappeared");
#endif

				rw_exit_write(lock);
				error = uiomove(&wpipe->pipe_buffer.buffer[0],
				    size - segsize, uio);
				rw_enter_write(lock);
			}
			if (error == 0) {
				wpipe->pipe_buffer.in += size;
				if (wpipe->pipe_buffer.in >=
				    wpipe->pipe_buffer.size) {
#ifdef DIAGNOSTIC
					if (wpipe->pipe_buffer.in != size - segsize + wpipe->pipe_buffer.size)
						panic("Expected wraparound bad");
#endif
					wpipe->pipe_buffer.in = size - segsize;
				}

				wpipe->pipe_buffer.cnt += size;
#ifdef DIAGNOSTIC
				if (wpipe->pipe_buffer.cnt > wpipe->pipe_buffer.size)
					panic("Pipe buffer overflow");
#endif
			}
			if (error)
				break;
		} else {
			/* If the "read-side" has been blocked, wake it up. */
			if (wpipe->pipe_state & PIPE_WANTR) {
				wpipe->pipe_state &= ~PIPE_WANTR;
				wakeup(wpipe);
			}

			/* Don't block on non-blocking I/O. */
			if (fp->f_flag & FNONBLOCK) {
				error = EAGAIN;
				break;
			}

			/*
			 * We have no more space and have something to offer,
			 * wake up select/poll.
			 */
			pipeselwakeup(wpipe);

			wpipe->pipe_state |= PIPE_WANTW;
			error = pipe_iosleep(wpipe, "pipewr");
			if (error)
				goto unlocked_error;

			/*
			 * If read side wants to go away, we just issue a
			 * signal to ourselves.
			 */
			if (wpipe->pipe_state & PIPE_EOF) {
				error = EPIPE;
				break;
			}
		}
	}
	pipe_iounlock(wpipe);

unlocked_error:
	--wpipe->pipe_busy;

	if (pipe_rundown(wpipe) == 0 && wpipe->pipe_buffer.cnt > 0) {
		/*
		 * If we have put any characters in the buffer, we wake up
		 * the reader.
		 */
		if (wpipe->pipe_state & PIPE_WANTR) {
			wpipe->pipe_state &= ~PIPE_WANTR;
			wakeup(wpipe);
		}
	}

	/* Don't return EPIPE if I/O was successful. */
	if (wpipe->pipe_buffer.cnt == 0 &&
	    uio->uio_resid == 0 &&
	    error == EPIPE) {
		error = 0;
	}

	if (error == 0)
		getnanotime(&wpipe->pipe_mtime);
	/* We have something to offer, wake up select/poll. */
	if (wpipe->pipe_buffer.cnt)
		pipeselwakeup(wpipe);

	rw_exit_write(lock);
	return (error);
}

/*
 * we implement a very minimal set of ioctls for compatibility with sockets.
 */
int
pipe_ioctl(struct file *fp, u_long cmd, caddr_t data, struct proc *p)
{
	struct pipe *mpipe = fp->f_data;
	int error = 0;

	switch (cmd) {

	case FIONBIO:
		break;

	case FIOASYNC:
		rw_enter_write(mpipe->pipe_lock);
		if (*(int *)data) {
			mpipe->pipe_state |= PIPE_ASYNC;
		} else {
			mpipe->pipe_state &= ~PIPE_ASYNC;
		}
		rw_exit_write(mpipe->pipe_lock);
		break;

	case FIONREAD:
		rw_enter_read(mpipe->pipe_lock);
		*(int *)data = mpipe->pipe_buffer.cnt;
		rw_exit_read(mpipe->pipe_lock);
		break;

	case FIOSETOWN:
	case SIOCSPGRP:
	case TIOCSPGRP:
		error = sigio_setown(&mpipe->pipe_sigio, cmd, data);
		break;

	case FIOGETOWN:
	case SIOCGPGRP:
	case TIOCGPGRP:
		sigio_getown(&mpipe->pipe_sigio, cmd, data);
		break;

	default:
		error = ENOTTY;
	}

	return (error);
}

int
pipe_poll(struct file *fp, int events, struct proc *p)
{
	struct pipe *rpipe = fp->f_data, *wpipe;
	struct rwlock *lock = rpipe->pipe_lock;
	int revents = 0;

	rw_enter_write(lock);
	wpipe = pipe_peer(rpipe);

	if (events & (POLLIN | POLLRDNORM)) {
		if (rpipe->pipe_buffer.cnt > 0 ||
		    (rpipe->pipe_state & PIPE_EOF))
			revents |= events & (POLLIN | POLLRDNORM);
	}

	/* NOTE: POLLHUP and POLLOUT/POLLWRNORM are mutually exclusive */
	if ((rpipe->pipe_state & PIPE_EOF) || wpipe == NULL)
		revents |= POLLHUP;
	else if (events & (POLLOUT | POLLWRNORM)) {
		if (wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt >= PIPE_BUF)
			revents |= events & (POLLOUT | POLLWRNORM);
	}

	if (revents == 0) {
		if (events & (POLLIN | POLLRDNORM)) {
			selrecord(p, &rpipe->pipe_sel);
			rpipe->pipe_state |= PIPE_SEL;
		}
		if (events & (POLLOUT | POLLWRNORM)) {
			selrecord(p, &wpipe->pipe_sel);
			wpipe->pipe_state |= PIPE_SEL;
		}
	}

	rw_exit_write(lock);

	return (revents);
}

int
pipe_stat(struct file *fp, struct stat *ub, struct proc *p)
{
	struct pipe *pipe = fp->f_data;

	memset(ub, 0, sizeof(*ub));

	rw_enter_read(pipe->pipe_lock);
	ub->st_mode = S_IFIFO;
	ub->st_blksize = pipe->pipe_buffer.size;
	ub->st_size = pipe->pipe_buffer.cnt;
	ub->st_blocks = (ub->st_size + ub->st_blksize - 1) / ub->st_blksize;
	ub->st_atim.tv_sec = pipe->pipe_atime.tv_sec;
	ub->st_atim.tv_nsec = pipe->pipe_atime.tv_nsec;
	ub->st_mtim.tv_sec = pipe->pipe_mtime.tv_sec;
	ub->st_mtim.tv_nsec = pipe->pipe_mtime.tv_nsec;
	ub->st_ctim.tv_sec = pipe->pipe_ctime.tv_sec;
	ub->st_ctim.tv_nsec = pipe->pipe_ctime.tv_nsec;
	ub->st_uid = fp->f_cred->cr_uid;
	ub->st_gid = fp->f_cred->cr_gid;
	rw_exit_read(pipe->pipe_lock);
	/*
	 * Left as 0: st_dev, st_ino, st_nlink, st_rdev, st_flags, st_gen.
	 * XXX (st_dev, st_ino) should be unique.
	 */
	return (0);
}

int
pipe_close(struct file *fp, struct proc *p)
{
	struct pipe *cpipe = fp->f_data;

	fp->f_ops = NULL;
	fp->f_data = NULL;
	pipe_destroy(cpipe);
	return (0);
}

/*
 * Free kva for pipe circular buffer.
 * No pipe lock check as only called from pipe_buffer_realloc() and pipeclose()
 */
void
pipe_buffer_free(struct pipe *cpipe)
{
	u_int size;

	if (cpipe->pipe_buffer.buffer == NULL)
		return;

	size = cpipe->pipe_buffer.size;

	KERNEL_LOCK();
	km_free(cpipe->pipe_buffer.buffer, size, &kv_any, &kp_pageable);
	KERNEL_UNLOCK();

	cpipe->pipe_buffer.buffer = NULL;

	atomic_sub_int(&amountpipekva, size);
	if (size > PIPE_SIZE)
		atomic_dec_int(&nbigpipe);
}

/*
 * shutdown the pipe, and free resources.
 */
void
pipe_destroy(struct pipe *cpipe)
{
	struct pipe *ppipe;

	if (cpipe == NULL)
		return;

	rw_enter_write(cpipe->pipe_lock);

	pipeselwakeup(cpipe);
	sigio_free(&cpipe->pipe_sigio);

	/*
	 * If the other side is blocked, wake it up saying that
	 * we want to close it down.
	 */
	cpipe->pipe_state |= PIPE_EOF;
	while (cpipe->pipe_busy) {
		wakeup(cpipe);
		cpipe->pipe_state |= PIPE_WANTD;
		rwsleep_nsec(cpipe, cpipe->pipe_lock, PRIBIO, "pipecl", INFSLP);
	}

	/* Disconnect from peer. */
	if ((ppipe = cpipe->pipe_peer) != NULL) {
		pipeselwakeup(ppipe);

		ppipe->pipe_state |= PIPE_EOF;
		wakeup(ppipe);
		ppipe->pipe_peer = NULL;
	}

	pipe_buffer_free(cpipe);

	rw_exit_write(cpipe->pipe_lock);

	if (ppipe == NULL)
		pipe_pair_destroy(cpipe->pipe_pair);
}

/*
 * Returns non-zero if a rundown is currently ongoing.
 */
int
pipe_rundown(struct pipe *cpipe)
{
	rw_assert_wrlock(cpipe->pipe_lock);

	if (cpipe->pipe_busy > 0 || (cpipe->pipe_state & PIPE_WANTD) == 0)
		return (0);

	/* Only wakeup pipe_destroy() once the pipe is no longer busy. */
	cpipe->pipe_state &= ~(PIPE_WANTD | PIPE_WANTR | PIPE_WANTW);
	wakeup(cpipe);
	return (1);
}

int
pipe_kqfilter(struct file *fp, struct knote *kn)
{
	struct pipe *rpipe = kn->kn_fp->f_data, *wpipe;
	struct rwlock *lock = rpipe->pipe_lock;
	int error = 0;

	rw_enter_write(lock);
	wpipe = pipe_peer(rpipe);

	switch (kn->kn_filter) {
	case EVFILT_READ:
		kn->kn_fop = &pipe_rfiltops;
		kn->kn_hook = rpipe;
		klist_insert_locked(&rpipe->pipe_sel.si_note, kn);
		break;
	case EVFILT_WRITE:
		if (wpipe == NULL) {
			/* other end of pipe has been closed */
			error = EPIPE;
			break;
		}
		kn->kn_fop = &pipe_wfiltops;
		kn->kn_hook = wpipe;
		klist_insert_locked(&wpipe->pipe_sel.si_note, kn);
		break;
	default:
		error = EINVAL;
	}

	rw_exit_write(lock);

	return (error);
}

void
filt_pipedetach(struct knote *kn)
{
	struct pipe *cpipe = kn->kn_hook;

	klist_remove(&cpipe->pipe_sel.si_note, kn);
}

int
filt_piperead(struct knote *kn, long hint)
{
	struct pipe *rpipe = kn->kn_fp->f_data, *wpipe;
	struct rwlock *lock = rpipe->pipe_lock;

	if ((hint & NOTE_SUBMIT) == 0)
		rw_enter_read(lock);
	wpipe = pipe_peer(rpipe);

	kn->kn_data = rpipe->pipe_buffer.cnt;

	if ((rpipe->pipe_state & PIPE_EOF) || wpipe == NULL) {
		if ((hint & NOTE_SUBMIT) == 0)
			rw_exit_read(lock);
		kn->kn_flags |= EV_EOF;
		if (kn->kn_flags & __EV_POLL)
			kn->kn_flags |= __EV_HUP;
		return (1);
	}

	if ((hint & NOTE_SUBMIT) == 0)
		rw_exit_read(lock);

	return (kn->kn_data > 0);
}

int
filt_pipewrite(struct knote *kn, long hint)
{
	struct pipe *rpipe = kn->kn_fp->f_data, *wpipe;
	struct rwlock *lock = rpipe->pipe_lock;

	if ((hint & NOTE_SUBMIT) == 0)
		rw_enter_read(lock);
	wpipe = pipe_peer(rpipe);

	if (wpipe == NULL) {
		if ((hint & NOTE_SUBMIT) == 0)
			rw_exit_read(lock);
		kn->kn_data = 0;
		kn->kn_flags |= EV_EOF;
		if (kn->kn_flags & __EV_POLL)
			kn->kn_flags |= __EV_HUP;
		return (1);
	}
	kn->kn_data = wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt;

	if ((hint & NOTE_SUBMIT) == 0)
		rw_exit_read(lock);

	return (kn->kn_data >= PIPE_BUF);
}

void
pipe_init(void)
{
	pool_init(&pipe_pair_pool, sizeof(struct pipe_pair), 0, IPL_MPFLOOR,
	    PR_WAITOK, "pipepl", NULL);
}

struct pipe_pair *
pipe_pair_create(void)
{
	struct pipe_pair *pp;

	pp = pool_get(&pipe_pair_pool, PR_WAITOK | PR_ZERO);
	pp->pp_wpipe.pipe_pair = pp;
	pp->pp_rpipe.pipe_pair = pp;
	pp->pp_wpipe.pipe_peer = &pp->pp_rpipe;
	pp->pp_rpipe.pipe_peer = &pp->pp_wpipe;
	/*
	 * One lock is used per pipe pair in order to obtain exclusive access to
	 * the pipe pair.
	 */
	rw_init(&pp->pp_lock, "pipelk");
	pp->pp_wpipe.pipe_lock = &pp->pp_lock;
	pp->pp_rpipe.pipe_lock = &pp->pp_lock;

	klist_init_rwlock(&pp->pp_wpipe.pipe_sel.si_note, &pp->pp_lock);
	klist_init_rwlock(&pp->pp_rpipe.pipe_sel.si_note, &pp->pp_lock);

	if (pipe_create(&pp->pp_wpipe) || pipe_create(&pp->pp_rpipe))
		goto err;
	return (pp);
err:
	pipe_destroy(&pp->pp_wpipe);
	pipe_destroy(&pp->pp_rpipe);
	return (NULL);
}

void
pipe_pair_destroy(struct pipe_pair *pp)
{
	klist_free(&pp->pp_wpipe.pipe_sel.si_note);
	klist_free(&pp->pp_rpipe.pipe_sel.si_note);
	pool_put(&pipe_pair_pool, pp);
}