1 /* $OpenBSD: sys_pipe.c,v 1.123 2020/06/29 18:23:18 anton Exp $ */ 2 3 /* 4 * Copyright (c) 1996 John S. Dyson 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice immediately at the beginning of the file, without modification, 12 * this list of conditions, and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. Absolutely no warranty of function or purpose is made by the author 17 * John S. Dyson. 18 * 4. Modifications may be freely made to this file if the above conditions 19 * are met. 20 */ 21 22 /* 23 * This file contains a high-performance replacement for the socket-based 24 * pipes scheme originally used in FreeBSD/4.4Lite. It does not support 25 * all features of sockets, but does do everything that pipes normally 26 * do. 27 */ 28 29 #include <sys/param.h> 30 #include <sys/systm.h> 31 #include <sys/proc.h> 32 #include <sys/fcntl.h> 33 #include <sys/file.h> 34 #include <sys/filedesc.h> 35 #include <sys/pool.h> 36 #include <sys/ioctl.h> 37 #include <sys/stat.h> 38 #include <sys/signalvar.h> 39 #include <sys/mount.h> 40 #include <sys/syscallargs.h> 41 #include <sys/event.h> 42 #include <sys/lock.h> 43 #include <sys/poll.h> 44 #ifdef KTRACE 45 #include <sys/ktrace.h> 46 #endif 47 48 #include <uvm/uvm_extern.h> 49 50 #include <sys/pipe.h> 51 52 struct pipe_pair { 53 struct pipe pp_wpipe; 54 struct pipe pp_rpipe; 55 struct rwlock pp_lock; 56 }; 57 58 /* 59 * interfaces to the outside world 60 */ 61 int pipe_read(struct file *, struct uio *, int); 62 int pipe_write(struct file *, struct uio *, int); 63 int pipe_close(struct file *, struct proc *); 64 int pipe_poll(struct file *, int events, struct proc *); 65 int pipe_kqfilter(struct file *fp, struct knote *kn); 66 int pipe_ioctl(struct file *, u_long, caddr_t, struct proc *); 67 int pipe_stat(struct file *fp, struct stat *ub, struct proc *p); 68 69 static const struct fileops pipeops = { 70 .fo_read = pipe_read, 71 .fo_write = pipe_write, 72 .fo_ioctl = pipe_ioctl, 73 .fo_poll = pipe_poll, 74 .fo_kqfilter = pipe_kqfilter, 75 .fo_stat = pipe_stat, 76 .fo_close = pipe_close 77 }; 78 79 void filt_pipedetach(struct knote *kn); 80 int filt_piperead(struct knote *kn, long hint); 81 int filt_pipewrite(struct knote *kn, long hint); 82 83 const struct filterops pipe_rfiltops = { 84 .f_flags = FILTEROP_ISFD, 85 .f_attach = NULL, 86 .f_detach = filt_pipedetach, 87 .f_event = filt_piperead, 88 }; 89 90 const struct filterops pipe_wfiltops = { 91 .f_flags = FILTEROP_ISFD, 92 .f_attach = NULL, 93 .f_detach = filt_pipedetach, 94 .f_event = filt_pipewrite, 95 }; 96 97 /* 98 * Default pipe buffer size(s), this can be kind-of large now because pipe 99 * space is pageable. The pipe code will try to maintain locality of 100 * reference for performance reasons, so small amounts of outstanding I/O 101 * will not wipe the cache. 102 */ 103 #define MINPIPESIZE (PIPE_SIZE/3) 104 105 /* 106 * Limit the number of "big" pipes 107 */ 108 #define LIMITBIGPIPES 32 109 unsigned int nbigpipe; 110 static unsigned int amountpipekva; 111 112 struct pool pipe_pair_pool; 113 114 int dopipe(struct proc *, int *, int); 115 void pipeselwakeup(struct pipe *); 116 117 int pipe_create(struct pipe *); 118 void pipe_destroy(struct pipe *); 119 int pipe_rundown(struct pipe *); 120 struct pipe *pipe_peer(struct pipe *); 121 int pipe_buffer_realloc(struct pipe *, u_int); 122 void pipe_buffer_free(struct pipe *); 123 124 int pipe_iolock(struct pipe *); 125 void pipe_iounlock(struct pipe *); 126 int pipe_iosleep(struct pipe *, const char *); 127 128 struct pipe_pair *pipe_pair_create(void); 129 130 /* 131 * The pipe system call for the DTYPE_PIPE type of pipes 132 */ 133 134 int 135 sys_pipe(struct proc *p, void *v, register_t *retval) 136 { 137 struct sys_pipe_args /* { 138 syscallarg(int *) fdp; 139 } */ *uap = v; 140 141 return (dopipe(p, SCARG(uap, fdp), 0)); 142 } 143 144 int 145 sys_pipe2(struct proc *p, void *v, register_t *retval) 146 { 147 struct sys_pipe2_args /* { 148 syscallarg(int *) fdp; 149 syscallarg(int) flags; 150 } */ *uap = v; 151 152 if (SCARG(uap, flags) & ~(O_CLOEXEC | FNONBLOCK)) 153 return (EINVAL); 154 155 return (dopipe(p, SCARG(uap, fdp), SCARG(uap, flags))); 156 } 157 158 int 159 dopipe(struct proc *p, int *ufds, int flags) 160 { 161 struct filedesc *fdp = p->p_fd; 162 struct file *rf, *wf; 163 struct pipe_pair *pp; 164 struct pipe *rpipe, *wpipe = NULL; 165 int fds[2], cloexec, error; 166 167 cloexec = (flags & O_CLOEXEC) ? UF_EXCLOSE : 0; 168 169 pp = pipe_pair_create(); 170 if (pp == NULL) 171 return (ENOMEM); 172 wpipe = &pp->pp_wpipe; 173 rpipe = &pp->pp_rpipe; 174 175 fdplock(fdp); 176 177 error = falloc(p, &rf, &fds[0]); 178 if (error != 0) 179 goto free2; 180 rf->f_flag = FREAD | FWRITE | (flags & FNONBLOCK); 181 rf->f_type = DTYPE_PIPE; 182 rf->f_data = rpipe; 183 rf->f_ops = &pipeops; 184 185 error = falloc(p, &wf, &fds[1]); 186 if (error != 0) 187 goto free3; 188 wf->f_flag = FREAD | FWRITE | (flags & FNONBLOCK); 189 wf->f_type = DTYPE_PIPE; 190 wf->f_data = wpipe; 191 wf->f_ops = &pipeops; 192 193 fdinsert(fdp, fds[0], cloexec, rf); 194 fdinsert(fdp, fds[1], cloexec, wf); 195 196 error = copyout(fds, ufds, sizeof(fds)); 197 if (error == 0) { 198 fdpunlock(fdp); 199 #ifdef KTRACE 200 if (KTRPOINT(p, KTR_STRUCT)) 201 ktrfds(p, fds, 2); 202 #endif 203 } else { 204 /* fdrelease() unlocks fdp. */ 205 fdrelease(p, fds[0]); 206 fdplock(fdp); 207 fdrelease(p, fds[1]); 208 } 209 210 FRELE(rf, p); 211 FRELE(wf, p); 212 return (error); 213 214 free3: 215 fdremove(fdp, fds[0]); 216 closef(rf, p); 217 rpipe = NULL; 218 free2: 219 fdpunlock(fdp); 220 pipe_destroy(wpipe); 221 pipe_destroy(rpipe); 222 return (error); 223 } 224 225 /* 226 * Allocate kva for pipe circular buffer, the space is pageable. 227 * This routine will 'realloc' the size of a pipe safely, if it fails 228 * it will retain the old buffer. 229 * If it fails it will return ENOMEM. 230 */ 231 int 232 pipe_buffer_realloc(struct pipe *cpipe, u_int size) 233 { 234 caddr_t buffer; 235 236 /* buffer uninitialized or pipe locked */ 237 KASSERT((cpipe->pipe_buffer.buffer == NULL) || 238 (cpipe->pipe_state & PIPE_LOCK)); 239 240 /* buffer should be empty */ 241 KASSERT(cpipe->pipe_buffer.cnt == 0); 242 243 KERNEL_LOCK(); 244 buffer = km_alloc(size, &kv_any, &kp_pageable, &kd_waitok); 245 KERNEL_UNLOCK(); 246 if (buffer == NULL) 247 return (ENOMEM); 248 249 /* free old resources if we are resizing */ 250 pipe_buffer_free(cpipe); 251 252 cpipe->pipe_buffer.buffer = buffer; 253 cpipe->pipe_buffer.size = size; 254 cpipe->pipe_buffer.in = 0; 255 cpipe->pipe_buffer.out = 0; 256 257 atomic_add_int(&amountpipekva, cpipe->pipe_buffer.size); 258 259 return (0); 260 } 261 262 /* 263 * initialize and allocate VM and memory for pipe 264 */ 265 int 266 pipe_create(struct pipe *cpipe) 267 { 268 int error; 269 270 error = pipe_buffer_realloc(cpipe, PIPE_SIZE); 271 if (error != 0) 272 return (error); 273 274 sigio_init(&cpipe->pipe_sigio); 275 276 getnanotime(&cpipe->pipe_ctime); 277 cpipe->pipe_atime = cpipe->pipe_ctime; 278 cpipe->pipe_mtime = cpipe->pipe_ctime; 279 280 return (0); 281 } 282 283 struct pipe * 284 pipe_peer(struct pipe *cpipe) 285 { 286 struct pipe *peer; 287 288 rw_assert_anylock(cpipe->pipe_lock); 289 290 peer = cpipe->pipe_peer; 291 if (peer == NULL || (peer->pipe_state & PIPE_EOF)) 292 return (NULL); 293 return (peer); 294 } 295 296 /* 297 * Lock a pipe for exclusive I/O access. 298 */ 299 int 300 pipe_iolock(struct pipe *cpipe) 301 { 302 int error; 303 304 rw_assert_wrlock(cpipe->pipe_lock); 305 306 while (cpipe->pipe_state & PIPE_LOCK) { 307 cpipe->pipe_state |= PIPE_LWANT; 308 error = rwsleep_nsec(cpipe, cpipe->pipe_lock, PRIBIO | PCATCH, 309 "pipeiolk", INFSLP); 310 if (error) 311 return (error); 312 } 313 cpipe->pipe_state |= PIPE_LOCK; 314 return (0); 315 } 316 317 /* 318 * Unlock a pipe I/O lock. 319 */ 320 void 321 pipe_iounlock(struct pipe *cpipe) 322 { 323 rw_assert_wrlock(cpipe->pipe_lock); 324 KASSERT(cpipe->pipe_state & PIPE_LOCK); 325 326 cpipe->pipe_state &= ~PIPE_LOCK; 327 if (cpipe->pipe_state & PIPE_LWANT) { 328 cpipe->pipe_state &= ~PIPE_LWANT; 329 wakeup(cpipe); 330 } 331 } 332 333 /* 334 * Unlock the pipe I/O lock and go to sleep. Returns 0 on success and the I/O 335 * lock is relocked. Otherwise if a signal was caught, non-zero is returned and 336 * the I/O lock is not locked. 337 * 338 * Any caller must obtain a reference to the pipe by incrementing `pipe_busy' 339 * before calling this function in order ensure that the same pipe is not 340 * destroyed while sleeping. 341 */ 342 int 343 pipe_iosleep(struct pipe *cpipe, const char *wmesg) 344 { 345 int error; 346 347 pipe_iounlock(cpipe); 348 error = rwsleep_nsec(cpipe, cpipe->pipe_lock, PRIBIO | PCATCH, wmesg, 349 INFSLP); 350 if (error) 351 return (error); 352 return (pipe_iolock(cpipe)); 353 } 354 355 void 356 pipeselwakeup(struct pipe *cpipe) 357 { 358 rw_assert_wrlock(cpipe->pipe_lock); 359 360 if (cpipe->pipe_state & PIPE_SEL) { 361 cpipe->pipe_state &= ~PIPE_SEL; 362 selwakeup(&cpipe->pipe_sel); 363 } else { 364 KERNEL_LOCK(); 365 KNOTE(&cpipe->pipe_sel.si_note, NOTE_SUBMIT); 366 KERNEL_UNLOCK(); 367 } 368 369 if (cpipe->pipe_state & PIPE_ASYNC) 370 pgsigio(&cpipe->pipe_sigio, SIGIO, 0); 371 } 372 373 int 374 pipe_read(struct file *fp, struct uio *uio, int fflags) 375 { 376 struct pipe *rpipe = fp->f_data; 377 size_t nread = 0, size; 378 int error; 379 380 rw_enter_write(rpipe->pipe_lock); 381 ++rpipe->pipe_busy; 382 error = pipe_iolock(rpipe); 383 if (error) { 384 --rpipe->pipe_busy; 385 pipe_rundown(rpipe); 386 rw_exit_write(rpipe->pipe_lock); 387 return (error); 388 } 389 390 while (uio->uio_resid) { 391 /* Normal pipe buffer receive. */ 392 if (rpipe->pipe_buffer.cnt > 0) { 393 size = rpipe->pipe_buffer.size - rpipe->pipe_buffer.out; 394 if (size > rpipe->pipe_buffer.cnt) 395 size = rpipe->pipe_buffer.cnt; 396 if (size > uio->uio_resid) 397 size = uio->uio_resid; 398 rw_exit_write(rpipe->pipe_lock); 399 error = uiomove(&rpipe->pipe_buffer.buffer[rpipe->pipe_buffer.out], 400 size, uio); 401 rw_enter_write(rpipe->pipe_lock); 402 if (error) { 403 break; 404 } 405 rpipe->pipe_buffer.out += size; 406 if (rpipe->pipe_buffer.out >= rpipe->pipe_buffer.size) 407 rpipe->pipe_buffer.out = 0; 408 409 rpipe->pipe_buffer.cnt -= size; 410 /* 411 * If there is no more to read in the pipe, reset 412 * its pointers to the beginning. This improves 413 * cache hit stats. 414 */ 415 if (rpipe->pipe_buffer.cnt == 0) { 416 rpipe->pipe_buffer.in = 0; 417 rpipe->pipe_buffer.out = 0; 418 } 419 nread += size; 420 } else { 421 /* 422 * detect EOF condition 423 * read returns 0 on EOF, no need to set error 424 */ 425 if (rpipe->pipe_state & PIPE_EOF) 426 break; 427 428 /* If the "write-side" has been blocked, wake it up. */ 429 if (rpipe->pipe_state & PIPE_WANTW) { 430 rpipe->pipe_state &= ~PIPE_WANTW; 431 wakeup(rpipe); 432 } 433 434 /* Break if some data was read. */ 435 if (nread > 0) 436 break; 437 438 /* Handle non-blocking mode operation. */ 439 if (fp->f_flag & FNONBLOCK) { 440 error = EAGAIN; 441 break; 442 } 443 444 /* Wait for more data. */ 445 rpipe->pipe_state |= PIPE_WANTR; 446 error = pipe_iosleep(rpipe, "piperd"); 447 if (error) 448 goto unlocked_error; 449 } 450 } 451 pipe_iounlock(rpipe); 452 453 if (error == 0) 454 getnanotime(&rpipe->pipe_atime); 455 unlocked_error: 456 --rpipe->pipe_busy; 457 458 if (pipe_rundown(rpipe) == 0 && rpipe->pipe_buffer.cnt < MINPIPESIZE) { 459 /* Handle write blocking hysteresis. */ 460 if (rpipe->pipe_state & PIPE_WANTW) { 461 rpipe->pipe_state &= ~PIPE_WANTW; 462 wakeup(rpipe); 463 } 464 } 465 466 if (rpipe->pipe_buffer.size - rpipe->pipe_buffer.cnt >= PIPE_BUF) 467 pipeselwakeup(rpipe); 468 469 rw_exit_write(rpipe->pipe_lock); 470 return (error); 471 } 472 473 int 474 pipe_write(struct file *fp, struct uio *uio, int fflags) 475 { 476 struct pipe *rpipe = fp->f_data, *wpipe; 477 struct rwlock *lock = rpipe->pipe_lock; 478 size_t orig_resid; 479 int error; 480 481 rw_enter_write(lock); 482 wpipe = pipe_peer(rpipe); 483 484 /* Detect loss of pipe read side, issue SIGPIPE if lost. */ 485 if (wpipe == NULL) { 486 rw_exit_write(lock); 487 return (EPIPE); 488 } 489 490 ++wpipe->pipe_busy; 491 error = pipe_iolock(wpipe); 492 if (error) { 493 --wpipe->pipe_busy; 494 pipe_rundown(wpipe); 495 rw_exit_write(lock); 496 return (error); 497 } 498 499 500 /* If it is advantageous to resize the pipe buffer, do so. */ 501 if (uio->uio_resid > PIPE_SIZE && 502 wpipe->pipe_buffer.size <= PIPE_SIZE && 503 wpipe->pipe_buffer.cnt == 0) { 504 unsigned int npipe; 505 506 npipe = atomic_inc_int_nv(&nbigpipe); 507 if (npipe > LIMITBIGPIPES || 508 pipe_buffer_realloc(wpipe, BIG_PIPE_SIZE) != 0) 509 atomic_dec_int(&nbigpipe); 510 } 511 512 orig_resid = uio->uio_resid; 513 514 while (uio->uio_resid) { 515 size_t space; 516 517 if (wpipe->pipe_state & PIPE_EOF) { 518 error = EPIPE; 519 break; 520 } 521 522 space = wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt; 523 524 /* Writes of size <= PIPE_BUF must be atomic. */ 525 if (space < uio->uio_resid && orig_resid <= PIPE_BUF) 526 space = 0; 527 528 if (space > 0) { 529 size_t size; /* Transfer size */ 530 size_t segsize; /* first segment to transfer */ 531 532 /* 533 * Transfer size is minimum of uio transfer 534 * and free space in pipe buffer. 535 */ 536 if (space > uio->uio_resid) 537 size = uio->uio_resid; 538 else 539 size = space; 540 /* 541 * First segment to transfer is minimum of 542 * transfer size and contiguous space in 543 * pipe buffer. If first segment to transfer 544 * is less than the transfer size, we've got 545 * a wraparound in the buffer. 546 */ 547 segsize = wpipe->pipe_buffer.size - 548 wpipe->pipe_buffer.in; 549 if (segsize > size) 550 segsize = size; 551 552 /* Transfer first segment */ 553 554 rw_exit_write(lock); 555 error = uiomove(&wpipe->pipe_buffer.buffer[wpipe->pipe_buffer.in], 556 segsize, uio); 557 rw_enter_write(lock); 558 559 if (error == 0 && segsize < size) { 560 /* 561 * Transfer remaining part now, to 562 * support atomic writes. Wraparound 563 * happened. 564 */ 565 #ifdef DIAGNOSTIC 566 if (wpipe->pipe_buffer.in + segsize != 567 wpipe->pipe_buffer.size) 568 panic("Expected pipe buffer wraparound disappeared"); 569 #endif 570 571 rw_exit_write(lock); 572 error = uiomove(&wpipe->pipe_buffer.buffer[0], 573 size - segsize, uio); 574 rw_enter_write(lock); 575 } 576 if (error == 0) { 577 wpipe->pipe_buffer.in += size; 578 if (wpipe->pipe_buffer.in >= 579 wpipe->pipe_buffer.size) { 580 #ifdef DIAGNOSTIC 581 if (wpipe->pipe_buffer.in != size - segsize + wpipe->pipe_buffer.size) 582 panic("Expected wraparound bad"); 583 #endif 584 wpipe->pipe_buffer.in = size - segsize; 585 } 586 587 wpipe->pipe_buffer.cnt += size; 588 #ifdef DIAGNOSTIC 589 if (wpipe->pipe_buffer.cnt > wpipe->pipe_buffer.size) 590 panic("Pipe buffer overflow"); 591 #endif 592 } 593 if (error) 594 break; 595 } else { 596 /* If the "read-side" has been blocked, wake it up. */ 597 if (wpipe->pipe_state & PIPE_WANTR) { 598 wpipe->pipe_state &= ~PIPE_WANTR; 599 wakeup(wpipe); 600 } 601 602 /* Don't block on non-blocking I/O. */ 603 if (fp->f_flag & FNONBLOCK) { 604 error = EAGAIN; 605 break; 606 } 607 608 /* 609 * We have no more space and have something to offer, 610 * wake up select/poll. 611 */ 612 pipeselwakeup(wpipe); 613 614 wpipe->pipe_state |= PIPE_WANTW; 615 error = pipe_iosleep(wpipe, "pipewr"); 616 if (error) 617 goto unlocked_error; 618 619 /* 620 * If read side wants to go away, we just issue a 621 * signal to ourselves. 622 */ 623 if (wpipe->pipe_state & PIPE_EOF) { 624 error = EPIPE; 625 break; 626 } 627 } 628 } 629 pipe_iounlock(wpipe); 630 631 unlocked_error: 632 --wpipe->pipe_busy; 633 634 if (pipe_rundown(wpipe) == 0 && wpipe->pipe_buffer.cnt > 0) { 635 /* 636 * If we have put any characters in the buffer, we wake up 637 * the reader. 638 */ 639 if (wpipe->pipe_state & PIPE_WANTR) { 640 wpipe->pipe_state &= ~PIPE_WANTR; 641 wakeup(wpipe); 642 } 643 } 644 645 /* Don't return EPIPE if I/O was successful. */ 646 if (wpipe->pipe_buffer.cnt == 0 && 647 uio->uio_resid == 0 && 648 error == EPIPE) { 649 error = 0; 650 } 651 652 if (error == 0) 653 getnanotime(&wpipe->pipe_mtime); 654 /* We have something to offer, wake up select/poll. */ 655 if (wpipe->pipe_buffer.cnt) 656 pipeselwakeup(wpipe); 657 658 rw_exit_write(lock); 659 return (error); 660 } 661 662 /* 663 * we implement a very minimal set of ioctls for compatibility with sockets. 664 */ 665 int 666 pipe_ioctl(struct file *fp, u_long cmd, caddr_t data, struct proc *p) 667 { 668 struct pipe *mpipe = fp->f_data; 669 int error = 0; 670 671 switch (cmd) { 672 673 case FIONBIO: 674 break; 675 676 case FIOASYNC: 677 rw_enter_write(mpipe->pipe_lock); 678 if (*(int *)data) { 679 mpipe->pipe_state |= PIPE_ASYNC; 680 } else { 681 mpipe->pipe_state &= ~PIPE_ASYNC; 682 } 683 rw_exit_write(mpipe->pipe_lock); 684 break; 685 686 case FIONREAD: 687 rw_enter_read(mpipe->pipe_lock); 688 *(int *)data = mpipe->pipe_buffer.cnt; 689 rw_exit_read(mpipe->pipe_lock); 690 break; 691 692 case FIOSETOWN: 693 case SIOCSPGRP: 694 case TIOCSPGRP: 695 error = sigio_setown(&mpipe->pipe_sigio, cmd, data); 696 break; 697 698 case FIOGETOWN: 699 case SIOCGPGRP: 700 case TIOCGPGRP: 701 sigio_getown(&mpipe->pipe_sigio, cmd, data); 702 break; 703 704 default: 705 error = ENOTTY; 706 } 707 708 return (error); 709 } 710 711 int 712 pipe_poll(struct file *fp, int events, struct proc *p) 713 { 714 struct pipe *rpipe = fp->f_data, *wpipe; 715 struct rwlock *lock = rpipe->pipe_lock; 716 int revents = 0; 717 718 rw_enter_write(lock); 719 wpipe = pipe_peer(rpipe); 720 721 if (events & (POLLIN | POLLRDNORM)) { 722 if (rpipe->pipe_buffer.cnt > 0 || 723 (rpipe->pipe_state & PIPE_EOF)) 724 revents |= events & (POLLIN | POLLRDNORM); 725 } 726 727 /* NOTE: POLLHUP and POLLOUT/POLLWRNORM are mutually exclusive */ 728 if ((rpipe->pipe_state & PIPE_EOF) || wpipe == NULL) 729 revents |= POLLHUP; 730 else if (events & (POLLOUT | POLLWRNORM)) { 731 if (wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt >= PIPE_BUF) 732 revents |= events & (POLLOUT | POLLWRNORM); 733 } 734 735 if (revents == 0) { 736 if (events & (POLLIN | POLLRDNORM)) { 737 selrecord(p, &rpipe->pipe_sel); 738 rpipe->pipe_state |= PIPE_SEL; 739 } 740 if (events & (POLLOUT | POLLWRNORM)) { 741 selrecord(p, &wpipe->pipe_sel); 742 wpipe->pipe_state |= PIPE_SEL; 743 } 744 } 745 746 rw_exit_write(lock); 747 748 return (revents); 749 } 750 751 int 752 pipe_stat(struct file *fp, struct stat *ub, struct proc *p) 753 { 754 struct pipe *pipe = fp->f_data; 755 756 memset(ub, 0, sizeof(*ub)); 757 758 rw_enter_read(pipe->pipe_lock); 759 ub->st_mode = S_IFIFO; 760 ub->st_blksize = pipe->pipe_buffer.size; 761 ub->st_size = pipe->pipe_buffer.cnt; 762 ub->st_blocks = (ub->st_size + ub->st_blksize - 1) / ub->st_blksize; 763 ub->st_atim.tv_sec = pipe->pipe_atime.tv_sec; 764 ub->st_atim.tv_nsec = pipe->pipe_atime.tv_nsec; 765 ub->st_mtim.tv_sec = pipe->pipe_mtime.tv_sec; 766 ub->st_mtim.tv_nsec = pipe->pipe_mtime.tv_nsec; 767 ub->st_ctim.tv_sec = pipe->pipe_ctime.tv_sec; 768 ub->st_ctim.tv_nsec = pipe->pipe_ctime.tv_nsec; 769 ub->st_uid = fp->f_cred->cr_uid; 770 ub->st_gid = fp->f_cred->cr_gid; 771 rw_exit_read(pipe->pipe_lock); 772 /* 773 * Left as 0: st_dev, st_ino, st_nlink, st_rdev, st_flags, st_gen. 774 * XXX (st_dev, st_ino) should be unique. 775 */ 776 return (0); 777 } 778 779 int 780 pipe_close(struct file *fp, struct proc *p) 781 { 782 struct pipe *cpipe = fp->f_data; 783 784 fp->f_ops = NULL; 785 fp->f_data = NULL; 786 pipe_destroy(cpipe); 787 return (0); 788 } 789 790 /* 791 * Free kva for pipe circular buffer. 792 * No pipe lock check as only called from pipe_buffer_realloc() and pipeclose() 793 */ 794 void 795 pipe_buffer_free(struct pipe *cpipe) 796 { 797 u_int size; 798 799 if (cpipe->pipe_buffer.buffer == NULL) 800 return; 801 802 size = cpipe->pipe_buffer.size; 803 804 KERNEL_LOCK(); 805 km_free(cpipe->pipe_buffer.buffer, size, &kv_any, &kp_pageable); 806 KERNEL_UNLOCK(); 807 808 cpipe->pipe_buffer.buffer = NULL; 809 810 atomic_sub_int(&amountpipekva, size); 811 if (size > PIPE_SIZE) 812 atomic_dec_int(&nbigpipe); 813 } 814 815 /* 816 * shutdown the pipe, and free resources. 817 */ 818 void 819 pipe_destroy(struct pipe *cpipe) 820 { 821 struct pipe *ppipe; 822 823 if (cpipe == NULL) 824 return; 825 826 rw_enter_write(cpipe->pipe_lock); 827 828 pipeselwakeup(cpipe); 829 sigio_free(&cpipe->pipe_sigio); 830 831 /* 832 * If the other side is blocked, wake it up saying that 833 * we want to close it down. 834 */ 835 cpipe->pipe_state |= PIPE_EOF; 836 while (cpipe->pipe_busy) { 837 wakeup(cpipe); 838 cpipe->pipe_state |= PIPE_WANTD; 839 rwsleep_nsec(cpipe, cpipe->pipe_lock, PRIBIO, "pipecl", INFSLP); 840 } 841 842 /* Disconnect from peer. */ 843 if ((ppipe = cpipe->pipe_peer) != NULL) { 844 pipeselwakeup(ppipe); 845 846 ppipe->pipe_state |= PIPE_EOF; 847 wakeup(ppipe); 848 ppipe->pipe_peer = NULL; 849 } 850 851 pipe_buffer_free(cpipe); 852 853 rw_exit_write(cpipe->pipe_lock); 854 855 if (ppipe == NULL) 856 pool_put(&pipe_pair_pool, cpipe->pipe_pair); 857 } 858 859 /* 860 * Returns non-zero if a rundown is currently ongoing. 861 */ 862 int 863 pipe_rundown(struct pipe *cpipe) 864 { 865 rw_assert_wrlock(cpipe->pipe_lock); 866 867 if (cpipe->pipe_busy > 0 || (cpipe->pipe_state & PIPE_WANTD) == 0) 868 return (0); 869 870 /* Only wakeup pipe_destroy() once the pipe is no longer busy. */ 871 cpipe->pipe_state &= ~(PIPE_WANTD | PIPE_WANTR | PIPE_WANTW); 872 wakeup(cpipe); 873 return (1); 874 } 875 876 int 877 pipe_kqfilter(struct file *fp, struct knote *kn) 878 { 879 struct pipe *rpipe = kn->kn_fp->f_data, *wpipe; 880 struct rwlock *lock = rpipe->pipe_lock; 881 int error = 0; 882 883 rw_enter_write(lock); 884 wpipe = pipe_peer(rpipe); 885 886 switch (kn->kn_filter) { 887 case EVFILT_READ: 888 kn->kn_fop = &pipe_rfiltops; 889 klist_insert(&rpipe->pipe_sel.si_note, kn); 890 break; 891 case EVFILT_WRITE: 892 if (wpipe == NULL) { 893 /* other end of pipe has been closed */ 894 error = EPIPE; 895 break; 896 } 897 kn->kn_fop = &pipe_wfiltops; 898 klist_insert(&wpipe->pipe_sel.si_note, kn); 899 break; 900 default: 901 error = EINVAL; 902 } 903 904 rw_exit_write(lock); 905 906 return (error); 907 } 908 909 void 910 filt_pipedetach(struct knote *kn) 911 { 912 struct pipe *rpipe = kn->kn_fp->f_data, *wpipe; 913 struct rwlock *lock = rpipe->pipe_lock; 914 915 rw_enter_write(lock); 916 wpipe = pipe_peer(rpipe); 917 918 switch (kn->kn_filter) { 919 case EVFILT_READ: 920 klist_remove(&rpipe->pipe_sel.si_note, kn); 921 break; 922 case EVFILT_WRITE: 923 if (wpipe == NULL) 924 break; 925 klist_remove(&wpipe->pipe_sel.si_note, kn); 926 break; 927 } 928 929 rw_exit_write(lock); 930 } 931 932 int 933 filt_piperead(struct knote *kn, long hint) 934 { 935 struct pipe *rpipe = kn->kn_fp->f_data, *wpipe; 936 struct rwlock *lock = rpipe->pipe_lock; 937 938 if ((hint & NOTE_SUBMIT) == 0) 939 rw_enter_read(lock); 940 wpipe = pipe_peer(rpipe); 941 942 kn->kn_data = rpipe->pipe_buffer.cnt; 943 944 if ((rpipe->pipe_state & PIPE_EOF) || wpipe == NULL) { 945 if ((hint & NOTE_SUBMIT) == 0) 946 rw_exit_read(lock); 947 kn->kn_flags |= EV_EOF; 948 if (kn->kn_flags & __EV_POLL) 949 kn->kn_flags |= __EV_HUP; 950 return (1); 951 } 952 953 if ((hint & NOTE_SUBMIT) == 0) 954 rw_exit_read(lock); 955 956 return (kn->kn_data > 0); 957 } 958 959 int 960 filt_pipewrite(struct knote *kn, long hint) 961 { 962 struct pipe *rpipe = kn->kn_fp->f_data, *wpipe; 963 struct rwlock *lock = rpipe->pipe_lock; 964 965 if ((hint & NOTE_SUBMIT) == 0) 966 rw_enter_read(lock); 967 wpipe = pipe_peer(rpipe); 968 969 if (wpipe == NULL) { 970 if ((hint & NOTE_SUBMIT) == 0) 971 rw_exit_read(lock); 972 kn->kn_data = 0; 973 kn->kn_flags |= EV_EOF; 974 if (kn->kn_flags & __EV_POLL) 975 kn->kn_flags |= __EV_HUP; 976 return (1); 977 } 978 kn->kn_data = wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt; 979 980 if ((hint & NOTE_SUBMIT) == 0) 981 rw_exit_read(lock); 982 983 return (kn->kn_data >= PIPE_BUF); 984 } 985 986 void 987 pipe_init(void) 988 { 989 pool_init(&pipe_pair_pool, sizeof(struct pipe_pair), 0, IPL_MPFLOOR, 990 PR_WAITOK, "pipepl", NULL); 991 } 992 993 struct pipe_pair * 994 pipe_pair_create(void) 995 { 996 struct pipe_pair *pp; 997 998 pp = pool_get(&pipe_pair_pool, PR_WAITOK | PR_ZERO); 999 pp->pp_wpipe.pipe_pair = pp; 1000 pp->pp_rpipe.pipe_pair = pp; 1001 pp->pp_wpipe.pipe_peer = &pp->pp_rpipe; 1002 pp->pp_rpipe.pipe_peer = &pp->pp_wpipe; 1003 /* 1004 * One lock is used per pipe pair in order to obtain exclusive access to 1005 * the pipe pair. 1006 */ 1007 rw_init(&pp->pp_lock, "pipelk"); 1008 pp->pp_wpipe.pipe_lock = &pp->pp_lock; 1009 pp->pp_rpipe.pipe_lock = &pp->pp_lock; 1010 1011 if (pipe_create(&pp->pp_wpipe) || pipe_create(&pp->pp_rpipe)) 1012 goto err; 1013 return (pp); 1014 err: 1015 pipe_destroy(&pp->pp_wpipe); 1016 pipe_destroy(&pp->pp_rpipe); 1017 return (NULL); 1018 } 1019