/*	$OpenBSD: sys_pipe.c,v 1.141 2022/07/09 12:48:21 visa Exp $	*/

/*
 * Copyright (c) 1996 John S. Dyson
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice immediately at the beginning of the file, without modification,
 *    this list of conditions, and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Absolutely no warranty of function or purpose is made by the author
 *    John S. Dyson.
 * 4. Modifications may be freely made to this file if the above conditions
 *    are met.
 */

/*
 * This file contains a high-performance replacement for the socket-based
 * pipes scheme originally used in FreeBSD/4.4Lite.  It does not support
 * all features of sockets, but does do everything that pipes normally
 * do.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/fcntl.h>
#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/pool.h>
#include <sys/ioctl.h>
#include <sys/stat.h>
#include <sys/signalvar.h>
#include <sys/mount.h>
#include <sys/syscallargs.h>
#include <sys/event.h>
#include <sys/lock.h>
#ifdef KTRACE
#include <sys/ktrace.h>
#endif

#include <uvm/uvm_extern.h>

#include <sys/pipe.h>

struct pipe_pair {
	struct pipe pp_wpipe;
	struct pipe pp_rpipe;
	struct rwlock pp_lock;
};

/*
 * interfaces to the outside world
 */
int	pipe_read(struct file *, struct uio *, int);
int	pipe_write(struct file *, struct uio *, int);
int	pipe_close(struct file *, struct proc *);
int	pipe_kqfilter(struct file *fp, struct knote *kn);
int	pipe_ioctl(struct file *, u_long, caddr_t, struct proc *);
int	pipe_stat(struct file *fp, struct stat *ub, struct proc *p);

static const struct fileops pipeops = {
	.fo_read	= pipe_read,
	.fo_write	= pipe_write,
	.fo_ioctl	= pipe_ioctl,
	.fo_kqfilter	= pipe_kqfilter,
	.fo_stat	= pipe_stat,
	.fo_close	= pipe_close
};

void	filt_pipedetach(struct knote *kn);
int	filt_piperead(struct knote *kn, long hint);
int	filt_pipewrite(struct knote *kn, long hint);
int	filt_pipeexcept(struct knote *kn, long hint);
int	filt_pipemodify(struct kevent *kev, struct knote *kn);
int	filt_pipeprocess(struct knote *kn, struct kevent *kev);

const struct filterops pipe_rfiltops = {
	.f_flags	= FILTEROP_ISFD | FILTEROP_MPSAFE,
	.f_attach	= NULL,
	.f_detach	= filt_pipedetach,
	.f_event	= filt_piperead,
	.f_modify	= filt_pipemodify,
	.f_process	= filt_pipeprocess,
};

const struct filterops pipe_wfiltops = {
	.f_flags	= FILTEROP_ISFD | FILTEROP_MPSAFE,
	.f_attach	= NULL,
	.f_detach	= filt_pipedetach,
	.f_event	= filt_pipewrite,
	.f_modify	= filt_pipemodify,
	.f_process	= filt_pipeprocess,
};

const struct filterops pipe_efiltops = {
	.f_flags	= FILTEROP_ISFD | FILTEROP_MPSAFE,
	.f_attach	= NULL,
	.f_detach	= filt_pipedetach,
	.f_event	= filt_pipeexcept,
	.f_modify	= filt_pipemodify,
	.f_process	= filt_pipeprocess,
};
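
/*
 * The filterops above implement kevent(2) EVFILT_READ and EVFILT_WRITE
 * as well as the EVFILT_EXCEPT filter that poll(2) uses internally (see
 * pipe_kqfilter() below).  All knotes hang off per-pipe klists that are
 * serialized by the rwlock shared between both ends of a pipe pair.
 */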

/*
 * Default pipe buffer size(s); this can be kind-of large now because pipe
 * space is pageable.  The pipe code will try to maintain locality of
 * reference for performance reasons, so small amounts of outstanding I/O
 * will not wipe the cache.
 */
#define MINPIPESIZE (PIPE_SIZE/3)

/*
 * Limit the number of "big" pipes
 */
#define LIMITBIGPIPES	32
unsigned int nbigpipe;
static unsigned int amountpipekva;

struct pool pipe_pair_pool;

int	dopipe(struct proc *, int *, int);
void	pipeselwakeup(struct pipe *);

int	pipe_create(struct pipe *);
void	pipe_destroy(struct pipe *);
int	pipe_rundown(struct pipe *);
struct pipe *pipe_peer(struct pipe *);
int	pipe_buffer_realloc(struct pipe *, u_int);
void	pipe_buffer_free(struct pipe *);

int	pipe_iolock(struct pipe *);
void	pipe_iounlock(struct pipe *);
int	pipe_iosleep(struct pipe *, const char *);

struct pipe_pair *pipe_pair_create(void);
void	pipe_pair_destroy(struct pipe_pair *);

/*
 * The pipe system call for the DTYPE_PIPE type of pipes
 */

int
sys_pipe(struct proc *p, void *v, register_t *retval)
{
	struct sys_pipe_args /* {
		syscallarg(int *) fdp;
	} */ *uap = v;

	return (dopipe(p, SCARG(uap, fdp), 0));
}

int
sys_pipe2(struct proc *p, void *v, register_t *retval)
{
	struct sys_pipe2_args /* {
		syscallarg(int *) fdp;
		syscallarg(int) flags;
	} */ *uap = v;

	if (SCARG(uap, flags) & ~(O_CLOEXEC | FNONBLOCK))
		return (EINVAL);

	return (dopipe(p, SCARG(uap, fdp), SCARG(uap, flags)));
}

int
dopipe(struct proc *p, int *ufds, int flags)
{
	struct filedesc *fdp = p->p_fd;
	struct file *rf, *wf;
	struct pipe_pair *pp;
	struct pipe *rpipe, *wpipe = NULL;
	int fds[2], cloexec, error;

	cloexec = (flags & O_CLOEXEC) ? UF_EXCLOSE : 0;

	pp = pipe_pair_create();
	if (pp == NULL)
		return (ENOMEM);
	wpipe = &pp->pp_wpipe;
	rpipe = &pp->pp_rpipe;

	fdplock(fdp);

	error = falloc(p, &rf, &fds[0]);
	if (error != 0)
		goto free2;
	rf->f_flag = FREAD | FWRITE | (flags & FNONBLOCK);
	rf->f_type = DTYPE_PIPE;
	rf->f_data = rpipe;
	rf->f_ops = &pipeops;

	error = falloc(p, &wf, &fds[1]);
	if (error != 0)
		goto free3;
	wf->f_flag = FREAD | FWRITE | (flags & FNONBLOCK);
	wf->f_type = DTYPE_PIPE;
	wf->f_data = wpipe;
	wf->f_ops = &pipeops;

	fdinsert(fdp, fds[0], cloexec, rf);
	fdinsert(fdp, fds[1], cloexec, wf);

	error = copyout(fds, ufds, sizeof(fds));
	if (error == 0) {
		fdpunlock(fdp);
#ifdef KTRACE
		if (KTRPOINT(p, KTR_STRUCT))
			ktrfds(p, fds, 2);
#endif
	} else {
		/* fdrelease() unlocks fdp. */
		fdrelease(p, fds[0]);
		fdplock(fdp);
		fdrelease(p, fds[1]);
	}

	FRELE(rf, p);
	FRELE(wf, p);
	return (error);

free3:
	fdremove(fdp, fds[0]);
	closef(rf, p);
	rpipe = NULL;
free2:
	fdpunlock(fdp);
	pipe_destroy(wpipe);
	pipe_destroy(rpipe);
	return (error);
}
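
/*
 * The userland view of the descriptors set up by dopipe() above, as a
 * minimal sketch (not kernel code); fds[0] is the read end and fds[1]
 * the write end:
 *
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *	#include <err.h>
 *
 *	int fds[2];
 *	char c;
 *
 *	if (pipe2(fds, O_CLOEXEC | O_NONBLOCK) == -1)
 *		err(1, "pipe2");
 *	if (write(fds[1], "x", 1) == -1)
 *		err(1, "write");
 *	if (read(fds[0], &c, 1) == -1)
 *		err(1, "read");
 */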

/*
 * Allocate kva for the pipe circular buffer; the space is pageable.
 * This routine will 'realloc' the size of a pipe safely: if the
 * allocation fails, it retains the old buffer and returns ENOMEM.
 */
int
pipe_buffer_realloc(struct pipe *cpipe, u_int size)
{
	caddr_t buffer;

	/* buffer uninitialized or pipe locked */
	KASSERT((cpipe->pipe_buffer.buffer == NULL) ||
	    (cpipe->pipe_state & PIPE_LOCK));

	/* buffer should be empty */
	KASSERT(cpipe->pipe_buffer.cnt == 0);

	KERNEL_LOCK();
	buffer = km_alloc(size, &kv_any, &kp_pageable, &kd_waitok);
	KERNEL_UNLOCK();
	if (buffer == NULL)
		return (ENOMEM);

	/* free old resources if we are resizing */
	pipe_buffer_free(cpipe);

	cpipe->pipe_buffer.buffer = buffer;
	cpipe->pipe_buffer.size = size;
	cpipe->pipe_buffer.in = 0;
	cpipe->pipe_buffer.out = 0;

	atomic_add_int(&amountpipekva, cpipe->pipe_buffer.size);

	return (0);
}

/*
 * initialize and allocate VM and memory for pipe
 */
int
pipe_create(struct pipe *cpipe)
{
	int error;

	error = pipe_buffer_realloc(cpipe, PIPE_SIZE);
	if (error != 0)
		return (error);

	sigio_init(&cpipe->pipe_sigio);

	getnanotime(&cpipe->pipe_ctime);
	cpipe->pipe_atime = cpipe->pipe_ctime;
	cpipe->pipe_mtime = cpipe->pipe_ctime;

	return (0);
}

struct pipe *
pipe_peer(struct pipe *cpipe)
{
	struct pipe *peer;

	rw_assert_anylock(cpipe->pipe_lock);

	peer = cpipe->pipe_peer;
	if (peer == NULL || (peer->pipe_state & PIPE_EOF))
		return (NULL);
	return (peer);
}

/*
 * Lock a pipe for exclusive I/O access.
 */
int
pipe_iolock(struct pipe *cpipe)
{
	int error;

	rw_assert_wrlock(cpipe->pipe_lock);

	while (cpipe->pipe_state & PIPE_LOCK) {
		cpipe->pipe_state |= PIPE_LWANT;
		error = rwsleep_nsec(cpipe, cpipe->pipe_lock, PRIBIO | PCATCH,
		    "pipeiolk", INFSLP);
		if (error)
			return (error);
	}
	cpipe->pipe_state |= PIPE_LOCK;
	return (0);
}

/*
 * Unlock a pipe I/O lock.
 */
void
pipe_iounlock(struct pipe *cpipe)
{
	rw_assert_wrlock(cpipe->pipe_lock);
	KASSERT(cpipe->pipe_state & PIPE_LOCK);

	cpipe->pipe_state &= ~PIPE_LOCK;
	if (cpipe->pipe_state & PIPE_LWANT) {
		cpipe->pipe_state &= ~PIPE_LWANT;
		wakeup(cpipe);
	}
}
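
/*
 * The I/O lock nests inside the pipe rwlock.  A sketch of the caller
 * pattern, as used by pipe_read() and pipe_write() below:
 *
 *	rw_enter_write(cpipe->pipe_lock);
 *	++cpipe->pipe_busy;		keep the pipe from being destroyed
 *	error = pipe_iolock(cpipe);
 *	...				transfer data, possibly sleeping
 *	pipe_iounlock(cpipe);
 *	--cpipe->pipe_busy;
 *	pipe_rundown(cpipe);		wake pipe_destroy() if it waits
 *	rw_exit_write(cpipe->pipe_lock);
 */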

/*
 * Unlock the pipe I/O lock and go to sleep.  Returns 0 on success and the
 * I/O lock is relocked.  Otherwise, if a signal was caught, non-zero is
 * returned and the I/O lock is not relocked.
 *
 * Any caller must obtain a reference to the pipe by incrementing `pipe_busy'
 * before calling this function in order to ensure that the same pipe is not
 * destroyed while sleeping.
 */
int
pipe_iosleep(struct pipe *cpipe, const char *wmesg)
{
	int error;

	pipe_iounlock(cpipe);
	error = rwsleep_nsec(cpipe, cpipe->pipe_lock, PRIBIO | PCATCH, wmesg,
	    INFSLP);
	if (error)
		return (error);
	return (pipe_iolock(cpipe));
}

void
pipeselwakeup(struct pipe *cpipe)
{
	rw_assert_wrlock(cpipe->pipe_lock);

	KNOTE(&cpipe->pipe_klist, 0);

	if (cpipe->pipe_state & PIPE_ASYNC)
		pgsigio(&cpipe->pipe_sigio, SIGIO, 0);
}

int
pipe_read(struct file *fp, struct uio *uio, int fflags)
{
	struct pipe *rpipe = fp->f_data;
	size_t nread = 0, size;
	int error;

	rw_enter_write(rpipe->pipe_lock);
	++rpipe->pipe_busy;
	error = pipe_iolock(rpipe);
	if (error) {
		--rpipe->pipe_busy;
		pipe_rundown(rpipe);
		rw_exit_write(rpipe->pipe_lock);
		return (error);
	}

	while (uio->uio_resid) {
		/* Normal pipe buffer receive. */
		if (rpipe->pipe_buffer.cnt > 0) {
			size = rpipe->pipe_buffer.size - rpipe->pipe_buffer.out;
			if (size > rpipe->pipe_buffer.cnt)
				size = rpipe->pipe_buffer.cnt;
			if (size > uio->uio_resid)
				size = uio->uio_resid;
			rw_exit_write(rpipe->pipe_lock);
			error = uiomove(&rpipe->pipe_buffer.buffer[rpipe->pipe_buffer.out],
			    size, uio);
			rw_enter_write(rpipe->pipe_lock);
			if (error) {
				break;
			}
			rpipe->pipe_buffer.out += size;
			if (rpipe->pipe_buffer.out >= rpipe->pipe_buffer.size)
				rpipe->pipe_buffer.out = 0;

			rpipe->pipe_buffer.cnt -= size;
			/*
			 * If there is no more to read in the pipe, reset
			 * its pointers to the beginning.  This improves
			 * cache hit stats.
			 */
			if (rpipe->pipe_buffer.cnt == 0) {
				rpipe->pipe_buffer.in = 0;
				rpipe->pipe_buffer.out = 0;
			}
			nread += size;
		} else {
			/*
			 * detect EOF condition
			 * read returns 0 on EOF, no need to set error
			 */
			if (rpipe->pipe_state & PIPE_EOF)
				break;

			/* If the "write-side" has been blocked, wake it up. */
			if (rpipe->pipe_state & PIPE_WANTW) {
				rpipe->pipe_state &= ~PIPE_WANTW;
				wakeup(rpipe);
			}

			/* Break if some data was read. */
			if (nread > 0)
				break;

			/* Handle non-blocking mode operation. */
			if (fp->f_flag & FNONBLOCK) {
				error = EAGAIN;
				break;
			}

			/* Wait for more data. */
			rpipe->pipe_state |= PIPE_WANTR;
			error = pipe_iosleep(rpipe, "piperd");
			if (error)
				goto unlocked_error;
		}
	}
	pipe_iounlock(rpipe);

	if (error == 0)
		getnanotime(&rpipe->pipe_atime);
unlocked_error:
	--rpipe->pipe_busy;

	if (pipe_rundown(rpipe) == 0 && rpipe->pipe_buffer.cnt < MINPIPESIZE) {
		/* Handle write blocking hysteresis. */
		if (rpipe->pipe_state & PIPE_WANTW) {
			rpipe->pipe_state &= ~PIPE_WANTW;
			wakeup(rpipe);
		}
	}

	if (rpipe->pipe_buffer.size - rpipe->pipe_buffer.cnt >= PIPE_BUF)
		pipeselwakeup(rpipe);

	rw_exit_write(rpipe->pipe_lock);
	return (error);
}
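
/*
 * A worked example of the circular buffer bookkeeping above, assuming a
 * hypothetical 8-byte buffer: writing 6 bytes then reading 4 leaves
 * in == 6, out == 4, cnt == 2.  Writing 4 more bytes fills slots 6-7,
 * wraps around to fill slots 0-1, and leaves in == 2, out == 4,
 * cnt == 6.  Once cnt drops to 0, both indices are reset to 0 (see
 * pipe_read() above).
 */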

int
pipe_write(struct file *fp, struct uio *uio, int fflags)
{
	struct pipe *rpipe = fp->f_data, *wpipe;
	struct rwlock *lock = rpipe->pipe_lock;
	size_t orig_resid;
	int error;

	rw_enter_write(lock);
	wpipe = pipe_peer(rpipe);

	/* Detect loss of pipe read side, issue SIGPIPE if lost. */
	if (wpipe == NULL) {
		rw_exit_write(lock);
		return (EPIPE);
	}

	++wpipe->pipe_busy;
	error = pipe_iolock(wpipe);
	if (error) {
		--wpipe->pipe_busy;
		pipe_rundown(wpipe);
		rw_exit_write(lock);
		return (error);
	}

	/* If it is advantageous to resize the pipe buffer, do so. */
	if (uio->uio_resid > PIPE_SIZE &&
	    wpipe->pipe_buffer.size <= PIPE_SIZE &&
	    wpipe->pipe_buffer.cnt == 0) {
		unsigned int npipe;

		npipe = atomic_inc_int_nv(&nbigpipe);
		if (npipe > LIMITBIGPIPES ||
		    pipe_buffer_realloc(wpipe, BIG_PIPE_SIZE) != 0)
			atomic_dec_int(&nbigpipe);
	}

	orig_resid = uio->uio_resid;

	while (uio->uio_resid) {
		size_t space;

		if (wpipe->pipe_state & PIPE_EOF) {
			error = EPIPE;
			break;
		}

		space = wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt;

		/* Writes of size <= PIPE_BUF must be atomic. */
		if (space < uio->uio_resid && orig_resid <= PIPE_BUF)
			space = 0;

		if (space > 0) {
			size_t size;	/* Transfer size */
			size_t segsize;	/* first segment to transfer */

			/*
			 * Transfer size is minimum of uio transfer
			 * and free space in pipe buffer.
			 */
			if (space > uio->uio_resid)
				size = uio->uio_resid;
			else
				size = space;
			/*
			 * First segment to transfer is minimum of
			 * transfer size and contiguous space in
			 * pipe buffer.  If first segment to transfer
			 * is less than the transfer size, we've got
			 * a wraparound in the buffer.
			 */
			segsize = wpipe->pipe_buffer.size -
			    wpipe->pipe_buffer.in;
			if (segsize > size)
				segsize = size;

			/* Transfer first segment */

			rw_exit_write(lock);
			error = uiomove(&wpipe->pipe_buffer.buffer[wpipe->pipe_buffer.in],
			    segsize, uio);
			rw_enter_write(lock);

			if (error == 0 && segsize < size) {
				/*
				 * Transfer remaining part now, to
				 * support atomic writes.  Wraparound
				 * happened.
				 */
#ifdef DIAGNOSTIC
				if (wpipe->pipe_buffer.in + segsize !=
				    wpipe->pipe_buffer.size)
					panic("Expected pipe buffer wraparound disappeared");
#endif

				rw_exit_write(lock);
				error = uiomove(&wpipe->pipe_buffer.buffer[0],
				    size - segsize, uio);
				rw_enter_write(lock);
			}
			if (error == 0) {
				wpipe->pipe_buffer.in += size;
				if (wpipe->pipe_buffer.in >=
				    wpipe->pipe_buffer.size) {
#ifdef DIAGNOSTIC
					if (wpipe->pipe_buffer.in !=
					    size - segsize +
					    wpipe->pipe_buffer.size)
						panic("Expected wraparound bad");
#endif
					wpipe->pipe_buffer.in = size - segsize;
				}

				wpipe->pipe_buffer.cnt += size;
#ifdef DIAGNOSTIC
				if (wpipe->pipe_buffer.cnt >
				    wpipe->pipe_buffer.size)
					panic("Pipe buffer overflow");
#endif
			}
			if (error)
				break;
		} else {
			/* If the "read-side" has been blocked, wake it up. */
			if (wpipe->pipe_state & PIPE_WANTR) {
				wpipe->pipe_state &= ~PIPE_WANTR;
				wakeup(wpipe);
			}

			/* Don't block on non-blocking I/O. */
			if (fp->f_flag & FNONBLOCK) {
				error = EAGAIN;
				break;
			}

			/*
			 * We have no more space and have something to offer;
			 * wake up select/poll.
			 */
			pipeselwakeup(wpipe);

			wpipe->pipe_state |= PIPE_WANTW;
			error = pipe_iosleep(wpipe, "pipewr");
			if (error)
				goto unlocked_error;

			/*
			 * If the read side wants to go away, we just issue
			 * a signal to ourselves.
			 */
			if (wpipe->pipe_state & PIPE_EOF) {
				error = EPIPE;
				break;
			}
		}
	}
	pipe_iounlock(wpipe);

unlocked_error:
	--wpipe->pipe_busy;

	if (pipe_rundown(wpipe) == 0 && wpipe->pipe_buffer.cnt > 0) {
		/*
		 * If we have put any characters in the buffer, we wake up
		 * the reader.
		 */
		if (wpipe->pipe_state & PIPE_WANTR) {
			wpipe->pipe_state &= ~PIPE_WANTR;
			wakeup(wpipe);
		}
	}

	/* Don't return EPIPE if I/O was successful. */
	if (wpipe->pipe_buffer.cnt == 0 &&
	    uio->uio_resid == 0 &&
	    error == EPIPE) {
		error = 0;
	}

	if (error == 0)
		getnanotime(&wpipe->pipe_mtime);
	/* We have something to offer, wake up select/poll. */
	if (wpipe->pipe_buffer.cnt)
		pipeselwakeup(wpipe);

	rw_exit_write(lock);
	return (error);
}

/*
 * we implement a very minimal set of ioctls for compatibility with sockets.
 */
int
pipe_ioctl(struct file *fp, u_long cmd, caddr_t data, struct proc *p)
{
	struct pipe *mpipe = fp->f_data;
	int error = 0;

	switch (cmd) {

	case FIONBIO:
		break;

	case FIOASYNC:
		rw_enter_write(mpipe->pipe_lock);
		if (*(int *)data) {
			mpipe->pipe_state |= PIPE_ASYNC;
		} else {
			mpipe->pipe_state &= ~PIPE_ASYNC;
		}
		rw_exit_write(mpipe->pipe_lock);
		break;

	case FIONREAD:
		rw_enter_read(mpipe->pipe_lock);
		*(int *)data = mpipe->pipe_buffer.cnt;
		rw_exit_read(mpipe->pipe_lock);
		break;

	case FIOSETOWN:
	case SIOCSPGRP:
	case TIOCSPGRP:
		error = sigio_setown(&mpipe->pipe_sigio, cmd, data);
		break;

	case FIOGETOWN:
	case SIOCGPGRP:
	case TIOCGPGRP:
		sigio_getown(&mpipe->pipe_sigio, cmd, data);
		break;

	default:
		error = ENOTTY;
	}

	return (error);
}
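
/*
 * Userland usage of the ioctls handled above, as a minimal sketch (not
 * kernel code).  FIOASYNC requests SIGIO delivery, FIONREAD reports the
 * number of bytes currently buffered:
 *
 *	#include <sys/ioctl.h>
 *
 *	int on = 1, nread;
 *
 *	if (ioctl(fd, FIOASYNC, &on) == -1 ||
 *	    ioctl(fd, FIONREAD, &nread) == -1)
 *		err(1, "ioctl");
 */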

int
pipe_stat(struct file *fp, struct stat *ub, struct proc *p)
{
	struct pipe *pipe = fp->f_data;

	memset(ub, 0, sizeof(*ub));

	rw_enter_read(pipe->pipe_lock);
	ub->st_mode = S_IFIFO;
	ub->st_blksize = pipe->pipe_buffer.size;
	ub->st_size = pipe->pipe_buffer.cnt;
	ub->st_blocks = (ub->st_size + ub->st_blksize - 1) / ub->st_blksize;
	ub->st_atim.tv_sec = pipe->pipe_atime.tv_sec;
	ub->st_atim.tv_nsec = pipe->pipe_atime.tv_nsec;
	ub->st_mtim.tv_sec = pipe->pipe_mtime.tv_sec;
	ub->st_mtim.tv_nsec = pipe->pipe_mtime.tv_nsec;
	ub->st_ctim.tv_sec = pipe->pipe_ctime.tv_sec;
	ub->st_ctim.tv_nsec = pipe->pipe_ctime.tv_nsec;
	ub->st_uid = fp->f_cred->cr_uid;
	ub->st_gid = fp->f_cred->cr_gid;
	rw_exit_read(pipe->pipe_lock);
	/*
	 * Left as 0: st_dev, st_ino, st_nlink, st_rdev, st_flags, st_gen.
	 * XXX (st_dev, st_ino) should be unique.
	 */
	return (0);
}

int
pipe_close(struct file *fp, struct proc *p)
{
	struct pipe *cpipe = fp->f_data;

	fp->f_ops = NULL;
	fp->f_data = NULL;
	pipe_destroy(cpipe);
	return (0);
}

/*
 * Free kva for pipe circular buffer.
 * No pipe lock check as this is only called from pipe_buffer_realloc()
 * and pipe_destroy().
 */
void
pipe_buffer_free(struct pipe *cpipe)
{
	u_int size;

	if (cpipe->pipe_buffer.buffer == NULL)
		return;

	size = cpipe->pipe_buffer.size;

	KERNEL_LOCK();
	km_free(cpipe->pipe_buffer.buffer, size, &kv_any, &kp_pageable);
	KERNEL_UNLOCK();

	cpipe->pipe_buffer.buffer = NULL;

	atomic_sub_int(&amountpipekva, size);
	if (size > PIPE_SIZE)
		atomic_dec_int(&nbigpipe);
}

/*
 * Shut down the pipe and free its resources.
 */
void
pipe_destroy(struct pipe *cpipe)
{
	struct pipe *ppipe;

	if (cpipe == NULL)
		return;

	rw_enter_write(cpipe->pipe_lock);

	pipeselwakeup(cpipe);
	sigio_free(&cpipe->pipe_sigio);

	/*
	 * If the other side is blocked, wake it up saying that
	 * we want to close it down.
	 */
	cpipe->pipe_state |= PIPE_EOF;
	while (cpipe->pipe_busy) {
		wakeup(cpipe);
		cpipe->pipe_state |= PIPE_WANTD;
		rwsleep_nsec(cpipe, cpipe->pipe_lock, PRIBIO, "pipecl", INFSLP);
	}

	/* Disconnect from peer. */
	if ((ppipe = cpipe->pipe_peer) != NULL) {
		pipeselwakeup(ppipe);

		ppipe->pipe_state |= PIPE_EOF;
		wakeup(ppipe);
		ppipe->pipe_peer = NULL;
	}

	pipe_buffer_free(cpipe);

	rw_exit_write(cpipe->pipe_lock);

	if (ppipe == NULL)
		pipe_pair_destroy(cpipe->pipe_pair);
}

/*
 * Returns non-zero if a rundown is currently ongoing.
 */
int
pipe_rundown(struct pipe *cpipe)
{
	rw_assert_wrlock(cpipe->pipe_lock);

	if (cpipe->pipe_busy > 0 || (cpipe->pipe_state & PIPE_WANTD) == 0)
		return (0);

	/* Only wakeup pipe_destroy() once the pipe is no longer busy. */
	cpipe->pipe_state &= ~(PIPE_WANTD | PIPE_WANTR | PIPE_WANTW);
	wakeup(cpipe);
	return (1);
}
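
/*
 * Userland monitoring of a pipe with the filters registered below, as a
 * minimal sketch (not kernel code); `rfd' is assumed to be the read end
 * of a pipe:
 *
 *	#include <sys/event.h>
 *
 *	struct kevent kev;
 *	int kq;
 *
 *	if ((kq = kqueue()) == -1)
 *		err(1, "kqueue");
 *	EV_SET(&kev, rfd, EVFILT_READ, EV_ADD, 0, 0, NULL);
 *	if (kevent(kq, &kev, 1, NULL, 0, NULL) == -1)
 *		err(1, "kevent");
 */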

int
pipe_kqfilter(struct file *fp, struct knote *kn)
{
	struct pipe *rpipe = kn->kn_fp->f_data, *wpipe;
	struct rwlock *lock = rpipe->pipe_lock;
	int error = 0;

	rw_enter_write(lock);
	wpipe = pipe_peer(rpipe);

	switch (kn->kn_filter) {
	case EVFILT_READ:
		kn->kn_fop = &pipe_rfiltops;
		kn->kn_hook = rpipe;
		klist_insert_locked(&rpipe->pipe_klist, kn);
		break;
	case EVFILT_WRITE:
		if (wpipe == NULL) {
			/* other end of pipe has been closed */
			error = EPIPE;
			break;
		}
		kn->kn_fop = &pipe_wfiltops;
		kn->kn_hook = wpipe;
		klist_insert_locked(&wpipe->pipe_klist, kn);
		break;
	case EVFILT_EXCEPT:
		if (kn->kn_flags & __EV_SELECT) {
			/* Prevent triggering exceptfds. */
			error = EPERM;
			break;
		}
		if ((kn->kn_flags & __EV_POLL) == 0) {
			/* Disallow usage through kevent(2). */
			error = EINVAL;
			break;
		}
		kn->kn_fop = &pipe_efiltops;
		kn->kn_hook = rpipe;
		klist_insert_locked(&rpipe->pipe_klist, kn);
		break;
	default:
		error = EINVAL;
	}

	rw_exit_write(lock);

	return (error);
}

void
filt_pipedetach(struct knote *kn)
{
	struct pipe *cpipe = kn->kn_hook;

	klist_remove(&cpipe->pipe_klist, kn);
}

int
filt_piperead(struct knote *kn, long hint)
{
	struct pipe *rpipe = kn->kn_fp->f_data, *wpipe;

	rw_assert_wrlock(rpipe->pipe_lock);

	wpipe = pipe_peer(rpipe);

	kn->kn_data = rpipe->pipe_buffer.cnt;

	if ((rpipe->pipe_state & PIPE_EOF) || wpipe == NULL) {
		kn->kn_flags |= EV_EOF;
		if (kn->kn_flags & __EV_POLL)
			kn->kn_flags |= __EV_HUP;
		return (1);
	}

	return (kn->kn_data > 0);
}

int
filt_pipewrite(struct knote *kn, long hint)
{
	struct pipe *rpipe = kn->kn_fp->f_data, *wpipe;

	rw_assert_wrlock(rpipe->pipe_lock);

	wpipe = pipe_peer(rpipe);

	if (wpipe == NULL) {
		kn->kn_data = 0;
		kn->kn_flags |= EV_EOF;
		if (kn->kn_flags & __EV_POLL)
			kn->kn_flags |= __EV_HUP;
		return (1);
	}
	kn->kn_data = wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt;

	return (kn->kn_data >= PIPE_BUF);
}

int
filt_pipeexcept(struct knote *kn, long hint)
{
	struct pipe *rpipe = kn->kn_fp->f_data, *wpipe;
	int active = 0;

	rw_assert_wrlock(rpipe->pipe_lock);

	wpipe = pipe_peer(rpipe);

	if (kn->kn_flags & __EV_POLL) {
		if ((rpipe->pipe_state & PIPE_EOF) || wpipe == NULL) {
			kn->kn_flags |= __EV_HUP;
			active = 1;
		}
	}

	return (active);
}

int
filt_pipemodify(struct kevent *kev, struct knote *kn)
{
	struct pipe *rpipe = kn->kn_fp->f_data;
	int active;

	rw_enter_write(rpipe->pipe_lock);
	active = knote_modify(kev, kn);
	rw_exit_write(rpipe->pipe_lock);

	return (active);
}

int
filt_pipeprocess(struct knote *kn, struct kevent *kev)
{
	struct pipe *rpipe = kn->kn_fp->f_data;
	int active;

	rw_enter_write(rpipe->pipe_lock);
	active = knote_process(kn, kev);
	rw_exit_write(rpipe->pipe_lock);

	return (active);
}

void
pipe_init(void)
{
	pool_init(&pipe_pair_pool, sizeof(struct pipe_pair), 0, IPL_MPFLOOR,
	    PR_WAITOK, "pipepl", NULL);
}

struct pipe_pair *
pipe_pair_create(void)
{
	struct pipe_pair *pp;

	pp = pool_get(&pipe_pair_pool, PR_WAITOK | PR_ZERO);
	pp->pp_wpipe.pipe_pair = pp;
	pp->pp_rpipe.pipe_pair = pp;
	pp->pp_wpipe.pipe_peer = &pp->pp_rpipe;
	pp->pp_rpipe.pipe_peer = &pp->pp_wpipe;
	/*
	 * One lock is used per pipe pair in order to obtain exclusive
	 * access to the pipe pair.
	 */
	rw_init(&pp->pp_lock, "pipelk");
	pp->pp_wpipe.pipe_lock = &pp->pp_lock;
	pp->pp_rpipe.pipe_lock = &pp->pp_lock;

	klist_init_rwlock(&pp->pp_wpipe.pipe_klist, &pp->pp_lock);
	klist_init_rwlock(&pp->pp_rpipe.pipe_klist, &pp->pp_lock);

	if (pipe_create(&pp->pp_wpipe) || pipe_create(&pp->pp_rpipe))
		goto err;
	return (pp);
err:
	pipe_destroy(&pp->pp_wpipe);
	pipe_destroy(&pp->pp_rpipe);
	return (NULL);
}

void
pipe_pair_destroy(struct pipe_pair *pp)
{
	klist_free(&pp->pp_wpipe.pipe_klist);
	klist_free(&pp->pp_rpipe.pipe_klist);
	pool_put(&pipe_pair_pool, pp);
}