/*	$OpenBSD: sys_pipe.c,v 1.148 2024/12/30 02:46:00 guenther Exp $	*/

/*
 * Copyright (c) 1996 John S. Dyson
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice immediately at the beginning of the file, without modification,
 *    this list of conditions, and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Absolutely no warranty of function or purpose is made by the author
 *    John S. Dyson.
 * 4. Modifications may be freely made to this file if the above conditions
 *    are met.
 */

/*
 * This file contains a high-performance replacement for the socket-based
 * pipes scheme originally used in FreeBSD/4.4Lite.  It does not support
 * all features of sockets, but does do everything that pipes normally
 * do.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/fcntl.h>
#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/pool.h>
#include <sys/ioctl.h>
#include <sys/stat.h>
#include <sys/signalvar.h>
#include <sys/mount.h>
#include <sys/syscallargs.h>
#include <sys/event.h>
#ifdef KTRACE
#include <sys/ktrace.h>
#endif

#include <uvm/uvm_extern.h>

#include <sys/pipe.h>

struct pipe_pair {
	struct pipe pp_wpipe;
	struct pipe pp_rpipe;
	struct rwlock pp_lock;
};

/*
 * interfaces to the outside world
 */
int	pipe_read(struct file *, struct uio *, int);
int	pipe_write(struct file *, struct uio *, int);
int	pipe_close(struct file *, struct proc *);
int	pipe_kqfilter(struct file *fp, struct knote *kn);
int	pipe_ioctl(struct file *, u_long, caddr_t, struct proc *);
int	pipe_stat(struct file *fp, struct stat *ub, struct proc *p);

static const struct fileops pipeops = {
	.fo_read	= pipe_read,
	.fo_write	= pipe_write,
	.fo_ioctl	= pipe_ioctl,
	.fo_kqfilter	= pipe_kqfilter,
	.fo_stat	= pipe_stat,
	.fo_close	= pipe_close
};

void	filt_pipedetach(struct knote *kn);
int	filt_piperead(struct knote *kn, long hint);
int	filt_pipewrite(struct knote *kn, long hint);
int	filt_pipeexcept(struct knote *kn, long hint);
int	filt_pipemodify(struct kevent *kev, struct knote *kn);
int	filt_pipeprocess(struct knote *kn, struct kevent *kev);

const struct filterops pipe_rfiltops = {
	.f_flags	= FILTEROP_ISFD | FILTEROP_MPSAFE,
	.f_attach	= NULL,
	.f_detach	= filt_pipedetach,
	.f_event	= filt_piperead,
	.f_modify	= filt_pipemodify,
	.f_process	= filt_pipeprocess,
};

const struct filterops pipe_wfiltops = {
	.f_flags	= FILTEROP_ISFD | FILTEROP_MPSAFE,
	.f_attach	= NULL,
	.f_detach	= filt_pipedetach,
	.f_event	= filt_pipewrite,
	.f_modify	= filt_pipemodify,
	.f_process	= filt_pipeprocess,
};

const struct filterops pipe_efiltops = {
	.f_flags	= FILTEROP_ISFD | FILTEROP_MPSAFE,
	.f_attach	= NULL,
	.f_detach	= filt_pipedetach,
	.f_event	= filt_pipeexcept,
	.f_modify	= filt_pipemodify,
	.f_process	= filt_pipeprocess,
};

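/*
 * Illustrative userland sketch (not part of this file): the filter tables
 * above are what kevent(2) ends up invoking for a pipe descriptor.  A
 * process can watch the read end of a pipe like this; `pfd' is assumed to
 * come from a prior pipe(2) call and error checks are omitted.
 *
 *	#include <sys/event.h>
 *
 *	struct kevent kev, ev;
 *	int kq = kqueue();
 *
 *	EV_SET(&kev, pfd[0], EVFILT_READ, EV_ADD, 0, 0, NULL);
 *	kevent(kq, &kev, 1, NULL, 0, NULL);	registers the knote
 *	kevent(kq, NULL, 0, &ev, 1, NULL);	blocks; ev.data = bytes buffered
 */
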
/*
 * Default pipe buffer size(s); this can be kind-of large now because pipe
 * space is pageable.  The pipe code will try to maintain locality of
 * reference for performance reasons, so small amounts of outstanding I/O
 * will not wipe the cache.
 */
#define MINPIPESIZE (PIPE_SIZE/3)

/*
 * Limit the number of "big" pipes
 */
#define LIMITBIGPIPES	32
unsigned int nbigpipe;
static unsigned int amountpipekva;

struct pool pipe_pair_pool;

int	dopipe(struct proc *, int *, int);
void	pipe_wakeup(struct pipe *);

int	pipe_create(struct pipe *);
void	pipe_destroy(struct pipe *);
int	pipe_rundown(struct pipe *);
struct pipe *pipe_peer(struct pipe *);
int	pipe_buffer_realloc(struct pipe *, u_int);
void	pipe_buffer_free(struct pipe *);

int	pipe_iolock(struct pipe *);
void	pipe_iounlock(struct pipe *);
int	pipe_iosleep(struct pipe *, const char *);

struct pipe_pair *pipe_pair_create(void);
void	pipe_pair_destroy(struct pipe_pair *);

/*
 * The pipe system call for the DTYPE_PIPE type of pipes
 */

int
sys_pipe(struct proc *p, void *v, register_t *retval)
{
	struct sys_pipe_args /* {
		syscallarg(int *) fdp;
	} */ *uap = v;

	return (dopipe(p, SCARG(uap, fdp), 0));
}

int
sys_pipe2(struct proc *p, void *v, register_t *retval)
{
	struct sys_pipe2_args /* {
		syscallarg(int *) fdp;
		syscallarg(int) flags;
	} */ *uap = v;

	if (SCARG(uap, flags) & ~(O_CLOEXEC | FNONBLOCK))
		return (EINVAL);

	return (dopipe(p, SCARG(uap, fdp), SCARG(uap, flags)));
}

int
dopipe(struct proc *p, int *ufds, int flags)
{
	struct filedesc *fdp = p->p_fd;
	struct file *rf, *wf;
	struct pipe_pair *pp;
	struct pipe *rpipe, *wpipe = NULL;
	int fds[2], cloexec, error;

	cloexec = (flags & O_CLOEXEC) ? UF_EXCLOSE : 0;

	pp = pipe_pair_create();
	if (pp == NULL)
		return (ENOMEM);
	wpipe = &pp->pp_wpipe;
	rpipe = &pp->pp_rpipe;

	fdplock(fdp);

	error = falloc(p, &rf, &fds[0]);
	if (error != 0)
		goto free2;
	rf->f_flag = FREAD | FWRITE | (flags & FNONBLOCK);
	rf->f_type = DTYPE_PIPE;
	rf->f_data = rpipe;
	rf->f_ops = &pipeops;

	error = falloc(p, &wf, &fds[1]);
	if (error != 0)
		goto free3;
	wf->f_flag = FREAD | FWRITE | (flags & FNONBLOCK);
	wf->f_type = DTYPE_PIPE;
	wf->f_data = wpipe;
	wf->f_ops = &pipeops;

	fdinsert(fdp, fds[0], cloexec, rf);
	fdinsert(fdp, fds[1], cloexec, wf);

	error = copyout(fds, ufds, sizeof(fds));
	if (error == 0) {
		fdpunlock(fdp);
#ifdef KTRACE
		if (KTRPOINT(p, KTR_STRUCT))
			ktrfds(p, fds, 2);
#endif
	} else {
		/* fdrelease() unlocks fdp. */
		fdrelease(p, fds[0]);
		fdplock(fdp);
		fdrelease(p, fds[1]);
	}

	FRELE(rf, p);
	FRELE(wf, p);
	return (error);

free3:
	fdremove(fdp, fds[0]);
	closef(rf, p);
	rpipe = NULL;
free2:
	fdpunlock(fdp);
	pipe_destroy(wpipe);
	pipe_destroy(rpipe);
	return (error);
}

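/*
 * Illustrative userland sketch (not part of this file): the flag check in
 * sys_pipe2() above means pipe2(2) accepts only O_CLOEXEC and O_NONBLOCK
 * (FNONBLOCK).  Error handling is abbreviated.
 *
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *	#include <err.h>
 *
 *	int pfd[2];
 *
 *	if (pipe2(pfd, O_CLOEXEC | O_NONBLOCK) == -1)
 *		err(1, "pipe2");
 *	write(pfd[1], "x", 1);	pfd[0] is the read end, pfd[1] the write end
 */
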
/*
 * Allocate kva for the pipe circular buffer; the space is pageable.
 * This routine will 'realloc' the size of a pipe safely: if it fails,
 * it retains the old buffer and returns ENOMEM.
 */
int
pipe_buffer_realloc(struct pipe *cpipe, u_int size)
{
	caddr_t buffer;

	/* buffer uninitialized or pipe locked */
	KASSERT((cpipe->pipe_buffer.buffer == NULL) ||
	    (cpipe->pipe_state & PIPE_LOCK));

	/* buffer should be empty */
	KASSERT(cpipe->pipe_buffer.cnt == 0);

	buffer = km_alloc(size, &kv_any, &kp_pageable, &kd_waitok);
	if (buffer == NULL)
		return (ENOMEM);

	/* free old resources if we are resizing */
	pipe_buffer_free(cpipe);

	cpipe->pipe_buffer.buffer = buffer;
	cpipe->pipe_buffer.size = size;
	cpipe->pipe_buffer.in = 0;
	cpipe->pipe_buffer.out = 0;

	atomic_add_int(&amountpipekva, cpipe->pipe_buffer.size);

	return (0);
}

/*
 * initialize and allocate VM and memory for pipe
 */
int
pipe_create(struct pipe *cpipe)
{
	int error;

	error = pipe_buffer_realloc(cpipe, PIPE_SIZE);
	if (error != 0)
		return (error);

	sigio_init(&cpipe->pipe_sigio);

	getnanotime(&cpipe->pipe_ctime);
	cpipe->pipe_atime = cpipe->pipe_ctime;
	cpipe->pipe_mtime = cpipe->pipe_ctime;

	return (0);
}

struct pipe *
pipe_peer(struct pipe *cpipe)
{
	struct pipe *peer;

	rw_assert_anylock(cpipe->pipe_lock);

	peer = cpipe->pipe_peer;
	if (peer == NULL || (peer->pipe_state & PIPE_EOF))
		return (NULL);
	return (peer);
}

/*
 * Lock a pipe for exclusive I/O access.
 */
int
pipe_iolock(struct pipe *cpipe)
{
	int error;

	rw_assert_wrlock(cpipe->pipe_lock);

	while (cpipe->pipe_state & PIPE_LOCK) {
		cpipe->pipe_state |= PIPE_LWANT;
		error = rwsleep_nsec(cpipe, cpipe->pipe_lock, PRIBIO | PCATCH,
		    "pipeiolk", INFSLP);
		if (error)
			return (error);
	}
	cpipe->pipe_state |= PIPE_LOCK;
	return (0);
}

/*
 * Unlock a pipe I/O lock.
 */
void
pipe_iounlock(struct pipe *cpipe)
{
	rw_assert_wrlock(cpipe->pipe_lock);
	KASSERT(cpipe->pipe_state & PIPE_LOCK);

	cpipe->pipe_state &= ~PIPE_LOCK;
	if (cpipe->pipe_state & PIPE_LWANT) {
		cpipe->pipe_state &= ~PIPE_LWANT;
		wakeup(cpipe);
	}
}

/*
 * Unlock the pipe I/O lock and go to sleep.  Returns 0 on success and the
 * I/O lock is relocked.  Otherwise, if a signal was caught, non-zero is
 * returned and the I/O lock is not locked.
 *
 * Any caller must obtain a reference to the pipe by incrementing `pipe_busy'
 * before calling this function in order to ensure that the same pipe is not
 * destroyed while sleeping.
 */
int
pipe_iosleep(struct pipe *cpipe, const char *wmesg)
{
	int error;

	pipe_iounlock(cpipe);
	error = rwsleep_nsec(cpipe, cpipe->pipe_lock, PRIBIO | PCATCH, wmesg,
	    INFSLP);
	if (error)
		return (error);
	return (pipe_iolock(cpipe));
}

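/*
 * Illustrative sketch (not part of the original file), condensed from
 * pipe_read() below: the protocol callers follow around the I/O lock.
 * The `pipe_busy' reference keeps pipe_destroy() from tearing the pipe
 * down while the caller sleeps, and pipe_rundown() hands control back to
 * a waiting pipe_destroy() once the last reference is dropped.
 *
 *	rw_enter_write(cpipe->pipe_lock);
 *	++cpipe->pipe_busy;
 *	error = pipe_iolock(cpipe);
 *	if (error == 0) {
 *		... transfer data; pipe_iosleep() may be used to wait and,
 *		    on failure, returns with the I/O lock already dropped ...
 *		pipe_iounlock(cpipe);
 *	}
 *	--cpipe->pipe_busy;
 *	pipe_rundown(cpipe);
 *	rw_exit_write(cpipe->pipe_lock);
 */
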
void
pipe_wakeup(struct pipe *cpipe)
{
	rw_assert_wrlock(cpipe->pipe_lock);

	knote_locked(&cpipe->pipe_klist, 0);

	if (cpipe->pipe_state & PIPE_ASYNC)
		pgsigio(&cpipe->pipe_sigio, SIGIO, 0);
}

int
pipe_read(struct file *fp, struct uio *uio, int fflags)
{
	struct pipe *rpipe = fp->f_data;
	size_t nread = 0, size;
	int error;

	rw_enter_write(rpipe->pipe_lock);
	++rpipe->pipe_busy;
	error = pipe_iolock(rpipe);
	if (error) {
		--rpipe->pipe_busy;
		pipe_rundown(rpipe);
		rw_exit_write(rpipe->pipe_lock);
		return (error);
	}

	while (uio->uio_resid) {
		/* Normal pipe buffer receive. */
		if (rpipe->pipe_buffer.cnt > 0) {
			size = rpipe->pipe_buffer.size - rpipe->pipe_buffer.out;
			if (size > rpipe->pipe_buffer.cnt)
				size = rpipe->pipe_buffer.cnt;
			if (size > uio->uio_resid)
				size = uio->uio_resid;
			rw_exit_write(rpipe->pipe_lock);
			error = uiomove(&rpipe->pipe_buffer.buffer[rpipe->pipe_buffer.out],
			    size, uio);
			rw_enter_write(rpipe->pipe_lock);
			if (error) {
				break;
			}
			rpipe->pipe_buffer.out += size;
			if (rpipe->pipe_buffer.out >= rpipe->pipe_buffer.size)
				rpipe->pipe_buffer.out = 0;

			rpipe->pipe_buffer.cnt -= size;
			/*
			 * If there is no more to read in the pipe, reset
			 * its pointers to the beginning.  This improves
			 * cache hit stats.
			 */
			if (rpipe->pipe_buffer.cnt == 0) {
				rpipe->pipe_buffer.in = 0;
				rpipe->pipe_buffer.out = 0;
			}
			nread += size;
		} else {
			/*
			 * detect EOF condition
			 * read returns 0 on EOF, no need to set error
			 */
			if (rpipe->pipe_state & PIPE_EOF)
				break;

			/* If the "write-side" has been blocked, wake it up. */
			if (rpipe->pipe_state & PIPE_WANTW) {
				rpipe->pipe_state &= ~PIPE_WANTW;
				wakeup(rpipe);
			}

			/* Break if some data was read. */
			if (nread > 0)
				break;

			/* Handle non-blocking mode operation. */
			if (fp->f_flag & FNONBLOCK) {
				error = EAGAIN;
				break;
			}

			/* Wait for more data. */
			rpipe->pipe_state |= PIPE_WANTR;
			error = pipe_iosleep(rpipe, "piperd");
			if (error)
				goto unlocked_error;
		}
	}
	pipe_iounlock(rpipe);

	if (error == 0)
		getnanotime(&rpipe->pipe_atime);
unlocked_error:
	--rpipe->pipe_busy;

	if (pipe_rundown(rpipe) == 0 && rpipe->pipe_buffer.cnt < MINPIPESIZE) {
		/* Handle write blocking hysteresis. */
		if (rpipe->pipe_state & PIPE_WANTW) {
			rpipe->pipe_state &= ~PIPE_WANTW;
			wakeup(rpipe);
		}
	}

	if (rpipe->pipe_buffer.size - rpipe->pipe_buffer.cnt >= PIPE_BUF)
		pipe_wakeup(rpipe);

	rw_exit_write(rpipe->pipe_lock);
	return (error);
}

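/*
 * Illustrative worked example (not part of the original file) of the
 * circular-buffer index arithmetic used by pipe_read() above and
 * pipe_write() below, assuming a hypothetical 16-byte buffer:
 *
 *	start			in = 0,  out = 0, cnt = 0
 *	write 10 bytes		in = 10, out = 0, cnt = 10
 *	read 6 bytes		in = 10, out = 6, cnt = 4
 *	write 10 bytes		first segment fills offsets 10..15 (6 bytes),
 *				the remaining 4 bytes wrap to offsets 0..3,
 *				so in = 4, out = 6, cnt = 14
 *
 * `cnt' is the number of bytes buffered, `in' is where the next write
 * lands, and `out' is where the next read starts; both indices wrap
 * modulo `size'.
 */
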
int
pipe_write(struct file *fp, struct uio *uio, int fflags)
{
	struct pipe *rpipe = fp->f_data, *wpipe;
	struct rwlock *lock = rpipe->pipe_lock;
	size_t orig_resid;
	int error;

	rw_enter_write(lock);
	wpipe = pipe_peer(rpipe);

	/* Detect loss of pipe read side, issue SIGPIPE if lost. */
	if (wpipe == NULL) {
		rw_exit_write(lock);
		return (EPIPE);
	}

	++wpipe->pipe_busy;
	error = pipe_iolock(wpipe);
	if (error) {
		--wpipe->pipe_busy;
		pipe_rundown(wpipe);
		rw_exit_write(lock);
		return (error);
	}

	/* If it is advantageous to resize the pipe buffer, do so. */
	if (uio->uio_resid > PIPE_SIZE &&
	    wpipe->pipe_buffer.size <= PIPE_SIZE &&
	    wpipe->pipe_buffer.cnt == 0) {
		unsigned int npipe;

		npipe = atomic_inc_int_nv(&nbigpipe);
		if (npipe > LIMITBIGPIPES ||
		    pipe_buffer_realloc(wpipe, BIG_PIPE_SIZE) != 0)
			atomic_dec_int(&nbigpipe);
	}

	orig_resid = uio->uio_resid;

	while (uio->uio_resid) {
		size_t space;

		if (wpipe->pipe_state & PIPE_EOF) {
			error = EPIPE;
			break;
		}

		space = wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt;

		/* Writes of size <= PIPE_BUF must be atomic. */
		if (space < uio->uio_resid && orig_resid <= PIPE_BUF)
			space = 0;

		if (space > 0) {
			size_t size;	/* Transfer size */
			size_t segsize;	/* first segment to transfer */

			/*
			 * Transfer size is minimum of uio transfer
			 * and free space in pipe buffer.
			 */
			if (space > uio->uio_resid)
				size = uio->uio_resid;
			else
				size = space;
			/*
			 * First segment to transfer is minimum of
			 * transfer size and contiguous space in
			 * pipe buffer.  If first segment to transfer
			 * is less than the transfer size, we've got
			 * a wraparound in the buffer.
			 */
			segsize = wpipe->pipe_buffer.size -
			    wpipe->pipe_buffer.in;
			if (segsize > size)
				segsize = size;

			/* Transfer first segment */

			rw_exit_write(lock);
			error = uiomove(&wpipe->pipe_buffer.buffer[wpipe->pipe_buffer.in],
			    segsize, uio);
			rw_enter_write(lock);

			if (error == 0 && segsize < size) {
				/*
				 * Transfer remaining part now, to
				 * support atomic writes.  Wraparound
				 * happened.
				 */
#ifdef DIAGNOSTIC
				if (wpipe->pipe_buffer.in + segsize !=
				    wpipe->pipe_buffer.size)
					panic("Expected pipe buffer wraparound disappeared");
#endif

				rw_exit_write(lock);
				error = uiomove(&wpipe->pipe_buffer.buffer[0],
				    size - segsize, uio);
				rw_enter_write(lock);
			}
			if (error == 0) {
				wpipe->pipe_buffer.in += size;
				if (wpipe->pipe_buffer.in >=
				    wpipe->pipe_buffer.size) {
#ifdef DIAGNOSTIC
					if (wpipe->pipe_buffer.in != size - segsize + wpipe->pipe_buffer.size)
						panic("Expected wraparound bad");
#endif
					wpipe->pipe_buffer.in = size - segsize;
				}

				wpipe->pipe_buffer.cnt += size;
#ifdef DIAGNOSTIC
				if (wpipe->pipe_buffer.cnt > wpipe->pipe_buffer.size)
					panic("Pipe buffer overflow");
#endif
			}
			if (error)
				break;
		} else {
			/* If the "read-side" has been blocked, wake it up. */
			if (wpipe->pipe_state & PIPE_WANTR) {
				wpipe->pipe_state &= ~PIPE_WANTR;
				wakeup(wpipe);
			}

			/* Don't block on non-blocking I/O. */
			if (fp->f_flag & FNONBLOCK) {
				error = EAGAIN;
				break;
			}

			/*
			 * We have no more space and have something to offer,
			 * wake up select/poll.
			 */
			pipe_wakeup(wpipe);

			wpipe->pipe_state |= PIPE_WANTW;
			error = pipe_iosleep(wpipe, "pipewr");
			if (error)
				goto unlocked_error;

			/*
			 * If read side wants to go away, we just issue a
			 * signal to ourselves.
			 */
			if (wpipe->pipe_state & PIPE_EOF) {
				error = EPIPE;
				break;
			}
		}
	}
	pipe_iounlock(wpipe);

unlocked_error:
	--wpipe->pipe_busy;

	if (pipe_rundown(wpipe) == 0 && wpipe->pipe_buffer.cnt > 0) {
		/*
		 * If we have put any characters in the buffer, we wake up
		 * the reader.
		 */
		if (wpipe->pipe_state & PIPE_WANTR) {
			wpipe->pipe_state &= ~PIPE_WANTR;
			wakeup(wpipe);
		}
	}

	/* Don't return EPIPE if I/O was successful. */
	if (wpipe->pipe_buffer.cnt == 0 &&
	    uio->uio_resid == 0 &&
	    error == EPIPE) {
		error = 0;
	}

	if (error == 0)
		getnanotime(&wpipe->pipe_mtime);
	/* We have something to offer, wake up select/poll. */
	if (wpipe->pipe_buffer.cnt)
		pipe_wakeup(wpipe);

	rw_exit_write(lock);
	return (error);
}

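/*
 * Illustrative userland sketch (not part of the kernel source): the
 * "space = 0" check above implements the POSIX guarantee that writes of
 * at most PIPE_BUF bytes are atomic.  Several writers can therefore share
 * one pipe without their records interleaving, as long as each record
 * fits in PIPE_BUF.  `pfd' and the struct layout below are made up for
 * the example.
 *
 *	#include <limits.h>
 *	#include <unistd.h>
 *
 *	struct logrec {
 *		pid_t	lr_pid;
 *		char	lr_msg[128];
 *	};
 *
 *	struct logrec r = { getpid(), "worker done" };
 *
 *	_Static_assert(sizeof(r) <= PIPE_BUF, "record must stay atomic");
 *	write(pfd[1], &r, sizeof(r));	never interleaved with other writers
 */
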
/*
 * we implement a very minimal set of ioctls for compatibility with sockets.
 */
int
pipe_ioctl(struct file *fp, u_long cmd, caddr_t data, struct proc *p)
{
	struct pipe *mpipe = fp->f_data;
	int error = 0;

	switch (cmd) {

	case FIOASYNC:
		rw_enter_write(mpipe->pipe_lock);
		if (*(int *)data) {
			mpipe->pipe_state |= PIPE_ASYNC;
		} else {
			mpipe->pipe_state &= ~PIPE_ASYNC;
		}
		rw_exit_write(mpipe->pipe_lock);
		break;

	case FIONREAD:
		rw_enter_read(mpipe->pipe_lock);
		*(int *)data = mpipe->pipe_buffer.cnt;
		rw_exit_read(mpipe->pipe_lock);
		break;

	case FIOSETOWN:
	case SIOCSPGRP:
	case TIOCSPGRP:
		error = sigio_setown(&mpipe->pipe_sigio, cmd, data);
		break;

	case FIOGETOWN:
	case SIOCGPGRP:
	case TIOCGPGRP:
		sigio_getown(&mpipe->pipe_sigio, cmd, data);
		break;

	default:
		error = ENOTTY;
	}

	return (error);
}

int
pipe_stat(struct file *fp, struct stat *ub, struct proc *p)
{
	struct pipe *pipe = fp->f_data;

	memset(ub, 0, sizeof(*ub));

	rw_enter_read(pipe->pipe_lock);
	ub->st_mode = S_IFIFO;
	ub->st_blksize = pipe->pipe_buffer.size;
	ub->st_size = pipe->pipe_buffer.cnt;
	ub->st_blocks = (ub->st_size + ub->st_blksize - 1) / ub->st_blksize;
	ub->st_atim.tv_sec = pipe->pipe_atime.tv_sec;
	ub->st_atim.tv_nsec = pipe->pipe_atime.tv_nsec;
	ub->st_mtim.tv_sec = pipe->pipe_mtime.tv_sec;
	ub->st_mtim.tv_nsec = pipe->pipe_mtime.tv_nsec;
	ub->st_ctim.tv_sec = pipe->pipe_ctime.tv_sec;
	ub->st_ctim.tv_nsec = pipe->pipe_ctime.tv_nsec;
	ub->st_uid = fp->f_cred->cr_uid;
	ub->st_gid = fp->f_cred->cr_gid;
	rw_exit_read(pipe->pipe_lock);
	/*
	 * Left as 0: st_dev, st_ino, st_nlink, st_rdev, st_flags, st_gen.
	 * XXX (st_dev, st_ino) should be unique.
	 */
	return (0);
}

int
pipe_close(struct file *fp, struct proc *p)
{
	struct pipe *cpipe = fp->f_data;

	fp->f_ops = NULL;
	fp->f_data = NULL;
	pipe_destroy(cpipe);
	return (0);
}

/*
 * Free kva for pipe circular buffer.
 * No pipe lock check as only called from pipe_buffer_realloc() and
 * pipe_destroy().
 */
void
pipe_buffer_free(struct pipe *cpipe)
{
	u_int size;

	if (cpipe->pipe_buffer.buffer == NULL)
		return;

	size = cpipe->pipe_buffer.size;

	km_free(cpipe->pipe_buffer.buffer, size, &kv_any, &kp_pageable);

	cpipe->pipe_buffer.buffer = NULL;

	atomic_sub_int(&amountpipekva, size);
	if (size > PIPE_SIZE)
		atomic_dec_int(&nbigpipe);
}

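/*
 * Illustrative userland sketch (not part of this file): FIONREAD as
 * handled above reports the number of bytes currently buffered, which a
 * reader can use to size its next read(2).  `pfd' and `buf' are assumed
 * from earlier examples; error checks are omitted.
 *
 *	#include <sys/ioctl.h>
 *
 *	int n;
 *
 *	if (ioctl(pfd[0], FIONREAD, &n) == 0 && n > 0)
 *		(void)read(pfd[0], buf, (size_t)n);
 */
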
/*
 * Shut down the pipe and free resources.
 */
void
pipe_destroy(struct pipe *cpipe)
{
	struct pipe *ppipe;

	if (cpipe == NULL)
		return;

	rw_enter_write(cpipe->pipe_lock);

	pipe_wakeup(cpipe);
	sigio_free(&cpipe->pipe_sigio);

	/*
	 * If the other side is blocked, wake it up saying that
	 * we want to close it down.
	 */
	cpipe->pipe_state |= PIPE_EOF;
	while (cpipe->pipe_busy) {
		wakeup(cpipe);
		cpipe->pipe_state |= PIPE_WANTD;
		rwsleep_nsec(cpipe, cpipe->pipe_lock, PRIBIO, "pipecl", INFSLP);
	}

	/* Disconnect from peer. */
	if ((ppipe = cpipe->pipe_peer) != NULL) {
		pipe_wakeup(ppipe);

		ppipe->pipe_state |= PIPE_EOF;
		wakeup(ppipe);
		ppipe->pipe_peer = NULL;
	}

	pipe_buffer_free(cpipe);

	rw_exit_write(cpipe->pipe_lock);

	if (ppipe == NULL)
		pipe_pair_destroy(cpipe->pipe_pair);
}

/*
 * Returns non-zero if a rundown is currently ongoing.
 */
int
pipe_rundown(struct pipe *cpipe)
{
	rw_assert_wrlock(cpipe->pipe_lock);

	if (cpipe->pipe_busy > 0 || (cpipe->pipe_state & PIPE_WANTD) == 0)
		return (0);

	/* Only wakeup pipe_destroy() once the pipe is no longer busy. */
	cpipe->pipe_state &= ~(PIPE_WANTD | PIPE_WANTR | PIPE_WANTW);
	wakeup(cpipe);
	return (1);
}

int
pipe_kqfilter(struct file *fp, struct knote *kn)
{
	struct pipe *rpipe = kn->kn_fp->f_data, *wpipe;
	struct rwlock *lock = rpipe->pipe_lock;
	int error = 0;

	rw_enter_write(lock);
	wpipe = pipe_peer(rpipe);

	switch (kn->kn_filter) {
	case EVFILT_READ:
		kn->kn_fop = &pipe_rfiltops;
		kn->kn_hook = rpipe;
		klist_insert_locked(&rpipe->pipe_klist, kn);
		break;
	case EVFILT_WRITE:
		if (wpipe == NULL) {
			/*
			 * The other end of the pipe has been closed.
			 * Since the filter now always indicates a pending
			 * event, attach the knote to the current side
			 * to proceed with the registration.
			 */
			wpipe = rpipe;
		}
		kn->kn_fop = &pipe_wfiltops;
		kn->kn_hook = wpipe;
		klist_insert_locked(&wpipe->pipe_klist, kn);
		break;
	case EVFILT_EXCEPT:
		if (kn->kn_flags & __EV_SELECT) {
			/* Prevent triggering exceptfds. */
			error = EPERM;
			break;
		}
		if ((kn->kn_flags & __EV_POLL) == 0) {
			/* Disallow usage through kevent(2). */
			error = EINVAL;
			break;
		}
		kn->kn_fop = &pipe_efiltops;
		kn->kn_hook = rpipe;
		klist_insert_locked(&rpipe->pipe_klist, kn);
		break;
	default:
		error = EINVAL;
	}

	rw_exit_write(lock);

	return (error);
}

void
filt_pipedetach(struct knote *kn)
{
	struct pipe *cpipe = kn->kn_hook;

	klist_remove(&cpipe->pipe_klist, kn);
}

int
filt_piperead(struct knote *kn, long hint)
{
	struct pipe *rpipe = kn->kn_fp->f_data, *wpipe;

	rw_assert_wrlock(rpipe->pipe_lock);

	wpipe = pipe_peer(rpipe);

	kn->kn_data = rpipe->pipe_buffer.cnt;

	if ((rpipe->pipe_state & PIPE_EOF) || wpipe == NULL) {
		kn->kn_flags |= EV_EOF;
		if (kn->kn_flags & __EV_POLL)
			kn->kn_flags |= __EV_HUP;
		return (1);
	}

	return (kn->kn_data > 0);
}

int
filt_pipewrite(struct knote *kn, long hint)
{
	struct pipe *rpipe = kn->kn_fp->f_data, *wpipe;

	rw_assert_wrlock(rpipe->pipe_lock);

	wpipe = pipe_peer(rpipe);

	if (wpipe == NULL) {
		kn->kn_data = 0;
		kn->kn_flags |= EV_EOF;
		if (kn->kn_flags & __EV_POLL)
			kn->kn_flags |= __EV_HUP;
		return (1);
	}
	kn->kn_data = wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt;

	return (kn->kn_data >= PIPE_BUF);
}

int
filt_pipeexcept(struct knote *kn, long hint)
{
	struct pipe *rpipe = kn->kn_fp->f_data, *wpipe;
	int active = 0;

	rw_assert_wrlock(rpipe->pipe_lock);

	wpipe = pipe_peer(rpipe);

	if (kn->kn_flags & __EV_POLL) {
		if ((rpipe->pipe_state & PIPE_EOF) || wpipe == NULL) {
			kn->kn_flags |= __EV_HUP;
			active = 1;
		}
	}

	return (active);
}

int
filt_pipemodify(struct kevent *kev, struct knote *kn)
{
	struct pipe *rpipe = kn->kn_fp->f_data;
	int active;

	rw_enter_write(rpipe->pipe_lock);
	active = knote_modify(kev, kn);
	rw_exit_write(rpipe->pipe_lock);

	return (active);
}

int
filt_pipeprocess(struct knote *kn, struct kevent *kev)
{
	struct pipe *rpipe = kn->kn_fp->f_data;
	int active;

	rw_enter_write(rpipe->pipe_lock);
	active = knote_process(kn, kev);
	rw_exit_write(rpipe->pipe_lock);

	return (active);
}

void
pipe_init(void)
{
	pool_init(&pipe_pair_pool, sizeof(struct pipe_pair), 0, IPL_MPFLOOR,
	    PR_WAITOK, "pipepl", NULL);
}

struct pipe_pair *
pipe_pair_create(void)
{
	struct pipe_pair *pp;

	pp = pool_get(&pipe_pair_pool, PR_WAITOK | PR_ZERO);
	pp->pp_wpipe.pipe_pair = pp;
	pp->pp_rpipe.pipe_pair = pp;
	pp->pp_wpipe.pipe_peer = &pp->pp_rpipe;
	pp->pp_rpipe.pipe_peer = &pp->pp_wpipe;
	/*
	 * One lock is used per pipe pair in order to obtain exclusive access to
	 * the pipe pair.
	 */
	rw_init(&pp->pp_lock, "pipelk");
	pp->pp_wpipe.pipe_lock = &pp->pp_lock;
	pp->pp_rpipe.pipe_lock = &pp->pp_lock;

	klist_init_rwlock(&pp->pp_wpipe.pipe_klist, &pp->pp_lock);
	klist_init_rwlock(&pp->pp_rpipe.pipe_klist, &pp->pp_lock);

	if (pipe_create(&pp->pp_wpipe) || pipe_create(&pp->pp_rpipe))
		goto err;
	return (pp);
err:
	pipe_destroy(&pp->pp_wpipe);
	pipe_destroy(&pp->pp_rpipe);
	return (NULL);
}

void
pipe_pair_destroy(struct pipe_pair *pp)
{
	klist_free(&pp->pp_wpipe.pipe_klist);
	klist_free(&pp->pp_rpipe.pipe_klist);
	pool_put(&pipe_pair_pool, pp);
}