/*	$NetBSD: sys_pipe.c,v 1.41 2003/08/11 10:24:41 pk Exp $	*/

/*-
 * Copyright (c) 2003 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Paul Kranenburg.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *        This product includes software developed by the NetBSD
 *        Foundation, Inc. and its contributors.
 * 4. Neither the name of The NetBSD Foundation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 1996 John S. Dyson
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice immediately at the beginning of the file, without modification,
 *    this list of conditions, and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Absolutely no warranty of function or purpose is made by the author
 *    John S. Dyson.
 * 4. Modifications may be freely made to this file if the above conditions
 *    are met.
 *
 * $FreeBSD: src/sys/kern/sys_pipe.c,v 1.95 2002/03/09 22:06:31 alfred Exp $
 */

/*
 * This file contains a high-performance replacement for the socket-based
 * pipes scheme originally used in FreeBSD/4.4Lite.  It does not support
 * all features of sockets, but does do everything that pipes normally do.
 *
 * Adaptation for NetBSD UVM, including the uvm_loan() based direct write,
 * was written by Jaromir Dolecek.
 */

/*
 * This code has two modes of operation: a small write mode and a large
 * write mode.  The small write mode acts like conventional pipes with
 * a kernel buffer.  If the write is less than PIPE_MINDIRECT, the
 * "normal" pipe buffering is done.  If the buffer is between PIPE_MINDIRECT
 * and PIPE_SIZE in size, it is mapped read-only into the kernel address
 * space using the UVM page loan facility, and the receiving process copies
 * the data directly from the pages of the sending process.
 *
 * The constant PIPE_MINDIRECT is chosen to make sure that buffering will
 * happen for small transfers so that the system will not spend all of
 * its time context switching.  PIPE_SIZE is constrained by the
 * amount of kernel virtual memory.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: sys_pipe.c,v 1.41 2003/08/11 10:24:41 pk Exp $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/fcntl.h>
#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/filio.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/ttycom.h>
#include <sys/stat.h>
#include <sys/malloc.h>
#include <sys/poll.h>
#include <sys/signalvar.h>
#include <sys/vnode.h>
#include <sys/uio.h>
#include <sys/select.h>
#include <sys/mount.h>
#include <sys/sa.h>
#include <sys/syscallargs.h>
#include <sys/sysctl.h>

#include <uvm/uvm.h>

#include <sys/pipe.h>

/*
 * Avoid microtime(9), it's slow.  We don't guard the read from time(9)
 * with splclock(9) since we don't actually need to be THAT sure the access
 * is atomic.
 */
#define PIPE_TIMESTAMP(tvp)	(*(tvp) = time)

/*
 * Use this define if you want to disable *fancy* VM things.  Expect an
 * approx 30% decrease in transfer rate.
 */
/* #define PIPE_NODIRECT */

/*
 * interfaces to the outside world
 */
static int pipe_read(struct file *fp, off_t *offset, struct uio *uio,
		struct ucred *cred, int flags);
static int pipe_write(struct file *fp, off_t *offset, struct uio *uio,
		struct ucred *cred, int flags);
static int pipe_close(struct file *fp, struct proc *p);
static int pipe_poll(struct file *fp, int events, struct proc *p);
static int pipe_fcntl(struct file *fp, u_int com, void *data,
		struct proc *p);
static int pipe_kqfilter(struct file *fp, struct knote *kn);
static int pipe_stat(struct file *fp, struct stat *sb, struct proc *p);
static int pipe_ioctl(struct file *fp, u_long cmd, void *data,
		struct proc *p);

static struct fileops pipeops = {
	pipe_read, pipe_write, pipe_ioctl, pipe_fcntl, pipe_poll,
	pipe_stat, pipe_close, pipe_kqfilter
};
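
/*
 * For orientation only: a minimal sketch of how the generic descriptor
 * layer reaches this table.  The fo_read member name and the
 * dofileread()-style call site are assumptions about code living outside
 * this file, not something this file defines:
 *
 *	struct file *fp;	// resolved from the descriptor number
 *
 *	error = (*fp->f_ops->fo_read)(fp, &fp->f_offset, uio,
 *	    fp->f_cred, flags);
 *
 * For a pipe descriptor f_ops is &pipeops, so the call above lands in
 * pipe_read() below; the other members dispatch likewise.
 */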

/*
 * Default pipe buffer size(s).  This can be kind-of large now because pipe
 * space is pageable.  The pipe code will try to maintain locality of
 * reference for performance reasons, so small amounts of outstanding I/O
 * will not wipe the cache.
 */
#define MINPIPESIZE	(PIPE_SIZE/3)
#define MAXPIPESIZE	(2*PIPE_SIZE/3)

/*
 * Maximum amount of kva for pipes -- this is kind-of a soft limit, but
 * is there so that on large systems, we don't exhaust it.
 */
#define MAXPIPEKVA	(8*1024*1024)
static int maxpipekva = MAXPIPEKVA;

/*
 * Limit for direct transfers; we cannot, of course, limit
 * the amount of kva for pipes in general.
 */
#define LIMITPIPEKVA	(16*1024*1024)
static int limitpipekva = LIMITPIPEKVA;

/*
 * Limit the number of "big" pipes.
 */
#define LIMITBIGPIPES	32
static int maxbigpipes = LIMITBIGPIPES;
static int nbigpipe = 0;

/*
 * Amount of KVA consumed by pipe buffers.
 */
static int amountpipekva = 0;

MALLOC_DEFINE(M_PIPE, "pipe", "Pipe structures");

static void pipeclose(struct pipe *pipe);
static void pipe_free_kmem(struct pipe *pipe);
static int pipe_create(struct pipe **pipep, int allockva);
static int pipelock(struct pipe *pipe, int catch);
static __inline void pipeunlock(struct pipe *pipe);
static void pipeselwakeup(struct pipe *pipe, struct pipe *sigp);
#ifndef PIPE_NODIRECT
static int pipe_direct_write(struct pipe *wpipe, struct uio *uio);
#endif
static int pipespace(struct pipe *pipe, int size);

#ifndef PIPE_NODIRECT
static int pipe_loan_alloc(struct pipe *, int);
static void pipe_loan_free(struct pipe *);
#endif /* PIPE_NODIRECT */

static struct pool pipe_pool;

/*
 * The pipe system call for the DTYPE_PIPE type of pipes
 */

/* ARGSUSED */
int
sys_pipe(l, v, retval)
	struct lwp *l;
	void *v;
	register_t *retval;
{
	struct file *rf, *wf;
	struct pipe *rpipe, *wpipe;
	int fd, error;
	struct proc *p;

	p = l->l_proc;
	rpipe = wpipe = NULL;
	if (pipe_create(&rpipe, 1) || pipe_create(&wpipe, 0)) {
		pipeclose(rpipe);
		pipeclose(wpipe);
		return (ENFILE);
	}

	/*
	 * Note: the file structure returned from falloc() is marked
	 * as 'larval' initially.  Unless we mark it as 'mature' by
	 * FILE_SET_MATURE(), any attempt to do anything with it would
	 * return EBADF, including e.g. dup(2) or close(2).  This avoids
	 * file descriptor races if we block in the second falloc().
	 */

	error = falloc(p, &rf, &fd);
	if (error)
		goto free2;
	retval[0] = fd;
	rf->f_flag = FREAD;
	rf->f_type = DTYPE_PIPE;
	rf->f_data = (caddr_t)rpipe;
	rf->f_ops = &pipeops;

	error = falloc(p, &wf, &fd);
	if (error)
		goto free3;
	retval[1] = fd;
	wf->f_flag = FWRITE;
	wf->f_type = DTYPE_PIPE;
	wf->f_data = (caddr_t)wpipe;
	wf->f_ops = &pipeops;

	rpipe->pipe_peer = wpipe;
	wpipe->pipe_peer = rpipe;

	FILE_SET_MATURE(rf);
	FILE_SET_MATURE(wf);
	FILE_UNUSE(rf, p);
	FILE_UNUSE(wf, p);
	return (0);
free3:
	FILE_UNUSE(rf, p);
	ffree(rf);
	fdremove(p->p_fd, retval[0]);
free2:
	pipeclose(wpipe);
	pipeclose(rpipe);

	return (error);
}
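
/*
 * Userland view, for reference: a minimal sketch of consuming the two
 * descriptors this system call returns (retval[0] is opened FREAD,
 * retval[1] FWRITE).  Illustrative only, not part of the kernel proper:
 *
 *	#include <err.h>
 *	#include <unistd.h>
 *
 *	int fds[2];
 *	char buf[5];
 *
 *	if (pipe(fds) == -1)
 *		err(1, "pipe");
 *	write(fds[1], "hello", 5);	// write end
 *	read(fds[0], buf, 5);		// read end
 */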

/*
 * Allocate kva for pipe circular buffer; the space is pageable.
 * This routine will 'realloc' the size of a pipe safely: if it fails,
 * it will retain the old buffer.
 * If it fails it will return ENOMEM.
 */
static int
pipespace(pipe, size)
	struct pipe *pipe;
	int size;
{
	caddr_t buffer;

	/*
	 * Allocate pageable virtual address space.  Physical memory is
	 * allocated on demand.
	 */
	buffer = (caddr_t) uvm_km_valloc(kernel_map, round_page(size));
	if (buffer == NULL)
		return (ENOMEM);

	/* free old resources if we're resizing */
	pipe_free_kmem(pipe);
	pipe->pipe_buffer.buffer = buffer;
	pipe->pipe_buffer.size = size;
	pipe->pipe_buffer.in = 0;
	pipe->pipe_buffer.out = 0;
	pipe->pipe_buffer.cnt = 0;
	amountpipekva += pipe->pipe_buffer.size;
	return (0);
}

/*
 * Initialize and allocate VM and memory for pipe.
 */
static int
pipe_create(pipep, allockva)
	struct pipe **pipep;
	int allockva;
{
	struct pipe *pipe;
	int error;

	pipe = pool_get(&pipe_pool, PR_WAITOK);
	if (pipe == NULL)
		return (ENOMEM);

	/* Initialize */
	memset(pipe, 0, sizeof(struct pipe));
	pipe->pipe_state = PIPE_SIGNALR;

	if (allockva && (error = pipespace(pipe, PIPE_SIZE))) {
		/* don't leak the pipe structure on failure */
		pool_put(&pipe_pool, pipe);
		return (error);
	}

	PIPE_TIMESTAMP(&pipe->pipe_ctime);
	pipe->pipe_atime = pipe->pipe_ctime;
	pipe->pipe_mtime = pipe->pipe_ctime;
	simple_lock_init(&pipe->pipe_slock);
	lockinit(&pipe->pipe_lock, PRIBIO | PCATCH, "pipelk", 0, 0);

	*pipep = pipe;
	return (0);
}

/*
 * Lock a pipe for I/O, blocking other access.
 * Called with pipe spin lock held.
 * Returns with pipe spin lock released on success.
 */
static int
pipelock(pipe, catch)
	struct pipe *pipe;
	int catch;
{
	int error;

	LOCK_ASSERT(simple_lock_held(&pipe->pipe_slock));

	while (1) {
		error = lockmgr(&pipe->pipe_lock, LK_EXCLUSIVE | LK_INTERLOCK,
				&pipe->pipe_slock);
		if (error == 0)
			break;

		simple_lock(&pipe->pipe_slock);
		if (catch || (error != EINTR && error != ERESTART))
			break;
		/*
		 * XXX XXX XXX
		 * The pipe lock is initialised with PCATCH on and we cannot
		 * override this in a lockmgr() call.  Thus a pending signal
		 * will cause lockmgr() to return with EINTR or ERESTART.
		 * We cannot simply re-enter lockmgr() at this point since
		 * the pending signals have not yet been posted and would
		 * cause an immediate EINTR/ERESTART return again.
		 * As a workaround we pause for a while here, giving the lock
		 * a chance to drain, before trying again.
		 * XXX XXX XXX
		 *
		 * NOTE: Consider dropping PCATCH from this lock; in practice
		 * it is never held for long enough periods for having it
		 * interruptible at the start of pipe_read/pipe_write to be
		 * beneficial.
		 */
		(void) tsleep(&lbolt, PRIBIO, "rstrtpipelock", hz);
	}
	return (error);
}

/*
 * unlock a pipe I/O lock
 */
static __inline void
pipeunlock(pipe)
	struct pipe *pipe;
{

	lockmgr(&pipe->pipe_lock, LK_RELEASE, NULL);
}
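
/*
 * The lock ordering used throughout this file, sketched for clarity.
 * This is derived from the pipe_read()/pipe_write() bodies below; it is
 * a summary, not an additional interface:
 *
 *	PIPE_LOCK(pipe);			// take the spin interlock
 *	if ((error = pipelock(pipe, 1)) != 0)	// long-term lock; drops the
 *		goto fail;			// interlock on success, but
 *						// holds it again on failure
 *	...					// manipulate pipe_buffer
 *	PIPE_LOCK(pipe);			// re-take the interlock
 *	pipeunlock(pipe);			// drop the long-term lock
 *	...
 *	PIPE_UNLOCK(pipe);			// finally drop the interlock
 */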

/*
 * Select/poll wakeup.  This also sends SIGIO to peer connected to
 * 'sigpipe' side of pipe.
 */
static void
pipeselwakeup(selp, sigp)
	struct pipe *selp, *sigp;
{
	struct proc *p;
	pid_t pid;

	selnotify(&selp->pipe_sel, 0);
	if (sigp == NULL || (sigp->pipe_state & PIPE_ASYNC) == 0)
		return;

	pid = sigp->pipe_pgid;
	if (pid == 0)
		return;

	if (pid > 0)
		gsignal(pid, SIGIO);
	else if ((p = pfind(-pid)) != NULL)
		psignal(p, SIGIO);
}
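
/*
 * Userland sketch of arming the SIGIO delivery above, via the ioctls
 * handled in pipe_ioctl() further down.  Illustrative only; the handler
 * and descriptor names are placeholders:
 *
 *	#include <sys/ioctl.h>
 *	#include <signal.h>
 *	#include <unistd.h>
 *
 *	void on_sigio(int);		// application-supplied handler
 *	int on = 1;
 *	pid_t pgid = getpgrp();
 *
 *	signal(SIGIO, on_sigio);	// install the handler first
 *	ioctl(fd, TIOCSPGRP, &pgid);	// sets pipe_pgid (group if > 0)
 *	ioctl(fd, FIOASYNC, &on);	// sets PIPE_ASYNC
 */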

/* ARGSUSED */
static int
pipe_read(fp, offset, uio, cred, flags)
	struct file *fp;
	off_t *offset;
	struct uio *uio;
	struct ucred *cred;
	int flags;
{
	struct pipe *rpipe = (struct pipe *) fp->f_data;
	struct pipebuf *bp = &rpipe->pipe_buffer;
	int error;
	size_t nread = 0;
	size_t size;
	size_t ocnt;

	PIPE_LOCK(rpipe);
	++rpipe->pipe_busy;
	ocnt = bp->cnt;

again:
	error = pipelock(rpipe, 1);
	if (error)
		goto unlocked_error;

	while (uio->uio_resid) {
		/*
		 * normal pipe buffer receive
		 */
		if (bp->cnt > 0) {
			size = bp->size - bp->out;
			if (size > bp->cnt)
				size = bp->cnt;
			if (size > uio->uio_resid)
				size = uio->uio_resid;

			error = uiomove(&bp->buffer[bp->out], size, uio);
			if (error)
				break;

			bp->out += size;
			if (bp->out >= bp->size)
				bp->out = 0;

			bp->cnt -= size;

			/*
			 * If there is no more to read in the pipe, reset
			 * its pointers to the beginning.  This improves
			 * cache hit stats.
			 */
			if (bp->cnt == 0) {
				bp->in = 0;
				bp->out = 0;
			}
			nread += size;
#ifndef PIPE_NODIRECT
		} else if ((rpipe->pipe_state & PIPE_DIRECTR) != 0) {
			/*
			 * Direct copy, bypassing a kernel buffer.
			 */
			caddr_t va;

			KASSERT(rpipe->pipe_state & PIPE_DIRECTW);

			size = rpipe->pipe_map.cnt;
			if (size > uio->uio_resid)
				size = uio->uio_resid;

			va = (caddr_t) rpipe->pipe_map.kva +
			    rpipe->pipe_map.pos;
			error = uiomove(va, size, uio);
			if (error)
				break;
			nread += size;
			rpipe->pipe_map.pos += size;
			rpipe->pipe_map.cnt -= size;
			if (rpipe->pipe_map.cnt == 0) {
				PIPE_LOCK(rpipe);
				rpipe->pipe_state &= ~PIPE_DIRECTR;
				wakeup(rpipe);
				PIPE_UNLOCK(rpipe);
			}
#endif
		} else {
			/*
			 * Break if some data was read.
			 */
			if (nread > 0)
				break;

			PIPE_LOCK(rpipe);

			/*
			 * Detect EOF condition.
			 * Read returns 0 on EOF, no need to set error.
			 */
			if (rpipe->pipe_state & PIPE_EOF) {
				PIPE_UNLOCK(rpipe);
				break;
			}

			/*
			 * don't block on non-blocking I/O
			 */
			if (fp->f_flag & FNONBLOCK) {
				PIPE_UNLOCK(rpipe);
				error = EAGAIN;
				break;
			}

			/*
			 * Unlock the pipe buffer for our remaining processing.
			 * We will either break out with an error or we will
			 * sleep and relock to loop.
			 */
			pipeunlock(rpipe);

			/*
			 * The PIPE_DIRECTR flag is not under the control
			 * of the long-term lock (see pipe_direct_write()),
			 * so re-check now while holding the spin lock.
			 */
			if ((rpipe->pipe_state & PIPE_DIRECTR) != 0)
				goto again;

			/*
			 * We want to read more, wake up select/poll.
			 */
			pipeselwakeup(rpipe, rpipe->pipe_peer);

			/*
			 * If the "write-side" is blocked, wake it up now.
			 */
			if (rpipe->pipe_state & PIPE_WANTW) {
				rpipe->pipe_state &= ~PIPE_WANTW;
				wakeup(rpipe);
			}

			/* Now wait until the pipe is filled */
			rpipe->pipe_state |= PIPE_WANTR;
			error = ltsleep(rpipe, PRIBIO | PCATCH,
					"piperd", 0, &rpipe->pipe_slock);
			if (error != 0)
				goto unlocked_error;
			goto again;
		}
	}

	if (error == 0)
		PIPE_TIMESTAMP(&rpipe->pipe_atime);

	PIPE_LOCK(rpipe);
	pipeunlock(rpipe);

unlocked_error:
	--rpipe->pipe_busy;

	/*
	 * PIPE_WANTCLOSE processing only makes sense if pipe_busy is 0.
	 */
	if ((rpipe->pipe_busy == 0) && (rpipe->pipe_state & PIPE_WANTCLOSE)) {
		rpipe->pipe_state &= ~(PIPE_WANTCLOSE|PIPE_WANTW);
		wakeup(rpipe);
	} else if (bp->cnt < MINPIPESIZE) {
		/*
		 * Handle write blocking hysteresis.
		 */
		if (rpipe->pipe_state & PIPE_WANTW) {
			rpipe->pipe_state &= ~PIPE_WANTW;
			wakeup(rpipe);
		}
	}

	/*
	 * If anything was read off the buffer, signal to the writer that it's
	 * possible to write more data.  Also send a signal if we are here for
	 * the first time after the last write.
	 */
	if ((bp->size - bp->cnt) >= PIPE_BUF
	    && (ocnt != bp->cnt || (rpipe->pipe_state & PIPE_SIGNALR))) {
		pipeselwakeup(rpipe, rpipe->pipe_peer);
		rpipe->pipe_state &= ~PIPE_SIGNALR;
	}

	PIPE_UNLOCK(rpipe);
	return (error);
}
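
/*
 * The buffered path above is a plain ring buffer over (in, out, cnt).
 * A worked example of the index arithmetic, assuming size = 16 for
 * readability (the real buffer is PIPE_SIZE bytes):
 *
 *	start:      in = 14, out = 6, cnt = 8	// 8 bytes queued
 *	read of 5:  size = min(size - out, cnt, resid)
 *	                 = min(10, 8, 5) = 5	// out = 11, cnt = 3
 *	read of 5:  size = min(5, 3, 5) = 3	// out = 14, cnt = 0
 *	            cnt == 0, so in and out are reset to 0 for locality
 */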

#ifndef PIPE_NODIRECT
/*
 * Allocate structure for loan transfer.
 */
static int
pipe_loan_alloc(wpipe, npages)
	struct pipe *wpipe;
	int npages;
{
	vsize_t len;

	len = (vsize_t)npages << PAGE_SHIFT;
	wpipe->pipe_map.kva = uvm_km_valloc_wait(kernel_map, len);
	if (wpipe->pipe_map.kva == 0)
		return (ENOMEM);

	amountpipekva += len;
	wpipe->pipe_map.npages = npages;
	wpipe->pipe_map.pgs = malloc(npages * sizeof(struct vm_page *), M_PIPE,
	    M_WAITOK);
	return (0);
}

/*
 * Free resources allocated for loan transfer.
 */
static void
pipe_loan_free(wpipe)
	struct pipe *wpipe;
{
	vsize_t len;

	len = (vsize_t)wpipe->pipe_map.npages << PAGE_SHIFT;
	uvm_km_free(kernel_map, wpipe->pipe_map.kva, len);
	wpipe->pipe_map.kva = 0;
	amountpipekva -= len;
	free(wpipe->pipe_map.pgs, M_PIPE);
	wpipe->pipe_map.pgs = NULL;
}

/*
 * NetBSD direct write, using uvm_loan() mechanism.
 * This implements the pipe buffer write mechanism.  Note that only
 * a direct write OR a normal pipe write can be pending at any given time.
 * If there are any characters in the pipe buffer, the direct write will
 * be deferred until the receiving process grabs all of the bytes from
 * the pipe buffer.  Then the direct mapping write is set-up.
 *
 * Called with the long-term pipe lock held.
 */
static int
pipe_direct_write(wpipe, uio)
	struct pipe *wpipe;
	struct uio *uio;
{
	int error, npages, j;
	struct vm_page **pgs;
	vaddr_t bbase, kva, base, bend;
	vsize_t blen, bcnt;
	voff_t bpos;

	KASSERT(wpipe->pipe_map.cnt == 0);

	/*
	 * Handle first PIPE_DIRECT_CHUNK bytes of buffer.  Deal with buffers
	 * not aligned to PAGE_SIZE.
	 */
	bbase = (vaddr_t)uio->uio_iov->iov_base;
	base = trunc_page(bbase);
	bend = round_page(bbase + uio->uio_iov->iov_len);
	blen = bend - base;
	bpos = bbase - base;

	if (blen > PIPE_DIRECT_CHUNK) {
		blen = PIPE_DIRECT_CHUNK;
		bend = base + blen;
		bcnt = PIPE_DIRECT_CHUNK - bpos;
	} else {
		bcnt = uio->uio_iov->iov_len;
	}
	npages = blen >> PAGE_SHIFT;

	/*
	 * Free the old kva if we need more pages than we have
	 * allocated.
	 */
	if (wpipe->pipe_map.kva != 0 && npages > wpipe->pipe_map.npages)
		pipe_loan_free(wpipe);

	/* Allocate new kva. */
	if (wpipe->pipe_map.kva == 0) {
		error = pipe_loan_alloc(wpipe, npages);
		if (error)
			return (error);
	}

	/* Loan the write buffer memory from writer process */
	pgs = wpipe->pipe_map.pgs;
	error = uvm_loan(&uio->uio_procp->p_vmspace->vm_map, base, blen,
	    pgs, UVM_LOAN_TOPAGE);
	if (error) {
		pipe_loan_free(wpipe);
		return (error);
	}

	/* Enter the loaned pages to kva */
	kva = wpipe->pipe_map.kva;
	for (j = 0; j < npages; j++, kva += PAGE_SIZE) {
		pmap_kenter_pa(kva, VM_PAGE_TO_PHYS(pgs[j]), VM_PROT_READ);
	}
	pmap_update(pmap_kernel());

	/* Now we can put the pipe in direct write mode */
	wpipe->pipe_map.pos = bpos;
	wpipe->pipe_map.cnt = bcnt;
	wpipe->pipe_state |= PIPE_DIRECTW;

	/*
	 * But before we can let someone do a direct read,
	 * we have to wait until the pipe is drained.
	 */

	/* Release the pipe lock while we wait */
	PIPE_LOCK(wpipe);
	pipeunlock(wpipe);

	while (error == 0 && wpipe->pipe_buffer.cnt > 0) {
		if (wpipe->pipe_state & PIPE_WANTR) {
			wpipe->pipe_state &= ~PIPE_WANTR;
			wakeup(wpipe);
		}

		wpipe->pipe_state |= PIPE_WANTW;
		error = ltsleep(wpipe, PRIBIO | PCATCH, "pipdwc", 0,
				&wpipe->pipe_slock);
		if (error == 0 && wpipe->pipe_state & PIPE_EOF)
			error = EPIPE;
	}

	/* Pipe is drained; the next read will come off the direct buffer */
	wpipe->pipe_state |= PIPE_DIRECTR;

	/* Wait until the reader is done */
	while (error == 0 && (wpipe->pipe_state & PIPE_DIRECTR)) {
		if (wpipe->pipe_state & PIPE_WANTR) {
			wpipe->pipe_state &= ~PIPE_WANTR;
			wakeup(wpipe);
		}
		pipeselwakeup(wpipe, wpipe);
		error = ltsleep(wpipe, PRIBIO | PCATCH, "pipdwt", 0,
				&wpipe->pipe_slock);
		if (error == 0 && wpipe->pipe_state & PIPE_EOF)
			error = EPIPE;
	}

	/* Take pipe out of direct write mode */
	wpipe->pipe_state &= ~(PIPE_DIRECTW | PIPE_DIRECTR);

	/* Acquire the pipe lock and clean up */
	(void)pipelock(wpipe, 0);
	if (pgs != NULL) {
		pmap_kremove(wpipe->pipe_map.kva, blen);
		uvm_unloan(pgs, npages, UVM_LOAN_TOPAGE);
	}
	if (error || amountpipekva > maxpipekva)
		pipe_loan_free(wpipe);

	if (error) {
		pipeselwakeup(wpipe, wpipe);

		/*
		 * If nothing was read from what we offered, return error
		 * straight on.  Otherwise update uio resid first.  Caller
		 * will deal with the error condition, returning short
		 * write, error, or restarting the write(2) as appropriate.
		 */
		if (wpipe->pipe_map.cnt == bcnt) {
			wpipe->pipe_map.cnt = 0;
			wakeup(wpipe);
			return (error);
		}

		bcnt -= wpipe->pipe_map.cnt;
	}

	uio->uio_resid -= bcnt;
	/* uio_offset not updated, not set/used for write(2) */
	uio->uio_iov->iov_base = (char *)uio->uio_iov->iov_base + bcnt;
	uio->uio_iov->iov_len -= bcnt;
	if (uio->uio_iov->iov_len == 0) {
		uio->uio_iov++;
		uio->uio_iovcnt--;
	}

	wpipe->pipe_map.cnt = 0;
	return (error);
}
#endif /* !PIPE_NODIRECT */
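
/*
 * A worked example of the page-alignment arithmetic in pipe_direct_write(),
 * assuming PAGE_SIZE is 4096, PIPE_DIRECT_CHUNK is larger than the write,
 * and a write of 10000 bytes at user address 0x20001234 (all numbers are
 * illustrative only):
 *
 *	bbase = 0x20001234
 *	base  = trunc_page(bbase)         = 0x20001000
 *	bend  = round_page(bbase + 10000) = 0x20004000
 *	blen  = bend - base               = 0x3000 (3 pages)
 *	bpos  = bbase - base              = 0x234
 *
 * Those three pages are loaned and mapped read-only into kernel space;
 * the reader copies bcnt = 10000 bytes starting bpos bytes into the
 * first page.
 */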

static int
pipe_write(fp, offset, uio, cred, flags)
	struct file *fp;
	off_t *offset;
	struct uio *uio;
	struct ucred *cred;
	int flags;
{
	struct pipe *wpipe, *rpipe;
	struct pipebuf *bp;
	int error;

	/* We want to write to our peer */
	rpipe = (struct pipe *) fp->f_data;

retry:
	error = 0;
	PIPE_LOCK(rpipe);
	wpipe = rpipe->pipe_peer;

	/*
	 * Detect loss of pipe read side, issue SIGPIPE if lost.
	 */
	if (wpipe == NULL)
		error = EPIPE;
	else if (simple_lock_try(&wpipe->pipe_slock) == 0) {
		/* Deal with race for peer */
		PIPE_UNLOCK(rpipe);
		goto retry;
	} else if ((wpipe->pipe_state & PIPE_EOF) != 0) {
		PIPE_UNLOCK(wpipe);
		error = EPIPE;
	}

	PIPE_UNLOCK(rpipe);
	if (error != 0)
		return (error);

	++wpipe->pipe_busy;

	/* Acquire the long-term pipe lock */
	if ((error = pipelock(wpipe, 1)) != 0) {
		--wpipe->pipe_busy;
		if (wpipe->pipe_busy == 0
		    && (wpipe->pipe_state & PIPE_WANTCLOSE)) {
			wpipe->pipe_state &= ~(PIPE_WANTCLOSE | PIPE_WANTR);
			wakeup(wpipe);
		}
		PIPE_UNLOCK(wpipe);
		return (error);
	}

	bp = &wpipe->pipe_buffer;

	/*
	 * If it is advantageous to resize the pipe buffer, do so.
	 */
	if ((uio->uio_resid > PIPE_SIZE) &&
	    (nbigpipe < maxbigpipes) &&
#ifndef PIPE_NODIRECT
	    (wpipe->pipe_state & PIPE_DIRECTW) == 0 &&
#endif
	    (bp->size <= PIPE_SIZE) && (bp->cnt == 0)) {

		if (pipespace(wpipe, BIG_PIPE_SIZE) == 0)
			nbigpipe++;
	}

	while (uio->uio_resid) {
		size_t space;

#ifndef PIPE_NODIRECT
		/*
		 * Pipe buffered writes cannot be coincident with
		 * direct writes.  Also, only one direct write can be
		 * in progress at any one time.  We wait until the currently
		 * executing direct write is completed before continuing.
		 *
		 * We break out if a signal occurs or the reader goes away.
		 */
		while (error == 0 && wpipe->pipe_state & PIPE_DIRECTW) {
			PIPE_LOCK(wpipe);
			if (wpipe->pipe_state & PIPE_WANTR) {
				wpipe->pipe_state &= ~PIPE_WANTR;
				wakeup(wpipe);
			}
			pipeunlock(wpipe);
			error = ltsleep(wpipe, PRIBIO | PCATCH,
					"pipbww", 0, &wpipe->pipe_slock);

			(void)pipelock(wpipe, 0);
			if (wpipe->pipe_state & PIPE_EOF)
				error = EPIPE;
		}
		if (error)
			break;

		/*
		 * If the transfer is large, we can gain performance if
		 * we do process-to-process copies directly.
		 * If the write is non-blocking, we don't use the
		 * direct write mechanism.
		 *
		 * The direct write mechanism will detect the reader going
		 * away on us.
		 */
		if ((uio->uio_iov->iov_len >= PIPE_MINDIRECT) &&
		    (fp->f_flag & FNONBLOCK) == 0 &&
		    (wpipe->pipe_map.kva || (amountpipekva < limitpipekva))) {
			error = pipe_direct_write(wpipe, uio);

			/*
			 * Break out if an error occurred, unless it's
			 * ENOMEM.  ENOMEM means we failed to allocate
			 * some resources for the direct write, so we
			 * just fall back to ordinary write.  If the
			 * direct write was successful, process the
			 * rest of the data via ordinary write.
			 */
			if (error == 0)
				continue;

			if (error != ENOMEM)
				break;
		}
#endif /* !PIPE_NODIRECT */

		space = bp->size - bp->cnt;

		/* Writes of size <= PIPE_BUF must be atomic. */
		if ((space < uio->uio_resid) && (uio->uio_resid <= PIPE_BUF))
			space = 0;

		if (space > 0) {
			int size;	/* Transfer size */
			int segsize;	/* first segment to transfer */

			/*
			 * Transfer size is minimum of uio transfer
			 * and free space in pipe buffer.
			 */
			if (space > uio->uio_resid)
				size = uio->uio_resid;
			else
				size = space;
			/*
			 * First segment to transfer is minimum of
			 * transfer size and contiguous space in
			 * pipe buffer.  If first segment to transfer
			 * is less than the transfer size, we've got
			 * a wraparound in the buffer.
			 */
			segsize = bp->size - bp->in;
			if (segsize > size)
				segsize = size;

			/* Transfer first segment */
			error = uiomove(&bp->buffer[bp->in], segsize, uio);

			if (error == 0 && segsize < size) {
				/*
				 * Transfer remaining part now, to
				 * support atomic writes.  Wraparound
				 * happened.
				 */
#ifdef DEBUG
				if (bp->in + segsize != bp->size)
					panic("Expected pipe buffer "
					    "wraparound disappeared");
#endif

				error = uiomove(&bp->buffer[0],
						size - segsize, uio);
			}
			if (error)
				break;

			bp->in += size;
			if (bp->in >= bp->size) {
#ifdef DEBUG
				if (bp->in != size - segsize + bp->size)
					panic("Expected wraparound bad");
#endif
				bp->in = size - segsize;
			}

			bp->cnt += size;
#ifdef DEBUG
			if (bp->cnt > bp->size)
				panic("Pipe buffer overflow");
#endif
		} else {
			/*
			 * If the "read-side" has been blocked, wake it up now.
			 */
			PIPE_LOCK(wpipe);
			if (wpipe->pipe_state & PIPE_WANTR) {
				wpipe->pipe_state &= ~PIPE_WANTR;
				wakeup(wpipe);
			}
			PIPE_UNLOCK(wpipe);

			/*
			 * don't block on non-blocking I/O
			 */
			if (fp->f_flag & FNONBLOCK) {
				error = EAGAIN;
				break;
			}

			/*
			 * We have no more space and have something to offer,
			 * wake up select/poll.
			 */
			if (bp->cnt)
				pipeselwakeup(wpipe, wpipe);

			PIPE_LOCK(wpipe);
			pipeunlock(wpipe);
			wpipe->pipe_state |= PIPE_WANTW;
			error = ltsleep(wpipe, PRIBIO | PCATCH, "pipewr", 0,
					&wpipe->pipe_slock);
			(void)pipelock(wpipe, 0);
			if (error != 0)
				break;
			/*
			 * If the read side wants to go away, we just issue
			 * a signal to ourselves.
			 */
			if (wpipe->pipe_state & PIPE_EOF) {
				error = EPIPE;
				break;
			}
		}
	}

	PIPE_LOCK(wpipe);
	--wpipe->pipe_busy;
	if ((wpipe->pipe_busy == 0) && (wpipe->pipe_state & PIPE_WANTCLOSE)) {
		wpipe->pipe_state &= ~(PIPE_WANTCLOSE | PIPE_WANTR);
		wakeup(wpipe);
	} else if (bp->cnt > 0) {
		/*
		 * If we have put any characters in the buffer, we wake up
		 * the reader.
		 */
		if (wpipe->pipe_state & PIPE_WANTR) {
			wpipe->pipe_state &= ~PIPE_WANTR;
			wakeup(wpipe);
		}
	}

	/*
	 * Don't return EPIPE if I/O was successful
	 */
	if (error == EPIPE && bp->cnt == 0 && uio->uio_resid == 0)
		error = 0;

	if (error == 0)
		PIPE_TIMESTAMP(&wpipe->pipe_mtime);

	/*
	 * We have something to offer, wake up select/poll.
	 * wpipe->pipe_map.cnt is always 0 at this point (direct write
	 * is only done synchronously), so check only wpipe->pipe_buffer.cnt.
	 */
	if (bp->cnt)
		pipeselwakeup(wpipe, wpipe);

	/*
	 * Arrange for next read(2) to do a signal.
	 */
	wpipe->pipe_state |= PIPE_SIGNALR;

	pipeunlock(wpipe);
	PIPE_UNLOCK(wpipe);
	return (error);
}
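
/*
 * The "space < uio_resid && uio_resid <= PIPE_BUF" test above is what
 * gives write(2) its POSIX atomicity guarantee for small writes: rather
 * than splitting such a write, the writer sleeps in "pipewr" until the
 * whole record fits.  Illustrative consequence for userland (the record
 * layout is a placeholder):
 *
 *	// Two processes writing the same pipe: records of at most
 *	// PIPE_BUF bytes never interleave mid-record.
 *	char rec[PIPE_BUF];
 *	write(fd, rec, sizeof(rec));
 */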

/*
 * We implement a very minimal set of ioctls for compatibility with sockets.
 */
static int
pipe_ioctl(fp, cmd, data, p)
	struct file *fp;
	u_long cmd;
	void *data;
	struct proc *p;
{
	struct pipe *pipe = (struct pipe *)fp->f_data;
	pid_t pgid;
	int error;

	switch (cmd) {

	case FIONBIO:
		return (0);

	case FIOASYNC:
		PIPE_LOCK(pipe);
		if (*(int *)data) {
			pipe->pipe_state |= PIPE_ASYNC;
		} else {
			pipe->pipe_state &= ~PIPE_ASYNC;
		}
		PIPE_UNLOCK(pipe);
		return (0);

	case FIONREAD:
		PIPE_LOCK(pipe);
#ifndef PIPE_NODIRECT
		if (pipe->pipe_state & PIPE_DIRECTW)
			*(int *)data = pipe->pipe_map.cnt;
		else
#endif
			*(int *)data = pipe->pipe_buffer.cnt;
		PIPE_UNLOCK(pipe);
		return (0);

	case TIOCSPGRP:
		pgid = *(int *)data;
		if (pgid != 0) {
			error = pgid_in_session(p, pgid);
			if (error)
				return (error);
		}
		pipe->pipe_pgid = pgid;
		return (0);

	case TIOCGPGRP:
		*(int *)data = pipe->pipe_pgid;
		return (0);

	}
	return (EPASSTHROUGH);
}

static int
pipe_poll(fp, events, td)
	struct file *fp;
	int events;
	struct proc *td;
{
	struct pipe *rpipe = (struct pipe *)fp->f_data;
	struct pipe *wpipe;
	int eof = 0;
	int revents = 0;

retry:
	PIPE_LOCK(rpipe);
	wpipe = rpipe->pipe_peer;
	if (wpipe != NULL && simple_lock_try(&wpipe->pipe_slock) == 0) {
		/* Deal with race for peer */
		PIPE_UNLOCK(rpipe);
		goto retry;
	}

	if (events & (POLLIN | POLLRDNORM))
		if ((rpipe->pipe_buffer.cnt > 0) ||
#ifndef PIPE_NODIRECT
		    (rpipe->pipe_state & PIPE_DIRECTR) ||
#endif
		    (rpipe->pipe_state & PIPE_EOF))
			revents |= events & (POLLIN | POLLRDNORM);

	eof |= (rpipe->pipe_state & PIPE_EOF);
	PIPE_UNLOCK(rpipe);

	if (wpipe == NULL)
		revents |= events & (POLLOUT | POLLWRNORM);
	else {
		if (events & (POLLOUT | POLLWRNORM))
			if ((wpipe->pipe_state & PIPE_EOF) || (
#ifndef PIPE_NODIRECT
			    (wpipe->pipe_state & PIPE_DIRECTW) == 0 &&
#endif
			    (wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt)
				>= PIPE_BUF))
				revents |= events & (POLLOUT | POLLWRNORM);

		eof |= (wpipe->pipe_state & PIPE_EOF);
		PIPE_UNLOCK(wpipe);
	}

	if (wpipe == NULL || eof)
		revents |= POLLHUP;

	if (revents == 0) {
		if (events & (POLLIN | POLLRDNORM))
			selrecord(td, &rpipe->pipe_sel);

		if (events & (POLLOUT | POLLWRNORM))
			selrecord(td, &wpipe->pipe_sel);
	}

	return (revents);
}
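
/*
 * Userland sketch of what pipe_poll() above reports (illustrative only):
 * once the peer closes the write side, poll(2) returns POLLIN | POLLHUP,
 * and read(2) then returns 0 for EOF.
 *
 *	#include <poll.h>
 *
 *	struct pollfd pfd;
 *
 *	pfd.fd = fds[0];		// read end of the pipe
 *	pfd.events = POLLIN;
 *	if (poll(&pfd, 1, INFTIM) > 0 && (pfd.revents & POLLHUP))
 *		;			// writer went away
 */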

static int
pipe_stat(fp, ub, td)
	struct file *fp;
	struct stat *ub;
	struct proc *td;
{
	struct pipe *pipe = (struct pipe *)fp->f_data;

	memset((caddr_t)ub, 0, sizeof(*ub));
	ub->st_mode = S_IFIFO | S_IRUSR | S_IWUSR;
	ub->st_blksize = pipe->pipe_buffer.size;
	ub->st_size = pipe->pipe_buffer.cnt;
	ub->st_blocks = (ub->st_size) ? 1 : 0;
	TIMEVAL_TO_TIMESPEC(&pipe->pipe_atime, &ub->st_atimespec);
	TIMEVAL_TO_TIMESPEC(&pipe->pipe_mtime, &ub->st_mtimespec);
	TIMEVAL_TO_TIMESPEC(&pipe->pipe_ctime, &ub->st_ctimespec);
	ub->st_uid = fp->f_cred->cr_uid;
	ub->st_gid = fp->f_cred->cr_gid;
	/*
	 * Left as 0: st_dev, st_ino, st_nlink, st_rdev, st_flags, st_gen.
	 * XXX (st_dev, st_ino) should be unique.
	 */
	return (0);
}

/* ARGSUSED */
static int
pipe_close(fp, td)
	struct file *fp;
	struct proc *td;
{
	struct pipe *pipe = (struct pipe *)fp->f_data;

	fp->f_data = NULL;
	pipeclose(pipe);
	return (0);
}

static void
pipe_free_kmem(pipe)
	struct pipe *pipe;
{

	if (pipe->pipe_buffer.buffer != NULL) {
		if (pipe->pipe_buffer.size > PIPE_SIZE)
			--nbigpipe;
		amountpipekva -= pipe->pipe_buffer.size;
		uvm_km_free(kernel_map,
			(vaddr_t)pipe->pipe_buffer.buffer,
			pipe->pipe_buffer.size);
		pipe->pipe_buffer.buffer = NULL;
	}
#ifndef PIPE_NODIRECT
	if (pipe->pipe_map.kva != 0) {
		pipe_loan_free(pipe);
		pipe->pipe_map.cnt = 0;
		pipe->pipe_map.kva = 0;
		pipe->pipe_map.pos = 0;
		pipe->pipe_map.npages = 0;
	}
#endif /* !PIPE_NODIRECT */
}
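
/*
 * For reference, what pipe_stat() above yields to userland (illustrative):
 *
 *	#include <sys/stat.h>
 *
 *	struct stat st;
 *
 *	if (fstat(fds[0], &st) == 0) {
 *		// S_ISFIFO(st.st_mode) is true; st_size is the number
 *		// of bytes currently buffered in the pipe.
 *	}
 */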

/*
 * Shut down the pipe.
 */
static void
pipeclose(pipe)
	struct pipe *pipe;
{
	struct pipe *ppipe;

	if (pipe == NULL)
		return;

retry:
	PIPE_LOCK(pipe);

	pipeselwakeup(pipe, pipe);

	/*
	 * If the other side is blocked, wake it up saying that
	 * we want to close it down.
	 */
	while (pipe->pipe_busy) {
		wakeup(pipe);
		pipe->pipe_state |= PIPE_WANTCLOSE | PIPE_EOF;
		ltsleep(pipe, PRIBIO, "pipecl", 0, &pipe->pipe_slock);
	}

	/*
	 * Disconnect from peer
	 */
	if ((ppipe = pipe->pipe_peer) != NULL) {
		/* Deal with race for peer */
		if (simple_lock_try(&ppipe->pipe_slock) == 0) {
			PIPE_UNLOCK(pipe);
			goto retry;
		}
		pipeselwakeup(ppipe, ppipe);

		ppipe->pipe_state |= PIPE_EOF;
		wakeup(ppipe);
		ppipe->pipe_peer = NULL;
		PIPE_UNLOCK(ppipe);
	}

	(void)lockmgr(&pipe->pipe_lock, LK_DRAIN | LK_INTERLOCK,
			&pipe->pipe_slock);

	/*
	 * free resources
	 */
	pipe_free_kmem(pipe);
	pool_put(&pipe_pool, pipe);
}

static void
filt_pipedetach(struct knote *kn)
{
	struct pipe *pipe = (struct pipe *)kn->kn_fp->f_data;

	switch (kn->kn_filter) {
	case EVFILT_WRITE:
		/* need the peer structure, not our own */
		pipe = pipe->pipe_peer;
		/* XXXSMP: race for peer */

		/* if reader end already closed, just return */
		if (pipe == NULL)
			return;

		break;
	default:
		/* nothing to do */
		break;
	}

#ifdef DIAGNOSTIC
	if (kn->kn_hook != pipe)
		panic("filt_pipedetach: inconsistent knote");
#endif

	PIPE_LOCK(pipe);
	SLIST_REMOVE(&pipe->pipe_sel.sel_klist, kn, knote, kn_selnext);
	PIPE_UNLOCK(pipe);
}

/*ARGSUSED*/
static int
filt_piperead(struct knote *kn, long hint)
{
	struct pipe *rpipe = (struct pipe *)kn->kn_fp->f_data;
	struct pipe *wpipe = rpipe->pipe_peer;

	PIPE_LOCK(rpipe);
	kn->kn_data = rpipe->pipe_buffer.cnt;
	if ((kn->kn_data == 0) && (rpipe->pipe_state & PIPE_DIRECTW))
		kn->kn_data = rpipe->pipe_map.cnt;

	/* XXXSMP: race for peer */
	if ((rpipe->pipe_state & PIPE_EOF) ||
	    (wpipe == NULL) || (wpipe->pipe_state & PIPE_EOF)) {
		kn->kn_flags |= EV_EOF;
		PIPE_UNLOCK(rpipe);
		return (1);
	}
	PIPE_UNLOCK(rpipe);
	return (kn->kn_data > 0);
}

/*ARGSUSED*/
static int
filt_pipewrite(struct knote *kn, long hint)
{
	struct pipe *rpipe = (struct pipe *)kn->kn_fp->f_data;
	struct pipe *wpipe = rpipe->pipe_peer;

	PIPE_LOCK(rpipe);
	/* XXXSMP: race for peer */
	if ((wpipe == NULL) || (wpipe->pipe_state & PIPE_EOF)) {
		kn->kn_data = 0;
		kn->kn_flags |= EV_EOF;
		PIPE_UNLOCK(rpipe);
		return (1);
	}
	kn->kn_data = wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt;
	if (wpipe->pipe_state & PIPE_DIRECTW)
		kn->kn_data = 0;

	PIPE_UNLOCK(rpipe);
	return (kn->kn_data >= PIPE_BUF);
}

static const struct filterops pipe_rfiltops =
	{ 1, NULL, filt_pipedetach, filt_piperead };
static const struct filterops pipe_wfiltops =
	{ 1, NULL, filt_pipedetach, filt_pipewrite };
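
/*
 * Userland sketch of attaching these filters via kqueue(2); registration
 * ends up in pipe_kqfilter() below.  Illustrative only:
 *
 *	#include <sys/event.h>
 *
 *	struct kevent kev;
 *	int kq = kqueue();
 *
 *	EV_SET(&kev, fds[0], EVFILT_READ, EV_ADD, 0, 0, 0);
 *	kevent(kq, &kev, 1, NULL, 0, NULL);
 */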

/*ARGSUSED*/
static int
pipe_kqfilter(struct file *fp, struct knote *kn)
{
	struct pipe *pipe;

	pipe = (struct pipe *)kn->kn_fp->f_data;
	switch (kn->kn_filter) {
	case EVFILT_READ:
		kn->kn_fop = &pipe_rfiltops;
		break;
	case EVFILT_WRITE:
		kn->kn_fop = &pipe_wfiltops;
		/* XXXSMP: race for peer */
		pipe = pipe->pipe_peer;
		if (pipe == NULL) {
			/* other end of pipe has been closed */
			return (EBADF);
		}
		break;
	default:
		return (1);
	}
	kn->kn_hook = pipe;

	PIPE_LOCK(pipe);
	SLIST_INSERT_HEAD(&pipe->pipe_sel.sel_klist, kn, kn_selnext);
	PIPE_UNLOCK(pipe);
	return (0);
}

static int
pipe_fcntl(fp, cmd, data, p)
	struct file *fp;
	u_int cmd;
	void *data;
	struct proc *p;
{

	if (cmd == F_SETFL)
		return (0);
	else
		return (EOPNOTSUPP);
}

/*
 * Handle pipe sysctls.
 */
int
sysctl_dopipe(name, namelen, oldp, oldlenp, newp, newlen)
	int *name;
	u_int namelen;
	void *oldp;
	size_t *oldlenp;
	void *newp;
	size_t newlen;
{

	/* All sysctl names at this level are terminal. */
	if (namelen != 1)
		return (ENOTDIR);		/* overloaded */

	switch (name[0]) {
	case KERN_PIPE_MAXKVASZ:
		return (sysctl_int(oldp, oldlenp, newp, newlen, &maxpipekva));
	case KERN_PIPE_LIMITKVA:
		return (sysctl_int(oldp, oldlenp, newp, newlen, &limitpipekva));
	case KERN_PIPE_MAXBIGPIPES:
		return (sysctl_int(oldp, oldlenp, newp, newlen, &maxbigpipes));
	case KERN_PIPE_NBIGPIPES:
		return (sysctl_rdint(oldp, oldlenp, newp, nbigpipe));
	case KERN_PIPE_KVASIZE:
		return (sysctl_rdint(oldp, oldlenp, newp, amountpipekva));
	default:
		return (EOPNOTSUPP);
	}
	/* NOTREACHED */
}

/*
 * Initialize pipe structs.
 */
void
pipe_init(void)
{

	pool_init(&pipe_pool, sizeof(struct pipe), 0, 0, 0, "pipepl", NULL);
}
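
/*
 * Userland sketch of reading one of the knobs handled by sysctl_dopipe()
 * above, assuming the usual CTL_KERN/KERN_PIPE mib path (the top-level
 * node names are an assumption; only the KERN_PIPE_* leaves appear in
 * this file).  Illustrative only:
 *
 *	#include <sys/param.h>
 *	#include <sys/sysctl.h>
 *	#include <stdio.h>
 *
 *	int mib[3] = { CTL_KERN, KERN_PIPE, KERN_PIPE_KVASIZE };
 *	int kva;
 *	size_t len = sizeof(kva);
 *
 *	if (sysctl(mib, 3, &kva, &len, NULL, 0) == 0)
 *		printf("pipe kva in use: %d\n", kva);
 */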