/* $NetBSD: sys_pipe.c,v 1.40 2003/06/29 22:31:26 fvdl Exp $ */

/*-
 * Copyright (c) 2003 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Paul Kranenburg.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *        This product includes software developed by the NetBSD
 *        Foundation, Inc. and its contributors.
 * 4. Neither the name of The NetBSD Foundation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 1996 John S. Dyson
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice immediately at the beginning of the file, without modification,
 *    this list of conditions, and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Absolutely no warranty of function or purpose is made by the author
 *    John S. Dyson.
 * 4. Modifications may be freely made to this file if the above conditions
 *    are met.
 *
 * $FreeBSD: src/sys/kern/sys_pipe.c,v 1.95 2002/03/09 22:06:31 alfred Exp $
 */

/*
 * This file contains a high-performance replacement for the socket-based
 * pipes scheme originally used in FreeBSD/4.4Lite.  It does not support
 * all features of sockets, but does do everything that pipes normally
 * do.
 *
 * Adaptation for NetBSD UVM, including the uvm_loan() based direct write,
 * was written by Jaromir Dolecek.
 */

/*
 * This code has two modes of operation: a small write mode and a large
 * write mode.  The small write mode acts like conventional pipes with
 * a kernel buffer.  If the buffer is less than PIPE_MINDIRECT, then the
 * "normal" pipe buffering is done.  If the buffer is between PIPE_MINDIRECT
 * and PIPE_SIZE in size, it is mapped read-only into the kernel address
 * space using the UVM page loan facility, and the receiving process copies
 * the data directly from the pages of the sending process.
 *
 * The constant PIPE_MINDIRECT is chosen to make sure that buffering will
 * happen for small transfers, so that the system will not spend all of
 * its time context switching.  PIPE_SIZE is constrained by the
 * amount of kernel virtual memory.
 */
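/*
 * Illustration of the two modes (not part of the implementation): a
 * write(2) shorter than PIPE_MINDIRECT is simply copied into the kernel
 * ring buffer, while a blocking write of at least PIPE_MINDIRECT bytes
 * has its pages loaned to the reader with no intermediate kernel copy.
 * E.g., assuming the usual PIPE_MINDIRECT of one page (4096 bytes), a
 * 512-byte write takes the buffered path and an 8192-byte write from a
 * blocking writer takes the loan path.
 */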
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: sys_pipe.c,v 1.40 2003/06/29 22:31:26 fvdl Exp $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/fcntl.h>
#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/filio.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/ttycom.h>
#include <sys/stat.h>
#include <sys/malloc.h>
#include <sys/poll.h>
#include <sys/signalvar.h>
#include <sys/vnode.h>
#include <sys/uio.h>
#include <sys/select.h>
#include <sys/mount.h>
#include <sys/sa.h>
#include <sys/syscallargs.h>
#include <uvm/uvm.h>
#include <sys/sysctl.h>

#include <sys/pipe.h>

/*
 * Avoid microtime(9), it's slow.  We don't guard the read from time(9)
 * with splclock(9) since we don't actually need to be THAT sure the access
 * is atomic.
 */
#define	PIPE_TIMESTAMP(tvp)	(*(tvp) = time)

/*
 * Use this define if you want to disable *fancy* VM things.  Expect an
 * approx 30% decrease in transfer rate.
 */
/* #define PIPE_NODIRECT */

/*
 * interfaces to the outside world
 */
static int	pipe_read(struct file *fp, off_t *offset, struct uio *uio,
		    struct ucred *cred, int flags);
static int	pipe_write(struct file *fp, off_t *offset, struct uio *uio,
		    struct ucred *cred, int flags);
static int	pipe_close(struct file *fp, struct proc *p);
static int	pipe_poll(struct file *fp, int events, struct proc *p);
static int	pipe_fcntl(struct file *fp, u_int com, void *data,
		    struct proc *p);
static int	pipe_kqfilter(struct file *fp, struct knote *kn);
static int	pipe_stat(struct file *fp, struct stat *sb, struct proc *p);
static int	pipe_ioctl(struct file *fp, u_long cmd, void *data,
		    struct proc *p);

static struct fileops pipeops = {
	pipe_read, pipe_write, pipe_ioctl, pipe_fcntl, pipe_poll,
	pipe_stat, pipe_close, pipe_kqfilter
};

/*
 * Default pipe buffer size(s).  This can be kind-of large now because pipe
 * space is pageable.  The pipe code will try to maintain locality of
 * reference for performance reasons, so small amounts of outstanding I/O
 * will not wipe the cache.
 */
#define	MINPIPESIZE	(PIPE_SIZE/3)
#define	MAXPIPESIZE	(2*PIPE_SIZE/3)

/*
 * Maximum amount of kva for pipes -- this is kind-of a soft limit, but
 * is there so that on large systems, we don't exhaust it.
 */
#define	MAXPIPEKVA	(8*1024*1024)
static int maxpipekva = MAXPIPEKVA;

/*
 * Limit for direct transfers; we cannot, of course, limit
 * the amount of kva for pipes in general though.
 */
#define	LIMITPIPEKVA	(16*1024*1024)
static int limitpipekva = LIMITPIPEKVA;

/*
 * Limit the number of "big" pipes.
 */
#define	LIMITBIGPIPES	32
static int maxbigpipes = LIMITBIGPIPES;
static int nbigpipe = 0;

/*
 * Amount of KVA consumed by pipe buffers.
 */
static int amountpipekva = 0;

MALLOC_DEFINE(M_PIPE, "pipe", "Pipe structures");

static void	pipeclose(struct pipe *pipe);
static void	pipe_free_kmem(struct pipe *pipe);
static int	pipe_create(struct pipe **pipep, int allockva);
static int	pipelock(struct pipe *pipe, int catch);
static __inline void pipeunlock(struct pipe *pipe);
static void	pipeselwakeup(struct pipe *pipe, struct pipe *sigp);
#ifndef PIPE_NODIRECT
static int	pipe_direct_write(struct pipe *wpipe, struct uio *uio);
#endif
static int	pipespace(struct pipe *pipe, int size);

#ifndef PIPE_NODIRECT
static int	pipe_loan_alloc(struct pipe *, int);
static void	pipe_loan_free(struct pipe *);
#endif /* PIPE_NODIRECT */

static struct pool pipe_pool;
/*
 * The pipe system call for the DTYPE_PIPE type of pipes
 */

/* ARGSUSED */
int
sys_pipe(l, v, retval)
	struct lwp *l;
	void *v;
	register_t *retval;
{
	struct file *rf, *wf;
	struct pipe *rpipe, *wpipe;
	int fd, error;
	struct proc *p;

	p = l->l_proc;
	rpipe = wpipe = NULL;
	if (pipe_create(&rpipe, 1) || pipe_create(&wpipe, 0)) {
		pipeclose(rpipe);
		pipeclose(wpipe);
		return (ENFILE);
	}

	/*
	 * Note: the file structure returned from falloc() is marked
	 * as 'larval' initially.  Unless we mark it as 'mature' by
	 * FILE_SET_MATURE(), any attempt to do anything with it would
	 * return EBADF, including e.g. dup(2) or close(2).  This avoids
	 * file descriptor races if we block in the second falloc().
	 */

	error = falloc(p, &rf, &fd);
	if (error)
		goto free2;
	retval[0] = fd;
	rf->f_flag = FREAD;
	rf->f_type = DTYPE_PIPE;
	rf->f_data = (caddr_t)rpipe;
	rf->f_ops = &pipeops;

	error = falloc(p, &wf, &fd);
	if (error)
		goto free3;
	retval[1] = fd;
	wf->f_flag = FWRITE;
	wf->f_type = DTYPE_PIPE;
	wf->f_data = (caddr_t)wpipe;
	wf->f_ops = &pipeops;

	rpipe->pipe_peer = wpipe;
	wpipe->pipe_peer = rpipe;

	FILE_SET_MATURE(rf);
	FILE_SET_MATURE(wf);
	FILE_UNUSE(rf, p);
	FILE_UNUSE(wf, p);
	return (0);
free3:
	FILE_UNUSE(rf, p);
	ffree(rf);
	fdremove(p->p_fd, retval[0]);
free2:
	pipeclose(wpipe);
	pipeclose(rpipe);

	return (error);
}
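/*
 * Userland view of the call above (illustrative sketch, not kernel
 * code): retval[0] becomes the read end and retval[1] the write end,
 * per pipe(2):
 *
 *	int fds[2];
 *	char c;
 *
 *	if (pipe(fds) == -1)
 *		err(1, "pipe");
 *	(void)write(fds[1], "x", 1);	-- fds[1]: write end (FWRITE above)
 *	(void)read(fds[0], &c, 1);	-- fds[0]: read end (FREAD above)
 */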
/*
 * Allocate kva for pipe circular buffer; the space is pageable.
 * This routine will 'realloc' the size of a pipe safely: if it fails
 * it will retain the old buffer.
 * If it fails it will return ENOMEM.
 */
static int
pipespace(pipe, size)
	struct pipe *pipe;
	int size;
{
	caddr_t buffer;

	/*
	 * Allocate pageable virtual address space.  Physical memory is
	 * allocated on demand.
	 */
	buffer = (caddr_t) uvm_km_valloc(kernel_map, round_page(size));
	if (buffer == NULL)
		return (ENOMEM);

	/* free old resources if we're resizing */
	pipe_free_kmem(pipe);
	pipe->pipe_buffer.buffer = buffer;
	pipe->pipe_buffer.size = size;
	pipe->pipe_buffer.in = 0;
	pipe->pipe_buffer.out = 0;
	pipe->pipe_buffer.cnt = 0;
	amountpipekva += pipe->pipe_buffer.size;
	return (0);
}

/*
 * Initialize and allocate VM and memory for pipe.
 */
static int
pipe_create(pipep, allockva)
	struct pipe **pipep;
	int allockva;
{
	struct pipe *pipe;
	int error;

	/* PR_WAITOK is the pool(9) flag; M_WAITOK belongs to malloc(9). */
	pipe = pool_get(&pipe_pool, PR_WAITOK);
	if (pipe == NULL)
		return (ENOMEM);

	/* Initialize */
	memset(pipe, 0, sizeof(struct pipe));
	pipe->pipe_state = PIPE_SIGNALR;

	if (allockva && (error = pipespace(pipe, PIPE_SIZE))) {
		/* Don't leak the pool item if the buffer allocation fails. */
		pool_put(&pipe_pool, pipe);
		return (error);
	}

	PIPE_TIMESTAMP(&pipe->pipe_ctime);
	pipe->pipe_atime = pipe->pipe_ctime;
	pipe->pipe_mtime = pipe->pipe_ctime;
	simple_lock_init(&pipe->pipe_slock);
	lockinit(&pipe->pipe_lock, PRIBIO | PCATCH, "pipelk", 0, 0);

	*pipep = pipe;
	return (0);
}
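/*
 * The pipe_buffer fields form a classic ring buffer: 'in' is the next
 * offset to write, 'out' the next offset to read, 'cnt' the bytes
 * currently stored.  Worked example (hypothetical numbers): with
 * size = 16384, in = 100, out = 16380 and cnt = 104, a reader first
 * copies the 4 contiguous bytes at offsets 16380-16383, wraps 'out'
 * to 0, and may then copy the remaining 100 bytes; a writer in the
 * same state may append at offset 100 up to size - cnt = 16280 bytes
 * before the pipe is full.
 */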
/*
 * Lock a pipe for I/O, blocking other access.
 * Called with pipe spin lock held.
 * Returns with pipe spin lock released on success.
 */
static int
pipelock(pipe, catch)
	struct pipe *pipe;
	int catch;
{
	int error;

	LOCK_ASSERT(simple_lock_held(&pipe->pipe_slock));

	while (1) {
		error = lockmgr(&pipe->pipe_lock, LK_EXCLUSIVE | LK_INTERLOCK,
		    &pipe->pipe_slock);
		if (error == 0)
			break;

		simple_lock(&pipe->pipe_slock);
		if (catch || (error != EINTR && error != ERESTART))
			break;
		/*
		 * The pipe lock is initialised with PCATCH on, and we
		 * cannot override that in this lockmgr() call, so a
		 * pending signal makes lockmgr() fail with EINTR or
		 * ERESTART even when the caller asked for an
		 * uninterruptible lock.  Retrying immediately would
		 * busy-loop, since the signal cannot be posted while
		 * we spin in the kernel; pause briefly before retrying.
		 */
		(void) ltsleep(&lbolt, PSOCK, "rstrtpipelock", hz,
		    &pipe->pipe_slock);
	}
	return (error);
}

/*
 * Unlock a pipe I/O lock.
 */
static __inline void
pipeunlock(pipe)
	struct pipe *pipe;
{

	lockmgr(&pipe->pipe_lock, LK_RELEASE, NULL);
}

/*
 * Select/poll wakeup.  This also sends SIGIO to the process group
 * (pipe_pgid > 0) or single process (pipe_pgid < 0) registered on the
 * 'sigp' side of the pipe.
 */
static void
pipeselwakeup(selp, sigp)
	struct pipe *selp, *sigp;
{
	struct proc *p;
	pid_t pid;

	selnotify(&selp->pipe_sel, 0);
	if (sigp == NULL || (sigp->pipe_state & PIPE_ASYNC) == 0)
		return;

	pid = sigp->pipe_pgid;
	if (pid == 0)
		return;

	if (pid > 0)
		gsignal(pid, SIGIO);
	else if ((p = pfind(-pid)) != NULL)
		psignal(p, SIGIO);
}
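/*
 * Locking pattern used by the I/O paths below (a summary, not extra
 * machinery): the short-term spin lock pipe_slock protects the state
 * flags and counters and is taken with PIPE_LOCK(); the long-term
 * lockmgr lock serializes whole read/write operations:
 *
 *	PIPE_LOCK(pipe);		-- spin lock
 *	error = pipelock(pipe, 1);	-- long-term lock; drops spin lock
 *	... move data ...
 *	PIPE_LOCK(pipe);
 *	pipeunlock(pipe);		-- release long-term lock
 *	PIPE_UNLOCK(pipe);
 */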
/* ARGSUSED */
static int
pipe_read(fp, offset, uio, cred, flags)
	struct file *fp;
	off_t *offset;
	struct uio *uio;
	struct ucred *cred;
	int flags;
{
	struct pipe *rpipe = (struct pipe *) fp->f_data;
	struct pipebuf *bp = &rpipe->pipe_buffer;
	int error;
	size_t nread = 0;
	size_t size;
	size_t ocnt;

	PIPE_LOCK(rpipe);
	++rpipe->pipe_busy;
	ocnt = bp->cnt;

again:
	error = pipelock(rpipe, 1);
	if (error)
		goto unlocked_error;

	while (uio->uio_resid) {
		/*
		 * normal pipe buffer receive
		 */
		if (bp->cnt > 0) {
			size = bp->size - bp->out;
			if (size > bp->cnt)
				size = bp->cnt;
			if (size > uio->uio_resid)
				size = uio->uio_resid;

			error = uiomove(&bp->buffer[bp->out], size, uio);
			if (error)
				break;

			bp->out += size;
			if (bp->out >= bp->size)
				bp->out = 0;

			bp->cnt -= size;

			/*
			 * If there is no more to read in the pipe, reset
			 * its pointers to the beginning.  This improves
			 * cache hit stats.
			 */
			if (bp->cnt == 0) {
				bp->in = 0;
				bp->out = 0;
			}
			nread += size;
#ifndef PIPE_NODIRECT
		} else if ((rpipe->pipe_state & PIPE_DIRECTR) != 0) {
			/*
			 * Direct copy, bypassing a kernel buffer.
			 */
			caddr_t va;

			KASSERT(rpipe->pipe_state & PIPE_DIRECTW);

			size = rpipe->pipe_map.cnt;
			if (size > uio->uio_resid)
				size = uio->uio_resid;

			va = (caddr_t) rpipe->pipe_map.kva +
			    rpipe->pipe_map.pos;
			error = uiomove(va, size, uio);
			if (error)
				break;
			nread += size;
			rpipe->pipe_map.pos += size;
			rpipe->pipe_map.cnt -= size;
			if (rpipe->pipe_map.cnt == 0) {
				PIPE_LOCK(rpipe);
				rpipe->pipe_state &= ~PIPE_DIRECTR;
				wakeup(rpipe);
				PIPE_UNLOCK(rpipe);
			}
#endif
		} else {
			/*
			 * Break if some data was read.
			 */
			if (nread > 0)
				break;

			PIPE_LOCK(rpipe);

			/*
			 * Detect EOF condition.
			 * read(2) returns 0 on EOF, no need to set error.
			 */
			if (rpipe->pipe_state & PIPE_EOF) {
				PIPE_UNLOCK(rpipe);
				break;
			}

			/*
			 * don't block on non-blocking I/O
			 */
			if (fp->f_flag & FNONBLOCK) {
				PIPE_UNLOCK(rpipe);
				error = EAGAIN;
				break;
			}

			/*
			 * Unlock the pipe buffer for our remaining
			 * processing.  We will either break out with an
			 * error or we will sleep and relock to loop.
			 */
			pipeunlock(rpipe);

			/*
			 * The PIPE_DIRECTR flag is not under the control
			 * of the long-term lock (see pipe_direct_write()),
			 * so re-check now while holding the spin lock.
			 */
			if ((rpipe->pipe_state & PIPE_DIRECTR) != 0)
				goto again;

			/*
			 * We want to read more; wake up select/poll.
			 */
			pipeselwakeup(rpipe, rpipe->pipe_peer);

			/*
			 * If the "write-side" is blocked, wake it up now.
			 */
			if (rpipe->pipe_state & PIPE_WANTW) {
				rpipe->pipe_state &= ~PIPE_WANTW;
				wakeup(rpipe);
			}

			/* Now wait until the pipe is filled */
			rpipe->pipe_state |= PIPE_WANTR;
			error = ltsleep(rpipe, PRIBIO | PCATCH,
			    "piperd", 0, &rpipe->pipe_slock);
			if (error != 0)
				goto unlocked_error;
			goto again;
		}
	}

	if (error == 0)
		PIPE_TIMESTAMP(&rpipe->pipe_atime);

	PIPE_LOCK(rpipe);
	pipeunlock(rpipe);

unlocked_error:
	--rpipe->pipe_busy;

	/*
	 * PIPE_WANTCLOSE processing only makes sense if pipe_busy is 0.
	 */
	if ((rpipe->pipe_busy == 0) && (rpipe->pipe_state & PIPE_WANTCLOSE)) {
		rpipe->pipe_state &= ~(PIPE_WANTCLOSE|PIPE_WANTW);
		wakeup(rpipe);
	} else if (bp->cnt < MINPIPESIZE) {
		/*
		 * Handle write blocking hysteresis.
		 */
		if (rpipe->pipe_state & PIPE_WANTW) {
			rpipe->pipe_state &= ~PIPE_WANTW;
			wakeup(rpipe);
		}
	}

	/*
	 * If anything was read off the buffer, signal to the writer that
	 * it's possible to write more data.  Also send a signal if we are
	 * here for the first time after the last write.
	 */
	if ((bp->size - bp->cnt) >= PIPE_BUF
	    && (ocnt != bp->cnt || (rpipe->pipe_state & PIPE_SIGNALR))) {
		pipeselwakeup(rpipe, rpipe->pipe_peer);
		rpipe->pipe_state &= ~PIPE_SIGNALR;
	}

	PIPE_UNLOCK(rpipe);
	return (error);
}
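/*
 * Worked example of the buffered-read arithmetic above (hypothetical
 * numbers): with size = 16384, out = 16000, cnt = 1000 and a caller
 * asking for uio_resid = 4096, 'size' starts as the contiguous run
 * 16384 - 16000 = 384, which is already <= cnt and <= uio_resid, so
 * only 384 bytes move in this iteration; the loop then comes around
 * with 'out' wrapped to 0 and copies the remaining 616 bytes.
 */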
#ifndef PIPE_NODIRECT
/*
 * Allocate structure for loan transfer.
 */
static int
pipe_loan_alloc(wpipe, npages)
	struct pipe *wpipe;
	int npages;
{
	vsize_t len;

	len = (vsize_t)npages << PAGE_SHIFT;
	wpipe->pipe_map.kva = uvm_km_valloc_wait(kernel_map, len);
	if (wpipe->pipe_map.kva == 0)
		return (ENOMEM);

	amountpipekva += len;
	wpipe->pipe_map.npages = npages;
	wpipe->pipe_map.pgs = malloc(npages * sizeof(struct vm_page *), M_PIPE,
	    M_WAITOK);
	return (0);
}

/*
 * Free resources allocated for loan transfer.
 */
static void
pipe_loan_free(wpipe)
	struct pipe *wpipe;
{
	vsize_t len;

	len = (vsize_t)wpipe->pipe_map.npages << PAGE_SHIFT;
	uvm_km_free(kernel_map, wpipe->pipe_map.kva, len);
	wpipe->pipe_map.kva = 0;
	amountpipekva -= len;
	free(wpipe->pipe_map.pgs, M_PIPE);
	wpipe->pipe_map.pgs = NULL;
}

/*
 * NetBSD direct write, using the uvm_loan() mechanism.
 * This implements the pipe buffer write mechanism.  Note that only
 * a direct write OR a normal pipe write can be pending at any given time.
 * If there are any characters in the pipe buffer, the direct write will
 * be deferred until the receiving process grabs all of the bytes from
 * the pipe buffer.  Then the direct mapping write is set up.
 *
 * Called with the long-term pipe lock held.
 */
static int
pipe_direct_write(wpipe, uio)
	struct pipe *wpipe;
	struct uio *uio;
{
	int error, npages, j;
	struct vm_page **pgs;
	vaddr_t bbase, kva, base, bend;
	vsize_t blen, bcnt;
	voff_t bpos;

	KASSERT(wpipe->pipe_map.cnt == 0);

	/*
	 * Handle the first PIPE_DIRECT_CHUNK bytes of the buffer.  Deal
	 * with buffers not aligned to PAGE_SIZE.
	 */
	bbase = (vaddr_t)uio->uio_iov->iov_base;
	base = trunc_page(bbase);
	bend = round_page(bbase + uio->uio_iov->iov_len);
	blen = bend - base;
	bpos = bbase - base;

	if (blen > PIPE_DIRECT_CHUNK) {
		blen = PIPE_DIRECT_CHUNK;
		bend = base + blen;
		bcnt = PIPE_DIRECT_CHUNK - bpos;
	} else {
		bcnt = uio->uio_iov->iov_len;
	}
	npages = blen >> PAGE_SHIFT;

	/*
	 * Free the old kva if we need more pages than we have
	 * allocated.
	 */
	if (wpipe->pipe_map.kva != 0 && npages > wpipe->pipe_map.npages)
		pipe_loan_free(wpipe);

	/* Allocate new kva. */
	if (wpipe->pipe_map.kva == 0) {
		error = pipe_loan_alloc(wpipe, npages);
		if (error)
			return (error);
	}

	/* Loan the write buffer memory from writer process */
	pgs = wpipe->pipe_map.pgs;
	error = uvm_loan(&uio->uio_procp->p_vmspace->vm_map, base, blen,
	    pgs, UVM_LOAN_TOPAGE);
	if (error) {
		pipe_loan_free(wpipe);
		return (error);
	}

	/* Enter the loaned pages to kva */
	kva = wpipe->pipe_map.kva;
	for (j = 0; j < npages; j++, kva += PAGE_SIZE) {
		pmap_kenter_pa(kva, VM_PAGE_TO_PHYS(pgs[j]), VM_PROT_READ);
	}
	pmap_update(pmap_kernel());

	/* Now we can put the pipe in direct write mode */
	wpipe->pipe_map.pos = bpos;
	wpipe->pipe_map.cnt = bcnt;
	wpipe->pipe_state |= PIPE_DIRECTW;

	/*
	 * But before we can let someone do a direct read,
	 * we have to wait until the pipe is drained.
	 */

	/* Release the pipe lock while we wait */
	PIPE_LOCK(wpipe);
	pipeunlock(wpipe);

	while (error == 0 && wpipe->pipe_buffer.cnt > 0) {
		if (wpipe->pipe_state & PIPE_WANTR) {
			wpipe->pipe_state &= ~PIPE_WANTR;
			wakeup(wpipe);
		}

		wpipe->pipe_state |= PIPE_WANTW;
		error = ltsleep(wpipe, PRIBIO | PCATCH, "pipdwc", 0,
		    &wpipe->pipe_slock);
		if (error == 0 && wpipe->pipe_state & PIPE_EOF)
			error = EPIPE;
	}

	/* Pipe is drained; the next read will come off the direct buffer */
	wpipe->pipe_state |= PIPE_DIRECTR;

	/* Wait until the reader is done */
	while (error == 0 && (wpipe->pipe_state & PIPE_DIRECTR)) {
		if (wpipe->pipe_state & PIPE_WANTR) {
			wpipe->pipe_state &= ~PIPE_WANTR;
			wakeup(wpipe);
		}
		pipeselwakeup(wpipe, wpipe);
		error = ltsleep(wpipe, PRIBIO | PCATCH, "pipdwt", 0,
		    &wpipe->pipe_slock);
		if (error == 0 && wpipe->pipe_state & PIPE_EOF)
			error = EPIPE;
	}

	/* Take pipe out of direct write mode */
	wpipe->pipe_state &= ~(PIPE_DIRECTW | PIPE_DIRECTR);

	/* Acquire the pipe lock and clean up */
	(void)pipelock(wpipe, 0);
	if (pgs != NULL) {
		pmap_kremove(wpipe->pipe_map.kva, blen);
		uvm_unloan(pgs, npages, UVM_LOAN_TOPAGE);
	}
	if (error || amountpipekva > maxpipekva)
		pipe_loan_free(wpipe);

	if (error) {
		pipeselwakeup(wpipe, wpipe);

		/*
		 * If nothing was read from what we offered, return the
		 * error straight away.  Otherwise update uio resid first.
		 * The caller will deal with the error condition, returning
		 * a short write, the error, or restarting the write(2) as
		 * appropriate.
		 */
		if (wpipe->pipe_map.cnt == bcnt) {
			wpipe->pipe_map.cnt = 0;
			wakeup(wpipe);
			return (error);
		}

		bcnt -= wpipe->pipe_map.cnt;
	}

	uio->uio_resid -= bcnt;
	/* uio_offset not updated, not set/used for write(2) */
	uio->uio_iov->iov_base = (char *)uio->uio_iov->iov_base + bcnt;
	uio->uio_iov->iov_len -= bcnt;
	if (uio->uio_iov->iov_len == 0) {
		uio->uio_iov++;
		uio->uio_iovcnt--;
	}

	wpipe->pipe_map.cnt = 0;
	return (error);
}
#endif /* !PIPE_NODIRECT */
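/*
 * Worked example of the alignment arithmetic in pipe_direct_write()
 * (hypothetical addresses, 4 KB pages): for iov_base = 0x20345 and
 * iov_len = 0x3000, base = trunc_page(0x20345) = 0x20000,
 * bpos = 0x345, bend = round_page(0x23345) = 0x24000, hence
 * blen = 0x4000 and npages = 4; bcnt remains the caller's 0x3000,
 * assuming blen does not exceed PIPE_DIRECT_CHUNK.  The reader then
 * copies bcnt bytes starting at kva + bpos.
 */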
static int
pipe_write(fp, offset, uio, cred, flags)
	struct file *fp;
	off_t *offset;
	struct uio *uio;
	struct ucred *cred;
	int flags;
{
	struct pipe *wpipe, *rpipe;
	struct pipebuf *bp;
	int error;

	/* We want to write to our peer */
	rpipe = (struct pipe *) fp->f_data;

retry:
	error = 0;
	PIPE_LOCK(rpipe);
	wpipe = rpipe->pipe_peer;

	/*
	 * Detect loss of pipe read side, issue SIGPIPE if lost.
	 */
	if (wpipe == NULL)
		error = EPIPE;
	else if (simple_lock_try(&wpipe->pipe_slock) == 0) {
		/* Deal with race for peer */
		PIPE_UNLOCK(rpipe);
		goto retry;
	} else if ((wpipe->pipe_state & PIPE_EOF) != 0) {
		PIPE_UNLOCK(wpipe);
		error = EPIPE;
	}

	PIPE_UNLOCK(rpipe);
	if (error != 0)
		return (error);

	++wpipe->pipe_busy;

	/* Acquire the long-term pipe lock */
	if ((error = pipelock(wpipe, 1)) != 0) {
		--wpipe->pipe_busy;
		if (wpipe->pipe_busy == 0
		    && (wpipe->pipe_state & PIPE_WANTCLOSE)) {
			wpipe->pipe_state &= ~(PIPE_WANTCLOSE | PIPE_WANTR);
			wakeup(wpipe);
		}
		PIPE_UNLOCK(wpipe);
		return (error);
	}

	bp = &wpipe->pipe_buffer;

	/*
	 * If it is advantageous to resize the pipe buffer, do so.
	 */
	if ((uio->uio_resid > PIPE_SIZE) &&
	    (nbigpipe < maxbigpipes) &&
#ifndef PIPE_NODIRECT
	    (wpipe->pipe_state & PIPE_DIRECTW) == 0 &&
#endif
	    (bp->size <= PIPE_SIZE) && (bp->cnt == 0)) {

		if (pipespace(wpipe, BIG_PIPE_SIZE) == 0)
			nbigpipe++;
	}

	while (uio->uio_resid) {
		size_t space;

#ifndef PIPE_NODIRECT
		/*
		 * Pipe buffered writes cannot be coincident with
		 * direct writes.  Also, only one direct write can be
		 * in progress at any one time.  We wait until the currently
		 * executing direct write is completed before continuing.
		 *
		 * We break out if a signal occurs or the reader goes away.
		 */
		while (error == 0 && wpipe->pipe_state & PIPE_DIRECTW) {
			PIPE_LOCK(wpipe);
			if (wpipe->pipe_state & PIPE_WANTR) {
				wpipe->pipe_state &= ~PIPE_WANTR;
				wakeup(wpipe);
			}
			pipeunlock(wpipe);
			error = ltsleep(wpipe, PRIBIO | PCATCH,
			    "pipbww", 0, &wpipe->pipe_slock);

			(void)pipelock(wpipe, 0);
			if (wpipe->pipe_state & PIPE_EOF)
				error = EPIPE;
		}
		if (error)
			break;

		/*
		 * If the transfer is large, we can gain performance if
		 * we do process-to-process copies directly.
		 * If the write is non-blocking, we don't use the
		 * direct write mechanism.
		 *
		 * The direct write mechanism will detect the reader going
		 * away on us.
		 */
		if ((uio->uio_iov->iov_len >= PIPE_MINDIRECT) &&
		    (fp->f_flag & FNONBLOCK) == 0 &&
		    (wpipe->pipe_map.kva || (amountpipekva < limitpipekva))) {
			error = pipe_direct_write(wpipe, uio);

			/*
			 * Break out if an error occurred, unless it's
			 * ENOMEM.  ENOMEM means we failed to allocate
			 * some resources for the direct write, so we just
			 * fall back to the ordinary write.  If the direct
			 * write was successful, process the rest of the
			 * data via the ordinary write.
			 */
			if (error == 0)
				continue;

			if (error != ENOMEM)
				break;
		}
#endif /* PIPE_NODIRECT */

		space = bp->size - bp->cnt;

		/* Writes of size <= PIPE_BUF must be atomic. */
		if ((space < uio->uio_resid) && (uio->uio_resid <= PIPE_BUF))
			space = 0;

		if (space > 0) {
			int size;	/* Transfer size */
			int segsize;	/* first segment to transfer */

			/*
			 * Transfer size is minimum of uio transfer
			 * and free space in pipe buffer.
			 */
			if (space > uio->uio_resid)
				size = uio->uio_resid;
			else
				size = space;
			/*
			 * First segment to transfer is minimum of
			 * transfer size and contiguous space in
			 * pipe buffer.  If first segment to transfer
			 * is less than the transfer size, we've got
			 * a wraparound in the buffer.
			 */
			segsize = bp->size - bp->in;
			if (segsize > size)
				segsize = size;

			/* Transfer first segment */
			error = uiomove(&bp->buffer[bp->in], segsize, uio);

			if (error == 0 && segsize < size) {
				/*
				 * Transfer remaining part now, to
				 * support atomic writes.  Wraparound
				 * happened.
				 */
#ifdef DEBUG
				if (bp->in + segsize != bp->size)
					panic("Expected pipe buffer wraparound disappeared");
#endif

				error = uiomove(&bp->buffer[0],
				    size - segsize, uio);
			}
			if (error)
				break;

			bp->in += size;
			if (bp->in >= bp->size) {
#ifdef DEBUG
				if (bp->in != size - segsize + bp->size)
					panic("Expected wraparound bad");
#endif
				bp->in = size - segsize;
			}

			bp->cnt += size;
#ifdef DEBUG
			if (bp->cnt > bp->size)
				panic("Pipe buffer overflow");
#endif
		} else {
			/*
			 * If the "read-side" has been blocked, wake it up
			 * now.
			 */
			PIPE_LOCK(wpipe);
			if (wpipe->pipe_state & PIPE_WANTR) {
				wpipe->pipe_state &= ~PIPE_WANTR;
				wakeup(wpipe);
			}
			PIPE_UNLOCK(wpipe);

			/*
			 * don't block on non-blocking I/O
			 */
			if (fp->f_flag & FNONBLOCK) {
				error = EAGAIN;
				break;
			}

			/*
			 * We have no more space and have something to offer,
			 * wake up select/poll.
			 */
			if (bp->cnt)
				pipeselwakeup(wpipe, wpipe);

			PIPE_LOCK(wpipe);
			pipeunlock(wpipe);
			wpipe->pipe_state |= PIPE_WANTW;
			error = ltsleep(wpipe, PRIBIO | PCATCH, "pipewr", 0,
			    &wpipe->pipe_slock);
			(void)pipelock(wpipe, 0);
			if (error != 0)
				break;
			/*
			 * If the read side wants to go away, we just issue
			 * a signal to ourselves.
			 */
			if (wpipe->pipe_state & PIPE_EOF) {
				error = EPIPE;
				break;
			}
		}
	}

	PIPE_LOCK(wpipe);
	--wpipe->pipe_busy;
	if ((wpipe->pipe_busy == 0) && (wpipe->pipe_state & PIPE_WANTCLOSE)) {
		wpipe->pipe_state &= ~(PIPE_WANTCLOSE | PIPE_WANTR);
		wakeup(wpipe);
	} else if (bp->cnt > 0) {
		/*
		 * If we have put any characters in the buffer, we wake up
		 * the reader.
		 */
		if (wpipe->pipe_state & PIPE_WANTR) {
			wpipe->pipe_state &= ~PIPE_WANTR;
			wakeup(wpipe);
		}
	}

	/*
	 * Don't return EPIPE if I/O was successful
	 */
	if (error == EPIPE && bp->cnt == 0 && uio->uio_resid == 0)
		error = 0;

	if (error == 0)
		PIPE_TIMESTAMP(&wpipe->pipe_mtime);

	/*
	 * We have something to offer, wake up select/poll.
	 * wpipe->pipe_map.cnt is always 0 at this point (direct write
	 * is only done synchronously), so check only wpipe->pipe_buffer.cnt.
	 */
	if (bp->cnt)
		pipeselwakeup(wpipe, wpipe);

	/*
	 * Arrange for next read(2) to do a signal.
	 */
	wpipe->pipe_state |= PIPE_SIGNALR;

	pipeunlock(wpipe);
	PIPE_UNLOCK(wpipe);
	return (error);
}
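/*
 * The PIPE_BUF check above is what gives write(2) its POSIX atomicity
 * guarantee.  Illustration (hypothetical numbers; PIPE_BUF is 512 on
 * NetBSD): with 300 bytes free, a 400-byte write does not copy a
 * partial 300 bytes; 'space' is forced to 0 and the writer sleeps
 * until all 400 bytes fit, so concurrent small writers never
 * interleave mid-record.  A 100 KB write, by contrast, may be split.
 */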
/*
 * We implement a very minimal set of ioctls for compatibility with sockets.
 */
static int
pipe_ioctl(fp, cmd, data, p)
	struct file *fp;
	u_long cmd;
	void *data;
	struct proc *p;
{
	struct pipe *pipe = (struct pipe *)fp->f_data;
	pid_t pgid;
	int error;

	switch (cmd) {

	case FIONBIO:
		return (0);

	case FIOASYNC:
		PIPE_LOCK(pipe);
		if (*(int *)data) {
			pipe->pipe_state |= PIPE_ASYNC;
		} else {
			pipe->pipe_state &= ~PIPE_ASYNC;
		}
		PIPE_UNLOCK(pipe);
		return (0);

	case FIONREAD:
		PIPE_LOCK(pipe);
#ifndef PIPE_NODIRECT
		if (pipe->pipe_state & PIPE_DIRECTW)
			*(int *)data = pipe->pipe_map.cnt;
		else
#endif
			*(int *)data = pipe->pipe_buffer.cnt;
		PIPE_UNLOCK(pipe);
		return (0);

	case TIOCSPGRP:
		pgid = *(int *)data;
		if (pgid != 0) {
			error = pgid_in_session(p, pgid);
			if (error)
				return (error);
		}
		pipe->pipe_pgid = pgid;
		return (0);

	case TIOCGPGRP:
		*(int *)data = pipe->pipe_pgid;
		return (0);

	}
	return (EPASSTHROUGH);
}
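/*
 * Illustrative userland use of the ioctls above (sketch; error handling
 * elided, 'fds' and 'pgrp' assumed declared as in the pipe(2) sketch):
 *
 *	int nbytes, on = 1;
 *
 *	ioctl(fds[0], FIONREAD, &nbytes);	-- bytes ready to read
 *	ioctl(fds[0], FIOASYNC, &on);		-- enable SIGIO delivery
 *	ioctl(fds[0], TIOCSPGRP, &pgrp);	-- who receives the SIGIO
 */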
static int
pipe_poll(fp, events, td)
	struct file *fp;
	int events;
	struct proc *td;
{
	struct pipe *rpipe = (struct pipe *)fp->f_data;
	struct pipe *wpipe;
	int eof = 0;
	int revents = 0;

retry:
	PIPE_LOCK(rpipe);
	wpipe = rpipe->pipe_peer;
	if (wpipe != NULL && simple_lock_try(&wpipe->pipe_slock) == 0) {
		/* Deal with race for peer */
		PIPE_UNLOCK(rpipe);
		goto retry;
	}

	if (events & (POLLIN | POLLRDNORM))
		if ((rpipe->pipe_buffer.cnt > 0) ||
#ifndef PIPE_NODIRECT
		    (rpipe->pipe_state & PIPE_DIRECTR) ||
#endif
		    (rpipe->pipe_state & PIPE_EOF))
			revents |= events & (POLLIN | POLLRDNORM);

	eof |= (rpipe->pipe_state & PIPE_EOF);
	PIPE_UNLOCK(rpipe);

	if (wpipe == NULL)
		revents |= events & (POLLOUT | POLLWRNORM);
	else {
		if (events & (POLLOUT | POLLWRNORM))
			if ((wpipe->pipe_state & PIPE_EOF) || (
#ifndef PIPE_NODIRECT
			    (wpipe->pipe_state & PIPE_DIRECTW) == 0 &&
#endif
			    (wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt)
			        >= PIPE_BUF))
				revents |= events & (POLLOUT | POLLWRNORM);

		eof |= (wpipe->pipe_state & PIPE_EOF);
		PIPE_UNLOCK(wpipe);
	}

	if (wpipe == NULL || eof)
		revents |= POLLHUP;

	if (revents == 0) {
		if (events & (POLLIN | POLLRDNORM))
			selrecord(td, &rpipe->pipe_sel);

		if (events & (POLLOUT | POLLWRNORM))
			selrecord(td, &wpipe->pipe_sel);
	}

	return (revents);
}

static int
pipe_stat(fp, ub, td)
	struct file *fp;
	struct stat *ub;
	struct proc *td;
{
	struct pipe *pipe = (struct pipe *)fp->f_data;

	memset((caddr_t)ub, 0, sizeof(*ub));
	ub->st_mode = S_IFIFO | S_IRUSR | S_IWUSR;
	ub->st_blksize = pipe->pipe_buffer.size;
	ub->st_size = pipe->pipe_buffer.cnt;
	ub->st_blocks = (ub->st_size) ? 1 : 0;
	TIMEVAL_TO_TIMESPEC(&pipe->pipe_atime, &ub->st_atimespec);
	TIMEVAL_TO_TIMESPEC(&pipe->pipe_mtime, &ub->st_mtimespec);
	TIMEVAL_TO_TIMESPEC(&pipe->pipe_ctime, &ub->st_ctimespec);
	ub->st_uid = fp->f_cred->cr_uid;
	ub->st_gid = fp->f_cred->cr_gid;
	/*
	 * Left as 0: st_dev, st_ino, st_nlink, st_rdev, st_flags, st_gen.
	 * XXX (st_dev, st_ino) should be unique.
	 */
	return (0);
}
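/*
 * Poll semantics in brief (a summary of the code above, not extra
 * machinery): a reader polling POLLIN is woken when data, a pending
 * direct write, or EOF is available; a writer polling POLLOUT is woken
 * once at least PIPE_BUF bytes are free, matching the atomic-write
 * guarantee.  POLLHUP is reported once the peer has gone away, whether
 * or not the caller asked for it.
 */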
/* ARGSUSED */
static int
pipe_close(fp, td)
	struct file *fp;
	struct proc *td;
{
	struct pipe *pipe = (struct pipe *)fp->f_data;

	fp->f_data = NULL;
	pipeclose(pipe);
	return (0);
}

static void
pipe_free_kmem(pipe)
	struct pipe *pipe;
{

	if (pipe->pipe_buffer.buffer != NULL) {
		if (pipe->pipe_buffer.size > PIPE_SIZE)
			--nbigpipe;
		amountpipekva -= pipe->pipe_buffer.size;
		uvm_km_free(kernel_map,
		    (vaddr_t)pipe->pipe_buffer.buffer,
		    pipe->pipe_buffer.size);
		pipe->pipe_buffer.buffer = NULL;
	}
#ifndef PIPE_NODIRECT
	if (pipe->pipe_map.kva != 0) {
		pipe_loan_free(pipe);
		pipe->pipe_map.cnt = 0;
		pipe->pipe_map.kva = 0;
		pipe->pipe_map.pos = 0;
		pipe->pipe_map.npages = 0;
	}
#endif /* !PIPE_NODIRECT */
}

/*
 * Shut down the pipe.
 */
static void
pipeclose(pipe)
	struct pipe *pipe;
{
	struct pipe *ppipe;

	if (pipe == NULL)
		return;

retry:
	PIPE_LOCK(pipe);

	pipeselwakeup(pipe, pipe);

	/*
	 * If the other side is blocked, wake it up saying that
	 * we want to close it down.
	 */
	while (pipe->pipe_busy) {
		wakeup(pipe);
		pipe->pipe_state |= PIPE_WANTCLOSE | PIPE_EOF;
		ltsleep(pipe, PRIBIO, "pipecl", 0, &pipe->pipe_slock);
	}

	/*
	 * Disconnect from peer.
	 */
	if ((ppipe = pipe->pipe_peer) != NULL) {
		/* Deal with race for peer */
		if (simple_lock_try(&ppipe->pipe_slock) == 0) {
			PIPE_UNLOCK(pipe);
			goto retry;
		}
		pipeselwakeup(ppipe, ppipe);

		ppipe->pipe_state |= PIPE_EOF;
		wakeup(ppipe);
		ppipe->pipe_peer = NULL;
		PIPE_UNLOCK(ppipe);
	}

	(void)lockmgr(&pipe->pipe_lock, LK_DRAIN | LK_INTERLOCK,
	    &pipe->pipe_slock);

	/*
	 * Free resources.
	 */
	pipe_free_kmem(pipe);
	pool_put(&pipe_pool, pipe);
}

static void
filt_pipedetach(struct knote *kn)
{
	struct pipe *pipe = (struct pipe *)kn->kn_fp->f_data;

	switch (kn->kn_filter) {
	case EVFILT_WRITE:
		/* need the peer structure, not our own */
		pipe = pipe->pipe_peer;
		/* XXXSMP: race for peer */

		/* if reader end already closed, just return */
		if (pipe == NULL)
			return;

		break;
	default:
		/* nothing to do */
		break;
	}

#ifdef DIAGNOSTIC
	if (kn->kn_hook != pipe)
		panic("filt_pipedetach: inconsistent knote");
#endif

	PIPE_LOCK(pipe);
	SLIST_REMOVE(&pipe->pipe_sel.sel_klist, kn, knote, kn_selnext);
	PIPE_UNLOCK(pipe);
}

/*ARGSUSED*/
static int
filt_piperead(struct knote *kn, long hint)
{
	struct pipe *rpipe = (struct pipe *)kn->kn_fp->f_data;
	struct pipe *wpipe = rpipe->pipe_peer;

	PIPE_LOCK(rpipe);
	kn->kn_data = rpipe->pipe_buffer.cnt;
	if ((kn->kn_data == 0) && (rpipe->pipe_state & PIPE_DIRECTW))
		kn->kn_data = rpipe->pipe_map.cnt;

	/* XXXSMP: race for peer */
	if ((rpipe->pipe_state & PIPE_EOF) ||
	    (wpipe == NULL) || (wpipe->pipe_state & PIPE_EOF)) {
		kn->kn_flags |= EV_EOF;
		PIPE_UNLOCK(rpipe);
		return (1);
	}
	PIPE_UNLOCK(rpipe);
	return (kn->kn_data > 0);
}

/*ARGSUSED*/
static int
filt_pipewrite(struct knote *kn, long hint)
{
	struct pipe *rpipe = (struct pipe *)kn->kn_fp->f_data;
	struct pipe *wpipe = rpipe->pipe_peer;

	PIPE_LOCK(rpipe);
	/* XXXSMP: race for peer */
	if ((wpipe == NULL) || (wpipe->pipe_state & PIPE_EOF)) {
		kn->kn_data = 0;
		kn->kn_flags |= EV_EOF;
		PIPE_UNLOCK(rpipe);
		return (1);
	}
	kn->kn_data = wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt;
	if (wpipe->pipe_state & PIPE_DIRECTW)
		kn->kn_data = 0;

	PIPE_UNLOCK(rpipe);
	return (kn->kn_data >= PIPE_BUF);
}

static const struct filterops pipe_rfiltops =
	{ 1, NULL, filt_pipedetach, filt_piperead };
static const struct filterops pipe_wfiltops =
	{ 1, NULL, filt_pipedetach, filt_pipewrite };

/*ARGSUSED*/
static int
pipe_kqfilter(struct file *fp, struct knote *kn)
{
	struct pipe *pipe;

	pipe = (struct pipe *)kn->kn_fp->f_data;
	switch (kn->kn_filter) {
	case EVFILT_READ:
		kn->kn_fop = &pipe_rfiltops;
		break;
	case EVFILT_WRITE:
		kn->kn_fop = &pipe_wfiltops;
		/* XXXSMP: race for peer */
		pipe = pipe->pipe_peer;
		if (pipe == NULL) {
			/* other end of pipe has been closed */
			return (EBADF);
		}
		break;
	default:
		return (1);
	}
	kn->kn_hook = pipe;

	PIPE_LOCK(pipe);
	SLIST_INSERT_HEAD(&pipe->pipe_sel.sel_klist, kn, kn_selnext);
	PIPE_UNLOCK(pipe);
	return (0);
}
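/*
 * Illustrative userland use of the knote machinery above (sketch;
 * error handling elided): waiting for a pipe to become readable via
 * kqueue(2):
 *
 *	struct kevent ev;
 *	int kq = kqueue();
 *
 *	EV_SET(&ev, fds[0], EVFILT_READ, EV_ADD, 0, 0, 0);
 *	kevent(kq, &ev, 1, NULL, 0, NULL);	-- register
 *	kevent(kq, NULL, 0, &ev, 1, NULL);	-- wait; ev.data carries the
 *						   byte count computed by
 *						   filt_piperead()
 */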
static int
pipe_fcntl(fp, cmd, data, p)
	struct file *fp;
	u_int cmd;
	void *data;
	struct proc *p;
{
	if (cmd == F_SETFL)
		return (0);
	else
		return (EOPNOTSUPP);
}

/*
 * Handle pipe sysctls.
 */
int
sysctl_dopipe(name, namelen, oldp, oldlenp, newp, newlen)
	int *name;
	u_int namelen;
	void *oldp;
	size_t *oldlenp;
	void *newp;
	size_t newlen;
{
	/* All sysctl names at this level are terminal. */
	if (namelen != 1)
		return (ENOTDIR);		/* overloaded */

	switch (name[0]) {
	case KERN_PIPE_MAXKVASZ:
		return (sysctl_int(oldp, oldlenp, newp, newlen, &maxpipekva));
	case KERN_PIPE_LIMITKVA:
		return (sysctl_int(oldp, oldlenp, newp, newlen,
		    &limitpipekva));
	case KERN_PIPE_MAXBIGPIPES:
		return (sysctl_int(oldp, oldlenp, newp, newlen, &maxbigpipes));
	case KERN_PIPE_NBIGPIPES:
		return (sysctl_rdint(oldp, oldlenp, newp, nbigpipe));
	case KERN_PIPE_KVASIZE:
		return (sysctl_rdint(oldp, oldlenp, newp, amountpipekva));
	default:
		return (EOPNOTSUPP);
	}
	/* NOTREACHED */
}

/*
 * Initialize pipe structures.
 */
void
pipe_init(void)
{
	pool_init(&pipe_pool, sizeof(struct pipe), 0, 0, 0, "pipepl", NULL);
}
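/*
 * The knobs handled by sysctl_dopipe() are reachable from userland
 * (illustrative sketch; the exact MIB names come from sys/sysctl.h,
 * assuming the KERN_PIPE parent node):
 *
 *	int kva;
 *	int mib[3] = { CTL_KERN, KERN_PIPE, KERN_PIPE_KVASIZE };
 *	size_t len = sizeof(kva);
 *
 *	sysctl(mib, 3, &kva, &len, NULL, 0);	-- current pipe KVA usage
 */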