/*
 * Copyright (c) 1996 John S. Dyson
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice immediately at the beginning of the file, without modification,
 *    this list of conditions, and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Absolutely no warranty of function or purpose is made by the author
 *    John S. Dyson.
 * 4. Modifications may be freely made to this file if the above conditions
 *    are met.
 *
 * $FreeBSD: src/sys/kern/sys_pipe.c,v 1.60.2.13 2002/08/05 15:05:15 des Exp $
 * $DragonFly: src/sys/kern/sys_pipe.c,v 1.21 2004/05/11 22:48:53 dillon Exp $
 */

/*
 * This file contains a high-performance replacement for the socket-based
 * pipes scheme originally used in FreeBSD/4.4Lite.  It does not support
 * all features of sockets, but does do everything that pipes normally
 * do.
 */

/*
 * This code has two modes of operation, a small write mode and a large
 * write mode.  The small write mode acts like conventional pipes with
 * a kernel buffer.  If the buffer is less than PIPE_MINDIRECT, then the
 * "normal" pipe buffering is done.  If the buffer is between PIPE_MINDIRECT
 * and PIPE_SIZE in size, it is fully mapped and wired into the kernel, and
 * the receiving process can copy it directly from the pages in the sending
 * process.
 *
 * If the sending process receives a signal, it is possible that it will
 * go away, and certainly its address space can change, because control
 * is returned back to the user-mode side.  In that case, the pipe code
 * arranges to copy the buffer supplied by the user process to a pageable
 * kernel buffer, and the receiving process will grab the data from the
 * pageable kernel buffer.  Since signals don't happen all that often,
 * the copy operation is normally eliminated.
 *
 * The constant PIPE_MINDIRECT is chosen to make sure that buffering will
 * happen for small transfers so that the system will not spend all of
 * its time context switching.  PIPE_SIZE is constrained by the
 * amount of kernel virtual memory.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/fcntl.h>
#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/filio.h>
#include <sys/ttycom.h>
#include <sys/stat.h>
#include <sys/poll.h>
#include <sys/select.h>
#include <sys/signalvar.h>
#include <sys/sysproto.h>
#include <sys/pipe.h>
#include <sys/vnode.h>
#include <sys/uio.h>
#include <sys/event.h>
#include <sys/globaldata.h>
#include <sys/module.h>
#include <sys/malloc.h>
#include <sys/sysctl.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <sys/lock.h>
#include <vm/vm_object.h>
#include <vm/vm_kern.h>
#include <vm/vm_extern.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_page.h>
#include <vm/vm_zone.h>

#include <sys/file2.h>

#include <machine/cpufunc.h>

/*
 * interfaces to the outside world
 */
static int pipe_read (struct file *fp, struct uio *uio,
		struct ucred *cred, int flags, struct thread *td);
static int pipe_write (struct file *fp, struct uio *uio,
		struct ucred *cred, int flags, struct thread *td);
static int pipe_close (struct file *fp, struct thread *td);
static int pipe_poll (struct file *fp, int events, struct ucred *cred,
		struct thread *td);
static int pipe_kqfilter (struct file *fp, struct knote *kn);
static int pipe_stat (struct file *fp, struct stat *sb, struct thread *td);
static int pipe_ioctl (struct file *fp, u_long cmd, caddr_t data, struct thread *td);

static struct fileops pipeops = {
	NULL,	/* port */
	0,	/* autoq */
	pipe_read, pipe_write, pipe_ioctl, pipe_poll, pipe_kqfilter,
	pipe_stat, pipe_close
};

static void	filt_pipedetach(struct knote *kn);
static int	filt_piperead(struct knote *kn, long hint);
static int	filt_pipewrite(struct knote *kn, long hint);

static struct filterops pipe_rfiltops =
	{ 1, NULL, filt_pipedetach, filt_piperead };
static struct filterops pipe_wfiltops =
	{ 1, NULL, filt_pipedetach, filt_pipewrite };

MALLOC_DEFINE(M_PIPE, "pipe", "pipe structures");

/*
 * Default pipe buffer size(s), this can be kind-of large now because pipe
 * space is pageable.  The pipe code will try to maintain locality of
 * reference for performance reasons, so small amounts of outstanding I/O
 * will not wipe the cache.
 */
#define MINPIPESIZE (PIPE_SIZE/3)
#define MAXPIPESIZE (2*PIPE_SIZE/3)

/*
 * Maximum amount of kva for pipes -- this is kind-of a soft limit, but
 * is there so that on large systems, we don't exhaust it.
 */
#define MAXPIPEKVA (8*1024*1024)

/*
 * Limit for direct transfers; we cannot, of course, limit
 * the amount of kva for pipes in general though.
 */
#define LIMITPIPEKVA (16*1024*1024)

/*
 * Limit the number of "big" pipes
 */
#define LIMITBIGPIPES	32
#define PIPEQ_MAX_CACHE 16	/* per-cpu pipe structure cache */

static int pipe_maxbig = LIMITBIGPIPES;
static int pipe_maxcache = PIPEQ_MAX_CACHE;
static int pipe_nbig;
static int pipe_bcache_alloc;
static int pipe_bkmem_alloc;
static int pipe_dwrite_enable = 1;	/* 0:copy, 1:kmem/sfbuf 2:force */
static int pipe_dwrite_sfbuf = 1;	/* 0:kmem_map 1:sfbufs 2:sfbufs_dmap */
					/* 3:sfbuf_dmap w/ forced invlpg */

SYSCTL_NODE(_kern, OID_AUTO, pipe, CTLFLAG_RW, 0, "Pipe operation");
SYSCTL_INT(_kern_pipe, OID_AUTO, nbig,
	CTLFLAG_RD, &pipe_nbig, 0, "number of big pipes allocated");
SYSCTL_INT(_kern_pipe, OID_AUTO, maxcache,
	CTLFLAG_RW, &pipe_maxcache, 0, "max pipes cached per-cpu");
SYSCTL_INT(_kern_pipe, OID_AUTO, maxbig,
	CTLFLAG_RW, &pipe_maxbig, 0, "max number of big pipes");
SYSCTL_INT(_kern_pipe, OID_AUTO, dwrite_enable,
	CTLFLAG_RW, &pipe_dwrite_enable, 0, "1:enable/2:force direct writes");
SYSCTL_INT(_kern_pipe, OID_AUTO, dwrite_sfbuf,
	CTLFLAG_RW, &pipe_dwrite_sfbuf, 0, "(if dwrite_enable) 0:kmem 1:sfbuf 2:sfbuf_dmap 3:sfbuf_dmap_forceinvlpg");
#if !defined(NO_PIPE_SYSCTL_STATS)
SYSCTL_INT(_kern_pipe, OID_AUTO, bcache_alloc,
	CTLFLAG_RW, &pipe_bcache_alloc, 0, "pipe buffer from pcpu cache");
SYSCTL_INT(_kern_pipe, OID_AUTO, bkmem_alloc,
	CTLFLAG_RW, &pipe_bkmem_alloc, 0, "pipe buffer from kmem");
#endif

static void pipeclose (struct pipe *cpipe);
static void pipe_free_kmem (struct pipe *cpipe);
static int pipe_create (struct pipe **cpipep);
static __inline int pipelock (struct pipe *cpipe, int catch);
static __inline void pipeunlock (struct pipe *cpipe);
static __inline void pipeselwakeup (struct pipe *cpipe);
#ifndef PIPE_NODIRECT
static int pipe_build_write_buffer (struct pipe *wpipe, struct uio *uio);
static int pipe_direct_write (struct pipe *wpipe, struct uio *uio);
static void pipe_clone_write_buffer (struct pipe *wpipe);
#endif
static int pipespace (struct pipe *cpipe, int size);

/*
 * The pipe system call for the DTYPE_PIPE type of pipes
 *
 * pipe_args(int dummy)
 */

/* ARGSUSED */
int
pipe(struct pipe_args *uap)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct filedesc *fdp;
	struct file *rf, *wf;
	struct pipe *rpipe, *wpipe;
	int fd1, fd2, error;

	KKASSERT(p);
	fdp = p->p_fd;

	rpipe = wpipe = NULL;
	if (pipe_create(&rpipe) || pipe_create(&wpipe)) {
		pipeclose(rpipe);
		pipeclose(wpipe);
		return (ENFILE);
	}

	rpipe->pipe_state |= PIPE_DIRECTOK;
	wpipe->pipe_state |= PIPE_DIRECTOK;

	/*
	 * Select the direct-map features to use for this pipe.  Since the
	 * sysctl's can change on the fly we record the settings when the
	 * pipe is created.
	 *
	 * Generally speaking the system will default to what we consider
	 * to be the best-balanced and most stable option.  Right now this
	 * is SFBUF1.  Modes 2 and 3 are considered experimental at the
	 * moment.
	 */
	wpipe->pipe_feature = PIPE_COPY;
	if (pipe_dwrite_enable) {
		switch(pipe_dwrite_sfbuf) {
		case 0:
			wpipe->pipe_feature = PIPE_KMEM;
			break;
		case 1:
			wpipe->pipe_feature = PIPE_SFBUF1;
			break;
		case 2:
		case 3:
			wpipe->pipe_feature = PIPE_SFBUF2;
			break;
		}
	}
	rpipe->pipe_feature = wpipe->pipe_feature;

	error = falloc(p, &rf, &fd1);
	if (error) {
		pipeclose(rpipe);
		pipeclose(wpipe);
		return (error);
	}
	fhold(rf);
	uap->sysmsg_fds[0] = fd1;

	/*
	 * Warning: once we've gotten past allocation of the fd for the
	 * read-side, we can only drop the read side via fdrop() in order
	 * to avoid races against processes which manage to dup() the read
	 * side while we are blocked trying to allocate the write side.
	 */
	rf->f_flag = FREAD | FWRITE;
	rf->f_type = DTYPE_PIPE;
	rf->f_data = (caddr_t)rpipe;
	rf->f_ops = &pipeops;
	error = falloc(p, &wf, &fd2);
	if (error) {
		if (fdp->fd_ofiles[fd1] == rf) {
			fdp->fd_ofiles[fd1] = NULL;
			fdrop(rf, td);
		}
		fdrop(rf, td);
		/* rpipe has been closed by fdrop(). */
		pipeclose(wpipe);
		return (error);
	}
	wf->f_flag = FREAD | FWRITE;
	wf->f_type = DTYPE_PIPE;
	wf->f_data = (caddr_t)wpipe;
	wf->f_ops = &pipeops;
	uap->sysmsg_fds[1] = fd2;

	rpipe->pipe_peer = wpipe;
	wpipe->pipe_peer = rpipe;
	fdrop(rf, td);

	return (0);
}

/*
 * Allocate kva for the pipe circular buffer; the space is pageable.
 * This routine will 'realloc' the size of a pipe safely: if it fails
 * it will retain the old buffer and return ENOMEM.
 */
static int
pipespace(struct pipe *cpipe, int size)
{
	struct vm_object *object;
	caddr_t buffer;
	int npages, error;

	npages = round_page(size) / PAGE_SIZE;
	object = cpipe->pipe_buffer.object;

	/*
	 * [re]create the object if necessary and reserve space for it
	 * in the kernel_map.  The object and memory are pageable.  On
	 * success, free the old resources before assigning the new
	 * ones.
	 */
	if (object == NULL || object->size != npages) {
		object = vm_object_allocate(OBJT_DEFAULT, npages);
		buffer = (caddr_t) vm_map_min(kernel_map);

		error = vm_map_find(kernel_map, object, 0,
			(vm_offset_t *) &buffer, size, 1,
			VM_PROT_ALL, VM_PROT_ALL, 0);

		if (error != KERN_SUCCESS) {
			vm_object_deallocate(object);
			return (ENOMEM);
		}
		pipe_free_kmem(cpipe);
		cpipe->pipe_buffer.object = object;
		cpipe->pipe_buffer.buffer = buffer;
		cpipe->pipe_buffer.size = size;
		++pipe_bkmem_alloc;
	} else {
		++pipe_bcache_alloc;
	}
	cpipe->pipe_buffer.in = 0;
	cpipe->pipe_buffer.out = 0;
	cpipe->pipe_buffer.cnt = 0;
	return (0);
}

/*
 * Initialize and allocate VM and memory for pipe, pulling the pipe from
 * our per-cpu cache if possible.  For now make sure it is sized for the
 * smaller PIPE_SIZE default.
 */
static int
pipe_create(struct pipe **cpipep)
{
	globaldata_t gd = mycpu;
	struct pipe *cpipe;
	int error;

	if ((cpipe = gd->gd_pipeq) != NULL) {
		gd->gd_pipeq = cpipe->pipe_peer;
		--gd->gd_pipeqcount;
		cpipe->pipe_peer = NULL;
	} else {
		cpipe = malloc(sizeof(struct pipe), M_PIPE, M_WAITOK|M_ZERO);
	}
	*cpipep = cpipe;
	if ((error = pipespace(cpipe, PIPE_SIZE)) != 0)
		return (error);
	vfs_timestamp(&cpipe->pipe_ctime);
	cpipe->pipe_atime = cpipe->pipe_ctime;
	cpipe->pipe_mtime = cpipe->pipe_ctime;
	return (0);
}

/*
 * lock a pipe for I/O, blocking other access
 */
static __inline int
pipelock(struct pipe *cpipe, int catch)
{
	int error;

	while (cpipe->pipe_state & PIPE_LOCK) {
		cpipe->pipe_state |= PIPE_LWANT;
		error = tsleep(cpipe, (catch ? PCATCH : 0), "pipelk", 0);
		if (error != 0)
			return (error);
	}
	cpipe->pipe_state |= PIPE_LOCK;
	return (0);
}

/*
 * unlock a pipe I/O lock
 */
static __inline void
pipeunlock(struct pipe *cpipe)
{
	cpipe->pipe_state &= ~PIPE_LOCK;
	if (cpipe->pipe_state & PIPE_LWANT) {
		cpipe->pipe_state &= ~PIPE_LWANT;
		wakeup(cpipe);
	}
}

static __inline void
pipeselwakeup(struct pipe *cpipe)
{
	if (cpipe->pipe_state & PIPE_SEL) {
		cpipe->pipe_state &= ~PIPE_SEL;
		selwakeup(&cpipe->pipe_sel);
	}
	if ((cpipe->pipe_state & PIPE_ASYNC) && cpipe->pipe_sigio)
		pgsigio(cpipe->pipe_sigio, SIGIO, 0);
	KNOTE(&cpipe->pipe_sel.si_note, 0);
}

/* ARGSUSED */
static int
pipe_read(struct file *fp, struct uio *uio, struct ucred *cred,
	int flags, struct thread *td)
{
	struct pipe *rpipe = (struct pipe *) fp->f_data;
	int error;
	int nread = 0;
	u_int size;

	++rpipe->pipe_busy;
	error = pipelock(rpipe, 1);
	if (error)
		goto unlocked_error;

	while (uio->uio_resid) {
		caddr_t va;

		if (rpipe->pipe_buffer.cnt > 0) {
			/*
			 * normal pipe buffer receive
			 */
			size = rpipe->pipe_buffer.size - rpipe->pipe_buffer.out;
			if (size > rpipe->pipe_buffer.cnt)
				size = rpipe->pipe_buffer.cnt;
			if (size > (u_int) uio->uio_resid)
				size = (u_int) uio->uio_resid;

			error = uiomove(&rpipe->pipe_buffer.buffer[rpipe->pipe_buffer.out],
					size, uio);
			if (error)
				break;

			rpipe->pipe_buffer.out += size;
			if (rpipe->pipe_buffer.out >= rpipe->pipe_buffer.size)
				rpipe->pipe_buffer.out = 0;

			rpipe->pipe_buffer.cnt -= size;

			/*
			 * If there is no more to read in the pipe, reset
			 * its pointers to the beginning.  This improves
			 * cache hit stats.
			 */
			if (rpipe->pipe_buffer.cnt == 0) {
				rpipe->pipe_buffer.in = 0;
				rpipe->pipe_buffer.out = 0;
			}
			nread += size;
#ifndef PIPE_NODIRECT
		} else if (rpipe->pipe_kva &&
			   rpipe->pipe_feature == PIPE_KMEM &&
			   (rpipe->pipe_state & (PIPE_DIRECTW|PIPE_DIRECTIP))
			       == PIPE_DIRECTW
		) {
			/*
			 * Direct copy using source-side kva mapping
			 */
			size = rpipe->pipe_map.xio_bytes;
			if (size > (u_int)uio->uio_resid)
				size = (u_int)uio->uio_resid;
			va = (caddr_t)rpipe->pipe_kva + rpipe->pipe_map.xio_offset;
			error = uiomove(va, size, uio);
			if (error)
				break;
			nread += size;
			rpipe->pipe_map.xio_offset += size;
			rpipe->pipe_map.xio_bytes -= size;
			if (rpipe->pipe_map.xio_bytes == 0) {
				rpipe->pipe_state |= PIPE_DIRECTIP;
				rpipe->pipe_state &= ~PIPE_DIRECTW;
				wakeup(rpipe);
			}
		} else if (rpipe->pipe_map.xio_bytes &&
			   rpipe->pipe_kva &&
			   rpipe->pipe_feature == PIPE_SFBUF2 &&
			   (rpipe->pipe_state & (PIPE_DIRECTW|PIPE_DIRECTIP))
			       == PIPE_DIRECTW
		) {
			/*
			 * Direct copy, bypassing a kernel buffer.  We cannot
			 * mess with the direct-write buffer until
			 * PIPE_DIRECTIP is cleared.  In order to prevent
			 * the pipe_write code from racing itself in
			 * direct_write, we set DIRECTIP when we clear
			 * DIRECTW after we have exhausted the buffer.
			 */
			if (pipe_dwrite_sfbuf == 3)
				rpipe->pipe_kvamask = 0;
			pmap_qenter2(rpipe->pipe_kva, rpipe->pipe_map.xio_pages,
				    rpipe->pipe_map.xio_npages,
				    &rpipe->pipe_kvamask);
			size = rpipe->pipe_map.xio_bytes;
			if (size > (u_int)uio->uio_resid)
				size = (u_int)uio->uio_resid;
			va = (caddr_t)rpipe->pipe_kva +
				rpipe->pipe_map.xio_offset;
			error = uiomove(va, size, uio);
			if (error)
				break;
			nread += size;
			rpipe->pipe_map.xio_offset += size;
			rpipe->pipe_map.xio_bytes -= size;
			if (rpipe->pipe_map.xio_bytes == 0) {
				rpipe->pipe_state |= PIPE_DIRECTIP;
				rpipe->pipe_state &= ~PIPE_DIRECTW;
				wakeup(rpipe);
			}
		} else if (rpipe->pipe_map.xio_bytes &&
			   rpipe->pipe_feature == PIPE_SFBUF1 &&
			   (rpipe->pipe_state & (PIPE_DIRECTW|PIPE_DIRECTIP))
			       == PIPE_DIRECTW
		) {
			/*
			 * Direct copy, bypassing a kernel buffer.  We cannot
			 * mess with the direct-write buffer until
			 * PIPE_DIRECTIP is cleared.  In order to prevent
			 * the pipe_write code from racing itself in
			 * direct_write, we set DIRECTIP when we clear
			 * DIRECTW after we have exhausted the buffer.
			 */
			error = xio_uio_copy(&rpipe->pipe_map, uio, &size);
			if (error)
				break;
			nread += size;
			if (rpipe->pipe_map.xio_bytes == 0) {
				rpipe->pipe_state |= PIPE_DIRECTIP;
				rpipe->pipe_state &= ~PIPE_DIRECTW;
				wakeup(rpipe);
			}
#endif
		} else {
			/*
			 * detect EOF condition
			 * read returns 0 on EOF, no need to set error
			 */
			if (rpipe->pipe_state & PIPE_EOF)
				break;

			/*
			 * If the "write-side" has been blocked, wake it up now.
			 */
			if (rpipe->pipe_state & PIPE_WANTW) {
				rpipe->pipe_state &= ~PIPE_WANTW;
				wakeup(rpipe);
			}

			/*
			 * Break if some data was read.
			 */
			if (nread > 0)
				break;

			/*
			 * Unlock the pipe buffer for our remaining
			 * processing.  We will either break out with an
			 * error or we will sleep and relock to loop.
			 */
			pipeunlock(rpipe);

			/*
			 * Handle non-blocking mode operation or
			 * wait for more data.
			 */
			if (fp->f_flag & FNONBLOCK) {
				error = EAGAIN;
			} else {
				rpipe->pipe_state |= PIPE_WANTR;
				if ((error = tsleep(rpipe, PCATCH|PNORESCHED,
				    "piperd", 0)) == 0) {
					error = pipelock(rpipe, 1);
				}
			}
			if (error)
				goto unlocked_error;
		}
	}
	pipeunlock(rpipe);

	if (error == 0)
		vfs_timestamp(&rpipe->pipe_atime);
unlocked_error:
	--rpipe->pipe_busy;

	/*
	 * PIPE_WANT processing only makes sense if pipe_busy is 0.
	 */
	if ((rpipe->pipe_busy == 0) && (rpipe->pipe_state & PIPE_WANT)) {
		rpipe->pipe_state &= ~(PIPE_WANT|PIPE_WANTW);
		wakeup(rpipe);
	} else if (rpipe->pipe_buffer.cnt < MINPIPESIZE) {
		/*
		 * Handle write blocking hysteresis.
		 */
		if (rpipe->pipe_state & PIPE_WANTW) {
			rpipe->pipe_state &= ~PIPE_WANTW;
			wakeup(rpipe);
		}
	}

	if ((rpipe->pipe_buffer.size - rpipe->pipe_buffer.cnt) >= PIPE_BUF)
		pipeselwakeup(rpipe);
	return (error);
}

#ifndef PIPE_NODIRECT
/*
 * Map the sending process's buffer into kernel space and wire it.
 * This is similar to a physical write operation.
 */
static int
pipe_build_write_buffer(struct pipe *wpipe, struct uio *uio)
{
	int error;
	u_int size;

	size = (u_int) uio->uio_iov->iov_len;
	if (size > wpipe->pipe_buffer.size)
		size = wpipe->pipe_buffer.size;

	error = xio_init_ubuf(&wpipe->pipe_map, uio->uio_iov->iov_base,
				size, XIOF_READ);
	if (error)
		return(error);

	/*
	 * Create a kernel map for KMEM and SFBUF2 copy modes.  SFBUF2 will
	 * map the pages on the target while KMEM maps the pages now.
	 */
	switch(wpipe->pipe_feature) {
	case PIPE_KMEM:
	case PIPE_SFBUF2:
		if (wpipe->pipe_kva == NULL) {
			wpipe->pipe_kva =
			    kmem_alloc_nofault(kernel_map, XIO_INTERNAL_SIZE);
			wpipe->pipe_kvamask = 0;
		}
		if (wpipe->pipe_feature == PIPE_KMEM) {
			pmap_qenter(wpipe->pipe_kva, wpipe->pipe_map.xio_pages,
				    wpipe->pipe_map.xio_npages);
		}
		break;
	default:
		break;
	}

	/*
	 * And update the uio data.  The XIO might have loaded fewer bytes
	 * than requested so reload 'size'.
	 */
	size = wpipe->pipe_map.xio_bytes;
	uio->uio_iov->iov_len -= size;
	uio->uio_iov->iov_base += size;
	if (uio->uio_iov->iov_len == 0)
		uio->uio_iov++;
	uio->uio_resid -= size;
	uio->uio_offset += size;
	return (0);
}

/*
 * In the case of a signal, the writing process might go away.  This
 * code copies the data into the circular buffer so that the source
 * pages can be freed without loss of data.
 */
static void
pipe_clone_write_buffer(struct pipe *wpipe)
{
	int size;

	size = wpipe->pipe_map.xio_bytes;

	KKASSERT(size <= wpipe->pipe_buffer.size);

	wpipe->pipe_buffer.in = size;
	wpipe->pipe_buffer.out = 0;
	wpipe->pipe_buffer.cnt = size;
	wpipe->pipe_state &= ~(PIPE_DIRECTW | PIPE_DIRECTIP);

	xio_copy_xtok(&wpipe->pipe_map, wpipe->pipe_buffer.buffer, size);
	xio_release(&wpipe->pipe_map);
	if (wpipe->pipe_kva) {
		pmap_qremove(wpipe->pipe_kva, XIO_INTERNAL_PAGES);
		kmem_free(kernel_map, wpipe->pipe_kva, XIO_INTERNAL_SIZE);
		wpipe->pipe_kva = NULL;
	}
}

/*
 * This implements the pipe buffer write mechanism.  Note that only
 * a direct write OR a normal pipe write can be pending at any given time.
 * If there are any characters in the pipe buffer, the direct write will
 * be deferred until the receiving process grabs all of the bytes from
 * the pipe buffer.  Then the direct mapping write is set-up.
 */
static int
pipe_direct_write(struct pipe *wpipe, struct uio *uio)
{
	int error;

retry:
	while (wpipe->pipe_state & (PIPE_DIRECTW|PIPE_DIRECTIP)) {
		if (wpipe->pipe_state & PIPE_WANTR) {
			wpipe->pipe_state &= ~PIPE_WANTR;
			wakeup(wpipe);
		}
		wpipe->pipe_state |= PIPE_WANTW;
		error = tsleep(wpipe, PCATCH, "pipdww", 0);
		if (error)
			goto error2;
		if (wpipe->pipe_state & PIPE_EOF) {
			error = EPIPE;
			goto error2;
		}
	}
	KKASSERT(wpipe->pipe_map.xio_bytes == 0);
	if (wpipe->pipe_buffer.cnt > 0) {
		if (wpipe->pipe_state & PIPE_WANTR) {
			wpipe->pipe_state &= ~PIPE_WANTR;
			wakeup(wpipe);
		}

		wpipe->pipe_state |= PIPE_WANTW;
		error = tsleep(wpipe, PCATCH, "pipdwc", 0);
		if (error)
			goto error2;
		if (wpipe->pipe_state & PIPE_EOF) {
			error = EPIPE;
			goto error2;
		}
		goto retry;
	}

	/*
	 * Build our direct-write buffer
	 */
	wpipe->pipe_state |= PIPE_DIRECTW | PIPE_DIRECTIP;
	error = pipe_build_write_buffer(wpipe, uio);
	if (error)
		goto error1;
	wpipe->pipe_state &= ~PIPE_DIRECTIP;

	/*
	 * Wait until the receiver has snarfed the data.  Since we are likely
	 * going to sleep we optimize the case and yield synchronously,
	 * possibly avoiding the tsleep().
	 */
	error = 0;
	while (!error && (wpipe->pipe_state & PIPE_DIRECTW)) {
		if (wpipe->pipe_state & PIPE_EOF) {
			pipelock(wpipe, 0);
			xio_release(&wpipe->pipe_map);
			if (wpipe->pipe_kva) {
				pmap_qremove(wpipe->pipe_kva, XIO_INTERNAL_PAGES);
				kmem_free(kernel_map, wpipe->pipe_kva, XIO_INTERNAL_SIZE);
				wpipe->pipe_kva = NULL;
			}
			pipeunlock(wpipe);
			pipeselwakeup(wpipe);
			error = EPIPE;
			goto error1;
		}
		if (wpipe->pipe_state & PIPE_WANTR) {
			wpipe->pipe_state &= ~PIPE_WANTR;
			wakeup(wpipe);
		}
		pipeselwakeup(wpipe);
		error = tsleep(wpipe, PCATCH|PNORESCHED, "pipdwt", 0);
	}
	pipelock(wpipe, 0);
	if (wpipe->pipe_state & PIPE_DIRECTW) {
		/*
		 * this bit of trickery substitutes a kernel buffer for
		 * the process that might be going away.
		 */
		pipe_clone_write_buffer(wpipe);
		KKASSERT((wpipe->pipe_state & PIPE_DIRECTIP) == 0);
	} else {
		/*
		 * note: The pipe_kva mapping is not qremove'd here.  For
		 * legacy PIPE_KMEM mode this constitutes an improvement
		 * over the original FreeBSD-4 algorithm.  For PIPE_SFBUF2
		 * mode the kva mapping must not be removed to get the
		 * caching benefit.
		 *
		 * For testing purposes we will give the original algorithm
		 * the benefit of the doubt 'what it could have been', and
		 * keep the optimization.
		 */
		KKASSERT(wpipe->pipe_state & PIPE_DIRECTIP);
		xio_release(&wpipe->pipe_map);
		wpipe->pipe_state &= ~PIPE_DIRECTIP;
	}
	pipeunlock(wpipe);
	return (error);

	/*
	 * Direct-write error, clear the direct write flags.
	 */
error1:
	wpipe->pipe_state &= ~(PIPE_DIRECTW | PIPE_DIRECTIP);
	/* fallthrough */

	/*
	 * General error, wakeup the other side if it happens to be sleeping.
	 */
error2:
	wakeup(wpipe);
	return (error);
}
#endif

static int
pipe_write(struct file *fp, struct uio *uio, struct ucred *cred,
	int flags, struct thread *td)
{
	int error = 0;
	int orig_resid;
	struct pipe *wpipe, *rpipe;

	rpipe = (struct pipe *) fp->f_data;
	wpipe = rpipe->pipe_peer;

	/*
	 * detect loss of pipe read side, issue SIGPIPE if lost.
	 */
	if ((wpipe == NULL) || (wpipe->pipe_state & PIPE_EOF)) {
		return (EPIPE);
	}
	++wpipe->pipe_busy;

	/*
	 * If it is advantageous to resize the pipe buffer, do
	 * so.
	 */
	if ((uio->uio_resid > PIPE_SIZE) &&
		(pipe_nbig < pipe_maxbig) &&
		(wpipe->pipe_state & (PIPE_DIRECTW|PIPE_DIRECTIP)) == 0 &&
		(wpipe->pipe_buffer.size <= PIPE_SIZE) &&
		(wpipe->pipe_buffer.cnt == 0)) {

		if ((error = pipelock(wpipe, 1)) == 0) {
			if (pipespace(wpipe, BIG_PIPE_SIZE) == 0)
				pipe_nbig++;
			pipeunlock(wpipe);
		}
	}

	/*
	 * If an early error occurred unbusy and return, waking up any pending
	 * readers.
	 */
	if (error) {
		--wpipe->pipe_busy;
		if ((wpipe->pipe_busy == 0) &&
		    (wpipe->pipe_state & PIPE_WANT)) {
			wpipe->pipe_state &= ~(PIPE_WANT | PIPE_WANTR);
			wakeup(wpipe);
		}
		return(error);
	}

	KASSERT(wpipe->pipe_buffer.buffer != NULL, ("pipe buffer gone"));

	orig_resid = uio->uio_resid;

	while (uio->uio_resid) {
		int space;

#ifndef PIPE_NODIRECT
		/*
		 * If the transfer is large, we can gain performance if
		 * we do process-to-process copies directly.
		 * If the write is non-blocking, we don't use the
		 * direct write mechanism.
		 *
		 * The direct write mechanism will detect the reader going
		 * away on us.
		 */
		if ((uio->uio_iov->iov_len >= PIPE_MINDIRECT ||
		    pipe_dwrite_enable > 1) &&
		    (fp->f_flag & FNONBLOCK) == 0 &&
		    pipe_dwrite_enable) {
			error = pipe_direct_write(wpipe, uio);
			if (error)
				break;
			continue;
		}
#endif

		/*
		 * Pipe buffered writes cannot be coincident with
		 * direct writes.  We wait until the currently executing
		 * direct write is completed before we start filling the
		 * pipe buffer.  We break out if a signal occurs or the
		 * reader goes away.
		 */
	retrywrite:
		while (wpipe->pipe_state & (PIPE_DIRECTW|PIPE_DIRECTIP)) {
			if (wpipe->pipe_state & PIPE_WANTR) {
				wpipe->pipe_state &= ~PIPE_WANTR;
				wakeup(wpipe);
			}
			error = tsleep(wpipe, PCATCH, "pipbww", 0);
			if (wpipe->pipe_state & PIPE_EOF)
				break;
			if (error)
				break;
		}
		if (wpipe->pipe_state & PIPE_EOF) {
			error = EPIPE;
			break;
		}

		space = wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt;

		/* Writes of size <= PIPE_BUF must be atomic. */
		if ((space < uio->uio_resid) && (orig_resid <= PIPE_BUF))
			space = 0;

		/*
		 * Write to fill, read size handles write hysteresis.  Also
		 * additional restrictions can cause select-based non-blocking
		 * writes to spin.
		 */
		if (space > 0) {
			if ((error = pipelock(wpipe, 1)) == 0) {
				int size;	/* Transfer size */
				int segsize;	/* first segment to transfer */

				/*
				 * It is possible for a direct write to
				 * slip in on us... handle it here...
				 */
				if (wpipe->pipe_state & (PIPE_DIRECTW|PIPE_DIRECTIP)) {
					pipeunlock(wpipe);
					goto retrywrite;
				}
				/*
				 * If a process blocked in uiomove, our
				 * value for space might be bad.
				 *
				 * XXX will we be ok if the reader has gone
				 * away here?
				 */
				if (space > wpipe->pipe_buffer.size -
				    wpipe->pipe_buffer.cnt) {
					pipeunlock(wpipe);
					goto retrywrite;
				}

				/*
				 * Transfer size is minimum of uio transfer
				 * and free space in pipe buffer.
				 */
				if (space > uio->uio_resid)
					size = uio->uio_resid;
				else
					size = space;
				/*
				 * First segment to transfer is minimum of
				 * transfer size and contiguous space in
				 * pipe buffer.  If first segment to transfer
				 * is less than the transfer size, we've got
				 * a wraparound in the buffer.
				 */
				segsize = wpipe->pipe_buffer.size -
					wpipe->pipe_buffer.in;
				if (segsize > size)
					segsize = size;

				/* Transfer first segment */

				error = uiomove(&wpipe->pipe_buffer.buffer[wpipe->pipe_buffer.in],
						segsize, uio);

				if (error == 0 && segsize < size) {
					/*
					 * Transfer remaining part now, to
					 * support atomic writes.  Wraparound
					 * happened.
					 */
					if (wpipe->pipe_buffer.in + segsize !=
					    wpipe->pipe_buffer.size)
						panic("Expected pipe buffer wraparound disappeared");

					error = uiomove(&wpipe->pipe_buffer.buffer[0],
							size - segsize, uio);
				}
				if (error == 0) {
					wpipe->pipe_buffer.in += size;
					if (wpipe->pipe_buffer.in >=
					    wpipe->pipe_buffer.size) {
						if (wpipe->pipe_buffer.in != size - segsize + wpipe->pipe_buffer.size)
							panic("Expected wraparound bad");
						wpipe->pipe_buffer.in = size - segsize;
					}

					wpipe->pipe_buffer.cnt += size;
					if (wpipe->pipe_buffer.cnt > wpipe->pipe_buffer.size)
						panic("Pipe buffer overflow");

				}
				pipeunlock(wpipe);
			}
			if (error)
				break;

		} else {
			/*
			 * If the "read-side" has been blocked, wake it up now
			 * and yield to let it drain synchronously rather
			 * than block.
			 */
			if (wpipe->pipe_state & PIPE_WANTR) {
				wpipe->pipe_state &= ~PIPE_WANTR;
				wakeup(wpipe);
			}

			/*
			 * don't block on non-blocking I/O
			 */
			if (fp->f_flag & FNONBLOCK) {
				error = EAGAIN;
				break;
			}

			/*
			 * We have no more space and have something to offer,
			 * wake up select/poll.
			 */
			pipeselwakeup(wpipe);

			wpipe->pipe_state |= PIPE_WANTW;
			error = tsleep(wpipe, PCATCH|PNORESCHED, "pipewr", 0);
			if (error != 0)
				break;
			/*
			 * If read side wants to go away, we just issue a signal
			 * to ourselves.
			 */
			if (wpipe->pipe_state & PIPE_EOF) {
				error = EPIPE;
				break;
			}
		}
	}

	--wpipe->pipe_busy;

	if ((wpipe->pipe_busy == 0) && (wpipe->pipe_state & PIPE_WANT)) {
		wpipe->pipe_state &= ~(PIPE_WANT | PIPE_WANTR);
		wakeup(wpipe);
	} else if (wpipe->pipe_buffer.cnt > 0) {
		/*
		 * If we have put any characters in the buffer, we wake up
		 * the reader.
		 */
		if (wpipe->pipe_state & PIPE_WANTR) {
			wpipe->pipe_state &= ~PIPE_WANTR;
			wakeup(wpipe);
		}
	}

	/*
	 * Don't return EPIPE if I/O was successful
	 */
	if ((wpipe->pipe_buffer.cnt == 0) &&
	    (uio->uio_resid == 0) &&
	    (error == EPIPE)) {
		error = 0;
	}

	if (error == 0)
		vfs_timestamp(&wpipe->pipe_mtime);

	/*
	 * We have something to offer,
	 * wake up select/poll.
	 */
	if (wpipe->pipe_buffer.cnt)
		pipeselwakeup(wpipe);

	return (error);
}

/*
 * we implement a very minimal set of ioctls for compatibility with sockets.
 */
int
pipe_ioctl(struct file *fp, u_long cmd, caddr_t data, struct thread *td)
{
	struct pipe *mpipe = (struct pipe *)fp->f_data;

	switch (cmd) {

	case FIONBIO:
		return (0);

	case FIOASYNC:
		if (*(int *)data) {
			mpipe->pipe_state |= PIPE_ASYNC;
		} else {
			mpipe->pipe_state &= ~PIPE_ASYNC;
		}
		return (0);

	case FIONREAD:
		if (mpipe->pipe_state & PIPE_DIRECTW) {
			*(int *)data = mpipe->pipe_map.xio_bytes;
		} else {
			*(int *)data = mpipe->pipe_buffer.cnt;
		}
		return (0);

	case FIOSETOWN:
		return (fsetown(*(int *)data, &mpipe->pipe_sigio));

	case FIOGETOWN:
		*(int *)data = fgetown(mpipe->pipe_sigio);
		return (0);

	/* This is deprecated, FIOSETOWN should be used instead. */
	case TIOCSPGRP:
		return (fsetown(-(*(int *)data), &mpipe->pipe_sigio));

	/* This is deprecated, FIOGETOWN should be used instead. */
	case TIOCGPGRP:
		*(int *)data = -fgetown(mpipe->pipe_sigio);
		return (0);

	}
	return (ENOTTY);
}

int
pipe_poll(struct file *fp, int events, struct ucred *cred, struct thread *td)
{
	struct pipe *rpipe = (struct pipe *)fp->f_data;
	struct pipe *wpipe;
	int revents = 0;

	wpipe = rpipe->pipe_peer;
	if (events & (POLLIN | POLLRDNORM))
		if ((rpipe->pipe_state & PIPE_DIRECTW) ||
		    (rpipe->pipe_buffer.cnt > 0) ||
		    (rpipe->pipe_state & PIPE_EOF))
			revents |= events & (POLLIN | POLLRDNORM);

	if (events & (POLLOUT | POLLWRNORM))
		if (wpipe == NULL || (wpipe->pipe_state & PIPE_EOF) ||
		    (((wpipe->pipe_state & PIPE_DIRECTW) == 0) &&
		     (wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt) >= PIPE_BUF))
			revents |= events & (POLLOUT | POLLWRNORM);

	if ((rpipe->pipe_state & PIPE_EOF) ||
	    (wpipe == NULL) ||
	    (wpipe->pipe_state & PIPE_EOF))
		revents |= POLLHUP;

	if (revents == 0) {
		if (events & (POLLIN | POLLRDNORM)) {
			selrecord(td, &rpipe->pipe_sel);
			rpipe->pipe_state |= PIPE_SEL;
		}

		if (events & (POLLOUT | POLLWRNORM)) {
			selrecord(td, &wpipe->pipe_sel);
			wpipe->pipe_state |= PIPE_SEL;
		}
	}

	return (revents);
}

static int
pipe_stat(struct file *fp, struct stat *ub, struct thread *td)
{
	struct pipe *pipe = (struct pipe *)fp->f_data;

	bzero((caddr_t)ub, sizeof(*ub));
	ub->st_mode = S_IFIFO;
	ub->st_blksize = pipe->pipe_buffer.size;
	ub->st_size = pipe->pipe_buffer.cnt;
	ub->st_blocks = (ub->st_size + ub->st_blksize - 1) / ub->st_blksize;
	ub->st_atimespec = pipe->pipe_atime;
	ub->st_mtimespec = pipe->pipe_mtime;
	ub->st_ctimespec = pipe->pipe_ctime;
	/*
	 * Left as 0: st_dev, st_ino, st_nlink, st_uid, st_gid, st_rdev,
	 * st_flags, st_gen.
	 * XXX (st_dev, st_ino) should be unique.
	 */
	return (0);
}

/* ARGSUSED */
static int
pipe_close(struct file *fp, struct thread *td)
{
	struct pipe *cpipe = (struct pipe *)fp->f_data;

	fp->f_ops = &badfileops;
	fp->f_data = NULL;
	funsetown(cpipe->pipe_sigio);
	pipeclose(cpipe);
	return (0);
}

static void
pipe_free_kmem(struct pipe *cpipe)
{
	if (cpipe->pipe_buffer.buffer != NULL) {
		if (cpipe->pipe_buffer.size > PIPE_SIZE)
			--pipe_nbig;
		kmem_free(kernel_map,
			(vm_offset_t)cpipe->pipe_buffer.buffer,
			cpipe->pipe_buffer.size);
		cpipe->pipe_buffer.buffer = NULL;
		cpipe->pipe_buffer.object = NULL;
	}
#ifndef PIPE_NODIRECT
	KKASSERT(cpipe->pipe_map.xio_bytes == 0 &&
		cpipe->pipe_map.xio_offset == 0 &&
		cpipe->pipe_map.xio_npages == 0);
#endif
}

/*
 * shutdown the pipe
 */
static void
pipeclose(struct pipe *cpipe)
{
	globaldata_t gd;
	struct pipe *ppipe;

	if (cpipe == NULL)
		return;

	pipeselwakeup(cpipe);

	/*
	 * If the other side is blocked, wake it up saying that
	 * we want to close it down.
	 */
	while (cpipe->pipe_busy) {
		wakeup(cpipe);
		cpipe->pipe_state |= PIPE_WANT | PIPE_EOF;
		tsleep(cpipe, 0, "pipecl", 0);
	}

	/*
	 * Disconnect from peer
	 */
	if ((ppipe = cpipe->pipe_peer) != NULL) {
		pipeselwakeup(ppipe);

		ppipe->pipe_state |= PIPE_EOF;
		wakeup(ppipe);
		KNOTE(&ppipe->pipe_sel.si_note, 0);
		ppipe->pipe_peer = NULL;
	}

	if (cpipe->pipe_kva) {
		pmap_qremove(cpipe->pipe_kva, XIO_INTERNAL_PAGES);
		kmem_free(kernel_map, cpipe->pipe_kva, XIO_INTERNAL_SIZE);
		cpipe->pipe_kva = NULL;
	}

	/*
	 * free or cache resources
	 */
	gd = mycpu;
	if (gd->gd_pipeqcount >= pipe_maxcache ||
	    cpipe->pipe_buffer.size != PIPE_SIZE
	) {
		pipe_free_kmem(cpipe);
		free(cpipe, M_PIPE);
	} else {
		KKASSERT(cpipe->pipe_map.xio_npages == 0 &&
			cpipe->pipe_map.xio_bytes == 0 &&
			cpipe->pipe_map.xio_offset == 0);
		cpipe->pipe_state = 0;
		cpipe->pipe_busy = 0;
		cpipe->pipe_peer = gd->gd_pipeq;
		gd->gd_pipeq = cpipe;
		++gd->gd_pipeqcount;
	}
}

/*ARGSUSED*/
static int
pipe_kqfilter(struct file *fp, struct knote *kn)
{
	struct pipe *cpipe = (struct pipe *)kn->kn_fp->f_data;

	switch (kn->kn_filter) {
	case EVFILT_READ:
		kn->kn_fop = &pipe_rfiltops;
		break;
	case EVFILT_WRITE:
		kn->kn_fop = &pipe_wfiltops;
		cpipe = cpipe->pipe_peer;
		if (cpipe == NULL)
			/* other end of pipe has been closed */
			return (EPIPE);
		break;
	default:
		return (1);
	}
	kn->kn_hook = (caddr_t)cpipe;

	SLIST_INSERT_HEAD(&cpipe->pipe_sel.si_note, kn, kn_selnext);
	return (0);
}

static void
filt_pipedetach(struct knote *kn)
{
	struct pipe *cpipe = (struct pipe *)kn->kn_hook;

	SLIST_REMOVE(&cpipe->pipe_sel.si_note, kn, knote, kn_selnext);
}

/*ARGSUSED*/
static int
filt_piperead(struct knote *kn, long hint)
{
	struct pipe *rpipe = (struct pipe *)kn->kn_fp->f_data;
	struct pipe *wpipe = rpipe->pipe_peer;

	kn->kn_data = rpipe->pipe_buffer.cnt;
	if ((kn->kn_data == 0) && (rpipe->pipe_state & PIPE_DIRECTW))
		kn->kn_data = rpipe->pipe_map.xio_bytes;

	if ((rpipe->pipe_state & PIPE_EOF) ||
	    (wpipe == NULL) || (wpipe->pipe_state & PIPE_EOF)) {
		kn->kn_flags |= EV_EOF;
		return (1);
	}
	return (kn->kn_data > 0);
}

/*ARGSUSED*/
static int
filt_pipewrite(struct knote *kn, long hint)
{
	struct pipe *rpipe = (struct pipe *)kn->kn_fp->f_data;
	struct pipe *wpipe = rpipe->pipe_peer;

	if ((wpipe == NULL) || (wpipe->pipe_state & PIPE_EOF)) {
		kn->kn_data = 0;
		kn->kn_flags |= EV_EOF;
		return (1);
	}
	kn->kn_data = wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt;
	if (wpipe->pipe_state & PIPE_DIRECTW)
		kn->kn_data = 0;

	return (kn->kn_data >= PIPE_BUF);
}