/*
 * Copyright (c) 1996 John S. Dyson
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice immediately at the beginning of the file, without modification,
 *    this list of conditions, and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Absolutely no warranty of function or purpose is made by the author
 *    John S. Dyson.
 * 4. Modifications may be freely made to this file if the above conditions
 *    are met.
 *
 * $FreeBSD: src/sys/kern/sys_pipe.c,v 1.60.2.13 2002/08/05 15:05:15 des Exp $
 * $DragonFly: src/sys/kern/sys_pipe.c,v 1.30 2005/07/04 18:39:16 dillon Exp $
 */

/*
 * This file contains a high-performance replacement for the socket-based
 * pipes scheme originally used in FreeBSD/4.4Lite.  It does not support
 * all features of sockets, but does do everything that pipes normally
 * do.
 */

/*
 * This code has two modes of operation, a small write mode and a large
 * write mode.  The small write mode acts like conventional pipes with
 * a kernel buffer.  If the buffer is less than PIPE_MINDIRECT, then the
 * "normal" pipe buffering is done.  If the buffer is between PIPE_MINDIRECT
 * and PIPE_SIZE in size, it is fully mapped and wired into the kernel, and
 * the receiving process can copy it directly from the pages in the sending
 * process.
 *
 * If the sending process receives a signal, it is possible that it will
 * go away, and certainly its address space can change, because control
 * is returned back to the user-mode side.  In that case, the pipe code
 * arranges to copy the buffer supplied by the user process, to a pageable
 * kernel buffer, and the receiving process will grab the data from the
 * pageable kernel buffer.  Since signals don't happen all that often,
 * the copy operation is normally eliminated.
 *
 * The constant PIPE_MINDIRECT is chosen to make sure that buffering will
 * happen for small transfers so that the system will not spend all of
 * its time context switching.  PIPE_SIZE is constrained by the
 * amount of kernel virtual memory.
 */
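
/*
 * Illustrative sketch (an assumption about typical usage, not part of the
 * kernel build): from userland the two modes are selected purely by the
 * size of the write.  A small write(2) takes the buffered path, while a
 * sufficiently large one becomes a candidate for the direct-map path:
 *
 *	write(pfd[1], buf, 128);	small: copied via the kernel buffer
 *	write(pfd[1], buf, 65536);	>= PIPE_MINDIRECT: direct-write candidate
 *
 * The threshold is PIPE_MINDIRECT (see <sys/pipe.h>).
 */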

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/fcntl.h>
#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/filio.h>
#include <sys/ttycom.h>
#include <sys/stat.h>
#include <sys/poll.h>
#include <sys/select.h>
#include <sys/signalvar.h>
#include <sys/sysproto.h>
#include <sys/pipe.h>
#include <sys/vnode.h>
#include <sys/uio.h>
#include <sys/event.h>
#include <sys/globaldata.h>
#include <sys/module.h>
#include <sys/malloc.h>
#include <sys/sysctl.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <sys/lock.h>
#include <vm/vm_object.h>
#include <vm/vm_kern.h>
#include <vm/vm_extern.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_page.h>
#include <vm/vm_zone.h>

#include <sys/file2.h>

#include <machine/cpufunc.h>

/*
 * interfaces to the outside world
 */
static int pipe_read (struct file *fp, struct uio *uio,
		struct ucred *cred, int flags, struct thread *td);
static int pipe_write (struct file *fp, struct uio *uio,
		struct ucred *cred, int flags, struct thread *td);
static int pipe_close (struct file *fp, struct thread *td);
static int pipe_poll (struct file *fp, int events, struct ucred *cred,
		struct thread *td);
static int pipe_kqfilter (struct file *fp, struct knote *kn);
static int pipe_stat (struct file *fp, struct stat *sb, struct thread *td);
static int pipe_ioctl (struct file *fp, u_long cmd, caddr_t data, struct thread *td);

static struct fileops pipeops = {
	NULL,	/* port */
	NULL,	/* clone */
	pipe_read, pipe_write, pipe_ioctl, pipe_poll, pipe_kqfilter,
	pipe_stat, pipe_close
};

static void filt_pipedetach(struct knote *kn);
static int filt_piperead(struct knote *kn, long hint);
static int filt_pipewrite(struct knote *kn, long hint);

static struct filterops pipe_rfiltops =
	{ 1, NULL, filt_pipedetach, filt_piperead };
static struct filterops pipe_wfiltops =
	{ 1, NULL, filt_pipedetach, filt_pipewrite };

MALLOC_DEFINE(M_PIPE, "pipe", "pipe structures");

/*
 * Default pipe buffer size(s), this can be kind-of large now because pipe
 * space is pageable.  The pipe code will try to maintain locality of
 * reference for performance reasons, so small amounts of outstanding I/O
 * will not wipe the cache.
 */
#define MINPIPESIZE (PIPE_SIZE/3)
#define MAXPIPESIZE (2*PIPE_SIZE/3)

/*
 * Maximum amount of kva for pipes -- this is kind-of a soft limit, but
 * is there so that on large systems, we don't exhaust it.
 */
#define MAXPIPEKVA (8*1024*1024)

/*
 * Limit for direct transfers, we cannot, of course limit
 * the amount of kva for pipes in general though.
 */
#define LIMITPIPEKVA (16*1024*1024)

/*
 * Limit the number of "big" pipes
 */
#define LIMITBIGPIPES	32
#define PIPEQ_MAX_CACHE 16	/* per-cpu pipe structure cache */

static int pipe_maxbig = LIMITBIGPIPES;
static int pipe_maxcache = PIPEQ_MAX_CACHE;
static int pipe_nbig;
static int pipe_bcache_alloc;
static int pipe_bkmem_alloc;
static int pipe_dwrite_enable = 1;	/* 0:copy, 1:kmem/sfbuf 2:force */
static int pipe_dwrite_sfbuf = 1;	/* 0:kmem_map 1:sfbufs 2:sfbufs_dmap */
					/* 3:sfbuf_dmap w/ forced invlpg */

SYSCTL_NODE(_kern, OID_AUTO, pipe, CTLFLAG_RW, 0, "Pipe operation");
SYSCTL_INT(_kern_pipe, OID_AUTO, nbig,
	CTLFLAG_RD, &pipe_nbig, 0, "number of big pipes allocated");
SYSCTL_INT(_kern_pipe, OID_AUTO, maxcache,
	CTLFLAG_RW, &pipe_maxcache, 0, "max pipes cached per-cpu");
SYSCTL_INT(_kern_pipe, OID_AUTO, maxbig,
	CTLFLAG_RW, &pipe_maxbig, 0, "max number of big pipes");
SYSCTL_INT(_kern_pipe, OID_AUTO, dwrite_enable,
	CTLFLAG_RW, &pipe_dwrite_enable, 0, "1:enable/2:force direct writes");
SYSCTL_INT(_kern_pipe, OID_AUTO, dwrite_sfbuf,
	CTLFLAG_RW, &pipe_dwrite_sfbuf, 0,
	"(if dwrite_enable) 0:kmem 1:sfbuf 2:sfbuf_dmap 3:sfbuf_dmap_forceinvlpg");
#if !defined(NO_PIPE_SYSCTL_STATS)
SYSCTL_INT(_kern_pipe, OID_AUTO, bcache_alloc,
	CTLFLAG_RW, &pipe_bcache_alloc, 0, "pipe buffer from pcpu cache");
SYSCTL_INT(_kern_pipe, OID_AUTO, bkmem_alloc,
	CTLFLAG_RW, &pipe_bkmem_alloc, 0, "pipe buffer from kmem");
#endif
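
/*
 * Illustrative sketch (an assumption about typical administration, not part
 * of this file): the knobs above are ordinary sysctl variables under
 * kern.pipe and can be inspected or tuned at run time, e.g.
 *
 *	sysctl kern.pipe.nbig
 *	sysctl kern.pipe.dwrite_enable=2	(force direct writes)
 *	sysctl kern.pipe.dwrite_sfbuf=0		(use the kmem_map mapping mode)
 *
 * Settings are sampled at pipe creation time (see pipe() below), so a
 * change only affects pipes created afterwards.
 */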

static void pipeclose (struct pipe *cpipe);
static void pipe_free_kmem (struct pipe *cpipe);
static int pipe_create (struct pipe **cpipep);
static __inline int pipelock (struct pipe *cpipe, int catch);
static __inline void pipeunlock (struct pipe *cpipe);
static __inline void pipeselwakeup (struct pipe *cpipe);
#ifndef PIPE_NODIRECT
static int pipe_build_write_buffer (struct pipe *wpipe, struct uio *uio);
static int pipe_direct_write (struct pipe *wpipe, struct uio *uio);
static void pipe_clone_write_buffer (struct pipe *wpipe);
#endif
static int pipespace (struct pipe *cpipe, int size);

/*
 * The pipe system call for the DTYPE_PIPE type of pipes
 *
 * pipe_args(int dummy)
 */

/* ARGSUSED */
int
pipe(struct pipe_args *uap)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct filedesc *fdp;
	struct file *rf, *wf;
	struct pipe *rpipe, *wpipe;
	int fd1, fd2, error;

	KKASSERT(p);
	fdp = p->p_fd;

	rpipe = wpipe = NULL;
	if (pipe_create(&rpipe) || pipe_create(&wpipe)) {
		pipeclose(rpipe);
		pipeclose(wpipe);
		return (ENFILE);
	}

	rpipe->pipe_state |= PIPE_DIRECTOK;
	wpipe->pipe_state |= PIPE_DIRECTOK;

	/*
	 * Select the direct-map features to use for this pipe.  Since the
	 * sysctl's can change on the fly we record the settings when the
	 * pipe is created.
	 *
	 * Generally speaking the system will default to what we consider
	 * to be the best-balanced and most stable option.  Right now this
	 * is SFBUF1.  Modes 2 and 3 are considered experimental at the
	 * moment.
	 */
	wpipe->pipe_feature = PIPE_COPY;
	if (pipe_dwrite_enable) {
		switch(pipe_dwrite_sfbuf) {
		case 0:
			wpipe->pipe_feature = PIPE_KMEM;
			break;
		case 1:
			wpipe->pipe_feature = PIPE_SFBUF1;
			break;
		case 2:
		case 3:
			wpipe->pipe_feature = PIPE_SFBUF2;
			break;
		}
	}
	rpipe->pipe_feature = wpipe->pipe_feature;

	error = falloc(p, &rf, &fd1);
	if (error) {
		pipeclose(rpipe);
		pipeclose(wpipe);
		return (error);
	}
	uap->sysmsg_fds[0] = fd1;

	/*
	 * Warning: once we've gotten past allocation of the fd for the
	 * read-side, we can only drop the read side via fdrop() in order
	 * to avoid races against processes which manage to dup() the read
	 * side while we are blocked trying to allocate the write side.
	 */
	rf->f_flag = FREAD | FWRITE;
	rf->f_type = DTYPE_PIPE;
	rf->f_data = (caddr_t)rpipe;
	rf->f_ops = &pipeops;
	error = falloc(p, &wf, &fd2);
	if (error) {
		if (fdp->fd_files[fd1].fp == rf) {
			funsetfd(fdp, fd1);
			fdrop(rf, td);
		}
		fdrop(rf, td);
		/* rpipe has been closed by fdrop(). */
		pipeclose(wpipe);
		return (error);
	}
	wf->f_flag = FREAD | FWRITE;
	wf->f_type = DTYPE_PIPE;
	wf->f_data = (caddr_t)wpipe;
	wf->f_ops = &pipeops;
	uap->sysmsg_fds[1] = fd2;

	rpipe->pipe_peer = wpipe;
	wpipe->pipe_peer = rpipe;
	fdrop(rf, td);
	fdrop(wf, td);

	return (0);
}
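
/*
 * Illustrative userland sketch (an assumption about typical use, not part
 * of the kernel build): the syscall above backs the familiar pipe(2)
 * pattern, with fds[0] the read side and fds[1] the write side:
 *
 *	int fds[2];
 *	if (pipe(fds) < 0)
 *		err(1, "pipe");
 *	write(fds[1], "hello", 5);
 *	read(fds[0], buf, sizeof(buf));
 */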

/*
 * Allocate kva for pipe circular buffer, the space is pageable
 * This routine will 'realloc' the size of a pipe safely, if it fails
 * it will retain the old buffer.
 * If it fails it will return ENOMEM.
 */
static int
pipespace(struct pipe *cpipe, int size)
{
	struct vm_object *object;
	caddr_t buffer;
	int npages, error;

	npages = round_page(size) / PAGE_SIZE;
	object = cpipe->pipe_buffer.object;

	/*
	 * [re]create the object if necessary and reserve space for it
	 * in the kernel_map.  The object and memory are pageable.  On
	 * success, free the old resources before assigning the new
	 * ones.
	 */
	if (object == NULL || object->size != npages) {
		object = vm_object_allocate(OBJT_DEFAULT, npages);
		buffer = (caddr_t) vm_map_min(kernel_map);

		error = vm_map_find(kernel_map, object, 0,
			(vm_offset_t *) &buffer, size, 1,
			VM_PROT_ALL, VM_PROT_ALL, 0);

		if (error != KERN_SUCCESS) {
			vm_object_deallocate(object);
			return (ENOMEM);
		}
		pipe_free_kmem(cpipe);
		cpipe->pipe_buffer.object = object;
		cpipe->pipe_buffer.buffer = buffer;
		cpipe->pipe_buffer.size = size;
		++pipe_bkmem_alloc;
	} else {
		++pipe_bcache_alloc;
	}
	cpipe->pipe_buffer.in = 0;
	cpipe->pipe_buffer.out = 0;
	cpipe->pipe_buffer.cnt = 0;
	return (0);
}

/*
 * Initialize and allocate VM and memory for pipe, pulling the pipe from
 * our per-cpu cache if possible.  For now make sure it is sized for the
 * smaller PIPE_SIZE default.
 */
static int
pipe_create(cpipep)
	struct pipe **cpipep;
{
	globaldata_t gd = mycpu;
	struct pipe *cpipe;
	int error;

	if ((cpipe = gd->gd_pipeq) != NULL) {
		gd->gd_pipeq = cpipe->pipe_peer;
		--gd->gd_pipeqcount;
		cpipe->pipe_peer = NULL;
	} else {
		cpipe = malloc(sizeof(struct pipe), M_PIPE, M_WAITOK|M_ZERO);
	}
	*cpipep = cpipe;
	if ((error = pipespace(cpipe, PIPE_SIZE)) != 0)
		return (error);
	vfs_timestamp(&cpipe->pipe_ctime);
	cpipe->pipe_atime = cpipe->pipe_ctime;
	cpipe->pipe_mtime = cpipe->pipe_ctime;
	return (0);
}


/*
 * lock a pipe for I/O, blocking other access
 */
static __inline int
pipelock(cpipe, catch)
	struct pipe *cpipe;
	int catch;
{
	int error;

	while (cpipe->pipe_state & PIPE_LOCK) {
		cpipe->pipe_state |= PIPE_LWANT;
		error = tsleep(cpipe, (catch ? PCATCH : 0), "pipelk", 0);
		if (error != 0)
			return (error);
	}
	cpipe->pipe_state |= PIPE_LOCK;
	return (0);
}

/*
 * unlock a pipe I/O lock
 */
static __inline void
pipeunlock(cpipe)
	struct pipe *cpipe;
{

	cpipe->pipe_state &= ~PIPE_LOCK;
	if (cpipe->pipe_state & PIPE_LWANT) {
		cpipe->pipe_state &= ~PIPE_LWANT;
		wakeup(cpipe);
	}
}

static __inline void
pipeselwakeup(cpipe)
	struct pipe *cpipe;
{

	if (cpipe->pipe_state & PIPE_SEL) {
		cpipe->pipe_state &= ~PIPE_SEL;
		selwakeup(&cpipe->pipe_sel);
	}
	if ((cpipe->pipe_state & PIPE_ASYNC) && cpipe->pipe_sigio)
		pgsigio(cpipe->pipe_sigio, SIGIO, 0);
	KNOTE(&cpipe->pipe_sel.si_note, 0);
}
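
/*
 * Note on the locking protocol above (a descriptive summary of the code,
 * added for clarity): PIPE_LOCK serializes I/O on one end of the pipe and
 * PIPE_LWANT records that a sleeper is waiting for it.  The usual pattern
 * in this file is
 *
 *	if ((error = pipelock(cpipe, 1)) == 0) {
 *		... touch pipe_buffer ...
 *		pipeunlock(cpipe);
 *	}
 *
 * where the 'catch' argument selects whether the sleep is interruptible
 * by signals (PCATCH).
 */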

/* ARGSUSED */
static int
pipe_read(struct file *fp, struct uio *uio, struct ucred *cred,
	int flags, struct thread *td)
{
	struct pipe *rpipe = (struct pipe *) fp->f_data;
	int error;
	int nread = 0;
	u_int size;

	++rpipe->pipe_busy;
	error = pipelock(rpipe, 1);
	if (error)
		goto unlocked_error;

	while (uio->uio_resid) {
		caddr_t va;

		if (rpipe->pipe_buffer.cnt > 0) {
			/*
			 * normal pipe buffer receive
			 */
			size = rpipe->pipe_buffer.size - rpipe->pipe_buffer.out;
			if (size > rpipe->pipe_buffer.cnt)
				size = rpipe->pipe_buffer.cnt;
			if (size > (u_int) uio->uio_resid)
				size = (u_int) uio->uio_resid;

			error = uiomove(&rpipe->pipe_buffer.buffer[rpipe->pipe_buffer.out],
					size, uio);
			if (error)
				break;

			rpipe->pipe_buffer.out += size;
			if (rpipe->pipe_buffer.out >= rpipe->pipe_buffer.size)
				rpipe->pipe_buffer.out = 0;

			rpipe->pipe_buffer.cnt -= size;

			/*
			 * If there is no more to read in the pipe, reset
			 * its pointers to the beginning.  This improves
			 * cache hit stats.
			 */
			if (rpipe->pipe_buffer.cnt == 0) {
				rpipe->pipe_buffer.in = 0;
				rpipe->pipe_buffer.out = 0;
			}
			nread += size;
#ifndef PIPE_NODIRECT
		} else if (rpipe->pipe_kva &&
			   rpipe->pipe_feature == PIPE_KMEM &&
			   (rpipe->pipe_state & (PIPE_DIRECTW|PIPE_DIRECTIP))
			       == PIPE_DIRECTW
		) {
			/*
			 * Direct copy using source-side kva mapping
			 */
			size = rpipe->pipe_map.xio_bytes -
				rpipe->pipe_buffer.out;
			if (size > (u_int)uio->uio_resid)
				size = (u_int)uio->uio_resid;
			va = (caddr_t)rpipe->pipe_kva +
				xio_kvaoffset(&rpipe->pipe_map, rpipe->pipe_buffer.out);
			error = uiomove(va, size, uio);
			if (error)
				break;
			nread += size;
			rpipe->pipe_buffer.out += size;
			if (rpipe->pipe_buffer.out == rpipe->pipe_map.xio_bytes) {
				rpipe->pipe_state |= PIPE_DIRECTIP;
				rpipe->pipe_state &= ~PIPE_DIRECTW;
				/* reset out index for copy mode */
				rpipe->pipe_buffer.out = 0;
				wakeup(rpipe);
			}
		} else if (rpipe->pipe_buffer.out != rpipe->pipe_map.xio_bytes &&
			   rpipe->pipe_kva &&
			   rpipe->pipe_feature == PIPE_SFBUF2 &&
			   (rpipe->pipe_state & (PIPE_DIRECTW|PIPE_DIRECTIP))
			       == PIPE_DIRECTW
		) {
			/*
			 * Direct copy, bypassing a kernel buffer.  We cannot
			 * mess with the direct-write buffer until
			 * PIPE_DIRECTIP is cleared.  In order to prevent
			 * the pipe_write code from racing itself in
			 * direct_write, we set DIRECTIP when we clear
			 * DIRECTW after we have exhausted the buffer.
			 */
			if (pipe_dwrite_sfbuf == 3)
				rpipe->pipe_kvamask = 0;
			pmap_qenter2(rpipe->pipe_kva, rpipe->pipe_map.xio_pages,
				    rpipe->pipe_map.xio_npages,
				    &rpipe->pipe_kvamask);
			size = rpipe->pipe_map.xio_bytes -
				rpipe->pipe_buffer.out;
			if (size > (u_int)uio->uio_resid)
				size = (u_int)uio->uio_resid;
			va = (caddr_t)rpipe->pipe_kva + xio_kvaoffset(&rpipe->pipe_map, rpipe->pipe_buffer.out);
			error = uiomove(va, size, uio);
			if (error)
				break;
			nread += size;
			rpipe->pipe_buffer.out += size;
			if (rpipe->pipe_buffer.out == rpipe->pipe_map.xio_bytes) {
				rpipe->pipe_state |= PIPE_DIRECTIP;
				rpipe->pipe_state &= ~PIPE_DIRECTW;
				/* reset out index for copy mode */
				rpipe->pipe_buffer.out = 0;
				wakeup(rpipe);
			}
		} else if (rpipe->pipe_buffer.out != rpipe->pipe_map.xio_bytes &&
			   rpipe->pipe_feature == PIPE_SFBUF1 &&
			   (rpipe->pipe_state & (PIPE_DIRECTW|PIPE_DIRECTIP))
			       == PIPE_DIRECTW
		) {
			/*
			 * Direct copy, bypassing a kernel buffer.  We cannot
			 * mess with the direct-write buffer until
			 * PIPE_DIRECTIP is cleared.  In order to prevent
			 * the pipe_write code from racing itself in
			 * direct_write, we set DIRECTIP when we clear
			 * DIRECTW after we have exhausted the buffer.
			 */
			error = xio_uio_copy(&rpipe->pipe_map, rpipe->pipe_buffer.out, uio, &size);
			if (error)
				break;
			nread += size;
			rpipe->pipe_buffer.out += size;
			if (rpipe->pipe_buffer.out == rpipe->pipe_map.xio_bytes) {
				rpipe->pipe_state |= PIPE_DIRECTIP;
				rpipe->pipe_state &= ~PIPE_DIRECTW;
				/* reset out index for copy mode */
				rpipe->pipe_buffer.out = 0;
				wakeup(rpipe);
			}
#endif
		} else {
			/*
			 * detect EOF condition
			 * read returns 0 on EOF, no need to set error
			 */
			if (rpipe->pipe_state & PIPE_EOF)
				break;

			/*
			 * If the "write-side" has been blocked, wake it up now.
			 */
			if (rpipe->pipe_state & PIPE_WANTW) {
				rpipe->pipe_state &= ~PIPE_WANTW;
				wakeup(rpipe);
			}

			/*
			 * Break if some data was read.
			 */
			if (nread > 0)
				break;

			/*
			 * Unlock the pipe buffer for our remaining
			 * processing.  We will either break out with an
			 * error or we will sleep and relock to loop.
			 */
			pipeunlock(rpipe);

			/*
			 * Handle non-blocking mode operation or
			 * wait for more data.
			 */
			if (fp->f_flag & FNONBLOCK) {
				error = EAGAIN;
			} else {
				rpipe->pipe_state |= PIPE_WANTR;
				if ((error = tsleep(rpipe, PCATCH|PNORESCHED,
				    "piperd", 0)) == 0) {
					error = pipelock(rpipe, 1);
				}
			}
			if (error)
				goto unlocked_error;
		}
	}
	pipeunlock(rpipe);

	if (error == 0)
		vfs_timestamp(&rpipe->pipe_atime);
unlocked_error:
	--rpipe->pipe_busy;

	/*
	 * PIPE_WANT processing only makes sense if pipe_busy is 0.
	 */
	if ((rpipe->pipe_busy == 0) && (rpipe->pipe_state & PIPE_WANT)) {
		rpipe->pipe_state &= ~(PIPE_WANT|PIPE_WANTW);
		wakeup(rpipe);
	} else if (rpipe->pipe_buffer.cnt < MINPIPESIZE) {
		/*
		 * Handle write blocking hysteresis.
		 */
		if (rpipe->pipe_state & PIPE_WANTW) {
			rpipe->pipe_state &= ~PIPE_WANTW;
			wakeup(rpipe);
		}
	}

	if ((rpipe->pipe_buffer.size - rpipe->pipe_buffer.cnt) >= PIPE_BUF)
		pipeselwakeup(rpipe);
	return (error);
}

#ifndef PIPE_NODIRECT
/*
 * Map the sending processes' buffer into kernel space and wire it.
 * This is similar to a physical write operation.
 */
static int
pipe_build_write_buffer(wpipe, uio)
	struct pipe *wpipe;
	struct uio *uio;
{
	int error;
	u_int size;

	size = (u_int) uio->uio_iov->iov_len;
	if (size > wpipe->pipe_buffer.size)
		size = wpipe->pipe_buffer.size;

	if (uio->uio_segflg == UIO_SYSSPACE) {
		error = xio_init_kbuf(&wpipe->pipe_map, uio->uio_iov->iov_base,
					size);
	} else {
		error = xio_init_ubuf(&wpipe->pipe_map, uio->uio_iov->iov_base,
					size, XIOF_READ);
	}
	wpipe->pipe_buffer.out = 0;
	if (error)
		return(error);

	/*
	 * Create a kernel map for KMEM and SFBUF2 copy modes.  SFBUF2 will
	 * map the pages on the target while KMEM maps the pages now.
	 */
	switch(wpipe->pipe_feature) {
	case PIPE_KMEM:
	case PIPE_SFBUF2:
		if (wpipe->pipe_kva == NULL) {
			wpipe->pipe_kva =
			    kmem_alloc_nofault(kernel_map, XIO_INTERNAL_SIZE);
			wpipe->pipe_kvamask = 0;
		}
		if (wpipe->pipe_feature == PIPE_KMEM) {
			pmap_qenter(wpipe->pipe_kva, wpipe->pipe_map.xio_pages,
				    wpipe->pipe_map.xio_npages);
		}
		break;
	default:
		break;
	}

	/*
	 * And update the uio data.  The XIO might have loaded fewer bytes
	 * than requested so reload 'size'.
	 */
	size = wpipe->pipe_map.xio_bytes;
	uio->uio_iov->iov_len -= size;
	uio->uio_iov->iov_base += size;
	if (uio->uio_iov->iov_len == 0)
		uio->uio_iov++;
	uio->uio_resid -= size;
	uio->uio_offset += size;
	return (0);
}

/*
 * In the case of a signal, the writing process might go away.  This
 * code copies the data into the circular buffer so that the source
 * pages can be freed without loss of data.
 *
 * Note that in direct mode pipe_buffer.out is used to track the
 * XIO offset.  We are converting the direct mode into buffered mode
 * which changes the meaning of pipe_buffer.out.
 */
static void
pipe_clone_write_buffer(wpipe)
	struct pipe *wpipe;
{
	int size;
	int offset;

	offset = wpipe->pipe_buffer.out;
	size = wpipe->pipe_map.xio_bytes - offset;

	KKASSERT(size <= wpipe->pipe_buffer.size);

	wpipe->pipe_buffer.in = size;
	wpipe->pipe_buffer.out = 0;
	wpipe->pipe_buffer.cnt = size;
	wpipe->pipe_state &= ~(PIPE_DIRECTW | PIPE_DIRECTIP);

	xio_copy_xtok(&wpipe->pipe_map, offset, wpipe->pipe_buffer.buffer, size);
	xio_release(&wpipe->pipe_map);
	if (wpipe->pipe_kva) {
		pmap_qremove(wpipe->pipe_kva, XIO_INTERNAL_PAGES);
		kmem_free(kernel_map, wpipe->pipe_kva, XIO_INTERNAL_SIZE);
		wpipe->pipe_kva = NULL;
	}
}

/*
 * This implements the pipe buffer write mechanism.  Note that only
 * a direct write OR a normal pipe write can be pending at any given time.
 * If there are any characters in the pipe buffer, the direct write will
 * be deferred until the receiving process grabs all of the bytes from
 * the pipe buffer.  Then the direct mapping write is set-up.
 */
static int
pipe_direct_write(wpipe, uio)
	struct pipe *wpipe;
	struct uio *uio;
{
	int error;

retry:
	while (wpipe->pipe_state & (PIPE_DIRECTW|PIPE_DIRECTIP)) {
		if (wpipe->pipe_state & PIPE_WANTR) {
			wpipe->pipe_state &= ~PIPE_WANTR;
			wakeup(wpipe);
		}
		wpipe->pipe_state |= PIPE_WANTW;
		error = tsleep(wpipe, PCATCH, "pipdww", 0);
		if (error)
			goto error2;
		if (wpipe->pipe_state & PIPE_EOF) {
			error = EPIPE;
			goto error2;
		}
	}
	KKASSERT(wpipe->pipe_map.xio_bytes == 0);
	if (wpipe->pipe_buffer.cnt > 0) {
		if (wpipe->pipe_state & PIPE_WANTR) {
			wpipe->pipe_state &= ~PIPE_WANTR;
			wakeup(wpipe);
		}

		wpipe->pipe_state |= PIPE_WANTW;
		error = tsleep(wpipe, PCATCH, "pipdwc", 0);
		if (error)
			goto error2;
		if (wpipe->pipe_state & PIPE_EOF) {
			error = EPIPE;
			goto error2;
		}
		goto retry;
	}

	/*
	 * Build our direct-write buffer
	 */
	wpipe->pipe_state |= PIPE_DIRECTW | PIPE_DIRECTIP;
	error = pipe_build_write_buffer(wpipe, uio);
	if (error)
		goto error1;
	wpipe->pipe_state &= ~PIPE_DIRECTIP;

	/*
	 * Wait until the receiver has snarfed the data.  Since we are likely
	 * going to sleep we optimize the case and yield synchronously,
	 * possibly avoiding the tsleep().
	 */
	error = 0;
	while (!error && (wpipe->pipe_state & PIPE_DIRECTW)) {
		if (wpipe->pipe_state & PIPE_EOF) {
			pipelock(wpipe, 0);
			xio_release(&wpipe->pipe_map);
			if (wpipe->pipe_kva) {
				pmap_qremove(wpipe->pipe_kva, XIO_INTERNAL_PAGES);
				kmem_free(kernel_map, wpipe->pipe_kva, XIO_INTERNAL_SIZE);
				wpipe->pipe_kva = NULL;
			}
			pipeunlock(wpipe);
			pipeselwakeup(wpipe);
			error = EPIPE;
			goto error1;
		}
		if (wpipe->pipe_state & PIPE_WANTR) {
			wpipe->pipe_state &= ~PIPE_WANTR;
			wakeup(wpipe);
		}
		pipeselwakeup(wpipe);
		error = tsleep(wpipe, PCATCH|PNORESCHED, "pipdwt", 0);
	}
	pipelock(wpipe,0);
	if (wpipe->pipe_state & PIPE_DIRECTW) {
		/*
		 * this bit of trickery substitutes a kernel buffer for
		 * the process that might be going away.
		 */
		pipe_clone_write_buffer(wpipe);
		KKASSERT((wpipe->pipe_state & PIPE_DIRECTIP) == 0);
	} else {
		/*
		 * note: The pipe_kva mapping is not qremove'd here.  For
		 * legacy PIPE_KMEM mode this constitutes an improvement
		 * over the original FreeBSD-4 algorithm.  For PIPE_SFBUF2
		 * mode the kva mapping must not be removed to get the
		 * caching benefit.
		 *
		 * For testing purposes we will give the original algorithm
		 * the benefit of the doubt 'what it could have been', and
		 * keep the optimization.
		 */
		KKASSERT(wpipe->pipe_state & PIPE_DIRECTIP);
		xio_release(&wpipe->pipe_map);
		wpipe->pipe_state &= ~PIPE_DIRECTIP;
	}
	pipeunlock(wpipe);
	return (error);

	/*
	 * Direct-write error, clear the direct write flags.
	 */
error1:
	wpipe->pipe_state &= ~(PIPE_DIRECTW | PIPE_DIRECTIP);
	/* fallthrough */

	/*
	 * General error, wakeup the other side if it happens to be sleeping.
	 */
error2:
	wakeup(wpipe);
	return (error);
}
#endif
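
/*
 * Summary of the direct-write handshake implemented above (a descriptive
 * note derived from the code, added for clarity):
 *
 *	writer: sets PIPE_DIRECTW|PIPE_DIRECTIP, wires the user pages into
 *		the XIO via pipe_build_write_buffer(), clears PIPE_DIRECTIP
 *		and sleeps until PIPE_DIRECTW goes away.
 *	reader: copies out of the XIO; once it is exhausted it sets
 *		PIPE_DIRECTIP, clears PIPE_DIRECTW and wakes the writer.
 *	writer: releases the XIO and clears PIPE_DIRECTIP.  If the writer
 *		is interrupted instead, pipe_clone_write_buffer() converts
 *		the pending data back into an ordinary buffered write.
 */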

static int
pipe_write(struct file *fp, struct uio *uio, struct ucred *cred,
	int flags, struct thread *td)
{
	int error = 0;
	int orig_resid;
	struct pipe *wpipe, *rpipe;

	rpipe = (struct pipe *) fp->f_data;
	wpipe = rpipe->pipe_peer;

	/*
	 * detect loss of pipe read side, issue SIGPIPE if lost.
	 */
	if ((wpipe == NULL) || (wpipe->pipe_state & PIPE_EOF)) {
		return (EPIPE);
	}
	++wpipe->pipe_busy;

	/*
	 * If it is advantageous to resize the pipe buffer, do
	 * so.
	 */
	if ((uio->uio_resid > PIPE_SIZE) &&
		(pipe_nbig < pipe_maxbig) &&
		(wpipe->pipe_state & (PIPE_DIRECTW|PIPE_DIRECTIP)) == 0 &&
		(wpipe->pipe_buffer.size <= PIPE_SIZE) &&
		(wpipe->pipe_buffer.cnt == 0)) {

		if ((error = pipelock(wpipe,1)) == 0) {
			if (pipespace(wpipe, BIG_PIPE_SIZE) == 0)
				pipe_nbig++;
			pipeunlock(wpipe);
		}
	}

	/*
	 * If an early error occurred unbusy and return, waking up any pending
	 * readers.
	 */
	if (error) {
		--wpipe->pipe_busy;
		if ((wpipe->pipe_busy == 0) &&
		    (wpipe->pipe_state & PIPE_WANT)) {
			wpipe->pipe_state &= ~(PIPE_WANT | PIPE_WANTR);
			wakeup(wpipe);
		}
		return(error);
	}

	KASSERT(wpipe->pipe_buffer.buffer != NULL, ("pipe buffer gone"));

	orig_resid = uio->uio_resid;

	while (uio->uio_resid) {
		int space;

#ifndef PIPE_NODIRECT
		/*
		 * If the transfer is large, we can gain performance if
		 * we do process-to-process copies directly.
		 * If the write is non-blocking, we don't use the
		 * direct write mechanism.
		 *
		 * The direct write mechanism will detect the reader going
		 * away on us.
		 */
		if ((uio->uio_iov->iov_len >= PIPE_MINDIRECT ||
		    pipe_dwrite_enable > 1) &&
		    (fp->f_flag & FNONBLOCK) == 0 &&
		    pipe_dwrite_enable) {
			error = pipe_direct_write(wpipe, uio);
			if (error)
				break;
			continue;
		}
#endif

		/*
		 * Pipe buffered writes cannot be coincidental with
		 * direct writes.  We wait until the currently executing
		 * direct write is completed before we start filling the
		 * pipe buffer.  We break out if a signal occurs or the
		 * reader goes away.
		 */
	retrywrite:
		while (wpipe->pipe_state & (PIPE_DIRECTW|PIPE_DIRECTIP)) {
			if (wpipe->pipe_state & PIPE_WANTR) {
				wpipe->pipe_state &= ~PIPE_WANTR;
				wakeup(wpipe);
			}
			error = tsleep(wpipe, PCATCH, "pipbww", 0);
			if (wpipe->pipe_state & PIPE_EOF)
				break;
			if (error)
				break;
		}
		if (wpipe->pipe_state & PIPE_EOF) {
			error = EPIPE;
			break;
		}

		space = wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt;

		/* Writes of size <= PIPE_BUF must be atomic. */
		if ((space < uio->uio_resid) && (orig_resid <= PIPE_BUF))
			space = 0;

		/*
		 * Write to fill, read size handles write hysteresis.  Also
		 * additional restrictions can cause select-based non-blocking
		 * writes to spin.
		 */
		if (space > 0) {
			if ((error = pipelock(wpipe,1)) == 0) {
				int size;	/* Transfer size */
				int segsize;	/* first segment to transfer */

				/*
				 * It is possible for a direct write to
				 * slip in on us... handle it here...
				 */
				if (wpipe->pipe_state & (PIPE_DIRECTW|PIPE_DIRECTIP)) {
					pipeunlock(wpipe);
					goto retrywrite;
				}
				/*
				 * If a process blocked in uiomove, our
				 * value for space might be bad.
				 *
				 * XXX will we be ok if the reader has gone
				 * away here?
				 */
				if (space > wpipe->pipe_buffer.size -
				    wpipe->pipe_buffer.cnt) {
					pipeunlock(wpipe);
					goto retrywrite;
				}

				/*
				 * Transfer size is minimum of uio transfer
				 * and free space in pipe buffer.
				 */
				if (space > uio->uio_resid)
					size = uio->uio_resid;
				else
					size = space;
				/*
				 * First segment to transfer is minimum of
				 * transfer size and contiguous space in
				 * pipe buffer.  If first segment to transfer
				 * is less than the transfer size, we've got
				 * a wraparound in the buffer.
				 */
				segsize = wpipe->pipe_buffer.size -
					wpipe->pipe_buffer.in;
				if (segsize > size)
					segsize = size;

				/* Transfer first segment */

				error = uiomove(&wpipe->pipe_buffer.buffer[wpipe->pipe_buffer.in],
						segsize, uio);

				if (error == 0 && segsize < size) {
					/*
					 * Transfer remaining part now, to
					 * support atomic writes.  Wraparound
					 * happened.
					 */
					if (wpipe->pipe_buffer.in + segsize !=
					    wpipe->pipe_buffer.size)
						panic("Expected pipe buffer wraparound disappeared");

					error = uiomove(&wpipe->pipe_buffer.buffer[0],
							size - segsize, uio);
				}
				if (error == 0) {
					wpipe->pipe_buffer.in += size;
					if (wpipe->pipe_buffer.in >=
					    wpipe->pipe_buffer.size) {
						if (wpipe->pipe_buffer.in != size - segsize + wpipe->pipe_buffer.size)
							panic("Expected wraparound bad");
						wpipe->pipe_buffer.in = size - segsize;
					}

					wpipe->pipe_buffer.cnt += size;
					if (wpipe->pipe_buffer.cnt > wpipe->pipe_buffer.size)
						panic("Pipe buffer overflow");

				}
				pipeunlock(wpipe);
			}
			if (error)
				break;

		} else {
			/*
			 * If the "read-side" has been blocked, wake it up now
			 * and yield to let it drain synchronously rather
			 * than block.
			 */
			if (wpipe->pipe_state & PIPE_WANTR) {
				wpipe->pipe_state &= ~PIPE_WANTR;
				wakeup(wpipe);
			}

			/*
			 * don't block on non-blocking I/O
			 */
			if (fp->f_flag & FNONBLOCK) {
				error = EAGAIN;
				break;
			}

			/*
			 * We have no more space and have something to offer,
			 * wake up select/poll.
			 */
			pipeselwakeup(wpipe);

			wpipe->pipe_state |= PIPE_WANTW;
			error = tsleep(wpipe, PCATCH|PNORESCHED, "pipewr", 0);
			if (error != 0)
				break;
			/*
			 * If read side wants to go away, we just issue a signal
			 * to ourselves.
			 */
			if (wpipe->pipe_state & PIPE_EOF) {
				error = EPIPE;
				break;
			}
		}
	}

	--wpipe->pipe_busy;

	if ((wpipe->pipe_busy == 0) && (wpipe->pipe_state & PIPE_WANT)) {
		wpipe->pipe_state &= ~(PIPE_WANT | PIPE_WANTR);
		wakeup(wpipe);
	} else if (wpipe->pipe_buffer.cnt > 0) {
		/*
		 * If we have put any characters in the buffer, we wake up
		 * the reader.
		 */
		if (wpipe->pipe_state & PIPE_WANTR) {
			wpipe->pipe_state &= ~PIPE_WANTR;
			wakeup(wpipe);
		}
	}

	/*
	 * Don't return EPIPE if I/O was successful
	 */
	if ((wpipe->pipe_buffer.cnt == 0) &&
	    (uio->uio_resid == 0) &&
	    (error == EPIPE)) {
		error = 0;
	}

	if (error == 0)
		vfs_timestamp(&wpipe->pipe_mtime);

	/*
	 * We have something to offer,
	 * wake up select/poll.
	 */
	if (wpipe->pipe_buffer.cnt)
		pipeselwakeup(wpipe);

	return (error);
}

/*
 * we implement a very minimal set of ioctls for compatibility with sockets.
 */
int
pipe_ioctl(struct file *fp, u_long cmd, caddr_t data, struct thread *td)
{
	struct pipe *mpipe = (struct pipe *)fp->f_data;

	switch (cmd) {

	case FIONBIO:
		return (0);

	case FIOASYNC:
		if (*(int *)data) {
			mpipe->pipe_state |= PIPE_ASYNC;
		} else {
			mpipe->pipe_state &= ~PIPE_ASYNC;
		}
		return (0);

	case FIONREAD:
		if (mpipe->pipe_state & PIPE_DIRECTW) {
			*(int *)data = mpipe->pipe_map.xio_bytes -
					mpipe->pipe_buffer.out;
		} else {
			*(int *)data = mpipe->pipe_buffer.cnt;
		}
		return (0);

	case FIOSETOWN:
		return (fsetown(*(int *)data, &mpipe->pipe_sigio));

	case FIOGETOWN:
		*(int *)data = fgetown(mpipe->pipe_sigio);
		return (0);

	/* This is deprecated, FIOSETOWN should be used instead. */
	case TIOCSPGRP:
		return (fsetown(-(*(int *)data), &mpipe->pipe_sigio));

	/* This is deprecated, FIOGETOWN should be used instead. */
	case TIOCGPGRP:
		*(int *)data = -fgetown(mpipe->pipe_sigio);
		return (0);

	}
	return (ENOTTY);
}
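
/*
 * Illustrative userland sketch (an assumption about typical use, not part
 * of the kernel build): the FIONREAD ioctl handled above is the usual way
 * to query how many bytes are waiting in a pipe:
 *
 *	int nbytes;
 *	if (ioctl(pfd[0], FIONREAD, &nbytes) == 0)
 *		printf("%d bytes buffered\n", nbytes);
 */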

int
pipe_poll(struct file *fp, int events, struct ucred *cred, struct thread *td)
{
	struct pipe *rpipe = (struct pipe *)fp->f_data;
	struct pipe *wpipe;
	int revents = 0;

	wpipe = rpipe->pipe_peer;
	if (events & (POLLIN | POLLRDNORM))
		if ((rpipe->pipe_state & PIPE_DIRECTW) ||
		    (rpipe->pipe_buffer.cnt > 0) ||
		    (rpipe->pipe_state & PIPE_EOF))
			revents |= events & (POLLIN | POLLRDNORM);

	if (events & (POLLOUT | POLLWRNORM))
		if (wpipe == NULL || (wpipe->pipe_state & PIPE_EOF) ||
		    (((wpipe->pipe_state & PIPE_DIRECTW) == 0) &&
		     (wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt) >= PIPE_BUF))
			revents |= events & (POLLOUT | POLLWRNORM);

	if ((rpipe->pipe_state & PIPE_EOF) ||
	    (wpipe == NULL) ||
	    (wpipe->pipe_state & PIPE_EOF))
		revents |= POLLHUP;

	if (revents == 0) {
		if (events & (POLLIN | POLLRDNORM)) {
			selrecord(td, &rpipe->pipe_sel);
			rpipe->pipe_state |= PIPE_SEL;
		}

		if (events & (POLLOUT | POLLWRNORM)) {
			selrecord(td, &wpipe->pipe_sel);
			wpipe->pipe_state |= PIPE_SEL;
		}
	}

	return (revents);
}

static int
pipe_stat(struct file *fp, struct stat *ub, struct thread *td)
{
	struct pipe *pipe = (struct pipe *)fp->f_data;

	bzero((caddr_t)ub, sizeof(*ub));
	ub->st_mode = S_IFIFO;
	ub->st_blksize = pipe->pipe_buffer.size;
	ub->st_size = pipe->pipe_buffer.cnt;
	if (ub->st_size == 0 && (pipe->pipe_state & PIPE_DIRECTW)) {
		ub->st_size = pipe->pipe_map.xio_bytes -
				pipe->pipe_buffer.out;
	}
	ub->st_blocks = (ub->st_size + ub->st_blksize - 1) / ub->st_blksize;
	ub->st_atimespec = pipe->pipe_atime;
	ub->st_mtimespec = pipe->pipe_mtime;
	ub->st_ctimespec = pipe->pipe_ctime;
	/*
	 * Left as 0: st_dev, st_ino, st_nlink, st_uid, st_gid, st_rdev,
	 * st_flags, st_gen.
	 * XXX (st_dev, st_ino) should be unique.
	 */
	return (0);
}

/* ARGSUSED */
static int
pipe_close(struct file *fp, struct thread *td)
{
	struct pipe *cpipe = (struct pipe *)fp->f_data;

	fp->f_ops = &badfileops;
	fp->f_data = NULL;
	funsetown(cpipe->pipe_sigio);
	pipeclose(cpipe);
	return (0);
}

static void
pipe_free_kmem(struct pipe *cpipe)
{
	if (cpipe->pipe_buffer.buffer != NULL) {
		if (cpipe->pipe_buffer.size > PIPE_SIZE)
			--pipe_nbig;
		kmem_free(kernel_map,
			(vm_offset_t)cpipe->pipe_buffer.buffer,
			cpipe->pipe_buffer.size);
		cpipe->pipe_buffer.buffer = NULL;
		cpipe->pipe_buffer.object = NULL;
	}
#ifndef PIPE_NODIRECT
	KKASSERT(cpipe->pipe_map.xio_bytes == 0 &&
		cpipe->pipe_map.xio_offset == 0 &&
		cpipe->pipe_map.xio_npages == 0);
#endif
}

/*
 * shutdown the pipe
 */
static void
pipeclose(struct pipe *cpipe)
{
	globaldata_t gd;
	struct pipe *ppipe;

	if (cpipe == NULL)
		return;

	pipeselwakeup(cpipe);

	/*
	 * If the other side is blocked, wake it up saying that
	 * we want to close it down.
	 */
	while (cpipe->pipe_busy) {
		wakeup(cpipe);
		cpipe->pipe_state |= PIPE_WANT | PIPE_EOF;
		tsleep(cpipe, 0, "pipecl", 0);
	}

	/*
	 * Disconnect from peer
	 */
	if ((ppipe = cpipe->pipe_peer) != NULL) {
		pipeselwakeup(ppipe);

		ppipe->pipe_state |= PIPE_EOF;
		wakeup(ppipe);
		KNOTE(&ppipe->pipe_sel.si_note, 0);
		ppipe->pipe_peer = NULL;
	}

	if (cpipe->pipe_kva) {
		pmap_qremove(cpipe->pipe_kva, XIO_INTERNAL_PAGES);
		kmem_free(kernel_map, cpipe->pipe_kva, XIO_INTERNAL_SIZE);
		cpipe->pipe_kva = NULL;
	}

	/*
	 * free or cache resources
	 */
	gd = mycpu;
	if (gd->gd_pipeqcount >= pipe_maxcache ||
	    cpipe->pipe_buffer.size != PIPE_SIZE
	) {
		pipe_free_kmem(cpipe);
		free(cpipe, M_PIPE);
	} else {
		KKASSERT(cpipe->pipe_map.xio_npages == 0 &&
			cpipe->pipe_map.xio_bytes == 0 &&
			cpipe->pipe_map.xio_offset == 0);
		cpipe->pipe_state = 0;
		cpipe->pipe_busy = 0;
		cpipe->pipe_peer = gd->gd_pipeq;
		gd->gd_pipeq = cpipe;
		++gd->gd_pipeqcount;
	}
}

/*ARGSUSED*/
static int
pipe_kqfilter(struct file *fp, struct knote *kn)
{
	struct pipe *cpipe = (struct pipe *)kn->kn_fp->f_data;

	switch (kn->kn_filter) {
	case EVFILT_READ:
		kn->kn_fop = &pipe_rfiltops;
		break;
	case EVFILT_WRITE:
		kn->kn_fop = &pipe_wfiltops;
		cpipe = cpipe->pipe_peer;
		if (cpipe == NULL)
			/* other end of pipe has been closed */
			return (EPIPE);
		break;
	default:
		return (1);
	}
	kn->kn_hook = (caddr_t)cpipe;

	SLIST_INSERT_HEAD(&cpipe->pipe_sel.si_note, kn, kn_selnext);
	return (0);
}

static void
filt_pipedetach(struct knote *kn)
{
	struct pipe *cpipe = (struct pipe *)kn->kn_hook;

	SLIST_REMOVE(&cpipe->pipe_sel.si_note, kn, knote, kn_selnext);
}

/*ARGSUSED*/
static int
filt_piperead(struct knote *kn, long hint)
{
	struct pipe *rpipe = (struct pipe *)kn->kn_fp->f_data;
	struct pipe *wpipe = rpipe->pipe_peer;

	kn->kn_data = rpipe->pipe_buffer.cnt;
	if ((kn->kn_data == 0) && (rpipe->pipe_state & PIPE_DIRECTW)) {
		kn->kn_data = rpipe->pipe_map.xio_bytes -
				rpipe->pipe_buffer.out;
	}

	if ((rpipe->pipe_state & PIPE_EOF) ||
	    (wpipe == NULL) || (wpipe->pipe_state & PIPE_EOF)) {
		kn->kn_flags |= EV_EOF;
		return (1);
	}
	return (kn->kn_data > 0);
}

/*ARGSUSED*/
static int
filt_pipewrite(struct knote *kn, long hint)
{
	struct pipe *rpipe = (struct pipe *)kn->kn_fp->f_data;
	struct pipe *wpipe = rpipe->pipe_peer;

	if ((wpipe == NULL) || (wpipe->pipe_state & PIPE_EOF)) {
		kn->kn_data = 0;
		kn->kn_flags |= EV_EOF;
		return (1);
	}
	kn->kn_data = wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt;
	if (wpipe->pipe_state & PIPE_DIRECTW)
		kn->kn_data = 0;

	return (kn->kn_data >= PIPE_BUF);
}
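
/*
 * Illustrative userland sketch (an assumption about typical use, not part
 * of the kernel build): registering a pipe descriptor with kqueue(2) ends
 * up in pipe_kqfilter() above, and readiness is then reported through
 * filt_piperead()/filt_pipewrite():
 *
 *	struct kevent kev;
 *	int kq = kqueue();
 *	EV_SET(&kev, pfd[0], EVFILT_READ, EV_ADD, 0, 0, NULL);
 *	kevent(kq, &kev, 1, NULL, 0, NULL);
 */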