/* $NetBSD: sys_pipe.c,v 1.16 2001/10/08 07:50:17 mycroft Exp $ */

/*
 * Copyright (c) 1996 John S. Dyson
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice immediately at the beginning of the file, without modification,
 *    this list of conditions, and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Absolutely no warranty of function or purpose is made by the author
 *    John S. Dyson.
 * 4. Modifications may be freely made to this file if the above conditions
 *    are met.
 *
 * $FreeBSD: src/sys/kern/sys_pipe.c,v 1.82 2001/06/15 20:45:01 jlemon Exp $
 */

/*
 * This file contains a high-performance replacement for the socket-based
 * pipes scheme originally used in FreeBSD/4.4Lite.  It does not support
 * all features of sockets, but does do everything that pipes normally
 * do.
 *
 * Adaptation for NetBSD UVM, including uvm_loan() based direct write, was
 * written by Jaromir Dolecek.
 */

/*
 * This code has two modes of operation, a small write mode and a large
 * write mode.  The small write mode acts like conventional pipes with
 * a kernel buffer.  If the buffer is less than PIPE_MINDIRECT, then the
 * "normal" pipe buffering is done.  If the buffer is between PIPE_MINDIRECT
 * and PIPE_SIZE in size, it is fully mapped into the kernel (on FreeBSD,
 * those pages are also wired), and the receiving process can copy it
 * directly from the pages in the sending process.
 *
 * If the sending process receives a signal, it is possible that it will
 * go away, and certainly its address space can change, because control
 * is returned back to the user-mode side.  In that case, the pipe code
 * arranges to copy the buffer supplied by the user process on FreeBSD to
 * a pageable kernel buffer, and the receiving process will grab the data
 * from the pageable kernel buffer.  Since signals don't happen all that
 * often, the copy operation is normally eliminated.
 * On NetBSD, the pages are mapped read-only, COW for kernel, by uvm_loan(),
 * so no explicit handling needs to be done; all is handled by standard VM
 * facilities.
 *
 * The constant PIPE_MINDIRECT is chosen to make sure that buffering will
 * happen for small transfers so that the system will not spend all of
 * its time context switching.  PIPE_SIZE is constrained by the
 * amount of kernel virtual memory.
 */
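
/*
 * A rough sketch of how the write path described above is chosen
 * (illustrative pseudo code only; pipe_write() below has the exact
 * conditions, including the amountpipekva check):
 *
 *	if (iov_len >= PIPE_MINDIRECT && !(fp->f_flag & FNONBLOCK))
 *		pipe_direct_write(wpipe, uio);	-- loan/map the user pages
 *	else
 *		uiomove() into wpipe->pipe_buffer -- ordinary buffered copy
 */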

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/fcntl.h>
#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/filio.h>
#include <sys/ttycom.h>
#include <sys/stat.h>
#include <sys/poll.h>
#include <sys/signalvar.h>
#include <sys/vnode.h>
#include <sys/uio.h>
#include <sys/lock.h>
#ifdef __FreeBSD__
#include <sys/mutex.h>
#include <sys/selinfo.h>
#include <sys/sysproto.h>
#elif defined(__NetBSD__)
#include <sys/select.h>
#include <sys/malloc.h>
#include <sys/mount.h>
#include <sys/syscallargs.h>
#include <uvm/uvm.h>
#include <sys/sysctl.h>
#endif /* NetBSD, FreeBSD */

#include <sys/pipe.h>

#ifdef __NetBSD__
#define vfs_timestamp(tv)	microtime(tv)
#endif

/*
 * Use this define if you want to disable *fancy* VM things.  Expect an
 * approx 30% decrease in transfer rate.  This could be useful for
 * OpenBSD.
 */
/* #define PIPE_NODIRECT */

/*
 * interfaces to the outside world
 */
#ifdef __FreeBSD__
static int pipe_read __P((struct file *fp, struct uio *uio,
		struct ucred *cred, int flags, struct proc *p));
static int pipe_write __P((struct file *fp, struct uio *uio,
		struct ucred *cred, int flags, struct proc *p));
static int pipe_close __P((struct file *fp, struct proc *p));
static int pipe_poll __P((struct file *fp, int events, struct ucred *cred,
		struct proc *p));
static int pipe_kqfilter __P((struct file *fp, struct knote *kn));
static int pipe_stat __P((struct file *fp, struct stat *sb, struct proc *p));
static int pipe_ioctl __P((struct file *fp, u_long cmd, caddr_t data,
		struct proc *p));

static struct fileops pipeops = {
	pipe_read, pipe_write, pipe_ioctl, pipe_poll, pipe_kqfilter,
	pipe_stat, pipe_close
};

static void	filt_pipedetach(struct knote *kn);
static int	filt_piperead(struct knote *kn, long hint);
static int	filt_pipewrite(struct knote *kn, long hint);

static struct filterops pipe_rfiltops =
	{ 1, NULL, filt_pipedetach, filt_piperead };
static struct filterops pipe_wfiltops =
	{ 1, NULL, filt_pipedetach, filt_pipewrite };
#endif /* FreeBSD */

#ifdef __NetBSD__
static int pipe_read __P((struct file *fp, off_t *offset, struct uio *uio,
		struct ucred *cred, int flags));
static int pipe_write __P((struct file *fp, off_t *offset, struct uio *uio,
		struct ucred *cred, int flags));
static int pipe_close __P((struct file *fp, struct proc *p));
static int pipe_poll __P((struct file *fp, int events, struct proc *p));
static int pipe_fcntl __P((struct file *fp, u_int com, caddr_t data,
		struct proc *p));
static int pipe_stat __P((struct file *fp, struct stat *sb, struct proc *p));
static int pipe_ioctl __P((struct file *fp, u_long cmd, caddr_t data,
		struct proc *p));

static struct fileops pipeops =
    { pipe_read, pipe_write, pipe_ioctl, pipe_fcntl, pipe_poll,
      pipe_stat, pipe_close };
#endif /* NetBSD */

/*
 * Default pipe buffer size(s); this can be kind-of large now because pipe
 * space is pageable.  The pipe code will try to maintain locality of
 * reference for performance reasons, so small amounts of outstanding I/O
 * will not wipe the cache.
 */
#define MINPIPESIZE (PIPE_SIZE/3)
#define MAXPIPESIZE (2*PIPE_SIZE/3)
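
/*
 * For example, assuming the usual PIPE_SIZE of 16384 bytes, a reader
 * starts waking a blocked writer once fewer than MINPIPESIZE (~5461)
 * bytes remain buffered; this hysteresis avoids a wakeup per byte read.
 * See the MINPIPESIZE check at the end of pipe_read().
 */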

/*
 * Maximum amount of kva for pipes -- this is kind-of a soft limit, but
 * is there so that on large systems, we don't exhaust it.
 */
#define MAXPIPEKVA (8*1024*1024)
static int maxpipekva = MAXPIPEKVA;

/*
 * Limit for direct transfers; we cannot, of course, limit
 * the amount of kva for pipes in general though.
 */
#define LIMITPIPEKVA (16*1024*1024)
static int limitpipekva = LIMITPIPEKVA;

/*
 * Limit the number of "big" pipes.
 */
#define LIMITBIGPIPES	32
static int maxbigpipes = LIMITBIGPIPES;
static int nbigpipe = 0;

/*
 * Amount of KVA consumed by pipe buffers.
 */
static int amountpipekva = 0;

static void pipeclose __P((struct pipe *cpipe));
static void pipe_free_kmem __P((struct pipe *cpipe));
static int pipe_create __P((struct pipe **cpipep, int allockva));
static __inline int pipelock __P((struct pipe *cpipe, int catch));
static __inline void pipeunlock __P((struct pipe *cpipe));
static __inline void pipeselwakeup __P((struct pipe *selp,
		struct pipe *sigp));
static int pipespace __P((struct pipe *cpipe, int size));

#ifdef __FreeBSD__
#ifndef PIPE_NODIRECT
static int pipe_build_write_buffer __P((struct pipe *wpipe, struct uio *uio));
static void pipe_destroy_write_buffer __P((struct pipe *wpipe));
static int pipe_direct_write __P((struct pipe *wpipe, struct uio *uio));
static void pipe_clone_write_buffer __P((struct pipe *wpipe));
#endif

static vm_zone_t pipe_zone;
#endif /* FreeBSD */

#ifdef __NetBSD__
#ifndef PIPE_NODIRECT
static __inline int pipe_direct_write __P((struct pipe *wpipe,
		struct uio *uio));
static __inline int pipe_loan_alloc __P((struct pipe *wpipe, int npages,
		vsize_t blen));
static void pipe_loan_free __P((struct pipe *wpipe));
#endif /* PIPE_NODIRECT */

static struct pool pipe_pool;
#endif /* NetBSD */

/*
 * The pipe system call for the DTYPE_PIPE type of pipes
 */

/* ARGSUSED */
#ifdef __FreeBSD__
int
pipe(p, uap)
	struct proc *p;
	struct pipe_args /* {
		int	dummy;
	} */ *uap;
#elif defined(__NetBSD__)
int
sys_pipe(p, v, retval)
	struct proc *p;
	void *v;
	register_t *retval;
#endif
{
	struct file *rf, *wf;
	struct pipe *rpipe, *wpipe;
	int fd, error;

#ifdef __FreeBSD__
	if (pipe_zone == NULL)
		pipe_zone = zinit("PIPE", sizeof(struct pipe), 0, 0, 4);

	rpipe = wpipe = NULL;
	if (pipe_create(&rpipe, 1) || pipe_create(&wpipe, 1)) {
		pipeclose(rpipe);
		pipeclose(wpipe);
		return (ENFILE);
	}

	error = falloc(p, &rf, &fd);
	if (error) {
		pipeclose(rpipe);
		pipeclose(wpipe);
		return (error);
	}
	fhold(rf);
	p->p_retval[0] = fd;

	/*
	 * Warning: once we've gotten past allocation of the fd for the
	 * read-side, we can only drop the read side via fdrop() in order
	 * to avoid races against processes which manage to dup() the read
	 * side while we are blocked trying to allocate the write side.
	 */
	rf->f_flag = FREAD | FWRITE;
	rf->f_type = DTYPE_PIPE;
	rf->f_data = (caddr_t)rpipe;
	rf->f_ops = &pipeops;
	error = falloc(p, &wf, &fd);
	if (error) {
		struct filedesc *fdp = p->p_fd;

		if (fdp->fd_ofiles[p->p_retval[0]] == rf) {
			fdp->fd_ofiles[p->p_retval[0]] = NULL;
			fdrop(rf, p);
		}
		fdrop(rf, p);
		/* rpipe has been closed by fdrop(). */
		pipeclose(wpipe);
		return (error);
	}
	wf->f_flag = FREAD | FWRITE;
	wf->f_type = DTYPE_PIPE;
	wf->f_data = (caddr_t)wpipe;
	wf->f_ops = &pipeops;
	p->p_retval[1] = fd;

	rpipe->pipe_peer = wpipe;
	wpipe->pipe_peer = rpipe;
	fdrop(rf, p);
#endif /* FreeBSD */

#ifdef __NetBSD__
	rpipe = wpipe = NULL;
	if (pipe_create(&rpipe, 1) || pipe_create(&wpipe, 0)) {
		pipeclose(rpipe);
		pipeclose(wpipe);
		return (ENFILE);
	}

	/*
	 * Note: the file structure returned from falloc() is marked
	 * as 'larval' initially.  Unless we mark it as 'mature' by
	 * FILE_SET_MATURE(), any attempt to do anything with it would
	 * return EBADF, including e.g. dup(2) or close(2).  This avoids
	 * file descriptor races if we block in the second falloc().
	 */

	error = falloc(p, &rf, &fd);
	if (error)
		goto free2;
	retval[0] = fd;
	rf->f_flag = FREAD;
	rf->f_type = DTYPE_PIPE;
	rf->f_data = (caddr_t)rpipe;
	rf->f_ops = &pipeops;

	error = falloc(p, &wf, &fd);
	if (error)
		goto free3;
	retval[1] = fd;
	wf->f_flag = FWRITE;
	wf->f_type = DTYPE_PIPE;
	wf->f_data = (caddr_t)wpipe;
	wf->f_ops = &pipeops;

	rpipe->pipe_peer = wpipe;
	wpipe->pipe_peer = rpipe;

	FILE_SET_MATURE(rf);
	FILE_SET_MATURE(wf);
	FILE_UNUSE(rf, p);
	FILE_UNUSE(wf, p);
	return (0);
free3:
	FILE_UNUSE(rf, p);
	ffree(rf);
	fdremove(p->p_fd, retval[0]);
free2:
	pipeclose(wpipe);
	pipeclose(rpipe);
#endif /* NetBSD */

	return (error);
}
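
/*
 * From userland the above is reached via pipe(2); a minimal
 * (hypothetical) consumer:
 *
 *	int fds[2];
 *	char c;
 *
 *	if (pipe(fds) == 0) {
 *		write(fds[1], "x", 1);	-- fds[1] is the write side
 *		read(fds[0], &c, 1);	-- fds[0] is the read side
 *	}
 */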

/*
 * Allocate kva for pipe circular buffer; the space is pageable.
 * This routine will 'realloc' the size of a pipe safely: if it
 * fails, it will retain the old buffer and return ENOMEM.
 */
static int
pipespace(cpipe, size)
	struct pipe *cpipe;
	int size;
{
	caddr_t buffer;
#ifdef __FreeBSD__
	struct vm_object *object;
	int npages, error;

	npages = round_page(size)/PAGE_SIZE;
	/*
	 * Create an object; I don't like the idea of paging to/from
	 * kernel_object.
	 */
	mtx_lock(&vm_mtx);
	object = vm_object_allocate(OBJT_DEFAULT, npages);
	buffer = (caddr_t) vm_map_min(kernel_map);

	/*
	 * Insert the object into the kernel map, and allocate kva for it.
	 * The map entry is, by default, pageable.
	 */
	error = vm_map_find(kernel_map, object, 0,
		(vm_offset_t *) &buffer, size, 1,
		VM_PROT_ALL, VM_PROT_ALL, 0);

	if (error != KERN_SUCCESS) {
		vm_object_deallocate(object);
		mtx_unlock(&vm_mtx);
		return (ENOMEM);
	}
#endif /* FreeBSD */

#ifdef __NetBSD__
	/*
	 * Allocate pageable virtual address space.  Physical memory is
	 * allocated on demand.
	 */
	buffer = (caddr_t) uvm_km_valloc(kernel_map, round_page(size));
	if (buffer == NULL)
		return (ENOMEM);
#endif /* NetBSD */

	/* free old resources if we're resizing */
	pipe_free_kmem(cpipe);
#ifdef __FreeBSD__
	mtx_unlock(&vm_mtx);
	cpipe->pipe_buffer.object = object;
#endif
	cpipe->pipe_buffer.buffer = buffer;
	cpipe->pipe_buffer.size = size;
	cpipe->pipe_buffer.in = 0;
	cpipe->pipe_buffer.out = 0;
	cpipe->pipe_buffer.cnt = 0;
	amountpipekva += cpipe->pipe_buffer.size;
	return (0);
}

/*
 * initialize and allocate VM and memory for pipe
 */
static int
pipe_create(cpipep, allockva)
	struct pipe **cpipep;
	int allockva;
{
	struct pipe *cpipe;
	int error;

#ifdef __FreeBSD__
	*cpipep = zalloc(pipe_zone);
#endif
#ifdef __NetBSD__
	*cpipep = pool_get(&pipe_pool, M_WAITOK);
#endif
	if (*cpipep == NULL)
		return (ENOMEM);

	cpipe = *cpipep;

	/* Initialize */
	memset(cpipe, 0, sizeof(*cpipe));
	cpipe->pipe_state = PIPE_SIGNALR;

	if (allockva && (error = pipespace(cpipe, PIPE_SIZE)))
		return (error);

	vfs_timestamp(&cpipe->pipe_ctime);
	cpipe->pipe_atime = cpipe->pipe_ctime;
	cpipe->pipe_mtime = cpipe->pipe_ctime;
#ifdef __NetBSD__
	cpipe->pipe_pgid = NO_PID;
	lockinit(&cpipe->pipe_lock, PRIBIO | PCATCH, "pipelk", 0, 0);
#endif

	return (0);
}
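
/*
 * A minimal sketch of how the I/O lock below is used by pipe_read()
 * and pipe_write() (error handling elided):
 *
 *	++cpipe->pipe_busy;			-- hold off pipeclose()
 *	if ((error = pipelock(cpipe, 1)) == 0) {
 *		... transfer data ...
 *		pipeunlock(cpipe);
 *	}
 *	--cpipe->pipe_busy;
 */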

/*
 * lock a pipe for I/O, blocking other access
 */
static __inline int
pipelock(cpipe, catch)
	struct pipe *cpipe;
	int catch;
{
	int error;

#ifdef __FreeBSD__
	while (cpipe->pipe_state & PIPE_LOCK) {
		cpipe->pipe_state |= PIPE_LWANT;
		error = tsleep(cpipe, catch ? (PRIBIO | PCATCH) : PRIBIO,
			"pipelk", 0);
		if (error != 0)
			return (error);
	}
	cpipe->pipe_state |= PIPE_LOCK;
	return (0);
#endif

#ifdef __NetBSD__
	do {
		error = lockmgr(&cpipe->pipe_lock, LK_EXCLUSIVE, NULL);
	} while (!catch && (error == EINTR || error == ERESTART));
	return (error);
#endif
}

/*
 * unlock a pipe I/O lock
 */
static __inline void
pipeunlock(cpipe)
	struct pipe *cpipe;
{
#ifdef __FreeBSD__
	cpipe->pipe_state &= ~PIPE_LOCK;
	if (cpipe->pipe_state & PIPE_LWANT) {
		cpipe->pipe_state &= ~PIPE_LWANT;
		wakeup(cpipe);
	}
#endif

#ifdef __NetBSD__
	lockmgr(&cpipe->pipe_lock, LK_RELEASE, NULL);
#endif
}
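
/*
 * A hypothetical async consumer of the wakeup below: a process that
 * has registered itself as pipe owner (TIOCSPGRP here, FIOSETOWN on
 * FreeBSD) and enabled FIOASYNC is sent SIGIO whenever the pipe
 * becomes readable or writable.
 */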

/*
 * Select/poll wakeup.  This also sends SIGIO to the peer connected to
 * the 'sigp' side of the pipe.
 */
static __inline void
pipeselwakeup(selp, sigp)
	struct pipe *selp, *sigp;
{
	if (selp->pipe_state & PIPE_SEL) {
		selp->pipe_state &= ~PIPE_SEL;
		selwakeup(&selp->pipe_sel);
	}
#ifdef __FreeBSD__
	if (sigp && (sigp->pipe_state & PIPE_ASYNC) && sigp->pipe_sigio)
		pgsigio(sigp->pipe_sigio, SIGIO, 0);
	KNOTE(&selp->pipe_sel.si_note, 0);
#endif

#ifdef __NetBSD__
	if (sigp && (sigp->pipe_state & PIPE_ASYNC)
	    && sigp->pipe_pgid != NO_PID) {
		struct proc *p;

		if (sigp->pipe_pgid < 0)
			gsignal(-sigp->pipe_pgid, SIGIO);
		else if (sigp->pipe_pgid > 0
		    && (p = pfind(sigp->pipe_pgid)) != NULL)
			psignal(p, SIGIO);
	}
#endif /* NetBSD */
}

/* ARGSUSED */
#ifdef __FreeBSD__
static int
pipe_read(fp, uio, cred, flags, p)
	struct file *fp;
	struct uio *uio;
	struct ucred *cred;
	int flags;
	struct proc *p;
#elif defined(__NetBSD__)
static int
pipe_read(fp, offset, uio, cred, flags)
	struct file *fp;
	off_t *offset;
	struct uio *uio;
	struct ucred *cred;
	int flags;
#endif
{
	struct pipe *rpipe = (struct pipe *) fp->f_data;
	int error;
	size_t nread = 0;
	size_t size;
	size_t ocnt;

	++rpipe->pipe_busy;
	error = pipelock(rpipe, 1);
	if (error)
		goto unlocked_error;

	ocnt = rpipe->pipe_buffer.cnt;

	while (uio->uio_resid) {
		/*
		 * normal pipe buffer receive
		 */
		if (rpipe->pipe_buffer.cnt > 0) {
			size = rpipe->pipe_buffer.size - rpipe->pipe_buffer.out;
			if (size > rpipe->pipe_buffer.cnt)
				size = rpipe->pipe_buffer.cnt;
			if (size > uio->uio_resid)
				size = uio->uio_resid;

			error = uiomove(&rpipe->pipe_buffer.buffer[rpipe->pipe_buffer.out],
					size, uio);
			if (error)
				break;

			rpipe->pipe_buffer.out += size;
			if (rpipe->pipe_buffer.out >= rpipe->pipe_buffer.size)
				rpipe->pipe_buffer.out = 0;

			rpipe->pipe_buffer.cnt -= size;

			/*
			 * If there is no more to read in the pipe, reset
			 * its pointers to the beginning.  This improves
			 * cache hit stats.
			 */
			if (rpipe->pipe_buffer.cnt == 0) {
				rpipe->pipe_buffer.in = 0;
				rpipe->pipe_buffer.out = 0;
			}
			nread += size;
#ifndef PIPE_NODIRECT
		/*
		 * Direct copy, bypassing a kernel buffer.
		 */
		} else if ((size = rpipe->pipe_map.cnt) &&
			   (rpipe->pipe_state & PIPE_DIRECTW)) {
			caddr_t va;
			if (size > uio->uio_resid)
				size = uio->uio_resid;

			va = (caddr_t) rpipe->pipe_map.kva +
			    rpipe->pipe_map.pos;
			error = uiomove(va, size, uio);
			if (error)
				break;
			nread += size;
			rpipe->pipe_map.pos += size;
			rpipe->pipe_map.cnt -= size;
			if (rpipe->pipe_map.cnt == 0) {
				rpipe->pipe_state &= ~PIPE_DIRECTW;
				wakeup(rpipe);
			}
#endif
		} else {
			/*
			 * detect EOF condition
			 * read returns 0 on EOF, no need to set error
			 */
			if (rpipe->pipe_state & PIPE_EOF)
				break;

			/*
			 * If the "write-side" has been blocked, wake it up
			 * now.
			 */
			if (rpipe->pipe_state & PIPE_WANTW) {
				rpipe->pipe_state &= ~PIPE_WANTW;
				wakeup(rpipe);
			}

			/*
			 * Break if some data was read.
			 */
			if (nread > 0)
				break;

			/*
			 * don't block on non-blocking I/O
			 */
			if (fp->f_flag & FNONBLOCK) {
				error = EAGAIN;
				break;
			}

			/*
			 * Unlock the pipe buffer for our remaining
			 * processing.  We will either break out with an
			 * error or we will sleep and relock to loop.
			 */
			pipeunlock(rpipe);

			/*
			 * We want to read more, wake up select/poll.
			 */
			pipeselwakeup(rpipe, rpipe->pipe_peer);

			rpipe->pipe_state |= PIPE_WANTR;
			error = tsleep(rpipe, PRIBIO | PCATCH, "piperd", 0);
			if (error != 0 || (error = pipelock(rpipe, 1)))
				goto unlocked_error;
		}
	}
	pipeunlock(rpipe);

	if (error == 0)
		vfs_timestamp(&rpipe->pipe_atime);
unlocked_error:
	--rpipe->pipe_busy;

	/*
	 * PIPE_WANTCLOSE processing only makes sense if pipe_busy is 0.
	 */
	if ((rpipe->pipe_busy == 0) && (rpipe->pipe_state & PIPE_WANTCLOSE)) {
		rpipe->pipe_state &= ~(PIPE_WANTCLOSE|PIPE_WANTW);
		wakeup(rpipe);
	} else if (rpipe->pipe_buffer.cnt < MINPIPESIZE) {
		/*
		 * Handle write blocking hysteresis.
		 */
		if (rpipe->pipe_state & PIPE_WANTW) {
			rpipe->pipe_state &= ~PIPE_WANTW;
			wakeup(rpipe);
		}
	}

	/*
	 * If anything was read off the buffer, signal to the writer it's
	 * possible to write more data.  Also send signal if we are here
	 * for the first time after last write.
	 */
	if ((rpipe->pipe_buffer.size - rpipe->pipe_buffer.cnt) >= PIPE_BUF
	    && (ocnt != rpipe->pipe_buffer.cnt
		|| (rpipe->pipe_state & PIPE_SIGNALR))) {
		pipeselwakeup(rpipe, rpipe->pipe_peer);
		rpipe->pipe_state &= ~PIPE_SIGNALR;
	}

	return (error);
}
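
/*
 * Worked example of the ring-buffer arithmetic in pipe_read() above
 * (hypothetical numbers): with size = 16384, out = 16000 and
 * cnt = 1000, the first uiomove() can take at most size - out = 384
 * contiguous bytes; 'out' then wraps to 0 and the remaining 616 bytes
 * are picked up on the next loop iteration.
 */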

#ifdef __FreeBSD__
#ifndef PIPE_NODIRECT
/*
 * Map the sending process's buffer into kernel space and wire it.
 * This is similar to a physical write operation.
 */
static int
pipe_build_write_buffer(wpipe, uio)
	struct pipe *wpipe;
	struct uio *uio;
{
	size_t size;
	int i;
	vm_offset_t addr, endaddr, paddr;

	size = uio->uio_iov->iov_len;
	if (size > wpipe->pipe_buffer.size)
		size = wpipe->pipe_buffer.size;

	endaddr = round_page((vm_offset_t)uio->uio_iov->iov_base + size);
	mtx_lock(&vm_mtx);
	addr = trunc_page((vm_offset_t)uio->uio_iov->iov_base);
	for (i = 0; addr < endaddr; addr += PAGE_SIZE, i++) {
		vm_page_t m;

		if (vm_fault_quick((caddr_t)addr, VM_PROT_READ) < 0 ||
		    (paddr = pmap_kextract(addr)) == 0) {
			int j;

			for (j = 0; j < i; j++)
				vm_page_unwire(wpipe->pipe_map.ms[j], 1);
			mtx_unlock(&vm_mtx);
			return (EFAULT);
		}

		m = PHYS_TO_VM_PAGE(paddr);
		vm_page_wire(m);
		wpipe->pipe_map.ms[i] = m;
	}

	/*
	 * set up the control block
	 */
	wpipe->pipe_map.npages = i;
	wpipe->pipe_map.pos =
	    ((vm_offset_t) uio->uio_iov->iov_base) & PAGE_MASK;
	wpipe->pipe_map.cnt = size;

	/*
	 * and map the buffer
	 */
	if (wpipe->pipe_map.kva == 0) {
		/*
		 * We need to allocate space for an extra page because the
		 * address range might (will) span pages at times.
		 */
		wpipe->pipe_map.kva = kmem_alloc_pageable(kernel_map,
			wpipe->pipe_buffer.size + PAGE_SIZE);
		amountpipekva += wpipe->pipe_buffer.size + PAGE_SIZE;
	}
	pmap_qenter(wpipe->pipe_map.kva, wpipe->pipe_map.ms,
		wpipe->pipe_map.npages);

	mtx_unlock(&vm_mtx);
	/*
	 * and update the uio data
	 */

	uio->uio_iov->iov_len -= size;
	uio->uio_iov->iov_base += size;
	if (uio->uio_iov->iov_len == 0)
		uio->uio_iov++;
	uio->uio_resid -= size;
	uio->uio_offset += size;
	return (0);
}
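
/*
 * The extra page allocated above matters because a user buffer rarely
 * starts on a page boundary: e.g. a PIPE_SIZE-byte buffer beginning at
 * offset 0x800 into a page touches one page more than
 * PIPE_SIZE / PAGE_SIZE pages.
 */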

/*
 * unmap and unwire the process buffer
 */
static void
pipe_destroy_write_buffer(wpipe)
	struct pipe *wpipe;
{
	int i;

	mtx_lock(&vm_mtx);
	if (wpipe->pipe_map.kva) {
		pmap_qremove(wpipe->pipe_map.kva, wpipe->pipe_map.npages);

		if (amountpipekva > maxpipekva) {
			vm_offset_t kva = wpipe->pipe_map.kva;
			wpipe->pipe_map.kva = 0;
			kmem_free(kernel_map, kva,
				wpipe->pipe_buffer.size + PAGE_SIZE);
			amountpipekva -= wpipe->pipe_buffer.size + PAGE_SIZE;
		}
	}
	for (i = 0; i < wpipe->pipe_map.npages; i++)
		vm_page_unwire(wpipe->pipe_map.ms[i], 1);
	mtx_unlock(&vm_mtx);
}

/*
 * In the case of a signal, the writing process might go away.  This
 * code copies the data into the circular buffer so that the source
 * pages can be freed without loss of data.
 */
static void
pipe_clone_write_buffer(wpipe)
	struct pipe *wpipe;
{
	int size;
	int pos;

	size = wpipe->pipe_map.cnt;
	pos = wpipe->pipe_map.pos;
	memcpy((caddr_t) wpipe->pipe_buffer.buffer,
	    (caddr_t) wpipe->pipe_map.kva + pos, size);

	wpipe->pipe_buffer.in = size;
	wpipe->pipe_buffer.out = 0;
	wpipe->pipe_buffer.cnt = size;
	wpipe->pipe_state &= ~PIPE_DIRECTW;

	pipe_destroy_write_buffer(wpipe);
}
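
/*
 * The direct write below is a handshake between writer (W) and
 * reader (R), roughly:
 *
 *	W: sleep while PIPE_DIRECTW is set or the pipe buffer is nonempty
 *	W: wire and map the user pages, set PIPE_DIRECTW, wake R
 *	R: copy from pipe_map; when drained, clear PIPE_DIRECTW, wake W
 *	W: unmap (or clone the data on a signal), return
 */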

/*
 * This implements the pipe buffer write mechanism.  Note that only
 * a direct write OR a normal pipe write can be pending at any given time.
 * If there are any characters in the pipe buffer, the direct write will
 * be deferred until the receiving process grabs all of the bytes from
 * the pipe buffer.  Then the direct mapping write is set-up.
 */
static int
pipe_direct_write(wpipe, uio)
	struct pipe *wpipe;
	struct uio *uio;
{
	int error;

retry:
	while (wpipe->pipe_state & PIPE_DIRECTW) {
		if (wpipe->pipe_state & PIPE_WANTR) {
			wpipe->pipe_state &= ~PIPE_WANTR;
			wakeup(wpipe);
		}
		wpipe->pipe_state |= PIPE_WANTW;
		error = tsleep(wpipe, PRIBIO | PCATCH, "pipdww", 0);
		if (error)
			goto error1;
		if (wpipe->pipe_state & PIPE_EOF) {
			error = EPIPE;
			goto error1;
		}
	}
	wpipe->pipe_map.cnt = 0;	/* transfer not ready yet */
	if (wpipe->pipe_buffer.cnt > 0) {
		if (wpipe->pipe_state & PIPE_WANTR) {
			wpipe->pipe_state &= ~PIPE_WANTR;
			wakeup(wpipe);
		}

		wpipe->pipe_state |= PIPE_WANTW;
		error = tsleep(wpipe, PRIBIO | PCATCH, "pipdwc", 0);
		if (error)
			goto error1;
		if (wpipe->pipe_state & PIPE_EOF) {
			error = EPIPE;
			goto error1;
		}
		goto retry;
	}

	wpipe->pipe_state |= PIPE_DIRECTW;

	error = pipe_build_write_buffer(wpipe, uio);
	if (error) {
		wpipe->pipe_state &= ~PIPE_DIRECTW;
		goto error1;
	}

	error = 0;
	while (!error && (wpipe->pipe_state & PIPE_DIRECTW)) {
		if (wpipe->pipe_state & PIPE_EOF) {
			pipelock(wpipe, 0);
			pipe_destroy_write_buffer(wpipe);
			pipeunlock(wpipe);
			pipeselwakeup(wpipe, wpipe);
			error = EPIPE;
			goto error1;
		}
		if (wpipe->pipe_state & PIPE_WANTR) {
			wpipe->pipe_state &= ~PIPE_WANTR;
			wakeup(wpipe);
		}
		pipeselwakeup(wpipe, wpipe);
		error = tsleep(wpipe, PRIBIO | PCATCH, "pipdwt", 0);
	}

	pipelock(wpipe, 0);
	if (wpipe->pipe_state & PIPE_DIRECTW) {
		/*
		 * this bit of trickery substitutes a kernel buffer for
		 * the process that might be going away.
		 */
		pipe_clone_write_buffer(wpipe);
	} else {
		pipe_destroy_write_buffer(wpipe);
	}
	pipeunlock(wpipe);
	return (error);

error1:
	wakeup(wpipe);
	return (error);
}
#endif /* !PIPE_NODIRECT */
#endif /* FreeBSD */

#ifdef __NetBSD__
#ifndef PIPE_NODIRECT
/*
 * Allocate structure for loan transfer.
 */
static __inline int
pipe_loan_alloc(wpipe, npages, blen)
	struct pipe *wpipe;
	int npages;
	vsize_t blen;
{
	wpipe->pipe_map.kva = uvm_km_valloc_wait(kernel_map, blen);
	if (wpipe->pipe_map.kva == NULL)
		return (ENOMEM);

	amountpipekva += blen;
	wpipe->pipe_map.npages = npages;
	wpipe->pipe_map.ms = (struct vm_page **) malloc(
		npages * sizeof(struct vm_page *), M_PIPE, M_WAITOK);

	return (0);
}

/*
 * Free resources allocated for loan transfer.
 */
static void
pipe_loan_free(wpipe)
	struct pipe *wpipe;
{
	uvm_km_free(kernel_map, wpipe->pipe_map.kva,
		wpipe->pipe_map.npages * PAGE_SIZE);
	wpipe->pipe_map.kva = NULL;
	amountpipekva -= wpipe->pipe_map.npages * PAGE_SIZE;
	free(wpipe->pipe_map.ms, M_PIPE);
	wpipe->pipe_map.ms = NULL;
}
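
/*
 * Chunking example for the loan path below (hypothetical sizes): if
 * PIPE_DIRECT_CHUNK were 64 KB, a 200 KB iovec would be loaned and
 * consumed in four pieces; pipe_write() keeps calling back into
 * pipe_direct_write() while uio_resid remains and iov_len stays at
 * or above PIPE_MINDIRECT.
 */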

/*
 * NetBSD direct write, using uvm_loan() mechanism.
 * This implements the pipe buffer write mechanism.  Note that only
 * a direct write OR a normal pipe write can be pending at any given time.
 * If there are any characters in the pipe buffer, the direct write will
 * be deferred until the receiving process grabs all of the bytes from
 * the pipe buffer.  Then the direct mapping write is set-up.
 */
static __inline int
pipe_direct_write(wpipe, uio)
	struct pipe *wpipe;
	struct uio *uio;
{
	int error, npages, j;
	struct vm_page **res = NULL;
	vaddr_t bbase, kva, base, bend;
	vsize_t blen, bcnt;
	voff_t bpos;

retry:
	while (wpipe->pipe_state & PIPE_DIRECTW) {
		if (wpipe->pipe_state & PIPE_WANTR) {
			wpipe->pipe_state &= ~PIPE_WANTR;
			wakeup(wpipe);
		}
		wpipe->pipe_state |= PIPE_WANTW;
		error = tsleep(wpipe, PRIBIO | PCATCH, "pipdww", 0);
		if (error)
			goto error;
		if (wpipe->pipe_state & PIPE_EOF) {
			error = EPIPE;
			goto error;
		}
	}
	wpipe->pipe_map.cnt = 0;	/* transfer not ready yet */
	if (wpipe->pipe_buffer.cnt > 0) {
		if (wpipe->pipe_state & PIPE_WANTR) {
			wpipe->pipe_state &= ~PIPE_WANTR;
			wakeup(wpipe);
		}

		wpipe->pipe_state |= PIPE_WANTW;
		error = tsleep(wpipe, PRIBIO | PCATCH, "pipdwc", 0);
		if (error)
			goto error;
		if (wpipe->pipe_state & PIPE_EOF) {
			error = EPIPE;
			goto error;
		}
		goto retry;
	}

	/*
	 * Handle first PIPE_DIRECT_CHUNK bytes of buffer.  Deal with
	 * buffers not aligned to PAGE_SIZE.
	 */
	bbase = (vaddr_t)uio->uio_iov->iov_base;
	base = trunc_page(bbase);
	bend = round_page(bbase + uio->uio_iov->iov_len);
	blen = bend - base;
	bpos = bbase - base;

	if (blen > PIPE_DIRECT_CHUNK) {
		blen = PIPE_DIRECT_CHUNK;
		bend = base + blen;
		bcnt = PIPE_DIRECT_CHUNK - bpos;
	} else
		bcnt = uio->uio_iov->iov_len;

	npages = blen / PAGE_SIZE;

	wpipe->pipe_map.pos = bpos;
	wpipe->pipe_map.cnt = bcnt;

	/*
	 * Free the old kva if we need more pages than we have
	 * allocated.
	 */
	if (wpipe->pipe_map.kva && npages > wpipe->pipe_map.npages)
		pipe_loan_free(wpipe);

	/* Allocate new kva. */
	if (!wpipe->pipe_map.kva
	    && (error = pipe_loan_alloc(wpipe, npages, blen)))
		goto error;

	/* Loan the write buffer memory from writer process */
	error = uvm_loan(&uio->uio_procp->p_vmspace->vm_map, base, blen,
		(void **) wpipe->pipe_map.ms, UVM_LOAN_TOPAGE);
	if (error)
		goto cleanup;
	res = wpipe->pipe_map.ms;

	/* Enter the loaned pages into kva */
	kva = wpipe->pipe_map.kva;
	for (j = 0; j < npages; j++, kva += PAGE_SIZE)
		pmap_enter(pmap_kernel(), kva, res[j]->phys_addr,
			VM_PROT_READ, 0);
	pmap_update(pmap_kernel());

	wpipe->pipe_state |= PIPE_DIRECTW;
	error = 0;
	while (!error && (wpipe->pipe_state & PIPE_DIRECTW)) {
		if (wpipe->pipe_state & PIPE_EOF) {
			error = EPIPE;
			break;
		}
		if (wpipe->pipe_state & PIPE_WANTR) {
			wpipe->pipe_state &= ~PIPE_WANTR;
			wakeup(wpipe);
		}
		pipeselwakeup(wpipe, wpipe);
		error = tsleep(wpipe, PRIBIO | PCATCH, "pipdwt", 0);
	}

	if (error)
		wpipe->pipe_state &= ~PIPE_DIRECTW;

cleanup:
	pipelock(wpipe, 0);
	if (res)
		uvm_unloan((void **) res, npages, UVM_LOAN_TOPAGE);
	if (error || amountpipekva > maxpipekva)
		pipe_loan_free(wpipe);
	pipeunlock(wpipe);

	if (error) {
		pipeselwakeup(wpipe, wpipe);

		/*
		 * If nothing was read from what we offered, return error
		 * straight on.  Otherwise update uio resid first.  Caller
		 * will deal with the error condition, returning short
		 * write, error, or restarting the write(2) as appropriate.
		 */
		if (wpipe->pipe_map.cnt == bcnt) {
error:
			wakeup(wpipe);
			return (error);
		}

		bcnt -= wpipe->pipe_map.cnt;
	}

	uio->uio_resid -= bcnt;
	/* uio_offset not updated, not set/used for write(2) */
	uio->uio_iov->iov_base = (char *)uio->uio_iov->iov_base + bcnt;
	uio->uio_iov->iov_len -= bcnt;
	if (uio->uio_iov->iov_len == 0) {
		uio->uio_iov++;
		uio->uio_iovcnt--;
	}

	return (error);
}
#endif /* !PIPE_NODIRECT */
#endif /* NetBSD */
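
/*
 * Note the ENOMEM convention shared by both direct-write
 * implementations: failure to obtain resources is not fatal, as
 * pipe_write() below simply falls back to the ordinary buffered copy
 * for that chunk.
 */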

#ifdef __FreeBSD__
static int
pipe_write(fp, uio, cred, flags, p)
	struct file *fp;
	struct uio *uio;
	struct ucred *cred;
	int flags;
	struct proc *p;
#elif defined(__NetBSD__)
static int
pipe_write(fp, offset, uio, cred, flags)
	struct file *fp;
	off_t *offset;
	struct uio *uio;
	struct ucred *cred;
	int flags;
#endif
{
	int error = 0;
	struct pipe *wpipe, *rpipe;

	rpipe = (struct pipe *) fp->f_data;
	wpipe = rpipe->pipe_peer;

	/*
	 * detect loss of pipe read side, issue SIGPIPE if lost.
	 */
	if ((wpipe == NULL) || (wpipe->pipe_state & PIPE_EOF))
		return (EPIPE);

	++wpipe->pipe_busy;

	/*
	 * If it is advantageous to resize the pipe buffer, do
	 * so.
	 */
	if ((uio->uio_resid > PIPE_SIZE) &&
	    (nbigpipe < maxbigpipes) &&
#ifndef PIPE_NODIRECT
	    (wpipe->pipe_state & PIPE_DIRECTW) == 0 &&
#endif
	    (wpipe->pipe_buffer.size <= PIPE_SIZE) &&
	    (wpipe->pipe_buffer.cnt == 0)) {

		if ((error = pipelock(wpipe, 1)) == 0) {
			if (pipespace(wpipe, BIG_PIPE_SIZE) == 0)
				nbigpipe++;
			pipeunlock(wpipe);
		} else {
			/*
			 * If an error occurred, unbusy and return, waking up
			 * any waiting readers.
			 */
			--wpipe->pipe_busy;
			if (wpipe->pipe_busy == 0
			    && (wpipe->pipe_state & PIPE_WANTCLOSE)) {
				wpipe->pipe_state &=
					~(PIPE_WANTCLOSE | PIPE_WANTR);
				wakeup(wpipe);
			}

			return (error);
		}
	}

#ifdef __FreeBSD__
	KASSERT(wpipe->pipe_buffer.buffer != NULL, ("pipe buffer gone"));
#endif

	while (uio->uio_resid) {
		int space;

#ifndef PIPE_NODIRECT
		/*
		 * If the transfer is large, we can gain performance if
		 * we do process-to-process copies directly.
		 * If the write is non-blocking, we don't use the
		 * direct write mechanism.
		 *
		 * The direct write mechanism will detect the reader going
		 * away on us.
		 */
		if ((uio->uio_iov->iov_len >= PIPE_MINDIRECT) &&
		    (fp->f_flag & FNONBLOCK) == 0 &&
		    (wpipe->pipe_map.kva || (amountpipekva < limitpipekva))) {
			error = pipe_direct_write(wpipe, uio);

			/*
			 * Break out if error occurred, unless it's ENOMEM.
			 * ENOMEM means we failed to allocate some resources
			 * for direct write, so we just fall back to ordinary
			 * write.  If the direct write was successful,
			 * process rest of data via ordinary write.
			 */
			if (!error)
				continue;

			if (error != ENOMEM)
				break;
		}
#endif /* PIPE_NODIRECT */

		/*
		 * Pipe buffered writes cannot be coincidental with
		 * direct writes.  We wait until the currently executing
		 * direct write is completed before we start filling the
		 * pipe buffer.  We break out if a signal occurs or the
		 * reader goes away.
		 */
	retrywrite:
		while (wpipe->pipe_state & PIPE_DIRECTW) {
			if (wpipe->pipe_state & PIPE_WANTR) {
				wpipe->pipe_state &= ~PIPE_WANTR;
				wakeup(wpipe);
			}
			error = tsleep(wpipe, PRIBIO | PCATCH, "pipbww", 0);
			if (wpipe->pipe_state & PIPE_EOF)
				break;
			if (error)
				break;
		}
		if (wpipe->pipe_state & PIPE_EOF) {
			error = EPIPE;
			break;
		}

		space = wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt;

		/* Writes of size <= PIPE_BUF must be atomic. */
		if ((space < uio->uio_resid) && (uio->uio_resid <= PIPE_BUF))
			space = 0;
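
		/*
		 * Example of the atomicity rule just applied (assuming the
		 * POSIX minimum PIPE_BUF of 512): a 400-byte write either
		 * fits into the buffer in one piece or the writer sleeps
		 * until it does, so two writers' 400-byte records can never
		 * interleave.  Writes larger than PIPE_BUF may be split.
		 */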

		if (space > 0) {
			int size;	/* Transfer size */
			int segsize;	/* first segment to transfer */

			if ((error = pipelock(wpipe, 1)) != 0)
				break;

			/*
			 * It is possible for a direct write to
			 * slip in on us... handle it here...
			 */
			if (wpipe->pipe_state & PIPE_DIRECTW) {
				pipeunlock(wpipe);
				goto retrywrite;
			}
			/*
			 * If a process blocked in uiomove, our
			 * value for space might be bad.
			 *
			 * XXX will we be ok if the reader has gone
			 * away here?
			 */
			if (space > wpipe->pipe_buffer.size -
			    wpipe->pipe_buffer.cnt) {
				pipeunlock(wpipe);
				goto retrywrite;
			}

			/*
			 * Transfer size is minimum of uio transfer
			 * and free space in pipe buffer.
			 */
			if (space > uio->uio_resid)
				size = uio->uio_resid;
			else
				size = space;
			/*
			 * First segment to transfer is minimum of
			 * transfer size and contiguous space in
			 * pipe buffer.  If first segment to transfer
			 * is less than the transfer size, we've got
			 * a wraparound in the buffer.
			 */
			segsize = wpipe->pipe_buffer.size -
				wpipe->pipe_buffer.in;
			if (segsize > size)
				segsize = size;

			/* Transfer first segment */

			error = uiomove(&wpipe->pipe_buffer.buffer[wpipe->pipe_buffer.in],
					segsize, uio);

			if (error == 0 && segsize < size) {
				/*
				 * Transfer remaining part now, to
				 * support atomic writes.  Wraparound
				 * happened.
				 */
#ifdef DEBUG
				if (wpipe->pipe_buffer.in + segsize !=
				    wpipe->pipe_buffer.size)
					panic("Expected pipe buffer wraparound disappeared");
#endif

				error = uiomove(&wpipe->pipe_buffer.buffer[0],
						size - segsize, uio);
			}
			if (error == 0) {
				wpipe->pipe_buffer.in += size;
				if (wpipe->pipe_buffer.in >=
				    wpipe->pipe_buffer.size) {
#ifdef DEBUG
					if (wpipe->pipe_buffer.in !=
					    size - segsize +
					    wpipe->pipe_buffer.size)
						panic("Expected wraparound bad");
#endif
					wpipe->pipe_buffer.in = size - segsize;
				}

				wpipe->pipe_buffer.cnt += size;
#ifdef DEBUG
				if (wpipe->pipe_buffer.cnt >
				    wpipe->pipe_buffer.size)
					panic("Pipe buffer overflow");
#endif

			}
			pipeunlock(wpipe);
			if (error)
				break;

		} else {
			/*
			 * If the "read-side" has been blocked, wake it up
			 * now.
			 */
			if (wpipe->pipe_state & PIPE_WANTR) {
				wpipe->pipe_state &= ~PIPE_WANTR;
				wakeup(wpipe);
			}

			/*
			 * don't block on non-blocking I/O
			 */
			if (fp->f_flag & FNONBLOCK) {
				error = EAGAIN;
				break;
			}

			/*
			 * We have no more space and have something to offer,
			 * wake up select/poll.
			 */
			pipeselwakeup(wpipe, wpipe);

			wpipe->pipe_state |= PIPE_WANTW;
			error = tsleep(wpipe, PRIBIO | PCATCH, "pipewr", 0);
			if (error != 0)
				break;
			/*
			 * If read side wants to go away, we just issue a
			 * signal to ourselves.
			 */
			if (wpipe->pipe_state & PIPE_EOF) {
				error = EPIPE;
				break;
			}
		}
	}

	--wpipe->pipe_busy;
	if ((wpipe->pipe_busy == 0) && (wpipe->pipe_state & PIPE_WANTCLOSE)) {
		wpipe->pipe_state &= ~(PIPE_WANTCLOSE | PIPE_WANTR);
		wakeup(wpipe);
	} else if (wpipe->pipe_buffer.cnt > 0) {
		/*
		 * If we have put any characters in the buffer, we wake up
		 * the reader.
		 */
		if (wpipe->pipe_state & PIPE_WANTR) {
			wpipe->pipe_state &= ~PIPE_WANTR;
			wakeup(wpipe);
		}
	}

	/*
	 * Don't return EPIPE if I/O was successful
	 */
	if ((error == EPIPE) && (wpipe->pipe_buffer.cnt == 0)
	    && (uio->uio_resid == 0))
		error = 0;

	if (error == 0)
		vfs_timestamp(&wpipe->pipe_mtime);

	/*
	 * We have something to offer, wake up select/poll.
	 * wpipe->pipe_map.cnt is always 0 at this point (direct write
	 * is only done synchronously), so check only wpipe->pipe_buffer.cnt.
	 */
	if (wpipe->pipe_buffer.cnt)
		pipeselwakeup(wpipe, wpipe);

	/*
	 * Arrange for next read(2) to do a signal.
	 */
	wpipe->pipe_state |= PIPE_SIGNALR;

	return (error);
}
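
/*
 * Hypothetical userland use of the ioctls handled below:
 *
 *	int on = 1, n;
 *
 *	ioctl(fd, FIOASYNC, &on);	-- enable SIGIO delivery
 *	ioctl(fd, FIONREAD, &n);	-- n = bytes immediately readable
 */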

/*
 * we implement a very minimal set of ioctls for compatibility with sockets.
 */
int
pipe_ioctl(fp, cmd, data, p)
	struct file *fp;
	u_long cmd;
	caddr_t data;
	struct proc *p;
{
	struct pipe *mpipe = (struct pipe *)fp->f_data;

	switch (cmd) {

	case FIONBIO:
		return (0);

	case FIOASYNC:
		if (*(int *)data) {
			mpipe->pipe_state |= PIPE_ASYNC;
		} else {
			mpipe->pipe_state &= ~PIPE_ASYNC;
		}
		return (0);

	case FIONREAD:
#ifndef PIPE_NODIRECT
		if (mpipe->pipe_state & PIPE_DIRECTW)
			*(int *)data = mpipe->pipe_map.cnt;
		else
#endif
			*(int *)data = mpipe->pipe_buffer.cnt;
		return (0);

#ifdef __FreeBSD__
	case FIOSETOWN:
		return (fsetown(*(int *)data, &mpipe->pipe_sigio));

	case FIOGETOWN:
		*(int *)data = fgetown(mpipe->pipe_sigio);
		return (0);

	/* This is deprecated, FIOSETOWN should be used instead. */
	case TIOCSPGRP:
		return (fsetown(-(*(int *)data), &mpipe->pipe_sigio));

	/* This is deprecated, FIOGETOWN should be used instead. */
	case TIOCGPGRP:
		*(int *)data = -fgetown(mpipe->pipe_sigio);
		return (0);
#endif /* FreeBSD */
#ifdef __NetBSD__
	case TIOCSPGRP:
		mpipe->pipe_pgid = *(int *)data;
		return (0);

	case TIOCGPGRP:
		*(int *)data = mpipe->pipe_pgid;
		return (0);
#endif /* NetBSD */

	}
	return (ENOTTY);
}
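
/*
 * A reader typically multiplexes on a pipe like this (hypothetical):
 *
 *	struct pollfd pfd;
 *
 *	pfd.fd = fds[0];
 *	pfd.events = POLLIN;
 *	poll(&pfd, 1, INFTIM);
 *
 * pipe_poll() below reports POLLIN when buffered or direct-write data
 * is available (or on EOF), and POLLHUP once the peer has gone away.
 */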

int
pipe_poll(fp, events, p)
	struct file *fp;
	int events;
	struct proc *p;
{
	struct pipe *rpipe = (struct pipe *)fp->f_data;
	struct pipe *wpipe;
	int revents = 0;

	wpipe = rpipe->pipe_peer;
	if (events & (POLLIN | POLLRDNORM))
		if ((rpipe->pipe_buffer.cnt > 0) ||
#ifndef PIPE_NODIRECT
		    (rpipe->pipe_state & PIPE_DIRECTW) ||
#endif
		    (rpipe->pipe_state & PIPE_EOF))
			revents |= events & (POLLIN | POLLRDNORM);

	if (events & (POLLOUT | POLLWRNORM))
		if (wpipe == NULL || (wpipe->pipe_state & PIPE_EOF)
		    || (
#ifndef PIPE_NODIRECT
			((wpipe->pipe_state & PIPE_DIRECTW) == 0) &&
#endif
			(wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt) >= PIPE_BUF))
			revents |= events & (POLLOUT | POLLWRNORM);

	if ((rpipe->pipe_state & PIPE_EOF) ||
	    (wpipe == NULL) ||
	    (wpipe->pipe_state & PIPE_EOF))
		revents |= POLLHUP;

	if (revents == 0) {
		if (events & (POLLIN | POLLRDNORM)) {
			selrecord(p, &rpipe->pipe_sel);
			rpipe->pipe_state |= PIPE_SEL;
		}

		if (events & (POLLOUT | POLLWRNORM)) {
			selrecord(p, &wpipe->pipe_sel);
			wpipe->pipe_state |= PIPE_SEL;
		}
	}

	return (revents);
}

static int
pipe_stat(fp, ub, p)
	struct file *fp;
	struct stat *ub;
	struct proc *p;
{
	struct pipe *pipe = (struct pipe *)fp->f_data;

	memset((caddr_t)ub, 0, sizeof(*ub));
	ub->st_mode = S_IFIFO;
	ub->st_blksize = pipe->pipe_buffer.size;
	ub->st_size = pipe->pipe_buffer.cnt;
	ub->st_blocks = (ub->st_size) ? 1 : 0;
#ifdef __FreeBSD__
	ub->st_atimespec = pipe->pipe_atime;
	ub->st_mtimespec = pipe->pipe_mtime;
	ub->st_ctimespec = pipe->pipe_ctime;
#endif /* FreeBSD */
#ifdef __NetBSD__
	TIMEVAL_TO_TIMESPEC(&pipe->pipe_atime, &ub->st_atimespec);
	TIMEVAL_TO_TIMESPEC(&pipe->pipe_mtime, &ub->st_mtimespec);
	TIMEVAL_TO_TIMESPEC(&pipe->pipe_ctime, &ub->st_ctimespec);
#endif /* NetBSD */
	ub->st_uid = fp->f_cred->cr_uid;
	ub->st_gid = fp->f_cred->cr_gid;
	/*
	 * Left as 0: st_dev, st_ino, st_nlink, st_rdev, st_flags, st_gen.
	 * XXX (st_dev, st_ino) should be unique.
	 */
	return (0);
}
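
/*
 * Thus fstat(2) on a pipe descriptor reports S_IFIFO with st_size
 * equal to the number of bytes currently buffered; e.g. after a
 * 100-byte write and no read, st_size is 100 and st_blocks is 1.
 */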

/* ARGSUSED */
static int
pipe_close(fp, p)
	struct file *fp;
	struct proc *p;
{
	struct pipe *cpipe = (struct pipe *)fp->f_data;

#ifdef __FreeBSD__
	fp->f_ops = &badfileops;
	funsetown(cpipe->pipe_sigio);
#endif
	fp->f_data = NULL;
	pipeclose(cpipe);
	return (0);
}

static void
pipe_free_kmem(cpipe)
	struct pipe *cpipe;
{

#ifdef __FreeBSD__
	mtx_assert(&vm_mtx, MA_OWNED);
#endif
	if (cpipe->pipe_buffer.buffer != NULL) {
		if (cpipe->pipe_buffer.size > PIPE_SIZE)
			--nbigpipe;
		amountpipekva -= cpipe->pipe_buffer.size;
#ifdef __FreeBSD__
		kmem_free(kernel_map,
			(vm_offset_t)cpipe->pipe_buffer.buffer,
			cpipe->pipe_buffer.size);
#elif defined(__NetBSD__)
		uvm_km_free(kernel_map,
			(vaddr_t)cpipe->pipe_buffer.buffer,
			cpipe->pipe_buffer.size);
#endif /* NetBSD */

		cpipe->pipe_buffer.buffer = NULL;
	}
#ifndef PIPE_NODIRECT
	if (cpipe->pipe_map.kva != NULL) {
#ifdef __FreeBSD__
		amountpipekva -= cpipe->pipe_buffer.size + PAGE_SIZE;
		kmem_free(kernel_map,
			cpipe->pipe_map.kva,
			cpipe->pipe_buffer.size + PAGE_SIZE);
#elif defined(__NetBSD__)
		pipe_loan_free(cpipe);
#endif /* NetBSD */
		cpipe->pipe_map.cnt = 0;
		cpipe->pipe_map.kva = NULL;
		cpipe->pipe_map.pos = 0;
		cpipe->pipe_map.npages = 0;
	}
#endif /* !PIPE_NODIRECT */
}

/*
 * shutdown the pipe
 */
static void
pipeclose(cpipe)
	struct pipe *cpipe;
{
	struct pipe *ppipe;

	if (!cpipe)
		return;

	pipeselwakeup(cpipe, cpipe);

	/*
	 * If the other side is blocked, wake it up saying that
	 * we want to close it down.
	 */
	while (cpipe->pipe_busy) {
		wakeup(cpipe);
		cpipe->pipe_state |= PIPE_WANTCLOSE | PIPE_EOF;
		tsleep(cpipe, PRIBIO, "pipecl", 0);
	}

	/*
	 * Disconnect from peer
	 */
	if ((ppipe = cpipe->pipe_peer) != NULL) {
		pipeselwakeup(ppipe, ppipe);

		ppipe->pipe_state |= PIPE_EOF;
		wakeup(ppipe);
		ppipe->pipe_peer = NULL;
	}

	/*
	 * free resources
	 */
#ifdef __FreeBSD__
	mtx_lock(&vm_mtx);
	pipe_free_kmem(cpipe);
	/*
	 * XXX: erm, doesn't zalloc already have its own locks and
	 * not need the giant vm lock?
	 */
	zfree(pipe_zone, cpipe);
	mtx_unlock(&vm_mtx);
#endif /* FreeBSD */

#ifdef __NetBSD__
	pipe_free_kmem(cpipe);
	(void) lockmgr(&cpipe->pipe_lock, LK_DRAIN, NULL);
	pool_put(&pipe_pool, cpipe);
#endif
}
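
/*
 * On FreeBSD the filters below back kqueue(2); a hypothetical reader
 * registration:
 *
 *	struct kevent kev;
 *
 *	EV_SET(&kev, fds[0], EVFILT_READ, EV_ADD, 0, 0, NULL);
 *	kevent(kq, &kev, 1, NULL, 0, NULL);
 *
 * filt_piperead() then reports the readable byte count in kn_data and
 * sets EV_EOF once the write side disappears.
 */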

#ifdef __FreeBSD__
/*ARGSUSED*/
static int
pipe_kqfilter(struct file *fp, struct knote *kn)
{
	struct pipe *cpipe = (struct pipe *)kn->kn_fp->f_data;

	switch (kn->kn_filter) {
	case EVFILT_READ:
		kn->kn_fop = &pipe_rfiltops;
		break;
	case EVFILT_WRITE:
		kn->kn_fop = &pipe_wfiltops;
		cpipe = cpipe->pipe_peer;
		break;
	default:
		return (1);
	}
	kn->kn_hook = (caddr_t)cpipe;

	SLIST_INSERT_HEAD(&cpipe->pipe_sel.si_note, kn, kn_selnext);
	return (0);
}

static void
filt_pipedetach(struct knote *kn)
{
	struct pipe *cpipe = (struct pipe *)kn->kn_fp->f_data;

	SLIST_REMOVE(&cpipe->pipe_sel.si_note, kn, knote, kn_selnext);
}

/*ARGSUSED*/
static int
filt_piperead(struct knote *kn, long hint)
{
	struct pipe *rpipe = (struct pipe *)kn->kn_fp->f_data;
	struct pipe *wpipe = rpipe->pipe_peer;

	kn->kn_data = rpipe->pipe_buffer.cnt;
	if ((kn->kn_data == 0) && (rpipe->pipe_state & PIPE_DIRECTW))
		kn->kn_data = rpipe->pipe_map.cnt;

	if ((rpipe->pipe_state & PIPE_EOF) ||
	    (wpipe == NULL) || (wpipe->pipe_state & PIPE_EOF)) {
		kn->kn_flags |= EV_EOF;
		return (1);
	}
	return (kn->kn_data > 0);
}

/*ARGSUSED*/
static int
filt_pipewrite(struct knote *kn, long hint)
{
	struct pipe *rpipe = (struct pipe *)kn->kn_fp->f_data;
	struct pipe *wpipe = rpipe->pipe_peer;

	if ((wpipe == NULL) || (wpipe->pipe_state & PIPE_EOF)) {
		kn->kn_data = 0;
		kn->kn_flags |= EV_EOF;
		return (1);
	}
	kn->kn_data = wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt;
	if (wpipe->pipe_state & PIPE_DIRECTW)
		kn->kn_data = 0;

	return (kn->kn_data >= PIPE_BUF);
}
#endif /* FreeBSD */

#ifdef __NetBSD__
static int
pipe_fcntl(fp, cmd, data, p)
	struct file *fp;
	u_int cmd;
	caddr_t data;
	struct proc *p;
{
	if (cmd == F_SETFL)
		return (0);
	else
		return (EOPNOTSUPP);
}

/*
 * Handle pipe sysctls.
 */
int
sysctl_dopipe(name, namelen, oldp, oldlenp, newp, newlen)
	int *name;
	u_int namelen;
	void *oldp;
	size_t *oldlenp;
	void *newp;
	size_t newlen;
{
	/* All sysctl names at this level are terminal. */
	if (namelen != 1)
		return (ENOTDIR);		/* overloaded */

	switch (name[0]) {
	case KERN_PIPE_MAXKVASZ:
		return (sysctl_int(oldp, oldlenp, newp, newlen, &maxpipekva));
	case KERN_PIPE_LIMITKVA:
		return (sysctl_int(oldp, oldlenp, newp, newlen, &limitpipekva));
	case KERN_PIPE_MAXBIGPIPES:
		return (sysctl_int(oldp, oldlenp, newp, newlen, &maxbigpipes));
	case KERN_PIPE_NBIGPIPES:
		return (sysctl_rdint(oldp, oldlenp, newp, nbigpipe));
	case KERN_PIPE_KVASIZE:
		return (sysctl_rdint(oldp, oldlenp, newp, amountpipekva));
	default:
		return (EOPNOTSUPP);
	}
	/* NOTREACHED */
}

/*
 * Initialize pipe structs.
 */
void
pipe_init(void)
{
	pool_init(&pipe_pool, sizeof(struct pipe), 0, 0, 0, "pipepl",
	    0, NULL, NULL, M_PIPE);
}

#endif /* __NetBSD__ */