1 /* $NetBSD: sys_pipe.c,v 1.12 2001/09/20 19:09:13 jdolecek Exp $ */ 2 3 /* 4 * Copyright (c) 1996 John S. Dyson 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice immediately at the beginning of the file, without modification, 12 * this list of conditions, and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. Absolutely no warranty of function or purpose is made by the author 17 * John S. Dyson. 18 * 4. Modifications may be freely made to this file if the above conditions 19 * are met. 20 * 21 * $FreeBSD: src/sys/kern/sys_pipe.c,v 1.82 2001/06/15 20:45:01 jlemon Exp $ 22 */ 23 24 /* 25 * This file contains a high-performance replacement for the socket-based 26 * pipes scheme originally used in FreeBSD/4.4Lite. It does not support 27 * all features of sockets, but does do everything that pipes normally 28 * do. 29 * 30 * Adaption for NetBSD UVM, including uvm_loan() based direct write, was 31 * written by Jaromir Dolecek. 32 */ 33 34 /* 35 * This code has two modes of operation, a small write mode and a large 36 * write mode. The small write mode acts like conventional pipes with 37 * a kernel buffer. If the buffer is less than PIPE_MINDIRECT, then the 38 * "normal" pipe buffering is done. If the buffer is between PIPE_MINDIRECT 39 * and PIPE_SIZE in size, it is fully mapped into the kernel (on FreeBSD, 40 * those pages are also wired), and the receiving process can copy it directly 41 * from the pages in the sending process. 
42 * 43 * If the sending process receives a signal, it is possible that it will 44 * go away, and certainly its address space can change, because control 45 * is returned back to the user-mode side. In that case, the pipe code 46 * arranges to copy the buffer supplied by the user process on FreeBSD, to 47 * a pageable kernel buffer, and the receiving process will grab the data 48 * from the pageable kernel buffer. Since signals don't happen all that often, 49 * the copy operation is normally eliminated. 50 * For NetBSD, the pages are mapped read-only, COW for kernel by uvm_loan(), 51 * so no explicit handling need to be done, all is handled by standard VM 52 * facilities. 53 * 54 * The constant PIPE_MINDIRECT is chosen to make sure that buffering will 55 * happen for small transfers so that the system will not spend all of 56 * its time context switching. PIPE_SIZE is constrained by the 57 * amount of kernel virtual memory. 58 */ 59 60 #include <sys/param.h> 61 #include <sys/systm.h> 62 #include <sys/proc.h> 63 #include <sys/fcntl.h> 64 #include <sys/file.h> 65 #include <sys/filedesc.h> 66 #include <sys/filio.h> 67 #include <sys/ttycom.h> 68 #include <sys/stat.h> 69 #include <sys/poll.h> 70 #include <sys/signalvar.h> 71 #include <sys/vnode.h> 72 #include <sys/uio.h> 73 #include <sys/lock.h> 74 #ifdef __FreeBSD__ 75 #include <sys/mutex.h> 76 #include <sys/selinfo.h> 77 #include <sys/sysproto.h> 78 #elif defined(__NetBSD__) 79 #include <sys/select.h> 80 #include <sys/malloc.h> 81 #include <sys/mount.h> 82 #include <sys/syscallargs.h> 83 #include <uvm/uvm.h> 84 #include <sys/sysctl.h> 85 #endif /* NetBSD, FreeBSD */ 86 87 #include <sys/pipe.h> 88 89 #ifdef __NetBSD__ 90 #define vfs_timestamp(tv) microtime(tv) 91 #endif 92 93 /* 94 * Use this define if you want to disable *fancy* VM things. Expect an 95 * approx 30% decrease in transfer rate. This could be useful for 96 * OpenBSD. 
 */
/* #define PIPE_NODIRECT */

/*
 * interfaces to the outside world
 */
#ifdef __FreeBSD__
static int pipe_read __P((struct file *fp, struct uio *uio,
		struct ucred *cred, int flags, struct proc *p));
static int pipe_write __P((struct file *fp, struct uio *uio,
		struct ucred *cred, int flags, struct proc *p));
static int pipe_close __P((struct file *fp, struct proc *p));
static int pipe_poll __P((struct file *fp, int events, struct ucred *cred,
		struct proc *p));
static int pipe_kqfilter __P((struct file *fp, struct knote *kn));
static int pipe_stat __P((struct file *fp, struct stat *sb, struct proc *p));
static int pipe_ioctl __P((struct file *fp, u_long cmd, caddr_t data, struct proc *p));

/* Method table hooked into struct file for DTYPE_PIPE descriptors. */
static struct fileops pipeops = {
	pipe_read, pipe_write, pipe_ioctl, pipe_poll, pipe_kqfilter,
	pipe_stat, pipe_close
};

static void filt_pipedetach(struct knote *kn);
static int filt_piperead(struct knote *kn, long hint);
static int filt_pipewrite(struct knote *kn, long hint);

static struct filterops pipe_rfiltops =
	{ 1, NULL, filt_pipedetach, filt_piperead };
static struct filterops pipe_wfiltops =
	{ 1, NULL, filt_pipedetach, filt_pipewrite };
#endif /* FreeBSD */

#ifdef __NetBSD__
static int pipe_read __P((struct file *fp, off_t *offset, struct uio *uio,
		struct ucred *cred, int flags));
static int pipe_write __P((struct file *fp, off_t *offset, struct uio *uio,
		struct ucred *cred, int flags));
static int pipe_close __P((struct file *fp, struct proc *p));
static int pipe_poll __P((struct file *fp, int events, struct proc *p));
static int pipe_fcntl __P((struct file *fp, u_int com, caddr_t data,
		struct proc *p));
static int pipe_stat __P((struct file *fp, struct stat *sb, struct proc *p));
static int pipe_ioctl __P((struct file *fp, u_long cmd, caddr_t data, struct proc *p));

/* Method table hooked into struct file for DTYPE_PIPE descriptors. */
static struct fileops pipeops =
    { pipe_read, pipe_write, pipe_ioctl, pipe_fcntl, pipe_poll,
      pipe_stat, pipe_close };
#endif /* NetBSD */

/*
 * Default pipe buffer size(s), this can be kind-of large now because pipe
 * space is pageable.  The pipe code will try to maintain locality of
 * reference for performance reasons, so small amounts of outstanding I/O
 * will not wipe the cache.
 */
#define MINPIPESIZE (PIPE_SIZE/3)
#define MAXPIPESIZE (2*PIPE_SIZE/3)

/*
 * Maximum amount of kva for pipes -- this is kind-of a soft limit, but
 * is there so that on large systems, we don't exhaust it.
 */
#define MAXPIPEKVA (8*1024*1024)
static int maxpipekva = MAXPIPEKVA;

/*
 * Limit for direct transfers, we cannot, of course limit
 * the amount of kva for pipes in general though.
 */
#define LIMITPIPEKVA (16*1024*1024)
static int limitpipekva = LIMITPIPEKVA;

/*
 * Limit the number of "big" pipes
 */
#define LIMITBIGPIPES	32
static int maxbigpipes = LIMITBIGPIPES;
static int nbigpipe = 0;

/*
 * Amount of KVA consumed by pipe buffers.
 */
static int amountpipekva = 0;

static void pipeclose __P((struct pipe *cpipe));
static void pipe_free_kmem __P((struct pipe *cpipe));
static int pipe_create __P((struct pipe **cpipep, int allockva));
static __inline int pipelock __P((struct pipe *cpipe, int catch));
static __inline void pipeunlock __P((struct pipe *cpipe));
static __inline void pipeselwakeup __P((struct pipe *selp,
		struct pipe *sigp));
static int pipespace __P((struct pipe *cpipe, int size));

#ifdef __FreeBSD__
#ifndef PIPE_NODIRECT
static int pipe_build_write_buffer __P((struct pipe *wpipe, struct uio *uio));
static void pipe_destroy_write_buffer __P((struct pipe *wpipe));
static int pipe_direct_write __P((struct pipe *wpipe, struct uio *uio));
static void pipe_clone_write_buffer __P((struct pipe *wpipe));
#endif

static vm_zone_t pipe_zone;
#endif /* FreeBSD */

#ifdef __NetBSD__
#ifndef PIPE_NODIRECT
static __inline int pipe_direct_write __P((struct pipe *wpipe, struct uio *uio));
static __inline int pipe_loan_alloc __P((struct pipe *wpipe, int npages,
		vsize_t blen));
static void pipe_loan_free __P((struct pipe *wpipe));
#endif /* PIPE_NODIRECT */

static struct pool pipe_pool;
#endif /* NetBSD */

/*
 * The pipe system call for the DTYPE_PIPE type of pipes
 *
 * Creates the two pipe endpoints, allocates two file descriptors and
 * returns them in retval[0] (read side) / retval[1] (write side).
 */

/* ARGSUSED */
#ifdef __FreeBSD__
int
pipe(p, uap)
	struct proc *p;
	struct pipe_args /* {
		int	dummy;
	} */ *uap;
#elif defined(__NetBSD__)
int
sys_pipe(p, v, retval)
	struct proc *p;
	void *v;
	register_t *retval;
#endif
{
	struct file *rf, *wf;
	struct pipe *rpipe, *wpipe;
	int fd, error;

#ifdef __FreeBSD__
	if (pipe_zone == NULL)
		pipe_zone = zinit("PIPE", sizeof(struct pipe), 0, 0, 4);

	rpipe = wpipe = NULL;
	if (pipe_create(&rpipe, 1) || pipe_create(&wpipe, 1)) {
		pipeclose(rpipe);
		pipeclose(wpipe);
		return (ENFILE);
	}

	error = falloc(p, &rf, &fd);
	if (error) {
		pipeclose(rpipe);
		pipeclose(wpipe);
		return (error);
	}
	fhold(rf);
	p->p_retval[0] = fd;

	/*
	 * Warning: once we've gotten past allocation of the fd for the
	 * read-side, we can only drop the read side via fdrop() in order
	 * to avoid races against processes which manage to dup() the read
	 * side while we are blocked trying to allocate the write side.
	 */
	rf->f_flag = FREAD | FWRITE;
	rf->f_type = DTYPE_PIPE;
	rf->f_data = (caddr_t)rpipe;
	rf->f_ops = &pipeops;
	error = falloc(p, &wf, &fd);
	if (error) {
		struct filedesc *fdp = p->p_fd;

		/* back out the read-side fd if nobody swiped it meanwhile */
		if (fdp->fd_ofiles[p->p_retval[0]] == rf) {
			fdp->fd_ofiles[p->p_retval[0]] = NULL;
			fdrop(rf, p);
		}
		fdrop(rf, p);
		/* rpipe has been closed by fdrop(). */
		pipeclose(wpipe);
		return (error);
	}
	wf->f_flag = FREAD | FWRITE;
	wf->f_type = DTYPE_PIPE;
	wf->f_data = (caddr_t)wpipe;
	wf->f_ops = &pipeops;
	p->p_retval[1] = fd;

	rpipe->pipe_peer = wpipe;
	wpipe->pipe_peer = rpipe;
	fdrop(rf, p);
#endif /* FreeBSD */

#ifdef __NetBSD__
	/* only the read side gets its kva buffer up front */
	rpipe = wpipe = NULL;
	if (pipe_create(&rpipe, 1) || pipe_create(&wpipe, 0)) {
		pipeclose(rpipe);
		pipeclose(wpipe);
		return (ENFILE);
	}

	/*
	 * Note: the file structure returned from falloc() is marked
	 * as 'larval' initially. Unless we mark it as 'mature' by
	 * FILE_SET_MATURE(), any attempt to do anything with it would
	 * return EBADF, including e.g. dup(2) or close(2). This avoids
	 * file descriptor races if we block in the second falloc().
	 */

	error = falloc(p, &rf, &fd);
	if (error)
		goto free2;
	retval[0] = fd;
	rf->f_flag = FREAD;
	rf->f_type = DTYPE_PIPE;
	rf->f_data = (caddr_t)rpipe;
	rf->f_ops = &pipeops;

	error = falloc(p, &wf, &fd);
	if (error)
		goto free3;
	retval[1] = fd;
	wf->f_flag = FWRITE;
	wf->f_type = DTYPE_PIPE;
	wf->f_data = (caddr_t)wpipe;
	wf->f_ops = &pipeops;

	rpipe->pipe_peer = wpipe;
	wpipe->pipe_peer = rpipe;

	FILE_SET_MATURE(rf);
	FILE_SET_MATURE(wf);
	FILE_UNUSE(rf, p);
	FILE_UNUSE(wf, p);
	return (0);
free3:
	FILE_UNUSE(rf, p);
	ffree(rf);
	fdremove(p->p_fd, retval[0]);
free2:
	pipeclose(wpipe);
	pipeclose(rpipe);
#endif /* NetBSD */

	return (error);
}

/*
 * Allocate kva for pipe circular buffer, the space is pageable
 * This routine will 'realloc' the size of a pipe safely, if it fails
 * it will retain the old buffer.
 * If it fails it will return ENOMEM.
 */
static int
pipespace(cpipe, size)
	struct pipe *cpipe;
	int size;
{
	caddr_t buffer;
#ifdef __FreeBSD__
	struct vm_object *object;
	int npages, error;

	npages = round_page(size)/PAGE_SIZE;
	/*
	 * Create an object, I don't like the idea of paging to/from
	 * kernel_object.
	 */
	mtx_lock(&vm_mtx);
	object = vm_object_allocate(OBJT_DEFAULT, npages);
	buffer = (caddr_t) vm_map_min(kernel_map);

	/*
	 * Insert the object into the kernel map, and allocate kva for it.
	 * The map entry is, by default, pageable.
	 */
	error = vm_map_find(kernel_map, object, 0,
		(vm_offset_t *) &buffer, size, 1,
		VM_PROT_ALL, VM_PROT_ALL, 0);

	if (error != KERN_SUCCESS) {
		vm_object_deallocate(object);
		mtx_unlock(&vm_mtx);
		return (ENOMEM);
	}
#endif /* FreeBSD */

#ifdef __NetBSD__
	/*
	 * Allocate pageable virtual address space. Physical memory is
	 * allocated on demand.
	 */
	buffer = (caddr_t) uvm_km_valloc(kernel_map, round_page(size));
	if (buffer == NULL)
		return (ENOMEM);
#endif /* NetBSD */

	/* free old resources if we're resizing */
	pipe_free_kmem(cpipe);
#ifdef __FreeBSD__
	mtx_unlock(&vm_mtx);
	cpipe->pipe_buffer.object = object;
#endif
	cpipe->pipe_buffer.buffer = buffer;
	cpipe->pipe_buffer.size = size;
	cpipe->pipe_buffer.in = 0;
	cpipe->pipe_buffer.out = 0;
	cpipe->pipe_buffer.cnt = 0;
	amountpipekva += cpipe->pipe_buffer.size;
	return (0);
}

/*
 * initialize and allocate VM and memory for pipe
 *
 * If 'allockva' is zero the kva buffer is not allocated here; the
 * caller relies on it being set up later (e.g. lazily by a writer).
 */
static int
pipe_create(cpipep, allockva)
	struct pipe **cpipep;
	int allockva;
{
	struct pipe *cpipe;
	int error;

#ifdef __FreeBSD__
	*cpipep = zalloc(pipe_zone);
#endif
#ifdef __NetBSD__
	*cpipep = pool_get(&pipe_pool, M_WAITOK);
#endif
	if (*cpipep == NULL)
		return (ENOMEM);

	cpipe = *cpipep;

	/* Initialize */
	memset(cpipe, 0, sizeof(*cpipe));
	cpipe->pipe_state = PIPE_SIGNALR;

	if (allockva && (error = pipespace(cpipe, PIPE_SIZE)))
		return (error);

	vfs_timestamp(&cpipe->pipe_ctime);
	cpipe->pipe_atime = cpipe->pipe_ctime;
	cpipe->pipe_mtime = cpipe->pipe_ctime;
#ifdef __NetBSD__
	cpipe->pipe_pgid = NO_PID;
	lockinit(&cpipe->pipe_lock, PRIBIO | PCATCH, "pipelk", 0, 0);
#endif

	return (0);
}


/*
 * lock a pipe for I/O, blocking other access
 *
 * 'catch' selects whether the sleep is interruptible by signals.
 */
static __inline int
pipelock(cpipe, catch)
	struct pipe *cpipe;
	int catch;
{
	int error;

#ifdef __FreeBSD__
	while (cpipe->pipe_state & PIPE_LOCK) {
		cpipe->pipe_state |= PIPE_LWANT;
		error = tsleep(cpipe, catch ? (PRIBIO | PCATCH) : PRIBIO,
		    "pipelk", 0);
		if (error != 0)
			return (error);
	}
	cpipe->pipe_state |= PIPE_LOCK;
	return (0);
#endif

#ifdef __NetBSD__
	/* if not catching signals, retry the lock on EINTR/ERESTART */
	do {
		error = lockmgr(&cpipe->pipe_lock, LK_EXCLUSIVE, NULL);
	} while (!catch && (error == EINTR || error == ERESTART));
	return (error);
#endif
}

/*
 * unlock a pipe I/O lock
 */
static __inline void
pipeunlock(cpipe)
	struct pipe *cpipe;
{
#ifdef __FreeBSD__
	cpipe->pipe_state &= ~PIPE_LOCK;
	if (cpipe->pipe_state & PIPE_LWANT) {
		cpipe->pipe_state &= ~PIPE_LWANT;
		wakeup(cpipe);
	}
#endif

#ifdef __NetBSD__
	lockmgr(&cpipe->pipe_lock, LK_RELEASE, NULL);
#endif
}

/*
 * Select/poll wakup. This also sends SIGIO to peer connected to
 * 'sigpipe' side of pipe.
 */
static __inline void
pipeselwakeup(selp, sigp)
	struct pipe *selp, *sigp;
{
	if (selp->pipe_state & PIPE_SEL) {
		selp->pipe_state &= ~PIPE_SEL;
		selwakeup(&selp->pipe_sel);
	}
#ifdef __FreeBSD__
	if (sigp && (sigp->pipe_state & PIPE_ASYNC) && sigp->pipe_sigio)
		pgsigio(sigp->pipe_sigio, SIGIO, 0);
	KNOTE(&selp->pipe_sel.si_note, 0);
#endif

#ifdef __NetBSD__
	/* pipe_pgid < 0 names a process group, > 0 a single process */
	if (sigp && (sigp->pipe_state & PIPE_ASYNC)
	    && sigp->pipe_pgid != NO_PID){
		struct proc *p;

		if (sigp->pipe_pgid < 0)
			gsignal(-sigp->pipe_pgid, SIGIO);
		else if (sigp->pipe_pgid > 0 && (p = pfind(sigp->pipe_pgid)) != 0)
			psignal(p, SIGIO);
	}
#endif /* NetBSD */
}

/* ARGSUSED */
#ifdef __FreeBSD__
static int
pipe_read(fp, uio, cred, flags, p)
	struct file *fp;
	struct uio *uio;
	struct ucred *cred;
	int flags;
	struct proc *p;
#elif defined(__NetBSD__)
static int
pipe_read(fp, offset, uio, cred, flags)
	struct file *fp;
	off_t *offset;
	struct uio *uio;
	struct ucred *cred;
	int flags;
#endif
{
	struct pipe *rpipe = (struct pipe *) fp->f_data;
	int error;
	size_t nread = 0;
	size_t size;
	size_t ocnt;

	++rpipe->pipe_busy;
	error = pipelock(rpipe, 1);
	if (error)
		goto unlocked_error;

	ocnt = rpipe->pipe_buffer.cnt;

	while (uio->uio_resid) {
		/*
		 * normal pipe buffer receive
		 */
		if (rpipe->pipe_buffer.cnt > 0) {
			/* clamp to contiguous run, then to available data */
			size = rpipe->pipe_buffer.size - rpipe->pipe_buffer.out;
			if (size > rpipe->pipe_buffer.cnt)
				size = rpipe->pipe_buffer.cnt;
			if (size > uio->uio_resid)
				size = uio->uio_resid;

			error = uiomove(&rpipe->pipe_buffer.buffer[rpipe->pipe_buffer.out],
					size, uio);
			if (error)
				break;

			rpipe->pipe_buffer.out += size;
			if (rpipe->pipe_buffer.out >= rpipe->pipe_buffer.size)
				rpipe->pipe_buffer.out = 0;

			rpipe->pipe_buffer.cnt -= size;

			/*
			 * If there is no more to read in the pipe, reset
			 * its pointers to the beginning.  This improves
			 * cache hit stats.
			 */
			if (rpipe->pipe_buffer.cnt == 0) {
				rpipe->pipe_buffer.in = 0;
				rpipe->pipe_buffer.out = 0;
			}
			nread += size;
#ifndef PIPE_NODIRECT
		/*
		 * Direct copy, bypassing a kernel buffer.
		 */
		} else if ((size = rpipe->pipe_map.cnt) &&
			   (rpipe->pipe_state & PIPE_DIRECTW)) {
			caddr_t	va;
			if (size > uio->uio_resid)
				size = uio->uio_resid;

			va = (caddr_t) rpipe->pipe_map.kva +
			    rpipe->pipe_map.pos;
			error = uiomove(va, size, uio);
			if (error)
				break;
			nread += size;
			rpipe->pipe_map.pos += size;
			rpipe->pipe_map.cnt -= size;
			if (rpipe->pipe_map.cnt == 0) {
				/* whole chunk consumed; release the writer */
				rpipe->pipe_state &= ~PIPE_DIRECTW;
				wakeup(rpipe);
			}
#endif
		} else {
			/*
			 * detect EOF condition
			 * read returns 0 on EOF, no need to set error
			 */
			if (rpipe->pipe_state & PIPE_EOF)
				break;

			/*
			 * If the "write-side" has been blocked, wake it up now.
			 */
			if (rpipe->pipe_state & PIPE_WANTW) {
				rpipe->pipe_state &= ~PIPE_WANTW;
				wakeup(rpipe);
			}

			/*
			 * Break if some data was read.
			 */
			if (nread > 0)
				break;

			/*
			 * don't block on non-blocking I/O
			 */
			if (fp->f_flag & FNONBLOCK) {
				error = EAGAIN;
				break;
			}

			/*
			 * Unlock the pipe buffer for our remaining processing.
			 * We will either break out with an error or we will
			 * sleep and relock to loop.
			 */
			pipeunlock(rpipe);

			/*
			 * We want to read more, wake up select/poll.
			 */
			pipeselwakeup(rpipe, rpipe->pipe_peer);

			rpipe->pipe_state |= PIPE_WANTR;
			error = tsleep(rpipe, PRIBIO | PCATCH, "piperd", 0);
			if (error != 0 || (error = pipelock(rpipe, 1)))
				goto unlocked_error;
		}
	}
	pipeunlock(rpipe);

	if (error == 0)
		vfs_timestamp(&rpipe->pipe_atime);
unlocked_error:
	--rpipe->pipe_busy;

	/*
	 * PIPE_WANTCLOSE processing only makes sense if pipe_busy is 0.
	 */
	if ((rpipe->pipe_busy == 0) && (rpipe->pipe_state & PIPE_WANTCLOSE)) {
		rpipe->pipe_state &= ~(PIPE_WANTCLOSE|PIPE_WANTW);
		wakeup(rpipe);
	} else if (rpipe->pipe_buffer.cnt < MINPIPESIZE) {
		/*
		 * Handle write blocking hysteresis.
		 */
		if (rpipe->pipe_state & PIPE_WANTW) {
			rpipe->pipe_state &= ~PIPE_WANTW;
			wakeup(rpipe);
		}
	}

	/*
	 * If anything was read off the buffer, signal to the writer it's
	 * possible to write more data. Also send signal if we are here for the
	 * first time after last write.
	 */
	if ((rpipe->pipe_buffer.size - rpipe->pipe_buffer.cnt) >= PIPE_BUF
	    && (ocnt != rpipe->pipe_buffer.cnt || (rpipe->pipe_state & PIPE_SIGNALR))) {
		pipeselwakeup(rpipe, rpipe->pipe_peer);
		rpipe->pipe_state &= ~PIPE_SIGNALR;
	}

	return (error);
}

#ifdef __FreeBSD__
#ifndef PIPE_NODIRECT
/*
 * Map the sending processes' buffer into kernel space and wire it.
 * This is similar to a physical write operation.
 */
static int
pipe_build_write_buffer(wpipe, uio)
	struct pipe *wpipe;
	struct uio *uio;
{
	size_t size;
	int i;
	vm_offset_t addr, endaddr, paddr;

	/* clamp this transfer to the pipe buffer size */
	size = uio->uio_iov->iov_len;
	if (size > wpipe->pipe_buffer.size)
		size = wpipe->pipe_buffer.size;

	endaddr = round_page((vm_offset_t)uio->uio_iov->iov_base + size);
	mtx_lock(&vm_mtx);
	addr = trunc_page((vm_offset_t)uio->uio_iov->iov_base);
	for (i = 0; addr < endaddr; addr += PAGE_SIZE, i++) {
		vm_page_t m;

		if (vm_fault_quick((caddr_t)addr, VM_PROT_READ) < 0 ||
		    (paddr = pmap_kextract(addr)) == 0) {
			int j;

			/* fault or unmapped page: unwire what we wired so far */
			for (j = 0; j < i; j++)
				vm_page_unwire(wpipe->pipe_map.ms[j], 1);
			mtx_unlock(&vm_mtx);
			return (EFAULT);
		}

		m = PHYS_TO_VM_PAGE(paddr);
		vm_page_wire(m);
		wpipe->pipe_map.ms[i] = m;
	}

	/*
	 * set up the control block
	 */
	wpipe->pipe_map.npages = i;
	wpipe->pipe_map.pos =
	    ((vm_offset_t) uio->uio_iov->iov_base) & PAGE_MASK;
	wpipe->pipe_map.cnt = size;

	/*
	 * and map the buffer
	 */
	if (wpipe->pipe_map.kva == 0) {
		/*
		 * We need to allocate space for an extra page because the
		 * address range might (will) span pages at times.
		 */
		wpipe->pipe_map.kva = kmem_alloc_pageable(kernel_map,
			wpipe->pipe_buffer.size + PAGE_SIZE);
		amountpipekva += wpipe->pipe_buffer.size + PAGE_SIZE;
	}
	pmap_qenter(wpipe->pipe_map.kva, wpipe->pipe_map.ms,
		wpipe->pipe_map.npages);

	mtx_unlock(&vm_mtx);
	/*
	 * and update the uio data
	 */

	uio->uio_iov->iov_len -= size;
	uio->uio_iov->iov_base += size;
	if (uio->uio_iov->iov_len == 0)
		uio->uio_iov++;
	uio->uio_resid -= size;
	uio->uio_offset += size;
	return (0);
}

/*
 * unmap and unwire the process buffer
 */
static void
pipe_destroy_write_buffer(wpipe)
	struct pipe *wpipe;
{
	int i;

	mtx_lock(&vm_mtx);
	if (wpipe->pipe_map.kva) {
		pmap_qremove(wpipe->pipe_map.kva, wpipe->pipe_map.npages);

		/* only give the kva back when over the soft limit */
		if (amountpipekva > maxpipekva) {
			vm_offset_t kva = wpipe->pipe_map.kva;
			wpipe->pipe_map.kva = 0;
			kmem_free(kernel_map, kva,
				wpipe->pipe_buffer.size + PAGE_SIZE);
			amountpipekva -= wpipe->pipe_buffer.size + PAGE_SIZE;
		}
	}
	for (i = 0; i < wpipe->pipe_map.npages; i++)
		vm_page_unwire(wpipe->pipe_map.ms[i], 1);
	mtx_unlock(&vm_mtx);
}

/*
 * In the case of a signal, the writing process might go away.  This
 * code copies the data into the circular buffer so that the source
 * pages can be freed without loss of data.
 */
static void
pipe_clone_write_buffer(wpipe)
	struct pipe *wpipe;
{
	int size;
	int pos;

	size = wpipe->pipe_map.cnt;
	pos = wpipe->pipe_map.pos;
	memcpy((caddr_t) wpipe->pipe_buffer.buffer,
	    (caddr_t) wpipe->pipe_map.kva + pos, size);

	wpipe->pipe_buffer.in = size;
	wpipe->pipe_buffer.out = 0;
	wpipe->pipe_buffer.cnt = size;
	wpipe->pipe_state &= ~PIPE_DIRECTW;

	pipe_destroy_write_buffer(wpipe);
}

/*
 * This implements the pipe buffer write mechanism.
Note that only
 * a direct write OR a normal pipe write can be pending at any given time.
 * If there are any characters in the pipe buffer, the direct write will
 * be deferred until the receiving process grabs all of the bytes from
 * the pipe buffer.  Then the direct mapping write is set-up.
 */
static int
pipe_direct_write(wpipe, uio)
	struct pipe *wpipe;
	struct uio *uio;
{
	int error;

retry:
	/* only one direct write may be pending; wait for our turn */
	while (wpipe->pipe_state & PIPE_DIRECTW) {
		if (wpipe->pipe_state & PIPE_WANTR) {
			wpipe->pipe_state &= ~PIPE_WANTR;
			wakeup(wpipe);
		}
		wpipe->pipe_state |= PIPE_WANTW;
		error = tsleep(wpipe, PRIBIO | PCATCH, "pipdww", 0);
		if (error)
			goto error1;
		if (wpipe->pipe_state & PIPE_EOF) {
			error = EPIPE;
			goto error1;
		}
	}
	wpipe->pipe_map.cnt = 0;	/* transfer not ready yet */
	if (wpipe->pipe_buffer.cnt > 0) {
		/* buffered data must drain before the direct transfer begins */
		if (wpipe->pipe_state & PIPE_WANTR) {
			wpipe->pipe_state &= ~PIPE_WANTR;
			wakeup(wpipe);
		}

		wpipe->pipe_state |= PIPE_WANTW;
		error = tsleep(wpipe, PRIBIO | PCATCH, "pipdwc", 0);
		if (error)
			goto error1;
		if (wpipe->pipe_state & PIPE_EOF) {
			error = EPIPE;
			goto error1;
		}
		goto retry;
	}

	wpipe->pipe_state |= PIPE_DIRECTW;

	error = pipe_build_write_buffer(wpipe, uio);
	if (error) {
		wpipe->pipe_state &= ~PIPE_DIRECTW;
		goto error1;
	}

	/* wait until the reader has consumed the whole mapped chunk */
	error = 0;
	while (!error && (wpipe->pipe_state & PIPE_DIRECTW)) {
		if (wpipe->pipe_state & PIPE_EOF) {
			pipelock(wpipe, 0);
			pipe_destroy_write_buffer(wpipe);
			pipeunlock(wpipe);
			pipeselwakeup(wpipe, wpipe);
			error = EPIPE;
			goto error1;
		}
		if (wpipe->pipe_state & PIPE_WANTR) {
			wpipe->pipe_state &= ~PIPE_WANTR;
			wakeup(wpipe);
		}
		pipeselwakeup(wpipe, wpipe);
		error = tsleep(wpipe, PRIBIO | PCATCH, "pipdwt", 0);
	}

	pipelock(wpipe,0);
	if (wpipe->pipe_state & PIPE_DIRECTW) {
		/*
		 * this bit of trickery substitutes a kernel buffer for
		 * the process that might be going away.
		 */
		pipe_clone_write_buffer(wpipe);
	} else {
		pipe_destroy_write_buffer(wpipe);
	}
	pipeunlock(wpipe);
	return (error);

error1:
	wakeup(wpipe);
	return (error);
}
#endif /* !PIPE_NODIRECT */
#endif /* FreeBSD */

#ifdef __NetBSD__
#ifndef PIPE_NODIRECT
/*
 * Allocate structure for loan transfer.
 */
static __inline int
pipe_loan_alloc(wpipe, npages, blen)
	struct pipe *wpipe;
	int npages;
	vsize_t blen;
{
	/* NOTE(review): kva is vaddr_t; comparison with NULL assumes 0 */
	wpipe->pipe_map.kva = uvm_km_valloc_wait(kernel_map, blen);
	if (wpipe->pipe_map.kva == NULL)
		return (ENOMEM);

	amountpipekva += blen;
	wpipe->pipe_map.npages = npages;
	wpipe->pipe_map.ms = (struct vm_page **) malloc(
		npages * sizeof(struct vm_page *), M_PIPE, M_WAITOK);

	return (0);
}

/*
 * Free resources allocated for loan transfer.
 */
static void
pipe_loan_free(wpipe)
	struct pipe *wpipe;
{
	uvm_km_free(kernel_map, wpipe->pipe_map.kva,
		wpipe->pipe_map.npages * PAGE_SIZE);
	wpipe->pipe_map.kva = NULL;
	amountpipekva -= wpipe->pipe_map.npages * PAGE_SIZE;
	free(wpipe->pipe_map.ms, M_PIPE);
	wpipe->pipe_map.ms = NULL;
}

/*
 * NetBSD direct write, using uvm_loan() mechanism.
 * This implements the pipe buffer write mechanism.  Note that only
 * a direct write OR a normal pipe write can be pending at any given time.
 * If there are any characters in the pipe buffer, the direct write will
 * be deferred until the receiving process grabs all of the bytes from
 * the pipe buffer.  Then the direct mapping write is set-up.
970 */ 971 static __inline int 972 pipe_direct_write(wpipe, uio) 973 struct pipe *wpipe; 974 struct uio *uio; 975 { 976 int error, npages, j; 977 struct vm_page **res = NULL; 978 vaddr_t bbase, kva, base, bend; 979 vsize_t blen, bcnt; 980 voff_t bpos; 981 982 retry: 983 while (wpipe->pipe_state & PIPE_DIRECTW) { 984 if (wpipe->pipe_state & PIPE_WANTR) { 985 wpipe->pipe_state &= ~PIPE_WANTR; 986 wakeup(wpipe); 987 } 988 wpipe->pipe_state |= PIPE_WANTW; 989 error = tsleep(wpipe, PRIBIO | PCATCH, "pipdww", 0); 990 if (error) 991 goto error; 992 if (wpipe->pipe_state & PIPE_EOF) { 993 error = EPIPE; 994 goto error; 995 } 996 } 997 wpipe->pipe_map.cnt = 0; /* transfer not ready yet */ 998 if (wpipe->pipe_buffer.cnt > 0) { 999 if (wpipe->pipe_state & PIPE_WANTR) { 1000 wpipe->pipe_state &= ~PIPE_WANTR; 1001 wakeup(wpipe); 1002 } 1003 1004 wpipe->pipe_state |= PIPE_WANTW; 1005 error = tsleep(wpipe, PRIBIO | PCATCH, "pipdwc", 0); 1006 if (error) 1007 goto error; 1008 if (wpipe->pipe_state & PIPE_EOF) { 1009 error = EPIPE; 1010 goto error; 1011 } 1012 goto retry; 1013 } 1014 1015 /* 1016 * Handle first iovec, first PIPE_CHUNK_SIZE bytes. Expect caller 1017 * to deal with short write. 1018 * 1019 * Note: need to deal with buffers not aligned to PAGE_SIZE. 1020 */ 1021 bbase = (vaddr_t)uio->uio_iov[0].iov_base; 1022 base = trunc_page(bbase); 1023 bend = round_page(bbase + uio->uio_iov[0].iov_len); 1024 blen = bend - base; 1025 bpos = bbase - base; 1026 1027 if (blen > PIPE_DIRECT_CHUNK) { 1028 blen = PIPE_DIRECT_CHUNK; 1029 bend = base + blen; 1030 bcnt = PIPE_DIRECT_CHUNK - bpos; 1031 } else 1032 bcnt = uio->uio_iov[0].iov_len; 1033 1034 npages = blen / PAGE_SIZE; 1035 1036 wpipe->pipe_map.pos = bpos; 1037 wpipe->pipe_map.cnt = bcnt; 1038 1039 /* 1040 * Free the old kva if we need more pages than we have 1041 * allocated. 1042 */ 1043 if (wpipe->pipe_map.kva && npages > wpipe->pipe_map.npages) 1044 pipe_loan_free(wpipe); 1045 1046 /* Allocate new kva. 
*/ 1047 if (!wpipe->pipe_map.kva 1048 && (error = pipe_loan_alloc(wpipe, npages, blen))) 1049 goto error; 1050 1051 /* Loan the write buffer memory from writer process */ 1052 error = uvm_loan(&uio->uio_procp->p_vmspace->vm_map, base, blen, 1053 (void **) wpipe->pipe_map.ms, UVM_LOAN_TOPAGE); 1054 if (error) 1055 goto cleanup; 1056 res = wpipe->pipe_map.ms; 1057 1058 /* Enter the loaned pages to kva */ 1059 kva = wpipe->pipe_map.kva; 1060 for(j=0; j < npages; j++, kva += PAGE_SIZE) 1061 pmap_enter(pmap_kernel(), kva, res[j]->phys_addr, 1062 VM_PROT_READ, 0); 1063 pmap_update(pmap_kernel()); 1064 1065 wpipe->pipe_state |= PIPE_DIRECTW; 1066 error = 0; 1067 while (!error && (wpipe->pipe_state & PIPE_DIRECTW)) { 1068 if (wpipe->pipe_state & PIPE_EOF) { 1069 error = EPIPE; 1070 break; 1071 } 1072 if (wpipe->pipe_state & PIPE_WANTR) { 1073 wpipe->pipe_state &= ~PIPE_WANTR; 1074 wakeup(wpipe); 1075 } 1076 pipeselwakeup(wpipe, wpipe); 1077 error = tsleep(wpipe, PRIBIO | PCATCH, "pipdwt", 0); 1078 } 1079 1080 if (error) 1081 wpipe->pipe_state &= ~PIPE_DIRECTW; 1082 1083 cleanup: 1084 pipelock(wpipe, 0); 1085 if (res) 1086 uvm_unloanpage(res, npages); 1087 if (error || amountpipekva > maxpipekva) 1088 pipe_loan_free(wpipe); 1089 pipeunlock(wpipe); 1090 1091 if (error == EPIPE) { 1092 pipeselwakeup(wpipe, wpipe); 1093 1094 /* 1095 * If anything was read from what we offered, return success 1096 * and short write. We return EOF on next write(2). 
1097 */ 1098 if (wpipe->pipe_map.cnt < bcnt) { 1099 bcnt -= wpipe->pipe_map.cnt; 1100 error = 0; 1101 } 1102 } 1103 1104 if (error) { 1105 error: 1106 wakeup(wpipe); 1107 return (error); 1108 } 1109 1110 uio->uio_resid -= bcnt; 1111 /* uio_offset not updated, not set/used for write(2) */ 1112 1113 return (0); 1114 } 1115 #endif /* !PIPE_NODIRECT */ 1116 #endif /* NetBSD */ 1117 1118 #ifdef __FreeBSD__ 1119 static int 1120 pipe_write(fp, uio, cred, flags, p) 1121 struct file *fp; 1122 off_t *offset; 1123 struct uio *uio; 1124 struct ucred *cred; 1125 int flags; 1126 struct proc *p; 1127 #elif defined(__NetBSD__) 1128 static int 1129 pipe_write(fp, offset, uio, cred, flags) 1130 struct file *fp; 1131 off_t *offset; 1132 struct uio *uio; 1133 struct ucred *cred; 1134 int flags; 1135 #endif 1136 { 1137 int error = 0; 1138 int orig_resid; 1139 struct pipe *wpipe, *rpipe; 1140 1141 rpipe = (struct pipe *) fp->f_data; 1142 wpipe = rpipe->pipe_peer; 1143 1144 /* 1145 * detect loss of pipe read side, issue SIGPIPE if lost. 1146 */ 1147 if ((wpipe == NULL) || (wpipe->pipe_state & PIPE_EOF)) 1148 return (EPIPE); 1149 1150 ++wpipe->pipe_busy; 1151 1152 /* 1153 * If it is advantageous to resize the pipe buffer, do 1154 * so. 1155 */ 1156 if ((uio->uio_resid > PIPE_SIZE) && 1157 (nbigpipe < maxbigpipes) && 1158 #ifndef PIPE_NODIRECT 1159 (wpipe->pipe_state & PIPE_DIRECTW) == 0 && 1160 #endif 1161 (wpipe->pipe_buffer.size <= PIPE_SIZE) && 1162 (wpipe->pipe_buffer.cnt == 0)) { 1163 1164 if ((error = pipelock(wpipe,1)) == 0) { 1165 if (pipespace(wpipe, BIG_PIPE_SIZE) == 0) 1166 nbigpipe++; 1167 pipeunlock(wpipe); 1168 } else { 1169 /* 1170 * If an error occurred, unbusy and return, waking up 1171 * any waiting readers. 
1172 */ 1173 --wpipe->pipe_busy; 1174 if (wpipe->pipe_busy == 0 1175 && (wpipe->pipe_state & PIPE_WANTCLOSE)) { 1176 wpipe->pipe_state &= 1177 ~(PIPE_WANTCLOSE | PIPE_WANTR); 1178 wakeup(wpipe); 1179 } 1180 1181 return (error); 1182 } 1183 } 1184 1185 #ifdef __FreeBSD__ 1186 KASSERT(wpipe->pipe_buffer.buffer != NULL, ("pipe buffer gone")); 1187 #endif 1188 1189 orig_resid = uio->uio_resid; 1190 while (uio->uio_resid) { 1191 int space; 1192 1193 #ifndef PIPE_NODIRECT 1194 /* 1195 * If the transfer is large, we can gain performance if 1196 * we do process-to-process copies directly. 1197 * If the write is non-blocking, we don't use the 1198 * direct write mechanism. 1199 * 1200 * The direct write mechanism will detect the reader going 1201 * away on us. 1202 */ 1203 if ((uio->uio_iov[0].iov_len >= PIPE_MINDIRECT) && 1204 (uio->uio_resid == orig_resid) && 1205 (fp->f_flag & FNONBLOCK) == 0 && 1206 (wpipe->pipe_map.kva || (amountpipekva < limitpipekva))) { 1207 error = pipe_direct_write(wpipe, uio); 1208 1209 /* 1210 * We either errorred, wrote whole buffer, or 1211 * wrote part of buffer. If the error is ENOMEM, 1212 * we failed to allocate some resources for direct 1213 * write and fall back to ordinary write. Otherwise, 1214 * break out now. 1215 */ 1216 if (error != ENOMEM) 1217 break; 1218 } 1219 #endif /* PIPE_NODIRECT */ 1220 1221 /* 1222 * Pipe buffered writes cannot be coincidental with 1223 * direct writes. We wait until the currently executing 1224 * direct write is completed before we start filling the 1225 * pipe buffer. We break out if a signal occurs or the 1226 * reader goes away. 
1227 */ 1228 retrywrite: 1229 while (wpipe->pipe_state & PIPE_DIRECTW) { 1230 if (wpipe->pipe_state & PIPE_WANTR) { 1231 wpipe->pipe_state &= ~PIPE_WANTR; 1232 wakeup(wpipe); 1233 } 1234 error = tsleep(wpipe, PRIBIO | PCATCH, "pipbww", 0); 1235 if (wpipe->pipe_state & PIPE_EOF) 1236 break; 1237 if (error) 1238 break; 1239 } 1240 if (wpipe->pipe_state & PIPE_EOF) { 1241 error = EPIPE; 1242 break; 1243 } 1244 1245 space = wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt; 1246 1247 /* Writes of size <= PIPE_BUF must be atomic. */ 1248 if ((space < uio->uio_resid) && (orig_resid <= PIPE_BUF)) 1249 space = 0; 1250 1251 if (space > 0 && (wpipe->pipe_buffer.cnt < PIPE_SIZE)) { 1252 int size; /* Transfer size */ 1253 int segsize; /* first segment to transfer */ 1254 1255 if ((error = pipelock(wpipe,1)) != 0) 1256 break; 1257 1258 /* 1259 * It is possible for a direct write to 1260 * slip in on us... handle it here... 1261 */ 1262 if (wpipe->pipe_state & PIPE_DIRECTW) { 1263 pipeunlock(wpipe); 1264 goto retrywrite; 1265 } 1266 /* 1267 * If a process blocked in uiomove, our 1268 * value for space might be bad. 1269 * 1270 * XXX will we be ok if the reader has gone 1271 * away here? 1272 */ 1273 if (space > wpipe->pipe_buffer.size - 1274 wpipe->pipe_buffer.cnt) { 1275 pipeunlock(wpipe); 1276 goto retrywrite; 1277 } 1278 1279 /* 1280 * Transfer size is minimum of uio transfer 1281 * and free space in pipe buffer. 1282 */ 1283 if (space > uio->uio_resid) 1284 size = uio->uio_resid; 1285 else 1286 size = space; 1287 /* 1288 * First segment to transfer is minimum of 1289 * transfer size and contiguous space in 1290 * pipe buffer. If first segment to transfer 1291 * is less than the transfer size, we've got 1292 * a wraparound in the buffer. 
1293 */ 1294 segsize = wpipe->pipe_buffer.size - 1295 wpipe->pipe_buffer.in; 1296 if (segsize > size) 1297 segsize = size; 1298 1299 /* Transfer first segment */ 1300 1301 error = uiomove(&wpipe->pipe_buffer.buffer[wpipe->pipe_buffer.in], 1302 segsize, uio); 1303 1304 if (error == 0 && segsize < size) { 1305 /* 1306 * Transfer remaining part now, to 1307 * support atomic writes. Wraparound 1308 * happened. 1309 */ 1310 #ifdef DEBUG 1311 if (wpipe->pipe_buffer.in + segsize != 1312 wpipe->pipe_buffer.size) 1313 panic("Expected pipe buffer wraparound disappeared"); 1314 #endif 1315 1316 error = uiomove(&wpipe->pipe_buffer.buffer[0], 1317 size - segsize, uio); 1318 } 1319 if (error == 0) { 1320 wpipe->pipe_buffer.in += size; 1321 if (wpipe->pipe_buffer.in >= 1322 wpipe->pipe_buffer.size) { 1323 #ifdef DEBUG 1324 if (wpipe->pipe_buffer.in != size - segsize + wpipe->pipe_buffer.size) 1325 panic("Expected wraparound bad"); 1326 #endif 1327 wpipe->pipe_buffer.in = size - segsize; 1328 } 1329 1330 wpipe->pipe_buffer.cnt += size; 1331 #ifdef DEBUG 1332 if (wpipe->pipe_buffer.cnt > wpipe->pipe_buffer.size) 1333 panic("Pipe buffer overflow"); 1334 #endif 1335 1336 } 1337 pipeunlock(wpipe); 1338 if (error) 1339 break; 1340 1341 } else { 1342 /* 1343 * If the "read-side" has been blocked, wake it up now. 1344 */ 1345 if (wpipe->pipe_state & PIPE_WANTR) { 1346 wpipe->pipe_state &= ~PIPE_WANTR; 1347 wakeup(wpipe); 1348 } 1349 1350 /* 1351 * don't block on non-blocking I/O 1352 */ 1353 if (fp->f_flag & FNONBLOCK) { 1354 error = EAGAIN; 1355 break; 1356 } 1357 1358 /* 1359 * We have no more space and have something to offer, 1360 * wake up select/poll. 1361 */ 1362 pipeselwakeup(wpipe, wpipe); 1363 1364 wpipe->pipe_state |= PIPE_WANTW; 1365 error = tsleep(wpipe, PRIBIO | PCATCH, "pipewr", 0); 1366 if (error != 0) 1367 break; 1368 /* 1369 * If read side wants to go away, we just issue a signal 1370 * to ourselves. 
1371 */ 1372 if (wpipe->pipe_state & PIPE_EOF) { 1373 error = EPIPE; 1374 break; 1375 } 1376 } 1377 } 1378 1379 --wpipe->pipe_busy; 1380 if ((wpipe->pipe_busy == 0) && (wpipe->pipe_state & PIPE_WANTCLOSE)) { 1381 wpipe->pipe_state &= ~(PIPE_WANTCLOSE | PIPE_WANTR); 1382 wakeup(wpipe); 1383 } else if (wpipe->pipe_buffer.cnt > 0) { 1384 /* 1385 * If we have put any characters in the buffer, we wake up 1386 * the reader. 1387 */ 1388 if (wpipe->pipe_state & PIPE_WANTR) { 1389 wpipe->pipe_state &= ~PIPE_WANTR; 1390 wakeup(wpipe); 1391 } 1392 } 1393 1394 /* 1395 * Don't return EPIPE if I/O was successful 1396 */ 1397 if ((error == EPIPE) && (wpipe->pipe_buffer.cnt == 0) 1398 && (uio->uio_resid == 0)) 1399 error = 0; 1400 1401 if (error == 0) 1402 vfs_timestamp(&wpipe->pipe_mtime); 1403 1404 /* 1405 * We have something to offer, wake up select/poll. 1406 * wpipe->pipe_map.cnt is always 0 in this point (direct write 1407 * is only done synchronously), so check wpipe->only pipe_buffer.cnt 1408 */ 1409 if (wpipe->pipe_buffer.cnt) 1410 pipeselwakeup(wpipe, wpipe); 1411 1412 /* 1413 * Arrange for next read(2) to do a signal. 1414 */ 1415 wpipe->pipe_state |= PIPE_SIGNALR; 1416 1417 return (error); 1418 } 1419 1420 /* 1421 * we implement a very minimal set of ioctls for compatibility with sockets. 
1422 */ 1423 int 1424 pipe_ioctl(fp, cmd, data, p) 1425 struct file *fp; 1426 u_long cmd; 1427 caddr_t data; 1428 struct proc *p; 1429 { 1430 struct pipe *mpipe = (struct pipe *)fp->f_data; 1431 1432 switch (cmd) { 1433 1434 case FIONBIO: 1435 return (0); 1436 1437 case FIOASYNC: 1438 if (*(int *)data) { 1439 mpipe->pipe_state |= PIPE_ASYNC; 1440 } else { 1441 mpipe->pipe_state &= ~PIPE_ASYNC; 1442 } 1443 return (0); 1444 1445 case FIONREAD: 1446 #ifndef PIPE_NODIRECT 1447 if (mpipe->pipe_state & PIPE_DIRECTW) 1448 *(int *)data = mpipe->pipe_map.cnt; 1449 else 1450 #endif 1451 *(int *)data = mpipe->pipe_buffer.cnt; 1452 return (0); 1453 1454 #ifdef __FreeBSD__ 1455 case FIOSETOWN: 1456 return (fsetown(*(int *)data, &mpipe->pipe_sigio)); 1457 1458 case FIOGETOWN: 1459 *(int *)data = fgetown(mpipe->pipe_sigio); 1460 return (0); 1461 1462 /* This is deprecated, FIOSETOWN should be used instead. */ 1463 case TIOCSPGRP: 1464 return (fsetown(-(*(int *)data), &mpipe->pipe_sigio)); 1465 1466 /* This is deprecated, FIOGETOWN should be used instead. 
*/ 1467 case TIOCGPGRP: 1468 *(int *)data = -fgetown(mpipe->pipe_sigio); 1469 return (0); 1470 #endif /* FreeBSD */ 1471 #ifdef __NetBSD__ 1472 case TIOCSPGRP: 1473 mpipe->pipe_pgid = *(int *)data; 1474 return (0); 1475 1476 case TIOCGPGRP: 1477 *(int *)data = mpipe->pipe_pgid; 1478 return (0); 1479 #endif /* NetBSD */ 1480 1481 } 1482 return (ENOTTY); 1483 } 1484 1485 int 1486 pipe_poll(fp, events, p) 1487 struct file *fp; 1488 int events; 1489 struct proc *p; 1490 { 1491 struct pipe *rpipe = (struct pipe *)fp->f_data; 1492 struct pipe *wpipe; 1493 int revents = 0; 1494 1495 wpipe = rpipe->pipe_peer; 1496 if (events & (POLLIN | POLLRDNORM)) 1497 if ((rpipe->pipe_buffer.cnt > 0) || 1498 #ifndef PIPE_NODIRECT 1499 (rpipe->pipe_state & PIPE_DIRECTW) || 1500 #endif 1501 (rpipe->pipe_state & PIPE_EOF)) 1502 revents |= events & (POLLIN | POLLRDNORM); 1503 1504 if (events & (POLLOUT | POLLWRNORM)) 1505 if (wpipe == NULL || (wpipe->pipe_state & PIPE_EOF) 1506 || ( 1507 #ifndef PIPE_NODIRECT 1508 ((wpipe->pipe_state & PIPE_DIRECTW) == 0) && 1509 #endif 1510 (wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt) >= PIPE_BUF)) 1511 revents |= events & (POLLOUT | POLLWRNORM); 1512 1513 if ((rpipe->pipe_state & PIPE_EOF) || 1514 (wpipe == NULL) || 1515 (wpipe->pipe_state & PIPE_EOF)) 1516 revents |= POLLHUP; 1517 1518 if (revents == 0) { 1519 if (events & (POLLIN | POLLRDNORM)) { 1520 selrecord(p, &rpipe->pipe_sel); 1521 rpipe->pipe_state |= PIPE_SEL; 1522 } 1523 1524 if (events & (POLLOUT | POLLWRNORM)) { 1525 selrecord(p, &wpipe->pipe_sel); 1526 wpipe->pipe_state |= PIPE_SEL; 1527 } 1528 } 1529 1530 return (revents); 1531 } 1532 1533 static int 1534 pipe_stat(fp, ub, p) 1535 struct file *fp; 1536 struct stat *ub; 1537 struct proc *p; 1538 { 1539 struct pipe *pipe = (struct pipe *)fp->f_data; 1540 1541 memset((caddr_t)ub, 0, sizeof(*ub)); 1542 ub->st_mode = S_IFIFO; 1543 ub->st_blksize = pipe->pipe_buffer.size; 1544 ub->st_size = pipe->pipe_buffer.cnt; 1545 ub->st_blocks = 
(ub->st_size) ? 1 : 0; 1546 #ifdef __FreeBSD__ 1547 ub->st_atimespec = pipe->pipe_atime; 1548 ub->st_mtimespec = pipe->pipe_mtime; 1549 ub->st_ctimespec = pipe->pipe_ctime; 1550 #endif /* FreeBSD */ 1551 #ifdef __NetBSD__ 1552 TIMEVAL_TO_TIMESPEC(&pipe->pipe_atime, &ub->st_atimespec) 1553 TIMEVAL_TO_TIMESPEC(&pipe->pipe_mtime, &ub->st_mtimespec); 1554 TIMEVAL_TO_TIMESPEC(&pipe->pipe_ctime, &ub->st_ctimespec); 1555 #endif /* NetBSD */ 1556 ub->st_uid = fp->f_cred->cr_uid; 1557 ub->st_gid = fp->f_cred->cr_gid; 1558 /* 1559 * Left as 0: st_dev, st_ino, st_nlink, st_rdev, st_flags, st_gen. 1560 * XXX (st_dev, st_ino) should be unique. 1561 */ 1562 return (0); 1563 } 1564 1565 /* ARGSUSED */ 1566 static int 1567 pipe_close(fp, p) 1568 struct file *fp; 1569 struct proc *p; 1570 { 1571 struct pipe *cpipe = (struct pipe *)fp->f_data; 1572 1573 #ifdef __FreeBSD__ 1574 fp->f_ops = &badfileops; 1575 funsetown(cpipe->pipe_sigio); 1576 #endif 1577 fp->f_data = NULL; 1578 pipeclose(cpipe); 1579 return (0); 1580 } 1581 1582 static void 1583 pipe_free_kmem(cpipe) 1584 struct pipe *cpipe; 1585 { 1586 1587 #ifdef __FreeBSD__ 1588 mtx_assert(&vm_mtx, MA_OWNED); 1589 #endif 1590 if (cpipe->pipe_buffer.buffer != NULL) { 1591 if (cpipe->pipe_buffer.size > PIPE_SIZE) 1592 --nbigpipe; 1593 amountpipekva -= cpipe->pipe_buffer.size; 1594 #ifdef __FreeBSD__ 1595 kmem_free(kernel_map, 1596 (vm_offset_t)cpipe->pipe_buffer.buffer, 1597 cpipe->pipe_buffer.size); 1598 #elif defined(__NetBSD__) 1599 uvm_km_free(kernel_map, 1600 (vaddr_t)cpipe->pipe_buffer.buffer, 1601 cpipe->pipe_buffer.size); 1602 #endif /* NetBSD */ 1603 1604 cpipe->pipe_buffer.buffer = NULL; 1605 } 1606 #ifndef PIPE_NODIRECT 1607 if (cpipe->pipe_map.kva != NULL) { 1608 #ifdef __FreeBSD__ 1609 amountpipekva -= cpipe->pipe_buffer.size + PAGE_SIZE; 1610 kmem_free(kernel_map, 1611 cpipe->pipe_map.kva, 1612 cpipe->pipe_buffer.size + PAGE_SIZE); 1613 #elif defined(__NetBSD__) 1614 pipe_loan_free(cpipe); 1615 #endif /* NetBSD */ 1616 
cpipe->pipe_map.cnt = 0; 1617 cpipe->pipe_map.kva = NULL; 1618 cpipe->pipe_map.pos = 0; 1619 cpipe->pipe_map.npages = 0; 1620 } 1621 #endif /* !PIPE_NODIRECT */ 1622 } 1623 1624 /* 1625 * shutdown the pipe 1626 */ 1627 static void 1628 pipeclose(cpipe) 1629 struct pipe *cpipe; 1630 { 1631 struct pipe *ppipe; 1632 1633 if (!cpipe) 1634 return; 1635 1636 pipeselwakeup(cpipe, cpipe); 1637 1638 /* 1639 * If the other side is blocked, wake it up saying that 1640 * we want to close it down. 1641 */ 1642 while (cpipe->pipe_busy) { 1643 wakeup(cpipe); 1644 cpipe->pipe_state |= PIPE_WANTCLOSE | PIPE_EOF; 1645 tsleep(cpipe, PRIBIO, "pipecl", 0); 1646 } 1647 1648 /* 1649 * Disconnect from peer 1650 */ 1651 if ((ppipe = cpipe->pipe_peer) != NULL) { 1652 pipeselwakeup(ppipe, ppipe); 1653 1654 ppipe->pipe_state |= PIPE_EOF; 1655 wakeup(ppipe); 1656 ppipe->pipe_peer = NULL; 1657 } 1658 1659 /* 1660 * free resources 1661 */ 1662 #ifdef _FreeBSD__ 1663 mtx_lock(&vm_mtx); 1664 pipe_free_kmem(cpipe); 1665 /* XXX: erm, doesn't zalloc already have its own locks and 1666 * not need the giant vm lock? 
	 */
	zfree(pipe_zone, cpipe);
	mtx_unlock(&vm_mtx);
#endif /* FreeBSD */

#ifdef __NetBSD__
	pipe_free_kmem(cpipe);
	(void) lockmgr(&cpipe->pipe_lock, LK_DRAIN, NULL);
	pool_put(&pipe_pool, cpipe);
#endif
}

#ifdef __FreeBSD__
/*
 * Attach a kevent filter to the pipe: EVFILT_READ watches this side,
 * EVFILT_WRITE watches the peer (the side whose buffer we fill).
 */
/*ARGSUSED*/
static int
pipe_kqfilter(struct file *fp, struct knote *kn)
{
	struct pipe *cpipe = (struct pipe *)kn->kn_fp->f_data;

	switch (kn->kn_filter) {
	case EVFILT_READ:
		kn->kn_fop = &pipe_rfiltops;
		break;
	case EVFILT_WRITE:
		kn->kn_fop = &pipe_wfiltops;
		/* write events are tracked on the peer pipe */
		cpipe = cpipe->pipe_peer;
		break;
	default:
		/* unsupported filter type */
		return (1);
	}
	kn->kn_hook = (caddr_t)cpipe;

	SLIST_INSERT_HEAD(&cpipe->pipe_sel.si_note, kn, kn_selnext);
	return (0);
}

/* Detach a knote registered by pipe_kqfilter. */
static void
filt_pipedetach(struct knote *kn)
{
	struct pipe *cpipe = (struct pipe *)kn->kn_fp->f_data;

	SLIST_REMOVE(&cpipe->pipe_sel.si_note, kn, knote, kn_selnext);
}

/*
 * EVFILT_READ filter: kn_data is the number of bytes available
 * (buffered, or pending in a direct write); EOF when either end
 * has gone away.
 */
/*ARGSUSED*/
static int
filt_piperead(struct knote *kn, long hint)
{
	struct pipe *rpipe = (struct pipe *)kn->kn_fp->f_data;
	struct pipe *wpipe = rpipe->pipe_peer;

	kn->kn_data = rpipe->pipe_buffer.cnt;
	if ((kn->kn_data == 0) && (rpipe->pipe_state & PIPE_DIRECTW))
		kn->kn_data = rpipe->pipe_map.cnt;

	if ((rpipe->pipe_state & PIPE_EOF) ||
	    (wpipe == NULL) || (wpipe->pipe_state & PIPE_EOF)) {
		kn->kn_flags |= EV_EOF;
		return (1);
	}
	return (kn->kn_data > 0);
}

/*
 * EVFILT_WRITE filter: kn_data is the free space in the peer's buffer
 * (0 while a direct write is in progress); triggers once at least
 * PIPE_BUF bytes can be written atomically.
 */
/*ARGSUSED*/
static int
filt_pipewrite(struct knote *kn, long hint)
{
	struct pipe *rpipe = (struct pipe *)kn->kn_fp->f_data;
	struct pipe *wpipe = rpipe->pipe_peer;

	if ((wpipe == NULL) || (wpipe->pipe_state & PIPE_EOF)) {
		kn->kn_data = 0;
		kn->kn_flags |= EV_EOF;
		return (1);
	}
	kn->kn_data = wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt;
	if (wpipe->pipe_state & PIPE_DIRECTW)
		kn->kn_data = 0;

	return (kn->kn_data >= PIPE_BUF);
}
#endif /* FreeBSD */

#ifdef __NetBSD__
/*
 * Pipe fcntl: F_SETFL is accepted as a no-op (flag handling is done at
 * the file layer); everything else is unsupported.
 */
static int
pipe_fcntl(fp, cmd, data, p)
	struct file *fp;
	u_int cmd;
	caddr_t data;
	struct proc *p;
{
	if (cmd == F_SETFL)
		return (0);
	else
		return (EOPNOTSUPP);
}

/*
 * Handle pipe sysctls.
 */
int
sysctl_dopipe(name, namelen, oldp, oldlenp, newp, newlen)
	int *name;
	u_int namelen;
	void *oldp;
	size_t *oldlenp;
	void *newp;
	size_t newlen;
{
	/* All sysctl names at this level are terminal. */
	if (namelen != 1)
		return (ENOTDIR);		/* overloaded */

	switch (name[0]) {
	case KERN_PIPE_MAXKVASZ:
		return (sysctl_int(oldp, oldlenp, newp, newlen, &maxpipekva));
	case KERN_PIPE_LIMITKVA:
		return (sysctl_int(oldp, oldlenp, newp, newlen, &limitpipekva));
	case KERN_PIPE_MAXBIGPIPES:
		return (sysctl_int(oldp, oldlenp, newp, newlen, &maxbigpipes));
	case KERN_PIPE_NBIGPIPES:
		/* read-only counters */
		return (sysctl_rdint(oldp, oldlenp, newp, nbigpipe));
	case KERN_PIPE_KVASIZE:
		return (sysctl_rdint(oldp, oldlenp, newp, amountpipekva));
	default:
		return (EOPNOTSUPP);
	}
	/* NOTREACHED */
}

/*
 * Initialize pipe structs: create the pool that pipe_create allocates
 * struct pipe from.  NOTE(review): presumably called once during kernel
 * startup — confirm against the caller.
 */
void
pipe_init(void)
{
	pool_init(&pipe_pool, sizeof(struct pipe), 0, 0, 0, "pipepl",
	    0, NULL, NULL, M_PIPE);
}

#endif /* __NetBSD__ */