1 /* $NetBSD: sys_pipe.c,v 1.4 2001/06/21 18:59:51 jdolecek Exp $ */ 2 3 /* 4 * Copyright (c) 1996 John S. Dyson 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice immediately at the beginning of the file, without modification, 12 * this list of conditions, and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. Absolutely no warranty of function or purpose is made by the author 17 * John S. Dyson. 18 * 4. Modifications may be freely made to this file if the above conditions 19 * are met. 20 * 21 * $FreeBSD: src/sys/kern/sys_pipe.c,v 1.82 2001/06/15 20:45:01 jlemon Exp $ 22 */ 23 24 /* 25 * This file contains a high-performance replacement for the socket-based 26 * pipes scheme originally used in FreeBSD/4.4Lite. It does not support 27 * all features of sockets, but does do everything that pipes normally 28 * do. 29 * 30 * Adaption for NetBSD UVM, including uvm_loan() based direct write, was 31 * written by Jaromir Dolecek. 32 */ 33 34 /* 35 * This code has two modes of operation, a small write mode and a large 36 * write mode. The small write mode acts like conventional pipes with 37 * a kernel buffer. If the buffer is less than PIPE_MINDIRECT, then the 38 * "normal" pipe buffering is done. If the buffer is between PIPE_MINDIRECT 39 * and PIPE_SIZE in size, it is fully mapped into the kernel (on FreeBSD, 40 * those pages are also wired), and the receiving process can copy it directly 41 * from the pages in the sending process. 42 * 43 * If the sending process receives a signal, it is possible that it will 44 * go away, and certainly its address space can change, because control 45 * is returned back to the user-mode side. In that case, the pipe code 46 * arranges to copy the buffer supplied by the user process on FreeBSD, to 47 * a pageable kernel buffer, and the receiving process will grab the data 48 * from the pageable kernel buffer. Since signals don't happen all that often, 49 * the copy operation is normally eliminated. 50 * For NetBSD, the pages are mapped read-only, COW for kernel by uvm_loan(), 51 * so no explicit handling need to be done, all is handled by standard VM 52 * facilities. 53 * 54 * The constant PIPE_MINDIRECT is chosen to make sure that buffering will 55 * happen for small transfers so that the system will not spend all of 56 * its time context switching. PIPE_SIZE is constrained by the 57 * amount of kernel virtual memory. 58 */ 59 60 #include <sys/param.h> 61 #include <sys/systm.h> 62 #include <sys/proc.h> 63 #include <sys/fcntl.h> 64 #include <sys/file.h> 65 #include <sys/filedesc.h> 66 #include <sys/filio.h> 67 #include <sys/ttycom.h> 68 #include <sys/stat.h> 69 #include <sys/poll.h> 70 #include <sys/signalvar.h> 71 #include <sys/vnode.h> 72 #include <sys/uio.h> 73 #include <sys/lock.h> 74 #ifdef __FreeBSD__ 75 #include <sys/mutex.h> 76 #include <sys/selinfo.h> 77 #include <sys/sysproto.h> 78 #elif defined(__NetBSD__) 79 #include <sys/select.h> 80 #include <sys/malloc.h> 81 #include <sys/mount.h> 82 #include <sys/syscallargs.h> 83 #include <uvm/uvm.h> 84 #include <sys/sysctl.h> 85 #endif /* NetBSD, FreeBSD */ 86 87 #include <sys/pipe.h> 88 89 #ifdef __NetBSD__ 90 #define vfs_timestamp(tv) microtime(tv) 91 #endif 92 93 /* 94 * Use this define if you want to disable *fancy* VM things. Expect an 95 * approx 30% decrease in transfer rate. This could be useful for 96 * OpenBSD. 97 */ 98 /* #define PIPE_NODIRECT */ 99 100 /* 101 * interfaces to the outside world 102 */ 103 #ifdef __FreeBSD__ 104 static int pipe_read __P((struct file *fp, struct uio *uio, 105 struct ucred *cred, int flags, struct proc *p)); 106 static int pipe_write __P((struct file *fp, struct uio *uio, 107 struct ucred *cred, int flags, struct proc *p)); 108 static int pipe_close __P((struct file *fp, struct proc *p)); 109 static int pipe_poll __P((struct file *fp, int events, struct ucred *cred, 110 struct proc *p)); 111 static int pipe_kqfilter __P((struct file *fp, struct knote *kn)); 112 static int pipe_stat __P((struct file *fp, struct stat *sb, struct proc *p)); 113 static int pipe_ioctl __P((struct file *fp, u_long cmd, caddr_t data, struct proc *p)); 114 115 static struct fileops pipeops = { 116 pipe_read, pipe_write, pipe_ioctl, pipe_poll, pipe_kqfilter, 117 pipe_stat, pipe_close 118 }; 119 120 static void filt_pipedetach(struct knote *kn); 121 static int filt_piperead(struct knote *kn, long hint); 122 static int filt_pipewrite(struct knote *kn, long hint); 123 124 static struct filterops pipe_rfiltops = 125 { 1, NULL, filt_pipedetach, filt_piperead }; 126 static struct filterops pipe_wfiltops = 127 { 1, NULL, filt_pipedetach, filt_pipewrite }; 128 #endif /* FreeBSD */ 129 130 #ifdef __NetBSD__ 131 static int pipe_read __P((struct file *fp, off_t *offset, struct uio *uio, 132 struct ucred *cred, int flags)); 133 static int pipe_write __P((struct file *fp, off_t *offset, struct uio *uio, 134 struct ucred *cred, int flags)); 135 static int pipe_close __P((struct file *fp, struct proc *p)); 136 static int pipe_poll __P((struct file *fp, int events, struct proc *p)); 137 static int pipe_fcntl __P((struct file *fp, u_int com, caddr_t data, 138 struct proc *p)); 139 static int pipe_stat __P((struct file *fp, struct stat *sb, struct proc *p)); 140 static int pipe_ioctl __P((struct file *fp, u_long cmd, caddr_t data, struct proc *p)); 141 142 static struct fileops pipeops = 143 { pipe_read, pipe_write, pipe_ioctl, pipe_fcntl, pipe_poll, 144 pipe_stat, pipe_close }; 145 #endif /* NetBSD */ 146 147 /* 148 * Default pipe buffer size(s), this can be kind-of large now because pipe 149 * space is pageable. The pipe code will try to maintain locality of 150 * reference for performance reasons, so small amounts of outstanding I/O 151 * will not wipe the cache. 152 */ 153 #define MINPIPESIZE (PIPE_SIZE/3) 154 #define MAXPIPESIZE (2*PIPE_SIZE/3) 155 156 /* 157 * Maximum amount of kva for pipes -- this is kind-of a soft limit, but 158 * is there so that on large systems, we don't exhaust it. 159 */ 160 #define MAXPIPEKVA (8*1024*1024) 161 static int maxpipekva = MAXPIPEKVA; 162 163 /* 164 * Limit for direct transfers, we cannot, of course limit 165 * the amount of kva for pipes in general though. 166 */ 167 #define LIMITPIPEKVA (16*1024*1024) 168 static int limitpipekva = LIMITPIPEKVA; 169 170 /* 171 * Limit the number of "big" pipes 172 */ 173 #define LIMITBIGPIPES 32 174 static int maxbigpipes = LIMITBIGPIPES; 175 static int nbigpipe = 0; 176 177 /* 178 * Amount of KVA consumed by pipe buffers. 179 */ 180 static int amountpipekva = 0; 181 182 static void pipeclose __P((struct pipe *cpipe)); 183 static void pipe_free_kmem __P((struct pipe *cpipe)); 184 static int pipe_create __P((struct pipe **cpipep)); 185 static __inline int pipelock __P((struct pipe *cpipe, int catch)); 186 static __inline void pipeunlock __P((struct pipe *cpipe)); 187 static __inline void pipeselwakeup __P((struct pipe *selp, 188 struct pipe *sigp)); 189 static int pipespace __P((struct pipe *cpipe, int size)); 190 191 #ifdef __FreeBSD__ 192 #ifndef PIPE_NODIRECT 193 static int pipe_build_write_buffer __P((struct pipe *wpipe, struct uio *uio)); 194 static void pipe_destroy_write_buffer __P((struct pipe *wpipe)); 195 static int pipe_direct_write __P((struct pipe *wpipe, struct uio *uio)); 196 static void pipe_clone_write_buffer __P((struct pipe *wpipe)); 197 #endif 198 199 static vm_zone_t pipe_zone; 200 #endif /* FreeBSD */ 201 202 #ifdef __NetBSD__ 203 #ifndef PIPE_NODIRECT 204 static __inline int pipe_direct_write __P((struct pipe *wpipe, struct uio *uio)); 205 static __inline int pipe_loan_alloc __P((struct pipe *wpipe, int npages, 206 vsize_t blen)); 207 static void pipe_loan_free __P((struct pipe *wpipe)); 208 #endif /* PIPE_NODIRECT */ 209 210 static struct pool pipe_pool; 211 #endif /* NetBSD */ 212 213 /* 214 * The pipe system call for the DTYPE_PIPE type of pipes 215 */ 216 217 /* ARGSUSED */ 218 #ifdef __FreeBSD__ 219 int 220 pipe(p, uap) 221 struct proc *p; 222 struct pipe_args /* { 223 int dummy; 224 } */ *uap; 225 #elif defined(__NetBSD__) 226 int 227 sys_pipe(p, v, retval) 228 struct proc *p; 229 void *v; 230 register_t *retval; 231 #endif 232 { 233 struct filedesc *fdp = p->p_fd; 234 struct file *rf, *wf; 235 struct pipe *rpipe, *wpipe; 236 int fd, error; 237 238 #ifdef __FreeBSD__ 239 if (pipe_zone == NULL) 240 pipe_zone = zinit("PIPE", sizeof(struct pipe), 0, 0, 4); 241 #endif 242 243 rpipe = wpipe = NULL; 244 if (pipe_create(&rpipe) || pipe_create(&wpipe)) { 245 pipeclose(rpipe); 246 pipeclose(wpipe); 247 return (ENFILE); 248 } 249 250 #ifdef __FreeBSD__ 251 error = falloc(p, &rf, &fd); 252 if (error) { 253 pipeclose(rpipe); 254 pipeclose(wpipe); 255 return (error); 256 } 257 fhold(rf); 258 p->p_retval[0] = fd; 259 260 /* 261 * Warning: once we've gotten past allocation of the fd for the 262 * read-side, we can only drop the read side via fdrop() in order 263 * to avoid races against processes which manage to dup() the read 264 * side while we are blocked trying to allocate the write side. 265 */ 266 rf->f_flag = FREAD | FWRITE; 267 rf->f_type = DTYPE_PIPE; 268 rf->f_data = (caddr_t)rpipe; 269 rf->f_ops = &pipeops; 270 error = falloc(p, &wf, &fd); 271 if (error) { 272 if (fdp->fd_ofiles[p->p_retval[0]] == rf) { 273 fdp->fd_ofiles[p->p_retval[0]] = NULL; 274 fdrop(rf, p); 275 } 276 fdrop(rf, p); 277 /* rpipe has been closed by fdrop(). */ 278 pipeclose(wpipe); 279 return (error); 280 } 281 wf->f_flag = FREAD | FWRITE; 282 wf->f_type = DTYPE_PIPE; 283 wf->f_data = (caddr_t)wpipe; 284 wf->f_ops = &pipeops; 285 p->p_retval[1] = fd; 286 287 rpipe->pipe_peer = wpipe; 288 wpipe->pipe_peer = rpipe; 289 fdrop(rf, p); 290 #endif /* FreeBSD */ 291 292 #ifdef __NetBSD__ 293 /* 294 * Note: the file structure returned from falloc() is marked 295 * as 'larval' initially. Unless we mark it as 'mature' by 296 * FILE_SET_MATURE(), any attempt to do anything with it would 297 * return EBADF, including e.g. dup(2) or close(2). This avoids 298 * file descriptor races if we block in the second falloc(). 299 */ 300 301 error = falloc(p, &rf, &fd); 302 if (error) 303 goto free2; 304 retval[0] = fd; 305 rf->f_flag = FREAD; 306 rf->f_type = DTYPE_PIPE; 307 rf->f_data = (caddr_t)rpipe; 308 rf->f_ops = &pipeops; 309 310 error = falloc(p, &wf, &fd); 311 if (error) 312 goto free3; 313 retval[1] = fd; 314 wf->f_flag = FWRITE; 315 wf->f_type = DTYPE_PIPE; 316 wf->f_data = (caddr_t)wpipe; 317 wf->f_ops = &pipeops; 318 319 rpipe->pipe_peer = wpipe; 320 wpipe->pipe_peer = rpipe; 321 322 FILE_SET_MATURE(rf); 323 FILE_SET_MATURE(wf); 324 FILE_UNUSE(rf, p); 325 FILE_UNUSE(wf, p); 326 return (0); 327 free3: 328 FILE_UNUSE(rf, p); 329 ffree(rf); 330 fdremove(fdp, retval[0]); 331 free2: 332 pipeclose(wpipe); 333 pipeclose(rpipe); 334 #endif /* NetBSD */ 335 336 return (error); 337 } 338 339 /* 340 * Allocate kva for pipe circular buffer, the space is pageable 341 * This routine will 'realloc' the size of a pipe safely, if it fails 342 * it will retain the old buffer. 343 * If it fails it will return ENOMEM. 344 */ 345 static int 346 pipespace(cpipe, size) 347 struct pipe *cpipe; 348 int size; 349 { 350 caddr_t buffer; 351 #ifdef __FreeBSD__ 352 struct vm_object *object; 353 int npages, error; 354 355 npages = round_page(size)/PAGE_SIZE; 356 /* 357 * Create an object, I don't like the idea of paging to/from 358 * kernel_object. 359 */ 360 mtx_lock(&vm_mtx); 361 object = vm_object_allocate(OBJT_DEFAULT, npages); 362 buffer = (caddr_t) vm_map_min(kernel_map); 363 364 /* 365 * Insert the object into the kernel map, and allocate kva for it. 366 * The map entry is, by default, pageable. 367 */ 368 error = vm_map_find(kernel_map, object, 0, 369 (vm_offset_t *) &buffer, size, 1, 370 VM_PROT_ALL, VM_PROT_ALL, 0); 371 372 if (error != KERN_SUCCESS) { 373 vm_object_deallocate(object); 374 mtx_unlock(&vm_mtx); 375 return (ENOMEM); 376 } 377 #endif /* FreeBSD */ 378 379 #ifdef __NetBSD__ 380 /* 381 * Allocate pageable virtual address space. Physical memory is allocated 382 * on demand. 383 */ 384 buffer = (caddr_t) uvm_km_valloc(kernel_map, round_page(size)); 385 if (buffer == NULL) 386 return (ENOMEM); 387 #endif /* NetBSD */ 388 389 /* free old resources if we're resizing */ 390 pipe_free_kmem(cpipe); 391 #ifdef __FreeBSD__ 392 mtx_unlock(&vm_mtx); 393 cpipe->pipe_buffer.object = object; 394 #endif 395 cpipe->pipe_buffer.buffer = buffer; 396 cpipe->pipe_buffer.size = size; 397 cpipe->pipe_buffer.in = 0; 398 cpipe->pipe_buffer.out = 0; 399 cpipe->pipe_buffer.cnt = 0; 400 amountpipekva += cpipe->pipe_buffer.size; 401 return (0); 402 } 403 404 /* 405 * initialize and allocate VM and memory for pipe 406 */ 407 static int 408 pipe_create(cpipep) 409 struct pipe **cpipep; 410 { 411 struct pipe *cpipe; 412 int error; 413 414 #ifdef __FreeBSD__ 415 *cpipep = zalloc(pipe_zone); 416 #endif 417 #ifdef __NetBSD__ 418 *cpipep = pool_get(&pipe_pool, M_WAITOK); 419 #endif 420 if (*cpipep == NULL) 421 return (ENOMEM); 422 423 cpipe = *cpipep; 424 425 #ifdef __FreeBSD__ 426 /* so pipespace()->pipe_free_kmem() doesn't follow junk pointer */ 427 cpipe->pipe_buffer.object = NULL; 428 #endif /* FreeBSD */ 429 /* 430 * protect so pipeclose() doesn't follow a junk pointer 431 * if pipespace() fails. 432 */ 433 cpipe->pipe_buffer.buffer = NULL; 434 bzero(&cpipe->pipe_sel, sizeof(cpipe->pipe_sel)); 435 cpipe->pipe_state = PIPE_SIGNALR; 436 cpipe->pipe_peer = NULL; 437 cpipe->pipe_busy = 0; 438 439 #ifndef PIPE_NODIRECT 440 /* 441 * pipe data structure initializations to support direct pipe I/O 442 */ 443 cpipe->pipe_map.cnt = 0; 444 cpipe->pipe_map.kva = NULL; 445 cpipe->pipe_map.pos = 0; 446 cpipe->pipe_map.npages = 0; 447 #ifdef __NetBSD__ 448 cpipe->pipe_map.ms = NULL; 449 #endif 450 #endif /* !PIPE_NODIRECT */ 451 452 if ((error = pipespace(cpipe, PIPE_SIZE))) 453 return (error); 454 455 vfs_timestamp(&cpipe->pipe_ctime); 456 cpipe->pipe_atime = cpipe->pipe_ctime; 457 cpipe->pipe_mtime = cpipe->pipe_ctime; 458 #ifdef __NetBSD__ 459 cpipe->pipe_pgid = NO_PID; 460 lockinit(&cpipe->pipe_lock, PRIBIO | PCATCH, "pipelk", 0, 0); 461 #endif 462 463 return (0); 464 } 465 466 467 /* 468 * lock a pipe for I/O, blocking other access 469 */ 470 static __inline int 471 pipelock(cpipe, catch) 472 struct pipe *cpipe; 473 int catch; 474 { 475 int error; 476 477 #ifdef __FreeBSD__ 478 while (cpipe->pipe_state & PIPE_LOCK) { 479 cpipe->pipe_state |= PIPE_LWANT; 480 error = tsleep(cpipe, catch ? (PRIBIO | PCATCH) : PRIBIO, 481 "pipelk", 0); 482 if (error != 0) 483 return (error); 484 } 485 cpipe->pipe_state |= PIPE_LOCK; 486 return (0); 487 #endif 488 489 #ifdef __NetBSD__ 490 do { 491 error = lockmgr(&cpipe->pipe_lock, LK_EXCLUSIVE, NULL); 492 } while (!catch && (error == EINTR || error == ERESTART)); 493 return (error); 494 #endif 495 } 496 497 /* 498 * unlock a pipe I/O lock 499 */ 500 static __inline void 501 pipeunlock(cpipe) 502 struct pipe *cpipe; 503 { 504 #ifdef __FreeBSD__ 505 cpipe->pipe_state &= ~PIPE_LOCK; 506 if (cpipe->pipe_state & PIPE_LWANT) { 507 cpipe->pipe_state &= ~PIPE_LWANT; 508 wakeup(cpipe); 509 } 510 #endif 511 512 #ifdef __NetBSD__ 513 lockmgr(&cpipe->pipe_lock, LK_RELEASE, NULL); 514 #endif 515 } 516 517 /* 518 * Select/poll wakup. This also sends SIGIO to peer connected to 519 * 'sigpipe' side of pipe. 520 */ 521 static __inline void 522 pipeselwakeup(selp, sigp) 523 struct pipe *selp, *sigp; 524 { 525 if (selp->pipe_state & PIPE_SEL) { 526 selp->pipe_state &= ~PIPE_SEL; 527 selwakeup(&selp->pipe_sel); 528 } 529 #ifdef __FreeBSD__ 530 if (sigp && (sigp->pipe_state & PIPE_ASYNC) && sigp->pipe_sigio) 531 pgsigio(sigp->pipe_sigio, SIGIO, 0); 532 KNOTE(&selp->pipe_sel.si_note, 0); 533 #endif 534 535 #ifdef __NetBSD__ 536 if (sigp && (sigp->pipe_state & PIPE_ASYNC) 537 && sigp->pipe_pgid != NO_PID){ 538 struct proc *p; 539 540 if (sigp->pipe_pgid < 0) 541 gsignal(-sigp->pipe_pgid, SIGIO); 542 else if (sigp->pipe_pgid > 0 && (p = pfind(sigp->pipe_pgid)) != 0) 543 psignal(p, SIGIO); 544 } 545 #endif /* NetBSD */ 546 } 547 548 /* ARGSUSED */ 549 #ifdef __FreeBSD__ 550 static int 551 pipe_read(fp, uio, cred, flags, p) 552 struct file *fp; 553 struct uio *uio; 554 struct ucred *cred; 555 int flags; 556 struct proc *p; 557 #elif defined(__NetBSD__) 558 static int 559 pipe_read(fp, offset, uio, cred, flags) 560 struct file *fp; 561 off_t *offset; 562 struct uio *uio; 563 struct ucred *cred; 564 int flags; 565 #endif 566 { 567 struct pipe *rpipe = (struct pipe *) fp->f_data; 568 int error; 569 size_t nread = 0; 570 size_t size; 571 size_t ocnt; 572 573 ++rpipe->pipe_busy; 574 error = pipelock(rpipe, 1); 575 if (error) 576 goto unlocked_error; 577 578 ocnt = rpipe->pipe_buffer.cnt; 579 580 while (uio->uio_resid) { 581 /* 582 * normal pipe buffer receive 583 */ 584 if (rpipe->pipe_buffer.cnt > 0) { 585 size = rpipe->pipe_buffer.size - rpipe->pipe_buffer.out; 586 if (size > rpipe->pipe_buffer.cnt) 587 size = rpipe->pipe_buffer.cnt; 588 if (size > uio->uio_resid) 589 size = uio->uio_resid; 590 591 error = uiomove(&rpipe->pipe_buffer.buffer[rpipe->pipe_buffer.out], 592 size, uio); 593 if (error) 594 break; 595 596 rpipe->pipe_buffer.out += size; 597 if (rpipe->pipe_buffer.out >= rpipe->pipe_buffer.size) 598 rpipe->pipe_buffer.out = 0; 599 600 rpipe->pipe_buffer.cnt -= size; 601 602 /* 603 * If there is no more to read in the pipe, reset 604 * its pointers to the beginning. This improves 605 * cache hit stats. 606 */ 607 if (rpipe->pipe_buffer.cnt == 0) { 608 rpipe->pipe_buffer.in = 0; 609 rpipe->pipe_buffer.out = 0; 610 } 611 nread += size; 612 #ifndef PIPE_NODIRECT 613 /* 614 * Direct copy, bypassing a kernel buffer. 615 */ 616 } else if ((size = rpipe->pipe_map.cnt) && 617 (rpipe->pipe_state & PIPE_DIRECTW)) { 618 caddr_t va; 619 if (size > uio->uio_resid) 620 size = uio->uio_resid; 621 622 va = (caddr_t) rpipe->pipe_map.kva + 623 rpipe->pipe_map.pos; 624 error = uiomove(va, size, uio); 625 if (error) 626 break; 627 nread += size; 628 rpipe->pipe_map.pos += size; 629 rpipe->pipe_map.cnt -= size; 630 if (rpipe->pipe_map.cnt == 0) { 631 rpipe->pipe_state &= ~PIPE_DIRECTW; 632 wakeup(rpipe); 633 #ifdef __NetBSD__ 634 if (uio->uio_resid > 0 && 635 (rpipe->pipe_state & PIPE_MOREW)) 636 goto waitformore; 637 #endif /* NetBSD */ 638 } 639 #endif 640 } else { 641 /* 642 * detect EOF condition 643 * read returns 0 on EOF, no need to set error 644 */ 645 if (rpipe->pipe_state & PIPE_EOF) 646 break; 647 648 /* 649 * If the "write-side" has been blocked, wake it up now. 650 */ 651 if (rpipe->pipe_state & PIPE_WANTW) { 652 rpipe->pipe_state &= ~PIPE_WANTW; 653 wakeup(rpipe); 654 } 655 656 /* 657 * Break if some data was read. 658 */ 659 if (nread > 0) 660 break; 661 662 /* 663 * don't block on non-blocking I/O 664 */ 665 if (fp->f_flag & FNONBLOCK) { 666 error = EAGAIN; 667 break; 668 } 669 670 #if defined(__NetBSD__) && !defined(PIPE_NODIRECT) 671 waitformore: 672 #endif 673 /* 674 * Unlock the pipe buffer for our remaining processing. 675 * We will either break out with an error or we will 676 * sleep and relock to loop. 677 */ 678 pipeunlock(rpipe); 679 680 /* 681 * We want to read more, wake up select/poll. 682 */ 683 pipeselwakeup(rpipe, rpipe->pipe_peer); 684 685 rpipe->pipe_state |= PIPE_WANTR; 686 error = tsleep(rpipe, PRIBIO | PCATCH, "piperd", 0); 687 if (error != 0 || (error = pipelock(rpipe, 1))) 688 goto unlocked_error; 689 } 690 } 691 pipeunlock(rpipe); 692 693 if (error == 0) 694 vfs_timestamp(&rpipe->pipe_atime); 695 unlocked_error: 696 --rpipe->pipe_busy; 697 698 /* 699 * PIPE_WANTCLOSE processing only makes sense if pipe_busy is 0. 700 */ 701 if ((rpipe->pipe_busy == 0) && (rpipe->pipe_state & PIPE_WANTCLOSE)) { 702 rpipe->pipe_state &= ~(PIPE_WANTCLOSE|PIPE_WANTW); 703 wakeup(rpipe); 704 } else if (rpipe->pipe_buffer.cnt < MINPIPESIZE) { 705 /* 706 * Handle write blocking hysteresis. 707 */ 708 if (rpipe->pipe_state & PIPE_WANTW) { 709 rpipe->pipe_state &= ~PIPE_WANTW; 710 wakeup(rpipe); 711 } 712 } 713 714 /* 715 * If anything was read off the buffer, signal to the writer it's 716 * possible to write more data. Also send signal if we are here for the 717 * first time after last write. 718 */ 719 if ((rpipe->pipe_buffer.size - rpipe->pipe_buffer.cnt) >= PIPE_BUF 720 && (ocnt != rpipe->pipe_buffer.cnt || (rpipe->pipe_state & PIPE_SIGNALR))) { 721 pipeselwakeup(rpipe, rpipe->pipe_peer); 722 rpipe->pipe_state &= ~PIPE_SIGNALR; 723 } 724 725 return (error); 726 } 727 728 #ifdef __FreeBSD__ 729 #ifndef PIPE_NODIRECT 730 /* 731 * Map the sending processes' buffer into kernel space and wire it. 732 * This is similar to a physical write operation. 733 */ 734 static int 735 pipe_build_write_buffer(wpipe, uio) 736 struct pipe *wpipe; 737 struct uio *uio; 738 { 739 size_t size; 740 int i; 741 vm_offset_t addr, endaddr, paddr; 742 743 size = uio->uio_iov->iov_len; 744 if (size > wpipe->pipe_buffer.size) 745 size = wpipe->pipe_buffer.size; 746 747 endaddr = round_page((vm_offset_t)uio->uio_iov->iov_base + size); 748 mtx_lock(&vm_mtx); 749 addr = trunc_page((vm_offset_t)uio->uio_iov->iov_base); 750 for (i = 0; addr < endaddr; addr += PAGE_SIZE, i++) { 751 vm_page_t m; 752 753 if (vm_fault_quick((caddr_t)addr, VM_PROT_READ) < 0 || 754 (paddr = pmap_kextract(addr)) == 0) { 755 int j; 756 757 for (j = 0; j < i; j++) 758 vm_page_unwire(wpipe->pipe_map.ms[j], 1); 759 mtx_unlock(&vm_mtx); 760 return (EFAULT); 761 } 762 763 m = PHYS_TO_VM_PAGE(paddr); 764 vm_page_wire(m); 765 wpipe->pipe_map.ms[i] = m; 766 } 767 768 /* 769 * set up the control block 770 */ 771 wpipe->pipe_map.npages = i; 772 wpipe->pipe_map.pos = 773 ((vm_offset_t) uio->uio_iov->iov_base) & PAGE_MASK; 774 wpipe->pipe_map.cnt = size; 775 776 /* 777 * and map the buffer 778 */ 779 if (wpipe->pipe_map.kva == 0) { 780 /* 781 * We need to allocate space for an extra page because the 782 * address range might (will) span pages at times. 783 */ 784 wpipe->pipe_map.kva = kmem_alloc_pageable(kernel_map, 785 wpipe->pipe_buffer.size + PAGE_SIZE); 786 amountpipekva += wpipe->pipe_buffer.size + PAGE_SIZE; 787 } 788 pmap_qenter(wpipe->pipe_map.kva, wpipe->pipe_map.ms, 789 wpipe->pipe_map.npages); 790 791 mtx_unlock(&vm_mtx); 792 /* 793 * and update the uio data 794 */ 795 796 uio->uio_iov->iov_len -= size; 797 uio->uio_iov->iov_base += size; 798 if (uio->uio_iov->iov_len == 0) 799 uio->uio_iov++; 800 uio->uio_resid -= size; 801 uio->uio_offset += size; 802 return (0); 803 } 804 805 /* 806 * unmap and unwire the process buffer 807 */ 808 static void 809 pipe_destroy_write_buffer(wpipe) 810 struct pipe *wpipe; 811 { 812 int i; 813 814 mtx_lock(&vm_mtx); 815 if (wpipe->pipe_map.kva) { 816 pmap_qremove(wpipe->pipe_map.kva, wpipe->pipe_map.npages); 817 818 if (amountpipekva > maxpipekva) { 819 vm_offset_t kva = wpipe->pipe_map.kva; 820 wpipe->pipe_map.kva = 0; 821 kmem_free(kernel_map, kva, 822 wpipe->pipe_buffer.size + PAGE_SIZE); 823 amountpipekva -= wpipe->pipe_buffer.size + PAGE_SIZE; 824 } 825 } 826 for (i = 0; i < wpipe->pipe_map.npages; i++) 827 vm_page_unwire(wpipe->pipe_map.ms[i], 1); 828 mtx_unlock(&vm_mtx); 829 } 830 831 /* 832 * In the case of a signal, the writing process might go away. This 833 * code copies the data into the circular buffer so that the source 834 * pages can be freed without loss of data. 835 */ 836 static void 837 pipe_clone_write_buffer(wpipe) 838 struct pipe *wpipe; 839 { 840 int size; 841 int pos; 842 843 size = wpipe->pipe_map.cnt; 844 pos = wpipe->pipe_map.pos; 845 bcopy((caddr_t) wpipe->pipe_map.kva + pos, 846 (caddr_t) wpipe->pipe_buffer.buffer, size); 847 848 wpipe->pipe_buffer.in = size; 849 wpipe->pipe_buffer.out = 0; 850 wpipe->pipe_buffer.cnt = size; 851 wpipe->pipe_state &= ~PIPE_DIRECTW; 852 853 pipe_destroy_write_buffer(wpipe); 854 } 855 856 /* 857 * This implements the pipe buffer write mechanism. Note that only 858 * a direct write OR a normal pipe write can be pending at any given time. 859 * If there are any characters in the pipe buffer, the direct write will 860 * be deferred until the receiving process grabs all of the bytes from 861 * the pipe buffer. Then the direct mapping write is set-up. 862 */ 863 static int 864 pipe_direct_write(wpipe, uio) 865 struct pipe *wpipe; 866 struct uio *uio; 867 { 868 int error; 869 870 retry: 871 while (wpipe->pipe_state & PIPE_DIRECTW) { 872 if (wpipe->pipe_state & PIPE_WANTR) { 873 wpipe->pipe_state &= ~PIPE_WANTR; 874 wakeup(wpipe); 875 } 876 wpipe->pipe_state |= PIPE_WANTW; 877 error = tsleep(wpipe, PRIBIO | PCATCH, "pipdww", 0); 878 if (error) 879 goto error1; 880 if (wpipe->pipe_state & PIPE_EOF) { 881 error = EPIPE; 882 goto error1; 883 } 884 } 885 wpipe->pipe_map.cnt = 0; /* transfer not ready yet */ 886 if (wpipe->pipe_buffer.cnt > 0) { 887 if (wpipe->pipe_state & PIPE_WANTR) { 888 wpipe->pipe_state &= ~PIPE_WANTR; 889 wakeup(wpipe); 890 } 891 892 wpipe->pipe_state |= PIPE_WANTW; 893 error = tsleep(wpipe, PRIBIO | PCATCH, "pipdwc", 0); 894 if (error) 895 goto error1; 896 if (wpipe->pipe_state & PIPE_EOF) { 897 error = EPIPE; 898 goto error1; 899 } 900 goto retry; 901 } 902 903 wpipe->pipe_state |= PIPE_DIRECTW; 904 905 error = pipe_build_write_buffer(wpipe, uio); 906 if (error) { 907 wpipe->pipe_state &= ~PIPE_DIRECTW; 908 goto error1; 909 } 910 911 error = 0; 912 while (!error && (wpipe->pipe_state & PIPE_DIRECTW)) { 913 if (wpipe->pipe_state & PIPE_EOF) { 914 pipelock(wpipe, 0); 915 pipe_destroy_write_buffer(wpipe); 916 pipeunlock(wpipe); 917 pipeselwakeup(wpipe, wpipe); 918 error = EPIPE; 919 goto error1; 920 } 921 if (wpipe->pipe_state & PIPE_WANTR) { 922 wpipe->pipe_state &= ~PIPE_WANTR; 923 wakeup(wpipe); 924 } 925 pipeselwakeup(wpipe, wpipe); 926 error = tsleep(wpipe, PRIBIO | PCATCH, "pipdwt", 0); 927 } 928 929 pipelock(wpipe,0); 930 if (wpipe->pipe_state & PIPE_DIRECTW) { 931 /* 932 * this bit of trickery substitutes a kernel buffer for 933 * the process that might be going away. 934 */ 935 pipe_clone_write_buffer(wpipe); 936 } else { 937 pipe_destroy_write_buffer(wpipe); 938 } 939 pipeunlock(wpipe); 940 return (error); 941 942 error1: 943 wakeup(wpipe); 944 return (error); 945 } 946 #endif /* !PIPE_NODIRECT */ 947 #endif /* FreeBSD */ 948 949 #ifdef __NetBSD__ 950 #ifndef PIPE_NODIRECT 951 /* 952 * Allocate structure for loan transfer. 953 */ 954 static __inline int 955 pipe_loan_alloc(wpipe, npages, blen) 956 struct pipe *wpipe; 957 int npages; 958 vsize_t blen; 959 { 960 wpipe->pipe_map.kva = uvm_km_valloc(kernel_map, blen); 961 if (wpipe->pipe_map.kva == NULL) 962 return (ENOMEM); 963 964 amountpipekva += blen; 965 wpipe->pipe_map.npages = npages; 966 wpipe->pipe_map.ms = (struct vm_page **) malloc( 967 npages * sizeof(struct vm_page *), M_PIPE, M_WAITOK); 968 969 return (0); 970 } 971 972 /* 973 * Free resources allocated for loan transfer. 974 */ 975 static void 976 pipe_loan_free(wpipe) 977 struct pipe *wpipe; 978 { 979 uvm_km_free(kernel_map, wpipe->pipe_map.kva, 980 wpipe->pipe_map.npages * PAGE_SIZE); 981 wpipe->pipe_map.kva = NULL; 982 amountpipekva -= wpipe->pipe_map.npages * PAGE_SIZE; 983 free(wpipe->pipe_map.ms, M_PIPE); 984 wpipe->pipe_map.ms = NULL; 985 } 986 987 /* 988 * NetBSD direct write, using uvm_loan() mechanism. 989 * This implements the pipe buffer write mechanism. Note that only 990 * a direct write OR a normal pipe write can be pending at any given time. 991 * If there are any characters in the pipe buffer, the direct write will 992 * be deferred until the receiving process grabs all of the bytes from 993 * the pipe buffer. Then the direct mapping write is set-up. 994 */ 995 static __inline int 996 pipe_direct_write(wpipe, uio) 997 struct pipe *wpipe; 998 struct uio *uio; 999 { 1000 int error, i, npages, j; 1001 struct vm_page **res; 1002 vaddr_t bbase, kva, base, bend; 1003 vsize_t blen, bcnt; 1004 voff_t boff, bpos; 1005 struct vm_map *wmap = &uio->uio_procp->p_vmspace->vm_map; 1006 retry: 1007 while (wpipe->pipe_state & PIPE_DIRECTW) { 1008 if (wpipe->pipe_state & PIPE_WANTR) { 1009 wpipe->pipe_state &= ~PIPE_WANTR; 1010 wakeup(wpipe); 1011 } 1012 wpipe->pipe_state |= PIPE_WANTW; 1013 error = tsleep(wpipe, PRIBIO | PCATCH, "pipdww", 0); 1014 if (error) 1015 goto error1; 1016 if (wpipe->pipe_state & PIPE_EOF) { 1017 error = EPIPE; 1018 goto error1; 1019 } 1020 } 1021 wpipe->pipe_map.cnt = 0; /* transfer not ready yet */ 1022 if (wpipe->pipe_buffer.cnt > 0) { 1023 if ( wpipe->pipe_state & PIPE_WANTR) { 1024 wpipe->pipe_state &= ~PIPE_WANTR; 1025 wakeup(wpipe); 1026 } 1027 1028 wpipe->pipe_state |= PIPE_WANTW; 1029 error = tsleep(wpipe, PRIBIO | PCATCH, "pipdwc", 0); 1030 if (error) 1031 goto error1; 1032 if (wpipe->pipe_state & PIPE_EOF) { 1033 error = EPIPE; 1034 goto error1; 1035 } 1036 goto retry; 1037 } 1038 1039 /* 1040 * For each iovec: 1041 * 1. Loan the pages to kernel. 1042 * 2. Set up pipe structures. 1043 * 3. Wait until consumer reads it all or exits. 1044 */ 1045 boff = 0; 1046 for(i=0; i < uio->uio_iovcnt; ) { 1047 /* 1048 * Note: need to handle buffers not aligned to PAGE_SIZE. 1049 */ 1050 bbase = (vaddr_t)uio->uio_iov[i].iov_base; 1051 base = trunc_page(bbase + boff); 1052 bend = round_page(bbase + uio->uio_iov[i].iov_len); 1053 blen = bend - base; 1054 1055 if (boff == 0) 1056 bpos = bbase % PAGE_SIZE; 1057 else 1058 bpos = 0; 1059 1060 if (blen > PIPE_DIRECT_CHUNK) { 1061 blen = PIPE_DIRECT_CHUNK; 1062 boff += PIPE_DIRECT_CHUNK; 1063 bend = base + blen; 1064 bcnt = PIPE_DIRECT_CHUNK - bpos; 1065 wpipe->pipe_state |= PIPE_MOREW; 1066 } else { 1067 if (boff == 0) 1068 bcnt = uio->uio_iov[i].iov_len; 1069 else 1070 bcnt = ((bbase % PAGE_SIZE) + 1071 uio->uio_iov[i].iov_len) %PIPE_DIRECT_CHUNK; 1072 boff = 0; 1073 i++; 1074 wpipe->pipe_state &= ~PIPE_MOREW; 1075 } 1076 1077 npages = blen / PAGE_SIZE; 1078 1079 /* 1080 * Free the old kva if we need more pages than we have 1081 * allocated. 1082 */ 1083 if (wpipe->pipe_map.kva 1084 && npages > wpipe->pipe_map.npages) 1085 pipe_loan_free(wpipe); 1086 1087 /* Allocate new kva. */ 1088 if (!wpipe->pipe_map.kva) { 1089 if ((error = pipe_loan_alloc(wpipe, 1090 npages, blen))) 1091 goto error; 1092 } 1093 1094 /* Loan the write buffer memory from writer process */ 1095 res = wpipe->pipe_map.ms; 1096 error = uvm_loan(wmap, base, blen, 1097 (void **) res, UVM_LOAN_TOPAGE); 1098 if (error) 1099 goto cleanup; 1100 1101 /* Enter the loaned pages to kva */ 1102 kva = wpipe->pipe_map.kva; 1103 for(j=0; j < npages; j++, kva += PAGE_SIZE) 1104 pmap_enter(pmap_kernel(), kva, res[j]->phys_addr, 1105 VM_PROT_READ, 0); 1106 1107 wpipe->pipe_map.pos = bpos; 1108 wpipe->pipe_map.cnt = bcnt; 1109 wpipe->pipe_state |= PIPE_DIRECTW; 1110 1111 error = 0; 1112 while (!error && (wpipe->pipe_state & PIPE_DIRECTW)) { 1113 if (wpipe->pipe_state & PIPE_EOF) { 1114 error = EPIPE; 1115 break; 1116 } 1117 if (wpipe->pipe_state & PIPE_WANTR) { 1118 wpipe->pipe_state &= ~PIPE_WANTR; 1119 wakeup(wpipe); 1120 } 1121 pipeselwakeup(wpipe, wpipe); 1122 error = tsleep(wpipe, PRIBIO | PCATCH, "pipdwt", 0); 1123 } 1124 1125 cleanup: 1126 pipelock(wpipe,0); 1127 if (amountpipekva > maxpipekva) 1128 pipe_loan_free(wpipe); 1129 uvm_unloanpage(res, npages); 1130 pipeunlock(wpipe); 1131 if (error) { 1132 error: 1133 /* XXX update uio ? */ 1134 if (error == EPIPE) 1135 pipeselwakeup(wpipe, wpipe); 1136 1137 wpipe->pipe_state &= ~PIPE_MOREW; 1138 goto error1; 1139 } 1140 1141 uio->uio_offset += bcnt; 1142 uio->uio_resid -= bcnt; 1143 1144 } /* for */ 1145 1146 return (error); 1147 1148 error1: 1149 wakeup(wpipe); 1150 return (error); 1151 } 1152 #endif /* !PIPE_NODIRECT */ 1153 #endif /* NetBSD */ 1154 1155 #ifdef __FreeBSD__ 1156 static int 1157 pipe_write(fp, uio, cred, flags, p) 1158 struct file *fp; 1159 off_t *offset; 1160 struct uio *uio; 1161 struct ucred *cred; 1162 int flags; 1163 struct proc *p; 1164 #elif defined(__NetBSD__) 1165 static int 1166 pipe_write(fp, offset, uio, cred, flags) 1167 struct file *fp; 1168 off_t *offset; 1169 struct uio *uio; 1170 struct ucred *cred; 1171 int flags; 1172 #endif 1173 { 1174 int error = 0; 1175 int orig_resid; 1176 struct pipe *wpipe, *rpipe; 1177 1178 rpipe = (struct pipe *) fp->f_data; 1179 wpipe = rpipe->pipe_peer; 1180 1181 /* 1182 * detect loss of pipe read side, issue SIGPIPE if lost. 1183 */ 1184 if ((wpipe == NULL) || (wpipe->pipe_state & PIPE_EOF)) 1185 return (EPIPE); 1186 1187 ++wpipe->pipe_busy; 1188 1189 /* 1190 * If it is advantageous to resize the pipe buffer, do 1191 * so. 1192 */ 1193 if ((uio->uio_resid > PIPE_SIZE) && 1194 (nbigpipe < maxbigpipes) && 1195 #ifndef PIPE_NODIRECT 1196 (wpipe->pipe_state & PIPE_DIRECTW) == 0 && 1197 #endif 1198 (wpipe->pipe_buffer.size <= PIPE_SIZE) && 1199 (wpipe->pipe_buffer.cnt == 0)) { 1200 1201 if ((error = pipelock(wpipe,1)) == 0) { 1202 if (pipespace(wpipe, BIG_PIPE_SIZE) == 0) 1203 nbigpipe++; 1204 pipeunlock(wpipe); 1205 } else { 1206 /* 1207 * If an error occured unbusy and return, waking up any 1208 * pending readers. 1209 */ 1210 --wpipe->pipe_busy; 1211 if (wpipe->pipe_busy == 0 1212 && (wpipe->pipe_state & PIPE_WANTCLOSE)) { 1213 wpipe->pipe_state &= 1214 ~(PIPE_WANTCLOSE | PIPE_WANTR); 1215 wakeup(wpipe); 1216 } 1217 1218 return (error); 1219 } 1220 } 1221 1222 #ifdef __FreeBSD__ 1223 KASSERT(wpipe->pipe_buffer.buffer != NULL, ("pipe buffer gone")); 1224 #endif 1225 1226 orig_resid = uio->uio_resid; 1227 while (uio->uio_resid) { 1228 int space; 1229 1230 #ifndef PIPE_NODIRECT 1231 /* 1232 * If the transfer is large, we can gain performance if 1233 * we do process-to-process copies directly. 1234 * If the write is non-blocking, we don't use the 1235 * direct write mechanism. 1236 * 1237 * The direct write mechanism will detect the reader going 1238 * away on us. 1239 */ 1240 if ((uio->uio_iov->iov_len >= PIPE_MINDIRECT) && 1241 (fp->f_flag & FNONBLOCK) == 0 && 1242 (wpipe->pipe_map.kva || (amountpipekva < limitpipekva))) { 1243 error = pipe_direct_write(wpipe, uio); 1244 if (error) 1245 break; 1246 continue; 1247 } 1248 #endif /* PIPE_NODIRECT */ 1249 1250 /* 1251 * Pipe buffered writes cannot be coincidental with 1252 * direct writes. We wait until the currently executing 1253 * direct write is completed before we start filling the 1254 * pipe buffer. We break out if a signal occurs or the 1255 * reader goes away. 1256 */ 1257 retrywrite: 1258 while (wpipe->pipe_state & PIPE_DIRECTW) { 1259 if (wpipe->pipe_state & PIPE_WANTR) { 1260 wpipe->pipe_state &= ~PIPE_WANTR; 1261 wakeup(wpipe); 1262 } 1263 error = tsleep(wpipe, PRIBIO | PCATCH, "pipbww", 0); 1264 if (wpipe->pipe_state & PIPE_EOF) 1265 break; 1266 if (error) 1267 break; 1268 } 1269 if (wpipe->pipe_state & PIPE_EOF) { 1270 error = EPIPE; 1271 break; 1272 } 1273 1274 space = wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt; 1275 1276 /* Writes of size <= PIPE_BUF must be atomic. */ 1277 if ((space < uio->uio_resid) && (orig_resid <= PIPE_BUF)) 1278 space = 0; 1279 1280 if (space > 0 && (wpipe->pipe_buffer.cnt < PIPE_SIZE)) { 1281 int size; /* Transfer size */ 1282 int segsize; /* first segment to transfer */ 1283 1284 if ((error = pipelock(wpipe,1)) != 0) 1285 break; 1286 1287 /* 1288 * It is possible for a direct write to 1289 * slip in on us... handle it here... 1290 */ 1291 if (wpipe->pipe_state & PIPE_DIRECTW) { 1292 pipeunlock(wpipe); 1293 goto retrywrite; 1294 } 1295 /* 1296 * If a process blocked in uiomove, our 1297 * value for space might be bad. 1298 * 1299 * XXX will we be ok if the reader has gone 1300 * away here? 1301 */ 1302 if (space > wpipe->pipe_buffer.size - 1303 wpipe->pipe_buffer.cnt) { 1304 pipeunlock(wpipe); 1305 goto retrywrite; 1306 } 1307 1308 /* 1309 * Transfer size is minimum of uio transfer 1310 * and free space in pipe buffer. 1311 */ 1312 if (space > uio->uio_resid) 1313 size = uio->uio_resid; 1314 else 1315 size = space; 1316 /* 1317 * First segment to transfer is minimum of 1318 * transfer size and contiguous space in 1319 * pipe buffer. If first segment to transfer 1320 * is less than the transfer size, we've got 1321 * a wraparound in the buffer. 1322 */ 1323 segsize = wpipe->pipe_buffer.size - 1324 wpipe->pipe_buffer.in; 1325 if (segsize > size) 1326 segsize = size; 1327 1328 /* Transfer first segment */ 1329 1330 error = uiomove(&wpipe->pipe_buffer.buffer[wpipe->pipe_buffer.in], 1331 segsize, uio); 1332 1333 if (error == 0 && segsize < size) { 1334 /* 1335 * Transfer remaining part now, to 1336 * support atomic writes. Wraparound 1337 * happened. 1338 */ 1339 #ifdef DEBUG 1340 if (wpipe->pipe_buffer.in + segsize != 1341 wpipe->pipe_buffer.size) 1342 panic("Expected pipe buffer wraparound disappeared"); 1343 #endif 1344 1345 error = uiomove(&wpipe->pipe_buffer.buffer[0], 1346 size - segsize, uio); 1347 } 1348 if (error == 0) { 1349 wpipe->pipe_buffer.in += size; 1350 if (wpipe->pipe_buffer.in >= 1351 wpipe->pipe_buffer.size) { 1352 #ifdef DEBUG 1353 if (wpipe->pipe_buffer.in != size - segsize + wpipe->pipe_buffer.size) 1354 panic("Expected wraparound bad"); 1355 #endif 1356 wpipe->pipe_buffer.in = size - segsize; 1357 } 1358 1359 wpipe->pipe_buffer.cnt += size; 1360 #ifdef DEBUG 1361 if (wpipe->pipe_buffer.cnt > wpipe->pipe_buffer.size) 1362 panic("Pipe buffer overflow"); 1363 #endif 1364 1365 } 1366 pipeunlock(wpipe); 1367 if (error) 1368 break; 1369 1370 } else { 1371 /* 1372 * If the "read-side" has been blocked, wake it up now. 1373 */ 1374 if (wpipe->pipe_state & PIPE_WANTR) { 1375 wpipe->pipe_state &= ~PIPE_WANTR; 1376 wakeup(wpipe); 1377 } 1378 1379 /* 1380 * don't block on non-blocking I/O 1381 */ 1382 if (fp->f_flag & FNONBLOCK) { 1383 error = EAGAIN; 1384 break; 1385 } 1386 1387 /* 1388 * We have no more space and have something to offer, 1389 * wake up select/poll. 1390 */ 1391 pipeselwakeup(wpipe, wpipe); 1392 1393 wpipe->pipe_state |= PIPE_WANTW; 1394 error = tsleep(wpipe, PRIBIO | PCATCH, "pipewr", 0); 1395 if (error != 0) 1396 break; 1397 /* 1398 * If read side wants to go away, we just issue a signal 1399 * to ourselves. 1400 */ 1401 if (wpipe->pipe_state & PIPE_EOF) { 1402 error = EPIPE; 1403 break; 1404 } 1405 } 1406 } 1407 1408 --wpipe->pipe_busy; 1409 if ((wpipe->pipe_busy == 0) && (wpipe->pipe_state & PIPE_WANTCLOSE)) { 1410 wpipe->pipe_state &= ~(PIPE_WANTCLOSE | PIPE_WANTR); 1411 wakeup(wpipe); 1412 } else if (wpipe->pipe_buffer.cnt > 0) { 1413 /* 1414 * If we have put any characters in the buffer, we wake up 1415 * the reader. 1416 */ 1417 if (wpipe->pipe_state & PIPE_WANTR) { 1418 wpipe->pipe_state &= ~PIPE_WANTR; 1419 wakeup(wpipe); 1420 } 1421 } 1422 1423 /* 1424 * Don't return EPIPE if I/O was successful 1425 */ 1426 if ((error == EPIPE) && (wpipe->pipe_buffer.cnt == 0) 1427 && (uio->uio_resid == 0)) 1428 error = 0; 1429 1430 if (error == 0) 1431 vfs_timestamp(&wpipe->pipe_mtime); 1432 1433 /* 1434 * We have something to offer, wake up select/poll. 1435 * wpipe->pipe_map.cnt is always 0 in this point (direct write 1436 * is only done synchronously), so check wpipe->only pipe_buffer.cnt 1437 */ 1438 if (wpipe->pipe_buffer.cnt) 1439 pipeselwakeup(wpipe, wpipe); 1440 1441 /* 1442 * Arrange for next read(2) to do a signal. 1443 */ 1444 wpipe->pipe_state |= PIPE_SIGNALR; 1445 1446 return (error); 1447 } 1448 1449 /* 1450 * we implement a very minimal set of ioctls for compatibility with sockets. 1451 */ 1452 int 1453 pipe_ioctl(fp, cmd, data, p) 1454 struct file *fp; 1455 u_long cmd; 1456 caddr_t data; 1457 struct proc *p; 1458 { 1459 struct pipe *mpipe = (struct pipe *)fp->f_data; 1460 1461 switch (cmd) { 1462 1463 case FIONBIO: 1464 return (0); 1465 1466 case FIOASYNC: 1467 if (*(int *)data) { 1468 mpipe->pipe_state |= PIPE_ASYNC; 1469 } else { 1470 mpipe->pipe_state &= ~PIPE_ASYNC; 1471 } 1472 return (0); 1473 1474 case FIONREAD: 1475 #ifndef PIPE_NODIRECT 1476 if (mpipe->pipe_state & PIPE_DIRECTW) 1477 *(int *)data = mpipe->pipe_map.cnt; 1478 else 1479 #endif 1480 *(int *)data = mpipe->pipe_buffer.cnt; 1481 return (0); 1482 1483 #ifdef __FreeBSD__ 1484 case FIOSETOWN: 1485 return (fsetown(*(int *)data, &mpipe->pipe_sigio)); 1486 1487 case FIOGETOWN: 1488 *(int *)data = fgetown(mpipe->pipe_sigio); 1489 return (0); 1490 1491 /* This is deprecated, FIOSETOWN should be used instead. */ 1492 case TIOCSPGRP: 1493 return (fsetown(-(*(int *)data), &mpipe->pipe_sigio)); 1494 1495 /* This is deprecated, FIOGETOWN should be used instead. */ 1496 case TIOCGPGRP: 1497 *(int *)data = -fgetown(mpipe->pipe_sigio); 1498 return (0); 1499 #endif /* FreeBSD */ 1500 #ifdef __NetBSD__ 1501 case TIOCSPGRP: 1502 mpipe->pipe_pgid = *(int *)data; 1503 return (0); 1504 1505 case TIOCGPGRP: 1506 *(int *)data = mpipe->pipe_pgid; 1507 return (0); 1508 #endif /* NetBSD */ 1509 1510 } 1511 return (ENOTTY); 1512 } 1513 1514 int 1515 pipe_poll(fp, events, p) 1516 struct file *fp; 1517 int events; 1518 struct proc *p; 1519 { 1520 struct pipe *rpipe = (struct pipe *)fp->f_data; 1521 struct pipe *wpipe; 1522 int revents = 0; 1523 1524 wpipe = rpipe->pipe_peer; 1525 if (events & (POLLIN | POLLRDNORM)) 1526 if ((rpipe->pipe_buffer.cnt > 0) || 1527 #ifndef PIPE_NODIRECT 1528 (rpipe->pipe_state & PIPE_DIRECTW) || 1529 #endif 1530 (rpipe->pipe_state & PIPE_EOF)) 1531 revents |= events & (POLLIN | POLLRDNORM); 1532 1533 if (events & (POLLOUT | POLLWRNORM)) 1534 if (wpipe == NULL || (wpipe->pipe_state & PIPE_EOF) 1535 || ( 1536 #ifndef PIPE_NODIRECT 1537 ((wpipe->pipe_state & PIPE_DIRECTW) == 0) && 1538 #endif 1539 (wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt) >= PIPE_BUF)) 1540 revents |= events & (POLLOUT | POLLWRNORM); 1541 1542 if ((rpipe->pipe_state & PIPE_EOF) || 1543 (wpipe == NULL) || 1544 (wpipe->pipe_state & PIPE_EOF)) 1545 revents |= POLLHUP; 1546 1547 if (revents == 0) { 1548 if (events & (POLLIN | POLLRDNORM)) { 1549 selrecord(p, &rpipe->pipe_sel); 1550 rpipe->pipe_state |= PIPE_SEL; 1551 } 1552 1553 if (events & (POLLOUT | POLLWRNORM)) { 1554 selrecord(p, &wpipe->pipe_sel); 1555 wpipe->pipe_state |= PIPE_SEL; 1556 } 1557 } 1558 1559 return (revents); 1560 } 1561 1562 static int 1563 pipe_stat(fp, ub, p) 1564 struct file *fp; 1565 struct stat *ub; 1566 struct proc *p; 1567 { 1568 struct pipe *pipe = (struct pipe *)fp->f_data; 1569 1570 bzero((caddr_t)ub, sizeof(*ub)); 1571 ub->st_mode = S_IFIFO; 1572 ub->st_blksize = pipe->pipe_buffer.size; 1573 ub->st_size = pipe->pipe_buffer.cnt; 1574 ub->st_blocks = (ub->st_size) ? 1 : 0; 1575 #ifdef __FreeBSD__ 1576 ub->st_atimespec = pipe->pipe_atime; 1577 ub->st_mtimespec = pipe->pipe_mtime; 1578 ub->st_ctimespec = pipe->pipe_ctime; 1579 #endif /* FreeBSD */ 1580 #ifdef __NetBSD__ 1581 TIMEVAL_TO_TIMESPEC(&pipe->pipe_atime, &ub->st_atimespec) 1582 TIMEVAL_TO_TIMESPEC(&pipe->pipe_mtime, &ub->st_mtimespec); 1583 TIMEVAL_TO_TIMESPEC(&pipe->pipe_ctime, &ub->st_ctimespec); 1584 #endif /* NetBSD */ 1585 ub->st_uid = fp->f_cred->cr_uid; 1586 ub->st_gid = fp->f_cred->cr_gid; 1587 /* 1588 * Left as 0: st_dev, st_ino, st_nlink, st_rdev, st_flags, st_gen. 1589 * XXX (st_dev, st_ino) should be unique. 1590 */ 1591 return (0); 1592 } 1593 1594 /* ARGSUSED */ 1595 static int 1596 pipe_close(fp, p) 1597 struct file *fp; 1598 struct proc *p; 1599 { 1600 struct pipe *cpipe = (struct pipe *)fp->f_data; 1601 1602 #ifdef __FreeBSD__ 1603 fp->f_ops = &badfileops; 1604 funsetown(cpipe->pipe_sigio); 1605 #endif 1606 fp->f_data = NULL; 1607 pipeclose(cpipe); 1608 return (0); 1609 } 1610 1611 static void 1612 pipe_free_kmem(cpipe) 1613 struct pipe *cpipe; 1614 { 1615 1616 #ifdef __FreeBSD__ 1617 mtx_assert(&vm_mtx, MA_OWNED); 1618 #endif 1619 if (cpipe->pipe_buffer.buffer != NULL) { 1620 if (cpipe->pipe_buffer.size > PIPE_SIZE) 1621 --nbigpipe; 1622 amountpipekva -= cpipe->pipe_buffer.size; 1623 #ifdef __FreeBSD__ 1624 kmem_free(kernel_map, 1625 (vm_offset_t)cpipe->pipe_buffer.buffer, 1626 cpipe->pipe_buffer.size); 1627 #elif defined(__NetBSD__) 1628 uvm_km_free(kernel_map, 1629 (vaddr_t)cpipe->pipe_buffer.buffer, 1630 cpipe->pipe_buffer.size); 1631 #endif /* NetBSD */ 1632 1633 cpipe->pipe_buffer.buffer = NULL; 1634 } 1635 #ifndef PIPE_NODIRECT 1636 if (cpipe->pipe_map.kva != NULL) { 1637 #ifdef __FreeBSD__ 1638 amountpipekva -= cpipe->pipe_buffer.size + PAGE_SIZE; 1639 kmem_free(kernel_map, 1640 cpipe->pipe_map.kva, 1641 cpipe->pipe_buffer.size + PAGE_SIZE); 1642 #elif defined(__NetBSD__) 1643 pipe_loan_free(cpipe); 1644 #endif /* NetBSD */ 1645 cpipe->pipe_map.cnt = 0; 1646 cpipe->pipe_map.kva = NULL; 1647 cpipe->pipe_map.pos = 0; 1648 cpipe->pipe_map.npages = 0; 1649 } 1650 #endif /* !PIPE_NODIRECT */ 1651 } 1652 1653 /* 1654 * shutdown the pipe 1655 */ 1656 static void 1657 pipeclose(cpipe) 1658 struct pipe *cpipe; 1659 { 1660 struct pipe *ppipe; 1661 1662 if (!cpipe) 1663 return; 1664 1665 pipeselwakeup(cpipe, cpipe); 1666 1667 /* 1668 * If the other side is blocked, wake it up saying that 1669 * we want to close it down. 1670 */ 1671 while (cpipe->pipe_busy) { 1672 wakeup(cpipe); 1673 cpipe->pipe_state |= PIPE_WANTCLOSE | PIPE_EOF; 1674 tsleep(cpipe, PRIBIO, "pipecl", 0); 1675 } 1676 1677 /* 1678 * Disconnect from peer 1679 */ 1680 if ((ppipe = cpipe->pipe_peer) != NULL) { 1681 pipeselwakeup(ppipe, ppipe); 1682 1683 ppipe->pipe_state |= PIPE_EOF; 1684 wakeup(ppipe); 1685 ppipe->pipe_peer = NULL; 1686 } 1687 1688 /* 1689 * free resources 1690 */ 1691 #ifdef _FreeBSD__ 1692 mtx_lock(&vm_mtx); 1693 pipe_free_kmem(cpipe); 1694 /* XXX: erm, doesn't zalloc already have its own locks and 1695 * not need the giant vm lock? 1696 */ 1697 zfree(pipe_zone, cpipe); 1698 mtx_unlock(&vm_mtx); 1699 #endif /* FreeBSD */ 1700 1701 #ifdef __NetBSD__ 1702 pipe_free_kmem(cpipe); 1703 (void) lockmgr(&cpipe->pipe_lock, LK_DRAIN, NULL); 1704 pool_put(&pipe_pool, cpipe); 1705 #endif 1706 } 1707 1708 #ifdef __FreeBSD__ 1709 /*ARGSUSED*/ 1710 static int 1711 pipe_kqfilter(struct file *fp, struct knote *kn) 1712 { 1713 struct pipe *cpipe = (struct pipe *)kn->kn_fp->f_data; 1714 1715 switch (kn->kn_filter) { 1716 case EVFILT_READ: 1717 kn->kn_fop = &pipe_rfiltops; 1718 break; 1719 case EVFILT_WRITE: 1720 kn->kn_fop = &pipe_wfiltops; 1721 cpipe = cpipe->pipe_peer; 1722 break; 1723 default: 1724 return (1); 1725 } 1726 kn->kn_hook = (caddr_t)cpipe; 1727 1728 SLIST_INSERT_HEAD(&cpipe->pipe_sel.si_note, kn, kn_selnext); 1729 return (0); 1730 } 1731 1732 static void 1733 filt_pipedetach(struct knote *kn) 1734 { 1735 struct pipe *cpipe = (struct pipe *)kn->kn_fp->f_data; 1736 1737 SLIST_REMOVE(&cpipe->pipe_sel.si_note, kn, knote, kn_selnext); 1738 } 1739 1740 /*ARGSUSED*/ 1741 static int 1742 filt_piperead(struct knote *kn, long hint) 1743 { 1744 struct pipe *rpipe = (struct pipe *)kn->kn_fp->f_data; 1745 struct pipe *wpipe = rpipe->pipe_peer; 1746 1747 kn->kn_data = rpipe->pipe_buffer.cnt; 1748 if ((kn->kn_data == 0) && (rpipe->pipe_state & PIPE_DIRECTW)) 1749 kn->kn_data = rpipe->pipe_map.cnt; 1750 1751 if ((rpipe->pipe_state & PIPE_EOF) || 1752 (wpipe == NULL) || (wpipe->pipe_state & PIPE_EOF)) { 1753 kn->kn_flags |= EV_EOF; 1754 return (1); 1755 } 1756 return (kn->kn_data > 0); 1757 } 1758 1759 /*ARGSUSED*/ 1760 static int 1761 filt_pipewrite(struct knote *kn, long hint) 1762 { 1763 struct pipe *rpipe = (struct pipe *)kn->kn_fp->f_data; 1764 struct pipe *wpipe = rpipe->pipe_peer; 1765 1766 if ((wpipe == NULL) || (wpipe->pipe_state & PIPE_EOF)) { 1767 kn->kn_data = 0; 1768 kn->kn_flags |= EV_EOF; 1769 return (1); 1770 } 1771 kn->kn_data = wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt; 1772 if (wpipe->pipe_state & PIPE_DIRECTW) 1773 kn->kn_data = 0; 1774 1775 return (kn->kn_data >= PIPE_BUF); 1776 } 1777 #endif /* FreeBSD */ 1778 1779 #ifdef __NetBSD__ 1780 static int 1781 pipe_fcntl(fp, cmd, data, p) 1782 struct file *fp; 1783 u_int cmd; 1784 caddr_t data; 1785 struct proc *p; 1786 { 1787 if (cmd == F_SETFL) 1788 return (0); 1789 else 1790 return (EOPNOTSUPP); 1791 } 1792 1793 /* 1794 * Handle pipe sysctls. 1795 */ 1796 int 1797 sysctl_dopipe(name, namelen, oldp, oldlenp, newp, newlen) 1798 int *name; 1799 u_int namelen; 1800 void *oldp; 1801 size_t *oldlenp; 1802 void *newp; 1803 size_t newlen; 1804 { 1805 /* All sysctl names at this level are terminal. */ 1806 if (namelen != 1) 1807 return (ENOTDIR); /* overloaded */ 1808 1809 switch (name[0]) { 1810 case KERN_PIPE_MAXKVASZ: 1811 return (sysctl_int(oldp, oldlenp, newp, newlen, &maxpipekva)); 1812 case KERN_PIPE_LIMITKVA: 1813 return (sysctl_int(oldp, oldlenp, newp, newlen, &limitpipekva)); 1814 case KERN_PIPE_MAXBIGPIPES: 1815 return (sysctl_int(oldp, oldlenp, newp, newlen, &maxbigpipes)); 1816 case KERN_PIPE_NBIGPIPES: 1817 return (sysctl_rdint(oldp, oldlenp, newp, nbigpipe)); 1818 case KERN_PIPE_KVASIZE: 1819 return (sysctl_rdint(oldp, oldlenp, newp, amountpipekva)); 1820 default: 1821 return (EOPNOTSUPP); 1822 } 1823 /* NOTREACHED */ 1824 } 1825 1826 /* 1827 * Initialize pipe structs. 1828 */ 1829 void 1830 pipe_init(void) 1831 { 1832 pool_init(&pipe_pool, sizeof(struct pipe), 0, 0, 0, "pipepl", 1833 0, NULL, NULL, M_PIPE); 1834 } 1835 1836 #endif /* __NetBSD __ */ 1837