/*
 * Copyright (c) 1996 John S. Dyson
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice immediately at the beginning of the file, without modification,
 *    this list of conditions, and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Absolutely no warranty of function or purpose is made by the author
 *    John S. Dyson.
 * 4. Modifications may be freely made to this file if the above conditions
 *    are met.
 *
 * $Id: sys_pipe.c,v 1.4 1996/10/12 14:34:42 niklas Exp $
 */

#ifndef OLD_PIPE

/*
 * This file contains a high-performance replacement for the socket-based
 * pipes scheme originally used in FreeBSD/4.4Lite.  It does not support
 * all features of sockets, but does do everything that pipes normally
 * do.
 */

/*
 * This code has two modes of operation: a small write mode and a large
 * write mode.  The small write mode acts like conventional pipes with
 * a kernel buffer.  If the write is smaller than PIPE_MINDIRECT, the
 * "normal" pipe buffering is done.  If the write is between PIPE_MINDIRECT
 * and PIPE_SIZE in size, it is fully mapped and wired into the kernel, and
 * the receiving process can copy it directly from the pages in the sending
 * process.  (An illustrative userland sketch that exercises both paths
 * appears at the end of this file.)
 *
 * If the sending process receives a signal, it is possible that it will
 * go away, and certainly its address space can change, because control
 * is returned back to the user-mode side.  In that case, the pipe code
 * arranges to copy the buffer supplied by the user process to a pageable
 * kernel buffer, and the receiving process will grab the data from the
 * pageable kernel buffer.  Since signals don't happen all that often,
 * the copy operation is normally eliminated.
 *
 * The constant PIPE_MINDIRECT is chosen to make sure that buffering will
 * happen for small transfers so that the system will not spend all of
 * its time context switching.  PIPE_SIZE is constrained by the
 * amount of kernel virtual memory.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/file.h>
#include <sys/protosw.h>
#include <sys/stat.h>
#include <sys/filedesc.h>
#include <sys/malloc.h>
#include <sys/ioctl.h>
#include <sys/select.h>
#include <sys/signalvar.h>
#include <sys/errno.h>
#include <sys/queue.h>
#include <sys/vmmeter.h>
#include <sys/kernel.h>
#if defined(__FreeBSD__)
#include <sys/sysproto.h>
#else /* defined(__NetBSD__) || defined(__OpenBSD__) */
#include <sys/mount.h>
#include <sys/syscallargs.h>
#endif

#include <vm/vm.h>
#include <vm/vm_prot.h>
#include <vm/vm_param.h>
#include <vm/lock.h>
#include <vm/vm_object.h>
#include <vm/vm_kern.h>
#include <vm/vm_extern.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_page.h>

#include <sys/pipe.h>

/*
 * Use this define if you want to disable *fancy* VM things.  Expect an
 * approx 30% decrease in transfer rate.  This could be useful for
 * NetBSD or OpenBSD.
 */
#if defined(__NetBSD__) || defined(__OpenBSD__)
#define PIPE_NODIRECT
#endif

/*
 * interfaces to the outside world
 */
int     pipe_read __P((struct file *, struct uio *, struct ucred *));
int     pipe_write __P((struct file *, struct uio *, struct ucred *));
int     pipe_close __P((struct file *, struct proc *));
int     pipe_select __P((struct file *, int which, struct proc *));
#if defined(__FreeBSD__)
int     pipe_ioctl __P((struct file *, int, caddr_t, struct proc *));
#else /* defined(__NetBSD__) || defined(__OpenBSD__) */
int     pipe_ioctl __P((struct file *, u_long, caddr_t, struct proc *));
#endif

static struct fileops pipeops =
    { pipe_read, pipe_write, pipe_ioctl, pipe_select, pipe_close };

/*
 * Default pipe buffer size(s); this can be fairly large now because pipe
 * space is pageable.  The pipe code will try to maintain locality of
 * reference for performance reasons, so small amounts of outstanding I/O
 * will not wipe the cache.
 */
#define MINPIPESIZE (PIPE_SIZE/3)
#define MAXPIPESIZE (2*PIPE_SIZE/3)

/*
 * Maximum amount of kva for pipes -- this is kind-of a soft limit, but
 * is there so that on large systems, we don't exhaust it.
 */
#define MAXPIPEKVA (8*1024*1024)

/*
 * Limit for direct transfers; we cannot, of course, limit
 * the amount of kva for pipes in general.
 */
#define LIMITPIPEKVA (16*1024*1024)

/*
 * Limit the number of "big" pipes
 */
#define LIMITBIGPIPES 32
int nbigpipe;

static int amountpipekva;

void    pipeclose __P((struct pipe *));
void    pipeinit __P((struct pipe *));
int     pipe_stat __P((struct pipe *, struct stat *));
static __inline int pipelock __P((struct pipe *, int));
static __inline void pipeunlock __P((struct pipe *));
static __inline void pipeselwakeup __P((struct pipe *));
#ifndef PIPE_NODIRECT
int     pipe_build_write_buffer __P((struct pipe *, struct uio *));
void    pipe_destroy_write_buffer __P((struct pipe *));
int     pipe_direct_write __P((struct pipe *, struct uio *));
void    pipe_clone_write_buffer __P((struct pipe *));
#endif
void    pipespace __P((struct pipe *));

/*
 * The pipe system call for the DTYPE_PIPE type of pipes
 */

/* ARGSUSED */
int
#if defined(__FreeBSD__)
pipe(p, uap, retval)
#else /* (__NetBSD__) || (__OpenBSD__) */
sys_pipe(p, v, retval)
#endif
        struct proc *p;
        void *v;
#if defined(__FreeBSD__)
        int retval[];
#else /* (__NetBSD__) || (__OpenBSD__) */
        register_t *retval;
#endif
{
        register struct filedesc *fdp = p->p_fd;
        struct file *rf, *wf;
        struct pipe *rpipe, *wpipe;
        int fd, error;

        rpipe = malloc(sizeof(*rpipe), M_TEMP, M_WAITOK);
        pipeinit(rpipe);
        rpipe->pipe_state |= PIPE_DIRECTOK;
        wpipe = malloc(sizeof(*wpipe), M_TEMP, M_WAITOK);
        pipeinit(wpipe);
        wpipe->pipe_state |= PIPE_DIRECTOK;

        error = falloc(p, &rf, &fd);
        if (error)
                goto free2;
        retval[0] = fd;
        rf->f_flag = FREAD | FWRITE;
        rf->f_type = DTYPE_PIPE;
        rf->f_ops = &pipeops;
        rf->f_data = (caddr_t)rpipe;
        error = falloc(p, &wf, &fd);
        if (error)
                goto free3;
        wf->f_flag = FREAD | FWRITE;
        wf->f_type = DTYPE_PIPE;
        wf->f_ops = &pipeops;
        wf->f_data = (caddr_t)wpipe;
        retval[1] = fd;

        rpipe->pipe_peer = wpipe;
        wpipe->pipe_peer = rpipe;

        return (0);
free3:
        ffree(rf);
        fdp->fd_ofiles[retval[0]] = 0;
free2:
        (void)pipeclose(wpipe);
        (void)pipeclose(rpipe);
        return (error);
}

/*
 * Allocate kva for pipe circular buffer; the space is pageable.
 */
void
pipespace(cpipe)
        struct pipe *cpipe;
{
        int npages, error;

        npages = round_page(cpipe->pipe_buffer.size)/PAGE_SIZE;
        /*
         * Create an object; I don't like the idea of paging to/from
         * kernel_object.
         * XXX -- minor change needed here for NetBSD/OpenBSD VM systems.
         */
#if defined(__FreeBSD__)
        cpipe->pipe_buffer.object = vm_object_allocate(OBJT_DEFAULT, npages);
#else /* (__NetBSD__) || (__OpenBSD__) */
        cpipe->pipe_buffer.object = vm_object_allocate(npages);
#endif
        cpipe->pipe_buffer.buffer = (caddr_t) vm_map_min(kernel_map);

        /*
         * Insert the object into the kernel map, and allocate kva for it.
         * The map entry is, by default, pageable.
         * XXX -- minor change needed here for NetBSD/OpenBSD VM systems.
         */
#if defined(__FreeBSD__)
        error = vm_map_find(kernel_map, cpipe->pipe_buffer.object, 0,
            (vm_offset_t *) &cpipe->pipe_buffer.buffer,
            cpipe->pipe_buffer.size, 1,
            VM_PROT_ALL, VM_PROT_ALL, 0);
#else /* (__NetBSD__) || (__OpenBSD__) */
        error = vm_map_find(kernel_map, cpipe->pipe_buffer.object, 0,
            (vm_offset_t *) &cpipe->pipe_buffer.buffer,
            cpipe->pipe_buffer.size, 1);
#endif

        if (error != KERN_SUCCESS)
                panic("pipespace: cannot allocate pipe -- out of kvm -- code = %d", error);
        amountpipekva += cpipe->pipe_buffer.size;
}

/*
 * initialize and allocate VM and memory for pipe
 */
void
pipeinit(cpipe)
        struct pipe *cpipe;
{
        int s;

        cpipe->pipe_buffer.in = 0;
        cpipe->pipe_buffer.out = 0;
        cpipe->pipe_buffer.cnt = 0;
        cpipe->pipe_buffer.size = PIPE_SIZE;

        /* Buffer kva gets dynamically allocated */
        cpipe->pipe_buffer.buffer = NULL;
        /* cpipe->pipe_buffer.object = invalid */

        cpipe->pipe_state = 0;
        cpipe->pipe_peer = NULL;
        cpipe->pipe_busy = 0;
        s = splhigh();
        cpipe->pipe_ctime = time;
        cpipe->pipe_atime = time;
        cpipe->pipe_mtime = time;
        splx(s);
        bzero(&cpipe->pipe_sel, sizeof cpipe->pipe_sel);
        cpipe->pipe_pgid = NO_PID;

#ifndef PIPE_NODIRECT
        /*
         * pipe data structure initializations to support direct pipe I/O
         */
        cpipe->pipe_map.cnt = 0;
        cpipe->pipe_map.kva = 0;
        cpipe->pipe_map.pos = 0;
        cpipe->pipe_map.npages = 0;
        /* cpipe->pipe_map.ms[] = invalid */
#endif
}

/*
 * lock a pipe for I/O, blocking other access
 */
static __inline int
pipelock(cpipe, catch)
        struct pipe *cpipe;
        int catch;
{
        int error;

        while (cpipe->pipe_state & PIPE_LOCK) {
                cpipe->pipe_state |= PIPE_LWANT;
                error = tsleep(cpipe, catch ?
                    PRIBIO|PCATCH : PRIBIO, "pipelk", 0);
                if (error)
                        return error;
        }
        cpipe->pipe_state |= PIPE_LOCK;
        return 0;
}

/*
 * unlock a pipe I/O lock
 */
static __inline void
pipeunlock(cpipe)
        struct pipe *cpipe;
{
        cpipe->pipe_state &= ~PIPE_LOCK;
        if (cpipe->pipe_state & PIPE_LWANT) {
                cpipe->pipe_state &= ~PIPE_LWANT;
                wakeup(cpipe);
        }
}

static __inline void
pipeselwakeup(cpipe)
        struct pipe *cpipe;
{
        if (cpipe->pipe_state & PIPE_SEL) {
                cpipe->pipe_state &= ~PIPE_SEL;
                selwakeup(&cpipe->pipe_sel);
        }
}

/* ARGSUSED */
int
pipe_read(fp, uio, cred)
        struct file *fp;
        struct uio *uio;
        struct ucred *cred;
{
        struct pipe *rpipe = (struct pipe *) fp->f_data;
        int error = 0;
        int nread = 0;
        int size;

        ++rpipe->pipe_busy;
        while (uio->uio_resid) {
                /*
                 * normal pipe buffer receive
                 */
                if (rpipe->pipe_buffer.cnt > 0) {
                        size = rpipe->pipe_buffer.size -
                            rpipe->pipe_buffer.out;
                        if (size > rpipe->pipe_buffer.cnt)
                                size = rpipe->pipe_buffer.cnt;
                        if (size > uio->uio_resid)
                                size = uio->uio_resid;
                        if ((error = pipelock(rpipe, 1)) == 0) {
                                error = uiomove(&rpipe->pipe_buffer.buffer[rpipe->pipe_buffer.out],
                                    size, uio);
                                pipeunlock(rpipe);
                        }
                        if (error) {
                                break;
                        }
                        rpipe->pipe_buffer.out += size;
                        if (rpipe->pipe_buffer.out >= rpipe->pipe_buffer.size)
                                rpipe->pipe_buffer.out = 0;

                        rpipe->pipe_buffer.cnt -= size;
                        nread += size;
#ifndef PIPE_NODIRECT
                /*
                 * Direct copy, bypassing a kernel buffer.
                 */
                } else if ((size = rpipe->pipe_map.cnt) &&
                    (rpipe->pipe_state & PIPE_DIRECTW)) {
                        caddr_t va;

                        if (size > uio->uio_resid)
                                size = uio->uio_resid;
                        if ((error = pipelock(rpipe, 1)) == 0) {
                                va = (caddr_t) rpipe->pipe_map.kva + rpipe->pipe_map.pos;
                                error = uiomove(va, size, uio);
                                pipeunlock(rpipe);
                        }
                        if (error)
                                break;
                        nread += size;
                        rpipe->pipe_map.pos += size;
                        rpipe->pipe_map.cnt -= size;
                        if (rpipe->pipe_map.cnt == 0) {
                                rpipe->pipe_state &= ~PIPE_DIRECTW;
                                wakeup(rpipe);
                        }
#endif
                } else {
                        /*
                         * detect EOF condition
                         */
                        if (rpipe->pipe_state & PIPE_EOF) {
                                /* XXX error = ? */
                                break;
                        }
                        /*
                         * If the "write-side" has been blocked, wake it up now.
                         */
                        if (rpipe->pipe_state & PIPE_WANTW) {
                                rpipe->pipe_state &= ~PIPE_WANTW;
                                wakeup(rpipe);
                        }
                        if (nread > 0)
                                break;

                        if (fp->f_flag & FNONBLOCK) {
                                error = EAGAIN;
                                break;
                        }

                        /*
                         * If there is no more to read in the pipe, reset
                         * its pointers to the beginning.  This improves
                         * cache hit stats.
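                         *
                         * A sketch of the circular-buffer invariant being
                         * maintained here (illustrative; "size", "cnt",
                         * "in" and "out" are the struct pipebuf fields):
                         *
                         *      cnt = number of valid bytes in the buffer
                         *      in  = offset of the next byte to be written
                         *      out = offset of the next byte to be read
                         *      in == (out + cnt) % size
                         *
                         * When cnt reaches 0, in == out, so both can be
                         * rewound to 0 without losing data; the next write
                         * then lands on the cache-warm pages at the base
                         * of the buffer.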
                         */

                        if ((error = pipelock(rpipe, 1)) == 0) {
                                if (rpipe->pipe_buffer.cnt == 0) {
                                        rpipe->pipe_buffer.in = 0;
                                        rpipe->pipe_buffer.out = 0;
                                }
                                pipeunlock(rpipe);
                        } else {
                                break;
                        }

                        if (rpipe->pipe_state & PIPE_WANTW) {
                                rpipe->pipe_state &= ~PIPE_WANTW;
                                wakeup(rpipe);
                        }

                        rpipe->pipe_state |= PIPE_WANTR;
                        error = tsleep(rpipe, PRIBIO|PCATCH, "piperd", 0);
                        if (error)
                                break;
                }
        }

        if (error == 0) {
                int s = splhigh();
                rpipe->pipe_atime = time;
                splx(s);
        }

        --rpipe->pipe_busy;
        if ((rpipe->pipe_busy == 0) && (rpipe->pipe_state & PIPE_WANT)) {
                rpipe->pipe_state &= ~(PIPE_WANT|PIPE_WANTW);
                wakeup(rpipe);
        } else if (rpipe->pipe_buffer.cnt < MINPIPESIZE) {
                /*
                 * If there is no more to read in the pipe, reset
                 * its pointers to the beginning.  This improves
                 * cache hit stats.
                 */
                if (rpipe->pipe_buffer.cnt == 0) {
                        if ((error == 0) && (error = pipelock(rpipe, 1)) == 0) {
                                rpipe->pipe_buffer.in = 0;
                                rpipe->pipe_buffer.out = 0;
                                pipeunlock(rpipe);
                        }
                }

                /*
                 * If the "write-side" has been blocked, wake it up now.
                 */
                if (rpipe->pipe_state & PIPE_WANTW) {
                        rpipe->pipe_state &= ~PIPE_WANTW;
                        wakeup(rpipe);
                }
        }

        if ((rpipe->pipe_buffer.size - rpipe->pipe_buffer.cnt) >= PIPE_BUF)
                pipeselwakeup(rpipe);

        return error;
}

#ifndef PIPE_NODIRECT
/*
 * Map the sending process's buffer into kernel space and wire it.
 * This is similar to a physical write operation.
 */
int
pipe_build_write_buffer(wpipe, uio)
        struct pipe *wpipe;
        struct uio *uio;
{
        int size;
        int i;
        vm_offset_t addr, endaddr, paddr;

        size = uio->uio_iov->iov_len;
        if (size > wpipe->pipe_buffer.size)
                size = wpipe->pipe_buffer.size;

        endaddr = round_page(uio->uio_iov->iov_base + size);
        for (i = 0, addr = trunc_page(uio->uio_iov->iov_base);
            addr < endaddr;
            addr += PAGE_SIZE, i += 1) {

                vm_page_t m;

                vm_fault_quick((caddr_t) addr, VM_PROT_READ);
                paddr = pmap_kextract(addr);
                if (!paddr) {
                        int j;

                        for (j = 0; j < i; j++)
                                vm_page_unwire(wpipe->pipe_map.ms[j]);
                        return EFAULT;
                }

                m = PHYS_TO_VM_PAGE(paddr);
                vm_page_wire(m);
                wpipe->pipe_map.ms[i] = m;
        }

        /*
         * set up the control block
         */
        wpipe->pipe_map.npages = i;
        wpipe->pipe_map.pos = ((vm_offset_t) uio->uio_iov->iov_base) & PAGE_MASK;
        wpipe->pipe_map.cnt = size;

        /*
         * and map the buffer
         */
        if (wpipe->pipe_map.kva == 0) {
                /*
                 * We need to allocate space for an extra page because the
                 * address range might (will) span pages at times.
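                 *
                 * Illustrative arithmetic (assuming 4K pages and the
                 * usual 16K PIPE_SIZE): a 16K write starting at user
                 * address 0x1234 spans trunc_page(0x1234) = 0x1000
                 * through round_page(0x1234 + 16K) = 0x6000, i.e. five
                 * pages for four pages' worth of data, so the kva
                 * window must cover pipe_buffer.size + PAGE_SIZE bytes.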
                 */
                wpipe->pipe_map.kva = kmem_alloc_pageable(kernel_map,
                        wpipe->pipe_buffer.size + PAGE_SIZE);
                amountpipekva += wpipe->pipe_buffer.size + PAGE_SIZE;
        }
        pmap_qenter(wpipe->pipe_map.kva, wpipe->pipe_map.ms,
                wpipe->pipe_map.npages);

        /*
         * and update the uio data
         */
        uio->uio_iov->iov_len -= size;
        uio->uio_iov->iov_base += size;
        if (uio->uio_iov->iov_len == 0)
                uio->uio_iov++;
        uio->uio_resid -= size;
        uio->uio_offset += size;
        return 0;
}

/*
 * unmap and unwire the process buffer
 */
void
pipe_destroy_write_buffer(wpipe)
        struct pipe *wpipe;
{
        int i;

        if (wpipe->pipe_map.kva) {
                pmap_qremove(wpipe->pipe_map.kva, wpipe->pipe_map.npages);

                if (amountpipekva > MAXPIPEKVA) {
                        vm_offset_t kva = wpipe->pipe_map.kva;

                        wpipe->pipe_map.kva = 0;
                        kmem_free(kernel_map, kva,
                                wpipe->pipe_buffer.size + PAGE_SIZE);
                        amountpipekva -= wpipe->pipe_buffer.size + PAGE_SIZE;
                }
        }
        for (i = 0; i < wpipe->pipe_map.npages; i++)
                vm_page_unwire(wpipe->pipe_map.ms[i]);
}

/*
 * In the case of a signal, the writing process might go away.  This
 * code copies the data into the circular buffer so that the source
 * pages can be freed without loss of data.
 */
void
pipe_clone_write_buffer(wpipe)
        struct pipe *wpipe;
{
        int size;
        int pos;

        size = wpipe->pipe_map.cnt;
        pos = wpipe->pipe_map.pos;
        bcopy((caddr_t) wpipe->pipe_map.kva + pos,
            (caddr_t) wpipe->pipe_buffer.buffer,
            size);

        wpipe->pipe_buffer.in = size;
        wpipe->pipe_buffer.out = 0;
        wpipe->pipe_buffer.cnt = size;
        wpipe->pipe_state &= ~PIPE_DIRECTW;

        pipe_destroy_write_buffer(wpipe);
}

/*
 * This implements the pipe buffer write mechanism.  Note that only
 * a direct write OR a normal pipe write can be pending at any given time.
 * If there are any characters in the pipe buffer, the direct write will
 * be deferred until the receiving process grabs all of the bytes from
 * the pipe buffer.  Then the direct mapping write is set up.
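 *
 * A sketch of the writer-side states below (illustrative; the PIPE_*
 * flags are from sys/pipe.h, and the quoted names are the tsleep wait
 * channels used in this function):
 *
 *      another direct write pending -> sleep on "pipdww" until
 *                                      PIPE_DIRECTW clears
 *      buffered bytes still queued  -> sleep on "pipdwc" until cnt == 0
 *      otherwise                    -> set PIPE_DIRECTW, wire the user
 *                                      pages, and sleep on "pipdwt" until
 *                                      the reader has consumed pipe_map.cnt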
 */
int
pipe_direct_write(wpipe, uio)
        struct pipe *wpipe;
        struct uio *uio;
{
        int error;

retry:
        while (wpipe->pipe_state & PIPE_DIRECTW) {
                if (wpipe->pipe_state & PIPE_WANTR) {
                        wpipe->pipe_state &= ~PIPE_WANTR;
                        wakeup(wpipe);
                }
                wpipe->pipe_state |= PIPE_WANTW;
                error = tsleep(wpipe,
                    PRIBIO|PCATCH, "pipdww", 0);
                if (error)
                        goto error1;
                if (wpipe->pipe_state & PIPE_EOF) {
                        error = EPIPE;
                        goto error1;
                }
        }
        wpipe->pipe_map.cnt = 0;        /* transfer not ready yet */
        if (wpipe->pipe_buffer.cnt > 0) {
                if (wpipe->pipe_state & PIPE_WANTR) {
                        wpipe->pipe_state &= ~PIPE_WANTR;
                        wakeup(wpipe);
                }

                wpipe->pipe_state |= PIPE_WANTW;
                error = tsleep(wpipe,
                    PRIBIO|PCATCH, "pipdwc", 0);
                if (error)
                        goto error1;
                if (wpipe->pipe_state & PIPE_EOF) {
                        error = EPIPE;
                        goto error1;
                }
                goto retry;
        }

        wpipe->pipe_state |= PIPE_DIRECTW;

        error = pipe_build_write_buffer(wpipe, uio);
        if (error) {
                wpipe->pipe_state &= ~PIPE_DIRECTW;
                goto error1;
        }

        error = 0;
        while (!error && (wpipe->pipe_state & PIPE_DIRECTW)) {
                if (wpipe->pipe_state & PIPE_EOF) {
                        pipelock(wpipe, 0);
                        pipe_destroy_write_buffer(wpipe);
                        pipeunlock(wpipe);
                        pipeselwakeup(wpipe);
                        error = EPIPE;
                        goto error1;
                }
                if (wpipe->pipe_state & PIPE_WANTR) {
                        wpipe->pipe_state &= ~PIPE_WANTR;
                        wakeup(wpipe);
                }
                pipeselwakeup(wpipe);
                error = tsleep(wpipe, PRIBIO|PCATCH, "pipdwt", 0);
        }

        pipelock(wpipe, 0);
        if (wpipe->pipe_state & PIPE_DIRECTW) {
                /*
                 * this bit of trickery substitutes a kernel buffer for
                 * the process that might be going away.
                 */
                pipe_clone_write_buffer(wpipe);
        } else {
                pipe_destroy_write_buffer(wpipe);
        }
        pipeunlock(wpipe);
        return error;

error1:
        wakeup(wpipe);
        return error;
}
#endif

int
pipe_write(fp, uio, cred)
        struct file *fp;
        struct uio *uio;
        struct ucred *cred;
{
        int error = 0;
        int orig_resid;
        struct pipe *wpipe, *rpipe;

        rpipe = (struct pipe *) fp->f_data;
        wpipe = rpipe->pipe_peer;

        /*
         * detect loss of pipe read side, issue SIGPIPE if lost.
         */
        if ((wpipe == NULL) || (wpipe->pipe_state & PIPE_EOF)) {
                return EPIPE;
        }

        /*
         * If it is advantageous to resize the pipe buffer, do
         * so.
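         *
         * For example (illustrative figures, assuming the usual 16K
         * PIPE_SIZE and 64K BIG_PIPE_SIZE from sys/pipe.h): a single
         * 64K write to an idle, still-default-sized pipe promotes it
         * here to a "big" pipe, provided fewer than LIMITBIGPIPES big
         * pipes already exist and no direct write is in flight.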
         */
        if ((uio->uio_resid > PIPE_SIZE) &&
            (nbigpipe < LIMITBIGPIPES) &&
            (wpipe->pipe_state & PIPE_DIRECTW) == 0 &&
            (wpipe->pipe_buffer.size <= PIPE_SIZE) &&
            (wpipe->pipe_buffer.cnt == 0)) {

                if (wpipe->pipe_buffer.buffer) {
                        amountpipekva -= wpipe->pipe_buffer.size;
                        kmem_free(kernel_map,
                                (vm_offset_t)wpipe->pipe_buffer.buffer,
                                wpipe->pipe_buffer.size);
                }

#ifndef PIPE_NODIRECT
                if (wpipe->pipe_map.kva) {
                        amountpipekva -= wpipe->pipe_buffer.size + PAGE_SIZE;
                        kmem_free(kernel_map,
                                wpipe->pipe_map.kva,
                                wpipe->pipe_buffer.size + PAGE_SIZE);
                }
#endif

                wpipe->pipe_buffer.in = 0;
                wpipe->pipe_buffer.out = 0;
                wpipe->pipe_buffer.cnt = 0;
                wpipe->pipe_buffer.size = BIG_PIPE_SIZE;
                wpipe->pipe_buffer.buffer = NULL;
                ++nbigpipe;

#ifndef PIPE_NODIRECT
                wpipe->pipe_map.cnt = 0;
                wpipe->pipe_map.kva = 0;
                wpipe->pipe_map.pos = 0;
                wpipe->pipe_map.npages = 0;
#endif
        }

        if (wpipe->pipe_buffer.buffer == NULL) {
                if ((error = pipelock(wpipe, 1)) == 0) {
                        pipespace(wpipe);
                        pipeunlock(wpipe);
                } else {
                        return error;
                }
        }

        ++wpipe->pipe_busy;
        orig_resid = uio->uio_resid;
        while (uio->uio_resid) {
                int space;
#ifndef PIPE_NODIRECT
                /*
                 * If the transfer is large, we can gain performance if
                 * we do process-to-process copies directly.
                 * If the write is non-blocking, we don't use the
                 * direct write mechanism.
                 */
                if ((uio->uio_iov->iov_len >= PIPE_MINDIRECT) &&
                    (fp->f_flag & FNONBLOCK) == 0 &&
                    (wpipe->pipe_map.kva || (amountpipekva < LIMITPIPEKVA))) {
                        error = pipe_direct_write(wpipe, uio);
                        if (error) {
                                break;
                        }
                        continue;
                }
#endif

                /*
                 * Pipe buffered writes cannot be coincident with
                 * direct writes.  We wait until the currently executing
                 * direct write is completed before we start filling the
                 * pipe buffer.
                 */
retrywrite:
                while (wpipe->pipe_state & PIPE_DIRECTW) {
                        if (wpipe->pipe_state & PIPE_WANTR) {
                                wpipe->pipe_state &= ~PIPE_WANTR;
                                wakeup(wpipe);
                        }
                        error = tsleep(wpipe,
                            PRIBIO|PCATCH, "pipbww", 0);
                        if (error)
                                break;
                }

                space = wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt;

                /* Writes of size <= PIPE_BUF must be atomic. */
                /* XXX perhaps they need to be contiguous to be atomic? */
                if ((space < uio->uio_resid) && (orig_resid <= PIPE_BUF))
                        space = 0;

                if (space > 0 && (wpipe->pipe_buffer.cnt < PIPE_SIZE)) {
                        /*
                         * This sets the maximum transfer as a segment of
                         * the buffer.
                         */
                        int size = wpipe->pipe_buffer.size - wpipe->pipe_buffer.in;
                        /*
                         * space is the size left in the buffer
                         */
                        if (size > space)
                                size = space;
                        /*
                         * now limit it to the size of the uio transfer
                         */
                        if (size > uio->uio_resid)
                                size = uio->uio_resid;
                        if ((error = pipelock(wpipe, 1)) == 0) {
                                /*
                                 * It is possible for a direct write to
                                 * slip in on us... handle it here...
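                                 *
                                 * (Illustrative interleaving: pipelock()
                                 * above may tsleep(), and while we slept
                                 * another writer can have started a direct
                                 * transfer; rather than mix buffered bytes
                                 * into it, we drop the lock and restart at
                                 * retrywrite.)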
                                 */
                                if (wpipe->pipe_state & PIPE_DIRECTW) {
                                        pipeunlock(wpipe);
                                        goto retrywrite;
                                }
                                error = uiomove(&wpipe->pipe_buffer.buffer[wpipe->pipe_buffer.in],
                                    size, uio);
                                pipeunlock(wpipe);
                        }
                        if (error)
                                break;

                        wpipe->pipe_buffer.in += size;
                        if (wpipe->pipe_buffer.in >= wpipe->pipe_buffer.size)
                                wpipe->pipe_buffer.in = 0;

                        wpipe->pipe_buffer.cnt += size;
                } else {
                        /*
                         * If the "read-side" has been blocked, wake it up now.
                         */
                        if (wpipe->pipe_state & PIPE_WANTR) {
                                wpipe->pipe_state &= ~PIPE_WANTR;
                                wakeup(wpipe);
                        }

                        /*
                         * don't block on non-blocking I/O
                         */
                        if (fp->f_flag & FNONBLOCK) {
                                error = EAGAIN;
                                break;
                        }

                        /*
                         * We have no more space and have something to offer,
                         * wake up selects.
                         */
                        pipeselwakeup(wpipe);

                        wpipe->pipe_state |= PIPE_WANTW;
                        error = tsleep(wpipe, (PRIBIO + 1)|PCATCH,
                            "pipewr", 0);
                        if (error)
                                break;
                        /*
                         * If read side wants to go away, we just issue a
                         * signal to ourselves.
                         */
                        if (wpipe->pipe_state & PIPE_EOF) {
                                error = EPIPE;
                                break;
                        }
                }
        }

        --wpipe->pipe_busy;
        if ((wpipe->pipe_busy == 0) &&
            (wpipe->pipe_state & PIPE_WANT)) {
                wpipe->pipe_state &= ~(PIPE_WANT|PIPE_WANTR);
                wakeup(wpipe);
        } else if (wpipe->pipe_buffer.cnt > 0) {
                /*
                 * If we have put any characters in the buffer, we wake up
                 * the reader.
                 */
                if (wpipe->pipe_state & PIPE_WANTR) {
                        wpipe->pipe_state &= ~PIPE_WANTR;
                        wakeup(wpipe);
                }
        }

        /*
         * Don't return EPIPE if I/O was successful
         */
        if ((wpipe->pipe_buffer.cnt == 0) &&
            (uio->uio_resid == 0) &&
            (error == EPIPE))
                error = 0;

        if (error == 0) {
                int s = splhigh();
                wpipe->pipe_mtime = time;
                splx(s);
        }
        /*
         * We have something to offer,
         * wake up select.
         */
        if (wpipe->pipe_buffer.cnt)
                pipeselwakeup(wpipe);

        return error;
}

/*
 * we implement a very minimal set of ioctls for compatibility with sockets.
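 *
 * For example, a userland consumer can poll the amount of buffered data
 * with FIONREAD (an illustrative sketch, not part of this file):
 *
 *      int n;
 *
 *      if (ioctl(pfd[0], FIONREAD, &n) == 0)
 *              printf("%d bytes buffered\n", n);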
 */
int
pipe_ioctl(fp, cmd, data, p)
        struct file *fp;
#if defined(__FreeBSD__)
        int cmd;
#else
        u_long cmd;
#endif
        register caddr_t data;
        struct proc *p;
{
        register struct pipe *mpipe = (struct pipe *)fp->f_data;

        switch (cmd) {

        case FIONBIO:
                return (0);

        case FIOASYNC:
                if (*(int *)data) {
                        mpipe->pipe_state |= PIPE_ASYNC;
                } else {
                        mpipe->pipe_state &= ~PIPE_ASYNC;
                }
                return (0);

        case FIONREAD:
                if (mpipe->pipe_state & PIPE_DIRECTW)
                        *(int *)data = mpipe->pipe_map.cnt;
                else
                        *(int *)data = mpipe->pipe_buffer.cnt;
                return (0);

        case SIOCSPGRP:
                mpipe->pipe_pgid = *(int *)data;
                return (0);

        case SIOCGPGRP:
                *(int *)data = mpipe->pipe_pgid;
                return (0);

        }
        return (ENOTTY);
}

int
pipe_select(fp, which, p)
        struct file *fp;
        int which;
        struct proc *p;
{
        register struct pipe *rpipe = (struct pipe *)fp->f_data;
        struct pipe *wpipe;

        wpipe = rpipe->pipe_peer;
        switch (which) {

        case FREAD:
                if ((rpipe->pipe_state & PIPE_DIRECTW) ||
                    (rpipe->pipe_buffer.cnt > 0) ||
                    (rpipe->pipe_state & PIPE_EOF)) {
                        return (1);
                }
                selrecord(p, &rpipe->pipe_sel);
                rpipe->pipe_state |= PIPE_SEL;
                break;

        case FWRITE:
                if ((wpipe == NULL) ||
                    (wpipe->pipe_state & PIPE_EOF) ||
                    (((wpipe->pipe_state & PIPE_DIRECTW) == 0) &&
                     (wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt) >= PIPE_BUF)) {
                        return (1);
                }
                selrecord(p, &wpipe->pipe_sel);
                wpipe->pipe_state |= PIPE_SEL;
                break;

        case 0:
                if ((rpipe->pipe_state & PIPE_EOF) ||
                    (wpipe == NULL) ||
                    (wpipe->pipe_state & PIPE_EOF)) {
                        return (1);
                }

                selrecord(p, &rpipe->pipe_sel);
                rpipe->pipe_state |= PIPE_SEL;
                break;
        }
        return (0);
}

int
pipe_stat(pipe, ub)
        register struct pipe *pipe;
        register struct stat *ub;
{
        bzero((caddr_t)ub, sizeof(*ub));
        ub->st_mode = S_IFIFO;
        ub->st_blksize = pipe->pipe_buffer.size;
        ub->st_size = pipe->pipe_buffer.cnt;
        ub->st_blocks = (ub->st_size + ub->st_blksize - 1) / ub->st_blksize;
        TIMEVAL_TO_TIMESPEC(&pipe->pipe_atime, &ub->st_atimespec);
        TIMEVAL_TO_TIMESPEC(&pipe->pipe_mtime, &ub->st_mtimespec);
        TIMEVAL_TO_TIMESPEC(&pipe->pipe_ctime, &ub->st_ctimespec);
        /*
         * Left as 0: st_dev, st_ino, st_nlink, st_uid, st_gid, st_rdev,
         * st_flags, st_gen.
         * XXX (st_dev, st_ino) should be unique.
         */
        return 0;
}

/* ARGSUSED */
int
pipe_close(fp, p)
        struct file *fp;
        struct proc *p;
{
        struct pipe *cpipe = (struct pipe *)fp->f_data;

        pipeclose(cpipe);
        fp->f_data = NULL;
        return 0;
}

/*
 * shutdown the pipe
 */
void
pipeclose(cpipe)
        struct pipe *cpipe;
{
        struct pipe *ppipe;

        if (cpipe) {

                pipeselwakeup(cpipe);

                /*
                 * If the other side is blocked, wake it up saying that
                 * we want to close it down.
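                 *
                 * (Illustrative sequence: a reader sleeping in "piperd"
                 * wakes, sees PIPE_EOF and breaks out of its loop, drops
                 * pipe_busy, and wakes us in turn; we loop here until
                 * pipe_busy reaches 0 before tearing anything down.)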
                 */
                while (cpipe->pipe_busy) {
                        wakeup(cpipe);
                        cpipe->pipe_state |= PIPE_WANT|PIPE_EOF;
                        tsleep(cpipe, PRIBIO, "pipecl", 0);
                }

                /*
                 * Disconnect from peer
                 */
                if ((ppipe = cpipe->pipe_peer) != NULL) {
                        pipeselwakeup(ppipe);

                        ppipe->pipe_state |= PIPE_EOF;
                        wakeup(ppipe);
                        ppipe->pipe_peer = NULL;
                }

                /*
                 * free resources
                 */
                if (cpipe->pipe_buffer.buffer) {
                        if (cpipe->pipe_buffer.size > PIPE_SIZE)
                                --nbigpipe;
                        amountpipekva -= cpipe->pipe_buffer.size;
                        kmem_free(kernel_map,
                                (vm_offset_t)cpipe->pipe_buffer.buffer,
                                cpipe->pipe_buffer.size);
                }
#ifndef PIPE_NODIRECT
                if (cpipe->pipe_map.kva) {
                        amountpipekva -= cpipe->pipe_buffer.size + PAGE_SIZE;
                        kmem_free(kernel_map,
                                cpipe->pipe_map.kva,
                                cpipe->pipe_buffer.size + PAGE_SIZE);
                }
#endif
                free(cpipe, M_TEMP);
        }
}
#endif
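
/*
 * A minimal userland sketch (illustrative; not part of this file) that
 * exercises both write paths described at the top of the file: a write
 * of a few bytes takes the buffered path, while the child's single
 * large write below is a candidate for the direct page-mapping path,
 * assuming it is at least PIPE_MINDIRECT bytes.
 *
 *      #include <unistd.h>
 *
 *      int
 *      main(void)
 *      {
 *              static char buf[64 * 1024];
 *              int pfd[2], n;
 *
 *              if (pipe(pfd) == -1)
 *                      return 1;
 *              if (fork() == 0) {
 *                      close(pfd[0]);
 *                      write(pfd[1], buf, sizeof(buf));
 *                      _exit(0);
 *              }
 *              close(pfd[1]);
 *              while ((n = read(pfd[0], buf, sizeof(buf))) > 0)
 *                      continue;
 *              return 0;
 *      }
 */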