/*	$OpenBSD: sys_pipe.c,v 1.141 2022/07/09 12:48:21 visa Exp $	*/

/*
 * Copyright (c) 1996 John S. Dyson
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice immediately at the beginning of the file, without modification,
 *    this list of conditions, and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Absolutely no warranty of function or purpose is made by the author
 *    John S. Dyson.
 * 4. Modifications may be freely made to this file if the above conditions
 *    are met.
 */

/*
 * This file contains a high-performance replacement for the socket-based
 * pipes scheme originally used in FreeBSD/4.4Lite.  It does not support
 * all features of sockets, but does do everything that pipes normally
 * do.
 */
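
/*
 * Userland sketch (illustrative, not part of this file) of the interface
 * implemented below:
 *
 *	int fds[2];
 *	char c;
 *
 *	if (pipe2(fds, O_CLOEXEC) == -1)
 *		err(1, "pipe2");
 *	write(fds[1], "x", 1);		fds[1] is the write end
 *	read(fds[0], &c, 1);		fds[0] is the read end
 */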

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/fcntl.h>
#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/pool.h>
#include <sys/ioctl.h>
#include <sys/stat.h>
#include <sys/signalvar.h>
#include <sys/mount.h>
#include <sys/syscallargs.h>
#include <sys/event.h>
#include <sys/lock.h>
#ifdef KTRACE
#include <sys/ktrace.h>
#endif

#include <uvm/uvm_extern.h>

#include <sys/pipe.h>

struct pipe_pair {
	struct pipe pp_wpipe;
	struct pipe pp_rpipe;
	struct rwlock pp_lock;
};

/*
 * interfaces to the outside world
 */
int	pipe_read(struct file *, struct uio *, int);
int	pipe_write(struct file *, struct uio *, int);
int	pipe_close(struct file *, struct proc *);
int	pipe_kqfilter(struct file *fp, struct knote *kn);
int	pipe_ioctl(struct file *, u_long, caddr_t, struct proc *);
int	pipe_stat(struct file *fp, struct stat *ub, struct proc *p);

static const struct fileops pipeops = {
	.fo_read	= pipe_read,
	.fo_write	= pipe_write,
	.fo_ioctl	= pipe_ioctl,
	.fo_kqfilter	= pipe_kqfilter,
	.fo_stat	= pipe_stat,
	.fo_close	= pipe_close
};

void	filt_pipedetach(struct knote *kn);
int	filt_piperead(struct knote *kn, long hint);
int	filt_pipewrite(struct knote *kn, long hint);
int	filt_pipeexcept(struct knote *kn, long hint);
int	filt_pipemodify(struct kevent *kev, struct knote *kn);
int	filt_pipeprocess(struct knote *kn, struct kevent *kev);

const struct filterops pipe_rfiltops = {
	.f_flags	= FILTEROP_ISFD | FILTEROP_MPSAFE,
	.f_attach	= NULL,
	.f_detach	= filt_pipedetach,
	.f_event	= filt_piperead,
	.f_modify	= filt_pipemodify,
	.f_process	= filt_pipeprocess,
};

const struct filterops pipe_wfiltops = {
	.f_flags	= FILTEROP_ISFD | FILTEROP_MPSAFE,
	.f_attach	= NULL,
	.f_detach	= filt_pipedetach,
	.f_event	= filt_pipewrite,
	.f_modify	= filt_pipemodify,
	.f_process	= filt_pipeprocess,
};

const struct filterops pipe_efiltops = {
	.f_flags	= FILTEROP_ISFD | FILTEROP_MPSAFE,
	.f_attach	= NULL,
	.f_detach	= filt_pipedetach,
	.f_event	= filt_pipeexcept,
	.f_modify	= filt_pipemodify,
	.f_process	= filt_pipeprocess,
};

/*
 * Default pipe buffer size(s); this can be fairly large now because pipe
 * space is pageable.  The pipe code will try to maintain locality of
 * reference for performance reasons, so small amounts of outstanding I/O
 * will not wipe the cache.
 */
#define MINPIPESIZE (PIPE_SIZE/3)
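/*
 * Once a reader drains the buffer below MINPIPESIZE, any blocked writer
 * is woken up; see the write blocking hysteresis in pipe_read().
 */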

/*
 * Limit the number of "big" pipes
 */
#define LIMITBIGPIPES	32
unsigned int nbigpipe;
static unsigned int amountpipekva;

struct pool pipe_pair_pool;

int	dopipe(struct proc *, int *, int);
void	pipeselwakeup(struct pipe *);

int	pipe_create(struct pipe *);
void	pipe_destroy(struct pipe *);
int	pipe_rundown(struct pipe *);
struct pipe *pipe_peer(struct pipe *);
int	pipe_buffer_realloc(struct pipe *, u_int);
void	pipe_buffer_free(struct pipe *);

int	pipe_iolock(struct pipe *);
void	pipe_iounlock(struct pipe *);
int	pipe_iosleep(struct pipe *, const char *);

struct pipe_pair *pipe_pair_create(void);
void	pipe_pair_destroy(struct pipe_pair *);

/*
 * The pipe() and pipe2() system calls for the DTYPE_PIPE type of pipes
 */

int
sys_pipe(struct proc *p, void *v, register_t *retval)
{
	struct sys_pipe_args /* {
		syscallarg(int *) fdp;
	} */ *uap = v;

	return (dopipe(p, SCARG(uap, fdp), 0));
}

int
sys_pipe2(struct proc *p, void *v, register_t *retval)
{
	struct sys_pipe2_args /* {
		syscallarg(int *) fdp;
		syscallarg(int) flags;
	} */ *uap = v;

	if (SCARG(uap, flags) & ~(O_CLOEXEC | FNONBLOCK))
		return (EINVAL);

	return (dopipe(p, SCARG(uap, fdp), SCARG(uap, flags)));
}

int
dopipe(struct proc *p, int *ufds, int flags)
{
	struct filedesc *fdp = p->p_fd;
	struct file *rf, *wf;
	struct pipe_pair *pp;
	struct pipe *rpipe, *wpipe = NULL;
	int fds[2], cloexec, error;

	cloexec = (flags & O_CLOEXEC) ? UF_EXCLOSE : 0;

	pp = pipe_pair_create();
	if (pp == NULL)
		return (ENOMEM);
	wpipe = &pp->pp_wpipe;
	rpipe = &pp->pp_rpipe;

	fdplock(fdp);

	error = falloc(p, &rf, &fds[0]);
	if (error != 0)
		goto free2;
	rf->f_flag = FREAD | FWRITE | (flags & FNONBLOCK);
	rf->f_type = DTYPE_PIPE;
	rf->f_data = rpipe;
	rf->f_ops = &pipeops;

	error = falloc(p, &wf, &fds[1]);
	if (error != 0)
		goto free3;
	wf->f_flag = FREAD | FWRITE | (flags & FNONBLOCK);
	wf->f_type = DTYPE_PIPE;
	wf->f_data = wpipe;
	wf->f_ops = &pipeops;

	fdinsert(fdp, fds[0], cloexec, rf);
	fdinsert(fdp, fds[1], cloexec, wf);

	error = copyout(fds, ufds, sizeof(fds));
	if (error == 0) {
		fdpunlock(fdp);
#ifdef KTRACE
		if (KTRPOINT(p, KTR_STRUCT))
			ktrfds(p, fds, 2);
#endif
	} else {
		/* fdrelease() unlocks fdp. */
		fdrelease(p, fds[0]);
		fdplock(fdp);
		fdrelease(p, fds[1]);
	}

	FRELE(rf, p);
	FRELE(wf, p);
	return (error);

free3:
	fdremove(fdp, fds[0]);
	closef(rf, p);
	rpipe = NULL;
free2:
	fdpunlock(fdp);
	pipe_destroy(wpipe);
	pipe_destroy(rpipe);
	return (error);
}

/*
 * Allocate kva for the pipe circular buffer; the space is pageable.
 * This routine 'reallocs' the size of a pipe buffer safely: on failure
 * it returns ENOMEM and retains the old buffer.
 */
int
pipe_buffer_realloc(struct pipe *cpipe, u_int size)
{
	caddr_t buffer;

	/* buffer uninitialized or pipe locked */
	KASSERT((cpipe->pipe_buffer.buffer == NULL) ||
	    (cpipe->pipe_state & PIPE_LOCK));

	/* buffer should be empty */
	KASSERT(cpipe->pipe_buffer.cnt == 0);

	KERNEL_LOCK();
	buffer = km_alloc(size, &kv_any, &kp_pageable, &kd_waitok);
	KERNEL_UNLOCK();
	if (buffer == NULL)
		return (ENOMEM);

	/* free old resources if we are resizing */
	pipe_buffer_free(cpipe);

	cpipe->pipe_buffer.buffer = buffer;
	cpipe->pipe_buffer.size = size;
	cpipe->pipe_buffer.in = 0;
	cpipe->pipe_buffer.out = 0;

	atomic_add_int(&amountpipekva, cpipe->pipe_buffer.size);

	return (0);
}

/*
 * Initialize a pipe and allocate the VM for its buffer.
 */
int
pipe_create(struct pipe *cpipe)
{
	int error;

	error = pipe_buffer_realloc(cpipe, PIPE_SIZE);
	if (error != 0)
		return (error);

	sigio_init(&cpipe->pipe_sigio);

	getnanotime(&cpipe->pipe_ctime);
	cpipe->pipe_atime = cpipe->pipe_ctime;
	cpipe->pipe_mtime = cpipe->pipe_ctime;

	return (0);
}

struct pipe *
pipe_peer(struct pipe *cpipe)
{
	struct pipe *peer;

	rw_assert_anylock(cpipe->pipe_lock);

	peer = cpipe->pipe_peer;
	if (peer == NULL || (peer->pipe_state & PIPE_EOF))
		return (NULL);
	return (peer);
}

/*
 * Lock a pipe for exclusive I/O access.
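 *
 * The PIPE_LOCK flag implements a sleepable I/O lock on top of `pipe_lock':
 * readers and writers drop the rwlock around uiomove(), which may fault,
 * so this flag is what keeps an I/O operation exclusive across those
 * unlocked windows.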
 */
int
pipe_iolock(struct pipe *cpipe)
{
	int error;

	rw_assert_wrlock(cpipe->pipe_lock);

	while (cpipe->pipe_state & PIPE_LOCK) {
		cpipe->pipe_state |= PIPE_LWANT;
		error = rwsleep_nsec(cpipe, cpipe->pipe_lock, PRIBIO | PCATCH,
		    "pipeiolk", INFSLP);
		if (error)
			return (error);
	}
	cpipe->pipe_state |= PIPE_LOCK;
	return (0);
}

/*
 * Unlock a pipe I/O lock.
 */
void
pipe_iounlock(struct pipe *cpipe)
{
	rw_assert_wrlock(cpipe->pipe_lock);
	KASSERT(cpipe->pipe_state & PIPE_LOCK);

	cpipe->pipe_state &= ~PIPE_LOCK;
	if (cpipe->pipe_state & PIPE_LWANT) {
		cpipe->pipe_state &= ~PIPE_LWANT;
		wakeup(cpipe);
	}
}

/*
 * Unlock the pipe I/O lock and go to sleep.  On success, returns 0 with the
 * I/O lock reacquired.  If a signal was caught, returns non-zero and the
 * I/O lock is not held.
 *
 * Any caller must obtain a reference to the pipe by incrementing `pipe_busy'
 * before calling this function, in order to ensure that the pipe is not
 * destroyed while sleeping.
 */
int
pipe_iosleep(struct pipe *cpipe, const char *wmesg)
{
	int error;

	pipe_iounlock(cpipe);
	error = rwsleep_nsec(cpipe, cpipe->pipe_lock, PRIBIO | PCATCH, wmesg,
	    INFSLP);
	if (error)
		return (error);
	return (pipe_iolock(cpipe));
}
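
/*
 * Caller-side sketch of the protocol above, as used by pipe_read() and
 * pipe_write():
 *
 *	rw_enter_write(cpipe->pipe_lock);
 *	++cpipe->pipe_busy;		keep the pipe from being destroyed
 *	error = pipe_iolock(cpipe);
 *	...
 *	error = pipe_iosleep(cpipe, "wmesg");
 *	...
 *	pipe_iounlock(cpipe);
 *	--cpipe->pipe_busy;
 *	pipe_rundown(cpipe);		wake up pipe_destroy() if it waits
 *	rw_exit_write(cpipe->pipe_lock);
 */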

void
pipeselwakeup(struct pipe *cpipe)
{
	rw_assert_wrlock(cpipe->pipe_lock);

	KNOTE(&cpipe->pipe_klist, 0);

	if (cpipe->pipe_state & PIPE_ASYNC)
		pgsigio(&cpipe->pipe_sigio, SIGIO, 0);
}

int
pipe_read(struct file *fp, struct uio *uio, int fflags)
{
	struct pipe *rpipe = fp->f_data;
	size_t nread = 0, size;
	int error;

	rw_enter_write(rpipe->pipe_lock);
	++rpipe->pipe_busy;
	error = pipe_iolock(rpipe);
	if (error) {
		--rpipe->pipe_busy;
		pipe_rundown(rpipe);
		rw_exit_write(rpipe->pipe_lock);
		return (error);
	}

	while (uio->uio_resid) {
		/* Normal pipe buffer receive. */
		if (rpipe->pipe_buffer.cnt > 0) {
			size = rpipe->pipe_buffer.size - rpipe->pipe_buffer.out;
			if (size > rpipe->pipe_buffer.cnt)
				size = rpipe->pipe_buffer.cnt;
			if (size > uio->uio_resid)
				size = uio->uio_resid;
			rw_exit_write(rpipe->pipe_lock);
			error = uiomove(&rpipe->pipe_buffer.buffer[rpipe->pipe_buffer.out],
					size, uio);
			rw_enter_write(rpipe->pipe_lock);
			if (error) {
				break;
			}
			rpipe->pipe_buffer.out += size;
			if (rpipe->pipe_buffer.out >= rpipe->pipe_buffer.size)
				rpipe->pipe_buffer.out = 0;

			rpipe->pipe_buffer.cnt -= size;
			/*
			 * If there is no more to read in the pipe, reset
			 * its pointers to the beginning.  This improves
			 * cache hit stats.
			 */
			if (rpipe->pipe_buffer.cnt == 0) {
				rpipe->pipe_buffer.in = 0;
				rpipe->pipe_buffer.out = 0;
			}
			nread += size;
		} else {
			/*
			 * detect EOF condition
			 * read returns 0 on EOF, no need to set error
			 */
			if (rpipe->pipe_state & PIPE_EOF)
				break;

			/* If the "write-side" has been blocked, wake it up. */
			if (rpipe->pipe_state & PIPE_WANTW) {
				rpipe->pipe_state &= ~PIPE_WANTW;
				wakeup(rpipe);
			}

			/* Break if some data was read. */
			if (nread > 0)
				break;

			/* Handle non-blocking mode operation. */
			if (fp->f_flag & FNONBLOCK) {
				error = EAGAIN;
				break;
			}

			/* Wait for more data. */
			rpipe->pipe_state |= PIPE_WANTR;
			error = pipe_iosleep(rpipe, "piperd");
			if (error)
				goto unlocked_error;
		}
	}
	pipe_iounlock(rpipe);

	if (error == 0)
		getnanotime(&rpipe->pipe_atime);
unlocked_error:
	--rpipe->pipe_busy;

	if (pipe_rundown(rpipe) == 0 && rpipe->pipe_buffer.cnt < MINPIPESIZE) {
		/* Handle write blocking hysteresis. */
		if (rpipe->pipe_state & PIPE_WANTW) {
			rpipe->pipe_state &= ~PIPE_WANTW;
			wakeup(rpipe);
		}
	}

	if (rpipe->pipe_buffer.size - rpipe->pipe_buffer.cnt >= PIPE_BUF)
		pipeselwakeup(rpipe);

	rw_exit_write(rpipe->pipe_lock);
	return (error);
}

int
pipe_write(struct file *fp, struct uio *uio, int fflags)
{
	struct pipe *rpipe = fp->f_data, *wpipe;
	struct rwlock *lock = rpipe->pipe_lock;
	size_t orig_resid;
	int error;

	rw_enter_write(lock);
	wpipe = pipe_peer(rpipe);

	/* Detect loss of the pipe read side; EPIPE makes write(2) raise SIGPIPE. */
	if (wpipe == NULL) {
		rw_exit_write(lock);
		return (EPIPE);
	}

	++wpipe->pipe_busy;
	error = pipe_iolock(wpipe);
	if (error) {
		--wpipe->pipe_busy;
		pipe_rundown(wpipe);
		rw_exit_write(lock);
		return (error);
	}

	/* If it is advantageous to resize the pipe buffer, do so. */
	if (uio->uio_resid > PIPE_SIZE &&
	    wpipe->pipe_buffer.size <= PIPE_SIZE &&
	    wpipe->pipe_buffer.cnt == 0) {
		unsigned int npipe;

		npipe = atomic_inc_int_nv(&nbigpipe);
		if (npipe > LIMITBIGPIPES ||
		    pipe_buffer_realloc(wpipe, BIG_PIPE_SIZE) != 0)
			atomic_dec_int(&nbigpipe);
	}

	orig_resid = uio->uio_resid;

	while (uio->uio_resid) {
		size_t space;

		if (wpipe->pipe_state & PIPE_EOF) {
			error = EPIPE;
			break;
		}

		space = wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt;

		/* Writes of size <= PIPE_BUF must be atomic. */
		if (space < uio->uio_resid && orig_resid <= PIPE_BUF)
			space = 0;
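		/*
		 * Forcing `space' to 0 makes a small write sleep below until
		 * the whole request fits at once, so it is never interleaved
		 * with other writers (POSIX requires writes of at most
		 * PIPE_BUF bytes to be atomic).
		 */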

		if (space > 0) {
			size_t size;	/* Transfer size */
			size_t segsize;	/* first segment to transfer */

			/*
			 * Transfer size is minimum of uio transfer
			 * and free space in pipe buffer.
			 */
			if (space > uio->uio_resid)
				size = uio->uio_resid;
			else
				size = space;
			/*
			 * First segment to transfer is minimum of
			 * transfer size and contiguous space in
			 * pipe buffer.  If first segment to transfer
			 * is less than the transfer size, we've got
			 * a wraparound in the buffer.
			 */
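			/*
			 * Illustrative example: with size 8, in == 6 and a
			 * 4-byte transfer, segsize is 2 (slots 6-7); the
			 * remaining 2 bytes wrap around to slots 0-1 in the
			 * second uiomove() below.
			 */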
			segsize = wpipe->pipe_buffer.size -
				wpipe->pipe_buffer.in;
			if (segsize > size)
				segsize = size;

			/* Transfer first segment */

			rw_exit_write(lock);
			error = uiomove(&wpipe->pipe_buffer.buffer[wpipe->pipe_buffer.in],
					segsize, uio);
			rw_enter_write(lock);

			if (error == 0 && segsize < size) {
				/*
				 * Transfer remaining part now, to
				 * support atomic writes.  Wraparound
				 * happened.
				 */
#ifdef DIAGNOSTIC
				if (wpipe->pipe_buffer.in + segsize !=
				    wpipe->pipe_buffer.size)
					panic("Expected pipe buffer wraparound disappeared");
#endif

				rw_exit_write(lock);
				error = uiomove(&wpipe->pipe_buffer.buffer[0],
						size - segsize, uio);
				rw_enter_write(lock);
			}
			if (error == 0) {
				wpipe->pipe_buffer.in += size;
				if (wpipe->pipe_buffer.in >=
				    wpipe->pipe_buffer.size) {
#ifdef DIAGNOSTIC
					if (wpipe->pipe_buffer.in != size - segsize + wpipe->pipe_buffer.size)
						panic("Expected wraparound bad");
#endif
					wpipe->pipe_buffer.in = size - segsize;
				}

				wpipe->pipe_buffer.cnt += size;
#ifdef DIAGNOSTIC
				if (wpipe->pipe_buffer.cnt > wpipe->pipe_buffer.size)
					panic("Pipe buffer overflow");
#endif
			}
			if (error)
				break;
		} else {
			/* If the "read-side" has been blocked, wake it up. */
			if (wpipe->pipe_state & PIPE_WANTR) {
				wpipe->pipe_state &= ~PIPE_WANTR;
				wakeup(wpipe);
			}

			/* Don't block on non-blocking I/O. */
			if (fp->f_flag & FNONBLOCK) {
				error = EAGAIN;
				break;
			}

			/*
			 * We have no more space and have something to offer,
			 * wake up select/poll.
			 */
			pipeselwakeup(wpipe);

			wpipe->pipe_state |= PIPE_WANTW;
			error = pipe_iosleep(wpipe, "pipewr");
			if (error)
				goto unlocked_error;

			/*
			 * If read side wants to go away, we just issue a
			 * signal to ourselves.
			 */
			if (wpipe->pipe_state & PIPE_EOF) {
				error = EPIPE;
				break;
			}
		}
	}
	pipe_iounlock(wpipe);

unlocked_error:
	--wpipe->pipe_busy;

	if (pipe_rundown(wpipe) == 0 && wpipe->pipe_buffer.cnt > 0) {
		/*
		 * If we have put any characters in the buffer, we wake up
		 * the reader.
		 */
		if (wpipe->pipe_state & PIPE_WANTR) {
			wpipe->pipe_state &= ~PIPE_WANTR;
			wakeup(wpipe);
		}
	}

	/* Don't return EPIPE if I/O was successful. */
	if (wpipe->pipe_buffer.cnt == 0 &&
	    uio->uio_resid == 0 &&
	    error == EPIPE) {
		error = 0;
	}

	if (error == 0)
		getnanotime(&wpipe->pipe_mtime);
	/* We have something to offer, wake up select/poll. */
	if (wpipe->pipe_buffer.cnt)
		pipeselwakeup(wpipe);

	rw_exit_write(lock);
	return (error);
}

/*
 * We implement a very minimal set of ioctls for compatibility with sockets.
 */
int
pipe_ioctl(struct file *fp, u_long cmd, caddr_t data, struct proc *p)
{
	struct pipe *mpipe = fp->f_data;
	int error = 0;

	switch (cmd) {

	case FIONBIO:
		break;

	case FIOASYNC:
		rw_enter_write(mpipe->pipe_lock);
		if (*(int *)data) {
			mpipe->pipe_state |= PIPE_ASYNC;
		} else {
			mpipe->pipe_state &= ~PIPE_ASYNC;
		}
		rw_exit_write(mpipe->pipe_lock);
		break;

	case FIONREAD:
		rw_enter_read(mpipe->pipe_lock);
		*(int *)data = mpipe->pipe_buffer.cnt;
		rw_exit_read(mpipe->pipe_lock);
		break;

	case FIOSETOWN:
	case SIOCSPGRP:
	case TIOCSPGRP:
		error = sigio_setown(&mpipe->pipe_sigio, cmd, data);
		break;

	case FIOGETOWN:
	case SIOCGPGRP:
	case TIOCGPGRP:
		sigio_getown(&mpipe->pipe_sigio, cmd, data);
		break;

	default:
		error = ENOTTY;
	}

	return (error);
}

int
pipe_stat(struct file *fp, struct stat *ub, struct proc *p)
{
	struct pipe *pipe = fp->f_data;

	memset(ub, 0, sizeof(*ub));

	rw_enter_read(pipe->pipe_lock);
	ub->st_mode = S_IFIFO;
	ub->st_blksize = pipe->pipe_buffer.size;
	ub->st_size = pipe->pipe_buffer.cnt;
	ub->st_blocks = (ub->st_size + ub->st_blksize - 1) / ub->st_blksize;
	ub->st_atim.tv_sec  = pipe->pipe_atime.tv_sec;
	ub->st_atim.tv_nsec = pipe->pipe_atime.tv_nsec;
	ub->st_mtim.tv_sec  = pipe->pipe_mtime.tv_sec;
	ub->st_mtim.tv_nsec = pipe->pipe_mtime.tv_nsec;
	ub->st_ctim.tv_sec  = pipe->pipe_ctime.tv_sec;
	ub->st_ctim.tv_nsec = pipe->pipe_ctime.tv_nsec;
	ub->st_uid = fp->f_cred->cr_uid;
	ub->st_gid = fp->f_cred->cr_gid;
	rw_exit_read(pipe->pipe_lock);
	/*
	 * Left as 0: st_dev, st_ino, st_nlink, st_rdev, st_flags, st_gen.
	 * XXX (st_dev, st_ino) should be unique.
	 */
	return (0);
}

int
pipe_close(struct file *fp, struct proc *p)
{
	struct pipe *cpipe = fp->f_data;

	fp->f_ops = NULL;
	fp->f_data = NULL;
	pipe_destroy(cpipe);
	return (0);
}

/*
 * Free kva for the pipe circular buffer.
 * No pipe lock check, as this is only called from pipe_buffer_realloc()
 * and pipe_destroy().
 */
void
pipe_buffer_free(struct pipe *cpipe)
{
	u_int size;

	if (cpipe->pipe_buffer.buffer == NULL)
		return;

	size = cpipe->pipe_buffer.size;

	KERNEL_LOCK();
	km_free(cpipe->pipe_buffer.buffer, size, &kv_any, &kp_pageable);
	KERNEL_UNLOCK();

	cpipe->pipe_buffer.buffer = NULL;

	atomic_sub_int(&amountpipekva, size);
	if (size > PIPE_SIZE)
		atomic_dec_int(&nbigpipe);
}

/*
 * Shut down the pipe and free its resources.
 */
void
pipe_destroy(struct pipe *cpipe)
{
	struct pipe *ppipe;

	if (cpipe == NULL)
		return;

	rw_enter_write(cpipe->pipe_lock);

	pipeselwakeup(cpipe);
	sigio_free(&cpipe->pipe_sigio);

	/*
	 * If the other side is blocked, wake it up saying that
	 * we want to close it down.
	 */
	cpipe->pipe_state |= PIPE_EOF;
	while (cpipe->pipe_busy) {
		wakeup(cpipe);
		cpipe->pipe_state |= PIPE_WANTD;
		rwsleep_nsec(cpipe, cpipe->pipe_lock, PRIBIO, "pipecl", INFSLP);
	}

	/* Disconnect from peer. */
	if ((ppipe = cpipe->pipe_peer) != NULL) {
		pipeselwakeup(ppipe);

		ppipe->pipe_state |= PIPE_EOF;
		wakeup(ppipe);
		ppipe->pipe_peer = NULL;
	}

	pipe_buffer_free(cpipe);

	rw_exit_write(cpipe->pipe_lock);

	if (ppipe == NULL)
		pipe_pair_destroy(cpipe->pipe_pair);
}

/*
 * Returns non-zero if a rundown is in progress; in that case the sleeping
 * pipe_destroy() has been woken up.
 */
int
pipe_rundown(struct pipe *cpipe)
{
	rw_assert_wrlock(cpipe->pipe_lock);

	if (cpipe->pipe_busy > 0 || (cpipe->pipe_state & PIPE_WANTD) == 0)
		return (0);

	/* Only wakeup pipe_destroy() once the pipe is no longer busy. */
	cpipe->pipe_state &= ~(PIPE_WANTD | PIPE_WANTR | PIPE_WANTW);
	wakeup(cpipe);
	return (1);
}

int
pipe_kqfilter(struct file *fp, struct knote *kn)
{
	struct pipe *rpipe = kn->kn_fp->f_data, *wpipe;
	struct rwlock *lock = rpipe->pipe_lock;
	int error = 0;

	rw_enter_write(lock);
	wpipe = pipe_peer(rpipe);

	switch (kn->kn_filter) {
	case EVFILT_READ:
		kn->kn_fop = &pipe_rfiltops;
		kn->kn_hook = rpipe;
		klist_insert_locked(&rpipe->pipe_klist, kn);
		break;
	case EVFILT_WRITE:
		if (wpipe == NULL) {
			/* other end of pipe has been closed */
			error = EPIPE;
			break;
		}
		kn->kn_fop = &pipe_wfiltops;
		kn->kn_hook = wpipe;
		klist_insert_locked(&wpipe->pipe_klist, kn);
		break;
	case EVFILT_EXCEPT:
		if (kn->kn_flags & __EV_SELECT) {
			/* Prevent triggering exceptfds. */
			error = EPERM;
			break;
		}
		if ((kn->kn_flags & __EV_POLL) == 0) {
			/* Disallow usage through kevent(2). */
			error = EINVAL;
			break;
		}
		kn->kn_fop = &pipe_efiltops;
		kn->kn_hook = rpipe;
		klist_insert_locked(&rpipe->pipe_klist, kn);
		break;
	default:
		error = EINVAL;
	}

	rw_exit_write(lock);

	return (error);
}

void
filt_pipedetach(struct knote *kn)
{
	struct pipe *cpipe = kn->kn_hook;

	klist_remove(&cpipe->pipe_klist, kn);
}

int
filt_piperead(struct knote *kn, long hint)
{
	struct pipe *rpipe = kn->kn_fp->f_data, *wpipe;

	rw_assert_wrlock(rpipe->pipe_lock);

	wpipe = pipe_peer(rpipe);

	kn->kn_data = rpipe->pipe_buffer.cnt;

	if ((rpipe->pipe_state & PIPE_EOF) || wpipe == NULL) {
		kn->kn_flags |= EV_EOF;
		if (kn->kn_flags & __EV_POLL)
			kn->kn_flags |= __EV_HUP;
		return (1);
	}

	return (kn->kn_data > 0);
}

int
filt_pipewrite(struct knote *kn, long hint)
{
	struct pipe *rpipe = kn->kn_fp->f_data, *wpipe;

	rw_assert_wrlock(rpipe->pipe_lock);

	wpipe = pipe_peer(rpipe);

	if (wpipe == NULL) {
		kn->kn_data = 0;
		kn->kn_flags |= EV_EOF;
		if (kn->kn_flags & __EV_POLL)
			kn->kn_flags |= __EV_HUP;
		return (1);
	}
	kn->kn_data = wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt;

	return (kn->kn_data >= PIPE_BUF);
}

int
filt_pipeexcept(struct knote *kn, long hint)
{
	struct pipe *rpipe = kn->kn_fp->f_data, *wpipe;
	int active = 0;

	rw_assert_wrlock(rpipe->pipe_lock);

	wpipe = pipe_peer(rpipe);

	if (kn->kn_flags & __EV_POLL) {
		if ((rpipe->pipe_state & PIPE_EOF) || wpipe == NULL) {
			kn->kn_flags |= __EV_HUP;
			active = 1;
		}
	}

	return (active);
}

int
filt_pipemodify(struct kevent *kev, struct knote *kn)
{
	struct pipe *rpipe = kn->kn_fp->f_data;
	int active;

	rw_enter_write(rpipe->pipe_lock);
	active = knote_modify(kev, kn);
	rw_exit_write(rpipe->pipe_lock);

	return (active);
}

int
filt_pipeprocess(struct knote *kn, struct kevent *kev)
{
	struct pipe *rpipe = kn->kn_fp->f_data;
	int active;

	rw_enter_write(rpipe->pipe_lock);
	active = knote_process(kn, kev);
	rw_exit_write(rpipe->pipe_lock);

	return (active);
}

void
pipe_init(void)
{
	pool_init(&pipe_pair_pool, sizeof(struct pipe_pair), 0, IPL_MPFLOOR,
	    PR_WAITOK, "pipepl", NULL);
}

struct pipe_pair *
pipe_pair_create(void)
{
	struct pipe_pair *pp;

	pp = pool_get(&pipe_pair_pool, PR_WAITOK | PR_ZERO);
	pp->pp_wpipe.pipe_pair = pp;
	pp->pp_rpipe.pipe_pair = pp;
	pp->pp_wpipe.pipe_peer = &pp->pp_rpipe;
	pp->pp_rpipe.pipe_peer = &pp->pp_wpipe;
	/*
	 * A single lock is shared by both ends of the pair; taking it
	 * grants exclusive access to the whole pipe pair.
	 */
	rw_init(&pp->pp_lock, "pipelk");
	pp->pp_wpipe.pipe_lock = &pp->pp_lock;
	pp->pp_rpipe.pipe_lock = &pp->pp_lock;

	klist_init_rwlock(&pp->pp_wpipe.pipe_klist, &pp->pp_lock);
	klist_init_rwlock(&pp->pp_rpipe.pipe_klist, &pp->pp_lock);

	if (pipe_create(&pp->pp_wpipe) || pipe_create(&pp->pp_rpipe))
		goto err;
	return (pp);
err:
	pipe_destroy(&pp->pp_wpipe);
	pipe_destroy(&pp->pp_rpipe);
	return (NULL);
}

void
pipe_pair_destroy(struct pipe_pair *pp)
{
	klist_free(&pp->pp_wpipe.pipe_klist);
	klist_free(&pp->pp_rpipe.pipe_klist);
	pool_put(&pipe_pair_pool, pp);
}