xref: /minix3/minix/servers/vfs/select.c (revision dd41186aac5f9c05e657f127b7e5d33f375d1686)
1 /* Implement entry point to select system call.
2  *
3  * The entry points into this file are
4  *   do_select:	       perform the SELECT system call
5  *   select_callback:  notify select system of possible fd operation
6  *   select_unsuspend_by_endpt: cancel a blocking select on exiting driver
7  *
8  * The select code uses minimal locking, so that the replies from character
9  * drivers can be processed without blocking. Filps are locked only for pipes.
10  * We make the assumption that any other structures and fields are safe to
11  * check (and possibly change) as long as we know that a process is blocked on
12  * a select(2) call, meaning that all involved filps are guaranteed to stay
13  * open until either we finish the select call, or the process gets interrupted
14  * by a signal.
15  */
16 
17 #include "fs.h"
18 #include <sys/fcntl.h>
19 #include <sys/time.h>
20 #include <sys/select.h>
21 #include <sys/stat.h>
22 #include <minix/callnr.h>
23 #include <minix/u64.h>
24 #include <string.h>
25 #include <assert.h>
26 
27 #include "file.h"
28 #include "vnode.h"
29 
/* max. number of simultaneously pending select() calls */
#define MAXSELECTS 25
/* Copy directions for copy_fdsets(): FROM_PROC copies the requested sets from
 * the calling process into the select entry, any other value (TO_PROC) copies
 * the ready sets back to the calling process.
 */
#define FROM_PROC 0
#define TO_PROC   1

#define USECPERSEC 1000000	/* number of microseconds in a second */

/* Pointer to an fd_set located in the calling process, as taken from the
 * select request message.
 */
typedef fd_set *ixfer_fd_set_ptr;

/* Table of pending select() calls, one entry per call. */
static struct selectentry {
  struct fproc *requestor;	/* slot is free iff this is NULL */
  endpoint_t req_endpt;		/* endpoint used for set copies and revive */
  fd_set readfds, writefds, errorfds;	/* requested sets, copied in */
  fd_set ready_readfds, ready_writefds, ready_errorfds; /* result sets */
  ixfer_fd_set_ptr vir_readfds, vir_writefds, vir_errorfds; /* caller's
				 * set addresses; a set is skipped if NULL */
  struct filp *filps[OPEN_MAX];	/* filp per selected fd, NULL if none */
  int type[OPEN_MAX];		/* index into fdtypes[] per selected fd */
  int nfds, nreadyfds;		/* highest selected fd + 1; # ready bits set */
  int error;			/* if not OK, returned instead of results */
  char block;			/* nonzero if the call may block */
  char starting;		/* TRUE while do_select is still setting up */
  clock_t expiry;		/* timeout in ticks, 0 if no timer is set */
  minix_timer_t timer;	/* if expiry > 0 */
} selecttab[MAXSELECTS];
54 
/* Forward declarations for the functions used within this file. */
static int copy_fdsets(struct selectentry *se, int nfds, int direction);
static void filp_status(struct filp *fp, int status);
static int is_deferred(struct selectentry *se);
static void restart_proc(struct selectentry *se);
static void ops2tab(int ops, int fd, struct selectentry *e);
static int is_regular_file(struct filp *f);
static int is_pipe(struct filp *f);
static int is_char_device(struct filp *f);
static void select_lock_filp(struct filp *f, int ops);
/* The three select_request_* functions share one signature so that they can
 * be stored in the fdtypes[] table.
 */
static int select_request_file(struct filp *f, int *ops, int block,
	struct fproc *rfp);
static int select_request_char(struct filp *f, int *ops, int block,
	struct fproc *rfp);
static int select_request_pipe(struct filp *f, int *ops, int block,
	struct fproc *rfp);
static void select_cancel_all(struct selectentry *e);
static void select_cancel_filp(struct filp *f);
static void select_return(struct selectentry *);
static void select_restart_filps(void);
static int tab2ops(int fd, struct selectentry *e);
static void wipe_select(struct selectentry *s);
/* Timer callback; its argument is the index of a selecttab[] slot. */
void select_timeout_check(int s);
77 
/* Table of file descriptor types that select() supports. For each selected
 * descriptor, do_select() walks this table in order and records the index of
 * the first entry whose type_match() accepts the filp; select_request() of
 * that entry is then used to query the readiness of the filp.
 */
static struct fdtype {
	/* Query (or start querying) readiness; *ops is both input and output */
	int (*select_request)(struct filp *, int *ops, int block,
		struct fproc *rfp);
	/* Return nonzero iff the given filp is of this type */
	int (*type_match)(struct filp *f);
} fdtypes[] = {
	{ select_request_char, is_char_device },
	{ select_request_file, is_regular_file },
	{ select_request_pipe, is_pipe },
};
/* Number of entries in fdtypes[] */
#define SEL_FDS		(sizeof(fdtypes) / sizeof(fdtypes[0]))
88 
89 /*===========================================================================*
90  *				do_select				     *
91  *===========================================================================*/
92 int do_select(void)
93 {
94 /* Implement the select(nfds, readfds, writefds, errorfds, timeout) system
95  * call. First we copy the arguments and verify their sanity. Then we check
96  * whether there are file descriptors that satisfy the select call right off
97  * the bat. If so, or if there are no ready file descriptors but the process
98  * requested to return immediately, we return the result. Otherwise we set a
99  * timeout and wait for either the file descriptors to become ready or the
100  * timer to go off. If no timeout value was provided, we wait indefinitely.
101  */
102   int r, nfds, do_timeout, fd, s;
103   struct filp *f;
104   unsigned int type, ops;
105   struct timeval timeout;
106   struct selectentry *se;
107   vir_bytes vtimeout;
108   clock_t ticks;
109 
110   nfds = job_m_in.m_lc_vfs_select.nfds;
111   vtimeout = job_m_in.m_lc_vfs_select.timeout;
112 
113   /* Sane amount of file descriptors? */
114   if (nfds < 0 || nfds > OPEN_MAX) return(EINVAL);
115 
116   /* Find a slot to store this select request */
117   for (s = 0; s < MAXSELECTS; s++)
118 	if (selecttab[s].requestor == NULL) /* Unused slot */
119 		break;
120   if (s >= MAXSELECTS) return(ENOSPC);
121 
122   se = &selecttab[s];
123   wipe_select(se);	/* Clear results of previous usage */
124   se->requestor = fp;
125   se->req_endpt = who_e;
126   se->vir_readfds = job_m_in.m_lc_vfs_select.readfds;
127   se->vir_writefds = job_m_in.m_lc_vfs_select.writefds;
128   se->vir_errorfds = job_m_in.m_lc_vfs_select.errorfds;
129 
130   /* Copy fdsets from the process */
131   if ((r = copy_fdsets(se, nfds, FROM_PROC)) != OK) {
132 	se->requestor = NULL;
133 	return(r);
134   }
135 
136   /* Did the process set a timeout value? If so, retrieve it. */
137   if (vtimeout != 0) {
138 	r = sys_datacopy_wrapper(who_e, vtimeout, SELF, (vir_bytes) &timeout,
139 		sizeof(timeout));
140 
141 	/* No nonsense in the timeval */
142 	if (r == OK && (timeout.tv_sec < 0 || timeout.tv_usec < 0 ||
143 	    timeout.tv_usec >= USECPERSEC))
144 		r = EINVAL;
145 
146 	if (r != OK) {
147 		se->requestor = NULL;
148 		return(r);
149 	}
150 	do_timeout = 1;
151   } else
152 	do_timeout = 0;
153 
154   /* If there is no timeout, we block forever. Otherwise, we block up to the
155    * specified time interval.
156    */
157   if (!do_timeout)	/* No timeout value set */
158 	se->block = 1;
159   else if (do_timeout && (timeout.tv_sec > 0 || timeout.tv_usec > 0))
160 	se->block = 1;
161   else			/* timeout set as (0,0) - this effects a poll */
162 	se->block = 0;
163   se->expiry = 0;	/* no timer set (yet) */
164 
165   /* We are going to lock filps, and that means that while locking a second
166    * filp, we might already get the results for the first one. In that case,
167    * the incoming results must not cause the select call to finish prematurely.
168    */
169   se->starting = TRUE;
170 
171   /* Verify that file descriptors are okay to select on */
172   for (fd = 0; fd < nfds; fd++) {
173 	/* Because the select() interface implicitly includes file descriptors
174 	 * you might not want to select on, we have to figure out whether we're
175 	 * interested in them. Typically, these file descriptors include fd's
176 	 * inherited from the parent proc and file descriptors that have been
177 	 * close()d, but had a lower fd than one in the current set.
178 	 */
179 	if (!(ops = tab2ops(fd, se)))
180 		continue; /* No operations set; nothing to do for this fd */
181 
182 	/* Get filp belonging to this fd */
183 	f = se->filps[fd] = get_filp(fd, VNODE_READ);
184 	if (f == NULL) {
185 		if (err_code == EBADF)
186 			r = err_code;
187 		else /* File descriptor is 'ready' to return EIO */
188 			r = EINTR;
189 
190 		se->requestor = NULL;
191 		return(r);
192 	}
193 
194 	/* Check file types. According to POSIX 2008:
195 	 * "The pselect() and select() functions shall support regular files,
196 	 * terminal and pseudo-terminal devices, FIFOs, pipes, and sockets. The
197 	 * behavior of pselect() and select() on file descriptors that refer to
198 	 * other types of file is unspecified."
199 	 *
200 	 * In our case, terminal and pseudo-terminal devices are handled by the
201 	 * TTY major and sockets by either INET major (socket type AF_INET) or
202 	 * UDS major (socket type AF_UNIX). Additionally, we give other
203 	 * character drivers the chance to handle select for any of their
204 	 * device nodes. Some may not implement support for select and let
205 	 * libchardriver return EBADF, which we then pass to the calling
206 	 * process once we receive the reply.
207 	 */
208 	se->type[fd] = -1;
209 	for (type = 0; type < SEL_FDS; type++) {
210 		if (fdtypes[type].type_match(f)) {
211 			se->type[fd] = type;
212 			se->nfds = fd+1;
213 			se->filps[fd]->filp_selectors++;
214 			break;
215 		}
216 	}
217 	unlock_filp(f);
218 	if (se->type[fd] == -1) { /* Type not found */
219 		se->requestor = NULL;
220 		return(EBADF);
221 	}
222   }
223 
224   /* Check all file descriptors in the set whether one is 'ready' now */
225   for (fd = 0; fd < nfds; fd++) {
226 	/* Again, check for involuntarily selected fd's */
227 	if (!(ops = tab2ops(fd, se)))
228 		continue; /* No operations set; nothing to do for this fd */
229 
230 	/* File descriptors selected for reading that are not opened for
231 	 * reading should be marked as readable, as read calls would fail
232 	 * immediately. The same applies to writing.
233 	 */
234 	f = se->filps[fd];
235 	if ((ops & SEL_RD) && !(f->filp_mode & R_BIT)) {
236 		ops2tab(SEL_RD, fd, se);
237 		ops &= ~SEL_RD;
238 	}
239 	if ((ops & SEL_WR) && !(f->filp_mode & W_BIT)) {
240 		ops2tab(SEL_WR, fd, se);
241 		ops &= ~SEL_WR;
242 	}
243 	/* Test filp for select operations if not already done so. e.g.,
244 	 * processes sharing a filp and both doing a select on that filp. */
245 	if ((f->filp_select_ops & ops) != ops) {
246 		int wantops;
247 
248 		wantops = (f->filp_select_ops |= ops);
249 		type = se->type[fd];
250 		select_lock_filp(f, wantops);
251 		r = fdtypes[type].select_request(f, &wantops, se->block, fp);
252 		unlock_filp(f);
253 		if (r != OK && r != SUSPEND) {
254 			se->error = r;
255 			break; /* Error or bogus return code; abort */
256 		}
257 
258 		/* The select request above might have turned on/off some
259 		 * operations because they were 'ready' or not meaningful.
260 		 * Either way, we might have a result and we need to store them
261 		 * in the select table entry. */
262 		if (wantops & ops) ops2tab(wantops, fd, se);
263 	}
264   }
265 
266   /* At this point there won't be any blocking calls anymore. */
267   se->starting = FALSE;
268 
269   if ((se->nreadyfds > 0 || se->error != OK || !se->block) &&
270 		!is_deferred(se)) {
271 	/* An error occurred, or fd's were found that were ready to go right
272 	 * away, and/or we were instructed not to block at all. Must return
273 	 * immediately. Do not copy FD sets if an error occurred.
274 	 */
275 	if (se->error != OK)
276 		r = se->error;
277 	else
278 		r = copy_fdsets(se, se->nfds, TO_PROC);
279 	select_cancel_all(se);
280 	se->requestor = NULL;
281 
282 	if (r != OK)
283 		return(r);
284 	return(se->nreadyfds);
285   }
286 
287   /* Convert timeval to ticks and set the timer. If it fails, undo
288    * all, return error.
289    */
290   if (do_timeout && se->block) {
291 	/* Open Group:
292 	 * "If the requested timeout interval requires a finer
293 	 * granularity than the implementation supports, the
294 	 * actual timeout interval shall be rounded up to the next
295 	 * supported value."
296 	 */
297 	if (timeout.tv_sec >= (TMRDIFF_MAX - 1) / system_hz) {
298 		ticks = TMRDIFF_MAX; /* silently truncate */
299 	} else {
300 		ticks = timeout.tv_sec * system_hz +
301 		    (timeout.tv_usec * system_hz + USECPERSEC-1) / USECPERSEC;
302 	}
303 	assert(ticks != 0 && ticks <= TMRDIFF_MAX);
304 	se->expiry = ticks;
305 	set_timer(&se->timer, ticks, select_timeout_check, s);
306   }
307 
308   /* process now blocked */
309   suspend(FP_BLOCKED_ON_SELECT);
310   return(SUSPEND);
311 }
312 
313 /*===========================================================================*
314  *				is_deferred				     *
315  *===========================================================================*/
316 static int is_deferred(struct selectentry *se)
317 {
318 /* Find out whether this select has pending initial replies */
319 
320   int fd;
321   struct filp *f;
322 
323   /* The select call must have finished its initialization at all. */
324   if (se->starting) return(TRUE);
325 
326   for (fd = 0; fd < se->nfds; fd++) {
327 	if ((f = se->filps[fd]) == NULL) continue;
328 	if (f->filp_select_flags & (FSF_UPDATE|FSF_BUSY)) return(TRUE);
329   }
330 
331   return(FALSE);
332 }
333 
334 
335 /*===========================================================================*
336  *				is_regular_file				     *
337  *===========================================================================*/
338 static int is_regular_file(struct filp *f)
339 {
340   return(f && f->filp_vno && S_ISREG(f->filp_vno->v_mode));
341 }
342 
343 /*===========================================================================*
344  *				is_pipe					     *
345  *===========================================================================*/
346 static int is_pipe(struct filp *f)
347 {
348 /* Recognize either anonymous pipe or named pipe (FIFO) */
349   return(f && f->filp_vno && S_ISFIFO(f->filp_vno->v_mode));
350 }
351 
352 /*===========================================================================*
353  *				is_char_device				     *
354  *===========================================================================*/
355 static int is_char_device(struct filp *f)
356 {
357 /* See if this filp is a handle on a character device. This function MUST NOT
358  * block its calling thread. The given filp may or may not be locked.
359  */
360 
361   return (f && f->filp_vno && S_ISCHR(f->filp_vno->v_mode));
362 }
363 
364 /*===========================================================================*
365  *				select_request_char			     *
366  *===========================================================================*/
static int select_request_char(struct filp *f, int *ops, int block,
	struct fproc *rfp)
{
/* Check readiness status on a character device. Unless suitable results are
 * available right now, this will only initiate the polling process, causing
 * result processing to be deferred. This function MUST NOT block its calling
 * thread. The given filp may or may not be locked.
 *
 * Parameters:
 *   f      the filp of the character device to query
 *   ops    on input, the SEL_ operations of interest; once the device checks
 *          have passed, it is always set to zero on output, since this
 *          function itself never reports operations as ready
 *   block  nonzero if the select call may block, in which case the driver is
 *          also asked for a later notification (SEL_NOTIFY)
 *   rfp    the process on whose behalf the device number is mapped
 *
 * Return value:
 *   ENXIO    the device number could not be mapped
 *   EIO      the filp is already being selected on as a different device
 *   OK       nonblocking call for which all requested operations are already
 *            known to be not ready
 *   SUSPEND  a query is pending, ongoing, or has just been sent to the driver
 *   other    the error returned by cdev_select
 */
  dev_t dev;
  int r, rops;
  struct dmap *dp;

  /* Start by remapping the device node number to a "real" device number. Those
   * two are different only for CTTY_MAJOR aka /dev/tty, but that one single
   * exception requires quite some extra effort here: the select code matches
   * character driver replies to their requests based on the device number, so
   * it needs to be aware that device numbers may be mapped. The idea is to
   * perform the mapping once and store the result in the filp object, so that
   * at least we don't run into problems when a process loses its controlling
   * terminal while doing a select (see also free_proc). It should be noted
   * that it is possible that multiple processes share the same /dev/tty filp,
   * and they may not all have a controlling terminal. The ctty-less processes
   * should never pass the mapping; a more problematic case is checked below.
   *
   * The cdev_map call also checks the major number for rough validity, so that
   * we can use it to index the dmap array safely a bit later.
   */
  if ((dev = cdev_map(f->filp_vno->v_sdev, rfp)) == NO_DEV)
	return(ENXIO);

  if (f->filp_char_select_dev != NO_DEV && f->filp_char_select_dev != dev) {
	/* Currently, this case can occur as follows: a process with a
	 * controlling terminal opens /dev/tty and forks, the new child starts
	 * a new session, opens a new controlling terminal, and both parent and
	 * child call select on the /dev/tty file descriptor. If this case ever
	 * becomes real, a better solution may be to force-close a filp for
	 * /dev/tty when a new controlling terminal is opened.
	 */
	printf("VFS: file pointer has multiple controlling TTYs!\n");
	return(EIO);
  }
  f->filp_char_select_dev = dev; /* set before possibly suspending */

  /* Work on a private copy of the requested operations from here on */
  rops = *ops;

  /* By default, nothing to do */
  *ops = 0;

  /*
   * If we have previously asked the driver to notify us about certain ready
   * operations, but it has not notified us yet, then we can safely assume that
   * those operations are not ready right now.  Therefore, if this call is not
   * supposed to block, we can disregard the pending operations as not ready.
   * We must make absolutely sure that the flags are "stable" right now though:
   * we are neither waiting to query the driver about them (FSF_UPDATE) nor
   * querying the driver about them right now (FSF_BUSY).  This is a dangerous
   * case of premature optimization and may be removed altogether if it proves
   * to continue to be a source of bugs.
   */
  if (!block && !(f->filp_select_flags & (FSF_UPDATE | FSF_BUSY)) &&
      (f->filp_select_flags & FSF_BLOCKED)) {
	if ((rops & SEL_RD) && (f->filp_select_flags & FSF_RD_BLOCK))
		rops &= ~SEL_RD;
	if ((rops & SEL_WR) && (f->filp_select_flags & FSF_WR_BLOCK))
		rops &= ~SEL_WR;
	if ((rops & SEL_ERR) && (f->filp_select_flags & FSF_ERR_BLOCK))
		rops &= ~SEL_ERR;
	/* Nothing left to ask the driver about: done, nothing is ready */
	if (!(rops & (SEL_RD|SEL_WR|SEL_ERR)))
		return(OK);
  }

  /* Mark the filp as needing a (new) query, and for a blocking call, remember
   * which operations we are going to be waiting for.
   */
  f->filp_select_flags |= FSF_UPDATE;
  if (block) {
	rops |= SEL_NOTIFY;
	if (rops & SEL_RD)	f->filp_select_flags |= FSF_RD_BLOCK;
	if (rops & SEL_WR)	f->filp_select_flags |= FSF_WR_BLOCK;
	if (rops & SEL_ERR)	f->filp_select_flags |= FSF_ERR_BLOCK;
  }

  /* A query for this filp is in progress; FSF_UPDATE stays set (presumably so
   * that the query is redone once the current one completes - TODO confirm
   * against select_restart_filps).
   */
  if (f->filp_select_flags & FSF_BUSY)
	return(SUSPEND);

  /* The driver is busy with a select query for some filp; FSF_UPDATE stays
   * set here as well.
   */
  dp = &dmap[major(dev)];
  if (dp->dmap_sel_busy)
	return(SUSPEND);

  /* We can send the query right now, so it is no longer merely pending */
  f->filp_select_flags &= ~FSF_UPDATE;
  r = cdev_select(dev, rops);
  if (r != OK)
	return(r);

  /* Record that the driver owes us a reply for this filp */
  dp->dmap_sel_busy = TRUE;
  dp->dmap_sel_filp = f;
  f->filp_select_flags |= FSF_BUSY;

  return(SUSPEND);
}
464 
465 /*===========================================================================*
466  *				select_request_file			     *
467  *===========================================================================*/
468 static int select_request_file(struct filp *UNUSED(f), int *UNUSED(ops),
469   int UNUSED(block), struct fproc *UNUSED(rfp))
470 {
471   /* Files are always ready, so output *ops is input *ops */
472   return(OK);
473 }
474 
475 /*===========================================================================*
476  *				select_request_pipe			     *
477  *===========================================================================*/
478 static int select_request_pipe(struct filp *f, int *ops, int block,
479 	struct fproc *UNUSED(rfp))
480 {
481 /* Check readiness status on a pipe. The given filp is locked. This function
482  * may block its calling thread if necessary.
483  */
484   int orig_ops, r = 0, err;
485 
486   orig_ops = *ops;
487 
488   if ((*ops & (SEL_RD|SEL_ERR))) {
489 	/* Check if we can read 1 byte */
490 	err = pipe_check(f, READING, f->filp_flags & ~O_NONBLOCK, 1,
491 			 1 /* Check only */);
492 
493 	if (err != SUSPEND)
494 		r |= SEL_RD;
495 	if (err < 0 && err != SUSPEND)
496 		r |= SEL_ERR;
497   }
498 
499   if ((*ops & (SEL_WR|SEL_ERR))) {
500 	/* Check if we can write 1 byte */
501 	err = pipe_check(f, WRITING, f->filp_flags & ~O_NONBLOCK, 1,
502 			 1 /* Check only */);
503 
504 	if (err != SUSPEND)
505 		r |= SEL_WR;
506 	if (err < 0 && err != SUSPEND)
507 		r |= SEL_ERR;
508   }
509 
510   /* Some options we collected might not be requested. */
511   *ops = r & orig_ops;
512 
513   if (!*ops && block)
514 	f->filp_pipe_select_ops |= orig_ops;
515 
516   return(OK);
517 }
518 
519 /*===========================================================================*
520  *				tab2ops					     *
521  *===========================================================================*/
522 static int tab2ops(int fd, struct selectentry *e)
523 {
524   int ops = 0;
525   if (FD_ISSET(fd, &e->readfds))  ops |= SEL_RD;
526   if (FD_ISSET(fd, &e->writefds)) ops |= SEL_WR;
527   if (FD_ISSET(fd, &e->errorfds)) ops |= SEL_ERR;
528 
529   return(ops);
530 }
531 
532 
533 /*===========================================================================*
534  *				ops2tab					     *
535  *===========================================================================*/
536 static void ops2tab(int ops, int fd, struct selectentry *e)
537 {
538   if ((ops & SEL_RD) && e->vir_readfds && FD_ISSET(fd, &e->readfds) &&
539       !FD_ISSET(fd, &e->ready_readfds)) {
540 	FD_SET(fd, &e->ready_readfds);
541 	e->nreadyfds++;
542   }
543 
544   if ((ops & SEL_WR) && e->vir_writefds && FD_ISSET(fd, &e->writefds) &&
545       !FD_ISSET(fd, &e->ready_writefds)) {
546 	FD_SET(fd, &e->ready_writefds);
547 	e->nreadyfds++;
548   }
549 
550   if ((ops & SEL_ERR) && e->vir_errorfds && FD_ISSET(fd, &e->errorfds) &&
551       !FD_ISSET(fd, &e->ready_errorfds)) {
552 	FD_SET(fd, &e->ready_errorfds);
553 	e->nreadyfds++;
554   }
555 }
556 
557 
558 /*===========================================================================*
559  *				copy_fdsets				     *
560  *===========================================================================*/
561 static int copy_fdsets(struct selectentry *se, int nfds, int direction)
562 {
563 /* Copy FD sets from or to the user process calling select(2). This function
564  * MUST NOT block the calling thread.
565  */
566   int r;
567   size_t fd_setsize;
568   endpoint_t src_e, dst_e;
569   fd_set *src_fds, *dst_fds;
570 
571   if (nfds < 0 || nfds > OPEN_MAX)
572 	panic("select copy_fdsets: nfds wrong: %d", nfds);
573 
574   /* Only copy back as many bits as the user expects. */
575   fd_setsize = (size_t) (howmany(nfds, __NFDBITS) * sizeof(__fd_mask));
576 
577   /* Set source and destination endpoints */
578   src_e = (direction == FROM_PROC) ? se->req_endpt : SELF;
579   dst_e = (direction == FROM_PROC) ? SELF : se->req_endpt;
580 
581   /* read set */
582   src_fds = (direction == FROM_PROC) ? se->vir_readfds : &se->ready_readfds;
583   dst_fds = (direction == FROM_PROC) ? &se->readfds : se->vir_readfds;
584   if (se->vir_readfds) {
585 	r = sys_datacopy_wrapper(src_e, (vir_bytes) src_fds, dst_e,
586 			(vir_bytes) dst_fds, fd_setsize);
587 	if (r != OK) return(r);
588   }
589 
590   /* write set */
591   src_fds = (direction == FROM_PROC) ? se->vir_writefds : &se->ready_writefds;
592   dst_fds = (direction == FROM_PROC) ? &se->writefds : se->vir_writefds;
593   if (se->vir_writefds) {
594 	r = sys_datacopy_wrapper(src_e, (vir_bytes) src_fds, dst_e,
595 			(vir_bytes) dst_fds, fd_setsize);
596 	if (r != OK) return(r);
597   }
598 
599   /* error set */
600   src_fds = (direction == FROM_PROC) ? se->vir_errorfds : &se->ready_errorfds;
601   dst_fds = (direction == FROM_PROC) ? &se->errorfds : se->vir_errorfds;
602   if (se->vir_errorfds) {
603 	r = sys_datacopy_wrapper(src_e, (vir_bytes) src_fds, dst_e,
604 			(vir_bytes) dst_fds, fd_setsize);
605 	if (r != OK) return(r);
606   }
607 
608   return(OK);
609 }
610 
611 
612 /*===========================================================================*
613  *				select_cancel_all			     *
614  *===========================================================================*/
615 static void select_cancel_all(struct selectentry *se)
616 {
617 /* Cancel select, possibly on success. Decrease select usage and cancel timer.
618  * This function MUST NOT block its calling thread.
619  */
620 
621   int fd;
622   struct filp *f;
623 
624   for (fd = 0; fd < se->nfds; fd++) {
625 	if ((f = se->filps[fd]) == NULL) continue;
626 	se->filps[fd] = NULL;
627 	select_cancel_filp(f);
628   }
629 
630   if (se->expiry > 0) {
631 	cancel_timer(&se->timer);
632 	se->expiry = 0;
633   }
634 
635   se->requestor = NULL;
636 }
637 
638 /*===========================================================================*
639  *				select_cancel_filp			     *
640  *===========================================================================*/
641 static void select_cancel_filp(struct filp *f)
642 {
643 /* Reduce the number of select users of this filp. This function MUST NOT block
644  * its calling thread.
645  */
646   devmajor_t major;
647 
648   assert(f);
649   assert(f->filp_selectors > 0);
650   assert(f->filp_count > 0);
651 
652   f->filp_selectors--;
653   if (f->filp_selectors == 0) {
654 	/* No one selecting on this filp anymore, forget about select state */
655 	f->filp_select_ops = 0;
656 	f->filp_select_flags = 0;
657 	f->filp_pipe_select_ops = 0;
658 
659 	/* If this filp is the subject of an ongoing select query to a
660 	 * character device, mark the query as stale, so that this filp will
661 	 * not be checked when the result arrives. The filp select device may
662 	 * still be NO_DEV if do_select fails on the initial fd check.
663 	 */
664 	if (is_char_device(f) && f->filp_char_select_dev != NO_DEV) {
665 		major = major(f->filp_char_select_dev);
666 		if (dmap[major].dmap_sel_busy &&
667 			dmap[major].dmap_sel_filp == f)
668 			dmap[major].dmap_sel_filp = NULL; /* leave _busy set */
669 		f->filp_char_select_dev = NO_DEV;
670 	}
671   }
672 }
673 
674 /*===========================================================================*
675  *				select_return				     *
676  *===========================================================================*/
677 static void select_return(struct selectentry *se)
678 {
679 /* Return the results of a select call to the user process and revive the
680  * process. This function MUST NOT block its calling thread.
681  */
682   int r;
683 
684   assert(!is_deferred(se));	/* Not done yet, first wait for async reply */
685 
686   select_cancel_all(se);
687 
688   if (se->error != OK)
689 	r = se->error;
690   else
691 	r = copy_fdsets(se, se->nfds, TO_PROC);
692   if (r == OK)
693 	r = se->nreadyfds;
694 
695   revive(se->req_endpt, r);
696 }
697 
698 
699 /*===========================================================================*
700  *				select_callback			             *
701  *===========================================================================*/
void select_callback(struct filp *f, int status)
{
/* Entry point for reporting that the status of a filp has changed, with the
 * given ready operations or error. The report is passed on to filp_status().
 * This function is currently called only for pipes, and holds the lock to
 * the filp.
 */
  filp_status(f, status);
}
711 
712 /*===========================================================================*
713  *				init_select  				     *
714  *===========================================================================*/
715 void init_select(void)
716 {
717   int s;
718 
719   for (s = 0; s < MAXSELECTS; s++)
720 	init_timer(&selecttab[s].timer);
721 }
722 
723 
724 /*===========================================================================*
725  *				select_forget			             *
726  *===========================================================================*/
727 void select_forget(void)
728 {
729 /* The calling thread's associated process is expected to be unpaused, due to
730  * a signal that is supposed to interrupt the current system call. Totally
731  * forget about the select(). This function may block its calling thread if
732  * necessary (but it doesn't).
733  */
734   int slot;
735   struct selectentry *se;
736 
737   for (slot = 0; slot < MAXSELECTS; slot++) {
738 	se = &selecttab[slot];
739 	if (se->requestor == fp)
740 		break;
741   }
742 
743   if (slot >= MAXSELECTS) return;	/* Entry not found */
744 
745   assert(se->starting == FALSE);
746 
747   /* Do NOT test on is_deferred here. We can safely cancel ongoing queries. */
748   select_cancel_all(se);
749 }
750 
751 
752 /*===========================================================================*
753  *				select_timeout_check	  	     	     *
754  *===========================================================================*/
755 void select_timeout_check(int s)
756 {
757 /* An alarm has gone off for one of the select queries. This function MUST NOT
758  * block its calling thread.
759  */
760   struct selectentry *se;
761 
762   if (s < 0 || s >= MAXSELECTS) return;	/* Entry does not exist */
763 
764   se = &selecttab[s];
765   if (se->requestor == NULL) return;
766   if (se->expiry == 0) return;	/* Strange, did we even ask for a timeout? */
767   se->expiry = 0;
768   if (!is_deferred(se))
769 	select_return(se);
770   else
771 	se->block = 0;	/* timer triggered "too soon", treat as nonblocking */
772 }
773 
774 
775 /*===========================================================================*
776  *				select_unsuspend_by_endpt  	     	     *
777  *===========================================================================*/
778 void select_unsuspend_by_endpt(endpoint_t proc_e)
779 {
780 /* Revive blocked processes when a driver has disappeared */
781   devmajor_t major;
782   int fd, s;
783   struct selectentry *se;
784   struct filp *f;
785 
786   for (s = 0; s < MAXSELECTS; s++) {
787 	int wakehim = 0;
788 	se = &selecttab[s];
789 	if (se->requestor == NULL) continue;
790 	if (se->requestor->fp_endpoint == proc_e) {
791 		assert(se->requestor->fp_flags & FP_EXITING);
792 		select_cancel_all(se);
793 		continue;
794 	}
795 
796 	for (fd = 0; fd < se->nfds; fd++) {
797 		if ((f = se->filps[fd]) == NULL || !is_char_device(f))
798 			continue;
799 
800 		assert(f->filp_char_select_dev != NO_DEV);
801 		major = major(f->filp_char_select_dev);
802 		if (dmap_driver_match(proc_e, major)) {
803 			se->filps[fd] = NULL;
804 			se->error = EIO;
805 			select_cancel_filp(f);
806 			wakehim = 1;
807 		}
808 	}
809 
810 	if (wakehim && !is_deferred(se))
811 		select_return(se);
812   }
813 }
814 
815 /*===========================================================================*
816  *				select_reply1				     *
817  *===========================================================================*/
void select_reply1(endpoint_t driver_e, devminor_t minor, int status)
{
/* Handle the initial reply to CDEV_SELECT request. This function MUST NOT
 * block its calling thread.
 *
 * Parameters:
 *   driver_e  endpoint of the replying driver, used to find its dmap entry
 *   minor     minor device number the reply is about
 *   status    a positive value is a bitmask of ready SEL_ operations, a
 *             negative value is an error, and zero means nothing is ready
 *
 * Replies from unknown drivers, unexpected replies, and replies about a
 * device other than the one being queried are ignored.
 */
  devmajor_t major;
  dev_t dev;
  struct filp *f;
  struct dmap *dp;

  /* Figure out which device is replying */
  if ((dp = get_dmap(driver_e)) == NULL) return;

  /* The major number is the index of the driver's entry in the dmap table */
  major = dp-dmap;
  dev = makedev(major, minor);

  /* Get filp belonging to character special file */
  if (!dp->dmap_sel_busy) {
	printf("VFS (%s:%d): major %d was not expecting a CDEV_SELECT reply\n",
		__FILE__, __LINE__, major);
	return;
  }

  /* The select filp may have been set to NULL if the requestor has been
   * unpaused in the meantime. In that case, we ignore the result, but we do
   * look for other filps to restart later.
   */
  if ((f = dp->dmap_sel_filp) != NULL) {
	/* Find vnode and check we got a reply from the device we expected */
	assert(is_char_device(f));
	assert(f->filp_char_select_dev != NO_DEV);
	if (f->filp_char_select_dev != dev) {
		/* This should never happen. The driver may be misbehaving.
		 * For now we assume that the reply we want will arrive later..
		 */
		printf("VFS (%s:%d): expected reply from dev %llx not %llx\n",
			__FILE__, __LINE__, f->filp_char_select_dev, dev);
		return;
	}
  }

  /* No longer waiting for a reply from this device */
  dp->dmap_sel_busy = FALSE;
  dp->dmap_sel_filp = NULL;

  /* Process the select result only if the filp is valid. */
  if (f != NULL) {
	assert(f->filp_count >= 1);
	assert(f->filp_select_flags & FSF_BUSY);

	/* The query for this filp has been answered */
	f->filp_select_flags &= ~FSF_BUSY;

	/* The select call is done now, except when
	 * - another process started a select on the same filp with possibly a
	 *   different set of operations.
	 * - a process does a select on the same filp but using different file
	 *   descriptors.
	 * - the select has a timeout. Upon receiving this reply the operations
	 *   might not be ready yet, so we want to wait for that to ultimately
	 *   happen.
	 *   Therefore we need to keep remembering what the operations are.
	 */
	if (!(f->filp_select_flags & (FSF_UPDATE|FSF_BLOCKED)))
		f->filp_select_ops = 0;		/* done selecting */
	else if (status > 0 && !(f->filp_select_flags & FSF_UPDATE))
		/* there may be operations pending */
		f->filp_select_ops &= ~status;

	/* Record new filp status */
	/* A "nothing ready" reply for a blocking query changes no flags */
	if (!(status == 0 && (f->filp_select_flags & FSF_BLOCKED))) {
		if (status > 0) {	/* operations ready */
			if (status & SEL_RD)
				f->filp_select_flags &= ~FSF_RD_BLOCK;
			if (status & SEL_WR)
				f->filp_select_flags &= ~FSF_WR_BLOCK;
			if (status & SEL_ERR)
				f->filp_select_flags &= ~FSF_ERR_BLOCK;
		} else if (status < 0) { /* error */
			/* Always unblock upon error */
			f->filp_select_flags &= ~FSF_BLOCKED;
		}
	}

	filp_status(f, status); /* Tell filp owners about the results */
  }

  /* The driver is free again; see whether other filps were waiting for it */
  select_restart_filps();
}
906 
907 
908 /*===========================================================================*
909  *				select_reply2				     *
910  *===========================================================================*/
911 void select_reply2(endpoint_t driver_e, devminor_t minor, int status)
912 {
913 /* Handle secondary reply to DEV_SELECT request. A secondary reply occurs when
914  * the select request is 'blocking' until an operation becomes ready. This
915  * function MUST NOT block its calling thread.
916  */
917   int slot, found, fd;
918   devmajor_t major;
919   dev_t dev;
920   struct filp *f;
921   struct dmap *dp;
922   struct selectentry *se;
923 
924   if (status == 0) {
925 	printf("VFS (%s:%d): weird status (%d) to report\n",
926 		__FILE__, __LINE__, status);
927 	return;
928   }
929 
930   /* Figure out which device is replying */
931   if ((dp = get_dmap(driver_e)) == NULL) {
932 	printf("VFS (%s:%d): endpoint %d is not a known driver endpoint\n",
933 		__FILE__, __LINE__, driver_e);
934 	return;
935   }
936   major = dp-dmap;
937   dev = makedev(major, minor);
938 
939   /* Find all file descriptors selecting for this device */
940   for (slot = 0; slot < MAXSELECTS; slot++) {
941 	se = &selecttab[slot];
942 	if (se->requestor == NULL) continue;	/* empty slot */
943 
944 	found = FALSE;
945 	for (fd = 0; fd < se->nfds; fd++) {
946 		if ((f = se->filps[fd]) == NULL) continue;
947 		if (!is_char_device(f)) continue;
948 		assert(f->filp_char_select_dev != NO_DEV);
949 		if (f->filp_char_select_dev != dev) continue;
950 
951 		if (status > 0) {	/* Operations ready */
952 			/* Clear the replied bits from the request
953 			 * mask unless FSF_UPDATE is set.
954 			 */
955 			if (!(f->filp_select_flags & FSF_UPDATE))
956 				f->filp_select_ops &= ~status;
957 			if (status & SEL_RD)
958 				f->filp_select_flags &= ~FSF_RD_BLOCK;
959 			if (status & SEL_WR)
960 				f->filp_select_flags &= ~FSF_WR_BLOCK;
961 			if (status & SEL_ERR)
962 				f->filp_select_flags &= ~FSF_ERR_BLOCK;
963 
964 			ops2tab(status, fd, se);
965 		} else {
966 			f->filp_select_flags &= ~FSF_BLOCKED;
967 			se->error = status;
968 		}
969 		found = TRUE;
970 	}
971 	/* Even if 'found' is set now, nothing may have changed for this call,
972 	 * as it may not have been interested in the operations that were
973 	 * reported as ready. Let restart_proc check.
974 	 */
975 	if (found)
976 		restart_proc(se);
977   }
978 
979   select_restart_filps();
980 }
981 
982 /*===========================================================================*
983  *				select_restart_filps			     *
984  *===========================================================================*/
985 static void select_restart_filps(void)
986 {
987 /* We got a result from a character driver, and now we need to check if we can
988  * restart deferred polling operations. This function MUST NOT block its
989  * calling thread.
990  */
991   int fd, slot;
992   struct filp *f;
993   struct selectentry *se;
994 
995   /* Locate filps that can be restarted */
996   for (slot = 0; slot < MAXSELECTS; slot++) {
997 	se = &selecttab[slot];
998 	if (se->requestor == NULL) continue; /* empty slot */
999 
1000 	/* Only 'deferred' processes are eligible to restart */
1001 	if (!is_deferred(se)) continue;
1002 
1003 	/* Find filps that are not waiting for a reply, but have an updated
1004 	 * status (i.e., another select on the same filp with possibly a
1005 	 * different set of operations is to be done), and thus requires the
1006 	 * select request to be sent again).
1007 	 */
1008 	for (fd = 0; fd < se->nfds; fd++) {
1009 		int r, wantops, ops;
1010 		if ((f = se->filps[fd]) == NULL) continue;
1011 		if (f->filp_select_flags & FSF_BUSY) /* Still waiting for */
1012 			continue;		     /* initial reply */
1013 		if (!(f->filp_select_flags & FSF_UPDATE)) /* Must be in  */
1014 			continue;			  /* 'update' state */
1015 
1016 		/* This function is suitable only for character devices. In
1017 		 * particular, checking pipes the same way would introduce a
1018 		 * serious locking problem.
1019 		 */
1020 		assert(is_char_device(f));
1021 
1022 		wantops = ops = f->filp_select_ops;
1023 		r = select_request_char(f, &wantops, se->block, se->requestor);
1024 		if (r != OK && r != SUSPEND) {
1025 			se->error = r;
1026 			restart_proc(se);
1027 			break; /* Error or bogus return code; abort */
1028 		}
1029 		if (wantops & ops) ops2tab(wantops, fd, se);
1030 	}
1031   }
1032 }
1033 
1034 /*===========================================================================*
1035  *				filp_status				     *
1036  *===========================================================================*/
1037 static void
1038 filp_status(struct filp *f, int status)
1039 {
1040 /* Tell processes that need to know about the status of this filp. This
1041  * function MUST NOT block its calling thread.
1042  */
1043   int fd, slot, found;
1044   struct selectentry *se;
1045 
1046   for (slot = 0; slot < MAXSELECTS; slot++) {
1047 	se = &selecttab[slot];
1048 	if (se->requestor == NULL) continue; /* empty slot */
1049 
1050 	found = FALSE;
1051 	for (fd = 0; fd < se->nfds; fd++) {
1052 		if (se->filps[fd] != f) continue;
1053 		if (status < 0)
1054 			se->error = status;
1055 		else
1056 			ops2tab(status, fd, se);
1057 		found = TRUE;
1058 	}
1059 	if (found)
1060 		restart_proc(se);
1061   }
1062 }
1063 
1064 /*===========================================================================*
1065  *				restart_proc				     *
1066  *===========================================================================*/
1067 static void
1068 restart_proc(struct selectentry *se)
1069 {
1070 /* Tell process about select results (if any) unless there are still results
1071  * pending. This function MUST NOT block its calling thread.
1072  */
1073 
1074   if ((se->nreadyfds > 0 || se->error != OK || !se->block) && !is_deferred(se))
1075 	select_return(se);
1076 }
1077 
1078 /*===========================================================================*
1079  *				wipe_select				     *
1080  *===========================================================================*/
1081 static void wipe_select(struct selectentry *se)
1082 {
1083   se->nfds = 0;
1084   se->nreadyfds = 0;
1085   se->error = OK;
1086   se->block = 0;
1087   memset(se->filps, 0, sizeof(se->filps));
1088 
1089   FD_ZERO(&se->readfds);
1090   FD_ZERO(&se->writefds);
1091   FD_ZERO(&se->errorfds);
1092   FD_ZERO(&se->ready_readfds);
1093   FD_ZERO(&se->ready_writefds);
1094   FD_ZERO(&se->ready_errorfds);
1095 }
1096 
1097 /*===========================================================================*
1098  *				select_lock_filp			     *
1099  *===========================================================================*/
1100 static void select_lock_filp(struct filp *f, int ops)
1101 {
1102 /* Lock a filp and vnode based on which operations are requested. This function
1103  * may block its calling thread, obviously.
1104  */
1105   tll_access_t locktype;
1106 
1107   locktype = VNODE_READ; /* By default */
1108 
1109   if (ops & (SEL_WR|SEL_ERR))
1110 	/* Selecting for error or writing requires exclusive access */
1111 	locktype = VNODE_WRITE;
1112 
1113   lock_filp(f, locktype);
1114 }
1115 
1116 /*
1117  * Dump the state of the entire select table, for debugging purposes.
1118  */
1119 void
1120 select_dump(void)
1121 {
1122 	struct selectentry *se;
1123 	struct filp *f;
1124 	struct dmap *dp;
1125 	dev_t dev;
1126 	int s, fd;
1127 
1128 	for (s = 0; s < MAXSELECTS; s++) {
1129 		se = &selecttab[s];
1130 		if (se->requestor == NULL)
1131 			continue;
1132 
1133 		printf("select %d: endpt %d nfds %d nreadyfds %d error %d "
1134 		    "block %d starting %d expiry %u is_deferred %d\n",
1135 		    s, se->req_endpt, se->nfds, se->nreadyfds, se->error,
1136 		    se->block, se->starting, se->expiry, is_deferred(se));
1137 
1138 		for (fd = 0; !se->starting && fd < se->nfds; fd++) {
1139 			/* Save on output: do not print NULL filps at all. */
1140 			if ((f = se->filps[fd]) == NULL)
1141 				continue;
1142 
1143 			printf("- [%d] filp %p flags %x type ", fd, f,
1144 			    f->filp_select_flags);
1145 			if (is_regular_file(f))
1146 				printf("regular\n");
1147 			else if (is_pipe(f))
1148 				printf("pipe\n");
1149 			else if (is_char_device(f)) {
1150 				dev = cdev_map(f->filp_vno->v_sdev,
1151 				    se->requestor);
1152 				printf("char (dev <%d,%d>, dmap ",
1153 				    major(dev), minor(dev));
1154 				if (dev != NO_DEV) {
1155 					dp = &dmap[major(dev)];
1156 					printf("busy %d filp %p)\n",
1157 					    dp->dmap_sel_busy,
1158 					    dp->dmap_sel_filp);
1159 				} else
1160 					printf("unknown)\n");
1161 			} else
1162 				printf("unknown\n");
1163 		}
1164 	}
1165 }
1166