xref: /minix3/minix/servers/vfs/select.c (revision eda6f5931d42c77e1480347b1fc3eef2f8d33806)
1 /* Implement entry point to select system call.
2  *
3  * The entry points into this file are
4  *   do_select:	       perform the SELECT system call
5  *   select_callback:  notify select system of possible fd operation
6  *   select_unsuspend_by_endpt: cancel a blocking select on exiting driver
7  *
8  * The select code uses minimal locking, so that the replies from character
9  * drivers can be processed without blocking. Filps are locked only for pipes.
10  * We make the assumption that any other structures and fields are safe to
11  * check (and possibly change) as long as we know that a process is blocked on
12  * a select(2) call, meaning that all involved filps are guaranteed to stay
13  * open until either we finish the select call, or the process gets interrupted
14  * by a signal.
15  */
16 
17 #include "fs.h"
18 #include <sys/fcntl.h>
19 #include <sys/time.h>
20 #include <sys/select.h>
21 #include <sys/stat.h>
22 #include <minix/callnr.h>
23 #include <minix/u64.h>
24 #include <string.h>
25 #include <assert.h>
26 
27 #include "file.h"
28 #include "vnode.h"
29 
30 /* max. number of simultaneously pending select() calls */
31 #define MAXSELECTS 25
32 #define FROM_PROC 0
33 #define TO_PROC   1
34 
35 static struct selectentry {
36   struct fproc *requestor;	/* slot is free iff this is NULL */
37   endpoint_t req_endpt;
38   fd_set readfds, writefds, errorfds;
39   fd_set ready_readfds, ready_writefds, ready_errorfds;
40   fd_set *vir_readfds, *vir_writefds, *vir_errorfds;
41   struct filp *filps[OPEN_MAX];
42   int type[OPEN_MAX];
43   int nfds, nreadyfds;
44   int error;
45   char block;
46   char starting;
47   clock_t expiry;
48   minix_timer_t timer;	/* if expiry > 0 */
49 } selecttab[MAXSELECTS];
50 
/* Select table bookkeeping. */
static void wipe_select(struct selectentry *s);
static int copy_fdsets(struct selectentry *se, int nfds, int direction);
static int tab2ops(int fd, struct selectentry *e);
static void ops2tab(int ops, int fd, struct selectentry *e);
static int is_deferred(struct selectentry *se);

/* Completing or aborting a select call. */
static void select_return(struct selectentry *se);
static void restart_proc(struct selectentry *se);
static void select_cancel_all(struct selectentry *e);
static void select_cancel_filp(struct filp *f);
static void select_restart_filps(void);
static void filp_status(struct filp *fp, int status);

/* File type recognition. */
static int is_regular_file(struct filp *f);
static int is_pipe(struct filp *f);
static int is_char_device(struct filp *f);

/* Per file type readiness checks. */
static void select_lock_filp(struct filp *f, int ops);
static int select_request_file(struct filp *f, int *ops, int block,
	struct fproc *rfp);
static int select_request_char(struct filp *f, int *ops, int block,
	struct fproc *rfp);
static int select_request_pipe(struct filp *f, int *ops, int block,
	struct fproc *rfp);
72 
73 static struct fdtype {
74 	int (*select_request)(struct filp *, int *ops, int block,
75 		struct fproc *rfp);
76 	int (*type_match)(struct filp *f);
77 } fdtypes[] = {
78 	{ select_request_char, is_char_device },
79 	{ select_request_file, is_regular_file },
80 	{ select_request_pipe, is_pipe },
81 };
82 #define SEL_FDS		(sizeof(fdtypes) / sizeof(fdtypes[0]))
83 
84 /*===========================================================================*
85  *				do_select				     *
86  *===========================================================================*/
87 int do_select(void)
88 {
89 /* Implement the select(nfds, readfds, writefds, errorfds, timeout) system
90  * call. First we copy the arguments and verify their sanity. Then we check
91  * whether there are file descriptors that satisfy the select call right off
92  * the bat. If so, or if there are no ready file descriptors but the process
93  * requested to return immediately, we return the result. Otherwise we set a
94  * timeout and wait for either the file descriptors to become ready or the
95  * timer to go off. If no timeout value was provided, we wait indefinitely.
96  */
97   int r, nfds, do_timeout = 0, fd, s;
98   struct filp *f;
99   unsigned int type, ops;
100   struct timeval timeout;
101   struct selectentry *se;
102   vir_bytes vtimeout;
103 
104   nfds = job_m_in.m_lc_vfs_select.nfds;
105   vtimeout = job_m_in.m_lc_vfs_select.timeout;
106 
107   /* Sane amount of file descriptors? */
108   if (nfds < 0 || nfds > OPEN_MAX) return(EINVAL);
109 
110   /* Find a slot to store this select request */
111   for (s = 0; s < MAXSELECTS; s++)
112 	if (selecttab[s].requestor == NULL) /* Unused slot */
113 		break;
114   if (s >= MAXSELECTS) return(ENOSPC);
115 
116   se = &selecttab[s];
117   wipe_select(se);	/* Clear results of previous usage */
118   se->requestor = fp;
119   se->req_endpt = who_e;
120   se->vir_readfds = job_m_in.m_lc_vfs_select.readfds;
121   se->vir_writefds = job_m_in.m_lc_vfs_select.writefds;
122   se->vir_errorfds = job_m_in.m_lc_vfs_select.errorfds;
123 
124   /* Copy fdsets from the process */
125   if ((r = copy_fdsets(se, nfds, FROM_PROC)) != OK) {
126 	se->requestor = NULL;
127 	return(r);
128   }
129 
130   /* Did the process set a timeout value? If so, retrieve it. */
131   if (vtimeout != 0) {
132 	do_timeout = 1;
133 	r = sys_datacopy_wrapper(who_e, vtimeout, SELF, (vir_bytes) &timeout,
134 		sizeof(timeout));
135 	if (r != OK) {
136 		se->requestor = NULL;
137 		return(r);
138 	}
139   }
140 
141   /* No nonsense in the timeval */
142   if (do_timeout && (timeout.tv_sec < 0 || timeout.tv_usec < 0)) {
143 	se->requestor = NULL;
144 	return(EINVAL);
145   }
146 
147   /* If there is no timeout, we block forever. Otherwise, we block up to the
148    * specified time interval.
149    */
150   if (!do_timeout)	/* No timeout value set */
151 	se->block = 1;
152   else if (do_timeout && (timeout.tv_sec > 0 || timeout.tv_usec > 0))
153 	se->block = 1;
154   else			/* timeout set as (0,0) - this effects a poll */
155 	se->block = 0;
156   se->expiry = 0;	/* no timer set (yet) */
157 
158   /* We are going to lock filps, and that means that while locking a second
159    * filp, we might already get the results for the first one. In that case,
160    * the incoming results must not cause the select call to finish prematurely.
161    */
162   se->starting = TRUE;
163 
164   /* Verify that file descriptors are okay to select on */
165   for (fd = 0; fd < nfds; fd++) {
166 	/* Because the select() interface implicitly includes file descriptors
167 	 * you might not want to select on, we have to figure out whether we're
168 	 * interested in them. Typically, these file descriptors include fd's
169 	 * inherited from the parent proc and file descriptors that have been
170 	 * close()d, but had a lower fd than one in the current set.
171 	 */
172 	if (!(ops = tab2ops(fd, se)))
173 		continue; /* No operations set; nothing to do for this fd */
174 
175 	/* Get filp belonging to this fd */
176 	f = se->filps[fd] = get_filp(fd, VNODE_READ);
177 	if (f == NULL) {
178 		if (err_code == EBADF)
179 			r = err_code;
180 		else /* File descriptor is 'ready' to return EIO */
181 			r = EINTR;
182 
183 		se->requestor = NULL;
184 		return(r);
185 	}
186 
187 	/* Check file types. According to POSIX 2008:
188 	 * "The pselect() and select() functions shall support regular files,
189 	 * terminal and pseudo-terminal devices, FIFOs, pipes, and sockets. The
190 	 * behavior of pselect() and select() on file descriptors that refer to
191 	 * other types of file is unspecified."
192 	 *
193 	 * In our case, terminal and pseudo-terminal devices are handled by the
194 	 * TTY major and sockets by either INET major (socket type AF_INET) or
195 	 * UDS major (socket type AF_UNIX). Additionally, we give other
196 	 * character drivers the chance to handle select for any of their
197 	 * device nodes. Some may not implement support for select and let
198 	 * libchardriver return EBADF, which we then pass to the calling
199 	 * process once we receive the reply.
200 	 */
201 	se->type[fd] = -1;
202 	for (type = 0; type < SEL_FDS; type++) {
203 		if (fdtypes[type].type_match(f)) {
204 			se->type[fd] = type;
205 			se->nfds = fd+1;
206 			se->filps[fd]->filp_selectors++;
207 			break;
208 		}
209 	}
210 	unlock_filp(f);
211 	if (se->type[fd] == -1) { /* Type not found */
212 		se->requestor = NULL;
213 		return(EBADF);
214 	}
215   }
216 
217   /* Check all file descriptors in the set whether one is 'ready' now */
218   for (fd = 0; fd < nfds; fd++) {
219 	/* Again, check for involuntarily selected fd's */
220 	if (!(ops = tab2ops(fd, se)))
221 		continue; /* No operations set; nothing to do for this fd */
222 
223 	/* File descriptors selected for reading that are not opened for
224 	 * reading should be marked as readable, as read calls would fail
225 	 * immediately. The same applies to writing.
226 	 */
227 	f = se->filps[fd];
228 	if ((ops & SEL_RD) && !(f->filp_mode & R_BIT)) {
229 		ops2tab(SEL_RD, fd, se);
230 		ops &= ~SEL_RD;
231 	}
232 	if ((ops & SEL_WR) && !(f->filp_mode & W_BIT)) {
233 		ops2tab(SEL_WR, fd, se);
234 		ops &= ~SEL_WR;
235 	}
236 	/* Test filp for select operations if not already done so. e.g.,
237 	 * processes sharing a filp and both doing a select on that filp. */
238 	if ((f->filp_select_ops & ops) != ops) {
239 		int wantops;
240 
241 		wantops = (f->filp_select_ops |= ops);
242 		type = se->type[fd];
243 		select_lock_filp(f, wantops);
244 		r = fdtypes[type].select_request(f, &wantops, se->block, fp);
245 		unlock_filp(f);
246 		if (r != OK && r != SUSPEND) {
247 			se->error = r;
248 			break; /* Error or bogus return code; abort */
249 		}
250 
251 		/* The select request above might have turned on/off some
252 		 * operations because they were 'ready' or not meaningful.
253 		 * Either way, we might have a result and we need to store them
254 		 * in the select table entry. */
255 		if (wantops & ops) ops2tab(wantops, fd, se);
256 	}
257   }
258 
259   /* At this point there won't be any blocking calls anymore. */
260   se->starting = FALSE;
261 
262   if ((se->nreadyfds > 0 || se->error != OK || !se->block) &&
263 		!is_deferred(se)) {
264 	/* An error occurred, or fd's were found that were ready to go right
265 	 * away, and/or we were instructed not to block at all. Must return
266 	 * immediately. Do not copy FD sets if an error occurred.
267 	 */
268 	if (se->error != OK)
269 		r = se->error;
270 	else
271 		r = copy_fdsets(se, se->nfds, TO_PROC);
272 	select_cancel_all(se);
273 	se->requestor = NULL;
274 
275 	if (r != OK)
276 		return(r);
277 	return(se->nreadyfds);
278   }
279 
280   /* Convert timeval to ticks and set the timer. If it fails, undo
281    * all, return error.
282    */
283   if (do_timeout) {
284 	int ticks;
285 	/* Open Group:
286 	 * "If the requested timeout interval requires a finer
287 	 * granularity than the implementation supports, the
288 	 * actual timeout interval shall be rounded up to the next
289 	 * supported value."
290 	 */
291 #define USECPERSEC 1000000
292 	while(timeout.tv_usec >= USECPERSEC) {
293 		/* this is to avoid overflow with *system_hz below */
294 		timeout.tv_usec -= USECPERSEC;
295 		timeout.tv_sec++;
296 	}
297 	ticks = timeout.tv_sec * system_hz +
298 		(timeout.tv_usec * system_hz + USECPERSEC-1) / USECPERSEC;
299 	se->expiry = ticks;
300 	set_timer(&se->timer, ticks, select_timeout_check, s);
301   }
302 
303   /* process now blocked */
304   suspend(FP_BLOCKED_ON_SELECT);
305   return(SUSPEND);
306 }
307 
308 /*===========================================================================*
309  *				is_deferred				     *
310  *===========================================================================*/
311 static int is_deferred(struct selectentry *se)
312 {
313 /* Find out whether this select has pending initial replies */
314 
315   int fd;
316   struct filp *f;
317 
318   /* The select call must have finished its initialization at all. */
319   if (se->starting) return(TRUE);
320 
321   for (fd = 0; fd < se->nfds; fd++) {
322 	if ((f = se->filps[fd]) == NULL) continue;
323 	if (f->filp_select_flags & (FSF_UPDATE|FSF_BUSY)) return(TRUE);
324   }
325 
326   return(FALSE);
327 }
328 
329 
330 /*===========================================================================*
331  *				is_regular_file				     *
332  *===========================================================================*/
333 static int is_regular_file(struct filp *f)
334 {
335   return(f && f->filp_vno && S_ISREG(f->filp_vno->v_mode));
336 }
337 
338 /*===========================================================================*
339  *				is_pipe					     *
340  *===========================================================================*/
341 static int is_pipe(struct filp *f)
342 {
343 /* Recognize either anonymous pipe or named pipe (FIFO) */
344   return(f && f->filp_vno && S_ISFIFO(f->filp_vno->v_mode));
345 }
346 
347 /*===========================================================================*
348  *				is_char_device				     *
349  *===========================================================================*/
350 static int is_char_device(struct filp *f)
351 {
352 /* See if this filp is a handle on a character device. This function MUST NOT
353  * block its calling thread. The given filp may or may not be locked.
354  */
355 
356   return (f && f->filp_vno && S_ISCHR(f->filp_vno->v_mode));
357 }
358 
359 /*===========================================================================*
360  *				select_request_char			     *
361  *===========================================================================*/
362 static int select_request_char(struct filp *f, int *ops, int block,
363 	struct fproc *rfp)
364 {
365 /* Check readiness status on a character device. Unless suitable results are
366  * available right now, this will only initiate the polling process, causing
367  * result processing to be deferred. This function MUST NOT block its calling
368  * thread. The given filp may or may not be locked.
369  */
370   dev_t dev;
371   int r, rops;
372   struct dmap *dp;
373 
374   /* Start by remapping the device node number to a "real" device number. Those
375    * two are different only for CTTY_MAJOR aka /dev/tty, but that one single
376    * exception requires quite some extra effort here: the select code matches
377    * character driver replies to their requests based on the device number, so
378    * it needs to be aware that device numbers may be mapped. The idea is to
379    * perform the mapping once and store the result in the filp object, so that
380    * at least we don't run into problems when a process loses its controlling
381    * terminal while doing a select (see also free_proc). It should be noted
382    * that it is possible that multiple processes share the same /dev/tty filp,
383    * and they may not all have a controlling terminal. The ctty-less processes
384    * should never pass the mapping; a more problematic case is checked below.
385    *
386    * The cdev_map call also checks the major number for rough validity, so that
387    * we can use it to index the dmap array safely a bit later.
388    */
389   if ((dev = cdev_map(f->filp_vno->v_sdev, rfp)) == NO_DEV)
390 	return(ENXIO);
391 
392   if (f->filp_char_select_dev != NO_DEV && f->filp_char_select_dev != dev) {
393 	/* Currently, this case can occur as follows: a process with a
394 	 * controlling terminal opens /dev/tty and forks, the new child starts
395 	 * a new session, opens a new controlling terminal, and both parent and
396 	 * child call select on the /dev/tty file descriptor. If this case ever
397 	 * becomes real, a better solution may be to force-close a filp for
398 	 * /dev/tty when a new controlling terminal is opened.
399 	 */
400 	printf("VFS: file pointer has multiple controlling TTYs!\n");
401 	return(EIO);
402   }
403   f->filp_char_select_dev = dev; /* set before possibly suspending */
404 
405   rops = *ops;
406 
407   /* By default, nothing to do */
408   *ops = 0;
409 
410   if (!block && (f->filp_select_flags & FSF_BLOCKED)) {
411 	/* This filp is blocked waiting for a reply, but we don't want to
412 	 * block ourselves. Unless we're awaiting the initial reply, these
413 	 * operations won't be ready */
414 	if (!(f->filp_select_flags & FSF_BUSY)) {
415 		if ((rops & SEL_RD) && (f->filp_select_flags & FSF_RD_BLOCK))
416 			rops &= ~SEL_RD;
417 		if ((rops & SEL_WR) && (f->filp_select_flags & FSF_WR_BLOCK))
418 			rops &= ~SEL_WR;
419 		if ((rops & SEL_ERR) && (f->filp_select_flags & FSF_ERR_BLOCK))
420 			rops &= ~SEL_ERR;
421 		if (!(rops & (SEL_RD|SEL_WR|SEL_ERR)))
422 			return(OK);
423 	}
424   }
425 
426   f->filp_select_flags |= FSF_UPDATE;
427   if (block) {
428 	rops |= SEL_NOTIFY;
429 	if (rops & SEL_RD)	f->filp_select_flags |= FSF_RD_BLOCK;
430 	if (rops & SEL_WR)	f->filp_select_flags |= FSF_WR_BLOCK;
431 	if (rops & SEL_ERR)	f->filp_select_flags |= FSF_ERR_BLOCK;
432   }
433 
434   if (f->filp_select_flags & FSF_BUSY)
435 	return(SUSPEND);
436 
437   dp = &dmap[major(dev)];
438   if (dp->dmap_sel_busy)
439 	return(SUSPEND);
440 
441   f->filp_select_flags &= ~FSF_UPDATE;
442   r = cdev_select(dev, rops);
443   if (r != OK)
444 	return(r);
445 
446   dp->dmap_sel_busy = TRUE;
447   dp->dmap_sel_filp = f;
448   f->filp_select_flags |= FSF_BUSY;
449 
450   return(SUSPEND);
451 }
452 
453 /*===========================================================================*
454  *				select_request_file			     *
455  *===========================================================================*/
456 static int select_request_file(struct filp *UNUSED(f), int *UNUSED(ops),
457   int UNUSED(block), struct fproc *UNUSED(rfp))
458 {
459   /* Files are always ready, so output *ops is input *ops */
460   return(OK);
461 }
462 
463 /*===========================================================================*
464  *				select_request_pipe			     *
465  *===========================================================================*/
466 static int select_request_pipe(struct filp *f, int *ops, int block,
467 	struct fproc *UNUSED(rfp))
468 {
469 /* Check readiness status on a pipe. The given filp is locked. This function
470  * may block its calling thread if necessary.
471  */
472   int orig_ops, r = 0, err;
473 
474   orig_ops = *ops;
475 
476   if ((*ops & (SEL_RD|SEL_ERR))) {
477 	/* Check if we can read 1 byte */
478 	err = pipe_check(f, READING, f->filp_flags & ~O_NONBLOCK, 1,
479 			 1 /* Check only */);
480 
481 	if (err != SUSPEND)
482 		r |= SEL_RD;
483 	if (err < 0 && err != SUSPEND)
484 		r |= SEL_ERR;
485   }
486 
487   if ((*ops & (SEL_WR|SEL_ERR))) {
488 	/* Check if we can write 1 byte */
489 	err = pipe_check(f, WRITING, f->filp_flags & ~O_NONBLOCK, 1,
490 			 1 /* Check only */);
491 
492 	if (err != SUSPEND)
493 		r |= SEL_WR;
494 	if (err < 0 && err != SUSPEND)
495 		r |= SEL_ERR;
496   }
497 
498   /* Some options we collected might not be requested. */
499   *ops = r & orig_ops;
500 
501   if (!*ops && block)
502 	f->filp_pipe_select_ops |= orig_ops;
503 
504   return(OK);
505 }
506 
507 /*===========================================================================*
508  *				tab2ops					     *
509  *===========================================================================*/
510 static int tab2ops(int fd, struct selectentry *e)
511 {
512   int ops = 0;
513   if (FD_ISSET(fd, &e->readfds))  ops |= SEL_RD;
514   if (FD_ISSET(fd, &e->writefds)) ops |= SEL_WR;
515   if (FD_ISSET(fd, &e->errorfds)) ops |= SEL_ERR;
516 
517   return(ops);
518 }
519 
520 
521 /*===========================================================================*
522  *				ops2tab					     *
523  *===========================================================================*/
524 static void ops2tab(int ops, int fd, struct selectentry *e)
525 {
526   if ((ops & SEL_RD) && e->vir_readfds && FD_ISSET(fd, &e->readfds) &&
527       !FD_ISSET(fd, &e->ready_readfds)) {
528 	FD_SET(fd, &e->ready_readfds);
529 	e->nreadyfds++;
530   }
531 
532   if ((ops & SEL_WR) && e->vir_writefds && FD_ISSET(fd, &e->writefds) &&
533       !FD_ISSET(fd, &e->ready_writefds)) {
534 	FD_SET(fd, &e->ready_writefds);
535 	e->nreadyfds++;
536   }
537 
538   if ((ops & SEL_ERR) && e->vir_errorfds && FD_ISSET(fd, &e->errorfds) &&
539       !FD_ISSET(fd, &e->ready_errorfds)) {
540 	FD_SET(fd, &e->ready_errorfds);
541 	e->nreadyfds++;
542   }
543 }
544 
545 
546 /*===========================================================================*
547  *				copy_fdsets				     *
548  *===========================================================================*/
549 static int copy_fdsets(struct selectentry *se, int nfds, int direction)
550 {
551 /* Copy FD sets from or to the user process calling select(2). This function
552  * MUST NOT block the calling thread.
553  */
554   int r;
555   size_t fd_setsize;
556   endpoint_t src_e, dst_e;
557   fd_set *src_fds, *dst_fds;
558 
559   if (nfds < 0 || nfds > OPEN_MAX)
560 	panic("select copy_fdsets: nfds wrong: %d", nfds);
561 
562   /* Only copy back as many bits as the user expects. */
563   fd_setsize = (size_t) (howmany(nfds, __NFDBITS) * sizeof(__fd_mask));
564 
565   /* Set source and destination endpoints */
566   src_e = (direction == FROM_PROC) ? se->req_endpt : SELF;
567   dst_e = (direction == FROM_PROC) ? SELF : se->req_endpt;
568 
569   /* read set */
570   src_fds = (direction == FROM_PROC) ? se->vir_readfds : &se->ready_readfds;
571   dst_fds = (direction == FROM_PROC) ? &se->readfds : se->vir_readfds;
572   if (se->vir_readfds) {
573 	r = sys_datacopy_wrapper(src_e, (vir_bytes) src_fds, dst_e,
574 			(vir_bytes) dst_fds, fd_setsize);
575 	if (r != OK) return(r);
576   }
577 
578   /* write set */
579   src_fds = (direction == FROM_PROC) ? se->vir_writefds : &se->ready_writefds;
580   dst_fds = (direction == FROM_PROC) ? &se->writefds : se->vir_writefds;
581   if (se->vir_writefds) {
582 	r = sys_datacopy_wrapper(src_e, (vir_bytes) src_fds, dst_e,
583 			(vir_bytes) dst_fds, fd_setsize);
584 	if (r != OK) return(r);
585   }
586 
587   /* error set */
588   src_fds = (direction == FROM_PROC) ? se->vir_errorfds : &se->ready_errorfds;
589   dst_fds = (direction == FROM_PROC) ? &se->errorfds : se->vir_errorfds;
590   if (se->vir_errorfds) {
591 	r = sys_datacopy_wrapper(src_e, (vir_bytes) src_fds, dst_e,
592 			(vir_bytes) dst_fds, fd_setsize);
593 	if (r != OK) return(r);
594   }
595 
596   return(OK);
597 }
598 
599 
600 /*===========================================================================*
601  *				select_cancel_all			     *
602  *===========================================================================*/
603 static void select_cancel_all(struct selectentry *se)
604 {
605 /* Cancel select, possibly on success. Decrease select usage and cancel timer.
606  * This function MUST NOT block its calling thread.
607  */
608 
609   int fd;
610   struct filp *f;
611 
612   for (fd = 0; fd < se->nfds; fd++) {
613 	if ((f = se->filps[fd]) == NULL) continue;
614 	se->filps[fd] = NULL;
615 	select_cancel_filp(f);
616   }
617 
618   if (se->expiry > 0) {
619 	cancel_timer(&se->timer);
620 	se->expiry = 0;
621   }
622 
623   se->requestor = NULL;
624 }
625 
626 /*===========================================================================*
627  *				select_cancel_filp			     *
628  *===========================================================================*/
629 static void select_cancel_filp(struct filp *f)
630 {
631 /* Reduce the number of select users of this filp. This function MUST NOT block
632  * its calling thread.
633  */
634   devmajor_t major;
635 
636   assert(f);
637   assert(f->filp_selectors > 0);
638   assert(f->filp_count > 0);
639 
640   f->filp_selectors--;
641   if (f->filp_selectors == 0) {
642 	/* No one selecting on this filp anymore, forget about select state */
643 	f->filp_select_ops = 0;
644 	f->filp_select_flags = 0;
645 	f->filp_pipe_select_ops = 0;
646 
647 	/* If this filp is the subject of an ongoing select query to a
648 	 * character device, mark the query as stale, so that this filp will
649 	 * not be checked when the result arrives. The filp select device may
650 	 * still be NO_DEV if do_select fails on the initial fd check.
651 	 */
652 	if (is_char_device(f) && f->filp_char_select_dev != NO_DEV) {
653 		major = major(f->filp_char_select_dev);
654 		if (dmap[major].dmap_sel_busy &&
655 			dmap[major].dmap_sel_filp == f)
656 			dmap[major].dmap_sel_filp = NULL; /* leave _busy set */
657 		f->filp_char_select_dev = NO_DEV;
658 	}
659   }
660 }
661 
662 /*===========================================================================*
663  *				select_return				     *
664  *===========================================================================*/
665 static void select_return(struct selectentry *se)
666 {
667 /* Return the results of a select call to the user process and revive the
668  * process. This function MUST NOT block its calling thread.
669  */
670   int r;
671 
672   assert(!is_deferred(se));	/* Not done yet, first wait for async reply */
673 
674   select_cancel_all(se);
675 
676   if (se->error != OK)
677 	r = se->error;
678   else
679 	r = copy_fdsets(se, se->nfds, TO_PROC);
680   if (r == OK)
681 	r = se->nreadyfds;
682 
683   revive(se->req_endpt, r);
684 }
685 
686 
/*===========================================================================*
 *				select_callback			             *
 *===========================================================================*/
void select_callback(struct filp *f, int status)
{
/* Entry point for reporting a status change on a filp: 'status' holds the
 * operations that became ready, or an error. At present this function is
 * called for pipes only, with the lock on the filp held. The status is simply
 * passed on to filp_status.
 */

  filp_status(f, status);
}
699 
700 /*===========================================================================*
701  *				init_select  				     *
702  *===========================================================================*/
703 void init_select(void)
704 {
705   int s;
706 
707   for (s = 0; s < MAXSELECTS; s++)
708 	init_timer(&selecttab[s].timer);
709 }
710 
711 
712 /*===========================================================================*
713  *				select_forget			             *
714  *===========================================================================*/
715 void select_forget(void)
716 {
717 /* The calling thread's associated process is expected to be unpaused, due to
718  * a signal that is supposed to interrupt the current system call. Totally
719  * forget about the select(). This function may block its calling thread if
720  * necessary (but it doesn't).
721  */
722   int slot;
723   struct selectentry *se;
724 
725   for (slot = 0; slot < MAXSELECTS; slot++) {
726 	se = &selecttab[slot];
727 	if (se->requestor == fp)
728 		break;
729   }
730 
731   if (slot >= MAXSELECTS) return;	/* Entry not found */
732 
733   assert(se->starting == FALSE);
734 
735   /* Do NOT test on is_deferred here. We can safely cancel ongoing queries. */
736   select_cancel_all(se);
737 }
738 
739 
740 /*===========================================================================*
741  *				select_timeout_check	  	     	     *
742  *===========================================================================*/
743 void select_timeout_check(minix_timer_t *timer)
744 {
745 /* An alarm has gone off for one of the select queries. This function MUST NOT
746  * block its calling thread.
747  */
748   int s;
749   struct selectentry *se;
750 
751   s = tmr_arg(timer)->ta_int;
752   if (s < 0 || s >= MAXSELECTS) return;	/* Entry does not exist */
753 
754   se = &selecttab[s];
755   if (se->requestor == NULL) return;
756   if (se->expiry <= 0) return;	/* Strange, did we even ask for a timeout? */
757   se->expiry = 0;
758   if (is_deferred(se)) return;	/* Wait for initial replies to CDEV_SELECT */
759   select_return(se);
760 }
761 
762 
763 /*===========================================================================*
764  *				select_unsuspend_by_endpt  	     	     *
765  *===========================================================================*/
766 void select_unsuspend_by_endpt(endpoint_t proc_e)
767 {
768 /* Revive blocked processes when a driver has disappeared */
769   devmajor_t major;
770   int fd, s;
771   struct selectentry *se;
772   struct filp *f;
773 
774   for (s = 0; s < MAXSELECTS; s++) {
775 	int wakehim = 0;
776 	se = &selecttab[s];
777 	if (se->requestor == NULL) continue;
778 	if (se->requestor->fp_endpoint == proc_e) {
779 		assert(se->requestor->fp_flags & FP_EXITING);
780 		select_cancel_all(se);
781 		continue;
782 	}
783 
784 	for (fd = 0; fd < se->nfds; fd++) {
785 		if ((f = se->filps[fd]) == NULL || !is_char_device(f))
786 			continue;
787 
788 		assert(f->filp_char_select_dev != NO_DEV);
789 		major = major(f->filp_char_select_dev);
790 		if (dmap_driver_match(proc_e, major)) {
791 			se->filps[fd] = NULL;
792 			se->error = EIO;
793 			select_cancel_filp(f);
794 			wakehim = 1;
795 		}
796 	}
797 
798 	if (wakehim && !is_deferred(se))
799 		select_return(se);
800   }
801 }
802 
803 /*===========================================================================*
804  *				select_reply1				     *
805  *===========================================================================*/
806 void select_reply1(endpoint_t driver_e, devminor_t minor, int status)
807 {
808 /* Handle the initial reply to CDEV_SELECT request. This function MUST NOT
809  * block its calling thread.
810  */
811   devmajor_t major;
812   dev_t dev;
813   struct filp *f;
814   struct dmap *dp;
815 
816   /* Figure out which device is replying */
817   if ((dp = get_dmap(driver_e)) == NULL) return;
818 
819   major = dp-dmap;
820   dev = makedev(major, minor);
821 
822   /* Get filp belonging to character special file */
823   if (!dp->dmap_sel_busy) {
824 	printf("VFS (%s:%d): major %d was not expecting a CDEV_SELECT reply\n",
825 		__FILE__, __LINE__, major);
826 	return;
827   }
828 
829   /* The select filp may have been set to NULL if the requestor has been
830    * unpaused in the meantime. In that case, we ignore the result, but we do
831    * look for other filps to restart later.
832    */
833   if ((f = dp->dmap_sel_filp) != NULL) {
834 	/* Find vnode and check we got a reply from the device we expected */
835 	assert(is_char_device(f));
836 	assert(f->filp_char_select_dev != NO_DEV);
837 	if (f->filp_char_select_dev != dev) {
838 		/* This should never happen. The driver may be misbehaving.
839 		 * For now we assume that the reply we want will arrive later..
840 		 */
841 		printf("VFS (%s:%d): expected reply from dev %llx not %llx\n",
842 			__FILE__, __LINE__, f->filp_char_select_dev, dev);
843 		return;
844 	}
845   }
846 
847   /* No longer waiting for a reply from this device */
848   dp->dmap_sel_busy = FALSE;
849   dp->dmap_sel_filp = NULL;
850 
851   /* Process the select result only if the filp is valid. */
852   if (f != NULL) {
853 	assert(f->filp_count >= 1);
854 	assert(f->filp_select_flags & FSF_BUSY);
855 
856 	f->filp_select_flags &= ~FSF_BUSY;
857 
858 	/* The select call is done now, except when
859 	 * - another process started a select on the same filp with possibly a
860 	 *   different set of operations.
861 	 * - a process does a select on the same filp but using different file
862 	 *   descriptors.
863 	 * - the select has a timeout. Upon receiving this reply the operations
864 	 *   might not be ready yet, so we want to wait for that to ultimately
865 	 *   happen.
866 	 *   Therefore we need to keep remembering what the operations are.
867 	 */
868 	if (!(f->filp_select_flags & (FSF_UPDATE|FSF_BLOCKED)))
869 		f->filp_select_ops = 0;		/* done selecting */
870 	else if (status > 0 && !(f->filp_select_flags & FSF_UPDATE))
871 		/* there may be operations pending */
872 		f->filp_select_ops &= ~status;
873 
874 	/* Record new filp status */
875 	if (!(status == 0 && (f->filp_select_flags & FSF_BLOCKED))) {
876 		if (status > 0) {	/* operations ready */
877 			if (status & SEL_RD)
878 				f->filp_select_flags &= ~FSF_RD_BLOCK;
879 			if (status & SEL_WR)
880 				f->filp_select_flags &= ~FSF_WR_BLOCK;
881 			if (status & SEL_ERR)
882 				f->filp_select_flags &= ~FSF_ERR_BLOCK;
883 		} else if (status < 0) { /* error */
884 			/* Always unblock upon error */
885 			f->filp_select_flags &= ~FSF_BLOCKED;
886 		}
887 	}
888 
889 	filp_status(f, status); /* Tell filp owners about the results */
890   }
891 
892   select_restart_filps();
893 }
894 
895 
896 /*===========================================================================*
897  *				select_reply2				     *
898  *===========================================================================*/
899 void select_reply2(endpoint_t driver_e, devminor_t minor, int status)
900 {
901 /* Handle secondary reply to DEV_SELECT request. A secondary reply occurs when
902  * the select request is 'blocking' until an operation becomes ready. This
903  * function MUST NOT block its calling thread.
904  */
905   int slot, found, fd;
906   devmajor_t major;
907   dev_t dev;
908   struct filp *f;
909   struct dmap *dp;
910   struct selectentry *se;
911 
912   if (status == 0) {
913 	printf("VFS (%s:%d): weird status (%d) to report\n",
914 		__FILE__, __LINE__, status);
915 	return;
916   }
917 
918   /* Figure out which device is replying */
919   if ((dp = get_dmap(driver_e)) == NULL) {
920 	printf("VFS (%s:%d): endpoint %d is not a known driver endpoint\n",
921 		__FILE__, __LINE__, driver_e);
922 	return;
923   }
924   major = dp-dmap;
925   dev = makedev(major, minor);
926 
927   /* Find all file descriptors selecting for this device */
928   for (slot = 0; slot < MAXSELECTS; slot++) {
929 	se = &selecttab[slot];
930 	if (se->requestor == NULL) continue;	/* empty slot */
931 
932 	found = FALSE;
933 	for (fd = 0; fd < se->nfds; fd++) {
934 		if ((f = se->filps[fd]) == NULL) continue;
935 		if (!is_char_device(f)) continue;
936 		assert(f->filp_char_select_dev != NO_DEV);
937 		if (f->filp_char_select_dev != dev) continue;
938 
939 		if (status > 0) {	/* Operations ready */
940 			/* Clear the replied bits from the request
941 			 * mask unless FSF_UPDATE is set.
942 			 */
943 			if (!(f->filp_select_flags & FSF_UPDATE))
944 				f->filp_select_ops &= ~status;
945 			if (status & SEL_RD)
946 				f->filp_select_flags &= ~FSF_RD_BLOCK;
947 			if (status & SEL_WR)
948 				f->filp_select_flags &= ~FSF_WR_BLOCK;
949 			if (status & SEL_ERR)
950 				f->filp_select_flags &= ~FSF_ERR_BLOCK;
951 
952 			ops2tab(status, fd, se);
953 		} else {
954 			f->filp_select_flags &= ~FSF_BLOCKED;
955 			se->error = status;
956 		}
957 		found = TRUE;
958 	}
959 	/* Even if 'found' is set now, nothing may have changed for this call,
960 	 * as it may not have been interested in the operations that were
961 	 * reported as ready. Let restart_proc check.
962 	 */
963 	if (found)
964 		restart_proc(se);
965   }
966 
967   select_restart_filps();
968 }
969 
970 /*===========================================================================*
971  *				select_restart_filps			     *
972  *===========================================================================*/
973 static void select_restart_filps(void)
974 {
975 /* We got a result from a character driver, and now we need to check if we can
976  * restart deferred polling operations. This function MUST NOT block its
977  * calling thread.
978  */
979   int fd, slot;
980   struct filp *f;
981   struct selectentry *se;
982 
983   /* Locate filps that can be restarted */
984   for (slot = 0; slot < MAXSELECTS; slot++) {
985 	se = &selecttab[slot];
986 	if (se->requestor == NULL) continue; /* empty slot */
987 
988 	/* Only 'deferred' processes are eligible to restart */
989 	if (!is_deferred(se)) continue;
990 
991 	/* Find filps that are not waiting for a reply, but have an updated
992 	 * status (i.e., another select on the same filp with possibly a
993 	 * different set of operations is to be done), and thus requires the
994 	 * select request to be sent again).
995 	 */
996 	for (fd = 0; fd < se->nfds; fd++) {
997 		int r, wantops, ops;
998 		if ((f = se->filps[fd]) == NULL) continue;
999 		if (f->filp_select_flags & FSF_BUSY) /* Still waiting for */
1000 			continue;		     /* initial reply */
1001 		if (!(f->filp_select_flags & FSF_UPDATE)) /* Must be in  */
1002 			continue;			  /* 'update' state */
1003 
1004 		/* This function is suitable only for character devices. In
1005 		 * particular, checking pipes the same way would introduce a
1006 		 * serious locking problem.
1007 		 */
1008 		assert(is_char_device(f));
1009 
1010 		wantops = ops = f->filp_select_ops;
1011 		r = select_request_char(f, &wantops, se->block, se->requestor);
1012 		if (r != OK && r != SUSPEND) {
1013 			se->error = r;
1014 			restart_proc(se);
1015 			break; /* Error or bogus return code; abort */
1016 		}
1017 		if (wantops & ops) ops2tab(wantops, fd, se);
1018 	}
1019   }
1020 }
1021 
1022 /*===========================================================================*
1023  *				filp_status				     *
1024  *===========================================================================*/
1025 static void filp_status(f, status)
1026 struct filp *f;
1027 int status;
1028 {
1029 /* Tell processes that need to know about the status of this filp. This
1030  * function MUST NOT block its calling thread.
1031  */
1032   int fd, slot, found;
1033   struct selectentry *se;
1034 
1035   for (slot = 0; slot < MAXSELECTS; slot++) {
1036 	se = &selecttab[slot];
1037 	if (se->requestor == NULL) continue; /* empty slot */
1038 
1039 	found = FALSE;
1040 	for (fd = 0; fd < se->nfds; fd++) {
1041 		if (se->filps[fd] != f) continue;
1042 		if (status < 0)
1043 			se->error = status;
1044 		else
1045 			ops2tab(status, fd, se);
1046 		found = TRUE;
1047 	}
1048 	if (found)
1049 		restart_proc(se);
1050   }
1051 }
1052 
1053 /*===========================================================================*
1054  *				restart_proc				     *
1055  *===========================================================================*/
1056 static void restart_proc(se)
1057 struct selectentry *se;
1058 {
1059 /* Tell process about select results (if any) unless there are still results
1060  * pending. This function MUST NOT block its calling thread.
1061  */
1062 
1063   if ((se->nreadyfds > 0 || se->error != OK || !se->block) && !is_deferred(se))
1064 	select_return(se);
1065 }
1066 
1067 /*===========================================================================*
1068  *				wipe_select				     *
1069  *===========================================================================*/
1070 static void wipe_select(struct selectentry *se)
1071 {
1072   se->nfds = 0;
1073   se->nreadyfds = 0;
1074   se->error = OK;
1075   se->block = 0;
1076   memset(se->filps, 0, sizeof(se->filps));
1077 
1078   FD_ZERO(&se->readfds);
1079   FD_ZERO(&se->writefds);
1080   FD_ZERO(&se->errorfds);
1081   FD_ZERO(&se->ready_readfds);
1082   FD_ZERO(&se->ready_writefds);
1083   FD_ZERO(&se->ready_errorfds);
1084 }
1085 
1086 /*===========================================================================*
1087  *				select_lock_filp			     *
1088  *===========================================================================*/
1089 static void select_lock_filp(struct filp *f, int ops)
1090 {
1091 /* Lock a filp and vnode based on which operations are requested. This function
1092  * may block its calling thread, obviously.
1093  */
1094   tll_access_t locktype;
1095 
1096   locktype = VNODE_READ; /* By default */
1097 
1098   if (ops & (SEL_WR|SEL_ERR))
1099 	/* Selecting for error or writing requires exclusive access */
1100 	locktype = VNODE_WRITE;
1101 
1102   lock_filp(f, locktype);
1103 }
1104