1 /* Implement entry point to select system call. 2 * 3 * The entry points into this file are 4 * do_select: perform the SELECT system call 5 * select_callback: notify select system of possible fd operation 6 * select_unsuspend_by_endpt: cancel a blocking select on exiting driver 7 * 8 * The select code uses minimal locking, so that the replies from character 9 * drivers can be processed without blocking. Filps are locked only for pipes. 10 * We make the assumption that any other structures and fields are safe to 11 * check (and possibly change) as long as we know that a process is blocked on 12 * a select(2) call, meaning that all involved filps are guaranteed to stay 13 * open until either we finish the select call, it the process gets interrupted 14 * by a signal. 15 */ 16 17 #include "fs.h" 18 #include <sys/fcntl.h> 19 #include <sys/time.h> 20 #include <sys/select.h> 21 #include <sys/stat.h> 22 #include <minix/callnr.h> 23 #include <minix/u64.h> 24 #include <string.h> 25 #include <assert.h> 26 27 #include "file.h" 28 #include "vnode.h" 29 30 /* max. number of simultaneously pending select() calls */ 31 #define MAXSELECTS 25 32 #define FROM_PROC 0 33 #define TO_PROC 1 34 35 #define USECPERSEC 1000000 /* number of microseconds in a second */ 36 37 typedef fd_set *ixfer_fd_set_ptr; 38 39 static struct selectentry { 40 struct fproc *requestor; /* slot is free iff this is NULL */ 41 endpoint_t req_endpt; 42 fd_set readfds, writefds, errorfds; 43 fd_set ready_readfds, ready_writefds, ready_errorfds; 44 ixfer_fd_set_ptr vir_readfds, vir_writefds, vir_errorfds; 45 struct filp *filps[OPEN_MAX]; 46 int type[OPEN_MAX]; 47 int nfds, nreadyfds; 48 int error; 49 char block; 50 char starting; 51 clock_t expiry; 52 minix_timer_t timer; /* if expiry > 0 */ 53 } selecttab[MAXSELECTS]; 54 55 static int copy_fdsets(struct selectentry *se, int nfds, int direction); 56 static void filp_status(struct filp *fp, int status); 57 static int is_deferred(struct selectentry *se); 58 static void restart_proc(struct selectentry *se); 59 static void ops2tab(int ops, int fd, struct selectentry *e); 60 static int is_regular_file(struct filp *f); 61 static int is_pipe(struct filp *f); 62 static int is_char_device(struct filp *f); 63 static void select_lock_filp(struct filp *f, int ops); 64 static int select_request_file(struct filp *f, int *ops, int block, 65 struct fproc *rfp); 66 static int select_request_char(struct filp *f, int *ops, int block, 67 struct fproc *rfp); 68 static int select_request_pipe(struct filp *f, int *ops, int block, 69 struct fproc *rfp); 70 static void select_cancel_all(struct selectentry *e); 71 static void select_cancel_filp(struct filp *f); 72 static void select_return(struct selectentry *); 73 static void select_restart_filps(void); 74 static int tab2ops(int fd, struct selectentry *e); 75 static void wipe_select(struct selectentry *s); 76 void select_timeout_check(int s); 77 78 static struct fdtype { 79 int (*select_request)(struct filp *, int *ops, int block, 80 struct fproc *rfp); 81 int (*type_match)(struct filp *f); 82 } fdtypes[] = { 83 { select_request_char, is_char_device }, 84 { select_request_file, is_regular_file }, 85 { select_request_pipe, is_pipe }, 86 }; 87 #define SEL_FDS (sizeof(fdtypes) / sizeof(fdtypes[0])) 88 89 /*===========================================================================* 90 * do_select * 91 *===========================================================================*/ 92 int do_select(void) 93 { 94 /* Implement the select(nfds, readfds, writefds, errorfds, timeout) system 95 * call. First we copy the arguments and verify their sanity. Then we check 96 * whether there are file descriptors that satisfy the select call right off 97 * the bat. If so, or if there are no ready file descriptors but the process 98 * requested to return immediately, we return the result. Otherwise we set a 99 * timeout and wait for either the file descriptors to become ready or the 100 * timer to go off. If no timeout value was provided, we wait indefinitely. 101 */ 102 int r, nfds, do_timeout, fd, s; 103 struct filp *f; 104 unsigned int type, ops; 105 struct timeval timeout; 106 struct selectentry *se; 107 vir_bytes vtimeout; 108 clock_t ticks; 109 110 nfds = job_m_in.m_lc_vfs_select.nfds; 111 vtimeout = job_m_in.m_lc_vfs_select.timeout; 112 113 /* Sane amount of file descriptors? */ 114 if (nfds < 0 || nfds > OPEN_MAX) return(EINVAL); 115 116 /* Find a slot to store this select request */ 117 for (s = 0; s < MAXSELECTS; s++) 118 if (selecttab[s].requestor == NULL) /* Unused slot */ 119 break; 120 if (s >= MAXSELECTS) return(ENOSPC); 121 122 se = &selecttab[s]; 123 wipe_select(se); /* Clear results of previous usage */ 124 se->requestor = fp; 125 se->req_endpt = who_e; 126 se->vir_readfds = job_m_in.m_lc_vfs_select.readfds; 127 se->vir_writefds = job_m_in.m_lc_vfs_select.writefds; 128 se->vir_errorfds = job_m_in.m_lc_vfs_select.errorfds; 129 130 /* Copy fdsets from the process */ 131 if ((r = copy_fdsets(se, nfds, FROM_PROC)) != OK) { 132 se->requestor = NULL; 133 return(r); 134 } 135 136 /* Did the process set a timeout value? If so, retrieve it. */ 137 if (vtimeout != 0) { 138 r = sys_datacopy_wrapper(who_e, vtimeout, SELF, (vir_bytes) &timeout, 139 sizeof(timeout)); 140 141 /* No nonsense in the timeval */ 142 if (r == OK && (timeout.tv_sec < 0 || timeout.tv_usec < 0 || 143 timeout.tv_usec >= USECPERSEC)) 144 r = EINVAL; 145 146 if (r != OK) { 147 se->requestor = NULL; 148 return(r); 149 } 150 do_timeout = 1; 151 } else 152 do_timeout = 0; 153 154 /* If there is no timeout, we block forever. Otherwise, we block up to the 155 * specified time interval. 156 */ 157 if (!do_timeout) /* No timeout value set */ 158 se->block = 1; 159 else if (do_timeout && (timeout.tv_sec > 0 || timeout.tv_usec > 0)) 160 se->block = 1; 161 else /* timeout set as (0,0) - this effects a poll */ 162 se->block = 0; 163 se->expiry = 0; /* no timer set (yet) */ 164 165 /* We are going to lock filps, and that means that while locking a second 166 * filp, we might already get the results for the first one. In that case, 167 * the incoming results must not cause the select call to finish prematurely. 168 */ 169 se->starting = TRUE; 170 171 /* Verify that file descriptors are okay to select on */ 172 for (fd = 0; fd < nfds; fd++) { 173 /* Because the select() interface implicitly includes file descriptors 174 * you might not want to select on, we have to figure out whether we're 175 * interested in them. Typically, these file descriptors include fd's 176 * inherited from the parent proc and file descriptors that have been 177 * close()d, but had a lower fd than one in the current set. 178 */ 179 if (!(ops = tab2ops(fd, se))) 180 continue; /* No operations set; nothing to do for this fd */ 181 182 /* Get filp belonging to this fd */ 183 f = se->filps[fd] = get_filp(fd, VNODE_READ); 184 if (f == NULL) { 185 if (err_code == EBADF) 186 r = err_code; 187 else /* File descriptor is 'ready' to return EIO */ 188 r = EINTR; 189 190 se->requestor = NULL; 191 return(r); 192 } 193 194 /* Check file types. According to POSIX 2008: 195 * "The pselect() and select() functions shall support regular files, 196 * terminal and pseudo-terminal devices, FIFOs, pipes, and sockets. The 197 * behavior of pselect() and select() on file descriptors that refer to 198 * other types of file is unspecified." 199 * 200 * In our case, terminal and pseudo-terminal devices are handled by the 201 * TTY major and sockets by either INET major (socket type AF_INET) or 202 * UDS major (socket type AF_UNIX). Additionally, we give other 203 * character drivers the chance to handle select for any of their 204 * device nodes. Some may not implement support for select and let 205 * libchardriver return EBADF, which we then pass to the calling 206 * process once we receive the reply. 207 */ 208 se->type[fd] = -1; 209 for (type = 0; type < SEL_FDS; type++) { 210 if (fdtypes[type].type_match(f)) { 211 se->type[fd] = type; 212 se->nfds = fd+1; 213 se->filps[fd]->filp_selectors++; 214 break; 215 } 216 } 217 unlock_filp(f); 218 if (se->type[fd] == -1) { /* Type not found */ 219 se->requestor = NULL; 220 return(EBADF); 221 } 222 } 223 224 /* Check all file descriptors in the set whether one is 'ready' now */ 225 for (fd = 0; fd < nfds; fd++) { 226 /* Again, check for involuntarily selected fd's */ 227 if (!(ops = tab2ops(fd, se))) 228 continue; /* No operations set; nothing to do for this fd */ 229 230 /* File descriptors selected for reading that are not opened for 231 * reading should be marked as readable, as read calls would fail 232 * immediately. The same applies to writing. 233 */ 234 f = se->filps[fd]; 235 if ((ops & SEL_RD) && !(f->filp_mode & R_BIT)) { 236 ops2tab(SEL_RD, fd, se); 237 ops &= ~SEL_RD; 238 } 239 if ((ops & SEL_WR) && !(f->filp_mode & W_BIT)) { 240 ops2tab(SEL_WR, fd, se); 241 ops &= ~SEL_WR; 242 } 243 /* Test filp for select operations if not already done so. e.g., 244 * processes sharing a filp and both doing a select on that filp. */ 245 if ((f->filp_select_ops & ops) != ops) { 246 int wantops; 247 248 wantops = (f->filp_select_ops |= ops); 249 type = se->type[fd]; 250 select_lock_filp(f, wantops); 251 r = fdtypes[type].select_request(f, &wantops, se->block, fp); 252 unlock_filp(f); 253 if (r != OK && r != SUSPEND) { 254 se->error = r; 255 break; /* Error or bogus return code; abort */ 256 } 257 258 /* The select request above might have turned on/off some 259 * operations because they were 'ready' or not meaningful. 260 * Either way, we might have a result and we need to store them 261 * in the select table entry. */ 262 if (wantops & ops) ops2tab(wantops, fd, se); 263 } 264 } 265 266 /* At this point there won't be any blocking calls anymore. */ 267 se->starting = FALSE; 268 269 if ((se->nreadyfds > 0 || se->error != OK || !se->block) && 270 !is_deferred(se)) { 271 /* An error occurred, or fd's were found that were ready to go right 272 * away, and/or we were instructed not to block at all. Must return 273 * immediately. Do not copy FD sets if an error occurred. 274 */ 275 if (se->error != OK) 276 r = se->error; 277 else 278 r = copy_fdsets(se, se->nfds, TO_PROC); 279 select_cancel_all(se); 280 se->requestor = NULL; 281 282 if (r != OK) 283 return(r); 284 return(se->nreadyfds); 285 } 286 287 /* Convert timeval to ticks and set the timer. If it fails, undo 288 * all, return error. 289 */ 290 if (do_timeout && se->block) { 291 /* Open Group: 292 * "If the requested timeout interval requires a finer 293 * granularity than the implementation supports, the 294 * actual timeout interval shall be rounded up to the next 295 * supported value." 296 */ 297 if (timeout.tv_sec >= (TMRDIFF_MAX - 1) / system_hz) { 298 ticks = TMRDIFF_MAX; /* silently truncate */ 299 } else { 300 ticks = timeout.tv_sec * system_hz + 301 (timeout.tv_usec * system_hz + USECPERSEC-1) / USECPERSEC; 302 } 303 assert(ticks != 0 && ticks <= TMRDIFF_MAX); 304 se->expiry = ticks; 305 set_timer(&se->timer, ticks, select_timeout_check, s); 306 } 307 308 /* process now blocked */ 309 suspend(FP_BLOCKED_ON_SELECT); 310 return(SUSPEND); 311 } 312 313 /*===========================================================================* 314 * is_deferred * 315 *===========================================================================*/ 316 static int is_deferred(struct selectentry *se) 317 { 318 /* Find out whether this select has pending initial replies */ 319 320 int fd; 321 struct filp *f; 322 323 /* The select call must have finished its initialization at all. */ 324 if (se->starting) return(TRUE); 325 326 for (fd = 0; fd < se->nfds; fd++) { 327 if ((f = se->filps[fd]) == NULL) continue; 328 if (f->filp_select_flags & (FSF_UPDATE|FSF_BUSY)) return(TRUE); 329 } 330 331 return(FALSE); 332 } 333 334 335 /*===========================================================================* 336 * is_regular_file * 337 *===========================================================================*/ 338 static int is_regular_file(struct filp *f) 339 { 340 return(f && f->filp_vno && S_ISREG(f->filp_vno->v_mode)); 341 } 342 343 /*===========================================================================* 344 * is_pipe * 345 *===========================================================================*/ 346 static int is_pipe(struct filp *f) 347 { 348 /* Recognize either anonymous pipe or named pipe (FIFO) */ 349 return(f && f->filp_vno && S_ISFIFO(f->filp_vno->v_mode)); 350 } 351 352 /*===========================================================================* 353 * is_char_device * 354 *===========================================================================*/ 355 static int is_char_device(struct filp *f) 356 { 357 /* See if this filp is a handle on a character device. This function MUST NOT 358 * block its calling thread. The given filp may or may not be locked. 359 */ 360 361 return (f && f->filp_vno && S_ISCHR(f->filp_vno->v_mode)); 362 } 363 364 /*===========================================================================* 365 * select_request_char * 366 *===========================================================================*/ 367 static int select_request_char(struct filp *f, int *ops, int block, 368 struct fproc *rfp) 369 { 370 /* Check readiness status on a character device. Unless suitable results are 371 * available right now, this will only initiate the polling process, causing 372 * result processing to be deferred. This function MUST NOT block its calling 373 * thread. The given filp may or may not be locked. 374 */ 375 dev_t dev; 376 int r, rops; 377 struct dmap *dp; 378 379 /* Start by remapping the device node number to a "real" device number. Those 380 * two are different only for CTTY_MAJOR aka /dev/tty, but that one single 381 * exception requires quite some extra effort here: the select code matches 382 * character driver replies to their requests based on the device number, so 383 * it needs to be aware that device numbers may be mapped. The idea is to 384 * perform the mapping once and store the result in the filp object, so that 385 * at least we don't run into problems when a process loses its controlling 386 * terminal while doing a select (see also free_proc). It should be noted 387 * that it is possible that multiple processes share the same /dev/tty filp, 388 * and they may not all have a controlling terminal. The ctty-less processes 389 * should never pass the mapping; a more problematic case is checked below. 390 * 391 * The cdev_map call also checks the major number for rough validity, so that 392 * we can use it to index the dmap array safely a bit later. 393 */ 394 if ((dev = cdev_map(f->filp_vno->v_sdev, rfp)) == NO_DEV) 395 return(ENXIO); 396 397 if (f->filp_char_select_dev != NO_DEV && f->filp_char_select_dev != dev) { 398 /* Currently, this case can occur as follows: a process with a 399 * controlling terminal opens /dev/tty and forks, the new child starts 400 * a new session, opens a new controlling terminal, and both parent and 401 * child call select on the /dev/tty file descriptor. If this case ever 402 * becomes real, a better solution may be to force-close a filp for 403 * /dev/tty when a new controlling terminal is opened. 404 */ 405 printf("VFS: file pointer has multiple controlling TTYs!\n"); 406 return(EIO); 407 } 408 f->filp_char_select_dev = dev; /* set before possibly suspending */ 409 410 rops = *ops; 411 412 /* By default, nothing to do */ 413 *ops = 0; 414 415 /* 416 * If we have previously asked the driver to notify us about certain ready 417 * operations, but it has not notified us yet, then we can safely assume that 418 * those operations are not ready right now. Therefore, if this call is not 419 * supposed to block, we can disregard the pending operations as not ready. 420 * We must make absolutely sure that the flags are "stable" right now though: 421 * we are neither waiting to query the driver about them (FSF_UPDATE) nor 422 * querying the driver about them right now (FSF_BUSY). This is a dangerous 423 * case of premature optimization and may be removed altogether if it proves 424 * to continue to be a source of bugs. 425 */ 426 if (!block && !(f->filp_select_flags & (FSF_UPDATE | FSF_BUSY)) && 427 (f->filp_select_flags & FSF_BLOCKED)) { 428 if ((rops & SEL_RD) && (f->filp_select_flags & FSF_RD_BLOCK)) 429 rops &= ~SEL_RD; 430 if ((rops & SEL_WR) && (f->filp_select_flags & FSF_WR_BLOCK)) 431 rops &= ~SEL_WR; 432 if ((rops & SEL_ERR) && (f->filp_select_flags & FSF_ERR_BLOCK)) 433 rops &= ~SEL_ERR; 434 if (!(rops & (SEL_RD|SEL_WR|SEL_ERR))) 435 return(OK); 436 } 437 438 f->filp_select_flags |= FSF_UPDATE; 439 if (block) { 440 rops |= SEL_NOTIFY; 441 if (rops & SEL_RD) f->filp_select_flags |= FSF_RD_BLOCK; 442 if (rops & SEL_WR) f->filp_select_flags |= FSF_WR_BLOCK; 443 if (rops & SEL_ERR) f->filp_select_flags |= FSF_ERR_BLOCK; 444 } 445 446 if (f->filp_select_flags & FSF_BUSY) 447 return(SUSPEND); 448 449 dp = &dmap[major(dev)]; 450 if (dp->dmap_sel_busy) 451 return(SUSPEND); 452 453 f->filp_select_flags &= ~FSF_UPDATE; 454 r = cdev_select(dev, rops); 455 if (r != OK) 456 return(r); 457 458 dp->dmap_sel_busy = TRUE; 459 dp->dmap_sel_filp = f; 460 f->filp_select_flags |= FSF_BUSY; 461 462 return(SUSPEND); 463 } 464 465 /*===========================================================================* 466 * select_request_file * 467 *===========================================================================*/ 468 static int select_request_file(struct filp *UNUSED(f), int *UNUSED(ops), 469 int UNUSED(block), struct fproc *UNUSED(rfp)) 470 { 471 /* Files are always ready, so output *ops is input *ops */ 472 return(OK); 473 } 474 475 /*===========================================================================* 476 * select_request_pipe * 477 *===========================================================================*/ 478 static int select_request_pipe(struct filp *f, int *ops, int block, 479 struct fproc *UNUSED(rfp)) 480 { 481 /* Check readiness status on a pipe. The given filp is locked. This function 482 * may block its calling thread if necessary. 483 */ 484 int orig_ops, r = 0, err; 485 486 orig_ops = *ops; 487 488 if ((*ops & (SEL_RD|SEL_ERR))) { 489 /* Check if we can read 1 byte */ 490 err = pipe_check(f, READING, f->filp_flags & ~O_NONBLOCK, 1, 491 1 /* Check only */); 492 493 if (err != SUSPEND) 494 r |= SEL_RD; 495 if (err < 0 && err != SUSPEND) 496 r |= SEL_ERR; 497 } 498 499 if ((*ops & (SEL_WR|SEL_ERR))) { 500 /* Check if we can write 1 byte */ 501 err = pipe_check(f, WRITING, f->filp_flags & ~O_NONBLOCK, 1, 502 1 /* Check only */); 503 504 if (err != SUSPEND) 505 r |= SEL_WR; 506 if (err < 0 && err != SUSPEND) 507 r |= SEL_ERR; 508 } 509 510 /* Some options we collected might not be requested. */ 511 *ops = r & orig_ops; 512 513 if (!*ops && block) 514 f->filp_pipe_select_ops |= orig_ops; 515 516 return(OK); 517 } 518 519 /*===========================================================================* 520 * tab2ops * 521 *===========================================================================*/ 522 static int tab2ops(int fd, struct selectentry *e) 523 { 524 int ops = 0; 525 if (FD_ISSET(fd, &e->readfds)) ops |= SEL_RD; 526 if (FD_ISSET(fd, &e->writefds)) ops |= SEL_WR; 527 if (FD_ISSET(fd, &e->errorfds)) ops |= SEL_ERR; 528 529 return(ops); 530 } 531 532 533 /*===========================================================================* 534 * ops2tab * 535 *===========================================================================*/ 536 static void ops2tab(int ops, int fd, struct selectentry *e) 537 { 538 if ((ops & SEL_RD) && e->vir_readfds && FD_ISSET(fd, &e->readfds) && 539 !FD_ISSET(fd, &e->ready_readfds)) { 540 FD_SET(fd, &e->ready_readfds); 541 e->nreadyfds++; 542 } 543 544 if ((ops & SEL_WR) && e->vir_writefds && FD_ISSET(fd, &e->writefds) && 545 !FD_ISSET(fd, &e->ready_writefds)) { 546 FD_SET(fd, &e->ready_writefds); 547 e->nreadyfds++; 548 } 549 550 if ((ops & SEL_ERR) && e->vir_errorfds && FD_ISSET(fd, &e->errorfds) && 551 !FD_ISSET(fd, &e->ready_errorfds)) { 552 FD_SET(fd, &e->ready_errorfds); 553 e->nreadyfds++; 554 } 555 } 556 557 558 /*===========================================================================* 559 * copy_fdsets * 560 *===========================================================================*/ 561 static int copy_fdsets(struct selectentry *se, int nfds, int direction) 562 { 563 /* Copy FD sets from or to the user process calling select(2). This function 564 * MUST NOT block the calling thread. 565 */ 566 int r; 567 size_t fd_setsize; 568 endpoint_t src_e, dst_e; 569 fd_set *src_fds, *dst_fds; 570 571 if (nfds < 0 || nfds > OPEN_MAX) 572 panic("select copy_fdsets: nfds wrong: %d", nfds); 573 574 /* Only copy back as many bits as the user expects. */ 575 fd_setsize = (size_t) (howmany(nfds, __NFDBITS) * sizeof(__fd_mask)); 576 577 /* Set source and destination endpoints */ 578 src_e = (direction == FROM_PROC) ? se->req_endpt : SELF; 579 dst_e = (direction == FROM_PROC) ? SELF : se->req_endpt; 580 581 /* read set */ 582 src_fds = (direction == FROM_PROC) ? se->vir_readfds : &se->ready_readfds; 583 dst_fds = (direction == FROM_PROC) ? &se->readfds : se->vir_readfds; 584 if (se->vir_readfds) { 585 r = sys_datacopy_wrapper(src_e, (vir_bytes) src_fds, dst_e, 586 (vir_bytes) dst_fds, fd_setsize); 587 if (r != OK) return(r); 588 } 589 590 /* write set */ 591 src_fds = (direction == FROM_PROC) ? se->vir_writefds : &se->ready_writefds; 592 dst_fds = (direction == FROM_PROC) ? &se->writefds : se->vir_writefds; 593 if (se->vir_writefds) { 594 r = sys_datacopy_wrapper(src_e, (vir_bytes) src_fds, dst_e, 595 (vir_bytes) dst_fds, fd_setsize); 596 if (r != OK) return(r); 597 } 598 599 /* error set */ 600 src_fds = (direction == FROM_PROC) ? se->vir_errorfds : &se->ready_errorfds; 601 dst_fds = (direction == FROM_PROC) ? &se->errorfds : se->vir_errorfds; 602 if (se->vir_errorfds) { 603 r = sys_datacopy_wrapper(src_e, (vir_bytes) src_fds, dst_e, 604 (vir_bytes) dst_fds, fd_setsize); 605 if (r != OK) return(r); 606 } 607 608 return(OK); 609 } 610 611 612 /*===========================================================================* 613 * select_cancel_all * 614 *===========================================================================*/ 615 static void select_cancel_all(struct selectentry *se) 616 { 617 /* Cancel select, possibly on success. Decrease select usage and cancel timer. 618 * This function MUST NOT block its calling thread. 619 */ 620 621 int fd; 622 struct filp *f; 623 624 for (fd = 0; fd < se->nfds; fd++) { 625 if ((f = se->filps[fd]) == NULL) continue; 626 se->filps[fd] = NULL; 627 select_cancel_filp(f); 628 } 629 630 if (se->expiry > 0) { 631 cancel_timer(&se->timer); 632 se->expiry = 0; 633 } 634 635 se->requestor = NULL; 636 } 637 638 /*===========================================================================* 639 * select_cancel_filp * 640 *===========================================================================*/ 641 static void select_cancel_filp(struct filp *f) 642 { 643 /* Reduce the number of select users of this filp. This function MUST NOT block 644 * its calling thread. 645 */ 646 devmajor_t major; 647 648 assert(f); 649 assert(f->filp_selectors > 0); 650 assert(f->filp_count > 0); 651 652 f->filp_selectors--; 653 if (f->filp_selectors == 0) { 654 /* No one selecting on this filp anymore, forget about select state */ 655 f->filp_select_ops = 0; 656 f->filp_select_flags = 0; 657 f->filp_pipe_select_ops = 0; 658 659 /* If this filp is the subject of an ongoing select query to a 660 * character device, mark the query as stale, so that this filp will 661 * not be checked when the result arrives. The filp select device may 662 * still be NO_DEV if do_select fails on the initial fd check. 663 */ 664 if (is_char_device(f) && f->filp_char_select_dev != NO_DEV) { 665 major = major(f->filp_char_select_dev); 666 if (dmap[major].dmap_sel_busy && 667 dmap[major].dmap_sel_filp == f) 668 dmap[major].dmap_sel_filp = NULL; /* leave _busy set */ 669 f->filp_char_select_dev = NO_DEV; 670 } 671 } 672 } 673 674 /*===========================================================================* 675 * select_return * 676 *===========================================================================*/ 677 static void select_return(struct selectentry *se) 678 { 679 /* Return the results of a select call to the user process and revive the 680 * process. This function MUST NOT block its calling thread. 681 */ 682 int r; 683 684 assert(!is_deferred(se)); /* Not done yet, first wait for async reply */ 685 686 select_cancel_all(se); 687 688 if (se->error != OK) 689 r = se->error; 690 else 691 r = copy_fdsets(se, se->nfds, TO_PROC); 692 if (r == OK) 693 r = se->nreadyfds; 694 695 revive(se->req_endpt, r); 696 } 697 698 699 /*===========================================================================* 700 * select_callback * 701 *===========================================================================*/ 702 void select_callback(struct filp *f, int status) 703 { 704 /* The status of a filp has changed, with the given ready operations or error. 705 * This function is currently called only for pipes, and holds the lock to 706 * the filp. 707 */ 708 709 filp_status(f, status); 710 } 711 712 /*===========================================================================* 713 * init_select * 714 *===========================================================================*/ 715 void init_select(void) 716 { 717 int s; 718 719 for (s = 0; s < MAXSELECTS; s++) 720 init_timer(&selecttab[s].timer); 721 } 722 723 724 /*===========================================================================* 725 * select_forget * 726 *===========================================================================*/ 727 void select_forget(void) 728 { 729 /* The calling thread's associated process is expected to be unpaused, due to 730 * a signal that is supposed to interrupt the current system call. Totally 731 * forget about the select(). This function may block its calling thread if 732 * necessary (but it doesn't). 733 */ 734 int slot; 735 struct selectentry *se; 736 737 for (slot = 0; slot < MAXSELECTS; slot++) { 738 se = &selecttab[slot]; 739 if (se->requestor == fp) 740 break; 741 } 742 743 if (slot >= MAXSELECTS) return; /* Entry not found */ 744 745 assert(se->starting == FALSE); 746 747 /* Do NOT test on is_deferred here. We can safely cancel ongoing queries. */ 748 select_cancel_all(se); 749 } 750 751 752 /*===========================================================================* 753 * select_timeout_check * 754 *===========================================================================*/ 755 void select_timeout_check(int s) 756 { 757 /* An alarm has gone off for one of the select queries. This function MUST NOT 758 * block its calling thread. 759 */ 760 struct selectentry *se; 761 762 if (s < 0 || s >= MAXSELECTS) return; /* Entry does not exist */ 763 764 se = &selecttab[s]; 765 if (se->requestor == NULL) return; 766 if (se->expiry == 0) return; /* Strange, did we even ask for a timeout? */ 767 se->expiry = 0; 768 if (!is_deferred(se)) 769 select_return(se); 770 else 771 se->block = 0; /* timer triggered "too soon", treat as nonblocking */ 772 } 773 774 775 /*===========================================================================* 776 * select_unsuspend_by_endpt * 777 *===========================================================================*/ 778 void select_unsuspend_by_endpt(endpoint_t proc_e) 779 { 780 /* Revive blocked processes when a driver has disappeared */ 781 devmajor_t major; 782 int fd, s; 783 struct selectentry *se; 784 struct filp *f; 785 786 for (s = 0; s < MAXSELECTS; s++) { 787 int wakehim = 0; 788 se = &selecttab[s]; 789 if (se->requestor == NULL) continue; 790 if (se->requestor->fp_endpoint == proc_e) { 791 assert(se->requestor->fp_flags & FP_EXITING); 792 select_cancel_all(se); 793 continue; 794 } 795 796 for (fd = 0; fd < se->nfds; fd++) { 797 if ((f = se->filps[fd]) == NULL || !is_char_device(f)) 798 continue; 799 800 assert(f->filp_char_select_dev != NO_DEV); 801 major = major(f->filp_char_select_dev); 802 if (dmap_driver_match(proc_e, major)) { 803 se->filps[fd] = NULL; 804 se->error = EIO; 805 select_cancel_filp(f); 806 wakehim = 1; 807 } 808 } 809 810 if (wakehim && !is_deferred(se)) 811 select_return(se); 812 } 813 } 814 815 /*===========================================================================* 816 * select_reply1 * 817 *===========================================================================*/ 818 void select_reply1(endpoint_t driver_e, devminor_t minor, int status) 819 { 820 /* Handle the initial reply to CDEV_SELECT request. This function MUST NOT 821 * block its calling thread. 822 */ 823 devmajor_t major; 824 dev_t dev; 825 struct filp *f; 826 struct dmap *dp; 827 828 /* Figure out which device is replying */ 829 if ((dp = get_dmap(driver_e)) == NULL) return; 830 831 major = dp-dmap; 832 dev = makedev(major, minor); 833 834 /* Get filp belonging to character special file */ 835 if (!dp->dmap_sel_busy) { 836 printf("VFS (%s:%d): major %d was not expecting a CDEV_SELECT reply\n", 837 __FILE__, __LINE__, major); 838 return; 839 } 840 841 /* The select filp may have been set to NULL if the requestor has been 842 * unpaused in the meantime. In that case, we ignore the result, but we do 843 * look for other filps to restart later. 844 */ 845 if ((f = dp->dmap_sel_filp) != NULL) { 846 /* Find vnode and check we got a reply from the device we expected */ 847 assert(is_char_device(f)); 848 assert(f->filp_char_select_dev != NO_DEV); 849 if (f->filp_char_select_dev != dev) { 850 /* This should never happen. The driver may be misbehaving. 851 * For now we assume that the reply we want will arrive later.. 852 */ 853 printf("VFS (%s:%d): expected reply from dev %llx not %llx\n", 854 __FILE__, __LINE__, f->filp_char_select_dev, dev); 855 return; 856 } 857 } 858 859 /* No longer waiting for a reply from this device */ 860 dp->dmap_sel_busy = FALSE; 861 dp->dmap_sel_filp = NULL; 862 863 /* Process the select result only if the filp is valid. */ 864 if (f != NULL) { 865 assert(f->filp_count >= 1); 866 assert(f->filp_select_flags & FSF_BUSY); 867 868 f->filp_select_flags &= ~FSF_BUSY; 869 870 /* The select call is done now, except when 871 * - another process started a select on the same filp with possibly a 872 * different set of operations. 873 * - a process does a select on the same filp but using different file 874 * descriptors. 875 * - the select has a timeout. Upon receiving this reply the operations 876 * might not be ready yet, so we want to wait for that to ultimately 877 * happen. 878 * Therefore we need to keep remembering what the operations are. 879 */ 880 if (!(f->filp_select_flags & (FSF_UPDATE|FSF_BLOCKED))) 881 f->filp_select_ops = 0; /* done selecting */ 882 else if (status > 0 && !(f->filp_select_flags & FSF_UPDATE)) 883 /* there may be operations pending */ 884 f->filp_select_ops &= ~status; 885 886 /* Record new filp status */ 887 if (!(status == 0 && (f->filp_select_flags & FSF_BLOCKED))) { 888 if (status > 0) { /* operations ready */ 889 if (status & SEL_RD) 890 f->filp_select_flags &= ~FSF_RD_BLOCK; 891 if (status & SEL_WR) 892 f->filp_select_flags &= ~FSF_WR_BLOCK; 893 if (status & SEL_ERR) 894 f->filp_select_flags &= ~FSF_ERR_BLOCK; 895 } else if (status < 0) { /* error */ 896 /* Always unblock upon error */ 897 f->filp_select_flags &= ~FSF_BLOCKED; 898 } 899 } 900 901 filp_status(f, status); /* Tell filp owners about the results */ 902 } 903 904 select_restart_filps(); 905 } 906 907 908 /*===========================================================================* 909 * select_reply2 * 910 *===========================================================================*/ 911 void select_reply2(endpoint_t driver_e, devminor_t minor, int status) 912 { 913 /* Handle secondary reply to DEV_SELECT request. A secondary reply occurs when 914 * the select request is 'blocking' until an operation becomes ready. This 915 * function MUST NOT block its calling thread. 916 */ 917 int slot, found, fd; 918 devmajor_t major; 919 dev_t dev; 920 struct filp *f; 921 struct dmap *dp; 922 struct selectentry *se; 923 924 if (status == 0) { 925 printf("VFS (%s:%d): weird status (%d) to report\n", 926 __FILE__, __LINE__, status); 927 return; 928 } 929 930 /* Figure out which device is replying */ 931 if ((dp = get_dmap(driver_e)) == NULL) { 932 printf("VFS (%s:%d): endpoint %d is not a known driver endpoint\n", 933 __FILE__, __LINE__, driver_e); 934 return; 935 } 936 major = dp-dmap; 937 dev = makedev(major, minor); 938 939 /* Find all file descriptors selecting for this device */ 940 for (slot = 0; slot < MAXSELECTS; slot++) { 941 se = &selecttab[slot]; 942 if (se->requestor == NULL) continue; /* empty slot */ 943 944 found = FALSE; 945 for (fd = 0; fd < se->nfds; fd++) { 946 if ((f = se->filps[fd]) == NULL) continue; 947 if (!is_char_device(f)) continue; 948 assert(f->filp_char_select_dev != NO_DEV); 949 if (f->filp_char_select_dev != dev) continue; 950 951 if (status > 0) { /* Operations ready */ 952 /* Clear the replied bits from the request 953 * mask unless FSF_UPDATE is set. 954 */ 955 if (!(f->filp_select_flags & FSF_UPDATE)) 956 f->filp_select_ops &= ~status; 957 if (status & SEL_RD) 958 f->filp_select_flags &= ~FSF_RD_BLOCK; 959 if (status & SEL_WR) 960 f->filp_select_flags &= ~FSF_WR_BLOCK; 961 if (status & SEL_ERR) 962 f->filp_select_flags &= ~FSF_ERR_BLOCK; 963 964 ops2tab(status, fd, se); 965 } else { 966 f->filp_select_flags &= ~FSF_BLOCKED; 967 se->error = status; 968 } 969 found = TRUE; 970 } 971 /* Even if 'found' is set now, nothing may have changed for this call, 972 * as it may not have been interested in the operations that were 973 * reported as ready. Let restart_proc check. 974 */ 975 if (found) 976 restart_proc(se); 977 } 978 979 select_restart_filps(); 980 } 981 982 /*===========================================================================* 983 * select_restart_filps * 984 *===========================================================================*/ 985 static void select_restart_filps(void) 986 { 987 /* We got a result from a character driver, and now we need to check if we can 988 * restart deferred polling operations. This function MUST NOT block its 989 * calling thread. 990 */ 991 int fd, slot; 992 struct filp *f; 993 struct selectentry *se; 994 995 /* Locate filps that can be restarted */ 996 for (slot = 0; slot < MAXSELECTS; slot++) { 997 se = &selecttab[slot]; 998 if (se->requestor == NULL) continue; /* empty slot */ 999 1000 /* Only 'deferred' processes are eligible to restart */ 1001 if (!is_deferred(se)) continue; 1002 1003 /* Find filps that are not waiting for a reply, but have an updated 1004 * status (i.e., another select on the same filp with possibly a 1005 * different set of operations is to be done), and thus requires the 1006 * select request to be sent again). 1007 */ 1008 for (fd = 0; fd < se->nfds; fd++) { 1009 int r, wantops, ops; 1010 if ((f = se->filps[fd]) == NULL) continue; 1011 if (f->filp_select_flags & FSF_BUSY) /* Still waiting for */ 1012 continue; /* initial reply */ 1013 if (!(f->filp_select_flags & FSF_UPDATE)) /* Must be in */ 1014 continue; /* 'update' state */ 1015 1016 /* This function is suitable only for character devices. In 1017 * particular, checking pipes the same way would introduce a 1018 * serious locking problem. 1019 */ 1020 assert(is_char_device(f)); 1021 1022 wantops = ops = f->filp_select_ops; 1023 r = select_request_char(f, &wantops, se->block, se->requestor); 1024 if (r != OK && r != SUSPEND) { 1025 se->error = r; 1026 restart_proc(se); 1027 break; /* Error or bogus return code; abort */ 1028 } 1029 if (wantops & ops) ops2tab(wantops, fd, se); 1030 } 1031 } 1032 } 1033 1034 /*===========================================================================* 1035 * filp_status * 1036 *===========================================================================*/ 1037 static void 1038 filp_status(struct filp *f, int status) 1039 { 1040 /* Tell processes that need to know about the status of this filp. This 1041 * function MUST NOT block its calling thread. 1042 */ 1043 int fd, slot, found; 1044 struct selectentry *se; 1045 1046 for (slot = 0; slot < MAXSELECTS; slot++) { 1047 se = &selecttab[slot]; 1048 if (se->requestor == NULL) continue; /* empty slot */ 1049 1050 found = FALSE; 1051 for (fd = 0; fd < se->nfds; fd++) { 1052 if (se->filps[fd] != f) continue; 1053 if (status < 0) 1054 se->error = status; 1055 else 1056 ops2tab(status, fd, se); 1057 found = TRUE; 1058 } 1059 if (found) 1060 restart_proc(se); 1061 } 1062 } 1063 1064 /*===========================================================================* 1065 * restart_proc * 1066 *===========================================================================*/ 1067 static void 1068 restart_proc(struct selectentry *se) 1069 { 1070 /* Tell process about select results (if any) unless there are still results 1071 * pending. This function MUST NOT block its calling thread. 1072 */ 1073 1074 if ((se->nreadyfds > 0 || se->error != OK || !se->block) && !is_deferred(se)) 1075 select_return(se); 1076 } 1077 1078 /*===========================================================================* 1079 * wipe_select * 1080 *===========================================================================*/ 1081 static void wipe_select(struct selectentry *se) 1082 { 1083 se->nfds = 0; 1084 se->nreadyfds = 0; 1085 se->error = OK; 1086 se->block = 0; 1087 memset(se->filps, 0, sizeof(se->filps)); 1088 1089 FD_ZERO(&se->readfds); 1090 FD_ZERO(&se->writefds); 1091 FD_ZERO(&se->errorfds); 1092 FD_ZERO(&se->ready_readfds); 1093 FD_ZERO(&se->ready_writefds); 1094 FD_ZERO(&se->ready_errorfds); 1095 } 1096 1097 /*===========================================================================* 1098 * select_lock_filp * 1099 *===========================================================================*/ 1100 static void select_lock_filp(struct filp *f, int ops) 1101 { 1102 /* Lock a filp and vnode based on which operations are requested. This function 1103 * may block its calling thread, obviously. 1104 */ 1105 tll_access_t locktype; 1106 1107 locktype = VNODE_READ; /* By default */ 1108 1109 if (ops & (SEL_WR|SEL_ERR)) 1110 /* Selecting for error or writing requires exclusive access */ 1111 locktype = VNODE_WRITE; 1112 1113 lock_filp(f, locktype); 1114 } 1115 1116 /* 1117 * Dump the state of the entire select table, for debugging purposes. 1118 */ 1119 void 1120 select_dump(void) 1121 { 1122 struct selectentry *se; 1123 struct filp *f; 1124 struct dmap *dp; 1125 dev_t dev; 1126 int s, fd; 1127 1128 for (s = 0; s < MAXSELECTS; s++) { 1129 se = &selecttab[s]; 1130 if (se->requestor == NULL) 1131 continue; 1132 1133 printf("select %d: endpt %d nfds %d nreadyfds %d error %d " 1134 "block %d starting %d expiry %u is_deferred %d\n", 1135 s, se->req_endpt, se->nfds, se->nreadyfds, se->error, 1136 se->block, se->starting, se->expiry, is_deferred(se)); 1137 1138 for (fd = 0; !se->starting && fd < se->nfds; fd++) { 1139 /* Save on output: do not print NULL filps at all. */ 1140 if ((f = se->filps[fd]) == NULL) 1141 continue; 1142 1143 printf("- [%d] filp %p flags %x type ", fd, f, 1144 f->filp_select_flags); 1145 if (is_regular_file(f)) 1146 printf("regular\n"); 1147 else if (is_pipe(f)) 1148 printf("pipe\n"); 1149 else if (is_char_device(f)) { 1150 dev = cdev_map(f->filp_vno->v_sdev, 1151 se->requestor); 1152 printf("char (dev <%d,%d>, dmap ", 1153 major(dev), minor(dev)); 1154 if (dev != NO_DEV) { 1155 dp = &dmap[major(dev)]; 1156 printf("busy %d filp %p)\n", 1157 dp->dmap_sel_busy, 1158 dp->dmap_sel_filp); 1159 } else 1160 printf("unknown)\n"); 1161 } else 1162 printf("unknown\n"); 1163 } 1164 } 1165 } 1166