/*
 * This file implements the lower socket layer of VFS: communication with
 * socket drivers. Socket driver communication evolved out of character driver
 * communication, and the two have many similarities. Most
 * importantly, socket driver communication also has the distinction between
 * short-lived and long-lived requests.
 *
 * Short-lived requests are expected to be replied to by the socket driver
 * immediately in all cases. For such requests, VFS keeps the worker thread
 * for the calling process alive until the reply arrives. In contrast,
 * long-lived requests may block. For such requests, VFS suspends the calling
 * process until a reply comes in, or until a signal interrupts the request.
 * Both short-lived and long-lived requests may be aborted if VFS finds that
 * the corresponding socket driver has died. Even though long-lived requests
 * may be marked as nonblocking, nonblocking calls are still handled as
 * long-lived in terms of VFS processing.
 *
 * For an overview of the socket driver requests and replies, message layouts,
 * and which requests are long-lived or short-lived (i.e. may suspend or not),
 * please refer to the corresponding table in the libsockdriver source code.
 *
 * For most long-lived socket requests, the main VFS thread processes the reply
 * from the socket driver. This typically consists of waking up the user
 * process that originally issued the system call on the socket by simply
 * relaying the call's result code. Some socket calls require a specific reply
 * message and/or additional post-call actions; for those, resume_*() calls are
 * made back into the upper socket layer.
 *
 * If a process is interrupted by a signal, any ongoing long-lived socket
 * request must be canceled. This is done by sending a one-way cancel request
 * to the socket driver, and waiting for it to reply to the original request.
 * In this case, the reply will be processed from the worker thread that is
 * handling the cancel operation. Canceling does not imply call failure: the
 * cancellation may result in a partial I/O reply, and a successful reply may
 * cross the cancel request.
 *
 * The main exception is the reply to an accept request. Once a connection has
 * been accepted, a new socket has to be created for it. This involves actions
 * that may block the current thread, and so a worker thread is spawned to
 * process successful accept replies, unless the reply was received from a
 * worker thread already (as may be the case if the accept request was being
 * canceled).
 */
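
/*
 * Quick reference, derived from the functions in this file (the table in
 * libsockdriver remains the authoritative source):
 *
 * - Short-lived, keeping the worker thread waiting in sdev_sendrec():
 *   SDEV_SOCKET, SDEV_SOCKETPAIR, SDEV_LISTEN, SDEV_SHUTDOWN, SDEV_SETSOCKOPT,
 *   SDEV_GETSOCKOPT, SDEV_GETSOCKNAME, SDEV_GETPEERNAME, and SDEV_CLOSE when
 *   issued thread-synchronously (see sdev_close()).
 * - Long-lived, suspending the calling process through sdev_suspend():
 *   SDEV_BIND, SDEV_CONNECT, SDEV_ACCEPT, SDEV_SEND, SDEV_RECV, SDEV_IOCTL,
 *   and SDEV_CLOSE for a regular close(2) call.
 * - One-way: SDEV_SELECT (its replies arrive later as SDEV_SELECT1_REPLY or
 *   SDEV_SELECT2_REPLY) and SDEV_CANCEL (the reply to the original request is
 *   awaited instead).
 */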

#include "fs.h"
#include <sys/socket.h>
#include <minix/callnr.h>

/*
 * Send a short-lived request message to the given socket driver, and suspend
 * the current worker thread until a reply message has been received. On
 * success, the function will return OK, and the reply message will be stored
 * in the message structure pointed to by 'm_ptr'. The function may fail if
 * the socket driver dies before sending a reply. In that case, the function
 * will return a negative error code, and also store the same negative error
 * code in the m_type field of the 'm_ptr' message structure.
 */
static int
sdev_sendrec(struct smap * sp, message * m_ptr)
{
	int r;

	/* Send the request to the driver. */
	if ((r = asynsend3(sp->smap_endpt, m_ptr, AMF_NOREPLY)) != OK)
		panic("VFS: asynsend in sdev_sendrec failed: %d", r);

	/* Suspend this thread until we have received the response. */
	self->w_task = sp->smap_endpt;
	self->w_drv_sendrec = m_ptr;

	worker_wait();

	self->w_task = NONE;
	assert(self->w_drv_sendrec == NULL);

	return (!IS_SDEV_RS(m_ptr->m_type)) ? m_ptr->m_type : OK;
}
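
/*
 * For the long-lived requests below, the suspension state saved in 'fp_sdev'
 * is, roughly, used as follows (a summary derived from the rest of this
 * file): 'dev' identifies the socket device, and is used to find the socket
 * driver again when canceling the call and when verifying incoming replies;
 * 'callnr' determines how the reply is converted into a system call result
 * in sdev_finish(); the three 'grant' entries are revoked once the call
 * completes; and the 'aux' field holds either the listening file descriptor
 * (accept) or the user buffer address (recvmsg) needed by the corresponding
 * resume_*() call.
 */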

/*
 * Suspend the current process for later completion of its system call.
 */
int
sdev_suspend(dev_t dev, cp_grant_id_t grant0, cp_grant_id_t grant1,
	cp_grant_id_t grant2, int fd, vir_bytes buf)
{

	fp->fp_sdev.dev = dev;
	fp->fp_sdev.callnr = job_call_nr;
	fp->fp_sdev.grant[0] = grant0;
	fp->fp_sdev.grant[1] = grant1;
	fp->fp_sdev.grant[2] = grant2;

	if (job_call_nr == VFS_ACCEPT) {
		assert(fd != -1);
		assert(buf == 0);
		fp->fp_sdev.aux.fd = fd;
	} else if (job_call_nr == VFS_RECVMSG) {
		assert(fd == -1);
		/*
		 * TODO: we are not yet consistent enough in dealing with
		 * mapped NULL pages to have an assert(buf != 0) here..
		 */
		fp->fp_sdev.aux.buf = buf;
	} else {
		assert(fd == -1);
		assert(buf == 0);
	}

	suspend(FP_BLOCKED_ON_SDEV);
	return SUSPEND;
}

/*
 * Create a socket or socket pair. Return OK on success, with the new socket
 * device identifier(s) stored in the 'dev' array. Return an error code upon
 * failure.
 */
int
sdev_socket(int domain, int type, int protocol, dev_t * dev, int pair)
{
	struct smap *sp;
	message m;
	sockid_t sock_id, sock_id2;
	int r;

	/* We could return EAFNOSUPPORT, but the caller should have checked. */
	if ((sp = get_smap_by_domain(domain)) == NULL)
		panic("VFS: sdev_socket for unknown domain");

	/* Prepare the request message. */
	memset(&m, 0, sizeof(m));
	m.m_type = pair ? SDEV_SOCKETPAIR : SDEV_SOCKET;
	m.m_vfs_lsockdriver_socket.req_id = (sockid_t)who_e;
	m.m_vfs_lsockdriver_socket.domain = domain;
	m.m_vfs_lsockdriver_socket.type = type;
	m.m_vfs_lsockdriver_socket.protocol = protocol;
	m.m_vfs_lsockdriver_socket.user_endpt = who_e;

	/* Send the request, and wait for the reply. */
	if ((r = sdev_sendrec(sp, &m)) != OK)
		return r;	/* socket driver died */

	/* Parse the reply message, and check for protocol errors. */
	if (m.m_type != SDEV_SOCKET_REPLY) {
		printf("VFS: %d sent bad reply type %d for call %d\n",
		    sp->smap_endpt, m.m_type, job_call_nr);
		return EIO;
	}

	sock_id = m.m_lsockdriver_vfs_socket_reply.sock_id;
	sock_id2 = m.m_lsockdriver_vfs_socket_reply.sock_id2;

	/* Check for regular errors. Upon success, return the socket(s). */
	if (sock_id < 0)
		return sock_id;

	dev[0] = make_smap_dev(sp, sock_id);

	if (pair) {
		/* Okay, one more protocol error. */
		if (sock_id2 < 0) {
			printf("VFS: %d sent bad SOCKETPAIR socket ID %d\n",
			    sp->smap_endpt, sock_id2);
			(void)sdev_close(dev[0], FALSE /*may_suspend*/);
			return EIO;
		}

		dev[1] = make_smap_dev(sp, sock_id2);
	}

	return OK;
}

/*
 * Bind or connect a socket to a particular address. These calls may block, so
 * suspend the current process instead of making the thread wait for the reply.
 */
static int
sdev_bindconn(dev_t dev, int type, vir_bytes addr, unsigned int addr_len,
	int filp_flags)
{
	struct smap *sp;
	sockid_t sock_id;
	cp_grant_id_t grant;
	message m;
	int r;

	if ((sp = get_smap_by_dev(dev, &sock_id)) == NULL)
		return EIO;

	/* Allocate resources. */
	grant = cpf_grant_magic(sp->smap_endpt, who_e, addr, addr_len,
	    CPF_READ);
	if (!GRANT_VALID(grant))
		panic("VFS: cpf_grant_magic failed");

	/* Prepare the request message. */
	memset(&m, 0, sizeof(m));
	m.m_type = type;
	m.m_vfs_lsockdriver_addr.req_id = (sockid_t)who_e;
	m.m_vfs_lsockdriver_addr.sock_id = sock_id;
	m.m_vfs_lsockdriver_addr.grant = grant;
	m.m_vfs_lsockdriver_addr.len = addr_len;
	m.m_vfs_lsockdriver_addr.user_endpt = who_e;
	m.m_vfs_lsockdriver_addr.sflags =
	    (filp_flags & O_NONBLOCK) ? SDEV_NONBLOCK : 0;

	/* Send the request to the driver. */
	if ((r = asynsend3(sp->smap_endpt, &m, AMF_NOREPLY)) != OK)
		panic("VFS: asynsend in sdev_bindconn failed: %d", r);

	/* Suspend the process until the reply arrives. */
	return sdev_suspend(dev, grant, GRANT_INVALID, GRANT_INVALID, -1, 0);
}

/*
 * Bind a socket to a local address.
 */
int
sdev_bind(dev_t dev, vir_bytes addr, unsigned int addr_len, int filp_flags)
{

	return sdev_bindconn(dev, SDEV_BIND, addr, addr_len, filp_flags);
}

/*
 * Connect a socket to a remote address.
 */
int
sdev_connect(dev_t dev, vir_bytes addr, unsigned int addr_len, int filp_flags)
{

	return sdev_bindconn(dev, SDEV_CONNECT, addr, addr_len, filp_flags);
}

/*
 * Send and receive a "simple" request: listen, shutdown, or close. Note that
 * while cancel requests use the same request format, they require a different
 * way of handling their replies.
 */
static int
sdev_simple(dev_t dev, int type, int param)
{
	struct smap *sp;
	sockid_t sock_id;
	message m;
	int r;

	assert(type == SDEV_LISTEN || type == SDEV_SHUTDOWN ||
	    type == SDEV_CLOSE);

	if ((sp = get_smap_by_dev(dev, &sock_id)) == NULL)
		return EIO;

	/* Prepare the request message. */
	memset(&m, 0, sizeof(m));
	m.m_type = type;
	m.m_vfs_lsockdriver_simple.req_id = (sockid_t)who_e;
	m.m_vfs_lsockdriver_simple.sock_id = sock_id;
	m.m_vfs_lsockdriver_simple.param = param;

	/* Send the request, and wait for the reply. */
	if ((r = sdev_sendrec(sp, &m)) != OK)
		return r;	/* socket driver died */

	/* Parse and return the reply. */
	if (m.m_type != SDEV_REPLY) {
		printf("VFS: %d sent bad reply type %d for call %d\n",
		    sp->smap_endpt, m.m_type, job_call_nr);
		return EIO;
	}

	return m.m_lsockdriver_vfs_reply.status;
}

/*
 * Put a socket in listening mode.
 */
int
sdev_listen(dev_t dev, int backlog)
{

	assert(backlog >= 0);

	return sdev_simple(dev, SDEV_LISTEN, backlog);
}

/*
 * Accept a new connection on a socket.
 */
int
sdev_accept(dev_t dev, vir_bytes addr, unsigned int addr_len, int filp_flags,
	int listen_fd)
{
	struct smap *sp;
	sockid_t sock_id;
	cp_grant_id_t grant;
	message m;
	int r;

	if ((sp = get_smap_by_dev(dev, &sock_id)) == NULL)
		return EIO;

	/* Allocate resources. */
	if (addr != 0) {
		grant = cpf_grant_magic(sp->smap_endpt, who_e, addr, addr_len,
		    CPF_WRITE);
		if (!GRANT_VALID(grant))
			panic("VFS: cpf_grant_magic failed");
	} else
		grant = GRANT_INVALID;

	/* Prepare the request message. */
	memset(&m, 0, sizeof(m));
	m.m_type = SDEV_ACCEPT;
	m.m_vfs_lsockdriver_addr.req_id = (sockid_t)who_e;
	m.m_vfs_lsockdriver_addr.sock_id = sock_id;
	m.m_vfs_lsockdriver_addr.grant = grant;
	m.m_vfs_lsockdriver_addr.len = addr_len;
	m.m_vfs_lsockdriver_addr.user_endpt = who_e;
	m.m_vfs_lsockdriver_addr.sflags =
	    (filp_flags & O_NONBLOCK) ? SDEV_NONBLOCK : 0;

	/* Send the request to the driver. */
	if ((r = asynsend3(sp->smap_endpt, &m, AMF_NOREPLY)) != OK)
		panic("VFS: asynsend in sdev_accept failed: %d", r);

	/* Suspend the process until the reply arrives. */
	return sdev_suspend(dev, grant, GRANT_INVALID, GRANT_INVALID,
	    listen_fd, 0);
}

/*
 * Send or receive a message on a socket. All read (read(2), recvfrom(2), and
 * recvmsg(2)) and write (write(2), sendto(2), sendmsg(2)) system calls on
 * sockets pass through this function. The function is named sdev_readwrite
 * rather than sdev_sendrecv to avoid confusion with sdev_sendrec.
 */
int
sdev_readwrite(dev_t dev, vir_bytes data_buf, size_t data_len,
	vir_bytes ctl_buf, unsigned int ctl_len, vir_bytes addr_buf,
	unsigned int addr_len, int flags, int rw_flag, int filp_flags,
	vir_bytes user_buf)
{
	struct smap *sp;
	sockid_t sock_id;
	cp_grant_id_t data_grant, ctl_grant, addr_grant;
	message m;
	int r, bits;

	if ((sp = get_smap_by_dev(dev, &sock_id)) == NULL)
		return EIO;

	/* Allocate resources. */
	data_grant = GRANT_INVALID;
	ctl_grant = GRANT_INVALID;
	addr_grant = GRANT_INVALID;
	bits = (rw_flag == WRITING) ? CPF_READ : CPF_WRITE;

	/*
	 * Supposedly it is allowed to send or receive zero data bytes, even
	 * though it is a bad idea as the return value will then be zero, which
	 * may also indicate EOF (as per W. Richard Stevens).
	 */
	if (data_buf != 0) {
		data_grant = cpf_grant_magic(sp->smap_endpt, who_e, data_buf,
		    data_len, bits);
		if (!GRANT_VALID(data_grant))
			panic("VFS: cpf_grant_magic failed");
	}

	if (ctl_buf != 0) {
		ctl_grant = cpf_grant_magic(sp->smap_endpt, who_e, ctl_buf,
		    ctl_len, bits);
		if (!GRANT_VALID(ctl_grant))
			panic("VFS: cpf_grant_magic failed");
	}

	if (addr_buf != 0) {
		addr_grant = cpf_grant_magic(sp->smap_endpt, who_e, addr_buf,
		    addr_len, bits);
		if (!GRANT_VALID(addr_grant))
			panic("VFS: cpf_grant_magic failed");
	}

	/* Prepare the request message. */
	memset(&m, 0, sizeof(m));
	m.m_type = (rw_flag == WRITING) ? SDEV_SEND : SDEV_RECV;
	m.m_vfs_lsockdriver_sendrecv.req_id = (sockid_t)who_e;
	m.m_vfs_lsockdriver_sendrecv.sock_id = sock_id;
	m.m_vfs_lsockdriver_sendrecv.data_grant = data_grant;
	m.m_vfs_lsockdriver_sendrecv.data_len = data_len;
	m.m_vfs_lsockdriver_sendrecv.ctl_grant = ctl_grant;
	m.m_vfs_lsockdriver_sendrecv.ctl_len = ctl_len;
	m.m_vfs_lsockdriver_sendrecv.addr_grant = addr_grant;
	m.m_vfs_lsockdriver_sendrecv.addr_len = addr_len;
	m.m_vfs_lsockdriver_sendrecv.user_endpt = who_e;
	m.m_vfs_lsockdriver_sendrecv.flags = flags;
	if (filp_flags & O_NONBLOCK)
		m.m_vfs_lsockdriver_sendrecv.flags |= MSG_DONTWAIT;
	if (rw_flag == WRITING && (filp_flags & O_NOSIGPIPE))
		m.m_vfs_lsockdriver_sendrecv.flags |= MSG_NOSIGNAL;

	/* Send the request to the driver. */
	if ((r = asynsend3(sp->smap_endpt, &m, AMF_NOREPLY)) != OK)
		panic("VFS: asynsend in sdev_readwrite failed: %d", r);

	/* Suspend the process until the reply arrives. */
	return sdev_suspend(dev, data_grant, ctl_grant, addr_grant, -1,
	    user_buf);
}

/*
 * Perform I/O control.
 */
int
sdev_ioctl(dev_t dev, unsigned long request, vir_bytes buf, int filp_flags)
{
	struct smap *sp;
	sockid_t sock_id;
	cp_grant_id_t grant;
	message m;
	int r;

	if ((sp = get_smap_by_dev(dev, &sock_id)) == NULL)
		return EIO;

	/* Allocate resources. */
	grant = make_ioctl_grant(sp->smap_endpt, who_e, buf, request);

	/* Prepare the request message. */
	memset(&m, 0, sizeof(m));
	m.m_type = SDEV_IOCTL;
	m.m_vfs_lsockdriver_ioctl.req_id = (sockid_t)who_e;
	m.m_vfs_lsockdriver_ioctl.sock_id = sock_id;
	m.m_vfs_lsockdriver_ioctl.request = request;
	m.m_vfs_lsockdriver_ioctl.grant = grant;
	m.m_vfs_lsockdriver_ioctl.user_endpt = who_e;
	m.m_vfs_lsockdriver_ioctl.sflags =
	    (filp_flags & O_NONBLOCK) ? SDEV_NONBLOCK : 0;

	/* Send the request to the driver. */
	if ((r = asynsend3(sp->smap_endpt, &m, AMF_NOREPLY)) != OK)
		panic("VFS: asynsend in sdev_ioctl failed: %d", r);

	/* Suspend the process until the reply arrives. */
	return sdev_suspend(dev, grant, GRANT_INVALID, GRANT_INVALID, -1, 0);
}

/*
 * Set socket options.
 */
int
sdev_setsockopt(dev_t dev, int level, int name, vir_bytes addr,
	unsigned int len)
{
	struct smap *sp;
	sockid_t sock_id;
	cp_grant_id_t grant;
	message m;
	int r;

	if ((sp = get_smap_by_dev(dev, &sock_id)) == NULL)
		return EIO;

	/* Allocate resources. */
	grant = cpf_grant_magic(sp->smap_endpt, who_e, addr, len, CPF_READ);
	if (!GRANT_VALID(grant))
		panic("VFS: cpf_grant_magic failed");

	/* Prepare the request message. */
	memset(&m, 0, sizeof(m));
	m.m_type = SDEV_SETSOCKOPT;
	m.m_vfs_lsockdriver_getset.req_id = (sockid_t)who_e;
	m.m_vfs_lsockdriver_getset.sock_id = sock_id;
	m.m_vfs_lsockdriver_getset.level = level;
	m.m_vfs_lsockdriver_getset.name = name;
	m.m_vfs_lsockdriver_getset.grant = grant;
	m.m_vfs_lsockdriver_getset.len = len;

	/* Send the request, and wait for the reply. */
	r = sdev_sendrec(sp, &m);

	/* Free resources. */
	(void)cpf_revoke(grant);

	if (r != OK)
		return r;	/* socket driver died */

	/* Parse and return the reply. */
	if (m.m_type != SDEV_REPLY) {
		printf("VFS: %d sent bad reply type %d for call %d\n",
		    sp->smap_endpt, m.m_type, job_call_nr);
		return EIO;
	}

	return m.m_lsockdriver_vfs_reply.status;
}

/*
 * Send and receive a "get" request: getsockopt, getsockname, or getpeername.
 */
static int
sdev_get(dev_t dev, int type, int level, int name, vir_bytes addr,
	unsigned int * len)
{
	struct smap *sp;
	sockid_t sock_id;
	cp_grant_id_t grant;
	message m;
	int r;

	assert(type == SDEV_GETSOCKOPT || type == SDEV_GETSOCKNAME ||
	    type == SDEV_GETPEERNAME);

	if ((sp = get_smap_by_dev(dev, &sock_id)) == NULL)
		return EIO;

	/* Allocate resources. */
	grant = cpf_grant_magic(sp->smap_endpt, who_e, addr, *len, CPF_WRITE);
	if (!GRANT_VALID(grant))
		panic("VFS: cpf_grant_magic failed");

	/* Prepare the request message. */
	memset(&m, 0, sizeof(m));
	m.m_type = type;
	m.m_vfs_lsockdriver_getset.req_id = (sockid_t)who_e;
	m.m_vfs_lsockdriver_getset.sock_id = sock_id;
	m.m_vfs_lsockdriver_getset.level = level;
	m.m_vfs_lsockdriver_getset.name = name;
	m.m_vfs_lsockdriver_getset.grant = grant;
	m.m_vfs_lsockdriver_getset.len = *len;

	/* Send the request, and wait for the reply. */
	r = sdev_sendrec(sp, &m);

	/* Free resources. */
	(void)cpf_revoke(grant);

	if (r != OK)
		return r;	/* socket driver died */

	/* Parse and return the reply. */
	if (m.m_type != SDEV_REPLY) {
		printf("VFS: %d sent bad reply type %d for call %d\n",
		    sp->smap_endpt, m.m_type, job_call_nr);
		return EIO;
	}

	if ((r = m.m_lsockdriver_vfs_reply.status) < 0)
		return r;

	*len = (unsigned int)r;
	return OK;
}

/*
 * Get socket options.
 */
int
sdev_getsockopt(dev_t dev, int level, int name, vir_bytes addr,
	unsigned int * len)
{

	return sdev_get(dev, SDEV_GETSOCKOPT, level, name, addr, len);
}

/*
 * Get the local address of a socket.
 */
int
sdev_getsockname(dev_t dev, vir_bytes addr, unsigned int * addr_len)
{

	return sdev_get(dev, SDEV_GETSOCKNAME, 0, 0, addr, addr_len);
}

/*
 * Get the remote address of a socket.
 */
int
sdev_getpeername(dev_t dev, vir_bytes addr, unsigned int * addr_len)
{

	return sdev_get(dev, SDEV_GETPEERNAME, 0, 0, addr, addr_len);
}

/*
 * Shut down socket send and receive operations.
 */
int
sdev_shutdown(dev_t dev, int how)
{

	assert(how == SHUT_RD || how == SHUT_WR || how == SHUT_RDWR);

	return sdev_simple(dev, SDEV_SHUTDOWN, how);
}

/*
 * Close the socket identified by the given socket device number.
 */
int
sdev_close(dev_t dev, int may_suspend)
{
	struct smap *sp;
	sockid_t sock_id;
	message m;
	int r;

	/*
	 * Originally, all close requests blocked the calling thread, but the
	 * new support for SO_LINGER has changed that. In one strictly limited
	 * case, namely the user process calling close(2), we suspend the
	 * close request and handle it asynchronously. In all other cases,
	 * including close-on-exit, close-on-exec, and even dup2, the close is
	 * issued as a thread-synchronous request instead.
	 */
	if (may_suspend) {
		if ((sp = get_smap_by_dev(dev, &sock_id)) == NULL)
			return EIO;

		/* Prepare the request message. */
		memset(&m, 0, sizeof(m));
		m.m_type = SDEV_CLOSE;
		m.m_vfs_lsockdriver_simple.req_id = (sockid_t)who_e;
		m.m_vfs_lsockdriver_simple.sock_id = sock_id;
		m.m_vfs_lsockdriver_simple.param = 0;

		/* Send the request to the driver. */
		if ((r = asynsend3(sp->smap_endpt, &m, AMF_NOREPLY)) != OK)
			panic("VFS: asynsend in sdev_close failed: %d", r);

		/* Suspend the process until the reply arrives. */
		return sdev_suspend(dev, GRANT_INVALID, GRANT_INVALID,
		    GRANT_INVALID, -1, 0);
	} else
		/* Block the calling thread until the socket is closed. */
		return sdev_simple(dev, SDEV_CLOSE, SDEV_NONBLOCK);
}

/*
 * Initiate a select call on a socket device. Return OK iff the request was
 * sent, without suspending the process.
 */
int
sdev_select(dev_t dev, int ops)
{
	struct smap *sp;
	sockid_t sock_id;
	message m;
	int r;

	if ((sp = get_smap_by_dev(dev, &sock_id)) == NULL)
		return EIO;

	/* Prepare the request message. */
	memset(&m, 0, sizeof(m));
	m.m_type = SDEV_SELECT;
	m.m_vfs_lsockdriver_select.sock_id = sock_id;
	m.m_vfs_lsockdriver_select.ops = ops;

	/* Send the request to the driver. */
	if ((r = asynsend3(sp->smap_endpt, &m, AMF_NOREPLY)) != OK)
		panic("VFS: asynsend in sdev_select failed: %d", r);

	return OK;
}

/*
 * A reply has arrived for a previous socket accept request, and the reply
 * indicates that a socket has been accepted. A status is also returned;
 * usually, this status is OK, but if not, the newly accepted socket must be
 * closed immediately again. Process the low-level aspects of the reply, and
 * call resume_accept() to let the upper socket layer handle the rest. This
 * function is always called from a worker thread, and may thus block.
 */
static void
sdev_finish_accept(struct fproc * rfp, message * m_ptr)
{
	struct smap *sp;
	sockid_t sock_id;
	dev_t dev;
	unsigned int len;
	int status;

	assert(rfp->fp_sdev.callnr == VFS_ACCEPT);
	assert(m_ptr->m_type == SDEV_ACCEPT_REPLY);
	assert(m_ptr->m_lsockdriver_vfs_accept_reply.sock_id >= 0);

	/* Free resources. Accept requests use up to one grant. */
	if (GRANT_VALID(rfp->fp_sdev.grant[0]))
		cpf_revoke(rfp->fp_sdev.grant[0]);
	assert(!GRANT_VALID(rfp->fp_sdev.grant[1]));
	assert(!GRANT_VALID(rfp->fp_sdev.grant[2]));

	sock_id = m_ptr->m_lsockdriver_vfs_accept_reply.sock_id;
	status = m_ptr->m_lsockdriver_vfs_accept_reply.status;
	len = m_ptr->m_lsockdriver_vfs_accept_reply.len;

	/*
	 * We do not want the upper socket layer (socket.c) to deal with smap
	 * and socket ID details, so we construct the new socket device number
	 * here. We won't use the saved listen FD to determine the smap entry
	 * here, since that involves file pointers and other upper-layer-only
	 * stuff. So we have to look it up by the source endpoint. As a
	 * result, we detect some driver deaths here (but not all: see below).
	 */
	if ((sp = get_smap_by_endpt(m_ptr->m_source)) != NULL) {
		/* Leave 'status' as is, regardless of whether it is OK. */
		dev = make_smap_dev(sp, sock_id);
	} else {
		/*
		 * The driver must have died while the thread was blocked on
		 * activation. Extremely rare, but theoretically possible.
		 * Some driver deaths are indicated only by a driver-up
		 * announcement though; resume_accept() will detect this by
		 * checking that the listening socket has not been invalidated.
		 */
		status = EIO;
		dev = NO_DEV;
	}

	/* Let the upper socket layer handle the rest. */
	resume_accept(rfp, status, dev, len, rfp->fp_sdev.aux.fd);
}

/*
 * Worker thread stub for finishing successful accept requests.
 */
static void
do_accept_reply(void)
{

	sdev_finish_accept(fp, &job_m_in);
}

/*
 * With the exception of successful accept requests, this function is called
 * whenever a reply is received for a socket driver request for which the
 * corresponding user process was suspended (as opposed to requests which just
 * suspend the worker thread), i.e., for long-lived socket calls. This
 * function is also called if the socket driver has died during a long-lived
 * socket call, in which case the given message's m_type is a negative error
 * code.
 *
 * The division between the upper socket layer (socket.c) and the lower socket
 * layer (this file) here is roughly: if resuming the system call involves no
 * more than a simple replycode() call, do that here; otherwise call into the
 * upper socket layer to handle the details. In any case, do not ever let the
 * upper socket layer deal with reply message parsing or suspension state.
 *
 * This function may or may not be called from a worker thread; as such, it
 * MUST NOT block its calling thread. This function is called for failed
 * accept requests; successful accept requests have their replies routed
 * through sdev_finish_accept() instead, because those require a worker thread.
 */
static void
sdev_finish(struct fproc * rfp, message * m_ptr)
{
	unsigned int ctl_len, addr_len;
	int callnr, status, flags;

	/* The suspension status must just have been cleared by the caller. */
	assert(rfp->fp_blocked_on == FP_BLOCKED_ON_NONE);

	/*
	 * Free resources. Every suspending call sets all grant fields, so we
	 * can safely revoke all of them without testing the original call.
	 */
	if (GRANT_VALID(rfp->fp_sdev.grant[0]))
		cpf_revoke(rfp->fp_sdev.grant[0]);
	if (GRANT_VALID(rfp->fp_sdev.grant[1]))
		cpf_revoke(rfp->fp_sdev.grant[1]);
	if (GRANT_VALID(rfp->fp_sdev.grant[2]))
		cpf_revoke(rfp->fp_sdev.grant[2]);

	/*
	 * Now that the socket driver call has finished (or been stopped due to
	 * driver death), we need to finish the corresponding system call from
	 * the user process. The action to take depends on the system call.
	 */
	callnr = rfp->fp_sdev.callnr;

	switch (callnr) {
	case VFS_BIND:
	case VFS_CONNECT:
	case VFS_WRITE:
	case VFS_SENDTO:
	case VFS_SENDMSG:
	case VFS_IOCTL:
	case VFS_CLOSE:
		/*
		 * These calls all use the same SDEV_REPLY reply type and only
		 * need to reply an OK-or-error status code back to userland.
		 */
		if (m_ptr->m_type == SDEV_REPLY) {
			status = m_ptr->m_lsockdriver_vfs_reply.status;

			/*
			 * For close(2) calls, the return value must indicate
			 * that the file descriptor has been closed, so errors
			 * other than EINPROGRESS are not passed to userland.
803 */ 804 if (callnr == VFS_CLOSE && 805 status != OK && status != EINPROGRESS) 806 status = OK; 807 } else if (m_ptr->m_type < 0) { 808 status = m_ptr->m_type; 809 } else { 810 printf("VFS: %d sent bad reply type %d for call %d\n", 811 m_ptr->m_source, m_ptr->m_type, callnr); 812 status = EIO; 813 } 814 replycode(rfp->fp_endpoint, status); 815 break; 816 817 case VFS_READ: 818 case VFS_RECVFROM: 819 case VFS_RECVMSG: 820 /* 821 * These calls use SDEV_RECV_REPLY. The action to take depends 822 * on the exact call. 823 */ 824 ctl_len = addr_len = 0; 825 flags = 0; 826 if (m_ptr->m_type == SDEV_RECV_REPLY) { 827 status = m_ptr->m_lsockdriver_vfs_recv_reply.status; 828 ctl_len = m_ptr->m_lsockdriver_vfs_recv_reply.ctl_len; 829 addr_len = 830 m_ptr->m_lsockdriver_vfs_recv_reply.addr_len; 831 flags = m_ptr->m_lsockdriver_vfs_recv_reply.flags; 832 } else if (m_ptr->m_type < 0) { 833 status = m_ptr->m_type; 834 } else { 835 printf("VFS: %d sent bad reply type %d for call %d\n", 836 m_ptr->m_source, m_ptr->m_type, callnr); 837 status = EIO; 838 } 839 840 switch (callnr) { 841 case VFS_READ: 842 replycode(rfp->fp_endpoint, status); 843 break; 844 case VFS_RECVFROM: 845 resume_recvfrom(rfp, status, addr_len); 846 break; 847 case VFS_RECVMSG: 848 resume_recvmsg(rfp, status, ctl_len, addr_len, flags, 849 rfp->fp_sdev.aux.buf); 850 break; 851 } 852 break; 853 854 case VFS_ACCEPT: 855 /* 856 * This call uses SDEV_ACCEPT_REPLY. We only get here if the 857 * accept call has failed without creating a new socket, in 858 * which case we can simply call replycode() with the error. 859 * For nothing other than consistency, we let resume_accept() 860 * handle this case too. 861 */ 862 addr_len = 0; 863 if (m_ptr->m_type == SDEV_ACCEPT_REPLY) { 864 assert(m_ptr->m_lsockdriver_vfs_accept_reply.sock_id < 865 0); 866 status = m_ptr->m_lsockdriver_vfs_accept_reply.status; 867 addr_len = m_ptr->m_lsockdriver_vfs_accept_reply.len; 868 } else if (m_ptr->m_type < 0) { 869 status = m_ptr->m_type; 870 } else { 871 printf("VFS: %d sent bad reply type %d for call %d\n", 872 m_ptr->m_source, m_ptr->m_type, callnr); 873 status = EIO; 874 } 875 /* 876 * Quick rundown of m_lsockdriver_vfs_accept_reply cases: 877 * 878 * - sock_id >= 0, status == OK: new socket accepted 879 * - sock_id >= 0, status != OK: new socket must be closed 880 * - sock_id < 0, status != OK: failure accepting socket 881 * - sock_id < 0, status == OK: invalid, covered right here 882 * 883 * See libsockdriver for why there are two reply fields at all. 884 */ 885 if (status >= 0) { 886 printf("VFS: %d sent bad status %d for call %d\n", 887 m_ptr->m_source, status, callnr); 888 status = EIO; 889 } 890 resume_accept(rfp, status, NO_DEV, addr_len, 891 rfp->fp_sdev.aux.fd); 892 break; 893 894 default: 895 /* 896 * Ultimately, enumerating all system calls that may cause 897 * socket I/O may prove too cumbersome. In that case, the 898 * callnr field could be replaced by a field that stores the 899 * combination of the expected reply type and the action to 900 * take, for example. 901 */ 902 panic("VFS: socket reply %d for unknown call %d from %d", 903 m_ptr->m_type, callnr, rfp->fp_endpoint); 904 } 905 } 906 907 /* 908 * Abort the suspended socket call for the given process, because the 909 * corresponding socket driver has died. 
 */
void
sdev_stop(struct fproc * rfp)
{
	message m;

	assert(rfp->fp_blocked_on == FP_BLOCKED_ON_SDEV);

	rfp->fp_blocked_on = FP_BLOCKED_ON_NONE;

	/*
	 * We use one single approach both here and when stopping worker
	 * threads: the reply message's m_type is set to an error code (always
	 * EIO for now) instead of an actual SDEV_ reply code. We test for
	 * this case in non-suspending calls as well as in sdev_finish().
	 */
	m.m_type = EIO;
	sdev_finish(rfp, &m);
}

/*
 * Cancel the ongoing long-lived socket call, because the calling process has
 * received a caught or terminating signal. This function is always called
 * from a worker thread (as part of PM work), with 'fp' set to the process
 * that issued the original system call. The calling function has just
 * unsuspended the process out of the _SDEV blocking state. The job of this
 * function is to issue a cancel request and then block until a reply comes
 * in; the reply may indicate success, in which case it must be handled
 * accordingly.
 */
void
sdev_cancel(void)
{
	struct smap *sp;
	message m;
	sockid_t sock_id;

	/* The suspension status must just have been cleared by the caller. */
	assert(fp->fp_blocked_on == FP_BLOCKED_ON_NONE);

	if ((sp = get_smap_by_dev(fp->fp_sdev.dev, &sock_id)) != NULL) {
		/* Prepare the request message. */
		memset(&m, 0, sizeof(m));
		m.m_type = SDEV_CANCEL;
		m.m_vfs_lsockdriver_simple.req_id = (sockid_t)who_e;
		m.m_vfs_lsockdriver_simple.sock_id = sock_id;

		/*
		 * Send the cancel request, and wait for a reply. The reply
		 * will be for the original request and must be processed
		 * accordingly. It is possible that the original request
		 * actually succeeded, because 1) the cancel request resulted
		 * in partial success or 2) the original reply and the cancel
		 * request crossed each other. It is because of the second
		 * case that a socket driver must not respond at all to a
		 * cancel operation for an unknown request.
		 */
		sdev_sendrec(sp, &m);
	} else
		m.m_type = EIO;

	/*
	 * Successful accept requests require special processing, but since we
	 * are already operating from a worker thread here, we need not spawn
	 * an additional worker thread for this case.
	 */
	if (m.m_type == SDEV_ACCEPT_REPLY &&
	    m.m_lsockdriver_vfs_accept_reply.sock_id >= 0)
		sdev_finish_accept(fp, &m);
	else
		sdev_finish(fp, &m);
}

/*
 * A socket driver has sent a reply to a socket request. Process it, by either
 * waking up an active worker thread, finishing the system call from here, or
 * (in the exceptional case of accept calls) spawning a new worker thread to
 * process the reply. This function MUST NOT block its calling thread.
 */
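/*
 * In rough terms, the routing below works as follows (a summary of the code,
 * not of the sockdriver protocol): select replies are passed straight to the
 * select code; replies for which a worker thread is blocked in sdev_sendrec()
 * are copied to that thread, which is then woken up; a successful accept
 * reply for a suspended process is handed to a newly spawned worker thread
 * running do_accept_reply(); and any other reply for a suspended process is
 * processed directly through sdev_finish(). Replies that match none of these
 * cases are logged and dropped.
 */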
void
sdev_reply(void)
{
	struct fproc *rfp;
	struct smap *sp;
	struct worker_thread *wp;
	sockid_t req_id = -1;
	dev_t dev;
	int slot;

	if ((sp = get_smap_by_endpt(who_e)) == NULL) {
		printf("VFS: ignoring sock dev reply from unknown driver %d\n",
		    who_e);
		return;
	}

	switch (call_nr) {
	case SDEV_REPLY:
		req_id = m_in.m_lsockdriver_vfs_reply.req_id;
		break;
	case SDEV_SOCKET_REPLY:
		req_id = m_in.m_lsockdriver_vfs_socket_reply.req_id;
		break;
	case SDEV_ACCEPT_REPLY:
		req_id = m_in.m_lsockdriver_vfs_accept_reply.req_id;
		break;
	case SDEV_RECV_REPLY:
		req_id = m_in.m_lsockdriver_vfs_recv_reply.req_id;
		break;
	case SDEV_SELECT1_REPLY:
		dev = make_smap_dev(sp,
		    m_in.m_lsockdriver_vfs_select_reply.sock_id);
		select_sdev_reply1(dev,
		    m_in.m_lsockdriver_vfs_select_reply.status);
		return;
	case SDEV_SELECT2_REPLY:
		dev = make_smap_dev(sp,
		    m_in.m_lsockdriver_vfs_select_reply.sock_id);
		select_sdev_reply2(dev,
		    m_in.m_lsockdriver_vfs_select_reply.status);
		return;
	default:
		printf("VFS: ignoring unknown sock dev reply %d from %d\n",
		    call_nr, who_e);
		return;
	}

	if (isokendpt((endpoint_t)req_id, &slot) != OK) {
		printf("VFS: ignoring sock dev reply from %d for unknown %d\n",
		    who_e, req_id);
		return;
	}

	rfp = &fproc[slot];
	wp = rfp->fp_worker;
	if (wp != NULL && wp->w_task == who_e && wp->w_drv_sendrec != NULL) {
		assert(!fp_is_blocked(rfp));
		*wp->w_drv_sendrec = m_in;
		wp->w_drv_sendrec = NULL;
		worker_signal(wp);	/* resume suspended thread */
		/*
		 * It is up to the worker thread to 1) check that the reply is
		 * of the right type for the request, and 2) keep in mind that
		 * the reply type may be EIO in case the socket driver died.
		 */
	} else if (rfp->fp_blocked_on != FP_BLOCKED_ON_SDEV ||
	    get_smap_by_dev(rfp->fp_sdev.dev, NULL) != sp) {
		printf("VFS: ignoring sock dev reply, %d not blocked on %d\n",
		    rfp->fp_endpoint, who_e);
		return;
	} else if (call_nr == SDEV_ACCEPT_REPLY &&
	    m_in.m_lsockdriver_vfs_accept_reply.sock_id >= 0) {
		/*
		 * For accept replies that return a new socket, we need to
		 * spawn a worker thread, because accept calls may block (so
		 * there will no longer be a worker thread) and processing the
		 * reply requires additional blocking calls (which we cannot
		 * issue from the main thread). This is tricky. Under no
		 * circumstances may we "lose" a legitimate reply, because this
		 * would lead to resource leaks in the socket driver. To this
		 * end, we rely on the current worker thread model to
		 * prioritize regular work over PM work. Still, sdev_cancel()
		 * may end up receiving the accept reply if it was already
		 * blocked waiting for the reply message, and it must then
		 * perform the same tasks.
		 */
		/*
		 * It is possible, if all threads are in use, that there is a
		 * "gap" between starting the thread and its activation. The
		 * main problem with this case is that the socket driver may
		 * die within that gap.
		 * For accepts, we address this with no less than two checks:
		 * 1) in this file, by looking up the smap entry by the reply
		 * source endpoint again - if the entry is no longer valid,
		 * the socket driver must have died; 2) in socket.c, by
		 * revalidating the original listening socket - if the
		 * listening socket has been invalidated, the driver died.
		 *
		 * Since we unsuspend the process now, a socket driver sending
		 * two accept replies in a row cannot cause VFS to attempt
		 * spawning two threads; the second reply will simply be
		 * ignored.
		 */
		assert(fp->fp_func == NULL);

		worker_start(rfp, do_accept_reply, &m_in, FALSE /*use_spare*/);

		/*
		 * TODO: I just introduced the notion of not using the fp_u
		 * union across yields after unsuspension, but for socket calls
		 * we have a lot of socket state to carry over, so I'm now
		 * immediately violating my own rule again here. Possible
		 * solutions: 1) introduce another blocking state just to mark
		 * the fp_u union in use (this has side effects though), 2)
		 * introduce a pseudo message type which covers both the accept
		 * reply fields and the fp_u state (do_pending_pipe does this),
		 * or 3) add a fp_flags flag for this purpose. In any case,
		 * the whole point is that we catch any attempts to reuse fp_u
		 * for other purposes and thus cause state corruption. This
		 * should not happen anyway, but it's too dangerous to leave
		 * entirely unchecked. --dcvmoole
		 */
		rfp->fp_blocked_on = FP_BLOCKED_ON_NONE;
	} else {
		rfp->fp_blocked_on = FP_BLOCKED_ON_NONE;

		sdev_finish(rfp, &m_in);
	}
}