1 /*
2 * This file implements the lower socket layer of VFS: communication with
3 * socket drivers. Socket driver communication evolved out of character driver
4 * communication, and the two have many similarities between them. Most
5 * importantly, socket driver communication also has the distinction between
6 * short-lived and long-lived requests.
7 *
8 * Short-lived requests are expected to be replied to by the socket driver
9 * immediately in all cases. For such requests, VFS keeps the worker thread
10 * for the calling process alive until the reply arrives. In contrast,
11 * long-lived requests may block. For such requests, VFS suspends the calling
12 * process until a reply comes in, or until a signal interrupts the request.
13 * Both short-lived and long-lived requests may be aborted if VFS finds that
14 * the corresponding socket driver has died. Even though long-lived requests
15 * may be marked as nonblocking, nonblocking calls are still handled as
16 * long-lived in terms of VFS processing.
17 *
18 * For an overview of the socket driver requests and replies, message layouts,
19 * and which requests are long-lived or short-lived (i.e. may suspend or not),
20 * please refer to the corresponding table in the libsockdriver source code.
21 *
22 * For most long-lived socket requests, the main VFS thread processes the reply
23 * from the socket driver. This typically consists of waking up the user
24 * process that originally issued the system call on the socket by simply
25 * relaying the call's result code. Some socket calls require a specific reply
26 * message and/or additional post-call actions; for those, resume_*() calls are
27 * made back into the upper socket layer.
28 *
29 * If a process is interrupted by a signal, any ongoing long-lived socket
30 * request must be canceled. This is done by sending a one-way cancel request
31 * to the socket driver, and waiting for it to reply to the original request.
32 * In this case, the reply will be processed from the worker thread that is
33 * handling the cancel operation. Canceling does not imply call failure: the
34 * cancellation may result in a partial I/O reply, and a successful reply may
35 * cross the cancel request.
36 *
37 * One main exception is the reply to an accept request. Once a connection has
38 * been accepted, a new socket has to be created for it. This requires actions
39 * that require the ability to block the current thread, and so, a worker
40 * thread is spawned for processing successful accept replies, unless the reply
41 * was received from a worker thread already (as may be the case if the accept
42 * request was being canceled).
43 */
44
45 #include "fs.h"
46 #include <sys/socket.h>
47 #include <minix/callnr.h>
48
49 /*
50 * Send a short-lived request message to the given socket driver, and suspend
51 * the current worker thread until a reply message has been received. On
52 * success, the function will return OK, and the reply message will be stored
53 * in the message structure pointed to by 'm_ptr'. The function may fail if
54 * the socket driver dies before sending a reply. In that case, the function
55 * will return a negative error code, and also store the same negative error
56 * code in the m_type field of the 'm_ptr' message structure.
57 */
58 static int
sdev_sendrec(struct smap * sp,message * m_ptr)59 sdev_sendrec(struct smap * sp, message * m_ptr)
60 {
61 int r;
62
63 /* Send the request to the driver. */
64 if ((r = asynsend3(sp->smap_endpt, m_ptr, AMF_NOREPLY)) != OK)
65 panic("VFS: asynsend in sdev_sendrec failed: %d", r);
66
67 /* Suspend this thread until we have received the response. */
68 self->w_task = sp->smap_endpt;
69 self->w_drv_sendrec = m_ptr;
70
71 worker_wait();
72
73 self->w_task = NONE;
74 assert(self->w_drv_sendrec == NULL);
75
76 return (!IS_SDEV_RS(m_ptr->m_type)) ? m_ptr->m_type : OK;
77 }
78
79 /*
80 * Suspend the current process for later completion of its system call.
81 */
82 int
sdev_suspend(dev_t dev,cp_grant_id_t grant0,cp_grant_id_t grant1,cp_grant_id_t grant2,int fd,vir_bytes buf)83 sdev_suspend(dev_t dev, cp_grant_id_t grant0, cp_grant_id_t grant1,
84 cp_grant_id_t grant2, int fd, vir_bytes buf)
85 {
86
87 fp->fp_sdev.dev = dev;
88 fp->fp_sdev.callnr = job_call_nr;
89 fp->fp_sdev.grant[0] = grant0;
90 fp->fp_sdev.grant[1] = grant1;
91 fp->fp_sdev.grant[2] = grant2;
92
93 if (job_call_nr == VFS_ACCEPT) {
94 assert(fd != -1);
95 assert(buf == 0);
96 fp->fp_sdev.aux.fd = fd;
97 } else if (job_call_nr == VFS_RECVMSG) {
98 assert(fd == -1);
99 /*
100 * TODO: we are not yet consistent enough in dealing with
101 * mapped NULL pages to have an assert(buf != 0) here..
102 */
103 fp->fp_sdev.aux.buf = buf;
104 } else {
105 assert(fd == -1);
106 assert(buf == 0);
107 }
108
109 suspend(FP_BLOCKED_ON_SDEV);
110 return SUSPEND;
111 }
112
113 /*
114 * Create a socket or socket pair. Return OK on success, with the new socket
115 * device identifier(s) stored in the 'dev' array. Return an error code upon
116 * failure.
117 */
118 int
sdev_socket(int domain,int type,int protocol,dev_t * dev,int pair)119 sdev_socket(int domain, int type, int protocol, dev_t * dev, int pair)
120 {
121 struct smap *sp;
122 message m;
123 sockid_t sock_id, sock_id2;
124 int r;
125
126 /* We could return EAFNOSUPPORT, but the caller should have checked. */
127 if ((sp = get_smap_by_domain(domain)) == NULL)
128 panic("VFS: sdev_socket for unknown domain");
129
130 /* Prepare the request message. */
131 memset(&m, 0, sizeof(m));
132 m.m_type = pair ? SDEV_SOCKETPAIR : SDEV_SOCKET;
133 m.m_vfs_lsockdriver_socket.req_id = (sockid_t)who_e;
134 m.m_vfs_lsockdriver_socket.domain = domain;
135 m.m_vfs_lsockdriver_socket.type = type;
136 m.m_vfs_lsockdriver_socket.protocol = protocol;
137 m.m_vfs_lsockdriver_socket.user_endpt = who_e;
138
139 /* Send the request, and wait for the reply. */
140 if ((r = sdev_sendrec(sp, &m)) != OK)
141 return r; /* socket driver died */
142
143 /* Parse the reply message, and check for protocol errors. */
144 if (m.m_type != SDEV_SOCKET_REPLY) {
145 printf("VFS: %d sent bad reply type %d for call %d\n",
146 sp->smap_endpt, m.m_type, job_call_nr);
147 return EIO;
148 }
149
150 sock_id = m.m_lsockdriver_vfs_socket_reply.sock_id;
151 sock_id2 = m.m_lsockdriver_vfs_socket_reply.sock_id2;
152
153 /* Check for regular errors. Upon success, return the socket(s). */
154 if (sock_id < 0)
155 return sock_id;
156
157 dev[0] = make_smap_dev(sp, sock_id);
158
159 if (pair) {
160 /* Okay, one more protocol error. */
161 if (sock_id2 < 0) {
162 printf("VFS: %d sent bad SOCKETPAIR socket ID %d\n",
163 sp->smap_endpt, sock_id2);
164 (void)sdev_close(dev[0], FALSE /*may_suspend*/);
165 return EIO;
166 }
167
168 dev[1] = make_smap_dev(sp, sock_id2);
169 }
170
171 return OK;
172 }
173
174 /*
175 * Bind or connect a socket to a particular address. These calls may block, so
176 * suspend the current process instead of making the thread wait for the reply.
177 */
178 static int
sdev_bindconn(dev_t dev,int type,vir_bytes addr,unsigned int addr_len,int filp_flags)179 sdev_bindconn(dev_t dev, int type, vir_bytes addr, unsigned int addr_len,
180 int filp_flags)
181 {
182 struct smap *sp;
183 sockid_t sock_id;
184 cp_grant_id_t grant;
185 message m;
186 int r;
187
188 if ((sp = get_smap_by_dev(dev, &sock_id)) == NULL)
189 return EIO;
190
191 /* Allocate resources. */
192 grant = cpf_grant_magic(sp->smap_endpt, who_e, addr, addr_len,
193 CPF_READ);
194 if (!GRANT_VALID(grant))
195 panic("VFS: cpf_grant_magic failed");
196
197 /* Prepare the request message. */
198 memset(&m, 0, sizeof(m));
199 m.m_type = type;
200 m.m_vfs_lsockdriver_addr.req_id = (sockid_t)who_e;
201 m.m_vfs_lsockdriver_addr.sock_id = sock_id;
202 m.m_vfs_lsockdriver_addr.grant = grant;
203 m.m_vfs_lsockdriver_addr.len = addr_len;
204 m.m_vfs_lsockdriver_addr.user_endpt = who_e;
205 m.m_vfs_lsockdriver_addr.sflags =
206 (filp_flags & O_NONBLOCK) ? SDEV_NONBLOCK : 0;
207
208 /* Send the request to the driver. */
209 if ((r = asynsend3(sp->smap_endpt, &m, AMF_NOREPLY)) != OK)
210 panic("VFS: asynsend in sdev_bindconn failed: %d", r);
211
212 /* Suspend the process until the reply arrives. */
213 return sdev_suspend(dev, grant, GRANT_INVALID, GRANT_INVALID, -1, 0);
214 }
215
216 /*
217 * Bind a socket to a local address.
218 */
219 int
sdev_bind(dev_t dev,vir_bytes addr,unsigned int addr_len,int filp_flags)220 sdev_bind(dev_t dev, vir_bytes addr, unsigned int addr_len, int filp_flags)
221 {
222
223 return sdev_bindconn(dev, SDEV_BIND, addr, addr_len, filp_flags);
224 }
225
226 /*
227 * Connect a socket to a remote address.
228 */
229 int
sdev_connect(dev_t dev,vir_bytes addr,unsigned int addr_len,int filp_flags)230 sdev_connect(dev_t dev, vir_bytes addr, unsigned int addr_len, int filp_flags)
231 {
232
233 return sdev_bindconn(dev, SDEV_CONNECT, addr, addr_len, filp_flags);
234 }
235
236 /*
237 * Send and receive a "simple" request: listen, shutdown, or close. Note that
238 * while cancel requests use the same request format, they require a different
239 * way of handling their replies.
240 */
241 static int
sdev_simple(dev_t dev,int type,int param)242 sdev_simple(dev_t dev, int type, int param)
243 {
244 struct smap *sp;
245 sockid_t sock_id;
246 message m;
247 int r;
248
249 assert(type == SDEV_LISTEN || type == SDEV_SHUTDOWN ||
250 type == SDEV_CLOSE);
251
252 if ((sp = get_smap_by_dev(dev, &sock_id)) == NULL)
253 return EIO;
254
255 /* Prepare the request message. */
256 memset(&m, 0, sizeof(m));
257 m.m_type = type;
258 m.m_vfs_lsockdriver_simple.req_id = (sockid_t)who_e;
259 m.m_vfs_lsockdriver_simple.sock_id = sock_id;
260 m.m_vfs_lsockdriver_simple.param = param;
261
262 /* Send the request, and wait for the reply. */
263 if ((r = sdev_sendrec(sp, &m)) != OK)
264 return r; /* socket driver died */
265
266 /* Parse and return the reply. */
267 if (m.m_type != SDEV_REPLY) {
268 printf("VFS: %d sent bad reply type %d for call %d\n",
269 sp->smap_endpt, m.m_type, job_call_nr);
270 return EIO;
271 }
272
273 return m.m_lsockdriver_vfs_reply.status;
274 }
275
276 /*
277 * Put a socket in listening mode.
278 */
279 int
sdev_listen(dev_t dev,int backlog)280 sdev_listen(dev_t dev, int backlog)
281 {
282
283 assert(backlog >= 0);
284
285 return sdev_simple(dev, SDEV_LISTEN, backlog);
286 }
287
288 /*
289 * Accept a new connection on a socket.
290 */
291 int
sdev_accept(dev_t dev,vir_bytes addr,unsigned int addr_len,int filp_flags,int listen_fd)292 sdev_accept(dev_t dev, vir_bytes addr, unsigned int addr_len, int filp_flags,
293 int listen_fd)
294 {
295 struct smap *sp;
296 sockid_t sock_id;
297 cp_grant_id_t grant;
298 message m;
299 int r;
300
301 if ((sp = get_smap_by_dev(dev, &sock_id)) == NULL)
302 return EIO;
303
304 /* Allocate resources. */
305 if (addr != 0) {
306 grant = cpf_grant_magic(sp->smap_endpt, who_e, addr, addr_len,
307 CPF_WRITE);
308 if (!GRANT_VALID(grant))
309 panic("VFS: cpf_grant_magic failed");
310 } else
311 grant = GRANT_INVALID;
312
313 /* Prepare the request message. */
314 memset(&m, 0, sizeof(m));
315 m.m_type = SDEV_ACCEPT;
316 m.m_vfs_lsockdriver_addr.req_id = (sockid_t)who_e;
317 m.m_vfs_lsockdriver_addr.sock_id = sock_id;
318 m.m_vfs_lsockdriver_addr.grant = grant;
319 m.m_vfs_lsockdriver_addr.len = addr_len;
320 m.m_vfs_lsockdriver_addr.user_endpt = who_e;
321 m.m_vfs_lsockdriver_addr.sflags =
322 (filp_flags & O_NONBLOCK) ? SDEV_NONBLOCK : 0;
323
324 /* Send the request to the driver. */
325 if ((r = asynsend3(sp->smap_endpt, &m, AMF_NOREPLY)) != OK)
326 panic("VFS: asynsend in sdev_accept failed: %d", r);
327
328 /* Suspend the process until the reply arrives. */
329 return sdev_suspend(dev, grant, GRANT_INVALID, GRANT_INVALID,
330 listen_fd, 0);
331 }
332
333 /*
334 * Send or receive a message on a socket. All read (read(2), recvfrom(2), and
335 * recvmsg(2)) and write (write(2), sendto(2), sendmsg(2)) system calls on
336 * sockets pass through this function. The function is named sdev_readwrite
337 * rather than sdev_sendrecv to avoid confusion with sdev_sendrec.
338 */
339 int
sdev_readwrite(dev_t dev,vir_bytes data_buf,size_t data_len,vir_bytes ctl_buf,unsigned int ctl_len,vir_bytes addr_buf,unsigned int addr_len,int flags,int rw_flag,int filp_flags,vir_bytes user_buf)340 sdev_readwrite(dev_t dev, vir_bytes data_buf, size_t data_len,
341 vir_bytes ctl_buf, unsigned int ctl_len, vir_bytes addr_buf,
342 unsigned int addr_len, int flags, int rw_flag, int filp_flags,
343 vir_bytes user_buf)
344 {
345 struct smap *sp;
346 sockid_t sock_id;
347 cp_grant_id_t data_grant, ctl_grant, addr_grant;
348 message m;
349 int r, bits;
350
351 if ((sp = get_smap_by_dev(dev, &sock_id)) == NULL)
352 return EIO;
353
354 /* Allocate resources. */
355 data_grant = GRANT_INVALID;
356 ctl_grant = GRANT_INVALID;
357 addr_grant = GRANT_INVALID;
358 bits = (rw_flag == WRITING) ? CPF_READ : CPF_WRITE;
359
360 /*
361 * Supposedly it is allowed to send or receive zero data bytes, even
362 * though it is a bad idea as the return value will then be zero, which
363 * may also indicate EOF (as per W. Richard Stevens).
364 */
365 if (data_buf != 0) {
366 data_grant = cpf_grant_magic(sp->smap_endpt, who_e, data_buf,
367 data_len, bits);
368 if (!GRANT_VALID(data_grant))
369 panic("VFS: cpf_grant_magic failed");
370 }
371
372 if (ctl_buf != 0) {
373 ctl_grant = cpf_grant_magic(sp->smap_endpt, who_e, ctl_buf,
374 ctl_len, bits);
375 if (!GRANT_VALID(ctl_grant))
376 panic("VFS: cpf_grant_magic failed");
377 }
378
379 if (addr_buf != 0) {
380 addr_grant = cpf_grant_magic(sp->smap_endpt, who_e, addr_buf,
381 addr_len, bits);
382 if (!GRANT_VALID(addr_grant))
383 panic("VFS: cpf_grant_magic failed");
384 }
385
386 /* Prepare the request message. */
387 memset(&m, 0, sizeof(m));
388 m.m_type = (rw_flag == WRITING) ? SDEV_SEND : SDEV_RECV;
389 m.m_vfs_lsockdriver_sendrecv.req_id = (sockid_t)who_e;
390 m.m_vfs_lsockdriver_sendrecv.sock_id = sock_id;
391 m.m_vfs_lsockdriver_sendrecv.data_grant = data_grant;
392 m.m_vfs_lsockdriver_sendrecv.data_len = data_len;
393 m.m_vfs_lsockdriver_sendrecv.ctl_grant = ctl_grant;
394 m.m_vfs_lsockdriver_sendrecv.ctl_len = ctl_len;
395 m.m_vfs_lsockdriver_sendrecv.addr_grant = addr_grant;
396 m.m_vfs_lsockdriver_sendrecv.addr_len = addr_len;
397 m.m_vfs_lsockdriver_sendrecv.user_endpt = who_e;
398 m.m_vfs_lsockdriver_sendrecv.flags = flags;
399 if (filp_flags & O_NONBLOCK)
400 m.m_vfs_lsockdriver_sendrecv.flags |= MSG_DONTWAIT;
401 if (rw_flag == WRITING && (filp_flags & O_NOSIGPIPE))
402 m.m_vfs_lsockdriver_sendrecv.flags |= MSG_NOSIGNAL;
403
404 /* Send the request to the driver. */
405 if ((r = asynsend3(sp->smap_endpt, &m, AMF_NOREPLY)) != OK)
406 panic("VFS: asynsend in sdev_readwrite failed: %d", r);
407
408 /* Suspend the process until the reply arrives. */
409 return sdev_suspend(dev, data_grant, ctl_grant, addr_grant, -1,
410 user_buf);
411 }
412
413 /*
414 * Perform I/O control.
415 */
416 int
sdev_ioctl(dev_t dev,unsigned long request,vir_bytes buf,int filp_flags)417 sdev_ioctl(dev_t dev, unsigned long request, vir_bytes buf, int filp_flags)
418 {
419 struct smap *sp;
420 sockid_t sock_id;
421 cp_grant_id_t grant;
422 message m;
423 int r;
424
425 if ((sp = get_smap_by_dev(dev, &sock_id)) == NULL)
426 return EIO;
427
428 /* Allocate resources. */
429 grant = make_ioctl_grant(sp->smap_endpt, who_e, buf, request);
430
431 /* Prepare the request message. */
432 memset(&m, 0, sizeof(m));
433 m.m_type = SDEV_IOCTL;
434 m.m_vfs_lsockdriver_ioctl.req_id = (sockid_t)who_e;
435 m.m_vfs_lsockdriver_ioctl.sock_id = sock_id;
436 m.m_vfs_lsockdriver_ioctl.request = request;
437 m.m_vfs_lsockdriver_ioctl.grant = grant;
438 m.m_vfs_lsockdriver_ioctl.user_endpt = who_e;
439 m.m_vfs_lsockdriver_ioctl.sflags =
440 (filp_flags & O_NONBLOCK) ? SDEV_NONBLOCK : 0;
441
442 /* Send the request to the driver. */
443 if ((r = asynsend3(sp->smap_endpt, &m, AMF_NOREPLY)) != OK)
444 panic("VFS: asynsend in sdev_ioctl failed: %d", r);
445
446 /* Suspend the process until the reply arrives. */
447 return sdev_suspend(dev, grant, GRANT_INVALID, GRANT_INVALID, -1, 0);
448 }
449
450 /*
451 * Set socket options.
452 */
453 int
sdev_setsockopt(dev_t dev,int level,int name,vir_bytes addr,unsigned int len)454 sdev_setsockopt(dev_t dev, int level, int name, vir_bytes addr,
455 unsigned int len)
456 {
457 struct smap *sp;
458 sockid_t sock_id;
459 cp_grant_id_t grant;
460 message m;
461 int r;
462
463 if ((sp = get_smap_by_dev(dev, &sock_id)) == NULL)
464 return EIO;
465
466 /* Allocate resources. */
467 grant = cpf_grant_magic(sp->smap_endpt, who_e, addr, len, CPF_READ);
468 if (!GRANT_VALID(grant))
469 panic("VFS: cpf_grant_magic failed");
470
471 /* Prepare the request message. */
472 memset(&m, 0, sizeof(m));
473 m.m_type = SDEV_SETSOCKOPT;
474 m.m_vfs_lsockdriver_getset.req_id = (sockid_t)who_e;
475 m.m_vfs_lsockdriver_getset.sock_id = sock_id;
476 m.m_vfs_lsockdriver_getset.level = level;
477 m.m_vfs_lsockdriver_getset.name = name;
478 m.m_vfs_lsockdriver_getset.grant = grant;
479 m.m_vfs_lsockdriver_getset.len = len;
480
481 /* Send the request, and wait for the reply. */
482 r = sdev_sendrec(sp, &m);
483
484 /* Free resources. */
485 (void)cpf_revoke(grant);
486
487 if (r != OK)
488 return r; /* socket driver died */
489
490 /* Parse and return the reply. */
491 if (m.m_type != SDEV_REPLY) {
492 printf("VFS: %d sent bad reply type %d for call %d\n",
493 sp->smap_endpt, m.m_type, job_call_nr);
494 return EIO;
495 }
496
497 return m.m_lsockdriver_vfs_reply.status;
498 }
499
500 /*
501 * Send and receive a "get" request: getsockopt, getsockname, or getpeername.
502 */
503 static int
sdev_get(dev_t dev,int type,int level,int name,vir_bytes addr,unsigned int * len)504 sdev_get(dev_t dev, int type, int level, int name, vir_bytes addr,
505 unsigned int * len)
506 {
507 struct smap *sp;
508 sockid_t sock_id;
509 cp_grant_id_t grant;
510 message m;
511 int r;
512
513 assert(type == SDEV_GETSOCKOPT || type == SDEV_GETSOCKNAME ||
514 type == SDEV_GETPEERNAME);
515
516 if ((sp = get_smap_by_dev(dev, &sock_id)) == NULL)
517 return EIO;
518
519 /* Allocate resources. */
520 grant = cpf_grant_magic(sp->smap_endpt, who_e, addr, *len, CPF_WRITE);
521 if (!GRANT_VALID(grant))
522 panic("VFS: cpf_grant_magic failed");
523
524 /* Prepare the request message. */
525 memset(&m, 0, sizeof(m));
526 m.m_type = type;
527 m.m_vfs_lsockdriver_getset.req_id = (sockid_t)who_e;
528 m.m_vfs_lsockdriver_getset.sock_id = sock_id;
529 m.m_vfs_lsockdriver_getset.level = level;
530 m.m_vfs_lsockdriver_getset.name = name;
531 m.m_vfs_lsockdriver_getset.grant = grant;
532 m.m_vfs_lsockdriver_getset.len = *len;
533
534 /* Send the request, and wait for the reply. */
535 r = sdev_sendrec(sp, &m);
536
537 /* Free resources. */
538 (void)cpf_revoke(grant);
539
540 if (r != OK)
541 return r; /* socket driver died */
542
543 /* Parse and return the reply. */
544 if (m.m_type != SDEV_REPLY) {
545 printf("VFS: %d sent bad reply type %d for call %d\n",
546 sp->smap_endpt, m.m_type, job_call_nr);
547 return EIO;
548 }
549
550 if ((r = m.m_lsockdriver_vfs_reply.status) < 0)
551 return r;
552
553 *len = (unsigned int)r;
554 return OK;
555 }
556
557 /*
558 * Get socket options.
559 */
560 int
sdev_getsockopt(dev_t dev,int level,int name,vir_bytes addr,unsigned int * len)561 sdev_getsockopt(dev_t dev, int level, int name, vir_bytes addr,
562 unsigned int * len)
563 {
564
565 return sdev_get(dev, SDEV_GETSOCKOPT, level, name, addr, len);
566 }
567
568 /*
569 * Get the local address of a socket.
570 */
571 int
sdev_getsockname(dev_t dev,vir_bytes addr,unsigned int * addr_len)572 sdev_getsockname(dev_t dev, vir_bytes addr, unsigned int * addr_len)
573 {
574
575 return sdev_get(dev, SDEV_GETSOCKNAME, 0, 0, addr, addr_len);
576 }
577
578 /*
579 * Get the remote address of a socket.
580 */
581 int
sdev_getpeername(dev_t dev,vir_bytes addr,unsigned int * addr_len)582 sdev_getpeername(dev_t dev, vir_bytes addr, unsigned int * addr_len)
583 {
584
585 return sdev_get(dev, SDEV_GETPEERNAME, 0, 0, addr, addr_len);
586 }
587
588 /*
589 * Shut down socket send and receive operations.
590 */
591 int
sdev_shutdown(dev_t dev,int how)592 sdev_shutdown(dev_t dev, int how)
593 {
594
595 assert(how == SHUT_RD || how == SHUT_WR || how == SHUT_RDWR);
596
597 return sdev_simple(dev, SDEV_SHUTDOWN, how);
598 }
599
600 /*
601 * Close the socket identified by the given socket device number.
602 */
603 int
sdev_close(dev_t dev,int may_suspend)604 sdev_close(dev_t dev, int may_suspend)
605 {
606 struct smap *sp;
607 sockid_t sock_id;
608 message m;
609 int r;
610
611 /*
612 * Originally, all close requests were blocking the calling thread, but
613 * the new support for SO_LINGER has changed that. In a very strictly
614 * limited subset of cases - namely, the user process calling close(2),
615 * we suspend the close request and handle it asynchronously. In all
616 * other cases, including close-on-exit, close-on-exec, and even dup2,
617 * the close is issued as a thread-synchronous request instead.
618 */
619 if (may_suspend) {
620 if ((sp = get_smap_by_dev(dev, &sock_id)) == NULL)
621 return EIO;
622
623 /* Prepare the request message. */
624 memset(&m, 0, sizeof(m));
625 m.m_type = SDEV_CLOSE;
626 m.m_vfs_lsockdriver_simple.req_id = (sockid_t)who_e;
627 m.m_vfs_lsockdriver_simple.sock_id = sock_id;
628 m.m_vfs_lsockdriver_simple.param = 0;
629
630 /* Send the request to the driver. */
631 if ((r = asynsend3(sp->smap_endpt, &m, AMF_NOREPLY)) != OK)
632 panic("VFS: asynsend in sdev_bindconn failed: %d", r);
633
634 /* Suspend the process until the reply arrives. */
635 return sdev_suspend(dev, GRANT_INVALID, GRANT_INVALID,
636 GRANT_INVALID, -1, 0);
637 } else
638 /* Block the calling thread until the socket is closed. */
639 return sdev_simple(dev, SDEV_CLOSE, SDEV_NONBLOCK);
640 }
641
642 /*
643 * Initiate a select call on a socket device. Return OK iff the request was
644 * sent, without suspending the process.
645 */
646 int
sdev_select(dev_t dev,int ops)647 sdev_select(dev_t dev, int ops)
648 {
649 struct smap *sp;
650 sockid_t sock_id;
651 message m;
652 int r;
653
654 if ((sp = get_smap_by_dev(dev, &sock_id)) == NULL)
655 return EIO;
656
657 /* Prepare the request message. */
658 memset(&m, 0, sizeof(m));
659 m.m_type = SDEV_SELECT;
660 m.m_vfs_lsockdriver_select.sock_id = sock_id;
661 m.m_vfs_lsockdriver_select.ops = ops;
662
663 /* Send the request to the driver. */
664 if ((r = asynsend3(sp->smap_endpt, &m, AMF_NOREPLY)) != OK)
665 panic("VFS: asynsend in sdev_select failed: %d", r);
666
667 return OK;
668 }
669
670 /*
671 * A reply has arrived for a previous socket accept request, and the reply
672 * indicates that a socket has been accepted. A status is also returned;
673 * usually, this status is OK, but if not, the newly accepted socket must be
674 * closed immediately again. Process the low-level aspects of the reply, and
675 * call resume_accept() to let the upper socket layer handle the rest. This
676 * function is always called from a worker thread, and may thus block.
677 */
678 static void
sdev_finish_accept(struct fproc * rfp,message * m_ptr)679 sdev_finish_accept(struct fproc * rfp, message * m_ptr)
680 {
681 struct smap *sp;
682 sockid_t sock_id;
683 dev_t dev;
684 unsigned int len;
685 int status;
686
687 assert(rfp->fp_sdev.callnr == VFS_ACCEPT);
688 assert(m_ptr->m_type == SDEV_ACCEPT_REPLY);
689 assert(m_ptr->m_lsockdriver_vfs_accept_reply.sock_id >= 0);
690
691 /* Free resources. Accept requests use up to one grant. */
692 if (GRANT_VALID(rfp->fp_sdev.grant[0]))
693 cpf_revoke(rfp->fp_sdev.grant[0]);
694 assert(!GRANT_VALID(rfp->fp_sdev.grant[1]));
695 assert(!GRANT_VALID(rfp->fp_sdev.grant[2]));
696
697 sock_id = m_ptr->m_lsockdriver_vfs_accept_reply.sock_id;
698 status = m_ptr->m_lsockdriver_vfs_accept_reply.status;
699 len = m_ptr->m_lsockdriver_vfs_accept_reply.len;
700
701 /*
702 * We do not want the upper socket layer (socket.c) to deal with smap
703 * and socket ID details, so we construct the new socket device number
704 * here. We won't use the saved listen FD to determine the smap entry
705 * here, since that involves file pointers and other upper-layer-only
706 * stuff. So we have to look it up by the source endpoint. As a
707 * result, we detect some driver deaths here (but not all: see below).
708 */
709 if ((sp = get_smap_by_endpt(m_ptr->m_source)) != NULL) {
710 /* Leave 'status' as is, regardless of whether it is OK. */
711 dev = make_smap_dev(sp, sock_id);
712 } else {
713 /*
714 * The driver must have died while the thread was blocked on
715 * activation. Extremely rare, but theoretically possible.
716 * Some driver deaths are indicated only by a driver-up
717 * announcement though; resume_accept() will detect this by
718 * checking that the listening socket has not been invalidated.
719 */
720 status = EIO;
721 dev = NO_DEV;
722 }
723
724 /* Let the upper socket layer handle the rest. */
725 resume_accept(rfp, status, dev, len, rfp->fp_sdev.aux.fd);
726 }
727
728 /*
729 * Worker thread stub for finishing successful accept requests.
730 */
731 static void
do_accept_reply(void)732 do_accept_reply(void)
733 {
734
735 sdev_finish_accept(fp, &job_m_in);
736 }
737
738 /*
739 * With the exception of successful accept requests, this function is called
740 * whenever a reply is received for a socket driver request for which the
741 * corresponding user process was suspended (as opposed to requests which just
742 * suspend the worker thread), i.e., for long-lasting socket calls. This
743 * function is also called if the socket driver has died during a long-lasting
744 * socket call, in which case the given message's m_type is a negative error
745 * code.
746 *
747 * The division between the upper socket layer (socket.c) and the lower socket
748 * layer (this file) here is roughly: if resuming the system call involves no
749 * more than a simple replycode() call, do that here; otherwise call into the
750 * upper socket layer to handle the details. In any case, do not ever let the
751 * upper socket layer deal with reply message parsing or suspension state.
752 *
753 * This function may or may not be called from a worker thread; as such, it
754 * MUST NOT block its calling thread. This function is called for failed
755 * accept requests; successful accept requests have their replies routed
756 * through sdev_finish_accept() instead, because those require a worker thread.
757 */
758 static void
sdev_finish(struct fproc * rfp,message * m_ptr)759 sdev_finish(struct fproc * rfp, message * m_ptr)
760 {
761 unsigned int ctl_len, addr_len;
762 int callnr, status, flags;
763
764 /* The suspension status must just have been cleared by the caller. */
765 assert(rfp->fp_blocked_on == FP_BLOCKED_ON_NONE);
766
767 /*
768 * Free resources. Every suspending call sets all grant fields, so we
769 * can safely revoke all of them without testing the original call.
770 */
771 if (GRANT_VALID(rfp->fp_sdev.grant[0]))
772 cpf_revoke(rfp->fp_sdev.grant[0]);
773 if (GRANT_VALID(rfp->fp_sdev.grant[1]))
774 cpf_revoke(rfp->fp_sdev.grant[1]);
775 if (GRANT_VALID(rfp->fp_sdev.grant[2]))
776 cpf_revoke(rfp->fp_sdev.grant[2]);
777
778 /*
779 * Now that the socket driver call has finished (or been stopped due to
780 * driver death), we need to finish the corresponding system call from
781 * the user process. The action to take depends on the system call.
782 */
783 callnr = rfp->fp_sdev.callnr;
784
785 switch (callnr) {
786 case VFS_BIND:
787 case VFS_CONNECT:
788 case VFS_WRITE:
789 case VFS_SENDTO:
790 case VFS_SENDMSG:
791 case VFS_IOCTL:
792 case VFS_CLOSE:
793 /*
794 * These calls all use the same SDEV_REPLY reply type and only
795 * need to reply an OK-or-error status code back to userland.
796 */
797 if (m_ptr->m_type == SDEV_REPLY) {
798 status = m_ptr->m_lsockdriver_vfs_reply.status;
799
800 /*
801 * For close(2) calls, the return value must indicate
802 * that the file descriptor has been closed.
803 */
804 if (callnr == VFS_CLOSE &&
805 status != OK && status != EINPROGRESS)
806 status = OK;
807 } else if (m_ptr->m_type < 0) {
808 status = m_ptr->m_type;
809 } else {
810 printf("VFS: %d sent bad reply type %d for call %d\n",
811 m_ptr->m_source, m_ptr->m_type, callnr);
812 status = EIO;
813 }
814 replycode(rfp->fp_endpoint, status);
815 break;
816
817 case VFS_READ:
818 case VFS_RECVFROM:
819 case VFS_RECVMSG:
820 /*
821 * These calls use SDEV_RECV_REPLY. The action to take depends
822 * on the exact call.
823 */
824 ctl_len = addr_len = 0;
825 flags = 0;
826 if (m_ptr->m_type == SDEV_RECV_REPLY) {
827 status = m_ptr->m_lsockdriver_vfs_recv_reply.status;
828 ctl_len = m_ptr->m_lsockdriver_vfs_recv_reply.ctl_len;
829 addr_len =
830 m_ptr->m_lsockdriver_vfs_recv_reply.addr_len;
831 flags = m_ptr->m_lsockdriver_vfs_recv_reply.flags;
832 } else if (m_ptr->m_type < 0) {
833 status = m_ptr->m_type;
834 } else {
835 printf("VFS: %d sent bad reply type %d for call %d\n",
836 m_ptr->m_source, m_ptr->m_type, callnr);
837 status = EIO;
838 }
839
840 switch (callnr) {
841 case VFS_READ:
842 replycode(rfp->fp_endpoint, status);
843 break;
844 case VFS_RECVFROM:
845 resume_recvfrom(rfp, status, addr_len);
846 break;
847 case VFS_RECVMSG:
848 resume_recvmsg(rfp, status, ctl_len, addr_len, flags,
849 rfp->fp_sdev.aux.buf);
850 break;
851 }
852 break;
853
854 case VFS_ACCEPT:
855 /*
856 * This call uses SDEV_ACCEPT_REPLY. We only get here if the
857 * accept call has failed without creating a new socket, in
858 * which case we can simply call replycode() with the error.
859 * For nothing other than consistency, we let resume_accept()
860 * handle this case too.
861 */
862 addr_len = 0;
863 if (m_ptr->m_type == SDEV_ACCEPT_REPLY) {
864 assert(m_ptr->m_lsockdriver_vfs_accept_reply.sock_id <
865 0);
866 status = m_ptr->m_lsockdriver_vfs_accept_reply.status;
867 addr_len = m_ptr->m_lsockdriver_vfs_accept_reply.len;
868 } else if (m_ptr->m_type < 0) {
869 status = m_ptr->m_type;
870 } else {
871 printf("VFS: %d sent bad reply type %d for call %d\n",
872 m_ptr->m_source, m_ptr->m_type, callnr);
873 status = EIO;
874 }
875 /*
876 * Quick rundown of m_lsockdriver_vfs_accept_reply cases:
877 *
878 * - sock_id >= 0, status == OK: new socket accepted
879 * - sock_id >= 0, status != OK: new socket must be closed
880 * - sock_id < 0, status != OK: failure accepting socket
881 * - sock_id < 0, status == OK: invalid, covered right here
882 *
883 * See libsockdriver for why there are two reply fields at all.
884 */
885 if (status >= 0) {
886 printf("VFS: %d sent bad status %d for call %d\n",
887 m_ptr->m_source, status, callnr);
888 status = EIO;
889 }
890 resume_accept(rfp, status, NO_DEV, addr_len,
891 rfp->fp_sdev.aux.fd);
892 break;
893
894 default:
895 /*
896 * Ultimately, enumerating all system calls that may cause
897 * socket I/O may prove too cumbersome. In that case, the
898 * callnr field could be replaced by a field that stores the
899 * combination of the expected reply type and the action to
900 * take, for example.
901 */
902 panic("VFS: socket reply %d for unknown call %d from %d",
903 m_ptr->m_type, callnr, rfp->fp_endpoint);
904 }
905 }
906
907 /*
908 * Abort the suspended socket call for the given process, because the
909 * corresponding socket driver has died.
910 */
911 void
sdev_stop(struct fproc * rfp)912 sdev_stop(struct fproc * rfp)
913 {
914 message m;
915
916 assert(rfp->fp_blocked_on == FP_BLOCKED_ON_SDEV);
917
918 rfp->fp_blocked_on = FP_BLOCKED_ON_NONE;
919
920 /*
921 * We use one single approach both here and when stopping worker
922 * threads: the reply message's m_type is set to an error code (always
923 * EIO for now) instead of an actual SDEV_ reply code. We test for
924 * this case in non-suspending calls as well as in sdev_finish().
925 */
926 m.m_type = EIO;
927 sdev_finish(rfp, &m);
928 }
929
930 /*
931 * Cancel the ongoing long-lasting socket call, because the calling process has
932 * received a caught or terminating signal. This function is always called
933 * from a worker thread (as part of PM) work, with 'fp' set to the process that
934 * issued the original system call. The calling function has just unsuspended
935 * the process out of _SDEV blocking state. The job of this function is to
936 * issue a cancel request and then block until a reply comes in; the reply may
937 * indicate success, in which case it must be handled accordingly.
938 */
939 void
sdev_cancel(void)940 sdev_cancel(void)
941 {
942 struct smap *sp;
943 message m;
944 sockid_t sock_id;
945
946 /* The suspension status must just have been cleared by the caller. */
947 assert(fp->fp_blocked_on == FP_BLOCKED_ON_NONE);
948
949 if ((sp = get_smap_by_dev(fp->fp_sdev.dev, &sock_id)) != NULL) {
950 /* Prepare the request message. */
951 memset(&m, 0, sizeof(m));
952 m.m_type = SDEV_CANCEL;
953 m.m_vfs_lsockdriver_simple.req_id = (sockid_t)who_e;
954 m.m_vfs_lsockdriver_simple.sock_id = sock_id;
955
956 /*
957 * Send the cancel request, and wait for a reply. The reply
958 * will be for the original request and must be processed
959 * accordingly. It is possible that the original request
960 * actually succeeded, because 1) the cancel request resulted
961 * in partial success or 2) the original reply and the cancel
962 * request crossed each other. It is because of the second
963 * case that a socket driver must not respond at all to a
964 * cancel operation for an unknown request.
965 */
966 sdev_sendrec(sp, &m);
967 } else
968 m.m_type = EIO;
969
970 /*
971 * Successful accept requests require special processing, but since we
972 * are already operating from a working thread here, we need not spawn
973 * an additional worker thread for this case.
974 */
975 if (m.m_type == SDEV_ACCEPT_REPLY &&
976 m.m_lsockdriver_vfs_accept_reply.sock_id >= 0)
977 sdev_finish_accept(fp, &m);
978 else
979 sdev_finish(fp, &m);
980 }
981
982 /*
983 * A socket driver has sent a reply to a socket request. Process it, by either
984 * waking up an active worker thread, finishing the system call from here, or
985 * (in the exceptional case of accept calls) spawning a new worker thread to
986 * process the reply. This function MUST NOT block its calling thread.
987 */
988 void
sdev_reply(void)989 sdev_reply(void)
990 {
991 struct fproc *rfp;
992 struct smap *sp;
993 struct worker_thread *wp;
994 sockid_t req_id = -1;
995 dev_t dev;
996 int slot;
997
998 if ((sp = get_smap_by_endpt(who_e)) == NULL) {
999 printf("VFS: ignoring sock dev reply from unknown driver %d\n",
1000 who_e);
1001 return;
1002 }
1003
1004 switch (call_nr) {
1005 case SDEV_REPLY:
1006 req_id = m_in.m_lsockdriver_vfs_reply.req_id;
1007 break;
1008 case SDEV_SOCKET_REPLY:
1009 req_id = m_in.m_lsockdriver_vfs_socket_reply.req_id;
1010 break;
1011 case SDEV_ACCEPT_REPLY:
1012 req_id = m_in.m_lsockdriver_vfs_accept_reply.req_id;
1013 break;
1014 case SDEV_RECV_REPLY:
1015 req_id = m_in.m_lsockdriver_vfs_recv_reply.req_id;
1016 break;
1017 case SDEV_SELECT1_REPLY:
1018 dev = make_smap_dev(sp,
1019 m_in.m_lsockdriver_vfs_select_reply.sock_id);
1020 select_sdev_reply1(dev,
1021 m_in.m_lsockdriver_vfs_select_reply.status);
1022 return;
1023 case SDEV_SELECT2_REPLY:
1024 dev = make_smap_dev(sp,
1025 m_in.m_lsockdriver_vfs_select_reply.sock_id);
1026 select_sdev_reply2(dev,
1027 m_in.m_lsockdriver_vfs_select_reply.status);
1028 return;
1029 default:
1030 printf("VFS: ignoring unknown sock dev reply %d from %d\n",
1031 call_nr, who_e);
1032 return;
1033 }
1034
1035 if (isokendpt((endpoint_t)req_id, &slot) != OK) {
1036 printf("VFS: ignoring sock dev reply from %d for unknown %d\n",
1037 who_e, req_id);
1038 return;
1039 }
1040
1041 rfp = &fproc[slot];
1042 wp = rfp->fp_worker;
1043 if (wp != NULL && wp->w_task == who_e && wp->w_drv_sendrec != NULL) {
1044 assert(!fp_is_blocked(rfp));
1045 *wp->w_drv_sendrec = m_in;
1046 wp->w_drv_sendrec = NULL;
1047 worker_signal(wp); /* resume suspended thread */
1048 /*
1049 * It is up to the worker thread to 1) check that the reply is
1050 * of the right type for the request, and 2) keep in mind that
1051 * the reply type may be EIO in case the socket driver died.
1052 */
1053 } else if (rfp->fp_blocked_on != FP_BLOCKED_ON_SDEV ||
1054 get_smap_by_dev(rfp->fp_sdev.dev, NULL) != sp) {
1055 printf("VFS: ignoring sock dev reply, %d not blocked on %d\n",
1056 rfp->fp_endpoint, who_e);
1057 return;
1058 } else if (call_nr == SDEV_ACCEPT_REPLY &&
1059 m_in.m_lsockdriver_vfs_accept_reply.sock_id >= 0) {
1060 /*
1061 * For accept replies that return a new socket, we need to
1062 * spawn a worker thread, because accept calls may block (so
1063 * there will no longer be a worker thread) and processing the
1064 * reply requires additional blocking calls (which we cannot
1065 * issue from the main thread). This is tricky. Under no
1066 * circumstances may we "lose" a legitimate reply, because this
1067 * would lead to resource leaks in the socket driver. To this
1068 * end, we rely on the current worker thread model to
1069 * prioritize regular work over PM work. Still, sdev_cancel()
1070 * may end up receiving the accept reply if it was already
1071 * blocked waiting for the reply message, and it must then
1072 * perform the same tasks.
1073 */
1074 /*
1075 * It is possible that if all threads are in use, there is a
1076 * "gap" between starting the thread and its activation. The
1077 * main problem for this case is that the socket driver dies
1078 * within that gap. For accepts, we address this with no less
1079 * than two checks: 1) in this file, by looking up the smap
1080 * entry by the reply source endpoint again - if the entry is
1081 * no longer valid, the socket driver must have died; 2) in
1082 * socket.c, by revalidating the original listening socket - if
1083 * the listening socket has been invalidated, the driver died.
1084 *
1085 * Since we unsuspend the process now, a socket driver sending
1086 * two accept replies in a row may never cause VFS to attempt
1087 * spawning two threads; the second reply should be ignored.
1088 */
1089 assert(fp->fp_func == NULL);
1090
1091 worker_start(rfp, do_accept_reply, &m_in, FALSE /*use_spare*/);
1092
1093 /*
1094 * TODO: I just introduced the notion of not using the fp_u
1095 * union across yields after unsuspension, but for socket calls
1096 * we have a lot of socket state to carry over, so I'm now
1097 * immediately violating my own rule again here. Possible
1098 * solutions: 1) introduce another blocking state just to mark
1099 * the fp_u union in use (this has side effects though), 2)
1100 * introduce a pseudo message type which covers both the accept
1101 * reply fields and the fp_u state (do_pending_pipe does this),
1102 * or 3) add a fp_flags flag for this purpose. In any case,
1103 * the whole point is that we catch any attempts to reuse fp_u
1104 * for other purposes and thus cause state corruption. This
1105 * should not happen anyway, but it's too dangerous to leave
1106 * entirely unchecked. --dcvmoole
1107 */
1108 rfp->fp_blocked_on = FP_BLOCKED_ON_NONE;
1109 } else {
1110 rfp->fp_blocked_on = FP_BLOCKED_ON_NONE;
1111
1112 sdev_finish(rfp, &m_in);
1113 }
1114 }
1115