1 /* $NetBSD: sockin.c,v 1.27 2013/03/18 13:14:11 pooka Exp $ */ 2 3 /* 4 * Copyright (c) 2008, 2009 Antti Kantee. All Rights Reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS 16 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 17 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 18 * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 21 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 */ 27 28 #include <sys/cdefs.h> 29 __KERNEL_RCSID(0, "$NetBSD: sockin.c,v 1.27 2013/03/18 13:14:11 pooka Exp $"); 30 31 #include <sys/param.h> 32 #include <sys/condvar.h> 33 #include <sys/domain.h> 34 #include <sys/kmem.h> 35 #include <sys/kthread.h> 36 #include <sys/mbuf.h> 37 #include <sys/mutex.h> 38 #include <sys/poll.h> 39 #include <sys/protosw.h> 40 #include <sys/queue.h> 41 #include <sys/socket.h> 42 #include <sys/socketvar.h> 43 #include <sys/time.h> 44 45 #include <net/bpf.h> 46 #include <net/if.h> 47 #include <net/radix.h> 48 49 #include <netinet/in.h> 50 #include <netinet/in_systm.h> 51 #include <netinet/ip.h> 52 53 #include <rump/rumpuser.h> 54 55 #include "rump_private.h" 56 #include "rumpcomp_user.h" 57 58 /* 59 * An inet communication domain which uses the socket interface. 60 * Currently supports only IPv4 UDP, but could easily be extended to 61 * support IPv6 and TCP by adding more stuff to the protosw. 62 */ 63 64 DOMAIN_DEFINE(sockindomain); 65 66 static void sockin_init(void); 67 static int sockin_usrreq(struct socket *, int, struct mbuf *, 68 struct mbuf *, struct mbuf *, struct lwp *); 69 static int sockin_ctloutput(int op, struct socket *, struct sockopt *); 70 71 const struct protosw sockinsw[] = { 72 { 73 .pr_type = SOCK_DGRAM, 74 .pr_domain = &sockindomain, 75 .pr_protocol = IPPROTO_UDP, 76 .pr_flags = PR_ATOMIC|PR_ADDR, 77 .pr_usrreq = sockin_usrreq, 78 .pr_ctloutput = sockin_ctloutput, 79 }, 80 { 81 .pr_type = SOCK_STREAM, 82 .pr_domain = &sockindomain, 83 .pr_protocol = IPPROTO_TCP, 84 .pr_flags = PR_CONNREQUIRED|PR_WANTRCVD|PR_LISTEN|PR_ABRTACPTDIS, 85 .pr_usrreq = sockin_usrreq, 86 .pr_ctloutput = sockin_ctloutput, 87 }}; 88 89 struct domain sockindomain = { 90 .dom_family = PF_INET, 91 .dom_name = "socket_inet", 92 .dom_init = sockin_init, 93 .dom_externalize = NULL, 94 .dom_dispose = NULL, 95 .dom_protosw = sockinsw, 96 .dom_protoswNPROTOSW = &sockinsw[__arraycount(sockinsw)], 97 .dom_rtattach = rt_inithead, 98 .dom_rtoffset = 32, 99 .dom_maxrtkey = sizeof(struct sockaddr_in), 100 .dom_ifattach = NULL, 101 .dom_ifdetach = NULL, 102 .dom_ifqueues = { NULL }, 103 .dom_link = { NULL }, 104 .dom_mowner = MOWNER_INIT("",""), 105 .dom_rtcache = { NULL }, 106 .dom_sockaddr_cmp = NULL 107 }; 108 109 #define SO2S(so) ((intptr_t)(so->so_internal)) 110 #define SOCKIN_SBSIZE 65536 111 112 struct sockin_unit { 113 struct socket *su_so; 114 115 LIST_ENTRY(sockin_unit) su_entries; 116 }; 117 static LIST_HEAD(, sockin_unit) su_ent = LIST_HEAD_INITIALIZER(su_ent); 118 static kmutex_t su_mtx; 119 static bool rebuild; 120 static int nsock; 121 122 /* XXX: for the bpf hack */ 123 static struct ifnet sockin_if; 124 int ifpromisc(struct ifnet *ifp, int pswitch) { return 0; } 125 126 static int 127 registersock(struct socket *so, int news) 128 { 129 struct sockin_unit *su; 130 131 su = kmem_alloc(sizeof(*su), KM_NOSLEEP); 132 if (!su) 133 return ENOMEM; 134 135 so->so_internal = (void *)(intptr_t)news; 136 su->su_so = so; 137 138 mutex_enter(&su_mtx); 139 LIST_INSERT_HEAD(&su_ent, su, su_entries); 140 nsock++; 141 rebuild = true; 142 mutex_exit(&su_mtx); 143 144 return 0; 145 } 146 147 static void 148 removesock(struct socket *so) 149 { 150 struct sockin_unit *su_iter; 151 int error; 152 153 mutex_enter(&su_mtx); 154 LIST_FOREACH(su_iter, &su_ent, su_entries) { 155 if (su_iter->su_so == so) 156 break; 157 } 158 if (!su_iter) 159 panic("no such socket"); 160 161 LIST_REMOVE(su_iter, su_entries); 162 nsock--; 163 rebuild = true; 164 mutex_exit(&su_mtx); 165 166 rumpuser_close(SO2S(su_iter->su_so), &error); 167 kmem_free(su_iter, sizeof(*su_iter)); 168 } 169 170 static void 171 sockin_process(struct socket *so) 172 { 173 struct sockaddr_in from; 174 struct iovec io; 175 struct msghdr rmsg; 176 struct mbuf *m; 177 ssize_t n; 178 size_t plen; 179 int error; 180 181 m = m_gethdr(M_WAIT, MT_DATA); 182 if (so->so_proto->pr_type == SOCK_DGRAM) { 183 plen = IP_MAXPACKET; 184 MEXTMALLOC(m, plen, M_DONTWAIT); 185 } else { 186 plen = MCLBYTES; 187 MCLGET(m, M_DONTWAIT); 188 } 189 if ((m->m_flags & M_EXT) == 0) { 190 m_freem(m); 191 return; 192 } 193 194 memset(&rmsg, 0, sizeof(rmsg)); 195 io.iov_base = mtod(m, void *); 196 io.iov_len = plen; 197 rmsg.msg_iov = &io; 198 rmsg.msg_iovlen = 1; 199 rmsg.msg_name = (struct sockaddr *)&from; 200 rmsg.msg_namelen = sizeof(from); 201 202 n = rumpcomp_sockin_recvmsg(SO2S(so), &rmsg, 0, &error); 203 if (n <= 0) { 204 m_freem(m); 205 206 /* Treat a TCP socket a goner */ 207 if (error != EAGAIN && so->so_proto->pr_type == SOCK_STREAM) { 208 mutex_enter(softnet_lock); 209 soisdisconnected(so); 210 mutex_exit(softnet_lock); 211 removesock(so); 212 } 213 return; 214 } 215 m->m_len = m->m_pkthdr.len = n; 216 217 bpf_mtap_af(&sockin_if, AF_UNSPEC, m); 218 219 mutex_enter(softnet_lock); 220 if (so->so_proto->pr_type == SOCK_DGRAM) { 221 if (!sbappendaddr(&so->so_rcv, rmsg.msg_name, m, NULL)) { 222 m_freem(m); 223 } 224 } else { 225 sbappendstream(&so->so_rcv, m); 226 } 227 228 sorwakeup(so); 229 mutex_exit(softnet_lock); 230 } 231 232 static void 233 sockin_accept(struct socket *so) 234 { 235 struct socket *nso; 236 struct sockaddr_in sin; 237 int news, error, slen; 238 239 slen = sizeof(sin); 240 news = rumpcomp_sockin_accept(SO2S(so), (struct sockaddr *)&sin, 241 &slen, &error); 242 if (news == -1) 243 return; 244 245 mutex_enter(softnet_lock); 246 nso = sonewconn(so, SS_ISCONNECTED); 247 if (nso == NULL) 248 goto errout; 249 if (registersock(nso, news) != 0) 250 goto errout; 251 mutex_exit(softnet_lock); 252 return; 253 254 errout: 255 rumpuser_close(news, &error); 256 if (nso) 257 soclose(nso); 258 mutex_exit(softnet_lock); 259 } 260 261 #define POLLTIMEOUT 100 /* check for new entries every 100ms */ 262 263 /* XXX: doesn't handle socket (kernel) locking properly? */ 264 static void 265 sockinworker(void *arg) 266 { 267 struct pollfd *pfds = NULL, *npfds; 268 struct sockin_unit *su_iter; 269 struct socket *so; 270 int cursock = 0, i, rv, error; 271 272 /* 273 * Loop reading requests. Check for new sockets periodically 274 * (could be smarter, but I'm lazy). 275 */ 276 for (;;) { 277 if (rebuild) { 278 npfds = NULL; 279 mutex_enter(&su_mtx); 280 if (nsock) 281 npfds = kmem_alloc(nsock * sizeof(*npfds), 282 KM_NOSLEEP); 283 if (npfds || nsock == 0) { 284 if (pfds) 285 kmem_free(pfds, cursock*sizeof(*pfds)); 286 pfds = npfds; 287 cursock = nsock; 288 rebuild = false; 289 290 i = 0; 291 LIST_FOREACH(su_iter, &su_ent, su_entries) { 292 pfds[i].fd = SO2S(su_iter->su_so); 293 pfds[i].events = POLLIN; 294 pfds[i].revents = 0; 295 i++; 296 } 297 KASSERT(i == nsock); 298 } 299 mutex_exit(&su_mtx); 300 } 301 302 /* find affected sockets & process */ 303 rv = rumpuser_poll(pfds, cursock, POLLTIMEOUT, &error); 304 for (i = 0; i < cursock && rv > 0; i++) { 305 if (pfds[i].revents & POLLIN) { 306 mutex_enter(&su_mtx); 307 LIST_FOREACH(su_iter, &su_ent, su_entries) { 308 if (SO2S(su_iter->su_so)==pfds[i].fd) { 309 so = su_iter->su_so; 310 mutex_exit(&su_mtx); 311 if(so->so_options&SO_ACCEPTCONN) 312 sockin_accept(so); 313 else 314 sockin_process(so); 315 mutex_enter(&su_mtx); 316 break; 317 } 318 } 319 /* if we can't find it, just wing it */ 320 KASSERT(rebuild || su_iter); 321 mutex_exit(&su_mtx); 322 pfds[i].revents = 0; 323 rv--; 324 i = -1; 325 continue; 326 } 327 328 /* something else? ignore */ 329 if (pfds[i].revents) { 330 pfds[i].revents = 0; 331 rv--; 332 } 333 } 334 KASSERT(rv <= 0); 335 } 336 337 } 338 339 static void 340 sockin_init(void) 341 { 342 int rv; 343 344 if (rump_threads) { 345 if ((rv = kthread_create(PRI_NONE, 0, NULL, sockinworker, 346 NULL, NULL, "sockwork")) != 0) 347 panic("sockin_init: could not create worker thread\n"); 348 } else { 349 printf("sockin_init: no threads => no worker thread\n"); 350 } 351 mutex_init(&su_mtx, MUTEX_DEFAULT, IPL_NONE); 352 strlcpy(sockin_if.if_xname, "sockin0", sizeof(sockin_if.if_xname)); 353 bpf_attach(&sockin_if, DLT_NULL, 0); 354 } 355 356 static int 357 sockin_usrreq(struct socket *so, int req, struct mbuf *m, struct mbuf *nam, 358 struct mbuf *control, struct lwp *l) 359 { 360 int error = 0, rv; 361 362 switch (req) { 363 case PRU_ATTACH: 364 { 365 int news, dummy; 366 int sbsize; 367 368 sosetlock(so); 369 if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) { 370 error = soreserve(so, SOCKIN_SBSIZE, SOCKIN_SBSIZE); 371 if (error) 372 break; 373 } 374 375 news = rumpcomp_sockin_socket(PF_INET, so->so_proto->pr_type, 376 0, &error); 377 if (news == -1) 378 break; 379 380 /* for UDP sockets, make sure we can send&recv max */ 381 if (so->so_proto->pr_type == SOCK_DGRAM) { 382 sbsize = SOCKIN_SBSIZE; 383 rumpcomp_sockin_setsockopt(news, 384 SOL_SOCKET, SO_SNDBUF, 385 &sbsize, sizeof(sbsize), &error); 386 sbsize = SOCKIN_SBSIZE; 387 rumpcomp_sockin_setsockopt(news, 388 SOL_SOCKET, SO_RCVBUF, 389 &sbsize, sizeof(sbsize), &error); 390 } 391 392 if ((error = registersock(so, news)) != 0) 393 rumpuser_close(news, &dummy); 394 395 break; 396 } 397 398 case PRU_ACCEPT: 399 /* we do all the work in the worker thread */ 400 break; 401 402 case PRU_BIND: 403 rumpcomp_sockin_bind(SO2S(so), 404 mtod(nam, const struct sockaddr *), 405 sizeof(struct sockaddr_in), &error); 406 break; 407 408 case PRU_CONNECT: 409 rv = rumpcomp_sockin_connect(SO2S(so), 410 mtod(nam, struct sockaddr *), sizeof(struct sockaddr_in), 411 &error); 412 if (rv == 0) 413 soisconnected(so); 414 break; 415 416 case PRU_LISTEN: 417 rumpcomp_sockin_listen(SO2S(so), so->so_qlimit, &error); 418 break; 419 420 case PRU_SEND: 421 { 422 struct sockaddr *saddr; 423 struct msghdr mhdr; 424 size_t iov_max, i; 425 struct iovec iov_buf[32], *iov; 426 struct mbuf *m2; 427 size_t tot; 428 int s; 429 430 bpf_mtap_af(&sockin_if, AF_UNSPEC, m); 431 432 memset(&mhdr, 0, sizeof(mhdr)); 433 434 iov_max = 0; 435 for (m2 = m; m2 != NULL; m2 = m2->m_next) { 436 iov_max++; 437 } 438 439 if (iov_max <= __arraycount(iov_buf)) { 440 iov = iov_buf; 441 } else { 442 iov = kmem_alloc(sizeof(struct iovec) * iov_max, 443 KM_SLEEP); 444 } 445 446 tot = 0; 447 for (i = 0, m2 = m; m2 != NULL; m2 = m2->m_next, i++) { 448 iov[i].iov_base = m2->m_data; 449 iov[i].iov_len = m2->m_len; 450 tot += m2->m_len; 451 } 452 mhdr.msg_iov = iov; 453 mhdr.msg_iovlen = i; 454 s = SO2S(so); 455 456 if (nam != NULL) { 457 saddr = mtod(nam, struct sockaddr *); 458 mhdr.msg_name = saddr; 459 mhdr.msg_namelen = saddr->sa_len; 460 } 461 462 rumpcomp_sockin_sendmsg(s, &mhdr, 0, &error); 463 464 if (iov != iov_buf) 465 kmem_free(iov, sizeof(struct iovec) * iov_max); 466 467 m_freem(m); 468 m_freem(control); 469 470 /* this assumes too many things to list.. buthey, testing */ 471 if (!rump_threads) 472 sockin_process(so); 473 } 474 break; 475 476 case PRU_SHUTDOWN: 477 removesock(so); 478 break; 479 480 case PRU_SOCKADDR: 481 case PRU_PEERADDR: 482 { 483 int slen = nam->m_len; 484 enum rumpcomp_sockin_getnametype which; 485 486 if (req == PRU_SOCKADDR) 487 which = RUMPCOMP_SOCKIN_SOCKNAME; 488 else 489 which = RUMPCOMP_SOCKIN_PEERNAME; 490 rumpcomp_sockin_getname(SO2S(so), 491 mtod(nam, struct sockaddr *), &slen, which, &error); 492 if (error == 0) 493 nam->m_len = slen; 494 break; 495 } 496 497 case PRU_CONTROL: 498 error = ENOTTY; 499 break; 500 501 default: 502 panic("sockin_usrreq: IMPLEMENT ME, req %d not supported", req); 503 } 504 505 return error; 506 } 507 508 static int 509 sockin_ctloutput(int op, struct socket *so, struct sockopt *sopt) 510 { 511 int error; 512 513 rumpcomp_sockin_setsockopt(SO2S(so), sopt->sopt_level, 514 sopt->sopt_name, sopt->sopt_data, sopt->sopt_size, &error); 515 return error; 516 } 517