1 /* $NetBSD: sockin.c,v 1.26 2011/03/31 19:40:54 dyoung Exp $ */ 2 3 /* 4 * Copyright (c) 2008, 2009 Antti Kantee. All Rights Reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS 16 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 17 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 18 * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 21 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 */ 27 28 #include <sys/cdefs.h> 29 __KERNEL_RCSID(0, "$NetBSD: sockin.c,v 1.26 2011/03/31 19:40:54 dyoung Exp $"); 30 31 #include <sys/param.h> 32 #include <sys/condvar.h> 33 #include <sys/domain.h> 34 #include <sys/kmem.h> 35 #include <sys/kthread.h> 36 #include <sys/mbuf.h> 37 #include <sys/mutex.h> 38 #include <sys/poll.h> 39 #include <sys/protosw.h> 40 #include <sys/queue.h> 41 #include <sys/socket.h> 42 #include <sys/socketvar.h> 43 #include <sys/time.h> 44 45 #include <net/bpf.h> 46 #include <net/if.h> 47 #include <net/radix.h> 48 49 #include <netinet/in.h> 50 #include <netinet/in_systm.h> 51 #include <netinet/ip.h> 52 53 #include <rump/rumpuser.h> 54 55 #include "rump_private.h" 56 57 /* 58 * An inet communication domain which uses the socket interface. 59 * Currently supports only IPv4 UDP, but could easily be extended to 60 * support IPv6 and TCP by adding more stuff to the protosw. 61 */ 62 63 DOMAIN_DEFINE(sockindomain); 64 65 static void sockin_init(void); 66 static int sockin_usrreq(struct socket *, int, struct mbuf *, 67 struct mbuf *, struct mbuf *, struct lwp *); 68 static int sockin_ctloutput(int op, struct socket *, struct sockopt *); 69 70 const struct protosw sockinsw[] = { 71 { 72 .pr_type = SOCK_DGRAM, 73 .pr_domain = &sockindomain, 74 .pr_protocol = IPPROTO_UDP, 75 .pr_flags = PR_ATOMIC|PR_ADDR, 76 .pr_usrreq = sockin_usrreq, 77 .pr_ctloutput = sockin_ctloutput, 78 }, 79 { 80 .pr_type = SOCK_STREAM, 81 .pr_domain = &sockindomain, 82 .pr_protocol = IPPROTO_TCP, 83 .pr_flags = PR_CONNREQUIRED|PR_WANTRCVD|PR_LISTEN|PR_ABRTACPTDIS, 84 .pr_usrreq = sockin_usrreq, 85 .pr_ctloutput = sockin_ctloutput, 86 }}; 87 88 struct domain sockindomain = { 89 .dom_family = PF_INET, 90 .dom_name = "socket_inet", 91 .dom_init = sockin_init, 92 .dom_externalize = NULL, 93 .dom_dispose = NULL, 94 .dom_protosw = sockinsw, 95 .dom_protoswNPROTOSW = &sockinsw[__arraycount(sockinsw)], 96 .dom_rtattach = rt_inithead, 97 .dom_rtoffset = 32, 98 .dom_maxrtkey = sizeof(struct sockaddr_in), 99 .dom_ifattach = NULL, 100 .dom_ifdetach = NULL, 101 .dom_ifqueues = { NULL }, 102 .dom_link = { NULL }, 103 .dom_mowner = MOWNER_INIT("",""), 104 .dom_rtcache = { NULL }, 105 .dom_sockaddr_cmp = NULL 106 }; 107 108 #define SO2S(so) ((intptr_t)(so->so_internal)) 109 #define SOCKIN_SBSIZE 65536 110 111 struct sockin_unit { 112 struct socket *su_so; 113 114 LIST_ENTRY(sockin_unit) su_entries; 115 }; 116 static LIST_HEAD(, sockin_unit) su_ent = LIST_HEAD_INITIALIZER(su_ent); 117 static kmutex_t su_mtx; 118 static bool rebuild; 119 static int nsock; 120 121 /* XXX: for the bpf hack */ 122 static struct ifnet sockin_if; 123 int ifpromisc(struct ifnet *ifp, int pswitch) { return 0; } 124 125 static int 126 registersock(struct socket *so, int news) 127 { 128 struct sockin_unit *su; 129 130 su = kmem_alloc(sizeof(*su), KM_NOSLEEP); 131 if (!su) 132 return ENOMEM; 133 134 so->so_internal = (void *)(intptr_t)news; 135 su->su_so = so; 136 137 mutex_enter(&su_mtx); 138 LIST_INSERT_HEAD(&su_ent, su, su_entries); 139 nsock++; 140 rebuild = true; 141 mutex_exit(&su_mtx); 142 143 return 0; 144 } 145 146 static void 147 removesock(struct socket *so) 148 { 149 struct sockin_unit *su_iter; 150 int error; 151 152 mutex_enter(&su_mtx); 153 LIST_FOREACH(su_iter, &su_ent, su_entries) { 154 if (su_iter->su_so == so) 155 break; 156 } 157 if (!su_iter) 158 panic("no such socket"); 159 160 LIST_REMOVE(su_iter, su_entries); 161 nsock--; 162 rebuild = true; 163 mutex_exit(&su_mtx); 164 165 rumpuser_close(SO2S(su_iter->su_so), &error); 166 kmem_free(su_iter, sizeof(*su_iter)); 167 } 168 169 static void 170 sockin_process(struct socket *so) 171 { 172 struct sockaddr_in from; 173 struct iovec io; 174 struct msghdr rmsg; 175 struct mbuf *m; 176 ssize_t n; 177 size_t plen; 178 int error; 179 180 m = m_gethdr(M_WAIT, MT_DATA); 181 if (so->so_proto->pr_type == SOCK_DGRAM) { 182 plen = IP_MAXPACKET; 183 MEXTMALLOC(m, plen, M_DONTWAIT); 184 } else { 185 plen = MCLBYTES; 186 MCLGET(m, M_DONTWAIT); 187 } 188 if ((m->m_flags & M_EXT) == 0) { 189 m_freem(m); 190 return; 191 } 192 193 memset(&rmsg, 0, sizeof(rmsg)); 194 io.iov_base = mtod(m, void *); 195 io.iov_len = plen; 196 rmsg.msg_iov = &io; 197 rmsg.msg_iovlen = 1; 198 rmsg.msg_name = (struct sockaddr *)&from; 199 rmsg.msg_namelen = sizeof(from); 200 201 n = rumpuser_net_recvmsg(SO2S(so), &rmsg, 0, &error); 202 if (n <= 0) { 203 m_freem(m); 204 205 /* Treat a TCP socket a goner */ 206 if (error != EAGAIN && so->so_proto->pr_type == SOCK_STREAM) { 207 mutex_enter(softnet_lock); 208 soisdisconnected(so); 209 mutex_exit(softnet_lock); 210 removesock(so); 211 } 212 return; 213 } 214 m->m_len = m->m_pkthdr.len = n; 215 216 bpf_mtap_af(&sockin_if, AF_UNSPEC, m); 217 218 mutex_enter(softnet_lock); 219 if (so->so_proto->pr_type == SOCK_DGRAM) { 220 if (!sbappendaddr(&so->so_rcv, rmsg.msg_name, m, NULL)) { 221 m_freem(m); 222 } 223 } else { 224 sbappendstream(&so->so_rcv, m); 225 } 226 227 sorwakeup(so); 228 mutex_exit(softnet_lock); 229 } 230 231 static void 232 sockin_accept(struct socket *so) 233 { 234 struct socket *nso; 235 struct sockaddr_in sin; 236 int news, error, slen; 237 238 slen = sizeof(sin); 239 news = rumpuser_net_accept(SO2S(so), (struct sockaddr *)&sin, 240 &slen, &error); 241 if (news == -1) 242 return; 243 244 mutex_enter(softnet_lock); 245 nso = sonewconn(so, SS_ISCONNECTED); 246 if (nso == NULL) 247 goto errout; 248 if (registersock(nso, news) != 0) 249 goto errout; 250 mutex_exit(softnet_lock); 251 return; 252 253 errout: 254 rumpuser_close(news, &error); 255 if (nso) 256 soclose(nso); 257 mutex_exit(softnet_lock); 258 } 259 260 #define POLLTIMEOUT 100 /* check for new entries every 100ms */ 261 262 /* XXX: doesn't handle socket (kernel) locking properly? */ 263 static void 264 sockinworker(void *arg) 265 { 266 struct pollfd *pfds = NULL, *npfds; 267 struct sockin_unit *su_iter; 268 struct socket *so; 269 int cursock = 0, i, rv, error; 270 271 /* 272 * Loop reading requests. Check for new sockets periodically 273 * (could be smarter, but I'm lazy). 274 */ 275 for (;;) { 276 if (rebuild) { 277 npfds = NULL; 278 mutex_enter(&su_mtx); 279 if (nsock) 280 npfds = kmem_alloc(nsock * sizeof(*npfds), 281 KM_NOSLEEP); 282 if (npfds || nsock == 0) { 283 if (pfds) 284 kmem_free(pfds, cursock*sizeof(*pfds)); 285 pfds = npfds; 286 cursock = nsock; 287 rebuild = false; 288 289 i = 0; 290 LIST_FOREACH(su_iter, &su_ent, su_entries) { 291 pfds[i].fd = SO2S(su_iter->su_so); 292 pfds[i].events = POLLIN; 293 pfds[i].revents = 0; 294 i++; 295 } 296 KASSERT(i == nsock); 297 } 298 mutex_exit(&su_mtx); 299 } 300 301 /* find affected sockets & process */ 302 rv = rumpuser_poll(pfds, cursock, POLLTIMEOUT, &error); 303 for (i = 0; i < cursock && rv > 0; i++) { 304 if (pfds[i].revents & POLLIN) { 305 mutex_enter(&su_mtx); 306 LIST_FOREACH(su_iter, &su_ent, su_entries) { 307 if (SO2S(su_iter->su_so)==pfds[i].fd) { 308 so = su_iter->su_so; 309 mutex_exit(&su_mtx); 310 if(so->so_options&SO_ACCEPTCONN) 311 sockin_accept(so); 312 else 313 sockin_process(so); 314 mutex_enter(&su_mtx); 315 break; 316 } 317 } 318 /* if we can't find it, just wing it */ 319 KASSERT(rebuild || su_iter); 320 mutex_exit(&su_mtx); 321 pfds[i].revents = 0; 322 rv--; 323 i = -1; 324 continue; 325 } 326 327 /* something else? ignore */ 328 if (pfds[i].revents) { 329 pfds[i].revents = 0; 330 rv--; 331 } 332 } 333 KASSERT(rv <= 0); 334 } 335 336 } 337 338 static void 339 sockin_init(void) 340 { 341 int rv; 342 343 if (rump_threads) { 344 if ((rv = kthread_create(PRI_NONE, 0, NULL, sockinworker, 345 NULL, NULL, "sockwork")) != 0) 346 panic("sockin_init: could not create worker thread\n"); 347 } else { 348 printf("sockin_init: no threads => no worker thread\n"); 349 } 350 mutex_init(&su_mtx, MUTEX_DEFAULT, IPL_NONE); 351 strlcpy(sockin_if.if_xname, "sockin0", sizeof(sockin_if.if_xname)); 352 bpf_attach(&sockin_if, DLT_NULL, 0); 353 } 354 355 static int 356 sockin_usrreq(struct socket *so, int req, struct mbuf *m, struct mbuf *nam, 357 struct mbuf *control, struct lwp *l) 358 { 359 int error = 0, rv; 360 361 switch (req) { 362 case PRU_ATTACH: 363 { 364 int news, dummy; 365 int sbsize; 366 367 sosetlock(so); 368 if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) { 369 error = soreserve(so, SOCKIN_SBSIZE, SOCKIN_SBSIZE); 370 if (error) 371 break; 372 } 373 374 news = rumpuser_net_socket(PF_INET, so->so_proto->pr_type, 375 0, &error); 376 if (news == -1) 377 break; 378 379 /* for UDP sockets, make sure we can send&recv max */ 380 if (so->so_proto->pr_type == SOCK_DGRAM) { 381 sbsize = SOCKIN_SBSIZE; 382 rumpuser_net_setsockopt(news, SOL_SOCKET, SO_SNDBUF, 383 &sbsize, sizeof(sbsize), &error); 384 sbsize = SOCKIN_SBSIZE; 385 rumpuser_net_setsockopt(news, SOL_SOCKET, SO_RCVBUF, 386 &sbsize, sizeof(sbsize), &error); 387 } 388 389 if ((error = registersock(so, news)) != 0) 390 rumpuser_close(news, &dummy); 391 392 break; 393 } 394 395 case PRU_ACCEPT: 396 /* we do all the work in the worker thread */ 397 break; 398 399 case PRU_BIND: 400 rumpuser_net_bind(SO2S(so), mtod(nam, const struct sockaddr *), 401 sizeof(struct sockaddr_in), &error); 402 break; 403 404 case PRU_CONNECT: 405 rv = rumpuser_net_connect(SO2S(so), 406 mtod(nam, struct sockaddr *), sizeof(struct sockaddr_in), 407 &error); 408 if (rv == 0) 409 soisconnected(so); 410 break; 411 412 case PRU_LISTEN: 413 rumpuser_net_listen(SO2S(so), so->so_qlimit, &error); 414 break; 415 416 case PRU_SEND: 417 { 418 struct sockaddr *saddr; 419 struct msghdr mhdr; 420 size_t iov_max, i; 421 struct iovec iov_buf[32], *iov; 422 struct mbuf *m2; 423 size_t tot; 424 int s; 425 426 bpf_mtap_af(&sockin_if, AF_UNSPEC, m); 427 428 memset(&mhdr, 0, sizeof(mhdr)); 429 430 iov_max = 0; 431 for (m2 = m; m2 != NULL; m2 = m2->m_next) { 432 iov_max++; 433 } 434 435 if (iov_max <= __arraycount(iov_buf)) { 436 iov = iov_buf; 437 } else { 438 iov = kmem_alloc(sizeof(struct iovec) * iov_max, 439 KM_SLEEP); 440 } 441 442 tot = 0; 443 for (i = 0, m2 = m; m2 != NULL; m2 = m2->m_next, i++) { 444 iov[i].iov_base = m2->m_data; 445 iov[i].iov_len = m2->m_len; 446 tot += m2->m_len; 447 } 448 mhdr.msg_iov = iov; 449 mhdr.msg_iovlen = i; 450 s = SO2S(so); 451 452 if (nam != NULL) { 453 saddr = mtod(nam, struct sockaddr *); 454 mhdr.msg_name = saddr; 455 mhdr.msg_namelen = saddr->sa_len; 456 } 457 458 rumpuser_net_sendmsg(s, &mhdr, 0, &error); 459 460 if (iov != iov_buf) 461 kmem_free(iov, sizeof(struct iovec) * iov_max); 462 463 m_freem(m); 464 m_freem(control); 465 466 /* this assumes too many things to list.. buthey, testing */ 467 if (!rump_threads) 468 sockin_process(so); 469 } 470 break; 471 472 case PRU_SHUTDOWN: 473 removesock(so); 474 break; 475 476 case PRU_SOCKADDR: 477 case PRU_PEERADDR: 478 { 479 int slen = nam->m_len; 480 enum rumpuser_getnametype which; 481 482 if (req == PRU_SOCKADDR) 483 which = RUMPUSER_SOCKNAME; 484 else 485 which = RUMPUSER_PEERNAME; 486 rumpuser_net_getname(SO2S(so), 487 mtod(nam, struct sockaddr *), &slen, which, &error); 488 if (error == 0) 489 nam->m_len = slen; 490 break; 491 } 492 493 case PRU_CONTROL: 494 error = ENOTTY; 495 break; 496 497 default: 498 panic("sockin_usrreq: IMPLEMENT ME, req %d not supported", req); 499 } 500 501 return error; 502 } 503 504 static int 505 sockin_ctloutput(int op, struct socket *so, struct sockopt *sopt) 506 { 507 int error; 508 509 rumpuser_net_setsockopt(SO2S(so), sopt->sopt_level, 510 sopt->sopt_name, sopt->sopt_data, sopt->sopt_size, &error); 511 return error; 512 } 513