1 /* 2 * Copyright (c) 2003, 2004 Matthew Dillon. All rights reserved. 3 * Copyright (c) 2003, 2004 Jeffrey M. Hsu. All rights reserved. 4 * Copyright (c) 2003 Jonathan Lemon. All rights reserved. 5 * Copyright (c) 2003, 2004 The DragonFly Project. All rights reserved. 6 * 7 * This code is derived from software contributed to The DragonFly Project 8 * by Jonathan Lemon, Jeffrey M. Hsu, and Matthew Dillon. 9 * 10 * Jonathan Lemon gave Jeffrey Hsu permission to combine his copyright 11 * into this one around July 8 2004. 12 * 13 * Redistribution and use in source and binary forms, with or without 14 * modification, are permitted provided that the following conditions 15 * are met: 16 * 1. Redistributions of source code must retain the above copyright 17 * notice, this list of conditions and the following disclaimer. 18 * 2. Redistributions in binary form must reproduce the above copyright 19 * notice, this list of conditions and the following disclaimer in the 20 * documentation and/or other materials provided with the distribution. 21 * 3. Neither the name of The DragonFly Project nor the names of its 22 * contributors may be used to endorse or promote products derived 23 * from this software without specific, prior written permission. 24 * 25 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 26 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 27 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 28 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
 * IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $DragonFly: src/sys/net/netisr.c,v 1.49 2008/11/01 10:29:31 sephe Exp $
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/msgport.h>
#include <sys/proc.h>
#include <sys/interrupt.h>
#include <sys/socket.h>
#include <sys/sysctl.h>
#include <sys/socketvar.h>
#include <net/if.h>
#include <net/if_var.h>
#include <net/netisr.h>
#include <machine/cpufunc.h>

#include <sys/thread2.h>
#include <sys/msgport2.h>
#include <net/netmsg2.h>

/*
 * Acquire/release the Big Giant Lock around a direct netisr handler call,
 * but only when the handler did not register itself NETISR_FLAG_MPSAFE.
 */
#define NETISR_GET_MPLOCK(ni) \
do { \
	if (((ni)->ni_flags & NETISR_FLAG_MPSAFE) == 0) \
		get_mplock(); \
} while (0)

#define NETISR_REL_MPLOCK(ni) \
do { \
	if (((ni)->ni_flags & NETISR_FLAG_MPSAFE) == 0) \
		rel_mplock(); \
} while (0)

static void netmsg_sync_func(struct netmsg *msg);

/*
 * One entry per message port initialized via netmsg_service_port_init().
 * netmsg_service_sync() walks this list to synchronize against every
 * registered netmsg consumer.
 */
struct netmsg_port_registration {
	TAILQ_ENTRY(netmsg_port_registration) npr_entry;
	lwkt_port_t npr_port;
};

/* Protocol handler table, indexed by NETISR_* protocol number. */
static struct netisr netisrs[NETISR_MAX];
static TAILQ_HEAD(,netmsg_port_registration) netreglist;

/* Per-CPU thread to handle any protocol.
 */
struct thread netisr_cpu[MAXCPU];
lwkt_port netisr_afree_rport;	/* replying auto-frees the message */
lwkt_port netisr_adone_rport;	/* replying just marks the message done */
lwkt_port netisr_apanic_rport;	/* replying panics */
lwkt_port netisr_sync_port;	/* executes messages synchronously */

/*
 * The original mp_putport function shared by all netmsg ports, captured
 * by netmsg_service_port_init() and used by netmsg_put_port() to forward
 * non-self-referential messages.
 */
static int (*netmsg_fwd_port_fn)(lwkt_port_t, lwkt_msg_t);

/* BGL handling mode for the netisr service threads (see sysctl below). */
static int netisr_mpsafe_thread = NETMSG_SERVICE_ADAPTIVE;
TUNABLE_INT("net.netisr.mpsafe_thread", &netisr_mpsafe_thread);

SYSCTL_NODE(_net, OID_AUTO, netisr, CTLFLAG_RW, 0, "netisr");
SYSCTL_INT(_net_netisr, OID_AUTO, mpsafe_thread, CTLFLAG_RW,
	   &netisr_mpsafe_thread, 0,
	   "0:BGL, 1:Adaptive BGL, 2:No BGL(experimental)");

/*
 * Translate a netisr's flags into lwkt message flags: handlers registered
 * NETISR_FLAG_MPSAFE get MSGF_MPSAFE so their messages can be serviced
 * without the BGL.
 */
static __inline int
NETISR_TO_MSGF(const struct netisr *ni)
{
	int msg_flags = 0;

	if (ni->ni_flags & NETISR_FLAG_MPSAFE)
		msg_flags |= MSGF_MPSAFE;
	return msg_flags;
}

/*
 * netisr_afree_rport replymsg function, only used to handle async
 * messages which the sender has abandoned to their fate.
 */
static void
netisr_autofree_reply(lwkt_port_t port, lwkt_msg_t msg)
{
	kfree(msg, M_LWKTMSG);
}

/*
 * We need a custom putport function to handle the case where the
 * message target is the current thread's message port.  This case
 * can occur when the TCP or UDP stack does a direct callback to NFS and NFS
 * then turns around and executes a network operation synchronously.
 *
 * To prevent deadlocking, we must execute these self-referential messages
 * synchronously, effectively turning the message into a glorified direct
 * procedure call back into the protocol stack.  The operation must be
 * complete on return or we will deadlock, so panic if it isn't.
 */
static int
netmsg_put_port(lwkt_port_t port, lwkt_msg_t lmsg)
{
	netmsg_t netmsg = (void *)lmsg;

	/*
	 * A synchronous message targeting our own port: dispatch it
	 * directly instead of queueing it to ourselves (which would
	 * deadlock).  The dispatch must have completed (MSGF_DONE set)
	 * by the time it returns.
	 */
	if ((lmsg->ms_flags & MSGF_SYNC) && port == &curthread->td_msgport) {
		netmsg->nm_dispatch(netmsg);
		if ((lmsg->ms_flags & MSGF_DONE) == 0)
			panic("netmsg_put_port: self-referential deadlock on netport");
		return(EASYNC);
	} else {
		/* Normal case: forward through the original putport. */
		return(netmsg_fwd_port_fn(port, lmsg));
	}
}

/*
 * UNIX DOMAIN sockets still have to run their uipc functions synchronously,
 * because they depend on the user proc context for a number of things
 * (like creds) which we have not yet incorporated into the message structure.
 *
 * However, we maintain our message/port abstraction.  Having a special
 * synchronous port which runs the commands synchronously gives us the
 * ability to serialize operations in one place later on when we start
 * removing the BGL.
 */
static int
netmsg_sync_putport(lwkt_port_t port, lwkt_msg_t lmsg)
{
	netmsg_t netmsg = (void *)lmsg;

	KKASSERT((lmsg->ms_flags & MSGF_DONE) == 0);

	lmsg->ms_target_port = port;	/* required for abort */
	netmsg->nm_dispatch(netmsg);
	return(EASYNC);
}

/*
 * Module initialization: create the per-cpu netisr service threads and
 * set up the special-purpose reply and putport message ports.  Run from
 * SYSINIT before drivers attach.
 */
static void
netisr_init(void)
{
	int i;

	TAILQ_INIT(&netreglist);

	/*
	 * Create default per-cpu threads for generic protocol handling.
	 */
	for (i = 0; i < ncpus; ++i) {
		lwkt_create(netmsg_service_loop, &netisr_mpsafe_thread, NULL,
			    &netisr_cpu[i], TDF_NETWORK | TDF_MPSAFE, i,
			    "netisr_cpu %d", i);
		netmsg_service_port_init(&netisr_cpu[i].td_msgport);
	}

	/*
	 * The netisr_afree_rport is a special reply port which automatically
	 * frees the replied message.  The netisr_adone_rport simply marks
	 * the message as being done.  The netisr_apanic_rport panics if
	 * the message is replied to.
	 */
	lwkt_initport_replyonly(&netisr_afree_rport, netisr_autofree_reply);
	lwkt_initport_replyonly_null(&netisr_adone_rport);
	lwkt_initport_panic(&netisr_apanic_rport);

	/*
	 * The netisr_syncport is a special port which executes the message
	 * synchronously and waits for it if EASYNC is returned.
	 */
	lwkt_initport_putonly(&netisr_sync_port, netmsg_sync_putport);
}

SYSINIT(netisr, SI_SUB_PRE_DRIVERS, SI_ORDER_FIRST, netisr_init, NULL);

/*
 * Finish initializing the message port for a netmsg service.  This also
 * registers the port for synchronous cleanup operations such as when an
 * ifnet is being destroyed.  There is no deregistration API yet.
 */
void
netmsg_service_port_init(lwkt_port_t port)
{
	struct netmsg_port_registration *reg;

	/*
	 * Override the putport function.  Our custom function checks for
	 * self-references and executes such commands synchronously.
	 *
	 * All netmsg ports are expected to share the same underlying
	 * putport function; capture it on first use and assert on it
	 * thereafter.
	 */
	if (netmsg_fwd_port_fn == NULL)
		netmsg_fwd_port_fn = port->mp_putport;
	KKASSERT(netmsg_fwd_port_fn == port->mp_putport);
	port->mp_putport = netmsg_put_port;

	/*
	 * Keep track of ports using the netmsg API so we can synchronize
	 * certain operations (such as freeing an ifnet structure) across all
	 * consumers.
	 */
	reg = kmalloc(sizeof(*reg), M_TEMP, M_WAITOK|M_ZERO);
	reg->npr_port = port;
	TAILQ_INSERT_TAIL(&netreglist, reg, npr_entry);
}

/*
 * This function synchronizes the caller with all netmsg services.  For
 * example, if an interface is being removed we must make sure that all
 * packets related to that interface complete processing before the structure
 * can actually be freed.  This sort of synchronization is an alternative to
 * ref-counting the netif, removing the ref counting overhead in favor of
 * placing additional overhead in the netif freeing sequence (where it is
 * inconsequential).
 */
void
netmsg_service_sync(void)
{
	struct netmsg_port_registration *reg;
	struct netmsg smsg;

	/* The sync message simply replies to itself when dispatched. */
	netmsg_init(&smsg, NULL, &curthread->td_msgport,
		    MSGF_MPSAFE, netmsg_sync_func);

	/*
	 * Send it to every registered port and wait for each reply; once
	 * a port replies, all messages queued ahead of ours have been
	 * processed.
	 */
	TAILQ_FOREACH(reg, &netreglist, npr_entry) {
		lwkt_domsg(reg->npr_port, &smsg.nm_lmsg, 0);
	}
}

/*
 * The netmsg function simply replies the message.  API semantics require
 * EASYNC to be returned if the netmsg function disposes of the message.
 */
static void
netmsg_sync_func(struct netmsg *msg)
{
	lwkt_replymsg(&msg->nm_lmsg, 0);
}

/*
 * Service a netmsg request and modify the BGL lock state if appropriate.
 * The new BGL lock state is returned (1:locked, 0:unlocked).
 */
int
netmsg_service(struct netmsg *msg, int mpsafe_mode, int mplocked)
{
	/*
	 * If nm_so is non-NULL the message is related to a socket.  Sockets
	 * can migrate between protocol processing threads when they connect,
	 * due to an implied connect during a sendmsg(), or when a connection
	 * is accepted.
	 *
	 * If this occurs any messages already queued to the original thread
	 * or which race the change must be forwarded to the new protocol
	 * processing port.
	 *
	 * MPSAFE - socket changes are synchronous to the current protocol
	 *	    port, so the port can only change out from under us if it
	 *	    is already different from the current port anyway, so we
	 *	    forward it.  It is possible to chase a changing port,
	 *	    which is fine.
	 */
	if (msg->nm_so && msg->nm_so->so_port != &curthread->td_msgport) {
		lwkt_forwardmsg(msg->nm_so->so_port, &msg->nm_lmsg);
		return(mplocked);
	}

	/*
	 * Adjust the mplock dynamically.  Note that rel_mplock()/get_mplock()
	 * placement relative to nm_dispatch() is deliberate in each case.
	 */
	switch (mpsafe_mode) {
	case NETMSG_SERVICE_ADAPTIVE: /* Adaptive BGL */
		if (msg->nm_lmsg.ms_flags & MSGF_MPSAFE) {
			if (mplocked) {
				rel_mplock();
				mplocked = 0;
			}
			msg->nm_dispatch(msg);
			/* Leave mpunlocked */
		} else {
			if (!mplocked) {
				get_mplock();
				/* mplocked = 1; not needed */
			}
			msg->nm_dispatch(msg);
			rel_mplock();
			mplocked = 0;
			/* Leave mpunlocked, next msg might be mpsafe */
		}
		break;

	case NETMSG_SERVICE_MPSAFE: /* No BGL */
		if (mplocked) {
			rel_mplock();
			mplocked = 0;
		}
		msg->nm_dispatch(msg);
		/* Leave mpunlocked */
		break;

	default: /* BGL */
		if (!mplocked) {
			get_mplock();
			mplocked = 1;
		}
		msg->nm_dispatch(msg);
		/* Leave mplocked */
		break;
	}
	return mplocked;
}

/*
 * Generic netmsg service loop.  Some protocols may roll their own but all
 * must do the basic command dispatch function call done here.
 *
 * 'arg' points at the mpsafe-mode integer (see netisr_mpsafe_thread),
 * re-read for every message so the sysctl can change it at runtime.
 */
void
netmsg_service_loop(void *arg)
{
	struct netmsg *msg;
	int mplocked, *mpsafe_mode = arg;

	/*
	 * Thread was started with TDF_MPSAFE
	 */
	mplocked = 0;

	/*
	 * Loop on netmsgs
	 */
	while ((msg = lwkt_waitport(&curthread->td_msgport, 0))) {
		mplocked = netmsg_service(msg, *mpsafe_mode, mplocked);
	}
}

/*
 * Call the netisr directly.
 * Queueing may be done in the msg port layer at its discretion.
 */
void
netisr_dispatch(int num, struct mbuf *m)
{
	/* just queue it for now XXX JH */
	netisr_queue(num, m);
}

/*
 * Same as netisr_dispatch(), but always queue.
 * This is either used in places where we are not confident that
 * direct dispatch is possible, or where queueing is required.
375 */ 376 int 377 netisr_queue(int num, struct mbuf *m) 378 { 379 struct netisr *ni; 380 struct netmsg_packet *pmsg; 381 lwkt_port_t port; 382 383 KASSERT((num > 0 && num <= (sizeof(netisrs)/sizeof(netisrs[0]))), 384 ("%s: bad isr %d", __func__, num)); 385 386 ni = &netisrs[num]; 387 if (ni->ni_handler == NULL) { 388 kprintf("%s: unregistered isr %d\n", __func__, num); 389 m_freem(m); 390 return (EIO); 391 } 392 393 if ((port = ni->ni_mport(&m)) == NULL) 394 return (EIO); 395 396 pmsg = &m->m_hdr.mh_netmsg; 397 398 netmsg_init(&pmsg->nm_netmsg, NULL, &netisr_apanic_rport, 399 NETISR_TO_MSGF(ni), ni->ni_handler); 400 pmsg->nm_packet = m; 401 pmsg->nm_netmsg.nm_lmsg.u.ms_result = num; 402 lwkt_sendmsg(port, &pmsg->nm_netmsg.nm_lmsg); 403 return (0); 404 } 405 406 void 407 netisr_register(int num, pkt_portfn_t mportfn, 408 pktinfo_portfn_t mportfn_pktinfo, netisr_fn_t handler, 409 uint32_t flags) 410 { 411 struct netisr *ni; 412 413 KASSERT((num > 0 && num <= (sizeof(netisrs)/sizeof(netisrs[0]))), 414 ("netisr_register: bad isr %d", num)); 415 ni = &netisrs[num]; 416 417 ni->ni_mport = mportfn; 418 ni->ni_mport_pktinfo = mportfn_pktinfo; 419 ni->ni_handler = handler; 420 ni->ni_flags = flags; 421 netmsg_init(&ni->ni_netmsg, NULL, &netisr_adone_rport, 422 NETISR_TO_MSGF(ni), NULL); 423 } 424 425 int 426 netisr_unregister(int num) 427 { 428 KASSERT((num > 0 && num <= (sizeof(netisrs)/sizeof(netisrs[0]))), 429 ("unregister_netisr: bad isr number: %d\n", num)); 430 431 /* XXX JH */ 432 return (0); 433 } 434 435 /* 436 * Return message port for default handler thread on CPU 0. 
437 */ 438 lwkt_port_t 439 cpu0_portfn(struct mbuf **mptr) 440 { 441 struct mbuf *m = *mptr; 442 int cpu = 0; 443 444 m->m_pkthdr.hash = cpu; 445 m->m_flags |= M_HASH; 446 return (&netisr_cpu[cpu].td_msgport); 447 } 448 449 lwkt_port_t 450 cpu_portfn(int cpu) 451 { 452 return (&netisr_cpu[cpu].td_msgport); 453 } 454 455 /* 456 * If the current thread is a network protocol thread (TDF_NETWORK), 457 * then return the current thread's message port. 458 * XXX Else, return the current CPU's netisr message port. 459 */ 460 lwkt_port_t 461 cur_netport(void) 462 { 463 if (curthread->td_flags & TDF_NETWORK) 464 return &curthread->td_msgport; 465 else 466 return cpu_portfn(mycpuid); 467 } 468 469 /* ARGSUSED */ 470 lwkt_port_t 471 cpu0_soport(struct socket *so __unused, struct sockaddr *nam __unused, 472 struct mbuf **dummy __unused) 473 { 474 return (&netisr_cpu[0].td_msgport); 475 } 476 477 lwkt_port_t 478 cpu0_ctlport(int cmd __unused, struct sockaddr *sa __unused, 479 void *extra __unused) 480 { 481 return (&netisr_cpu[0].td_msgport); 482 } 483 484 lwkt_port_t 485 sync_soport(struct socket *so __unused, struct sockaddr *nam __unused, 486 struct mbuf **dummy __unused) 487 { 488 return (&netisr_sync_port); 489 } 490 491 /* 492 * schednetisr() is used to call the netisr handler from the appropriate 493 * netisr thread for polling and other purposes. 494 * 495 * This function may be called from a hard interrupt or IPI and must be 496 * MP SAFE and non-blocking. We use a fixed per-cpu message instead of 497 * trying to allocate one. We must get ourselves onto the target cpu 498 * to safely check the MSGF_DONE bit on the message but since the message 499 * will be sent to that cpu anyway this does not add any extra work beyond 500 * what lwkt_sendmsg() would have already had to do to schedule the target 501 * thread. 
502 */ 503 static void 504 schednetisr_remote(void *data) 505 { 506 int num = (int)(intptr_t)data; 507 struct netisr *ni = &netisrs[num]; 508 lwkt_port_t port = &netisr_cpu[0].td_msgport; 509 struct netmsg *pmsg; 510 511 pmsg = &netisrs[num].ni_netmsg; 512 crit_enter(); 513 if (pmsg->nm_lmsg.ms_flags & MSGF_DONE) { 514 netmsg_init(pmsg, NULL, &netisr_adone_rport, 515 NETISR_TO_MSGF(ni), ni->ni_handler); 516 pmsg->nm_lmsg.u.ms_result = num; 517 lwkt_sendmsg(port, &pmsg->nm_lmsg); 518 } 519 crit_exit(); 520 } 521 522 void 523 schednetisr(int num) 524 { 525 KASSERT((num > 0 && num <= (sizeof(netisrs)/sizeof(netisrs[0]))), 526 ("schednetisr: bad isr %d", num)); 527 #ifdef SMP 528 if (mycpu->gd_cpuid != 0) { 529 lwkt_send_ipiq(globaldata_find(0), 530 schednetisr_remote, (void *)(intptr_t)num); 531 } else { 532 schednetisr_remote((void *)(intptr_t)num); 533 } 534 #else 535 schednetisr_remote((void *)(intptr_t)num); 536 #endif 537 } 538 539 lwkt_port_t 540 netisr_find_port(int num, struct mbuf **m0) 541 { 542 struct netisr *ni; 543 lwkt_port_t port; 544 struct mbuf *m = *m0; 545 546 *m0 = NULL; 547 548 KASSERT((num > 0 && num <= (sizeof(netisrs)/sizeof(netisrs[0]))), 549 ("%s: bad isr %d", __func__, num)); 550 551 ni = &netisrs[num]; 552 if (ni->ni_mport == NULL) { 553 kprintf("%s: unregistered isr %d\n", __func__, num); 554 m_freem(m); 555 return NULL; 556 } 557 558 if ((port = ni->ni_mport(&m)) == NULL) 559 return NULL; 560 561 *m0 = m; 562 return port; 563 } 564 565 void 566 netisr_run(int num, struct mbuf *m) 567 { 568 struct netisr *ni; 569 struct netmsg_packet *pmsg; 570 571 KASSERT((num > 0 && num <= (sizeof(netisrs)/sizeof(netisrs[0]))), 572 ("%s: bad isr %d", __func__, num)); 573 574 ni = &netisrs[num]; 575 if (ni->ni_handler == NULL) { 576 kprintf("%s: unregistered isr %d\n", __func__, num); 577 m_freem(m); 578 return; 579 } 580 581 pmsg = &m->m_hdr.mh_netmsg; 582 583 netmsg_init(&pmsg->nm_netmsg, NULL, &netisr_apanic_rport, 584 0, ni->ni_handler); 585 
pmsg->nm_packet = m; 586 pmsg->nm_netmsg.nm_lmsg.u.ms_result = num; 587 588 NETISR_GET_MPLOCK(ni); 589 ni->ni_handler(&pmsg->nm_netmsg); 590 NETISR_REL_MPLOCK(ni); 591 } 592 593 lwkt_port_t 594 pktinfo_portfn_cpu0(const struct pktinfo *dummy __unused, 595 struct mbuf *m) 596 { 597 m->m_pkthdr.hash = 0; 598 return &netisr_cpu[0].td_msgport; 599 } 600 601 lwkt_port_t 602 pktinfo_portfn_notsupp(const struct pktinfo *dummy __unused, 603 struct mbuf *m __unused) 604 { 605 return NULL; 606 } 607 608 lwkt_port_t 609 netisr_find_pktinfo_port(const struct pktinfo *pi, struct mbuf *m) 610 { 611 struct netisr *ni; 612 int num = pi->pi_netisr; 613 614 KASSERT(m->m_flags & M_HASH, ("packet does not contain hash\n")); 615 KASSERT((num > 0 && num <= (sizeof(netisrs)/sizeof(netisrs[0]))), 616 ("%s: bad isr %d", __func__, num)); 617 618 ni = &netisrs[num]; 619 if (ni->ni_mport_pktinfo == NULL) { 620 kprintf("%s: unregistered isr %d\n", __func__, num); 621 return NULL; 622 } 623 return ni->ni_mport_pktinfo(pi, m); 624 } 625