/*
 * Copyright (c) 2003, 2004 Matthew Dillon. All rights reserved.
 * Copyright (c) 2003, 2004 Jeffrey M. Hsu. All rights reserved.
 * Copyright (c) 2003 Jonathan Lemon. All rights reserved.
 * Copyright (c) 2003, 2004 The DragonFly Project. All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Jonathan Lemon, Jeffrey M. Hsu, and Matthew Dillon.
 *
 * Jonathan Lemon gave Jeffrey Hsu permission to combine his copyright
 * into this one around July 8 2004.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/msgport.h>
#include <sys/proc.h>
#include <sys/interrupt.h>
#include <sys/socket.h>
#include <sys/sysctl.h>
#include <sys/socketvar.h>
#include <net/if.h>
#include <net/if_var.h>
#include <net/netisr.h>
#include <machine/cpufunc.h>

#include <sys/thread2.h>
#include <sys/msgport2.h>
#include <net/netmsg2.h>
#include <sys/mplock2.h>

static void netmsg_sync_func(netmsg_t msg);
static void netmsg_service_loop(void *arg);
static void cpu0_cpufn(struct mbuf **mp, int hoff);

struct netmsg_port_registration {
	TAILQ_ENTRY(netmsg_port_registration) npr_entry;
	lwkt_port_t	npr_port;
};

struct netmsg_rollup {
	TAILQ_ENTRY(netmsg_rollup) ru_entry;
	netisr_ru_t	ru_func;
};

static struct netisr netisrs[NETISR_MAX];
static TAILQ_HEAD(,netmsg_port_registration) netreglist;
static TAILQ_HEAD(,netmsg_rollup) netrulist;
/* Per-CPU thread to handle any protocol. */
static struct thread netisr_cpu[MAXCPU];

lwkt_port netisr_afree_rport;
lwkt_port netisr_afree_free_so_rport;
lwkt_port netisr_adone_rport;
lwkt_port netisr_apanic_rport;
lwkt_port netisr_sync_port;

static int (*netmsg_fwd_port_fn)(lwkt_port_t, lwkt_msg_t);

SYSCTL_NODE(_net, OID_AUTO, netisr, CTLFLAG_RW, 0, "netisr");

/*
 * netisr_afree_rport replymsg function, only used to handle async
 * messages which the sender has abandoned to their fate.
 */
static void
netisr_autofree_reply(lwkt_port_t port, lwkt_msg_t msg)
{
	kfree(msg, M_LWKTMSG);
}

static void
netisr_autofree_free_so_reply(lwkt_port_t port, lwkt_msg_t msg)
{
	sofree(((netmsg_t)msg)->base.nm_so);
	kfree(msg, M_LWKTMSG);
}

/*
 * We need a custom putport function to handle the case where the
 * message target is the current thread's message port.  This case can
 * occur when the TCP or UDP stack does a direct callback to NFS and NFS
 * then turns around and executes a network operation synchronously.
 *
 * To prevent deadlocking, we must execute these self-referential messages
 * synchronously, effectively turning the message into a glorified direct
 * procedure call back into the protocol stack.  The operation must be
 * complete on return or we will deadlock, so panic if it isn't.
 *
 * However, the target function is under no obligation to immediately
 * reply the message.  It may forward it elsewhere.
 */
static int
netmsg_put_port(lwkt_port_t port, lwkt_msg_t lmsg)
{
	netmsg_base_t nmsg = (void *)lmsg;

	if ((lmsg->ms_flags & MSGF_SYNC) && port == &curthread->td_msgport) {
		nmsg->nm_dispatch((netmsg_t)nmsg);
		return(EASYNC);
	} else {
		return(netmsg_fwd_port_fn(port, lmsg));
	}
}

/*
 * UNIX DOMAIN sockets still have to run their uipc functions synchronously,
 * because they depend on the user proc context for a number of things
 * (like creds) which we have not yet incorporated into the message structure.
 *
 * However, we maintain our message/port abstraction.  Having a special
 * synchronous port which runs the commands synchronously gives us the
 * ability to serialize operations in one place later on when we start
 * removing the BGL.
 */
static int
netmsg_sync_putport(lwkt_port_t port, lwkt_msg_t lmsg)
{
	netmsg_base_t nmsg = (void *)lmsg;

	KKASSERT((lmsg->ms_flags & MSGF_DONE) == 0);

	lmsg->ms_target_port = port;	/* required for abort */
	nmsg->nm_dispatch((netmsg_t)nmsg);
	return(EASYNC);
}
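/*
 * Illustrative sketch (not part of the original file): the usual pattern
 * for handing a synchronous command to a protocol thread is to initialize
 * a netmsg with the caller's own port as the reply port and dispatch it
 * with lwkt_domsg(), exactly as netmsg_service_sync() below does.  The
 * dispatch function and target port names here are hypothetical
 * placeholders:
 *
 *	struct netmsg_base nmsg;
 *	int error;
 *
 *	netmsg_init(&nmsg, NULL, &curthread->td_msgport, 0,
 *		    example_dispatch_func);
 *	error = lwkt_domsg(example_target_port, &nmsg.lmsg, 0);
 *
 * If example_target_port happens to be the caller's own message port,
 * the custom netmsg_put_port() above runs example_dispatch_func() inline
 * instead of queueing the message, which is what avoids the self-deadlock
 * described in the comment above it.
 */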
179 */ 180 lwkt_initport_replyonly(&netisr_afree_rport, netisr_autofree_reply); 181 lwkt_initport_replyonly(&netisr_afree_free_so_rport, 182 netisr_autofree_free_so_reply); 183 lwkt_initport_replyonly_null(&netisr_adone_rport); 184 lwkt_initport_panic(&netisr_apanic_rport); 185 186 /* 187 * The netisr_syncport is a special port which executes the message 188 * synchronously and waits for it if EASYNC is returned. 189 */ 190 lwkt_initport_putonly(&netisr_sync_port, netmsg_sync_putport); 191 } 192 193 SYSINIT(netisr, SI_SUB_PRE_DRIVERS, SI_ORDER_FIRST, netisr_init, NULL); 194 195 /* 196 * Finish initializing the message port for a netmsg service. This also 197 * registers the port for synchronous cleanup operations such as when an 198 * ifnet is being destroyed. There is no deregistration API yet. 199 */ 200 void 201 netmsg_service_port_init(lwkt_port_t port) 202 { 203 struct netmsg_port_registration *reg; 204 205 /* 206 * Override the putport function. Our custom function checks for 207 * self-references and executes such commands synchronously. 208 */ 209 if (netmsg_fwd_port_fn == NULL) 210 netmsg_fwd_port_fn = port->mp_putport; 211 KKASSERT(netmsg_fwd_port_fn == port->mp_putport); 212 port->mp_putport = netmsg_put_port; 213 214 /* 215 * Keep track of ports using the netmsg API so we can synchronize 216 * certain operations (such as freeing an ifnet structure) across all 217 * consumers. 218 */ 219 reg = kmalloc(sizeof(*reg), M_TEMP, M_WAITOK|M_ZERO); 220 reg->npr_port = port; 221 TAILQ_INSERT_TAIL(&netreglist, reg, npr_entry); 222 } 223 224 /* 225 * This function synchronizes the caller with all netmsg services. For 226 * example, if an interface is being removed we must make sure that all 227 * packets related to that interface complete processing before the structure 228 * can actually be freed. This sort of synchronization is an alternative to 229 * ref-counting the netif, removing the ref counting overhead in favor of 230 * placing additional overhead in the netif freeing sequence (where it is 231 * inconsequential). 232 */ 233 void 234 netmsg_service_sync(void) 235 { 236 struct netmsg_port_registration *reg; 237 struct netmsg_base smsg; 238 239 netmsg_init(&smsg, NULL, &curthread->td_msgport, 0, netmsg_sync_func); 240 241 TAILQ_FOREACH(reg, &netreglist, npr_entry) { 242 lwkt_domsg(reg->npr_port, &smsg.lmsg, 0); 243 } 244 } 245 246 /* 247 * The netmsg function simply replies the message. API semantics require 248 * EASYNC to be returned if the netmsg function disposes of the message. 249 */ 250 static void 251 netmsg_sync_func(netmsg_t msg) 252 { 253 lwkt_replymsg(&msg->lmsg, 0); 254 } 255 256 /* 257 * Generic netmsg service loop. Some protocols may roll their own but all 258 * must do the basic command dispatch function call done here. 259 */ 260 static void 261 netmsg_service_loop(void *arg) 262 { 263 struct netmsg_rollup *ru; 264 netmsg_base_t msg; 265 thread_t td = curthread;; 266 int limit; 267 268 while ((msg = lwkt_waitport(&td->td_msgport, 0))) { 269 /* 270 * Run up to 512 pending netmsgs. 271 */ 272 limit = 512; 273 do { 274 KASSERT(msg->nm_dispatch != NULL, 275 ("netmsg_service isr %d badmsg\n", 276 msg->lmsg.u.ms_result)); 277 if (msg->nm_so && 278 msg->nm_so->so_port != &td->td_msgport) { 279 /* 280 * Sockets undergoing connect or disconnect 281 * ops can change ports on us. Chase the 282 * port. 
283 */ 284 kprintf("netmsg_service_loop: Warning, " 285 "port changed so=%p\n", msg->nm_so); 286 lwkt_forwardmsg(msg->nm_so->so_port, 287 &msg->lmsg); 288 } else { 289 /* 290 * We are on the correct port, dispatch it. 291 */ 292 msg->nm_dispatch((netmsg_t)msg); 293 } 294 if (--limit == 0) 295 break; 296 } while ((msg = lwkt_getport(&td->td_msgport)) != NULL); 297 298 /* 299 * Run all registered rollup functions for this cpu 300 * (e.g. tcp_willblock()). 301 */ 302 TAILQ_FOREACH(ru, &netrulist, ru_entry) 303 ru->ru_func(); 304 } 305 } 306 307 /* 308 * Forward a packet to a netisr service function. 309 * 310 * If the packet has not been assigned to a protocol thread we call 311 * the port characterization function to assign it. The caller must 312 * clear M_HASH (or not have set it in the first place) if the caller 313 * wishes the packet to be recharacterized. 314 */ 315 int 316 netisr_queue(int num, struct mbuf *m) 317 { 318 struct netisr *ni; 319 struct netmsg_packet *pmsg; 320 lwkt_port_t port; 321 322 KASSERT((num > 0 && num <= NELEM(netisrs)), 323 ("Bad isr %d", num)); 324 325 ni = &netisrs[num]; 326 if (ni->ni_handler == NULL) { 327 kprintf("Unregistered isr %d\n", num); 328 m_freem(m); 329 return (EIO); 330 } 331 332 /* 333 * Figure out which protocol thread to send to. This does not 334 * have to be perfect but performance will be really good if it 335 * is correct. Major protocol inputs such as ip_input() will 336 * re-characterize the packet as necessary. 337 */ 338 if ((m->m_flags & M_HASH) == 0) { 339 ni->ni_cpufn(&m, 0); 340 if (m == NULL) { 341 m_freem(m); 342 return (EIO); 343 } 344 if ((m->m_flags & M_HASH) == 0) { 345 kprintf("netisr_queue(%d): packet hash failed\n", num); 346 m_freem(m); 347 return (EIO); 348 } 349 } 350 351 /* 352 * Get the protocol port based on the packet hash, initialize 353 * the netmsg, and send it off. 354 */ 355 port = cpu_portfn(m->m_pkthdr.hash); 356 pmsg = &m->m_hdr.mh_netmsg; 357 netmsg_init(&pmsg->base, NULL, &netisr_apanic_rport, 358 0, ni->ni_handler); 359 pmsg->nm_packet = m; 360 pmsg->base.lmsg.u.ms_result = num; 361 lwkt_sendmsg(port, &pmsg->base.lmsg); 362 363 return (0); 364 } 365 366 /* 367 * Pre-characterization of a deeper portion of the packet for the 368 * requested isr. 369 * 370 * The base of the ISR type (e.g. IP) that we want to characterize is 371 * at (hoff) relative to the beginning of the mbuf. This allows 372 * e.g. ether_input_chain() to not have to adjust the m_data/m_len. 373 */ 374 void 375 netisr_characterize(int num, struct mbuf **mp, int hoff) 376 { 377 struct netisr *ni; 378 struct mbuf *m; 379 380 /* 381 * Validation 382 */ 383 m = *mp; 384 KKASSERT(m != NULL); 385 386 if (num < 0 || num >= NETISR_MAX) { 387 if (num == NETISR_MAX) { 388 m->m_flags |= M_HASH; 389 m->m_pkthdr.hash = 0; 390 return; 391 } 392 panic("Bad isr %d", num); 393 } 394 395 /* 396 * Valid netisr? 
397 */ 398 ni = &netisrs[num]; 399 if (ni->ni_handler == NULL) { 400 kprintf("Unregistered isr %d\n", num); 401 m_freem(m); 402 *mp = NULL; 403 } 404 405 /* 406 * Characterize the packet 407 */ 408 if ((m->m_flags & M_HASH) == 0) { 409 ni->ni_cpufn(mp, hoff); 410 m = *mp; 411 if (m && (m->m_flags & M_HASH) == 0) 412 kprintf("netisr_queue(%d): packet hash failed\n", num); 413 } 414 } 415 416 void 417 netisr_register(int num, netisr_fn_t handler, netisr_cpufn_t cpufn) 418 { 419 struct netisr *ni; 420 421 KASSERT((num > 0 && num <= NELEM(netisrs)), 422 ("netisr_register: bad isr %d", num)); 423 KKASSERT(handler != NULL); 424 425 if (cpufn == NULL) 426 cpufn = cpu0_cpufn; 427 428 ni = &netisrs[num]; 429 430 ni->ni_handler = handler; 431 ni->ni_cpufn = cpufn; 432 netmsg_init(&ni->ni_netmsg, NULL, &netisr_adone_rport, 0, NULL); 433 } 434 435 void 436 netisr_register_rollup(netisr_ru_t ru_func) 437 { 438 struct netmsg_rollup *ru; 439 440 ru = kmalloc(sizeof(*ru), M_TEMP, M_WAITOK|M_ZERO); 441 ru->ru_func = ru_func; 442 TAILQ_INSERT_TAIL(&netrulist, ru, ru_entry); 443 } 444 445 /* 446 * Return the message port for the general protocol message servicing 447 * thread for a particular cpu. 448 */ 449 lwkt_port_t 450 cpu_portfn(int cpu) 451 { 452 KKASSERT(cpu >= 0 && cpu < ncpus); 453 return (&netisr_cpu[cpu].td_msgport); 454 } 455 456 /* 457 * Return the current cpu's network protocol thread. 458 */ 459 lwkt_port_t 460 cur_netport(void) 461 { 462 return(cpu_portfn(mycpu->gd_cpuid)); 463 } 464 465 /* 466 * Return a default protocol control message processing thread port 467 */ 468 lwkt_port_t 469 cpu0_ctlport(int cmd __unused, struct sockaddr *sa __unused, 470 void *extra __unused) 471 { 472 return (&netisr_cpu[0].td_msgport); 473 } 474 475 /* 476 * This is a default netisr packet characterization function which 477 * sets M_HASH. If a netisr is registered with a NULL cpufn function 478 * this one is assigned. 479 * 480 * This function makes no attempt to validate the packet. 481 */ 482 static void 483 cpu0_cpufn(struct mbuf **mp, int hoff __unused) 484 { 485 struct mbuf *m = *mp; 486 487 m->m_flags |= M_HASH; 488 m->m_pkthdr.hash = 0; 489 } 490 491 /* 492 * schednetisr() is used to call the netisr handler from the appropriate 493 * netisr thread for polling and other purposes. 494 * 495 * This function may be called from a hard interrupt or IPI and must be 496 * MP SAFE and non-blocking. We use a fixed per-cpu message instead of 497 * trying to allocate one. We must get ourselves onto the target cpu 498 * to safely check the MSGF_DONE bit on the message but since the message 499 * will be sent to that cpu anyway this does not add any extra work beyond 500 * what lwkt_sendmsg() would have already had to do to schedule the target 501 * thread. 
502 */ 503 static void 504 schednetisr_remote(void *data) 505 { 506 int num = (int)(intptr_t)data; 507 struct netisr *ni = &netisrs[num]; 508 lwkt_port_t port = &netisr_cpu[0].td_msgport; 509 netmsg_base_t pmsg; 510 511 pmsg = &netisrs[num].ni_netmsg; 512 if (pmsg->lmsg.ms_flags & MSGF_DONE) { 513 netmsg_init(pmsg, NULL, &netisr_adone_rport, 0, ni->ni_handler); 514 pmsg->lmsg.u.ms_result = num; 515 lwkt_sendmsg(port, &pmsg->lmsg); 516 } 517 } 518 519 void 520 schednetisr(int num) 521 { 522 KASSERT((num > 0 && num <= NELEM(netisrs)), 523 ("schednetisr: bad isr %d", num)); 524 KKASSERT(netisrs[num].ni_handler != NULL); 525 #ifdef SMP 526 if (mycpu->gd_cpuid != 0) { 527 lwkt_send_ipiq(globaldata_find(0), 528 schednetisr_remote, (void *)(intptr_t)num); 529 } else { 530 crit_enter(); 531 schednetisr_remote((void *)(intptr_t)num); 532 crit_exit(); 533 } 534 #else 535 crit_enter(); 536 schednetisr_remote((void *)(intptr_t)num); 537 crit_exit(); 538 #endif 539 } 540