/*
 * Copyright (c) 2003, 2004 Matthew Dillon.  All rights reserved.
 * Copyright (c) 2003, 2004 Jeffrey M. Hsu.  All rights reserved.
 * Copyright (c) 2003 Jonathan Lemon.  All rights reserved.
 * Copyright (c) 2003, 2004 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Jonathan Lemon, Jeffrey M. Hsu, and Matthew Dillon.
 *
 * Jonathan Lemon gave Jeffrey Hsu permission to combine his copyright
 * into this one around July 8 2004.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/msgport.h>
#include <sys/proc.h>
#include <sys/interrupt.h>
#include <sys/socket.h>
#include <sys/sysctl.h>
#include <sys/socketvar.h>
#include <net/if.h>
#include <net/if_var.h>
#include <net/netisr2.h>
#include <machine/cpufunc.h>
#include <machine/smp.h>

#include <sys/thread2.h>
#include <sys/msgport2.h>
#include <net/netmsg2.h>
#include <sys/mplock2.h>

#include <vm/vm_extern.h>

static void netmsg_service_port_init(lwkt_port_t);
static void netmsg_service_loop(void *arg);
static void netisr_hashfn0(struct mbuf **mp, int hoff);
static void netisr_nohashck(struct mbuf *, const struct pktinfo *);

struct netmsg_port_registration {
	TAILQ_ENTRY(netmsg_port_registration) npr_entry;
	lwkt_port_t	npr_port;
};

struct netisr_rollup {
	TAILQ_ENTRY(netisr_rollup) ru_entry;
	netisr_ru_t	ru_func;
	int		ru_prio;
	void		*ru_key;
};

struct netmsg_rollup {
	struct netmsg_base	base;
	netisr_ru_t		func;
	int			prio;
	void			*key;
};

struct netmsg_barrier {
	struct netmsg_base	base;
	volatile cpumask_t	*br_cpumask;
	volatile uint32_t	br_done;
};

#define NETISR_BR_NOTDONE	0x1
#define NETISR_BR_WAITDONE	0x80000000

struct netisr_barrier {
	struct netmsg_barrier	*br_msgs[MAXCPU];
	int			br_isset;
};

struct netisr_data {
	struct thread		thread;
#ifdef INVARIANTS
	void			*netlastfunc;
#endif
	TAILQ_HEAD(, netisr_rollup) netrulist;
};

static struct netisr_data	*netisr_data[MAXCPU];

static struct netisr netisrs[NETISR_MAX];
static TAILQ_HEAD(,netmsg_port_registration) netreglist;

/* Per-CPU thread to handle any protocol. */
struct thread *netisr_threads[MAXCPU];

lwkt_port netisr_afree_rport;
lwkt_port netisr_afree_free_so_rport;
lwkt_port netisr_adone_rport;
lwkt_port netisr_apanic_rport;
lwkt_port netisr_sync_port;

static int (*netmsg_fwd_port_fn)(lwkt_port_t, lwkt_msg_t);

SYSCTL_NODE(_net, OID_AUTO, netisr, CTLFLAG_RW, 0, "netisr");

static int netisr_rollup_limit = 32;
SYSCTL_INT(_net_netisr, OID_AUTO, rollup_limit, CTLFLAG_RW,
    &netisr_rollup_limit, 0, "Messages to process before rollup");

int netisr_ncpus;
TUNABLE_INT("net.netisr.ncpus", &netisr_ncpus);
SYSCTL_INT(_net_netisr, OID_AUTO, ncpus, CTLFLAG_RD,
    &netisr_ncpus, 0, "# of CPUs to handle network messages");

/*
 * netisr_afree_rport replymsg function, only used to handle async
 * messages which the sender has abandoned to their fate.
 */
static void
netisr_autofree_reply(lwkt_port_t port, lwkt_msg_t msg)
{
	kfree(msg, M_LWKTMSG);
}

static void
netisr_autofree_free_so_reply(lwkt_port_t port, lwkt_msg_t msg)
{
	sofree(((netmsg_t)msg)->base.nm_so);
	kfree(msg, M_LWKTMSG);
}
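/*
 * Illustrative sketch, not part of the build: a sender that wants a
 * fire-and-forget message allocates it from M_LWKTMSG and points the
 * reply port at netisr_afree_rport, so the reply path above frees it.
 * The names "mymsg" and "my_dispatch" are hypothetical.
 *
 *	struct netmsg_base *mymsg;
 *
 *	mymsg = kmalloc(sizeof(*mymsg), M_LWKTMSG, M_WAITOK | M_ZERO);
 *	netmsg_init(mymsg, NULL, &netisr_afree_rport, 0, my_dispatch);
 *	lwkt_sendmsg(netisr_cpuport(0), &mymsg->lmsg);
 *
 * When my_dispatch() eventually lwkt_replymsg()s the message,
 * netisr_autofree_reply() runs and kfree()s it; the sender never waits.
 */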
/*
 * We need a custom putport function to handle the case where the
 * message target is the current thread's message port.  This case
 * can occur when the TCP or UDP stack does a direct callback to NFS and NFS
 * then turns around and executes a network operation synchronously.
 *
 * To prevent deadlocking, we must execute these self-referential messages
 * synchronously, effectively turning the message into a glorified direct
 * procedure call back into the protocol stack.  The operation must be
 * complete on return or we will deadlock, so panic if it isn't.
 *
 * However, the target function is under no obligation to immediately
 * reply the message.  It may forward it elsewhere.
 */
static int
netmsg_put_port(lwkt_port_t port, lwkt_msg_t lmsg)
{
	netmsg_base_t nmsg = (void *)lmsg;

	if ((lmsg->ms_flags & MSGF_SYNC) && port == &curthread->td_msgport) {
		nmsg->nm_dispatch((netmsg_t)nmsg);
		return(EASYNC);
	} else {
		return(netmsg_fwd_port_fn(port, lmsg));
	}
}

/*
 * UNIX DOMAIN sockets still have to run their uipc functions synchronously,
 * because they depend on the user proc context for a number of things
 * (like creds) which we have not yet incorporated into the message structure.
 *
 * However, we maintain our message/port abstraction.  Having a special
 * synchronous port which runs the commands synchronously gives us the
 * ability to serialize operations in one place later on when we start
 * removing the BGL.
 */
static int
netmsg_sync_putport(lwkt_port_t port, lwkt_msg_t lmsg)
{
	netmsg_base_t nmsg = (void *)lmsg;

	KKASSERT((lmsg->ms_flags & MSGF_DONE) == 0);

	lmsg->ms_target_port = port;	/* required for abort */
	nmsg->nm_dispatch((netmsg_t)nmsg);
	return(EASYNC);
}

static void
netisr_init(void)
{
	int i;

	if (netisr_ncpus <= 0) {
		/* Default. */
		netisr_ncpus = ncpus2;
	} else if (netisr_ncpus > ncpus) {
		netisr_ncpus = ncpus;
	}
	if (netisr_ncpus > NETISR_CPUMAX)
		netisr_ncpus = NETISR_CPUMAX;

	TAILQ_INIT(&netreglist);

	/*
	 * Create default per-cpu threads for generic protocol handling.
	 */
	for (i = 0; i < ncpus; ++i) {
		struct netisr_data *nd;

		nd = (void *)kmem_alloc3(&kernel_map, sizeof(*nd),
		    VM_SUBSYS_GD, KM_CPU(i));
		memset(nd, 0, sizeof(*nd));
		TAILQ_INIT(&nd->netrulist);
		netisr_data[i] = nd;

		lwkt_create(netmsg_service_loop, NULL, &netisr_threads[i],
		    &nd->thread, TDF_NOSTART|TDF_FORCE_SPINPORT|TDF_FIXEDCPU,
		    i, "netisr %d", i);
		netmsg_service_port_init(&netisr_threads[i]->td_msgport);
		lwkt_schedule(netisr_threads[i]);
	}

	/*
	 * The netisr_afree_rport is a special reply port which automatically
	 * frees the replied message.  The netisr_adone_rport simply marks
	 * the message as being done.  The netisr_apanic_rport panics if
	 * the message is replied to.
	 */
	lwkt_initport_replyonly(&netisr_afree_rport, netisr_autofree_reply);
	lwkt_initport_replyonly(&netisr_afree_free_so_rport,
	    netisr_autofree_free_so_reply);
	lwkt_initport_replyonly_null(&netisr_adone_rport);
	lwkt_initport_panic(&netisr_apanic_rport);

	/*
	 * The netisr_sync_port is a special port which executes the message
	 * synchronously and waits for it if EASYNC is returned.
	 */
	lwkt_initport_putonly(&netisr_sync_port, netmsg_sync_putport);
}
SYSINIT(netisr, SI_SUB_PRE_DRIVERS, SI_ORDER_FIRST, netisr_init, NULL);
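/*
 * The thread count can be capped from the loader via the tunable
 * registered above, e.g. in /boot/loader.conf:
 *
 *	net.netisr.ncpus="4"
 *
 * A value <= 0 (the default) falls back to ncpus2 as computed in
 * netisr_init().
 */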
/*
 * Finish initializing the message port for a netmsg service.  This also
 * registers the port for synchronous cleanup operations such as when an
 * ifnet is being destroyed.  There is no deregistration API yet.
 */
static void
netmsg_service_port_init(lwkt_port_t port)
{
	struct netmsg_port_registration *reg;

	/*
	 * Override the putport function.  Our custom function checks for
	 * self-references and executes such commands synchronously.
	 */
	if (netmsg_fwd_port_fn == NULL)
		netmsg_fwd_port_fn = port->mp_putport;
	KKASSERT(netmsg_fwd_port_fn == port->mp_putport);
	port->mp_putport = netmsg_put_port;

	/*
	 * Keep track of ports using the netmsg API so we can synchronize
	 * certain operations (such as freeing an ifnet structure) across all
	 * consumers.
	 */
	reg = kmalloc(sizeof(*reg), M_TEMP, M_WAITOK|M_ZERO);
	reg->npr_port = port;
	TAILQ_INSERT_TAIL(&netreglist, reg, npr_entry);
}

/*
 * This function synchronizes the caller with all netmsg services.  For
 * example, if an interface is being removed we must make sure that all
 * packets related to that interface complete processing before the structure
 * can actually be freed.  This sort of synchronization is an alternative to
 * ref-counting the netif, removing the ref counting overhead in favor of
 * placing additional overhead in the netif freeing sequence (where it is
 * inconsequential).
 */
void
netmsg_service_sync(void)
{
	struct netmsg_port_registration *reg;
	struct netmsg_base smsg;

	netmsg_init(&smsg, NULL, &curthread->td_msgport, 0, netmsg_sync_handler);

	TAILQ_FOREACH(reg, &netreglist, npr_entry) {
		lwkt_domsg(reg->npr_port, &smsg.lmsg, 0);
	}
}

/*
 * The netmsg function simply replies the message.  API semantics require
 * EASYNC to be returned if the netmsg function disposes of the message.
 */
void
netmsg_sync_handler(netmsg_t msg)
{
	lwkt_replymsg(&msg->lmsg, 0);
}

/*
 * Generic netmsg service loop.  Some protocols may roll their own but all
 * must do the basic command dispatch function call done here.
 */
static void
netmsg_service_loop(void *arg)
{
	netmsg_base_t msg;
	thread_t td = curthread;
	int limit;
	struct netisr_data *nd = netisr_data[mycpuid];

	td->td_type = TD_TYPE_NETISR;

	while ((msg = lwkt_waitport(&td->td_msgport, 0))) {
		struct netisr_rollup *ru;

		/*
		 * Run up to netisr_rollup_limit pending netmsgs before
		 * breaking out for a rollup pass.
		 */
		limit = netisr_rollup_limit;
		do {
			KASSERT(msg->nm_dispatch != NULL,
			    ("netmsg_service isr %d badmsg",
			     msg->lmsg.u.ms_result));
			/*
			 * Don't match so_port, if the msg explicitly
			 * asks us to ignore its so_port.
			 */
			if ((msg->lmsg.ms_flags & MSGF_IGNSOPORT) == 0 &&
			    msg->nm_so &&
			    msg->nm_so->so_port != &td->td_msgport) {
				/*
				 * Sockets undergoing connect or disconnect
				 * ops can change ports on us.  Chase the
				 * port.
				 */
#ifdef foo
				/*
				 * This could be quite common for protocols
				 * which support asynchronous pru_connect,
				 * e.g. TCP, so kprintf socket port chasing
				 * could be too verbose for the console.
				 */
				kprintf("%s: Warning, port changed so=%p\n",
				    __func__, msg->nm_so);
#endif
				lwkt_forwardmsg(msg->nm_so->so_port,
				    &msg->lmsg);
			} else {
				/*
				 * We are on the correct port, dispatch it.
				 */
#ifdef INVARIANTS
				nd->netlastfunc = msg->nm_dispatch;
#endif
				msg->nm_dispatch((netmsg_t)msg);
			}
			if (--limit == 0)
				break;
		} while ((msg = lwkt_getport(&td->td_msgport)) != NULL);

		/*
		 * Run all registered rollup functions for this cpu
		 * (e.g. tcp_willblock()).
		 */
		TAILQ_FOREACH(ru, &nd->netrulist, ru_entry)
			ru->ru_func();
	}
}
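/*
 * Illustrative sketch, not part of the build: a teardown path that is
 * about to free a structure the netisr threads may still reference
 * would interpose netmsg_service_sync() between unlinking the object
 * and freeing it.  "my_ifp_detach" and "struct myif" are hypothetical.
 *
 *	static void
 *	my_ifp_detach(struct myif *mif)
 *	{
 *		unlink_from_lists(mif);		// no new refs possible
 *		netmsg_service_sync();		// drain all netmsg ports
 *		kfree(mif, M_TEMP);		// now safe to free
 *	}
 *
 * Because the sync message queues behind any in-flight packets on
 * every registered port, its completion proves those packets are done.
 */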
/*
 * Forward a packet to a netisr service function.
 *
 * If the packet has not been assigned to a protocol thread we call
 * the port characterization function to assign it.  The caller must
 * clear M_HASH (or not have set it in the first place) if the caller
 * wishes the packet to be recharacterized.
 */
int
netisr_queue(int num, struct mbuf *m)
{
	struct netisr *ni;
	struct netmsg_packet *pmsg;
	lwkt_port_t port;

	KASSERT((num > 0 && num < NELEM(netisrs)),
	    ("Bad isr %d", num));

	ni = &netisrs[num];
	if (ni->ni_handler == NULL) {
		kprintf("%s: Unregistered isr %d\n", __func__, num);
		m_freem(m);
		return (EIO);
	}

	/*
	 * Figure out which protocol thread to send to.  This does not
	 * have to be perfect but performance will be really good if it
	 * is correct.  Major protocol inputs such as ip_input() will
	 * re-characterize the packet as necessary.
	 */
	if ((m->m_flags & M_HASH) == 0) {
		ni->ni_hashfn(&m, 0);
		if (m == NULL)
			return (EIO);
		if ((m->m_flags & M_HASH) == 0) {
			kprintf("%s(%d): packet hash failed\n",
			    __func__, num);
			m_freem(m);
			return (EIO);
		}
	}

	/*
	 * Get the protocol port based on the packet hash, initialize
	 * the netmsg, and send it off.
	 */
	port = netisr_hashport(m->m_pkthdr.hash);
	pmsg = &m->m_hdr.mh_netmsg;
	netmsg_init(&pmsg->base, NULL, &netisr_apanic_rport,
	    0, ni->ni_handler);
	pmsg->nm_packet = m;
	pmsg->base.lmsg.u.ms_result = num;
	lwkt_sendmsg(port, &pmsg->base.lmsg);

	return (0);
}

/*
 * Run a netisr service function on the packet.
 *
 * The packet must have been correctly characterized!
 */
int
netisr_handle(int num, struct mbuf *m)
{
	struct netisr *ni;
	struct netmsg_packet *pmsg;
	lwkt_port_t port;

	/*
	 * Get the protocol port based on the packet hash.
	 */
	KASSERT((m->m_flags & M_HASH), ("packet not characterized"));
	port = netisr_hashport(m->m_pkthdr.hash);
	KASSERT(&curthread->td_msgport == port, ("wrong msgport"));

	KASSERT((num > 0 && num < NELEM(netisrs)), ("bad isr %d", num));
	ni = &netisrs[num];
	if (ni->ni_handler == NULL) {
		kprintf("%s: unregistered isr %d\n", __func__, num);
		m_freem(m);
		return EIO;
	}

	/*
	 * Initialize the netmsg, and run the handler directly.
	 */
	pmsg = &m->m_hdr.mh_netmsg;
	netmsg_init(&pmsg->base, NULL, &netisr_apanic_rport,
	    0, ni->ni_handler);
	pmsg->nm_packet = m;
	pmsg->base.lmsg.u.ms_result = num;
	ni->ni_handler((netmsg_t)&pmsg->base);

	return 0;
}
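/*
 * Illustrative sketch, not part of the build: an input path that has an
 * mbuf for a registered protocol simply hands it off; netisr_queue()
 * characterizes it if needed and queues it to the owning protocol
 * thread.  NETISR_IP is used here only as a plausible example number.
 *
 *	if (netisr_queue(NETISR_IP, m) != 0) {
 *		// the mbuf was consumed (freed) on error;
 *		// nothing further to clean up here
 *	}
 *
 * Note the consume-on-error convention: every failure path above frees
 * the mbuf before returning EIO, so callers must not touch it again.
 */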
/*
 * Pre-characterization of a deeper portion of the packet for the
 * requested isr.
 *
 * The base of the ISR type (e.g. IP) that we want to characterize is
 * at (hoff) relative to the beginning of the mbuf.  This allows
 * e.g. ether_characterize() to not have to adjust the m_data/m_len.
 */
void
netisr_characterize(int num, struct mbuf **mp, int hoff)
{
	struct netisr *ni;
	struct mbuf *m;

	/*
	 * Validation
	 */
	m = *mp;
	KKASSERT(m != NULL);

	if (num < 0 || num >= NETISR_MAX) {
		if (num == NETISR_MAX) {
			m_sethash(m, 0);
			return;
		}
		panic("Bad isr %d", num);
	}

	/*
	 * Valid netisr?
	 */
	ni = &netisrs[num];
	if (ni->ni_handler == NULL) {
		kprintf("%s: Unregistered isr %d\n", __func__, num);
		m_freem(m);
		*mp = NULL;
		return;
	}

	/*
	 * Characterize the packet
	 */
	if ((m->m_flags & M_HASH) == 0) {
		ni->ni_hashfn(mp, hoff);
		m = *mp;
		if (m && (m->m_flags & M_HASH) == 0) {
			kprintf("%s(%d): packet hash failed\n",
			    __func__, num);
		}
	}
}

void
netisr_register(int num, netisr_fn_t handler, netisr_hashfn_t hashfn)
{
	struct netisr *ni;

	KASSERT((num > 0 && num < NELEM(netisrs)),
	    ("netisr_register: bad isr %d", num));
	KKASSERT(handler != NULL);

	if (hashfn == NULL)
		hashfn = netisr_hashfn0;

	ni = &netisrs[num];

	ni->ni_handler = handler;
	ni->ni_hashck = netisr_nohashck;
	ni->ni_hashfn = hashfn;
	netmsg_init(&ni->ni_netmsg, NULL, &netisr_adone_rport, 0, NULL);
}

void
netisr_register_hashcheck(int num, netisr_hashck_t hashck)
{
	struct netisr *ni;

	KASSERT((num > 0 && num < NELEM(netisrs)),
	    ("netisr_register_hashcheck: bad isr %d", num));

	ni = &netisrs[num];
	ni->ni_hashck = hashck;
}

static void
netisr_register_rollup_dispatch(netmsg_t nmsg)
{
	struct netmsg_rollup *nm = (struct netmsg_rollup *)nmsg;
	int cpuid = mycpuid;
	struct netisr_data *nd = netisr_data[cpuid];
	struct netisr_rollup *new_ru, *ru;

	new_ru = kmalloc(sizeof(*new_ru), M_TEMP, M_WAITOK|M_ZERO);
	new_ru->ru_func = nm->func;
	new_ru->ru_prio = nm->prio;

	/*
	 * Higher priority "rollup" appears first
	 */
	TAILQ_FOREACH(ru, &nd->netrulist, ru_entry) {
		if (ru->ru_prio < new_ru->ru_prio) {
			TAILQ_INSERT_BEFORE(ru, new_ru, ru_entry);
			goto done;
		}
	}
	TAILQ_INSERT_TAIL(&nd->netrulist, new_ru, ru_entry);
done:
	if (cpuid == 0)
		nm->key = new_ru;
	KKASSERT(nm->key != NULL);
	new_ru->ru_key = nm->key;

	netisr_forwardmsg_all(&nm->base, cpuid + 1);
}

struct netisr_rollup *
netisr_register_rollup(netisr_ru_t func, int prio)
{
	struct netmsg_rollup nm;

	netmsg_init(&nm.base, NULL, &curthread->td_msgport, MSGF_PRIORITY,
	    netisr_register_rollup_dispatch);
	nm.func = func;
	nm.prio = prio;
	nm.key = NULL;
	netisr_domsg_global(&nm.base);

	KKASSERT(nm.key != NULL);
	return (nm.key);
}
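/*
 * Illustrative sketch, not part of the build: a protocol that wants a
 * per-batch flush hook registers a rollup at module load and keeps the
 * returned key for teardown.  "my_flush" and MY_ROLLUP_PRIO are
 * hypothetical names.
 *
 *	static struct netisr_rollup *my_ru;
 *
 *	static void
 *	my_flush(void)
 *	{
 *		// called once per service-loop batch on this cpu
 *	}
 *
 *	my_ru = netisr_register_rollup(my_flush, MY_ROLLUP_PRIO);
 *	...
 *	netisr_unregister_rollup(my_ru);
 */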
static void
netisr_unregister_rollup_dispatch(netmsg_t nmsg)
{
	struct netmsg_rollup *nm = (struct netmsg_rollup *)nmsg;
	int cpuid = mycpuid;
	struct netisr_data *nd = netisr_data[cpuid];
	struct netisr_rollup *ru;

	TAILQ_FOREACH(ru, &nd->netrulist, ru_entry) {
		if (ru->ru_key == nm->key)
			break;
	}
	if (ru == NULL)
		panic("netisr: no rollup for %p", nm->key);

	TAILQ_REMOVE(&nd->netrulist, ru, ru_entry);
	kfree(ru, M_TEMP);

	netisr_forwardmsg_all(&nm->base, cpuid + 1);
}

void
netisr_unregister_rollup(struct netisr_rollup *key)
{
	struct netmsg_rollup nm;

	netmsg_init(&nm.base, NULL, &curthread->td_msgport, MSGF_PRIORITY,
	    netisr_unregister_rollup_dispatch);
	nm.key = key;
	netisr_domsg_global(&nm.base);
}

/*
 * Return a default protocol control message processing thread port.
 */
lwkt_port_t
cpu0_ctlport(int cmd __unused, struct sockaddr *sa __unused,
    void *extra __unused, int *cpuid)
{
	*cpuid = 0;
	return netisr_cpuport(*cpuid);
}

/*
 * This is a default netisr packet characterization function which
 * sets M_HASH.  If a netisr is registered with a NULL hashfn function
 * this one is assigned.
 *
 * This function makes no attempt to validate the packet.
 */
static void
netisr_hashfn0(struct mbuf **mp, int hoff __unused)
{
	m_sethash(*mp, 0);
}

/*
 * schednetisr() is used to call the netisr handler from the appropriate
 * netisr thread for polling and other purposes.
 *
 * This function may be called from a hard interrupt or IPI and must be
 * MP SAFE and non-blocking.  We use a fixed per-cpu message instead of
 * trying to allocate one.  We must get ourselves onto the target cpu
 * to safely check the MSGF_DONE bit on the message but since the message
 * will be sent to that cpu anyway this does not add any extra work beyond
 * what lwkt_sendmsg() would have already had to do to schedule the target
 * thread.
 */
static void
schednetisr_remote(void *data)
{
	int num = (int)(intptr_t)data;
	struct netisr *ni = &netisrs[num];
	lwkt_port_t port = &netisr_threads[0]->td_msgport;
	netmsg_base_t pmsg;

	pmsg = &ni->ni_netmsg;
	if (pmsg->lmsg.ms_flags & MSGF_DONE) {
		netmsg_init(pmsg, NULL, &netisr_adone_rport, 0, ni->ni_handler);
		pmsg->lmsg.u.ms_result = num;
		lwkt_sendmsg(port, &pmsg->lmsg);
	}
}

void
schednetisr(int num)
{
	KASSERT((num > 0 && num < NELEM(netisrs)),
	    ("schednetisr: bad isr %d", num));
	KKASSERT(netisrs[num].ni_handler != NULL);
	if (mycpu->gd_cpuid != 0) {
		lwkt_send_ipiq(globaldata_find(0),
		    schednetisr_remote, (void *)(intptr_t)num);
	} else {
		crit_enter();
		schednetisr_remote((void *)(intptr_t)num);
		crit_exit();
	}
}
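/*
 * Illustrative sketch, not part of the build: a driver interrupt
 * handler that only wants to poke a protocol thread (no packet to
 * queue) can use schednetisr() from hard interrupt context, since it
 * never blocks.  NETISR_ARP is only a plausible example number.
 *
 *	static void
 *	my_intr(void *arg)
 *	{
 *		...
 *		schednetisr(NETISR_ARP);
 *	}
 *
 * The fixed per-isr message means duplicate scheduling requests
 * coalesce: if the message is still in flight (MSGF_DONE clear),
 * schednetisr_remote() simply does nothing.
 */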
static void
netisr_barrier_dispatch(netmsg_t nmsg)
{
	struct netmsg_barrier *msg = (struct netmsg_barrier *)nmsg;

	ATOMIC_CPUMASK_NANDBIT(*msg->br_cpumask, mycpu->gd_cpuid);
	if (CPUMASK_TESTZERO(*msg->br_cpumask))
		wakeup(msg->br_cpumask);

	for (;;) {
		uint32_t done = msg->br_done;

		cpu_ccfence();
		if ((done & NETISR_BR_NOTDONE) == 0)
			break;

		tsleep_interlock(&msg->br_done, 0);
		if (atomic_cmpset_int(&msg->br_done,
		    done, done | NETISR_BR_WAITDONE))
			tsleep(&msg->br_done, PINTERLOCKED, "nbrdsp", 0);
	}

	lwkt_replymsg(&nmsg->lmsg, 0);
}

struct netisr_barrier *
netisr_barrier_create(void)
{
	struct netisr_barrier *br;

	br = kmalloc(sizeof(*br), M_LWKTMSG, M_WAITOK | M_ZERO);
	return br;
}

void
netisr_barrier_set(struct netisr_barrier *br)
{
	volatile cpumask_t other_cpumask;
	int i, cur_cpuid;

	ASSERT_NETISR0;
	KKASSERT(!br->br_isset);

	other_cpumask = mycpu->gd_other_cpus;
	CPUMASK_ANDMASK(other_cpumask, smp_active_mask);
	cur_cpuid = mycpuid;

	for (i = 0; i < ncpus; ++i) {
		struct netmsg_barrier *msg;

		if (i == cur_cpuid)
			continue;

		msg = kmalloc(sizeof(struct netmsg_barrier),
		    M_LWKTMSG, M_WAITOK);

		/*
		 * Don't use priority message here; mainly to keep
		 * it ordered w/ the previous data packets sent by
		 * the caller.
		 */
		netmsg_init(&msg->base, NULL, &netisr_afree_rport, 0,
		    netisr_barrier_dispatch);
		msg->br_cpumask = &other_cpumask;
		msg->br_done = NETISR_BR_NOTDONE;

		KKASSERT(br->br_msgs[i] == NULL);
		br->br_msgs[i] = msg;
	}

	for (i = 0; i < ncpus; ++i) {
		if (i == cur_cpuid)
			continue;
		lwkt_sendmsg(netisr_cpuport(i), &br->br_msgs[i]->base.lmsg);
	}

	while (CPUMASK_TESTNZERO(other_cpumask)) {
		tsleep_interlock(&other_cpumask, 0);
		if (CPUMASK_TESTNZERO(other_cpumask))
			tsleep(&other_cpumask, PINTERLOCKED, "nbrset", 0);
	}
	br->br_isset = 1;
}

void
netisr_barrier_rem(struct netisr_barrier *br)
{
	int i, cur_cpuid;

	ASSERT_NETISR0;
	KKASSERT(br->br_isset);

	cur_cpuid = mycpuid;
	for (i = 0; i < ncpus; ++i) {
		struct netmsg_barrier *msg = br->br_msgs[i];
		uint32_t done;

		br->br_msgs[i] = NULL;

		if (i == cur_cpuid)
			continue;

		done = atomic_swap_int(&msg->br_done, 0);
		if (done & NETISR_BR_WAITDONE)
			wakeup(&msg->br_done);
	}
	br->br_isset = 0;
}

static void
netisr_nohashck(struct mbuf *m, const struct pktinfo *pi __unused)
{
	m->m_flags &= ~M_HASH;
}

void
netisr_hashcheck(int num, struct mbuf *m, const struct pktinfo *pi)
{
	struct netisr *ni;

	if (num < 0 || num >= NETISR_MAX)
		panic("Bad isr %d", num);

	/*
	 * Valid netisr?
	 */
	ni = &netisrs[num];
	if (ni->ni_handler == NULL)
		panic("Unregistered isr %d", num);

	ni->ni_hashck(m, pi);
}
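/*
 * Illustrative sketch, not part of the build: the barrier API above
 * quiesces every netisr thread except the calling one (which must be
 * netisr0).  While the barrier is set, the other threads block in
 * netisr_barrier_dispatch() and the caller may safely touch state they
 * would otherwise be processing.  "my_br" is a hypothetical name.
 *
 *	static struct netisr_barrier *my_br;
 *
 *	my_br = netisr_barrier_create();
 *	netisr_barrier_set(my_br);	// all other netisrs parked
 *	// ... mutate shared network state here ...
 *	netisr_barrier_rem(my_br);	// release the threads
 *
 * The barrier messages reply through netisr_afree_rport, so they are
 * freed automatically once released.
 */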