1 /* $NetBSD: raw_ip.c,v 1.62 2002/08/14 00:23:33 itojun Exp $ */ 2 3 /* 4 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. Neither the name of the project nor the names of its contributors 16 * may be used to endorse or promote products derived from this software 17 * without specific prior written permission. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 30 */ 31 32 /* 33 * Copyright (c) 1982, 1986, 1988, 1993 34 * The Regents of the University of California. All rights reserved. 35 * 36 * Redistribution and use in source and binary forms, with or without 37 * modification, are permitted provided that the following conditions 38 * are met: 39 * 1. Redistributions of source code must retain the above copyright 40 * notice, this list of conditions and the following disclaimer. 41 * 2. Redistributions in binary form must reproduce the above copyright 42 * notice, this list of conditions and the following disclaimer in the 43 * documentation and/or other materials provided with the distribution. 44 * 3. All advertising materials mentioning features or use of this software 45 * must display the following acknowledgement: 46 * This product includes software developed by the University of 47 * California, Berkeley and its contributors. 48 * 4. Neither the name of the University nor the names of its contributors 49 * may be used to endorse or promote products derived from this software 50 * without specific prior written permission. 51 * 52 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 53 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 54 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 55 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 56 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 57 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 58 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 59 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 60 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 61 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 62 * SUCH DAMAGE. 63 * 64 * @(#)raw_ip.c 8.7 (Berkeley) 5/15/95 65 */ 66 67 #include <sys/cdefs.h> 68 __KERNEL_RCSID(0, "$NetBSD: raw_ip.c,v 1.62 2002/08/14 00:23:33 itojun Exp $"); 69 70 #include "opt_ipsec.h" 71 #include "opt_mrouting.h" 72 73 #include <sys/param.h> 74 #include <sys/malloc.h> 75 #include <sys/mbuf.h> 76 #include <sys/socket.h> 77 #include <sys/protosw.h> 78 #include <sys/socketvar.h> 79 #include <sys/errno.h> 80 #include <sys/systm.h> 81 #include <sys/proc.h> 82 83 #include <net/if.h> 84 #include <net/route.h> 85 86 #include <netinet/in.h> 87 #include <netinet/in_systm.h> 88 #include <netinet/ip.h> 89 #include <netinet/ip_var.h> 90 #include <netinet/ip_mroute.h> 91 #include <netinet/ip_icmp.h> 92 #include <netinet/in_pcb.h> 93 #include <netinet/in_var.h> 94 95 #include <machine/stdarg.h> 96 97 #ifdef IPSEC 98 #include <netinet6/ipsec.h> 99 #endif /*IPSEC*/ 100 101 struct inpcbtable rawcbtable; 102 103 int rip_pcbnotify __P((struct inpcbtable *, struct in_addr, 104 struct in_addr, int, int, void (*) __P((struct inpcb *, int)))); 105 int rip_bind __P((struct inpcb *, struct mbuf *)); 106 int rip_connect __P((struct inpcb *, struct mbuf *)); 107 void rip_disconnect __P((struct inpcb *)); 108 109 /* 110 * Nominal space allocated to a raw ip socket. 111 */ 112 #define RIPSNDQ 8192 113 #define RIPRCVQ 8192 114 115 /* 116 * Raw interface to IP protocol. 117 */ 118 119 /* 120 * Initialize raw connection block q. 121 */ 122 void 123 rip_init() 124 { 125 126 in_pcbinit(&rawcbtable, 1, 1); 127 } 128 129 static struct sockaddr_in ripsrc = { sizeof(ripsrc), AF_INET }; 130 131 /* 132 * Setup generic address and protocol structures 133 * for raw_input routine, then pass them along with 134 * mbuf chain. 135 */ 136 void 137 #if __STDC__ 138 rip_input(struct mbuf *m, ...) 139 #else 140 rip_input(m, va_alist) 141 struct mbuf *m; 142 va_dcl 143 #endif 144 { 145 int off, proto; 146 struct ip *ip = mtod(m, struct ip *); 147 struct inpcb *inp; 148 struct inpcb *last = 0; 149 struct mbuf *opts = 0; 150 struct sockaddr_in ripsrc; 151 va_list ap; 152 153 va_start(ap, m); 154 off = va_arg(ap, int); 155 proto = va_arg(ap, int); 156 va_end(ap); 157 158 ripsrc.sin_family = AF_INET; 159 ripsrc.sin_len = sizeof(struct sockaddr_in); 160 ripsrc.sin_addr = ip->ip_src; 161 ripsrc.sin_port = 0; 162 bzero((caddr_t)ripsrc.sin_zero, sizeof(ripsrc.sin_zero)); 163 164 /* 165 * XXX Compatibility: programs using raw IP expect ip_len 166 * XXX to have the header length subtracted, and in host order. 167 * XXX ip_off is also expected to be host order. 168 */ 169 ip->ip_len = ntohs(ip->ip_len) - (ip->ip_hl << 2); 170 NTOHS(ip->ip_off); 171 172 CIRCLEQ_FOREACH(inp, &rawcbtable.inpt_queue, inp_queue) { 173 if (inp->inp_ip.ip_p && inp->inp_ip.ip_p != proto) 174 continue; 175 if (!in_nullhost(inp->inp_laddr) && 176 !in_hosteq(inp->inp_laddr, ip->ip_dst)) 177 continue; 178 if (!in_nullhost(inp->inp_faddr) && 179 !in_hosteq(inp->inp_faddr, ip->ip_src)) 180 continue; 181 if (last) { 182 struct mbuf *n; 183 184 #ifdef IPSEC 185 /* check AH/ESP integrity. */ 186 if (ipsec4_in_reject_so(m, last->inp_socket)) { 187 ipsecstat.in_polvio++; 188 /* do not inject data to pcb */ 189 } else 190 #endif /*IPSEC*/ 191 if ((n = m_copy(m, 0, (int)M_COPYALL)) != NULL) { 192 if (last->inp_flags & INP_CONTROLOPTS || 193 last->inp_socket->so_options & SO_TIMESTAMP) 194 ip_savecontrol(last, &opts, ip, n); 195 if (sbappendaddr(&last->inp_socket->so_rcv, 196 sintosa(&ripsrc), n, opts) == 0) { 197 /* should notify about lost packet */ 198 m_freem(n); 199 if (opts) 200 m_freem(opts); 201 } else 202 sorwakeup(last->inp_socket); 203 opts = NULL; 204 } 205 } 206 last = inp; 207 } 208 #ifdef IPSEC 209 /* check AH/ESP integrity. */ 210 if (last && ipsec4_in_reject_so(m, last->inp_socket)) { 211 m_freem(m); 212 ipsecstat.in_polvio++; 213 ipstat.ips_delivered--; 214 /* do not inject data to pcb */ 215 } else 216 #endif /*IPSEC*/ 217 if (last) { 218 if (last->inp_flags & INP_CONTROLOPTS || 219 last->inp_socket->so_options & SO_TIMESTAMP) 220 ip_savecontrol(last, &opts, ip, m); 221 if (sbappendaddr(&last->inp_socket->so_rcv, 222 sintosa(&ripsrc), m, opts) == 0) { 223 m_freem(m); 224 if (opts) 225 m_freem(opts); 226 } else 227 sorwakeup(last->inp_socket); 228 } else { 229 if (inetsw[ip_protox[ip->ip_p]].pr_input == rip_input) { 230 icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_PROTOCOL, 231 0, 0); 232 ipstat.ips_noproto++; 233 ipstat.ips_delivered--; 234 } else 235 m_freem(m); 236 } 237 return; 238 } 239 240 int 241 rip_pcbnotify(table, faddr, laddr, proto, errno, notify) 242 struct inpcbtable *table; 243 struct in_addr faddr, laddr; 244 int proto; 245 int errno; 246 void (*notify) __P((struct inpcb *, int)); 247 { 248 struct inpcb *inp, *ninp; 249 int nmatch; 250 251 nmatch = 0; 252 for (inp = CIRCLEQ_FIRST(&table->inpt_queue); 253 inp != (struct inpcb *)&table->inpt_queue; 254 inp = ninp) { 255 ninp = inp->inp_queue.cqe_next; 256 if (inp->inp_ip.ip_p && inp->inp_ip.ip_p != proto) 257 continue; 258 if (in_hosteq(inp->inp_faddr, faddr) && 259 in_hosteq(inp->inp_laddr, laddr)) { 260 (*notify)(inp, errno); 261 nmatch++; 262 } 263 } 264 265 return nmatch; 266 } 267 268 void * 269 rip_ctlinput(cmd, sa, v) 270 int cmd; 271 struct sockaddr *sa; 272 void *v; 273 { 274 struct ip *ip = v; 275 void (*notify) __P((struct inpcb *, int)) = in_rtchange; 276 int errno; 277 278 if (sa->sa_family != AF_INET || 279 sa->sa_len != sizeof(struct sockaddr_in)) 280 return NULL; 281 if ((unsigned)cmd >= PRC_NCMDS) 282 return NULL; 283 errno = inetctlerrmap[cmd]; 284 if (PRC_IS_REDIRECT(cmd)) 285 notify = in_rtchange, ip = 0; 286 else if (cmd == PRC_HOSTDEAD) 287 ip = 0; 288 else if (errno == 0) 289 return NULL; 290 if (ip) { 291 rip_pcbnotify(&rawcbtable, satosin(sa)->sin_addr, 292 ip->ip_src, ip->ip_p, errno, notify); 293 294 /* XXX mapped address case */ 295 } else 296 in_pcbnotifyall(&rawcbtable, satosin(sa)->sin_addr, errno, 297 notify); 298 return NULL; 299 } 300 301 /* 302 * Generate IP header and pass packet to ip_output. 303 * Tack on options user may have setup with control call. 304 */ 305 int 306 #if __STDC__ 307 rip_output(struct mbuf *m, ...) 308 #else 309 rip_output(m, va_alist) 310 struct mbuf *m; 311 va_dcl 312 #endif 313 { 314 struct inpcb *inp; 315 struct ip *ip; 316 struct mbuf *opts; 317 int flags; 318 va_list ap; 319 320 va_start(ap, m); 321 inp = va_arg(ap, struct inpcb *); 322 va_end(ap); 323 324 flags = 325 (inp->inp_socket->so_options & SO_DONTROUTE) | IP_ALLOWBROADCAST 326 | IP_RETURNMTU; 327 328 /* 329 * If the user handed us a complete IP packet, use it. 330 * Otherwise, allocate an mbuf for a header and fill it in. 331 */ 332 if ((inp->inp_flags & INP_HDRINCL) == 0) { 333 if ((m->m_pkthdr.len + sizeof(struct ip)) > IP_MAXPACKET) { 334 m_freem(m); 335 return (EMSGSIZE); 336 } 337 M_PREPEND(m, sizeof(struct ip), M_WAIT); 338 ip = mtod(m, struct ip *); 339 ip->ip_tos = 0; 340 ip->ip_off = htons(0); 341 ip->ip_p = inp->inp_ip.ip_p; 342 ip->ip_len = htons(m->m_pkthdr.len); 343 ip->ip_src = inp->inp_laddr; 344 ip->ip_dst = inp->inp_faddr; 345 ip->ip_ttl = MAXTTL; 346 opts = inp->inp_options; 347 } else { 348 if (m->m_pkthdr.len > IP_MAXPACKET) { 349 m_freem(m); 350 return (EMSGSIZE); 351 } 352 ip = mtod(m, struct ip *); 353 /* XXX userland passes ip_len and ip_off in host order */ 354 if (m->m_pkthdr.len != ip->ip_len) { 355 m_freem(m); 356 return (EINVAL); 357 } 358 HTONS(ip->ip_len); 359 HTONS(ip->ip_off); 360 if (ip->ip_id == 0) 361 ip->ip_id = htons(ip_id++); 362 opts = NULL; 363 /* XXX prevent ip_output from overwriting header fields */ 364 flags |= IP_RAWOUTPUT; 365 ipstat.ips_rawout++; 366 } 367 #ifdef IPSEC 368 if (ipsec_setsocket(m, inp->inp_socket) != 0) { 369 m_freem(m); 370 return ENOBUFS; 371 } 372 #endif /*IPSEC*/ 373 return (ip_output(m, opts, &inp->inp_route, flags, inp->inp_moptions, 374 &inp->inp_errormtu)); 375 } 376 377 /* 378 * Raw IP socket option processing. 379 */ 380 int 381 rip_ctloutput(op, so, level, optname, m) 382 int op; 383 struct socket *so; 384 int level, optname; 385 struct mbuf **m; 386 { 387 struct inpcb *inp = sotoinpcb(so); 388 int error = 0; 389 390 if (level != IPPROTO_IP) { 391 error = ENOPROTOOPT; 392 if (op == PRCO_SETOPT && *m != 0) 393 (void) m_free(*m); 394 } else switch (op) { 395 396 case PRCO_SETOPT: 397 switch (optname) { 398 case IP_HDRINCL: 399 if (*m == 0 || (*m)->m_len < sizeof (int)) 400 error = EINVAL; 401 else { 402 if (*mtod(*m, int *)) 403 inp->inp_flags |= INP_HDRINCL; 404 else 405 inp->inp_flags &= ~INP_HDRINCL; 406 } 407 if (*m != 0) 408 (void) m_free(*m); 409 break; 410 411 #ifdef MROUTING 412 case MRT_INIT: 413 case MRT_DONE: 414 case MRT_ADD_VIF: 415 case MRT_DEL_VIF: 416 case MRT_ADD_MFC: 417 case MRT_DEL_MFC: 418 case MRT_ASSERT: 419 error = ip_mrouter_set(so, optname, m); 420 break; 421 #endif 422 423 default: 424 error = ip_ctloutput(op, so, level, optname, m); 425 break; 426 } 427 break; 428 429 case PRCO_GETOPT: 430 switch (optname) { 431 case IP_HDRINCL: 432 *m = m_get(M_WAIT, M_SOOPTS); 433 (*m)->m_len = sizeof (int); 434 *mtod(*m, int *) = inp->inp_flags & INP_HDRINCL ? 1 : 0; 435 break; 436 437 #ifdef MROUTING 438 case MRT_VERSION: 439 case MRT_ASSERT: 440 error = ip_mrouter_get(so, optname, m); 441 break; 442 #endif 443 444 default: 445 error = ip_ctloutput(op, so, level, optname, m); 446 break; 447 } 448 break; 449 } 450 return (error); 451 } 452 453 int 454 rip_bind(inp, nam) 455 struct inpcb *inp; 456 struct mbuf *nam; 457 { 458 struct sockaddr_in *addr = mtod(nam, struct sockaddr_in *); 459 460 if (nam->m_len != sizeof(*addr)) 461 return (EINVAL); 462 if (TAILQ_FIRST(&ifnet) == 0) 463 return (EADDRNOTAVAIL); 464 if (addr->sin_family != AF_INET && 465 addr->sin_family != AF_IMPLINK) 466 return (EAFNOSUPPORT); 467 if (!in_nullhost(addr->sin_addr) && 468 ifa_ifwithaddr(sintosa(addr)) == 0) 469 return (EADDRNOTAVAIL); 470 inp->inp_laddr = addr->sin_addr; 471 return (0); 472 } 473 474 int 475 rip_connect(inp, nam) 476 struct inpcb *inp; 477 struct mbuf *nam; 478 { 479 struct sockaddr_in *addr = mtod(nam, struct sockaddr_in *); 480 481 if (nam->m_len != sizeof(*addr)) 482 return (EINVAL); 483 if (TAILQ_FIRST(&ifnet) == 0) 484 return (EADDRNOTAVAIL); 485 if (addr->sin_family != AF_INET && 486 addr->sin_family != AF_IMPLINK) 487 return (EAFNOSUPPORT); 488 inp->inp_faddr = addr->sin_addr; 489 return (0); 490 } 491 492 void 493 rip_disconnect(inp) 494 struct inpcb *inp; 495 { 496 497 inp->inp_faddr = zeroin_addr; 498 } 499 500 u_long rip_sendspace = RIPSNDQ; 501 u_long rip_recvspace = RIPRCVQ; 502 503 /*ARGSUSED*/ 504 int 505 rip_usrreq(so, req, m, nam, control, p) 506 struct socket *so; 507 int req; 508 struct mbuf *m, *nam, *control; 509 struct proc *p; 510 { 511 struct inpcb *inp; 512 int s; 513 int error = 0; 514 #ifdef MROUTING 515 extern struct socket *ip_mrouter; 516 #endif 517 518 if (req == PRU_CONTROL) 519 return (in_control(so, (long)m, (caddr_t)nam, 520 (struct ifnet *)control, p)); 521 522 if (req == PRU_PURGEIF) { 523 in_pcbpurgeif0(&rawcbtable, (struct ifnet *)control); 524 in_purgeif((struct ifnet *)control); 525 in_pcbpurgeif(&rawcbtable, (struct ifnet *)control); 526 return (0); 527 } 528 529 s = splsoftnet(); 530 inp = sotoinpcb(so); 531 #ifdef DIAGNOSTIC 532 if (req != PRU_SEND && req != PRU_SENDOOB && control) 533 panic("rip_usrreq: unexpected control mbuf"); 534 #endif 535 if (inp == 0 && req != PRU_ATTACH) { 536 error = EINVAL; 537 goto release; 538 } 539 540 switch (req) { 541 542 case PRU_ATTACH: 543 if (inp != 0) { 544 error = EISCONN; 545 break; 546 } 547 if (p == 0 || (error = suser(p->p_ucred, &p->p_acflag))) { 548 error = EACCES; 549 break; 550 } 551 if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) { 552 error = soreserve(so, rip_sendspace, rip_recvspace); 553 if (error) 554 break; 555 } 556 error = in_pcballoc(so, &rawcbtable); 557 if (error) 558 break; 559 inp = sotoinpcb(so); 560 inp->inp_ip.ip_p = (long)nam; 561 break; 562 563 case PRU_DETACH: 564 #ifdef MROUTING 565 if (so == ip_mrouter) 566 ip_mrouter_done(); 567 #endif 568 in_pcbdetach(inp); 569 break; 570 571 case PRU_BIND: 572 error = rip_bind(inp, nam); 573 break; 574 575 case PRU_LISTEN: 576 error = EOPNOTSUPP; 577 break; 578 579 case PRU_CONNECT: 580 error = rip_connect(inp, nam); 581 if (error) 582 break; 583 soisconnected(so); 584 break; 585 586 case PRU_CONNECT2: 587 error = EOPNOTSUPP; 588 break; 589 590 case PRU_DISCONNECT: 591 soisdisconnected(so); 592 rip_disconnect(inp); 593 break; 594 595 /* 596 * Mark the connection as being incapable of further input. 597 */ 598 case PRU_SHUTDOWN: 599 socantsendmore(so); 600 break; 601 602 case PRU_RCVD: 603 error = EOPNOTSUPP; 604 break; 605 606 /* 607 * Ship a packet out. The appropriate raw output 608 * routine handles any massaging necessary. 609 */ 610 case PRU_SEND: 611 if (control && control->m_len) { 612 m_freem(control); 613 m_freem(m); 614 error = EINVAL; 615 break; 616 } 617 { 618 if (nam) { 619 if ((so->so_state & SS_ISCONNECTED) != 0) { 620 error = EISCONN; 621 goto die; 622 } 623 error = rip_connect(inp, nam); 624 if (error) { 625 die: 626 m_freem(m); 627 break; 628 } 629 } else { 630 if ((so->so_state & SS_ISCONNECTED) == 0) { 631 error = ENOTCONN; 632 goto die; 633 } 634 } 635 error = rip_output(m, inp); 636 if (nam) 637 rip_disconnect(inp); 638 } 639 break; 640 641 case PRU_SENSE: 642 /* 643 * stat: don't bother with a blocksize. 644 */ 645 splx(s); 646 return (0); 647 648 case PRU_RCVOOB: 649 error = EOPNOTSUPP; 650 break; 651 652 case PRU_SENDOOB: 653 m_freem(control); 654 m_freem(m); 655 error = EOPNOTSUPP; 656 break; 657 658 case PRU_SOCKADDR: 659 in_setsockaddr(inp, nam); 660 break; 661 662 case PRU_PEERADDR: 663 in_setpeeraddr(inp, nam); 664 break; 665 666 default: 667 panic("rip_usrreq"); 668 } 669 670 release: 671 splx(s); 672 return (error); 673 } 674