1 /* $NetBSD: raw_ip.c,v 1.78 2003/11/19 18:39:34 jonathan Exp $ */ 2 3 /* 4 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. Neither the name of the project nor the names of its contributors 16 * may be used to endorse or promote products derived from this software 17 * without specific prior written permission. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 30 */ 31 32 /* 33 * Copyright (c) 1982, 1986, 1988, 1993 34 * The Regents of the University of California. All rights reserved. 35 * 36 * Redistribution and use in source and binary forms, with or without 37 * modification, are permitted provided that the following conditions 38 * are met: 39 * 1. Redistributions of source code must retain the above copyright 40 * notice, this list of conditions and the following disclaimer. 41 * 2. Redistributions in binary form must reproduce the above copyright 42 * notice, this list of conditions and the following disclaimer in the 43 * documentation and/or other materials provided with the distribution. 44 * 3. Neither the name of the University nor the names of its contributors 45 * may be used to endorse or promote products derived from this software 46 * without specific prior written permission. 47 * 48 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 50 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 51 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 52 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 53 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 54 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 55 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 56 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 57 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 58 * SUCH DAMAGE. 59 * 60 * @(#)raw_ip.c 8.7 (Berkeley) 5/15/95 61 */ 62 63 #include <sys/cdefs.h> 64 __KERNEL_RCSID(0, "$NetBSD: raw_ip.c,v 1.78 2003/11/19 18:39:34 jonathan Exp $"); 65 66 #include "opt_inet.h" 67 #include "opt_ipsec.h" 68 #include "opt_mrouting.h" 69 70 #include <sys/param.h> 71 #include <sys/malloc.h> 72 #include <sys/mbuf.h> 73 #include <sys/socket.h> 74 #include <sys/protosw.h> 75 #include <sys/socketvar.h> 76 #include <sys/errno.h> 77 #include <sys/systm.h> 78 #include <sys/proc.h> 79 80 #include <net/if.h> 81 #include <net/route.h> 82 83 #include <netinet/in.h> 84 #include <netinet/in_systm.h> 85 #include <netinet/ip.h> 86 #include <netinet/ip_var.h> 87 #include <netinet/ip_mroute.h> 88 #include <netinet/ip_icmp.h> 89 #include <netinet/in_pcb.h> 90 #include <netinet/in_var.h> 91 92 #include <machine/stdarg.h> 93 94 #ifdef IPSEC 95 #include <netinet6/ipsec.h> 96 #endif /*IPSEC*/ 97 98 #ifdef FAST_IPSEC 99 #include <netipsec/ipsec.h> 100 #endif /* FAST_IPSEC*/ 101 102 struct inpcbtable rawcbtable; 103 104 int rip_pcbnotify __P((struct inpcbtable *, struct in_addr, 105 struct in_addr, int, int, void (*) __P((struct inpcb *, int)))); 106 int rip_bind __P((struct inpcb *, struct mbuf *)); 107 int rip_connect __P((struct inpcb *, struct mbuf *)); 108 void rip_disconnect __P((struct inpcb *)); 109 110 /* 111 * Nominal space allocated to a raw ip socket. 112 */ 113 #define RIPSNDQ 8192 114 #define RIPRCVQ 8192 115 116 /* 117 * Raw interface to IP protocol. 118 */ 119 120 /* 121 * Initialize raw connection block q. 122 */ 123 void 124 rip_init() 125 { 126 127 in_pcbinit(&rawcbtable, 1, 1); 128 } 129 130 /* 131 * Setup generic address and protocol structures 132 * for raw_input routine, then pass them along with 133 * mbuf chain. 134 */ 135 void 136 #if __STDC__ 137 rip_input(struct mbuf *m, ...) 138 #else 139 rip_input(m, va_alist) 140 struct mbuf *m; 141 va_dcl 142 #endif 143 { 144 int proto; 145 struct ip *ip = mtod(m, struct ip *); 146 struct inpcb_hdr *inph; 147 struct inpcb *inp; 148 struct inpcb *last = 0; 149 struct mbuf *opts = 0; 150 struct sockaddr_in ripsrc; 151 va_list ap; 152 153 va_start(ap, m); 154 (void)va_arg(ap, int); /* ignore value, advance ap */ 155 proto = va_arg(ap, int); 156 va_end(ap); 157 158 ripsrc.sin_family = AF_INET; 159 ripsrc.sin_len = sizeof(struct sockaddr_in); 160 ripsrc.sin_addr = ip->ip_src; 161 ripsrc.sin_port = 0; 162 bzero((caddr_t)ripsrc.sin_zero, sizeof(ripsrc.sin_zero)); 163 164 /* 165 * XXX Compatibility: programs using raw IP expect ip_len 166 * XXX to have the header length subtracted, and in host order. 167 * XXX ip_off is also expected to be host order. 168 */ 169 ip->ip_len = ntohs(ip->ip_len) - (ip->ip_hl << 2); 170 NTOHS(ip->ip_off); 171 172 CIRCLEQ_FOREACH(inph, &rawcbtable.inpt_queue, inph_queue) { 173 inp = (struct inpcb *)inph; 174 if (inp->inp_af != AF_INET) 175 continue; 176 if (inp->inp_ip.ip_p && inp->inp_ip.ip_p != proto) 177 continue; 178 if (!in_nullhost(inp->inp_laddr) && 179 !in_hosteq(inp->inp_laddr, ip->ip_dst)) 180 continue; 181 if (!in_nullhost(inp->inp_faddr) && 182 !in_hosteq(inp->inp_faddr, ip->ip_src)) 183 continue; 184 if (last) { 185 struct mbuf *n; 186 187 #if defined(IPSEC) || defined(FAST_IPSEC) 188 /* check AH/ESP integrity. */ 189 if (ipsec4_in_reject_so(m, last->inp_socket)) { 190 ipsecstat.in_polvio++; 191 /* do not inject data to pcb */ 192 } else 193 #endif /*IPSEC*/ 194 if ((n = m_copy(m, 0, (int)M_COPYALL)) != NULL) { 195 if (last->inp_flags & INP_CONTROLOPTS || 196 last->inp_socket->so_options & SO_TIMESTAMP) 197 ip_savecontrol(last, &opts, ip, n); 198 if (sbappendaddr(&last->inp_socket->so_rcv, 199 sintosa(&ripsrc), n, opts) == 0) { 200 /* should notify about lost packet */ 201 m_freem(n); 202 if (opts) 203 m_freem(opts); 204 } else 205 sorwakeup(last->inp_socket); 206 opts = NULL; 207 } 208 } 209 last = inp; 210 } 211 #if defined(IPSEC) || defined(FAST_IPSEC) 212 /* check AH/ESP integrity. */ 213 if (last && ipsec4_in_reject_so(m, last->inp_socket)) { 214 m_freem(m); 215 ipsecstat.in_polvio++; 216 ipstat.ips_delivered--; 217 /* do not inject data to pcb */ 218 } else 219 #endif /*IPSEC*/ 220 if (last) { 221 if (last->inp_flags & INP_CONTROLOPTS || 222 last->inp_socket->so_options & SO_TIMESTAMP) 223 ip_savecontrol(last, &opts, ip, m); 224 if (sbappendaddr(&last->inp_socket->so_rcv, 225 sintosa(&ripsrc), m, opts) == 0) { 226 m_freem(m); 227 if (opts) 228 m_freem(opts); 229 } else 230 sorwakeup(last->inp_socket); 231 } else { 232 if (inetsw[ip_protox[ip->ip_p]].pr_input == rip_input) { 233 icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_PROTOCOL, 234 0, 0); 235 ipstat.ips_noproto++; 236 ipstat.ips_delivered--; 237 } else 238 m_freem(m); 239 } 240 return; 241 } 242 243 int 244 rip_pcbnotify(table, faddr, laddr, proto, errno, notify) 245 struct inpcbtable *table; 246 struct in_addr faddr, laddr; 247 int proto; 248 int errno; 249 void (*notify) __P((struct inpcb *, int)); 250 { 251 struct inpcb *inp, *ninp; 252 int nmatch; 253 254 nmatch = 0; 255 for (inp = (struct inpcb *)CIRCLEQ_FIRST(&table->inpt_queue); 256 inp != (struct inpcb *)&table->inpt_queue; 257 inp = ninp) { 258 ninp = (struct inpcb *)inp->inp_queue.cqe_next; 259 if (inp->inp_af != AF_INET) 260 continue; 261 if (inp->inp_ip.ip_p && inp->inp_ip.ip_p != proto) 262 continue; 263 if (in_hosteq(inp->inp_faddr, faddr) && 264 in_hosteq(inp->inp_laddr, laddr)) { 265 (*notify)(inp, errno); 266 nmatch++; 267 } 268 } 269 270 return nmatch; 271 } 272 273 void * 274 rip_ctlinput(cmd, sa, v) 275 int cmd; 276 struct sockaddr *sa; 277 void *v; 278 { 279 struct ip *ip = v; 280 void (*notify) __P((struct inpcb *, int)) = in_rtchange; 281 int errno; 282 283 if (sa->sa_family != AF_INET || 284 sa->sa_len != sizeof(struct sockaddr_in)) 285 return NULL; 286 if ((unsigned)cmd >= PRC_NCMDS) 287 return NULL; 288 errno = inetctlerrmap[cmd]; 289 if (PRC_IS_REDIRECT(cmd)) 290 notify = in_rtchange, ip = 0; 291 else if (cmd == PRC_HOSTDEAD) 292 ip = 0; 293 else if (errno == 0) 294 return NULL; 295 if (ip) { 296 rip_pcbnotify(&rawcbtable, satosin(sa)->sin_addr, 297 ip->ip_src, ip->ip_p, errno, notify); 298 299 /* XXX mapped address case */ 300 } else 301 in_pcbnotifyall(&rawcbtable, satosin(sa)->sin_addr, errno, 302 notify); 303 return NULL; 304 } 305 306 /* 307 * Generate IP header and pass packet to ip_output. 308 * Tack on options user may have setup with control call. 309 */ 310 int 311 #if __STDC__ 312 rip_output(struct mbuf *m, ...) 313 #else 314 rip_output(m, va_alist) 315 struct mbuf *m; 316 va_dcl 317 #endif 318 { 319 struct inpcb *inp; 320 struct ip *ip; 321 struct mbuf *opts; 322 int flags; 323 va_list ap; 324 325 va_start(ap, m); 326 inp = va_arg(ap, struct inpcb *); 327 va_end(ap); 328 329 flags = 330 (inp->inp_socket->so_options & SO_DONTROUTE) | IP_ALLOWBROADCAST 331 | IP_RETURNMTU; 332 333 /* 334 * If the user handed us a complete IP packet, use it. 335 * Otherwise, allocate an mbuf for a header and fill it in. 336 */ 337 if ((inp->inp_flags & INP_HDRINCL) == 0) { 338 if ((m->m_pkthdr.len + sizeof(struct ip)) > IP_MAXPACKET) { 339 m_freem(m); 340 return (EMSGSIZE); 341 } 342 M_PREPEND(m, sizeof(struct ip), M_DONTWAIT); 343 if (!m) 344 return (ENOBUFS); 345 ip = mtod(m, struct ip *); 346 ip->ip_tos = 0; 347 ip->ip_off = htons(0); 348 ip->ip_p = inp->inp_ip.ip_p; 349 ip->ip_len = htons(m->m_pkthdr.len); 350 ip->ip_src = inp->inp_laddr; 351 ip->ip_dst = inp->inp_faddr; 352 ip->ip_ttl = MAXTTL; 353 opts = inp->inp_options; 354 } else { 355 if (m->m_pkthdr.len > IP_MAXPACKET) { 356 m_freem(m); 357 return (EMSGSIZE); 358 } 359 ip = mtod(m, struct ip *); 360 361 /* 362 * If the mbuf is read-only, we need to allocate 363 * a new mbuf for the header, since we need to 364 * modify the header. 365 */ 366 if (M_READONLY(m)) { 367 int hlen = ip->ip_hl << 2; 368 369 m = m_copyup(m, hlen, (max_linkhdr + 3) & ~3); 370 if (m == NULL) 371 return (ENOMEM); /* XXX */ 372 ip = mtod(m, struct ip *); 373 } 374 375 /* XXX userland passes ip_len and ip_off in host order */ 376 if (m->m_pkthdr.len != ip->ip_len) { 377 m_freem(m); 378 return (EINVAL); 379 } 380 HTONS(ip->ip_len); 381 HTONS(ip->ip_off); 382 if (ip->ip_id == 0) 383 ip->ip_id = ip_newid(); 384 opts = NULL; 385 /* XXX prevent ip_output from overwriting header fields */ 386 flags |= IP_RAWOUTPUT; 387 ipstat.ips_rawout++; 388 } 389 return (ip_output(m, opts, &inp->inp_route, flags, inp->inp_moptions, 390 inp->inp_socket, &inp->inp_errormtu)); 391 } 392 393 /* 394 * Raw IP socket option processing. 395 */ 396 int 397 rip_ctloutput(op, so, level, optname, m) 398 int op; 399 struct socket *so; 400 int level, optname; 401 struct mbuf **m; 402 { 403 struct inpcb *inp = sotoinpcb(so); 404 int error = 0; 405 406 if (level != IPPROTO_IP) { 407 error = ENOPROTOOPT; 408 if (op == PRCO_SETOPT && *m != 0) 409 (void) m_free(*m); 410 } else switch (op) { 411 412 case PRCO_SETOPT: 413 switch (optname) { 414 case IP_HDRINCL: 415 if (*m == 0 || (*m)->m_len < sizeof (int)) 416 error = EINVAL; 417 else { 418 if (*mtod(*m, int *)) 419 inp->inp_flags |= INP_HDRINCL; 420 else 421 inp->inp_flags &= ~INP_HDRINCL; 422 } 423 if (*m != 0) 424 (void) m_free(*m); 425 break; 426 427 #ifdef MROUTING 428 case MRT_INIT: 429 case MRT_DONE: 430 case MRT_ADD_VIF: 431 case MRT_DEL_VIF: 432 case MRT_ADD_MFC: 433 case MRT_DEL_MFC: 434 case MRT_ASSERT: 435 error = ip_mrouter_set(so, optname, m); 436 break; 437 #endif 438 439 default: 440 error = ip_ctloutput(op, so, level, optname, m); 441 break; 442 } 443 break; 444 445 case PRCO_GETOPT: 446 switch (optname) { 447 case IP_HDRINCL: 448 *m = m_get(M_WAIT, MT_SOOPTS); 449 MCLAIM((*m), so->so_mowner); 450 (*m)->m_len = sizeof (int); 451 *mtod(*m, int *) = inp->inp_flags & INP_HDRINCL ? 1 : 0; 452 break; 453 454 #ifdef MROUTING 455 case MRT_VERSION: 456 case MRT_ASSERT: 457 error = ip_mrouter_get(so, optname, m); 458 break; 459 #endif 460 461 default: 462 error = ip_ctloutput(op, so, level, optname, m); 463 break; 464 } 465 break; 466 } 467 return (error); 468 } 469 470 int 471 rip_bind(inp, nam) 472 struct inpcb *inp; 473 struct mbuf *nam; 474 { 475 struct sockaddr_in *addr = mtod(nam, struct sockaddr_in *); 476 477 if (nam->m_len != sizeof(*addr)) 478 return (EINVAL); 479 if (TAILQ_FIRST(&ifnet) == 0) 480 return (EADDRNOTAVAIL); 481 if (addr->sin_family != AF_INET && 482 addr->sin_family != AF_IMPLINK) 483 return (EAFNOSUPPORT); 484 if (!in_nullhost(addr->sin_addr) && 485 ifa_ifwithaddr(sintosa(addr)) == 0) 486 return (EADDRNOTAVAIL); 487 inp->inp_laddr = addr->sin_addr; 488 return (0); 489 } 490 491 int 492 rip_connect(inp, nam) 493 struct inpcb *inp; 494 struct mbuf *nam; 495 { 496 struct sockaddr_in *addr = mtod(nam, struct sockaddr_in *); 497 498 if (nam->m_len != sizeof(*addr)) 499 return (EINVAL); 500 if (TAILQ_FIRST(&ifnet) == 0) 501 return (EADDRNOTAVAIL); 502 if (addr->sin_family != AF_INET && 503 addr->sin_family != AF_IMPLINK) 504 return (EAFNOSUPPORT); 505 inp->inp_faddr = addr->sin_addr; 506 return (0); 507 } 508 509 void 510 rip_disconnect(inp) 511 struct inpcb *inp; 512 { 513 514 inp->inp_faddr = zeroin_addr; 515 } 516 517 u_long rip_sendspace = RIPSNDQ; 518 u_long rip_recvspace = RIPRCVQ; 519 520 /*ARGSUSED*/ 521 int 522 rip_usrreq(so, req, m, nam, control, p) 523 struct socket *so; 524 int req; 525 struct mbuf *m, *nam, *control; 526 struct proc *p; 527 { 528 struct inpcb *inp; 529 int s; 530 int error = 0; 531 #ifdef MROUTING 532 extern struct socket *ip_mrouter; 533 #endif 534 535 if (req == PRU_CONTROL) 536 return (in_control(so, (long)m, (caddr_t)nam, 537 (struct ifnet *)control, p)); 538 539 if (req == PRU_PURGEIF) { 540 in_pcbpurgeif0(&rawcbtable, (struct ifnet *)control); 541 in_purgeif((struct ifnet *)control); 542 in_pcbpurgeif(&rawcbtable, (struct ifnet *)control); 543 return (0); 544 } 545 546 s = splsoftnet(); 547 inp = sotoinpcb(so); 548 #ifdef DIAGNOSTIC 549 if (req != PRU_SEND && req != PRU_SENDOOB && control) 550 panic("rip_usrreq: unexpected control mbuf"); 551 #endif 552 if (inp == 0 && req != PRU_ATTACH) { 553 error = EINVAL; 554 goto release; 555 } 556 557 switch (req) { 558 559 case PRU_ATTACH: 560 if (inp != 0) { 561 error = EISCONN; 562 break; 563 } 564 if (p == 0 || (error = suser(p->p_ucred, &p->p_acflag))) { 565 error = EACCES; 566 break; 567 } 568 if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) { 569 error = soreserve(so, rip_sendspace, rip_recvspace); 570 if (error) 571 break; 572 } 573 error = in_pcballoc(so, &rawcbtable); 574 if (error) 575 break; 576 inp = sotoinpcb(so); 577 inp->inp_ip.ip_p = (long)nam; 578 break; 579 580 case PRU_DETACH: 581 #ifdef MROUTING 582 if (so == ip_mrouter) 583 ip_mrouter_done(); 584 #endif 585 in_pcbdetach(inp); 586 break; 587 588 case PRU_BIND: 589 error = rip_bind(inp, nam); 590 break; 591 592 case PRU_LISTEN: 593 error = EOPNOTSUPP; 594 break; 595 596 case PRU_CONNECT: 597 error = rip_connect(inp, nam); 598 if (error) 599 break; 600 soisconnected(so); 601 break; 602 603 case PRU_CONNECT2: 604 error = EOPNOTSUPP; 605 break; 606 607 case PRU_DISCONNECT: 608 soisdisconnected(so); 609 rip_disconnect(inp); 610 break; 611 612 /* 613 * Mark the connection as being incapable of further input. 614 */ 615 case PRU_SHUTDOWN: 616 socantsendmore(so); 617 break; 618 619 case PRU_RCVD: 620 error = EOPNOTSUPP; 621 break; 622 623 /* 624 * Ship a packet out. The appropriate raw output 625 * routine handles any massaging necessary. 626 */ 627 case PRU_SEND: 628 if (control && control->m_len) { 629 m_freem(control); 630 m_freem(m); 631 error = EINVAL; 632 break; 633 } 634 { 635 if (nam) { 636 if ((so->so_state & SS_ISCONNECTED) != 0) { 637 error = EISCONN; 638 goto die; 639 } 640 error = rip_connect(inp, nam); 641 if (error) { 642 die: 643 m_freem(m); 644 break; 645 } 646 } else { 647 if ((so->so_state & SS_ISCONNECTED) == 0) { 648 error = ENOTCONN; 649 goto die; 650 } 651 } 652 error = rip_output(m, inp); 653 if (nam) 654 rip_disconnect(inp); 655 } 656 break; 657 658 case PRU_SENSE: 659 /* 660 * stat: don't bother with a blocksize. 661 */ 662 splx(s); 663 return (0); 664 665 case PRU_RCVOOB: 666 error = EOPNOTSUPP; 667 break; 668 669 case PRU_SENDOOB: 670 m_freem(control); 671 m_freem(m); 672 error = EOPNOTSUPP; 673 break; 674 675 case PRU_SOCKADDR: 676 in_setsockaddr(inp, nam); 677 break; 678 679 case PRU_PEERADDR: 680 in_setpeeraddr(inp, nam); 681 break; 682 683 default: 684 panic("rip_usrreq"); 685 } 686 687 release: 688 splx(s); 689 return (error); 690 } 691