1 /* $NetBSD: raw_ip.c,v 1.70 2003/06/29 22:31:57 fvdl Exp $ */ 2 3 /* 4 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. Neither the name of the project nor the names of its contributors 16 * may be used to endorse or promote products derived from this software 17 * without specific prior written permission. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 30 */ 31 32 /* 33 * Copyright (c) 1982, 1986, 1988, 1993 34 * The Regents of the University of California. All rights reserved. 35 * 36 * Redistribution and use in source and binary forms, with or without 37 * modification, are permitted provided that the following conditions 38 * are met: 39 * 1. Redistributions of source code must retain the above copyright 40 * notice, this list of conditions and the following disclaimer. 41 * 2. Redistributions in binary form must reproduce the above copyright 42 * notice, this list of conditions and the following disclaimer in the 43 * documentation and/or other materials provided with the distribution. 44 * 3. All advertising materials mentioning features or use of this software 45 * must display the following acknowledgement: 46 * This product includes software developed by the University of 47 * California, Berkeley and its contributors. 48 * 4. Neither the name of the University nor the names of its contributors 49 * may be used to endorse or promote products derived from this software 50 * without specific prior written permission. 51 * 52 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 53 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 54 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 55 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 56 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 57 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 58 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 59 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 60 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 61 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 62 * SUCH DAMAGE. 63 * 64 * @(#)raw_ip.c 8.7 (Berkeley) 5/15/95 65 */ 66 67 #include <sys/cdefs.h> 68 __KERNEL_RCSID(0, "$NetBSD: raw_ip.c,v 1.70 2003/06/29 22:31:57 fvdl Exp $"); 69 70 #include "opt_ipsec.h" 71 #include "opt_mrouting.h" 72 73 #include <sys/param.h> 74 #include <sys/malloc.h> 75 #include <sys/mbuf.h> 76 #include <sys/socket.h> 77 #include <sys/protosw.h> 78 #include <sys/socketvar.h> 79 #include <sys/errno.h> 80 #include <sys/systm.h> 81 #include <sys/proc.h> 82 83 #include <net/if.h> 84 #include <net/route.h> 85 86 #include <netinet/in.h> 87 #include <netinet/in_systm.h> 88 #include <netinet/ip.h> 89 #include <netinet/ip_var.h> 90 #include <netinet/ip_mroute.h> 91 #include <netinet/ip_icmp.h> 92 #include <netinet/in_pcb.h> 93 #include <netinet/in_var.h> 94 95 #include <machine/stdarg.h> 96 97 #ifdef IPSEC 98 #include <netinet6/ipsec.h> 99 #endif /*IPSEC*/ 100 101 struct inpcbtable rawcbtable; 102 103 int rip_pcbnotify __P((struct inpcbtable *, struct in_addr, 104 struct in_addr, int, int, void (*) __P((struct inpcb *, int)))); 105 int rip_bind __P((struct inpcb *, struct mbuf *)); 106 int rip_connect __P((struct inpcb *, struct mbuf *)); 107 void rip_disconnect __P((struct inpcb *)); 108 109 /* 110 * Nominal space allocated to a raw ip socket. 111 */ 112 #define RIPSNDQ 8192 113 #define RIPRCVQ 8192 114 115 /* 116 * Raw interface to IP protocol. 117 */ 118 119 /* 120 * Initialize raw connection block q. 121 */ 122 void 123 rip_init() 124 { 125 126 in_pcbinit(&rawcbtable, 1, 1); 127 } 128 129 /* 130 * Setup generic address and protocol structures 131 * for raw_input routine, then pass them along with 132 * mbuf chain. 133 */ 134 void 135 #if __STDC__ 136 rip_input(struct mbuf *m, ...) 137 #else 138 rip_input(m, va_alist) 139 struct mbuf *m; 140 va_dcl 141 #endif 142 { 143 int proto; 144 struct ip *ip = mtod(m, struct ip *); 145 struct inpcb *inp; 146 struct inpcb *last = 0; 147 struct mbuf *opts = 0; 148 struct sockaddr_in ripsrc; 149 va_list ap; 150 151 va_start(ap, m); 152 (void)va_arg(ap, int); /* ignore value, advance ap */ 153 proto = va_arg(ap, int); 154 va_end(ap); 155 156 ripsrc.sin_family = AF_INET; 157 ripsrc.sin_len = sizeof(struct sockaddr_in); 158 ripsrc.sin_addr = ip->ip_src; 159 ripsrc.sin_port = 0; 160 bzero((caddr_t)ripsrc.sin_zero, sizeof(ripsrc.sin_zero)); 161 162 /* 163 * XXX Compatibility: programs using raw IP expect ip_len 164 * XXX to have the header length subtracted, and in host order. 165 * XXX ip_off is also expected to be host order. 166 */ 167 ip->ip_len = ntohs(ip->ip_len) - (ip->ip_hl << 2); 168 NTOHS(ip->ip_off); 169 170 CIRCLEQ_FOREACH(inp, &rawcbtable.inpt_queue, inp_queue) { 171 if (inp->inp_ip.ip_p && inp->inp_ip.ip_p != proto) 172 continue; 173 if (!in_nullhost(inp->inp_laddr) && 174 !in_hosteq(inp->inp_laddr, ip->ip_dst)) 175 continue; 176 if (!in_nullhost(inp->inp_faddr) && 177 !in_hosteq(inp->inp_faddr, ip->ip_src)) 178 continue; 179 if (last) { 180 struct mbuf *n; 181 182 #ifdef IPSEC 183 /* check AH/ESP integrity. */ 184 if (ipsec4_in_reject_so(m, last->inp_socket)) { 185 ipsecstat.in_polvio++; 186 /* do not inject data to pcb */ 187 } else 188 #endif /*IPSEC*/ 189 if ((n = m_copy(m, 0, (int)M_COPYALL)) != NULL) { 190 if (last->inp_flags & INP_CONTROLOPTS || 191 last->inp_socket->so_options & SO_TIMESTAMP) 192 ip_savecontrol(last, &opts, ip, n); 193 if (sbappendaddr(&last->inp_socket->so_rcv, 194 sintosa(&ripsrc), n, opts) == 0) { 195 /* should notify about lost packet */ 196 m_freem(n); 197 if (opts) 198 m_freem(opts); 199 } else 200 sorwakeup(last->inp_socket); 201 opts = NULL; 202 } 203 } 204 last = inp; 205 } 206 #ifdef IPSEC 207 /* check AH/ESP integrity. */ 208 if (last && ipsec4_in_reject_so(m, last->inp_socket)) { 209 m_freem(m); 210 ipsecstat.in_polvio++; 211 ipstat.ips_delivered--; 212 /* do not inject data to pcb */ 213 } else 214 #endif /*IPSEC*/ 215 if (last) { 216 if (last->inp_flags & INP_CONTROLOPTS || 217 last->inp_socket->so_options & SO_TIMESTAMP) 218 ip_savecontrol(last, &opts, ip, m); 219 if (sbappendaddr(&last->inp_socket->so_rcv, 220 sintosa(&ripsrc), m, opts) == 0) { 221 m_freem(m); 222 if (opts) 223 m_freem(opts); 224 } else 225 sorwakeup(last->inp_socket); 226 } else { 227 if (inetsw[ip_protox[ip->ip_p]].pr_input == rip_input) { 228 icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_PROTOCOL, 229 0, 0); 230 ipstat.ips_noproto++; 231 ipstat.ips_delivered--; 232 } else 233 m_freem(m); 234 } 235 return; 236 } 237 238 int 239 rip_pcbnotify(table, faddr, laddr, proto, errno, notify) 240 struct inpcbtable *table; 241 struct in_addr faddr, laddr; 242 int proto; 243 int errno; 244 void (*notify) __P((struct inpcb *, int)); 245 { 246 struct inpcb *inp, *ninp; 247 int nmatch; 248 249 nmatch = 0; 250 for (inp = CIRCLEQ_FIRST(&table->inpt_queue); 251 inp != (struct inpcb *)&table->inpt_queue; 252 inp = ninp) { 253 ninp = inp->inp_queue.cqe_next; 254 if (inp->inp_ip.ip_p && inp->inp_ip.ip_p != proto) 255 continue; 256 if (in_hosteq(inp->inp_faddr, faddr) && 257 in_hosteq(inp->inp_laddr, laddr)) { 258 (*notify)(inp, errno); 259 nmatch++; 260 } 261 } 262 263 return nmatch; 264 } 265 266 void * 267 rip_ctlinput(cmd, sa, v) 268 int cmd; 269 struct sockaddr *sa; 270 void *v; 271 { 272 struct ip *ip = v; 273 void (*notify) __P((struct inpcb *, int)) = in_rtchange; 274 int errno; 275 276 if (sa->sa_family != AF_INET || 277 sa->sa_len != sizeof(struct sockaddr_in)) 278 return NULL; 279 if ((unsigned)cmd >= PRC_NCMDS) 280 return NULL; 281 errno = inetctlerrmap[cmd]; 282 if (PRC_IS_REDIRECT(cmd)) 283 notify = in_rtchange, ip = 0; 284 else if (cmd == PRC_HOSTDEAD) 285 ip = 0; 286 else if (errno == 0) 287 return NULL; 288 if (ip) { 289 rip_pcbnotify(&rawcbtable, satosin(sa)->sin_addr, 290 ip->ip_src, ip->ip_p, errno, notify); 291 292 /* XXX mapped address case */ 293 } else 294 in_pcbnotifyall(&rawcbtable, satosin(sa)->sin_addr, errno, 295 notify); 296 return NULL; 297 } 298 299 /* 300 * Generate IP header and pass packet to ip_output. 301 * Tack on options user may have setup with control call. 302 */ 303 int 304 #if __STDC__ 305 rip_output(struct mbuf *m, ...) 306 #else 307 rip_output(m, va_alist) 308 struct mbuf *m; 309 va_dcl 310 #endif 311 { 312 struct inpcb *inp; 313 struct ip *ip; 314 struct mbuf *opts; 315 int flags; 316 va_list ap; 317 318 va_start(ap, m); 319 inp = va_arg(ap, struct inpcb *); 320 va_end(ap); 321 322 flags = 323 (inp->inp_socket->so_options & SO_DONTROUTE) | IP_ALLOWBROADCAST 324 | IP_RETURNMTU; 325 326 /* 327 * If the user handed us a complete IP packet, use it. 328 * Otherwise, allocate an mbuf for a header and fill it in. 329 */ 330 if ((inp->inp_flags & INP_HDRINCL) == 0) { 331 if ((m->m_pkthdr.len + sizeof(struct ip)) > IP_MAXPACKET) { 332 m_freem(m); 333 return (EMSGSIZE); 334 } 335 M_PREPEND(m, sizeof(struct ip), M_DONTWAIT); 336 if (!m) 337 return (ENOBUFS); 338 ip = mtod(m, struct ip *); 339 ip->ip_tos = 0; 340 ip->ip_off = htons(0); 341 ip->ip_p = inp->inp_ip.ip_p; 342 ip->ip_len = htons(m->m_pkthdr.len); 343 ip->ip_src = inp->inp_laddr; 344 ip->ip_dst = inp->inp_faddr; 345 ip->ip_ttl = MAXTTL; 346 opts = inp->inp_options; 347 } else { 348 if (m->m_pkthdr.len > IP_MAXPACKET) { 349 m_freem(m); 350 return (EMSGSIZE); 351 } 352 ip = mtod(m, struct ip *); 353 354 /* 355 * If the mbuf is read-only, we need to allocate 356 * a new mbuf for the header, since we need to 357 * modify the header. 358 */ 359 if (M_READONLY(m)) { 360 int hlen = ip->ip_hl << 2; 361 362 m = m_copyup(m, hlen, (max_linkhdr + 3) & ~3); 363 if (m == NULL) 364 return (ENOMEM); /* XXX */ 365 ip = mtod(m, struct ip *); 366 } 367 368 /* XXX userland passes ip_len and ip_off in host order */ 369 if (m->m_pkthdr.len != ip->ip_len) { 370 m_freem(m); 371 return (EINVAL); 372 } 373 HTONS(ip->ip_len); 374 HTONS(ip->ip_off); 375 if (ip->ip_id == 0) 376 ip->ip_id = htons(ip_id++); 377 opts = NULL; 378 /* XXX prevent ip_output from overwriting header fields */ 379 flags |= IP_RAWOUTPUT; 380 ipstat.ips_rawout++; 381 } 382 #ifdef IPSEC 383 if (ipsec_setsocket(m, inp->inp_socket) != 0) { 384 m_freem(m); 385 return ENOBUFS; 386 } 387 #endif /*IPSEC*/ 388 return (ip_output(m, opts, &inp->inp_route, flags, inp->inp_moptions, 389 &inp->inp_errormtu)); 390 } 391 392 /* 393 * Raw IP socket option processing. 394 */ 395 int 396 rip_ctloutput(op, so, level, optname, m) 397 int op; 398 struct socket *so; 399 int level, optname; 400 struct mbuf **m; 401 { 402 struct inpcb *inp = sotoinpcb(so); 403 int error = 0; 404 405 if (level != IPPROTO_IP) { 406 error = ENOPROTOOPT; 407 if (op == PRCO_SETOPT && *m != 0) 408 (void) m_free(*m); 409 } else switch (op) { 410 411 case PRCO_SETOPT: 412 switch (optname) { 413 case IP_HDRINCL: 414 if (*m == 0 || (*m)->m_len < sizeof (int)) 415 error = EINVAL; 416 else { 417 if (*mtod(*m, int *)) 418 inp->inp_flags |= INP_HDRINCL; 419 else 420 inp->inp_flags &= ~INP_HDRINCL; 421 } 422 if (*m != 0) 423 (void) m_free(*m); 424 break; 425 426 #ifdef MROUTING 427 case MRT_INIT: 428 case MRT_DONE: 429 case MRT_ADD_VIF: 430 case MRT_DEL_VIF: 431 case MRT_ADD_MFC: 432 case MRT_DEL_MFC: 433 case MRT_ASSERT: 434 error = ip_mrouter_set(so, optname, m); 435 break; 436 #endif 437 438 default: 439 error = ip_ctloutput(op, so, level, optname, m); 440 break; 441 } 442 break; 443 444 case PRCO_GETOPT: 445 switch (optname) { 446 case IP_HDRINCL: 447 *m = m_get(M_WAIT, MT_SOOPTS); 448 MCLAIM((*m), so->so_mowner); 449 (*m)->m_len = sizeof (int); 450 *mtod(*m, int *) = inp->inp_flags & INP_HDRINCL ? 1 : 0; 451 break; 452 453 #ifdef MROUTING 454 case MRT_VERSION: 455 case MRT_ASSERT: 456 error = ip_mrouter_get(so, optname, m); 457 break; 458 #endif 459 460 default: 461 error = ip_ctloutput(op, so, level, optname, m); 462 break; 463 } 464 break; 465 } 466 return (error); 467 } 468 469 int 470 rip_bind(inp, nam) 471 struct inpcb *inp; 472 struct mbuf *nam; 473 { 474 struct sockaddr_in *addr = mtod(nam, struct sockaddr_in *); 475 476 if (nam->m_len != sizeof(*addr)) 477 return (EINVAL); 478 if (TAILQ_FIRST(&ifnet) == 0) 479 return (EADDRNOTAVAIL); 480 if (addr->sin_family != AF_INET && 481 addr->sin_family != AF_IMPLINK) 482 return (EAFNOSUPPORT); 483 if (!in_nullhost(addr->sin_addr) && 484 ifa_ifwithaddr(sintosa(addr)) == 0) 485 return (EADDRNOTAVAIL); 486 inp->inp_laddr = addr->sin_addr; 487 return (0); 488 } 489 490 int 491 rip_connect(inp, nam) 492 struct inpcb *inp; 493 struct mbuf *nam; 494 { 495 struct sockaddr_in *addr = mtod(nam, struct sockaddr_in *); 496 497 if (nam->m_len != sizeof(*addr)) 498 return (EINVAL); 499 if (TAILQ_FIRST(&ifnet) == 0) 500 return (EADDRNOTAVAIL); 501 if (addr->sin_family != AF_INET && 502 addr->sin_family != AF_IMPLINK) 503 return (EAFNOSUPPORT); 504 inp->inp_faddr = addr->sin_addr; 505 return (0); 506 } 507 508 void 509 rip_disconnect(inp) 510 struct inpcb *inp; 511 { 512 513 inp->inp_faddr = zeroin_addr; 514 } 515 516 u_long rip_sendspace = RIPSNDQ; 517 u_long rip_recvspace = RIPRCVQ; 518 519 /*ARGSUSED*/ 520 int 521 rip_usrreq(so, req, m, nam, control, p) 522 struct socket *so; 523 int req; 524 struct mbuf *m, *nam, *control; 525 struct proc *p; 526 { 527 struct inpcb *inp; 528 int s; 529 int error = 0; 530 #ifdef MROUTING 531 extern struct socket *ip_mrouter; 532 #endif 533 534 if (req == PRU_CONTROL) 535 return (in_control(so, (long)m, (caddr_t)nam, 536 (struct ifnet *)control, p)); 537 538 if (req == PRU_PURGEIF) { 539 in_pcbpurgeif0(&rawcbtable, (struct ifnet *)control); 540 in_purgeif((struct ifnet *)control); 541 in_pcbpurgeif(&rawcbtable, (struct ifnet *)control); 542 return (0); 543 } 544 545 s = splsoftnet(); 546 inp = sotoinpcb(so); 547 #ifdef DIAGNOSTIC 548 if (req != PRU_SEND && req != PRU_SENDOOB && control) 549 panic("rip_usrreq: unexpected control mbuf"); 550 #endif 551 if (inp == 0 && req != PRU_ATTACH) { 552 error = EINVAL; 553 goto release; 554 } 555 556 switch (req) { 557 558 case PRU_ATTACH: 559 if (inp != 0) { 560 error = EISCONN; 561 break; 562 } 563 if (p == 0 || (error = suser(p->p_ucred, &p->p_acflag))) { 564 error = EACCES; 565 break; 566 } 567 if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) { 568 error = soreserve(so, rip_sendspace, rip_recvspace); 569 if (error) 570 break; 571 } 572 error = in_pcballoc(so, &rawcbtable); 573 if (error) 574 break; 575 inp = sotoinpcb(so); 576 inp->inp_ip.ip_p = (long)nam; 577 break; 578 579 case PRU_DETACH: 580 #ifdef MROUTING 581 if (so == ip_mrouter) 582 ip_mrouter_done(); 583 #endif 584 in_pcbdetach(inp); 585 break; 586 587 case PRU_BIND: 588 error = rip_bind(inp, nam); 589 break; 590 591 case PRU_LISTEN: 592 error = EOPNOTSUPP; 593 break; 594 595 case PRU_CONNECT: 596 error = rip_connect(inp, nam); 597 if (error) 598 break; 599 soisconnected(so); 600 break; 601 602 case PRU_CONNECT2: 603 error = EOPNOTSUPP; 604 break; 605 606 case PRU_DISCONNECT: 607 soisdisconnected(so); 608 rip_disconnect(inp); 609 break; 610 611 /* 612 * Mark the connection as being incapable of further input. 613 */ 614 case PRU_SHUTDOWN: 615 socantsendmore(so); 616 break; 617 618 case PRU_RCVD: 619 error = EOPNOTSUPP; 620 break; 621 622 /* 623 * Ship a packet out. The appropriate raw output 624 * routine handles any massaging necessary. 625 */ 626 case PRU_SEND: 627 if (control && control->m_len) { 628 m_freem(control); 629 m_freem(m); 630 error = EINVAL; 631 break; 632 } 633 { 634 if (nam) { 635 if ((so->so_state & SS_ISCONNECTED) != 0) { 636 error = EISCONN; 637 goto die; 638 } 639 error = rip_connect(inp, nam); 640 if (error) { 641 die: 642 m_freem(m); 643 break; 644 } 645 } else { 646 if ((so->so_state & SS_ISCONNECTED) == 0) { 647 error = ENOTCONN; 648 goto die; 649 } 650 } 651 error = rip_output(m, inp); 652 if (nam) 653 rip_disconnect(inp); 654 } 655 break; 656 657 case PRU_SENSE: 658 /* 659 * stat: don't bother with a blocksize. 660 */ 661 splx(s); 662 return (0); 663 664 case PRU_RCVOOB: 665 error = EOPNOTSUPP; 666 break; 667 668 case PRU_SENDOOB: 669 m_freem(control); 670 m_freem(m); 671 error = EOPNOTSUPP; 672 break; 673 674 case PRU_SOCKADDR: 675 in_setsockaddr(inp, nam); 676 break; 677 678 case PRU_PEERADDR: 679 in_setpeeraddr(inp, nam); 680 break; 681 682 default: 683 panic("rip_usrreq"); 684 } 685 686 release: 687 splx(s); 688 return (error); 689 } 690