1 /* $NetBSD: raw_ip.c,v 1.81 2004/09/04 23:30:07 manu Exp $ */ 2 3 /* 4 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. Neither the name of the project nor the names of its contributors 16 * may be used to endorse or promote products derived from this software 17 * without specific prior written permission. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 30 */ 31 32 /* 33 * Copyright (c) 1982, 1986, 1988, 1993 34 * The Regents of the University of California. All rights reserved. 35 * 36 * Redistribution and use in source and binary forms, with or without 37 * modification, are permitted provided that the following conditions 38 * are met: 39 * 1. Redistributions of source code must retain the above copyright 40 * notice, this list of conditions and the following disclaimer. 41 * 2. Redistributions in binary form must reproduce the above copyright 42 * notice, this list of conditions and the following disclaimer in the 43 * documentation and/or other materials provided with the distribution. 44 * 3. Neither the name of the University nor the names of its contributors 45 * may be used to endorse or promote products derived from this software 46 * without specific prior written permission. 47 * 48 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 50 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 51 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 52 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 53 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 54 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 55 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 56 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 57 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 58 * SUCH DAMAGE. 59 * 60 * @(#)raw_ip.c 8.7 (Berkeley) 5/15/95 61 */ 62 63 #include <sys/cdefs.h> 64 __KERNEL_RCSID(0, "$NetBSD: raw_ip.c,v 1.81 2004/09/04 23:30:07 manu Exp $"); 65 66 #include "opt_inet.h" 67 #include "opt_ipsec.h" 68 #include "opt_mrouting.h" 69 70 #include <sys/param.h> 71 #include <sys/malloc.h> 72 #include <sys/mbuf.h> 73 #include <sys/socket.h> 74 #include <sys/protosw.h> 75 #include <sys/socketvar.h> 76 #include <sys/errno.h> 77 #include <sys/systm.h> 78 #include <sys/proc.h> 79 80 #include <net/if.h> 81 #include <net/route.h> 82 83 #include <netinet/in.h> 84 #include <netinet/in_systm.h> 85 #include <netinet/ip.h> 86 #include <netinet/ip_var.h> 87 #include <netinet/ip_mroute.h> 88 #include <netinet/ip_icmp.h> 89 #include <netinet/in_pcb.h> 90 #include <netinet/in_var.h> 91 92 #include <machine/stdarg.h> 93 94 #ifdef IPSEC 95 #include <netinet6/ipsec.h> 96 #endif /*IPSEC*/ 97 98 #ifdef FAST_IPSEC 99 #include <netipsec/ipsec.h> 100 #include <netipsec/ipsec_var.h> /* XXX ipsecstat namespace */ 101 #endif /* FAST_IPSEC*/ 102 103 struct inpcbtable rawcbtable; 104 105 int rip_pcbnotify __P((struct inpcbtable *, struct in_addr, 106 struct in_addr, int, int, void (*) __P((struct inpcb *, int)))); 107 int rip_bind __P((struct inpcb *, struct mbuf *)); 108 int rip_connect __P((struct inpcb *, struct mbuf *)); 109 void rip_disconnect __P((struct inpcb *)); 110 111 /* 112 * Nominal space allocated to a raw ip socket. 113 */ 114 #define RIPSNDQ 8192 115 #define RIPRCVQ 8192 116 117 /* 118 * Raw interface to IP protocol. 119 */ 120 121 /* 122 * Initialize raw connection block q. 123 */ 124 void 125 rip_init() 126 { 127 128 in_pcbinit(&rawcbtable, 1, 1); 129 } 130 131 /* 132 * Setup generic address and protocol structures 133 * for raw_input routine, then pass them along with 134 * mbuf chain. 135 */ 136 void 137 rip_input(struct mbuf *m, ...) 138 { 139 int proto; 140 struct ip *ip = mtod(m, struct ip *); 141 struct inpcb_hdr *inph; 142 struct inpcb *inp; 143 struct inpcb *last = 0; 144 struct mbuf *opts = 0; 145 struct sockaddr_in ripsrc; 146 va_list ap; 147 148 va_start(ap, m); 149 (void)va_arg(ap, int); /* ignore value, advance ap */ 150 proto = va_arg(ap, int); 151 va_end(ap); 152 153 ripsrc.sin_family = AF_INET; 154 ripsrc.sin_len = sizeof(struct sockaddr_in); 155 ripsrc.sin_addr = ip->ip_src; 156 ripsrc.sin_port = 0; 157 bzero((caddr_t)ripsrc.sin_zero, sizeof(ripsrc.sin_zero)); 158 159 /* 160 * XXX Compatibility: programs using raw IP expect ip_len 161 * XXX to have the header length subtracted, and in host order. 162 * XXX ip_off is also expected to be host order. 163 */ 164 ip->ip_len = ntohs(ip->ip_len) - (ip->ip_hl << 2); 165 NTOHS(ip->ip_off); 166 167 CIRCLEQ_FOREACH(inph, &rawcbtable.inpt_queue, inph_queue) { 168 inp = (struct inpcb *)inph; 169 if (inp->inp_af != AF_INET) 170 continue; 171 if (inp->inp_ip.ip_p && inp->inp_ip.ip_p != proto) 172 continue; 173 if (!in_nullhost(inp->inp_laddr) && 174 !in_hosteq(inp->inp_laddr, ip->ip_dst)) 175 continue; 176 if (!in_nullhost(inp->inp_faddr) && 177 !in_hosteq(inp->inp_faddr, ip->ip_src)) 178 continue; 179 if (last) { 180 struct mbuf *n; 181 182 #if defined(IPSEC) || defined(FAST_IPSEC) 183 /* check AH/ESP integrity. */ 184 if (ipsec4_in_reject_so(m, last->inp_socket)) { 185 ipsecstat.in_polvio++; 186 /* do not inject data to pcb */ 187 } else 188 #endif /*IPSEC*/ 189 if ((n = m_copy(m, 0, (int)M_COPYALL)) != NULL) { 190 if (last->inp_flags & INP_CONTROLOPTS || 191 last->inp_socket->so_options & SO_TIMESTAMP) 192 ip_savecontrol(last, &opts, ip, n); 193 if (sbappendaddr(&last->inp_socket->so_rcv, 194 sintosa(&ripsrc), n, opts) == 0) { 195 /* should notify about lost packet */ 196 m_freem(n); 197 if (opts) 198 m_freem(opts); 199 } else 200 sorwakeup(last->inp_socket); 201 opts = NULL; 202 } 203 } 204 last = inp; 205 } 206 #if defined(IPSEC) || defined(FAST_IPSEC) 207 /* check AH/ESP integrity. */ 208 if (last && ipsec4_in_reject_so(m, last->inp_socket)) { 209 m_freem(m); 210 ipsecstat.in_polvio++; 211 ipstat.ips_delivered--; 212 /* do not inject data to pcb */ 213 } else 214 #endif /*IPSEC*/ 215 if (last) { 216 if (last->inp_flags & INP_CONTROLOPTS || 217 last->inp_socket->so_options & SO_TIMESTAMP) 218 ip_savecontrol(last, &opts, ip, m); 219 if (sbappendaddr(&last->inp_socket->so_rcv, 220 sintosa(&ripsrc), m, opts) == 0) { 221 m_freem(m); 222 if (opts) 223 m_freem(opts); 224 } else 225 sorwakeup(last->inp_socket); 226 } else { 227 if (inetsw[ip_protox[ip->ip_p]].pr_input == rip_input) { 228 icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_PROTOCOL, 229 0, 0); 230 ipstat.ips_noproto++; 231 ipstat.ips_delivered--; 232 } else 233 m_freem(m); 234 } 235 return; 236 } 237 238 int 239 rip_pcbnotify(table, faddr, laddr, proto, errno, notify) 240 struct inpcbtable *table; 241 struct in_addr faddr, laddr; 242 int proto; 243 int errno; 244 void (*notify) __P((struct inpcb *, int)); 245 { 246 struct inpcb *inp, *ninp; 247 int nmatch; 248 249 nmatch = 0; 250 for (inp = (struct inpcb *)CIRCLEQ_FIRST(&table->inpt_queue); 251 inp != (struct inpcb *)&table->inpt_queue; 252 inp = ninp) { 253 ninp = (struct inpcb *)inp->inp_queue.cqe_next; 254 if (inp->inp_af != AF_INET) 255 continue; 256 if (inp->inp_ip.ip_p && inp->inp_ip.ip_p != proto) 257 continue; 258 if (in_hosteq(inp->inp_faddr, faddr) && 259 in_hosteq(inp->inp_laddr, laddr)) { 260 (*notify)(inp, errno); 261 nmatch++; 262 } 263 } 264 265 return nmatch; 266 } 267 268 void * 269 rip_ctlinput(cmd, sa, v) 270 int cmd; 271 struct sockaddr *sa; 272 void *v; 273 { 274 struct ip *ip = v; 275 void (*notify) __P((struct inpcb *, int)) = in_rtchange; 276 int errno; 277 278 if (sa->sa_family != AF_INET || 279 sa->sa_len != sizeof(struct sockaddr_in)) 280 return NULL; 281 if ((unsigned)cmd >= PRC_NCMDS) 282 return NULL; 283 errno = inetctlerrmap[cmd]; 284 if (PRC_IS_REDIRECT(cmd)) 285 notify = in_rtchange, ip = 0; 286 else if (cmd == PRC_HOSTDEAD) 287 ip = 0; 288 else if (errno == 0) 289 return NULL; 290 if (ip) { 291 rip_pcbnotify(&rawcbtable, satosin(sa)->sin_addr, 292 ip->ip_src, ip->ip_p, errno, notify); 293 294 /* XXX mapped address case */ 295 } else 296 in_pcbnotifyall(&rawcbtable, satosin(sa)->sin_addr, errno, 297 notify); 298 return NULL; 299 } 300 301 /* 302 * Generate IP header and pass packet to ip_output. 303 * Tack on options user may have setup with control call. 304 */ 305 int 306 rip_output(struct mbuf *m, ...) 307 { 308 struct inpcb *inp; 309 struct ip *ip; 310 struct mbuf *opts; 311 int flags; 312 va_list ap; 313 314 va_start(ap, m); 315 inp = va_arg(ap, struct inpcb *); 316 va_end(ap); 317 318 flags = 319 (inp->inp_socket->so_options & SO_DONTROUTE) | IP_ALLOWBROADCAST 320 | IP_RETURNMTU; 321 322 /* 323 * If the user handed us a complete IP packet, use it. 324 * Otherwise, allocate an mbuf for a header and fill it in. 325 */ 326 if ((inp->inp_flags & INP_HDRINCL) == 0) { 327 if ((m->m_pkthdr.len + sizeof(struct ip)) > IP_MAXPACKET) { 328 m_freem(m); 329 return (EMSGSIZE); 330 } 331 M_PREPEND(m, sizeof(struct ip), M_DONTWAIT); 332 if (!m) 333 return (ENOBUFS); 334 ip = mtod(m, struct ip *); 335 ip->ip_tos = 0; 336 ip->ip_off = htons(0); 337 ip->ip_p = inp->inp_ip.ip_p; 338 ip->ip_len = htons(m->m_pkthdr.len); 339 ip->ip_src = inp->inp_laddr; 340 ip->ip_dst = inp->inp_faddr; 341 ip->ip_ttl = MAXTTL; 342 opts = inp->inp_options; 343 } else { 344 if (m->m_pkthdr.len > IP_MAXPACKET) { 345 m_freem(m); 346 return (EMSGSIZE); 347 } 348 ip = mtod(m, struct ip *); 349 350 /* 351 * If the mbuf is read-only, we need to allocate 352 * a new mbuf for the header, since we need to 353 * modify the header. 354 */ 355 if (M_READONLY(m)) { 356 int hlen = ip->ip_hl << 2; 357 358 m = m_copyup(m, hlen, (max_linkhdr + 3) & ~3); 359 if (m == NULL) 360 return (ENOMEM); /* XXX */ 361 ip = mtod(m, struct ip *); 362 } 363 364 /* XXX userland passes ip_len and ip_off in host order */ 365 if (m->m_pkthdr.len != ip->ip_len) { 366 m_freem(m); 367 return (EINVAL); 368 } 369 HTONS(ip->ip_len); 370 HTONS(ip->ip_off); 371 if (ip->ip_id == 0) 372 ip->ip_id = ip_newid(); 373 opts = NULL; 374 /* XXX prevent ip_output from overwriting header fields */ 375 flags |= IP_RAWOUTPUT; 376 ipstat.ips_rawout++; 377 } 378 return (ip_output(m, opts, &inp->inp_route, flags, inp->inp_moptions, 379 inp->inp_socket, &inp->inp_errormtu)); 380 } 381 382 /* 383 * Raw IP socket option processing. 384 */ 385 int 386 rip_ctloutput(op, so, level, optname, m) 387 int op; 388 struct socket *so; 389 int level, optname; 390 struct mbuf **m; 391 { 392 struct inpcb *inp = sotoinpcb(so); 393 int error = 0; 394 395 if (level != IPPROTO_IP) { 396 error = ENOPROTOOPT; 397 if (op == PRCO_SETOPT && *m != 0) 398 (void) m_free(*m); 399 } else switch (op) { 400 401 case PRCO_SETOPT: 402 switch (optname) { 403 case IP_HDRINCL: 404 if (*m == 0 || (*m)->m_len < sizeof (int)) 405 error = EINVAL; 406 else { 407 if (*mtod(*m, int *)) 408 inp->inp_flags |= INP_HDRINCL; 409 else 410 inp->inp_flags &= ~INP_HDRINCL; 411 } 412 if (*m != 0) 413 (void) m_free(*m); 414 break; 415 416 #ifdef MROUTING 417 case MRT_INIT: 418 case MRT_DONE: 419 case MRT_ADD_VIF: 420 case MRT_DEL_VIF: 421 case MRT_ADD_MFC: 422 case MRT_DEL_MFC: 423 case MRT_ASSERT: 424 case MRT_API_CONFIG: 425 case MRT_ADD_BW_UPCALL: 426 case MRT_DEL_BW_UPCALL: 427 error = ip_mrouter_set(so, optname, m); 428 break; 429 #endif 430 431 default: 432 error = ip_ctloutput(op, so, level, optname, m); 433 break; 434 } 435 break; 436 437 case PRCO_GETOPT: 438 switch (optname) { 439 case IP_HDRINCL: 440 *m = m_get(M_WAIT, MT_SOOPTS); 441 MCLAIM((*m), so->so_mowner); 442 (*m)->m_len = sizeof (int); 443 *mtod(*m, int *) = inp->inp_flags & INP_HDRINCL ? 1 : 0; 444 break; 445 446 #ifdef MROUTING 447 case MRT_VERSION: 448 case MRT_ASSERT: 449 case MRT_API_SUPPORT: 450 case MRT_API_CONFIG: 451 error = ip_mrouter_get(so, optname, m); 452 break; 453 #endif 454 455 default: 456 error = ip_ctloutput(op, so, level, optname, m); 457 break; 458 } 459 break; 460 } 461 return (error); 462 } 463 464 int 465 rip_bind(inp, nam) 466 struct inpcb *inp; 467 struct mbuf *nam; 468 { 469 struct sockaddr_in *addr = mtod(nam, struct sockaddr_in *); 470 471 if (nam->m_len != sizeof(*addr)) 472 return (EINVAL); 473 if (TAILQ_FIRST(&ifnet) == 0) 474 return (EADDRNOTAVAIL); 475 if (addr->sin_family != AF_INET && 476 addr->sin_family != AF_IMPLINK) 477 return (EAFNOSUPPORT); 478 if (!in_nullhost(addr->sin_addr) && 479 ifa_ifwithaddr(sintosa(addr)) == 0) 480 return (EADDRNOTAVAIL); 481 inp->inp_laddr = addr->sin_addr; 482 return (0); 483 } 484 485 int 486 rip_connect(inp, nam) 487 struct inpcb *inp; 488 struct mbuf *nam; 489 { 490 struct sockaddr_in *addr = mtod(nam, struct sockaddr_in *); 491 492 if (nam->m_len != sizeof(*addr)) 493 return (EINVAL); 494 if (TAILQ_FIRST(&ifnet) == 0) 495 return (EADDRNOTAVAIL); 496 if (addr->sin_family != AF_INET && 497 addr->sin_family != AF_IMPLINK) 498 return (EAFNOSUPPORT); 499 inp->inp_faddr = addr->sin_addr; 500 return (0); 501 } 502 503 void 504 rip_disconnect(inp) 505 struct inpcb *inp; 506 { 507 508 inp->inp_faddr = zeroin_addr; 509 } 510 511 u_long rip_sendspace = RIPSNDQ; 512 u_long rip_recvspace = RIPRCVQ; 513 514 /*ARGSUSED*/ 515 int 516 rip_usrreq(so, req, m, nam, control, p) 517 struct socket *so; 518 int req; 519 struct mbuf *m, *nam, *control; 520 struct proc *p; 521 { 522 struct inpcb *inp; 523 int s; 524 int error = 0; 525 #ifdef MROUTING 526 extern struct socket *ip_mrouter; 527 #endif 528 529 if (req == PRU_CONTROL) 530 return (in_control(so, (long)m, (caddr_t)nam, 531 (struct ifnet *)control, p)); 532 533 if (req == PRU_PURGEIF) { 534 in_pcbpurgeif0(&rawcbtable, (struct ifnet *)control); 535 in_purgeif((struct ifnet *)control); 536 in_pcbpurgeif(&rawcbtable, (struct ifnet *)control); 537 return (0); 538 } 539 540 s = splsoftnet(); 541 inp = sotoinpcb(so); 542 #ifdef DIAGNOSTIC 543 if (req != PRU_SEND && req != PRU_SENDOOB && control) 544 panic("rip_usrreq: unexpected control mbuf"); 545 #endif 546 if (inp == 0 && req != PRU_ATTACH) { 547 error = EINVAL; 548 goto release; 549 } 550 551 switch (req) { 552 553 case PRU_ATTACH: 554 if (inp != 0) { 555 error = EISCONN; 556 break; 557 } 558 if (p == 0 || (error = suser(p->p_ucred, &p->p_acflag))) { 559 error = EACCES; 560 break; 561 } 562 if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) { 563 error = soreserve(so, rip_sendspace, rip_recvspace); 564 if (error) 565 break; 566 } 567 error = in_pcballoc(so, &rawcbtable); 568 if (error) 569 break; 570 inp = sotoinpcb(so); 571 inp->inp_ip.ip_p = (long)nam; 572 break; 573 574 case PRU_DETACH: 575 #ifdef MROUTING 576 if (so == ip_mrouter) 577 ip_mrouter_done(); 578 #endif 579 in_pcbdetach(inp); 580 break; 581 582 case PRU_BIND: 583 error = rip_bind(inp, nam); 584 break; 585 586 case PRU_LISTEN: 587 error = EOPNOTSUPP; 588 break; 589 590 case PRU_CONNECT: 591 error = rip_connect(inp, nam); 592 if (error) 593 break; 594 soisconnected(so); 595 break; 596 597 case PRU_CONNECT2: 598 error = EOPNOTSUPP; 599 break; 600 601 case PRU_DISCONNECT: 602 soisdisconnected(so); 603 rip_disconnect(inp); 604 break; 605 606 /* 607 * Mark the connection as being incapable of further input. 608 */ 609 case PRU_SHUTDOWN: 610 socantsendmore(so); 611 break; 612 613 case PRU_RCVD: 614 error = EOPNOTSUPP; 615 break; 616 617 /* 618 * Ship a packet out. The appropriate raw output 619 * routine handles any massaging necessary. 620 */ 621 case PRU_SEND: 622 if (control && control->m_len) { 623 m_freem(control); 624 m_freem(m); 625 error = EINVAL; 626 break; 627 } 628 { 629 if (nam) { 630 if ((so->so_state & SS_ISCONNECTED) != 0) { 631 error = EISCONN; 632 goto die; 633 } 634 error = rip_connect(inp, nam); 635 if (error) { 636 die: 637 m_freem(m); 638 break; 639 } 640 } else { 641 if ((so->so_state & SS_ISCONNECTED) == 0) { 642 error = ENOTCONN; 643 goto die; 644 } 645 } 646 error = rip_output(m, inp); 647 if (nam) 648 rip_disconnect(inp); 649 } 650 break; 651 652 case PRU_SENSE: 653 /* 654 * stat: don't bother with a blocksize. 655 */ 656 splx(s); 657 return (0); 658 659 case PRU_RCVOOB: 660 error = EOPNOTSUPP; 661 break; 662 663 case PRU_SENDOOB: 664 m_freem(control); 665 m_freem(m); 666 error = EOPNOTSUPP; 667 break; 668 669 case PRU_SOCKADDR: 670 in_setsockaddr(inp, nam); 671 break; 672 673 case PRU_PEERADDR: 674 in_setpeeraddr(inp, nam); 675 break; 676 677 default: 678 panic("rip_usrreq"); 679 } 680 681 release: 682 splx(s); 683 return (error); 684 } 685