1 /* $NetBSD: raw_ip.c,v 1.71 2003/08/07 16:33:14 agc Exp $ */ 2 3 /* 4 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. Neither the name of the project nor the names of its contributors 16 * may be used to endorse or promote products derived from this software 17 * without specific prior written permission. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 30 */ 31 32 /* 33 * Copyright (c) 1982, 1986, 1988, 1993 34 * The Regents of the University of California. All rights reserved. 35 * 36 * Redistribution and use in source and binary forms, with or without 37 * modification, are permitted provided that the following conditions 38 * are met: 39 * 1. Redistributions of source code must retain the above copyright 40 * notice, this list of conditions and the following disclaimer. 41 * 2. Redistributions in binary form must reproduce the above copyright 42 * notice, this list of conditions and the following disclaimer in the 43 * documentation and/or other materials provided with the distribution. 44 * 3. Neither the name of the University nor the names of its contributors 45 * may be used to endorse or promote products derived from this software 46 * without specific prior written permission. 47 * 48 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 50 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 51 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 52 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 53 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 54 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 55 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 56 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 57 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 58 * SUCH DAMAGE. 59 * 60 * @(#)raw_ip.c 8.7 (Berkeley) 5/15/95 61 */ 62 63 #include <sys/cdefs.h> 64 __KERNEL_RCSID(0, "$NetBSD: raw_ip.c,v 1.71 2003/08/07 16:33:14 agc Exp $"); 65 66 #include "opt_ipsec.h" 67 #include "opt_mrouting.h" 68 69 #include <sys/param.h> 70 #include <sys/malloc.h> 71 #include <sys/mbuf.h> 72 #include <sys/socket.h> 73 #include <sys/protosw.h> 74 #include <sys/socketvar.h> 75 #include <sys/errno.h> 76 #include <sys/systm.h> 77 #include <sys/proc.h> 78 79 #include <net/if.h> 80 #include <net/route.h> 81 82 #include <netinet/in.h> 83 #include <netinet/in_systm.h> 84 #include <netinet/ip.h> 85 #include <netinet/ip_var.h> 86 #include <netinet/ip_mroute.h> 87 #include <netinet/ip_icmp.h> 88 #include <netinet/in_pcb.h> 89 #include <netinet/in_var.h> 90 91 #include <machine/stdarg.h> 92 93 #ifdef IPSEC 94 #include <netinet6/ipsec.h> 95 #endif /*IPSEC*/ 96 97 struct inpcbtable rawcbtable; 98 99 int rip_pcbnotify __P((struct inpcbtable *, struct in_addr, 100 struct in_addr, int, int, void (*) __P((struct inpcb *, int)))); 101 int rip_bind __P((struct inpcb *, struct mbuf *)); 102 int rip_connect __P((struct inpcb *, struct mbuf *)); 103 void rip_disconnect __P((struct inpcb *)); 104 105 /* 106 * Nominal space allocated to a raw ip socket. 107 */ 108 #define RIPSNDQ 8192 109 #define RIPRCVQ 8192 110 111 /* 112 * Raw interface to IP protocol. 113 */ 114 115 /* 116 * Initialize raw connection block q. 117 */ 118 void 119 rip_init() 120 { 121 122 in_pcbinit(&rawcbtable, 1, 1); 123 } 124 125 /* 126 * Setup generic address and protocol structures 127 * for raw_input routine, then pass them along with 128 * mbuf chain. 129 */ 130 void 131 #if __STDC__ 132 rip_input(struct mbuf *m, ...) 133 #else 134 rip_input(m, va_alist) 135 struct mbuf *m; 136 va_dcl 137 #endif 138 { 139 int proto; 140 struct ip *ip = mtod(m, struct ip *); 141 struct inpcb *inp; 142 struct inpcb *last = 0; 143 struct mbuf *opts = 0; 144 struct sockaddr_in ripsrc; 145 va_list ap; 146 147 va_start(ap, m); 148 (void)va_arg(ap, int); /* ignore value, advance ap */ 149 proto = va_arg(ap, int); 150 va_end(ap); 151 152 ripsrc.sin_family = AF_INET; 153 ripsrc.sin_len = sizeof(struct sockaddr_in); 154 ripsrc.sin_addr = ip->ip_src; 155 ripsrc.sin_port = 0; 156 bzero((caddr_t)ripsrc.sin_zero, sizeof(ripsrc.sin_zero)); 157 158 /* 159 * XXX Compatibility: programs using raw IP expect ip_len 160 * XXX to have the header length subtracted, and in host order. 161 * XXX ip_off is also expected to be host order. 162 */ 163 ip->ip_len = ntohs(ip->ip_len) - (ip->ip_hl << 2); 164 NTOHS(ip->ip_off); 165 166 CIRCLEQ_FOREACH(inp, &rawcbtable.inpt_queue, inp_queue) { 167 if (inp->inp_ip.ip_p && inp->inp_ip.ip_p != proto) 168 continue; 169 if (!in_nullhost(inp->inp_laddr) && 170 !in_hosteq(inp->inp_laddr, ip->ip_dst)) 171 continue; 172 if (!in_nullhost(inp->inp_faddr) && 173 !in_hosteq(inp->inp_faddr, ip->ip_src)) 174 continue; 175 if (last) { 176 struct mbuf *n; 177 178 #ifdef IPSEC 179 /* check AH/ESP integrity. */ 180 if (ipsec4_in_reject_so(m, last->inp_socket)) { 181 ipsecstat.in_polvio++; 182 /* do not inject data to pcb */ 183 } else 184 #endif /*IPSEC*/ 185 if ((n = m_copy(m, 0, (int)M_COPYALL)) != NULL) { 186 if (last->inp_flags & INP_CONTROLOPTS || 187 last->inp_socket->so_options & SO_TIMESTAMP) 188 ip_savecontrol(last, &opts, ip, n); 189 if (sbappendaddr(&last->inp_socket->so_rcv, 190 sintosa(&ripsrc), n, opts) == 0) { 191 /* should notify about lost packet */ 192 m_freem(n); 193 if (opts) 194 m_freem(opts); 195 } else 196 sorwakeup(last->inp_socket); 197 opts = NULL; 198 } 199 } 200 last = inp; 201 } 202 #ifdef IPSEC 203 /* check AH/ESP integrity. */ 204 if (last && ipsec4_in_reject_so(m, last->inp_socket)) { 205 m_freem(m); 206 ipsecstat.in_polvio++; 207 ipstat.ips_delivered--; 208 /* do not inject data to pcb */ 209 } else 210 #endif /*IPSEC*/ 211 if (last) { 212 if (last->inp_flags & INP_CONTROLOPTS || 213 last->inp_socket->so_options & SO_TIMESTAMP) 214 ip_savecontrol(last, &opts, ip, m); 215 if (sbappendaddr(&last->inp_socket->so_rcv, 216 sintosa(&ripsrc), m, opts) == 0) { 217 m_freem(m); 218 if (opts) 219 m_freem(opts); 220 } else 221 sorwakeup(last->inp_socket); 222 } else { 223 if (inetsw[ip_protox[ip->ip_p]].pr_input == rip_input) { 224 icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_PROTOCOL, 225 0, 0); 226 ipstat.ips_noproto++; 227 ipstat.ips_delivered--; 228 } else 229 m_freem(m); 230 } 231 return; 232 } 233 234 int 235 rip_pcbnotify(table, faddr, laddr, proto, errno, notify) 236 struct inpcbtable *table; 237 struct in_addr faddr, laddr; 238 int proto; 239 int errno; 240 void (*notify) __P((struct inpcb *, int)); 241 { 242 struct inpcb *inp, *ninp; 243 int nmatch; 244 245 nmatch = 0; 246 for (inp = CIRCLEQ_FIRST(&table->inpt_queue); 247 inp != (struct inpcb *)&table->inpt_queue; 248 inp = ninp) { 249 ninp = inp->inp_queue.cqe_next; 250 if (inp->inp_ip.ip_p && inp->inp_ip.ip_p != proto) 251 continue; 252 if (in_hosteq(inp->inp_faddr, faddr) && 253 in_hosteq(inp->inp_laddr, laddr)) { 254 (*notify)(inp, errno); 255 nmatch++; 256 } 257 } 258 259 return nmatch; 260 } 261 262 void * 263 rip_ctlinput(cmd, sa, v) 264 int cmd; 265 struct sockaddr *sa; 266 void *v; 267 { 268 struct ip *ip = v; 269 void (*notify) __P((struct inpcb *, int)) = in_rtchange; 270 int errno; 271 272 if (sa->sa_family != AF_INET || 273 sa->sa_len != sizeof(struct sockaddr_in)) 274 return NULL; 275 if ((unsigned)cmd >= PRC_NCMDS) 276 return NULL; 277 errno = inetctlerrmap[cmd]; 278 if (PRC_IS_REDIRECT(cmd)) 279 notify = in_rtchange, ip = 0; 280 else if (cmd == PRC_HOSTDEAD) 281 ip = 0; 282 else if (errno == 0) 283 return NULL; 284 if (ip) { 285 rip_pcbnotify(&rawcbtable, satosin(sa)->sin_addr, 286 ip->ip_src, ip->ip_p, errno, notify); 287 288 /* XXX mapped address case */ 289 } else 290 in_pcbnotifyall(&rawcbtable, satosin(sa)->sin_addr, errno, 291 notify); 292 return NULL; 293 } 294 295 /* 296 * Generate IP header and pass packet to ip_output. 297 * Tack on options user may have setup with control call. 298 */ 299 int 300 #if __STDC__ 301 rip_output(struct mbuf *m, ...) 302 #else 303 rip_output(m, va_alist) 304 struct mbuf *m; 305 va_dcl 306 #endif 307 { 308 struct inpcb *inp; 309 struct ip *ip; 310 struct mbuf *opts; 311 int flags; 312 va_list ap; 313 314 va_start(ap, m); 315 inp = va_arg(ap, struct inpcb *); 316 va_end(ap); 317 318 flags = 319 (inp->inp_socket->so_options & SO_DONTROUTE) | IP_ALLOWBROADCAST 320 | IP_RETURNMTU; 321 322 /* 323 * If the user handed us a complete IP packet, use it. 324 * Otherwise, allocate an mbuf for a header and fill it in. 325 */ 326 if ((inp->inp_flags & INP_HDRINCL) == 0) { 327 if ((m->m_pkthdr.len + sizeof(struct ip)) > IP_MAXPACKET) { 328 m_freem(m); 329 return (EMSGSIZE); 330 } 331 M_PREPEND(m, sizeof(struct ip), M_DONTWAIT); 332 if (!m) 333 return (ENOBUFS); 334 ip = mtod(m, struct ip *); 335 ip->ip_tos = 0; 336 ip->ip_off = htons(0); 337 ip->ip_p = inp->inp_ip.ip_p; 338 ip->ip_len = htons(m->m_pkthdr.len); 339 ip->ip_src = inp->inp_laddr; 340 ip->ip_dst = inp->inp_faddr; 341 ip->ip_ttl = MAXTTL; 342 opts = inp->inp_options; 343 } else { 344 if (m->m_pkthdr.len > IP_MAXPACKET) { 345 m_freem(m); 346 return (EMSGSIZE); 347 } 348 ip = mtod(m, struct ip *); 349 350 /* 351 * If the mbuf is read-only, we need to allocate 352 * a new mbuf for the header, since we need to 353 * modify the header. 354 */ 355 if (M_READONLY(m)) { 356 int hlen = ip->ip_hl << 2; 357 358 m = m_copyup(m, hlen, (max_linkhdr + 3) & ~3); 359 if (m == NULL) 360 return (ENOMEM); /* XXX */ 361 ip = mtod(m, struct ip *); 362 } 363 364 /* XXX userland passes ip_len and ip_off in host order */ 365 if (m->m_pkthdr.len != ip->ip_len) { 366 m_freem(m); 367 return (EINVAL); 368 } 369 HTONS(ip->ip_len); 370 HTONS(ip->ip_off); 371 if (ip->ip_id == 0) 372 ip->ip_id = htons(ip_id++); 373 opts = NULL; 374 /* XXX prevent ip_output from overwriting header fields */ 375 flags |= IP_RAWOUTPUT; 376 ipstat.ips_rawout++; 377 } 378 #ifdef IPSEC 379 if (ipsec_setsocket(m, inp->inp_socket) != 0) { 380 m_freem(m); 381 return ENOBUFS; 382 } 383 #endif /*IPSEC*/ 384 return (ip_output(m, opts, &inp->inp_route, flags, inp->inp_moptions, 385 &inp->inp_errormtu)); 386 } 387 388 /* 389 * Raw IP socket option processing. 390 */ 391 int 392 rip_ctloutput(op, so, level, optname, m) 393 int op; 394 struct socket *so; 395 int level, optname; 396 struct mbuf **m; 397 { 398 struct inpcb *inp = sotoinpcb(so); 399 int error = 0; 400 401 if (level != IPPROTO_IP) { 402 error = ENOPROTOOPT; 403 if (op == PRCO_SETOPT && *m != 0) 404 (void) m_free(*m); 405 } else switch (op) { 406 407 case PRCO_SETOPT: 408 switch (optname) { 409 case IP_HDRINCL: 410 if (*m == 0 || (*m)->m_len < sizeof (int)) 411 error = EINVAL; 412 else { 413 if (*mtod(*m, int *)) 414 inp->inp_flags |= INP_HDRINCL; 415 else 416 inp->inp_flags &= ~INP_HDRINCL; 417 } 418 if (*m != 0) 419 (void) m_free(*m); 420 break; 421 422 #ifdef MROUTING 423 case MRT_INIT: 424 case MRT_DONE: 425 case MRT_ADD_VIF: 426 case MRT_DEL_VIF: 427 case MRT_ADD_MFC: 428 case MRT_DEL_MFC: 429 case MRT_ASSERT: 430 error = ip_mrouter_set(so, optname, m); 431 break; 432 #endif 433 434 default: 435 error = ip_ctloutput(op, so, level, optname, m); 436 break; 437 } 438 break; 439 440 case PRCO_GETOPT: 441 switch (optname) { 442 case IP_HDRINCL: 443 *m = m_get(M_WAIT, MT_SOOPTS); 444 MCLAIM((*m), so->so_mowner); 445 (*m)->m_len = sizeof (int); 446 *mtod(*m, int *) = inp->inp_flags & INP_HDRINCL ? 1 : 0; 447 break; 448 449 #ifdef MROUTING 450 case MRT_VERSION: 451 case MRT_ASSERT: 452 error = ip_mrouter_get(so, optname, m); 453 break; 454 #endif 455 456 default: 457 error = ip_ctloutput(op, so, level, optname, m); 458 break; 459 } 460 break; 461 } 462 return (error); 463 } 464 465 int 466 rip_bind(inp, nam) 467 struct inpcb *inp; 468 struct mbuf *nam; 469 { 470 struct sockaddr_in *addr = mtod(nam, struct sockaddr_in *); 471 472 if (nam->m_len != sizeof(*addr)) 473 return (EINVAL); 474 if (TAILQ_FIRST(&ifnet) == 0) 475 return (EADDRNOTAVAIL); 476 if (addr->sin_family != AF_INET && 477 addr->sin_family != AF_IMPLINK) 478 return (EAFNOSUPPORT); 479 if (!in_nullhost(addr->sin_addr) && 480 ifa_ifwithaddr(sintosa(addr)) == 0) 481 return (EADDRNOTAVAIL); 482 inp->inp_laddr = addr->sin_addr; 483 return (0); 484 } 485 486 int 487 rip_connect(inp, nam) 488 struct inpcb *inp; 489 struct mbuf *nam; 490 { 491 struct sockaddr_in *addr = mtod(nam, struct sockaddr_in *); 492 493 if (nam->m_len != sizeof(*addr)) 494 return (EINVAL); 495 if (TAILQ_FIRST(&ifnet) == 0) 496 return (EADDRNOTAVAIL); 497 if (addr->sin_family != AF_INET && 498 addr->sin_family != AF_IMPLINK) 499 return (EAFNOSUPPORT); 500 inp->inp_faddr = addr->sin_addr; 501 return (0); 502 } 503 504 void 505 rip_disconnect(inp) 506 struct inpcb *inp; 507 { 508 509 inp->inp_faddr = zeroin_addr; 510 } 511 512 u_long rip_sendspace = RIPSNDQ; 513 u_long rip_recvspace = RIPRCVQ; 514 515 /*ARGSUSED*/ 516 int 517 rip_usrreq(so, req, m, nam, control, p) 518 struct socket *so; 519 int req; 520 struct mbuf *m, *nam, *control; 521 struct proc *p; 522 { 523 struct inpcb *inp; 524 int s; 525 int error = 0; 526 #ifdef MROUTING 527 extern struct socket *ip_mrouter; 528 #endif 529 530 if (req == PRU_CONTROL) 531 return (in_control(so, (long)m, (caddr_t)nam, 532 (struct ifnet *)control, p)); 533 534 if (req == PRU_PURGEIF) { 535 in_pcbpurgeif0(&rawcbtable, (struct ifnet *)control); 536 in_purgeif((struct ifnet *)control); 537 in_pcbpurgeif(&rawcbtable, (struct ifnet *)control); 538 return (0); 539 } 540 541 s = splsoftnet(); 542 inp = sotoinpcb(so); 543 #ifdef DIAGNOSTIC 544 if (req != PRU_SEND && req != PRU_SENDOOB && control) 545 panic("rip_usrreq: unexpected control mbuf"); 546 #endif 547 if (inp == 0 && req != PRU_ATTACH) { 548 error = EINVAL; 549 goto release; 550 } 551 552 switch (req) { 553 554 case PRU_ATTACH: 555 if (inp != 0) { 556 error = EISCONN; 557 break; 558 } 559 if (p == 0 || (error = suser(p->p_ucred, &p->p_acflag))) { 560 error = EACCES; 561 break; 562 } 563 if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) { 564 error = soreserve(so, rip_sendspace, rip_recvspace); 565 if (error) 566 break; 567 } 568 error = in_pcballoc(so, &rawcbtable); 569 if (error) 570 break; 571 inp = sotoinpcb(so); 572 inp->inp_ip.ip_p = (long)nam; 573 break; 574 575 case PRU_DETACH: 576 #ifdef MROUTING 577 if (so == ip_mrouter) 578 ip_mrouter_done(); 579 #endif 580 in_pcbdetach(inp); 581 break; 582 583 case PRU_BIND: 584 error = rip_bind(inp, nam); 585 break; 586 587 case PRU_LISTEN: 588 error = EOPNOTSUPP; 589 break; 590 591 case PRU_CONNECT: 592 error = rip_connect(inp, nam); 593 if (error) 594 break; 595 soisconnected(so); 596 break; 597 598 case PRU_CONNECT2: 599 error = EOPNOTSUPP; 600 break; 601 602 case PRU_DISCONNECT: 603 soisdisconnected(so); 604 rip_disconnect(inp); 605 break; 606 607 /* 608 * Mark the connection as being incapable of further input. 609 */ 610 case PRU_SHUTDOWN: 611 socantsendmore(so); 612 break; 613 614 case PRU_RCVD: 615 error = EOPNOTSUPP; 616 break; 617 618 /* 619 * Ship a packet out. The appropriate raw output 620 * routine handles any massaging necessary. 621 */ 622 case PRU_SEND: 623 if (control && control->m_len) { 624 m_freem(control); 625 m_freem(m); 626 error = EINVAL; 627 break; 628 } 629 { 630 if (nam) { 631 if ((so->so_state & SS_ISCONNECTED) != 0) { 632 error = EISCONN; 633 goto die; 634 } 635 error = rip_connect(inp, nam); 636 if (error) { 637 die: 638 m_freem(m); 639 break; 640 } 641 } else { 642 if ((so->so_state & SS_ISCONNECTED) == 0) { 643 error = ENOTCONN; 644 goto die; 645 } 646 } 647 error = rip_output(m, inp); 648 if (nam) 649 rip_disconnect(inp); 650 } 651 break; 652 653 case PRU_SENSE: 654 /* 655 * stat: don't bother with a blocksize. 656 */ 657 splx(s); 658 return (0); 659 660 case PRU_RCVOOB: 661 error = EOPNOTSUPP; 662 break; 663 664 case PRU_SENDOOB: 665 m_freem(control); 666 m_freem(m); 667 error = EOPNOTSUPP; 668 break; 669 670 case PRU_SOCKADDR: 671 in_setsockaddr(inp, nam); 672 break; 673 674 case PRU_PEERADDR: 675 in_setpeeraddr(inp, nam); 676 break; 677 678 default: 679 panic("rip_usrreq"); 680 } 681 682 release: 683 splx(s); 684 return (error); 685 } 686