1 /* $OpenBSD: raw_ip.c,v 1.127 2022/03/23 17:22:28 bluhm Exp $ */ 2 /* $NetBSD: raw_ip.c,v 1.25 1996/02/18 18:58:33 christos Exp $ */ 3 4 /* 5 * Copyright (c) 1982, 1986, 1988, 1993 6 * The Regents of the University of California. All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. Neither the name of the University nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 * 32 * @(#)COPYRIGHT 1.1 (NRL) 17 January 1995 33 * 34 * NRL grants permission for redistribution and use in source and binary 35 * forms, with or without modification, of the software and documentation 36 * created at NRL provided that the following conditions are met: 37 * 38 * 1. Redistributions of source code must retain the above copyright 39 * notice, this list of conditions and the following disclaimer. 40 * 2. Redistributions in binary form must reproduce the above copyright 41 * notice, this list of conditions and the following disclaimer in the 42 * documentation and/or other materials provided with the distribution. 43 * 3. All advertising materials mentioning features or use of this software 44 * must display the following acknowledgements: 45 * This product includes software developed by the University of 46 * California, Berkeley and its contributors. 47 * This product includes software developed at the Information 48 * Technology Division, US Naval Research Laboratory. 49 * 4. Neither the name of the NRL nor the names of its contributors 50 * may be used to endorse or promote products derived from this software 51 * without specific prior written permission. 52 * 53 * THE SOFTWARE PROVIDED BY NRL IS PROVIDED BY NRL AND CONTRIBUTORS ``AS 54 * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 55 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A 56 * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NRL OR 57 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 58 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 59 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 60 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 61 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 62 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 63 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 64 * 65 * The views and conclusions contained in the software and documentation 66 * are those of the authors and should not be interpreted as representing 67 * official policies, either expressed or implied, of the US Naval 68 * Research Laboratory (NRL). 69 */ 70 71 #include <sys/param.h> 72 #include <sys/systm.h> 73 #include <sys/mbuf.h> 74 #include <sys/socket.h> 75 #include <sys/protosw.h> 76 #include <sys/socketvar.h> 77 78 #include <net/if.h> 79 #include <net/if_var.h> 80 #include <net/route.h> 81 82 #include <netinet/in.h> 83 #include <netinet/ip.h> 84 #include <netinet/ip_mroute.h> 85 #include <netinet/ip_var.h> 86 #include <netinet/in_pcb.h> 87 #include <netinet/in_var.h> 88 #include <netinet/ip_icmp.h> 89 90 #include <net/pfvar.h> 91 92 #include "pf.h" 93 94 struct inpcbtable rawcbtable; 95 96 /* 97 * Nominal space allocated to a raw ip socket. 98 */ 99 #define RIPSNDQ 8192 100 #define RIPRCVQ 8192 101 102 /* 103 * Raw interface to IP protocol. 104 */ 105 106 /* 107 * Initialize raw connection block q. 108 */ 109 void 110 rip_init(void) 111 { 112 in_pcbinit(&rawcbtable, 1); 113 } 114 115 struct mbuf *rip_chkhdr(struct mbuf *, struct mbuf *); 116 117 int 118 rip_input(struct mbuf **mp, int *offp, int proto, int af) 119 { 120 struct mbuf *m = *mp; 121 struct ip *ip = mtod(m, struct ip *); 122 struct inpcb *inp; 123 SIMPLEQ_HEAD(, inpcb) inpcblist; 124 struct in_addr *key; 125 struct counters_ref ref; 126 uint64_t *counters; 127 struct sockaddr_in ripsrc; 128 129 KASSERT(af == AF_INET); 130 131 memset(&ripsrc, 0, sizeof(ripsrc)); 132 ripsrc.sin_family = AF_INET; 133 ripsrc.sin_len = sizeof(ripsrc); 134 ripsrc.sin_addr = ip->ip_src; 135 136 key = &ip->ip_dst; 137 #if NPF > 0 138 if (m->m_pkthdr.pf.flags & PF_TAG_DIVERTED) { 139 struct pf_divert *divert; 140 141 divert = pf_find_divert(m); 142 KASSERT(divert != NULL); 143 switch (divert->type) { 144 case PF_DIVERT_TO: 145 key = &divert->addr.v4; 146 break; 147 case PF_DIVERT_REPLY: 148 break; 149 default: 150 panic("%s: unknown divert type %d, mbuf %p, divert %p", 151 __func__, divert->type, m, divert); 152 } 153 } 154 #endif 155 NET_ASSERT_WLOCKED(); 156 SIMPLEQ_INIT(&inpcblist); 157 mtx_enter(&rawcbtable.inpt_mtx); 158 TAILQ_FOREACH(inp, &rawcbtable.inpt_queue, inp_queue) { 159 if (inp->inp_socket->so_state & SS_CANTRCVMORE) 160 continue; 161 #ifdef INET6 162 if (inp->inp_flags & INP_IPV6) 163 continue; 164 #endif 165 if (rtable_l2(inp->inp_rtableid) != 166 rtable_l2(m->m_pkthdr.ph_rtableid)) 167 continue; 168 169 if (inp->inp_ip.ip_p && inp->inp_ip.ip_p != ip->ip_p) 170 continue; 171 if (inp->inp_laddr.s_addr && 172 inp->inp_laddr.s_addr != key->s_addr) 173 continue; 174 if (inp->inp_faddr.s_addr && 175 inp->inp_faddr.s_addr != ip->ip_src.s_addr) 176 continue; 177 178 in_pcbref(inp); 179 SIMPLEQ_INSERT_TAIL(&inpcblist, inp, inp_notify); 180 } 181 mtx_leave(&rawcbtable.inpt_mtx); 182 183 if (SIMPLEQ_EMPTY(&inpcblist)) { 184 if (ip->ip_p != IPPROTO_ICMP) 185 icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_PROTOCOL, 186 0, 0); 187 else 188 m_freem(m); 189 190 counters = counters_enter(&ref, ipcounters); 191 counters[ips_noproto]++; 192 counters[ips_delivered]--; 193 counters_leave(&ref, ipcounters); 194 } 195 196 while ((inp = SIMPLEQ_FIRST(&inpcblist)) != NULL) { 197 struct mbuf *n, *opts = NULL; 198 199 SIMPLEQ_REMOVE_HEAD(&inpcblist, inp_notify); 200 if (SIMPLEQ_EMPTY(&inpcblist)) 201 n = m; 202 else 203 n = m_copym(m, 0, M_COPYALL, M_NOWAIT); 204 if (n != NULL) { 205 if (inp->inp_flags & INP_CONTROLOPTS || 206 inp->inp_socket->so_options & SO_TIMESTAMP) 207 ip_savecontrol(inp, &opts, ip, n); 208 if (sbappendaddr(inp->inp_socket, 209 &inp->inp_socket->so_rcv, 210 sintosa(&ripsrc), n, opts) == 0) { 211 /* should notify about lost packet */ 212 m_freem(n); 213 m_freem(opts); 214 } else 215 sorwakeup(inp->inp_socket); 216 } 217 in_pcbunref(inp); 218 } 219 return IPPROTO_DONE; 220 } 221 222 /* 223 * Generate IP header and pass packet to ip_output. 224 * Tack on options user may have setup with control call. 225 */ 226 int 227 rip_output(struct mbuf *m, struct socket *so, struct sockaddr *dstaddr, 228 struct mbuf *control) 229 { 230 struct sockaddr_in *dst = satosin(dstaddr); 231 struct ip *ip; 232 struct inpcb *inp; 233 int flags, error; 234 235 inp = sotoinpcb(so); 236 flags = IP_ALLOWBROADCAST; 237 238 /* 239 * If the user handed us a complete IP packet, use it. 240 * Otherwise, allocate an mbuf for a header and fill it in. 241 */ 242 if ((inp->inp_flags & INP_HDRINCL) == 0) { 243 if ((m->m_pkthdr.len + sizeof(struct ip)) > IP_MAXPACKET) { 244 m_freem(m); 245 return (EMSGSIZE); 246 } 247 M_PREPEND(m, sizeof(struct ip), M_DONTWAIT); 248 if (!m) 249 return (ENOBUFS); 250 ip = mtod(m, struct ip *); 251 ip->ip_tos = inp->inp_ip.ip_tos; 252 ip->ip_off = htons(0); 253 ip->ip_p = inp->inp_ip.ip_p; 254 ip->ip_len = htons(m->m_pkthdr.len); 255 ip->ip_src.s_addr = INADDR_ANY; 256 ip->ip_dst = dst->sin_addr; 257 ip->ip_ttl = inp->inp_ip.ip_ttl ? inp->inp_ip.ip_ttl : MAXTTL; 258 } else { 259 if (m->m_pkthdr.len > IP_MAXPACKET) { 260 m_freem(m); 261 return (EMSGSIZE); 262 } 263 264 m = rip_chkhdr(m, inp->inp_options); 265 if (m == NULL) 266 return (EINVAL); 267 268 ip = mtod(m, struct ip *); 269 if (ip->ip_id == 0) 270 ip->ip_id = htons(ip_randomid()); 271 dst->sin_addr = ip->ip_dst; 272 273 /* XXX prevent ip_output from overwriting header fields */ 274 flags |= IP_RAWOUTPUT; 275 ipstat_inc(ips_rawout); 276 } 277 278 if (ip->ip_src.s_addr == INADDR_ANY) { 279 struct in_addr *laddr; 280 281 error = in_pcbselsrc(&laddr, dst, inp); 282 if (error != 0) 283 return (error); 284 285 ip->ip_src = *laddr; 286 } 287 288 #ifdef INET6 289 /* 290 * A thought: Even though raw IP shouldn't be able to set IPv6 291 * multicast options, if it does, the last parameter to 292 * ip_output should be guarded against v6/v4 problems. 293 */ 294 #endif 295 /* force routing table */ 296 m->m_pkthdr.ph_rtableid = inp->inp_rtableid; 297 298 #if NPF > 0 299 if (inp->inp_socket->so_state & SS_ISCONNECTED && 300 ip->ip_p != IPPROTO_ICMP) 301 pf_mbuf_link_inpcb(m, inp); 302 #endif 303 304 error = ip_output(m, inp->inp_options, &inp->inp_route, flags, 305 inp->inp_moptions, inp, 0); 306 return (error); 307 } 308 309 struct mbuf * 310 rip_chkhdr(struct mbuf *m, struct mbuf *options) 311 { 312 struct ip *ip; 313 int hlen, opt, optlen, cnt; 314 u_char *cp; 315 316 if (m->m_pkthdr.len < sizeof(struct ip)) { 317 m_freem(m); 318 return NULL; 319 } 320 321 m = m_pullup(m, sizeof (struct ip)); 322 if (m == NULL) 323 return NULL; 324 325 ip = mtod(m, struct ip *); 326 hlen = ip->ip_hl << 2; 327 328 /* Don't allow packet length sizes that will crash. */ 329 if (hlen < sizeof (struct ip) || 330 ntohs(ip->ip_len) < hlen || 331 ntohs(ip->ip_len) != m->m_pkthdr.len) { 332 m_freem(m); 333 return NULL; 334 } 335 m = m_pullup(m, hlen); 336 if (m == NULL) 337 return NULL; 338 339 ip = mtod(m, struct ip *); 340 341 if (ip->ip_v != IPVERSION) { 342 m_freem(m); 343 return NULL; 344 } 345 346 /* 347 * Don't allow both user specified and setsockopt options. 348 * If options are present verify them. 349 */ 350 if (hlen != sizeof(struct ip)) { 351 if (options) { 352 m_freem(m); 353 return NULL; 354 } else { 355 cp = (u_char *)(ip + 1); 356 cnt = hlen - sizeof(struct ip); 357 for (; cnt > 0; cnt -= optlen, cp += optlen) { 358 opt = cp[IPOPT_OPTVAL]; 359 if (opt == IPOPT_EOL) 360 break; 361 if (opt == IPOPT_NOP) 362 optlen = 1; 363 else { 364 if (cnt < IPOPT_OLEN + sizeof(*cp)) { 365 m_freem(m); 366 return NULL; 367 } 368 optlen = cp[IPOPT_OLEN]; 369 if (optlen < IPOPT_OLEN + sizeof(*cp) || 370 optlen > cnt) { 371 m_freem(m); 372 return NULL; 373 } 374 } 375 } 376 } 377 } 378 379 return m; 380 } 381 382 /* 383 * Raw IP socket option processing. 384 */ 385 int 386 rip_ctloutput(int op, struct socket *so, int level, int optname, 387 struct mbuf *m) 388 { 389 struct inpcb *inp = sotoinpcb(so); 390 int error; 391 392 if (level != IPPROTO_IP) 393 return (EINVAL); 394 395 switch (optname) { 396 397 case IP_HDRINCL: 398 error = 0; 399 if (op == PRCO_SETOPT) { 400 if (m == NULL || m->m_len < sizeof (int)) 401 error = EINVAL; 402 else if (*mtod(m, int *)) 403 inp->inp_flags |= INP_HDRINCL; 404 else 405 inp->inp_flags &= ~INP_HDRINCL; 406 } else { 407 m->m_len = sizeof(int); 408 *mtod(m, int *) = inp->inp_flags & INP_HDRINCL; 409 } 410 return (error); 411 412 case MRT_INIT: 413 case MRT_DONE: 414 case MRT_ADD_VIF: 415 case MRT_DEL_VIF: 416 case MRT_ADD_MFC: 417 case MRT_DEL_MFC: 418 case MRT_VERSION: 419 case MRT_ASSERT: 420 case MRT_API_SUPPORT: 421 case MRT_API_CONFIG: 422 #ifdef MROUTING 423 switch (op) { 424 case PRCO_SETOPT: 425 error = ip_mrouter_set(so, optname, m); 426 break; 427 case PRCO_GETOPT: 428 error = ip_mrouter_get(so, optname, m); 429 break; 430 default: 431 error = EINVAL; 432 break; 433 } 434 return (error); 435 #else 436 return (EOPNOTSUPP); 437 #endif 438 } 439 return (ip_ctloutput(op, so, level, optname, m)); 440 } 441 442 u_long rip_sendspace = RIPSNDQ; 443 u_long rip_recvspace = RIPRCVQ; 444 445 /*ARGSUSED*/ 446 int 447 rip_usrreq(struct socket *so, int req, struct mbuf *m, struct mbuf *nam, 448 struct mbuf *control, struct proc *p) 449 { 450 struct inpcb *inp; 451 int error = 0; 452 453 if (req == PRU_CONTROL) 454 return (in_control(so, (u_long)m, (caddr_t)nam, 455 (struct ifnet *)control)); 456 457 soassertlocked(so); 458 459 inp = sotoinpcb(so); 460 if (inp == NULL) { 461 error = EINVAL; 462 goto release; 463 } 464 465 switch (req) { 466 467 case PRU_DISCONNECT: 468 if ((so->so_state & SS_ISCONNECTED) == 0) { 469 error = ENOTCONN; 470 break; 471 } 472 soisdisconnected(so); 473 inp->inp_faddr.s_addr = INADDR_ANY; 474 break; 475 case PRU_ABORT: 476 soisdisconnected(so); 477 if (inp == NULL) 478 panic("rip_abort"); 479 #ifdef MROUTING 480 if (so == ip_mrouter[inp->inp_rtableid]) 481 ip_mrouter_done(so); 482 #endif 483 in_pcbdetach(inp); 484 break; 485 486 case PRU_BIND: 487 { 488 struct sockaddr_in *addr; 489 490 if ((error = in_nam2sin(nam, &addr))) 491 break; 492 if (!((so->so_options & SO_BINDANY) || 493 addr->sin_addr.s_addr == INADDR_ANY || 494 addr->sin_addr.s_addr == INADDR_BROADCAST || 495 in_broadcast(addr->sin_addr, inp->inp_rtableid) || 496 ifa_ifwithaddr(sintosa(addr), inp->inp_rtableid))) { 497 error = EADDRNOTAVAIL; 498 break; 499 } 500 inp->inp_laddr = addr->sin_addr; 501 break; 502 } 503 case PRU_CONNECT: 504 { 505 struct sockaddr_in *addr; 506 507 if ((error = in_nam2sin(nam, &addr))) 508 break; 509 inp->inp_faddr = addr->sin_addr; 510 soisconnected(so); 511 break; 512 } 513 514 case PRU_CONNECT2: 515 error = EOPNOTSUPP; 516 break; 517 518 /* 519 * Mark the connection as being incapable of further input. 520 */ 521 case PRU_SHUTDOWN: 522 socantsendmore(so); 523 break; 524 525 /* 526 * Ship a packet out. The appropriate raw output 527 * routine handles any massaging necessary. 528 */ 529 case PRU_SEND: 530 { 531 struct sockaddr_in dst; 532 533 memset(&dst, 0, sizeof(dst)); 534 dst.sin_family = AF_INET; 535 dst.sin_len = sizeof(dst); 536 if (so->so_state & SS_ISCONNECTED) { 537 if (nam) { 538 error = EISCONN; 539 break; 540 } 541 dst.sin_addr = inp->inp_faddr; 542 } else { 543 struct sockaddr_in *addr; 544 545 if (nam == NULL) { 546 error = ENOTCONN; 547 break; 548 } 549 if ((error = in_nam2sin(nam, &addr))) 550 break; 551 dst.sin_addr = addr->sin_addr; 552 } 553 #ifdef IPSEC 554 /* XXX Find an IPsec TDB */ 555 #endif 556 error = rip_output(m, so, sintosa(&dst), NULL); 557 m = NULL; 558 break; 559 } 560 561 case PRU_SENSE: 562 /* 563 * stat: don't bother with a blocksize. 564 */ 565 break; 566 567 /* 568 * Not supported. 569 */ 570 case PRU_LISTEN: 571 case PRU_ACCEPT: 572 case PRU_SENDOOB: 573 case PRU_RCVD: 574 case PRU_RCVOOB: 575 error = EOPNOTSUPP; 576 break; 577 578 case PRU_SOCKADDR: 579 in_setsockaddr(inp, nam); 580 break; 581 582 case PRU_PEERADDR: 583 in_setpeeraddr(inp, nam); 584 break; 585 586 default: 587 panic("rip_usrreq"); 588 } 589 release: 590 if (req != PRU_RCVD && req != PRU_RCVOOB && req != PRU_SENSE) { 591 m_freem(control); 592 m_freem(m); 593 } 594 return (error); 595 } 596 597 int 598 rip_attach(struct socket *so, int proto) 599 { 600 struct inpcb *inp; 601 int error; 602 603 if (so->so_pcb) 604 panic("rip_attach"); 605 if ((so->so_state & SS_PRIV) == 0) 606 return EACCES; 607 if (proto < 0 || proto >= IPPROTO_MAX) 608 return EPROTONOSUPPORT; 609 610 if ((error = soreserve(so, rip_sendspace, rip_recvspace))) 611 return error; 612 NET_ASSERT_LOCKED(); 613 if ((error = in_pcballoc(so, &rawcbtable))) 614 return error; 615 inp = sotoinpcb(so); 616 inp->inp_ip.ip_p = proto; 617 return 0; 618 } 619 620 int 621 rip_detach(struct socket *so) 622 { 623 struct inpcb *inp = sotoinpcb(so); 624 625 soassertlocked(so); 626 627 if (inp == NULL) 628 return (EINVAL); 629 630 #ifdef MROUTING 631 if (so == ip_mrouter[inp->inp_rtableid]) 632 ip_mrouter_done(so); 633 #endif 634 in_pcbdetach(inp); 635 636 return (0); 637 } 638