1 /* $OpenBSD: raw_ip.c,v 1.128 2022/05/15 09:12:20 dlg Exp $ */ 2 /* $NetBSD: raw_ip.c,v 1.25 1996/02/18 18:58:33 christos Exp $ */ 3 4 /* 5 * Copyright (c) 1982, 1986, 1988, 1993 6 * The Regents of the University of California. All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. Neither the name of the University nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 * 32 * @(#)COPYRIGHT 1.1 (NRL) 17 January 1995 33 * 34 * NRL grants permission for redistribution and use in source and binary 35 * forms, with or without modification, of the software and documentation 36 * created at NRL provided that the following conditions are met: 37 * 38 * 1. Redistributions of source code must retain the above copyright 39 * notice, this list of conditions and the following disclaimer. 40 * 2. Redistributions in binary form must reproduce the above copyright 41 * notice, this list of conditions and the following disclaimer in the 42 * documentation and/or other materials provided with the distribution. 43 * 3. All advertising materials mentioning features or use of this software 44 * must display the following acknowledgements: 45 * This product includes software developed by the University of 46 * California, Berkeley and its contributors. 47 * This product includes software developed at the Information 48 * Technology Division, US Naval Research Laboratory. 49 * 4. Neither the name of the NRL nor the names of its contributors 50 * may be used to endorse or promote products derived from this software 51 * without specific prior written permission. 52 * 53 * THE SOFTWARE PROVIDED BY NRL IS PROVIDED BY NRL AND CONTRIBUTORS ``AS 54 * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 55 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A 56 * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NRL OR 57 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 58 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 59 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 60 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 61 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 62 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 63 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 64 * 65 * The views and conclusions contained in the software and documentation 66 * are those of the authors and should not be interpreted as representing 67 * official policies, either expressed or implied, of the US Naval 68 * Research Laboratory (NRL). 69 */ 70 71 #include <sys/param.h> 72 #include <sys/systm.h> 73 #include <sys/mbuf.h> 74 #include <sys/socket.h> 75 #include <sys/protosw.h> 76 #include <sys/socketvar.h> 77 78 #include <net/if.h> 79 #include <net/if_var.h> 80 #include <net/route.h> 81 82 #include <netinet/in.h> 83 #include <netinet/ip.h> 84 #include <netinet/ip_mroute.h> 85 #include <netinet/ip_var.h> 86 #include <netinet/in_pcb.h> 87 #include <netinet/in_var.h> 88 #include <netinet/ip_icmp.h> 89 90 #include <net/pfvar.h> 91 92 #include "pf.h" 93 94 struct inpcbtable rawcbtable; 95 96 /* 97 * Nominal space allocated to a raw ip socket. 98 */ 99 #define RIPSNDQ 8192 100 #define RIPRCVQ 8192 101 102 /* 103 * Raw interface to IP protocol. 104 */ 105 106 /* 107 * Initialize raw connection block q. 108 */ 109 void 110 rip_init(void) 111 { 112 in_pcbinit(&rawcbtable, 1); 113 } 114 115 struct mbuf *rip_chkhdr(struct mbuf *, struct mbuf *); 116 117 int 118 rip_input(struct mbuf **mp, int *offp, int proto, int af) 119 { 120 struct mbuf *m = *mp; 121 struct ip *ip = mtod(m, struct ip *); 122 struct inpcb *inp; 123 SIMPLEQ_HEAD(, inpcb) inpcblist; 124 struct in_addr *key; 125 struct counters_ref ref; 126 uint64_t *counters; 127 struct sockaddr_in ripsrc; 128 129 KASSERT(af == AF_INET); 130 131 memset(&ripsrc, 0, sizeof(ripsrc)); 132 ripsrc.sin_family = AF_INET; 133 ripsrc.sin_len = sizeof(ripsrc); 134 ripsrc.sin_addr = ip->ip_src; 135 136 key = &ip->ip_dst; 137 #if NPF > 0 138 if (m->m_pkthdr.pf.flags & PF_TAG_DIVERTED) { 139 struct pf_divert *divert; 140 141 divert = pf_find_divert(m); 142 KASSERT(divert != NULL); 143 switch (divert->type) { 144 case PF_DIVERT_TO: 145 key = &divert->addr.v4; 146 break; 147 case PF_DIVERT_REPLY: 148 break; 149 default: 150 panic("%s: unknown divert type %d, mbuf %p, divert %p", 151 __func__, divert->type, m, divert); 152 } 153 } 154 #endif 155 NET_ASSERT_WLOCKED(); 156 SIMPLEQ_INIT(&inpcblist); 157 mtx_enter(&rawcbtable.inpt_mtx); 158 TAILQ_FOREACH(inp, &rawcbtable.inpt_queue, inp_queue) { 159 if (inp->inp_socket->so_state & SS_CANTRCVMORE) 160 continue; 161 #ifdef INET6 162 if (inp->inp_flags & INP_IPV6) 163 continue; 164 #endif 165 if (rtable_l2(inp->inp_rtableid) != 166 rtable_l2(m->m_pkthdr.ph_rtableid)) 167 continue; 168 169 if (inp->inp_ip.ip_p && inp->inp_ip.ip_p != ip->ip_p) 170 continue; 171 if (inp->inp_laddr.s_addr && 172 inp->inp_laddr.s_addr != key->s_addr) 173 continue; 174 if (inp->inp_faddr.s_addr && 175 inp->inp_faddr.s_addr != ip->ip_src.s_addr) 176 continue; 177 178 in_pcbref(inp); 179 SIMPLEQ_INSERT_TAIL(&inpcblist, inp, inp_notify); 180 } 181 mtx_leave(&rawcbtable.inpt_mtx); 182 183 if (SIMPLEQ_EMPTY(&inpcblist)) { 184 if (ip->ip_p != IPPROTO_ICMP) 185 icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_PROTOCOL, 186 0, 0); 187 else 188 m_freem(m); 189 190 counters = counters_enter(&ref, ipcounters); 191 counters[ips_noproto]++; 192 counters[ips_delivered]--; 193 counters_leave(&ref, ipcounters); 194 } 195 196 while ((inp = SIMPLEQ_FIRST(&inpcblist)) != NULL) { 197 struct mbuf *n, *opts = NULL; 198 199 SIMPLEQ_REMOVE_HEAD(&inpcblist, inp_notify); 200 if (SIMPLEQ_EMPTY(&inpcblist)) 201 n = m; 202 else 203 n = m_copym(m, 0, M_COPYALL, M_NOWAIT); 204 if (n != NULL) { 205 if (inp->inp_flags & INP_CONTROLOPTS || 206 inp->inp_socket->so_options & SO_TIMESTAMP) 207 ip_savecontrol(inp, &opts, ip, n); 208 if (sbappendaddr(inp->inp_socket, 209 &inp->inp_socket->so_rcv, 210 sintosa(&ripsrc), n, opts) == 0) { 211 /* should notify about lost packet */ 212 m_freem(n); 213 m_freem(opts); 214 } else 215 sorwakeup(inp->inp_socket); 216 } 217 in_pcbunref(inp); 218 } 219 return IPPROTO_DONE; 220 } 221 222 /* 223 * Generate IP header and pass packet to ip_output. 224 * Tack on options user may have setup with control call. 225 */ 226 int 227 rip_output(struct mbuf *m, struct socket *so, struct sockaddr *dstaddr, 228 struct mbuf *control) 229 { 230 struct sockaddr_in *dst = satosin(dstaddr); 231 struct ip *ip; 232 struct inpcb *inp; 233 int flags, error; 234 235 inp = sotoinpcb(so); 236 flags = IP_ALLOWBROADCAST; 237 238 /* 239 * If the user handed us a complete IP packet, use it. 240 * Otherwise, allocate an mbuf for a header and fill it in. 241 */ 242 if ((inp->inp_flags & INP_HDRINCL) == 0) { 243 if ((m->m_pkthdr.len + sizeof(struct ip)) > IP_MAXPACKET) { 244 m_freem(m); 245 return (EMSGSIZE); 246 } 247 M_PREPEND(m, sizeof(struct ip), M_DONTWAIT); 248 if (!m) 249 return (ENOBUFS); 250 ip = mtod(m, struct ip *); 251 ip->ip_tos = inp->inp_ip.ip_tos; 252 ip->ip_off = htons(0); 253 ip->ip_p = inp->inp_ip.ip_p; 254 ip->ip_len = htons(m->m_pkthdr.len); 255 ip->ip_src.s_addr = INADDR_ANY; 256 ip->ip_dst = dst->sin_addr; 257 ip->ip_ttl = inp->inp_ip.ip_ttl ? inp->inp_ip.ip_ttl : MAXTTL; 258 } else { 259 if (m->m_pkthdr.len > IP_MAXPACKET) { 260 m_freem(m); 261 return (EMSGSIZE); 262 } 263 264 m = rip_chkhdr(m, inp->inp_options); 265 if (m == NULL) 266 return (EINVAL); 267 268 ip = mtod(m, struct ip *); 269 if (ip->ip_id == 0) 270 ip->ip_id = htons(ip_randomid()); 271 dst->sin_addr = ip->ip_dst; 272 273 /* XXX prevent ip_output from overwriting header fields */ 274 flags |= IP_RAWOUTPUT; 275 ipstat_inc(ips_rawout); 276 } 277 278 if (ip->ip_src.s_addr == INADDR_ANY) { 279 error = in_pcbselsrc(&ip->ip_src, dst, inp); 280 if (error != 0) 281 return (error); 282 } 283 284 #ifdef INET6 285 /* 286 * A thought: Even though raw IP shouldn't be able to set IPv6 287 * multicast options, if it does, the last parameter to 288 * ip_output should be guarded against v6/v4 problems. 289 */ 290 #endif 291 /* force routing table */ 292 m->m_pkthdr.ph_rtableid = inp->inp_rtableid; 293 294 #if NPF > 0 295 if (inp->inp_socket->so_state & SS_ISCONNECTED && 296 ip->ip_p != IPPROTO_ICMP) 297 pf_mbuf_link_inpcb(m, inp); 298 #endif 299 300 error = ip_output(m, inp->inp_options, &inp->inp_route, flags, 301 inp->inp_moptions, inp, 0); 302 return (error); 303 } 304 305 struct mbuf * 306 rip_chkhdr(struct mbuf *m, struct mbuf *options) 307 { 308 struct ip *ip; 309 int hlen, opt, optlen, cnt; 310 u_char *cp; 311 312 if (m->m_pkthdr.len < sizeof(struct ip)) { 313 m_freem(m); 314 return NULL; 315 } 316 317 m = m_pullup(m, sizeof (struct ip)); 318 if (m == NULL) 319 return NULL; 320 321 ip = mtod(m, struct ip *); 322 hlen = ip->ip_hl << 2; 323 324 /* Don't allow packet length sizes that will crash. */ 325 if (hlen < sizeof (struct ip) || 326 ntohs(ip->ip_len) < hlen || 327 ntohs(ip->ip_len) != m->m_pkthdr.len) { 328 m_freem(m); 329 return NULL; 330 } 331 m = m_pullup(m, hlen); 332 if (m == NULL) 333 return NULL; 334 335 ip = mtod(m, struct ip *); 336 337 if (ip->ip_v != IPVERSION) { 338 m_freem(m); 339 return NULL; 340 } 341 342 /* 343 * Don't allow both user specified and setsockopt options. 344 * If options are present verify them. 345 */ 346 if (hlen != sizeof(struct ip)) { 347 if (options) { 348 m_freem(m); 349 return NULL; 350 } else { 351 cp = (u_char *)(ip + 1); 352 cnt = hlen - sizeof(struct ip); 353 for (; cnt > 0; cnt -= optlen, cp += optlen) { 354 opt = cp[IPOPT_OPTVAL]; 355 if (opt == IPOPT_EOL) 356 break; 357 if (opt == IPOPT_NOP) 358 optlen = 1; 359 else { 360 if (cnt < IPOPT_OLEN + sizeof(*cp)) { 361 m_freem(m); 362 return NULL; 363 } 364 optlen = cp[IPOPT_OLEN]; 365 if (optlen < IPOPT_OLEN + sizeof(*cp) || 366 optlen > cnt) { 367 m_freem(m); 368 return NULL; 369 } 370 } 371 } 372 } 373 } 374 375 return m; 376 } 377 378 /* 379 * Raw IP socket option processing. 380 */ 381 int 382 rip_ctloutput(int op, struct socket *so, int level, int optname, 383 struct mbuf *m) 384 { 385 struct inpcb *inp = sotoinpcb(so); 386 int error; 387 388 if (level != IPPROTO_IP) 389 return (EINVAL); 390 391 switch (optname) { 392 393 case IP_HDRINCL: 394 error = 0; 395 if (op == PRCO_SETOPT) { 396 if (m == NULL || m->m_len < sizeof (int)) 397 error = EINVAL; 398 else if (*mtod(m, int *)) 399 inp->inp_flags |= INP_HDRINCL; 400 else 401 inp->inp_flags &= ~INP_HDRINCL; 402 } else { 403 m->m_len = sizeof(int); 404 *mtod(m, int *) = inp->inp_flags & INP_HDRINCL; 405 } 406 return (error); 407 408 case MRT_INIT: 409 case MRT_DONE: 410 case MRT_ADD_VIF: 411 case MRT_DEL_VIF: 412 case MRT_ADD_MFC: 413 case MRT_DEL_MFC: 414 case MRT_VERSION: 415 case MRT_ASSERT: 416 case MRT_API_SUPPORT: 417 case MRT_API_CONFIG: 418 #ifdef MROUTING 419 switch (op) { 420 case PRCO_SETOPT: 421 error = ip_mrouter_set(so, optname, m); 422 break; 423 case PRCO_GETOPT: 424 error = ip_mrouter_get(so, optname, m); 425 break; 426 default: 427 error = EINVAL; 428 break; 429 } 430 return (error); 431 #else 432 return (EOPNOTSUPP); 433 #endif 434 } 435 return (ip_ctloutput(op, so, level, optname, m)); 436 } 437 438 u_long rip_sendspace = RIPSNDQ; 439 u_long rip_recvspace = RIPRCVQ; 440 441 /*ARGSUSED*/ 442 int 443 rip_usrreq(struct socket *so, int req, struct mbuf *m, struct mbuf *nam, 444 struct mbuf *control, struct proc *p) 445 { 446 struct inpcb *inp; 447 int error = 0; 448 449 if (req == PRU_CONTROL) 450 return (in_control(so, (u_long)m, (caddr_t)nam, 451 (struct ifnet *)control)); 452 453 soassertlocked(so); 454 455 inp = sotoinpcb(so); 456 if (inp == NULL) { 457 error = EINVAL; 458 goto release; 459 } 460 461 switch (req) { 462 463 case PRU_DISCONNECT: 464 if ((so->so_state & SS_ISCONNECTED) == 0) { 465 error = ENOTCONN; 466 break; 467 } 468 soisdisconnected(so); 469 inp->inp_faddr.s_addr = INADDR_ANY; 470 break; 471 case PRU_ABORT: 472 soisdisconnected(so); 473 if (inp == NULL) 474 panic("rip_abort"); 475 #ifdef MROUTING 476 if (so == ip_mrouter[inp->inp_rtableid]) 477 ip_mrouter_done(so); 478 #endif 479 in_pcbdetach(inp); 480 break; 481 482 case PRU_BIND: 483 { 484 struct sockaddr_in *addr; 485 486 if ((error = in_nam2sin(nam, &addr))) 487 break; 488 if (!((so->so_options & SO_BINDANY) || 489 addr->sin_addr.s_addr == INADDR_ANY || 490 addr->sin_addr.s_addr == INADDR_BROADCAST || 491 in_broadcast(addr->sin_addr, inp->inp_rtableid) || 492 ifa_ifwithaddr(sintosa(addr), inp->inp_rtableid))) { 493 error = EADDRNOTAVAIL; 494 break; 495 } 496 inp->inp_laddr = addr->sin_addr; 497 break; 498 } 499 case PRU_CONNECT: 500 { 501 struct sockaddr_in *addr; 502 503 if ((error = in_nam2sin(nam, &addr))) 504 break; 505 inp->inp_faddr = addr->sin_addr; 506 soisconnected(so); 507 break; 508 } 509 510 case PRU_CONNECT2: 511 error = EOPNOTSUPP; 512 break; 513 514 /* 515 * Mark the connection as being incapable of further input. 516 */ 517 case PRU_SHUTDOWN: 518 socantsendmore(so); 519 break; 520 521 /* 522 * Ship a packet out. The appropriate raw output 523 * routine handles any massaging necessary. 524 */ 525 case PRU_SEND: 526 { 527 struct sockaddr_in dst; 528 529 memset(&dst, 0, sizeof(dst)); 530 dst.sin_family = AF_INET; 531 dst.sin_len = sizeof(dst); 532 if (so->so_state & SS_ISCONNECTED) { 533 if (nam) { 534 error = EISCONN; 535 break; 536 } 537 dst.sin_addr = inp->inp_faddr; 538 } else { 539 struct sockaddr_in *addr; 540 541 if (nam == NULL) { 542 error = ENOTCONN; 543 break; 544 } 545 if ((error = in_nam2sin(nam, &addr))) 546 break; 547 dst.sin_addr = addr->sin_addr; 548 } 549 #ifdef IPSEC 550 /* XXX Find an IPsec TDB */ 551 #endif 552 error = rip_output(m, so, sintosa(&dst), NULL); 553 m = NULL; 554 break; 555 } 556 557 case PRU_SENSE: 558 /* 559 * stat: don't bother with a blocksize. 560 */ 561 break; 562 563 /* 564 * Not supported. 565 */ 566 case PRU_LISTEN: 567 case PRU_ACCEPT: 568 case PRU_SENDOOB: 569 case PRU_RCVD: 570 case PRU_RCVOOB: 571 error = EOPNOTSUPP; 572 break; 573 574 case PRU_SOCKADDR: 575 in_setsockaddr(inp, nam); 576 break; 577 578 case PRU_PEERADDR: 579 in_setpeeraddr(inp, nam); 580 break; 581 582 default: 583 panic("rip_usrreq"); 584 } 585 release: 586 if (req != PRU_RCVD && req != PRU_RCVOOB && req != PRU_SENSE) { 587 m_freem(control); 588 m_freem(m); 589 } 590 return (error); 591 } 592 593 int 594 rip_attach(struct socket *so, int proto) 595 { 596 struct inpcb *inp; 597 int error; 598 599 if (so->so_pcb) 600 panic("rip_attach"); 601 if ((so->so_state & SS_PRIV) == 0) 602 return EACCES; 603 if (proto < 0 || proto >= IPPROTO_MAX) 604 return EPROTONOSUPPORT; 605 606 if ((error = soreserve(so, rip_sendspace, rip_recvspace))) 607 return error; 608 NET_ASSERT_LOCKED(); 609 if ((error = in_pcballoc(so, &rawcbtable))) 610 return error; 611 inp = sotoinpcb(so); 612 inp->inp_ip.ip_p = proto; 613 return 0; 614 } 615 616 int 617 rip_detach(struct socket *so) 618 { 619 struct inpcb *inp = sotoinpcb(so); 620 621 soassertlocked(so); 622 623 if (inp == NULL) 624 return (EINVAL); 625 626 #ifdef MROUTING 627 if (so == ip_mrouter[inp->inp_rtableid]) 628 ip_mrouter_done(so); 629 #endif 630 in_pcbdetach(inp); 631 632 return (0); 633 } 634