1 /* $OpenBSD: raw_ip.c,v 1.150 2022/10/17 14:49:02 mvs Exp $ */ 2 /* $NetBSD: raw_ip.c,v 1.25 1996/02/18 18:58:33 christos Exp $ */ 3 4 /* 5 * Copyright (c) 1982, 1986, 1988, 1993 6 * The Regents of the University of California. All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. Neither the name of the University nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 * 32 * @(#)COPYRIGHT 1.1 (NRL) 17 January 1995 33 * 34 * NRL grants permission for redistribution and use in source and binary 35 * forms, with or without modification, of the software and documentation 36 * created at NRL provided that the following conditions are met: 37 * 38 * 1. Redistributions of source code must retain the above copyright 39 * notice, this list of conditions and the following disclaimer. 40 * 2. Redistributions in binary form must reproduce the above copyright 41 * notice, this list of conditions and the following disclaimer in the 42 * documentation and/or other materials provided with the distribution. 43 * 3. All advertising materials mentioning features or use of this software 44 * must display the following acknowledgements: 45 * This product includes software developed by the University of 46 * California, Berkeley and its contributors. 47 * This product includes software developed at the Information 48 * Technology Division, US Naval Research Laboratory. 49 * 4. Neither the name of the NRL nor the names of its contributors 50 * may be used to endorse or promote products derived from this software 51 * without specific prior written permission. 52 * 53 * THE SOFTWARE PROVIDED BY NRL IS PROVIDED BY NRL AND CONTRIBUTORS ``AS 54 * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 55 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A 56 * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NRL OR 57 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 58 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 59 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 60 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 61 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 62 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 63 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 64 * 65 * The views and conclusions contained in the software and documentation 66 * are those of the authors and should not be interpreted as representing 67 * official policies, either expressed or implied, of the US Naval 68 * Research Laboratory (NRL). 69 */ 70 71 #include <sys/param.h> 72 #include <sys/systm.h> 73 #include <sys/mbuf.h> 74 #include <sys/socket.h> 75 #include <sys/protosw.h> 76 #include <sys/socketvar.h> 77 78 #include <net/if.h> 79 #include <net/if_var.h> 80 #include <net/route.h> 81 82 #include <netinet/in.h> 83 #include <netinet/ip.h> 84 #include <netinet/ip_mroute.h> 85 #include <netinet/ip_var.h> 86 #include <netinet/in_pcb.h> 87 #include <netinet/in_var.h> 88 #include <netinet/ip_icmp.h> 89 90 #include <net/pfvar.h> 91 92 #include "pf.h" 93 94 struct inpcbtable rawcbtable; 95 96 /* 97 * Nominal space allocated to a raw ip socket. 98 */ 99 #define RIPSNDQ 8192 100 #define RIPRCVQ 8192 101 102 /* 103 * Raw interface to IP protocol. 104 */ 105 106 const struct pr_usrreqs rip_usrreqs = { 107 .pru_attach = rip_attach, 108 .pru_detach = rip_detach, 109 .pru_lock = rip_lock, 110 .pru_unlock = rip_unlock, 111 .pru_bind = rip_bind, 112 .pru_connect = rip_connect, 113 .pru_disconnect = rip_disconnect, 114 .pru_shutdown = rip_shutdown, 115 .pru_send = rip_send, 116 .pru_control = in_control, 117 .pru_sockaddr = in_sockaddr, 118 .pru_peeraddr = in_peeraddr, 119 }; 120 121 /* 122 * Initialize raw connection block q. 123 */ 124 void 125 rip_init(void) 126 { 127 in_pcbinit(&rawcbtable, 1); 128 } 129 130 struct mbuf *rip_chkhdr(struct mbuf *, struct mbuf *); 131 132 int 133 rip_input(struct mbuf **mp, int *offp, int proto, int af) 134 { 135 struct mbuf *m = *mp; 136 struct ip *ip = mtod(m, struct ip *); 137 struct inpcb *inp; 138 SIMPLEQ_HEAD(, inpcb) inpcblist; 139 struct in_addr *key; 140 struct counters_ref ref; 141 uint64_t *counters; 142 struct sockaddr_in ripsrc; 143 144 KASSERT(af == AF_INET); 145 146 memset(&ripsrc, 0, sizeof(ripsrc)); 147 ripsrc.sin_family = AF_INET; 148 ripsrc.sin_len = sizeof(ripsrc); 149 ripsrc.sin_addr = ip->ip_src; 150 151 key = &ip->ip_dst; 152 #if NPF > 0 153 if (m->m_pkthdr.pf.flags & PF_TAG_DIVERTED) { 154 struct pf_divert *divert; 155 156 divert = pf_find_divert(m); 157 KASSERT(divert != NULL); 158 switch (divert->type) { 159 case PF_DIVERT_TO: 160 key = &divert->addr.v4; 161 break; 162 case PF_DIVERT_REPLY: 163 break; 164 default: 165 panic("%s: unknown divert type %d, mbuf %p, divert %p", 166 __func__, divert->type, m, divert); 167 } 168 } 169 #endif 170 SIMPLEQ_INIT(&inpcblist); 171 rw_enter_write(&rawcbtable.inpt_notify); 172 mtx_enter(&rawcbtable.inpt_mtx); 173 TAILQ_FOREACH(inp, &rawcbtable.inpt_queue, inp_queue) { 174 if (inp->inp_socket->so_state & SS_CANTRCVMORE) 175 continue; 176 #ifdef INET6 177 if (inp->inp_flags & INP_IPV6) 178 continue; 179 #endif 180 if (rtable_l2(inp->inp_rtableid) != 181 rtable_l2(m->m_pkthdr.ph_rtableid)) 182 continue; 183 184 if (inp->inp_ip.ip_p && inp->inp_ip.ip_p != ip->ip_p) 185 continue; 186 if (inp->inp_laddr.s_addr && 187 inp->inp_laddr.s_addr != key->s_addr) 188 continue; 189 if (inp->inp_faddr.s_addr && 190 inp->inp_faddr.s_addr != ip->ip_src.s_addr) 191 continue; 192 193 in_pcbref(inp); 194 SIMPLEQ_INSERT_TAIL(&inpcblist, inp, inp_notify); 195 } 196 mtx_leave(&rawcbtable.inpt_mtx); 197 198 if (SIMPLEQ_EMPTY(&inpcblist)) { 199 rw_exit_write(&rawcbtable.inpt_notify); 200 201 if (ip->ip_p != IPPROTO_ICMP) 202 icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_PROTOCOL, 203 0, 0); 204 else 205 m_freem(m); 206 207 counters = counters_enter(&ref, ipcounters); 208 counters[ips_noproto]++; 209 counters[ips_delivered]--; 210 counters_leave(&ref, ipcounters); 211 212 return IPPROTO_DONE; 213 } 214 215 while ((inp = SIMPLEQ_FIRST(&inpcblist)) != NULL) { 216 struct mbuf *n, *opts = NULL; 217 218 SIMPLEQ_REMOVE_HEAD(&inpcblist, inp_notify); 219 if (SIMPLEQ_EMPTY(&inpcblist)) 220 n = m; 221 else 222 n = m_copym(m, 0, M_COPYALL, M_NOWAIT); 223 if (n != NULL) { 224 int ret; 225 226 if (inp->inp_flags & INP_CONTROLOPTS || 227 inp->inp_socket->so_options & SO_TIMESTAMP) 228 ip_savecontrol(inp, &opts, ip, n); 229 230 mtx_enter(&inp->inp_mtx); 231 ret = sbappendaddr(inp->inp_socket, 232 &inp->inp_socket->so_rcv, 233 sintosa(&ripsrc), n, opts); 234 mtx_leave(&inp->inp_mtx); 235 236 if (ret == 0) { 237 /* should notify about lost packet */ 238 m_freem(n); 239 m_freem(opts); 240 } else 241 sorwakeup(inp->inp_socket); 242 } 243 in_pcbunref(inp); 244 } 245 rw_exit_write(&rawcbtable.inpt_notify); 246 247 return IPPROTO_DONE; 248 } 249 250 /* 251 * Generate IP header and pass packet to ip_output. 252 * Tack on options user may have setup with control call. 253 */ 254 int 255 rip_output(struct mbuf *m, struct socket *so, struct sockaddr *dstaddr, 256 struct mbuf *control) 257 { 258 struct sockaddr_in *dst = satosin(dstaddr); 259 struct ip *ip; 260 struct inpcb *inp; 261 int flags, error; 262 263 inp = sotoinpcb(so); 264 flags = IP_ALLOWBROADCAST; 265 266 /* 267 * If the user handed us a complete IP packet, use it. 268 * Otherwise, allocate an mbuf for a header and fill it in. 269 */ 270 if ((inp->inp_flags & INP_HDRINCL) == 0) { 271 if ((m->m_pkthdr.len + sizeof(struct ip)) > IP_MAXPACKET) { 272 m_freem(m); 273 return (EMSGSIZE); 274 } 275 M_PREPEND(m, sizeof(struct ip), M_DONTWAIT); 276 if (!m) 277 return (ENOBUFS); 278 ip = mtod(m, struct ip *); 279 ip->ip_tos = inp->inp_ip.ip_tos; 280 ip->ip_off = htons(0); 281 ip->ip_p = inp->inp_ip.ip_p; 282 ip->ip_len = htons(m->m_pkthdr.len); 283 ip->ip_src.s_addr = INADDR_ANY; 284 ip->ip_dst = dst->sin_addr; 285 ip->ip_ttl = inp->inp_ip.ip_ttl ? inp->inp_ip.ip_ttl : MAXTTL; 286 } else { 287 if (m->m_pkthdr.len > IP_MAXPACKET) { 288 m_freem(m); 289 return (EMSGSIZE); 290 } 291 292 m = rip_chkhdr(m, inp->inp_options); 293 if (m == NULL) 294 return (EINVAL); 295 296 ip = mtod(m, struct ip *); 297 if (ip->ip_id == 0) 298 ip->ip_id = htons(ip_randomid()); 299 dst->sin_addr = ip->ip_dst; 300 301 /* XXX prevent ip_output from overwriting header fields */ 302 flags |= IP_RAWOUTPUT; 303 ipstat_inc(ips_rawout); 304 } 305 306 if (ip->ip_src.s_addr == INADDR_ANY) { 307 error = in_pcbselsrc(&ip->ip_src, dst, inp); 308 if (error != 0) 309 return (error); 310 } 311 312 #ifdef INET6 313 /* 314 * A thought: Even though raw IP shouldn't be able to set IPv6 315 * multicast options, if it does, the last parameter to 316 * ip_output should be guarded against v6/v4 problems. 317 */ 318 #endif 319 /* force routing table */ 320 m->m_pkthdr.ph_rtableid = inp->inp_rtableid; 321 322 #if NPF > 0 323 if (inp->inp_socket->so_state & SS_ISCONNECTED && 324 ip->ip_p != IPPROTO_ICMP) 325 pf_mbuf_link_inpcb(m, inp); 326 #endif 327 328 error = ip_output(m, inp->inp_options, &inp->inp_route, flags, 329 inp->inp_moptions, inp, 0); 330 return (error); 331 } 332 333 struct mbuf * 334 rip_chkhdr(struct mbuf *m, struct mbuf *options) 335 { 336 struct ip *ip; 337 int hlen, opt, optlen, cnt; 338 u_char *cp; 339 340 if (m->m_pkthdr.len < sizeof(struct ip)) { 341 m_freem(m); 342 return NULL; 343 } 344 345 m = m_pullup(m, sizeof (struct ip)); 346 if (m == NULL) 347 return NULL; 348 349 ip = mtod(m, struct ip *); 350 hlen = ip->ip_hl << 2; 351 352 /* Don't allow packet length sizes that will crash. */ 353 if (hlen < sizeof (struct ip) || 354 ntohs(ip->ip_len) < hlen || 355 ntohs(ip->ip_len) != m->m_pkthdr.len) { 356 m_freem(m); 357 return NULL; 358 } 359 m = m_pullup(m, hlen); 360 if (m == NULL) 361 return NULL; 362 363 ip = mtod(m, struct ip *); 364 365 if (ip->ip_v != IPVERSION) { 366 m_freem(m); 367 return NULL; 368 } 369 370 /* 371 * Don't allow both user specified and setsockopt options. 372 * If options are present verify them. 373 */ 374 if (hlen != sizeof(struct ip)) { 375 if (options) { 376 m_freem(m); 377 return NULL; 378 } else { 379 cp = (u_char *)(ip + 1); 380 cnt = hlen - sizeof(struct ip); 381 for (; cnt > 0; cnt -= optlen, cp += optlen) { 382 opt = cp[IPOPT_OPTVAL]; 383 if (opt == IPOPT_EOL) 384 break; 385 if (opt == IPOPT_NOP) 386 optlen = 1; 387 else { 388 if (cnt < IPOPT_OLEN + sizeof(*cp)) { 389 m_freem(m); 390 return NULL; 391 } 392 optlen = cp[IPOPT_OLEN]; 393 if (optlen < IPOPT_OLEN + sizeof(*cp) || 394 optlen > cnt) { 395 m_freem(m); 396 return NULL; 397 } 398 } 399 } 400 } 401 } 402 403 return m; 404 } 405 406 /* 407 * Raw IP socket option processing. 408 */ 409 int 410 rip_ctloutput(int op, struct socket *so, int level, int optname, 411 struct mbuf *m) 412 { 413 struct inpcb *inp = sotoinpcb(so); 414 int error; 415 416 if (level != IPPROTO_IP) 417 return (EINVAL); 418 419 switch (optname) { 420 421 case IP_HDRINCL: 422 error = 0; 423 if (op == PRCO_SETOPT) { 424 if (m == NULL || m->m_len < sizeof (int)) 425 error = EINVAL; 426 else if (*mtod(m, int *)) 427 inp->inp_flags |= INP_HDRINCL; 428 else 429 inp->inp_flags &= ~INP_HDRINCL; 430 } else { 431 m->m_len = sizeof(int); 432 *mtod(m, int *) = inp->inp_flags & INP_HDRINCL; 433 } 434 return (error); 435 436 case MRT_INIT: 437 case MRT_DONE: 438 case MRT_ADD_VIF: 439 case MRT_DEL_VIF: 440 case MRT_ADD_MFC: 441 case MRT_DEL_MFC: 442 case MRT_VERSION: 443 case MRT_ASSERT: 444 case MRT_API_SUPPORT: 445 case MRT_API_CONFIG: 446 #ifdef MROUTING 447 switch (op) { 448 case PRCO_SETOPT: 449 error = ip_mrouter_set(so, optname, m); 450 break; 451 case PRCO_GETOPT: 452 error = ip_mrouter_get(so, optname, m); 453 break; 454 default: 455 error = EINVAL; 456 break; 457 } 458 return (error); 459 #else 460 return (EOPNOTSUPP); 461 #endif 462 } 463 return (ip_ctloutput(op, so, level, optname, m)); 464 } 465 466 u_long rip_sendspace = RIPSNDQ; 467 u_long rip_recvspace = RIPRCVQ; 468 469 int 470 rip_attach(struct socket *so, int proto, int wait) 471 { 472 struct inpcb *inp; 473 int error; 474 475 if (so->so_pcb) 476 panic("rip_attach"); 477 if ((so->so_state & SS_PRIV) == 0) 478 return EACCES; 479 if (proto < 0 || proto >= IPPROTO_MAX) 480 return EPROTONOSUPPORT; 481 482 if ((error = soreserve(so, rip_sendspace, rip_recvspace))) 483 return error; 484 NET_ASSERT_LOCKED(); 485 if ((error = in_pcballoc(so, &rawcbtable, wait))) 486 return error; 487 inp = sotoinpcb(so); 488 inp->inp_ip.ip_p = proto; 489 return 0; 490 } 491 492 int 493 rip_detach(struct socket *so) 494 { 495 struct inpcb *inp = sotoinpcb(so); 496 497 soassertlocked(so); 498 499 if (inp == NULL) 500 return (EINVAL); 501 502 #ifdef MROUTING 503 if (so == ip_mrouter[inp->inp_rtableid]) 504 ip_mrouter_done(so); 505 #endif 506 in_pcbdetach(inp); 507 508 return (0); 509 } 510 511 void 512 rip_lock(struct socket *so) 513 { 514 struct inpcb *inp = sotoinpcb(so); 515 516 NET_ASSERT_LOCKED(); 517 mtx_enter(&inp->inp_mtx); 518 } 519 520 void 521 rip_unlock(struct socket *so) 522 { 523 struct inpcb *inp = sotoinpcb(so); 524 525 NET_ASSERT_LOCKED(); 526 mtx_leave(&inp->inp_mtx); 527 } 528 529 int 530 rip_bind(struct socket *so, struct mbuf *nam, struct proc *p) 531 { 532 struct inpcb *inp = sotoinpcb(so); 533 struct sockaddr_in *addr; 534 int error; 535 536 soassertlocked(so); 537 538 if ((error = in_nam2sin(nam, &addr))) 539 return (error); 540 541 if (!((so->so_options & SO_BINDANY) || 542 addr->sin_addr.s_addr == INADDR_ANY || 543 addr->sin_addr.s_addr == INADDR_BROADCAST || 544 in_broadcast(addr->sin_addr, inp->inp_rtableid) || 545 ifa_ifwithaddr(sintosa(addr), inp->inp_rtableid))) 546 return (EADDRNOTAVAIL); 547 548 inp->inp_laddr = addr->sin_addr; 549 550 return (0); 551 } 552 553 int 554 rip_connect(struct socket *so, struct mbuf *nam) 555 { 556 struct inpcb *inp = sotoinpcb(so); 557 struct sockaddr_in *addr; 558 int error; 559 560 soassertlocked(so); 561 562 if ((error = in_nam2sin(nam, &addr))) 563 return (error); 564 565 inp->inp_faddr = addr->sin_addr; 566 soisconnected(so); 567 568 return (0); 569 } 570 571 int 572 rip_disconnect(struct socket *so) 573 { 574 struct inpcb *inp = sotoinpcb(so); 575 576 soassertlocked(so); 577 578 if ((so->so_state & SS_ISCONNECTED) == 0) 579 return (ENOTCONN); 580 581 soisdisconnected(so); 582 inp->inp_faddr.s_addr = INADDR_ANY; 583 584 return (0); 585 } 586 587 int 588 rip_shutdown(struct socket *so) 589 { 590 /* 591 * Mark the connection as being incapable of further input. 592 */ 593 594 soassertlocked(so); 595 socantsendmore(so); 596 597 return (0); 598 } 599 600 int 601 rip_send(struct socket *so, struct mbuf *m, struct mbuf *nam, 602 struct mbuf *control) 603 { 604 struct inpcb *inp = sotoinpcb(so); 605 struct sockaddr_in dst; 606 int error; 607 608 soassertlocked(so); 609 610 /* 611 * Ship a packet out. The appropriate raw output 612 * routine handles any massaging necessary. 613 */ 614 memset(&dst, 0, sizeof(dst)); 615 dst.sin_family = AF_INET; 616 dst.sin_len = sizeof(dst); 617 if (so->so_state & SS_ISCONNECTED) { 618 if (nam) { 619 error = EISCONN; 620 goto out; 621 } 622 dst.sin_addr = inp->inp_faddr; 623 } else { 624 struct sockaddr_in *addr; 625 626 if (nam == NULL) { 627 error = ENOTCONN; 628 goto out; 629 } 630 if ((error = in_nam2sin(nam, &addr))) 631 goto out; 632 dst.sin_addr = addr->sin_addr; 633 } 634 #ifdef IPSEC 635 /* XXX Find an IPsec TDB */ 636 #endif 637 error = rip_output(m, so, sintosa(&dst), NULL); 638 m = NULL; 639 640 out: 641 m_freem(control); 642 m_freem(m); 643 644 return (error); 645 } 646