1 /* $OpenBSD: raw_ip.c,v 1.119 2019/02/04 21:40:52 bluhm Exp $ */ 2 /* $NetBSD: raw_ip.c,v 1.25 1996/02/18 18:58:33 christos Exp $ */ 3 4 /* 5 * Copyright (c) 1982, 1986, 1988, 1993 6 * The Regents of the University of California. All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. Neither the name of the University nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 * 32 * @(#)COPYRIGHT 1.1 (NRL) 17 January 1995 33 * 34 * NRL grants permission for redistribution and use in source and binary 35 * forms, with or without modification, of the software and documentation 36 * created at NRL provided that the following conditions are met: 37 * 38 * 1. Redistributions of source code must retain the above copyright 39 * notice, this list of conditions and the following disclaimer. 40 * 2. Redistributions in binary form must reproduce the above copyright 41 * notice, this list of conditions and the following disclaimer in the 42 * documentation and/or other materials provided with the distribution. 43 * 3. All advertising materials mentioning features or use of this software 44 * must display the following acknowledgements: 45 * This product includes software developed by the University of 46 * California, Berkeley and its contributors. 47 * This product includes software developed at the Information 48 * Technology Division, US Naval Research Laboratory. 49 * 4. Neither the name of the NRL nor the names of its contributors 50 * may be used to endorse or promote products derived from this software 51 * without specific prior written permission. 52 * 53 * THE SOFTWARE PROVIDED BY NRL IS PROVIDED BY NRL AND CONTRIBUTORS ``AS 54 * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 55 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A 56 * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NRL OR 57 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 58 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 59 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 60 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 61 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 62 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 63 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 64 * 65 * The views and conclusions contained in the software and documentation 66 * are those of the authors and should not be interpreted as representing 67 * official policies, either expressed or implied, of the US Naval 68 * Research Laboratory (NRL). 69 */ 70 71 #include <sys/param.h> 72 #include <sys/systm.h> 73 #include <sys/mbuf.h> 74 #include <sys/socket.h> 75 #include <sys/protosw.h> 76 #include <sys/socketvar.h> 77 78 #include <net/if.h> 79 #include <net/if_var.h> 80 #include <net/route.h> 81 82 #include <netinet/in.h> 83 #include <netinet/ip.h> 84 #include <netinet/ip_mroute.h> 85 #include <netinet/ip_var.h> 86 #include <netinet/in_pcb.h> 87 #include <netinet/in_var.h> 88 #include <netinet/ip_icmp.h> 89 90 #include <net/pfvar.h> 91 92 #include "pf.h" 93 94 struct inpcbtable rawcbtable; 95 96 /* 97 * Nominal space allocated to a raw ip socket. 98 */ 99 #define RIPSNDQ 8192 100 #define RIPRCVQ 8192 101 102 /* 103 * Raw interface to IP protocol. 104 */ 105 106 /* 107 * Initialize raw connection block q. 108 */ 109 void 110 rip_init(void) 111 { 112 113 in_pcbinit(&rawcbtable, 1); 114 } 115 116 struct sockaddr_in ripsrc = { sizeof(ripsrc), AF_INET }; 117 118 struct mbuf *rip_chkhdr(struct mbuf *, struct mbuf *); 119 120 int 121 rip_input(struct mbuf **mp, int *offp, int proto, int af) 122 { 123 struct mbuf *m = *mp; 124 struct ip *ip = mtod(m, struct ip *); 125 struct inpcb *inp, *last = NULL; 126 struct in_addr *key; 127 struct mbuf *opts = NULL; 128 struct counters_ref ref; 129 uint64_t *counters; 130 131 KASSERT(af == AF_INET); 132 133 ripsrc.sin_addr = ip->ip_src; 134 key = &ip->ip_dst; 135 #if NPF > 0 136 if (m->m_pkthdr.pf.flags & PF_TAG_DIVERTED) { 137 struct pf_divert *divert; 138 139 divert = pf_find_divert(m); 140 KASSERT(divert != NULL); 141 switch (divert->type) { 142 case PF_DIVERT_TO: 143 key = &divert->addr.v4; 144 break; 145 case PF_DIVERT_REPLY: 146 break; 147 default: 148 panic("%s: unknown divert type %d, mbuf %p, divert %p", 149 __func__, divert->type, m, divert); 150 } 151 } 152 #endif 153 NET_ASSERT_LOCKED(); 154 TAILQ_FOREACH(inp, &rawcbtable.inpt_queue, inp_queue) { 155 if (inp->inp_socket->so_state & SS_CANTRCVMORE) 156 continue; 157 #ifdef INET6 158 if (inp->inp_flags & INP_IPV6) 159 continue; 160 #endif 161 if (rtable_l2(inp->inp_rtableid) != 162 rtable_l2(m->m_pkthdr.ph_rtableid)) 163 continue; 164 165 if (inp->inp_ip.ip_p && inp->inp_ip.ip_p != ip->ip_p) 166 continue; 167 if (inp->inp_laddr.s_addr && 168 inp->inp_laddr.s_addr != key->s_addr) 169 continue; 170 if (inp->inp_faddr.s_addr && 171 inp->inp_faddr.s_addr != ip->ip_src.s_addr) 172 continue; 173 if (last) { 174 struct mbuf *n; 175 176 if ((n = m_copym(m, 0, M_COPYALL, M_NOWAIT)) != NULL) { 177 if (last->inp_flags & INP_CONTROLOPTS || 178 last->inp_socket->so_options & SO_TIMESTAMP) 179 ip_savecontrol(last, &opts, ip, n); 180 if (sbappendaddr(last->inp_socket, 181 &last->inp_socket->so_rcv, 182 sintosa(&ripsrc), n, opts) == 0) { 183 /* should notify about lost packet */ 184 m_freem(n); 185 m_freem(opts); 186 } else 187 sorwakeup(last->inp_socket); 188 opts = NULL; 189 } 190 } 191 last = inp; 192 } 193 if (last) { 194 if (last->inp_flags & INP_CONTROLOPTS || 195 last->inp_socket->so_options & SO_TIMESTAMP) 196 ip_savecontrol(last, &opts, ip, m); 197 if (sbappendaddr(last->inp_socket, &last->inp_socket->so_rcv, 198 sintosa(&ripsrc), m, opts) == 0) { 199 m_freem(m); 200 m_freem(opts); 201 } else 202 sorwakeup(last->inp_socket); 203 } else { 204 if (ip->ip_p != IPPROTO_ICMP) 205 icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_PROTOCOL, 0, 0); 206 else 207 m_freem(m); 208 209 counters = counters_enter(&ref, ipcounters); 210 counters[ips_noproto]++; 211 counters[ips_delivered]--; 212 counters_leave(&ref, ipcounters); 213 } 214 return IPPROTO_DONE; 215 } 216 217 /* 218 * Generate IP header and pass packet to ip_output. 219 * Tack on options user may have setup with control call. 220 */ 221 int 222 rip_output(struct mbuf *m, struct socket *so, struct sockaddr *dstaddr, 223 struct mbuf *control) 224 { 225 struct ip *ip; 226 struct inpcb *inp; 227 int flags, error; 228 229 inp = sotoinpcb(so); 230 flags = IP_ALLOWBROADCAST; 231 232 /* 233 * If the user handed us a complete IP packet, use it. 234 * Otherwise, allocate an mbuf for a header and fill it in. 235 */ 236 if ((inp->inp_flags & INP_HDRINCL) == 0) { 237 if ((m->m_pkthdr.len + sizeof(struct ip)) > IP_MAXPACKET) { 238 m_freem(m); 239 return (EMSGSIZE); 240 } 241 M_PREPEND(m, sizeof(struct ip), M_DONTWAIT); 242 if (!m) 243 return (ENOBUFS); 244 ip = mtod(m, struct ip *); 245 ip->ip_tos = inp->inp_ip.ip_tos; 246 ip->ip_off = htons(0); 247 ip->ip_p = inp->inp_ip.ip_p; 248 ip->ip_len = htons(m->m_pkthdr.len); 249 ip->ip_src = inp->inp_laddr; 250 ip->ip_dst = satosin(dstaddr)->sin_addr; 251 ip->ip_ttl = inp->inp_ip.ip_ttl ? inp->inp_ip.ip_ttl : MAXTTL; 252 } else { 253 if (m->m_pkthdr.len > IP_MAXPACKET) { 254 m_freem(m); 255 return (EMSGSIZE); 256 } 257 258 m = rip_chkhdr(m, inp->inp_options); 259 if (m == NULL) 260 return (EINVAL); 261 262 ip = mtod(m, struct ip *); 263 if (ip->ip_id == 0) 264 ip->ip_id = htons(ip_randomid()); 265 266 /* XXX prevent ip_output from overwriting header fields */ 267 flags |= IP_RAWOUTPUT; 268 ipstat_inc(ips_rawout); 269 } 270 #ifdef INET6 271 /* 272 * A thought: Even though raw IP shouldn't be able to set IPv6 273 * multicast options, if it does, the last parameter to 274 * ip_output should be guarded against v6/v4 problems. 275 */ 276 #endif 277 /* force routing table */ 278 m->m_pkthdr.ph_rtableid = inp->inp_rtableid; 279 280 #if NPF > 0 281 if (inp->inp_socket->so_state & SS_ISCONNECTED && 282 ip->ip_p != IPPROTO_ICMP) 283 pf_mbuf_link_inpcb(m, inp); 284 #endif 285 286 error = ip_output(m, inp->inp_options, &inp->inp_route, flags, 287 inp->inp_moptions, inp, 0); 288 return (error); 289 } 290 291 struct mbuf * 292 rip_chkhdr(struct mbuf *m, struct mbuf *options) 293 { 294 struct ip *ip; 295 int hlen, opt, optlen, cnt; 296 u_char *cp; 297 298 if (m->m_pkthdr.len < sizeof(struct ip)) { 299 m_freem(m); 300 return NULL; 301 } 302 303 m = m_pullup(m, sizeof (struct ip)); 304 if (m == NULL) 305 return NULL; 306 307 ip = mtod(m, struct ip *); 308 hlen = ip->ip_hl << 2; 309 310 /* Don't allow packet length sizes that will crash. */ 311 if (hlen < sizeof (struct ip) || 312 ntohs(ip->ip_len) < hlen || 313 ntohs(ip->ip_len) != m->m_pkthdr.len) { 314 m_freem(m); 315 return NULL; 316 } 317 m = m_pullup(m, hlen); 318 if (m == NULL) 319 return NULL; 320 321 ip = mtod(m, struct ip *); 322 323 if (ip->ip_v != IPVERSION) { 324 m_freem(m); 325 return NULL; 326 } 327 328 /* 329 * Don't allow both user specified and setsockopt options. 330 * If options are present verify them. 331 */ 332 if (hlen != sizeof(struct ip)) { 333 if (options) { 334 m_freem(m); 335 return NULL; 336 } else { 337 cp = (u_char *)(ip + 1); 338 cnt = hlen - sizeof(struct ip); 339 for (; cnt > 0; cnt -= optlen, cp += optlen) { 340 opt = cp[IPOPT_OPTVAL]; 341 if (opt == IPOPT_EOL) 342 break; 343 if (opt == IPOPT_NOP) 344 optlen = 1; 345 else { 346 if (cnt < IPOPT_OLEN + sizeof(*cp)) { 347 m_freem(m); 348 return NULL; 349 } 350 optlen = cp[IPOPT_OLEN]; 351 if (optlen < IPOPT_OLEN + sizeof(*cp) || 352 optlen > cnt) { 353 m_freem(m); 354 return NULL; 355 } 356 } 357 } 358 } 359 } 360 361 return m; 362 } 363 364 /* 365 * Raw IP socket option processing. 366 */ 367 int 368 rip_ctloutput(int op, struct socket *so, int level, int optname, 369 struct mbuf *m) 370 { 371 struct inpcb *inp = sotoinpcb(so); 372 int error; 373 374 if (level != IPPROTO_IP) 375 return (EINVAL); 376 377 switch (optname) { 378 379 case IP_HDRINCL: 380 error = 0; 381 if (op == PRCO_SETOPT) { 382 if (m == NULL || m->m_len < sizeof (int)) 383 error = EINVAL; 384 else if (*mtod(m, int *)) 385 inp->inp_flags |= INP_HDRINCL; 386 else 387 inp->inp_flags &= ~INP_HDRINCL; 388 } else { 389 m->m_len = sizeof(int); 390 *mtod(m, int *) = inp->inp_flags & INP_HDRINCL; 391 } 392 return (error); 393 394 case MRT_INIT: 395 case MRT_DONE: 396 case MRT_ADD_VIF: 397 case MRT_DEL_VIF: 398 case MRT_ADD_MFC: 399 case MRT_DEL_MFC: 400 case MRT_VERSION: 401 case MRT_ASSERT: 402 case MRT_API_SUPPORT: 403 case MRT_API_CONFIG: 404 #ifdef MROUTING 405 switch (op) { 406 case PRCO_SETOPT: 407 error = ip_mrouter_set(so, optname, m); 408 break; 409 case PRCO_GETOPT: 410 error = ip_mrouter_get(so, optname, m); 411 break; 412 default: 413 error = EINVAL; 414 break; 415 } 416 return (error); 417 #else 418 return (EOPNOTSUPP); 419 #endif 420 } 421 return (ip_ctloutput(op, so, level, optname, m)); 422 } 423 424 u_long rip_sendspace = RIPSNDQ; 425 u_long rip_recvspace = RIPRCVQ; 426 427 /*ARGSUSED*/ 428 int 429 rip_usrreq(struct socket *so, int req, struct mbuf *m, struct mbuf *nam, 430 struct mbuf *control, struct proc *p) 431 { 432 struct inpcb *inp; 433 int error = 0; 434 435 if (req == PRU_CONTROL) 436 return (in_control(so, (u_long)m, (caddr_t)nam, 437 (struct ifnet *)control)); 438 439 soassertlocked(so); 440 441 inp = sotoinpcb(so); 442 if (inp == NULL) { 443 error = EINVAL; 444 goto release; 445 } 446 447 switch (req) { 448 449 case PRU_DISCONNECT: 450 if ((so->so_state & SS_ISCONNECTED) == 0) { 451 error = ENOTCONN; 452 break; 453 } 454 soisdisconnected(so); 455 inp->inp_faddr.s_addr = INADDR_ANY; 456 break; 457 case PRU_ABORT: 458 soisdisconnected(so); 459 if (inp == NULL) 460 panic("rip_abort"); 461 #ifdef MROUTING 462 if (so == ip_mrouter[inp->inp_rtableid]) 463 ip_mrouter_done(so); 464 #endif 465 in_pcbdetach(inp); 466 break; 467 468 case PRU_BIND: 469 { 470 struct sockaddr_in *addr; 471 472 if ((error = in_nam2sin(nam, &addr))) 473 break; 474 if (!((so->so_options & SO_BINDANY) || 475 addr->sin_addr.s_addr == INADDR_ANY || 476 addr->sin_addr.s_addr == INADDR_BROADCAST || 477 in_broadcast(addr->sin_addr, inp->inp_rtableid) || 478 ifa_ifwithaddr(sintosa(addr), inp->inp_rtableid))) { 479 error = EADDRNOTAVAIL; 480 break; 481 } 482 inp->inp_laddr = addr->sin_addr; 483 break; 484 } 485 case PRU_CONNECT: 486 { 487 struct sockaddr_in *addr; 488 489 if ((error = in_nam2sin(nam, &addr))) 490 break; 491 inp->inp_faddr = addr->sin_addr; 492 soisconnected(so); 493 break; 494 } 495 496 case PRU_CONNECT2: 497 error = EOPNOTSUPP; 498 break; 499 500 /* 501 * Mark the connection as being incapable of further input. 502 */ 503 case PRU_SHUTDOWN: 504 socantsendmore(so); 505 break; 506 507 /* 508 * Ship a packet out. The appropriate raw output 509 * routine handles any massaging necessary. 510 */ 511 case PRU_SEND: 512 { 513 struct sockaddr_in dst; 514 515 memset(&dst, 0, sizeof(dst)); 516 dst.sin_family = AF_INET; 517 dst.sin_len = sizeof(dst); 518 if (so->so_state & SS_ISCONNECTED) { 519 if (nam) { 520 error = EISCONN; 521 break; 522 } 523 dst.sin_addr = inp->inp_faddr; 524 } else { 525 struct sockaddr_in *addr; 526 527 if (nam == NULL) { 528 error = ENOTCONN; 529 break; 530 } 531 if ((error = in_nam2sin(nam, &addr))) 532 break; 533 dst.sin_addr = addr->sin_addr; 534 } 535 #ifdef IPSEC 536 /* XXX Find an IPsec TDB */ 537 #endif 538 error = rip_output(m, so, sintosa(&dst), NULL); 539 m = NULL; 540 break; 541 } 542 543 case PRU_SENSE: 544 /* 545 * stat: don't bother with a blocksize. 546 */ 547 break; 548 549 /* 550 * Not supported. 551 */ 552 case PRU_LISTEN: 553 case PRU_ACCEPT: 554 case PRU_SENDOOB: 555 case PRU_RCVD: 556 case PRU_RCVOOB: 557 error = EOPNOTSUPP; 558 break; 559 560 case PRU_SOCKADDR: 561 in_setsockaddr(inp, nam); 562 break; 563 564 case PRU_PEERADDR: 565 in_setpeeraddr(inp, nam); 566 break; 567 568 default: 569 panic("rip_usrreq"); 570 } 571 release: 572 if (req != PRU_RCVD && req != PRU_RCVOOB && req != PRU_SENSE) { 573 m_freem(control); 574 m_freem(m); 575 } 576 return (error); 577 } 578 579 int 580 rip_attach(struct socket *so, int proto) 581 { 582 struct inpcb *inp; 583 int error; 584 585 if (so->so_pcb) 586 panic("rip_attach"); 587 if ((so->so_state & SS_PRIV) == 0) 588 return EACCES; 589 if (proto < 0 || proto >= IPPROTO_MAX) 590 return EPROTONOSUPPORT; 591 592 if ((error = soreserve(so, rip_sendspace, rip_recvspace))) 593 return error; 594 NET_ASSERT_LOCKED(); 595 if ((error = in_pcballoc(so, &rawcbtable))) 596 return error; 597 inp = sotoinpcb(so); 598 inp->inp_ip.ip_p = proto; 599 return 0; 600 } 601 602 int 603 rip_detach(struct socket *so) 604 { 605 struct inpcb *inp = sotoinpcb(so); 606 607 soassertlocked(so); 608 609 if (inp == NULL) 610 return (EINVAL); 611 612 #ifdef MROUTING 613 if (so == ip_mrouter[inp->inp_rtableid]) 614 ip_mrouter_done(so); 615 #endif 616 in_pcbdetach(inp); 617 618 return (0); 619 } 620