1 /* $OpenBSD: raw_ip.c,v 1.108 2017/12/04 13:40:34 bluhm Exp $ */ 2 /* $NetBSD: raw_ip.c,v 1.25 1996/02/18 18:58:33 christos Exp $ */ 3 4 /* 5 * Copyright (c) 1982, 1986, 1988, 1993 6 * The Regents of the University of California. All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. Neither the name of the University nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 * 32 * @(#)COPYRIGHT 1.1 (NRL) 17 January 1995 33 * 34 * NRL grants permission for redistribution and use in source and binary 35 * forms, with or without modification, of the software and documentation 36 * created at NRL provided that the following conditions are met: 37 * 38 * 1. Redistributions of source code must retain the above copyright 39 * notice, this list of conditions and the following disclaimer. 40 * 2. Redistributions in binary form must reproduce the above copyright 41 * notice, this list of conditions and the following disclaimer in the 42 * documentation and/or other materials provided with the distribution. 43 * 3. All advertising materials mentioning features or use of this software 44 * must display the following acknowledgements: 45 * This product includes software developed by the University of 46 * California, Berkeley and its contributors. 47 * This product includes software developed at the Information 48 * Technology Division, US Naval Research Laboratory. 49 * 4. Neither the name of the NRL nor the names of its contributors 50 * may be used to endorse or promote products derived from this software 51 * without specific prior written permission. 52 * 53 * THE SOFTWARE PROVIDED BY NRL IS PROVIDED BY NRL AND CONTRIBUTORS ``AS 54 * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 55 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A 56 * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NRL OR 57 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 58 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 59 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 60 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 61 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 62 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 63 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 64 * 65 * The views and conclusions contained in the software and documentation 66 * are those of the authors and should not be interpreted as representing 67 * official policies, either expressed or implied, of the US Naval 68 * Research Laboratory (NRL). 69 */ 70 71 #include <sys/param.h> 72 #include <sys/systm.h> 73 #include <sys/mbuf.h> 74 #include <sys/socket.h> 75 #include <sys/protosw.h> 76 #include <sys/socketvar.h> 77 78 #include <net/if.h> 79 #include <net/if_var.h> 80 #include <net/route.h> 81 82 #include <netinet/in.h> 83 #include <netinet/ip.h> 84 #include <netinet/ip_mroute.h> 85 #include <netinet/ip_var.h> 86 #include <netinet/in_pcb.h> 87 #include <netinet/in_var.h> 88 #include <netinet/ip_icmp.h> 89 90 #include <net/pfvar.h> 91 92 #include "pf.h" 93 94 struct inpcbtable rawcbtable; 95 96 /* 97 * Nominal space allocated to a raw ip socket. 98 */ 99 #define RIPSNDQ 8192 100 #define RIPRCVQ 8192 101 102 /* 103 * Raw interface to IP protocol. 104 */ 105 106 /* 107 * Initialize raw connection block q. 108 */ 109 void 110 rip_init(void) 111 { 112 113 in_pcbinit(&rawcbtable, 1); 114 } 115 116 struct sockaddr_in ripsrc = { sizeof(ripsrc), AF_INET }; 117 118 int 119 rip_input(struct mbuf **mp, int *offp, int proto, int af) 120 { 121 struct mbuf *m = *mp; 122 struct ip *ip = mtod(m, struct ip *); 123 struct inpcb *inp, *last = NULL; 124 struct in_addr *key; 125 struct mbuf *opts = NULL; 126 struct counters_ref ref; 127 uint64_t *counters; 128 129 KASSERT(af == AF_INET); 130 131 ripsrc.sin_addr = ip->ip_src; 132 key = &ip->ip_dst; 133 #if NPF > 0 134 if (m->m_pkthdr.pf.flags & PF_TAG_DIVERTED) { 135 struct pf_divert *divert; 136 137 divert = pf_find_divert(m); 138 KASSERT(divert != NULL); 139 switch (divert->type) { 140 case PF_DIVERT_TO: 141 key = &divert->addr.v4; 142 break; 143 case PF_DIVERT_REPLY: 144 break; 145 default: 146 panic("%s: unknown divert type %d, mbuf %p, divert %p", 147 __func__, divert->type, m, divert); 148 } 149 } 150 #endif 151 NET_ASSERT_LOCKED(); 152 TAILQ_FOREACH(inp, &rawcbtable.inpt_queue, inp_queue) { 153 if (inp->inp_socket->so_state & SS_CANTRCVMORE) 154 continue; 155 #ifdef INET6 156 if (inp->inp_flags & INP_IPV6) 157 continue; 158 #endif 159 if (rtable_l2(inp->inp_rtableid) != 160 rtable_l2(m->m_pkthdr.ph_rtableid)) 161 continue; 162 163 if (inp->inp_ip.ip_p && inp->inp_ip.ip_p != ip->ip_p) 164 continue; 165 if (inp->inp_laddr.s_addr && 166 inp->inp_laddr.s_addr != key->s_addr) 167 continue; 168 if (inp->inp_faddr.s_addr && 169 inp->inp_faddr.s_addr != ip->ip_src.s_addr) 170 continue; 171 if (last) { 172 struct mbuf *n; 173 174 if ((n = m_copym(m, 0, M_COPYALL, M_NOWAIT)) != NULL) { 175 if (last->inp_flags & INP_CONTROLOPTS || 176 last->inp_socket->so_options & SO_TIMESTAMP) 177 ip_savecontrol(last, &opts, ip, n); 178 if (sbappendaddr(last->inp_socket, 179 &last->inp_socket->so_rcv, 180 sintosa(&ripsrc), n, opts) == 0) { 181 /* should notify about lost packet */ 182 m_freem(n); 183 m_freem(opts); 184 } else 185 sorwakeup(last->inp_socket); 186 opts = NULL; 187 } 188 } 189 last = inp; 190 } 191 if (last) { 192 if (last->inp_flags & INP_CONTROLOPTS || 193 last->inp_socket->so_options & SO_TIMESTAMP) 194 ip_savecontrol(last, &opts, ip, m); 195 if (sbappendaddr(last->inp_socket, &last->inp_socket->so_rcv, 196 sintosa(&ripsrc), m, opts) == 0) { 197 m_freem(m); 198 m_freem(opts); 199 } else 200 sorwakeup(last->inp_socket); 201 } else { 202 if (ip->ip_p != IPPROTO_ICMP) 203 icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_PROTOCOL, 0, 0); 204 else 205 m_freem(m); 206 207 counters = counters_enter(&ref, ipcounters); 208 counters[ips_noproto]++; 209 counters[ips_delivered]--; 210 counters_leave(&ref, ipcounters); 211 } 212 return IPPROTO_DONE; 213 } 214 215 /* 216 * Generate IP header and pass packet to ip_output. 217 * Tack on options user may have setup with control call. 218 */ 219 int 220 rip_output(struct mbuf *m, struct socket *so, struct sockaddr *dstaddr, 221 struct mbuf *control) 222 { 223 struct ip *ip; 224 struct inpcb *inp; 225 int flags, error; 226 227 inp = sotoinpcb(so); 228 flags = IP_ALLOWBROADCAST; 229 230 /* 231 * If the user handed us a complete IP packet, use it. 232 * Otherwise, allocate an mbuf for a header and fill it in. 233 */ 234 if ((inp->inp_flags & INP_HDRINCL) == 0) { 235 if ((m->m_pkthdr.len + sizeof(struct ip)) > IP_MAXPACKET) { 236 m_freem(m); 237 return (EMSGSIZE); 238 } 239 M_PREPEND(m, sizeof(struct ip), M_DONTWAIT); 240 if (!m) 241 return (ENOBUFS); 242 ip = mtod(m, struct ip *); 243 ip->ip_tos = inp->inp_ip.ip_tos; 244 ip->ip_off = htons(0); 245 ip->ip_p = inp->inp_ip.ip_p; 246 ip->ip_len = htons(m->m_pkthdr.len); 247 ip->ip_src = inp->inp_laddr; 248 ip->ip_dst = satosin(dstaddr)->sin_addr; 249 ip->ip_ttl = inp->inp_ip.ip_ttl ? inp->inp_ip.ip_ttl : MAXTTL; 250 } else { 251 if (m->m_pkthdr.len > IP_MAXPACKET) { 252 m_freem(m); 253 return (EMSGSIZE); 254 } 255 if (m->m_pkthdr.len < sizeof(struct ip)) { 256 m_freem(m); 257 return (EINVAL); 258 } 259 ip = mtod(m, struct ip *); 260 /* 261 * don't allow both user specified and setsockopt options, 262 * and don't allow packet length sizes that will crash 263 */ 264 if ((ip->ip_hl != (sizeof (*ip) >> 2) && inp->inp_options) || 265 ntohs(ip->ip_len) > m->m_pkthdr.len || 266 ntohs(ip->ip_len) < ip->ip_hl << 2) { 267 m_freem(m); 268 return (EINVAL); 269 } 270 if (ip->ip_id == 0) { 271 ip->ip_id = htons(ip_randomid()); 272 } 273 /* XXX prevent ip_output from overwriting header fields */ 274 flags |= IP_RAWOUTPUT; 275 ipstat_inc(ips_rawout); 276 } 277 #ifdef INET6 278 /* 279 * A thought: Even though raw IP shouldn't be able to set IPv6 280 * multicast options, if it does, the last parameter to 281 * ip_output should be guarded against v6/v4 problems. 282 */ 283 #endif 284 /* force routing table */ 285 m->m_pkthdr.ph_rtableid = inp->inp_rtableid; 286 287 #if NPF > 0 288 if (inp->inp_socket->so_state & SS_ISCONNECTED && 289 ip->ip_p != IPPROTO_ICMP) 290 m->m_pkthdr.pf.inp = inp; 291 #endif 292 293 error = ip_output(m, inp->inp_options, &inp->inp_route, flags, 294 inp->inp_moptions, inp, 0); 295 if (error == EACCES) /* translate pf(4) error for userland */ 296 error = EHOSTUNREACH; 297 return (error); 298 } 299 300 /* 301 * Raw IP socket option processing. 302 */ 303 int 304 rip_ctloutput(int op, struct socket *so, int level, int optname, 305 struct mbuf *m) 306 { 307 struct inpcb *inp = sotoinpcb(so); 308 int error; 309 310 if (level != IPPROTO_IP) 311 return (EINVAL); 312 313 switch (optname) { 314 315 case IP_HDRINCL: 316 error = 0; 317 if (op == PRCO_SETOPT) { 318 if (m == NULL || m->m_len < sizeof (int)) 319 error = EINVAL; 320 else if (*mtod(m, int *)) 321 inp->inp_flags |= INP_HDRINCL; 322 else 323 inp->inp_flags &= ~INP_HDRINCL; 324 } else { 325 m->m_len = sizeof(int); 326 *mtod(m, int *) = inp->inp_flags & INP_HDRINCL; 327 } 328 return (error); 329 330 case MRT_INIT: 331 case MRT_DONE: 332 case MRT_ADD_VIF: 333 case MRT_DEL_VIF: 334 case MRT_ADD_MFC: 335 case MRT_DEL_MFC: 336 case MRT_VERSION: 337 case MRT_ASSERT: 338 case MRT_API_SUPPORT: 339 case MRT_API_CONFIG: 340 #ifdef MROUTING 341 switch (op) { 342 case PRCO_SETOPT: 343 error = ip_mrouter_set(so, optname, m); 344 break; 345 case PRCO_GETOPT: 346 error = ip_mrouter_get(so, optname, m); 347 break; 348 default: 349 error = EINVAL; 350 break; 351 } 352 return (error); 353 #else 354 return (EOPNOTSUPP); 355 #endif 356 } 357 return (ip_ctloutput(op, so, level, optname, m)); 358 } 359 360 u_long rip_sendspace = RIPSNDQ; 361 u_long rip_recvspace = RIPRCVQ; 362 363 /*ARGSUSED*/ 364 int 365 rip_usrreq(struct socket *so, int req, struct mbuf *m, struct mbuf *nam, 366 struct mbuf *control, struct proc *p) 367 { 368 struct inpcb *inp = sotoinpcb(so); 369 int error = 0; 370 371 soassertlocked(so); 372 373 if (req == PRU_CONTROL) 374 return (in_control(so, (u_long)m, (caddr_t)nam, 375 (struct ifnet *)control)); 376 377 if (inp == NULL) { 378 error = EINVAL; 379 goto release; 380 } 381 382 switch (req) { 383 384 case PRU_DISCONNECT: 385 if ((so->so_state & SS_ISCONNECTED) == 0) { 386 error = ENOTCONN; 387 break; 388 } 389 /* FALLTHROUGH */ 390 case PRU_ABORT: 391 soisdisconnected(so); 392 if (inp == NULL) 393 panic("rip_abort"); 394 #ifdef MROUTING 395 if (so == ip_mrouter[inp->inp_rtableid]) 396 ip_mrouter_done(so); 397 #endif 398 in_pcbdetach(inp); 399 break; 400 401 case PRU_BIND: 402 { 403 struct sockaddr_in *addr; 404 405 if ((error = in_nam2sin(nam, &addr))) 406 break; 407 if (!((so->so_options & SO_BINDANY) || 408 addr->sin_addr.s_addr == INADDR_ANY || 409 addr->sin_addr.s_addr == INADDR_BROADCAST || 410 in_broadcast(addr->sin_addr, inp->inp_rtableid) || 411 ifa_ifwithaddr(sintosa(addr), inp->inp_rtableid))) { 412 error = EADDRNOTAVAIL; 413 break; 414 } 415 inp->inp_laddr = addr->sin_addr; 416 break; 417 } 418 case PRU_CONNECT: 419 { 420 struct sockaddr_in *addr; 421 422 if ((error = in_nam2sin(nam, &addr))) 423 break; 424 inp->inp_faddr = addr->sin_addr; 425 soisconnected(so); 426 break; 427 } 428 429 case PRU_CONNECT2: 430 error = EOPNOTSUPP; 431 break; 432 433 /* 434 * Mark the connection as being incapable of further input. 435 */ 436 case PRU_SHUTDOWN: 437 socantsendmore(so); 438 break; 439 440 /* 441 * Ship a packet out. The appropriate raw output 442 * routine handles any massaging necessary. 443 */ 444 case PRU_SEND: 445 { 446 struct sockaddr_in dst; 447 448 memset(&dst, 0, sizeof(dst)); 449 dst.sin_family = AF_INET; 450 dst.sin_len = sizeof(dst); 451 if (so->so_state & SS_ISCONNECTED) { 452 if (nam) { 453 error = EISCONN; 454 break; 455 } 456 dst.sin_addr = inp->inp_faddr; 457 } else { 458 struct sockaddr_in *addr; 459 460 if (nam == NULL) { 461 error = ENOTCONN; 462 break; 463 } 464 if ((error = in_nam2sin(nam, &addr))) 465 break; 466 dst.sin_addr = addr->sin_addr; 467 } 468 #ifdef IPSEC 469 /* XXX Find an IPsec TDB */ 470 #endif 471 error = rip_output(m, so, sintosa(&dst), NULL); 472 m = NULL; 473 break; 474 } 475 476 case PRU_SENSE: 477 /* 478 * stat: don't bother with a blocksize. 479 */ 480 return (0); 481 482 /* 483 * Not supported. 484 */ 485 case PRU_RCVOOB: 486 case PRU_RCVD: 487 case PRU_LISTEN: 488 case PRU_ACCEPT: 489 case PRU_SENDOOB: 490 error = EOPNOTSUPP; 491 break; 492 493 case PRU_SOCKADDR: 494 in_setsockaddr(inp, nam); 495 break; 496 497 case PRU_PEERADDR: 498 in_setpeeraddr(inp, nam); 499 break; 500 501 default: 502 panic("rip_usrreq"); 503 } 504 release: 505 m_freem(m); 506 return (error); 507 } 508 509 int 510 rip_attach(struct socket *so, int proto) 511 { 512 struct inpcb *inp; 513 int error; 514 515 if (so->so_pcb) 516 panic("rip_attach"); 517 if ((so->so_state & SS_PRIV) == 0) 518 return EACCES; 519 if (proto < 0 || proto >= IPPROTO_MAX) 520 return EPROTONOSUPPORT; 521 522 if ((error = soreserve(so, rip_sendspace, rip_recvspace))) 523 return error; 524 NET_ASSERT_LOCKED(); 525 if ((error = in_pcballoc(so, &rawcbtable))) 526 return error; 527 inp = sotoinpcb(so); 528 inp->inp_ip.ip_p = proto; 529 return 0; 530 } 531 532 int 533 rip_detach(struct socket *so) 534 { 535 struct inpcb *inp = sotoinpcb(so); 536 537 soassertlocked(so); 538 539 if (inp == NULL) 540 return (EINVAL); 541 542 #ifdef MROUTING 543 if (so == ip_mrouter[inp->inp_rtableid]) 544 ip_mrouter_done(so); 545 #endif 546 in_pcbdetach(inp); 547 548 return (0); 549 } 550