1 /* $OpenBSD: raw_ip.c,v 1.86 2016/03/07 18:44:00 naddy Exp $ */ 2 /* $NetBSD: raw_ip.c,v 1.25 1996/02/18 18:58:33 christos Exp $ */ 3 4 /* 5 * Copyright (c) 1982, 1986, 1988, 1993 6 * The Regents of the University of California. All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. Neither the name of the University nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 * 32 * @(#)COPYRIGHT 1.1 (NRL) 17 January 1995 33 * 34 * NRL grants permission for redistribution and use in source and binary 35 * forms, with or without modification, of the software and documentation 36 * created at NRL provided that the following conditions are met: 37 * 38 * 1. Redistributions of source code must retain the above copyright 39 * notice, this list of conditions and the following disclaimer. 40 * 2. Redistributions in binary form must reproduce the above copyright 41 * notice, this list of conditions and the following disclaimer in the 42 * documentation and/or other materials provided with the distribution. 43 * 3. All advertising materials mentioning features or use of this software 44 * must display the following acknowledgements: 45 * This product includes software developed by the University of 46 * California, Berkeley and its contributors. 47 * This product includes software developed at the Information 48 * Technology Division, US Naval Research Laboratory. 49 * 4. Neither the name of the NRL nor the names of its contributors 50 * may be used to endorse or promote products derived from this software 51 * without specific prior written permission. 52 * 53 * THE SOFTWARE PROVIDED BY NRL IS PROVIDED BY NRL AND CONTRIBUTORS ``AS 54 * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 55 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A 56 * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NRL OR 57 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 58 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 59 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 60 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 61 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 62 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 63 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 64 * 65 * The views and conclusions contained in the software and documentation 66 * are those of the authors and should not be interpreted as representing 67 * official policies, either expressed or implied, of the US Naval 68 * Research Laboratory (NRL). 69 */ 70 71 #include <sys/param.h> 72 #include <sys/systm.h> 73 #include <sys/mbuf.h> 74 #include <sys/socket.h> 75 #include <sys/protosw.h> 76 #include <sys/socketvar.h> 77 78 #include <net/if.h> 79 #include <net/if_var.h> 80 #include <net/route.h> 81 82 #include <netinet/in.h> 83 #include <netinet/ip.h> 84 #include <netinet/ip_mroute.h> 85 #include <netinet/ip_var.h> 86 #include <netinet/in_pcb.h> 87 #include <netinet/in_var.h> 88 #include <netinet/ip_icmp.h> 89 90 #include <net/pfvar.h> 91 92 #include "pf.h" 93 94 struct inpcbtable rawcbtable; 95 96 /* 97 * Nominal space allocated to a raw ip socket. 98 */ 99 #define RIPSNDQ 8192 100 #define RIPRCVQ 8192 101 102 /* 103 * Raw interface to IP protocol. 104 */ 105 106 /* 107 * Initialize raw connection block q. 108 */ 109 void 110 rip_init(void) 111 { 112 113 in_pcbinit(&rawcbtable, 1); 114 } 115 116 struct sockaddr_in ripsrc = { sizeof(ripsrc), AF_INET }; 117 118 /* 119 * Setup generic address and protocol structures 120 * for raw_input routine, then pass them along with 121 * mbuf chain. 122 */ 123 void 124 rip_input(struct mbuf *m, ...) 125 { 126 struct ip *ip = mtod(m, struct ip *); 127 struct inpcb *inp, *last = NULL; 128 struct mbuf *opts = NULL; 129 130 ripsrc.sin_addr = ip->ip_src; 131 TAILQ_FOREACH(inp, &rawcbtable.inpt_queue, inp_queue) { 132 if (inp->inp_socket->so_state & SS_CANTRCVMORE) 133 continue; 134 #ifdef INET6 135 if (inp->inp_flags & INP_IPV6) 136 continue; 137 #endif 138 if (rtable_l2(inp->inp_rtableid) != 139 rtable_l2(m->m_pkthdr.ph_rtableid)) 140 continue; 141 142 if (inp->inp_ip.ip_p && inp->inp_ip.ip_p != ip->ip_p) 143 continue; 144 #if NPF > 0 145 if (m->m_pkthdr.pf.flags & PF_TAG_DIVERTED) { 146 struct pf_divert *divert; 147 148 /* XXX rdomain support */ 149 if ((divert = pf_find_divert(m)) == NULL) 150 continue; 151 if (!divert->addr.v4.s_addr) 152 goto divert_reply; 153 if (inp->inp_laddr.s_addr != divert->addr.v4.s_addr) 154 continue; 155 } else 156 divert_reply: 157 #endif 158 if (inp->inp_laddr.s_addr && 159 inp->inp_laddr.s_addr != ip->ip_dst.s_addr) 160 continue; 161 if (inp->inp_faddr.s_addr && 162 inp->inp_faddr.s_addr != ip->ip_src.s_addr) 163 continue; 164 if (last) { 165 struct mbuf *n; 166 167 if ((n = m_copym(m, 0, M_COPYALL, M_NOWAIT)) != NULL) { 168 if (last->inp_flags & INP_CONTROLOPTS || 169 last->inp_socket->so_options & SO_TIMESTAMP) 170 ip_savecontrol(last, &opts, ip, n); 171 if (sbappendaddr(&last->inp_socket->so_rcv, 172 sintosa(&ripsrc), n, opts) == 0) { 173 /* should notify about lost packet */ 174 m_freem(n); 175 m_freem(opts); 176 } else 177 sorwakeup(last->inp_socket); 178 opts = NULL; 179 } 180 } 181 last = inp; 182 } 183 if (last) { 184 if (last->inp_flags & INP_CONTROLOPTS || 185 last->inp_socket->so_options & SO_TIMESTAMP) 186 ip_savecontrol(last, &opts, ip, m); 187 if (sbappendaddr(&last->inp_socket->so_rcv, sintosa(&ripsrc), m, 188 opts) == 0) { 189 m_freem(m); 190 m_freem(opts); 191 } else 192 sorwakeup(last->inp_socket); 193 } else { 194 if (ip->ip_p != IPPROTO_ICMP) 195 icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_PROTOCOL, 0, 0); 196 else 197 m_freem(m); 198 ipstat.ips_noproto++; 199 ipstat.ips_delivered--; 200 } 201 } 202 203 /* 204 * Generate IP header and pass packet to ip_output. 205 * Tack on options user may have setup with control call. 206 */ 207 int 208 rip_output(struct mbuf *m, ...) 209 { 210 struct socket *so; 211 u_long dst; 212 struct ip *ip; 213 struct inpcb *inp; 214 int flags, error; 215 va_list ap; 216 217 va_start(ap, m); 218 so = va_arg(ap, struct socket *); 219 dst = va_arg(ap, u_long); 220 va_end(ap); 221 222 inp = sotoinpcb(so); 223 flags = IP_ALLOWBROADCAST; 224 225 /* 226 * If the user handed us a complete IP packet, use it. 227 * Otherwise, allocate an mbuf for a header and fill it in. 228 */ 229 if ((inp->inp_flags & INP_HDRINCL) == 0) { 230 if ((m->m_pkthdr.len + sizeof(struct ip)) > IP_MAXPACKET) { 231 m_freem(m); 232 return (EMSGSIZE); 233 } 234 M_PREPEND(m, sizeof(struct ip), M_DONTWAIT); 235 if (!m) 236 return (ENOBUFS); 237 ip = mtod(m, struct ip *); 238 ip->ip_tos = inp->inp_ip.ip_tos; 239 ip->ip_off = htons(0); 240 ip->ip_p = inp->inp_ip.ip_p; 241 ip->ip_len = htons(m->m_pkthdr.len); 242 ip->ip_src = inp->inp_laddr; 243 ip->ip_dst.s_addr = dst; 244 ip->ip_ttl = inp->inp_ip.ip_ttl ? inp->inp_ip.ip_ttl : MAXTTL; 245 } else { 246 if (m->m_pkthdr.len > IP_MAXPACKET) { 247 m_freem(m); 248 return (EMSGSIZE); 249 } 250 if (m->m_pkthdr.len < sizeof(struct ip)) { 251 m_freem(m); 252 return (EINVAL); 253 } 254 ip = mtod(m, struct ip *); 255 /* 256 * don't allow both user specified and setsockopt options, 257 * and don't allow packet length sizes that will crash 258 */ 259 if ((ip->ip_hl != (sizeof (*ip) >> 2) && inp->inp_options) || 260 ntohs(ip->ip_len) > m->m_pkthdr.len || 261 ntohs(ip->ip_len) < ip->ip_hl << 2) { 262 m_freem(m); 263 return (EINVAL); 264 } 265 if (ip->ip_id == 0) { 266 ip->ip_id = htons(ip_randomid()); 267 } 268 /* XXX prevent ip_output from overwriting header fields */ 269 flags |= IP_RAWOUTPUT; 270 ipstat.ips_rawout++; 271 } 272 #ifdef INET6 273 /* 274 * A thought: Even though raw IP shouldn't be able to set IPv6 275 * multicast options, if it does, the last parameter to 276 * ip_output should be guarded against v6/v4 problems. 277 */ 278 #endif 279 /* force routing table */ 280 m->m_pkthdr.ph_rtableid = inp->inp_rtableid; 281 282 #if NPF > 0 283 if (inp->inp_socket->so_state & SS_ISCONNECTED && 284 ip->ip_p != IPPROTO_ICMP) 285 m->m_pkthdr.pf.inp = inp; 286 #endif 287 288 error = ip_output(m, inp->inp_options, &inp->inp_route, flags, 289 inp->inp_moptions, inp, 0); 290 if (error == EACCES) /* translate pf(4) error for userland */ 291 error = EHOSTUNREACH; 292 return (error); 293 } 294 295 /* 296 * Raw IP socket option processing. 297 */ 298 int 299 rip_ctloutput(int op, struct socket *so, int level, int optname, 300 struct mbuf **mp) 301 { 302 struct inpcb *inp = sotoinpcb(so); 303 int error = 0; 304 int dir; 305 306 if (level != IPPROTO_IP) { 307 if (op == PRCO_SETOPT) 308 (void) m_free(*mp); 309 return (EINVAL); 310 } 311 312 switch (optname) { 313 314 case IP_HDRINCL: 315 error = 0; 316 if (op == PRCO_SETOPT) { 317 if (*mp == NULL || (*mp)->m_len < sizeof (int)) 318 error = EINVAL; 319 else if (*mtod(*mp, int *)) 320 inp->inp_flags |= INP_HDRINCL; 321 else 322 inp->inp_flags &= ~INP_HDRINCL; 323 if (*mp) 324 (void)m_free(*mp); 325 } else { 326 *mp = m_get(M_WAIT, M_SOOPTS); 327 (*mp)->m_len = sizeof(int); 328 *mtod(*mp, int *) = inp->inp_flags & INP_HDRINCL; 329 } 330 return (error); 331 332 case IP_DIVERTFL: 333 switch (op) { 334 case PRCO_SETOPT: 335 if (*mp == NULL || (*mp)->m_len < sizeof (int)) { 336 error = EINVAL; 337 break; 338 } 339 dir = *mtod(*mp, int *); 340 if (inp->inp_divertfl > 0) 341 error = ENOTSUP; 342 else if ((dir & IPPROTO_DIVERT_RESP) || 343 (dir & IPPROTO_DIVERT_INIT)) 344 inp->inp_divertfl = dir; 345 else 346 error = EINVAL; 347 348 break; 349 350 case PRCO_GETOPT: 351 *mp = m_get(M_WAIT, M_SOOPTS); 352 (*mp)->m_len = sizeof(int); 353 *mtod(*mp, int *) = inp->inp_divertfl; 354 break; 355 356 default: 357 error = EINVAL; 358 break; 359 } 360 361 if (op == PRCO_SETOPT) 362 (void)m_free(*mp); 363 return (error); 364 365 case MRT_INIT: 366 case MRT_DONE: 367 case MRT_ADD_VIF: 368 case MRT_DEL_VIF: 369 case MRT_ADD_MFC: 370 case MRT_DEL_MFC: 371 case MRT_VERSION: 372 case MRT_ASSERT: 373 case MRT_API_SUPPORT: 374 case MRT_API_CONFIG: 375 #ifdef MROUTING 376 switch (op) { 377 case PRCO_SETOPT: 378 error = ip_mrouter_set(so, optname, mp); 379 break; 380 case PRCO_GETOPT: 381 error = ip_mrouter_get(so, optname, mp); 382 break; 383 default: 384 error = EINVAL; 385 break; 386 } 387 return (error); 388 #else 389 if (op == PRCO_SETOPT) 390 m_free(*mp); 391 return (EOPNOTSUPP); 392 #endif 393 } 394 return (ip_ctloutput(op, so, level, optname, mp)); 395 } 396 397 u_long rip_sendspace = RIPSNDQ; 398 u_long rip_recvspace = RIPRCVQ; 399 400 /*ARGSUSED*/ 401 int 402 rip_usrreq(struct socket *so, int req, struct mbuf *m, struct mbuf *nam, 403 struct mbuf *control, struct proc *p) 404 { 405 struct inpcb *inp = sotoinpcb(so); 406 int error = 0; 407 int s; 408 409 if (req == PRU_CONTROL) 410 return (in_control(so, (u_long)m, (caddr_t)nam, 411 (struct ifnet *)control)); 412 413 if (inp == NULL && req != PRU_ATTACH) { 414 error = EINVAL; 415 goto release; 416 } 417 418 switch (req) { 419 420 case PRU_ATTACH: 421 if (inp) 422 panic("rip_attach"); 423 if ((so->so_state & SS_PRIV) == 0) { 424 error = EACCES; 425 break; 426 } 427 if ((long)nam < 0 || (long)nam >= IPPROTO_MAX) { 428 error = EPROTONOSUPPORT; 429 break; 430 } 431 s = splsoftnet(); 432 if ((error = soreserve(so, rip_sendspace, rip_recvspace)) || 433 (error = in_pcballoc(so, &rawcbtable))) { 434 splx(s); 435 break; 436 } 437 splx(s); 438 inp = sotoinpcb(so); 439 inp->inp_ip.ip_p = (long)nam; 440 break; 441 442 case PRU_DISCONNECT: 443 if ((so->so_state & SS_ISCONNECTED) == 0) { 444 error = ENOTCONN; 445 break; 446 } 447 /* FALLTHROUGH */ 448 case PRU_ABORT: 449 soisdisconnected(so); 450 /* FALLTHROUGH */ 451 case PRU_DETACH: 452 if (inp == NULL) 453 panic("rip_detach"); 454 #ifdef MROUTING 455 if (so == ip_mrouter) 456 ip_mrouter_done(); 457 #endif 458 in_pcbdetach(inp); 459 break; 460 461 case PRU_BIND: 462 { 463 struct sockaddr_in *addr = mtod(nam, struct sockaddr_in *); 464 465 if (nam->m_len != sizeof(*addr)) { 466 error = EINVAL; 467 break; 468 } 469 if (addr->sin_family != AF_INET) { 470 error = EADDRNOTAVAIL; 471 break; 472 } 473 if (!((so->so_options & SO_BINDANY) || 474 addr->sin_addr.s_addr == INADDR_ANY || 475 addr->sin_addr.s_addr == INADDR_BROADCAST || 476 in_broadcast(addr->sin_addr, inp->inp_rtableid) || 477 ifa_ifwithaddr(sintosa(addr), inp->inp_rtableid))) { 478 error = EADDRNOTAVAIL; 479 break; 480 } 481 inp->inp_laddr = addr->sin_addr; 482 break; 483 } 484 case PRU_CONNECT: 485 { 486 struct sockaddr_in *addr = mtod(nam, struct sockaddr_in *); 487 488 if (nam->m_len != sizeof(*addr)) { 489 error = EINVAL; 490 break; 491 } 492 if (addr->sin_family != AF_INET) { 493 error = EAFNOSUPPORT; 494 break; 495 } 496 inp->inp_faddr = addr->sin_addr; 497 soisconnected(so); 498 break; 499 } 500 501 case PRU_CONNECT2: 502 error = EOPNOTSUPP; 503 break; 504 505 /* 506 * Mark the connection as being incapable of further input. 507 */ 508 case PRU_SHUTDOWN: 509 socantsendmore(so); 510 break; 511 512 /* 513 * Ship a packet out. The appropriate raw output 514 * routine handles any massaging necessary. 515 */ 516 case PRU_SEND: 517 { 518 u_int32_t dst; 519 520 if (so->so_state & SS_ISCONNECTED) { 521 if (nam) { 522 error = EISCONN; 523 break; 524 } 525 dst = inp->inp_faddr.s_addr; 526 } else { 527 if (nam == NULL) { 528 error = ENOTCONN; 529 break; 530 } 531 dst = mtod(nam, struct sockaddr_in *)->sin_addr.s_addr; 532 } 533 #ifdef IPSEC 534 /* XXX Find an IPsec TDB */ 535 #endif 536 error = rip_output(m, so, dst); 537 m = NULL; 538 break; 539 } 540 541 case PRU_SENSE: 542 /* 543 * stat: don't bother with a blocksize. 544 */ 545 return (0); 546 547 /* 548 * Not supported. 549 */ 550 case PRU_RCVOOB: 551 case PRU_RCVD: 552 case PRU_LISTEN: 553 case PRU_ACCEPT: 554 case PRU_SENDOOB: 555 error = EOPNOTSUPP; 556 break; 557 558 case PRU_SOCKADDR: 559 in_setsockaddr(inp, nam); 560 break; 561 562 case PRU_PEERADDR: 563 in_setpeeraddr(inp, nam); 564 break; 565 566 default: 567 panic("rip_usrreq"); 568 } 569 release: 570 m_freem(m); 571 return (error); 572 } 573