1 /* $OpenBSD: raw_ip.c,v 1.96 2017/03/03 15:48:02 bluhm Exp $ */ 2 /* $NetBSD: raw_ip.c,v 1.25 1996/02/18 18:58:33 christos Exp $ */ 3 4 /* 5 * Copyright (c) 1982, 1986, 1988, 1993 6 * The Regents of the University of California. All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. Neither the name of the University nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 * 32 * @(#)COPYRIGHT 1.1 (NRL) 17 January 1995 33 * 34 * NRL grants permission for redistribution and use in source and binary 35 * forms, with or without modification, of the software and documentation 36 * created at NRL provided that the following conditions are met: 37 * 38 * 1. Redistributions of source code must retain the above copyright 39 * notice, this list of conditions and the following disclaimer. 40 * 2. Redistributions in binary form must reproduce the above copyright 41 * notice, this list of conditions and the following disclaimer in the 42 * documentation and/or other materials provided with the distribution. 43 * 3. All advertising materials mentioning features or use of this software 44 * must display the following acknowledgements: 45 * This product includes software developed by the University of 46 * California, Berkeley and its contributors. 47 * This product includes software developed at the Information 48 * Technology Division, US Naval Research Laboratory. 49 * 4. Neither the name of the NRL nor the names of its contributors 50 * may be used to endorse or promote products derived from this software 51 * without specific prior written permission. 52 * 53 * THE SOFTWARE PROVIDED BY NRL IS PROVIDED BY NRL AND CONTRIBUTORS ``AS 54 * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 55 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A 56 * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NRL OR 57 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 58 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 59 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 60 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 61 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 62 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 63 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 64 * 65 * The views and conclusions contained in the software and documentation 66 * are those of the authors and should not be interpreted as representing 67 * official policies, either expressed or implied, of the US Naval 68 * Research Laboratory (NRL). 69 */ 70 71 #include <sys/param.h> 72 #include <sys/systm.h> 73 #include <sys/mbuf.h> 74 #include <sys/socket.h> 75 #include <sys/protosw.h> 76 #include <sys/socketvar.h> 77 78 #include <net/if.h> 79 #include <net/if_var.h> 80 #include <net/route.h> 81 82 #include <netinet/in.h> 83 #include <netinet/ip.h> 84 #include <netinet/ip_mroute.h> 85 #include <netinet/ip_var.h> 86 #include <netinet/in_pcb.h> 87 #include <netinet/in_var.h> 88 #include <netinet/ip_icmp.h> 89 90 #include <net/pfvar.h> 91 92 #include "pf.h" 93 94 struct inpcbtable rawcbtable; 95 96 /* 97 * Nominal space allocated to a raw ip socket. 98 */ 99 #define RIPSNDQ 8192 100 #define RIPRCVQ 8192 101 102 /* 103 * Raw interface to IP protocol. 104 */ 105 106 /* 107 * Initialize raw connection block q. 108 */ 109 void 110 rip_init(void) 111 { 112 113 in_pcbinit(&rawcbtable, 1); 114 } 115 116 struct sockaddr_in ripsrc = { sizeof(ripsrc), AF_INET }; 117 118 int 119 rip_input(struct mbuf **mp, int *offp, int proto) 120 { 121 struct mbuf *m = *mp; 122 struct ip *ip = mtod(m, struct ip *); 123 struct inpcb *inp, *last = NULL; 124 struct mbuf *opts = NULL; 125 struct counters_ref ref; 126 uint64_t *counters; 127 128 ripsrc.sin_addr = ip->ip_src; 129 TAILQ_FOREACH(inp, &rawcbtable.inpt_queue, inp_queue) { 130 if (inp->inp_socket->so_state & SS_CANTRCVMORE) 131 continue; 132 #ifdef INET6 133 if (inp->inp_flags & INP_IPV6) 134 continue; 135 #endif 136 if (rtable_l2(inp->inp_rtableid) != 137 rtable_l2(m->m_pkthdr.ph_rtableid)) 138 continue; 139 140 if (inp->inp_ip.ip_p && inp->inp_ip.ip_p != ip->ip_p) 141 continue; 142 #if NPF > 0 143 if (m->m_pkthdr.pf.flags & PF_TAG_DIVERTED) { 144 struct pf_divert *divert; 145 146 /* XXX rdomain support */ 147 if ((divert = pf_find_divert(m)) == NULL) 148 continue; 149 if (!divert->addr.v4.s_addr) 150 goto divert_reply; 151 if (inp->inp_laddr.s_addr != divert->addr.v4.s_addr) 152 continue; 153 } else 154 divert_reply: 155 #endif 156 if (inp->inp_laddr.s_addr && 157 inp->inp_laddr.s_addr != ip->ip_dst.s_addr) 158 continue; 159 if (inp->inp_faddr.s_addr && 160 inp->inp_faddr.s_addr != ip->ip_src.s_addr) 161 continue; 162 if (last) { 163 struct mbuf *n; 164 165 if ((n = m_copym(m, 0, M_COPYALL, M_NOWAIT)) != NULL) { 166 if (last->inp_flags & INP_CONTROLOPTS || 167 last->inp_socket->so_options & SO_TIMESTAMP) 168 ip_savecontrol(last, &opts, ip, n); 169 if (sbappendaddr(&last->inp_socket->so_rcv, 170 sintosa(&ripsrc), n, opts) == 0) { 171 /* should notify about lost packet */ 172 m_freem(n); 173 m_freem(opts); 174 } else 175 sorwakeup(last->inp_socket); 176 opts = NULL; 177 } 178 } 179 last = inp; 180 } 181 if (last) { 182 if (last->inp_flags & INP_CONTROLOPTS || 183 last->inp_socket->so_options & SO_TIMESTAMP) 184 ip_savecontrol(last, &opts, ip, m); 185 if (sbappendaddr(&last->inp_socket->so_rcv, sintosa(&ripsrc), m, 186 opts) == 0) { 187 m_freem(m); 188 m_freem(opts); 189 } else 190 sorwakeup(last->inp_socket); 191 } else { 192 if (ip->ip_p != IPPROTO_ICMP) 193 icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_PROTOCOL, 0, 0); 194 else 195 m_freem(m); 196 197 counters = counters_enter(&ref, ipcounters); 198 counters[ips_noproto]++; 199 counters[ips_delivered]--; 200 counters_leave(&ref, ipcounters); 201 } 202 return IPPROTO_DONE; 203 } 204 205 /* 206 * Generate IP header and pass packet to ip_output. 207 * Tack on options user may have setup with control call. 208 */ 209 int 210 rip_output(struct mbuf *m, struct socket *so, struct sockaddr *dstaddr, 211 struct mbuf *control) 212 { 213 struct ip *ip; 214 struct inpcb *inp; 215 int flags, error; 216 217 inp = sotoinpcb(so); 218 flags = IP_ALLOWBROADCAST; 219 220 /* 221 * If the user handed us a complete IP packet, use it. 222 * Otherwise, allocate an mbuf for a header and fill it in. 223 */ 224 if ((inp->inp_flags & INP_HDRINCL) == 0) { 225 if ((m->m_pkthdr.len + sizeof(struct ip)) > IP_MAXPACKET) { 226 m_freem(m); 227 return (EMSGSIZE); 228 } 229 M_PREPEND(m, sizeof(struct ip), M_DONTWAIT); 230 if (!m) 231 return (ENOBUFS); 232 ip = mtod(m, struct ip *); 233 ip->ip_tos = inp->inp_ip.ip_tos; 234 ip->ip_off = htons(0); 235 ip->ip_p = inp->inp_ip.ip_p; 236 ip->ip_len = htons(m->m_pkthdr.len); 237 ip->ip_src = inp->inp_laddr; 238 ip->ip_dst = satosin(dstaddr)->sin_addr; 239 ip->ip_ttl = inp->inp_ip.ip_ttl ? inp->inp_ip.ip_ttl : MAXTTL; 240 } else { 241 if (m->m_pkthdr.len > IP_MAXPACKET) { 242 m_freem(m); 243 return (EMSGSIZE); 244 } 245 if (m->m_pkthdr.len < sizeof(struct ip)) { 246 m_freem(m); 247 return (EINVAL); 248 } 249 ip = mtod(m, struct ip *); 250 /* 251 * don't allow both user specified and setsockopt options, 252 * and don't allow packet length sizes that will crash 253 */ 254 if ((ip->ip_hl != (sizeof (*ip) >> 2) && inp->inp_options) || 255 ntohs(ip->ip_len) > m->m_pkthdr.len || 256 ntohs(ip->ip_len) < ip->ip_hl << 2) { 257 m_freem(m); 258 return (EINVAL); 259 } 260 if (ip->ip_id == 0) { 261 ip->ip_id = htons(ip_randomid()); 262 } 263 /* XXX prevent ip_output from overwriting header fields */ 264 flags |= IP_RAWOUTPUT; 265 ipstat_inc(ips_rawout); 266 } 267 #ifdef INET6 268 /* 269 * A thought: Even though raw IP shouldn't be able to set IPv6 270 * multicast options, if it does, the last parameter to 271 * ip_output should be guarded against v6/v4 problems. 272 */ 273 #endif 274 /* force routing table */ 275 m->m_pkthdr.ph_rtableid = inp->inp_rtableid; 276 277 #if NPF > 0 278 if (inp->inp_socket->so_state & SS_ISCONNECTED && 279 ip->ip_p != IPPROTO_ICMP) 280 m->m_pkthdr.pf.inp = inp; 281 #endif 282 283 error = ip_output(m, inp->inp_options, &inp->inp_route, flags, 284 inp->inp_moptions, inp, 0); 285 if (error == EACCES) /* translate pf(4) error for userland */ 286 error = EHOSTUNREACH; 287 return (error); 288 } 289 290 /* 291 * Raw IP socket option processing. 292 */ 293 int 294 rip_ctloutput(int op, struct socket *so, int level, int optname, 295 struct mbuf *m) 296 { 297 struct inpcb *inp = sotoinpcb(so); 298 int error = 0; 299 int dir; 300 301 if (level != IPPROTO_IP) { 302 if (op == PRCO_SETOPT) 303 (void) m_free(m); 304 return (EINVAL); 305 } 306 307 switch (optname) { 308 309 case IP_HDRINCL: 310 error = 0; 311 if (op == PRCO_SETOPT) { 312 if (m == NULL || m->m_len < sizeof (int)) 313 error = EINVAL; 314 else if (*mtod(m, int *)) 315 inp->inp_flags |= INP_HDRINCL; 316 else 317 inp->inp_flags &= ~INP_HDRINCL; 318 m_free(m); 319 } else { 320 m->m_len = sizeof(int); 321 *mtod(m, int *) = inp->inp_flags & INP_HDRINCL; 322 } 323 return (error); 324 325 case IP_DIVERTFL: 326 switch (op) { 327 case PRCO_SETOPT: 328 if (m == NULL || m->m_len < sizeof (int)) { 329 error = EINVAL; 330 break; 331 } 332 dir = *mtod(m, int *); 333 if (inp->inp_divertfl > 0) 334 error = ENOTSUP; 335 else if ((dir & IPPROTO_DIVERT_RESP) || 336 (dir & IPPROTO_DIVERT_INIT)) 337 inp->inp_divertfl = dir; 338 else 339 error = EINVAL; 340 341 break; 342 343 case PRCO_GETOPT: 344 m->m_len = sizeof(int); 345 *mtod(m, int *) = inp->inp_divertfl; 346 break; 347 348 default: 349 error = EINVAL; 350 break; 351 } 352 353 if (op == PRCO_SETOPT) 354 (void)m_free(m); 355 return (error); 356 357 case MRT_INIT: 358 case MRT_DONE: 359 case MRT_ADD_VIF: 360 case MRT_DEL_VIF: 361 case MRT_ADD_MFC: 362 case MRT_DEL_MFC: 363 case MRT_VERSION: 364 case MRT_ASSERT: 365 case MRT_API_SUPPORT: 366 case MRT_API_CONFIG: 367 #ifdef MROUTING 368 switch (op) { 369 case PRCO_SETOPT: 370 error = ip_mrouter_set(so, optname, m); 371 break; 372 case PRCO_GETOPT: 373 error = ip_mrouter_get(so, optname, m); 374 break; 375 default: 376 error = EINVAL; 377 break; 378 } 379 return (error); 380 #else 381 if (op == PRCO_SETOPT) 382 m_free(m); 383 return (EOPNOTSUPP); 384 #endif 385 } 386 return (ip_ctloutput(op, so, level, optname, m)); 387 } 388 389 u_long rip_sendspace = RIPSNDQ; 390 u_long rip_recvspace = RIPRCVQ; 391 392 /*ARGSUSED*/ 393 int 394 rip_usrreq(struct socket *so, int req, struct mbuf *m, struct mbuf *nam, 395 struct mbuf *control, struct proc *p) 396 { 397 struct inpcb *inp = sotoinpcb(so); 398 int error = 0; 399 400 NET_ASSERT_LOCKED(); 401 402 if (req == PRU_CONTROL) 403 return (in_control(so, (u_long)m, (caddr_t)nam, 404 (struct ifnet *)control)); 405 406 if (inp == NULL && req != PRU_ATTACH) { 407 error = EINVAL; 408 goto release; 409 } 410 411 switch (req) { 412 413 case PRU_ATTACH: 414 if (inp) 415 panic("rip_attach"); 416 if ((so->so_state & SS_PRIV) == 0) { 417 error = EACCES; 418 break; 419 } 420 if ((long)nam < 0 || (long)nam >= IPPROTO_MAX) { 421 error = EPROTONOSUPPORT; 422 break; 423 } 424 if ((error = soreserve(so, rip_sendspace, rip_recvspace)) || 425 (error = in_pcballoc(so, &rawcbtable))) { 426 break; 427 } 428 inp = sotoinpcb(so); 429 inp->inp_ip.ip_p = (long)nam; 430 break; 431 432 case PRU_DISCONNECT: 433 if ((so->so_state & SS_ISCONNECTED) == 0) { 434 error = ENOTCONN; 435 break; 436 } 437 /* FALLTHROUGH */ 438 case PRU_ABORT: 439 soisdisconnected(so); 440 /* FALLTHROUGH */ 441 case PRU_DETACH: 442 if (inp == NULL) 443 panic("rip_detach"); 444 #ifdef MROUTING 445 if (so == ip_mrouter[inp->inp_rtableid]) 446 ip_mrouter_done(so); 447 #endif 448 in_pcbdetach(inp); 449 break; 450 451 case PRU_BIND: 452 { 453 struct sockaddr_in *addr = mtod(nam, struct sockaddr_in *); 454 455 if (nam->m_len != sizeof(*addr)) { 456 error = EINVAL; 457 break; 458 } 459 if (addr->sin_family != AF_INET) { 460 error = EADDRNOTAVAIL; 461 break; 462 } 463 if (!((so->so_options & SO_BINDANY) || 464 addr->sin_addr.s_addr == INADDR_ANY || 465 addr->sin_addr.s_addr == INADDR_BROADCAST || 466 in_broadcast(addr->sin_addr, inp->inp_rtableid) || 467 ifa_ifwithaddr(sintosa(addr), inp->inp_rtableid))) { 468 error = EADDRNOTAVAIL; 469 break; 470 } 471 inp->inp_laddr = addr->sin_addr; 472 break; 473 } 474 case PRU_CONNECT: 475 { 476 struct sockaddr_in *addr = mtod(nam, struct sockaddr_in *); 477 478 if (nam->m_len != sizeof(*addr)) { 479 error = EINVAL; 480 break; 481 } 482 if (addr->sin_family != AF_INET) { 483 error = EAFNOSUPPORT; 484 break; 485 } 486 inp->inp_faddr = addr->sin_addr; 487 soisconnected(so); 488 break; 489 } 490 491 case PRU_CONNECT2: 492 error = EOPNOTSUPP; 493 break; 494 495 /* 496 * Mark the connection as being incapable of further input. 497 */ 498 case PRU_SHUTDOWN: 499 socantsendmore(so); 500 break; 501 502 /* 503 * Ship a packet out. The appropriate raw output 504 * routine handles any massaging necessary. 505 */ 506 case PRU_SEND: 507 { 508 struct sockaddr_in dst; 509 510 memset(&dst, 0, sizeof(dst)); 511 dst.sin_family = AF_INET; 512 dst.sin_len = sizeof(dst); 513 if (so->so_state & SS_ISCONNECTED) { 514 if (nam) { 515 error = EISCONN; 516 break; 517 } 518 dst.sin_addr = inp->inp_faddr; 519 } else { 520 if (nam == NULL) { 521 error = ENOTCONN; 522 break; 523 } 524 dst.sin_addr = 525 mtod(nam, struct sockaddr_in *)->sin_addr; 526 } 527 #ifdef IPSEC 528 /* XXX Find an IPsec TDB */ 529 #endif 530 error = rip_output(m, so, sintosa(&dst), NULL); 531 m = NULL; 532 break; 533 } 534 535 case PRU_SENSE: 536 /* 537 * stat: don't bother with a blocksize. 538 */ 539 return (0); 540 541 /* 542 * Not supported. 543 */ 544 case PRU_RCVOOB: 545 case PRU_RCVD: 546 case PRU_LISTEN: 547 case PRU_ACCEPT: 548 case PRU_SENDOOB: 549 error = EOPNOTSUPP; 550 break; 551 552 case PRU_SOCKADDR: 553 in_setsockaddr(inp, nam); 554 break; 555 556 case PRU_PEERADDR: 557 in_setpeeraddr(inp, nam); 558 break; 559 560 default: 561 panic("rip_usrreq"); 562 } 563 release: 564 m_freem(m); 565 return (error); 566 } 567