1 /* $OpenBSD: raw_ip.c,v 1.103 2017/09/05 07:59:11 mpi Exp $ */ 2 /* $NetBSD: raw_ip.c,v 1.25 1996/02/18 18:58:33 christos Exp $ */ 3 4 /* 5 * Copyright (c) 1982, 1986, 1988, 1993 6 * The Regents of the University of California. All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. Neither the name of the University nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 * 32 * @(#)COPYRIGHT 1.1 (NRL) 17 January 1995 33 * 34 * NRL grants permission for redistribution and use in source and binary 35 * forms, with or without modification, of the software and documentation 36 * created at NRL provided that the following conditions are met: 37 * 38 * 1. Redistributions of source code must retain the above copyright 39 * notice, this list of conditions and the following disclaimer. 40 * 2. Redistributions in binary form must reproduce the above copyright 41 * notice, this list of conditions and the following disclaimer in the 42 * documentation and/or other materials provided with the distribution. 43 * 3. All advertising materials mentioning features or use of this software 44 * must display the following acknowledgements: 45 * This product includes software developed by the University of 46 * California, Berkeley and its contributors. 47 * This product includes software developed at the Information 48 * Technology Division, US Naval Research Laboratory. 49 * 4. Neither the name of the NRL nor the names of its contributors 50 * may be used to endorse or promote products derived from this software 51 * without specific prior written permission. 52 * 53 * THE SOFTWARE PROVIDED BY NRL IS PROVIDED BY NRL AND CONTRIBUTORS ``AS 54 * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 55 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A 56 * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NRL OR 57 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 58 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 59 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 60 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 61 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 62 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 63 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 64 * 65 * The views and conclusions contained in the software and documentation 66 * are those of the authors and should not be interpreted as representing 67 * official policies, either expressed or implied, of the US Naval 68 * Research Laboratory (NRL). 69 */ 70 71 #include <sys/param.h> 72 #include <sys/systm.h> 73 #include <sys/mbuf.h> 74 #include <sys/socket.h> 75 #include <sys/protosw.h> 76 #include <sys/socketvar.h> 77 78 #include <net/if.h> 79 #include <net/if_var.h> 80 #include <net/route.h> 81 82 #include <netinet/in.h> 83 #include <netinet/ip.h> 84 #include <netinet/ip_mroute.h> 85 #include <netinet/ip_var.h> 86 #include <netinet/in_pcb.h> 87 #include <netinet/in_var.h> 88 #include <netinet/ip_icmp.h> 89 90 #include <net/pfvar.h> 91 92 #include "pf.h" 93 94 struct inpcbtable rawcbtable; 95 96 /* 97 * Nominal space allocated to a raw ip socket. 98 */ 99 #define RIPSNDQ 8192 100 #define RIPRCVQ 8192 101 102 /* 103 * Raw interface to IP protocol. 104 */ 105 106 /* 107 * Initialize raw connection block q. 108 */ 109 void 110 rip_init(void) 111 { 112 113 in_pcbinit(&rawcbtable, 1); 114 } 115 116 struct sockaddr_in ripsrc = { sizeof(ripsrc), AF_INET }; 117 118 int 119 rip_input(struct mbuf **mp, int *offp, int proto, int af) 120 { 121 struct mbuf *m = *mp; 122 struct ip *ip = mtod(m, struct ip *); 123 struct inpcb *inp, *last = NULL; 124 struct mbuf *opts = NULL; 125 struct counters_ref ref; 126 uint64_t *counters; 127 128 KASSERT(af == AF_INET); 129 130 ripsrc.sin_addr = ip->ip_src; 131 TAILQ_FOREACH(inp, &rawcbtable.inpt_queue, inp_queue) { 132 if (inp->inp_socket->so_state & SS_CANTRCVMORE) 133 continue; 134 #ifdef INET6 135 if (inp->inp_flags & INP_IPV6) 136 continue; 137 #endif 138 if (rtable_l2(inp->inp_rtableid) != 139 rtable_l2(m->m_pkthdr.ph_rtableid)) 140 continue; 141 142 if (inp->inp_ip.ip_p && inp->inp_ip.ip_p != ip->ip_p) 143 continue; 144 #if NPF > 0 145 if (m->m_pkthdr.pf.flags & PF_TAG_DIVERTED) { 146 struct pf_divert *divert; 147 148 /* XXX rdomain support */ 149 if ((divert = pf_find_divert(m)) == NULL) 150 continue; 151 if (!divert->addr.v4.s_addr) 152 goto divert_reply; 153 if (inp->inp_laddr.s_addr != divert->addr.v4.s_addr) 154 continue; 155 } else 156 divert_reply: 157 #endif 158 if (inp->inp_laddr.s_addr && 159 inp->inp_laddr.s_addr != ip->ip_dst.s_addr) 160 continue; 161 if (inp->inp_faddr.s_addr && 162 inp->inp_faddr.s_addr != ip->ip_src.s_addr) 163 continue; 164 if (last) { 165 struct mbuf *n; 166 167 if ((n = m_copym(m, 0, M_COPYALL, M_NOWAIT)) != NULL) { 168 if (last->inp_flags & INP_CONTROLOPTS || 169 last->inp_socket->so_options & SO_TIMESTAMP) 170 ip_savecontrol(last, &opts, ip, n); 171 if (sbappendaddr(last->inp_socket, 172 &last->inp_socket->so_rcv, 173 sintosa(&ripsrc), n, opts) == 0) { 174 /* should notify about lost packet */ 175 m_freem(n); 176 m_freem(opts); 177 } else 178 sorwakeup(last->inp_socket); 179 opts = NULL; 180 } 181 } 182 last = inp; 183 } 184 if (last) { 185 if (last->inp_flags & INP_CONTROLOPTS || 186 last->inp_socket->so_options & SO_TIMESTAMP) 187 ip_savecontrol(last, &opts, ip, m); 188 if (sbappendaddr(last->inp_socket, &last->inp_socket->so_rcv, 189 sintosa(&ripsrc), m, opts) == 0) { 190 m_freem(m); 191 m_freem(opts); 192 } else 193 sorwakeup(last->inp_socket); 194 } else { 195 if (ip->ip_p != IPPROTO_ICMP) 196 icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_PROTOCOL, 0, 0); 197 else 198 m_freem(m); 199 200 counters = counters_enter(&ref, ipcounters); 201 counters[ips_noproto]++; 202 counters[ips_delivered]--; 203 counters_leave(&ref, ipcounters); 204 } 205 return IPPROTO_DONE; 206 } 207 208 /* 209 * Generate IP header and pass packet to ip_output. 210 * Tack on options user may have setup with control call. 211 */ 212 int 213 rip_output(struct mbuf *m, struct socket *so, struct sockaddr *dstaddr, 214 struct mbuf *control) 215 { 216 struct ip *ip; 217 struct inpcb *inp; 218 int flags, error; 219 220 inp = sotoinpcb(so); 221 flags = IP_ALLOWBROADCAST; 222 223 /* 224 * If the user handed us a complete IP packet, use it. 225 * Otherwise, allocate an mbuf for a header and fill it in. 226 */ 227 if ((inp->inp_flags & INP_HDRINCL) == 0) { 228 if ((m->m_pkthdr.len + sizeof(struct ip)) > IP_MAXPACKET) { 229 m_freem(m); 230 return (EMSGSIZE); 231 } 232 M_PREPEND(m, sizeof(struct ip), M_DONTWAIT); 233 if (!m) 234 return (ENOBUFS); 235 ip = mtod(m, struct ip *); 236 ip->ip_tos = inp->inp_ip.ip_tos; 237 ip->ip_off = htons(0); 238 ip->ip_p = inp->inp_ip.ip_p; 239 ip->ip_len = htons(m->m_pkthdr.len); 240 ip->ip_src = inp->inp_laddr; 241 ip->ip_dst = satosin(dstaddr)->sin_addr; 242 ip->ip_ttl = inp->inp_ip.ip_ttl ? inp->inp_ip.ip_ttl : MAXTTL; 243 } else { 244 if (m->m_pkthdr.len > IP_MAXPACKET) { 245 m_freem(m); 246 return (EMSGSIZE); 247 } 248 if (m->m_pkthdr.len < sizeof(struct ip)) { 249 m_freem(m); 250 return (EINVAL); 251 } 252 ip = mtod(m, struct ip *); 253 /* 254 * don't allow both user specified and setsockopt options, 255 * and don't allow packet length sizes that will crash 256 */ 257 if ((ip->ip_hl != (sizeof (*ip) >> 2) && inp->inp_options) || 258 ntohs(ip->ip_len) > m->m_pkthdr.len || 259 ntohs(ip->ip_len) < ip->ip_hl << 2) { 260 m_freem(m); 261 return (EINVAL); 262 } 263 if (ip->ip_id == 0) { 264 ip->ip_id = htons(ip_randomid()); 265 } 266 /* XXX prevent ip_output from overwriting header fields */ 267 flags |= IP_RAWOUTPUT; 268 ipstat_inc(ips_rawout); 269 } 270 #ifdef INET6 271 /* 272 * A thought: Even though raw IP shouldn't be able to set IPv6 273 * multicast options, if it does, the last parameter to 274 * ip_output should be guarded against v6/v4 problems. 275 */ 276 #endif 277 /* force routing table */ 278 m->m_pkthdr.ph_rtableid = inp->inp_rtableid; 279 280 #if NPF > 0 281 if (inp->inp_socket->so_state & SS_ISCONNECTED && 282 ip->ip_p != IPPROTO_ICMP) 283 m->m_pkthdr.pf.inp = inp; 284 #endif 285 286 error = ip_output(m, inp->inp_options, &inp->inp_route, flags, 287 inp->inp_moptions, inp, 0); 288 if (error == EACCES) /* translate pf(4) error for userland */ 289 error = EHOSTUNREACH; 290 return (error); 291 } 292 293 /* 294 * Raw IP socket option processing. 295 */ 296 int 297 rip_ctloutput(int op, struct socket *so, int level, int optname, 298 struct mbuf *m) 299 { 300 struct inpcb *inp = sotoinpcb(so); 301 int error = 0; 302 int dir; 303 304 if (level != IPPROTO_IP) 305 return (EINVAL); 306 307 switch (optname) { 308 309 case IP_HDRINCL: 310 error = 0; 311 if (op == PRCO_SETOPT) { 312 if (m == NULL || m->m_len < sizeof (int)) 313 error = EINVAL; 314 else if (*mtod(m, int *)) 315 inp->inp_flags |= INP_HDRINCL; 316 else 317 inp->inp_flags &= ~INP_HDRINCL; 318 } else { 319 m->m_len = sizeof(int); 320 *mtod(m, int *) = inp->inp_flags & INP_HDRINCL; 321 } 322 return (error); 323 324 case IP_DIVERTFL: 325 switch (op) { 326 case PRCO_SETOPT: 327 if (m == NULL || m->m_len < sizeof (int)) { 328 error = EINVAL; 329 break; 330 } 331 dir = *mtod(m, int *); 332 if (inp->inp_divertfl > 0) 333 error = ENOTSUP; 334 else if ((dir & IPPROTO_DIVERT_RESP) || 335 (dir & IPPROTO_DIVERT_INIT)) 336 inp->inp_divertfl = dir; 337 else 338 error = EINVAL; 339 340 break; 341 342 case PRCO_GETOPT: 343 m->m_len = sizeof(int); 344 *mtod(m, int *) = inp->inp_divertfl; 345 break; 346 347 default: 348 error = EINVAL; 349 break; 350 } 351 352 return (error); 353 354 case MRT_INIT: 355 case MRT_DONE: 356 case MRT_ADD_VIF: 357 case MRT_DEL_VIF: 358 case MRT_ADD_MFC: 359 case MRT_DEL_MFC: 360 case MRT_VERSION: 361 case MRT_ASSERT: 362 case MRT_API_SUPPORT: 363 case MRT_API_CONFIG: 364 #ifdef MROUTING 365 switch (op) { 366 case PRCO_SETOPT: 367 error = ip_mrouter_set(so, optname, m); 368 break; 369 case PRCO_GETOPT: 370 error = ip_mrouter_get(so, optname, m); 371 break; 372 default: 373 error = EINVAL; 374 break; 375 } 376 return (error); 377 #else 378 return (EOPNOTSUPP); 379 #endif 380 } 381 return (ip_ctloutput(op, so, level, optname, m)); 382 } 383 384 u_long rip_sendspace = RIPSNDQ; 385 u_long rip_recvspace = RIPRCVQ; 386 387 /*ARGSUSED*/ 388 int 389 rip_usrreq(struct socket *so, int req, struct mbuf *m, struct mbuf *nam, 390 struct mbuf *control, struct proc *p) 391 { 392 struct inpcb *inp = sotoinpcb(so); 393 int error = 0; 394 395 soassertlocked(so); 396 397 if (req == PRU_CONTROL) 398 return (in_control(so, (u_long)m, (caddr_t)nam, 399 (struct ifnet *)control)); 400 401 if (inp == NULL) { 402 error = EINVAL; 403 goto release; 404 } 405 406 switch (req) { 407 408 case PRU_DISCONNECT: 409 if ((so->so_state & SS_ISCONNECTED) == 0) { 410 error = ENOTCONN; 411 break; 412 } 413 /* FALLTHROUGH */ 414 case PRU_ABORT: 415 soisdisconnected(so); 416 /* FALLTHROUGH */ 417 case PRU_DETACH: 418 if (inp == NULL) 419 panic("rip_detach"); 420 #ifdef MROUTING 421 if (so == ip_mrouter[inp->inp_rtableid]) 422 ip_mrouter_done(so); 423 #endif 424 in_pcbdetach(inp); 425 break; 426 427 case PRU_BIND: 428 { 429 struct sockaddr_in *addr; 430 431 if ((error = in_nam2sin(nam, &addr))) 432 break; 433 if (!((so->so_options & SO_BINDANY) || 434 addr->sin_addr.s_addr == INADDR_ANY || 435 addr->sin_addr.s_addr == INADDR_BROADCAST || 436 in_broadcast(addr->sin_addr, inp->inp_rtableid) || 437 ifa_ifwithaddr(sintosa(addr), inp->inp_rtableid))) { 438 error = EADDRNOTAVAIL; 439 break; 440 } 441 inp->inp_laddr = addr->sin_addr; 442 break; 443 } 444 case PRU_CONNECT: 445 { 446 struct sockaddr_in *addr; 447 448 if ((error = in_nam2sin(nam, &addr))) 449 break; 450 inp->inp_faddr = addr->sin_addr; 451 soisconnected(so); 452 break; 453 } 454 455 case PRU_CONNECT2: 456 error = EOPNOTSUPP; 457 break; 458 459 /* 460 * Mark the connection as being incapable of further input. 461 */ 462 case PRU_SHUTDOWN: 463 socantsendmore(so); 464 break; 465 466 /* 467 * Ship a packet out. The appropriate raw output 468 * routine handles any massaging necessary. 469 */ 470 case PRU_SEND: 471 { 472 struct sockaddr_in dst; 473 474 memset(&dst, 0, sizeof(dst)); 475 dst.sin_family = AF_INET; 476 dst.sin_len = sizeof(dst); 477 if (so->so_state & SS_ISCONNECTED) { 478 if (nam) { 479 error = EISCONN; 480 break; 481 } 482 dst.sin_addr = inp->inp_faddr; 483 } else { 484 struct sockaddr_in *addr; 485 486 if (nam == NULL) { 487 error = ENOTCONN; 488 break; 489 } 490 if ((error = in_nam2sin(nam, &addr))) 491 break; 492 dst.sin_addr = addr->sin_addr; 493 } 494 #ifdef IPSEC 495 /* XXX Find an IPsec TDB */ 496 #endif 497 error = rip_output(m, so, sintosa(&dst), NULL); 498 m = NULL; 499 break; 500 } 501 502 case PRU_SENSE: 503 /* 504 * stat: don't bother with a blocksize. 505 */ 506 return (0); 507 508 /* 509 * Not supported. 510 */ 511 case PRU_RCVOOB: 512 case PRU_RCVD: 513 case PRU_LISTEN: 514 case PRU_ACCEPT: 515 case PRU_SENDOOB: 516 error = EOPNOTSUPP; 517 break; 518 519 case PRU_SOCKADDR: 520 in_setsockaddr(inp, nam); 521 break; 522 523 case PRU_PEERADDR: 524 in_setpeeraddr(inp, nam); 525 break; 526 527 default: 528 panic("rip_usrreq"); 529 } 530 release: 531 m_freem(m); 532 return (error); 533 } 534 535 int 536 rip_attach(struct socket *so, int proto) 537 { 538 struct inpcb *inp; 539 int error; 540 541 if (so->so_pcb) 542 panic("rip_attach"); 543 if ((so->so_state & SS_PRIV) == 0) 544 return EACCES; 545 if (proto < 0 || proto >= IPPROTO_MAX) 546 return EPROTONOSUPPORT; 547 548 if ((error = soreserve(so, rip_sendspace, rip_recvspace)) || 549 (error = in_pcballoc(so, &rawcbtable))) { 550 return error; 551 } 552 inp = sotoinpcb(so); 553 inp->inp_ip.ip_p = proto; 554 return 0; 555 } 556