1 /* $OpenBSD: raw_ip.c,v 1.61 2012/03/17 10:16:41 dlg Exp $ */ 2 /* $NetBSD: raw_ip.c,v 1.25 1996/02/18 18:58:33 christos Exp $ */ 3 4 /* 5 * Copyright (c) 1982, 1986, 1988, 1993 6 * The Regents of the University of California. All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. Neither the name of the University nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 * 32 * @(#)COPYRIGHT 1.1 (NRL) 17 January 1995 33 * 34 * NRL grants permission for redistribution and use in source and binary 35 * forms, with or without modification, of the software and documentation 36 * created at NRL provided that the following conditions are met: 37 * 38 * 1. Redistributions of source code must retain the above copyright 39 * notice, this list of conditions and the following disclaimer. 40 * 2. Redistributions in binary form must reproduce the above copyright 41 * notice, this list of conditions and the following disclaimer in the 42 * documentation and/or other materials provided with the distribution. 43 * 3. All advertising materials mentioning features or use of this software 44 * must display the following acknowledgements: 45 * This product includes software developed by the University of 46 * California, Berkeley and its contributors. 47 * This product includes software developed at the Information 48 * Technology Division, US Naval Research Laboratory. 49 * 4. Neither the name of the NRL nor the names of its contributors 50 * may be used to endorse or promote products derived from this software 51 * without specific prior written permission. 52 * 53 * THE SOFTWARE PROVIDED BY NRL IS PROVIDED BY NRL AND CONTRIBUTORS ``AS 54 * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 55 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A 56 * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NRL OR 57 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 58 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 59 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 60 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 61 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 62 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 63 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 64 * 65 * The views and conclusions contained in the software and documentation 66 * are those of the authors and should not be interpreted as representing 67 * official policies, either expressed or implied, of the US Naval 68 * Research Laboratory (NRL). 69 */ 70 71 #include <sys/param.h> 72 #include <sys/systm.h> 73 #include <sys/mbuf.h> 74 #include <sys/socket.h> 75 #include <sys/protosw.h> 76 #include <sys/socketvar.h> 77 78 #include <net/if.h> 79 #include <net/route.h> 80 #include <net/pfvar.h> 81 82 #include <netinet/in.h> 83 #include <netinet/in_systm.h> 84 #include <netinet/ip.h> 85 #include <netinet/ip_mroute.h> 86 #include <netinet/ip_var.h> 87 #include <netinet/in_pcb.h> 88 #include <netinet/in_var.h> 89 #include <netinet/ip_icmp.h> 90 91 #include "pf.h" 92 93 struct inpcbtable rawcbtable; 94 95 /* 96 * Nominal space allocated to a raw ip socket. 97 */ 98 #define RIPSNDQ 8192 99 #define RIPRCVQ 8192 100 101 /* 102 * Raw interface to IP protocol. 103 */ 104 105 /* 106 * Initialize raw connection block q. 107 */ 108 void 109 rip_init() 110 { 111 112 in_pcbinit(&rawcbtable, 1); 113 } 114 115 struct sockaddr_in ripsrc = { sizeof(ripsrc), AF_INET }; 116 117 /* 118 * Setup generic address and protocol structures 119 * for raw_input routine, then pass them along with 120 * mbuf chain. 121 */ 122 void 123 rip_input(struct mbuf *m, ...) 124 { 125 struct ip *ip = mtod(m, struct ip *); 126 struct inpcb *inp, *last = NULL; 127 struct mbuf *opts = NULL; 128 129 ripsrc.sin_addr = ip->ip_src; 130 CIRCLEQ_FOREACH(inp, &rawcbtable.inpt_queue, inp_queue) { 131 if (inp->inp_socket->so_state & SS_CANTRCVMORE) 132 continue; 133 #ifdef INET6 134 if (inp->inp_flags & INP_IPV6) 135 continue; 136 #endif 137 if (rtable_l2(inp->inp_rtableid) != 138 rtable_l2(m->m_pkthdr.rdomain)) 139 continue; 140 141 if (inp->inp_ip.ip_p && inp->inp_ip.ip_p != ip->ip_p) 142 continue; 143 #if NPF > 0 144 if (m->m_pkthdr.pf.flags & PF_TAG_DIVERTED) { 145 struct pf_divert *divert; 146 147 /* XXX rdomain support */ 148 if ((divert = pf_find_divert(m)) == NULL) 149 continue; 150 if (inp->inp_laddr.s_addr != divert->addr.v4.s_addr) 151 continue; 152 } else 153 #endif 154 if (inp->inp_laddr.s_addr && 155 inp->inp_laddr.s_addr != ip->ip_dst.s_addr) 156 continue; 157 if (inp->inp_faddr.s_addr && 158 inp->inp_faddr.s_addr != ip->ip_src.s_addr) 159 continue; 160 if (last) { 161 struct mbuf *n; 162 163 if ((n = m_copy(m, 0, (int)M_COPYALL)) != NULL) { 164 if (last->inp_flags & INP_CONTROLOPTS || 165 last->inp_socket->so_options & SO_TIMESTAMP) 166 ip_savecontrol(last, &opts, ip, n); 167 if (sbappendaddr(&last->inp_socket->so_rcv, 168 sintosa(&ripsrc), n, opts) == 0) { 169 /* should notify about lost packet */ 170 m_freem(n); 171 if (opts) 172 m_freem(opts); 173 } else 174 sorwakeup(last->inp_socket); 175 opts = NULL; 176 } 177 } 178 last = inp; 179 } 180 if (last) { 181 if (last->inp_flags & INP_CONTROLOPTS || 182 last->inp_socket->so_options & SO_TIMESTAMP) 183 ip_savecontrol(last, &opts, ip, m); 184 if (sbappendaddr(&last->inp_socket->so_rcv, sintosa(&ripsrc), m, 185 opts) == 0) { 186 m_freem(m); 187 if (opts) 188 m_freem(opts); 189 } else 190 sorwakeup(last->inp_socket); 191 } else { 192 if (ip->ip_p != IPPROTO_ICMP) 193 icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_PROTOCOL, 0, 0); 194 else 195 m_freem(m); 196 ipstat.ips_noproto++; 197 ipstat.ips_delivered--; 198 } 199 } 200 201 /* 202 * Generate IP header and pass packet to ip_output. 203 * Tack on options user may have setup with control call. 204 */ 205 int 206 rip_output(struct mbuf *m, ...) 207 { 208 struct socket *so; 209 u_long dst; 210 struct ip *ip; 211 struct inpcb *inp; 212 int flags, error; 213 va_list ap; 214 215 va_start(ap, m); 216 so = va_arg(ap, struct socket *); 217 dst = va_arg(ap, u_long); 218 va_end(ap); 219 220 inp = sotoinpcb(so); 221 flags = (so->so_options & SO_DONTROUTE) | IP_ALLOWBROADCAST; 222 223 /* 224 * If the user handed us a complete IP packet, use it. 225 * Otherwise, allocate an mbuf for a header and fill it in. 226 */ 227 if ((inp->inp_flags & INP_HDRINCL) == 0) { 228 if ((m->m_pkthdr.len + sizeof(struct ip)) > IP_MAXPACKET) { 229 m_freem(m); 230 return (EMSGSIZE); 231 } 232 M_PREPEND(m, sizeof(struct ip), M_DONTWAIT); 233 if (!m) 234 return (ENOBUFS); 235 ip = mtod(m, struct ip *); 236 ip->ip_tos = inp->inp_ip.ip_tos; 237 ip->ip_off = htons(0); 238 ip->ip_p = inp->inp_ip.ip_p; 239 ip->ip_len = htons(m->m_pkthdr.len); 240 ip->ip_src = inp->inp_laddr; 241 ip->ip_dst.s_addr = dst; 242 ip->ip_ttl = inp->inp_ip.ip_ttl ? inp->inp_ip.ip_ttl : MAXTTL; 243 } else { 244 if (m->m_pkthdr.len > IP_MAXPACKET) { 245 m_freem(m); 246 return (EMSGSIZE); 247 } 248 if (m->m_pkthdr.len < sizeof(struct ip)) { 249 m_freem(m); 250 return (EINVAL); 251 } 252 ip = mtod(m, struct ip *); 253 /* 254 * don't allow both user specified and setsockopt options, 255 * and don't allow packet length sizes that will crash 256 */ 257 if ((ip->ip_hl != (sizeof (*ip) >> 2) && inp->inp_options) || 258 ntohs(ip->ip_len) > m->m_pkthdr.len || 259 ntohs(ip->ip_len) < ip->ip_hl << 2) { 260 m_freem(m); 261 return (EINVAL); 262 } 263 if (ip->ip_id == 0) { 264 ip->ip_id = htons(ip_randomid()); 265 } 266 /* XXX prevent ip_output from overwriting header fields */ 267 flags |= IP_RAWOUTPUT; 268 ipstat.ips_rawout++; 269 } 270 #ifdef INET6 271 /* 272 * A thought: Even though raw IP shouldn't be able to set IPv6 273 * multicast options, if it does, the last parameter to 274 * ip_output should be guarded against v6/v4 problems. 275 */ 276 #endif 277 /* force routing domain */ 278 m->m_pkthdr.rdomain = inp->inp_rtableid; 279 280 error = ip_output(m, inp->inp_options, &inp->inp_route, flags, 281 inp->inp_moptions, inp); 282 if (error == EACCES) /* translate pf(4) error for userland */ 283 error = EHOSTUNREACH; 284 return (error); 285 } 286 287 /* 288 * Raw IP socket option processing. 289 */ 290 int 291 rip_ctloutput(int op, struct socket *so, int level, int optname, 292 struct mbuf **m) 293 { 294 struct inpcb *inp = sotoinpcb(so); 295 int error; 296 297 if (level != IPPROTO_IP) { 298 if (op == PRCO_SETOPT && *m) 299 (void) m_free(*m); 300 return (EINVAL); 301 } 302 303 switch (optname) { 304 305 case IP_HDRINCL: 306 error = 0; 307 if (op == PRCO_SETOPT) { 308 if (*m == 0 || (*m)->m_len < sizeof (int)) 309 error = EINVAL; 310 else if (*mtod(*m, int *)) 311 inp->inp_flags |= INP_HDRINCL; 312 else 313 inp->inp_flags &= ~INP_HDRINCL; 314 if (*m) 315 (void)m_free(*m); 316 } else { 317 *m = m_get(M_WAIT, M_SOOPTS); 318 (*m)->m_len = sizeof(int); 319 *mtod(*m, int *) = inp->inp_flags & INP_HDRINCL; 320 } 321 return (error); 322 323 case MRT_INIT: 324 case MRT_DONE: 325 case MRT_ADD_VIF: 326 case MRT_DEL_VIF: 327 case MRT_ADD_MFC: 328 case MRT_DEL_MFC: 329 case MRT_VERSION: 330 case MRT_ASSERT: 331 case MRT_API_SUPPORT: 332 case MRT_API_CONFIG: 333 case MRT_ADD_BW_UPCALL: 334 case MRT_DEL_BW_UPCALL: 335 #ifdef MROUTING 336 switch (op) { 337 case PRCO_SETOPT: 338 error = ip_mrouter_set(so, optname, m); 339 break; 340 case PRCO_GETOPT: 341 error = ip_mrouter_get(so, optname, m); 342 break; 343 default: 344 error = EINVAL; 345 break; 346 } 347 return (error); 348 #else 349 if (op == PRCO_SETOPT && *m) 350 m_free(*m); 351 return (EOPNOTSUPP); 352 #endif 353 } 354 return (ip_ctloutput(op, so, level, optname, m)); 355 } 356 357 u_long rip_sendspace = RIPSNDQ; 358 u_long rip_recvspace = RIPRCVQ; 359 360 /*ARGSUSED*/ 361 int 362 rip_usrreq(struct socket *so, int req, struct mbuf *m, struct mbuf *nam, 363 struct mbuf *control, struct proc *p) 364 { 365 int error = 0; 366 struct inpcb *inp = sotoinpcb(so); 367 #ifdef MROUTING 368 extern struct socket *ip_mrouter; 369 #endif 370 if (req == PRU_CONTROL) 371 return (in_control(so, (u_long)m, (caddr_t)nam, 372 (struct ifnet *)control)); 373 374 if (inp == NULL && req != PRU_ATTACH) { 375 error = EINVAL; 376 goto release; 377 } 378 379 switch (req) { 380 381 case PRU_ATTACH: 382 if (inp) 383 panic("rip_attach"); 384 if ((so->so_state & SS_PRIV) == 0) { 385 error = EACCES; 386 break; 387 } 388 if ((error = soreserve(so, rip_sendspace, rip_recvspace)) || 389 (error = in_pcballoc(so, &rawcbtable))) 390 break; 391 inp = (struct inpcb *)so->so_pcb; 392 inp->inp_ip.ip_p = (long)nam; 393 break; 394 395 case PRU_DISCONNECT: 396 if ((so->so_state & SS_ISCONNECTED) == 0) { 397 error = ENOTCONN; 398 break; 399 } 400 /* FALLTHROUGH */ 401 case PRU_ABORT: 402 soisdisconnected(so); 403 /* FALLTHROUGH */ 404 case PRU_DETACH: 405 if (inp == 0) 406 panic("rip_detach"); 407 #ifdef MROUTING 408 if (so == ip_mrouter) 409 ip_mrouter_done(); 410 #endif 411 in_pcbdetach(inp); 412 break; 413 414 case PRU_BIND: 415 { 416 struct sockaddr_in *addr = mtod(nam, struct sockaddr_in *); 417 418 if (nam->m_len != sizeof(*addr)) { 419 error = EINVAL; 420 break; 421 } 422 if (TAILQ_EMPTY(&ifnet) || addr->sin_family != AF_INET) { 423 error = EADDRNOTAVAIL; 424 break; 425 } 426 if (!((so->so_options & SO_BINDANY) || 427 addr->sin_addr.s_addr == 0 || 428 in_iawithaddr(addr->sin_addr, inp->inp_rtableid) || 429 in_broadcast(addr->sin_addr, NULL, inp->inp_rtableid))) { 430 error = EADDRNOTAVAIL; 431 break; 432 } 433 inp->inp_laddr = addr->sin_addr; 434 break; 435 } 436 case PRU_CONNECT: 437 { 438 struct sockaddr_in *addr = mtod(nam, struct sockaddr_in *); 439 440 if (nam->m_len != sizeof(*addr)) { 441 error = EINVAL; 442 break; 443 } 444 if (TAILQ_EMPTY(&ifnet)) { 445 error = EADDRNOTAVAIL; 446 break; 447 } 448 if (addr->sin_family != AF_INET) { 449 error = EAFNOSUPPORT; 450 break; 451 } 452 inp->inp_faddr = addr->sin_addr; 453 soisconnected(so); 454 break; 455 } 456 457 case PRU_CONNECT2: 458 error = EOPNOTSUPP; 459 break; 460 461 /* 462 * Mark the connection as being incapable of further input. 463 */ 464 case PRU_SHUTDOWN: 465 socantsendmore(so); 466 break; 467 468 /* 469 * Ship a packet out. The appropriate raw output 470 * routine handles any massaging necessary. 471 */ 472 case PRU_SEND: 473 { 474 u_int32_t dst; 475 476 if (so->so_state & SS_ISCONNECTED) { 477 if (nam) { 478 error = EISCONN; 479 break; 480 } 481 dst = inp->inp_faddr.s_addr; 482 } else { 483 if (nam == NULL) { 484 error = ENOTCONN; 485 break; 486 } 487 dst = mtod(nam, struct sockaddr_in *)->sin_addr.s_addr; 488 } 489 #ifdef IPSEC 490 /* XXX Find an IPsec TDB */ 491 #endif 492 error = rip_output(m, so, dst); 493 m = NULL; 494 break; 495 } 496 497 case PRU_SENSE: 498 /* 499 * stat: don't bother with a blocksize. 500 */ 501 return (0); 502 503 /* 504 * Not supported. 505 */ 506 case PRU_RCVOOB: 507 case PRU_RCVD: 508 case PRU_LISTEN: 509 case PRU_ACCEPT: 510 case PRU_SENDOOB: 511 error = EOPNOTSUPP; 512 break; 513 514 case PRU_SOCKADDR: 515 in_setsockaddr(inp, nam); 516 break; 517 518 case PRU_PEERADDR: 519 in_setpeeraddr(inp, nam); 520 break; 521 522 default: 523 panic("rip_usrreq"); 524 } 525 release: 526 if (m != NULL) 527 m_freem(m); 528 return (error); 529 } 530