1 /* $NetBSD: ip_encap.c,v 1.6 2001/11/13 00:32:36 lukem Exp $ */ 2 /* $KAME: ip_encap.c,v 1.39 2000/10/01 12:37:18 itojun Exp $ */ 3 4 /* 5 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. Neither the name of the project nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 */ 32 /* 33 * My grandfather said that there's a devil inside tunnelling technology... 34 * 35 * We have surprisingly many protocols that want packets with IP protocol 36 * #4 or #41. Here's a list of protocols that want protocol #41: 37 * RFC1933 configured tunnel 38 * RFC1933 automatic tunnel 39 * RFC2401 IPsec tunnel 40 * RFC2473 IPv6 generic packet tunnelling 41 * RFC2529 6over4 tunnel 42 * mobile-ip6 (uses RFC2473) 43 * 6to4 tunnel 44 * Here's a list of protocol that want protocol #4: 45 * RFC1853 IPv4-in-IPv4 tunnelling 46 * RFC2003 IPv4 encapsulation within IPv4 47 * RFC2344 reverse tunnelling for mobile-ip4 48 * RFC2401 IPsec tunnel 49 * Well, what can I say. They impose different en/decapsulation mechanism 50 * from each other, so they need separate protocol handler. The only one 51 * we can easily determine by protocol # is IPsec, which always has 52 * AH/ESP/IPComp header right after outer IP header. 53 * 54 * So, clearly good old protosw does not work for protocol #4 and #41. 55 * The code will let you match protocol via src/dst address pair. 56 */ 57 /* XXX is M_NETADDR correct? */ 58 59 #include <sys/cdefs.h> 60 __KERNEL_RCSID(0, "$NetBSD: ip_encap.c,v 1.6 2001/11/13 00:32:36 lukem Exp $"); 61 62 #include "opt_mrouting.h" 63 #include "opt_inet.h" 64 65 #include <sys/param.h> 66 #include <sys/systm.h> 67 #include <sys/socket.h> 68 #include <sys/sockio.h> 69 #include <sys/mbuf.h> 70 #include <sys/errno.h> 71 #include <sys/protosw.h> 72 #include <sys/queue.h> 73 74 #include <net/if.h> 75 #include <net/route.h> 76 77 #include <netinet/in.h> 78 #include <netinet/in_systm.h> 79 #include <netinet/ip.h> 80 #include <netinet/ip_var.h> 81 #include <netinet/ip_encap.h> 82 #ifdef MROUTING 83 #include <netinet/ip_mroute.h> 84 #endif /* MROUTING */ 85 86 #ifdef INET6 87 #include <netinet/ip6.h> 88 #include <netinet6/ip6_var.h> 89 #include <netinet6/ip6protosw.h> 90 #endif 91 92 #include <machine/stdarg.h> 93 94 #include "ipip.h" 95 #if NIPIP > 0 96 # include <netinet/ip_ipip.h> 97 #else 98 # ifdef MROUTING 99 # include <netinet/ip_mroute.h> 100 # endif 101 #endif 102 103 #include <net/net_osdep.h> 104 105 static void encap_add __P((struct encaptab *)); 106 static int mask_match __P((const struct encaptab *, const struct sockaddr *, 107 const struct sockaddr *)); 108 static void encap_fillarg __P((struct mbuf *, const struct encaptab *)); 109 110 /* rely upon BSS initialization */ 111 LIST_HEAD(, encaptab) encaptab = LIST_HEAD_INITIALIZER(&encaptab); 112 113 void 114 encap_init() 115 { 116 #if 0 117 /* 118 * we cannot use LIST_INIT() here, since drivers may want to call 119 * encap_attach(), on driver attach. encap_init() will be called 120 * on AF_INET{,6} initialization, which happens after driver 121 * initialization - using LIST_INIT() here can nuke encap_attach() 122 * from drivers. 123 */ 124 LIST_INIT(&encaptab); 125 #endif 126 } 127 128 #ifdef INET 129 void 130 #if __STDC__ 131 encap4_input(struct mbuf *m, ...) 132 #else 133 encap4_input(m, va_alist) 134 struct mbuf *m; 135 va_dcl 136 #endif 137 { 138 int off, proto; 139 struct ip *ip; 140 struct sockaddr_in s, d; 141 const struct protosw *psw; 142 struct encaptab *ep, *match; 143 va_list ap; 144 int prio, matchprio; 145 146 va_start(ap, m); 147 off = va_arg(ap, int); 148 proto = va_arg(ap, int); 149 va_end(ap); 150 151 ip = mtod(m, struct ip *); 152 153 bzero(&s, sizeof(s)); 154 s.sin_family = AF_INET; 155 s.sin_len = sizeof(struct sockaddr_in); 156 s.sin_addr = ip->ip_src; 157 bzero(&d, sizeof(d)); 158 d.sin_family = AF_INET; 159 d.sin_len = sizeof(struct sockaddr_in); 160 d.sin_addr = ip->ip_dst; 161 162 match = NULL; 163 matchprio = 0; 164 for (ep = LIST_FIRST(&encaptab); ep; ep = LIST_NEXT(ep, chain)) { 165 if (ep->af != AF_INET) 166 continue; 167 if (ep->proto >= 0 && ep->proto != proto) 168 continue; 169 if (ep->func) 170 prio = (*ep->func)(m, off, proto, ep->arg); 171 else { 172 /* 173 * it's inbound traffic, we need to match in reverse 174 * order 175 */ 176 prio = mask_match(ep, (struct sockaddr *)&d, 177 (struct sockaddr *)&s); 178 } 179 180 /* 181 * We prioritize the matches by using bit length of the 182 * matches. mask_match() and user-supplied matching function 183 * should return the bit length of the matches (for example, 184 * if both src/dst are matched for IPv4, 64 should be returned). 185 * 0 or negative return value means "it did not match". 186 * 187 * The question is, since we have two "mask" portion, we 188 * cannot really define total order between entries. 189 * For example, which of these should be preferred? 190 * mask_match() returns 48 (32 + 16) for both of them. 191 * src=3ffe::/16, dst=3ffe:501::/32 192 * src=3ffe:501::/32, dst=3ffe::/16 193 * 194 * We need to loop through all the possible candidates 195 * to get the best match - the search takes O(n) for 196 * n attachments (i.e. interfaces). 197 */ 198 if (prio <= 0) 199 continue; 200 if (prio > matchprio) { 201 matchprio = prio; 202 match = ep; 203 } 204 } 205 206 if (match) { 207 /* found a match, "match" has the best one */ 208 psw = match->psw; 209 if (psw && psw->pr_input) { 210 encap_fillarg(m, match); 211 (*psw->pr_input)(m, off, proto); 212 } else 213 m_freem(m); 214 return; 215 } 216 217 /* last resort: inject to raw socket */ 218 rip_input(m, off, proto); 219 } 220 #endif 221 222 #ifdef INET6 223 int 224 encap6_input(mp, offp, proto) 225 struct mbuf **mp; 226 int *offp; 227 int proto; 228 { 229 struct mbuf *m = *mp; 230 struct ip6_hdr *ip6; 231 struct sockaddr_in6 s, d; 232 const struct ip6protosw *psw; 233 struct encaptab *ep, *match; 234 int prio, matchprio; 235 236 ip6 = mtod(m, struct ip6_hdr *); 237 238 bzero(&s, sizeof(s)); 239 s.sin6_family = AF_INET6; 240 s.sin6_len = sizeof(struct sockaddr_in6); 241 s.sin6_addr = ip6->ip6_src; 242 bzero(&d, sizeof(d)); 243 d.sin6_family = AF_INET6; 244 d.sin6_len = sizeof(struct sockaddr_in6); 245 d.sin6_addr = ip6->ip6_dst; 246 247 match = NULL; 248 matchprio = 0; 249 for (ep = LIST_FIRST(&encaptab); ep; ep = LIST_NEXT(ep, chain)) { 250 if (ep->af != AF_INET6) 251 continue; 252 if (ep->proto >= 0 && ep->proto != proto) 253 continue; 254 if (ep->func) 255 prio = (*ep->func)(m, *offp, proto, ep->arg); 256 else { 257 /* 258 * it's inbound traffic, we need to match in reverse 259 * order 260 */ 261 prio = mask_match(ep, (struct sockaddr *)&d, 262 (struct sockaddr *)&s); 263 } 264 265 /* see encap4_input() for issues here */ 266 if (prio <= 0) 267 continue; 268 if (prio > matchprio) { 269 matchprio = prio; 270 match = ep; 271 } 272 } 273 274 if (match) { 275 /* found a match */ 276 psw = (const struct ip6protosw *)match->psw; 277 if (psw && psw->pr_input) { 278 encap_fillarg(m, match); 279 return (*psw->pr_input)(mp, offp, proto); 280 } else { 281 m_freem(m); 282 return IPPROTO_DONE; 283 } 284 } 285 286 /* last resort: inject to raw socket */ 287 return rip6_input(mp, offp, proto); 288 } 289 #endif 290 291 static void 292 encap_add(ep) 293 struct encaptab *ep; 294 { 295 296 LIST_INSERT_HEAD(&encaptab, ep, chain); 297 } 298 299 /* 300 * sp (src ptr) is always my side, and dp (dst ptr) is always remote side. 301 * length of mask (sm and dm) is assumed to be same as sp/dp. 302 * Return value will be necessary as input (cookie) for encap_detach(). 303 */ 304 const struct encaptab * 305 encap_attach(af, proto, sp, sm, dp, dm, psw, arg) 306 int af; 307 int proto; 308 const struct sockaddr *sp, *sm; 309 const struct sockaddr *dp, *dm; 310 const struct protosw *psw; 311 void *arg; 312 { 313 struct encaptab *ep; 314 int error; 315 int s; 316 317 s = splsoftnet(); 318 /* sanity check on args */ 319 if (sp->sa_len > sizeof(ep->src) || dp->sa_len > sizeof(ep->dst)) { 320 error = EINVAL; 321 goto fail; 322 } 323 if (sp->sa_len != dp->sa_len) { 324 error = EINVAL; 325 goto fail; 326 } 327 if (af != sp->sa_family || af != dp->sa_family) { 328 error = EINVAL; 329 goto fail; 330 } 331 332 /* check if anyone have already attached with exactly same config */ 333 for (ep = LIST_FIRST(&encaptab); ep; ep = LIST_NEXT(ep, chain)) { 334 if (ep->af != af) 335 continue; 336 if (ep->proto != proto) 337 continue; 338 if (ep->src.ss_len != sp->sa_len || 339 bcmp(&ep->src, sp, sp->sa_len) != 0 || 340 bcmp(&ep->srcmask, sm, sp->sa_len) != 0) 341 continue; 342 if (ep->dst.ss_len != dp->sa_len || 343 bcmp(&ep->dst, dp, dp->sa_len) != 0 || 344 bcmp(&ep->dstmask, dm, dp->sa_len) != 0) 345 continue; 346 347 error = EEXIST; 348 goto fail; 349 } 350 351 /* 352 * XXX NEED TO CHECK viftable IN THE ip_mroute CODE!!! 353 * XXX Actually, that code needs to be replaced with 354 * XXX new code that uses `gif' tunnels. 355 */ 356 357 ep = malloc(sizeof(*ep), M_NETADDR, M_NOWAIT); /*XXX*/ 358 if (ep == NULL) { 359 error = ENOBUFS; 360 goto fail; 361 } 362 bzero(ep, sizeof(*ep)); 363 364 ep->af = af; 365 ep->proto = proto; 366 bcopy(sp, &ep->src, sp->sa_len); 367 bcopy(sm, &ep->srcmask, sp->sa_len); 368 bcopy(dp, &ep->dst, dp->sa_len); 369 bcopy(dm, &ep->dstmask, dp->sa_len); 370 ep->psw = psw; 371 ep->arg = arg; 372 373 encap_add(ep); 374 375 error = 0; 376 splx(s); 377 return ep; 378 379 fail: 380 splx(s); 381 return NULL; 382 } 383 384 const struct encaptab * 385 encap_attach_func(af, proto, func, psw, arg) 386 int af; 387 int proto; 388 int (*func) __P((const struct mbuf *, int, int, void *)); 389 const struct protosw *psw; 390 void *arg; 391 { 392 struct encaptab *ep; 393 int error; 394 int s; 395 396 s = splsoftnet(); 397 /* sanity check on args */ 398 if (!func) { 399 error = EINVAL; 400 goto fail; 401 } 402 403 ep = malloc(sizeof(*ep), M_NETADDR, M_NOWAIT); /*XXX*/ 404 if (ep == NULL) { 405 error = ENOBUFS; 406 goto fail; 407 } 408 bzero(ep, sizeof(*ep)); 409 410 ep->af = af; 411 ep->proto = proto; 412 ep->func = func; 413 ep->psw = psw; 414 ep->arg = arg; 415 416 encap_add(ep); 417 418 error = 0; 419 splx(s); 420 return ep; 421 422 fail: 423 splx(s); 424 return NULL; 425 } 426 427 int 428 encap_detach(cookie) 429 const struct encaptab *cookie; 430 { 431 const struct encaptab *ep = cookie; 432 struct encaptab *p; 433 434 for (p = LIST_FIRST(&encaptab); p; p = LIST_NEXT(p, chain)) { 435 if (p == ep) { 436 LIST_REMOVE(p, chain); 437 free(p, M_NETADDR); /*XXX*/ 438 return 0; 439 } 440 } 441 442 return EINVAL; 443 } 444 445 static int 446 mask_match(ep, sp, dp) 447 const struct encaptab *ep; 448 const struct sockaddr *sp; 449 const struct sockaddr *dp; 450 { 451 struct sockaddr_storage s; 452 struct sockaddr_storage d; 453 int i; 454 const u_int8_t *p, *q; 455 u_int8_t *r; 456 int matchlen; 457 458 if (sp->sa_len > sizeof(s) || dp->sa_len > sizeof(d)) 459 return 0; 460 if (sp->sa_family != ep->af || dp->sa_family != ep->af) 461 return 0; 462 if (sp->sa_len != ep->src.ss_len || dp->sa_len != ep->dst.ss_len) 463 return 0; 464 465 matchlen = 0; 466 467 p = (const u_int8_t *)sp; 468 q = (const u_int8_t *)&ep->srcmask; 469 r = (u_int8_t *)&s; 470 for (i = 0 ; i < sp->sa_len; i++) { 471 r[i] = p[i] & q[i]; 472 /* XXX estimate */ 473 matchlen += (q[i] ? 8 : 0); 474 } 475 476 p = (const u_int8_t *)dp; 477 q = (const u_int8_t *)&ep->dstmask; 478 r = (u_int8_t *)&d; 479 for (i = 0 ; i < dp->sa_len; i++) { 480 r[i] = p[i] & q[i]; 481 /* XXX rough estimate */ 482 matchlen += (q[i] ? 8 : 0); 483 } 484 485 /* need to overwrite len/family portion as we don't compare them */ 486 s.ss_len = sp->sa_len; 487 s.ss_family = sp->sa_family; 488 d.ss_len = dp->sa_len; 489 d.ss_family = dp->sa_family; 490 491 if (bcmp(&s, &ep->src, ep->src.ss_len) == 0 && 492 bcmp(&d, &ep->dst, ep->dst.ss_len) == 0) { 493 return matchlen; 494 } else 495 return 0; 496 } 497 498 static void 499 encap_fillarg(m, ep) 500 struct mbuf *m; 501 const struct encaptab *ep; 502 { 503 #if 0 504 m->m_pkthdr.aux = ep->arg; 505 #else 506 struct mbuf *n; 507 508 n = m_aux_add(m, AF_INET, IPPROTO_IPV4); 509 if (n) { 510 *mtod(n, void **) = ep->arg; 511 n->m_len = sizeof(void *); 512 } 513 #endif 514 } 515 516 void * 517 encap_getarg(m) 518 struct mbuf *m; 519 { 520 void *p; 521 #if 0 522 p = m->m_pkthdr.aux; 523 m->m_pkthdr.aux = NULL; 524 return p; 525 #else 526 struct mbuf *n; 527 528 p = NULL; 529 n = m_aux_find(m, AF_INET, IPPROTO_IPV4); 530 if (n) { 531 if (n->m_len == sizeof(void *)) 532 p = *mtod(n, void **); 533 m_aux_delete(m, n); 534 } 535 return p; 536 #endif 537 } 538