1 /*
2 * Copyright (C) 1993-2001, 2003 by Darren Reed.
3 *
4 * See the IPFILTER.LICENCE file for details on licencing.
5 *
6 * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
7 */
8
9 #if !defined(lint)
10 static const char sccsid[] = "@(#)ip_fil_solaris.c 1.7 07/22/06 (C) 1993-2000 Darren Reed";
11 static const char rcsid[] = "@(#)$Id: ip_fil_solaris.c,v 2.62.2.19 2005/07/13 21:40:46 darrenr Exp $";
12 #endif
13
14 #include <sys/types.h>
15 #include <sys/errno.h>
16 #include <sys/param.h>
17 #include <sys/cpuvar.h>
18 #include <sys/open.h>
19 #include <sys/ioctl.h>
20 #include <sys/filio.h>
21 #include <sys/systm.h>
22 #include <sys/strsubr.h>
23 #include <sys/cred.h>
24 #include <sys/ddi.h>
25 #include <sys/sunddi.h>
26 #include <sys/ksynch.h>
27 #include <sys/kmem.h>
28 #include <sys/mkdev.h>
29 #include <sys/protosw.h>
30 #include <sys/socket.h>
31 #include <sys/dditypes.h>
32 #include <sys/cmn_err.h>
33 #include <sys/zone.h>
34 #include <net/if.h>
35 #include <net/af.h>
36 #include <net/route.h>
37 #include <netinet/in.h>
38 #include <netinet/in_systm.h>
39 #include <netinet/ip.h>
40 #include <netinet/ip_var.h>
41 #include <netinet/tcp.h>
42 #include <netinet/udp.h>
43 #include <netinet/tcpip.h>
44 #include <netinet/ip_icmp.h>
45 #include "netinet/ip_compat.h"
46 #ifdef USE_INET6
47 # include <netinet/icmp6.h>
48 #endif
49 #include "netinet/ip_fil.h"
50 #include "netinet/ip_nat.h"
51 #include "netinet/ip_frag.h"
52 #include "netinet/ip_state.h"
53 #include "netinet/ip_auth.h"
54 #include "netinet/ip_proxy.h"
55 #include "netinet/ipf_stack.h"
56 #ifdef IPFILTER_LOOKUP
57 # include "netinet/ip_lookup.h"
58 #endif
59 #include <inet/ip_ire.h>
60
61 #include <sys/md5.h>
62 #include <sys/neti.h>
63
/*
 * Forward declarations.
 *
 * The ipf_nic_event_v[46] and ipf_hook[46]_{in,out,loop_in,loop_out}
 * callbacks are the handlers that iplattach() binds to hook structures with
 * HOOK_INIT() before registering them with net_hook_register().
 *
 * NOTE(review): frzerostats is declared here, but the only call in this
 * part of the file (iplioctl, SIOCFRZST) is to fr_zerostats(); similarly
 * ipf_geniter is declared while iplioctl calls ipf_genericiter().  These
 * look like stale prototypes -- confirm against the shared headers.
 */
static int frzerostats __P((caddr_t, ipf_stack_t *));
static int fr_setipfloopback __P((int, ipf_stack_t *));
static int fr_enableipf __P((ipf_stack_t *, int));
static int fr_send_ip __P((fr_info_t *fin, mblk_t *m, mblk_t **mp));
static int ipf_nic_event_v4 __P((hook_event_token_t, hook_data_t, void *));
static int ipf_nic_event_v6 __P((hook_event_token_t, hook_data_t, void *));
static int ipf_hook __P((hook_data_t, int, int, void *));
static int ipf_hook4_in __P((hook_event_token_t, hook_data_t, void *));
static int ipf_hook4_out __P((hook_event_token_t, hook_data_t, void *));
static int ipf_hook4_loop_out __P((hook_event_token_t, hook_data_t,
    void *));
static int ipf_hook4_loop_in __P((hook_event_token_t, hook_data_t, void *));
static int ipf_hook4 __P((hook_data_t, int, int, void *));
static int ipf_hook6_out __P((hook_event_token_t, hook_data_t, void *));
static int ipf_hook6_in __P((hook_event_token_t, hook_data_t, void *));
static int ipf_hook6_loop_out __P((hook_event_token_t, hook_data_t,
    void *));
static int ipf_hook6_loop_in __P((hook_event_token_t, hook_data_t,
    void *));
static int ipf_hook6 __P((hook_data_t, int, int, void *));
extern int ipf_geniter __P((ipftoken_t *, ipfgeniter_t *, ipf_stack_t *));
extern int ipf_frruleiter __P((void *, int, void *, ipf_stack_t *));
86
/*
 * Pre-Solaris 10 only: pointers to IP tunables.  They start out NULL and
 * are filled in by iplattach(), which points ip_forwarding at ip_g_forward
 * (SOLARIS2 >= 8) and looks the others up by name in ip_param_arr[]
 * (that scan is only compiled for SOLARIS2 <= 8).  fr_send_ip() reads
 * ip_ttl_ptr and ip_mtudisc, and iplattach()/ipldetach() write through
 * ip_forwarding and ip6_forwarding; all users check for NULL first.
 * The element type differs between releases, hence the conditionals.
 */
#if SOLARIS2 < 10
#if SOLARIS2 >= 7
u_int *ip_ttl_ptr = NULL;
u_int *ip_mtudisc = NULL;
# if SOLARIS2 >= 8
int *ip_forwarding = NULL;
u_int *ip6_forwarding = NULL;
# else
u_int *ip_forwarding = NULL;
# endif
#else
u_long *ip_ttl_ptr = NULL;
u_long *ip_mtudisc = NULL;
u_long *ip_forwarding = NULL;
#endif
#endif
103
104
105 /* ------------------------------------------------------------------------ */
106 /* Function: ipldetach */
107 /* Returns: int - 0 == success, else error. */
108 /* Parameters: Nil */
109 /* */
110 /* This function is responsible for undoing anything that might have been */
111 /* done in a call to iplattach(). It must be able to clean up from a call */
112 /* to iplattach() that did not succeed. Why might that happen? Someone */
113 /* configures a table to be so large that we cannot allocate enough memory */
114 /* for it. */
115 /* ------------------------------------------------------------------------ */
ipldetach(ifs)116 int ipldetach(ifs)
117 ipf_stack_t *ifs;
118 {
119
120 ASSERT(rw_read_locked(&ifs->ifs_ipf_global.ipf_lk) == 0);
121
122 #if SOLARIS2 < 10
123
124 if (ifs->ifs_fr_control_forwarding & 2) {
125 if (ip_forwarding != NULL)
126 *ip_forwarding = 0;
127 #if SOLARIS2 >= 8
128 if (ip6_forwarding != NULL)
129 *ip6_forwarding = 0;
130 #endif
131 }
132 #endif
133
134 /*
135 * This lock needs to be dropped around the net_hook_unregister calls
136 * because we can deadlock here with:
137 * W(ipf_global)->R(hook_family)->W(hei_lock) (this code path) vs
138 * R(hook_family)->R(hei_lock)->R(ipf_global) (active hook running)
139 */
140 RWLOCK_EXIT(&ifs->ifs_ipf_global);
141
142 #define UNDO_HOOK(_f, _b, _e, _h) \
143 do { \
144 if (ifs->_f != NULL) { \
145 if (ifs->_b) { \
146 ifs->_b = (net_hook_unregister(ifs->_f, \
147 _e, ifs->_h) != 0); \
148 if (!ifs->_b) { \
149 hook_free(ifs->_h); \
150 ifs->_h = NULL; \
151 } \
152 } else if (ifs->_h != NULL) { \
153 hook_free(ifs->_h); \
154 ifs->_h = NULL; \
155 } \
156 } \
157 _NOTE(CONSTCOND) \
158 } while (0)
159
160 /*
161 * Remove IPv6 Hooks
162 */
163 if (ifs->ifs_ipf_ipv6 != NULL) {
164 UNDO_HOOK(ifs_ipf_ipv6, ifs_hook6_physical_in,
165 NH_PHYSICAL_IN, ifs_ipfhook6_in);
166 UNDO_HOOK(ifs_ipf_ipv6, ifs_hook6_physical_out,
167 NH_PHYSICAL_OUT, ifs_ipfhook6_out);
168 UNDO_HOOK(ifs_ipf_ipv6, ifs_hook6_nic_events,
169 NH_NIC_EVENTS, ifs_ipfhook6_nicevents);
170 UNDO_HOOK(ifs_ipf_ipv6, ifs_hook6_loopback_in,
171 NH_LOOPBACK_IN, ifs_ipfhook6_loop_in);
172 UNDO_HOOK(ifs_ipf_ipv6, ifs_hook6_loopback_out,
173 NH_LOOPBACK_OUT, ifs_ipfhook6_loop_out);
174
175 if (net_protocol_release(ifs->ifs_ipf_ipv6) != 0)
176 goto detach_failed;
177 ifs->ifs_ipf_ipv6 = NULL;
178 }
179
180 /*
181 * Remove IPv4 Hooks
182 */
183 if (ifs->ifs_ipf_ipv4 != NULL) {
184 UNDO_HOOK(ifs_ipf_ipv4, ifs_hook4_physical_in,
185 NH_PHYSICAL_IN, ifs_ipfhook4_in);
186 UNDO_HOOK(ifs_ipf_ipv4, ifs_hook4_physical_out,
187 NH_PHYSICAL_OUT, ifs_ipfhook4_out);
188 UNDO_HOOK(ifs_ipf_ipv4, ifs_hook4_nic_events,
189 NH_NIC_EVENTS, ifs_ipfhook4_nicevents);
190 UNDO_HOOK(ifs_ipf_ipv4, ifs_hook4_loopback_in,
191 NH_LOOPBACK_IN, ifs_ipfhook4_loop_in);
192 UNDO_HOOK(ifs_ipf_ipv4, ifs_hook4_loopback_out,
193 NH_LOOPBACK_OUT, ifs_ipfhook4_loop_out);
194
195 if (net_protocol_release(ifs->ifs_ipf_ipv4) != 0)
196 goto detach_failed;
197 ifs->ifs_ipf_ipv4 = NULL;
198 }
199
200 #undef UNDO_HOOK
201
202 #ifdef IPFDEBUG
203 cmn_err(CE_CONT, "ipldetach()\n");
204 #endif
205
206 WRITE_ENTER(&ifs->ifs_ipf_global);
207 fr_deinitialise(ifs);
208
209 (void) frflush(IPL_LOGIPF, 0, FR_INQUE|FR_OUTQUE|FR_INACTIVE, ifs);
210 (void) frflush(IPL_LOGIPF, 0, FR_INQUE|FR_OUTQUE, ifs);
211
212 if (ifs->ifs_ipf_locks_done == 1) {
213 MUTEX_DESTROY(&ifs->ifs_ipf_timeoutlock);
214 MUTEX_DESTROY(&ifs->ifs_ipf_rw);
215 RW_DESTROY(&ifs->ifs_ipf_tokens);
216 RW_DESTROY(&ifs->ifs_ipf_ipidfrag);
217 ifs->ifs_ipf_locks_done = 0;
218 }
219
220 if (ifs->ifs_hook4_physical_in || ifs->ifs_hook4_physical_out ||
221 ifs->ifs_hook4_nic_events || ifs->ifs_hook4_loopback_in ||
222 ifs->ifs_hook4_loopback_out || ifs->ifs_hook6_nic_events ||
223 ifs->ifs_hook6_physical_in || ifs->ifs_hook6_physical_out ||
224 ifs->ifs_hook6_loopback_in || ifs->ifs_hook6_loopback_out)
225 return -1;
226
227 return 0;
228
229 detach_failed:
230 WRITE_ENTER(&ifs->ifs_ipf_global);
231 return -1;
232 }
233
iplattach(ifs)234 int iplattach(ifs)
235 ipf_stack_t *ifs;
236 {
237 #if SOLARIS2 < 10
238 int i;
239 #endif
240 netid_t id = ifs->ifs_netid;
241
242 #ifdef IPFDEBUG
243 cmn_err(CE_CONT, "iplattach()\n");
244 #endif
245
246 ASSERT(rw_read_locked(&ifs->ifs_ipf_global.ipf_lk) == 0);
247 ifs->ifs_fr_flags = IPF_LOGGING;
248 #ifdef _KERNEL
249 ifs->ifs_fr_update_ipid = 0;
250 #else
251 ifs->ifs_fr_update_ipid = 1;
252 #endif
253 ifs->ifs_fr_minttl = 4;
254 ifs->ifs_fr_icmpminfragmtu = 68;
255 #if defined(IPFILTER_DEFAULT_BLOCK)
256 ifs->ifs_fr_pass = FR_BLOCK|FR_NOMATCH;
257 #else
258 ifs->ifs_fr_pass = (IPF_DEFAULT_PASS)|FR_NOMATCH;
259 #endif
260
261 bzero((char *)ifs->ifs_frcache, sizeof(ifs->ifs_frcache));
262 MUTEX_INIT(&ifs->ifs_ipf_rw, "ipf rw mutex");
263 MUTEX_INIT(&ifs->ifs_ipf_timeoutlock, "ipf timeout lock mutex");
264 RWLOCK_INIT(&ifs->ifs_ipf_ipidfrag, "ipf IP NAT-Frag rwlock");
265 RWLOCK_INIT(&ifs->ifs_ipf_tokens, "ipf token rwlock");
266 ifs->ifs_ipf_locks_done = 1;
267
268 if (fr_initialise(ifs) < 0)
269 return -1;
270
271 HOOK_INIT(ifs->ifs_ipfhook4_nicevents, ipf_nic_event_v4,
272 "ipfilter_hook4_nicevents", ifs);
273 HOOK_INIT(ifs->ifs_ipfhook4_in, ipf_hook4_in,
274 "ipfilter_hook4_in", ifs);
275 HOOK_INIT(ifs->ifs_ipfhook4_out, ipf_hook4_out,
276 "ipfilter_hook4_out", ifs);
277 HOOK_INIT(ifs->ifs_ipfhook4_loop_in, ipf_hook4_loop_in,
278 "ipfilter_hook4_loop_in", ifs);
279 HOOK_INIT(ifs->ifs_ipfhook4_loop_out, ipf_hook4_loop_out,
280 "ipfilter_hook4_loop_out", ifs);
281
282 /*
283 * If we hold this lock over all of the net_hook_register calls, we
284 * can cause a deadlock to occur with the following lock ordering:
285 * W(ipf_global)->R(hook_family)->W(hei_lock) (this code path) vs
286 * R(hook_family)->R(hei_lock)->R(ipf_global) (packet path)
287 */
288 RWLOCK_EXIT(&ifs->ifs_ipf_global);
289
290 /*
291 * Add IPv4 hooks
292 */
293 ifs->ifs_ipf_ipv4 = net_protocol_lookup(id, NHF_INET);
294 if (ifs->ifs_ipf_ipv4 == NULL)
295 goto hookup_failed;
296
297 ifs->ifs_hook4_nic_events = (net_hook_register(ifs->ifs_ipf_ipv4,
298 NH_NIC_EVENTS, ifs->ifs_ipfhook4_nicevents) == 0);
299 if (!ifs->ifs_hook4_nic_events)
300 goto hookup_failed;
301
302 ifs->ifs_hook4_physical_in = (net_hook_register(ifs->ifs_ipf_ipv4,
303 NH_PHYSICAL_IN, ifs->ifs_ipfhook4_in) == 0);
304 if (!ifs->ifs_hook4_physical_in)
305 goto hookup_failed;
306
307 ifs->ifs_hook4_physical_out = (net_hook_register(ifs->ifs_ipf_ipv4,
308 NH_PHYSICAL_OUT, ifs->ifs_ipfhook4_out) == 0);
309 if (!ifs->ifs_hook4_physical_out)
310 goto hookup_failed;
311
312 if (ifs->ifs_ipf_loopback) {
313 ifs->ifs_hook4_loopback_in = (net_hook_register(
314 ifs->ifs_ipf_ipv4, NH_LOOPBACK_IN,
315 ifs->ifs_ipfhook4_loop_in) == 0);
316 if (!ifs->ifs_hook4_loopback_in)
317 goto hookup_failed;
318
319 ifs->ifs_hook4_loopback_out = (net_hook_register(
320 ifs->ifs_ipf_ipv4, NH_LOOPBACK_OUT,
321 ifs->ifs_ipfhook4_loop_out) == 0);
322 if (!ifs->ifs_hook4_loopback_out)
323 goto hookup_failed;
324 }
325 /*
326 * Add IPv6 hooks
327 */
328 ifs->ifs_ipf_ipv6 = net_protocol_lookup(id, NHF_INET6);
329 if (ifs->ifs_ipf_ipv6 == NULL)
330 goto hookup_failed;
331
332 HOOK_INIT(ifs->ifs_ipfhook6_nicevents, ipf_nic_event_v6,
333 "ipfilter_hook6_nicevents", ifs);
334 HOOK_INIT(ifs->ifs_ipfhook6_in, ipf_hook6_in,
335 "ipfilter_hook6_in", ifs);
336 HOOK_INIT(ifs->ifs_ipfhook6_out, ipf_hook6_out,
337 "ipfilter_hook6_out", ifs);
338 HOOK_INIT(ifs->ifs_ipfhook6_loop_in, ipf_hook6_loop_in,
339 "ipfilter_hook6_loop_in", ifs);
340 HOOK_INIT(ifs->ifs_ipfhook6_loop_out, ipf_hook6_loop_out,
341 "ipfilter_hook6_loop_out", ifs);
342
343 ifs->ifs_hook6_nic_events = (net_hook_register(ifs->ifs_ipf_ipv6,
344 NH_NIC_EVENTS, ifs->ifs_ipfhook6_nicevents) == 0);
345 if (!ifs->ifs_hook6_nic_events)
346 goto hookup_failed;
347
348 ifs->ifs_hook6_physical_in = (net_hook_register(ifs->ifs_ipf_ipv6,
349 NH_PHYSICAL_IN, ifs->ifs_ipfhook6_in) == 0);
350 if (!ifs->ifs_hook6_physical_in)
351 goto hookup_failed;
352
353 ifs->ifs_hook6_physical_out = (net_hook_register(ifs->ifs_ipf_ipv6,
354 NH_PHYSICAL_OUT, ifs->ifs_ipfhook6_out) == 0);
355 if (!ifs->ifs_hook6_physical_out)
356 goto hookup_failed;
357
358 if (ifs->ifs_ipf_loopback) {
359 ifs->ifs_hook6_loopback_in = (net_hook_register(
360 ifs->ifs_ipf_ipv6, NH_LOOPBACK_IN,
361 ifs->ifs_ipfhook6_loop_in) == 0);
362 if (!ifs->ifs_hook6_loopback_in)
363 goto hookup_failed;
364
365 ifs->ifs_hook6_loopback_out = (net_hook_register(
366 ifs->ifs_ipf_ipv6, NH_LOOPBACK_OUT,
367 ifs->ifs_ipfhook6_loop_out) == 0);
368 if (!ifs->ifs_hook6_loopback_out)
369 goto hookup_failed;
370 }
371
372 /*
373 * Reacquire ipf_global, now it is safe.
374 */
375 WRITE_ENTER(&ifs->ifs_ipf_global);
376
377 /* Do not use private interface ip_params_arr[] in Solaris 10 */
378 #if SOLARIS2 < 10
379
380 #if SOLARIS2 >= 8
381 ip_forwarding = &ip_g_forward;
382 #endif
383 /*
384 * XXX - There is no terminator for this array, so it is not possible
385 * to tell if what we are looking for is missing and go off the end
386 * of the array.
387 */
388
389 #if SOLARIS2 <= 8
390 for (i = 0; ; i++) {
391 if (!strcmp(ip_param_arr[i].ip_param_name, "ip_def_ttl")) {
392 ip_ttl_ptr = &ip_param_arr[i].ip_param_value;
393 } else if (!strcmp(ip_param_arr[i].ip_param_name,
394 "ip_path_mtu_discovery")) {
395 ip_mtudisc = &ip_param_arr[i].ip_param_value;
396 }
397 #if SOLARIS2 < 8
398 else if (!strcmp(ip_param_arr[i].ip_param_name,
399 "ip_forwarding")) {
400 ip_forwarding = &ip_param_arr[i].ip_param_value;
401 }
402 #else
403 else if (!strcmp(ip_param_arr[i].ip_param_name,
404 "ip6_forwarding")) {
405 ip6_forwarding = &ip_param_arr[i].ip_param_value;
406 }
407 #endif
408
409 if (ip_mtudisc != NULL && ip_ttl_ptr != NULL &&
410 #if SOLARIS2 >= 8
411 ip6_forwarding != NULL &&
412 #endif
413 ip_forwarding != NULL)
414 break;
415 }
416 #endif
417
418 if (ifs->ifs_fr_control_forwarding & 1) {
419 if (ip_forwarding != NULL)
420 *ip_forwarding = 1;
421 #if SOLARIS2 >= 8
422 if (ip6_forwarding != NULL)
423 *ip6_forwarding = 1;
424 #endif
425 }
426
427 #endif
428
429 return 0;
430 hookup_failed:
431 WRITE_ENTER(&ifs->ifs_ipf_global);
432 return -1;
433 }
434
fr_setipfloopback(set,ifs)435 static int fr_setipfloopback(set, ifs)
436 int set;
437 ipf_stack_t *ifs;
438 {
439 if (ifs->ifs_ipf_ipv4 == NULL || ifs->ifs_ipf_ipv6 == NULL)
440 return EFAULT;
441
442 if (set && !ifs->ifs_ipf_loopback) {
443 ifs->ifs_ipf_loopback = 1;
444
445 ifs->ifs_hook4_loopback_in = (net_hook_register(
446 ifs->ifs_ipf_ipv4, NH_LOOPBACK_IN,
447 ifs->ifs_ipfhook4_loop_in) == 0);
448 if (!ifs->ifs_hook4_loopback_in)
449 return EINVAL;
450
451 ifs->ifs_hook4_loopback_out = (net_hook_register(
452 ifs->ifs_ipf_ipv4, NH_LOOPBACK_OUT,
453 ifs->ifs_ipfhook4_loop_out) == 0);
454 if (!ifs->ifs_hook4_loopback_out)
455 return EINVAL;
456
457 ifs->ifs_hook6_loopback_in = (net_hook_register(
458 ifs->ifs_ipf_ipv6, NH_LOOPBACK_IN,
459 ifs->ifs_ipfhook6_loop_in) == 0);
460 if (!ifs->ifs_hook6_loopback_in)
461 return EINVAL;
462
463 ifs->ifs_hook6_loopback_out = (net_hook_register(
464 ifs->ifs_ipf_ipv6, NH_LOOPBACK_OUT,
465 ifs->ifs_ipfhook6_loop_out) == 0);
466 if (!ifs->ifs_hook6_loopback_out)
467 return EINVAL;
468
469 } else if (!set && ifs->ifs_ipf_loopback) {
470 ifs->ifs_ipf_loopback = 0;
471
472 ifs->ifs_hook4_loopback_in =
473 (net_hook_unregister(ifs->ifs_ipf_ipv4,
474 NH_LOOPBACK_IN, ifs->ifs_ipfhook4_loop_in) != 0);
475 if (ifs->ifs_hook4_loopback_in)
476 return EBUSY;
477
478 ifs->ifs_hook4_loopback_out =
479 (net_hook_unregister(ifs->ifs_ipf_ipv4,
480 NH_LOOPBACK_OUT, ifs->ifs_ipfhook4_loop_out) != 0);
481 if (ifs->ifs_hook4_loopback_out)
482 return EBUSY;
483
484 ifs->ifs_hook6_loopback_in =
485 (net_hook_unregister(ifs->ifs_ipf_ipv6,
486 NH_LOOPBACK_IN, ifs->ifs_ipfhook4_loop_in) != 0);
487 if (ifs->ifs_hook6_loopback_in)
488 return EBUSY;
489
490 ifs->ifs_hook6_loopback_out =
491 (net_hook_unregister(ifs->ifs_ipf_ipv6,
492 NH_LOOPBACK_OUT, ifs->ifs_ipfhook6_loop_out) != 0);
493 if (ifs->ifs_hook6_loopback_out)
494 return EBUSY;
495 }
496 return 0;
497 }
498
499
500 /*
501 * Filter ioctl interface.
502 */
503 /*ARGSUSED*/
iplioctl(dev,cmd,data,mode,cp,rp)504 int iplioctl(dev, cmd, data, mode, cp, rp)
505 dev_t dev;
506 int cmd;
507 #if SOLARIS2 >= 7
508 intptr_t data;
509 #else
510 int *data;
511 #endif
512 int mode;
513 cred_t *cp;
514 int *rp;
515 {
516 int error = 0, tmp;
517 friostat_t fio;
518 minor_t unit;
519 u_int enable;
520 ipf_stack_t *ifs;
521
522 #ifdef IPFDEBUG
523 cmn_err(CE_CONT, "iplioctl(%x,%x,%x,%d,%x,%d)\n",
524 dev, cmd, data, mode, cp, rp);
525 #endif
526 unit = getminor(dev);
527 if (IPL_LOGMAX < unit)
528 return ENXIO;
529
530 /*
531 * As we're calling ipf_find_stack in user space, from a given zone
532 * to find the stack pointer for this zone, there is no need to have
533 * a hold/refence count here.
534 */
535 ifs = ipf_find_stack(crgetzoneid(cp));
536 ASSERT(ifs != NULL);
537
538 if (ifs->ifs_fr_running <= 0) {
539 if (unit != IPL_LOGIPF) {
540 return EIO;
541 }
542 if (cmd != SIOCIPFGETNEXT && cmd != SIOCIPFGET &&
543 cmd != SIOCIPFSET && cmd != SIOCFRENB &&
544 cmd != SIOCGETFS && cmd != SIOCGETFF) {
545 return EIO;
546 }
547 }
548
549 READ_ENTER(&ifs->ifs_ipf_global);
550 if (ifs->ifs_fr_enable_active != 0) {
551 RWLOCK_EXIT(&ifs->ifs_ipf_global);
552 return EBUSY;
553 }
554
555 error = fr_ioctlswitch(unit, (caddr_t)data, cmd, mode, crgetuid(cp),
556 curproc, ifs);
557 if (error != -1) {
558 RWLOCK_EXIT(&ifs->ifs_ipf_global);
559 return error;
560 }
561 error = 0;
562
563 switch (cmd)
564 {
565 case SIOCFRENB :
566 if (!(mode & FWRITE))
567 error = EPERM;
568 else {
569 error = COPYIN((caddr_t)data, (caddr_t)&enable,
570 sizeof(enable));
571 if (error != 0) {
572 error = EFAULT;
573 break;
574 }
575
576 RWLOCK_EXIT(&ifs->ifs_ipf_global);
577 WRITE_ENTER(&ifs->ifs_ipf_global);
578
579 /*
580 * We must recheck fr_enable_active here, since we've
581 * dropped ifs_ipf_global from R in order to get it
582 * exclusively.
583 */
584 if (ifs->ifs_fr_enable_active == 0) {
585 ifs->ifs_fr_enable_active = 1;
586 error = fr_enableipf(ifs, enable);
587 ifs->ifs_fr_enable_active = 0;
588 }
589 }
590 break;
591 case SIOCIPFSET :
592 if (!(mode & FWRITE)) {
593 error = EPERM;
594 break;
595 }
596 /* FALLTHRU */
597 case SIOCIPFGETNEXT :
598 case SIOCIPFGET :
599 error = fr_ipftune(cmd, (void *)data, ifs);
600 break;
601 case SIOCSETFF :
602 if (!(mode & FWRITE))
603 error = EPERM;
604 else {
605 error = COPYIN((caddr_t)data,
606 (caddr_t)&ifs->ifs_fr_flags,
607 sizeof(ifs->ifs_fr_flags));
608 if (error != 0)
609 error = EFAULT;
610 }
611 break;
612 case SIOCIPFLP :
613 error = COPYIN((caddr_t)data, (caddr_t)&tmp,
614 sizeof(tmp));
615 if (error != 0)
616 error = EFAULT;
617 else
618 error = fr_setipfloopback(tmp, ifs);
619 break;
620 case SIOCGETFF :
621 error = COPYOUT((caddr_t)&ifs->ifs_fr_flags, (caddr_t)data,
622 sizeof(ifs->ifs_fr_flags));
623 if (error != 0)
624 error = EFAULT;
625 break;
626 case SIOCFUNCL :
627 error = fr_resolvefunc((void *)data);
628 break;
629 case SIOCINAFR :
630 case SIOCRMAFR :
631 case SIOCADAFR :
632 case SIOCZRLST :
633 if (!(mode & FWRITE))
634 error = EPERM;
635 else
636 error = frrequest(unit, cmd, (caddr_t)data,
637 ifs->ifs_fr_active, 1, ifs);
638 break;
639 case SIOCINIFR :
640 case SIOCRMIFR :
641 case SIOCADIFR :
642 if (!(mode & FWRITE))
643 error = EPERM;
644 else
645 error = frrequest(unit, cmd, (caddr_t)data,
646 1 - ifs->ifs_fr_active, 1, ifs);
647 break;
648 case SIOCSWAPA :
649 if (!(mode & FWRITE))
650 error = EPERM;
651 else {
652 WRITE_ENTER(&ifs->ifs_ipf_mutex);
653 bzero((char *)ifs->ifs_frcache,
654 sizeof (ifs->ifs_frcache));
655 error = COPYOUT((caddr_t)&ifs->ifs_fr_active,
656 (caddr_t)data,
657 sizeof(ifs->ifs_fr_active));
658 if (error != 0)
659 error = EFAULT;
660 else
661 ifs->ifs_fr_active = 1 - ifs->ifs_fr_active;
662 RWLOCK_EXIT(&ifs->ifs_ipf_mutex);
663 }
664 break;
665 case SIOCGETFS :
666 fr_getstat(&fio, ifs);
667 error = fr_outobj((void *)data, &fio, IPFOBJ_IPFSTAT);
668 break;
669 case SIOCFRZST :
670 if (!(mode & FWRITE))
671 error = EPERM;
672 else
673 error = fr_zerostats((caddr_t)data, ifs);
674 break;
675 case SIOCIPFFL :
676 if (!(mode & FWRITE))
677 error = EPERM;
678 else {
679 error = COPYIN((caddr_t)data, (caddr_t)&tmp,
680 sizeof(tmp));
681 if (!error) {
682 tmp = frflush(unit, 4, tmp, ifs);
683 error = COPYOUT((caddr_t)&tmp, (caddr_t)data,
684 sizeof(tmp));
685 if (error != 0)
686 error = EFAULT;
687 } else
688 error = EFAULT;
689 }
690 break;
691 #ifdef USE_INET6
692 case SIOCIPFL6 :
693 if (!(mode & FWRITE))
694 error = EPERM;
695 else {
696 error = COPYIN((caddr_t)data, (caddr_t)&tmp,
697 sizeof(tmp));
698 if (!error) {
699 tmp = frflush(unit, 6, tmp, ifs);
700 error = COPYOUT((caddr_t)&tmp, (caddr_t)data,
701 sizeof(tmp));
702 if (error != 0)
703 error = EFAULT;
704 } else
705 error = EFAULT;
706 }
707 break;
708 #endif
709 case SIOCSTLCK :
710 error = COPYIN((caddr_t)data, (caddr_t)&tmp, sizeof(tmp));
711 if (error == 0) {
712 ifs->ifs_fr_state_lock = tmp;
713 ifs->ifs_fr_nat_lock = tmp;
714 ifs->ifs_fr_frag_lock = tmp;
715 ifs->ifs_fr_auth_lock = tmp;
716 } else
717 error = EFAULT;
718 break;
719 #ifdef IPFILTER_LOG
720 case SIOCIPFFB :
721 if (!(mode & FWRITE))
722 error = EPERM;
723 else {
724 tmp = ipflog_clear(unit, ifs);
725 error = COPYOUT((caddr_t)&tmp, (caddr_t)data,
726 sizeof(tmp));
727 if (error)
728 error = EFAULT;
729 }
730 break;
731 #endif /* IPFILTER_LOG */
732 case SIOCFRSYN :
733 if (!(mode & FWRITE))
734 error = EPERM;
735 else {
736 RWLOCK_EXIT(&ifs->ifs_ipf_global);
737 WRITE_ENTER(&ifs->ifs_ipf_global);
738
739 frsync(IPFSYNC_RESYNC, 0, NULL, NULL, ifs);
740 fr_natifpsync(IPFSYNC_RESYNC, 0, NULL, NULL, ifs);
741 fr_nataddrsync(0, NULL, NULL, ifs);
742 fr_statesync(IPFSYNC_RESYNC, 0, NULL, NULL, ifs);
743 error = 0;
744 }
745 break;
746 case SIOCGFRST :
747 error = fr_outobj((void *)data, fr_fragstats(ifs),
748 IPFOBJ_FRAGSTAT);
749 break;
750 case FIONREAD :
751 #ifdef IPFILTER_LOG
752 tmp = (int)ifs->ifs_iplused[IPL_LOGIPF];
753
754 error = COPYOUT((caddr_t)&tmp, (caddr_t)data, sizeof(tmp));
755 if (error != 0)
756 error = EFAULT;
757 #endif
758 break;
759 case SIOCIPFITER :
760 error = ipf_frruleiter((caddr_t)data, crgetuid(cp),
761 curproc, ifs);
762 break;
763
764 case SIOCGENITER :
765 error = ipf_genericiter((caddr_t)data, crgetuid(cp),
766 curproc, ifs);
767 break;
768
769 case SIOCIPFDELTOK :
770 error = BCOPYIN((caddr_t)data, (caddr_t)&tmp, sizeof(tmp));
771 if (error != 0) {
772 error = EFAULT;
773 } else {
774 error = ipf_deltoken(tmp, crgetuid(cp), curproc, ifs);
775 }
776 break;
777
778 default :
779 #ifdef IPFDEBUG
780 cmn_err(CE_NOTE, "Unknown: cmd 0x%x data %p",
781 cmd, (void *)data);
782 #endif
783 error = EINVAL;
784 break;
785 }
786 RWLOCK_EXIT(&ifs->ifs_ipf_global);
787 return error;
788 }
789
790
fr_enableipf(ifs,enable)791 static int fr_enableipf(ifs, enable)
792 ipf_stack_t *ifs;
793 int enable;
794 {
795 int error;
796
797 if (!enable) {
798 error = ipldetach(ifs);
799 if (error == 0)
800 ifs->ifs_fr_running = -1;
801 return error;
802 }
803
804 if (ifs->ifs_fr_running > 0)
805 return 0;
806
807 error = iplattach(ifs);
808 if (error == 0) {
809 if (ifs->ifs_fr_timer_id == NULL) {
810 int hz = drv_usectohz(500000);
811
812 ifs->ifs_fr_timer_id = timeout(fr_slowtimer,
813 (void *)ifs,
814 hz);
815 }
816 ifs->ifs_fr_running = 1;
817 } else {
818 (void) ipldetach(ifs);
819 }
820 return error;
821 }
822
823
get_unit(name,v,ifs)824 phy_if_t get_unit(name, v, ifs)
825 char *name;
826 int v;
827 ipf_stack_t *ifs;
828 {
829 net_handle_t nif;
830
831 if (v == 4)
832 nif = ifs->ifs_ipf_ipv4;
833 else if (v == 6)
834 nif = ifs->ifs_ipf_ipv6;
835 else
836 return 0;
837
838 return (net_phylookup(nif, name));
839 }
840
841 /*
842 * routines below for saving IP headers to buffer
843 */
844 /*ARGSUSED*/
iplopen(devp,flags,otype,cred)845 int iplopen(devp, flags, otype, cred)
846 dev_t *devp;
847 int flags, otype;
848 cred_t *cred;
849 {
850 minor_t min = getminor(*devp);
851
852 #ifdef IPFDEBUG
853 cmn_err(CE_CONT, "iplopen(%x,%x,%x,%x)\n", devp, flags, otype, cred);
854 #endif
855 if (!(otype & OTYP_CHR))
856 return ENXIO;
857
858 min = (IPL_LOGMAX < min) ? ENXIO : 0;
859 return min;
860 }
861
862
863 /*ARGSUSED*/
iplclose(dev,flags,otype,cred)864 int iplclose(dev, flags, otype, cred)
865 dev_t dev;
866 int flags, otype;
867 cred_t *cred;
868 {
869 minor_t min = getminor(dev);
870
871 #ifdef IPFDEBUG
872 cmn_err(CE_CONT, "iplclose(%x,%x,%x,%x)\n", dev, flags, otype, cred);
873 #endif
874
875 min = (IPL_LOGMAX < min) ? ENXIO : 0;
876 return min;
877 }
878
#ifdef IPFILTER_LOG
/*
 * iplread - read entry point for the ipf log devices.
 *
 * Looks up the caller's stack instance from the zone in its credentials,
 * fails with EIO while the filter is not running, and otherwise hands the
 * request to ipfsync_read() (IPL_LOGSYNC minor, when IPFILTER_SYNC is
 * built in) or to ipflog_read(), returning that routine's result.
 *
 * iplread/ipllog must both operate with at least splnet() lest they be
 * called during packet processing and cause an inconsistency to appear in
 * the filter lists.
 */
/*ARGSUSED*/
int iplread(dev, uio, cp)
dev_t dev;
register struct uio *uio;
cred_t *cp;
{
	ipf_stack_t *ifs;

	/*
	 * ipf_find_stack() is being called on behalf of a user process in
	 * a given zone to find that zone's stack, so no hold/reference
	 * count is needed here.
	 */
	ifs = ipf_find_stack(crgetzoneid(cp));
	ASSERT(ifs != NULL);

# ifdef IPFDEBUG
	cmn_err(CE_CONT, "iplread(%x,%x,%x)\n", dev, uio, cp);
# endif

	if (ifs->ifs_fr_running < 1)
		return EIO;

# ifdef IPFILTER_SYNC
	if (getminor(dev) == IPL_LOGSYNC)
		return ipfsync_read(uio);
# endif

	return ipflog_read(getminor(dev), uio, ifs);
}
#endif /* IPFILTER_LOG */
921
922
923 /*
924 * iplread/ipllog
925 * both of these must operate with at least splnet() lest they be
926 * called during packet processing and cause an inconsistancy to appear in
927 * the filter lists.
928 */
iplwrite(dev,uio,cp)929 int iplwrite(dev, uio, cp)
930 dev_t dev;
931 register struct uio *uio;
932 cred_t *cp;
933 {
934 ipf_stack_t *ifs;
935
936 /*
937 * As we're calling ipf_find_stack in user space, from a given zone
938 * to find the stack pointer for this zone, there is no need to have
939 * a hold/refence count here.
940 */
941 ifs = ipf_find_stack(crgetzoneid(cp));
942 ASSERT(ifs != NULL);
943
944 #ifdef IPFDEBUG
945 cmn_err(CE_CONT, "iplwrite(%x,%x,%x)\n", dev, uio, cp);
946 #endif
947
948 if (ifs->ifs_fr_running < 1) {
949 return EIO;
950 }
951
952 #ifdef IPFILTER_SYNC
953 if (getminor(dev) == IPL_LOGSYNC)
954 return ipfsync_write(uio);
955 #endif /* IPFILTER_SYNC */
956 dev = dev; /* LINT */
957 uio = uio; /* LINT */
958 cp = cp; /* LINT */
959 return ENXIO;
960 }
961
962
963 /*
964 * fr_send_reset - this could conceivably be a call to tcp_respond(), but that
965 * requires a large amount of setting up and isn't any more efficient.
966 */
fr_send_reset(fin)967 int fr_send_reset(fin)
968 fr_info_t *fin;
969 {
970 tcphdr_t *tcp, *tcp2;
971 int tlen, hlen;
972 mblk_t *m;
973 #ifdef USE_INET6
974 ip6_t *ip6;
975 #endif
976 ip_t *ip;
977
978 tcp = fin->fin_dp;
979 if (tcp->th_flags & TH_RST)
980 return -1;
981
982 #ifndef IPFILTER_CKSUM
983 if (fr_checkl4sum(fin) == -1)
984 return -1;
985 #endif
986
987 tlen = (tcp->th_flags & (TH_SYN|TH_FIN)) ? 1 : 0;
988 #ifdef USE_INET6
989 if (fin->fin_v == 6)
990 hlen = sizeof(ip6_t);
991 else
992 #endif
993 hlen = sizeof(ip_t);
994 hlen += sizeof(*tcp2);
995 if ((m = (mblk_t *)allocb(hlen + 64, BPRI_HI)) == NULL)
996 return -1;
997
998 m->b_rptr += 64;
999 MTYPE(m) = M_DATA;
1000 m->b_wptr = m->b_rptr + hlen;
1001 ip = (ip_t *)m->b_rptr;
1002 bzero((char *)ip, hlen);
1003 tcp2 = (struct tcphdr *)(m->b_rptr + hlen - sizeof(*tcp2));
1004 tcp2->th_dport = tcp->th_sport;
1005 tcp2->th_sport = tcp->th_dport;
1006 if (tcp->th_flags & TH_ACK) {
1007 tcp2->th_seq = tcp->th_ack;
1008 tcp2->th_flags = TH_RST;
1009 } else {
1010 tcp2->th_ack = ntohl(tcp->th_seq);
1011 tcp2->th_ack += tlen;
1012 tcp2->th_ack = htonl(tcp2->th_ack);
1013 tcp2->th_flags = TH_RST|TH_ACK;
1014 }
1015 tcp2->th_off = sizeof(struct tcphdr) >> 2;
1016
1017 ip->ip_v = fin->fin_v;
1018 #ifdef USE_INET6
1019 if (fin->fin_v == 6) {
1020 ip6 = (ip6_t *)m->b_rptr;
1021 ip6->ip6_flow = ((ip6_t *)fin->fin_ip)->ip6_flow;
1022 ip6->ip6_src = fin->fin_dst6.in6;
1023 ip6->ip6_dst = fin->fin_src6.in6;
1024 ip6->ip6_plen = htons(sizeof(*tcp));
1025 ip6->ip6_nxt = IPPROTO_TCP;
1026 tcp2->th_sum = fr_cksum(m, (ip_t *)ip6, IPPROTO_TCP, tcp2);
1027 } else
1028 #endif
1029 {
1030 ip->ip_src.s_addr = fin->fin_daddr;
1031 ip->ip_dst.s_addr = fin->fin_saddr;
1032 ip->ip_id = fr_nextipid(fin);
1033 ip->ip_hl = sizeof(*ip) >> 2;
1034 ip->ip_p = IPPROTO_TCP;
1035 ip->ip_len = sizeof(*ip) + sizeof(*tcp);
1036 ip->ip_tos = fin->fin_ip->ip_tos;
1037 tcp2->th_sum = fr_cksum(m, ip, IPPROTO_TCP, tcp2);
1038 }
1039 return fr_send_ip(fin, m, &m);
1040 }
1041
1042 /*
1043 * Function: fr_send_ip
1044 * Returns: 0: success
1045 * -1: failed
1046 * Parameters:
1047 * fin: packet information
1048 * m: the message block where ip head starts
1049 *
1050 * Send a new packet through the IP stack.
1051 *
1052 * For IPv4 packets, ip_len must be in host byte order, and ip_v,
1053 * ip_ttl, ip_off, and ip_sum are ignored (filled in by this
1054 * function).
1055 *
1056 * For IPv6 packets, ip6_flow, ip6_vfc, and ip6_hlim are filled
1057 * in by this function.
1058 *
1059 * All other portions of the packet must be in on-the-wire format.
1060 */
1061 /*ARGSUSED*/
fr_send_ip(fin,m,mpp)1062 static int fr_send_ip(fin, m, mpp)
1063 fr_info_t *fin;
1064 mblk_t *m, **mpp;
1065 {
1066 qpktinfo_t qpi, *qpip;
1067 fr_info_t fnew;
1068 ip_t *ip;
1069 int i, hlen;
1070 ipf_stack_t *ifs = fin->fin_ifs;
1071
1072 ip = (ip_t *)m->b_rptr;
1073 bzero((char *)&fnew, sizeof(fnew));
1074
1075 #ifdef USE_INET6
1076 if (fin->fin_v == 6) {
1077 ip6_t *ip6;
1078
1079 ip6 = (ip6_t *)ip;
1080 ip6->ip6_vfc = 0x60;
1081 ip6->ip6_hlim = 127;
1082 fnew.fin_v = 6;
1083 hlen = sizeof(*ip6);
1084 fnew.fin_plen = ntohs(ip6->ip6_plen) + hlen;
1085 } else
1086 #endif
1087 {
1088 fnew.fin_v = 4;
1089 #if SOLARIS2 >= 10
1090 ip->ip_ttl = 255;
1091 if (net_getpmtuenabled(ifs->ifs_ipf_ipv4) == 1)
1092 ip->ip_off = htons(IP_DF);
1093 #else
1094 if (ip_ttl_ptr != NULL)
1095 ip->ip_ttl = (u_char)(*ip_ttl_ptr);
1096 else
1097 ip->ip_ttl = 63;
1098 if (ip_mtudisc != NULL)
1099 ip->ip_off = htons(*ip_mtudisc ? IP_DF : 0);
1100 else
1101 ip->ip_off = htons(IP_DF);
1102 #endif
1103 /*
1104 * The dance with byte order and ip_len/ip_off is because in
1105 * fr_fastroute, it expects them to be in host byte order but
1106 * ipf_cksum expects them to be in network byte order.
1107 */
1108 ip->ip_len = htons(ip->ip_len);
1109 ip->ip_sum = ipf_cksum((u_short *)ip, sizeof(*ip));
1110 ip->ip_len = ntohs(ip->ip_len);
1111 ip->ip_off = ntohs(ip->ip_off);
1112 hlen = sizeof(*ip);
1113 fnew.fin_plen = ip->ip_len;
1114 }
1115
1116 qpip = fin->fin_qpi;
1117 qpi.qpi_off = 0;
1118 qpi.qpi_ill = qpip->qpi_ill;
1119 qpi.qpi_m = m;
1120 qpi.qpi_data = ip;
1121 fnew.fin_qpi = &qpi;
1122 fnew.fin_ifp = fin->fin_ifp;
1123 fnew.fin_flx = FI_NOCKSUM;
1124 fnew.fin_m = m;
1125 fnew.fin_qfm = m;
1126 fnew.fin_ip = ip;
1127 fnew.fin_mp = mpp;
1128 fnew.fin_hlen = hlen;
1129 fnew.fin_dp = (char *)ip + hlen;
1130 fnew.fin_ifs = fin->fin_ifs;
1131 (void) fr_makefrip(hlen, ip, &fnew);
1132
1133 i = fr_fastroute(m, mpp, &fnew, NULL);
1134 return i;
1135 }
1136
1137
fr_send_icmp_err(type,fin,dst)1138 int fr_send_icmp_err(type, fin, dst)
1139 int type;
1140 fr_info_t *fin;
1141 int dst;
1142 {
1143 struct in_addr dst4;
1144 struct icmp *icmp;
1145 qpktinfo_t *qpi;
1146 int hlen, code;
1147 phy_if_t phy;
1148 u_short sz;
1149 #ifdef USE_INET6
1150 mblk_t *mb;
1151 #endif
1152 mblk_t *m;
1153 #ifdef USE_INET6
1154 ip6_t *ip6;
1155 #endif
1156 ip_t *ip;
1157 ipf_stack_t *ifs = fin->fin_ifs;
1158
1159 if ((type < 0) || (type > ICMP_MAXTYPE))
1160 return -1;
1161
1162 code = fin->fin_icode;
1163 #ifdef USE_INET6
1164 if ((code < 0) || (code >= ICMP_MAX_UNREACH))
1165 return -1;
1166 #endif
1167
1168 #ifndef IPFILTER_CKSUM
1169 if (fr_checkl4sum(fin) == -1)
1170 return -1;
1171 #endif
1172
1173 qpi = fin->fin_qpi;
1174
1175 #ifdef USE_INET6
1176 mb = fin->fin_qfm;
1177
1178 if (fin->fin_v == 6) {
1179 sz = sizeof(ip6_t);
1180 sz += MIN(mb->b_wptr - mb->b_rptr, 512);
1181 hlen = sizeof(ip6_t);
1182 type = icmptoicmp6types[type];
1183 if (type == ICMP6_DST_UNREACH)
1184 code = icmptoicmp6unreach[code];
1185 } else
1186 #endif
1187 {
1188 if ((fin->fin_p == IPPROTO_ICMP) &&
1189 !(fin->fin_flx & FI_SHORT))
1190 switch (ntohs(fin->fin_data[0]) >> 8)
1191 {
1192 case ICMP_ECHO :
1193 case ICMP_TSTAMP :
1194 case ICMP_IREQ :
1195 case ICMP_MASKREQ :
1196 break;
1197 default :
1198 return 0;
1199 }
1200
1201 sz = sizeof(ip_t) * 2;
1202 sz += 8; /* 64 bits of data */
1203 hlen = sizeof(ip_t);
1204 }
1205
1206 sz += offsetof(struct icmp, icmp_ip);
1207 if ((m = (mblk_t *)allocb((size_t)sz + 64, BPRI_HI)) == NULL)
1208 return -1;
1209 MTYPE(m) = M_DATA;
1210 m->b_rptr += 64;
1211 m->b_wptr = m->b_rptr + sz;
1212 bzero((char *)m->b_rptr, (size_t)sz);
1213 ip = (ip_t *)m->b_rptr;
1214 ip->ip_v = fin->fin_v;
1215 icmp = (struct icmp *)(m->b_rptr + hlen);
1216 icmp->icmp_type = type & 0xff;
1217 icmp->icmp_code = code & 0xff;
1218 phy = (phy_if_t)qpi->qpi_ill;
1219 if (type == ICMP_UNREACH && (phy != 0) &&
1220 fin->fin_icode == ICMP_UNREACH_NEEDFRAG)
1221 icmp->icmp_nextmtu = net_getmtu(ifs->ifs_ipf_ipv4, phy,0 );
1222
1223 #ifdef USE_INET6
1224 if (fin->fin_v == 6) {
1225 struct in6_addr dst6;
1226 int csz;
1227
1228 if (dst == 0) {
1229 ipf_stack_t *ifs = fin->fin_ifs;
1230
1231 if (fr_ifpaddr(6, FRI_NORMAL, (void *)phy,
1232 (void *)&dst6, NULL, ifs) == -1) {
1233 FREE_MB_T(m);
1234 return -1;
1235 }
1236 } else
1237 dst6 = fin->fin_dst6.in6;
1238
1239 csz = sz;
1240 sz -= sizeof(ip6_t);
1241 ip6 = (ip6_t *)m->b_rptr;
1242 ip6->ip6_flow = ((ip6_t *)fin->fin_ip)->ip6_flow;
1243 ip6->ip6_plen = htons((u_short)sz);
1244 ip6->ip6_nxt = IPPROTO_ICMPV6;
1245 ip6->ip6_src = dst6;
1246 ip6->ip6_dst = fin->fin_src6.in6;
1247 sz -= offsetof(struct icmp, icmp_ip);
1248 bcopy((char *)mb->b_rptr, (char *)&icmp->icmp_ip, sz);
1249 icmp->icmp_cksum = csz - sizeof(ip6_t);
1250 } else
1251 #endif
1252 {
1253 ip->ip_hl = sizeof(*ip) >> 2;
1254 ip->ip_p = IPPROTO_ICMP;
1255 ip->ip_id = fin->fin_ip->ip_id;
1256 ip->ip_tos = fin->fin_ip->ip_tos;
1257 ip->ip_len = (u_short)sz;
1258 if (dst == 0) {
1259 ipf_stack_t *ifs = fin->fin_ifs;
1260
1261 if (fr_ifpaddr(4, FRI_NORMAL, (void *)phy,
1262 (void *)&dst4, NULL, ifs) == -1) {
1263 FREE_MB_T(m);
1264 return -1;
1265 }
1266 } else {
1267 dst4 = fin->fin_dst;
1268 }
1269 ip->ip_src = dst4;
1270 ip->ip_dst = fin->fin_src;
1271 bcopy((char *)fin->fin_ip, (char *)&icmp->icmp_ip,
1272 sizeof(*fin->fin_ip));
1273 bcopy((char *)fin->fin_ip + fin->fin_hlen,
1274 (char *)&icmp->icmp_ip + sizeof(*fin->fin_ip), 8);
1275 icmp->icmp_ip.ip_len = htons(icmp->icmp_ip.ip_len);
1276 icmp->icmp_ip.ip_off = htons(icmp->icmp_ip.ip_off);
1277 icmp->icmp_cksum = ipf_cksum((u_short *)icmp,
1278 sz - sizeof(ip_t));
1279 }
1280
1281 /*
1282 * Need to exit out of these so we don't recursively call rw_enter
1283 * from fr_qout.
1284 */
1285 return fr_send_ip(fin, m, &m);
1286 }
1287
1288 #include <sys/time.h>
1289 #include <sys/varargs.h>
1290
1291 #ifndef _KERNEL
1292 #include <stdio.h>
1293 #endif
1294
1295 #define NULLADDR_RATE_LIMIT 10 /* 10 seconds */
1296
1297
1298 /*
1299 * Print out warning message at rate-limited speed.
1300 */
rate_limit_message(ipf_stack_t * ifs,int rate,const char * message,...)1301 static void rate_limit_message(ipf_stack_t *ifs,
1302 int rate, const char *message, ...)
1303 {
1304 static time_t last_time = 0;
1305 time_t now;
1306 va_list args;
1307 char msg_buf[256];
1308 int need_printed = 0;
1309
1310 now = ddi_get_time();
1311
1312 /* make sure, no multiple entries */
1313 ASSERT(MUTEX_NOT_HELD(&(ifs->ifs_ipf_rw.ipf_lk)));
1314 MUTEX_ENTER(&ifs->ifs_ipf_rw);
1315 if (now - last_time >= rate) {
1316 need_printed = 1;
1317 last_time = now;
1318 }
1319 MUTEX_EXIT(&ifs->ifs_ipf_rw);
1320
1321 if (need_printed) {
1322 va_start(args, message);
1323 (void)vsnprintf(msg_buf, 255, message, args);
1324 va_end(args);
1325 #ifdef _KERNEL
1326 cmn_err(CE_WARN, msg_buf);
1327 #else
1328 fprintf(std_err, msg_buf);
1329 #endif
1330 }
1331 }
1332
1333 /*
1334 * Return the first IP Address associated with an interface
1335 * For IPv6, we walk through the list of logical interfaces and return
1336 * the address of the first one that isn't a link-local interface.
1337 * We can't assume that it is :1 because another link-local address
1338 * may have been assigned there.
1339 */
1340 /*ARGSUSED*/
fr_ifpaddr(v,atype,ifptr,inp,inpmask,ifs)1341 int fr_ifpaddr(v, atype, ifptr, inp, inpmask, ifs)
1342 int v, atype;
1343 void *ifptr;
1344 struct in_addr *inp, *inpmask;
1345 ipf_stack_t *ifs;
1346 {
1347 struct sockaddr_in6 v6addr[2];
1348 struct sockaddr_in v4addr[2];
1349 net_ifaddr_t type[2];
1350 net_handle_t net_data;
1351 phy_if_t phyif;
1352 void *array;
1353
1354 switch (v)
1355 {
1356 case 4:
1357 net_data = ifs->ifs_ipf_ipv4;
1358 array = v4addr;
1359 break;
1360 case 6:
1361 net_data = ifs->ifs_ipf_ipv6;
1362 array = v6addr;
1363 break;
1364 default:
1365 net_data = NULL;
1366 break;
1367 }
1368
1369 if (net_data == NULL)
1370 return -1;
1371
1372 phyif = (phy_if_t)ifptr;
1373
1374 switch (atype)
1375 {
1376 case FRI_PEERADDR :
1377 type[0] = NA_PEER;
1378 break;
1379
1380 case FRI_BROADCAST :
1381 type[0] = NA_BROADCAST;
1382 break;
1383
1384 default :
1385 type[0] = NA_ADDRESS;
1386 break;
1387 }
1388
1389 type[1] = NA_NETMASK;
1390
1391 if (v == 6) {
1392 lif_if_t idx = 0;
1393
1394 do {
1395 idx = net_lifgetnext(net_data, phyif, idx);
1396 if (net_getlifaddr(net_data, phyif, idx, 2, type,
1397 array) < 0)
1398 return -1;
1399 if (!IN6_IS_ADDR_LINKLOCAL(&v6addr[0].sin6_addr) &&
1400 !IN6_IS_ADDR_MULTICAST(&v6addr[0].sin6_addr))
1401 break;
1402 } while (idx != 0);
1403
1404 if (idx == 0)
1405 return -1;
1406
1407 return fr_ifpfillv6addr(atype, &v6addr[0], &v6addr[1],
1408 inp, inpmask);
1409 }
1410
1411 if (net_getlifaddr(net_data, phyif, 0, 2, type, array) < 0)
1412 return -1;
1413
1414 return fr_ifpfillv4addr(atype, &v4addr[0], &v4addr[1], inp, inpmask);
1415 }
1416
1417
fr_newisn(fin)1418 u_32_t fr_newisn(fin)
1419 fr_info_t *fin;
1420 {
1421 static int iss_seq_off = 0;
1422 u_char hash[16];
1423 u_32_t newiss;
1424 MD5_CTX ctx;
1425 ipf_stack_t *ifs = fin->fin_ifs;
1426
1427 /*
1428 * Compute the base value of the ISS. It is a hash
1429 * of (saddr, sport, daddr, dport, secret).
1430 */
1431 MD5Init(&ctx);
1432
1433 MD5Update(&ctx, (u_char *) &fin->fin_fi.fi_src,
1434 sizeof(fin->fin_fi.fi_src));
1435 MD5Update(&ctx, (u_char *) &fin->fin_fi.fi_dst,
1436 sizeof(fin->fin_fi.fi_dst));
1437 MD5Update(&ctx, (u_char *) &fin->fin_dat, sizeof(fin->fin_dat));
1438
1439 MD5Update(&ctx, ifs->ifs_ipf_iss_secret, sizeof(ifs->ifs_ipf_iss_secret));
1440
1441 MD5Final(hash, &ctx);
1442
1443 bcopy(hash, &newiss, sizeof(newiss));
1444
1445 /*
1446 * Now increment our "timer", and add it in to
1447 * the computed value.
1448 *
1449 * XXX Use `addin'?
1450 * XXX TCP_ISSINCR too large to use?
1451 */
1452 iss_seq_off += 0x00010000;
1453 newiss += iss_seq_off;
1454 return newiss;
1455 }
1456
1457
1458 /* ------------------------------------------------------------------------ */
1459 /* Function: fr_nextipid */
1460 /* Returns: int - 0 == success, -1 == error (packet should be droppped) */
1461 /* Parameters: fin(I) - pointer to packet information */
1462 /* */
1463 /* Returns the next IPv4 ID to use for this packet. */
1464 /* ------------------------------------------------------------------------ */
fr_nextipid(fin)1465 u_short fr_nextipid(fin)
1466 fr_info_t *fin;
1467 {
1468 static u_short ipid = 0;
1469 u_short id;
1470 ipf_stack_t *ifs = fin->fin_ifs;
1471
1472 MUTEX_ENTER(&ifs->ifs_ipf_rw);
1473 if (fin->fin_pktnum != 0) {
1474 id = fin->fin_pktnum & 0xffff;
1475 } else {
1476 id = ipid++;
1477 }
1478 MUTEX_EXIT(&ifs->ifs_ipf_rw);
1479
1480 return id;
1481 }
1482
1483
1484 #ifndef IPFILTER_CKSUM
1485 /* ARGSUSED */
1486 #endif
fr_checkv4sum(fin)1487 INLINE void fr_checkv4sum(fin)
1488 fr_info_t *fin;
1489 {
1490 #ifdef IPFILTER_CKSUM
1491 if (fr_checkl4sum(fin) == -1)
1492 fin->fin_flx |= FI_BAD;
1493 #endif
1494 }
1495
1496
#ifdef USE_INET6
/*
 * Flag an IPv6 packet as bad (FI_BAD in fin_flx) when fr_checkl4sum()
 * reports -1 for it.  Compiles to an empty function unless
 * IPFILTER_CKSUM is defined.
 */
# ifndef IPFILTER_CKSUM
/* ARGSUSED */
# endif
INLINE void fr_checkv6sum(fin)
fr_info_t *fin;
{
# ifdef IPFILTER_CKSUM
	if (fr_checkl4sum(fin) != -1)
		return;

	fin->fin_flx |= FI_BAD;
# endif
}
#endif /* USE_INET6 */
1510
1511
1512 #if (SOLARIS2 < 7)
fr_slowtimer()1513 void fr_slowtimer()
1514 #else
1515 /*ARGSUSED*/
1516 void fr_slowtimer __P((void *arg))
1517 #endif
1518 {
1519 ipf_stack_t *ifs = arg;
1520
1521 READ_ENTER(&ifs->ifs_ipf_global);
1522 if (ifs->ifs_fr_running != 1) {
1523 ifs->ifs_fr_timer_id = NULL;
1524 RWLOCK_EXIT(&ifs->ifs_ipf_global);
1525 return;
1526 }
1527 ipf_expiretokens(ifs);
1528 fr_fragexpire(ifs);
1529 fr_timeoutstate(ifs);
1530 fr_natexpire(ifs);
1531 fr_authexpire(ifs);
1532 ifs->ifs_fr_ticks++;
1533 if (ifs->ifs_fr_running == 1)
1534 ifs->ifs_fr_timer_id = timeout(fr_slowtimer, arg,
1535 drv_usectohz(500000));
1536 else
1537 ifs->ifs_fr_timer_id = NULL;
1538 RWLOCK_EXIT(&ifs->ifs_ipf_global);
1539 }
1540
1541
1542 /* ------------------------------------------------------------------------ */
1543 /* Function: fr_pullup */
1544 /* Returns: NULL == pullup failed, else pointer to protocol header */
1545 /* Parameters: m(I) - pointer to buffer where data packet starts */
1546 /* fin(I) - pointer to packet information */
1547 /* len(I) - number of bytes to pullup */
1548 /* */
1549 /* Attempt to move at least len bytes (from the start of the buffer) into a */
1550 /* single buffer for ease of access. Operating system native functions are */
1551 /* used to manage buffers - if necessary. If the entire packet ends up in */
1552 /* a single buffer, set the FI_COALESCE flag even though fr_coalesce() has */
1553 /* not been called. Both fin_ip and fin_dp are updated before exiting _IF_ */
1554 /* and ONLY if the pullup succeeds. */
1555 /* */
1556 /* We assume that 'min' is a pointer to a buffer that is part of the chain */
1557 /* of buffers that starts at *fin->fin_mp. */
1558 /* ------------------------------------------------------------------------ */
fr_pullup(min,fin,len)1559 void *fr_pullup(min, fin, len)
1560 mb_t *min;
1561 fr_info_t *fin;
1562 int len;
1563 {
1564 qpktinfo_t *qpi = fin->fin_qpi;
1565 int out = fin->fin_out, dpoff, ipoff;
1566 mb_t *m = min, *m1, *m2;
1567 char *ip;
1568 uint32_t start, stuff, end, value, flags;
1569 ipf_stack_t *ifs = fin->fin_ifs;
1570
1571 if (m == NULL)
1572 return NULL;
1573
1574 ip = (char *)fin->fin_ip;
1575 if ((fin->fin_flx & FI_COALESCE) != 0)
1576 return ip;
1577
1578 ipoff = fin->fin_ipoff;
1579 if (fin->fin_dp != NULL)
1580 dpoff = (char *)fin->fin_dp - (char *)ip;
1581 else
1582 dpoff = 0;
1583
1584 if (M_LEN(m) < len + ipoff) {
1585
1586 /*
1587 * pfil_precheck ensures the IP header is on a 32bit
1588 * aligned address so simply fail if that isn't currently
1589 * the case (should never happen).
1590 */
1591 int inc = 0;
1592
1593 if (ipoff > 0) {
1594 if ((ipoff & 3) != 0) {
1595 inc = 4 - (ipoff & 3);
1596 if (m->b_rptr - inc >= m->b_datap->db_base)
1597 m->b_rptr -= inc;
1598 else
1599 inc = 0;
1600 }
1601 }
1602
1603 /*
1604 * XXX This is here as a work around for a bug with DEBUG
1605 * XXX Solaris kernels. The problem is b_prev is used by IP
1606 * XXX code as a way to stash the phyint_index for a packet,
1607 * XXX this doesn't get reset by IP but freeb does an ASSERT()
1608 * XXX for both of these to be NULL. See 6442390.
1609 */
1610 m1 = m;
1611 m2 = m->b_prev;
1612
1613 do {
1614 m1->b_next = NULL;
1615 m1->b_prev = NULL;
1616 m1 = m1->b_cont;
1617 } while (m1);
1618
1619 /*
1620 * Need to preserve checksum information by copying them
1621 * to newmp which heads the pulluped message.
1622 */
1623 hcksum_retrieve(m, NULL, NULL, &start, &stuff, &end,
1624 &value, &flags);
1625
1626 if (pullupmsg(m, len + ipoff + inc) == 0) {
1627 ATOMIC_INCL(ifs->ifs_frstats[out].fr_pull[1]);
1628 FREE_MB_T(*fin->fin_mp);
1629 *fin->fin_mp = NULL;
1630 fin->fin_m = NULL;
1631 fin->fin_ip = NULL;
1632 fin->fin_dp = NULL;
1633 qpi->qpi_data = NULL;
1634 return NULL;
1635 }
1636
1637 (void) hcksum_assoc(m, NULL, NULL, start, stuff, end,
1638 value, flags, 0);
1639
1640 m->b_prev = m2;
1641 m->b_rptr += inc;
1642 fin->fin_m = m;
1643 ip = MTOD(m, char *) + ipoff;
1644 qpi->qpi_data = ip;
1645 }
1646
1647 ATOMIC_INCL(ifs->ifs_frstats[out].fr_pull[0]);
1648 fin->fin_ip = (ip_t *)ip;
1649 if (fin->fin_dp != NULL)
1650 fin->fin_dp = (char *)fin->fin_ip + dpoff;
1651
1652 if (len == fin->fin_plen)
1653 fin->fin_flx |= FI_COALESCE;
1654 return ip;
1655 }
1656
1657
1658 /*
1659 * Function: fr_verifysrc
1660 * Returns: int (really boolean)
1661 * Parameters: fin - packet information
1662 *
1663 * Check whether the packet has a valid source address for the interface on
1664 * which the packet arrived, implementing the "fr_chksrc" feature.
1665 * Returns true iff the packet's source address is valid.
1666 */
fr_verifysrc(fin)1667 int fr_verifysrc(fin)
1668 fr_info_t *fin;
1669 {
1670 net_handle_t net_data_p;
1671 phy_if_t phy_ifdata_routeto;
1672 struct sockaddr sin;
1673 ipf_stack_t *ifs = fin->fin_ifs;
1674
1675 if (fin->fin_v == 4) {
1676 net_data_p = ifs->ifs_ipf_ipv4;
1677 } else if (fin->fin_v == 6) {
1678 net_data_p = ifs->ifs_ipf_ipv6;
1679 } else {
1680 return (0);
1681 }
1682
1683 /* Get the index corresponding to the if name */
1684 sin.sa_family = (fin->fin_v == 4) ? AF_INET : AF_INET6;
1685 bcopy(&fin->fin_saddr, &sin.sa_data, sizeof (struct in_addr));
1686 phy_ifdata_routeto = net_routeto(net_data_p, &sin, NULL);
1687
1688 return (((phy_if_t)fin->fin_ifp == phy_ifdata_routeto) ? 1 : 0);
1689 }
1690
1691
1692 /*
1693 * Function: fr_fastroute
1694 * Returns: 0: success;
1695 * -1: failed
1696 * Parameters:
1697 * mb: the message block where ip head starts
1698 * mpp: the pointer to the pointer of the orignal
1699 * packet message
1700 * fin: packet information
1701 * fdp: destination interface information
1702 * if it is NULL, no interface information provided.
1703 *
1704 * This function is for fastroute/to/dup-to rules. It calls
1705 * pfil_make_lay2_packet to search route, make lay-2 header
1706 * ,and identify output queue for the IP packet.
1707 * The destination address depends on the following conditions:
1708 * 1: for fastroute rule, fdp is passed in as NULL, so the
1709 * destination address is the IP Packet's destination address
1710 * 2: for to/dup-to rule, if an ip address is specified after
1711 * the interface name, this address is the as destination
1712 * address. Otherwise IP Packet's destination address is used
1713 */
fr_fastroute(mb,mpp,fin,fdp)1714 int fr_fastroute(mb, mpp, fin, fdp)
1715 mblk_t *mb, **mpp;
1716 fr_info_t *fin;
1717 frdest_t *fdp;
1718 {
1719 net_handle_t net_data_p;
1720 net_inject_t *inj;
1721 mblk_t *mp = NULL;
1722 frentry_t *fr = fin->fin_fr;
1723 qpktinfo_t *qpi;
1724 ip_t *ip;
1725
1726 struct sockaddr_in *sin;
1727 struct sockaddr_in6 *sin6;
1728 struct sockaddr *sinp;
1729 ipf_stack_t *ifs = fin->fin_ifs;
1730 #ifndef sparc
1731 u_short __iplen, __ipoff;
1732 #endif
1733
1734 if (fin->fin_v == 4) {
1735 net_data_p = ifs->ifs_ipf_ipv4;
1736 } else if (fin->fin_v == 6) {
1737 net_data_p = ifs->ifs_ipf_ipv6;
1738 } else {
1739 return (-1);
1740 }
1741
1742 inj = net_inject_alloc(NETINFO_VERSION);
1743 if (inj == NULL)
1744 return -1;
1745
1746 ip = fin->fin_ip;
1747 qpi = fin->fin_qpi;
1748
1749 /*
1750 * If this is a duplicate mblk then we want ip to point at that
1751 * data, not the original, if and only if it is already pointing at
1752 * the current mblk data.
1753 *
1754 * Otherwise, if it's not a duplicate, and we're not already pointing
1755 * at the current mblk data, then we want to ensure that the data
1756 * points at ip.
1757 */
1758
1759 if ((ip == (ip_t *)qpi->qpi_m->b_rptr) && (qpi->qpi_m != mb)) {
1760 ip = (ip_t *)mb->b_rptr;
1761 } else if ((qpi->qpi_m == mb) && (ip != (ip_t *)qpi->qpi_m->b_rptr)) {
1762 qpi->qpi_m->b_rptr = (uchar_t *)ip;
1763 qpi->qpi_off = 0;
1764 }
1765
1766 /*
1767 * If there is another M_PROTO, we don't want it
1768 */
1769 if (*mpp != mb) {
1770 mp = unlinkb(*mpp);
1771 freeb(*mpp);
1772 *mpp = mp;
1773 }
1774
1775 sinp = (struct sockaddr *)&inj->ni_addr;
1776 sin = (struct sockaddr_in *)sinp;
1777 sin6 = (struct sockaddr_in6 *)sinp;
1778 bzero((char *)&inj->ni_addr, sizeof (inj->ni_addr));
1779 inj->ni_addr.ss_family = (fin->fin_v == 4) ? AF_INET : AF_INET6;
1780 inj->ni_packet = mb;
1781
1782 /*
1783 * In case we're here due to "to <if>" being used with
1784 * "keep state", check that we're going in the correct
1785 * direction.
1786 */
1787 if (fdp != NULL) {
1788 if ((fr != NULL) && (fdp->fd_ifp != NULL) &&
1789 (fin->fin_rev != 0) && (fdp == &fr->fr_tif))
1790 goto bad_fastroute;
1791 inj->ni_physical = (phy_if_t)fdp->fd_ifp;
1792 if (fin->fin_v == 4) {
1793 sin->sin_addr = fdp->fd_ip;
1794 } else {
1795 sin6->sin6_addr = fdp->fd_ip6.in6;
1796 }
1797 } else {
1798 if (fin->fin_v == 4) {
1799 sin->sin_addr = ip->ip_dst;
1800 } else {
1801 sin6->sin6_addr = ((ip6_t *)ip)->ip6_dst;
1802 }
1803 inj->ni_physical = net_routeto(net_data_p, sinp, NULL);
1804 }
1805
1806 /*
1807 * Clear the hardware checksum flags from packets that we are doing
1808 * input processing on as leaving them set will cause the outgoing
1809 * NIC (if it supports hardware checksum) to calculate them anew,
1810 * using the old (correct) checksums as the pseudo value to start
1811 * from.
1812 */
1813 if (fin->fin_out == 0) {
1814 DB_CKSUMFLAGS(mb) = 0;
1815 }
1816
1817 *mpp = mb;
1818
1819 if (fin->fin_out == 0) {
1820 void *saveifp;
1821 u_32_t pass;
1822
1823 saveifp = fin->fin_ifp;
1824 fin->fin_ifp = (void *)inj->ni_physical;
1825 fin->fin_flx &= ~FI_STATE;
1826 fin->fin_out = 1;
1827 (void) fr_acctpkt(fin, &pass);
1828 fin->fin_fr = NULL;
1829 if (!fr || !(fr->fr_flags & FR_RETMASK))
1830 (void) fr_checkstate(fin, &pass);
1831 if (fr_checknatout(fin, NULL) == -1)
1832 goto bad_fastroute;
1833 fin->fin_out = 0;
1834 fin->fin_ifp = saveifp;
1835 }
1836 #ifndef sparc
1837 if (fin->fin_v == 4) {
1838 __iplen = (u_short)ip->ip_len,
1839 __ipoff = (u_short)ip->ip_off;
1840
1841 ip->ip_len = htons(__iplen);
1842 ip->ip_off = htons(__ipoff);
1843 }
1844 #endif
1845
1846 if (net_data_p) {
1847 if (net_inject(net_data_p, NI_DIRECT_OUT, inj) < 0) {
1848 net_inject_free(inj);
1849 return (-1);
1850 }
1851 }
1852
1853 ifs->ifs_fr_frouteok[0]++;
1854 net_inject_free(inj);
1855 return 0;
1856 bad_fastroute:
1857 net_inject_free(inj);
1858 freemsg(mb);
1859 ifs->ifs_fr_frouteok[1]++;
1860 return -1;
1861 }
1862
1863
1864 /* ------------------------------------------------------------------------ */
1865 /* Function: ipf_hook4_out */
1866 /* Returns: int - 0 == packet ok, else problem, free packet if not done */
1867 /* Parameters: event(I) - pointer to event */
1868 /* info(I) - pointer to hook information for firewalling */
1869 /* */
1870 /* Calling ipf_hook. */
1871 /* ------------------------------------------------------------------------ */
1872 /*ARGSUSED*/
ipf_hook4_out(hook_event_token_t token,hook_data_t info,void * arg)1873 int ipf_hook4_out(hook_event_token_t token, hook_data_t info, void *arg)
1874 {
1875 return ipf_hook(info, 1, 0, arg);
1876 }
1877 /*ARGSUSED*/
ipf_hook6_out(hook_event_token_t token,hook_data_t info,void * arg)1878 int ipf_hook6_out(hook_event_token_t token, hook_data_t info, void *arg)
1879 {
1880 return ipf_hook6(info, 1, 0, arg);
1881 }
1882
1883 /* ------------------------------------------------------------------------ */
1884 /* Function: ipf_hook4_in */
1885 /* Returns: int - 0 == packet ok, else problem, free packet if not done */
1886 /* Parameters: event(I) - pointer to event */
1887 /* info(I) - pointer to hook information for firewalling */
1888 /* */
1889 /* Calling ipf_hook. */
1890 /* ------------------------------------------------------------------------ */
1891 /*ARGSUSED*/
ipf_hook4_in(hook_event_token_t token,hook_data_t info,void * arg)1892 int ipf_hook4_in(hook_event_token_t token, hook_data_t info, void *arg)
1893 {
1894 return ipf_hook(info, 0, 0, arg);
1895 }
1896 /*ARGSUSED*/
ipf_hook6_in(hook_event_token_t token,hook_data_t info,void * arg)1897 int ipf_hook6_in(hook_event_token_t token, hook_data_t info, void *arg)
1898 {
1899 return ipf_hook6(info, 0, 0, arg);
1900 }
1901
1902
1903 /* ------------------------------------------------------------------------ */
1904 /* Function: ipf_hook4_loop_out */
1905 /* Returns: int - 0 == packet ok, else problem, free packet if not done */
1906 /* Parameters: event(I) - pointer to event */
1907 /* info(I) - pointer to hook information for firewalling */
1908 /* */
1909 /* Calling ipf_hook. */
1910 /* ------------------------------------------------------------------------ */
1911 /*ARGSUSED*/
ipf_hook4_loop_out(hook_event_token_t token,hook_data_t info,void * arg)1912 int ipf_hook4_loop_out(hook_event_token_t token, hook_data_t info, void *arg)
1913 {
1914 return ipf_hook(info, 1, FI_NOCKSUM, arg);
1915 }
1916 /*ARGSUSED*/
ipf_hook6_loop_out(hook_event_token_t token,hook_data_t info,void * arg)1917 int ipf_hook6_loop_out(hook_event_token_t token, hook_data_t info, void *arg)
1918 {
1919 return ipf_hook6(info, 1, FI_NOCKSUM, arg);
1920 }
1921
1922 /* ------------------------------------------------------------------------ */
1923 /* Function: ipf_hook4_loop_in */
1924 /* Returns: int - 0 == packet ok, else problem, free packet if not done */
1925 /* Parameters: event(I) - pointer to event */
1926 /* info(I) - pointer to hook information for firewalling */
1927 /* */
1928 /* Calling ipf_hook. */
1929 /* ------------------------------------------------------------------------ */
1930 /*ARGSUSED*/
ipf_hook4_loop_in(hook_event_token_t token,hook_data_t info,void * arg)1931 int ipf_hook4_loop_in(hook_event_token_t token, hook_data_t info, void *arg)
1932 {
1933 return ipf_hook(info, 0, FI_NOCKSUM, arg);
1934 }
1935 /*ARGSUSED*/
ipf_hook6_loop_in(hook_event_token_t token,hook_data_t info,void * arg)1936 int ipf_hook6_loop_in(hook_event_token_t token, hook_data_t info, void *arg)
1937 {
1938 return ipf_hook6(info, 0, FI_NOCKSUM, arg);
1939 }
1940
1941 /* ------------------------------------------------------------------------ */
1942 /* Function: ipf_hook */
1943 /* Returns: int - 0 == packet ok, else problem, free packet if not done */
1944 /* Parameters: info(I) - pointer to hook information for firewalling */
1945 /* out(I) - whether packet is going in or out */
1946 /* loopback(I) - whether packet is a loopback packet or not */
1947 /* */
1948 /* Stepping stone function between the IP mainline and IPFilter. Extracts */
1949 /* parameters out of the info structure and forms them up to be useful for */
1950 /* calling ipfilter. */
1951 /* ------------------------------------------------------------------------ */
ipf_hook(hook_data_t info,int out,int loopback,void * arg)1952 int ipf_hook(hook_data_t info, int out, int loopback, void *arg)
1953 {
1954 hook_pkt_event_t *fw;
1955 ipf_stack_t *ifs;
1956 qpktinfo_t qpi;
1957 int rval, hlen;
1958 u_short swap;
1959 phy_if_t phy;
1960 ip_t *ip;
1961
1962 ifs = arg;
1963 fw = (hook_pkt_event_t *)info;
1964
1965 ASSERT(fw != NULL);
1966 phy = (out == 0) ? fw->hpe_ifp : fw->hpe_ofp;
1967
1968 ip = fw->hpe_hdr;
1969 swap = ntohs(ip->ip_len);
1970 ip->ip_len = swap;
1971 swap = ntohs(ip->ip_off);
1972 ip->ip_off = swap;
1973 hlen = IPH_HDR_LENGTH(ip);
1974
1975 qpi.qpi_m = fw->hpe_mb;
1976 qpi.qpi_data = fw->hpe_hdr;
1977 qpi.qpi_off = (char *)qpi.qpi_data - (char *)fw->hpe_mb->b_rptr;
1978 qpi.qpi_ill = (void *)phy;
1979 qpi.qpi_flags = fw->hpe_flags & (HPE_MULTICAST|HPE_BROADCAST);
1980 if (qpi.qpi_flags)
1981 qpi.qpi_flags |= FI_MBCAST;
1982 qpi.qpi_flags |= loopback;
1983
1984 rval = fr_check(fw->hpe_hdr, hlen, qpi.qpi_ill, out,
1985 &qpi, fw->hpe_mp, ifs);
1986
1987 /* For fastroute cases, fr_check returns 0 with mp set to NULL */
1988 if (rval == 0 && *(fw->hpe_mp) == NULL)
1989 rval = 1;
1990
1991 /* Notify IP the packet mblk_t and IP header pointers. */
1992 fw->hpe_mb = qpi.qpi_m;
1993 fw->hpe_hdr = qpi.qpi_data;
1994 if (rval == 0) {
1995 ip = qpi.qpi_data;
1996 swap = ntohs(ip->ip_len);
1997 ip->ip_len = swap;
1998 swap = ntohs(ip->ip_off);
1999 ip->ip_off = swap;
2000 }
2001 return rval;
2002
2003 }
/*
 * Function:	ipf_hook6
 * Returns:	int - 0 == packet ok, else problem, free packet if not done
 * Parameters:	info(I)     - pointer to hook information for firewalling
 *		out(I)      - whether packet is going in (0) or out
 *		loopback(I) - extra flag bits OR'ed into the packet's
 *			      qpi_flags (callers pass 0 or FI_NOCKSUM)
 *		arg(I)      - passed to fr_check() as its last argument
 *
 * IPv6 counterpart of ipf_hook(): forms up a qpktinfo_t from the hook
 * data and calls fr_check() with a header length of sizeof (ip6_t).  No
 * byte swapping of header fields is done.  A result of 0 with the message
 * pointer cleared is reported as 1.
 */
int ipf_hook6(hook_data_t info, int out, int loopback, void *arg)
{
	hook_pkt_event_t *fw = (hook_pkt_event_t *)info;
	int rval, hlen = sizeof (ip6_t);
	qpktinfo_t qpi;
	phy_if_t phy;

	ASSERT(fw != NULL);
	if (out == 0)
		phy = fw->hpe_ifp;
	else
		phy = fw->hpe_ofp;

	qpi.qpi_m = fw->hpe_mb;
	qpi.qpi_data = fw->hpe_hdr;
	qpi.qpi_off = (char *)qpi.qpi_data - (char *)fw->hpe_mb->b_rptr;
	qpi.qpi_ill = (void *)phy;
	qpi.qpi_flags = fw->hpe_flags & (HPE_MULTICAST|HPE_BROADCAST);
	if (qpi.qpi_flags != 0)
		qpi.qpi_flags |= FI_MBCAST;
	qpi.qpi_flags |= loopback;

	rval = fr_check(fw->hpe_hdr, hlen, qpi.qpi_ill, out,
	    &qpi, fw->hpe_mp, arg);

	/* For fastroute cases, fr_check returns 0 with mp set to NULL */
	if ((rval == 0) && (*(fw->hpe_mp) == NULL))
		rval = 1;

	/* Notify IP the packet mblk_t and IP header pointers. */
	fw->hpe_mb = qpi.qpi_m;
	fw->hpe_hdr = qpi.qpi_data;
	return rval;
}
2040
2041
2042 /* ------------------------------------------------------------------------ */
2043 /* Function: ipf_nic_event_v4 */
2044 /* Returns: int - 0 == no problems encountered */
2045 /* Parameters: event(I) - pointer to event */
2046 /* info(I) - pointer to information about a NIC event */
2047 /* */
2048 /* Function to receive asynchronous NIC events from IP */
2049 /* ------------------------------------------------------------------------ */
2050 /*ARGSUSED*/
ipf_nic_event_v4(hook_event_token_t event,hook_data_t info,void * arg)2051 int ipf_nic_event_v4(hook_event_token_t event, hook_data_t info, void *arg)
2052 {
2053 struct sockaddr_in *sin;
2054 hook_nic_event_t *hn;
2055 ipf_stack_t *ifs = arg;
2056 void *new_ifp = NULL;
2057
2058 if (ifs->ifs_fr_running <= 0)
2059 return (0);
2060
2061 hn = (hook_nic_event_t *)info;
2062
2063 switch (hn->hne_event)
2064 {
2065 case NE_PLUMB :
2066 frsync(IPFSYNC_NEWIFP, 4, (void *)hn->hne_nic, hn->hne_data,
2067 ifs);
2068 fr_natifpsync(IPFSYNC_NEWIFP, 4, (void *)hn->hne_nic,
2069 hn->hne_data, ifs);
2070 fr_statesync(IPFSYNC_NEWIFP, 4, (void *)hn->hne_nic,
2071 hn->hne_data, ifs);
2072 break;
2073
2074 case NE_UNPLUMB :
2075 frsync(IPFSYNC_OLDIFP, 4, (void *)hn->hne_nic, NULL, ifs);
2076 fr_natifpsync(IPFSYNC_OLDIFP, 4, (void *)hn->hne_nic, NULL,
2077 ifs);
2078 fr_statesync(IPFSYNC_OLDIFP, 4, (void *)hn->hne_nic, NULL, ifs);
2079 break;
2080
2081 case NE_ADDRESS_CHANGE :
2082 /*
2083 * We only respond to events for logical interface 0 because
2084 * IPFilter only uses the first address given to a network
2085 * interface. We check for hne_lif==1 because the netinfo
2086 * code maps adds 1 to the lif number so that it can return
2087 * 0 to indicate "no more lifs" when walking them.
2088 */
2089 if (hn->hne_lif == 1) {
2090 frsync(IPFSYNC_RESYNC, 4, (void *)hn->hne_nic, NULL,
2091 ifs);
2092 sin = hn->hne_data;
2093 fr_nataddrsync(4, (void *)hn->hne_nic, &sin->sin_addr,
2094 ifs);
2095 }
2096 break;
2097
2098 #if SOLARIS2 >= 10
2099 case NE_IFINDEX_CHANGE :
2100 WRITE_ENTER(&ifs->ifs_ipf_mutex);
2101
2102 if (hn->hne_data != NULL) {
2103 /*
2104 * The netinfo passes interface index as int (hne_data should be
2105 * handled as a pointer to int), which is always 32bit. We need to
2106 * convert it to void pointer here, since interfaces are
2107 * represented as pointers to void in IPF. The pointers are 64 bits
2108 * long on 64bit platforms. Doing something like
2109 * (void *)((int) x)
2110 * will throw warning:
2111 * "cast to pointer from integer of different size"
2112 * during 64bit compilation.
2113 *
2114 * The line below uses (size_t) to typecast int to
2115 * size_t, which might be 64bit/32bit (depending
2116 * on architecture). Once we have proper 64bit/32bit
2117 * type (size_t), we can safely convert it to void pointer.
2118 */
2119 new_ifp = (void *)(size_t)*((int *)hn->hne_data);
2120 fr_ifindexsync((void *)hn->hne_nic, new_ifp, ifs);
2121 fr_natifindexsync((void *)hn->hne_nic, new_ifp, ifs);
2122 fr_stateifindexsync((void *)hn->hne_nic, new_ifp, ifs);
2123 }
2124 RWLOCK_EXIT(&ifs->ifs_ipf_mutex);
2125 break;
2126 #endif
2127
2128 default :
2129 break;
2130 }
2131
2132 return 0;
2133 }
2134
2135
2136 /* ------------------------------------------------------------------------ */
2137 /* Function: ipf_nic_event_v6 */
2138 /* Returns: int - 0 == no problems encountered */
2139 /* Parameters: event(I) - pointer to event */
2140 /* info(I) - pointer to information about a NIC event */
2141 /* */
2142 /* Function to receive asynchronous NIC events from IP */
2143 /* ------------------------------------------------------------------------ */
2144 /*ARGSUSED*/
ipf_nic_event_v6(hook_event_token_t event,hook_data_t info,void * arg)2145 int ipf_nic_event_v6(hook_event_token_t event, hook_data_t info, void *arg)
2146 {
2147 struct sockaddr_in6 *sin6;
2148 hook_nic_event_t *hn;
2149 ipf_stack_t *ifs = arg;
2150 void *new_ifp = NULL;
2151
2152 if (ifs->ifs_fr_running <= 0)
2153 return (0);
2154
2155 hn = (hook_nic_event_t *)info;
2156
2157 switch (hn->hne_event)
2158 {
2159 case NE_PLUMB :
2160 frsync(IPFSYNC_NEWIFP, 6, (void *)hn->hne_nic,
2161 hn->hne_data, ifs);
2162 fr_natifpsync(IPFSYNC_NEWIFP, 6, (void *)hn->hne_nic,
2163 hn->hne_data, ifs);
2164 fr_statesync(IPFSYNC_NEWIFP, 6, (void *)hn->hne_nic,
2165 hn->hne_data, ifs);
2166 break;
2167
2168 case NE_UNPLUMB :
2169 frsync(IPFSYNC_OLDIFP, 6, (void *)hn->hne_nic, NULL, ifs);
2170 fr_natifpsync(IPFSYNC_OLDIFP, 6, (void *)hn->hne_nic, NULL,
2171 ifs);
2172 fr_statesync(IPFSYNC_OLDIFP, 6, (void *)hn->hne_nic, NULL, ifs);
2173 break;
2174
2175 case NE_ADDRESS_CHANGE :
2176 if (hn->hne_lif == 1) {
2177 sin6 = hn->hne_data;
2178 fr_nataddrsync(6, (void *)hn->hne_nic, &sin6->sin6_addr,
2179 ifs);
2180 }
2181 break;
2182
2183 #if SOLARIS2 >= 10
2184 case NE_IFINDEX_CHANGE :
2185 WRITE_ENTER(&ifs->ifs_ipf_mutex);
2186 if (hn->hne_data != NULL) {
2187 /*
2188 * The netinfo passes interface index as int (hne_data should be
2189 * handled as a pointer to int), which is always 32bit. We need to
2190 * convert it to void pointer here, since interfaces are
2191 * represented as pointers to void in IPF. The pointers are 64 bits
2192 * long on 64bit platforms. Doing something like
2193 * (void *)((int) x)
2194 * will throw warning:
2195 * "cast to pointer from integer of different size"
2196 * during 64bit compilation.
2197 *
2198 * The line below uses (size_t) to typecast int to
2199 * size_t, which might be 64bit/32bit (depending
2200 * on architecture). Once we have proper 64bit/32bit
2201 * type (size_t), we can safely convert it to void pointer.
2202 */
2203 new_ifp = (void *)(size_t)*((int *)hn->hne_data);
2204 fr_ifindexsync((void *)hn->hne_nic, new_ifp, ifs);
2205 fr_natifindexsync((void *)hn->hne_nic, new_ifp, ifs);
2206 fr_stateifindexsync((void *)hn->hne_nic, new_ifp, ifs);
2207 }
2208 RWLOCK_EXIT(&ifs->ifs_ipf_mutex);
2209 break;
2210 #endif
2211
2212 default :
2213 break;
2214 }
2215
2216 return 0;
2217 }
2218
2219 /*
2220 * Functions fr_make_rst(), fr_make_icmp_v4(), fr_make_icmp_v6()
2221 * are needed in Solaris kernel only. We don't need them in
2222 * ipftest to pretend the ICMP/RST packet was sent as a response.
2223 */
2224 #if defined(_KERNEL) && (SOLARIS2 >= 10)
2225 /* ------------------------------------------------------------------------ */
2226 /* Function: fr_make_rst */
2227 /* Returns: int - 0 on success, -1 on failure */
2228 /* Parameters: fin(I) - pointer to packet information */
2229 /* */
2230 /* We must alter the original mblks passed to IPF from IP stack via */
2231 /* FW_HOOKS. FW_HOOKS interface is powerfull, but it has some limitations. */
2232 /* IPF can basicaly do only these things with mblk representing the packet: */
2233 /* leave it as it is (pass the packet) */
2234 /* */
2235 /* discard it (block the packet) */
2236 /* */
2237 /* alter it (i.e. NAT) */
2238 /* */
2239 /* As you can see IPF can not simply discard the mblk and supply a new one */
2240 /* instead to IP stack via FW_HOOKS. */
2241 /* */
2242 /* The return-rst action for packets coming via NIC is handled as follows: */
2243 /* mblk with packet is discarded */
2244 /* */
2245 /* new mblk with RST response is constructed and injected to network */
2246 /* */
2247 /* IPF can't inject packets to loopback interface, this is just another */
2248 /* limitation we have to deal with here. The only option to send RST */
2249 /* response to offending TCP packet coming via loopback is to alter it. */
2250 /* */
2251 /* The fr_make_rst() function alters TCP SYN/FIN packet intercepted on */
2252 /* loopback interface into TCP RST packet. fin->fin_mp is pointer to */
2253 /* mblk L3 (IP) and L4 (TCP/UDP) packet headers. */
2254 /* ------------------------------------------------------------------------ */
fr_make_rst(fin)2255 int fr_make_rst(fin)
2256 fr_info_t *fin;
2257 {
2258 uint16_t tmp_port;
2259 int rv = -1;
2260 uint32_t old_ack;
2261 tcphdr_t *tcp = NULL;
2262 struct in_addr tmp_src;
2263 #ifdef USE_INET6
2264 struct in6_addr tmp_src6;
2265 #endif
2266
2267 ASSERT(fin->fin_p == IPPROTO_TCP);
2268
2269 /*
2270 * We do not need to adjust chksum, since it is not being checked by
2271 * Solaris IP stack for loopback clients.
2272 */
2273 if ((fin->fin_v == 4) && (fin->fin_p == IPPROTO_TCP) &&
2274 ((tcp = (tcphdr_t *) fin->fin_dp) != NULL)) {
2275
2276 if (tcp->th_flags & (TH_SYN | TH_FIN)) {
2277 /* Swap IPv4 addresses. */
2278 tmp_src = fin->fin_ip->ip_src;
2279 fin->fin_ip->ip_src = fin->fin_ip->ip_dst;
2280 fin->fin_ip->ip_dst = tmp_src;
2281
2282 rv = 0;
2283 }
2284 else
2285 tcp = NULL;
2286 }
2287 #ifdef USE_INET6
2288 else if ((fin->fin_v == 6) && (fin->fin_p == IPPROTO_TCP) &&
2289 ((tcp = (tcphdr_t *) fin->fin_dp) != NULL)) {
2290 /*
2291 * We are relying on fact the next header is TCP, which is true
2292 * for regular TCP packets coming in over loopback.
2293 */
2294 if (tcp->th_flags & (TH_SYN | TH_FIN)) {
2295 /* Swap IPv6 addresses. */
2296 tmp_src6 = fin->fin_ip6->ip6_src;
2297 fin->fin_ip6->ip6_src = fin->fin_ip6->ip6_dst;
2298 fin->fin_ip6->ip6_dst = tmp_src6;
2299
2300 rv = 0;
2301 }
2302 else
2303 tcp = NULL;
2304 }
2305 #endif
2306
2307 if (tcp != NULL) {
2308 /*
2309 * Adjust TCP header:
2310 * swap ports,
2311 * set flags,
2312 * set correct ACK number
2313 */
2314 tmp_port = tcp->th_sport;
2315 tcp->th_sport = tcp->th_dport;
2316 tcp->th_dport = tmp_port;
2317 old_ack = tcp->th_ack;
2318 tcp->th_ack = htonl(ntohl(tcp->th_seq) + 1);
2319 tcp->th_seq = old_ack;
2320 tcp->th_flags = TH_RST | TH_ACK;
2321 }
2322
2323 return (rv);
2324 }
2325
2326 /* ------------------------------------------------------------------------ */
2327 /* Function: fr_make_icmp_v4 */
2328 /* Returns: int - 0 on success, -1 on failure */
2329 /* Parameters: fin(I) - pointer to packet information */
2330 /* */
2331 /* Please read comment at fr_make_icmp() wrapper function to get an idea */
2332 /* what is going to happen here and why. Once you read the comment there, */
2333 /* continue here with next paragraph. */
2334 /* */
2335 /* To turn IPv4 packet into ICMPv4 response packet, these things must */
2336 /* happen here: */
2337 /* (1) Original mblk is copied (duplicated). */
2338 /* */
2339 /* (2) ICMP header is created. */
2340 /* */
2341 /* (3) Link ICMP header with copy of original mblk, we have ICMPv4 */
2342 /* data ready then. */
2343 /* */
2344 /* (4) Swap IP addresses in original mblk and adjust IP header data. */
2345 /* */
2346 /* (5) The mblk containing original packet is trimmed to contain IP */
2347 /* header only and ICMP chksum is computed. */
2348 /* */
2349 /* (6) The ICMP header we have from (3) is linked to original mblk, */
2350 /* which now contains new IP header. If original packet was spread */
2351 /* over several mblks, only the first mblk is kept. */
2352 /* ------------------------------------------------------------------------ */
fr_make_icmp_v4(fin)2353 static int fr_make_icmp_v4(fin)
2354 fr_info_t *fin;
2355 {
2356 struct in_addr tmp_src;
2357 tcphdr_t *tcp;
2358 struct icmp *icmp;
2359 mblk_t *mblk_icmp;
2360 mblk_t *mblk_ip;
2361 size_t icmp_pld_len; /* octets to append to ICMP header */
2362 size_t orig_iphdr_len; /* length of IP header only */
2363 uint32_t sum;
2364 uint16_t *buf;
2365 int len;
2366
2367
2368 if (fin->fin_v != 4)
2369 return (-1);
2370
2371 /*
2372 * If we are dealing with TCP, then packet must be SYN/FIN to be routed
2373 * by IP stack. If it is not SYN/FIN, then we must drop it silently.
2374 */
2375 tcp = (tcphdr_t *) fin->fin_dp;
2376
2377 if ((fin->fin_p == IPPROTO_TCP) &&
2378 ((tcp == NULL) || ((tcp->th_flags & (TH_SYN | TH_FIN)) == 0)))
2379 return (-1);
2380
2381 /*
2382 * Step (1)
2383 *
2384 * Make copy of original mblk.
2385 *
2386 * We want to copy as much data as necessary, not less, not more. The
2387 * ICMPv4 payload length for unreachable messages is:
2388 * original IP header + 8 bytes of L4 (if there are any).
2389 *
2390 * We determine if there are at least 8 bytes of L4 data following IP
2391 * header first.
2392 */
2393 icmp_pld_len = (fin->fin_dlen > ICMPERR_ICMPHLEN) ?
2394 ICMPERR_ICMPHLEN : fin->fin_dlen;
2395 /*
2396 * Since we don't want to copy more data than necessary, we must trim
2397 * the original mblk here. The right way (STREAMish) would be to use
2398 * adjmsg() to trim it. However we would have to calculate the length
2399 * argument for adjmsg() from pointers we already have here.
2400 *
2401 * Since we have pointers and offsets, it's faster and easier for
2402 * us to just adjust pointers by hand instead of using adjmsg().
2403 */
2404 fin->fin_m->b_wptr = (unsigned char *) fin->fin_dp;
2405 fin->fin_m->b_wptr += icmp_pld_len;
2406 icmp_pld_len = fin->fin_m->b_wptr - (unsigned char *) fin->fin_ip;
2407
2408 /*
2409 * Also we don't want to copy any L2 stuff, which might precede IP
2410 * header, so we have have to set b_rptr to point to the start of IP
2411 * header.
2412 */
2413 fin->fin_m->b_rptr += fin->fin_ipoff;
2414 if ((mblk_ip = copyb(fin->fin_m)) == NULL)
2415 return (-1);
2416 fin->fin_m->b_rptr -= fin->fin_ipoff;
2417
2418 /*
2419 * Step (2)
2420 *
2421 * Create an ICMP header, which will be appened to original mblk later.
2422 * ICMP header is just another mblk.
2423 */
2424 mblk_icmp = (mblk_t *) allocb(ICMPERR_ICMPHLEN, BPRI_HI);
2425 if (mblk_icmp == NULL) {
2426 FREE_MB_T(mblk_ip);
2427 return (-1);
2428 }
2429
2430 MTYPE(mblk_icmp) = M_DATA;
2431 icmp = (struct icmp *) mblk_icmp->b_wptr;
2432 icmp->icmp_type = ICMP_UNREACH;
2433 icmp->icmp_code = fin->fin_icode & 0xFF;
2434 icmp->icmp_void = 0;
2435 icmp->icmp_cksum = 0;
2436 mblk_icmp->b_wptr += ICMPERR_ICMPHLEN;
2437
2438 /*
2439 * Step (3)
2440 *
2441 * Complete ICMP packet - link ICMP header with L4 data from original
2442 * IP packet.
2443 */
2444 linkb(mblk_icmp, mblk_ip);
2445
2446 /*
2447 * Step (4)
2448 *
2449 * Swap IP addresses and change IP header fields accordingly in
2450 * original IP packet.
2451 *
2452 * There is a rule option return-icmp as a dest for physical
2453 * interfaces. This option becomes useless for loopback, since IPF box
2454 * uses same address as a loopback destination. We ignore the option
2455 * here, the ICMP packet will always look like as it would have been
2456 * sent from the original destination host.
2457 */
2458 tmp_src = fin->fin_ip->ip_src;
2459 fin->fin_ip->ip_src = fin->fin_ip->ip_dst;
2460 fin->fin_ip->ip_dst = tmp_src;
2461 fin->fin_ip->ip_p = IPPROTO_ICMP;
2462 fin->fin_ip->ip_sum = 0;
2463
2464 /*
2465 * Step (5)
2466 *
2467 * We trim the orignal mblk to hold IP header only.
2468 */
2469 fin->fin_m->b_wptr = fin->fin_dp;
2470 orig_iphdr_len = fin->fin_m->b_wptr -
2471 (fin->fin_m->b_rptr + fin->fin_ipoff);
2472 fin->fin_ip->ip_len = htons(icmp_pld_len + ICMPERR_ICMPHLEN +
2473 orig_iphdr_len);
2474
2475 /*
2476 * ICMP chksum calculation. The data we are calculating chksum for are
2477 * spread over two mblks, therefore we have to use two for loops.
2478 *
2479 * First for loop computes chksum part for ICMP header.
2480 */
2481 buf = (uint16_t *) icmp;
2482 len = ICMPERR_ICMPHLEN;
2483 for (sum = 0; len > 1; len -= 2)
2484 sum += *buf++;
2485
2486 /*
2487 * Here we add chksum part for ICMP payload.
2488 */
2489 len = icmp_pld_len;
2490 buf = (uint16_t *) mblk_ip->b_rptr;
2491 for (; len > 1; len -= 2)
2492 sum += *buf++;
2493
2494 /*
2495 * Chksum is done.
2496 */
2497 sum = (sum >> 16) + (sum & 0xffff);
2498 sum += (sum >> 16);
2499 icmp->icmp_cksum = ~sum;
2500
2501 /*
2502 * Step (6)
2503 *
2504 * Release all packet mblks, except the first one.
2505 */
2506 if (fin->fin_m->b_cont != NULL) {
2507 FREE_MB_T(fin->fin_m->b_cont);
2508 }
2509
2510 /*
2511 * Append ICMP payload to first mblk, which already contains new IP
2512 * header.
2513 */
2514 linkb(fin->fin_m, mblk_icmp);
2515
2516 return (0);
2517 }
2518
2519 #ifdef USE_INET6
2520 /* ------------------------------------------------------------------------ */
2521 /* Function: fr_make_icmp_v6 */
2522 /* Returns: int - 0 on success, -1 on failure */
2523 /* Parameters: fin(I) - pointer to packet information */
2524 /* */
2525 /* Please read comment at fr_make_icmp() wrapper function to get an idea */
2526 /* what and why is going to happen here. Once you read the comment there, */
2527 /* continue here with next paragraph. */
2528 /* */
2529 /* This function turns IPv6 packet (UDP, TCP, ...) into ICMPv6 response. */
2530 /* The algorithm is fairly simple: */
2531 /* 1) We need to get copy of complete mblk. */
2532 /* */
2533 /* 2) New ICMPv6 header is created. */
2534 /* */
2535 /* 3) The copy of original mblk with packet is linked to ICMPv6 */
2536 /* header. */
2537 /* */
2538 /* 4) The checksum must be adjusted. */
2539 /* */
2540 /* 5) IP addresses in original mblk are swapped and IP header data */
2541 /* are adjusted (protocol number). */
2542 /* */
2543 /* 6) Original mblk is trimmed to hold IPv6 header only, then it is */
2544 /* linked with the ICMPv6 data we got from (3). */
2545 /* ------------------------------------------------------------------------ */
fr_make_icmp_v6(fin)2546 static int fr_make_icmp_v6(fin)
2547 fr_info_t *fin;
2548 {
2549 struct icmp6_hdr *icmp6;
2550 tcphdr_t *tcp;
2551 struct in6_addr tmp_src6;
2552 size_t icmp_pld_len;
2553 mblk_t *mblk_ip, *mblk_icmp;
2554
2555 if (fin->fin_v != 6)
2556 return (-1);
2557
2558 /*
2559 * If we are dealing with TCP, then packet must SYN/FIN to be routed by
2560 * IP stack. If it is not SYN/FIN, then we must drop it silently.
2561 */
2562 tcp = (tcphdr_t *) fin->fin_dp;
2563
2564 if ((fin->fin_p == IPPROTO_TCP) &&
2565 ((tcp == NULL) || ((tcp->th_flags & (TH_SYN | TH_FIN)) == 0)))
2566 return (-1);
2567
2568 /*
2569 * Step (1)
2570 *
2571 * We need to copy complete packet in case of IPv6, no trimming is
2572 * needed (except the L2 headers).
2573 */
2574 icmp_pld_len = M_LEN(fin->fin_m);
2575 fin->fin_m->b_rptr += fin->fin_ipoff;
2576 if ((mblk_ip = copyb(fin->fin_m)) == NULL)
2577 return (-1);
2578 fin->fin_m->b_rptr -= fin->fin_ipoff;
2579
2580 /*
2581 * Step (2)
2582 *
2583 * Allocate and create ICMP header.
2584 */
2585 mblk_icmp = (mblk_t *) allocb(sizeof (struct icmp6_hdr),
2586 BPRI_HI);
2587
2588 if (mblk_icmp == NULL)
2589 return (-1);
2590
2591 MTYPE(mblk_icmp) = M_DATA;
2592 icmp6 = (struct icmp6_hdr *) mblk_icmp->b_wptr;
2593 icmp6->icmp6_type = ICMP6_DST_UNREACH;
2594 icmp6->icmp6_code = fin->fin_icode & 0xFF;
2595 icmp6->icmp6_data32[0] = 0;
2596 mblk_icmp->b_wptr += sizeof (struct icmp6_hdr);
2597
2598 /*
2599 * Step (3)
2600 *
2601 * Link the copy of IP packet to ICMP header.
2602 */
2603 linkb(mblk_icmp, mblk_ip);
2604
2605 /*
2606 * Step (4)
2607 *
2608 * Calculate chksum - this is much more easier task than in case of
2609 * IPv4 - ICMPv6 chksum only covers IP addresses, and payload length.
2610 * We are making compensation just for change of packet length.
2611 */
2612 icmp6->icmp6_cksum = icmp_pld_len + sizeof (struct icmp6_hdr);
2613
2614 /*
2615 * Step (5)
2616 *
2617 * Swap IP addresses.
2618 */
2619 tmp_src6 = fin->fin_ip6->ip6_src;
2620 fin->fin_ip6->ip6_src = fin->fin_ip6->ip6_dst;
2621 fin->fin_ip6->ip6_dst = tmp_src6;
2622
2623 /*
2624 * and adjust IP header data.
2625 */
2626 fin->fin_ip6->ip6_nxt = IPPROTO_ICMPV6;
2627 fin->fin_ip6->ip6_plen = htons(icmp_pld_len + sizeof (struct icmp6_hdr));
2628
2629 /*
2630 * Step (6)
2631 *
2632 * We must release all linked mblks from original packet and keep only
2633 * the first mblk with IP header to link ICMP data.
2634 */
2635 fin->fin_m->b_wptr = (unsigned char *) fin->fin_ip6 + sizeof (ip6_t);
2636
2637 if (fin->fin_m->b_cont != NULL) {
2638 FREE_MB_T(fin->fin_m->b_cont);
2639 }
2640
2641 /*
2642 * Append ICMP payload to IP header.
2643 */
2644 linkb(fin->fin_m, mblk_icmp);
2645
2646 return (0);
2647 }
2648 #endif /* USE_INET6 */
2649
2650 /* ------------------------------------------------------------------------ */
2651 /* Function: fr_make_icmp */
2652 /* Returns: int - 0 on success, -1 on failure */
2653 /* Parameters: fin(I) - pointer to packet information */
2654 /* */
2655 /* We must alter the original mblks passed to IPF from IP stack via */
2656 /* FW_HOOKS. The reasons why we must alter packet are discussed within */
2657 /* comment at fr_make_rst() function. */
2658 /* */
2659 /* The fr_make_icmp() function acts as a wrapper, which passes the code */
2660 /* execution to fr_make_icmp_v4() or fr_make_icmp_v6() depending on */
2661 /* protocol version. However there are some details, which are common to */
2662 /* both IP versions. The details are going to be explained here. */
2663 /* */
2664 /* The packet looks as follows: */
2665 /* xxx | IP hdr | IP payload ... | */
2666 /* ^ ^ ^ ^ */
2667 /* | | | | */
2668 /* | | | fin_m->b_wptr = fin->fin_dp + fin->fin_dlen */
2669 /* | | | */
2670 /* | | `- fin_m->fin_dp (in case of IPv4 points to L4 header) */
2671 /* | | */
2672 /* | `- fin_m->b_rptr + fin_ipoff (fin_ipoff is most likely 0 in case */
2673 /* | of loopback) */
2674 /* | */
2675 /* `- fin_m->b_rptr - points to L2 header in case of physical NIC */
2676 /* */
2677 /* All relevant IP headers are pulled up into the first mblk. It happened */
2678 /* well in advance before the matching rule was found (the rule, which took */
2679 /* us here, to fr_make_icmp() function). */
2680 /* */
2681 /* Both functions will turn packet passed in fin->fin_m mblk into a new */
2682 /* packet. New packet will be represented as chain of mblks. */
2683 /* orig mblk |- b_cont ---. */
2684 /* ^ `-> ICMP hdr |- b_cont--. */
2685 /* | ^ `-> duped orig mblk */
2686 /* | | ^ */
2687 /* `- The original mblk | | */
2688 /* will be trimmed to | | */
2689 /* to contain IP header | | */
2690 /* only | | */
2691 /* | | */
2692 /* `- This is newly | */
2693 /* allocated mblk to | */
2694 /* hold ICMPv6 data. | */
2695 /* | */
2696 /* | */
2697 /* | */
2698 /* This is the copy of original mblk, it will contain -' */
2699 /* orignal IP packet in case of ICMPv6. In case of */
2700 /* ICMPv4 it will contain up to 8 bytes of IP payload */
2701 /* (TCP/UDP/L4) data from original packet. */
2702 /* ------------------------------------------------------------------------ */
fr_make_icmp(fin)2703 int fr_make_icmp(fin)
2704 fr_info_t *fin;
2705 {
2706 int rv;
2707
2708 if (fin->fin_v == 4)
2709 rv = fr_make_icmp_v4(fin);
2710 #ifdef USE_INET6
2711 else if (fin->fin_v == 6)
2712 rv = fr_make_icmp_v6(fin);
2713 #endif
2714 else
2715 rv = -1;
2716
2717 return (rv);
2718 }
2719
/* ------------------------------------------------------------------------ */
/* Function:    fr_buf_sum                                                  */
/* Returns:     uint32_t - sum of buffer buf                                */
/* Parameters:  buf - pointer to buf we want to sum up                      */
/*              len - length of buffer buf in bytes                         */
/*                                                                          */
/* Sums buffer buf as a sequence of 16-bit words; an odd trailing byte is   */
/* added as if it were followed by a zero byte. The result is not folded,   */
/* it is used for chksum calculation. The buf argument must be aligned.     */
/* ------------------------------------------------------------------------ */
static uint32_t fr_buf_sum(buf, len)
const void *buf;
unsigned int len;
{
	uint16_t *word = (uint16_t *)buf;
	unsigned int nwords = len / 2;
	unsigned int i;
	uint32_t total = 0;

	/*
	 * All complete 16-bit words first.
	 */
	for (i = 0; i < nwords; i++)
		total += word[i];

	/*
	 * The leftover byte (if any) is the first byte of the last word.
	 */
	if ((len & 1) != 0) {
		unsigned char *last = (unsigned char *)&word[nwords];

		total += htons((*last) << 8);
	}

	return (total);
}
2746
2747 /* ------------------------------------------------------------------------ */
2748 /* Function: fr_calc_chksum */
2749 /* Returns: void */
2750 /* Parameters: fin - pointer to fr_info_t instance with packet data */
2751 /* pkt - pointer to duplicated packet */
2752 /* */
2753 /* Calculates all chksums (L3, L4) for packet pkt. Works for both IP */
2754 /* versions. */
2755 /* ------------------------------------------------------------------------ */
fr_calc_chksum(fin,pkt)2756 void fr_calc_chksum(fin, pkt)
2757 fr_info_t *fin;
2758 mb_t *pkt;
2759 {
2760 struct pseudo_hdr {
2761 union {
2762 struct in_addr in4;
2763 #ifdef USE_INET6
2764 struct in6_addr in6;
2765 #endif
2766 } src_addr;
2767 union {
2768 struct in_addr in4;
2769 #ifdef USE_INET6
2770 struct in6_addr in6;
2771 #endif
2772 } dst_addr;
2773 char zero;
2774 char proto;
2775 uint16_t len;
2776 } phdr;
2777 uint32_t sum, ip_sum;
2778 void *buf;
2779 uint16_t *l4_csum_p;
2780 tcphdr_t *tcp;
2781 udphdr_t *udp;
2782 icmphdr_t *icmp;
2783 #ifdef USE_INET6
2784 struct icmp6_hdr *icmp6;
2785 #endif
2786 ip_t *ip;
2787 unsigned int len;
2788 int pld_len;
2789
2790 /*
2791 * We need to pullup the packet to the single continuous buffer to avoid
2792 * potential misaligment of b_rptr member in mblk chain.
2793 */
2794 if (pullupmsg(pkt, -1) == 0) {
2795 cmn_err(CE_WARN, "Failed to pullup loopback pkt -> chksum"
2796 " will not be computed by IPF");
2797 return;
2798 }
2799
2800 /*
2801 * It is guaranteed IP header starts right at b_rptr, because we are
2802 * working with a copy of the original packet.
2803 *
2804 * Compute pseudo header chksum for TCP and UDP.
2805 */
2806 if ((fin->fin_p == IPPROTO_UDP) ||
2807 (fin->fin_p == IPPROTO_TCP)) {
2808 bzero(&phdr, sizeof (phdr));
2809 #ifdef USE_INET6
2810 if (fin->fin_v == 6) {
2811 phdr.src_addr.in6 = fin->fin_srcip6;
2812 phdr.dst_addr.in6 = fin->fin_dstip6;
2813 } else {
2814 phdr.src_addr.in4 = fin->fin_src;
2815 phdr.dst_addr.in4 = fin->fin_dst;
2816 }
2817 #else
2818 phdr.src_addr.in4 = fin->fin_src;
2819 phdr.dst_addr.in4 = fin->fin_dst;
2820 #endif
2821 phdr.zero = (char) 0;
2822 phdr.proto = fin->fin_p;
2823 phdr.len = htons((uint16_t)fin->fin_dlen);
2824 sum = fr_buf_sum(&phdr, (unsigned int)sizeof (phdr));
2825 } else {
2826 sum = 0;
2827 }
2828
2829 /*
2830 * Set pointer to the L4 chksum field in the packet, set buf pointer to
2831 * the L4 header start.
2832 */
2833 switch (fin->fin_p) {
2834 case IPPROTO_UDP:
2835 udp = (udphdr_t *)(pkt->b_rptr + fin->fin_hlen);
2836 l4_csum_p = &udp->uh_sum;
2837 buf = udp;
2838 break;
2839 case IPPROTO_TCP:
2840 tcp = (tcphdr_t *)(pkt->b_rptr + fin->fin_hlen);
2841 l4_csum_p = &tcp->th_sum;
2842 buf = tcp;
2843 break;
2844 case IPPROTO_ICMP:
2845 icmp = (icmphdr_t *)(pkt->b_rptr + fin->fin_hlen);
2846 l4_csum_p = &icmp->icmp_cksum;
2847 buf = icmp;
2848 break;
2849 #ifdef USE_INET6
2850 case IPPROTO_ICMPV6:
2851 icmp6 = (struct icmp6_hdr *)(pkt->b_rptr + fin->fin_hlen);
2852 l4_csum_p = &icmp6->icmp6_cksum;
2853 buf = icmp6;
2854 break;
2855 #endif
2856 default:
2857 l4_csum_p = NULL;
2858 }
2859
2860 /*
2861 * Compute L4 chksum if needed.
2862 */
2863 if (l4_csum_p != NULL) {
2864 *l4_csum_p = (uint16_t)0;
2865 pld_len = fin->fin_dlen;
2866 len = pkt->b_wptr - (unsigned char *)buf;
2867 ASSERT(len == pld_len);
2868 /*
2869 * Add payload sum to pseudoheader sum.
2870 */
2871 sum += fr_buf_sum(buf, len);
2872 while (sum >> 16)
2873 sum = (sum & 0xFFFF) + (sum >> 16);
2874
2875 *l4_csum_p = ~((uint16_t)sum);
2876 DTRACE_PROBE1(l4_sum, uint16_t, *l4_csum_p);
2877 }
2878
2879 /*
2880 * The IP header chksum is needed just for IPv4.
2881 */
2882 if (fin->fin_v == 4) {
2883 /*
2884 * Compute IPv4 header chksum.
2885 */
2886 ip = (ip_t *)pkt->b_rptr;
2887 ip->ip_sum = (uint16_t)0;
2888 ip_sum = fr_buf_sum(ip, (unsigned int)fin->fin_hlen);
2889 while (ip_sum >> 16)
2890 ip_sum = (ip_sum & 0xFFFF) + (ip_sum >> 16);
2891
2892 ip->ip_sum = ~((uint16_t)ip_sum);
2893 DTRACE_PROBE1(l3_sum, uint16_t, ip->ip_sum);
2894 }
2895
2896 return;
2897 }
2898
2899 #endif /* _KERNEL && SOLARIS2 >= 10 */
2900