/*-
 * Copyright (c) 2009-2013 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This material is based upon work partially supported by The
 * NetBSD Foundation under a contract with Mindaugas Rasiukevicius.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * NPF packet handler.
 *
 * Note: pfil(9) hooks are currently locked by softnet_lock and kernel-lock.
34 */ 35 36 #ifdef _KERNEL 37 #include <sys/cdefs.h> 38 __KERNEL_RCSID(0, "$NetBSD: npf_handler.c,v 1.45 2018/09/29 14:41:36 rmind Exp $"); 39 40 #include <sys/types.h> 41 #include <sys/param.h> 42 43 #include <sys/mbuf.h> 44 #include <sys/mutex.h> 45 #include <net/if.h> 46 #include <net/pfil.h> 47 #include <sys/socketvar.h> 48 49 #include <netinet/in_systm.h> 50 #include <netinet/in.h> 51 #include <netinet/ip_var.h> 52 #include <netinet/ip6.h> 53 #include <netinet6/ip6_var.h> 54 #endif 55 56 #include "npf_impl.h" 57 #include "npf_conn.h" 58 59 #if defined(_NPF_STANDALONE) 60 #define m_freem(m) npf->mbufops->free(m) 61 #define m_clear_flag(m,f) 62 #else 63 #define m_clear_flag(m,f) (m)->m_flags &= ~(f) 64 #endif 65 66 #ifndef INET6 67 #define ip6_reass_packet(x, y) ENOTSUP 68 #endif 69 70 static int 71 npf_reassembly(npf_t *npf, npf_cache_t *npc, bool *mff) 72 { 73 nbuf_t *nbuf = npc->npc_nbuf; 74 int error = EINVAL; 75 struct mbuf *m; 76 77 *mff = false; 78 m = nbuf_head_mbuf(nbuf); 79 80 if (npf_iscached(npc, NPC_IP4)) { 81 error = ip_reass_packet(&m); 82 } else if (npf_iscached(npc, NPC_IP6)) { 83 error = ip6_reass_packet(&m, npc->npc_hlen); 84 } 85 86 if (error) { 87 /* Reass failed. Free the mbuf, clear the nbuf. */ 88 npf_stats_inc(npf, NPF_STAT_REASSFAIL); 89 m_freem(m); 90 memset(nbuf, 0, sizeof(nbuf_t)); 91 return error; 92 } 93 if (m == NULL) { 94 /* More fragments should come. */ 95 npf_stats_inc(npf, NPF_STAT_FRAGMENTS); 96 *mff = true; 97 return 0; 98 } 99 100 /* 101 * Reassembly is complete, we have the final packet. 102 * Cache again, since layer 4 data is accessible now. 103 */ 104 nbuf_init(npf, nbuf, m, nbuf->nb_ifp); 105 npc->npc_info = 0; 106 107 if (npf_cache_all(npc) & (NPC_IPFRAG|NPC_FMTERR)) { 108 return EINVAL; 109 } 110 npf_stats_inc(npf, NPF_STAT_REASSEMBLY); 111 return 0; 112 } 113 114 /* 115 * npf_packet_handler: main packet handling routine for layer 3. 116 * 117 * Note: packet flow and inspection logic is in strict order. 
118 */ 119 __dso_public int 120 npf_packet_handler(npf_t *npf, struct mbuf **mp, ifnet_t *ifp, int di) 121 { 122 nbuf_t nbuf; 123 npf_cache_t npc; 124 npf_conn_t *con; 125 npf_rule_t *rl; 126 npf_rproc_t *rp; 127 int error, decision, flags; 128 uint32_t ntag; 129 npf_match_info_t mi; 130 bool mff; 131 132 /* QSBR checkpoint. */ 133 pserialize_checkpoint(npf->qsbr); 134 KASSERT(ifp != NULL); 135 136 /* 137 * Initialise packet information cache. 138 * Note: it is enough to clear the info bits. 139 */ 140 npc.npc_ctx = npf; 141 nbuf_init(npf, &nbuf, *mp, ifp); 142 npc.npc_nbuf = &nbuf; 143 npc.npc_info = 0; 144 145 mi.mi_di = di; 146 mi.mi_rid = 0; 147 mi.mi_retfl = 0; 148 149 *mp = NULL; 150 decision = NPF_DECISION_BLOCK; 151 error = 0; 152 rp = NULL; 153 con = NULL; 154 155 /* Cache everything. */ 156 flags = npf_cache_all(&npc); 157 158 /* If error on the format, leave quickly. */ 159 if (flags & NPC_FMTERR) { 160 error = EINVAL; 161 goto out; 162 } 163 164 /* Determine whether it is an IP fragment. */ 165 if (__predict_false(flags & NPC_IPFRAG)) { 166 /* Pass to IPv4/IPv6 reassembly mechanism. */ 167 error = npf_reassembly(npf, &npc, &mff); 168 if (error) { 169 goto out; 170 } 171 if (mff) { 172 /* More fragments should come. */ 173 return 0; 174 } 175 } 176 177 /* Just pass-through if specially tagged. */ 178 if (nbuf_find_tag(&nbuf, &ntag) == 0 && (ntag & NPF_NTAG_PASS) != 0) { 179 goto pass; 180 } 181 182 /* Inspect the list of connections (if found, acquires a reference). */ 183 con = npf_conn_inspect(&npc, di, &error); 184 185 /* If "passing" connection found - skip the ruleset inspection. */ 186 if (con && npf_conn_pass(con, &mi, &rp)) { 187 npf_stats_inc(npf, NPF_STAT_PASS_CONN); 188 KASSERT(error == 0); 189 goto pass; 190 } 191 if (__predict_false(error)) { 192 if (error == ENETUNREACH) 193 goto block; 194 goto out; 195 } 196 197 /* Acquire the lock, inspect the ruleset using this packet. 
*/ 198 int slock = npf_config_read_enter(); 199 npf_ruleset_t *rlset = npf_config_ruleset(npf); 200 201 rl = npf_ruleset_inspect(&npc, rlset, di, NPF_LAYER_3); 202 if (__predict_false(rl == NULL)) { 203 const bool pass = npf_default_pass(npf); 204 npf_config_read_exit(slock); 205 206 if (pass) { 207 npf_stats_inc(npf, NPF_STAT_PASS_DEFAULT); 208 goto pass; 209 } 210 npf_stats_inc(npf, NPF_STAT_BLOCK_DEFAULT); 211 goto block; 212 } 213 214 /* 215 * Get the rule procedure (acquires a reference) for association 216 * with a connection (if any) and execution. 217 */ 218 KASSERT(rp == NULL); 219 rp = npf_rule_getrproc(rl); 220 221 /* Conclude with the rule and release the lock. */ 222 error = npf_rule_conclude(rl, &mi); 223 npf_config_read_exit(slock); 224 225 if (error) { 226 npf_stats_inc(npf, NPF_STAT_BLOCK_RULESET); 227 goto block; 228 } 229 npf_stats_inc(npf, NPF_STAT_PASS_RULESET); 230 231 /* 232 * Establish a "pass" connection, if required. Just proceed if 233 * connection creation fails (e.g. due to unsupported protocol). 234 */ 235 if ((mi.mi_retfl & NPF_RULE_STATEFUL) != 0 && !con) { 236 con = npf_conn_establish(&npc, di, 237 (mi.mi_retfl & NPF_RULE_MULTIENDS) == 0); 238 if (con) { 239 /* 240 * Note: the reference on the rule procedure is 241 * transfered to the connection. It will be 242 * released on connection destruction. 243 */ 244 npf_conn_setpass(con, &mi, rp); 245 } 246 } 247 248 pass: 249 decision = NPF_DECISION_PASS; 250 KASSERT(error == 0); 251 /* 252 * Perform NAT. 253 */ 254 error = npf_do_nat(&npc, con, di); 255 256 block: 257 /* 258 * Execute the rule procedure, if any is associated. 259 * It may reverse the decision from pass to block. 260 */ 261 if (rp && !npf_rproc_run(&npc, rp, &mi, &decision)) { 262 if (con) { 263 npf_conn_release(con); 264 } 265 npf_rproc_release(rp); 266 /* mbuf already freed */ 267 return 0; 268 } 269 270 out: 271 /* 272 * Release the reference on a connection. 
Release the reference 273 * on a rule procedure only if there was no association. 274 */ 275 if (con) { 276 npf_conn_release(con); 277 } else if (rp) { 278 npf_rproc_release(rp); 279 } 280 281 /* Get the new mbuf pointer. */ 282 if ((*mp = nbuf_head_mbuf(&nbuf)) == NULL) { 283 return error ? error : ENOMEM; 284 } 285 286 /* Pass the packet if decided and there is no error. */ 287 if (decision == NPF_DECISION_PASS && !error) { 288 /* 289 * XXX: Disable for now, it will be set accordingly later, 290 * for optimisations (to reduce inspection). 291 */ 292 m_clear_flag(*mp, M_CANFASTFWD); 293 return 0; 294 } 295 296 /* 297 * Block the packet. ENETUNREACH is used to indicate blocking. 298 * Depending on the flags and protocol, return TCP reset (RST) or 299 * ICMP destination unreachable. 300 */ 301 if (mi.mi_retfl && npf_return_block(&npc, mi.mi_retfl)) { 302 *mp = NULL; 303 } 304 305 if (!error) { 306 error = ENETUNREACH; 307 } 308 309 if (*mp) { 310 /* Free the mbuf chain. */ 311 m_freem(*mp); 312 *mp = NULL; 313 } 314 return error; 315 } 316