1 /*- 2 * Copyright (c) 2020 Mindaugas Rasiukevicius <rmind at noxt eu> 3 * Copyright (c) 2009-2013 The NetBSD Foundation, Inc. 4 * All rights reserved. 5 * 6 * This material is based upon work partially supported by The 7 * NetBSD Foundation under a contract with Mindaugas Rasiukevicius. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 19 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 20 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 21 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 22 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 23 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 24 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 25 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 26 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 27 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 28 * POSSIBILITY OF SUCH DAMAGE. 29 */ 30 31 /* 32 * NPF packet handler. 33 * 34 * This is the main entry point to the NPF where packet processing happens. 
 * There are some important synchronization rules:
 *
 *	1) Lookups into the connection database and configuration (ruleset,
 *	tables, etc) are protected by Epoch-Based Reclamation (EBR);
 *
 *	2) The code in the critical path (protected by EBR) should generally
 *	not block (that includes adaptive mutex acquisitions);
 *
 *	3) Where it will block, references should be acquired atomically,
 *	while in the critical path, on the relevant objects.
 */

#ifdef _KERNEL
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: npf_handler.c,v 1.49 2020/05/30 14:16:56 rmind Exp $");

#include <sys/types.h>
#include <sys/param.h>

#include <sys/mbuf.h>
#include <sys/mutex.h>
#include <net/if.h>
#include <net/pfil.h>
#include <sys/socketvar.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/ip_var.h>
#include <netinet/ip6.h>
#include <netinet6/ip6_var.h>
#endif

#include "npf_impl.h"
#include "npf_conn.h"

/*
 * In the standalone build there is no kernel mbuf layer: route mbuf
 * freeing through the user-supplied ops and make flag-clearing a no-op.
 */
#if defined(_NPF_STANDALONE)
#define	m_freem(m)		npf->mbufops->free(m)
#define	m_clear_flag(m,f)
#else
#define	m_clear_flag(m,f)	(m)->m_flags &= ~(f)
#endif

/* Without INET6, IPv6 reassembly is simply unsupported. */
#ifndef INET6
#define	ip6_reass_packet(x, y)	ENOTSUP
#endif

/*
 * npf_reassembly: pass the packet to the IPv4/IPv6 reassembly mechanism.
 *
 * => Returns zero on success.  If the packet was consumed and more
 *    fragments are expected, *mff is set to true.
 * => If reassembly completed, re-runs npf_cache_all() so that the
 *    layer 4 headers of the reassembled packet are cached.
 * => On failure, frees the mbuf chain, zeroes the nbuf and returns
 *    the error (the caller must not touch the packet again).
 * => If reassembly is disabled for the address family, returns zero
 *    without touching the packet (it will be inspected as-is).
 */
static int
npf_reassembly(npf_t *npf, npf_cache_t *npc, bool *mff)
{
	nbuf_t *nbuf = npc->npc_nbuf;
	int error = EINVAL;
	struct mbuf *m;

	*mff = false;
	m = nbuf_head_mbuf(nbuf);

	if (npf_iscached(npc, NPC_IP4) && npf->ip4_reassembly) {
		error = ip_reass_packet(&m);
	} else if (npf_iscached(npc, NPC_IP6) && npf->ip6_reassembly) {
		error = ip6_reass_packet(&m, npc->npc_hlen);
	} else {
		/*
		 * Reassembly is disabled: just pass the packet through
		 * the ruleset for inspection.
		 */
		return 0;
	}

	if (error) {
		/* Reassembly failed; free the mbuf, clear the nbuf. */
		npf_stats_inc(npf, NPF_STAT_REASSFAIL);
		m_freem(m);
		memset(nbuf, 0, sizeof(nbuf_t));
		return error;
	}
	if (m == NULL) {
		/* More fragments should come. */
		npf_stats_inc(npf, NPF_STAT_FRAGMENTS);
		*mff = true;
		return 0;
	}

	/*
	 * Reassembly is complete, we have the final packet.
	 * Cache again, since layer 4 data is accessible now.
	 */
	nbuf_init(npf, nbuf, m, nbuf->nb_ifp);
	npc->npc_info = 0;

	/* A reassembled packet must not still be a fragment or malformed. */
	if (npf_cache_all(npc) & (NPC_IPFRAG|NPC_FMTERR)) {
		return EINVAL;
	}
	npf_stats_inc(npf, NPF_STAT_REASSEMBLY);
	return 0;
}

/*
 * npf_packet_bypass_tag_p: true if the nbuf carries an NPF tag with the
 * NPF_NTAG_PASS bit set, i.e. the packet was explicitly marked to bypass
 * ruleset inspection.
 */
static inline bool
npf_packet_bypass_tag_p(nbuf_t *nbuf)
{
	uint32_t ntag;
	return nbuf_find_tag(nbuf, &ntag) == 0 && (ntag & NPF_NTAG_PASS) != 0;
}

/*
 * npfk_packet_handler: main packet handling routine for layer 3.
 *
 * Note: packet flow and inspection logic is in strict order.
 *
 * => Returns zero if the packet is passed (or consumed by the
 *    reassembler); *mp then points to the (possibly new) mbuf chain.
 * => On block, returns an error (ENETUNREACH indicates blocking) and
 *    frees the mbuf chain, setting *mp to NULL.
 * => di is the direction used for connection/ruleset inspection
 *    (presumably PFIL_IN/PFIL_OUT — confirm against the pfil callers).
 */
__dso_public int
npfk_packet_handler(npf_t *npf, struct mbuf **mp, ifnet_t *ifp, int di)
{
	nbuf_t nbuf;
	npf_cache_t npc;
	npf_conn_t *con;
	npf_rule_t *rl;
	npf_rproc_t *rp;
	int error, decision, flags;
	npf_match_info_t mi;
	bool mff;

	KASSERT(ifp != NULL);

	/*
	 * Initialize packet information cache.
	 * Note: it is enough to clear the info bits.
	 */
	nbuf_init(npf, &nbuf, *mp, ifp);
	memset(&npc, 0, sizeof(npf_cache_t));
	npc.npc_ctx = npf;
	npc.npc_nbuf = &nbuf;

	mi.mi_di = di;
	mi.mi_rid = 0;
	mi.mi_retfl = 0;

	/* Default to block; *mp is re-fetched from the nbuf on the way out. */
	*mp = NULL;
	decision = NPF_DECISION_BLOCK;
	error = 0;
	rp = NULL;
	con = NULL;

	/* Cache everything. */
	flags = npf_cache_all(&npc);

	/* Malformed packet, leave quickly. */
	if (flags & NPC_FMTERR) {
		error = EINVAL;
		goto out;
	}

	/* Determine whether it is an IP fragment. */
	if (__predict_false(flags & NPC_IPFRAG)) {
		/* Pass to IPv4/IPv6 reassembly mechanism. */
		error = npf_reassembly(npf, &npc, &mff);
		if (error) {
			goto out;
		}
		if (mff) {
			/* More fragments should come; packet was consumed. */
			return 0;
		}
	}

	/* Just pass-through if specially tagged. */
	if (npf_packet_bypass_tag_p(&nbuf)) {
		goto pass;
	}

	/* Inspect the list of connections (if found, acquires a reference). */
	con = npf_conn_inspect(&npc, di, &error);

	/* If "passing" connection found - skip the ruleset inspection. */
	if (con && npf_conn_pass(con, &mi, &rp)) {
		npf_stats_inc(npf, NPF_STAT_PASS_CONN);
		KASSERT(error == 0);
		goto pass;
	}
	if (__predict_false(error)) {
		/* ENETUNREACH from connection inspection means "block". */
		if (error == ENETUNREACH)
			goto block;
		goto out;
	}

	/* Acquire the lock, inspect the ruleset using this packet. */
	int slock = npf_config_read_enter(npf);
	npf_ruleset_t *rlset = npf_config_ruleset(npf);

	rl = npf_ruleset_inspect(&npc, rlset, di, NPF_LAYER_3);
	if (__predict_false(rl == NULL)) {
		/* No rule matched: apply the configured default policy. */
		const bool pass = npf_default_pass(npf);
		npf_config_read_exit(npf, slock);

		if (pass) {
			npf_stats_inc(npf, NPF_STAT_PASS_DEFAULT);
			goto pass;
		}
		npf_stats_inc(npf, NPF_STAT_BLOCK_DEFAULT);
		goto block;
	}

	/*
	 * Get the rule procedure (acquires a reference) for association
	 * with a connection (if any) and execution.
	 */
	KASSERT(rp == NULL);
	rp = npf_rule_getrproc(rl);

	/* Conclude with the rule and release the lock. */
	error = npf_rule_conclude(rl, &mi);
	npf_config_read_exit(npf, slock);

	if (error) {
		npf_stats_inc(npf, NPF_STAT_BLOCK_RULESET);
		goto block;
	}
	npf_stats_inc(npf, NPF_STAT_PASS_RULESET);

	/*
	 * Establish a "pass" connection, if required.  Just proceed if
	 * connection creation fails (e.g. due to unsupported protocol).
	 */
	if ((mi.mi_retfl & NPF_RULE_STATEFUL) != 0 && !con) {
		con = npf_conn_establish(&npc, di,
		    (mi.mi_retfl & NPF_RULE_GSTATEFUL) == 0);
		if (con) {
			/*
			 * Note: the reference on the rule procedure is
			 * transferred to the connection.  It will be
			 * released on connection destruction.
			 */
			npf_conn_setpass(con, &mi, rp);
		}
	}

pass:
	decision = NPF_DECISION_PASS;
	KASSERT(error == 0);

	/*
	 * Perform NAT.
	 */
	error = npf_do_nat(&npc, con, di);

block:
	/*
	 * Execute the rule procedure, if any is associated.
	 * It may reverse the decision from pass to block.
	 */
	if (rp && !npf_rproc_run(&npc, rp, &mi, &decision)) {
		if (con) {
			npf_conn_release(con);
		}
		npf_rproc_release(rp);
		/* mbuf already freed */
		return 0;
	}

out:
	/*
	 * Release the reference on a connection.  Release the reference
	 * on a rule procedure only if there was no association.
	 */
	if (con) {
		npf_conn_release(con);
	} else if (rp) {
		npf_rproc_release(rp);
	}

	/* Get the new mbuf pointer (reassembly/NAT may have replaced it). */
	if ((*mp = nbuf_head_mbuf(&nbuf)) == NULL) {
		return error ? error : ENOMEM;
	}

	/* Pass the packet if decided and there is no error. */
	if (decision == NPF_DECISION_PASS && !error) {
		/*
		 * XXX: Disable for now, it will be set accordingly later,
		 * for optimisations (to reduce inspection).
		 */
		m_clear_flag(*mp, M_CANFASTFWD);
		return 0;
	}

	/*
	 * Block the packet.  ENETUNREACH is used to indicate blocking.
	 * Depending on the flags and protocol, return TCP reset (RST) or
	 * ICMP destination unreachable.
	 */
	if (mi.mi_retfl && npf_return_block(&npc, mi.mi_retfl)) {
		*mp = NULL;
	}

	if (!error) {
		error = ENETUNREACH;
	}

	if (*mp) {
		/* Free the mbuf chain. */
		m_freem(*mp);
		*mp = NULL;
	}
	return error;
}