1 2 /* rrl.c - Response Rate Limiting for NSD. 3 * By W.C.A. Wijngaards 4 * Copyright 2012, NLnet Labs. 5 * BSD, see LICENSE. 6 */ 7 #include "config.h" 8 #include <errno.h> 9 #include "rrl.h" 10 #include "util.h" 11 #include "lookup3.h" 12 #include "options.h" 13 14 #ifdef RATELIMIT 15 16 #ifdef HAVE_MMAP 17 #include <sys/mman.h> 18 #if defined(MAP_ANON) && !defined(MAP_ANONYMOUS) 19 #define MAP_ANONYMOUS MAP_ANON 20 #endif 21 #endif /* HAVE_MMAP */ 22 23 24 /** 25 * The rate limiting data structure bucket, this represents one rate of 26 * packets from a single source. 27 * Smoothed average rates. 28 */ 29 struct rrl_bucket { 30 /* the source netmask */ 31 uint64_t source; 32 /* rate, in queries per second, which due to rate=r(t)+r(t-1)/2 is 33 * equal to double the queries per second */ 34 uint32_t rate; 35 /* the full hash */ 36 uint32_t hash; 37 /* counter for queries arrived in this second */ 38 uint32_t counter; 39 /* timestamp, which time is the time of the counter, the rate is from 40 * one timestep before that. 
*/ 41 int32_t stamp; 42 /* flags for the source mask and type */ 43 uint16_t flags; 44 }; 45 46 /* the (global) array of RRL buckets */ 47 static struct rrl_bucket* rrl_array = NULL; 48 static size_t rrl_array_size = RRL_BUCKETS; 49 static uint32_t rrl_ratelimit = RRL_LIMIT; /* 2x qps */ 50 static uint8_t rrl_slip_ratio = RRL_SLIP; 51 static uint8_t rrl_ipv4_prefixlen = RRL_IPV4_PREFIX_LENGTH; 52 static uint8_t rrl_ipv6_prefixlen = RRL_IPV6_PREFIX_LENGTH; 53 static uint64_t rrl_ipv6_mask; /* max prefixlen 64 */ 54 static uint32_t rrl_whitelist_ratelimit = RRL_WLIST_LIMIT; /* 2x qps */ 55 56 /* the array of mmaps for the children (saved between reloads) */ 57 static void** rrl_maps = NULL; 58 static size_t rrl_maps_num = 0; 59 60 void rrl_mmap_init(int numch, size_t numbuck, size_t lm, size_t wlm, size_t sm, 61 size_t plf, size_t pls) 62 { 63 #ifdef HAVE_MMAP 64 size_t i; 65 #endif 66 if(numbuck != 0) 67 rrl_array_size = numbuck; 68 rrl_ratelimit = lm*2; 69 rrl_slip_ratio = sm; 70 rrl_ipv4_prefixlen = plf; 71 rrl_ipv6_prefixlen = pls; 72 if (pls <= 32) { 73 rrl_ipv6_mask = ((uint64_t) htonl(0xffffffff << (32-pls))) << 32; 74 } else { 75 rrl_ipv6_mask = ((uint64_t) htonl(0xffffffff << (64-pls))) | 76 (((uint64_t)0xffffffff)<<32); 77 } 78 rrl_whitelist_ratelimit = wlm*2; 79 #ifdef HAVE_MMAP 80 /* allocate the ratelimit hashtable in a memory map so it is 81 * preserved across reforks (every child its own table) */ 82 rrl_maps_num = (size_t)numch; 83 rrl_maps = (void**)xmallocarray(rrl_maps_num, sizeof(void*)); 84 for(i=0; i<rrl_maps_num; i++) { 85 rrl_maps[i] = mmap(NULL, 86 sizeof(struct rrl_bucket)*rrl_array_size, 87 PROT_READ|PROT_WRITE, MAP_SHARED|MAP_ANONYMOUS, -1, 0); 88 if(rrl_maps[i] == MAP_FAILED) { 89 log_msg(LOG_ERR, "rrl: mmap failed: %s", 90 strerror(errno)); 91 exit(1); 92 } 93 memset(rrl_maps[i], 0, 94 sizeof(struct rrl_bucket)*rrl_array_size); 95 } 96 #else 97 (void)numch; 98 rrl_maps_num = 0; 99 rrl_maps = NULL; 100 #endif 101 } 102 103 void 
rrl_set_limit(size_t lm, size_t wlm, size_t sm) 104 { 105 rrl_ratelimit = lm*2; 106 rrl_whitelist_ratelimit = wlm*2; 107 rrl_slip_ratio = sm; 108 } 109 110 void rrl_init(size_t ch) 111 { 112 if(!rrl_maps || ch >= rrl_maps_num) 113 rrl_array = xalloc_array_zero(sizeof(struct rrl_bucket), 114 rrl_array_size); 115 #ifdef HAVE_MMAP 116 else rrl_array = (struct rrl_bucket*)rrl_maps[ch]; 117 #endif 118 } 119 120 /** return the source netblock of the query, this is the genuine source 121 * for genuine queries and the target for reflected packets */ 122 static uint64_t rrl_get_source(query_type* query, uint16_t* c2) 123 { 124 /* note there is an IPv6 subnet, that maps 125 * to the same buckets as IPv4 space, but there is a flag in c2 126 * that makes the hash different */ 127 #ifdef INET6 128 if( ((struct sockaddr_in*)&query->addr)->sin_family == AF_INET) { 129 *c2 = 0; 130 return ((struct sockaddr_in*)&query->addr)-> 131 sin_addr.s_addr & htonl(0xffffffff << (32-rrl_ipv4_prefixlen)); 132 } else { 133 uint64_t s; 134 *c2 = rrl_ip6; 135 memmove(&s, &((struct sockaddr_in6*)&query->addr)->sin6_addr, 136 sizeof(s)); 137 return s & rrl_ipv6_mask; 138 } 139 #else 140 *c2 = 0; 141 return query->addr.sin_addr.s_addr & htonl(0xffffffff << (32-rrl_ipv4_prefixlen)); 142 #endif 143 } 144 145 /** debug source to string */ 146 static const char* rrlsource2str(uint64_t s, uint16_t c2) 147 { 148 static char buf[64]; 149 struct in_addr a4; 150 #ifdef INET6 151 if(c2) { 152 /* IPv6 */ 153 struct in6_addr a6; 154 memset(&a6, 0, sizeof(a6)); 155 memmove(&a6, &s, sizeof(s)); 156 if(!inet_ntop(AF_INET6, &a6, buf, sizeof(buf))) 157 strlcpy(buf, "[ip6 ntop failed]", sizeof(buf)); 158 else { 159 static char prefix[4]; 160 snprintf(prefix, sizeof(prefix), "/%d", rrl_ipv6_prefixlen); 161 strlcat(buf, &prefix[0], sizeof(buf)); 162 } 163 return buf; 164 } 165 #else 166 (void)c2; 167 #endif 168 /* ipv4 */ 169 a4.s_addr = (uint32_t)s; 170 if(!inet_ntop(AF_INET, &a4, buf, sizeof(buf))) 171 strlcpy(buf, 
"[ip4 ntop failed]", sizeof(buf)); 172 else { 173 static char prefix[4]; 174 snprintf(prefix, sizeof(prefix), "/%d", rrl_ipv4_prefixlen); 175 strlcat(buf, &prefix[0], sizeof(buf)); 176 } 177 return buf; 178 } 179 180 enum rrl_type rrlstr2type(const char* s) 181 { 182 if(strcmp(s, "nxdomain")==0) return rrl_type_nxdomain; 183 else if(strcmp(s, "error")==0) return rrl_type_error; 184 else if(strcmp(s, "referral")==0) return rrl_type_referral; 185 else if(strcmp(s, "any")==0) return rrl_type_any; 186 else if(strcmp(s, "wildcard")==0) return rrl_type_wildcard; 187 else if(strcmp(s, "nodata")==0) return rrl_type_nodata; 188 else if(strcmp(s, "dnskey")==0) return rrl_type_dnskey; 189 else if(strcmp(s, "positive")==0) return rrl_type_positive; 190 else if(strcmp(s, "rrsig")==0) return rrl_type_rrsig; 191 else if(strcmp(s, "all")==0) return rrl_type_all; 192 return 0; /* unknown */ 193 } 194 195 const char* rrltype2str(enum rrl_type c) 196 { 197 switch(c & 0x0fff) { 198 case rrl_type_nxdomain: return "nxdomain"; 199 case rrl_type_error: return "error"; 200 case rrl_type_referral: return "referral"; 201 case rrl_type_any: return "any"; 202 case rrl_type_wildcard: return "wildcard"; 203 case rrl_type_nodata: return "nodata"; 204 case rrl_type_dnskey: return "dnskey"; 205 case rrl_type_positive: return "positive"; 206 case rrl_type_rrsig: return "rrsig"; 207 case rrl_type_all: return "all"; 208 } 209 return "unknown"; 210 } 211 212 /** classify the query in a number of different types, each has separate 213 * ratelimiting, so that positive queries are not impeded by others */ 214 static uint16_t rrl_classify(query_type* query, const uint8_t** d, 215 size_t* d_len) 216 { 217 if(RCODE(query->packet) == RCODE_NXDOMAIN) { 218 if(query->zone && query->zone->apex) { 219 *d = dname_name(domain_dname(query->zone->apex)); 220 *d_len = domain_dname(query->zone->apex)->name_size; 221 } 222 return rrl_type_nxdomain; 223 } 224 if(RCODE(query->packet) != RCODE_OK) { 225 if(query->zone && 
query->zone->apex) { 226 *d = dname_name(domain_dname(query->zone->apex)); 227 *d_len = domain_dname(query->zone->apex)->name_size; 228 } 229 return rrl_type_error; 230 } 231 if(query->delegation_domain) { 232 *d = dname_name(domain_dname(query->delegation_domain)); 233 *d_len = domain_dname(query->delegation_domain)->name_size; 234 return rrl_type_referral; 235 } 236 if(query->qtype == TYPE_ANY) { 237 if(query->qname) { 238 *d = dname_name(query->qname); 239 *d_len = query->qname->name_size; 240 } 241 return rrl_type_any; 242 } 243 if(query->qtype == TYPE_RRSIG) { 244 if(query->qname) { 245 *d = dname_name(query->qname); 246 *d_len = query->qname->name_size; 247 } 248 return rrl_type_rrsig; 249 } 250 if(query->wildcard_domain) { 251 *d = dname_name(domain_dname(query->wildcard_domain)); 252 *d_len = domain_dname(query->wildcard_domain)->name_size; 253 return rrl_type_wildcard; 254 } 255 if(ANCOUNT(query->packet) == 0) { 256 if(query->zone && query->zone->apex) { 257 *d = dname_name(domain_dname(query->zone->apex)); 258 *d_len = domain_dname(query->zone->apex)->name_size; 259 } 260 return rrl_type_nodata; 261 } 262 if(query->qtype == TYPE_DNSKEY) { 263 if(query->qname) { 264 *d = dname_name(query->qname); 265 *d_len = query->qname->name_size; 266 } 267 return rrl_type_dnskey; 268 } 269 /* positive */ 270 if(query->qname) { 271 *d = dname_name(query->qname); 272 *d_len = query->qname->name_size; 273 } 274 return rrl_type_positive; 275 } 276 277 /** Examine the query and return hash and source of netblock. 
*/
static void examine_query(query_type* query, uint32_t* hash, uint64_t* source,
	uint16_t* flags, uint32_t* lm)
{
	/* compile a binary string representing the query */
	uint16_t c, c2;
	/* size with 16 bytes to spare */
	uint8_t buf[MAXDOMAINLEN + sizeof(*source) + sizeof(c) + 16];
	const uint8_t* dname = NULL; size_t dname_len = 0;
	/* fixed seed so the hash is stable across children sharing tables */
	uint32_t r = 0x267fcd16;

	*source = rrl_get_source(query, &c2);
	c = rrl_classify(query, &dname, &dname_len);
	/* whitelisted types for this zone use the whitelist limit instead */
	if(query->zone && query->zone->opts &&
		(query->zone->opts->pattern->rrl_whitelist & c))
		*lm = rrl_whitelist_ratelimit;
	if(*lm == 0) return;
	c |= c2;
	*flags = c;
	/* buf = source netblock | classification flags [| dname] */
	memmove(buf, source, sizeof(*source));
	memmove(buf+sizeof(*source), &c, sizeof(c));

	DEBUG(DEBUG_QUERY, 1, (LOG_INFO, "rrl_examine type %s name %s", rrltype2str(c), dname?wiredname2str(dname):"NULL"));

	/* and hash it */
	if(dname && dname_len <= MAXDOMAINLEN) {
		memmove(buf+sizeof(*source)+sizeof(c), dname, dname_len);
		*hash = hashlittle(buf, sizeof(*source)+sizeof(c)+dname_len, r);
	} else
		*hash = hashlittle(buf, sizeof(*source)+sizeof(c), r);
}

/* age the bucket because elapsed time steps have gone by */
static void rrl_attenuate_bucket(struct rrl_bucket* b, int32_t elapsed)
{
	if(elapsed > 16) {
		/* too long ago; any remaining rate has decayed to nothing */
		b->rate = 0;
	} else {
		/* divide rate /2 for every elapsed time step, because
		 * the counters in the inbetween steps were 0 */
		/* r(t) = 0 + 0/2 + 0/4 + .. + oldrate/2^dt */
		b->rate >>= elapsed;
		/* we know that elapsed >= 2 (the elapsed==1 case is handled
		 * by the caller), so elapsed-1 >= 1 */
		b->rate += (b->counter>>(elapsed-1));
	}
}

/** log a message about ratelimits; str is e.g. "block"/"unblock" */
static void
rrl_msg(query_type* query, const char* str)
{
	uint16_t c, c2, wl = 0;
	const uint8_t* d = NULL;
	size_t d_len;
	uint64_t s;
	char address[128];
	if(verbosity < 1) return;
	addr2str(&query->addr, address, sizeof(address));
	/* re-derive source and classification just for the log line */
	s = rrl_get_source(query, &c2);
	c = rrl_classify(query, &d, &d_len) | c2;
	if(query->zone && query->zone->opts &&
		(query->zone->opts->pattern->rrl_whitelist & c))
		wl = 1;
	log_msg(LOG_INFO, "ratelimit %s %s type %s%s target %s query %s %s",
		str, d?wiredname2str(d):"", rrltype2str(c),
		wl?"(whitelisted)":"", rrlsource2str(s, c2),
		address, rrtype_to_string(query->qtype));
}

/** true if the query used to be blocked by the ratelimit */
static int
used_to_block(uint32_t rate, uint32_t counter, uint32_t lm)
{
	/* blocked when the smoothed rate, or its projection with the
	 * current counter, reaches the limit */
	return rate >= lm || counter+rate/2 >= lm;
}

/** update the rate in a ratelimit bucket, return actual rate */
uint32_t rrl_update(query_type* query, uint32_t hash, uint64_t source,
	uint16_t flags, int32_t now, uint32_t lm)
{
	struct rrl_bucket* b = &rrl_array[hash % rrl_array_size];

	DEBUG(DEBUG_QUERY, 1, (LOG_INFO, "source %llx hash %x oldrate %d oldcount %d stamp %d",
		(long long unsigned)source, hash, b->rate, b->counter, b->stamp));

	/* check if different source */
	if(b->source != source || b->flags != flags || b->hash != hash) {
		/* initialise: this bucket is taken over by a new source */
		/* potentially the wrong limit here, used lower nonwhitelim */
		if(verbosity >= 1 &&
			used_to_block(b->rate, b->counter, rrl_ratelimit)) {
			char address[128];
			addr2str(&query->addr, address, sizeof(address));
			log_msg(LOG_INFO, "ratelimit unblock ~ type %s target %s query %s %s (%s collision)",
				rrltype2str(b->flags),
				rrlsource2str(b->source,
b->flags),
				address, rrtype_to_string(query->qtype),
				(b->hash!=hash?"bucket":"hash"));
		}
		/* take over the bucket for the new source */
		b->hash = hash;
		b->source = source;
		b->flags = flags;
		b->counter = 1;
		b->rate = 0;
		b->stamp = now;
		return 1;
	}
	/* this is the same source */

	/* check if old, zero or smooth it */
	/* circular arith for time */
	if(now - b->stamp == 1) {
		/* very busy bucket and time just stepped one step */
		int oldblock = used_to_block(b->rate, b->counter, lm);
		b->rate = b->rate/2 + b->counter;
		if(oldblock && b->rate < lm)
			rrl_msg(query, "unblock");
		b->counter = 1;
		b->stamp = now;
	} else if(now - b->stamp > 0) {
		/* older bucket: decay the rate over the elapsed steps */
		int olderblock = used_to_block(b->rate, b->counter, lm);
		rrl_attenuate_bucket(b, now - b->stamp);
		if(olderblock && b->rate < lm)
			rrl_msg(query, "unblock");
		b->counter = 1;
		b->stamp = now;
	} else if(now != b->stamp) {
		/* robust, timestamp from the future: reset the bucket */
		if(used_to_block(b->rate, b->counter, lm))
			rrl_msg(query, "unblock");
		b->rate = 0;
		b->counter = 1;
		b->stamp = now;
	} else {
		/* bucket is from the current timestep, update counter */
		b->counter ++;

		/* log what is blocked for operational debugging */
		if(b->counter + b->rate/2 == lm && b->rate < lm)
			rrl_msg(query, "block");
	}

	/* return max from current rate and projected next-value for rate */
	/* so that if the rate increases suddenly very high, it is
	 * stopped halfway into the time step */
	if(b->counter > b->rate/2)
		return b->counter + b->rate/2;
	return b->rate;
}

/** process a query for ratelimiting; returns nonzero when it is over
 * its limit and should be dropped or slipped */
int rrl_process_query(query_type* query)
{
	uint64_t source;
	uint32_t hash;
	/* we can use circular arithmetic here, so int32 works after 2038 */
	int32_t now = (int32_t)time(NULL);
	uint32_t lm = rrl_ratelimit;
	uint16_t flags;
	if(rrl_ratelimit == 0 && rrl_whitelist_ratelimit == 0)
		return 0;

	/* examine query */
	examine_query(query, &hash, &source, &flags, &lm);

	if(lm == 0)
		return 0; /* no limit for this */

	/* update rate */
	return (rrl_update(query, hash, source, flags, now, lm) >= lm);
}

/** for a blocked query, either send a truncated (TC) reply to make the
 * client retry over TCP, or discard it entirely */
query_state_type rrl_slip(query_type* query)
{
	/* discard number the packets, randomly: 1 in rrl_slip_ratio
	 * packets gets a truncated reply instead of being dropped */
#ifdef HAVE_ARC4RANDOM_UNIFORM
	if((rrl_slip_ratio > 0) && ((rrl_slip_ratio == 1) || ((arc4random_uniform(rrl_slip_ratio)) == 0))) {
#elif HAVE_ARC4RANDOM
	if((rrl_slip_ratio > 0) && ((rrl_slip_ratio == 1) || ((arc4random() % rrl_slip_ratio) == 0))) {
#else
	if((rrl_slip_ratio > 0) && ((rrl_slip_ratio == 1) || ((random() % rrl_slip_ratio) == 0))) {
#endif
		/* set TC on the rest */
		TC_SET(query->packet);
		/* strip the answer down to the question section only */
		ANCOUNT_SET(query->packet, 0);
		NSCOUNT_SET(query->packet, 0);
		ARCOUNT_SET(query->packet, 0);
		if(query->qname)
			/* header, type, class, qname */
			buffer_set_position(query->packet,
				QHEADERSZ+4+query->qname->name_size);
		else buffer_set_position(query->packet, QHEADERSZ);
		return QUERY_PROCESSED;
	}
	return QUERY_DISCARDED;
}

#endif /* RATELIMIT */