/* rrl.c - Response Rate Limiting for NSD.
 * By W.C.A. Wijngaards
 * Copyright 2012, NLnet Labs.
 * BSD, see LICENSE.
 */
#include "config.h"
#include <errno.h>
#include "rrl.h"
#include "util.h"
#include "lookup3.h"
#include "options.h"

#ifdef RATELIMIT

#ifdef HAVE_MMAP
#include <sys/mman.h>
#if defined(MAP_ANON) && !defined(MAP_ANONYMOUS)
#define MAP_ANONYMOUS MAP_ANON
#endif
#endif /* HAVE_MMAP */


/**
 * The rate limiting bucket data structure; it represents the rate of
 * packets from a single source.
 * Smoothed average rates.
 */
struct rrl_bucket {
	/* the masked source address (netblock) */
	uint64_t source;
	/* smoothed rate; because rate = counter + oldrate/2, a steady
	 * stream of q queries per second settles at about 2*q */
	uint32_t rate;
	/* the full hash */
	uint32_t hash;
	/* counter for queries arrived in this second */
	uint32_t counter;
	/* timestamp; the time of the counter, the rate is from
	 * one timestep before that. */
	int32_t stamp;
	/* flags for the source mask and type */
	uint16_t flags;
};

/* the (global) array of RRL buckets */
static struct rrl_bucket* rrl_array = NULL;
static size_t rrl_array_size = RRL_BUCKETS;
static uint32_t rrl_ratelimit = RRL_LIMIT; /* 2x qps */
static uint8_t rrl_slip_ratio = RRL_SLIP;
static uint8_t rrl_ipv4_prefixlen = RRL_IPV4_PREFIX_LENGTH;
static uint8_t rrl_ipv6_prefixlen = RRL_IPV6_PREFIX_LENGTH;
static uint64_t rrl_ipv6_mask; /* max prefixlen 64 */
static uint32_t rrl_whitelist_ratelimit = RRL_WLIST_LIMIT; /* 2x qps */

/* the array of mmaps for the children (saved between reloads) */
static void** rrl_maps = NULL;
static size_t rrl_maps_num = 0;

void rrl_mmap_init(int numch, size_t numbuck, size_t lm, size_t wlm, size_t sm,
	size_t plf, size_t pls)
{
#ifdef HAVE_MMAP
	size_t i;
#endif
	if(numbuck != 0)
		rrl_array_size = numbuck;
	rrl_ratelimit = lm*2;
	rrl_slip_ratio = sm;
	rrl_ipv4_prefixlen = plf;
	rrl_ipv6_prefixlen = pls;
	if(pls <= 32) {
		rrl_ipv6_mask = ((uint64_t)htonl(0xffffffff << (32-pls))) << 32;
	} else {
		rrl_ipv6_mask = ((uint64_t)htonl(0xffffffff << (64-pls))) |
			(((uint64_t)0xffffffff)<<32);
	}
	rrl_whitelist_ratelimit = wlm*2;
#ifdef HAVE_MMAP
	/* allocate the ratelimit hashtable in a memory map so it is
	 * preserved across reforks (every child has its own table) */
	rrl_maps_num = (size_t)numch;
	rrl_maps = (void**)xalloc(sizeof(void*)*rrl_maps_num);
	for(i=0; i<rrl_maps_num; i++) {
		rrl_maps[i] = mmap(NULL,
			sizeof(struct rrl_bucket)*rrl_array_size,
			PROT_READ|PROT_WRITE, MAP_SHARED|MAP_ANONYMOUS, -1, 0);
		if(rrl_maps[i] == MAP_FAILED) {
			log_msg(LOG_ERR, "rrl: mmap failed: %s",
				strerror(errno));
			exit(1);
		}
		memset(rrl_maps[i], 0,
			sizeof(struct rrl_bucket)*rrl_array_size);
	}
#else
	(void)numch;
	rrl_maps_num = 0;
	rrl_maps = NULL;
#endif
}

void rrl_set_limit(size_t lm, size_t wlm, size_t sm)
{
	rrl_ratelimit = lm*2;
	rrl_whitelist_ratelimit = wlm*2;
	rrl_slip_ratio = sm;
}

void rrl_init(size_t ch)
{
	if(!rrl_maps || ch >= rrl_maps_num)
		rrl_array = xalloc_zero(sizeof(struct rrl_bucket)*rrl_array_size);
#ifdef HAVE_MMAP
	else rrl_array = (struct rrl_bucket*)rrl_maps[ch];
#endif
}
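/*
 * Illustrative note (added commentary, not from the original source):
 * with e.g. an IPv4 prefix length of 24, the mask
 * htonl(0xffffffff << (32-24)) keeps the first 24 bits of the address in
 * network byte order, so 192.0.2.1 and 192.0.2.200 both collapse to the
 * netblock 192.0.2.0/24 and share a bucket.  With an IPv6 prefix length
 * of 64, the pls > 32 branch in rrl_mmap_init() yields an all-ones 64-bit
 * mask, so the whole first 8 bytes of the address are kept (only 8 bytes
 * of the IPv6 address are read in the first place).
 */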
/** return the source netblock of the query, this is the genuine source
 * for genuine queries and the target for reflected packets */
static uint64_t rrl_get_source(query_type* query, uint16_t* c2)
{
	/* note there is an IPv6 subnet that maps to the same buckets
	 * as IPv4 space, but there is a flag in c2 that makes the hash
	 * different */
#ifdef INET6
	if( ((struct sockaddr_in*)&query->addr)->sin_family == AF_INET) {
		*c2 = 0;
		return ((struct sockaddr_in*)&query->addr)->
			sin_addr.s_addr & htonl(0xffffffff << (32-rrl_ipv4_prefixlen));
	} else {
		uint64_t s;
		*c2 = rrl_ip6;
		memmove(&s, &((struct sockaddr_in6*)&query->addr)->sin6_addr,
			sizeof(s));
		return s & rrl_ipv6_mask;
	}
#else
	*c2 = 0;
	return query->addr.sin_addr.s_addr & htonl(0xffffffff << (32-rrl_ipv4_prefixlen));
#endif
}

/** debug source to string */
static const char* rrlsource2str(uint64_t s, uint16_t c2)
{
	static char buf[64];
	struct in_addr a4;
#ifdef INET6
	if(c2) {
		/* IPv6 */
		struct in6_addr a6;
		memset(&a6, 0, sizeof(a6));
		memmove(&a6, &s, sizeof(s));
		if(!inet_ntop(AF_INET6, &a6, buf, sizeof(buf)))
			strlcpy(buf, "[ip6 ntop failed]", sizeof(buf));
		else {
			static char prefix[4];
			snprintf(prefix, sizeof(prefix), "/%d", rrl_ipv6_prefixlen);
			strlcat(buf, &prefix[0], sizeof(buf));
		}
		return buf;
	}
#endif
	/* ipv4 */
	a4.s_addr = (uint32_t)s;
	if(!inet_ntop(AF_INET, &a4, buf, sizeof(buf)))
		strlcpy(buf, "[ip4 ntop failed]", sizeof(buf));
	else {
		static char prefix[4];
		snprintf(prefix, sizeof(prefix), "/%d", rrl_ipv4_prefixlen);
		strlcat(buf, &prefix[0], sizeof(buf));
	}
	return buf;
}

enum rrl_type rrlstr2type(const char* s)
{
	if(strcmp(s, "nxdomain")==0) return rrl_type_nxdomain;
	else if(strcmp(s, "error")==0) return rrl_type_error;
	else if(strcmp(s, "referral")==0) return rrl_type_referral;
	else if(strcmp(s, "any")==0) return rrl_type_any;
	else if(strcmp(s, "wildcard")==0) return rrl_type_wildcard;
	else if(strcmp(s, "nodata")==0) return rrl_type_nodata;
	else if(strcmp(s, "dnskey")==0) return rrl_type_dnskey;
	else if(strcmp(s, "positive")==0) return rrl_type_positive;
	else if(strcmp(s, "rrsig")==0) return rrl_type_rrsig;
	else if(strcmp(s, "all")==0) return rrl_type_all;
	return 0; /* unknown */
}

const char* rrltype2str(enum rrl_type c)
{
	switch(c & 0x0fff) {
	case rrl_type_nxdomain: return "nxdomain";
	case rrl_type_error: return "error";
	case rrl_type_referral: return "referral";
	case rrl_type_any: return "any";
	case rrl_type_wildcard: return "wildcard";
	case rrl_type_nodata: return "nodata";
	case rrl_type_dnskey: return "dnskey";
	case rrl_type_positive: return "positive";
	case rrl_type_rrsig: return "rrsig";
	case rrl_type_all: return "all";
	}
	return "unknown";
}
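/*
 * Added commentary: the classification checks in rrl_classify() below are
 * ordered, so for example an NXDOMAIN response is counted as
 * rrl_type_nxdomain even if the query type was ANY.  The classified type,
 * the source netblock and the relevant name (zone apex, delegation point,
 * wildcard or qname) together form the key that examine_query() hashes
 * into a bucket.
 */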
/** classify the query in a number of different types, each with separate
 * ratelimiting, so that positive queries are not impeded by others */
static uint16_t rrl_classify(query_type* query, const uint8_t** d,
	size_t* d_len)
{
	if(RCODE(query->packet) == RCODE_NXDOMAIN) {
		if(query->zone && query->zone->apex) {
			*d = dname_name(domain_dname(query->zone->apex));
			*d_len = domain_dname(query->zone->apex)->name_size;
		}
		return rrl_type_nxdomain;
	}
	if(RCODE(query->packet) != RCODE_OK) {
		if(query->zone && query->zone->apex) {
			*d = dname_name(domain_dname(query->zone->apex));
			*d_len = domain_dname(query->zone->apex)->name_size;
		}
		return rrl_type_error;
	}
	if(query->delegation_domain) {
		*d = dname_name(domain_dname(query->delegation_domain));
		*d_len = domain_dname(query->delegation_domain)->name_size;
		return rrl_type_referral;
	}
	if(query->qtype == TYPE_ANY) {
		if(query->qname) {
			*d = dname_name(query->qname);
			*d_len = query->qname->name_size;
		}
		return rrl_type_any;
	}
	if(query->qtype == TYPE_RRSIG) {
		if(query->qname) {
			*d = dname_name(query->qname);
			*d_len = query->qname->name_size;
		}
		return rrl_type_rrsig;
	}
	if(query->wildcard_domain) {
		*d = dname_name(domain_dname(query->wildcard_domain));
		*d_len = domain_dname(query->wildcard_domain)->name_size;
		return rrl_type_wildcard;
	}
	if(ANCOUNT(query->packet) == 0) {
		if(query->zone && query->zone->apex) {
			*d = dname_name(domain_dname(query->zone->apex));
			*d_len = domain_dname(query->zone->apex)->name_size;
		}
		return rrl_type_nodata;
	}
	if(query->qtype == TYPE_DNSKEY) {
		if(query->qname) {
			*d = dname_name(query->qname);
			*d_len = query->qname->name_size;
		}
		return rrl_type_dnskey;
	}
	/* positive */
	if(query->qname) {
		*d = dname_name(query->qname);
		*d_len = query->qname->name_size;
	}
	return rrl_type_positive;
}

/** Examine the query and return hash and source of netblock. */
static void examine_query(query_type* query, uint32_t* hash, uint64_t* source,
	uint16_t* flags, uint32_t* lm)
{
	/* compile a binary string representing the query */
	uint16_t c, c2;
	/* size with 16 bytes to spare */
	uint8_t buf[MAXDOMAINLEN + sizeof(*source) + sizeof(c) + 16];
	const uint8_t* dname = NULL; size_t dname_len = 0;
	uint32_t r = 0x267fcd16;

	*source = rrl_get_source(query, &c2);
	c = rrl_classify(query, &dname, &dname_len);
	if(query->zone && query->zone->opts &&
		(query->zone->opts->pattern->rrl_whitelist & c))
		*lm = rrl_whitelist_ratelimit;
	if(*lm == 0) return;
	c |= c2;
	*flags = c;
	memmove(buf, source, sizeof(*source));
	memmove(buf+sizeof(*source), &c, sizeof(c));

	DEBUG(DEBUG_QUERY, 1, (LOG_INFO, "rrl_examine type %s name %s",
		rrltype2str(c), dname?wiredname2str(dname):"NULL"));

	/* and hash it */
	if(dname && dname_len <= MAXDOMAINLEN) {
		memmove(buf+sizeof(*source)+sizeof(c), dname, dname_len);
		*hash = hashlittle(buf, sizeof(*source)+sizeof(c)+dname_len, r);
	} else
		*hash = hashlittle(buf, sizeof(*source)+sizeof(c), r);
}
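/*
 * Added commentary on the smoothing arithmetic: in the common case the
 * rate is updated once per second in rrl_update() as
 *   rate(t) = counter(t-1) + rate(t-1)/2
 * so a constant stream of q queries per second settles at rate ~= 2*q,
 * which is why the configured limits are stored multiplied by two.
 * If a bucket was idle for dt > 1 seconds, rrl_attenuate_bucket() below
 * applies the same halving for the empty steps:
 *   rate(t) = rate(t-dt)/2^dt + counter/2^(dt-1)
 */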
/* age the bucket because elapsed time steps have gone by */
static void rrl_attenuate_bucket(struct rrl_bucket* b, int32_t elapsed)
{
	if(elapsed > 16) {
		b->rate = 0;
	} else {
		/* divide rate by 2 for every elapsed time step, because
		 * the counters in the in-between steps were 0 */
		/* r(t) = 0 + 0/2 + 0/4 + .. + oldrate/2^dt */
		b->rate >>= elapsed;
		/* we know that elapsed >= 2 */
		b->rate += (b->counter>>(elapsed-1));
	}
}

/** log a message about ratelimits */
static void
rrl_msg(query_type* query, const char* str)
{
	uint16_t c, c2, wl = 0;
	const uint8_t* d = NULL;
	size_t d_len;
	uint64_t s;
	char address[128];
	if(verbosity < 2) return;
	addr2str(&query->addr, address, sizeof(address));
	s = rrl_get_source(query, &c2);
	c = rrl_classify(query, &d, &d_len) | c2;
	if(query->zone && query->zone->opts &&
		(query->zone->opts->pattern->rrl_whitelist & c))
		wl = 1;
	log_msg(LOG_INFO, "ratelimit %s %s type %s%s target %s query %s %s",
		str, d?wiredname2str(d):"", rrltype2str(c),
		wl?"(whitelisted)":"", rrlsource2str(s, c2),
		address, rrtype_to_string(query->qtype));
}

/** true if the query used to be blocked by the ratelimit */
static int
used_to_block(uint32_t rate, uint32_t counter, uint32_t lm)
{
	return rate >= lm || counter+rate/2 >= lm;
}

/** update the rate in a ratelimit bucket, return actual rate */
uint32_t rrl_update(query_type* query, uint32_t hash, uint64_t source,
	uint16_t flags, int32_t now, uint32_t lm)
{
	struct rrl_bucket* b = &rrl_array[hash % rrl_array_size];

	DEBUG(DEBUG_QUERY, 1, (LOG_INFO, "source %llx hash %x oldrate %d oldcount %d stamp %d",
		(long long unsigned)source, hash, b->rate, b->counter, b->stamp));

	/* check if different source */
	if(b->source != source || b->flags != flags || b->hash != hash) {
		/* initialise */
		/* potentially the wrong limit here; the lower, non-whitelist
		 * limit is used */
		if(verbosity >= 2 &&
			used_to_block(b->rate, b->counter, rrl_ratelimit)) {
			char address[128];
			addr2str(&query->addr, address, sizeof(address));
			log_msg(LOG_INFO, "ratelimit unblock ~ type %s target %s query %s %s (%s collision)",
				rrltype2str(b->flags),
				rrlsource2str(b->source, b->flags),
				address, rrtype_to_string(query->qtype),
				(b->hash!=hash?"bucket":"hash"));
		}
		b->hash = hash;
		b->source = source;
		b->flags = flags;
		b->counter = 1;
		b->rate = 0;
		b->stamp = now;
		return 1;
	}
	/* this is the same source */

	/* check if the bucket is old; zero it or smooth it */
	/* circular arith for time */
	if(now - b->stamp == 1) {
		/* very busy bucket and time just stepped one step */
		int oldblock = used_to_block(b->rate, b->counter, lm);
		b->rate = b->rate/2 + b->counter;
		if(oldblock && b->rate < lm)
			rrl_msg(query, "unblock");
		b->counter = 1;
		b->stamp = now;
	} else if(now - b->stamp > 0) {
		/* older bucket */
		int olderblock = used_to_block(b->rate, b->counter, lm);
		rrl_attenuate_bucket(b, now - b->stamp);
		if(olderblock && b->rate < lm)
			rrl_msg(query, "unblock");
		b->counter = 1;
		b->stamp = now;
	} else if(now != b->stamp) {
		/* robust against a timestamp from the future */
		if(used_to_block(b->rate, b->counter, lm))
			rrl_msg(query, "unblock");
		b->rate = 0;
		b->counter = 1;
		b->stamp = now;
	} else {
		/* bucket is from the current timestep, update counter */
		b->counter ++;

		/* log what is blocked for operational debugging */
		if(b->counter + b->rate/2 == lm && b->rate < lm)
			rrl_msg(query, "block");
	}
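	/*
	 * Added note: the value returned below is compared against lm in
	 * rrl_process_query(); a source counts as over the limit when either
	 * its smoothed rate or the projection counter + rate/2 for the
	 * current second reaches lm (see used_to_block() above).
	 */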
	/* return max from current rate and projected next-value for rate */
	/* so that if the rate increases suddenly very high, it is
	 * stopped halfway into the time step */
	if(b->counter > b->rate/2)
		return b->counter + b->rate/2;
	return b->rate;
}

int rrl_process_query(query_type* query)
{
	uint64_t source;
	uint32_t hash;
	/* we can use circular arithmetic here, so int32 works after 2038 */
	int32_t now = (int32_t)time(NULL);
	uint32_t lm = rrl_ratelimit;
	uint16_t flags;
	if(rrl_ratelimit == 0 && rrl_whitelist_ratelimit == 0)
		return 0;

	/* examine query */
	examine_query(query, &hash, &source, &flags, &lm);

	if(lm == 0)
		return 0; /* no limit for this */

	/* update rate */
	return (rrl_update(query, hash, source, flags, now, lm) >= lm);
}

query_state_type rrl_slip(query_type* query)
{
	/* discard the packets, but randomly "slip" 1 out of every
	 * rrl_slip_ratio of them as a truncated response */
#ifdef HAVE_ARC4RANDOM
	if((rrl_slip_ratio > 0) && ((rrl_slip_ratio == 1) || ((arc4random() % rrl_slip_ratio) == 0))) {
#else
	if((rrl_slip_ratio > 0) && ((rrl_slip_ratio == 1) || ((random() % rrl_slip_ratio) == 0))) {
#endif
		/* answer this one truncated (TC), the client can retry over TCP */
		TC_SET(query->packet);
		ANCOUNT_SET(query->packet, 0);
		NSCOUNT_SET(query->packet, 0);
		ARCOUNT_SET(query->packet, 0);
		if(query->qname)
			/* header, type, class, qname */
			buffer_set_position(query->packet,
				QHEADERSZ+4+query->qname->name_size);
		else buffer_set_position(query->packet, QHEADERSZ);
		return QUERY_PROCESSED;
	}
	return QUERY_DISCARDED;
}

#endif /* RATELIMIT */