/*	$NetBSD: ntp_monitor.c,v 1.1.1.3 2013/12/27 23:30:56 christos Exp $	*/

/*
 * ntp_monitor - monitor ntpd statistics
 */
#ifdef HAVE_CONFIG_H
# include <config.h>
#endif

#include "ntpd.h"
#include "ntp_io.h"
#include "ntp_if.h"
#include "ntp_lists.h"
#include "ntp_stdlib.h"
#include <ntp_random.h>

#include <stdio.h>
#include <signal.h>
#ifdef HAVE_SYS_IOCTL_H
# include <sys/ioctl.h>
#endif

/*
 * Record statistics based on source address, mode and version. The
 * receive procedure calls us with the incoming rbufp before it does
 * anything else. While at it, implement rate controls for inbound
 * traffic.
 *
 * Each entry is doubly linked into two lists, a hash table and a
 * most-recently-used (MRU) list. When a packet arrives it is looked
 * up in the hash table. If found, the statistics are updated and the
 * entry relinked at the head of the MRU list. If not found, a new
 * entry is allocated, initialized and linked into the hash table and
 * at the head of the MRU list.
 *
 * Memory is usually allocated by grabbing a big chunk of new memory
 * and cutting it up into littler pieces. The exception to this is
 * when we hit the memory limit. Then we free memory by grabbing
 * entries off the tail of the MRU list, unlinking them from the hash
 * table, and reinitializing them.
 *
 * INC_MONLIST is the default allocation granularity in entries.
 * INIT_MONLIST is the default initial allocation in entries.
 */
#ifdef MONMEMINC		/* old name */
# define	INC_MONLIST	MONMEMINC
#elif !defined(INC_MONLIST)
# define	INC_MONLIST	(4 * 1024 / sizeof(mon_entry))
#endif
#ifndef INIT_MONLIST
# define	INIT_MONLIST	(4 * 1024 / sizeof(mon_entry))
#endif
#ifndef MRU_MAXDEPTH_DEF
# define	MRU_MAXDEPTH_DEF	(1024 * 1024 / sizeof(mon_entry))
#endif

/*
 * Hashing stuff
 */
u_char	mon_hash_bits;

/*
 * Pointers to the hash table and the MRU list. Memory for the hash
 * table is allocated only if monitoring is enabled.
 */
mon_entry **	mon_hash;	/* MRU hash table */
mon_entry	mon_mru_list;	/* mru listhead */

/*
 * List of free structures, and counters of in-use and total
 * structures. The free structures are linked with the hash_next
 * field.
 */
static	mon_entry *mon_free;		/* free list or null if none */
	u_int mru_alloc;		/* mru list + free list count */
	u_int mru_entries;		/* mru list count */
	u_int mru_peakentries;		/* highest mru_entries seen */
	u_int mru_initalloc = INIT_MONLIST; /* entries to preallocate */
	u_int mru_incalloc = INC_MONLIST;   /* allocation batch factor */
static	u_int mon_mem_increments;	/* times called malloc() */

/*
 * Parameters of the RES_LIMITED restriction option. We define headway
 * as the idle time between packets. A packet is discarded if the
 * headway is less than the minimum, as well as if the average headway
 * is less than eight times the increment.
 */
int	ntp_minpkt = NTP_MINPKT;	/* minimum (log 2 s) */
u_char	ntp_minpoll = NTP_MINPOLL;	/* increment (log 2 s) */
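
/*
 * Illustrative sketch, not part of the original file: a minimal model
 * of the headway test described above, using the same parameters.
 * Each arriving packet is charged one increment (1 << ntp_minpoll
 * seconds) against a per-source counter that drains one unit per idle
 * second; the packet is rate limited if the idle time falls short of
 * the minimum (less a one-second grace) or if the counter, including
 * this packet's charge, would reach NTP_SHIFT increments.  The KoD
 * pacing done by the real code in ntp_monitor() is omitted here.
 */
#if 0
static int				/* TRUE if rate limited */
headway_exceeded(
	int	interval,		/* seconds since previous packet */
	int *	leak			/* per-source headway counter */
	)
{
	int	head = 1 << ntp_minpoll;	/* headway increment */
	int	limit = NTP_SHIFT * head;	/* average threshold */

	*leak = max(0, *leak - interval);	/* drain idle time */
	if (interval + 1 >= ntp_minpkt && *leak + head < limit) {
		*leak += head - 2;	/* 2-s grace on the increment */
		return FALSE;
	}
	return TRUE;
}
#endif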

/*
 * Initialization state. We may be monitoring, we may not. If
 * we aren't, we may not even have allocated any memory yet.
 */
u_int	mon_enabled;			/* enable switch */
u_int	mru_mindepth = 600;		/* preempt above this */
int	mru_maxage = 64;		/* for entries older than */
u_int	mru_maxdepth =			/* MRU count hard limit */
		MRU_MAXDEPTH_DEF;
int	mon_age = 3000;			/* preemption limit */

static	void		mon_getmoremem(void);
static	void		remove_from_hash(mon_entry *);
static	inline void	mon_free_entry(mon_entry *);
static	inline void	mon_reclaim_entry(mon_entry *);


/*
 * init_mon - initialize monitoring global data
 */
void
init_mon(void)
{
	/*
	 * Don't do much of anything here. We don't allocate memory
	 * until mon_start().
	 */
	mon_enabled = MON_OFF;
	INIT_DLIST(mon_mru_list, mru);
}


/*
 * remove_from_hash - removes an entry from the address hash table and
 *		      decrements mru_entries.
 */
static void
remove_from_hash(
	mon_entry *mon
	)
{
	u_int hash;
	mon_entry *punlinked;

	mru_entries--;
	hash = MON_HASH(&mon->rmtadr);
	UNLINK_SLIST(punlinked, mon_hash[hash], mon, hash_next,
		     mon_entry);
	NTP_ENSURE(punlinked == mon);
}


static inline void
mon_free_entry(
	mon_entry *m
	)
{
	ZERO(*m);
	LINK_SLIST(mon_free, m, hash_next);
}


/*
 * mon_reclaim_entry - Remove an entry from the MRU list and from the
 *		       hash array, then zero-initialize it. Indirectly
 *		       decrements mru_entries.
 *
 * The entry is prepared to be reused. Before return, in
 * remove_from_hash(), mru_entries is decremented. It is the caller's
 * responsibility to increment it again.
 */
static inline void
mon_reclaim_entry(
	mon_entry *m
	)
{
	DEBUG_INSIST(NULL != m);

	UNLINK_DLIST(m, mru);
	remove_from_hash(m);
	ZERO(*m);
}


/*
 * mon_getmoremem - get more memory and put it on the free list
 */
static void
mon_getmoremem(void)
{
	mon_entry *chunk;
	mon_entry *mon;
	u_int entries;

	entries = (0 == mon_mem_increments)
		      ? mru_initalloc
		      : mru_incalloc;

	chunk = emalloc(entries * sizeof(*chunk));
	for (mon = chunk + entries - 1; mon >= chunk; mon--)
		mon_free_entry(mon);

	mru_alloc += entries;
	mon_mem_increments++;
}


/*
 * mon_start - start up the monitoring software
 */
void
mon_start(
	int	mode
	)
{
	size_t octets;
	u_int min_hash_slots;

	if (MON_OFF == mode)		/* MON_OFF is 0 */
		return;
	if (mon_enabled) {
		mon_enabled |= mode;
		return;
	}
	if (0 == mon_mem_increments)
		mon_getmoremem();
	/*
	 * Select the MRU hash table size to limit the average count
	 * per bucket at capacity (mru_maxdepth) to 8, if possible
	 * given our hash is limited to 16 bits.
	 */
	min_hash_slots = (mru_maxdepth / 8) + 1;
	mon_hash_bits = 0;
	while (min_hash_slots >>= 1)
		mon_hash_bits++;
	mon_hash_bits = max(4, mon_hash_bits);
	mon_hash_bits = min(16, mon_hash_bits);
	octets = sizeof(*mon_hash) * MON_HASH_SIZE;
	mon_hash = erealloc_zero(mon_hash, octets, 0);

	mon_enabled = mode;
}
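
/*
 * Worked example of the sizing above (added commentary, not original;
 * the 64-byte entry size is assumed purely for illustration): with the
 * default "mru maxmem" of 1024 KB and sizeof(mon_entry) == 64,
 * mru_maxdepth is 1024 * 1024 / 64 = 16384, so min_hash_slots is
 * 16384 / 8 + 1 = 2049.  The loop computes floor(log2(2049)) = 11,
 * which survives the clamp to [4, 16], giving mon_hash_bits = 11,
 * i.e. a table of 2^11 = 2048 buckets and roughly 8 entries per
 * bucket when the MRU list is at capacity.
 */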


/*
 * mon_stop - stop the monitoring software
 */
void
mon_stop(
	int	mode
	)
{
	mon_entry *mon;

	if (MON_OFF == mon_enabled)
		return;
	if ((mon_enabled & mode) == 0 || mode == MON_OFF)
		return;

	mon_enabled &= ~mode;
	if (mon_enabled != MON_OFF)
		return;

	/*
	 * Move everything on the MRU list to the free list quickly,
	 * without bothering to remove each entry from either the MRU
	 * list or the hash table.
	 */
	ITER_DLIST_BEGIN(mon_mru_list, mon, mru, mon_entry)
		mon_free_entry(mon);
	ITER_DLIST_END()

	/* empty the MRU list and hash table. */
	mru_entries = 0;
	INIT_DLIST(mon_mru_list, mru);
	zero_mem(mon_hash, sizeof(*mon_hash) * MON_HASH_SIZE);
}


/*
 * mon_clearinterface -- remove mru entries referring to a local address
 *			 which is going away.
 */
void
mon_clearinterface(
	endpt *lcladr
	)
{
	mon_entry *mon;

	/* iterate mon over mon_mru_list */
	ITER_DLIST_BEGIN(mon_mru_list, mon, mru, mon_entry)
		if (mon->lcladr == lcladr) {
			/* remove from mru list */
			UNLINK_DLIST(mon, mru);
			/* remove from hash list, adjust mru_entries */
			remove_from_hash(mon);
			/* put on free list */
			mon_free_entry(mon);
		}
	ITER_DLIST_END()
}
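
/*
 * Illustrative lifecycle sketch, not part of the original file:
 * schematic use of the entry points above.  MON_ON is assumed to be
 * the standard enable mode bit; the elided surrounding code is only
 * hinted at.  Monitoring is switched on before traffic is recorded,
 * local addresses are flushed as they go away, and clearing the last
 * enabled mode bit releases every MRU entry.
 */
#if 0
	mon_start(MON_ON);		/* monitoring enabled */
	...
	mon_clearinterface(lcladr);	/* local endpt being deleted */
	...
	mon_stop(MON_ON);		/* drops the whole MRU list */
#endif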


/*
 * ntp_monitor - record stats about this packet
 *
 * Returns the supplied restriction flags, with RES_LIMITED and RES_KOD
 * cleared unless the packet should not be responded to normally
 * (RES_LIMITED) and possibly should trigger a KoD response (RES_KOD).
 * The returned flags are saved in the MRU entry, so that the entry
 * reflects whether the last packet from that source triggered rate
 * limiting, and, if so, a possible KoD response. This implies you
 * cannot tell whether a given address is eligible for rate
 * limiting/KoD from the monlist restrict bits, only whether or not the
 * last packet triggered such responses. ntpdc -c reslist lets you see
 * whether RES_LIMITED or RES_KOD is lit for a particular address
 * before ntp_monitor()'s typical dousing.
 */
u_short
ntp_monitor(
	struct recvbuf *rbufp,
	u_short	flags
	)
{
	l_fp		interval_fp;
	struct pkt *	pkt;
	mon_entry *	mon;
	mon_entry *	oldest;
	int		oldest_age;
	u_int		hash;
	u_short		restrict_mask;
	u_char		mode;
	u_char		version;
	int		interval;
	int		head;		/* headway increment */
	int		leak;		/* new headway */
	int		limit;		/* average threshold */

	if (mon_enabled == MON_OFF)
		return ~(RES_LIMITED | RES_KOD) & flags;

	pkt = &rbufp->recv_pkt;
	hash = MON_HASH(&rbufp->recv_srcadr);
	mode = PKT_MODE(pkt->li_vn_mode);
	version = PKT_VERSION(pkt->li_vn_mode);
	mon = mon_hash[hash];

	/*
	 * We keep track of all traffic for a given IP in one entry,
	 * otherwise cron'ed ntpdate or similar evades RES_LIMITED.
	 */

	for (; mon != NULL; mon = mon->hash_next)
		if (SOCK_EQ(&mon->rmtadr, &rbufp->recv_srcadr))
			break;

	if (mon != NULL) {
		interval_fp = rbufp->recv_time;
		L_SUB(&interval_fp, &mon->last);
		/* add one-half second to round up */
		L_ADDUF(&interval_fp, 0x80000000);
		interval = interval_fp.l_i;
		mon->last = rbufp->recv_time;
		NSRCPORT(&mon->rmtadr) = NSRCPORT(&rbufp->recv_srcadr);
		mon->count++;
		restrict_mask = flags;
		mon->vn_mode = VN_MODE(version, mode);

		/* Shuffle to the head of the MRU list. */
		UNLINK_DLIST(mon, mru);
		LINK_DLIST(mon_mru_list, mon, mru);

		/*
		 * At this point the most recent arrival is first in the
		 * MRU list. Decrease the counter by the headway, but
		 * not less than zero.
		 */
		mon->leak -= interval;
		mon->leak = max(0, mon->leak);
		head = 1 << ntp_minpoll;
		leak = mon->leak + head;
		limit = NTP_SHIFT * head;

		DPRINTF(2, ("MRU: interval %d headway %d limit %d\n",
			    interval, leak, limit));

		/*
		 * If the minimum and average thresholds are not
		 * exceeded, douse the RES_LIMITED and RES_KOD bits and
		 * increase the counter by the headway increment. Note
		 * that we give a 1-s grace for the minimum threshold
		 * and a 2-s grace for the headway increment. If one or
		 * both thresholds are exceeded and the old counter is
		 * less than the average threshold, set the counter to
		 * the average threshold plus the increment and leave
		 * the RES_LIMITED and RES_KOD bits lit. Otherwise,
		 * leave the counter alone and douse the RES_KOD bit.
		 * This rate-limits the KoDs to no less than the average
		 * headway.
		 */
		if (interval + 1 >= ntp_minpkt && leak < limit) {
			mon->leak = leak - 2;
			restrict_mask &= ~(RES_LIMITED | RES_KOD);
		} else if (mon->leak < limit)
			mon->leak = limit + head;
		else
			restrict_mask &= ~RES_KOD;

		mon->flags = restrict_mask;

		return mon->flags;
	}

	/*
	 * If we got here, this is the first we've heard of this
	 * guy. Get him some memory, either from the free list
	 * or from the tail of the MRU list.
	 *
	 * The following ntp.conf "mru" knobs come into play determining
	 * the depth (or count) of the MRU list:
	 * - mru_mindepth ("mru mindepth") is a floor beneath which
	 *   entries are kept without regard to their age.  The
	 *   default is 600, which matches the longtime implementation
	 *   limit on the total number of entries.
	 * - mru_maxage ("mru maxage") is a ceiling on the age in
	 *   seconds of entries.  Entries older than this are
	 *   reclaimed once mru_mindepth is exceeded.  64s default.
	 *   Note that entries older than this can easily survive
	 *   as they are reclaimed only as needed.
	 * - mru_maxdepth ("mru maxdepth") is a hard limit on the
	 *   number of entries.
	 * - "mru maxmem" sets mru_maxdepth to the number of entries
	 *   which fit in the given number of kilobytes.  The default
	 *   is 1024, or 1 megabyte.
	 * - mru_initalloc ("mru initalloc") sets the count of the
	 *   initial allocation of MRU entries.
	 * - "mru initmem" sets mru_initalloc in units of kilobytes.
	 *   The default is 4.
	 * - mru_incalloc ("mru incalloc") sets the number of entries to
	 *   allocate on-demand each time the free list is empty.
	 * - "mru incmem" sets mru_incalloc in units of kilobytes.
	 *   The default is 4.
	 * Whichever of "mru maxmem" or "mru maxdepth" occurs last in
	 * ntp.conf controls.  Similarly for "mru initalloc" and "mru
	 * initmem", and for "mru incalloc" and "mru incmem".
	 */
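	/*
	 * (Added commentary.)  The decision below: while under
	 * mru_mindepth entries, always take a fresh entry from the
	 * free list, allocating more if needed.  Otherwise, reclaim
	 * the oldest MRU entry if it has passed mru_maxage; failing
	 * that, take a fresh entry as long as the allocation stays
	 * under mru_maxdepth; and only when the hard limit is reached,
	 * either preempt the oldest entry or skip monitoring this
	 * packet, chosen randomly with the odds of preemption growing
	 * with the oldest entry's age relative to mon_age.
	 */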
	if (mru_entries < mru_mindepth) {
		if (NULL == mon_free)
			mon_getmoremem();
		UNLINK_HEAD_SLIST(mon, mon_free, hash_next);
	} else {
		oldest = TAIL_DLIST(mon_mru_list, mru);
		oldest_age = 0;		/* silence uninit warning */
		if (oldest != NULL) {
			interval_fp = rbufp->recv_time;
			L_SUB(&interval_fp, &oldest->last);
			/* add one-half second to round up */
			L_ADDUF(&interval_fp, 0x80000000);
			oldest_age = interval_fp.l_i;
		}
		/* note -1 is legal for mru_maxage (disables) */
		if (oldest != NULL && mru_maxage < oldest_age) {
			mon_reclaim_entry(oldest);
			mon = oldest;
		} else if (mon_free != NULL || mru_alloc <
			   mru_maxdepth) {
			if (NULL == mon_free)
				mon_getmoremem();
			UNLINK_HEAD_SLIST(mon, mon_free, hash_next);
		/* Preempt from the MRU list if old enough. */
		} else if (ntp_random() / (2. * FRAC) >
			   (double)oldest_age / mon_age) {
			return ~(RES_LIMITED | RES_KOD) & flags;
		} else {
			mon_reclaim_entry(oldest);
			mon = oldest;
		}
	}

	/*
	 * Got one, initialize it
	 */
	mru_entries++;
	mru_peakentries = max(mru_peakentries, mru_entries);
	mon->last = rbufp->recv_time;
	mon->first = mon->last;
	mon->count = 1;
	mon->flags = ~(RES_LIMITED | RES_KOD) & flags;
	mon->leak = 0;
	memcpy(&mon->rmtadr, &rbufp->recv_srcadr, sizeof(mon->rmtadr));
	mon->vn_mode = VN_MODE(version, mode);
	mon->lcladr = rbufp->dstadr;
	mon->cast_flags = (u_char)(((rbufp->dstadr->flags &
	    INT_MCASTOPEN) && rbufp->fd == mon->lcladr->fd) ? MDF_MCAST
	    : rbufp->fd == mon->lcladr->bfd ? MDF_BCAST : MDF_UCAST);

	/*
	 * Drop him in at the front of the hash table. Also put him on
	 * top of the MRU list.
	 */
	LINK_SLIST(mon_hash[hash], mon, hash_next);
	LINK_DLIST(mon_mru_list, mon, mru);

	return mon->flags;
}
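
/*
 * Illustrative sketch, not part of the original file: how a caller in
 * the receive path might interpret the flags returned by ntp_monitor()
 * per the description above its definition.  "rbufp" stands for the
 * incoming receive buffer and "flags" for the restriction flags the
 * caller has already looked up for the packet's source address.
 */
#if 0
	flags = ntp_monitor(rbufp, flags);
	if (flags & RES_LIMITED) {
		/* discard instead of responding normally ... */
		if (flags & RES_KOD) {
			/* ... and send a kiss-o'-death (typically the
			 * RATE kiss code) */
		}
		return;
	}
#endif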