1 /* $NetBSD: ntp_monitor.c,v 1.7 2024/08/18 20:47:17 christos Exp $ */ 2 3 /* 4 * ntp_monitor - monitor ntpd statistics 5 */ 6 #ifdef HAVE_CONFIG_H 7 # include <config.h> 8 #endif 9 10 #include "ntpd.h" 11 #include "ntp_io.h" 12 #include "ntp_if.h" 13 #include "ntp_lists.h" 14 #include "ntp_stdlib.h" 15 #include <ntp_random.h> 16 17 #include <stdio.h> 18 #include <signal.h> 19 #ifdef HAVE_SYS_IOCTL_H 20 # include <sys/ioctl.h> 21 #endif 22 23 /* 24 * Record statistics based on source address, mode and version. The 25 * receive procedure calls us with the incoming rbufp before it does 26 * anything else. While at it, implement rate controls for inbound 27 * traffic. 28 * 29 * Each entry is doubly linked into two lists, a hash table and a most- 30 * recently-used (MRU) list. When a packet arrives it is looked up in 31 * the hash table. If found, the statistics are updated and the entry 32 * relinked at the head of the MRU list. If not found, a new entry is 33 * allocated, initialized and linked into both the hash table and at the 34 * head of the MRU list. 35 * 36 * Memory is usually allocated by grabbing a big chunk of new memory and 37 * cutting it up into littler pieces. The exception to this when we hit 38 * the memory limit. Then we free memory by grabbing entries off the 39 * tail for the MRU list, unlinking from the hash table, and 40 * reinitializing. 41 * 42 * INC_MONLIST is the default allocation granularity in entries. 43 * INIT_MONLIST is the default initial allocation in entries. 44 */ 45 #ifdef MONMEMINC /* old name */ 46 # define INC_MONLIST MONMEMINC 47 #elif !defined(INC_MONLIST) 48 # define INC_MONLIST (4 * 1024 / sizeof(mon_entry)) 49 #endif 50 #ifndef INIT_MONLIST 51 # define INIT_MONLIST (4 * 1024 / sizeof(mon_entry)) 52 #endif 53 #ifndef MRU_MAXDEPTH_DEF 54 # define MRU_MAXDEPTH_DEF (1024 * 1024 / sizeof(mon_entry)) 55 #endif 56 57 /* 58 * Hashing stuff 59 */ 60 u_char mon_hash_bits; 61 62 /* 63 * Pointers to the hash table and the MRU list. Memory for the hash 64 * table is allocated only if monitoring is enabled. 65 */ 66 mon_entry ** mon_hash; /* MRU hash table */ 67 mon_entry mon_mru_list; /* mru listhead */ 68 69 /* 70 * List of free structures structures, and counters of in-use and total 71 * structures. The free structures are linked with the hash_next field. 72 */ 73 static mon_entry *mon_free; /* free list or null if none */ 74 u_int mru_alloc; /* mru list + free list count */ 75 u_int mru_entries; /* mru list count */ 76 u_int mru_peakentries; /* highest mru_entries seen */ 77 u_int mru_initalloc = INIT_MONLIST;/* entries to preallocate */ 78 u_int mru_incalloc = INC_MONLIST;/* allocation batch factor */ 79 static u_int mon_mem_increments; /* times called malloc() */ 80 81 /* 82 * Parameters of the RES_LIMITED restriction option. We define headway 83 * as the idle time between packets. A packet is discarded if the 84 * headway is less than the minimum, as well as if the average headway 85 * is less than eight times the increment. 86 */ 87 int ntp_minpkt = NTP_MINPKT; /* minimum seconds between */ 88 /* requests from a client */ 89 u_char ntp_minpoll = NTP_MINPOLL; /* minimum average log2 seconds */ 90 /* between client requests */ 91 92 /* 93 * Initialization state. We may be monitoring, we may not. If 94 * we aren't, we may not even have allocated any memory yet. 95 */ 96 u_int mon_enabled; /* enable switch */ 97 u_int mru_mindepth = 600; /* preempt above this */ 98 int mru_maxage = 64; /* for entries older than */ 99 u_int mru_maxdepth = /* MRU count hard limit */ 100 MRU_MAXDEPTH_DEF; 101 int mon_age = 3000; /* preemption limit */ 102 103 static void mon_getmoremem(void); 104 static void remove_from_hash(mon_entry *); 105 static inline void mon_free_entry(mon_entry *); 106 static inline void mon_reclaim_entry(mon_entry *); 107 108 109 /* 110 * init_mon - initialize monitoring global data 111 */ 112 void 113 init_mon(void) 114 { 115 /* 116 * Don't do much of anything here. We don't allocate memory 117 * until mon_start(). 118 */ 119 mon_enabled = MON_OFF; 120 INIT_DLIST(mon_mru_list, mru); 121 } 122 123 124 /* 125 * remove_from_hash - removes an entry from the address hash table and 126 * decrements mru_entries. 127 */ 128 static void 129 remove_from_hash( 130 mon_entry *mon 131 ) 132 { 133 u_int hash; 134 mon_entry *punlinked; 135 136 mru_entries--; 137 hash = MON_HASH(&mon->rmtadr); 138 UNLINK_SLIST(punlinked, mon_hash[hash], mon, hash_next, 139 mon_entry); 140 ENSURE(punlinked == mon); 141 } 142 143 144 static inline void 145 mon_free_entry( 146 mon_entry *m 147 ) 148 { 149 ZERO(*m); 150 LINK_SLIST(mon_free, m, hash_next); 151 } 152 153 154 /* 155 * mon_reclaim_entry - Remove an entry from the MRU list and from the 156 * hash array, then zero-initialize it. Indirectly 157 * decrements mru_entries. 158 159 * The entry is prepared to be reused. Before return, in 160 * remove_from_hash(), mru_entries is decremented. It is the caller's 161 * responsibility to increment it again. 162 */ 163 static inline void 164 mon_reclaim_entry( 165 mon_entry *m 166 ) 167 { 168 DEBUG_INSIST(NULL != m); 169 170 UNLINK_DLIST(m, mru); 171 remove_from_hash(m); 172 ZERO(*m); 173 } 174 175 176 /* 177 * mon_getmoremem - get more memory and put it on the free list 178 */ 179 static void 180 mon_getmoremem(void) 181 { 182 mon_entry *chunk; 183 u_int entries; 184 185 entries = (0 == mon_mem_increments) 186 ? mru_initalloc 187 : mru_incalloc; 188 189 if (entries) { 190 chunk = eallocarray(entries, sizeof(*chunk)); 191 mru_alloc += entries; 192 for (chunk += entries; entries; entries--) 193 mon_free_entry(--chunk); 194 195 mon_mem_increments++; 196 } 197 } 198 199 200 /* 201 * mon_start - start up the monitoring software 202 */ 203 void 204 mon_start( 205 int mode 206 ) 207 { 208 size_t octets; 209 u_int min_hash_slots; 210 211 if (MON_OFF == mode) /* MON_OFF is 0 */ 212 return; 213 if (mon_enabled) { 214 mon_enabled |= mode; 215 return; 216 } 217 if (0 == mon_mem_increments) 218 mon_getmoremem(); 219 /* 220 * Select the MRU hash table size to limit the average count 221 * per bucket at capacity (mru_maxdepth) to 8, if possible 222 * given our hash is limited to 16 bits. 223 */ 224 min_hash_slots = (mru_maxdepth / 8) + 1; 225 mon_hash_bits = 0; 226 while (min_hash_slots >>= 1) 227 mon_hash_bits++; 228 mon_hash_bits = max(4, mon_hash_bits); 229 mon_hash_bits = min(16, mon_hash_bits); 230 octets = sizeof(*mon_hash) * MON_HASH_SIZE; 231 mon_hash = erealloc_zero(mon_hash, octets, 0); 232 233 mon_enabled = mode; 234 } 235 236 237 /* 238 * mon_stop - stop the monitoring software 239 */ 240 void 241 mon_stop( 242 int mode 243 ) 244 { 245 mon_entry *mon; 246 247 if (MON_OFF == mon_enabled) 248 return; 249 if ((mon_enabled & mode) == 0 || mode == MON_OFF) 250 return; 251 252 mon_enabled &= ~mode; 253 if (mon_enabled != MON_OFF) 254 return; 255 256 /* 257 * Move everything on the MRU list to the free list quickly, 258 * without bothering to remove each from either the MRU list or 259 * the hash table. 260 */ 261 ITER_DLIST_BEGIN(mon_mru_list, mon, mru, mon_entry) 262 mon_free_entry(mon); 263 ITER_DLIST_END() 264 265 /* empty the MRU list and hash table. */ 266 mru_entries = 0; 267 INIT_DLIST(mon_mru_list, mru); 268 zero_mem(mon_hash, sizeof(*mon_hash) * MON_HASH_SIZE); 269 } 270 271 272 /* 273 * mon_clearinterface -- remove mru entries referring to a local address 274 * which is going away. 275 */ 276 void 277 mon_clearinterface( 278 endpt *lcladr 279 ) 280 { 281 mon_entry *mon; 282 283 /* iterate mon over mon_mru_list */ 284 ITER_DLIST_BEGIN(mon_mru_list, mon, mru, mon_entry) 285 if (mon->lcladr == lcladr) { 286 /* remove from mru list */ 287 UNLINK_DLIST(mon, mru); 288 /* remove from hash list, adjust mru_entries */ 289 remove_from_hash(mon); 290 /* put on free list */ 291 mon_free_entry(mon); 292 } 293 ITER_DLIST_END() 294 } 295 296 297 /* 298 * ntp_monitor - record stats about this packet 299 * 300 * Returns supplied restriction flags, with RES_LIMITED and RES_KOD 301 * cleared unless the packet should not be responded to normally 302 * (RES_LIMITED) and possibly should trigger a KoD response (RES_KOD). 303 * The returned flags are saved in the MRU entry, so that it reflects 304 * whether the last packet from that source triggered rate limiting, 305 * and if so, possible KoD response. This implies you can not tell 306 * whether a given address is eligible for rate limiting/KoD from the 307 * monlist restrict bits, only whether or not the last packet triggered 308 * such responses. ntpdc -c reslist lets you see whether RES_LIMITED 309 * or RES_KOD is lit for a particular address before ntp_monitor()'s 310 * typical dousing. 311 */ 312 u_short 313 ntp_monitor( 314 struct recvbuf *rbufp, 315 u_short flags 316 ) 317 { 318 l_fp interval_fp; 319 struct pkt * pkt; 320 mon_entry * mon; 321 mon_entry * oldest; 322 int oldest_age; 323 u_int hash; 324 u_short restrict_mask; 325 u_char mode; 326 u_char version; 327 int interval; 328 int head; /* headway increment */ 329 int leak; /* new headway */ 330 int limit; /* average threshold */ 331 332 REQUIRE(rbufp != NULL); 333 334 if (mon_enabled == MON_OFF) { 335 return ~(RES_LIMITED | RES_KOD) & flags; 336 } 337 pkt = &rbufp->recv_pkt; 338 hash = MON_HASH(&rbufp->recv_srcadr); 339 mode = PKT_MODE(pkt->li_vn_mode); 340 version = PKT_VERSION(pkt->li_vn_mode); 341 mon = mon_hash[hash]; 342 343 /* 344 * We keep track of all traffic for a given IP in one entry, 345 * otherwise cron'ed ntpdate or similar evades RES_LIMITED. 346 */ 347 348 for (; mon != NULL; mon = mon->hash_next) { 349 if (SOCK_EQ(&mon->rmtadr, &rbufp->recv_srcadr)) { 350 break; 351 } 352 } 353 if (mon != NULL) { 354 interval_fp = rbufp->recv_time; 355 L_SUB(&interval_fp, &mon->last); 356 /* add one-half second to round up */ 357 L_ADDUF(&interval_fp, 0x80000000); 358 interval = interval_fp.l_i; 359 mon->last = rbufp->recv_time; 360 NSRCPORT(&mon->rmtadr) = NSRCPORT(&rbufp->recv_srcadr); 361 mon->count++; 362 restrict_mask = flags; 363 mon->vn_mode = VN_MODE(version, mode); 364 365 /* Shuffle to the head of the MRU list. */ 366 UNLINK_DLIST(mon, mru); 367 LINK_DLIST(mon_mru_list, mon, mru); 368 369 /* 370 * At this point the most recent arrival is first in the 371 * MRU list. Decrease the counter by the headway, but 372 * not less than zero. 373 */ 374 mon->leak -= interval; 375 mon->leak = max(0, mon->leak); 376 head = 1 << ntp_minpoll; 377 leak = mon->leak + head; 378 limit = NTP_SHIFT * head; 379 380 DPRINTF(2, ("MRU: interval %d headway %d limit %d\n", 381 interval, leak, limit)); 382 383 /* 384 * If the minimum and average thresholds are not 385 * exceeded, douse the RES_LIMITED and RES_KOD bits and 386 * increase the counter by the headway increment. Note 387 * that we give a 1-s grace for the minimum threshold 388 * and a 2-s grace for the headway increment. If one or 389 * both thresholds are exceeded and the old counter is 390 * less than the average threshold, set the counter to 391 * the average threshold plus the increment and leave 392 * the RES_LIMITED and RES_KOD bits lit. Otherwise, 393 * leave the counter alone and douse the RES_KOD bit. 394 * This rate-limits the KoDs to no more often than the 395 * average headway. 396 */ 397 if (interval + 1 >= ntp_minpkt && leak < limit) { 398 mon->leak = leak - 2; 399 restrict_mask &= ~(RES_LIMITED | RES_KOD); 400 } else if (mon->leak < limit) { 401 mon->leak = limit + head; 402 } else { 403 restrict_mask &= ~RES_KOD; 404 } 405 mon->flags = restrict_mask; 406 407 return mon->flags; 408 } 409 410 /* 411 * If we got here, this is the first we've heard of this 412 * guy. Get him some memory, either from the free list 413 * or from the tail of the MRU list. 414 * 415 * The following ntp.conf "mru" knobs come into play determining 416 * the depth (or count) of the MRU list: 417 * - mru_mindepth ("mru mindepth") is a floor beneath which 418 * entries are kept without regard to their age. The 419 * default is 600 which matches the longtime implementation 420 * limit on the total number of entries. 421 * - mru_maxage ("mru maxage") is a ceiling on the age in 422 * seconds of entries. Entries older than this are 423 * reclaimed once mon_mindepth is exceeded. 64s default. 424 * Note that entries older than this can easily survive 425 * as they are reclaimed only as needed. 426 * - mru_maxdepth ("mru maxdepth") is a hard limit on the 427 * number of entries. 428 * - "mru maxmem" sets mru_maxdepth to the number of entries 429 * which fit in the given number of kilobytes. The default is 430 * 1024, or 1 megabyte. 431 * - mru_initalloc ("mru initalloc" sets the count of the 432 * initial allocation of MRU entries. 433 * - "mru initmem" sets mru_initalloc in units of kilobytes. 434 * The default is 4. 435 * - mru_incalloc ("mru incalloc" sets the number of entries to 436 * allocate on-demand each time the free list is empty. 437 * - "mru incmem" sets mru_incalloc in units of kilobytes. 438 * The default is 4. 439 * Whichever of "mru maxmem" or "mru maxdepth" occurs last in 440 * ntp.conf controls. Similarly for "mru initalloc" and "mru 441 * initmem", and for "mru incalloc" and "mru incmem". 442 */ 443 if (mru_entries < mru_mindepth) { 444 if (NULL == mon_free) 445 mon_getmoremem(); 446 UNLINK_HEAD_SLIST(mon, mon_free, hash_next); 447 } else { 448 oldest = TAIL_DLIST(mon_mru_list, mru); 449 oldest_age = 0; /* silence uninit warning */ 450 if (oldest != NULL) { 451 interval_fp = rbufp->recv_time; 452 L_SUB(&interval_fp, &oldest->last); 453 /* add one-half second to round up */ 454 L_ADDUF(&interval_fp, 0x80000000); 455 oldest_age = interval_fp.l_i; 456 } 457 /* note -1 is legal for mru_maxage (disables) */ 458 if (oldest != NULL && mru_maxage < oldest_age) { 459 mon_reclaim_entry(oldest); 460 mon = oldest; 461 } else if (mon_free != NULL || mru_alloc < 462 mru_maxdepth) { 463 if (NULL == mon_free) 464 mon_getmoremem(); 465 UNLINK_HEAD_SLIST(mon, mon_free, hash_next); 466 /* Preempt from the MRU list if old enough. */ 467 } else if (ntp_uurandom() > 468 (double)oldest_age / mon_age) { 469 return ~(RES_LIMITED | RES_KOD) & flags; 470 } else { 471 mon_reclaim_entry(oldest); 472 mon = oldest; 473 } 474 } 475 476 INSIST(mon != NULL); 477 478 /* 479 * Got one, initialize it 480 */ 481 mru_entries++; 482 mru_peakentries = max(mru_peakentries, mru_entries); 483 mon->last = rbufp->recv_time; 484 mon->first = mon->last; 485 mon->count = 1; 486 mon->flags = ~(RES_LIMITED | RES_KOD) & flags; 487 mon->leak = 0; 488 memcpy(&mon->rmtadr, &rbufp->recv_srcadr, sizeof(mon->rmtadr)); 489 mon->vn_mode = VN_MODE(version, mode); 490 mon->lcladr = rbufp->dstadr; 491 mon->cast_flags = (u_char)(((rbufp->dstadr->flags & 492 INT_MCASTOPEN) && rbufp->fd == mon->lcladr->fd) ? MDF_MCAST 493 : rbufp->fd == mon->lcladr->bfd ? MDF_BCAST : MDF_UCAST); 494 495 /* 496 * Drop him into front of the hash table. Also put him on top of 497 * the MRU list. 498 */ 499 LINK_SLIST(mon_hash[hash], mon, hash_next); 500 LINK_DLIST(mon_mru_list, mon, mru); 501 502 return mon->flags; 503 } 504 505 506