xref: /netbsd-src/external/bsd/ntp/dist/ntpd/ntp_monitor.c (revision 32d1c65c71fbdb65a012e8392a62a757dd6853e9)
1 /*	$NetBSD: ntp_monitor.c,v 1.7 2024/08/18 20:47:17 christos Exp $	*/
2 
3 /*
4  * ntp_monitor - monitor ntpd statistics
5  */
6 #ifdef HAVE_CONFIG_H
7 # include <config.h>
8 #endif
9 
10 #include "ntpd.h"
11 #include "ntp_io.h"
12 #include "ntp_if.h"
13 #include "ntp_lists.h"
14 #include "ntp_stdlib.h"
15 #include <ntp_random.h>
16 
17 #include <stdio.h>
18 #include <signal.h>
19 #ifdef HAVE_SYS_IOCTL_H
20 # include <sys/ioctl.h>
21 #endif
22 
23 /*
24  * Record statistics based on source address, mode and version. The
25  * receive procedure calls us with the incoming rbufp before it does
26  * anything else. While at it, implement rate controls for inbound
27  * traffic.
28  *
29  * Each entry is doubly linked into two lists, a hash table and a most-
30  * recently-used (MRU) list. When a packet arrives it is looked up in
31  * the hash table. If found, the statistics are updated and the entry
32  * relinked at the head of the MRU list. If not found, a new entry is
33  * allocated, initialized and linked into both the hash table and at the
34  * head of the MRU list.
35  *
36  * Memory is usually allocated by grabbing a big chunk of new memory and
 * cutting it up into littler pieces. The exception to this is when we
 * hit the memory limit. Then we free memory by grabbing entries off
 * the tail of the MRU list, unlinking from the hash table, and
40  * reinitializing.
41  *
42  * INC_MONLIST is the default allocation granularity in entries.
43  * INIT_MONLIST is the default initial allocation in entries.
44  */
#ifdef MONMEMINC		/* old name */
# define	INC_MONLIST	MONMEMINC
#elif !defined(INC_MONLIST)
# define	INC_MONLIST	(4 * 1024 / sizeof(mon_entry))
#endif
#ifndef INIT_MONLIST
# define	INIT_MONLIST	(4 * 1024 / sizeof(mon_entry))
#endif
#ifndef MRU_MAXDEPTH_DEF
# define MRU_MAXDEPTH_DEF	(1024 * 1024 / sizeof(mon_entry))
#endif

/*
 * Hashing stuff
 */
u_char	mon_hash_bits;		/* log2 of hash table size; set in mon_start() */

/*
 * Pointers to the hash table and the MRU list.  Memory for the hash
 * table is allocated only if monitoring is enabled.
 */
mon_entry **	mon_hash;	/* MRU hash table */
mon_entry	mon_mru_list;	/* mru listhead */

/*
 * List of free structures, and counters of in-use and total
 * structures. The free structures are linked with the hash_next field.
 */
static  mon_entry *mon_free;		/* free list or null if none */
	u_int mru_alloc;		/* mru list + free list count */
	u_int mru_entries;		/* mru list count */
	u_int mru_peakentries;		/* highest mru_entries seen */
	u_int mru_initalloc = INIT_MONLIST;/* entries to preallocate */
	u_int mru_incalloc = INC_MONLIST;/* allocation batch factor */
static	u_int mon_mem_increments;	/* times called malloc() */

/*
 * Parameters of the RES_LIMITED restriction option. We define headway
 * as the idle time between packets. A packet is discarded if the
 * headway is less than the minimum, as well as if the average headway
 * is less than eight times the increment.
 */
int	ntp_minpkt = NTP_MINPKT;	/* minimum seconds between */
					/* requests from a client */
u_char	ntp_minpoll = NTP_MINPOLL;	/* minimum average log2 seconds */
					/* between client requests */

/*
 * Initialization state.  We may be monitoring, we may not.  If
 * we aren't, we may not even have allocated any memory yet.
 */
	u_int	mon_enabled;		/* enable switch */
	u_int	mru_mindepth = 600;	/* preempt above this */
	int	mru_maxage = 64;	/* for entries older than */
	u_int	mru_maxdepth = 		/* MRU count hard limit */
			MRU_MAXDEPTH_DEF;
	int	mon_age = 3000;		/* preemption limit */

static	void		mon_getmoremem(void);
static	void		remove_from_hash(mon_entry *);
static	inline void	mon_free_entry(mon_entry *);
static	inline void	mon_reclaim_entry(mon_entry *);
107 
108 
109 /*
110  * init_mon - initialize monitoring global data
111  */
112 void
113 init_mon(void)
114 {
115 	/*
116 	 * Don't do much of anything here.  We don't allocate memory
117 	 * until mon_start().
118 	 */
119 	mon_enabled = MON_OFF;
120 	INIT_DLIST(mon_mru_list, mru);
121 }
122 
123 
124 /*
125  * remove_from_hash - removes an entry from the address hash table and
126  *		      decrements mru_entries.
127  */
128 static void
129 remove_from_hash(
130 	mon_entry *mon
131 	)
132 {
133 	u_int hash;
134 	mon_entry *punlinked;
135 
136 	mru_entries--;
137 	hash = MON_HASH(&mon->rmtadr);
138 	UNLINK_SLIST(punlinked, mon_hash[hash], mon, hash_next,
139 		     mon_entry);
140 	ENSURE(punlinked == mon);
141 }
142 
143 
/*
 * mon_free_entry - zero an entry and push it onto the free list.
 *
 * ZERO() must come first: LINK_SLIST() reuses the hash_next field as
 * the free-list link, so clearing after linking would lose the list.
 */
static inline void
mon_free_entry(
	mon_entry *m
	)
{
	ZERO(*m);
	LINK_SLIST(mon_free, m, hash_next);
}
152 
153 
/*
 * mon_reclaim_entry - Remove an entry from the MRU list and from the
 *		       hash array, then zero-initialize it.  Indirectly
 *		       decrements mru_entries.
 *
 * The entry is prepared to be reused.  Before return, in
 * remove_from_hash(), mru_entries is decremented.  It is the caller's
 * responsibility to increment it again.
 */
static inline void
mon_reclaim_entry(
	mon_entry *m
	)
{
	DEBUG_INSIST(NULL != m);

	/* unlink from the MRU list first, then from the hash chain
	 * (remove_from_hash() still needs m->rmtadr), and only then
	 * clear the entry for reuse. */
	UNLINK_DLIST(m, mru);
	remove_from_hash(m);
	ZERO(*m);
}
174 
175 
176 /*
177  * mon_getmoremem - get more memory and put it on the free list
178  */
179 static void
180 mon_getmoremem(void)
181 {
182 	mon_entry *chunk;
183 	u_int entries;
184 
185 	entries = (0 == mon_mem_increments)
186 		      ? mru_initalloc
187 		      : mru_incalloc;
188 
189 	if (entries) {
190 		chunk = eallocarray(entries, sizeof(*chunk));
191 		mru_alloc += entries;
192 		for (chunk += entries; entries; entries--)
193 			mon_free_entry(--chunk);
194 
195 		mon_mem_increments++;
196 	}
197 }
198 
199 
200 /*
201  * mon_start - start up the monitoring software
202  */
203 void
204 mon_start(
205 	int mode
206 	)
207 {
208 	size_t octets;
209 	u_int min_hash_slots;
210 
211 	if (MON_OFF == mode)		/* MON_OFF is 0 */
212 		return;
213 	if (mon_enabled) {
214 		mon_enabled |= mode;
215 		return;
216 	}
217 	if (0 == mon_mem_increments)
218 		mon_getmoremem();
219 	/*
220 	 * Select the MRU hash table size to limit the average count
221 	 * per bucket at capacity (mru_maxdepth) to 8, if possible
222 	 * given our hash is limited to 16 bits.
223 	 */
224 	min_hash_slots = (mru_maxdepth / 8) + 1;
225 	mon_hash_bits = 0;
226 	while (min_hash_slots >>= 1)
227 		mon_hash_bits++;
228 	mon_hash_bits = max(4, mon_hash_bits);
229 	mon_hash_bits = min(16, mon_hash_bits);
230 	octets = sizeof(*mon_hash) * MON_HASH_SIZE;
231 	mon_hash = erealloc_zero(mon_hash, octets, 0);
232 
233 	mon_enabled = mode;
234 }
235 
236 
237 /*
238  * mon_stop - stop the monitoring software
239  */
240 void
241 mon_stop(
242 	int mode
243 	)
244 {
245 	mon_entry *mon;
246 
247 	if (MON_OFF == mon_enabled)
248 		return;
249 	if ((mon_enabled & mode) == 0 || mode == MON_OFF)
250 		return;
251 
252 	mon_enabled &= ~mode;
253 	if (mon_enabled != MON_OFF)
254 		return;
255 
256 	/*
257 	 * Move everything on the MRU list to the free list quickly,
258 	 * without bothering to remove each from either the MRU list or
259 	 * the hash table.
260 	 */
261 	ITER_DLIST_BEGIN(mon_mru_list, mon, mru, mon_entry)
262 		mon_free_entry(mon);
263 	ITER_DLIST_END()
264 
265 	/* empty the MRU list and hash table. */
266 	mru_entries = 0;
267 	INIT_DLIST(mon_mru_list, mru);
268 	zero_mem(mon_hash, sizeof(*mon_hash) * MON_HASH_SIZE);
269 }
270 
271 
/*
 * mon_clearinterface -- remove mru entries referring to a local address
 *			 which is going away.
 *
 * Each matching entry is fully unlinked (MRU list and hash table,
 * adjusting mru_entries via remove_from_hash()) and returned to the
 * free list for reuse.
 */
void
mon_clearinterface(
	endpt *lcladr
	)
{
	mon_entry *mon;

	/* iterate mon over mon_mru_list */
	ITER_DLIST_BEGIN(mon_mru_list, mon, mru, mon_entry)
		if (mon->lcladr == lcladr) {
			/* remove from mru list */
			UNLINK_DLIST(mon, mru);
			/* remove from hash list, adjust mru_entries */
			remove_from_hash(mon);
			/* put on free list */
			mon_free_entry(mon);
		}
	ITER_DLIST_END()
}
295 
296 
/*
 * ntp_monitor - record stats about this packet
 *
 * Returns supplied restriction flags, with RES_LIMITED and RES_KOD
 * cleared unless the packet should not be responded to normally
 * (RES_LIMITED) and possibly should trigger a KoD response (RES_KOD).
 * The returned flags are saved in the MRU entry, so that it reflects
 * whether the last packet from that source triggered rate limiting,
 * and if so, possible KoD response.  This implies you can not tell
 * whether a given address is eligible for rate limiting/KoD from the
 * monlist restrict bits, only whether or not the last packet triggered
 * such responses.  ntpdc -c reslist lets you see whether RES_LIMITED
 * or RES_KOD is lit for a particular address before ntp_monitor()'s
 * typical dousing.
 */
u_short
ntp_monitor(
	struct recvbuf *rbufp,	/* received packet buffer */
	u_short	flags		/* restriction flags for the source */
	)
{
	l_fp		interval_fp;
	struct pkt *	pkt;
	mon_entry *	mon;
	mon_entry *	oldest;
	int		oldest_age;
	u_int		hash;
	u_short		restrict_mask;
	u_char		mode;
	u_char		version;
	int		interval;
	int		head;		/* headway increment */
	int		leak;		/* new headway */
	int		limit;		/* average threshold */

	REQUIRE(rbufp != NULL);

	/* monitoring disabled: pass flags through, minus rate bits */
	if (mon_enabled == MON_OFF) {
		return ~(RES_LIMITED | RES_KOD) & flags;
	}
	pkt = &rbufp->recv_pkt;
	hash = MON_HASH(&rbufp->recv_srcadr);
	mode = PKT_MODE(pkt->li_vn_mode);
	version = PKT_VERSION(pkt->li_vn_mode);
	mon = mon_hash[hash];

	/*
	 * We keep track of all traffic for a given IP in one entry,
	 * otherwise cron'ed ntpdate or similar evades RES_LIMITED.
	 */

	for (; mon != NULL; mon = mon->hash_next) {
		if (SOCK_EQ(&mon->rmtadr, &rbufp->recv_srcadr)) {
			break;
		}
	}
	if (mon != NULL) {
		/* known source: update stats and apply rate limits */
		interval_fp = rbufp->recv_time;
		L_SUB(&interval_fp, &mon->last);
		/* add one-half second to round up */
		L_ADDUF(&interval_fp, 0x80000000);
		/* whole seconds since the previous packet, rounded */
		interval = interval_fp.l_i;
		mon->last = rbufp->recv_time;
		/* remember the most recent source port for this address */
		NSRCPORT(&mon->rmtadr) = NSRCPORT(&rbufp->recv_srcadr);
		mon->count++;
		restrict_mask = flags;
		mon->vn_mode = VN_MODE(version, mode);

		/* Shuffle to the head of the MRU list. */
		UNLINK_DLIST(mon, mru);
		LINK_DLIST(mon_mru_list, mon, mru);

		/*
		 * At this point the most recent arrival is first in the
		 * MRU list.  Decrease the counter by the headway, but
		 * not less than zero.
		 */
		mon->leak -= interval;
		mon->leak = max(0, mon->leak);
		head = 1 << ntp_minpoll;
		leak = mon->leak + head;
		limit = NTP_SHIFT * head;

		DPRINTF(2, ("MRU: interval %d headway %d limit %d\n",
			    interval, leak, limit));

		/*
		 * If the minimum and average thresholds are not
		 * exceeded, douse the RES_LIMITED and RES_KOD bits and
		 * increase the counter by the headway increment.  Note
		 * that we give a 1-s grace for the minimum threshold
		 * and a 2-s grace for the headway increment.  If one or
		 * both thresholds are exceeded and the old counter is
		 * less than the average threshold, set the counter to
		 * the average threshold plus the increment and leave
		 * the RES_LIMITED and RES_KOD bits lit. Otherwise,
		 * leave the counter alone and douse the RES_KOD bit.
		 * This rate-limits the KoDs to no more often than the
		 * average headway.
		 */
		if (interval + 1 >= ntp_minpkt && leak < limit) {
			mon->leak = leak - 2;
			restrict_mask &= ~(RES_LIMITED | RES_KOD);
		} else if (mon->leak < limit) {
			mon->leak = limit + head;
		} else {
			restrict_mask &= ~RES_KOD;
		}
		/* remember what we decided so reslist can show it */
		mon->flags = restrict_mask;

		return mon->flags;
	}

	/*
	 * If we got here, this is the first we've heard of this
	 * guy.  Get him some memory, either from the free list
	 * or from the tail of the MRU list.
	 *
	 * The following ntp.conf "mru" knobs come into play determining
	 * the depth (or count) of the MRU list:
	 * - mru_mindepth ("mru mindepth") is a floor beneath which
	 *   entries are kept without regard to their age.  The
	 *   default is 600 which matches the longtime implementation
	 *   limit on the total number of entries.
	 * - mru_maxage ("mru maxage") is a ceiling on the age in
	 *   seconds of entries.  Entries older than this are
	 *   reclaimed once mru_mindepth is exceeded.  64s default.
	 *   Note that entries older than this can easily survive
	 *   as they are reclaimed only as needed.
	 * - mru_maxdepth ("mru maxdepth") is a hard limit on the
	 *   number of entries.
	 * - "mru maxmem" sets mru_maxdepth to the number of entries
	 *   which fit in the given number of kilobytes.  The default is
	 *   1024, or 1 megabyte.
	 * - mru_initalloc ("mru initalloc") sets the count of the
	 *   initial allocation of MRU entries.
	 * - "mru initmem" sets mru_initalloc in units of kilobytes.
	 *   The default is 4.
	 * - mru_incalloc ("mru incalloc") sets the number of entries to
	 *   allocate on-demand each time the free list is empty.
	 * - "mru incmem" sets mru_incalloc in units of kilobytes.
	 *   The default is 4.
	 * Whichever of "mru maxmem" or "mru maxdepth" occurs last in
	 * ntp.conf controls.  Similarly for "mru initalloc" and "mru
	 * initmem", and for "mru incalloc" and "mru incmem".
	 */
	if (mru_entries < mru_mindepth) {
		/* below the floor: always take a fresh entry */
		if (NULL == mon_free)
			mon_getmoremem();
		UNLINK_HEAD_SLIST(mon, mon_free, hash_next);
	} else {
		oldest = TAIL_DLIST(mon_mru_list, mru);
		oldest_age = 0;		/* silence uninit warning */
		if (oldest != NULL) {
			interval_fp = rbufp->recv_time;
			L_SUB(&interval_fp, &oldest->last);
			/* add one-half second to round up */
			L_ADDUF(&interval_fp, 0x80000000);
			oldest_age = interval_fp.l_i;
		}
		/* note -1 is legal for mru_maxage (disables) */
		if (oldest != NULL && mru_maxage < oldest_age) {
			/* recycle the expired tail entry */
			mon_reclaim_entry(oldest);
			mon = oldest;
		} else if (mon_free != NULL || mru_alloc <
			   mru_maxdepth) {
			if (NULL == mon_free)
				mon_getmoremem();
			UNLINK_HEAD_SLIST(mon, mon_free, hash_next);
		/* Preempt from the MRU list if old enough. */
		} else if (ntp_uurandom() >
			   (double)oldest_age / mon_age) {
			/* not old enough this time: drop, don't track */
			return ~(RES_LIMITED | RES_KOD) & flags;
		} else {
			mon_reclaim_entry(oldest);
			mon = oldest;
		}
	}

	INSIST(mon != NULL);

	/*
	 * Got one, initialize it
	 */
	mru_entries++;
	mru_peakentries = max(mru_peakentries, mru_entries);
	mon->last = rbufp->recv_time;
	mon->first = mon->last;
	mon->count = 1;
	mon->flags = ~(RES_LIMITED | RES_KOD) & flags;
	mon->leak = 0;
	memcpy(&mon->rmtadr, &rbufp->recv_srcadr, sizeof(mon->rmtadr));
	mon->vn_mode = VN_MODE(version, mode);
	mon->lcladr = rbufp->dstadr;
	/* classify reception as multicast, broadcast or unicast from
	 * the receiving endpoint's flags and file descriptors */
	mon->cast_flags = (u_char)(((rbufp->dstadr->flags &
	    INT_MCASTOPEN) && rbufp->fd == mon->lcladr->fd) ? MDF_MCAST
	    : rbufp->fd == mon->lcladr->bfd ? MDF_BCAST : MDF_UCAST);

	/*
	 * Drop him into front of the hash table. Also put him on top of
	 * the MRU list.
	 */
	LINK_SLIST(mon_hash[hash], mon, hash_next);
	LINK_DLIST(mon_mru_list, mon, mru);

	return mon->flags;
}
504 
505 
506