xref: /netbsd-src/external/bsd/ntp/dist/ntpd/ntp_monitor.c (revision 6a493d6bc668897c91594964a732d38505b70cbb)
1 /*	$NetBSD: ntp_monitor.c,v 1.1.1.3 2013/12/27 23:30:56 christos Exp $	*/
2 
3 /*
4  * ntp_monitor - monitor ntpd statistics
5  */
6 #ifdef HAVE_CONFIG_H
7 # include <config.h>
8 #endif
9 
10 #include "ntpd.h"
11 #include "ntp_io.h"
12 #include "ntp_if.h"
13 #include "ntp_lists.h"
14 #include "ntp_stdlib.h"
15 #include <ntp_random.h>
16 
17 #include <stdio.h>
18 #include <signal.h>
19 #ifdef HAVE_SYS_IOCTL_H
20 # include <sys/ioctl.h>
21 #endif
22 
23 /*
24  * Record statistics based on source address, mode and version. The
25  * receive procedure calls us with the incoming rbufp before it does
26  * anything else. While at it, implement rate controls for inbound
27  * traffic.
28  *
29  * Each entry is doubly linked into two lists, a hash table and a most-
30  * recently-used (MRU) list. When a packet arrives it is looked up in
31  * the hash table. If found, the statistics are updated and the entry
32  * relinked at the head of the MRU list. If not found, a new entry is
33  * allocated, initialized and linked into both the hash table and at the
34  * head of the MRU list.
35  *
36  * Memory is usually allocated by grabbing a big chunk of new memory and
 * cutting it up into littler pieces. The exception to this is when we
 * hit the memory limit. Then we free memory by grabbing entries off
 * the tail of the MRU list, unlinking them from the hash table, and
 * reinitializing them.
41  *
42  * INC_MONLIST is the default allocation granularity in entries.
43  * INIT_MONLIST is the default initial allocation in entries.
44  */
#ifdef MONMEMINC		/* old name */
# define	INC_MONLIST	MONMEMINC
#elif !defined(INC_MONLIST)
# define	INC_MONLIST	(4 * 1024 / sizeof(mon_entry))
#endif
#ifndef INIT_MONLIST
# define	INIT_MONLIST	(4 * 1024 / sizeof(mon_entry))
#endif
#ifndef MRU_MAXDEPTH_DEF
# define MRU_MAXDEPTH_DEF	(1024 * 1024 / sizeof(mon_entry))
#endif

/*
 * Hashing stuff
 */
u_char	mon_hash_bits;		/* log2 of the hash table slot count */

/*
 * Pointers to the hash table and the MRU list.  Memory for the hash
 * table is allocated only if monitoring is enabled.
 */
mon_entry **	mon_hash;	/* MRU hash table */
mon_entry	mon_mru_list;	/* mru listhead */

/*
 * List of free structures, and counters of in-use and total
 * structures. The free structures are linked with the hash_next field.
 */
static  mon_entry *mon_free;		/* free list or null if none */
	u_int mru_alloc;		/* mru list + free list count */
	u_int mru_entries;		/* mru list count */
	u_int mru_peakentries;		/* highest mru_entries seen */
	u_int mru_initalloc = INIT_MONLIST;/* entries to preallocate */
	u_int mru_incalloc = INC_MONLIST;/* allocation batch factor */
static	u_int mon_mem_increments;	/* times called malloc() */

/*
 * Parameters of the RES_LIMITED restriction option. We define headway
 * as the idle time between packets. A packet is discarded if the
 * headway is less than the minimum, as well as if the average headway
 * is less than eight times the increment.
 */
int	ntp_minpkt = NTP_MINPKT;	/* minimum (log 2 s) */
u_char	ntp_minpoll = NTP_MINPOLL;	/* increment (log 2 s) */

/*
 * Initialization state.  We may be monitoring, we may not.  If
 * we aren't, we may not even have allocated any memory yet.
 */
	u_int	mon_enabled;		/* enable switch */
	u_int	mru_mindepth = 600;	/* preempt above this */
	int	mru_maxage = 64;	/* for entries older than */
	u_int	mru_maxdepth = 		/* MRU count hard limit */
			MRU_MAXDEPTH_DEF;
	int	mon_age = 3000;		/* preemption limit */

/* forward declarations for this file's private helpers */
static	void		mon_getmoremem(void);
static	void		remove_from_hash(mon_entry *);
static	inline void	mon_free_entry(mon_entry *);
static	inline void	mon_reclaim_entry(mon_entry *);
105 
106 
107 /*
108  * init_mon - initialize monitoring global data
109  */
110 void
111 init_mon(void)
112 {
113 	/*
114 	 * Don't do much of anything here.  We don't allocate memory
115 	 * until mon_start().
116 	 */
117 	mon_enabled = MON_OFF;
118 	INIT_DLIST(mon_mru_list, mru);
119 }
120 
121 
122 /*
123  * remove_from_hash - removes an entry from the address hash table and
124  *		      decrements mru_entries.
125  */
126 static void
127 remove_from_hash(
128 	mon_entry *mon
129 	)
130 {
131 	u_int hash;
132 	mon_entry *punlinked;
133 
134 	mru_entries--;
135 	hash = MON_HASH(&mon->rmtadr);
136 	UNLINK_SLIST(punlinked, mon_hash[hash], mon, hash_next,
137 		     mon_entry);
138 	NTP_ENSURE(punlinked == mon);
139 }
140 
141 
142 static inline void
143 mon_free_entry(
144 	mon_entry *m
145 	)
146 {
147 	ZERO(*m);
148 	LINK_SLIST(mon_free, m, hash_next);
149 }
150 
151 
152 /*
153  * mon_reclaim_entry - Remove an entry from the MRU list and from the
154  *		       hash array, then zero-initialize it.  Indirectly
155  *		       decrements mru_entries.
156 
157  * The entry is prepared to be reused.  Before return, in
158  * remove_from_hash(), mru_entries is decremented.  It is the caller's
159  * responsibility to increment it again.
160  */
161 static inline void
162 mon_reclaim_entry(
163 	mon_entry *m
164 	)
165 {
166 	DEBUG_INSIST(NULL != m);
167 
168 	UNLINK_DLIST(m, mru);
169 	remove_from_hash(m);
170 	ZERO(*m);
171 }
172 
173 
174 /*
175  * mon_getmoremem - get more memory and put it on the free list
176  */
177 static void
178 mon_getmoremem(void)
179 {
180 	mon_entry *chunk;
181 	mon_entry *mon;
182 	u_int entries;
183 
184 	entries = (0 == mon_mem_increments)
185 		      ? mru_initalloc
186 		      : mru_incalloc;
187 
188 	chunk = emalloc(entries * sizeof(*chunk));
189 	for (mon = chunk + entries - 1; mon >= chunk; mon--)
190 		mon_free_entry(mon);
191 
192 	mru_alloc += entries;
193 	mon_mem_increments++;
194 }
195 
196 
197 /*
198  * mon_start - start up the monitoring software
199  */
200 void
201 mon_start(
202 	int mode
203 	)
204 {
205 	size_t octets;
206 	u_int min_hash_slots;
207 
208 	if (MON_OFF == mode)		/* MON_OFF is 0 */
209 		return;
210 	if (mon_enabled) {
211 		mon_enabled |= mode;
212 		return;
213 	}
214 	if (0 == mon_mem_increments)
215 		mon_getmoremem();
216 	/*
217 	 * Select the MRU hash table size to limit the average count
218 	 * per bucket at capacity (mru_maxdepth) to 8, if possible
219 	 * given our hash is limited to 16 bits.
220 	 */
221 	min_hash_slots = (mru_maxdepth / 8) + 1;
222 	mon_hash_bits = 0;
223 	while (min_hash_slots >>= 1)
224 		mon_hash_bits++;
225 	mon_hash_bits = max(4, mon_hash_bits);
226 	mon_hash_bits = min(16, mon_hash_bits);
227 	octets = sizeof(*mon_hash) * MON_HASH_SIZE;
228 	mon_hash = erealloc_zero(mon_hash, octets, 0);
229 
230 	mon_enabled = mode;
231 }
232 
233 
234 /*
235  * mon_stop - stop the monitoring software
236  */
237 void
238 mon_stop(
239 	int mode
240 	)
241 {
242 	mon_entry *mon;
243 
244 	if (MON_OFF == mon_enabled)
245 		return;
246 	if ((mon_enabled & mode) == 0 || mode == MON_OFF)
247 		return;
248 
249 	mon_enabled &= ~mode;
250 	if (mon_enabled != MON_OFF)
251 		return;
252 
253 	/*
254 	 * Move everything on the MRU list to the free list quickly,
255 	 * without bothering to remove each from either the MRU list or
256 	 * the hash table.
257 	 */
258 	ITER_DLIST_BEGIN(mon_mru_list, mon, mru, mon_entry)
259 		mon_free_entry(mon);
260 	ITER_DLIST_END()
261 
262 	/* empty the MRU list and hash table. */
263 	mru_entries = 0;
264 	INIT_DLIST(mon_mru_list, mru);
265 	zero_mem(mon_hash, sizeof(*mon_hash) * MON_HASH_SIZE);
266 }
267 
268 
269 /*
270  * mon_clearinterface -- remove mru entries referring to a local address
271  *			 which is going away.
272  */
273 void
274 mon_clearinterface(
275 	endpt *lcladr
276 	)
277 {
278 	mon_entry *mon;
279 
280 	/* iterate mon over mon_mru_list */
281 	ITER_DLIST_BEGIN(mon_mru_list, mon, mru, mon_entry)
282 		if (mon->lcladr == lcladr) {
283 			/* remove from mru list */
284 			UNLINK_DLIST(mon, mru);
285 			/* remove from hash list, adjust mru_entries */
286 			remove_from_hash(mon);
287 			/* put on free list */
288 			mon_free_entry(mon);
289 		}
290 	ITER_DLIST_END()
291 }
292 
293 
/*
 * ntp_monitor - record stats about this packet
 *
 * Returns supplied restriction flags, with RES_LIMITED and RES_KOD
 * cleared unless the packet should not be responded to normally
 * (RES_LIMITED) and possibly should trigger a KoD response (RES_KOD).
 * The returned flags are saved in the MRU entry, so that it reflects
 * whether the last packet from that source triggered rate limiting,
 * and if so, possible KoD response.  This implies you can not tell
 * whether a given address is eligible for rate limiting/KoD from the
 * monlist restrict bits, only whether or not the last packet triggered
 * such responses.  ntpdc -c reslist lets you see whether RES_LIMITED
 * or RES_KOD is lit for a particular address before ntp_monitor()'s
 * typical dousing.
 */
u_short
ntp_monitor(
	struct recvbuf *rbufp,
	u_short	flags
	)
{
	l_fp		interval_fp;	/* scratch for time differences */
	struct pkt *	pkt;
	mon_entry *	mon;
	mon_entry *	oldest;
	int		oldest_age;	/* seconds since tail entry heard */
	u_int		hash;
	u_short		restrict_mask;
	u_char		mode;
	u_char		version;
	int		interval;	/* seconds since prior packet */
	int		head;		/* headway increment */
	int		leak;		/* new headway */
	int		limit;		/* average threshold */

	/* with monitoring off we cannot rate-limit, so douse the bits */
	if (mon_enabled == MON_OFF)
		return ~(RES_LIMITED | RES_KOD) & flags;

	pkt = &rbufp->recv_pkt;
	hash = MON_HASH(&rbufp->recv_srcadr);
	mode = PKT_MODE(pkt->li_vn_mode);
	version = PKT_VERSION(pkt->li_vn_mode);
	mon = mon_hash[hash];

	/*
	 * We keep track of all traffic for a given IP in one entry,
	 * otherwise cron'ed ntpdate or similar evades RES_LIMITED.
	 */

	/* scan the hash bucket for an entry matching the source addr */
	for (; mon != NULL; mon = mon->hash_next)
		if (SOCK_EQ(&mon->rmtadr, &rbufp->recv_srcadr))
			break;

	if (mon != NULL) {
		/* known source: compute idle time since its last packet */
		interval_fp = rbufp->recv_time;
		L_SUB(&interval_fp, &mon->last);
		/* add one-half second to round up */
		L_ADDUF(&interval_fp, 0x80000000);
		interval = interval_fp.l_i;
		mon->last = rbufp->recv_time;
		/* track the latest source port seen for this address */
		NSRCPORT(&mon->rmtadr) = NSRCPORT(&rbufp->recv_srcadr);
		mon->count++;
		restrict_mask = flags;
		mon->vn_mode = VN_MODE(version, mode);

		/* Shuffle to the head of the MRU list. */
		UNLINK_DLIST(mon, mru);
		LINK_DLIST(mon_mru_list, mon, mru);

		/*
		 * At this point the most recent arrival is first in the
		 * MRU list.  Decrease the counter by the headway, but
		 * not less than zero.  (Leaky-bucket style accounting:
		 * mon->leak drains with idle time, fills per packet.)
		 */
		mon->leak -= interval;
		mon->leak = max(0, mon->leak);
		head = 1 << ntp_minpoll;
		leak = mon->leak + head;
		limit = NTP_SHIFT * head;

		DPRINTF(2, ("MRU: interval %d headway %d limit %d\n",
			    interval, leak, limit));

		/*
		 * If the minimum and average thresholds are not
		 * exceeded, douse the RES_LIMITED and RES_KOD bits and
		 * increase the counter by the headway increment.  Note
		 * that we give a 1-s grace for the minimum threshold
		 * and a 2-s grace for the headway increment.  If one or
		 * both thresholds are exceeded and the old counter is
		 * less than the average threshold, set the counter to
		 * the average threshold plus the increment and leave
		 * the RES_LIMITED and RES_KOD bits lit. Otherwise,
		 * leave the counter alone and douse the RES_KOD bit.
		 * This rate-limits the KoDs to no less than the average
		 * headway.
		 */
		if (interval + 1 >= ntp_minpkt && leak < limit) {
			mon->leak = leak - 2;
			restrict_mask &= ~(RES_LIMITED | RES_KOD);
		} else if (mon->leak < limit)
			mon->leak = limit + head;
		else
			restrict_mask &= ~RES_KOD;

		/* remember the verdict for this source (see above) */
		mon->flags = restrict_mask;

		return mon->flags;
	}

	/*
	 * If we got here, this is the first we've heard of this
	 * guy.  Get him some memory, either from the free list
	 * or from the tail of the MRU list.
	 *
	 * The following ntp.conf "mru" knobs come into play determining
	 * the depth (or count) of the MRU list:
	 * - mru_mindepth ("mru mindepth") is a floor beneath which
	 *   entries are kept without regard to their age.  The
	 *   default is 600 which matches the longtime implementation
	 *   limit on the total number of entries.
	 * - mru_maxage ("mru maxage") is a ceiling on the age in
	 *   seconds of entries.  Entries older than this are
	 *   reclaimed once mon_mindepth is exceeded.  64s default.
	 *   Note that entries older than this can easily survive
	 *   as they are reclaimed only as needed.
	 * - mru_maxdepth ("mru maxdepth") is a hard limit on the
	 *   number of entries.
	 * - "mru maxmem" sets mru_maxdepth to the number of entries
	 *   which fit in the given number of kilobytes.  The default is
	 *   1024, or 1 megabyte.
	 * - mru_initalloc ("mru initalloc") sets the count of the
	 *   initial allocation of MRU entries.
	 * - "mru initmem" sets mru_initalloc in units of kilobytes.
	 *   The default is 4.
	 * - mru_incalloc ("mru incalloc") sets the number of entries to
	 *   allocate on-demand each time the free list is empty.
	 * - "mru incmem" sets mru_incalloc in units of kilobytes.
	 *   The default is 4.
	 * Whichever of "mru maxmem" or "mru maxdepth" occurs last in
	 * ntp.conf controls.  Similarly for "mru initalloc" and "mru
	 * initmem", and for "mru incalloc" and "mru incmem".
	 */
	if (mru_entries < mru_mindepth) {
		/* below the floor: always grow rather than recycle */
		if (NULL == mon_free)
			mon_getmoremem();
		UNLINK_HEAD_SLIST(mon, mon_free, hash_next);
	} else {
		/* the LRU entry sits at the tail of the MRU list */
		oldest = TAIL_DLIST(mon_mru_list, mru);
		oldest_age = 0;		/* silence uninit warning */
		if (oldest != NULL) {
			interval_fp = rbufp->recv_time;
			L_SUB(&interval_fp, &oldest->last);
			/* add one-half second to round up */
			L_ADDUF(&interval_fp, 0x80000000);
			oldest_age = interval_fp.l_i;
		}
		/* note -1 is legal for mru_maxage (disables) */
		if (oldest != NULL && mru_maxage < oldest_age) {
			/* oldest entry has expired; recycle it */
			mon_reclaim_entry(oldest);
			mon = oldest;
		} else if (mon_free != NULL || mru_alloc <
			   mru_maxdepth) {
			/* room to grow below the hard depth limit */
			if (NULL == mon_free)
				mon_getmoremem();
			UNLINK_HEAD_SLIST(mon, mon_free, hash_next);
		/* Preempt from the MRU list if old enough. */
		} else if (ntp_random() / (2. * FRAC) >
			   (double)oldest_age / mon_age) {
			/*
			 * At capacity and the tail entry survived the
			 * random preemption lottery: drop monitoring of
			 * this packet rather than evict, dousing the
			 * rate-limit bits in the returned flags.
			 */
			return ~(RES_LIMITED | RES_KOD) & flags;
		} else {
			mon_reclaim_entry(oldest);
			mon = oldest;
		}
	}

	/*
	 * Got one, initialize it
	 */
	mru_entries++;
	mru_peakentries = max(mru_peakentries, mru_entries);
	mon->last = rbufp->recv_time;
	mon->first = mon->last;
	mon->count = 1;
	/* first packet from a source is never rate-limited */
	mon->flags = ~(RES_LIMITED | RES_KOD) & flags;
	mon->leak = 0;
	memcpy(&mon->rmtadr, &rbufp->recv_srcadr, sizeof(mon->rmtadr));
	mon->vn_mode = VN_MODE(version, mode);
	mon->lcladr = rbufp->dstadr;
	/* classify by receive fd: multicast, broadcast, or unicast */
	mon->cast_flags = (u_char)(((rbufp->dstadr->flags &
	    INT_MCASTOPEN) && rbufp->fd == mon->lcladr->fd) ? MDF_MCAST
	    : rbufp->fd == mon->lcladr->bfd ? MDF_BCAST : MDF_UCAST);

	/*
	 * Drop him into front of the hash table. Also put him on top of
	 * the MRU list.
	 */
	LINK_SLIST(mon_hash[hash], mon, hash_next);
	LINK_DLIST(mon_mru_list, mon, mru);

	return mon->flags;
}
496 
497 
498