xref: /netbsd-src/external/bsd/nsd/dist/contrib/nsd_munin_ (revision 4f645668ed707e1f969c546666f8c8e45e6f8888)
1#!/bin/sh
2#
3# plugin for munin to monitor usage of NSD.
4#
5# (C) 2008 W.C.A. Wijngaards.  BSD Licensed.
6#
# To install: compile NSD with --enable-bind8-stats (enabled by default)
8#	and enable nsd-control in nsd.conf with the line
9#	remote-control:	control-enable: yes
10# Run the command nsd-control-setup as root to generate the key files.
11#
12# Environment variables for this script
13#	statefile	- where to put temporary statefile.
14#	nsd_conf	- where the nsd.conf file is located.
15#	nsd_control	- where to find nsd-control executable.
16#	nsd_checkconf	- where to find nsd-checkconf executable.
17#
18# You can set them in your munin/plugin-conf.d/plugins.conf file
19# with:
20# [nsd_munin*]
21# user root
22# env.statefile /usr/local/var/munin/plugin-state/nsd-state
23# env.nsd_conf /usr/local/etc/nsd.conf
24# env.nsd_control /usr/local/sbin/nsd-control
25# env.nsd_checkconf /usr/local/sbin/nsd-checkconf
26#
27# This plugin can create different graphs depending on what name
28# you link it as (with ln -s) into the plugins directory
29# You can link it multiple times.
30# If you are only a casual user, the _hits and _by_type are most interesting,
31# possibly followed by _by_rcode.
32#
33#	nsd_munin_hits		- base volume, transport type, failures
34#	nsd_munin_memory	- memory usage
35#	nsd_munin_by_type	- incoming queries by type
36#	nsd_munin_by_class	- incoming queries by class
37#	nsd_munin_by_opcode	- incoming queries by opcode
38#	nsd_munin_by_rcode	- answers by rcode
39#	nsd_munin_zones		- number of zones
40#
41# Magic markers - optional - used by installation scripts and
42# munin-config:
43#
44#%# family=contrib
45#%# capabilities=autoconf suggest
46
47# POD documentation
48: <<=cut
49=head1 NAME
50
51nsd_munin_ - Munin plugin to monitor the NSD server.
52
53=head1 APPLICABLE SYSTEMS
54
55System with NSD daemon.
56
57=head1 CONFIGURATION
58
59  [nsd_munin*]
60  user root
61  env.statefile /usr/local/var/munin/plugin-state/nsd-state
62  env.nsd_conf /usr/local/etc/nsd.conf
63  env.nsd_control /usr/local/sbin/nsd-control
64  env.nsd_checkconf /usr/local/sbin/nsd-checkconf
65
66Use the .env settings to override the defaults.
67
68=head1 USAGE
69
Can be used to present different graphs. Use ln -s for that name in
the plugins directory to enable the graph.

  nsd_munin_hits	- base volume, transport type, failures
  nsd_munin_memory	- memory usage
  nsd_munin_by_type	- incoming queries by type
  nsd_munin_by_class	- incoming queries by class
  nsd_munin_by_opcode	- incoming queries by opcode
  nsd_munin_by_rcode	- answers by rcode
  nsd_munin_zones	- number of zones
79
80=head1 AUTHOR
81
82Copyright 2008 W.C.A. Wijngaards
83
84=head1 LICENSE
85
86BSD
87
88=cut
89
# Locations used by the plugin.  Each one can be overridden through the
# environment (the env.* settings in the munin plugin configuration).
state="${statefile:-/usr/local/var/munin/plugin-state/nsd-state}"
conf="${nsd_conf:-/usr/local/etc/nsd.conf}"
ctrl="${nsd_control:-/usr/local/sbin/nsd-control}"
chkconf="${nsd_checkconf:-/usr/local/sbin/nsd-checkconf}"
# lock file that guards updates of the statefile.
lock="$state.lock"

# number of seconds between polling attempts.
# makes the statefile hang around for at least this many seconds,
# so that multiple links of this script can share the results.
lee=55

# sed arguments that shorten field names, to keep things within 19 characters.
# Kept as a single string on purpose: it is expanded unquoted (sed $ABBREV)
# so that it splits into separate -e arguments.
ABBREV="-e s/num/n/ -e s/type/t/ -e s/opcode/o/ -e s/rcode/r/ -e s/class/c/"
103
# get value from $1 into return variable $value
#
#	$1	- exact key to look up in the statefile ($state), which holds
#		  one "key=value" entry per line.
# Sets the global $value to the text after "key=" of the first matching
# line, or to "0" when the key is absent or its value is empty.
# The key is compared literally, so the dots in names such as
# "num.type.A" do not act as wildcards.
get_value ( ) {
	value=`awk -F= -v key="$1" '$1 == key { print substr($0, length(key) + 2); exit }' "$state"`
	if test -z "$value"; then
		value="0"
	fi
}
111
# download the state from NSD.
#
# Refreshes the statefile ($state) with the output of "$ctrl -c $conf stats"
# followed by a "timestamp=<epoch seconds>" line, unless the statefile is
# younger than $lee seconds.
# Globals read: state, lock, lee, ctrl, conf.  Calls get_value, so the
# global $value is clobbered.
# Exits the script with status 1 when the lock cannot be obtained or the
# statistics cannot be retrieved; in the latter case the previous statefile
# is left untouched.
get_state ( ) {
	# obtain lock for fetching the state
	# because there is a race condition in fetching and writing to file

	# see if the lock is stale (its owner is gone), if so, take it
	if test -f "$lock"; then
		pid=$(cat "$lock" 2>&1)
		if kill -0 "$pid" >/dev/null 2>&1; then
			: # owner is still running; wait for it below
		elif test "$pid" != "$$"; then
			echo $$ >"$lock"
		fi
	fi

	i=0
	while test ! -f "$lock" || test "$(cat "$lock" 2>&1)" != "$$"; do
		while test -f "$lock"; do
			# wait: spin for the first 1000 rounds, then poll once
			# per second, and give up after 1500 rounds in total.
			i=$((i + 1))
			if test "$i" -gt 1000; then
				sleep 1
			fi
			if test "$i" -gt 1500; then
				echo "error locking $lock" "=" $(cat "$lock")
				rm -f "$lock"
				exit 1
			fi
		done
		# try to get it
		echo $$ >"$lock"
	done

	# do not refetch if the file exists and only LEE seconds old
	if test -f "$state"; then
		now=$(date +%s)
		get_value "timestamp"
		# an unparsable timestamp leaves fresh_until empty -> refetch
		fresh_until=$(expr "$value" + "$lee" 2>/dev/null)
		if test -n "$fresh_until" && test "$now" -lt "$fresh_until"; then
			rm -f "$lock"
			return
		fi
	fi

	# Fetch into a temporary file next to the statefile and rename it into
	# place, so that a failing or slow nsd-control never leaves a truncated
	# statefile behind for the other links of this script.
	# $ctrl is expanded unquoted on purpose, as it always has been.
	tmp="$state.tmp.$$"
	if $ctrl -c "$conf" stats > "$tmp"; then
		echo "timestamp=$(date +%s)" >> "$tmp"
		if mv -f "$tmp" "$state"; then
			:
		else
			echo "error retrieving data from the server"
			rm -f "$tmp" "$lock"
			exit 1
		fi
	else
		echo "error retrieving data from the server"
		rm -f "$tmp" "$lock"
		exit 1
	fi
	rm -f "$lock"
}
161
# Answer munin's "autoconf" probe on stdout.
#
# Prints "yes" when the NSD config file ($conf) exists and the directory
# for the statefile ($state) exists or could be created, and
# "no (<reason>)" otherwise.  Always returns 0: the answer is carried by
# the printed text, and a non-zero exit status is reported by
# munin-node-configure as a plugin error.
do_autoconf ( ) {
	if test ! -f "$conf"; then
		echo "no ($conf does not exist)"
		return 0
	fi
	statedir=`dirname "$state"`
	if test ! -d "$statedir"; then
		# try to create it; the test below decides if that worked
		mkdir -p "$statedir"
		if test ! -d "$statedir"; then
			echo "no ($state directory does not exist)"
			return 0
		fi
	fi
	echo yes
	return 0
}

if test "$1" = "autoconf" ; then
	do_autoconf
	exit 0
fi

# "suggest": list the graph names this plugin can be linked as.
if test "$1" = "suggest" ; then
	for graph in hits memory by_type by_class by_opcode by_rcode zones; do
		echo "$graph"
	done
	exit 0
fi
188
# determine my type, by name: $id is whatever follows the last
# "nsd_munin_" in $0 (e.g. "by_type" for a link named nsd_munin_by_type).
case "$0" in
*nsd_munin_*)
	id="${0##*nsd_munin_}"
	;;
*)
	# not linked under an nsd_munin_ name at all
	id=""
	;;
esac
if test -z "$id"; then
	# some default to keep people sane.
	id="hits"
fi
195
# if $1 exists in statefile, config is echoed with label $2
#
#	$1	- exact statistic name to look for as a key in $state
#	$2	- label text for the graph field
# The munin field name is $1 shortened with $ABBREV and with dots turned
# into underscores.  The field is always declared with min 0 and type
# ABSOLUTE.  Prints nothing when the key is not in the statefile.
# The key is compared literally (dots are not regex wildcards).
exist_config ( ) {
	mn=`echo "$1" | sed $ABBREV | tr . _`
	if awk -F= -v key="$1" '$1 == key { found = 1; exit } END { exit !found }' "$state" 2>/dev/null; then
		echo "$mn.label $2"
		echo "$mn.min 0"
		echo "$mn.type ABSOLUTE"
	fi
}
205
# print label and min 0 for a name $1 in nsd format
#
#	$1	- statistic name as NSD prints it (e.g. num.udp6)
#	$2	- label text for the graph field
#	$3	- munin data type for the field (the callers in this file
#		  pass ABSOLUTE or GAUGE)
# The munin field name is $1 shortened with $ABBREV and with dots turned
# into underscores; it is left in the global $mn.
p_config ( ) {
	mn=`echo "$1" | sed $ABBREV | tr . _`
	echo "$mn.label $2"
	echo "$mn.min 0"
	echo "$mn.type $3"
}
213
# Declare one ABSOLUTE field for every statistic in the statefile whose
# name starts with "$1." (for example "num.type"); the label is the part
# of the name after that prefix.  The prefix is matched literally.
p_config_group ( ) {
	awk -F= -v pre="$1." 'index($1, pre) == 1 { print $1 }' "$state" |
	while read -r nm; do
		p_config "$nm" "${nm#"$1".}" "ABSOLUTE"
	done
}

# "config": describe the graph selected by $id to munin and exit.
if test "$1" = "config" ; then
	# the per-statistic graphs need a statefile to know their fields;
	# an existing statefile is used as it is, without refreshing it.
	if test ! -f "$state"; then
		get_state
	fi
	case "$id" in
	hits)
		echo "graph_title NSD traffic"
		echo "graph_args --base 1000 -l 0"
		echo "graph_vlabel queries / \${graph_period}"
		echo "graph_scale no"
		echo "graph_category DNS"
		# per-server counters server0 .. server15, only those present
		n=0
		while test "$n" -le 15; do
			exist_config "server$n.queries" "queries handled by server$n"
			n=$((n + 1))
		done
		p_config "num.queries" "total queries" "ABSOLUTE"
		p_config "num.udp" "UDP ip4 queries" "ABSOLUTE"
		p_config "num.udp6" "UDP ip6 queries" "ABSOLUTE"
		p_config "num.tcp" "TCP ip4 queries" "ABSOLUTE"
		p_config "num.tcp6" "TCP ip6 queries" "ABSOLUTE"
		p_config "num.edns" "queries with EDNS OPT" "ABSOLUTE"
		p_config "num.ednserr" "queries failed EDNS parse" "ABSOLUTE"
		p_config "num.answer_wo_aa" "nonauthor. queries (referrals)" "ABSOLUTE"
		p_config "num.rxerr" "receive failed" "ABSOLUTE"
		p_config "num.txerr" "transmit failed" "ABSOLUTE"
		p_config "num.truncated" "truncated replies with TC" "ABSOLUTE"
		p_config "num.raxfr" "AXFR from allowed client" "ABSOLUTE"
		p_config "num.rixfr" "IXFR from allowed client" "ABSOLUTE"
		p_config "num.dropped" "dropped due to sanity check" "ABSOLUTE"
		echo "graph_info DNS queries."
		;;
	memory)
		echo "graph_title NSD memory usage"
		echo "graph_args --base 1024 -l 0"
		echo "graph_vlabel memory used in bytes"
		echo "graph_category DNS"
		p_config "size.vsz" "Total virtual memory (VSZ)" "GAUGE"
		p_config "size.rss" "Total resident memory (RSS)" "GAUGE"
		p_config "size.db.mem" "data in memory" "GAUGE"
		p_config "size.xfrd.mem" "xfr and notify memory" "GAUGE"
		p_config "size.config.mem" "config memory" "GAUGE"
		p_config "size.db.disk" "mmap of nsd.db file" "GAUGE"
		p_config "size.config.disk" "config zonelist on disk" "GAUGE"
		echo "graph_info The memory used by NSD, xfrd and config. Disk size of nsd.db and zonelist."
		;;
	by_type)
		echo "graph_title NSD queries by type"
		echo "graph_args --base 1000 -l 0"
		echo "graph_vlabel queries / \${graph_period}"
		echo "graph_scale no"
		echo "graph_category DNS"
		p_config_group "num.type"
		echo "graph_info queries by DNS RR type queried for"
		;;
	by_class)
		echo "graph_title NSD queries by class"
		echo "graph_args --base 1000 -l 0"
		echo "graph_vlabel queries / \${graph_period}"
		echo "graph_scale no"
		echo "graph_category DNS"
		p_config_group "num.class"
		echo "graph_info queries by DNS RR class queried for."
		;;
	by_opcode)
		echo "graph_title NSD queries by opcode"
		echo "graph_args --base 1000 -l 0"
		echo "graph_vlabel queries / \${graph_period}"
		echo "graph_scale no"
		echo "graph_category DNS"
		p_config_group "num.opcode"
		echo "graph_info queries by opcode in the query packet."
		;;
	by_rcode)
		echo "graph_title NSD answers by return code"
		echo "graph_args --base 1000 -l 0"
		echo "graph_vlabel answer packets / \${graph_period}"
		echo "graph_scale no"
		echo "graph_category DNS"
		p_config_group "num.rcode"
		echo "graph_info answers split out by return value."
		;;
	zones)
		echo "graph_title NSD number of zones"
		echo "graph_args --base 1000 -l 0"
		echo "graph_vlabel zone count"
		echo "graph_category DNS"
		p_config "zone.total" "total zones" "GAUGE"
		p_config "zone.master" "master zones" "GAUGE"
		p_config "zone.slave" "slave zones" "GAUGE"
		echo "graph_info number of zones served by NSD."
		;;
	esac

	exit 0
fi
329
# do the stats itself: make sure the statefile is present and recent.
get_state

# get the time elapsed
# get_value yields "0" for a missing key, so a statefile without usable
# data is rejected here as well.
get_value "time.elapsed"
if test "$value" = "0" || test "$value" = "0.000000"; then
	echo "error: time elapsed 0 or could not retrieve data"
	exit 1
fi
elapsed="$value"
340
# print value for $1
#
#	$1	- statistic name as NSD prints it; looked up with get_value
# Prints "<field>.value <value>", where <field> is $1 shortened with
# $ABBREV and with dots turned into underscores.  Leaves the field name in
# the global $mn and the value in the global $value.
print_value ( ) {
	mn=`echo "$1" | sed $ABBREV | tr . _`
	get_value "$1"
	echo "$mn.value $value"
}
347
# print value if line already found in $2
#
#	$1	- statistic name as NSD prints it
#	$2	- the complete "name=value" line for that statistic
# Prints "<field>.value <value>", where <value> is the text after the
# last "=" in $2 and <field> is $1 shortened with $ABBREV and with dots
# turned into underscores.  Overwrites the globals $mn and $value.
print_value_line ( ) {
	mn=`echo "$1" | sed $ABBREV | tr . _`
	value="${2##*=}"
	echo "$mn.value $value"
}
354
355
# succeed when the statefile has an entry for exactly the key $1
# (literal comparison; dots are not wildcards).
stat_exists ( ) {
	awk -F= -v key="$1" '$1 == key { found = 1; exit } END { exit !found }' "$state" 2>/dev/null
}

# print a "<field>.value <n>" line (through print_value_line) for every
# statistic in the statefile whose name starts with "$1.".
print_value_group ( ) {
	awk -F= -v pre="$1." 'index($1, pre) == 1' "$state" |
	while read -r line; do
		print_value_line "${line%%=*}" "$line"
	done
}

# print $1 with leading and trailing whitespace removed.
strip_space ( ) {
	echo "$1" | sed -e 's/^[[:space:]]*//' -e 's/[[:space:]]*$//'
}

# print $1 (a size in kilobytes, possibly padded with blanks) in bytes;
# print 0 when $1 is not a plain unsigned number, e.g. an error message.
kb_to_bytes ( ) {
	kb=`strip_space "$1"`
	case "$kb" in
	""|*[!0-9]*)
		echo 0
		;;
	*)
		expr "$kb" \* 1024
		;;
	esac
}

# emit the values for the graph selected by $id
case "$id" in
hits)
	n=0
	while test "$n" -le 15; do
		if stat_exists "server$n.queries"; then
			print_value "server$n.queries"
		fi
		n=$((n + 1))
	done
	for x in num.queries num.udp num.udp6 num.tcp num.tcp6 \
		num.edns num.ednserr num.answer_wo_aa num.rxerr num.txerr \
		num.truncated num.raxfr num.rixfr num.dropped ; do
		if stat_exists "$x"; then
			print_value "$x"
		fi
	done
	;;
memory)
	# get the total memory for NSD
	serverpid=`$ctrl -c "$conf" serverpid 2>&1`
	serverpid=`strip_space "$serverpid"`
	# small race condition, if reload happens between previous and next
	# lines; kb_to_bytes turns anything that is not a number into 0.
	case "$serverpid" in
	""|*[!0-9]*)
		# nsd-control did not give us a pid: do not hand its
		# output to ps as arguments.
		rssval=""
		vszval=""
		;;
	*)
		rssval=`ps -p "$serverpid" -o rss= 2>&1`
		vszval=`ps -p "$serverpid" -o vsz= 2>&1`
		;;
	esac
	# the values taken from ps are multiplied by 1024 for the byte graph
	echo "size_vsz.value" `kb_to_bytes "$vszval"`
	echo "size_rss.value" `kb_to_bytes "$rssval"`
	for x in size.db.mem size.xfrd.mem size.config.mem \
		size.db.disk size.config.disk; do
		print_value "$x"
	done
	;;
by_type)
	print_value_group "num.type"
	;;
by_class)
	print_value_group "num.class"
	;;
by_opcode)
	print_value_group "num.opcode"
	;;
by_rcode)
	print_value_group "num.rcode"
	;;
zones)
	get_value "zone.master"
	nummas="$value"
	get_value "zone.slave"
	numsla="$value"
	# the total is computed here as master + slave
	echo "zone_total.value" `expr "$nummas" + "$numsla"`
	echo "zone_master.value" "$nummas"
	echo "zone_slave.value" "$numsla"
	;;
esac
429