#!/bin/sh
#
# plugin for munin to monitor usage of NSD.
#
# (C) 2008 W.C.A. Wijngaards.  BSD Licensed.
#
# To install; compile with --enable-bind8-stats (enabled by default)
# and enable nsd-control in nsd.conf with the line
#   remote-control: control-enable: yes
# Run the command nsd-control-setup as root to generate the key files.
#
# Environment variables for this script
#   statefile     - where to put temporary statefile.
#   nsd_conf      - where the nsd.conf file is located.
#   nsd_control   - where to find nsd-control executable.
#   nsd_checkconf - where to find nsd-checkconf executable.
#
# You can set them in your munin/plugin-conf.d/plugins.conf file
# with:
#   [nsd_munin*]
#   user root
#   env.statefile /usr/local/var/munin/plugin-state/nsd-state
#   env.nsd_conf /usr/local/etc/nsd.conf
#   env.nsd_control /usr/local/sbin/nsd-control
#   env.nsd_checkconf /usr/local/sbin/nsd-checkconf
#
# This plugin can create different graphs depending on what name
# you link it as (with ln -s) into the plugins directory
# You can link it multiple times.
# If you are only a casual user, the _hits and _by_type are most interesting,
# possibly followed by _by_rcode.
#
#   nsd_munin_hits      - base volume, transport type, failures
#   nsd_munin_memory    - memory usage
#   nsd_munin_by_type   - incoming queries by type
#   nsd_munin_by_class  - incoming queries by class
#   nsd_munin_by_opcode - incoming queries by opcode
#   nsd_munin_by_rcode  - answers by rcode
#   nsd_munin_zones     - number of zones
#
# Magic markers - optional - used by installation scripts and
# munin-config:
#
#%# family=contrib
#%# capabilities=autoconf suggest

# POD documentation
: <<=cut
=head1 NAME

nsd_munin_ - Munin plugin to monitor the NSD server.

=head1 APPLICABLE SYSTEMS

System with NSD daemon.

=head1 CONFIGURATION

  [nsd_munin*]
  user root
  env.statefile /usr/local/var/munin/plugin-state/nsd-state
  env.nsd_conf /usr/local/etc/nsd.conf
  env.nsd_control /usr/local/sbin/nsd-control
  env.nsd_checkconf /usr/local/sbin/nsd-checkconf

Use the .env settings to override the defaults.

=head1 USAGE

Can be used to present different graphs.  Use ln -s for that name in
the plugins directory to enable the graph.

  nsd_munin_hits      - base volume, transport type, failures
  nsd_munin_memory    - memory usage
  nsd_munin_by_type   - incoming queries by type
  nsd_munin_by_class  - incoming queries by class
  nsd_munin_by_opcode - incoming queries by opcode
  nsd_munin_by_rcode  - answers by rcode
  nsd_munin_zones     - number of zones

=head1 AUTHOR

Copyright 2008 W.C.A. Wijngaards

=head1 LICENSE

BSD

=cut

state=${statefile:-/usr/local/var/munin/plugin-state/nsd-state}
conf=${nsd_conf:-/usr/local/etc/nsd.conf}
ctrl=${nsd_control:-/usr/local/sbin/nsd-control}
# Documented setting; it is not referenced anywhere else in this script.
# shellcheck disable=SC2034
chkconf=${nsd_checkconf:-/usr/local/sbin/nsd-checkconf}
lock=$state.lock

# number of seconds between polling attempts.
# makes the statefile hang around for at least this many seconds,
# so that multiple links of this script can share the results.
lee=55

# Succeeds when $1 is a non-empty string of decimal digits.
is_number() {
  case "$1" in
    '' | *[!0-9]*) return 1 ;;
  esac
  return 0
}

# Print the munin field name for the NSD statistic name $1.
# Abbreviates the name to keep things within 19 characters and turns
# dots into underscores.
field_name() {
  printf '%s\n' "$1" \
    | sed -e s/num/n/ -e s/type/t/ -e s/opcode/o/ -e s/rcode/r/ -e s/class/c/ \
    | tr . _
}

# Succeeds when the statefile holds a line "$1=...".  The key is compared
# literally, so the dots in statistic names are not regex wildcards.
has_key() {
  awk -F= -v key="$1" \
    '$1 == key { found = 1; exit } END { exit !found }' \
    "$state" 2>/dev/null
}

# get value from $1 into return variable $value
# ("0" when the key is absent or has an empty value).
get_value() {
  value=$(awk -F= -v key="$1" '$1 == key { print $2; exit }' "$state")
  if test -z "$value"; then
    value="0"
  fi
}

# download the state from NSD.
# Takes the lock, refreshes the statefile unless it is younger than
# $lee seconds, and releases the lock.  Exits the script with status 1
# when the lock cannot be obtained or the statistics cannot be fetched.
get_state() {
  # obtain lock for fetching the state
  # because there is a race condition in fetching and writing to file

  # see if the lock is stale, if so, take it
  if test -f "$lock"; then
    pid=$(cat "$lock" 2>/dev/null)
    if ! kill -0 "$pid" >/dev/null 2>&1 && test "$pid" != "$$"; then
      echo "$$" >"$lock"
    fi
  fi

  attempts=0
  while test ! -f "$lock" || test "$(cat "$lock" 2>/dev/null)" != "$$"; do
    while test -f "$lock"; do
      # wait: spin for the first 1000 checks, then one second per check
      attempts=$(($attempts + 1))
      if test "$attempts" -gt 1000; then
        sleep 1
      fi
      if test "$attempts" -gt 1500; then
        echo "error locking $lock = $(cat "$lock" 2>/dev/null)" >&2
        rm -f "$lock"
        exit 1
      fi
    done
    # try to get it; give up when the lock file cannot be written at all
    # (for example a missing state directory) instead of looping forever.
    if ! echo "$$" >"$lock"; then
      echo "error: cannot create lock file $lock" >&2
      exit 1
    fi
  done

  # do not refetch if the file exists and only LEE seconds old
  if test -f "$state"; then
    now=$(date +%s)
    get_value "timestamp"
    if test "$now" -lt "$(($value + $lee))"; then
      rm -f "$lock"
      return
    fi
  fi

  # Fetch into a temporary file and rename it into place, so that readers
  # never see a partial statefile and a failed fetch leaves the previous
  # statefile untouched.
  tmp="$state.$$"
  # $ctrl is left unquoted on purpose: word splitting lets an nsd_control
  # setting that carries extra arguments keep working.
  # shellcheck disable=SC2086
  if ! $ctrl -c "$conf" stats >"$tmp"; then
    echo "error retrieving data from the server" >&2
    rm -f "$tmp" "$lock"
    exit 1
  fi
  echo "timestamp=$(date +%s)" >>"$tmp"
  if ! mv -f "$tmp" "$state"; then
    echo "error: cannot update statefile $state" >&2
    rm -f "$tmp" "$lock"
    exit 1
  fi
  rm -f "$lock"
}

# print label, min 0 and type $3 for a name $1 in nsd format, label $2
p_config() {
  mn=$(field_name "$1")
  printf '%s\n' "$mn.label $2"
  printf '%s\n' "$mn.min 0"
  printf '%s\n' "$mn.type $3"
}

# if $1 exists in statefile, config is echoed with label $2
exist_config() {
  if has_key "$1"; then
    p_config "$1" "$2" "ABSOLUTE"
  fi
}

# print the common header of a per-period query graph.
# $1 is the graph title, $2 the vertical label.
query_graph_header() {
  printf '%s\n' "graph_title $1"
  printf '%s\n' "graph_args --base 1000 -l 0"
  printf '%s\n' "graph_vlabel $2"
  printf '%s\n' "graph_scale no"
  printf '%s\n' "graph_category DNS"
}

# print config for every "num.$1.<name>" line in the statefile,
# using <name> as the label.  $1 is one of type, class, opcode, rcode.
config_by_category() {
  prefix="num.$1."
  grep "^num\\.$1\\." "$state" | while IFS= read -r line; do
    nm=${line%%=*}
    p_config "$nm" "${nm#"$prefix"}" "ABSOLUTE"
  done
}

# print value for $1
print_value() {
  mn=$(field_name "$1")
  get_value "$1"
  printf '%s\n' "$mn.value $value"
}

# print value if line already found in $2
print_value_line() {
  mn=$(field_name "$1")
  printf '%s\n' "$mn.value ${2##*=}"
}

# print the value of every "num.$1.<name>" line in the statefile.
# $1 is one of type, class, opcode, rcode.
values_by_category() {
  grep "^num\\.$1\\." "$state" | while IFS= read -r line; do
    print_value_line "${line%%=*}" "$line"
  done
}

if test "$1" = "autoconf"; then
  # munin expects exit status 0 for both answers; the answer itself
  # ("yes" or "no (reason)") is what is reported on stdout.
  if test ! -f "$conf"; then
    echo no "($conf does not exist)"
    exit 0
  fi
  statedir=$(dirname "$state")
  if test ! -d "$statedir"; then
    mkdir -p "$statedir"
    if test ! -d "$statedir"; then
      echo no "($state directory does not exist)"
      exit 0
    fi
  fi
  echo yes
  exit 0
fi

if test "$1" = "suggest"; then
  echo "hits"
  echo "memory"
  echo "by_type"
  echo "by_class"
  echo "by_opcode"
  echo "by_rcode"
  echo "zones"
  exit 0
fi

# determine my type, by name
id=$(printf '%s\n' "$0" | sed -e 's/^.*nsd_munin_//')
if test -z "$id"; then
  # some default to keep people sane.
  id="hits"
fi

if test "$1" = "config"; then
  if test ! -f "$state"; then
    get_state
  fi
  case "$id" in
    hits)
      query_graph_header "NSD traffic" 'queries / ${graph_period}'
      # per-server counters server0 .. server15, only those present
      n=0
      while test "$n" -le 15; do
        exist_config "server$n.queries" "queries handled by server$n"
        n=$(($n + 1))
      done
      p_config "num.queries" "total queries" "ABSOLUTE"
      p_config "num.udp" "UDP ip4 queries" "ABSOLUTE"
      p_config "num.udp6" "UDP ip6 queries" "ABSOLUTE"
      p_config "num.tcp" "TCP ip4 queries" "ABSOLUTE"
      p_config "num.tcp6" "TCP ip6 queries" "ABSOLUTE"
      p_config "num.edns" "queries with EDNS OPT" "ABSOLUTE"
      p_config "num.ednserr" "queries failed EDNS parse" "ABSOLUTE"
      p_config "num.answer_wo_aa" "nonauthor. queries (referrals)" "ABSOLUTE"
      p_config "num.rxerr" "receive failed" "ABSOLUTE"
      p_config "num.txerr" "transmit failed" "ABSOLUTE"
      p_config "num.truncated" "truncated replies with TC" "ABSOLUTE"
      p_config "num.raxfr" "AXFR from allowed client" "ABSOLUTE"
      p_config "num.rixfr" "IXFR from allowed client" "ABSOLUTE"
      p_config "num.dropped" "dropped due to sanity check" "ABSOLUTE"
      echo "graph_info DNS queries."
      ;;
    memory)
      echo "graph_title NSD memory usage"
      echo "graph_args --base 1024 -l 0"
      echo "graph_vlabel memory used in bytes"
      echo "graph_category DNS"
      p_config "size.vsz" "Total virtual memory (VSZ)" "GAUGE"
      p_config "size.rss" "Total resident memory (RSS)" "GAUGE"
      p_config "size.db.mem" "data in memory" "GAUGE"
      p_config "size.xfrd.mem" "xfr and notify memory" "GAUGE"
      p_config "size.config.mem" "config memory" "GAUGE"
      p_config "size.db.disk" "mmap of nsd.db file" "GAUGE"
      p_config "size.config.disk" "config zonelist on disk" "GAUGE"
      echo "graph_info The memory used by NSD, xfrd and config. Disk size of nsd.db and zonelist."
      ;;
    by_type)
      query_graph_header "NSD queries by type" 'queries / ${graph_period}'
      config_by_category type
      echo "graph_info queries by DNS RR type queried for"
      ;;
    by_class)
      query_graph_header "NSD queries by class" 'queries / ${graph_period}'
      config_by_category class
      echo "graph_info queries by DNS RR class queried for."
      ;;
    by_opcode)
      query_graph_header "NSD queries by opcode" 'queries / ${graph_period}'
      config_by_category opcode
      echo "graph_info queries by opcode in the query packet."
      ;;
    by_rcode)
      query_graph_header "NSD answers by return code" \
        'answer packets / ${graph_period}'
      config_by_category rcode
      echo "graph_info answers split out by return value."
      ;;
    zones)
      echo "graph_title NSD number of zones"
      echo "graph_args --base 1000 -l 0"
      echo "graph_vlabel zone count"
      echo "graph_category DNS"
      p_config "zone.total" "total zones" "GAUGE"
      p_config "zone.master" "master zones" "GAUGE"
      p_config "zone.slave" "slave zones" "GAUGE"
      echo "graph_info number of zones served by NSD."
      ;;
  esac

  exit 0
fi

# do the stats itself
get_state

# get the time elapsed; zero means the statefile holds no usable data
get_value "time.elapsed"
if test "$value" = 0 || test "$value" = "0.000000"; then
  echo "error: time elapsed 0 or could not retrieve data" >&2
  exit 1
fi

case "$id" in
  hits)
    n=0
    while test "$n" -le 15; do
      if has_key "server$n.queries"; then
        print_value "server$n.queries"
      fi
      n=$(($n + 1))
    done
    for x in num.queries num.udp num.udp6 num.tcp num.tcp6 \
      num.edns num.ednserr num.answer_wo_aa num.rxerr num.txerr \
      num.truncated num.raxfr num.rixfr num.dropped; do
      if has_key "$x"; then
        print_value "$x"
      fi
    done
    ;;
  memory)
    # get the total memory for NSD
    # ($ctrl unquoted on purpose, see get_state)
    # shellcheck disable=SC2086
    serverpid=$($ctrl -c "$conf" serverpid 2>/dev/null)
    # small race condition, if reload happens between previous and next
    # lines, if so, detect by checking if we have a number as output.
    rssval=0
    vszval=0
    if is_number "$serverpid"; then
      rss_raw=$(ps -p "$serverpid" -o rss= 2>/dev/null | tr -d ' \t')
      vsz_raw=$(ps -p "$serverpid" -o vsz= 2>/dev/null | tr -d ' \t')
      # the graph is labelled in bytes; the ps figures are scaled by 1024
      if is_number "$rss_raw"; then
        rssval=$(($rss_raw * 1024))
      fi
      if is_number "$vsz_raw"; then
        vszval=$(($vsz_raw * 1024))
      fi
    fi
    echo "size_vsz.value $vszval"
    echo "size_rss.value $rssval"
    for x in size.db.mem size.xfrd.mem size.config.mem \
      size.db.disk size.config.disk; do
      print_value "$x"
    done
    ;;
  by_type)
    values_by_category type
    ;;
  by_class)
    values_by_category class
    ;;
  by_opcode)
    values_by_category opcode
    ;;
  by_rcode)
    values_by_category rcode
    ;;
  zones)
    get_value "zone.master"
    nummas="$value"
    get_value "zone.slave"
    numsla="$value"
    # the total is derived here as master + slave
    echo "zone_total.value $(($nummas + $numsla))"
    echo "zone_master.value $nummas"
    echo "zone_slave.value $numsla"
    ;;
esac