#!/bin/sh
#
# plugin for munin to monitor usage of NSD.
#
# (C) 2008 W.C.A. Wijngaards.  BSD Licensed.
#
# To install; compile with --enable-bind8-stats (enabled by default)
# and enable nsd-control in nsd.conf with the line
# remote-control: control-enable: yes
# Run the command nsd-control-setup as root to generate the key files.
#
# Environment variables for this script
#	statefile	- where to put temporary statefile.
#	nsd_conf	- where the nsd.conf file is located.
#	nsd_control	- where to find nsd-control executable.
#	nsd_checkconf	- where to find nsd-checkconf executable.
#
# You can set them in your munin/plugin-conf.d/plugins.conf file
# with:
# [nsd_munin*]
# user root
# env.statefile /usr/local/var/munin/plugin-state/nsd-state
# env.nsd_conf /usr/local/etc/nsd.conf
# env.nsd_control /usr/local/sbin/nsd-control
# env.nsd_checkconf /usr/local/sbin/nsd-checkconf
#
# This plugin can create different graphs depending on what name
# you link it as (with ln -s) into the plugins directory
# You can link it multiple times.
# If you are only a casual user, the _hits and _by_type are most interesting,
# possibly followed by _by_rcode.
#
#	nsd_munin_hits		- base volume, transport type, failures
#	nsd_munin_memory	- memory usage
#	nsd_munin_by_type	- incoming queries by type
#	nsd_munin_by_class	- incoming queries by class
#	nsd_munin_by_opcode	- incoming queries by opcode
#	nsd_munin_by_rcode	- answers by rcode
#	nsd_munin_zones		- number of zones
#
# Magic markers - optional - used by installation scripts and
# munin-config:
#
#%# family=contrib
#%# capabilities=autoconf suggest

# POD documentation
: <<=cut
=head1 NAME

nsd_munin_ - Munin plugin to monitor the NSD server.

=head1 APPLICABLE SYSTEMS

System with NSD daemon.

=head1 CONFIGURATION

  [nsd_munin*]
  user root
  env.statefile /usr/local/var/munin/plugin-state/nsd-state
  env.nsd_conf /usr/local/etc/nsd.conf
  env.nsd_control /usr/local/sbin/nsd-control
  env.nsd_checkconf /usr/local/sbin/nsd-checkconf

Use the .env settings to override the defaults.

=head1 USAGE

Can be used to present different graphs.  Use ln -s for that name in
the plugins directory to enable the graph.
nsd_munin_hits	- base volume, transport type, failures
nsd_munin_memory	- memory usage
nsd_munin_by_type	- incoming queries by type
nsd_munin_by_class	- incoming queries by class
nsd_munin_by_opcode	- incoming queries by opcode
nsd_munin_by_rcode	- answers by rcode
nsd_munin_zones	- number of zones

=head1 AUTHOR

Copyright 2008 W.C.A. Wijngaards

=head1 LICENSE

BSD

=cut

state=${statefile:-/usr/local/var/munin/plugin-state/nsd-state}
conf=${nsd_conf:-/usr/local/etc/nsd.conf}
ctrl=${nsd_control:-/usr/local/sbin/nsd-control}
# not used by the code below; kept so the documented setting stays accepted
chkconf=${nsd_checkconf:-/usr/local/sbin/nsd-checkconf}
lock=$state.lock

# number of seconds between polling attempts.
# makes the statefile hang around for at least this many seconds,
# so that multiple links of this script can share the results.
lee=55

# to keep things within 19 characters
# (a list of sed arguments: always expanded unquoted on purpose)
ABBREV="-e s/num/n/ -e s/type/t/ -e s/opcode/o/ -e s/rcode/r/ -e s/class/c/"

# per-server query counters that may show up in the statistics
SERVER_KEYS="server0.queries server1.queries server2.queries \
server3.queries server4.queries server5.queries \
server6.queries server7.queries server8.queries \
server9.queries server10.queries server11.queries \
server12.queries server13.queries server14.queries \
server15.queries"

# turn the statistic name $1 into a munin field name on stdout:
# abbreviate it and replace the dots with underscores
field_name ( ) {
	# shellcheck disable=SC2086 # ABBREV must be split into arguments
	echo "$1" | sed $ABBREV | tr . _
}

# succeeds if the statistic named $1 is present in the statefile
has_key ( ) {
	grep "^$1=" "$state" >/dev/null 2>&1
}

# get value from $1 into return variable $value
# $value becomes "0" when the statistic is not in the statefile
get_value ( ) {
	value=$(grep "^$1=" "$state" | sed -e 's/^.*=//')
	if test -z "$value"; then
		value="0"
	fi
}

# download the state from NSD.
# On return the statefile holds statistics that are at most $lee seconds
# old and the lock has been released.  Exits the script with status 1
# if the lock cannot be obtained or the statistics cannot be fetched.
get_state ( ) {
	# obtain lock for fetching the state
	# because there is a race condition in fetching and writing to file

	# see if the lock is stale, if so, take it
	if test -f "$lock"; then
		pid=$(cat "$lock" 2>/dev/null)
		if test "$pid" != "$$" && ! kill -0 "$pid" >/dev/null 2>&1; then
			echo $$ >"$lock"
		fi
	fi

	i=0
	while test ! -f "$lock" || test "$(cat "$lock" 2>/dev/null)" != "$$"; do
		while test -f "$lock"; do
			# wait
			i=$((i + 1))
			if test "$i" -gt 1000; then
				sleep 1
			fi
			if test "$i" -gt 1500; then
				echo "error locking $lock = $(cat "$lock" 2>/dev/null)" >&2
				rm -f "$lock"
				exit 1
			fi
		done
		# try to get it; give up if the lock file cannot be written
		# at all, otherwise this loop would never terminate
		if ! echo $$ >"$lock"; then
			echo "error: cannot create $lock" >&2
			exit 1
		fi
	done

	# do not refetch if the file exists and only LEE seconds old
	if test -f "$state"; then
		now=$(date +%s)
		get_value "timestamp"
		if test "$now" -lt "$((value + lee))"; then
			rm -f "$lock"
			return
		fi
	fi

	# fetch into a temporary file and move it in place afterwards, so a
	# reader without the lock never sees a half written statefile and a
	# failed fetch does not destroy the previous one.
	tmp="$state.tmp.$$"
	# $ctrl is left unquoted so a setting that carries a wrapper
	# command in front of nsd-control keeps working.
	# shellcheck disable=SC2086
	if ! $ctrl -c "$conf" stats >"$tmp"; then
		echo "error retrieving data from the server" >&2
		rm -f "$tmp" "$lock"
		exit 1
	fi
	echo "timestamp=$(date +%s)" >>"$tmp"
	if ! mv -f "$tmp" "$state"; then
		echo "error: cannot write $state" >&2
		rm -f "$tmp" "$lock"
		exit 1
	fi
	rm -f "$lock"
}

if test "$1" = "autoconf"; then
	# munin expects exit status 0 for both the yes and the no answer
	if test ! -f "$conf"; then
		echo no "($conf does not exist)"
		exit 0
	fi
	statedir=$(dirname "$state")
	if test ! -d "$statedir"; then
		mkdir -p "$statedir"
		if test ! -d "$statedir"; then
			echo no "($state directory does not exist)"
			exit 0
		fi
	fi
	echo yes
	exit 0
fi

if test "$1" = "suggest"; then
	echo "hits"
	echo "memory"
	echo "by_type"
	echo "by_class"
	echo "by_opcode"
	echo "by_rcode"
	echo "zones"
	exit 0
fi

# determine my type, by name
# (everything after the last "nsd_munin_" in the name of the script)
id=${0##*nsd_munin_}
if test -z "$id"; then
	# some default to keep people sane.
	id="hits"
fi

# if $1 exists in statefile, config is echoed with label $2
exist_config ( ) {
	if has_key "$1"; then
		mn=$(field_name "$1")
		echo "$mn.label $2"
		echo "$mn.min 0"
		echo "$mn.type ABSOLUTE"
	fi
}

# print label $2, min 0 and type $3 for a name $1 in nsd format
p_config ( ) {
	mn=$(field_name "$1")
	echo "$mn.label $2"
	echo "$mn.min 0"
	echo "$mn.type $3"
}

# print the config of every "num.$1.<name>" statistic in the statefile,
# labelled with <name>
config_by_kind ( ) {
	grep "^num\.$1\." "$state" | while IFS= read -r line; do
		nm=${line%%=*}
		p_config "$nm" "${nm#"num.$1."}" "ABSOLUTE"
	done
}

if test "$1" = "config"; then
	if test ! -f "$state"; then
		get_state
	fi
	case $id in
	hits)
		echo "graph_title NSD traffic"
		echo "graph_args --base 1000 -l 0"
		echo 'graph_vlabel queries / ${graph_period}'
		echo "graph_scale no"
		echo "graph_category DNS"
		for x in $SERVER_KEYS; do
			exist_config "$x" "queries handled by ${x%.queries}"
		done
		p_config "num.queries" "total queries" "ABSOLUTE"
		p_config "num.udp" "UDP ip4 queries" "ABSOLUTE"
		p_config "num.udp6" "UDP ip6 queries" "ABSOLUTE"
		p_config "num.tcp" "TCP ip4 queries" "ABSOLUTE"
		p_config "num.tcp6" "TCP ip6 queries" "ABSOLUTE"
		p_config "num.edns" "queries with EDNS OPT" "ABSOLUTE"
		p_config "num.ednserr" "queries failed EDNS parse" "ABSOLUTE"
		p_config "num.answer_wo_aa" "nonauthor. queries (referrals)" "ABSOLUTE"
		p_config "num.rxerr" "receive failed" "ABSOLUTE"
		p_config "num.txerr" "transmit failed" "ABSOLUTE"
		p_config "num.truncated" "truncated replies with TC" "ABSOLUTE"
		p_config "num.raxfr" "AXFR from allowed client" "ABSOLUTE"
		p_config "num.dropped" "dropped due to sanity check" "ABSOLUTE"
		echo "graph_info DNS queries."
		;;
	memory)
		echo "graph_title NSD memory usage"
		echo "graph_args --base 1024 -l 0"
		echo "graph_vlabel memory used in bytes"
		echo "graph_category DNS"
		p_config "size.vsz" "Total virtual memory (VSZ)" "GAUGE"
		p_config "size.rss" "Total resident memory (RSS)" "GAUGE"
		p_config "size.db.mem" "data in memory" "GAUGE"
		p_config "size.xfrd.mem" "xfr and notify memory" "GAUGE"
		p_config "size.config.mem" "config memory" "GAUGE"
		p_config "size.db.disk" "mmap of nsd.db file" "GAUGE"
		p_config "size.config.disk" "config zonelist on disk" "GAUGE"
		echo "graph_info The memory used by NSD, xfrd and config. Disk size of nsd.db and zonelist."
		;;
	by_type)
		echo "graph_title NSD queries by type"
		echo "graph_args --base 1000 -l 0"
		echo 'graph_vlabel queries / ${graph_period}'
		echo "graph_scale no"
		echo "graph_category DNS"
		config_by_kind type
		echo "graph_info queries by DNS RR type queried for"
		;;
	by_class)
		echo "graph_title NSD queries by class"
		echo "graph_args --base 1000 -l 0"
		echo 'graph_vlabel queries / ${graph_period}'
		echo "graph_scale no"
		echo "graph_category DNS"
		config_by_kind class
		echo "graph_info queries by DNS RR class queried for."
		;;
	by_opcode)
		echo "graph_title NSD queries by opcode"
		echo "graph_args --base 1000 -l 0"
		echo 'graph_vlabel queries / ${graph_period}'
		echo "graph_scale no"
		echo "graph_category DNS"
		config_by_kind opcode
		echo "graph_info queries by opcode in the query packet."
		;;
	by_rcode)
		echo "graph_title NSD answers by return code"
		echo "graph_args --base 1000 -l 0"
		echo 'graph_vlabel answer packets / ${graph_period}'
		echo "graph_scale no"
		echo "graph_category DNS"
		config_by_kind rcode
		echo "graph_info answers split out by return value."
		;;
	zones)
		echo "graph_title NSD number of zones"
		echo "graph_args --base 1000 -l 0"
		echo "graph_vlabel zone count"
		echo "graph_category DNS"
		p_config "zone.total" "total zones" "GAUGE"
		p_config "zone.master" "master zones" "GAUGE"
		p_config "zone.slave" "slave zones" "GAUGE"
		echo "graph_info number of zones served by NSD."
		;;
	esac

	exit 0
fi

# do the stats itself
get_state

# get the time elapsed
# (only checked to detect a statefile without usable data)
get_value "time.elapsed"
if test "$value" = "0" || test "$value" = "0.000000"; then
	echo "error: time elapsed 0 or could not retrieve data" >&2
	exit 1
fi

# print value for $1
print_value ( ) {
	mn=$(field_name "$1")
	get_value "$1"
	echo "$mn.value $value"
}

# print value if line already found in $2
print_value_line ( ) {
	mn=$(field_name "$1")
	echo "$mn.value ${2##*=}"
}

# print the value of every "num.$1.<name>" statistic in the statefile
values_by_kind ( ) {
	grep "^num\.$1\." "$state" | while IFS= read -r line; do
		print_value_line "${line%%=*}" "$line"
	done
}

# print $1, a size in kilobytes, converted to bytes;
# prints 0 if $1 is not a plain non-negative integer
kb_to_bytes ( ) {
	case "$1" in
	'' | *[!0-9]*)
		echo 0
		;;
	*)
		expr "$1" \* 1024
		;;
	esac
}

case $id in
hits)
	for x in $SERVER_KEYS \
		num.queries num.udp num.udp6 num.tcp num.tcp6 \
		num.edns num.ednserr num.answer_wo_aa num.rxerr num.txerr \
		num.truncated num.raxfr num.dropped; do
		if has_key "$x"; then
			print_value "$x"
		fi
	done
	;;
memory)
	# get the total memory for NSD
	# shellcheck disable=SC2086 # see get_state for the unquoted $ctrl
	serverpid=$($ctrl -c "$conf" serverpid 2>&1)
	# small race condition, if reload happens between previous and next
	# lines, if so, detect by checking if we have a number as output.
	# ps pads its columns with blanks, those are stripped before the check.
	rssval=$(ps -p "$serverpid" -o rss= 2>/dev/null | tr -d '[:space:]')
	vszval=$(ps -p "$serverpid" -o vsz= 2>/dev/null | tr -d '[:space:]')
	echo "size_vsz.value $(kb_to_bytes "$vszval")"
	echo "size_rss.value $(kb_to_bytes "$rssval")"
	for x in size.db.mem size.xfrd.mem size.config.mem \
		size.db.disk size.config.disk; do
		print_value "$x"
	done
	;;
by_type)
	values_by_kind type
	;;
by_class)
	values_by_kind class
	;;
by_opcode)
	values_by_kind opcode
	;;
by_rcode)
	values_by_kind rcode
	;;
zones)
	get_value "zone.master"
	nummas="$value"
	get_value "zone.slave"
	numsla="$value"
	echo "zone_total.value $((nummas + numsla))"
	echo "zone_master.value $nummas"
	echo "zone_slave.value $numsla"
	;;
esac