xref: /spdk/scripts/perf/pm/collect-bmc-pm (revision a6dbe3721eb3b5990707fc3e378c95e505dd8ab5)
#!/usr/bin/env bash
# Abort the script on the first unhandled command failure.
set -o errexit
3
hex() {
	# Print every argument as a zero-padded, 0x-prefixed hex byte, one per
	# line. printf reuses the format for each argument it is given.
	printf '0x%02x\n' "$@"
}
5
is_root() {
	# Talking to the local BMC device requires root privileges, so succeed
	# only for UID 0 and complain on stderr for anybody else.
	((UID == 0)) && return 0

	printf '%s, you need to be root to run this script\n' "$USER" >&2
	return 1
}
14
is_ipmitool() {
	# Print the full path of ipmitool found in PATH (callers capture it from
	# stdout). Fail with a diagnostic on stderr when it is not installed.
	type -P ipmitool && return 0

	printf 'ipmitool not detected, cannot run commands against the BMC\n' >&2
	return 1
}
21
ipmi_load() {
	# Best-effort, silent load of the core ipmi drivers. A modprobe failure is
	# deliberately ignored - the device itself is looked up later on.
	local -a drivers=(ipmi_si ipmi_devintf ipmi_msghandler)

	if ! modprobe -qa "${drivers[@]}"; then
		return 0
	fi
}
26
ipmi_supported() {
	# Verify that the kernel detected and registered at least one BMC under
	# the ipmi platform. Look for KCS specifically as this is the type
	# of the interface the script was tested against.
	#
	# On success the BMC details are printed to stderr and the path of
	# ipmitool is stored in the global $ipmitool. Returns non-zero when no BMC
	# is registered, the interface is not KCS, or ipmitool is missing.

	local ipmi=/sys/class/ipmi/ipmi0
	# Kept local (and not called "type") so that a variable named after the
	# builtin does not leak into the global scope.
	local iface_type

	# Keep these details global for easy access if needed.
	local -g man_id prod_id dev_id ipmi_ver platform board ipmitool

	ipmi_load

	if [[ ! -e $ipmi ]]; then
		printf 'BMC not detected. Please, make sure your platform is IPMI-compatible\n' >&2
		return 1
	fi

	iface_type=$(< "$ipmi/device/type")

	if [[ $iface_type != kcs ]]; then
		printf 'No supported BMC interface detected (%s) - only KCS is supported\n' "$iface_type" >&2
		return 1
	fi

	man_id=$(< "$ipmi/device/bmc/manufacturer_id")
	prod_id=$(< "$ipmi/device/bmc/product_id")
	dev_id=$(hex "$(< "$ipmi/device/bmc/device_id")")
	ipmi_ver=$(< "$ipmi/device/bmc/ipmi_version")

	# DMI details are optional - fall back to "unknown" in the report below.
	if [[ -e /sys/class/dmi/id/board_vendor ]]; then
		platform=$(< /sys/class/dmi/id/board_vendor)
	fi

	if [[ -e /sys/class/dmi/id/board_name ]]; then
		board=$(< /sys/class/dmi/id/board_name)
	fi

	# Keep output similar to ipmi_si's
	{
		printf '\nBMC detected, details below:\n'
		printf 'Manufacturer ID: %s\n' "$man_id"
		printf 'Product ID: %s\n' "$prod_id"
		printf 'Device ID: %s\n' "$dev_id"
		printf 'IPMI Version: %s\n' "$ipmi_ver"
		printf 'Platform: %s\n' "${platform:-unknown}"
		printf 'Board: %s\n\n' "${board:-unknown}"
	} >&2

	# Verify if we have proper tools to work with
	ipmitool=$(is_ipmitool)
}
80
ipmiraw() {
	# For the majority of commands we use raw payload to not depend on specific ipmitool version
	# and the way how it interprets/parses the returned data. This also allows us to inspect the
	# integrity of data more closely to make sure we don't report nonsensical values to the user.
	#
	# Arguments: raw request bytes, passed verbatim to "ipmitool raw".
	# Outputs:   one 0x-prefixed response byte per line on stdout.
	# Returns:   non-zero (with no output) when ipmitool fails.

	local out
	local -a rsp

	# Capture into a scalar first so the exit status of ipmitool is checked
	# explicitly. Without this a failed request fell through to hex with no
	# arguments, which prints a bogus "0x00" and reports success.
	out=$("$ipmitool" raw "$@" 2> /dev/null) || return 1

	# The response is whitespace-separated hex bytes, possibly on several
	# lines, so plain word splitting is what we want here.
	# shellcheck disable=SC2206
	rsp=($out)

	# A command that legitimately returns no data must not produce "0x00".
	((${#rsp[@]} > 0)) || return 0

	# Slap hex prefix to work with proper base
	rsp=("${rsp[@]/#/0x}")

	hex "${rsp[@]}"
}
94
dcmiraw() {
	# Issue a DCMI request through ipmiraw: $1 is the DCMI command, any
	# remaining arguments are its data bytes. 0x2c and 0xdc are the same for
	# every request sent from here (presumably the group-extension NetFn and
	# the DCMI group id - confirm against the DCMI spec).
	ipmiraw 0x2c "$1" 0xdc "${@:2}"
}
100
print_dcmi_available_time_periods() {
	# Decode the averaging time periods found in the global enhanced_power_attr
	# response and list them on stderr.
	#
	# Element 4 holds the number of periods, the periods themselves start at
	# element 5. In each period byte the two top bits select the unit and the
	# remaining bits hold the duration.
	#
	# Globals written:
	#   available_time_periods            - "NOW" plus "<duration> <unit>" labels
	#   available_time_periods_in_seconds - raw period byte, indexed by the
	#                                       period length in seconds
	local period_count=${enhanced_power_attr[4]}
	local first_period=5
	local -a unit_name=(seconds minutes hours days)
	local -a unit_seconds=(1 60 3600 86400)
	local idx raw duration unit_idx seconds

	local -g available_time_periods=()
	local -g available_time_periods_in_seconds=()

	available_time_periods[0]="NOW"

	for ((idx = first_period; idx < first_period + period_count; idx++)); do
		raw=${enhanced_power_attr[idx]}
		duration=$((raw & ~0xc0))
		unit_idx=$((raw >> 6))
		seconds=$((duration * unit_seconds[unit_idx]))

		# A zero duration does not describe a usable period.
		((duration != 0)) || continue

		available_time_periods[idx]="$duration ${unit_name[unit_idx]:-unknown}"
		available_time_periods_in_seconds[seconds]=$raw
	done

	{
		printf '\nAvailable averaging time periods to request:\n'
		printf '  - %s\n' "${available_time_periods[@]}"
		printf '\n'
	} >&2
}
141
dcmi_power_support() {
	# Verify if the BMC conforms to the DCMI spec and exposes the Power
	# Management capability. Returns non-zero when it does not, or when the
	# BMC's answer cannot be interpreted.
	local rsp

	# Table 6-2, Get DCMI Capabilities Command Format
	#
	# The capability flag is read from rsp[5], so a shorter reply is treated
	# as "cannot determine" rather than as "capability missing".
	if ! rsp=($(dcmiraw 0x1 0x1)) || ((${#rsp[@]} < 6)); then
		printf 'Cannot determine if BMC supports DCMI Power Management capability\n' >&2
		return 1
	fi

	# Table 6-3, DCMI Capabilities Parameters:
	#  - Supported DCMI Capabilities:
	#    - Byte 2 Platform capabilities: [0] Power management
	if (((rsp[5] & 1) == 0)); then
		printf 'BMC does not provide DCMI Power Management capability\n' >&2
		return 1
	fi

	# Check if BMC provides Enhanced System Power Statistics attributes - this allows to issue
	# requests for power readings at averaging time period, e.g. from last 5 seconds, 30 minutes,
	# 1 hour and so on. With this we can provide more detailed view on power usage within a
	# specific period of time. Without it, we need to depend only on current reading that should
	# be always available (the "NOW" reading).

	local -g enhanced_power_attr=()

	# Table 6-3, DCMI Capabilities Parameters:
	#  - Enhanced System Power Statistics attributes
	#
	# The number of time periods lives in element 4; anything shorter cannot
	# be decoded, so drop it and stick to the "NOW" reading.
	if enhanced_power_attr=($(dcmiraw 0x1 0x5)) && ((${#enhanced_power_attr[@]} > 4)); then
		print_dcmi_available_time_periods
	else
		enhanced_power_attr=()
	fi

	printf 'Using DCMI Power Management\n' >&2
}
176
sdr_power_support() {
	# This is a fallback which only some platforms may provide (confirmed PowerEdge and CYP).
	# We are looking for a full, threshold sensor which reports overall power usage in Watts.
	# Different BMCs may have SDRs which describe such sensor(s) differently so this is not
	# 100% reliable. To make sure we pick up a proper sensor we also narrow it down to a
	# specific entity (System Board or Power Supply). Readings from the sensor should be
	# considered as "NOW" readings (without access to min, max readings).
	#
	# Globals read:    ipmitool, sdr_cache, extra_power_sensors
	# Globals written: power_sensors (user-provided sensors first, then detected ones)
	# Returns non-zero when no power sensor could be found.

	local -g power_sensors=()
	local sensor entity unit status known
	local -a candidates=()

	# Cache SDR to speed up sensor readings
	if [[ ! -f $sdr_cache ]]; then
		printf 'Saving SDR cache at %s\n' "$sdr_cache" >&2
		"$ipmitool" sdr dump "$sdr_cache" > /dev/null
	fi

	if ((${#extra_power_sensors[@]} > 0)); then
		candidates+=("${extra_power_sensors[@]}")
	fi

	while IFS="," read -r sensor _ unit status _ entity _; do
		[[ $unit == Watts && $status == ok ]] || continue
		[[ $entity == "System Board" || $entity == "Power Supply" ]] || continue
		candidates+=("$sensor")
	done < <("$ipmitool" -S "$sdr_cache" -vc sdr list full 2>&1)

	# Sensors are addressed and reported by name, so a sensor requested by the
	# user that is also auto-detected must be listed only once. Otherwise it
	# would be polled twice per interval with its readings split in two.
	for sensor in "${candidates[@]}"; do
		for known in "${power_sensors[@]}"; do
			if [[ $known == "$sensor" ]]; then
				continue 2
			fi
		done
		power_sensors+=("$sensor")
	done

	if ((${#power_sensors[@]} > 0)); then
		printf 'Using SDR (Power sensors: %s)\n' "${power_sensors[*]}" >&2
	else
		printf 'Cannot locate power sensors\n' >&2
		return 1
	fi
}
211
power_support() {
	# Select the interface used for power readings and remember it in the
	# global $support. An explicit "dcmi" or "sdr" in $interface overrides
	# detection; otherwise DCMI is probed first and SDR is the fallback.
	local -g support

	case "$interface" in
		dcmi | sdr)
			# override
			"${interface}_power_support"
			support=$interface
			;;
		*)
			if dcmi_power_support; then
				support=dcmi
			elif sdr_power_support; then
				support=sdr
			else
				printf 'BMC does not provide Power Management support, cannot gather power measurements\n' >&2
				return 1
			fi
			;;
	esac
}
228
get_dcmi_now_reading() {
	# Take a single DCMI power reading, store it for the final dump and log
	# it to stderr.
	#
	# Globals read:    interval, available_time_periods_in_seconds
	# Globals written: _DCMI, _DCMI_min, _DCMI_max, _DCMI_avg, power_readings
	# Always returns 0: a failed reading is logged and skipped so that the
	# collection loop keeps running.
	local rsp reading=0 max min avg ts timeframe mode=01h
	local get_cmd get_avg=0 print
	# Fields are decoded from rsp[1] up to rsp[16].
	local min_rsp_len=17

	# Table 6-16, Get Power Reading Command:
	get_cmd=(0x2 0x1 0x0 0x0)

	if [[ -n ${available_time_periods_in_seconds[interval]} ]]; then
		get_cmd=(0x2 0x2 "${available_time_periods_in_seconds[interval]}" 0x0)
		get_avg=1
		mode=02h
	fi

	# We use System Power Statistics mode to get the "NOW" reading by default. In case
	# interval matches one supported by Enhanced System Power Statistics we use that
	# mode to obtain extra min, max, avg statistics.

	# A truncated reply must not be decoded: missing bytes evaluate to 0 and
	# would be recorded as a genuine 0 Watts sample, skewing the averages.
	if ! rsp=($(dcmiraw "${get_cmd[@]}")) || ((${#rsp[@]} < min_rsp_len)); then
		printf 'DCMI reading: error\n' >&2
		return 0
	fi

	# Note that the BMC timestamp depends on the hwclock setup which we then attempt
	# to represent in UTC.
	ts=$((rsp[12] << 24 | rsp[11] << 16 | rsp[10] << 8 | rsp[9]))
	# This is interpreted differently by different BMCs so for now we make a note of
	# it but don't present it to the user.
	timeframe=$((rsp[16] << 24 | rsp[15] << 16 | rsp[14] << 8 | rsp[13]))
	reading=$((rsp[2] << 8 | rsp[1]))

	if ((get_avg == 1)); then
		min=$((rsp[4] << 8 | rsp[3]))
		max=$((rsp[6] << 8 | rsp[5]))
		avg=$((rsp[8] << 8 | rsp[7]))
		_DCMI_min+=("$min")
		_DCMI_max+=("$max")
		_DCMI_avg+=("$avg")
		power_readings["DCMI_MIN"]="_DCMI_min[@]"
		power_readings["DCMI_MAX"]="_DCMI_max[@]"
		power_readings["DCMI_AVG"]="_DCMI_avg[@]"
	fi
	_DCMI+=("$reading")
	power_readings["DCMI"]="_DCMI[@]"

	# min, max and avg are only set in averaging mode; skip whatever is unset.
	for print in min max avg reading; do
		[[ -n ${!print} ]] || continue
		printf '(%s) DCMI %s (mode: %s): %u Watts (interval: %us)\n' \
			"$(utc "$ts")" \
			"$print" \
			"$mode" \
			"${!print}" \
			"$interval"
	done >&2
}
281
get_sdr_now_reading() {
	# Read every sensor listed in power_sensors once, store the numeric
	# readings for the final dump and log each result to stderr.
	#
	# Globals read:    power_sensors, ipmitool, sdr_cache, interval
	# Globals written: _sensor<N>_readings (one array per sensor index),
	#                  power_readings
	# Returns non-zero only when there are no sensors to read.
	local sensor reading ts unit
	local numeric_re='^[0-9]+([.][0-9]+)?$'

	if ((${#power_sensors[@]} == 0)); then
		printf 'No power sensors were provided\n' >&2
		return 1
	fi

	for sensor in "${!power_sensors[@]}"; do
		ts=$(utc)
		reading="" unit=""
		# ipmitool's own diagnostics are silenced; a failed or non-numeric
		# reading is reported as "error" and is not stored, so it can neither
		# break nor skew the averages computed at exit.
		if ! IFS="," read -r _ reading unit _ \
			< <("$ipmitool" -c -S "$sdr_cache" sdr get "${power_sensors[sensor]}" 2> /dev/null) \
			|| [[ ! $reading =~ $numeric_re ]]; then
			reading=error
		else
			# The array name is built from the numeric sensor index. The value is
			# referenced (\$reading), never pasted into the evaluated text, so
			# tool output is not interpreted as shell code.
			eval "_sensor${sensor}_readings+=(\"\$reading\")"
			power_readings["${power_sensors[sensor]}"]="_sensor${sensor}_readings[@]"
			reading+=" $unit"
		fi
		printf '(%s) Sensor %s reading: %s (interval %us)\n' \
			"$ts" \
			"${power_sensors[sensor]}" \
			"$reading" \
			"$interval" >&2
	done
}
306
get_now_reading() {
	# Dispatch a single reading to the interface recorded in $support. Any
	# other value is silently a no-op.
	if [[ $support == dcmi ]]; then
		get_dcmi_now_reading
	elif [[ $support == sdr ]]; then
		get_sdr_now_reading
	fi
}
314
dump_readings() {
	# Write, for every sensor recorded in power_readings, the average and the
	# full list of readings to
	#   $output_dir/[<prefix>_]avg_<sensor>.bmc.pm.txt
	#   $output_dir/[<prefix>_]all_<sensor>.bmc.pm.txt
	# Readings are truncated to whole numbers. Returns non-zero when nothing
	# was collected.
	local sensor reading readings ints total avg ref avg_file all_file

	((${#power_readings[@]} > 0)) || return 1
	printf 'Dumping average sensors reading from %s\n' "${!power_readings[*]}" >&2

	for sensor in "${!power_readings[@]}"; do
		# power_readings maps a sensor name to "<array name>[@]".
		ref=${power_readings["$sensor"]}
		readings=("${!ref}")
		if ((${#readings[@]} == 0)); then
			printf 'No readings available for %s sensor\n' "$sensor" >&2
			continue
		fi

		ints=()
		total=0
		for reading in "${readings[@]}"; do
			# Drop the fractional part; a bare fraction such as ".5" becomes 0.
			reading=${reading%.*}
			reading=${reading:-0}
			ints+=("$reading")
			# Plain assignment instead of ((total += ...)): the latter returns
			# non-zero whenever the sum is 0, which under errexit aborted the
			# dump in the middle of the EXIT trap.
			total=$((total + reading))
		done
		avg=$((total / ${#readings[@]}))

		avg_file=$output_dir/${prefix:+${prefix}_}avg_${sensor}.bmc.pm.txt
		all_file=$output_dir/${prefix:+${prefix}_}all_${sensor}.bmc.pm.txt

		printf '%u\n' "$avg" > "$avg_file"
		# %u rejects decimals such as "120.000", so print the truncated values.
		printf '%u\n' "${ints[@]}" > "$all_file"
		printf 'Dumped avg to %s\n' "$avg_file" >&2
		printf 'Dumped all to %s\n' "$all_file" >&2
	done
}
338
utc() {
	# Print a date in UTC: the epoch timestamp given as $1 when present,
	# the current time otherwise.
	if [[ -n $1 ]]; then
		date --utc "-d@$1"
	else
		date --utc
	fi
}
342
cleanup() {
	# Exit handler: drop the SDR cache unless the user asked to keep it, then
	# dump whatever readings were collected.
	if [[ -f $sdr_cache && $remove_sdr_cache == yes ]]; then
		rm "$sdr_cache"
	fi
	dump_readings
}
347
collect_readings() {
	# Take a reading, then sleep for $interval seconds; repeat $count times.
	# A count of 0 or less means the loop never terminates on its own.
	local remaining=$count

	until ((count > 0 && remaining-- == 0)); do
		get_now_reading
		sleep "${interval}s"
	done
}
355
help() {
	# Print the usage message to stdout. The synopsis mirrors the getopts
	# string used by the script: -x and -l are plain flags without arguments.
	cat <<- HELP

		Usage: $0 [-h] [-d dir] [-i sdr|dcmi] [-s SENSOR_NAME] [-t interval] [-x] [-l] [-p prefix] [-c count]

		  -h - Print this message.
		  -d - Directory where the results should be saved. Default is /tmp.
		  -i - Type of interface to use for requesting power usage. "sdr" or "dcmi".
		       If not set, available interface is used ("dcmi" has priority).
		  -t - How long to wait before each get power command in seconds. In case
		       this value matches one of supported averaging time periods special
		       variant of the command will be used to obtain the reading - this
		       variant is used only with the "dcmi" interface. Default is 1s.
		  -s - In case "sdr" interface is in use, try to read data from SENSOR_NAME.
		  -x - In case "sdr" interface is in use, don't remove SDR cache. This can
		       speed up subsequent runs of the script.
		  -l - Save output of the script to a log file (dir/${0##*/}.bmc.pm.log).
		  -p - Add prefix to saved files.
		  -c - Read power usage count times. 0 is the default and it means to run
		       indefinitely.

		When started, ${0##*/} will enter loop to continuously read power usage from either
		DCMI interface or dedicated Watts sensors every interval. Each reading will be
		logged to stderr. Upon termination, average power usage will be dumped to /tmp or
		directory set by -d.

	HELP
}
384
# Defaults, overridable from the command line.
output_dir=/tmp
interval=1
remove_sdr_cache=yes
log_to_file=no
prefix=""
count=0
interface=""

# Maps a sensor name to the "<array name>[@]" holding its readings.
declare -A power_readings=()
declare -a extra_power_sensors=()

# Options are parsed before the root check so that -h works for any user.
while getopts :hi:s:d:t:xlp:c: arg; do
	case "$arg" in
		h)
			help
			exit 0
			;;
		d) output_dir=$OPTARG ;;
		s) extra_power_sensors+=("$OPTARG") ;;
		i) interface=${OPTARG,,} ;;
		t) interval=$OPTARG ;;
		x) remove_sdr_cache=no ;;
		l) log_to_file=yes ;;
		p) prefix=$OPTARG ;;
		c) count=$OPTARG ;;
		# Bad options were silently dropped before; keep going, but say so.
		:) printf 'Ignoring option -%s: missing argument\n' "$OPTARG" >&2 ;;
		*) printf 'Ignoring unknown option -%s\n' "$OPTARG" >&2 ;;
	esac
done

# The interval is used as an array index and with printf %u, so anything but
# a whole number of seconds would only fail later, in the middle of a run.
if [[ ! $interval =~ ^[0-9]+$ ]]; then
	printf 'Invalid interval "%s": expected a whole number of seconds\n' "$interval" >&2
	exit 1
fi

# An empty count keeps meaning "run indefinitely", as do 0 and negatives.
count=${count:-0}
if [[ ! $count =~ ^-?[0-9]+$ ]]; then
	printf 'Invalid count "%s": expected a whole number\n' "$count" >&2
	exit 1
fi

is_root

declare -r sdr_cache=$output_dir/sdr.cache
declare -r log_file=${prefix:+${prefix}_}${0##*/}.bmc.pm.log

mkdir -p "$output_dir"
if [[ $log_to_file == yes ]]; then
	printf 'Redirecting to %s\n' "$output_dir/$log_file" >&2
	exec > "$output_dir/$log_file" 2>&1
fi

# From here on every exit path dumps the collected readings.
trap 'cleanup' EXIT

ipmi_supported
power_support

collect_readings
430