xref: /spdk/scripts/perf/pm/collect-cpu-temp (revision cb645ed6daffd10a3e97cd96168cae5941cfc234)
118d0ffa5SMichal Berger#!/usr/bin/env bash
218d0ffa5SMichal Berger#  SPDX-License-Identifier: BSD-3-Clause
318d0ffa5SMichal Berger#  Copyright (C) 2023 Intel Corporation
418d0ffa5SMichal Berger#  All rights reserved.
518d0ffa5SMichal Berger
# Do a similar thing to what turbostat.c does, but in a more compact
# scope. We just report the temperature as per coretemp's hwmon entries
# for each core|package and check if there is any indication that
# throttling took place (per cpu thread).
# Stop on the first unhandled error.
set -e
# extglob enables extended patterns such as +([0-9]); nullglob makes a glob
# that matches nothing expand to nothing instead of to itself.
shopt -s extglob
shopt -s nullglob

# Resolve the directory holding this script and, three levels up, the root of
# the tree, then load the helper libraries from both places.
pmdir=$(readlink -f "$(dirname "$0")")
rootdir=$(readlink -f "$pmdir/../../../")
source "$rootdir/test/scheduler/common.sh"
source "$pmdir/common"
1718d0ffa5SMichal Berger
# Convert a reading given in millidegrees Celsius to whole degrees Celsius.
# Arguments: $1 - reading in millidegrees; missing or empty is treated as 0
#                 rather than triggering an arithmetic syntax error
# Outputs:   the reading divided by 1000 (integer division, truncated
#            towards zero) on stdout
to_celsius() {
	local millidegrees=${1:-0}

	echo $((millidegrees / 1000))
}
1918d0ffa5SMichal Berger
# Walk every hwmon device under /sys/class/hwmon and pass it to the matching
# reporter: report_hwmon_coretemp for devices named "coretemp",
# report_hwmon_generic (with stderr silenced) for everything else. A reporter
# that fails only skips its own device.
report_hwmon() {
	local hwmon_dir sensor_name

	for hwmon_dir in /sys/class/hwmon/hwmon*; do
		# Devices without a name cannot be classified.
		if [[ ! -e $hwmon_dir/name ]]; then
			continue
		fi
		sensor_name=$(< "$hwmon_dir/name")
		if [[ $sensor_name == coretemp ]]; then
			report_hwmon_coretemp "$hwmon_dir" || continue
		else
			report_hwmon_generic "$hwmon_dir" 2> /dev/null || continue
		fi
	done
}
3218d0ffa5SMichal Berger
# Report the temperature exposed by a single hwmon device.
# Globals:   TEST_TAG (read)
# Arguments: $1 - path to the hwmon entry
# Outputs:   "(time) --- label (N C) (test:tag)" followed by a "---" line;
#            label is the device name, optionally extended with temp1_label
#            and the basename of the resolved device link
# Returns:   0, also when temp1_input is missing or cannot be read
report_hwmon_generic() {
	local hwmon=$1 temp ts
	local label dev

	# We report just the bare minimum as each device may come with
	# different set of labels, inputs, etc.
	[[ -e $hwmon/temp1_input ]] || return 0

	# These entries, especially the ones dedicated for nvme, may disappear during
	# tests, so try to handle them gracefully.
	temp=$(< "$hwmon/temp1_input") || return 0
	label=$(< "$hwmon/name") || return 0

	ts=$(dater)

	if [[ -e $hwmon/temp1_label ]]; then
		label+=":$(< "$hwmon/temp1_label")"
	fi

	if [[ -e $hwmon/device ]] && dev=$(readlink -f "$hwmon/device"); then
		label+=":${dev##*/}"
	fi

	# The temperature is signed: with %u a sub-zero reading would be printed
	# as a huge unsigned number.
	printf '(%s) --- %s (%d C) (test:%s)\n' \
		"$ts" \
		"$label" \
		"$(to_celsius "$temp")" \
		"$TEST_TAG"
	printf '%s\n' "---"
}
6318d0ffa5SMichal Berger
# Report package and per-core temperatures of a single coretemp hwmon device.
# Globals:   TEST_TAG (read)
# Arguments: $1 - path to the coretemp hwmon entry
# Outputs:   a "NodeN" header line, one line per core (with the cpu threads
#            returned by get_cpus) and a closing "---" line
# Returns:   1 when no "Package" label was found, 0 otherwise
report_hwmon_coretemp() {
	local hwmon=$1 temp ts
	local core crit input id label label_file max node threads
	local cores_input=() cores_crit=() cores_max=() package=()

	ts=$(dater)

	# A plain glob narrowed down by a regex selects exactly the tempN_label
	# entries, without depending on extglob being enabled when this function
	# is parsed or on nullglob when it runs.
	for label_file in "$hwmon/"temp*_label; do
		[[ ${label_file##*/} =~ ^temp[0-9]+_label$ ]] || continue
		temp=${label_file%_label}
		label=$(< "$label_file")
		input=$(< "${temp}_input")
		crit=$(< "${temp}_crit")
		max=$(< "${temp}_max")
		# The id is the last word of the label.
		id=${label##* }
		case "$label" in
			Core*)
				cores_input[id]=$input
				cores_crit[id]=$crit
				cores_max[id]=$max
				;;
			Package*)
				node=$id
				package[node]=$input
				;;
		esac
	done
	# No package_id? Something is amiss
	# FIXME: This is a cheeky assumption that each physical package (socket) N maps to
	# a corresponding numa node N. For physical systems this may be usually the case
	# but for quirky vNUMA setups not necessarily. For similar systems (don't have
	# any at hand and it's a bit tricky to test thermal stuff under VMs) this probably
	# would need to drop the lookup of the "Package" label and just check each thread's
	# physical_package_id and/or numa assignment (cpu_node_map[@]).
	[[ -n $node ]] || return 1

	# Temperatures are signed, hence %d; ids stay unsigned.
	printf '(%s) --- Node%u (%d C) (test:%s)\n' \
		"$ts" \
		"$node" \
		"$(to_celsius "${package[node]}")" \
		"$TEST_TAG"

	for core in "${!cores_input[@]}"; do
		# Word splitting is intended: get_cpus prints a list of cpu threads.
		threads=($(get_cpus "$node" "$core"))
		printf '  (%s) Core%u (%s): %d C (crit: %d C, max: %d C)\n' \
			"$ts" \
			"$core" \
			"${threads[*]}" \
			"$(to_celsius "${cores_input[core]}")" \
			"$(to_celsius "${cores_crit[core]}")" \
			"$(to_celsius "${cores_max[core]}")"
	done
	printf '%s\n' "---"
}
11418d0ffa5SMichal Berger
# Report thermal throttling per node and per cpu thread, based on the
# thermal_throttle counters found under $sysfs_cpu.
# Globals:   sysfs_cpu, nodes, debug (read)
# Outputs:   for each node that saw throttling (or for every node when
#            debug == 1) a status header followed by one line per throttled
#            cpu; with debug == 1 every cpu of the node is listed
# Returns:   1 when the thermal_throttle interface is not available
report_throttling() {
	# Quick check to see if MSRs report proper support - if this entry does not exist,
	# then there's no point in looking up entire topology, support is simply not
	# there.
	[[ -e $sysfs_cpu/cpu0/thermal_throttle/core_throttle_count ]] || return 1

	local cpu cpu_throttled=() cpu_throttling=() cpu_throttling_time=() throttler=()
	local throttle_count throttle_dir throttle_time
	local node node_cpus=() node_throttled
	local ts

	throttler[0]="Normal Operation"
	throttler[1]="Throttled"

	ts=$(dater)
	# Order the output similarly to hwmon, starting with a node|package
	for node in "${nodes[@]}"; do
		node_cpus=($(get_cpus "$node")) node_throttled=0
		cpu_throttled=() cpu_throttling=() cpu_throttling_time=()
		for cpu in "${node_cpus[@]}"; do
			throttle_dir=$sysfs_cpu/cpu$cpu/thermal_throttle
			# Counters which are not exposed are treated as 0 instead of
			# producing a read error for each cpu on each pass.
			throttle_count=0 throttle_time=0
			if [[ -r $throttle_dir/core_throttle_count ]]; then
				throttle_count=$(< "$throttle_dir/core_throttle_count")
			fi
			if [[ -r $throttle_dir/core_throttle_total_time_ms ]]; then
				throttle_time=$(< "$throttle_dir/core_throttle_total_time_ms")
			fi
			cpu_throttling[cpu]=${throttle_count:-0}
			cpu_throttling_time[cpu]=$((throttle_time / 1000))
			cpu_throttled[cpu]=0
			if ((throttle_count > 0 || throttle_time > 0)); then
				cpu_throttled[cpu]=1 node_throttled=1
			fi
		done

		((node_throttled == 1 || debug == 1)) || continue

		printf '(%s) ###### Throttling Node%u Status: %s ######\n' \
			"$ts" "$node" "${throttler[node_throttled]}"

		for cpu in "${!cpu_throttling[@]}"; do
			# Use the very same criteria that flagged the node; in debug
			# mode list the cpus in normal operation as well.
			((cpu_throttled[cpu] == 1 || debug == 1)) || continue
			printf '(%s) CPU%u: %s (count: %u, time: %us)\n' \
				"$ts" "$cpu" \
				"${throttler[cpu_throttled[cpu]]}" \
				"${cpu_throttling[cpu]}" \
				"${cpu_throttling_time[cpu]}"
		done
	done
}
15818d0ffa5SMichal Berger
# Select the modules to execute.
# Globals:   modules_supported (written) - module name -> reporting function
#            modules_to_run (written)    - reporting functions, in run order
#            modules_out_refs (written)  - reporting function -> name of the
#                                          array collecting its output
# Arguments: names of the modules to run; all supported modules when none
#            are given. A module named more than once is queued only once.
# Returns:   1 (with a message on stderr) when a module is not supported
init_modules() {
	local -gA modules_supported=()
	local -gA modules_out_refs=()
	local -ga modules_to_run=()
	local module handler requested=("$@")

	modules_supported["hwmon"]=report_hwmon
	modules_supported["throttle"]=report_throttling

	if ((${#requested[@]} == 0)); then
		requested=("${!modules_supported[@]}")
	fi

	for module in "${requested[@]}"; do
		handler=""
		# An empty string is not a valid associative array subscript.
		if [[ -n $module ]]; then
			handler=${modules_supported["$module"]:-}
		fi
		if [[ -z $handler ]]; then
			printf 'Module (%s) not supported\n' "$module" >&2
			return 1
		fi
		# Queue each module once, otherwise every reading would be
		# collected and stored multiple times.
		if [[ -z ${modules_out_refs["$handler"]:-} ]]; then
			modules_to_run+=("$handler")
			modules_out_refs["$handler"]="_$handler"
		fi
	done
}
17718d0ffa5SMichal Berger
# Main loop: execute each selected module, print its output and keep it in the
# module's output array for the final dump.
# Globals:   count (read)    - number of passes, <= 0 means loop forever
#            interval (read) - seconds to sleep after each pass
#            modules_to_run, modules_out_refs (read)
# Outputs:   every non-empty reading on stdout
collect_readings() {
	local _count=$count module data
	local -A failed_modules=()

	map_cpus

	while ((count <= 0 ? 1 : _count--)); do
		for module in "${modules_to_run[@]}"; do
			local -n ref=${modules_out_refs["$module"]}
			# A module may fail, e.g. when the platform lacks support for
			# it. Under set -e a bare assignment would silently terminate
			# the whole script, taking the remaining modules with it, so
			# skip the reading and say so once.
			if ! data=$("$module"); then
				if [[ -z ${failed_modules["$module"]:-} ]]; then
					printf 'Module (%s) failed, skipping its readings\n' "$module" >&2
					failed_modules["$module"]=1
				fi
				continue
			fi
			if [[ -n $data ]]; then
				ref+=("$data")
				printf '%s\n' "$data"
			fi
		done
		sleep "${interval}s"
	done
}
19218d0ffa5SMichal Berger
# Print the current time as "HH:MM:SS ZONE"; used to stamp the readings.
dater() {
	date "+%H:%M:%S %Z"
}
19618d0ffa5SMichal Berger
# Dump the readings collected by each module to
# $PM_OUTPUTDIR/[prefix_]module.pm.txt and drop the pid file.
# Globals:   modules_out_refs, PM_OUTPUTDIR, prefix (read)
# Outputs:   one "Dumped ..." line per written file on stdout, failures on
#            stderr
cleanup() {
	local module out_file

	for module in "${!modules_out_refs[@]}"; do
		local -n _ref=${modules_out_refs["$module"]}
		# Nothing was collected for this module.
		((${#_ref[@]} > 0)) || continue
		out_file=$PM_OUTPUTDIR/${prefix:+${prefix}_}$module.pm.txt
		# A failed dump must not abort the handler under set -e, otherwise
		# the remaining modules are lost and the pid file is left behind.
		if ! printf '%s\n' "${_ref[@]}" > "$out_file"; then
			printf 'Failed to dump %s module to %s\n' "$module" "$out_file" >&2
			continue
		fi
		printf 'Dumped %s module to %s\n' "$module" "$out_file"
	done

	rm_pm_pid
}
21118d0ffa5SMichal Berger
# Print the usage message on stdout. The here-document relies on leading tabs
# being stripped by <<-, so keep it indented with tabs.
help() {
	cat <<- HELP

		Usage: $0 [-h] [-c count] [-d dir] [-l] [-p prefix] [-t interval] [-v] [module0 module1 ...]

		-h - Print this message.
		-c - Execute module count times. 0 is the default and it means to run
		     indefinitely.
		-d - Directory where the results should be saved. Default is /tmp.
		-l - Save output of the script to a log file (dir/${0##*/}.pm.log).
		-p - Add prefix to saved files.
		-t - How long to wait before executing modules. Default is 1s.
		-v - Be verbose; report the throttling status even if no throttling was
		     detected.

		module - Module to execute. Currently supported: 'hwmon', 'throttle'. All modules are
		         executed by default.

		When started, ${0##*/} will enter loop to continuously execute specified
		modules. Each execution will be logged to stdout. Upon termination, all
		output will be dumped to /tmp or directory set by -d.

	HELP
}
23418d0ffa5SMichal Berger
# Defaults, may be overridden from the command line.
count=0
debug=0
interval=1
log_to_file=no
prefix=""

while getopts c:d:hlp:t:v opt; do
	case "$opt" in
		c) count=$OPTARG ;;
		d) PM_OUTPUTDIR=$OPTARG ;;
		h)
			help
			exit 0
			;;
		l) log_to_file=yes ;;
		p) prefix=$OPTARG ;;
		t) interval=$OPTARG ;;
		v) debug=1 ;;
		*)
			# getopts has already complained on stderr. Refuse to start
			# with options we do not understand instead of silently
			# ignoring them.
			help >&2
			exit 1
			;;
	esac
done
shift $((OPTIND - 1))

declare -r log_file=${prefix:+${prefix}_}${0##*/}.pm.log

mkdir -p "$PM_OUTPUTDIR"
if [[ $log_to_file == yes ]]; then
	printf 'Redirecting to %s\n' "$PM_OUTPUTDIR/$log_file" >&2
	exec > "$PM_OUTPUTDIR/$log_file" 2>&1
fi

save_pm_pid
# Dump whatever was collected on any exit path; USR1 triggers retag.
trap 'cleanup' EXIT
trap 'retag' USR1

# What is left on the command line are the names of the modules to run.
init_modules "$@"

collect_readings
273