#!/usr/bin/env bash
# SPDX-License-Identifier: BSD-3-Clause
# Copyright (C) 2023 Intel Corporation
# All rights reserved.

# Do similar thing to what turbostat.c does but in a more compact
# scope. We just report temp as per coretemp's hwmon entries for
# each core|package and check if there is any indication that
# throttling took place (per cpu thread).
set -e
shopt -s extglob nullglob

pmdir=$(readlink -f "$(dirname "$0")")
rootdir=$(readlink -f "$pmdir/../../../")
source "$rootdir/test/scheduler/common.sh"
source "$pmdir/common"

# Convert a raw hwmon temperature reading ($1) to whole degrees Celsius by
# dividing it by 1000 (integer division).
to_celsius() { echo $(($1 / 1000)); }

# Walk all hwmon entries which expose a name and report their temperatures.
# coretemp devices get the detailed per-core report, everything else gets the
# generic one (with its stderr silenced). A reporter which fails does not stop
# the walk.
report_hwmon() {
	local hwmon hw_name

	for hwmon in /sys/class/hwmon/hwmon*; do
		[[ -e $hwmon/name ]] || continue
		hw_name=$(< "$hwmon/name")
		case "$hw_name" in
			coretemp) report_hwmon_coretemp "$hwmon" ;;
			*) report_hwmon_generic "$hwmon" 2> /dev/null ;;
		esac || continue
	done
}

# Report the first temperature input (temp1_input) of a non-coretemp hwmon
# device.
#   $1 - path to the hwmon entry
# The printed label is built from the device's name, followed by temp1_label
# and the basename of the resolved device link when these are available.
# Always returns 0 if the required entries are missing or unreadable.
report_hwmon_generic() {
	local hwmon=$1 temp ts
	local label dev

	ts=$(dater)

	# We report just the bare minimum as each device may come with
	# different set of labels, inputs, etc.
	[[ -e $hwmon/temp1_input ]] || return 0

	# These entries, especially the ones dedicated for nvme, may disappear during
	# tests, so try to handle them gracefully.

	temp=$(< "$hwmon/temp1_input") && label=$(< "$hwmon/name") || return 0

	if [[ -e $hwmon/temp1_label ]]; then
		label+=":$(< "$hwmon/temp1_label")"
	fi

	if [[ -e $hwmon/device ]] && dev=$(readlink -f "$hwmon/device"); then
		label+=":${dev##*/}"
	fi

	printf '(%s) --- %s (%u C) (test:%s)\n' \
		"$ts" \
		"$label" \
		"$(to_celsius "$temp")" \
		"$TEST_TAG"
	printf '%s\n' "---"
}

# Report package and per-core temperatures of a coretemp hwmon device.
#   $1 - path to the hwmon entry
# Each tempN_label is read together with its _input, _crit and _max
# counterparts. The last word of the label is used as an id: "Core*" labels
# fill the per-core arrays, "Package*" label selects the node being reported.
# Returns 1 if no "Package*" label was found.
report_hwmon_coretemp() {
	local hwmon=$1 temp ts
	local core crit input id label max node
	local -a package=() threads=()
	local cores_input=() cores_crit=() cores_max=()

	ts=$(dater)

	for label in "$hwmon/"temp+([0-9])_label; do
		# Strip the suffix to get the common tempN prefix of sibling entries
		temp=${label%_label}
		label=$(< "$label")
		input=$(< "${temp}_input")
		crit=$(< "${temp}_crit")
		max=$(< "${temp}_max")
		id=${label##* }
		case "$label" in
			Core*)
				cores_input[id]=$input
				cores_crit[id]=$crit
				cores_max[id]=$max
				;;
			Package*) node=$id package[node]=$input ;;
		esac
	done
	# No package_id? Something is amiss
	# FIXME: This is a cheeky assumption that each physical package (socket) N maps to
	# a corresponding numa node N. For physical systems this may be usually the case
	# but for quirky vNUMA setups not necessarily. For similar systems (don't have
	# any at hand and it's a bit tricky to test thermal stuff under VMs) this probably
	# would need to drop the lookup of the "Package" label and just check each thread's
	# physical_package_id and/or numa assignment (cpu_node_map[@]).
	[[ -n $node ]] || return 1

	printf '(%s) --- Node%u (%u C) (test:%s)\n' \
		"$ts" \
		"$node" \
		"$(to_celsius "${package[node]}")" \
		"$TEST_TAG"

	for core in "${!cores_input[@]}"; do
		# get_cpus is not defined in this file; its output is split on
		# whitespace into the list of threads shown next to the core.
		threads=($(get_cpus "$node" "$core"))
		printf '  (%s) Core%u (%s): %u C (crit: %u C, max: %u C)\n' \
			"$ts" \
			"$core" \
			"${threads[*]}" \
			"$(to_celsius "${cores_input[core]}")" \
			"$(to_celsius "${cores_crit[core]}")" \
			"$(to_celsius "${cores_max[core]}")"
	done
	printf '%s\n' "---"
}

# Report thermal throttling status per node and per cpu, based on each cpu's
# thermal_throttle/core_throttle_count and core_throttle_total_time_ms.
# A node is reported only if any of its cpus shows a non-zero count or time,
# or when debug is set to 1. Within a reported node only cpus with a non-zero
# throttle count are listed.
# Returns 1 if cpu0 does not expose core_throttle_count at all.
report_throttling() {
	# Quick check to see if MSRs report proper support - if this entry does not exist,
	# then there's no point in looking up entire topology, support is simply not
	# there.
	[[ -e $sysfs_cpu/cpu0/thermal_throttle/core_throttle_count ]] || return 1

	local cpu cpu_throttling=() cpu_throttling_time=() throttler=()
	local throttle_count throttle_time
	local node node_cpus=() node_throttled
	local ts

	throttler[0]="Normal Operation"
	throttler[1]="Throttled"

	ts=$(dater)
	# Order the output similarly to hwmon, starting with a node|package
	for node in "${nodes[@]}"; do
		# Start each node with a clean slate so readings of the previous
		# node don't linger in either of the per-cpu arrays.
		node_cpus=($(get_cpus "$node")) node_throttled=0 cpu_throttling=()
		cpu_throttling_time=()
		for cpu in "${node_cpus[@]}"; do
			throttle_count=$(< "$sysfs_cpu/cpu$cpu/thermal_throttle/core_throttle_count")
			throttle_time=$(< "$sysfs_cpu/cpu$cpu/thermal_throttle/core_throttle_total_time_ms")
			cpu_throttling[cpu]=$throttle_count
			# Time is kept in ms by the kernel entry, we print seconds
			cpu_throttling_time[cpu]=$((throttle_time / 1000))
			if ((throttle_count > 0 || throttle_time > 0)); then
				node_throttled=1
			fi
		done

		((node_throttled == 1 || debug == 1)) || continue

		printf '(%s) ###### Throttling Node%u Status: %s ######\n' \
			"$ts" "$node" "${throttler[node_throttled]}"

		for cpu in "${!cpu_throttling[@]}"; do
			((cpu_throttling[cpu] > 0)) || continue
			printf '(%s) CPU%u: %s (count: %u, time: %us)\n' \
				"$ts" "$cpu" \
				"${throttler[cpu_throttling[cpu] > 0 ? 1 : 0]}" \
				"${cpu_throttling[cpu]}" \
				"${cpu_throttling_time[cpu]}"
		done
	done
}

# Set up the global module tables.
#   $@ - names of the modules to run; all supported modules if none are given
# modules_supported maps a module name to its reporting function,
# modules_to_run lists the functions to execute and modules_out_refs maps
# each of these functions to the name of the array ("_<function>") collecting
# its output.
# Returns 1 (with a message on stderr) on the first unsupported module name.
init_modules() {
	local -gA modules_supported=()
	local -gA modules_out_refs=()
	local -ga modules_to_run=()
	local module

	modules_supported["hwmon"]=report_hwmon
	modules_supported["throttle"]=report_throttling

	for module in "${@:-"${!modules_supported[@]}"}"; do
		if [[ -z ${modules_supported["$module"]} ]]; then
			printf 'Module (%s) not supported\n' "$module" >&2
			return 1
		fi
		modules_to_run+=("${modules_supported["$module"]}")
		modules_out_refs["${modules_supported["$module"]}"]="_${modules_supported["$module"]}"
	done
}

# Main loop: run every selected module, keep its non-empty output in the
# module's output array and echo it to stdout, then sleep for $interval
# seconds. Loops $count times, or indefinitely when count is <= 0.
collect_readings() {
	local _count=$count module data

	# Not defined in this file - expected to come from the sourced helpers
	map_cpus

	while ((count <= 0 ? 1 : _count--)); do
		for module in "${modules_to_run[@]}"; do
			local -n ref=${modules_out_refs["$module"]}
			# NOTE: with set -e in effect, a module returning non-zero
			# fails this assignment and ends the script (cleanup() still
			# runs from the EXIT trap).
			data=$("$module")
			[[ -n $data ]] && ref+=("$data") && echo "$data"
		done
		sleep "${interval}s"
	done
}

# Print current time as HH:MM:SS followed by the timezone abbreviation.
dater() {
	date "+%R:%S %Z"
}

# EXIT handler: dump the output collected by each module (if any) to
# $PM_OUTPUTDIR/[prefix_]<function>.pm.txt and call rm_pm_pid (defined outside
# of this file).
cleanup() {
	local module

	for module in "${!modules_out_refs[@]}"; do
		local -n _ref=${modules_out_refs["$module"]}
		((${#_ref[@]} > 0)) || continue
		printf '%s\n' "${_ref[@]}" > "$PM_OUTPUTDIR/${prefix:+${prefix}_}$module.pm.txt"
		printf 'Dumped %s module to %s\n' \
			"$module" \
			"$PM_OUTPUTDIR/${prefix:+${prefix}_}$module.pm.txt"
	done

	rm_pm_pid
}

# Print usage. Body of the here-doc must stay indented with tabs (<<-).
help() {
	cat <<- HELP

		Usage: $0 [-h] [-c count] [-d dir] [-l] [-p prefix] [-t interval] [-v] [module0 module1 ...]

		-h - Print this message.
		-c - Execute module count times. 0 is the default and it means to run
		     indefinitely.
		-d - Directory where the results should be saved. Default is /tmp.
		-l - Save output of the script to a log file (dir/${0##*/}.pm.log).
		-p - Add prefix to saved files.
		-t - How long to wait before executing modules. Default is 1s.
		-v - Report throttling status of a node even if no throttling was
		     detected on it.

		module - Module to execute. Currently supported: 'hwmon', 'throttle'. All modules are
		         executed by default.

		When started, ${0##*/} will enter loop to continuously execute specified
		modules. Each execution will be logged to stdout. Upon termination, all
		output will be dumped to /tmp or directory set by -d.

	HELP
}

count=0
debug=0
interval=1
log_to_file=no
prefix=""

while getopts c:d:hlp:t:v opt; do
	case "$opt" in
		c) count=$OPTARG ;;
		d) PM_OUTPUTDIR=$OPTARG ;;
		h)
			help
			exit 0
			;;
		l) log_to_file=yes ;;
		p) prefix=$OPTARG ;;
		t) interval=$OPTARG ;;
		v) debug=1 ;;
		*) ;;
	esac
done
shift $((OPTIND - 1))

# count lands in arithmetic context inside collect_readings(), so make sure it
# is a plain integer instead of letting the shell evaluate an arbitrary string.
if [[ ! $count =~ ^-?[0-9]+$ ]]; then
	printf 'Invalid count (%s), expected an integer\n' "$count" >&2
	exit 1
fi

declare -r log_file=${prefix:+${prefix}_}${0##*/}.pm.log

mkdir -p "$PM_OUTPUTDIR"
if [[ $log_to_file == yes ]]; then
	printf 'Redirecting to %s\n' "$PM_OUTPUTDIR/$log_file" >&2
	exec > "$PM_OUTPUTDIR/$log_file" 2>&1
fi

# save_pm_pid and retag are not defined in this file - expected to come from
# the sourced helpers.
save_pm_pid
trap 'cleanup' EXIT
trap 'retag' USR1

init_modules "$@"

collect_readings