#  SPDX-License-Identifier: BSD-3-Clause
#  Copyright (C) 2020 Intel Corporation
#  All rights reserved.
#

# CPU topology, cpufreq and cpu/thread time accounting helpers built on top of
# sysfs, procfs and the rpc_cmd wrapper. Helpers such as xtrace_disable,
# xtrace_restore, rpc_cmd, killprocess and waitforlisten, as well as $rootdir,
# $testdir and $spdk_pid, are expected to be provided by whoever sources this file.

# Several functions below use extended glob patterns (+([0-9])). Bash must have
# extglob enabled at the time it parses those function bodies, so turn it on
# here rather than depend on the sourcing script having done so.
shopt -s extglob

declare -r sysfs_system=/sys/devices/system
declare -r sysfs_cpu=$sysfs_system/cpu
declare -r sysfs_node=$sysfs_system/node

declare -r scheduler=$rootdir/test/event/scheduler/scheduler
declare plugin=scheduler_plugin

source "$rootdir/test/scheduler/cgroups.sh"

# Store each remaining argument in the array named by $1, using the argument's
# numeric value as both index and value (i.e. build a sparse "set" of numbers).
# Arguments: $1 - name of the target array, $2.. - numeric elements
# Returns:   0, also when called without any argument.
fold_list_onto_array() {
	local _fold_name=$1
	local _fold_elem

	shift || return 0

	# A nameref instead of eval: element values are never re-parsed as code.
	local -n _fold_ref=$_fold_name

	for _fold_elem; do
		_fold_ref[_fold_elem]=$_fold_elem
	done
}

# Print all arguments joined with commas.
fold_array_onto_string() {
	local cpus=("$@")

	local IFS=","
	echo "${cpus[*]}"
}

# Expand a kernel-style cpu list stored in file $1 into one cpu id per line.
# Arguments: $1 - path to a file (or fd path) holding a list like 0-2,4,6-9
# Outputs:   sorted, de-duplicated cpu ids, one per line; nothing for an empty list
parse_cpu_list() {
	local list=$1
	local elem start end
	local -a elems=() cpus=()

	# 0-2,4,6-9, etc.
	IFS="," read -ra elems < "$list"

	((${#elems[@]} > 0)) || return 0

	for elem in "${elems[@]}"; do
		if [[ $elem == *-* ]]; then
			start=${elem%-*} end=${elem#*-}
			while ((start <= end)); do
				cpus[start++]=$start
			done
		else
			cpus[elem]=$elem
		fi
	done
	# The array is indexed by cpu id, so its keys are the sorted, unique ids.
	printf '%u\n' "${!cpus[@]}"
}

# Record the topology of NUMA node $1 in the global maps prepared by map_cpus().
# Globals written: node_<N>_cpu, node_<N>_core_<C>, node_<N>_core_<C>_thread_<T>,
#                  cpus, cpu_node_map, cpu_core_map, cpu_siblings, nodes
map_cpus_node() {
	local node_idx=$1
	local -n _cpu_node_map=node_${node_idx}_cpu
	local cpu_idx core_idx

	for cpu_idx in $(parse_cpu_list "$sysfs_node/node$node_idx/cpulist"); do
		# Core and sibling details are read only for cpus which are online.
		if is_cpu_online_f "$cpu_idx"; then
			core_idx=$(< "$sysfs_cpu/cpu$cpu_idx/topology/core_id")
			local -n _cpu_core_map=node_${node_idx}_core_${core_idx}
			_cpu_core_map+=("$cpu_idx") cpu_core_map[cpu_idx]=$core_idx
			local -n _cpu_siblings=node_${node_idx}_core_${core_idx}_thread_${cpu_idx}
			_cpu_siblings=($(parse_cpu_list "$sysfs_cpu/cpu$cpu_idx/topology/thread_siblings_list"))
			# Store "name[@]" so users can expand the siblings via ${!cpu_siblings[N]}.
			cpu_siblings[cpu_idx]="node_${node_idx}_core_${core_idx}_thread_${cpu_idx}[@]"
		fi
		_cpu_node_map[cpu_idx]=$cpu_idx cpu_node_map[cpu_idx]=$node_idx
		cpus+=("$cpu_idx")
	done

	nodes[node_idx]=$node_idx
}

# (Re)build the global cpu topology maps for every node found under $sysfs_node.
map_cpus() {
	local -g cpus=()
	local -g cpu_siblings=()
	local -g nodes=()
	local -g cpu_node_map=()
	local -g cpu_core_map=()
	local -g core_node_map=()
	local node

	# Drop per-node arrays left behind by a previous mapping.
	unset -v "${!node_@}"

	for node in "$sysfs_node/node"+([0-9]); do
		map_cpus_node "${node##*node}"
	done
}

# Print cpu ids, one per line, from the maps built by map_cpus().
# Arguments: $1 - node id (optional; all mapped cpus when empty)
#            $2 - core id on that node (optional)
# Returns:   1 if the selection is empty.
get_cpus() {
	local node=$1
	local core=$2
	local -a _cpus=()

	if [[ -z $node ]]; then
		_cpus=("${cpus[@]}")
	else
		local -n _get_cpus_ref=node_${node}_cpu
		if [[ -n $core ]]; then
			local -n _get_cpus_ref=node_${node}_core_${core}
		fi
		_cpus=("${_get_cpus_ref[@]}")
	fi
	((${#_cpus[@]} > 0)) || return 1
	printf '%u\n' "${_cpus[@]}"
}

# Print the cpus listed in sysfs' "isolated" attribute, one per line.
get_isolated_cpus() {
	[[ -e $sysfs_cpu/isolated ]] || return 0
	parse_cpu_list "$sysfs_cpu/isolated"
}

# Print the cpus listed in sysfs' "offline" attribute, one per line.
get_offline_cpus() {
	[[ -e $sysfs_cpu/offline ]] || return 0
	parse_cpu_list "$sysfs_cpu/offline"
}

# Print the cpus listed in sysfs' "online" attribute, one per line.
get_online_cpus() {
	[[ -e $sysfs_cpu/online ]] || return 0
	parse_cpu_list "$sysfs_cpu/online"
}

# Succeed if cpu $1 is present in sysfs' online list.
is_cpu_online() {
	local -a online=()

	fold_list_onto_array online $(get_online_cpus)
	[[ -v online[$1] ]]
}

# Succeed if cpu $1 is absent from sysfs' online list.
is_cpu_offline() {
	! is_cpu_online "$1"
}

# Succeed if cpu $1 is online according to its own per-cpu "online" attribute.
is_cpu_online_f() {
	local cpu=$1

	if ((cpu == 0)); then
		# cpu0 is special as it requires proper support in the kernel to be hot pluggable.
		# As such, it usually does not have its own online attribute so always check the
		# online list instead.
		is_cpu_online "$cpu"
	else
		[[ -e $sysfs_cpu/cpu$cpu/online ]] || return 1
		(($(< "$sysfs_cpu/cpu$cpu/online") == 1))
	fi
}

# Inverse of is_cpu_online_f().
is_cpu_offline_f() {
	! is_cpu_online_f "$1"
}

# Succeed if more than one node directory exists under $sysfs_node.
is_numa() {
	local nodes=("$sysfs_node/node"+([0-9]))

	((${#nodes[@]} > 1))
}

# Bring cpu $1 online unless it already is.
online_cpu() {
	is_cpu_offline_f "$1" || return 0
	echo 1 > "$sysfs_cpu/cpu$1/online"
}

# Take cpu $1 offline unless it already is.
offline_cpu() {
	is_cpu_online_f "$1" || return 0
	echo 0 > "$sysfs_cpu/cpu$1/online"
}

# Print the arguments as a bracketed, comma separated list, e.g. [0,1,2].
mask_cpus() {
	printf '[%s]\n' "$(fold_array_onto_string "$@")"
}

# Add all offline cpus plus the cpus given as arguments to the global denied[] set.
denied_list() {
	local -g denied

	fold_list_onto_array denied $(get_offline_cpus) "$@"
}

# Remove from the global allowed[] set every cpu which is in denied[] or
# whose id is above 127.
filter_allowed_list() {
	local cpu

	for cpu in "${!allowed[@]}"; do
		if [[ -n ${denied[cpu]} ]] || ((cpu > 127)); then
			unset -v "allowed[cpu]"
		fi
	done
}

# Populate the global allowed[] set with cpus to run on.
# Isolated cpus are always added; cpus of node $2 are then added core by core
# until $1 cpus are collected (or the node runs out), and denied cpus are
# filtered out. If filtering left too few cpus the function calls itself again,
# continuing from the position kept in cpu_count.
# Arguments: $1 - wanted number of cpus (default 4), $2 - node id (default 0)
allowed_list() {
	local max=${1:-4}
	local node=${2:-0}
	# Picks up the caller's cpu_count when recursing (dynamic scoping).
	local cpu_count=${cpu_count:--1}

	local -g allowed

	fold_list_onto_array allowed $(get_isolated_cpus)

	if ((cpu_count < 0 && ${#allowed[@]} > 0)); then
		((max += ${#allowed[@]}))
	fi

	local -n node_cpu_ref=node_${node}_cpu

	while ((${#allowed[@]} < max && ++cpu_count < ${#node_cpu_ref[@]})); do
		fold_list_onto_array allowed $(get_cpus "$node" "${cpu_core_map[node_cpu_ref[cpu_count]]}")
	done

	filter_allowed_list

	if ((${#allowed[@]} == max)); then
		return 0
	elif ((cpu_count == ${#node_cpu_ref[@]})); then
		return 0
	else
		allowed_list "$max" "$node"
	fi
}

# Print the cpus a task is allowed to run on, one per line.
# Arguments: $1 - pid, or path to a procfs-style status file (default: $$)
# Returns:   1 if no status file could be found, 0 otherwise.
get_proc_cpu_affinity() {
	xtrace_disable

	local pid=${1:-$$}
	local status val status_file
	local rc=1

	if [[ -e $pid ]]; then
		status_file=$pid
	elif [[ -e /proc/$pid/status ]]; then
		status_file=/proc/$pid/status
	fi

	if [[ -n $status_file ]]; then
		rc=0
		# Read the whole file up front, then parse the snapshot line by line.
		while IFS=":"$'\t' read -r status val; do
			if [[ $status == Cpus_allowed_list ]]; then
				parse_cpu_list <(echo "$val")
				break
			fi
		done <<< "$(< "$status_file")"
	fi

	# Single exit path so that every xtrace_disable is paired with a restore.
	xtrace_restore
	return "$rc"
}

# Snapshot the cpufreq setup of every cpu into global arrays indexed by cpu id.
# Globals written: cpufreq_*, cpuinfo_*, scaling_*_freqs, nominal_perf, highest_perf,
#                  available_governors_cpu_<N>, available_freqs_cpu_<N>, turbo_enabled
map_cpufreq() {
	# This info is used to cross-reference current cpufreq setup with
	# what DPDK's governor actually puts in place.

	local -g cpufreq_drivers=()
	local -g cpufreq_governors=()
	local -g cpufreq_base_freqs=()
	local -g cpufreq_max_freqs=()
	local -g cpufreq_min_freqs=()
	local -g cpufreq_cur_freqs=()
	local -g cpufreq_is_turbo=()
	local -g cpufreq_available_freqs=()
	local -g cpufreq_available_governors=()
	local -g cpufreq_high_prio=()
	local -g cpufreq_non_turbo_ratio=()
	local -g cpufreq_setspeed=()
	local -g cpuinfo_max_freqs=()
	local -g cpuinfo_min_freqs=()
	# Filled in for the cppc_cpufreq driver only. Declared here, like the other
	# maps, so they do not carry stale entries from a previous call.
	local -g scaling_min_freqs=()
	local -g scaling_max_freqs=()
	local -g nominal_perf=()
	local -g highest_perf=()
	local -g turbo_enabled=0
	local cpu cpu_idx
	local non_turbo_ratio base_max_freq num_freqs freq

	for cpu in "$sysfs_cpu/cpu"+([0-9]); do
		cpu_idx=${cpu##*cpu}
		[[ -e $cpu/cpufreq ]] || continue
		cpufreq_drivers[cpu_idx]=$(< "$cpu/cpufreq/scaling_driver")
		cpufreq_governors[cpu_idx]=$(< "$cpu/cpufreq/scaling_governor")

		# In case HWP is on
		if [[ -e $cpu/cpufreq/base_frequency ]]; then
			cpufreq_base_freqs[cpu_idx]=$(< "$cpu/cpufreq/base_frequency")
		fi

		cpufreq_cur_freqs[cpu_idx]=$(< "$cpu/cpufreq/scaling_cur_freq")
		cpufreq_max_freqs[cpu_idx]=$(< "$cpu/cpufreq/scaling_max_freq")
		cpufreq_min_freqs[cpu_idx]=$(< "$cpu/cpufreq/scaling_min_freq")

		# The maps below hold "name[@]" strings meant for ${!map[cpu]} expansion.
		local -n available_governors=available_governors_cpu_${cpu_idx}
		cpufreq_available_governors[cpu_idx]="available_governors_cpu_${cpu_idx}[@]"
		available_governors=($(< "$cpu/cpufreq/scaling_available_governors"))

		local -n available_freqs=available_freqs_cpu_${cpu_idx}
		cpufreq_available_freqs[cpu_idx]="available_freqs_cpu_${cpu_idx}[@]"

		case "${cpufreq_drivers[cpu_idx]}" in
			acpi-cpufreq)
				available_freqs=($(< "$cpu/cpufreq/scaling_available_frequencies"))
				# Turbo is assumed when the first entry is exactly 1000 above the second.
				if ((available_freqs[0] - 1000 == available_freqs[1])); then
					cpufreq_is_turbo[cpu_idx]=1
				else
					cpufreq_is_turbo[cpu_idx]=0
				fi
				cpufreq_setspeed[cpu_idx]=$(< "$cpu/cpufreq/scaling_setspeed")
				;;
			intel_pstate | intel_cpufreq) # active or passive
				non_turbo_ratio=$("$testdir/rdmsr.pl" "$cpu_idx" 0xce)
				cpuinfo_min_freqs[cpu_idx]=$(< "$cpu/cpufreq/cpuinfo_min_freq")
				cpuinfo_max_freqs[cpu_idx]=$(< "$cpu/cpufreq/cpuinfo_max_freq")
				cpufreq_non_turbo_ratio[cpu_idx]=$(((non_turbo_ratio >> 8) & 0xff))
				if ((cpufreq_base_freqs[cpu_idx] / 100000 > cpufreq_non_turbo_ratio[cpu_idx])); then
					cpufreq_high_prio[cpu_idx]=1
					base_max_freq=${cpufreq_base_freqs[cpu_idx]}
				else
					cpufreq_high_prio[cpu_idx]=0
					base_max_freq=$((cpufreq_non_turbo_ratio[cpu_idx] * 100000))
				fi
				num_freqs=$(((base_max_freq - cpuinfo_min_freqs[cpu_idx]) / 100000 + 1))
				if ((base_max_freq < cpuinfo_max_freqs[cpu_idx])); then
					((num_freqs += 1))
					cpufreq_is_turbo[cpu_idx]=1
				else
					cpufreq_is_turbo[cpu_idx]=0
				fi
				# Synthesize the list, highest first: an optional turbo entry
				# (base_max_freq + 1), then steps of 100000 going down.
				available_freqs=()
				for ((freq = 0; freq < num_freqs; freq++)); do
					if ((freq == 0 && cpufreq_is_turbo[cpu_idx] == 1)); then
						available_freqs[freq]=$((base_max_freq + 1))
					else
						available_freqs[freq]=$((base_max_freq - (freq - cpufreq_is_turbo[cpu_idx]) * 100000))
					fi
				done
				;;
			cppc_cpufreq)
				cpufreq_setspeed[cpu_idx]=$(< "$cpu/cpufreq/scaling_setspeed")
				scaling_min_freqs[cpu_idx]=$(< "$cpu/cpufreq/scaling_min_freq")
				scaling_max_freqs[cpu_idx]=$(< "$cpu/cpufreq/scaling_max_freq")
				cpuinfo_max_freqs[cpu_idx]=$(< "$cpu/cpufreq/cpuinfo_max_freq")
				nominal_perf[cpu_idx]=$(< "$cpu/acpi_cppc/nominal_perf")
				highest_perf[cpu_idx]=$(< "$cpu/acpi_cppc/highest_perf")

				# The unit of highest_perf and nominal_perf differs between arm platforms.
				# For highest_perf, it may be 300 or 3000000; both mean 3.0GHz.
				if ((highest_perf[cpu_idx] > nominal_perf[cpu_idx] && (\
					highest_perf[cpu_idx] == cpuinfo_max_freqs[cpu_idx] || \
					highest_perf[cpu_idx] * 10000 == cpuinfo_max_freqs[cpu_idx]))); then
					cpufreq_is_turbo[cpu_idx]=1
				else
					cpufreq_is_turbo[cpu_idx]=0
				fi

				if ((nominal_perf[cpu_idx] < 10000)); then
					nominal_perf[cpu_idx]=$((nominal_perf[cpu_idx] * 10000))
				fi

				num_freqs=$(((nominal_perf[cpu_idx] - scaling_min_freqs[cpu_idx]) / 100000 + 1 + \
					cpufreq_is_turbo[cpu_idx]))

				available_freqs=()
				for ((freq = 0; freq < num_freqs; freq++)); do
					if ((freq == 0 && cpufreq_is_turbo[cpu_idx] == 1)); then
						available_freqs[freq]=$((scaling_max_freqs[cpu_idx]))
					else
						available_freqs[freq]=$((nominal_perf[cpu_idx] - (\
							freq - cpufreq_is_turbo[cpu_idx]) * 100000))
					fi
				done
				;;
		esac
	done
	if [[ -e $sysfs_cpu/cpufreq/boost ]]; then
		turbo_enabled=$(< "$sysfs_cpu/cpufreq/boost")
	elif [[ -e $sysfs_cpu/intel_pstate/no_turbo ]]; then
		turbo_enabled=$((!$(< "$sysfs_cpu/intel_pstate/no_turbo")))
	fi
}

# Set the frequency (range) of a cpu in a driver specific way.
# Requires map_cpufreq() to have been called first.
# Arguments: $1 - cpu id, $2 - min freq (setspeed value for acpi/cppc),
#            $3 - max freq (optional, intel drivers only)
# Returns:   1 if the cpu has no mapped driver or no min freq was given.
set_cpufreq() {
	local cpu=$1
	local min_freq=$2
	local max_freq=$3
	local cpufreq=$sysfs_cpu/cpu$cpu/cpufreq

	# Map the cpufreq info first
	[[ -n ${cpufreq_drivers[cpu]} ]] || return 1
	[[ -n $min_freq ]] || return 1

	case "${cpufreq_drivers[cpu]}" in
		acpi-cpufreq | cppc_cpufreq)
			if [[ $(< "$cpufreq/scaling_governor") != userspace ]]; then
				echo "userspace" > "$cpufreq/scaling_governor"
			fi
			echo "$min_freq" > "$cpufreq/scaling_setspeed"
			;;
		intel_pstate | intel_cpufreq)
			if [[ -n $max_freq ]] && ((max_freq >= min_freq)); then
				echo "$max_freq" > "$cpufreq/scaling_max_freq"
			fi
			if ((min_freq <= cpufreq_max_freqs[cpu])); then
				echo "$min_freq" > "$cpufreq/scaling_min_freq"
			fi
			;;
	esac
}

# Switch cpu $1 to governor $2 unless it is already in use.
set_cpufreq_governor() {
	local cpu=$1
	local governor=$2
	local cpufreq=$sysfs_cpu/cpu$cpu/cpufreq

	if [[ $(< "$cpufreq/scaling_governor") != "$governor" ]]; then
		echo "$governor" > "$cpufreq/scaling_governor"
	fi
}

# Start the given app (see exec_under_static_scheduler()), then select the
# dynamic scheduler and let the app finish its init.
exec_under_dynamic_scheduler() {
	exec_under_static_scheduler "$@"
	"$rootdir/scripts/rpc.py" framework_set_scheduler dynamic
	"$rootdir/scripts/rpc.py" framework_start_init
}

# Kill the app tracked in $spdk_pid (if any), start "$@" in the background with
# --wait-for-rpc, store its pid in the global spdk_pid and wait for it to listen.
exec_under_static_scheduler() {
	if [[ -e /proc/$spdk_pid/status ]]; then
		killprocess "$spdk_pid"
	fi
	"$@" --wait-for-rpc &
	spdk_pid=$!
	# Give some time for the app to init itself
	waitforlisten "$spdk_pid"
}

# Gather busy/idle stats since this function was last called
# Globals written: busy, idle, past_busy, past_idle, thread_map
get_thread_stats_current() {
	xtrace_disable

	local -a total_busy=() total_idle=()
	local thread

	_get_thread_stats total_busy total_idle

	for thread in "${!thread_map[@]}"; do
		: $((busy[thread] = total_busy[thread] - past_busy[thread], past_busy[thread] = total_busy[thread]))
		: $((idle[thread] = total_idle[thread] - past_idle[thread], past_idle[thread] = total_idle[thread]))
	done
	xtrace_restore
}

# Gather busy/idle stats since application start
# Globals written: busy, idle, thread_map
get_thread_stats() {
	xtrace_disable
	_get_thread_stats busy idle
	xtrace_restore
}

# Query thread_get_stats and store, indexed by thread id, the busy counters in
# the array named by $1, the idle counters in the array named by $2 and the
# thread names in the global thread_map[].
_get_thread_stats() {
	local list_busy=$1
	local list_idle=$2
	local thread threads stats
	local -n _thread_busy_ref=$list_busy
	local -n _thread_idle_ref=$list_idle

	stats=$(rpc_cmd thread_get_stats | jq -r '.threads[]')
	threads=($(jq -r '.id' <<< "$stats"))

	for thread in "${threads[@]}"; do
		_thread_busy_ref[thread]=$(jq -r "select(.id == $thread) | .busy" <<< "$stats")
		_thread_idle_ref[thread]=$(jq -r "select(.id == $thread) | .idle" <<< "$stats")
		thread_map[thread]=$(jq -r "select(.id == $thread) | .name" <<< "$stats")
	done
}

# Refresh the global _cpu<N> arrays from /proc/stat and, when $1 is a valid cpu
# id, print its idle time ($2 == idle) or all of its times, one per line.
# Any other $1 (e.g. "all") just refreshes the arrays.
get_cpu_stat() {
	local cpu_idx=$1
	local stat=$2

	# cpu0 0 0 0 0 0 0 0 0 0 -> _cpu0=(0 0 0 0 0 0 0 0 0)
	source <(grep '^cpu[0-9]' /proc/stat | sed 's/\([^ ]*\) \(.*\)/_\1=(\2)/')

	# If we were called with valid cpu id return requested time
	[[ -v _cpu$cpu_idx ]] || return 0
	local -n cpu_stat=_cpu$cpu_idx

	case "$stat" in
		idle) echo "${cpu_stat[3]}" ;;
		*) printf '%u\n' "${cpu_stat[@]}" ;;
	esac
}

# Thin wrappers around the scheduler plugin's RPCs; arguments are passed through.
create_thread() {
	rpc_cmd --plugin "$plugin" scheduler_thread_create "$@"
}

destroy_thread() {
	rpc_cmd --plugin "$plugin" scheduler_thread_delete "$@"
}

active_thread() {
	rpc_cmd --plugin "$plugin" scheduler_thread_set_active "$@"
}

# Sample /proc/stat and compute, per cpu, what percentage of each interval was
# spent in every kind of cpu time.
# Arguments: $1 - number of samples to collect (<= 0: sample forever)
#            $2 - time to report, see cpu_time_map (default: idle)
#            $3 - 1 to print every sample (default: 0)
#            $4 - seconds to wait between samples (default: 1)
#            $5.. - cpus to sample
# Globals written: cpu_times[cpu]    - space separated % samples of the requested time
#                  avg_cpu_time[cpu] - average of the above
#                  cpu_time_map, avg_load_<cpu>, stat_<time>_<cpu>, raw_samples_<cpu>,
#                  raw_samples_<time>_<cpu>, old_stats_<cpu>
get_cpu_time() {
	xtrace_disable

	local interval=$1 cpu_time=${2:-idle} print=${3:-0} wait=${4:-1} interval_count
	# Drop the (at most four) option arguments; a plain "shift 4" fails, and
	# shifts nothing, when fewer arguments were given.
	shift "$(($# < 4 ? $# : 4))"
	local cpus=("$@") cpu
	local stats stat old_stats avg_load
	local sample_stats total_sample
	local keep_going=0

	# Exposed for the caller
	local -g cpu_times=()
	local -g avg_cpu_time=()

	# cpu_time:
	# 0 - user (time spent in user mode)
	# 1 - nice (Time spent in user mode with low priority)
	# 2 - system (Time spent in system mode)
	# 3 - idle (Time spent in the idle task)
	# 4 - iowait (Time waiting for I/O to complete)
	# 5 - irq (Time servicing interrupts)
	# 6 - softirq (Time servicing softirqs)
	# 7 - steal (Stolen time)
	# 8 - guest (Time spent running a virtual CPU)
	# 9 - guest_nice (Time spent running a niced guest)

	local -gA cpu_time_map
	cpu_time_map["user"]=0
	cpu_time_map["nice"]=1
	cpu_time_map["system"]=2
	cpu_time_map["idle"]=3
	cpu_time_map["iowait"]=4
	cpu_time_map["irq"]=5
	cpu_time_map["softirq"]=6
	cpu_time_map["steal"]=7
	cpu_time_map["guest"]=8
	cpu_time_map["guest_nice"]=9

	# Clear up the env
	unset -v "${!stat_@}"
	unset -v "${!old_stat_@}"
	unset -v "${!avg_stat@}"
	unset -v "${!avg_load@}"
	unset -v "${!raw_samples@}"

	cpu_time=${cpu_time_map["$cpu_time"]}
	interval_count=0
	if ((interval <= 0)); then
		keep_going=1
	else
		# We skip first sample to have min 2 for stat comparison
		interval=$((interval + 1))
	fi
	while ((interval_count++, keep_going ? 1 : --interval >= 0)); do
		((interval_count > 1 && print == 1)) && print_cpu_time_header
		# "all" is not a cpu id, so this only refreshes the _cpu<N> arrays.
		get_cpu_stat all
		for cpu in "${cpus[@]}"; do
			local -n old_stats=old_stats_$cpu
			local -n avg_load=avg_load_$cpu
			local -n raw_samples=raw_samples_$cpu
			local -n stats=_cpu$cpu
			sample_stats=() total_sample=0

			if ((interval_count == 1)); then
				# Skip first sample
				old_stats=("${stats[@]}")
				continue
			fi
			for stat in "${!stats[@]}"; do
				avg_load[stat]="stat_${stat}_${cpu}[@]"
				sample_stats[stat]=$((stats[stat] - old_stats[stat]))
				: $((total_sample += sample_stats[stat]))
			done
			for stat in "${!stats[@]}"; do
				local -n avg_stat=stat_${stat}_${cpu}
				local -n raw_samples_ref=raw_samples_${stat}_${cpu}
				raw_samples[stat]="raw_samples_${stat}_${cpu}[@]"
				raw_samples_ref+=("${stats[stat]}")
				avg_stat+=($((sample_stats[stat] * 100 / (total_sample == 0 ? 1 : total_sample))))
			done
			old_stats=("${stats[@]}")
			((print == 1)) && print_cpu_time "$cpu"
		done
		sleep "${wait}s"
	done

	# We collected % for each time. Now determine the avg % for requested time.
	local load stat_load
	for cpu in "${cpus[@]}"; do
		load=0
		local -n avg_load_cpu=avg_load_$cpu
		stat_load=("${!avg_load_cpu[cpu_time]}")
		for stat in "${stat_load[@]}"; do
			: $((load += stat))
		done
		cpu_times[cpu]=${stat_load[*]}
		avg_cpu_time[cpu]=$((load / ${#stat_load[@]}))
	done

	xtrace_restore
}

# Print the column header matching the rows produced by print_cpu_time().
print_cpu_time_header() {
	local ts
	ts=$(date "+%R:%S %Z")

	printf '(%s) %8s %8s %8s %8s %8s %8s %8s %8s %8s %8s %8s (test:%s)\n' \
		"$ts" \
		"CPU" "%usr" "%nice" "%sys" "%iowait" "%irq" "%soft" "%steal" \
		"%guest" "%gnice" "%idle" "${TEST_TAG:-N/A}"
}

# Print the most recent % sample of every cpu time collected for cpu $1 by
# get_cpu_time(). Prints nothing if no sample exists yet.
print_cpu_time() {
	local cpu=$1
	# Keep these local: "idle" in particular is also the name of the global
	# array get_thread_stats() fills in.
	local usr nice system idle iowait irq soft steal guest gnice

	local -n _cpu_ref=avg_load_$cpu
	((${#_cpu_ref[@]} > 0)) || return 0

	usr=("${!_cpu_ref[0]}")
	nice=("${!_cpu_ref[1]}")
	system=("${!_cpu_ref[2]}")
	idle=("${!_cpu_ref[3]}")
	iowait=("${!_cpu_ref[4]}")
	irq=("${!_cpu_ref[5]}")
	soft=("${!_cpu_ref[6]}")
	steal=("${!_cpu_ref[7]}")
	guest=("${!_cpu_ref[8]}")
	gnice=("${!_cpu_ref[9]}")

	printf '%23u %8u %8u %8u %8u %8u %8u %8u %8u %8u %8u\n' \
		"$cpu" \
		"${usr[-1]}" \
		"${nice[-1]}" \
		"${system[-1]}" \
		"${iowait[-1]}" \
		"${irq[-1]}" \
		"${soft[-1]}" \
		"${steal[-1]}" \
		"${guest[-1]}" \
		"${gnice[-1]}" \
		"${idle[-1]}"
}

# Decide, for every cpu in the caller-provided cpus_to_collect[], whether it is idle.
# Arguments: $1 - number of samples to take (default 5)
# Globals written: is_idle[cpu] - 1 if the cpu is considered idle, 0 otherwise
# Returns:   1 if cpus_to_collect[] is empty.
collect_cpu_idle() {
	((${#cpus_to_collect[@]} > 0)) || return 1

	local time=${1:-5}
	local cpu
	local samples
	local -g is_idle=()

	printf 'Collecting cpu idle stats (cpus: %s) for %u seconds...\n' \
		"${cpus_to_collect[*]}" "$time"

	get_cpu_time "$time" idle 0 1 "${cpus_to_collect[@]}"

	local user_load load_median user_spdk_load
	for cpu in "${cpus_to_collect[@]}"; do
		samples=(${cpu_times[cpu]})
		load_median=$(calc_median "${samples[@]}")
		printf '* cpu%u idle samples: %s (avg: %u%%, median: %u%%)\n' \
			"$cpu" "${samples[*]}" "${avg_cpu_time[cpu]}" "$load_median"
		# Cores with polling reactors have 0% idle time,
		# while the ones in interrupt mode won't have 100% idle.
		# During the tests, polling reactors spend the major portion
		# of their cpu time in user mode. With that in mind, if the
		# general check for cpu's idleness fails, check what portion
		# of the cpu load falls into user mode. For the idle check
		# use the last sample. For the cpu load, compare user's raw
		# samples in SC_CLK_TCK context for a more detailed view.
		user_load=$(cpu_usage_clk_tck "$cpu" user)
		if ((samples[-1] >= 70)); then
			printf '* cpu%u is idle\n' "$cpu"
			is_idle[cpu]=1
		elif ((user_load <= 15)); then
			printf '* cpu%u not fully idle, but user load is low so passing\n' "$cpu"
			is_idle[cpu]=1
		else
			printf '* cpu%u is not idle\n' "$cpu"
			is_idle[cpu]=0
			# HACK: Since we verify this in context of business of particular SPDK threads, make
			# the last check against their {u,s}time to determine if we are really busy or not. This
			# is meant to null and void potential jitter on the cpu.
			# See https://github.com/spdk/spdk/issues/3362.
			user_spdk_load=$(get_spdk_proc_time "$time" "$cpu")
			if ((user_spdk_load <= 15)); then
				printf '* SPDK thread pinned to cpu%u seems to be idle regardless (%u%%)\n' \
					"$cpu" \
					"$user_spdk_load"
				is_idle[cpu]=1
			fi
		fi
	done
}

# Print (stdout, no newline) the usage of cpu $1, capped at 100, computed from
# the last two raw samples gathered by get_cpu_time(). Details go to stderr.
# Arguments: $1 - cpu id, $2 - user, nice, system or all (default: all)
# Returns:   1 if get_cpu_time() left no raw samples for the cpu.
cpu_usage_clk_tck() {
	local cpu=$1 time=${2:-all}
	local user nice system usage clk_delta=0

	# We should be called in get_cpu_time()'s environment.
	[[ -v raw_samples_$cpu ]] || return 1

	local -n raw_samples=raw_samples_$cpu
	user=("${!raw_samples[cpu_time_map["user"]]}")
	nice=("${!raw_samples[cpu_time_map["nice"]]}")
	system=("${!raw_samples[cpu_time_map["system"]]}")

	# Construct delta based on last two samples of a given time.
	# ";;&" keeps testing the following patterns, so "all" hits every arm.
	case "$time" in
		user | all) : $((clk_delta += (user[-1] - user[-2]))) ;;&
		nice | all) : $((clk_delta += (nice[-1] - nice[-2]))) ;;&
		system | all) : $((clk_delta += (system[-1] - system[-2]))) ;;
		*) ;;
	esac
	# We assume 1s between each sample. See get_cpu_time().
	usage=$((100 * clk_delta / $(getconf CLK_TCK)))
	usage=$((usage > 100 ? 100 : usage))

	printf '%u' "$usage"
	printf '* cpu%u %s usage: %u\n' "$cpu" "$time" "$usage" >&2
	printf '* cpu%u user samples: %s\n' "$cpu" "${user[*]}" >&2
	printf '* cpu%u nice samples: %s\n' "$cpu" "${nice[*]}" >&2
	printf '* cpu%u system samples: %s\n' "$cpu" "${system[*]}" >&2
}

# Rebuild the global thread_cpus[] map (thread id -> cpu) for all cpus in the
# global cpus[] using framework_get_reactors.
# Returns:   1 if cpus[] is empty or no thread was found on any of the cpus.
update_thread_cpus_map() {
	local cpu thread
	local -g thread_cpus=()
	local reactor_framework

	((${#cpus[@]} > 0)) || return 1

	get_thread_stats

	reactor_framework=$(rpc_cmd framework_get_reactors | jq -r '.reactors[]')
	for cpu in "${cpus[@]}"; do
		for thread in $(jq -r "select(.lcore == $cpu) | .lw_threads[].id" <<< "$reactor_framework"); do
			printf '* Thread %u (%s) on cpu%u\n' "$thread" "${thread_map[thread]}" "$cpu"
			thread_cpus[thread]=$cpu
		done
	done
	((${#thread_cpus[@]} > 0))
}

# Print the median of the integer arguments (integer mean of the two middle
# values for an even count).
# Returns:   1 if called without any sample.
calc_median() {
	local samples=("$@") samples_sorted
	local middle median

	if ((${#samples[@]} == 0)); then
		echo "calc_median: no samples given" >&2
		return 1
	fi

	samples_sorted=($(printf '%s\n' "${samples[@]}" | sort -n))

	middle=$((${#samples_sorted[@]} / 2))
	if ((${#samples_sorted[@]} % 2 == 0)); then
		median=$(((samples_sorted[middle - 1] + samples_sorted[middle]) / 2))
	else
		median=${samples_sorted[middle]}
	fi

	echo "$median"
}

# Print the median of the per-second utime+stime deltas of the $spdk_pid thread
# which is pinned exclusively to the given cpu.
# Arguments: $1 - number of stat reads (at least 2 are always taken), $2 - cpu id
# Returns:   1 if the app is not running or no matching thread was found.
get_spdk_proc_time() {
	# Similar to cpu_usage_clk_tck() but works on the per-thread utime/stime
	# fields of /proc/<pid>/task/<tid>/stat. See proc(5).
	# NOTE(review): the deltas are used as-is as a percentage, which presumably
	# relies on one second between samples and 100 clock ticks per second - confirm.

	xtrace_disable

	local interval=$1 cpu=$2
	local thread thread_to_time stats stat_line
	local _time time _stime stime _utime utime
	local thread_cpu_list
	local rc=1

	if [[ -e /proc/$spdk_pid/status ]]; then
		# Find SPDK thread pinned to given cpu
		for thread in "/proc/$spdk_pid/task/"*; do
			thread_cpu_list=($(get_proc_cpu_affinity "$thread/status"))
			# we aim at reactor threads and these should be bound to a single cpu.
			# An empty list (e.g. the thread is already gone) must not match cpu0.
			((${#thread_cpu_list[@]} == 1)) || continue
			if ((thread_cpu_list[0] == cpu)); then
				thread_to_time=$thread
				break
			fi
		done
	fi

	if [[ -e $thread_to_time/stat ]]; then
		interval=$((interval <= 1 ? 2 : interval))

		while ((--interval >= 0)); do
			# See cgroups.sh -> id_proc()
			# Strip everything up to the closing ")" of the comm field so that
			# the remaining fields split cleanly.
			stat_line=$(< "$thread_to_time/stat")
			stats=(${stat_line/*) /})
			_utime[interval]=${stats[11]} # Amount of time spent in user mode
			_stime[interval]=${stats[12]} # Amount of time spent in kernel mode
			_time[interval]=$((_utime[interval] + _stime[interval]))
			if ((${#_time[@]} > 1)); then
				utime+=($((_utime[interval] - _utime[interval + 1])))
				stime+=($((_stime[interval] - _stime[interval + 1])))
				time+=($((_time[interval] - _time[interval + 1])))
			fi
			# Sleep between consecutive reads only, so that every delta,
			# including the first one, covers a full second.
			if ((interval > 0)); then
				sleep 1
			fi
		done

		echo "stime samples: ${stime[*]}" >&2
		echo "utime samples: ${utime[*]}" >&2

		calc_median "${time[@]}"
		rc=0
	fi

	# Single exit path so that every xtrace_disable is paired with a restore.
	xtrace_restore
	return "$rc"
}