#!/usr/bin/env bash

testdir=$(readlink -f $(dirname $0))
rootdir=$(readlink -f $testdir/../../..)
source $rootdir/test/common/autotest_common.sh
source $rootdir/test/vhost/common.sh

vhost_num="0"
vm_memory=2048
vm_sar_enable=false
host_sar_enable=false
sar_delay="0"
sar_interval="1"
sar_count="10"
vm_throttle=""
bpf_traces=()
ctrl_type="spdk_vhost_scsi"
use_split=false
kernel_cpus=""
run_precondition=false
lvol_stores=()
lvol_bdevs=()
split_bdevs=()
used_vms=""
wwpn_prefix="naa.5001405bc6498"
packed_ring=false

fio_iterations=1
fio_gtod=""
precond_fio_bin=$CONFIG_FIO_SOURCE_DIR/fio
disk_map=""

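# The disk_cfg_* arrays below are filled from the --disk-map file. Index i of each
# array describes one physical NVMe disk: its PCI BDF, the SPDK bdev name it is
# attached as, how many parts it is split into (split bdevs or lvol bdevs) and the
# list of VMs that consume the resulting bdevs.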
disk_cfg_bdfs=()
disk_cfg_spdk_names=()
disk_cfg_splits=()
disk_cfg_vms=()
disk_cfg_kernel_names=()

function usage() {
	[[ -n $2 ]] && (
		echo "$2"
		echo ""
	)
	echo "Shortcut script for running automated vhost performance tests"
	echo "Usage: $(basename $1) [OPTIONS]"
	echo
	echo "-h, --help                  Print help and exit"
	echo "    --fio-bin=PATH          Path to FIO binary on host."
	echo "                            Binary will be copied to the VMs; static compilation"
	echo "                            of the binary is recommended."
	echo "    --fio-jobs=PATH         Comma separated list of fio config files to use for test."
	echo "    --fio-iterations=INT    Number of times to run specified workload."
	echo "    --fio-gtod-reduce       Enable fio gtod_reduce option in test."
	echo "    --vm-memory=INT         Amount of RAM memory (in MB) to pass to a single VM."
	echo "                            Default: 2048 MB"
	echo "    --vm-image=PATH         OS image to use for running the VMs."
	echo "                            Default: \$DEPENDENCY_DIR/spdk_test_image.qcow2"
	echo "    --vm-sar-enable         Measure CPU utilization in guest VMs using sar."
	echo "    --host-sar-enable       Measure CPU utilization on host using sar."
	echo "    --sar-delay=INT         Wait for X seconds before starting SAR measurement. Default: 0."
	echo "    --sar-interval=INT      Interval (seconds) argument for SAR. Default: 1s."
	echo "    --sar-count=INT         Count argument for SAR. Default: 10."
	echo "    --bpf-traces=LIST       Comma delimited list of .bt scripts for enabling BPF traces."
	echo "                            List of .bt scripts available in scripts/bpf"
	echo "    --vm-throttle-iops=INT  I/O throttle rate in IOPS for each device on the VMs."
	echo "    --ctrl-type=TYPE        Controller type to use for test:"
	echo "                            spdk_vhost_scsi - use spdk vhost scsi"
	echo "                            spdk_vhost_blk - use spdk vhost block"
	echo "                            kernel_vhost - use kernel vhost scsi"
	echo "                            Default: spdk_vhost_scsi"
	echo "    --packed-ring           Use packed ring support. Requires QEMU 4.2.0 or greater. Default: disabled."
	echo "    --use-split             Use split vbdevs instead of Logical Volumes"
	echo "    --limit-kernel-vhost=INT  Limit kernel vhost to run only on a number of CPU cores."
	echo "    --run-precondition      Precondition NVMe disks (via SPDK fio plugin) before the test. Default: false."
	echo "    --precond-fio-bin       FIO binary used for SPDK fio plugin precondition. Default: $CONFIG_FIO_SOURCE_DIR/fio."
	echo "    --custom-cpu-cfg=PATH   Custom CPU config for test."
	echo "                            Default: spdk/test/vhost/common/autotest.config"
	echo "    --disk-map              Disk map for given test. Specify which disks to use, their SPDK name,"
	echo "                            how many times to split them and which VMs should be attached to created bdevs."
	echo "                            Example:"
	echo "                            NVME PCI BDF,Spdk Bdev Name,Split Count,VM List"
	echo "                            0000:1a:00.0,Nvme0,2,0 1"
	echo "                            0000:1b:00.0,Nvme1,2,2 3"
	echo "-x                          set -x for script debug"
	exit 0
}

function cleanup_lvol_cfg() {
	notice "Removing lvol bdevs"
	for lvol_bdev in "${lvol_bdevs[@]}"; do
		$rpc_py bdev_lvol_delete $lvol_bdev
		notice "lvol bdev $lvol_bdev removed"
	done

	notice "Removing lvol stores"
	for lvol_store in "${lvol_stores[@]}"; do
		$rpc_py bdev_lvol_delete_lvstore -u $lvol_store
		notice "lvol store $lvol_store removed"
	done
}

function cleanup_split_cfg() {
	notice "Removing split vbdevs"
	for disk in "${disk_cfg_spdk_names[@]}"; do
		$rpc_py bdev_split_delete ${disk}n1
	done
}

function cleanup_parted_config() {
	notice "Removing parted disk configuration"
	for disk in "${disk_cfg_kernel_names[@]}"; do
		parted -s /dev/${disk}n1 rm 1
	done
}

function cleanup_kernel_vhost() {
	notice "Cleaning kernel vhost configuration"
	targetcli clearconfig confirm=True
	cleanup_parted_config
}

function create_vm() {
	vm_num=$1
	setup_cmd="vm_setup --disk-type=$ctrl_type --force=$vm_num --memory=$vm_memory --os=$VM_IMAGE"
	if [[ "$ctrl_type" == "kernel_vhost" ]]; then
		x=$(printf %03d $vm_num)
		setup_cmd+=" --disks=${wwpn_prefix}${x}"
	else
		setup_cmd+=" --disks=0"
	fi

	if $packed_ring; then
		setup_cmd+=" --packed"
	fi

	$setup_cmd
	used_vms+=" $vm_num"
	echo "Added to used vms"
	echo $used_vms
}

function create_spdk_controller() {
	vm_num=$1
	bdev=$2

	if [[ "$ctrl_type" == "spdk_vhost_scsi" ]]; then
		$rpc_py vhost_create_scsi_controller naa.0.$vm_num
		notice "Created vhost scsi controller naa.0.$vm_num"
		$rpc_py vhost_scsi_controller_add_target naa.0.$vm_num 0 $bdev
		notice "Added LUN 0/$bdev to controller naa.0.$vm_num"
	elif [[ "$ctrl_type" == "spdk_vhost_blk" ]]; then
		if $packed_ring; then
			p_opt="-p"
		fi

		$rpc_py vhost_create_blk_controller naa.0.$vm_num $bdev $p_opt
		notice "Created vhost blk controller naa.0.$vm_num $bdev"
	fi
}

while getopts 'xh-:' optchar; do
	case "$optchar" in
		-)
			case "$OPTARG" in
				help) usage $0 ;;
				fio-bin=*) fio_bin="--fio-bin=${OPTARG#*=}" ;;
				fio-jobs=*) fio_jobs="${OPTARG#*=}" ;;
				fio-iterations=*) fio_iterations="${OPTARG#*=}" ;;
				fio-gtod-reduce) fio_gtod="--gtod-reduce" ;;
				vm-memory=*) vm_memory="${OPTARG#*=}" ;;
				vm-image=*) VM_IMAGE="${OPTARG#*=}" ;;
				vm-sar-enable) vm_sar_enable=true ;;
				host-sar-enable) host_sar_enable=true ;;
				sar-delay=*) sar_delay="${OPTARG#*=}" ;;
				sar-interval=*) sar_interval="${OPTARG#*=}" ;;
				sar-count=*) sar_count="${OPTARG#*=}" ;;
				bpf-traces=*) IFS="," read -r -a bpf_traces <<< "${OPTARG#*=}" ;;
				vm-throttle-iops=*) vm_throttle="${OPTARG#*=}" ;;
				ctrl-type=*) ctrl_type="${OPTARG#*=}" ;;
				packed-ring) packed_ring=true ;;
				use-split) use_split=true ;;
				run-precondition) run_precondition=true ;;
				precond-fio-bin=*) precond_fio_bin="${OPTARG#*=}" ;;
				limit-kernel-vhost=*) kernel_cpus="${OPTARG#*=}" ;;
				custom-cpu-cfg=*) custom_cpu_cfg="${OPTARG#*=}" ;;
				disk-map=*) disk_map="${OPTARG#*=}" ;;
				*) usage $0 "Invalid argument '$OPTARG'" ;;
			esac
			;;
		h) usage $0 ;;
		x)
			set -x
			x="-x"
			;;
		*) usage $0 "Invalid argument '$OPTARG'" ;;
	esac
done

rpc_py="$rootdir/scripts/rpc.py -s $(get_vhost_dir 0)/rpc.sock"

if [[ -n $custom_cpu_cfg ]]; then
	source $custom_cpu_cfg
	vhost_reactor_mask="vhost_${vhost_num}_reactor_mask"
	vhost_reactor_mask="${!vhost_reactor_mask}"
	vhost_main_core="vhost_${vhost_num}_main_core"
	vhost_main_core="${!vhost_main_core}"
fi

if [[ -z $fio_jobs ]]; then
	error "No FIO job specified!"
fi

trap 'error_exit "${FUNCNAME}" "${LINENO}"' INT ERR

if [[ -z $disk_map ]]; then
	fail "No disk map provided for test. Exiting."
fi

# ===== Enable "performance" cpu governor =====
if hash cpupower; then
	cpupower frequency-set -g performance
else
	echo "WARNING: Missing cpupower! Please install it."
fi
current_governor=$(cat /sys/devices/system/cpu/cpu0/cpufreq/scaling_governor)
echo "INFO: Using $current_governor cpu governor for test."

# ===== Precondition NVMes if specified =====
if [[ $run_precondition == true ]]; then
	# The same precondition routine can be used for lvols thanks to the
	# --clear-method option: lvols created with "--clear-method none" do not
	# UNMAP on creation, so the preconditioned data is preserved.
	json_cfg=$rootdir/nvme.json
	$rootdir/scripts/gen_nvme.sh --json-with-subsystems > "$json_cfg"
	mapfile -t nvmes < <(grep -oP "Nvme\d+" "$json_cfg")
	fio_filename=$(printf ":%sn1" "${nvmes[@]}")
	fio_filename=${fio_filename:1}
	$precond_fio_bin --name="precondition" \
		--ioengine="${rootdir}/build/fio/spdk_bdev" \
		--rw="write" --spdk_json_conf="$json_cfg" --thread="1" \
		--group_reporting --direct="1" --size="100%" --loops="2" --bs="256k" \
		--iodepth=32 --filename="${fio_filename}" || true
fi

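# Parse the disk map file. Each non-empty line is expected to follow the CSV
# format described in usage(), for example:
#   0000:1a:00.0,Nvme0,2,0 1
# i.e. <NVMe PCI BDF>,<SPDK bdev name>,<split count>,<space-separated VM list>.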
set +x
readarray disk_cfg < $disk_map
for line in "${disk_cfg[@]}"; do
	echo $line
	IFS=","
	s=($line)
	disk_cfg_bdfs+=(${s[0]})
	disk_cfg_spdk_names+=(${s[1]})
	disk_cfg_splits+=(${s[2]})
	disk_cfg_vms+=("${s[3]}")

	# Find kernel nvme names
	if [[ "$ctrl_type" == "kernel_vhost" ]]; then
		tmp=$(find /sys/devices/pci* -name ${s[0]} -print0 | xargs sh -c 'ls $0/nvme')
		disk_cfg_kernel_names+=($tmp)
		IFS=" "
	fi
done
unset IFS
set -x

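# Backend configuration. For kernel_vhost the disks are partitioned with parted and
# exported through targetcli-created vhost targets; for SPDK vhost the disks are
# attached as NVMe bdevs and carved into either split bdevs or lvol bdevs, which are
# then exposed to the VMs through vhost-scsi or vhost-blk controllers.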
if [[ "$ctrl_type" == "kernel_vhost" ]]; then
	notice "Configuring kernel vhost..."
	trap 'vm_kill_all; sleep 1; cleanup_kernel_vhost; error_exit "${FUNCNAME}" "${LINENO}"' INT ERR

	# Split disks using parted for kernel vhost
	newline=$'\n'
	backstores=()
	for ((i = 0; i < ${#disk_cfg_kernel_names[@]}; i++)); do
		nvme=${disk_cfg_kernel_names[$i]}
		splits=${disk_cfg_splits[$i]}
		notice "  Creating extended partition on disk /dev/${nvme}n1"
		parted -s /dev/${nvme}n1 mklabel msdos
		parted -s /dev/${nvme}n1 mkpart extended 2048s 100%

		part_size=$((100 / ${disk_cfg_splits[$i]})) # Split 100% of disk into roughly even parts
		echo "  Creating ${splits} partitions of relative disk size ${part_size}"
		for p in $(seq 0 $((splits - 1))); do
			p_start=$((p * part_size))
			p_end=$((p_start + part_size))
			parted -s /dev/${nvme}n1 mkpart logical ${p_start}% ${p_end}%
			sleep 3
		done

		# Prepare kernel vhost configuration
		# Below grep: match only logical NVMe partitions, not the "Extended" partition itself.
		# For example: nvme0n1p15 will match, but nvme0n1p1 will not.
		partitions=$(find /dev -name "${nvme}n1*" | sort --version-sort | grep -P 'p(?!1$)\d+')
		# Create block backstores for vhost kernel process
		for p in $partitions; do
			backstore_name=$(basename $p)
			backstores+=("$backstore_name")
			targetcli backstores/block create $backstore_name $p
		done
		partitions=($partitions)

		# Create kernel vhost controllers and add LUNs
		# Setup VM configurations
		vms_to_run=(${disk_cfg_vms[i]})
		for ((j = 0; j < ${#vms_to_run[@]}; j++)); do
			# The WWPN prefix is 3 characters short; complete it with the
			# zero-padded VM number so it matches the WWPN used in create_vm().
			x=$(printf %03d ${vms_to_run[$j]})
			wwpn="${wwpn_prefix}${x}"
			targetcli vhost/ create $wwpn
			targetcli vhost/$wwpn/tpg1/luns create /backstores/block/$(basename ${partitions[$j]})
			create_vm ${vms_to_run[j]}
			sleep 1
		done
	done
	targetcli ls
else
	notice "Configuring SPDK vhost..."
	vhost_run -n "${vhost_num}" -g -a "-p ${vhost_main_core} -m ${vhost_reactor_mask}"
	notice "..."
	if [[ ${#bpf_traces[@]} -gt 0 ]]; then
		notice "Enabling BPF traces: ${bpf_traces[*]}"
		vhost_dir="$(get_vhost_dir 0)"
		vhost_pid="$(cat $vhost_dir/vhost.pid)"

		bpf_cmd=("$rootdir/scripts/bpftrace.sh")
		bpf_cmd+=("$vhost_pid")
		for trace in "${bpf_traces[@]}"; do
			bpf_cmd+=("$rootdir/scripts/bpf/$trace")
		done

		BPF_OUTFILE="$VHOST_DIR/bpftraces.txt" "${bpf_cmd[@]}" &
		bpf_script_pid=$!

		# Wait a bit for trace capture to start
		sleep 3
	fi

	if [[ $use_split == true ]]; then
		notice "Configuring split bdevs..."
		trap 'cleanup_split_cfg; error_exit "${FUNCNAME}" "${LINENO}"' INT ERR
		for ((i = 0; i < ${#disk_cfg_bdfs[@]}; i++)); do
			nvme_bdev=$($rpc_py bdev_nvme_attach_controller -b ${disk_cfg_spdk_names[$i]} -t pcie -a ${disk_cfg_bdfs[$i]})
			notice "Created NVMe Bdev: $nvme_bdev with BDF ${disk_cfg_bdfs[$i]}"

			splits=$($rpc_py bdev_split_create $nvme_bdev ${disk_cfg_splits[$i]})
			splits=($splits)
			notice "Created splits: ${splits[*]} on Bdev ${nvme_bdev}"
			for s in "${splits[@]}"; do
				split_bdevs+=($s)
			done

			vms_to_run=(${disk_cfg_vms[i]})
			for ((j = 0; j < ${#vms_to_run[@]}; j++)); do
				notice "Setting up VM ${vms_to_run[j]}"
				create_spdk_controller "${vms_to_run[j]}" ${splits[j]}
				create_vm ${vms_to_run[j]}
			done
			echo " "
		done
		bdevs=("${split_bdevs[@]}")
	else
		notice "Configuring LVOLs..."
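		# Each physical disk gets one lvol store; on top of it, disk_cfg_splits[i]
		# lvol bdevs of roughly equal size are created, each attached to one VM.
		# "--clear-method none" keeps lvol creation from unmapping the disk, so any
		# preconditioning done earlier stays in place.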
		trap 'cleanup_lvol_cfg; error_exit "${FUNCNAME}" "${LINENO}"' INT ERR
		for ((i = 0; i < ${#disk_cfg_bdfs[@]}; i++)); do
			nvme_bdev=$($rpc_py bdev_nvme_attach_controller -b ${disk_cfg_spdk_names[$i]} -t pcie -a ${disk_cfg_bdfs[$i]})
			notice "Created NVMe Bdev: $nvme_bdev with BDF ${disk_cfg_bdfs[$i]}"

			ls_guid=$($rpc_py bdev_lvol_create_lvstore $nvme_bdev lvs_$i --clear-method none)
			lvol_stores+=("$ls_guid")
			notice "Created Lvol Store: $ls_guid on Bdev $nvme_bdev"

			vms_to_run=(${disk_cfg_vms[i]})
			for ((j = 0; j < ${disk_cfg_splits[$i]}; j++)); do
				free_mb=$(get_lvs_free_mb "$ls_guid")
				size=$((free_mb / (${disk_cfg_splits[$i]} - j)))
				lb_name=$($rpc_py bdev_lvol_create -u $ls_guid lbd_$j $size --clear-method none)
				lvol_bdevs+=("$lb_name")
				notice "Created LVOL Bdev $lb_name on Lvol Store $ls_guid on Bdev $nvme_bdev"

				notice "Setting up VM ${vms_to_run[j]}"
				create_spdk_controller "${vms_to_run[j]}" ${lb_name}
				create_vm ${vms_to_run[j]}
			done
			echo " "
		done
		$rpc_py bdev_lvol_get_lvstores
	fi
	$rpc_py bdev_get_bdevs
	$rpc_py vhost_get_controllers
fi

# Start and boot the VMs
vm_run $used_vms
vm_wait_for_boot 300 $used_vms

if [[ -n "$kernel_cpus" ]]; then
	echo "+cpuset" > /sys/fs/cgroup/cgroup.subtree_control
	mkdir -p /sys/fs/cgroup/spdk
	kernel_mask=$vhost_0_reactor_mask
	kernel_mask=${kernel_mask#"["}
	kernel_mask=${kernel_mask%"]"}

	echo "threaded" > /sys/fs/cgroup/spdk/cgroup.type
	echo "$kernel_mask" > /sys/fs/cgroup/spdk/cpuset.cpus
	echo "0-1" > /sys/fs/cgroup/spdk/cpuset.mems

	kernel_vhost_pids=$(pgrep "vhost" -U root)
	for kpid in $kernel_vhost_pids; do
		echo "Limiting kernel vhost pid ${kpid}"
		echo "${kpid}" > /sys/fs/cgroup/spdk/cgroup.threads
	done
fi

# Prepare VMs for running FIO
fio_disks=""
for vm_num in $used_vms; do
	host_name="VM-$vm_num"
	vm_exec $vm_num "hostname $host_name"
	vm_start_fio_server $fio_bin $vm_num

	if [[ "$ctrl_type" == "spdk_vhost_scsi" ]]; then
		vm_check_scsi_location $vm_num
	elif [[ "$ctrl_type" == "spdk_vhost_blk" ]]; then
		vm_check_blk_location $vm_num
	elif [[ "$ctrl_type" == "kernel_vhost" ]]; then
		vm_check_scsi_location $vm_num
	fi

	block=$(printf '%s' $SCSI_DISK)
	vm_exec "$vm_num" "echo none > /sys/class/block/$block/queue/scheduler"

	if [[ -n "$vm_throttle" ]]; then
		# Check whether cgroups v1 or v2 is used on the guest system.
		# Simple, naive & quick approach which should do the trick for the
		# simple VMs used in performance tests.
		c_gr_ver=2
		if vm_exec "$vm_num" "grep '^cgroup ' /proc/mounts"; then
			c_gr_ver=1
		fi
		major_minor=$(vm_exec "$vm_num" "cat /sys/block/$block/dev")

		if [[ $c_gr_ver == 1 ]]; then
			vm_exec "$vm_num" "echo \"$major_minor $vm_throttle\" > /sys/fs/cgroup/blkio/blkio.throttle.read_iops_device"
			vm_exec "$vm_num" "echo \"$major_minor $vm_throttle\" > /sys/fs/cgroup/blkio/blkio.throttle.write_iops_device"
		elif [[ $c_gr_ver == 2 ]]; then
			vm_exec "$vm_num" "echo '+io' > /sys/fs/cgroup/cgroup.subtree_control"
			vm_exec "$vm_num" "echo \"$major_minor riops=$vm_throttle wiops=$vm_throttle\" > /sys/fs/cgroup/user.slice/io.max"
		fi
	fi

	fio_disks+=" --vm=${vm_num}$(printf ':/dev/%s' $SCSI_DISK)"
done

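# Measurement loop: every fio job file is run $fio_iterations times; during each
# iteration sar statistics are optionally collected on the host and/or inside the
# VMs, and the fio log is saved as <job name>.log.<iteration number> under
# $VHOST_DIR/fio_results.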
# Run FIO traffic
for fio_job in ${fio_jobs//,/ }; do
	fio_job_fname=$(basename $fio_job)
	fio_log_fname="${fio_job_fname%%.*}.log"
	for i in $(seq 1 $fio_iterations); do
		echo "Running FIO iteration $i for $fio_job_fname"
		run_fio $fio_bin --hide-results --job-file="$fio_job" --out="$VHOST_DIR/fio_results" --json $fio_disks $fio_gtod &
		fio_pid=$!

		if $host_sar_enable || $vm_sar_enable; then
			pids=""
			mkdir -p $VHOST_DIR/fio_results/sar_stats
			sleep $sar_delay
		fi

		if $host_sar_enable; then
			sar -P ALL $sar_interval $sar_count > "$VHOST_DIR/fio_results/sar_stats/sar_stats_host.txt" &
			pids+=" $!"
		fi

		if $vm_sar_enable; then
			for vm_num in $used_vms; do
				vm_exec "$vm_num" "mkdir -p /root/sar; sar -P ALL $sar_interval $sar_count >> /root/sar/sar_stats_VM${vm_num}_run${i}.txt" &
				pids+=" $!"
			done
		fi

		for j in $pids; do
			wait $j
		done

		if $vm_sar_enable; then
			for vm_num in $used_vms; do
				vm_scp "$vm_num" "root@127.0.0.1:/root/sar/sar_stats_VM${vm_num}_run${i}.txt" "$VHOST_DIR/fio_results/sar_stats"
			done
		fi

		wait $fio_pid
		mv $VHOST_DIR/fio_results/$fio_log_fname $VHOST_DIR/fio_results/$fio_log_fname.$i
		sleep 1
	done

	parse_fio_results "$VHOST_DIR/fio_results" "$fio_log_fname"
done

notice "Shutting down virtual machines..."
vm_shutdown_all

if [[ "$ctrl_type" == "kernel_vhost" ]]; then
	cleanup_kernel_vhost || true
else
	notice "Shutting down SPDK vhost app..."
	if [[ $use_split == true ]]; then
		cleanup_split_cfg
	else
		cleanup_lvol_cfg
	fi
	vhost_kill "${vhost_num}"

	if ((bpf_script_pid)); then
		wait $bpf_script_pid
	fi
fi