#!/usr/bin/env bash
#
# NVMe PMD/BDEV performance test driver.
#
# Parses the command line, prepares the selected driver (SPDK perf tools,
# SPDK fio plugins or one of the kernel I/O engines), runs the workload
# REPEAT_NO times and appends the averaged results to a CSV file under
# $testdir/results/. Helper functions (get_disks, get_cores, run_bdevperf,
# run_nvmeperf, create_fio_config, get_results, preconditioning, ...) are not
# defined here; they are expected to come from the sourced common files.
set -e

# Dir variables and sourcing common files
testdir=$(readlink -f "$(dirname "$0")")
rootdir=$(readlink -f "$testdir/../../..")
plugin_dir=$rootdir/build/fio
bdevperf_dir=$rootdir/test/bdev/bdevperf
nvmeperf_dir=$rootdir/build/examples
source $testdir/common.sh
source $rootdir/scripts/common.sh || exit 1
source $rootdir/test/common/autotest_common.sh

# Global & default variables
# fio engine options per kernel driver. Classic and hybrid polling share the
# same fio options; they are told apart further down by the value written to
# the io_poll_delay sysfs attribute.
declare -A KERNEL_ENGINES
KERNEL_ENGINES=(
	["kernel-libaio"]="--ioengine=libaio"
	["kernel-classic-polling"]="--ioengine=pvsync2 --hipri=100"
	["kernel-hybrid-polling"]="--ioengine=pvsync2 --hipri=100"
	["kernel-io-uring"]="--ioengine=io_uring")

RW=randrw
MIX=100
IODEPTH=256
BLK_SIZE=4096
RUNTIME=600
RAMP_TIME=30
NUMJOBS=1
REPEAT_NO=3
GTOD_REDUCE=false
SAMPLING_INT=0
IO_BATCH_SUBMIT=0
IO_BATCH_COMPLETE=0
FIO_BIN=$CONFIG_FIO_SOURCE_DIR/fio
TMP_RESULT_FILE=$testdir/result.json
PLUGIN="nvme"
DISKCFG=""
BDEV_CACHE=""
BDEV_POOL=""
DISKNO="ALL"
CPUS_ALLOWED=1
NOIOSCALING=false
PRECONDITIONING=true
CPUFREQ=""
PERFTOP=false
DPDKMEM=false
DATE="$(date +'%m_%d_%Y_%H%M%S')"

# Print the help text.
# Arguments: $1 - script path (only its basename is shown)
#            $2 - optional message printed before the help text
# Note: xtrace is switched off while printing and switched on afterwards.
function usage() {
	set +x
	if [[ -n $2 ]]; then
		echo "$2"
		echo ""
	fi
	echo "Run NVMe PMD/BDEV performance test. Change options for easier debug and setup configuration"
	echo "Usage: $(basename "$1") [options]"
	echo "-h, --help Print help and exit"
	echo
	echo "Workload parameters:"
	echo " --rw=STR Type of I/O pattern. Accepted values are randrw,rw. [default=$RW]"
	echo " --rwmixread=INT Percentage of a mixed workload that should be reads. [default=$MIX]"
	echo " --iodepth=INT Number of I/Os to keep in flight against the file. [default=$IODEPTH]"
	echo " --block-size=INT The block size in bytes used for I/O units. [default=$BLK_SIZE]"
	echo " --run-time=TIME[s] Tell fio to run the workload for the specified period of time. [default=$RUNTIME]"
	echo " --ramp-time=TIME[s] Fio will run the specified workload for this amount of time before"
	echo " logging any performance numbers. [default=$RAMP_TIME]. Applicable only for fio-based tests."
	echo " --numjobs=INT Create the specified number of clones of this job. [default=$NUMJOBS]"
	echo " Applicable only for fio-based tests."
	echo " --repeat-no=INT How many times to repeat workload test. [default=$REPEAT_NO]"
	echo " Test result will be an average of repeated test runs."
	echo " --gtod-reduce Enable fio gtod_reduce option. [default=$GTOD_REDUCE]"
	echo " --sampling-int=INT Value for fio log_avg_msec parameters [default=$SAMPLING_INT]"
	echo " --io-batch-submit=INT Value for iodepth_batch_submit fio option [default=$IO_BATCH_SUBMIT]"
	echo " --io-batch-complete=INT Value for iodepth_batch_complete fio option [default=$IO_BATCH_COMPLETE]"
	echo " --fio-bin=PATH Path to fio binary. [default=$FIO_BIN]"
	echo " Applicable only for fio-based tests."
	echo
	echo "Test setup parameters:"
	echo " --driver=STR Selects tool used for testing. Choices available:"
	echo " - spdk-perf-nvme (SPDK nvme perf)"
	echo " - spdk-perf-bdev (SPDK bdev perf)"
	echo " - spdk-plugin-nvme (SPDK nvme fio plugin)"
	echo " - spdk-plugin-bdev (SPDK bdev fio plugin)"
	echo " - kernel-classic-polling"
	echo " - kernel-hybrid-polling"
	echo " - kernel-libaio"
	echo " - kernel-io-uring"
	echo " --disk-config Configuration file containing PCI BDF addresses of NVMe disks to use in test."
	echo " It consists a single column of PCI addresses. SPDK Bdev names will be assigned"
	echo " and Kernel block device names detected."
	echo " Lines starting with # are ignored as comments."
	echo " --bdev-io-cache-size Set IO cache size for for SPDK bdev subsystem."
	echo " --bdev-io-pool-size Set IO pool size for for SPDK bdev subsystem."
	echo " --max-disk=INT,ALL Number of disks to test on, this will run multiple workloads with increasing number of disk each run."
	echo " If =ALL then test on all found disk. [default=$DISKNO]"
	echo " --cpu-allowed=INT/PATH Comma-separated list of CPU cores used to run the workload. Ranges allowed."
	echo " Can also point to a file containing list of CPUs. [default=$CPUS_ALLOWED]"
	echo " --no-preconditioning Skip preconditioning"
	echo " --no-io-scaling Do not scale iodepth for each device in SPDK fio plugin. [default=$NOIOSCALING]"
	echo " --cpu-frequency=INT Run tests with CPUs set to a desired frequency. 'intel_pstate=disable' must be set in"
	echo " GRUB options. You can use 'cpupower frequency-info' and 'cpupower frequency-set' to"
	echo " check list of available frequencies. Example: --cpu-frequency=1100000."
	echo
	echo "Other options:"
	echo " --perftop Run perftop measurements on the same CPU cores as specified in --cpu-allowed option."
	echo " --dpdk-mem-stats Dump DPDK memory stats during the test."
	set -x
}

# Long options are parsed through getopts' "-:" trick: "--name=value" shows up
# as option "-" with OPTARG="name=value".
while getopts 'h-:' optchar; do
	case "$optchar" in
		-)
			case "$OPTARG" in
				help)
					usage "$0"
					exit 0
					;;
				rw=*) RW="${OPTARG#*=}" ;;
				rwmixread=*) MIX="${OPTARG#*=}" ;;
				iodepth=*) IODEPTH="${OPTARG#*=}" ;;
				block-size=*) BLK_SIZE="${OPTARG#*=}" ;;
				run-time=*) RUNTIME="${OPTARG#*=}" ;;
				ramp-time=*) RAMP_TIME="${OPTARG#*=}" ;;
				numjobs=*) NUMJOBS="${OPTARG#*=}" ;;
				repeat-no=*) REPEAT_NO="${OPTARG#*=}" ;;
				gtod-reduce) GTOD_REDUCE=true ;;
				sampling-int=*) SAMPLING_INT="${OPTARG#*=}" ;;
				io-batch-submit=*) IO_BATCH_SUBMIT="${OPTARG#*=}" ;;
				io-batch-complete=*) IO_BATCH_COMPLETE="${OPTARG#*=}" ;;
				fio-bin=*) FIO_BIN="${OPTARG#*=}" ;;
				driver=*) PLUGIN="${OPTARG#*=}" ;;
				disk-config=*)
					DISKCFG="${OPTARG#*=}"
					if [[ ! -f "$DISKCFG" ]]; then
						echo "Disk confiuration file $DISKCFG does not exist!"
						exit 1
					fi
					;;
				bdev-io-cache-size=*) BDEV_CACHE="${OPTARG#*=}" ;;
				bdev-io-pool-size=*) BDEV_POOL="${OPTARG#*=}" ;;
				max-disk=*) DISKNO="${OPTARG#*=}" ;;
				cpu-allowed=*)
					CPUS_ALLOWED="${OPTARG#*=}"
					# The value may also be a path to a file holding the CPU list.
					if [[ -f "$CPUS_ALLOWED" ]]; then
						CPUS_ALLOWED=$(cat "$CPUS_ALLOWED")
					fi
					;;
				no-preconditioning) PRECONDITIONING=false ;;
				no-io-scaling) NOIOSCALING=true ;;
				cpu-frequency=*) CPUFREQ="${OPTARG#*=}" ;;
				perftop) PERFTOP=true ;;
				dpdk-mem-stats) DPDKMEM=true ;;
				*)
					# The message must be the second argument of usage; a stray
					# "echo" word here used to be printed instead of the message.
					usage "$0" "Invalid argument '$OPTARG'"
					exit 1
					;;
			esac
			;;
		h)
			usage "$0"
			exit 0
			;;
		*)
			usage "$0" "Invalid argument '$optchar'"
			exit 1
			;;
	esac
done

result_dir=$testdir/results/perf_results_${BLK_SIZE}BS_${IODEPTH}QD_${RW}_${MIX}MIX_${PLUGIN}_${DATE}
result_file=$result_dir/perf_results_${BLK_SIZE}BS_${IODEPTH}QD_${RW}_${MIX}MIX_${PLUGIN}_${DATE}.csv
mkdir -p "$result_dir"
unset iops_disks bw mean_lat_disks_usec p90_lat_disks_usec p99_lat_disks_usec p99_99_lat_disks_usec stdev_disks_usec

# On error/termination: remove generated files, stop perf if it was started,
# reap the DPDK memory-stats job and print a backtrace. The kill is guarded and
# best-effort so that a missing or already-finished perf process cannot abort
# the handler before print_backtrace runs.
trap 'rm -f *.state $testdir/bdev.conf; if [[ -n $perf_pid ]]; then kill $perf_pid || true; fi; wait $dpdk_mem_pid; print_backtrace' ERR SIGTERM SIGABRT

if [[ "$PLUGIN" =~ "bdev" ]]; then
	create_spdk_bdev_conf "$BDEV_CACHE" "$BDEV_POOL"
	echo "INFO: Generated bdev.conf file:"
	cat "$testdir/bdev.conf"
fi
verify_disk_number
DISK_NAMES=$(get_disks $PLUGIN)
DISKS_NUMA=$(get_numa_node $PLUGIN "$DISK_NAMES")
CORES=$(get_cores "$CPUS_ALLOWED")
# Intentionally unquoted: word-split the core list to count its entries.
NO_CORES_ARRAY=($CORES)
NO_CORES=${#NO_CORES_ARRAY[@]}

# Write the CSV preamble only now that NO_CORES is known. It used to be written
# before NO_CORES was assigned, which left the num-cpu-cores field out and
# shifted the workload columns to the left. Arguments are quoted so an empty
# value still occupies its own column.
# NOTE(review): assumes none of the helpers called above reads $result_file - confirm.
echo "run-time,ramp-time,fio-plugin,QD,block-size,num-cpu-cores,workload,workload-mix" > "$result_file"
printf "%s,%s,%s,%s,%s,%s,%s,%s\n" "$RUNTIME" "$RAMP_TIME" "$PLUGIN" "$IODEPTH" "$BLK_SIZE" "$NO_CORES" "$RW" "$MIX" >> "$result_file"
echo "num_of_disks,iops,avg_lat[usec],p90[usec],p99[usec],p99.99[usec],stdev[usec],avg_slat[usec],avg_clat[usec],bw[Kib/s]" >> "$result_file"

if $PRECONDITIONING; then
	preconditioning
fi

if [[ "$PLUGIN" =~ "kernel" ]]; then
	$rootdir/scripts/setup.sh reset
	fio_ioengine_opt="${KERNEL_ENGINES[$PLUGIN]}"

	if [[ $PLUGIN = "kernel-classic-polling" ]]; then
		for disk in $DISK_NAMES; do
			echo -1 > "/sys/block/$disk/queue/io_poll_delay"
		done
	elif [[ $PLUGIN = "kernel-hybrid-polling" ]]; then
		for disk in $DISK_NAMES; do
			echo 0 > "/sys/block/$disk/queue/io_poll_delay"
		done
	elif [[ $PLUGIN = "kernel-io-uring" ]]; then
		# Reload the nvme driver with poll queues enabled.
		modprobe -rv nvme
		modprobe nvme poll_queues=8
		wait_for_nvme_reload $DISK_NAMES

		backup_dir="/tmp/nvme_param_bak"
		mkdir -p "$backup_dir"

		# Save the current queue settings so they can be restored after the run.
		for disk in $DISK_NAMES; do
			echo "INFO: Backing up device parameters for $disk"
			sysfs=/sys/block/$disk/queue
			mkdir -p "$backup_dir/$disk"
			cat "$sysfs/iostats" > "$backup_dir/$disk/iostats"
			cat "$sysfs/rq_affinity" > "$backup_dir/$disk/rq_affinity"
			cat "$sysfs/nomerges" > "$backup_dir/$disk/nomerges"
			cat "$sysfs/io_poll_delay" > "$backup_dir/$disk/io_poll_delay"
		done

		for disk in $DISK_NAMES; do
			echo "INFO: Setting device parameters for $disk"
			sysfs=/sys/block/$disk/queue
			echo 0 > "$sysfs/iostats"
			echo 0 > "$sysfs/rq_affinity"
			echo 2 > "$sysfs/nomerges"
			echo -1 > "$sysfs/io_poll_delay"
		done
	fi
fi

if [[ -n "$CPUFREQ" ]]; then
	if [[ ! "$(cat /proc/cmdline)" =~ "intel_pstate=disable" ]]; then
		echo "ERROR: Cannot set custom CPU frequency for test. intel_pstate=disable not in boot options."
		# Fail on purpose: with "set -e" and the ERR trap this aborts the run.
		false
	else
		# Remember the current governor; it is restored at the end of the script.
		cpu_governor="$(cat /sys/devices/system/cpu/cpu0/cpufreq/scaling_governor)"
		cpupower frequency-set -g userspace
		cpupower frequency-set -f $CPUFREQ
	fi
fi

if $PERFTOP; then
	echo "INFO: starting perf record on cores $CPUS_ALLOWED"
	perf record -C $CPUS_ALLOWED -o "$testdir/perf.data" &
	perf_pid=$!
fi

if $DPDKMEM; then
	echo "INFO: waiting to generate DPDK memory usage"
	# Request the stats halfway through the run; for non-"perf" drivers the
	# ramp time is added on top.
	wait_time=$((RUNTIME / 2))
	if [[ ! "$PLUGIN" =~ "perf" ]]; then
		wait_time=$((wait_time + RAMP_TIME))
	fi
	(
		sleep $wait_time
		echo "INFO: generating DPDK memory usage"
		$rootdir/scripts/rpc.py env_dpdk_get_mem_stats
	) &
	dpdk_mem_pid=$!
fi

# Accumulators; each run adds to them and they are divided by REPEAT_NO below.
iops_disks=0
bw=0
min_lat_disks_usec=0
max_lat_disks_usec=0
mean_lat_disks_usec=0
p90_lat_disks_usec=0
p99_lat_disks_usec=0
p99_99_lat_disks_usec=0
stdev_disks_usec=0
mean_slat_disks_usec=0
mean_clat_disks_usec=0
# Run each workload $REPEAT_NO times
# NOTE(review): "bc" is called with the expression as its only argument;
# presumably a wrapper function from one of the sourced files, since stock bc
# would treat that argument as a file name - confirm.
# NOTE(review): $k is not assigned anywhere in this file; unless a sourced file
# sets it, it expands to an empty string in the output file names below.
for ((j = 0; j < REPEAT_NO; j++)); do
	if [[ $PLUGIN == "spdk-perf-bdev" ]]; then
		run_bdevperf > "$TMP_RESULT_FILE"
		read -r iops bandwidth <<< $(get_bdevperf_results)
		iops_disks=$(bc "$iops_disks + $iops")
		bw=$(bc "$bw + $bandwidth")
		cp "$TMP_RESULT_FILE" "$result_dir/perf_results_${MIX}_${PLUGIN}_${NO_CORES}cpus_${DATE}_${k}_disks_${j}.output"
	elif [[ $PLUGIN == "spdk-perf-nvme" ]]; then
		run_nvmeperf $DISKNO > "$TMP_RESULT_FILE"
		read -r iops bandwidth mean_lat min_lat max_lat <<< $(get_nvmeperf_results)

		iops_disks=$(bc "$iops_disks+$iops")
		bw=$(bc "$bw+$bandwidth")
		mean_lat_disks_usec=$(bc "$mean_lat_disks_usec + $mean_lat")
		min_lat_disks_usec=$(bc "$min_lat_disks_usec + $min_lat")
		max_lat_disks_usec=$(bc "$max_lat_disks_usec + $max_lat")

		cp "$TMP_RESULT_FILE" "$result_dir/perf_results_${MIX}_${PLUGIN}_${NO_CORES}cpus_${DATE}_${k}_disks_${j}.output"
	else
		create_fio_config $DISKNO $PLUGIN "$DISK_NAMES" "$DISKS_NUMA" "$CORES"

		if [[ "$PLUGIN" =~ "spdk-plugin" ]]; then
			run_spdk_nvme_fio $PLUGIN "--output=$TMP_RESULT_FILE" \
				"--write_lat_log=$result_dir/perf_lat_${BLK_SIZE}BS_${IODEPTH}QD_${RW}_${MIX}MIX_${PLUGIN}_${DATE}_${k}disks_${j}"
		else
			# Intentionally unquoted: the engine string may hold several options.
			run_nvme_fio $fio_ioengine_opt "--output=$TMP_RESULT_FILE" \
				"--write_lat_log=$result_dir/perf_lat_${BLK_SIZE}BS_${IODEPTH}QD_${RW}_${MIX}MIX_${PLUGIN}_${DATE}_${k}disks_${j}"
		fi

		#Store values for every number of used disks
		#Use recalculated value for mixread param in case rw mode is not rw.
		rwmixread=$MIX
		if [[ $RW = *"read"* ]]; then
			rwmixread=100
		elif [[ $RW = *"write"* ]]; then
			rwmixread=0
		fi

		read -r iops bandwidth mean_lat_usec p90_lat_usec p99_lat_usec p99_99_lat_usec \
			stdev_usec mean_slat_usec mean_clat_usec <<< $(get_results $rwmixread)
		iops_disks=$(bc "$iops_disks + $iops")
		mean_lat_disks_usec=$(bc "$mean_lat_disks_usec + $mean_lat_usec")
		p90_lat_disks_usec=$(bc "$p90_lat_disks_usec + $p90_lat_usec")
		p99_lat_disks_usec=$(bc "$p99_lat_disks_usec + $p99_lat_usec")
		p99_99_lat_disks_usec=$(bc "$p99_99_lat_disks_usec + $p99_99_lat_usec")
		stdev_disks_usec=$(bc "$stdev_disks_usec + $stdev_usec")
		mean_slat_disks_usec=$(bc "$mean_slat_disks_usec + $mean_slat_usec")
		mean_clat_disks_usec=$(bc "$mean_clat_disks_usec + $mean_clat_usec")
		bw=$(bc "$bw + $bandwidth")

		cp "$TMP_RESULT_FILE" "$result_dir/perf_results_${MIX}_${PLUGIN}_${NO_CORES}cpus_${DATE}_${k}_disks_${j}.json"
		cp "$testdir/config.fio" "$result_dir/config_${MIX}_${PLUGIN}_${NO_CORES}cpus_${DATE}_${k}_disks_${j}.fio"
		rm -f "$testdir/config.fio"
	fi
done

if $PERFTOP; then
	echo "INFO: Stopping perftop measurements."
	kill $perf_pid
	# perf was just signalled, so a non-zero wait status is expected here.
	wait $perf_pid || true
	perf report -i "$testdir/perf.data" > "$result_dir/perftop_${BLK_SIZE}BS_${IODEPTH}QD_${RW}_${MIX}MIX_${PLUGIN}_${DATE}.txt"
	rm -f "$testdir/perf.data"
fi

if $DPDKMEM; then
	mv "/tmp/spdk_mem_dump.txt" "$result_dir/spdk_mem_dump_${BLK_SIZE}BS_${IODEPTH}QD_${RW}_${MIX}MIX_${PLUGIN}_${DATE}.txt"
	echo "INFO: DPDK memory usage saved in $result_dir"
fi

#Write results to csv file
# Turn the accumulated sums into per-run averages.
iops_disks=$(bc "$iops_disks / $REPEAT_NO")
bw=$(bc "$bw / $REPEAT_NO")
if [[ "$PLUGIN" =~ "plugin" ]] || [[ "$PLUGIN" =~ "kernel" ]]; then
	mean_lat_disks_usec=$(bc "$mean_lat_disks_usec / $REPEAT_NO")
	p90_lat_disks_usec=$(bc "$p90_lat_disks_usec / $REPEAT_NO")
	p99_lat_disks_usec=$(bc "$p99_lat_disks_usec / $REPEAT_NO")
	p99_99_lat_disks_usec=$(bc "$p99_99_lat_disks_usec / $REPEAT_NO")
	stdev_disks_usec=$(bc "$stdev_disks_usec / $REPEAT_NO")
	mean_slat_disks_usec=$(bc "$mean_slat_disks_usec / $REPEAT_NO")
	mean_clat_disks_usec=$(bc "$mean_clat_disks_usec / $REPEAT_NO")
elif [[ "$PLUGIN" == "spdk-perf-nvme" ]]; then
	mean_lat_disks_usec=$(bc "$mean_lat_disks_usec/$REPEAT_NO")
fi

printf "%s,%s,%s,%s,%s,%s,%s,%s,%s,%s\n" "${DISKNO}" "${iops_disks}" "${mean_lat_disks_usec}" "${p90_lat_disks_usec}" "${p99_lat_disks_usec}" \
	"${p99_99_lat_disks_usec}" "${stdev_disks_usec}" "${mean_slat_disks_usec}" "${mean_clat_disks_usec}" "${bw}" >> "$result_file"

if [[ -n "$CPUFREQ" ]]; then
	cpupower frequency-set -g $cpu_governor
fi

if [[ $PLUGIN == "kernel-io-uring" ]]; then
	# Reload the nvme driver so that other test runs are not affected
	modprobe -rv nvme
	modprobe nvme
	wait_for_nvme_reload $DISK_NAMES

	for disk in $DISK_NAMES; do
		echo "INFO: Restoring device parameters for $disk"
		sysfs=/sys/block/$disk/queue
		cat "$backup_dir/$disk/iostats" > "$sysfs/iostats"
		cat "$backup_dir/$disk/rq_affinity" > "$sysfs/rq_affinity"
		cat "$backup_dir/$disk/nomerges" > "$sysfs/nomerges"
		cat "$backup_dir/$disk/io_poll_delay" > "$sysfs/io_poll_delay"
	done
fi
rm -f "$testdir/bdev.conf" "$testdir/config.fio"