#!/usr/bin/env bash

testdir=$(readlink -f $(dirname $0))
rootdir=$(readlink -f $testdir/../../..)
source $rootdir/test/common/autotest_common.sh
source $rootdir/test/vhost/common.sh

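# Default test parameters; most of these can be overridden with the
# command line options parsed below.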
vhost_num="0"
vm_memory=2048
vm_sar_enable=false
host_sar_enable=false
sar_delay="0"
sar_interval="1"
sar_count="10"
vm_throttle=""
bpf_traces=()
ctrl_type="spdk_vhost_scsi"
use_split=false
kernel_cpus=""
run_precondition=false
lvol_stores=()
lvol_bdevs=()
split_bdevs=()
used_vms=""
wwpn_prefix="naa.5001405bc6498"
packed_ring=false

fio_iterations=1
fio_gtod=""
precond_fio_bin=$CONFIG_FIO_SOURCE_DIR/fio
disk_map=""

disk_cfg_bdfs=()
disk_cfg_spdk_names=()
disk_cfg_splits=()
disk_cfg_vms=()
disk_cfg_kernel_names=()

function usage() {
	[[ -n $2 ]] && (
		echo "$2"
		echo ""
	)
	echo "Shortcut script for running automated vhost performance tests."
	echo "Usage: $(basename $1) [OPTIONS]"
	echo
	echo "-h, --help                  Print help and exit"
	echo "    --fio-bin=PATH          Path to FIO binary on host."
	echo "                            Binary will be copied to VM, static compilation"
	echo "                            of binary is recommended."
	echo "    --fio-jobs=PATH         Comma separated list of fio config files to use for test."
	echo "    --fio-iterations=INT    Number of times to run specified workload."
	echo "    --fio-gtod-reduce       Enable fio gtod_reduce option in test."
	echo "    --vm-memory=INT         Amount of RAM memory (in MB) to pass to a single VM."
	echo "                            Default: 2048 MB"
	echo "    --vm-image=PATH         OS image to use for running the VMs."
	echo "                            Default: \$DEPENDENCY_DIR/spdk_test_image.qcow2"
	echo "    --vm-sar-enable         Measure CPU utilization in guest VMs using sar."
	echo "    --host-sar-enable       Measure CPU utilization on host using sar."
	echo "    --sar-delay=INT         Wait for X seconds before starting SAR measurement. Default: 0."
	echo "    --sar-interval=INT      Interval (seconds) argument for SAR. Default: 1s."
	echo "    --sar-count=INT         Count argument for SAR. Default: 10."
	echo "    --bpf-traces=LIST       Comma delimited list of .bt scripts for enabling BPF traces."
	echo "                            List of .bt scripts available in scripts/bpf"
	echo "    --vm-throttle-iops=INT  I/Os throttle rate in IOPS for each device on the VMs."
	echo "    --ctrl-type=TYPE        Controller type to use for test:"
	echo "                            spdk_vhost_scsi - use spdk vhost scsi"
	echo "                            spdk_vhost_blk - use spdk vhost block"
	echo "                            kernel_vhost - use kernel vhost scsi"
	echo "                            Default: spdk_vhost_scsi"
	echo "    --packed-ring           Use packed ring support. Requires Qemu 4.2.0 or greater. Default: disabled."
	echo "    --use-split             Use split vbdevs instead of Logical Volumes"
	echo "    --limit-kernel-vhost=INT  Limit kernel vhost to run only on a number of CPU cores."
	echo "    --run-precondition      Precondition lvols after creating. Default: false."
	echo "    --precond-fio-bin       FIO binary used for SPDK fio plugin precondition. Default: $CONFIG_FIO_SOURCE_DIR/fio."
	echo "    --custom-cpu-cfg=PATH   Custom CPU config for test."
	echo "                            Default: spdk/test/vhost/common/autotest.config"
	echo "    --disk-map              Disk map for given test. Specify which disks to use, their SPDK name,"
	echo "                            how many times to split them and which VMs should be attached to created bdevs."
	echo "                            Example:"
	echo "                            NVME PCI BDF,Spdk Bdev Name,Split Count,VM List"
	echo "                            0000:1a:00.0,Nvme0,2,0 1"
	echo "                            0000:1b:00.0,Nvme1,2,2 3"
	echo "-x                          set -x for script debug"
	exit 0
}
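
# Example invocation (illustrative paths and values; options as defined in usage() above):
#   ./vhost_perf.sh --ctrl-type=spdk_vhost_blk --disk-map=/tmp/disk_map.cfg \
#       --fio-jobs=/tmp/randread.fio --fio-iterations=3 --vm-sar-enable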

function cleanup_lvol_cfg() {
	notice "Removing lvol bdevs"
	for lvol_bdev in "${lvol_bdevs[@]}"; do
		$rpc_py bdev_lvol_delete $lvol_bdev
		notice "lvol bdev $lvol_bdev removed"
	done

	notice "Removing lvol stores"
	for lvol_store in "${lvol_stores[@]}"; do
		$rpc_py bdev_lvol_delete_lvstore -u $lvol_store
		notice "lvol store $lvol_store removed"
	done
}

function cleanup_split_cfg() {
	notice "Removing split vbdevs"
	for disk in "${disk_cfg_spdk_names[@]}"; do
		$rpc_py bdev_split_delete ${disk}n1
	done
}

function cleanup_parted_config() {
	notice "Removing parted disk configuration"
	for disk in "${disk_cfg_kernel_names[@]}"; do
		parted -s /dev/${disk}n1 rm 1
	done
}

function cleanup_kernel_vhost() {
	notice "Cleaning kernel vhost configuration"
	targetcli clearconfig confirm=True
	cleanup_parted_config
}

function create_vm() {
	vm_num=$1
	setup_cmd="vm_setup --disk-type=$ctrl_type --force=$vm_num --memory=$vm_memory --os=$VM_IMAGE"
	if [[ "$ctrl_type" == "kernel_vhost" ]]; then
		x=$(printf %03d $vm_num)
		setup_cmd+=" --disks=${wwpn_prefix}${x}"
	else
		setup_cmd+=" --disks=0"
	fi

	if $packed_ring; then
		setup_cmd+=" --packed"
	fi

	$setup_cmd
	used_vms+=" $vm_num"
	echo "Added VM $vm_num to list of used VMs:$used_vms"
}

function create_spdk_controller() {
	vm_num=$1
	bdev=$2

	if [[ "$ctrl_type" == "spdk_vhost_scsi" ]]; then
		$rpc_py vhost_create_scsi_controller naa.0.$vm_num
		notice "Created vhost scsi controller naa.0.$vm_num"
		$rpc_py vhost_scsi_controller_add_target naa.0.$vm_num 0 $bdev
		notice "Added LUN 0/$bdev to controller naa.0.$vm_num"
	elif [[ "$ctrl_type" == "spdk_vhost_blk" ]]; then
		if $packed_ring; then
			p_opt="-p"
		fi

		$rpc_py vhost_create_blk_controller naa.0.$vm_num $bdev $p_opt
		notice "Created vhost blk controller naa.0.$vm_num $bdev"
	fi
}
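
# Each VM maps 1:1 to a single vhost controller named naa.0.<vm_num>,
# backed by one bdev (an lvol or a split vbdev).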

while getopts 'xh-:' optchar; do
	case "$optchar" in
		-)
			case "$OPTARG" in
				help) usage $0 ;;
				fio-bin=*) fio_bin="--fio-bin=${OPTARG#*=}" ;;
				fio-jobs=*) fio_jobs="${OPTARG#*=}" ;;
				fio-iterations=*) fio_iterations="${OPTARG#*=}" ;;
				fio-gtod-reduce) fio_gtod="--gtod-reduce" ;;
				vm-memory=*) vm_memory="${OPTARG#*=}" ;;
				vm-image=*) VM_IMAGE="${OPTARG#*=}" ;;
				vm-sar-enable) vm_sar_enable=true ;;
				host-sar-enable) host_sar_enable=true ;;
				sar-delay=*) sar_delay="${OPTARG#*=}" ;;
				sar-interval=*) sar_interval="${OPTARG#*=}" ;;
				sar-count=*) sar_count="${OPTARG#*=}" ;;
				bpf-traces=*) IFS="," read -r -a bpf_traces <<< "${OPTARG#*=}" ;;
				vm-throttle-iops=*) vm_throttle="${OPTARG#*=}" ;;
				ctrl-type=*) ctrl_type="${OPTARG#*=}" ;;
				packed-ring) packed_ring=true ;;
				use-split) use_split=true ;;
				run-precondition) run_precondition=true ;;
				precond-fio-bin=*) precond_fio_bin="${OPTARG#*=}" ;;
				limit-kernel-vhost=*) kernel_cpus="${OPTARG#*=}" ;;
				custom-cpu-cfg=*) custom_cpu_cfg="${OPTARG#*=}" ;;
				disk-map=*) disk_map="${OPTARG#*=}" ;;
				*) usage $0 "Invalid argument '$OPTARG'" ;;
			esac
			;;
		h) usage $0 ;;
		x)
			set -x
			x="-x"
			;;
		*) usage $0 "Invalid argument '$OPTARG'" ;;
	esac
done

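# All RPC calls go to the vhost target's UNIX domain socket for instance 0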
rpc_py="$rootdir/scripts/rpc.py -s $(get_vhost_dir 0)/rpc.sock"

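# When a custom CPU config is provided, resolve its per-instance settings
# (e.g. vhost_0_reactor_mask and vhost_0_main_core) via bash indirect expansion.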
if [[ -n $custom_cpu_cfg ]]; then
	source $custom_cpu_cfg
	vhost_reactor_mask="vhost_${vhost_num}_reactor_mask"
	vhost_reactor_mask="${!vhost_reactor_mask}"
	vhost_main_core="vhost_${vhost_num}_main_core"
	vhost_main_core="${!vhost_main_core}"
fi

if [[ -z $fio_jobs ]]; then
	error "No FIO job specified!"
fi

trap 'error_exit "${FUNCNAME}" "${LINENO}"' INT ERR

if [[ -z $disk_map ]]; then
	fail "No disk map provided for test. Exiting."
fi

# ===== Enable "performance" cpu governor =====
if hash cpupower; then
	cpupower frequency-set -g performance
else
	echo "WARNING: cpupower not found. Please install it to set the CPU governor."
fi
current_governor=$(cat /sys/devices/system/cpu/cpu0/cpufreq/scaling_governor)
echo "INFO: Using $current_governor cpu governor for test."

# ===== Precondition NVMes if specified =====
if [[ $run_precondition == true ]]; then
	# The same precondition routine can be used for lvols thanks to the
	# --clear-method option; lvols should not UNMAP on creation.
	json_cfg=$rootdir/nvme.json
	$rootdir/scripts/gen_nvme.sh --json-with-subsystems > "$json_cfg"
	mapfile -t nvmes < <(grep -oP "Nvme\d+" "$json_cfg")
	fio_filename=$(printf ":%sn1" "${nvmes[@]}")
	fio_filename=${fio_filename:1}
	$precond_fio_bin --name="precondition" \
		--ioengine="${rootdir}/build/fio/spdk_bdev" \
		--rw="write" --spdk_json_conf="$json_cfg" --thread="1" \
		--group_reporting --direct="1" --size="100%" --loops="2" --bs="256k" \
		--iodepth=32 --filename="${fio_filename}" || true
fi

set +x
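
# Parse the disk map file. Each line uses the format:
#   <NVMe PCI BDF>,<SPDK bdev name>,<split count>,<space separated VM list>
# e.g. "0000:1a:00.0,Nvme0,2,0 1" splits Nvme0 in two and attaches VMs 0 and 1.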
readarray disk_cfg < $disk_map
for line in "${disk_cfg[@]}"; do
	echo $line
	IFS=","
	s=($line)
	disk_cfg_bdfs+=(${s[0]})
	disk_cfg_spdk_names+=(${s[1]})
	disk_cfg_splits+=(${s[2]})
	disk_cfg_vms+=("${s[3]}")

	# Find kernel nvme names
	if [[ "$ctrl_type" == "kernel_vhost" ]]; then
		tmp=$(find /sys/devices/pci* -name ${s[0]} -print0 | xargs sh -c 'ls $0/nvme')
		disk_cfg_kernel_names+=($tmp)
		IFS=" "
	fi
done
unset IFS
set -x

if [[ "$ctrl_type" == "kernel_vhost" ]]; then
	notice "Configuring kernel vhost..."
	trap 'vm_kill_all; sleep 1; cleanup_kernel_vhost; error_exit "${FUNCNAME}" "${LINENO}"' INT ERR

	# Split disks using parted for kernel vhost
	newline=$'\n'
	backstores=()
	for ((i = 0; i < ${#disk_cfg_kernel_names[@]}; i++)); do
		nvme=${disk_cfg_kernel_names[$i]}
		splits=${disk_cfg_splits[$i]}
		notice "  Creating extended partition on disk /dev/${nvme}n1"
		parted -s /dev/${nvme}n1 mklabel msdos
		parted -s /dev/${nvme}n1 mkpart extended 2048s 100%

		part_size=$((100 / ${disk_cfg_splits[$i]})) # Split 100% of disk into roughly even parts
		echo "  Creating ${splits} partitions of relative disk size ${part_size}"
		for p in $(seq 0 $((splits - 1))); do
			p_start=$((p * part_size))
			p_end=$((p_start + part_size))
			parted -s /dev/${nvme}n1 mkpart logical ${p_start}% ${p_end}%
			sleep 3
		done

		# Prepare kernel vhost configuration
		# Below grep: match only NVMe partitions which are not "Extended" type.
		# For example: it will match nvme0n1p15, but not nvme0n1p1.
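		# (On an msdos disk label the extended partition is p1 and logical
		# partitions are numbered from p5 upward, hence p1 is excluded here.)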
		partitions=$(find /dev -name "${nvme}n1*" | sort --version-sort | grep -P 'p(?!1$)\d+')
		# Create block backstores for vhost kernel process
		for p in $partitions; do
			backstore_name=$(basename $p)
			backstores+=("$backstore_name")
			targetcli backstores/block create $backstore_name $p
		done
		partitions=($partitions)

		# Create kernel vhost controllers and add LUNs
		# Setup VM configurations
		vms_to_run=(${disk_cfg_vms[i]})
		for ((j = 0; j < ${#vms_to_run[@]}; j++)); do
			# The WWPN prefix is 3 characters short of a full WWPN;
			# complete it with the zero-padded VM number.
			x=$(printf %03d ${vms_to_run[$j]})
			wwpn="${wwpn_prefix}${x}"
			targetcli vhost/ create $wwpn
			targetcli vhost/$wwpn/tpg1/luns create /backstores/block/$(basename ${partitions[$j]})
			create_vm ${vms_to_run[j]}
			sleep 1
		done
	done
	targetcli ls
else
	notice "Configuring SPDK vhost..."
	vhost_run -n "${vhost_num}" -g -a "-p ${vhost_main_core} -m ${vhost_reactor_mask}"
	notice "..."
	if [[ ${#bpf_traces[@]} -gt 0 ]]; then
		notice "Enabling BPF traces: ${bpf_traces[*]}"
		vhost_dir="$(get_vhost_dir 0)"
		vhost_pid="$(cat $vhost_dir/vhost.pid)"

		bpf_cmd=("$rootdir/scripts/bpftrace.sh")
		bpf_cmd+=("$vhost_pid")
		for trace in "${bpf_traces[@]}"; do
			bpf_cmd+=("$rootdir/scripts/bpf/$trace")
		done

		BPF_OUTFILE="$VHOST_DIR/bpftraces.txt" "${bpf_cmd[@]}" &
		bpf_script_pid=$!

		# Wait a bit for trace capture to start
		sleep 3
	fi

	if [[ $use_split == true ]]; then
		notice "Configuring split bdevs..."
		trap 'cleanup_split_cfg; error_exit "${FUNCNAME}" "${LINENO}"' INT ERR
		for ((i = 0; i < ${#disk_cfg_bdfs[@]}; i++)); do
			nvme_bdev=$($rpc_py bdev_nvme_attach_controller -b ${disk_cfg_spdk_names[$i]} -t pcie -a ${disk_cfg_bdfs[$i]})
			notice "Created NVMe Bdev: $nvme_bdev with BDF ${disk_cfg_bdfs[$i]}"

			splits=$($rpc_py bdev_split_create $nvme_bdev ${disk_cfg_splits[$i]})
			splits=($splits)
			notice "Created splits: ${splits[*]} on Bdev ${nvme_bdev}"
			for s in "${splits[@]}"; do
				split_bdevs+=($s)
			done

			vms_to_run=(${disk_cfg_vms[i]})
			for ((j = 0; j < ${#vms_to_run[@]}; j++)); do
				notice "Setting up VM ${vms_to_run[j]}"
				create_spdk_controller "${vms_to_run[j]}" ${splits[j]}
				create_vm ${vms_to_run[j]}
			done
			echo " "
		done
		bdevs=("${split_bdevs[@]}")
	else
		notice "Configuring LVOLs..."
		trap 'cleanup_lvol_cfg; error_exit "${FUNCNAME}" "${LINENO}"' INT ERR
		for ((i = 0; i < ${#disk_cfg_bdfs[@]}; i++)); do
			nvme_bdev=$($rpc_py bdev_nvme_attach_controller -b ${disk_cfg_spdk_names[$i]} -t pcie -a ${disk_cfg_bdfs[$i]})
			notice "Created NVMe Bdev: $nvme_bdev with BDF ${disk_cfg_bdfs[$i]}"

			ls_guid=$($rpc_py bdev_lvol_create_lvstore $nvme_bdev lvs_$i --clear-method none)
			lvol_stores+=("$ls_guid")
			notice "Created Lvol Store: $ls_guid on Bdev $nvme_bdev"

			vms_to_run=(${disk_cfg_vms[i]})
			for ((j = 0; j < ${disk_cfg_splits[$i]}; j++)); do
				# Size each lvol as an even share of the store's remaining free space
				free_mb=$(get_lvs_free_mb "$ls_guid")
				size=$((free_mb / (${disk_cfg_splits[$i]} - j)))
				lb_name=$($rpc_py bdev_lvol_create -u $ls_guid lbd_$j $size --clear-method none)
				lvol_bdevs+=("$lb_name")
				notice "Created LVOL Bdev $lb_name on Lvol Store $ls_guid on Bdev $nvme_bdev"

				notice "Setting up VM ${vms_to_run[j]}"
				create_spdk_controller "${vms_to_run[j]}" ${lb_name}
				create_vm ${vms_to_run[j]}
			done
			echo " "
		done
		$rpc_py bdev_lvol_get_lvstores
	fi
	$rpc_py bdev_get_bdevs
	$rpc_py vhost_get_controllers
fi

# Start VMs and wait for them to boot
vm_run $used_vms
vm_wait_for_boot 300 $used_vms

if [[ -n "$kernel_cpus" ]]; then
	echo "+cpuset" > /sys/fs/cgroup/cgroup.subtree_control
	mkdir -p /sys/fs/cgroup/spdk
	kernel_mask=$vhost_0_reactor_mask
	kernel_mask=${kernel_mask#"["}
	kernel_mask=${kernel_mask%"]"}
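	# Assumes the reactor mask uses a bracketed cpu-list format (e.g. "[0-3]");
	# the brackets are stripped so the list can be written to cpuset.cpus.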

	echo "threaded" > /sys/fs/cgroup/spdk/cgroup.type
	echo "$kernel_mask" > /sys/fs/cgroup/spdk/cpuset.cpus
	echo "0-1" > /sys/fs/cgroup/spdk/cpuset.mems

	kernel_vhost_pids=$(pgrep "vhost" -U root)
	for kpid in $kernel_vhost_pids; do
		echo "Limiting kernel vhost pid ${kpid}"
		echo "${kpid}" > /sys/fs/cgroup/spdk/cgroup.threads
	done
fi

# Prepare VMs for FIO: start fio servers, set the I/O scheduler, apply throttling
fio_disks=""
for vm_num in $used_vms; do
	host_name="VM-$vm_num"
	vm_exec $vm_num "hostname $host_name"
	vm_start_fio_server $fio_bin $vm_num

	if [[ "$ctrl_type" == "spdk_vhost_scsi" ]]; then
		vm_check_scsi_location $vm_num
	elif [[ "$ctrl_type" == "spdk_vhost_blk" ]]; then
		vm_check_blk_location $vm_num
	elif [[ "$ctrl_type" == "kernel_vhost" ]]; then
		vm_check_scsi_location $vm_num
	fi

	block=$(printf '%s' $SCSI_DISK)
	vm_exec "$vm_num" "echo none > /sys/class/block/$block/queue/scheduler"

	if [[ -n "$vm_throttle" ]]; then
		# Check whether cgroup v1 or v2 is used on the guest system.
		# A simple, naive check, but it does the trick for the basic
		# VMs used in performance tests.
		c_gr_ver=2
		if vm_exec "$vm_num" "grep '^cgroup ' /proc/mounts"; then
			c_gr_ver=1
		fi
		major_minor=$(vm_exec "$vm_num" "cat /sys/block/$block/dev")

		if [[ $c_gr_ver == 1 ]]; then
			vm_exec "$vm_num" "echo \"$major_minor $vm_throttle\" > /sys/fs/cgroup/blkio/blkio.throttle.read_iops_device"
			vm_exec "$vm_num" "echo \"$major_minor $vm_throttle\" > /sys/fs/cgroup/blkio/blkio.throttle.write_iops_device"
		elif [[ $c_gr_ver == 2 ]]; then
			vm_exec "$vm_num" "echo '+io' > /sys/fs/cgroup/cgroup.subtree_control"
			vm_exec "$vm_num" "echo \"$major_minor riops=$vm_throttle wiops=$vm_throttle\" > /sys/fs/cgroup/user.slice/io.max"
		fi
	fi

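	# Accumulate fio disk arguments, e.g. "--vm=0:/dev/sda" (one /dev entry
	# appended per disk name in $SCSI_DISK)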
	fio_disks+=" --vm=${vm_num}$(printf ':/dev/%s' $SCSI_DISK)"
done

# Run FIO traffic
for fio_job in ${fio_jobs//,/ }; do
	fio_job_fname=$(basename $fio_job)
	fio_log_fname="${fio_job_fname%%.*}.log"
	for i in $(seq 1 $fio_iterations); do
		echo "Running FIO iteration $i for $fio_job_fname"
		run_fio $fio_bin --hide-results --job-file="$fio_job" --out="$VHOST_DIR/fio_results" --json $fio_disks $fio_gtod &
		fio_pid=$!

		if $host_sar_enable || $vm_sar_enable; then
			pids=""
			mkdir -p $VHOST_DIR/fio_results/sar_stats
			sleep $sar_delay
		fi

		if $host_sar_enable; then
			sar -P ALL $sar_interval $sar_count > "$VHOST_DIR/fio_results/sar_stats/sar_stats_host.txt" &
			pids+=" $!"
		fi

		if $vm_sar_enable; then
			for vm_num in $used_vms; do
				vm_exec "$vm_num" "mkdir -p /root/sar; sar -P ALL $sar_interval $sar_count >> /root/sar/sar_stats_VM${vm_num}_run${i}.txt" &
				pids+=" $!"
			done
		fi

		for j in $pids; do
			wait $j
		done

		if $vm_sar_enable; then
			for vm_num in $used_vms; do
				vm_scp "$vm_num" "root@127.0.0.1:/root/sar/sar_stats_VM${vm_num}_run${i}.txt" "$VHOST_DIR/fio_results/sar_stats"
			done
		fi

		wait $fio_pid
		mv $VHOST_DIR/fio_results/$fio_log_fname $VHOST_DIR/fio_results/$fio_log_fname.$i
		sleep 1
	done

	parse_fio_results "$VHOST_DIR/fio_results" "$fio_log_fname"
done

notice "Shutting down virtual machines..."
vm_shutdown_all

if [[ "$ctrl_type" == "kernel_vhost" ]]; then
	cleanup_kernel_vhost || true
else
	notice "Shutting down SPDK vhost app..."
	if [[ $use_split == true ]]; then
		cleanup_split_cfg
	else
		cleanup_lvol_cfg
	fi
	vhost_kill "${vhost_num}"

	if ((bpf_script_pid)); then
		wait $bpf_script_pid
	fi
fi