#  SPDX-License-Identifier: BSD-3-Clause
#  Copyright (C) 2017 Intel Corporation
#  All rights reserved.
#

: ${SPDK_VHOST_VERBOSE=false}
: ${VHOST_DIR="$HOME/vhost_test"}
: ${QEMU_BIN:="qemu-system-x86_64"}
: ${QEMU_IMG_BIN="qemu-img"}

TEST_DIR=$(readlink -f $rootdir/..)
VM_DIR=$VHOST_DIR/vms
TARGET_DIR=$VHOST_DIR/vhost
VM_PASSWORD="root"

VM_IMAGE=${VM_IMAGE:-"$DEPENDENCY_DIR/vhost/spdk_test_image.qcow2"}
FIO_BIN=${FIO_BIN:-}

WORKDIR=$(readlink -f "$(dirname "$0")")

if ! hash $QEMU_IMG_BIN $QEMU_BIN; then
	echo 'ERROR: QEMU is not installed on this system. Unable to run vhost tests.' >&2
	return 1
fi

mkdir -p $VHOST_DIR
mkdir -p $VM_DIR
mkdir -p $TARGET_DIR

#
# Source config describing QEMU and VHOST cores and NUMA
#
source $rootdir/test/vhost/common/autotest.config
source "$rootdir/test/scheduler/common.sh"

function vhosttestinit() {
	if [ "$TEST_MODE" == "iso" ]; then
		$rootdir/scripts/setup.sh
	fi

	if [[ -e $VM_IMAGE.gz && ! -e $VM_IMAGE ]]; then
		gzip -dc "$VM_IMAGE.gz" > "$VM_IMAGE"
	fi

	# Look for the VM image
	if [[ ! -f $VM_IMAGE ]]; then
		[[ $1 != "--no_vm" ]] || return 0
		echo "$VM_IMAGE is missing" >&2
		return 1
	fi
}

function vhosttestfini() {
	if [ "$TEST_MODE" == "iso" ]; then
		$rootdir/scripts/setup.sh reset
	fi
}

function message() {
	local verbose_out
	if ! $SPDK_VHOST_VERBOSE; then
		verbose_out=""
	elif [[ ${FUNCNAME[2]} == "source" ]]; then
		verbose_out=" (file $(basename ${BASH_SOURCE[1]}):${BASH_LINENO[1]})"
	else
		verbose_out=" (function ${FUNCNAME[2]}:${BASH_LINENO[1]})"
	fi

	local msg_type="$1"
	shift
	echo -e "${msg_type}${verbose_out}: $*"
}

function fail() {
	echo "===========" >&2
	message "FAIL" "$@" >&2
	echo "===========" >&2
	exit 1
}

function error() {
	echo "===========" >&2
	message "ERROR" "$@" >&2
	echo "===========" >&2
	# Deliberately use 'false' instead of 'return 1' - with 'return 1' the
	# printed stack trace would be incomplete, missing the topmost command.
	false
}

function warning() {
	message "WARN" "$@" >&2
}

function notice() {
	message "INFO" "$@"
}

function check_qemu_packedring_support() {
	qemu_version=$($QEMU_BIN -version | grep -Po "(?<=version )\d+\.\d+\.\d+")
	# Note: '<' inside [[ ]] is a lexicographic string comparison.
	if [[ "$qemu_version" < "4.2.0" ]]; then
		error "This qemu binary does not support packed ring"
	fi
}

function get_vhost_dir() {
	local vhost_name="$1"

	if [[ -z "$vhost_name" ]]; then
		error "vhost name must be provided to get_vhost_dir"
		return 1
	fi

	echo "$TARGET_DIR/${vhost_name}"
}

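# Start a vhost app in the background and wait until its RPC server is up.
# -n name - name of the instance (required, determines dir under $TARGET_DIR)
# -b bin  - app binary to use from $SPDK_BIN_DIR (default: "vhost")
# -g      - skip the gen_nvme.sh NVMe bdev auto-configuration step
# Remaining arguments are passed to the app verbatim.
# Example: vhost_run -n 0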
function vhost_run() {
	local OPTIND
	local vhost_name
	local run_gen_nvme=true
	local vhost_bin="vhost"
	local vhost_args=()
	local cmd=()

	while getopts "n:b:g" optchar; do
		case "$optchar" in
			n) vhost_name="$OPTARG" ;;
			b) vhost_bin="$OPTARG" ;;
			g)
				run_gen_nvme=false
				notice "Skipping gen_nvme.sh NVMe bdev configuration"
				;;
			*)
				error "Unknown param $optchar"
				return 1
				;;
		esac
	done
	shift $((OPTIND - 1))

	vhost_args=("$@")

	if [[ -z "$vhost_name" ]]; then
		error "vhost name must be provided to vhost_run"
		return 1
	fi

	local vhost_dir
	vhost_dir="$(get_vhost_dir $vhost_name)"
	local vhost_app="$SPDK_BIN_DIR/$vhost_bin"
	local vhost_log_file="$vhost_dir/vhost.log"
	local vhost_pid_file="$vhost_dir/vhost.pid"
	local vhost_socket="$vhost_dir/usvhost"
	notice "starting vhost app in background"
	[[ -r "$vhost_pid_file" ]] && vhost_kill $vhost_name
	[[ -d $vhost_dir ]] && rm -f $vhost_dir/*
	mkdir -p $vhost_dir

	if [[ ! -x $vhost_app ]]; then
		error "application not found: $vhost_app"
		return 1
	fi

	cmd=("$vhost_app" "-r" "$vhost_dir/rpc.sock" "${vhost_args[@]}")
	if [[ "$vhost_bin" =~ vhost ]]; then
		cmd+=(-S "$vhost_dir")
	fi

	notice "Logging to:   $vhost_log_file"
	notice "Socket:      $vhost_socket"
	notice "Command:     ${cmd[*]}"

	timing_enter vhost_start

	iobuf_small_count=${iobuf_small_count:-16383}
	iobuf_large_count=${iobuf_large_count:-2047}

	"${cmd[@]}" --wait-for-rpc &
	vhost_pid=$!
	echo $vhost_pid > $vhost_pid_file

	notice "waiting for app to run..."
	waitforlisten "$vhost_pid" "$vhost_dir/rpc.sock"

	"$rootdir/scripts/rpc.py" -s "$vhost_dir/rpc.sock" \
		iobuf_set_options \
		--small-pool-count="$iobuf_small_count" \
		--large-pool-count="$iobuf_large_count"

	"$rootdir/scripts/rpc.py" -s "$vhost_dir/rpc.sock" \
		framework_start_init

	# Don't generate NVMe bdevs if PCI access is disabled
	if [[ "${cmd[*]}" != *"--no-pci"* ]] && [[ "${cmd[*]}" != *"-u"* ]] && $run_gen_nvme; then
		$rootdir/scripts/gen_nvme.sh | $rootdir/scripts/rpc.py -s $vhost_dir/rpc.sock load_subsystem_config
	fi

	notice "vhost started - pid=$vhost_pid"

	timing_exit vhost_start
}

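# Stop a named vhost instance: send SIGINT, wait up to 60 seconds for it to
# exit, escalate to SIGABRT if it is still alive, then remove its directory.
# param $1 vhost name (as passed to vhost_run -n)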
function vhost_kill() {
	local rc=0
	local vhost_name="$1"

	if [[ -z "$vhost_name" ]]; then
		error "Must provide vhost name to vhost_kill"
		return 0
	fi

	local vhost_dir
	vhost_dir="$(get_vhost_dir $vhost_name)"
	local vhost_pid_file="$vhost_dir/vhost.pid"

	if [[ ! -r $vhost_pid_file ]]; then
		warning "no vhost pid file found"
		return 0
	fi

	timing_enter vhost_kill
	local vhost_pid
	vhost_pid="$(cat $vhost_pid_file)"
	notice "killing vhost (PID $vhost_pid) app"

	if kill -INT $vhost_pid > /dev/null; then
		notice "sent SIGINT to vhost app - waiting 60 seconds to exit"
		for ((i = 0; i < 60; i++)); do
			if kill -0 $vhost_pid; then
				echo "."
				sleep 1
			else
				break
			fi
		done
		if kill -0 $vhost_pid; then
			error "vhost was NOT killed - sending SIGABRT"
			kill -ABRT $vhost_pid
			rc=1
		else
			while kill -0 $vhost_pid; do
				echo "."
			done
		fi
	elif kill -0 $vhost_pid; then
		error "vhost NOT killed - you need to kill it manually"
		rc=1
	else
		notice "vhost was not running"
	fi

	timing_exit vhost_kill

	rm -rf "$vhost_dir"

	return $rc
}

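# Execute an rpc.py call against the RPC socket of a named vhost instance.
# param $1 vhost name; remaining arguments are passed to rpc.py.
# Example: vhost_rpc 0 bdev_get_bdevs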
function vhost_rpc() {
	local vhost_name="$1"

	if [[ -z "$vhost_name" ]]; then
		error "vhost name must be provided to vhost_rpc"
		return 1
	fi
	shift

	$rootdir/scripts/rpc.py -s $(get_vhost_dir $vhost_name)/rpc.sock "$@"
}

###
# Mgmt functions
###

function assert_number() {
	[[ "$1" =~ ^[0-9]+$ ]] && return 0

	error "Invalid or missing parameter: need number but got '$1'"
	return 1
}

# Run command on vm with given password
# First argument - vm number
# Second argument - ssh password for vm
#
function vm_sshpass() {
	vm_num_is_valid $1 || return 1

	local ssh_cmd
	ssh_cmd="sshpass -p $2 ssh \
		-o UserKnownHostsFile=/dev/null \
		-o StrictHostKeyChecking=no \
		-o User=root \
		-p $(vm_ssh_socket $1) $VM_SSH_OPTIONS 127.0.0.1"

	shift 2
	$ssh_cmd "$@"
}

# Helper to validate VM number
# param $1 VM number
#
function vm_num_is_valid() {
	[[ "$1" =~ ^[0-9]+$ ]] && return 0

	error "Invalid or missing parameter: vm number '$1'"
	return 1
}

# Print network socket for given VM number
# param $1 virtual machine number
#
function vm_ssh_socket() {
	vm_num_is_valid $1 || return 1
	local vm_dir="$VM_DIR/$1"

	cat $vm_dir/ssh_socket
}

function vm_fio_socket() {
	vm_num_is_valid $1 || return 1
	local vm_dir="$VM_DIR/$1"

	cat $vm_dir/fio_socket
}

# Execute command on given VM
# param $1 virtual machine number
#
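# Example: vm_exec 0 "uname -a"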
function vm_exec() {
	vm_num_is_valid $1 || return 1

	local vm_num="$1"
	shift

	sshpass -p "$VM_PASSWORD" ssh \
		-o UserKnownHostsFile=/dev/null \
		-o StrictHostKeyChecking=no \
		-o User=root \
		-p $(vm_ssh_socket $vm_num) $VM_SSH_OPTIONS 127.0.0.1 \
		"$@"
}

# Execute scp command on given VM
# param $1 virtual machine number
#
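# Example: vm_scp 0 ./local.job 127.0.0.1:/root/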
function vm_scp() {
	vm_num_is_valid $1 || return 1

	local vm_num="$1"
	shift

	sshpass -p "$VM_PASSWORD" scp \
		-o UserKnownHostsFile=/dev/null \
		-o StrictHostKeyChecking=no \
		-o User=root \
		-P $(vm_ssh_socket $vm_num) $VM_SSH_OPTIONS \
		"$@"
}

# check if specified VM is running
# param $1 VM num
function vm_is_running() {
	vm_num_is_valid $1 || return 1
	local vm_dir="$VM_DIR/$1"

	if [[ ! -r $vm_dir/qemu.pid ]]; then
		return 1
	fi

	local vm_pid
	vm_pid="$(cat $vm_dir/qemu.pid)"

	if /bin/kill -0 $vm_pid; then
		return 0
	else
		if [[ $EUID -ne 0 ]]; then
			warning "not root - assuming VM running since can't be checked"
			return 0
		fi

		# not running - remove pid file
		rm -f $vm_dir/qemu.pid
		return 1
	fi
}

# check if specified VM's OS has booted and responds to SSH
# param $1 VM num
function vm_os_booted() {
	vm_num_is_valid $1 || return 1
	local vm_dir="$VM_DIR/$1"

	if [[ ! -r $vm_dir/qemu.pid ]]; then
		error "VM $1 is not running"
		return 1
	fi

	if ! VM_SSH_OPTIONS="-o ControlMaster=no" vm_exec $1 "true" 2> /dev/null; then
		# Shutdown existing master. Ignore errors as it might not exist.
		VM_SSH_OPTIONS="-O exit" vm_exec $1 "true" 2> /dev/null
		return 1
	fi

	return 0
}

# Shutdown given VM
# param $1 virtual machine number
# return non-zero in case of error.
function vm_shutdown() {
	vm_num_is_valid $1 || return 1
	local vm_dir="$VM_DIR/$1"
	if [[ ! -d "$vm_dir" ]]; then
		error "VM$1 ($vm_dir) does not exist - set it up first"
		return 1
	fi

	if ! vm_is_running $1; then
		notice "VM$1 ($vm_dir) is not running"
		return 0
	fi

	# Temporarily disabling exit flag for next ssh command, since it will
	# "fail" due to shutdown
	notice "Shutting down virtual machine $vm_dir"
	set +e
	vm_exec $1 "nohup sh -c 'shutdown -h -P now'" || true
	notice "VM$1 is shutting down - wait a while to complete"
	set -e
}

# Kill given VM
# param $1 virtual machine number
#
function vm_kill() {
	vm_num_is_valid $1 || return 1
	local vm_dir="$VM_DIR/$1"

	if [[ ! -r $vm_dir/qemu.pid ]]; then
		return 0
	fi

	local vm_pid
	vm_pid="$(cat $vm_dir/qemu.pid)"

	notice "Killing virtual machine $vm_dir (pid=$vm_pid)"
	# The kill should succeed; if the process somehow survives it, report an error
	if /bin/kill $vm_pid; then
		notice "process $vm_pid killed"
		rm -rf $vm_dir
	elif vm_is_running $1; then
		error "Process $vm_pid NOT killed"
		return 1
	fi
}

# List all VM numbers in VM_DIR
#
function vm_list_all() {
	local vms=()
	vms=("$VM_DIR"/+([0-9]))
	if ((${#vms[@]} > 0)); then
		basename --multiple "${vms[@]}"
	fi
}

# Kills all VM in $VM_DIR
#
function vm_kill_all() {
	local vm
	for vm in $(vm_list_all); do
		vm_kill $vm
	done

	rm -rf $VM_DIR
}

# Shutdown all VM in $VM_DIR
#
function vm_shutdown_all() {
	local timeo=${1:-90} vms vm

	vms=($(vm_list_all))

	for vm in "${vms[@]}"; do
		vm_shutdown "$vm"
	done

	notice "Waiting for VMs to shutdown..."
	while ((timeo-- > 0 && ${#vms[@]} > 0)); do
		for vm in "${!vms[@]}"; do
			vm_is_running "${vms[vm]}" || unset -v "vms[vm]"
		done
		sleep 1
	done

	if ((${#vms[@]} == 0)); then
		notice "All VMs successfully shut down"
		return 0
	fi

	warning "Not all VMs were shut down. Leftovers: ${vms[*]}"

	for vm in "${vms[@]}"; do
		vm_print_logs "$vm"
	done

	return 1
}

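# Create a new VM directory under $VM_DIR together with a run.sh QEMU launcher.
# Accepted long options:
#   --os=path, --os-mode=snapshot|original|backing, --qemu-args=..., --disk-type=...,
#   --read-only=..., --disks=disk1[,type]:disk2..., --raw-cache=..., --force=NUM,
#   --memory=MB, --queue_num=N, --incoming=VM, --migrate-to=VM, --vhost-name=...,
#   --spdk-boot=disk, --packed
# Example (illustrative values):
#   vm_setup --disk-type=spdk_vhost_scsi --disks=Nvme0n1 --force=0 --os=$VM_IMAGE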
function vm_setup() {
	xtrace_disable
	local OPTIND optchar vm_num

	local os=""
	local os_mode=""
	local qemu_args=()
	local disk_type_g=NOT_DEFINED
	local read_only="false"
	# Disks are provided as a single string with entries separated by ":"
	local disks=()
	local raw_cache=""
	local vm_incoming=""
	local vm_migrate_to=""
	local force_vm=""
	local guest_memory=1024
	local queue_number=""
	local vhost_dir
	local packed=false
	vhost_dir="$(get_vhost_dir 0)"
	while getopts ':-:' optchar; do
		case "$optchar" in
			-)
				case "$OPTARG" in
					os=*) os="${OPTARG#*=}" ;;
					os-mode=*) os_mode="${OPTARG#*=}" ;;
					qemu-args=*) qemu_args+=("${OPTARG#*=}") ;;
					disk-type=*) disk_type_g="${OPTARG#*=}" ;;
					read-only=*) read_only="${OPTARG#*=}" ;;
					disks=*) IFS=":" read -ra disks <<< "${OPTARG#*=}" ;;
					raw-cache=*) raw_cache=",cache${OPTARG#*=}" ;;
					force=*) force_vm=${OPTARG#*=} ;;
					memory=*) guest_memory=${OPTARG#*=} ;;
					queue_num=*) queue_number=${OPTARG#*=} ;;
					incoming=*) vm_incoming="${OPTARG#*=}" ;;
					migrate-to=*) vm_migrate_to="${OPTARG#*=}" ;;
					vhost-name=*) vhost_dir="$(get_vhost_dir ${OPTARG#*=})" ;;
					spdk-boot=*) local boot_from="${OPTARG#*=}" ;;
					packed) packed=true ;;
					*)
						error "unknown argument $OPTARG"
						return 1
						;;
				esac
				;;
			*)
				error "vm_create Unknown param $OPTARG"
				return 1
				;;
		esac
	done

	# Find next directory we can use
	if [[ -n $force_vm ]]; then
		vm_num=$force_vm

		vm_num_is_valid $vm_num || return 1
		local vm_dir="$VM_DIR/$vm_num"
		[[ -d $vm_dir ]] && warning "removing existing VM in '$vm_dir'"
	else
		local vm_dir=""

		set +x
		for ((i = 0; i <= 256; i++)); do
			local vm_dir="$VM_DIR/$i"
			[[ ! -d $vm_dir ]] && break
		done
		xtrace_restore

		vm_num=$i
	fi

	if [[ $vm_num -eq 256 ]]; then
		error "no free VM found. do some cleanup (256 VMs created, are you insane?)"
		return 1
	fi

	if [[ -n "$vm_migrate_to" && -n "$vm_incoming" ]]; then
		error "'--incoming' and '--migrate-to' cannot be used together"
		return 1
	elif [[ -n "$vm_incoming" ]]; then
		if [[ -n "$os_mode" || -n "$os" ]]; then
			error "'--incoming' can't be used together with '--os' nor '--os-mode'"
			return 1
		fi

		os_mode="original"
		os="$VM_DIR/$vm_incoming/os.qcow2"
	elif [[ -n "$vm_migrate_to" ]]; then
		[[ "$os_mode" != "backing" ]] && warning "Using 'backing' mode for OS since '--migrate-to' is used"
		os_mode=backing
	fi

	notice "Creating new VM in $vm_dir"
	mkdir -p $vm_dir

	if [[ "$os_mode" == "backing" ]]; then
		notice "Creating backing file for OS image file: $os"
		if ! $QEMU_IMG_BIN create -f qcow2 -b $os $vm_dir/os.qcow2 -F qcow2; then
			error "Failed to create OS backing file in '$vm_dir/os.qcow2' using '$os'"
			return 1
		fi

		local os=$vm_dir/os.qcow2
	elif [[ "$os_mode" == "original" ]]; then
		warning "Using original OS image file: $os"
	elif [[ "$os_mode" != "snapshot" ]]; then
		if [[ -z "$os_mode" ]]; then
			notice "No '--os-mode' parameter provided - using 'snapshot'"
			os_mode="snapshot"
		else
			error "Invalid '--os-mode=$os_mode'"
			return 1
		fi
	fi

	local qemu_mask_param="VM_${vm_num}_qemu_mask"
	local qemu_numa_node_param="VM_${vm_num}_qemu_numa_node"

	if [[ -z "${!qemu_mask_param}" ]] || [[ -z "${!qemu_numa_node_param}" ]]; then
		error "Parameters ${qemu_mask_param} or ${qemu_numa_node_param} not found in autotest.config file"
		return 1
	fi

	local task_mask=${!qemu_mask_param}

	notice "TASK MASK: $task_mask"
	local cmd=(taskset -a -c "$task_mask" "$QEMU_BIN")
	local vm_socket_offset=$((10000 + 100 * vm_num))

	local ssh_socket=$((vm_socket_offset + 0))
	local fio_socket=$((vm_socket_offset + 1))
	local monitor_port=$((vm_socket_offset + 2))
	local migration_port=$((vm_socket_offset + 3))
	local gdbserver_socket=$((vm_socket_offset + 4))
	local vnc_socket=$((100 + vm_num))
	local qemu_pid_file="$vm_dir/qemu.pid"
	local cpu_num=0

	set +x
	# cpu list for taskset can be comma separated or range
	# or both at the same time, so first split on commas
	cpu_list=$(echo $task_mask | tr "," "\n")
	queue_number=0
	for c in $cpu_list; do
		# If a range is detected, count how many cpus it spans. E.g. for c="2-5"
		# the arithmetic expansion evaluates "2-5 - 1" to -4; stripping the sign
		# yields 4, the number of cpus in the 2-5 range.
		if [[ $c =~ [0-9]+-[0-9]+ ]]; then
			val=$((c - 1))
			val=${val#-}
		else
			val=1
		fi
		cpu_num=$((cpu_num + val))
		queue_number=$((queue_number + val))
	done

	if [ -z $queue_number ]; then
		queue_number=$cpu_num
	fi

	# Normalize tcp ports to make sure they are available
	ssh_socket=$(get_free_tcp_port "$ssh_socket")
	fio_socket=$(get_free_tcp_port "$fio_socket")
	monitor_port=$(get_free_tcp_port "$monitor_port")
	migration_port=$(get_free_tcp_port "$migration_port")
	gdbserver_socket=$(get_free_tcp_port "$gdbserver_socket")
	vnc_socket=$(get_free_tcp_port "$vnc_socket")

	xtrace_restore

	local node_num=${!qemu_numa_node_param}
	local boot_disk_present=false
	notice "NUMA NODE: $node_num"
	cmd+=(-m "$guest_memory" --enable-kvm -cpu host -smp "$cpu_num" -vga std -vnc ":$vnc_socket" -daemonize)
	cmd+=(-object "memory-backend-file,id=mem,size=${guest_memory}M,mem-path=/dev/hugepages,share=on,prealloc=yes,host-nodes=$node_num,policy=bind")
	[[ $os_mode == snapshot ]] && cmd+=(-snapshot)
	[[ -n "$vm_incoming" ]] && cmd+=(-incoming "tcp:0:$migration_port")
	cmd+=(-monitor "telnet:127.0.0.1:$monitor_port,server,nowait")
	cmd+=(-numa "node,memdev=mem")
	cmd+=(-pidfile "$qemu_pid_file")
	cmd+=(-serial "file:$vm_dir/serial.log")
	cmd+=(-D "$vm_dir/qemu.log")
	cmd+=(-chardev "file,path=$vm_dir/seabios.log,id=seabios" -device "isa-debugcon,iobase=0x402,chardev=seabios")
	cmd+=(-net "user,hostfwd=tcp::$ssh_socket-:22,hostfwd=tcp::$fio_socket-:8765")
	cmd+=(-net nic)
	if [[ -z "$boot_from" ]]; then
		cmd+=(-drive "file=$os,if=none,id=os_disk")
		cmd+=(-device "ide-hd,drive=os_disk,bootindex=0")
	fi

	if ((${#disks[@]} == 0)) && [[ $disk_type_g == virtio* ]]; then
		disks=("default_virtio.img")
	elif ((${#disks[@]} == 0)); then
		error "No disks defined, aborting"
		return 1
	fi

	for disk in "${disks[@]}"; do
		# Each disk can define its type in a form of a disk_name,type. The remaining parts
		# of the string are dropped.
		IFS="," read -r disk disk_type _ <<< "$disk"
		[[ -z $disk_type ]] && disk_type=$disk_type_g

		case $disk_type in
			virtio)
				local raw_name="RAWSCSI"
				local raw_disk=$vm_dir/test.img

				# Create disk file if it does not exist or is smaller than 1G
				if [[ -f $disk && $(stat --printf="%s" $disk) -ge $((1024 * 1024 * 1024)) ]]; then
					raw_disk=$disk
					notice "Using existing image $raw_disk"
				else
					notice "Creating Virtio disk $raw_disk"
					dd if=/dev/zero of=$raw_disk bs=1024k count=1024
				fi

				cmd+=(-device "virtio-scsi-pci,num_queues=$queue_number")
				cmd+=(-device "scsi-hd,drive=hd$i,vendor=$raw_name")
				cmd+=(-drive "if=none,id=hd$i,file=$raw_disk,format=raw$raw_cache")
				;;
			spdk_vhost_scsi)
				notice "using socket $vhost_dir/naa.$disk.$vm_num"
				cmd+=(-chardev "socket,id=char_$disk,path=$vhost_dir/naa.$disk.$vm_num")
				cmd+=(-device "vhost-user-scsi-pci,id=scsi_$disk,num_queues=$queue_number,chardev=char_$disk")
				if [[ "$disk" == "$boot_from" ]]; then
					cmd[-1]+=,bootindex=0
					boot_disk_present=true
				fi
				;;
			spdk_vhost_blk)
				notice "using socket $vhost_dir/naa.$disk.$vm_num"
				cmd+=(-chardev "socket,id=char_$disk,path=$vhost_dir/naa.$disk.$vm_num")
				cmd+=(-device "vhost-user-blk-pci,num-queues=$queue_number,chardev=char_$disk")
				if [[ "$disk" == "$boot_from" ]]; then
					cmd[-1]+=,bootindex=0
					boot_disk_present=true
				fi

				if $packed; then
					check_qemu_packedring_support
					notice "Enabling packed ring support for VM $vm_num, controller $vhost_dir/naa.$disk.$vm_num"
					cmd[-1]+=,packed=on
				fi
				;;
			kernel_vhost)
				if [[ -z $disk ]]; then
					error "need WWN for $disk_type"
					return 1
				elif [[ ! $disk =~ ^[[:alpha:]]{3}[.][[:xdigit:]]+$ ]]; then
					error "$disk_type - disk(wwn)=$disk does not look like a WWN number"
					return 1
				fi
				notice "Using kernel vhost disk wwn=$disk"
				cmd+=(-device "vhost-scsi-pci,wwpn=$disk,num_queues=$queue_number")
				;;
			vfio_user)
				notice "using socket $VM_DIR/$vm_num/muser/domain/muser$disk/$disk/cntrl"
				cmd+=(-device "vfio-user-pci,x-msg-timeout=5000,socket=$VM_DIR/$vm_num/muser/domain/muser$disk/$disk/cntrl")
				if [[ "$disk" == "$boot_from" ]]; then
					cmd[-1]+=",bootindex=0"
					boot_disk_present=true
				fi
				;;
			vfio_user_virtio)
				notice "using socket $VM_DIR/vfu_tgt/virtio.$disk"
				cmd+=(-device "vfio-user-pci,x-msg-timeout=5000,socket=$VM_DIR/vfu_tgt/virtio.$disk")
				if [[ "$disk" == "$boot_from" ]]; then
					cmd[-1]+=",bootindex=0"
					boot_disk_present=true
				fi
				;;
			*)
				error "unknown mode '$disk_type', use: virtio, spdk_vhost_scsi, spdk_vhost_blk, kernel_vhost, vfio_user or vfio_user_virtio"
				return 1
				;;
		esac
	done

	if [[ -n $boot_from ]] && [[ $boot_disk_present == false ]]; then
		error "Boot from $boot_from is selected but device is not present"
		return 1
	fi

	((${#qemu_args[@]})) && cmd+=("${qemu_args[@]}")
	notice "Saving to $vm_dir/run.sh"
	cat <<- RUN > "$vm_dir/run.sh"
		#!/bin/bash
		shopt -s nullglob extglob
		rootdir=$rootdir
		source "\$rootdir/test/scheduler/common.sh"
		qemu_log () {
			echo "=== qemu.log ==="
			[[ -s $vm_dir/qemu.log ]] && cat $vm_dir/qemu.log
			echo "=== qemu.log ==="
		}

		if [[ \$EUID -ne 0 ]]; then
			echo "Go away user come back as root"
			exit 1
		fi

		trap "qemu_log" EXIT

		qemu_cmd=($(printf '%s\n' "${cmd[@]}"))
		chmod +r $vm_dir/*
		echo "Running VM in $vm_dir"
		rm -f $qemu_pid_file
		cgroup=\$(get_cgroup \$$)
		set_cgroup_attr_top_bottom \$$ cgroup.subtree_control "+cpuset"
		create_cgroup \$cgroup/qemu.$vm_num
		set_cgroup_attr "\$cgroup/qemu.$vm_num" cpuset.mems "$node_num"
		set_cgroup_attr "\$cgroup/qemu.$vm_num" cpuset.cpus "$task_mask"
		"\${qemu_cmd[@]}"

		echo "Waiting for QEMU pid file"
		sleep 1
		[[ ! -f $qemu_pid_file ]] && sleep 1
		[[ ! -f $qemu_pid_file ]] && echo "ERROR: no qemu pid file found" && exit 1
		set_cgroup_attr "\$cgroup/qemu.$vm_num" cgroup.threads \$(< "$qemu_pid_file")
		exit 0
		# EOF
	RUN
	chmod +x $vm_dir/run.sh

	# Save generated sockets redirection
	echo $ssh_socket > $vm_dir/ssh_socket
	echo $fio_socket > $vm_dir/fio_socket
	echo $monitor_port > $vm_dir/monitor_port

	rm -f $vm_dir/migration_port
	[[ -z $vm_incoming ]] || echo $migration_port > $vm_dir/migration_port

	echo $gdbserver_socket > $vm_dir/gdbserver_socket
	echo $vnc_socket > $vm_dir/vnc_socket

	[[ -z $vm_incoming ]] || ln -fs $VM_DIR/$vm_incoming $vm_dir/vm_incoming
	[[ -z $vm_migrate_to ]] || ln -fs $VM_DIR/$vm_migrate_to $vm_dir/vm_migrate_to
}

function vm_run() {
	local OPTIND optchar vm
	local run_all=false
	local vms_to_run=""

	while getopts 'a-:' optchar; do
		case "$optchar" in
			a) run_all=true ;;
			*)
				error "Unknown param $OPTARG"
				return 1
				;;
		esac
	done

	if $run_all; then
		vms_to_run="$(vm_list_all)"
	else
		shift $((OPTIND - 1))
		for vm in "$@"; do
			vm_num_is_valid $vm || return 1
			if [[ ! -x $VM_DIR/$vm/run.sh ]]; then
				error "VM$vm not defined - set it up first"
				return 1
			fi
			vms_to_run+=" $vm"
		done
	fi

	for vm in $vms_to_run; do
		if vm_is_running $vm; then
			warning "VM$vm ($VM_DIR/$vm) already running"
			continue
		fi

		notice "running $VM_DIR/$vm/run.sh"
		if ! $VM_DIR/$vm/run.sh; then
			error "FAILED to run vm $vm"
			return 1
		fi
	done
}

function vm_print_logs() {
	vm_num=$1
	warning "================"
	warning "QEMU LOG:"
	if [[ -r $VM_DIR/$vm_num/qemu.log ]]; then
		cat $VM_DIR/$vm_num/qemu.log
	else
		warning "LOG qemu.log not found"
	fi

	warning "VM LOG:"
	if [[ -r $VM_DIR/$vm_num/serial.log ]]; then
		cat $VM_DIR/$vm_num/serial.log
	else
		warning "LOG serial.log not found"
	fi

	warning "SEABIOS LOG:"
	if [[ -r $VM_DIR/$vm_num/seabios.log ]]; then
		cat $VM_DIR/$vm_num/seabios.log
	else
		warning "LOG seabios.log not found"
	fi
	warning "================"
}

# Wait for all created VMs to boot.
# param $1 max wait time
function vm_wait_for_boot() {
	assert_number $1

	xtrace_disable

	local all_booted=false
	local timeout_time=$1
	[[ $timeout_time -lt 10 ]] && timeout_time=10
	timeout_time=$(date -d "+$timeout_time seconds" +%s)

	notice "Waiting for VMs to boot"
	shift
	if [[ "$*" == "" ]]; then
		local vms_to_check="$VM_DIR/[0-9]*"
	else
		local vms_to_check=""
		for vm in "$@"; do
			vms_to_check+=" $VM_DIR/$vm"
		done
	fi

	for vm in $vms_to_check; do
		local vm_num
		vm_num=$(basename $vm)
		local i=0
		notice "waiting for VM$vm_num ($vm)"
		while ! vm_os_booted $vm_num; do
			if ! vm_is_running $vm_num; then
				warning "VM $vm_num is not running"
				vm_print_logs $vm_num
				xtrace_restore
				return 1
			fi

			if [[ $(date +%s) -gt $timeout_time ]]; then
				warning "timeout waiting for machines to boot"
				vm_print_logs $vm_num
				xtrace_restore
				return 1
			fi
			if ((i > 30)); then
				local i=0
				echo
			fi
			echo -n "."
			sleep 1
		done
		echo ""
		notice "VM$vm_num ready"
		# Change timeout for stopping services to prevent lengthy powerdowns.
		# Check that remote system is not Cygwin in case of Windows VMs.
		local vm_os
		vm_os=$(vm_exec $vm_num "uname -o")
		if [[ "$vm_os" != "Cygwin" ]]; then
			vm_exec $vm_num "echo 'DefaultTimeoutStopSec=10' >> /etc/systemd/system.conf; systemctl daemon-reexec"
		fi
	done

	notice "all VMs ready"
	xtrace_restore
	return 0
}

function vm_start_fio_server() {
	local OPTIND optchar
	local readonly=''
	local fio_bin=''
	while getopts ':-:' optchar; do
		case "$optchar" in
			-)
				case "$OPTARG" in
					fio-bin=*) local fio_bin="${OPTARG#*=}" ;;
					readonly) local readonly="--readonly" ;;
					*) error "Invalid argument '$OPTARG'" && return 1 ;;
				esac
				;;
			*) error "Invalid argument '$OPTARG'" && return 1 ;;
		esac
	done

	shift $((OPTIND - 1))
	for vm_num in "$@"; do
		notice "Starting fio server on VM$vm_num"
		if [[ $fio_bin != "" ]]; then
			vm_exec $vm_num 'cat > /root/fio; chmod +x /root/fio' < $fio_bin
			vm_exec $vm_num /root/fio $readonly --eta=never --server --daemonize=/root/fio.pid
		else
			vm_exec $vm_num fio $readonly --eta=never --server --daemonize=/root/fio.pid
		fi
	done
}

function vm_check_scsi_location() {
	# Script to find wanted disk
	local script='shopt -s nullglob;
	for entry in /sys/block/sd*; do
		disk_type="$(cat $entry/device/vendor)";
		if [[ $disk_type == INTEL* ]] || [[ $disk_type == RAWSCSI* ]] || [[ $disk_type == LIO-ORG* ]]; then
			fname=$(basename $entry);
			echo -n " $fname";
		fi;
	done'

	SCSI_DISK="$(echo "$script" | vm_exec $1 bash -s)"

	if [[ -z "$SCSI_DISK" ]]; then
		error "no test disk found!"
		return 1
	fi
}

# Script to perform scsi device reset on all disks in VM
# param $1 VM num
# param $2..$n Disks to perform reset on
function vm_reset_scsi_devices() {
	for disk in "${@:2}"; do
		notice "VM$1 Performing device reset on disk $disk"
		vm_exec $1 sg_reset /dev/$disk -vNd
	done
}

function vm_check_blk_location() {
	local script='shopt -s nullglob; cd /sys/block; echo vd*'
	SCSI_DISK="$(echo "$script" | vm_exec $1 bash -s)"

	if [[ -z "$SCSI_DISK" ]]; then
		error "no blk test disk found!"
		return 1
	fi
}

function vm_check_nvme_location() {
	SCSI_DISK="$(vm_exec $1 "grep -l SPDK /sys/class/nvme/*/model" | awk -F/ '{print $5"n1"}')"
	if [[ -z "$SCSI_DISK" ]]; then
		error "no vfio-user nvme test disk found!"
		return 1
	fi
}

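# Run a fio job on one or more VMs, either through fio's client/server mode
# (default) or by launching fio locally on each VM.
#   --job-file=path            fio job file to use
#   --fio-bin=path             fio binary to copy to and run on the VMs
#   --vm=NUM:disk1[:disk2...]  target VM and its disks (repeatable)
#   --out=dir                  output directory for logs
#   --local / --plugin / --json / --hide-results / --no-wait-for-fio / --gtod-reduce
# Example (illustrative paths):
#   run_fio --fio-bin="$FIO_BIN" --job-file="$job" --out="$VHOST_DIR/fio_results" --vm="0:/dev/vda"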
function run_fio() {
	local arg
	local job_file=""
	local fio_bin=""
	local vms=()
	local out=""
	local vm
	local run_server_mode=true
	local run_plugin_mode=false
	local fio_start_cmd
	local fio_output_format="normal"
	local fio_gtod_reduce=false
	local wait_for_fio=true

	for arg in "$@"; do
		case "$arg" in
			--job-file=*) local job_file="${arg#*=}" ;;
			--fio-bin=*) local fio_bin="${arg#*=}" ;;
			--vm=*) vms+=("${arg#*=}") ;;
			--out=*)
				local out="${arg#*=}"
				mkdir -p $out
				;;
			--local) run_server_mode=false ;;
			--plugin)
				notice "Using plugin mode. Disabling server mode."
				run_plugin_mode=true
				run_server_mode=false
				;;
			--json) fio_output_format="json" ;;
			--hide-results) hide_results=true ;;
			--no-wait-for-fio) wait_for_fio=false ;;
			--gtod-reduce) fio_gtod_reduce=true ;;
			*)
				error "Invalid argument '$arg'"
				return 1
				;;
		esac
	done

	if [[ -n "$fio_bin" && ! -r "$fio_bin" ]]; then
		error "FIO binary '$fio_bin' does not exist"
		return 1
	fi

	if [[ -z "$fio_bin" ]]; then
		fio_bin="fio"
	fi

	if [[ ! -r "$job_file" ]]; then
		error "Fio job '$job_file' does not exist"
		return 1
	fi

	fio_start_cmd="$fio_bin --eta=never "

	local job_fname
	job_fname=$(basename "$job_file")
	log_fname="${job_fname%%.*}.log"
	fio_start_cmd+=" --output=$out/$log_fname --output-format=$fio_output_format "

	# prepare job file for each VM
	for vm in "${vms[@]}"; do
		local vm_num=${vm%%:*}
		local vmdisks=${vm#*:}

		sed "s@filename=@filename=$vmdisks@;s@description=\(.*\)@description=\1 (VM=$vm_num)@" "$job_file" \
			| vm_exec $vm_num "cat > /root/$job_fname"

		if $fio_gtod_reduce; then
			vm_exec $vm_num "echo 'gtod_reduce=1' >> /root/$job_fname"
		fi

		vm_exec $vm_num cat /root/$job_fname

		if $run_server_mode; then
			fio_start_cmd+="--client=127.0.0.1,$(vm_fio_socket $vm_num) --remote-config /root/$job_fname "
		fi

		if ! $run_server_mode; then
			if [[ -n "$fio_bin" ]]; then
				if ! $run_plugin_mode && [[ -e $fio_bin ]]; then
					vm_exec $vm_num 'cat > /root/fio; chmod +x /root/fio' < $fio_bin
					vm_fio_bin="/root/fio"
				else
					vm_fio_bin=$fio_bin
				fi
			fi

			notice "Running local fio on VM $vm_num"
			vm_exec $vm_num "$vm_fio_bin --output=/root/$log_fname --output-format=$fio_output_format /root/$job_fname & echo \$! > /root/fio.pid" &
			vm_exec_pids+=("$!")
		fi
	done

	if ! $run_server_mode; then
		if ! $wait_for_fio; then
			return 0
		fi
		echo "Waiting for guest fio instances to finish..."
		wait "${vm_exec_pids[@]}"

		for vm in "${vms[@]}"; do
			local vm_num=${vm%%:*}
			vm_exec $vm_num cat /root/$log_fname > "$out/vm${vm_num}_${log_fname}"
		done
		return 0
	fi

	$fio_start_cmd
	sleep 1

	if [[ "$fio_output_format" == "json" ]]; then
		# Fio in client-server mode produces a lot of "trash" output
		# preceding JSON structure, making it not possible to parse.
		# Remove these lines from file.
		# shellcheck disable=SC2005
		echo "$(grep -vP '^[<\w]' "$out/$log_fname")" > "$out/$log_fname"
	fi

	if [[ ! $hide_results ]]; then
		cat $out/$log_fname
	fi
}

# Parsing fio results for json output and client-server mode only!
function parse_fio_results() {
	local fio_log_dir=$1
	local fio_log_filename=$2
	local fio_csv_filename

	# Variables used in parsing loop
	local log_file
	local rwmode mixread mixwrite
	local lat_key lat_divisor
	local client_stats iops bw
	local read_avg_lat read_min_lat read_max_lat
	local write_avg_lat write_min_lat write_max_lat
	local clients

	declare -A results
	results["iops"]=0
	results["bw"]=0
	results["avg_lat"]=0
	results["min_lat"]=0
	results["max_lat"]=0

	# Loop using the log filename to see if there are any other
	# matching files. This is in case we ran fio test multiple times.
	log_files=("$fio_log_dir/$fio_log_filename"*)
	for log_file in "${log_files[@]}"; do
		# Save entire array to avoid opening $log_file multiple times
		clients=$(jq -r '.client_stats' "$log_file")
		[[ -n $clients ]]
		rwmode=$(jq -r '.[0]["job options"]["rw"]' <<< "$clients")
		mixread=1
		mixwrite=1
		if [[ $rwmode = *"rw"* ]]; then
			mixread=$(jq -r '.[0]["job options"]["rwmixread"]' <<< "$clients")
			mixread=$(bc -l <<< "scale=3; $mixread/100")
			mixwrite=$(bc -l <<< "scale=3; 1-$mixread")
		fi

		client_stats=$(jq -r '.[] | select(.jobname == "All clients")' <<< "$clients")
		if [[ -z $client_stats ]]; then
			# Potentially single client (single VM)
			client_stats=$(jq -r '.[]' <<< "$clients")
		fi

		# Check latency unit and later normalize to microseconds
		lat_key="lat_us"
		lat_divisor=1
		if jq -er '.read["lat_ns"]' &> /dev/null <<< $client_stats; then
			lat_key="lat_ns"
			lat_divisor=1000
		fi

		# Horrific bash float point arithmetic operations below.
		# Viewer discretion is advised.
		iops=$(jq -r '[.read["iops"],.write["iops"]] | add' <<< $client_stats)
		bw=$(jq -r '[.read["bw"],.write["bw"]] | add' <<< $client_stats)
		read_avg_lat=$(jq -r --arg lat_key $lat_key '.read[$lat_key]["mean"]' <<< $client_stats)
		read_min_lat=$(jq -r --arg lat_key $lat_key '.read[$lat_key]["min"]' <<< $client_stats)
		read_max_lat=$(jq -r --arg lat_key $lat_key '.read[$lat_key]["max"]' <<< $client_stats)
		write_avg_lat=$(jq -r --arg lat_key $lat_key '.write[$lat_key]["mean"]' <<< $client_stats)
		write_min_lat=$(jq -r --arg lat_key $lat_key '.write[$lat_key]["min"]' <<< $client_stats)
		write_max_lat=$(jq -r --arg lat_key $lat_key '.write[$lat_key]["max"]' <<< $client_stats)

		results["iops"]=$(bc -l <<< "${results[iops]} + $iops")
		results["bw"]=$(bc -l <<< "${results[bw]} + $bw")
		results["avg_lat"]=$(bc -l <<< "${results[avg_lat]} + ($mixread*$read_avg_lat + $mixwrite*$write_avg_lat)/$lat_divisor")
		results["min_lat"]=$(bc -l <<< "${results[min_lat]} + ($mixread*$read_min_lat + $mixwrite*$write_min_lat)/$lat_divisor")
		results["max_lat"]=$(bc -l <<< "${results[max_lat]} + ($mixread*$read_max_lat + $mixwrite*$write_max_lat)/$lat_divisor")
	done

	results["iops"]=$(bc -l <<< "scale=3; ${results[iops]} / ${#log_files[@]}")
	results["bw"]=$(bc -l <<< "scale=3; ${results[bw]} / ${#log_files[@]}")
	results["avg_lat"]=$(bc -l <<< "scale=3; ${results[avg_lat]} / ${#log_files[@]}")
	results["min_lat"]=$(bc -l <<< "scale=3; ${results[min_lat]} / ${#log_files[@]}")
	results["max_lat"]=$(bc -l <<< "scale=3; ${results[max_lat]} / ${#log_files[@]}")

	fio_csv_filename="${fio_log_filename%%.*}.csv"
	cat <<- EOF > "$fio_log_dir/$fio_csv_filename"
		iops,bw,avg_lat,min_lat,max_lat
		${results["iops"]},${results["bw"]},${results["avg_lat"]},${results["min_lat"]},${results["max_lat"]}
	EOF
}

# Shutdown or kill any running VM and SPDK APP.
#
function at_app_exit() {
	local vhost_name

	notice "APP EXITING"
	notice "killing all VMs"
	vm_kill_all
	# Kill vhost application
	notice "killing vhost app"

	for vhost_name in "$TARGET_DIR"/*; do
		vhost_kill "$(basename "$vhost_name")"
	done

	notice "EXIT DONE"
}

function error_exit() {
	trap - ERR
	print_backtrace
	set +e
	error "Error on $1 $2"

	at_app_exit
	exit 1
}

function lookup_dev_irqs() {
	local vm=$1 irqs=() cpus=()
	local script_get_irqs script_get_cpus

	mkdir -p "$VHOST_DIR/irqs"

	# All vhost tests depend either on virtio_blk or virtio_scsi drivers on the VM side.
	# Considering that, simply iterate over virtio bus and pick pci device corresponding
	# to each virtio device.
	# For vfio-user setup, look for bare nvme devices.

	script_get_irqs=$(
		cat <<- 'SCRIPT'
			shopt -s nullglob
			for virtio in /sys/bus/virtio/devices/virtio*; do
			  irqs+=("$(readlink -f "$virtio")/../msi_irqs/"*)
			done
			irqs+=(/sys/class/nvme/nvme*/device/msi_irqs/*)
			printf '%u\n' "${irqs[@]##*/}"
		SCRIPT
	)

	script_get_cpus=$(
		cat <<- 'SCRIPT'
			cpus=(/sys/devices/system/cpu/cpu[0-9]*)
			printf '%u\n' "${cpus[@]##*cpu}"
		SCRIPT
	)

	irqs=($(vm_exec "$vm" "$script_get_irqs"))
	cpus=($(vm_exec "$vm" "$script_get_cpus"))
	((${#irqs[@]} > 0 && ${#cpus[@]} > 0))

	printf '%u\n' "${irqs[@]}" > "$VHOST_DIR/irqs/$vm.irqs"
	printf '%u\n' "${cpus[@]}" > "$VHOST_DIR/irqs/$vm.cpus"
}

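# Sample /proc/interrupts on given VMs once per second in the background.
# Sampler PIDs are appended to irqs_pids[] so callers can wait for or kill them.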
function irqs() {
	local vm
	for vm; do
		vm_exec "$vm" "while :; do cat /proc/interrupts; sleep 1s; done" > "$VHOST_DIR/irqs/$vm.interrupts" &
		irqs_pids+=($!)
	done
}

function parse_irqs() {
	local iter=${1:-1}
	"$rootdir/test/vhost/parse_irqs.sh" "$VHOST_DIR/irqs/"*.interrupts
	rm "$VHOST_DIR/irqs/"*.interrupts

	mkdir -p "$VHOST_DIR/irqs/$iter"
	mv "$VHOST_DIR/irqs/"*.parsed "$VHOST_DIR/irqs/$iter/"
}

function collect_perf() {
	local cpus=$1 outf=$2 runtime=$3 delay=$4

	mkdir -p "$VHOST_DIR/perf"

	perf record -g \
		${cpus:+-C "$cpus"} \
		${outf:+-o "$outf"} \
		${delay:+-D $((delay * 1000))} \
		-z \
		${runtime:+ -- sleep $((runtime + delay))}
}

function parse_perf() {
	local iter=${1:-1}
	local report out

	mkdir -p "$VHOST_DIR/perf/$iter"
	shift

	for report in "$@" "$VHOST_DIR/perf/"*.perf; do
		[[ -f $report ]] || continue
		perf report \
			-n \
			-i "$report" \
			--header \
			--stdio > "$VHOST_DIR/perf/$iter/${report##*/}.parsed"
		cp "$report" "$VHOST_DIR/perf/$iter/"
	done
	rm "$VHOST_DIR/perf/"*.perf
}

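# Print the value of option $1 from fio config file $2 ("opt=value" lines).
# Example: iodepth=$(get_from_fio iodepth "$job_file")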
function get_from_fio() {
	local opt=$1 conf=$2

	[[ -n $opt && -f $conf ]] || return 1

	awk -F= "/^$opt/{print \$2}" "$conf"
}

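# Find a free TCP port, starting from $1 (see the in-function comment for $2).
# Example: port=$(get_free_tcp_port 10000) || return 1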
function get_free_tcp_port() {
	local port=$1 to=${2:-1} sockets=()

	mapfile -t sockets < /proc/net/tcp

	# If there's a TCP socket in a listening state keep incrementing $port until
	# we find one that's not used. $to determines how long should we look for:
	#  0: don't increment, just check if given $port is in use
	# >0: increment $to times
	# <0: no increment limit

	# /proc/net/tcp lists the local port in hex; "0A" is the TCP_LISTEN state.
	while [[ ${sockets[*]} == *":$(printf '%04X' "$port") 00000000:0000 0A"* ]]; do
		((to-- && ++port <= 65535)) || return 1
	done

	echo "$port"
}

function limit_vhost_kernel_threads() {
	local cpus=$1 nodes cpu _cpus=() _nodes=()
	local _pids=() pid cgroup

	xtrace_disable_per_cmd map_cpus

	_cpus=($(parse_cpu_list <(echo "$cpus")))

	for cpu in "${_cpus[@]}"; do
		_nodes+=("${cpu_node_map[cpu]}")
	done

	nodes=$(fold_array_onto_string "${_nodes[@]}")

	# vhost kernel threads are named as vhost-PID
	_pids=($(pgrep vhost))
	((${#_pids[@]} > 0)) || return 1

	# All threads should be located under the same initial cgroup. kthreadd does not put them
	# under root cgroup, but rather the cgroup of a session from which target/vhost was configured.
	# We create dedicated cgroup under the initial one to move all the threads only once instead of
	# having an extra step of moving them to the root cgroup first.
	set_cgroup_attr_top_bottom "${_pids[0]}" cgroup.subtree_control "+cpuset"

	cgroup=$(get_cgroup "${_pids[0]}")
	create_cgroup "$cgroup/vhost"

	set_cgroup_attr "$cgroup/vhost" cpuset.cpus "$cpus"
	set_cgroup_attr "$cgroup/vhost" cpuset.mems "$nodes"

	for pid in "${_pids[@]}"; do
		move_proc "$pid" "$cgroup/vhost" "$cgroup" cgroup.threads
	done
}

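# Export the VM/SPDK cpu topology variables consumed by the conf-generator
# and produce a cpu config.
# param $1 number of VMs, $2 cpus per VM
# param $3 number of SPDK cpus, or $4 an explicit SPDK cpu list (overrides $3)
# param $5..$n NUMA nodes to pin consecutive VMs to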
function gen_cpu_vm_spdk_config() (
	local vm_count=$1 vm_cpu_num=$2 vm
	local spdk_cpu_num=${3:-1} spdk_cpu_list=${4:-} spdk_cpus
	local nodes=("${@:5}") node
	local env

	spdk_cpus=spdk_cpu_num
	[[ -n $spdk_cpu_list ]] && spdk_cpus=spdk_cpu_list

	if ((${#nodes[@]} > 0)); then
		((${#nodes[@]} == 1)) && node=${nodes[0]}
		for ((vm = 0; vm < vm_count; vm++)); do
			env+=("VM${vm}_NODE=${nodes[vm]:-$node}")
		done
	fi

	env+=("$spdk_cpus=${!spdk_cpus}")
	env+=("vm_count=$vm_count")
	env+=("vm_cpu_num=$vm_cpu_num")

	export "${env[@]}"

	"$rootdir/scripts/perf/vhost/conf-generator" -p cpu
)