xref: /spdk/test/nvme/sw_hotplug.sh (revision 16d862d0380886f6fc765f68a87e240bb4295595)
1#!/usr/bin/env bash
2#  SPDX-License-Identifier: BSD-3-Clause
3#  Copyright (C) 2022 Intel Corporation
4#  All rights reserved.
5#
# Resolve the directory holding this script and the repository root two levels
# above it, then source the helper libraries from the tree. The helpers used
# further down that are not defined in this file (rpc_cmd, killprocess,
# timing_cmd, ...) presumably come from these two files - confirm.
# Every expansion is quoted so a checkout path containing whitespace still
# resolves and sources correctly.
testdir=$(readlink -f "$(dirname "$0")")
rootdir=$(readlink -f "$testdir/../..")
source "$rootdir/scripts/common.sh"
source "$rootdir/test/common/autotest_common.sh"
10
# Print the PCI address of every NVMe controller found in the
# driver_specific.nvme section of the bdevs returned by the bdev_get_bdevs RPC.
# Outputs: one address per line on stdout, sorted with duplicates removed.
bdev_bdfs() {
	local pci_address_filter='.[].driver_specific.nvme[].pci_address'

	jq -r "$pci_address_filter" < <(rpc_cmd bdev_get_bdevs) \
		| sort -u
}
15
# PCI bus hotplug
# Helper function to remove/attach controllers
# Run remove_attach_helper under timing_cmd and report how long it took.
# Globals:   nvme_count (read)
# Arguments: forwarded verbatim to remove_attach_helper
# Outputs:   a single summary line on stderr
# Returns:   the non-zero status of the timed run if it failed, 0 otherwise
debug_remove_attach_helper() {
	local helper_time=0

	# timing_cmd's output is used as the elapsed time in seconds (assumed from
	# how it is printed below). Propagate a failure explicitly instead of
	# relying on errexit being active in the calling shell.
	helper_time=$(timing_cmd remove_attach_helper "$@") || return
	# Terminate the message with a newline so it does not run into whatever
	# is written to stderr next.
	printf 'remove_attach_helper took %ss to complete (handling %u nvme drive(s))\n' \
		"$helper_time" "$nvme_count" >&2
}
25
# Repeatedly remove the NVMe devices from the PCI bus through sysfs, rescan the
# bus and rebind each device to its cached driver.
# Globals:   nvmes (read)          - PCI addresses of the devices to cycle
#            nvme_count (read)     - scales the wait after re-attaching
#            pci_bus_driver (read) - map of PCI address -> driver to rebind to
# Arguments: $1 - number of remove/attach cycles to perform
#            $2 - base wait time, passed to sleep (seconds)
#            $3 - "true" to also verify the bdev list via bdev_bdfs
#                 (default: false)
# Returns:   1 if, after a re-attach, the bdev list does not match nvmes
remove_attach_helper() {
	local hotplug_events=$1
	local hotplug_wait=$2
	local use_bdev=${3:-false}
	local dev bdfs

	# We need to make sure we wait long enough for hotplug to initialize the devices
	# and start IO - if we start removing devices before that happens we will end up
	# stepping on hotplug's toes forcing it to fail to report proper count of given
	# events.
	sleep "$hotplug_wait"

	while ((hotplug_events--)); do
		for dev in "${nvmes[@]}"; do
			echo 1 > "/sys/bus/pci/devices/$dev/remove"
		done

		if "$use_bdev"; then
			# Since we removed all the devices, when the sleep settles, we expect to find no bdevs
			# FIXME: For some unknown reason, SPDK may stay behind, still returning bdevs on the
			# list which are not on the bus anymore. This happens until nvme_pcie_qpair_abort_trackers()
			# finally returns (usually reporting an error while aborting outstanding commands).
			# It's been noticed that it takes significant amount of time especially under ubuntu2004
			# in the CI.
			# NOTE: this poll has no upper bound; it only ends once the list is
			# empty or bdev_bdfs/sleep fails.
			while bdfs=($(bdev_bdfs)) && ((${#bdfs[@]} > 0)) && sleep 0.5; do
				printf 'Still waiting for %s to be gone\n' "${bdfs[@]}" >&2
			done
		fi

		# Avoid setup.sh as it does some extra work which is not relevant for this test.
		echo 1 > "/sys/bus/pci/rescan"

		# Force each device back onto its cached driver: set the override,
		# unbind from whatever driver claimed it during the rescan, re-probe,
		# then clear the override again.
		for dev in "${nvmes[@]}"; do
			echo "${pci_bus_driver["$dev"]}" > "/sys/bus/pci/devices/$dev/driver_override"
			echo "$dev" > "/sys/bus/pci/devices/$dev/driver/unbind"
			echo "$dev" > "/sys/bus/pci/drivers_probe"
			echo "" > "/sys/bus/pci/devices/$dev/driver_override"
		done

		# Wait now for hotplug to reattach to the devices
		sleep "$((hotplug_wait * nvme_count))"

		if "$use_bdev"; then
			# See if we get all the bdevs back in one bulk. Fail explicitly: a
			# bare [[ ]] here would only be fatal with errexit active, and would
			# otherwise be ignored on every iteration but the last.
			bdfs=($(bdev_bdfs))
			if [[ ${bdfs[*]} != "${nvmes[*]}" ]]; then
				printf 'Expected bdevs for [%s] after re-attach, but found [%s]\n' \
					"${nvmes[*]}" "${bdfs[*]}" >&2
				return 1
			fi
		fi
	done
}
75
# Start the hotplug example application in the background, drive the
# remove/attach cycles against it and check how it ended.
# Globals:   SPDK_EXAMPLE_DIR, hotplug_events, hotplug_wait, nvme_count (read)
#            hotplug_pid (written; kept global so the trap can still see it
#            after this function has returned)
# Returns:   1 if the application was still running and had to be killed,
#            the application's own exit status if that was non-zero, else 0
run_hotplug() {
	# Passed to both -n and -r; presumably the number of insert/remove events
	# the application should expect - confirm against the hotplug example.
	local expected_events=$((hotplug_events * nvme_count))

	trap 'killprocess "$hotplug_pid"; exit 1' SIGINT SIGTERM EXIT

	"$SPDK_EXAMPLE_DIR/hotplug" \
		-i 0 \
		-t 0 \
		-n "$expected_events" \
		-r "$expected_events" \
		-l warning &
	hotplug_pid=$!

	debug_remove_attach_helper "$hotplug_events" "$hotplug_wait" false

	# Wait in case hotplug app is lagging behind
	# and kill it, if it hung.
	sleep "$hotplug_wait"

	if kill -0 "$hotplug_pid"; then
		echo "Killing hotplug application"
		killprocess "$hotplug_pid"
		# The trap is deliberately left armed on this failure path.
		return 1
	fi

	# hotplug already finished, check for the error code. Do it explicitly so
	# a failing application is reported even when errexit is not in effect.
	wait "$hotplug_pid" || return

	trap - SIGINT SIGTERM EXIT
}
104
105# SPDK target hotplug
# Start spdk_tgt, enable its NVMe hotplug handling over RPC and run the
# remove/attach cycles twice: once right after enabling hotplug and once more
# after disabling and re-enabling it.
# Globals:   SPDK_BIN_DIR, hotplug_events, hotplug_wait (read)
#            spdk_tgt_pid (written; kept global so the trap can still see it
#            after this function has returned)
tgt_run_hotplug() {
	"$SPDK_BIN_DIR/spdk_tgt" &
	spdk_tgt_pid=$!

	# On interruption or early exit: stop the target and rescan the PCI bus
	trap 'killprocess "${spdk_tgt_pid}"; echo 1 > /sys/bus/pci/rescan; exit 1' SIGINT SIGTERM EXIT
	waitforlisten "$spdk_tgt_pid"

	rpc_cmd bdev_nvme_set_hotplug -e

	debug_remove_attach_helper "$hotplug_events" "$hotplug_wait" true
	# Verify reregistering hotplug poller
	rpc_cmd bdev_nvme_set_hotplug -d
	rpc_cmd bdev_nvme_set_hotplug -e

	debug_remove_attach_helper "$hotplug_events" "$hotplug_wait" true

	trap - SIGINT SIGTERM EXIT
	killprocess "$spdk_tgt_pid"
}
127
# Preparation: run setup.sh once before enumerating the NVMe controllers.
"$rootdir/scripts/setup.sh"

# Test parameters: base wait time handed to sleep (seconds) and the number of
# remove/attach cycles per run.
hotplug_wait=6
hotplug_events=3

# Take the controllers reported by nvme_in_userspace and keep two at most.
nvmes=($(nvme_in_userspace))
nvme_count=${#nvmes[@]}
if ((nvme_count > 2)); then
	nvme_count=2
fi
nvmes=("${nvmes[@]::nvme_count}")

# Invoke "setup.sh reset", then run setup.sh again with PCI_ALLOWED limited to
# the selected controllers.
"$rootdir/scripts/setup.sh" reset
PCI_ALLOWED="${nvmes[*]}" "$rootdir/scripts/setup.sh"

# Call cache_pci_bus with xtrace switched off around it. Presumably this is
# what fills pci_bus_driver, which remove_attach_helper reads - confirm.
xtrace_disable
cache_pci_bus
xtrace_restore

# Run pci bus hotplug test
run_hotplug

# Run SPDK target based hotplug
tgt_run_hotplug

# Final setup.sh run once both hotplug tests have finished.
"$rootdir/scripts/setup.sh"
155