xref: /spdk/test/nvmf/common.sh (revision 1cbacb58fc5de6ec73094d039d5edff9d8cb429d)
#  SPDX-License-Identifier: BSD-3-Clause
#  Copyright (C) 2016 Intel Corporation
#  Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES
#  All rights reserved.
#

[[ $(uname -s) == FreeBSD ]] && return 0

NVMF_PORT=4420
NVMF_SECOND_PORT=4421
NVMF_THIRD_PORT=4422
NVMF_IP_PREFIX="192.168.100"
NVMF_IP_LEAST_ADDR=8
NVMF_TCP_IP_ADDRESS="127.0.0.1"
NVMF_TRANSPORT_OPTS=""
NVMF_SERIAL=SPDKISFASTANDAWESOME
NVME_HOSTNQN=$(nvme gen-hostnqn)
NVME_HOSTID=${NVME_HOSTNQN##*:}
NVME_HOST=("--hostnqn=$NVME_HOSTNQN" "--hostid=$NVME_HOSTID")
NVME_CONNECT="nvme connect"
NET_TYPE=${NET_TYPE:-phy-fallback}
NVME_SUBNQN=nqn.2016-06.io.spdk:testnqn

function build_nvmf_app_args() {
	if [ $SPDK_RUN_NON_ROOT -eq 1 ]; then
		# We assume that the test script was started with sudo
		NVMF_APP=(sudo -E -u $SUDO_USER "LD_LIBRARY_PATH=$LD_LIBRARY_PATH" "${NVMF_APP[@]}")
	fi
	NVMF_APP+=(-i "$NVMF_APP_SHM_ID" -e 0xFFFF)

	NVMF_APP+=("${NO_HUGE[@]}")

	if [ "$TEST_INTERRUPT_MODE" -eq 1 ]; then
		NVMF_APP+=(--interrupt-mode)
	fi

	if [ -n "$SPDK_HUGE_DIR" ]; then
		NVMF_APP+=(--huge-dir "$SPDK_HUGE_DIR")
	elif [ $SPDK_RUN_NON_ROOT -eq 1 ]; then
		echo "In non-root test mode you have to set the SPDK_HUGE_DIR variable." >&2
		echo "For example:" >&2
		echo "sudo mkdir /mnt/spdk_hugetlbfs" >&2
		echo "sudo chown ${SUDO_USER}: /mnt/spdk_hugetlbfs" >&2
		echo "export SPDK_HUGE_DIR=/mnt/spdk_hugetlbfs" >&2
		return 1
	fi
}
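# Illustrative only: assuming the caller seeded NVMF_APP with the nvmf_tgt
# binary (e.g. NVMF_APP=("$SPDK_BIN_DIR/nvmf_tgt")), a non-root, interrupt-mode
# run would end up invoking roughly:
#   sudo -E -u $SUDO_USER LD_LIBRARY_PATH=... nvmf_tgt -i $NVMF_APP_SHM_ID \
#     -e 0xFFFF --interrupt-mode --huge-dir $SPDK_HUGE_DIR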

source "$rootdir/scripts/common.sh"

: ${NVMF_APP_SHM_ID="0"}
export NVMF_APP_SHM_ID
build_nvmf_app_args

have_pci_nics=0

function rxe_cfg() {
	"$rootdir/scripts/rxe_cfg_small.sh" "$@"
}

function load_ib_rdma_modules() {
	if [ $(uname) != Linux ]; then
		return 0
	fi

	modprobe ib_cm
	modprobe ib_core
	modprobe ib_umad
	modprobe ib_uverbs
	modprobe iw_cm
	modprobe rdma_cm
	modprobe rdma_ucm
}

function allocate_nic_ips() {
	((count = NVMF_IP_LEAST_ADDR))
	for nic_name in $(get_rdma_if_list); do
		ip="$(get_ip_address $nic_name)"
		if [[ -z $ip ]]; then
			ip addr add $NVMF_IP_PREFIX.$count/24 dev $nic_name
			ip link set $nic_name up
			((count = count + 1))
		fi
		# dump configuration for debug log
		ip addr show $nic_name
	done
}
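# Illustrative only: with NVMF_IP_PREFIX=192.168.100 and NVMF_IP_LEAST_ADDR=8,
# RDMA-capable interfaces that have no address yet get 192.168.100.8/24,
# 192.168.100.9/24, and so on, in the order returned by get_rdma_if_list().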

function get_available_rdma_ips() {
	for nic_name in $(get_rdma_if_list); do
		get_ip_address $nic_name
	done
}

function get_rdma_if_list() {
	local net_dev rxe_net_dev rxe_net_devs

	mapfile -t rxe_net_devs < <(rxe_cfg rxe-net)

	if ((${#net_devs[@]} == 0)); then
		return 1
	fi

	# Pick only those devices that were found during the gather_supported_nvmf_pci_devs() run
	for net_dev in "${net_devs[@]}"; do
		for rxe_net_dev in "${rxe_net_devs[@]}"; do
			if [[ $net_dev == "$rxe_net_dev" ]]; then
				echo "$net_dev"
				continue 2
			fi
		done
	done
}

function get_ip_address() {
	interface=$1
	ip -o -4 addr show $interface | awk '{print $4}' | cut -d"/" -f1
}
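# Illustrative only: get_ip_address prints the IPv4 address(es) assigned to a
# netdev, one per line, e.g. "get_ip_address eth0" -> "192.168.100.8"; the
# output is empty if the interface has no IPv4 address.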

function nvmfcleanup() {
	sync

	if [ "$TEST_TRANSPORT" == "tcp" ] || [ "$TEST_TRANSPORT" == "rdma" ]; then
		set +e
		for i in {1..20}; do
			modprobe -v -r nvme-$TEST_TRANSPORT
			if modprobe -v -r nvme-fabrics; then
				set -e
				return 0
			fi
			sleep 1
		done
		set -e

		# So far unable to remove the kernel modules. Try
		# one more time and let it fail.
		# Allow the transport module to fail for now. See Jim's comment
		# about the nvme-tcp module below.
		modprobe -v -r nvme-$TEST_TRANSPORT || true
		modprobe -v -r nvme-fabrics
	fi
}

function nvmf_veth_init() {
	NVMF_FIRST_INITIATOR_IP=10.0.0.1
	NVMF_SECOND_INITIATOR_IP=10.0.0.2
	NVMF_FIRST_TARGET_IP=10.0.0.3
	NVMF_SECOND_TARGET_IP=10.0.0.4
	NVMF_INITIATOR_IP=$NVMF_FIRST_INITIATOR_IP
	NVMF_BRIDGE="nvmf_br"
	NVMF_INITIATOR_INTERFACE="nvmf_init_if"
	NVMF_INITIATOR_INTERFACE2="nvmf_init_if2"
	NVMF_INITIATOR_BRIDGE="nvmf_init_br"
	NVMF_INITIATOR_BRIDGE2="nvmf_init_br2"
	NVMF_TARGET_NAMESPACE="nvmf_tgt_ns_spdk"
	NVMF_TARGET_NS_CMD=(ip netns exec "$NVMF_TARGET_NAMESPACE")
	NVMF_TARGET_INTERFACE="nvmf_tgt_if"
	NVMF_TARGET_INTERFACE2="nvmf_tgt_if2"
	NVMF_TARGET_BRIDGE="nvmf_tgt_br"
	NVMF_TARGET_BRIDGE2="nvmf_tgt_br2"

	ip link set $NVMF_INITIATOR_BRIDGE nomaster || true
	ip link set $NVMF_INITIATOR_BRIDGE2 nomaster || true
	ip link set $NVMF_TARGET_BRIDGE nomaster || true
	ip link set $NVMF_TARGET_BRIDGE2 nomaster || true
	ip link set $NVMF_INITIATOR_BRIDGE down || true
	ip link set $NVMF_INITIATOR_BRIDGE2 down || true
	ip link set $NVMF_TARGET_BRIDGE down || true
	ip link set $NVMF_TARGET_BRIDGE2 down || true
	ip link delete $NVMF_BRIDGE type bridge || true
	ip link delete $NVMF_INITIATOR_INTERFACE || true
	ip link delete $NVMF_INITIATOR_INTERFACE2 || true
	"${NVMF_TARGET_NS_CMD[@]}" ip link delete $NVMF_TARGET_INTERFACE || true
	"${NVMF_TARGET_NS_CMD[@]}" ip link delete $NVMF_TARGET_INTERFACE2 || true

	# Create network namespace
	ip netns add $NVMF_TARGET_NAMESPACE

	# Create veth (Virtual ethernet) interface pairs
	ip link add $NVMF_INITIATOR_INTERFACE type veth peer name $NVMF_INITIATOR_BRIDGE
	ip link add $NVMF_INITIATOR_INTERFACE2 type veth peer name $NVMF_INITIATOR_BRIDGE2
	ip link add $NVMF_TARGET_INTERFACE type veth peer name $NVMF_TARGET_BRIDGE
	ip link add $NVMF_TARGET_INTERFACE2 type veth peer name $NVMF_TARGET_BRIDGE2

	# Associate veth interface pairs with network namespace
	ip link set $NVMF_TARGET_INTERFACE netns $NVMF_TARGET_NAMESPACE
	ip link set $NVMF_TARGET_INTERFACE2 netns $NVMF_TARGET_NAMESPACE

	# Allocate IP addresses
	ip addr add $NVMF_FIRST_INITIATOR_IP/24 dev $NVMF_INITIATOR_INTERFACE
	ip addr add $NVMF_SECOND_INITIATOR_IP/24 dev $NVMF_INITIATOR_INTERFACE2
	"${NVMF_TARGET_NS_CMD[@]}" ip addr add $NVMF_FIRST_TARGET_IP/24 dev $NVMF_TARGET_INTERFACE
	"${NVMF_TARGET_NS_CMD[@]}" ip addr add $NVMF_SECOND_TARGET_IP/24 dev $NVMF_TARGET_INTERFACE2

	# Link up veth interfaces
	ip link set $NVMF_INITIATOR_INTERFACE up
	ip link set $NVMF_INITIATOR_INTERFACE2 up
	ip link set $NVMF_INITIATOR_BRIDGE up
	ip link set $NVMF_INITIATOR_BRIDGE2 up
	ip link set $NVMF_TARGET_BRIDGE up
	ip link set $NVMF_TARGET_BRIDGE2 up
	"${NVMF_TARGET_NS_CMD[@]}" ip link set $NVMF_TARGET_INTERFACE up
	"${NVMF_TARGET_NS_CMD[@]}" ip link set $NVMF_TARGET_INTERFACE2 up
	"${NVMF_TARGET_NS_CMD[@]}" ip link set lo up

	# Create a bridge
	ip link add $NVMF_BRIDGE type bridge
	ip link set $NVMF_BRIDGE up

	# Add veth interfaces to the bridge
	ip link set $NVMF_INITIATOR_BRIDGE master $NVMF_BRIDGE
	ip link set $NVMF_INITIATOR_BRIDGE2 master $NVMF_BRIDGE
	ip link set $NVMF_TARGET_BRIDGE master $NVMF_BRIDGE
	ip link set $NVMF_TARGET_BRIDGE2 master $NVMF_BRIDGE

	# Accept connections from veth interface
	ipts -I INPUT 1 -i $NVMF_INITIATOR_INTERFACE -p tcp --dport $NVMF_PORT -j ACCEPT
	ipts -I INPUT 1 -i $NVMF_INITIATOR_INTERFACE2 -p tcp --dport $NVMF_PORT -j ACCEPT
	ipts -A FORWARD -i $NVMF_BRIDGE -o $NVMF_BRIDGE -j ACCEPT

	# Verify connectivity
	ping -c 1 $NVMF_FIRST_TARGET_IP
	ping -c 1 $NVMF_SECOND_TARGET_IP
	"${NVMF_TARGET_NS_CMD[@]}" ping -c 1 $NVMF_FIRST_INITIATOR_IP
	"${NVMF_TARGET_NS_CMD[@]}" ping -c 1 $NVMF_SECOND_INITIATOR_IP

	NVMF_APP=("${NVMF_TARGET_NS_CMD[@]}" "${NVMF_APP[@]}")
}
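# Resulting layout (names/addresses as set in nvmf_veth_init() above): the
# initiator ends of the veth pairs (nvmf_init_if/nvmf_init_if2, 10.0.0.1/.2)
# stay in the main namespace, the target ends (nvmf_tgt_if/nvmf_tgt_if2,
# 10.0.0.3/.4) are moved into nvmf_tgt_ns_spdk, and all four peer ends are
# enslaved to the nvmf_br bridge which forwards traffic between them. The
# target app is then launched inside the namespace via
# "ip netns exec nvmf_tgt_ns_spdk" (see the NVMF_APP assignment above).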

function nvmf_veth_fini() {
	# Cleanup bridge, veth interfaces, and network namespace
	# Note: removing one veth removes the pair
	ip link set $NVMF_INITIATOR_BRIDGE nomaster
	ip link set $NVMF_INITIATOR_BRIDGE2 nomaster
	ip link set $NVMF_TARGET_BRIDGE nomaster
	ip link set $NVMF_TARGET_BRIDGE2 nomaster
	ip link set $NVMF_INITIATOR_BRIDGE down
	ip link set $NVMF_INITIATOR_BRIDGE2 down
	ip link set $NVMF_TARGET_BRIDGE down
	ip link set $NVMF_TARGET_BRIDGE2 down
	ip link delete $NVMF_BRIDGE type bridge
	ip link delete $NVMF_INITIATOR_INTERFACE
	ip link delete $NVMF_INITIATOR_INTERFACE2
	"${NVMF_TARGET_NS_CMD[@]}" ip link delete $NVMF_TARGET_INTERFACE
	"${NVMF_TARGET_NS_CMD[@]}" ip link delete $NVMF_TARGET_INTERFACE2
	remove_spdk_ns
}

function nvmf_tcp_init() {
	NVMF_FIRST_INITIATOR_IP=10.0.0.1
	NVMF_FIRST_TARGET_IP=10.0.0.2
	NVMF_INITIATOR_IP=$NVMF_FIRST_INITIATOR_IP
	TCP_INTERFACE_LIST=("${net_devs[@]}")

	# We need two net devs at minimum
	((${#TCP_INTERFACE_LIST[@]} > 1))

	NVMF_TARGET_INTERFACE=${TCP_INTERFACE_LIST[0]}
	NVMF_INITIATOR_INTERFACE=${TCP_INTERFACE_LIST[1]}

	# Skip the nvmf_multipath case in nvmf_tcp_init() - it is covered by nvmf_veth_init().
	NVMF_SECOND_TARGET_IP=""
	NVMF_SECOND_INITIATOR_IP=""

	NVMF_TARGET_NAMESPACE="${NVMF_TARGET_INTERFACE}_ns_spdk"
	NVMF_TARGET_NS_CMD=(ip netns exec "$NVMF_TARGET_NAMESPACE")
	ip -4 addr flush $NVMF_TARGET_INTERFACE || true
	ip -4 addr flush $NVMF_INITIATOR_INTERFACE || true

	# Create network namespace
	ip netns add $NVMF_TARGET_NAMESPACE

	# Associate the target phy interface with the network namespace
	ip link set $NVMF_TARGET_INTERFACE netns $NVMF_TARGET_NAMESPACE

	# Allocate IP addresses
	ip addr add $NVMF_INITIATOR_IP/24 dev $NVMF_INITIATOR_INTERFACE
	"${NVMF_TARGET_NS_CMD[@]}" ip addr add $NVMF_FIRST_TARGET_IP/24 dev $NVMF_TARGET_INTERFACE

	# Link up phy interfaces
	ip link set $NVMF_INITIATOR_INTERFACE up

	"${NVMF_TARGET_NS_CMD[@]}" ip link set $NVMF_TARGET_INTERFACE up
	"${NVMF_TARGET_NS_CMD[@]}" ip link set lo up

	# Accept connections from the phy interface
	ipts -I INPUT 1 -i $NVMF_INITIATOR_INTERFACE -p tcp --dport $NVMF_PORT -j ACCEPT

	# Verify connectivity
	ping -c 1 $NVMF_FIRST_TARGET_IP
	"${NVMF_TARGET_NS_CMD[@]}" ping -c 1 $NVMF_INITIATOR_IP

	NVMF_APP=("${NVMF_TARGET_NS_CMD[@]}" "${NVMF_APP[@]}")
}
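# In this hardware (phy) TCP setup the first supported net device becomes the
# target interface and is moved into its own "<iface>_ns_spdk" namespace with
# 10.0.0.2/24, while the second device stays in the main namespace as the
# initiator with 10.0.0.1/24, so target and initiator talk over the physical
# link instead of the veth/bridge setup above.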

function nvmf_tcp_fini() {
	iptr
	if [[ "$NVMF_TARGET_NAMESPACE" == "nvmf_tgt_ns_spdk" ]]; then
		nvmf_veth_fini
		return 0
	fi
	remove_spdk_ns
	ip -4 addr flush $NVMF_INITIATOR_INTERFACE || :
}

function gather_supported_nvmf_pci_devs() {
	# Go through the entire pci bus and gather all ethernet controllers we support for the nvmf tests.
	# Focus on the hardware that's currently being tested by the CI.
	xtrace_disable
	cache_pci_bus_sysfs
	xtrace_restore

	local intel=0x8086 mellanox=0x15b3 pci net_dev

	local -a pci_devs=()
	local -a pci_net_devs=()
	local -A pci_drivers=()

	local -ga net_devs=()
	local -ga e810=()
	local -ga x722=()
	local -ga mlx=()

	# E810-XXV
	e810+=(${pci_bus_cache["$intel:0x1592"]})
	e810+=(${pci_bus_cache["$intel:0x159b"]})
	# X722 10G
	x722+=(${pci_bus_cache["$intel:0x37d2"]})
	# BlueField 3
	mlx+=(${pci_bus_cache["$mellanox:0xa2dc"]})
	# ConnectX-7
	mlx+=(${pci_bus_cache["$mellanox:0x1021"]})
	# BlueField 2
	mlx+=(${pci_bus_cache["$mellanox:0xa2d6"]})
	# ConnectX-6 Dx
	mlx+=(${pci_bus_cache["$mellanox:0x101d"]})
	# ConnectX-6
	mlx+=(${pci_bus_cache["$mellanox:0x101b"]})
	# ConnectX-5
	mlx+=(${pci_bus_cache["$mellanox:0x1017"]})
	mlx+=(${pci_bus_cache["$mellanox:0x1019"]})
	# ConnectX-4
	mlx+=(${pci_bus_cache["$mellanox:0x1015"]})
	mlx+=(${pci_bus_cache["$mellanox:0x1013"]})

	pci_devs+=("${e810[@]}")
	if [[ $TEST_TRANSPORT == rdma ]]; then
		pci_devs+=("${x722[@]}")
		pci_devs+=("${mlx[@]}")
	fi

	# Try to respect what the CI wants to test and override pci_devs[]
	if [[ $SPDK_TEST_NVMF_NICS == mlx5 ]]; then
		pci_devs=("${mlx[@]}")
	elif [[ $SPDK_TEST_NVMF_NICS == e810 ]]; then
		pci_devs=("${e810[@]}")
	elif [[ $SPDK_TEST_NVMF_NICS == x722 ]]; then
		pci_devs=("${x722[@]}")
	fi

	if ((${#pci_devs[@]} == 0)); then
		return 1
	fi

	# Load proper kernel modules if necessary
	for pci in "${pci_devs[@]}"; do
		echo "Found $pci (${pci_ids_vendor["$pci"]} - ${pci_ids_device["$pci"]})"
		if [[ ${pci_mod_resolved["$pci"]} == unknown ]]; then
			echo "Unresolved modalias for $pci (${pci_mod_driver["$pci"]}). Driver not installed|builtin?"
			continue
		fi
		if [[ ${pci_bus_driver["$pci"]} == unbound ]]; then
			echo "$pci not bound, needs ${pci_mod_resolved["$pci"]}"
			pci_drivers["${pci_mod_resolved["$pci"]}"]=1
		fi
		if [[ ${pci_ids_device["$pci"]} == "0x1017" ]] \
			|| [[ ${pci_ids_device["$pci"]} == "0x1019" ]] \
			|| [[ $TEST_TRANSPORT == rdma ]]; then
			# Reduce the maximum number of queues when connecting with
			# ConnectX-5 NICs. On host systems with nproc > 64, connecting
			# with the default options (the default equals the number of
			# online host CPUs) makes creating all IO queues take too much
			# time and results in a keep-alive timeout.
			# See:
			# https://github.com/spdk/spdk/issues/2772
			# 0x1017 - MT27800 Family ConnectX-5
			# 0x1019 - MT28800 Family ConnectX-5 Ex
			NVME_CONNECT="nvme connect -i 15"
		fi
	done

	if ((${#pci_drivers[@]} > 0)); then
		echo "Loading kernel modules: ${!pci_drivers[*]}"
		modprobe -a "${!pci_drivers[@]}"
	fi

	# E810 cards also need the irdma driver to be around.
	if [[ $SPDK_TEST_NVMF_NICS == e810 && $TEST_TRANSPORT == rdma ]]; then
		if [[ -e /sys/module/irdma/parameters/roce_ena ]]; then
			# Our tests don't play well with the iWARP protocol since CQ resize is not supported.
			# This may affect some tests, especially those which target multiconnection setups.
			# Considering all that, make sure we use RoCEv2 instead.
			(($(< /sys/module/irdma/parameters/roce_ena) != 1)) && modprobe -r irdma
		fi
		modinfo irdma && modprobe irdma roce_ena=1
	fi > /dev/null

	# All devices detected, kernel modules loaded. Now look under the net class to see if there
	# are any net devices bound to the controllers.
	for pci in "${pci_devs[@]}"; do
		pci_net_devs=("/sys/bus/pci/devices/$pci/net/"*)

		# Check if the available devices are in a proper operational state. If not, remove them from the
		# main list. This check is valid for TCP only, since for RDMA we use infiniband, which doesn't rely
		# on the actual UP state of the device.
		if [[ $TEST_TRANSPORT == tcp ]]; then
			for net_dev in "${!pci_net_devs[@]}"; do
				[[ $(< "${pci_net_devs[net_dev]}/operstate") == up ]] || unset -v "pci_net_devs[net_dev]"
			done
		fi

		if ((${#pci_net_devs[@]} == 0)); then
			echo "No operational net devices associated with $pci"
			continue
		fi

		pci_net_devs=("${pci_net_devs[@]##*/}")
		echo "Found net devices under $pci: ${pci_net_devs[*]}"
		net_devs+=("${pci_net_devs[@]}")
	done

	if ((${#net_devs[@]} == 0)); then
		return 1
	fi
}

prepare_net_devs() {
	local -g is_hw=no

	remove_spdk_ns

	[[ $NET_TYPE != virt ]] && gather_supported_nvmf_pci_devs && is_hw=yes

	if [[ $is_hw == yes ]]; then
		if [[ $TEST_TRANSPORT == tcp ]]; then
			nvmf_tcp_init
		elif [[ $TEST_TRANSPORT == rdma ]]; then
			rdma_device_init
		fi
		return 0
	elif [[ $NET_TYPE == phy ]]; then
		echo "ERROR: No supported devices were found, cannot run the $TEST_TRANSPORT test"
		return 1
	elif [[ $NET_TYPE == phy-fallback ]]; then
		echo "WARNING: No supported devices were found, fallback requested for $TEST_TRANSPORT test"
	fi

	# NET_TYPE == virt or phy-fallback
	if [[ $TEST_TRANSPORT == tcp ]]; then
		nvmf_veth_init
		return 0
	fi

	echo "ERROR: virt and fallback setup is not supported for $TEST_TRANSPORT"
	return 1
}

function nvmftestinit() {
	if [ -z $TEST_TRANSPORT ]; then
		echo "transport not specified - use --transport= to specify"
		return 1
	fi

	trap 'nvmftestfini' SIGINT SIGTERM EXIT

	prepare_net_devs

	if [ "$TEST_MODE" == "iso" ]; then
		$rootdir/scripts/setup.sh
	fi

	NVMF_TRANSPORT_OPTS="-t $TEST_TRANSPORT"
	if [[ "$TEST_TRANSPORT" == "rdma" ]]; then
		RDMA_IP_LIST=$(get_available_rdma_ips)
		NVMF_FIRST_TARGET_IP=$(echo "$RDMA_IP_LIST" | head -n 1)
		NVMF_SECOND_TARGET_IP=$(echo "$RDMA_IP_LIST" | tail -n +2 | head -n 1)
		if [ -z $NVMF_FIRST_TARGET_IP ]; then
			echo "no RDMA NIC for nvmf test"
			exit 1
		fi
		NVMF_TRANSPORT_OPTS="$NVMF_TRANSPORT_OPTS --num-shared-buffers 1024"
	elif [[ "$TEST_TRANSPORT" == "tcp" ]]; then
		NVMF_TRANSPORT_OPTS="$NVMF_TRANSPORT_OPTS -o"
	fi

	if [ "$TEST_TRANSPORT" == "tcp" ] || [ "$TEST_TRANSPORT" == "rdma" ]; then
		# Currently we run the host/perf test for TCP even on systems without kernel nvme-tcp
		# support; that's fine since the host/perf test uses the SPDK initiator.
		# Maybe later we will enforce modprobe to succeed once we have systems in the test pool
		# with nvme-tcp kernel support - but until then let this pass so we can still run the
		# host/perf test with the tcp transport.
		modprobe nvme-$TEST_TRANSPORT || true
	fi
}

function nvmfappstart() {
	timing_enter start_nvmf_tgt
	"${NVMF_APP[@]}" "$@" &
	nvmfpid=$!
	waitforlisten $nvmfpid
	timing_exit start_nvmf_tgt
	trap 'process_shm --id $NVMF_APP_SHM_ID || :; nvmftestfini' SIGINT SIGTERM EXIT
}
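# Illustrative sketch (not a fixed recipe) of how the helpers above are usually
# combined in an nvmf test; the core mask and transport parameters are just
# examples:
#
#   nvmftestinit
#   nvmfappstart -m 0xF
#   $rpc_py nvmf_create_transport $NVMF_TRANSPORT_OPTS
#   ... exercise the target at $NVMF_FIRST_TARGET_IP:$NVMF_PORT ...
#   nvmftestfini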

function nvmftestfini() {
	nvmfcleanup || :
	if [ -n "$nvmfpid" ]; then
		killprocess $nvmfpid
	fi
	if [ "$TEST_MODE" == "iso" ]; then
		$rootdir/scripts/setup.sh reset
	fi
	if [[ "$TEST_TRANSPORT" == "tcp" ]]; then
		nvmf_tcp_fini
	fi
}

function rdma_device_init() {
	load_ib_rdma_modules
	allocate_nic_ips
}

function nvme_connect() {
	local init_count
	init_count=$(nvme list | wc -l)

	if ! nvme connect "$@"; then return $?; fi

	for i in $(seq 1 10); do
		if [ $(nvme list | wc -l) -gt $init_count ]; then
			return 0
		else
			sleep 1s
		fi
	done
	return 1
}
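# Illustrative only: nvme_connect() takes regular nvme-cli "connect" arguments
# and waits for the new controller to show up in "nvme list", e.g.:
#   nvme_connect "${NVME_HOST[@]}" -t $TEST_TRANSPORT -n $NVME_SUBNQN \
#     -a $NVMF_FIRST_TARGET_IP -s $NVMF_PORT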

function get_nvme_devs() {
	local dev _

	while read -r dev _; do
		if [[ $dev == /dev/nvme* ]]; then
			echo "$dev"
		fi
	done < <(nvme list)
}

function gen_nvmf_target_json() {
	local subsystem config=()

	for subsystem in "${@:-1}"; do
		config+=(
			"$(
				cat <<- EOF
					{
					  "params": {
					    "name": "Nvme$subsystem",
					    "trtype": "$TEST_TRANSPORT",
					    "traddr": "$NVMF_FIRST_TARGET_IP",
					    "adrfam": "ipv4",
					    "trsvcid": "$NVMF_PORT",
					    "subnqn": "nqn.2016-06.io.spdk:cnode$subsystem",
					    "hostnqn": "nqn.2016-06.io.spdk:host$subsystem",
					    "hdgst": ${hdgst:-false},
					    "ddgst": ${ddgst:-false}
					  },
					  "method": "bdev_nvme_attach_controller"
					}
				EOF
			)"
		)
	done
	jq . <<- JSON
		{
		  "subsystems": [
		    {
		      "subsystem": "bdev",
		      "config": [
			{
			  "method": "bdev_nvme_set_options",
			  "params": {
				"action_on_timeout": "none",
				"timeout_us": 0,
				"transport_retry_count": 4,
				"arbitration_burst": 0,
				"low_priority_weight": 0,
				"medium_priority_weight": 0,
				"high_priority_weight": 0,
				"nvme_adminq_poll_period_us": 10000,
				"keep_alive_timeout_ms" : 10000,
				"nvme_ioq_poll_period_us": 0,
				"io_queue_requests": 0,
				"delay_cmd_submit": true
			  }
			},
		        $(
			IFS=","
			printf '%s\n' "${config[*]}"
		),
			{
			  "method": "bdev_wait_for_examine"
			}
		      ]
		    }
		  ]
		}
	JSON
}
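# Illustrative only: "gen_nvmf_target_json 0 1" emits a bdev subsystem config
# that attaches controllers Nvme0 and Nvme1 to cnode0/cnode1 at
# $NVMF_FIRST_TARGET_IP:$NVMF_PORT. Such a config is typically consumed by an
# SPDK application acting as the initiator (e.g. passed in through a --json
# style option).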

function _remove_spdk_ns() {
	local ns {ns,mn,an}_net_devs
	while read -r ns _; do
		[[ $ns == *_spdk ]] || continue
		# Gather all devs from the target $ns namespace. We want to differentiate
		# between veth and physical links and gather just the latter. To do so,
		# we simply compare ifindex to iflink - as per the kernel docs, these should
		# always be equal for physical links. For veth devices, since they are
		# paired, iflink should point at an actual bridge, hence being different
		# from its own ifindex.
		ns_net_devs=($(
			ip netns exec "$ns" bash <<- 'IN_NS'
				shopt -s extglob nullglob
				for dev in /sys/class/net/!(lo|bond*); do
					(($(< "$dev/ifindex") == $(< "$dev/iflink"))) || continue
					echo "${dev##*/}"
				done
			IN_NS
		))
		# Gather all the net devs from the main ns
		mn_net_devs=($(basename -a /sys/class/net/!(lo|bond*)))
		# Merge these two to have a list for comparison
		an_net_devs=($(printf '%s\n' "${ns_net_devs[@]}" "${mn_net_devs[@]}" | sort))

		ip netns delete "$ns"

		# Check if our list matches against the main ns after $ns got deleted
		while [[ ${an_net_devs[*]} != "${mn_net_devs[*]}" ]]; do
			mn_net_devs=($(basename -a /sys/class/net/!(lo|bond*)))
			sleep 1s
		done
	done < <(ip netns list)
}

remove_spdk_ns() {
	xtrace_disable_per_cmd _remove_spdk_ns
}

configure_kernel_target() {
	local kernel_name=$1 kernel_target_ip=$2
	# Keep it global in scope for easier cleanup
	nvmet=/sys/kernel/config/nvmet
	kernel_subsystem=$nvmet/subsystems/$kernel_name
	kernel_namespace=$kernel_subsystem/namespaces/1
	kernel_port=$nvmet/ports/1

	local block nvme

	if [[ ! -e /sys/module/nvmet ]]; then
		modprobe nvmet
	fi

	[[ -e $nvmet ]]

	"$rootdir/scripts/setup.sh" reset

	# Find an nvme with an active ns device
	for block in /sys/block/nvme*; do
		[[ -e $block ]] || continue
		is_block_zoned "${block##*/}" && continue
		block_in_use "${block##*/}" || nvme="/dev/${block##*/}"
	done

	[[ -b $nvme ]]

	mkdir "$kernel_subsystem"
	mkdir "$kernel_namespace"
	mkdir "$kernel_port"

	# attr_serial accepts only a %llx value and for some reason the kernel swaps
	# the byte order, so setting the serial is not very useful here.
	# "$kernel_subsystem/attr_serial"
	echo "SPDK-$kernel_name" > "$kernel_subsystem/attr_model"

	echo 1 > "$kernel_subsystem/attr_allow_any_host"
	echo "$nvme" > "$kernel_namespace/device_path"
	echo 1 > "$kernel_namespace/enable"

	echo "$kernel_target_ip" > "$kernel_port/addr_traddr"
	echo "$TEST_TRANSPORT" > "$kernel_port/addr_trtype"
	echo "$NVMF_PORT" > "$kernel_port/addr_trsvcid"
	echo ipv4 > "$kernel_port/addr_adrfam"

	# Enable the listener by linking the port to the previously created subsystem
	ln -s "$kernel_subsystem" "$kernel_port/subsystems/"

	# Check if the target is available
	nvme discover "${NVME_HOST[@]}" -a "$kernel_target_ip" -t "$TEST_TRANSPORT" -s "$NVMF_PORT"
}
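# Illustrative only: the arguments are a subsystem NQN and the IP the kernel
# target should listen on, e.g.:
#   configure_kernel_target "nqn.2016-06.io.spdk:kernel_target" "$(get_main_ns_ip)"
# clean_kernel_target() below removes the configfs entries again.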

clean_kernel_target() {
	[[ -e $kernel_subsystem ]] || return 0

	echo 0 > "$kernel_namespace/enable"

	rm -f "$kernel_port/subsystems/${kernel_subsystem##*/}"
	rmdir "$kernel_namespace"
	rmdir "$kernel_port"
	rmdir "$kernel_subsystem"

	modules=(/sys/module/nvmet/holders/*)

	modprobe -r "${modules[@]##*/}" nvmet

	# Give all nvmes back to userspace
	"$rootdir/scripts/setup.sh"
}

format_key() {
	local prefix key digest

	prefix="$1" key="$2" digest="$3"
	python - <<- EOF
		import base64, zlib

		crc = zlib.crc32(b"$key").to_bytes(4, byteorder="little")
		b64 = base64.b64encode(b"$key" + crc).decode("utf-8")
		print("$prefix:{:02x}:{}:".format($digest, b64), end="")
	EOF
}
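# The wrappers below feed format_key() the two prefixes used by the tests. The
# resulting string is "<prefix>:<digest id as 2 hex digits>:<base64 of key bytes
# followed by their little-endian CRC32>:", i.e. the textual form used for NVMe
# TLS PSK interchange ("NVMeTLSkey-1") and DH-HMAC-CHAP ("DHHC-1") secrets.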

format_interchange_psk() {
	format_key "NVMeTLSkey-1" "$1" "$2"
}

format_dhchap_key() {
	format_key "DHHC-1" "$1" "$2"
}

gen_dhchap_key() {
	local digest len file key
	local -A digests=([null]=0 [sha256]=1 [sha384]=2 [sha512]=3)

	digest="$1" len=$2
	key=$(xxd -p -c0 -l $((len / 2)) /dev/urandom)
	file=$(mktemp -t "spdk.key-$1.XXX")
	format_dhchap_key "$key" "${digests[$1]}" > "$file"
	chmod 0600 "$file"

	echo "$file"
}
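# Illustrative only: "key_file=$(gen_dhchap_key sha256 32)" creates a 0600 temp
# file containing a "DHHC-1:01:..." secret whose key material is 32 hex
# characters read from /dev/urandom (i.e. 16 random bytes).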

get_main_ns_ip() {
	# Determine which ip to use based on the nvmftestinit() setup. For tcp we pick
	# the interface which resides in the main net namespace and which is visible
	# to nvmet under the tcp setup. $NVMF_FIRST_TARGET_IP is solely for rdma use.
	# FIXME: This requires proper unification of the networking setup across
	# different transports.
	local ip
	local -A ip_candidates=()

	ip_candidates["rdma"]=NVMF_FIRST_TARGET_IP
	ip_candidates["tcp"]=NVMF_INITIATOR_IP

	[[ -z $TEST_TRANSPORT || -z ${ip_candidates["$TEST_TRANSPORT"]} ]] && return 1
	ip=${ip_candidates["$TEST_TRANSPORT"]}

	if [[ -z ${!ip} ]]; then
		echo "$ip not set, call nvmftestinit() first" >&2
		return 1
	fi

	echo "${!ip}"
}

uuid2nguid() {
	tr -d - <<< "${1^^}"
}
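# Illustrative only: uuid2nguid() uppercases a UUID and strips the dashes, e.g.
# "11111111-2222-3333-4444-555555555555" -> "11111111222233334444555555555555",
# which is the dash-less form typically used for an NGUID.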

ipts() { iptables "$@" -m comment --comment "SPDK_NVMF:$*"; }
iptr() { iptables-save | grep -v SPDK_NVMF | iptables-restore; }
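# ipts() tags every rule it adds with an "SPDK_NVMF:<rule>" comment so that
# iptr() can later remove exactly those rules by filtering them out of the
# iptables-save output and loading the result back with iptables-restore.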
792