#  SPDX-License-Identifier: BSD-3-Clause
#  Copyright (C) 2016 Intel Corporation
#  Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES
#  All rights reserved.
#

[[ $(uname -s) == FreeBSD ]] && return 0

NVMF_PORT=4420
NVMF_SECOND_PORT=4421
NVMF_THIRD_PORT=4422
NVMF_IP_PREFIX="192.168.100"
NVMF_IP_LEAST_ADDR=8
NVMF_TCP_IP_ADDRESS="127.0.0.1"
NVMF_TRANSPORT_OPTS=""
NVMF_SERIAL=SPDKISFASTANDAWESOME
NVME_HOSTNQN=$(nvme gen-hostnqn)
NVME_HOSTID=${NVME_HOSTNQN##*:}
NVME_HOST=("--hostnqn=$NVME_HOSTNQN" "--hostid=$NVME_HOSTID")
NVME_CONNECT="nvme connect"
NET_TYPE=${NET_TYPE:-phy-fallback}
NVME_SUBNQN=nqn.2016-06.io.spdk:testnqn
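# These defaults are typically combined when connecting through nvme-cli, roughly
# like this (a sketch; the flags are standard nvme-cli options):
#   $NVME_CONNECT "${NVME_HOST[@]}" -t "$TEST_TRANSPORT" -n "$NVME_SUBNQN" -a "$NVMF_FIRST_TARGET_IP" -s "$NVMF_PORT"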

function build_nvmf_app_args() {
	if [ $SPDK_RUN_NON_ROOT -eq 1 ]; then
		# We assume that the test script was started with sudo
		NVMF_APP=(sudo -E -u $SUDO_USER "LD_LIBRARY_PATH=$LD_LIBRARY_PATH" "${NVMF_APP[@]}")
	fi
	NVMF_APP+=(-i "$NVMF_APP_SHM_ID" -e 0xFFFF)

	NVMF_APP+=("${NO_HUGE[@]}")

	if [ "$TEST_INTERRUPT_MODE" -eq 1 ]; then
		NVMF_APP+=(--interrupt-mode)
	fi

	if [ -n "$SPDK_HUGE_DIR" ]; then
		NVMF_APP+=(--huge-dir "$SPDK_HUGE_DIR")
	elif [ $SPDK_RUN_NON_ROOT -eq 1 ]; then
		echo "In non-root test mode you have to set the SPDK_HUGE_DIR variable." >&2
		echo "For example:" >&2
		echo "sudo mkdir /mnt/spdk_hugetlbfs" >&2
		echo "sudo chown ${SUDO_USER}: /mnt/spdk_hugetlbfs" >&2
		echo "export SPDK_HUGE_DIR=/mnt/spdk_hugetlbfs" >&2
		return 1
	fi
}
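# A sketch of what NVMF_APP may look like after build_nvmf_app_args(), assuming the
# caller initialized it to point at the target binary (e.g. nvmf_tgt) and runs non-root:
#   (sudo -E -u "$SUDO_USER" LD_LIBRARY_PATH=... nvmf_tgt -i "$NVMF_APP_SHM_ID" -e 0xFFFF --huge-dir "$SPDK_HUGE_DIR")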

source "$rootdir/scripts/common.sh"

: ${NVMF_APP_SHM_ID="0"}
export NVMF_APP_SHM_ID
build_nvmf_app_args

have_pci_nics=0

function rxe_cfg() {
	"$rootdir/scripts/rxe_cfg_small.sh" "$@"
}

function load_ib_rdma_modules() {
	if [ $(uname) != Linux ]; then
		return 0
	fi

	modprobe ib_cm
	modprobe ib_core
	modprobe ib_umad
	modprobe ib_uverbs
	modprobe iw_cm
	modprobe rdma_cm
	modprobe rdma_ucm
}

function allocate_nic_ips() {
	((count = NVMF_IP_LEAST_ADDR))
	for nic_name in $(get_rdma_if_list); do
		ip="$(get_ip_address $nic_name)"
		if [[ -z $ip ]]; then
			ip addr add $NVMF_IP_PREFIX.$count/24 dev $nic_name
			ip link set $nic_name up
			((count = count + 1))
		fi
		# dump configuration for debug log
		ip addr show $nic_name
	done
}

function get_available_rdma_ips() {
	for nic_name in $(get_rdma_if_list); do
		get_ip_address $nic_name
	done
}

function get_rdma_if_list() {
	local net_dev rxe_net_dev rxe_net_devs

	mapfile -t rxe_net_devs < <(rxe_cfg rxe-net)

	if ((${#net_devs[@]} == 0)); then
		return 1
	fi

	# Pick only those devices that were found during the gather_supported_nvmf_pci_devs() run
	for net_dev in "${net_devs[@]}"; do
		for rxe_net_dev in "${rxe_net_devs[@]}"; do
			if [[ $net_dev == "$rxe_net_dev" ]]; then
				echo "$net_dev"
				continue 2
			fi
		done
	done
}

function get_ip_address() {
	interface=$1
	ip -o -4 addr show $interface | awk '{print $4}' | cut -d"/" -f1
}
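# e.g. get_ip_address eth0 may print something like 192.168.100.8 (the device's IPv4 address(es))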

function nvmfcleanup() {
	sync

	if [ "$TEST_TRANSPORT" == "tcp" ] || [ "$TEST_TRANSPORT" == "rdma" ]; then
		set +e
		for i in {1..20}; do
			modprobe -v -r nvme-$TEST_TRANSPORT
			if modprobe -v -r nvme-fabrics; then
				set -e
				return 0
			fi
			sleep 1
		done
		set -e

		# So far we have been unable to remove the kernel modules. Try
		# one more time and let it fail.
		# Allow the transport module to fail for now. See Jim's comment
		# about the nvme-tcp module below.
		modprobe -v -r nvme-$TEST_TRANSPORT || true
		modprobe -v -r nvme-fabrics
	fi
}

function nvmf_veth_init() {
	NVMF_FIRST_INITIATOR_IP=10.0.0.1
	NVMF_SECOND_INITIATOR_IP=10.0.0.2
	NVMF_FIRST_TARGET_IP=10.0.0.3
	NVMF_SECOND_TARGET_IP=10.0.0.4
	NVMF_INITIATOR_IP=$NVMF_FIRST_INITIATOR_IP
	NVMF_BRIDGE="nvmf_br"
	NVMF_INITIATOR_INTERFACE="nvmf_init_if"
	NVMF_INITIATOR_INTERFACE2="nvmf_init_if2"
	NVMF_INITIATOR_BRIDGE="nvmf_init_br"
	NVMF_INITIATOR_BRIDGE2="nvmf_init_br2"
	NVMF_TARGET_NAMESPACE="nvmf_tgt_ns_spdk"
	NVMF_TARGET_NS_CMD=(ip netns exec "$NVMF_TARGET_NAMESPACE")
	NVMF_TARGET_INTERFACE="nvmf_tgt_if"
	NVMF_TARGET_INTERFACE2="nvmf_tgt_if2"
	NVMF_TARGET_BRIDGE="nvmf_tgt_br"
	NVMF_TARGET_BRIDGE2="nvmf_tgt_br2"

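	# Rough topology set up below (a sketch): four veth pairs, with all *_br ends
	# enslaved to the nvmf_br bridge and the nvmf_tgt_if* ends moved into the
	# nvmf_tgt_ns_spdk namespace where the target runs:
	#   nvmf_init_if  (10.0.0.1) <-> nvmf_init_br  --[nvmf_br]-- nvmf_tgt_br  <-> nvmf_tgt_if  (10.0.0.3)
	#   nvmf_init_if2 (10.0.0.2) <-> nvmf_init_br2 --[nvmf_br]-- nvmf_tgt_br2 <-> nvmf_tgt_if2 (10.0.0.4)
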
	ip link set $NVMF_INITIATOR_BRIDGE nomaster || true
	ip link set $NVMF_INITIATOR_BRIDGE2 nomaster || true
	ip link set $NVMF_TARGET_BRIDGE nomaster || true
	ip link set $NVMF_TARGET_BRIDGE2 nomaster || true
	ip link set $NVMF_INITIATOR_BRIDGE down || true
	ip link set $NVMF_INITIATOR_BRIDGE2 down || true
	ip link set $NVMF_TARGET_BRIDGE down || true
	ip link set $NVMF_TARGET_BRIDGE2 down || true
	ip link delete $NVMF_BRIDGE type bridge || true
	ip link delete $NVMF_INITIATOR_INTERFACE || true
	ip link delete $NVMF_INITIATOR_INTERFACE2 || true
	"${NVMF_TARGET_NS_CMD[@]}" ip link delete $NVMF_TARGET_INTERFACE || true
	"${NVMF_TARGET_NS_CMD[@]}" ip link delete $NVMF_TARGET_INTERFACE2 || true

	# Create network namespace
	ip netns add $NVMF_TARGET_NAMESPACE

	# Create veth (Virtual ethernet) interface pairs
	ip link add $NVMF_INITIATOR_INTERFACE type veth peer name $NVMF_INITIATOR_BRIDGE
	ip link add $NVMF_INITIATOR_INTERFACE2 type veth peer name $NVMF_INITIATOR_BRIDGE2
	ip link add $NVMF_TARGET_INTERFACE type veth peer name $NVMF_TARGET_BRIDGE
	ip link add $NVMF_TARGET_INTERFACE2 type veth peer name $NVMF_TARGET_BRIDGE2

	# Associate veth interface pairs with network namespace
	ip link set $NVMF_TARGET_INTERFACE netns $NVMF_TARGET_NAMESPACE
	ip link set $NVMF_TARGET_INTERFACE2 netns $NVMF_TARGET_NAMESPACE

	# Allocate IP addresses
	ip addr add $NVMF_FIRST_INITIATOR_IP/24 dev $NVMF_INITIATOR_INTERFACE
	ip addr add $NVMF_SECOND_INITIATOR_IP/24 dev $NVMF_INITIATOR_INTERFACE2
	"${NVMF_TARGET_NS_CMD[@]}" ip addr add $NVMF_FIRST_TARGET_IP/24 dev $NVMF_TARGET_INTERFACE
	"${NVMF_TARGET_NS_CMD[@]}" ip addr add $NVMF_SECOND_TARGET_IP/24 dev $NVMF_TARGET_INTERFACE2

	# Link up veth interfaces
	ip link set $NVMF_INITIATOR_INTERFACE up
	ip link set $NVMF_INITIATOR_INTERFACE2 up
	ip link set $NVMF_INITIATOR_BRIDGE up
	ip link set $NVMF_INITIATOR_BRIDGE2 up
	ip link set $NVMF_TARGET_BRIDGE up
	ip link set $NVMF_TARGET_BRIDGE2 up
	"${NVMF_TARGET_NS_CMD[@]}" ip link set $NVMF_TARGET_INTERFACE up
	"${NVMF_TARGET_NS_CMD[@]}" ip link set $NVMF_TARGET_INTERFACE2 up
	"${NVMF_TARGET_NS_CMD[@]}" ip link set lo up

	# Create a bridge
	ip link add $NVMF_BRIDGE type bridge
	ip link set $NVMF_BRIDGE up

	# Add veth interfaces to the bridge
	ip link set $NVMF_INITIATOR_BRIDGE master $NVMF_BRIDGE
	ip link set $NVMF_INITIATOR_BRIDGE2 master $NVMF_BRIDGE
	ip link set $NVMF_TARGET_BRIDGE master $NVMF_BRIDGE
	ip link set $NVMF_TARGET_BRIDGE2 master $NVMF_BRIDGE

	# Accept connections from the veth interfaces
	ipts -I INPUT 1 -i $NVMF_INITIATOR_INTERFACE -p tcp --dport $NVMF_PORT -j ACCEPT
	ipts -I INPUT 1 -i $NVMF_INITIATOR_INTERFACE2 -p tcp --dport $NVMF_PORT -j ACCEPT
	ipts -A FORWARD -i $NVMF_BRIDGE -o $NVMF_BRIDGE -j ACCEPT

	# Verify connectivity
	ping -c 1 $NVMF_FIRST_TARGET_IP
	ping -c 1 $NVMF_SECOND_TARGET_IP
	"${NVMF_TARGET_NS_CMD[@]}" ping -c 1 $NVMF_FIRST_INITIATOR_IP
	"${NVMF_TARGET_NS_CMD[@]}" ping -c 1 $NVMF_SECOND_INITIATOR_IP

	NVMF_APP=("${NVMF_TARGET_NS_CMD[@]}" "${NVMF_APP[@]}")
}

function nvmf_veth_fini() {
	# Clean up the bridge, veth interfaces, and network namespace
	# Note: removing one end of a veth pair removes the other end as well
	ip link set $NVMF_INITIATOR_BRIDGE nomaster
	ip link set $NVMF_INITIATOR_BRIDGE2 nomaster
	ip link set $NVMF_TARGET_BRIDGE nomaster
	ip link set $NVMF_TARGET_BRIDGE2 nomaster
	ip link set $NVMF_INITIATOR_BRIDGE down
	ip link set $NVMF_INITIATOR_BRIDGE2 down
	ip link set $NVMF_TARGET_BRIDGE down
	ip link set $NVMF_TARGET_BRIDGE2 down
	ip link delete $NVMF_BRIDGE type bridge
	ip link delete $NVMF_INITIATOR_INTERFACE
	ip link delete $NVMF_INITIATOR_INTERFACE2
	"${NVMF_TARGET_NS_CMD[@]}" ip link delete $NVMF_TARGET_INTERFACE
	"${NVMF_TARGET_NS_CMD[@]}" ip link delete $NVMF_TARGET_INTERFACE2
	remove_spdk_ns
}

function nvmf_tcp_init() {
	NVMF_FIRST_INITIATOR_IP=10.0.0.1
	NVMF_FIRST_TARGET_IP=10.0.0.2
	NVMF_INITIATOR_IP=$NVMF_FIRST_INITIATOR_IP
	TCP_INTERFACE_LIST=("${net_devs[@]}")

	# We need two net devs at minimum
	((${#TCP_INTERFACE_LIST[@]} > 1))

	NVMF_TARGET_INTERFACE=${TCP_INTERFACE_LIST[0]}
	NVMF_INITIATOR_INTERFACE=${TCP_INTERFACE_LIST[1]}

	# Skip the nvmf_multipath case in nvmf_tcp_init(); it is covered by nvmf_veth_init().
	NVMF_SECOND_TARGET_IP=""
	NVMF_SECOND_INITIATOR_IP=""

	NVMF_TARGET_NAMESPACE="${NVMF_TARGET_INTERFACE}_ns_spdk"
	NVMF_TARGET_NS_CMD=(ip netns exec "$NVMF_TARGET_NAMESPACE")
	ip -4 addr flush $NVMF_TARGET_INTERFACE || true
	ip -4 addr flush $NVMF_INITIATOR_INTERFACE || true

	# Create network namespace
	ip netns add $NVMF_TARGET_NAMESPACE

	# Associate the target phy interface with the network namespace
	ip link set $NVMF_TARGET_INTERFACE netns $NVMF_TARGET_NAMESPACE

	# Allocate IP addresses
	ip addr add $NVMF_INITIATOR_IP/24 dev $NVMF_INITIATOR_INTERFACE
	"${NVMF_TARGET_NS_CMD[@]}" ip addr add $NVMF_FIRST_TARGET_IP/24 dev $NVMF_TARGET_INTERFACE

	# Link up phy interfaces
	ip link set $NVMF_INITIATOR_INTERFACE up

	"${NVMF_TARGET_NS_CMD[@]}" ip link set $NVMF_TARGET_INTERFACE up
	"${NVMF_TARGET_NS_CMD[@]}" ip link set lo up

	# Accept connections from phy interface
	ipts -I INPUT 1 -i $NVMF_INITIATOR_INTERFACE -p tcp --dport $NVMF_PORT -j ACCEPT

	# Verify connectivity
	ping -c 1 $NVMF_FIRST_TARGET_IP
	"${NVMF_TARGET_NS_CMD[@]}" ping -c 1 $NVMF_INITIATOR_IP

	NVMF_APP=("${NVMF_TARGET_NS_CMD[@]}" "${NVMF_APP[@]}")
}

function nvmf_tcp_fini() {
	iptr
	if [[ "$NVMF_TARGET_NAMESPACE" == "nvmf_tgt_ns_spdk" ]]; then
		nvmf_veth_fini
		return 0
	fi
	remove_spdk_ns
	ip -4 addr flush $NVMF_INITIATOR_INTERFACE || :
}

function gather_supported_nvmf_pci_devs() {
	# Go through the entire pci bus and gather all ethernet controllers we support for the nvmf tests.
	# Focus on the hardware that's currently being tested by the CI.
	xtrace_disable
	cache_pci_bus_sysfs
	xtrace_restore

	local intel=0x8086 mellanox=0x15b3 pci net_dev

	local -a pci_devs=()
	local -a pci_net_devs=()
	local -A pci_drivers=()

	local -ga net_devs=()
	local -ga e810=()
	local -ga x722=()
	local -ga mlx=()

	# E810-XXV
	e810+=(${pci_bus_cache["$intel:0x1592"]})
	e810+=(${pci_bus_cache["$intel:0x159b"]})
	# X722 10G
	x722+=(${pci_bus_cache["$intel:0x37d2"]})
	# BlueField 3
	mlx+=(${pci_bus_cache["$mellanox:0xa2dc"]})
	# ConnectX-7
	mlx+=(${pci_bus_cache["$mellanox:0x1021"]})
	# BlueField 2
	mlx+=(${pci_bus_cache["$mellanox:0xa2d6"]})
	# ConnectX-6 Dx
	mlx+=(${pci_bus_cache["$mellanox:0x101d"]})
	# ConnectX-6
	mlx+=(${pci_bus_cache["$mellanox:0x101b"]})
	# ConnectX-5
	mlx+=(${pci_bus_cache["$mellanox:0x1017"]})
	mlx+=(${pci_bus_cache["$mellanox:0x1019"]})
	# ConnectX-4
	mlx+=(${pci_bus_cache["$mellanox:0x1015"]})
	mlx+=(${pci_bus_cache["$mellanox:0x1013"]})

	pci_devs+=("${e810[@]}")
	if [[ $TEST_TRANSPORT == rdma ]]; then
		pci_devs+=("${x722[@]}")
		pci_devs+=("${mlx[@]}")
	fi

	# Try to respect what CI wants to test and override pci_devs[]
	if [[ $SPDK_TEST_NVMF_NICS == mlx5 ]]; then
		pci_devs=("${mlx[@]}")
	elif [[ $SPDK_TEST_NVMF_NICS == e810 ]]; then
		pci_devs=("${e810[@]}")
	elif [[ $SPDK_TEST_NVMF_NICS == x722 ]]; then
		pci_devs=("${x722[@]}")
	fi

	if ((${#pci_devs[@]} == 0)); then
		return 1
	fi

	# Load proper kernel modules if necessary
	for pci in "${pci_devs[@]}"; do
		echo "Found $pci (${pci_ids_vendor["$pci"]} - ${pci_ids_device["$pci"]})"
		if [[ ${pci_mod_resolved["$pci"]} == unknown ]]; then
			echo "Unresolved modalias for $pci (${pci_mod_driver["$pci"]}). Driver not installed|builtin?"
			continue
		fi
		if [[ ${pci_bus_driver["$pci"]} == unbound ]]; then
			echo "$pci not bound, needs ${pci_mod_resolved["$pci"]}"
			pci_drivers["${pci_mod_resolved["$pci"]}"]=1
		fi
		if [[ ${pci_ids_device["$pci"]} == "0x1017" ]] \
			|| [[ ${pci_ids_device["$pci"]} == "0x1019" ]] \
			|| [[ $TEST_TRANSPORT == rdma ]]; then
			# Reduce the maximum number of queues when connecting with
			# ConnectX-5 NICs. On host systems with nproc > 64, connecting
			# with the default options (where the default equals the number
			# of online host CPUs) makes creating all IO queues take too
			# long and results in a keep-alive timeout.
			# See:
			# https://github.com/spdk/spdk/issues/2772
			# 0x1017 - MT27800 Family ConnectX-5
			# 0x1019 - MT28800 Family ConnectX-5 Ex
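			# -i caps the number of I/O queues the initiator creates (nvme-cli's --nr-io-queues)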
			NVME_CONNECT="nvme connect -i 15"
		fi
	done

	if ((${#pci_drivers[@]} > 0)); then
		echo "Loading kernel modules: ${!pci_drivers[*]}"
		modprobe -a "${!pci_drivers[@]}"
	fi

	# E810 cards also need the irdma driver to be around.
	if [[ $SPDK_TEST_NVMF_NICS == e810 && $TEST_TRANSPORT == rdma ]]; then
		if [[ -e /sys/module/irdma/parameters/roce_ena ]]; then
			# Our tests don't play well with the iWARP protocol. Make sure we use RoCEv2 instead.
			(($(< /sys/module/irdma/parameters/roce_ena) != 1)) && modprobe -r irdma
		fi
		modinfo irdma && modprobe irdma roce_ena=1
	fi > /dev/null

	# All devices detected, kernel modules loaded. Now look under net class to see if there
	# are any net devices bound to the controllers.
	for pci in "${pci_devs[@]}"; do
		pci_net_devs=("/sys/bus/pci/devices/$pci/net/"*)

		# Check if the available devices are in a proper operational state. If not, remove them from the main list.
		# This check is valid for TCP only, since for RDMA we use InfiniBand, which doesn't rely on the actual UP
		# state of the device.
		if [[ $TEST_TRANSPORT == tcp ]]; then
			for net_dev in "${!pci_net_devs[@]}"; do
				[[ $(< "${pci_net_devs[net_dev]}/operstate") == up ]] || unset -v "pci_net_devs[net_dev]"
			done
		fi

		if ((${#pci_net_devs[@]} == 0)); then
			echo "No operational net devices associated with $pci"
			continue
		fi

		pci_net_devs=("${pci_net_devs[@]##*/}")
		echo "Found net devices under $pci: ${pci_net_devs[*]}"
		net_devs+=("${pci_net_devs[@]}")
	done

	if ((${#net_devs[@]} == 0)); then
		return 1
	fi
}

prepare_net_devs() {
	local -g is_hw=no

	remove_spdk_ns

	[[ $NET_TYPE != virt ]] && gather_supported_nvmf_pci_devs && is_hw=yes

	if [[ $is_hw == yes ]]; then
		if [[ $TEST_TRANSPORT == tcp ]]; then
			nvmf_tcp_init
		elif [[ $TEST_TRANSPORT == rdma ]]; then
			rdma_device_init
		fi
		return 0
	elif [[ $NET_TYPE == phy ]]; then
		echo "ERROR: No supported devices were found, cannot run the $TEST_TRANSPORT test"
		return 1
	elif [[ $NET_TYPE == phy-fallback ]]; then
		echo "WARNING: No supported devices were found, fallback requested for $TEST_TRANSPORT test"
	fi

	# NET_TYPE == virt or phy-fallback
	if [[ $TEST_TRANSPORT == tcp ]]; then
		nvmf_veth_init
		return 0
	fi

	echo "ERROR: virt and fallback setup is not supported for $TEST_TRANSPORT"
	return 1
}

function nvmftestinit() {
	if [ -z $TEST_TRANSPORT ]; then
		echo "transport not specified - use --transport= to specify"
		return 1
	fi

	trap 'nvmftestfini' SIGINT SIGTERM EXIT

	prepare_net_devs

	if [ "$TEST_MODE" == "iso" ]; then
		$rootdir/scripts/setup.sh
	fi

	NVMF_TRANSPORT_OPTS="-t $TEST_TRANSPORT"
	if [[ "$TEST_TRANSPORT" == "rdma" ]]; then
		RDMA_IP_LIST=$(get_available_rdma_ips)
		NVMF_FIRST_TARGET_IP=$(echo "$RDMA_IP_LIST" | head -n 1)
		NVMF_SECOND_TARGET_IP=$(echo "$RDMA_IP_LIST" | tail -n +2 | head -n 1)
		if [ -z $NVMF_FIRST_TARGET_IP ]; then
			echo "no RDMA NIC for nvmf test"
			exit 1
		fi
		NVMF_TRANSPORT_OPTS="$NVMF_TRANSPORT_OPTS --num-shared-buffers 1024"
	elif [[ "$TEST_TRANSPORT" == "tcp" ]]; then
		NVMF_TRANSPORT_OPTS="$NVMF_TRANSPORT_OPTS -o"
	fi

	if [ "$TEST_TRANSPORT" == "tcp" ] || [ "$TEST_TRANSPORT" == "rdma" ]; then
		# currently we run the host/perf test for TCP even on systems without kernel nvme-tcp
		#  support; that's fine since the host/perf test uses the SPDK initiator
		# maybe later we will enforce modprobe to succeed once we have systems in the test pool
		#  with nvme-tcp kernel support - but until then let this pass so we can still run the
		#  host/perf test with the tcp transport
		modprobe nvme-$TEST_TRANSPORT || true
	fi
}

function nvmfappstart() {
	timing_enter start_nvmf_tgt
	"${NVMF_APP[@]}" "$@" &
	nvmfpid=$!
	waitforlisten $nvmfpid
	timing_exit start_nvmf_tgt
	trap 'process_shm --id $NVMF_APP_SHM_ID || :; nvmftestfini' SIGINT SIGTERM EXIT
}

function nvmftestfini() {
	nvmfcleanup || :
	if [ -n "$nvmfpid" ]; then
		killprocess $nvmfpid
	fi
	if [ "$TEST_MODE" == "iso" ]; then
		$rootdir/scripts/setup.sh reset
	fi
	if [[ "$TEST_TRANSPORT" == "tcp" ]]; then
		nvmf_tcp_fini
	fi
}
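
# A typical test script sources this file and then roughly does (a sketch):
#   nvmftestinit
#   nvmfappstart -m 0xF
#   ... set up the target over RPC and run the actual test ...
#   nvmftestfini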

function rdma_device_init() {
	load_ib_rdma_modules
	allocate_nic_ips
}

function nvme_connect() {
	local init_count
	init_count=$(nvme list | wc -l)

	if ! nvme connect "$@"; then return $?; fi

	for i in $(seq 1 10); do
		if [ $(nvme list | wc -l) -gt $init_count ]; then
			return 0
		else
			sleep 1s
		fi
	done
	return 1
}

function get_nvme_devs() {
	local dev _

	while read -r dev _; do
		if [[ $dev == /dev/nvme* ]]; then
			echo "$dev"
		fi
	done < <(nvme list)
}

function gen_nvmf_target_json() {
	local subsystem config=()

	for subsystem in "${@:-1}"; do
		config+=(
			"$(
				cat <<- EOF
					{
					  "params": {
					    "name": "Nvme$subsystem",
					    "trtype": "$TEST_TRANSPORT",
					    "traddr": "$NVMF_FIRST_TARGET_IP",
					    "adrfam": "ipv4",
					    "trsvcid": "$NVMF_PORT",
					    "subnqn": "nqn.2016-06.io.spdk:cnode$subsystem",
					    "hostnqn": "nqn.2016-06.io.spdk:host$subsystem",
					    "hdgst": ${hdgst:-false},
					    "ddgst": ${ddgst:-false}
					  },
					  "method": "bdev_nvme_attach_controller"
					}
				EOF
			)"
		)
	done
	jq . <<- JSON
		{
		  "subsystems": [
		    {
		      "subsystem": "bdev",
		      "config": [
			{
			  "method": "bdev_nvme_set_options",
			  "params": {
				"action_on_timeout": "none",
				"timeout_us": 0,
				"transport_retry_count": 4,
				"arbitration_burst": 0,
				"low_priority_weight": 0,
				"medium_priority_weight": 0,
				"high_priority_weight": 0,
				"nvme_adminq_poll_period_us": 10000,
				"keep_alive_timeout_ms" : 10000,
				"nvme_ioq_poll_period_us": 0,
				"io_queue_requests": 0,
				"delay_cmd_submit": true
			  }
			},
		        $(
			IFS=","
			printf '%s\n' "${config[*]}"
		),
			{
			  "method": "bdev_wait_for_examine"
			}
		      ]
		    }
		  ]
		}
	JSON
}
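
# Example (a sketch): "gen_nvmf_target_json 1 2" emits a bdev subsystem config that
# attaches Nvme1/Nvme2 controllers to cnode1/cnode2 at $NVMF_FIRST_TARGET_IP; callers
# typically feed the output to an SPDK app via its --json option.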

function _remove_spdk_ns() {
	local ns {ns,mn,an}_net_devs
	while read -r ns _; do
		[[ $ns == *_spdk ]] || continue
		# Gather all devs from the target $ns namespace. We want to differentiate
		# between veth and physical links and gather just the latter. To do so,
		# we simply compare ifindex to iflink - as per kernel docs, these should
		# always be equal for physical links. For veth devices, since they are
		# paired, iflink points at the peer interface (attached to the bridge in
		# our setup), hence differing from the device's own ifindex.
		ns_net_devs=($(
			ip netns exec "$ns" bash <<- 'IN_NS'
				shopt -s extglob nullglob
				for dev in /sys/class/net/!(lo|bond*); do
					(($(< "$dev/ifindex") == $(< "$dev/iflink"))) || continue
					echo "${dev##*/}"
				done
			IN_NS
		))
		# Gather all the net devs from the main ns
		mn_net_devs=($(basename -a /sys/class/net/!(lo|bond*)))
		# Merge these two to have a list for comparison
		an_net_devs=($(printf '%s\n' "${ns_net_devs[@]}" "${mn_net_devs[@]}" | sort))

		ip netns delete "$ns"

		# Check if our list matches against the main ns after $ns got deleted
		while [[ ${an_net_devs[*]} != "${mn_net_devs[*]}" ]]; do
			mn_net_devs=($(basename -a /sys/class/net/!(lo|bond*)))
			sleep 1s
		done
	done < <(ip netns list)
}

remove_spdk_ns() {
	xtrace_disable_per_cmd _remove_spdk_ns
}

configure_kernel_target() {
	local kernel_name=$1 kernel_target_ip=$2
	# Keep it global in scope for easier cleanup
	nvmet=/sys/kernel/config/nvmet
	kernel_subsystem=$nvmet/subsystems/$kernel_name
	kernel_namespace=$kernel_subsystem/namespaces/1
	kernel_port=$nvmet/ports/1

	local block nvme

	if [[ ! -e /sys/module/nvmet ]]; then
		modprobe nvmet
	fi

	[[ -e $nvmet ]]

	"$rootdir/scripts/setup.sh" reset

	# Find nvme with an active ns device
	for block in /sys/block/nvme*; do
		[[ -e $block ]] || continue
		is_block_zoned "${block##*/}" && continue
		block_in_use "${block##*/}" || nvme="/dev/${block##*/}"
	done

	[[ -b $nvme ]]

	mkdir "$kernel_subsystem"
	mkdir "$kernel_namespace"
	mkdir "$kernel_port"

	# attr_serial only accepts a %llx value and for some reason the kernel swaps
	# the byte order, so setting the serial is not very useful here:
	# "$kernel_subsystem/attr_serial"
	echo "SPDK-$kernel_name" > "$kernel_subsystem/attr_model"

	echo 1 > "$kernel_subsystem/attr_allow_any_host"
	echo "$nvme" > "$kernel_namespace/device_path"
	echo 1 > "$kernel_namespace/enable"

	echo "$kernel_target_ip" > "$kernel_port/addr_traddr"
	echo "$TEST_TRANSPORT" > "$kernel_port/addr_trtype"
	echo "$NVMF_PORT" > "$kernel_port/addr_trsvcid"
	echo ipv4 > "$kernel_port/addr_adrfam"

	# Enable the listener by linking the port to the previously created subsystem
	ln -s "$kernel_subsystem" "$kernel_port/subsystems/"

	# Check if the target is available
	nvme discover "${NVME_HOST[@]}" -a "$kernel_target_ip" -t "$TEST_TRANSPORT" -s "$NVMF_PORT"
}
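
# Example (a sketch; the subsystem nqn is illustrative):
#   configure_kernel_target "nqn.2016-06.io.spdk:kernel_target" "$(get_main_ns_ip)"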

clean_kernel_target() {
	[[ -e $kernel_subsystem ]] || return 0

	echo 0 > "$kernel_namespace/enable"

	rm -f "$kernel_port/subsystems/${kernel_subsystem##*/}"
	rmdir "$kernel_namespace"
	rmdir "$kernel_port"
	rmdir "$kernel_subsystem"

	modules=(/sys/module/nvmet/holders/*)

	modprobe -r "${modules[@]##*/}" nvmet

	# Hand all nvmes back to userspace
	"$rootdir/scripts/setup.sh"
}

format_key() {
	local prefix key digest

	prefix="$1" key="$2" digest="$3"
	python - <<- EOF
		import base64, zlib

		crc = zlib.crc32(b"$key").to_bytes(4, byteorder="little")
		b64 = base64.b64encode(b"$key" + crc).decode("utf-8")
		print("$prefix:{:02x}:{}:".format($digest, b64), end="")
	EOF
}
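
# format_key() prints the secret in the textual representation built above:
#   "<prefix>:<hash id as 2-digit hex>:<base64(key bytes + 4-byte little-endian CRC32)>:"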

format_interchange_psk() {
	format_key "NVMeTLSkey-1" "$1" "$2"
}

format_dhchap_key() {
	format_key "DHHC-1" "$1" "$2"
}

gen_dhchap_key() {
	local digest len file key
	local -A digests=([null]=0 [sha256]=1 [sha384]=2 [sha512]=3)

	digest="$1" len=$2
	key=$(xxd -p -c0 -l $((len / 2)) /dev/urandom)
	file=$(mktemp -t "spdk.key-$1.XXX")
	format_dhchap_key "$key" "${digests[$1]}" > "$file"
	chmod 0600 "$file"

	echo "$file"
}
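
# Usage sketch: key_file=$(gen_dhchap_key sha256 32) creates a temp file holding a
# "DHHC-1:01:...:" secret derived from 16 random bytes (32 hex chars); remove the file when done.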

get_main_ns_ip() {
	# Determine which IP to use based on the nvmftestinit() setup. For tcp we pick
	# the interface which resides in the main net namespace and is visible to
	# nvmet under the tcp setup. $NVMF_FIRST_TARGET_IP is solely for rdma use.
	# FIXME: This requires proper unification of the networking setup across
	# different transports.
	local ip
	local -A ip_candidates=()

	ip_candidates["rdma"]=NVMF_FIRST_TARGET_IP
	ip_candidates["tcp"]=NVMF_INITIATOR_IP

	[[ -z $TEST_TRANSPORT || -z ${ip_candidates["$TEST_TRANSPORT"]} ]] && return 1
	ip=${ip_candidates["$TEST_TRANSPORT"]}

	if [[ -z ${!ip} ]]; then
		echo "$ip not set, call nvmftestinit() first" >&2
		return 1
	fi

	echo "${!ip}"
}

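# Strip the dashes from a UUID and upper-case it so it can be compared against an NGUID.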
uuid2nguid() {
	tr -d - <<< "${1^^}"
}

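# ipts() tags every iptables rule it adds with an SPDK_NVMF comment so that iptr()
# can later restore the ruleset with all SPDK-added rules filtered out.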
ipts() { iptables "$@" -m comment --comment "SPDK_NVMF:$*"; }
iptr() { iptables-save | grep -v SPDK_NVMF | iptables-restore; }