#  SPDX-License-Identifier: BSD-3-Clause
#  Copyright (C) 2016 Intel Corporation
#  Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES
#  All rights reserved.
#

[[ $(uname -s) == FreeBSD ]] && return 0

NVMF_PORT=4420
NVMF_SECOND_PORT=4421
NVMF_THIRD_PORT=4422
NVMF_IP_PREFIX="192.168.100"
NVMF_IP_LEAST_ADDR=8
NVMF_TCP_IP_ADDRESS="127.0.0.1"
NVMF_TRANSPORT_OPTS=""
NVMF_SERIAL=SPDKISFASTANDAWESOME
NVME_HOSTNQN=$(nvme gen-hostnqn)
NVME_HOSTID=${NVME_HOSTNQN##*:}
NVME_HOST=("--hostnqn=$NVME_HOSTNQN" "--hostid=$NVME_HOSTID")
NVME_CONNECT="nvme connect"
NET_TYPE=${NET_TYPE:-phy-fallback}
NVME_SUBNQN=nqn.2016-06.io.spdk:testnqn

function build_nvmf_app_args() {
	if [ $SPDK_RUN_NON_ROOT -eq 1 ]; then
		# We assume that the test script is started with sudo
		NVMF_APP=(sudo -E -u $SUDO_USER "LD_LIBRARY_PATH=$LD_LIBRARY_PATH" "${NVMF_APP[@]}")
	fi
	NVMF_APP+=(-i "$NVMF_APP_SHM_ID" -e 0xFFFF)

	NVMF_APP+=("${NO_HUGE[@]}")

	if [ -n "$SPDK_HUGE_DIR" ]; then
		NVMF_APP+=(--huge-dir "$SPDK_HUGE_DIR")
	elif [ $SPDK_RUN_NON_ROOT -eq 1 ]; then
		echo "In non-root test mode you have to set the SPDK_HUGE_DIR variable." >&2
		echo "For example:" >&2
		echo "sudo mkdir /mnt/spdk_hugetlbfs" >&2
		echo "sudo chown ${SUDO_USER}: /mnt/spdk_hugetlbfs" >&2
		echo "export SPDK_HUGE_DIR=/mnt/spdk_hugetlbfs" >&2
		return 1
	fi
}
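
# Illustration (hypothetical values): with SPDK_RUN_NON_ROOT=1, NVMF_APP_SHM_ID=0 and
# SPDK_HUGE_DIR=/mnt/spdk_hugetlbfs, "${NVMF_APP[@]}" ends up expanding roughly to:
#   sudo -E -u $SUDO_USER LD_LIBRARY_PATH=... nvmf_tgt -i 0 -e 0xFFFF --huge-dir /mnt/spdk_hugetlbfs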

source "$rootdir/scripts/common.sh"

: ${NVMF_APP_SHM_ID="0"}
export NVMF_APP_SHM_ID
build_nvmf_app_args

have_pci_nics=0

function rxe_cfg() {
	"$rootdir/scripts/rxe_cfg_small.sh" "$@"
}

function load_ib_rdma_modules() {
	if [ $(uname) != Linux ]; then
		return 0
	fi

	modprobe ib_cm
	modprobe ib_core
	modprobe ib_umad
	modprobe ib_uverbs
	modprobe iw_cm
	modprobe rdma_cm
	modprobe rdma_ucm
}

function allocate_nic_ips() {
	((count = NVMF_IP_LEAST_ADDR))
	for nic_name in $(get_rdma_if_list); do
		ip="$(get_ip_address $nic_name)"
		if [[ -z $ip ]]; then
			ip addr add $NVMF_IP_PREFIX.$count/24 dev $nic_name
			ip link set $nic_name up
			((count = count + 1))
		fi
		# dump configuration for debug log
		ip addr show $nic_name
	done
}
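
# Illustration: with NVMF_IP_PREFIX=192.168.100 and NVMF_IP_LEAST_ADDR=8, RDMA-capable
# interfaces that lack an IPv4 address get 192.168.100.8/24, 192.168.100.9/24, and so on.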

function get_available_rdma_ips() {
	for nic_name in $(get_rdma_if_list); do
		get_ip_address $nic_name
	done
}

function get_rdma_if_list() {
	local net_dev rxe_net_dev rxe_net_devs

	mapfile -t rxe_net_devs < <(rxe_cfg rxe-net)

	if ((${#net_devs[@]} == 0)); then
		return 1
	fi

	# Pick only those devices which were found during the gather_supported_nvmf_pci_devs() run
	for net_dev in "${net_devs[@]}"; do
		for rxe_net_dev in "${rxe_net_devs[@]}"; do
			if [[ $net_dev == "$rxe_net_dev" ]]; then
				echo "$net_dev"
				continue 2
			fi
		done
	done
}

function get_ip_address() {
	interface=$1
	ip -o -4 addr show $interface | awk '{print $4}' | cut -d"/" -f1
}
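
# Example (illustrative): get_ip_address eth0 -> 192.168.100.8, i.e. the first
# IPv4 address assigned to the given interface.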

function nvmfcleanup() {
	sync

	if [ "$TEST_TRANSPORT" == "tcp" ] || [ "$TEST_TRANSPORT" == "rdma" ]; then
		set +e
		for i in {1..20}; do
			modprobe -v -r nvme-$TEST_TRANSPORT
			if modprobe -v -r nvme-fabrics; then
				set -e
				return 0
			fi
			sleep 1
		done
		set -e

		# So far we have been unable to remove the kernel modules. Try
		# one more time and let it fail.
		# Allow the transport module to fail for now. See Jim's comment
		# about the nvme-tcp module below.
		modprobe -v -r nvme-$TEST_TRANSPORT || true
		modprobe -v -r nvme-fabrics
	fi
}

function nvmf_veth_init() {
	NVMF_INITIATOR_IP=10.0.0.1
	NVMF_FIRST_TARGET_IP=10.0.0.2
	NVMF_SECOND_TARGET_IP=10.0.0.3
	NVMF_BRIDGE="nvmf_br"
	NVMF_INITIATOR_INTERFACE="nvmf_init_if"
	NVMF_INITIATOR_BRIDGE="nvmf_init_br"
	NVMF_TARGET_NAMESPACE="nvmf_tgt_ns_spdk"
	NVMF_TARGET_NS_CMD=(ip netns exec "$NVMF_TARGET_NAMESPACE")
	NVMF_TARGET_INTERFACE="nvmf_tgt_if"
	NVMF_TARGET_INTERFACE2="nvmf_tgt_if2"
	NVMF_TARGET_BRIDGE="nvmf_tgt_br"
	NVMF_TARGET_BRIDGE2="nvmf_tgt_br2"

	ip link set $NVMF_INITIATOR_BRIDGE nomaster || true
	ip link set $NVMF_TARGET_BRIDGE nomaster || true
	ip link set $NVMF_TARGET_BRIDGE2 nomaster || true
	ip link set $NVMF_INITIATOR_BRIDGE down || true
	ip link set $NVMF_TARGET_BRIDGE down || true
	ip link set $NVMF_TARGET_BRIDGE2 down || true
	ip link delete $NVMF_BRIDGE type bridge || true
	ip link delete $NVMF_INITIATOR_INTERFACE || true
	"${NVMF_TARGET_NS_CMD[@]}" ip link delete $NVMF_TARGET_INTERFACE || true
	"${NVMF_TARGET_NS_CMD[@]}" ip link delete $NVMF_TARGET_INTERFACE2 || true

	# Create network namespace
	ip netns add $NVMF_TARGET_NAMESPACE

	# Create veth (Virtual ethernet) interface pairs
	ip link add $NVMF_INITIATOR_INTERFACE type veth peer name $NVMF_INITIATOR_BRIDGE
	ip link add $NVMF_TARGET_INTERFACE type veth peer name $NVMF_TARGET_BRIDGE
	ip link add $NVMF_TARGET_INTERFACE2 type veth peer name $NVMF_TARGET_BRIDGE2

	# Associate veth interface pairs with network namespace
	ip link set $NVMF_TARGET_INTERFACE netns $NVMF_TARGET_NAMESPACE
	ip link set $NVMF_TARGET_INTERFACE2 netns $NVMF_TARGET_NAMESPACE

	# Allocate IP addresses
	ip addr add $NVMF_INITIATOR_IP/24 dev $NVMF_INITIATOR_INTERFACE
	"${NVMF_TARGET_NS_CMD[@]}" ip addr add $NVMF_FIRST_TARGET_IP/24 dev $NVMF_TARGET_INTERFACE
	"${NVMF_TARGET_NS_CMD[@]}" ip addr add $NVMF_SECOND_TARGET_IP/24 dev $NVMF_TARGET_INTERFACE2

	# Link up veth interfaces
	ip link set $NVMF_INITIATOR_INTERFACE up
	ip link set $NVMF_INITIATOR_BRIDGE up
	ip link set $NVMF_TARGET_BRIDGE up
	ip link set $NVMF_TARGET_BRIDGE2 up
	"${NVMF_TARGET_NS_CMD[@]}" ip link set $NVMF_TARGET_INTERFACE up
	"${NVMF_TARGET_NS_CMD[@]}" ip link set $NVMF_TARGET_INTERFACE2 up
	"${NVMF_TARGET_NS_CMD[@]}" ip link set lo up

	# Create a bridge
	ip link add $NVMF_BRIDGE type bridge
	ip link set $NVMF_BRIDGE up

	# Add veth interfaces to the bridge
	ip link set $NVMF_INITIATOR_BRIDGE master $NVMF_BRIDGE
	ip link set $NVMF_TARGET_BRIDGE master $NVMF_BRIDGE
	ip link set $NVMF_TARGET_BRIDGE2 master $NVMF_BRIDGE

	# Accept connections from veth interface
	iptables -I INPUT 1 -i $NVMF_INITIATOR_INTERFACE -p tcp --dport $NVMF_PORT -j ACCEPT
	iptables -A FORWARD -i $NVMF_BRIDGE -o $NVMF_BRIDGE -j ACCEPT

	# Verify connectivity
	ping -c 1 $NVMF_FIRST_TARGET_IP
	ping -c 1 $NVMF_SECOND_TARGET_IP
	"${NVMF_TARGET_NS_CMD[@]}" ping -c 1 $NVMF_INITIATOR_IP

	NVMF_APP=("${NVMF_TARGET_NS_CMD[@]}" "${NVMF_APP[@]}")
}
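
# Resulting topology (sketch): the bridges and the initiator end live in the default
# netns, while nvmf_tgt_if/nvmf_tgt_if2 live inside nvmf_tgt_ns_spdk:
#
#   nvmf_init_if (10.0.0.1) --veth-- nvmf_init_br --+
#                                                   +-- nvmf_br
#   nvmf_tgt_if  (10.0.0.2) --veth-- nvmf_tgt_br  --+
#   nvmf_tgt_if2 (10.0.0.3) --veth-- nvmf_tgt_br2 --+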

function nvmf_veth_fini() {
	# Cleanup bridge, veth interfaces, and network namespace
	# Note: removing one veth removes the pair
	ip link set $NVMF_INITIATOR_BRIDGE nomaster
	ip link set $NVMF_TARGET_BRIDGE nomaster
	ip link set $NVMF_TARGET_BRIDGE2 nomaster
	ip link set $NVMF_INITIATOR_BRIDGE down
	ip link set $NVMF_TARGET_BRIDGE down
	ip link set $NVMF_TARGET_BRIDGE2 down
	ip link delete $NVMF_BRIDGE type bridge
	ip link delete $NVMF_INITIATOR_INTERFACE
	"${NVMF_TARGET_NS_CMD[@]}" ip link delete $NVMF_TARGET_INTERFACE
	"${NVMF_TARGET_NS_CMD[@]}" ip link delete $NVMF_TARGET_INTERFACE2
	remove_spdk_ns
}

function nvmf_tcp_init() {
	NVMF_INITIATOR_IP=10.0.0.1
	NVMF_FIRST_TARGET_IP=10.0.0.2
	TCP_INTERFACE_LIST=("${net_devs[@]}")

	# We need at least two net devs
	((${#TCP_INTERFACE_LIST[@]} > 1))

	NVMF_TARGET_INTERFACE=${TCP_INTERFACE_LIST[0]}
	NVMF_INITIATOR_INTERFACE=${TCP_INTERFACE_LIST[1]}

	# Skip the nvmf_multipath case in nvmf_tcp_init(); it is covered by nvmf_veth_init().
	NVMF_SECOND_TARGET_IP=""

	NVMF_TARGET_NAMESPACE="${NVMF_TARGET_INTERFACE}_ns_spdk"
	NVMF_TARGET_NS_CMD=(ip netns exec "$NVMF_TARGET_NAMESPACE")
	ip -4 addr flush $NVMF_TARGET_INTERFACE || true
	ip -4 addr flush $NVMF_INITIATOR_INTERFACE || true

	# Create network namespace
	ip netns add $NVMF_TARGET_NAMESPACE

	# Associate the target phy interface with the network namespace
	ip link set $NVMF_TARGET_INTERFACE netns $NVMF_TARGET_NAMESPACE

	# Allocate IP addresses
	ip addr add $NVMF_INITIATOR_IP/24 dev $NVMF_INITIATOR_INTERFACE
	"${NVMF_TARGET_NS_CMD[@]}" ip addr add $NVMF_FIRST_TARGET_IP/24 dev $NVMF_TARGET_INTERFACE

	# Link up phy interfaces
	ip link set $NVMF_INITIATOR_INTERFACE up

	"${NVMF_TARGET_NS_CMD[@]}" ip link set $NVMF_TARGET_INTERFACE up
	"${NVMF_TARGET_NS_CMD[@]}" ip link set lo up

	# Accept connections from the phy interface
	iptables -I INPUT 1 -i $NVMF_INITIATOR_INTERFACE -p tcp --dport $NVMF_PORT -j ACCEPT

	# Verify connectivity
	ping -c 1 $NVMF_FIRST_TARGET_IP
	"${NVMF_TARGET_NS_CMD[@]}" ping -c 1 $NVMF_INITIATOR_IP

	NVMF_APP=("${NVMF_TARGET_NS_CMD[@]}" "${NVMF_APP[@]}")
}

function nvmf_tcp_fini() {
	if [[ "$NVMF_TARGET_NAMESPACE" == "nvmf_tgt_ns_spdk" ]]; then
		nvmf_veth_fini
		return 0
	fi
	remove_spdk_ns
	ip -4 addr flush $NVMF_INITIATOR_INTERFACE || :
}

function gather_supported_nvmf_pci_devs() {
	# Go through the entire pci bus and gather all ethernet controllers we support for the nvmf tests.
	# Focus on the hardware that's currently being tested by the CI.
	xtrace_disable
	cache_pci_bus_sysfs
	xtrace_restore

	local intel=0x8086 mellanox=0x15b3 pci net_dev

	local -a pci_devs=()
	local -a pci_net_devs=()
	local -A pci_drivers=()

	local -ga net_devs=()
	local -ga e810=()
	local -ga x722=()
	local -ga mlx=()

	# E810-XXV
	e810+=(${pci_bus_cache["$intel:0x1592"]})
	e810+=(${pci_bus_cache["$intel:0x159b"]})
	# X722 10G
	x722+=(${pci_bus_cache["$intel:0x37d2"]})
	# BlueField 3
	mlx+=(${pci_bus_cache["$mellanox:0xa2dc"]})
	# ConnectX-7
	mlx+=(${pci_bus_cache["$mellanox:0x1021"]})
	# BlueField 2
	mlx+=(${pci_bus_cache["$mellanox:0xa2d6"]})
	# ConnectX-6 Dx
	mlx+=(${pci_bus_cache["$mellanox:0x101d"]})
	# ConnectX-5
	mlx+=(${pci_bus_cache["$mellanox:0x1017"]})
	mlx+=(${pci_bus_cache["$mellanox:0x1019"]})
	# ConnectX-4
	mlx+=(${pci_bus_cache["$mellanox:0x1015"]})
	mlx+=(${pci_bus_cache["$mellanox:0x1013"]})

	pci_devs+=("${e810[@]}")
	if [[ $TEST_TRANSPORT == rdma ]]; then
		pci_devs+=("${x722[@]}")
		pci_devs+=("${mlx[@]}")
	fi

	# Try to respect what CI wants to test and override pci_devs[]
	if [[ $SPDK_TEST_NVMF_NICS == mlx5 ]]; then
		pci_devs=("${mlx[@]}")
	elif [[ $SPDK_TEST_NVMF_NICS == e810 ]]; then
		pci_devs=("${e810[@]}")
	elif [[ $SPDK_TEST_NVMF_NICS == x722 ]]; then
		pci_devs=("${x722[@]}")
	fi

	if ((${#pci_devs[@]} == 0)); then
		return 1
	fi

	# Load proper kernel modules if necessary
	for pci in "${pci_devs[@]}"; do
		echo "Found $pci (${pci_ids_vendor["$pci"]} - ${pci_ids_device["$pci"]})"
		if [[ ${pci_mod_resolved["$pci"]} == unknown ]]; then
			echo "Unresolved modalias for $pci (${pci_mod_driver["$pci"]}). Driver not installed|builtin?"
			continue
		fi
		if [[ ${pci_bus_driver["$pci"]} == unbound ]]; then
			echo "$pci not bound, needs ${pci_mod_resolved["$pci"]}"
			pci_drivers["${pci_mod_resolved["$pci"]}"]=1
		fi
		if [[ ${pci_ids_device["$pci"]} == "0x1017" ]] \
			|| [[ ${pci_ids_device["$pci"]} == "0x1019" ]] \
			|| [[ $TEST_TRANSPORT == rdma ]]; then
			# Reduce the maximum number of queues when connecting with
			# ConnectX-5 NICs. On host systems with nproc > 64,
			# connecting with default options (where the default equals
			# the number of online host CPUs) makes creating all IO
			# queues take too much time and results in a keep-alive timeout.
			# See:
			# https://github.com/spdk/spdk/issues/2772
			# 0x1017 - MT27800 Family ConnectX-5
			# 0x1019 - MT28800 Family ConnectX-5 Ex
			NVME_CONNECT="nvme connect -i 15"
		fi
	done

	if ((${#pci_drivers[@]} > 0)); then
		echo "Loading kernel modules: ${!pci_drivers[*]}"
		modprobe -a "${!pci_drivers[@]}"
	fi

	# E810 cards also need the irdma driver to be around.
	if [[ $SPDK_TEST_NVMF_NICS == e810 && $TEST_TRANSPORT == rdma ]]; then
		if [[ -e /sys/module/irdma/parameters/roce_ena ]]; then
			# Our tests don't play well with the iWARP protocol. Make sure we use RoCEv2 instead.
			(($(< /sys/module/irdma/parameters/roce_ena) != 1)) && modprobe -r irdma
		fi
		modinfo irdma && modprobe irdma roce_ena=1
	fi > /dev/null

	# All devices detected, kernel modules loaded. Now look under the net class to see if there
	# are any net devices bound to the controllers.
	for pci in "${pci_devs[@]}"; do
		pci_net_devs=("/sys/bus/pci/devices/$pci/net/"*)

		# Check if the available devices are in a proper operational state. If not, remove them from the main list.
		# This check is valid for TCP only since for RDMA we use infiniband, which doesn't rely on the actual UP
		# state of the device.
		if [[ $TEST_TRANSPORT == tcp ]]; then
			for net_dev in "${!pci_net_devs[@]}"; do
				[[ $(< "${pci_net_devs[net_dev]}/operstate") == up ]] || unset -v "pci_net_devs[net_dev]"
			done
		fi

		if ((${#pci_net_devs[@]} == 0)); then
			echo "No operational net devices associated with $pci"
			continue
		fi

		pci_net_devs=("${pci_net_devs[@]##*/}")
		echo "Found net devices under $pci: ${pci_net_devs[*]}"
		net_devs+=("${pci_net_devs[@]}")
	done

	if ((${#net_devs[@]} == 0)); then
		return 1
	fi
}

prepare_net_devs() {
	local -g is_hw=no

	remove_spdk_ns

	[[ $NET_TYPE != virt ]] && gather_supported_nvmf_pci_devs && is_hw=yes

	if [[ $is_hw == yes ]]; then
		if [[ $TEST_TRANSPORT == tcp ]]; then
			nvmf_tcp_init
		elif [[ $TEST_TRANSPORT == rdma ]]; then
			rdma_device_init
		fi
		return 0
	elif [[ $NET_TYPE == phy ]]; then
		echo "ERROR: No supported devices were found, cannot run the $TEST_TRANSPORT test"
		return 1
	elif [[ $NET_TYPE == phy-fallback ]]; then
		echo "WARNING: No supported devices were found, fallback requested for $TEST_TRANSPORT test"
	fi

	# NET_TYPE == virt or phy-fallback
	if [[ $TEST_TRANSPORT == tcp ]]; then
		nvmf_veth_init
		return 0
	fi

	echo "ERROR: virt and fallback setup is not supported for $TEST_TRANSPORT"
	return 1
}

function nvmftestinit() {
	if [ -z $TEST_TRANSPORT ]; then
		echo "transport not specified - use --transport= to specify"
		return 1
	fi

	trap 'nvmftestfini' SIGINT SIGTERM EXIT

	prepare_net_devs

	if [ "$TEST_MODE" == "iso" ]; then
		$rootdir/scripts/setup.sh
	fi

	NVMF_TRANSPORT_OPTS="-t $TEST_TRANSPORT"
	if [[ "$TEST_TRANSPORT" == "rdma" ]]; then
		RDMA_IP_LIST=$(get_available_rdma_ips)
		NVMF_FIRST_TARGET_IP=$(echo "$RDMA_IP_LIST" | head -n 1)
		NVMF_SECOND_TARGET_IP=$(echo "$RDMA_IP_LIST" | tail -n +2 | head -n 1)
		if [ -z $NVMF_FIRST_TARGET_IP ]; then
			echo "no RDMA NIC for nvmf test"
			exit 1
		fi
		NVMF_TRANSPORT_OPTS="$NVMF_TRANSPORT_OPTS --num-shared-buffers 1024"
	elif [[ "$TEST_TRANSPORT" == "tcp" ]]; then
		NVMF_TRANSPORT_OPTS="$NVMF_TRANSPORT_OPTS -o"
	fi

	if [ "$TEST_TRANSPORT" == "tcp" ] || [ "$TEST_TRANSPORT" == "rdma" ]; then
		# Currently we run the host/perf test for TCP even on systems without kernel nvme-tcp
		# support; that's fine since the host/perf test uses the SPDK initiator.
		# Maybe later we will enforce modprobe to succeed once we have systems in the test pool
		# with nvme-tcp kernel support - but until then let this pass so we can still run the
		# host/perf test with the tcp transport.
		modprobe nvme-$TEST_TRANSPORT || true
	fi
}

function nvmfappstart() {
	timing_enter start_nvmf_tgt
	"${NVMF_APP[@]}" "$@" &
	nvmfpid=$!
	waitforlisten $nvmfpid
	timing_exit start_nvmf_tgt
	trap 'process_shm --id $NVMF_APP_SHM_ID || :; nvmftestfini' SIGINT SIGTERM EXIT
}
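
# Typical call sequence in a test script (illustrative; the -m mask is an example):
#   nvmftestinit
#   nvmfappstart -m 0xF
#   ...test body (e.g. rpc_cmd nvmf_create_transport $NVMF_TRANSPORT_OPTS)...
#   nvmftestfini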

function nvmftestfini() {
	nvmfcleanup || :
	if [ -n "$nvmfpid" ]; then
		killprocess $nvmfpid
	fi
	if [ "$TEST_MODE" == "iso" ]; then
		$rootdir/scripts/setup.sh reset
	fi
	if [[ "$TEST_TRANSPORT" == "tcp" ]]; then
		nvmf_tcp_fini
	fi
}

function rdma_device_init() {
	load_ib_rdma_modules
	allocate_nic_ips
}

function nvme_connect() {
	local init_count
	init_count=$(nvme list | wc -l)

	if ! nvme connect "$@"; then return $?; fi

	for i in $(seq 1 10); do
		if [ $(nvme list | wc -l) -gt $init_count ]; then
			return 0
		else
			sleep 1s
		fi
	done
	return 1
}

function get_nvme_devs() {
	local dev _

	while read -r dev _; do
		if [[ $dev == /dev/nvme* ]]; then
			echo "$dev"
		fi
	done < <(nvme list)
}
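
# Example output (illustrative) - one device node per line, as reported by `nvme list`:
#   /dev/nvme0n1
#   /dev/nvme1n1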

function gen_nvmf_target_json() {
	local subsystem config=()

	for subsystem in "${@:-1}"; do
		config+=(
			"$(
				cat <<- EOF
					{
					  "params": {
					    "name": "Nvme$subsystem",
					    "trtype": "$TEST_TRANSPORT",
					    "traddr": "$NVMF_FIRST_TARGET_IP",
					    "adrfam": "ipv4",
					    "trsvcid": "$NVMF_PORT",
					    "subnqn": "nqn.2016-06.io.spdk:cnode$subsystem",
					    "hostnqn": "nqn.2016-06.io.spdk:host$subsystem",
					    "hdgst": ${hdgst:-false},
					    "ddgst": ${ddgst:-false}
					  },
					  "method": "bdev_nvme_attach_controller"
					}
				EOF
			)"
		)
	done
	jq . <<- JSON
		{
		  "subsystems": [
		    {
		      "subsystem": "bdev",
		      "config": [
			{
			  "method": "bdev_nvme_set_options",
			  "params": {
				"action_on_timeout": "none",
				"timeout_us": 0,
				"transport_retry_count": 4,
				"arbitration_burst": 0,
				"low_priority_weight": 0,
				"medium_priority_weight": 0,
				"high_priority_weight": 0,
				"nvme_adminq_poll_period_us": 10000,
				"keep_alive_timeout_ms" : 10000,
				"nvme_ioq_poll_period_us": 0,
				"io_queue_requests": 0,
				"delay_cmd_submit": true
			  }
			},
		        $(
			IFS=","
			printf '%s\n' "${config[*]}"
		),
			{
			  "method": "bdev_wait_for_examine"
			}
		      ]
		    }
		  ]
		}
	JSON
}
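
# Example usage (illustrative): gen_nvmf_target_json 0 1 produces a bdev-subsystem
# config with "Nvme0"/"Nvme1" bdev_nvme_attach_controller entries pointing at
# $NVMF_FIRST_TARGET_IP:$NVMF_PORT - handy for apps that accept --json configs.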

function _remove_spdk_ns() {
	local ns {ns,mn,an}_net_devs
	while read -r ns _; do
		[[ $ns == *_spdk ]] || continue
		# Gather all devs from the target $ns namespace. We want to differentiate
		# between veth and physical links and gather just the latter. To do so,
		# we simply compare ifindex to iflink - as per the kernel docs, these should
		# always be equal for physical links. For veth devices, since they are
		# paired, iflink points at the peer interface, hence being different
		# from its own ifindex.
		ns_net_devs=($(
			ip netns exec "$ns" bash <<- 'IN_NS'
				shopt -s extglob
				for dev in /sys/class/net/!(lo|bond*); do
					(($(< "$dev/ifindex") == $(< "$dev/iflink"))) || continue
					echo "${dev##*/}"
				done
			IN_NS
		))
		# Gather all the net devs from the main ns
		mn_net_devs=($(basename -a /sys/class/net/!(lo|bond*)))
		# Merge these two lists to have something to compare against
		an_net_devs=($(printf '%s\n' "${ns_net_devs[@]}" "${mn_net_devs[@]}" | sort))

		ip netns delete "$ns"

		# Wait until the main ns matches our merged list after $ns got deleted
		while [[ ${an_net_devs[*]} != "${mn_net_devs[*]}" ]]; do
			mn_net_devs=($(basename -a /sys/class/net/!(lo|bond*)))
			sleep 1s
		done
	done < <(ip netns list)
}

remove_spdk_ns() {
	xtrace_disable_per_cmd _remove_spdk_ns
}

configure_kernel_target() {
	local kernel_name=$1 kernel_target_ip=$2
	# Keep it global in scope for easier cleanup
	nvmet=/sys/kernel/config/nvmet
	kernel_subsystem=$nvmet/subsystems/$kernel_name
	kernel_namespace=$kernel_subsystem/namespaces/1
	kernel_port=$nvmet/ports/1

	local block nvme

	if [[ ! -e /sys/module/nvmet ]]; then
		modprobe nvmet
	fi

	[[ -e $nvmet ]]

	"$rootdir/scripts/setup.sh" reset

	# Find an nvme with an active ns device
	for block in /sys/block/nvme*; do
		[[ -e $block ]] || continue
		is_block_zoned "${block##*/}" && continue
		block_in_use "${block##*/}" || nvme="/dev/${block##*/}"
	done

	[[ -b $nvme ]]

	mkdir "$kernel_subsystem"
	mkdir "$kernel_namespace"
	mkdir "$kernel_port"

	# attr_serial allows only a %llx value and for some reason the kernel swaps
	# the byte order, so setting the serial is not very useful here
	# "$kernel_subsystem/attr_serial"
	echo "SPDK-$kernel_name" > "$kernel_subsystem/attr_model"

	echo 1 > "$kernel_subsystem/attr_allow_any_host"
	echo "$nvme" > "$kernel_namespace/device_path"
	echo 1 > "$kernel_namespace/enable"

	echo "$kernel_target_ip" > "$kernel_port/addr_traddr"
	echo "$TEST_TRANSPORT" > "$kernel_port/addr_trtype"
	echo "$NVMF_PORT" > "$kernel_port/addr_trsvcid"
	echo ipv4 > "$kernel_port/addr_adrfam"

	# Enable the listener by linking the port to the previously created subsystem
	ln -s "$kernel_subsystem" "$kernel_port/subsystems/"

	# Check if the target is available
	nvme discover "${NVME_HOST[@]}" -a "$kernel_target_ip" -t "$TEST_TRANSPORT" -s "$NVMF_PORT"
}
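
# Resulting nvmet configfs layout (sketch):
#   $nvmet/subsystems/$kernel_name/attr_model, attr_allow_any_host
#   $nvmet/subsystems/$kernel_name/namespaces/1/device_path, enable
#   $nvmet/ports/1/addr_traddr, addr_trtype, addr_trsvcid, addr_adrfam
#   $nvmet/ports/1/subsystems/$kernel_name -> $kernel_subsystem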

clean_kernel_target() {
	[[ -e $kernel_subsystem ]] || return 0

	echo 0 > "$kernel_namespace/enable"

	rm -f "$kernel_port/subsystems/${kernel_subsystem##*/}"
	rmdir "$kernel_namespace"
	rmdir "$kernel_port"
	rmdir "$kernel_subsystem"

	modules=(/sys/module/nvmet/holders/*)

	modprobe -r "${modules[@]##*/}" nvmet

	# Get all nvmes back to userspace
	"$rootdir/scripts/setup.sh"
}

format_key() {
	local prefix key digest

	prefix="$1" key="$2" digest="$3"
	python - <<- EOF
		import base64, zlib

		crc = zlib.crc32(b"$key").to_bytes(4, byteorder="little")
		b64 = base64.b64encode(b"$key" + crc).decode("utf-8")
		print("$prefix:{:02x}:{}:".format($digest, b64), end="")
	EOF
}
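
# Output format (derived from the code above):
#   <prefix>:<2-hex-digit digest id>:<base64 of key bytes + 4-byte little-endian CRC32>:
# Example (illustrative): format_key "DHHC-1" "mykey" 1 -> "DHHC-1:01:<base64>:"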

format_interchange_psk() {
	format_key "NVMeTLSkey-1" "$1" "$2"
}

format_dhchap_key() {
	format_key "DHHC-1" "$1" "$2"
}

gen_dhchap_key() {
	local digest len file key
	local -A digests=([null]=0 [sha256]=1 [sha384]=2 [sha512]=3)

	digest="$1" len=$2
	key=$(xxd -p -c0 -l $((len / 2)) /dev/urandom)
	file=$(mktemp -t "spdk.key-$1.XXX")
	format_dhchap_key "$key" "${digests[$1]}" > "$file"
	chmod 0600 "$file"

	echo "$file"
}
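
# Example (illustrative):
#   key_file=$(gen_dhchap_key sha256 32)  # 32 hex chars of key material (16 random bytes)
#   cat "$key_file"                       # -> DHHC-1:01:<base64>: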

get_main_ns_ip() {
	# Determine which ip to use based on the nvmftestinit() setup. For tcp we pick
	# the interface which resides in the main net namespace and which is visible
	# to nvmet under the tcp setup. $NVMF_FIRST_TARGET_IP is solely for rdma use.
	# FIXME: This requires proper unification of the networking setup across
	# different transports.
	local ip
	local -A ip_candidates=()

	ip_candidates["rdma"]=NVMF_FIRST_TARGET_IP
	ip_candidates["tcp"]=NVMF_INITIATOR_IP

	[[ -z $TEST_TRANSPORT || -z ${ip_candidates["$TEST_TRANSPORT"]} ]] && return 1
	ip=${ip_candidates["$TEST_TRANSPORT"]}

	if [[ -z ${!ip} ]]; then
		echo "$ip not set, call nvmftestinit() first" >&2
		return 1
	fi

	echo "${!ip}"
}

uuid2nguid() {
	tr -d - <<< "${1^^}"
}