NVMF_PORT=4420
NVMF_SECOND_PORT=4421
NVMF_THIRD_PORT=4422
NVMF_IP_PREFIX="192.168.100"
NVMF_IP_LEAST_ADDR=8
NVMF_TCP_IP_ADDRESS="127.0.0.1"
NVMF_TRANSPORT_OPTS=""
NVMF_SERIAL=SPDK00000000000001

function build_nvmf_app_args() {
	if [ $SPDK_RUN_NON_ROOT -eq 1 ]; then
		# We assume the test script was started with sudo
		NVMF_APP=(sudo -E -u $SUDO_USER "LD_LIBRARY_PATH=$LD_LIBRARY_PATH" "${NVMF_APP[@]}")
	fi
	NVMF_APP+=(-i "$NVMF_APP_SHM_ID" -e 0xFFFF)

	if [ -n "$SPDK_HUGE_DIR" ]; then
		NVMF_APP+=(--huge-dir "$SPDK_HUGE_DIR")
	elif [ $SPDK_RUN_NON_ROOT -eq 1 ]; then
		echo "In non-root test mode you have to set the SPDK_HUGE_DIR variable." >&2
		echo "For example:" >&2
		echo "sudo mkdir /mnt/spdk_hugetlbfs" >&2
		echo "sudo chown ${SUDO_USER}: /mnt/spdk_hugetlbfs" >&2
		echo "export SPDK_HUGE_DIR=/mnt/spdk_hugetlbfs" >&2
		return 1
	fi
}
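
# Example of the resulting command line (illustrative; NVMF_APP itself is
# defined by the calling test code, e.g. NVMF_APP=("$SPDK_BIN_DIR/nvmf_tgt")):
# with NVMF_APP_SHM_ID=0 and SPDK_RUN_NON_ROOT=0 the array expands to
#   nvmf_tgt -i 0 -e 0xFFFF
# with "--huge-dir $SPDK_HUGE_DIR" appended when SPDK_HUGE_DIR is set.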

: ${NVMF_APP_SHM_ID="0"}
export NVMF_APP_SHM_ID
build_nvmf_app_args

have_pci_nics=0

function rxe_cfg() {
	"$rootdir/scripts/rxe_cfg_small.sh" "$@"
}

function load_ib_rdma_modules() {
	if [ $(uname) != Linux ]; then
		return 0
	fi

	modprobe ib_cm
	modprobe ib_core
	# Newer kernels do not have the ib_ucm module
	modprobe ib_ucm || true
	modprobe ib_umad
	modprobe ib_uverbs
	modprobe iw_cm
	modprobe rdma_cm
	modprobe rdma_ucm
}
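
# Optional sanity check (illustrative): after load_ib_rdma_modules the RDMA
# stack should be visible in lsmod, e.g.:
#   lsmod | grep -E '^(ib|iw|rdma)_'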

function detect_soft_roce_nics() {
	rxe_cfg stop # make sure we run tests with a clean slate
	rxe_cfg start
}

# Args 1 and 2 represent the grep filters for finding our NICs.
# Subsequent args are all drivers that should be loaded if we find these NICs.
# Those drivers should be supplied in the correct order.
function detect_nics_and_probe_drivers() {
	NIC_VENDOR="$1"
	NIC_CLASS="$2"

	nvmf_nic_bdfs=$(lspci | grep Ethernet | grep "$NIC_VENDOR" | grep "$NIC_CLASS" | awk -F ' ' '{print "0000:"$1}')

	if [ -z "$nvmf_nic_bdfs" ]; then
		return 0
	fi

	have_pci_nics=1
	if [ $# -ge 2 ]; then
		# shift out the first two positional arguments.
		shift 2
		# Iterate through the remaining arguments.
		for i; do
			if [[ $i == irdma ]]; then
				# Our tests don't play well with the iWARP protocol. Make sure we use RoCEv2 instead.
				if [[ -e /sys/module/irdma/parameters/roce_ena ]]; then
					# Reload the module to re-init the rdma devices
					(($(< /sys/module/irdma/parameters/roce_ena) != 1)) && modprobe -r irdma
				fi
				modprobe "$i" roce_ena=1
			else
				modprobe "$i"
			fi
		done
	fi
}
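
# Example invocation (matches the Mellanox entry used by pci_rdma_switch() below):
#   detect_nics_and_probe_drivers "Mellanox" "ConnectX-4" mlx5_core mlx5_ib
# This greps lspci for Mellanox ConnectX-4 Ethernet devices and, if any are
# found, sets have_pci_nics=1 and probes mlx5_core followed by mlx5_ib.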

function pci_rdma_switch() {
	local driver=$1

	local -a driver_args=()
	driver_args+=("Mellanox ConnectX-4 mlx5_core mlx5_ib")
	driver_args+=("Mellanox ConnectX-5 mlx5_core mlx5_ib")
	driver_args+=("Intel E810 ice irdma")
	driver_args+=("Intel X722 i40e i40iw")
	driver_args+=("Chelsio \"Unified Wire\" cxgb4 iw_cxgb4")

	case $driver in
		mlx5_ib)
			detect_nics_and_probe_drivers ${driver_args[0]}
			detect_nics_and_probe_drivers ${driver_args[1]}
			;;
		irdma)
			detect_nics_and_probe_drivers ${driver_args[2]}
			;;
		i40iw)
			detect_nics_and_probe_drivers ${driver_args[3]}
			;;
		iw_cxgb4)
			detect_nics_and_probe_drivers ${driver_args[4]}
			;;
		*)
			for d in "${driver_args[@]}"; do
				detect_nics_and_probe_drivers $d
			done
			;;
	esac
}

function pci_tcp_switch() {
	local driver=$1

	local -a driver_args=()
	driver_args+=("Intel E810 ice")

	case $driver in
		ice)
			detect_nics_and_probe_drivers ${driver_args[0]}
			;;
		*)
			for d in "${driver_args[@]}"; do
				detect_nics_and_probe_drivers $d
			done
			;;
	esac
}

function detect_pci_nics() {

	if ! hash lspci; then
		return 0
	fi

	local nic_drivers
	local found_drivers

	if [[ -z "$TEST_TRANSPORT" ]]; then
		TEST_TRANSPORT=$SPDK_TEST_NVMF_TRANSPORT
	fi

	if [[ "$TEST_TRANSPORT" == "rdma" ]]; then
		nic_drivers="mlx5_ib|irdma|i40iw|iw_cxgb4"

		# Try to find RDMA drivers which are already loaded and use
		# only their associated NICs, without probing all drivers.
		found_drivers=$(lsmod | grep -Eo $nic_drivers | sort -u)
		for d in $found_drivers; do
			pci_rdma_switch $d
		done

		# In case lsmod reported a driver, but lspci does not report
		# physical NICs - fall back to the old approach and try to
		# probe all compatible NICs.
		((have_pci_nics == 0)) && pci_rdma_switch "default"

	elif [[ "$TEST_TRANSPORT" == "tcp" ]]; then
		nic_drivers="ice"
		found_drivers=$(lsmod | grep -Eo $nic_drivers | sort -u)
		for d in $found_drivers; do
			pci_tcp_switch $d
		done
		((have_pci_nics == 0)) && pci_tcp_switch "default"
	fi

	# No PCI NICs found - let the caller fall back to Soft-RoCE.
	((have_pci_nics == 0)) && return 0

	# Provide time for drivers to properly load.
	sleep 5
}

function detect_transport_nics() {
	detect_pci_nics
	if [ "$have_pci_nics" -eq "0" ]; then
		detect_soft_roce_nics
	fi
}

function allocate_nic_ips() {
	((count = NVMF_IP_LEAST_ADDR))
	for nic_name in $(get_rdma_if_list); do
		ip="$(get_ip_address $nic_name)"
		if [[ -z $ip ]]; then
			ip addr add $NVMF_IP_PREFIX.$count/24 dev $nic_name
			ip link set $nic_name up
			((count = count + 1))
		fi
		# dump configuration for debug log
		ip addr show $nic_name
	done
}
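
# With the defaults above ($NVMF_IP_PREFIX/$NVMF_IP_LEAST_ADDR), the first RDMA
# interface without an address is assigned 192.168.100.8/24, the next one
# 192.168.100.9/24, and so on.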

function get_available_rdma_ips() {
	for nic_name in $(get_rdma_if_list); do
		get_ip_address $nic_name
	done
}

function get_rdma_if_list() {
	rxe_cfg rxe-net
}

function get_tcp_if_list_by_driver() {
	local driver
	driver=${1:-ice}

	shopt -s nullglob
	tcp_if_list=(/sys/bus/pci/drivers/$driver/0000*/net/*)
	shopt -u nullglob
	printf '%s\n' "${tcp_if_list[@]##*/}"
}
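
# Example (BDF and interface name are illustrative): for an E810 port bound to
# the ice driver the glob may resolve to
#   /sys/bus/pci/drivers/ice/0000:3b:00.0/net/ens1f0
# in which case the function prints "ens1f0", one interface name per line.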

function get_ip_address() {
	interface=$1
	ip -o -4 addr show $interface | awk '{print $4}' | cut -d"/" -f1
}
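
# Example: after nvmf_veth_init(), "get_ip_address nvmf_init_if" prints 10.0.0.1.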

function nvmfcleanup() {
	sync

	if [ "$TEST_TRANSPORT" == "tcp" ] || [ "$TEST_TRANSPORT" == "rdma" ]; then
		set +e
		for i in {1..20}; do
			modprobe -v -r nvme-$TEST_TRANSPORT
			if modprobe -v -r nvme-fabrics; then
				set -e
				return 0
			fi
			sleep 1
		done
		set -e

		# So far we have been unable to remove the kernel modules. Try
		# one more time and let it fail.
		# Allow the transport module to fail for now. See Jim's comment
		# about the nvme-tcp module below.
		modprobe -v -r nvme-$TEST_TRANSPORT || true
		modprobe -v -r nvme-fabrics
	fi
}

function nvmf_veth_init() {
	NVMF_INITIATOR_IP=10.0.0.1
	NVMF_FIRST_TARGET_IP=10.0.0.2
	NVMF_SECOND_TARGET_IP=10.0.0.3
	NVMF_BRIDGE="nvmf_br"
	NVMF_INITIATOR_INTERFACE="nvmf_init_if"
	NVMF_INITIATOR_BRIDGE="nvmf_init_br"
	NVMF_TARGET_NAMESPACE="nvmf_tgt_ns_spdk"
	NVMF_TARGET_NS_CMD=(ip netns exec "$NVMF_TARGET_NAMESPACE")
	NVMF_TARGET_INTERFACE="nvmf_tgt_if"
	NVMF_TARGET_INTERFACE2="nvmf_tgt_if2"
	NVMF_TARGET_BRIDGE="nvmf_tgt_br"
	NVMF_TARGET_BRIDGE2="nvmf_tgt_br2"

	# Clean up any leftovers from a previous run
	ip link set $NVMF_INITIATOR_BRIDGE nomaster || true
	ip link set $NVMF_TARGET_BRIDGE nomaster || true
	ip link set $NVMF_TARGET_BRIDGE2 nomaster || true
	ip link set $NVMF_INITIATOR_BRIDGE down || true
	ip link set $NVMF_TARGET_BRIDGE down || true
	ip link set $NVMF_TARGET_BRIDGE2 down || true
	ip link delete $NVMF_BRIDGE type bridge || true
	ip link delete $NVMF_INITIATOR_INTERFACE || true
	"${NVMF_TARGET_NS_CMD[@]}" ip link delete $NVMF_TARGET_INTERFACE || true
	"${NVMF_TARGET_NS_CMD[@]}" ip link delete $NVMF_TARGET_INTERFACE2 || true

	# Create network namespace
	ip netns add $NVMF_TARGET_NAMESPACE

	# Create veth (virtual Ethernet) interface pairs
	ip link add $NVMF_INITIATOR_INTERFACE type veth peer name $NVMF_INITIATOR_BRIDGE
	ip link add $NVMF_TARGET_INTERFACE type veth peer name $NVMF_TARGET_BRIDGE
	ip link add $NVMF_TARGET_INTERFACE2 type veth peer name $NVMF_TARGET_BRIDGE2

	# Associate veth interface pairs with network namespace
	ip link set $NVMF_TARGET_INTERFACE netns $NVMF_TARGET_NAMESPACE
	ip link set $NVMF_TARGET_INTERFACE2 netns $NVMF_TARGET_NAMESPACE

	# Allocate IP addresses
	ip addr add $NVMF_INITIATOR_IP/24 dev $NVMF_INITIATOR_INTERFACE
	"${NVMF_TARGET_NS_CMD[@]}" ip addr add $NVMF_FIRST_TARGET_IP/24 dev $NVMF_TARGET_INTERFACE
	"${NVMF_TARGET_NS_CMD[@]}" ip addr add $NVMF_SECOND_TARGET_IP/24 dev $NVMF_TARGET_INTERFACE2

	# Link up veth interfaces
	ip link set $NVMF_INITIATOR_INTERFACE up
	ip link set $NVMF_INITIATOR_BRIDGE up
	ip link set $NVMF_TARGET_BRIDGE up
	ip link set $NVMF_TARGET_BRIDGE2 up
	"${NVMF_TARGET_NS_CMD[@]}" ip link set $NVMF_TARGET_INTERFACE up
	"${NVMF_TARGET_NS_CMD[@]}" ip link set $NVMF_TARGET_INTERFACE2 up
	"${NVMF_TARGET_NS_CMD[@]}" ip link set lo up

	# Create a bridge
	ip link add $NVMF_BRIDGE type bridge
	ip link set $NVMF_BRIDGE up

	# Add veth interfaces to the bridge
	ip link set $NVMF_INITIATOR_BRIDGE master $NVMF_BRIDGE
	ip link set $NVMF_TARGET_BRIDGE master $NVMF_BRIDGE
	ip link set $NVMF_TARGET_BRIDGE2 master $NVMF_BRIDGE

	# Accept connections from the veth interface
	iptables -I INPUT 1 -i $NVMF_INITIATOR_INTERFACE -p tcp --dport $NVMF_PORT -j ACCEPT

	# Verify connectivity
	ping -c 1 $NVMF_FIRST_TARGET_IP
	ping -c 1 $NVMF_SECOND_TARGET_IP
	"${NVMF_TARGET_NS_CMD[@]}" ping -c 1 $NVMF_INITIATOR_IP

	NVMF_APP=("${NVMF_TARGET_NS_CMD[@]}" "${NVMF_APP[@]}")
}
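
# Resulting topology (sketch) - the initiator side reaches the target namespace
# through the nvmf_br bridge, which enslaves all three veth peers:
#
#   nvmf_init_if (10.0.0.1) -- nvmf_init_br --+
#                                             +-- nvmf_br
#   [netns nvmf_tgt_ns_spdk]                  |
#   nvmf_tgt_if  (10.0.0.2) --- nvmf_tgt_br --+
#   nvmf_tgt_if2 (10.0.0.3) -- nvmf_tgt_br2 --+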

function nvmf_veth_fini() {
	# Cleanup bridge, veth interfaces, and network namespace
	# Note: removing one veth removes the pair
	ip link set $NVMF_INITIATOR_BRIDGE nomaster
	ip link set $NVMF_TARGET_BRIDGE nomaster
	ip link set $NVMF_TARGET_BRIDGE2 nomaster
	ip link set $NVMF_INITIATOR_BRIDGE down
	ip link set $NVMF_TARGET_BRIDGE down
	ip link set $NVMF_TARGET_BRIDGE2 down
	ip link delete $NVMF_BRIDGE type bridge
	ip link delete $NVMF_INITIATOR_INTERFACE
	"${NVMF_TARGET_NS_CMD[@]}" ip link delete $NVMF_TARGET_INTERFACE
	"${NVMF_TARGET_NS_CMD[@]}" ip link delete $NVMF_TARGET_INTERFACE2
	remove_spdk_ns
}

function nvmf_tcp_init() {
	NVMF_INITIATOR_IP=10.0.0.1
	NVMF_FIRST_TARGET_IP=10.0.0.2
	TCP_INTERFACE_LIST=($(get_tcp_if_list_by_driver))
	if ((${#TCP_INTERFACE_LIST[@]} == 0)) || [ "$TEST_MODE" == "iso" ]; then
		nvmf_veth_init
		return 0
	fi

	# We need at least two net devs (under 'set -e' the statement below
	# aborts the test if we have fewer)
	((${#TCP_INTERFACE_LIST[@]} > 1))

	NVMF_TARGET_INTERFACE=${TCP_INTERFACE_LIST[0]}
	NVMF_INITIATOR_INTERFACE=${TCP_INTERFACE_LIST[1]}

	# Skip the multipath case in nvmf_tcp_init() - it is covered by nvmf_veth_init().
	NVMF_SECOND_TARGET_IP=""

	NVMF_TARGET_NAMESPACE="${NVMF_TARGET_INTERFACE}_ns_spdk"
	NVMF_TARGET_NS_CMD=(ip netns exec "$NVMF_TARGET_NAMESPACE")
	ip -4 addr flush $NVMF_TARGET_INTERFACE || true
	ip -4 addr flush $NVMF_INITIATOR_INTERFACE || true

	# Create network namespace
	ip netns add $NVMF_TARGET_NAMESPACE

	# Move the physical target interface into the network namespace
	ip link set $NVMF_TARGET_INTERFACE netns $NVMF_TARGET_NAMESPACE

	# Allocate IP addresses
	ip addr add $NVMF_INITIATOR_IP/24 dev $NVMF_INITIATOR_INTERFACE
	"${NVMF_TARGET_NS_CMD[@]}" ip addr add $NVMF_FIRST_TARGET_IP/24 dev $NVMF_TARGET_INTERFACE

	# Link up phy interfaces
	ip link set $NVMF_INITIATOR_INTERFACE up

	"${NVMF_TARGET_NS_CMD[@]}" ip link set $NVMF_TARGET_INTERFACE up
	"${NVMF_TARGET_NS_CMD[@]}" ip link set lo up

	# Accept connections from the phy interface
	iptables -I INPUT 1 -i $NVMF_INITIATOR_INTERFACE -p tcp --dport $NVMF_PORT -j ACCEPT

	# Verify connectivity
	ping -c 1 $NVMF_FIRST_TARGET_IP
	"${NVMF_TARGET_NS_CMD[@]}" ping -c 1 $NVMF_INITIATOR_IP

	NVMF_APP=("${NVMF_TARGET_NS_CMD[@]}" "${NVMF_APP[@]}")
}

function nvmf_tcp_fini() {
	# nvmf_veth_init() sets this fixed namespace name; nvmf_tcp_init()
	# derives one from the target interface instead.
	if [[ "$NVMF_TARGET_NAMESPACE" == "nvmf_tgt_ns_spdk" ]]; then
		nvmf_veth_fini
		return 0
	fi
	remove_spdk_ns
	ip -4 addr flush $NVMF_INITIATOR_INTERFACE || :
}

function nvmftestinit() {
	if [ -z "$TEST_TRANSPORT" ]; then
		echo "transport not specified - use --transport= to specify"
		return 1
	fi

	trap 'process_shm --id $NVMF_APP_SHM_ID || :; nvmftestfini' SIGINT SIGTERM EXIT

	if [ "$TEST_MODE" == "iso" ]; then
		$rootdir/scripts/setup.sh
		if [[ "$TEST_TRANSPORT" == "rdma" ]]; then
			rdma_device_init
		fi
		if [[ "$TEST_TRANSPORT" == "tcp" ]]; then
			tcp_device_init
		fi
	fi

	NVMF_TRANSPORT_OPTS="-t $TEST_TRANSPORT"
	if [[ "$TEST_TRANSPORT" == "rdma" ]]; then
		RDMA_IP_LIST=$(get_available_rdma_ips)
		NVMF_FIRST_TARGET_IP=$(echo "$RDMA_IP_LIST" | head -n 1)
		NVMF_SECOND_TARGET_IP=$(echo "$RDMA_IP_LIST" | tail -n +2 | head -n 1)
		if [ -z "$NVMF_FIRST_TARGET_IP" ]; then
			echo "no RDMA NIC for nvmf test"
			exit 0
		fi
	elif [[ "$TEST_TRANSPORT" == "tcp" ]]; then
		remove_spdk_ns
		nvmf_tcp_init
		NVMF_TRANSPORT_OPTS="$NVMF_TRANSPORT_OPTS -o"
	fi

	if [ "$TEST_TRANSPORT" == "tcp" ] || [ "$TEST_TRANSPORT" == "rdma" ]; then
		# Currently we run the host/perf test for TCP even on systems without kernel nvme-tcp
		#  support; that's fine since the host/perf test uses the SPDK initiator.
		# Maybe later we will enforce modprobe to succeed once we have systems in the test pool
		#  with nvme-tcp kernel support - but until then let this pass so we can still run the
		#  host/perf test with the tcp transport.
		modprobe nvme-$TEST_TRANSPORT || true
	fi
}

function nvmfappstart() {
	timing_enter start_nvmf_tgt
	"${NVMF_APP[@]}" "$@" &
	nvmfpid=$!
	waitforlisten $nvmfpid
	timing_exit start_nvmf_tgt
}

function nvmftestfini() {
	nvmfcleanup || :
	if [ -n "$nvmfpid" ]; then
		killprocess $nvmfpid
	fi
	if [ "$TEST_MODE" == "iso" ]; then
		$rootdir/scripts/setup.sh reset
		if [[ "$TEST_TRANSPORT" == "rdma" ]]; then
			rdma_device_init
		fi
	fi
	if [[ "$TEST_TRANSPORT" == "tcp" ]]; then
		nvmf_tcp_fini
	fi
}
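
# Typical flow in a test script sourcing this file (sketch; the RPC line assumes
# the standard rpc.py helper and default transport options):
#   nvmftestinit
#   nvmfappstart -m 0xF
#   $rootdir/scripts/rpc.py nvmf_create_transport $NVMF_TRANSPORT_OPTS -u 8192
#   # ... create subsystems, connect, run the actual test ...
#   nvmftestfini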

function rdma_device_init() {
	load_ib_rdma_modules
	detect_transport_nics
	allocate_nic_ips
}

function tcp_device_init() {
	detect_transport_nics
}

function revert_soft_roce() {
	rxe_cfg stop
}

function check_ip_is_soft_roce() {
	if [ "$TEST_TRANSPORT" != "rdma" ]; then
		return 0
	fi
	rxe_cfg status rxe | grep -wq "$1"
}

function nvme_connect() {
	local init_count
	init_count=$(nvme list | wc -l)

	if ! nvme connect "$@"; then return $?; fi

	for i in $(seq 1 10); do
		if [ $(nvme list | wc -l) -gt $init_count ]; then
			return 0
		else
			sleep 1s
		fi
	done
	return 1
}
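
# Example (sketch; flags are standard nvme-cli options): connect to the first
# target and wait for the new controller to show up in 'nvme list':
#   nvme_connect -t $TEST_TRANSPORT -n "nqn.2016-06.io.spdk:cnode1" \
#       -a "$NVMF_FIRST_TARGET_IP" -s "$NVMF_PORT"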

function get_nvme_devs() {
	local dev _

	while read -r dev _; do
		if [[ $dev == /dev/nvme* ]]; then
			echo "$dev"
		fi
	done < <(nvme list)
}
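
# Example output (illustrative), one device node per line:
#   /dev/nvme0n1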

function gen_nvmf_target_json() {
	local subsystem config=()

	for subsystem in "${@:-1}"; do
		config+=(
			"$(
				cat <<- EOF
					{
					  "params": {
					    "name": "Nvme$subsystem",
					    "trtype": "$TEST_TRANSPORT",
					    "traddr": "$NVMF_FIRST_TARGET_IP",
					    "adrfam": "ipv4",
					    "trsvcid": "$NVMF_PORT",
					    "subnqn": "nqn.2016-06.io.spdk:cnode$subsystem",
					    "hostnqn": "nqn.2016-06.io.spdk:host$subsystem",
					    "hdgst": ${hdgst:-false},
					    "ddgst": ${ddgst:-false}
					  },
					  "method": "bdev_nvme_attach_controller"
					}
				EOF
			)"
		)
	done
	jq . <<- JSON
		{
		  "subsystems": [
		    {
		      "subsystem": "bdev",
		      "config": [
			{
			  "method": "bdev_nvme_set_options",
			  "params": {
				"action_on_timeout": "none",
				"timeout_us": 0,
				"retry_count": 4,
				"arbitration_burst": 0,
				"low_priority_weight": 0,
				"medium_priority_weight": 0,
				"high_priority_weight": 0,
				"nvme_adminq_poll_period_us": 10000,
				"keep_alive_timeout_ms" : 10000,
				"nvme_ioq_poll_period_us": 0,
				"io_queue_requests": 0,
				"delay_cmd_submit": true
			  }
			},
		        $(
		IFS=","
		printf '%s\n' "${config[*]}"
		),
			{
			  "method": "bdev_wait_for_examine"
			}
		      ]
		    }
		  ]
		}
	JSON
}
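
# Example (sketch): generate a config attaching Nvme1 and Nvme2 and feed it to
# bdevperf (binary path is the usual SPDK build location):
#   $rootdir/build/examples/bdevperf --json <(gen_nvmf_target_json 1 2) \
#       -q 128 -o 4096 -w verify -t 10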

function remove_spdk_ns() {
	local ns
	while read -r ns _; do
		[[ $ns == *_spdk ]] || continue
		ip netns delete "$ns"
	done < <(ip netns list)
	# Let it settle
	sleep 1
}