xref: /spdk/test/bdev/bdev_raid.sh (revision ad5fc351dd221a287cce269ad0e50b11253cc48b)
#!/usr/bin/env bash
#  SPDX-License-Identifier: BSD-3-Clause
#  Copyright (C) 2019 Intel Corporation
#  All rights reserved.
#
# Functional tests for SPDK RAID bdevs: raid0/concat/raid1 (and raid5f when
# compiled in) exercised through nbd, state transitions, resize and on-disk
# superblock handling.
# Quote the path expansions so the script works from directories with spaces.
testdir=$(readlink -f "$(dirname "$0")")
rootdir=$(readlink -f "$testdir/../..")
# UNIX-domain socket used by the standalone bdev_svc app's RPC server.
rpc_server=/var/tmp/spdk-raid.sock
# Scratch file holding the random reference pattern mirrored onto the raid bdev.
tmp_file=$SPDK_TEST_STORAGE/raidrandtest

source "$rootdir/test/common/autotest_common.sh"
source "$testdir/nbd_common.sh"

rpc_py="$rootdir/scripts/rpc.py -s $rpc_server"
function raid_unmap_data_verify() {
	# Fill the raid bdev (exposed as an nbd device) with random data, then
	# discard several block ranges and verify the device still matches a
	# reference file that had the same ranges zeroed out.
	# $1 - nbd block device backed by the raid bdev (e.g. /dev/nbd0)
	# ($2, the rpc server socket, was accepted but never used - dropped.)
	# Skipped (returns 0) when blkdiscard is not installed.
	if hash blkdiscard; then
		local nbd=$1
		local blksize
		# Logical sector size of the device; -n drops the header row and -d
		# the child devices, which is more robust than cutting a fixed
		# character column out of the default lsblk output.
		blksize=$(lsblk -n -d -o LOG-SEC "$nbd" | tr -d '[:space:]')
		local rw_blk_num=4096
		local rw_len=$((blksize * rw_blk_num))
		# Offsets/lengths (in blocks) covering both aligned and unaligned
		# ranges relative to the raid strip size.
		local unmap_blk_offs=(0 1028 321)
		local unmap_blk_nums=(128 2035 456)
		local unmap_off
		local unmap_len
		local i

		# Write the same random pattern to the reference file and the device.
		dd if=/dev/urandom of="$tmp_file" bs="$blksize" count=$rw_blk_num
		dd if="$tmp_file" of="$nbd" bs="$blksize" count=$rw_blk_num oflag=direct
		blockdev --flushbufs "$nbd"

		# Confirm the random data was written correctly to the raid device.
		cmp -b -n "$rw_len" "$tmp_file" "$nbd"

		for ((i = 0; i < ${#unmap_blk_offs[@]}; i++)); do
			unmap_off=$((blksize * unmap_blk_offs[i]))
			unmap_len=$((blksize * unmap_blk_nums[i]))

			# Zero the same range in the reference file (keep its size).
			dd if=/dev/zero of="$tmp_file" bs="$blksize" seek="${unmap_blk_offs[$i]}" count="${unmap_blk_nums[$i]}" conv=notrunc

			# Discard the range on the raid bdev; it must read back as zeroes.
			blkdiscard -o "$unmap_off" -l "$unmap_len" "$nbd"
			blockdev --flushbufs "$nbd"

			# Device and reference file must still be identical.
			cmp -b -n "$rw_len" "$tmp_file" "$nbd"
		done
	fi

	return 0
}
55
function on_error_exit() {
	# ERR-trap handler: kill the bdev_svc app if one was started, remove the
	# scratch data file, print a backtrace and abort the whole test run.
	if [[ -n "$raid_pid" ]]; then
		killprocess "$raid_pid"
	fi

	rm -f "$tmp_file"
	print_backtrace
	exit 1
}
65
function configure_raid_bdev() {
	# Create a two-member raid bdev named "raid" at the requested level by
	# feeding a batched command file to the RPC server.
	# $1 - raid level to pass to bdev_raid_create (e.g. raid0, concat)
	local raid_level=$1
	local batch_file="$testdir/rpcs.txt"

	rm -rf "$batch_file"

	cat <<- EOL >> "$batch_file"
		bdev_malloc_create 32 512 -b Base_1
		bdev_malloc_create 32 512 -b Base_2
		bdev_raid_create -z 64 -r $raid_level -b "Base_1 Base_2" -n raid
	EOL
	$rpc_py < "$batch_file"

	rm -rf "$batch_file"
}
79
function raid_function_test() {
	# End-to-end test of a raid bdev over nbd: create it, export it through
	# /dev/nbd0, run the discard/verify workload and tear everything down.
	# $1 - raid level (raid0, concat)
	# Skipped when not running on Linux or the nbd kernel module is missing.
	local raid_level=$1
	if [ "$(uname -s)" = Linux ] && modprobe -n nbd; then
		local nbd=/dev/nbd0
		local raid_bdev
		local count

		modprobe nbd
		$rootdir/test/app/bdev_svc/bdev_svc -r $rpc_server -i 0 -L bdev_raid &
		raid_pid=$!
		echo "Process raid pid: $raid_pid"
		waitforlisten $raid_pid $rpc_server

		configure_raid_bdev "$raid_level"
		raid_bdev=$($rpc_py bdev_raid_get_bdevs online | jq -r '.[0]["name"] | select(.)')
		# NOTE(review): the previous unquoted test ([ $raid_bdev = "" ]) was a
		# "test" syntax error exactly when the bdev was missing; -z is correct.
		if [ -z "$raid_bdev" ]; then
			echo "No raid0 device in SPDK app"
			return 1
		fi

		# Export the raid bdev as an nbd device and confirm exactly one export.
		nbd_start_disks $rpc_server "$raid_bdev" $nbd
		count=$(nbd_get_count $rpc_server)
		if [ "$count" -ne 1 ]; then
			return 1
		fi

		raid_unmap_data_verify $nbd $rpc_server

		# Tear the export down again and confirm nothing is left behind.
		nbd_stop_disks $rpc_server $nbd
		count=$(nbd_get_count $rpc_server)
		if [ "$count" -ne 0 ]; then
			return 1
		fi

		killprocess $raid_pid
	else
		echo "skipping bdev raid tests."
	fi

	return 0
}
120
function verify_raid_bdev_state() {
	# Verify that raid bdev $1 exists and matches the expected properties:
	# $1 - raid bdev name
	# $2 - expected state ("configuring" / "online" / "offline")
	# $3 - expected raid level
	# $4 - expected strip size in KiB
	# $5 - expected num_base_bdevs_operational
	# The checks are noisy, so xtrace is suppressed while they run.  Unlike an
	# early-return chain, this wrapper guarantees xtrace_restore is reached on
	# failure paths too, keeping the disable/restore calls balanced.
	local rc=0

	xtrace_disable
	_raid_bdev_info_checks "$@" || rc=$?
	xtrace_restore

	return $rc
}

function _raid_bdev_info_checks() {
	# Internal helper for verify_raid_bdev_state (same arguments): performs
	# the RPC queries and field comparisons, returning 1 on the first mismatch.
	local raid_bdev_name=$1
	local expected_state=$2
	local raid_level=$3
	local strip_size=$4
	local num_base_bdevs_operational=$5
	local raid_bdev_info
	local num_base_bdevs
	local num_base_bdevs_discovered
	local tmp

	raid_bdev_info=$($rpc_py bdev_raid_get_bdevs all | jq -r ".[] | select(.name == \"$raid_bdev_name\")")
	if [ -z "$raid_bdev_info" ]; then
		echo "No raid device \"$raid_bdev_name\" in SPDK app"
		return 1
	fi

	# The listing filtered by state must also contain the bdev.
	raid_bdev_info=$($rpc_py bdev_raid_get_bdevs "$expected_state" | jq -r ".[] | select(.name == \"$raid_bdev_name\")")
	if [ -z "$raid_bdev_info" ]; then
		echo "$raid_bdev_name is not in $expected_state state"
		return 1
	fi

	tmp=$(echo "$raid_bdev_info" | jq -r '.state')
	if [ "$tmp" != "$expected_state" ]; then
		echo "incorrect state: $tmp, expected: $expected_state"
		return 1
	fi

	tmp=$(echo "$raid_bdev_info" | jq -r '.raid_level')
	if [ "$tmp" != "$raid_level" ]; then
		echo "incorrect level: $tmp, expected: $raid_level"
		return 1
	fi

	tmp=$(echo "$raid_bdev_info" | jq -r '.strip_size_kb')
	if [ "$tmp" != "$strip_size" ]; then
		echo "incorrect strip size: $tmp, expected: $strip_size"
		return 1
	fi

	# num_base_bdevs must equal the length of base_bdevs_list.
	num_base_bdevs=$(echo "$raid_bdev_info" | jq -r '[.base_bdevs_list[]] | length')
	tmp=$(echo "$raid_bdev_info" | jq -r '.num_base_bdevs')
	if [ "$num_base_bdevs" != "$tmp" ]; then
		echo "incorrect num_base_bdevs: $tmp, expected: $num_base_bdevs"
		return 1
	fi

	# num_base_bdevs_discovered must equal the number of configured members.
	num_base_bdevs_discovered=$(echo "$raid_bdev_info" | jq -r '[.base_bdevs_list[] | select(.is_configured)] | length')
	tmp=$(echo "$raid_bdev_info" | jq -r '.num_base_bdevs_discovered')
	if [ "$num_base_bdevs_discovered" != "$tmp" ]; then
		echo "incorrect num_base_bdevs_discovered: $tmp, expected: $num_base_bdevs_discovered"
		return 1
	fi

	tmp=$(echo "$raid_bdev_info" | jq -r '.num_base_bdevs_operational')
	if [ "$num_base_bdevs_operational" != "$tmp" ]; then
		echo "incorrect num_base_bdevs_operational $tmp, expected: $num_base_bdevs_operational"
		return 1
	fi
}
186
function has_redundancy() {
	# Succeed (return 0) for raid levels that survive loss of a base bdev,
	# fail (return 1) for all others.
	# $1 - raid level string (raid0, concat, raid1, raid5f, ...)
	if [[ "$1" == "raid1" || "$1" == "raid5f" ]]; then
		return 0
	fi
	return 1
}
193
function raid_state_function_test() {
	# Exercise the raid bdev state machine: create the raid bdev before its
	# base bdevs exist, add members one by one (expect CONFIGURING until all
	# are present, then ONLINE), then delete members and verify the bdev goes
	# OFFLINE (non-redundant levels) or stays ONLINE (redundant levels) and
	# is finally removed from the system.
	# $1 - raid level (raid0/concat/raid1/raid5f)
	# $2 - number of base bdevs
	# $3 - "true" to create the raid bdev with an on-disk superblock (-s)
	# Uses the global $rpc_server and sets the global $raid_pid.
	local raid_level=$1
	local num_base_bdevs=$2
	local superblock=$3
	local raid_bdev
	local base_bdevs=($(for ((i = 1; i <= num_base_bdevs; i++)); do echo BaseBdev$i; done))
	local raid_bdev_name="Existed_Raid"
	local strip_size
	local strip_size_create_arg
	local superblock_create_arg

	# raid1 does not stripe, so it gets no -z argument.
	if [ $raid_level != "raid1" ]; then
		strip_size=64
		strip_size_create_arg="-z $strip_size"
	else
		strip_size=0
	fi

	if [ $superblock = true ]; then
		superblock_create_arg="-s"
	else
		superblock_create_arg=""
	fi

	$rootdir/test/app/bdev_svc/bdev_svc -r $rpc_server -i 0 -L bdev_raid &
	raid_pid=$!
	echo "Process raid pid: $raid_pid"
	waitforlisten $raid_pid $rpc_server

	# Step1: create a RAID bdev with no base bdevs
	# Expect state: CONFIGURING
	$rpc_py bdev_raid_create $strip_size_create_arg $superblock_create_arg -r $raid_level -b "${base_bdevs[*]}" -n $raid_bdev_name
	verify_raid_bdev_state $raid_bdev_name "configuring" $raid_level $strip_size $num_base_bdevs
	$rpc_py bdev_raid_delete $raid_bdev_name

	# Step2: create one base bdev and add to the RAID bdev
	# Expect state: CONFIGURING
	$rpc_py bdev_raid_create $strip_size_create_arg $superblock_create_arg -r $raid_level -b "${base_bdevs[*]}" -n $raid_bdev_name
	$rpc_py bdev_malloc_create 32 512 -b ${base_bdevs[0]}
	waitforbdev ${base_bdevs[0]}
	verify_raid_bdev_state $raid_bdev_name "configuring" $raid_level $strip_size $num_base_bdevs
	$rpc_py bdev_raid_delete $raid_bdev_name

	if [ $superblock = true ]; then
		# recreate the bdev to remove superblock
		# (deleting the raid bdev alone leaves the superblock on the member)
		$rpc_py bdev_malloc_delete ${base_bdevs[0]}
		$rpc_py bdev_malloc_create 32 512 -b ${base_bdevs[0]}
		waitforbdev ${base_bdevs[0]}
	fi

	# Step3: create remaining base bdevs and add to the RAID bdev
	# Expect state: ONLINE
	$rpc_py bdev_raid_create $strip_size_create_arg $superblock_create_arg -r $raid_level -b "${base_bdevs[*]}" -n $raid_bdev_name
	for ((i = 1; i < num_base_bdevs; i++)); do
		# Still CONFIGURING until the very last member has been added.
		verify_raid_bdev_state $raid_bdev_name "configuring" $raid_level $strip_size $num_base_bdevs
		$rpc_py bdev_malloc_create 32 512 -b ${base_bdevs[$i]}
		waitforbdev ${base_bdevs[$i]}
	done
	verify_raid_bdev_state $raid_bdev_name "online" $raid_level $strip_size $num_base_bdevs

	# Step4: delete one base bdev from the RAID bdev
	# Redundant levels tolerate the loss and stay online (degraded); the
	# others must go offline.
	$rpc_py bdev_malloc_delete ${base_bdevs[0]}
	local expected_state
	if ! has_redundancy $raid_level; then
		expected_state="offline"
	else
		expected_state="online"
	fi
	verify_raid_bdev_state $raid_bdev_name $expected_state $raid_level $strip_size $((num_base_bdevs - 1))

	# Step5: delete remaining base bdevs from the RAID bdev
	# Expect state: removed from system
	for ((i = 1; i < num_base_bdevs; i++)); do
		# The raid bdev must survive until its last member disappears.
		raid_bdev=$($rpc_py bdev_raid_get_bdevs all | jq -r '.[0]["name"]')
		if [ "$raid_bdev" != $raid_bdev_name ]; then
			echo "$raid_bdev_name removed before all base bdevs were deleted"
			return 1
		fi
		$rpc_py bdev_malloc_delete ${base_bdevs[$i]}
	done
	raid_bdev=$($rpc_py bdev_raid_get_bdevs all | jq -r '.[0]["name"] | select(.)')
	if [ -n "$raid_bdev" ]; then
		echo "$raid_bdev_name is not removed"
		return 1
	fi

	killprocess $raid_pid

	return 0
}
284
function raid0_resize_test() {
	# Verify raid0 size tracking across base bdev resizes: growing only one of
	# two null bdevs must not change the raid size (raid0 capacity is limited
	# by the smallest member), while growing both must double it.
	# Uses the global $rpc_server and sets the global $raid_pid.
	local blksize=512
	local bdev_size_mb=32
	local new_bdev_size_mb=$((bdev_size_mb * 2))
	local blkcnt
	local raid_size_mb

	$rootdir/test/app/bdev_svc/bdev_svc -r $rpc_server -i 0 -L bdev_raid &
	raid_pid=$!
	echo "Process raid pid: $raid_pid"
	waitforlisten $raid_pid $rpc_server

	$rpc_py bdev_null_create Base_1 $bdev_size_mb $blksize
	$rpc_py bdev_null_create Base_2 $bdev_size_mb $blksize

	$rpc_py bdev_raid_create -z 64 -r 0 -b "Base_1 Base_2" -n Raid

	# Resize Base_1 first.
	$rpc_py bdev_null_resize Base_1 $new_bdev_size_mb

	# The size of Raid must not change while the members are uneven.
	blkcnt=$($rpc_py bdev_get_bdevs -b Raid | jq '.[].num_blocks')
	raid_size_mb=$((blkcnt * blksize / 1048576))
	if [ "$raid_size_mb" -ne $((bdev_size_mb * 2)) ]; then
		echo "resize failed"
		return 1
	fi

	# Resize Base_2 next.
	$rpc_py bdev_null_resize Base_2 $new_bdev_size_mb

	# Both members grew, so the raid size must now reflect the new capacity.
	blkcnt=$($rpc_py bdev_get_bdevs -b Raid | jq '.[].num_blocks')
	raid_size_mb=$((blkcnt * blksize / 1048576))
	if [ "$raid_size_mb" -ne $((new_bdev_size_mb * 2)) ]; then
		echo "resize failed"
		return 1
	fi

	killprocess $raid_pid

	return 0
}
329
function raid_superblock_test() {
	# Verify on-disk superblock handling: a raid bdev created with -s over
	# passthru bdevs (fixed UUIDs) must re-assemble automatically with the
	# same UUID when its members reappear, must NOT go online over bare
	# malloc bdevs that still carry stale superblocks, and - for redundant
	# levels - must correctly reject a member holding an outdated superblock
	# version after the array ran degraded.
	# $1 - raid level, $2 - number of base bdevs.
	# Uses the global $rpc_server and sets the global $raid_pid.
	local raid_level=$1
	local num_base_bdevs=$2
	local base_bdevs_malloc=()
	local base_bdevs_pt=()
	local base_bdevs_pt_uuid=()
	local raid_bdev_name="raid_bdev1"
	local strip_size
	local strip_size_create_arg
	local raid_bdev_uuid
	local raid_bdev

	# raid1 does not stripe, so it gets no -z argument.
	if [ $raid_level != "raid1" ]; then
		strip_size=64
		strip_size_create_arg="-z $strip_size"
	else
		strip_size=0
	fi

	"$rootdir/test/app/bdev_svc/bdev_svc" -r $rpc_server -L bdev_raid &
	raid_pid=$!
	waitforlisten $raid_pid $rpc_server

	# Create base bdevs
	# Each member is a malloc bdev wrapped in a passthru bdev with a fixed
	# UUID, so members can be "unplugged"/"replugged" by deleting/recreating
	# the passthru layer without touching the data underneath.
	for ((i = 1; i <= num_base_bdevs; i++)); do
		local bdev_malloc="malloc$i"
		local bdev_pt="pt$i"
		local bdev_pt_uuid="00000000-0000-0000-0000-00000000000$i"

		base_bdevs_malloc+=($bdev_malloc)
		base_bdevs_pt+=($bdev_pt)
		base_bdevs_pt_uuid+=($bdev_pt_uuid)

		$rpc_py bdev_malloc_create 32 512 -b $bdev_malloc
		$rpc_py bdev_passthru_create -b $bdev_malloc -p $bdev_pt -u $bdev_pt_uuid
	done

	# Create RAID bdev with superblock
	$rpc_py bdev_raid_create $strip_size_create_arg -r $raid_level -b "${base_bdevs_pt[*]}" -n $raid_bdev_name -s
	verify_raid_bdev_state $raid_bdev_name "online" $raid_level $strip_size $num_base_bdevs

	# Get RAID bdev's UUID
	raid_bdev_uuid=$($rpc_py bdev_get_bdevs -b $raid_bdev_name | jq -r '.[] | .uuid')
	if [ -z "$raid_bdev_uuid" ]; then
		return 1
	fi

	# Stop the RAID bdev
	$rpc_py bdev_raid_delete $raid_bdev_name
	raid_bdev=$($rpc_py bdev_raid_get_bdevs all | jq -r '.[]')
	if [ -n "$raid_bdev" ]; then
		return 1
	fi

	# Delete the passthru bdevs
	for i in "${base_bdevs_pt[@]}"; do
		$rpc_py bdev_passthru_delete $i
	done
	if [ "$($rpc_py bdev_get_bdevs | jq -r '[.[] | select(.product_name == "passthru")] | any')" == "true" ]; then
		return 1
	fi

	# Try to create new RAID bdev from malloc bdevs
	# Should not reach online state due to superblock still present on base bdevs
	$rpc_py bdev_raid_create $strip_size_create_arg -r $raid_level -b "${base_bdevs_malloc[*]}" -n $raid_bdev_name
	verify_raid_bdev_state $raid_bdev_name "configuring" $raid_level $strip_size $num_base_bdevs

	# Stop the RAID bdev
	$rpc_py bdev_raid_delete $raid_bdev_name
	raid_bdev=$($rpc_py bdev_raid_get_bdevs all | jq -r '.[]')
	if [ -n "$raid_bdev" ]; then
		return 1
	fi

	# Re-add first base bdev
	$rpc_py bdev_passthru_create -b ${base_bdevs_malloc[0]} -p ${base_bdevs_pt[0]} -u ${base_bdevs_pt_uuid[0]}

	# Check if the RAID bdev was assembled from superblock
	verify_raid_bdev_state $raid_bdev_name "configuring" $raid_level $strip_size $num_base_bdevs

	if [ $num_base_bdevs -gt 2 ]; then
		# Re-add the second base bdev and remove it again
		# (assembly must tolerate a member appearing and disappearing)
		$rpc_py bdev_passthru_create -b ${base_bdevs_malloc[1]} -p ${base_bdevs_pt[1]} -u ${base_bdevs_pt_uuid[1]}
		$rpc_py bdev_passthru_delete ${base_bdevs_pt[1]}
		verify_raid_bdev_state $raid_bdev_name "configuring" $raid_level $strip_size $num_base_bdevs
	fi

	# Re-add remaining base bdevs
	for ((i = 1; i < num_base_bdevs; i++)); do
		$rpc_py bdev_passthru_create -b ${base_bdevs_malloc[$i]} -p ${base_bdevs_pt[$i]} -u ${base_bdevs_pt_uuid[$i]}
	done

	# Check if the RAID bdev is in online state
	verify_raid_bdev_state $raid_bdev_name "online" $raid_level $strip_size $num_base_bdevs

	# Check if the RAID bdev has the same UUID as when first created
	if [ "$($rpc_py bdev_get_bdevs -b $raid_bdev_name | jq -r '.[] | .uuid')" != "$raid_bdev_uuid" ]; then
		return 1
	fi

	if has_redundancy $raid_level; then
		# Delete one base bdev
		$rpc_py bdev_passthru_delete ${base_bdevs_pt[0]}

		# Check if the RAID bdev is in online state (degraded)
		verify_raid_bdev_state $raid_bdev_name "online" $raid_level $strip_size $((num_base_bdevs - 1))

		# Stop the RAID bdev
		$rpc_py bdev_raid_delete $raid_bdev_name
		raid_bdev=$($rpc_py bdev_raid_get_bdevs all | jq -r '.[]')
		if [ -n "$raid_bdev" ]; then
			return 1
		fi

		# Delete remaining base bdevs
		for ((i = 1; i < num_base_bdevs; i++)); do
			$rpc_py bdev_passthru_delete ${base_bdevs_pt[$i]}
		done

		# Re-add base bdevs from the second up to (not including) the last one
		for ((i = 1; i < num_base_bdevs - 1; i++)); do
			$rpc_py bdev_passthru_create -b ${base_bdevs_malloc[$i]} -p ${base_bdevs_pt[$i]} -u ${base_bdevs_pt_uuid[$i]}

			# Check if the RAID bdev is in configuring state
			verify_raid_bdev_state $raid_bdev_name "configuring" $raid_level $strip_size $((num_base_bdevs - 1))
		done

		# Re-add the last base bdev
		i=$((num_base_bdevs - 1))
		$rpc_py bdev_passthru_create -b ${base_bdevs_malloc[$i]} -p ${base_bdevs_pt[$i]} -u ${base_bdevs_pt_uuid[$i]}

		# Check if the RAID bdev is in online state (degraded)
		verify_raid_bdev_state $raid_bdev_name "online" $raid_level $strip_size $((num_base_bdevs - 1))

		if [ $num_base_bdevs -gt 2 ]; then
			# Stop the RAID bdev
			$rpc_py bdev_raid_delete $raid_bdev_name
			raid_bdev=$($rpc_py bdev_raid_get_bdevs all | jq -r '.[]')
			if [ -n "$raid_bdev" ]; then
				return 1
			fi

			# Re-add first base bdev
			# This is the "failed" device and contains the "old" version of the superblock
			$rpc_py bdev_passthru_create -b ${base_bdevs_malloc[0]} -p ${base_bdevs_pt[0]} -u ${base_bdevs_pt_uuid[0]}

			# Check if the RAID bdev is in configuring state
			verify_raid_bdev_state $raid_bdev_name "configuring" $raid_level $strip_size $num_base_bdevs

			# Delete remaining base bdevs
			for ((i = 1; i < num_base_bdevs; i++)); do
				$rpc_py bdev_passthru_delete ${base_bdevs_pt[$i]}
			done

			# Re-add the last base bdev
			i=$((num_base_bdevs - 1))
			$rpc_py bdev_passthru_create -b ${base_bdevs_malloc[$i]} -p ${base_bdevs_pt[$i]} -u ${base_bdevs_pt_uuid[$i]}

			# Check if the RAID bdev is in configuring state
			# This should use the newer superblock version and have n-1 online base bdevs
			verify_raid_bdev_state $raid_bdev_name "configuring" $raid_level $strip_size $((num_base_bdevs - 1))

			# Re-add remaining base bdevs
			for ((i = 1; i < num_base_bdevs - 1; i++)); do
				$rpc_py bdev_passthru_create -b ${base_bdevs_malloc[$i]} -p ${base_bdevs_pt[$i]} -u ${base_bdevs_pt_uuid[$i]}
			done

			# Check if the RAID bdev is in online state (degraded)
			verify_raid_bdev_state $raid_bdev_name "online" $raid_level $strip_size $((num_base_bdevs - 1))
		fi

		# Check if the RAID bdev has the same UUID as when first created
		if [ "$($rpc_py bdev_get_bdevs -b $raid_bdev_name | jq -r '.[] | .uuid')" != "$raid_bdev_uuid" ]; then
			return 1
		fi
	fi

	killprocess $raid_pid

	return 0
}
511
# Abort with cleanup and a backtrace if any command fails.
trap 'on_error_exit;' ERR

# Basic I/O + discard verification over nbd for the non-redundant levels.
raid_function_test raid0
raid_function_test concat
raid0_resize_test

# State-machine and superblock tests for 2..4 base bdevs at each raid level,
# both with and without an on-disk superblock.
for n in {2..4}; do
	for level in raid0 concat raid1; do
		raid_state_function_test $level $n false
		raid_state_function_test $level $n true
		raid_superblock_test $level $n
	done
done

# raid5f needs at least 3 base bdevs and is only compiled in when enabled.
if [ "$CONFIG_RAID5F" == y ]; then
	for n in {3..4}; do
		raid_state_function_test raid5f $n false
		raid_state_function_test raid5f $n true
		raid_superblock_test raid5f $n
	done
fi

rm -f $tmp_file
535