xref: /spdk/test/bdev/bdev_raid.sh (revision f6504f486ea65726440b1f8b83811a4d424d1c11)
1#!/usr/bin/env bash
2#  SPDX-License-Identifier: BSD-3-Clause
3#  Copyright (C) 2019 Intel Corporation
4#  All rights reserved.
5#
6testdir=$(readlink -f $(dirname $0))
7rootdir=$(readlink -f $testdir/../..)
8rpc_server=/var/tmp/spdk-raid.sock
9tmp_dir=$SPDK_TEST_STORAGE/raidtest
10tmp_file=$tmp_dir/raidrandtest
11
12source $rootdir/test/common/autotest_common.sh
13source $testdir/nbd_common.sh
14
15rpc_py="$rootdir/scripts/rpc.py -s $rpc_server"
16
# Check that discarding ranges of an NBD-exported raid bdev zeroes exactly those ranges.
#
# Random data is written to both $tmp_file and the block device; each range is then
# zeroed in the file and discarded on the device, and the two are compared again.
# The whole check is skipped (and 0 returned) when blkdiscard is not installed.
#
# Globals:   tmp_file (read) - path of the reference data file
# Arguments: $1 - NBD block device node (e.g. /dev/nbd0)
#            $2 - RPC server socket; accepted for compatibility with callers, unused
# Returns:   0
function raid_unmap_data_verify() {
	if hash blkdiscard; then
		local nbd=$1
		local blksize
		local rw_blk_num=4096
		local rw_len
		local unmap_blk_offs=(0 1028 321)
		local unmap_blk_nums=(128 2035 456)
		local unmap_off
		local unmap_len
		local i

		# Ask for the bare value of the device itself (-d: no children, -n: no header)
		# and strip the column padding. Picking a fixed space-separated field only
		# works for one particular number of digits.
		blksize=$(lsblk -dn -o LOG-SEC "$nbd")
		blksize=${blksize//[[:space:]]/}
		rw_len=$((blksize * rw_blk_num))

		# data write
		dd if=/dev/urandom of="$tmp_file" bs="$blksize" count="$rw_blk_num"
		dd if="$tmp_file" of="$nbd" bs="$blksize" count="$rw_blk_num" oflag=direct
		blockdev --flushbufs "$nbd"

		# confirm random data is written correctly in raid0 device
		cmp -b -n "$rw_len" "$tmp_file" "$nbd"

		for ((i = 0; i < ${#unmap_blk_offs[@]}; i++)); do
			# blkdiscard takes byte offsets/lengths, dd below takes block counts
			unmap_off=$((blksize * ${unmap_blk_offs[$i]}))
			unmap_len=$((blksize * ${unmap_blk_nums[$i]}))

			# data unmap on tmp_file
			dd if=/dev/zero of="$tmp_file" bs="$blksize" seek="${unmap_blk_offs[$i]}" count="${unmap_blk_nums[$i]}" conv=notrunc

			# data unmap on raid bdev
			blkdiscard -o "$unmap_off" -l "$unmap_len" "$nbd"
			blockdev --flushbufs "$nbd"

			# data verify after unmap
			cmp -b -n "$rw_len" "$tmp_file" "$nbd"
		done
	fi

	return 0
}
56
# Tear down whatever the current test left behind: kill the SPDK app recorded in
# raid_pid when that process still exists, then delete the scratch directory.
#
# Globals: raid_pid (read), tmp_dir (read)
function cleanup() {
	if [[ -n $raid_pid ]]; then
		# ps -p succeeds only while a process with that pid exists
		if ps -p $raid_pid > /dev/null; then
			killprocess $raid_pid
		fi
	fi

	rm -rf "$tmp_dir"
}
64
# Create two 32 MiB malloc base bdevs (Base_1, Base_2) and a raid bdev named "raid"
# with a 64 KiB strip on top of them, sending all three RPCs to the client in one batch.
#
# The batch is fed to the RPC client directly on stdin, so no scratch file is written
# into the source tree and the client's exit status is what this function returns.
#
# Globals:   rpc_py, base_blocklen, base_malloc_params (read)
# Arguments: $1 - raid level passed to bdev_raid_create -r
# Returns:   exit status of the RPC client
function configure_raid_bdev() {
	local raid_level=$1

	# NOTE: <<- strips leading tabs only; keep the here-doc body tab-indented.
	$rpc_py <<- EOL
		bdev_malloc_create 32 $base_blocklen $base_malloc_params -b Base_1
		bdev_malloc_create 32 $base_blocklen $base_malloc_params -b Base_2
		bdev_raid_create -z 64 -r $raid_level -b "Base_1 Base_2" -n raid
	EOL
}
78
# Functional smoke test: start bdev_svc, build a two-disk raid bdev of the given level,
# export it over NBD, run the unmap data verification on it and tear everything down.
#
# Globals:   rootdir, rpc_server, rpc_py (read); raid_pid (written)
# Arguments: $1 - raid level
# Returns:   0 on success, 1 when no online raid bdev is found or the NBD device count
#            is not the expected one
function raid_function_test() {
	local raid_level=$1
	local nbd=/dev/nbd0
	local raid_bdev
	local count

	$rootdir/test/app/bdev_svc/bdev_svc -r $rpc_server -i 0 -L bdev_raid &
	raid_pid=$!
	echo "Process raid pid: $raid_pid"
	waitforlisten $raid_pid $rpc_server

	configure_raid_bdev $raid_level
	# select(.) drops a null name, so the result is empty when no raid bdev is online
	raid_bdev=$($rpc_py bdev_raid_get_bdevs online | jq -r '.[0]["name"] | select(.)')
	# Must be a quoted -z test: an unquoted empty value turns the comparison into
	# `[ = "" ]`, which is a test(1) usage error and never takes this branch.
	if [ -z "$raid_bdev" ]; then
		echo "No raid0 device in SPDK app"
		return 1
	fi

	nbd_start_disks $rpc_server $raid_bdev $nbd
	count=$(nbd_get_count $rpc_server)
	if [ "$count" -ne 1 ]; then
		return 1
	fi

	raid_unmap_data_verify $nbd $rpc_server

	nbd_stop_disks $rpc_server $nbd
	count=$(nbd_get_count $rpc_server)
	if [ "$count" -ne 0 ]; then
		return 1
	fi

	killprocess $raid_pid

	return 0
}
114
# Check that a raid bdev exists, is reported in the expected state and that the fields
# of its bdev_raid_get_bdevs entry are consistent with the expected values.
#
# Command tracing is suspended while the checks run and is restored on every exit
# path, including the failing ones.
#
# Globals:   rpc_py (read)
# Arguments: $1 - raid bdev name
#            $2 - expected state (also used as the bdev_raid_get_bdevs category)
#            $3 - expected raid level
#            $4 - expected strip size in KiB
#            $5 - expected number of operational base bdevs
# Outputs:   a one-line description of the first mismatch on stdout
# Returns:   0 when everything matches, 1 otherwise
function verify_raid_bdev_state() {
	local raid_bdev_name=$1
	local expected_state=$2
	local raid_level=$3
	local strip_size=$4
	local num_base_bdevs_operational=$5
	local raid_bdev_info
	local num_base_bdevs
	local num_base_bdevs_discovered
	local tmp

	raid_bdev_info=$($rpc_py bdev_raid_get_bdevs all | jq -r ".[] | select(.name == \"$raid_bdev_name\")")

	xtrace_disable
	if [ -z "$raid_bdev_info" ]; then
		echo "No raid device \"$raid_bdev_name\" in SPDK app"
		xtrace_restore
		return 1
	fi

	# Query again, filtered by state this time: an empty result means the bdev exists
	# but is listed under a different state.
	raid_bdev_info=$($rpc_py bdev_raid_get_bdevs "$expected_state" | jq -r ".[] | select(.name == \"$raid_bdev_name\")")
	if [ -z "$raid_bdev_info" ]; then
		echo "$raid_bdev_name is not in $expected_state state"
		xtrace_restore
		return 1
	fi

	# The JSON is passed to jq through here-strings so that it is never word-split
	# or glob-expanded by the shell.
	tmp=$(jq -r '.state' <<< "$raid_bdev_info")
	if [ "$tmp" != "$expected_state" ]; then
		echo "incorrect state: $tmp, expected: $expected_state"
		xtrace_restore
		return 1
	fi

	tmp=$(jq -r '.raid_level' <<< "$raid_bdev_info")
	if [ "$tmp" != "$raid_level" ]; then
		echo "incorrect level: $tmp, expected: $raid_level"
		xtrace_restore
		return 1
	fi

	tmp=$(jq -r '.strip_size_kb' <<< "$raid_bdev_info")
	if [ "$tmp" != "$strip_size" ]; then
		echo "incorrect strip size: $tmp, expected: $strip_size"
		xtrace_restore
		return 1
	fi

	# The reported counters must agree with the contents of base_bdevs_list
	num_base_bdevs=$(jq -r '[.base_bdevs_list[]] | length' <<< "$raid_bdev_info")
	tmp=$(jq -r '.num_base_bdevs' <<< "$raid_bdev_info")
	if [ "$num_base_bdevs" != "$tmp" ]; then
		echo "incorrect num_base_bdevs: $tmp, expected: $num_base_bdevs"
		xtrace_restore
		return 1
	fi

	num_base_bdevs_discovered=$(jq -r '[.base_bdevs_list[] | select(.is_configured)] | length' <<< "$raid_bdev_info")
	tmp=$(jq -r '.num_base_bdevs_discovered' <<< "$raid_bdev_info")
	if [ "$num_base_bdevs_discovered" != "$tmp" ]; then
		echo "incorrect num_base_bdevs_discovered: $tmp, expected: $num_base_bdevs_discovered"
		xtrace_restore
		return 1
	fi

	tmp=$(jq -r '.num_base_bdevs_operational' <<< "$raid_bdev_info")
	if [ "$num_base_bdevs_operational" != "$tmp" ]; then
		echo "incorrect num_base_bdevs_operational $tmp, expected: $num_base_bdevs_operational"
		xtrace_restore
		return 1
	fi

	xtrace_restore
}
180
# Check the background process reported for a raid bdev.
#
# A missing .process.type or .process.target is treated as the string "none".
# Both fields are checked in a single && list so the result is the same whether or
# not errexit is in effect (e.g. when the function is called as an `if` condition).
#
# Globals:   rpc_py (read)
# Arguments: $1 - raid bdev name
#            $2 - expected process type, or "none"
#            $3 - expected process target bdev name, or "none"
# Returns:   0 only when both the type and the target match
function verify_raid_bdev_process() {
	local raid_bdev_name=$1
	local process_type=$2
	local target=$3
	local raid_bdev_info

	raid_bdev_info=$($rpc_py bdev_raid_get_bdevs all | jq -r ".[] | select(.name == \"$raid_bdev_name\")")

	[[ $(jq -r '.process.type // "none"' <<< "$raid_bdev_info") == "$process_type" ]] \
		&& [[ $(jq -r '.process.target // "none"' <<< "$raid_bdev_info") == "$target" ]]
}
192
# Check that a raid bdev reports the same block_size, md_size, md_interleave and
# dif_type as each of its configured base bdevs.
#
# Globals:   rpc_py (read)
# Arguments: $1 - raid bdev name
# Outputs:   a description of the first mismatching property on stderr
# Returns:   0 when all properties match on all configured base bdevs, 1 on the
#            first mismatch
function verify_raid_bdev_properties() {
	local raid_bdev_name=$1
	local raid_bdev_info
	local base_bdev_info
	local base_bdev_names
	local name
	local props=(block_size md_size md_interleave dif_type)
	local raid_values=()
	local base_value
	local idx

	raid_bdev_info=$($rpc_py bdev_get_bdevs -b $raid_bdev_name | jq '.[]')
	base_bdev_names=$(jq -r '.driver_specific.raid.base_bdevs_list[] | select(.is_configured == true).name' <<< "$raid_bdev_info")

	# The raid bdev's own values do not change per base bdev - extract them once
	for ((idx = 0; idx < ${#props[@]}; idx++)); do
		raid_values[idx]=$(jq ".${props[idx]}" <<< "$raid_bdev_info")
	done

	for name in $base_bdev_names; do
		base_bdev_info=$($rpc_py bdev_get_bdevs -b $name | jq '.[]')
		for ((idx = 0; idx < ${#props[@]}; idx++)); do
			base_value=$(jq ".${props[idx]}" <<< "$base_bdev_info")
			# Quoted right-hand side: compare literally, not as a glob pattern
			if [[ ${raid_values[idx]} != "$base_value" ]]; then
				echo "${props[idx]} mismatch: $raid_bdev_name=${raid_values[idx]}, $name=$base_value" >&2
				return 1
			fi
		done
	done
}
211
# Tell whether the given raid level is one of the redundant ones (raid1, raid5f).
#
# Arguments: $1 - raid level name
# Returns:   0 for raid1 and raid5f, 1 for anything else
function has_redundancy() {
	local level=$1

	if [[ $level == "raid1" || $level == "raid5f" ]]; then
		return 0
	fi

	return 1
}
218
# Walk a raid bdev through its states (configuring, online, offline/degraded, removed)
# by creating and deleting its malloc base bdevs, verifying the reported state after
# each step.
#
# Globals:   rootdir, rpc_server, rpc_py, base_blocklen, base_malloc_params (read);
#            raid_pid (written); i (used as loop counter, not local)
# Arguments: $1 - raid level
#            $2 - number of base bdevs
#            $3 - "true" to create the raid bdev with a superblock (-s)
# Returns:   0 on success, 1 on a failed check
function raid_state_function_test() {
	local raid_level=$1
	local num_base_bdevs=$2
	local superblock=$3
	local raid_bdev
	# BaseBdev1 .. BaseBdev<num_base_bdevs>
	local base_bdevs=($(for ((i = 1; i <= num_base_bdevs; i++)); do echo BaseBdev$i; done))
	local raid_bdev_name="Existed_Raid"
	local strip_size
	local strip_size_create_arg
	local superblock_create_arg

	# raid1 is created without -z and is expected to report a strip size of 0
	if [ $raid_level != "raid1" ]; then
		strip_size=64
		strip_size_create_arg="-z $strip_size"
	else
		strip_size=0
	fi

	if [ $superblock = true ]; then
		superblock_create_arg="-s"
	else
		superblock_create_arg=""
	fi

	$rootdir/test/app/bdev_svc/bdev_svc -r $rpc_server -i 0 -L bdev_raid &
	raid_pid=$!
	echo "Process raid pid: $raid_pid"
	waitforlisten $raid_pid $rpc_server

	# Step1: create a RAID bdev with no base bdevs
	# (none of the named base bdevs has been created at this point)
	# Expect state: CONFIGURING
	$rpc_py bdev_raid_create $strip_size_create_arg $superblock_create_arg -r $raid_level -b "${base_bdevs[*]}" -n $raid_bdev_name
	verify_raid_bdev_state $raid_bdev_name "configuring" $raid_level $strip_size $num_base_bdevs
	$rpc_py bdev_raid_delete $raid_bdev_name

	# Step2: create one base bdev and add to the RAID bdev
	# Expect state: CONFIGURING
	$rpc_py bdev_raid_create $strip_size_create_arg $superblock_create_arg -r $raid_level -b "${base_bdevs[*]}" -n $raid_bdev_name
	$rpc_py bdev_malloc_create 32 $base_blocklen $base_malloc_params -b ${base_bdevs[0]}
	waitforbdev ${base_bdevs[0]}
	verify_raid_bdev_state $raid_bdev_name "configuring" $raid_level $strip_size $num_base_bdevs
	$rpc_py bdev_raid_delete $raid_bdev_name

	# Step3: create remaining base bdevs and add to the RAID bdev
	# (the first base bdev still exists from Step2, so the loop starts at index 1)
	# Expect state: ONLINE
	$rpc_py bdev_raid_create $strip_size_create_arg $superblock_create_arg -r $raid_level -b "${base_bdevs[*]}" -n $raid_bdev_name
	for ((i = 1; i < num_base_bdevs; i++)); do
		verify_raid_bdev_state $raid_bdev_name "configuring" $raid_level $strip_size $num_base_bdevs
		$rpc_py bdev_malloc_create 32 $base_blocklen $base_malloc_params -b ${base_bdevs[$i]}
		waitforbdev ${base_bdevs[$i]}
	done
	verify_raid_bdev_state $raid_bdev_name "online" $raid_level $strip_size $num_base_bdevs
	verify_raid_bdev_properties $raid_bdev_name

	# Step4: delete one base bdev from the RAID bdev
	# Expect state: ONLINE for levels with redundancy, OFFLINE otherwise
	$rpc_py bdev_malloc_delete ${base_bdevs[0]}
	local expected_state
	if ! has_redundancy $raid_level; then
		expected_state="offline"
	else
		expected_state="online"
	fi
	verify_raid_bdev_state $raid_bdev_name $expected_state $raid_level $strip_size $((num_base_bdevs - 1))

	# Step5: delete remaining base bdevs from the RAID bdev
	# Expect state: removed from system
	for ((i = 1; i < num_base_bdevs; i++)); do
		# The raid bdev must still be listed before each remaining base bdev is deleted
		raid_bdev=$($rpc_py bdev_raid_get_bdevs all | jq -r '.[0]["name"]')
		if [ "$raid_bdev" != $raid_bdev_name ]; then
			echo "$raid_bdev_name removed before all base bdevs were deleted"
			return 1
		fi
		$rpc_py bdev_malloc_delete ${base_bdevs[$i]}
	done
	raid_bdev=$($rpc_py bdev_raid_get_bdevs all | jq -r '.[0]["name"] | select(.)')
	if [ -n "$raid_bdev" ]; then
		echo "$raid_bdev_name is not removed"
		return 1
	fi

	if [ $num_base_bdevs -gt 2 ]; then
		# Test removing and re-adding base bdevs when in CONFIGURING state
		# All base bdevs except the first are created, so the raid bdev stays in
		# CONFIGURING until the very last step of this section.
		for ((i = 1; i < num_base_bdevs; i++)); do
			$rpc_py bdev_malloc_create 32 $base_blocklen $base_malloc_params -b ${base_bdevs[$i]}
			waitforbdev ${base_bdevs[$i]}
		done
		$rpc_py bdev_raid_create $strip_size_create_arg $superblock_create_arg -r $raid_level -b "${base_bdevs[*]}" -n $raid_bdev_name
		verify_raid_bdev_state $raid_bdev_name "configuring" $raid_level $strip_size $num_base_bdevs

		# Remove the second base bdev through the raid RPC
		$rpc_py bdev_raid_remove_base_bdev ${base_bdevs[1]}
		verify_raid_bdev_state $raid_bdev_name "configuring" $raid_level $strip_size $num_base_bdevs
		[[ $($rpc_py bdev_raid_get_bdevs all | jq '.[0].base_bdevs_list[1].is_configured') == "false" ]]

		# Create the first base bdev; it is expected to be picked up by the raid bdev
		$rpc_py bdev_malloc_create 32 $base_blocklen $base_malloc_params -b ${base_bdevs[0]}
		waitforbdev ${base_bdevs[0]}
		verify_raid_bdev_state $raid_bdev_name "configuring" $raid_level $strip_size $num_base_bdevs
		[[ $($rpc_py bdev_raid_get_bdevs all | jq '.[0].base_bdevs_list[0].is_configured') == "true" ]]

		# Remove the third base bdev ...
		$rpc_py bdev_raid_remove_base_bdev ${base_bdevs[2]}
		verify_raid_bdev_state $raid_bdev_name "configuring" $raid_level $strip_size $num_base_bdevs
		[[ $($rpc_py bdev_raid_get_bdevs all | jq '.[0].base_bdevs_list[2].is_configured') == "false" ]]

		# ... and add it back explicitly
		$rpc_py bdev_raid_add_base_bdev $raid_bdev_name ${base_bdevs[2]}
		verify_raid_bdev_state $raid_bdev_name "configuring" $raid_level $strip_size $num_base_bdevs
		[[ $($rpc_py bdev_raid_get_bdevs all | jq '.[0].base_bdevs_list[2].is_configured') == "true" ]]

		# Delete the underlying malloc bdev of the first slot
		$rpc_py bdev_malloc_delete ${base_bdevs[0]}
		verify_raid_bdev_state $raid_bdev_name "configuring" $raid_level $strip_size $num_base_bdevs
		[[ $($rpc_py bdev_raid_get_bdevs all | jq '.[0].base_bdevs_list[0].is_configured') == "false" ]]

		# Add the second base bdev back
		$rpc_py bdev_raid_add_base_bdev $raid_bdev_name ${base_bdevs[1]}
		verify_raid_bdev_state $raid_bdev_name "configuring" $raid_level $strip_size $num_base_bdevs
		[[ $($rpc_py bdev_raid_get_bdevs all | jq '.[0].base_bdevs_list[1].is_configured') == "true" ]]

		# Fill the first slot with a bdev of a different name that carries the UUID the
		# raid bdev reports for that slot; the raid bdev is then expected to go ONLINE
		$rpc_py bdev_malloc_create 32 $base_blocklen $base_malloc_params -b NewBaseBdev -u "$($rpc_py bdev_raid_get_bdevs all | jq -r '.[0].base_bdevs_list[0].uuid')"
		waitforbdev NewBaseBdev
		verify_raid_bdev_state $raid_bdev_name "online" $raid_level $strip_size $num_base_bdevs
		verify_raid_bdev_properties $raid_bdev_name

		$rpc_py bdev_raid_delete $raid_bdev_name
	fi

	killprocess $raid_pid

	return 0
}
345
# Resize the two null base bdevs of a raid0 bdev one after the other and check the
# raid bdev size after each resize: it must stay unchanged after the first resize
# and grow only once both base bdevs have been resized.
#
# Globals: rootdir, rpc_server, rpc_py, base_blocklen (read); raid_pid (written)
# Returns: 0 on success, 1 when the raid bdev has an unexpected size
function raid0_resize_test() {
	local blksize=$base_blocklen
	local bdev_size_mb=32
	local new_bdev_size_mb=$((bdev_size_mb * 2))
	local blkcnt
	local raid_size_mb
	local step
	local base_name
	local expected_mb

	$rootdir/test/app/bdev_svc/bdev_svc -r $rpc_server -i 0 -L bdev_raid &
	raid_pid=$!
	echo "Process raid pid: $raid_pid"
	waitforlisten $raid_pid $rpc_server

	for base_name in Base_1 Base_2; do
		$rpc_py bdev_null_create $base_name $bdev_size_mb $blksize
	done

	$rpc_py bdev_raid_create -z 64 -r 0 -b "Base_1 Base_2" -n Raid

	# Each step is "<base bdev to resize> <raid size in MiB expected afterwards>":
	# after Base_1 alone the raid size is still 2 x the old base size, after Base_2
	# it is 2 x the new base size.
	for step in "Base_1 $((bdev_size_mb * 2))" "Base_2 $((new_bdev_size_mb * 2))"; do
		read -r base_name expected_mb <<< "$step"

		$rpc_py bdev_null_resize $base_name $new_bdev_size_mb

		blkcnt=$($rpc_py bdev_get_bdevs -b Raid | jq '.[].num_blocks')
		raid_size_mb=$((blkcnt * blksize / 1048576))
		if ((raid_size_mb != expected_mb)); then
			echo "resize failed"
			return 1
		fi
	done

	killprocess $raid_pid

	return 0
}
390
# Exercise raid bdev assembly from on-disk superblocks.
#
# The raid bdev is built on passthru bdevs stacked on malloc bdevs. Deleting and
# re-creating the passthru bdevs (with fixed UUIDs) makes the base bdevs disappear and
# reappear while the data on the malloc bdevs is kept, so the raid bdev is expected to
# be re-assembled from the superblock.
#
# Globals:   rootdir, rpc_server, rpc_py, base_blocklen, base_malloc_params (read);
#            raid_pid (written); i (used as loop variable, not local)
# Arguments: $1 - raid level
#            $2 - number of base bdevs
# Returns:   0 on success, 1 on a failed check
function raid_superblock_test() {
	local raid_level=$1
	local num_base_bdevs=$2
	local base_bdevs_malloc=()
	local base_bdevs_pt=()
	local base_bdevs_pt_uuid=()
	local raid_bdev_name="raid_bdev1"
	local strip_size
	local strip_size_create_arg
	local raid_bdev_uuid
	local raid_bdev

	# raid1 is created without -z and is expected to report a strip size of 0
	if [ $raid_level != "raid1" ]; then
		strip_size=64
		strip_size_create_arg="-z $strip_size"
	else
		strip_size=0
	fi

	"$rootdir/test/app/bdev_svc/bdev_svc" -r $rpc_server -L bdev_raid &
	raid_pid=$!
	waitforlisten $raid_pid $rpc_server

	# Create base bdevs
	# (malloc<i> with passthru pt<i> on top; the passthru UUID ends in the index i)
	for ((i = 1; i <= num_base_bdevs; i++)); do
		local bdev_malloc="malloc$i"
		local bdev_pt="pt$i"
		local bdev_pt_uuid="00000000-0000-0000-0000-00000000000$i"

		base_bdevs_malloc+=($bdev_malloc)
		base_bdevs_pt+=($bdev_pt)
		base_bdevs_pt_uuid+=($bdev_pt_uuid)

		$rpc_py bdev_malloc_create 32 $base_blocklen $base_malloc_params -b $bdev_malloc
		$rpc_py bdev_passthru_create -b $bdev_malloc -p $bdev_pt -u $bdev_pt_uuid
	done

	# Create RAID bdev with superblock
	$rpc_py bdev_raid_create $strip_size_create_arg -r $raid_level -b "${base_bdevs_pt[*]}" -n $raid_bdev_name -s
	verify_raid_bdev_state $raid_bdev_name "online" $raid_level $strip_size $num_base_bdevs
	verify_raid_bdev_properties $raid_bdev_name

	# Get RAID bdev's UUID
	raid_bdev_uuid=$($rpc_py bdev_get_bdevs -b $raid_bdev_name | jq -r '.[] | .uuid')
	if [ -z "$raid_bdev_uuid" ]; then
		return 1
	fi

	# Stop the RAID bdev
	$rpc_py bdev_raid_delete $raid_bdev_name
	raid_bdev=$($rpc_py bdev_raid_get_bdevs all | jq -r '.[]')
	if [ -n "$raid_bdev" ]; then
		return 1
	fi

	# Delete the passthru bdevs
	for i in "${base_bdevs_pt[@]}"; do
		$rpc_py bdev_passthru_delete $i
	done
	# No passthru bdev may be left behind
	if [ "$($rpc_py bdev_get_bdevs | jq -r '[.[] | select(.product_name == "passthru")] | any')" == "true" ]; then
		return 1
	fi

	# Try to create new RAID bdev from malloc bdevs
	# Should fail due to superblock still present on base bdevs
	NOT $rpc_py bdev_raid_create $strip_size_create_arg -r $raid_level -b "${base_bdevs_malloc[*]}" -n $raid_bdev_name

	raid_bdev=$($rpc_py bdev_raid_get_bdevs all | jq -r '.[]')
	if [ -n "$raid_bdev" ]; then
		return 1
	fi

	# Re-add first base bdev
	$rpc_py bdev_passthru_create -b ${base_bdevs_malloc[0]} -p ${base_bdevs_pt[0]} -u ${base_bdevs_pt_uuid[0]}

	# Check if the RAID bdev was assembled from superblock
	verify_raid_bdev_state $raid_bdev_name "configuring" $raid_level $strip_size $num_base_bdevs

	if [ $num_base_bdevs -gt 2 ]; then
		# Re-add the second base bdev and remove it again
		$rpc_py bdev_passthru_create -b ${base_bdevs_malloc[1]} -p ${base_bdevs_pt[1]} -u ${base_bdevs_pt_uuid[1]}
		$rpc_py bdev_passthru_delete ${base_bdevs_pt[1]}
		verify_raid_bdev_state $raid_bdev_name "configuring" $raid_level $strip_size $num_base_bdevs
	fi

	# Re-add remaining base bdevs
	for ((i = 1; i < num_base_bdevs; i++)); do
		$rpc_py bdev_passthru_create -b ${base_bdevs_malloc[$i]} -p ${base_bdevs_pt[$i]} -u ${base_bdevs_pt_uuid[$i]}
	done

	# Check if the RAID bdev is in online state
	verify_raid_bdev_state $raid_bdev_name "online" $raid_level $strip_size $num_base_bdevs
	verify_raid_bdev_properties $raid_bdev_name

	# Check if the RAID bdev has the same UUID as when first created
	if [ "$($rpc_py bdev_get_bdevs -b $raid_bdev_name | jq -r '.[] | .uuid')" != "$raid_bdev_uuid" ]; then
		return 1
	fi

	# The remaining scenarios need an array that keeps working with one base bdev missing
	if has_redundancy $raid_level; then
		# Delete one base bdev
		$rpc_py bdev_passthru_delete ${base_bdevs_pt[0]}

		# Check if the RAID bdev is in online state (degraded)
		verify_raid_bdev_state $raid_bdev_name "online" $raid_level $strip_size $((num_base_bdevs - 1))

		# Stop the RAID bdev
		$rpc_py bdev_raid_delete $raid_bdev_name
		raid_bdev=$($rpc_py bdev_raid_get_bdevs all | jq -r '.[]')
		if [ -n "$raid_bdev" ]; then
			return 1
		fi

		# Delete remaining base bdevs
		for ((i = 1; i < num_base_bdevs; i++)); do
			$rpc_py bdev_passthru_delete ${base_bdevs_pt[$i]}
		done

		# Re-add base bdevs from the second up to (not including) the last one
		for ((i = 1; i < num_base_bdevs - 1; i++)); do
			$rpc_py bdev_passthru_create -b ${base_bdevs_malloc[$i]} -p ${base_bdevs_pt[$i]} -u ${base_bdevs_pt_uuid[$i]}

			# Check if the RAID bdev is in configuring state
			verify_raid_bdev_state $raid_bdev_name "configuring" $raid_level $strip_size $((num_base_bdevs - 1))
		done

		# Re-add the last base bdev
		i=$((num_base_bdevs - 1))
		$rpc_py bdev_passthru_create -b ${base_bdevs_malloc[$i]} -p ${base_bdevs_pt[$i]} -u ${base_bdevs_pt_uuid[$i]}

		# Check if the RAID bdev is in online state (degraded)
		verify_raid_bdev_state $raid_bdev_name "online" $raid_level $strip_size $((num_base_bdevs - 1))

		# Stop the RAID bdev
		$rpc_py bdev_raid_delete $raid_bdev_name
		raid_bdev=$($rpc_py bdev_raid_get_bdevs all | jq -r '.[]')
		if [ -n "$raid_bdev" ]; then
			return 1
		fi

		if [ $num_base_bdevs -gt 2 ]; then
			# Delete the last base bdev
			i=$((num_base_bdevs - 1))
			$rpc_py bdev_passthru_delete ${base_bdevs_pt[$i]}
		fi

		# Re-add first base bdev
		# This is the "failed" device and contains the "old" version of the superblock
		$rpc_py bdev_passthru_create -b ${base_bdevs_malloc[0]} -p ${base_bdevs_pt[0]} -u ${base_bdevs_pt_uuid[0]}

		if [ $num_base_bdevs -gt 2 ]; then
			# Check if the RAID bdev is in configuring state
			# This should use the newer superblock version and have n-1 online base bdevs
			verify_raid_bdev_state $raid_bdev_name "configuring" $raid_level $strip_size $((num_base_bdevs - 1))
			[[ $($rpc_py bdev_raid_get_bdevs configuring | jq -r '.[].base_bdevs_list[0].is_configured') == "false" ]]

			# Re-add the last base bdev
			# (i still holds num_base_bdevs - 1 from the deletion above)
			$rpc_py bdev_passthru_create -b ${base_bdevs_malloc[$i]} -p ${base_bdevs_pt[$i]} -u ${base_bdevs_pt_uuid[$i]}
		fi

		# Check if the RAID bdev is in online state (degraded)
		# This should use the newer superblock version and have n-1 online base bdevs
		verify_raid_bdev_state $raid_bdev_name "online" $raid_level $strip_size $((num_base_bdevs - 1))
		[[ $($rpc_py bdev_raid_get_bdevs online | jq -r '.[].base_bdevs_list[0].is_configured') == "false" ]]

		# Check if the RAID bdev has the same UUID as when first created
		if [ "$($rpc_py bdev_get_bdevs -b $raid_bdev_name | jq -r '.[] | .uuid')" != "$raid_bdev_uuid" ]; then
			return 1
		fi
	fi

	killprocess $raid_pid

	return 0
}
566
# Exercise raid rebuild: remove a base bdev from an online raid bdev, add a spare,
# check that the rebuild process starts, can be interrupted and restarted, and
# finishes; optionally compare the rebuilt data and, with a superblock, check how the
# array is re-assembled afterwards.
#
# The application under test is bdevperf started with -z, so user I/O only runs when
# perform_tests is requested (background_io = true).
#
# Globals:   rootdir, rpc_server, rpc_py, base_blocklen, base_malloc_params (read);
#            raid_pid (written); bdev, i (used as loop variables, not local)
# Arguments: $1 - raid level
#            $2 - number of base bdevs
#            $3 - "true" to create the raid bdev with a superblock
#            $4 - "true" to run bdevperf I/O in the background during the rebuild
#            $5 - "true" to verify the data on the rebuilt base bdev
# Returns:   0 on success, 1 when background I/O is requested for a level other
#            than raid1
function raid_rebuild_test() {
	local raid_level=$1
	local num_base_bdevs=$2
	local superblock=$3
	local background_io=$4
	local verify=$5
	# BaseBdev1 .. BaseBdev<num_base_bdevs>
	local base_bdevs=($(for ((i = 1; i <= num_base_bdevs; i++)); do echo BaseBdev$i; done))
	local raid_bdev_name="raid_bdev1"
	local strip_size
	local create_arg
	local raid_bdev_size
	local data_offset

	if [ $raid_level != "raid1" ]; then
		if [ $background_io = true ]; then
			echo "skipping rebuild test with io for level $raid_level"
			return 1
		fi
		strip_size=64
		create_arg+=" -z $strip_size"
	else
		strip_size=0
	fi

	if [ $superblock = true ]; then
		create_arg+=" -s"
	fi

	"$rootdir/build/examples/bdevperf" -r $rpc_server -T $raid_bdev_name -t 60 -w randrw -M 50 -o 3M -q 2 -U -z -L bdev_raid &
	raid_pid=$!
	waitforlisten $raid_pid $rpc_server

	# Create base bdevs
	# (a passthru bdev on top of a malloc bdev, so that a base bdev can be taken away
	# and brought back without losing its data)
	for bdev in "${base_bdevs[@]}"; do
		$rpc_py bdev_malloc_create 32 $base_blocklen $base_malloc_params -b ${bdev}_malloc
		$rpc_py bdev_passthru_create -b ${bdev}_malloc -p $bdev
	done

	# Create spare bdev
	# NOTE(review): the delay bdev only adds write latency (-w/-n); presumably this keeps
	# the rebuild running long enough to be observed - confirm.
	$rpc_py bdev_malloc_create 32 $base_blocklen $base_malloc_params -b "spare_malloc"
	$rpc_py bdev_delay_create -b "spare_malloc" -d "spare_delay" -r 0 -t 0 -w 100000 -n 100000
	$rpc_py bdev_passthru_create -b "spare_delay" -p "spare"

	# Create RAID bdev
	$rpc_py bdev_raid_create $create_arg -r $raid_level -b "${base_bdevs[*]}" -n $raid_bdev_name
	verify_raid_bdev_state $raid_bdev_name "online" $raid_level $strip_size $num_base_bdevs

	# Get RAID bdev's size
	raid_bdev_size=$($rpc_py bdev_get_bdevs -b $raid_bdev_name | jq -r '.[].num_blocks')

	# Get base bdev's data offset
	data_offset=$($rpc_py bdev_raid_get_bdevs all | jq -r '.[].base_bdevs_list[0].data_offset')

	if [ $background_io = true ]; then
		# Start user I/O
		"$rootdir/examples/bdev/bdevperf/bdevperf.py" -s $rpc_server perform_tests &
	elif [ $verify = true ]; then
		local write_unit_size

		# Write random data to the RAID bdev
		nbd_start_disks $rpc_server $raid_bdev_name /dev/nbd0
		if [ $raid_level = "raid5f" ]; then
			# Write in multiples of write_unit_size blocks and raise the NBD request
			# size limit to match
			write_unit_size=$((strip_size * 2 * (num_base_bdevs - 1)))
			echo $((base_blocklen * write_unit_size / 1024)) > /sys/block/nbd0/queue/max_sectors_kb
		else
			write_unit_size=1
		fi
		dd if=/dev/urandom of=/dev/nbd0 bs=$((base_blocklen * write_unit_size)) count=$((raid_bdev_size / write_unit_size)) oflag=direct
		nbd_stop_disks $rpc_server /dev/nbd0
	fi

	# Remove one base bdev
	$rpc_py bdev_raid_remove_base_bdev ${base_bdevs[0]}

	# Check if the RAID bdev is in online state (degraded)
	verify_raid_bdev_state $raid_bdev_name "online" $raid_level $strip_size $((num_base_bdevs - 1))

	# Add bdev for rebuild
	$rpc_py bdev_raid_add_base_bdev $raid_bdev_name "spare"
	sleep 1

	# Check if rebuild started
	verify_raid_bdev_process $raid_bdev_name "rebuild" "spare"

	# Remove the rebuild target bdev
	$rpc_py bdev_raid_remove_base_bdev "spare"

	# Check if the RAID bdev is in online state (degraded)
	verify_raid_bdev_state $raid_bdev_name "online" $raid_level $strip_size $((num_base_bdevs - 1))

	# Check if rebuild was stopped
	verify_raid_bdev_process $raid_bdev_name "none" "none"

	# Again, start the rebuild
	$rpc_py bdev_raid_add_base_bdev $raid_bdev_name "spare"
	sleep 1
	verify_raid_bdev_process $raid_bdev_name "rebuild" "spare"

	# Only without background I/O. The condition has to test $background_io: testing an
	# undefined variable makes `[` fail with a usage error, so the block never runs.
	if [ $superblock = true ] && [ $background_io = false ]; then
		# Stop the RAID bdev
		$rpc_py bdev_raid_delete $raid_bdev_name
		[[ $($rpc_py bdev_raid_get_bdevs all | jq 'length') == 0 ]]

		# Remove the passthru base bdevs, then re-add them to assemble the raid bdev again
		for ((i = 0; i < num_base_bdevs; i++)); do
			$rpc_py bdev_passthru_delete ${base_bdevs[$i]}
		done
		for ((i = 0; i < num_base_bdevs; i++)); do
			$rpc_py bdev_passthru_create -b ${base_bdevs[$i]}_malloc -p ${base_bdevs[$i]}
		done

		# Check if the RAID bdev is in online state (degraded)
		verify_raid_bdev_state $raid_bdev_name "online" $raid_level $strip_size $((num_base_bdevs - 1))

		# Check if rebuild is not started
		verify_raid_bdev_process $raid_bdev_name "none" "none"

		# Again, start the rebuild
		$rpc_py bdev_raid_add_base_bdev $raid_bdev_name "spare"
		sleep 1
		verify_raid_bdev_process $raid_bdev_name "rebuild" "spare"
	fi

	local num_base_bdevs_operational=$num_base_bdevs

	if [ $raid_level = "raid1" ] && [ $num_base_bdevs -gt 2 ]; then
		# Remove one more base bdev (not rebuild target)
		$rpc_py bdev_raid_remove_base_bdev ${base_bdevs[1]}

		# Ignore this bdev later when comparing data
		base_bdevs[1]=""
		((num_base_bdevs_operational--))

		# Check if rebuild is still running
		verify_raid_bdev_process $raid_bdev_name "rebuild" "spare"
	fi

	# Wait for rebuild to finish
	# (poll once per second, for at most 30 seconds)
	local timeout=$((SECONDS + 30))
	while ((SECONDS < timeout)); do
		if ! verify_raid_bdev_process $raid_bdev_name "rebuild" "spare" > /dev/null; then
			break
		fi
		sleep 1
	done

	# Check if rebuild is not running and the RAID bdev has the correct number of operational devices
	verify_raid_bdev_process $raid_bdev_name "none" "none"
	verify_raid_bdev_state $raid_bdev_name "online" $raid_level $strip_size $num_base_bdevs_operational

	# Stop the RAID bdev
	$rpc_py bdev_raid_delete $raid_bdev_name
	[[ $($rpc_py bdev_raid_get_bdevs all | jq 'length') == 0 ]]

	if [ $verify = true ]; then
		# cmp -i skips the first data_offset blocks of both devices
		if [ $background_io = true ]; then
			# Compare data on the rebuilt and other base bdevs
			nbd_start_disks $rpc_server "spare" "/dev/nbd0"
			for bdev in "${base_bdevs[@]:1}"; do
				if [ -z "$bdev" ]; then
					continue
				fi
				nbd_start_disks $rpc_server $bdev "/dev/nbd1"
				cmp -i $((data_offset * base_blocklen)) /dev/nbd0 /dev/nbd1
				nbd_stop_disks $rpc_server "/dev/nbd1"
			done
			nbd_stop_disks $rpc_server "/dev/nbd0"
		else
			# Compare data on the removed and rebuilt base bdevs
			nbd_start_disks $rpc_server "${base_bdevs[0]} spare" "/dev/nbd0 /dev/nbd1"
			cmp -i $((data_offset * base_blocklen)) /dev/nbd0 /dev/nbd1
			nbd_stop_disks $rpc_server "/dev/nbd0 /dev/nbd1"
		fi
	fi

	if [ $superblock = true ]; then
		# Remove then re-add a base bdev to assemble the raid bdev again
		$rpc_py bdev_passthru_delete "spare"
		$rpc_py bdev_passthru_create -b "spare_delay" -p "spare"

		verify_raid_bdev_state $raid_bdev_name "online" $raid_level $strip_size $num_base_bdevs_operational
		verify_raid_bdev_process $raid_bdev_name "none" "none"
		[[ $($rpc_py bdev_raid_get_bdevs all | jq -r '.[].base_bdevs_list[0].name') == "spare" ]]

		# Remove and re-add a base bdev - rebuild should start automatically
		$rpc_py bdev_raid_remove_base_bdev "spare"
		verify_raid_bdev_state $raid_bdev_name "online" $raid_level $strip_size $((num_base_bdevs_operational - 1))
		$rpc_py bdev_raid_add_base_bdev $raid_bdev_name "spare"
		sleep 1
		verify_raid_bdev_process $raid_bdev_name "rebuild" "spare"

		# Same as above but re-add through examine
		$rpc_py bdev_passthru_delete "spare"
		verify_raid_bdev_state $raid_bdev_name "online" $raid_level $strip_size $((num_base_bdevs_operational - 1))
		$rpc_py bdev_passthru_create -b "spare_delay" -p "spare"
		sleep 1
		verify_raid_bdev_process $raid_bdev_name "rebuild" "spare"

		# Stop the rebuild
		$rpc_py bdev_passthru_delete "spare"
		verify_raid_bdev_state $raid_bdev_name "online" $raid_level $strip_size $((num_base_bdevs_operational - 1))
		verify_raid_bdev_process $raid_bdev_name "none" "none"

		# Re-adding a base bdev that was replaced (no longer is a member of the array) should not be allowed
		$rpc_py bdev_passthru_delete ${base_bdevs[0]}
		$rpc_py bdev_passthru_create -b ${base_bdevs[0]}_malloc -p ${base_bdevs[0]}
		sleep 1
		verify_raid_bdev_state $raid_bdev_name "online" $raid_level $strip_size $((num_base_bdevs_operational - 1))
		verify_raid_bdev_process $raid_bdev_name "none" "none"
		NOT $rpc_py bdev_raid_add_base_bdev $raid_bdev_name ${base_bdevs[0]}
		sleep 1
		verify_raid_bdev_state $raid_bdev_name "online" $raid_level $strip_size $((num_base_bdevs_operational - 1))
		verify_raid_bdev_process $raid_bdev_name "none" "none"
	fi

	killprocess $raid_pid

	return 0
}
786
# Inject an I/O error into the first base bdev of a raid bdev while bdevperf runs
# random read/write I/O against it, then check the resulting raid state and the
# failure rate that bdevperf reported.
#
# Each base bdev is a passthru bdev on top of an error bdev (EE_<name>_malloc) on top
# of a malloc bdev.
#
# Globals:   rootdir, rpc_server, rpc_py, tmp_dir, base_blocklen, base_malloc_params
#            (read); raid_pid (written); bdev (used as loop variable, not local)
# Arguments: $1 - raid level
#            $2 - number of base bdevs
#            $3 - I/O type to inject the failure for (the test matrix uses read and write)
# Returns:   status of the final failure-rate check
function raid_io_error_test() {
	local raid_level=$1
	local num_base_bdevs=$2
	local error_io_type=$3
	local raid_bdev_name="raid_bdev1"
	local base_bdevs=()
	local strip_size=0
	local create_arg
	local bdevperf_log
	local fail_per_s
	local expected_num_base_bdevs=$num_base_bdevs
	local idx

	# BaseBdev1 .. BaseBdev<num_base_bdevs>
	for ((idx = 1; idx <= num_base_bdevs; idx++)); do
		base_bdevs+=("BaseBdev$idx")
	done

	# Every level except raid1 gets an explicit 64 KiB strip size
	if [ $raid_level != "raid1" ]; then
		strip_size=64
		create_arg+=" -z $strip_size"
	fi

	bdevperf_log=$(mktemp -p "$tmp_dir")

	"$rootdir/build/examples/bdevperf" -r $rpc_server -T $raid_bdev_name -t 60 -w randrw -M 50 -o 128k -q 1 -z -f -L bdev_raid > $bdevperf_log &
	raid_pid=$!
	waitforlisten $raid_pid $rpc_server

	# Base bdev stack: malloc -> error injection -> passthru
	for bdev in "${base_bdevs[@]}"; do
		$rpc_py bdev_malloc_create 32 $base_blocklen $base_malloc_params -b ${bdev}_malloc
		$rpc_py bdev_error_create ${bdev}_malloc
		$rpc_py bdev_passthru_create -b EE_${bdev}_malloc -p $bdev
	done

	# RAID bdev with superblock on top of the passthru bdevs
	$rpc_py bdev_raid_create $create_arg -r $raid_level -b "${base_bdevs[*]}" -n $raid_bdev_name -s
	verify_raid_bdev_state $raid_bdev_name "online" $raid_level $strip_size $num_base_bdevs

	# Kick off the user I/O and give it a moment before injecting the error
	"$rootdir/examples/bdev/bdevperf/bdevperf.py" -s $rpc_server perform_tests &
	sleep 1

	$rpc_py bdev_error_inject_error EE_${base_bdevs[0]}_malloc $error_io_type failure

	# Only a write error on raid1 is expected to reduce the number of operational
	# base bdevs; in every other case the count stays as it was
	case "$raid_level:$error_io_type" in
		raid1:write) expected_num_base_bdevs=$((num_base_bdevs - 1)) ;;
	esac
	verify_raid_bdev_state $raid_bdev_name online $raid_level $strip_size $expected_num_base_bdevs

	$rpc_py bdev_raid_delete $raid_bdev_name

	killprocess $raid_pid

	# Failure rate is the 6th field of the bdevperf result line for the raid bdev
	# (lines containing "Job" are skipped). Levels with redundancy must have absorbed
	# the error (0.00); the others must show failures.
	fail_per_s=$(awk -v name="$raid_bdev_name" '!/Job/ && index($0, name) { print $6 }' "$bdevperf_log")
	if has_redundancy $raid_level; then
		[[ "$fail_per_s" = "0.00" ]]
	else
		[[ "$fail_per_s" != "0.00" ]]
	fi
}
850
# Test matrix. Until every test has run, any exit of the script goes through cleanup
# and is turned into a failure by the EXIT trap.
mkdir -p "$tmp_dir"
trap 'cleanup; exit 1' EXIT

base_blocklen=512

# The NBD based tests are only run on Linux when the nbd module can be loaded
if [[ $(uname -s) == Linux ]] && modprobe -n nbd; then
	has_nbd=true
	modprobe nbd
	run_test "raid_function_test_raid0" raid_function_test raid0
	run_test "raid_function_test_concat" raid_function_test concat
fi

run_test "raid0_resize_test" raid0_resize_test

# State, superblock and I/O error tests for 2, 3 and 4 base bdevs
for n in 2 3 4; do
	for level in raid0 concat raid1; do
		run_test "raid_state_function_test" raid_state_function_test $level $n false
		run_test "raid_state_function_test_sb" raid_state_function_test $level $n true
		run_test "raid_superblock_test" raid_superblock_test $level $n
		for err_io in read write; do
			run_test "raid_${err_io}_error_test" raid_io_error_test $level $n $err_io
		done
	done
done

# raid1 rebuild: arguments are <level> <n> <superblock> <background_io> <verify>
if [[ $has_nbd == true ]]; then
	for n in 2 4; do
		run_test "raid_rebuild_test" raid_rebuild_test raid1 $n false false true
		run_test "raid_rebuild_test_sb" raid_rebuild_test raid1 $n true false true
		run_test "raid_rebuild_test_io" raid_rebuild_test raid1 $n false true true
		run_test "raid_rebuild_test_sb_io" raid_rebuild_test raid1 $n true true true
	done
fi

# raid5f is only tested when it is enabled in the build configuration
if [[ $CONFIG_RAID5F == y ]]; then
	for n in 3 4; do
		run_test "raid5f_state_function_test" raid_state_function_test raid5f $n false
		run_test "raid5f_state_function_test_sb" raid_state_function_test raid5f $n true
		run_test "raid5f_superblock_test" raid_superblock_test raid5f $n
		if [[ $has_nbd == true ]]; then
			run_test "raid5f_rebuild_test" raid_rebuild_test raid5f $n false false true
			run_test "raid5f_rebuild_test_sb" raid_rebuild_test raid5f $n true false true
		fi
	done
fi

# Repeat a raid1 subset with 4096-byte blocks
base_blocklen=4096

run_test "raid_state_function_test_sb_4k" raid_state_function_test raid1 2 true
run_test "raid_superblock_test_4k" raid_superblock_test raid1 2
if [[ $has_nbd == true ]]; then
	run_test "raid_rebuild_test_sb_4k" raid_rebuild_test raid1 2 true false true
fi

# ... with extra malloc bdev parameters "-m 32" (test names: md_separate)
base_malloc_params="-m 32"
run_test "raid_state_function_test_sb_md_separate" raid_state_function_test raid1 2 true
run_test "raid_superblock_test_md_separate" raid_superblock_test raid1 2
if [[ $has_nbd == true ]]; then
	run_test "raid_rebuild_test_sb_md_separate" raid_rebuild_test raid1 2 true false true
fi

# ... and with "-m 32 -i" (test names: md_interleaved). The rebuild test is run with
# verify=false and is not guarded by has_nbd.
base_malloc_params="-m 32 -i"
run_test "raid_state_function_test_sb_md_interleaved" raid_state_function_test raid1 2 true
run_test "raid_superblock_test_md_interleaved" raid_superblock_test raid1 2
run_test "raid_rebuild_test_sb_md_interleaved" raid_rebuild_test raid1 2 true false false

# All tests passed: disarm the failure trap and clean up normally
trap - EXIT
cleanup
918